Jump to content

[SOLVED] MySQL result resource errors when inserting into DB


hellonoko

Recommended Posts

My below code crawls through a blog and the inserts the found links into my database.

 

However I am receiving the following error for each time I try to insert a link:

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 74

 

Line 74 compares the link to be inserted with existing rows to avoid duplicates.

 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	$query = mysql_query("SELECT * FROM links WHERE link=$link LIMIT 1");

	$rows = mysql_num_rows($query);

	if ( $rows == 0)
	{
		$query = "INSERT INTO links (link) VALUES ('$link')";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 			//echo "<br>";
    		} 
	} 
} 

 

I am also noticing that even with this error about 1200 rows are inserted when it should be just about 600.

 

This code worked fine in another version of the page any idea what I am doing wrong?

 

Thanks

 

<?php

mysql_connect("localhost","sharingi_ian","*****")or die ("Could not connect to database");
mysql_select_db("sharingi_scrape") or die ("Could not select database");

//$target_url = "http://empreintes-digitales.fr";
$target_url = 'http://redthreat.wordpress.com/';
//$target_url= 'http://www.kissatlanta.com/blog/';
//$target_url= 'http://www.empreintes-digitales.fr/';

$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

// crawl first page
$clean_links = crawl_page( $target_url, $userAgent, $list_links);

// seperates links into links that are direct mp3 links and other links.
//

foreach($clean_links as $key => $value) 
{ 
  		if( strpos( $value, ".mp3") !== FALSE) 
	{ 
		$mp3_links[] = $value;
  		}
	else
	{
		$other_links[] = $value;
	}
} 

$mp3_links = array_values($mp3_links); 
$other_links = array_values($other_links); 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';
}

echo '<br>';

foreach ($other_links as $link)       
{
   		echo $link.'<br>';
}

/////// crawls second layer of links

foreach ($other_links as $link)       
{
   		$clean_links = crawl_page( $link , $userAgent, $list_links);

	foreach($clean_links as $key => $value) 
	{ 
  			if( strpos( $value, ".mp3") !== FALSE) 
		{ 
			$mp3_links[] = $value;
  			}
		else
		{
			$other_links[] = $value;
		}
	} 

	$mp3_links = array_values($mp3_links); 
	$other_links = array_values($other_links); 
}    

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	$query = mysql_query("SELECT * FROM links WHERE link=$link LIMIT 1");

	$rows = mysql_num_rows($query);

	if ( $rows == 0)
	{
		$query = "INSERT INTO links (link) VALUES ('$link')";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 			//echo "<br>";
    		} 
	} 
}

echo '<br>';

foreach ($other_links as $link)       
{
   		echo $link.'<br>';
	if (mysql_num_rows(mysql_query("SELECT * FROM links WHERE link=$link LIMIT 1")) == 0)
	{
		$query = "INSERT INTO links (link) VALUES ('$link')";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; 
    		}
} 

}


echo $links_count;


function crawl_page( $target_url, $userAgent, $links)
{
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
	curl_setopt($ch, CURLOPT_URL,$target_url);
	curl_setopt($ch, CURLOPT_FAILONERROR, false);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
	curl_setopt($ch, CURLOPT_AUTOREFERER, true);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
	curl_setopt($ch, CURLOPT_TIMEOUT, 100);

	$html = curl_exec($ch);

	if (!$html) 
	{
		echo "<br />cURL error number:" .curl_errno($ch);
		echo "<br />cURL error:" . curl_error($ch);
		exit;
	}

	//
	// load scrapped data into the DOM
	//

	$dom = new DOMDocument();
	@$dom->loadHTML($html);

	//
	// get only LINKS from the DOM with XPath
	//

	$xpath = new DOMXPath($dom);
	$hrefs = $xpath->evaluate("/html/body//a");

	//
	// go through all the links and store to db or whatever
	//


	for ($i = 0; $i < $hrefs->length; $i++) 
	{
		$href = $hrefs->item($i);
		$url = $href->getAttribute('href');

		//if the $url does not contain the web site base address: http://www.thesite.com/ then add it onto the front

		$clean_link = checkURL( $url, $target_url);
		$clean_link = str_replace( "http://" , "" , $clean_link);
		$clean_link = str_replace( "//" , "/" , $clean_link);

		$links[] = $clean_link;

		//removes empty array values

		foreach($links as $key => $value) 
		{ 
  				if($value == "") 
			{ 
    				unset($links[$key]); 
  				} 
		} 

		$links = array_values($links); 
	}	

	return $links; 
}


function checkURL($url, $target_url)
{

	if ( strpos($url, ".mp3") !== FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
		return $url;
	}

	$pos = strpos($url , $target_url);

	if ( $pos === FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
	}
	else
	{
		//echo 'COMPLETE: '.$url;
		//echo '<br><br>';

		return $url;
	}
}	
?>

Well there are two instances of it but yes it is mysql_num_rows() that is giving the error.

 

I was able to make it mostly work by cleaning up my query using ` `

 

But now I can see on the links that it still errors with they have ' or " in the names.

 

Examples:

 

rednicko.com/080923/Klaxons-Gravity'sRainbow(Guns'N'BombsFreakoutRemix).mp3

 

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 76

rednicko.com/080923/GhostfaceKiller-CharlieBrown(Guns'N'BombsRemix).mp3

 

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 76

 

Here:

if ($link != NULL)
	{
		$exists = mysql_query("SELECT * FROM `links` WHERE link = '$link' LIMIT 1");

		$rows = mysql_num_rows($exists);

		if ( $rows == 0)
		{

			$type = "mp3";

			$query = "INSERT INTO links (`link`, `type`) VALUES ('$link' ,'$type' )";
    	
			if ($result = mysql_query($query)) 
			{
     	 			$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 				//echo "<br>";
    			} 
		} 
	}

 

Only errors on links that contain ' and possibly "

 

 

 

 

fingers crossed


<?php

if ($link != NULL)
      {
         $exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1");
      
         $rows = mysql_num_rows($exists);
         
         if ( $rows == 0)
         {
            
            $type = "mp3";
            
            $query = "INSERT INTO links (`link`, `type`) VALUES ('".mysql_real_escape_string($link)."' ,'".mysql_escape_string($type)."' )";
       
            if ($result = mysql_query($query))
            {
                  $link_count = $link_count + 1; //echo "<b>link added to db</b>";
                //echo "<br>";
             }
         }
      }
?>

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.