Jump to content

[SOLVED] MySQL result resource errors when inserting into DB


hellonoko

Recommended Posts

My below code crawls through a blog and the inserts the found links into my database.

 

However I am receiving the following error for each time I try to insert a link:

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 74

 

Line 74 compares the link to be inserted with existing rows to avoid duplicates.

 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	$query = mysql_query("SELECT * FROM links WHERE link=$link LIMIT 1");

	$rows = mysql_num_rows($query);

	if ( $rows == 0)
	{
		$query = "INSERT INTO links (link) VALUES ('$link')";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 			//echo "<br>";
    		} 
	} 
} 

 

I am also noticing that even with this error about 1200 rows are inserted when it should be just about 600.

 

This code worked fine in another version of the page any idea what I am doing wrong?

 

Thanks

 

<?php

mysql_connect("localhost","sharingi_ian","*****")or die ("Could not connect to database");
mysql_select_db("sharingi_scrape") or die ("Could not select database");

//$target_url = "http://empreintes-digitales.fr";
$target_url = 'http://redthreat.wordpress.com/';
//$target_url= 'http://www.kissatlanta.com/blog/';
//$target_url= 'http://www.empreintes-digitales.fr/';

$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

// crawl first page
$clean_links = crawl_page( $target_url, $userAgent, $list_links);

// seperates links into links that are direct mp3 links and other links.
//

foreach($clean_links as $key => $value) 
{ 
  		if( strpos( $value, ".mp3") !== FALSE) 
	{ 
		$mp3_links[] = $value;
  		}
	else
	{
		$other_links[] = $value;
	}
} 

$mp3_links = array_values($mp3_links); 
$other_links = array_values($other_links); 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';
}

echo '<br>';

foreach ($other_links as $link)       
{
   		echo $link.'<br>';
}

/////// crawls second layer of links

foreach ($other_links as $link)       
{
   		$clean_links = crawl_page( $link , $userAgent, $list_links);

	foreach($clean_links as $key => $value) 
	{ 
  			if( strpos( $value, ".mp3") !== FALSE) 
		{ 
			$mp3_links[] = $value;
  			}
		else
		{
			$other_links[] = $value;
		}
	} 

	$mp3_links = array_values($mp3_links); 
	$other_links = array_values($other_links); 
}    

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	$query = mysql_query("SELECT * FROM links WHERE link=$link LIMIT 1");

	$rows = mysql_num_rows($query);

	if ( $rows == 0)
	{
		$query = "INSERT INTO links (link) VALUES ('$link')";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 			//echo "<br>";
    		} 
	} 
}

echo '<br>';

foreach ($other_links as $link)       
{
   		echo $link.'<br>';
	if (mysql_num_rows(mysql_query("SELECT * FROM links WHERE link=$link LIMIT 1")) == 0)
	{
		$query = "INSERT INTO links (link) VALUES ('$link')";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; 
    		}
} 

}


echo $links_count;


function crawl_page( $target_url, $userAgent, $links)
{
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
	curl_setopt($ch, CURLOPT_URL,$target_url);
	curl_setopt($ch, CURLOPT_FAILONERROR, false);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
	curl_setopt($ch, CURLOPT_AUTOREFERER, true);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
	curl_setopt($ch, CURLOPT_TIMEOUT, 100);

	$html = curl_exec($ch);

	if (!$html) 
	{
		echo "<br />cURL error number:" .curl_errno($ch);
		echo "<br />cURL error:" . curl_error($ch);
		exit;
	}

	//
	// load scrapped data into the DOM
	//

	$dom = new DOMDocument();
	@$dom->loadHTML($html);

	//
	// get only LINKS from the DOM with XPath
	//

	$xpath = new DOMXPath($dom);
	$hrefs = $xpath->evaluate("/html/body//a");

	//
	// go through all the links and store to db or whatever
	//


	for ($i = 0; $i < $hrefs->length; $i++) 
	{
		$href = $hrefs->item($i);
		$url = $href->getAttribute('href');

		//if the $url does not contain the web site base address: http://www.thesite.com/ then add it onto the front

		$clean_link = checkURL( $url, $target_url);
		$clean_link = str_replace( "http://" , "" , $clean_link);
		$clean_link = str_replace( "//" , "/" , $clean_link);

		$links[] = $clean_link;

		//removes empty array values

		foreach($links as $key => $value) 
		{ 
  				if($value == "") 
			{ 
    				unset($links[$key]); 
  				} 
		} 

		$links = array_values($links); 
	}	

	return $links; 
}


function checkURL($url, $target_url)
{

	if ( strpos($url, ".mp3") !== FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
		return $url;
	}

	$pos = strpos($url , $target_url);

	if ( $pos === FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
	}
	else
	{
		//echo 'COMPLETE: '.$url;
		//echo '<br><br>';

		return $url;
	}
}	
?>

Link to comment
Share on other sites

Well there are two instances of it but yes it is mysql_num_rows() that is giving the error.

 

I was able to make it mostly work by cleaning up my query using ` `

 

But now I can see on the links that it still errors with they have ' or " in the names.

 

Examples:

 

rednicko.com/080923/Klaxons-Gravity'sRainbow(Guns'N'BombsFreakoutRemix).mp3

 

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 76

rednicko.com/080923/GhostfaceKiller-CharlieBrown(Guns'N'BombsRemix).mp3

 

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 76

 

Link to comment
Share on other sites

Here:

if ($link != NULL)
	{
		$exists = mysql_query("SELECT * FROM `links` WHERE link = '$link' LIMIT 1");

		$rows = mysql_num_rows($exists);

		if ( $rows == 0)
		{

			$type = "mp3";

			$query = "INSERT INTO links (`link`, `type`) VALUES ('$link' ,'$type' )";
    	
			if ($result = mysql_query($query)) 
			{
     	 			$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 				//echo "<br>";
    			} 
		} 
	}

 

Only errors on links that contain ' and possibly "

 

 

 

Link to comment
Share on other sites

 

fingers crossed


<?php

if ($link != NULL)
      {
         $exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1");
      
         $rows = mysql_num_rows($exists);
         
         if ( $rows == 0)
         {
            
            $type = "mp3";
            
            $query = "INSERT INTO links (`link`, `type`) VALUES ('".mysql_real_escape_string($link)."' ,'".mysql_escape_string($type)."' )";
       
            if ($result = mysql_query($query))
            {
                  $link_count = $link_count + 1; //echo "<b>link added to db</b>";
                //echo "<br>";
             }
         }
      }
?>

Link to comment
Share on other sites

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.