Jump to content

[SOLVED] Warning: mysql_num_rows(): error when inserting links into DB


hellonoko

Recommended Posts

I am receiving errors when I try to put an array of scraped URLs into a DB.

 

Error:

this.bigstereo.net/wp-content/uploads/2009/03/tomorrow-wow-remix.mp3

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 82
this.bigstereo.net/wp-content/uploads/2009/03/01 Counterpoint 1.mp3

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 82
this.bigstereo.net/wp-content/uploads/2009/03/Lips (Spruce Lee Inner Jungle Mix).mp3

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 82

 

I was having similar problems with links that contained ' or " but cleaned up my query with mysql_real_escape_string() and was working perfect gathering links from another site.

 

Can't see what the problem is with this. Any suggestions?

 

Line 82 is:

			$rows = mysql_num_rows($exists);

 

Thanks

 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	if ($link != NULL)
	{
		$exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1");

		$rows = mysql_num_rows($exists);

		if ( $rows == 0)
		{

			$type = "mp3";

			$query = "INSERT INTO links (`link`, `type`) VALUES ('".mysql_real_escape_string($link)."' ,'".mysql_real_escape_string($type)."' )";
    	
			if ($result = mysql_query($query)) 
			{
     	 			$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 				//echo "<br>";
    			} 
		} 
	}
}

You probably have SQL errors, rerun your script and tell me if there are any errors displayed..

 

         $exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1") or die(mysql_error());

Errors:

Notice: Undefined variable: list_links in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 21

Once

 

Notice: Undefined variable: list_links in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 58

Many times.

 

MySQL server has gone away

At end.

 

Full code:

<?php

ini_set ("display_errors", "1");
error_reporting(E_ALL);

mysql_connect("localhost","sharingi_ian","***")or die ("Could not connect to database");
mysql_select_db("sharingi_scrape") or die ("Could not select database");

//$target_url = "http://empreintes-digitales.fr";
//$target_url = 'http://redthreat.wordpress.com/';
//$target_url= 'http://www.kissatlanta.com/blog/';
//$target_url= 'http://www.empreintes-digitales.fr/';

//$target_url = 'http://electrorash.com/';

$target_url = 'http://this.bigstereo.net/';

$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

// crawl first page
$clean_links = crawl_page( $target_url, $userAgent, $list_links);

// seperates links into links that are direct mp3 links and other links.
//

foreach($clean_links as $key => $value) 
{ 
  		if( strpos( $value, ".mp3") !== FALSE) 
	{ 
		$mp3_links[] = $value;
  		}
	else
	{
		$other_links[] = $value;
	}
} 

$mp3_links = array_values($mp3_links); 
$other_links = array_values($other_links); 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';
}

echo '<br>';

foreach ($other_links as $link)       
{
   		echo $link.'<br>';
}

/////// crawls second layer of links

foreach ($other_links as $link)       
{
   		
	$clean_links = crawl_page( $link , $userAgent, $list_links);

	foreach($clean_links as $key => $value) 
	{ 
  			if( strpos( $value, ".mp3") !== FALSE) 
		{ 
			$mp3_links[] = $value;
  			}
		else
		{
			$other_links[] = $value;
		}

	} 

	$mp3_links = array_values($mp3_links); 
	$other_links = array_values($other_links); 
}    

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	if ($link != NULL)
	{
		$exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1") or die(mysql_error());

		$rows = mysql_num_rows($exists);

		if ( $rows == 0)
		{

			$type = "mp3";

			$query = "INSERT INTO links (`link`, `type`) VALUES ('".mysql_real_escape_string($link)."' ,'".mysql_real_escape_string($type)."' )";
    	
			if ($result = mysql_query($query)) 
			{
     	 			$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 				//echo "<br>";
    			} 
		} 
	}
}
echo '<br>';

foreach ($other_links as $link)       
{
	$type = "link";

   		echo $link.'<br>';
	if (mysql_num_rows(mysql_query("SELECT * FROM `links` WHERE link = '$link' LIMIT 1")) == 0)
	{
		$query = "INSERT INTO links ( `link` , `type` ) VALUES ('$link' , '$type' )";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 			//echo "<br>";
    		}
	} 

}


echo $links_count;


function crawl_page( $target_url, $userAgent, $links)
{
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
	curl_setopt($ch, CURLOPT_URL,$target_url);
	curl_setopt($ch, CURLOPT_FAILONERROR, false);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
	curl_setopt($ch, CURLOPT_AUTOREFERER, true);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
	curl_setopt($ch, CURLOPT_TIMEOUT, 100);

	$html = curl_exec($ch);

	if (!$html) 
	{
		echo "<br />cURL error number:" .curl_errno($ch);
		echo "<br />cURL error:" . curl_error($ch);
		exit;
	}

	//
	// load scrapped data into the DOM
	//

	$dom = new DOMDocument();
	@$dom->loadHTML($html);

	//
	// get only LINKS from the DOM with XPath
	//

	$xpath = new DOMXPath($dom);
	$hrefs = $xpath->evaluate("/html/body//a");

	//
	// go through all the links and store to db or whatever
	//


	for ($i = 0; $i < $hrefs->length; $i++) 
	{
		$href = $hrefs->item($i);
		$url = $href->getAttribute('href');

		//if the $url does not contain the web site base address: http://www.thesite.com/ then add it onto the front

		$clean_link = checkURL( $url, $target_url);
		$clean_link = str_replace( "http://" , "" , $clean_link);
		$clean_link = str_replace( "//" , "/" , $clean_link);

		$links[] = $clean_link;

		//removes empty array values

		foreach($links as $key => $value) 
		{ 
  				if($value == "") 
			{ 
    				unset($links[$key]); 
  				} 
		} 

		$links = array_values($links); 
	}	

	return $links; 
}


function checkURL($url, $target_url)
{

	if ( strpos($url, ".mp3") !== FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
		return $url;
	}

	$pos = strpos($url , $target_url);

	if ( $pos === FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
	}
	else
	{
		//echo 'COMPLETE: '.$url;
		//echo '<br><br>';

		return $url;
	}
}	
?>

 

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.