Jump to content

[SOLVED] Warning: mysql_num_rows(): error when inserting links into DB


hellonoko

Recommended Posts

I am receiving errors when I try to put an array of scraped URLs into a DB.

 

Error:

this.bigstereo.net/wp-content/uploads/2009/03/tomorrow-wow-remix.mp3

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 82
this.bigstereo.net/wp-content/uploads/2009/03/01 Counterpoint 1.mp3

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 82
this.bigstereo.net/wp-content/uploads/2009/03/Lips (Spruce Lee Inner Jungle Mix).mp3

Warning: mysql_num_rows(): supplied argument is not a valid MySQL result resource in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 82

 

I was having similar problems with links that contained ' or " but cleaned up my query with mysql_real_escape_string() and was working perfect gathering links from another site.

 

Can't see what the problem is with this. Any suggestions?

 

Line 82 is:

			$rows = mysql_num_rows($exists);

 

Thanks

 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	if ($link != NULL)
	{
		$exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1");

		$rows = mysql_num_rows($exists);

		if ( $rows == 0)
		{

			$type = "mp3";

			$query = "INSERT INTO links (`link`, `type`) VALUES ('".mysql_real_escape_string($link)."' ,'".mysql_real_escape_string($type)."' )";
    	
			if ($result = mysql_query($query)) 
			{
     	 			$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 				//echo "<br>";
    			} 
		} 
	}
}

Link to comment
Share on other sites

You probably have SQL errors, rerun your script and tell me if there are any errors displayed..

 

         $exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1") or die(mysql_error());

Link to comment
Share on other sites

Errors:

Notice: Undefined variable: list_links in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 21

Once

 

Notice: Undefined variable: list_links in /home2/sharingi/public_html/scrape/url_scraperV2.php on line 58

Many times.

 

MySQL server has gone away

At end.

 

Full code:

<?php

ini_set ("display_errors", "1");
error_reporting(E_ALL);

mysql_connect("localhost","sharingi_ian","***")or die ("Could not connect to database");
mysql_select_db("sharingi_scrape") or die ("Could not select database");

//$target_url = "http://empreintes-digitales.fr";
//$target_url = 'http://redthreat.wordpress.com/';
//$target_url= 'http://www.kissatlanta.com/blog/';
//$target_url= 'http://www.empreintes-digitales.fr/';

//$target_url = 'http://electrorash.com/';

$target_url = 'http://this.bigstereo.net/';

$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

// crawl first page
$clean_links = crawl_page( $target_url, $userAgent, $list_links);

// seperates links into links that are direct mp3 links and other links.
//

foreach($clean_links as $key => $value) 
{ 
  		if( strpos( $value, ".mp3") !== FALSE) 
	{ 
		$mp3_links[] = $value;
  		}
	else
	{
		$other_links[] = $value;
	}
} 

$mp3_links = array_values($mp3_links); 
$other_links = array_values($other_links); 

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';
}

echo '<br>';

foreach ($other_links as $link)       
{
   		echo $link.'<br>';
}

/////// crawls second layer of links

foreach ($other_links as $link)       
{
   		
	$clean_links = crawl_page( $link , $userAgent, $list_links);

	foreach($clean_links as $key => $value) 
	{ 
  			if( strpos( $value, ".mp3") !== FALSE) 
		{ 
			$mp3_links[] = $value;
  			}
		else
		{
			$other_links[] = $value;
		}

	} 

	$mp3_links = array_values($mp3_links); 
	$other_links = array_values($other_links); 
}    

foreach ($mp3_links as $link)       
{
   		echo $link.'<br>';

	if ($link != NULL)
	{
		$exists = mysql_query("SELECT * FROM `links` WHERE link = '".mysql_real_escape_string($link)."' LIMIT 1") or die(mysql_error());

		$rows = mysql_num_rows($exists);

		if ( $rows == 0)
		{

			$type = "mp3";

			$query = "INSERT INTO links (`link`, `type`) VALUES ('".mysql_real_escape_string($link)."' ,'".mysql_real_escape_string($type)."' )";
    	
			if ($result = mysql_query($query)) 
			{
     	 			$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 				//echo "<br>";
    			} 
		} 
	}
}
echo '<br>';

foreach ($other_links as $link)       
{
	$type = "link";

   		echo $link.'<br>';
	if (mysql_num_rows(mysql_query("SELECT * FROM `links` WHERE link = '$link' LIMIT 1")) == 0)
	{
		$query = "INSERT INTO links ( `link` , `type` ) VALUES ('$link' , '$type' )";
    	
		if ($result = mysql_query($query)) 
		{
     	 		$link_count = $link_count + 1; //echo "<b>link added to db</b>";
 			//echo "<br>";
    		}
	} 

}


echo $links_count;


function crawl_page( $target_url, $userAgent, $links)
{
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
	curl_setopt($ch, CURLOPT_URL,$target_url);
	curl_setopt($ch, CURLOPT_FAILONERROR, false);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
	curl_setopt($ch, CURLOPT_AUTOREFERER, true);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
	curl_setopt($ch, CURLOPT_TIMEOUT, 100);

	$html = curl_exec($ch);

	if (!$html) 
	{
		echo "<br />cURL error number:" .curl_errno($ch);
		echo "<br />cURL error:" . curl_error($ch);
		exit;
	}

	//
	// load scrapped data into the DOM
	//

	$dom = new DOMDocument();
	@$dom->loadHTML($html);

	//
	// get only LINKS from the DOM with XPath
	//

	$xpath = new DOMXPath($dom);
	$hrefs = $xpath->evaluate("/html/body//a");

	//
	// go through all the links and store to db or whatever
	//


	for ($i = 0; $i < $hrefs->length; $i++) 
	{
		$href = $hrefs->item($i);
		$url = $href->getAttribute('href');

		//if the $url does not contain the web site base address: http://www.thesite.com/ then add it onto the front

		$clean_link = checkURL( $url, $target_url);
		$clean_link = str_replace( "http://" , "" , $clean_link);
		$clean_link = str_replace( "//" , "/" , $clean_link);

		$links[] = $clean_link;

		//removes empty array values

		foreach($links as $key => $value) 
		{ 
  				if($value == "") 
			{ 
    				unset($links[$key]); 
  				} 
		} 

		$links = array_values($links); 
	}	

	return $links; 
}


function checkURL($url, $target_url)
{

	if ( strpos($url, ".mp3") !== FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
		return $url;
	}

	$pos = strpos($url , $target_url);

	if ( $pos === FALSE )
	{
		if ( strpos($url , "http") === FALSE )
		{
			//echo 'FIXED: ';
			$url = $target_url."/".$url;
			//echo '<br><br>';

			return $url;
		}
	}
	else
	{
		//echo 'COMPLETE: '.$url;
		//echo '<br><br>';

		return $url;
	}
}	
?>

 

Link to comment
Share on other sites

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.