Jump to content

mysql_fetch_assoc(): supplied argument is not a valid MySQL result?


Recommended Posts

The error is this,

 

PHP Warning:  mysql_fetch_assoc(): supplied argument is not a valid MySQL result resource in /home/coder9/public_html/spider/email.scraper.php on line 98

 

The codes are these,

 

<?php
/*
Written by: Aziz S. Hussain
Email: azizsaleh@gmail.com
Website: www.azizsaleh.com
Produced under GPL License
*/

class scraper
{
// URL that stores first URL to start
var $startURL;

// List of allowed page extensions
var $allowedExtensions = array('.css','.xml','.rss','.ico','.js','.gif','.jpg','.jpeg','.png','.bmp','.wmv'
	,'.avi','.mp3','.flash','.swf','.css');

// Which URL to scrape
var $useURL;

// Start path, for links that are relative
var $startPath;

// Set start path
function setStartPath($path = NULL){
	if($path != NULL)
	{
		$this->startPath = $path;
	} else {
		$temp = explode('/',$this->startURL);
		$this->startPath = $temp[0].'//'.$temp[2];
	}
}

// Add the start URL
function startURL($theURL){
	// Set start URL
	$this->startURL = $theURL;
}

// Function to get URL contents
function getContents($url)
{
	$ch = curl_init(); // initialize curl handle
	curl_setopt($ch, CURLOPT_HEADER, 0);
	curl_setopt($ch, CURLOPT_VERBOSE, 0);
	curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible;)");
	curl_setopt($ch, CURLOPT_AUTOREFERER, false);
	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT,7);
	curl_setopt($ch, CURLOPT_REFERER, 'http://'.$this->useURL);
	curl_setopt($ch, CURLOPT_URL,$url); // set url to post to
	curl_setopt($ch, CURLOPT_FAILONERROR, 1);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);// allow redirects
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,1); // return into a variable
	curl_setopt($ch, CURLOPT_TIMEOUT, 50); // times out after 50s
	curl_setopt($ch, CURLOPT_POST, 0); // set POST method
	$buffer = curl_exec($ch); // run the whole process
	curl_close($ch); 
	return $buffer;
}

// Actually do the URLS
function startScraping()
{
	// Get page content
	$pageContent = $this->getContents($this->startURL);
	echo 'Scraping URL: '.$this->startURL.PHP_EOL;

	// Get list of all emails on page
	preg_match_all('/([\w+\.]*\w+@[\w+\.]*\w+[\w+\-\w+]*\.\w+)/is',$pageContent,$results);
	// Add the email to the email list array
	$insertCount=0;
	foreach($results[1] as $curEmail)
	{
		$insert = mysql_query("INSERT INTO 'emaillist' ('emailadd') VALUES ('$curEmail')");
		if($insert){$insertCount++;}
	}

	echo 'Emails found: '.number_format($insertCount).PHP_EOL;

	// Mark the page done
	$insert = mysql_query("INSERT INTO 'finishedurls' ('urlname') VALUES ('".$this->startURL."')");

	// Get list of new page URLS is emails were found on previous page
	preg_match_all('/href="([^"]+)"/Umis',$pageContent,$results);
	$currentList = $this->cleanListURLs($results[1]);

	$insertURLCount=0;
	// Add the list to the array
	foreach($currentList as $curURL)
	{
		$insert = mysql_query("INSERT INTO 'workingurls' ('urlname') VALUES ('$curURL')");
		if($insert){$insertURLCount++;}
	}

	echo 'URLs found: '.number_format($insertURLCount).PHP_EOL;

	$getURL = mysql_fetch_assoc(mysql_query("SELECT 'urlname' FROM 'workingurls' ORDER BY ASC LIMIT 1"));
	$remove = mysql_query("DELETE FROM 'workingurls' WHERE 'urlname'='$getURL[urlname]' LIMIT 1");

	// Get the new page ready
	$this->startURL = $getURL['urlname'];
	$this->setStartPath();

	// If no more pages, return
	if($this->startURL == NULL){ return;}
	// Clean vars
	unset($results,$pageContent);
	// If more pages, loop again
	$this->startScraping();
}

// Function to clean input URLS
function cleanListURLs($linkList)
{	
	foreach($linkList as $sub => $url)
	{
		// Check if only 1 character - there must exist at least / character
		if(strlen($url) <= 1){unset($linkList[$sub]);}
		// Check for any javascript
		if(eregi('javascript',$url)){unset($linkList[$sub]);}
		// Check for invalid extensions
		str_replace($this->allowedExtensions,'',$url,$count);
		if($count > 0){ unset($linkList[$sub]);}
		// If URL starts with #, ignore
		if(substr($url,0,1) == '#'){unset($linkList[$sub]);}

		// If everything is OK and path is relative, add starting path
		if(substr($url,0,1) == '/' || substr($url,0,1) == '?' || substr($url,0,1) == '='){
			$linkList[$sub] = $this->startPath.$url;
		}
	}
	return $linkList;
}

function drop_table() {
	mysql_query("DROP TABLE emaillist;");
	mysql_query("DROP TABLE finishedurls;");
	mysql_query("DROP TABLE workingurls;");
}

function show_emails() {
	$result = mysql_query("SELECT * FROM emaillist");

	while($row = mysql_fetch_array($result)){
		echo $row['emailadd'];
		echo "<br />";
	}	
}

}
?>

 

 

Thanks in advanced.

 

 

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.