Jump to content

mysql_fetch_assoc(): supplied argument is not a valid MySQL result?


ask9

Recommended Posts

The error is this,

 

PHP Warning:  mysql_fetch_assoc(): supplied argument is not a valid MySQL result resource in /home/coder9/public_html/spider/email.scraper.php on line 98

 

The codes are these,

 

<?php
/*
Written by: Aziz S. Hussain
Email: [email protected]
Website: www.azizsaleh.com
Produced under GPL License
*/

class scraper
{
// URL that stores first URL to start
var $startURL;

// List of allowed page extensions
var $allowedExtensions = array('.css','.xml','.rss','.ico','.js','.gif','.jpg','.jpeg','.png','.bmp','.wmv'
	,'.avi','.mp3','.flash','.swf','.css');

// Which URL to scrape
var $useURL;

// Start path, for links that are relative
var $startPath;

// Set start path
function setStartPath($path = NULL){
	if($path != NULL)
	{
		$this->startPath = $path;
	} else {
		$temp = explode('/',$this->startURL);
		$this->startPath = $temp[0].'//'.$temp[2];
	}
}

// Add the start URL
function startURL($theURL){
	// Set start URL
	$this->startURL = $theURL;
}

// Function to get URL contents
function getContents($url)
{
	$ch = curl_init(); // initialize curl handle
	curl_setopt($ch, CURLOPT_HEADER, 0);
	curl_setopt($ch, CURLOPT_VERBOSE, 0);
	curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible;)");
	curl_setopt($ch, CURLOPT_AUTOREFERER, false);
	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT,7);
	curl_setopt($ch, CURLOPT_REFERER, 'http://'.$this->useURL);
	curl_setopt($ch, CURLOPT_URL,$url); // set url to post to
	curl_setopt($ch, CURLOPT_FAILONERROR, 1);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);// allow redirects
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,1); // return into a variable
	curl_setopt($ch, CURLOPT_TIMEOUT, 50); // times out after 50s
	curl_setopt($ch, CURLOPT_POST, 0); // set POST method
	$buffer = curl_exec($ch); // run the whole process
	curl_close($ch); 
	return $buffer;
}

// Actually do the URLS
function startScraping()
{
	// Get page content
	$pageContent = $this->getContents($this->startURL);
	echo 'Scraping URL: '.$this->startURL.PHP_EOL;

	// Get list of all emails on page
	preg_match_all('/([\w+\.]*\w+@[\w+\.]*\w+[\w+\-\w+]*\.\w+)/is',$pageContent,$results);
	// Add the email to the email list array
	$insertCount=0;
	foreach($results[1] as $curEmail)
	{
		$insert = mysql_query("INSERT INTO 'emaillist' ('emailadd') VALUES ('$curEmail')");
		if($insert){$insertCount++;}
	}

	echo 'Emails found: '.number_format($insertCount).PHP_EOL;

	// Mark the page done
	$insert = mysql_query("INSERT INTO 'finishedurls' ('urlname') VALUES ('".$this->startURL."')");

	// Get list of new page URLS is emails were found on previous page
	preg_match_all('/href="([^"]+)"/Umis',$pageContent,$results);
	$currentList = $this->cleanListURLs($results[1]);

	$insertURLCount=0;
	// Add the list to the array
	foreach($currentList as $curURL)
	{
		$insert = mysql_query("INSERT INTO 'workingurls' ('urlname') VALUES ('$curURL')");
		if($insert){$insertURLCount++;}
	}

	echo 'URLs found: '.number_format($insertURLCount).PHP_EOL;

	$getURL = mysql_fetch_assoc(mysql_query("SELECT 'urlname' FROM 'workingurls' ORDER BY ASC LIMIT 1"));
	$remove = mysql_query("DELETE FROM 'workingurls' WHERE 'urlname'='$getURL[urlname]' LIMIT 1");

	// Get the new page ready
	$this->startURL = $getURL['urlname'];
	$this->setStartPath();

	// If no more pages, return
	if($this->startURL == NULL){ return;}
	// Clean vars
	unset($results,$pageContent);
	// If more pages, loop again
	$this->startScraping();
}

// Function to clean input URLS
function cleanListURLs($linkList)
{	
	foreach($linkList as $sub => $url)
	{
		// Check if only 1 character - there must exist at least / character
		if(strlen($url) <= 1){unset($linkList[$sub]);}
		// Check for any javascript
		if(eregi('javascript',$url)){unset($linkList[$sub]);}
		// Check for invalid extensions
		str_replace($this->allowedExtensions,'',$url,$count);
		if($count > 0){ unset($linkList[$sub]);}
		// If URL starts with #, ignore
		if(substr($url,0,1) == '#'){unset($linkList[$sub]);}

		// If everything is OK and path is relative, add starting path
		if(substr($url,0,1) == '/' || substr($url,0,1) == '?' || substr($url,0,1) == '='){
			$linkList[$sub] = $this->startPath.$url;
		}
	}
	return $linkList;
}

function drop_table() {
	mysql_query("DROP TABLE emaillist;");
	mysql_query("DROP TABLE finishedurls;");
	mysql_query("DROP TABLE workingurls;");
}

function show_emails() {
	$result = mysql_query("SELECT * FROM emaillist");

	while($row = mysql_fetch_array($result)){
		echo $row['emailadd'];
		echo "<br />";
	}	
}

}
?>

 

 

Thanks in advanced.

 

 

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.