Jump to content

[SOLVED] WTF?! Variable value disapearing?


hellonoko

Recommended Posts

I have a simple site scraper that uses CURL.

 

I was just adding some code to it that compares certain links to the base site address and then completes them if necessary. For example if it scans:

 

index.php it makes that link into http://www.site.com/login.php

 

My problem is that $site_url (line 3) at the very top of the page echos correctly.

 

But then when I get down to line 91 where I have put in a echo to test it. Nothing. Empty.

 

These are the only two times it is called. Where is it going? I even tried adding a echo at the very bottom end of my code to make sure it wasn't just me using it in the wrong place or something.

 

Any ideas? Thanks.

 

<?php

echo $site_url = 'http://www.empreintes-digitales.fr<br><br>';
$target_url = 'http://www.empreintes-digitales.fr/index.php?post=794';

//$target_url = 'http://redthreat.wordpress.com/';
//$target_url= 'http://www.kissatlanta.com/blog/';
//$target_url= 'http://www.empreintes-digitales.fr/';

$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

crawl_page( $target_url, $userAgent);

function crawl_page( $target_url, $userAgent)
{
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
	curl_setopt($ch, CURLOPT_URL,$target_url);
	curl_setopt($ch, CURLOPT_FAILONERROR, true);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
	curl_setopt($ch, CURLOPT_AUTOREFERER, true);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
	curl_setopt($ch, CURLOPT_TIMEOUT, 10);

	$html = curl_exec($ch);

	if (!$html) 
	{
		echo "<br />cURL error number:" .curl_errno($ch);
		echo "<br />cURL error:" . curl_error($ch);
		exit;
	}

	//
	// load scrapped data into the DOM
	//

	$dom = new DOMDocument();
	@$dom->loadHTML($html);

	//
	// get only LINKS from the DOM with XPath
	//

	$xpath = new DOMXPath($dom);
	$hrefs = $xpath->evaluate("/html/body//a");

	//
	// go through all the links and store to db or whatever
	//
	for ($i = 0; $i < $hrefs->length; $i++) 
	{
		$href = $hrefs->item($i);
		$url = $href->getAttribute('href');

		$links_1[$link] = $url;

		echo $url;
		echo '<br>';
	}



	for ($i = 0; $i < $hrefs->length; $i++) 
	{
		$href = $hrefs->item($i);
		$url = $href->getAttribute('href');

		$find = ".mp3";
		$pos = strpos($url, $find);

		if ($pos === false) 
		{
   			
		} 
		else 
		{
   			// function to store to db
			//storeLink($url,$target_url);
			echo 'File: ';
			echo $url;
			echo '<br>';

			$last_slash = strripos( $url ,"/");

			$clean_file_name = substr( $url , $last_slash + 1 , strlen($url) );

			//fixes the url if it does not have a FULL address

			echo $site_url;

			if ( strstr( $url , $base_url) !=  TRUE )
			{
				echo '<b>BROKEN URL</b><br>';
				echo $base_url;
				$url = $base_url . $url;
				echo 'FIXED URL: '.$url.'<BR>';

			}
			exit();

			echo 'From: ';
			echo $target_url;
			echo '<br>';

			//directory to copy to (must be CHMOD to 777)
			$copydir = "/home2/sharingi/public_html/scrape/scraped/";

			$data = file_get_contents($url);
			$file = fopen($copydir . $clean_file_name, "w+");
			fputs($file, $data);
			fclose($file);

			//$savefile="tempimg/".time().".jpg";


			//$ch = curl_init ($copydir);
			//$fp = fopen ($copydir . $clean_file_name, "w+");
			//curl_setopt ($ch, CURLOPT_FILE, $fp);
			//curl_setopt ($ch, CURLOPT_HEADER, 0);
			//curl_exec ($ch);
			//curl_close ($ch);
			//fclose ($fp);

			//echo "Coppied!";
			//echo "<br><br>";
		}	
	}

}

?>

 

 

Link to comment
https://forums.phpfreaks.com/topic/149937-solved-wtf-variable-value-disapearing/
Share on other sites

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.