Jump to content

What's wrong with this code? No result


Help!php

Recommended Posts

This code should read a sitemap and write the results to a text file, but for some reason it doesn't. Any idea why, or what's wrong with it?

 

// Third-party HTML parser that provides the simple_html_dom class used below.
require_once 'simplehtmldom/simple_html_dom.php';

// Diagnostic switch: the functions below only echo progress messages when this is true.
$_ECHO = false;

// File read line by line for the pages to scrape, and the file the scraped rows are appended to.
$printerListFileName = 'pl_list.txt';
$outputFileName = 'pl_new.txt';

// Single parser instance, reached from the functions below through `global $html`.
$html = new simple_html_dom();

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
//		cleanBadChars()
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Removes a fixed list of stray characters from a value and trims the result.
 *
 * The characters look like fragments of mis-decoded punctuation
 * (NOTE(review): presumably UTF-8 read as Latin-1 -- confirm against the scraped pages).
 *
 * @param string $plFieldValue Raw text to clean.
 * @return string The text with every listed character removed and surrounding whitespace trimmed.
 */
function cleanBadChars( $plFieldValue )
{
	// Every occurrence of each of these is deleted, in this order.
	$unwanted = array('â', '€', '™', 'â', 'Â', '“');
	$stripped = $plFieldValue;

	foreach ( $unwanted as $char )
	{
		$stripped = str_replace( $char, "", $stripped );
	}

	return trim( $stripped );
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
//		cleanDBField()
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Normalises the raw text scraped for one output column.
 *
 * The value is first passed through cleanBadChars(), then rewritten according to
 * the column name, then stripped of double quotes and trimmed.
 *
 * Columns without a rule, and "category" values that match no known keyword,
 * yield an empty string.
 *
 * @param string $CLPField          Name of the output column (e.g. "productid", "platform").
 * @param string $plFieldValueDirty Raw text taken from the scraped page.
 * @return string The normalised value; the literal string "NULL" marks "no value" for several columns.
 */
function cleanDBField( $CLPField, $plFieldValueDirty )
{
	global $_ECHO;

	if ( $_ECHO ) echo "$CLPField=>$plFieldValueDirty<br />";

	$plFieldValueClean = cleanBadChars( $plFieldValueDirty );

	// Defined up front so that an unknown column or an unmatched category
	// falls through to "" instead of reading an undefined variable.
	$clpFieldValueClean = "";

	switch( $CLPField )
	{
		case "productid":
			$clpFieldValueClean = str_replace( "Item Number:  ", "", $plFieldValueClean );
			// Slashes are replaced so the id contains none.
			$clpFieldValueClean = str_replace( "/", "_", $clpFieldValueClean );
		break;

		case "name":
			// Drop the first word of the heading.
			$clpFieldValueClean = trim( (string) strstr( $plFieldValueClean, " " ) );

			// Names that still contain "Minolta" lose one further leading word.
			if ( strpos( $clpFieldValueClean, "Minolta" ) !== false )
				$clpFieldValueClean = (string) strstr( $clpFieldValueClean, " " );
		break;

		case "manufacturer":
			// First word of the heading; a heading without any space is used whole
			// (the previous substr()/strpos() pair produced "" in that case).
			$words = explode( " ", $plFieldValueClean );
			$clpFieldValueClean = ucfirst( strtolower( $words[ 0 ] ) );

			if ( strpos( $clpFieldValueClean, "Konica" ) !== false )
				$clpFieldValueClean .= " Minolta";

			if ( strpos( $clpFieldValueClean, "Hp" ) !== false )
				$clpFieldValueClean = "HP";
		break;

		case "platform":
			// Collect only the platforms actually mentioned, so the separator is
			// used solely when both are present.
			$platforms = array();

			foreach ( array( "Windows", "Mac" ) as $platformName )
			{
				if ( strpos( $plFieldValueClean, $platformName ) !== false )
					$platforms[] = $platformName;
			}

			$clpFieldValueClean = implode( " / ", $platforms );
		break;

		case "bwppm":
		case "cppm":
			// Keep the text before the first "i", or failing that before the first "p",
			// and read it as a whole number.
			$cut = strpos( $plFieldValueClean, "i" );

			if ( $cut === false )
				$cut = strpos( $plFieldValueClean, "p" );

			$num = $cut === false ? 0 : intval( substr( $plFieldValueClean, 0, $cut ) );
			$clpFieldValueClean = $num > 0 ? (string) $num : "NULL";
		break;

		case "resolution":
			// Everything before "dpi"; empty when the marker is absent.
			$dpiPos = strpos( $plFieldValueClean, "dpi" );
			$clpFieldValueClean = $dpiPos === false ? "" : substr( $plFieldValueClean, 0, $dpiPos );
		break;

		case "ram":
			// First space-separated token with its unit suffix removed.
			$ramValues = explode( " ", $plFieldValueClean );
			$clpFieldValueClean = str_replace( array( "MB", "GB", "KB" ), "", $ramValues[ 0 ] );
		break;

		case "ethernet":
			$clpFieldValueClean = strpos( $plFieldValueClean, "Yes" ) !== false ? "Yes" : "NULL";
		break;

		case "usb":
			// Any value longer than one character counts as "has USB".
			$clpFieldValueClean = strlen( $plFieldValueClean ) > 1 ? "Yes" : "NULL";
		break;

		case "firstprint":
			$firstPrintParts = explode( " ", $plFieldValueClean );
			$clpFieldValueClean = $firstPrintParts[ 0 ];
		break;

		case "parallel":
			$clpFieldValueClean = strpos( $plFieldValueClean, "Parallel" ) !== false ? "Yes" : "NULL";
		break;

		case "duplex":
			$clpFieldValueClean = strlen( $plFieldValueClean ) < 2 ? "Manual" : $plFieldValueClean;
		break;

		case "printmethod":
			$clpFieldValueClean = str_replace( array( " Printer", " Fax" ), "", $plFieldValueClean );
		break;

		case "category":
			// First matching keyword wins; the order below is significant.
			if ( strpos( $plFieldValueClean, "Multifunction" ) !== false )
				$clpFieldValueClean = "Multifunction";
			elseif ( strpos( $plFieldValueClean, "Laser" ) !== false )
				$clpFieldValueClean = strpos( $plFieldValueClean, "Colour" ) !== false ? "Colour Laser" : "Mono Laser";
			elseif ( strpos( $plFieldValueClean, "Fax" ) !== false )
				$clpFieldValueClean = "Fax";
			elseif ( strpos( $plFieldValueClean, "Dot Matrix" ) !== false )
				$clpFieldValueClean = "Dot Matrix";
			elseif ( strpos( $plFieldValueClean, "Inkjet" ) !== false )
				$clpFieldValueClean = "Inkjet";
			elseif ( strpos( $plFieldValueClean, "Label" ) !== false )
				$clpFieldValueClean = "Label";
			elseif ( strpos( $plFieldValueClean, "Scanner" ) !== false )
				$clpFieldValueClean = "Scanner";
			elseif ( strpos( $plFieldValueClean, "Thermal" ) !== false )
				$clpFieldValueClean = "Thermal";
		break;

		// Columns copied through unchanged (apart from the common clean-up).
		case "format":
		case "maxram":
		case "description":
		case "paper":
		case "additional":
			$clpFieldValueClean = $plFieldValueClean;
		break;

		case "rrp":
			// Keep the text from the first pound sign onwards, without the sign itself.
			$fromPound = strstr( $plFieldValueClean, "£" );
			$clpFieldValueClean = $fromPound === false ? "" : str_replace( "£", "", $fromPound );
		break;

		case "offertext":
		case "specialhead":
			if ( $plFieldValueClean == "" )
				$clpFieldValueClean = "New Low price On This Printer";
			else
			{
				// "£" and "&" are swapped for placeholder tokens and back again. The net
				// effect is that a literal "pound;" or "amp;" already present in the text
				// ends up as "£" or "&".
				$clpFieldValueClean = str_replace( "£", "pound;", $plFieldValueClean );
				$clpFieldValueClean = str_replace( "&", "amp;", $clpFieldValueClean );
				$clpFieldValueClean = str_replace( "pound;", "£", $clpFieldValueClean );
				$clpFieldValueClean = str_replace( "amp;", "&", $clpFieldValueClean );

				// Only the offer text carries the trailing "Offer ends" marker.
				if ( $CLPField == "offertext" )
					$clpFieldValueClean .= " <br /><br />Offer ends";
			}
		break;

		default:
		break;
	}

	// Double quotes are removed from every value.
	$clpFieldValueClean = str_replace( "\"", "", $clpFieldValueClean );

	if ( $_ECHO ) echo "$CLPField=>$clpFieldValueClean<br />";

	return( trim( $clpFieldValueClean ) );
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
//		fetchPrinterDetails()
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Scrapes one product page and appends one quoted, comma-separated row to the output file.
 *
 * Pages are skipped (nothing is written) when the page cannot be fetched, or when the
 * Keywords meta tag marks the product as ex-demo, box-open, discontinued or an accessory,
 * or does not mention a printer or fax.
 *
 * Every entry of the column map produces exactly one value in the row, so the column
 * count is the same for every page; values that cannot be found are written as "NULL".
 *
 * @param string $printerURL URL or path handed to file_get_contents().
 * @return void
 */
function fetchPrinterDetails( $printerURL )
{
	global $html;
	global $outputFileName;
	global $_ECHO;

	// An empty path cannot be fetched, so there is nothing to scrape.
	if ( $printerURL === "" )
		return;

	if ( $_ECHO ) echo "Scraping details started for " . $printerURL . "<br />";

	$fileContents = file_get_contents( $printerURL );

	// Without this check a failed download would be parsed as an empty page
	// and written out as a row of placeholder values.
	if ( $fileContents === false )
	{
		if ( $_ECHO ) echo "Could not fetch $printerURL<br />";
		return;
	}

	$html->load( $fileContents );

	$stop = FALSE;
	$metas = $html->find( "meta[name=Keywords]" );

	if ( isset( $metas[ 0 ] ) )
	{
		$keywords = strtoupper( $metas[ 0 ]->content );

		// Any of these keywords excludes the page outright.
		foreach ( array( "EXDEMO", "BOXOPEN", "BOX OPEN", "DISCONTINUED", "ACCESSORIES" ) as $marker )
		{
			if ( strpos( $keywords, $marker ) !== false )
			{
				$stop = TRUE;
				break;
			}
		}

		// Otherwise the page must mention a printer or a fax to be kept.
		if ( !$stop )
			$stop = strpos( $keywords, "PRINTER" ) === false && strpos( $keywords, "FAX" ) === false;
	}
	else
		if ( $_ECHO ) echo "<meta> tag NOT FOUND<br />";

	// Dont bother with non-current  printers 
	if ( $stop )
	{
		if ( $_ECHO ) echo "Ignoring $printerURL<br />";
		return;
	}

	// Output column => where its value comes from:
	//   "NULL"         literal placeholder
	//   "DEFAULT=x"    fixed value x
	//   "A@B" / "@A"   spec-table row whose title equals one of the "@"-separated names
	//   anything else  selector passed to $html->find()
	$DBFields = array 
	( 
		"productid" => "#ctl00_placeholderMain_lblItem", 
		"name" => "#ctl00_placeholderMain_lblProductHead", 
		"manufacturer" => "h1", 
		"format" => "@Product Group Output",
		"platform" => "@Operating Systems Supported", 
		"height" => "NULL", 
		"width" => "NULL", 
		"depth" => "NULL", 
		"weight" => "NULL", 
		"bwppm" => "@Speed Monochrome", 
		"cppm" => "@Speed Colour",
		"resolution" => "Printer Resolution@Printer Enhanced Resolution",
		"ram" => "@Memory (Maximum)",
		"maxram" => "NULL",
		"ethernet" => "@Network Ready",
		"parallel" => "@Interface Type(s)", 
		"usb" => "USB Port@USB Ports", 
		"firstprint" => "First Page@Print First Page", 
		"warmupprint" => "NULL", 
		"duplex" => "@Double Sided Printing", 
		"printmethod" => "@Technology", 
		"relability" => "NULL", 
		"standby" => "NULL", 
		"running" => "NULL", 
		"category" => "h1", 
		"description" => ".productdescriptioncontainer", 
		"rrp" => "#ctl00_placeholderMain_lbltxtProductPrice", 
		"printspeed" => "NULL", 
		"large" => "NULL", 
		"discont" => "NULL", 
		"pdf" => "DEFAULT=1", 
		"paper" => "@Paper Handling Input 1", 
		"multi" => "NULL", 
		"additional" => "@Paper Handling Input 2", 
		"CPppma3" => "NULL", 
		"CPppm" => "NULL", 
		"CPram" => "NULL", 
		"CPmaxram" => "NULL", 
		"CPresolution" => "NULL", 
		"Fmodem" => "NULL", 
		"Fresolution" => "NULL", 
		"Fcompatability" => "NULL", 
		"Fram" => "NULL", 
		"Fmaxram" => "NULL", 
		"SCspeed" => "NULL", 
		"SCresolution" => "NULL", 
		"SCmodes" => "NULL", 
		"specialid" => "DEFAULT=1", 
		"offertext" => "#ctl00_placeholderMain_lblMareketingText", 
		"image" => "NULL", 
		"promo" => "NULL", 
		"metatag" => "NULL", 
		"metadescrip" => "NULL", 
		"pricerunner" => "NULL", 
		"google" => "NULL", 
		"offerdate" => "NULL", 
		"specialtext" => "NULL", 
		"specialhead" => "#ctl00_placeholderMain_lblMareketingText" 
	);

	// The spec titles do not depend on the column, so look them up once per page.
	$specTitles = $html->find( ".specleftitem" );

	$values = array();

	foreach( $DBFields as $CLPField => $PLField )
	{
		if ( $_ECHO ) echo $PLField;

		if ( $PLField == "NULL" )
		{
			$values[] = "NULL";
		}
		elseif ( strpos( $PLField, "DEFAULT=" ) !== false )
		{
			$values[] = str_replace( "DEFAULT=", "", $PLField );
		}
		elseif ( strpos( $PLField, "@" ) !== false )
		{
			// This is a Spec field so we will need to work out which one
			$clpFieldValue = "NULL";
			$possFields = explode( "@", $PLField );

			// Loop thru all spec items; the first title matching any candidate wins
			foreach( $specTitles as $specTitle )
			{
				$title = trim( $specTitle->plaintext );

				// A leading "@" leaves an empty candidate; never let it match an empty title.
				if ( $title === "" || !in_array( $title, $possFields, true ) )
					continue;

				$valueNode = $specTitle->next_sibling();

				// A title without a following value node carries no data.
				if ( !$valueNode )
					continue;

				$clpFieldValue = cleanDBField( $CLPField, $valueNode->plaintext );
				break;
			}

			// Exactly one value per column, whether or not the spec row exists.
			$values[] = $clpFieldValue;
		}
		else
		{
			$plFieldValue = $html->find( $PLField );

			// Found the field in the PL page ?
			if ( isset( $plFieldValue[ 0 ] ) )
				$values[] = cleanDBField( $CLPField, $plFieldValue[ 0 ]->plaintext );
			else
				$values[] = "NULL";
		}
	}

	// Each value is wrapped in double quotes, values are comma-separated, one row per line.
	$line = '"' . implode( '","', $values ) . '"' . "\n";

	$fp = fopen( $outputFileName, "a" );

	if ( $fp === false )
	{
		if ( $_ECHO ) echo "Cannot open $outputFileName for writing<br />";
		return;
	}

	fputs( $fp, $line );
	fclose( $fp );

	if ( $_ECHO ) echo "Scraping details completed for " . $printerURL . "<br />";
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
//		scrapePrinters()
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Runs the whole scrape: reads the list file line by line and calls
 * fetchPrinterDetails() for every non-blank line.
 *
 * Any existing output file is deleted first, so each run starts from an empty file.
 * The script exits if the list file does not exist; the function returns early if
 * the list file cannot be opened.
 *
 * @return void
 */
function scrapePrinters()
{
	global $printerListFileName;
	global $outputFileName;
	global $_ECHO;

	// Remove the execution time limit for this run.
	set_time_limit( 0 );

	if ( file_exists( $printerListFileName ) == FALSE )
	{
		if($_ECHO) echo "Cannnot find $printerListFileName so quitting...<br />";
		exit(0);
	}

	if($_ECHO) echo "Deleting existing $outputFileName file...<br />";

	if ( file_exists( $outputFileName ) == TRUE )
		unlink( $outputFileName );

	// List of PL printers taken from sitemap page of PL website
	$fp = fopen( $printerListFileName, "r" );

	// The file can exist and still be unreadable; fgets() must not be handed a failed handle.
	if ( $fp === false )
	{
		if($_ECHO) echo "Cannot open $printerListFileName for reading<br />";
		return;
	}

	if($_ECHO) echo "Fetching printer details started...<br />";

	// Compare against false explicitly so that a falsy line (e.g. a final "0")
	// does not end the loop early.
	while ( ( $printerURL = fgets( $fp ) ) !== false )
	{
		$printerURL = trim( $printerURL );

		// Blank lines carry no URL to fetch.
		if ( $printerURL === "" )
			continue;

		fetchPrinterDetails( $printerURL );
	}

	if($_ECHO) echo "Fetching printer details completed...<br />";

	fclose( $fp );
}

// Entry point: the scrape runs as soon as this file is executed.
scrapePrinters();

/**
 * Manual check: runs fetchPrinterDetails() over a fixed list of page names and
 * echoes a start and end marker. Not called anywhere in the visible source.
 *
 * @return void
 */
function test()
{
	global $html;

	echo "STARTING...<br />";

	// Fixed sample of page names passed to fetchPrinterDetails().
	$samplePages = array(
		"OKI-C810n-Box-Opened--P110692.aspx",
		"HP-1320-P4453.aspx",
		"Waste-Toner-Cleaner-Pack-12-000-Pages--P48796.aspx",
		"Lexmark-C543dn-P6117.aspx",
		"Brother-FAX-T104-P11767.aspx",
		"Black-Toner-3500-pages--P110364.aspx",
		"Lexmark-X544dn-P9732.aspx",
		"EB-05-IEEE-1394-Expansion-Board-P30721.aspx",
		"Kodak-Photo-Paper-Gloss-A4-210-x-297mm-20-Sheets-165gsm--P13998.aspx",
		"Xerox-7600-P13571.aspx"
	);

	$total = count( $samplePages );

	for ( $i = 0; $i < $total; $i++ )
	{
		fetchPrinterDetails( trim( $samplePages[ $i ] ) );
	}

	echo "DONE...<br />";
}

Link to comment
https://forums.phpfreaks.com/topic/258814-whats-wrong-with-this-code-no-result/
Share on other sites

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.