Help!php Posted March 13, 2012 Share Posted March 13, 2012 This code should read a sitemap and print results on a text file but for some reason it doesnt. any idea why or whats wrong with it
<?php

require_once 'simplehtmldom/simple_html_dom.php';

// When TRUE, progress/debug messages are echoed; hard failures are reported regardless.
$_ECHO = FALSE;
$html = new simple_html_dom();
// Input: one product page URL per line.
$printerListFileName = "pl_list.txt";
// Output: one quoted, comma-separated row per scraped printer.
$outputFileName = "pl_new.txt";

/**
 * Removes a fixed set of stray characters from a scraped value and trims it.
 *
 * NOTE(review): the characters look like fragments of mis-decoded UTF-8
 * punctuation; the list is kept exactly as it appeared in the original.
 *
 * @param string $plFieldValue Raw text taken from the page.
 * @return string The text without the listed characters, trimmed.
 */
function cleanBadChars( $plFieldValue )
{
    $badChars = array('â', '€', '™', 'â', 'Â', '“');

    return trim( str_replace( $badChars, "", $plFieldValue ) );
}

/**
 * Applies the replacement chain shared by the "offertext" and "specialhead" fields.
 *
 * NOTE(review): as written, the four steps turn "£" and "&" into placeholders and
 * straight back again. The last two replacements were presumably meant to produce
 * HTML entities that got decoded when the code was pasted - confirm against the
 * real file. The literals are intentionally left untouched here.
 *
 * @param string $text Cleaned marketing text.
 * @return string The text after the four replacements.
 */
function cleanMarketingText( $text )
{
    $text = str_replace( "£", "pound;", $text );
    $text = str_replace( "&", "amp;", $text );
    $text = str_replace( "pound;", "£", $text );
    $text = str_replace( "amp;", "&", $text );

    return $text;
}

/**
 * Converts one scraped value into the form written to the output row.
 *
 * @param string $CLPField          Output column name (a key of $DBFields in fetchPrinterDetails()).
 * @param string $plFieldValueDirty Raw text scraped for that column.
 * @return string The cleaned value with double quotes removed and whitespace trimmed;
 *                "" when the column has no rule or the "category" rule finds no match.
 */
function cleanDBField( $CLPField, $plFieldValueDirty )
{
    global $_ECHO;

    if ( $_ECHO ) echo "$CLPField=>$plFieldValueDirty<br />";

    $plFieldValueClean = cleanBadChars( $plFieldValueDirty );

    // Defined up front so unmatched categories and unknown columns yield ""
    // instead of reading an undefined variable.
    $clpFieldValueClean = "";

    switch ( $CLPField )
    {
        case "productid":
            $clpFieldValueClean = str_replace( "Item Number: ", "", $plFieldValueClean );
            $clpFieldValueClean = str_replace( "/", "_", $clpFieldValueClean );
            break;

        case "name":
            // Everything after the first word (the first word is the manufacturer).
            $clpFieldValueClean = trim( (string) strstr( $plFieldValueClean, " " ) );
            // "Konica Minolta ..." has a two-word manufacturer, so drop one more word.
            if ( strpos( $clpFieldValueClean, "Minolta" ) !== false )
                $clpFieldValueClean = (string) strstr( $clpFieldValueClean, " " );
            break;

        case "manufacturer":
            // First word of the headline. explode() also copes with a single-word
            // headline, where the old substr( .., 0, strpos() ) returned nothing.
            $words = explode( " ", $plFieldValueClean );
            $clpFieldValueClean = ucfirst( strtolower( $words[ 0 ] ) );
            if ( strpos( $clpFieldValueClean, "Konica" ) !== false )
                $clpFieldValueClean .= " Minolta";
            if ( strpos( $clpFieldValueClean, "Hp" ) !== false )
                $clpFieldValueClean = "HP";
            break;

        case "format":
        case "maxram":
        case "description":
        case "paper":
        case "additional":
            // Stored as scraped.
            $clpFieldValueClean = $plFieldValueClean;
            break;

        case "platform":
            // Join only the platforms that are actually mentioned; the old code
            // always joined two slots and produced values such as "Windows /".
            $platforms = array();
            foreach ( array( "Windows", "Mac" ) as $platform )
            {
                if ( strpos( $plFieldValueClean, $platform ) !== false )
                    $platforms[] = $platform;
            }
            $clpFieldValueClean = implode( " / ", $platforms );
            break;

        case "bwppm":
        case "cppm":
            // Take the text before the first "i", or failing that the first "p",
            // and keep it only if it is a positive integer.
            $unitPos = strpos( $plFieldValueClean, "i" );
            if ( $unitPos === false )
                $unitPos = strpos( $plFieldValueClean, "p" );
            $num = $unitPos === false ? 0 : intval( substr( $plFieldValueClean, 0, $unitPos ) );
            $clpFieldValueClean = $num > 0 ? $num : "NULL";
            break;

        case "resolution":
            // Text before "dpi"; empty when "dpi" does not occur.
            $dpiPos = strpos( $plFieldValueClean, "dpi" );
            $clpFieldValueClean = $dpiPos === false ? "" : substr( $plFieldValueClean, 0, $dpiPos );
            break;

        case "ram":
            $ramValues = explode( " ", $plFieldValueClean );
            $clpFieldValueClean = str_replace( array( "MB", "GB", "KB" ), "", $ramValues[ 0 ] );
            break;

        case "ethernet":
            $clpFieldValueClean = strpos( $plFieldValueClean, "Yes" ) !== false ? "Yes" : "NULL";
            break;

        case "usb":
            $clpFieldValueClean = strlen( $plFieldValueClean ) > 1 ? "Yes" : "NULL";
            break;

        case "firstprint":
            $firstPrintWords = explode( " ", $plFieldValueClean );
            $clpFieldValueClean = $firstPrintWords[ 0 ];
            break;

        case "parallel":
            $clpFieldValueClean = strpos( $plFieldValueClean, "Parallel" ) !== false ? "Yes" : "NULL";
            break;

        case "duplex":
            $clpFieldValueClean = strlen( $plFieldValueClean ) < 2 ? "Manual" : $plFieldValueClean;
            break;

        case "printmethod":
            $clpFieldValueClean = str_replace( " Printer", "", $plFieldValueClean );
            $clpFieldValueClean = str_replace( " Fax", "", $clpFieldValueClean );
            break;

        case "category":
            // First keyword found wins; the order below is the original priority.
            $categories = array( "Multifunction", "Laser", "Fax", "Dot Matrix", "Inkjet", "Label", "Scanner", "Thermal" );
            foreach ( $categories as $category )
            {
                if ( strpos( $plFieldValueClean, $category ) !== false )
                {
                    if ( $category == "Laser" )
                        $category = strpos( $plFieldValueClean, "Colour" ) !== false ? "Colour Laser" : "Mono Laser";
                    $clpFieldValueClean = $category;
                    break;
                }
            }
            break;

        case "rrp":
            // Keep the text from the first "£" onwards, then drop the "£" signs.
            $clpFieldValueClean = str_replace( "£", "", (string) strstr( $plFieldValueClean, "£" ) );
            break;

        case "offertext":
            if ( $plFieldValueClean == "" )
                $clpFieldValueClean = "New Low price On This Printer";
            else
                $clpFieldValueClean = cleanMarketingText( $plFieldValueClean ) . " <br /><br />Offer ends";
            break;

        case "specialhead":
            if ( $plFieldValueClean == "" )
                $clpFieldValueClean = "New Low price On This Printer";
            else
                $clpFieldValueClean = cleanMarketingText( $plFieldValueClean );
            break;

        default:
            break;
    }

    // Values are wrapped in double quotes by the caller, so embedded quotes are removed.
    $clpFieldValueClean = str_replace( "\"", "", (string) $clpFieldValueClean );

    if ( $_ECHO ) echo "$CLPField=>$clpFieldValueClean<br />";

    return trim( $clpFieldValueClean );
}

/**
 * Finds the value belonging to the first spec title that matches a candidate name.
 *
 * @param array $specTitles Nodes returned by $html->find( ".specleftitem" ).
 * @param array $possFields Candidate title texts, compared against the trimmed title.
 * @return string|null Plain text of the node following the matching title,
 *                     or null when no title matches or the title has no following node.
 */
function findSpecValue( $specTitles, $possFields )
{
    foreach ( $specTitles as $specTitle )
    {
        if ( !in_array( trim( $specTitle->plaintext ), $possFields, true ) )
            continue;

        $valueNode = $specTitle->next_sibling();
        if ( $valueNode )
            return $valueNode->plaintext;
    }

    return null;
}

/**
 * Scrapes one product page and appends a row to the output file.
 *
 * The page is skipped when its Keywords meta tag marks it as ex-demo, box-open or
 * discontinued, as an accessory, or as neither a printer nor a fax. A page without
 * that meta tag is scraped. Nothing is written when the page cannot be fetched.
 *
 * @param string $printerURL Location of the product page, passed to file_get_contents().
 * @return void
 */
function fetchPrinterDetails( $printerURL )
{
    global $html;
    global $outputFileName;
    global $_ECHO;

    $fileContents = file_get_contents( $printerURL );
    if ( $fileContents === false )
    {
        // Always reported: a silent skip looks exactly like "the script does nothing".
        echo "Could not fetch $printerURL<br />";
        return;
    }

    if ( $_ECHO ) echo "Scraping details started for " . $printerURL . "<br />";

    $html->load( $fileContents );

    $stop = FALSE;
    $metas = $html->find( "meta[name=Keywords]" );
    if ( isset( $metas[ 0 ] ) )
    {
        $keywords = strtoupper( $metas[ 0 ]->content );

        $isOldStock = strpos( $keywords, "EXDEMO" ) !== false
            || strpos( $keywords, "BOXOPEN" ) !== false
            || strpos( $keywords, "BOX OPEN" ) !== false
            || strpos( $keywords, "DISCONTINUED" ) !== false;
        $isPrinterOrFax = strpos( $keywords, "PRINTER" ) !== false
            || strpos( $keywords, "FAX" ) !== false;
        $isAccessory = strpos( $keywords, "ACCESSORIES" ) !== false;

        $stop = $isOldStock || !$isPrinterOrFax || $isAccessory;
    }
    else if ( $_ECHO ) echo "<meta> tag NOT FOUND<br />";

    // Dont bother with non-current printers
    if ( $stop )
    {
        if ( $_ECHO ) echo "Ignoring $printerURL<br />";
        return;
    }

    // Output column => where its value comes from:
    //   "NULL"        column is always written as NULL
    //   "DEFAULT=x"   column is always written as x
    //   contains "@"  one or more spec-table titles separated by "@"
    //   anything else a selector passed to $html->find()
    $DBFields = array
    (
        "productid"      => "#ctl00_placeholderMain_lblItem",
        "name"           => "#ctl00_placeholderMain_lblProductHead",
        "manufacturer"   => "h1",
        "format"         => "@Product Group Output",
        "platform"       => "@Operating Systems Supported",
        "height"         => "NULL",
        "width"          => "NULL",
        "depth"          => "NULL",
        "weight"         => "NULL",
        "bwppm"          => "@Speed Monochrome",
        "cppm"           => "@Speed Colour",
        "resolution"     => "Printer Resolution@Printer Enhanced Resolution",
        "ram"            => "@Memory (Maximum)",
        "maxram"         => "NULL",
        "ethernet"       => "@Network Ready",
        "parallel"       => "@Interface Type(s)",
        "usb"            => "USB Port@USB Ports",
        "firstprint"     => "First Page@Print First Page",
        "warmupprint"    => "NULL",
        "duplex"         => "@Double Sided Printing",
        "printmethod"    => "@Technology",
        "relability"     => "NULL",
        "standby"        => "NULL",
        "running"        => "NULL",
        "category"       => "h1",
        "description"    => ".productdescriptioncontainer",
        "rrp"            => "#ctl00_placeholderMain_lbltxtProductPrice",
        "printspeed"     => "NULL",
        "large"          => "NULL",
        "discont"        => "NULL",
        "pdf"            => "DEFAULT=1",
        "paper"          => "@Paper Handling Input 1",
        "multi"          => "NULL",
        "additional"     => "@Paper Handling Input 2",
        "CPppma3"        => "NULL",
        "CPppm"          => "NULL",
        "CPram"          => "NULL",
        "CPmaxram"       => "NULL",
        "CPresolution"   => "NULL",
        "Fmodem"         => "NULL",
        "Fresolution"    => "NULL",
        "Fcompatability" => "NULL",
        "Fram"           => "NULL",
        "Fmaxram"        => "NULL",
        "SCspeed"        => "NULL",
        "SCresolution"   => "NULL",
        "SCmodes"        => "NULL",
        "specialid"      => "DEFAULT=1",
        "offertext"      => "#ctl00_placeholderMain_lblMareketingText",
        "image"          => "NULL",
        "promo"          => "NULL",
        "metatag"        => "NULL",
        "metadescrip"    => "NULL",
        "pricerunner"    => "NULL",
        "google"         => "NULL",
        "offerdate"      => "NULL",
        "specialtext"    => "NULL",
        "specialhead"    => "#ctl00_placeholderMain_lblMareketingText"
    );

    // The spec titles do not change between columns, so look them up once.
    $specTitles = $html->find( ".specleftitem" );

    $columns = array();
    foreach ( $DBFields as $CLPField => $PLField )
    {
        if ( $PLField == "NULL" )
        {
            $columns[] = "NULL";
        }
        elseif ( strpos( $PLField, "DEFAULT=" ) !== false )
        {
            $columns[] = str_replace( "DEFAULT=", "", $PLField );
        }
        elseif ( strpos( $PLField, "@" ) !== false )
        {
            // This is a Spec field so we will need to work out which one.
            // A leading "@" yields an empty candidate, which is dropped so that it
            // cannot match a spec title with no text.
            $possFields = array_filter( explode( "@", $PLField ), 'strlen' );
            $specValue = findSpecValue( $specTitles, $possFields );

            // Every column must appear in the row, even when the page has no
            // spec table at all; otherwise later columns shift to the left.
            $columns[] = $specValue === null ? "NULL" : cleanDBField( $CLPField, $specValue );
        }
        else
        {
            // Found the field in the PL page ?
            $plFieldValue = $html->find( $PLField );
            $columns[] = isset( $plFieldValue[ 0 ] )
                ? cleanDBField( $CLPField, $plFieldValue[ 0 ]->plaintext )
                : "NULL";
        }
    }

    $line = '"' . implode( '","', $columns ) . '"' . "\n";

    if ( file_put_contents( $outputFileName, $line, FILE_APPEND ) === false )
    {
        echo "Could not write to $outputFileName<br />";
        return;
    }

    if ( $_ECHO ) echo "Scraping details completed for " . $printerURL . "<br />";
}

/**
 * Rebuilds the output file from the list of product page URLs.
 *
 * Reads $printerListFileName line by line, deletes any existing $outputFileName,
 * and calls fetchPrinterDetails() for every non-blank line. Ends the script when
 * the list file is missing or cannot be opened.
 *
 * @return void
 */
function scrapePrinters()
{
    global $printerListFileName;
    global $outputFileName;
    global $_ECHO;

    set_time_limit( 0 );

    if ( file_exists( $printerListFileName ) == FALSE )
    {
        // Reported even when $_ECHO is off, so a missing list is never a silent exit.
        echo "Cannnot find $printerListFileName so quitting...<br />";
        exit(0);
    }

    if ( $_ECHO ) echo "Deleting existing $outputFileName file...<br />";
    if ( file_exists( $outputFileName ) == TRUE )
        unlink( $outputFileName );

    // List of PL printers taken from sitemap page of PL website
    $fp = fopen( $printerListFileName, "r" );
    if ( $fp === false )
    {
        echo "Cannot open $printerListFileName so quitting...<br />";
        exit(0);
    }

    if ( $_ECHO ) echo "Fetching printer details started...<br />";

    // Compare against false explicitly: a line containing only "0" is still a line.
    while ( ( $printerURL = fgets( $fp ) ) !== false )
    {
        $printerURL = trim( $printerURL );
        if ( $printerURL == "" )
            continue; // blank lines are not URLs

        fetchPrinterDetails( $printerURL );
    }

    if ( $_ECHO ) echo "Fetching printer details completed...<br />";

    fclose( $fp );
}

scrapePrinters();

/**
 * Manual check: runs fetchPrinterDetails() over a fixed list of page names.
 *
 * Not called anywhere in the visible code. The names are relative, so
 * file_get_contents() resolves them against the current working directory.
 *
 * @return void
 */
function test()
{
    $files = array
    (
        "OKI-C810n-Box-Opened--P110692.aspx",
        "HP-1320-P4453.aspx",
        "Waste-Toner-Cleaner-Pack-12-000-Pages--P48796.aspx",
        "Lexmark-C543dn-P6117.aspx",
        "Brother-FAX-T104-P11767.aspx",
        "Black-Toner-3500-pages--P110364.aspx",
        "Lexmark-X544dn-P9732.aspx",
        "EB-05-IEEE-1394-Expansion-Board-P30721.aspx",
        "Kodak-Photo-Paper-Gloss-A4-210-x-297mm-20-Sheets-165gsm--P13998.aspx",
        "Xerox-7600-P13571.aspx"
    );

    echo "STARTING...<br />";

    foreach ( $files as $file )
    {
        fetchPrinterDetails( trim( $file ) );
    }

    echo "DONE...<br />";
}

?>
Quote Link to comment https://forums.phpfreaks.com/topic/258814-whats-wrong-with-this-code-no-result/ Share on other sites More sharing options...
trq Posted March 13, 2012 Share Posted March 13, 2012 You might want to at least narrow your problem down to something more specific. Dumping a bunch of code with a "this doesn't work" won't generally get you very far. Quote Link to comment https://forums.phpfreaks.com/topic/258814-whats-wrong-with-this-code-no-result/#findComment-1326756 Share on other sites More sharing options...
Help!php Posted March 13, 2012 Author Share Posted March 13, 2012 My code should read a sitemap and fetch all the information that isn't already in the database. For now it doesn't do that, so how would I go about finding the issue in this code? Hope that makes sense. Quote Link to comment https://forums.phpfreaks.com/topic/258814-whats-wrong-with-this-code-no-result/#findComment-1326759 Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.