Help!php Posted March 12, 2012 Share Posted March 12, 2012 Html Dom parser gets price and few other information from a different website and adds this to my database. In my database there is URL row which is used to look at their website and find the information for each product. All the new ones should get the URL and save this to the database and add the price but this is not working. It works with the URL that is inside my database but the new ones arent detected but it should. My code is shown below. Please help require_once 'simplehtmldom/simple_html_dom.php'; $_ECHO = FALSE; $outputFileName = "sitemap.txt"; set_time_limit( 0 ); echo "MYSQL: Connecting to DB...<br />"; // Connect to DB $con = mysql_connect( "localhost", "root", "" ); // Select DB mysql_select_db( "db", $con ); // Get exisiting list of PL product URLs $qry = "SELECT * FROM SITE WHERE NOT ISNULL(productid) AND price= '0.00' ORDER BY productid ASC"; $result = mysql_query ( $qry, $con ); $html = new simple_html_dom(); echo "START updating PLSITEMAP DB...<br />"; while ( $resultArray = mysql_fetch_assoc( $result ) ) { //if ( $_ECHO ) echo "{$resultArray[ 'url' ]}<br />"; //if ( $_ECHO ) echo "{$resultArray[ 'rrp' ]}<br />"; //if ( $_ECHO ) echo "{$resultArray[ 'productid' ]}<br />"; //exit(0); $url = $resultArray[ 'url' ]; $fileContents = file_get_contents( $url ); $html->load( $fileContents ); $stop = FALSE; $type = ""; $metas = $html->find( "meta[name=Keywords]" ); if ( isset( $metas[ 0 ] ) ) { //echo $metas[ 0 ]->content; //exit(0); $stop = strstr( strtoupper( $metas[ 0 ]->content ), "EXDEMO" ) || strstr( strtoupper( $metas[ 0 ]->content ), "BOXOPEN" )|| strstr( strtoupper( $metas[ 0 ]->content ), "BOX OPEN" )|| strstr( strtoupper( $metas[ 0 ]->content ), "DISCONTINUED" ); $type = ( strstr( strtoupper( $metas[ 0 ]->content ), "PRINTER" ) == FALSE ? "c" : "p" ); //$stop = $stop ? $stop : ( strstr( strtoupper( $metas[ 0 ]->content ), "PRINTER" ) == FALSE ); } else { if ( $_ECHO ) echo "<meta> tag NOT FOUND<br />"; $stop = TRUE; } if ( !$stop ) { $pid = $html->find( "#ctl00_placeholderMain_lblItem" ); // See if its in the page if ( isset( $pid[ 0 ] ) ) { $pn = strip_tags( $pid[ 0 ] ); //$pn = substr( $pn, strpos( $pn, ";" ) + 1 ); $pn = strpos($pn, ';') !== FALSE ? substr( $pn, strpos( $pn, ";" ) + 1 ) : $pn; $pn = str_replace( "/", "_", $pn ); $pn = trim( $pn ); } else { // Didnt find product id so no point going any further $pn = "0"; } if ( $_ECHO ) echo "$pn<br />"; // Look for the price $rrp = $html->find( "#ctl00_placeholderMain_lbltxtProductPrice" ); if ( isset( $rrp[ 0 ] ) ) { // Tidy it up - remove commas and weird Word chars $price = str_replace("Â", "", strip_tags( $rrp[0] ) ); $price = substr( $price, strpos( $price, ";" ) + 1 ); $price = str_replace(",", "", $price ); $price = trim( $price ); } else { $price = "0.00"; } if ( $_ECHO ) echo "$price<br />"; $qry = "UPDATE SITE SET price='$price', type='$type' WHERE url='$url'"; if ( $_ECHO ) echo "$qry<br />"; mysql_query ( $qry, $con ); } } if ( $_ECHO ) echo "DONE updating PLSITEMAP DB<br />"; exit(0); if($_ECHO) echo "MYSQL: Deleting existing table...<br />"; // Delete any existing table data $qry = "DROP TABLE IF EXISTS SITE"; mysql_query ( $qry, $con ); if($_ECHO) echo "MYSQL: Creating new table...<br />"; // Create new one $qry = "CREATE TABLE SITE ( productid varchar(30), price decimal(6,2), url varchar( 1024 ) )"; // Create the table mysql_query ( $qry, $con ); $numSitemapPages = 350; $html = new simple_html_dom(); if($_ECHO) echo "START: Fetching site map...<br />"; for( $i = 0; $i < $numSitemapPages; $i++ ) { if($_ECHO) echo "Page $i<br />"; $fileContents = file_get_contents( "http://www.website.co.uk/SiteMap-S" . $i . ".aspx" ); $html->load( $fileContents ); $hrefs = $html->find( "a[style=color: Blue; text-decoration: underline;]" ); if ( isset( $hrefs[ 0 ] ) ) { foreach( $hrefs as $href ) { $url = "http://www.website.co.uk/" . $href->href; $qry = "INSERT INTO PLSITEMAP (url) VALUES( '$url' )"; mysql_query( $qry, $con ); if($_ECHO) echo "MYSQL: Added $href->href to DB<br />"; } } else if($_ECHO) echo "NO URLS FOUND ON THIS PAGE!<br />"; } echo "END: Fetching site map...<br />"; exit(0); if($_ECHO) echo "MYSQL: Cleaning DB list...<br />"; // This should filter out most non-printer products $qry = "SELECT * FROM `SITE` where url not like '%coax%' and url not like '%brochure%' and url not like '%cabinet%' and url not like '%kit%' and url not like '%yellow%' and url not like '%magenta%' and url not like '%cyan%' and url not like '%warranty%' and url not like '%service%' and url not like '%zebra%' and url not like '%simm%' and url not like '%dimm%' and url not like '%memory%' and url not like '%ribbon%' and url not like '%cartridge%' and url not like '%paper%' and url not like '%transparency%' and url not like '%hard-disk%' and url not like '%year%' and url not like '%-Sheet-%' and url not like '%Fuser%' and url not like '%Imaging%' and url not like '%print-head%' and url not like '%Duplex-Unit%' and url not like '%black-image%' and url not like '%unit%' and url not like '%transfer-%' and url not like '%maintenance%' and url not like '%spindle%' and url not like '%mailbox%' and url not like '%acessory%' and url not like '%barcode%' and url not like '%toner%' and url not like '%label%' and url not like '%feeder%' and url not like '%server%' and url not like '%tape%' and url not like '%ex-demo%' and url not like '%opened%' and url not like '%creased%' and (url like '%brother%' or url like '%canon%' or url like '%dell%' or url like '%epson%' or url like '%hp%' or url like '%konica%' or url like '%kyocera%' or url like '%lexmark%' or url like '%oki%' or url like '%panasonic%' or url like '%ricoh%' or url like '%samsung%' or url like '%tally%' or url like '%xante%' or url like '%xerox%')"; if($_ECHO) echo "MYSQL: Exporting printer list...<br />"; $result = mysql_query ( $qry, $con ); $fp = fopen( $outputFileName, "w" ); while ( $row = mysql_fetch_assoc( $result ) ) { fputs( $fp, $row["url"] . "\n" ); } fclose( $fp ); if($_ECHO) echo "MYSQL: Done exporting printer list...<br />"; mysql_close( $con ); Quote Link to comment https://forums.phpfreaks.com/topic/258728-html-dom-parser/ Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.