anon Posted December 23, 2007 Share Posted December 23, 2007 Hi, here's the code for a web crawler which I found at www.spiderbyte.org # <?php # # // Simple Spider Build v1.0 Beta # // Coded By Black Widow ¤ SpiderByte.Org ¤ # # // Start The Spider # $ch = curl_init(); # curl_setopt($ch, CURLOPT_URL,"http://www.StartingSite.com"); # curl_setopt($ch, CURLOPT_RETURNTRANSFER,1); # $result=curl_exec ($ch); # curl_close ($ch); # # // Search The Results From The Starting Site # if( $result ) # { # preg_match_all( ‘/<a href="(http:\/\/[^0-9].+?)"/’, $result, $output, PREG_SET_ORDER ); # foreach( $output as $item ) # { # # // Write URLs It Found To The Database # $db=mysql_connect ("localhost", "USERNAME", "PASSWORD") or die # (‘I cannot connect to the database because: ‘ . mysql_error()); # mysql_select_db ("DB_NAME", $db); # $table = "urls"; # $sqlquery = "INSERT INTO $table # VALUES(’$id’,'$item[1]’)"; # $results = mysql_query($sqlquery); # # // Spider The New URLs For More URLs # $db=mysql_connect ("localhost", "USERNAME", "PASSWORD") or die # (‘I cannot connect to the database because: ‘ . mysql_error()); # mysql_select_db ("DB_NAME", $db); # $spider = mysql_query("SELECT * FROM $table"); # # while($row = mysql_fetch_array( $spider )) { # # foreach( $row as $spurl){ # # $ch = curl_init(); # curl_setopt($ch, CURLOPT_URL,"$spurl"); # curl_setopt($ch, CURLOPT_RETURNTRANSFER,1); # $result2=curl_exec ($ch); # curl_close ($ch); # # if( $result2 ) # { # preg_match_all( ‘/"(http:\/\/[^0-9].+?)"/’, $result2, $output2, PREG_SET_ORDER ); # foreach( $output2 as $item2 ){ # # // Write All New URLs Found Into The Database # $db=mysql_connect ("localhost", "USERNAME", "PASSWORD") or die # (‘I cannot connect to the database because: ‘ . 
mysql_error()); # mysql_select_db ("DB_NAME", $db); # $table = "urls"; # $sqlquery = "INSERT INTO $table # VALUES(’$id’,'$item2[1]’)"; # $results = mysql_query($sqlquery); # $see = mysql_query("SELECT urls FROM $table"); # # while($row2 = mysql_fetch_array( $see )) { # # echo $row2["urls"]; # echo ("<br />"); # # }}}}}}} # # ?> I want to edit this code to crawl URLs I specified in a database. How would I do that? Link to comment https://forums.phpfreaks.com/topic/82954-help-editing-web-crawler-code/ Share on other sites More sharing options...
Recommended Posts
Archived
This topic is now archived and is closed to further replies.