Jump to content

guymclarenza

Members
  • Posts

    64
  • Joined

  • Last visited

Community Answers

  1. guymclarenza's post in still trying to learn was marked as the answer   
    I worked out where to do the var dumps, and I think I now understand what this script is doing.
     
  2. guymclarenza's post in This worked till I added some code, Now it does not, was marked as the answer   
    <?php
    /*
     * Same-site link crawler.
     *
     * Starting from $startUrl, fetches each page with DomDocumentParser, prints
     * every link that belongs to the site, and queues links it has not seen
     * before so they are fetched in turn.
     */
    include("classes/DomDocumentParser.php");

    // Every URL that has ever been queued; used so no page is scheduled twice.
    // (This was written as "$alreadyCrawled() = array();", which PHP rejects
    // with a compile-time fatal error: a function call cannot be assigned to.)
    $alreadyCrawled = array();
    // URLs that have been discovered but not fetched yet.
    $crawling = array();

    $startUrl = "https://imagimedia.co.za"; //Get variable from input

    // Reduce the start URL to a bare site name that followLinks() uses as its
    // "same site" filter: strip the scheme and "www.", then drop the last six
    // characters (".co.za" for the URL above).
    // NOTE(review): the fixed -6 only fits six-character suffixes such as
    // ".co.za"; revisit once $startUrl really comes from user input.
    $hnam = str_replace("http://", "", "$startUrl");
    $hnam = str_replace("https://", "", "$hnam");
    $hnam = str_replace("www.", "", "$hnam");
    $hnam = substr($hnam, 0, -6);

    /**
     * Turn an href found on a page into an absolute URL.
     *
     * @param string $src The raw href value.
     * @param string $url Absolute URL of the page the href was found on.
     * @return string The absolute URL, or $src unchanged when it already starts
     *                with "http" or when $url has no scheme/host to build from.
     */
    function createLink($src, $url) {
        $parts = parse_url($url);
        if (!is_array($parts) || !isset($parts["scheme"], $parts["host"])) {
            return $src;
        }

        $scheme = $parts["scheme"];
        $host = $parts["host"];
        // A URL such as "https://example.com" has no "path" key at all.
        $path = (isset($parts["path"]) && $parts["path"] !== "") ? $parts["path"] : "/";

        // Protocol-relative: //host/path
        if (substr($src, 0, 2) === "//") {
            return $scheme . ":" . $src;
        }

        // Root-relative: /path
        if (substr($src, 0, 1) === "/") {
            return $scheme . "://" . $host . $src;
        }

        // Current-directory relative: ./path
        if (substr($src, 0, 2) === "./") {
            // Directory of the current page = everything before the last "/",
            // so "/a/b.html" and "/a/" both give "/a", and "/" gives "".
            $dir = substr($path, 0, (int) strrpos($path, "/"));
            return $scheme . "://" . $host . $dir . substr($src, 1);
        }

        // Parent-relative: ../path
        // NOTE: resolved against the site root, not the page's parent directory.
        if (substr($src, 0, 3) === "../") {
            return $scheme . "://" . $host . "/" . substr($src, 3);
        }

        // Anything else that is not already an http(s) URL is treated as a
        // path below the site root.
        if (substr($src, 0, 4) !== "http") {
            return $scheme . "://" . $host . "/" . $src;
        }

        return $src;
    }

    /**
     * Crawl the site starting at $url.
     *
     * Works through the global $crawling queue one page at a time (no
     * recursion), so each discovered URL is fetched exactly once. Every link
     * that passes the same-site filter is echoed, followed by "<br />".
     *
     * @param string $url Absolute URL to start crawling from.
     * @return void
     */
    function followLinks($url) {
        global $hnam;
        global $alreadyCrawled;
        global $crawling;

        // Record the starting page as seen so a link back to it is not re-queued.
        if (!in_array($url, $alreadyCrawled, true)) {
            $alreadyCrawled[] = $url;
        }
        $crawling[] = $url;

        while (!empty($crawling)) {
            $page = array_shift($crawling);

            $parser = new DomDocumentParser($page);
            $linkList = $parser->getLinks();

            foreach ($linkList as $link) {
                $href = $link->getAttribute("href");

                // Skip in-page anchors and links that are not fetchable pages.
                if (strpos($href, "#") !== false) {
                    continue;
                }
                if (substr($href, 0, 11) === "javascript:"
                    || substr($href, 0, 7) === "mailto:"
                    || substr($href, 0, 4) === "tel:") {
                    continue;
                }

                $href = createLink($href, $page);

                // Keep only links that mention the site name. The comparison
                // must be strict: strpos() returns 0 for a match at the start,
                // and 0 == false is true.
                if (strpos($href, "$hnam") === false) {
                    continue;
                }

                if (!in_array($href, $alreadyCrawled, true)) {
                    $alreadyCrawled[] = $href;
                    $crawling[] = $href;
                    //insert $href
                }

                // The href comes from a fetched page, so escape it for HTML.
                echo htmlspecialchars($href, ENT_QUOTES, "UTF-8") . "<br />";
            }
        }
    }

    followLinks($startUrl);
    ?>
    include file
    <?php
    /**
     * Downloads a document and exposes the anchor elements found in it.
     */
    class DomDocumentParser {

        /** @var DOMDocument Parsed document; stays empty when the fetch fails. */
        private $doc;

        /**
         * Fetch $url with a GET request and parse the response as HTML.
         *
         * A failed or empty download does not raise an error: the parser simply
         * holds an empty document and getLinks() returns no elements.
         *
         * @param string $url Location of the document to load.
         */
        public function __construct($url) {
            $options = array(
                'http' => array('method' => "GET", 'header' => "User-Agent: imagimediaBot/0.1\n")
            );
            $context = stream_context_create($options);

            $this->doc = new DomDocument();

            // file_get_contents() reports failure with a warning plus a false
            // return value; the warning is silenced and the value checked here.
            $html = @file_get_contents($url, false, $context);
            if ($html === false || $html === "") {
                // Passing false/"" on to loadHTML() throws a ValueError on
                // PHP 8, which would abort the whole crawl on one dead link.
                return;
            }

            // Real-world markup is rarely valid; collect libxml's parse
            // warnings internally instead of printing them, then restore the
            // caller's setting.
            $previous = libxml_use_internal_errors(true);
            $this->doc->loadHTML($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        /**
         * Return every <a> element of the loaded document.
         *
         * @return DOMNodeList Empty when nothing could be loaded.
         */
        public function getLinks() {
            return $this->doc->getElementsByTagName("a");
        }
    }
    ?>
×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.