guymclarenza's post in "This worked till I added some code, now it does not" was marked as the answer
December 19, 2020
<?php
include("classes/DomDocumentParser.php");
// Crawl bookkeeping, shared with followLinks() through `global`:
//   $alreadyCrawled - every URL that has been seen so far
//   $crawling       - URLs that are still waiting to be visited
// The original line read `$alreadyCrawled() = array();`; the stray "()" is a
// parse error, which is why this line showed up in the error log.
$alreadyCrawled = [];
$crawling = [];

$startUrl = "https://imagimedia.co.za"; //Get variable from input

// Reduce the start URL to a bare site name used to keep the crawl on-site:
// drop the scheme and any "www.", then cut the last 6 characters.
// NOTE: the -6 matches the ".co.za" suffix of the URL above; a start URL with
// a different suffix length (or with a path) needs a different cut.
$hnam = str_replace(["http://", "https://", "www."], "", $startUrl);
$hnam = substr($hnam, 0, -6);
/**
 * Turn an href found on a page into an absolute URL.
 *
 * The result is rebuilt from the scheme and host of $url only. Handled forms:
 *   "//host/x"  -> scheme of $url is prepended
 *   "/x"        -> resolved against the site root
 *   "./x"       -> resolved against the directory of the page's path
 *   "../x"      -> one leading "../" is stripped and the rest is resolved
 *                  against the site root (not the parent directory)
 *   anything else not starting with "http" -> resolved against the site root
 *   anything starting with "http" -> returned unchanged
 *
 * @param string $src Raw href value taken from the page.
 * @param string $url Absolute URL of the page the href was found on.
 * @return string The absolute URL.
 */
function createLink($src, $url) {
    // Parse once. Any component may be missing (e.g. "https://example.com"
    // has no "path"), and parse_url() returns false for malformed URLs.
    $parts = parse_url($url);
    if (!is_array($parts)) {
        $parts = [];
    }
    $scheme = isset($parts["scheme"]) ? $parts["scheme"] : "";
    $host = isset($parts["host"]) ? $parts["host"] : "";
    $path = isset($parts["path"]) ? $parts["path"] : "";
    $root = $scheme."://".$host;

    if (substr($src, 0, 2) === "//") {
        return $scheme.":".$src;
    }

    if (substr($src, 0, 1) === "/") {
        return $root.$src;
    }

    if (substr($src, 0, 2) === "./") {
        // Directory of the current page = everything before the last "/".
        // Unlike dirname(), this yields "" for "/" (no "host//x" result) and
        // keeps "/a" for a path that already ends in a slash ("/a/").
        $lastSlash = strrpos($path, "/");
        $dir = ($lastSlash === false) ? "" : substr($path, 0, $lastSlash);
        return $root.$dir.substr($src, 1);
    }

    if (substr($src, 0, 3) === "../") {
        return $root."/".substr($src, 3);
    }

    if (substr($src, 0, 4) !== "http") {
        return $root."/".$src;
    }

    return $src;
}
/**
 * Crawl a site starting at $url, echoing every on-site link that is found.
 *
 * Uses three globals: $hnam (site name a link must contain to be followed),
 * $alreadyCrawled (every URL seen so far) and $crawling (queue of URLs still
 * to visit). Each newly seen link is appended to both lists.
 *
 * The queue is drained with a loop instead of recursion. The recursive
 * version re-ran followLinks() over overlapping copies of the queue, so pages
 * were fetched repeatedly and the call depth grew with every page; its
 * array_shift() also dropped the first discovered link at the top level
 * because the start URL itself had never been queued.
 *
 * @param string $url Absolute URL of the page to start from.
 * @return void
 */
function followLinks($url) {
    global $hnam;
    global $alreadyCrawled;
    global $crawling;

    // Tolerate the globals not having been initialised as arrays.
    if (!is_array($alreadyCrawled)) {
        $alreadyCrawled = [];
    }
    if (!is_array($crawling)) {
        $crawling = [];
    }

    // Mark the start page as seen so links back to it are not queued again,
    // and put it at the front of the queue so it is visited first.
    if (!in_array($url, $alreadyCrawled, true)) {
        $alreadyCrawled[] = $url;
    }
    array_unshift($crawling, $url);

    // Without a site name there is nothing to keep the crawl on-site, so no
    // link is followed (also avoids calling strpos() with an empty needle).
    $siteName = (string) $hnam;

    while (count($crawling) > 0) {
        $pageUrl = array_shift($crawling);
        $parser = new DomDocumentParser($pageUrl);

        foreach ($parser->getLinks() as $link) {
            $href = $link->getAttribute("href");

            // Skip in-page anchors and links that are not fetchable pages.
            if (strpos($href, "#") !== false) {
                continue;
            }
            if (strncasecmp($href, "javascript:", 11) === 0
                || strncasecmp($href, "mailto:", 7) === 0
                || strncasecmp($href, "tel:", 4) === 0) {
                continue;
            }

            $href = createLink($href, $pageUrl);

            // strpos() returns 0 for a match at the start of the string, so
            // the comparison against false must be strict.
            if ($siteName === "" || strpos($href, $siteName) === false) {
                continue;
            }

            if (!in_array($href, $alreadyCrawled, true)) {
                $alreadyCrawled[] = $href;
                $crawling[] = $href;
                //insert $href
            }

            // The href comes from a fetched page; escape it before echoing
            // it into HTML output.
            echo htmlspecialchars($href, ENT_QUOTES, "UTF-8")."<br />";
        }
    }
}
// Start the crawl at the configured start URL.
followLinks($startUrl);
?>
Include file (classes/DomDocumentParser.php):
<?php
/**
 * Fetches a document and exposes the anchor elements found in it.
 */
class DomDocumentParser {
    /** @var DOMDocument Parsed document; stays empty when the fetch fails. */
    private $doc;

    /**
     * Fetch $url with the crawler's User-Agent and parse the response as HTML.
     *
     * A failed or empty fetch leaves the document empty, so getLinks()
     * returns an empty list instead of the crawl aborting. Passing the
     * failed result straight to loadHTML() throws a ValueError on PHP 8.
     *
     * @param string $url Location of the document to load.
     */
    public function __construct($url) {
        $options = [
            'http' => ['method' => "GET", 'header' => "User-Agent: imagimediaBot/0.1\n"],
        ];
        $context = stream_context_create($options);
        $this->doc = new DOMDocument();

        // file_get_contents() reports failures as warnings; swallow only
        // those and rely on the false return value instead.
        set_error_handler(static function () {
            return true;
        });
        $html = file_get_contents($url, false, $context);
        restore_error_handler();

        if ($html === false || trim($html) === "") {
            return;
        }

        // Real-world markup is rarely valid; collect libxml's complaints
        // internally rather than emitting a warning per problem, then restore
        // the previous libxml setting.
        $previous = libxml_use_internal_errors(true);
        $this->doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    /**
     * @return DOMNodeList All <a> elements of the loaded document.
     */
    public function getLinks() {
        return $this->doc->getElementsByTagName("a");
    }
}
?>