Pro.Luv Posted December 10, 2008 Share Posted December 10, 2008 Hi, I have this code that gets links from web pages but it also gets javascript links like if a link is: document.form1.brand.options[document.form1.brand.selectedIndex].value I don't want it to fetch those links how can I stop it ?
<?php
/**
 * Collect every anchor of a page as an href => link-text map.
 *
 * Reads $link with file_get_contents(), parses the result as HTML and walks
 * all <a> elements. Anchors that share an href overwrite each other, so the
 * last one in document order wins. No filtering is applied: script-style
 * hrefs are returned like any other.
 *
 * @param string $link Location of the page to scan (anything file_get_contents() can read).
 * @return array Map of href attribute value to the nodeValue of the anchor's
 *               first child node (null when the anchor has no child nodes).
 *               Empty when the page could not be read or was empty.
 */
function getLinks($link)
{
    /*** return array ***/
    $ret = array();

    /*** get the HTML (suppress errors); a page that cannot be read has no links ***/
    $html = @file_get_contents($link);
    if ($html === false || $html === '') {
        return $ret;
    }

    /*** a new dom object ***/
    $dom = new DOMDocument();

    /*** parse the HTML, keeping markup warnings out of the output ***/
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    /*** remove silly white space ***/
    /*** NOTE(review): assigned after loadHTML(), so it presumably does not change the already-parsed tree - confirm ***/
    $dom->preserveWhiteSpace = false;

    /*** loop over the links from the HTML ***/
    foreach ($dom->getElementsByTagName('a') as $tag) {
        /*** an anchor without content has no first child to read a value from ***/
        $first = $tag->firstChild;
        $ret[$tag->getAttribute('href')] = $first !== null ? $first->nodeValue : null;
    }

    return $ret;
}

/*** a link to search ***/
$link = "http://www.website.com";

/*** get the links ***/
$urls = getLinks($link);

/*** check for results ***/
if (sizeof($urls) > 0) {
    foreach ($urls as $key => $value) {
        /*** hrefs come from a remote page, so escape them before printing into HTML ***/
        echo htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8').'<br >';
    }
} else {
    echo "No links found at $link";
}
?>
thanks Link to comment https://forums.phpfreaks.com/topic/136352-help-with-code/ Share on other sites More sharing options...
jonsjava Posted December 20, 2008 Share Posted December 20, 2008 My IDE is broke (switched to Gnome from KDE, and Zend died), so I couldn't test this, but it should do the trick.
<?php
/**
 * Collect the anchors of a page as an href => link-text map, leaving out
 * script-style links.
 *
 * Reads $link with file_get_contents(), parses the result as HTML and walks
 * all <a> elements. An anchor is skipped when its href, ignoring leading
 * white space and letter case, starts with "javascript:" or "document."
 * (for example document.form1.brand.options[...].value). The check is
 * anchored at the start so that ordinary URLs which merely contain
 * "document." further along are kept; a relative href that itself begins
 * with "document." is skipped as well. Anchors that share an href overwrite
 * each other, so the last one in document order wins.
 *
 * @param string $link Location of the page to scan (anything file_get_contents() can read).
 * @return array Map of href attribute value to the nodeValue of the anchor's
 *               first child node (null when the anchor has no child nodes).
 *               Empty when the page could not be read or was empty.
 */
function getLinks($link)
{
    /*** return array ***/
    $ret = array();

    /*** get the HTML (suppress errors); a page that cannot be read has no links ***/
    $html = @file_get_contents($link);
    if ($html === false || $html === '') {
        return $ret;
    }

    /*** a new dom object ***/
    $dom = new DOMDocument();

    /*** parse the HTML, keeping markup warnings out of the output ***/
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    /*** remove silly white space ***/
    /*** NOTE(review): assigned after loadHTML(), so it presumably does not change the already-parsed tree - confirm ***/
    $dom->preserveWhiteSpace = false;

    /*** loop over the links from the HTML ***/
    foreach ($dom->getElementsByTagName('a') as $tag) {
        /*** test the href string itself; the element object cannot be searched as text ***/
        $href = $tag->getAttribute('href');
        $probe = ltrim($href);

        /*** skip javascript: URLs and bare DOM expressions ***/
        if (stripos($probe, 'javascript:') === 0 || stripos($probe, 'document.') === 0) {
            continue;
        }

        /*** an anchor without content has no first child to read a value from ***/
        $first = $tag->firstChild;
        $ret[$href] = $first !== null ? $first->nodeValue : null;
    }

    return $ret;
}

/*** a link to search ***/
$link = "http://www.website.com";

/*** get the links ***/
$urls = getLinks($link);

/*** check for results ***/
if (sizeof($urls) > 0) {
    foreach ($urls as $key => $value) {
        /*** hrefs come from a remote page, so escape them before printing into HTML ***/
        echo htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8').'<br >';
    }
} else {
    echo "No links found at $link";
}
?>
Link to comment https://forums.phpfreaks.com/topic/136352-help-with-code/#findComment-720180 Share on other sites More sharing options...
Recommended Posts
Archived
This topic is now archived and is closed to further replies.