The Little Guy Posted December 29, 2008 Share Posted December 29, 2008 I am trying to convert a link such as: ../somedir/somefile.html to a URI like: http://somesite.com/root/somedir/somefile.html My code browses a page and gets all the links. As we all know, links are not always formatted the way shown above; they can be absolute or relative as well. I need to make the links that are gathered from all pages into EXACT links, such as: http://somesite.com/root/somedir/somefile.html I am not 100% sure of where to begin on this... I think I can get the absolute links, that seems easy, but the relative links seem a little harder to do. Any ideas of how I could do this? Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/ Share on other sites More sharing options...
rhodesa Posted December 29, 2008 Share Posted December 29, 2008 if it doesn't start with http, prepend it with dirname('http://www.site.com/path/to/page.html') Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725403 Share on other sites More sharing options...
The Little Guy Posted December 29, 2008 Author Share Posted December 29, 2008 will I ever get a relative path returned back to me? Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725436 Share on other sites More sharing options...
rhodesa Posted December 29, 2008 Share Posted December 29, 2008 what does the code look like so far? Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725500 Share on other sites More sharing options...
The Little Guy Posted December 29, 2008 Author Share Posted December 29, 2008 So far it looks like this: <?php include '/home/ryannaddy/dudeel.com/incl/includes.php'; // create a new cURL resource $ch = curl_init(); $url = 'http://google.com'; // set URL and other appropriate options curl_setopt($ch, CURLOPT_URL, "http://google.com/"); curl_setopt($ch, CURLOPT_HEADER, FALSE); curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // grab URL and pass it to the browser $opt = curl_exec($ch); $reURL = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); $opt = preg_replace('~<(style|script)[^>]*>.*?</1>~s',' ',$opt); $opt = preg_replace("~( | | )~s",' ',$opt); $opt = preg_replace("/ss+/",' ',$opt); if(preg_match_all('~href="(.+?)"~',$opt,$matches)){ foreach($matches[1] as $match){ if(!preg_match("~^http~",$match)){ //chdir($match); echo str_replace('//','/',$reURL.dirname($match))." "; }elseif(!preg_match("~^..~",$match)){ echo 'here'." "; }else{ echo $match." "; } //mysql_query(sprintf()); } } //echo strip_tags($opt); // close cURL resource, and free up system resources curl_close($ch); //echo " "; ?> I should mention that this is going to run as a cron, and not in a browser. Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725649 Share on other sites More sharing options...
rhodesa Posted December 29, 2008 Share Posted December 29, 2008 how about: <?php include '/home/ryannaddy/dudeel.com/incl/includes.php'; // create a new cURL resource $ch = curl_init(); $url = 'http://www.cnn.com'; // set URL and other appropriate options curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_HEADER, FALSE); curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // grab URL and pass it to the browser $opt = curl_exec($ch); $reURL = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); $opt = preg_replace('~<(style|script)[^>]*>.*?</>~s',' ',$opt); $opt = preg_replace("~( | | )~s",' ',$opt); $opt = preg_replace("/ss+/",' ',$opt); if(preg_match_all('~href="(.+?)"~',$opt,$matches)){ foreach($matches[1] as $match){ if(preg_match("~^javascript~",$match)) continue; //Skip JavaScript if(preg_match("~^(http|ftp)~",$match)){ $href = $match; }else{ $href = $reURL.$match; } echo "<a href=\"$href\">$href</a><br>"; //mysql_query(sprintf()); } } //echo strip_tags($opt); // close cURL resource, and free up system resources curl_close($ch); //echo " "; ?> Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725679 Share on other sites More sharing options...
The Little Guy Posted December 30, 2008 Author Share Posted December 30, 2008 I noticed your page had some ../ in it, so I tested it, and got these values: http://vectorloft.com/main/index.html../stylesheet.css http://vectorloft.com/main/index.html../index.html http://vectorloft.com/main/index.htmlthecrew.html http://vectorloft.com/main/index.htmlwhoweare.html http://vectorloft.com/main/index.html../music http://vectorloft.com/main/index.htmlportfolio.html http://vectorloft.com/main/index.htmlcontactus.html http://vectorloft.com/main/index.html../blog Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725890 Share on other sites More sharing options...
rhodesa Posted December 30, 2008 Share Posted December 30, 2008 for that you need to use some more code: <?php include '/home/ryannaddy/dudeel.com/incl/includes.php'; // create a new cURL resource $ch = curl_init(); $url = 'http://vectorloft.com/main/index.html'; // set URL and other appropriate options curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_HEADER, FALSE); curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // grab URL and pass it to the browser $opt = curl_exec($ch); $parts = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL)); $parts['path'] = dirname($parts['path']); $reURL = $parts['scheme'].'://'.$parts['host'].$parts['path']; //$opt = preg_replace('~<(style|script)[^>]*>.*?</>~s',' ',$opt); //$opt = preg_replace("~( | | )~s",' ',$opt); //$opt = preg_replace("/ss+/",' ',$opt); if(preg_match_all('~href="(.+?)"~',$opt,$matches)){ foreach($matches[1] as $match){ if(preg_match("~^javascript~",$match)) continue; //Skip JavaScript if(preg_match("~^(http|ftp)~",$match)){ $href = $match; }else{ $href = $reURL.'/'.$match; } echo "<a href=\"$href\">$href</a><br>"; //mysql_query(sprintf()); } } //echo strip_tags($opt); // close cURL resource, and free up system resources curl_close($ch); //echo " "; ?> Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725903 Share on other sites More sharing options...
The Little Guy Posted December 30, 2008 Author Share Posted December 30, 2008 Alright! thanks that works. Will that work if there is more than one like this: ../../ do you know? This is the code: <?php include '/home/ryannaddy/dudeel.com/incl/includes.php'; // create a new cURL resource $ch = curl_init(); $url = 'http://vectorloft.com/main/index.html'; //$url = 'http://dudeel.com/add?a=site'; // set URL and other appropriate options curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_HEADER, FALSE); curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // grab URL and pass it to the browser $opt = curl_exec($ch); $parts = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL)); $parts['path'] = dirname($parts['path']); $reURL = $parts['scheme'].'://'.$parts['host'].$parts['path'].'/'; //echo $reURL; $opt = preg_replace('~<(style|script)[^>]*>.*?</\1>~s',' ',$opt); $opt = preg_replace('~<link[^>]*>~s',' ',$opt); $opt = preg_replace("~(\n|\r| )~s",' ',$opt); $opt = preg_replace("/\s\s+/",' ',$opt); if(preg_match_all('~href="(.+?)"~',$opt,$matches)){ foreach($matches[1] as $match){ if(preg_match("~^javascript~",$match)) continue; //Skip JavaScript if(preg_match("~^(http|ftp)~",$match)){ $href = $match; }else{ $href = $reURL.preg_replace('~^/~','',str_replace('..','',$match)); } echo "$href\n"; //mysql_query(sprintf()); } } // close cURL resource, and free up system resources curl_close($ch); //echo "\n"; ?> Link to comment https://forums.phpfreaks.com/topic/138744-relative-to-exact-links/#findComment-725914 Share on other sites More sharing options...
Recommended Posts
Archived
This topic is now archived and is closed to further replies.