sidorak95 Posted June 13, 2011 Share Posted June 13, 2011 Hi, I've been trying to find a specific piece of text on a webpage and make a popup if it's there. A friend recommended I use cURL to try to open the page first, and then go on from there. I found a simple script on the internet: <?php function get_data($url) { $ch = curl_init(); $timeout = 5; curl_setopt($ch,CURLOPT_URL,$url); curl_setopt($ch,CURLOPT_RETURNTRANSFER,1); curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,$timeout); $data = curl_exec($ch); curl_close($ch); return $data; } $returned_content = get_data('http://www.example.com/'); echo $returned_content; ?> This works successfully on both Google and Yahoo. However, for the site in question, nothing is displayed. A quick check shows that the result is empty. When I set the user-agent to Firefox's, the webpage is displayed. The admin of that site says that no user-agent is being blocked, except for bots in one directory that I'm not trying to access. Both file_get_contents and fopen also result in an empty page. Does anyone have any idea why this is? Thanks. Quote Link to comment Share on other sites More sharing options...
QuickOldCar Posted June 13, 2011 Share Posted June 13, 2011 What's the URL of the website that you're talking about? Quote Link to comment Share on other sites More sharing options...
sidorak95 Posted June 13, 2011 Author Share Posted June 13, 2011 The site is: http://www.clickcritters.com/ Quote Link to comment Share on other sites More sharing options...
QuickOldCar Posted June 13, 2011 Share Posted June 13, 2011 My curl script can fetch the info on the page. I can echo the HTML, and the content is there. What you do with the HTML will be up to you. <?php $url = "http://www.clickcritters.com/"; /*connect to the url using curl to see if exists and get the information*/ //$cookie = tempnam('tmp','cookie'); //$cookie_file_path = "tmp/"; $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); //curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie); //curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3'); curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); curl_setopt($ch, CURLOPT_TIMEOUT, 15); curl_setopt($ch, CURLOPT_MAXREDIRS, 15); curl_setopt($ch, CURLOPT_HEADER, 1); curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_AUTOREFERER, true); curl_setopt ($ch, CURLOPT_FILETIME, 1); curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); curl_setopt($ch, CURLOPT_ENCODING , ""); $curl_session = curl_init(); //curl_setopt($curl_session, CURLOPT_COOKIEJAR, $cookie); //curl_setopt($curl_session, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($curl_session, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3'); curl_setopt($curl_session, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); curl_setopt($curl_session, CURLOPT_ENCODING , ""); curl_setopt($curl_session, CURLOPT_TIMEOUT, 15); curl_setopt($curl_session, CURLOPT_HEADER, 1); curl_setopt($curl_session, CURLOPT_SSL_VERIFYPEER, FALSE); curl_setopt($curl_session, CURLOPT_HEADER, true); curl_setopt($curl_session, CURLOPT_MAXREDIRS, 15); curl_setopt($curl_session, CURLOPT_RETURNTRANSFER, true); curl_setopt( $curl_session, CURLOPT_AUTOREFERER, true ); curl_setopt ($curl_session, CURLOPT_HTTPGET, true); curl_setopt($curl_session, CURLOPT_URL, $url); 
$string = mysql_real_escape_string(curl_exec($curl_session)); $html = mysql_real_escape_string(curl_exec ($ch)); $info = curl_getinfo($ch); /*curl response check and to resolve url to the actual location*/ $response = curl_getinfo( $ch ); if ($response['http_code'] == 301 || $response['http_code'] == 302) { ini_set("user_agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3"); $headers = get_headers($response['url']); $location = ""; foreach( $headers as $value ) { if ( substr( strtolower($value), 0, 9 ) == "location:" ) return get_final_url( trim( substr( $value, 9, strlen($value) ) ) ); } } if ( preg_match("/window\.location\.replace\('(.*)'\)/i", $con_addtent, $value) || preg_match("/window\.location\=[\"'](.*)[\"']/i", $con_addtent, $value) || preg_match("/location\.href\=[\"'](.*)[\"']/i", $con_addtent, $value) ) { $finalurl = get_final_url($value[1]); } else { $finalurl = $response['url']; } $html = curl_exec($ch); $header = "Location: "; //echo $finalurl."<br />"; //view site echo $html; //stripped tags text //echo strip_tags($html); curl_close($ch); ?> Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.