Jump to content

Opening websites with fopen/cURL


sidorak95

Recommended Posts

Hi,

 

I've been trying to find a specific text on a webpage and make a popup if it's there. A friend recommended I use cURL to try to open the page first, and then go on from there. I found a simple script on the internet:

<?php
/**
 * Download the body of a URL with cURL.
 *
 * A User-Agent header is always sent: cURL sends none by default, and some
 * servers answer such requests with an empty body. HTTP redirects are
 * followed so the caller receives the final page rather than the (usually
 * empty) body of a 301/302 response.
 *
 * @param string $url        Address to fetch.
 * @param string $user_agent Value for the User-Agent request header.
 *
 * @return string|false The response body, or false when the transfer failed
 *                      (an E_USER_WARNING describing the cURL error is raised).
 */
function get_data($url, $user_agent = 'Mozilla/5.0 (compatible; PHP cURL)')
{
  $ch = curl_init();
  if ($ch === false) {
    trigger_error('get_data: could not initialise a cURL handle', E_USER_WARNING);
    return false;
  }

  $timeout = 5;
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
  curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
  // Follow redirects, but cap the chain so a redirect loop cannot spin forever.
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
  curl_setopt($ch, CURLOPT_MAXREDIRS, 10);

  $data = curl_exec($ch);
  if ($data === false) {
    // Read the error before closing the handle; it is gone afterwards.
    trigger_error('get_data: cURL error: ' . curl_error($ch), E_USER_WARNING);
  }
  curl_close($ch);

  return $data;
}


// Fetch the page and print it. A failed transfer (false) or an empty body
// would otherwise echo nothing at all, which is indistinguishable from a
// blank page, so report that case explicitly.
$returned_content = get_data('http://www.example.com/');
if ($returned_content === false || $returned_content === '') {
  echo 'No content was returned for the requested URL.';
} else {
  echo $returned_content;
}
?>

This works successfully on both Google and Yahoo. However, for the site in question, nothing is displayed, and a quick check shows that the result is empty. When I set the user-agent to Firefox's, though, the webpage is displayed.

 

The admin of that site says that no user-agent is being blocked, except for bots in one directory that I'm not trying to access.

 

Both file_get_contents and fopen result in an empty page.

 

Does anyone have any idea why this is?

 

Thanks.

Link to comment
https://forums.phpfreaks.com/topic/239186-opening-websites-with-fopencurl/
Share on other sites

My curl script can fetch the info on the page.

It can echo the HTML, and the content is there.

What you do with the html will be up to you.

 

<?php
$url = "http://www.clickcritters.com/";
$userAgent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3';

/*
 * Fetch the page once, with a single cURL handle.
 *
 * A browser-like User-Agent is sent because the target site returns an empty
 * body to requests without one. Redirects are followed by cURL itself, so no
 * manual inspection of 301/302 responses is needed afterwards.
 */
$ch = curl_init();
if ($ch === false) {
    die('Could not initialise cURL.');
}

curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, 15);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_FILETIME, true);
// An empty string asks cURL to accept every encoding it can decode itself.
curl_setopt($ch, CURLOPT_ENCODING, "");
// Keep the raw HTTP response headers out of $html so only the page is echoed.
curl_setopt($ch, CURLOPT_HEADER, false);
// Certificate verification is left at cURL's default (enabled).

$html = curl_exec($ch);
// Collect transfer details and any error text before the handle is closed.
$response = curl_getinfo($ch);
$error = curl_error($ch);
curl_close($ch);

if ($html === false) {
    die('cURL request failed: ' . htmlspecialchars($error));
}

/*
 * Work out where the content finally came from. cURL reports the URL reached
 * after following HTTP redirects; a JavaScript redirect found in the page body
 * overrides it, since cURL cannot follow those.
 */
$finalurl = $response['url'];
if (
    preg_match("/window\.location\.replace\('(.*?)'\)/i", $html, $value) ||
    preg_match("/window\.location\s*\=\s*[\"'](.*?)[\"']/i", $html, $value) ||
    preg_match("/location\.href\s*\=\s*[\"'](.*?)[\"']/i", $html, $value)
) {
    $finalurl = $value[1];
}

// Show the fetched page. Use strip_tags($html) instead for a text-only view.
echo $html;
?>

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.