Jump to content

what is best way to get curl url source code?


dflow

Recommended Posts

<?php
/**
* Get a web file (HTML, XHTML, XML, image, etc.) from a URL.  Return an
* array containing the HTTP server response header fields and content.
*/
function get_web_page( $url )
{
    $options = array(
        CURLOPT_RETURNTRANSFER => true,     // return web page
        CURLOPT_HEADER         => false,    // don't return headers
        CURLOPT_FOLLOWLOCATION => true,     // follow redirects
        CURLOPT_ENCODING       => "",       // handle all encodings
        CURLOPT_USERAGENT      => "spider", // who am i
        CURLOPT_AUTOREFERER    => true,     // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,      // timeout on connect
        CURLOPT_TIMEOUT        => 120,      // timeout on response
        CURLOPT_MAXREDIRS      => 10,       // stop after 10 redirects
    );

    $ch      = curl_init( $url );
    curl_setopt_array( $ch, $options );
    $content = curl_exec( $ch );
    $err     = curl_errno( $ch );
    $errmsg  = curl_error( $ch );
    $header  = curl_getinfo( $ch );
    curl_close( $ch );

    $header['errno']   = $err;
    $header['errmsg']  = $errmsg;
    $header['content'] = $content;
    return $header;
}

$data = get_web_page('http://www.google.com');

echo $data['content'];

?>

 

i get a

<?php
/**
* Get a web file (HTML, XHTML, XML, image, etc.) from a URL.  Return an
* array containing the HTTP server response header fields and content.
*/
function get_web_page( $url )
{
    $options = array(
        CURLOPT_RETURNTRANSFER => true,     // return web page
        CURLOPT_HEADER         => false,    // don't return headers
        CURLOPT_FOLLOWLOCATION => true,     // follow redirects
        CURLOPT_ENCODING       => "",       // handle all encodings
        CURLOPT_USERAGENT      => "spider", // who am i
        CURLOPT_AUTOREFERER    => true,     // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,      // timeout on connect
        CURLOPT_TIMEOUT        => 120,      // timeout on response
        CURLOPT_MAXREDIRS      => 10,       // stop after 10 redirects
    );

    $ch      = curl_init( $url );
    curl_setopt_array( $ch, $options );
    $content = curl_exec( $ch );
    $err     = curl_errno( $ch );
    $errmsg  = curl_error( $ch );
    $header  = curl_getinfo( $ch );
    curl_close( $ch );

    $header['errno']   = $err;
    $header['errmsg']  = $errmsg;
    $header['content'] = $content;
    return $header;
}

$data = get_web_page('http://www.google.com');

echo $data['content'];

?>

 

i get an error

on this line

$ch      = curl_init( $url );

You can side step curl altogether if your server allows the use of wrappers ...

 


<?php

$content = file_get_contents('http://somewhere.com');

function get_string_between($string, $start, $end) {

$string = " " . $string;

$ini = strpos($string,$start);

if ($ini == 0) return "";

$ini += strlen($start);

$len = strpos($string, $end, $ini) – $ini;

return substr($string, $ini, $len);

}

echo get_string_between($content, '<html>', '</html>');

?>

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.