Jump to content

PHP and HTML DOM?


3raser

Recommended Posts

Here's a cURL function to get the HTML as well and an example of getting the title text.

 

<?php

/**
 * Fetch the body of a URL with cURL.
 *
 * Redirects are followed (at most 10), any content encoding cURL supports is
 * accepted, and a browser-like User-Agent is sent. Connect and total timeouts
 * are 120 seconds each.
 *
 * Failures are reported with an E_USER_WARNING and a false return value
 * instead of a fatal error, so callers can test the result with `=== false`.
 *
 * @param string $url    Address to fetch.
 * @param string $cookie Raw Cookie header value ("name=value; other=value");
 *                       leave empty to send no cookie.
 *
 * @return string|false Response body, or false when the cURL extension is
 *                      missing, the handle cannot be created, or the
 *                      transfer fails.
 */
function getHTML($url, $cookie = '')
{
    // Without ext/curl the call below would be a fatal "undefined function".
    if (!function_exists('curl_init')) {
        trigger_error('getHTML() requires the PHP cURL extension, which is not loaded', E_USER_WARNING);

        return false;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        trigger_error('getHTML() could not initialise a cURL handle', E_USER_WARNING);

        return false;
    }

    $options = array(
        CURLOPT_RETURNTRANSFER => true,  // hand the body back instead of printing it
        CURLOPT_HEADER         => false, // body only, no response headers
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_ENCODING       => "",    // empty string = accept every encoding cURL supports
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9",
        CURLOPT_AUTOREFERER    => true,
        CURLOPT_CONNECTTIMEOUT => 120,
        CURLOPT_TIMEOUT        => 120,
        CURLOPT_MAXREDIRS      => 10
    );

    // Only configure a cookie when the caller actually supplied one.
    if ($cookie !== '') {
        $options[CURLOPT_COOKIE] = $cookie;
    }

    curl_setopt_array($ch, $options);

    $content = curl_exec($ch);

    // Surface the transfer error rather than silently discarding it.
    if ($content === false) {
        trigger_error(
            sprintf('getHTML() failed for %s: [%d] %s', $url, curl_errno($ch), curl_error($ch)),
            E_USER_WARNING
        );
    }

    curl_close($ch);

    return $content;
}

// Download the page whose <title> we want to print.
$html = getHTML('http://google.com');

$doc = new DOMDocument();

// getHTML() returns false on failure, and loadHTML() rejects empty input,
// so only parse when there is actually some markup.
if (is_string($html) && $html !== '') {
    // Collect libxml's complaints about real-world markup internally instead
    // of hiding every error on the line with "@".
    $previousSetting = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
}

$xp = new DOMXPath($doc);

// item(0) is null when the document has no <title> text node.
$titleNode = $xp->evaluate("//title/text()")->item(0);
$title     = $titleNode !== null ? $titleNode->nodeValue : '';

// The title comes from a remote page, so escape it before emitting it as HTML.
echo htmlspecialchars($title, ENT_QUOTES, 'UTF-8');

?>

Link to comment
https://forums.phpfreaks.com/topic/245570-php-and-html-dom/#findComment-1261272
Share on other sites

Here's a cURL function to get the HTML as well and an example of getting the title text.

 

<?php

/**
 * Download a page with cURL and hand back its body.
 *
 * @param string $url    Address to request.
 * @param string $cookie Value passed to CURLOPT_COOKIE (empty by default).
 *
 * @return string|false Whatever curl_exec() produced: the response body, or
 *                      false when the transfer failed.
 */
function getHTML($url, $cookie = '')
{
    $handle = curl_init($url);

    curl_setopt_array($handle, array(
        CURLOPT_COOKIE         => $cookie,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER         => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_ENCODING       => "",
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9",
        CURLOPT_AUTOREFERER    => true,
        CURLOPT_CONNECTTIMEOUT => 120,
        CURLOPT_TIMEOUT        => 120,
        CURLOPT_MAXREDIRS      => 10
    ));

    $body = curl_exec($handle);

    // Transfer details and error state are gathered into one array, but only
    // its 'content' entry ever leaves this function.
    $info            = curl_getinfo($handle);
    $info['errno']   = curl_errno($handle);
    $info['errmsg']  = curl_error($handle);
    $info['content'] = $body;

    curl_close($handle);

    return $info['content'];
}

// Download the page to inspect.
$html = getHTML('http://google.com');

// Parse the markup; the "@" silences whatever loadHTML() would report.
$doc = new DOMDocument();
@$doc->loadHTML($html);

// Select the text inside <title> and take the first match.
$xp         = new DOMXPath($doc);
$titleNodes = $xp->evaluate("//title/text()");
$title      = $titleNodes->item(0)->nodeValue;

echo $title;

?>

 

Gives me this: Fatal error: Call to undefined function curl_init() in /www/zxq.net/n/o/v/novatop/htdocs/grabber.php on line 18

 

Link to comment
https://forums.phpfreaks.com/topic/245570-php-and-html-dom/#findComment-1261277
Share on other sites

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.