Jump to content

Scrape contents of login-required page


gibbons

Recommended Posts

Hi all, I'm trying to scrape the contents of a page that is behind a login screen, namely http://my.mail.ru/apps. My code is below. It almost works, but it doesn't appear to log in properly -- the page I download is just the login screen. Any ideas? Thanks much.

 

Here's my code

 


<?php

// Sign in first; login() hands back the cURL handle it used for the POST.
$session = login();

// Request the target page with that same handle and print whatever came back.
$page = downloadUrl('http://my.mail.ru/apps', $session);
print $page;
  
/**
 * Fetch a URL with an already-initialised cURL handle and return the body.
 *
 * The handle is reused as-is, so any options or cookies set on it earlier
 * remain in effect for this request.
 *
 * @param string   $Url URL to request.
 * @param resource $ch  cURL handle to reuse.
 *
 * @return string|false Response body, or false if the transfer failed
 *                      (a warning with the cURL error is raised in that case).
 */
function downloadUrl($Url, $ch){
    curl_setopt($ch, CURLOPT_URL, $Url);
    // The handle may have been used for a POST before; explicitly switch the
    // request method back to GET rather than only turning the POST flag off.
    curl_setopt($ch, CURLOPT_HTTPGET, true);
    curl_setopt($ch, CURLOPT_REFERER, "http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f");
    curl_setopt($ch, CURLOPT_USERAGENT, "MozillaXYZ/1.0");
    curl_setopt($ch, CURLOPT_HEADER, 0);          // body only, no response headers
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);

    $output = curl_exec($ch);
    if ($output === false) {
        // Surface the reason; otherwise the caller just sees an empty result.
        trigger_error(
            'downloadUrl: request to ' . $Url . ' failed: ' . curl_error($ch),
            E_USER_WARNING
        );
    }

    return $output;
}



/**
 * POST the login form to my.mail.ru and return the cURL handle that was used.
 *
 * The handle is returned (not closed) so the caller can reuse it, together
 * with whatever cookies it collected, for follow-up requests.
 *
 * @param string $username Value sent as the "Login" form field.
 * @param string $password Value sent as the "Password" form field.
 * @param string $domain   Value sent as the "Domain" form field.
 *
 * @return resource The cURL handle used for the login request. It is returned
 *                  even if the request failed; a warning is raised in that case.
 */
function login($username = 'username', $password = 'password', $domain = 'mail.ru'){
    $loginUrl = 'http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f'; //login URL

    // Build the form body from an array. A hand-written multi-line string
    // literal puts its newlines and indentation into the request body, which
    // corrupts the field names and values; http_build_query() also URL-encodes
    // each value, so special characters in the credentials are safe.
    $postData = http_build_query(array(
        'page'     => 'http://my.mail.ru/apps/',
        'Login'    => $username,
        'Domain'   => $domain,
        'Password' => $password,
    ), '', '&');

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $loginUrl);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
    curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt'); // file cookies are saved to
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);       // follow redirects after the POST
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);       // keep the response out of the output

    // The response body itself is not needed; only check that the transfer worked.
    if (curl_exec($ch) === false) {
        trigger_error('login: request failed: ' . curl_error($ch), E_USER_WARNING);
    }

    return $ch;
}


?>



 

Link to comment
https://forums.phpfreaks.com/topic/228526-scrape-contents-of-login-required-page/
Share on other sites

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.