gibbons Posted February 22, 2011 Share Posted February 22, 2011 Hi all, I'm trying to scrape the contents of a page that is behind a login screen; namely: http://my.mail.ru/apps. Here's my code. It almost works, but doesn't appear to be properly logging in -- I just get a login screen on the url download. Any ideas? Thanks much. Here's my code <?php $ch=login(); $html=downloadUrl('http://my.mail.ru/apps', $ch); echo $html; function downloadUrl($Url, $ch){ curl_setopt($ch, CURLOPT_URL, $Url); curl_setopt($ch, CURLOPT_POST, 0); curl_setopt($ch, CURLOPT_REFERER, "http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f"); curl_setopt($ch, CURLOPT_USERAGENT, "MozillaXYZ/1.0"); curl_setopt($ch, CURLOPT_HEADER, 0); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_TIMEOUT, 10); $output = curl_exec($ch); return $output; } function login(){ $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, 'http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f'); //login URL curl_setopt ($ch, CURLOPT_POST, 1); $postData=' page=http%3A%2F%2Fmy.mail.ru%2Fapps%2F &Login=username &Domain=mail.ru &Password=password'; curl_setopt ($ch, CURLOPT_POSTFIELDS, $postData); curl_setopt ($ch, CURLOPT_COOKIEJAR, 'cookie.txt'); curl_setopt ($ch, CURLOPT_FOLLOWLOCATION,1); curl_setopt ($ch, CURLOPT_MAXREDIRS, 10); curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1); $store = curl_exec ($ch); return $ch; } ?> Link to comment https://forums.phpfreaks.com/topic/228526-scrape-contents-of-login-required-page/ Share on other sites More sharing options...
gibbons Posted February 28, 2011 Author Share Posted February 28, 2011 bump? Link to comment https://forums.phpfreaks.com/topic/228526-scrape-contents-of-login-required-page/#findComment-1180914 Share on other sites More sharing options...
Recommended Posts
Archived
This topic is now archived and is closed to further replies.