gibbons Posted February 22, 2011 Share Posted February 22, 2011 Hi all, I'm trying to scrape the contents of a page that is behind a login screen; namely: http://my.mail.ru/apps. Here's my code. It almost works, but doesn't appear to be properly logging in -- I just get a login screen on the url download. Any ideas? Thanks much. Here's my code
<?php
// Log in first so the returned handle carries the session cookies, then reuse
// that same handle to request the page that sits behind the login screen.
$ch = login();
$html = downloadUrl('http://my.mail.ru/apps', $ch);
if ($html !== false) {
    echo $html;
}

/**
 * Fetch a URL with an HTTP GET on an existing cURL handle.
 *
 * The handle is reused as-is, so any cookies it already holds are sent along
 * with the request.
 *
 * @param string $Url URL to download.
 * @param mixed  $ch  cURL handle as returned by curl_init() (e.g. from login()).
 *
 * @return string|false Response body, or false if the transfer failed
 *                      (a warning with the cURL error text is raised).
 */
function downloadUrl($Url, $ch)
{
    curl_setopt($ch, CURLOPT_URL, $Url);
    // The handle was last used for a POST; switch the request method back to GET.
    curl_setopt($ch, CURLOPT_HTTPGET, true);
    curl_setopt($ch, CURLOPT_REFERER, "http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f");
    curl_setopt($ch, CURLOPT_USERAGENT, "MozillaXYZ/1.0");
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);

    $output = curl_exec($ch);
    if ($output === false) {
        // Surface the reason instead of silently handing back false.
        trigger_error('downloadUrl: request failed: ' . curl_error($ch), E_USER_WARNING);
    }

    return $output;
}

/**
 * POST the login form and return the cURL handle used for the request.
 *
 * Cookies received during the login (including across redirects) stay on the
 * handle and are written to cookie.txt when the handle is released.
 *
 * @return mixed cURL handle created by curl_init(); returned even when the
 *               login request fails (a warning is raised in that case).
 */
function login()
{
    $loginUrl = 'http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f';

    // Build the body with http_build_query() so every name and value is
    // URL-encoded and no stray whitespace ends up inside the field names or
    // values (a hand-written multi-part string sent " page" and "username ").
    $postData = http_build_query(
        array(
            'page'     => 'http://my.mail.ru/apps/',
            'Login'    => 'username',
            'Domain'   => 'mail.ru',
            'Password' => 'password',
        ),
        '',
        '&'
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $loginUrl);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
    // Same User-Agent as the follow-up request made by downloadUrl().
    curl_setopt($ch, CURLOPT_USERAGENT, "MozillaXYZ/1.0");
    // Setting a cookie jar also enables cookie handling on this handle.
    curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    // Capture the login response instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $store = curl_exec($ch);
    if ($store === false) {
        trigger_error('login: request failed: ' . curl_error($ch), E_USER_WARNING);
    }

    return $ch;
}
?>
Quote Link to comment https://forums.phpfreaks.com/topic/228526-scrape-contents-of-login-required-page/ Share on other sites More sharing options...
gibbons Posted February 28, 2011 Author Share Posted February 28, 2011 bump? Quote Link to comment https://forums.phpfreaks.com/topic/228526-scrape-contents-of-login-required-page/#findComment-1180914 Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.