lordphate Posted January 12, 2009 Share Posted January 12, 2009 Hey Everyone, I am trying to download 2 files, a video and a screenshot from a website, it requires login, and without alogin you get 403 error. I am a member there, so i've tried to write this up function video_import_url() { $info = auth_member(); set_time_limit(0); include_once(DOC_ROOT."/functions/form_functions.php"); $values = form_handle_input("videos_import_url"); ///////////////////////////////////////////////////////// $LOGINURL = "http://www.DOMAIN.com/login.html"; $id = "Username"; $password = "Password"; $POSTFIELDS = 'username='.$id.'&password='.$password; $agent = "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)"; $cookie_file_path = "/home/USER/pubic_html/.cookies/"; $reffer = "http://www.DOMAIN.com/login.html"; $curl = curl_init(); curl_setopt($curl, CURLOPT_URL,$LOGINURL); curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2); curl_setopt($curl, CURLOPT_USERAGENT, $agent); curl_setopt($curl, CURLOPT_POST, 1); curl_setopt($curl, CURLOPT_POSTFIELDS,$POSTFIELDS); curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($curl, CURLOPT_REFERER, $reffer); curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file_path); curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE); $result = curl_exec ($curl); /////////////////////////////////////////////////////// $hash = build_unique_path(DOC_ROOT."/vid",".flv"); $dist_file = DOC_ROOT."/vid/".$hash.".flv"; $out = fopen( $dist_file, 'wb' ); $ch = curl_init(); curl_setopt($ch, CURLOPT_HEADER, 0); curl_setopt($ch, CURLOPT_URL, $values["video"]); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_USERAGENT, $agent); curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1); curl_setopt($ch, CURLOPT_REFERER, $reffer); curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path); 
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path); $c = curl_exec($ch); curl_close($ch); fwrite( $out, $c ); fclose( $out ); ////////////////////////////////////////////////////// $scr = build_unique_path(DOC_ROOT."/scr",".jpg"); $screen = DOC_ROOT."/scr/".$scr."_1.jpg"; $out = fopen( $screen, 'wb' ); $ch = curl_init(); $ch = curl_init(); curl_setopt($ch, CURLOPT_HEADER, 0); curl_setopt($ch, CURLOPT_URL, $values["screenshot"]); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_USERAGENT, $agent); curl_setopt($ch, CURLOPT_REFERER, $reffer); curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path); $c = curl_exec($ch); curl_close($ch); fwrite( $out, $c ); fclose( $out ); /////////////////////////////////////////////////////// curl_close ($curl); if(!file_exists($dist_file)) { echo $dist_file.' aka '.$values["title"].' did not download'; exit; } $duration = extract_duration_from_file( $dist_file ); $tags = explode(" ", $values["tags"]); $tags = handle_empties( $tags ); $vals["tags"] = "|".implode("|", $tags)."|"; $sql_query = "INSERT INTO videos (mem_id,title,description,category,tags,date,country,location,broadcast,comments,comment_voting,responses,ratings,embedding,vhash,shash,posted,approved,converted,screenshot) VALUES (:mem_id:,':title:',':description:',':category:',':tags:',':date:','223',':location:',':broadcast:',':comments:',':comment_voting:',':responses:',':ratings:','0',':hash:',':shash:',:time:,':approved:','1','1')"; $vals["mem_id"] = $info["mem_id"]; $vals["category"] = $values["category"]; $vals["title"] = $values["title"]; $vals["description"] = $values["description"]; $vals["approved"] = ( $GLOBALS["Videos"]->approve ) ? 
0 : 1; $vals["time"] = time(); $vals["hash"] = $hash; $vals["shash"] = $scr; $GLOBALS["DB"]->values = $vals; $vid_id = $GLOBALS["DB"]->insert( $sql_query ); $GLOBALS["DB"]->clean(); load_page( "index.php?page=videos&section=upload_complete&vid_id=".$vid_id ); } Please note that the problem is not with $hash, as it does create the file; but if you open the ".flv" file in Notepad or something similar, you get <?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <title>403 - Forbidden</title> </head> <body> <h1>403 - Forbidden</h1> </body> </html> Any suggestions? Quote Link to comment Share on other sites More sharing options...
salami1_1 Posted January 12, 2009 Share Posted January 12, 2009 What you are not telling us is whether it's an htaccess-protected page or just an HTML login form... but anyway, for an htaccess-protected page (without cURL) you can use this function, which works like a charm: function getfilecontents($url,$format=0, $user='', $pass='', $referer='') { if (!empty($user)) { $authentification = base64_encode($user.':'.$pass); $authline = "Authorization: Basic $authentification\r\n"; } if (!empty($referer)) { $refererline = "Referer: $this->_referer\r\n"; } $url_info=parse_url($url); $port = isset($url_info['port']) ? $url_info['port'] : 80; $fp=fsockopen($url_info['host'], $port, $errno, $errstr, 30); if($fp) { $head = "GET ".@$url_info['path']."?".@$url_info['query']." HTTP/1.0\r\n"; if (!empty($url_info['port'])) { $head .= "Host: ".@$url_info['host'].":".$url_info['port']."\r\n"; } else { $head .= "Host: ".@$url_info['host']."\r\n"; } $head .= "Connection: Close\r\n"; $head .= "Accept: */*\r\n"; $head .= $refererline; $head .= $authline; $head .= "\r\n"; $content = ""; if(fputs($fp, $head) != FALSE){ while(!feof($fp)) { if(!$content .= fgets($fp,2048)) { return FALSE; } } fclose($fp); return $content; } else{ return FALSE; } } else { return FALSE; } } You might need to edit it a little, as I copied this function straight out of a class which I made for one of my websites. Quote Link to comment Share on other sites More sharing options...
lordphate Posted January 12, 2009 Author Share Posted January 12, 2009 It is an HTML login page, not htpasswd. I believe it has to do with cookies? Quote Link to comment Share on other sites More sharing options...
aschk Posted January 12, 2009 Share Posted January 12, 2009 If the subsequent pages require a cookie (as you would expect with a session) then you need to maintain cookies during the execution of your script, and pass them through each request. Quote Link to comment Share on other sites More sharing options...
lordphate Posted January 12, 2009 Author Share Posted January 12, 2009 I do not think the cookies are being saved on my server... Is my login cURL script fetching the cookies correctly? Quote Link to comment Share on other sites More sharing options...
lordphate Posted January 14, 2009 Author Share Posted January 14, 2009 Okay, here's an update on this. You are not required to log in to download the file; the URL to download the file is only shown while logged in... so something else is stopping me from downloading the file with cURL. URL WAS HERE GET /flv/c4e79206ac774634e64ddf32d3d9c26e/496caee5/0810/18/48f991816be67/48f991816be67.flv HTTP/1.1 Host: HOST WAS HERE User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 Accept-Language: en-us,en;q=0.5 Accept-Encoding: gzip,deflate Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7 Keep-Alive: 300 Connection: keep-alive Cookie: __utma=41441447.536235636.1231387991.1231856911.1231859410.10; __utmz=41441447.1231387991.1.1.utmccn=(direct)|utmcsr=(direct)|utmcmd=(none); __utmc=41441447; __utmb=41441447 Range: bytes=658608- If-Range: "1105876484" HTTP/1.x 206 Partial Content Content-Type: video/x-flv Accept-Ranges: bytes Etag: "1105876484" Last-Modified: Sat, 18 Oct 2008 07:34:40 GMT Content-Range: bytes 658608-76447996/76447997 Content-Length: 75789389 Date: Tue, 13 Jan 2009 15:11:38 GMT Server: lighttpd/1.4.19 Any idea what I'm missing?
Heres my updated code: ///////////////////////////////////////////////////////// $LOGINURL = "http://www.DOMAIN.com/login.html"; $id = "username"; $password = "password"; $POSTFIELDS = 'username='.$id.'&password='.$password; $agent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"; $cookie_file_path = "cookies.txt"; $reffer = "http://www.DOMAIN.com"; $hash = build_unique_path(DOC_ROOT."/vid",".flv"); $dist_file = DOC_ROOT."/vid/".$hash.".flv"; $flash = fopen( $dist_file, 'wb' ); $scr = build_unique_path(DOC_ROOT."/scr",".jpg"); $screen = DOC_ROOT."/scr/".$scr."_1.jpg"; $jpeg = fopen( $screen, 'wb' ); $curl = curl_init(); curl_setopt($curl, CURLOPT_URL,$LOGINURL); curl_setopt($curl, CURLOPT_USERAGENT, $agent); curl_setopt($curl, CURLOPT_POST, 1); curl_setopt($curl, CURLOPT_POSTFIELDS,$POSTFIELDS); curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($curl, CURLOPT_REFERER, $reffer); curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file_path); $store = curl_exec ($curl); curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($curl, CURLOPT_REFERER, $reffer); curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file_path); curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file_path); curl_setopt($curl, CURLOPT_USERAGENT, $agent); curl_setopt($curl, CURLOPT_URL, $values["video"]); $flv = curl_exec($curl); fwrite( $flash, $flv ); fclose( $flash ); curl_setopt($curl, CURLOPT_URL, $values["screenshot"]); $jpg = curl_exec($curl); curl_close($curl); fwrite( $jpeg, $jpg ); fclose( $jpeg ); Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.