Search the Community
Showing results for tags 'web scraping http fetching'.
-
I administer a site where we bet on Danish Superliga matches every week. To avoid manually entering all the odds for the weekly matches, I am trying to grab the odds from this site: http://www.bold.dk/odds/index.php?liga=1 ... but whatever method I have come up with so far, all I get is request timeouts and absolutely no data! Every other site I try my code on works fine. It is just this site that seems to be protected in some way. Can any of you supply me with working code or suggestions for grabbing data from this site? I have tried these solutions (both work on every site other than bold.dk): $url = "http://www.bold.dk/odds/index.php?liga=1"; SOLUTION 1: $source = file_get_contents($url); SOLUTION 2: function url_get_contents ($url) { if (!function_exists('curl_init')){ die('CURL is not installed!'); } $timeout = 5; $agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1"; $ch = curl_init(); $header[] = "Cache-Control: max-age=0"; $header[] = "Connection: keep-alive"; $header[] = "Keep-Alive: 300"; $header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7"; $header[] = "Accept-Language: en-us,en;q=0.5"; $header[] = "Pragma: "; // browsers keep this blank. curl_setopt($ch, CURLOPT_HEADER, false); curl_setopt($ch, CURLOPT_HTTPHEADER, $header); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); curl_setopt($ch, CURLOPT_USERAGENT, $agent); $output = curl_exec($ch); $info = curl_getinfo($ch); curl_close($ch); return $output; } $source = url_get_contents ($url);