ftpaccess Posted July 16, 2010 Share Posted July 16, 2010 Hello everyone. I recently got a project in which I have to fetch Google results through PHP. What I mean is that once a query is entered into Google, the first 100 results are stored in an array, and later, if we want, we can search through the URLs of those results. I know this could be done through the Google API, but that has a limit of only 8 searches, so I have to use a different technique. Can anyone help me with this? Thanks, ftp Quote Link to comment Share on other sites More sharing options...
trq Posted July 16, 2010 Share Posted July 16, 2010 Can anyone help me with this. Where are you stuck? Quote Link to comment Share on other sites More sharing options...
ftpaccess Posted July 16, 2010 Author Share Posted July 16, 2010 this is my code.
<?php
/**
 * Downloads a page with cURL and returns the targets of the links it contains.
 *
 * NOTE: $query is accepted but not used yet. The request URL is hard-coded
 * to http://www.xyz.com/softwareCategories.php. When this is pointed at a
 * search engine, the term must be passed as urlencode($query) in the query
 * string ("?q=..."); anything after a "#" is a fragment and is never sent
 * to the server.
 *
 * @param string $query Search term (currently unused, see the note above).
 *
 * @return array List of array('RefUrl' => string), one entry per
 *               <a href="..."> found, in document order. An empty array is
 *               returned when the request fails, the server answers with an
 *               HTTP status >= 400, or the page contains no links.
 */
function getUrlList($query)
{
    $target = "www.google.com";

    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,"
            . "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection: keep-alive",
        "Keep-Alive: 300",
        "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
        "Accept-Language: en-us,en;q=0.5",
        "Pragma: ", // browsers keep this blank.
    );

    $site = "www.xyz.com";
    $url = sprintf("http://%s/softwareCategories.php", $site);

    $curl = curl_init($url);
    if ($curl === false) {
        return array();
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (+http://www.google.com/bot.html)');
    curl_setopt($curl, CURLOPT_REFERER, $target);
    curl_setopt($curl, CURLOPT_TIMEOUT, 10);
    curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
    // MAXREDIRS only takes effect once FOLLOWLOCATION is switched on.
    curl_setopt($curl, CURLOPT_MAXREDIRS, 4);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 0);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // A page is being fetched, not a form submitted: use GET, not an empty POST.
    curl_setopt($curl, CURLOPT_HTTPGET, 1);

    $html = curl_exec($curl);
    $status = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // curl_exec() returns false on transport errors; error pages carry no useful links.
    if ($html === false || $status >= 400) {
        return array();
    }

    // Matches the href of every <a> tag. The lazy, tag-bounded [^>]*? keeps
    // each match inside its own tag, and the branch-reset group (?|...) puts
    // the URL in capture group 1 whether the value is double-quoted,
    // single-quoted or unquoted.
    $pattern = '~<a\s+(?:[^>]*?\s)?href\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s>"\']+))~i';

    if (!preg_match_all($pattern, $html, $matches, PREG_SET_ORDER)) {
        return array();
    }

    $results = array();
    foreach ($matches as $match) {
        // Attribute values are HTML-escaped (e.g. "&amp;"), so unescape them first.
        $href = trim(html_entity_decode($match[1], ENT_QUOTES, 'UTF-8'));
        if ($href === '') {
            continue;
        }

        $results[] = array(
            'RefUrl' => urldecode($href),
        );
    }

    return $results;
}

$urlList = getUrlList('test');
print_r($urlList);
?>
Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.