Mikho Posted May 10, 2013 Share Posted May 10, 2013 (edited) I am building a plagiarism-checking tool for my company but am having problems calculating the uniqueness factor. The calculation works by searching for the snippet on Google using GSERP. The script then checks whether there were any results; if there were, the snippet is not unique. This is my code: $snippet = '"' . join(" ", array_slice($contentArray, $start, $limit)) . '"'; $start += $limit; $end += $limit; $counter++; $url = ''; $lang = 'en'; $gserp = (g_serp($snippet, $url, $lang)); $gserpCount = count($gserp); . . . . . . error_reporting(E_ALL ^ E_NOTICE); //helper function -- file_get_contents using curl function file_get_contents_curl($url, $referer = '', $ua = '') { $ch = curl_init($url); curl_setopt($ch, CURLOPT_HEADER, FALSE); curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); if ($referer != '') { curl_setopt($ch, CURLOPT_REFERER, $referer); } if ($ua != '') { curl_setopt($ch, CURLOPT_USERAGENT, $ua); } curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); curl_setopt($ch, CURLOPT_TIMEOUT, 30); $data = curl_exec($ch); curl_close($ch); return $data; } //this is the main function function g_serp($keyword, $url, $lang = 'en') { $results = array(); $g_url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=' . urlencode($keyword) . '&rsz=large&userip=' . $_SERVER['REMOTE_DDR'] . '&hl=' . $lang; for ($i = 0; $i < 64; $i+=8) { $start = $i; $referer = $_SERVER['HTTP_REFERER']; //change this into your real domain $rawdata = file_get_contents_curl($g_url . '&start=' . 
$start, $referer, $_SERVER['HTTP_USER_AGENT']); $decoded = json_decode($rawdata, TRUE); //decode as assoc array if (is_array($decoded['responseData']['results'])) { $pos = $start; foreach ($decoded['responseData']['results'] as $result) { //if (substr_count(strtolower($result['url']), $url)) { // $GLOBALS['index'] = $pos + 1; // } $res['position'] = $pos + 1; $res['title'] = $result['titleNoFormatting']; $res['url'] = $result['unescapedUrl']; array_push($results, $res); $pos++; } } } return $results; } Does anyone have any idea what I may be doing wrong, or how best I can go about this? Edited May 10, 2013 by ignace Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.