Jump to content

google.de generating additional info in the link - Scraping


Nuv

Recommended Posts

Hey,

 

I'm trying to scrape google.de/shopping. However, I'm having the following problem.

 

Please check -

 

view-source:http://www.google.de/search?hl=de&tbm=shop&q=4242002690209&oq=4242002690209

 

The URL for the product is something like

 

<a href="/products/catalog?hl=de&q=4242002690209&um=1&ie=UTF-8&tbm=shop&cid=2594634728159287170&sa=X&ei=5etEUJOlNI3zrQfuh4HADQ&ved=0CFYQ8wIwAA">Bosch MCM 42024 Red Diamond/Silber, Styline Küchenmaschine</a>

 

And the URL of the product I'm getting from my code is

 

/products/catalog?hl=de&q=4242002690209&um=1&ie=UTF-8&tbm=shop&cid=2594634728159287170

 

 

Why is the "&sa=X&ei=5etEUJOlNI3zrQfuh4HADQ&ved=0CFYQ8wIwAA" part missing when I'm trying to scrape the URL, and where is it coming from?

 

P.S. - In "&sa=X&ei=5etEUJOlNI3zrQfuh4HADQ&ved=0CFYQ8wIwAA", the ei value changes every time, so try searching for "Bosch MCM 42024 Red Diamond/Silber, Styline Küchenmaschine" while checking the source code.

 

Code I'm using -

 

<?php
  /*
   * Looks up a product on Google Shopping (google.de) by its EAN, extracts the
   * link of the first search result and then downloads that product page.
   *
   * Output: the raw href of the first result, exactly as it appears in the
   * markup of the search page that was downloaded.
   */

  /**
   * Downloads a URL with cURL and returns the response body.
   *
   * Both requests made by this script use the same options, so the setup
   * lives here instead of being repeated.
   *
   * @param string $url Absolute URL to fetch.
   * @return string|false Response body, or false when the transfer failed
   *                      (an E_USER_WARNING with the cURL error is raised).
   */
  function fetch_page($url)
  {
    $handle = curl_init();
    if ($handle === false) {
      trigger_error('Unable to initialise cURL for ' . $url, E_USER_WARNING);
      return false;
    }

    curl_setopt($handle, CURLOPT_URL, $url);
    // NOTE(review): the markup a server returns can depend on the User-Agent;
    // if the scraped links differ from what a browser's "view source" shows,
    // this header is the first thing to check.
    curl_setopt($handle, CURLOPT_USERAGENT, "msn");
    curl_setopt($handle, CURLOPT_HEADER, 0);
    curl_setopt($handle, CURLOPT_AUTOREFERER, 1);
    // Return the body as a string instead of echoing it.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    // CURLOPT_BINARYTRANSFER is intentionally not set: it has had no effect
    // since PHP 5.1.3 and is deprecated in current PHP versions.

    $body = curl_exec($handle);
    if ($body === false) {
      // Read the error before the handle is released.
      trigger_error('cURL request for ' . $url . ' failed: ' . curl_error($handle), E_USER_WARNING);
    }
    curl_close($handle);

    return $body;
  }

  $get_EAN = '4242002690209';

  // Encode the EAN so that it is always a valid query-string value.
  $ean_param = urlencode($get_EAN);
  $url = "http://www.google.de/search?hl=de&tbm=shop&q=".$ean_param."&oq=".$ean_param;

  $get_product = fetch_page($url);
  if ($get_product === false) {
    exit("Could not download the search results page.\n");
  }

  // $get_price[1] holds the captured href of every result link that matched.
  preg_match_all('~<div class="pslimain"><h3 class="r"><a href="(.*?)"~s', $get_product, $get_price);

  if (!isset($get_price[1][0])) {
    // Without this guard the script would read an undefined offset and then
    // request the bare host name.
    exit("No product link found in the search results.\n");
  }

  // The href is HTML attribute text, so entities such as &amp; must be decoded
  // before it can be used as a URL.
  $url = "http://www.google.de".html_entity_decode($get_price[1][0], ENT_QUOTES, 'UTF-8');

  // Product page body (false on failure); only consumed by the debug output below.
  $list_price = fetch_page($url);

  print_r($get_price[1][0]);
  //print_r($get_product);
  //print_r($list_price);
?>

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.