hellonoko Posted March 3, 2009 Share Posted March 3, 2009 I am using the below script to scrape mp3s from a music blog. However, after copying about 5 songs I receive this error: Fatal error: Allowed memory size of 33554432 bytes exhausted (tried to allocate 26738688 bytes) in /home2/sharingi/public_html/scrape/scrape.php on line 78 Is there any way I can change my code so that I don't do this? Somehow clearing memory between file copies? Thanks <?php $target_url = 'http://redthreat.wordpress.com/'; $userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)'; $ch = curl_init(); curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); curl_setopt($ch, CURLOPT_URL,$target_url); curl_setopt($ch, CURLOPT_FAILONERROR, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_AUTOREFERER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER,true); curl_setopt($ch, CURLOPT_TIMEOUT, 10); $html = curl_exec($ch); if (!$html) { echo "<br />cURL error number:" .curl_errno($ch); echo "<br />cURL error:" . curl_error($ch); exit; } // // load scrapped data into the DOM // $dom = new DOMDocument(); @$dom->loadHTML($html); // // get only LINKS from the DOM with XPath // $xpath = new DOMXPath($dom); $hrefs = $xpath->evaluate("/html/body//a"); // // go through all the links and store to db or whatever // for ($i = 0; $i < $hrefs->length; $i++) { $href = $hrefs->item($i); $url = $href->getAttribute('href'); $find = ".mp3"; $pos = strpos($url, $find); if ($pos === false) { } else { // function to store to db //storeLink($url,$target_url); echo $url; echo '<br>'; $last_slash = strripos( $url ,"/"); $clean_file_name = substr( $url , $last_slash + 1 , strlen($url) ); echo '<br>'; //echo $target_url; //echo '<br><br>'; //directory to copy to (must be CHMOD to 777) $copydir = "/home2/sharingi/public_html/scrape/scraped/"; $data = file_get_contents($url); $file = fopen($copydir . 
$clean_file_name, "w+"); fputs($file, $data); fclose($file); echo "Coppied!"; echo "<br><br>"; } } ?> Quote Link to comment Share on other sites More sharing options...
mrdamien Posted March 3, 2009 Share Posted March 3, 2009 Not using file_get_contents will help. Instead try using curl with the CURLOPT_FILE option. Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.