Jump to content

[SOLVED] preg_match_all fun!!


piznac

Recommended Posts

Say I have a list of URLs that follow a certain pattern, for example:

 

http://www.articlebiz.com/

http://www.articlebiz.com/

http://www.articlebiz.com/featured_articles/1/

http://www.articlebiz.com/recently_added_articles/1/

http://www.articlebiz.com/most_viewed_articles/1/

http://www.articlebiz.com/commented_articles/1/

http://www.articlebiz.com/article_search/

http://www.articlebiz.com/submit_article/

http://www.articlebiz.com/author_tos/

http://www.articlebiz.com/rss_article_feeds/

http://www.articlebiz.com/publisher_tos/

http://www.articlebiz.com/article/102736-1-phulkari-art-of-punjab-a-novel-indian-craft/

http://www.articlebiz.com/article/98041-1-plus-size-denims-and-jeans/

http://www.articlebiz.com/article/94329-1-moroccan-bedrooms-create-your-harem-style-room/

http://www.articlebiz.com/article/93949-1-plus-size-jeans-for-women/

http://www.articlebiz.com/article/94872-1-wedding-dress-trains/

http://www.articlebiz.com/article/94878-1-wedding-gown-necklines/

http://www.articlebiz.com/article/94871-1-wedding-veils/

http://www.articlebiz.com/article/94870-1-wedding-headpieces/

http://www.articlebiz.com/article/89155-1-to-frame-or-not-to-frame-that-is-the-question/

http://www.articlebiz.com/article/91428-1-apply-correctly-make-up/

http://www.articlebiz.com/article/85574-1-defending-your-denim-how-to-keep-your-favorite-jeans-in-perfect-condition/

http://www.articlebiz.com/article/84445-1-crochet-tips-you-need-to-know/

http://www.articlebiz.com/article/78848-1-do-you-want-your-presents-to-stand-out/

http://www.articlebiz.com/article/76779-1-memory-quilts/

http://www.articlebiz.com/article/77512-1-silk-garments/

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=3

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=4

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=5

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=6

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=7

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2

http://www.articlebiz.com/terms_of_service/

http://www.articlebiz.com/privacy_policy/

http://www.articlebiz.com/contact_us/

http://www.articlebiz.com/submit_article/

http://www.articlebiz.com/sign_in.jsp

http://www.ewebcounter.com/

 

And I want to get rid of any results that don't start with this:

 

http://www.articlebiz.com/article/

 

I tried this:

 

preg_match_all("/http:\/\/www.articlebiz.com\/article/\")

 

but it simply returns this:

http://www.articlebiz.com/article/

 

for each URL with that in it. Now how do I also include what comes after that? I tried this:

//preg_match_all("/http:\/\/www.articlebiz.com\/article/([a-z0-9\.\"'\/:\-_?&=]+)i")), $new, $matches2);

but it's not working. I'm new to the preg crap; any help?

 

 

Link to comment
https://forums.phpfreaks.com/topic/75536-solved-preg_match_all-fun/
Share on other sites

<?php

// Sample input: one URL per line. Only the first two are article links.
$str = <<<HEREDOC
http://www.articlebiz.com/article/76779-1-memory-quilts/
http://www.articlebiz.com/article/77512-1-silk-garments/
http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2
HEREDOC;

// Match the fixed article prefix plus everything up to the next whitespace.
// Stopping at whitespace (instead of requiring a trailing "\n") means:
//   - the line break is not part of the returned URL, and
//   - a URL on the very last line, with no newline after it, is still found.
// "~" is used as the delimiter so the slashes in the URL need no escaping.
// The capture group is kept so $matches[1] stays populated as before.
preg_match_all('~(http://www\.articlebiz\.com/article/\S*)~i', $str, $matches);
print_r($matches[0]);

?>

 

 

Orio.

Extending on Orio's answer, here is some more code:

 

// Sample input: one URL per line. Only the first two are article links.
$str = <<<HEREDOC
http://www.articlebiz.com/article/76779-1-memory-quilts/
http://www.articlebiz.com/article/77512-1-silk-garments/
http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2
HEREDOC;


// $links[0] receives the full article URLs, $links[1] the part after
// ".../article/". The dots in the host name are escaped so they match a
// literal "." only, and the character class is limited to characters that
// can appear in the URL (no quote characters), with "-" placed last so it is
// taken literally.
preg_match_all('@http://www\.articlebiz\.com/article/([a-z0-9./:_?&=-]+)@i', $str, $links);

print_r($links);

OK... I'm not getting anything returned on that, most likely because something else is messed up in my script. Could you guys take a look at it and see where I went wrong?

 

<?php
// Fetches the articlebiz.com search results page for $keyword and prints the
// href of every link on that page that points at an article.
$keyword = "embroidery";
// urlencode() keeps the query string valid for keywords containing spaces,
// "&" or other reserved characters.
$url1 = "http://www.articlebiz.com/browse.jsp?keywords=" . urlencode($keyword);
$userAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; DigExt; SV1; .NET CLR 2.0.50727; .NET CLR 1.1.4322)";

$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL, $url1);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html = curl_exec($ch);
if (!$html) {
    // Falsy result: false on a transfer failure, or an empty body.
    echo "<br />cURL error number:" . curl_errno($ch);
    echo "<br />cURL error:" . curl_error($ch);
    exit;
}

// Capture the value of every double-quoted href attribute. $matches[1] then
// holds the bare URLs, so no explode() on quote characters is needed.
preg_match_all('/href\s*=\s*"([^"]*)"/i', $html, $matches);

// Keep only the links that start with the article prefix. A plain prefix
// comparison is enough here: each href is a single value with no trailing
// newline, so a pattern that waits for "\n" can never match it.
$articlePrefix = "http://www.articlebiz.com/article/";
$matches2 = array();
foreach ($matches[1] as $href) {
    if (stripos($href, $articlePrefix) === 0) {
        $matches2[] = $href;
    }
}

print_r($matches2);

?>

Here, nice and simple:

 

// Fetches the articlebiz.com search results page for $keyword and prints all
// article URLs found anywhere in the returned HTML.
$keyword = "embroidery";
// urlencode() keeps the query string valid for keywords containing spaces,
// "&" or other reserved characters.
$url1 = "http://www.articlebiz.com/browse.jsp?keywords=" . urlencode($keyword);
$userAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; DigExt; SV1; .NET CLR 2.0.50727; .NET CLR 1.1.4322)";

$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL, $url1);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html = curl_exec($ch);
if (!$html) {
    // Falsy result: false on a transfer failure, or an empty body.
    echo "<br />cURL error number:" . curl_errno($ch);
    echo "<br />cURL error:" . curl_error($ch);
    exit;
}

// $links[0] receives the full article URLs, $links[1] the part after
// ".../article/". The host dots are escaped so they match a literal "." only.
// Quote characters are deliberately left out of the character class:
// otherwise a match taken from href="..." would run on past the URL and
// include the attribute's closing quote.
preg_match_all('@http://www\.articlebiz\.com/article/([a-z0-9./:_?&=-]+)@i', $html, $links);

print_r($links);

 

hope that was what you wanted

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.