Jump to content

[SOLVED] preg_match_all fun!!


piznac

Recommended Posts

Say I have a list of URLs that follow a certain pattern, for example:

 

http://www.articlebiz.com/

http://www.articlebiz.com/

http://www.articlebiz.com/featured_articles/1/

http://www.articlebiz.com/recently_added_articles/1/

http://www.articlebiz.com/most_viewed_articles/1/

http://www.articlebiz.com/commented_articles/1/

http://www.articlebiz.com/article_search/

http://www.articlebiz.com/submit_article/

http://www.articlebiz.com/author_tos/

http://www.articlebiz.com/rss_article_feeds/

http://www.articlebiz.com/publisher_tos/

http://www.articlebiz.com/article/102736-1-phulkari-art-of-punjab-a-novel-indian-craft/

http://www.articlebiz.com/article/98041-1-plus-size-denims-and-jeans/

http://www.articlebiz.com/article/94329-1-moroccan-bedrooms-create-your-harem-style-room/

http://www.articlebiz.com/article/93949-1-plus-size-jeans-for-women/

http://www.articlebiz.com/article/94872-1-wedding-dress-trains/

http://www.articlebiz.com/article/94878-1-wedding-gown-necklines/

http://www.articlebiz.com/article/94871-1-wedding-veils/

http://www.articlebiz.com/article/94870-1-wedding-headpieces/

http://www.articlebiz.com/article/89155-1-to-frame-or-not-to-frame-that-is-the-question/

http://www.articlebiz.com/article/91428-1-apply-correctly-make-up/

http://www.articlebiz.com/article/85574-1-defending-your-denim-how-to-keep-your-favorite-jeans-in-perfect-condition/

http://www.articlebiz.com/article/84445-1-crochet-tips-you-need-to-know/

http://www.articlebiz.com/article/78848-1-do-you-want-your-presents-to-stand-out/

http://www.articlebiz.com/article/76779-1-memory-quilts/

http://www.articlebiz.com/article/77512-1-silk-garments/

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=3

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=4

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=5

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=6

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=7

http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2

http://www.articlebiz.com/terms_of_service/

http://www.articlebiz.com/privacy_policy/

http://www.articlebiz.com/contact_us/

http://www.articlebiz.com/submit_article/

http://www.articlebiz.com/sign_in.jsp

http://www.ewebcounter.com/

 

And I want to get rid of any results that don't start with this:

 

http://www.articlebiz.com/article/

 

I tried this:

 

preg_match_all("/http:\/\/www.articlebiz.com\/article/\")

 

but it simply returns this:

http://www.articlebiz.com/article/

 

for each URL with that in it. Now how do I also include what comes after that? I tried this:

//preg_match_all("/http:\/\/www.articlebiz.com\/article/([a-z0-9\.\"'\/:\-_?&=]+)i")), $new, $matches2);

but it's not working. I'm new to the preg crap, any help?

 

 

Link to comment
Share on other sites

<?php

// Sample input: one URL per line. Only the first two are article URLs.
$str = <<<HEREDOC
http://www.articlebiz.com/article/76779-1-memory-quilts/
http://www.articlebiz.com/article/77512-1-silk-garments/
http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2
HEREDOC;

// Collect every URL that starts with the article prefix.
//  - "^" together with the "m" modifier anchors each match to the start of
//    a line.
//  - "\S*" takes the rest of the URL and stops at the first whitespace, so
//    the match does not depend on a trailing "\n": a URL on the last line
//    (or a string holding a single URL) is still found, and no newline ends
//    up inside the matched text.
preg_match_all('/^http:\/\/www\.articlebiz\.com\/article\/\S*/mi', $str, $matches);

// $matches[0] holds the full matched URLs.
print_r($matches[0]);

 

 

Orio.

Link to comment
Share on other sites

Extending on Orio's answer, here is some more code:

 

// Sample input: one URL per line. Only the first two are article URLs.
$str = <<<HEREDOC
http://www.articlebiz.com/article/76779-1-memory-quilts/
http://www.articlebiz.com/article/77512-1-silk-garments/
http://www.articlebiz.com/browse.jsp?keywords=embroidery&index=2
HEREDOC;

// $links[0] receives the full article URLs; $links[1] receives only the part
// after "/article/".
//  - The dots in the host name are escaped so they match a literal "."
//    instead of any character.
//  - Quote characters are deliberately not in the character class: when the
//    same pattern is run against HTML, the match then ends at the quote that
//    closes an href attribute instead of including it.
preg_match_all('@http://www\.articlebiz\.com/article/([a-z0-9./:_?&=-]+)@i', $str, $links);

print_r($links);

Link to comment
Share on other sites

Ok... I'm not getting anything returned on that. Most likely because something else is messed up in my script. Could you guys take a look at it and see where I went wrong?

 

<?php
// Fetches the articlebiz.com browse page for $keyword, pulls the contents of
// href attributes out of the returned HTML, and tries to print the ones that
// point at /article/ pages.
$keyword = "embroidery";
$url1 = "http://www.articlebiz.com/browse.jsp?keywords=$keyword";
$userAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; DigExt; SV1; .NET CLR 2.0.50727; .NET CLR 1.1.4322)";

// Request $url1 with cURL. CURLOPT_RETURNTRANSFER makes curl_exec() return
// the response body instead of printing it.
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL,$url1);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html = curl_exec($ch);
// Any falsy result (false, or an empty body) is reported as a cURL error
// and stops the script.
if (!$html) {
echo "<br />cURL error number:" .curl_errno($ch);
echo "<br />cURL error:" . curl_error($ch);
exit;
}

// NOTE(review): $pattern is assigned here but never used in the code below.
$pattern = "/\<a class=l href=\"([a-z0-9\.\"'\/:\-_?&=]+)\"/i";

//for($i=0; $i<10000; $i++)
//{
  
 // Capture group 2 ($matches[2]) is the text between "href=" (or "href ")
 // and the next ">", i.e. the raw attribute text of each link.
 preg_match_all(("/(href[= = ])(.*?)(>)(.*?)(<\/a>+)/i"), $html, $matches);

foreach($matches[2] as $va){
// NOTE(review): each $va is a string here, but array_merge() expects array
// arguments -- this is a likely reason nothing is printed; confirm.
$merge = array_merge($va);
foreach($merge as $new){
	// Split on double quotes; index 1 is the text between the first and
	// second quote.
	$new2 = explode('"',$new);
	//echo "$new2[1]<br />";
	// NOTE(review): this pattern only matches when the URL is followed by
	// "\n"; a single href value presumably has no trailing newline, so this
	// would find nothing even if the loop above ran -- confirm.
	preg_match_all("/(http:\/\/www\.articlebiz\.com\/article\/.*?)\n/i", $new2[1], $matches2);
	print_r($matches2);
	//print_r($matches2);
	//foreach($matches2 as $new33){
		//echo "$new33<br />";
		//print_r($new33);
	//}

}
//$nomerge = explode('"',$merge);
//echo $nomerge[0];
}
//$merger = array_merge($va);

/*foreach($va as $link){
	echo "$link<br />";
}

} */   

//}

?>

Link to comment
Share on other sites

Here, nice and simple:

 

// Fetches the articlebiz.com browse page for $keyword and prints every
// article URL found in the returned HTML.
$keyword = "embroidery";
// urlencode() keeps the query string valid when the keyword contains spaces,
// "&" or other reserved characters.
$url1 = "http://www.articlebiz.com/browse.jsp?keywords=" . urlencode($keyword);
$userAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; DigExt; SV1; .NET CLR 2.0.50727; .NET CLR 1.1.4322)";

// Request the page with cURL. CURLOPT_RETURNTRANSFER makes curl_exec()
// return the response body as a string instead of printing it.
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_URL, $url1);
curl_setopt($ch, CURLOPT_FAILONERROR, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html = curl_exec($ch);

// Only a strict false signals a transfer failure; an empty response body is
// not a cURL error and simply yields no links below.
if ($html === false) {
    echo "<br />cURL error number:" . curl_errno($ch);
    echo "<br />cURL error:" . curl_error($ch);
    exit;
}

// $links[0] receives the full article URLs; $links[1] receives only the part
// after "/article/".
//  - The dots in the host name are escaped so they match a literal "."
//    instead of any character.
//  - Quote characters are deliberately not in the character class, so a
//    match ends at the quote that closes the href attribute instead of
//    including it.
preg_match_all('@http://www\.articlebiz\.com/article/([a-z0-9./:_?&=-]+)@i', $html, $links);

print_r($links);

 

hope that was what you wanted

Link to comment
Share on other sites

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.