Jump to content

PHP RSS aggregator theory


keeps21

Recommended Posts

I'm thinking about creating a website that takes in multiple RSS feeds, merges the data into a single array and then outputs the latest 10 or so items.

 

Pseudocode

# Initialise feedArray
$feedArray = array();

foreach feed :
    # Grab and read feed 
    
    # Add feed data into $feedArray - ie title, link, description, date

endforeach;

# Sort feed array by date, newest first

# Output the latest items
foreach item in feedArray :
    # output data
endforeach;

 

Does that make sense?

 

Link to comment
https://forums.phpfreaks.com/topic/161361-php-rss-aggregator-theory/
Share on other sites

This is the code I've come up with - and it seems to be working alright for me.

 

Can anyone suggest any improvements - especially with regard to the way I'm handling the date?

 

Cheers

 

Here's the code.

 

<?php

/**
 * Convert an RSS publication date into a sortable "YYYY-MM-DD HH:MM:SS" string.
 *
 * The input is parsed with strtotime() instead of being split on spaces, so
 * the optional day-name prefix, single-digit days and the timezone suffix
 * (e.g. "Mon, 08 Jun 2009 12:34:56 +0100" or "8 Jun 2009 12:34:56 GMT") are
 * all handled. The result is always expressed in UTC, so dates taken from
 * feeds published in different timezones compare correctly as plain strings.
 *
 * @param mixed $date Date text; a string or anything castable to string
 *                    (such as a SimpleXMLElement node).
 * @return string The date as "Y-m-d H:i:s" in UTC, or an empty string when
 *                the input cannot be parsed.
 */
function get_date($date) {
	// Cast first so element objects and plain strings are treated alike.
	$timestamp = strtotime(trim((string) $date));

	if ($timestamp === false) {
		// An empty string sorts after every real date in a descending string sort.
		return '';
	}

	// gmdate() keeps the output independent of the server's default timezone.
	return gmdate('Y-m-d H:i:s', $timestamp);
}

/**
 * Load an RSS feed and flatten its items into plain arrays.
 *
 * @param string $feed URL or path of the feed, handed to simplexml_load_file().
 * @return array List of items, each an array with the keys 'title',
 *               'pubDate' (as returned by get_date()), 'description' and
 *               'link'. An empty array is returned when no feed was given or
 *               the feed could not be loaded, so callers can always iterate
 *               over the result.
 */
function parse_feed($feed='') {

	$newsfeed = array();

	// Nothing to fetch: skip the loader entirely.
	if ((string) $feed === '') {
		return $newsfeed;
	}

	$rss = simplexml_load_file($feed);

	// The loader yields a falsy value on failure; a document without a
	// <channel> element has no items to offer either.
	if (!$rss || !isset($rss->channel)) {
		return $newsfeed;
	}

	foreach ($rss->channel->item as $item) {
		// Cast every node to a string so the result holds plain values
		// rather than element objects tied to the parsed document.
		$newsfeed[] = array(
			'title'       => (string) $item->title,
			'pubDate'     => get_date((string) $item->pubDate),
			'description' => (string) $item->description,
			'link'        => (string) $item->link,
		);
	}

	return $newsfeed;
}

// Feeds to parse
$google = parse_feed('http://news.google.co.uk/news?um=1&ned=uk&hl=en&q=football&output=rss');
$bbc = parse_feed('http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml');

// Combine the items of every feed into one list. A feed that did not yield
// an array (for example because it failed to load) is skipped.
$merged = array();
foreach (array($google, $bbc) as $items) {
	if (is_array($items)) {
		$merged = array_merge($merged, $items);
	}
}

// Build a column of publication dates parallel to $merged; array_multisort()
// orders $merged by this column.
$pubdate = array();
foreach ($merged as $key => $row) {
	$pubdate[$key] = $row['pubDate'];
}

// Newest first. Skipped when there are no items, since there is nothing to sort.
if ($pubdate) {
	array_multisort($pubdate, SORT_DESC, SORT_STRING, $merged);
}

// Output array. Feed content is untrusted, so it is HTML-escaped before printing.
foreach ($merged as $m) {
	echo htmlspecialchars((string) $m['pubDate'], ENT_QUOTES, 'UTF-8')
		. ' - '
		. htmlspecialchars((string) $m['title'], ENT_QUOTES, 'UTF-8')
		. '<br />';
}

Now amended to show items in groups.

 

  • Last Hour
  • 1-2 Hours Old
  • 2-4 Hours Old
  • Over 4 Hours Old

 

Code is shown below - I'd be very grateful for any suggestions, improvements or advice.  :)

 

<?php
/**
 * Reformat a date string as "YYYY-MM-DD HH:MM:SS".
 *
 * The text is parsed by strtotime() and the resulting timestamp is rendered
 * by date(), i.e. in the script's default timezone.
 *
 * @param mixed $date Date text understood by strtotime().
 * @return string The formatted date.
 */
function get_date($date)
{
    $timestamp = strtotime($date);
    $formatted = date('Y-m-d H:i:s', $timestamp);

    return $formatted;
}

/**
 * Turn a date string into a Unix timestamp.
 *
 * Thin wrapper around strtotime(); its result is returned unchanged.
 *
 * @param mixed $date Date text understood by strtotime().
 * @return int|false The timestamp, or false when strtotime() cannot parse the text.
 */
function convert_to_timestamp($date)
{
    $timestamp = strtotime($date);

    return $timestamp;
}

/**
 * Load an RSS feed and flatten its items into plain arrays.
 *
 * @param string $feed URL or path of the feed, handed to simplexml_load_file().
 * @return array List of items, each an array with the keys 'title',
 *               'pubDate' (from get_date()), 'timestamp' (from
 *               convert_to_timestamp()), 'description' and 'link'. An empty
 *               array is returned when no feed was given or the feed could
 *               not be loaded, so callers can always iterate over the result.
 */
function parse_feed($feed='') {

	$newsfeed = array();

	// Nothing to fetch: skip the loader entirely.
	if ((string) $feed === '') {
		return $newsfeed;
	}

	$rss = simplexml_load_file($feed);

	// The loader yields a falsy value on failure; a document without a
	// <channel> element has no items to offer either.
	if (!$rss || !isset($rss->channel)) {
		return $newsfeed;
	}

	foreach ($rss->channel->item as $item) {
		// Cast the nodes to strings so the result holds plain values
		// rather than element objects tied to the parsed document.
		$published = (string) $item->pubDate;

		$newsfeed[] = array(
			'title'       => (string) $item->title,
			'pubDate'     => get_date($published),
			'timestamp'   => convert_to_timestamp($published),
			'description' => (string) $item->description,
			'link'        => (string) $item->link,
		);
	}

	return $newsfeed;
}

// Feeds to parse
$google = parse_feed('http://news.google.co.uk/news?um=1&ned=uk&hl=en&q=football&output=rss');
$bbc = parse_feed('http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml');

// Combine the items of every feed into one list. A feed that did not yield
// an array (for example because it failed to load) is skipped.
$merged = array();
foreach (array($google, $bbc) as $items) {
	if (is_array($items)) {
		$merged = array_merge($merged, $items);
	}
}

// Build a column of publication dates parallel to $merged; array_multisort()
// orders $merged by this column.
$pubdate = array();
foreach ($merged as $key => $row) {
	$pubdate[$key] = $row['pubDate'];
}

// Newest first. Skipped when there are no items, since there is nothing to sort.
if ($pubdate) {
	array_multisort($pubdate, SORT_DESC, SORT_STRING, $merged);
}

// Output array, grouped by age.
// The clock is read once so every item is measured against the same instant.
$now = time();

// Headings that have already been printed, keyed by heading text.
$printed = array();

foreach ($merged as $m) {
	$age = $now - $m['timestamp'];

	// Each branch is only reached when the previous, younger bucket did not match.
	if ($age <= 3600) {
		$heading = 'Last Hour';
	} elseif ($age <= 7200) {
		$heading = '1-2 Hours Old';
	} elseif ($age <= 14400) {
		$heading = '2-4 Hours Old';
	} else {
		$heading = 'Over 4 Hours Old';
	}

	// Print each group heading once, ahead of its first item.
	if (!isset($printed[$heading])) {
		echo '<h1>' . $heading . '</h1>';
		$printed[$heading] = true;
	}

	// The title comes from a remote feed, so it is HTML-escaped before printing.
	echo $m['pubDate'] . ' - ' . htmlspecialchars((string) $m['title'], ENT_QUOTES, 'UTF-8') . '<br />';
}

 

Further amendments.

 

<?php
# Set default timezone used by the date functions in this script
date_default_timezone_set('Europe/London');

# Development settings: print every diagnostic into the page output.
# -1 enables all error levels on every PHP version. E_ALL only includes
# E_STRICT from PHP 5.4 onwards, and the E_STRICT constant itself is
# deprecated as of PHP 8.4.
ini_set('display_errors', '1');
error_reporting(-1);

/**
 * Reformat a date string as "YYYY-MM-DD HH:MM:SS".
 *
 * The text is parsed by strtotime() and the resulting timestamp is rendered
 * by date(), i.e. in the script's default timezone.
 *
 * @param mixed $date Date text understood by strtotime().
 * @return string The formatted date.
 */
function get_date($date)
{
    $timestamp = strtotime($date);
    $formatted = date('Y-m-d H:i:s', $timestamp);

    return $formatted;
}

/**
 * Turn a date string into a Unix timestamp.
 *
 * Thin wrapper around strtotime(); its result is returned unchanged.
 *
 * @param mixed $date Date text understood by strtotime().
 * @return int|false The timestamp, or false when strtotime() cannot parse the text.
 */
function convert_to_timestamp($date)
{
    $timestamp = strtotime($date);

    return $timestamp;
}

/**
 * Load an RSS feed and flatten its items into plain arrays.
 *
 * @param string $feed URL or path of the feed, handed to simplexml_load_file().
 * @return array List of items, each an array with the keys 'title',
 *               'pubDate' (from get_date()), 'timestamp' (from
 *               convert_to_timestamp()), 'description' and 'link'. An empty
 *               array is returned when no feed was given or the feed could
 *               not be loaded, so callers can always iterate over the result.
 */
function parse_feed($feed) {

	$newsfeed = array();

	// Nothing to fetch: skip the loader entirely.
	if ((string) $feed === '') {
		return $newsfeed;
	}

	$rss = simplexml_load_file($feed);

	// The loader yields a falsy value on failure; a document without a
	// <channel> element has no items to offer either.
	if (!$rss || !isset($rss->channel)) {
		return $newsfeed;
	}

	foreach ($rss->channel->item as $item) {
		// Cast the nodes to strings so the result holds plain values
		// rather than element objects tied to the parsed document.
		$published = (string) $item->pubDate;

		$newsfeed[] = array(
			'title'       => (string) $item->title,
			'pubDate'     => get_date($published),
			'timestamp'   => convert_to_timestamp($published),
			'description' => (string) $item->description,
			'link'        => (string) $item->link,
		);
	}

	return $newsfeed;
}

// $feeds array will be populated from the database in the future
$feeds = array('google' => 'http://news.google.co.uk/news?um=1&ned=uk&hl=en&q=football&output=rss',
			'bbc' => 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'
		);

// Combine the items of every feed into one list. A feed that did not yield
// an array (for example because it failed to load) is skipped.
$merged = array();

foreach ($feeds as $url) {
	$items = parse_feed($url);

	if (is_array($items)) {
		$merged = array_merge($merged, $items);
	}
}

// Build a column of publication dates parallel to $merged; array_multisort()
// orders $merged by this column.
$pubdate = array();
foreach ($merged as $key => $row) {
	$pubdate[$key] = $row['pubDate'];
}

// Newest first. Skipped when there are no items, since there is nothing to sort.
if ($pubdate) {
	array_multisort($pubdate, SORT_DESC, SORT_STRING, $merged);
}

// Output array, grouped by age.
// The clock is read once so every item is measured against the same instant.
$now = time();

// Headings that have already been printed, keyed by heading text.
$printed = array();

foreach ($merged as $m) {
	$age = $now - $m['timestamp'];

	// Each branch is only reached when the previous, younger bucket did not match.
	if ($age <= 3600) {
		$heading = 'Last Hour';
	} elseif ($age <= 7200) {
		$heading = '1-2 Hours Old';
	} elseif ($age <= 14400) {
		$heading = '2-4 Hours Old';
	} else {
		$heading = 'Over 4 Hours Old';
	}

	// Print each group heading once, ahead of its first item.
	if (!isset($printed[$heading])) {
		echo '<h1>' . $heading . '</h1>';
		$printed[$heading] = true;
	}

	// The title comes from a remote feed, so it is HTML-escaped before printing.
	echo date( 'd-m-Y H:i', $m['timestamp']) . ' - ' . htmlspecialchars((string) $m['title'], ENT_QUOTES, 'UTF-8') . '<br />';
}

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.