I was playing with a similar approach that doesn't use regex except to extract the GUID status. Barand's solution is a bit cleaner.
/**
 * Download the index page at the hard-coded base URL and parse it into one
 * record per listed entry.
 *
 * Each line of the page is stripped of markup and decoration, and is only
 * accepted if it then consists of exactly five space-separated fields:
 * id, username, GUID (optionally with a "(STATUS)" part), date and time.
 * NOTE(review): a username containing a space would produce more than five
 * fields and the line would be skipped - confirm against the real page format.
 *
 * The script is terminated with exit() if the page cannot be retrieved.
 *
 * @return array List of associative arrays with the keys 'id', 'username',
 *               'guid', 'guid_status' (null when the GUID field carries no
 *               parenthesised status), 'image_source', 'date', 'time' and
 *               'datetime'.
 */
function get_pb_data()
{
    $pb_data = array();
    $base_url = 'http://74.80.133.251/7778/';

    // Retrieve main page
    $base_page = file_get_contents($base_url);

    // Exit and show error if it couldn't be retrieved. $base_page is false or
    // empty here, so report the URL instead of the (useless) page contents.
    if ( ! $base_page)
    {
        exit('Could not retrieve main page: ' . $base_url);
    }

    // These are substring items that will be removed from each line of text of the HTML
    $items_to_remove = array(
        "\r",        // hidden return chars (if present)
        '<p> ',      // <p> tags with trailing space (note no closing </p>'s in src)
        '"',         // Quotes around username
        '[',         // Bracket around date/time
        ']',         // Bracket around date/time
        '(W) GUID='  // (W), don't know if needed, and GUID= text
    );

    // Remove the items from the raw page text
    $base_page = str_replace($items_to_remove, '', $base_page);

    // Create an array for each line
    $lines = explode("\n", $base_page);

    // Cycle through the lines, format the data and store it in a new array
    foreach ($lines as $line)
    {
        // Remove the anchor tag from the line to isolate the punkbuster id ($pb_id)
        $line = strip_tags($line);

        // Only lines with exactly five space-separated fields are data rows
        if (substr_count($line, ' ') !== 4)
        {
            continue;
        }

        // Grab the fields by exploding on spaces
        list($pb_id, $username, $guid, $date, $time) = explode(' ', $line);

        // Grab the "status" string from the GUID field within () and strip it
        // from the GUID. If there is no such part, leave the GUID untouched
        // and report a null status instead of reading undefined match offsets.
        $guid_status = null;
        if (preg_match('/\((.*?)\)/', $guid, $status_match) === 1)
        {
            $guid_status = $status_match[1];
            $guid = str_replace($status_match[0], '', $guid);
        }

        // Replace dots with dashes in the date for a mysql valid format
        $date = str_replace('.', '-', $date);

        // Store the formatted user data. Normally would go in a db or something...
        $pb_data[] = array(
            'id'           => $pb_id,
            'username'     => $username,
            'guid'         => $guid,
            'guid_status'  => $guid_status,
            'image_source' => $base_url . 'pb' . $pb_id . '.png',
            'date'         => $date,
            'time'         => $time,
            'datetime'     => $date . ' ' . $time
        );
    }

    return $pb_data;
}
output:
$data = get_pb_data();
// The records come from a remote page, so escape the dump before embedding it
// in HTML, and close the <pre> element that wraps it.
echo '<pre>';
echo htmlspecialchars(print_r($data, true), ENT_QUOTES, 'UTF-8');
echo '</pre>';
--------------
Array
(
[0] => Array
(
[id] => 001646
[username] => -=D3G=-RotGM
[guid] => 00000000000000076561198133386615
[guid_status] => VALID
[image_source] => http://74.80.133.251/7778/pb001646.png
[date] => 2015-06-25
[time] => 17:20:36
[datetime] => 2015-06-25 17:20:36
)
[1] => Array
(
[id] => 001647
[username] => -=D3G=-Icey842
[guid] => 00000000000000076561198091035675
[guid_status] => VALID
[image_source] => http://74.80.133.251/7778/pb001647.png
[date] => 2015-06-25
[time] => 17:22:18
[datetime] => 2015-06-25 17:22:18
)
[2] => Array
(
[id] => 001648
[username] => budsanonymous
[guid] => 00000000000000076561198188792511
[guid_status] => VALID
[image_source] => http://74.80.133.251/7778/pb001648.png
[date] => 2015-06-25
[time] => 17:23:28
[datetime] => 2015-06-25 17:23:28
)
[3] => Array
(
[id] => 001649
[username] => -=D3G=-Roosevelt
[guid] => 00000000000000076561198161436214
[guid_status] => VALID
[image_source] => http://74.80.133.251/7778/pb001649.png
[date] => 2015-06-25
[time] => 17:23:48
[datetime] => 2015-06-25 17:23:48
)
[4] => Array
(
[id] => 001650
[username] => -=D3G=-RotGM
[guid] => 00000000000000076561198133386615
[guid_status] => VALID
[image_source] => http://74.80.133.251/7778/pb001650.png
[date] => 2015-06-25
[time] => 17:26:12
[datetime] => 2015-06-25 17:26:12
)
)