Jump to content

Parsing XML in MYSQL using php, Date issue


webmonkie

Recommended Posts

Help!

 

I am trying to parse an XML file into a MYSQL database and retreve it later on sorted by date.  So far I have

 

<?php

// connect to database

$link = dbConnect();
if (!$link) {
$ERR .= 'Unable to connect to database.<br>Please try again.<br>';
}
else { $MESG = 'Database Connection Established<br>'; }
if ($ERR) { echo "$ERR<br>"; }
else {
//echo "$MESG<br>";
}

$url = "this is the feed";


/******************************************* 
* Function cached_fopen_url(): 
* Caches data from a HTTP resource/url to a 
* local file (HTTP headers are stipped) 
* @returns a file resoruce / or FALSE if failure. 
**/

function stripfromtext($haystack, $bfstarttext, $endsection) { 
  $startpostext = $bfstarttext; 
  $startposlen = strlen($startpostext); 
  $startpos = strpos($haystack, $startpostext); 
  $endpostext = $endsection; 
  $endposlen = strlen($endpostext); 
  $endpos = strpos($haystack, $endpostext, $startpos); 
  return substr($haystack, $startpos + $startposlen, $endpos - ($startpos + $startposlen)); 
}

function delfromtext($haystack, $bfstarttext, $endsection) { 
  $startpostext = $bfstarttext; 
  $startposlen = strlen($startpostext);
  $totallen = strlen($haystack); 
  $startpos = strpos($haystack, $startpostext); 
  $endpostext = $endsection; 
  $endposlen = strlen($endpostext); 
  $endpos = strpos($haystack, $endpostext, $startpos); 
  $result = substr($haystack, 0, $startpos);
  $result .= substr($haystack, $endpos, $totallen);
  return $result;
}

function cached_fopen_url($url, $file_mode, $timeout_seconds = 300, $fsocket_timeout = 300) 
{ 

$debug = false; 

clearstatcache(); 

if ($debug) { 
print "local_cache creation_time =" . 
@filemtime($cache_filename) . 
" actual time = " . time() . 
" timeout = " . 
$timeout_seconds ."<p>"; 
} 

if ( ( @file_exists($cache_filename ) and ( ( @filemtime($cache_filename) + $timeout_seconds) > ( time() ) ) ) ) { 
// ok, file is already cached and young enouth 
if ($debug) { print "using cached file ($cache_filename) <p>";} 
} else { 

if ($debug) { print "cacheing file ($url) to local ($cache_filename)<p>";} 

$urlParts = parse_url($url); 
$host = $urlParts['host']; 
$port = (isset($urlParts['port'])) ? $urlParts['port'] : 80; 

if( !$fp = @fsockopen( $host, $port, $errno, $errstr, $fsocket_timeout )) { 
// Server not responding 

} else { 

if( !fputs( $fp, "GET $url HTTP/1.0\r\nHost:$host\r\n\r\n" )) { 
die( "unable to send get request" ); 
} 

$data = null; 
stream_set_timeout($fp, $fsocket_timeout); 
$status = socket_get_status($fp); 
while( !feof($fp) && !$status['timed_out']) 
{ 
$data .= fgets ($fp,8192); 
$status = socket_get_status($fp); 
} 
fclose ($fp); 

// strip headers 
$sData = split("\r\n\r\n", $data, 2); 
$data = $sData[1]; 

} 
} 

// ok, point to (fresh) cached file 
if ( @file_exists($cache_filename )) { 
$handle = fopen($cache_filename, $file_mode); 
return $handle; 
} 

return $data; 
}

$xml = cached_fopen_url($url,"r");

//echo "$xml";

$articlecount = stripfromtext($xml, 'ArticleCount="', '">');

//echo "Articlecount: $articlecount";

$newsdata = array();
$listitems = "";

for ($i=0;$i<$articlecount;$i++) {

$article = stripfromtext($xml, '<Article', 'Article>');

$created = stripfromtext($article, 'Created="', '" ID');

$newsdata[$i]['created'] = $created;

//echo "Created: $created<br />";

$category = stripfromtext($article, '<Categories>', '</Categories>');

$newsdata[$i]['category'] = $category;

//echo "Category: $category<br />";

$heading = stripfromtext($article, '<Heading>', '</Heading>');
$heading = mysql_real_escape_string($heading);

$newsdata[$i]['heading'] = $heading;

//echo "Heading: $heading<br />";
$date = stripfromtext($article, '<Date>', '</Date>'); 
$newsdata[$i]['date'] = date("Y-m-d",strtotime($date));     

echo "Date: $date<br />";          

$contents = stripfromtext($article, '<Contents>', '</Contents>');

$contents = delfromtext($contents, '<img', '/>');

$contents = str_replace("<![CDATA[", "", $contents);

$contents = str_replace("/>]]>", "", $contents);

$contents = mysql_real_escape_string($contents);

$newsdata[$i]['contents'] = $contents;

//echo "Contents: $contents";

$xml = (delfromtext($xml, '<Article', 'Article>'));

//$listitems .= "<li><a href=\"index.php?section=news&subsection=showarticle&id=$i\" class=\"adfero\">$date <strong>$heading</strong></a></li>";

}

//print_r($newsdata);

$added = 0;

foreach ($newsdata as $key => $val) {

$heading = $val['heading'];

$sql = "SELECT id FROM feed WHERE heading = '$heading'";
$result = mysql_query($sql, $link) or die ((mysql_error()) . ($sql));
$count = mysql_num_rows($result);
//echo "$sql<br>";
//echo "count $count<br>";

if ($count<1) {

$category = $val['category'];
$created = $val['created'];
$time  = strtotime($created);
$content = $val['contents'];
$added_date= strtotime($date);

$sql = "INSERT INTO feed (category, created, heading, content, added_date) 
VALUES ('$category', FROM_UNIXTIME($time), '$heading', '$content', '$date')";  

//echo "$sql<br>";
mysql_query($sql, $link) or die ((mysql_error()) . ($sql));

$added++;

}

}

//echo "added $added";

?>

 

and the xml

 

<?xml version="1.0" encoding="Windows-1252"?>
<InfoStreamResults Copyright="(c) 2006 Adfero Ltd." ArticleCount="20">
  <Article Created="17:10:25" ID="18134274">
    <Heading>Metering and affordability should be linked </Heading>
    <Date>30/04/2007</Date>
    <Contents><![CDATA[Title. <br/><br/>This is the data. <img alt="ADNFCR-999-ID-18134274-ADNFCR" src="http://this is the feed" />]]></Contents>
    <Categories>
      <Category ID="438008116">Utilities</Category>
    </Categories>
  </Article>

<?php
function dmy2ISO ($dt)
{
    list($d, $m, $y) = explode('/', $dt);
    return "$y-$m-$d";
}

$xml = simplexml_load_file('my.xml');

foreach ($xml->Article as $art)
{
    $id = $art['ID'];
    $heading = (string)$art->Heading;
    $contents = (string)$art->Contents;
    $date = dmy2ISO((string)$art->Date);

    foreach ($art->Categories as $cat)
        $cats[] = (string)$cat->Category['ID'];
        
    echo "$id | $date | $heading | $contents | <br>";
    echo '<pre>', print_r($cats, true), '</pre>';
}
?>

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.