randomsai Posted March 2, 2010 Share Posted March 2, 2010 as the title says, my rss/xml parser cannot read any tags of my feed where there are quotations involved. the code will be shown below, BTW the url where the xml file is generated is from my database, but this is my primary website http://www.newsday.co.tt/rss.xml: <?PHP HEADER('content-type: text/plain'); $f=0; // define hooks to rss_parser class as xml functions do not allow object methods as handlers. FUNCTION rss_start_element($parser, $name, $attributes) { GLOBAL $rss; $rss->start_element($parser, $name, $attributes); } FUNCTION rss_end_element($parser, $name) { GLOBAL $rss; $rss->end_element($parser, $name); } FUNCTION rss_character_data($parser, $data) { GLOBAL $rss; $rss->character_data($parser, $data); } CLASS rss_parser { // constructor. setup parser options and handlers. FUNCTION rss_parser() { $this->error = ''; $this->file = ''; $this->channel = ARRAY(); $this->data = ''; $this->stack = ARRAY(); $this->num_items = 0; $this->xml_parser = XML_PARSER_CREATE(); XML_SET_ELEMENT_HANDLER($this->xml_parser, "rss_start_element", "rss_end_element"); XML_SET_CHARACTER_DATA_HANDLER($this->xml_parser, "rss_character_data"); } FUNCTION character_data($parser, $data) { IF (EMPTY($this->data)) $this->data = TRIM($data); ELSE $this->data .= ' '.TRIM($data); } FUNCTION start_element($parser, $name, $attrs) { SWITCH($name) { CASE 'RSS': BREAK; CASE 'CHANNEL': BREAK; CASE 'ITEM': ARRAY_PUSH($this->stack, $name); ARRAY_PUSH($this->stack, $this->num_items); // push item index. $this->item[$this->num_items] = ARRAY(); $this->num_items++; BREAK; CASE 'TEXTINPUT': ARRAY_PUSH($this->stack, $name); BREAK; DEFAULT: ARRAY_PUSH($this->stack, $name); BREAK; } } FUNCTION end_element($parser, $name) { SWITCH ($name) { CASE 'RSS': BREAK; CASE 'CHANNEL': BREAK; CASE 'ITEM': ARRAY_POP($this->stack); ARRAY_POP($this->stack); BREAK; CASE 'TEXTINPUT': ARRAY_POP($this->stack); BREAK; DEFAULT: // child element. 
$element = (IMPLODE("']['",$this->stack)); EVAL("\$this->channel['$element']=\$this->data;"); // this does all the hard work. ARRAY_POP($this->stack); $this->data = ''; BREAK; } } FUNCTION parse() { IF (!($fp = @FOPEN($this->file, "r"))) { $this->error = "Could not open RSS source \"$this->file\"."; RETURN FALSE; } WHILE ($data = FREAD($fp, 4096)) { IF (!XML_PARSE($this->xml_parser, $data, FEOF($fp))) { $this->error = SPRINTF("XML error: %s at line %d.", XML_ERROR_STRING(XML_GET_ERROR_CODE($this->xml_parser)), XML_GET_CURRENT_LINE_NUMBER($this->xml_parser)); RETURN FALSE; } } XML_PARSER_FREE($this->xml_parser); RETURN TRUE; } } $con1 = mysql_connect("localhost", "root", "") or die(mysql_error()); mysql_select_db("sources") or die(mysql_error()); $data = mysql_query("SELECT * FROM news") or die(mysql_error()); mysql_close($con1); while ($info = mysql_fetch_array( $data )) { $url = $info['URL']; if ( @fopen( $url, 'r' ) ) { $rss = NEW rss_parser(); $rss->file = "$url"; $rss->parse() or DIE($rss->error); IF ($rss->error) PRINT $rss->error; PRINT_R($rss->channel); } Quote Link to comment https://forums.phpfreaks.com/topic/193866-help-my-xml-parser-cant-read/ Share on other sites More sharing options...
ToonMariner Posted March 2, 2010 Share Posted March 2, 2010 Try wrapping each string of content in CDATA sections... Quote Link to comment https://forums.phpfreaks.com/topic/193866-help-my-xml-parser-cant-read/#findComment-1020314 Share on other sites More sharing options...
harristweed Posted March 2, 2010 Share Posted March 2, 2010 Two things... First, the feed doesn't validate, and that will break the parser... http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Fwww.newsday.co.tt%2Frss.xml Sorry This feed does not validate. * 'ascii' codec can't decode byte 0x93 in position 1801: ordinal not in range(128) (maybe a high-bit character?) [help] In addition, interoperability with the widest range of feed readers could be improved by implementing the following recommendations. * line 38, column 170: description contains bad characters (7 occurrences) [help] ... g to any of the floors in the building. ?Nobody is allowed to go upstair ... ^ * line 67, column 0: Missing atom:link with rel="self" [help] And secondly, why not parse it the easy way: $xml_feed="http://www.newsday.co.tt/rss.xml"; if(!$xml = simplexml_load_file("$xml_feed")){ $message.="Can't connect to feed $xml_feed"; } foreach ($xml->channel->item as $value){ $title=$value->title; $description=$value->description; echo"<p>$title</p>\n"; echo"<p>$description</p>\n"; } Quote Link to comment https://forums.phpfreaks.com/topic/193866-help-my-xml-parser-cant-read/#findComment-1020362 Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.