Jump to content

help!! my xml parser can't read ""


randomsai

Recommended Posts

As the title says, my RSS/XML parser cannot read any tags in my feed whose content contains quotation marks. The code is shown below. By the way, the URL of the XML file is read from my database, but this is my primary website: http://www.newsday.co.tt/rss.xml

 

 

<?PHP

// Declare the response body as plain text; the script later dumps the parsed
// feed with print_r().
HEADER('content-type: text/plain');

  // NOTE(review): $f is not referenced anywhere else in the visible code.
  $f=0;

// Procedural callbacks for the XML extension. Each one forwards the event it
// receives to the rss_parser instance held in the global variable $rss.

/**
 * Opening-tag callback: hands the event to $rss->start_element().
 *
 * @param mixed  $parser     Parser handle that triggered the callback.
 * @param string $name       Name of the element being opened.
 * @param array  $attributes Attributes of the element.
 */
function rss_start_element($parser, $name, $attributes) {
  global $rss;

  $handler = $rss;
  $handler->start_element($parser, $name, $attributes);
}

 

/**
 * Closing-tag callback: hands the event to $rss->end_element().
 *
 * @param mixed  $parser Parser handle that triggered the callback.
 * @param string $name   Name of the element being closed.
 */
function rss_end_element($parser, $name) {
  global $rss;

  $handler = $rss;
  $handler->end_element($parser, $name);
}

 

/**
 * Text callback: hands the character data to $rss->character_data().
 *
 * @param mixed  $parser Parser handle that triggered the callback.
 * @param string $data   Chunk of character data reported by the parser.
 */
function rss_character_data($parser, $data) {
  global $rss;

  $handler = $rss;
  $handler->character_data($parser, $data);
}

 

 

/**
 * Event-driven RSS reader built on PHP's XML parser extension.
 *
 * Usage: set $file to a path or URL, call parse(), then read $channel.
 * Element names are stored as the parser reports them (upper-cased with the
 * extension's default case folding), e.g. $channel['TITLE'] and
 * $channel['ITEM'][0]['TITLE'].
 *
 * NOTE(review): the feed must be valid in its declared encoding; stray
 * Windows-1252 bytes (such as 0x93 curly quotes) in a UTF-8 feed make
 * parse() fail with an XML error rather than being repaired here.
 */
class rss_parser {

  /** @var string Last error message; empty string when nothing failed. */
  public $error = '';

  /** @var string Path or URL of the feed to read. */
  public $file = '';

  /** @var array Parsed feed data, keyed by element name. */
  public $channel = array();

  /** @var array One (empty) entry per <item> seen; kept for compatibility. */
  public $item = array();

  /** @var string Raw character data collected for the element being read. */
  public $data = '';

  /** @var array Path of keys from $channel down to the current element. */
  public $stack = array();

  /** @var int Number of <item> elements encountered so far. */
  public $num_items = 0;

  /** @var mixed Handle returned by xml_parser_create(). */
  public $xml_parser;

  /**
   * Creates the XML parser and registers this object's methods as handlers.
   *
   * Method callables are used so the instance does not have to live in a
   * global variable with a particular name.
   */
  public function __construct() {
    $this->xml_parser = xml_parser_create();
    xml_set_element_handler(
      $this->xml_parser,
      array($this, 'start_element'),
      array($this, 'end_element')
    );
    xml_set_character_data_handler($this->xml_parser, array($this, 'character_data'));
  }

  /**
   * Collects character data for the current element.
   *
   * The parser may deliver one text node in several pieces (for example
   * around entities such as &quot;), so the pieces are concatenated verbatim
   * and only trimmed once the element is closed.
   *
   * @param mixed  $parser Parser handle (unused).
   * @param string $data   Piece of character data.
   */
  public function character_data($parser, $data) {
    $this->data .= $data;
  }

  /**
   * Tracks the position in the document when an element opens.
   *
   * RSS and CHANNEL are transparent wrappers; ITEM pushes its name plus a
   * running index so every item gets its own slot; everything else pushes
   * its own name.
   *
   * @param mixed  $parser Parser handle (unused).
   * @param string $name   Element name.
   * @param array  $attrs  Element attributes (unused).
   */
  public function start_element($parser, $name, $attrs) {
    switch ($name) {
      case 'RSS':
      case 'CHANNEL':
        break;

      case 'ITEM':
        $this->stack[] = $name;
        $this->stack[] = $this->num_items; // item index
        $this->item[$this->num_items] = array();
        $this->num_items++;
        break;

      default: // includes TEXTINPUT
        $this->stack[] = $name;
        break;
    }
  }

  /**
   * Stores the collected text when an element closes and unwinds the stack.
   *
   * @param mixed  $parser Parser handle (unused).
   * @param string $name   Element name.
   */
  public function end_element($parser, $name) {
    switch ($name) {
      case 'RSS':
      case 'CHANNEL':
        break;

      case 'ITEM':
        // Undo both pushes made for the item: its index and its name.
        array_pop($this->stack);
        array_pop($this->stack);
        break;

      case 'TEXTINPUT':
        array_pop($this->stack);
        break;

      default: // child element
        $value = trim($this->data);

        // Walk down $channel along the stack by reference (no eval needed).
        $slot =& $this->channel;
        foreach ($this->stack as $key) {
          if (!is_array($slot)) {
            $slot = array();
          }
          $slot =& $slot[$key];
        }
        // A container whose children were already stored must not be
        // replaced by the empty text that surrounds those children.
        if (!(is_array($slot) && $value === '')) {
          $slot = $value;
        }
        unset($slot);

        array_pop($this->stack);
        $this->data = '';
        break;
    }
  }

  /**
   * Reads $this->file and feeds it to the XML parser in 4096-byte chunks.
   *
   * @return bool TRUE on success; FALSE on failure, with $this->error set to
   *              either the "Could not open" or the "XML error" message.
   */
  public function parse() {
    $fp = @fopen($this->file, "r");
    if (!$fp) {
      $this->error = "Could not open RSS source \"$this->file\".";
      return false;
    }

    $ok = true;
    while (true) {
      $chunk = fread($fp, 4096);
      // Compare strictly so that a chunk consisting of "0" is not mistaken
      // for the end of the stream.
      if ($chunk === false || $chunk === '') {
        break;
      }
      if (!xml_parse($this->xml_parser, $chunk, false)) {
        $ok = false;
        break;
      }
    }
    // Always tell the parser the document is complete so that truncated
    // input is reported instead of silently accepted.
    if ($ok && !xml_parse($this->xml_parser, '', true)) {
      $ok = false;
    }

    if (!$ok) {
      $this->error = sprintf("XML error: %s at line %d.",
        xml_error_string(xml_get_error_code($this->xml_parser)),
        xml_get_current_line_number($this->xml_parser));
    }

    fclose($fp);
    // Explicit freeing is only needed before PHP 8, where the parser is a
    // resource; later versions release the object automatically.
    if (PHP_VERSION_ID < 80000) {
      xml_parser_free($this->xml_parser);
    }

    return $ok;
  }

}

// Driver: read every feed URL from the `news` table, parse each reachable
// feed and dump the resulting array.
//
// NOTE(review): the mysql_* extension was removed in PHP 7; port these calls
// to mysqli or PDO before running on a current PHP version.
$con1 = mysql_connect("localhost", "root", "") or die(mysql_error());

mysql_select_db("sources") or die(mysql_error());

$data = mysql_query("SELECT * FROM news")
or die(mysql_error());

// Collect the URLs first so the connection is only closed once the result
// set has been fully consumed.
$urls = array();
while ($info = mysql_fetch_array( $data )) {
  $urls[] = $info['URL'];
}

mysql_close($con1);

foreach ($urls as $url) {

  // Skip sources that cannot be opened at all, and release the probe handle.
  $probe = @fopen( $url, 'r' );
  if ( !$probe ) {
    continue;
  }
  fclose($probe);

  // The variable must be the global $rss if the procedural rss_* hooks are
  // the registered handlers, since they look the parser up under that name.
  $rss = new rss_parser();
  $rss->file = "$url";
  $rss->parse() or die($rss->error);

  print_r($rss->channel);

}

Link to comment
Share on other sites

Two things... First, the feed doesn't validate, and that will break the parser:

 

http://validator.w3.org/feed/check.cgi?url=http%3A%2F%2Fwww.newsday.co.tt%2Frss.xml

Sorry

 

This feed does not validate.

 

    *

 

      'ascii' codec can't decode byte 0x93 in position 1801: ordinal not in range(128) (maybe a high-bit character?) [help]

 

 

In addition, interoperability with the widest range of feed readers could be improved by implementing the following recommendations.

 

    *

 

      line 38, column 170: description contains bad characters (7 occurrences) [help]

 

          ... g to any of the floors in the building. ?Nobody is allowed to go upstair ...

                                                      ^

 

    *

 

      line 67, column 0: Missing atom:link with rel="self" [help]

 

 

And secondly, why not parse it the easy way:

  $xml_feed="http://www.newsday.co.tt/rss.xml";
  
  if(!$xml = simplexml_load_file("$xml_feed")){
    $message.="Can't connect to feed $xml_feed";
  }
  foreach ($xml->channel->item as $value){
      $title=$value->title;
      $description=$value->description;
      echo"<p>$title</p>\n";
      echo"<p>$description</p>\n";
      
  }

 

Link to comment
Share on other sites

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.