thetxt Posted June 24, 2007 Share Posted June 24, 2007 I'm trying to extract information from a raw e-mail. I'm using a parseMail() class to parse out all of the header information and content information. I am having trouble figuring out the best way to get the e-mail body. JUST the text of the e-mail. Any suggestions? Quote Link to comment Share on other sites More sharing options...
cooldude832 Posted June 24, 2007 Share Posted June 24, 2007 Where are the messages stored? Quote Link to comment Share on other sites More sharing options...
thetxt Posted June 24, 2007 Author Share Posted June 24, 2007 I have a catch all script that parses the mail and sorts it into my mysql database accordingly. I've been doing some testing just grabbing the mail from a text file. Here is the class I am using to parse everything else. I've tried different ways to modify it to grab the message body as well but no luck. <?php // parse an incoming mail // Version 0.5, 2005/03/16 // Copyright (c) Frank Rust, TU Braunschweig (f.rust@tu-bs.de) // // This code is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This code is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // Since this is a very short Program the GNU General Public License // is not included. Please find it on the website of the Open Software // Foundation at // http://www.fsf.org/licensing/licenses/lgpl.txt // or write to the Free Software Foundation, Inc., 59 Temple Place, // Suite 330, Boston, MA 02111-1307 USA class parseMail { var $from=""; var $to=""; var $subject=""; var $received=""; var $date=""; var $message_id=""; var $content_type=""; var $part =array(); // decode a mail header function parseMail($text="") { $start=0; $lastheader=""; while (true) { $end=strpos($text,"\n",$start); $line=substr($text,$start,$end-$start); $start=$end+1; if ($line=="") break; // end of headers! if (substr($line,0,1)=="\t") { $$last.="\n".$line; } if (preg_match("/^(From:)\s*(.*)$/",$line,$matches)) { $last="from"; $$last=$matches[2]; } if (preg_match("/^(Received:)\s*(.*)$/",$line,$matches)) { $last="received"; $$last=$matches[2]; } if (preg_match("/^(To:)\s*(.*)$/",$line,$matches)) { $last="to"; $$last=$matches[2]; } if (preg_match("/^(Subject:)\s*(.*)$/",$line,$matches)) { $last="subject"; $$last=$matches[2]; } if (preg_match("/^(Date:)\s*(.*)$/",$line,$matches)) { $last="date"; $$last=$matches[2]; } if (preg_match("/^(Content-Type:)\s*(.*)$/",$line,$matches)) { $last="content_type"; $$last=$matches[2]; } if (preg_match("/^(Message-Id:)\s*(.*)$/",$line,$matches)) { $last="message_id"; $$last=$matches[2]; } } $this->from=$from; $this->received=$received; $this->to=$to; $this->subject=$subject; $this->date=$date; $this->content_type=$content_type; $this->message_id=$message_id; if (preg_match("/^multipart\/mixed;/",$content_type)) { $b=strpos($content_type,"boundary="); $boundary=substr($content_type,$b+strlen("boundary=")); $boundary=substr($boundary,1,strlen($boundary)-2); $this->multipartSplit($boundary,substr($text,$start)); } else { $this->part[0]['Content-Type']=$content_type; $this->part[0]['content']=substr($text,$start); } } // decode a multipart header function multipartHeaders($partid,$mailbody) { $text=substr($mailbody,$this->part[$partid]['start'], $this->part[$partid]['ende']-$this->part[$partid]['start']); $start=0; $lastheader=""; while (true) { $end=strpos($text,"\n",$start); $line=substr($text,$start,$end-$start); $start=$end+1; if ($line=="") break; // end of headers! if (substr($line,0,1)=="\t") { $$last.="\n".$line; } if (preg_match("/^(Content-Type:)\s*(.*)$/",$line,$matches)) { $last="c_t"; $$last=$matches[2]; } if (preg_match("/^(Content-Transfer-Encoding:)\s*(.*)$/",$line,$matches)) { $last="c_t_e"; $$last=$matches[2]; } if (preg_match("/^(Content-Description:)\s*(.*)$/",$line,$matches)) { $last="c_desc"; $$last=$matches[2]; } if (preg_match("/^(Content-Disposition:)\s*(.*)$/",$line,$matches)) { $last="c_disp"; $$last=$matches[2]; } } if ($c_t_e=="base64") { $this->part[$partid]['content']=base64_decode(substr($text,$start)); $c_t_e="8bit"; } else { $this->part[$partid]['content']=substr($text,$start); } $this->part[$partid]['Content-Type']=$c_t; $this->part[$partid]['Content-Transfer-Encoding']=$c_t_e; $this->part[$partid]['Content-Description']=$c_desc; $this->part[$partid]['Content-Disposition']=$c_disp; unset($this->part[$partid]['start']); unset($this->part[$partid]['ende']); } // we have a multipart message body // split the parts function multipartSplit($boundary,$text) { $start=0; $b_len=strlen("--".$boundary); $partcount=0; while (true) { // should have an emergency exit... $end=strpos($text,"--".$boundary,$start); if (substr($text,$end+$b_len,1)=="\n") { // '\n' => part boundary $this->part[$partcount]['start']=$end+$b_len+1; if ($partcount) { $this->part[$partcount-1]['ende']=$end-1; $this->multipartHeaders($partcount-1,$text); } $start=$end+$b_len+1; $partcount++; } else { // '--' => end boundary $this->part[$partcount-1]['ende']=$end-1; $this->multipartHeaders($partcount-1,$text); break; } } } } ?> Here is my test catchall script: <?php $email = file_get_contents("email.php"); require("parse_class.php"); // parse that file $msg = new parseMail($email); $from = eregi("<([^>]+)>",$msg->from,$arr); $from = $arr[1]; $mybody = "from: ".$from; $mybody .= "<br>subject: ".$msg->subject; $mybody .= "<br>date: ".$msg->date; $mybody .= "<br>received: ".$msg->received; $mybody .= "<br>message: <br>".$body; $mybody .= "<br>-------------endmail-------------<br>"; print_r("<pre>$mybody</pre>"); print_r($msg); ?> This is something I've tried to grab the body but it still brings back the content type stuff. I'm in the middle of working on this code right now so it might not make complete sense: // empty vars $from = ""; $subject = ""; $headers = ""; $message = ""; $isheader = true; $endmsg = false; // handle email $lines = explode("\n", $email); for ($i=0; $i < count($lines); $i++) { if (!$endmsg) { if (!$isheader) { if (trim($lines[$i])=="") { // not a header, but message $body .= $lines[$i]."\n"; // look out for end of message part if (preg_match("/^------=_Part_(.*)/", $lines[$i], $matches)) { $endmsg = true; } } if (trim($lines[$i])=="") { // empty line, header section has ended $isheader = false; } } } Quote Link to comment Share on other sites More sharing options...
thetxt Posted June 24, 2007 Author Share Posted June 24, 2007 *bump* Anyone have any suggestions? Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.