Jump to content

parse email


thetxt

Recommended Posts

I have a catch all script that parses the mail and sorts it into my mysql database accordingly. I've been doing some testing just grabbing the mail from a text file.

 

Here is the class I am using to parse everything else. I've tried different ways to modify it to grab the message body as well but no luck.

 

<?php
    // parse an incoming mail
    // Version 0.5, 2005/03/16
    // Copyright (c) Frank Rust, TU Braunschweig ([email protected])
    //
    // This code is free software; you can redistribute it and/or modify
    // it under the terms of the GNU General Public License as published by
    // the Free Software Foundation; either version 2 of the License, or
    // (at your option) any later version.
    //
    // This code is distributed in the hope that it will be useful,
    // but WITHOUT ANY WARRANTY; without even the implied warranty of
    // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    // GNU General Public License for more details.
    //
    // Since this is a very short Program the GNU General Public License
    // is not included. Please find it on the website of the Open Software
    // Foundation at
    //     http://www.fsf.org/licensing/licenses/lgpl.txt
    // or write to the Free Software Foundation, Inc., 59 Temple Place,
    // Suite 330, Boston, MA  02111-1307  USA
    
    class parseMail {
        var $from="";
        var $to="";
        var $subject="";
        var $received="";
        var $date="";
        var $message_id="";
        var $content_type="";
        var $part =array();
        
        // decode a mail header
        function parseMail($text="") {
            $start=0;
            $lastheader="";
            while (true) {
                $end=strpos($text,"\n",$start);
                $line=substr($text,$start,$end-$start);
                $start=$end+1;
                if ($line=="") break; // end of headers!
                if (substr($line,0,1)=="\t") {
                    $$last.="\n".$line;
                }
                if (preg_match("/^(From:)\s*(.*)$/",$line,$matches)) {
                    $last="from";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Received:)\s*(.*)$/",$line,$matches)) {
                    $last="received";
                    $$last=$matches[2];
                }
                if (preg_match("/^(To:)\s*(.*)$/",$line,$matches)) {
                    $last="to";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Subject:)\s*(.*)$/",$line,$matches)) {
                    $last="subject";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Date:)\s*(.*)$/",$line,$matches)) {
                    $last="date";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Content-Type:)\s*(.*)$/",$line,$matches)) {
                    $last="content_type";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Message-Id:)\s*(.*)$/",$line,$matches)) {
                    $last="message_id";
                    $$last=$matches[2];
                }
            }
            $this->from=$from;
            $this->received=$received;
            $this->to=$to;
            $this->subject=$subject;
            $this->date=$date;
            $this->content_type=$content_type;
            $this->message_id=$message_id;
            
            if (preg_match("/^multipart\/mixed;/",$content_type)) {
                $b=strpos($content_type,"boundary=");
                $boundary=substr($content_type,$b+strlen("boundary="));
                $boundary=substr($boundary,1,strlen($boundary)-2);
                $this->multipartSplit($boundary,substr($text,$start));
                
            } else {
                $this->part[0]['Content-Type']=$content_type;
                $this->part[0]['content']=substr($text,$start);
            }
        }
        // decode a multipart header
        function multipartHeaders($partid,$mailbody) {
            $text=substr($mailbody,$this->part[$partid]['start'],
                         $this->part[$partid]['ende']-$this->part[$partid]['start']);

            $start=0;
            $lastheader="";
            while (true) {
                $end=strpos($text,"\n",$start);
                $line=substr($text,$start,$end-$start);
                $start=$end+1;
                if ($line=="") break; // end of headers!
                if (substr($line,0,1)=="\t") {
                    $$last.="\n".$line;
                }
                if (preg_match("/^(Content-Type:)\s*(.*)$/",$line,$matches)) {
                    $last="c_t";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Content-Transfer-Encoding:)\s*(.*)$/",$line,$matches)) {
                    $last="c_t_e";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Content-Description:)\s*(.*)$/",$line,$matches)) {
                    $last="c_desc";
                    $$last=$matches[2];
                }
                if (preg_match("/^(Content-Disposition:)\s*(.*)$/",$line,$matches)) {
                    $last="c_disp";
                    $$last=$matches[2];
                }
            }
            if ($c_t_e=="base64") {
                $this->part[$partid]['content']=base64_decode(substr($text,$start));
                $c_t_e="8bit";
            } else {
                $this->part[$partid]['content']=substr($text,$start);    
            }
            $this->part[$partid]['Content-Type']=$c_t;
            $this->part[$partid]['Content-Transfer-Encoding']=$c_t_e;
            $this->part[$partid]['Content-Description']=$c_desc;
            $this->part[$partid]['Content-Disposition']=$c_disp;
            unset($this->part[$partid]['start']);
            unset($this->part[$partid]['ende']);
        }
        // we have a multipart message body
        // split the parts
        function multipartSplit($boundary,$text) {
            $start=0;
            $b_len=strlen("--".$boundary);
            $partcount=0;
            while (true) { // should have an emergency exit...
                $end=strpos($text,"--".$boundary,$start);
                if (substr($text,$end+$b_len,1)=="\n") {
                    // '\n' => part boundary
                    $this->part[$partcount]['start']=$end+$b_len+1;
                    if ($partcount) {
                        $this->part[$partcount-1]['ende']=$end-1;
                        $this->multipartHeaders($partcount-1,$text);
                    }
                    $start=$end+$b_len+1;
                    $partcount++;
                } else {
                    // '--' => end boundary
                    $this->part[$partcount-1]['ende']=$end-1;                
                    $this->multipartHeaders($partcount-1,$text);
                    break;
                }
            }    
        }
    }
  
?> 

 

Here is my test catchall script:

 

<?php

$email = file_get_contents("email.php");

require("parse_class.php");
// parse that file
$msg = new parseMail($email);

$from = eregi("<([^>]+)>",$msg->from,$arr);
$from = $arr[1];
$mybody = "from: ".$from;
$mybody .= "<br>subject: ".$msg->subject;
$mybody .= "<br>date: ".$msg->date;
$mybody .= "<br>received: ".$msg->received;
$mybody .= "<br>message: <br>".$body;

$mybody .= "<br>-------------endmail-------------<br>";
print_r("<pre>$mybody</pre>");
print_r($msg);

?>

 

This is something I've tried to grab the body but it still brings back the content type stuff. I'm in the middle of working on this code right now so it might not make complete sense:

// empty vars
$from = "";
$subject = "";
$headers = "";
$message = "";
$isheader = true;
$endmsg = false;

// handle email
$lines = explode("\n", $email);

for ($i=0; $i < count($lines); $i++) {
    if (!$endmsg) {
	if (!$isheader) {
		if (trim($lines[$i])=="") {
		// not a header, but message
		$body .= $lines[$i]."\n";

		// look out for end of message part
		if (preg_match("/^------=_Part_(.*)/", $lines[$i], $matches)) {
			$endmsg = true;
		}
	}

	if (trim($lines[$i])=="") {
		// empty line, header section has ended
		$isheader = false;
	}
}
}

Link to comment
https://forums.phpfreaks.com/topic/56912-parse-email/#findComment-281169
Share on other sites

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.