Zend - The PHP Company




Email

Add Code


Mime Decoder  

Type: class
Added by: richardhey
Entered: 01/10/2001
Last modified: 31/10/2000
Rating: - (fewer than 3 votes)
Views: 15840
This class will enable you to decode mime email. It can for example decode the mime torture test made available by UW. There is an example script available at http://www.phpguru.org to help you get started. This class is also available through PEAR


<?php
//
// +----------------------------------------------------------------------+
// | PHP version 4.0                                                      |
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2001 The PHP Group                                |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.02 of the PHP license,      |
// | that is bundled with this package in the file LICENSE, and is        |
// | available at through the world-wide-web at                           |
// | http://www.php.net/license/2_02.txt.                                 |
// | If you did not receive a copy of the PHP license and are unable to   |
// | obtain it through the world-wide-web, please send a note to          |
// | license@php.net so we can mail you a copy immediately.               |
// +----------------------------------------------------------------------+
// | Authors: Richard Heyes <richard.heyes@heyes-computing.net>           |
// +----------------------------------------------------------------------+

    
require_once('PEAR.php');

/**
*  +----------------------------- IMPORTANT ------------------------------+
*  | Usage of this class compared to native php extensions such as        |
*  | mailparse or imap, is slow and may be feature deficient. If available|
*  | you are STRONGLY recommended to use the php extensions.              |
*  +----------------------------------------------------------------------+
*
* Mime Decoding class
*
* This class will parse a raw mime email and return
* the structure. Returned structure is similar to 
* that returned by imap_fetchstructure().
*
* USAGE: (assume $input is your raw email)
*
* $decode = new Mail_mimeDecode($input, "rn");
* $structure = $decode->decode();
print_r($structure);
*
* Or statically:
*
* $params['input'] = $input;
* $structure = Mail_mimeDecode::decode($params);
print_r($structure);
*
* TODO:
*  - Implement further content types, eg. multipart/parallel,
*    perhaps even message/partial.
*  - Implement decoding of bodies (Just need the _decodeBodies method writing)
*
* @author  Richard Heyes <richard.heyes@heyes-computing.net>
* @version $Revision: 1.6 $
* @package Mail
*/

class Mail_mimeDecode extends PEAR{

    
/**
     * The raw email to decode
     * @var    string
     */
    
var $_input;

    
/**
     * The header part of the input
     * @var    string
     */
    
var $_header;

    
/**
     * The body part of the input
     * @var    string
     */
    
var $_body;

    
/**
     * If an error occurs, this is used to store the message
     * @var    string
     */
    
var $_error;

    
/**
     * Flag to determine whether to include bodies in the
     * returned object.
     * @var    boolean
     */
    
var $_include_bodies;

    
/**
     * Flag to determine whether to decode bodies
     * @var    boolean
     */
    
var $_decode_bodies;

    
/**
     * Flag to determine whether to decode headers
     * @var    boolean
     */
    
var $_decode_headers;

    
/**
     * Variable to hold the line end type.
     * @var    string
     */
    
var $_crlf;

    
/**
     * Constructor.
     * 
     * Sets up the object, initialise the variables, and splits and
     * stores the header and body of the input.
     *
     * @param string The input to decode
     * @param string CRLF type to use (CRLF/LF/CR)
     * @access public
     */
    
function Mail_mimeDecode($input$crlf "rn")
    {

        
$this->_crlf $crlf;
        list(
$header$body) = $this->_splitBodyHeader($input);

        
$this->_input          $input;
        
$this->_header         $header;
        
$this->_body           $body;
        
$this->_decode_bodies  false;
        
$this->_include_bodies true;
    }

    
/**
     * Begins the decoding process. If called statically
     * it will create an object and call the decode() method
     * of it.
     * 
     * @param array An array of various parameters that determine
     *              various things:
     *              include_bodies - Whether to include the body in the returned
     *                               object.
     *              decode_bodies  - Whether to decode the bodies
     *                               of the parts. (Transfer encoding)
     *              decode_headers - Whether to decode headers
     *              input          - If called statically, this will be treated
     *                               as the input
     *              crlf           - If called statically, this will be used as
     *                               the crlf value.
     * @return object Decoded results
     * @access public
     */
    
function decode($params null)
    {

        
// Have we been called statically? If so, create an object and pass details to that.
        
if (!isset($this) AND isset($params['input'])) {

            if (isset(
$params['crlf']))
                
$obj = new Mail_mimeDecode($params['input'], $params['crlf']);
            else
                
$obj = new Mail_mimeDecode($params['input']);
            
$structure $obj->decode($params);

        
// Called statically but no input
        
} elseif (!isset($this)) {
            return 
$this->raiseError('Called statically and no input given');

        
// Called via an object
        
} else {
            
$this->_include_bodies = isset($params['include_bodies'])  ? $params['include_bodies']  : false;
            
$this->_decode_bodies  = isset($params['decode_bodies'])   ? $params['decode_bodies']   : false;
            
$this->_decode_headers = isset($params['decode_headers'])  ? $params['decode_headers']  : false;
            
            
$structure $this->_decode($this->_header$this->_body);
            if(
$structure === false)
                
$structure $this->raiseError($this->_error);
        }

        return 
$structure;
    }

    
/**
     * Performs the decoding. Decodes the body string passed to it
     * If it finds certain content-types it will call itself in a
     * recursive fashion
     * 
     * @param string Header section
     * @param string Body section
     * @return object Results of decoding process
     * @access private
     */
    
function _decode($headers$body$default_ctype 'text/plain')
    {
        
$return = new stdClass;
        
$headers $this->_parseHeaders($headers);

        foreach (
$headers as $value) {
            if (isset(
$return->headers[strtolower($value['name'])]) AND !is_array($return->headers[strtolower($value['name'])])) {
                
$return->headers[strtolower($value['name'])]   = array($return->headers[strtolower($value['name'])]);
                
$return->headers[strtolower($value['name'])][] = $value['value'];

            } elseif (isset(
$return->headers[strtolower($value['name'])])) {
                
$return->headers[strtolower($value['name'])][] = $value['value'];

            } else {
                
$return->headers[strtolower($value['name'])] = $value['value'];
            }
        }

        
reset($headers);
        while (list(
$key$value) = each($headers)) {
            
$headers[$key]['name'] = strtolower($headers[$key]['name']);
            switch (
$headers[$key]['name']) {

                case 
'content-type':
                    
$content_type $this->_parseHeaderValue($headers[$key]['value']);
    
                    if (
preg_match('/([0-9a-z+.-]+)/([0-9a-z+.-]+)/i'$content_type['value'], $regs)) {
                        
$return->ctype_primary   $regs[1];
                        
$return->ctype_secondary $regs[2];
                    }
    
                    if (isset(
$content_type['other'])) {
                        while (list(
$p_name$p_value) = each($content_type['other'])) {
                            
$return->ctype_parameters[$p_name] = $p_value;
                        }
                    }
                    break;

                case 
'content-disposition';
                    
$content_disposition $this->_parseHeaderValue($headers[$key]['value']);
                    
$return->disposition   $content_disposition['value'];
                    if (isset(
$content_disposition['other'])) {
                        while (list(
$p_name$p_value) = each($content_disposition['other'])) {
                            
$return->d_parameters[$p_name] = $p_value;
                        }
                    }
                    break;

                case 
'content-transfer-encoding':
                    
$content_transfer_encoding $this->_parseHeaderValue($headers[$key]['value']);
                    break;
            }
        }

        if (isset(
$content_type)) {

            switch (
strtolower($content_type['value'])) {
                case 
'text/plain':
                    
$encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
                    
$this->_include_bodies $return->body = ($this->_decode_bodies $this->_decodeBody($body$encoding) : $body) : null;
                    break;
    
                case 
'text/html':
                    
$encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
                    
$this->_include_bodies $return->body = ($this->_decode_bodies $this->_decodeBody($body$encoding) : $body) : null;
                    break;

                case 
'multipart/digest':
                case 
'multipart/alternative':
                case 
'multipart/related':
                case 
'multipart/mixed':
                    if(!isset(
$content_type['other']['boundary'])){
                        
$this->_error 'No boundary found for ' $content_type['value'] . ' part';
                        return 
false;
                    }

                    
$default_ctype = (strtolower($content_type['value']) === 'multipart/digest') ? 'message/rfc822' 'text/plain';

                    
$parts $this->_boundarySplit($body$content_type['other']['boundary']);
                    for (
$i 0$i count($parts); $i++) {
                        list(
$part_header$part_body) = $this->_splitBodyHeader($parts[$i]);
                        
$part $this->_decode($part_header$part_body$default_ctype);
                        if(
$part === false)
                            
$part $this->raiseError($this->_error);
                        
$return->parts[] = $part;
                    }
                    break;

                case 
'message/rfc822':
                    
$obj = new Mail_mimeDecode($body$this->_crlf);
                    
$return->parts[] = $obj->decode(array('include_bodies' => $this->_include_bodies));
                    unset(
$obj);
                    break;

                default:
                    
$this->_include_bodies $return->body = ($this->_decode_bodies $this->_decodeBody($body$content_transfer_encoding['value']) : $body) : null;
                    break;
            }

        } else {
            
$ctype explode('/'$default_ctype);
            
$return->ctype_primary   $ctype[0];
            
$return->ctype_secondary $ctype[1];
            
$this->_include_bodies $return->body = ($this->_decode_bodies $this->_decodeBody($body) : $body) : null;
        }
        
        return 
$return;
    }

    
/**
     * Given a string containing a header and body
     * section, this function will split them (at the first
     * blank line) and return them.
     * 
     * @param string Input to split apart
     * @return array Contains header and body section
     * @access private
     */
    
function _splitBodyHeader($input)
    {

        
$pos strpos($input$this->_crlf $this->_crlf);
        if (
$pos === false) {
            
$this->_error 'Could not split header and body';
            return 
false;
        }

        
$header substr($input0$pos);
        
$body   substr($input$pos+(2*strlen($this->_crlf)));

        return array(
$header$body);
    }

    
/**
     * Parse headers given in $input and return
     * as assoc array.
     * 
     * @param string Headers to parse
     * @return array Contains parsed headers
     * @access private
     */
    
function _parseHeaders($input)
    {

        if (
$input !== '') {
            
// Unfold the input
            
$input   preg_replace('/' $this->_crlf "(t| )/"' '$input);
            
$headers explode($this->_crlftrim($input));
    
            foreach (
$headers as $value) {
                
$hdr_name substr($value0$pos strpos($value':'));
                
$hdr_value substr($value$pos+1);
                
$return[] = array(
                                  
'name'  => $hdr_name,
                                  
'value' => $this->_decode_headers $this->_decodeHeader($hdr_value) : $hdr_value
                                 
);
            }
        } else {
            
$return = array();
        }

        return 
$return;
    }

    
/**
     * Function to parse a header value,
     * extract first part, and any secondary
     * parts (after ;) This function is not as
     * robust as it could be. Eg. header comments
     * in the wrong place will probably break it.
     * 
     * @param string Header value to parse
     * @return array Contains parsed result
     * @access private
     */
    
function _parseHeaderValue($input)
    {

        if ((
$pos strpos($input';')) !== false) {

            
$return['value'] = trim(substr($input0$pos));
            
$input trim(substr($input$pos+1));

            if (
strlen($input) > 0) {
                
preg_match_all('/(([[:alnum:]]+)="?([^"]*)"?s?;?)+/i'$input$matches);

                for (
$i 0$i count($matches[2]); $i++) {
                    
$return['other'][strtolower($matches[2][$i])] = $matches[3][$i];
                }
            }
        } else {
            
$return['value'] = trim($input);
        }
        
        return 
$return;
    }

    
/**
     * This function splits the input based
     * on the given boundary
     * 
     * @param string Input to parse
     * @return array Contains array of resulting mime parts
     * @access private
     */
    
function _boundarySplit($input$boundary)
    {
        
$tmp explode('--'.$boundary$input);

        for (
$i=1$i<count($tmp)-1$i++) {
            
$parts[] = $tmp[$i];
        }

        return 
$parts;
    }

    
/**
     * Given a header, this function will decode it
     * according to RFC2047. Probably not *exactly*
     * conformant, but it does pass all the given
     * examples (in RFC2047).
     *
     * @param string Input header value to decode
     * @return string Decoded header value
     * @access private
     */
    
function _decodeHeader($input)
    {
        
// Remove white space between encoded-words
        
$input preg_replace('/(=?[^?]+?(Q|B)?[^?]*?=)( |' "t|" $this->_crlf ')+=?/''1=?'$input);

        
// For each encoded-word...
        
while (preg_match('/(=?([^?]+)?(Q|B)?([^?]*)?=)/'$input$matches)) {

            
$encoded  $matches[1];
            
$charset  $matches[2];
            
$encoding $matches[3];
            
$text     $matches[4];

            switch (
$encoding) {
                case 
'B':
                    
$text base64_decode($text);
                    break;

                case 
'Q':
                    
$text str_replace('_'' '$text);
                    
preg_match_all('/=([A-F0-9]{2})/'$text$matches);
                    foreach(
$matches[1] as $value)
                        
$text str_replace('='.$valuechr(hexdec($value)), $text);
                    break;
            }

            
$input str_replace($encoded$text$input);
        }
        
        return 
$input;
    }

    
/**
     * Given a body string and an encoding type, 
     * this function will decode and return it.
     *
     * @param  string Input body to decode
     * @param  string Encoding type to use.
     * @return string Decoded body
     * @access private
     */
    
function _decodeBody($input$encoding '7bit')
    {
        switch (
$encoding) {
            case 
'7bit':
                return 
$input;
                break;

            case 
'quoted-printable':
                return 
$this->_quotedPrintableDecode($input);
                break;

            case 
'base64':
                return 
base64_decode($input);
                break;

            default:
                return 
$input;
        }
    }

    
/**
     * Given a quoted-printable string, this
     * function will decode and return it.
     *
     * @param  string Input body to decode
     * @return string Decoded body
     * @access private
     */
    
function _quotedPrintableDecode($input)
    {
        
// Remove soft line breaks
        
$input preg_replace("/=r?n/"''$input);

        
// Replace encoded characters
        
if (preg_match_all('/=[A-Z0-9]{2}/'$input$matches)) {
            
$matches array_unique($matches[0]);
            foreach (
$matches as $value) {
                
$input str_replace($valuechr(hexdec(substr($value,1))), $input);
            }
        }

        return 
$input;
    }

// End of class
?>


Usage Example




Rate This Script





Search



This Category All Categories