Zend - The PHP Company




XML

Add Code


SophosParse  

Type: class
Added by: bobtfish
Entered: 21/05/2002
Last modified: 05/12/2001
Rating: - (fewer than 3 votes)
Views: 4659
A parser for the (broken) RSS feeds published by Sophos (www.sophos.com) about their latest virus alerts.


<?php
/*                              SophosParse v0.1b                             */
/* A simple parser for the Anti Virus information from Sophos (www.sophos.com) */
/* This code is copyright (C) 2002 Tomas Doran                                */
/* You can always find the latest version of the code at
   http://opensource.northernprinciple.com/projects/sophosparse/

   This code is licensed under the GNU GPL.
   Please see the file named LICENSE in the distributed package for the full
   terms and conditions

   Information about how to use this code can be found in the README
   file in the distributed package.
*/

/**
 * Event-driven parser for the Sophos virus-alert RSS feeds.
 *
 * Usage: open the feed, construct the parser with the file pointer and the
 * feed type ("latest", "topten" or "hoax"), then call parse() to obtain an
 * array of items. Every fatal condition (bad arguments, malformed feed,
 * XML errors) terminates the script via die(), as in the original code.
 */
class SophosParse {
    /** Feed type being parsed: "latest", "topten" or "hoax". */
    var $type;

    /** Open file pointer the feed is read from; closed by parse(). */
    var $fp;

    /**
     * State shared between the XML handlers:
     *  - intitle / inlink / initem: true while inside the matching element
     *  - seenchannel: incremented on every opening AND closing CHANNEL tag
     *  - title_data / link_data: text collected for the current item
     *    (false until some text has been seen)
     */
    var $parser_state = array();

    /** Items collected so far; this is what parse() returns. */
    var $data = array();

    /** XML parser handle created by initparser(). */
    var $parser;

    /** Self reference set by the constructor; kept for backward compatibility. */
    var $object_self_ref;

    /** Reset to an empty array by the constructor; not used elsewhere in this class. */
    var $parser_output = array();

    /**
     * Validate the arguments and prepare the XML parser.
     *
     * @param resource $pointer Open, readable file pointer positioned before the feed data.
     * @param string   $type    One of "latest", "topten" or "hoax".
     */
    function __construct($pointer, $type) {
        // Plain assignment: call-time/reference tricks from PHP 4 are not
        // needed for the XML callbacks any more (see initparser()).
        $this->object_self_ref = $this;

        if (!$pointer || feof($pointer)) {
            die("SophosParse initialised without a valid file pointer");
        }
        $this->fp = $pointer;

        switch ($type) {
            case "latest":
                $this->type = "latest";
                break;
            case "topten":
                $this->type = "topten";
                break;
            case "hoax":
                $this->type = "hoax";
                break;
            default:
                die("SophosParse initialised without a valid type. Valid types are latest, topten, hoax");
                break;
        }

        $this->parser_output = array();
        $this->initparser();
    }

    /**
     * PHP 4-style constructor, kept so code that relies on the old
     * constructor name keeps working. It simply delegates to __construct().
     *
     * @param resource $pointer See __construct().
     * @param string   $type    See __construct().
     */
    function SophosParse($pointer, $type) {
        $this->__construct($pointer, $type);
    }

    /**
     * Reset the handler state and create the XML parser with this object's
     * methods registered as element and character-data handlers.
     */
    function initparser() {
        $this->parser_state["intitle"] = false;
        $this->parser_state["seenchannel"] = 0;
        $this->parser_state["initem"] = false;
        $this->parser_state["inlink"] = false;
        $this->parser_state["title_data"] = false;
        $this->parser_state["link_data"] = false;

        $this->parser = xml_parser_create();

        // Register the handlers as array callables instead of using
        // xml_set_object() with a call-time reference (&$this), which is a
        // fatal error on PHP 5.4 and later.
        xml_set_element_handler(
            $this->parser,
            array($this, "startElement"),
            array($this, "endElement")
        );
        xml_set_character_data_handler($this->parser, array($this, "characterData"));
    }

    /**
     * Read the feed from the file pointer, run it through the XML parser and
     * return the collected items. Closes the file pointer when done.
     *
     * @return array List of items. Each item has "title" and "link" plus
     *               "date", "date_day", "date_month" for the "latest" type,
     *               or "rank" for the "topten" and "hoax" types.
     */
    function parse() {
        // Read the XML file 4KB at a time
        while ($chunk = fread($this->fp, 4096)) {
            // Parse each 4KB chunk; the third argument tells the parser
            // whether this was the final chunk.
            if (!xml_parse($this->parser, $chunk, feof($this->fp))) {
                // Handle errors in parsing
                die(sprintf(
                    "XML error: %s at line %d",
                    xml_error_string(xml_get_error_code($this->parser)),
                    xml_get_current_line_number($this->parser)
                ));
            }
        }

        // Close the XML file
        fclose($this->fp);

        // Free up memory used by the XML parser. Only resource-based parsers
        // need an explicit free; object-based parsers are released when the
        // last reference goes away.
        if (is_resource($this->parser)) {
            xml_parser_free($this->parser);
        }
        $this->parser = null;

        return $this->data;
    }

    /**
     * XML handler for opening tags. Tracks which element we are inside and
     * aborts on nested TITLE, ITEM or LINK elements.
     *
     * @param mixed  $parser  The XML parser invoking the handler.
     * @param string $tagName Element name as reported by the parser.
     * @param array  $attrs   Element attributes (unused).
     */
    function startElement($parser, $tagName, $attrs) {
        switch ($tagName) {
            case "TITLE":
                if ($this->parser_state["intitle"]) {
                    die("Parse error in RSS data - nested &lt;title&gt; elements");
                }
                $this->parser_state["intitle"] = true;
                break;
            case "CHANNEL":
                $this->parser_state["seenchannel"]++;
                break;
            case "ITEM":
                if ($this->parser_state["initem"]) {
                    die("Parse error in RSS data - nested &lt;item&gt; elements");
                }
                $this->parser_state["initem"] = true;
                // Start every item with empty title/link buffers.
                $this->parser_state["title_data"] = false;
                $this->parser_state["link_data"] = false;
                break;
            case "LINK":
                if ($this->parser_state["inlink"]) {
                    die("Parse error in RSS data - nested &lt;link&gt; elements");
                }
                $this->parser_state["inlink"] = true;
                break;
        }
    }

    /**
     * XML handler for closing tags. When an ITEM closes, the collected title
     * and link are turned into an output record and appended to $this->data.
     *
     * @param mixed  $parser  The XML parser invoking the handler.
     * @param string $tagName Element name as reported by the parser.
     */
    function endElement($parser, $tagName) {
        switch ($tagName) {
            case "TITLE":
                if (!$this->parser_state["intitle"]) {
                    die("Parse error in RSS data - &lt;/title&gt; element without &lt;title&gt; element");
                }
                $this->parser_state["intitle"] = false;
                break;
            case "CHANNEL":
                $this->parser_state["seenchannel"]++;
                break;
            case "ITEM":
                if (!$this->parser_state["initem"]) {
                    die("Parse error in RSS data - &lt;/item&gt; element without &lt;item&gt; element");
                }
                $this->parser_state["initem"] = false;

                $title = $this->parser_state["title_data"];
                $link = $this->parser_state["link_data"];

                // Keep the item only if it has both a title and a link, a
                // channel element has been opened and closed before it
                // (seenchannel >= 2), and the title does not mention Sophos.
                if ($title && $link
                    && ($this->parser_state["seenchannel"] >= 2)
                    && !stristr($title, "SOPHOS")) {
                    $output = array();
                    $words = explode(" ", $title);
                    switch ($this->type) {
                        case "latest":
                            // Title is "<day> <month> <rest of title>".
                            $date_day = array_shift($words);
                            $date_month = array_shift($words);
                            $output["date"] = $date_month . " " . $date_day;
                            $output["date_day"] = $date_day;
                            $output["date_month"] = $date_month;
                            $output["title"] = join(" ", $words);
                            break;
                        case "topten":
                        case "hoax":
                            // Title is "<rank> <rest of title>".
                            $output["rank"] = array_shift($words);
                            $output["title"] = join(" ", $words);
                            break;
                        default:
                            die("In SophosParse->endElement() with unknown parser type! Should have never got here - giving up");
                            break;
                    }
                    $output["link"] = $link;
                    array_push($this->data, $output);
                }

                $this->parser_state["title_data"] = false;
                $this->parser_state["link_data"] = false;
                break;
            case "LINK":
                if (!$this->parser_state["inlink"]) {
                    die("Parse error in RSS data - &lt;/link&gt; element without &lt;link&gt; element");
                }
                $this->parser_state["inlink"] = false;
                break;
        }
    }

    /**
     * XML handler for character data. Appends text to the title or link
     * buffer, but only inside an ITEM that follows a completed channel
     * element (seenchannel >= 2).
     *
     * @param mixed  $parser The XML parser invoking the handler.
     * @param string $data   Text chunk; one text node may arrive in several chunks.
     */
    function characterData($parser, $data) {
        $collecting = ($this->parser_state["seenchannel"] >= 2) && $this->parser_state["initem"];
        if (!$collecting) {
            return;
        }
        if ($this->parser_state["intitle"]) {
            $this->parser_state["title_data"] .= $data;
        }
        if ($this->parser_state["inlink"]) {
            $this->parser_state["link_data"] .= $data;
        }
    }
}

?>

?>


Usage Example


See the example


Rate This Script





Search



This Category All Categories