Zend - The PHP Company




HTML

Add Code


HTML parser  

Type: class
Added by: xath
Entered: 24/11/2000
Last modified: 01/12/2000
Rating: - (fewer than 3 votes)
Views: 9353
Parses through text in what is hopefully an efficient manner. It checks the validity of HTML code, including nested tags, among other things. It needs my class tagtrack.php to work, which I have included here. It needs some comments, but I'm too lazy right now; I will get to that later. Many thanks to Adrian for his help with this class.


<?
include "tagtrack.php";

/**
 * Whitelist-based scanner for user-supplied HTML.
 *
 * scan() walks the input word by word. Tags are looked up in $tags and
 * checked for correct nesting through TagTrack::VerifyTag(); quote characters
 * are backslash-escaped in text and blanked inside tags. As soon as a problem
 * is found, the whole input is returned entity-escaped, followed by an error
 * message.
 *
 * Depends on the TagTrack class and on the global $tag_stack array that
 * TagTrack::VerifyTag() maintains.
 */
class parser
{
    /** @var string Error message; when non-empty, scan() rejects the input. */
    public $neutralize_post;

    /** @var array Map of whole word => replacement text, applied by scan(). */
    public $replace_word_list;

    /** @var array Map of single character => replacement, applied by NextWord(). */
    public $replace_ltr_list;

    /** @var array Map of single character => replacement, applied by TagScan() to the rest of a tag. */
    public $in_tag_ltr_replace;

    /** @var array Map of allowed lower-case tag name => TagTrack instance. */
    public $tags;

    /**
     * Sets up the replacement tables and the list of permitted tags.
     *
     * Each TagTrack is built from (label, top_level flag, followers); the
     * followers value is either an array of tag names, "*" or "".
     */
    public function __construct()
    {
        $this->neutralize_post = "";

        $this->replace_word_list = array();

        // Quotes in plain text are escaped with a backslash.
        $this->replace_ltr_list = array(
            "\"" => "\\\"",
            "'"  => "\\'",
        );

        // Quotes inside a tag are replaced by a blank.
        $this->in_tag_ltr_replace = array(
            "\"" => " ",
            "'"  => " ",
        );

        $this->tags = array(
            "table"      => new TagTrack("table", true, array("tr")),
            "tr"         => new TagTrack("tr", false, array("td")),
            "td"         => new TagTrack("td", false, "*"),
            "br"         => new TagTrack("br", true, ""),
            "a"          => new TagTrack("a", true, "*"),
            "div"        => new TagTrack("div", true, "*"),
            "em"         => new TagTrack("em", true, "*"),
            "pre"        => new TagTrack("pre", true, "*"),
            "font"       => new TagTrack("font", true, "*"),
            "s"          => new TagTrack("s", true, "*"),
            "b"          => new TagTrack("b", true, "*"),
            "u"          => new TagTrack("u", true, "*"),
            "center"     => new TagTrack("center", true, "*"),
            "blockquote" => new TagTrack("blockquote", true, "*"),
            "i"          => new TagTrack("i", true, "*"),
            "p"          => new TagTrack("p", true, "*"),
            "li"         => new TagTrack("li", true, "*"),
            "image"      => new TagTrack("image", true, ""),
        );
    }

    /**
     * Legacy PHP 4 style constructor name, kept so existing explicit calls
     * keep working. It is declared after __construct() on purpose.
     */
    public function parser()
    {
        $this->__construct();
    }

    /**
     * Reads the next word of $string starting at offset $i.
     *
     * $i is taken by reference (the original relied on call-time
     * pass-by-reference, which current PHP rejects) and is left on the
     * character that ended the word, so the caller's own $i++ steps over it.
     *
     * @param string $string Text being scanned.
     * @param int    $i      Current offset; updated in place.
     * @param array  $delim  Characters (as keys) that end a non-empty word.
     * @param array  $entity Characters (as keys) returned as words of their own.
     *
     * @return string The word, a single entity character, or "" at end of input.
     */
    public function NextWord($string, &$i, $delim, $entity)
    {
        $wrd = "";
        $len = strlen($string);

        for (; $i < $len; $i++) {
            $ltr = $string[$i];

            if (!empty($entity[$ltr])) {
                // Compare against "" explicitly: the word "0" is a real word.
                if ($wrd === "") {
                    return $ltr;
                }
                // Step back so the entity is seen again on the next call.
                $i--;
                return $wrd;
            }

            if (!empty($delim[$ltr]) && $wrd !== "") {
                return $wrd;
            }

            if (!empty($this->replace_ltr_list[$ltr])) {
                $ltr = $this->replace_ltr_list[$ltr];
            }
            $wrd = $wrd . $ltr;
        }

        return $wrd;
    }

    /**
     * Parses one tag; $i must point at its opening "<".
     *
     * The tag name is checked against $tags and its nesting is verified with
     * TagTrack::VerifyTag(). On failure $neutralize_post is set. The rest of
     * the tag up to ">" is copied with $in_tag_ltr_replace applied. On return
     * $i points at the closing ">" (or past the end of the input).
     *
     * @param string $string    Text being scanned.
     * @param int    $i         Current offset; updated in place.
     * @param mixed  $tag_count Unused; kept for signature compatibility.
     *
     * @return string The rebuilt tag, wrapped in "<" and ">".
     */
    public function TagScan($string, &$i, $tag_count = null)
    {
        $i++;
        $closing = 0;
        $delim = array(" " => 1, ">" => 1, "'" => 1, "\"" => 1);
        $entitie = array();

        $word = $this->NextWord($string, $i, $delim, $entitie);
        $tag = strtolower($word);

        if ($tag !== "" && $tag[0] == "/") {
            $closing = 1;
            $tag = substr($tag, 1);
        }

        // The tag name comes straight from user input, so it is escaped
        // before being placed into the error markup.
        if (isset($this->tags[$tag])) {
            if (!$this->tags[$tag]->VerifyTag($closing)) {
                $this->neutralize_post = "<font color=red>You have an error in your HTML near "
                    . htmlentities($word) . "</font>";
            }
        } else {
            $word = $this->neutralize_post = "<font color=red>" . htmlentities($tag) . " not permitted</font>";
        }

        $word = $word . " ";
        $len = strlen($string);

        // Bounds are tested before the offset is read.
        for (; $i < $len && $string[$i] != ">"; $i++) {
            $ltr = $string[$i];
            if (!empty($this->in_tag_ltr_replace[$ltr])) {
                $word = $word . $this->in_tag_ltr_replace[$ltr];
            } else {
                $word = $word . $ltr;
            }
        }

        return ("<" . trim($word) . ">");
    }

    /**
     * Reads a backslash-delimited section and returns it entity-escaped.
     *
     * $i must point at the opening backslash.
     *
     * NOTE(review): on return $i is one past the closing backslash, and the
     * caller's loop increments it again, so the character directly after the
     * closing backslash is skipped. Kept as in the original; confirm whether
     * that is intended.
     *
     * @param string $string Text being scanned.
     * @param int    $i      Current offset; updated in place.
     *
     * @return string The section between the backslashes, passed through htmlentities().
     */
    public function EscapeScan($string, &$i)
    {
        $i++;
        $word = "";
        $len = strlen($string);

        for (; $i < $len && $string[$i] != "\\"; $i++) {
            $word = $word . $string[$i];
        }

        $i++;
        return (htmlentities($word));
    }

    /**
     * Scans $string and returns the cleaned text.
     *
     * Resets the global $tag_stack first. If a tag is not permitted, is badly
     * nested, is nested more than 20 levels deep or is left open, the return
     * value is the entity-escaped, slash-escaped input followed by an error
     * message instead.
     *
     * @param string $string Raw user input.
     *
     * @return string Cleaned text, or the escaped input plus an error message.
     */
    public function scan($string)
    {
        global $tag_stack;

        $tag_stack = array();
        $text = "";
        // The trailing blank guarantees the last word is terminated.
        $string = $string . " ";
        $this->neutralize_post = "";
        $delim = array(" " => 1);
        $entitie = array("<" => 1, "\\" => 1);

        $len = strlen($string);
        for ($i = 0; $i < $len; $i++) {
            $word = $this->NextWord($string, $i, $delim, $entitie);

            if ($word === "\\") {
                $text = $text . $this->EscapeScan($string, $i);
                continue;
            }

            if (!empty($this->replace_word_list[$word])) {
                $text = $text . $this->replace_word_list[$word];
                continue;
            }

            if ($word === "<") {
                $word = $this->TagScan($string, $i);
            }

            if (count($tag_stack) > 20) {
                // NOTE(review): this message looks garbled in the source
                // (a separator may be missing); reproduced unchanged.
                $this->neutralize_post = "WAY TO MUCH NESTINGSTOP IT";
            }

            if ($this->neutralize_post) {
                return (htmlentities(addslashes($string)) . $this->neutralize_post);
            }

            $text = $text . " " . $word;
        }

        // Anything still on the stack is a tag that was never closed.
        if ($c = end($tag_stack)) {
            return (htmlentities(addslashes($string))
                . "YOUR HTML TAG <font color=red>{$c->label} </font>MAY NOT BE COMPLETED");
        }

        return ($text);
    }
}

?>

//and here is the tagtrack class, which checks the
//validity of nested HTML tags.
<?

// Global stack of the TagTrack objects for tags that are currently open.
// TagTrack::VerifyTag() pushes and pops entries; parser::scan() resets it
// to an empty array at the start of every scan.
$tag_stack = array();

/**
 * Describes one permitted HTML tag and checks its nesting against the global
 * $tag_stack of currently open tags.
 */
class TagTrack
{
    /** @var string Lower-case tag name. */
    public $label;

    /** @var mixed Truthy when the tag may appear inside a parent whose followers value is "*". */
    public $top_level;

    /**
     * @var mixed Which tags may appear directly inside this one: an array of
     *            tag names, "*", or a falsy value. Tags with a falsy value
     *            are never pushed onto the stack by VerifyTag().
     */
    public $followers;

    /**
     * @param string $label     Tag name.
     * @param mixed  $top_level See $top_level.
     * @param mixed  $followers See $followers.
     */
    public function __construct($label, $top_level = 1, $followers = 0)
    {
        $this->label = $label;
        $this->top_level = $top_level;
        $this->followers = $followers;
    }

    /**
     * Legacy PHP 4 style constructor name, kept so existing explicit calls
     * keep working. It is declared after __construct() on purpose.
     */
    public function TagTrack($label, $top_level = 1, $followers = 0)
    {
        $this->__construct($label, $top_level, $followers);
    }

    /**
     * Checks whether this tag may be opened or closed at the current point
     * and updates the global $tag_stack accordingly.
     *
     * @param mixed $closing Truthy for a closing tag, falsy for an opening tag.
     *
     * @return mixed The tag label when the tag is acceptable, 0 otherwise.
     */
    public function VerifyTag($closing)
    {
        global $tag_stack;

        if (!is_array($tag_stack)) {
            $tag_stack = array();
        }

        $prev = end($tag_stack);

        if ($closing) {
            // A closing tag is valid only when it matches the innermost open
            // tag. This must be a comparison: an assignment here would accept
            // any closing tag and overwrite this tracker's label.
            if ($prev && $this->label == $prev->label) {
                array_pop($tag_stack);
                return ($this->label);
            }
            return (0);
        }

        if ($prev && $pf = $prev->followers) {
            if (is_array($pf) && !in_array($this->label, $pf, true)) {
                return (0);
            } elseif ($pf === "*" && !$this->top_level) {
                return (0);
            }
        }

        // Only tags that can contain other tags are tracked as open.
        if ($this->followers) {
            array_push($tag_stack, $this);
        }

        return ($this->label);
    }
}

?>



Usage Example




Rate This Script





Search



This Category All Categories