Zend - The PHP Company




Files and Directories

Add Code


File Search Class  

Type: class
Added by: g3voodoo
Entered: 18/08/2001
Last modified: 08/12/2000
Rating: - (fewer than 3 votes)
Views: 6978
This is a simple class that will let you search specified directories on your server, using specified file extensions, for a query string.


<?php

    
/*****************************************************************
    
     PHP SEARCH ENGINE 1.0
     
     Created By
     ---------------------------------
     David Green
     dlgreen@u.washington.edu
     
     Variables for class "search":
     ---------------------------------
      $base_path = the system path to your base directory
      $directories = an array containing the directories you want to search
      $file_exts = an array containing file extensions you want to search
      $files = an array of files within a searchable directory
      $query = the query you are searching for
      $matches = an array of files that contained the query
      $self = the path to your search page ($PHP_SELF)
      $index = the filename of your index.dat
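      $use_index = a boolean flag; set to true by use_index() so that start()
                   reads its file list from the index instead of scanning
                   the directories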
     
     Variables for class "result":
     ---------------------------------
      $query = the query you are searching for
      $path = the path of the matching file
      $name = the name of the matching file
      $title = the <title></title> of the document
      $summary = a summary of the document's <body></body>
      $base_url = the http address to your web site
      
     Variables for class "index":
     ---------------------------------
      $base_path = the system path to your base directory
      $directories = an array containing the directories you want to search
      $file_exts = an array containing file extensions you want to search
      $files = an array of files within a searchable directory
      $index = the filename of your index.dat
      
     Example Code:
     ---------------------------------
     
      *** IF YOU DON'T WANT TO USE AN INDEX IN YOUR SEARCH ***
      *** USING AN INDEXED SEARCH WILL REDUCE SEARCH TIME ***
      $search = new search("the query", $PHP_SELF);
      $search->set_dirs("comma-separated list of searchable directories");
      $search->set_exts("comma-separated list of searchable file types");
      $search->start();
      unset($search);
      
      *** HOW TO CREATE AN INDEX OF FILES TO ALLOW FASTER SEARCHING ***
      $temp = new index();
      $temp->set_dirs("comma-separated list of searchable directories");
      $temp->set_exts("comma-separated list of searchable file types");
      $temp->create();
      unset($temp);
      
      *** HOW TO USE AN INDEX IN YOUR SEARCH ***
      $search = new search("the query", $PHP_SELF);
      $search->use_index();
      $search->start();
      unset($search);
      
      
     *****************************************************************/

    
/**
 * Builds the index file that lets the "search" class skip directory scanning.
 *
 * The index is a plain text file with one line per directory:
 *
 *     directory;file1;file2;...
 *
 * NOTE: file or directory names that contain ";" or a newline cannot be
 * represented in this format.
 */
class index
{
    // System path to the base directory. Must end with a slash: directory
    // names and the index filename are appended to it directly.
    var $base_path = "/home/web/sites/php_search/";
    // Directories (relative to $base_path) to index.
    var $directories = array();
    // File extensions to index, each including its leading dot (".html").
    var $file_exts = array();
    // Map of directory => list of matching filenames, filled by create().
    var $files = array();
    // Filename of the index, stored inside $base_path.
    var $index = "index.dat";

    /**
     * Constructor. Declared before the PHP 4 style constructor below so that
     * runtimes which recognise both do not complain about a redefinition.
     */
    function __construct()
    {
        // Nothing to initialise; configure with set_dirs() and set_exts().
    }

    /**
     * PHP 4 style constructor, kept for old runtimes.
     */
    function index()
    {
        // DO NOTHING
    }

    /**
     * @param string $list comma-separated directory names, relative to $base_path
     */
    function set_dirs($list)
    {
        $this->directories = explode(",", $list);
    }

    /**
     * @param string $list comma-separated extensions, each with its leading dot
     */
    function set_exts($list)
    {
        $this->file_exts = explode(",", $list);
    }

    /**
     * Scans every configured directory and writes the index file.
     *
     * Directories that do not exist are reported and skipped.
     *
     * @return bool true when the index was written, false otherwise
     */
    function create()
    {
        // Start clean so a second call does not keep stale directories.
        $this->files = array();

        foreach ($this->directories as $path)
        {
            $cpath = $this->base_path.$path;
            if (!is_dir($cpath))
            {
                $this->printError("The directory $path was not found on this server.");
                continue;
            }

            // The indexer has no "own page" to leave out, so nothing is excluded.
            $this->files[$path] = $this->get_files($cpath, $this->file_exts);
        }

        $fp = fopen($this->base_path.$this->index, "w");
        if (!$fp)
        {
            $this->printError("Error while writing index file.");
            return false;
        }

        foreach ($this->files as $path => $names)
        {
            fwrite($fp, $path);
            foreach ($names as $name)
            {
                fwrite($fp, ";".$name);
            }
            fwrite($fp, "\n");
        }

        fclose($fp);
        return true;
    }

    /**
     * Lists the regular files in $path whose extension is in $file_exts.
     *
     * @param string $path      directory to read
     * @param array  $file_exts accepted extensions, each with its leading dot
     * @param string $self      filename to leave out ("" excludes nothing)
     * @return array sorted list of filenames (empty when $path is unreadable)
     */
    function get_files($path, $file_exts, $self = "")
    {
        $found = array();

        if (!is_dir($path)) return $found;
        $dir = opendir($path);
        if (!$dir) return $found;

        // Compare against false explicitly: a file named "0" is falsy and
        // would otherwise end the loop early.
        while (false !== ($file = readdir($dir)))
        {
            // Use the full path so the result does not depend on the
            // current working directory.
            if (!is_file($path."/".$file)) continue;
            if ($self !== "" && $self !== null && $file === (string) $self) continue;

            // $parts[2] is the last ".ext" of the name, dot included.
            if (preg_match('/^(.+)(\..+)$/s', $file, $parts) && in_array($parts[2], $file_exts))
            {
                $found[] = $file;
            }
        }

        closedir($dir);
        sort($found); // readdir() order is filesystem dependent
        return $found;
    }

    /**
     * Reports an error to the page.
     *
     * @param string $errorTxt message to print
     */
    function printError($errorTxt)
    {
        echo ($errorTxt);
    }

}

    class 
search
    
{
        var 
$base_path "/home/web/sites/php_search/";
        var 
$directories = array();
        var 
$file_exts = array();
        var 
$files = array();
        var 
$query;
        var 
$matches = array();
        var 
$self;
        var 
$index "index.dat";
        var 
$use_index false;


        function 
search($query,$self)
        {
            
$this->query $query;
            
$this->self $self;
        }

        function 
set_dirs($list)
        {
            
$this->directories explode(","$list);
        }

        function 
set_exts($list)

        {
            
$this->file_exts explode(","$list);
        }



        function 
start()
        {

            
$time_start getmicrotime();

            
// SCRIPT SHOULD NOT OPEN SEARCHING ORIGIN
            
if (eregi("(/.*/)(.*$)"$this->self$tempArray))

            if (!
$this->use_index)
            {
                foreach(
$this->directories as $path)
                {
                    
$cpath $this->base_path.$path;
                    if (!
chdir($cpath)) $this->printError("The directory $path was not found on this server.");
                    
$this->files[$path] = $this->get_files($cpath$this->file_exts$tempArray[2]);
                }
            }
            

            
// IF THERE ARE NO VALID FILES, EXIT SWITCH
            
if (count($this->files) < 1)
            {
                
$this->printError("There were no valid files located.");
            }


            
$i 0;

            
// IF THERE ARE VALID FILES FOUND, SEARCH THEM FOR STRING
            
while (list($path,$file) = each($this->files))
            {
                
$cpath $this->base_path.$path;

                
// ERROR ACCESSING GIVEN PATH
                
if (!chdir($cpath)) $this->printError("Error accessing directory $path.");

                foreach(
$file as $temp)
                {
                    if (
$fp fopen($temp"r"))
                    {
                        
$contents fread($fpfilesize($temp));
                        
fclose($fp);
                        
                        
$contents $this->strip_garbage($contents);

                        if (!
stristr($contents$search))
                        {
                            
$this->matches[$i] = new result($this->query);
                            
$this->matches[$i]->path $path;
                            
$this->matches[$i]->name $temp;
                            
$this->matches[$i]->get_props($contents);
                            
$i++;
                        }
                    }
                    else 
$this->printError("Error opening $filename.");
                }
            }

            
$time_end getmicrotime();

            
// IF THERE ARE NO MATCHES MADE, EXIT SWITCH
            
if (count($this->matches) < 1)
            {
                
$this->printError("No matches were made.");
            }

            
// IF THERE ARE MATCHES MADE, PRINT RESULTS
            
for ($i 0$i count($this->matches); $i++)
            {
                
$this->matches[$i]->printRecord();
            }
            

            
// FOR TESTING PURPOSES ONLY
            
$time_total substr(($time_end $time_start), 05);
            echo 
"<p>Your search took $time_total.";
        }



        function 
get_files($path$file_exts$self)
        {    
            
$temp = array();

            if (
$dir opendir($path))
            {
                while (
$file readdir($dir))
                {    
                    if (
filetype($file) == "file")
                    {
                        foreach(
$file_exts as $ext)

                        {
                            if (
eregi("(.+)(..+)"$file$parts) && ($ext == $parts[2]) && ($parts[0] != $self)) array_push($temp$file);
                        }
                    }
                }
                return (
$temp);
            }
        }


        function 
strip_garbage($content)
        {
            
$html = array("<style.+>.+</style>","<script.+>.+</script>","<object.+>.+</object>","<embed.+>","<meta.+>","<b>","</b>","<i>","</i>","<u>","</u>");

            for (
$i 0$i count($html); $i++)

            {
                
$content eregi_replace($html[$i], ""$content);
            }
            
            return(
$content);
        }
        
        
        function 
use_index()
        {
            
$this->use_index true;
            
            if (!
chdir($this->base_path)) $this->printError("Error accessing basepath.");
            if (!
$fp fopen($this->index"r")) $this->printError("No index currently exists.");
            
            
$contents fread($fpfilesize($this->index));
            
fclose($fp);
            
            
$tempArray explode("n"$contents);
            
$tempLines = array();
            
            for (
$i 0$i count($tempArray); $i++)
            {
                
$tempLines[$i] = explode(";"$tempArray[$i]);
            }
            
            for (
$i 0$i count($tempLines); $i++)
            {
                
$this->directories[$i] = $tempLines[$i][0];
                
$tempFiles = array();
                for (
$j 1$j count($tempLines[$i]); $j++)
                {
                    
$tempFiles[$j-1] = $tempLines[$i][$j];
                }
                
$this->files[$this->directories[$i]] = $tempFiles;
                unset(
$tempFiles);
            }
            
            unset(
$tempLines);
            unset(
$tempArray);
        }
        
        function 
make_index()
        {
            
$this->make_index true;
        }


        function 
printError($errorTxt)

        {
            echo (
$errorTxt);
        }

    };


    class 
result
    
{
        var 
$query;
        var 
$path;
        var 
$name;
        var 
$title;
        var 
$summary;
        var 
$base_url "http://cesare2.adhost.com/php_search";
        
        function 
result($value)
        {
            
$this->query $value;
            
$this->path "";
            
$this->name "";
            
$this->title "";
            
$this->summary "";
        }
        
        function 
get_props($contents)
        {
            
// GET DOCUMENT TITLE
            
if (eregi("(<title>)(.+)(</title>)"$contents$arr)) $this->title $arr[2];
            else 
$this->title "Untitled Document";
            
$this->title trim($this->title);    
            
            
// GET SUMMARY OF DOCUMENT AND FORMAT IT FOR DISPLAY
            
if (eregi("(<body.?>)"$contents))
            {
                
eregi("(<body.?>)(.+)(</body>)"$contents$arr);
                
$contents $arr[2];
            }
            
$contents strip_tags($contents);
            
$pos = (strpos(strtolower($contents), strtolower($this->query)));
            if (
$pos 100 1$pos 0;
            else 
$pos -= 100;
            
$this->summary substr($contents$pos200);
            
$this->summary eregi_replace($this->query"<b>$this->query</b>"$this->summary);
            
$this->summary trim($this->summary);    
        }
        
        function 
printRecord()
        {
            
// CORRECT PATH INFORMATION
            
$this->path $this->correct_path($this->path);
            
            
// DISPLAY CONTENT
            
echo "<b>".$this->title."</b><br>n";
            echo 
"<b>...</b>&nbsp;".$this->summary."&nbsp;<b>...</b><br>n";
            echo 
"<a href='$this->base_url/$this->path$this->name'>$this->base_url/$this->path$this->name</a><p>nn";
        }
        
        function 
correct_path($old_path)
        {
            
$old_path eregi_replace("."" "$old_path);
            if (
eregi("(.*)([a-z0-9]$)"$old_path$parts)) $old_path eregi_replace("[a-z0-9]$""$parts[2]/"$old_path);
            
$new_path trim($old_path);
            return (
$new_path);
        }
        
    };

?>


Usage Example




Rate This Script





Search



This Category All Categories