Files and Directories
|
|
|
|
<?php
/*****************************************************************
PHP SEARCH ENGINE 1.0
Created By
---------------------------------
David Green
dlgreen@u.washington.edu
Variables for class "search":
---------------------------------
$base_path = the system path to your base directory
$directories = an array containing the directories you want to search
$file_exts = an array containing file extensions you want to search
$files = an array of files within a searchable directory
$query = the query you are searching for
$matches = an array of files that contained the query
$self = the URL path of your search page, e.g. $_SERVER['PHP_SELF'] (the search page itself is excluded from the results)
$index = the filename of your index.dat
$use_index = a boolean flag; true when the file list is read from the index file instead of scanning the directories
Variables for class "result":
---------------------------------
var $query = the query you are searching for
var $path = the path of the matching file
var $name = the name of the matching file
var $title = the <title></title> of the document
var $summary = a summary of the document's <body></body>
var $base_url = the http address to your web site
Variables for class "index":
---------------------------------
$base_path = the system path to your base directory
$directories = an array containing the directories you want to search
$file_exts = an array containing file extensions you want to search
$files = an array of files within a searchable directory
$index = the filename of your index.dat
Example Code:
---------------------------------
*** HOW TO SEARCH WITHOUT AN INDEX (THE DIRECTORIES ARE SCANNED ON EVERY SEARCH) ***
*** USING AN INDEXED SEARCH INSTEAD WILL REDUCE SEARCH TIME ***
$search = new search("the query", $_SERVER['PHP_SELF']);
$search->set_dirs("comma-separated list of searchable directories");
$search->set_exts("comma-separated list of searchable file types");
$search->start();
unset($search);
*** HOW TO CREATE AN INDEX OF FILES TO ALLOW FASTER SEARCHING ***
$temp = new index();
$temp->set_dirs("comma-separated list of searchable directories");
$temp->set_exts("comma-separated list of searchable file types");
$temp->create();
unset($temp);
*** HOW TO USE AN INDEX IN YOUR SEARCH ***
$search = new search("the query", $_SERVER['PHP_SELF']);
$search->use_index();
$search->start();
unset($search);
*****************************************************************/
/**
 * Builds the index file consumed by the indexed search mode.
 *
 * The index is a plain text file with one line per directory:
 *
 *     <directory>;<file 1>;<file 2>;...
 *
 * Directory and file names containing ";" or a line break cannot be
 * represented in this format.
 *
 * No explicit constructor is needed: the class has nothing to initialise, and
 * PHP 4 style constructors (a method named after the class) are no longer
 * treated as constructors by PHP 8.
 */
class index
{
    /** Filesystem path prepended to every directory; must end with a slash. */
    public $base_path = "/home/web/sites/php_search/";

    /** Directories (relative to $base_path) that are indexed. */
    public $directories = array();

    /** File extensions that are indexed, with or without the leading dot. */
    public $file_exts = array();

    /** Map of directory => list of matching file names found by create(). */
    public $files = array();

    /** Name of the index file, written inside $base_path. */
    public $index = "index.dat";

    /**
     * Sets the directories to index.
     *
     * @param string $list comma-separated directory names; surrounding
     *                     whitespace of each entry is ignored
     */
    public function set_dirs($list)
    {
        $this->directories = array_map('trim', explode(",", $list));
    }

    /**
     * Sets the file extensions to index.
     *
     * @param string $list comma-separated extensions (".html" or "html");
     *                     surrounding whitespace of each entry is ignored
     */
    public function set_exts($list)
    {
        $this->file_exts = array_map('trim', explode(",", $list));
    }

    /**
     * Scans every configured directory and writes the index file.
     *
     * Directories that do not exist are reported through printError() and
     * skipped; the remaining directories are still indexed.
     *
     * @return bool true when the index file was written, false otherwise
     */
    public function create()
    {
        foreach ($this->directories as $path) {
            $cpath = $this->base_path . $path;
            if (!is_dir($cpath)) {
                $this->printError("The directory $path was not found on this server.");
                continue;
            }
            // There is no search page to exclude while indexing, so no
            // file name is passed as $self.
            $this->files[$path] = $this->get_files($cpath, $this->file_exts, "");
        }

        $data = "";
        foreach ($this->files as $path => $names) {
            // One record per directory, terminated by a real newline.
            $data .= implode(";", array_merge(array($path), $names)) . "\n";
        }

        if (file_put_contents($this->base_path . $this->index, $data, LOCK_EX) === false) {
            $this->printError("Error while writing index file.");
            return false;
        }
        return true;
    }

    /**
     * Lists the regular files in a directory that carry one of the given
     * extensions.
     *
     * Names without an extension and names starting with a dot are skipped.
     *
     * @param string $path      directory to scan
     * @param array  $file_exts accepted extensions, with or without leading dot
     * @param string $self      file name to leave out of the result ("" = none)
     * @return array sorted list of matching file names (empty when the
     *               directory cannot be opened)
     */
    public function get_files($path, $file_exts, $self)
    {
        $found = array();

        // Normalise the extensions once so both "html" and ".html" work.
        $wanted = array();
        foreach ($file_exts as $ext) {
            $ext = ltrim(trim((string) $ext), ".");
            if ($ext !== "") {
                $wanted["." . $ext] = true;
            }
        }

        $dir = opendir($path);
        if ($dir === false) {
            return $found;
        }

        $prefix = rtrim($path, "/") . "/";
        // Compare against false explicitly: a file named "0" is falsy.
        while (($file = readdir($dir)) !== false) {
            // Use the full path so the result does not depend on the
            // current working directory.
            if (!is_file($prefix . $file) || $file === (string) $self) {
                continue;
            }
            $dot = strrpos($file, ".");
            if ($dot === false || $dot === 0 || $dot === strlen($file) - 1) {
                continue;
            }
            if (isset($wanted[substr($file, $dot)])) {
                $found[] = $file;
            }
        }
        closedir($dir);

        // readdir() order is filesystem dependent; sort for a stable index.
        sort($found);
        return $found;
    }

    /**
     * Reports an error by echoing it to the output.
     *
     * @param string $errorTxt message to display
     */
    public function printError($errorTxt)
    {
        echo ($errorTxt);
    }
}
/**
 * Searches the files of one or more directories for a query string and
 * prints one record per matching file.
 *
 * The file list is either collected by scanning the configured directories
 * (set_dirs() / set_exts()) or loaded from an index file (use_index()) whose
 * lines have the form "<directory>;<file 1>;<file 2>;...".
 */
class search
{
    /** Filesystem path prepended to every directory; must end with a slash. */
    public $base_path = "/home/web/sites/php_search/";

    /** Directories (relative to $base_path) that are searched. */
    public $directories = array();

    /** File extensions that are searched, with or without the leading dot. */
    public $file_exts = array();

    /** Map of directory => list of file names to search. */
    public $files = array();

    /** The text being searched for. */
    public $query;

    /** List of result objects, one per matching file. */
    public $matches = array();

    /** URL path of the search page; its file name is excluded from the scan. */
    public $self;

    /** Name of the index file inside $base_path. */
    public $index = "index.dat";

    /** True when the file list comes from the index file. */
    public $use_index = false;

    /** Set to true by make_index(). */
    public $make_index = false;

    /**
     * @param string $query text to search for
     * @param string $self  URL path of the search page
     */
    public function __construct($query, $self)
    {
        $this->query = $query;
        $this->self = $self;
    }

    /**
     * Sets the directories to search.
     *
     * @param string $list comma-separated directory names; surrounding
     *                     whitespace of each entry is ignored
     */
    public function set_dirs($list)
    {
        $this->directories = array_map('trim', explode(",", $list));
    }

    /**
     * Sets the file extensions to search.
     *
     * @param string $list comma-separated extensions (".html" or "html");
     *                     surrounding whitespace of each entry is ignored
     */
    public function set_exts($list)
    {
        $this->file_exts = array_map('trim', explode(",", $list));
    }

    /**
     * Runs the search and prints the results followed by the elapsed time.
     *
     * A file matches when its content (after strip_garbage()) contains the
     * query, compared case-insensitively. Problems are reported through
     * printError(); unreadable directories and files are skipped.
     */
    public function start()
    {
        $time_start = microtime(true);
        $this->matches = array();

        if (!$this->use_index) {
            // The search page must not list itself as a result.
            $self_name = basename((string) $this->self);
            foreach ($this->directories as $path) {
                $cpath = $this->base_path . $path;
                if (!is_dir($cpath)) {
                    $this->printError("The directory $path was not found on this server.");
                    continue;
                }
                $this->files[$path] = $this->get_files($cpath, $this->file_exts, $self_name);
            }
        }

        // Count files, not directories: an empty directory is not searchable.
        $total = 0;
        foreach ($this->files as $names) {
            $total += count($names);
        }
        if ($total < 1) {
            $this->printError("There were no valid files located.");
            return;
        }

        $needle = (string) $this->query;
        foreach ($this->files as $path => $names) {
            $cpath = rtrim($this->base_path . $path, "/") . "/";
            if (!is_dir($cpath)) {
                $this->printError("Error accessing directory $path.");
                continue;
            }
            foreach ($names as $name) {
                $full = $cpath . $name;
                $contents = is_readable($full) ? file_get_contents($full) : false;
                if ($contents === false) {
                    $this->printError("Error opening $name.");
                    continue;
                }
                $contents = $this->strip_garbage($contents);
                // An empty query matches nothing.
                if ($needle !== "" && stripos($contents, $needle) !== false) {
                    $match = new result($this->query);
                    $match->path = $path;
                    $match->name = $name;
                    $match->get_props($contents);
                    $this->matches[] = $match;
                }
            }
        }
        $time_end = microtime(true);

        if (count($this->matches) < 1) {
            $this->printError("No matches were made.");
        }
        foreach ($this->matches as $match) {
            $match->printRecord();
        }

        // FOR TESTING PURPOSES ONLY
        $time_total = sprintf("%.3f", $time_end - $time_start);
        echo "<p>Your search took $time_total.";
    }

    /**
     * Lists the regular files in a directory that carry one of the given
     * extensions.
     *
     * Names without an extension and names starting with a dot are skipped.
     *
     * @param string $path      directory to scan
     * @param array  $file_exts accepted extensions, with or without leading dot
     * @param string $self      file name to leave out of the result ("" = none)
     * @return array sorted list of matching file names (empty when the
     *               directory cannot be opened)
     */
    public function get_files($path, $file_exts, $self)
    {
        $found = array();

        // Normalise the extensions once so both "html" and ".html" work.
        $wanted = array();
        foreach ($file_exts as $ext) {
            $ext = ltrim(trim((string) $ext), ".");
            if ($ext !== "") {
                $wanted["." . $ext] = true;
            }
        }

        $dir = opendir($path);
        if ($dir === false) {
            return $found;
        }

        $prefix = rtrim($path, "/") . "/";
        // Compare against false explicitly: a file named "0" is falsy.
        while (($file = readdir($dir)) !== false) {
            // Use the full path so the result does not depend on the
            // current working directory.
            if (!is_file($prefix . $file) || $file === (string) $self) {
                continue;
            }
            $dot = strrpos($file, ".");
            if ($dot === false || $dot === 0 || $dot === strlen($file) - 1) {
                continue;
            }
            if (isset($wanted[substr($file, $dot)])) {
                $found[] = $file;
            }
        }
        closedir($dir);

        sort($found);
        return $found;
    }

    /**
     * Removes markup that must not take part in matching: style, script and
     * object elements with their content, embed and meta tags, and the bare
     * b / i / u formatting tags.
     *
     * @param string $content raw file content
     * @return string content without the listed markup
     */
    public function strip_garbage($content)
    {
        // Non-greedy matching keeps each pattern inside a single element;
        // a greedy ".+" would swallow everything up to the last ">".
        $patterns = array(
            '#<style\b[^>]*>.*?</style\s*>#is',
            '#<script\b[^>]*>.*?</script\s*>#is',
            '#<object\b[^>]*>.*?</object\s*>#is',
            '#<embed\b[^>]*>#i',
            '#<meta\b[^>]*>#i',
            '#</?[biu]>#i',
        );
        $cleaned = preg_replace($patterns, "", $content);

        // preg_replace() yields null on an engine error; fall back to the input.
        return ($cleaned === null) ? $content : $cleaned;
    }

    /**
     * Switches to indexed mode and loads the directory and file lists from
     * the index file. Call this before start().
     *
     * When the index cannot be read the error is reported through
     * printError() and the lists are left untouched.
     */
    public function use_index()
    {
        $this->use_index = true;

        $base = (string) $this->base_path;
        if ($base !== "" && substr($base, -1) !== "/") {
            $base .= "/";
        }
        if (!is_dir($base)) {
            $this->printError("Error accessing basepath.");
            return;
        }

        $index_file = $base . $this->index;
        $contents = is_readable($index_file) ? file_get_contents($index_file) : false;
        if ($contents === false) {
            $this->printError("No index currently exists.");
            return;
        }

        $this->directories = array();
        $this->files = array();
        foreach (explode("\n", $contents) as $line) {
            $line = rtrim($line, "\r");
            // The trailing newline of the file produces one empty line.
            if ($line === "") {
                continue;
            }
            $fields = explode(";", $line);
            $directory = $fields[0];
            $this->directories[] = $directory;
            $this->files[$directory] = array_slice($fields, 1);
        }
    }

    /**
     * Sets the make_index flag.
     */
    public function make_index()
    {
        $this->make_index = true;
    }

    /**
     * Reports an error by echoing it to the output.
     *
     * @param string $errorTxt message to display
     */
    public function printError($errorTxt)
    {
        echo ($errorTxt);
    }
}
/**
 * One search hit: the matching file plus the title and summary shown for it.
 */
class result
{
    /** The text that was searched for. */
    public $query;

    /** Directory of the matching file, relative to the site root. */
    public $path;

    /** File name of the matching file. */
    public $name;

    /** Plain-text document title; escaped when printed. */
    public $title;

    /** HTML-safe excerpt with every occurrence of the query wrapped in <b>. */
    public $summary;

    /** Base URL of the web site, without trailing slash. */
    public $base_url = "http://cesare2.adhost.com/php_search";

    /**
     * @param string $value the text that was searched for
     */
    public function __construct($value)
    {
        $this->query = $value;
        $this->path = "";
        $this->name = "";
        $this->title = "";
        $this->summary = "";
    }

    /**
     * Extracts the title and a summary from a document.
     *
     * The title is the text of the <title> element, or "Untitled Document"
     * when it is missing or empty. The summary is a window of at most 200
     * bytes of the tag-free body text, starting up to 100 bytes before the
     * first occurrence of the query.
     *
     * @param string $contents document source
     */
    public function get_props($contents)
    {
        $contents = (string) $contents;
        $flags = ENT_QUOTES | ENT_SUBSTITUTE;

        // GET DOCUMENT TITLE
        $this->title = "";
        if (preg_match('#<title\b[^>]*>(.*?)</title\s*>#is', $contents, $found)) {
            $this->title = trim(strip_tags($found[1]));
        }
        if ($this->title === "") {
            $this->title = "Untitled Document";
        }

        // GET SUMMARY OF DOCUMENT AND FORMAT IT FOR DISPLAY
        // Restrict the text to the body when there is one; a missing
        // </body> means "until the end of the document".
        if (preg_match('#<body\b[^>]*>#i', $contents, $found, PREG_OFFSET_CAPTURE)) {
            $start = $found[0][1] + strlen($found[0][0]);
            $end = strripos($contents, "</body");
            if ($end === false || $end < $start) {
                $end = strlen($contents);
            }
            $contents = (string) substr($contents, $start, $end - $start);
        }
        $text = strip_tags($contents);

        $query = (string) $this->query;
        $pos = ($query === "") ? false : stripos($text, $query);
        $from = ($pos === false || $pos <= 100) ? 0 : $pos - 100;

        // Escape before highlighting so neither the document text nor the
        // query can inject markup; existing entities are not double-encoded.
        $summary = trim((string) substr($text, $from, 200));
        $summary = htmlspecialchars($summary, $flags, "UTF-8", false);
        if ($query !== "") {
            $needle = htmlspecialchars($query, $flags, "UTF-8", false);
            // preg_quote() makes the query a literal, not a pattern.
            $marked = preg_replace('/' . preg_quote($needle, '/') . '/i', '<b>$0</b>', $summary);
            if ($marked !== null) {
                $summary = $marked;
            }
        }
        $this->summary = $summary;
    }

    /**
     * Prints the record: title, summary and a link to the document.
     *
     * Also normalises $this->path through correct_path().
     */
    public function printRecord()
    {
        // CORRECT PATH INFORMATION
        $this->path = $this->correct_path($this->path);

        $flags = ENT_QUOTES | ENT_SUBSTITUTE;
        // Encode each path segment, then escape the URL for the attribute.
        $segments = array_map('rawurlencode', explode("/", $this->path . $this->name));
        $url = rtrim($this->base_url, "/") . "/" . implode("/", $segments);
        $url = htmlspecialchars($url, $flags, "UTF-8");

        // DISPLAY CONTENT
        echo "<b>" . htmlspecialchars($this->title, $flags, "UTF-8", false) . "</b><br>\n";
        echo "<b>...</b> " . $this->summary . " <b>...</b><br>\n";
        echo "<a href='$url'>$url</a><p>\n\n";
    }

    /**
     * Turns a directory name into a URL path prefix.
     *
     * The current directory ("", "." or "./") becomes an empty prefix; any
     * other directory loses leading "./" and surrounding slashes and gets
     * exactly one trailing slash.
     *
     * @param string $old_path directory relative to the site root
     * @return string "" or the directory followed by "/"
     */
    public function correct_path($old_path)
    {
        $new_path = trim((string) $old_path);
        $new_path = preg_replace('#^(\./+)+#', "", $new_path);
        $new_path = trim((string) $new_path, "/");
        if ($new_path === "" || $new_path === ".") {
            return "";
        }
        return $new_path . "/";
    }
}
?>
|
|
|
Usage Example
|
|
|
Rate This Script
|
|
|
|