Link Checkers
|
|
|
|
<?php
/**
 * Check a link and report the status line of the server's final response.
 *
 * Sends a header-only request (no body is downloaded), follows redirects,
 * and gives up after 10 seconds.
 *
 * @param string $link URL to check.
 * @return array Always three elements:
 *               [0] = HTTP version        (e.g. "HTTP/1.1")
 *               [1] = returned status code (200, 404, etc)
 *               [2] = returned status text ("OK", "Not Found", etc)
 *               All three are blank strings when the request failed
 *               (bad url, server down, timeout). For responses that carry
 *               no HTTP status line (e.g. FTP), the first non-empty line
 *               of the output is split the same way instead.
 */
function check_link($link) {
    $blank = array('', '', '');

    $ch = curl_init();
    if ($ch === false) {
        return $blank;
    }
    curl_setopt($ch, CURLOPT_URL, $link);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_NETRC, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // Have curl hand the headers back as a string instead of printing them,
    // so no output buffering is needed to capture them.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $response = curl_exec($ch);
    curl_close($ch);

    if (!is_string($response)) {
        // curl_exec() returns false on failure.
        return $blank;
    }

    // When redirects are followed the output holds one header group per hop,
    // so the LAST "HTTP/..." line is the status of the final response.
    $status_line = '';
    $first_line = '';
    foreach (explode("\n", $response) as $line) {
        $line = trim($line); // also drops the "\r" of the CRLF line ending
        if ($line === '') {
            continue;
        }
        if ($first_line === '') {
            $first_line = $line;
        }
        if (strncmp($line, 'HTTP/', 5) === 0) {
            $status_line = $line;
        }
    }
    if ($status_line === '') {
        // No HTTP status line at all: fall back to the first line of output.
        $status_line = $first_line;
    }

    // Split into at most three pieces so the status text keeps its spaces,
    // then pad so callers can always read indexes 0, 1 and 2.
    return array_pad(explode(' ', $status_line, 3), 3, '');
} // function check_link
?>
|
|
|
Usage Example
|
<?php
// Example driver: every link stored in the "downloads" table is checked
// and the outcome is recorded in the "downloads_check" table.
// A large table can keep the checker busy for a long time, so the
// script's execution time limit is set to 3600 seconds (one hour).
// Adjust it to the number of entries in your database.
set_time_limit(3600);
// Pull in the database connection include.
require_once 'dbconn.php';
/**
 * Check a link and report the status line of the server's final response.
 *
 * Sends a header-only request (no body is downloaded), follows redirects,
 * and gives up after 10 seconds.
 *
 * @param string $link URL to check.
 * @return array Always three elements:
 *               [0] = HTTP version        (e.g. "HTTP/1.1")
 *               [1] = returned status code (200, 404, etc)
 *               [2] = returned status text ("OK", "Not Found", etc)
 *               All three are blank strings when the request failed
 *               (bad url, server down, timeout). For responses that carry
 *               no HTTP status line (e.g. FTP), the first non-empty line
 *               of the output is split the same way instead.
 */
function check_link($link) {
    $blank = array('', '', '');

    $ch = curl_init();
    if ($ch === false) {
        return $blank;
    }
    curl_setopt($ch, CURLOPT_URL, $link);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_NETRC, 1); // omit if you know no urls are FTP links...
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // Have curl hand the headers back as a string instead of printing them,
    // so no output buffering is needed to capture them.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $response = curl_exec($ch);
    curl_close($ch);

    if (!is_string($response)) {
        // curl_exec() returns false on failure.
        return $blank;
    }

    // When redirects are followed the output holds one header group per hop,
    // so the LAST "HTTP/..." line is the status of the final response.
    $status_line = '';
    $first_line = '';
    foreach (explode("\n", $response) as $line) {
        $line = trim($line); // also drops the "\r" of the CRLF line ending
        if ($line === '') {
            continue;
        }
        if ($first_line === '') {
            $first_line = $line;
        }
        if (strncmp($line, 'HTTP/', 5) === 0) {
            $status_line = $line;
        }
    }
    if ($status_line === '') {
        // No HTTP status line at all: fall back to the first line of output.
        $status_line = $first_line;
    }

    // Split into at most three pieces so the status text keeps its spaces,
    // then pad so callers can always read indexes 0, 1 and 2.
    return array_pad(explode(' ', $status_line, 3), 3, '');
} // function
// Holder for FTP uid/pwd combos, used as a set: the netrc entry text is the
// array key, so duplicate entries collapse automatically.
// Omit this piece of code if you know you have no FTP links.
$entries = array();

// The next code groups create a netrc file for use with any FTP links that
// may be in your system. If you leave this part out and the FTP server asks
// for a password, your script will pause until a password is entered. This
// becomes really annoying if you are running this from the command line or
// as a cron. Feel free to omit these if you know you do not have any FTP links.
// NOTE(review): curl normally looks for the netrc file in the home directory
// of the user running the script - confirm that the "netrc" file written
// below is the one your curl installation actually reads.
$ftp_queries = array(
    // deal with ftp://username:password@server.com...
    "SELECT link FROM downloads WHERE link LIKE 'ftp:%@%' AND link LIKE 'ftp:%:%@%'",
    // deal with ftp://username@server.com...
    "SELECT link FROM downloads WHERE link LIKE 'ftp:%@%' AND link NOT LIKE 'ftp:%:%@%'",
);
foreach ($ftp_queries as $query) {
    $result = mysql_query($query, $dbconn);
    if (!$result) {
        trigger_error('FTP link query failed: ' . mysql_error($dbconn), E_USER_WARNING);
        continue;
    }
    while ($row = mysql_fetch_object($result)) {
        // parse_url() picks out user, password and host without the manual
        // splitting on "@", "/" and ":".
        $url = parse_url($row->link);
        if (!is_array($url) || !isset($url['host'], $url['user'])) {
            continue; // not a usable ftp://user@host link
        }
        $machine = $url['host'];
        $login = $url['user'];
        // Links without an explicit password get the password "anonymous".
        $password = isset($url['pass']) ? $url['pass'] : 'anonymous';
        // netrc keywords are "machine", "login" and "password".
        $entry = "machine $machine\n login $login\n password $password\n\n";
        $entries[$entry] = true;
    } // while
    mysql_free_result($result);
} // foreach

// create temp working file...
$fp = fopen("temp.txt", "w");
if ($fp === false) {
    trigger_error('Could not create temp.txt for the netrc entries', E_USER_WARNING);
} else {
    fwrite($fp, implode('', array_keys($entries)));
    fclose($fp);
    // The file holds passwords, so restrict it to the owner.
    chmod("temp.txt", 0600);
    // rename temp file to netrc
    if (!rename("temp.txt", "netrc")) {
        trigger_error('Could not rename temp.txt to netrc', E_USER_WARNING);
    }
}

// Pull all the links from your database table...
$query = "select download_id, link from downloads order by download_id";
$result = mysql_query($query, $dbconn);
if (!$result) {
    trigger_error('Link query failed: ' . mysql_error($dbconn), E_USER_WARNING);
} else {
    // Check all returned links from database...
    while ($row = mysql_fetch_object($result)) {
        $main = check_link($row->link);
        // The status code and text come from the remote server, so they are
        // untrusted: escape them before building the SQL statement. A failed
        // check may not supply them at all, in which case blanks are stored.
        $download_id = (int) $row->download_id;
        $error_number = mysql_real_escape_string(isset($main[1]) ? trim($main[1]) : '', $dbconn);
        $error_text = mysql_real_escape_string(isset($main[2]) ? trim($main[2]) : '', $dbconn);
        $query = "insert into downloads_check (download_id, error_number, error_text, date_check) values ($download_id, '$error_number', '$error_text', now())";
        if (!mysql_query($query, $dbconn)) {
            trigger_error('Could not record check for download ' . $download_id . ': ' . mysql_error($dbconn), E_USER_WARNING);
        }
    } // while
}
// NOTE: Returns of blank are either "bad url" or "server is down".
// If you are just interested in good links and want to delete bad
// ones from your links list, you don't care if the problem was
// the link syntax or the server status. Either way, the link doesn't
// go anywhere, so it is a candidate for deletion.
// As for a policy on when to remove links from your data set,
// I am checking all links once a day (at 3am). If the link is bad
// 7 days in a row, I will remove the link from the list. Why 7 days?
// Some users only run their server on the weekend. A separate script
// does the "remove link from data set" check.
?>
|
|
|
Rate This Script
|
|
|
|