HTML
|
|
|
|
<?php
/**
 * Extract the hyperlinks (<A HREF=...>text</A>) found in a chunk of HTML.
 *
 * The markup is scanned tag by tag. When an opening <A> tag carrying a
 * non-empty HREF is seen, its target is remembered; when the matching </A>
 * is reached, everything between the two tags is taken as the link text
 * (with nested markup removed by strip_tags()). Anchors without an HREF
 * (e.g. <A NAME=...>) are ignored, as are <A> tags nested inside an anchor
 * that is still open.
 *
 * @param string $data HTML to scan.
 * @return array List of links in document order. Each element is a
 *               two-element array: [0] is the anchor text, [1] is the
 *               HREF value. The list is empty when no link is found.
 */
function extract_links( $data ) {
    $links    = array();
    $location = null; // HREF of the anchor currently open, or null if none
    $linkpos  = 0;    // offset of the first character after the opening <A ...>
    $pos      = 0;

    while (($pos = strpos($data, "<", $pos)) !== false) {
        $tagstart = $pos; // offset of the "<" itself
        $pos++;

        $endpos = strpos($data, ">", $pos);
        if ($endpos === false) {
            // Unterminated tag: nothing more can be parsed. Without this
            // guard the scan position would reset and loop forever.
            break;
        }

        $tag = trim(substr($data, $pos, $endpos - $pos));

        if ($location !== null) {
            // Inside an anchor: look for the closing </A>.
            if (preg_match('/^\/a(?:\s|$)/i', $tag)) {
                $text = substr($data, $linkpos, $tagstart - $linkpos);
                $links[] = array(strip_tags($text), $location);
                $location = null;
            }
        } elseif (preg_match('/^a(?:\s|$)/i', $tag)) {
            // Opening <A ...>: accept a double-quoted, single-quoted or
            // unquoted HREF value, tried in that order.
            if (preg_match('/href\s*=\s*"([^"]*)"/i', $tag, $regs)
                || preg_match("/href\\s*=\\s*'([^']*)'/i", $tag, $regs)
                || preg_match('/href\s*=\s*(\S*)/i', $tag, $regs)) {
                // Only remember the target if it is not empty.
                if ($regs[1] !== '') {
                    $location = $regs[1];
                }
            }
            $linkpos = $endpos + 1;
        }

        // Resume scanning right after the ">" that closed this tag.
        $pos = $endpos + 1;
    }

    return $links;
}
?>
|
|
|
Usage Example
|
// Load the whole document; stop early if it cannot be read.
$data = file_get_contents("testdata.htm");
if ($data === false) {
    exit("Unable to read testdata.htm\n");
}

// Replace line breaks with spaces so that tags split across several
// lines are still seen as a single tag by the extractor.
$data = str_replace(array("\n", "\r"), " ", $data);

$extracted_links = extract_links($data);
foreach ($extracted_links as $link) {
    // Skip any entry that is not a complete (text, URL) pair.
    if (!isset($link[0], $link[1])) {
        continue;
    }
    // Escape for HTML output; double_encode=false keeps entities that are
    // already present in the source markup (e.g. &amp;) intact.
    $url  = htmlspecialchars($link[1], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    $text = htmlspecialchars($link[0], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    echo "<A HREF=\"" . $url . "\">" . $text . "</A><BR>\n";
}
|
|
|
Rate This Script
|
|
|
|