Developers can use browscap.ini to detect whether a User-Agent belongs to a search engine or data-collection spider. However, that file is rather large, and parsing it is expensive in both time and memory. If you need a faster, simpler (albeit less accurate) method, this class can be useful.
/**
 * Store the User-Agent string on this object ($this->mUA).
 *
 * @param string|null $pUA Explicit User-Agent to store. When NULL, the
 *                         current request's HTTP_USER_AGENT header is used
 *                         if the server provided one; otherwise NULL is stored.
 */
function setUA($pUA=NULL) {
    // Only fall back to the request header when the caller supplied nothing
    // and the header is actually present.
    $vFromRequest = ($pUA === NULL) && isset($_SERVER['HTTP_USER_AGENT']);
    $this->mUA = $vFromRequest ? $_SERVER['HTTP_USER_AGENT'] : $pUA;
}
/**
 * Tell whether the User-Agent looks like a spider/crawler.
 *
 * The User-Agent is first stored via setUA($pUA), so calling this method
 * also updates $this->mUA. Detection is a case-insensitive substring match
 * against a fixed list of known bot tokens; it is a cheap heuristic, so
 * short tokens (e.g. 'FAST') can produce false positives.
 *
 * @param string|null $pUA User-Agent to test; NULL means "use whatever
 *                         setUA() resolves" (the request header, if any).
 * @return bool TRUE if any known bot token occurs in the User-Agent,
 *              FALSE otherwise (including when no User-Agent is available).
 */
function isSpider($pUA=NULL) {
    $this->setUA($pUA);

    // No usable User-Agent (e.g. CLI run, or header missing): nothing to
    // match. This also avoids passing NULL to stripos(), which is deprecated
    // as of PHP 8.1.
    $vUA = $this->mUA;
    if (!is_string($vUA) || $vUA === '') {
        return FALSE;
    }

    // Plain ASCII single quotes are required here; typographic quotes
    // (‘ ’) are not string delimiters in PHP.
    $vBots = array(
        'AdsBot-Google',
        'alexa',
        'appie',
        'Ask Jeeves',
        'Baiduspider',
        'crawler',
        'FAST',
        'Firefly',
        'froogle',
        'girafabot',
        'Googlebot',
        'ia_archiver',
        'InfoSeek',
        'inktomi',
        'Java/',
        'looksmart',
        'msnbot',
        'NationalDirectory',
        'rabaz',
        'Scooter',
        'Slurp',
        'Spade',
        'TECNOSEEK',
        'Teoma',
        'URL_Spider_SQL',
        'WebBug',
        'WebCapture',
        'WebFindBot',
        'Wget/',
        'ZyBorg',
        'libwww-perl/',
        'httpunit/',
        'WebZIP/',
    );

    foreach ($vBots as $vPat) {
        // stripos() can return 0 for a match at the start of the string,
        // so compare strictly against FALSE.
        if (stripos($vUA, $vPat) !== FALSE) {
            return TRUE;
        }
    }

    return FALSE;
}
}
?>