PHP根据IP和UA判断蜘蛛真假的代码!
最近一直在做自建统计源码(https://www.4414.cn/thread-143681-1-1.html)的二开,发现缺少蜘蛛爬虫的统计功能,于是自己抄袭了一段,给有需要的网友,可以集成到自己的网站上,代码如下:
/**
 * Get the visitor's IP address from the request environment.
 *
 * Candidates are tried in this order: each entry of X-Forwarded-For (left to
 * right), then REMOTE_ADDR, then Client-IP. The first candidate that is a
 * syntactically valid IP address is returned, trimmed of whitespace.
 *
 * NOTE(review): X-Forwarded-For and Client-IP are request headers and can be
 * set by the client; only rely on them when a trusted proxy overwrites them.
 *
 * @return string A valid IP address, or '' when none of the sources holds one.
 */
function get_ip() {
    $candidates = array();

    if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
        // The header may carry a comma-separated proxy chain; keep the order.
        foreach (explode(',', (string) $_SERVER['HTTP_X_FORWARDED_FOR']) as $entry) {
            $candidates[] = $entry;
        }
    }
    if (isset($_SERVER['REMOTE_ADDR'])) {
        $candidates[] = $_SERVER['REMOTE_ADDR'];
    }
    if (isset($_SERVER['HTTP_CLIENT_IP'])) {
        $candidates[] = $_SERVER['HTTP_CLIENT_IP'];
    }

    foreach ($candidates as $candidate) {
        $candidate = trim((string) $candidate);
        // Skip placeholders such as "unknown" or empty list entries instead
        // of handing garbage to the caller.
        if (filter_var($candidate, FILTER_VALIDATE_IP) !== false) {
            return $candidate;
        }
    }

    return '';
}
/**
 * Check whether a request claiming to be a search-engine spider is genuine.
 *
 * The User-Agent is matched (case-insensitively) against known crawler tokens;
 * on a match the IP is reverse-resolved and the resulting host name must
 * contain the host fragment registered for that crawler.
 *
 * Usage: echo check_spider('66.249.74.44', $_SERVER['HTTP_USER_AGENT']);
 *
 * @param string $ip IPv4 address of the visitor
 * @param string $ua User-Agent header of the visitor
 * @return string|false "<spider label> UA: <ua>" for a verified spider,
 *                      "正常访客 UA: <ua>" otherwise; false when $ip is not a
 *                      valid IPv4 address or $ua is empty.
 */
function check_spider($ip,$ua)
{
    // Rows are (label, UA token, host fragment expected in the reverse DNS name).
    // A plain list is used instead of a label-keyed map so that two crawlers
    // can share one label without the later entry overwriting the earlier one.
    static $spider_list = array(
        array('谷歌蜘蛛', 'Googlebot', 'googlebot.com'),
        array('百度蜘蛛', 'Baiduspider', '.baidu.'),
        array('搜狗蜘蛛', 'Sogou web spider', 'sogou.com'),
        array('头条蜘蛛', 'Bytespider', 'bytedance.com'),
        array('神马蜘蛛', 'YisouSpider', 'sm.cn'),
        array('360蜘蛛', '360Spider', 'ny.adsl'),
        array('华为蜘蛛', 'PetalBot', 'petalsearch.com'),
        array('华为蜘蛛', 'AspiegelBot', 'aspiegel.com'),
        array('苹果蜘蛛', 'Applebot', 'apple.com'),
        array('有道蜘蛛', 'YoudaoBot', 'youdao.com'),
        array('腾讯蜘蛛', 'Sosospider', ''),
        array('bing蜘蛛', 'bingbot', 'msn.com'),
    );

    // Reject anything that is not a well-formed IPv4 address (including
    // out-of-range octets) before it reaches the resolver.
    if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) === false) {
        return false;
    }
    if (empty($ua)) {
        return false;
    }

    // Reverse lookup result, resolved lazily and at most once per call.
    $domain = null;

    foreach ($spider_list as $spider) {
        list($label, $token, $host) = $spider;

        if (stripos($ua, $token) === false) {
            continue;
        }
        // No host fragment registered: the claim cannot be verified, so it is
        // never reported as a genuine spider.
        if ($host === '') {
            continue;
        }
        if ($domain === null) {
            $resolved = gethostbyaddr($ip);
            // gethostbyaddr() hands back the unmodified IP when the lookup
            // fails; treat that the same as "no host name".
            $domain = ($resolved === false || $resolved === $ip) ? '' : $resolved;
        }
        if ($domain !== '' && stripos($domain, $host) !== false) {
            return $label.' UA: '.$ua;
        }
    }

    return '正常访客 UA: '.$ua;
}
//$bot=check_spider(get_ip(),$_SERVER['HTTP_USER_AGENT']);
//调试输出
//echo $bot;
/**
 * Get the visitor's IP address from the request environment.
 *
 * Candidates are tried in this order: each entry of X-Forwarded-For (left to
 * right), then REMOTE_ADDR, then Client-IP. The first candidate that is a
 * syntactically valid IP address is returned, trimmed of whitespace.
 *
 * NOTE(review): X-Forwarded-For and Client-IP are request headers and can be
 * set by the client; only rely on them when a trusted proxy overwrites them.
 *
 * @return string A valid IP address, or '' when none of the sources holds one.
 */
function get_ip() {
    $candidates = array();

    if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
        // The header may carry a comma-separated proxy chain; keep the order.
        foreach (explode(',', (string) $_SERVER['HTTP_X_FORWARDED_FOR']) as $entry) {
            $candidates[] = $entry;
        }
    }
    if (isset($_SERVER['REMOTE_ADDR'])) {
        $candidates[] = $_SERVER['REMOTE_ADDR'];
    }
    if (isset($_SERVER['HTTP_CLIENT_IP'])) {
        $candidates[] = $_SERVER['HTTP_CLIENT_IP'];
    }

    foreach ($candidates as $candidate) {
        $candidate = trim((string) $candidate);
        // Skip placeholders such as "unknown" or empty list entries instead
        // of handing garbage to the caller.
        if (filter_var($candidate, FILTER_VALIDATE_IP) !== false) {
            return $candidate;
        }
    }

    return '';
}
/**
 * Check whether a request claiming to be a search-engine spider is genuine.
 *
 * The User-Agent is matched (case-insensitively) against known crawler tokens;
 * on a match the IP is reverse-resolved and the resulting host name must
 * contain the host fragment registered for that crawler.
 *
 * Usage: echo check_spider('66.249.74.44', $_SERVER['HTTP_USER_AGENT']);
 *
 * @param string $ip IPv4 address of the visitor
 * @param string $ua User-Agent header of the visitor
 * @return string|false "<spider label> UA: <ua>" for a verified spider,
 *                      "正常访客 UA: <ua>" otherwise; false when $ip is not a
 *                      valid IPv4 address or $ua is empty.
 */
function check_spider($ip,$ua)
{
    // Rows are (label, UA token, host fragment expected in the reverse DNS name).
    // A plain list is used instead of a label-keyed map so that two crawlers
    // can share one label without the later entry overwriting the earlier one.
    static $spider_list = array(
        array('谷歌蜘蛛', 'Googlebot', 'googlebot.com'),
        array('百度蜘蛛', 'Baiduspider', '.baidu.'),
        array('搜狗蜘蛛', 'Sogou web spider', 'sogou.com'),
        array('头条蜘蛛', 'Bytespider', 'bytedance.com'),
        array('神马蜘蛛', 'YisouSpider', 'sm.cn'),
        array('360蜘蛛', '360Spider', 'ny.adsl'),
        array('华为蜘蛛', 'PetalBot', 'petalsearch.com'),
        array('华为蜘蛛', 'AspiegelBot', 'aspiegel.com'),
        array('苹果蜘蛛', 'Applebot', 'apple.com'),
        array('有道蜘蛛', 'YoudaoBot', 'youdao.com'),
        array('腾讯蜘蛛', 'Sosospider', ''),
        array('bing蜘蛛', 'bingbot', 'msn.com'),
    );

    // Reject anything that is not a well-formed IPv4 address (including
    // out-of-range octets) before it reaches the resolver.
    if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) === false) {
        return false;
    }
    if (empty($ua)) {
        return false;
    }

    // Reverse lookup result, resolved lazily and at most once per call.
    $domain = null;

    foreach ($spider_list as $spider) {
        list($label, $token, $host) = $spider;

        if (stripos($ua, $token) === false) {
            continue;
        }
        // No host fragment registered: the claim cannot be verified, so it is
        // never reported as a genuine spider.
        if ($host === '') {
            continue;
        }
        if ($domain === null) {
            $resolved = gethostbyaddr($ip);
            // gethostbyaddr() hands back the unmodified IP when the lookup
            // fails; treat that the same as "no host name".
            $domain = ($resolved === false || $resolved === $ip) ? '' : $resolved;
        }
        if ($domain !== '' && stripos($domain, $host) !== false) {
            return $label.' UA: '.$ua;
        }
    }

    return '正常访客 UA: '.$ua;
}
//$bot=check_spider(get_ip(),$_SERVER['HTTP_USER_AGENT']);
//调试输出
//echo $bot;