-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSpiderDetector.php
More file actions
89 lines (86 loc) · 1.83 KB
/
SpiderDetector.php
File metadata and controls
89 lines (86 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
<?php
/**
 * Spider (crawler bot) detector.
 * Used to detect whether a visitor is a crawler bot, based on its user agent.
 *
 * @author fdipzone
 * @DateTime 2024-07-13 21:51:59
 *
 */
class SpiderDetector
{
    /**
     * Crawler signatures.
     *
     * Each entry is matched as a case-insensitive substring of the user
     * agent string (see isSpider()).
     *
     * NOTE(review): several entries (e.g. 'Java (Often spam bot)',
     * 'WGet tools', 'Perl tool', 'Yandex bot') look like descriptive labels
     * rather than tokens that appear in real user agent strings, and short
     * entries such as 'Ask' and 'legs' match any agent that merely contains
     * those letters. The list is kept as-is to preserve existing behavior;
     * confirm against real traffic before pruning or extending it.
     *
     * @var string[]
     * @author fdipzone
     * @DateTime 2024-07-13 21:55:17
     *
     */
    private static $spider_list = [
        'TencentTraveler',
        'Baiduspider+',
        'BaiduGame',
        'Googlebot',
        'msnbot',
        'Sosospider+',
        'Sogou web spider',
        'ia_archiver',
        'Yahoo! Slurp',
        'YoudaoBot',
        'Yahoo Slurp',
        'MSNBot',
        'Java (Often spam bot)',
        'BaiDuSpider',
        'Voila',
        'Yandex bot',
        'BSpider',
        'twiceler',
        'Sogou Spider',
        'Speedy Spider',
        'Google AdSense',
        'Heritrix',
        'Python-urllib',
        'Alexa (IA Archiver)',
        'Ask',
        'Exabot',
        'Custo',
        'OutfoxBot/YodaoBot',
        'yacy',
        'SurveyBot',
        'legs',
        'lwp-trivial',
        'Nutch',
        'StackRambler',
        'The web archive (IA Archiver)',
        'Perl tool',
        'MJ12bot',
        'Netcraft',
        'MSIECrawler',
        'WGet tools',
        'larbin',
        'Fish search',
    ];

    /**
     * Determine whether a user agent belongs to a known crawler bot.
     *
     * @author fdipzone
     * @DateTime 2024-07-13 21:52:45
     *
     * @param string|null $userAgent User agent string to inspect. When null
     *                               (the default), the current request's
     *                               $_SERVER['HTTP_USER_AGENT'] is used, and
     *                               a missing header is treated as empty.
     *
     * @return bool True if the user agent contains any entry of the
     *              signature list (case-insensitive), false otherwise,
     *              including when the agent is empty or not a string.
     */
    public static function isSpider($userAgent = null): bool
    {
        // Fall back to the current request's header when no agent is given.
        if ($userAgent === null) {
            $userAgent = $_SERVER['HTTP_USER_AGENT'] ?? '';
        }

        // Nothing to match against: an empty agent never contains a signature.
        if (!is_string($userAgent) || $userAgent === '') {
            return false;
        }

        foreach (self::$spider_list as $spider) {
            // stripos() does the case-insensitive substring match directly,
            // so neither side has to be lower-cased on every iteration.
            if (stripos($userAgent, $spider) !== false) {
                return true;
            }
        }

        return false;
    }
}