diff --git a/lib/bots.json b/lib/bots.json new file mode 100644 index 0000000..a034850 --- /dev/null +++ b/lib/bots.json @@ -0,0 +1,2523 @@ +[ + { + "pattern": "Googlebot\\/", + "url": "http://www.google.com/bot.html", + "instances": [ + "Googlebot/2.1 (+http://www.google.com/bot.html)", + "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36", + "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview Analytics) Chrome/27.0.1453 Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" + ] + }, + { + "pattern": "Googlebot-Mobile", + "instances": [ + "DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 
(compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", + "Nokia6820/2.0 (4.83) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", + "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)" + ] + }, + { + "pattern": "Googlebot-Image", + "instances": [ + "Googlebot-Image/1.0" + ] + }, + { + "pattern": "Googlebot-News", + "instances": [ + "Googlebot-News" + ] + }, + { + "pattern": "Googlebot-Video", + "instances": [ + "Googlebot-Video/1.0" + ] + }, + { + "pattern": "AdsBot-Google([^-]|$)", + "url": "https://support.google.com/webmasters/answer/1061943?hl=en", + "instances": [ + "AdsBot-Google (+http://www.google.com/adsbot.html)" + ] + }, + { + "pattern": "AdsBot-Google-Mobile", + "addition_date": "2017/08/21", + "url": "https://support.google.com/adwords/answer/2404197", + "instances": [ + "AdsBot-Google-Mobile-Apps", + "Mozilla/5.0 (Linux; Android 5.0; SM-G920A) AppleWebKit (KHTML, like Gecko) Chrome Mobile Safari (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)" + ] + }, + { + "pattern": "Mediapartners-Google", + "url": "https://support.google.com/webmasters/answer/1061943?hl=en", + "instances": [ + "Mediapartners-Google", + "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa/4.0 (Following Mediapartners-Google)", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 10_0 like Mac OS X; en-us) 
AppleWebKit/602.1.38 (KHTML, like Gecko) Version/10.0 Mobile/14A5297c Safari/602.1 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)" + ] + }, + { + "pattern": "Mediapartners \\(Googlebot\\)", + "addition_date": "2017/08/08", + "url": "https://support.google.com/webmasters/answer/1061943?hl=en", + "instances": [] + }, + { + "pattern": "APIs-Google", + "addition_date": "2017/08/08", + "url": "https://support.google.com/webmasters/answer/1061943?hl=en", + "instances": [ + "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)" + ] + }, + { + "pattern": "bingbot", + "url": "http://www.bing.com/bingbot.htm", + "instances": [ + "Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 530) like Gecko (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm", + "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) SitemapProbe", + "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS 
X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (seoanalyzer; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)" + ] + }, + { + "pattern": "slurp", + "url": "http://help.yahoo.com/help/us/ysearch/slurp", + "instances": [ + "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)", + "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)" + ] + }, + { + "pattern": "java", + "instances": [ + "Apache-HttpClient/4.2.3 (java 1.5)", + "Apache-HttpClient/4.2.5 (java 1.5)", + "Apache-HttpClient/4.3.1 (java 1.5)", + "Apache-HttpClient/4.3.3 (java 1.5)", + "Apache-HttpClient/4.3.5 (java 1.5)", + "Apache-HttpClient/UNAVAILABLE (java 1.4)", + "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html" + ] + }, + { + "pattern": "[wW]get", + "instances": [ + "WGETbot/1.0 (+http://wget.alanreed.org)", + "Wget/1.14 (linux-gnu)" + ] + }, + { + "pattern": "curl", + "instances": [ + "eCairn-Grabber/1.0 (+http://ecairn.com/grabber) curl/7.15" + ] + }, + { + "pattern": "Commons-HttpClient", + "instances": [ + "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)", + "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/4.3 +http://www.linkedin.com)" + ] + }, + { + "pattern": 
"Python-urllib", + "instances": [ + "Python-urllib/2.7 (+http://aranea.juls.savba.sk/aranea_about.html)", + "Python-urllib/2.7 (+http://ella.juls.savba.sk/aranea_about)", + "Python-urllib/2.7 (+http://sketch.juls.savba.sk/aranea_about)" + ] + }, + { + "pattern": "libwww", + "instances": [ + "2Bone_LinkChecker/1.0 libwww-perl/6.03", + "2Bone_LinkChkr/1.0 libwww-perl/6.03", + "W3C-checklink/2.90 libwww-perl/5.64", + "W3C-checklink/3.6.2.3 libwww-perl/5.64", + "W3C-checklink/4.2 [4.20] libwww-perl/5.803", + "W3C-checklink/4.2.1 [4.21] libwww-perl/5.803", + "W3C-checklink/4.3 [4.42] libwww-perl/5.805", + "W3C-checklink/4.3 [4.42] libwww-perl/5.808", + "W3C-checklink/4.3 [4.42] libwww-perl/5.820", + "W3C-checklink/4.5 [4.154] libwww-perl/5.823", + "W3C-checklink/4.5 [4.160] libwww-perl/5.823", + "amibot - http://www.amidalla.de - tech@amidalla.com libwww-perl/5.831" + ] + }, + { + "pattern": "httpunit", + "instances": [ + "httpunit/1.x" + ] + }, + { + "pattern": "nutch", + "instances": [ + "NutchCVS/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)", + "istellabot-nutch/Nutch-1.10" + ] + }, + { + "pattern": "Go-http-client", + "addition_date": "2016/03/26", + "url": "https://golang.org/pkg/net/http/", + "instances": [ + "Go-http-client/1.1" + ] + }, + { + "pattern": "phpcrawl", + "addition_date": "2012/09/17", + "url": "http://phpcrawl.cuab.de/", + "instances": [ + "phpcrawl" + ] + }, + { + "pattern": "msnbot", + "url": "http://search.msn.com/msnbot.htm", + "instances": [ + "adidxbot/1.1 (+http://search.msn.com/msnbot.htm)", + "adidxbot/2.0 (+http://search.msn.com/msnbot.htm)", + "librabot/1.0 (+http://search.msn.com/msnbot.htm)", + "librabot/2.0 (+http://search.msn.com/msnbot.htm)", + "msnbot-NewsBlogs/2.0b (+http://search.msn.com/msnbot.htm)", + "msnbot-UDiscovery/2.0b (+http://search.msn.com/msnbot.htm)", + "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)", + "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)", + 
"msnbot-media/2.0b (+http://search.msn.com/msnbot.htm)", + "msnbot/1.0 (+http://search.msn.com/msnbot.htm)", + "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", + "msnbot/2.0b (+http://search.msn.com/msnbot.htm)", + "msnbot/2.0b (+http://search.msn.com/msnbot.htm).", + "msnbot/2.0b (+http://search.msn.com/msnbot.htm)._" + ] + }, + { + "pattern": "jyxobot", + "instances": [] + }, + { + "pattern": "FAST-WebCrawler", + "instances": [ + "FAST-WebCrawler/3.6/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", + "FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", + "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", + "FAST-WebCrawler/3.8" + ] + }, + { + "pattern": "FAST Enterprise Crawler", + "instances": [ + "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/", + "FAST Enterprise Crawler 6 used by Schibsted (webcrawl@schibstedsok.no)" + ] + }, + { + "pattern": "BIGLOTRON", + "instances": [ + "BIGLOTRON (Beta 2;GNU/Linux)" + ] + }, + { + "pattern": "Teoma", + "instances": [ + "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)", + "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)" + ], + "url": "http://about.ask.com/en/docs/about/webmasters.shtml" + }, + { + "pattern": "convera", + "instances": [ + "ConveraCrawler/0.9e (+http://ews.converasearch.com/crawl.htm)" + ], + "url": "http://ews.converasearch.com/crawl.htm" + }, + { + "pattern": "seekbot", + "instances": [ + "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2" + ], + "url": "http://www.seekbot.net/bot.html" + }, + { + "pattern": "Gigabot", + "instances": [ + "Gigabot/1.0", + "Gigabot/2.0 (http://www.gigablast.com/spider.html)" + ], + "url": "http://www.gigablast.com/spider.html" + }, + { + "pattern": "Gigablast", + "instances": [ + "GigablastOpenSource/1.0" 
+ ], + "url": "https://github.com/gigablast/open-source-search-engine" + }, + { + "pattern": "exabot", + "instances": [ + "Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)", + "Mozilla/5.0 (compatible; Exabot PyExalead/3.0; +http://www.exabot.com/go/robot)", + "Mozilla/5.0 (compatible; Exabot-Images/3.0; +http://www.exabot.com/go/robot)", + "Mozilla/5.0 (compatible; Exabot/3.0 (BiggerBetter); +http://www.exabot.com/go/robot)", + "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)" + ] + }, + { + "pattern": "ia_archiver", + "instances": [ + "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)", + "ia_archiver-web.archive.org" + ] + }, + { + "pattern": "GingerCrawler", + "instances": [ + "GingerCrawler/1.0 (Language Assistant for Dyslexics; www.gingersoftware.com/crawler_agent.htm; support at ginger software dot com)" + ] + }, + { + "pattern": "webmon ", + "instances": [] + }, + { + "pattern": "HTTrack", + "instances": [ + "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)" + ] + }, + { + "pattern": "grub.org", + "instances": [ + "Mozilla/4.0 (compatible; grub-client-0.3.0; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.0.4; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.0.5; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.0.6; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.0.7; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.1.1; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.2.1; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.3.1; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.3.7; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; 
grub-client-1.4.3; Crawl your own stuff with http://grub.org)", + "Mozilla/4.0 (compatible; grub-client-1.5.3; Crawl your own stuff with http://grub.org)" + ] + }, + { + "pattern": "UsineNouvelleCrawler", + "instances": [] + }, + { + "pattern": "antibot", + "instances": [] + }, + { + "pattern": "netresearchserver", + "instances": [] + }, + { + "pattern": "speedy", + "instances": [ + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider for SpeedyAds (http://www.entireweb.com/about/search_tech/speedy_spider/)", + "Mozilla/5.0 (compatible; Speedy Spider; http://www.entireweb.com/about/search_tech/speedy_spider/)", + "Speedy Spider (Entireweb; Beta/1.2; http://www.entireweb.com/about/search_tech/speedyspider/)", + "Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)" + ] + }, + { + "pattern": "fluffy", + "instances": [] + }, + { + "pattern": "bibnum.bnf", + "instances": [ + "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)" + ] + }, + { + "pattern": "findlink", + "instances": [ + "findlinks/1.0 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.3-beta8 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.3-beta9 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.5-beta7 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/; YaCy 0.1; yacy.net)", + "findlinks/1.1.6-beta2 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.6-beta3 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.6-beta4 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.6-beta5 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/1.1.6-beta6 (+http://wortschatz.uni-leipzig.de/findlinks/)", 
+ "findlinks/2.0 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.0.1 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.0.2 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.0.4 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.0.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.0.9 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.1 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.1.3 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.1.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.2 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", + "findlinks/2.6 (+http://wortschatz.uni-leipzig.de/findlinks/)" + ] + }, + { + "pattern": "msrbot", + "instances": [] + }, + { + "pattern": "panscient", + "instances": [ + "panscient.com" + ] + }, + { + "pattern": "yacybot", + "instances": [ + "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.13.0-61-generic; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.19.0-15-generic; java 1.8.0_45-internal; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 
3.2.0-4-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_67; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html", + "yacybot (-global; amd64 Windows 8 6.2; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (-global; amd64 Windows 8.1 6.3; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 FreeBSD 10.3-RELEASE; java 1.8.0_77; GMT/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab093.4; java 1.7.0_65; Etc/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab094.8; java 1.7.0_79; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-042stab108.8; java 1.7.0_91; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 2.6.32-573.3.1.el6.x86_64; java 1.7.0_85; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.10.0-229.7.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.10.0-327.22.2.el7.x86_64; java 1.7.0_101; Etc/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.11.10-21-desktop; java 1.7.0_51; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.12.1; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-45-generic; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-74-generic; java 1.7.0_91; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 
3.13.0-83-generic; java 1.7.0_95; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.13.0-88-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.14-0.bpo.1-amd64; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16-0.bpo.2-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_111; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; America/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_91; Europe/de) http://yacy.net/bot.html", + "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_95; Europe/en) http://yacy.net/bot.html" + ] + }, + { + "pattern": "AISearchBot", + "instances": [] + }, + { + "pattern": "IOI", + "instances": [] + }, + { + "pattern": "ips-agent", + "instances": [ + "BlackBerry9000/4.6.0.167 Profile/MIDP-2.0 Configuration/CLDC-1.1 VendorID/102 ips-agent", + "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12; ips-agent) Gecko/20050922 Fedora/1.0.7-1.1.fc4 Firefox/1.0.7", + "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.3; ips-agent) Gecko/20090824 
Fedora/1.0.7-1.1.fc4 Firefox/3.5.3", + "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.24; ips-agent) Gecko/20111107 Ubuntu/10.04 (lucid) Firefox/3.6.24", + "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:14.0; ips-agent) Gecko/20100101 Firefox/14.0.1" + ] + }, + { + "pattern": "tagoobot", + "instances": [] + }, + { + "pattern": "MJ12bot", + "instances": [ + "MJ12bot/v1.2.0 (http://majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.1; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.3; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.4; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.2.5; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.0; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.1; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.2; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.3.3; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.0; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.1; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.2; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.3; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.4 (domain ownership verifier); http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.5; http://www.majestic12.co.uk/bot.php?+)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.6; http://mj12bot.com/)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)", + "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)" + ] + }, + { + "pattern": "woriobot", + "instances": [ + "Mozilla/5.0 (compatible; 
woriobot +http://worio.com)", + "Mozilla/5.0 (compatible; woriobot support [at] zite [dot] com +http://zite.com)" + ] + }, + { + "pattern": "yanga", + "instances": [ + "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)" + ] + }, + { + "pattern": "buzzbot", + "instances": [ + "Buzzbot/1.0 (Buzzbot; http://www.buzzstream.com; buzzbot@buzzstream.com)" + ] + }, + { + "pattern": "mlbot", + "instances": [ + "MLBot (www.metadatalabs.com/mlbot)" + ] + }, + { + "pattern": "YandexBot", + "url": "http://yandex.com/bots", + "instances": [ + "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)" + ], + "addition_date": "2015/04/14" + }, + { + "pattern": "yandex.com\\/bots", + "url": "https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml#robot-in-logs", + "instances": [ + "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)" + ], + "addition_date": "2016/12/01" + }, + { + "pattern": "purebot", + "addition_date": "2010/01/19", + "instances": [] + }, + { + "pattern": "Linguee Bot", + "addition_date": "2010/01/26", + "url": "http://www.linguee.com/bot", + "instances": [ + "Linguee Bot (http://www.linguee.com/bot)", + "Linguee Bot (http://www.linguee.com/bot; bot@linguee.com)" + ] + }, + { + "pattern": "CyberPatrol", + "addition_date": "2010/02/11", + "url": "http://www.cyberpatrol.com/cyberpatrolcrawler.asp", + "instances": [ + "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)" + ] + }, + { + "pattern": "voilabot", + "addition_date": "2010/05/18", + "instances": [ + "Mozilla/5.0 (Windows NT 5.1; U; Win64; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", + "Mozilla/5.0 (compatible; OrangeBot/2.0; support.voilabot@orange.com)" + ] + }, + { + "pattern": "Baiduspider", + "addition_date": "2010/07/15", + "url": "http://www.baidu.jp/spider/", + "instances": [ + 
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" + ] + }, + { + "pattern": "citeseerxbot", + "addition_date": "2010/07/17", + "instances": [] + }, + { + "pattern": "spbot", + "addition_date": "2010/07/31", + "url": "http://www.seoprofiler.com/bot", + "instances": [ + "Mozilla/5.0 (compatible; spbot/1.0; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/1.1; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/1.2; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.1; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.2; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.3; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.0.4; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/2.0; +http://www.seoprofiler.com/bot/ )", + "Mozilla/5.0 (compatible; spbot/2.1; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/3.0; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/3.1; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.1; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.2; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.3; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.4; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.5; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.6; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.7; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.7; +https://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.8; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.0.9; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.0; +http://www.seoprofiler.com/bot )", + 
"Mozilla/5.0 (compatible; spbot/4.0a; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.0b; +http://www.seoprofiler.com/bot )", + "Mozilla/5.0 (compatible; spbot/4.1.0; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.2.0; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.3.0; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.4.0; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.4.1; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/4.4.2; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/5.0.1; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/5.0.2; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/5.0.3; +http://OpenLinkProfiler.org/bot )", + "Mozilla/5.0 (compatible; spbot/5.0; +http://OpenLinkProfiler.org/bot )" + ] + }, + { + "pattern": "twengabot", + "addition_date": "2010/08/03", + "url": "http://www.twenga.com/bot.html", + "instances": [] + }, + { + "pattern": "postrank", + "addition_date": "2010/08/03", + "url": "http://www.postrank.com", + "instances": [ + "PostRank/2.0 (postrank.com)", + "PostRank/2.0 (postrank.com; 1 subscribers)" + ] + }, + { + "pattern": "turnitinbot", + "addition_date": "2010/09/26", + "url": "http://www.turnitin.com", + "instances": [] + }, + { + "pattern": "scribdbot", + "addition_date": "2010/09/28", + "url": "http://www.scribd.com", + "instances": [] + }, + { + "pattern": "page2rss", + "addition_date": "2010/10/07", + "url": "http://www.page2rss.com", + "instances": [ + "Mozilla/5.0 (compatible; Page2RSS/0.7; +http://page2rss.com/)" + ] + }, + { + "pattern": "sitebot", + "addition_date": "2010/12/15", + "url": "http://www.sitebot.org", + "instances": [ + "Mozilla/5.0 (compatible; Whoiswebsitebot/0.1; +http://www.whoiswebsite.net)" + ] + }, + { + "pattern": "linkdex", + "addition_date": "2011/01/06", + "url": 
"http://www.linkdex.com", + "instances": [ + "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/about/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/bots/)", + "Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)", + "linkdex.com/v2.0", + "linkdexbot/Nutch-1.0-dev (http://www.linkdex.com/; crawl at linkdex dot com)" + ] + }, + { + "pattern": "Adidxbot", + "url": "http://onlinehelp.microsoft.com/en-us/bing/hh204496.aspx", + "instances": [] + }, + { + "pattern": "blekkobot", + "url": "http://blekko.com/about/blekkobot", + "instances": [ + "Mozilla/5.0 (compatible; Blekkobot; ScoutJet; +http://blekko.com/about/blekkobot)" + ] + }, + { + "pattern": "ezooms", + "addition_date": "2011/04/27", + "url": "http://www.phpbb.com/community/viewtopic.php?f=64&t=935605&start=450#p12948289", + "instances": [ + "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot@gmail.com)" + ] + }, + { + "pattern": "dotbot", + "addition_date": "2011/04/27", + "instances": [ + "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)", + "dotbot" + ] + }, + { + "pattern": "Mail.RU_Bot", + "addition_date": "2011/04/27", + "instances": [ + "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/", + "Mozilla/5.0 (compatible; Mail.RU_Bot/2.0; +http://go.mail.ru/" + ] + }, + { + "pattern": "discobot", + "addition_date": "2011/05/03", + "url": "http://discoveryengine.com/discobot.html", + "instances": [ + "Mozilla/5.0 (compatible; discobot/1.0; +http://discoveryengine.com/discobot.html)", + "Mozilla/5.0 (compatible; discobot/2.0; +http://discoveryengine.com/discobot.html)", + "mozilla/5.0 (compatible; discobot/1.1; +http://discoveryengine.com/discobot.html)" + ] + }, + { + "pattern": "heritrix", + "addition_date": "2011/06/21", + 
"url": "http://crawler.archive.org/", + "instances": [ + "Mozilla/5.0 (compatible; archive.org_bot/heritrix-1.15.4 +http://www.archive.org)", + "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.webarchiv.cz)", + "Mozilla/5.0 (compatible; heritrix/1.12.1b +http://netarkivet.dk/website/info.html)", + "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://rjpower.org)", + "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://www.webarchiv.cz)", + "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://archive.org)", + "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.accelobot.com)", + "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.webarchiv.cz)", + "Mozilla/5.0 (compatible; heritrix/1.14.3.r6601 +http://www.buddybuzz.net/yptrino)", + "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://parsijoo.ir)", + "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://www.exif-search.com)", + "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://aihit.com)", + "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://seekda.com)", + "Mozilla/5.0 (compatible; heritrix/3.0.0-SNAPSHOT-20091120.021634 +http://crawler.archive.org)", + "Mozilla/5.0 (compatible; heritrix/3.1.0-RC1 +http://boston.lti.cs.cmu.edu/crawler_12/)", + "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://places.tomtom.com/crawlerinfo)", + "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://www.mixdata.com)", + "Mozilla/5.0 (compatible; heritrix/3.1.1-SNAPSHOT-20120116.200628 +http://www.archive.org/details/archive.org_bot)", + "Mozilla/5.0 (compatible; heritrix/3.1.1; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)", + "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.crim.ca)", + "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.exif-search.com)", + "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.mixdata.com)", + "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20140702-2247 +http://archive.org/details/archive.org_bot)", + "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20160309-0050; UniLeipzigASV 
+http://corpora.informatik.uni-leipzig.de/crawler_faq.html)", + "Mozilla/5.0 (compatible; sukibot_heritrix/3.1.1 +http://suki.ling.helsinki.fi/eng/webmasters.html)" + ] + }, + { + "pattern": "findthatfile", + "addition_date": "2011/06/21", + "url": "http://www.findthatfile.com/", + "instances": [] + }, + { + "pattern": "europarchive.org", + "addition_date": "2011/06/21", + "url": "", + "instances": [ + "Mozilla/5.0 (compatible; MSIE 7.0 +http://www.europarchive.org)" + ] + }, + { + "pattern": "NerdByNature.Bot", + "addition_date": "2011/07/12", + "url": "http://www.nerdbynature.net/bot", + "instances": [ + "Mozilla/5.0 (compatible; NerdByNature.Bot; http://www.nerdbynature.net/bot)" + ] + }, + { + "pattern": "sistrix crawler", + "addition_date": "2011/08/02", + "instances": [] + }, + { + "pattern": "AhrefsBot", + "addition_date": "2011/08/28", + "instances": [ + "Mozilla/5.0 (compatible; AhrefsBot/5.2; News; +http://ahrefs.com/robot/)" + ] + }, + { + "pattern": "Aboundex", + "addition_date": "2011/09/28", + "url": "http://www.aboundex.com/crawler/", + "instances": [ + "Aboundex/0.2 (http://www.aboundex.com/crawler/)", + "Aboundex/0.3 (http://www.aboundex.com/crawler/)" + ] + }, + { + "pattern": "domaincrawler", + "addition_date": "2011/10/21", + "instances": [ + "CipaCrawler/3.0 (info@domaincrawler.com; http://www.domaincrawler.com/www.example.com)" + ] + }, + { + "pattern": "wbsearchbot", + "addition_date": "2011/12/21", + "url": "http://www.warebay.com/bot.html", + "instances": [] + }, + { + "pattern": "summify", + "addition_date": "2012/01/04", + "url": "http://summify.com", + "instances": [ + "Summify (Summify/1.0.1; +http://summify.com)" + ] + }, + { + "pattern": "CCBot", + "addition_date": "2012/02/05", + "url": "http://www.commoncrawl.org/bot.html", + "instances": [ + "CCBot/2.0 (http://commoncrawl.org/faq/)" + ] + }, + { + "pattern": "edisterbot", + "addition_date": "2012/02/25", + "instances": [] + }, + { + "pattern": "seznambot", + "addition_date": 
"2012/03/14", + "instances": [ + "Mozilla/5.0 (compatible; SeznamBot/3.2-test1-1; +http://napoveda.seznam.cz/en/seznambot-intro/)", + "Mozilla/5.0 (compatible; SeznamBot/3.2-test1; +http://napoveda.seznam.cz/en/seznambot-intro/)", + "Mozilla/5.0 (compatible; SeznamBot/3.2-test2; +http://napoveda.seznam.cz/en/seznambot-intro/)", + "Mozilla/5.0 (compatible; SeznamBot/3.2-test4; +http://napoveda.seznam.cz/en/seznambot-intro/)", + "Mozilla/5.0 (compatible; SeznamBot/3.2; +http://napoveda.seznam.cz/en/seznambot-intro/)" + ] + }, + { + "pattern": "ec2linkfinder", + "addition_date": "2012/03/22", + "instances": [ + "ec2linkfinder" + ] + }, + { + "pattern": "gslfbot", + "addition_date": "2012/04/03", + "instances": [] + }, + { + "pattern": "aihitbot", + "addition_date": "2012/04/16", + "instances": [] + }, + { + "pattern": "intelium_bot", + "addition_date": "2012/05/07", + "instances": [] + }, + { + "pattern": "facebookexternalhit", + "addition_date": "2012/05/07", + "instances": [ + "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)", + "facebookexternalhit/1.1", + "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" + ] + }, + { + "pattern": "Yeti", + "addition_date": "2012/05/07", + "url": "http://naver.me/bot", + "instances": [ + "Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/bot)" + ] + }, + { + "pattern": "RetrevoPageAnalyzer", + "addition_date": "2012/05/07", + "instances": [ + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; RetrevoPageAnalyzer; +http://www.retrevo.com/content/about-us)" + ] + }, + { + "pattern": "lb-spider", + "addition_date": "2012/05/07", + "instances": [] + }, + { + "pattern": "Sogou", + "addition_date": "2012/05/13", + "url": "http://www.sogou.com/docs/help/webmasters.htm#07", + "instances": [ + "Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", + "Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", + "Sogou web 
spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)" + ] + }, + { + "pattern": "lssbot", + "addition_date": "2012/05/15", + "instances": [] + }, + { + "pattern": "careerbot", + "addition_date": "2012/05/23", + "url": "http://www.career-x.de/bot.html", + "instances": [] + }, + { + "pattern": "wotbox", + "addition_date": "2012/06/12", + "url": "http://www.wotbox.com", + "instances": [ + "Wotbox/2.0 (bot@wotbox.com; http://www.wotbox.com)", + "Wotbox/2.01 (+http://www.wotbox.com/bot/)" + ] + }, + { + "pattern": "wocbot", + "addition_date": "2012/07/25", + "url": "http://www.wocodi.com/crawler", + "instances": [] + }, + { + "pattern": "ichiro", + "addition_date": "2012/08/28", + "url": "http://help.goo.ne.jp/help/article/1142", + "instances": [ + "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)", + "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)", + "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)", + "DoCoMo/2.0 P900i(c100;TB;W24H11)(compatible; ichiro/mobile goo;+http://help.goo.ne.jp/door/crawler.html)", + "DoCoMo/2.0 P901i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/door/crawler.html)", + "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)", + "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)", + "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)", + "ichiro/2.0 (http://help.goo.ne.jp/door/crawler.html)", + "ichiro/2.0 (ichiro@nttr.co.jp)", + "ichiro/3.0 (http://help.goo.ne.jp/door/crawler.html)", + "ichiro/3.0 (http://help.goo.ne.jp/help/article/1142)", + "ichiro/3.0 
(http://search.goo.ne.jp/option/use/sub4/sub4-1/)", + "ichiro/4.0 (http://help.goo.ne.jp/door/crawler.html)", + "ichiro/5.0 (http://help.goo.ne.jp/door/crawler.html)" + ] + }, + { + "pattern": "DuckDuckBot", + "addition_date": "2012/09/19", + "url": "http://duckduckgo.com/duckduckbot.html", + "instances": [ + "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)", + "DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html)" + ] + }, + { + "pattern": "lssrocketcrawler", + "addition_date": "2012/09/24", + "instances": [] + }, + { + "pattern": "drupact", + "addition_date": "2012/09/27", + "url": "http://www.arocom.de/drupact", + "instances": [ + "drupact/0.7; http://www.arocom.de/drupact" + ] + }, + { + "pattern": "webcompanycrawler", + "addition_date": "2012/10/03", + "instances": [] + }, + { + "pattern": "acoonbot", + "addition_date": "2012/10/07", + "url": "http://www.acoon.de/robot.asp", + "instances": [] + }, + { + "pattern": "openindexspider", + "addition_date": "2012/10/26", + "url": "http://www.openindex.io/en/webmasters/spider.html", + "instances": [] + }, + { + "pattern": "gnam gnam spider", + "addition_date": "2012/10/31", + "instances": [] + }, + { + "pattern": "web-archive-net.com.bot", + "instances": [] + }, + { + "pattern": "backlinkcrawler", + "addition_date": "2013/01/04", + "instances": [] + }, + { + "pattern": "coccoc", + "addition_date": "2013/01/04", + "url": "http://help.coccoc.vn/", + "instances": [ + "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", + "Mozilla/5.0 (compatible; image.coccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/)", + "Mozilla/5.0 (compatible; imagecoccoc/1.0; 
+http://help.coccoc.com/searchengine)", + "coccoc", + "coccoc/1.0 ()", + "coccoc/1.0 (http://help.coccoc.com/)", + "coccoc/1.0 (http://help.coccoc.vn/)" + ] + }, + { + "pattern": "integromedb", + "addition_date": "2013/01/10", + "url": "http://www.integromedb.org/Crawler", + "instances": [ + "www.integromedb.org/Crawler" + ] + }, + { + "pattern": "content crawler spider", + "addition_date": "2013/01/11", + "instances": [] + }, + { + "pattern": "toplistbot", + "addition_date": "2013/02/05", + "instances": [] + }, + { + "pattern": "seokicks-robot", + "addition_date": "2013/02/25", + "instances": [] + }, + { + "pattern": "it2media-domain-crawler", + "addition_date": "2013/03/12", + "instances": [ + "it2media-domain-crawler/1.0 on crawler-prod.it2media.de", + "it2media-domain-crawler/2.0" + ] + }, + { + "pattern": "ip-web-crawler.com", + "addition_date": "2013/03/22", + "instances": [] + }, + { + "pattern": "siteexplorer.info", + "addition_date": "2013/05/01", + "instances": [ + "Mozilla/5.0 (compatible; SiteExplorer/1.0b; +http://siteexplorer.info/)", + "Mozilla/5.0 (compatible; SiteExplorer/1.1b; +http://siteexplorer.info/Backlink-Checker-Spider/)" + ] + }, + { + "pattern": "elisabot", + "addition_date": "2013/06/27", + "instances": [] + }, + { + "pattern": "proximic", + "addition_date": "2013/09/12", + "url": "http://www.proximic.com/info/spider.php", + "instances": [ + "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com)", + "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)" + ] + }, + { + "pattern": "changedetection", + "addition_date": "2013/09/13", + "url": "http://www.changedetection.com/bot.html", + "instances": [ + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )" + ] + }, + { + "pattern": "blexbot", + "addition_date": "2013/10/03", + "url": "http://webmeup-crawler.com/", + "instances": [] + }, + { + "pattern": "arabot", + "addition_date": "2013/10/09", + "instances": [] + 
}, + { + "pattern": "WeSEE:Search", + "addition_date": "2013/11/18", + "instances": [ + "WeSEE:Search", + "WeSEE:Search/0.1 (Alpha, http://www.wesee.com/en/support/bot/)" + ] + }, + { + "pattern": "niki-bot", + "addition_date": "2014/01/01", + "instances": [] + }, + { + "pattern": "CrystalSemanticsBot", + "addition_date": "2014/02/17", + "url": "http://www.crystalsemantics.com/user-agent/", + "instances": [] + }, + { + "pattern": "rogerbot", + "addition_date": "2014/02/28", + "url": "http://moz.com/help/pro/what-is-rogerbot-", + "instances": [ + "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)", + "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+partager@moz.com)", + "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+shiny@moz.com)", + "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com", + "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com)", + "rogerbot/1.0 (http://www.moz.com/dp/rogerbot, rogerbot-crawler@moz.com)", + "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler+shiny@seomoz.org)", + "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler@seomoz.org)", + "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-wherecat@moz.com)", + "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr2-crawler-05@moz.com)", + "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-11@moz.com)", + "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-15@moz.com)", + "rogerbot/1.2 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+phaser-testing-crawler-01@moz.com)" + ] + }, + { + "pattern": "360Spider", + "addition_date": "2014/03/14", + "url": "http://needs-be.blogspot.co.uk/2013/02/how-to-block-spider360.html", 
+ "instances": [ + "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider", + "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)", + "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 QIHU 360SE; 360Spider", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; ) Firefox/1.5.0.11; 360Spider", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Firefox/1.5.0.11; 360Spider", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Firefox/1.5.0.11 360Spider;", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Gecko/20070312 Firefox/1.5.0.11; 360Spider", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)" + ] + }, + { + "pattern": "psbot", + "addition_date": "2014/03/31", + "url": "http://www.picsearch.com/bot.html", + "instances": [ + "psbot-image (+http://www.picsearch.com/bot.html)", + "psbot-page (+http://www.picsearch.com/bot.html)", + "psbot/0.1 (+http://www.picsearch.com/bot.html)" + ] + }, + { + "pattern": "InterfaxScanBot", + "addition_date": "2014/03/31", + "url": "http://scan-interfax.ru", + "instances": [] + }, + { + "pattern": "CC Metadata Scaper", + "addition_date": "2014/04/01", + "url": "http://wiki.creativecommons.org/Metadata_Scraper", + "instances": [ + "CC Metadata Scaper http://wiki.creativecommons.org/Metadata_Scraper" + ] + }, + { + "pattern": "g00g1e.net", + "addition_date": "2014/04/01", + "url": "http://www.g00g1e.net/", + "instances": [] + }, + { + "pattern": "GrapeshotCrawler", + "addition_date": "2014/04/01", + "url": "http://www.grapeshot.co.uk/crawler.php", + "instances": [ + "Mozilla/5.0 (compatible; 
GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)" + ] + }, + { + "pattern": "urlappendbot", + "addition_date": "2014/05/10", + "url": "http://www.profound.net/urlappendbot.html", + "instances": [ + "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)" + ] + }, + { + "pattern": "brainobot", + "addition_date": "2014/06/24", + "instances": [] + }, + { + "pattern": "fr-crawler", + "addition_date": "2014/07/31", + "instances": [ + "Mozilla/5.0 (compatible; fr-crawler/1.1)" + ] + }, + { + "pattern": "binlar", + "addition_date": "2014/09/12", + "instances": [ + "binlar_2.6.3 binlar2.6.3@unspecified.mail", + "binlar_2.6.3 binlar_2.6.3@unspecified.mail", + "binlar_2.6.3 larbin2.6.3@unspecified.mail", + "binlar_2.6.3 phanendra_kalapala@McAfee.com", + "binlar_2.6.3 test@mgmt.mic" + ] + }, + { + "pattern": "SimpleCrawler", + "addition_date": "2014/09/12", + "instances": [ + "SimpleCrawler/0.1" + ] + }, + { + "pattern": "Twitterbot", + "addition_date": "2014/09/12", + "url": "https://dev.twitter.com/cards/getting-started", + "instances": [ + "Twitterbot/0.1", + "Twitterbot/1.0" + ] + }, + { + "pattern": "cXensebot", + "addition_date": "2014/10/05", + "instances": [ + "cXensebot/1.1a" + ], + "url": "http://www.cxense.com/bot.html" + }, + { + "pattern": "smtbot", + "addition_date": "2014/10/04", + "instances": [ + "Mozilla/5.0 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot)", + "SMTBot (similartech.com/smtbot)" + ], + "url": "http://www.similartech.com/smtbot" + }, + { + "pattern": "bnf.fr_bot", + "addition_date": "2014/11/18", + "url": "http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html", + "instances": [ + "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)" + ] + }, + { + "pattern": "A6-Indexer", + "addition_date": "2014/12/05", + "url": "http://www.a6corp.com/a6-web-scraping-policy/", + "instances": [ + "A6-Indexer" + ] + }, + { + "pattern": "ADmantX", + 
"addition_date": "2014/12/05", + "url": "http://www.admantx.com", + "instances": [ + "ADmantX Platform Semantic Analyzer - ADmantX Inc. - www.admantx.com - support@admantx.com" + ] + }, + { + "pattern": "Facebot", + "url": "https://developers.facebook.com/docs/sharing/best-practices#crawl", + "addition_date": "2014/12/30", + "instances": [ + "Facebot/1.0" + ] + }, + { + "pattern": "OrangeBot", + "instances": [ + "Mozilla/5.0 (compatible; OrangeBot/2.0; support.orangebot@orange.com" + ], + "addition_date": "2015/01/12" + }, + { + "pattern": "memorybot", + "url": "http://mignify.com/bot.htm", + "instances": [ + "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)" + ], + "addition_date": "2015/02/01" + }, + { + "pattern": "AdvBot", + "url": "http://advbot.net/bot.html", + "instances": [ + "Mozilla/5.0 (compatible; AdvBot/2.0; +http://advbot.net/bot.html)" + ], + "addition_date": "2015/02/01" + }, + { + "pattern": "MegaIndex", + "url": "https://www.megaindex.ru/?tab=linkAnalyze", + "instances": [ + "Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +https://www.megaindex.ru/?tab=linkAnalyze)" + ], + "addition_date": "2015/03/28" + }, + { + "pattern": "SemanticScholarBot", + "url": "http://s2.allenai.org/bot.html", + "instances": [ + "SemanticScholarBot/1.0 (+http://s2.allenai.org/bot.html)" + ], + "addition_date": "2015/03/28" + }, + { + "pattern": "ltx71", + "url": "http://ltx71.com/", + "instances": [ + "ltx71 - (http://ltx71.com/)" + ], + "addition_date": "2015/04/04" + }, + { + "pattern": "nerdybot", + "url": "http://nerdybot.com/", + "instances": [ + "nerdybot" + ], + "addition_date": "2015/04/05" + }, + { + "pattern": "xovibot", + "url": "http://www.xovibot.net/", + "instances": [ + "Mozilla/5.0 (compatible; XoviBot/2.0; +http://www.xovibot.net/)" + ], + "addition_date": "2015/04/05" + }, + { + "pattern": "BUbiNG", + "url": "http://law.di.unimi.it/BUbiNG.html", + "instances": [ + "BUbiNG (+http://law.di.unimi.it/BUbiNG.html)" + ], + 
"addition_date": "2015/04/06" + }, + { + "pattern": "Qwantify", + "url": "https://www.qwant.com/", + "instances": [ + "Mozilla/5.0 (compatible; Qwantify/2.0n; +https://www.qwant.com/)/*" + ], + "addition_date": "2015/04/06" + }, + { + "pattern": "archive.org_bot", + "url": "http://www.archive.org/details/archive.org_bot", + "instances": [ + "Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)" + ], + "addition_date": "2015/04/14" + }, + { + "pattern": "Applebot", + "url": "http://www.apple.com/go/applebot", + "addition_date": "2015/04/15", + "instances": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1)", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)", + "Mozilla/5.0 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)" + ] + }, + { + "pattern": "TweetmemeBot", + "url": "http://datasift.com/bot.html", + "instances": [ + "Mozilla/5.0 (TweetmemeBot/4.0; +http://datasift.com/bot.html) Gecko/20100101 Firefox/31.0" + ], + "addition_date": "2015/04/15" + }, + { + "pattern": "crawler4j", + "url": "https://github.com/yasserg/crawler4j", + "instances": [ + "crawler4j (http://code.google.com/p/crawler4j/)" + ], + "addition_date": "2015/05/07" + }, + { + "pattern": "findxbot", + "url": "http://www.findxbot.com", + "instances": [ + "Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com)" + ], + "addition_date": "2015/05/07" + }, 
+ { + "pattern": "S[eE][mM]rushBot", + "url": "http://www.semrush.com/bot.html", + "instances": [ + "Mozilla/5.0 (compatible; SemrushBot/0.98~bl; +http://www.semrush.com/bot.html)", + "SEMrushBot" + ], + "addition_date": "2015/05/26" + }, + { + "pattern": "yoozBot", + "url": "http://yooz.ir", + "instances": [ + "Mozilla/5.0 (compatible; yoozBot-2.2; http://yooz.ir; info@yooz.ir)" + ], + "addition_date": "2015/05/26" + }, + { + "pattern": "lipperhey", + "url": "http://www.lipperhey.com/", + "instances": [ + "Mozilla/5.0 (compatible; Lipperhey Link Explorer; http://www.lipperhey.com/)", + "Mozilla/5.0 (compatible; Lipperhey SEO Service; http://www.lipperhey.com/)", + "Mozilla/5.0 (compatible; Lipperhey Site Explorer; http://www.lipperhey.com/)", + "Mozilla/5.0 (compatible; Lipperhey-Kaus-Australis/5.0; +https://www.lipperhey.com/en/about/)" + ], + "addition_date": "2015/08/26" + }, + { + "pattern": "Y!J-ASR", + "url": "http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/", + "instances": [ + "Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)" + ], + "addition_date": "2015/05/26" + }, + { + "pattern": "Domain Re-Animator Bot", + "url": "http://domainreanimator.com", + "instances": [ + "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com" + ], + "addition_date": "2015/04/14" + }, + { + "pattern": "AddThis", + "url": "https://www.addthis.com", + "instances": [ + "AddThis.com robot tech.support@clearspring.com" + ], + "addition_date": "2015/06/02" + }, + { + "pattern": "Screaming Frog SEO Spider", + "url": "http://www.screamingfrog.co.uk/seo-spider", + "instances": [ + "Screaming Frog SEO Spider/5.1" + ], + "addition_date": "2016/01/08" + }, + { + "pattern": "MetaURI", + "url": "http://www.useragentstring.com/MetaURI_id_17683.php", + "instances": [ + "MetaURI API/2.0 +metauri.com" + ], + "addition_date": "2016/01/02" + }, + { + "pattern": "Scrapy", + "url": "http://scrapy.org/", + "instances": [ + 
"Scrapy/1.0.3 (+http://scrapy.org)" + ], + "addition_date": "2016/01/02" + }, + { + "pattern": "Livelap[bB]ot", + "url": "http://site.livelap.com/crawler", + "instances": [ + "LivelapBot/0.2 (http://site.livelap.com/crawler)", + "Livelapbot/0.1" + ], + "addition_date": "2016/01/02" + }, + { + "pattern": "OpenHoseBot", + "url": "http://www.openhose.org/bot.html", + "instances": [ + "Mozilla/5.0 (compatible; OpenHoseBot/2.1; +http://www.openhose.org/bot.html)" + ], + "addition_date": "2016/01/02" + }, + { + "pattern": "CapsuleChecker", + "url": "http://www.capsulink.com/about", + "instances": [ + "CapsuleChecker (http://www.capsulink.com/)" + ], + "addition_date": "2016/01/02" + }, + { + "pattern": "collection@infegy.com", + "url": "http://infegy.com/", + "instances": [ + "Mozilla/5.0 (compatible) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36 collection@infegy.com" + ], + "addition_date": "2016/01/03" + }, + { + "pattern": "IstellaBot", + "url": "http://www.tiscali.it/", + "instances": [ + "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)" + ], + "addition_date": "2016/01/09" + }, + { + "pattern": "DeuSu\\/", + "addition_date": "2016/01/23", + "url": "https://deusu.de/robot.html", + "instances": [ + "Mozilla/5.0 (compatible; DeuSu/0.1.0; +https://deusu.org)", + "Mozilla/5.0 (compatible; DeuSu/5.0.2; +https://deusu.de/robot.html)" + ] + }, + { + "pattern": "betaBot", + "addition_date": "2016/01/23", + "instances": [] + }, + { + "pattern": "Cliqzbot\\/", + "addition_date": "2016/01/23", + "url": "http://cliqz.com/company/cliqzbot", + "instances": [ + "Cliqzbot/0.1 (+http://cliqz.com +cliqzbot@cliqz.com)", + "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)", + "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)", + "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)" + ] + }, + { + "pattern": "MojeekBot\\/", + "addition_date": "2016/01/23", + "url": 
"https://www.mojeek.com/bot.html", + "instances": [ + "MojeekBot/0.2 (archi; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html#relaunch)", + "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.5; http://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.6; +https://www.mojeek.com/bot.html)", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)" + ] + }, + { + "pattern": "netEstate NE Crawler", + "addition_date": "2016/01/23", + "url": "+http://www.website-datenbank.de/", + "instances": [ + "netEstate NE Crawler (+http://www.sengine.info/)", + "netEstate NE Crawler (+http://www.website-datenbank.de/)" + ] + }, + { + "pattern": "SafeSearch microdata crawler", + "addition_date": "2016/01/23", + "url": "https://safesearch.avira.com", + "instances": [ + "SafeSearch microdata crawler (https://safesearch.avira.com, safesearch-abuse@avira.com)" + ] + }, + { + "pattern": "Gluten Free Crawler\\/", + "addition_date": "2016/01/23", + "url": "http://glutenfreepleasure.com/", + "instances": [ + "Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/)" + ] + }, + { + "pattern": "Sonic", + "addition_date": "2016/02/08", + "url": "http://www.yama.info.waseda.ac.jp/~crawler/info.html", + "instances": [ + "Mozilla/5.0 (compatible; RankSonicSiteAuditor/1.0; +https://ranksonic.com/ranksonic_sab.html)", + "Mozilla/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)", + "Mozzila/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)" + ] + }, + { + "pattern": "Sysomos", + "addition_date": "2016/02/08", + "url": "http://www.sysomos.com", + "instances": [ + "Mozilla/5.0 (compatible; Sysomos/1.0; +http://www.sysomos.com/; Sysomos)" + ] + }, + { + "pattern": "Trove", + "addition_date": "2016/02/08", + "url": "http://www.trove.com", + "instances": [] + 
}, + { + "pattern": "deadlinkchecker", + "addition_date": "2016/02/08", + "url": "http://www.deadlinkchecker.com", + "instances": [ + "www.deadlinkchecker.com Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36", + "www.deadlinkchecker.com XMLHTTP/1.0", + "www.deadlinkchecker.com XMLHTTP/1.0 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36" + ] + }, + { + "pattern": "Slack-ImgProxy", + "addition_date": "2016/04/25", + "url": "https://api.slack.com/robots", + "instances": [ + "Slack-ImgProxy (+https://api.slack.com/robots)", + "Slack-ImgProxy 0.59 (+https://api.slack.com/robots)", + "Slack-ImgProxy 0.66 (+https://api.slack.com/robots)", + "Slack-ImgProxy 1.106 (+https://api.slack.com/robots)", + "Slack-ImgProxy 1.138 (+https://api.slack.com/robots)", + "Slack-ImgProxy 149 (+https://api.slack.com/robots)" + ] + }, + { + "pattern": "Embedly", + "addition_date": "2016/04/25", + "url": "http://support.embed.ly", + "instances": [ + "Embedly +support@embed.ly", + "Mozilla/5.0 (compatible; Embedly/0.2; +http://support.embed.ly/)", + "Mozilla/5.0 (compatible; Embedly/0.2; snap; +http://support.embed.ly/)" + ] + }, + { + "pattern": "RankActiveLinkBot", + "addition_date": "2016/06/20", + "url": "https://rankactive.com/resources/rankactive-linkbot", + "instances": [ + "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)" + ] + }, + { + "pattern": "iskanie", + "addition_date": "2016/09/02", + "url": "http://www.iskanie.com", + "instances": [ + "iskanie (+http://www.iskanie.com)" + ] + }, + { + "pattern": "SafeDNSBot", + "addition_date": "2016/09/10", + "url": "https://www.safedns.com/searchbot", + "instances": [ + "SafeDNSBot (https://www.safedns.com/searchbot)" + ] + }, + { + "pattern": "SkypeUriPreview", + "addition_date": "2016/10/10", + "instances": [ + "Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview 
Preview/0.5" + ] + }, + { + "pattern": "Veoozbot", + "addition_date": "2016/11/03", + "url": "http://www.veooz.com/veoozbot.html", + "instances": [ + "Mozilla/5.0 (compatible; Veoozbot/1.0; +http://www.veooz.com/veoozbot.html)" + ] + }, + { + "pattern": "Slackbot", + "addition_date": "2016/11/03", + "url": "https://api.slack.com/robots", + "instances": [ + "Slackbot-LinkExpanding (+https://api.slack.com/robots)", + "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)" + ] + }, + { + "pattern": "redditbot", + "addition_date": "2016/11/03", + "url": "http://www.reddit.com/feedback", + "instances": [ + "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)" + ] + }, + { + "pattern": "datagnionbot", + "addition_date": "2016/11/03", + "url": "http://www.datagnion.com/bot.html", + "instances": [ + "datagnionbot (+http://www.datagnion.com/bot.html)" + ] + }, + { + "pattern": "Google-Adwords-Instant", + "addition_date": "2016/11/03", + "url": "http://www.google.com/adsbot.html", + "instances": [ + "Google-Adwords-Instant (+http://www.google.com/adsbot.html)" + ] + }, + { + "pattern": "adbeat_bot", + "addition_date": "2016/11/04", + "instances": [ + "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)", + "adbeat_bot" + ] + }, + { + "pattern": "WhatsApp", + "addition_date": "2016/11/15", + "url": "https://www.whatsapp.com/", + "instances": [ + "WhatsApp", + "WhatsApp/2.12.15/i", + "WhatsApp/2.12.16/i", + "WhatsApp/2.12.17/i", + "WhatsApp/2.12.449 A", + "WhatsApp/2.12.453 A", + "WhatsApp/2.12.510 A", + "WhatsApp/2.12.540 A", + "WhatsApp/2.12.548 A", + "WhatsApp/2.12.555 A", + "WhatsApp/2.12.556 A", + "WhatsApp/2.16.1/i", + "WhatsApp/2.16.13 A", + "WhatsApp/2.16.2/i", + "WhatsApp/2.16.42 A", + "WhatsApp/2.16.57 A" + ] + }, + { + "pattern": "contxbot", + "addition_date": "2017/02/25", + "instances": [ + "Mozilla/5.0 (compatible;contxbot/1.0)" + ] + }, + { + "pattern": "pinterest", + "addition_date": "2017/03/03", + 
"instances": [ + "Pinterest/0.2 (+http://www.pinterest.com/bot.html)" + ], + "url": "http://www.pinterest.com/bot.html" + }, + { + "pattern": "electricmonk", + "addition_date": "2017/03/04", + "instances": [ + "Mozilla/5.0 (compatible; electricmonk/3.2.0 +https://www.duedil.com/our-crawler/)" + ], + "url": "https://www.duedil.com/our-crawler/" + }, + { + "pattern": "GarlikCrawler", + "addition_date": "2017/03/18", + "instances": [ + "GarlikCrawler/1.2 (http://garlik.com/, crawler@garlik.com)" + ], + "url": "http://garlik.com/" + }, + { + "pattern": "BingPreview\\/", + "addition_date": "2017/04/23", + "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0", + "instances": [ + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b", + "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0; BingPreview/1.0b) like Gecko", + "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; WOW64; Trident/6.0; BingPreview/1.0b)", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; WOW64; Trident/5.0; BingPreview/1.0b)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 BingPreview/1.0b" + ] + }, + { + "pattern": "vebidoobot", + "addition_date": "2017/05/08", + "instances": [ + "Mozilla/5.0 (compatible; vebidoobot/1.0; +https://blog.vebidoo.de/vebidoobot/" + ], + "url": "https://blog.vebidoo.de/vebidoobot/" + }, + { + "pattern": "FemtosearchBot", + "addition_date": "2017/05/16", + "instances": [ + "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)" + ], + "url": "http://femtosearch.com" + }, + { + "pattern": "Yahoo Link Preview", + "addition_date": "2017/06/28", + "instances": [ + "Mozilla/5.0 (compatible; Yahoo Link Preview; https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html)" + ], + "url": "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html" + }, + { + 
"pattern": "MetaJobBot", + "addition_date": "2017/08/16", + "instances": [ + "Mozilla/5.0 (compatible; MetaJobBot; http://www.metajob.de/crawler)" + ], + "url": "http://www.metajob.de/the/crawler" + }, + { + "pattern": "DomainStatsBot", + "addition_date": "2017/08/16", + "instances": [ + "DomainStatsBot/1.0 (http://domainstats.io/our-bot)" + ], + "url": "http://domainstats.io/our-bot" + }, + { + "pattern": "mindUpBot", + "addition_date": "2017/08/16", + "instances": [ + "mindUpBot (datenbutler.de)" + ], + "url": "http://www.datenbutler.de/" + }, + { + "pattern": "Daum", + "addition_date": "2017/08/16", + "instances": [ + "Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)" + ], + "url": "http://cs.daum.net/faq/15/4118.html?faqId=28966" + }, + { + "pattern": "Jugendschutzprogramm-Crawler", + "addition_date": "2017/08/16", + "instances": [ + "Jugendschutzprogramm-Crawler; Info: http://www.jugendschutzprogramm.de" + ], + "url": "http://www.jugendschutzprogramm.de" + }, + { + "pattern": "Xenu Link Sleuth", + "addition_date": "2017/08/19", + "instances": [ + "Xenu Link Sleuth/1.3.8" + ], + "url": "http://home.snafu.de/tilman/xenulink.html" + }, + { + "pattern": "Pcore-HTTP", + "addition_date": "2017/08/19", + "instances": [ + "Pcore-HTTP/v0.40.3" + ], + "url": "https://bitbucket.org/softvisio/pcore/overview" + }, + { + "pattern": "moatbot", + "addition_date": "2017/09/16", + "instances": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36 moatbot", + "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4 moatbot" + ], + "url": "https://moat.com" + }, + { + "pattern": "KosmioBot", + "addition_date": "2017/09/16", + "instances": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36 (compatible; KosmioBot/1.0; 
+http://kosm.io/bot.html)" + ], + "url": "http://kosm.io/bot.html" + }, + { + "pattern": "Pingdom", + "addition_date": "2017/09/16", + "instances": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/59.0.3071.109 Chrome/59.0.3071.109 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; +http://www.pingdom.com/)" + ], + "url": "http://www.pingdom.com" + }, + { + "pattern": "PhantomJS", + "addition_date": "2017/09/18", + "instances": [ + "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1 bl.uk_lddc_renderbot/2.0.0 (+ http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)" + ], + "url": "http://phantomjs.org/" + }, + { + "pattern": "Gowikibot", + "addition_date": "2017/10/26", + "instances": [ + "Mozilla/5.0 (compatible; Gowikibot/1.0; +http://www.gowikibot.com)" + ], + "url": "http://www.gowikibot.com" + }, + { + "pattern": "PiplBot", + "addition_date": "2017/10/30", + "instances": [ + "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)" + ], + "url": "http://www.pipl.com/bot/" + }, + { + "pattern": "Discordbot", + "addition_date": "2017/09/22", + "url": "https://discordapp.com", + "instances": [ + "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)" + ] + }, + { + "pattern": "TelegramBot", + "addition_date": "2017/10/01", + "instances": [ + "TelegramBot (like TwitterBot)" + ] + }, + { + "pattern": "InfoPath.2", + "addition_date": "2017/10/07", + "instances": [ + "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; InfoPath.2)" + ] + }, + { + "pattern": "Jetslide", + "addition_date": "2017/09/27", + "url": "http://jetsli.de/crawler", + "instances": [ + "Mozilla/5.0 (compatible; Jetslide; +http://jetsli.de/crawler)" + ] + }, + { + "pattern": "newsharecounts", + "addition_date": "2017/09/30", + "url": 
"http://newsharecounts.com/crawler", + "instances": [ + "Mozilla/5.0 (compatible; NewShareCounts.com/1.0; +http://newsharecounts.com/crawler)" + ] + }, + { + "pattern": "James BOT", + "addition_date": "2017/10/12", + "url": "http://cognitiveseo.com/bot.html", + "instances": [ + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6 - James BOT - WebCrawler http://cognitiveseo.com/bot.html" + ] + }, + { + "pattern": "Barkrowler", + "addition_date": "2017/10/09", + "url": "http://www.exensa.com/crawl", + "instances": [ + "Barkrowler/0.5.1 (experimenting / debugging - sorry for your logs ) http://www.exensa.com/crawl - admin@exensa.com -- based on BuBiNG" + ] + }, + { + "pattern": "TinEye", + "addition_date": "2017/10/14", + "url": "http://www.tineye.com/crawler.html", + "instances": [ + "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)", + "TinEye/1.1 (http://tineye.com/crawler.html)" + ] + }, + { + "pattern": "SocialRankIOBot", + "addition_date": "2017/10/19", + "url": "http://socialrank.io/about", + "instances": [ + "SocialRankIOBot; http://socialrank.io/about" + ] + }, + { + "pattern": "trendictionbot", + "addition_date": "2017/10/30", + "url": "http://www.trendiction.de/bot", + "instances": [ + "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11" + ] + }, + { + "pattern": "Ocarinabot", + "addition_date": "2017/09/27", + "instances": [ + "Ocarinabot" + ] + }, + { + "pattern": "epicbot", + "addition_date": "2017/10/31", + "url": "http://www.epictions.com/epicbot", + "instances": [ + "Mozilla/5.0 (compatible; epicbot; +http://www.epictions.com/epicbot)" + ] + }, + { + "pattern": "Primalbot", + "addition_date": "2017/09/27", + "url": "https://www.primal.com", + "instances": [ + "Mozilla/5.0 (compatible; Primalbot; 
+https://www.primal.com;)" + ] + }, + { + "pattern": "DuckDuckGo-Favicons-Bot", + "addition_date": "2017/10/06", + "url": "http://duckduckgo.com", + "instances": [ + "Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com)" + ] + }, + { + "pattern": "GnowitNewsbot", + "addition_date": "2017/10/30", + "url": "http://www.gnowit.com", + "instances": [ + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0 / GnowitNewsbot / Contact information at http://www.gnowit.com" + ] + }, + { + "pattern": "Leikibot", + "addition_date": "2017/09/24", + "url": "http://www.leiki.com", + "instances": [ + "Mozilla/5.0 (Windows NT 6.3;compatible; Leikibot/1.0; +http://www.leiki.com)" + ] + }, + { + "pattern": "LinkArchiver", + "addition_date": "2017/09/24", + "instances": [ + "@LinkArchiver twitter bot" + ] + }, + { + "pattern": "YaK", + "addition_date": "2017/09/25", + "url": "http://linkfluence.com", + "instances": [ + "Mozilla/5.0 (compatible; YaK/1.0; http://linkfluence.com/; bot@linkfluence.com)" + ] + }, + { + "pattern": "PaperLiBot", + "addition_date": "2017/09/25", + "url": "http://support.paper.li/entries/20023257-what-is-paper-li", + "instances": [ + "Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li)" + ] + }, + { + "pattern": "Digg Deeper", + "addition_date": "2017/09/26", + "url": "http://digg.com/about", + "instances": [ + "Digg Deeper/v1 (http://digg.com/about)" + ] + }, + { + "pattern": "dcrawl", + "addition_date": "2017/09/22", + "instances": [ + "dcrawl/1.0" + ] + }, + { + "pattern": "Snacktory", + "addition_date": "2017/09/23", + "url": "https://github.com/karussell/snacktory", + "instances": [ + "Mozilla/5.0 (compatible; Snacktory; +https://github.com/karussell/snacktory)" + ] + }, + { + "pattern": "AndersPinkBot", + "addition_date": "2017/09/24", + "url": "http://anderspink.com/bot.html", + "instances": [ + "Mozilla/5.0 (compatible; AndersPinkBot/1.0; 
+http://anderspink.com/bot.html)" + ] + }, + { + "pattern": "Fyrebot", + "addition_date": "2017/09/22", + "instances": [ + "Fyrebot/1.0" + ] + }, + { + "pattern": "EveryoneSocialBot", + "addition_date": "2017/09/22", + "url": "http://everyonesocial.com", + "instances": [ + "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)" + ] + }, + { + "pattern": "Mediatoolkitbot", + "addition_date": "2017/10/06", + "url": "http://mediatoolkit.com", + "instances": [ + "Mediatoolkitbot (complaints@mediatoolkit.com)" + ] + }, + { + "pattern": "Luminator-robots", + "addition_date": "2017/09/22", + "instances": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.13 Luminator-robots/2.0" + ] + }, + { + "pattern": "ExtLinksBot", + "addition_date": "2017/11/02", + "url": "https://extlinks.com/Bot.html", + "instances": [ + "Mozilla/5.0 (compatible; ExtLinksBot/1.5 +https://extlinks.com/Bot.html)" + ] + }, + { + "pattern": "SurveyBot", + "addition_date": "2017/11/02", + "instances": [ + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.13) Gecko/2009073022 Firefox/3.5.2 (.NET CLR 3.5.30729) SurveyBot/2.3 (DomainTools)" + ] + }, + { + "pattern": "NING", + "addition_date": "2017/11/02", + "instances": [ + "NING/1.0" + ] + }, + { + "pattern": "okhttp", + "addition_date": "2017/11/02", + "instances": [ + "okhttp/2.5.0" + ] + }, + { + "pattern": "Nuzzel", + "addition_date": "2017/11/02", + "instances": [ + "Nuzzel" + ] + }, + { + "pattern": "omgili", + "addition_date": "2017/11/02", + "url": "http://omgili.com", + "instances": [ + "omgili/0.5 +http://omgili.com" + ] + }, + { + "pattern": "PocketParser", + "addition_date": "2017/11/02", + "url": "https://getpocket.com/pocketparser_ua", + "instances": [ + "PocketParser/2.0 (+https://getpocket.com/pocketparser_ua)" + ] + }, + { + "pattern": "YisouSpider", + "addition_date": "2017/11/02", + "instances": [ + "YisouSpider" 
+ ] + }, + { + "pattern": "um-LN", + "addition_date": "2017/11/02", + "instances": [ + "Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com)" + ] + }, + { + "pattern": "ToutiaoSpider", + "addition_date": "2017/11/02", + "url": "http://web.toutiao.com/media_cooperation/", + "instances": [ + "Mozilla/5.0 (compatible; ToutiaoSpider/1.0; http://web.toutiao.com/media_cooperation/;)" + ] + }, + { + "pattern": "MuckRack", + "addition_date": "2017/11/02", + "url": "http://muckrack.com", + "instances": [ + "Mozilla/5.0 (compatible; MuckRack/1.0; +http://muckrack.com)" + ] + }, + { + "pattern": "Jamie's Spider", + "addition_date": "2017/11/02", + "url": "http://jamiembrown.com/", + "instances": [ + "Jamie's Spider (http://jamiembrown.com/)" + ] + }, + { + "pattern": "AHC", + "addition_date": "2017/11/02", + "instances": [ + "AHC/2.0" + ] + }, + { + "pattern": "NetcraftSurveyAgent", + "addition_date": "2017/11/02", + "instances": [ + "Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)" + ] + }, + { + "pattern": "Laserlikebot", + "addition_date": "2017/11/02", + "instances": [ + "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Laserlikebot/0.1)" + ] + }, + { + "pattern": "Apache-HttpClient", + "addition_date": "2017/11/02", + "instances": [ + "Apache-HttpClient/4.4.1 (Java/1.8.0_65)" + ] + }, + { + "pattern": "AppEngine-Google", + "addition_date": "2017/11/02", + "instances": [ + "AppEngine-Google; (+http://code.google.com/appengine; appid: example)" + ] + }, + { + "pattern": "Jetty", + "addition_date": "2017/11/02", + "instances": [ + "Jetty/9.3.z-SNAPSHOT" + ] + }, + { + "pattern": "Upflow", + "addition_date": "2017/11/02", + "instances": [ + "Upflow/1.0" + ] + }, + { + "pattern": "Thinklab", + "addition_date": "2017/11/02", + "url": "thinklab.com", + "instances": [ + "Thinklab (thinklab.com)" + ] + }, + { + "pattern": 
"Traackr.com", + "addition_date": "2017/11/02", + "url": "Traackr.com", + "instances": [ + "Traackr.com" + ] + }, + { + "pattern": "Twurly", + "addition_date": "2017/11/02", + "url": "http://twurly.org", + "instances": [ + "Ruby, Twurly v1.1 (http://twurly.org)" + ] + }, + { + "pattern": "Mastodon", + "addition_date": "2017/11/02", + "instances": [ + "http.rb/2.2.2 (Mastodon/1.5.1; +https://example-masto-instance.org/)" + ] + }, + { + "pattern": "http_get", + "addition_date": "2017/11/02", + "instances": [ + "http_get" + ] + }, + { + "pattern": "DnyzBot", + "addition_date": "2017/11/20", + "instances": [ + "Mozilla/5.0 (compatible; DnyzBot/1.0)" + ] + }, + { + "pattern": "botify", + "addition_date": "2018/02/01", + "instances": [ + "Mozilla/5.0 (compatible; botify; http://botify.com)" + ] + }, + { + "pattern": "007ac9 Crawler", + "addition_date": "2018/02/09", + "instances": [ + "Mozilla/5.0 (compatible; 007ac9 Crawler; http://crawler.007ac9.net/)" + ] + }, + { + "pattern": "BehloolBot", + "addition_date": "2018/02/09", + "instances": [ + "Mozilla/5.0 (compatible; BehloolBot/beta; +http://www.webeaver.com/bot)" + ] + }, + { + "pattern": "BrandVerity", + "addition_date": "2018/02/27", + "instances": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/55.0 BrandVerity/1.0 (http://www.brandverity.com/why-is-brandverity-visiting-me)" + ] + }, + { + "pattern": "check_http", + "addition_date": "2018/02/09", + "instances": [ + "check_http/v2.2.1 (nagios-plugins 2.2.1)" + ] + }, + { + "pattern": "BDCbot", + "addition_date": "2018/02/09", + "instances": [ + "Mozilla/5.0 (Windows NT 6.1; compatible; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36" + ] + }, + { + "pattern": "ZumBot", + "addition_date": "2018/02/09", + "instances": [ + "Mozilla/5.0 (compatible; ZumBot/1.0; http://help.zum.com/inquiry)" + ] + }, + { + "pattern": "EZID", + "addition_date": 
"2018/02/09", + "instances": [ + "EZID (EZID link checker; https://ezid.cdlib.org/)" + ] + }, + { + "pattern": "ICC-Crawler", + "addition_date": "2018/02/28", + "instances": [ + "ICC-Crawler/2.0 (Mozilla-compatible; ; http://ucri.nict.go.jp/en/icccrawler.html)" + ], + "url": "http://ucri.nict.go.jp/en/icccrawler.html" + }, + { + "pattern": "ArchiveBot", + "addition_date": "2018/02/28", + "instances": [ + "ArchiveTeam ArchiveBot/20170106.02 (wpull 2.0.2)" + ], + "url": "https://github.com/ArchiveTeam/ArchiveBot" + }, + { + "pattern": "LCC", + "addition_date": "2018/02/28", + "instances": [ + "LCC (+http://corpora.informatik.uni-leipzig.de/crawler_faq.html)" + ], + "url": "http://corpora.informatik.uni-leipzig.de/crawler_faq.html" + }, + { + "pattern": "filterdb.iss.net\\/crawler", + "addition_date": "2018/03/16", + "instances": [ + "Mozilla/5.0 (compatible; oBot/2.3.1; +http://filterdb.iss.net/crawler/)" + ], + "url": "http://filterdb.iss.net/crawler/" + }, + { + "pattern": "BLP_bbot", + "addition_date": "2018/03/27", + "instances": [ + "BLP_bbot/0.1" + ] + }, + { + "pattern": "BomboraBot", + "addition_date": "2018/03/27", + "instances": [ + "Mozilla/5.0 (compatible; BomboraBot/1.0; +http://www.bombora.com/bot)" + ], + "url": "http://www.bombora.com/bot" + }, + { + "pattern": "Buck\\/", + "addition_date": "2018/03/27", + "instances": [ + "Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html)" + ], + "url": "https://app.hypefactors.com/media-monitoring/about.html" + }, + { + "pattern": "Companybook-Crawler", + "addition_date": "2018/03/27", + "instances": [ + "Companybook-Crawler (+https://www.companybooknetworking.com/)" + ], + "url": "https://www.companybooknetworking.com/" + }, + { + "pattern": "Genieo", + "addition_date": "2018/03/27", + "instances": [ + "Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html)" + ], + "url": "http://www.genieo.com/webfilter.html" + }, + { + "pattern": "magpie-crawler", + "addition_date": 
"2018/03/27", + "instances": [ + "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)" + ], + "url": "http://www.brandwatch.net" + }, + { + "pattern": "MeltwaterNews", + "addition_date": "2018/03/27", + "instances": [ + "MeltwaterNews www.meltwater.com" + ], + "url": "http://www.meltwater.com" + }, + { + "pattern": "Moreover", + "addition_date": "2018/03/27", + "instances": [ + "Mozilla/5.0 Moreover/5.1 (+http://www.moreover.com)" + ], + "url": "http://www.moreover.com" + }, + { + "pattern": "newspaper\\/", + "addition_date": "2018/03/27", + "instances": [ + "newspaper/0.2.5", + "newspaper/0.2.6", + "newspaper/0.1.0.7" + ] + }, + { + "pattern": "ScoutJet", + "addition_date": "2018/03/27", + "instances": [ + "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)" + ], + "url": "http://www.scoutjet.com/" + }, + { + "pattern": "sentry\\/", + "addition_date": "2018/03/27", + "instances": [ + "sentry/8.22.0 (https://sentry.io)" + ], + "url": "https://sentry.io" + }, + { + "pattern": "StorygizeBot", + "addition_date": "2018/03/27", + "instances": [ + "Mozilla/5.0 (compatible; StorygizeBot; http://www.storygize.com)" + ], + "url": "http://www.storygize.com" + }, + { + "pattern": "UptimeRobot", + "addition_date": "2018/03/27", + "instances": [ + "Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)" + ], + "url": "http://www.uptimerobot.com/" + }, + { + "pattern": "OutclicksBot", + "addition_date": "2018/04/21", + "instances": [ + "OutclicksBot/2 +https://www.outclicks.net/agent/VjzDygCuk4ubNmg40ZMbFqT0sIh7UfOKk8s8ZMiupUR", + "OutclicksBot/2 +https://www.outclicks.net/agent/gIYbZ38dfAuhZkrFVl7sJBFOUhOVct6J1SvxgmBZgCe", + "OutclicksBot/2 +https://www.outclicks.net/agent/PryJzTl8POCRHfvEUlRN5FKtZoWDQOBEvFJ2wh6KH5J" + ], + "url": "https://www.outclicks.net" + } +] diff --git a/lib/gatheranalytics.php b/lib/gatheranalytics.php index 279eb66..25c2e7b 100644 --- a/lib/gatheranalytics.php +++ b/lib/gatheranalytics.php @@ -21,6 +21,13 
@@ if (!$database->has("settings", ["AND" => ["siteid" => getsiteid(), "key" => "an
         throw new Exception("Do-Not-Track header detected, skipping analytics");
     }
 
+    /*
+     * Skip analytics for known bots and crawlers.
+     *
+     * bots.json is a list of entries whose "pattern" value is a regular
+     * expression fragment (forward slashes are already escaped in the data)
+     * that is matched against the request's User-Agent header.
+     *
+     * A request without a User-Agent header is not classified as a bot, as
+     * before, but it no longer triggers an undefined-index notice.
+     */
+    $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? (string) $_SERVER['HTTP_USER_AGENT'] : '';
+    if ($userAgent !== '') {
+        // A missing or corrupt bots.json must not break analytics gathering,
+        // so fall back to an empty list instead of iterating over null/false.
+        $botsFile = __DIR__ . "/bots.json";
+        $botsJson = is_readable($botsFile) ? file_get_contents($botsFile) : false;
+        $bots = ($botsJson === false) ? null : json_decode($botsJson, true);
+        if (!is_array($bots)) {
+            $bots = [];
+        }
+
+        foreach ($bots as $bot) {
+            // Ignore malformed entries; an empty pattern would match every visitor.
+            if (!isset($bot['pattern']) || !is_string($bot['pattern']) || $bot['pattern'] === '') {
+                continue;
+            }
+            // preg_match() returns 1 on a match, 0 on no match and false on
+            // error, so compare strictly instead of relying on truthiness.
+            if (preg_match('/' . $bot['pattern'] . '/', $userAgent) === 1) {
+                throw new Exception("Bot/crawler detected, skipping analytics");
+            }
+        }
+    }
+
     $time = date("Y-m-d H:i:s");
 
     /**