# regexes.bots.yml
# The Universal Device Detection library that parses User Agents and detects devices (desktop, tablet, mobile, tv, cars, console, etc.), clients (browsers, feed readers, media players, PIMs, ...), operating systems, brands and models.
###############
# Device Detector - The Universal Device Detection library for parsing User Agents
#
# @link http://piwik.org
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
###############
# 360Spider (search bot): base token with an optional -Image or -Video suffix.
- regex: '360Spider(-Image|-Video)?'
  name: '360Spider'
  category: 'Search bot'
  url: 'http://www.so.com/help/help_3_2.html'
  producer:
    name: 'Online Media Group, Inc.'
    url: ''
# Aboundexbot (search bot): identified by the "Aboundex" token.
- regex: 'Aboundex'
  name: 'Aboundexbot'
  category: 'Search bot'
  url: 'http://www.aboundex.com/crawler/'
  producer:
    name: 'Aboundex.com'
    url: 'http://www.aboundex.com'
# Acoon (search bot): identified by the "AcoonBot" token.
- regex: 'AcoonBot'
  name: 'Acoon'
  category: 'Search bot'
  url: 'http://www.acoon.de/robot.asp'
  producer:
    name: 'Acoon GmbH'
    url: 'http://www.acoon.de'
# AddThis.com (social media agent): the dot is escaped so only a literal
# "AddThis.com" is covered. No bot info page is recorded (empty url).
- regex: 'AddThis\.com'
  name: 'AddThis.com'
  category: 'Social Media Agent'
  url: ''
  producer:
    name: 'Clearspring Technologies, Inc.'
    url: 'http://www.clearspring.com'
# aHrefs Bot (crawler): identified by the "AhrefsBot" token.
# The producer url points at the same robot page as the bot url.
- regex: 'AhrefsBot'
  name: 'aHrefs Bot'
  category: 'Crawler'
  url: 'http://ahrefs.com/robot'
  producer:
    name: 'Ahrefs Pte Ltd'
    url: 'http://ahrefs.com/robot'
# Alexa Crawler (search bot): any of the tokens ia_archiver, alexabot or verifybot.
- regex: 'ia_archiver|alexabot|verifybot'
  name: 'Alexa Crawler'
  category: 'Search bot'
  url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
  producer:
    name: 'Alexa Internet'
    url: 'http://www.alexa.com'
# Alexa Site Audit (site monitor): identified by the phrase "alexa site audit".
- regex: 'alexa site audit'
  name: 'Alexa Site Audit'
  category: 'Site Monitor'
  url: 'http://www.alexa.com/help/webmasters'
  producer:
    name: 'Alexa Internet'
    url: 'http://www.alexa.com'
# Amazon Route53 Health Check (service agent): the words may be separated by
# either a space or a hyphen, and the space inside "Route 53" is optional.
# No bot info page is recorded, so `url` is explicitly empty; this keeps the
# entry's key set identical to the other entries in this list.
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
  name: 'Amazon Route53 Health Check'
  category: 'Service Agent'
  url: ''
  producer:
    name: 'Amazon Web Services'
    url: 'https://aws.amazon.com/'
# Amorank Spider (crawler): identified by the "AmorankSpider" token.
- regex: 'AmorankSpider'
  name: 'Amorank Spider'
  category: 'Crawler'
  url: 'http://amorank.com/webcrawler.html'
  producer:
    name: 'Amorank'
    url: 'http://www.amorank.com'
# ApacheBench (benchmark): identified by the "ApacheBench" token.
- regex: 'ApacheBench'
  name: 'ApacheBench'
  category: 'Benchmark'
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
  producer:
    name: 'The Apache Software Foundation'
    url: 'http://www.apache.org/foundation/'
# Applebot (crawler): identified by the "Applebot" token.
- regex: 'Applebot'
  name: 'Applebot'
  category: 'Crawler'
  url: 'http://www.apple.com/go/applebot'
  producer:
    name: 'Apple Inc'
    url: 'http://www.apple.com'
# Arachni (security checker): identified by the "Arachni" token.
- regex: 'Arachni'
  name: 'Arachni'
  category: 'Security Checker'
  url: 'http://www.arachni-scanner.com'
  producer:
    name: 'Sarosys LLC'
    url: 'http://www.sarosys.com/'
# Castro 2 (service agent): identified by the full phrase
# "Castro 2, Episode Duration Lookup".
- regex: 'Castro 2, Episode Duration Lookup'
  name: 'Castro 2'
  category: 'Service Agent'
  url: 'http://supertop.co/castro/'
  producer:
    name: 'Supertop'
    url: 'http://supertop.co'
# Analytics SEO Crawler (crawler): the pattern is the phrase "Curious George",
# which differs from the display name.
- regex: 'Curious George'
  name: 'Analytics SEO Crawler'
  category: 'Crawler'
  url: 'http://www.analyticsseo.com/crawler'
  producer:
    name: 'Analytics SEO'
    url: 'http://www.analyticsseo.com'
# archive.org bot (crawler): either the literal "archive.org_bot" (dot escaped)
# or the "special_archiver" token.
- regex: 'archive\.org_bot|special_archiver'
  name: 'archive.org bot'
  category: 'Crawler'
  url: 'http://www.archive.org/details/archive.org_bot'
  producer:
    name: 'The Internet Archive'
    url: 'http://www.archive.org'
# Ask Jeeves (search bot): identified by "Ask Jeeves/Teoma".
# No bot info page is recorded (empty url).
- regex: 'Ask Jeeves/Teoma'
  name: 'Ask Jeeves'
  category: 'Search bot'
  url: ''
  producer:
    name: 'Ask Jeeves Inc.'
    url: 'http://www.ask.com'
# Backlink-Check.de (crawler): literal "Backlink-Check.de" with the dot escaped.
- regex: 'Backlink-Check\.de'
  name: 'Backlink-Check.de'
  category: 'Crawler'
  url: 'http://www.backlink-check.de/bot.html'
  producer:
    name: 'Mediagreen Medienservice'
    url: 'http://www.backlink-check.de'
# BacklinkCrawler (crawler): identified by the "BacklinkCrawler" token.
- regex: 'BacklinkCrawler'
  name: 'BacklinkCrawler'
  category: 'Crawler'
  url: 'http://www.backlinktest.com/crawler.html'
  producer:
    name: '2.0Promotion GbR'
    url: 'http://www.backlinktest.com'
# Baidu Spider (search bot): three alternatives - "baiduspider" with an optional
# "-image" suffix, "baidu Transcoder", or "baidu" followed anywhere later by
# "spider".
- regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
  name: 'Baidu Spider'
  category: 'Search bot'
  url: 'http://www.baidu.com/search/spider.htm'
  producer:
    name: 'Baidu'
    url: 'http://www.baidu.com'
# BazQux Reader (feed fetcher): identified by the "BazQux" token.
# No producer details are recorded (empty name and url).
- regex: 'BazQux'
  name: 'BazQux Reader'
  category: 'Feed Fetcher'
  url: 'https://bazqux.com/fetcher'
  producer:
    name: ''
    url: ''
# BingBot (search bot): covers the MSNBot, msrbot, bingbot, BingPreview and
# adidxbot tokens, plus the msnbot-UDiscovery and msnbot-NewsBlogs variants.
# The info url previously ended in ".htmn", a typo for ".htm".
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'
# Blekkobot (search bot): identified by the "Blekkobot" token.
- regex: 'Blekkobot'
  name: 'Blekkobot'
  category: 'Search bot'
  url: 'http://blekko.com/about/blekkobot'
  producer:
    name: 'Blekko'
    url: 'http://blekko.com'
# BLEXBot Crawler (crawler): "BLEXBot" with an optional "Test" suffix.
- regex: 'BLEXBot(Test)?'
  name: 'BLEXBot Crawler'
  category: 'Crawler'
  url: 'http://webmeup-crawler.com'
  producer:
    name: 'WebMeUp'
    url: 'http://webmeup.com'
# Bloglovin (feed fetcher): identified by the "Bloglovin" token.
# No producer details are recorded (empty name and url).
- regex: 'Bloglovin'
  name: 'Bloglovin'
  category: 'Feed Fetcher'
  url: 'http://www.bloglovin.com'
  producer:
    name: ''
    url: ''
# Blogtrottr (feed fetcher): identified by the "Blogtrottr" token.
# No bot info page is recorded (empty url); the producer site is listed below.
- regex: 'Blogtrottr'
  name: 'Blogtrottr'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Blogtrottr Ltd'
    url: 'https://blogtrottr.com/'
# BoardReader Blog Indexer (crawler): identified by the full phrase
# "BoardReader Blog Indexer".
# No bot info page is recorded, so `url` is explicitly empty; this keeps the
# entry's key set identical to the other entries in this list.
- regex: 'BoardReader Blog Indexer'
  name: 'BoardReader Blog Indexer'
  category: 'Crawler'
  url: ''
  producer:
    name: 'BoardReader'
    url: 'http://boardreader.com/'
# Bountii Bot (search bot): identified by the "BountiiBot" token.
- regex: 'BountiiBot'
  name: 'Bountii Bot'
  category: 'Search bot'
  url: 'http://bountii.com/contact.php'
  producer:
    name: 'Bountii Inc.'
    url: 'http://bountii.com'
# Browsershots (service agent): identified by the "Browsershots" token.
- regex: 'Browsershots'
  name: 'Browsershots'
  category: 'Service Agent'
  url: 'http://browsershots.org/faq'
  producer:
    name: 'Browsershots.org'
    url: 'http://browsershots.org'
# BUbiNG (crawler): identified by the "BUbiNG" token.
- regex: 'BUbiNG'
  name: 'BUbiNG'
  category: 'Crawler'
  url: 'http://law.di.unimi.it/BUbiNG.html'
  producer:
    name: 'The Laboratory for Web Algorithmics (LAW)'
    url: 'http://law.di.unimi.it/software.php#buging'
- regex: '(?