All downloads are free. The search and download functionality uses the official Maven repository.

regexes.bots.yml Maven / Gradle / Ivy

Go to download

The Universal Device Detection library that parses User Agents and detects devices (desktop, tablet, mobile, tv, cars, console, etc.), clients (browsers, feed readers, media players, PIMs, ...), operating systems, brands and models.

There is a newer version: 1.0.10
Show newest version
###############
# Device Detector - The Universal Device Detection library for parsing User Agents
#
# @link http://piwik.org
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
###############

# 360Spider search bot. The pattern also covers the optional '-Image' and
# '-Video' suffixed variants of the token.
- regex: '360Spider(-Image|-Video)?'
  name: '360Spider'
  category: 'Search bot'
  url: 'http://www.so.com/help/help_3_2.html'
  producer:
    name: 'Online Media Group, Inc.'
    url: ''

# Aboundexbot search bot, identified by the token 'Aboundex'.
- regex: 'Aboundex'
  name: 'Aboundexbot'
  category: 'Search bot'
  url: 'http://www.aboundex.com/crawler/'
  producer:
    name: 'Aboundex.com'
    url: 'http://www.aboundex.com'

# Acoon search bot, identified by the token 'AcoonBot'.
- regex: 'AcoonBot'
  name: 'Acoon'
  category: 'Search bot'
  url: 'http://www.acoon.de/robot.asp'
  producer:
    name: 'Acoon GmbH'
    url: 'http://www.acoon.de'

# AddThis.com social media agent. The dot is escaped so it only matches a
# literal '.'; no bot-specific info url is recorded (empty string).
- regex: 'AddThis\.com'
  name: 'AddThis.com'
  category: 'Social Media Agent'
  url: ''
  producer:
    name: 'Clearspring Technologies, Inc.'
    url: 'http://www.clearspring.com'

# aHrefs crawler, identified by the token 'AhrefsBot'.
# NOTE(review): producer url is identical to the bot info url; other entries
# point the producer url at the company home page - confirm this is intended.
- regex: 'AhrefsBot'
  name: 'aHrefs Bot'
  category: 'Crawler'
  url: 'http://ahrefs.com/robot'
  producer:
    name: 'Ahrefs Pte Ltd'
    url: 'http://ahrefs.com/robot'

# Alexa crawler. Any one of the tokens 'ia_archiver', 'alexabot' or
# 'verifybot' is reported under this single name.
- regex: 'ia_archiver|alexabot|verifybot'
  name: 'Alexa Crawler'
  category: 'Search bot'
  url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
  producer:
    name: 'Alexa Internet'
    url: 'http://www.alexa.com'

# Alexa Site Audit monitor, identified by the phrase 'alexa site audit'.
# Shares its producer with the 'Alexa Crawler' entry.
- regex: 'alexa site audit'
  name: 'Alexa Site Audit'
  category: 'Site Monitor'
  url: 'http://www.alexa.com/help/webmasters'
  producer:
    name: 'Alexa Internet'
    url: 'http://www.alexa.com'

# Amazon Route 53 health checker. The pattern accepts either a space or a
# hyphen between the words and an optional space inside 'Route 53'.
# No bot-specific info url is recorded; the key is kept (empty) so every
# entry exposes the same set of fields.
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
  name: 'Amazon Route53 Health Check'
  category: 'Service Agent'
  url: ''
  producer:
    name: 'Amazon Web Services'
    url: 'https://aws.amazon.com/'

# Amorank crawler, identified by the token 'AmorankSpider'.
- regex: 'AmorankSpider'
  name: 'Amorank Spider'
  category: 'Crawler'
  url: 'http://amorank.com/webcrawler.html'
  producer:
    name: 'Amorank'
    url: 'http://www.amorank.com'

# ApacheBench (ab) benchmarking tool, identified by the token 'ApacheBench'.
- regex: 'ApacheBench'
  name: 'ApacheBench'
  category: 'Benchmark'
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
  producer:
    name: 'The Apache Software Foundation'
    url: 'http://www.apache.org/foundation/'

# Apple's crawler, identified by the token 'Applebot'.
- regex: 'Applebot'
  name: 'Applebot'
  category: 'Crawler'
  url: 'http://www.apple.com/go/applebot'
  producer:
    name: 'Apple Inc'
    url: 'http://www.apple.com'

# Arachni security scanner, identified by the token 'Arachni'.
- regex: 'Arachni'
  name: 'Arachni'
  category: 'Security Checker'
  url: 'http://www.arachni-scanner.com'
  producer:
    name: 'Sarosys LLC'
    url: 'http://www.sarosys.com/'

# Castro 2 service agent, identified by the literal phrase
# 'Castro 2, Episode Duration Lookup'.
- regex: 'Castro 2, Episode Duration Lookup'
  name: 'Castro 2'
  category: 'Service Agent'
  url: 'http://supertop.co/castro/'
  producer:
    name: 'Supertop'
    url: 'http://supertop.co'

# Analytics SEO crawler. The matched token is 'Curious George', which differs
# from the reported name.
- regex: 'Curious George'
  name: 'Analytics SEO Crawler'
  category: 'Crawler'
  url: 'http://www.analyticsseo.com/crawler'
  producer:
    name: 'Analytics SEO'
    url: 'http://www.analyticsseo.com'

# Internet Archive crawler. Matches either 'archive.org_bot' (with a literal,
# escaped dot) or 'special_archiver'.
- regex: 'archive\.org_bot|special_archiver'
  name: 'archive.org bot'
  category: 'Crawler'
  url: 'http://www.archive.org/details/archive.org_bot'
  producer:
    name: 'The Internet Archive'
    url: 'http://www.archive.org'

# Ask Jeeves search bot, identified by the phrase 'Ask Jeeves/Teoma'.
# No bot-specific info url is recorded (empty string).
- regex: 'Ask Jeeves/Teoma'
  name: 'Ask Jeeves'
  category: 'Search bot'
  url: ''
  producer:
    name: 'Ask Jeeves Inc.'
    url: 'http://www.ask.com'

# Backlink-Check.de crawler. The dot is escaped so it only matches a
# literal '.'.
- regex: 'Backlink-Check\.de'
  name: 'Backlink-Check.de'
  category: 'Crawler'
  url: 'http://www.backlink-check.de/bot.html'
  producer:
    name: 'Mediagreen Medienservice'
    url: 'http://www.backlink-check.de'

# BacklinkCrawler (backlinktest.com), identified by the token
# 'BacklinkCrawler'.
- regex: 'BacklinkCrawler'
  name: 'BacklinkCrawler'
  category: 'Crawler'
  url: 'http://www.backlinktest.com/crawler.html'
  producer:
    name: '2.0Promotion GbR'
    url: 'http://www.backlinktest.com'

# Baidu search bot. Three alternatives are accepted: 'baiduspider' with an
# optional '-image' suffix, 'baidu Transcoder', or 'baidu' followed by any
# characters and then 'spider'.
- regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
  name: 'Baidu Spider'
  category: 'Search bot'
  url: 'http://www.baidu.com/search/spider.htm'
  producer:
    name: 'Baidu'
    url: 'http://www.baidu.com'

# BazQux Reader feed fetcher, identified by the token 'BazQux'.
# No producer details are recorded (both fields are empty strings).
- regex: 'BazQux'
  name: 'BazQux Reader'
  category: 'Feed Fetcher'
  url: 'https://bazqux.com/fetcher'
  producer:
    name: ''
    url: ''

# Microsoft's search crawlers. All of the listed tokens (MSNBot, msrbot,
# bingbot, BingPreview, the msnbot-UDiscovery / msnbot-NewsBlogs variants and
# adidxbot) are reported under the single name 'BingBot'.
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'

# Blekko search bot, identified by the token 'Blekkobot'.
- regex: 'Blekkobot'
  name: 'Blekkobot'
  category: 'Search bot'
  url: 'http://blekko.com/about/blekkobot'
  producer:
    name: 'Blekko'
    url: 'http://blekko.com'

# WebMeUp's BLEXBot crawler. The pattern also covers the 'BLEXBotTest'
# variant via the optional 'Test' suffix.
- regex: 'BLEXBot(Test)?'
  name: 'BLEXBot Crawler'
  category: 'Crawler'
  url: 'http://webmeup-crawler.com'
  producer:
    name: 'WebMeUp'
    url: 'http://webmeup.com'

# Bloglovin feed fetcher, identified by the token 'Bloglovin'.
# No producer details are recorded (both fields are empty strings).
- regex: 'Bloglovin'
  name: 'Bloglovin'
  category: 'Feed Fetcher'
  url: 'http://www.bloglovin.com'
  producer:
    name: ''
    url: ''

# Blogtrottr feed fetcher, identified by the token 'Blogtrottr'.
# No bot-specific info url is recorded (empty string); only the producer
# url is set.
- regex: 'Blogtrottr'
  name: 'Blogtrottr'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Blogtrottr Ltd'
    url: 'https://blogtrottr.com/'

# BoardReader blog indexing crawler, identified by the phrase
# 'BoardReader Blog Indexer'.
# No bot-specific info url is recorded; the key is kept (empty) so every
# entry exposes the same set of fields.
- regex: 'BoardReader Blog Indexer'
  name: 'BoardReader Blog Indexer'
  category: 'Crawler'
  url: ''
  producer:
    name: 'BoardReader'
    url: 'http://boardreader.com/'

# Bountii search bot, identified by the token 'BountiiBot'.
- regex: 'BountiiBot'
  name: 'Bountii Bot'
  category: 'Search bot'
  url: 'http://bountii.com/contact.php'
  producer:
    name: 'Bountii Inc.'
    url: 'http://bountii.com'

# Browsershots service agent, identified by the token 'Browsershots'.
- regex: 'Browsershots'
  name: 'Browsershots'
  category: 'Service Agent'
  url: 'http://browsershots.org/faq'
  producer:
    name: 'Browsershots.org'
    url: 'http://browsershots.org'

# BUbiNG crawler from the Laboratory for Web Algorithmics, identified by the
# token 'BUbiNG'.
# NOTE(review): the producer url fragment was '#buging', which looks like a
# misspelling of the project name; corrected to '#bubing' - confirm against
# the live page.
- regex: 'BUbiNG'
  name: 'BUbiNG'
  category: 'Crawler'
  url: 'http://law.di.unimi.it/BUbiNG.html'
  producer:
    name: 'The Laboratory for Web Algorithmics (LAW)'
    url: 'http://law.di.unimi.it/software.php#bubing'

- regex: '(?




© 2015 - 2024 Weber Informatics LLC | Privacy Policy