All downloads are FREE. Search and download functionality uses the official Maven repository.

regexes.bots.yml Maven / Gradle / Ivy

# 360Spider, categorised as a search bot. The pattern is the literal text
# '360Spider'; no producer URL is recorded.
- regex: '360Spider'
  name: '360Spider'
  category: 'Search bot'
  url: 'https://www.so.com/help/help_3_2.html'
  producer:
    name: 'Online Media Group, Inc.'
    url: ''

# Aboundexbot search bot. The pattern 'Aboundex' is a prefix of the display
# name 'Aboundexbot'.
- regex: 'Aboundex'
  name: 'Aboundexbot'
  category: 'Search bot'
  url: 'http://www.aboundex.com/crawler/'
  producer:
    name: 'Aboundex.com'
    url: 'http://www.aboundex.com'

# Acoon search bot; the pattern is the literal text 'AcoonBot'.
- regex: 'AcoonBot'
  name: 'Acoon'
  category: 'Search bot'
  url: 'http://www.acoon.de/robot.asp'
  producer:
    name: 'Acoon GmbH'
    url: 'http://www.acoon.de'

# AddThis.com social media agent. The dot in the pattern is escaped so it
# matches only a literal '.'. No bot information URL is recorded.
- regex: 'AddThis\.com'
  name: 'AddThis.com'
  category: 'Social Media Agent'
  url: ''
  producer:
    name: 'Clearspring Technologies, Inc.'
    url: 'http://www.clearspring.com'

# aHrefs Bot crawler; the pattern is the literal text 'AhrefsBot'.
# NOTE(review): the producer url is identical to the bot url (the /robot
# page) rather than a company home page -- confirm this is intended.
- regex: 'AhrefsBot'
  name: 'aHrefs Bot'
  category: 'Crawler'
  url: 'https://ahrefs.com/robot'
  producer:
    name: 'Ahrefs Pte Ltd'
    url: 'https://ahrefs.com/robot'

# Alexa Crawler search bot. Three alternative tokens (ia_archiver, alexabot,
# verifybot) all map to this single entry.
- regex: 'ia_archiver|alexabot|verifybot'
  name: 'Alexa Crawler'
  category: 'Search bot'
  url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
  producer:
    name: 'Alexa Internet'
    url: 'https://www.alexa.com'

# Alexa Site Audit, categorised as a site monitor. The pattern is the
# lower-case literal text 'alexa site audit' (including the spaces).
- regex: 'alexa site audit'
  name: 'Alexa Site Audit'
  category: 'Site Monitor'
  url: 'https://support.alexa.com/hc/en-us/articles/200450194'
  producer:
    name: 'Alexa Internet'
    url: 'https://www.alexa.com'

# Amazon Bot crawler; the pattern is the literal text 'Amazonbot'.
- regex: 'Amazonbot'
  name: 'Amazon Bot'
  category: 'Crawler'
  url: 'https://developer.amazon.com/support/amazonbot'
  producer:
    name: 'Amazon.com, Inc.'
    url: 'https://www.amazon.com/'

# Amazon Route53 Health Check service agent. Each '[ -]' class accepts either
# a space or a hyphen between words, and the space before '53' is optional.
# No bot information page is recorded, so url is an explicit empty string.
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
  name: 'Amazon Route53 Health Check'
  category: 'Service Agent'
  url: ''
  producer:
    name: 'Amazon Web Services'
    url: 'https://aws.amazon.com/'

# Amorank Spider crawler; the pattern is the literal text 'AmorankSpider'.
- regex: 'AmorankSpider'
  name: 'Amorank Spider'
  category: 'Crawler'
  url: 'http://amorank.com/webcrawler.html'
  producer:
    name: 'Amorank'
    url: 'http://www.amorank.com'

# ApacheBench, categorised as a benchmark tool; the pattern is the literal
# text 'ApacheBench'.
- regex: 'ApacheBench'
  name: 'ApacheBench'
  category: 'Benchmark'
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
  producer:
    name: 'The Apache Software Foundation'
    url: 'https://www.apache.org/foundation/'

# Applebot crawler; the pattern is the literal text 'Applebot'.
- regex: 'Applebot'
  name: 'Applebot'
  category: 'Crawler'
  url: 'https://support.apple.com/en-us/HT204683'
  producer:
    name: 'Apple Inc'
    url: 'https://www.apple.com'

# AppSignalBot, categorised as a site monitor; the pattern is the literal
# text 'AppSignalBot'.
- regex: 'AppSignalBot'
  name: 'AppSignalBot'
  category: 'Site Monitor'
  url: 'https://docs.appsignal.com/uptime-monitoring/'
  producer:
    name: 'AppSignal'
    url: 'https://appsignal.com/'

# Arachni, categorised as a security checker; the pattern is the literal
# text 'Arachni'.
- regex: 'Arachni'
  name: 'Arachni'
  category: 'Security Checker'
  url: 'https://www.arachni-scanner.com/'
  producer:
    name: 'Sarosys LLC'
    url: 'https://www.sarosys.com/'

# AspiegelBot crawler, with Huawei recorded as its producer; the pattern is
# the literal text 'AspiegelBot'.
- regex: 'AspiegelBot'
  name: 'AspiegelBot'
  category: 'Crawler'
  url: 'https://aspiegel.com/'
  producer:
    name: 'Huawei'
    url: 'https://www.huawei.com/'

# Castro 2 service agent. The pattern is the full literal text
# 'Castro 2, Episode Duration Lookup', which is longer than the display name.
- regex: 'Castro 2, Episode Duration Lookup'
  name: 'Castro 2'
  category: 'Service Agent'
  url: 'http://supertop.co/castro/'
  producer:
    name: 'Supertop'
    url: 'http://supertop.co'

# Analytics SEO Crawler. The matched text 'Curious George' differs from the
# display name reported for this entry.
- regex: 'Curious George'
  name: 'Analytics SEO Crawler'
  category: 'Crawler'
  url: 'http://www.analyticsseo.com/crawler'
  producer:
    name: 'Analytics SEO'
    url: 'http://www.analyticsseo.com'

# archive.org bot crawler. Two alternatives map here: 'archive.org_bot'
# (with the dot escaped so it matches only a literal '.') and
# 'special_archiver'.
- regex: 'archive\.org_bot|special_archiver'
  name: 'archive.org bot'
  category: 'Crawler'
  url: 'https://archive.org/details/archive.org_bot'
  producer:
    name: 'The Internet Archive'
    url: 'https://archive.org'

# Ask Jeeves search bot; the pattern is the literal text 'Ask Jeeves/Teoma'.
# No bot information URL is recorded.
- regex: 'Ask Jeeves/Teoma'
  name: 'Ask Jeeves'
  category: 'Search bot'
  url: ''
  producer:
    name: 'Ask Jeeves Inc.'
    url: 'http://www.ask.com'

# Backlink-Check.de crawler. The dot in the pattern is escaped so it matches
# only a literal '.'.
- regex: 'Backlink-Check\.de'
  name: 'Backlink-Check.de'
  category: 'Crawler'
  url: 'http://www.backlink-check.de/bot.html'
  producer:
    name: 'Mediagreen Medienservice'
    url: 'http://www.backlink-check.de'

# BacklinkCrawler crawler; the pattern is the literal text 'BacklinkCrawler'.
- regex: 'BacklinkCrawler'
  name: 'BacklinkCrawler'
  category: 'Crawler'
  url: 'http://www.backlinktest.com/crawler.html'
  producer:
    name: '2.0Promotion GbR'
    url: 'http://www.backlinktest.com'

# Baidu Spider search bot. Matches either 'Baidu' followed by any run of
# characters and then 'spider', or the literal text 'baidu Transcoder'.
- regex: 'Baidu.*spider|baidu Transcoder'
  name: 'Baidu Spider'
  category: 'Search bot'
  url: 'http://www.baidu.com/search/spider.htm'
  producer:
    name: 'Baidu'
    url: 'http://www.baidu.com'

# BazQux Reader feed fetcher; the pattern is the literal text 'BazQux'.
# No producer name or URL is recorded.
- regex: 'BazQux'
  name: 'BazQux Reader'
  category: 'Feed Fetcher'
  url: 'https://bazqux.com/fetcher'
  producer:
    name: ''
    url: ''

# Better Uptime Bot, categorised as a site monitor; the pattern is the
# literal text 'Better Uptime Bot' (including the spaces).
- regex: 'Better Uptime Bot'
  name: 'Better Uptime Bot'
  category: 'Site Monitor'
  url: 'https://betteruptime.com/faq'
  producer:
    name: 'Better Uptime'
    url: 'https://betteruptime.com/'

# BingBot search bot. Several alternative crawler tokens (MSNBot, msrbot,
# bingbot, BingPreview, the msnbot-UDiscovery / msnbot-NewsBlogs variants and
# adidxbot) all map to this single entry.
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'

# Blekkobot search bot; the pattern is the literal text 'Blekkobot'.
- regex: 'Blekkobot'
  name: 'Blekkobot'
  category: 'Search bot'
  url: 'http://blekko.com/about/blekkobot'
  producer:
    name: 'Blekko'
    url: 'http://blekko.com'

# BLEXBot Crawler; the pattern is the literal text 'BLEXBot'.
- regex: 'BLEXBot'
  name: 'BLEXBot Crawler'
  category: 'Crawler'
  url: 'http://webmeup-crawler.com'
  producer:
    name: 'WebMeUp'
    url: 'http://webmeup.com'

# Bloglovin feed fetcher; the pattern is the literal text 'Bloglovin'.
# No producer name or URL is recorded.
- regex: 'Bloglovin'
  name: 'Bloglovin'
  category: 'Feed Fetcher'
  url: 'http://www.bloglovin.com'
  producer:
    name: ''
    url: ''

# Blogtrottr feed fetcher; the pattern is the literal text 'Blogtrottr'.
# No bot information URL is recorded.
- regex: 'Blogtrottr'
  name: 'Blogtrottr'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Blogtrottr Ltd'
    url: 'https://blogtrottr.com/'

# BoardReader Blog Indexer crawler; the pattern is the literal text
# 'BoardReader Blog Indexer'. No bot information page is recorded, so url is
# an explicit empty string.
- regex: 'BoardReader Blog Indexer'
  name: 'BoardReader Blog Indexer'
  category: 'Crawler'
  url: ''
  producer:
    name: 'BoardReader'
    url: 'https://boardreader.com/'

# Bountii Bot search bot; the pattern is the literal text 'BountiiBot'.
- regex: 'BountiiBot'
  name: 'Bountii Bot'
  category: 'Search bot'
  url: 'http://bountii.com/contact.php'
  producer:
    name: 'Bountii Inc.'
    url: 'http://bountii.com'

# Browsershots service agent; the pattern is the literal text 'Browsershots'.
- regex: 'Browsershots'
  name: 'Browsershots'
  category: 'Service Agent'
  url: 'http://browsershots.org/faq'
  producer:
    name: 'Browsershots.org'
    url: 'http://browsershots.org'

# BUbiNG crawler; the pattern is the literal text 'BUbiNG'.
# NOTE(review): the producer url fragment '#buging' does not match the bot
# name and may be a typo -- confirm against the producer's site.
- regex: 'BUbiNG'
  name: 'BUbiNG'
  category: 'Crawler'
  url: 'http://law.di.unimi.it/BUbiNG.html'
  producer:
    name: 'The Laboratory for Web Algorithmics (LAW)'
    url: 'http://law.di.unimi.it/software.php#buging'

- regex: '(?




© 2015 - 2024 Weber Informatics LLC | Privacy Policy