# regexes.bots.yml Maven / Gradle / Ivy
# 360Spider (Search bot): matches the token "360Spider".
- regex: '360Spider'
  name: '360Spider'
  category: 'Search bot'
  url: 'https://www.so.com/help/help_3_2.html'
  producer:
    name: 'Online Media Group, Inc.'
    url: ''
# Aboundexbot (Search bot): matches the token "Aboundex".
- regex: 'Aboundex'
  name: 'Aboundexbot'
  category: 'Search bot'
  url: 'http://www.aboundex.com/crawler/'
  producer:
    name: 'Aboundex.com'
    url: 'http://www.aboundex.com'
# Acoon (Search bot): matches the token "AcoonBot".
- regex: 'AcoonBot'
  name: 'Acoon'
  category: 'Search bot'
  url: 'http://www.acoon.de/robot.asp'
  producer:
    name: 'Acoon GmbH'
    url: 'http://www.acoon.de'
# AddThis.com (Social Media Agent): the dot is escaped so only a literal
# "AddThis.com" matches.
- regex: 'AddThis\.com'
  name: 'AddThis.com'
  category: 'Social Media Agent'
  url: ''
  producer:
    name: 'Clearspring Technologies, Inc.'
    url: 'http://www.clearspring.com'
# aHrefs Bot (Crawler): matches the token "AhrefsBot".
- regex: 'AhrefsBot'
  name: 'aHrefs Bot'
  category: 'Crawler'
  url: 'https://ahrefs.com/robot'
  producer:
    name: 'Ahrefs Pte Ltd'
    url: 'https://ahrefs.com/robot'
# Alexa Crawler (Search bot): matches any of "ia_archiver", "alexabot"
# or "verifybot".
- regex: 'ia_archiver|alexabot|verifybot'
  name: 'Alexa Crawler'
  category: 'Search bot'
  url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
  producer:
    name: 'Alexa Internet'
    url: 'https://www.alexa.com'
# Alexa Site Audit (Site Monitor): matches the phrase "alexa site audit".
- regex: 'alexa site audit'
  name: 'Alexa Site Audit'
  category: 'Site Monitor'
  url: 'https://support.alexa.com/hc/en-us/articles/200450194'
  producer:
    name: 'Alexa Internet'
    url: 'https://www.alexa.com'
# Amazon Bot (Crawler): matches the token "Amazonbot".
- regex: 'Amazonbot'
  name: 'Amazon Bot'
  category: 'Crawler'
  url: 'https://developer.amazon.com/support/amazonbot'
  producer:
    name: 'Amazon.com, Inc.'
    url: 'https://www.amazon.com/'
# Amazon Route53 Health Check (Service Agent): the words may be separated by
# either a space or a hyphen, and the space between "Route" and "53" is
# optional.
# NOTE(review): this entry has no `url` key, unlike most entries in this
# file — confirm consumers tolerate its absence.
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
  name: 'Amazon Route53 Health Check'
  category: 'Service Agent'
  producer:
    name: 'Amazon Web Services'
    url: 'https://aws.amazon.com/'
# Amorank Spider (Crawler): matches the token "AmorankSpider".
- regex: 'AmorankSpider'
  name: 'Amorank Spider'
  category: 'Crawler'
  url: 'http://amorank.com/webcrawler.html'
  producer:
    name: 'Amorank'
    url: 'http://www.amorank.com'
# ApacheBench (Benchmark): matches the token "ApacheBench".
- regex: 'ApacheBench'
  name: 'ApacheBench'
  category: 'Benchmark'
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
  producer:
    name: 'The Apache Software Foundation'
    url: 'https://www.apache.org/foundation/'
# Applebot (Crawler): matches the token "Applebot".
- regex: 'Applebot'
  name: 'Applebot'
  category: 'Crawler'
  url: 'https://support.apple.com/en-us/HT204683'
  producer:
    name: 'Apple Inc'
    url: 'https://www.apple.com'
# AppSignalBot (Site Monitor): matches the token "AppSignalBot".
# Single-quoted like every other entry; none of the values need escapes.
- regex: 'AppSignalBot'
  name: 'AppSignalBot'
  category: 'Site Monitor'
  url: 'https://docs.appsignal.com/uptime-monitoring/'
  producer:
    name: 'AppSignal'
    url: 'https://appsignal.com/'
# Arachni (Security Checker): matches the token "Arachni".
- regex: 'Arachni'
  name: 'Arachni'
  category: 'Security Checker'
  url: 'https://www.arachni-scanner.com/'
  producer:
    name: 'Sarosys LLC'
    url: 'https://www.sarosys.com/'
# AspiegelBot (Crawler): matches the token "AspiegelBot".
- regex: 'AspiegelBot'
  name: 'AspiegelBot'
  category: 'Crawler'
  url: 'https://aspiegel.com/'
  producer:
    name: 'Huawei'
    url: 'https://www.huawei.com/'
# Castro 2 (Service Agent): matches the full phrase
# "Castro 2, Episode Duration Lookup".
- regex: 'Castro 2, Episode Duration Lookup'
  name: 'Castro 2'
  category: 'Service Agent'
  url: 'http://supertop.co/castro/'
  producer:
    name: 'Supertop'
    url: 'http://supertop.co'
# Analytics SEO Crawler (Crawler): the matched phrase is "Curious George",
# which differs from the reported name.
- regex: 'Curious George'
  name: 'Analytics SEO Crawler'
  category: 'Crawler'
  url: 'http://www.analyticsseo.com/crawler'
  producer:
    name: 'Analytics SEO'
    url: 'http://www.analyticsseo.com'
# archive.org bot (Crawler): matches a literal "archive.org_bot" (dot
# escaped) or "special_archiver".
- regex: 'archive\.org_bot|special_archiver'
  name: 'archive.org bot'
  category: 'Crawler'
  url: 'https://archive.org/details/archive.org_bot'
  producer:
    name: 'The Internet Archive'
    url: 'https://archive.org'
# Ask Jeeves (Search bot): matches the phrase "Ask Jeeves/Teoma".
- regex: 'Ask Jeeves/Teoma'
  name: 'Ask Jeeves'
  category: 'Search bot'
  url: ''
  producer:
    name: 'Ask Jeeves Inc.'
    url: 'http://www.ask.com'
# Backlink-Check.de (Crawler): the dot is escaped so only a literal
# "Backlink-Check.de" matches.
- regex: 'Backlink-Check\.de'
  name: 'Backlink-Check.de'
  category: 'Crawler'
  url: 'http://www.backlink-check.de/bot.html'
  producer:
    name: 'Mediagreen Medienservice'
    url: 'http://www.backlink-check.de'
# BacklinkCrawler (Crawler): matches the token "BacklinkCrawler".
- regex: 'BacklinkCrawler'
  name: 'BacklinkCrawler'
  category: 'Crawler'
  url: 'http://www.backlinktest.com/crawler.html'
  producer:
    name: '2.0Promotion GbR'
    url: 'http://www.backlinktest.com'
# Baidu Spider (Search bot): matches "Baidu" followed later by "spider"
# (any characters in between), or the phrase "baidu Transcoder".
- regex: 'Baidu.*spider|baidu Transcoder'
  name: 'Baidu Spider'
  category: 'Search bot'
  url: 'http://www.baidu.com/search/spider.htm'
  producer:
    name: 'Baidu'
    url: 'http://www.baidu.com'
# BazQux Reader (Feed Fetcher): matches the token "BazQux".
# Producer details are intentionally left as empty strings.
- regex: 'BazQux'
  name: 'BazQux Reader'
  category: 'Feed Fetcher'
  url: 'https://bazqux.com/fetcher'
  producer:
    name: ''
    url: ''
# Better Uptime Bot (Site Monitor): matches the phrase "Better Uptime Bot".
- regex: 'Better Uptime Bot'
  name: 'Better Uptime Bot'
  category: 'Site Monitor'
  url: 'https://betteruptime.com/faq'
  producer:
    name: 'Better Uptime'
    url: 'https://betteruptime.com/'
# BingBot (Search bot): groups the Microsoft crawler tokens — MSNBot, msrbot,
# bingbot, BingPreview, msnbot-UDiscovery / msnbot-NewsBlogs and adidxbot —
# under a single bot name.
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'
# Blekkobot (Search bot): matches the token "Blekkobot".
- regex: 'Blekkobot'
  name: 'Blekkobot'
  category: 'Search bot'
  url: 'http://blekko.com/about/blekkobot'
  producer:
    name: 'Blekko'
    url: 'http://blekko.com'
# BLEXBot Crawler (Crawler): matches the token "BLEXBot".
- regex: 'BLEXBot'
  name: 'BLEXBot Crawler'
  category: 'Crawler'
  url: 'http://webmeup-crawler.com'
  producer:
    name: 'WebMeUp'
    url: 'http://webmeup.com'
# Bloglovin (Feed Fetcher): matches the token "Bloglovin".
# Producer details are intentionally left as empty strings.
- regex: 'Bloglovin'
  name: 'Bloglovin'
  category: 'Feed Fetcher'
  url: 'http://www.bloglovin.com'
  producer:
    name: ''
    url: ''
# Blogtrottr (Feed Fetcher): matches the token "Blogtrottr".
# The bot-level url is empty; only the producer url is set.
- regex: 'Blogtrottr'
  name: 'Blogtrottr'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Blogtrottr Ltd'
    url: 'https://blogtrottr.com/'
# BoardReader Blog Indexer (Crawler): matches the phrase
# "BoardReader Blog Indexer".
# NOTE(review): this entry has no `url` key, unlike most entries in this
# file — confirm consumers tolerate its absence.
- regex: 'BoardReader Blog Indexer'
  name: 'BoardReader Blog Indexer'
  category: 'Crawler'
  producer:
    name: 'BoardReader'
    url: 'https://boardreader.com/'
# Bountii Bot (Search bot): matches the token "BountiiBot".
- regex: 'BountiiBot'
  name: 'Bountii Bot'
  category: 'Search bot'
  url: 'http://bountii.com/contact.php'
  producer:
    name: 'Bountii Inc.'
    url: 'http://bountii.com'
# Browsershots (Service Agent): matches the token "Browsershots".
- regex: 'Browsershots'
  name: 'Browsershots'
  category: 'Service Agent'
  url: 'http://browsershots.org/faq'
  producer:
    name: 'Browsershots.org'
    url: 'http://browsershots.org'
# BUbiNG (Crawler): matches the token "BUbiNG".
# NOTE(review): the producer url fragment was "#buging"; it is assumed to be
# a typo for "#bubing" — verify against the live page.
- regex: 'BUbiNG'
  name: 'BUbiNG'
  category: 'Crawler'
  url: 'http://law.di.unimi.it/BUbiNG.html'
  producer:
    name: 'The Laboratory for Web Algorithmics (LAW)'
    url: 'http://law.di.unimi.it/software.php#bubing'
- regex: '(?