// io.github.minorg.whoisclient.parser.WhoisRegistrantFieldParser (Maven / Gradle / Ivy)
package io.github.minorg.whoisclient.parser;
import static com.google.common.base.Preconditions.checkNotNull;
import java.util.regex.Matcher;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.github.minorg.whoisclient.Gazetteers;
import io.github.minorg.whoisclient.ParsedWhoisRecord;
import io.github.minorg.whoisclient.ParsedWhoisRegistrant;
public class WhoisRegistrantFieldParser extends WhoisPatternFieldParser {
public WhoisRegistrantFieldParser(final Gazetteers gazetteers) {
this(ParsedWhoisRecord.FieldMetadata.REGISTRANT, gazetteers, new String[] {
" Registrant:[ ]*\\n (?.*)\\n (?.*)\\n (?.*)\\n (?.*), (?.*) (?.*)\\n (?.*)\\n(?: Phone: (?.*)\\n)? Email: (?.*)\\n", // Corporate
// Domains, Inc.
"Registrant:\\n (?.+)\\n (?.+)\\n(?: (?.*)\\n)?(?: (?.*)\\n)? (?.+), (?.+)\\n (?.+)\\n (?.+)\\n (?.+)\\n\\n", // OVH
"(?:Registrant ID:(?.+)\\n)?Registrant Name:(?.*)\\n(?:Registrant Organization:(?.*)\\n)?Registrant Street1?:(?.*)\\n(?:Registrant Street2:(?.*)\\n)?(?:Registrant Street3:(?.*)\\n)?Registrant City:(?.*)\\nRegistrant State/Province:(?.*)\\nRegistrant Postal Code:(?.*)\\nRegistrant Country:(?.*)\\nRegistrant Phone:(?.*)\\n(?:Registrant Phone Ext.:(?.*)\\n)?(?:Registrant FAX:(?.*)\\n)?(?:Registrant FAX Ext.:(?.*)\\n)?Registrant Email:(?.*)", // Public
// Interest Registry (.org), nic.pw, No-IP.com
"Registrant ID:(?.+)\\nRegistrant Name:(?.*)\\n(?:Registrant Organization:(?.*)\\n)?Registrant Address1?:(?.*)\\n(?:Registrant Address2:(?.*)\\n)?(?:Registrant Address3:(?.*)\\n)?Registrant City:(?.*)\\nRegistrant State/Province:(?.*)\\nRegistrant Country/Economy:(?.*)\\nRegistrant Postal Code:(?.*)\\nRegistrant Phone:(?.*)\\n(?:Registrant Phone Ext.:(?.*)\\n)?(?:Registrant FAX:(?.*)\\n)?(?:Registrant FAX Ext.:(?.*)\\n)?Registrant E-mail:(?.*)", // .ME,
// DotAsia
"Registrant ID:\\s*(?.+)\\nRegistrant Name:\\s*(?.+)\\nRegistrant Organization:\\s*(?.*)\\nRegistrant Address1:\\s*(?.+)\\nRegistrant Address2:\\s*(?.*)\\nRegistrant City:\\s*(?.+)\\nRegistrant State/Province:\\s*(?.+)\\nRegistrant Postal Code:\\s*(?.+)\\nRegistrant Country:\\s*(?.+)\\nRegistrant Country Code:\\s*(?.+)\\nRegistrant Phone Number:\\s*(?.+)\\nRegistrant Email:\\s*(?.+)\\n", // .CO
// Internet
"Registrant Contact: (?.+)\\nRegistrant Organization: (?.+)\\nRegistrant Name: (?.+)\\nRegistrant Street: (?.+)\\nRegistrant City: (?.+)\\nRegistrant Postal Code: (?.+)\\nRegistrant State: (?.+)\\nRegistrant Country: (?.+)\\nRegistrant Phone: (?.*)\\nRegistrant Phone Ext: (?.*)\\nRegistrant Fax: (?.*)\\nRegistrant Fax Ext: (?.*)\\nRegistrant Email: (?.*)\\n", // Key-Systems
// GmbH
"(?:Registrant ID:[ ]*(?.*)\\n)?Registrant Name:[ ]*(?.*)\\n(?:Registrant Organization:[ ]*(?.*)\\n)?Registrant Street:[ ]*(?.+)\\n(?:Registrant Street:[ ]*(?.+)\\n)?(?:Registrant Street:[ ]*(?.+)\\n)?Registrant City:[ ]*(?.+)\\nRegistrant State(?:\\/Province)?:[ ]*(?.*)\\nRegistrant Postal Code:[ ]*(?.+)\\nRegistrant Country:[ ]*(?.+)\\n(?:Registrant Phone:[ ]*(?.*)\\n)?(?:Registrant Phone Ext:[ ]*(?.*)\\n)?(?:Registrant Fax:[ ]*(?.*)\\n)?(?:Registrant Fax Ext:[ ]*(?.*)\\n)?(?:Registrant Email:[ ]*(?.+)\\n)?", // WildWestDomains,
// GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps
"Registrant\\n(?: (?.+)\\n)? (?.+)\\n Email:(?.+)\\n (?.+)\\n(?: (?.+)\\n)? (?.+) (?.+)\\n (?.+)\\n Tel: (?.+)\\n\\n", // internet.bs
" Registrant Contact Details:[ ]*\\n (?.*)\\n (?.*)[ ]{2,}\\((?.*)\\)\\n (?.*)\\n(?: (?.*)\\n)?(?: (?.*)\\n)? (?.*)\\n (?.*),(?.*)\\n (?.*)\\n Tel. (?.*)", // Whois.com
"owner-id:[ ]*(?.*)\\n(?:owner-organization:[ ]*(?.*)\\n)?owner-name:[ ]*(?.*)\\nowner-street:[ ]*(?.*)\\nowner-city:[ ]*(?.*)\\nowner-zip:[ ]*(?.*)\\nowner-country:[ ]*(?.*)\\n(?:owner-phone:[ ]*(?.*)\\n)?(?:owner-fax:[ ]*(?.*)\\n)?owner-email:[ ]*(?.*)", // InterNetworX
"Registrant:\\n registrant_org: (?.*)\\n registrant_name: (?.*)\\n registrant_email: (?.*)\\n registrant_address: (?.*)\\n registrant_city: (?.*)\\n registrant_state: (?.*)\\n registrant_zip: (?.*)\\n registrant_country: (?.*)\\n registrant_phone: (?.*)", // Bellnames
"Holder of domain name:\\n(?[\\S\\s]+)\\n(?.+)\\n(?[A-Z0-9-]+)\\s+(?.+)\\n(?.+)\\nContractual Language", // nic.ch
"\\n\\n(?:Owner)?\\s+: (?.*)\\n(?:\\s+: (?.*)\\n)?\\s+: (?.*)\\n\\s+: (?.*)\\n\\s+: (?.*)\\n\\s+: (?.*)\\n", // nic.io
"Contact Information:\\n\\[Name\\]\\s*(?.*)\\n\\[Email\\]\\s*(?.*)\\n\\[Web Page\\]\\s*(?.*)\\n\\[Postal code\\]\\s*(?.*)\\n\\[Postal Address\\]\\s*(?.*)\\n(?:\\s+(?.*)\\n)?(?:\\s+(?.*)\\n)?\\[Phone\\]\\s*(?.*)\\n\\[Fax\\]\\s*(?.*)\\n", // jprs.jp
"g\\. \\[Organization\\] (?.+)\\n", // .co.jp
// registrations at jprs.jp
"Registrant ID:(?.*)\\nRegistrant Name:(?.*)\\n(?:Registrant Organization:(?.*)\\n)?Registrant Address1:(?.*)\\n(?:Registrant Address2:(?.*)\\n)?(?:Registrant Address3:(?.*)\\n)?Registrant City:(?.*)\\n(?:Registrant State/Province:(?.*)\\n)?(?:Registrant Postal Code:(?.*)\\n)?Registrant Country:(?.*)\\nRegistrant Country Code:.*\\nRegistrant Phone Number:(?.*)\\n(?:Registrant Facsimile Number:(?.*)\\n)?Registrant Email:(?.*)", // .US,
// .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.)
"Registrant\\n Name: (?.+)\\n(?: Organization: (?.+)\\n)? ContactID: (?.+)\\n(?: Address: (?.+)\\n(?: (?.+)\\n(?: (?.+)\\n)?)? (?.+)\\n (?.+)\\n (?.+)\\n (?.+)\\n)?(?: Created: (?.+)\\n)?(?: Last Update: (?.+)\\n)?", // nic.it
" Organisation Name[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n(?: Organisation Address[.]* (?.*)\\n)? Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)", // Melbourne
// IT (what a horrid format...)
"Registrant:[ ]*(?.+)\\n[\\s\\S]*Eligibility Name:[ ]*(?.+)\\n[\\s\\S]*Registrant Contact ID:[ ]*(?.+)\\n", // .au
// business
"Eligibility Type:[ ]*Citizen\\/Resident\\n[\\s\\S]*Registrant Contact ID:[ ]*(?.+)\\n[\\s\\S]*Registrant Contact Name:[ ]*(?.+)\\n", // .au
// individual
"Registrant:[ ]*(?.+)\\n[\\s\\S]*Eligibility Type:[ ]*(Higher Education Institution|Company|Incorporated Association|Other)\\n[\\s\\S]*Registrant Contact ID:[ ]*(?.+)\\n[\\s\\S]*Registrant Contact Name:[ ]*(?.+)\\n", // .au
// educational, company, 'incorporated association' (non-profit?), other
// (spotted for linux.conf.au, unsure if also for others)
" Registrant:\\n (?.+)\\n\\n Registrant type:\\n .*\\n\\n Registrant's address:\\n The registrant .* opted to have", // Nominet
// (.uk) with hidden address
" Registrant:\\n (?.+)\\n\\n[\\s\\S]* Registrant type:\\n .*\\n\\n Registrant's address:\\n (?.+)\\n(?: (?.+)\\n(?: (?.+)\\n)??)?? (?[^0-9\\n]+)\\n(?: (?.+)\\n)? (?.+)\\n (?.+)\\n\\n", // Nominet
// (.uk) with visible address
"Domain Owner:\\n\\t(?.+)\\n\\n[\\s\\S]*?(?:Registrant Contact:\\n\\t(?.+))?\\n\\nRegistrant(?:'s)? (?:a|A)ddress:(?:\\n\\t(?.+)\\n(?:\\t(?.+)\\n)?(?:\\t(?.+)\\n)?\\t(?.+)\\n\\t(?.+))?\\n\\t(?.+)(?:\\n\\t(?.+) \\(Phone\\)\\n\\t(?.+) \\(FAX\\)\\n\\t(?.+))?\\n\\n", // .ac.uk
// - what a mess...
"Registrant ID: (?.+)\\nRegistrant: (?.+)\\nRegistrant Contact Email: (?.+)", // .cn
// (CNNIC)
"Registrant contact:\\n (?.+)\\n (?.*)\\n (?.+), (?.+) (?.+) (?.+)\\n\\n", // Fabulous.com
"registrant-name:\\s*(?.+)\\nregistrant-type:\\s*(?.+)\\nregistrant-address:\\s*(?.+)\\nregistrant-postcode:\\s*(?.+)\\nregistrant-city:\\s*(?.+)\\nregistrant-country:\\s*(?.+)\\n(?:registrant-phone:\\s*(?.+)\\n)?(?:registrant-email:\\s*(?.+)\\n)?", // Hetzner
"Registrant Contact Information :[ ]*\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n\\n", // GAL
// Communication
"Contact Information : For Customer # [0-9]+[ ]*\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n\\n", // GAL
// Communication alternative (private WHOIS) format?
"Registrant:\\n Name: (?.+)\\n City: (?.+)\\n State: (?.+)\\n Country: (?.+)\\n", // Akky
// (.com.mx)
" Registrant:\\n (?.+)\\n (?.+)\\n (?