All downloads are free. Search and download functionality uses the official Maven repository.

io.github.minorg.whoisclient.parser.WhoisRegistrantFieldParser Maven / Gradle / Ivy

There is a newer version: 1.0.1
Show newest version
package io.github.minorg.whoisclient.parser;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.Locale;
import java.util.regex.Matcher;

import javax.annotation.Nullable;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import io.github.minorg.whoisclient.Gazetteers;
import io.github.minorg.whoisclient.ParsedWhoisRecord;
import io.github.minorg.whoisclient.ParsedWhoisRegistrant;

public class WhoisRegistrantFieldParser extends WhoisPatternFieldParser {
    public WhoisRegistrantFieldParser(final Gazetteers gazetteers) {
        this(ParsedWhoisRecord.FieldMetadata.REGISTRANT, gazetteers, new String[] {
                "   Registrant:[ ]*\\n      (?.*)\\n      (?.*)\\n      (?.*)\\n      (?.*), (?.*) (?.*)\\n      (?.*)\\n(?:      Phone: (?.*)\\n)?      Email: (?.*)\\n", // Corporate
                                                                                                                                                                                                                                                       // Domains,
                                                                                                                                                                                                                                                       // Inc.
                "Registrant:\\n  (?.+)\\n  (?.+)\\n(?:  (?.*)\\n)?(?:  (?.*)\\n)?  (?.+), (?.+)\\n  (?.+)\\n  (?.+)\\n  (?.+)\\n\\n", // OVH
                "(?:Registrant ID:(?.+)\\n)?Registrant Name:(?.*)\\n(?:Registrant Organization:(?.*)\\n)?Registrant Street1?:(?.*)\\n(?:Registrant Street2:(?.*)\\n)?(?:Registrant Street3:(?.*)\\n)?Registrant City:(?.*)\\nRegistrant State/Province:(?.*)\\nRegistrant Postal Code:(?.*)\\nRegistrant Country:(?.*)\\nRegistrant Phone:(?.*)\\n(?:Registrant Phone Ext.:(?.*)\\n)?(?:Registrant FAX:(?.*)\\n)?(?:Registrant FAX Ext.:(?.*)\\n)?Registrant Email:(?.*)", // Public
nterest
egistry
org),
nic.pw,
o-IP.com
                "Registrant ID:(?.+)\\nRegistrant Name:(?.*)\\n(?:Registrant Organization:(?.*)\\n)?Registrant Address1?:(?.*)\\n(?:Registrant Address2:(?.*)\\n)?(?:Registrant Address3:(?.*)\\n)?Registrant City:(?.*)\\nRegistrant State/Province:(?.*)\\nRegistrant Country/Economy:(?.*)\\nRegistrant Postal Code:(?.*)\\nRegistrant Phone:(?.*)\\n(?:Registrant Phone Ext.:(?.*)\\n)?(?:Registrant FAX:(?.*)\\n)?(?:Registrant FAX Ext.:(?.*)\\n)?Registrant E-mail:(?.*)", // .ME,
otAsia
                "Registrant ID:\\s*(?.+)\\nRegistrant Name:\\s*(?.+)\\nRegistrant Organization:\\s*(?.*)\\nRegistrant Address1:\\s*(?.+)\\nRegistrant Address2:\\s*(?.*)\\nRegistrant City:\\s*(?.+)\\nRegistrant State/Province:\\s*(?.+)\\nRegistrant Postal Code:\\s*(?.+)\\nRegistrant Country:\\s*(?.+)\\nRegistrant Country Code:\\s*(?.+)\\nRegistrant Phone Number:\\s*(?.+)\\nRegistrant Email:\\s*(?.+)\\n", // .CO
nternet
                "Registrant Contact: (?.+)\\nRegistrant Organization: (?.+)\\nRegistrant Name: (?.+)\\nRegistrant Street: (?.+)\\nRegistrant City: (?.+)\\nRegistrant Postal Code: (?.+)\\nRegistrant State: (?.+)\\nRegistrant Country: (?.+)\\nRegistrant Phone: (?.*)\\nRegistrant Phone Ext: (?.*)\\nRegistrant Fax: (?.*)\\nRegistrant Fax Ext: (?.*)\\nRegistrant Email: (?.*)\\n", // Key-Systems
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     // GmbH
                "(?:Registrant ID:[ ]*(?.*)\\n)?Registrant Name:[ ]*(?.*)\\n(?:Registrant Organization:[ ]*(?.*)\\n)?Registrant Street:[ ]*(?.+)\\n(?:Registrant Street:[ ]*(?.+)\\n)?(?:Registrant Street:[ ]*(?.+)\\n)?Registrant City:[ ]*(?.+)\\nRegistrant State(?:\\/Province)?:[ ]*(?.*)\\nRegistrant Postal Code:[ ]*(?.+)\\nRegistrant Country:[ ]*(?.+)\\n(?:Registrant Phone:[ ]*(?.*)\\n)?(?:Registrant Phone Ext:[ ]*(?.*)\\n)?(?:Registrant Fax:[ ]*(?.*)\\n)?(?:Registrant Fax Ext:[ ]*(?.*)\\n)?(?:Registrant Email:[ ]*(?.+)\\n)?", // WildWestDomains,
oDaddy,
amecheap/eNom,
scio,
usedoma
museum),
uroDNS,
nic.ps
                "Registrant\\n(?:    (?.+)\\n)?    (?.+)\\n    Email:(?.+)\\n    (?.+)\\n(?:    (?.+)\\n)?    (?.+) (?.+)\\n    (?.+)\\n    Tel: (?.+)\\n\\n", // internet.bs
                " Registrant Contact Details:[ ]*\\n    (?.*)\\n    (?.*)[ ]{2,}\\((?.*)\\)\\n    (?.*)\\n(?:    (?.*)\\n)?(?:    (?.*)\\n)?    (?.*)\\n    (?.*),(?.*)\\n    (?.*)\\n    Tel. (?.*)", // Whois.com
                "owner-id:[ ]*(?.*)\\n(?:owner-organization:[ ]*(?.*)\\n)?owner-name:[ ]*(?.*)\\nowner-street:[ ]*(?.*)\\nowner-city:[ ]*(?.*)\\nowner-zip:[ ]*(?.*)\\nowner-country:[ ]*(?.*)\\n(?:owner-phone:[ ]*(?.*)\\n)?(?:owner-fax:[ ]*(?.*)\\n)?owner-email:[ ]*(?.*)", // InterNetworX
                "Registrant:\\n registrant_org: (?.*)\\n registrant_name: (?.*)\\n registrant_email: (?.*)\\n registrant_address: (?
.*)\\n registrant_city: (?.*)\\n registrant_state: (?.*)\\n registrant_zip: (?.*)\\n registrant_country: (?.*)\\n registrant_phone: (?.*)", // Bellnames "Holder of domain name:\\n(?[\\S\\s]+)\\n(?.+)\\n(?[A-Z0-9-]+)\\s+(?.+)\\n(?.+)\\nContractual Language", // nic.ch "\\n\\n(?:Owner)?\\s+: (?.*)\\n(?:\\s+: (?.*)\\n)?\\s+: (?.*)\\n\\s+: (?.*)\\n\\s+: (?.*)\\n\\s+: (?.*)\\n", // nic.io "Contact Information:\\n\\[Name\\]\\s*(?.*)\\n\\[Email\\]\\s*(?.*)\\n\\[Web Page\\]\\s*(?.*)\\n\\[Postal code\\]\\s*(?.*)\\n\\[Postal Address\\]\\s*(?.*)\\n(?:\\s+(?.*)\\n)?(?:\\s+(?.*)\\n)?\\[Phone\\]\\s*(?.*)\\n\\[Fax\\]\\s*(?.*)\\n", // jprs.jp "g\\. \\[Organization\\] (?.+)\\n", // .co.jp // registrations // at // jprs.jp "Registrant ID:(?.*)\\nRegistrant Name:(?.*)\\n(?:Registrant Organization:(?.*)\\n)?Registrant Address1:(?.*)\\n(?:Registrant Address2:(?.*)\\n)?(?:Registrant Address3:(?.*)\\n)?Registrant City:(?.*)\\n(?:Registrant State/Province:(?.*)\\n)?(?:Registrant Postal Code:(?.*)\\n)?Registrant Country:(?.*)\\nRegistrant Country Code:.*\\nRegistrant Phone Number:(?.*)\\n(?:Registrant Facsimile Number:(?.*)\\n)?Registrant Email:(?.*)", // .US, // .biz // (NeuStar), // .buzz, // .moe // (Interlink // Co. // Ltd.) "Registrant\\n Name: (?.+)\\n(?: Organization: (?.+)\\n)? ContactID: (?.+)\\n(?: Address: (?.+)\\n(?: (?.+)\\n(?: (?.+)\\n)?)? (?.+)\\n (?.+)\\n (?.+)\\n (?.+)\\n)?(?: Created: (?.+)\\n)?(?: Last Update: (?.+)\\n)?", // nic.it " Organisation Name[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n(?: Organisation Address[.]* (?.*)\\n)? Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)\\n Organisation Address[.]* (?.*)", // Melbourne // IT // (what // a // horrid // format...) 
"Registrant:[ ]*(?.+)\\n[\\s\\S]*Eligibility Name:[ ]*(?.+)\\n[\\s\\S]*Registrant Contact ID:[ ]*(?.+)\\n", // .au // business "Eligibility Type:[ ]*Citizen\\/Resident\\n[\\s\\S]*Registrant Contact ID:[ ]*(?.+)\\n[\\s\\S]*Registrant Contact Name:[ ]*(?.+)\\n", // .au // individual "Registrant:[ ]*(?.+)\\n[\\s\\S]*Eligibility Type:[ ]*(Higher Education Institution|Company|Incorporated Association|Other)\\n[\\s\\S]*Registrant Contact ID:[ ]*(?.+)\\n[\\s\\S]*Registrant Contact Name:[ ]*(?.+)\\n", // .au // educational, // company, // 'incorporated // association' // (non-profit?), // other // (spotted // for // linux.conf.au, // unsure // if // also // for // others) " Registrant:\\n (?.+)\\n\\n Registrant type:\\n .*\\n\\n Registrant's address:\\n The registrant .* opted to have", // Nominet // (.uk) // with // hidden // address " Registrant:\\n (?.+)\\n\\n[\\s\\S]* Registrant type:\\n .*\\n\\n Registrant's address:\\n (?.+)\\n(?: (?.+)\\n(?: (?.+)\\n)??)?? (?[^0-9\\n]+)\\n(?: (?.+)\\n)? (?.+)\\n (?.+)\\n\\n", // Nominet // (.uk) // with // visible // address "Domain Owner:\\n\\t(?.+)\\n\\n[\\s\\S]*?(?:Registrant Contact:\\n\\t(?.+))?\\n\\nRegistrant(?:'s)? (?:a|A)ddress:(?:\\n\\t(?.+)\\n(?:\\t(?.+)\\n)?(?:\\t(?.+)\\n)?\\t(?.+)\\n\\t(?.+))?\\n\\t(?.+)(?:\\n\\t(?.+) \\(Phone\\)\\n\\t(?.+) \\(FAX\\)\\n\\t(?.+))?\\n\\n", // .ac.uk // - // what // a // mess... 
"Registrant ID: (?.+)\\nRegistrant: (?.+)\\nRegistrant Contact Email: (?.+)", // .cn // (CNNIC) "Registrant contact:\\n (?.+)\\n (?.*)\\n (?.+), (?.+) (?.+) (?.+)\\n\\n", // Fabulous.com "registrant-name:\\s*(?.+)\\nregistrant-type:\\s*(?.+)\\nregistrant-address:\\s*(?.+)\\nregistrant-postcode:\\s*(?.+)\\nregistrant-city:\\s*(?.+)\\nregistrant-country:\\s*(?.+)\\n(?:registrant-phone:\\s*(?.+)\\n)?(?:registrant-email:\\s*(?.+)\\n)?", // Hetzner "Registrant Contact Information :[ ]*\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n\\n", // GAL // Communication "Contact Information : For Customer # [0-9]+[ ]*\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n[ ]+(?.*)\\n\\n", // GAL // Communication // alternative // (private // WHOIS) // format? "Registrant:\\n Name: (?.+)\\n City: (?.+)\\n State: (?.+)\\n Country: (?.+)\\n", // Akky // (.com.mx) " Registrant:\\n (?.+)\\n (?.+)\\n (?.+) (?\\S+),[ ]+(?.+)\\n (?.+)", // .am "Domain Holder: (?.+)\\n(?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?))?)?)?)?)?)?, (?[^.,]+), (?.+), (?.+)\\n(?.+)\\n(?[A-Z]+)\\n", // .co.th, // format // 1 "Domain Holder: (?.+)\\n(?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?))?)?)?)?)?)?, (?.+)\\n(?.+)\\n(?[A-Z]+)\\n", // .co.th, // format // 2 "Domain Holder: (?.+)\\n(?.+)\\n(?:(?.+)\\n)?(?:(?.+)\\n)?.+?, (?.+)\\n(?.+)\\n(?.+)\\n(?[A-Z]+)\\n", // .co.th, // format // 3 "Domain Holder: (?.+)\\n(?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?)(?:,+ (?.+?))?)?)?)?)?)?\\n(?.+),? 
(?[A-Z]{2,3})(?: [A-Z0-9]+)?\\n(?.+)\\n(?[A-Z]+)\\n", // .co.th, // format // 4 " Registrant:\\n (?.+)\\n (?.+) (?.+)\\n (?.*)\\n (?.*)\\n (?.*)\\n (?.+), (?[^,\\n]*)\\n (?.+)\\n", // .com.tw // (Western // registrars) "Registrant:\\n(?.+)\\n(?.+)\\n(?.+?)(?:,+(?.+?)(?:,+(?.+?)(?:,+(?.+?)(?:,+(?.+?)(?:,+(?.+?)(?:,+(?.+?))?)?)?)?)?)?,(?.+),(?.+)\\n\\n Contact:\\n (?.+) (?.+)\\n TEL: (?.+?)(?:(?:#|ext.?)(?.+))?\\n FAX: (?.+)(?:(?:#|ext.?)(?.+))?\\n", // .com.tw // (TWNIC/SEEDNET, // Taiwanese // companies // only?) "Registrant Contact Information:\\n\\nCompany English Name \\(It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents\\):(?.+)\\nCompany Chinese name:(?.+)\\nAddress: (?.+)\\nCountry: (?.+)\\nEmail: (?.+)\\n", // HKDNR // (.hk) "Registrant ID:(?.+)\\nRegistrant Name:(?.*)\\n(?:Registrant Organization:(?.*)\\n)?Registrant Street1:(?.+?)\\n(?:Registrant Street2:(?.+?)\\n(?:Registrant Street3:(?.+?)\\n)?)?Registrant City:(?.+)\\nRegistrant State:(?.*)\\nRegistrant Postal Code:(?.+)\\nRegistrant Country:(?[A-Z]+)\\nRegistrant Phone:(?.*?)\\nRegistrant Fax:(?.*)\\nRegistrant Email:(?.+)\\n", // Realtime // Register "owner:\\s+(?.+)", // .br "person:\\s+(?.+)", // nic.ru (person) "org:\\s+(?.+)", // nic.ru (organization) }); } protected WhoisRegistrantFieldParser(final ParsedWhoisRecord.FieldMetadata field, final Gazetteers gazetteers, final String[] patternStrings) { super(patternStrings); this.field = checkNotNull(field); this.gazetteers = checkNotNull(gazetteers); } @Override public final boolean parse(final String raw, final ParsedWhoisRecord.Builder recordBuilder) throws WhoisRecordParseException { final Optional match = _match(raw); if (!match.isPresent()) { return false; } final ParsedWhoisRegistrant.Builder registrantBuilder = ParsedWhoisRegistrant.builder(); boolean empty = true; for (final ParsedWhoisRegistrant.FieldMetadata registrantField : ParsedWhoisRegistrant.FieldMetadata.values()) { 
String registrantFieldName; switch (registrantField) { case COUNTRY_CODE: registrantFieldName = "countrycode"; break; case FIRST_NAME: registrantFieldName = "firstname"; break; case LAST_NAME: registrantFieldName = "lastname"; break; case PHONE_EXT: registrantFieldName = "phoneext"; break; case POSTAL_CODE: registrantFieldName = "postalcode"; break; case STREET: { final ImmutableList.Builder streetBuilder = ImmutableList.builder(); for (int streetI = 0; streetI < 8; streetI++) { @Nullable final String street = __groupNullable(match.get(), streetI == 0 ? "street" : ("street" + streetI)); if (street == null) { continue; } streetBuilder.add(street); } final ImmutableList street = streetBuilder.build(); if (!street.isEmpty()) { registrantBuilder.setStreet(street); } continue; } default: registrantFieldName = registrantField.getThriftName(); break; } final String registrantFieldValue = __groupNullable(match.get(), registrantFieldName); if (registrantFieldValue == null) { continue; } // Translate codes to names switch (registrantField) { case CITY: { // Translate a city=airport code to the city name @Nullable final String cityNameForAirportCode = gazetteers.getAirportNamesByCode().get(registrantFieldValue); if (cityNameForAirportCode != null) { logger.debug("translated airport {} to city {}", registrantFieldValue, cityNameForAirportCode); registrantBuilder.setAirportCode(registrantFieldValue); registrantBuilder.setCity(cityNameForAirportCode); empty = false; continue; } break; } case COUNTRY: { // Translate a country=country code to the country name @Nullable final String countryNameForCountryCode = gazetteers.getCountryNamesByCode().get(registrantFieldValue); if (countryNameForCountryCode != null) { logger.debug("translated country {} to {}", registrantFieldValue, countryNameForCountryCode); registrantBuilder.setCountryCode(registrantFieldValue); registrantBuilder.setCountry(countryNameForCountryCode); empty = false; continue; } break; } default: break; } 
registrantBuilder.set(registrantField, registrantFieldValue); empty = false; } if (empty) { logger.warn("{}: matched registrant regex to '{}' but no fields found", field, match.get().group()); return false; } if (registrantBuilder.getState().isPresent() && registrantBuilder.getCountry().isPresent()) { // Translate a state=state code to a state name ImmutableMap statesGazetteer; switch (registrantBuilder.getCountry().get().toLowerCase()) { case "australia": statesGazetteer = gazetteers.getAustralianStateNamesByCode(); break; case "canada": statesGazetteer = gazetteers.getCanadianStateNamesByCode(); break; case "united states": case "united states of america": statesGazetteer = gazetteers.getAmericanStateNamesByCode(); break; default: statesGazetteer = null; } if (statesGazetteer != null) { @Nullable final String stateNameForStateCode = statesGazetteer.get(registrantBuilder.getState().get()); if (stateNameForStateCode != null) { logger.debug("translated state {} in country {} to {}", registrantBuilder.getState().get(), registrantBuilder.getCountry().get(), stateNameForStateCode); registrantBuilder.setStateCode(registrantBuilder.getState().get()); registrantBuilder.setState(stateNameForStateCode); } } } final ParsedWhoisRegistrant registrant = registrantBuilder.build(); recordBuilder.set(this.field, registrant); return true; } private @Nullable String __groupNullable(final Matcher matcher, final String name) { String value; try { value = matcher.group(name); } catch (final IllegalArgumentException e) { return null; } if (value == null) { return null; } else if (value.isEmpty()) { return null; } else if (StringUtils.isBlank(value)) { return null; } return value; } private final ParsedWhoisRecord.FieldMetadata field; private final Gazetteers gazetteers; private final static Logger logger = LoggerFactory.getLogger(WhoisRegistrantFieldParser.class); }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy