com.hurence.logisland.processor.enrichment.IpToGeo Maven / Gradle / Ivy
The newest version!
/**
* Copyright (C) 2016 Hurence ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hurence.logisland.processor.enrichment;
import static com.hurence.logisland.service.iptogeo.IpToGeoService.*;
import com.hurence.logisland.annotation.documentation.CapabilityDescription;
import com.hurence.logisland.annotation.documentation.Tags;
import com.hurence.logisland.component.PropertyDescriptor;
import com.hurence.logisland.component.PropertyValue;
import com.hurence.logisland.processor.ProcessContext;
import com.hurence.logisland.record.FieldType;
import com.hurence.logisland.record.Record;
import com.hurence.logisland.service.cache.CacheService;
import com.hurence.logisland.service.iptogeo.IpToGeoService;
import com.hurence.logisland.validator.StandardValidators;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
@Tags({"geo", "enrich", "ip"})
@CapabilityDescription("Looks up geolocation information for an IP address. The attribute that contains the IP address to lookup must be provided in the **"
+ IpAbstractProcessor.PROP_IP_ADDRESS_FIELD + "** property. By default, the geo information are put in a hierarchical structure. " +
"That is, if the name of the IP field is 'X', then the the geo attributes added by enrichment are added under a father field" +
" named X_geo. \"_geo\" is the default hierarchical suffix that may be changed with the **" + IpToGeo.PROP_HIERARCHICAL_SUFFIX +
"** property. If one wants to put the geo fields at the same level as the IP field, then the **" + IpToGeo.PROP_HIERARCHICAL + "** property should be set to false and then the geo attributes are " +
" created at the same level as him with the naming pattern X_geo_. \"_geo_\" is the default flat suffix but this may be changed with the **" +
IpToGeo.PROP_FLAT_SUFFIX + "** property. The IpToGeo processor requires a reference to an Ip to Geo service. This must be defined in the **" +
IpToGeo.PROP_IP_TO_GEO_SERVICE + "** property. The added geo fields are dependant on the underlying Ip to Geo service. The **" +
IpToGeo.PROP_GEO_FIELDS + "** property must contain the list of geo fields that should be created if data is available for " +
" the IP to resolve. This property defaults to \"*\" which means to add every available fields. If one only wants a subset of the fields, " +
" one must define a comma separated list of fields as a value for the **" + IpToGeo.PROP_GEO_FIELDS + "** property. The list of the available geo fields" +
" is in the description of the **" + IpToGeo.PROP_GEO_FIELDS + "** property."
)
public class IpToGeo extends IpAbstractProcessor {
private static Logger logger = LoggerFactory.getLogger(IpToGeo.class);
protected static final String PROP_IP_TO_GEO_SERVICE = "iptogeo.service";
protected static final String PROP_GEO_FIELDS = "geo.fields";
protected static final String PROP_HIERARCHICAL = "geo.hierarchical";
protected static final String PROP_HIERARCHICAL_SUFFIX = "geo.hierarchical.suffix";
protected static final String PROP_FLAT_SUFFIX = "geo.flat.suffix";
protected static final String PROP_DEBUG = "debug";
protected static final long DEFAULT_CACHE_VALIDITY_PERIOD = 0;
protected long cacheValidityPeriodSec = DEFAULT_CACHE_VALIDITY_PERIOD;
protected CacheService cacheService;
protected boolean debug = false;
static final String DEBUG_FROM_CACHE_SUFFIX = "_from_cache";
protected static final String PROP_CACHE_SERVICE = "cache.service";
public static final PropertyDescriptor IP_TO_GEO_SERVICE = new PropertyDescriptor.Builder()
.name(PROP_IP_TO_GEO_SERVICE)
.description("The reference to the IP to Geo service to use.")
.required(true)
.identifiesControllerService(IpToGeoService.class)
.build();
public static final PropertyDescriptor GEO_FIELDS = new PropertyDescriptor.Builder()
.name(PROP_GEO_FIELDS)
.description("Comma separated list of geo information fields to add to the record. Defaults to '*', which means to include all available fields. If a list " +
"of fields is specified and the data is not available, the geo field is not created. The geo fields are dependant on the underlying defined Ip to Geo service. " +
"The currently only supported type of Ip to Geo service is the Maxmind Ip to Geo service. This means that the currently " +
"supported list of geo fields is the following:" +
"**continent**: the identified continent for this IP address. " +
"**continent_code**: the identified continent code for this IP address. " +
"**city**: the identified city for this IP address. " +
"**latitude**: the identified latitude for this IP address. " +
"**longitude**: the identified longitude for this IP address. " +
"**location**: the identified location for this IP address, defined as Geo-point expressed as a string with the format: 'latitude,longitude'. " +
"**accuracy_radius**: the approximate accuracy radius, in kilometers, around the latitude and longitude for the location. " +
"**time_zone**: the identified time zone for this IP address. " +
"**subdivision_N**: the identified subdivision for this IP address. N is a one-up number at the end of the attribute name, starting with 0. " +
"**subdivision_isocode_N**: the iso code matching the identified subdivision_N. " +
"**country**: the identified country for this IP address. " +
"**country_isocode**: the iso code for the identified country for this IP address. " +
"**postalcode**: the identified postal code for this IP address. " +
"**lookup_micros**: the number of microseconds that the geo lookup took. The Ip to Geo service must have the " + IpToGeoService.GEO_FIELD_LOOKUP_TIME_MICROS + " property enabled in order to have this field available."
)
.required(false)
.addValidator(StandardValidators.COMMA_SEPARATED_LIST_VALIDATOR)
.defaultValue("*")
.build();
public static final PropertyDescriptor HIERARCHICAL = new PropertyDescriptor.Builder()
.name(PROP_HIERARCHICAL)
.description("Should the additional geo information fields be added under a hierarchical father field or not.")
.required(false)
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
.defaultValue("true")
.build();
public static final PropertyDescriptor HIERARCHICAL_SUFFIX = new PropertyDescriptor.Builder()
.name(PROP_HIERARCHICAL_SUFFIX)
.description("Suffix to use for the field holding geo information. If " + PROP_HIERARCHICAL +
" is true, then use this suffix appended to the IP field name to define the father field name." +
" This may be used for instance to distinguish between geo fields with various locales using many" +
" Ip to Geo service instances.")
.required(false)
.defaultValue("_geo")
.build();
public static final PropertyDescriptor FLAT_SUFFIX = new PropertyDescriptor.Builder()
.name(PROP_FLAT_SUFFIX)
.description("Suffix to use for geo information fields when they are flat. If " + PROP_HIERARCHICAL +
" is false, then use this suffix appended to the IP field name but before the geo field name." +
" This may be used for instance to distinguish between geo fields with various locales using many" +
" Ip to Geo service instances.")
.required(false)
.defaultValue("_geo_")
.build();
public static final PropertyDescriptor CONFIG_CACHE_SERVICE = new PropertyDescriptor.Builder()
.name(PROP_CACHE_SERVICE)
.description("The name of the cache service to use.")
.required(true)
.identifiesControllerService(CacheService.class)
.build();
/* WARNING: This property is commented as right now we don't support live Geolite db update. */
// public static final PropertyDescriptor CONFIG_CACHE_MAX_TIME = new PropertyDescriptor.Builder()
// .name(PROP_CACHE_MAX_TIME)
// .description("The amount of time, in seconds, for which a cached geoInfo value is valid in the cache service. After this delay, " +
// "the next new request to translate the same IP into geoInfo will trigger a new request in the Geolite db and the" +
// " result will overwrite the entry in the cache. This will facilitate the support in the future for live upgrade of the Geolite database." +
// " A value of 0 seconds disables this expiration mechanism. The default value is " + DEFAULT_CACHE_VALIDITY_PERIOD +
// " seconds, which corresponds to new requests triggered every day if a record with the same IP passes every" +
// " day in the processor."
// )
// .required(false)
// .addValidator(StandardValidators.INTEGER_VALIDATOR)
// .defaultValue(new Long(DEFAULT_CACHE_VALIDITY_PERIOD).toString())
// .build();
public static final PropertyDescriptor CONFIG_DEBUG = new PropertyDescriptor.Builder()
.name(PROP_DEBUG)
.description("If true, an additional debug field is added. If the geo info fields prefix is X," +
" a debug field named X" + DEBUG_FROM_CACHE_SUFFIX + " contains a boolean value" +
" to indicate the origin of the geo fields. The default value for this property is false (debug is disabled).")
.required(false)
.defaultValue("false")
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
.build();
// Ip to Geo service to use to perform the translation requests
private IpToGeoService ipToGeoService = null;
// List of fields to add (* means all available fields)
private String geoFields = "*";
// Should we use all available fields or the list in geoFields?
private boolean allFields = true;
// Should the geo fields be added in a hierarchical view or as flat fields
private boolean hierarchical = true;
// Suffix to append to the ip field name for defining the the father field name if hierarchical is true
private String hierarchicalSuffix = "_geo";
// Suffix to append to the ip field name and before the geo field name if hierarchical is false
private String flatSuffix = "_geo_";
private boolean needSubdivision = false;
private boolean needSubdivisionIsocode = false;
// Supported field names. Key: geo field name, Value: the field type to use
static Map supportedGeoFieldNames = new HashMap() {{
put(GEO_FIELD_LOOKUP_TIME_MICROS, FieldType.INT);
put(GEO_FIELD_CONTINENT, FieldType.STRING);
put(GEO_FIELD_CONTINENT_CODE, FieldType.STRING);
put(GEO_FIELD_CITY, FieldType.STRING);
put(GEO_FIELD_LATITUDE, FieldType.DOUBLE);
put(GEO_FIELD_LONGITUDE, FieldType.DOUBLE);
put(GEO_FIELD_LOCATION, FieldType.STRING);
put(GEO_FIELD_ACCURACY_RADIUS, FieldType.INT);
put(GEO_FIELD_TIME_ZONE, FieldType.STRING);
put(GEO_FIELD_SUBDIVISION, FieldType.STRING);
put(GEO_FIELD_SUBDIVISION_ISOCODE, FieldType.STRING);
put(GEO_FIELD_COUNTRY, FieldType.STRING);
put(GEO_FIELD_COUNTRY_ISOCODE, FieldType.STRING);
put(GEO_FIELD_POSTALCODE, FieldType.STRING);
}};
@Override
public List getSupportedPropertyDescriptors() {
final List properties = super.getSupportedPropertyDescriptors();
properties.add(IP_TO_GEO_SERVICE);
properties.add(GEO_FIELDS);
properties.add(HIERARCHICAL);
properties.add(HIERARCHICAL_SUFFIX);
properties.add(FLAT_SUFFIX);
properties.add(CONFIG_CACHE_SERVICE);
// properties.add(CONFIG_CACHE_MAX_TIME);
properties.add(CONFIG_DEBUG);
return properties;
}
@Override
public boolean hasControllerService() {
return true;
}
@Override
public void init(final ProcessContext context) {
/**
* Get the Ip to Geo Service
*/
ipToGeoService = context.getPropertyValue(IP_TO_GEO_SERVICE).asControllerService(IpToGeoService.class);
if(ipToGeoService == null) {
logger.error("IpToGeoService service is not initialized!");
}
PropertyValue propertyValue = context.getPropertyValue(GEO_FIELDS);
if (propertyValue != null) {
geoFields = propertyValue.asString();
}
allFields = geoFields.trim().equals("*");
propertyValue = context.getPropertyValue(HIERARCHICAL);
if (propertyValue != null) {
hierarchical = propertyValue.asBoolean();
}
propertyValue = context.getPropertyValue(HIERARCHICAL_SUFFIX);
if (propertyValue != null) {
hierarchicalSuffix = propertyValue.asString();
}
propertyValue = context.getPropertyValue(FLAT_SUFFIX);
if (propertyValue != null) {
flatSuffix = propertyValue.asString();
}
cacheService = context.getPropertyValue(CONFIG_CACHE_SERVICE).asControllerService(CacheService.class);
if(cacheService == null) {
logger.error("Cache service is not initialized!");
}
}
/**
* Get the list of geo fields to add
* @return the list of geo fields to add
*/
private Set getConfiguredGeoFieldNames() throws Exception
{
Set result = new HashSet();
for (String field : geoFields.trim().split(","))
{
field = field.trim();
if (supportedGeoFieldNames.containsKey(field))
{
result.add(field);
if (field.equals(GEO_FIELD_SUBDIVISION))
{
// Keep track of the fact that GEO_FIELD_SUBDIVISION is requested
needSubdivision = true;
}
if (field.equals(GEO_FIELD_SUBDIVISION_ISOCODE))
{
// Keep track of the fact that GEO_FIELD_SUBDIVISION_ISOCODE is requested
needSubdivisionIsocode = true;
}
} else
{
throw new Exception("Unsupported geo field name: " + field);
}
}
return result;
}
protected void processIp(Record record, String ip, ProcessContext context) {
debug = context.getPropertyValue(CONFIG_DEBUG).asBoolean();
// cacheValidityPeriodSec = (long)context.getPropertyValue(CONFIG_CACHE_MAX_TIME).asInteger();
/**
* Attempt to find info from the cache
*/
IpToGeo.CacheEntry cacheEntry = null;
try {
cacheEntry = cacheService.get(ip);
} catch (Exception e) {
logger.trace("Could not use cache!");
}
/**
* If something in the cache, get it and be sure it is not obsolete
*/
Map geoInfo = null;
boolean fromCache = true;
if (cacheEntry != null) { // Something in the cache?
geoInfo = cacheEntry.getGeoInfo();
if (cacheValidityPeriodSec > 0) { // Cache validity period enabled?
long cacheTime = cacheEntry.getTime();
long now = System.currentTimeMillis();
long cacheAge = now - cacheTime;
if (cacheAge > (cacheValidityPeriodSec * 1000L)) { // Cache entry older than allowed max age?
geoInfo = null; // Cache entry expired, force triggering a new request
}
}
}
if (geoInfo == null) {
fromCache = false;
/**
* Not in the cache or cache entry expired
* Call the Ip to Geo service and fill responses as new fields
*/
geoInfo = ipToGeoService.getGeoInfo(ip);
/**
* Remove unwanted fields if some specific fields configured
*/
if (!allFields)
{
try {
filterFields(geoInfo);
} catch (Exception e) {
logger.error(e.getMessage());
return;
}
}
try {
// Store the geoInfo into the cache
cacheEntry = new CacheEntry(geoInfo, System.currentTimeMillis());
cacheService.set(ip, cacheEntry);
} catch (Exception e) {
logger.trace("Could not put entry in the cache:" + e.getMessage());
}
}
final String ipAttributeName = context.getProperty(IP_ADDRESS_FIELD);
if (hierarchical)
{
/**
* Add the geo fields under a father field named :
* Let's say the ip field is src_ip, then we'll create a father field named src_ip_geo
* under which we put all the geo fields:
* src_ip: "123.125.42.15"
* src_ip_geo: {
* geo_city: "London",
* geo_longitude: -0.0931,
* ...
* }
*/
record.setField(ipAttributeName + hierarchicalSuffix, FieldType.MAP, geoInfo);
if (debug)
{
// Add some debug fields
record.setField(ipAttributeName + hierarchicalSuffix + DEBUG_FROM_CACHE_SUFFIX, FieldType.BOOLEAN, fromCache);
}
} else
{
/**
* Add the geo fields as fields whose names are derived from the ip field:
* _geo_city, _geo_longitude....
*/
for (Map.Entry entry : geoInfo.entrySet())
{
addRecordField(record,
ipAttributeName + flatSuffix + entry.getKey(),
entry.getKey(),
entry.getValue());
}
if (debug)
{
// Add some debug fields
record.setField(ipAttributeName + flatSuffix + DEBUG_FROM_CACHE_SUFFIX, FieldType.BOOLEAN, fromCache);
}
}
}
/**
* Filter fields returned by the Ip to Geo service according to the configured ones
* @param geoInfo Map containing the fields returned by the Ip to Geo service
* @throws Exception
*/
private void filterFields(Map geoInfo) throws Exception
{
Set requestedFields = getConfiguredGeoFieldNames();
for(Iterator> iterator = geoInfo.entrySet().iterator();
iterator.hasNext(); ) {
Map.Entry entry = iterator.next();
String geoFieldName = entry.getKey();
if(!requestedFields.contains(geoFieldName)) {
if (needSubdivision || needSubdivisionIsocode)
{
// Requested Subdivision or SubdivisionIsocode or Both
if (needSubdivision && needSubdivisionIsocode)
{
// Requested Both Subdivision and SubdivisionIsocode
if (!geoFieldName.startsWith(GEO_FIELD_SUBDIVISION))
{
iterator.remove();
}
} else if (needSubdivision)
{
// Requested Subdivision only
if (!geoFieldName.startsWith(GEO_FIELD_SUBDIVISION) ||
geoFieldName.startsWith(GEO_FIELD_SUBDIVISION_ISOCODE))
{
iterator.remove();
}
}
else
{
// Requested SubdivisionIsocode only
if (!geoFieldName.startsWith(GEO_FIELD_SUBDIVISION_ISOCODE))
{
iterator.remove();
}
}
} else
{
// Not a requested field, remove it
iterator.remove();
}
}
}
}
/**
* Add the provided geo field to the record
* @param record Record to update
* @param attributeName Geo field name
* @param value Geo field value
*/
private void addRecordField(Record record, String attributeName, String geoFieldName, Object value)
{
FieldType fieldType = supportedGeoFieldNames.get(geoFieldName);
if (fieldType == null) // Handle subdivision and subdivision_isocode fields (geo_subdivision_0 is not geo_subdivision)
{
fieldType = FieldType.STRING;
}
record.setField(attributeName, fieldType, value);
}
/**
* Cached entity
*/
private static class CacheEntry
{
// geoInfo translated from the ip (or the ip if the geoInfo could not be found)
private Map geoInfo = null;
// Time at which this cache entry has been stored in the cache service
private long time = 0L;
public CacheEntry(Map geoInfo, long time)
{
this.geoInfo = geoInfo;
this.time = time;
}
public Map getGeoInfo()
{
return geoInfo;
}
public long getTime()
{
return time;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy