All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.stanbol.entityhub.core.mapping.DefaultFieldMapperImpl Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.entityhub.core.mapping;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.Constraint.ConstraintType;
import org.apache.stanbol.entityhub.servicesapi.util.PatternUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This is only an intermediate solution just to have the functionality.
 * This needs to be refactored! This is something similar to a semantic lifting
 * work flow that could used schema translation, reasoning ... 

* The goal is to build a simple Module that supports basics things (like the * stuff provided by this implementation) and that allow other implementations * to do the advanced stuff.

* Currently I hope, that when the functionality is in place it is easier to * see what a good design for this part of the Entityhub would be. * TODO: refactoring (see above comment) * @author Rupert Westenthaler * */ public class DefaultFieldMapperImpl implements FieldMapper, Cloneable { private final Logger log = LoggerFactory.getLogger(DefaultFieldMapperImpl.class); private final Set mappings; // private final Map> ignoreFieldMap; // private final Map> ignoreWildcardMap; private final Map> fieldMap; private final Map> wildcardMap; private Collection unmodMappings; private ValueConverterFactory valueConverter; //private Map mappings = Collections.synchronizedMap(new HashMap()); public DefaultFieldMapperImpl(ValueConverterFactory valueConverter) { super(); mappings = new HashSet(); unmodMappings = Collections.unmodifiableCollection(mappings); fieldMap = new HashMap>(); wildcardMap = new HashMap>(); if(valueConverter == null){ throw new IllegalArgumentException("The parsed ValueConverterFactory MUST NOT be NULL"); } this.valueConverter = valueConverter; // ignoreFieldMap = new HashMap>(); // ignoreWildcardMap = new HashMap>(); } /** * Internally used by clone * @param fieldMap * @param wildcardMap */ private DefaultFieldMapperImpl(ValueConverterFactory valueConverter,Set mappings,Map> fieldMap, Map> wildcardMap){ this(valueConverter); this.mappings.addAll(mappings); this.fieldMap.putAll(fieldMap); this.wildcardMap.putAll(wildcardMap); } /** * Getter for all the defined Mappings for a given field name * @param field the name of the field * @return all the active Mappings */ protected List getMappings(String field){ final List fieldMappings = new ArrayList(); //first search the fieldMappings Collection tmp = fieldMap.get(field); if(tmp != null){ fieldMappings.addAll(tmp); } //now iterate over the Wildcard Mappings for(Entry> entry : wildcardMap.entrySet()){ if(entry.getKey().matcher(field).find()){ fieldMappings.addAll(entry.getValue()); } } 
Collections.sort(fieldMappings, FieldMappingUtils.FIELD_MAPPING_COMPARATOR); return fieldMappings; } /* (non-Javadoc) * @see org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper#addMapping(org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping) */ public void addMapping(FieldMapping mapping){ if(mapping == null){ return; } if(mappings.add(mapping)){ if(mapping.usesWildcard()){ Pattern fieldPattern = mapping.getRegexPattern(); synchronized (wildcardMap) { Set fieldPatternMappings = wildcardMap.get(fieldPattern); if(fieldPatternMappings == null){ fieldPatternMappings = new HashSet();//new TreeSet(FieldMappingUtils.FIELD_MAPPING_COMPARATOR); wildcardMap.put(fieldPattern, fieldPatternMappings); } fieldPatternMappings.add(mapping); } } else { String fieldName = mapping.getFieldPattern(); synchronized (fieldMap) { Set fieldPatternMappings = fieldMap.get(fieldName); if(fieldPatternMappings == null){ fieldPatternMappings = new HashSet();//new TreeSet(FieldMappingUtils.FIELD_MAPPING_COMPARATOR); fieldMap.put(fieldName, fieldPatternMappings); } fieldPatternMappings.add(mapping); } } } //else already present -> nothing todo } public Collection getMappings(){ return unmodMappings; } // private static String getPrefix(String fieldPattern){ // return fieldPattern.split("[\\?\\*]")[0]; // } /* (non-Javadoc) * @see org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper#removeFieldMapping(org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping) */ public void removeFieldMapping(FieldMapping mapping){ if(mapping == null){ return; } if(mappings.remove(mapping)){ if(mapping.usesWildcard()){ Pattern fieldPattern = mapping.getRegexPattern(); synchronized (wildcardMap) { Collection fieldPatternMappings = wildcardMap.get(fieldPattern); if(fieldPatternMappings != null){ if(fieldPatternMappings.remove(mapping) && fieldPatternMappings.isEmpty()){ //clean up the prefix if last value is removed wildcardMap.remove(fieldPattern); } } } } else { String fieldPattern = 
mapping.getFieldPattern(); synchronized (fieldMap) { Collection fieldPatternMappings = fieldMap.get(fieldPattern); if(fieldPatternMappings != null){ if(fieldPatternMappings.remove(mapping) && fieldPatternMappings.isEmpty()){ //clean up the prefix if last value is removed fieldMap.remove(fieldPattern); } } } } } //else nothing todo } /** * Removes the FieldMapping based on the fieldPattern * @param fieldPattern the field pattern */ public void removeFieldMapping(String fieldPattern){ if(fieldPattern == null || fieldPattern.length()<1){ return; } if(PatternUtils.usesWildCard(fieldPattern)){ Pattern pattern = Pattern.compile(PatternUtils.wildcardToRegex(fieldPattern,true)); synchronized (wildcardMap) { wildcardMap.remove(pattern); } } else { synchronized (fieldMap) { fieldMap.remove(fieldPattern); } } } /* (non-Javadoc) * @see org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper#applyMappings(org.apache.stanbol.entityhub.servicesapi.model.Representation, org.apache.stanbol.entityhub.servicesapi.model.Representation) */ public Representation applyMappings(Representation source, Representation target, ValueFactory valueFactory) { Collection fields = new HashSet(); for(Iterator fieldIt = source.getFieldNames();fieldIt.hasNext();){ fields.add(fieldIt.next()); } for(String field : fields){ // log.info(" > process field: "+field); //get the active Mappings List activeMappings = getMappings(field); if(!activeMappings.isEmpty()){ //get all the values (store them in an Collection, because we need them more than once) Collection values = new ArrayList(); for(Iterator valueIt = source.get(field);valueIt.hasNext();){ values.add(valueIt.next()); } //only to be sure, that this is not changed by Filters! values = Collections.unmodifiableCollection(values); /* * (1) Before working with the values first analyse the active * mappings and filters. 
Two things * a) Init Wildcard Filters: * Language filters set on namespaces are executed on all field * mappings that define no language filter * b) calculate the mapped fields. Possible there are no mappings * left. Than we need not to process all the values */ Set targetFields = new HashSet(); TextConstraint globalFilter = null; Collection globalFiltered = null; /* * NOTE: the mappings are sorted in the way, that the most * prominent one will be at index 0. The wildcard "*" will * be always the last. * So we need to parse backwards because than more prominent * things will overwrite and win! */ for(int i=activeMappings.size()-1;i>=0;i--){ FieldMapping mapping = activeMappings.get(i); if(mapping.usesWildcard() //if wildcard && !mapping.ignoreField() && //and not ignore mapping.getFilter() != null && //and a filter is present mapping.getFilter().getType() == ConstraintType.text){ //and of type text //set the global text filter. //NOTE: the active mappings are sorted in that way, that // the most specific one is set last globalFilter = (TextConstraint)mapping.getFilter(); } for(String targetField : mapping.getMappings()){ if(mapping.ignoreField()){ targetFields.remove(targetField); } else { targetFields.add(targetField); } } } // log.info(" o targets: "+targetFields); // log.info(" o global text filter: "+globalFilter); if(globalFilter != null){ globalFiltered = new HashSet(values); //parse false ass third argument, because we need not to filter //non-Text values for wildcard filter! 
processFilter(globalFilter, globalFiltered,false); } //now process the mappings for(FieldMapping mapping : activeMappings){ if(!mapping.ignoreField() && !Collections.disjoint(targetFields, mapping.getMappings())){ processMapping(mapping, valueFactory, field, values,globalFiltered, targetFields, target); // } else if(!mapping.ignoreField()) { // log.info(String.format(" << ignore mapping %s ",mapping)); // } else { // log.info(String.format(" << %s ",mapping)); } } } } /* * TODO: return a "MappingReport" * All mapping activities should be documented and stored with the * MappedEntity as MappingActivity! */ return target; } /** * * @param mapping * @param valueFactory The value factory used to create converted values * @param field * @param values * @param globalFiltered * @param targets */ private void processMapping(FieldMapping mapping, ValueFactory valueFactory,String field, Collection values, Collection globalFiltered, Set activeTargets,Representation targetRepresentation) { //parsed mappings are all !ignore and some mappings are active Collection filtered; //this collection will be modified by the filters later on if(globalFiltered == null || //if no global filter is present and therefore globalFiltered == null or //there is a more special text filter defined in this mapping mapping.getFilter() != null && mapping.getFilter().getType() == ConstraintType.text){ filtered = new HashSet(values);//start with all values } else { //start with the values filtered by the global filter filtered = new HashSet(globalFiltered); } if(mapping.getFilter()!=null){ switch (mapping.getFilter().getType()) { case value: ValueConstraint valueConstraint = (ValueConstraint)mapping.getFilter(); processFilter(valueConstraint,filtered,valueFactory); break; case text: TextConstraint textConstraint = (TextConstraint)mapping.getFilter(); //for wildcard mappings only filter TextValues. if the mapping is //for a specific field filter also non text values. 
processFilter(textConstraint,filtered,!mapping.usesWildcard()); break; default: log.warn(String.format("Filter of type %s are not supported -> select all values! (Constraint=%s)", mapping.getFilter().getType(),mapping.getFilter())); break; } /* * TODO: add general purpose functionality to apply Constraints. * Currently this is done by the specific Query Implementations :( * - use the constraint to filter the values collection! */ } //nothing to do for(String mappedField : mapping.getMappings()){ //activeTargets still uses null for the current field // -> this is because wildcard filters can not know the actual field name if(activeTargets.contains(mappedField)){ //so use null to match if(mappedField == null){ //and than replace null with the field name mappedField = field; } // log.info(String.format(" >> copy%s to %s &d values", // mappedField.equals(field)?"":" from "+field,mappedField,filtered.size())); targetRepresentation.add(mappedField, filtered); // } else { // log.info(String.format(" << ignore%s %s", // mappedField.equals(field)?"":"mapping from "+field+"to",mappedField)); } } } /** * This method filters the parsed {@link Text} values based on the languages * parsed in the {@link TextConstraint}. * This method modifies the parsed collection by using the * {@link Iterator#remove()} method. * @param textConstraint the text constraint containing the active languages * @param values the values to filter. This method modifies this collection * @return the modified collection to allow nested calls */ private Collection processFilter(TextConstraint textConstraint, Collection values,boolean filterNonTextValues) { if(textConstraint.getTexts() != null){ log.warn("Filtering based on values is not implemented"); } /* * TODO: If filterNonTextValues=true and acceptDefaultLanguate=true * we could also try to convert non-Text values to Text (by using * the valueConverter. 
*/ Set langs = textConstraint.getLanguages(); boolean acceptDefaultLanguage = textConstraint.getLanguages().contains(null); for(Iterator it = values.iterator();it.hasNext();){ Object value = it.next(); if(value instanceof Text){ if(!langs.contains(((Text)value).getLanguage())){ it.remove(); // log.info(String.format(" - value %s(type:%s) rejected by text filter",value,value.getClass())); // } else { // log.info(String.format(" + value %s(type:%s) accepted by text filter",value,value.getClass())); } } else if(filterNonTextValues && value instanceof String){ //Strings only if the default language is enabled if(!acceptDefaultLanguage){ it.remove(); // log.info(String.format(" - value %s(type:%s) rejected by text filter",value,value.getClass())); // } else { // log.info(String.format(" + value %s(type:%s) accepted by text filter",value,value.getClass())); } } else if(filterNonTextValues){ it.remove(); // log.info(String.format(" - value %s(type:%s) rejected by text filter",value,value.getClass())); } //else non text value and filterNonTextValues=false -> nothing to do } return values; } /** * This method converts - or if not possible filters the parsed values based * on the parsed constraint * @param valueConstraint * @param values * @return */ private Collection processFilter(ValueConstraint valueConstraint, Collection values,ValueFactory valueFactory) { if(valueConstraint.getValues() != null){ log.warn("Filtering based on values is not yet implemented"); } //1) collect all active dataTypes //first a EnumSet for really fast containsAll ... operations Set activeDataTypes = EnumSet.noneOf(DataTypeEnum.class); //second a List to keep track of the ordering of the dataTypes in the //constraint for later conversions! List sortedActiveDataTypes = new ArrayList(valueConstraint.getDataTypes().size()); //NOTE: using a LinkedHashSet would slow down this code, because EnumSet // gives constant processing time even for bulk operations! 
for(String dataTypeUri : valueConstraint.getDataTypes()){ DataTypeEnum dataType = DataTypeEnum.getDataType(dataTypeUri); if(dataType == null){ log.warn(String.format("DataType %s not supported")); } else { if(activeDataTypes.add(dataType)){ //only of set has changed to avoid duplicates in the list sortedActiveDataTypes.add(dataType); } } } //2) now process the values // log.info(" --- Filter values ---"); //calculating acceptable and not acceptable types needs some processing time //and usually values will be only of very less different types. //Therefore it makes sense to cache accepted and rejected types! Set> accepted = new HashSet>(); Set> rejected = new HashSet>(); //Set that stores rejected values. Such will be converted later on! Set needConversion = new HashSet(); for(Iterator it = values.iterator();it.hasNext();){ Object value = it.next(); // if(accepted.contains(value.getClass())){ // log.info(String.format(" + value %s(type:%s) accepted by value filter",value,value.getClass())); //nothing to do // } else if(rejected.contains(value.getClass())){ it.remove(); //remove also the current value of that type needConversion.add(value); //save as value that need to be converted // log.info(String.format(" - value %s(type:%s) rejected by value filter",value,value.getClass())); } else { //new class ... 
calculate Set valueTypes = DataTypeEnum.getAllDataTypes(value.getClass()); if(valueTypes.removeAll(activeDataTypes)){ accepted.add(value.getClass()); // log.info(String.format(" + value %s(type:%s) accepted by value filter",value,value.getClass())); } else { rejected.add(getClass()); it.remove(); //remove the Item needConversion.add(value); //save as value that need to be converted // log.info(String.format(" - value %s(type:%s) rejected by value filter",value,value.getClass())); } } } //3) try to convert values to the active dataTypes // log.info(" --- Try to Convert rejected values ---"); for(Object value : needConversion){ Object converted = null; DataTypeEnum convertedTo = null; for(Iterator dataTypes = sortedActiveDataTypes.iterator(); //iterate over all active dataTypes converted == null && dataTypes.hasNext();){ //while converted still null and more dataTypes to try convertedTo = dataTypes.next(); converted = valueConverter.convert(value, convertedTo.getUri(),valueFactory); //try the conversion } if(converted != null){ // log.info(String.format(" + value %s(javaType=%s) successfully converted to %s(datatype=%s)", // value,value.getClass().getSimpleName(),converted,convertedTo.getShortName())); values.add(converted); // } else { // log.info(String.format(" - value %s(javaType=%s) could not be converted"), // value,value.getClass().getSimpleName()); } } return values; } @Override public DefaultFieldMapperImpl clone() { return new DefaultFieldMapperImpl(this.valueConverter,this.mappings,this.fieldMap, this.wildcardMap); } @Override public int hashCode() { return mappings.hashCode(); } @Override public boolean equals(Object o) { return o instanceof DefaultFieldMapperImpl && ((DefaultFieldMapperImpl)o).mappings.equals(mappings); } }