com.cinchapi.etl.Transformer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of data-transform-api Show documentation
Show all versions of data-transform-api Show documentation
A common API for data transformation.
/*
* Copyright (c) 2013-2017 Cinchapi Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cinchapi.etl;
import java.util.AbstractMap;
import java.util.Map;
import java.util.Map.Entry;
import javax.annotation.Nullable;
import com.cinchapi.common.collect.AnyMaps;
import com.cinchapi.common.collect.MergeStrategies;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
/**
 * A {@link Transformer} is a routine that takes a key/value pair and
 * potentially alters one or both of them, prior to import.
 * <p>
 * Sometimes, raw data from a source must be modified before being imported
 * into Concourse, for example:
 * <ul>
 * <li>Modifying keys by changing their case or stripping illegal
 * characters</li>
 * <li>Normalizing values, for example, converting strings to a specific case
 * or sanitizing</li>
 * <li>Compacting representation by converting values from an enumerated set of
 * values to a simple integer</li>
 * <li>Constructing a link or resolvable link</li>
 * </ul>
 * </p>
 * <p>
 * The only abstract method is {@link #transform(String, Object)}; the other
 * overloads are default methods that delegate to it.
 * </p>
 *
 * @author Jeff Nelson
 */
@FunctionalInterface
public interface Transformer {

    /**
     * Potentially transform one or both of the {@code key}/{@code value} pair.
     * If no transformation should occur, it is acceptable to return
     * {@code null} to inform the caller that the import values are acceptable
     * in their passed in state. Otherwise, the preferred pair should be wrapped
     * in an {@link Entry} object.
     * <p>
     * This method delegates to {@link #transform(String, Object)} and returns
     * an immutable copy of the <strong>first</strong> entry of the resulting
     * map. Any additional entries produced by the transformation are not
     * represented in the return value.
     * </p>
     *
     * @param key the raw key to potentially transform
     * @param value the raw value to potentially transform; it is acceptable to
     *            return any kind of object as the transformed value, but it is
     *            recommended to return an encoded String (i.e. don't return a
     *            {@link com.cinchapi.concourse.Link} object, but return a
     *            string that encodes a link (@record@) instead)
     * @return an {@link Entry} object that contains the transformed
     *         {@code key}/{@code value} pair or {@code null} if no
     *         transformation occurred (or the transformation was empty)
     * @deprecated in version 1.1.0; scheduled to be removed in version 2.0.0.
     *             Use {@link #transform(String, Object)} instead.
     */
    @Nullable
    @Deprecated
    public default Entry<String, Object> transform(String key, String value) {
        // The cast selects the (String, Object) overload instead of recursing
        // into this method.
        Map<String, Object> transformed = transform(key, (Object) value);
        if(transformed != null) {
            // The legacy return type can only carry a single pair, so take the
            // first entry (or null when the map is empty).
            Entry<String, Object> entry = Iterables
                    .getFirst(transformed.entrySet(), null);
            if(entry != null) {
                return new AbstractMap.SimpleImmutableEntry<>(entry.getKey(),
                        entry.getValue());
            }
        }
        return null;
    }

    /**
     * Transform all of the key/value pairs in the {@code object} and return
     * the merged result.
     * <p>
     * Each pair is passed to {@link #transform(String, Object)}. If that call
     * returns {@code null}, the original pair is carried over unchanged. The
     * results of the transformations are merged, in iteration order, using the
     * {@link MergeStrategies#upsert(Object, Object) upsert} merge strategy.
     * </p>
     *
     * @param object the data whose key/value pairs should each be transformed;
     *            this map is only read from, the result is accumulated in a
     *            new map
     * @return a new {@link Map} containing the merged transformation
     */
    public default Map<String, Object> transform(Map<String, Object> object) {
        Map<String, Object> transformed = Maps.newLinkedHashMap();
        object.forEach((key, value) -> {
            Map<String, Object> data = transform(key, value);
            // A null result means "no change", so keep the original pair
            data = data == null ? AnyMaps.create(key, value) : data;
            AnyMaps.mergeInPlace(transformed, data, MergeStrategies::upsert);
        });
        return transformed;
    }

    /**
     * Potentially transform the provided {@code key} and {@code value} pair.
     * <p>
     * There are four possible transformation scenarios:
     * <ol>
     * <li>Neither the key nor the value changes</li>
     * <li>Only the key changes</li>
     * <li>Only the value changes</li>
     * <li>Both the key and value change</li>
     * </ol>
     * In scenario 1, this method returns {@code null}. In the other scenarios,
     * this method returns a {@link Map} which contains the data that will
     * replace the original {@code key} and {@code value}. So, if the input
     * parameters are part of a larger data map, the caller should
     * {@link com.cinchapi.common.collect.Association#merge(Map) merge} the data
     * from this map with the source data.
     * </p>
     * <p>
     * Even though the inputs to this method are simple, a {@link Map} is
     * returned to allow for complex transformations. For example,
     * <ul>
     * <li>You can transform a single value into multiple values by including
     * multiple objects in the associated values collection.</li>
     * <li>You can transform a single key/value pair into multiple key/value
     * pairs by including multiple keys in the returned map</li>
     * </ul>
     * For the basic case of transforming a single key/value pair into another
     * single key/value pair, you should use the
     * {@link Transformation#to(Object, Object)} utility.
     * </p>
     *
     * @param key the raw key to potentially transform
     * @param value the raw value to potentially transform
     * @return the transformation, or {@code null} if neither the key nor the
     *         value changes
     */
    @Nullable
    public Map<String, Object> transform(String key, Object value);
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy