All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.datacleaner.components.remote.RemoteTransformer Maven / Gradle / Ivy
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.components.remote;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.metamodel.schema.ColumnTypeImpl;
import org.apache.metamodel.util.EqualsBuilder;
import org.datacleaner.Version;
import org.datacleaner.api.Close;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Validate;
import org.datacleaner.configuration.RemoteServerData;
import org.datacleaner.job.concurrent.PreviousErrorsExistException;
import org.datacleaner.restclient.ComponentConfiguration;
import org.datacleaner.restclient.ComponentRESTClient;
import org.datacleaner.restclient.ComponentsRestClientUtils;
import org.datacleaner.restclient.CreateInput;
import org.datacleaner.restclient.ProcessStatelessInput;
import org.datacleaner.restclient.ProcessStatelessOutput;
import org.datacleaner.restclient.RESTClientException;
import org.datacleaner.restclient.Serializator;
import org.datacleaner.util.batch.BatchRowCollectingTransformer;
import org.datacleaner.util.batch.BatchSink;
import org.datacleaner.util.batch.BatchSource;
import org.datacleaner.util.convert.StringConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.types.ArraySchema;
import com.fasterxml.jackson.module.jsonSchema.types.ValueTypeSchema;
/**
* Transformer that is actually a proxy to a remote transformer sitting at DataCleaner Monitor server.
* Instances of this transformer can be created only by
* {@link org.datacleaner.descriptors.RemoteTransformerDescriptorImpl} component descriptors.
*
* @since 9/1/15
*/
public class RemoteTransformer extends BatchRowCollectingTransformer {
private static final Logger logger = LoggerFactory.getLogger(RemoteTransformer.class);
private static final ObjectMapper mapper = Serializator.getJacksonObjectMapper();
private final RemoteServerData serverData;
private String componentDisplayName;
private ComponentRESTClient client;
private Map configuredProperties = new TreeMap<>();
private final AtomicBoolean failed = new AtomicBoolean(false);
private final SingleValueErrorAwareCache cachedOutputColumns = new SingleValueErrorAwareCache() {
@Override
protected OutputColumns fetch(CreateInput input) throws Exception {
return getOutputColumnsInternal(input);
}
};
public RemoteTransformer(RemoteServerData serverData, String componentDisplayName) {
this.serverData = serverData;
this.componentDisplayName = componentDisplayName;
}
@Initialize
public void initClient() throws RemoteComponentException {
try {
logger.debug("Initializing '{}' @{}", componentDisplayName, this.hashCode());
client = new ComponentRESTClient(serverData.getUrl(), serverData.getUsername(), serverData.getPassword(),
Version.getVersion());
} catch (Exception e) {
throw new RemoteComponentException(
"Remote component '" + componentDisplayName + "' is temporarily unavailable. \n" + e.getMessage());
}
}
@Close
public void closeClient() {
logger.debug("closing '{}' @{}", componentDisplayName, this.hashCode());
client = null;
}
@Validate
public void validate() throws Exception {
CreateInput createInput = new CreateInput();
createInput.configuration = getConfiguration(getUsedInputColumns());
try {
cachedOutputColumns.getCachedValue(createInput);
} catch(RESTClientException e) {
if(e.getCode() == 422) {
// Validation failed - simplify the error message
throw new RuntimeException(e.getReason());
}
}
}
@Override
public OutputColumns getOutputColumns() {
CreateInput createInput = new CreateInput();
createInput.configuration = getConfiguration(getUsedInputColumns());
try {
return cachedOutputColumns.getCachedValue(createInput);
} catch(Exception e) {
logger.debug("Error retrieving columns of transformer '" + componentDisplayName + "': " + e.toString());
return OutputColumns.NO_OUTPUT_COLUMNS;
}
}
private boolean isOutputColumnEnumeration(JsonSchema schema) {
if(schema == null){
return false;
}
boolean isArray = schema.isArraySchema();
JsonSchema baseSchema;
if (isArray) {
baseSchema = ((ArraySchema) schema).getItems().asSingleItems().getSchema();
} else {
baseSchema = schema;
}
if (baseSchema instanceof ValueTypeSchema) {
Set enums = ((ValueTypeSchema) baseSchema).getEnums();
if (enums != null && !enums.isEmpty()) {
return true;
}
}
return false;
}
private ComponentConfiguration getConfiguration(List> inputColumns) {
ComponentConfiguration configuration = new ComponentConfiguration();
for(Map.Entry propertyE: configuredProperties.entrySet()) {
configuration.getProperties().put(propertyE.getKey(), mapper.valueToTree(propertyE.getValue()));
}
for(InputColumn> col: inputColumns) {
configuration.getColumns().add(ComponentsRestClientUtils.createInputColumnSpecification(
col.getName(),
col.getDataType(),
ColumnTypeImpl.convertColumnType(col.getDataType()).getName(),
mapper.getNodeFactory()));
}
return configuration;
}
private List> getUsedInputColumns() {
ArrayList> columns = new ArrayList<>();
for(Object propValue: configuredProperties.values()) {
if(propValue instanceof InputColumn) {
columns.add((InputColumn>) propValue);
} else if(propValue instanceof InputColumn[]) {
for(InputColumn> col: ((InputColumn[])propValue)) {
columns.add(col);
}
} else if(propValue instanceof Collection) {
for(Object value: ((Collection>)propValue)) {
if(value instanceof InputColumn) {
columns.add((InputColumn>)value);
} else {
// don't iterate the rest if the first item is not an input column.
break;
}
}
}
// TODO: are maps possible?
}
return columns;
}
private void convertOutputRows(JsonNode rowSets, BatchSink> sink, int sinkSize) {
OutputColumns outCols = getOutputColumns();
if(rowSets == null || rowSets.size() < 1) { throw new RuntimeException("Expected exactly 1 row in response"); }
int rowI = 0;
for(JsonNode rowSet: rowSets) {
if(rowI >= sinkSize) {
throw new RuntimeException("Expected " + sinkSize + " rows, but got more");
}
List outRowSet = new ArrayList<>();
for(JsonNode row: rowSet) {
final List values = new ArrayList<>();
int i = 0;
for (JsonNode value : row) {
// TODO: should JsonNode be the default?
Class> cl = String.class;
if (i < outCols.getColumnCount()) {
cl = outCols.getColumnType(i);
}
values.add(convertOutputValue(value, cl));
i++;
}
outRowSet.add(values.toArray(new Object[values.size()]));
}
sink.setOutput(rowI, outRowSet);
rowI++;
}
if(rowI < sinkSize) {
throw new RuntimeException("Expected " + sinkSize + " rows, but got only " + rowI);
}
}
private Object convertOutputValue(JsonNode value, Class> cl) {
try {
if(cl == JsonNode.class) {
return value;
}
if(cl == File.class) {
return StringConverter.simpleInstance().deserialize(value.asText(), cl);
}
return mapper.readValue(value.traverse(), cl);
} catch(Exception e) {
throw new RuntimeException("Cannot convert table value of type '" + cl + "': " + value.toString(), e);
}
}
public void setPropertyValue(String propertyName, Object value) {
if(EqualsBuilder.equals(value, configuredProperties.get(propertyName))) {
return;
}
logger.debug("Setting '{}'.'{}' = {}", componentDisplayName, propertyName, value);
if(value == null) {
configuredProperties.remove(propertyName);
} else {
configuredProperties.put(propertyName, value);
}
}
public Object getPropertyValue(String propertyName) {
return configuredProperties.get(propertyName);
}
@Override
public void map(BatchSource source, BatchSink> sink) {
List> cols = getUsedInputColumns();
int size = source.size();
Object[] rows = new Object[size];
for(int i = 0; i < size; i++) {
InputRow inputRow = source.getInput(i);
Object[] values = new Object[cols.size()];
int j = 0;
for(InputColumn> col: cols) {
values[j] = inputRow.getValue(col);
j++;
}
rows[i] = values;
}
ProcessStatelessInput input = new ProcessStatelessInput();
input.configuration = getConfiguration(cols);
input.data = mapper.valueToTree(rows);
logger.debug("Processing remotely {} rows", size);
if (client == null) {
if (failed.get()) {
throw new PreviousErrorsExistException();
}
throw new RuntimeException("Remote transformer's connection has already been closed. ");
}
ProcessStatelessOutput out;
try {
out = client.processStateless(componentDisplayName, input);
} catch (RuntimeException e) {
boolean alreadyFailed = failed.getAndSet(true);
if (!alreadyFailed) {
throw new RuntimeException("Remote transformer failed: " + e.getMessage(), e);
} else {
throw new PreviousErrorsExistException();
}
}
convertOutputRows(out.rows, sink, size);
}
private OutputColumns getOutputColumnsInternal(CreateInput createInput) throws Exception {
logger.debug("Getting output columns from server");
boolean wasInit = false;
if (client == null) {
wasInit = true;
initClient();
}
try {
org.datacleaner.restclient.OutputColumns columnsSpec = client.getOutputColumns(componentDisplayName, createInput);
OutputColumns outCols = new OutputColumns(columnsSpec.getColumns().size(), Object.class);
int i = 0;
for (org.datacleaner.restclient.OutputColumns.OutputColumn colSpec : columnsSpec.getColumns()) {
outCols.setColumnName(i, colSpec.name);
try {
outCols.setColumnType(i, Class.forName(colSpec.type));
} catch (ClassNotFoundException e) {
final Class> type;
if (isOutputColumnEnumeration(colSpec.schema)) {
type = String.class;
} else {
// For unknown types we specify "Object" as a class
// This causes that Jackson will deserialize it not as a JsonNode,
// but simple Java Maps, Lists, Strings etc.
// We NEED it, because we need the values to be Serializable.
// This is because some analyzer results contain the values
// and analyzer results must be serializable (e.g. to save it in DC monitor, or
// to send it over wire when doing ditributed computing etc.)
type = Object.class;
}
outCols.setColumnType(i, type);
}
i++;
}
return outCols;
} finally {
if (wasInit) {
closeClient();
}
}
}
}