// org.datacleaner.components.remote.RemoteTransformer Maven / Gradle / Ivy
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.components.remote;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.metamodel.schema.ColumnTypeImpl;
import org.apache.metamodel.util.EqualsBuilder;
import org.datacleaner.Version;
import org.datacleaner.api.Close;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Validate;
import org.datacleaner.configuration.RemoteServerData;
import org.datacleaner.job.concurrent.PreviousErrorsExistException;
import org.datacleaner.restclient.ComponentConfiguration;
import org.datacleaner.restclient.ComponentRESTClient;
import org.datacleaner.restclient.ComponentsRestClientUtils;
import org.datacleaner.restclient.CreateInput;
import org.datacleaner.restclient.ProcessStatelessInput;
import org.datacleaner.restclient.ProcessStatelessOutput;
import org.datacleaner.restclient.RESTClientException;
import org.datacleaner.restclient.Serializator;
import org.datacleaner.util.batch.BatchRowCollectingTransformer;
import org.datacleaner.util.batch.BatchSink;
import org.datacleaner.util.batch.BatchSource;
import org.datacleaner.util.convert.StringConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.types.ArraySchema;
import com.fasterxml.jackson.module.jsonSchema.types.ValueTypeSchema;
/**
 * Transformer that acts as a proxy to a remote transformer hosted on a DataCleaner Monitor server.
 * Instances of this transformer can only be created by
 * {@link org.datacleaner.descriptors.RemoteTransformerDescriptorImpl} component descriptors.
 *
 * @since 9/1/15
 */
public class RemoteTransformer extends BatchRowCollectingTransformer {
private static final Logger logger = LoggerFactory.getLogger(RemoteTransformer.class);
// JSON mapper obtained from Serializator; used to turn property values into JSON nodes.
private static final ObjectMapper mapper = Serializator.getJacksonObjectMapper();
// Connection details (URL, username, password) of the remote server, set by the constructor.
private final RemoteServerData serverData;
// Human-readable component name, used in log and error messages.
private String componentDisplayName;
// REST client; created in initClient() and reset to null in closeClient().
private ComponentRESTClient client;
// Configured property values keyed by property name. The type arguments are required so that
// the entries can be iterated as Map.Entry; String keys are assumed from how the entries are
// copied into the ComponentConfiguration - confirm against the property setter.
private Map<String, Object> configuredProperties = new TreeMap<>();
// Failure flag; its readers and writers are outside this section of the file.
private final AtomicBoolean failed = new AtomicBoolean(false);
/**
 * Cache in front of {@code getOutputColumnsInternal}, looked up by the {@link CreateInput}
 * that describes the current configuration.
 *
 * The type arguments are needed for {@code fetch(CreateInput)} to override the cache's fetch
 * method and for {@code getCachedValue} to yield {@link OutputColumns}.
 * NOTE(review): assumes the type parameters are declared as (key, value) - confirm against
 * SingleValueErrorAwareCache.
 */
private final SingleValueErrorAwareCache<CreateInput, OutputColumns> cachedOutputColumns =
        new SingleValueErrorAwareCache<CreateInput, OutputColumns>() {
            @Override
            protected OutputColumns fetch(CreateInput input) throws Exception {
                return getOutputColumnsInternal(input);
            }
        };
/**
 * Creates a transformer proxy for one remote component.
 *
 * @param serverData connection details of the server that hosts the component
 * @param componentDisplayName name of the component, used in log and error messages
 */
public RemoteTransformer(RemoteServerData serverData, String componentDisplayName) {
    this.componentDisplayName = componentDisplayName;
    this.serverData = serverData;
}
/**
 * Creates the REST client from the configured server data and the current DataCleaner version.
 *
 * @throws RemoteComponentException if the client cannot be created; the message contains the
 *         component name and the message of the underlying failure
 */
@Initialize
public void initClient() throws RemoteComponentException {
    try {
        logger.debug("Initializing '{}' @{}", componentDisplayName, this.hashCode());
        client = new ComponentRESTClient(serverData.getUrl(), serverData.getUsername(), serverData.getPassword(),
                Version.getVersion());
    } catch (Exception e) {
        // The exception thrown below carries only the message text, so record the complete
        // failure (type and stack trace) here before it is lost.
        logger.debug("Could not create REST client for remote component '{}'", componentDisplayName, e);
        throw new RemoteComponentException(
                "Remote component '" + componentDisplayName + "' is temporarily unavailable. \n" + e.getMessage());
    }
}
/**
 * Drops the reference to the REST client after logging which instance is being closed.
 */
@Close
public void closeClient() {
    final int instanceHash = this.hashCode();
    logger.debug("closing '{}' @{}", componentDisplayName, instanceHash);
    this.client = null;
}
/**
 * Validates the current configuration by requesting the output columns for it through the cache.
 *
 * @throws RuntimeException if the REST client reports code 422; the message is the reason
 *         given by the server and the original exception is attached as the cause
 * @throws Exception any other, non-REST failure raised while fetching the output columns
 */
@Validate
public void validate() throws Exception {
    CreateInput createInput = new CreateInput();
    createInput.configuration = getConfiguration(getUsedInputColumns());
    try {
        cachedOutputColumns.getCachedValue(createInput);
    } catch (RESTClientException e) {
        if (e.getCode() == 422) {
            // Validation failed - simplify the error message, but keep the cause for diagnosis
            throw new RuntimeException(e.getReason(), e);
        }
        // Other REST errors do not fail validation (behavior kept as-is); leave a trace
        // instead of dropping them silently.
        logger.debug("Ignoring REST error (code {}) while validating '{}'", e.getCode(), componentDisplayName, e);
    }
}
/**
 * Returns the output columns for the current configuration, looked up through the cache.
 *
 * @return the cached or freshly fetched output columns, or
 *         {@link OutputColumns#NO_OUTPUT_COLUMNS} if the lookup throws
 */
@Override
public OutputColumns getOutputColumns() {
    final CreateInput request = new CreateInput();
    request.configuration = getConfiguration(getUsedInputColumns());

    OutputColumns result;
    try {
        result = cachedOutputColumns.getCachedValue(request);
    } catch (Exception e) {
        // A failed lookup is only logged; the caller gets the empty column set.
        logger.debug("Error retrieving columns of transformer '" + componentDisplayName + "': " + e.toString());
        result = OutputColumns.NO_OUTPUT_COLUMNS;
    }
    return result;
}
/**
 * Tells whether the given schema, or the item schema when it is an array schema, is a value
 * type schema with a non-empty set of enum values.
 *
 * @param schema the schema to inspect; may be null
 * @return true only if a non-empty enum set is found, false otherwise (including for null)
 */
private boolean isOutputColumnEnumeration(JsonSchema schema) {
    if (schema == null) {
        return false;
    }

    // For arrays the enumeration is declared on the single item schema.
    final JsonSchema itemSchema = schema.isArraySchema()
            ? ((ArraySchema) schema).getItems().asSingleItems().getSchema()
            : schema;

    if (!(itemSchema instanceof ValueTypeSchema)) {
        return false;
    }

    final Set enumValues = ((ValueTypeSchema) itemSchema).getEnums();
    return enumValues != null && !enumValues.isEmpty();
}
private ComponentConfiguration getConfiguration(List> inputColumns) {
ComponentConfiguration configuration = new ComponentConfiguration();
for(Map.Entry propertyE: configuredProperties.entrySet()) {
configuration.getProperties().put(propertyE.getKey(), mapper.valueToTree(propertyE.getValue()));
}
for(InputColumn> col: inputColumns) {
configuration.getColumns().add(ComponentsRestClientUtils.createInputColumnSpecification(
col.getName(),
col.getDataType(),
ColumnTypeImpl.convertColumnType(col.getDataType()).getName(),
mapper.getNodeFactory()));
}
return configuration;
}
private List> getUsedInputColumns() {
ArrayList> columns = new ArrayList<>();
for(Object propValue: configuredProperties.values()) {
if(propValue instanceof InputColumn) {
columns.add((InputColumn>) propValue);
} else if(propValue instanceof InputColumn[]) {
for(InputColumn> col: ((InputColumn[])propValue)) {
columns.add(col);
}
} else if(propValue instanceof Collection) {
for(Object value: ((Collection>)propValue)) {
if(value instanceof InputColumn) {
columns.add((InputColumn>)value);
} else {
// don't iterate the rest if the first item is not an input column.
break;
}
}
}
// TODO: are maps possible?
}
return columns;
}
private void convertOutputRows(JsonNode rowSets, BatchSink> sink, int sinkSize) {
OutputColumns outCols = getOutputColumns();
if(rowSets == null || rowSets.size() < 1) { throw new RuntimeException("Expected exactly 1 row in response"); }
int rowI = 0;
for(JsonNode rowSet: rowSets) {
if(rowI >= sinkSize) {
throw new RuntimeException("Expected " + sinkSize + " rows, but got more");
}
List © 2015 - 2025 Weber Informatics LLC | Privacy Policy