Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download or modify it as often as you want.
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.components.remote;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.metamodel.schema.ColumnTypeImpl;
import org.apache.metamodel.util.EqualsBuilder;
import org.datacleaner.Version;
import org.datacleaner.api.Close;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Validate;
import org.datacleaner.configuration.RemoteServerData;
import org.datacleaner.job.concurrent.PreviousErrorsExistException;
import org.datacleaner.restclient.ComponentConfiguration;
import org.datacleaner.restclient.ComponentRESTClient;
import org.datacleaner.restclient.ComponentsRestClientUtils;
import org.datacleaner.restclient.CreateInput;
import org.datacleaner.restclient.ProcessStatelessInput;
import org.datacleaner.restclient.ProcessStatelessOutput;
import org.datacleaner.restclient.RESTClientException;
import org.datacleaner.restclient.Serializator;
import org.datacleaner.util.batch.BatchRowCollectingTransformer;
import org.datacleaner.util.batch.BatchSink;
import org.datacleaner.util.batch.BatchSource;
import org.datacleaner.util.convert.StringConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.types.ArraySchema;
import com.fasterxml.jackson.module.jsonSchema.types.ValueTypeSchema;
/**
* Transformer that acts as a proxy for a remote transformer hosted on a DataCleaner Monitor server.
* Instances of this transformer can be created only by
* {@link org.datacleaner.descriptors.RemoteTransformerDescriptorImpl} component descriptors.
*
* @since 9/1/15
*/
public class RemoteTransformer extends BatchRowCollectingTransformer {
/** Logger for this class. */
private static final Logger logger = LoggerFactory.getLogger(RemoteTransformer.class);

/** Jackson mapper obtained from {@link Serializator}; used to convert property values to JSON nodes. */
private static final ObjectMapper mapper = Serializator.getJacksonObjectMapper();

/** Source of the URL, username and password handed to the REST client. */
private final RemoteServerData serverData;

/**
 * Failure flag, initially {@code false}.
 * NOTE(review): its readers and writers are outside the part of the class reviewed here.
 */
private final AtomicBoolean failed = new AtomicBoolean(false);

/** Display name of the remote component; appears in log and error messages. */
private String componentDisplayName;

/** REST client; assigned in {@code initClient()} and reset to {@code null} in {@code closeClient()}. */
private ComponentRESTClient client;

/**
 * Cache used to look up the output columns for a given {@link CreateInput}. Its {@code fetch} callback
 * delegates to {@code getOutputColumnsInternal(CreateInput)}.
 * The type arguments are spelled out explicitly: with a raw cache type, {@code fetch(CreateInput)}
 * would not override the erased {@code fetch(Object)}.
 */
private final SingleValueErrorAwareCache<CreateInput, OutputColumns> cachedOutputColumns =
        new SingleValueErrorAwareCache<CreateInput, OutputColumns>() {
            @Override
            protected OutputColumns fetch(final CreateInput input) throws Exception {
                return getOutputColumnsInternal(input);
            }
        };

/**
 * Configured property values keyed by property name. A {@link TreeMap}, so iteration follows the
 * natural ordering of the names.
 */
private Map<String, Object> configuredProperties = new TreeMap<>();
/**
 * Creates a transformer bound to one remote server and one component name.
 *
 * @param serverData data object later queried for the URL, username and password of the server
 * @param componentDisplayName name of the remote component, used in log and error messages
 */
public RemoteTransformer(final RemoteServerData serverData, final String componentDisplayName) {
    this.componentDisplayName = componentDisplayName;
    this.serverData = serverData;
}
/**
 * Creates the REST client from the URL, username and password in {@code serverData} and the current
 * DataCleaner version.
 *
 * @throws RemoteComponentException if anything goes wrong while creating the client; the message names
 *         the component and appends the message of the underlying failure
 */
@Initialize
public void initClient() throws RemoteComponentException {
    try {
        logger.debug("Initializing '{}' @{}", componentDisplayName, this.hashCode());
        client = new ComponentRESTClient(serverData.getUrl(), serverData.getUsername(), serverData.getPassword(),
                Version.getVersion());
    } catch (final Exception e) {
        // Only e.getMessage() makes it into the exception thrown below, so log the complete failure
        // (including its stack trace) first; otherwise the root cause is lost for diagnosis.
        logger.debug("Initialization of '{}' failed", componentDisplayName, e);
        throw new RemoteComponentException(
                "Remote component '" + componentDisplayName + "' is temporarily unavailable. \n" + e.getMessage());
    }
}
/**
 * Logs the shutdown of this instance and drops the reference to the REST client.
 */
@Close
public void closeClient() {
    final int instanceHash = this.hashCode();
    logger.debug("closing '{}' @{}", componentDisplayName, instanceHash);
    client = null;
}
/**
 * Validates the current configuration by requesting the output columns for it through the cache.
 *
 * <p>A {@link RESTClientException} with code 422 is reported as a {@link RuntimeException} whose message
 * is the reason given by the REST client. Any other {@link RESTClientException} does not fail validation.
 *
 * @throws RuntimeException if the lookup fails with code 422
 * @throws Exception any other (non-{@link RESTClientException}) failure, propagated unchanged
 */
@Validate
public void validate() throws Exception {
    final CreateInput createInput = new CreateInput();
    createInput.configuration = getConfiguration(getUsedInputColumns());
    try {
        cachedOutputColumns.getCachedValue(createInput);
    } catch (final RESTClientException e) {
        if (e.getCode() == 422) {
            // Validation failed - simplify the error message, but keep the original exception as the cause.
            throw new RuntimeException(e.getReason(), e);
        }
        // Other REST errors are deliberately not treated as validation failures; leave a trace
        // instead of discarding them silently.
        logger.debug("Ignoring REST error while validating '{}': {}", componentDisplayName, e.toString());
    }
}
/**
 * Returns the output columns for the current configuration, looked up through the output column cache.
 *
 * <p>The configuration is assembled before the guarded section, so failures while building it propagate
 * to the caller. Failures of the cache lookup itself are logged at debug level and mapped to
 * {@link OutputColumns#NO_OUTPUT_COLUMNS}.
 *
 * @return the columns returned by the cache, or {@link OutputColumns#NO_OUTPUT_COLUMNS} if the lookup throws
 */
@Override
public OutputColumns getOutputColumns() {
    final CreateInput createInput = new CreateInput();
    createInput.configuration = getConfiguration(getUsedInputColumns());
    try {
        return cachedOutputColumns.getCachedValue(createInput);
    } catch (final Exception e) {
        // Parameterized like the other log calls in this class; the rendered message is unchanged and
        // no string is built when debug logging is off.
        logger.debug("Error retrieving columns of transformer '{}': {}", componentDisplayName, e.toString());
        return OutputColumns.NO_OUTPUT_COLUMNS;
    }
}
/**
 * Tells whether a schema describes an enumerated value, either directly or as the single item schema
 * of an array.
 *
 * @param schema the schema to inspect; may be {@code null}
 * @return {@code true} if the schema (or, for an array schema, its single item schema) is a
 *         {@link ValueTypeSchema} with a non-empty set of enum values; {@code false} otherwise,
 *         including for {@code null} and for array schemas without a single item schema
 */
private boolean isOutputColumnEnumeration(final JsonSchema schema) {
    if (schema == null) {
        return false;
    }
    final JsonSchema baseSchema;
    if (schema.isArraySchema()) {
        final ArraySchema.Items items = ((ArraySchema) schema).getItems();
        // An array schema may declare no items, or tuple-style items; neither has a single item
        // schema, and dereferencing asSingleItems() for them would fail with a NullPointerException.
        if (items == null || !items.isSingleItems()) {
            return false;
        }
        baseSchema = items.asSingleItems().getSchema();
    } else {
        baseSchema = schema;
    }
    // instanceof is also false for a null item schema
    if (!(baseSchema instanceof ValueTypeSchema)) {
        return false;
    }
    final Set<?> enums = ((ValueTypeSchema) baseSchema).getEnums();
    return enums != null && !enums.isEmpty();
}
private ComponentConfiguration getConfiguration(final List> inputColumns) {
final ComponentConfiguration configuration = new ComponentConfiguration();
for (final Map.Entry propertyE : configuredProperties.entrySet()) {
configuration.getProperties().put(propertyE.getKey(), mapper.valueToTree(propertyE.getValue()));
}
for (final InputColumn> col : inputColumns) {
configuration.getColumns().add(ComponentsRestClientUtils
.createInputColumnSpecification(col.getName(), col.getDataType(),
ColumnTypeImpl.convertColumnType(col.getDataType()).getName(), mapper.getNodeFactory()));
}
return configuration;
}
private List> getUsedInputColumns() {
final ArrayList> columns = new ArrayList<>();
for (final Object propValue : configuredProperties.values()) {
if (propValue instanceof InputColumn) {
columns.add((InputColumn>) propValue);
} else if (propValue instanceof InputColumn[]) {
for (final InputColumn> col : ((InputColumn[]) propValue)) {
columns.add(col);
}
} else if (propValue instanceof Collection) {
for (final Object value : ((Collection>) propValue)) {
if (value instanceof InputColumn) {
columns.add((InputColumn>) value);
} else {
// don't iterate the rest if the first item is not an input column.
break;
}
}
}
// TODO: are maps possible?
}
return columns;
}
private void convertOutputRows(final JsonNode rowSets, final BatchSink> sink,
final int sinkSize) {
final OutputColumns outCols = getOutputColumns();
if (rowSets == null || rowSets.size() < 1) {
throw new RuntimeException("Expected exactly 1 row in response");
}
int rowI = 0;
for (final JsonNode rowSet : rowSets) {
if (rowI >= sinkSize) {
throw new RuntimeException("Expected " + sinkSize + " rows, but got more");
}
final List