
org.datacleaner.components.http.HttpRequestTransformer Maven / Gradle / Ivy
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.components.http;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import javax.inject.Inject;
import javax.inject.Named;
import org.apache.http.HttpEntity;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import org.apache.metamodel.util.FileHelper;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Close;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.MappedProperty;
import org.datacleaner.api.NumberProperty;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.StringProperty;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.ImproveSuperCategory;
import org.datacleaner.components.categories.ReferenceDataCategory;
import org.datacleaner.util.StringUtils;
import org.datacleaner.util.ws.PooledServiceSession;
import org.datacleaner.util.ws.ServiceResult;
import com.google.common.base.Strings;
@Named("HTTP request")
@Categorized(value = ReferenceDataCategory.class, superCategory = ImproveSuperCategory.class)
@Description("Sends a HTTP request for each record and retrieves the response as transformation output.\n"
+ "For each request you can have dynamic elements in the URL or in the request body that is sent. Provide variable names that are unique to the URL and request body and reference them there. For instance:\n"
+ "URL: http://www.google.com/?q=${term} "
+ "Input: column1 "
+ "Variable: ${term}
")
public class HttpRequestTransformer implements Transformer {
/** Name of the configured property that holds the input columns. */
public static final String PROPERTY_INPUT_COLUMNS = "Input";
/** Name of the configured property that holds the variable names mapped to the input columns. */
public static final String PROPERTY_VARIABLE_NAMES = "Variable names";
/** Name of the configured property that holds the URL to invoke. */
private static final String PROPERTY_URL = "URL";
/**
 * URL to invoke, exposed as the first configured property.
 * Per the property description, variable names occurring in it are replaced
 * with the corresponding dynamic values before the URL is used.
 * The initial value is only the scheme prefix {@code "http://"}.
 */
@Inject
@Configured(value = PROPERTY_URL, order = 1)
@Description("The URL to invoke. The URL will be pre-processed by replacing any variable names in it with the corresponding dynamic values.")
String url = "http://";
/** HTTP method setting; defaults to {@code HttpMethod.POST}. */
@Inject
@Configured(order = 2)
HttpMethod method = HttpMethod.POST;
@Inject
@Configured(value = PROPERTY_INPUT_COLUMNS, order = 3)
InputColumn>[] input;
/**
 * Variable names, exposed as the configured property "Variable names" and
 * mapped onto the "Input" property through {@code @MappedProperty}.
 * NOTE(review): presumably entry i is the placeholder that is filled from
 * input column i - confirm against the transform logic.
 */
@Inject
@Configured(value = PROPERTY_VARIABLE_NAMES, order = 4)
@MappedProperty(PROPERTY_INPUT_COLUMNS)
String[] variableNames;
/**
 * Body of the request to invoke. Declared as a multi-line string property
 * for which the empty string is allowed; defaults to the empty string.
 * Per the property description, variable names occurring in it are replaced
 * with the corresponding dynamic values before the body is sent.
 */
@Inject
@Configured(order = 5)
@StringProperty(multiline = true, emptyString = true)
@Description("The body of the request to invoke. The request body will be pre-processed by replacing any variable names in it with the corresponding dynamic values.")
String requestBody = "";
/**
 * Optional headers setting (not a required property), keyed by header name
 * with the header value as the map value. No default is assigned, so the
 * field stays {@code null} unless a value is configured.
 */
@Inject
@Configured(required = false, order = 100)
Map<String, String> headers;
/**
 * Optional charset name; defaults to the name of {@code HTTP.DEF_CONTENT_CHARSET}.
 * NOTE(review): presumably applied when encoding the request body and/or
 * decoding the response - confirm against the request-building code.
 */
@Inject
@Configured(required = false, order = 101)
String charset = HTTP.DEF_CONTENT_CHARSET.name();
/**
 * Optional upper bound on the number of requests that may be in flight at the
 * same time. Declared as a strictly positive number (zero and negative values
 * are disallowed by {@code @NumberProperty}); defaults to 20.
 */
@Inject
@Configured(required = false, order = 150)
@NumberProperty(negative = false, zero = false, positive = true)
@Description("The maximum number of requests that may be fired at the same time.\n"
+ "Higher values may provide better throughput while it may also add load to the HTTP server.")
int maxConcurrentRequests = 20;
// Apache HttpClient instance held by this transformer. It is not assigned at
// declaration; where it is created and closed is not visible in this part of the file.
private CloseableHttpClient _httpClient;
private PooledServiceSession
© 2015 - 2025 Weber Informatics LLC | Privacy Policy