com.google.refine.model.recon.ReconciledDataExtensionJob Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of main Show documentation
Show all versions of main Show documentation
OpenRefine is a free, open-source power tool for working with messy data and improving it.
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
*
*/
package com.google.refine.model.recon;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonView;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.expr.functions.ToDate;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
import com.google.refine.util.HttpClient;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.JsonViews;
import com.google.refine.util.ParsingUtilities;
public class ReconciledDataExtensionJob {
/**
 * One property requested from a data extension service.
 * <p>
 * Instances are immutable value holders that Jackson reads and writes using
 * the JSON keys {@code id}, {@code name} and {@code settings}.
 */
static public class DataExtensionProperty {

    /** Identifier of the property (JSON key {@code id}). */
    @JsonProperty("id")
    public final String id;

    /** Name of the property (JSON key {@code name}); tagged with the {@code NonSaveMode} JSON view. */
    @JsonProperty("name")
    @JsonView(JsonViews.NonSaveMode.class)
    public final String name;

    /** Optional per-property settings; left out of the JSON output when {@code null}. */
    @JsonProperty("settings")
    @JsonInclude(Include.NON_NULL)
    public final Map<String, Object> settings;

    /**
     * Creates a property description; also used by Jackson for deserialization.
     *
     * @param id
     *            value of the {@code id} key
     * @param name
     *            value of the {@code name} key
     * @param settings
     *            value of the {@code settings} key, may be {@code null}
     */
    @JsonCreator
    public DataExtensionProperty(
            @JsonProperty("id") String id,
            @JsonProperty("name") String name,
            @JsonProperty("settings") Map<String, Object> settings) {
        this.id = id;
        this.name = name;
        // The map is stored as given (no defensive copy).
        this.settings = settings;
    }
}
/**
 * Configuration of a data extension request: the list of properties to fetch.
 */
static public class DataExtensionConfig {

    /**
     * Properties to fetch (JSON key {@code properties}).
     * The element type is declared explicitly so that Jackson binds each entry
     * to a {@link DataExtensionProperty} instead of an untyped map.
     */
    @JsonProperty("properties")
    public final List<DataExtensionProperty> properties;

    /**
     * Creates a configuration; also used by Jackson for deserialization.
     *
     * @param properties
     *            value of the {@code properties} key; stored as given
     */
    @JsonCreator
    public DataExtensionConfig(
            @JsonProperty("properties") List<DataExtensionProperty> properties) {
        this.properties = properties;
    }

    /**
     * Parses a configuration from its JSON text using the shared
     * {@code ParsingUtilities.mapper}.
     *
     * @param json
     *            JSON text of the configuration
     * @return the deserialized configuration
     * @throws IOException
     *             if the mapper fails to read the text
     */
    public static DataExtensionConfig reconstruct(String json) throws IOException {
        return ParsingUtilities.mapper.readValue(json, DataExtensionConfig.class);
    }
}
/**
 * A {@link DataExtensionConfig} together with the identifiers it should be
 * applied to. Serialized by Jackson with the JSON keys {@code ids} and
 * {@code properties}.
 */
static public class DataExtensionQuery extends DataExtensionConfig {

    /** Identifiers to extend (JSON key {@code ids}). */
    @JsonProperty("ids")
    public final List<String> ids;

    /**
     * Creates a query; also used by Jackson for deserialization.
     *
     * @param ids
     *            value of the {@code ids} key; stored as given
     * @param properties
     *            value of the {@code properties} key, passed to the superclass
     */
    @JsonCreator
    public DataExtensionQuery(
            @JsonProperty("ids") List<String> ids,
            @JsonProperty("properties") List<DataExtensionProperty> properties) {
        super(properties);
        this.ids = ids;
    }
}
/**
 * Immutable wrapper around a two-dimensional array of values.
 */
public static class DataExtension {

    /** The wrapped array; the reference passed to the constructor is kept as is. */
    public final Object[][] data;

    /**
     * @param data
     *            the array to wrap (not copied)
     */
    public DataExtension(Object[][] data) {
        this.data = data;
    }
}
/**
 * Metadata about one column returned by the extension service.
 * <p>
 * Json serialization is used in PreviewExtendDataCommand.
 * <p>
 * NOTE(review): the constructor reads the expected type from the JSON key
 * {@code type}, while the {@code expectedType} field carries no
 * {@code @JsonProperty} annotation - confirm which key is intended on output.
 */
public static class ColumnInfo {

    /** Column name (JSON key {@code name}). */
    @JsonProperty("name")
    public final String name;

    /** Column identifier (JSON key {@code id}). */
    @JsonProperty("id")
    public final String id;

    /** Expected reconciliation type of the column's values. */
    public final ReconType expectedType;

    /**
     * Creates the column metadata; also used by Jackson for deserialization.
     *
     * @param name
     *            value of the {@code name} key
     * @param id
     *            value of the {@code id} key
     * @param expectedType
     *            value of the {@code type} key
     */
    @JsonCreator
    protected ColumnInfo(
            @JsonProperty("name") String name,
            @JsonProperty("id") String id,
            @JsonProperty("type") ReconType expectedType) {
        this.expectedType = expectedType;
        this.id = id;
        this.name = name;
    }
}
/** Configuration describing which properties this job fetches. */
final public DataExtensionConfig extension;

/** Endpoint that extension queries are posted to. */
final public String endpoint;

/** Column metadata; starts empty and is filled by {@code extend} from the first response. */
final public List<ColumnInfo> columns = new ArrayList<>();

// not final: initialized lazily
private static HttpClient httpClient = null;

/**
 * Creates a job for the given configuration and endpoint.
 *
 * @param obj
 *            the extension configuration, stored in {@link #extension}
 * @param endpoint
 *            the endpoint to query, stored in {@link #endpoint}
 */
public ReconciledDataExtensionJob(DataExtensionConfig obj, String endpoint) {
    this.extension = obj;
    this.endpoint = endpoint;
}
/*
* TODO Although the HTTP code has been unified, there may still be opportunity to refactor a higher level querying
* library out of this which could be shared with StandardReconConfig
*
* It may also be possible to extract a library to query reconciliation services which could be used outside of
* OpenRefine.
*/
public Map extend(
Set ids,
Map reconCandidateMap) throws Exception {
StringWriter writer = new StringWriter();
formulateQuery(ids, extension, writer);
String query = writer.toString();
String response = postExtendQuery(this.endpoint, query);
ObjectNode o = ParsingUtilities.mapper.readValue(response, ObjectNode.class);
if (columns.size() == 0) {
// Extract the column metadata
List newColumns = ParsingUtilities.mapper.convertValue(o.get("meta"), new TypeReference>() {
});
columns.addAll(newColumns);
}
Map map = new HashMap();
if (o.has("rows") && o.get("rows") instanceof ObjectNode) {
ObjectNode records = (ObjectNode) o.get("rows");
// for each identifier
for (String id : ids) {
if (records.has(id) && records.get(id) instanceof ObjectNode) {
ObjectNode record = (ObjectNode) records.get(id);
ReconciledDataExtensionJob.DataExtension ext = collectResult(record, reconCandidateMap);
if (ext != null) {
map.put(id, ext);
}
}
}
}
return map;
}
/**
 * Posts an extension query to the given endpoint through the shared HTTP
 * client, using {@code "extend"} as the name and the query as the value.
 *
 * @param endpoint
 *            the endpoint to post to
 * @param query
 *            the query text to send
 * @return the string returned by the HTTP client (presumably the response body - verify against HttpClient)
 * @throws IOException
 *             if the HTTP client reports a failure
 */
protected static String postExtendQuery(String endpoint, String query) throws IOException {
    HttpClient client = getHttpClient();
    return client.postNameValue(endpoint, "extend", query);
}
/**
 * Returns the shared HTTP client, creating it on the first call.
 * No synchronization is performed here.
 *
 * @return the shared {@code HttpClient} instance
 */
private static HttpClient getHttpClient() {
    if (httpClient != null) {
        return httpClient;
    }
    httpClient = new HttpClient();
    return httpClient;
}
protected ReconciledDataExtensionJob.DataExtension collectResult(
ObjectNode record,
Map reconCandidateMap) {
List