// com.cinchapi.concourse.importer.LineBasedImporter Maven / Gradle / Ivy
/*
* Copyright (c) 2013-2018 Cinchapi Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cinchapi.concourse.importer;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import com.cinchapi.concourse.Concourse;
import com.cinchapi.concourse.Constants;
import com.cinchapi.concourse.thrift.Operator;
import com.cinchapi.concourse.util.Convert;
import com.cinchapi.concourse.util.FileOps;
import com.cinchapi.concourse.util.QuoteAwareStringSplitter;
import com.cinchapi.concourse.util.Strings;
import com.cinchapi.concourse.util.TLists;
import com.google.common.collect.Lists;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
/**
 * An {@link Importer} that handles data from a file that can be delimited into
 * one or more lines. Each line is considered a single group of data that is
 * converted to a {@link JsonObject} and imported in one or more records in
 * Concourse.
 *
 * @author Jeff Nelson
 * @deprecated this class is annotated as deprecated; this file does not name
 *             a replacement
 */
@Deprecated
public abstract class LineBasedImporter extends JsonImporter {

    /**
     * A flag that indicates whether the importer should use the optimized split
     * path that takes advantage of the {@link QuoteAwareStringSplitter}. On
     * that path only the first character of {@link #delimiter()} is used, so
     * the constructor enables it only for single character delimiters.
     */
    protected boolean useOptimizedSplitPath = true; // visible for testing

    /**
     * Construct a new instance.
     * <p>
     * NOTE: this constructor calls the overridable {@link #delimiter()} method
     * before any subclass constructor body has run, so implementations of
     * {@link #delimiter()} must not depend on state that the subclass
     * initializes in its own constructor.
     * </p>
     *
     * @param concourse the connection handed to the parent importer
     * @param log the logger handed to the parent importer
     */
    protected LineBasedImporter(Concourse concourse, Logger log) {
        super(concourse, log);
        useOptimizedSplitPath = delimiter().length() == 1;
    }

    @Override
    public final Set<Long> importFile(String file) {
        return importFile(file, null);
    }

    /**
     * Import the data contained in {@code file} into {@link Concourse}.
     * <p>
     * The first line is passed to {@link #validateFileFormat(String)}. If
     * {@link #header()} returns {@code null}, the first line is also consumed
     * as the header that names the keys; every other line is parsed into a
     * {@link JsonObject} and the whole batch is handed to
     * {@code importString} as one JSON array.
     * </p>
     * <p>
     * Note that if {@code resolveKey} is specified, an attempt will be made to
     * add the data from each group into the existing records that are found
     * using {@code resolveKey} and its corresponding value in the group. If no
     * existing record is found (or the group has no value for
     * {@code resolveKey}), the group is imported as new data instead of being
     * dropped.
     * </p>
     *
     * @param file the path of the file to import
     * @param resolveKey the key used to look up existing records, or
     *            {@code null} to import every group as new data
     * @return the value returned from {@code importString} for the assembled
     *         JSON (the records created/affected by the import)
     */
    public final Set<Long> importFile(String file,
            @Nullable String resolveKey) {
        // TODO add option to specify batchSize, which is how many objects to
        // send over the wire in one atomic batch
        List<String> lines = FileOps.readLines(file);
        String[] keys = header();
        JsonArray array = new JsonArray();
        boolean checkedFileFormat = false;
        for (String line : lines) {
            if(!checkedFileFormat) {
                // Only the first line of the file is validated
                validateFileFormat(line);
                checkedFileFormat = true;
            }
            if(keys == null) {
                // No header was supplied by the subclass, so this line is the
                // header and carries no data
                keys = parseKeys(line);
                log.info("Parsed keys from header: {}", line);
                continue;
            }
            JsonObject object = parseLine(line, keys);
            boolean resolved = resolveKey != null && object.has(resolveKey)
                    && addToResolvedRecords(array, resolveKey,
                            object.get(resolveKey), line, keys);
            if(!resolved) {
                // Either no resolution was requested/possible or nothing
                // matched; import the line as new data so it is never lost
                array.add(object);
            }
            log.info("Importing {}", line);
        }
        return importString(array.toString());
    }

    /**
     * The delimiter that is used to split fields on each line.
     *
     * @return the delimiter
     */
    protected abstract String delimiter();

    /**
     * This method is provided so the subclass can provide an ordered array of
     * headers if they are not provided in the file as the first line. The
     * default implementation returns {@code null}, which causes the first line
     * of the file to be parsed as the header.
     *
     * @return the header information, or {@code null} if the header is the
     *         first line of the file
     */
    protected String[] header() {
        return null;
    }

    /**
     * At a minimum, this method is responsible for taking a raw source string
     * value and converting it to a {@link JsonElement}. The default
     * implementation makes an effort to represent numeric and boolean values as
     * appropriate {@link JsonPrimitive json primitives}. All other kinds of
     * values are represented as strings, which is the correct format for the
     * server to handle masquerading types (i.e. resolvable links, links, tags,
     * forced doubles, etc).
     * <p>
     * The default behaviour is appropriate in most cases, but this method can
     * be used by subclasses to define dynamic intermediary transformations to
     * data to better prepare it for import.
     * </p>
     * <h2>Examples</h2>
     * <h3>Specifying Link Resolution</h3>
     * <p>
     * The server will convert raw data of the form
     * {@code @<key>@value@<key>@} into a Link to all the records where key
     * equals value in Concourse. For this purpose, the subclass can convert
     * the raw value to this form using the
     * {@link Convert#stringToResolvableLinkSpecification(String, String)}
     * method.
     * </p>
     * <h3>Normalizing Data</h3>
     * <p>
     * It may be desirable to normalize the raw data before input. For example,
     * the subclass may wish to convert all strings to a specific case, or
     * sanitize inputs, etc.
     * </p>
     * <h3>Compacting Representation</h3>
     * <p>
     * If a column in a file contains an enumerated set of string values, it
     * may be desirable to transform the values to a string representation of a
     * number so that, when converted, the data is more compact and takes up
     * less space.
     * </p>
     *
     * @param key the key (column name) that the value belongs to
     * @param value the raw value read from the line
     * @return the transformed value as a {@link JsonElement}
     */
    protected JsonElement transformValue(String key, String value) {
        Object parsed;
        if((parsed = Strings.tryParseNumberStrict(value)) != null) {
            return new JsonPrimitive((Number) parsed);
        }
        else if((parsed = Strings.tryParseBoolean(value)) != null) {
            return new JsonPrimitive((Boolean) parsed);
        }
        else {
            return new JsonPrimitive(value);
        }
    }

    /**
     * Check {@code line} to determine if it is valid for the file format that
     * is supported by the importer.
     *
     * @param line is a line of the file being imported
     * @throws IllegalArgumentException if the line from the file is
     *             not acceptable for the file format
     */
    protected abstract void validateFileFormat(String line)
            throws IllegalArgumentException;

    /**
     * For every value associated with {@code resolveKey} on the current line,
     * find the existing records where {@code resolveKey} equals that value and
     * add one copy of the line's data, tagged with the record id, to
     * {@code array} for each record found.
     *
     * @param array the batch that collects the objects to import
     * @param resolveKey the key used to look up existing records
     * @param resolveValue the line's value for {@code resolveKey}; either a
     *            single element or a {@link JsonArray} of elements
     * @param line the raw line, re-parsed to get a fresh object per record
     * @param keys the keys that name the fields on the line
     * @return {@code true} if at least one record was found and added to
     *         {@code array}
     */
    private boolean addToResolvedRecords(JsonArray array, String resolveKey,
            JsonElement resolveValue, String line, String[] keys) {
        JsonArray resolveValues;
        if(resolveValue.isJsonArray()) {
            resolveValues = resolveValue.getAsJsonArray();
        }
        else {
            resolveValues = new JsonArray();
            resolveValues.add(resolveValue);
        }
        boolean resolvedAny = false;
        for (JsonElement element : resolveValues) {
            // The JSON rendering (e.g. quoted for strings) is what gets
            // converted back to a java value for the lookup
            Object stored = Convert.stringToJava(element.toString());
            Set<Long> resolved = concourse.find(resolveKey, Operator.EQUALS,
                    stored);
            for (long record : resolved) {
                // Re-parsing is inefficient, but there is no good way to
                // clone the original object
                JsonObject copy = parseLine(line, keys);
                copy.addProperty(Constants.JSON_RESERVED_IDENTIFIER_NAME,
                        record);
                array.add(copy);
                resolvedAny = true;
            }
        }
        return resolvedAny;
    }

    /**
     * Parse the keys from the {@code line}. The delimiter can be specified by
     * the subclass in the {@link #delimiter()} method. Each key is trimmed.
     *
     * @param line the header line
     * @return an array of keys
     */
    private String[] parseKeys(String line) {
        String[] keys = split(line);
        for (int i = 0; i < keys.length; ++i) {
            keys[i] = keys[i].trim();
        }
        return keys;
    }

    /**
     * Parse the data from {@code line} into a {@link JsonObject} that is
     * appropriate for import. The subclass can customize the behaviour of this
     * process by overriding the {@link #header()} and
     * {@link #transformValue(String, String)} methods.
     * <p>
     * Fields are matched to {@code keys} by position; blank fields are skipped
     * and any fields or keys beyond the shorter of the two are ignored.
     * </p>
     *
     * @param line the raw line
     * @param keys the keys that name the fields on the line
     * @return the line data encoded as a JsonObject
     */
    private JsonObject parseLine(String line, String... keys) {
        String[] toks = split(line.trim());
        JsonObject json = new JsonObject();
        int count = Math.min(keys.length, toks.length);
        for (int i = 0; i < count; ++i) {
            if(StringUtils.isBlank(toks[i])) {
                continue;
            }
            json.add(keys[i], transformValue(keys[i], toks[i]));
        }
        return json;
    }

    /**
     * Split {@code line} on the {@link #delimiter()}, using the
     * {@link QuoteAwareStringSplitter} when {@link #useOptimizedSplitPath} is
     * set and {@link Strings#splitStringByDelimiterButRespectQuotes} otherwise.
     *
     * @param line the line to split
     * @return the fields of the line, in order and untrimmed
     */
    private String[] split(String line) {
        if(useOptimizedSplitPath) {
            QuoteAwareStringSplitter it = new QuoteAwareStringSplitter(line,
                    delimiter().charAt(0));
            List<String> toks = Lists.newArrayList();
            while (it.hasNext()) {
                toks.add(it.next());
            }
            return TLists.toArrayCasted(toks, String.class);
        }
        else {
            return Strings.splitStringByDelimiterButRespectQuotes(line,
                    delimiter());
        }
    }
}