// org.elasticsearch.hadoop.serialization.bulk.AbstractBulkFactory.old Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of elasticsearch-hadoop-cascading Show documentation
// Elasticsearch Hadoop Cascading
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.serialization.bulk;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.Resource;
import org.elasticsearch.hadoop.serialization.builder.ValueWriter;
import org.elasticsearch.hadoop.serialization.field.ConstantFieldExtractor;
import org.elasticsearch.hadoop.serialization.field.FieldExplainer;
import org.elasticsearch.hadoop.serialization.field.FieldExtractor;
import org.elasticsearch.hadoop.serialization.field.IndexExtractor;
import org.elasticsearch.hadoop.serialization.field.JsonFieldExtractors;
import org.elasticsearch.hadoop.serialization.json.JacksonJsonGenerator;
import org.elasticsearch.hadoop.util.BytesArray;
import org.elasticsearch.hadoop.util.BytesArrayPool;
import org.elasticsearch.hadoop.util.FastByteArrayOutputStream;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.StringUtils;
abstract class AbstractBulkFactory implements BulkFactory {
// Class-wide logger.
private static Log log = LogFactory.getLog(AbstractBulkFactory.class);
// Value of settings.getInputAsJson(); when true, FieldWriter writes values through toString() without further encoding.
private boolean jsonInput;
// JSON-specific extractor factory; only created when jsonInput is true.
private JsonFieldExtractors jsonExtractors;
// Configuration this factory was created with.
protected Settings settings;
// Instantiated from settings.getSerializerValueWriterClassName(); FieldWriter falls back to it for values
// that are not String, Number, null or RawJson.
private ValueWriter valueWriter;
// used when specifying an index pattern
private IndexExtractor indexExtractor;
// Metadata extractors; for non-JSON input each one stays unset unless its corresponding setting is present.
private FieldExtractor idExtractor, parentExtractor, routingExtractor, versionExtractor, ttlExtractor,
timestampExtractor, paramsExtractor;
// Extractor handed to the constructor; NOTE(review): its use is outside the portion of the class visible here.
private MetadataExtractor metaExtractor;
/**
 * Writes the value(s) returned by a {@link FieldExtractor} into a reusable {@link BytesArrayPool}.
 *
 * The pool is reset at the start of every {@link #write(Object)} call and the same pool instance is
 * returned each time, so the result of a call has to be consumed before the next call is made.
 */
class FieldWriter {
    final FieldExtractor extractor;
    final BytesArrayPool pool = new BytesArrayPool();

    FieldWriter(FieldExtractor extractor) {
        this.extractor = extractor;
    }

    /**
     * Extracts the field from the given entity and writes it into the pool. A {@link List} result is
     * written element by element; anything else is written as a single entry.
     *
     * @param object the entity to extract the field from
     * @return the (shared) pool holding the written value(s)
     * @throws EsHadoopIllegalArgumentException if the extractor reports {@link FieldExtractor#NOT_FOUND}
     */
    BytesArrayPool write(Object object) {
        pool.reset();

        Object value = extractor.field(object);
        if (value == FieldExtractor.NOT_FOUND) {
            // String.valueOf keeps a null entity from turning this diagnostic into a NullPointerException
            String description = (extractor instanceof FieldExplainer ? ((FieldExplainer) extractor).toString(object) : String.valueOf(object));
            // report the class of the entity itself; the description is always a String
            Class<?> entityClass = (object != null ? object.getClass() : null);
            throw new EsHadoopIllegalArgumentException(String.format("[%s] cannot extract value from entity [%s] | instance [%s]", extractor, entityClass, description));
        }

        if (value instanceof List) {
            for (Object val : (List<?>) value) {
                doWrite(val);
            }
        }
        // if/else to save one collection/iterator instance
        else {
            doWrite(value);
        }

        return pool;
    }

    /**
     * Writes a single value into the next entry of the pool.
     *
     * With JSON input every value goes through {@code toString()} as is. Otherwise strings are passed
     * through {@link StringUtils#toJsonString}, numbers and {@code null} are written via their string
     * form, {@link RawJson} content is copied verbatim and any other type is delegated to the value writer.
     */
    void doWrite(Object value) {
        // common-case - constants or JDK types
        if (value instanceof String || jsonInput || value instanceof Number || value == null) {
            String valueString = (value == null ? "null" : value.toString());
            if (value instanceof String && !jsonInput) {
                valueString = StringUtils.toJsonString(valueString);
            }

            pool.get().bytes(valueString);
        }
        else if (value instanceof RawJson) {
            pool.get().bytes(((RawJson) value).json());
        }
        // library specific type - use the value writer (a bit overkill but handles collections/arrays properly)
        else {
            BytesArray ba = pool.get();
            JacksonJsonGenerator generator = new JacksonJsonGenerator(new FastByteArrayOutputStream(ba));
            valueWriter.write(value, generator);
            generator.flush();
            generator.close();
        }
    }

    @Override
    public String toString() {
        return "FieldWriter for " + extractor;
    }
}
/**
 * Creates the factory: stores the collaborators, instantiates the value writer named by
 * {@code settings.getSerializerValueWriterClassName()} and then sets up the field extractors.
 *
 * @param settings configuration used for the value writer and the extractors
 * @param metaExtractor metadata extractor kept by this factory
 */
AbstractBulkFactory(Settings settings, MetadataExtractor metaExtractor) {
    this.metaExtractor = metaExtractor;
    this.settings = settings;
    // the value writer has to exist before the extractors are set up, since that step logs it at trace level
    valueWriter = ObjectUtils.instantiate(settings.getSerializerValueWriterClassName(), settings);
    initFieldExtractors(settings);
}
/**
 * Sets up the index/metadata extractors from the given settings.
 *
 * For JSON input all extractors are obtained from a {@link JsonFieldExtractors} instance. Otherwise
 * each extractor is instantiated only if its setting is present; before every instantiation the
 * configured value is published under {@link ConstantFieldExtractor#PROPERTY} on the settings.
 *
 * @param settings configuration to read the mappings and extractor class names from
 */
private void initFieldExtractors(Settings settings) {
    jsonInput = settings.getInputAsJson();

    // JSON input - everything is served by the JSON extractors, nothing else to configure
    if (jsonInput) {
        if (log.isDebugEnabled()) {
            log.debug("JSON input; using internal field extractor for efficient parsing...");
        }

        JsonFieldExtractors json = new JsonFieldExtractors(settings);
        jsonExtractors = json;

        indexExtractor = json.indexAndType();
        idExtractor = json.id();
        parentExtractor = json.parent();
        routingExtractor = json.routing();
        versionExtractor = json.version();
        ttlExtractor = json.ttl();
        timestampExtractor = json.timestamp();
        paramsExtractor = json.params();
        return;
    }

    // non-JSON input - create only the extractors that have been configured;
    // the property must be set right before each instantiation as it is shared between them
    if (null != settings.getMappingId()) {
        settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingId());
        idExtractor = ObjectUtils.instantiate(settings.getMappingIdExtractorClassName(), settings);
    }

    if (null != settings.getMappingParent()) {
        settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingParent());
        parentExtractor = ObjectUtils.instantiate(settings.getMappingParentExtractorClassName(), settings);
    }

    if (null != settings.getMappingRouting()) {
        settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingRouting());
        routingExtractor = ObjectUtils.instantiate(settings.getMappingRoutingExtractorClassName(), settings);
    }

    if (null != settings.getMappingTtl()) {
        settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTtl());
        ttlExtractor = ObjectUtils.instantiate(settings.getMappingTtlExtractorClassName(), settings);
    }

    if (null != settings.getMappingVersion()) {
        settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingVersion());
        versionExtractor = ObjectUtils.instantiate(settings.getMappingVersionExtractorClassName(), settings);
    }

    if (null != settings.getMappingTimestamp()) {
        settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTimestamp());
        timestampExtractor = ObjectUtils.instantiate(settings.getMappingTimestampExtractorClassName(), settings);
    }

    // index extractor - kept only when the resource actually contains a pattern
    IndexExtractor candidate = ObjectUtils.instantiate(settings.getMappingIndexExtractorClassName(), settings);
    candidate.compile(new Resource(settings, false).toString());
    if (candidate.hasPattern()) {
        indexExtractor = candidate;
    }

    // script parameters extractor
    if (settings.hasUpdateScriptParams()) {
        settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getUpdateScriptParams());
        paramsExtractor = ObjectUtils.instantiate(settings.getMappingParamsExtractorClassName(), settings);
    }

    if (!log.isTraceEnabled()) {
        return;
    }

    // report what has been created
    log.trace(String.format("Instantiated value writer [%s]", valueWriter));
    if (idExtractor != null) {
        log.trace(String.format("Instantiated id extractor [%s]", idExtractor));
    }
    if (parentExtractor != null) {
        log.trace(String.format("Instantiated parent extractor [%s]", parentExtractor));
    }
    if (routingExtractor != null) {
        log.trace(String.format("Instantiated routing extractor [%s]", routingExtractor));
    }
    if (ttlExtractor != null) {
        log.trace(String.format("Instantiated ttl extractor [%s]", ttlExtractor));
    }
    if (versionExtractor != null) {
        log.trace(String.format("Instantiated version extractor [%s]", versionExtractor));
    }
    if (timestampExtractor != null) {
        log.trace(String.format("Instantiated timestamp extractor [%s]", timestampExtractor));
    }
    if (paramsExtractor != null) {
        log.trace(String.format("Instantiated params extractor [%s]", paramsExtractor));
    }
}
/**
 * Returns the index extractor. For non-JSON input it is only set when the configured resource
 * contains a pattern.
 *
 * @return the index extractor, possibly {@code null}
 */
protected IndexExtractor index() {
    return this.indexExtractor;
}
/**
 * Returns the id extractor. For non-JSON input it is only set when an id mapping is configured.
 *
 * @return the id extractor, possibly {@code null}
 */
protected FieldExtractor id() {
    return this.idExtractor;
}
/**
 * Returns the parent extractor. For non-JSON input it is only set when a parent mapping is configured.
 *
 * @return the parent extractor, possibly {@code null}
 */
protected FieldExtractor parent() {
    return this.parentExtractor;
}
/**
 * Returns the routing extractor. For non-JSON input it is only set when a routing mapping is configured.
 *
 * @return the routing extractor, possibly {@code null}
 */
protected FieldExtractor routing() {
    return this.routingExtractor;
}
/**
 * Returns the ttl extractor. For non-JSON input it is only set when a ttl mapping is configured.
 *
 * @return the ttl extractor, possibly {@code null}
 */
protected FieldExtractor ttl() {
    return this.ttlExtractor;
}
/**
 * Returns the version extractor. For non-JSON input it is only set when a version mapping is configured.
 *
 * @return the version extractor, possibly {@code null}
 */
protected FieldExtractor version() {
    return this.versionExtractor;
}
/**
 * Returns the timestamp extractor. For non-JSON input it is only set when a timestamp mapping is configured.
 *
 * @return the timestamp extractor, possibly {@code null}
 */
protected FieldExtractor timestamp() {
    return this.timestampExtractor;
}
/**
 * Returns the script parameters extractor. For non-JSON input it is only set when the settings
 * report update script parameters.
 *
 * @return the params extractor, possibly {@code null}
 */
protected FieldExtractor params() {
    return this.paramsExtractor;
}
@Override
public BulkCommand createBulk() {
List