org.elasticsearch.hadoop.serialization.bulk.AbstractBulkFactory.old Maven / Gradle / Ivy
                 Go to download
                
        
                    Show more of this group  Show more artifacts with this name
Show all versions of elasticsearch-hadoop-mr Show documentation
                Show all versions of elasticsearch-hadoop-mr Show documentation
Elasticsearch Hadoop Map/Reduce
                
            /*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.serialization.bulk;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.Resource;
import org.elasticsearch.hadoop.serialization.builder.ValueWriter;
import org.elasticsearch.hadoop.serialization.field.ConstantFieldExtractor;
import org.elasticsearch.hadoop.serialization.field.FieldExplainer;
import org.elasticsearch.hadoop.serialization.field.FieldExtractor;
import org.elasticsearch.hadoop.serialization.field.IndexExtractor;
import org.elasticsearch.hadoop.serialization.field.JsonFieldExtractors;
import org.elasticsearch.hadoop.serialization.json.JacksonJsonGenerator;
import org.elasticsearch.hadoop.util.BytesArray;
import org.elasticsearch.hadoop.util.BytesArrayPool;
import org.elasticsearch.hadoop.util.FastByteArrayOutputStream;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.StringUtils;
abstract class AbstractBulkFactory implements BulkFactory {
    private static Log log = LogFactory.getLog(AbstractBulkFactory.class);
    private boolean jsonInput;
    private JsonFieldExtractors jsonExtractors;
    protected Settings settings;
    private ValueWriter valueWriter;
    // used when specifying an index pattern
    private IndexExtractor indexExtractor;
    private FieldExtractor idExtractor, parentExtractor, routingExtractor, versionExtractor, ttlExtractor,
            timestampExtractor, paramsExtractor;
    private MetadataExtractor metaExtractor;
    class FieldWriter {
        final FieldExtractor extractor;
        final BytesArrayPool pool = new BytesArrayPool();
        FieldWriter(FieldExtractor extractor) {
            this.extractor = extractor;
        }
        BytesArrayPool write(Object object) {
            pool.reset();
            Object value = extractor.field(object);
            if (value == FieldExtractor.NOT_FOUND) {
                String obj = (extractor instanceof FieldExplainer ? ((FieldExplainer) extractor).toString(object) : object.toString());
                throw new EsHadoopIllegalArgumentException(String.format("[%s] cannot extract value from entity [%s] | instance [%s]", extractor, obj.getClass(), obj));
            }
            if (value instanceof List) {
                for (Object val : (List) value) {
                    doWrite(val);
                }
            }
            // if/else to save one collection/iterator instance
            else {
                doWrite(value);
            }
            return pool;
        }
        void doWrite(Object value) {
            // common-case - constants or JDK types
            if (value instanceof String || jsonInput || value instanceof Number || value == null) {
                String valueString = (value == null ? "null" : value.toString());
                if (value instanceof String && !jsonInput) {
                    valueString = StringUtils.toJsonString(valueString);
                }
                pool.get().bytes(valueString);
            }
            else if (value instanceof RawJson) {
                pool.get().bytes(((RawJson) value).json());
            }
            // library specific type - use the value writer (a bit overkill but handles collections/arrays properly)
            else {
                BytesArray ba = pool.get();
                JacksonJsonGenerator generator = new JacksonJsonGenerator(new FastByteArrayOutputStream(ba));
                valueWriter.write(value, generator);
                generator.flush();
                generator.close();
            }
        }
        @Override
        public String toString() {
            return "FieldWriter for " + extractor;
        }
    }
    AbstractBulkFactory(Settings settings, MetadataExtractor metaExtractor) {
        this.settings = settings;
        this.valueWriter = ObjectUtils.instantiate(settings.getSerializerValueWriterClassName(), settings);
        this.metaExtractor = metaExtractor;
        initFieldExtractors(settings);
    }
    private void initFieldExtractors(Settings settings) {
        jsonInput = settings.getInputAsJson();
        if (jsonInput) {
            if (log.isDebugEnabled()) {
                log.debug("JSON input; using internal field extractor for efficient parsing...");
            }
            jsonExtractors = new JsonFieldExtractors(settings);
            indexExtractor = jsonExtractors.indexAndType();
            idExtractor = jsonExtractors.id();
            parentExtractor = jsonExtractors.parent();
            routingExtractor = jsonExtractors.routing();
            versionExtractor = jsonExtractors.version();
            ttlExtractor = jsonExtractors.ttl();
            timestampExtractor = jsonExtractors.timestamp();
            paramsExtractor = jsonExtractors.params();
        }
        else {
            // init extractors (if needed)
            if (settings.getMappingId() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingId());
                idExtractor = ObjectUtils. instantiate(settings.getMappingIdExtractorClassName(), settings);
            }
            if (settings.getMappingParent() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingParent());
                parentExtractor = ObjectUtils. instantiate(settings.getMappingParentExtractorClassName(), settings);
            }
            if (settings.getMappingRouting() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingRouting());
                routingExtractor = ObjectUtils. instantiate(settings.getMappingRoutingExtractorClassName(), settings);
            }
            if (settings.getMappingTtl() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTtl());
                ttlExtractor = ObjectUtils. instantiate(settings.getMappingTtlExtractorClassName(), settings);
            }
            if (settings.getMappingVersion() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingVersion());
                versionExtractor = ObjectUtils. instantiate(settings.getMappingVersionExtractorClassName(), settings);
            }
            if (settings.getMappingTimestamp() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTimestamp());
                timestampExtractor = ObjectUtils. instantiate(
                        settings.getMappingTimestampExtractorClassName(), settings);
            }
            // create adapter
            IndexExtractor iformat = ObjectUtils. instantiate(settings.getMappingIndexExtractorClassName(), settings);
            iformat.compile(new Resource(settings, false).toString());
            if (iformat.hasPattern()) {
                indexExtractor = iformat;
            }
            // param extractor
            if (settings.hasUpdateScriptParams()) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getUpdateScriptParams());
                paramsExtractor = ObjectUtils.instantiate(settings.getMappingParamsExtractorClassName(), settings);
            }
            if (log.isTraceEnabled()) {
                log.trace(String.format("Instantiated value writer [%s]", valueWriter));
                if (idExtractor != null) {
                    log.trace(String.format("Instantiated id extractor [%s]", idExtractor));
                }
                if (parentExtractor != null) {
                    log.trace(String.format("Instantiated parent extractor [%s]", parentExtractor));
                }
                if (routingExtractor != null) {
                    log.trace(String.format("Instantiated routing extractor [%s]", routingExtractor));
                }
                if (ttlExtractor != null) {
                    log.trace(String.format("Instantiated ttl extractor [%s]", ttlExtractor));
                }
                if (versionExtractor != null) {
                    log.trace(String.format("Instantiated version extractor [%s]", versionExtractor));
                }
                if (timestampExtractor != null) {
                    log.trace(String.format("Instantiated timestamp extractor [%s]", timestampExtractor));
                }
                if (paramsExtractor != null) {
                    log.trace(String.format("Instantiated params extractor [%s]", paramsExtractor));
                }
            }
        }
    }
    protected IndexExtractor index() {
        return indexExtractor;
    }
    protected FieldExtractor id() {
        return idExtractor;
    }
    protected FieldExtractor parent() {
        return parentExtractor;
    }
    protected FieldExtractor routing() {
        return routingExtractor;
    }
    protected FieldExtractor ttl() {
        return ttlExtractor;
    }
    protected FieldExtractor version() {
        return versionExtractor;
    }
    protected FieldExtractor timestamp() {
        return timestampExtractor;
    }
    protected FieldExtractor params() {
        return paramsExtractor;
    }
    @Override
    public BulkCommand createBulk() {
        List           © 2015 - 2025 Weber Informatics LLC | Privacy Policy