All downloads are free. Search and download functionality uses the official Maven repository.

org.elasticsearch.hadoop.serialization.bulk.AbstractBulkFactory.old Maven / Gradle / Ivy

There is a newer version: 8.17.0
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.serialization.bulk;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.Resource;
import org.elasticsearch.hadoop.serialization.builder.ValueWriter;
import org.elasticsearch.hadoop.serialization.field.ConstantFieldExtractor;
import org.elasticsearch.hadoop.serialization.field.FieldExplainer;
import org.elasticsearch.hadoop.serialization.field.FieldExtractor;
import org.elasticsearch.hadoop.serialization.field.IndexExtractor;
import org.elasticsearch.hadoop.serialization.field.JsonFieldExtractors;
import org.elasticsearch.hadoop.serialization.json.JacksonJsonGenerator;
import org.elasticsearch.hadoop.util.BytesArray;
import org.elasticsearch.hadoop.util.BytesArrayPool;
import org.elasticsearch.hadoop.util.FastByteArrayOutputStream;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.StringUtils;


abstract class AbstractBulkFactory implements BulkFactory {

    // shared logger; never reassigned, hence final
    private static final Log log = LogFactory.getLog(AbstractBulkFactory.class);

    // true when settings.getInputAsJson() reports that the input is already JSON
    private boolean jsonInput;
    // JSON-based extractors; only initialised when jsonInput is true
    private JsonFieldExtractors jsonExtractors;

    protected Settings settings;
    // instantiated once in the constructor from the configured value writer class name
    private final ValueWriter valueWriter;
    // used when specifying an index pattern
    private IndexExtractor indexExtractor;
    // per-field metadata extractors; each one stays null unless initFieldExtractors assigns it
    private FieldExtractor idExtractor, parentExtractor, routingExtractor, versionExtractor, ttlExtractor,
            timestampExtractor, paramsExtractor;

    // supplied by the caller through the constructor; may be null
    private final MetadataExtractor metaExtractor;

    /**
     * Renders the value(s) returned by a {@link FieldExtractor} into a {@link BytesArrayPool}
     * owned by this writer. The pool is reset at the start of every {@link #write(Object)} call
     * and the same pool instance is returned each time.
     */
    class FieldWriter {
        final FieldExtractor extractor;
        final BytesArrayPool pool = new BytesArrayPool();

        FieldWriter(FieldExtractor extractor) {
            this.extractor = extractor;
        }

        /**
         * Extracts the field from the given entity and writes it into the pool. When the extractor
         * returns a {@link List}, each element is written separately.
         *
         * @param object the entity handed to the extractor
         * @return this writer's pool, holding the freshly written value(s)
         * @throws EsHadoopIllegalArgumentException if the extractor returns
         *         {@link FieldExtractor#NOT_FOUND}
         */
        BytesArrayPool write(Object object) {
            pool.reset();

            Object value = extractor.field(object);
            if (value == FieldExtractor.NOT_FOUND) {
                // Report the class of the entity itself (the description below is always a String)
                // and stay null-safe so a null entity does not hide the real error behind an NPE.
                Object entityType = (object == null ? null : object.getClass());
                String description = (extractor instanceof FieldExplainer
                        ? ((FieldExplainer) extractor).toString(object)
                        : String.valueOf(object));
                throw new EsHadoopIllegalArgumentException(String.format("[%s] cannot extract value from entity [%s] | instance [%s]", extractor, entityType, description));
            }

            if (value instanceof List) {
                for (Object val : (List<?>) value) {
                    doWrite(val);
                }
            }
            // if/else to save one collection/iterator instance
            else {
                doWrite(value);
            }

            return pool;
        }

        /**
         * Writes a single value into the next {@link BytesArray} obtained from the pool.
         *
         * @param value the value to render; may be {@code null}
         */
        void doWrite(Object value) {
            // common-case - constants or JDK types (and anything at all when the input is JSON)
            if (value instanceof String || jsonInput || value instanceof Number || value == null) {
                String valueString = (value == null ? "null" : value.toString());
                // strings go through StringUtils.toJsonString unless the input is already JSON
                if (value instanceof String && !jsonInput) {
                    valueString = StringUtils.toJsonString(valueString);
                }

                pool.get().bytes(valueString);
            }
            else if (value instanceof RawJson) {
                // written as-is, using the JSON carried by the RawJson wrapper
                pool.get().bytes(((RawJson) value).json());
            }
            // library specific type - use the value writer (a bit overkill but handles collections/arrays properly)
            else {
                BytesArray ba = pool.get();
                JacksonJsonGenerator generator = new JacksonJsonGenerator(new FastByteArrayOutputStream(ba));
                valueWriter.write(value, generator);
                generator.flush();
                generator.close();
            }
        }

        @Override
        public String toString() {
            return "FieldWriter for " + extractor;
        }
    }


    /**
     * Creates the factory, instantiating the configured value writer and the field extractors.
     *
     * @param settings      configuration used for the value writer and all extractors
     * @param metaExtractor metadata extractor kept for header generation; may be {@code null}
     */
    AbstractBulkFactory(Settings settings, MetadataExtractor metaExtractor) {
        this.metaExtractor = metaExtractor;
        this.settings = settings;
        this.valueWriter = ObjectUtils.instantiate(settings.getSerializerValueWriterClassName(), settings);

        // must run after the value writer is set: the trace logging in there prints it
        initFieldExtractors(settings);
    }

    /**
     * Initialises the index/id/parent/routing/version/ttl/timestamp/params extractors.
     * For JSON input they all come from a {@link JsonFieldExtractors} instance; otherwise each
     * extractor is instantiated from its configured class name, and only when the corresponding
     * mapping is present in the settings.
     *
     * @param settings configuration to read from; note that
     *        {@link ConstantFieldExtractor#PROPERTY} is overwritten on it before each instantiation
     */
    private void initFieldExtractors(Settings settings) {
        jsonInput = settings.getInputAsJson();

        // --- JSON input: everything is served by the JSON extractors ---
        if (jsonInput) {
            if (log.isDebugEnabled()) {
                log.debug("JSON input; using internal field extractor for efficient parsing...");
            }

            jsonExtractors = new JsonFieldExtractors(settings);
            indexExtractor = jsonExtractors.indexAndType();

            idExtractor = jsonExtractors.id();
            parentExtractor = jsonExtractors.parent();
            routingExtractor = jsonExtractors.routing();
            versionExtractor = jsonExtractors.version();
            ttlExtractor = jsonExtractors.ttl();
            timestampExtractor = jsonExtractors.timestamp();
            paramsExtractor = jsonExtractors.params();
            return;
        }

        // --- object input: instantiate only the extractors that have a mapping configured ---
        // The shared PROPERTY entry is set right before each instantiation, so the order matters.
        if (settings.getMappingId() != null) {
            settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingId());
            idExtractor = ObjectUtils.instantiate(settings.getMappingIdExtractorClassName(), settings);
        }
        if (settings.getMappingParent() != null) {
            settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingParent());
            parentExtractor = ObjectUtils.instantiate(settings.getMappingParentExtractorClassName(), settings);
        }
        if (settings.getMappingRouting() != null) {
            settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingRouting());
            routingExtractor = ObjectUtils.instantiate(settings.getMappingRoutingExtractorClassName(), settings);
        }
        if (settings.getMappingTtl() != null) {
            settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTtl());
            ttlExtractor = ObjectUtils.instantiate(settings.getMappingTtlExtractorClassName(), settings);
        }
        if (settings.getMappingVersion() != null) {
            settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingVersion());
            versionExtractor = ObjectUtils.instantiate(settings.getMappingVersionExtractorClassName(), settings);
        }
        if (settings.getMappingTimestamp() != null) {
            settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTimestamp());
            timestampExtractor = ObjectUtils.instantiate(settings.getMappingTimestampExtractorClassName(), settings);
        }

        // index extractor: compiled against the write resource, kept only if it is a pattern
        IndexExtractor indexFormat = ObjectUtils.instantiate(settings.getMappingIndexExtractorClassName(), settings);
        indexFormat.compile(new Resource(settings, false).toString());
        if (indexFormat.hasPattern()) {
            indexExtractor = indexFormat;
        }

        // update-script parameters
        if (settings.hasUpdateScriptParams()) {
            settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getUpdateScriptParams());
            paramsExtractor = ObjectUtils.instantiate(settings.getMappingParamsExtractorClassName(), settings);
        }

        if (!log.isTraceEnabled()) {
            return;
        }

        log.trace(String.format("Instantiated value writer [%s]", valueWriter));
        if (idExtractor != null) {
            log.trace(String.format("Instantiated id extractor [%s]", idExtractor));
        }
        if (parentExtractor != null) {
            log.trace(String.format("Instantiated parent extractor [%s]", parentExtractor));
        }
        if (routingExtractor != null) {
            log.trace(String.format("Instantiated routing extractor [%s]", routingExtractor));
        }
        if (ttlExtractor != null) {
            log.trace(String.format("Instantiated ttl extractor [%s]", ttlExtractor));
        }
        if (versionExtractor != null) {
            log.trace(String.format("Instantiated version extractor [%s]", versionExtractor));
        }
        if (timestampExtractor != null) {
            log.trace(String.format("Instantiated timestamp extractor [%s]", timestampExtractor));
        }
        if (paramsExtractor != null) {
            log.trace(String.format("Instantiated params extractor [%s]", paramsExtractor));
        }
    }

    /**
     * @return the index extractor, or {@code null} if none was set; for non-JSON input it is
     *         only set when the compiled resource reports a pattern
     */
    protected IndexExtractor index() {
        return this.indexExtractor;
    }

    /**
     * @return the extractor for the document id, or {@code null} if none was initialised
     */
    protected FieldExtractor id() {
        return this.idExtractor;
    }

    /**
     * @return the extractor for the parent field, or {@code null} if none was initialised
     */
    protected FieldExtractor parent() {
        return this.parentExtractor;
    }

    /**
     * @return the extractor for the routing field, or {@code null} if none was initialised
     */
    protected FieldExtractor routing() {
        return this.routingExtractor;
    }

    /**
     * @return the extractor for the ttl field, or {@code null} if none was initialised
     */
    protected FieldExtractor ttl() {
        return this.ttlExtractor;
    }

    /**
     * @return the extractor for the version field, or {@code null} if none was initialised
     */
    protected FieldExtractor version() {
        return this.versionExtractor;
    }

    /**
     * @return the extractor for the timestamp field, or {@code null} if none was initialised
     */
    protected FieldExtractor timestamp() {
        return this.timestampExtractor;
    }

    /**
     * @return the extractor for the update-script parameters, or {@code null} if none was
     *         initialised
     */
    protected FieldExtractor params() {
        return this.paramsExtractor;
    }

    /**
     * Builds the bulk command: collects the pieces written before and after each document,
     * compacts both lists and picks the command implementation based on whether the input is
     * JSON and whether an update script is configured.
     *
     * @return a new {@link BulkCommand} built from the compacted pieces
     */
    @Override
    public BulkCommand createBulk() {
        // gather the raw pieces first ...
        List header = new ArrayList();
        writeBeforeObject(header);

        List trailer = new ArrayList();
        writeAfterObject(trailer);

        // ... then compress them
        header = compact(header);
        trailer = compact(trailer);

        boolean scripted = settings.hasUpdateScript();

        if (jsonInput) {
            return scripted
                    ? new JsonScriptTemplateBulk(header, trailer, jsonExtractors, settings)
                    : new JsonTemplatedBulk(header, trailer, jsonExtractors, settings);
        }
        return scripted
                ? new ScriptTemplateBulk(settings, header, trailer, valueWriter)
                : new TemplatedBulk(header, trailer, valueWriter);
    }

    /**
     * Appends the pieces that follow each document; this implementation adds a single newline.
     *
     * @param after list receiving the trailing pieces
     */
    protected void writeAfterObject(List after) {
        final String lineTerminator = "\n";
        after.add(lineTerminator);
    }

    /**
     * Collapses a list of pieces into the sequence used at write time: consecutive non-extractor
     * pieces are concatenated (via {@code toString()}) into a single {@link BytesArray}, and every
     * {@link FieldExtractor} is replaced by the object returned from
     * {@link #createFieldWriter(FieldExtractor)}.
     * <p>
     * A comma is inserted in front of a piece that starts with a double quote when the previous
     * string piece ended with a double quote, or when the current run of strings has no previous
     * piece (start of the list, or right after an extractor).
     *
     * @param list pieces to compact; may be {@code null}
     * @return the compacted pieces, or {@code null} when {@code list} is {@code null} or empty
     */
    private List compact(List list) {
        if (list == null || list.isEmpty()) {
            return null;
        }

        List<Object> compacted = new ArrayList<Object>();
        StringBuilder stringAccumulator = new StringBuilder();
        // last string appended to the current run; null when the run is empty
        String lastString = null;

        for (Object object : list) {
            if (object instanceof FieldExtractor) {
                // flush the pending run of strings before emitting the field writer
                if (stringAccumulator.length() > 0) {
                    compacted.add(new BytesArray(stringAccumulator.toString()));
                    stringAccumulator.setLength(0);
                    lastString = null;
                }
                compacted.add(createFieldWriter((FieldExtractor) object));
            }
            else {
                String str = object.toString();
                // a quoted key needs a separating comma after a quoted piece or an extractor
                if (str.startsWith("\"") && (lastString == null || lastString.endsWith("\""))) {
                    stringAccumulator.append(",");
                }
                lastString = str;
                stringAccumulator.append(str);
            }
        }

        if (stringAccumulator.length() > 0) {
            compacted.add(new BytesArray(stringAccumulator.toString()));
        }
        return compacted;
    }

    /**
     * Creates the object that stands in for the given extractor in a compacted piece list.
     *
     * @param extractor extractor to wrap
     * @return a new {@link FieldWriter} bound to {@code extractor}
     */
    protected Object createFieldWriter(FieldExtractor extractor) {
        FieldWriter writer = new FieldWriter(extractor);
        return writer;
    }

    protected void writeBeforeObject(List pieces) {
        if (metaExtractor != null) {
            pieces.add(e)
        }
        else {
            startHeader(pieces);

            index(pieces);

            id(pieces);
            parent(pieces);
            routing(pieces);
            ttl(pieces);
            version(pieces);
            timestamp(pieces);
        }

        otherHeader(pieces);
        endHeader(pieces);

        scriptParams(pieces);
        }
    }

    // Opens the action header: an opening brace, the quoted name returned by getOperation(),
    // a colon and the opening brace of the metadata object.
    private void startHeader(List pieces) {
        String opening = "{\"" + getOperation() + "\":{";
        pieces.add(opening);
    }

    // Closes both braces opened by startHeader and terminates the header line.
    private void endHeader(List pieces) {
        final String closing = "}}\n";
        pieces.add(closing);
    }

    /**
     * Appends the index extractor to the header pieces, if one is available.
     *
     * @param pieces list receiving the header pieces
     * @return {@code true} if the extractor was appended, {@code false} if {@link #index()}
     *         returned {@code null}
     */
    protected boolean index(List pieces) {
        if (index() == null) {
            return false;
        }
        pieces.add(index());
        return true;
    }

    /**
     * Appends the {@code "_id":} key followed by the id extractor, if one is available.
     *
     * @param pieces list receiving the header pieces
     * @return {@code true} if the entry was appended, {@code false} if {@link #id()} returned
     *         {@code null}
     */
    protected boolean id(List pieces) {
        if (id() == null) {
            return false;
        }
        pieces.add("\"_id\":");
        pieces.add(id());
        return true;
    }

    /**
     * @return the operation name; it is written, quoted, as the key that opens the header
     *         (see {@code startHeader})
     */
    protected abstract String getOperation();

    /**
     * Appends the {@code "_parent":} key followed by the parent extractor, if one is available.
     *
     * @param pieces list receiving the header pieces
     * @return {@code true} if the entry was appended, {@code false} if {@link #parent()}
     *         returned {@code null}
     */
    protected boolean parent(List pieces) {
        if (parent() == null) {
            return false;
        }
        pieces.add("\"_parent\":");
        pieces.add(parent());
        return true;
    }

    /**
     * Appends the {@code "_routing":} key followed by the routing extractor, if one is available.
     *
     * @param pieces list receiving the header pieces
     * @return {@code true} if the entry was appended, {@code false} if {@link #routing()}
     *         returned {@code null}
     */
    protected boolean routing(List pieces) {
        if (routing() == null) {
            return false;
        }
        pieces.add("\"_routing\":");
        pieces.add(routing());
        return true;
    }

    /**
     * Appends the {@code "_ttl":} key followed by the ttl extractor, if one is available.
     *
     * @param pieces list receiving the header pieces
     * @return {@code true} if the entry was appended, {@code false} if {@link #ttl()} returned
     *         {@code null}
     */
    protected boolean ttl(List pieces) {
        if (ttl() == null) {
            return false;
        }
        pieces.add("\"_ttl\":");
        pieces.add(ttl());
        return true;
    }

    /**
     * Appends the version entry to the header pieces, if a version extractor is available.
     * When the settings declare a version type, a quoted {@code _version_type} entry is written
     * first, followed by the {@code "_version":} key and the version extractor.
     *
     * @param pieces list receiving the header pieces
     * @return {@code true} if the entry was appended, {@code false} if {@link #version()}
     *         returned {@code null}
     */
    protected boolean version(List pieces) {
        if (version() == null) {
            return false;
        }

        if (settings.hasMappingVersionType()) {
            // key with opening quote, raw value, closing quote - joined later by compact()
            pieces.add("\"_version_type\":\"");
            pieces.add(settings.getMappingVersionType());
            pieces.add("\"");
        }

        pieces.add("\"_version\":");
        pieces.add(version());
        return true;
    }

    /**
     * Appends the {@code "_timestamp":} key followed by the timestamp extractor, if one is
     * available.
     *
     * @param pieces list receiving the header pieces
     * @return {@code true} if the entry was appended, {@code false} if {@link #timestamp()}
     *         returned {@code null}
     */
    protected boolean timestamp(List pieces) {
        if (timestamp() == null) {
            return false;
        }
        pieces.add("\"_timestamp\":");
        pieces.add(timestamp());
        return true;
    }

    /**
     * Hook for subclasses to append extra header entries; it is invoked right before the header
     * is closed. This implementation adds nothing.
     *
     * @param pieces list receiving the header pieces
     */
    protected void otherHeader(List pieces) {
        // intentionally empty
    }

    // Appends the opening of the script "params" section. Parameters given as raw JSON in the
    // settings take precedence (trimmed and followed by a comma); otherwise the params extractor,
    // when present, is wrapped in its own braces. Returns true if anything was appended.
    private boolean scriptParams(List pieces) {
        boolean appended = false;

        if (settings.hasUpdateScriptParamsJson()) {
            // handle json params first
            pieces.add("{\"params\":");
            pieces.add(settings.getUpdateScriptParamsJson().trim());
            pieces.add(",");
            appended = true;
        }
        else if (params() != null) {
            pieces.add("{\"params\":{");
            pieces.add(params());
            pieces.add("},");
            appended = true;
        }

        return appended;
    }
}