All downloads are free. Search and download functionality uses the official Maven repository.

org.elasticsearch.hadoop.serialization.field.JsonFieldExtractors Maven / Gradle / Ivy

There is a newer version: 8.8.2
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.serialization.field;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.Resource;
import org.elasticsearch.hadoop.serialization.ParsingUtils;
import org.elasticsearch.hadoop.serialization.dto.mapping.MappingUtils;
import org.elasticsearch.hadoop.serialization.json.JacksonJsonParser;
import org.elasticsearch.hadoop.util.BytesArray;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.StringUtils;

/**
 * Dedicated extractor for field parsing. Optimized to extract all the fields in only one parsing of the document.
 *
 * <p>During construction every configured field that is not a constant is registered as a JSON path and
 * assigned a slot (its index in the path list). {@link #process(BytesArray)} then looks up all registered
 * paths in a document in one call and stores the values in {@code results}; the extractors handed out by
 * this class read their value back from their slot.
 *
 * <p>The instance keeps the values of the most recently processed document in a plain, unsynchronized
 * list, so {@link #process(BytesArray)} has to be invoked for a document before its extractors are queried.
 */
public class JsonFieldExtractors {

    private static final Log log = LogFactory.getLog(JsonFieldExtractors.class);

    private final Settings settings;

    // values found by the last process(...) call; indexed by extractor slot
    private final List<Object> results = new ArrayList<Object>(6);
    // JSON paths to look up; index i corresponds to slot i
    private final String[] paths;

    private FieldExtractor id, parent, routing, ttl, version, timestamp;
    private AbstractIndexExtractor indexExtractor;
    private AbstractDefaultParamsExtractor params;

    /**
     * Extractor that does no parsing itself; it returns the value stored in its slot of the
     * enclosing instance's {@code results} list.
     */
    class PrecomputedFieldExtractor implements FieldExtractor {

        private final int slot;
        private final String fieldName;
        private final boolean returnAsJson;

        /**
         * @param slot index into the enclosing instance's {@code results} list
         * @param fieldName name of the field; only used by {@link #toString()}
         * @param returnAsJson whether the value is passed through {@code StringUtils.toJsonString} before being returned
         */
        public PrecomputedFieldExtractor(int slot, String fieldName, boolean returnAsJson) {
            this.slot = slot;
            this.fieldName = fieldName;
            this.returnAsJson = returnAsJson;
        }

        /**
         * Returns the value held in this extractor's slot.
         *
         * @param target ignored; the value comes from the last processed document
         * @return {@code FieldExtractor.NOT_FOUND} if the slot holds {@code ParsingUtils.NOT_FOUND},
         *         otherwise the stored value (converted with {@code StringUtils.toJsonString} when
         *         {@code returnAsJson} is set)
         */
        @Override
        public Object field(Object target) {
            Object result = results.get(slot);
            if (result == ParsingUtils.NOT_FOUND) {
                return FieldExtractor.NOT_FOUND;
            }
            return (returnAsJson ? StringUtils.toJsonString(result) : result);
        }

        @Override
        public String toString() {
            return String.format("JsonExtractor for field [%s]", fieldName);
        }
    }

    /**
     * Extractor that always returns the same, pre-built value regardless of the target.
     */
    private static class FixedFieldExtractor implements FieldExtractor {
        private final Object value;

        public FixedFieldExtractor(Object value) {
            this.value = value;
        }

        @Override
        public Object field(Object target) {
            return value;
        }

        @Override
        public String toString() {
            return "ConstantJsonExtractor";
        }
    }

    /**
     * Builds the extractors for the metadata fields, the index pattern and (if configured) the
     * update script parameters, collecting the JSON paths they need along the way.
     *
     * <p>The order of the statements matters: each registered path gets the next free slot, and
     * the path array is only frozen once every extractor has been created.
     *
     * @param settings configuration from which the field mappings are read
     */
    public JsonFieldExtractors(Settings settings) {
        this.settings = settings;

        final List<String> jsonPaths = new ArrayList<String>();

        id = init(settings.getMappingId(), jsonPaths);
        parent = init(settings.getMappingParent(), jsonPaths);
        // Routing is a special case in that it can be set by two properties
        List<FieldExtractor> routings = new ArrayList<FieldExtractor>(2);
        initAndAdd(settings.getMappingRouting(), jsonPaths, routings);
        initAndAdd(MappingUtils.joinParentField(settings), jsonPaths, routings);
        routing = (routings.size() != 0) ? new ChainedFieldExtractor(routings) : null;
        ttl = init(settings.getMappingTtl(), jsonPaths);
        version = init(settings.getMappingVersion(), jsonPaths);
        timestamp = init(settings.getMappingTimestamp(), jsonPaths);

        // create index format
        indexExtractor = new AbstractIndexExtractor() {
            @Override
            protected FieldExtractor createFieldExtractor(String fieldName) {
                // index pattern fields are returned as raw values, not as JSON strings
                return createJsonFieldExtractor(fieldName, jsonPaths, false);
            }
        };
        indexExtractor.setSettings(settings);

        indexExtractor.compile(new Resource(settings, false).toString());

        // if there's no pattern, simply remove it
        indexExtractor = (indexExtractor.hasPattern() ? indexExtractor : null);

        if (settings.hasUpdateScriptParams()) {
            params = new AbstractDefaultParamsExtractor() {
                @Override
                protected FieldExtractor createFieldExtractor(String fieldName) {
                    return init(fieldName, jsonPaths);
                }
            };
            params.setSettings(settings);
        }

        // freeze the collected paths; slot i of an extractor refers to paths[i]
        paths = jsonPaths.toArray(new String[jsonPaths.size()]);
    }

    /**
     * Creates the extractor for the given field (see {@link #init(String, List)}) and appends it
     * to {@code results} unless no extractor was created.
     */
    private void initAndAdd(String fieldName, List<String> pathList, List<FieldExtractor> results) {
        FieldExtractor result = init(fieldName, pathList);
        if (result != null) {
            results.add(result);
        }
    }

    /**
     * Creates the extractor for a configured field.
     *
     * @param fieldName configured field name, may be {@code null}
     * @param pathList list of JSON paths; the field is appended to it when it is not a constant
     * @return {@code null} when {@code fieldName} is {@code null}, a constant extractor when the
     *         name denotes a constant, otherwise a slot-based extractor returning its value as JSON
     */
    private FieldExtractor init(String fieldName, List<String> pathList) {
        if (fieldName != null) {
            Object constant = initConstant(fieldName);
            if (constant != null) {
                return new FixedFieldExtractor(constant);
            }
            else {
                return createJsonFieldExtractor(fieldName, pathList, true);
            }
        }
        return null;
    }

    /**
     * Registers {@code fieldName} as a JSON path and returns an extractor bound to the slot
     * (list index) the path was stored at.
     */
    private FieldExtractor createJsonFieldExtractor(String fieldName, List<String> pathList, boolean asJson) {
        pathList.add(fieldName);
        return new PrecomputedFieldExtractor(pathList.size() - 1, fieldName, asJson);
    }

    /**
     * Resolves a constant declaration of the form {@code <value>}.
     *
     * @return the result of {@code ExtractorUtils.extractConstant} for the text between the angle
     *         brackets, or {@code null} when {@code field} is not wrapped in angle brackets
     */
    private Object initConstant(String field) {
        // don't do any escaping and pass the user JSON as is
        if (field != null && field.startsWith("<") && field.endsWith(">")) {
            return ExtractorUtils.extractConstant(field.substring(1, field.length() - 1), settings.getMappingConstantAutoQuote());
        }
        return null;
    }

    /**
     * @return the index extractor, or {@code null} when the compiled resource has no pattern
     */
    public IndexExtractor indexAndType() {
        return indexExtractor;
    }

    /**
     * @return the extractor for the id mapping, or {@code null} when none is configured
     */
    public FieldExtractor id() {
        return id;
    }

    /**
     * @return the extractor for the parent mapping, or {@code null} when none is configured
     */
    public FieldExtractor parent() {
        return parent;
    }

    /**
     * @return a chained extractor over the routing mapping and the join parent field, or
     *         {@code null} when neither yielded an extractor
     */
    public FieldExtractor routing() {
        return routing;
    }

    /**
     * @return the extractor for the ttl mapping, or {@code null} when none is configured
     */
    public FieldExtractor ttl() {
        return ttl;
    }

    /**
     * @return the extractor for the version mapping, or {@code null} when none is configured
     */
    public FieldExtractor version() {
        return version;
    }

    /**
     * @return the extractor for the timestamp mapping, or {@code null} when none is configured
     */
    public FieldExtractor timestamp() {
        return timestamp;
    }

    /**
     * Looks up all registered paths in the given document and stores the values for the
     * extractors to read. Does nothing (and leaves previous results untouched) when no path
     * was registered.
     *
     * @param storage bytes of the JSON document; parsed from offset 0 up to {@code storage.length()}
     */
    public void process(BytesArray storage) {
        // no extractors, no lookups
        if (ObjectUtils.isEmpty(paths)) {
            return;
        }

        results.clear();

        if (log.isTraceEnabled()) {
            log.trace(String.format("About to look for paths [%s] in doc [%s]", Arrays.toString(paths), storage));
        }

        // NOTE(review): the slot lookup relies on values(...) returning one entry per path, in path order - confirm
        results.addAll(ParsingUtils.values(new JacksonJsonParser(storage.bytes(), 0, storage.length()), paths));
    }

    /**
     * @return the update script parameter extractor, or {@code null} when the settings declare
     *         no update script params
     */
    public FieldExtractor params() {
        return params;
    }
}