All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.hadoop.cascading.EsLocalScheme Maven / Gradle / Ivy

There is a newer version: 8.17.0
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.cascading;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.LogFactory;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.cfg.SettingsManager;
import org.elasticsearch.hadoop.mr.Counter;
import org.elasticsearch.hadoop.rest.InitializationUtils;
import org.elasticsearch.hadoop.rest.RestRepository;
import org.elasticsearch.hadoop.rest.ScrollQuery;
import org.elasticsearch.hadoop.rest.stats.Stats;
import org.elasticsearch.hadoop.util.FieldAlias;
import org.elasticsearch.hadoop.util.SettingsUtils;
import org.elasticsearch.hadoop.util.StringUtils;

import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

/**
 * Cascading Scheme handling
 */
class EsLocalScheme extends Scheme {

    private static final long serialVersionUID = 979036202776892844L;

    private final String resource;
    private final String query;
    private final String host;
    private final int port;
    private final Properties props;
    private transient RestRepository client;

    private boolean IS_ES_10;

    EsLocalScheme(String host, int port, String index, String query, Fields fields, Properties props) {
        this.resource = index;
        this.query = query;
        this.host = host;
        this.port = port;
        if (fields != null) {
            setSinkFields(fields);
            setSourceFields(fields);
        }
        this.props = props;
    }

    @Override
    public void sourcePrepare(FlowProcess flowProcess, SourceCall sourceCall) throws IOException {
        super.sourcePrepare(flowProcess, sourceCall);

        Object[] context = new Object[1];
        Settings settings = SettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
        context[0] = CascadingUtils.alias(settings);
        sourceCall.setContext(context);
        IS_ES_10 = SettingsUtils.isEs10(settings);
    }

    @Override
    public void sourceCleanup(FlowProcess flowProcess, SourceCall sourceCall) throws IOException {
        // in case of a source there's no local client so do all reporting here
        report(sourceCall.getInput().stats(), flowProcess);
        report(sourceCall.getInput().repository().stats(), flowProcess);
        sourceCall.getInput().close();
        sourceCall.setContext(null);
        // used for consistency
        cleanupClient(flowProcess);
    }

    @Override
    public void sinkCleanup(FlowProcess flowProcess, SinkCall sinkCall) throws IOException {
        cleanupClient(flowProcess);
    }

    private void cleanupClient(FlowProcess flowProcess) throws IOException {
        if (client != null) {
            client.close();
            report(client.stats(), flowProcess);
            client = null;
        }
    }

    private void report(Stats stats, FlowProcess flowProcess) {
        // report current stats
        for (Counter count : Counter.ALL) {
            flowProcess.increment(count, count.get(stats));
        }
    }

    @Override
    public void sinkPrepare(FlowProcess flowProcess, SinkCall sinkCall) throws IOException {
        super.sinkPrepare(flowProcess, sinkCall);

        Object[] context = new Object[1];
        Settings settings = SettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
        context[0] = CascadingUtils.fieldToAlias(settings, getSinkFields());
        sinkCall.setContext(context);
    }

    @Override
    public void sourceConfInit(FlowProcess flowProcess, Tap tap, Properties conf) {
        initClient(conf, true);
    }

    @Override
    public void sinkConfInit(FlowProcess flowProcess, Tap tap, Properties conf) {
        initClient(conf, false);
        InitializationUtils.checkIndexExistence(client);
    }

    private void initClient(Properties props, boolean read) {
        if (client == null) {
            Settings settings = CascadingUtils.addDefaultsToSettings(props, this.props, LogFactory.getLog(EsTap.class));
            CascadingUtils.init(settings, host, port, resource, query, read);
            client = new RestRepository(settings);
        }
    }

    @SuppressWarnings("unchecked")
    @Override
    public boolean source(FlowProcess flowProcess, SourceCall sourceCall) throws IOException {
        ScrollQuery query = sourceCall.getInput();

        if (!query.hasNext()) {
            return false;
        }

        TupleEntry entry = sourceCall.getIncomingEntry();
        Map data = (Map) query.next()[1];
        FieldAlias alias = (FieldAlias) sourceCall.getContext()[0];

        if (entry.getFields().isDefined()) {
            // lookup using writables
            // TODO: it's worth benchmarking whether using an index/offset yields significantly better performance
            for (Comparable field : entry.getFields()) {
                if (IS_ES_10) {
                    Object result = data;
                    // check for multi-level alias
                    for (String level : StringUtils.tokenize(alias.toES(field.toString()), ".")) {
                        result = ((Map) result).get(level);
                        if (result == null) {
                            break;
                        }
                    }
                    entry.setObject(field, result);
                }
                else {
                    //NB: coercion should be applied automatically by the TupleEntry
                    entry.setObject(field, data.get(alias.toES(field.toString())));
                }
            }
        }
        else {
            // no definition means no coercion
            List elements = Tuple.elements(entry.getTuple());
            elements.clear();
            elements.addAll(data.values());
        }
        return true;
    }

    @Override
    public void sink(FlowProcess flowProcess, SinkCall sinkCall) throws IOException {
        client.writeToIndex(sinkCall);
    }
}