All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.hadoop.rest.RestClient Maven / Gradle / Ivy

There is a newer version: 8.15.1
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.rest;

import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.map.DeserializationConfig;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.elasticsearch.hadoop.EsHadoopIllegalStateException;
import org.elasticsearch.hadoop.cfg.ConfigurationOptions;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.Request.Method;
import org.elasticsearch.hadoop.rest.query.QueryBuilder;
import org.elasticsearch.hadoop.rest.stats.Stats;
import org.elasticsearch.hadoop.rest.stats.StatsAware;
import org.elasticsearch.hadoop.serialization.ParsingUtils;
import org.elasticsearch.hadoop.serialization.dto.NodeInfo;
import org.elasticsearch.hadoop.serialization.json.JacksonJsonGenerator;
import org.elasticsearch.hadoop.serialization.json.JacksonJsonParser;
import org.elasticsearch.hadoop.serialization.json.JsonFactory;
import org.elasticsearch.hadoop.serialization.json.ObjectReader;
import org.elasticsearch.hadoop.util.ByteSequence;
import org.elasticsearch.hadoop.util.BytesArray;
import org.elasticsearch.hadoop.util.EsMajorVersion;
import org.elasticsearch.hadoop.util.FastByteArrayOutputStream;
import org.elasticsearch.hadoop.util.IOUtils;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.TrackingBytesArray;
import org.elasticsearch.hadoop.util.unit.TimeValue;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;

import static org.elasticsearch.hadoop.rest.Request.Method.DELETE;
import static org.elasticsearch.hadoop.rest.Request.Method.GET;
import static org.elasticsearch.hadoop.rest.Request.Method.HEAD;
import static org.elasticsearch.hadoop.rest.Request.Method.POST;
import static org.elasticsearch.hadoop.rest.Request.Method.PUT;

public class RestClient implements Closeable, StatsAware {

    private final static int MAX_BULK_ERROR_MESSAGES = 5;

    private NetworkClient network;
    private final ObjectMapper mapper;
    private final TimeValue scrollKeepAlive;
    private final boolean indexReadMissingAsEmpty;
    private final HttpRetryPolicy retryPolicy;
    final EsMajorVersion internalVersion;

    {
        mapper = new ObjectMapper();
        mapper.configure(DeserializationConfig.Feature.USE_ANNOTATIONS, false);
        mapper.configure(SerializationConfig.Feature.USE_ANNOTATIONS, false);
    }


    private final Stats stats = new Stats();

    public enum HEALTH {
        RED, YELLOW, GREEN
    }

    public RestClient(Settings settings) {
        network = new NetworkClient(settings);

        scrollKeepAlive = TimeValue.timeValueMillis(settings.getScrollKeepAlive());
        indexReadMissingAsEmpty = settings.getIndexReadMissingAsEmpty();

        String retryPolicyName = settings.getBatchWriteRetryPolicy();

        if (ConfigurationOptions.ES_BATCH_WRITE_RETRY_POLICY_SIMPLE.equals(retryPolicyName)) {
            retryPolicyName = SimpleHttpRetryPolicy.class.getName();
        }
        else if (ConfigurationOptions.ES_BATCH_WRITE_RETRY_POLICY_NONE.equals(retryPolicyName)) {
            retryPolicyName = NoHttpRetryPolicy.class.getName();
        }

        retryPolicy = ObjectUtils.instantiate(retryPolicyName, settings);
        // Assume that the elasticsearch major version is the latest if the version is not already present in the settings
        internalVersion = settings.getInternalVersionOrLatest();
    }

    public List getHttpNodes(boolean clientNodeOnly) {
        Map> nodesData = get("_nodes/http", "nodes");
        List nodes = new ArrayList();

        for (Entry> entry : nodesData.entrySet()) {
            NodeInfo node = new NodeInfo(entry.getKey(), entry.getValue());
            if (node.hasHttp() && (!clientNodeOnly || node.isClient())) {
                nodes.add(node);
            }
        }
        return nodes;
    }

    public List getHttpClientNodes() {
        return getHttpNodes(true);
    }

    public List getHttpDataNodes() {
        List nodes = getHttpNodes(false);

        Iterator it = nodes.iterator();
        while (it.hasNext()) {
            NodeInfo node = it.next();
            if (!node.isData()) {
                it.remove();
            }
        }
        return nodes;
    }

    public List getHttpIngestNodes() {
        List nodes = getHttpNodes(false);

        Iterator it = nodes.iterator();
        while (it.hasNext()) {
            NodeInfo nodeInfo = it.next();
            if (!nodeInfo.isIngest()) {
                it.remove();
            }
        }
        return nodes;
    }

    public  T get(String q, String string) {
        return parseContent(execute(GET, q), string);
    }

    @SuppressWarnings("unchecked")
    private  T parseContent(InputStream content, String string) {
        Map map = Collections.emptyMap();

        try {
            // create parser manually to lower Jackson requirements
            JsonParser jsonParser = mapper.getJsonFactory().createJsonParser(content);
            try {
                map = mapper.readValue(jsonParser, Map.class);
            } finally {
                countStreamStats(content);
            }
        } catch (IOException ex) {
            throw new EsHadoopParsingException(ex);
        }

        return (T) (string != null ? map.get(string) : map);
    }

    public BulkResponse bulk(Resource resource, TrackingBytesArray data) {
        Retry retry = retryPolicy.init();
        BulkResponse processedResponse;

        boolean isRetry = false;

        do {
            // NB: dynamically get the stats since the transport can change
            long start = network.transportStats().netTotalTime;
            Response response = execute(PUT, resource.bulk(), data);
            long spent = network.transportStats().netTotalTime - start;

            stats.bulkTotal++;
            stats.docsSent += data.entries();
            stats.bulkTotalTime += spent;
            // bytes will be counted by the transport layer

            if (isRetry) {
                stats.docsRetried += data.entries();
                stats.bytesRetried += data.length();
                stats.bulkRetries++;
                stats.bulkRetriesTotalTime += spent;
            }

            isRetry = true;

            processedResponse = processBulkResponse(response, data);
        } while (data.length() > 0 && retry.retry(processedResponse.getHttpStatus()));

        return processedResponse;
    }

    @SuppressWarnings("rawtypes")
    BulkResponse processBulkResponse(Response response, TrackingBytesArray data) {
        InputStream content = response.body();
        try {
            ObjectReader r = JsonFactory.objectReader(mapper, Map.class);
            JsonParser parser = mapper.getJsonFactory().createJsonParser(content);
            try {
                if (ParsingUtils.seek(new JacksonJsonParser(parser), "items") == null) {
                    // recorded bytes are ack here
                    stats.bytesAccepted += data.length();
                    stats.docsAccepted += data.entries();
                    return BulkResponse.ok(data.entries());
                }
            } finally {
                countStreamStats(content);
            }

            int docsSent = data.entries();
            List errorMessageSample = new ArrayList(MAX_BULK_ERROR_MESSAGES);
            int errorMessagesSoFar = 0;
            int entryToDeletePosition = 0; // head of the list
            for (Iterator iterator = r.readValues(parser); iterator.hasNext();) {
                Map map = iterator.next();
                Map values = (Map) map.values().iterator().next();
                Integer status = (Integer) values.get("status");

                String error = extractError(values);
                if (error != null && !error.isEmpty()) {
                    if ((status != null && HttpStatus.canRetry(status)) || error.contains("EsRejectedExecutionException")) {
                        entryToDeletePosition++;
                        if (errorMessagesSoFar < MAX_BULK_ERROR_MESSAGES) {
                            // We don't want to spam the log with the same error message 1000 times.
                            // Chances are that the error message is the same across all failed writes
                            // and if it is not, then there are probably only a few different errors which
                            // this should pick up.
                            errorMessageSample.add(error);
                            errorMessagesSoFar++;
                        }
                    }
                    else {
                        String message = (status != null ?
                                String.format("[%s] returned %s(%s) - %s", response.uri(), HttpStatus.getText(status), status, prettify(error)) : prettify(error));
                        throw new EsHadoopInvalidRequest(String.format("Found unrecoverable error %s; Bailing out..", message));
                    }
                }
                else {
                    stats.bytesAccepted += data.length(entryToDeletePosition);
                    stats.docsAccepted += 1;
                    data.remove(entryToDeletePosition);
                }
            }

            int httpStatusToReport = entryToDeletePosition > 0 ? HttpStatus.SERVICE_UNAVAILABLE : HttpStatus.OK;
            return new BulkResponse(httpStatusToReport, docsSent, data.leftoversPosition(), errorMessageSample);
            // catch IO/parsing exceptions
        } catch (IOException ex) {
            throw new EsHadoopParsingException(ex);
        }
    }

    private String extractError(Map jsonMap) {
        Object err = jsonMap.get("error");
        String error = "";
        if (err != null) {
            // part of ES 2.0
            if (err instanceof Map) {
                Map m = ((Map) err);
                err = m.get("root_cause");
                if (err == null) {
                    if (m.containsKey("reason")) {
                        error = m.get("reason").toString();
                    }
                    else if (m.containsKey("caused_by")) {
                        error += ";" + ((Map) m.get("caused_by")).get("reason");
                    }
                    else {
                        error = m.toString();
                    }
                }
                else {
                    if (err instanceof List) {
                        Object nested = ((List) err).get(0);
                        if (nested instanceof Map) {
                            Map nestedM = (Map) nested;
                            if (nestedM.containsKey("reason")) {
                                error = nestedM.get("reason").toString();
                            }
                            else {
                                error = nested.toString();
                            }
                        }
                        else {
                            error = nested.toString();
                        }
                    }
                    else {
                        error = err.toString();
                    }
                }
            }
            else {
                error = err.toString();
            }
        }
        return error;
    }

    private String prettify(String error) {
        if (internalVersion.onOrAfter(EsMajorVersion.V_2_X)) {
            return error;
        }

        String invalidFragment = ErrorUtils.extractInvalidXContent(error);
        String header = (invalidFragment != null ? "Invalid JSON fragment received[" + invalidFragment + "]" : "");
        return header + "[" + error + "]";
    }

    private String prettify(String error, ByteSequence body) {
        if (internalVersion.onOrAfter(EsMajorVersion.V_2_X)) {
            return error;
        }
        String message = ErrorUtils.extractJsonParse(error, body);
        return (message != null ? error + "; fragment[" + message + "]" : error);
    }

    public void refresh(Resource resource) {
        execute(POST, resource.refresh());
    }

    public List>> targetShards(String index, String routing) {
        List>> shardsJson = null;

        // https://github.com/elasticsearch/elasticsearch/issues/2726
        String target = index + "/_search_shards";
        if (routing != null) {
            target += "?routing=" + StringUtils.encodeQuery(routing);
        }
        if (indexReadMissingAsEmpty) {
            Request req = new SimpleRequest(GET, null, target);
            Response res = executeNotFoundAllowed(req);
            if (res.status() == HttpStatus.OK) {
                shardsJson = parseContent(res.body(), "shards");
            }
            else {
                shardsJson = Collections.emptyList();
            }
        }
        else {
            shardsJson = get(target, "shards");
        }

        return shardsJson;
    }

    @SuppressWarnings("unchecked")
    public Map getMapping(String query) {
        return (Map) get(query, null);
    }

    public Map sampleForFields(String indexAndType, Collection fields) {
        if (fields == null || fields.isEmpty()) {
            return Collections.emptyMap();
        }

        StringBuilder sb = new StringBuilder();
        sb.append("{ \"terminate_after\":1, \"size\":1,\n");
        // use source since some fields might be objects
        sb.append("\"_source\": [");
        for (String field : fields) {
            sb.append(String.format(Locale.ROOT, "\"%s\",", field));
        }
        // remove trailing ,
        sb.setLength(sb.length() - 1);
        sb.append("],\n\"query\":{");

        if (internalVersion.onOrAfter(EsMajorVersion.V_2_X)) {
            sb.append("\"bool\": { \"must\":[");
        }
        else {
            sb.append("\"constant_score\":{ \"filter\": { \"and\":[");

        }
        for (String field: fields) {
            sb.append(String.format(Locale.ROOT, "\n{ \"exists\":{ \"field\":\"%s\"} },", field));
        }
        // remove trailing ,
        sb.setLength(sb.length() - 1);
        sb.append("\n]}");

        if (internalVersion.on(EsMajorVersion.V_1_X)) {
            sb.append("}");
        }

        sb.append("}}");

        Map>> hits = parseContent(execute(GET, indexAndType + "/_search", new BytesArray(sb.toString())).body(), "hits");
        List> docs = hits.get("hits");
        if (docs == null || docs.isEmpty()) {
            return Collections.emptyMap();
        }
        Map foundFields = docs.get(0);
        Map fieldInfo = (Map) foundFields.get("_source");

        return fieldInfo;
    }

    @Override
    public void close() {
        if (network != null) {
            network.close();
            stats.aggregate(network.stats());
            network = null;
        }
    }

    protected InputStream execute(Request request) {
        return execute(request, true).body();
    }

    protected InputStream execute(Method method, String path) {
        return execute(new SimpleRequest(method, null, path));
    }

    protected Response execute(Method method, String path, boolean checkStatus) {
        return execute(new SimpleRequest(method, null, path), checkStatus);
    }

    protected Response execute(Method method, String path, ByteSequence buffer) {
        return execute(new SimpleRequest(method, null, path, null, buffer), true);
    }

    protected Response execute(Method method, String path, ByteSequence buffer, boolean checkStatus) {
        return execute(new SimpleRequest(method, null, path, null, buffer), checkStatus);
    }

    protected Response execute(Request request, boolean checkStatus) {
        Response response = network.execute(request);
        if (checkStatus) {
            checkResponse(request, response);
        }
        return response;
    }

    protected Response executeNotFoundAllowed(Request req) {
        Response res = execute(req, false);
        switch (res.status()) {
        case HttpStatus.OK:
            break;
        case HttpStatus.NOT_FOUND:
            break;
        default:
            checkResponse(req, res);
        }

        return res;
    }

    private void checkResponse(Request request, Response response) {
        if (response.hasFailed()) {
            // check error first
            String msg = null;
            // try to parse the answer
            try {
                msg = extractError(this. parseContent(response.body(), null));
                if (response.isClientError()) {
                    msg = msg + "\n" + request.body();
                }
                else {
                    msg = prettify(msg, request.body());
                }
            } catch (Exception ex) {
                // can't parse message, move on
            }

            if (!StringUtils.hasText(msg)) {
                msg = String.format("[%s] on [%s] failed; server[%s] returned [%s|%s:%s]", request.method().name(),
                        request.path(), response.uri(), response.status(), response.statusDescription(),
                        IOUtils.asStringAlways(response.body()));
            }

            throw new EsHadoopInvalidRequest(msg);
        }
    }

    public InputStream scroll(String scrollId) {
        // NB: dynamically get the stats since the transport can change
        long start = network.transportStats().netTotalTime;
        try {
            // use post instead of get to avoid some weird encoding issues (caused by the long URL)
            InputStream is = execute(POST, "_search/scroll?scroll=" + scrollKeepAlive.toString(),
                    new BytesArray(scrollId)).body();
            stats.scrollTotal++;
            return is;
        } finally {
            stats.scrollTotalTime += network.transportStats().netTotalTime - start;
        }
    }

    public boolean delete(String indexOrType) {
        Request req = new SimpleRequest(DELETE, null, indexOrType);
        Response res = executeNotFoundAllowed(req);
        return (res.status() == HttpStatus.OK ? true : false);
    }
    public boolean deleteScroll(String scrollId) {
        Request req = new SimpleRequest(DELETE, null, "_search/scroll", new BytesArray(scrollId.getBytes(StringUtils.UTF_8)));
        Response res = executeNotFoundAllowed(req);
        return (res.status() == HttpStatus.OK ? true : false);
    }

    public boolean exists(String indexOrType) {
        Request req = new SimpleRequest(HEAD, null, indexOrType);
        Response res = executeNotFoundAllowed(req);

        return (res.status() == HttpStatus.OK ? true : false);
    }

    public boolean touch(String indexOrType) {
        if (!exists(indexOrType)) {
            Response response = execute(PUT, indexOrType, false);

            if (response.hasFailed()) {
                String msg = null;
                // try to parse the answer
                try {
                    msg = parseContent(response.body(), "error");
                } catch (Exception ex) {
                    // can't parse message, move on
                }

                if (StringUtils.hasText(msg) && !msg.contains("IndexAlreadyExistsException")) {
                    throw new EsHadoopIllegalStateException(msg);
                }
            }
            return response.hasSucceeded();
        }
        return false;
    }

    public long count(String indexAndType, QueryBuilder query) {
        return count(indexAndType, null, query);
    }

    public long count(String indexAndType, String shardId, QueryBuilder query) {
        return internalVersion.onOrAfter(EsMajorVersion.V_5_X) ?
                countInES5X(indexAndType, shardId, query) : countBeforeES5X(indexAndType, shardId, query);
    }

    private long countBeforeES5X(String indexAndType, String shardId, QueryBuilder query) {
        StringBuilder uri = new StringBuilder(indexAndType);
        uri.append("/_count");
        if (StringUtils.hasLength(shardId)) {
            uri.append("?preference=_shards:");
            uri.append(shardId);
        }
        Response response = execute(GET, uri.toString(), searchRequest(query));
        Number count = (Number) parseContent(response.body(), "count");
        return (count != null ? count.longValue() : -1);
    }

    private long countInES5X(String indexAndType, String shardId, QueryBuilder query) {
        StringBuilder uri = new StringBuilder(indexAndType);
        uri.append("/_search?size=0");
        if (StringUtils.hasLength(shardId)) {
            uri.append("&preference=_shards:");
            uri.append(shardId);
        }
        Response response = execute(GET, uri.toString(), searchRequest(query));
        Map content = parseContent(response.body(), "hits");
        Number count = (Number) content.get("total");
        return (count != null ? count.longValue() : -1);
    }

    static BytesArray searchRequest(QueryBuilder query) {
        FastByteArrayOutputStream out = new FastByteArrayOutputStream(256);
        JacksonJsonGenerator generator = new JacksonJsonGenerator(out);
        try {
            generator.writeBeginObject();
            generator.writeFieldName("query");
            generator.writeBeginObject();
            query.toJson(generator);
            generator.writeEndObject();
            generator.writeEndObject();
        } finally {
            generator.close();
        }
        return out.bytes();
    }

    public boolean isAlias(String query) {
        Map aliases = (Map) get(query, null);
        return (aliases.size() > 1);
    }

    public void putMapping(String index, String mapping, byte[] bytes) {
        // create index first (if needed) - it might return 403/404
        touch(index);

        execute(PUT, mapping, new BytesArray(bytes));
    }

    public EsMajorVersion remoteEsVersion() {
        Map result = get("", "version");
        if (result == null || !StringUtils.hasText(result.get("number"))) {
            throw new EsHadoopIllegalStateException("Unable to retrieve elasticsearch version.");
        }
        return EsMajorVersion.parse(result.get("number"));
    }

    public boolean health(String index, HEALTH health, TimeValue timeout) {
        StringBuilder sb = new StringBuilder("/_cluster/health/");
        sb.append(index);
        sb.append("?wait_for_status=");
        sb.append(health.name().toLowerCase(Locale.ROOT));
        sb.append("&timeout=");
        sb.append(timeout.toString());

        return (Boolean.TRUE.equals(get(sb.toString(), "timed_out")));
    }

    @Override
    public Stats stats() {
        Stats copy = new Stats(stats);
        if (network != null) {
            copy.aggregate(network.stats());
        }
        return copy;
    }

    private void countStreamStats(InputStream content) {
        if (content instanceof StatsAware) {
            stats.aggregate(((StatsAware) content).stats());
        }
    }

    public String getCurrentNode() {
        return network.currentNode();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy