// com.netflix.ndbench.plugin.es.EsWriter Maven / Gradle / Ivy
//
// Go to download
/*
 *  Copyright 2016 Netflix, Inc.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
package com.netflix.ndbench.plugin.es;

import com.netflix.ndbench.api.plugin.DataGenerator;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.message.BasicHeader;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.Date;
import java.util.Locale;
import java.util.UUID;


/**
 * <p>
 * Writes instances of JSON documents returned by {@link EsUtils#createDefaultDocumentAsJson} to Elasticsearch either
 * via single REST calls, or via the bulk write API.
 * </p>
 * <p>
 * The responsibilities of this class are to:
 * </p>
 * <ul>
 * <li>
 * determine if a particular write operation is done via Elasticsearch's bulk API or not (using the value of
 * the isBulkWrite boolean passed to this class's constructor)
 * </li>
 * <li>
 * immediately issue a PUT of the document to Elasticsearch if a given write is determined not to be handled by
 * bulk write.
 * </li>
 * <li>
 * otherwise, assemble a batch of freshly generated documents for each write call; the number of documents in
 * the batch is the bulkWriteBatchSize passed to this class's constructor
 * (see {@link IEsConfig#getBulkWriteBatchSize()}).
 * </li>
 * <li>
 * submit that whole batch to Elasticsearch in a single bulk request.
 * </li>
 * </ul>
 * <p>
 * Note that if a document is written via bulk write its isBulkWrite attribute will be "true".
 * </p>
 * <p>
 * Note that any exception thrown will be bubbled up to the driver and will result in a failed write being recorded.
 * </p>
 */
class EsWriter {
    private static final Logger logger = LoggerFactory.getLogger(EsWriter.class);

    private static final BasicHeader CONTENT_TYPE_HDR_JSON = new BasicHeader("Content-Type", "application/json");

    /** Path of the bulk endpoint that batched writes are POSTed to. */
    private static final String BULK_ENDPOINT = "/_bulk";

    /** Number of minutes in a day; index rolls are computed at minute resolution. */
    private static final int MINUTES_PER_DAY = 1440;

    /** Zone in which the date and the roll number of a rolled index name are computed. */
    private static final ZoneId UTC = ZoneId.of("UTC");

    /** Formatter for the date portion of a rolled index name; built once instead of on every call. */
    private static final DateTimeFormatter INDEX_DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ROOT);

    private final String esIndexUrl;
    private final int bulkWriteBatchSize;
    private final String indexName;
    private final String esDocType;
    private final boolean isBulkWrite;

    private final DataGenerator dataGenerator;
    private final int indexRollsPerDay;


    /**
     * Returns a writer whose {@link EsWriter#writeDocument } method will issue writes to 'esIndexName' and 'esDocType'.
     *
     * @param esIndexName        - index name to which writes will be targeted (with possibly appended date pattern,
     *                           as determined by {@link IEsConfig#getIndexRollsPerDay()})
     * @param esDocType          - document type (of index named esIndexName) to which writes will be targeted
     * @param isBulkWrite        - whether to perform 1 write or a batch of writes in the context of a
     *                           'writeDocument' call
     * @param indexRollsPerDay   - a value determined by the configuration setting
     *                           {@link IEsConfig#getIndexRollsPerDay()}; may only be greater than zero when
     *                           isBulkWrite is true
     * @param bulkWriteBatchSize - the number of documents placed in each bulk request (ignored if
     *                           isBulkWrite = false, but nevertheless cannot be negative). A value of zero
     *                           produces an empty bulk payload.
     * @param dataGenerator      - data generator used to inject random values into documents written to Elasticsearch.
     * @throws IllegalArgumentException if bulkWriteBatchSize is negative, or if indexRollsPerDay is positive while
     *                                  isBulkWrite is false
     */
    EsWriter(String esIndexName,
             String esDocType,
             boolean isBulkWrite,
             int indexRollsPerDay,
             int bulkWriteBatchSize,
             DataGenerator dataGenerator) {
        if (bulkWriteBatchSize < 0) {
            throw new IllegalArgumentException("bulkWriteBatchSize cannot be less than zero");
        }
        if (!isBulkWrite && indexRollsPerDay > 0) {
            throw new IllegalArgumentException(
                    "getIndexRollsPerDay fast property only makes sense to be set when isBulkWrite is set ");
        }

        this.esIndexUrl = esIndexName + "/" + esDocType;
        this.esDocType = esDocType;
        this.indexName = esIndexName;
        this.bulkWriteBatchSize = bulkWriteBatchSize;
        this.isBulkWrite = isBulkWrite;
        this.indexRollsPerDay = indexRollsPerDay;
        this.dataGenerator = dataGenerator;
    }


    /**
     * Issues writes to 'esIndexName' and 'esDocType' given 'restClient' (which determines the host/port of the
     * Elasticsearch cluster to write to.)
     *
     * @param restClient    - client through which the request is performed
     * @param key           - document id (single write) or common id prefix of every document in the batch (bulk)
     * @param randomizeKeys - when true, a random UUID is appended to 'key' for every document written
     * @return always {@link WriteResult#PROVISIONAL_RESULT_THAT_ASSUMES_ALL_WENT_WELL}; failures are reported
     * by exception only
     * @throws Exception any failure raised while building or performing the request
     */
    WriteResult writeDocument(RestClient restClient,
                              String key,
                              Boolean randomizeKeys) throws Exception {
        if (isBulkWrite) {
            writeBatchSizeWorthOfDocs(restClient, key, randomizeKeys);
        } else {
            writeSingleDoc(restClient, key, randomizeKeys);
        }

        return WriteResult.PROVISIONAL_RESULT_THAT_ASSUMES_ALL_WENT_WELL;
    }

    /**
     * PUTs one generated document (flagged as a non-bulk write) to /index/type/id and fails unless the response
     * status is 200 or 201.
     */
    private void writeSingleDoc(RestClient restClient,
                                String key,
                                Boolean randomizeKeys) throws IOException {
        String randomizedKey = randomizedKey(key, randomizeKeys);
        String url = "/" + esIndexUrl + "/" + randomizedKey;
        String doc = EsUtils.createDefaultDocumentAsJson(dataGenerator, false);
        Response response =
                restClient.performRequest(
                        "PUT",
                        url,
                        Collections.emptyMap(),
                        jsonEntity(doc),
                        CONTENT_TYPE_HDR_JSON);
        logger.debug(
                "write of doc with key={} to index={} resulted in response: {}", randomizedKey, indexName, response);

        verifySuccess(url, response);
    }

    /**
     * Generates 'bulkWriteBatchSize' documents (flagged as bulk writes), renders them as one newline-delimited
     * bulk payload targeting the (possibly rolled) index, and POSTs that payload in a single request.
     */
    private void writeBatchSizeWorthOfDocs(RestClient restClient,
                                           String key,
                                           Boolean randomizeKeys) throws IOException {
        // The roll is resolved once so that every document of the batch lands in the same index.
        String targetIndexName = constructIndexName(indexName, indexRollsPerDay, new Date());

        StringBuilder payload = new StringBuilder();
        for (int i = 0; i < bulkWriteBatchSize; i++) {
            String doc = EsUtils.createDefaultDocumentAsJson(dataGenerator, true);
            String randomizedKey = randomizedKey(key, randomizeKeys);
            payload.append(jsonForAddingDoc(randomizedKey, doc, targetIndexName)).append('\n');
        }
        String json = payload.toString();

        Response response = restClient.performRequest(
                "POST", BULK_ENDPOINT, Collections.emptyMap(), jsonEntity(json), CONTENT_TYPE_HDR_JSON);
        if (logger.isTraceEnabled()) {
            logger.trace("got response: {} after sending bulk write payload of: {}", response, json);
        } else {
            logger.debug("GOT response: {} after sending bulk write payload", response);
        }

        // Same status contract as the single-document path.
        // NOTE(review): a successful bulk response can still report per-item failures in its body; the body is
        // not inspected here -- confirm whether that is acceptable for the benchmark.
        verifySuccess(BULK_ENDPOINT, response);
    }


    /**
     * Renders the two-line bulk entry (action metadata line, then document source line) for one document.
     * 'key' is inserted verbatim, without JSON escaping.
     */
    private String jsonForAddingDoc(String key, String doc, String indexName) {
        String metadata = String.format(
                "{ \"index\" : { \"_index\" : \"%s\", \"_type\" : \"%s\", \"_id\" : \"%s\" } }",
                indexName,
                esDocType,
                key);
        String retval = metadata + "\n" + doc;           // yes. could do in format, but this is clearer
        logger.trace("bulk write payload for one doc: {}", retval);
        return retval;
    }

    /** Returns 'key', with a random UUID appended when 'randomizeKeys' is true. */
    private static String randomizedKey(String key, Boolean randomizeKeys) {
        return key + (randomizeKeys ? UUID.randomUUID().toString() : "");
    }

    /**
     * Wraps a JSON string as a request entity encoded in UTF-8. The one-argument StringEntity constructor
     * falls back to ISO-8859-1, which cannot represent characters outside Latin-1.
     */
    private static StringEntity jsonEntity(String json) {
        return new StringEntity(json, ContentType.APPLICATION_JSON);
    }

    /** Throws a RuntimeException unless the response status is 200 or 201. */
    private static void verifySuccess(String url, Response response) {
        int responseCode = response.getStatusLine().getStatusCode();
        if (responseCode != 200 && responseCode != 201) {
            throw new RuntimeException("write operation failed [" + url + "]. response: " + response);
        }
    }


    /**
     * Builds the name of the index that a write at instant 'date' should target.
     * <p>
     * When 'indexRollsPerDay' is zero or negative the name is returned unchanged. Otherwise the UTC day is split
     * into rolls of (1440 / indexRollsPerDay) minutes each and the result is
     * {@code <indexName>-<yyyy-MM-dd>.<roll number, zero padded to 4 digits>}. A roll is never shorter than one
     * minute, so values above 1440 behave like 1440 instead of failing with a division by zero.
     * <p>
     * Package scoped to facilitate unit testing.
     *
     * @param indexName        - base index name
     * @param indexRollsPerDay - how many times per day the index name changes; values <= 0 disable rolling
     * @param date             - instant for which the name is computed
     * @return the base name, or the base name with date and roll number appended
     */
    static String constructIndexName(String indexName, int indexRollsPerDay, Date date) {
        if (indexRollsPerDay <= 0) {
            return indexName;
        }

        ZonedDateTime zdt = ZonedDateTime.ofInstant(date.toInstant(), UTC);

        // Integer division truncates to 0 once indexRollsPerDay exceeds 1440; clamp to avoid dividing by zero.
        int minutesPerRoll = Math.max(1, MINUTES_PER_DAY / indexRollsPerDay);
        int minutesElapsedSinceStartOfDay = zdt.getHour() * 60 + zdt.getMinute();
        int nthRoll = minutesElapsedSinceStartOfDay / minutesPerRoll;

        // Locale.ROOT keeps the roll number in ASCII digits whatever the JVM's default locale is.
        String retval = String.format(
                Locale.ROOT, "%s-%s.%04d", indexName, zdt.format(INDEX_DATE_FORMAT), nthRoll);
        logger.debug("constructIndexName from rolls per day = {} gives: {}", indexRollsPerDay, retval);
        return retval;
    }
}