org.elasticsearch.hadoop.rest.RestRepository Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch-hadoop-mr Show documentation
Show all versions of elasticsearch-hadoop-mr Show documentation
Elasticsearch Hadoop Map/Reduce
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.rest;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.elasticsearch.hadoop.EsHadoopIllegalStateException;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.bulk.BulkProcessor;
import org.elasticsearch.hadoop.rest.bulk.BulkResponse;
import org.elasticsearch.hadoop.rest.query.QueryUtils;
import org.elasticsearch.hadoop.rest.stats.Stats;
import org.elasticsearch.hadoop.rest.stats.StatsAware;
import org.elasticsearch.hadoop.serialization.ScrollReader;
import org.elasticsearch.hadoop.serialization.ScrollReader.Scroll;
import org.elasticsearch.hadoop.serialization.ScrollReaderConfigBuilder;
import org.elasticsearch.hadoop.serialization.builder.JdkValueReader;
import org.elasticsearch.hadoop.serialization.bulk.BulkCommands;
import org.elasticsearch.hadoop.serialization.bulk.BulkEntryWriter;
import org.elasticsearch.hadoop.serialization.bulk.MetadataExtractor;
import org.elasticsearch.hadoop.serialization.dto.NodeInfo;
import org.elasticsearch.hadoop.serialization.dto.ShardInfo;
import org.elasticsearch.hadoop.serialization.dto.mapping.FieldParser;
import org.elasticsearch.hadoop.serialization.dto.mapping.GeoField;
import org.elasticsearch.hadoop.serialization.dto.mapping.GeoField.GeoType;
import org.elasticsearch.hadoop.serialization.dto.mapping.Mapping;
import org.elasticsearch.hadoop.serialization.dto.mapping.MappingSet;
import org.elasticsearch.hadoop.serialization.dto.mapping.MappingUtils;
import org.elasticsearch.hadoop.serialization.handler.read.impl.AbortOnlyHandlerLoader;
import org.elasticsearch.hadoop.util.Assert;
import org.elasticsearch.hadoop.util.BytesArray;
import org.elasticsearch.hadoop.util.BytesRef;
import org.elasticsearch.hadoop.util.EsMajorVersion;
import org.elasticsearch.hadoop.util.SettingsUtils;
import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.unit.TimeValue;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import static org.elasticsearch.hadoop.rest.Request.Method.POST;
/**
* Rest client performing high-level operations using buffers to improve performance. Stateful in that once created, it
* is used to perform updates against the same index.
*/
public class RestRepository implements Closeable, StatsAware {
// Class-wide commons-logging logger.
private static Log log = LogFactory.getLog(RestRepository.class);
// Reusable wrapper around an existing BytesArray (for cases where the serialization already occurred);
// allocated in lazyInitWriting() and reset before each use in writeProcessedToIndex().
private BytesRef trivialBytesRef;
// Flipped to true by lazyInitWriting(); tryFlush() and flush() assert on it.
private boolean writeInitialized = false;
// Created in the constructor and set to null by close(), where null doubles as the "already closed" marker.
private RestClient client;
// Optional extractor, stored by addRuntimeFieldExtractor() and handed to BulkCommands.create() in lazyInitWriting().
private MetadataExtractor metaExtractor;
// Serializes objects into bulk entries; created in lazyInitWriting(), released in close().
private BulkEntryWriter bulkEntryWriter;
// Receives the serialized entries; created in lazyInitWriting(), released in close().
private BulkProcessor bulkProcessor;
// Internal
/**
 * Holder that builds the read and write {@link Resource} on first request and caches them afterwards.
 * A resource is only built when the matching setting contains text; otherwise the getter returns
 * {@code null} (and re-checks the setting on the next call).
 */
private static class Resources {
    private final Settings resourceSettings;
    private Resource resourceRead;
    private Resource resourceWrite;

    public Resources(Settings resourceSettings) {
        this.resourceSettings = resourceSettings;
    }

    /**
     * @return the cached read resource, creating it on first use; {@code null} when no read resource is configured
     */
    public Resource getResourceRead() {
        boolean missing = (resourceRead == null);
        if (missing && StringUtils.hasText(resourceSettings.getResourceRead())) {
            // second argument 'true' presumably selects read-mode parsing - confirm against Resource
            resourceRead = new Resource(resourceSettings, true);
        }
        return resourceRead;
    }

    /**
     * @return the cached write resource, creating it on first use; {@code null} when no write resource is configured
     */
    public Resource getResourceWrite() {
        boolean missing = (resourceWrite == null);
        if (missing && StringUtils.hasText(resourceSettings.getResourceWrite())) {
            // second argument 'false' presumably selects write-mode parsing - confirm against Resource
            resourceWrite = new Resource(resourceSettings, false);
        }
        return resourceWrite;
    }
}
// Configuration this repository was created with; also passed to the RestClient and the bulk components.
private final Settings settings;
// Lazily resolved read/write resources derived from the settings.
private Resources resources;
// Accumulates the stats of the bulk processor and of the client when close() runs.
private final Stats stats = new Stats();
public RestRepository(Settings settings) {
this.settings = settings;
this.resources = new Resources(settings);
// Check if we have a read resource first, and if not, THEN check the write resource
// The write resource has more strict parsing rules, and if the process is only reading
// with a resource that isn't good for writing, then eagerly parsing the resource as a
// write resource can erroneously throw an error. Instead, we should just get the write
// resource lazily as needed.
Assert.isTrue(resources.getResourceRead() != null || resources.getResourceWrite() != null, "Invalid configuration - No read or write resource specified");
this.client = new RestClient(settings);
}
/**
 * Sets up the write-side collaborators on first use. Read-only usage never calls this,
 * so no write buffers are allocated for it. Subsequent calls are no-ops.
 */
private void lazyInitWriting() {
    if (writeInitialized) {
        return;
    }
    // the flag is raised before the collaborators are built
    writeInitialized = true;
    bulkProcessor = new BulkProcessor(client, resources.getResourceWrite(), settings);
    trivialBytesRef = new BytesRef();
    // the metadata extractor (possibly null) is handed over here, at initialization time
    bulkEntryWriter = new BulkEntryWriter(
            settings,
            BulkCommands.create(settings, metaExtractor, client.clusterInfo.getMajorVersion()));
}
/**
 * Returns a pageable result for the given query by delegating to
 * {@link #scanLimit(String, BytesArray, long, ScrollReader)} with a limit of {@code -1}.
 *
 * @param query scan query
 * @param body request body forwarded to the scroll query
 * @param reader scroll reader
 * @return a scroll query
 */
ScrollQuery scanAll(String query, BytesArray body, ScrollReader reader) {
    // -1 is presumably read by ScrollQuery as "no limit" - confirm against ScrollQuery
    final long noLimit = -1;
    return scanLimit(query, body, noLimit, reader);
}
/**
 * Builds a pageable (scan based) result for the given query.
 *
 * @param query scan query
 * @param body request body handed to the scroll query
 * @param limit limit handed to the scroll query ({@code scanAll} passes {@code -1})
 * @param reader scroll reader
 * @return a new scroll query bound to this repository
 */
ScrollQuery scanLimit(String query, BytesArray body, long limit, ScrollReader reader) {
    ScrollQuery scroll = new ScrollQuery(this, query, body, limit, reader);
    return scroll;
}
/**
 * Stores the metadata extractor that lazyInitWriting() later passes to BulkCommands.create().
 * NOTE(review): in the code visible here the extractor is only read during write initialization,
 * so it presumably has to be set before the first write call - confirm.
 *
 * @param metaExtractor extractor to use for bulk commands
 */
public void addRuntimeFieldExtractor(MetadataExtractor metaExtractor) {
this.metaExtractor = metaExtractor;
}
/**
 * Serializes the given object into a bulk entry and queues it for indexing.
 * Nothing is queued when the entry writer returns {@code null} for the object.
 *
 * @param object object to add to the index
 */
public void writeToIndex(Object object) {
    Assert.notNull(object, "no object data given");
    lazyInitWriting();

    BytesRef entry = bulkEntryWriter.writeBulkEntry(object);
    if (entry == null) {
        // the writer produced no entry for this object - nothing to queue
        return;
    }
    doWriteToIndex(entry);
}
/**
 * Queues already-serialized data for indexing, bypassing the bulk entry writer.
 *
 * @param ba The data as a bytes array; must be non-null and non-empty
 */
public void writeProcessedToIndex(BytesArray ba) {
    Assert.notNull(ba, "no data given");
    boolean hasData = ba.length() > 0;
    Assert.isTrue(hasData, "no data given");
    lazyInitWriting();

    // reuse the shared wrapper instead of allocating a new reference per call
    BytesRef wrapper = trivialBytesRef;
    wrapper.reset();
    wrapper.add(ba);
    doWriteToIndex(wrapper);
}
/**
 * Adds the payload to the bulk processor and then resets the payload.
 *
 * @param payload serialized entry to queue; it is reset before this method returns
 */
private void doWriteToIndex(BytesRef payload) {
bulkProcessor.add(payload);
// reset after the add so the (possibly shared) reference can be reused by the caller
payload.reset();
}
/**
 * Delegates to the bulk processor's tryFlush() and returns its response.
 * Writing must have been initialized by an earlier write call; otherwise the assertion below fails
 * (the failure type is defined by Assert, which is not visible here).
 *
 * @return the response reported by the bulk processor
 */
public BulkResponse tryFlush() {
Assert.isTrue(writeInitialized, "Cannot flush non-initialized write operation");
return bulkProcessor.tryFlush();
}
/**
 * Delegates to the bulk processor's flush().
 * Writing must have been initialized by an earlier write call; otherwise the assertion below fails
 * (the failure type is defined by Assert, which is not visible here).
 */
public void flush() {
Assert.isTrue(writeInitialized, "Cannot flush non-initialized write operation");
bulkProcessor.flush();
}
/**
 * Closes the write-side components (when they were initialized) and then the REST client,
 * folding the bulk processor and client stats into this repository's stats.
 * <p>
 * Calling this again after the client has been released is a no-op. The bulk entry writer is
 * closed even when closing the bulk processor throws, and the client is closed even when either
 * of those fails; an exception thrown from a later cleanup step replaces an earlier one.
 */
@Override
public void close() {
    if (log.isDebugEnabled()) {
        log.debug("Closing repository and connection to Elasticsearch ...");
    }

    // bail out if closed before
    if (client == null) {
        return;
    }

    try {
        try {
            if (bulkProcessor != null) {
                bulkProcessor.close();
                // Aggregate stats before discarding them.
                stats.aggregate(bulkProcessor.stats());
                bulkProcessor = null;
            }
        } finally {
            // Must run even if the bulk processor failed to close: once the client below is
            // nulled, a later close() returns early and would never release the writer.
            if (bulkEntryWriter != null) {
                bulkEntryWriter.close();
                bulkEntryWriter = null;
            }
        }
    } finally {
        client.close();
        // Aggregate stats before discarding them.
        stats.aggregate(client.stats());
        client = null;
    }
}
/**
 * Exposes the underlying REST client.
 *
 * @return the client, or null once close() has released it
 */
public RestClient getRestClient() {
return client;
}
public List>> getReadTargetShards() {
for (int retries = 0; retries < 3; retries++) {
List>> result = doGetReadTargetShards();
if (result != null) {
return result;
}
}
throw new EsHadoopIllegalStateException("Cluster state volatile; cannot find node backing shards - please check whether your cluster is stable");
}
protected List>> doGetReadTargetShards() {
return client.targetShards(resources.getResourceRead().index(), SettingsUtils.getFixedRouting(settings));
}
/**
 * Resolves the mapping returned by {@code doGetWriteTargetPrimaryShards}, asking up to three
 * times before giving up. An attempt counts as failed when the lookup returns {@code null}.
 * <p>
 * NOTE(review): the raw {@code Map} looks like an extraction artifact; the type arguments are
 * presumably {@code <ShardInfo, NodeInfo>} - confirm before parameterizing.
 *
 * @param clientNodesOnly forwarded unchanged to the single-attempt lookup
 * @return the first non-null mapping produced by the lookup
 * @throws EsHadoopIllegalStateException when all three attempts returned {@code null}
 */
public Map getWriteTargetPrimaryShards(boolean clientNodesOnly) {
    int attemptsLeft = 3;
    while (attemptsLeft > 0) {
        Map primaries = doGetWriteTargetPrimaryShards(clientNodesOnly);
        if (primaries != null) {
            return primaries;
        }
        attemptsLeft--;
    }
    throw new EsHadoopIllegalStateException("Cluster state volatile; cannot find node backing shards - please check whether your cluster is stable");
}
protected Map doGetWriteTargetPrimaryShards(boolean clientNodesOnly) {
List>> info = client.targetShards(resources.getResourceWrite().index(), SettingsUtils.getFixedRouting(settings));
Map shards = new LinkedHashMap();
List nodes = client.getHttpNodes(clientNodesOnly);
Map nodeMap = new HashMap(nodes.size());
for (NodeInfo node : nodes) {
nodeMap.put(node.getId(), node);
}
for (List