All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nl.vpro.elasticsearch.highlevel.HighLevelElasticSearchIterator Maven / Gradle / Ivy

The newest version!
package nl.vpro.elasticsearch.highlevel;

import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;

import javax.management.ObjectName;

import org.apache.http.util.EntityUtils;
import org.apache.lucene.search.TotalHits;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.elasticsearch.action.search.*;
import org.elasticsearch.client.*;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.search.*;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.meeuw.math.windowed.WindowedEventRate;

import com.fasterxml.jackson.databind.JsonNode;

import nl.vpro.elasticsearch.ElasticSearchIteratorInterface;
import nl.vpro.elasticsearchclient.ElasticSearchIterator;
import nl.vpro.jackson2.Jackson2Mapper;
import nl.vpro.jmx.MBeans;
import nl.vpro.util.ThreadPools;

/**
  * A wrapper around the Elastic Search scroll interface, to expose it as a simple {@link Iterator}
 * 
{@code
 *        try (ElasticSearchIterator i = ElasticSearchIterator.searchHits(client)) {
 *        SearchSourceBuilder search = i.prepareSearchSource("pageupdates-publish");
 *        // fill your request here
 *
 *        i.start() // optional
 *
 *        i.forEachRemaining((node) -> {
 *             String url = node.get("url").textValue();
 *             if (i.getCount() % 1000 == 0) {
 *                 log.info("{}: {}", i.getCount(), url);
 *
 *             }
 *         });
 *         }
 *
 * }
* @author Michiel Meeuwissen * @since 2.22 */ @Slf4j public class HighLevelElasticSearchIterator implements ElasticSearchIteratorInterface, HighLevelElasticSearchIteratorMXBean { private static long instances = 0; @Getter private final long instance = instances++; private final Function adapt; private final RestHighLevelClient client; private SearchResponse response; @Getter private Long count = -1L; private SearchHits hits; private String scrollId; private boolean hasNext; private int i = -1; private T next; private boolean needsNext = true; private String[] indices; @Getter @Setter private String[] routing; @Getter private Instant start; @Getter private Duration duration = Duration.ofMillis(0); private SearchSourceBuilder searchSourceBuilder; private final Duration scrollContext; private Long totalSize = null; private TotalRelation totalRelation = TotalRelation.EQUAL_TO; private final Boolean requestVersion; @Getter private final WindowedEventRate rate; private final boolean closeRate; private final ObjectName objectName; @Getter @Setter private RequestOptions requestOptions; public static HighLevelElasticSearchIterator.Builder searchHitsBuilder(RestHighLevelClient client) { return HighLevelElasticSearchIterator.builder() .client(client); } public static HighLevelElasticSearchIterator searchHits(RestHighLevelClient client) { return searchHitsBuilder(client).build(); } public static HighLevelElasticSearchIterator sources(RestHighLevelClient client) { return HighLevelElasticSearchIterator.builder() .client(client) .adapt(adapterTo(JsonNode.class)) .build(); } @lombok.Builder(builderClassName = "Builder") @lombok.SneakyThrows protected HighLevelElasticSearchIterator( @lombok.NonNull RestHighLevelClient client, Function adapt, Class adaptTo, Duration scrollContext, String beanName, WindowedEventRate rateMeasurerer, List routingIds, RequestOptions requestOptions, Boolean requestVersion ) { this.adapt = adapterTo(adapt, adaptTo); this.client = client; this.scrollContext = scrollContext == null ? Duration.ofSeconds(30) : scrollContext; if (this.scrollContext.isNegative()) { throw new IllegalArgumentException(); } if (beanName != null) { objectName = MBeans.registerBean(this, instance + "-" + beanName); } else { objectName = null; } this.rate = rateMeasurerer == null ? WindowedEventRate.builder() .bucketCount(5) .bucketDuration(Duration.ofMinutes(1)) .build() : rateMeasurerer; this.closeRate = rateMeasurerer == null; this.routing = routingIds == null ? null : routingIds.toArray(new String[0]); this.requestOptions = requestOptions == null ? RequestOptions.DEFAULT : requestOptions; this.requestVersion = requestVersion; } public static Function adapterTo(Class clazz) { return searchHit -> { try { return Jackson2Mapper.getLenientInstance() .readValue(searchHit.getSourceRef().toBytesRef().bytes, clazz); } catch (Exception e) { log.warn("{}: {}", searchHit, e.getMessage()); return null; } }; } @SuppressWarnings("unchecked") private static Function adapterTo(Function adapter, Class clazz) { if (adapter != null && clazz != null) { throw new IllegalArgumentException(); } if (clazz != null) { return adapterTo(clazz); } if (adapter == null) { return searchHit -> (T) searchHit; } return adapter; } public SearchSourceBuilder prepareSearchSource(String... indices) { this.indices = indices; this.searchSourceBuilder = new SearchSourceBuilder(); return this.searchSourceBuilder; } @Override public boolean hasNext() { findNext(); return hasNext; } /** * Retrieves the first batch of results. From now on you can't modified the query any more. * This will also happen implicitely if you don't call this, but sometimes it make the code clearer when doing it explicitely. * */ public void start() { if (response != null) { throw new IllegalStateException(); } else { firstBatch(); } } protected void findNext() { if (needsNext) { synchronized (this) { long start = System.nanoTime(); try { if (response == null) { if (! firstBatch()) { return; } } i++; boolean newHasNext = i < hits.getHits().length; if (!newHasNext) { nextBatch(); } else { hasNext = true; } if (hasNext) { next = adapt.apply(hits.getHits()[i]); } else { close(); } needsNext = false; } finally { duration = duration.plusNanos(System.nanoTime() - start); } } } } @Override public float getFraction() { long total = Duration.between(start, Instant.now()).toMillis(); long es = duration.toMillis(); return (float) es / total; } protected boolean firstBatch() { if (searchSourceBuilder == null) { throw new IllegalStateException("prepareSearch not called"); } try { SearchRequest searchRequest = new SearchRequest(indices, searchSourceBuilder); if (requestVersion != null) { searchSourceBuilder.version(requestVersion); } start = Instant.now(); searchRequest.scroll(getScroll()); response = client.search(searchRequest, requestOptions); } catch (IOException ioe) { //log.error(ioe.getMessage()); throw new RuntimeException("For request " + searchSourceBuilder.toString() + ":" + ioe.getMessage(), ioe); } if (hits == null) { readResponse(); } String newScrollId = response.getScrollId(); if (newScrollId != null) { log.debug("Scroll id {} -> {}", scrollId, newScrollId); scrollId = newScrollId; SCROLL_IDS.add(scrollId); } TotalHits total = hits.getTotalHits(); totalSize = total.value; if (totalSize == 0) { hasNext = false; needsNext = false; close(); return false; } return true; } private Scroll getScroll() { return new Scroll(new TimeValue(scrollContext.toMillis(), TimeUnit.MILLISECONDS)); } private void nextBatch() { if (scrollId != null) { try { SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId); searchScrollRequest.scroll(getScroll()); response = client.scroll(searchScrollRequest, requestOptions); log.debug("New scroll"); String newScrollId = response.getScrollId(); if (!scrollId.equals(newScrollId)) { log.info("new scroll id {}", newScrollId); SCROLL_IDS.remove(scrollId); scrollId = newScrollId; SCROLL_IDS.add(scrollId); } readResponse(); i = 0; hasNext = hits.getHits().length > 0; } catch(ResponseException re) { log.warn(re.getMessage()); hits = null; hasNext = false; } catch (IOException ioe) { log.error(ioe.getMessage()); throw new RuntimeException("For request " + searchSourceBuilder.toString() + ":" + ioe.getMessage(), ioe); } } else { log.warn("No scroll id found, so not possible to scroll next batch"); hasNext = false; } } protected void readResponse() { hits = response.getHits(); if (hits != null) { TotalHits total = hits.getTotalHits(); totalSize = total.value; } } @Override public T next() { findNext(); if (!hasNext) { throw new NoSuchElementException(); } count++; needsNext = true; rate.newEvent(); return next; } @Override public @NonNull Optional getSize() { findNext(); return Optional.ofNullable(this.totalSize); } @Override public Optional getSizeQualifier() { findNext(); if (hits != null) { TotalHits total = hits.getTotalHits(); this.totalRelation = TotalRelation.valueOf(total.relation.name()); this.totalSize = total.value; } return Optional.ofNullable(this.totalRelation); } public SearchResponse getResponse() { findNext(); return response; } @Override public String toString() { return client + " " + searchSourceBuilder + " " + count; } @Override public void close() { if (objectName != null) { ThreadPools.backgroundExecutor.schedule(() -> MBeans.unregister(objectName), 2, TimeUnit.MINUTES); } if (closeRate) { rate.close(); } if (scrollId != null) { try { ClearScrollRequest clearScrollRequest = new ClearScrollRequest(); clearScrollRequest.addScrollId(scrollId); ClearScrollResponse clearScrollResponse = client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT); if (clearScrollResponse.isSucceeded()) { log.debug("Deleted {} {}", scrollId, clearScrollResponse); SCROLL_IDS.remove(scrollId); } else { log.warn("Something wrong deleting scroll id {} {}", scrollId, clearScrollResponse); } scrollId = null; } catch (ResponseException re) { if (re.getResponse().getStatusLine().getStatusCode() == 404) { log.debug("Not found to delete"); } else { log.warn(re.getMessage()); } EntityUtils.consumeQuietly(re.getResponse().getEntity()); } catch (Exception e) { log.warn(e.getMessage()); } } else { log.debug("no need to close"); } } @Override public double getSpeed() { return rate.getRate(); } public static class Builder extends ElasticSearchIterator.AbstractBuilder> { public Builder() { } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy