/*
* Copyright 2020 Yelp Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.yelp.nrtsearch.server.luceneserver;

import com.google.common.collect.Lists;
import com.google.common.util.concurrent.MoreExecutors;
import com.yelp.nrtsearch.server.grpc.DeadlineUtils;
import com.yelp.nrtsearch.server.grpc.FacetResult;
import com.yelp.nrtsearch.server.grpc.ProfileResult;
import com.yelp.nrtsearch.server.grpc.SearchRequest;
import com.yelp.nrtsearch.server.grpc.SearchResponse;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.CompositeFieldValue;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.FieldValue;
import com.yelp.nrtsearch.server.grpc.SearchResponse.SearchState;
import com.yelp.nrtsearch.server.grpc.TotalHits;
import com.yelp.nrtsearch.server.luceneserver.doc.LoadedDocValues;
import com.yelp.nrtsearch.server.luceneserver.facet.DrillSidewaysImpl;
import com.yelp.nrtsearch.server.luceneserver.facet.FacetTopDocs;
import com.yelp.nrtsearch.server.luceneserver.field.BooleanFieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.DateTimeFieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.FieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.RuntimeFieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.VirtualFieldDef;
import com.yelp.nrtsearch.server.luceneserver.innerhit.InnerHitFetchTask;
import com.yelp.nrtsearch.server.luceneserver.rescore.RescoreTask;
import com.yelp.nrtsearch.server.luceneserver.script.RuntimeScript;
import com.yelp.nrtsearch.server.luceneserver.search.FieldFetchContext;
import com.yelp.nrtsearch.server.luceneserver.search.SearchContext;
import com.yelp.nrtsearch.server.luceneserver.search.SearchCutoffWrapper.CollectionTimeoutException;
import com.yelp.nrtsearch.server.luceneserver.search.SearchRequestProcessor;
import com.yelp.nrtsearch.server.luceneserver.search.SearcherResult;
import com.yelp.nrtsearch.server.monitoring.SearchResponseCollector;
import com.yelp.nrtsearch.server.utils.ObjectToCompositeFieldTransformer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
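
/**
 * Handler for search requests. Executes the query (with optional facets), applies any rescorers,
 * fetches the requested fields for the top hits, and builds the {@link SearchResponse}.
 *
 * <p>A minimal usage sketch, assuming the index is already started and {@code executor} is the
 * search thread pool:
 *
 * <pre>{@code
 * SearchHandler handler = new SearchHandler(executor);
 * SearchResponse response = handler.handle(indexState, searchRequest);
 * }</pre>
 */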
public class SearchHandler implements Handler {
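  // Executes submitted tasks directly on the calling thread; used for drill sideways so its
  // subtasks run in the current (gRPC) thread instead of the search thread pool (see below).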
private static final ExecutorService DIRECT_EXECUTOR = MoreExecutors.newDirectExecutorService();
private static final Logger logger = LoggerFactory.getLogger(SearchHandler.class);
private final ThreadPoolExecutor threadPoolExecutor;
private final boolean warming;
public SearchHandler(ThreadPoolExecutor threadPoolExecutor) {
this(threadPoolExecutor, false);
  }

/**
* @param threadPoolExecutor Threadpool to execute a parallel search
* @param warming set to true if we are warming the index right now
*/
public SearchHandler(ThreadPoolExecutor threadPoolExecutor, boolean warming) {
this.threadPoolExecutor = threadPoolExecutor;
this.warming = warming;
  }

@Override
public SearchResponse handle(IndexState indexState, SearchRequest searchRequest)
throws SearchHandlerException {
// this request may have been waiting in the grpc queue too long
DeadlineUtils.checkDeadline("SearchHandler: start", "SEARCH");
ShardState shardState = indexState.getShard(0);
// Index won't be started if we are currently warming
if (!warming) {
indexState.verifyStarted();
}
var diagnostics = SearchResponse.Diagnostics.newBuilder();
SearcherTaxonomyManager.SearcherAndTaxonomy s = null;
SearchContext searchContext;
try {
s =
getSearcherAndTaxonomy(
searchRequest, indexState, shardState, diagnostics, threadPoolExecutor);
ProfileResult.Builder profileResultBuilder = null;
if (searchRequest.getProfile()) {
profileResultBuilder = ProfileResult.newBuilder();
}
searchContext =
SearchRequestProcessor.buildContextForRequest(
searchRequest, indexState, shardState, s, diagnostics, profileResultBuilder);
long searchStartTime = System.nanoTime();
SearcherResult searcherResult;
if (!searchRequest.getFacetsList().isEmpty()) {
DrillDownQuery ddq = (DrillDownQuery) searchContext.getQuery();
        List<FacetResult> grpcFacetResults = new ArrayList<>();
// Run the drill sideways search on the direct executor to run subtasks in the
// current (grpc) thread. If we use the search thread pool for this, it can cause a
// deadlock trying to execute the dependent parallel search tasks. Since we do not
// currently add additional drill down definitions, there will only be one drill
// sideways task per query.
DrillSideways drillS =
new DrillSidewaysImpl(
s.searcher,
indexState.getFacetsConfig(),
s.taxonomyReader,
searchRequest.getFacetsList(),
s,
indexState,
shardState,
searchContext.getQueryFields(),
grpcFacetResults,
DIRECT_EXECUTOR,
diagnostics);
        DrillSideways.ConcurrentDrillSidewaysResult<SearcherResult> concurrentDrillSidewaysResult;
try {
concurrentDrillSidewaysResult =
drillS.search(ddq, searchContext.getCollector().getWrappedManager());
} catch (RuntimeException e) {
// Searching with DrillSideways wraps exceptions in a few layers.
// Try to find if this was caused by a timeout, if so, re-wrap
// so that the top level exception is the same as when not using facets.
CollectionTimeoutException timeoutException = findTimeoutException(e);
if (timeoutException != null) {
throw new CollectionTimeoutException(timeoutException.getMessage(), e);
}
throw e;
}
searcherResult = concurrentDrillSidewaysResult.collectorResult;
searchContext.getResponseBuilder().addAllFacetResult(grpcFacetResults);
searchContext
.getResponseBuilder()
.addAllFacetResult(
FacetTopDocs.facetTopDocsSample(
searcherResult.getTopDocs(),
searchRequest.getFacetsList(),
indexState,
s.searcher,
diagnostics));
} else {
try {
searcherResult =
s.searcher.search(
searchContext.getQuery(), searchContext.getCollector().getWrappedManager());
} catch (RuntimeException e) {
CollectionTimeoutException timeoutException = findTimeoutException(e);
if (timeoutException != null) {
throw new CollectionTimeoutException(timeoutException.getMessage(), e);
}
throw e;
}
}
TopDocs hits = searcherResult.getTopDocs();
// add results from any extra collectors
searchContext
.getResponseBuilder()
.putAllCollectorResults(searcherResult.getCollectorResults());
searchContext.getResponseBuilder().setHitTimeout(searchContext.getCollector().hadTimeout());
searchContext
.getResponseBuilder()
.setTerminatedEarly(searchContext.getCollector().terminatedEarly());
diagnostics.setFirstPassSearchTimeMs(((System.nanoTime() - searchStartTime) / 1000000.0));
DeadlineUtils.checkDeadline("SearchHandler: post recall", diagnostics, "SEARCH");
// add detailed timing metrics for query execution
if (profileResultBuilder != null) {
searchContext.getCollector().maybeAddProfiling(profileResultBuilder);
}
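      // apply any rescorers sequentially to the first-pass hits, recording per-rescorer timing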
long rescoreStartTime = System.nanoTime();
if (!searchContext.getRescorers().isEmpty()) {
for (RescoreTask rescorer : searchContext.getRescorers()) {
long startNS = System.nanoTime();
hits = rescorer.rescore(hits, searchContext);
long endNS = System.nanoTime();
diagnostics.putRescorersTimeMs(rescorer.getName(), (endNS - startNS) / 1000000.0);
DeadlineUtils.checkDeadline(
"SearchHandler: post " + rescorer.getName(), diagnostics, "SEARCH");
}
diagnostics.setRescoreTimeMs(((System.nanoTime() - rescoreStartTime) / 1000000.0));
}
long t0 = System.nanoTime();
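      // trim to the requested window; getHitsFromOffset is assumed to keep hits in
      // [startHit, topHits), e.g. startHit=10, topHits=20 retains hits 10 through 19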
hits = getHitsFromOffset(hits, searchContext.getStartHit(), searchContext.getTopHits());
// create Hit.Builder for each hit, and populate with lucene doc id and ranking info
setResponseHits(searchContext, hits);
// fill Hit.Builder with requested fields
fetchFields(searchContext);
SearchState.Builder searchState = SearchState.newBuilder();
searchContext.getResponseBuilder().setSearchState(searchState);
searchState.setTimestamp(searchContext.getTimestampSec());
// Record searcher version that handled this request:
searchState.setSearcherVersion(((DirectoryReader) s.searcher.getIndexReader()).getVersion());
// Fill in lastDoc for searchAfter:
if (hits.scoreDocs.length != 0) {
ScoreDoc lastHit = hits.scoreDocs[hits.scoreDocs.length - 1];
searchState.setLastDocId(lastHit.doc);
searchContext.getCollector().fillLastHit(searchState, lastHit);
}
searchContext.getResponseBuilder().setSearchState(searchState);
diagnostics.setGetFieldsTimeMs(((System.nanoTime() - t0) / 1000000.0));
if (searchContext.getFetchTasks().getHighlightFetchTask() != null) {
diagnostics.setHighlightTimeMs(
searchContext.getFetchTasks().getHighlightFetchTask().getTimeTakenMs());
}
if (searchContext.getFetchTasks().getInnerHitFetchTaskList() != null) {
diagnostics.putAllInnerHitsDiagnostics(
searchContext.getFetchTasks().getInnerHitFetchTaskList().stream()
.collect(
Collectors.toMap(
task -> task.getInnerHitContext().getInnerHitName(),
InnerHitFetchTask::getDiagnostic)));
}
searchContext.getResponseBuilder().setDiagnostics(diagnostics);
if (profileResultBuilder != null) {
searchContext.getResponseBuilder().setProfileResult(profileResultBuilder);
}
} catch (IOException | InterruptedException | ExecutionException e) {
logger.warn(e.getMessage(), e);
throw new SearchHandlerException(e);
} finally {
// NOTE: this is a little iffy, because we may not
// have obtained this searcher from the NRTManager
// (i.e. sometimes we pulled from
// SearcherLifetimeManager, other times (if
// snapshot was specified) we opened ourselves,
// but under-the-hood all these methods just call
// s.getIndexReader().decRef(), which is what release
// does:
try {
if (s != null) {
shardState.release(s);
}
} catch (IOException e) {
logger.warn("Failed to release searcher reference previously acquired by acquire()", e);
throw new SearchHandlerException(e);
}
}
// Add searchRequest to warmer if needed
try {
if (!warming && indexState.getWarmer() != null) {
indexState.getWarmer().addSearchRequest(searchRequest);
}
} catch (Exception e) {
logger.error("Unable to add warming query", e);
}
// if we are out of time, don't bother with serialization
DeadlineUtils.checkDeadline("SearchHandler: end", diagnostics, "SEARCH");
SearchResponse searchResponse = searchContext.getResponseBuilder().build();
if (!warming) {
SearchResponseCollector.updateSearchResponseMetrics(
searchResponse,
searchContext.getIndexState().getName(),
searchContext.getIndexState().getVerboseMetrics());
}
return searchResponse;
  }

/**
* Fetch/compute field values for the top hits. This operation may be done in parallel, based on
* the setting for the fetch thread pool. In addition to filling hit fields, any query {@link
* com.yelp.nrtsearch.server.luceneserver.search.FetchTasks.FetchTask}s are executed.
*
* @param searchContext search parameters
* @throws IOException on error reading index data
* @throws ExecutionException on error when performing parallel fetch
* @throws InterruptedException if parallel fetch is interrupted
*/
private void fetchFields(SearchContext searchContext)
throws IOException, ExecutionException, InterruptedException {
if (searchContext.getResponseBuilder().getHitsBuilderList().isEmpty()) {
return;
}
// sort hits by lucene doc id
    List<Hit.Builder> hitBuilders =
        new ArrayList<>(searchContext.getResponseBuilder().getHitsBuilderList());
hitBuilders.sort(Comparator.comparing(Hit.Builder::getLuceneDocId));
IndexState indexState = searchContext.getIndexState();
int fetch_thread_pool_size = indexState.getThreadPoolConfiguration().getMaxFetchThreads();
int min_parallel_fetch_num_fields =
indexState.getThreadPoolConfiguration().getMinParallelFetchNumFields();
int min_parallel_fetch_num_hits =
indexState.getThreadPoolConfiguration().getMinParallelFetchNumHits();
boolean parallelFetchByField =
indexState.getThreadPoolConfiguration().getParallelFetchByField();
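    // use the parallel fetch path only when it is enabled and there are enough fields and hits
    // for the per-task overhead to pay off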
if (parallelFetchByField
&& fetch_thread_pool_size > 1
&& searchContext.getRetrieveFields().keySet().size() > min_parallel_fetch_num_fields
&& hitBuilders.size() > min_parallel_fetch_num_hits) {
// Fetch fields in parallel
      List<LeafReaderContext> leaves =
          searchContext.getSearcherAndTaxonomy().searcher.getIndexReader().leaves();
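      // precompute the leaf (segment) context containing each hit so the fetch tasks do not
      // repeat the lookup per field chunk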
      List<LeafReaderContext> hitIdToLeaves = new ArrayList<>();
for (int hitIndex = 0; hitIndex < hitBuilders.size(); ++hitIndex) {
var hitResponse = hitBuilders.get(hitIndex);
LeafReaderContext leaf =
leaves.get(ReaderUtil.subIndex(hitResponse.getLuceneDocId(), leaves));
hitIdToLeaves.add(hitIndex, leaf);
}
      List<String> fields = new ArrayList<>(searchContext.getRetrieveFields().keySet());
      // parallelism is the min of the fetch thread pool size and
      // ceil(fields.size() / minParallelFetchNumFields)
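      // e.g. with 40 fields, minParallelFetchNumFields=10, and a pool of 8 threads:
      // ceil(40 / 10) = 4, so parallelism = min(8, 4) = 4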
int parallelism =
Math.min(
fetch_thread_pool_size,
(fields.size() + min_parallel_fetch_num_fields - 1) / min_parallel_fetch_num_fields);
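      // partition into chunks of ceil(fields.size() / parallelism) fields: continuing the
      // example, chunk size is ceil(40 / 4) = 10, giving 4 chunks of 10 fields each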
      List<List<String>> fieldsChunks =
          Lists.partition(fields, (fields.size() + parallelism - 1) / parallelism);
      List<Future<List<Map<String, CompositeFieldValue>>>> futures = new ArrayList<>();
      // We only parallelize by field here, which should work well for doc values and virtual
      // fields. For row-based stored fields, parallelizing by hit id would be a better fit, but
      // stored fields are not widely used in NRTSearch (not recommended due to memory usage).
for (List fieldsChunk : fieldsChunks) {
futures.add(
indexState
.getFetchThreadPoolExecutor()
.submit(
new FillFieldsTask(
indexState,
searchContext.getSearcherAndTaxonomy().searcher,
hitIdToLeaves,
hitBuilders,
fieldsChunk,
searchContext)));
}
      for (Future<List<Map<String, CompositeFieldValue>>> future : futures) {
        List<Map<String, CompositeFieldValue>> values = future.get();