// Source listing: com.vmware.xenon.services.common.LuceneDocumentIndexService
/*
* Copyright (c) 2014-2015 VMware, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, without warranties or
* conditions of any kind, EITHER EXPRESS OR IMPLIED. See the License for the
* specific language governing permissions and limitations under the License.
*/
package com.vmware.xenon.services.common;
import static com.vmware.xenon.services.common.LuceneIndexDocumentHelper.GROUP_BY_PROPERTY_NAME_SUFFIX;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.stream.Stream;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.GroupingSearch;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import com.vmware.xenon.common.FileUtils;
import com.vmware.xenon.common.NamedThreadFactory;
import com.vmware.xenon.common.NodeSelectorService.SelectOwnerResponse;
import com.vmware.xenon.common.Operation;
import com.vmware.xenon.common.Operation.AuthorizationContext;
import com.vmware.xenon.common.Operation.CompletionHandler;
import com.vmware.xenon.common.OperationContext;
import com.vmware.xenon.common.QueryFilterUtils;
import com.vmware.xenon.common.ReflectionUtils;
import com.vmware.xenon.common.RoundRobinOperationQueue;
import com.vmware.xenon.common.Service;
import com.vmware.xenon.common.ServiceDocument;
import com.vmware.xenon.common.ServiceDocumentDescription;
import com.vmware.xenon.common.ServiceDocumentDescription.DocumentIndexingOption;
import com.vmware.xenon.common.ServiceDocumentQueryResult;
import com.vmware.xenon.common.ServiceHost.ServiceHostState.MemoryLimitType;
import com.vmware.xenon.common.ServiceStatUtils;
import com.vmware.xenon.common.ServiceStats.ServiceStat;
import com.vmware.xenon.common.ServiceStats.TimeSeriesStats.AggregationType;
import com.vmware.xenon.common.StatelessService;
import com.vmware.xenon.common.TaskState.TaskStage;
import com.vmware.xenon.common.UriUtils;
import com.vmware.xenon.common.Utils;
import com.vmware.xenon.common.opentracing.TracingExecutor;
import com.vmware.xenon.common.serialization.GsonSerializers;
import com.vmware.xenon.common.serialization.KryoSerializers;
import com.vmware.xenon.services.common.QueryFilter.QueryFilterException;
import com.vmware.xenon.services.common.QueryPageService.LuceneQueryPage;
import com.vmware.xenon.services.common.QueryTask.QuerySpecification;
import com.vmware.xenon.services.common.QueryTask.QuerySpecification.QueryOption;
import com.vmware.xenon.services.common.QueryTask.QuerySpecification.QueryRuntimeContext;
import com.vmware.xenon.services.common.QueryTask.QueryTerm.MatchType;
import com.vmware.xenon.services.common.ServiceHostManagementService.BackupType;
public class LuceneDocumentIndexService extends StatelessService {
// Link constant for this service, taken from ServiceUriPaths.CORE_DOCUMENT_INDEX.
public static final String SELF_LINK = ServiceUriPaths.CORE_DOCUMENT_INDEX;
// System property that overrides the number of query threads.
public static final String PROPERTY_NAME_QUERY_THREAD_COUNT = Utils.PROPERTY_NAME_PREFIX
+ LuceneDocumentIndexService.class.getSimpleName()
+ ".QUERY_THREAD_COUNT";
// Size of the query thread pool created in handleStart; defaults to twice Utils.DEFAULT_THREAD_COUNT.
public static final int QUERY_THREAD_COUNT = Integer.getInteger(
PROPERTY_NAME_QUERY_THREAD_COUNT,
Utils.DEFAULT_THREAD_COUNT * 2);
// System property that overrides the number of update (indexing) threads.
public static final String PROPERTY_NAME_UPDATE_THREAD_COUNT = Utils.PROPERTY_NAME_PREFIX
+ LuceneDocumentIndexService.class.getSimpleName()
+ ".UPDATE_THREAD_COUNT";
// Size of the update thread pool created in handleStart; defaults to half of Utils.DEFAULT_THREAD_COUNT.
public static final int UPDATE_THREAD_COUNT = Integer.getInteger(
PROPERTY_NAME_UPDATE_THREAD_COUNT,
Utils.DEFAULT_THREAD_COUNT / 2);
// System property that overrides the query queue depth.
public static final String PROPERTY_NAME_QUERY_QUEUE_DEPTH = Utils.PROPERTY_NAME_PREFIX
+ "LuceneDocumentIndexService.queryQueueDepth";
// Capacity of the bounded work queue backing the query thread pool (see handleStart).
public static final int QUERY_QUEUE_DEPTH = Integer.getInteger(
PROPERTY_NAME_QUERY_QUEUE_DEPTH,
10 * Service.OPERATION_QUEUE_DEFAULT_LIMIT
);
// System property that overrides the update queue depth.
public static final String PROPERTY_NAME_UPDATE_QUEUE_DEPTH = Utils.PROPERTY_NAME_PREFIX
+ "LuceneDocumentIndexService.updateQueueDepth";
// Capacity of the bounded work queue backing the update thread pool (see handleStart).
public static final int UPDATE_QUEUE_DEPTH = Integer.getInteger(
PROPERTY_NAME_UPDATE_QUEUE_DEPTH,
10 * Service.OPERATION_QUEUE_DEFAULT_LIMIT
);
// Default index directory name, resolved relative to the host storage sandbox in handleStart.
public static final String FILE_PATH_LUCENE = "lucene";
// Default values for the static tunables declared below.
public static final int DEFAULT_INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH = 10000;
public static final int DEFAULT_INDEX_SEARCHER_COUNT_THRESHOLD = 200;
public static final int DEFAULT_QUERY_RESULT_LIMIT = 10000;
public static final int DEFAULT_QUERY_PAGE_RESULT_LIMIT = 10000;
public static final int DEFAULT_EXPIRED_DOCUMENT_SEARCH_THRESHOLD = 10000;
public static final int DEFAULT_METADATA_UPDATE_MAX_QUEUE_DEPTH = 10000;
// One second, expressed in microseconds.
public static final long DEFAULT_PAGINATED_SEARCHER_EXPIRATION_DELAY = TimeUnit.SECONDS.toMicros(1);
private static final String DOCUMENTS_WITHOUT_RESULTS = "DocumentsWithoutResults";
/**
* Try to find a reusable searcher this many times.
*/
private static final int SEARCHER_REUSE_MAX_ATTEMPTS = 50;
// Index directory name; null means the index is not durable (see isDurable()).
protected String indexDirectory;
// Process-wide tunables, read and written through the static accessors below.
// NOTE(review): initial value is 1000 while DEFAULT_EXPIRED_DOCUMENT_SEARCH_THRESHOLD is 10000 - confirm which is intended.
private static int expiredDocumentSearchThreshold = 1000;
private static int indexFileCountThresholdForWriterRefresh = DEFAULT_INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH;
private static int versionRetentionBulkCleanupThreshold = 10000;
private static int versionRetentionServiceThreshold = 100;
private static int queryResultLimit = DEFAULT_QUERY_RESULT_LIMIT;
private static int queryPageResultLimit = DEFAULT_QUERY_PAGE_RESULT_LIMIT;
private static long searcherRefreshIntervalMicros = 0;
private static int metadataUpdateMaxQueueDepth = DEFAULT_METADATA_UPDATE_MAX_QUEUE_DEPTH;
// Pre-bound method references, so the same Runnable instance can be handed to an executor repeatedly.
private final Runnable queryTaskHandler = this::handleQueryRequest;
private final Runnable updateRequestHandler = this::handleUpdateRequest;
/**
 * Replaces the process-wide implicit query result limit.
 *
 * @param limit new value; stored as-is, no validation is performed
 */
public static void setImplicitQueryResultLimit(int limit) {
    LuceneDocumentIndexService.queryResultLimit = limit;
}
/**
 * @return the current process-wide implicit query result limit
 *         (initially {@link #DEFAULT_QUERY_RESULT_LIMIT})
 */
public static int getImplicitQueryResultLimit() {
    return LuceneDocumentIndexService.queryResultLimit;
}
/**
 * Replaces the process-wide implicit query processing page size.
 *
 * @param limit new value; stored as-is, no validation is performed
 */
public static void setImplicitQueryProcessingPageSize(int limit) {
    LuceneDocumentIndexService.queryPageResultLimit = limit;
}
/**
 * @return the current process-wide implicit query processing page size
 *         (initially {@link #DEFAULT_QUERY_PAGE_RESULT_LIMIT})
 */
public static int getImplicitQueryProcessingPageSize() {
    return LuceneDocumentIndexService.queryPageResultLimit;
}
/**
 * Replaces the process-wide index file count threshold for writer refresh.
 *
 * @param count new value; stored as-is, no validation is performed
 */
public static void setIndexFileCountThresholdForWriterRefresh(int count) {
    LuceneDocumentIndexService.indexFileCountThresholdForWriterRefresh = count;
}
/**
 * @return the current process-wide index file count threshold for writer refresh
 *         (initially {@link #DEFAULT_INDEX_FILE_COUNT_THRESHOLD_FOR_WRITER_REFRESH})
 */
public static int getIndexFileCountThresholdForWriterRefresh() {
    return LuceneDocumentIndexService.indexFileCountThresholdForWriterRefresh;
}
/**
 * Replaces the process-wide expired document search threshold.
 *
 * @param count new value; stored as-is, no validation is performed
 */
public static void setExpiredDocumentSearchThreshold(int count) {
    LuceneDocumentIndexService.expiredDocumentSearchThreshold = count;
}
/**
 * @return the current process-wide expired document search threshold
 */
public static int getExpiredDocumentSearchThreshold() {
    return LuceneDocumentIndexService.expiredDocumentSearchThreshold;
}
/**
 * Replaces the process-wide version retention bulk cleanup threshold.
 *
 * @param count new value; stored as-is, no validation is performed
 */
public static void setVersionRetentionBulkCleanupThreshold(int count) {
    LuceneDocumentIndexService.versionRetentionBulkCleanupThreshold = count;
}
/**
 * @return the current process-wide version retention bulk cleanup threshold
 */
public static int getVersionRetentionBulkCleanupThreshold() {
    return LuceneDocumentIndexService.versionRetentionBulkCleanupThreshold;
}
/**
 * Replaces the process-wide version retention service threshold.
 *
 * @param count new value; stored as-is, no validation is performed
 */
public static void setVersionRetentionServiceThreshold(int count) {
    LuceneDocumentIndexService.versionRetentionServiceThreshold = count;
}
/**
 * @return the current process-wide version retention service threshold
 */
public static int getVersionRetentionServiceThreshold() {
    return LuceneDocumentIndexService.versionRetentionServiceThreshold;
}
/**
 * @return the current process-wide searcher refresh interval, in microseconds
 *         (initially 0)
 */
public static long getSearcherRefreshIntervalMicros() {
    return LuceneDocumentIndexService.searcherRefreshIntervalMicros;
}
/**
 * Replaces the process-wide searcher refresh interval.
 *
 * @param interval new interval in microseconds; stored as-is, no validation is performed
 */
public static void setSearcherRefreshIntervalMicros(long interval) {
    LuceneDocumentIndexService.searcherRefreshIntervalMicros = interval;
}
/**
 * Replaces the process-wide maximum metadata update queue depth.
 *
 * @param depth new value; stored as-is, no validation is performed
 */
public static void setMetadataUpdateMaxQueueDepth(int depth) {
    LuceneDocumentIndexService.metadataUpdateMaxQueueDepth = depth;
}
/**
 * @return the current process-wide maximum metadata update queue depth
 *         (initially {@link #DEFAULT_METADATA_UPDATE_MAX_QUEUE_DEPTH})
 */
public static int getMetadataUpdateMaxQueueDepth() {
    return LuceneDocumentIndexService.metadataUpdateMaxQueueDepth;
}
// Names of the Lucene fields that hold a document's serialized state.
static final String LUCENE_FIELD_NAME_BINARY_SERIALIZED_STATE = "binarySerializedState";
static final String LUCENE_FIELD_NAME_JSON_SERIALIZED_STATE = "jsonSerializedState";
// Names of the statistics published by this service.
public static final String STAT_NAME_ACTIVE_QUERY_FILTERS = "activeQueryFilterCount";
public static final String STAT_NAME_ACTIVE_PAGINATED_QUERIES = "activePaginatedQueryCount";
public static final String STAT_NAME_COMMIT_COUNT = "commitCount";
public static final String STAT_NAME_INDEX_LOAD_RETRY_COUNT = "indexLoadRetryCount";
public static final String STAT_NAME_COMMIT_DURATION_MICROS = "commitDurationMicros";
public static final String STAT_NAME_GROUP_QUERY_COUNT = "groupQueryCount";
public static final String STAT_NAME_QUERY_DURATION_MICROS = "queryDurationMicros";
public static final String STAT_NAME_GROUP_QUERY_DURATION_MICROS = "groupQueryDurationMicros";
public static final String STAT_NAME_QUERY_SINGLE_DURATION_MICROS = "querySingleDurationMicros";
public static final String STAT_NAME_QUERY_ALL_VERSIONS_DURATION_MICROS = "queryAllVersionsDurationMicros";
public static final String STAT_NAME_RESULT_PROCESSING_DURATION_MICROS = "resultProcessingDurationMicros";
public static final String STAT_NAME_INDEXED_FIELD_COUNT = "indexedFieldCount";
public static final String STAT_NAME_INDEXED_DOCUMENT_COUNT = "indexedDocumentCount";
// NOTE(review): constant name says "forced update" but the published stat name says "single version" - confirm which is intended.
public static final String STAT_NAME_FORCED_UPDATE_DOCUMENT_DELETE_COUNT = "singleVersionDocumentDeleteCount";
public static final String STAT_NAME_FIELD_COUNT_PER_DOCUMENT = "fieldCountPerDocument";
public static final String STAT_NAME_INDEXING_DURATION_MICROS = "indexingDurationMicros";
public static final String STAT_NAME_SEARCHER_UPDATE_COUNT = "indexSearcherUpdateCount";
public static final String STAT_NAME_SEARCHER_REUSE_BY_DOCUMENT_KIND_COUNT = "indexSearcherReuseByDocumentKindCount";
public static final String STAT_NAME_PAGINATED_SEARCHER_UPDATE_COUNT = "paginatedIndexSearcherUpdateCount";
public static final String STAT_NAME_PAGINATED_SEARCHER_FORCE_DELETION_COUNT = "paginatedIndexSearcherForceDeletionCount";
public static final String STAT_NAME_WRITER_ALREADY_CLOSED_EXCEPTION_COUNT = "indexWriterAlreadyClosedFailureCount";
public static final String STAT_NAME_READER_ALREADY_CLOSED_EXCEPTION_COUNT = "indexReaderAlreadyClosedFailureCount";
public static final String STAT_NAME_SERVICE_DELETE_COUNT = "serviceDeleteCount";
public static final String STAT_NAME_DOCUMENT_EXPIRATION_COUNT = "expiredDocumentCount";
public static final String STAT_NAME_DOCUMENT_EXPIRATION_FORCED_MAINTENANCE_COUNT = "expiredDocumentForcedMaintenanceCount";
public static final String STAT_NAME_METADATA_INDEXING_UPDATE_COUNT = "metadataIndexingUpdateCount";
public static final String STAT_NAME_VERSION_CACHE_LOOKUP_COUNT = "versionCacheLookupCount";
public static final String STAT_NAME_VERSION_CACHE_MISS_COUNT = "versionCacheMissCount";
public static final String STAT_NAME_VERSION_CACHE_ENTRY_COUNT = "versionCacheEntryCount";
public static final String STAT_NAME_MAINTENANCE_SEARCHER_REFRESH_DURATION_MICROS =
"maintenanceSearcherRefreshDurationMicros";
public static final String STAT_NAME_MAINTENANCE_DOCUMENT_EXPIRATION_DURATION_MICROS =
"maintenanceDocumentExpirationDurationMicros";
public static final String STAT_NAME_MAINTENANCE_VERSION_RETENTION_DURATION_MICROS =
"maintenanceVersionRetentionDurationMicros";
public static final String STAT_NAME_MAINTENANCE_METADATA_INDEXING_DURATION_MICROS =
"maintenanceMetadataIndexingDurationMicros";
// Format strings: the "%s" placeholder is filled in with String.format (see getQueryStatName).
public static final String STAT_NAME_DOCUMENT_KIND_QUERY_COUNT_FORMAT = "documentKindQueryCount-%s";
public static final String STAT_NAME_NON_DOCUMENT_KIND_QUERY_COUNT = "nonDocumentKindQueryCount";
public static final String STAT_NAME_SINGLE_QUERY_BY_FACTORY_COUNT_FORMAT = "singleQueryByFactoryCount-%s";
public static final String STAT_NAME_PREFIX_UPDATE_QUEUE_DEPTH = "updateQueueDepth";
public static final String STAT_NAME_FORMAT_UPDATE_QUEUE_DEPTH = STAT_NAME_PREFIX_UPDATE_QUEUE_DEPTH + "-%s";
public static final String STAT_NAME_PREFIX_QUERY_QUEUE_DEPTH = "queryQueueDepth";
public static final String STAT_NAME_FORMAT_QUERY_QUEUE_DEPTH = STAT_NAME_PREFIX_QUERY_QUEUE_DEPTH + "-%s";
private static final String STAT_NAME_MAINTENANCE_MEMORY_LIMIT_DURATION_MICROS =
"maintenanceMemoryLimitDurationMicros";
private static final String STAT_NAME_MAINTENANCE_FILE_LIMIT_REFRESH_DURATION_MICROS =
"maintenanceFileLimitRefreshDurationMicros";
static final String STAT_NAME_VERSION_RETENTION_SERVICE_COUNT = "versionRetentionServiceCount";
static final String STAT_NAME_ITERATIONS_PER_QUERY = "iterationsPerQuery";
// Aggregation sets passed to the time-series stat helpers of this class.
// Declared with their element type so callers are checked at compile time.
private static final EnumSet<AggregationType> AGGREGATION_TYPE_AVG_MAX =
        EnumSet.of(AggregationType.AVG, AggregationType.MAX);
private static final EnumSet<AggregationType> AGGREGATION_TYPE_SUM =
        EnumSet.of(AggregationType.SUM);
/**
* Synchronization object used to coordinate index searcher refresh
*/
protected final Object searchSync = new Object();
/**
* Synchronization object used to coordinate document metadata updates.
*/
private final Object metadataUpdateSync = new Object();
/**
* Synchronization object used to coordinate index writer update
*/
protected final Semaphore writerSync = new Semaphore(
UPDATE_THREAD_COUNT + QUERY_THREAD_COUNT);
/**
* Map of searchers per thread id. We do not use a ThreadLocal since we need visibility to this map
* from the maintenance logic
*/
protected Map searchers = new HashMap<>();
private ThreadLocal indexDocumentHelper = ThreadLocal
.withInitial(LuceneIndexDocumentHelper::new);
/**
* Searcher refresh time, per searcher (using hash code)
*/
protected Map searcherUpdateTimesMicros = new ConcurrentHashMap<>();
/**
* Searchers used for paginated query tasks.
*/
protected TreeMap paginatedSearchersByCreationTime = new TreeMap<>();
protected TreeMap> paginatedSearchersByExpirationTime = new TreeMap<>();
protected IndexWriter writer = null;
protected Map activeQueries = new ConcurrentHashMap<>();
private long writerUpdateTimeMicros;
private long writerCreationTimeMicros;
/**
* Time when memory pressure removed {@link #updatesPerLink} entries.
*/
private long serviceRemovalDetectedTimeMicros;
private final Map updatesPerLink = new HashMap<>();
private final Map liveVersionsPerLink = new HashMap<>();
private final Map immutableParentLinks = new HashMap<>();
private final Map documentKindUpdateInfo = new HashMap<>();
private final SortedSet metadataUpdates =
new TreeSet<>(Comparator.comparingLong((info) -> info.updateTimeMicros));
private final Map metadataUpdatesPerLink = new HashMap<>();
// memory pressure threshold in bytes
long updateMapMemoryLimit;
private Sort versionSort;
ExecutorService privateIndexingExecutor;
ExecutorService privateQueryExecutor;
private Set fieldsToLoadIndexingIdLookup;
private Set fieldToLoadVersionLookup;
private Set fieldsToLoadNoExpand;
private Set fieldsToLoadWithExpand;
private final RoundRobinOperationQueue queryQueue = new RoundRobinOperationQueue(
"index-service-query",
Integer.getInteger(PROPERTY_NAME_QUERY_QUEUE_DEPTH, Service.OPERATION_QUEUE_DEFAULT_LIMIT));
private final RoundRobinOperationQueue updateQueue = new RoundRobinOperationQueue(
"index-service-update",
Integer.getInteger(PROPERTY_NAME_UPDATE_QUEUE_DEPTH, 10 * Service.OPERATION_QUEUE_DEFAULT_LIMIT));
private URI uri;
private FieldInfoCache fieldInfoCache;
/**
 * Plain data holder identifying a document (self link and kind) together with an update time
 * in microseconds.
 */
// NOTE(review): presumably the element type of the metadataUpdates set, which orders by updateTimeMicros - confirm.
public static class MetadataUpdateInfo {
public String selfLink;
public String kind;
public long updateTimeMicros;
}
/**
 * Plain data holder pairing an update time (microseconds) with a document version.
 */
public static class DocumentUpdateInfo {
public long updateTimeMicros;
public long version;
}
/**
 * Bookkeeping record for an {@link IndexSearcher} kept for a paginated query.
 */
public static class PaginatedSearcherInfo {
public long creationTimeMicros;
// Key under which this entry is filed in paginatedSearchersByExpirationTime.
public long expirationTimeMicros;
// Only single-use searchers may be removed through removeSearcherInfoUnsafe.
public boolean singleUse;
public IndexSearcher searcher;
}
/**
 * Request body asking the index service to drop the searcher referenced by a query runtime
 * context (handled by handleDeleteRuntimeContext).
 */
public static class DeleteQueryRuntimeContextRequest extends ServiceDocument {
// Must be non-null and carry a non-null nativeSearcher, otherwise the handler throws.
public QueryRuntimeContext context;
static final String KIND = Utils.buildKind(DeleteQueryRuntimeContextRequest.class);
}
/**
* Request body for an index backup; carries no fields of its own.
*
* NOTE: use backup API in ServiceHostManagementService instead of this class.
**/
public static class BackupRequest extends ServiceDocument {
static final String KIND = Utils.buildKind(BackupRequest.class);
}
/**
 * Response body for an index backup.
 */
public static class BackupResponse extends ServiceDocument {
// Location of the backup file.
public URI backupFile;
static final String KIND = Utils.buildKind(BackupResponse.class);
}
/**
* Special GET request/response body to retrieve Lucene-related info.
*
* For internal use only, mainly by backup/restore. The fields hold live references to this
* service's internals rather than copies.
*/
public static class InternalDocumentIndexInfo {
public IndexWriter indexWriter;
public String indexDirectory;
public LuceneDocumentIndexService luceneIndexService;
public Semaphore writerSync;
}
/**
* Request body for an index restore.
*
* NOTE: use restore API in ServiceHostManagementService instead of this class.
**/
public static class RestoreRequest extends ServiceDocument {
// Location of the backup to restore from.
public URI backupFile;
// Optional (nullable) time boundary in microseconds.
// NOTE(review): exact restore semantics of this boundary are not visible here - confirm.
public Long timeSnapshotBoundaryMicros;
static final String KIND = Utils.buildKind(RestoreRequest.class);
}
/**
 * Marker body type with no fields; only its KIND string is defined.
 */
public static class MaintenanceRequest {
static final String KIND = Utils.buildKind(MaintenanceRequest.class);
}
/**
* Used for Lucene commit notification.
*/
public static class CommitInfo {
public static final String KIND = Utils.buildKind(CommitInfo.class);
public String kind = CommitInfo.KIND;
/**
* Result of the Lucene commit.
*
* From {@link IndexWriter#commit()}:
*
* If nothing was committed, because there were no
* pending changes, this returns -1. Otherwise, it returns
* the sequence number such that all indexing operations
* prior to this sequence will be included in the commit
* point, and all other operations will not.
*
* In other words, this is the sequence number of the last operation
* in the commit: all sequence numbers less than or equal to this value
* are reflected in the commit, and all others are not.
*/
public long sequenceNumber;
}
/**
 * Creates an index service that uses the default index directory name
 * ({@link #FILE_PATH_LUCENE}).
 */
public LuceneDocumentIndexService() {
this(FILE_PATH_LUCENE);
}
/**
 * Creates an index service with the CORE and PERIODIC_MAINTENANCE options enabled.
 *
 * @param indexDirectory index directory name, resolved against the host storage sandbox in
 *        handleStart; {@code null} selects a RAM-based, non-durable index
 */
public LuceneDocumentIndexService(String indexDirectory) {
super(ServiceDocument.class);
super.toggleOption(ServiceOption.CORE, true);
super.toggleOption(ServiceOption.PERIODIC_MAINTENANCE, true);
this.indexDirectory = indexDirectory;
}
/**
 * @return {@code true} when an index directory was configured, {@code false} when the
 *         directory is {@code null} (RAM-based index)
 */
private boolean isDurable() {
    final String configuredDirectory = this.indexDirectory;
    return configuredDirectory != null;
}
/**
 * Starts the index service: sets the maintenance interval, creates the query and update
 * executors, resets in-memory state and opens the index writer.
 *
 * For a durable index the writer is created and validated with a self query; on the first
 * failure the index files are archived and creation is attempted once more. A second failure
 * fails the start operation.
 *
 * @param post the start operation; completed on success, failed otherwise
 */
@Override
public void handleStart(final Operation post) {
    super.setMaintenanceIntervalMicros(getHost().getMaintenanceIntervalMicros() * 5);

    // getUri() is invoked on every load and save call for every operation, so the value is
    // cached up front (there is only a very small number of index services)
    this.uri = super.getUri();

    ExecutorService queryPool = new ThreadPoolExecutor(QUERY_THREAD_COUNT, QUERY_THREAD_COUNT,
            1, TimeUnit.MINUTES,
            new ArrayBlockingQueue<>(QUERY_QUEUE_DEPTH),
            new NamedThreadFactory(getUri() + "/queries"));
    this.privateQueryExecutor = TracingExecutor.create(queryPool, this.getHost().getTracer());

    ExecutorService updatePool = new ThreadPoolExecutor(UPDATE_THREAD_COUNT, UPDATE_THREAD_COUNT,
            1, TimeUnit.MINUTES,
            new ArrayBlockingQueue<>(UPDATE_QUEUE_DEPTH),
            new NamedThreadFactory(getUri() + "/updates"));
    this.privateIndexingExecutor = TracingExecutor.create(updatePool, this.getHost().getTracer());

    initializeInstance();

    if (!isDurable()) {
        // RAM based index writer
        try {
            createWriter(null, false);
        } catch (Exception e) {
            logSevere(e);
            post.fail(e);
            return;
        }
        initializeStats();
        post.complete();
        return;
    }

    // durable index writer
    File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
    final int maxAttempts = 2;
    int attempt = 0;
    while (attempt < maxAttempts) {
        final boolean isRetry = attempt > 0;
        try {
            createWriter(directory, true);
            // the index is only known to be usable once a query succeeds
            doSelfValidationQuery();
            if (isRetry) {
                logInfo("Retry to create index writer was successful");
            }
            break;
        } catch (Exception e) {
            adjustStat(STAT_NAME_INDEX_LOAD_RETRY_COUNT, 1);
            if (isRetry) {
                logWarning("Failure creating index writer: %s", Utils.toString(e));
                post.fail(e);
                return;
            }
            logWarning("Failure creating index writer: %s, will retry",
                    Utils.toString(e));
            close(this.writer);
            archiveCorruptIndexFiles(directory);
            attempt++;
        }
    }

    initializeStats();
    post.complete();
}
/**
 * Resets the in-memory tracking maps and rebuilds the version sort and the sets of field
 * names to load for each kind of lookup.
 */
private void initializeInstance() {
    this.liveVersionsPerLink.clear();
    this.updatesPerLink.clear();
    this.searcherUpdateTimesMicros.clear();
    this.paginatedSearchersByCreationTime.clear();
    this.paginatedSearchersByExpirationTime.clear();

    // descending (reverse = true) sort on the document version
    SortField byVersionDescending = new SortedNumericSortField(
            ServiceDocument.FIELD_NAME_VERSION, SortField.Type.LONG, true);
    this.versionSort = new Sort(byVersionDescending);

    this.fieldsToLoadIndexingIdLookup = new HashSet<>();
    Collections.addAll(this.fieldsToLoadIndexingIdLookup,
            ServiceDocument.FIELD_NAME_VERSION,
            ServiceDocument.FIELD_NAME_UPDATE_ACTION,
            LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_ID,
            ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS,
            LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME);

    this.fieldToLoadVersionLookup = new HashSet<>();
    Collections.addAll(this.fieldToLoadVersionLookup,
            ServiceDocument.FIELD_NAME_VERSION,
            ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS);

    this.fieldsToLoadNoExpand = new HashSet<>();
    Collections.addAll(this.fieldsToLoadNoExpand,
            ServiceDocument.FIELD_NAME_SELF_LINK,
            ServiceDocument.FIELD_NAME_VERSION,
            ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS,
            ServiceDocument.FIELD_NAME_UPDATE_ACTION);

    // the expanded set is the non-expanded set plus expiration time and the binary state
    this.fieldsToLoadWithExpand = new HashSet<>(this.fieldsToLoadNoExpand);
    Collections.addAll(this.fieldsToLoadWithExpand,
            ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS,
            LUCENE_FIELD_NAME_BINARY_SERIALIZED_STATE);
}
/**
 * Seeds the indexed document and indexed field time-series stats from the current writer.
 * Both are set to zero when there is no writer.
 */
private void initializeStats() {
    final IndexWriter currentWriter = this.writer;

    double documentCount = 0;
    if (currentWriter != null) {
        documentCount = currentWriter.numDocs();
    }
    setTimeSeriesStat(STAT_NAME_INDEXED_DOCUMENT_COUNT, AGGREGATION_TYPE_SUM, documentCount);

    // rough estimate of ten fields per document, so the first bin does not hold a completely
    // bogus number
    double fieldCountEstimate = 0;
    if (currentWriter != null) {
        fieldCountEstimate = currentWriter.numDocs() * 10;
    }
    setTimeSeriesStat(STAT_NAME_INDEXED_FIELD_COUNT, AGGREGATION_TYPE_SUM, fieldCountEstimate);
}
/**
 * Sets the daily and hourly time-series stats with the given name to {@code v}.
 * Does nothing unless the INSTRUMENTATION option is enabled.
 *
 * @param name stat name
 * @param type aggregation types used when the stat has to be created
 * @param v value to set
 */
private void setTimeSeriesStat(String name, EnumSet<AggregationType> type, double v) {
    if (!this.hasOption(ServiceOption.INSTRUMENTATION)) {
        return;
    }
    ServiceStat dayStat = ServiceStatUtils.getOrCreateDailyTimeSeriesStat(this, name, type);
    this.setStat(dayStat, v);
    ServiceStat hourStat = ServiceStatUtils.getOrCreateHourlyTimeSeriesStat(this, name, type);
    this.setStat(hourStat, v);
}
/**
 * Adjusts the daily and hourly time-series stats with the given name by {@code delta}.
 * Does nothing unless the INSTRUMENTATION option is enabled.
 *
 * @param name stat name
 * @param type aggregation types used when the stat has to be created
 * @param delta amount to adjust both stats by
 */
private void adjustTimeSeriesStat(String name, EnumSet<AggregationType> type, double delta) {
    if (!this.hasOption(ServiceOption.INSTRUMENTATION)) {
        return;
    }
    ServiceStat dayStat = ServiceStatUtils.getOrCreateDailyTimeSeriesStat(this, name, type);
    this.adjustStat(dayStat, delta);
    ServiceStat hourStat = ServiceStatUtils.getOrCreateHourlyTimeSeriesStat(this, name, type);
    this.adjustStat(hourStat, delta);
}
/**
 * Sets the daily and hourly time-series histogram stats with the given name to {@code v}.
 * Does nothing unless the INSTRUMENTATION option is enabled.
 *
 * @param name stat name
 * @param type aggregation types used when the stat has to be created
 * @param v value to set
 */
private void setTimeSeriesHistogramStat(String name, EnumSet<AggregationType> type, double v) {
    if (!this.hasOption(ServiceOption.INSTRUMENTATION)) {
        return;
    }
    ServiceStat dayStat = ServiceStatUtils.getOrCreateDailyTimeSeriesHistogramStat(this, name, type);
    this.setStat(dayStat, v);
    ServiceStat hourStat = ServiceStatUtils.getOrCreateHourlyTimeSeriesHistogramStat(this, name, type);
    this.setStat(hourStat, v);
}
/**
 * Derives the name of the per-query counter stat from the document kind(s) the query targets.
 *
 * A single-term query on the kind field yields the kind-specific stat name. For a boolean
 * query, the match values of all direct kind clauses are joined with ", ". Everything else
 * maps to {@link #STAT_NAME_NON_DOCUMENT_KIND_QUERY_COUNT}.
 *
 * @param query the query to classify
 * @return the stat name to use for this query
 */
private String getQueryStatName(QueryTask.Query query) {
    if (query.term != null) {
        // constant-first equals: a term without a property name is simply "not a kind query"
        if (ServiceDocument.FIELD_NAME_KIND.equals(query.term.propertyName)) {
            return String.format(STAT_NAME_DOCUMENT_KIND_QUERY_COUNT_FORMAT, query.term.matchValue);
        }
        return STAT_NAME_NON_DOCUMENT_KIND_QUERY_COUNT;
    }

    // neither a term nor clauses: nothing to inspect
    if (query.booleanClauses == null) {
        return STAT_NAME_NON_DOCUMENT_KIND_QUERY_COUNT;
    }

    StringBuilder kindSb = new StringBuilder();
    for (QueryTask.Query clause : query.booleanClauses) {
        if (clause == null || clause.term == null
                || clause.term.propertyName == null || clause.term.matchValue == null) {
            continue;
        }
        if (clause.term.propertyName.equals(ServiceDocument.FIELD_NAME_KIND)) {
            if (kindSb.length() > 0) {
                kindSb.append(", ");
            }
            kindSb.append(clause.term.matchValue);
        }
    }

    if (kindSb.length() > 0) {
        return String.format(STAT_NAME_DOCUMENT_KIND_QUERY_COUNT_FORMAT, kindSb.toString());
    }
    return STAT_NAME_NON_DOCUMENT_KIND_QUERY_COUNT;
}
/**
 * Creates the index writer on top of a memory-mapped directory, or on a RAM directory when
 * no directory is given.
 *
 * @param directory on-disk index location, or {@code null} for a RAM-based index
 * @param doUpgrade whether an existing index should be upgraded in place first
 * @return the created writer
 * @throws Exception if the directory or the writer cannot be opened
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    final Directory luceneDirectory;
    if (directory == null) {
        luceneDirectory = new RAMDirectory();
    } else {
        luceneDirectory = MMapDirectory.open(directory.toPath());
    }
    return createWriterWithLuceneDirectory(luceneDirectory, doUpgrade);
}
/**
 * Creates and installs a new index writer for the given Lucene directory.
 *
 * When the host reports an exact memory limit for this service, 99% of it (at least 1 MB) is
 * used as the writer RAM buffer and 1% (at least 1 MB) as the update map limit. The writer is
 * committed once before it is published under {@code searchSync}.
 *
 * If the initial commit fails, the writer is rolled back so that it does not keep holding the
 * directory write lock; otherwise a retry on the same directory could not open a new writer.
 *
 * @param dir the Lucene directory to open the writer on
 * @param doUpgrade whether an existing index should be upgraded in place first
 * @return the writer created by this call
 * @throws Exception if the upgrade, the writer creation or the initial commit fails
 */
IndexWriter createWriterWithLuceneDirectory(Directory dir, boolean doUpgrade) throws Exception {
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setCodec(createCodec());

    Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (totalMBs != null) {
        long cacheSizeMB = (totalMBs * 99) / 100;
        cacheSizeMB = Math.max(1, cacheSizeMB);
        iwc.setRAMBufferSizeMB(cacheSizeMB);
        // reserve 1% of service memory budget for version cache
        long memoryLimitMB = Math.max(1, totalMBs / 100);
        this.updateMapMemoryLimit = memoryLimitMB * 1024 * 1024;
    }

    // Upgrade the index in place if necessary.
    if (doUpgrade && DirectoryReader.indexExists(dir)) {
        upgradeIndex(dir);
    }

    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(
            new KeepOnlyLastCommitDeletionPolicy()));

    IndexWriter w = new IndexWriter(dir, iwc);
    try {
        overwriteCodecInSegmentsBeforeInitialCommit(iwc.getCodec(), w);
        w.commit();
    } catch (Exception e) {
        // release the write lock held by the half-initialized writer before propagating
        try {
            w.rollback();
        } catch (Exception rollbackFailure) {
            e.addSuppressed(rollbackFailure);
        }
        throw e;
    }

    synchronized (this.searchSync) {
        this.writer = w;
        this.updatesPerLink.clear();
        this.writerUpdateTimeMicros = Utils.getNowMicrosUtc();
        this.writerCreationTimeMicros = this.writerUpdateTimeMicros;
    }
    // return the instance created here rather than re-reading the shared field outside the lock
    return w;
}
/**
 * This hack is needed because segments know which codec they were persisted with.
 * The {@link LuceneCodecWithFixes} declares the same name as the default codec, so when
 * segments are read back from disk the non-caching (original) codec will be used.
 *
 * Codecs are meant to be stateless, which is why there is no easy way to pass state during
 * segment read. The {@link LuceneCodecWithFixes}, though, keeps state in a {@link FieldInfoCache}.
 *
 * That is why, after the initial load, the segments have their codec overwritten using the
 * reflective calls below. This method bails out at the first error, giving up the optimization,
 * but the index on disk stays readable, even one saved with pre-6.0 codecs.
 *
 * Does nothing when no {@link FieldInfoCache} was created (see createCodec()).
 *
 * @param codec the codec to install on already committed segments
 * @param writer the writer whose segment infos are patched
 */
private void overwriteCodecInSegmentsBeforeInitialCommit(Codec codec, IndexWriter writer) {
    if (this.fieldInfoCache == null) {
        return;
    }
    try {
        // look the field up on IndexWriter itself: getDeclaredField does not search
        // superclasses, so writer.getClass() would fail for any IndexWriter subclass
        Field segmentInfosF = IndexWriter.class.getDeclaredField("segmentInfos");
        segmentInfosF.setAccessible(true);
        SegmentInfos segmentInfos = (SegmentInfos) segmentInfosF.get(writer);

        // must use reflection as codec can be set once only
        // in this case it's OK as the replaced object is the same 99%
        // and thread-safe by design
        Field codecF = SegmentInfo.class.getDeclaredField("codec");
        codecF.setAccessible(true);
        for (SegmentCommitInfo sci : segmentInfos) {
            Codec originalCodec = sci.info.getCodec();
            if (originalCodec.fieldInfosFormat() instanceof Lucene60FieldInfosFormat) {
                // only change it if we know how to handle it.
                codecF.set(sci.info, codec);
            }
        }
    } catch (Exception e) {
        getHost().log(Level.WARNING,
                "Caching of FieldInfos will not be enabled on committed segments: %s", e);
    }
}
/**
 * Picks the codec for a new writer: the default codec wrapped with a {@link FieldInfoCache}
 * when its field infos format is the supported Lucene 6.0 one, otherwise the plain default.
 *
 * @return the codec to configure on the writer
 */
private Codec createCodec() {
    // default codec for the current Lucene version
    final Codec defaultCodec = Codec.getDefault();

    if (defaultCodec.fieldInfosFormat() instanceof Lucene60FieldInfosFormat) {
        this.fieldInfoCache = new FieldInfoCache();
        return new LuceneCodecWithFixes(defaultCodec, this.fieldInfoCache);
    }

    // during lucene upgrade make sure to introduce a caching version of
    // the FieldInfosFormat class, similar to Lucene60FieldInfosFormatWithCache
    getHost().log(Level.WARNING,
            "Caching of FieldInfo will be disabled: unsupported Lucene version");
    return defaultCodec;
}
/**
 * Upgrades the index in {@code dir} in place when the last commit contains at least one
 * segment that was not written by the latest Lucene version.
 *
 * @param dir the Lucene directory holding the index
 * @throws IOException if the commit cannot be read or the upgrade fails
 */
private void upgradeIndex(Directory dir) throws IOException {
    String lastSegmentsFile = SegmentInfos.getLastCommitSegmentsFileName(dir.listAll());
    SegmentInfos lastCommit = SegmentInfos.readCommit(dir, lastSegmentsFile);

    // find the first segment written by an older version, if any
    Version outdatedVersion = null;
    for (SegmentCommitInfo segment : lastCommit) {
        Version segmentVersion = segment.info.getVersion();
        if (!segmentVersion.equals(Version.LATEST)) {
            outdatedVersion = segmentVersion;
            break;
        }
    }
    if (outdatedVersion == null) {
        return;
    }

    logInfo("Found Index version %s", outdatedVersion.toString());
    logInfo("Upgrading index to %s", Version.LATEST.toString());
    IndexWriterConfig upgradeConfig = new IndexWriterConfig(null);
    new IndexUpgrader(dir, upgradeConfig, false).upgrade();
    this.writerUpdateTimeMicros = Utils.getNowMicrosUtc();
}
/**
 * Moves the files of a corrupt index into a new sibling directory named after the index
 * directory plus the current time in microseconds. Failures are logged, not propagated.
 *
 * @param directory the directory holding the corrupt index files
 */
void archiveCorruptIndexFiles(File directory) {
    File newDirectory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory
            + "." + Utils.getNowMicrosUtc());
    try {
        logWarning("Archiving corrupt index files to %s", newDirectory.toPath());
        Files.createDirectory(newDirectory.toPath());
        // we assume a flat directory structure for the LUCENE directory
        FileUtils.moveOrDeleteFiles(directory, newDirectory, false);
    } catch (IOException e) {
        // never use the exception text as the format string: it may contain '%' (e.g. from
        // a path), and a bare message gives no context in the log
        logWarning("Failure archiving corrupt index files to %s: %s",
                newDirectory.toPath(), Utils.toString(e));
    }
}
/**
 * Issues a query for this service's own self link, across all versions, to verify that the
 * index is healthy. The result is discarded; only a thrown exception matters.
 *
 * The reader opened for the check is closed when the query returns, so repeated validation
 * does not accumulate open index readers.
 *
 * @throws Exception if the reader cannot be opened or the query fails
 */
private void doSelfValidationQuery() throws Exception {
    TermQuery tq = new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, getSelfLink()));
    ServiceDocumentQueryResult rsp = new ServiceDocumentQueryResult();
    Operation op = Operation.createGet(getUri());
    EnumSet<QueryOption> options = EnumSet.of(QueryOption.INCLUDE_ALL_VERSIONS);
    // NOTE(review): assumes queryIndexPaginated is done with the searcher by the time it
    // returns (no page searcher is retained for this unpaginated query) - confirm
    try (DirectoryReader reader = DirectoryReader.open(this.writer, true, true)) {
        IndexSearcher s = new IndexSearcher(reader);
        queryIndexPaginated(op, options, s, tq, null, Integer.MAX_VALUE, 0, null, null, rsp, null,
                Utils.getNowMicrosUtc());
    }
}
/**
 * Drops the paginated searcher referenced by a delete-runtime-context request and
 * closes its index reader.
 *
 * The operation is completed whether or not a matching searcher was tracked; the
 * forced-deletion stat is only adjusted when one was actually removed.
 *
 * @param op operation whose raw body is a {@code DeleteQueryRuntimeContextRequest}
 * @throws IllegalArgumentException if the request carries no context or no native searcher
 */
private void handleDeleteRuntimeContext(Operation op) throws Exception {
    DeleteQueryRuntimeContextRequest deleteRequest =
            (DeleteQueryRuntimeContextRequest) op.getBodyRaw();
    if (deleteRequest.context == null) {
        throw new IllegalArgumentException("Context cannot be null");
    }

    IndexSearcher target = (IndexSearcher) deleteRequest.context.nativeSearcher;
    if (target == null) {
        throw new IllegalArgumentException("Native searcher must be present");
    }

    PaginatedSearcherInfo removed;
    synchronized (this.searchSync) {
        removed = removeSearcherInfoUnsafe(target);
    }

    boolean wasTracked = removed != null;
    if (wasTracked) {
        try {
            removed.searcher.getIndexReader().close();
        } catch (Exception ignored) {
            // deliberate best effort: the searcher is already gone from the tracking
            // structures, so a failure to close does not fail the request
        }
    }

    op.complete();

    if (wasTracked) {
        adjustTimeSeriesStat(STAT_NAME_PAGINATED_SEARCHER_FORCE_DELETION_COUNT,
                AGGREGATION_TYPE_SUM, 1);
    }
}
/**
 * Removes the tracking information of the given searcher from the creation-time
 * map, the expiration-time map and the searcher update-time map.
 *
 * No locking is done here; the visible caller invokes this while holding
 * {@code searchSync}.
 *
 * @param searcher searcher whose tracking entry should be dropped
 * @return the removed entry, or {@code null} if the searcher is not tracked
 * @throws IllegalStateException if the searcher is tracked but is not single-use;
 *         nothing is removed in that case
 */
private PaginatedSearcherInfo removeSearcherInfoUnsafe(IndexSearcher searcher) {
    PaginatedSearcherInfo infoToRemove = null;
    // iterate over the values view: removal through its iterator removes the mapping
    Iterator<PaginatedSearcherInfo> itr =
            this.paginatedSearchersByCreationTime.values().iterator();
    while (itr.hasNext()) {
        PaginatedSearcherInfo info = itr.next();
        if (info.searcher.equals(searcher)) {
            if (!info.singleUse) {
                throw new IllegalStateException(
                        "Context deletion is supported only for SINGLE_USE queries");
            }
            infoToRemove = info;
            itr.remove();
            break;
        }
    }
    if (infoToRemove == null) {
        return null;
    }

    long expirationTime = infoToRemove.expirationTimeMicros;
    List<PaginatedSearcherInfo> expirationList =
            this.paginatedSearchersByExpirationTime.get(expirationTime);
    // The entry is already gone from the creation-time map at this point; tolerate a
    // missing expiration bucket so the caller still gets the info back and can close
    // the reader instead of failing half way through the removal.
    if (expirationList != null) {
        expirationList.remove(infoToRemove);
        if (expirationList.isEmpty()) {
            this.paginatedSearchersByExpirationTime.remove(expirationTime);
        }
    }

    this.searcherUpdateTimesMicros.remove(infoToRemove.searcher.hashCode());
    return infoToRemove;
}
/**
 * Handles a backup request by delegating it, as a PATCH, to the service at
 * {@code ServiceUriPaths.CORE_DOCUMENT_INDEX_BACKUP}.
 *
 * The backup is written to a newly created temporary zip file whose URI is returned
 * to the caller in a {@code BackupResponse}.
 *
 * @param op the original backup operation; failed when the index is not durable or
 *        when the delegated request fails
 * @throws Exception if the temporary file cannot be created
 */
private void handleBackup(Operation op) throws Exception {
    if (!isDurable()) {
        op.fail(new IllegalStateException("Index service is not durable"));
        return;
    }

    // callers are expected to go through the management service instead
    logWarning("Please use backup feature from %s.", ServiceHostManagementService.class);

    Path zipFilePath = Files.createTempFile(
            this.indexDirectory + "-" + Utils.getNowMicrosUtc(), ".zip");

    ServiceHostManagementService.BackupRequest delegated =
            new ServiceHostManagementService.BackupRequest();
    delegated.kind = ServiceHostManagementService.BackupRequest.KIND;
    delegated.backupType = BackupType.ZIP;
    delegated.destination = zipFilePath.toUri();

    // hand the actual work over to the backup service
    Operation delegate = Operation.createPatch(this, ServiceUriPaths.CORE_DOCUMENT_INDEX_BACKUP)
            .transferRequestHeadersFrom(op)
            .transferRefererFrom(op)
            .setBody(delegated)
            .setCompletion((o, e) -> {
                if (e == null) {
                    BackupResponse response = new BackupResponse();
                    response.backupFile = delegated.destination;
                    op.transferResponseHeadersFrom(o);
                    op.setBodyNoCloning(response);
                    op.complete();
                } else {
                    op.fail(e);
                }
            });
    sendRequest(delegate);
}
/**
 * Handles a restore request by delegating it, as a PATCH, to the service at
 * {@code ServiceUriPaths.CORE_DOCUMENT_INDEX_BACKUP}.
 *
 * @param op the original restore operation, carrying a {@code RestoreRequest} body;
 *        failed when the index is not durable or when the delegated request fails
 */
private void handleRestore(Operation op) {
    if (!isDurable()) {
        op.fail(new IllegalStateException("Index service is not durable"));
        return;
    }

    // callers are expected to go through the management service instead
    logWarning("Please use restore feature from %s.", ServiceHostManagementService.class);

    RestoreRequest incoming = op.getBody(RestoreRequest.class);
    ServiceHostManagementService.RestoreRequest delegated =
            new ServiceHostManagementService.RestoreRequest();
    delegated.kind = ServiceHostManagementService.RestoreRequest.KIND;
    delegated.destination = incoming.backupFile;
    delegated.timeSnapshotBoundaryMicros = incoming.timeSnapshotBoundaryMicros;

    // hand the actual work over to the backup service
    Operation delegate = Operation.createPatch(this, ServiceUriPaths.CORE_DOCUMENT_INDEX_BACKUP)
            .transferRequestHeadersFrom(op)
            .transferRefererFrom(op)
            .setBody(delegated)
            .setCompletion((o, e) -> {
                if (e == null) {
                    op.transferResponseHeadersFrom(o);
                    op.complete();
                } else {
                    op.fail(e);
                }
            });
    sendRequest(delegate);
}
/**
 * Authorization hook for this service: the operation is completed right away and
 * no check is performed in this method.
 *
 * @param op operation to authorize
 */
@Override
public void authorizeRequest(Operation op) {
    op.complete();
}
/**
 * Entry point for all requests: rejects unsupported actions, queues everything else
 * and schedules a handler on the matching executor.
 *
 * GET and PATCH go to the query queue; all other accepted actions go to the update
 * queue. A handler is only submitted when the queue accepted the operation.
 *
 * @param op incoming operation; failed when the action is not supported or when the
 *        executor rejects the handler
 */
@Override
public void handleRequest(Operation op) {
    Action action = op.getAction();

    // PUT is never supported; PATCH is reserved for in-process QueryTaskService
    if (action == Action.PUT || (action == Action.PATCH && op.isRemote())) {
        Operation.failActionNotSupported(op);
        return;
    }

    boolean isQuery = action == Action.GET || action == Action.PATCH;
    try {
        if (isQuery) {
            if (offerQueryOperation(op)) {
                this.privateQueryExecutor.submit(this.queryTaskHandler);
            }
        } else if (offerUpdateOperation(op)) {
            this.privateIndexingExecutor.submit(this.updateRequestHandler);
        }
    } catch (RejectedExecutionException e) {
        op.fail(e);
    }
}
/**
 * Takes one operation from the query queue and executes it while holding a
 * {@code writerSync} permit.
 *
 * Operations whose expiration time has already passed are failed without being
 * executed. GET operations are either the internal index-info request (local,
 * with an {@code InternalDocumentIndexInfo} body) or a regular index GET. PATCH
 * operations are dispatched on the document kind of their body; unknown kinds are
 * failed as not supported.
 *
 * Any exception is passed to {@code checkFailureAndRecover} and then fails the
 * operation. The operation context of the calling thread is always restored.
 */
private void handleQueryRequest() {
    Operation op = pollQueryOperation();
    if (op == null) {
        return;
    }
    if (op.getExpirationMicrosUtc() > 0 && op.getExpirationMicrosUtc() < Utils.getSystemNowMicrosUtc()) {
        op.fail(new RejectedExecutionException("Operation has expired"));
        return;
    }
    OperationContext originalContext = OperationContext.getOperationContext();
    // Only release the permit if it was really obtained: releasing after a failed
    // (e.g. interrupted) acquire would add a permit that was never taken.
    boolean permitAcquired = false;
    try {
        this.writerSync.acquire();
        permitAcquired = true;
        OperationContext.setFrom(op);
        switch (op.getAction()) {
        case GET:
            // handle special GET request. Internal call only. Currently from backup/restore services.
            if (!op.isRemote() && op.hasBody() && op.getBodyRaw() instanceof InternalDocumentIndexInfo) {
                InternalDocumentIndexInfo response = new InternalDocumentIndexInfo();
                response.indexWriter = this.writer;
                response.indexDirectory = this.indexDirectory;
                response.luceneIndexService = this;
                response.writerSync = this.writerSync;
                op.setBodyNoCloning(response).complete();
            } else {
                handleGetImpl(op);
            }
            break;
        case PATCH:
            ServiceDocument sd = (ServiceDocument) op.getBodyRaw();
            if (sd.documentKind != null) {
                if (sd.documentKind.equals(QueryTask.KIND)) {
                    QueryTask task = (QueryTask) sd;
                    handleQueryTaskPatch(op, task);
                    break;
                }
                if (sd.documentKind.equals(DeleteQueryRuntimeContextRequest.KIND)) {
                    handleDeleteRuntimeContext(op);
                    break;
                }
                if (sd.documentKind.equals(BackupRequest.KIND)) {
                    handleBackup(op);
                    break;
                }
                if (sd.documentKind.equals(RestoreRequest.KIND)) {
                    handleRestore(op);
                    break;
                }
            }
            Operation.failActionNotSupported(op);
            break;
        default:
            break;
        }
    } catch (Exception e) {
        checkFailureAndRecover(e);
        op.fail(e);
    } finally {
        OperationContext.setFrom(originalContext);
        if (permitAcquired) {
            this.writerSync.release();
        }
    }
}
/**
 * Takes one operation from the update queue and executes it while holding a
 * {@code writerSync} permit.
 *
 * DELETE operations go to {@code handleDeleteImpl}. POST operations are dispatched
 * on the type of their raw body ({@code UpdateIndexRequest} or
 * {@code MaintenanceRequest}); any other POST is failed as not supported.
 *
 * Any exception is passed to {@code checkFailureAndRecover} and then fails the
 * operation. The operation context of the calling thread is always restored.
 */
private void handleUpdateRequest() {
    Operation op = pollUpdateOperation();
    if (op == null) {
        return;
    }
    OperationContext originalContext = OperationContext.getOperationContext();
    // Only release the permit if it was really obtained: releasing after a failed
    // (e.g. interrupted) acquire would add a permit that was never taken.
    boolean permitAcquired = false;
    try {
        this.writerSync.acquire();
        permitAcquired = true;
        OperationContext.setFrom(op);
        switch (op.getAction()) {
        case DELETE:
            handleDeleteImpl(op);
            break;
        case POST:
            Object o = op.getBodyRaw();
            if (o != null) {
                if (o instanceof UpdateIndexRequest) {
                    updateIndex(op);
                    break;
                }
                if (o instanceof MaintenanceRequest) {
                    handleMaintenanceImpl(op);
                    break;
                }
            }
            Operation.failActionNotSupported(op);
            break;
        default:
            break;
        }
    } catch (Exception e) {
        checkFailureAndRecover(e);
        op.fail(e);
    } finally {
        OperationContext.setFrom(originalContext);
        if (permitAcquired) {
            this.writerSync.release();
        }
    }
}
/**
 * Executes a query task PATCH against the index.
 *
 * The Lucene query and sort are taken from the task's query runtime context when
 * already present; otherwise they are converted from the query specification and
 * stored back into the context. CONTINUOUS and GROUP_BY tasks are routed to their
 * dedicated handlers, everything else goes through queryIndex().
 *
 * @param op the PATCH operation; completed or failed by this method or by the
 *        handlers it delegates to
 * @param task the query task carried by the operation
 */
private void handleQueryTaskPatch(Operation op, QueryTask task) throws Exception {
QueryTask.QuerySpecification qs = task.querySpec;
Query luceneQuery = (Query) qs.context.nativeQuery;
Sort luceneSort = (Sort) qs.context.nativeSort;
// first time this task is seen: convert the query and cache it in the context
if (luceneQuery == null) {
luceneQuery = LuceneQueryConverter.convert(task.querySpec.query, qs.context);
if (qs.options.contains(QueryOption.TIME_SNAPSHOT)) {
// additionally require the update time to fall in 0..timeSnapshotBoundaryMicros
Query latestDocumentClause = LongPoint.newRangeQuery(
ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 0,
qs.timeSnapshotBoundaryMicros);
luceneQuery = new BooleanQuery.Builder()
.add(latestDocumentClause, Occur.MUST)
.add(luceneQuery, Occur.FILTER).build();
}
qs.context.nativeQuery = luceneQuery;
}
// NOTE(review): options is null-checked here but dereferenced unconditionally
// elsewhere in this method (qs and task.querySpec are the same object)
if (luceneSort == null && task.querySpec.options != null
&& task.querySpec.options.contains(QuerySpecification.QueryOption.SORT)) {
luceneSort = LuceneQueryConverter.convertToLuceneSort(task.querySpec, false);
task.querySpec.context.nativeSort = luceneSort;
}
if (qs.options.contains(QueryOption.CONTINUOUS)) {
// a true return means the continuous handler already completed or failed the operation
if (handleContinuousQueryTaskPatch(op, task, qs)) {
return;
}
// intentional fall through for tasks just starting and need to execute a query
}
if (qs.options.contains(QueryOption.GROUP_BY)) {
handleGroupByQueryTaskPatch(op, task);
return;
}
LuceneQueryPage lucenePage = (LuceneQueryPage) qs.context.nativePage;
IndexSearcher s = (IndexSearcher) qs.context.nativeSearcher;
ServiceDocumentQueryResult rsp = new ServiceDocumentQueryResult();
if (s == null && qs.resultLimit != null && qs.resultLimit > 0
&& qs.resultLimit != Integer.MAX_VALUE
&& !qs.options.contains(QueryOption.TOP_RESULTS)) {
// this is a paginated query. If this is the start of the query, create a dedicated searcher
// for this query and all its pages. It will be expired when the query task itself expires.
// Since expiration of QueryPageService and index-searcher uses different mechanism, to guarantee
// that index-searcher still exists when QueryPageService expired, add some delay for searcher
// expiration time.
Set documentKind = qs.context.kindScope;
// avoid overflow
boolean addDelay = task.documentExpirationTimeMicros < Long.MAX_VALUE - DEFAULT_PAGINATED_SEARCHER_EXPIRATION_DELAY;
long expiration = addDelay ?
(task.documentExpirationTimeMicros + DEFAULT_PAGINATED_SEARCHER_EXPIRATION_DELAY) :
task.documentExpirationTimeMicros;
s = createOrUpdatePaginatedQuerySearcher(expiration, this.writer, documentKind, qs.options);
}
// queryIndex() returns false when it has not completed the operation itself
// (no query to run, or no matching links); complete with the response here
if (!queryIndex(s, op, null, qs.options, luceneQuery, lucenePage,
qs.resultLimit,
task.documentExpirationTimeMicros, task.indexLink, task.nodeSelectorLink, rsp, qs)) {
op.setBodyNoCloning(rsp).complete();
}
}
/**
 * Handles the continuous-query aspect of a query task PATCH, based on the task stage.
 *
 * <ul>
 * <li>CREATED: not supported, the operation is failed.</li>
 * <li>STARTED: a task holding the self link, the query spec, a query filter and the
 * subject link is registered as an active query; the caller is expected to go on
 * and execute the initial query.</li>
 * <li>CANCELLED, FAILED, FINISHED: the active query is unregistered and the
 * operation is completed.</li>
 * </ul>
 *
 * @param op the PATCH operation
 * @param task the query task carried by the operation
 * @param qs the query specification used to build the query filter
 * @return {@code true} if the operation was completed or failed here and the caller
 *         must stop processing, {@code false} if the caller should continue
 * @throws QueryFilterException if the query filter cannot be created
 */
private boolean handleContinuousQueryTaskPatch(Operation op, QueryTask task,
        QueryTask.QuerySpecification qs) throws QueryFilterException {
    switch (task.taskInfo.stage) {
    case CREATED:
        // the format has two placeholders: task link first, then the offending stage
        logWarning("Task %s is in invalid state: %s", task.documentSelfLink,
                task.taskInfo.stage);
        op.fail(new IllegalStateException("Stage not supported"));
        return true;
    case STARTED:
        // note: querySpec is shared with the incoming task, not copied
        QueryTask clonedTask = new QueryTask();
        clonedTask.documentSelfLink = task.documentSelfLink;
        clonedTask.querySpec = task.querySpec;
        clonedTask.querySpec.context.filter = QueryFilter.create(qs.query);
        clonedTask.querySpec.context.subjectLink = getSubject(op);
        this.activeQueries.put(task.documentSelfLink, clonedTask);
        adjustTimeSeriesStat(STAT_NAME_ACTIVE_QUERY_FILTERS, AGGREGATION_TYPE_SUM,
                1);
        logInfo("Activated continuous query task: %s", task.documentSelfLink);
        break;
    case CANCELLED:
    case FAILED:
    case FINISHED:
        // only adjust the stat when the task was actually registered
        if (this.activeQueries.remove(task.documentSelfLink) != null) {
            adjustTimeSeriesStat(STAT_NAME_ACTIVE_QUERY_FILTERS, AGGREGATION_TYPE_SUM,
                    -1);
        }
        op.complete();
        return true;
    default:
        break;
    }
    return false;
}
/**
 * Returns a searcher for a paginated query, reusing a tracked one when allowed.
 *
 * Reuse is only attempted for queries that are not SINGLE_USE and that either ask
 * for DO_NOT_REFRESH or carry a kind scope; in every other case, or when no tracked
 * searcher qualifies, a new paginated searcher is created.
 *
 * @param expirationMicros expiration time for the searcher
 * @param w index writer used when a new searcher has to be opened
 * @param kindScope kind scope of the query, may be {@code null}
 * @param queryOptions options of the query
 * @return the searcher to use for the query
 * @throws IOException if a new searcher cannot be opened
 */
private IndexSearcher createOrUpdatePaginatedQuerySearcher(long expirationMicros,
        IndexWriter w, Set kindScope, EnumSet queryOptions)
        throws IOException {
    boolean singleUse = queryOptions.contains(QueryOption.SINGLE_USE);
    boolean doNotRefresh = queryOptions.contains(QueryOption.DO_NOT_REFRESH);

    boolean mayReuse = !singleUse && (doNotRefresh || kindScope != null);
    if (mayReuse) {
        IndexSearcher existing;
        synchronized (this.searchSync) {
            existing = getOrUpdateExistingSearcher(expirationMicros, kindScope, doNotRefresh);
        }
        if (existing != null) {
            return existing;
        }
    }

    // singleUse is necessarily false here whenever reuse was attempted
    return createPaginatedQuerySearcher(expirationMicros, w, singleUse);
}
/**
 * Looks for a tracked, non single-use paginated searcher that can serve a query
 * with the given kind scope, starting from the most recently created one.
 *
 * When one is found and the requested expiration is later than its current one,
 * the searcher is moved to the matching bucket of the expiration-time map.
 * The visible caller invokes this while holding {@code searchSync}.
 *
 * @param newExpirationMicros expiration time requested by the new query
 * @param kindScope kind scope of the query
 * @param doNotRefresh whether the query asked not to refresh the searcher
 * @return a reusable searcher, or {@code null} if none qualifies
 * @throws IllegalStateException if the chosen searcher is missing from the
 *         expiration-time map
 */
private IndexSearcher getOrUpdateExistingSearcher(long newExpirationMicros,
        Set kindScope, boolean doNotRefresh) {
    if (this.paginatedSearchersByCreationTime.isEmpty()) {
        return null;
    }

    PaginatedSearcherInfo reusable = null;
    int attemptsLeft = SEARCHER_REUSE_MAX_ATTEMPTS;
    for (PaginatedSearcherInfo candidate : this.paginatedSearchersByCreationTime.descendingMap().values()) {
        if (attemptsLeft-- < 0) {
            break;
        }
        if (candidate.singleUse) {
            continue;
        }
        // check the searcher for kindScope update time
        Long candidateUpdateTime = this.searcherUpdateTimesMicros.get(candidate.searcher.hashCode());
        if (candidateUpdateTime == null) {
            // under load, very rarely searcherUpdateTime may end up null
            continue;
        }
        if (!documentNeedsNewSearcher(null, kindScope, -1, candidateUpdateTime, doNotRefresh)) {
            reusable = candidate;
            break;
        }
    }

    if (reusable == null) {
        return null;
    }

    adjustTimeSeriesStat(STAT_NAME_SEARCHER_REUSE_BY_DOCUMENT_KIND_COUNT, AGGREGATION_TYPE_SUM, 1);

    long previousExpirationMicros = reusable.expirationTimeMicros;
    if (newExpirationMicros > previousExpirationMicros) {
        // update paginatedSearchersByExpirationTime with new expiration
        List bucket = this.paginatedSearchersByExpirationTime.get(previousExpirationMicros);
        if (bucket == null || !bucket.contains(reusable)) {
            throw new IllegalStateException("Searcher not found in expiration list");
        }
        bucket.remove(reusable);
        if (bucket.isEmpty()) {
            this.paginatedSearchersByExpirationTime.remove(previousExpirationMicros);
        }

        reusable.expirationTimeMicros = newExpirationMicros;

        // initialize the array with size = 1: unlikely that two searcher will expire
        // at the same microsecond. The default size of 10 is almost never filled up.
        bucket = this.paginatedSearchersByExpirationTime.computeIfAbsent(
                newExpirationMicros, _k -> new ArrayList<>(1));
        bucket.add(reusable);
    }

    return reusable.searcher;
}
/**
 * Opens a new searcher on the given writer and registers it, under
 * {@code searchSync}, in the creation-time, expiration-time and update-time maps.
 *
 * @param expirationMicros expiration time recorded for the searcher
 * @param w index writer to open the reader from
 * @param singleUse whether the searcher is recorded as single-use
 * @return the newly created searcher
 * @throws IllegalStateException if the writer is {@code null}
 * @throws IOException if the reader cannot be opened
 */
private IndexSearcher createPaginatedQuerySearcher(long expirationMicros, IndexWriter w,
        boolean singleUse) throws IOException {
    if (w == null) {
        throw new IllegalStateException("Writer not available");
    }

    adjustTimeSeriesStat(STAT_NAME_PAGINATED_SEARCHER_UPDATE_COUNT, AGGREGATION_TYPE_SUM, 1);

    // the same timestamp serves as creation time and as searcher update time
    long createdAtMicros = Utils.getNowMicrosUtc();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w, true, true));
    searcher.setSimilarity(searcher.getSimilarity(false));

    PaginatedSearcherInfo tracked = new PaginatedSearcherInfo();
    tracked.searcher = searcher;
    tracked.singleUse = singleUse;
    tracked.creationTimeMicros = createdAtMicros;
    tracked.expirationTimeMicros = expirationMicros;

    synchronized (this.searchSync) {
        this.paginatedSearchersByCreationTime.put(tracked.creationTimeMicros, tracked);
        List bucket = this.paginatedSearchersByExpirationTime
                .computeIfAbsent(tracked.expirationTimeMicros, _k -> new ArrayList<>(1));
        bucket.add(tracked);
        this.searcherUpdateTimesMicros.put(searcher.hashCode(), createdAtMicros);
    }

    return searcher;
}
/**
 * Handles a GET on the index.
 *
 * The self link to load comes either from the operation URI path (when the
 * PRAGMA_DIRECTIVE_INDEX_CHECK directive is set) or from URI query parameters.
 * A self link that does not end with the wildcard character results in a single
 * document lookup; otherwise a self link prefix query is executed, falling back to a
 * query of the service host when the index has no results and the PERSISTENCE
 * capability was not requested.
 *
 * Malformed "version" or capability parameter values make Long.parseLong /
 * ServiceOption.valueOf throw; those exceptions propagate to the caller.
 *
 * @param get the GET operation; completed or failed by this method or its delegates
 */
public void handleGetImpl(Operation get) throws Exception {
String selfLink = null;
Long version = null;
ServiceOption serviceOption = ServiceOption.NONE;
EnumSet options = EnumSet.noneOf(QueryOption.class);
if (get.hasPragmaDirective(Operation.PRAGMA_DIRECTIVE_INDEX_CHECK)) {
// fast path for checking if a service exists, and loading its latest state
serviceOption = ServiceOption.PERSISTENCE;
// the GET operation URI is set to the service we want to load, not the self link
// of the index service. This is only possible when the operation was directly
// dispatched from the local host, on the index service
selfLink = get.getUri().getPath();
options.add(QueryOption.INCLUDE_DELETED);
} else {
// REST API for loading service state, given a set of URI query parameters
Map params = UriUtils.parseUriQueryParams(get.getUri());
String cap = params.get(UriUtils.URI_PARAM_CAPABILITY);
if (cap != null) {
serviceOption = ServiceOption.valueOf(cap);
}
// IMMUTABLE is treated as PERSISTENCE plus all versions
if (serviceOption == ServiceOption.IMMUTABLE) {
options.add(QueryOption.INCLUDE_ALL_VERSIONS);
serviceOption = ServiceOption.PERSISTENCE;
}
if (params.containsKey(UriUtils.URI_PARAM_INCLUDE_DELETED)) {
options.add(QueryOption.INCLUDE_DELETED);
}
if (params.containsKey(ServiceDocument.FIELD_NAME_VERSION)) {
version = Long.parseLong(params.get(ServiceDocument.FIELD_NAME_VERSION));
}
selfLink = params.get(ServiceDocument.FIELD_NAME_SELF_LINK);
// the expand parameter is accepted with and without the leading dollar sign
String fieldToExpand = params.get(UriUtils.URI_PARAM_ODATA_EXPAND);
if (fieldToExpand == null) {
fieldToExpand = params.get(UriUtils.URI_PARAM_ODATA_EXPAND_NO_DOLLAR_SIGN);
}
if (fieldToExpand != null
&& fieldToExpand
.equals(ServiceDocumentQueryResult.FIELD_NAME_DOCUMENT_LINKS)) {
options.add(QueryOption.EXPAND_CONTENT);
}
}
if (selfLink == null) {
get.fail(new IllegalArgumentException(
ServiceDocument.FIELD_NAME_SELF_LINK + " query parameter is required"));
return;
}
if (!selfLink.endsWith(UriUtils.URI_WILDCARD_CHAR)) {
// Enforce auth check for the returning document for remote GET requests.
// This is mainly for the direct client requests to the index-service such as
// "/core/document-index?documentSelfLink=...".
// Some other core services also perform remote GET (e.g.: NodeSelectorSynchronizationService),
// but they populate appropriate auth context such as system-user.
// For wildcard selfLink requests (handled further below), the auth check is performed
// as part of queryIndex().
if (get.isRemote() && getHost().isAuthorizationEnabled()) {
get.nestCompletion((op, ex) -> {
if (ex != null) {
get.fail(ex);
return;
}
if (get.getAuthorizationContext().isSystemUser() || !op.hasBody()) {
// when there is no matching document, we cannot evaluate the auth, thus simply complete.
get.complete();
return;
}
// evaluate whether the matched document is authorized for the user
QueryFilter queryFilter = get.getAuthorizationContext().getResourceQueryFilter(Action.GET);
if (queryFilter == null) {
// do not match anything
queryFilter = QueryFilter.FALSE;
}
// This completion handler is called right after it retrieved the document from lucene and
// deserialized it to its state type.
// Since calling "op.getBody(ServiceDocument.class)" changes(down cast) the actual document object
// to an instance of ServiceDocument, it will lose the additional data which might be required in
// authorization filters; Therefore, here uses "op.getBodyRaw()" and just cast to ServiceDocument
// which doesn't convert the document object.
ServiceDocument doc = (ServiceDocument) op.getBodyRaw();
if (!QueryFilterUtils.evaluate(queryFilter, doc, getHost())) {
get.fail(Operation.STATUS_CODE_FORBIDDEN);
return;
}
get.complete();
});
}
// Most basic query is retrieving latest document at latest version for a specific link
queryIndexSingle(selfLink, get, version);
return;
}
// Self link prefix query, returns all self links with the same prefix. A GET on a
// factory translates to this query.
int resultLimit = Integer.MAX_VALUE;
// drop the trailing wildcard character to obtain the prefix
selfLink = selfLink.substring(0, selfLink.length() - 1);
Query tq = new PrefixQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, selfLink));
ServiceDocumentQueryResult rsp = new ServiceDocumentQueryResult();
rsp.documentLinks = new ArrayList<>();
// a true return means queryIndex() already completed or failed the operation
if (queryIndex(null, get, selfLink, options, tq, null, resultLimit, 0, null, null, rsp,
null)) {
return;
}
if (serviceOption == ServiceOption.PERSISTENCE) {
// specific index requested but no results, return empty response
get.setBodyNoCloning(rsp).complete();
return;
}
// no results in the index, search the service host started services
queryServiceHost(selfLink + UriUtils.URI_WILDCARD_CHAR, options, get);
}
/**
 * Retrieves the next available query operation given the fairness scheme.
 *
 * @return whatever the query queue's {@code poll()} yields
 */
private Operation pollQueryOperation() {
    Operation next = this.queryQueue.poll();
    return next;
}
/**
 * Retrieves the next available operation from the update queue.
 *
 * @return whatever the update queue's {@code poll()} yields
 */
private Operation pollUpdateOperation() {
    Operation next = this.updateQueue.poll();
    return next;
}
/**
 * Queues a query operation in a multi-queue that uses the subject as the key per queue.
 *
 * @param op operation to queue
 * @return the result of the queue's {@code offer} call
 */
private boolean offerQueryOperation(Operation op) {
    return this.queryQueue.offer(getSubject(op), op);
}
/**
 * Queues an update operation in the update queue, keyed by the operation's subject.
 *
 * @param op operation to queue
 * @return the result of the queue's {@code offer} call
 */
private boolean offerUpdateOperation(Operation op) {
    return this.updateQueue.offer(getSubject(op), op);
}
/**
 * Determines the subject link an operation is attributed to.
 *
 * @param op operation to inspect
 * @return the system user link when the operation runs as the system user; the
 *         subject of the authorization context claims when authorization is enabled
 *         on the host; the guest user link otherwise
 */
private String getSubject(Operation op) {
    AuthorizationContext ctx = op.getAuthorizationContext();
    if (ctx != null && ctx.isSystemUser()) {
        return SystemUserService.SELF_LINK;
    }

    if (!getHost().isAuthorizationEnabled()) {
        return GuestUserService.SELF_LINK;
    }

    // NOTE(review): the context is dereferenced without a null check here — confirm
    // every operation carries one when authorization is enabled
    return ctx.getClaims().getSubject();
}
/**
 * Runs a query against the index and, when it produced something to return,
 * completes the operation with the result.
 *
 * @param s searcher to use; when null one is obtained from createOrRefreshSearcher()
 * @param op operation on whose behalf the query runs
 * @param selfLinkPrefix self link prefix handed to createOrRefreshSearcher(), may be null
 * @param options query options; null is treated as an empty set
 * @param tq the Lucene query to run, before updateQuery() augments it
 * @param page current page for paginated queries, may be null
 * @param count result limit handed to the paginated query
 * @param expiration expiration handed to the paginated query
 * @param indexLink index link handed to the paginated query
 * @param nodeSelectorPath node selector path handed to the count/paginated query
 * @param rsp response object; its documents/documentLinks/documentCount fields are
 *        initialized here according to the options
 * @param qs query specification, may be null
 * @return true if the operation was completed or failed by this method; false if
 *         the caller still has to complete it (updateQuery() produced no query, or a
 *         non-COUNT query returned no links)
 */
private boolean queryIndex(
IndexSearcher s,
Operation op,
String selfLinkPrefix,
EnumSet options,
Query tq,
LuceneQueryPage page,
int count,
long expiration,
String indexLink,
String nodeSelectorPath,
ServiceDocumentQueryResult rsp,
QuerySpecification qs) throws Exception {
if (options == null) {
options = EnumSet.noneOf(QueryOption.class);
}
// any of the expand options requires a map to hold the expanded documents
if (options.contains(QueryOption.EXPAND_CONTENT)
|| options.contains(QueryOption.EXPAND_BINARY_CONTENT)
|| options.contains(QueryOption.EXPAND_SELECTED_FIELDS)) {
rsp.documents = new HashMap<>();
}
// COUNT queries report a count only; all others report document links
if (options.contains(QueryOption.COUNT)) {
rsp.documentCount = 0L;
} else {
rsp.documentLinks = new ArrayList<>();
}
// read the writer field once into a local so the null check and the use agree
IndexWriter w = this.writer;
if (w == null) {
op.fail(new CancellationException("Index writer is null"));
return true;
}
Set kindScope = null;
if (qs != null && qs.context != null) {
kindScope = qs.context.kindScope;
}
if (s == null) {
s = createOrRefreshSearcher(selfLinkPrefix, kindScope, count, w,
options.contains(QueryOption.DO_NOT_REFRESH));
}
long queryStartTimeMicros = Utils.getNowMicrosUtc();
// augment the query; a null result means there is nothing to execute
tq = updateQuery(op, qs, tq, queryStartTimeMicros, options);
if (tq == null) {
return false;
}
if (qs != null && qs.query != null && this.hasOption(ServiceOption.INSTRUMENTATION)) {
String queryStat = getQueryStatName(qs.query);
this.adjustStat(queryStat, 1);
}
ServiceDocumentQueryResult result;
if (options.contains(QueryOption.COUNT)) {
result = queryIndexCount(options, s, tq, rsp, qs, queryStartTimeMicros, nodeSelectorPath);
} else {
result = queryIndexPaginated(op, options, s, tq, page, count, expiration, indexLink, nodeSelectorPath,
rsp, qs, queryStartTimeMicros);
}
result.documentOwner = getHost().getId();
// an empty non-COUNT result is left for the caller to handle
if (!options.contains(QueryOption.COUNT) && result.documentLinks.isEmpty()) {
return false;
}
op.setBodyNoCloning(result).complete();
return true;
}
/**
 * Loads a single document by self link (and optionally version) and completes the
 * operation with it.
 *
 * The operation is completed without a body when no document matches or when the
 * matched document carries an expiration time that is not in the future. It is
 * failed when the index writer is not available.
 *
 * @param selfLink self link of the document to load
 * @param op operation to complete
 * @param version version to load, or {@code null} for the latest one
 */
private void queryIndexSingle(String selfLink, Operation op, Long version)
        throws Exception {
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException("Index writer is null"));
        return;
    }

    IndexSearcher searcher = createOrRefreshSearcher(selfLink, null, 1, w, false);

    // time only the lookup itself
    long startNanos = System.nanoTime();
    TopDocs hits = queryIndexForVersion(selfLink, searcher, version, null);
    long elapsedNanos = System.nanoTime() - startNanos;
    setTimeSeriesHistogramStat(STAT_NAME_QUERY_SINGLE_DURATION_MICROS,
            AGGREGATION_TYPE_AVG_MAX, TimeUnit.NANOSECONDS.toMicros(elapsedNanos));

    if (hasOption(ServiceOption.INSTRUMENTATION)) {
        String factoryLink = UriUtils.getParentPath(selfLink);
        if (factoryLink != null) {
            adjustStat(String.format(STAT_NAME_SINGLE_QUERY_BY_FACTORY_COUNT_FORMAT, factoryLink), 1);
        }
    }

    if (hits.totalHits == 0) {
        op.complete();
        return;
    }

    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    loadDoc(searcher, visitor, hits.scoreDocs[0].doc, this.fieldsToLoadWithExpand);

    Long expiration = visitor.documentExpirationTimeMicros;
    if (expiration != null && expiration <= Utils.getSystemNowMicrosUtc()) {
        // expired documents are treated like missing ones
        op.complete();
        return;
    }

    ServiceDocument state = getStateFromLuceneDocument(visitor, selfLink);
    op.setBodyNoCloning(state).complete();
}
/**
 * Find the document given a self link and version number.
 *
 * This function is used for two purposes; find given version to...
 * 1) load state if the service state is not yet cached
 * 2) filter query results to only include the given version
 *
 * In case (1), authorization is applied in the service host (either against
 * the cached state or freshly loaded state).
 * In case (2), authorization should NOT be applied because the original query
 * already included the resource group query per the authorization context.
 * Query results will be filtered given the REAL latest version, not the latest
 * version subject to the resource group query. This means older versions of
 * a document will NOT appear in the query result if the user is not authorized
 * to see the newer version.
 *
 * If given version is null then function returns the latest version.
 * And if given version is not found then no document is returned.
 *
 * When documentsUpdatedBeforeInMicros is supplied it takes precedence: the version
 * argument is then not used.
 */
private TopDocs queryIndexForVersion(String selfLink, IndexSearcher s, Long version, Long documentsUpdatedBeforeInMicros)
        throws IOException {
    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    clauses.add(new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK, selfLink)),
            Occur.MUST);

    if (documentsUpdatedBeforeInMicros != null) {
        // when QueryOption.TIME_SNAPSHOT is enabled (documentsUpdatedBeforeInMicros i.e.
        // QuerySpecification.timeSnapshotBoundaryMicros is present) perform query to find
        // a document with link updated before supplied time.
        clauses.add(LongPoint.newRangeQuery(
                ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 0, documentsUpdatedBeforeInMicros),
                Occur.MUST);
    } else if (version != null) {
        // exact match on the requested version
        clauses.add(LongPoint.newRangeQuery(
                ServiceDocument.FIELD_NAME_VERSION, version, version),
                Occur.MUST);
    }

    // at most one hit, ordered by the version sort
    return s.search(clauses.build(), 1, this.versionSort, false, false);
}
/**
 * Asks the service host for the service URIs matching the given self link, used
 * when the index produced no results.
 *
 * @param selfLink self link (prefix plus wildcard) to query the host with
 * @param options query options; EXPAND_CONTENT triggers a manual expansion of the links
 * @param op operation handed to the host query
 */
private void queryServiceHost(String selfLink, EnumSet options, Operation op) {
    boolean expandRequested = options.contains(QueryOption.EXPAND_CONTENT);
    if (expandRequested) {
        // the index writers had no results, ask the host a simple prefix query
        // for the services, and do a manual expand
        op.nestCompletion(hostResult -> {
            expandLinks(hostResult, op);
        });
    }
    getHost().queryServiceUris(selfLink, op);
}
/**
 * This routine modifies a user-specified query to include clauses which
 * apply the resource group query specified by the operation's authorization
 * context and which exclude expired documents.
 *
 * With INDEXED_METADATA, clauses on the tombstone time metadata field are added as
 * well, depending on the INCLUDE_ALL_VERSIONS and TIME_SNAPSHOT options.
 *
 * If the operation was executed by the system user, no resource group query
 * is applied.
 *
 * If no query needs to be executed return null
 *
 * @return Augmented query.
 */
private Query updateQuery(Operation op, QuerySpecification qs, Query tq, long now,
        EnumSet queryOptions) {
    BooleanQuery.Builder augmented = new BooleanQuery.Builder();

    // exclude documents whose expiration time falls in 1..now
    augmented.add(LongPoint.newRangeQuery(
            ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS, 1, now), Occur.MUST_NOT);
    augmented.add(tq, Occur.FILTER);

    if (queryOptions.contains(QueryOption.INDEXED_METADATA)) {
        boolean allVersions = queryOptions.contains(QueryOption.INCLUDE_ALL_VERSIONS);
        boolean timeSnapshot = queryOptions.contains(QueryOption.TIME_SNAPSHOT);

        if (!(allVersions || timeSnapshot)) {
            Query currentClause = NumericDocValuesField.newExactQuery(
                    LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME,
                    LuceneIndexDocumentHelper.ACTIVE_DOCUMENT_TOMBSTONE_TIME);
            augmented.add(currentClause, Occur.MUST);
        }

        // There is a bug in lucene where sort and numeric doc values don't play well
        // apply the optimization to limit the resultset only when there is no sort specified
        if (timeSnapshot && qs != null && qs.sortTerm == null) {
            Query tombstoneClause = NumericDocValuesField.newRangeQuery(
                    LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME,
                    qs.timeSnapshotBoundaryMicros,
                    LuceneIndexDocumentHelper.ACTIVE_DOCUMENT_TOMBSTONE_TIME);
            augmented.add(tombstoneClause, Occur.MUST);
        }
    }

    if (getHost().isAuthorizationEnabled()) {
        AuthorizationContext ctx = op.getAuthorizationContext();
        if (ctx == null) {
            // Don't allow operation if no authorization context and auth is enabled
            return null;
        }

        // The system user is allowed unconditionally; everybody else gets the resource
        // query of the authorization context applied. If that resource query is
        // unspecified, use a Lucene query that doesn't return any documents so that
        // every result will be empty.
        if (!ctx.isSystemUser()) {
            QueryTask.Query resourceQuery = ctx.getResourceQuery(Action.GET);
            Query rq = resourceQuery == null
                    ? new MatchNoDocsQuery()
                    : LuceneQueryConverter.convert(resourceQuery, null);
            augmented.add(rq, Occur.FILTER);
        }
    }

    return augmented.build();
}
/**
 * Executes a GROUP_BY query task: runs a Lucene grouping search and, for every
 * non-empty group, creates a page link for a query restricted to that group.
 *
 * The response carries one next-page link per group value and, when a group result
 * limit is set and more grouped hits exist beyond the current window, a next-page
 * link for the following set of groups. The operation is completed with that
 * response.
 *
 * @param op the PATCH operation to complete
 * @param task the query task; its querySpec must carry a groupByTerm
 * @throws IOException if the search fails
 */
private void handleGroupByQueryTaskPatch(Operation op, QueryTask task) throws IOException {
QuerySpecification qs = task.querySpec;
IndexSearcher s = (IndexSearcher) qs.context.nativeSearcher;
LuceneQueryPage page = (LuceneQueryPage) qs.context.nativePage;
Query tq = (Query) qs.context.nativeQuery;
Sort sort = (Sort) qs.context.nativeSort;
if (sort == null && qs.sortTerm != null) {
sort = LuceneQueryConverter.convertToLuceneSort(qs, false);
}
Sort groupSort = null;
if (qs.groupSortTerm != null) {
groupSort = LuceneQueryConverter.convertToLuceneSort(qs, true);
}
// numeric (LONG / DOUBLE) group-by terms are grouped on the property name plus the
// group-by suffix; all other types use the sort field name of the property
GroupingSearch groupingSearch;
if (qs.groupByTerm.propertyType == ServiceDocumentDescription.TypeName.LONG ||
qs.groupByTerm.propertyType == ServiceDocumentDescription.TypeName.DOUBLE) {
groupingSearch = new GroupingSearch(qs.groupByTerm.propertyName + GROUP_BY_PROPERTY_NAME_SUFFIX);
} else {
groupingSearch = new GroupingSearch(
LuceneIndexDocumentHelper.createSortFieldPropertyName(qs.groupByTerm.propertyName));
}
groupingSearch.setGroupSort(groupSort);
groupingSearch.setSortWithinGroup(sort);
adjustTimeSeriesStat(STAT_NAME_GROUP_QUERY_COUNT, AGGREGATION_TYPE_SUM, 1);
// window of groups to return: offset from the current page, limit from the spec
// (10000 when no group result limit is set)
int groupOffset = page != null ? page.groupOffset : 0;
int groupLimit = qs.groupResultLimit != null ? qs.groupResultLimit : 10000;
Set kindScope = qs.context.kindScope;
if (s == null && qs.groupResultLimit != null) {
// Since expiration of QueryPageService and index-searcher uses different mechanism, to guarantee
// that index-searcher still exists when QueryPageService expired, add some delay for searcher
// expiration time.
// (the comparison below guards the addition against overflow)
boolean addDelay = task.documentExpirationTimeMicros < Long.MAX_VALUE - DEFAULT_PAGINATED_SEARCHER_EXPIRATION_DELAY;
long expiration = addDelay ?
(task.documentExpirationTimeMicros + DEFAULT_PAGINATED_SEARCHER_EXPIRATION_DELAY) :
task.documentExpirationTimeMicros;
s = createOrUpdatePaginatedQuerySearcher(expiration, this.writer, kindScope, qs.options);
}
// no paginated searcher was needed: fall back to the regular searcher
if (s == null) {
s = createOrRefreshSearcher(null, kindScope, Integer.MAX_VALUE, this.writer,
qs.options.contains(QueryOption.DO_NOT_REFRESH));
}
ServiceDocumentQueryResult rsp = new ServiceDocumentQueryResult();
rsp.nextPageLinksPerGroup = new TreeMap<>();
// perform the actual search
long startNanos = System.nanoTime();
// NOTE(review): the generic type arguments on the next line look truncated —
// confirm against the original source
TopGroups> groups = groupingSearch.search(s, tq, groupOffset, groupLimit);
long durationNanos = System.nanoTime() - startNanos;
setTimeSeriesHistogramStat(STAT_NAME_GROUP_QUERY_DURATION_MICROS, AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(durationNanos));
// generate page links for each grouped result
// NOTE(review): the generic type arguments on the next line look truncated as well
for (GroupDocs> groupDocs : groups.groups) {
if (groupDocs.totalHits == 0) {
continue;
}
QueryTask.Query perGroupQuery = Utils.clone(qs.query);
String groupValue;
// groupValue can be ANY OF ( GROUPS, null )
// The "null" group signifies documents that do not have the property.
if (groupDocs.groupValue != null) {
groupValue = ((BytesRef) groupDocs.groupValue).utf8ToString();
} else {
groupValue = DOCUMENTS_WITHOUT_RESULTS;
}
// we need to modify the query to include a top level clause that restricts scope
// to documents with the groupBy field and value
QueryTask.Query clause = new QueryTask.Query()
.setTermPropertyName(qs.groupByTerm.propertyName)
.setTermMatchType(MatchType.TERM);
clause.occurance = QueryTask.Query.Occurance.MUST_OCCUR;
// numeric group values are matched with an equality range; everything else
// (including the "null" group) is matched as a term value
if (qs.groupByTerm.propertyType == ServiceDocumentDescription.TypeName.LONG
&& groupDocs.groupValue != null) {
clause.setNumericRange(QueryTask.NumericRange.createEqualRange(Long.parseLong(groupValue)));
} else if (qs.groupByTerm.propertyType == ServiceDocumentDescription.TypeName.DOUBLE
&& groupDocs.groupValue != null) {
clause.setNumericRange(QueryTask.NumericRange.createEqualRange(Double.parseDouble(groupValue)));
} else {
clause.setTermMatchValue(groupValue);
}
// NOTE(review): topLevelClause is the same object as perGroupQuery, so the query
// is added as a boolean clause of itself here — confirm this is intended
if (perGroupQuery.booleanClauses == null) {
QueryTask.Query topLevelClause = perGroupQuery;
perGroupQuery.addBooleanClause(topLevelClause);
}
perGroupQuery.addBooleanClause(clause);
Query lucenePerGroupQuery = LuceneQueryConverter.convert(perGroupQuery, qs.context);
// for each group generate a query page link
String pageLink = createNextPage(op, s, qs, lucenePerGroupQuery, sort,
null, 0, null,
task.documentExpirationTimeMicros, task.indexLink, task.nodeSelectorLink, false);
rsp.nextPageLinksPerGroup.put(groupValue, pageLink);
}
// a full window of groups suggests there may be more: probe the next window
if (qs.groupResultLimit != null && groups.groups.length >= groupLimit) {
// check if we need to generate a next page for the next set of group results
groups = groupingSearch.search(s, tq, groupLimit + groupOffset, groupLimit);
if (groups.totalGroupedHitCount > 0) {
rsp.nextPageLink = createNextPage(op, s, qs, tq, sort,
null, 0, groupLimit + groupOffset,
task.documentExpirationTimeMicros, task.indexLink, task.nodeSelectorLink, page != null);
}
}
op.setBodyNoCloning(rsp).complete();
}
/**
 * Counts the documents matching the supplied query and stores the count and the elapsed
 * query time in {@code response}.
 *
 * When {@link QueryOption#INCLUDE_ALL_VERSIONS} is set the count is taken directly from the
 * searcher. Otherwise the hits are paged through with {@code searchAfter} and handed to
 * {@link #processQueryResults} so that only unique document links are counted.
 *
 * @param queryOptions options of the query being executed
 * @param searcher searcher the query runs against
 * @param termQuery native query to execute
 * @param response result object that is updated in place and returned
 * @param querySpec query specification; its {@code resultLimit}, when set, is used as page size
 * @param queryStartTimeMicros time the query started, used to compute {@code queryTimeMicros}
 * @param nodeSelectorPath node selector path forwarded to result processing
 * @return the supplied {@code response}, with {@code documentLinks} cleared
 */
private ServiceDocumentQueryResult queryIndexCount(
        EnumSet<QueryOption> queryOptions,
        IndexSearcher searcher,
        Query termQuery,
        ServiceDocumentQueryResult response,
        QuerySpecification querySpec,
        long queryStartTimeMicros,
        String nodeSelectorPath)
        throws Exception {
    if (queryOptions.contains(QueryOption.INCLUDE_ALL_VERSIONS)) {
        // Special handling for queries which include all versions in order to avoid allocating
        // a large, unnecessary ScoreDocs array.
        response.documentCount = (long) searcher.count(termQuery);
        long queryTimeMicros = Utils.getNowMicrosUtc() - queryStartTimeMicros;
        response.queryTimeMicros = queryTimeMicros;
        setTimeSeriesHistogramStat(STAT_NAME_QUERY_ALL_VERSIONS_DURATION_MICROS,
                AGGREGATION_TYPE_AVG_MAX, queryTimeMicros);
        return response;
    }

    response.queryTimeMicros = 0L;

    int resultLimit = queryResultLimit;
    if (querySpec.resultLimit != null && querySpec.resultLimit != Integer.MAX_VALUE) {
        resultLimit = querySpec.resultLimit;
        if (querySpec.resultLimit < DEFAULT_QUERY_RESULT_LIMIT) {
            logWarning("\n*****\n"
                    + "resultLimit value is too low, query will take much longer on large result sets. "
                    + "Do not set this value for COUNT queries, or set it above default of "
                    + DEFAULT_QUERY_RESULT_LIMIT
                    + "\n*****\n");
        }
    }

    ScoreDoc after = null;
    long start = queryStartTimeMicros;
    while (true) {
        TopDocs results = searcher.searchAfter(after, termQuery, resultLimit);
        long queryEndTimeMicros = Utils.getNowMicrosUtc();
        long luceneQueryDurationMicros = queryEndTimeMicros - start;
        response.queryTimeMicros = queryEndTimeMicros - queryStartTimeMicros;

        if (results == null || results.scoreDocs == null || results.scoreDocs.length == 0) {
            break;
        }

        // This loop is only reached when INCLUDE_ALL_VERSIONS is absent, so the duration is
        // recorded under the regular query stat rather than the all-versions stat.
        setTimeSeriesHistogramStat(STAT_NAME_QUERY_DURATION_MICROS,
                AGGREGATION_TYPE_AVG_MAX, luceneQueryDurationMicros);

        after = processQueryResults(querySpec, queryOptions, resultLimit, searcher,
                response,
                results.scoreDocs, start, nodeSelectorPath, false);

        long now = Utils.getNowMicrosUtc();
        setTimeSeriesHistogramStat(STAT_NAME_RESULT_PROCESSING_DURATION_MICROS,
                AGGREGATION_TYPE_AVG_MAX, now - queryEndTimeMicros);
        start = now;
    }

    // only the count is reported; the links were needed just to detect duplicates
    response.documentLinks.clear();
    return response;
}
/**
 * Executes a document query against the supplied searcher and fills {@code rsp} with the
 * results, creating a next page link when the query is paginated and more results may exist.
 *
 * The method loops over {@code search}/{@code searchAfter} calls until the requested number of
 * results has been collected, the hits are exhausted, or a single pass is sufficient (no
 * explicit limit, or TOP_RESULTS combined with INCLUDE_ALL_VERSIONS).
 *
 * @param op operation that triggered the query; passed on when a next page is created
 * @param options query options
 * @param s searcher to run the query against
 * @param tq native query
 * @param page page state (search cursor and previous page link) or null for the initial request
 * @param count requested result limit; {@code Integer.MAX_VALUE} means no explicit limit
 * @param expiration expiration time passed on to any page service that is created
 * @param indexLink index link passed on to any page service that is created
 * @param nodeSelectorPath node selector path forwarded to result processing and page creation
 * @param rsp result object, updated in place
 * @param qs query specification, may be null
 * @param queryStartTimeMicros time the query started
 * @return {@code rsp}
 * @throws IllegalStateException if no limit was specified and the total hits exceed the
 *         implicit limit
 */
private ServiceDocumentQueryResult queryIndexPaginated(Operation op,
EnumSet options,
IndexSearcher s,
Query tq,
LuceneQueryPage page,
int count,
long expiration,
String indexLink,
String nodeSelectorPath,
ServiceDocumentQueryResult rsp,
QuerySpecification qs,
long queryStartTimeMicros) throws Exception {
ScoreDoc[] hits;
ScoreDoc after = null;
boolean hasExplicitLimit = count != Integer.MAX_VALUE;
boolean isPaginatedQuery = hasExplicitLimit
&& !options.contains(QueryOption.TOP_RESULTS);
boolean hasPage = page != null;
boolean shouldProcessResults = true;
boolean useDirectSearch = options.contains(QueryOption.TOP_RESULTS)
&& options.contains(QueryOption.INCLUDE_ALL_VERSIONS);
// resultLimit tracks how many results are still wanted; hitCount is the page size handed
// to Lucene
int resultLimit = count;
int hitCount;
if (isPaginatedQuery && !hasPage) {
// QueryTask.resultLimit was set, but we don't have a page param yet, which means this
// is the initial POST to create the queryTask. Since the initial query results will be
// discarded in this case, just set the limit to 1 and do not process results.
resultLimit = 1;
hitCount = 1;
shouldProcessResults = false;
rsp.documentCount = 1L;
} else if (!hasExplicitLimit) {
// The query does not have an explicit result limit set. We still specify an implicit
// limit in order to avoid out of memory conditions, since Lucene will use the limit in
// order to allocate a results array; however, if the number of hits returned by Lucene
// is higher than the default limit, we will fail the query later.
hitCount = queryResultLimit;
} else if (!options.contains(QueryOption.INCLUDE_ALL_VERSIONS)) {
// The query has an explicit result limit set, but the value is specified in terms of
// the number of desired results in the QueryTask, not the expected number of Lucene
// documents which must be processed in order to generate these results. Adjust the
// Lucene query page size to account for this discrepancy.
hitCount = Math.max(resultLimit, queryPageResultLimit);
} else {
hitCount = resultLimit;
}
if (hasPage) {
// For example, via GET of QueryTask.nextPageLink
after = page.after;
rsp.prevPageLink = page.previousPageLink;
}
// default to the version sort unless the query specifies its own sort term
Sort sort = this.versionSort;
if (qs != null && qs.sortTerm != null) {
// see if query is part of a task and already has a cached sort
if (qs.context != null) {
sort = (Sort) qs.context.nativeSort;
}
if (sort == null) {
sort = LuceneQueryConverter.convertToLuceneSort(qs, false);
}
}
TopDocs results = null;
int queryCount = 0;
rsp.queryTimeMicros = 0L;
long start = queryStartTimeMicros;
// number of leading results still to be dropped; decremented across loop iterations
int offset = (qs == null || qs.offset == null) ? 0 : qs.offset;
do {
// Special-case handling of single-version documents to use search() instead of
// searchAfter(). This will prevent Lucene from holding the full result set in memory.
if (useDirectSearch) {
if (sort == null) {
results = s.search(tq, hitCount);
} else {
results = s.search(tq, hitCount, sort, false, false);
}
} else {
if (sort == null) {
results = s.searchAfter(after, tq, hitCount);
} else {
results = s.searchAfter(after, tq, hitCount, sort, false, false);
}
}
if (results == null) {
return rsp;
}
queryCount++;
long end = Utils.getNowMicrosUtc();
// without an explicit limit the implicit limit acts as a hard cap: fail instead of
// returning a truncated result set
if (!hasExplicitLimit && !hasPage && !isPaginatedQuery
&& results.totalHits > hitCount) {
throw new IllegalStateException(
"Query returned large number of results, please specify a resultLimit. Results:"
+ results.totalHits + ", QuerySpec: " + Utils.toJson(qs));
}
hits = results.scoreDocs;
long queryTime = end - start;
rsp.documentCount = 0L;
rsp.queryTimeMicros += queryTime;
// last hit visited by result processing; used as the cursor for the next iteration or page
ScoreDoc bottom = null;
if (shouldProcessResults) {
start = end;
bottom = processQueryResults(qs, options, count, s, rsp, hits,
queryStartTimeMicros, nodeSelectorPath, true);
end = Utils.getNowMicrosUtc();
// remove docs for offset
int size = rsp.documentLinks.size();
if (size < offset) {
// everything collected so far falls inside the offset: drop it all and carry the
// remaining offset into the next iteration
rsp.documentLinks.clear();
rsp.documentCount = 0L;
if (rsp.documents != null) {
rsp.documents.clear();
}
offset -= size;
} else {
// drop only the first 'offset' links (and their expanded documents)
List links = rsp.documentLinks.subList(0, offset);
if (rsp.documents != null) {
links.forEach(rsp.documents::remove);
}
rsp.documentCount -= links.size();
// clearing the subList view removes the entries from rsp.documentLinks
links.clear();
offset = 0;
}
if (hasOption(ServiceOption.INSTRUMENTATION)) {
String statName = options.contains(QueryOption.INCLUDE_ALL_VERSIONS)
? STAT_NAME_QUERY_ALL_VERSIONS_DURATION_MICROS
: STAT_NAME_QUERY_DURATION_MICROS;
setTimeSeriesHistogramStat(statName, AGGREGATION_TYPE_AVG_MAX, queryTime);
setTimeSeriesHistogramStat(STAT_NAME_RESULT_PROCESSING_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX, end - start);
}
}
if (count == Integer.MAX_VALUE || useDirectSearch) {
// single pass
break;
}
if (hits.length == 0) {
break;
}
if (isPaginatedQuery) {
if (!hasPage) {
// initial request: results were not processed, so the first page starts from the top
bottom = null;
}
if (!hasPage || rsp.documentLinks.size() >= count
|| hits.length < resultLimit) {
// query had less results then per page limit or page is full of results
boolean createNextPageLink = true;
if (hasPage) {
// only link to a next page if it would contain at least one valid entry
int numOfHits = hitCount + offset;
createNextPageLink = checkNextPageHasEntry(bottom, options, s,
tq, sort, numOfHits, qs, queryStartTimeMicros, nodeSelectorPath);
}
if (createNextPageLink) {
rsp.nextPageLink = createNextPage(op, s, qs, tq, sort, bottom,
offset, null, expiration, indexLink, nodeSelectorPath, hasPage);
}
break;
}
}
// continue after the last visited hit, asking only for the results still missing
after = bottom;
resultLimit = count - rsp.documentLinks.size();
} while (resultLimit > 0);
if (hasOption(ServiceOption.INSTRUMENTATION)) {
ServiceStat st = ServiceStatUtils.getOrCreateHistogramStat(this, STAT_NAME_ITERATIONS_PER_QUERY);
setStat(st, queryCount);
}
return rsp;
}
/**
 * Determines whether a page following the given cursor would contain at least one entry.
 *
 * The index is walked with {@code searchAfter}, starting at {@code after}, until either a
 * batch yields a valid result (returns true) or the hits are exhausted (returns false).
 * Entries that result processing filters out (expired, deleted, ...) do not count.
 *
 * Example with 5 docs where doc=1,2,5 are valid and doc=3,4 are invalid, and limit=2:
 * the first page shows doc=1,2. The first probe fetches doc=3,4, which are both filtered out
 * by {@code processQueryResults}; the next probe hits doc=5, which is valid, so the result is
 * true. Had doc=5 been invalid as well, every probe would come back empty after filtering and
 * the result would be false, i.e. the current page is the last one.
 */
private boolean checkNextPageHasEntry(ScoreDoc after,
        EnumSet options,
        IndexSearcher s,
        Query tq,
        Sort sort,
        int count,
        QuerySpecification qs,
        long queryStartTimeMicros,
        String nodeSelectorPath) throws Exception {
    ScoreDoc cursor = after;
    // keep probing batches until a valid entry shows up or the index runs dry
    while (cursor != null) {
        TopDocs batch = (sort == null)
                ? s.searchAfter(cursor, tq, count)
                : s.searchAfter(cursor, tq, count, sort, false, false);
        if (batch == null) {
            return false;
        }

        ScoreDoc[] batchHits = batch.scoreDocs;
        if (batchHits.length == 0) {
            // reached the end without finding anything valid
            return false;
        }

        ServiceDocumentQueryResult probe = new ServiceDocumentQueryResult();
        probe.documents = new HashMap<>();
        // a limit of 1 is enough: a single valid result proves the next page is not empty
        cursor = processQueryResults(qs, options, 1, s, probe, batchHits,
                queryStartTimeMicros, nodeSelectorPath, false);
        if (probe.documentCount > 0) {
            return true;
        }
    }
    return false;
}
/**
 * Starts a {@code QueryPageService} that tracks a partial search result set, bound to an
 * index searcher and its search pointers, and returns the link clients use to fetch it.
 * The page can represent either document results or grouped results.
 *
 * @return forwarder link (path plus query) pointing at the newly started page service
 */
private String createNextPage(Operation op, IndexSearcher s, QuerySpecification qs,
        Query tq,
        Sort sort,
        ScoreDoc after,
        int offset,
        Integer groupOffset,
        long expiration,
        String indexLink,
        String nodeSelectorPath,
        boolean hasPage) {
    String pageId = String.valueOf(Utils.getNowMicrosUtc());
    String pagePath = UriUtils.buildUriPath(ServiceUriPaths.CORE_QUERY_PAGE, pageId);
    URI pageUri = UriUtils.buildUri(getHost(), pagePath);

    // The searcher and results live on this node, so the link handed out must route back
    // here. A query page forwarder link transparently forwards requests to the current node.
    URI nextForwarder = UriUtils.buildForwardToQueryPageUri(pageUri, getHost().getId());
    String nextLink = nextForwarder.getPath() + UriUtils.URI_QUERY_CHAR
            + nextForwarder.getQuery();

    // Previous page link: skipped entirely when FORWARD_ONLY is requested.
    // Requests to core/query-page are forwarded to this service with the referrer set to the
    // original query-page request, so when a new page is created the referrer is the page
    // that precedes it.
    String previousLink = null;
    if (!qs.options.contains(QueryOption.FORWARD_ONLY)) {
        URI previousForwarder = UriUtils.buildForwardToQueryPageUri(op.getReferer(),
                getHost().getId());
        previousLink = previousForwarder.getPath()
                + UriUtils.URI_QUERY_CHAR + previousForwarder.getQuery();
    }
    String linkForPage = hasPage ? previousLink : null;

    LuceneQueryPage page;
    if (after == null && groupOffset != null) {
        // page for group results
        page = new LuceneQueryPage(linkForPage, groupOffset);
    } else {
        // page for documents
        page = new LuceneQueryPage(linkForPage, after);
    }

    QuerySpecification pageSpec = new QuerySpecification();
    qs.copyTo(pageSpec);
    if (groupOffset == null) {
        pageSpec.options.remove(QueryOption.GROUP_BY);
    }
    pageSpec.offset = offset;
    pageSpec.context.nativeQuery = tq;
    pageSpec.context.nativePage = page;
    pageSpec.context.nativeSearcher = s;
    pageSpec.context.nativeSort = sort;

    ServiceDocument pageState = new ServiceDocument();
    pageState.documentSelfLink = pageUri.getPath();
    pageState.documentExpirationTimeMicros = expiration;

    AuthorizationContext authContext = op.getAuthorizationContext();
    if (authContext != null) {
        pageState.documentAuthPrincipalLink = authContext.getClaims().getSubject();
    }

    Operation startPost = Operation.createPost(pageUri).setBody(pageState);
    startPost.setCompletion((completedOp, failure) -> {
        if (failure != null) {
            logWarning("Unable to start next page service: %s", failure.toString());
        }
    });
    if (authContext != null) {
        setAuthorizationContext(startPost, authContext);
    }

    getHost().startService(startPost,
            new QueryPageService(pageSpec, indexLink, nodeSelectorPath));
    return nextLink;
}
/**
 * Walks the supplied Lucene hits and merges the valid ones into {@code rsp}.
 *
 * For every hit the stored fields are loaded and the hit is filtered against the optional
 * link white list, the version history (only the latest version is kept unless
 * INCLUDE_ALL_VERSIONS is set) and the delete marker (unless INCLUDE_DELETED is set).
 * Surviving links are de-duplicated; when {@code populateResponse} is true and COUNT is not
 * set, the response is additionally populated according to the EXPAND_* and SELECT_LINKS
 * options.
 *
 * On return {@code rsp.documentLinks} holds the unique links collected so far (including those
 * already present on entry) and {@code rsp.documentCount} equals its size.
 *
 * @param qs query specification, may be null
 * @param options query options
 * @param resultLimit maximum number of unique links to collect (ignored for COUNT)
 * @param s searcher the hits came from
 * @param rsp result object, updated in place
 * @param hits hits to process
 * @param queryStartTimeMicros fallback searcher update time, see {@code getSearcherUpdateTime}
 * @param nodeSelectorPath node selector used for OWNER_SELECTION filtering
 * @param populateResponse false to only collect links without expanding content
 * @return the last hit that was visited, or null if none was
 */
private ScoreDoc processQueryResults(QuerySpecification qs, EnumSet<QueryOption> options,
        int resultLimit, IndexSearcher s, ServiceDocumentQueryResult rsp, ScoreDoc[] hits,
        long queryStartTimeMicros,
        String nodeSelectorPath,
        boolean populateResponse) throws Exception {

    ScoreDoc lastDocVisited = null;

    Set<String> fieldsToLoad = this.fieldsToLoadNoExpand;
    if (populateResponse && (options.contains(QueryOption.EXPAND_CONTENT)
            || options.contains(QueryOption.OWNER_SELECTION)
            || options.contains(QueryOption.EXPAND_BINARY_CONTENT)
            || options.contains(QueryOption.EXPAND_SELECTED_FIELDS))) {
        fieldsToLoad = this.fieldsToLoadWithExpand;
    }

    if (populateResponse && options.contains(QueryOption.SELECT_LINKS)) {
        // copy first so the field sets held by this service are not modified
        fieldsToLoad = new HashSet<>(fieldsToLoad);
        for (QueryTask.QueryTerm link : qs.linkTerms) {
            fieldsToLoad.add(link.propertyName);
        }
    }

    // Keep duplicates out
    Set<String> uniques = new LinkedHashSet<>(rsp.documentLinks);
    final boolean hasCountOption = options.contains(QueryOption.COUNT);
    boolean hasIncludeAllVersionsOption = options.contains(QueryOption.INCLUDE_ALL_VERSIONS);
    Set<String> linkWhiteList = null;
    long documentsUpdatedBefore = -1;

    // Links for which post processing must be skipped. Entries are added below when a time
    // snapshot query finds no version of a document within the snapshot boundary, so the set
    // has to be mutable (Collections.emptySet() rejects add) and must exist whenever a
    // snapshot boundary is in effect.
    Set<String> linkBlackList = new HashSet<>();
    if (qs != null) {
        if (qs.context != null && qs.context.documentLinkWhiteList != null) {
            linkWhiteList = qs.context.documentLinkWhiteList;
        }
        if (qs.timeSnapshotBoundaryMicros != null) {
            documentsUpdatedBefore = qs.timeSnapshotBoundaryMicros;
        }
    }

    long searcherUpdateTime = getSearcherUpdateTime(s, queryStartTimeMicros);
    Map<String, Long> latestVersionPerLink = new HashMap<>();

    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    for (ScoreDoc sd : hits) {
        if (!hasCountOption && uniques.size() >= resultLimit) {
            break;
        }

        lastDocVisited = sd;
        loadDoc(s, visitor, sd.doc, fieldsToLoad);
        String link = visitor.documentSelfLink;
        String originalLink = link;

        // ignore results not in supplied white list
        // and also those are in blacklisted links
        if ((linkWhiteList != null && !linkWhiteList.contains(link))
                || linkBlackList.contains(originalLink)) {
            continue;
        }

        long documentVersion = visitor.documentVersion;

        Long latestVersion = latestVersionPerLink.get(originalLink);

        if (hasIncludeAllVersionsOption) {
            // Decorate link with version. If a document is marked deleted, at any version,
            // we will include it in the results
            link = UriUtils.buildPathWithVersion(link, documentVersion);
        } else {
            // We first determine what is the latest document version.
            // We then use the latest version to determine if the current document result is relevant.
            if (latestVersion == null) {
                latestVersion = getLatestVersion(s, searcherUpdateTime, link, documentVersion,
                        documentsUpdatedBefore);

                // latestVersion == -1 means there was no document version
                // in history, adding it to blacklist so as to avoid
                // processing the documents which were found later
                if (latestVersion == -1) {
                    linkBlackList.add(originalLink);
                    continue;
                }
                latestVersionPerLink.put(originalLink, latestVersion);
            }

            if (documentVersion < latestVersion) {
                continue;
            }

            boolean isDeleted = Action.DELETE.name()
                    .equals(visitor.documentUpdateAction);

            if (isDeleted && !options.contains(QueryOption.INCLUDE_DELETED)) {
                // ignore a document if its marked deleted and it has the latest version
                if (documentVersion >= latestVersion) {
                    uniques.remove(link);
                    if (rsp.documents != null) {
                        rsp.documents.remove(link);
                    }
                    if (rsp.selectedLinksPerDocument != null) {
                        rsp.selectedLinksPerDocument.remove(link);
                    }
                }
                continue;
            }
        }

        if (hasCountOption || !populateResponse) {
            // count unique instances of this link
            uniques.add(link);
            continue;
        }

        String json = null;
        ServiceDocument state = null;

        if (options.contains(QueryOption.EXPAND_CONTENT)
                || options.contains(QueryOption.OWNER_SELECTION)
                || options.contains(QueryOption.EXPAND_SELECTED_FIELDS)) {
            state = getStateFromLuceneDocument(visitor, originalLink);
            if (state == null) {
                // support reading JSON serialized state for backwards compatibility
                augmentDoc(s, visitor, sd.doc, LUCENE_FIELD_NAME_JSON_SERIALIZED_STATE);
                json = visitor.jsonSerializedState;
                if (json == null) {
                    continue;
                }
            }
        }

        if (options.contains(QueryOption.OWNER_SELECTION)) {
            if (!processQueryResultsForOwnerSelection(json, state, nodeSelectorPath)) {
                continue;
            }
        }

        if (options.contains(QueryOption.EXPAND_BINARY_CONTENT) && !rsp.documents.containsKey(link)) {
            byte[] binaryData = visitor.binarySerializedState;
            if (binaryData != null) {
                ByteBuffer buffer = ByteBuffer.wrap(binaryData, 0, binaryData.length);
                rsp.documents.put(link, buffer);
            } else {
                logWarning("Binary State not found for %s", link);
            }
        } else if (options.contains(QueryOption.EXPAND_CONTENT) && !rsp.documents.containsKey(link)) {
            if (options.contains(QueryOption.EXPAND_BUILTIN_CONTENT_ONLY)) {
                ServiceDocument stateClone = new ServiceDocument();
                state.copyTo(stateClone);
                rsp.documents.put(link, stateClone);
            } else if (state == null) {
                rsp.documents.put(link, Utils.fromJson(json, JsonElement.class));
            } else {
                JsonObject jo = toJsonElement(state);
                rsp.documents.put(link, jo);
            }
        } else if (options.contains(QueryOption.EXPAND_SELECTED_FIELDS) && !rsp.documents.containsKey(link)) {
            // filter out only the selected fields
            Set<String> selectFields = new TreeSet<>();
            if (qs != null) {
                qs.selectTerms.forEach(qt -> selectFields.add(qt.propertyName));
            }

            // create an uninitialized copy
            ServiceDocument copy = state.getClass().newInstance();
            for (String selectField : selectFields) {
                // transfer only needed fields
                Field field = ReflectionUtils.getField(state.getClass(), selectField);
                if (field != null) {
                    Object value = field.get(state);
                    if (value != null) {
                        field.set(copy, value);
                    }
                } else {
                    logFine("Unknown field '%s' passed for EXPAND_SELECTED_FIELDS", selectField);
                }
            }

            JsonObject jo = toJsonElement(copy);
            // this is called only for primitive-typed fields, the rest are nullified already
            jo.entrySet().removeIf(entry -> !selectFields.contains(entry.getKey()));

            rsp.documents.put(link, jo);
        }

        if (options.contains(QueryOption.SELECT_LINKS)) {
            processQueryResultsForSelectLinks(s, qs, rsp, visitor, sd.doc, link, state);
        }

        uniques.add(link);
    }

    rsp.documentLinks.clear();
    rsp.documentLinks.addAll(uniques);
    rsp.documentCount = (long) rsp.documentLinks.size();
    return lastDocVisited;
}
/**
 * Serializes the given state to a JSON object using the mapper registered for its class.
 */
private JsonObject toJsonElement(ServiceDocument state) {
    JsonElement element = GsonSerializers.getJsonMapperFor(state.getClass())
            .toJsonElement(state);
    return (JsonObject) element;
}
/**
 * Loads the stored fields of the Lucene document {@code docId} into {@code visitor}.
 * The visitor is reset with the supplied field set before the document is visited
 * (presumably restricting which stored fields get collected -- see
 * {@code DocumentStoredFieldVisitor.reset}).
 */
private void loadDoc(IndexSearcher s, DocumentStoredFieldVisitor visitor, int docId, Set fields) throws IOException {
visitor.reset(fields);
s.doc(docId, visitor);
}
/**
 * Visits the Lucene document {@code docId} again after resetting {@code visitor} with a
 * single field name. Used by callers to fetch one additional stored field for a document
 * that was already loaded (NOTE(review): whether previously loaded values survive the reset
 * depends on {@code DocumentStoredFieldVisitor.reset(String)} -- confirm there).
 */
private void augmentDoc(IndexSearcher s, DocumentStoredFieldVisitor visitor, int docId, String field) throws IOException {
visitor.reset(field);
s.doc(docId, visitor);
}
/**
 * Decides whether a query result should be kept when OWNER_SELECTION is in effect.
 *
 * @param json JSON serialized state, consulted only when {@code state} is null
 * @param state deserialized document state, may be null
 * @param nodeSelectorPath node selector from the query; when null the selector of this index
 *        service (which may itself be null) is used
 * @return true only if an owner was found and the local host is that owner
 */
private boolean processQueryResultsForOwnerSelection(String json, ServiceDocument state, String nodeSelectorPath) {
    String documentSelfLink = (state != null)
            ? state.documentSelfLink
            : Utils.fromJson(json, ServiceDocument.class).documentSelfLink;

    String selectorPath = (nodeSelectorPath != null)
            ? nodeSelectorPath
            : getPeerNodeSelectorPath();

    SelectOwnerResponse ownerResponse = getHost().findOwnerNode(selectorPath, documentSelfLink);
    if (ownerResponse == null) {
        return false;
    }
    // omit the result if the documentOwner is not the same as the local owner
    return ownerResponse.isLocalHostOwner;
}
private ServiceDocument processQueryResultsForSelectLinks(IndexSearcher s,
QuerySpecification qs, ServiceDocumentQueryResult rsp, DocumentStoredFieldVisitor d, int docId,
String link,
ServiceDocument state) throws Exception {
if (rsp.selectedLinksPerDocument == null) {
rsp.selectedLinksPerDocument = new HashMap<>();
rsp.selectedLinks = new HashSet<>();
}
Map linksPerDocument = rsp.selectedLinksPerDocument.get(link);
if (linksPerDocument == null) {
linksPerDocument = new HashMap<>();
rsp.selectedLinksPerDocument.put(link, linksPerDocument);
}
for (QueryTask.QueryTerm qt : qs.linkTerms) {
String linkValue = d.getLink(qt.propertyName);
if (linkValue != null) {
linksPerDocument.put(qt.propertyName, linkValue);
rsp.selectedLinks.add(linkValue);
continue;
}
// if there is no stored field with the link term property name, it might be
// a field with a collection of links. We do not store those in lucene, they are
// part of the binary serialized state.
if (state == null) {
DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
loadDoc(s, visitor, docId, this.fieldsToLoadWithExpand);
state = getStateFromLuceneDocument(visitor, link);
if (state == null) {
logWarning("Skipping link term %s for %s, can not find serialized state",
qt.propertyName, link);
continue;
}
}
Field linkCollectionField = ReflectionUtils
.getField(state.getClass(), qt.propertyName);
if (linkCollectionField == null) {
continue;
}
Object fieldValue = linkCollectionField.get(state);
if (fieldValue == null) {
continue;
}
if (!(fieldValue instanceof Collection>)) {
logWarning("Skipping link term %s for %s, field is not a collection",
qt.propertyName, link);
continue;
}
@SuppressWarnings("unchecked")
Collection linkCollection = (Collection) fieldValue;
int index = 0;
for (String item : linkCollection) {
if (item != null) {
linksPerDocument.put(
QuerySpecification.buildLinkCollectionItemName(qt.propertyName, index++),
item);
rsp.selectedLinks.add(item);
}
}
}
return state;
}
/**
 * Deserializes the binary state captured by the visitor into a {@code ServiceDocument}.
 * A missing self link is filled in from {@code link} and a missing kind is derived from the
 * runtime class of the state.
 *
 * @return the document state, or null (after logging a warning) if the visitor holds no
 *         binary state
 */
private ServiceDocument getStateFromLuceneDocument(DocumentStoredFieldVisitor doc, String link) {
    byte[] serialized = doc.binarySerializedState;
    if (serialized == null) {
        logWarning("State not found for %s", link);
        return null;
    }

    Object deserialized = KryoSerializers.deserializeDocument(serialized, 0,
            serialized.length);
    ServiceDocument state = (ServiceDocument) deserialized;

    if (state.documentSelfLink == null) {
        state.documentSelfLink = link;
    }
    if (state.documentKind == null) {
        state.documentKind = Utils.buildKind(state.getClass());
    }
    return state;
}
/**
 * Looks up the recorded update time of a searcher, keyed by its hash code.
 *
 * @return 0 for a null searcher; otherwise the recorded time, or
 *         {@code queryStartTimeMicros} when nothing is recorded for the searcher
 */
private long getSearcherUpdateTime(IndexSearcher s, long queryStartTimeMicros) {
    if (s != null) {
        return this.searcherUpdateTimesMicros.getOrDefault(s.hashCode(), queryStartTimeMicros);
    }
    return 0L;
}
/**
 * Resolves the latest version of the document identified by {@code link}.
 *
 * The version cache is consulted first (only when no time boundary is given and the cached
 * entry is not newer than the searcher), then the immutable parent links (whose documents
 * are always at version zero), and finally the index itself.
 *
 * @param s searcher used for the index lookup
 * @param searcherUpdateTime update time of the searcher, used to validate cache entries
 * @param link document self link
 * @param version version of the document currently being processed
 * @param documentsUpdatedBeforeInMicros time snapshot boundary, or -1 for none
 * @return the latest version; {@code version} if the index has no entry and no boundary was
 *         given; -1 if a boundary was given and the index has no entry
 */
private long getLatestVersion(IndexSearcher s,
        long searcherUpdateTime,
        String link, long version, long documentsUpdatedBeforeInMicros) throws IOException {
    if (hasOption(ServiceOption.INSTRUMENTATION)) {
        adjustStat(STAT_NAME_VERSION_CACHE_LOOKUP_COUNT, 1);
    }

    final boolean noTimeBoundary = documentsUpdatedBeforeInMicros == -1;

    synchronized (this.searchSync) {
        DocumentUpdateInfo cached = this.updatesPerLink.get(link);
        boolean cacheUsable = noTimeBoundary
                && cached != null
                && cached.updateTimeMicros <= searcherUpdateTime;
        if (cacheUsable) {
            return Math.max(version, cached.version);
        }

        // all immutable services have just a single, zero, version
        if (!this.immutableParentLinks.isEmpty()
                && this.immutableParentLinks.containsKey(UriUtils.getParentPath(link))) {
            return 0;
        }
    }

    if (hasOption(ServiceOption.INSTRUMENTATION)) {
        adjustStat(STAT_NAME_VERSION_CACHE_MISS_COUNT, 1);
    }

    Long updatedBefore = documentsUpdatedBeforeInMicros > 0
            ? documentsUpdatedBeforeInMicros : null;
    TopDocs td = queryIndexForVersion(link, s, null, updatedBefore);

    if (td.totalHits == 0) {
        // with a time boundary (QueryOption.TIME_SNAPSHOT) an empty result means the document
        // has no version in the requested history
        return noTimeBoundary ? version : -1;
    }

    DocumentStoredFieldVisitor versionVisitor = new DocumentStoredFieldVisitor();
    loadDoc(s, versionVisitor, td.scoreDocs[0].doc, this.fieldToLoadVersionLookup);

    long latestVersion = versionVisitor.documentVersion;
    long updateTime = versionVisitor.documentUpdateTimeMicros;
    // attempt to refresh or create new version cache entry, from the entry in the query results
    // The update method will reject the update if the version is stale
    updateLinkInfoCache(null, link, null, latestVersion, updateTime);
    return latestVersion;
}
/**
 * Expands the document links found in the body of {@code o} by issuing a GET per link and
 * collecting the raw response bodies into the result's {@code documents} map. {@code get} is
 * completed with the result once every GET has finished; failed GETs are logged and skipped.
 */
private void expandLinks(Operation o, Operation get) {
    ServiceDocumentQueryResult result = o.getBody(ServiceDocumentQueryResult.class);
    boolean nothingToExpand = result.documentLinks == null || result.documentLinks.isEmpty();
    if (nothingToExpand) {
        get.setBodyNoCloning(result).complete();
        return;
    }

    result.documents = new HashMap<>();
    AtomicInteger pending = new AtomicInteger(result.documentLinks.size());

    CompletionHandler onExpanded = (expandOp, failure) -> {
        try {
            if (failure == null) {
                synchronized (result.documents) {
                    result.documents.put(expandOp.getUri().getPath(), expandOp.getBodyRaw());
                }
            } else {
                logWarning("failure expanding %s: %s", expandOp.getUri().getPath(),
                        failure.getMessage());
            }
        } finally {
            // the last completion, successful or not, finishes the outer operation
            if (pending.decrementAndGet() == 0) {
                get.setBodyNoCloning(result).complete();
            }
        }
    };

    for (String documentLink : result.documentLinks) {
        Operation expandGet = Operation.createGet(this, documentLink);
        sendRequest(expandGet.setCompletion(onExpanded));
    }
}
/**
 * Stops the index service: marks it stopped, shuts down the private executors, detaches
 * and closes the index writer, stops the service on the host and completes the operation.
 */
public void handleDeleteImpl(Operation delete) throws Exception {
    setProcessingStage(ProcessingStage.STOPPED);

    this.privateIndexingExecutor.shutdown();
    this.privateQueryExecutor.shutdown();

    // detach the writer from the service before closing it
    IndexWriter detachedWriter = this.writer;
    this.writer = null;
    close(detachedWriter);

    getHost().stopService(this);
    delete.complete();
}
/**
 * Commits and closes the supplied index writer on a best-effort basis.
 *
 * Failures never propagate to the caller, but they are logged so that a failed commit or
 * close during shutdown does not go unnoticed.
 *
 * @param wr writer to close; null is ignored
 */
void close(IndexWriter wr) {
    if (wr == null) {
        return;
    }
    try {
        logInfo("Document count: %d ", wr.maxDoc());
        wr.commit();
        wr.close();
    } catch (Exception e) {
        // best effort: the service is going away, so report the failure and carry on
        logWarning("Failure closing index writer: %s", Utils.toString(e));
    }
}
/**
 * Indexes the document carried by an {@code UpdateIndexRequest}.
 *
 * The request is rejected (the operation is failed) when it is remote, when the document,
 * its self link, its update action or the description is missing, or when the index writer
 * is no longer available. Otherwise the common fields are added to the thread local Lucene
 * document, followed by the indexable property fields when the description declares any,
 * and the document is added to the index.
 */
protected void updateIndex(Operation updateOp) throws Exception {
    UpdateIndexRequest request = updateOp.getBody(UpdateIndexRequest.class);
    ServiceDocument s = request.document;
    ServiceDocumentDescription desc = request.description;

    // validate the request; the first violated rule decides the failure
    Throwable rejection = null;
    if (updateOp.isRemote()) {
        rejection = new IllegalStateException("Remote requests not allowed");
    } else if (s == null) {
        rejection = new IllegalArgumentException("document is required");
    } else if (s.documentSelfLink == null) {
        rejection = new IllegalArgumentException(
                "documentSelfLink is required");
    } else if (s.documentUpdateAction == null) {
        rejection = new IllegalArgumentException(
                "documentUpdateAction is required");
    } else if (desc == null) {
        rejection = new IllegalArgumentException("description is required");
    }

    IndexWriter wr = null;
    if (rejection == null) {
        wr = this.writer;
        if (wr == null) {
            rejection = new CancellationException("Index writer is null");
        }
    }

    if (rejection != null) {
        updateOp.fail(rejection);
        return;
    }

    String link = s.documentSelfLink;
    s.documentDescription = null;

    // common fields, present on every indexed document
    LuceneIndexDocumentHelper indexDocHelper = this.indexDocumentHelper.get();
    indexDocHelper.addSelfLinkField(link);
    if (s.documentKind != null) {
        indexDocHelper.addKindField(s.documentKind);
    }
    indexDocHelper.addUpdateActionField(s.documentUpdateAction);
    indexDocHelper.addBinaryStateFieldToDocument(s, request.serializedDocument, desc);
    if (s.documentAuthPrincipalLink != null) {
        indexDocHelper.addAuthPrincipalLinkField(s.documentAuthPrincipalLink);
    }
    if (s.documentTransactionId != null) {
        indexDocHelper.addTxIdField(s.documentTransactionId);
    }
    indexDocHelper.addUpdateTimeField(s.documentUpdateTimeMicros);
    if (s.documentExpirationTimeMicros > 0) {
        indexDocHelper.addExpirationTimeField(s.documentExpirationTimeMicros);
    }
    indexDocHelper.addVersionField(s.documentVersion);

    if (desc.documentIndexingOptions.contains(DocumentIndexingOption.INDEX_METADATA)) {
        indexDocHelper.addIndexingIdField(link, s.documentEpoch, s.documentVersion);
        indexDocHelper.addTombstoneTimeField();
    }

    Document threadLocalDoc = indexDocHelper.getDoc();
    try {
        boolean hasPropertyDescriptions = desc.propertyDescriptions != null
                && !desc.propertyDescriptions.isEmpty();
        // Without property type information only the common fields plus the full body are
        // indexed; otherwise the indexable property fields are added as well.
        if (hasPropertyDescriptions) {
            indexDocHelper.addIndexableFieldsToDocument(s, desc);

            if (hasOption(ServiceOption.INSTRUMENTATION)) {
                int fieldCount = indexDocHelper.getDoc().getFields().size();
                setTimeSeriesStat(STAT_NAME_INDEXED_FIELD_COUNT, AGGREGATION_TYPE_SUM, fieldCount);
                ServiceStat st = ServiceStatUtils.getOrCreateHistogramStat(this, STAT_NAME_FIELD_COUNT_PER_DOCUMENT);
                setStat(st, fieldCount);
            }
        }

        addDocumentToIndex(wr, updateOp, threadLocalDoc, s, desc);
    } finally {
        // NOTE: The Document is a thread local managed by the index document helper. Its
        // fields must be cleared *after* it is added to the index (above) and *before* it is
        // re-used. Once cleared, the document must not be used in this scope.
        threadLocalDoc.clear();
    }
}
/**
 * Records, per self link, the version below which older document versions exceed the
 * retention settings of the description.
 *
 * Nothing is recorded when version retention is disabled, when the document has fewer
 * versions than the limit, or when the new version does not land on a retention boundary.
 * A boundary is reached every {@code limit - floor} versions (at least 1) once the limit is
 * hit, i.e. when adding the new version means more versions are present in the index than
 * the versionRetentionLimit allows.
 */
private void checkDocumentRetentionLimit(ServiceDocument state, ServiceDocumentDescription desc)
        throws IOException {
    boolean retentionDisabled = desc.versionRetentionLimit
            == ServiceDocumentDescription.FIELD_VALUE_DISABLED_VERSION_RETENTION;
    if (retentionDisabled) {
        return;
    }

    final long limit = Math.max(1L, desc.versionRetentionLimit);
    final long currentVersion = state.documentVersion;
    if (currentVersion < limit) {
        return;
    }

    // the floor is at least 1 and never above the limit
    final long floor = Math.min(limit, Math.max(1L, desc.versionRetentionFloor));
    final long chunkThreshold = Math.max(1L, limit - floor);

    boolean onBoundary = ((currentVersion - limit) % chunkThreshold) == 0;
    if (!onBoundary) {
        return;
    }

    String link = state.documentSelfLink;
    long candidate = currentVersion - floor;
    synchronized (this.liveVersionsPerLink) {
        Long recorded = this.liveVersionsPerLink.get(link);
        // only ever move the recorded value forward
        boolean shouldRecord = recorded == null || candidate > recorded;
        if (shouldRecord) {
            this.liveVersionsPerLink.put(link, candidate);
        }
    }
}
/**
 * Will attempt to re-open index writer to recover from a specific exception. The method
 * assumes the caller has acquired the writer semaphore.
 *
 * The thread local Lucene document is always cleared first. Recovery (a writer refresh) is
 * attempted only for an {@code AlreadyClosedException} that is not attributable to a closed
 * reader; every other case is just logged.
 *
 * @param e failure observed on the index service thread
 */
private void checkFailureAndRecover(Exception e) {
    // When document create or update fails with an exception. Clear the threadLocalDoc.
    Document threadLocalDoc = this.indexDocumentHelper.get().getDoc();
    threadLocalDoc.clear();

    if (getHost().isStopping()) {
        logInfo("Exception after host stop, on index service thread: %s", e.toString());
        return;
    }
    if (!(e instanceof AlreadyClosedException)) {
        logSevere("Exception on index service thread: %s", Utils.toString(e));
        return;
    }

    IndexWriter w = this.writer;
    // guard against a null message so that inspecting the failure can not itself fail and
    // prevent the writer recovery below
    String message = e.getMessage();
    boolean readerClosed = message != null && message.contains("IndexReader");
    if ((w != null && w.isOpen()) || readerClosed) {
        // The already closed exception can happen due to an expired searcher, simply
        // log in that case
        adjustStat(STAT_NAME_READER_ALREADY_CLOSED_EXCEPTION_COUNT, 1);
        logWarning("Exception on index service thread: %s", Utils.toString(e));
        return;
    }

    logSevere("Exception on index service thread: %s", Utils.toString(e));
    this.adjustStat(STAT_NAME_WRITER_ALREADY_CLOSED_EXCEPTION_COUNT, 1);
    applyFileLimitRefreshWriter(true);
}
/**
 * Removes every indexed version of the document's self link ahead of a forced POST, so the
 * index does not end up with a duplicate version history, then resets the cached update
 * info for the link to version zero.
 */
private void deleteAllDocumentsForSelfLinkForcedPost(IndexWriter wr, ServiceDocument sd)
        throws IOException {
    adjustStat(STAT_NAME_FORCED_UPDATE_DOCUMENT_DELETE_COUNT, 1);

    Term selfLinkTerm = new Term(ServiceDocument.FIELD_NAME_SELF_LINK, sd.documentSelfLink);
    wr.deleteDocuments(selfLinkTerm);

    synchronized (this.searchSync) {
        // drop the stale cache entry and stamp both the writer update and the removal time
        this.updatesPerLink.remove(sd.documentSelfLink);
        long timestamp = Utils.getNowMicrosUtc();
        this.writerUpdateTimeMicros = timestamp;
        this.serviceRemovalDetectedTimeMicros = timestamp;
    }

    ServiceDocumentDescription description =
            getHost().buildDocumentDescription(sd.documentSelfLink);
    updateLinkInfoCache(description, sd.documentSelfLink, sd.documentKind, 0,
            Utils.getNowMicrosUtc());
}
/**
 * Deletes all indexed versions of {@code link}, clears the cached bookkeeping for it and,
 * when the last known state is available, notifies active queries and asks the host to
 * remove the service.
 *
 * @param postOrDelete the operation driving the delete; it is completed or failed by
 *        {@code deleteDocumentsFromIndex}
 * @param link the self link whose documents are removed
 * @param state the latest known state for the link, or {@code null} when unavailable
 * @throws Exception if the index delete fails
 */
private void deleteAllDocumentsForSelfLink(Operation postOrDelete, String link,
        ServiceDocument state)
        throws Exception {
    ServiceDocumentDescription description = null;
    String kind = null;
    if (state != null) {
        description = getHost().buildDocumentDescription(state.documentSelfLink);
        kind = state.documentKind;
    }
    deleteDocumentsFromIndex(postOrDelete, description, link, kind, 0, Long.MAX_VALUE);

    synchronized (this.searchSync) {
        // Drop the cached entry and record when the removal was observed.
        this.updatesPerLink.remove(link);
        long removalTimeMicros = Utils.getNowMicrosUtc();
        this.writerUpdateTimeMicros = removalTimeMicros;
        this.serviceRemovalDetectedTimeMicros = removalTimeMicros;
    }

    adjustTimeSeriesStat(STAT_NAME_SERVICE_DELETE_COUNT, AGGREGATION_TYPE_SUM, 1);
    logFine("%s expired", link);

    if (state != null) {
        applyActiveQueries(postOrDelete, state, null);

        // Remove the service if it is running; the pragma tells the receiver not to
        // update the index again.
        Operation serviceDelete = Operation.createDelete(this, state.documentSelfLink)
                .setBodyNoCloning(state)
                .disableFailureLogging(true)
                .addPragmaDirective(Operation.PRAGMA_DIRECTIVE_NO_INDEX_UPDATE);
        sendRequest(serviceDelete);
    }
}
/**
 * Deletes the versions of {@code link} in the inclusive range
 * [{@code oldestVersion}, {@code newestVersion}], refreshes the link info cache and
 * completes {@code delete}. If no index writer is available the operation is failed with
 * a {@link CancellationException} instead.
 */
private void deleteDocumentsFromIndex(Operation delete, ServiceDocumentDescription desc, String link, String kind, long oldestVersion,
        long newestVersion) throws Exception {
    final IndexWriter currentWriter = this.writer;
    if (currentWriter != null) {
        deleteDocumentFromIndex(link, oldestVersion, newestVersion, currentWriter);

        // The timestamp is taken AFTER the index was modified so that it can be compared
        // against the searcher update time. Using the start time instead could race with
        // a searcher refresh and leave this change invisible to the new searcher.
        long updateTimeMicros = Utils.getNowMicrosUtc();
        updateLinkInfoCache(desc, link, kind, newestVersion, updateTimeMicros);
        delete.complete();
        return;
    }
    delete.fail(new CancellationException("Index writer is null"));
}
/**
 * Issues a Lucene delete for every document that matches both the given self link and a
 * version within the inclusive range [{@code oldestVersion}, {@code newestVersion}].
 */
private void deleteDocumentFromIndex(String link, long oldestVersion, long newestVersion,
        IndexWriter wr) throws IOException {
    Term selfLinkTerm = new Term(ServiceDocument.FIELD_NAME_SELF_LINK, link);
    Query versionRange = LongPoint.newRangeQuery(ServiceDocument.FIELD_NAME_VERSION,
            oldestVersion, newestVersion);

    // Both clauses are mandatory: same link AND version inside the range.
    BooleanQuery linkAndVersion = new BooleanQuery.Builder()
            .add(versionRange, Occur.MUST)
            .add(new TermQuery(selfLinkTerm), Occur.MUST)
            .build();
    wr.deleteDocuments(linkAndVersion);
}
/**
 * Adds {@code doc} to the index through {@code wr}, completes {@code op}, and then runs
 * the follow-up bookkeeping for the new version: link info cache update, retention limit
 * check, indexing metadata check and active query notification.
 *
 * For a POST carrying PRAGMA_DIRECTIVE_FORCE_INDEX_UPDATE, all existing documents for
 * the self link are deleted before the new document is added.
 *
 * @param wr the index writer to add the document to
 * @param op the operation that triggered the update; its body is cleared and it is
 *        completed once the document has been added
 * @param doc the Lucene document to add
 * @param sd the service document state being indexed
 * @param desc the description for {@code sd}; it is dereferenced without a null check
 * @throws IOException if the writer fails to delete or add documents
 */
private void addDocumentToIndex(IndexWriter wr, Operation op, Document doc, ServiceDocument sd,
ServiceDocumentDescription desc) throws IOException {
// Timing is only collected when instrumentation is enabled.
long startNanos = 0;
if (hasOption(ServiceOption.INSTRUMENTATION)) {
startNanos = System.nanoTime();
}
if (op.getAction() == Action.POST
&& op.hasPragmaDirective(Operation.PRAGMA_DIRECTIVE_FORCE_INDEX_UPDATE)) {
// DocumentIndexingOption.INDEX_METADATA instructs the index service to maintain
// additional metadata attributes in the index, such as whether a particular Lucene
// document represents the "current" version of a service, or whether the service is
// deleted.
//
// Since these attributes are updated out of band, there is the potential of a race
// between this update and the metadata attribute updates, which occur during index
// service maintenance. This race cannot be avoided by the caller, so we use a lock
// here to force metadata indexing updates to be flushed before deleting the existing
// documents.
//
// Note this may cause significant additional latency under load for this particular
// combination of options (PRAGMA_DIRECTIVE_FORCE_INDEX_UPDATE and INDEX_METADATA). My
// original preference was to fail operations in this category; however, supporting
// this scenario is required in order to support migration for the time being, where
// services with INDEX_METADATA may be deleted and recreated.
if (desc.documentIndexingOptions.contains(DocumentIndexingOption.INDEX_METADATA)) {
// Lock order: metadataUpdateSync first, then metadataUpdates. Any pending metadata
// update for this self link is dropped from both tracking structures.
synchronized (this.metadataUpdateSync) {
synchronized (this.metadataUpdates) {
this.metadataUpdatesPerLink.remove(sd.documentSelfLink);
this.metadataUpdates.removeIf((info) ->
info.selfLink.equals(sd.documentSelfLink));
}
}
}
deleteAllDocumentsForSelfLinkForcedPost(wr, sd);
}
wr.addDocument(doc);
if (hasOption(ServiceOption.INSTRUMENTATION)) {
long durationNanos = System.nanoTime() - startNanos;
setTimeSeriesStat(STAT_NAME_INDEXED_DOCUMENT_COUNT, AGGREGATION_TYPE_SUM, 1);
setTimeSeriesHistogramStat(STAT_NAME_INDEXING_DURATION_MICROS, AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(durationNanos));
}
// Use time AFTER index was updated to be sure that it can be compared
// against the time the searcher was updated and have this change
// be reflected in the new searcher. If the start time would be used,
// it is possible to race with updating the searcher and NOT have this
// change be reflected in the searcher.
long updateTime = Utils.getNowMicrosUtc();
updateLinkInfoCache(desc, sd.documentSelfLink, sd.documentKind, sd.documentVersion,
updateTime);
// The operation is completed (with its body cleared) before the remaining
// bookkeeping below runs.
op.setBody(null).complete();
checkDocumentRetentionLimit(sd, desc);
checkDocumentIndexingMetadata(sd, desc, updateTime);
applyActiveQueries(op, sd, desc);
}
/**
 * Queues (or re-stamps) a pending metadata indexing update for the document's self link.
 * Nothing is queued for documents whose description lacks
 * {@link DocumentIndexingOption#INDEX_METADATA} or whose version is 0.
 *
 * @param sd the document that was just indexed
 * @param desc the description carrying the document's indexing options
 * @param updateTimeMicros the time at which the document was added to the index
 */
private void checkDocumentIndexingMetadata(ServiceDocument sd, ServiceDocumentDescription desc,
        long updateTimeMicros) {
    boolean indexesMetadata = desc.documentIndexingOptions
            .contains(DocumentIndexingOption.INDEX_METADATA);
    if (!indexesMetadata || sd.documentVersion == 0) {
        return;
    }

    synchronized (this.metadataUpdates) {
        MetadataUpdateInfo pending = this.metadataUpdatesPerLink.get(sd.documentSelfLink);
        if (pending == null) {
            // First pending update for this link: track it in both structures.
            MetadataUpdateInfo created = new MetadataUpdateInfo();
            created.selfLink = sd.documentSelfLink;
            created.kind = sd.documentKind;
            created.updateTimeMicros = updateTimeMicros;
            this.metadataUpdatesPerLink.put(sd.documentSelfLink, created);
            this.metadataUpdates.add(created);
            return;
        }

        if (pending.updateTimeMicros < updateTimeMicros) {
            // Take the entry out before changing its timestamp and put it back
            // afterwards, so the collection never holds an element mutated in place.
            this.metadataUpdates.remove(pending);
            pending.updateTimeMicros = updateTimeMicros;
            this.metadataUpdates.add(pending);
        }
    }
}
/**
 * Records, under {@code searchSync}, that {@code link} was modified at
 * {@code lastAccessTime}. Immutable services are tracked per parent path, all others per
 * self link; the per-kind update time and the overall writer update time are advanced
 * as well.
 *
 * @param desc description of the document, may be {@code null}
 * @param link self link of the modified document
 * @param kind document kind, may be {@code null}
 * @param version version of the document that was written or deleted
 * @param lastAccessTime time of the modification, in microseconds
 */
private void updateLinkInfoCache(ServiceDocumentDescription desc,
        String link, String kind, long version, long lastAccessTime) {
    boolean isImmutable = false;
    if (desc != null && desc.serviceCapabilities != null) {
        isImmutable = desc.serviceCapabilities.contains(ServiceOption.IMMUTABLE);
    }

    synchronized (this.searchSync) {
        if (!isImmutable) {
            this.updatesPerLink.compute(link, (key, current) -> {
                DocumentUpdateInfo updated = current != null ? current : new DocumentUpdateInfo();
                // Older versions never overwrite the info for a newer one.
                if (version >= updated.version) {
                    updated.updateTimeMicros = Math.max(updated.updateTimeMicros, lastAccessTime);
                    updated.version = version;
                }
                return updated;
            });
        } else {
            String parentPath = UriUtils.getParentPath(link);
            this.immutableParentLinks.compute(parentPath, (key, previous) ->
                    previous == null ? lastAccessTime : Math.max(previous, lastAccessTime));
        }

        if (kind != null) {
            this.documentKindUpdateInfo.compute(kind, (key, previous) ->
                    Math.max(previous == null ? 0L : previous, lastAccessTime));
        }

        // The index update time may only be increased.
        if (lastAccessTime > this.writerUpdateTimeMicros) {
            this.writerUpdateTimeMicros = lastAccessTime;
        }
    }
}
/**
 * Advances the cached update times for every link and kind touched by a batch of
 * metadata indexing updates, and moves the writer update time forward if needed.
 *
 * Links without an existing entry in {@code updatesPerLink} are left untouched. Kinds
 * are handled the same way as in {@code updateLinkInfoCache}: a {@code null} kind is
 * skipped and a missing entry is treated as 0 rather than failing on unboxing.
 *
 * @param updateTimeMicros the time to record for the processed entries
 * @param entries the metadata updates that were processed
 */
private void updateLinkInfoCacheForMetadataUpdates(long updateTimeMicros,
        Collection<MetadataUpdateInfo> entries) {
    synchronized (this.searchSync) {
        for (MetadataUpdateInfo info : entries) {
            this.updatesPerLink.compute(info.selfLink, (k, entry) -> {
                if (entry != null) {
                    entry.updateTimeMicros = Math.max(entry.updateTimeMicros, updateTimeMicros);
                }
                return entry;
            });

            if (info.kind != null) {
                this.documentKindUpdateInfo.compute(info.kind, (k, entry) ->
                        Math.max(entry == null ? 0L : entry, updateTimeMicros));
            }
        }

        // The index update time may only be increased.
        if (this.writerUpdateTimeMicros < updateTimeMicros) {
            this.writerUpdateTimeMicros = updateTimeMicros;
        }
    }
}
/**
 * Returns an updated {@link IndexSearcher} to query {@code selfLink}.
 *
 * If the index has been updated since the last {@link IndexSearcher} was created, those
 * changes will not be reflected by that {@link IndexSearcher}. However, for performance
 * reasons, we do not want to create a new one for every query either.
 *
 * We create one in one of following conditions:
 *
 * 1) No searcher for the calling thread exists.
 * 2) The query is across many links or multiple versions, not a specific one,
 *    and the index was changed.
 * 3) The query is for a specific self link AND the self link has seen an update
 *    after the searcher was last updated.
 *
 * Searchers are cached per thread id.
 *
 * @param selfLink the self link the query targets, or {@code null} when the query is not
 *        scoped to a single link
 * @param kindScope the document kinds the query is restricted to, or {@code null} when
 *        there is no kind restriction
 * @param resultLimit the maximum number of results the query asks for
 * @param w the index writer used to open or refresh the underlying reader
 * @param doNotRefresh passed through to {@code documentNeedsNewSearcher}, where it can
 *        suppress a refresh for recently updated indexes
 * @return an {@link IndexSearcher} that is fresh enough to execute the specified query
 * @throws IOException if the reader cannot be opened, refreshed or closed
 */
private IndexSearcher createOrRefreshSearcher(String selfLink, Set<String> kindScope,
        int resultLimit, IndexWriter w,
        boolean doNotRefresh)
        throws IOException {
    IndexSearcher s;
    boolean needNewSearcher = false;
    long threadId = Thread.currentThread().getId();
    // Taken before any refresh so the recorded searcher time is never later than the
    // moment the reader was actually opened.
    long now = Utils.getNowMicrosUtc();

    synchronized (this.searchSync) {
        s = this.searchers.get(threadId);
        long searcherUpdateTime = getSearcherUpdateTime(s, 0);
        if (s == null) {
            needNewSearcher = true;
        } else {
            needNewSearcher = documentNeedsNewSearcher(selfLink, kindScope, resultLimit,
                    searcherUpdateTime, doNotRefresh);
        }
    }

    if (s != null && !needNewSearcher) {
        adjustTimeSeriesStat(STAT_NAME_SEARCHER_REUSE_BY_DOCUMENT_KIND_COUNT, AGGREGATION_TYPE_SUM, 1);
        return s;
    }

    if (s != null) {
        // Try a refresh of the existing reader; keep the current searcher when the
        // refresh yields no new reader.
        IndexReader oldReader = s.getIndexReader();
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) oldReader, w);
        if (newReader == null || newReader == oldReader) {
            return s;
        }
        oldReader.close();
        this.searcherUpdateTimesMicros.remove(s.hashCode());
        s = new IndexSearcher(newReader);
    } else {
        s = new IndexSearcher(DirectoryReader.open(w, true, true));
    }

    s.setSimilarity(s.getSimilarity(false));
    adjustTimeSeriesStat(STAT_NAME_SEARCHER_UPDATE_COUNT, AGGREGATION_TYPE_SUM, 1);

    synchronized (this.searchSync) {
        this.searchers.put(threadId, s);
        this.searcherUpdateTimesMicros.put(s.hashCode(), now);
        return s;
    }
}
/**
 * Decides whether the calling thread's searcher is too old for the given query.
 *
 * For a single-link, single-result query the decision is based on the cached update
 * time of that link (or of its parent path for immutable services). For every other
 * query it is based on the writer update time, or on the most recent update time of the
 * kinds in {@code kindScope} when one is supplied.
 *
 * @param selfLink the self link the query targets, or {@code null}
 * @param kindScope the document kinds the query is restricted to, or {@code null}
 * @param resultLimit the maximum number of results the query asks for
 * @param searcherUpdateTime the time the current searcher was last updated
 * @param doNotRefresh when {@code true}, a refresh is skipped while the index update is
 *        more recent than {@code searcherRefreshIntervalMicros}
 * @return {@code true} if a new searcher should be created
 */
private boolean documentNeedsNewSearcher(String selfLink, Set<String> kindScope,
        int resultLimit, long searcherUpdateTime, boolean doNotRefresh) {
    if (selfLink != null && resultLimit == 1) {
        DocumentUpdateInfo du = this.updatesPerLink.get(selfLink);
        if (du == null) {
            // ODL services may be created and removed due to memory pressure while the
            // searcher was not updated. Retrieval of those services would then fail
            // because the searcher does not know about the creation yet. To incorporate
            // such service removal, also check the serviceRemovalDetectedTimeMicros.
            if (searcherUpdateTime < this.serviceRemovalDetectedTimeMicros) {
                return true;
            }
        } else if (du.updateTimeMicros >= searcherUpdateTime) {
            return true;
        }

        // Immutable services are tracked by parent path rather than by self link.
        String parent = UriUtils.getParentPath(selfLink);
        Long updateTime = this.immutableParentLinks.get(parent);
        return updateTime != null && updateTime >= searcherUpdateTime;
    }

    long indexUpdateTime;
    if (kindScope == null) {
        indexUpdateTime = this.writerUpdateTimeMicros;
    } else {
        // Retrieve the most recent update time for the given kinds. If none exists (no
        // update happened for the kinds), use Long.MIN_VALUE so the existing searcher is
        // reused.
        indexUpdateTime = kindScope.stream()
                .map(this.documentKindUpdateInfo::get)
                .filter(Objects::nonNull)
                .max(Long::compare)
                .orElse(Long.MIN_VALUE);
    }

    if (searcherUpdateTime >= indexUpdateTime) {
        return false;
    }

    // For a query with DO_NOT_REFRESH, if all other checks suggest we need a new
    // searcher, check whether enough time has elapsed since the index was updated.
    if (doNotRefresh
            && (indexUpdateTime + searcherRefreshIntervalMicros) >= Utils.getSystemNowMicrosUtc()) {
        return false;
    }
    return true;
}
/**
 * Returns the value of this service's {@code uri} field.
 */
@Override
public URI getUri() {
return this.uri;
}
/**
 * Runs maintenance on the field info cache (when present) and then routes the actual
 * index maintenance through {@code handleRequest} as a POST carrying a
 * {@link MaintenanceRequest}. The outcome of that request is relayed to {@code post}.
 *
 * @param post the maintenance operation to complete or fail
 */
@Override
public void handleMaintenance(Operation post) {
    if (this.fieldInfoCache != null) {
        this.fieldInfoCache.handleMaintenance();
    }

    Operation maintenanceOp = Operation.createPost(this.getUri());
    maintenanceOp.setBodyNoCloning(new MaintenanceRequest());
    maintenanceOp.setCompletion((completedOp, failure) -> {
        if (failure == null) {
            post.complete();
        } else {
            post.fail(failure);
        }
    });

    // The internal request runs with the system authorization context.
    setAuthorizationContext(maintenanceOp, getSystemAuthorizationContext());
    handleRequest(maintenanceOp);
}
/**
 * Runs one maintenance pass over the index. In order: refresh the searcher, expire
 * documents, apply version retention, apply pending metadata indexing updates, apply
 * memory limits, commit the writer, and finally check whether the writer needs to be
 * re-opened. The duration of each phase is recorded as a histogram stat.
 *
 * On any exception (unless the host is stopping) a forced writer re-open is attempted
 * and the operation is failed with the original exception.
 *
 * @param op the maintenance operation to complete or fail
 */
private void handleMaintenanceImpl(Operation op) throws Exception {
try {
IndexWriter w = this.writer;
if (w == null) {
op.fail(new CancellationException("Index writer is null"));
return;
}
// Captured before the searcher refresh; used below to select which queued metadata
// updates are old enough to be processed with this searcher.
long searcherCreationTime = Utils.getNowMicrosUtc();
synchronized (this.metadataUpdates) {
this.metadataUpdatesPerLink.clear();
}
// Phase 1: refresh the searcher used by the rest of this maintenance pass.
long startNanos = System.nanoTime();
IndexSearcher s = createOrRefreshSearcher(null, null, Integer.MAX_VALUE, w, false);
long endNanos = System.nanoTime();
setTimeSeriesHistogramStat(STAT_NAME_MAINTENANCE_SEARCHER_REFRESH_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(endNanos - startNanos));
// The time-bounded phases below stop once this deadline (one maintenance interval
// from now) has passed.
long deadline = Utils.getSystemNowMicrosUtc() + getMaintenanceIntervalMicros();
// Phase 2: document expiration.
startNanos = endNanos;
applyDocumentExpirationPolicy(s, deadline);
endNanos = System.nanoTime();
setTimeSeriesHistogramStat(STAT_NAME_MAINTENANCE_DOCUMENT_EXPIRATION_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(endNanos - startNanos));
// Phase 3: version retention.
startNanos = endNanos;
applyDocumentVersionRetentionPolicy(deadline);
endNanos = System.nanoTime();
setTimeSeriesHistogramStat(STAT_NAME_MAINTENANCE_VERSION_RETENTION_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(endNanos - startNanos));
// Phase 4: metadata indexing updates, under the same lock that forced index
// updates take in addDocumentToIndex.
startNanos = endNanos;
synchronized (this.metadataUpdateSync) {
applyMetadataIndexingUpdates(s, searcherCreationTime, deadline);
}
endNanos = System.nanoTime();
setTimeSeriesHistogramStat(STAT_NAME_MAINTENANCE_METADATA_INDEXING_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(endNanos - startNanos));
// Phase 5: memory limits (expired paginated searchers, update info cache).
startNanos = endNanos;
applyMemoryLimit();
endNanos = System.nanoTime();
setTimeSeriesHistogramStat(STAT_NAME_MAINTENANCE_MEMORY_LIMIT_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(endNanos - startNanos));
// Phase 6: commit pending index changes.
startNanos = endNanos;
long sequenceNumber = w.commit();
endNanos = System.nanoTime();
adjustTimeSeriesStat(STAT_NAME_COMMIT_COUNT, AGGREGATION_TYPE_SUM, 1);
setTimeSeriesHistogramStat(STAT_NAME_COMMIT_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(endNanos - startNanos));
// Only send notification when something has committed.
// When there was nothing to commit, sequence number is -1.
if (sequenceNumber > -1) {
CommitInfo commitInfo = new CommitInfo();
commitInfo.sequenceNumber = sequenceNumber;
publish(Operation.createPatch(null).setBody(commitInfo));
}
// Phase 7: re-open the writer if the index file count calls for it (not forced).
startNanos = endNanos;
applyFileLimitRefreshWriter(false);
endNanos = System.nanoTime();
setTimeSeriesHistogramStat(STAT_NAME_MAINTENANCE_FILE_LIMIT_REFRESH_DURATION_MICROS,
AGGREGATION_TYPE_AVG_MAX,
TimeUnit.NANOSECONDS.toMicros(endNanos - startNanos));
if (this.hasOption(ServiceOption.INSTRUMENTATION)) {
setStat(LuceneDocumentIndexService.STAT_NAME_INDEXED_DOCUMENT_COUNT, w.numDocs());
logQueueDepthStat(this.updateQueue, STAT_NAME_FORMAT_UPDATE_QUEUE_DEPTH);
logQueueDepthStat(this.queryQueue, STAT_NAME_FORMAT_QUERY_QUEUE_DEPTH);
}
op.complete();
} catch (Exception e) {
if (this.getHost().isStopping()) {
op.fail(new CancellationException("Host is stopping"));
return;
}
// Any failure during maintenance triggers a forced writer re-open before the
// operation is failed with the original exception.
logWarning("Attempting recovery due to error: %s", Utils.toString(e));
applyFileLimitRefreshWriter(true);
op.fail(e);
}
}
/**
 * Publishes one time series stat per key of the given queue, holding the current number
 * of queued operations for that key.
 *
 * @param queue the queue whose per-key sizes are reported
 * @param format a {@link String#format} pattern that receives the key and yields the
 *        stat name
 */
private void logQueueDepthStat(RoundRobinOperationQueue queue, String format) {
    // Lambda parameter types are inferred from sizesByKey(), which avoids raw
    // Map/Entry declarations.
    queue.sizesByKey().forEach((key, depth) -> {
        String statName = String.format(format, key);
        setTimeSeriesStat(statName, AGGREGATION_TYPE_AVG_MAX, depth);
    });
}
/**
 * Drains the queued metadata indexing updates that are not newer than the supplied
 * searcher and applies them to the index.
 *
 * Entries are processed regardless of the deadline while the remaining backlog exceeds
 * {@code metadataUpdateMaxQueueDepth}; after that, processing continues only until
 * {@code deadline}. Whatever is left over is put back on the queue for the next pass.
 *
 * @param searcher the searcher used to look up the documents to update
 * @param searcherCreationTime entries with a later update time are left on the queue
 * @param deadline system time (micros) after which optional processing stops
 * @throws IOException if reading from or writing to the index fails
 */
private void applyMetadataIndexingUpdates(IndexSearcher searcher, long searcherCreationTime,
        long deadline) throws IOException {
    Map<String, MetadataUpdateInfo> entries = new HashMap<>();
    synchronized (this.metadataUpdates) {
        Iterator<MetadataUpdateInfo> pending = this.metadataUpdates.iterator();
        while (pending.hasNext()) {
            MetadataUpdateInfo info = pending.next();
            // Stop at the first entry the searcher cannot have seen yet.
            if (info.updateTimeMicros > searcherCreationTime) {
                break;
            }
            // Keyed by self link, so at most one entry per link is processed.
            entries.put(info.selfLink, info);
            pending.remove();
        }
    }

    if (entries.isEmpty()) {
        return;
    }

    // Live view over the map: removing through the iterator below also removes the
    // entry from 'entries'.
    Collection<MetadataUpdateInfo> entriesToProcess = entries.values();
    int queueDepth = entriesToProcess.size();
    Iterator<MetadataUpdateInfo> it = entriesToProcess.iterator();
    int updateCount = 0;

    // Mandatory part: shrink the backlog down to the configured maximum depth.
    while (it.hasNext() && --queueDepth > metadataUpdateMaxQueueDepth) {
        IndexWriter wr = this.writer;
        if (wr == null) {
            break;
        }
        updateCount += applyMetadataIndexingUpdate(searcher, wr, it.next());
    }

    // Optional part: keep going while there is time left in this maintenance pass.
    while (it.hasNext() && Utils.getSystemNowMicrosUtc() < deadline) {
        IndexWriter wr = this.writer;
        if (wr == null) {
            break;
        }
        updateCount += applyMetadataIndexingUpdate(searcher, wr, it.next());
    }

    if (it.hasNext()) {
        // Re-queue whatever was not processed and drop it from the view, so that only
        // processed entries reach the cache update below.
        synchronized (this.metadataUpdates) {
            while (it.hasNext()) {
                MetadataUpdateInfo info = it.next();
                it.remove();
                this.metadataUpdatesPerLink.putIfAbsent(info.selfLink, info);
                this.metadataUpdates.add(info);
            }
        }
    }

    updateLinkInfoCacheForMetadataUpdates(Utils.getNowMicrosUtc(), entriesToProcess);

    if (updateCount > 0) {
        setTimeSeriesHistogramStat(STAT_NAME_METADATA_INDEXING_UPDATE_COUNT,
                AGGREGATION_TYPE_SUM, updateCount);
    }
}
/**
 * Updates the tombstone-time metadata for the indexed versions of a single self link.
 *
 * All documents for the link that are still marked active are loaded. Every version
 * that has been superseded gets its tombstone time set to the update time of the next
 * version. The highest version is only tombstoned when its last update action was a
 * DELETE, in which case its own update time is used.
 *
 * @param searcher the searcher used to find the documents
 * @param wr the writer used to update the doc values
 * @param info identifies the self link to process
 * @return the number of tombstone updates issued
 * @throws IOException if reading from or writing to the index fails
 */
private long applyMetadataIndexingUpdate(IndexSearcher searcher, IndexWriter wr,
        MetadataUpdateInfo info) throws IOException {
    Query selfLinkClause = new TermQuery(new Term(ServiceDocument.FIELD_NAME_SELF_LINK,
            info.selfLink));
    Query currentClause = NumericDocValuesField.newExactQuery(
            LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME,
            LuceneIndexDocumentHelper.ACTIVE_DOCUMENT_TOMBSTONE_TIME);
    Query booleanQuery = new BooleanQuery.Builder()
            .add(selfLinkClause, Occur.MUST)
            .add(currentClause, Occur.MUST)
            .build();

    //
    // In a perfect world, we'd sort the results here and examine the first result to determine
    // whether the document has been deleted. Unfortunately, Lucene 6.5 has a bug where, for
    // queries which specify sorts, NumericDocValuesField query clauses are ignored (these
    // queries are new and experimental in 6.5). As a result, we must traverse the unordered
    // results and track the highest result that we've seen.
    //
    long highestVersion = -1;
    String lastUpdateAction = null;
    final int pageSize = 10000;
    long updateCount = 0;
    ScoreDoc after = null;

    // Each version maps to a list because we can have multiple entries for the same
    // version because of how synchronization works
    Map<Long, List<DocumentStoredFieldVisitor>> versionToDocsMap = new HashMap<>();

    while (true) {
        TopDocs results = searcher.searchAfter(after, booleanQuery, pageSize);
        if (results == null || results.scoreDocs == null || results.scoreDocs.length == 0) {
            break;
        }

        for (ScoreDoc scoreDoc : results.scoreDocs) {
            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
            loadDoc(searcher, visitor, scoreDoc.doc, this.fieldsToLoadIndexingIdLookup);
            versionToDocsMap
                    .computeIfAbsent(visitor.documentVersion, v -> new ArrayList<>())
                    .add(visitor);
            if (visitor.documentVersion > highestVersion) {
                highestVersion = visitor.documentVersion;
                lastUpdateAction = visitor.documentUpdateAction;
            }
        }

        // check to see if the next version is available for all documents returned in the query above
        Set<Long> missingVersions = new HashSet<>();
        for (Long version : versionToDocsMap.keySet()) {
            if (version == highestVersion) {
                continue;
            }
            if (!versionToDocsMap.containsKey(version + 1)) {
                missingVersions.add(version + 1);
            }
        }

        // fetch docs for the missing versions; with nothing missing the query could not
        // match anything, so skip the search entirely
        if (!missingVersions.isEmpty()) {
            Query versionClause = LongPoint.newSetQuery(ServiceDocument.FIELD_NAME_VERSION, missingVersions);
            Query missingVersionQuery = new BooleanQuery.Builder()
                    .add(selfLinkClause, Occur.MUST)
                    .add(versionClause, Occur.MUST)
                    .build();
            // NOTE(review): 'after' comes from the paging of booleanQuery above but is
            // reused as the cursor for a different query here; from the second page on
            // this may skip matching documents. Confirm this is intended.
            TopDocs missingVersionResult = searcher.searchAfter(after, missingVersionQuery, pageSize);
            if (missingVersionResult != null && missingVersionResult.scoreDocs != null
                    && missingVersionResult.scoreDocs.length != 0) {
                for (ScoreDoc scoreDoc : missingVersionResult.scoreDocs) {
                    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
                    loadDoc(searcher, visitor, scoreDoc.doc, this.fieldsToLoadIndexingIdLookup);
                    versionToDocsMap
                            .computeIfAbsent(visitor.documentVersion, v -> new ArrayList<>())
                            .add(visitor);
                }
            }
        }

        // update the metadata for fields as necessary
        for (List<DocumentStoredFieldVisitor> visitorDocs : versionToDocsMap.values()) {
            for (DocumentStoredFieldVisitor visitor : visitorDocs) {
                // Skip the current (non-deleted) version and anything already tombstoned.
                if ((visitor.documentVersion == highestVersion &&
                        !Action.DELETE.toString().equals(lastUpdateAction)) ||
                        visitor.documentTombstoneTimeMicros != LuceneIndexDocumentHelper.ACTIVE_DOCUMENT_TOMBSTONE_TIME) {
                    continue;
                }

                Long nextVersionCreationTime = null;
                if (visitor.documentVersion == highestVersion) {
                    // pick the update time on the first entry. They should be the same for all docs of the same version
                    nextVersionCreationTime = versionToDocsMap.get(visitor.documentVersion).get(0).documentUpdateTimeMicros;
                } else {
                    List<DocumentStoredFieldVisitor> list = versionToDocsMap.get(visitor.documentVersion + 1);
                    if (list != null) {
                        nextVersionCreationTime = list.get(0).documentUpdateTimeMicros;
                    }
                }

                if (nextVersionCreationTime != null) {
                    updateTombstoneTime(wr, visitor.documentIndexingId, nextVersionCreationTime);
                    updateCount++;
                }
            }
        }

        if (results.scoreDocs.length < pageSize) {
            break;
        }
        after = results.scoreDocs[results.scoreDocs.length - 1];
    }

    return updateCount;
}
/**
 * Sets the tombstone-time doc value of the document identified by {@code indexingId} to
 * {@code documentUpdateTimeMicros}.
 */
private void updateTombstoneTime(IndexWriter wr, String indexingId, long documentUpdateTimeMicros) throws IOException {
    final String tombstoneField =
            LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME;
    wr.updateNumericDocValue(
            new Term(LuceneIndexDocumentHelper.FIELD_NAME_INDEXING_ID, indexingId),
            tombstoneField,
            documentUpdateTimeMicros);
}
/**
 * Closes the cached searchers and, when {@code force} is set, also closes the paginated
 * searchers and re-opens the index writer.
 *
 * Nothing happens when the host is stopping, the index is not durable, or the writer
 * was created less than one host maintenance interval ago. Without {@code force}, the
 * method only acts once the number of files in the index directory reaches
 * {@code indexFileCountThresholdForWriterRefresh}.
 *
 * If the re-open fails, the writer is closed and cleared and a DELETE is sent to the
 * management service to stop the local host.
 *
 * @param force {@code true} to re-open the writer regardless of the index file count
 */
private void applyFileLimitRefreshWriter(boolean force) {
    if (getHost().isStopping()) {
        return;
    }
    if (!isDurable()) {
        return;
    }

    long now = Utils.getNowMicrosUtc();
    if (now - this.writerCreationTimeMicros < getHost()
            .getMaintenanceIntervalMicros()) {
        logInfo("Skipping writer re-open, it was created recently");
        return;
    }

    File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
    long count;
    // Files.list holds an open directory handle; try-with-resources guarantees it is
    // released.
    try (Stream<Path> files = Files.list(directory.toPath())) {
        count = files.count();
    } catch (IOException e1) {
        logSevere(e1);
        return;
    }
    if (!force && count < indexFileCountThresholdForWriterRefresh) {
        return;
    }

    final int acquireReleaseCount = QUERY_THREAD_COUNT + UPDATE_THREAD_COUNT;
    try {
        // Do not proceed unless we have blocked all reader+writer threads. We assume
        // the semaphore is already acquired by the current thread
        // NOTE(review): if acquire() is interrupted, the finally block still releases
        // acquireReleaseCount - 1 permits that were never obtained. Confirm whether
        // that is acceptable given the host is stopped on this path.
        this.writerSync.release();
        this.writerSync.acquire(acquireReleaseCount);

        IndexWriter w = this.writer;
        if (w == null) {
            return;
        }

        logInfo("(%s) closing all %d searchers, document count: %d, file count: %d",
                this.writerSync, this.searchers.size(), w.maxDoc(), count);
        for (IndexSearcher s : this.searchers.values()) {
            s.getIndexReader().close();
            this.searcherUpdateTimesMicros.remove(s.hashCode());
        }
        this.searchers.clear();

        if (!force) {
            return;
        }

        logInfo("Closing all paginated searchers (%d)",
                this.paginatedSearchersByCreationTime.size());
        for (PaginatedSearcherInfo info : this.paginatedSearchersByCreationTime.values()) {
            try {
                IndexSearcher s = info.searcher;
                s.getIndexReader().close();
            } catch (Exception ignored) {
                // best effort: the reader is being discarded anyway
            }
        }
        this.paginatedSearchersByCreationTime.clear();
        this.paginatedSearchersByExpirationTime.clear();
        this.searcherUpdateTimesMicros.clear();

        try {
            w.close();
        } catch (Exception ignored) {
            // best effort: a new writer is created right below
        }

        w = createWriter(directory, false);
        try (Stream<Path> files = Files.list(directory.toPath())) {
            count = files.count();
        }
        logInfo("(%s) reopened writer, document count: %d, file count: %d",
                this.writerSync, w.maxDoc(), count);
    } catch (Exception e) {
        // If we fail to re-open we should stop the host, since we can not recover.
        logSevere(e);
        logWarning("Stopping local host since index is not accessible");
        close(this.writer);
        this.writer = null;
        sendRequest(Operation.createDelete(this, ServiceUriPaths.CORE_MANAGEMENT));
    } finally {
        // release all but one, so we stay owning one reference to the semaphore
        this.writerSync.release(acquireReleaseCount - 1);
    }
}
private void applyDocumentVersionRetentionPolicy(long deadline) throws Exception {
Map links = new HashMap<>();
Iterator> it;
do {
int count = 0;
synchronized (this.liveVersionsPerLink) {
it = this.liveVersionsPerLink.entrySet().iterator();
while (it.hasNext() && count < versionRetentionServiceThreshold) {
Entry e = it.next();
links.put(e.getKey(), e.getValue());
it.remove();
count++;
}
}
if (links.isEmpty()) {
break;
}
adjustTimeSeriesStat(STAT_NAME_VERSION_RETENTION_SERVICE_COUNT, AGGREGATION_TYPE_SUM,
links.size());
Operation dummyDelete = Operation.createDelete(null);
for (Entry e : links.entrySet()) {
IndexWriter wr = this.writer;
if (wr == null) {
return;
}
deleteDocumentsFromIndex(dummyDelete, null, e.getKey(), null, 0, e.getValue());
}
links.clear();
} while (Utils.getSystemNowMicrosUtc() < deadline);
}
private void applyMemoryLimit() {
if (getHost().isStopping()) {
return;
}
// close any paginated query searchers that have expired
long now = Utils.getNowMicrosUtc();
applyMemoryLimitToDocumentUpdateInfo();
Map> entriesToClose = new HashMap<>();
long activePaginatedQueries;
synchronized (this.searchSync) {
Iterator>> itr =
this.paginatedSearchersByExpirationTime.entrySet().iterator();
while (itr.hasNext()) {
Entry> entry = itr.next();
long expirationMicros = entry.getKey();
if (expirationMicros > now) {
break;
}
List expirationList = entry.getValue();
for (PaginatedSearcherInfo info : expirationList) {
this.paginatedSearchersByCreationTime.remove(info.creationTimeMicros);
this.searcherUpdateTimesMicros.remove(info.searcher.hashCode());
}
entriesToClose.put(expirationMicros, expirationList);
itr.remove();
}
activePaginatedQueries = this.paginatedSearchersByCreationTime.size();
}
setTimeSeriesStat(STAT_NAME_ACTIVE_PAGINATED_QUERIES, AGGREGATION_TYPE_AVG_MAX,
activePaginatedQueries);
for (Entry> entry : entriesToClose.entrySet()) {
for (PaginatedSearcherInfo info : entry.getValue()) {
logFine("Closing paginated query searcher, expired at %d", entry.getKey());
try {
info.searcher.getIndexReader().close();
} catch (Exception ignored) {
}
}
}
}
/**
 * Prunes {@code updatesPerLink} once its estimated footprint reaches
 * {@code updateMapMemoryLimit}. Entries for links with no service stage on the host are
 * removed, and the writer update time is advanced to force a searcher update per thread.
 * When at least one entry is removed, {@code serviceRemovalDetectedTimeMicros} is
 * updated as well.
 */
void applyMemoryLimitToDocumentUpdateInfo() {
    long memThresholdBytes = this.updateMapMemoryLimit;
    // Declared long so that size * estimate below cannot overflow int for large maps.
    final long bytesPerLinkEstimate = 64;
    int count = 0;

    if (hasOption(ServiceOption.INSTRUMENTATION)) {
        setStat(STAT_NAME_VERSION_CACHE_ENTRY_COUNT, this.updatesPerLink.size());
    }

    // Note: this code will be updated in the future. It currently calls a host
    // method, inside a lock, which is always a bad idea. The getServiceStage()
    // method is lock free, but its still brittle.
    synchronized (this.searchSync) {
        if (this.updatesPerLink.isEmpty()) {
            return;
        }
        if (memThresholdBytes > this.updatesPerLink.size() * bytesPerLinkEstimate) {
            return;
        }

        Iterator<Entry<String, DocumentUpdateInfo>> li = this.updatesPerLink.entrySet()
                .iterator();
        while (li.hasNext()) {
            Entry<String, DocumentUpdateInfo> e = li.next();
            // remove entries for services no longer attached / started on host
            if (getHost().getServiceStage(e.getKey()) == null) {
                count++;
                li.remove();
            }
        }

        // update index time to force searcher update, per thread
        this.writerUpdateTimeMicros = Utils.getNowMicrosUtc();
    }

    if (count == 0) {
        return;
    }

    this.serviceRemovalDetectedTimeMicros = Utils.getNowMicrosUtc();
    logInfo("Cleared %d document update entries", count);
}
/**
 * Finds documents whose expiration time lies between 1 and the current time and deletes
 * all versions of the corresponding self links, provided the expired document is the
 * latest version of its link.
 *
 * Results are paged in chunks of {@code expiredDocumentSearchThreshold}. Paging stops
 * when no results remain or when {@code deadline} has passed.
 *
 * @param s the searcher used to find expired documents
 * @param deadline system time (micros) after which no further page is fetched
 * @throws Exception if searching or deleting fails
 */
private void applyDocumentExpirationPolicy(IndexSearcher s, long deadline) throws Exception {
    Query versionQuery = LongPoint.newRangeQuery(
            ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS, 1L, Utils.getNowMicrosUtc());

    ScoreDoc after = null;
    Operation dummyDelete = null;
    boolean firstQuery = true;
    // Per self link cache of the latest version, so it is looked up once per link.
    Map<String, Long> latestVersions = new HashMap<>();

    do {
        TopDocs results = s.searchAfter(after, versionQuery, expiredDocumentSearchThreshold,
                this.versionSort, false, false);
        if (results.scoreDocs == null || results.scoreDocs.length == 0) {
            return;
        }

        after = results.scoreDocs[results.scoreDocs.length - 1];

        // Counted once per pass: more expired documents exist than fit in one page.
        if (firstQuery && results.totalHits > expiredDocumentSearchThreshold) {
            adjustTimeSeriesStat(STAT_NAME_DOCUMENT_EXPIRATION_FORCED_MAINTENANCE_COUNT,
                    AGGREGATION_TYPE_SUM, 1);
        }
        firstQuery = false;

        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
        for (ScoreDoc scoreDoc : results.scoreDocs) {
            loadDoc(s, visitor, scoreDoc.doc, this.fieldsToLoadNoExpand);
            String documentSelfLink = visitor.documentSelfLink;

            Long latestVersion = latestVersions.get(documentSelfLink);
            if (latestVersion == null) {
                long searcherUpdateTime = getSearcherUpdateTime(s, 0);
                latestVersion = getLatestVersion(s, searcherUpdateTime, documentSelfLink, 0,
                        -1);
                latestVersions.put(documentSelfLink, latestVersion);
            }

            // Only the latest version decides whether the service has expired.
            if (visitor.documentVersion < latestVersion) {
                continue;
            }

            // update document with one that has all fields, including binary state
            augmentDoc(s, visitor, scoreDoc.doc, LUCENE_FIELD_NAME_BINARY_SERIALIZED_STATE);
            ServiceDocument serviceDocument = null;
            try {
                serviceDocument = getStateFromLuceneDocument(visitor, documentSelfLink);
            } catch (Exception e) {
                // The delete below still proceeds, just without a state.
                logWarning("Error deserializing state for %s: %s", documentSelfLink,
                        e.getMessage());
            }

            if (dummyDelete == null) {
                dummyDelete = Operation.createDelete(null);
            }

            deleteAllDocumentsForSelfLink(dummyDelete, documentSelfLink, serviceDocument);

            adjustTimeSeriesStat(STAT_NAME_DOCUMENT_EXPIRATION_COUNT, AGGREGATION_TYPE_SUM, 1);
        }
    } while (Utils.getSystemNowMicrosUtc() < deadline);
}
/**
 * Evaluates the updated document against every active (continuous) query and sends a
 * PATCH holding the document link to each query task whose filter matches.
 *
 * @param op the operation that changed the index; its context is applied to the current
 *        thread before notifications are sent
 * @param latestState the document state that was indexed or expired
 * @param desc the description of {@code latestState}, or {@code null}, in which case the
 *        filter is evaluated through {@code QueryFilterUtils} and the host
 */
private void applyActiveQueries(Operation op, ServiceDocument latestState,
        ServiceDocumentDescription desc) {
    if (this.activeQueries.isEmpty()) {
        return;
    }

    if (op.getAction() == Action.DELETE) {
        // This code path is reached for document expiration, but the last update action for
        // expired documents is usually a PATCH or PUT. Dummy up a document body with a last
        // update action of DELETE for the purpose of providing notifications.
        latestState = Utils.clone(latestState);
        latestState.documentUpdateAction = Action.DELETE.name();
    }

    // set current context from the operation so all active query task notifications carry the
    // same context as the operation that updated the index
    OperationContext.setFrom(op);

    // TODO Optimize. We currently traverse each query independently. We can collapse the queries
    // and evaluate clauses keeping track which clauses applied, then skip any queries accordingly.

    // Only the tasks are needed, so iterate the values directly.
    for (QueryTask activeTask : this.activeQueries.values()) {
        if (getHost().isStopping()) {
            continue;
        }

        QueryFilter filter = activeTask.querySpec.context.filter;
        if (desc == null) {
            if (!QueryFilterUtils.evaluate(filter, latestState, getHost())) {
                continue;
            }
        } else {
            if (!filter.evaluate(latestState, desc)) {
                continue;
            }
        }

        QueryTask patchBody = new QueryTask();
        patchBody.taskInfo.stage = TaskStage.STARTED;
        patchBody.querySpec = null;
        patchBody.results = new ServiceDocumentQueryResult();
        patchBody.results.documentLinks.add(latestState.documentSelfLink);
        // The document body is only attached for EXPAND_CONTENT or COUNT queries.
        if (activeTask.querySpec.options.contains(QueryOption.EXPAND_CONTENT) ||
                activeTask.querySpec.options.contains(QueryOption.COUNT)) {
            patchBody.results.documents = new HashMap<>();
            patchBody.results.documents.put(latestState.documentSelfLink, latestState);
        }

        // Send PATCH to continuous query task with document that passed the query filter.
        // Any subscribers will get notified with the body containing just this document
        Operation patchOperation = Operation.createPatch(this, activeTask.documentSelfLink)
                .setBodyNoCloning(
                        patchBody);

        // Set the authorization context to the user who created the continuous query,
        // and restore the thread's operation context once the request has been sent.
        OperationContext currentContext = OperationContext.getOperationContext();
        if (activeTask.querySpec.context.subjectLink != null) {
            setAuthorizationContext(patchOperation,
                    getAuthorizationContextForSubject(
                            activeTask.querySpec.context.subjectLink));
        }

        sendRequest(patchOperation);
        OperationContext.restoreOperationContext(currentContext);
    }
}
/**
 * Overwrites {@code writerUpdateTimeMicros} with the supplied value. Unlike the cache
 * update methods, this setter performs no "only increase" check and takes no lock.
 *
 * @param writerUpdateTimeMicros the new writer update time, in microseconds
 */
void setWriterUpdateTimeMicros(long writerUpdateTimeMicros) {
this.writerUpdateTimeMicros = writerUpdateTimeMicros;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy