// Source listing: org.apache.lucene.search.ShardSearchingTestBase
// Artifact: lucene-test-framework (available for Maven / Gradle / Ivy)
// Apache Lucene (module: test-framework)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util.TestUtil;
// TODO
// - doc blocks? so we can test joins/grouping...
// - controlled consistency (NRTMgr)
/**
* Base test class for simulating distributed search across multiple shards.
*/
public abstract class ShardSearchingTestBase extends LuceneTestCase {
// TODO: maybe SLM should throw this instead of returning null...
/**
* Thrown when the lease for a searcher has expired.
*/
public static class SearcherExpiredException extends RuntimeException {

  /**
   * Creates the exception.
   *
   * @param message detail text, handed unchanged to {@link RuntimeException}
   */
  public SearcherExpiredException(final String message) {
    super(message);
  }
}
/**
 * Immutable key made of a node id, a searcher version and a field name. It is used as the key
 * of {@code collectionStatsCache}, so {@link #equals(Object)} and {@link #hashCode()} cover
 * all three components.
 */
private static class FieldAndShardVersion {
  private final long version;
  private final int nodeID;
  private final String field;

  /**
   * @param nodeID id of the node the statistics came from
   * @param version searcher version on that node
   * @param field field name; must not be null
   */
  public FieldAndShardVersion(int nodeID, long version, String field) {
    this.nodeID = nodeID;
    this.version = version;
    this.field = field;
  }

  @Override
  public int hashCode() {
    // Mix each component on its own. A plain product of version and nodeID is 0 for
    // nodeID == 0, which would make every version of a field on node 0 collide.
    int h = (int) (version ^ (version >>> 32));
    h = 31 * h + nodeID;
    h = 31 * h + field.hashCode();
    return h;
  }

  @Override
  public boolean equals(Object _other) {
    if (this == _other) {
      return true;
    }
    if (!(_other instanceof FieldAndShardVersion)) {
      return false;
    }
    final FieldAndShardVersion other = (FieldAndShardVersion) _other;
    return field.equals(other.field) && version == other.version && nodeID == other.nodeID;
  }

  @Override
  public String toString() {
    return "FieldAndShardVersion(field=" + field + " nodeID=" + nodeID + " version=" + version + ")";
  }
}
/**
 * Immutable key made of a node id, a searcher version and a {@link Term}. It is used as the
 * key of {@code termStatsCache}, so {@link #equals(Object)} and {@link #hashCode()} cover all
 * three components.
 */
private static class TermAndShardVersion {
  private final long version;
  private final int nodeID;
  private final Term term;

  /**
   * @param nodeID id of the node the statistics came from
   * @param version searcher version on that node
   * @param term the term; must not be null
   */
  public TermAndShardVersion(int nodeID, long version, Term term) {
    this.nodeID = nodeID;
    this.version = version;
    this.term = term;
  }

  @Override
  public int hashCode() {
    // Mix each component on its own. A plain product of version and nodeID is 0 for
    // nodeID == 0, which would make every version of a term on node 0 collide.
    int h = (int) (version ^ (version >>> 32));
    h = 31 * h + nodeID;
    h = 31 * h + term.hashCode();
    return h;
  }

  @Override
  public boolean equals(Object _other) {
    if (this == _other) {
      return true;
    }
    if (!(_other instanceof TermAndShardVersion)) {
      return false;
    }
    final TermAndShardVersion other = (TermAndShardVersion) _other;
    return term.equals(other.term) && version == other.version && nodeID == other.nodeID;
  }

  @Override
  public String toString() {
    return "TermAndShardVersion(term=" + term + " nodeID=" + nodeID + " version=" + version + ")";
  }
}
// We share collection stats for these fields on each node
// reopen:
/** Field names that broadcastNodeReopen iterates when publishing collection statistics. */
private final String[] fieldsToShare = {"body", "title"};
// Called by one node once it has reopened, to notify all
// other nodes. This is just a mock (since it goes and
// directly updates all other nodes, in RAM)... in a real
// env this would hit the wire, sending version &
// collection stats to all other nodes:
/**
 * Publishes the result of a node reopen to every node held in {@code nodes}.
 *
 * <p>For each entry of {@code fieldsToShare}, the collection statistics reported by
 * {@code newSearcher} are stored in the {@code collectionStatsCache} of every node whose id
 * differs from {@code nodeID}. Afterwards {@code updateNodeVersion} is called on every node,
 * the reopened one included.
 *
 * @param nodeID id of the node that reopened
 * @param version searcher version produced by the reopen
 * @param newSearcher searcher the shared collection statistics are read from
 * @throws IOException propagated from {@code collectionStatistics}
 */
void broadcastNodeReopen(int nodeID, long version, IndexSearcher newSearcher) throws IOException {
  if (VERBOSE) {
    final int maxDoc = newSearcher.getIndexReader().maxDoc();
    System.out.println("REOPEN: nodeID=" + nodeID + " version=" + version + " maxDoc=" + maxDoc);
  }

  // Step 1: hand the new collection stats to all the *other* nodes.
  for (int i = 0; i < fieldsToShare.length; i++) {
    final String sharedField = fieldsToShare[i];
    final CollectionStatistics fieldStats = newSearcher.collectionStatistics(sharedField);
    for (NodeState peer : nodes) {
      if (peer.myNodeID == nodeID) {
        // The reopened node does not cache its own stats; it pulls them locally.
        continue;
      }
      peer.collectionStatsCache.put(new FieldAndShardVersion(nodeID, version, sharedField), fieldStats);
    }
  }

  // Step 2: tell every node which version the reopened node is at now.
  for (NodeState peer : nodes) {
    peer.updateNodeVersion(nodeID, version);
  }
}
// TODO: broadcastNodeExpire? then we can purge the
// known-stale cache entries...
// MOCK: in a real env you have to hit the wire
// (send this query to all remote nodes
// concurrently):
/**
 * Runs a query against one node, using a searcher acquired for the given node versions.
 *
 * <p>The searcher is obtained through {@code acquire(nodeVersions)} on the target node and is
 * always released again, whether the search returns or throws.
 *
 * @param nodeID index of the target node in {@code nodes}
 * @param nodeVersions versions passed to the node's {@code acquire}
 * @param q query to run
 * @param sort sort order, or {@code null} to use the unsorted search methods
 * @param numHits number of hits requested
 * @param searchAfter hit to continue after, or {@code null}; asserted to be {@code null}
 *     whenever {@code sort} is given
 * @return the hits returned by the node-local search
 * @throws IOException propagated from the underlying search
 */
TopDocs searchNode(int nodeID, long[] nodeVersions, Query q, Sort sort, int numHits, ScoreDoc searchAfter) throws IOException {
  final NodeState target = nodes[nodeID];
  final NodeState.ShardIndexSearcher searcher = target.acquire(nodeVersions);
  try {
    if (sort != null) {
      assert searchAfter == null; // not supported yet
      return searcher.localSearch(q, numHits, sort);
    }
    if (searchAfter == null) {
      return searcher.localSearch(q, numHits);
    }
    return searcher.localSearchAfter(searchAfter, q, numHits);
  } finally {
    target.release(searcher);
  }
}
// Mock: in a real env, this would hit the wire and get
// term stats from remote node
/**
 * Collects term statistics for the given terms from one node at one searcher version.
 *
 * <p>The type arguments are spelled out so that iterating {@code terms} as {@link Term} and
 * storing {@link TermStatistics} values type-checks; callers that still use raw {@code Map} /
 * {@code Set} remain source-compatible.
 *
 * @param terms terms to look up
 * @param nodeID index of the node in {@code nodes}
 * @param version searcher version to acquire from the node's {@code searchers}
 * @return a new map from each requested term to the statistics reported by that searcher
 * @throws SearcherExpiredException if no searcher is available for {@code version}
 *     ({@code acquire} returned {@code null})
 * @throws IOException propagated from building the term context or reading the statistics
 */
Map<Term,TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
  final NodeState node = nodes[nodeID];
  final Map<Term,TermStatistics> stats = new HashMap<>();
  final IndexSearcher s = node.searchers.acquire(version);
  if (s == null) {
    throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
  }
  try {
    for (Term term : terms) {
      final TermContext termContext = TermContext.build(s.getIndexReader().getContext(), term);
      stats.put(term, s.termStatistics(term, termContext));
    }
  } finally {
    // Give the searcher back even if a lookup failed.
    node.searchers.release(s);
  }
  return stats;
}
protected final class NodeState implements Closeable {
// Per-node state. NOTE(review): the code that initialises these fields is outside this
// excerpt; the notes below rely only on the declared types and the visible call sites.
public final Directory dir;
public final IndexWriter writer;
// Searchers looked up by version: getNodeTermStats calls acquire(version) / release(...) on
// this and treats a null result from acquire as an expired searcher.
public final SearcherLifetimeManager searchers;
public final SearcherManager mgr;
// Id of this node; broadcastNodeReopen compares it with the reopening node's id so that a
// node never caches its own collection stats.
public final int myNodeID;
// NOTE(review): presumably the latest known version of every node, maintained by
// updateNodeVersion - confirm against that method.
public final long[] currentNodeVersions;
// TODO: nothing evicts from here!!! Somehow, on searcher
// expiration on remote nodes we must evict from our
// local cache...? And still LRU otherwise (for the
// still-live searchers).
// Filled by broadcastNodeReopen; keys are FieldAndShardVersion instances.
private final Map collectionStatsCache = new ConcurrentHashMap<>();
// Filled by ShardIndexSearcher.rewrite; keys are TermAndShardVersion instances.
private final Map termStatsCache = new ConcurrentHashMap<>();
/** Matches docs in the local shard but scores based on
* aggregated stats ("mock distributed scoring") from all
* nodes. */
public class ShardIndexSearcher extends IndexSearcher {
// Version for the node searchers we search; indexed by node id
// (rewrite reads nodeVersions[nodeID]):
public final long[] nodeVersions;
// Id of the node this searcher belongs to; the constructor asserts that it equals
// NodeState.this.myNodeID.
public final int myNodeID;
/**
 * Creates a searcher over the local reader and records the node versions it searches against.
 *
 * @param nodeVersions versions of the node searchers, stored as given (not copied)
 * @param localReader reader handed to the {@link IndexSearcher} constructor
 * @param nodeID id of the owning node; asserted to equal the enclosing NodeState's id
 */
public ShardIndexSearcher(long[] nodeVersions, IndexReader localReader, int nodeID) {
  super(localReader);
  this.myNodeID = nodeID;
  this.nodeVersions = nodeVersions;
  assert this.myNodeID == NodeState.this.myNodeID
      : "myNodeID=" + nodeID + " NodeState.this.myNodeID=" + NodeState.this.myNodeID;
}
@Override
public Query rewrite(Query original) throws IOException {
final IndexSearcher localSearcher = new IndexSearcher(getIndexReader());
final Weight weight = localSearcher.createNormalizedWeight(original, true);
final Set terms = new HashSet<>();
weight.extractTerms(terms);
// Make a single request to remote nodes for term
// stats:
for(int nodeID=0;nodeID missing = new HashSet<>();
for(Term term : terms) {
final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
if (!termStatsCache.containsKey(key)) {
missing.add(term);
}
}
if (missing.size() != 0) {
for(Map.Entry ent : getNodeTermStats(missing, nodeID, nodeVersions[nodeID]).entrySet()) {
final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], ent.getKey());
termStatsCache.put(key, ent.getValue());
}
}
}
return weight.getQuery();
}
@Override
public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
assert term != null;
long docFreq = 0;
long totalTermFreq = 0;
for(int nodeID=0;nodeID= 0 && nodeDocFreq >= 0) {
docFreq += nodeDocFreq;
} else {
docFreq = -1;
}
long nodeTotalTermFreq = subStats.totalTermFreq();
if (totalTermFreq >= 0 && nodeTotalTermFreq >= 0) {
totalTermFreq += nodeTotalTermFreq;
} else {
totalTermFreq = -1;
}
}
return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
}
@Override
public CollectionStatistics collectionStatistics(String field) throws IOException {
// TODO: we could compute this on init and cache,
// since we are re-inited whenever any nodes have a
// new reader
long docCount = 0;
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
long maxDoc = 0;
for(int nodeID=0;nodeID
© 2015 - 2025 Weber Informatics LLC | Privacy Policy