org.apache.lucene.search.ShardSearchingTestBase

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util.TestUtil;

// TODO
//   - doc blocks?  so we can test joins/grouping...
//   - controlled consistency (NRTMgr)

/**
 * Base test class for simulating distributed search across multiple shards.
 */
public abstract class ShardSearchingTestBase extends LuceneTestCase {

  // TODO: maybe SLM should throw this instead of returning null...
  /**
   * Thrown when the lease for a searcher has expired.
   */
  public static class SearcherExpiredException extends RuntimeException {
    public SearcherExpiredException(String message) {
      super(message);
    }
  }

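  // Cache key for collectionStatsCache: identifies the stats of one
  // field of one node's searcher at one version: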
  private static class FieldAndShardVersion {
    private final long version;
    private final int nodeID;
    private final String field;

    public FieldAndShardVersion(int nodeID, long version, String field) {
      this.nodeID = nodeID;
      this.version = version;
      this.field = field;
    }

    @Override
    public int hashCode() {
      return (int) (version * nodeID + field.hashCode());
    }

    @Override
    public boolean equals(Object _other) {
      if (!(_other instanceof FieldAndShardVersion)) {
        return false;
      }

      final FieldAndShardVersion other = (FieldAndShardVersion) _other;

      return field.equals(other.field) && version == other.version && nodeID == other.nodeID;
    }

    @Override
    public String toString() {
      return "FieldAndShardVersion(field=" + field + " nodeID=" + nodeID + " version=" + version + ")";
    }
  }

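  // Cache key for termStatsCache: identifies the stats of one term of
  // one node's searcher at one version: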
  private static class TermAndShardVersion {
    private final long version;
    private final int nodeID;
    private final Term term;

    public TermAndShardVersion(int nodeID, long version, Term term) {
      this.nodeID = nodeID;
      this.version = version;
      this.term = term;
    }

    @Override
    public int hashCode() {
      return (int) (version * nodeID + term.hashCode());
    }

    @Override
    public boolean equals(Object _other) {
      if (!(_other instanceof TermAndShardVersion)) {
        return false;
      }

      final TermAndShardVersion other = (TermAndShardVersion) _other;

      return term.equals(other.term) && version == other.version && nodeID == other.nodeID;
    }
  }

  // We share collection stats for these fields on each node
  // reopen:
  private final String[] fieldsToShare = new String[] {"body", "title"};

  // Called by one node once it has reopened, to notify all
  // other nodes.  This is just a mock (since it goes and
  // directly updates all other nodes, in RAM)... in a real
  // env this would hit the wire, sending version &
  // collection stats to all other nodes:
  void broadcastNodeReopen(int nodeID, long version, IndexSearcher newSearcher) throws IOException {

    if (VERBOSE) {
      System.out.println("REOPEN: nodeID=" + nodeID + " version=" + version + " maxDoc=" + newSearcher.getIndexReader().maxDoc());
    }

    // Broadcast new collection stats for this node to all
    // other nodes:
    for(String field : fieldsToShare) {
      final CollectionStatistics stats = newSearcher.collectionStatistics(field);
      for (NodeState node : nodes) {
        // Don't put my own collection stats into the cache;
        // we pull locally:
        if (node.myNodeID != nodeID) {
          node.collectionStatsCache.put(new FieldAndShardVersion(nodeID, version, field), stats);
        }
      }
    }
    for (NodeState node : nodes) {
      node.updateNodeVersion(nodeID, version);
    }
  }

  // TODO: broadcastNodeExpire?  then we can purge the
  // known-stale cache entries...

  // MOCK: in a real env you have to hit the wire
  // (send this query to all remote nodes
  // concurrently):
  TopDocs searchNode(int nodeID, long[] nodeVersions, Query q, Sort sort, int numHits, ScoreDoc searchAfter) throws IOException {
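    // Acquire a searcher pinned to the exact per-node versions the
    // caller expects; it must be released in the finally below: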
    final NodeState.ShardIndexSearcher s = nodes[nodeID].acquire(nodeVersions);
    try {
      if (sort == null) {
        if (searchAfter != null) {
          return s.localSearchAfter(searchAfter, q, numHits);
        } else {
          return s.localSearch(q, numHits);
        }
      } else {
        assert searchAfter == null;  // not supported yet
        return s.localSearch(q, numHits, sort);
      }
    } finally {
      nodes[nodeID].release(s);
    }
  }

  // Mock: in a real env, this would hit the wire and get
  // term stats from remote node
  Map<Term,TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
    final NodeState node = nodes[nodeID];
    final Map<Term,TermStatistics> stats = new HashMap<>();
    final IndexSearcher s = node.searchers.acquire(version);
    if (s == null) {
      throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
    }
    try {
      for(Term term : terms) {
        final TermContext termContext = TermContext.build(s.getIndexReader().getContext(), term);
        stats.put(term, s.termStatistics(term, termContext));
      }
    } finally {
      node.searchers.release(s);
    }
    return stats;
  }

  protected final class NodeState implements Closeable {
    public final Directory dir;
    public final IndexWriter writer;
    public final SearcherLifetimeManager searchers;
    public final SearcherManager mgr;
    public final int myNodeID;
    public final long[] currentNodeVersions;

    // TODO: nothing evicts from here!!!  Somehow, on searcher
    // expiration on remote nodes we must evict from our
    // local cache...?  And still LRU otherwise (for the
    // still-live searchers).

    private final Map<FieldAndShardVersion,CollectionStatistics> collectionStatsCache = new ConcurrentHashMap<>();
    private final Map<TermAndShardVersion,TermStatistics> termStatsCache = new ConcurrentHashMap<>();

    /** Matches docs in the local shard but scores based on
     *  aggregated stats ("mock distributed scoring") from all
     *  nodes. */
    public class ShardIndexSearcher extends IndexSearcher {
      // Version for the node searchers we search:
      public final long[] nodeVersions;
      public final int myNodeID;

      public ShardIndexSearcher(long[] nodeVersions, IndexReader localReader, int nodeID) {
        super(localReader);
        this.nodeVersions = nodeVersions;
        myNodeID = nodeID;
        assert myNodeID == NodeState.this.myNodeID: "myNodeID=" + nodeID + " NodeState.this.myNodeID=" + NodeState.this.myNodeID;
      }

      @Override
      public Query rewrite(Query original) throws IOException {
        final IndexSearcher localSearcher = new IndexSearcher(getIndexReader());
        final Weight weight = localSearcher.createNormalizedWeight(original, true);
        final Set<Term> terms = new HashSet<>();
        weight.extractTerms(terms);

        // Make a single request to remote nodes for term
        // stats:
        for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
          if (nodeID == myNodeID) {
            continue;
          }
          final Set<Term> missing = new HashSet<>();
          for(Term term : terms) {
            final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
            if (!termStatsCache.containsKey(key)) {
              missing.add(term);
            }
          }
          if (missing.size() != 0) {
            for(Map.Entry<Term,TermStatistics> ent : getNodeTermStats(missing, nodeID, nodeVersions[nodeID]).entrySet()) {
              final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], ent.getKey());
              termStatsCache.put(key, ent.getValue());
            }
          }
        }

        return weight.getQuery();
      }

      @Override
      public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
        assert term != null;
        long docFreq = 0;
        long totalTermFreq = 0;
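        // Sum per-node stats; -1 means the statistic is unavailable,
        // and once any node reports -1 the aggregate stays -1: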
        for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
          final TermStatistics subStats;
          if (nodeID == myNodeID) {
            subStats = super.termStatistics(term, context);
          } else {
            final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
            subStats = termStatsCache.get(key);
            // We pre-cached during rewrite so all terms
            // better be here...
            assert subStats != null;
          }

          long nodeDocFreq = subStats.docFreq();
          if (docFreq >= 0 && nodeDocFreq >= 0) {
            docFreq += nodeDocFreq;
          } else {
            docFreq = -1;
          }
          
          long nodeTotalTermFreq = subStats.totalTermFreq();
          if (totalTermFreq >= 0 && nodeTotalTermFreq >= 0) {
            totalTermFreq += nodeTotalTermFreq;
          } else {
            totalTermFreq = -1;
          }
        }

        return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
      }

      @Override
      public CollectionStatistics collectionStatistics(String field) throws IOException {
        // TODO: we could compute this on init and cache,
        // since we are re-inited whenever any nodes have a
        // new reader
        long docCount = 0;
        long sumTotalTermFreq = 0;
        long sumDocFreq = 0;
        long maxDoc = 0;

        for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
          final CollectionStatistics nodeStats;
          if (nodeID == myNodeID) {
            nodeStats = super.collectionStatistics(field);
          } else {
            final FieldAndShardVersion key = new FieldAndShardVersion(nodeID, nodeVersions[nodeID], field);
            nodeStats = collectionStatsCache.get(key);
          }
          // Collection stats are pre-shared on reopen, so
          // we better not have a cache miss:
          assert nodeStats != null: "myNodeID=" + myNodeID + " nodeID=" + nodeID + " version=" + nodeVersions[nodeID] + " field=" + field;

          long nodeDocCount = nodeStats.docCount();
          if (docCount >= 0 && nodeDocCount >= 0) {
            docCount += nodeDocCount;
          } else {
            docCount = -1;
          }

          long nodeSumTotalTermFreq = nodeStats.sumTotalTermFreq();
          if (sumTotalTermFreq >= 0 && nodeSumTotalTermFreq >= 0) {
            sumTotalTermFreq += nodeSumTotalTermFreq;
          } else {
            sumTotalTermFreq = -1;
          }

          long nodeSumDocFreq = nodeStats.sumDocFreq();
          if (sumDocFreq >= 0 && nodeSumDocFreq >= 0) {
            sumDocFreq += nodeSumDocFreq;
          } else {
            sumDocFreq = -1;
          }

          assert nodeStats.maxDoc() >= 0;
          maxDoc += nodeStats.maxDoc();
        }

        return new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
      }

      // NOTE: the original listing is truncated here; the remaining
      // ShardIndexSearcher search methods and the NodeState lifecycle
      // code (constructor, reopen, acquire/release, close) are not shown.
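For context, a test built on this base class subclasses it, lets each node index its own shard, and then searches through a ShardIndexSearcher so that matching stays local while scoring uses the aggregated cross-node statistics. The following is a minimal sketch only: start(), finish(), nodes, and the no-argument NodeState.acquire()/release() live in the truncated portion of the class, so their exact signatures here are assumptions modeled on Lucene's own TestShardSearching.

public class TestMyShardSearching extends ShardSearchingTestBase {
  public void testDistributedTermQuery() throws Exception {
    // Assumed helper from the truncated tail of the base class: spins up
    // numNodes in-RAM nodes and runs the periodic-reopen machinery.
    start(/*numNodes=*/ 3, /*runTimeSec=*/ 5.0, /*maxSearcherAgeSeconds=*/ 10);
    try {
      // Assumed no-arg acquire(): a ShardIndexSearcher pinned to the
      // current searcher version of every node.
      final NodeState.ShardIndexSearcher searcher = nodes[0].acquire();
      try {
        // Matches only node 0's shard, but scores with global stats;
        // "body" is one of the fields whose stats are shared on reopen:
        final TopDocs hits = searcher.search(new TermQuery(new Term("body", "lucene")), 10);
        assertTrue(hits.totalHits >= 0);
      } finally {
        nodes[0].release(searcher);
      }
    } finally {
      finish();
    }
  }
}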