org.apache.lucene.index.ThreadedIndexingAndSearchingTestCase Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-test-framework Show documentation
Apache Lucene (module: test-framework)
There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FailOnNonBulkMergesInfoStream;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util.TestUtil;

// TODO
//   - mix in forceMerge, addIndexes
//   - randomly mix in non-congruent docs

/** Utility class that spawns multiple indexing and
 *  searching threads. */
public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCase {

  // Set to true by any indexing or search thread that hits an exception; indexing
  // threads stop looping once it is set, and runTest asserts it is still false.
  protected final AtomicBoolean failed = new AtomicBoolean();
  // Running total of documents added or updated by the indexing threads.
  protected final AtomicInteger addCount = new AtomicInteger();
  // Running total of documents deleted by the indexing threads (single docs and pack members).
  protected final AtomicInteger delCount = new AtomicInteger();
  // Source of fresh packID values for newly added doc blocks.
  protected final AtomicInteger packCount = new AtomicInteger();

  // Directory holding the index; runTest assigns it from getDirectory(...).
  protected Directory dir;
  // Writer that the default add/update/delete hooks below delegate to.
  protected IndexWriter writer;

  /**
   * Bookkeeping record for one block ("pack") of documents that was indexed together
   * under a shared {@code packID}.
   */
  private static class SubDocs {
    /** Value of the "packID" field shared by every document in the block. */
    public final String packID;
    /** The "docid" values of the block's documents, in the order they were indexed. */
    public final List<String> subIDs;
    /** Set once the block has been deleted or replaced by an update. */
    public boolean deleted;

    /**
     * @param packID value of the "packID" field shared by the block
     * @param subIDs "docid" values of the block's documents; the list is stored as-is, not copied
     */
    public SubDocs(String packID, List<String> subIDs) {
      this.packID = packID;
      this.subIDs = subIDs;
    }
  }

  /**
   * Supplies the searcher to use for one search; called once per search.
   *
   * <p>NOTE(review): presumably each searcher obtained here is handed back through
   * {@link #releaseSearcher} (runSearchThreads releases its searcher in a finally block) -- confirm.
   *
   * @return the searcher to search with
   * @throws Exception if the implementation fails to provide a searcher
   */
  protected abstract IndexSearcher getCurrentSearcher() throws Exception;

  /**
   * Supplies the searcher that runTest uses for its verification pass. runTest calls this
   * after all indexing threads have been joined, then checks deleted ids, deleted packs
   * and doc-block ordering against the returned searcher.
   *
   * @return the searcher to verify the index with
   * @throws Exception if the implementation fails to provide a searcher
   */
  protected abstract IndexSearcher getFinalSearcher() throws Exception;

  /**
   * Hook invoked when a search thread is done with a searcher (runSearchThreads calls it
   * from a finally block). Does nothing by default.
   *
   * @param s the searcher that is no longer needed
   * @throws Exception if an overriding implementation fails to release the searcher
   */
  protected void releaseSearcher(IndexSearcher s) throws Exception {
  }

  /**
   * Runs the searching half of the test; runTest calls this once, shortly after the indexing
   * threads have been started.
   *
   * @param es executor passed through by runTest (its creation is not visible in this excerpt)
   * @param stopTime deadline in {@code System.currentTimeMillis()} units; runTest computes it
   *                 as the current time plus {@code RUN_TIME_SEC*1000}
   * @throws Exception if searching fails
   */
  protected abstract void doSearching(ExecutorService es, long stopTime) throws Exception;

  /**
   * Hook that lets a subclass substitute or wrap the directory the test indexes into.
   * runTest passes in the directory it created and stores the result in {@code dir}.
   * The default returns the argument unchanged.
   *
   * @param in the directory created by runTest
   * @return the directory to use for the test
   */
  protected Directory getDirectory(Directory in) {
    return in;
  }

  protected void updateDocuments(Term id, List> docs) throws Exception {
    writer.updateDocuments(id, docs);
  }

  protected void addDocuments(Term id, List> docs) throws Exception {
    writer.addDocuments(docs);
  }

  /**
   * Adds a single document by forwarding it to {@code writer.addDocument}.
   *
   * @param id term identifying the document (its "docid"); the default implementation does not
   *           use it, but it is part of the hook's signature for overriding implementations
   * @param doc the document's fields
   * @throws Exception if the writer rejects the document
   */
  protected void addDocument(Term id, Iterable<? extends IndexableField> doc) throws Exception {
    writer.addDocument(doc);
  }

  /**
   * Updates a single document by forwarding {@code term} and {@code doc} to
   * {@code writer.updateDocument}.
   *
   * @param term term identifying the document being replaced (its "docid")
   * @param doc the replacement document's fields
   * @throws Exception if the writer rejects the update
   */
  protected void updateDocument(Term term, Iterable<? extends IndexableField> doc) throws Exception {
    writer.updateDocument(term, doc);
  }

  /**
   * Deletes by term: forwards {@code term} to {@code writer.deleteDocuments}. The indexing
   * threads call this with "docid" terms for single documents and "packID" terms for blocks.
   *
   * @param term term selecting the documents to delete
   * @throws Exception if the writer rejects the delete
   */
  protected void deleteDocuments(Term term) throws Exception {
    writer.deleteDocuments(term);
  }

  /**
   * Hook called by each indexing thread as the last step of its run, after its indexing
   * loop has ended. Does nothing by default.
   */
  protected void doAfterIndexingThreadDone() {
  }

  private Thread[] launchIndexingThreads(final LineFileDocs docs,
                                         int numThreads,
                                         final long stopTime,
                                         final Set delIDs,
                                         final Set delPackIDs,
                                         final List allSubDocs) {
    final Thread[] threads = new Thread[numThreads];
    for(int thread=0;thread toDeleteIDs = new ArrayList<>();
            final List toDeleteSubDocs = new ArrayList<>();
            while(System.currentTimeMillis() < stopTime && !failed.get()) {
              try {

                // Occasional longish pause if running
                // nightly
                if (LuceneTestCase.TEST_NIGHTLY && random().nextInt(6) == 3) {
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": now long sleep");
                  }
                  Thread.sleep(TestUtil.nextInt(random(), 50, 500));
                }

                // Rate limit ingest rate:
                if (random().nextInt(7) == 5) {
                  Thread.sleep(TestUtil.nextInt(random(), 1, 10));
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": done sleep");
                  }
                }

                Document doc = docs.nextDoc();
                if (doc == null) {
                  break;
                }

                // Maybe add randomly named field
                final String addedField;
                if (random().nextBoolean()) {
                  addedField = "extra" + random().nextInt(40);
                  doc.add(newTextField(addedField, "a random field", Field.Store.YES));
                } else {
                  addedField = null;
                }

                if (random().nextBoolean()) {

                  if (random().nextBoolean()) {
                    // Add/update doc block:
                    final String packID;
                    final SubDocs delSubDocs;
                    if (toDeleteSubDocs.size() > 0 && random().nextBoolean()) {
                      delSubDocs = toDeleteSubDocs.get(random().nextInt(toDeleteSubDocs.size()));
                      assert !delSubDocs.deleted;
                      toDeleteSubDocs.remove(delSubDocs);
                      // Update doc block, replacing prior packID
                      packID = delSubDocs.packID;
                    } else {
                      delSubDocs = null;
                      // Add doc block, using new packID
                      packID = packCount.getAndIncrement() + "";
                    }

                    final Field packIDField = newStringField("packID", packID, Field.Store.YES);
                    final List docIDs = new ArrayList<>();
                    final SubDocs subDocs = new SubDocs(packID, docIDs);
                    final List docsList = new ArrayList<>();

                    allSubDocs.add(subDocs);
                    doc.add(packIDField);
                    docsList.add(TestUtil.cloneDocument(doc));
                    docIDs.add(doc.get("docid"));

                    final int maxDocCount = TestUtil.nextInt(random(), 1, 10);
                    while(docsList.size() < maxDocCount) {
                      doc = docs.nextDoc();
                      if (doc == null) {
                        break;
                      }
                      docsList.add(TestUtil.cloneDocument(doc));
                      docIDs.add(doc.get("docid"));
                    }
                    addCount.addAndGet(docsList.size());

                    final Term packIDTerm = new Term("packID", packID);

                    if (delSubDocs != null) {
                      delSubDocs.deleted = true;
                      delIDs.addAll(delSubDocs.subIDs);
                      delCount.addAndGet(delSubDocs.subIDs.size());
                      if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ": update pack packID=" + delSubDocs.packID + " count=" + docsList.size() + " docs=" + docIDs);
                      }
                      updateDocuments(packIDTerm, docsList);
                    } else {
                      if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ": add pack packID=" + packID + " count=" + docsList.size() + " docs=" + docIDs);
                      }
                      addDocuments(packIDTerm, docsList);
                    }
                    doc.removeField("packID");

                    if (random().nextInt(5) == 2) {
                      if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ": buffer del id:" + packID);
                      }
                      toDeleteSubDocs.add(subDocs);
                    }

                  } else {
                    // Add single doc
                    final String docid = doc.get("docid");
                    if (VERBOSE) {
                      System.out.println(Thread.currentThread().getName() + ": add doc docid:" + docid);
                    }
                    addDocument(new Term("docid", docid), doc);
                    addCount.getAndIncrement();

                    if (random().nextInt(5) == 3) {
                      if (VERBOSE) {
                        System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("docid"));
                      }
                      toDeleteIDs.add(docid);
                    }
                  }
                } else {

                  // Update single doc, but we never re-use
                  // and ID so the delete will never
                  // actually happen:
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("docid"));
                  }
                  final String docid = doc.get("docid");
                  updateDocument(new Term("docid", docid), doc);
                  addCount.getAndIncrement();

                  if (random().nextInt(5) == 3) {
                    if (VERBOSE) {
                      System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("docid"));
                    }
                    toDeleteIDs.add(docid);
                  }
                }

                if (random().nextInt(30) == 17) {
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes");
                  }
                  for(String id : toDeleteIDs) {
                    if (VERBOSE) {
                      System.out.println(Thread.currentThread().getName() + ": del term=id:" + id);
                    }
                    deleteDocuments(new Term("docid", id));
                  }
                  final int count = delCount.addAndGet(toDeleteIDs.size());
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes");
                  }
                  delIDs.addAll(toDeleteIDs);
                  toDeleteIDs.clear();

                  for(SubDocs subDocs : toDeleteSubDocs) {
                    assert !subDocs.deleted;
                    delPackIDs.add(subDocs.packID);
                    deleteDocuments(new Term("packID", subDocs.packID));
                    subDocs.deleted = true;
                    if (VERBOSE) {
                      System.out.println(Thread.currentThread().getName() + ": del subs: " + subDocs.subIDs + " packID=" + subDocs.packID);
                    }
                    delIDs.addAll(subDocs.subIDs);
                    delCount.addAndGet(subDocs.subIDs.size());
                  }
                  toDeleteSubDocs.clear();
                }
                if (addedField != null) {
                  doc.removeField(addedField);
                }
              } catch (Throwable t) {
                System.out.println(Thread.currentThread().getName() + ": hit exc");
                t.printStackTrace();
                failed.set(true);
                throw new RuntimeException(t);
              }
            }
            if (VERBOSE) {
              System.out.println(Thread.currentThread().getName() + ": indexing done");
            }

            doAfterIndexingThreadDone();
          }
        };
      threads[thread].start();
    }

    return threads;
  }

  /**
   * Starts a random number of search threads (chosen with {@code TestUtil.nextInt(random(), 1, 5)}),
   * waits for all of them to finish, and prints the accumulated hit count when VERBOSE.
   *
   * <p>In the code visible here, each thread checks that segments whose diagnostics report
   * source "merge" were recorded in {@code warmed} (unless {@code assertMergedSegmentsWarmed}
   * is false), smoke-tests the searcher, and then walks the terms of the "body" field,
   * running a TermQuery on a sample of them until {@code stopTimeMS}. A thread that hits an
   * exception sets {@link #failed} and rethrows.
   *
   * @param stopTimeMS deadline in {@code System.currentTimeMillis()} units
   * @throws Exception if joining the search threads fails
   */
  protected void runSearchThreads(final long stopTimeMS) throws Exception {
    final int numThreads = TestUtil.nextInt(random(), 1, 5);
    final Thread[] searchThreads = new Thread[numThreads];
    final AtomicInteger totHits = new AtomicInteger();

    // silly starting guess:
    final AtomicInteger totTermCount = new AtomicInteger(100);

    // TODO: we should enrich this to do more interesting searches
    // NOTE(review): the next line looks truncated -- the rest of the loop header, the thread
    // creation, the acquisition of searcher "s" and the loop that defines "sub"/"segReader"
    // appear to have been lost (text between '<' and '>' stripped). Restore from upstream.
    for(int thread=0;thread diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
                    assertNotNull(diagnostics);
                    String source = diagnostics.get("source");
                    assertNotNull(source);
                    // Segments produced by a merge must have been seen by the warmer,
                    // unless the subclass turned that check off:
                    if (source.equals("merge")) {
                      assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + warmed + " diagnostics=" + diagnostics + " si=" + segReader.getSegmentInfo(),
                                 !assertMergedSegmentsWarmed || warmed.containsKey(segReader.core));
                    }
                  }
                  // Only run searches when the reader reports at least one document:
                  if (s.getIndexReader().numDocs() > 0) {
                    smokeTestSearcher(s);
                    Fields fields = MultiFields.getFields(s.getIndexReader());
                    Terms terms = fields.terms("body");
                    // No "body" terms in this reader: skip the rest of this iteration.
                    if (terms == null) {
                      continue;
                    }
                    TermsEnum termsEnum = terms.iterator();
                    int seenTermCount = 0;
                    // Sample about 30 terms per pass: "trigger" is the stride, derived from the
                    // term count recorded by a previous pass, and "shift" randomizes which
                    // terms the stride lands on.
                    int shift;
                    int trigger; 
                    if (totTermCount.get() < 30) {
                      shift = 0;
                      trigger = 1;
                    } else {
                      trigger = totTermCount.get()/30;
                      shift = random().nextInt(trigger);
                    }
                    while (System.currentTimeMillis() < stopTimeMS) {
                      BytesRef term = termsEnum.next();
                      if (term == null) {
                        // End of the terms: remember the count so later passes can size their stride.
                        totTermCount.set(seenTermCount);
                        break;
                      }
                      seenTermCount++;
                      // search 30 terms
                      if ((seenTermCount + shift) % trigger == 0) {
                        //if (VERBOSE) {
                        //System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
                        //}
                        totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", BytesRef.deepCopyOf(term)))));
                      }
                    }
                    //if (VERBOSE) {
                    //System.out.println(Thread.currentThread().getName() + ": search done");
                    //}
                  }
                } finally {
                  // Always hand the searcher back, even when a check above failed.
                  releaseSearcher(s);
                }
              } catch (Throwable t) {
                System.out.println(Thread.currentThread().getName() + ": hit exc");
                failed.set(true);
                t.printStackTrace(System.out);
                throw new RuntimeException(t);
              }
            }
          }
        };
      searchThreads[thread].start();
    }

    // Block until every search thread has finished.
    for(Thread thread : searchThreads) {
      thread.join();
    }

    if (VERBOSE) {
      System.out.println("TEST: DONE search: totHits=" + totHits);
    }
  }

  /**
   * Subclass hook that receives the test's executor service. Does nothing by default.
   *
   * <p>NOTE(review): the call site is not visible in this excerpt -- presumably runTest invokes
   * this once the IndexWriter has been created; confirm.
   *
   * @param es the executor service supplied by the caller
   * @throws Exception if an overriding implementation fails
   */
  protected void doAfterWriter(ExecutorService es) throws Exception {
  }

  /**
   * Subclass hook for releasing resources. Does nothing by default.
   *
   * <p>NOTE(review): the call site is not visible in this excerpt -- presumably invoked by
   * runTest during shutdown; confirm.
   *
   * @throws Exception if an overriding implementation fails to close its resources
   */
  protected void doClose() throws Exception {
  }

  // When true, runSearchThreads requires every segment whose diagnostics "source" is
  // "merge" to be present in the warmed map below.
  protected boolean assertMergedSegmentsWarmed = true;

  // Records the core of each reader passed to the merged-segment warmer that runTest installs
  // (value is always Boolean.TRUE). Backed by a WeakHashMap wrapped in a synchronized map.
  // NOTE(review): Map/WeakHashMap are raw here -- the type arguments look stripped; confirm against upstream.
  private final Map warmed = Collections.synchronizedMap(new WeakHashMap());

  public void runTest(String testName) throws Exception {

    failed.set(false);
    addCount.set(0);
    delCount.set(0);
    packCount.set(0);

    final long t0 = System.currentTimeMillis();

    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random);
    final Path tempDir = createTempDir(testName);
    dir = getDirectory(newMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (dir instanceof BaseDirectoryWrapper) {
      ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
      ((MockRandomMergePolicy)conf.getMergePolicy()).setDoNonBulkMerges(false);
    }

    if (LuceneTestCase.TEST_NIGHTLY) {
      // newIWConfig makes smallish max seg size, which
      // results in tons and tons of segments for this test
      // when run nightly:
      MergePolicy mp = conf.getMergePolicy();
      if (mp instanceof TieredMergePolicy) {
        ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
      } else if (mp instanceof LogByteSizeMergePolicy) {
        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
      } else if (mp instanceof LogMergePolicy) {
        ((LogMergePolicy) mp).setMaxMergeDocs(100000);
      }
      // when running nightly, merging can still have crazy parameters, 
      // and might use many per-field codecs. turn on CFS for IW flushes
      // and ensure CFS ratio is reasonable to keep it contained.
      conf.setUseCompoundFile(true);
      mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
    }

    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
      @Override
      public void warm(LeafReader reader) throws IOException {
        if (VERBOSE) {
          System.out.println("TEST: now warm merged reader=" + reader);
        }
        warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        int sum = 0;
        final int inc = Math.max(1, maxDoc/50);
        for(int docID=0;docID delIDs = Collections.synchronizedSet(new HashSet());
    final Set delPackIDs = Collections.synchronizedSet(new HashSet());
    final List allSubDocs = Collections.synchronizedList(new ArrayList());

    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC*1000;

    final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE) {
      System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis()-t0) + " ms]");
    }

    // Let index build up a bit
    Thread.sleep(100);

    doSearching(es, stopTime);

    if (VERBOSE) {
      System.out.println("TEST: all searching done [" + (System.currentTimeMillis()-t0) + " ms]");
    }
    
    for(Thread thread : indexThreads) {
      thread.join();
    }

    if (VERBOSE) {
      System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis()-t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
    }

    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
      System.out.println("TEST: finalSearcher=" + s);
    }

    assertFalse(failed.get());

    boolean doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    for(String id : delIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
        doFail = true;
      }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    for(String id : delPackIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
        doFail = true;
      }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    for(SubDocs subDocs : allSubDocs) {
      TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
      if (!subDocs.deleted) {
        // We sort by relevance but the scores should be identical so sort falls back to by docID:
        if (hits.totalHits != subDocs.subIDs.size()) {
          System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
          doFail = true;
        } else {
          int lastDocID = -1;
          int startDocID = -1;
          for(ScoreDoc scoreDoc : hits.scoreDocs) {
            final int docID = scoreDoc.doc;
            if (lastDocID != -1) {
              assertEquals(1+lastDocID, docID);
            } else {
              startDocID = docID;
            }
            lastDocID = docID;
            final Document doc = s.doc(docID);
            assertEquals(subDocs.packID, doc.get("packID"));
          }

          lastDocID = startDocID - 1;
          for(String subID : subDocs.subIDs) {
            hits = s.search(new TermQuery(new Term("docid", subID)), 1);
            assertEquals(1, hits.totalHits);
            final int docID = hits.scoreDocs[0].doc;
            if (lastDocID != -1) {
              assertEquals(1+lastDocID, docID);
            }
            lastDocID = docID;
          }
        }
      } else {
        // Pack was deleted -- make sure its docs are
        // deleted.  We can't verify packID is deleted
        // because we can re-use packID for update:
        for(String subID : subDocs.subIDs) {
          assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
        }
      }
    }

    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();

    for(int id=0;id