All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.index.BasePointsFormatTestCase Maven / Gradle / Ivy

There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;

/**
 * Abstract class to do basic tests for a points format.
 * NOTE: This test focuses on the points impl, nothing else.
 * The [stretch] goal is for this test to be
 * so thorough in testing a new PointsFormat that if this
 * test passes, then all Lucene/Solr tests should also pass.  I.e.,
 * if there is some bug in a given PointsFormat that this
 * test fails to catch, then this test needs to be improved! */
public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCase {

  /**
   * Adds between zero and two (inclusive) random {@link IntPoint} values to {@code doc},
   * all under the field name "f".
   */
  @Override
  protected void addRandomFields(Document doc) {
    int remaining = random().nextInt(3);
    while (remaining > 0) {
      int randomValue = random().nextInt();
      doc.add(new IntPoint("f", randomValue));
      remaining--;
    }
  }
  
  /**
   * Indexes 20 documents, each with one 4-byte point in field "dim" encoding the loop counter,
   * force-merges to a single segment, and checks that {@code intersect} reports all 20
   * documents, each with a packed value that decodes to its own docID.
   *
   * <p>The directory, writer and reader are held in try-with-resources so they are released
   * even when an indexing call or an assertion throws part-way through the test.
   */
  public void testBasic() throws Exception {
    try (Directory dir = getDirectory(20)) {
      IndexWriterConfig iwc = newIndexWriterConfig();
      iwc.setMergePolicy(newLogMergePolicy());

      try (IndexWriter w = new IndexWriter(dir, iwc)) {
        byte[] point = new byte[4];
        for (int i = 0; i < 20; i++) {
          Document doc = new Document();
          NumericUtils.intToSortableBytes(i, point, 0);
          doc.add(new BinaryPoint("dim", point));
          w.addDocument(doc);
        }
        w.forceMerge(1);
      }

      try (DirectoryReader r = DirectoryReader.open(dir)) {
        LeafReader sub = getOnlyLeafReader(r);
        PointValues values = sub.getPointValues();

        // Simple test: make sure intersect can visit every doc:
        BitSet seen = new BitSet();
        values.intersect("dim",
                         new IntersectVisitor() {
                           @Override
                           public Relation compare(byte[] minPacked, byte[] maxPacked) {
                             // Always "crosses", so every value must be handed to visit(docID, packedValue).
                             return Relation.CELL_CROSSES_QUERY;
                           }
                           @Override
                           public void visit(int docID) {
                             // The value-less callback must never be used when every cell crosses.
                             throw new IllegalStateException();
                           }
                           @Override
                           public void visit(int docID, byte[] packedValue) {
                             seen.set(docID);
                             assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
                           }
                         });
        assertEquals(20, seen.cardinality());
      }
    }
  }

  /**
   * Same check as a plain single-segment index, but with a commit issued right after the
   * document whose counter is 10 (presumably so that more than one segment exists before the
   * final {@code forceMerge(1)} -- confirm against the writer configuration). After merging,
   * {@code intersect} must report all 20 documents, each with a packed value that decodes to
   * its own docID.
   *
   * <p>The directory, writer and reader are held in try-with-resources so they are released
   * even when an indexing call or an assertion throws part-way through the test.
   */
  public void testMerge() throws Exception {
    try (Directory dir = getDirectory(20)) {
      IndexWriterConfig iwc = newIndexWriterConfig();
      iwc.setMergePolicy(newLogMergePolicy());

      try (IndexWriter w = new IndexWriter(dir, iwc)) {
        byte[] point = new byte[4];
        for (int i = 0; i < 20; i++) {
          Document doc = new Document();
          NumericUtils.intToSortableBytes(i, point, 0);
          doc.add(new BinaryPoint("dim", point));
          w.addDocument(doc);
          if (i == 10) {
            w.commit();
          }
        }
        w.forceMerge(1);
      }

      try (DirectoryReader r = DirectoryReader.open(dir)) {
        LeafReader sub = getOnlyLeafReader(r);
        PointValues values = sub.getPointValues();

        // Simple test: make sure intersect can visit every doc:
        BitSet seen = new BitSet();
        values.intersect("dim",
                         new IntersectVisitor() {
                           @Override
                           public Relation compare(byte[] minPacked, byte[] maxPacked) {
                             // Always "crosses", so every value must be handed to visit(docID, packedValue).
                             return Relation.CELL_CROSSES_QUERY;
                           }
                           @Override
                           public void visit(int docID) {
                             // The value-less callback must never be used when every cell crosses.
                             throw new IllegalStateException();
                           }
                           @Override
                           public void visit(int docID, byte[] packedValue) {
                             seen.set(docID);
                             assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
                           }
                         });
        assertEquals(20, seen.cardinality());
      }
    }
  }

  /**
   * Indexes 10 documents that carry a point ("dim"), a numeric doc value ("id") and a marker
   * term ("x":"x"), plus one empty document; then deletes every document with the marker term
   * and, at random, force-merges. Exactly one live document must remain, and in every leaf that
   * still exposes both points and "id" doc values, no point may belong to a live document.
   *
   * <p>Live docs are looked up per leaf: the docIDs passed to the visitor are segment-local, so
   * they must not be tested against the reader-wide live-docs view. A {@code null} per-leaf
   * live-docs instance (no deletions in that leaf) is treated as "all documents live" instead of
   * failing with a {@link NullPointerException}.
   *
   * <p>The directory, writer and reader are held in try-with-resources so they are released
   * even when an indexing call or an assertion throws part-way through the test.
   */
  public void testAllPointDocsDeletedInSegment() throws Exception {
    try (Directory dir = getDirectory(20)) {
      IndexWriterConfig iwc = newIndexWriterConfig();

      try (IndexWriter w = new IndexWriter(dir, iwc)) {
        byte[] point = new byte[4];
        for (int i = 0; i < 10; i++) {
          Document doc = new Document();
          NumericUtils.intToSortableBytes(i, point, 0);
          doc.add(new BinaryPoint("dim", point));
          doc.add(new NumericDocValuesField("id", i));
          doc.add(newStringField("x", "x", Field.Store.NO));
          w.addDocument(doc);
        }
        // The only document that survives the delete-by-term below.
        w.addDocument(new Document());
        w.deleteDocuments(new Term("x", "x"));
        if (random().nextBoolean()) {
          w.forceMerge(1);
        }
      }

      try (DirectoryReader r = DirectoryReader.open(dir)) {
        assertEquals(1, r.numDocs());

        for (LeafReaderContext ctx : r.leaves()) {
          LeafReader leaf = ctx.reader();
          PointValues values = leaf.getPointValues();
          NumericDocValues idValues = leaf.getNumericDocValues("id");
          if (idValues == null) {
            // this is (surprisingly) OK, because if the random IWC flushes all 10 docs before the 11th doc is added, and force merge runs, it
            // will drop the 100% deleted segments, and the "id" field never exists in the final single doc segment
            continue;
          }
          if (values != null) {
            // Segment-local live docs; null means this leaf has no deletions at all.
            final Bits leafLiveDocs = leaf.getLiveDocs();
            BitSet seen = new BitSet();
            values.intersect("dim",
                             new IntersectVisitor() {
                               @Override
                               public Relation compare(byte[] minPacked, byte[] maxPacked) {
                                 return Relation.CELL_CROSSES_QUERY;
                               }
                               @Override
                               public void visit(int docID) {
                                 throw new IllegalStateException();
                               }
                               @Override
                               public void visit(int docID, byte[] packedValue) {
                                 if (leafLiveDocs == null || leafLiveDocs.get(docID)) {
                                   seen.set(docID);
                                 }
                                 assertEquals(idValues.get(docID), NumericUtils.sortableBytesToInt(packedValue, 0));
                               }
                             });
            assertEquals(0, seen.cardinality());
          }
        }
      }
    }
  }

  /**
   * Make sure we close open files, delete temp files, etc., on exception.
   *
   * <p>NOTE(review): this text is not valid Java as it stands. The first {@code for} header
   * below stops in the middle of its condition and is followed by statements that do not fit
   * this method (a second {@code docValues} declaration, a reference to {@code theEqualDim}
   * that is never declared here, unbalanced braces). Content appears to have been lost,
   * possibly everything between a {@code <} and a later {@code >}; restore the missing
   * code from the upstream source before relying on this method.
   */
  public void testWithExceptions() throws Exception {
    int numDocs = atLeast(10000);
    int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
    int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);

    byte[][][] docValues = new byte[numDocs][][];

    for(int docID=0;docID docValues = new ArrayList<>();
    List docIDs = new ArrayList<>();

    for(int docID=0;docID 0) {
        docValues[docID][theEqualDim] = docValues[0][theEqualDim];
      }
    }

    verify(docValues, null, numDims, numBytesPerDim);
  }

  // this should trigger run-length compression with lengths that are greater than 255
  //
  // NOTE(review): this text is not valid Java as it stands. The for-loop header that follows the
  // docValues allocation stops in the middle of its condition, and the remainder (BigInteger
  // range comparisons, a DirectoryReader obtained from a writer "w", a "hits" bit set) refers to
  // names that are never declared in this method. Content appears to have been lost, possibly
  // everything between a '<' and a later '>'; restore the missing code from the upstream source
  // before relying on anything in this span.
  public void testOneDimTwoValues() throws Exception {
    int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
    int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);

    int numDocs = atLeast(1000);
    int theDim = random().nextInt(numDims);
    byte[] value1 = new byte[numBytesPerDim];
    random().nextBytes(value1);
    byte[] value2 = new byte[numBytesPerDim];
    random().nextBytes(value2);
    byte[][][] docValues = new byte[numDocs][][];

    for(int docID=0;docID " + values[dim]);
          }
        }
        docs[docID] = values;
        Document doc = new Document();
        doc.add(new BinaryPoint("field", bytes));
        w.addDocument(doc);
      }

      DirectoryReader r = w.getReader();
      w.close();

      int iters = atLeast(100);
      for(int iter=0;iter= 0;

                  if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
                    return Relation.CELL_OUTSIDE_QUERY;
                  } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
                    crosses = true;
                  }
                }

                if (crosses) {
                  return Relation.CELL_CROSSES_QUERY;
                } else {
                  return Relation.CELL_INSIDE_QUERY;
                }
              }
            });
        }

        for(int docID=0;docID 0) {
              expected = false;
              break;
            }
          }
          boolean actual = hits.get(docID);
          assertEquals("docID=" + docID, expected, actual);
        }
      }
      r.close();
      }
  }

  /** Runs the randomized binary-points test with a base count of 10. */
  public void testRandomBinaryTiny() throws Exception {
    final int tinyCount = 10;
    doTestRandomBinary(tinyCount);
  }

  /** Runs the randomized binary-points test with a base count of 10000. */
  public void testRandomBinaryMedium() throws Exception {
    final int mediumCount = 10000;
    doTestRandomBinary(mediumCount);
  }

  /**
   * Runs the randomized binary-points test with a base count of 200000, under the assumption
   * that the default codec is not named "SimpleText".
   */
  @Nightly
  public void testRandomBinaryBig() throws Exception {
    final String defaultCodecName = Codec.getDefault().getName();
    final boolean usingSimpleText = defaultCodecName.equals("SimpleText");
    assumeFalse("too slow with SimpleText", usingSimpleText);

    final int bigCount = 200000;
    doTestRandomBinary(bigCount);
  }

  // Picks a document count in [count, count*2], a random number of bytes per dimension and a
  // random number of dimensions, and allocates the per-document value table.
  //
  // NOTE(review): everything after the docValues allocation is not valid Java as it stands. The
  // for-loop header stops in the middle of its condition, and the rest of the span refers to
  // names that are never declared in this method (dir, w, iwc, numValues, useRealWriter,
  // expectExceptions, ids, hits, queryMin/queryMax, ...). It looks like the bodies of several
  // different methods have been fused together after content was lost, possibly everything
  // between a '<' and a later '>'. Restore the missing code from the upstream source before
  // relying on anything in this span.
  private void doTestRandomBinary(int count) throws Exception {
    int numDocs = TestUtil.nextInt(random(), count, count*2);
    int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
    int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);

    byte[][][] docValues = new byte[numDocs][][];

    for(int docID=0;docID 0) {
            System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
          }
        }
      }
    }

    // 20% of the time we add into a separate directory, then at some point use
    // addIndexes to bring the indexed point values to the main directory:
    Directory saveDir;
    RandomIndexWriter saveW;
    int addIndexesAt;
    if (random().nextInt(5) == 1) {
      saveDir = dir;
      saveW = w;
      dir = getDirectory(numValues);
      if (useRealWriter) {
        iwc = new IndexWriterConfig(new MockAnalyzer(random()));
      } else {
        iwc = newIndexWriterConfig();
      }
      if (expectExceptions) {
        MergeScheduler ms = iwc.getMergeScheduler();
        if (ms instanceof ConcurrentMergeScheduler) {
          ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
        }
      }
      w = new RandomIndexWriter(random(), dir, iwc);
      addIndexesAt = TestUtil.nextInt(random(), 1, numValues-1);
    } else {
      saveW = null;
      saveDir = null;
      addIndexesAt = 0;
    }

    try {

      Document doc = null;
      int lastID = -1;
      for(int ord=0;ord 0) {
            System.arraycopy(leafMaxValues, dim*numBytesPerDim, maxValues, dim*numBytesPerDim, numBytesPerDim);
          }
        }
      }

      byte[] scratch = new byte[numBytesPerDim];
      for(int dim=0;dim 0) {
                    //System.out.println("  query_outside_cell");
                    return Relation.CELL_OUTSIDE_QUERY;
                  } else if (StringHelper.compare(numBytesPerDim, minPacked, dim*numBytesPerDim, queryMin[dim], 0) < 0 ||
                             StringHelper.compare(numBytesPerDim, maxPacked, dim*numBytesPerDim, queryMax[dim], 0) > 0) {
                    crosses = true;
                  }
                }

                if (crosses) {
                  //System.out.println("  query_crosses_cell");
                  return Relation.CELL_CROSSES_QUERY;
                } else {
                  //System.out.println("  cell_inside_query");
                  return Relation.CELL_INSIDE_QUERY;
                }
              }
            });
        }

        BitSet expected = new BitSet();
        for(int ord=0;ord 0) {
              matches = false;
              break;
            }
          }

          if (matches) {
            int id;
            if (ids == null) {
              id = ord;
            } else {
              id = ids[ord];
            }
            expected.set(id);
          }
        }

        int limit = Math.max(expected.length(), hits.length());
        int failCount = 0;
        int successCount = 0;
        for(int id=0;id subs = new ArrayList<>();
          for (LeafReaderContext context : r.leaves()) {
            subs.add((CodecReader) context.reader());
          }
          if (VERBOSE) {
            System.out.println("TEST: now use addIndexes(CodecReader[]) to switch writers");
          }
          saveW.addIndexes(subs.toArray(new CodecReader[subs.size()]));
        } else {
          if (VERBOSE) {
            System.out.println("TEST: now use TestUtil.addIndexesSlowly(DirectoryReader[]) to switch writers");
          }
          TestUtil.addIndexesSlowly(saveW.w, r);
        }
      }
    } else {
      // Add via directory:
      if (VERBOSE) {
        System.out.println("TEST: now use addIndexes(Directory[]) to switch writers");
      }
      w.close();
      saveW.addIndexes(new Directory[] {dir});
    }
    w.close();
    dir.close();
  }

  /**
   * Returns a random {@link BigInteger} whose magnitude is drawn from {@code numBytes*8-1}
   * random bits and whose sign is flipped to negative on a coin toss.
   */
  private BigInteger randomBigInt(int numBytes) {
    final int magnitudeBits = numBytes * 8 - 1;
    final BigInteger magnitude = new BigInteger(magnitudeBits, random());
    final boolean negative = random().nextBoolean();
    return negative ? magnitude.negate() : magnitude;
  }

  /**
   * Creates the directory for a test: a filesystem directory under a fresh temp dir named
   * "TestBKDTree" when more than 100000 points are expected, otherwise whatever
   * {@code newDirectory()} provides.
   */
  private Directory getDirectory(int numPoints) throws IOException {
    final boolean manyPoints = numPoints > 100000;
    if (manyPoints) {
      return newFSDirectory(createTempDir("TestBKDTree"));
    }
    return newDirectory();
  }

  /**
   * Always reports {@code false}, which suppresses the corresponding base-class test: a BKD
   * tree produced by a merge has a different structure than one produced by adding the same
   * points separately, so merges are not stable for this format.
   */
  @Override
  protected boolean mergeIsStable() {
    final boolean stable = false;
    return stable;
  }

  /**
   * LUCENE-7491: indexes two documents in which "id" is a plain string field (only "int"
   * carries points), then one document in which "id" is itself a point field, and finally
   * force-merges to one segment. The test makes no explicit assertions; it passes when none of
   * these calls throws.
   */
  public void testMixedSchema() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
    // Applied after the writer has been created, on the same config instance it was given.
    config.setMaxBufferedDocs(2);

    // First two docs: the segment they end up in has points, but not for the "id" field.
    int docNum = 0;
    while (docNum < 2) {
      Document stringIdDoc = new Document();
      stringIdDoc.add(new StringField("id", Integer.toString(docNum), Field.Store.NO));
      stringIdDoc.add(new IntPoint("int", docNum));
      writer.addDocument(stringIdDoc);
      docNum++;
    }

    // Third doc: here the "id" field does have points.
    Document pointIdDoc = new Document();
    pointIdDoc.add(new IntPoint("id", 0));
    writer.addDocument(pointIdDoc);

    writer.forceMerge(1);
    IOUtils.close(writer, directory);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy