
org.apache.lucene.index.BasePointsFormatTestCase Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-test-framework Show documentation
Apache Lucene (module: test-framework)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
/**
* Abstract class to do basic tests for a points format.
* NOTE: This test focuses on the points impl, nothing else.
* The [stretch] goal is for this test to be
* so thorough in testing a new PointsFormat that if this
* test passes, then all Lucene/Solr tests should also pass. Ie,
* if there is some bug in a given PointsFormat that this
* test fails to catch then this test needs to be improved! */
public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCase {
@Override
protected void addRandomFields(Document doc) {
  // Attach between 0 and 2 random IntPoint values under the field name "f",
  // so base-class tests exercise documents with and without points.
  int pointCount = random().nextInt(3);
  for (int added = 0; added < pointCount; added++) {
    doc.add(new IntPoint("f", random().nextInt()));
  }
}
/**
 * Sanity check: index 20 single-dimension binary points (value == docID), force-merge to one
 * segment, and verify that {@code intersect} visits every document exactly once with the
 * packed value it was indexed with.
 */
public void testBasic() throws Exception {
  Directory dir = getDirectory(20);
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter w = new IndexWriter(dir, iwc);
  byte[] point = new byte[4];
  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    // Encode the docID itself as the point value so the visitor can verify it below.
    NumericUtils.intToSortableBytes(i, point, 0);
    doc.add(new BinaryPoint("dim", point));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();
  DirectoryReader r = DirectoryReader.open(dir);
  LeafReader sub = getOnlyLeafReader(r);
  PointValues values = sub.getPointValues();
  // Simple test: make sure intersect can visit every doc:
  BitSet seen = new BitSet();
  values.intersect("dim",
      new IntersectVisitor() {
        @Override
        public Relation compare(byte[] minPacked, byte[] maxPacked) {
          return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) {
          // compare() never answers CELL_INSIDE_QUERY, so the docID-only
          // fast path must never be taken.
          throw new IllegalStateException();
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
          seen.set(docID);
          // Each doc's point value must decode back to its own docID.
          assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
        }
      });
  assertEquals(20, seen.cardinality());
  IOUtils.close(r, dir);
}
/**
 * Indexes 20 single-dimension points (value == docID) across two segments (a commit happens at
 * doc 10), force-merges down to one segment, then verifies intersect still visits every doc
 * exactly once with the value it was indexed with.
 */
public void testMerge() throws Exception {
  Directory directory = getDirectory(20);
  IndexWriterConfig config = newIndexWriterConfig();
  config.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(directory, config);
  byte[] scratch = new byte[4];
  for (int docNum = 0; docNum < 20; docNum++) {
    Document document = new Document();
    NumericUtils.intToSortableBytes(docNum, scratch, 0);
    document.add(new BinaryPoint("dim", scratch));
    writer.addDocument(document);
    if (docNum == 10) {
      // Commit mid-stream so forceMerge below actually has two segments to combine.
      writer.commit();
    }
  }
  writer.forceMerge(1);
  writer.close();
  DirectoryReader reader = DirectoryReader.open(directory);
  LeafReader leaf = getOnlyLeafReader(reader);
  PointValues points = leaf.getPointValues();
  // Simple test: make sure intersect can visit every doc:
  BitSet visited = new BitSet();
  points.intersect("dim",
      new IntersectVisitor() {
        @Override
        public Relation compare(byte[] minPacked, byte[] maxPacked) {
          return Relation.CELL_CROSSES_QUERY;
        }

        public void visit(int docID) {
          throw new IllegalStateException();
        }

        public void visit(int docID, byte[] packedValue) {
          visited.set(docID);
          assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
        }
      });
  assertEquals(20, visited.cardinality());
  IOUtils.close(reader, directory);
}
/**
 * Indexes 10 docs that carry points, plus one empty doc, then deletes all 10 point-carrying
 * docs and verifies that intersect never surfaces a live doc: every visited doc must be
 * marked deleted in liveDocs.
 */
public void testAllPointDocsDeletedInSegment() throws Exception {
Directory dir = getDirectory(20);
IndexWriterConfig iwc = newIndexWriterConfig();
IndexWriter w = new IndexWriter(dir, iwc);
byte[] point = new byte[4];
for(int i=0;i<10;i++) {
Document doc = new Document();
// Point value encodes i, and "id" doc values mirror it so the visitor can cross-check.
NumericUtils.intToSortableBytes(i, point, 0);
doc.add(new BinaryPoint("dim", point));
doc.add(new NumericDocValuesField("id", i));
// Marker field used below to delete all 10 point docs in one call.
doc.add(newStringField("x", "x", Field.Store.NO));
w.addDocument(doc);
}
// One doc with no points and no "x" field, so it survives the delete.
w.addDocument(new Document());
w.deleteDocuments(new Term("x", "x"));
if (random().nextBoolean()) {
w.forceMerge(1);
}
w.close();
DirectoryReader r = DirectoryReader.open(dir);
assertEquals(1, r.numDocs());
Bits liveDocs = MultiFields.getLiveDocs(r);
for(LeafReaderContext ctx : r.leaves()) {
PointValues values = ctx.reader().getPointValues();
NumericDocValues idValues = ctx.reader().getNumericDocValues("id");
if (idValues == null) {
// this is (surprisingly) OK, because if the random IWC flushes all 10 docs before the 11th doc is added, and force merge runs, it
// will drop the 100% deleted segments, and the "id" field never exists in the final single doc segment
continue;
}
if (values != null) {
BitSet seen = new BitSet();
values.intersect("dim",
new IntersectVisitor() {
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
return Relation.CELL_CROSSES_QUERY;
}
public void visit(int docID) {
// compare() always answers CELL_CROSSES_QUERY, so this overload must never fire.
throw new IllegalStateException();
}
public void visit(int docID, byte[] packedValue) {
// Deleted docs MAY still be visited by intersect; record only the live ones.
if (liveDocs.get(docID)) {
seen.set(docID);
}
assertEquals(idValues.get(docID), NumericUtils.sortableBytesToInt(packedValue, 0));
}
});
// All point docs were deleted, so no live doc may have been visited.
assertEquals(0, seen.cardinality());
}
}
IOUtils.close(r, dir);
}
/** Make sure we close open files, delete temp files, etc., on exception */
// NOTE(review): this region was mangled by HTML extraction — the '<' characters of loop
// conditions and generic type parameters were stripped, and several whole methods between
// testWithExceptions and the verify(...) call below appear to have been elided/merged.
// Recover the original bodies from the upstream Lucene 6.x BasePointsFormatTestCase before
// attempting to compile. Code below is preserved byte-identical for reference.
public void testWithExceptions() throws Exception {
int numDocs = atLeast(10000);
int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
byte[][][] docValues = new byte[numDocs][][];
// NOTE(review): the next three lines are fragments of at least two different loops/methods
// fused together by the scrape; "List docValues/docIDs" presumably belongs to another test.
for(int docID=0;docID docValues = new ArrayList<>();
List docIDs = new ArrayList<>();
for(int docID=0;docID 0) {
docValues[docID][theEqualDim] = docValues[0][theEqualDim];
}
}
// Presumably the tail of a "testAllEqual"-style method — TODO confirm against upstream.
verify(docValues, null, numDims, numBytesPerDim);
}
// this should trigger run-length compression with lengths that are greater than 255
// NOTE(review): this region is garbled by HTML extraction — '<' characters were stripped from
// loop conditions/generics and large line runs were elided, fusing testOneDimTwoValues with
// what looks like a BigInteger-based search helper (see the compareTo-based compare() below).
// Restore from the upstream Lucene 6.x BasePointsFormatTestCase; preserved byte-identical here.
public void testOneDimTwoValues() throws Exception {
int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int numDocs = atLeast(1000);
// Only values in one randomly chosen dimension alternate; the rest repeat, which is what
// exercises run-length encoding of long runs.
int theDim = random().nextInt(numDims);
byte[] value1 = new byte[numBytesPerDim];
random().nextBytes(value1);
byte[] value2 = new byte[numBytesPerDim];
random().nextBytes(value2);
byte[][][] docValues = new byte[numDocs][][];
// NOTE(review): garbled — loop body lost, fused with an unrelated indexing loop.
for(int docID=0;docID " + values[dim]);
}
}
docs[docID] = values;
Document doc = new Document();
doc.add(new BinaryPoint("field", bytes));
w.addDocument(doc);
}
DirectoryReader r = w.getReader();
w.close();
int iters = atLeast(100);
// NOTE(review): garbled — iteration header and query-range setup lost.
for(int iter=0;iter= 0;
if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
crosses = true;
}
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
}
// Brute-force validation: recompute expected membership per doc and compare against hits.
for(int docID=0;docID 0) {
expected = false;
break;
}
}
boolean actual = hits.get(docID);
assertEquals("docID=" + docID, expected, actual);
}
}
r.close();
}
}
/** Randomized binary-point test over a tiny document count (10). */
public void testRandomBinaryTiny() throws Exception {
doTestRandomBinary(10);
}
/** Randomized binary-point test over a medium document count (10k). */
public void testRandomBinaryMedium() throws Exception {
doTestRandomBinary(10000);
}
/** Heavy randomized binary-point test (200k docs); nightly-only, and skipped under SimpleText. */
@Nightly
public void testRandomBinaryBig() throws Exception {
assumeFalse("too slow with SimpleText", Codec.getDefault().getName().equals("SimpleText"));
doTestRandomBinary(200000);
}
/**
 * Drives the randomized binary-point tests: picks a doc count in [count, 2*count], random
 * dims/bytes-per-dim, generates random values, and delegates to the verify machinery.
 */
// NOTE(review): everything from here through the end of the verify/addIndexes machinery was
// mangled by HTML extraction — '<' characters stripped and long line runs elided, so the
// doTestRandomBinary body runs straight into the interior of what is evidently the private
// verify(...) method (its header is missing). Restore from the upstream Lucene 6.x
// BasePointsFormatTestCase. Code is preserved byte-identical below.
private void doTestRandomBinary(int count) throws Exception {
int numDocs = TestUtil.nextInt(random(), count, count*2);
int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
byte[][][] docValues = new byte[numDocs][][];
// NOTE(review): garbled — value-generation loop body lost; what follows belongs to verify(...).
for(int docID=0;docID 0) {
System.arraycopy(docValues[ord][dim], 0, expectedMaxValues[dim], 0, numBytesPerDim);
}
}
}
}
// 20% of the time we add into a separate directory, then at some point use
// addIndexes to bring the indexed point values to the main directory:
Directory saveDir;
RandomIndexWriter saveW;
int addIndexesAt;
if (random().nextInt(5) == 1) {
saveDir = dir;
saveW = w;
dir = getDirectory(numValues);
if (useRealWriter) {
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
} else {
iwc = newIndexWriterConfig();
}
if (expectExceptions) {
// Suppress CMS exception logging: the fault-injection path expects merges to fail.
MergeScheduler ms = iwc.getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).setSuppressExceptions();
}
}
w = new RandomIndexWriter(random(), dir, iwc);
addIndexesAt = TestUtil.nextInt(random(), 1, numValues-1);
} else {
saveW = null;
saveDir = null;
addIndexesAt = 0;
}
try {
Document doc = null;
int lastID = -1;
// NOTE(review): garbled — the main indexing loop and leaf min/max aggregation were elided here.
for(int ord=0;ord 0) {
System.arraycopy(leafMaxValues, dim*numBytesPerDim, maxValues, dim*numBytesPerDim, numBytesPerDim);
}
}
}
byte[] scratch = new byte[numBytesPerDim];
// NOTE(review): garbled — random query-range setup and intersect call header lost.
for(int dim=0;dim 0) {
//System.out.println(" query_outside_cell");
return Relation.CELL_OUTSIDE_QUERY;
} else if (StringHelper.compare(numBytesPerDim, minPacked, dim*numBytesPerDim, queryMin[dim], 0) < 0 ||
StringHelper.compare(numBytesPerDim, maxPacked, dim*numBytesPerDim, queryMax[dim], 0) > 0) {
crosses = true;
}
}
if (crosses) {
//System.out.println(" query_crosses_cell");
return Relation.CELL_CROSSES_QUERY;
} else {
//System.out.println(" cell_inside_query");
return Relation.CELL_INSIDE_QUERY;
}
}
});
}
// Brute-force recomputation of which ids should match the query range:
BitSet expected = new BitSet();
for(int ord=0;ord 0) {
matches = false;
break;
}
}
if (matches) {
int id;
if (ids == null) {
id = ord;
} else {
id = ids[ord];
}
expected.set(id);
}
}
int limit = Math.max(expected.length(), hits.length());
int failCount = 0;
int successCount = 0;
// NOTE(review): garbled — per-id comparison loop fused with the addIndexes(CodecReader[]) path.
for(int id=0;id subs = new ArrayList<>();
for (LeafReaderContext context : r.leaves()) {
subs.add((CodecReader) context.reader());
}
if (VERBOSE) {
System.out.println("TEST: now use addIndexes(CodecReader[]) to switch writers");
}
saveW.addIndexes(subs.toArray(new CodecReader[subs.size()]));
} else {
if (VERBOSE) {
System.out.println("TEST: now use TestUtil.addIndexesSlowly(DirectoryReader[]) to switch writers");
}
TestUtil.addIndexesSlowly(saveW.w, r);
}
}
} else {
// Add via directory:
if (VERBOSE) {
System.out.println("TEST: now use addIndexes(Directory[]) to switch writers");
}
w.close();
saveW.addIndexes(new Directory[] {dir});
}
w.close();
dir.close();
}
/** Returns a uniformly random BigInteger of at most {@code numBytes*8-1} bits, negated half the time. */
private BigInteger randomBigInt(int numBytes) {
  final BigInteger magnitude = new BigInteger(numBytes * 8 - 1, random());
  return random().nextBoolean() ? magnitude.negate() : magnitude;
}
/**
 * Creates a test Directory sized for the expected point count: large counts (>100k) get a
 * filesystem-backed directory instead of the default (possibly RAM-backed) test directory.
 */
private Directory getDirectory(int numPoints) throws IOException {
  if (numPoints > 100000) {
    return newFSDirectory(createTempDir("TestBKDTree"));
  }
  return newDirectory();
}
/** Always false for points: a merged BKD tree is not byte-identical to an incrementally built one. */
@Override
protected boolean mergeIsStable() {
// suppress this test from base class: merges for BKD trees are not stable because the tree created by merge will have a different
// structure than the tree created by adding points separately
return false;
}
// LUCENE-7491
/**
 * Reproduces LUCENE-7491: a field ("id") that has no points in one segment but does have
 * points in a later segment must survive forceMerge without tripping the merge logic.
 */
public void testMixedSchema() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Force a flush after every 2 buffered docs so the first two docs land in their own
  // segment. Configure BEFORE constructing the writer: mutating the config only after the
  // writer is built (as the previous code did) risks the setting never taking effect for
  // the initial flush.
  iwc.setMaxBufferedDocs(2);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  for (int i = 0; i < 2; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(i), Field.Store.NO));
    doc.add(new IntPoint("int", i));
    w.addDocument(doc);
  }
  // index has 1 segment now (with 2 docs) and that segment does have points, but the "id" field in particular does NOT
  Document doc = new Document();
  doc.add(new IntPoint("id", 0));
  w.addDocument(doc);
  // now we write another segment where the id field does have points:
  w.forceMerge(1);
  IOUtils.close(w, dir);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy