/*
* COPIED FROM APACHE LUCENE 4.7.2
*
* Git URL: git@github.com:apache/lucene.git, tag: releases/lucene-solr/4.7.2, path: lucene/core/src/java
*
* (see https://issues.apache.org/jira/browse/OAK-10786 for details)
*/
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.BLOCK_SIZE;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_DAT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_IDX;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.FLAGS_BITS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.OFFSETS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PAYLOADS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.POSITIONS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
import java.io.Closeable;
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.BlockPackedReaderIterator;
import org.apache.lucene.util.packed.PackedInts;
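/*
 * Illustrative usage (a hedged sketch, not part of the original Lucene source): the codec's
 * CompressingTermVectorsFormat hands an instance of this reader to the segment reader, which
 * then fetches per-document term vectors roughly as follows. Variable names and the field
 * name "body" are hypothetical.
 *
 *   TermVectorsReader tvReader = ...;            // obtained from the codec for one segment
 *   Fields vectors = tvReader.get(docID);        // null if the document has no term vectors
 *   if (vectors != null) {
 *     Terms terms = vectors.terms("body");       // null if this field stored no vectors
 *     if (terms != null) {
 *       TermsEnum termsEnum = terms.iterator(null);
 *       for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
 *         long freqInDoc = termsEnum.totalTermFreq();
 *       }
 *     }
 *   }
 */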
/**
* {@link TermVectorsReader} for {@link CompressingTermVectorsFormat}.
* @lucene.experimental
*/
public final class CompressingTermVectorsReader extends TermVectorsReader implements Closeable {
private final FieldInfos fieldInfos;
final CompressingStoredFieldsIndexReader indexReader;
final IndexInput vectorsStream;
private final int packedIntsVersion;
private final CompressionMode compressionMode;
private final Decompressor decompressor;
private final int chunkSize;
private final int numDocs;
private boolean closed;
private final BlockPackedReaderIterator reader;
// used by clone
private CompressingTermVectorsReader(CompressingTermVectorsReader reader) {
this.fieldInfos = reader.fieldInfos;
this.vectorsStream = reader.vectorsStream.clone();
this.indexReader = reader.indexReader.clone();
this.packedIntsVersion = reader.packedIntsVersion;
this.compressionMode = reader.compressionMode;
this.decompressor = reader.decompressor.clone();
this.chunkSize = reader.chunkSize;
this.numDocs = reader.numDocs;
this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
this.closed = false;
}
/** Sole constructor. */
public CompressingTermVectorsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn,
IOContext context, String formatName, CompressionMode compressionMode) throws IOException {
this.compressionMode = compressionMode;
final String segment = si.name;
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
IndexInput indexStream = null;
try {
// Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
final String codecNameIdx = formatName + CODEC_SFX_IDX;
CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
indexStream.close();
indexStream = null;
// Open the data file and read metadata
final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
vectorsStream = d.openInput(vectorsStreamFN, context);
final String codecNameDat = formatName + CODEC_SFX_DAT;
CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
packedIntsVersion = vectorsStream.readVInt();
chunkSize = vectorsStream.readVInt();
decompressor = compressionMode.newDecompressor();
this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this, indexStream);
}
}
}
CompressionMode getCompressionMode() {
return compressionMode;
}
int getChunkSize() {
return chunkSize;
}
int getPackedIntsVersion() {
return packedIntsVersion;
}
CompressingStoredFieldsIndexReader getIndex() {
return indexReader;
}
IndexInput getVectorsStream() {
return vectorsStream;
}
/**
* @throws AlreadyClosedException if this TermVectorsReader is closed
*/
private void ensureOpen() throws AlreadyClosedException {
if (closed) {
throw new AlreadyClosedException("this FieldsReader is closed");
}
}
@Override
public void close() throws IOException {
if (!closed) {
IOUtils.close(vectorsStream);
closed = true;
}
}
@Override
public TermVectorsReader clone() {
return new CompressingTermVectorsReader(this);
}
@Override
public Fields get(int doc) throws IOException {
ensureOpen();
// seek to the right place
{
final long startPointer = indexReader.getStartPointer(doc);
vectorsStream.seek(startPointer);
}
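// decode the chunk header: a compressed chunk holds the term vectors of one or more
// consecutive documents, so we first find out which documents it covers and, further below,
// how many fields precede the requested document inside it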
// decode
// - docBase: first doc ID of the chunk
// - chunkDocs: number of docs of the chunk
final int docBase = vectorsStream.readVInt();
final int chunkDocs = vectorsStream.readVInt();
if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
}
final int skip; // number of fields to skip
final int numFields; // number of fields of the document we're looking for
final int totalFields; // total number of fields of the chunk (sum for all docs)
if (chunkDocs == 1) {
skip = 0;
numFields = totalFields = vectorsStream.readVInt();
} else {
reader.reset(vectorsStream, chunkDocs);
int sum = 0;
for (int i = docBase; i < doc; ++i) {
sum += reader.next();
}
skip = sum;
numFields = (int) reader.next();
sum += numFields;
for (int i = doc + 1; i < docBase + chunkDocs; ++i) {
sum += reader.next();
}
totalFields = sum;
}
if (numFields == 0) {
// no vectors
return null;
}
// read field numbers that have term vectors
final int[] fieldNums;
{
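// the first byte packs the encoding of field numbers: its 5 low-order bits give the number
// of bits per field number and its 3 high-order bits give the number of distinct fields
// minus one, the value 0x07 meaning that an additional vInt is added first; e.g. a token of
// 0x45 means 5 bits per field number and 3 distinct fields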
final int token = vectorsStream.readByte() & 0xFF;
assert token != 0; // means no term vectors, cannot happen since we checked for numFields == 0
final int bitsPerFieldNum = token & 0x1F;
int totalDistinctFields = token >>> 5;
if (totalDistinctFields == 0x07) {
totalDistinctFields += vectorsStream.readVInt();
}
++totalDistinctFields;
final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
fieldNums = new int[totalDistinctFields];
for (int i = 0; i < totalDistinctFields; ++i) {
fieldNums[i] = (int) it.next();
}
}
// read field numbers and flags
final int[] fieldNumOffs = new int[numFields];
final PackedInts.Reader flags;
{
final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
final PackedInts.Reader allFieldNumOffs = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
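// flags are stored in one of two layouts: 0 means one flags value per distinct field number
// (expanded below to one value per field occurrence), 1 means one flags value per field
// occurrence of the chunk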
switch (vectorsStream.readVInt()) {
case 0:
final PackedInts.Reader fieldFlags = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, fieldNums.length, FLAGS_BITS);
PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
for (int i = 0; i < totalFields; ++i) {
final int fieldNumOff = (int) allFieldNumOffs.get(i);
assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
final int fgs = (int) fieldFlags.get(fieldNumOff);
f.set(i, fgs);
}
flags = f;
break;
case 1:
flags = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, FLAGS_BITS);
break;
default:
throw new AssertionError();
}
for (int i = 0; i < numFields; ++i) {
fieldNumOffs[i] = (int) allFieldNumOffs.get(skip + i);
}
}
// number of terms per field for all fields
final PackedInts.Reader numTerms;
final int totalTerms;
{
final int bitsRequired = vectorsStream.readVInt();
numTerms = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
int sum = 0;
for (int i = 0; i < totalFields; ++i) {
sum += numTerms.get(i);
}
totalTerms = sum;
}
// term lengths
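// terms are prefix-compressed: for every term the number of bytes shared with the previous
// term (prefix length) and the number of new bytes (suffix length) are stored, and the
// suffix bytes (rather than full terms) are what gets decompressed further below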
int docOff = 0, docLen = 0, totalLen;
final int[] fieldLengths = new int[numFields];
final int[][] prefixLengths = new int[numFields][];
final int[][] suffixLengths = new int[numFields][];
{
reader.reset(vectorsStream, totalTerms);
// skip
int toSkip = 0;
for (int i = 0; i < skip; ++i) {
toSkip += numTerms.get(i);
}
reader.skip(toSkip);
// read prefix lengths
for (int i = 0; i < numFields; ++i) {
final int termCount = (int) numTerms.get(skip + i);
final int[] fieldPrefixLengths = new int[termCount];
prefixLengths[i] = fieldPrefixLengths;
for (int j = 0; j < termCount; ) {
final LongsRef next = reader.next(termCount - j);
for (int k = 0; k < next.length; ++k) {
fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];
}
}
}
reader.skip(totalTerms - reader.ord());
reader.reset(vectorsStream, totalTerms);
// skip
toSkip = 0;
for (int i = 0; i < skip; ++i) {
for (int j = 0; j < numTerms.get(i); ++j) {
docOff += reader.next();
}
}
for (int i = 0; i < numFields; ++i) {
final int termCount = (int) numTerms.get(skip + i);
final int[] fieldSuffixLengths = new int[termCount];
suffixLengths[i] = fieldSuffixLengths;
for (int j = 0; j < termCount; ) {
final LongsRef next = reader.next(termCount - j);
for (int k = 0; k < next.length; ++k) {
fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];
}
}
fieldLengths[i] = sum(suffixLengths[i]);
docLen += fieldLengths[i];
}
totalLen = docOff + docLen;
for (int i = skip + numFields; i < totalFields; ++i) {
for (int j = 0; j < numTerms.get(i); ++j) {
totalLen += reader.next();
}
}
}
// term freqs
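// term frequencies are stored as (freq - 1) since every listed term occurs at least once,
// hence the "1 +" when decoding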
final int[] termFreqs = new int[totalTerms];
{
reader.reset(vectorsStream, totalTerms);
for (int i = 0; i < totalTerms; ) {
final LongsRef next = reader.next(totalTerms - i);
for (int k = 0; k < next.length; ++k) {
termFreqs[i++] = 1 + (int) next.longs[next.offset + k];
}
}
}
// total number of positions, offsets and payloads
int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
for (int i = 0, termIndex = 0; i < totalFields; ++i) {
final int f = (int) flags.get(i);
final int termCount = (int) numTerms.get(i);
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex++];
if ((f & POSITIONS) != 0) {
totalPositions += freq;
}
if ((f & OFFSETS) != 0) {
totalOffsets += freq;
}
if ((f & PAYLOADS) != 0) {
totalPayloads += freq;
}
}
assert i != totalFields - 1 || termIndex == totalTerms : termIndex + " " + totalTerms;
}
final int[][] positionIndex = positionIndex(skip, numFields, numTerms, termFreqs);
final int[][] positions, startOffsets, lengths;
if (totalPositions > 0) {
positions = readPositions(skip, numFields, flags, numTerms, termFreqs, POSITIONS, totalPositions, positionIndex);
} else {
positions = new int[numFields][];
}
if (totalOffsets > 0) {
// average number of chars per term
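// start offsets are stored as deltas from position * (average chars per term of the field),
// so the expected value is added back below before the per-term delta decoding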
final float[] charsPerTerm = new float[fieldNums.length];
for (int i = 0; i < charsPerTerm.length; ++i) {
charsPerTerm[i] = Float.intBitsToFloat(vectorsStream.readInt());
}
startOffsets = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
lengths = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
for (int i = 0; i < numFields; ++i) {
final int[] fStartOffsets = startOffsets[i];
final int[] fPositions = positions[i];
// patch offsets from positions
if (fStartOffsets != null && fPositions != null) {
final float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
for (int j = 0; j < startOffsets[i].length; ++j) {
fStartOffsets[j] += (int) (fieldCharsPerTerm * fPositions[j]);
}
}
if (fStartOffsets != null) {
final int[] fPrefixLengths = prefixLengths[i];
final int[] fSuffixLengths = suffixLengths[i];
final int[] fLengths = lengths[i];
for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
// delta-decode start offsets and patch lengths using term lengths
final int termLength = fPrefixLengths[j] + fSuffixLengths[j];
lengths[i][positionIndex[i][j]] += termLength;
for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
fStartOffsets[k] += fStartOffsets[k - 1];
fLengths[k] += termLength;
}
}
}
}
} else {
startOffsets = lengths = new int[numFields][];
}
if (totalPositions > 0) {
// delta-decode positions
for (int i = 0; i < numFields; ++i) {
final int[] fPositions = positions[i];
final int[] fpositionIndex = positionIndex[i];
if (fPositions != null) {
for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
// delta-decode start offsets
for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
fPositions[k] += fPositions[k - 1];
}
}
}
}
}
// payload lengths
final int[][] payloadIndex = new int[numFields][];
int totalPayloadLength = 0;
int payloadOff = 0;
int payloadLen = 0;
if (totalPayloads > 0) {
reader.reset(vectorsStream, totalPayloads);
// skip
int termIndex = 0;
for (int i = 0; i < skip; ++i) {
final int f = (int) flags.get(i);
final int termCount = (int) numTerms.get(i);
if ((f & PAYLOADS) != 0) {
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k) {
final int l = (int) reader.next();
payloadOff += l;
}
}
}
termIndex += termCount;
}
totalPayloadLength = payloadOff;
// read doc payload lengths
for (int i = 0; i < numFields; ++i) {
final int f = (int) flags.get(skip + i);
final int termCount = (int) numTerms.get(skip + i);
if ((f & PAYLOADS) != 0) {
final int totalFreq = positionIndex[i][termCount];
payloadIndex[i] = new int[totalFreq + 1];
int posIdx = 0;
payloadIndex[i][posIdx] = payloadLen;
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k) {
final int payloadLength = (int) reader.next();
payloadLen += payloadLength;
payloadIndex[i][posIdx+1] = payloadLen;
++posIdx;
}
}
assert posIdx == totalFreq;
}
termIndex += termCount;
}
totalPayloadLength += payloadLen;
for (int i = skip + numFields; i < totalFields; ++i) {
final int f = (int) flags.get(i);
final int termCount = (int) numTerms.get(i);
if ((f & PAYLOADS) != 0) {
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k) {
totalPayloadLength += reader.next();
}
}
}
termIndex += termCount;
}
assert termIndex == totalTerms : termIndex + " " + totalTerms;
}
// decompress data
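// decompress only the slice of the chunk needed for this document: the first docLen bytes
// are its term suffix bytes and the following payloadLen bytes are its payload bytes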
final BytesRef suffixBytes = new BytesRef();
decompressor.decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
suffixBytes.length = docLen;
final BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
final int[] fieldFlags = new int[numFields];
for (int i = 0; i < numFields; ++i) {
fieldFlags[i] = (int) flags.get(skip + i);
}
final int[] fieldNumTerms = new int[numFields];
for (int i = 0; i < numFields; ++i) {
fieldNumTerms[i] = (int) numTerms.get(skip + i);
}
final int[][] fieldTermFreqs = new int[numFields][];
{
int termIdx = 0;
for (int i = 0; i < skip; ++i) {
termIdx += numTerms.get(i);
}
for (int i = 0; i < numFields; ++i) {
final int termCount = (int) numTerms.get(skip + i);
fieldTermFreqs[i] = new int[termCount];
for (int j = 0; j < termCount; ++j) {
fieldTermFreqs[i][j] = termFreqs[termIdx++];
}
}
}
assert sum(fieldLengths) == docLen : sum(fieldLengths) + " != " + docLen;
return new TVFields(fieldNums, fieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths,
prefixLengths, suffixLengths, fieldTermFreqs,
positionIndex, positions, startOffsets, lengths,
payloadBytes, payloadIndex,
suffixBytes);
}
// field -> term index -> position index
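// positionIndex[i][j] is the total frequency of the first j terms of field i, i.e. the start
// index of term j's entries in the per-field position/offset/length arrays; the last entry
// is the total frequency of the field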
private int[][] positionIndex(int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
final int[][] positionIndex = new int[numFields][];
int termIndex = 0;
for (int i = 0; i < skip; ++i) {
final int termCount = (int) numTerms.get(i);
termIndex += termCount;
}
for (int i = 0; i < numFields; ++i) {
final int termCount = (int) numTerms.get(skip + i);
positionIndex[i] = new int[termCount + 1];
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex+j];
positionIndex[i][j + 1] = positionIndex[i][j] + freq;
}
termIndex += termCount;
}
return positionIndex;
}
private int[][] readPositions(int skip, int numFields, PackedInts.Reader flags, PackedInts.Reader numTerms, int[] termFreqs, int flag, final int totalPositions, int[][] positionIndex) throws IOException {
final int[][] positions = new int[numFields][];
reader.reset(vectorsStream, totalPositions);
// skip
int toSkip = 0;
int termIndex = 0;
for (int i = 0; i < skip; ++i) {
final int f = (int) flags.get(i);
final int termCount = (int) numTerms.get(i);
if ((f & flag) != 0) {
for (int j = 0; j < termCount; ++j) {
final int freq = termFreqs[termIndex+j];
toSkip += freq;
}
}
termIndex += termCount;
}
reader.skip(toSkip);
// read doc positions
for (int i = 0; i < numFields; ++i) {
final int f = (int) flags.get(skip + i);
final int termCount = (int) numTerms.get(skip + i);
if ((f & flag) != 0) {
final int totalFreq = positionIndex[i][termCount];
final int[] fieldPositions = new int[totalFreq];
positions[i] = fieldPositions;
for (int j = 0; j < totalFreq; ) {
final LongsRef nextPositions = reader.next(totalFreq - j);
for (int k = 0; k < nextPositions.length; ++k) {
fieldPositions[j++] = (int) nextPositions.longs[nextPositions.offset + k];
}
}
}
termIndex += termCount;
}
reader.skip(totalPositions - reader.ord());
return positions;
}
private class TVFields extends Fields {
private final int[] fieldNums, fieldFlags, fieldNumOffs, numTerms, fieldLengths;
private final int[][] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
private final BytesRef suffixBytes, payloadBytes;
public TVFields(int[] fieldNums, int[] fieldFlags, int[] fieldNumOffs, int[] numTerms, int[] fieldLengths,
int[][] prefixLengths, int[][] suffixLengths, int[][] termFreqs,
int[][] positionIndex, int[][] positions, int[][] startOffsets, int[][] lengths,
BytesRef payloadBytes, int[][] payloadIndex,
BytesRef suffixBytes) {
this.fieldNums = fieldNums;
this.fieldFlags = fieldFlags;
this.fieldNumOffs = fieldNumOffs;
this.numTerms = numTerms;
this.fieldLengths = fieldLengths;
this.prefixLengths = prefixLengths;
this.suffixLengths = suffixLengths;
this.termFreqs = termFreqs;
this.positionIndex = positionIndex;
this.positions = positions;
this.startOffsets = startOffsets;
this.lengths = lengths;
this.payloadBytes = payloadBytes;
this.payloadIndex = payloadIndex;
this.suffixBytes = suffixBytes;
}
@Override
public Iterator<String> iterator() {
return new Iterator<String>() {
int i = 0;
@Override
public boolean hasNext() {
return i < fieldNumOffs.length;
}
@Override
public String next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
final int fieldNum = fieldNums[fieldNumOffs[i++]];
return fieldInfos.fieldInfo(fieldNum).name;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@Override
public Terms terms(String field) throws IOException {
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
if (fieldInfo == null) {
return null;
}
int idx = -1;
for (int i = 0; i < fieldNumOffs.length; ++i) {
if (fieldNums[fieldNumOffs[i]] == fieldInfo.number) {
idx = i;
break;
}
}
if (idx == -1 || numTerms[idx] == 0) {
// no term
return null;
}
int fieldOff = 0, fieldLen = -1;
for (int i = 0; i < fieldNumOffs.length; ++i) {
if (i < idx) {
fieldOff += fieldLengths[i];
} else {
fieldLen = fieldLengths[i];
break;
}
}
assert fieldLen >= 0;
return new TVTerms(numTerms[idx], fieldFlags[idx],
prefixLengths[idx], suffixLengths[idx], termFreqs[idx],
positionIndex[idx], positions[idx], startOffsets[idx], lengths[idx],
payloadIndex[idx], payloadBytes,
new BytesRef(suffixBytes.bytes, suffixBytes.offset + fieldOff, fieldLen));
}
@Override
public int size() {
return fieldNumOffs.length;
}
}
private class TVTerms extends Terms {
private final int numTerms, flags;
private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
private final BytesRef termBytes, payloadBytes;
TVTerms(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs,
int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths,
int[] payloadIndex, BytesRef payloadBytes,
BytesRef termBytes) {
this.numTerms = numTerms;
this.flags = flags;
this.prefixLengths = prefixLengths;
this.suffixLengths = suffixLengths;
this.termFreqs = termFreqs;
this.positionIndex = positionIndex;
this.positions = positions;
this.startOffsets = startOffsets;
this.lengths = lengths;
this.payloadIndex = payloadIndex;
this.payloadBytes = payloadBytes;
this.termBytes = termBytes;
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
final TVTermsEnum termsEnum;
if (reuse != null && reuse instanceof TVTermsEnum) {
termsEnum = (TVTermsEnum) reuse;
} else {
termsEnum = new TVTermsEnum();
}
termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths,
payloadIndex, payloadBytes,
new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
return termsEnum;
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public long size() throws IOException {
return numTerms;
}
@Override
public long getSumTotalTermFreq() throws IOException {
return -1L;
}
@Override
public long getSumDocFreq() throws IOException {
return numTerms;
}
@Override
public int getDocCount() throws IOException {
return 1;
}
@Override
public boolean hasFreqs() {
return true;
}
@Override
public boolean hasOffsets() {
return (flags & OFFSETS) != 0;
}
@Override
public boolean hasPositions() {
return (flags & POSITIONS) != 0;
}
@Override
public boolean hasPayloads() {
return (flags & PAYLOADS) != 0;
}
}
private static class TVTermsEnum extends TermsEnum {
private int numTerms, startPos, ord;
private int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
private ByteArrayDataInput in;
private BytesRef payloads;
private final BytesRef term;
private TVTermsEnum() {
term = new BytesRef(16);
}
void reset(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs, int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths,
int[] payloadIndex, BytesRef payloads, ByteArrayDataInput in) {
this.numTerms = numTerms;
this.prefixLengths = prefixLengths;
this.suffixLengths = suffixLengths;
this.termFreqs = termFreqs;
this.positionIndex = positionIndex;
this.positions = positions;
this.startOffsets = startOffsets;
this.lengths = lengths;
this.payloadIndex = payloadIndex;
this.payloads = payloads;
this.in = in;
startPos = in.getPosition();
reset();
}
void reset() {
term.length = 0;
in.setPosition(startPos);
ord = -1;
}
@Override
public BytesRef next() throws IOException {
if (ord == numTerms - 1) {
return null;
} else {
assert ord < numTerms;
++ord;
}
// read term
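// incremental decoding: the first prefixLengths[ord] bytes are shared with the previous term
// and are already in term.bytes, so only suffixLengths[ord] new bytes are read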
term.offset = 0;
term.length = prefixLengths[ord] + suffixLengths[ord];
if (term.length > term.bytes.length) {
term.bytes = ArrayUtil.grow(term.bytes, term.length);
}
in.readBytes(term.bytes, prefixLengths[ord], suffixLengths[ord]);
return term;
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public SeekStatus seekCeil(BytesRef text)
throws IOException {
if (ord < numTerms && ord >= 0) {
final int cmp = term().compareTo(text);
if (cmp == 0) {
return SeekStatus.FOUND;
} else if (cmp > 0) {
reset();
}
}
// linear scan
while (true) {
final BytesRef term = next();
if (term == null) {
return SeekStatus.END;
}
final int cmp = term.compareTo(text);
if (cmp > 0) {
return SeekStatus.NOT_FOUND;
} else if (cmp == 0) {
return SeekStatus.FOUND;
}
}
}
@Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public BytesRef term() throws IOException {
return term;
}
@Override
public long ord() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int docFreq() throws IOException {
return 1;
}
@Override
public long totalTermFreq() throws IOException {
return termFreqs[ord];
}
@Override
public final DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
final TVDocsEnum docsEnum;
if (reuse != null && reuse instanceof TVDocsEnum) {
docsEnum = (TVDocsEnum) reuse;
} else {
docsEnum = new TVDocsEnum();
}
docsEnum.reset(liveDocs, termFreqs[ord], positionIndex[ord], positions, startOffsets, lengths, payloads, payloadIndex);
return docsEnum;
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (positions == null && startOffsets == null) {
return null;
}
// TODO: slightly sheisty
return (DocsAndPositionsEnum) docs(liveDocs, reuse, flags);
}
}
private static class TVDocsEnum extends DocsAndPositionsEnum {
private Bits liveDocs;
private int doc = -1;
private int termFreq;
private int positionIndex;
private int[] positions;
private int[] startOffsets;
private int[] lengths;
private final BytesRef payload;
private int[] payloadIndex;
private int basePayloadOffset;
private int i;
TVDocsEnum() {
payload = new BytesRef();
}
public void reset(Bits liveDocs, int freq, int positionIndex, int[] positions,
int[] startOffsets, int[] lengths, BytesRef payloads,
int[] payloadIndex) {
this.liveDocs = liveDocs;
this.termFreq = freq;
this.positionIndex = positionIndex;
this.positions = positions;
this.startOffsets = startOffsets;
this.lengths = lengths;
this.basePayloadOffset = payloads.offset;
this.payload.bytes = payloads.bytes;
payload.offset = payload.length = 0;
this.payloadIndex = payloadIndex;
doc = i = -1;
}
private void checkDoc() {
if (doc == NO_MORE_DOCS) {
throw new IllegalStateException("DocsEnum exhausted");
} else if (doc == -1) {
throw new IllegalStateException("DocsEnum not started");
}
}
private void checkPosition() {
checkDoc();
if (i < 0) {
throw new IllegalStateException("Position enum not started");
} else if (i >= termFreq) {
throw new IllegalStateException("Read past last position");
}
}
@Override
public int nextPosition() throws IOException {
if (doc != 0) {
throw new IllegalStateException();
} else if (i >= termFreq - 1) {
throw new IllegalStateException("Read past last position");
}
++i;
if (payloadIndex != null) {
payload.offset = basePayloadOffset + payloadIndex[positionIndex + i];
payload.length = payloadIndex[positionIndex + i + 1] - payloadIndex[positionIndex + i];
}
if (positions == null) {
return -1;
} else {
return positions[positionIndex + i];
}
}
@Override
public int startOffset() throws IOException {
checkPosition();
if (startOffsets == null) {
return -1;
} else {
return startOffsets[positionIndex + i];
}
}
@Override
public int endOffset() throws IOException {
checkPosition();
if (startOffsets == null) {
return -1;
} else {
return startOffsets[positionIndex + i] + lengths[positionIndex + i];
}
}
@Override
public BytesRef getPayload() throws IOException {
checkPosition();
if (payloadIndex == null || payload.length == 0) {
return null;
} else {
return payload;
}
}
@Override
public int freq() throws IOException {
checkDoc();
return termFreq;
}
@Override
public int docID() {
return doc;
}
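// term vectors describe a single document, so this enum exposes exactly one doc (id 0): the
// first call to nextDoc() returns it unless it is filtered out by liveDocs, and every later
// call returns NO_MORE_DOCS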
@Override
public int nextDoc() throws IOException {
if (doc == -1 && (liveDocs == null || liveDocs.get(0))) {
return (doc = 0);
} else {
return (doc = NO_MORE_DOCS);
}
}
@Override
public int advance(int target) throws IOException {
return slowAdvance(target);
}
@Override
public long cost() {
return 1;
}
}
private static int sum(int[] arr) {
int sum = 0;
for (int el : arr) {
sum += el;
}
return sum;
}
@Override
public long ramBytesUsed() {
return indexReader.ramBytesUsed();
}
}