org.apache.lucene.codecs.simpletext.SimpleTextSkipReader Maven / Gradle / Ivy
Show all versions of lucene-codecs Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.simpletext;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.CHILD_POINTER;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.FREQ;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.IMPACT;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.IMPACTS;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.IMPACTS_END;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.LEVEL_LENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.NORM;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.SKIP_DOC;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.SKIP_DOC_FP;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.SKIP_LIST;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.codecs.MultiLevelSkipListReader;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.StringHelper;
/**
* This class reads skip lists with multiple levels.
*
* See {@link SimpleTextFieldsWriter} for the information about the encoding of the multi level
* skip lists.
*
* @lucene.experimental
*/
class SimpleTextSkipReader extends MultiLevelSkipListReader {
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
private final BytesRefBuilder scratch = new BytesRefBuilder();
private Impacts impacts;
private List> perLevelImpacts;
private long nextSkipDocFP = -1;
private int numLevels = 1;
private boolean hasSkipList = false;
SimpleTextSkipReader(IndexInput skipStream) {
super(
skipStream,
SimpleTextSkipWriter.maxSkipLevels,
SimpleTextSkipWriter.BLOCK_SIZE,
SimpleTextSkipWriter.skipMultiplier);
impacts =
new Impacts() {
@Override
public int numLevels() {
return numLevels;
}
@Override
public int getDocIdUpTo(int level) {
return skipDoc[level];
}
@Override
public List getImpacts(int level) {
assert level < numLevels;
return perLevelImpacts.get(level);
}
};
init();
}
@Override
public int skipTo(int target) throws IOException {
if (!hasSkipList) {
return -1;
}
int result = super.skipTo(target);
if (numberOfSkipLevels > 0) {
numLevels = numberOfSkipLevels;
} else {
// End of postings don't have skip data anymore, so we fill with dummy data
// like SlowImpactsEnum.
numLevels = 1;
perLevelImpacts.add(0, Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L)));
}
return result;
}
@Override
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
perLevelImpacts.get(level).clear();
int skipDoc = DocIdSetIterator.NO_MORE_DOCS;
ChecksumIndexInput input = new BufferedChecksumIndexInput(skipStream);
int freq = 1;
while (true) {
SimpleTextUtil.readLine(input, scratch);
if (scratch.get().equals(SimpleTextFieldsWriter.END)) {
SimpleTextUtil.checkFooter(input);
break;
} else if (scratch.get().equals(IMPACTS_END)
|| scratch.get().equals(SimpleTextFieldsWriter.TERM)
|| scratch.get().equals(SimpleTextFieldsWriter.FIELD)) {
break;
} else if (StringHelper.startsWith(scratch.get(), SKIP_LIST)) {
// continue
} else if (StringHelper.startsWith(scratch.get(), SKIP_DOC)) {
scratchUTF16.copyUTF8Bytes(
scratch.bytes(), SKIP_DOC.length, scratch.length() - SKIP_DOC.length);
skipDoc = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
// Because the MultiLevelSkipListReader stores doc id delta,but simple text codec stores doc
// id
skipDoc = skipDoc - super.skipDoc[level];
} else if (StringHelper.startsWith(scratch.get(), SKIP_DOC_FP)) {
scratchUTF16.copyUTF8Bytes(
scratch.bytes(), SKIP_DOC_FP.length, scratch.length() - SKIP_DOC_FP.length);
nextSkipDocFP = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
} else if (StringHelper.startsWith(scratch.get(), IMPACTS)
|| StringHelper.startsWith(scratch.get(), IMPACT)) {
// continue;
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length() - FREQ.length);
freq = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
} else if (StringHelper.startsWith(scratch.get(), NORM)) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), NORM.length, scratch.length() - NORM.length);
long norm = Long.parseLong(scratchUTF16.toString());
Impact impact = new Impact(freq, norm);
perLevelImpacts.get(level).add(impact);
}
}
return skipDoc;
}
@Override
protected long readLevelLength(IndexInput skipStream) throws IOException {
SimpleTextUtil.readLine(skipStream, scratch);
scratchUTF16.copyUTF8Bytes(
scratch.bytes(), LEVEL_LENGTH.length, scratch.length() - LEVEL_LENGTH.length);
return Long.parseLong(scratchUTF16.toString());
}
@Override
protected long readChildPointer(IndexInput skipStream) throws IOException {
SimpleTextUtil.readLine(skipStream, scratch);
scratchUTF16.copyUTF8Bytes(
scratch.bytes(), CHILD_POINTER.length, scratch.length() - CHILD_POINTER.length);
return Long.parseLong(scratchUTF16.toString());
}
void reset(long skipPointer, int docFreq) throws IOException {
init();
if (skipPointer > 0) {
super.init(skipPointer, docFreq);
hasSkipList = true;
}
}
private void init() {
nextSkipDocFP = -1;
numLevels = 1;
perLevelImpacts = new ArrayList<>(maxNumberOfSkipLevels);
for (int level = 0; level < maxNumberOfSkipLevels; level++) {
List impacts = new ArrayList<>();
impacts.add(new Impact(Integer.MAX_VALUE, 1L));
perLevelImpacts.add(level, impacts);
}
hasSkipList = false;
}
Impacts getImpacts() {
return impacts;
}
long getNextSkipDocFP() {
return nextSkipDocFP;
}
int getNextSkipDoc() {
if (!hasSkipList) {
return DocIdSetIterator.NO_MORE_DOCS;
}
return skipDoc[0];
}
boolean hasSkipList() {
return hasSkipList;
}
}