All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.codecs.simpletext.SimpleTextSkipReader Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.simpletext;

import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.CHILD_POINTER;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.FREQ;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.IMPACT;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.IMPACTS;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.IMPACTS_END;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.LEVEL_LENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.NORM;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.SKIP_DOC;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.SKIP_DOC_FP;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.SKIP_LIST;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.codecs.MultiLevelSkipListReader;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.StringHelper;

/**
 * This class reads skip lists with multiple levels.
 *
 * 

See {@link SimpleTextFieldsWriter} for the information about the encoding of the multi level * skip lists. * * @lucene.experimental */ class SimpleTextSkipReader extends MultiLevelSkipListReader { private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder(); private final BytesRefBuilder scratch = new BytesRefBuilder(); private Impacts impacts; private List> perLevelImpacts; private long nextSkipDocFP = -1; private int numLevels = 1; private boolean hasSkipList = false; SimpleTextSkipReader(IndexInput skipStream) { super( skipStream, SimpleTextSkipWriter.maxSkipLevels, SimpleTextSkipWriter.BLOCK_SIZE, SimpleTextSkipWriter.skipMultiplier); impacts = new Impacts() { @Override public int numLevels() { return numLevels; } @Override public int getDocIdUpTo(int level) { return skipDoc[level]; } @Override public List getImpacts(int level) { assert level < numLevels; return perLevelImpacts.get(level); } }; init(); } @Override public int skipTo(int target) throws IOException { if (!hasSkipList) { return -1; } int result = super.skipTo(target); if (numberOfSkipLevels > 0) { numLevels = numberOfSkipLevels; } else { // End of postings don't have skip data anymore, so we fill with dummy data // like SlowImpactsEnum. numLevels = 1; perLevelImpacts.add(0, Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L))); } return result; } @Override protected int readSkipData(int level, IndexInput skipStream) throws IOException { perLevelImpacts.get(level).clear(); int skipDoc = DocIdSetIterator.NO_MORE_DOCS; ChecksumIndexInput input = new BufferedChecksumIndexInput(skipStream); int freq = 1; while (true) { SimpleTextUtil.readLine(input, scratch); if (scratch.get().equals(SimpleTextFieldsWriter.END)) { SimpleTextUtil.checkFooter(input); break; } else if (scratch.get().equals(IMPACTS_END) || scratch.get().equals(SimpleTextFieldsWriter.TERM) || scratch.get().equals(SimpleTextFieldsWriter.FIELD)) { break; } else if (StringHelper.startsWith(scratch.get(), SKIP_LIST)) { // continue } else if (StringHelper.startsWith(scratch.get(), SKIP_DOC)) { scratchUTF16.copyUTF8Bytes( scratch.bytes(), SKIP_DOC.length, scratch.length() - SKIP_DOC.length); skipDoc = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); // Because the MultiLevelSkipListReader stores doc id delta,but simple text codec stores doc // id skipDoc = skipDoc - super.skipDoc[level]; } else if (StringHelper.startsWith(scratch.get(), SKIP_DOC_FP)) { scratchUTF16.copyUTF8Bytes( scratch.bytes(), SKIP_DOC_FP.length, scratch.length() - SKIP_DOC_FP.length); nextSkipDocFP = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); } else if (StringHelper.startsWith(scratch.get(), IMPACTS) || StringHelper.startsWith(scratch.get(), IMPACT)) { // continue; } else if (StringHelper.startsWith(scratch.get(), FREQ)) { scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length() - FREQ.length); freq = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); } else if (StringHelper.startsWith(scratch.get(), NORM)) { scratchUTF16.copyUTF8Bytes(scratch.bytes(), NORM.length, scratch.length() - NORM.length); long norm = Long.parseLong(scratchUTF16.toString()); Impact impact = new Impact(freq, norm); perLevelImpacts.get(level).add(impact); } } return skipDoc; } @Override protected long readLevelLength(IndexInput skipStream) throws IOException { SimpleTextUtil.readLine(skipStream, scratch); scratchUTF16.copyUTF8Bytes( scratch.bytes(), LEVEL_LENGTH.length, scratch.length() - LEVEL_LENGTH.length); return Long.parseLong(scratchUTF16.toString()); } @Override protected long readChildPointer(IndexInput skipStream) throws IOException { SimpleTextUtil.readLine(skipStream, scratch); scratchUTF16.copyUTF8Bytes( scratch.bytes(), CHILD_POINTER.length, scratch.length() - CHILD_POINTER.length); return Long.parseLong(scratchUTF16.toString()); } void reset(long skipPointer, int docFreq) throws IOException { init(); if (skipPointer > 0) { super.init(skipPointer, docFreq); hasSkipList = true; } } private void init() { nextSkipDocFP = -1; numLevels = 1; perLevelImpacts = new ArrayList<>(maxNumberOfSkipLevels); for (int level = 0; level < maxNumberOfSkipLevels; level++) { List impacts = new ArrayList<>(); impacts.add(new Impact(Integer.MAX_VALUE, 1L)); perLevelImpacts.add(level, impacts); } hasSkipList = false; } Impacts getImpacts() { return impacts; } long getNextSkipDocFP() { return nextSkipDocFP; } int getNextSkipDoc() { if (!hasSkipList) { return DocIdSetIterator.NO_MORE_DOCS; } return skipDoc[0]; } boolean hasSkipList() { return hasSkipList; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy