// org.apache.lucene.codecs.simpletext.SimpleTextFieldsReader (Maven / Gradle / Ivy)
// Page navigation: Go to download | Show more of this group | Show more artifacts with this name
// Page navigation: Show all versions of lucene-codecs | Show documentation
// Artifact description: Codecs and postings formats for Apache Lucene.
// Listing shows the newest version of the artifact.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.simpletext;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
import static org.apache.lucene.codecs.simpletext.SimpleTextSkipWriter.SKIP_LIST;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
class SimpleTextFieldsReader extends FieldsProducer {
/** Maps each field name to the file pointer just past that field's FIELD header line. */
private final TreeMap<String, Long> fields;

/** Postings file input; enumerators and term loading work on clones of it. */
private final IndexInput in;

/** Field metadata of the segment, used to resolve a field name to its {@link FieldInfo}. */
private final FieldInfos fieldInfos;

/** Document count of the segment; sizes the visited-docs bit set while loading terms. */
private final int maxDoc;
/**
 * Opens the segment's postings file and records where each field's data starts.
 *
 * <p>If scanning the file fails for any reason, this reader is closed (suppressing secondary
 * failures from the close itself) before the original failure is rethrown.
 *
 * @param state segment read state providing the directory, segment name/suffix, field infos and
 *     IO context
 * @throws IOException if the postings file cannot be opened or read
 */
public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
  maxDoc = state.segmentInfo.maxDoc();
  fieldInfos = state.fieldInfos;
  final String postingsFileName =
      SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix);
  in = state.directory.openInput(postingsFileName, state.context);
  try {
    // Scan on a clone so the position of the shared input is left untouched.
    fields = readFields(in.clone());
  } catch (Throwable t) {
    IOUtils.closeWhileHandlingException(this);
    throw t;
  }
}
/**
 * Scans the postings file line by line and records, for every FIELD line, the file pointer
 * immediately after that line (i.e. where the field's term data begins).
 *
 * <p>Reading goes through a checksumming wrapper; when the END line is reached the footer is
 * checked via {@code SimpleTextUtil.checkFooter} before the map is returned.
 *
 * @param in input positioned at the start of the postings file; it is wrapped, not closed
 * @return field name to start-of-terms file pointer, sorted by field name
 * @throws IOException if reading fails or the footer check fails
 */
private TreeMap<String, Long> readFields(IndexInput in) throws IOException {
  final ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
  final BytesRefBuilder scratch = new BytesRefBuilder();
  final TreeMap<String, Long> fields = new TreeMap<>();
  while (true) {
    SimpleTextUtil.readLine(input, scratch);
    if (scratch.get().equals(END)) {
      SimpleTextUtil.checkFooter(input);
      return fields;
    } else if (StringHelper.startsWith(scratch.get(), FIELD)) {
      // The field name is everything after the FIELD prefix, encoded as UTF-8.
      final String fieldName =
          new String(
              scratch.bytes(),
              FIELD.length,
              scratch.length() - FIELD.length,
              StandardCharsets.UTF_8);
      fields.put(fieldName, input.getFilePointer());
    }
    // Any other line (terms, docs, positions, ...) is irrelevant for the field index.
  }
}
private class SimpleTextTermsEnum extends BaseTermsEnum {
private final IndexOptions indexOptions;
private int docFreq;
private long totalTermFreq;
private long docsStart;
private long skipPointer;
private boolean ended;
private final BytesRefFSTEnum<
PairOutputs.Pair, PairOutputs.Pair>>
fstEnum;
public SimpleTextTermsEnum(
FST, PairOutputs.Pair>> fst,
IndexOptions indexOptions) {
this.indexOptions = indexOptions;
fstEnum = new BytesRefFSTEnum<>(fst);
}
@Override
public boolean seekExact(BytesRef text) throws IOException {
final BytesRefFSTEnum.InputOutput<
PairOutputs.Pair, PairOutputs.Pair>>
result = fstEnum.seekExact(text);
if (result != null) {
PairOutputs.Pair, PairOutputs.Pair> pair =
result.output;
PairOutputs.Pair pair1 = pair.output1;
PairOutputs.Pair pair2 = pair.output2;
docsStart = pair1.output1;
skipPointer = pair1.output2;
docFreq = pair2.output1.intValue();
totalTermFreq = pair2.output2;
return true;
} else {
return false;
}
}
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
// System.out.println("seek to text=" + text.utf8ToString());
final BytesRefFSTEnum.InputOutput<
PairOutputs.Pair, PairOutputs.Pair>>
result = fstEnum.seekCeil(text);
if (result == null) {
// System.out.println(" end");
return SeekStatus.END;
} else {
// System.out.println(" got text=" + term.utf8ToString());
PairOutputs.Pair, PairOutputs.Pair> pair =
result.output;
PairOutputs.Pair pair1 = pair.output1;
PairOutputs.Pair pair2 = pair.output2;
docsStart = pair1.output1;
skipPointer = pair1.output2;
docFreq = pair2.output1.intValue();
totalTermFreq = pair2.output2;
if (result.input.equals(text)) {
// System.out.println(" match docsStart=" + docsStart);
return SeekStatus.FOUND;
} else {
// System.out.println(" not match docsStart=" + docsStart);
return SeekStatus.NOT_FOUND;
}
}
}
@Override
public BytesRef next() throws IOException {
assert !ended;
final BytesRefFSTEnum.InputOutput<
PairOutputs.Pair, PairOutputs.Pair>>
result = fstEnum.next();
if (result != null) {
PairOutputs.Pair, PairOutputs.Pair> pair =
result.output;
PairOutputs.Pair pair1 = pair.output1;
PairOutputs.Pair pair2 = pair.output2;
docsStart = pair1.output1;
skipPointer = pair1.output2;
docFreq = pair2.output1.intValue();
totalTermFreq = pair2.output2;
return result.input;
} else {
return null;
}
}
@Override
public BytesRef term() {
return fstEnum.current().input;
}
@Override
public long ord() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void seekExact(long ord) {
throw new UnsupportedOperationException();
}
@Override
public int docFreq() {
return docFreq;
}
@Override
public long totalTermFreq() {
return indexOptions == IndexOptions.DOCS ? docFreq : totalTermFreq;
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
boolean hasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
SimpleTextPostingsEnum docsAndPositionsEnum;
if (reuse != null
&& reuse instanceof SimpleTextPostingsEnum
&& ((SimpleTextPostingsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsAndPositionsEnum = (SimpleTextPostingsEnum) reuse;
} else {
docsAndPositionsEnum = new SimpleTextPostingsEnum();
}
return docsAndPositionsEnum.reset(docsStart, indexOptions, docFreq, skipPointer);
}
SimpleTextDocsEnum docsEnum;
if (reuse != null
&& reuse instanceof SimpleTextDocsEnum
&& ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsEnum = (SimpleTextDocsEnum) reuse;
} else {
docsEnum = new SimpleTextDocsEnum();
}
return docsEnum.reset(docsStart, indexOptions == IndexOptions.DOCS, docFreq, skipPointer);
}
@Override
public ImpactsEnum impacts(int flags) throws IOException {
if (docFreq <= SimpleTextSkipWriter.BLOCK_SIZE) {
// no skip data
return new SlowImpactsEnum(postings(null, flags));
}
return (ImpactsEnum) postings(null, flags);
}
}
/**
 * Postings enumerator that exposes only document IDs and term frequencies of one term.
 * Positions, offsets and payloads are always reported as absent (-1 / null). Postings lines are
 * parsed on demand from a private clone of the reader's input, and a SimpleTextSkipReader
 * supplies skip targets and impacts.
 */
private class SimpleTextDocsEnum extends ImpactsEnum {
// The enclosing reader's input; used only for the identity check in canReuse.
private final IndexInput inStart;
// Private clone of inStart from which this enum parses postings lines.
private final IndexInput in;
// When true, parsed FREQ values are not stored and freq() keeps the value 1 set by reset().
private boolean omitTF;
// Current document, -1 before the first advance.
private int docID = -1;
// Term frequency reported for the current document.
private int tf;
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
// Set to the term's docFreq in reset() and returned by cost().
private int cost;
// for skip list data
private SimpleTextSkipReader skipReader;
// Last skip doc obtained from skipReader; advanceShallow only consults the reader for targets beyond it.
private int nextSkipDoc = 0;
// File pointer to jump to before the next read, or -1 when no jump is pending.
private long seekTo = -1;
/** Creates an enum with its own clones of the reader's input for postings and skip data. */
public SimpleTextDocsEnum() {
this.inStart = SimpleTextFieldsReader.this.in;
this.in = this.inStart.clone();
this.skipReader = new SimpleTextSkipReader(this.inStart.clone());
}
/** Returns true if this enum was created against exactly the given input instance. */
public boolean canReuse(IndexInput in) {
return in == inStart;
}
/**
 * Repositions this enum at the start of a term's postings and clears all iteration state.
 *
 * @param fp file pointer of the term's first postings line
 * @param omitTF whether frequencies should be ignored (freq() then reports 1)
 * @param docFreq document frequency of the term; becomes the cost
 * @param skipPointer passed to the skip reader together with docFreq
 * @return this enum
 */
public SimpleTextDocsEnum reset(long fp, boolean omitTF, int docFreq, long skipPointer)
throws IOException {
in.seek(fp);
this.omitTF = omitTF;
docID = -1;
tf = 1;
cost = docFreq;
skipReader.reset(skipPointer, docFreq);
nextSkipDoc = 0;
seekTo = -1;
return this;
}
@Override
public int docID() {
return docID;
}
@Override
public int freq() throws IOException {
return tf;
}
// Positions are not available from this enum.
@Override
public int nextPosition() throws IOException {
return -1;
}
// Offsets are not available from this enum.
@Override
public int startOffset() throws IOException {
return -1;
}
@Override
public int endOffset() throws IOException {
return -1;
}
// Payloads are not available from this enum.
@Override
public BytesRef getPayload() throws IOException {
return null;
}
@Override
public int nextDoc() throws IOException {
return advance(docID + 1);
}
/**
 * Parses the next document entry from the current position of {@code in}.
 *
 * Reads the DOC line and the lines that follow it until the next DOC line or a line that is not
 * part of a posting, then rewinds to the start of that terminating line so the following call
 * begins there. Returns the parsed doc ID, or NO_MORE_DOCS when no DOC line was found before the
 * postings ended.
 */
private int readDoc() throws IOException {
// Once exhausted, stay exhausted without touching the input again.
if (docID == NO_MORE_DOCS) {
return docID;
}
// True until the DOC line of the entry being parsed has been seen.
boolean first = true;
int termFreq = 0;
while (true) {
// Remember where this line starts so it can be re-read by the next call.
final long lineStart = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
if (StringHelper.startsWith(scratch.get(), DOC)) {
if (!first) {
// A second DOC line starts the next entry: rewind to it and finish the current one.
in.seek(lineStart);
if (!omitTF) {
tf = termFreq;
}
return docID;
}
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length() - DOC.length);
docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
termFreq = 0;
first = false;
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length() - FREQ.length);
termFreq = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
} else if (StringHelper.startsWith(scratch.get(), POS)) {
// Position lines are ignored by this enum.
} else if (StringHelper.startsWith(scratch.get(), START_OFFSET)) {
// Start-offset lines are ignored by this enum.
} else if (StringHelper.startsWith(scratch.get(), END_OFFSET)) {
// End-offset lines are ignored by this enum.
} else if (StringHelper.startsWith(scratch.get(), PAYLOAD)) {
// Payload lines are ignored by this enum.
} else {
// Anything else must be a line that ends this term's postings.
assert StringHelper.startsWith(scratch.get(), SimpleTextSkipWriter.SKIP_LIST)
|| StringHelper.startsWith(scratch.get(), TERM)
|| StringHelper.startsWith(scratch.get(), FIELD)
|| StringHelper.startsWith(scratch.get(), END)
: "scratch=" + scratch.get().utf8ToString();
if (!first) {
// An entry was in progress: rewind so the next call sees the terminator again.
in.seek(lineStart);
if (!omitTF) {
tf = termFreq;
}
return docID;
}
return docID = NO_MORE_DOCS;
}
}
}
/**
 * Reads documents until one with an ID of at least {@code target} is found, first applying any
 * jump recorded by advanceShallow.
 */
private int advanceTarget(int target) throws IOException {
// A pending skip position is consumed exactly once.
if (seekTo > 0) {
in.seek(seekTo);
seekTo = -1;
}
assert docID() < target;
int doc;
do {
doc = readDoc();
} while (doc < target);
return doc;
}
@Override
public int advance(int target) throws IOException {
advanceShallow(target);
return advanceTarget(target);
}
@Override
public long cost() {
return cost;
}
/**
 * Moves the skip reader to {@code target} when the target lies beyond the last known skip doc.
 * If the skip reader reports a skip doc other than NO_MORE_DOCS, its file pointer is stored in
 * seekTo for the next advanceTarget call.
 */
@Override
public void advanceShallow(int target) throws IOException {
if (target > nextSkipDoc) {
skipReader.skipTo(target);
if (skipReader.getNextSkipDoc() != DocIdSetIterator.NO_MORE_DOCS) {
seekTo = skipReader.getNextSkipDocFP();
}
nextSkipDoc = skipReader.getNextSkipDoc();
}
assert nextSkipDoc >= target;
}
/** Returns the skip reader's impacts after a shallow advance to the current document. */
@Override
public Impacts getImpacts() throws IOException {
advanceShallow(docID);
return skipReader.getImpacts();
}
}
/**
 * Postings enumerator that exposes documents, frequencies and, depending on the field's index
 * options, positions, offsets and payloads of one term. Postings lines are parsed on demand from
 * a private clone of the reader's input, and a SimpleTextSkipReader supplies skip targets and
 * impacts.
 */
private class SimpleTextPostingsEnum extends ImpactsEnum {
// The enclosing reader's input; used only for the identity check in canReuse.
private final IndexInput inStart;
// Private clone of inStart from which this enum parses postings lines.
private final IndexInput in;
// Current document, -1 before the first advance.
private int docID = -1;
// Term frequency of the current document as parsed from its FREQ line.
private int tf;
private final BytesRefBuilder scratch = new BytesRefBuilder();
// Holds the bytes of the most recently read payload.
private final BytesRefBuilder scratch2 = new BytesRefBuilder();
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
private final CharsRefBuilder scratchUTF16_2 = new CharsRefBuilder();
// Position returned by the last nextPosition() call.
private int pos;
// Payload of the last position, or null if that position had none.
private BytesRef payload;
// File pointer at which readDoc() starts parsing the next document entry.
private long nextDocStart;
// Whether START_OFFSET/END_OFFSET lines are expected and parsed in nextPosition().
private boolean readOffsets;
// Whether POS lines are expected and parsed in nextPosition().
private boolean readPositions;
private int startOffset;
private int endOffset;
// Set to the term's docFreq in reset() and returned by cost().
private int cost;
// for skip list data
private SimpleTextSkipReader skipReader;
// Last skip doc obtained from skipReader.
private int nextSkipDoc = 0;
// File pointer to jump to before the next read, or -1 when no jump is pending.
private long seekTo = -1;
/** Creates an enum with its own clones of the reader's input for postings and skip data. */
public SimpleTextPostingsEnum() {
this.inStart = SimpleTextFieldsReader.this.in;
this.in = inStart.clone();
this.skipReader = new SimpleTextSkipReader(this.inStart.clone());
}
/** Returns true if this enum was created against exactly the given input instance. */
public boolean canReuse(IndexInput in) {
return in == inStart;
}
/**
 * Repositions this enum at the start of a term's postings and clears all iteration state.
 *
 * @param fp file pointer of the term's first postings line
 * @param indexOptions index options of the field; decide whether positions/offsets are read
 * @param docFreq document frequency of the term; becomes the cost
 * @param skipPointer passed to the skip reader together with docFreq
 * @return this enum
 */
public SimpleTextPostingsEnum reset(
long fp, IndexOptions indexOptions, int docFreq, long skipPointer) throws IOException {
// The input itself is not moved here; readDoc() seeks to nextDocStart on every call.
nextDocStart = fp;
docID = -1;
readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
readOffsets =
indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
// Without offsets in the index, both offset accessors report -1.
if (!readOffsets) {
startOffset = -1;
endOffset = -1;
}
cost = docFreq;
skipReader.reset(skipPointer, docFreq);
nextSkipDoc = 0;
seekTo = -1;
return this;
}
@Override
public int docID() {
return docID;
}
@Override
public int freq() throws IOException {
return tf;
}
@Override
public int nextDoc() throws IOException {
return advance(docID + 1);
}
/**
 * Parses the document entry that starts at nextDocStart.
 *
 * Scans from the DOC line to the line that ends the entry (the next DOC line or a line that is
 * not part of a posting), stores that line's start in nextDocStart, and then seeks the input
 * back to the pointer recorded right after the FREQ line so that nextPosition() can read the
 * entry's position data. Returns the parsed doc ID, or NO_MORE_DOCS when no DOC line was found.
 */
private int readDoc() throws IOException {
// True until the DOC line of the entry being parsed has been seen.
boolean first = true;
in.seek(nextDocStart);
// Pointer just after the FREQ line of the current entry; remains 0 if no FREQ line is read.
long posStart = 0;
while (true) {
final long lineStart = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
// System.out.println("NEXT DOC: " + scratch.utf8ToString());
if (StringHelper.startsWith(scratch.get(), DOC)) {
if (!first) {
// A second DOC line starts the next entry: remember it and return to the position data.
nextDocStart = lineStart;
in.seek(posStart);
return docID;
}
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length() - DOC.length);
docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
tf = 0;
first = false;
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length() - FREQ.length);
tf = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
posStart = in.getFilePointer();
} else if (StringHelper.startsWith(scratch.get(), POS)) {
// Skipped here; position lines are parsed later by nextPosition().
} else if (StringHelper.startsWith(scratch.get(), START_OFFSET)) {
// Skipped here; parsed later by nextPosition().
} else if (StringHelper.startsWith(scratch.get(), END_OFFSET)) {
// Skipped here; parsed later by nextPosition().
} else if (StringHelper.startsWith(scratch.get(), PAYLOAD)) {
// Skipped here; parsed later by nextPosition().
} else {
// Anything else must be a line that ends this term's postings.
assert StringHelper.startsWith(scratch.get(), SimpleTextSkipWriter.SKIP_LIST)
|| StringHelper.startsWith(scratch.get(), TERM)
|| StringHelper.startsWith(scratch.get(), FIELD)
|| StringHelper.startsWith(scratch.get(), END);
if (!first) {
nextDocStart = lineStart;
in.seek(posStart);
return docID;
}
return docID = NO_MORE_DOCS;
}
}
}
/**
 * Reads documents until one with an ID of at least {@code target} is found, first applying any
 * jump recorded by advanceShallow.
 */
private int advanceTarget(int target) throws IOException {
// A pending skip position replaces the start of the next entry and is consumed exactly once.
if (seekTo > 0) {
nextDocStart = seekTo;
seekTo = -1;
}
assert docID() < target;
int doc;
do {
doc = readDoc();
} while (doc < target);
return doc;
}
@Override
public int advance(int target) throws IOException {
advanceShallow(target);
return advanceTarget(target);
}
/**
 * Reads the next position of the current document from the input, together with its offsets
 * (when readOffsets is set) and its optional payload. Returns the position, or -1 when positions
 * are not read.
 */
@Override
public int nextPosition() throws IOException {
if (readPositions) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch.get(), POS)
: "got line=" + scratch.get().utf8ToString();
scratchUTF16_2.copyUTF8Bytes(scratch.bytes(), POS.length, scratch.length() - POS.length);
pos = ArrayUtil.parseInt(scratchUTF16_2.chars(), 0, scratchUTF16_2.length());
} else {
pos = -1;
}
if (readOffsets) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch.get(), START_OFFSET)
: "got line=" + scratch.get().utf8ToString();
scratchUTF16_2.copyUTF8Bytes(
scratch.bytes(), START_OFFSET.length, scratch.length() - START_OFFSET.length);
startOffset = ArrayUtil.parseInt(scratchUTF16_2.chars(), 0, scratchUTF16_2.length());
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch.get(), END_OFFSET)
: "got line=" + scratch.get().utf8ToString();
scratchUTF16_2.grow(scratch.length() - END_OFFSET.length);
scratchUTF16_2.copyUTF8Bytes(
scratch.bytes(), END_OFFSET.length, scratch.length() - END_OFFSET.length);
endOffset = ArrayUtil.parseInt(scratchUTF16_2.chars(), 0, scratchUTF16_2.length());
}
// Peek at the following line: it is consumed only if it is a PAYLOAD line.
final long fp = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
if (StringHelper.startsWith(scratch.get(), PAYLOAD)) {
final int len = scratch.length() - PAYLOAD.length;
scratch2.growNoCopy(len);
System.arraycopy(scratch.bytes(), PAYLOAD.length, scratch2.bytes(), 0, len);
scratch2.setLength(len);
// NOTE(review): presumably this BytesRef shares scratch2's buffer and is overwritten by the
// next payload read — confirm before retaining it across calls.
payload = scratch2.get();
} else {
payload = null;
// Not a payload: rewind so the line is read again by whoever consumes it next.
in.seek(fp);
}
return pos;
}
@Override
public int startOffset() throws IOException {
return startOffset;
}
@Override
public int endOffset() throws IOException {
return endOffset;
}
@Override
public BytesRef getPayload() {
return payload;
}
@Override
public long cost() {
return cost;
}
/**
 * Moves the skip reader to {@code target} when the target lies beyond the last known skip doc.
 * If the skip reader reports a skip doc other than NO_MORE_DOCS, its file pointer is stored in
 * seekTo for the next advanceTarget call.
 */
@Override
public void advanceShallow(int target) throws IOException {
if (target > nextSkipDoc) {
skipReader.skipTo(target);
if (skipReader.getNextSkipDoc() != DocIdSetIterator.NO_MORE_DOCS) {
seekTo = skipReader.getNextSkipDocFP();
}
}
// NOTE(review): unlike SimpleTextDocsEnum.advanceShallow, nextSkipDoc is refreshed from the
// skip reader even when no skip was performed — confirm this difference is intended.
nextSkipDoc = skipReader.getNextSkipDoc();
assert nextSkipDoc >= target;
}
/** Returns the skip reader's impacts after a shallow advance to the current document. */
@Override
public Impacts getImpacts() throws IOException {
advanceShallow(docID);
return skipReader.getImpacts();
}
}
private class SimpleTextTerms extends Terms {
private final long termsStart;
private final FieldInfo fieldInfo;
private final int maxDoc;
private long sumTotalTermFreq;
private long sumDocFreq;
private int docCount;
private FST, PairOutputs.Pair>> fst;
private int termCount;
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
public SimpleTextTerms(String field, long termsStart, int maxDoc) throws IOException {
this.maxDoc = maxDoc;
this.termsStart = termsStart;
fieldInfo = fieldInfos.fieldInfo(field);
loadTerms();
}
private void loadTerms() throws IOException {
PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
final FSTCompiler<
PairOutputs.Pair, PairOutputs.Pair>>
fstCompiler;
final PairOutputs outputsOuter = new PairOutputs<>(posIntOutputs, posIntOutputs);
final PairOutputs outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
final PairOutputs, PairOutputs.Pair> outputs =
new PairOutputs<>(outputsOuter, outputsInner);
fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
IndexInput in = SimpleTextFieldsReader.this.in.clone();
in.seek(termsStart);
final BytesRefBuilder lastTerm = new BytesRefBuilder();
long lastDocsStart = -1;
int docFreq = 0;
long totalTermFreq = 0;
long skipPointer = 0;
FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
while (true) {
SimpleTextUtil.readLine(in, scratch);
if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
if (lastDocsStart != -1) {
fstCompiler.add(
Util.toIntsRef(lastTerm.get(), scratchIntsRef),
outputs.newPair(
outputsOuter.newPair(lastDocsStart, skipPointer),
outputsInner.newPair((long) docFreq, totalTermFreq)));
sumTotalTermFreq += totalTermFreq;
}
break;
} else if (StringHelper.startsWith(scratch.get(), DOC)) {
docFreq++;
sumDocFreq++;
totalTermFreq++;
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length() - DOC.length);
int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
visitedDocs.set(docID);
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length() - FREQ.length);
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
} else if (StringHelper.startsWith(scratch.get(), SKIP_LIST)) {
skipPointer = in.getFilePointer();
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
if (lastDocsStart != -1) {
fstCompiler.add(
Util.toIntsRef(lastTerm.get(), scratchIntsRef),
outputs.newPair(
outputsOuter.newPair(lastDocsStart, skipPointer),
outputsInner.newPair((long) docFreq, totalTermFreq)));
}
lastDocsStart = in.getFilePointer();
final int len = scratch.length() - TERM.length;
lastTerm.growNoCopy(len);
System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
lastTerm.setLength(len);
docFreq = 0;
sumTotalTermFreq += totalTermFreq;
totalTermFreq = 0;
termCount++;
skipPointer = 0;
}
}
docCount = visitedDocs.cardinality();
fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
/*
PrintStream ps = new PrintStream("out.dot");
fst.toDot(ps);
ps.close();
System.out.println("SAVED out.dot");
*/
// System.out.println("FST " + fst.sizeInBytes());
}
@Override
public String toString() {
return getClass().getSimpleName()
+ "(terms="
+ termCount
+ ",postings="
+ sumDocFreq
+ ",positions="
+ sumTotalTermFreq
+ ",docs="
+ docCount
+ ")";
}
@Override
public TermsEnum iterator() throws IOException {
if (fst != null) {
return new SimpleTextTermsEnum(fst, fieldInfo.getIndexOptions());
} else {
return TermsEnum.EMPTY;
}
}
@Override
public long size() {
return (long) termCount;
}
@Override
public long getSumTotalTermFreq() {
return sumTotalTermFreq;
}
@Override
public long getSumDocFreq() throws IOException {
return sumDocFreq;
}
@Override
public int getDocCount() throws IOException {
return docCount;
}
@Override
public boolean hasFreqs() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
}
/**
 * Returns a read-only iterator over the names of all fields found in the postings file, in the
 * sorted order of the underlying {@link TreeMap}.
 */
@Override
public Iterator<String> iterator() {
  return Collections.unmodifiableSet(fields.keySet()).iterator();
}
/** Terms already loaded, keyed by field name; accessed only from the synchronized terms(). */
private final Map<String, SimpleTextTerms> termsCache = new HashMap<>();

/**
 * Returns the terms of {@code field}, loading and caching them on first access.
 *
 * <p>The method is synchronized, so the plain {@link HashMap} cache is never accessed
 * concurrently and each field's terms are loaded at most once.
 *
 * @param field field name
 * @return the field's terms, or {@code null} if the postings file has no such field
 * @throws IOException if loading the terms from the postings file fails
 */
@Override
public synchronized Terms terms(String field) throws IOException {
  SimpleTextTerms terms = termsCache.get(field);
  if (terms == null) {
    final Long fp = fields.get(field);
    if (fp == null) {
      return null;
    }
    terms = new SimpleTextTerms(field, fp, maxDoc);
    termsCache.put(field, terms);
  }
  return terms;
}
/**
 * Always returns -1 instead of a field count.
 * NOTE(review): presumably -1 means "unknown" to callers — confirm against the Fields contract.
 */
@Override
public int size() {
return -1;
}
/** Closes the postings input that was opened in the constructor. */
@Override
public void close() throws IOException {
in.close();
}
/** Describes this reader as its simple class name followed by the number of known fields. */
@Override
public String toString() {
  final StringBuilder description = new StringBuilder(getClass().getSimpleName());
  description.append("(fields=").append(fields.size()).append(")");
  return description.toString();
}
/**
 * No-op.
 * NOTE(review): readFields already calls SimpleTextUtil.checkFooter on a checksumming input when
 * the reader is opened, which presumably makes a second verification unnecessary — confirm.
 */
@Override
public void checkIntegrity() throws IOException {}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy