org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-codecs Show documentation
Show all versions of lucene-codecs Show documentation
Apache Lucene (module: codecs)
package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
import java.util.Comparator;
/**
 * {@link FieldsConsumer} that writes postings as plain text to a single
 * {@link IndexOutput}.
 *
 * <p>Every record is emitted as a marker (one of the {@link BytesRef} constants
 * declared below) followed by its value and a newline. Fields, terms, documents,
 * frequencies, positions, offsets and payloads are written in the order in which
 * the corresponding consumer methods are called; {@link #close()} terminates the
 * output with an {@link #END} line.
 */
class SimpleTextFieldsWriter extends FieldsConsumer {

  /** Destination of all text written by this consumer. */
  private final IndexOutput out;

  /** Reusable scratch buffer handed to {@code SimpleTextUtil.write(out, String, scratch)}. */
  private final BytesRef scratch = new BytesRef(10);

  // Line markers. Package-private so other classes in this package can reference them.
  static final BytesRef END = new BytesRef("END");
  static final BytesRef FIELD = new BytesRef("field ");
  static final BytesRef TERM = new BytesRef(" term ");
  static final BytesRef DOC = new BytesRef(" doc ");
  static final BytesRef FREQ = new BytesRef(" freq ");
  static final BytesRef POS = new BytesRef(" pos ");
  static final BytesRef START_OFFSET = new BytesRef(" startOffset ");
  static final BytesRef END_OFFSET = new BytesRef(" endOffset ");
  static final BytesRef PAYLOAD = new BytesRef(" payload ");

  /**
   * Creates the postings output for the segment described by {@code state}.
   *
   * @param state supplies the directory, IO context, segment name and segment suffix
   *              used to name and create the output
   * @throws IOException if the output cannot be created
   */
  public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException {
    final String fileName =
        SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix);
    out = state.directory.createOutput(fileName, state.context);
  }

  /** Writes {@code s} to the output through {@code SimpleTextUtil}, using the shared scratch buffer. */
  private void write(String s) throws IOException {
    SimpleTextUtil.write(out, s, scratch);
  }

  /** Writes {@code b} to the output through {@code SimpleTextUtil}. */
  private void write(BytesRef b) throws IOException {
    SimpleTextUtil.write(out, b);
  }

  /** Terminates the current line through {@code SimpleTextUtil}. */
  private void newline() throws IOException {
    SimpleTextUtil.writeNewline(out);
  }

  /**
   * Writes the {@link #FIELD} line for {@code field} and returns a consumer for its terms.
   *
   * @param field the field whose name is written and whose index options control
   *              what the returned consumer emits
   * @return a new terms consumer bound to this writer's output
   * @throws IOException if writing fails
   */
  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    write(FIELD);
    write(field.name);
    newline();
    return new SimpleTextTermsWriter(field);
  }

  /**
   * Terms consumer for one field. It owns a single {@link SimpleTextPostingsWriter}
   * that is re-targeted at each new term instead of being re-allocated.
   */
  private class SimpleTextTermsWriter extends TermsConsumer {

    private final SimpleTextPostingsWriter postingsWriter;

    public SimpleTextTermsWriter(FieldInfo field) {
      postingsWriter = new SimpleTextPostingsWriter(field);
    }

    /** Points the shared postings writer at {@code term} and returns it. */
    @Override
    public PostingsConsumer startTerm(BytesRef term) throws IOException {
      return postingsWriter.reset(term);
    }

    /** Intentionally empty: this writer emits nothing when a term is finished. */
    @Override
    public void finishTerm(BytesRef term, TermStats stats) throws IOException {
    }

    /** Intentionally empty: this writer emits nothing when a field is finished. */
    @Override
    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
    }

    /** Returns the comparator obtained from {@link BytesRef#getUTF8SortedAsUnicodeComparator()}. */
    @Override
    public Comparator<BytesRef> getComparator() {
      return BytesRef.getUTF8SortedAsUnicodeComparator();
    }
  }

  /**
   * Postings consumer that writes doc, freq, position, offset and payload lines
   * for the term most recently supplied through {@link #reset(BytesRef)}.
   */
  private class SimpleTextPostingsWriter extends PostingsConsumer {

    /** Term currently being written; set by {@link #reset(BytesRef)}. */
    private BytesRef term;

    /** Whether the {@link #TERM} line for {@link #term} has been written yet. */
    private boolean wroteTerm;

    private final IndexOptions indexOptions;
    private final boolean writePositions;
    private final boolean writeOffsets;

    // Only read by the ordering assertion in addPosition.
    private int lastStartOffset = 0;

    public SimpleTextPostingsWriter(FieldInfo field) {
      this.indexOptions = field.getIndexOptions();
      // Each flag is enabled for its own option and every option that compares greater.
      writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      writeOffsets =
          indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    }

    /**
     * Writes the {@link #DOC} line and, unless the field is {@code DOCS_ONLY},
     * the {@link #FREQ} line for one document.
     *
     * @param docID       document id, written in decimal
     * @param termDocFreq frequency of the term in the document, written in decimal
     * @throws IOException if writing fails
     */
    @Override
    public void startDoc(int docID, int termDocFreq) throws IOException {
      if (!wroteTerm) {
        // The term line is written lazily, in case the term had zero docs.
        write(TERM);
        write(term);
        newline();
        wroteTerm = true;
      }

      write(DOC);
      write(Integer.toString(docID));
      newline();

      if (indexOptions != IndexOptions.DOCS_ONLY) {
        write(FREQ);
        write(Integer.toString(termDocFreq));
        newline();
      }

      // Offset ordering is checked per document, so restart the tracking here.
      lastStartOffset = 0;
    }

    /**
     * Re-targets this writer at a new term. The reference is stored as-is, not copied.
     *
     * @param term the term whose postings follow
     * @return this writer
     */
    public PostingsConsumer reset(BytesRef term) {
      this.term = term;
      wroteTerm = false;
      return this;
    }

    /**
     * Writes the lines for one occurrence: the position if positions are enabled,
     * the start and end offsets if offsets are enabled, and the payload whenever it
     * is non-null and non-empty.
     *
     * @param position    position of the occurrence
     * @param payload     payload bytes; may be {@code null} or empty, in which case
     *                    no payload line is written
     * @param startOffset start offset of the occurrence
     * @param endOffset   end offset of the occurrence
     * @throws IOException if writing fails
     */
    @Override
    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset)
        throws IOException {
      if (writePositions) {
        write(POS);
        write(Integer.toString(position));
        newline();
      }

      if (writeOffsets) {
        assert endOffset >= startOffset;
        assert startOffset >= lastStartOffset : "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset;
        lastStartOffset = startOffset;
        write(START_OFFSET);
        write(Integer.toString(startOffset));
        newline();
        write(END_OFFSET);
        write(Integer.toString(endOffset));
        newline();
      }

      if (payload != null && payload.length > 0) {
        write(PAYLOAD);
        write(payload);
        newline();
      }
    }

    /** Intentionally empty: this writer emits nothing when a document is finished. */
    @Override
    public void finishDoc() {
    }
  }

  /**
   * Writes the terminating {@link #END} line and closes the output. The output is
   * closed even if writing the terminator fails.
   *
   * @throws IOException if writing the terminator or closing the output fails
   */
  @Override
  public void close() throws IOException {
    try {
      write(END);
      newline();
    } finally {
      out.close();
    }
  }
}