All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.codecs.StoredFieldsWriter Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/**
 * Codec API for writing stored fields:
 * 
    *
  1. For every document, {@link #startDocument()} is called, * informing the Codec that a new document has started. *
  2. {@link #writeField(FieldInfo, IndexableField)} is called for * each field in the document. *
  3. After all documents have been written, {@link #finish(FieldInfos, int)} * is called for verification/sanity-checks. *
  4. Finally the writer is closed ({@link #close()}) *
* * @lucene.experimental */ public abstract class StoredFieldsWriter implements Closeable { /** Sole constructor. (For invocation by subclass * constructors, typically implicit.) */ protected StoredFieldsWriter() { } /** Called before writing the stored fields of the document. * {@link #writeField(FieldInfo, IndexableField)} will be called * for each stored field. Note that this is * called even if the document has no stored fields. */ public abstract void startDocument() throws IOException; /** Called when a document and all its fields have been added. */ public void finishDocument() throws IOException {} /** Writes a single stored field. */ public abstract void writeField(FieldInfo info, IndexableField field) throws IOException; /** Called before {@link #close()}, passing in the number * of documents that were written. Note that this is * intentionally redundant (equivalent to the number of * calls to {@link #startDocument()}, but a Codec should * check that this is the case to detect the JRE bug described * in LUCENE-1282. */ public abstract void finish(FieldInfos fis, int numDocs) throws IOException; /** Merges in the stored fields from the readers in * mergeState. The default implementation skips * over deleted documents, and uses {@link #startDocument()}, * {@link #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)}, * returning the number of documents that were written. * Implementations can override this method for more sophisticated * merging (bulk-byte copying, etc). */ public int merge(MergeState mergeState) throws IOException { int docCount = 0; for (int i=0;i * Use like this: *
   * MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
   * for (...) {
   *   startDocument();
   *   storedFieldsReader.visitDocument(docID, visitor);
   *   finishDocument();
   * }
   * 
*/ protected class MergeVisitor extends StoredFieldVisitor implements IndexableField { BytesRef binaryValue; String stringValue; Number numericValue; FieldInfo currentField; FieldInfos remapper; /** * Create new merge visitor. */ public MergeVisitor(MergeState mergeState, int readerIndex) { // if field numbers are aligned, we can save hash lookups // on every field access. Otherwise, we need to lookup // fieldname each time, and remap to a new number. for (FieldInfo fi : mergeState.fieldInfos[readerIndex]) { FieldInfo other = mergeState.mergeFieldInfos.fieldInfo(fi.number); if (other == null || !other.name.equals(fi.name)) { remapper = mergeState.mergeFieldInfos; break; } } } @Override public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { reset(fieldInfo); // TODO: can we avoid new BR here? binaryValue = new BytesRef(value); write(); } @Override public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { reset(fieldInfo); // TODO: can we avoid new String here? 
stringValue = new String(value, StandardCharsets.UTF_8); write(); } @Override public void intField(FieldInfo fieldInfo, int value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public void longField(FieldInfo fieldInfo, long value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public void floatField(FieldInfo fieldInfo, float value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public void doubleField(FieldInfo fieldInfo, double value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public Status needsField(FieldInfo fieldInfo) throws IOException { return Status.YES; } @Override public String name() { return currentField.name; } @Override public IndexableFieldType fieldType() { return StoredField.TYPE; } @Override public BytesRef binaryValue() { return binaryValue; } @Override public String stringValue() { return stringValue; } @Override public Number numericValue() { return numericValue; } @Override public Reader readerValue() { return null; } @Override public float boost() { return 1F; } @Override public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { return null; } void reset(FieldInfo field) { if (remapper != null) { // field numbers are not aligned, we need to remap to the new field number currentField = remapper.fieldInfo(field.name); } else { currentField = field; } binaryValue = null; stringValue = null; numericValue = null; } void write() throws IOException { writeField(currentField, this); } } @Override public abstract void close() throws IOException; }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy