
// org.apache.lucene.document.Field Maven / Gradle / Ivy
// Show all versions of org.apache.servicemix.bundles.lucene
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.util.BytesRef;
/**
* Expert: directly create a field for a document. Most users should use one of the sugar
* subclasses:
*
* <ul>
*   <li>{@link TextField}: {@link Reader} or {@link String} indexed for full-text search
*   <li>{@link StringField}: {@link String} indexed verbatim as a single token
*   <li>{@link IntField}: {@code int} indexed for exact/range queries.
*   <li>{@link LongField}: {@code long} indexed for exact/range queries.
*   <li>{@link FloatField}: {@code float} indexed for exact/range queries.
*   <li>{@link DoubleField}: {@code double} indexed for exact/range queries.
*   <li>{@link SortedDocValuesField}: {@code byte[]} indexed column-wise for sorting/faceting
*   <li>{@link SortedSetDocValuesField}: {@code SortedSet<byte[]>} indexed column-wise for
*       sorting/faceting
*   <li>{@link NumericDocValuesField}: {@code long} indexed column-wise for sorting/faceting
*   <li>{@link SortedNumericDocValuesField}: {@code SortedSet<long>} indexed column-wise for
*       sorting/faceting
*   <li>{@link StoredField}: Stored-only value for retrieving in summary results
* </ul>
*
* <p>A field is a section of a Document. Each field has three parts: name, type and value. Values
* may be text (String, Reader or pre-analyzed TokenStream), binary (byte[]), or numeric (a Number).
* Fields are optionally stored in the index, so that they may be returned with hits on the
* document.
*
* <p>NOTE: the field type is an {@link IndexableFieldType}. Making changes to the state of the
* IndexableFieldType will impact any Field it is used in. It is strongly recommended that no
* changes be made after Field instantiation.
*/
public class Field implements IndexableField {
/** Field's type */
protected final IndexableFieldType type;
/** Field's name */
protected final String name;
/** Field's value */
protected Object fieldsData;
/**
* Expert: creates a field with no initial value. This is intended to be used by custom {@link
* Field} sub-classes with pre-configured {@link IndexableFieldType}s.
*
* @param name field name
* @param type field type
* @throws IllegalArgumentException if either the name or type is null.
*/
protected Field(String name, IndexableFieldType type) {
if (name == null) {
throw new IllegalArgumentException("name must not be null");
}
this.name = name;
if (type == null) {
throw new IllegalArgumentException("type must not be null");
}
this.type = type;
}
/**
* Create field with Reader value.
*
* @param name field name
* @param reader reader value
* @param type field type
* @throws IllegalArgumentException if either the name or type is null, or if the field's type is
* stored(), or if tokenized() is false.
* @throws NullPointerException if the reader is null
*/
public Field(String name, Reader reader, IndexableFieldType type) {
if (name == null) {
throw new IllegalArgumentException("name must not be null");
}
if (type == null) {
throw new IllegalArgumentException("type must not be null");
}
if (reader == null) {
throw new NullPointerException("reader must not be null");
}
if (type.stored()) {
throw new IllegalArgumentException("fields with a Reader value cannot be stored");
}
if (type.indexOptions() != IndexOptions.NONE && !type.tokenized()) {
throw new IllegalArgumentException("non-tokenized fields must use String values");
}
this.name = name;
this.fieldsData = reader;
this.type = type;
}
/**
* Create field with TokenStream value.
*
* @param name field name
* @param tokenStream TokenStream value
* @param type field type
* @throws IllegalArgumentException if either the name or type is null, or if the field's type is
* stored(), or if tokenized() is false, or if indexed() is false.
* @throws NullPointerException if the tokenStream is null
*/
public Field(String name, TokenStream tokenStream, IndexableFieldType type) {
if (name == null) {
throw new IllegalArgumentException("name must not be null");
}
if (tokenStream == null) {
throw new NullPointerException("tokenStream must not be null");
}
if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {
throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized");
}
if (type.stored()) {
throw new IllegalArgumentException("TokenStream fields cannot be stored");
}
this.name = name;
this.fieldsData = tokenStream;
this.type = type;
}
/**
* Create field with binary value.
*
*
NOTE: the provided byte[] is not copied so be sure not to change it until you're done with
* this field.
*
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @param type field type
* @throws IllegalArgumentException if the field name, value or type is null, or the field's type
* is indexed().
*/
public Field(String name, byte[] value, IndexableFieldType type) {
this(name, value, 0, value.length, type);
}
/**
* Create field with binary value.
*
*
NOTE: the provided byte[] is not copied so be sure not to change it until you're done with
* this field.
*
* @param name field name
* @param value byte array pointing to binary content (not copied)
* @param offset starting position of the byte array
* @param length valid length of the byte array
* @param type field type
* @throws IllegalArgumentException if the field name, value or type is null, or the field's type
* is indexed().
*/
public Field(String name, byte[] value, int offset, int length, IndexableFieldType type) {
this(name, value != null ? new BytesRef(value, offset, length) : null, type);
}
/**
* Create field with binary value.
*
*
NOTE: the provided BytesRef is not copied so be sure not to change it until you're done with
* this field.
*
* @param name field name
* @param bytes BytesRef pointing to binary content (not copied)
* @param type field type
* @throws IllegalArgumentException if the field name, bytes or type is null, or the field's type
* is indexed().
*/
public Field(String name, BytesRef bytes, IndexableFieldType type) {
if (name == null) {
throw new IllegalArgumentException("name must not be null");
}
if (bytes == null) {
throw new IllegalArgumentException("bytes must not be null");
}
if (type == null) {
throw new IllegalArgumentException("type must not be null");
}
if (type.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0
|| type.storeTermVectorOffsets()) {
throw new IllegalArgumentException("It doesn't make sense to index offsets on binary fields");
}
if (type.indexOptions() != IndexOptions.NONE && type.tokenized()) {
throw new IllegalArgumentException("cannot set a BytesRef value on a tokenized field");
}
if (type.indexOptions() == IndexOptions.NONE
&& type.pointDimensionCount() == 0
&& type.docValuesType() == DocValuesType.NONE
&& type.stored() == false) {
throw new IllegalArgumentException(
"it doesn't make sense to have a field that is neither indexed, nor doc-valued, nor stored");
}
this.name = name;
this.fieldsData = bytes;
this.type = type;
}
// TODO: allow direct construction of int, long, float, double value too..?
/**
* Create field with String value.
*
* @param name field name
* @param value string value
* @param type field type
* @throws IllegalArgumentException if either the name, value or type is null, or if the field's
* type is neither indexed() nor stored(), or if indexed() is false but storeTermVectors() is
* true.
*/
public Field(String name, CharSequence value, IndexableFieldType type) {
if (name == null) {
throw new IllegalArgumentException("name must not be null");
}
if (value == null) {
throw new IllegalArgumentException("value must not be null");
}
if (type == null) {
throw new IllegalArgumentException("type must not be null");
}
if (type.stored() == false && type.indexOptions() == IndexOptions.NONE) {
throw new IllegalArgumentException(
"it doesn't make sense to have a field that is neither indexed nor stored");
}
this.name = name;
this.fieldsData = value;
this.type = type;
}
/**
* The value of the field as a String, or null. If null, the Reader value or binary value is used.
* Exactly one of stringValue(), readerValue(), and binaryValue() must be set.
*/
@Override
public String stringValue() {
if (fieldsData instanceof CharSequence || fieldsData instanceof Number) {
return fieldsData.toString();
} else {
return null;
}
}
@Override
public CharSequence getCharSequenceValue() {
return fieldsData instanceof CharSequence ? (CharSequence) fieldsData : stringValue();
}
/**
* The value of the field as a Reader, or null. If null, the String value or binary value is used.
* Exactly one of stringValue(), readerValue(), and binaryValue() must be set.
*/
@Override
public Reader readerValue() {
return fieldsData instanceof Reader ? (Reader) fieldsData : null;
}
/**
* The TokenStream for this field to be used when indexing, or null. If null, the Reader value or
* String value is analyzed to produce the indexed tokens.
*/
public TokenStream tokenStreamValue() {
return fieldsData instanceof TokenStream ? (TokenStream) fieldsData : null;
}
/**
* Expert: change the value of this field. This can be used during indexing to re-use a single
* Field instance to improve indexing speed by avoiding GC cost of new'ing and reclaiming Field
* instances. Typically a single {@link Document} instance is re-used as well. This helps most on
* small documents.
*
*
Each Field instance should only be used once within a single {@link Document} instance. See
* ImproveIndexingSpeed for
* details.
*/
public void setStringValue(String value) {
if (!(fieldsData instanceof String)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to String");
}
if (value == null) {
throw new IllegalArgumentException("value must not be null");
}
fieldsData = value;
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setReaderValue(Reader value) {
if (!(fieldsData instanceof Reader)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Reader");
}
fieldsData = value;
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setBytesValue(byte[] value) {
setBytesValue(new BytesRef(value));
}
/**
* Expert: change the value of this field. See {@link #setStringValue(String)}.
*
*
NOTE: the provided BytesRef is not copied so be sure not to change it until you're done with
* this field.
*/
public void setBytesValue(BytesRef value) {
if (!(fieldsData instanceof BytesRef)) {
throw new IllegalArgumentException(
"cannot change value type from "
+ fieldsData.getClass().getSimpleName()
+ " to BytesRef");
}
if (value == null) {
throw new IllegalArgumentException("value must not be null");
}
fieldsData = value;
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setByteValue(byte value) {
if (!(fieldsData instanceof Byte)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Byte");
}
fieldsData = Byte.valueOf(value);
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setShortValue(short value) {
if (!(fieldsData instanceof Short)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Short");
}
fieldsData = Short.valueOf(value);
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setIntValue(int value) {
if (!(fieldsData instanceof Integer)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Integer");
}
fieldsData = Integer.valueOf(value);
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setLongValue(long value) {
if (!(fieldsData instanceof Long)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Long");
}
fieldsData = Long.valueOf(value);
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setFloatValue(float value) {
if (!(fieldsData instanceof Float)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Float");
}
fieldsData = Float.valueOf(value);
}
/** Expert: change the value of this field. See {@link #setStringValue(String)}. */
public void setDoubleValue(double value) {
if (!(fieldsData instanceof Double)) {
throw new IllegalArgumentException(
"cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Double");
}
fieldsData = Double.valueOf(value);
}
/** Expert: sets the token stream to be used for indexing. */
public void setTokenStream(TokenStream tokenStream) {
if (!(fieldsData instanceof TokenStream)) {
throw new IllegalArgumentException(
"cannot change value type from "
+ fieldsData.getClass().getSimpleName()
+ " to TokenStream");
}
this.fieldsData = tokenStream;
}
/** Returns the field name given at construction time. */
@Override
public String name() {
return name;
}
@Override
public Number numericValue() {
if (fieldsData instanceof Number) {
return (Number) fieldsData;
} else {
return null;
}
}
@Override
public BytesRef binaryValue() {
if (fieldsData instanceof BytesRef) {
return (BytesRef) fieldsData;
} else {
return null;
}
}
/** Prints a Field for human consumption. */
@Override
public String toString() {
StringBuilder result = new StringBuilder();
result.append(type.toString());
result.append('<');
result.append(name);
result.append(':');
if (fieldsData != null) {
result.append(fieldsData);
}
result.append('>');
return result.toString();
}
/**
 * Returns the {@link IndexableFieldType} given at construction time. The same instance is
 * returned on every call, so changes made to it afterwards are visible through this field.
 */
@Override
public IndexableFieldType fieldType() {
return type;
}
/**
 * Always reports {@code InvertableType.TOKEN_STREAM}, regardless of the kind of value held.
 * NOTE(review): presumably this tells the indexer to obtain terms through {@link
 * #tokenStream(Analyzer, TokenStream)} -- confirm against the IndexableField contract.
 */
@Override
public InvertableType invertableType() {
return InvertableType.TOKEN_STREAM;
}
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
if (fieldType().indexOptions() == IndexOptions.NONE) {
// Not indexed
return null;
}
if (!fieldType().tokenized()) {
if (stringValue() != null) {
if (!(reuse instanceof StringTokenStream)) {
// lazy init the TokenStream as it is heavy to instantiate
// (attributes,...) if not needed
reuse = new StringTokenStream();
}
((StringTokenStream) reuse).setValue(stringValue());
return reuse;
} else if (binaryValue() != null) {
if (!(reuse instanceof BinaryTokenStream)) {
// lazy init the TokenStream as it is heavy to instantiate
// (attributes,...) if not needed
reuse = new BinaryTokenStream();
}
((BinaryTokenStream) reuse).setValue(binaryValue());
return reuse;
} else {
throw new IllegalArgumentException("Non-Tokenized Fields must have a String value");
}
}
if (tokenStreamValue() != null) {
return tokenStreamValue();
} else if (readerValue() != null) {
return analyzer.tokenStream(name(), readerValue());
} else if (stringValue() != null) {
return analyzer.tokenStream(name(), stringValue());
}
throw new IllegalArgumentException(
"Field must have either TokenStream, String, Reader or Number value; got " + this);
}
private static final class BinaryTokenStream extends TokenStream {
private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class);
private boolean used = true;
private BytesRef value;
/**
* Creates a new TokenStream that returns a BytesRef as single token.
*
*
Warning: Does not initialize the value, you must call {@link #setValue(BytesRef)}
* afterwards!
*/
BinaryTokenStream() {}
public void setValue(BytesRef value) {
this.value = value;
}
@Override
public boolean incrementToken() {
if (used) {
return false;
}
clearAttributes();
bytesAtt.setBytesRef(value);
used = true;
return true;
}
@Override
public void reset() {
used = false;
}
@Override
public void close() {
value = null;
}
}
private static final class StringTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private boolean used = true;
private String value = null;
/**
* Creates a new TokenStream that returns a String as single token.
*
*
Warning: Does not initialize the value, you must call {@link #setValue(String)}
* afterwards!
*/
StringTokenStream() {}
/** Sets the string value. */
void setValue(String value) {
this.value = value;
}
@Override
public boolean incrementToken() {
if (used) {
return false;
}
clearAttributes();
termAttribute.append(value);
offsetAttribute.setOffset(0, value.length());
used = true;
return true;
}
@Override
public void end() throws IOException {
super.end();
final int finalOffset = value.length();
offsetAttribute.setOffset(finalOffset, finalOffset);
}
@Override
public void reset() {
used = false;
}
@Override
public void close() {
value = null;
}
}
/**
 * Specifies whether and how a field should be stored.
 *
 * <p>NOTE(review): nothing else in this class refers to this enum; presumably it is consumed by
 * the convenience subclasses -- confirm against their constructors.
 */
public enum Store {
/**
 * Store the original field value in the index. This is useful for short texts like a document's
 * title which should be displayed with the results. The value is stored in its original form,
 * i.e. no analyzer is used before it is stored.
 */
YES,
/** Do not store the field value in the index. */
NO
}
@Override
public StoredValue storedValue() {
if (fieldType().stored() == false) {
return null;
} else if (fieldsData == null) {
throw new IllegalArgumentException("fieldsData is unset");
} else if (fieldsData instanceof Integer) {
return new StoredValue((int) fieldsData);
} else if (fieldsData instanceof Long) {
return new StoredValue((long) fieldsData);
} else if (fieldsData instanceof Float) {
return new StoredValue((float) fieldsData);
} else if (fieldsData instanceof Double) {
return new StoredValue((double) fieldsData);
} else if (fieldsData instanceof BytesRef) {
return new StoredValue((BytesRef) fieldsData);
} else if (fieldsData instanceof StoredFieldDataInput) {
return new StoredValue((StoredFieldDataInput) fieldsData);
} else if (fieldsData instanceof String) {
return new StoredValue((String) fieldsData);
} else {
throw new IllegalStateException("Cannot store value of type " + fieldsData.getClass());
}
}
}