org.apache.lucene.index.FieldInfo Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
The newest version!
/*
* COPIED FROM APACHE LUCENE 4.7.2
*
* Git URL: git@github.com:apache/lucene.git, tag: releases/lucene-solr/4.7.2, path: lucene/core/src/java
*
* (see https://issues.apache.org/jira/browse/OAK-10786 for details)
*/
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import java.util.Map;
/**
* Access to the Field Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Field Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
**/
public final class FieldInfo {
/** Field's name */
public final String name;
/** Internal field number */
public final int number;
private boolean indexed;
private DocValuesType docValueType;
// True if any document indexed term vectors
private boolean storeTermVector;
private DocValuesType normType;
private boolean omitNorms; // omit norms associated with indexed fields
private IndexOptions indexOptions;
private boolean storePayloads; // whether this field stores payloads together with term positions
private Map attributes;
private long dvGen = -1; // the DocValues generation of this field
/**
* Controls how much information is stored in the postings lists.
* @lucene.experimental
*/
public static enum IndexOptions {
// NOTE: order is important here; FieldInfo uses this
// order to merge two conflicting IndexOptions (always
// "downgrades" by picking the lowest).
/**
* Only documents are indexed: term frequencies and positions are omitted.
* Phrase and other positional queries on the field will throw an exception, and scoring
* will behave as if any term in the document appears only once.
*/
// TODO: maybe rename to just DOCS?
DOCS_ONLY,
/**
* Only documents and term frequencies are indexed: positions are omitted.
* This enables normal scoring, except Phrase and other positional queries
* will throw an exception.
*/
DOCS_AND_FREQS,
/**
* Indexes documents, frequencies and positions.
* This is a typical default for full-text search: full scoring is enabled
* and positional queries are supported.
*/
DOCS_AND_FREQS_AND_POSITIONS,
/**
* Indexes documents, frequencies, positions and offsets.
* Character offsets are encoded alongside the positions.
*/
DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
}
/**
* DocValues types.
* Note that DocValues is strongly typed, so a field cannot have different types
* across different documents.
*/
public static enum DocValuesType {
/**
* A per-document Number
*/
NUMERIC,
/**
* A per-document byte[]. Values may be larger than
* 32766 bytes, but different codecs may enforce their own limits.
*/
BINARY,
/**
* A pre-sorted byte[]. Fields with this type only store distinct byte values
* and store an additional offset pointer per document to dereference the shared
* byte[]. The stored byte[] is presorted and allows access via document id,
* ordinal and by-value. Values must be <= 32766 bytes.
*/
SORTED,
/**
* A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values
* and store additional offset pointers per document to dereference the shared
* byte[]s. The stored byte[] is presorted and allows access via document id,
* ordinal and by-value. Values must be <= 32766 bytes.
*/
SORTED_SET
}
/**
* Sole Constructor.
*
* @lucene.experimental
*/
public FieldInfo(String name, boolean indexed, int number, boolean storeTermVector, boolean omitNorms,
boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType,
Map attributes) {
this.name = name;
this.indexed = indexed;
this.number = number;
this.docValueType = docValues;
if (indexed) {
this.storeTermVector = storeTermVector;
this.storePayloads = storePayloads;
this.omitNorms = omitNorms;
this.indexOptions = indexOptions;
this.normType = !omitNorms ? normsType : null;
} else { // for non-indexed fields, leave defaults
this.storeTermVector = false;
this.storePayloads = false;
this.omitNorms = false;
this.indexOptions = null;
this.normType = null;
}
this.attributes = attributes;
assert checkConsistency();
}
private boolean checkConsistency() {
if (!indexed) {
assert !storeTermVector;
assert !storePayloads;
assert !omitNorms;
assert normType == null;
assert indexOptions == null;
} else {
assert indexOptions != null;
if (omitNorms) {
assert normType == null;
}
// Cannot store payloads unless positions are indexed:
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads;
}
return true;
}
void update(IndexableFieldType ft) {
update(ft.indexed(), false, ft.omitNorms(), false, ft.indexOptions());
}
// should only be called by FieldInfos#addOrUpdate
void update(boolean indexed, boolean storeTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
//System.out.println("FI.update field=" + name + " indexed=" + indexed + " omitNorms=" + omitNorms + " this.omitNorms=" + this.omitNorms);
if (this.indexed != indexed) {
this.indexed = true; // once indexed, always index
}
if (indexed) { // if updated field data is not for indexing, leave the updates out
if (this.storeTermVector != storeTermVector) {
this.storeTermVector = true; // once vector, always vector
}
if (this.storePayloads != storePayloads) {
this.storePayloads = true;
}
if (this.omitNorms != omitNorms) {
this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
this.normType = null;
}
if (this.indexOptions != indexOptions) {
if (this.indexOptions == null) {
this.indexOptions = indexOptions;
} else {
// downgrade
this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
}
if (this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// cannot store payloads if we don't store positions:
this.storePayloads = false;
}
}
}
assert checkConsistency();
}
void setDocValuesType(DocValuesType type) {
if (docValueType != null && docValueType != type) {
throw new IllegalArgumentException("cannot change DocValues type from " + docValueType + " to " + type + " for field \"" + name + "\"");
}
docValueType = type;
assert checkConsistency();
}
/** Returns IndexOptions for the field, or null if the field is not indexed */
public IndexOptions getIndexOptions() {
return indexOptions;
}
/**
* Returns true if this field has any docValues.
*/
public boolean hasDocValues() {
return docValueType != null;
}
/**
* Returns {@link DocValuesType} of the docValues. this may be null if the field has no docvalues.
*/
public DocValuesType getDocValuesType() {
return docValueType;
}
/** Sets the docValues generation of this field. */
public void setDocValuesGen(long dvGen) {
this.dvGen = dvGen;
}
/**
* Returns the docValues generation of this field, or -1 if no docValues
* updates exist for it.
*/
public long getDocValuesGen() {
return dvGen;
}
/**
* Returns {@link DocValuesType} of the norm. this may be null if the field has no norms.
*/
public DocValuesType getNormType() {
return normType;
}
void setStoreTermVectors() {
storeTermVector = true;
assert checkConsistency();
}
void setStorePayloads() {
if (indexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
storePayloads = true;
}
assert checkConsistency();
}
void setNormValueType(DocValuesType type) {
if (normType != null && normType != type) {
throw new IllegalArgumentException("cannot change Norm type from " + normType + " to " + type + " for field \"" + name + "\"");
}
normType = type;
assert checkConsistency();
}
/**
* Returns true if norms are explicitly omitted for this field
*/
public boolean omitsNorms() {
return omitNorms;
}
/**
* Returns true if this field actually has any norms.
*/
public boolean hasNorms() {
return normType != null;
}
/**
* Returns true if this field is indexed.
*/
public boolean isIndexed() {
return indexed;
}
/**
* Returns true if any payloads exist for this field.
*/
public boolean hasPayloads() {
return storePayloads;
}
/**
* Returns true if any term vectors exist for this field.
*/
public boolean hasVectors() {
return storeTermVector;
}
/**
* Get a codec attribute value, or null if it does not exist
*/
public String getAttribute(String key) {
if (attributes == null) {
return null;
} else {
return attributes.get(key);
}
}
/**
* Puts a codec attribute value.
*
* This is a key-value mapping for the field that the codec can use
* to store additional metadata, and will be available to the codec
* when reading the segment via {@link #getAttribute(String)}
*
* If a value already exists for the field, it will be replaced with
* the new value.
*/
public String putAttribute(String key, String value) {
if (attributes == null) {
attributes = new HashMap();
}
return attributes.put(key, value);
}
/**
* Returns internal codec attributes map. May be null if no mappings exist.
*/
public Map attributes() {
return attributes;
}
}