// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
package lux.index;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import lux.index.analysis.DefaultAnalyzer;
import lux.index.field.AttributeQNameField;
import lux.index.field.AttributeTextField;
import lux.index.field.DocumentField;
import lux.index.field.ElementQNameField;
import lux.index.field.ElementTextField;
import lux.index.field.FieldDefinition;
import lux.index.field.IDField;
import lux.index.field.PathField;
import lux.index.field.PathOccurrenceField;
import lux.index.field.PathValueField;
import lux.index.field.QNameValueField;
import lux.index.field.TinyBinaryField;
import lux.index.field.TinyBinarySolrField;
import lux.index.field.URIField;
import lux.index.field.XmlTextField;
import lux.xml.tinybin.TinyBinary;
import org.apache.lucene.util.Version;
/**
 * Maintains a list of field definitions and index options that inform indexing and search.
 * The configuration options and core indexing setup are immutable, but new fields may be added,
 * fields may be renamed, and namespace mappings may be defined.
 */
public class IndexConfiguration {

    public static final Version LUCENE_VERSION = Version.LUCENE_44;

    /** causes a document node to be built during indexing. Must be set if any XPathFields are to be defined. */
    public static final int BUILD_DOCUMENT = 0x00000001;

    /** Configure for use in solr; eg TinyBinarySolrField instead of TinyBinaryField */
    public static final int SOLR = 0x00000002;

    /** causes QNames indexes to include the full namespace uri. If not set, QNames are indexed lexically,
     * as {prefix}:{localname} without regard for any prefix mappings. Currently namespace-unaware indexing
     * and search is not fully supported.
     */
    public static final int NAMESPACE_AWARE = 0x00000004;

    /** causes a document to be stored in the index. This should generally always be enabled */
    public static final int STORE_DOCUMENT = 0x00000008;

    /** indicates that documents are to be stored in {@link TinyBinary} format. If this is not set,
     * documents are stored as serialized XML. */
    public static final int STORE_TINY_BINARY = 0x00000010;

    /** enables the {@link #ELT_QNAME} and {@link #ATT_QNAME} fields, causing element and attribute
     * QNames to be indexed. If paths are indexed, this isn't really needed. */
    public static final int INDEX_QNAMES = 0x00000020;

    /** enables the {@link #PATH} field, causing element and attribute QName paths to be indexed. */
    public static final int INDEX_PATHS = 0x00000040;

    /** enables the {@link #XML_TEXT}, {@link #ELEMENT_TEXT}, and {@link #ATTRIBUTE_TEXT} fields,
     * causing element and attribute text to be indexed. */
    public static final int INDEX_FULLTEXT = 0x00000080;

    /** enables the {@link #PATH_VALUE} field (if INDEX_PATHS is set), and the {@link #QNAME_VALUE} field (if
     * INDEX_QNAMES is set), causing values to be indexed. This is an experimental feature that is not
     * fully supported.
     */
    public static final int INDEX_VALUES = 0x00000100;

    /** enables the computation and storage of term offsets in the index. Currently there is no reason to enable
     * this flag. In the future term offsets may be used to accelerate highlighting. */
    public static final int COMPUTE_OFFSETS = 0x00000200;

    /** causes all namespace information to be stripped from incoming documents */
    public static final int STRIP_NAMESPACES = 0x00000400;

    /** experimental: index each occurrence of each path as an unparsed string,
     * rather than indexing unique paths and tokenizing */
    public static final int INDEX_EACH_PATH = 0x00000800;

    /** mask covering all of the indexing options */
    public static final int INDEXES = INDEX_QNAMES | INDEX_PATHS | INDEX_FULLTEXT | INDEX_VALUES;

    /** the default indexing options */
    public static final int DEFAULT_OPTIONS =
            STORE_DOCUMENT | INDEX_QNAMES | INDEX_PATHS | INDEX_FULLTEXT | NAMESPACE_AWARE;

    /** unique identifier field that identifies a document */
    public static final FieldDefinition URI = URIField.getInstance();

    /** field that stores xml documents as serialized XML */
    private static final FieldDefinition XML_STORE = DocumentField.getInstance();

    /** field that stores xml documents in {@link TinyBinary} format */
    private static final FieldDefinition TINY_BINARY_STORE = TinyBinaryField.getInstance();

    /** element QName field */
    public static final FieldDefinition ELT_QNAME = ElementQNameField.getInstance();

    /** attribute QName field */
    public static final FieldDefinition ATT_QNAME = AttributeQNameField.getInstance();

    /** path field */
    public static final FieldDefinition PATH = PathField.getInstance();

    /** element text field indexes all the text along with element QNames. */
    public static final FieldDefinition ELEMENT_TEXT = ElementTextField.getInstance();

    /** attribute text field indexes all the text along with attribute QNames. */
    public static final FieldDefinition ATTRIBUTE_TEXT = AttributeTextField.getInstance();

    /** full text field that indexes all the text in a document (not including attribute values). */
    public static final FieldDefinition XML_TEXT = XmlTextField.getInstance();

    // not fully supported?
    public static final FieldDefinition PATH_VALUE = PathValueField.getInstance();
    public static final FieldDefinition QNAME_VALUE = QNameValueField.getInstance();

    /**
     * The default configuration instance. It must be declared after the field constants above,
     * since its constructor registers them.
     */
    public static final IndexConfiguration DEFAULT = new IndexConfiguration();

    /** bit set of the option constants defined above; fixed at construction */
    private final long options;

    /** registered fields, keyed by their effective (possibly renamed) name */
    private final Map<String, FieldDefinition> fields;

    /** aliases of renamed fields; a field with no entry here goes by its default name */
    private final Map<FieldDefinition, String> fieldNames;

    /** per-field analyzers, keyed by effective field name */
    private final MultiFieldAnalyzer fieldAnalyzers;

    /** namespace prefix to namespace URI bindings */
    private final Map<String, String> namespaceMap;

    /** @return the analyzers associated with the fields to be indexed */
    public MultiFieldAnalyzer getFieldAnalyzers() {
        return fieldAnalyzers;
    }

    /**
     * Creates (or reuses) a configuration for a set of options. {@link #NAMESPACE_AWARE} is always
     * switched on, whether or not the caller supplied it.
     * @param options a bitwise combination of the option constants of this class
     * @return a new IndexConfiguration with the given options, unless the options are the default options,
     * in which case {@link #DEFAULT} is returned.
     */
    public static IndexConfiguration makeIndexConfiguration (long options) {
        long opt = options | NAMESPACE_AWARE;
        if (opt == DEFAULT_OPTIONS) {
            return DEFAULT;
        }
        return new IndexConfiguration(opt);
    }

    /**
     * Builds a configuration and registers the fields implied by the options.
     * @param options a bitwise combination of the option constants of this class;
     * {@link #NAMESPACE_AWARE} is added unconditionally
     */
    protected IndexConfiguration (long options) {
        // Assign the options before any field is registered so that they are already
        // visible to everything called below.
        this.options = options | NAMESPACE_AWARE;
        namespaceMap = new HashMap<String, String>();
        fields = new HashMap<String, FieldDefinition>();
        fieldNames = new HashMap<FieldDefinition, String>();
        fieldAnalyzers = new MultiFieldAnalyzer();
        // Registered under the null field name; presumably the fallback analyzer for fields
        // that have none of their own -- confirm against MultiFieldAnalyzer.
        fieldAnalyzers.put(null, new DefaultAnalyzer());
        addField(URI);
        init();
    }

    /** Builds a configuration using {@link #DEFAULT_OPTIONS}. */
    protected IndexConfiguration () {
        this(DEFAULT_OPTIONS);
    }

    /** Registers the fields that correspond to the option bits that are set. */
    private void init () {
        if (isOption(INDEX_QNAMES)) {
            addField(ELT_QNAME);
            addField(ATT_QNAME);
            if (isOption(INDEX_VALUES)) {
                addField(QNAME_VALUE);
            }
        }
        if (isOption(INDEX_PATHS)) {
            if (isOption(INDEX_EACH_PATH)) {
                addField(PathOccurrenceField.getInstance());
            } else {
                addField(PATH);
            }
            if (isOption(INDEX_VALUES)) {
                addField(PATH_VALUE);
            }
        }
        if (isOption(INDEX_FULLTEXT)) {
            addField(XML_TEXT);
            if (isOption(INDEX_QNAMES) || isOption(INDEX_PATHS)) {
                addField(ELEMENT_TEXT);
                addField(ATTRIBUTE_TEXT);
            }
            // COMPUTE_OFFSETS is never switched on automatically here; offsets are only
            // computed when the caller sets that option explicitly.
        }
        if (isOption(STORE_DOCUMENT)) {
            if (isOption(STORE_TINY_BINARY)) {
                if (isOption(SOLR)) {
                    addField(TinyBinarySolrField.getInstance());
                } else {
                    addField(TINY_BINARY_STORE);
                }
            } else {
                addField(XML_STORE);
            }
        }
        addField(IDField.getInstance());
    }

    /**
     * Adds a new field, together with its analyzer. Adding a field that is already registered
     * is a no-op.
     * @param field the field to add
     * @throws IllegalStateException if a different field is already registered under the same name
     */
    public void addField (FieldDefinition field) {
        // Key by the effective name so that a field that has been renamed is recognized as
        // already present, and so that the field map and the analyzer map agree on the key.
        String name = getFieldName(field);
        FieldDefinition existing = fields.get(name);
        if (existing != null) {
            if (existing != field) {
                throw new IllegalStateException ("Duplicate field name: " + field);
            }
            return;
        }
        fields.put(name, field);
        fieldAnalyzers.put(name, field.getAnalyzer());
    }

    /**
     * Get the effective name of a field, given its canonical name. Fields may be renamed, or aliased, for
     * compatibility with existing schemas.
     * @param field a field's canonical name
     * @return the effective name of the field
     */
    public String getFieldName (FieldName field) {
        return getFieldName(field.getField());
    }

    /**
     * Get the effective name of a field, given its definition. Fields may be renamed, or aliased, for
     * compatibility with existing schemas.
     * @param field a field definition
     * @return the effective name of the field: its alias if it has been renamed, otherwise its default name
     */
    public String getFieldName (FieldDefinition field) {
        String alias = fieldNames.get(field);
        if (alias != null) {
            return alias;
        }
        return field.getDefaultName();
    }

    /**
     * rename an existing field; the new name is used in the index. Renaming a field to the name
     * it already has is a no-op.
     * @param field the definition of a field
     * @param name the new name to use
     * @throws IllegalArgumentException if the field is not renameable
     */
    public void renameField (FieldDefinition field, String name) {
        if (! field.isRenameable()) {
            throw new IllegalArgumentException("Attempt to rename field " + field + " whose name is fixed");
        }
        String currentName = fieldNames.get(field);
        if (currentName == null) {
            currentName = field.getDefaultName();
        }
        if (currentName != null) {
            if (currentName.equals(name)) {
                return;
            }
            fields.remove(currentName);
        }
        fieldNames.put(field, name);
        fields.put(name, field);
        // Keep the analyzer map in step with the field map, as addField does; otherwise the
        // renamed field would have no analyzer registered under its new name.
        fieldAnalyzers.put(name, field.getAnalyzer());
    }

    /** @return a live view of the registered field definitions */
    public Collection<FieldDefinition> getFields () {
        return fields.values();
    }

    /**
     * @param fieldName a field's canonical name
     * @return the definition associated with that canonical name
     */
    public FieldDefinition getField (FieldName fieldName) {
        return fieldName.getField();
    }

    /**
     * @param fieldName the effective name of a field
     * @return the field registered under that name, or null if there is none
     */
    public FieldDefinition getField (String fieldName) {
        return fields.get(fieldName);
    }

    /** @return the effective name of the {@link #XML_TEXT} field */
    public String getDefaultFieldName () {
        return getFieldName(XML_TEXT);
    }

    /**
     * @param option an option bit constant
     * @return whether the option is set
     */
    public boolean isOption (int option) {
        return (options & option) != 0;
    }

    /** @return whether any of the options covered by {@link #INDEXES} is set */
    public boolean isIndexingEnabled() {
        return (options & INDEXES) != 0;
    }

    /** @return an unmodifiable view of the prefix-to-namespace-URI bindings */
    public Map<String, String> getNamespaceMap () {
        return Collections.unmodifiableMap(namespaceMap);
    }

    /**
     * Binds a prefix to a namespace URI, replacing any existing binding for that prefix.
     * @param prefix the namespace prefix
     * @param namespaceURI the namespace URI to bind to the prefix
     */
    public void defineNamespaceMapping (String prefix, String namespaceURI) {
        namespaceMap.put(prefix, namespaceURI);
    }
}
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */