All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pageseeder.flint.indexing.FlintField Maven / Gradle / Ivy

/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.indexing;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.SimpleDateFormat;
import java.util.TimeZone;

/**
 * A builder for fields.
 *
 * 

This class can be used to temporarily hold values required to creates new fields and * ensure that the field can be built without errors. * * @author Christophe Lauret * @version 10 February 2012 */ public final class FlintField { /** * Use the GMT time zone. */ private static final TimeZone GMT = TimeZone.getTimeZone("GMT"); /** * The default boost value for the term. */ public enum NumericType { INT, FLOAT, DOUBLE, LONG } public enum DocValuesType { NONE, FORCED_NONE, SORTED, SORTED_NUMERIC, SORTED_SET } public enum IndexOptions { NONE, DOCS, DOCS_AND_FREQS, DOCS_AND_FREQS_AND_POSITIONS, DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, } public enum Resolution { YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND } /** * The logger for this class. */ private static final Logger LOGGER = LoggerFactory.getLogger(FlintField.class); /** * Name of the catalog this field will be added to. */ private final String _catalog; /** * The name of the field currently processed. */ private String _name; /** * The 'store' flag of the field to build. */ private boolean _store = true; /** * The 'tokenize' flag of the field to build. */ private boolean _tokenize = true; /** * The 'sorted' flag of the field to build. */ private DocValuesType _docValues = DocValuesType.NONE; /** * The 'index' attribute of the field to build. */ private IndexOptions _index = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; /** * If norms are omitted. */ private boolean _omitNorms = false; /** * The vector flag of the field to build. */ private boolean _vector = true; /** * The vector positions of the field to build. */ private boolean _vectorPositions = true; /** * The vector payloads of the field to build. */ private boolean _vectorPayloads = true; /** * The vector offsets of the field to build. */ private boolean _vectorOffsets = true; /** * Date format to use (only if the value is a date) */ private SimpleDateFormat _dateformat; /** * The 'resolution' attribute of the field currently processed - determines the granularity of date stored. */ private Resolution _resolution; /** * The value of the field currently processed. */ private CharSequence _value; /** * The class of the number type for a numeric field. */ private NumericType _numeric; /** * The precision step to use for numeric field. */ private Integer _precisionStep = null; /** * If the field's value is compressed. */ private boolean _compressed = false; // Constuctor // ---------------------------------------------------------------------------------------------- /** * @param catalog the name of the catalog this field will be added to. */ public FlintField(String catalog) { this._catalog = catalog; } public FlintField cloneNoDocValues() { FlintField cloned = new FlintField(this._catalog); cloned._name = this._name; cloned._value = this._value; cloned._compressed = this._compressed; cloned._dateformat = this._dateformat; cloned._index = this._index; cloned._numeric = this._numeric; cloned._omitNorms = this._omitNorms; cloned._precisionStep = this._precisionStep; cloned._resolution = this._resolution; cloned._store = this._store; cloned._tokenize = this._tokenize; cloned._vector = this._vector; cloned._vectorOffsets = this._vectorOffsets; cloned._vectorPayloads = this._vectorPayloads; cloned._vectorPositions = this._vectorPositions; cloned._docValues = DocValuesType.FORCED_NONE; return cloned; } // Setters // ---------------------------------------------------------------------------------------------- /** * The name of the field to build. * * @param name The name of the field to build. * @return this builder. */ public FlintField name(String name) { this._name = name; return this; } /** * The value of the field to build. * * @param value The value of the field to build. * @return this builder. */ public FlintField value(String value) { this._value = value; return this; } /** * Set the field store for the field to build. * * @param store The field store for the field to build. * @return this builder. */ public FlintField store(boolean store) { this._store = store; return this; } /** * Set the field store for the field to build. * * @param store The field store for the field to build as a string. * @return this builder. */ public FlintField store(String store) { if (store != null) this._store = Boolean.parseBoolean(store); return this; } /** * Set the field tokenize for the field to build. * * @param tokenize The field tokenize for the field to build. * @return this builder. */ public FlintField tokenize(boolean tokenize) { this._tokenize = tokenize; return this; } /** * Set the field tokenize for the field to build. * * @param tokenize The field tokenize for the field to build as a string. * @return this builder. */ public FlintField tokenize(String tokenize) { if (tokenize != null) this._tokenize = Boolean.parseBoolean(tokenize); return this; } /** * Set the field index for the field to build. * * @param index The field index for the field to build. * @return this builder. */ public FlintField index(IndexOptions index) { this._index = index; return this; } /** * Set the omit norms flag. * * @param omit the new value * * @return this builder. */ public FlintField omitNorms(boolean omit) { this._omitNorms = omit; return this; } /** * Set the field index for the field to build. * * @param index The field index for the field to build as a string. * @return this builder. */ public FlintField index(String index) { if (index != null) this._index = toIndexOptions(index); return this; } /** * @param docValues if the field should be indexed as doc values. * @return this builder. */ public FlintField docValues(DocValuesType docValues) { this._docValues = docValues == null ? DocValuesType.NONE : docValues; return this; } /** * Supported values are "none", "sorted" and "sorted-set". * If the parameter is null, the same behaviour as "none" is applied. * * @param docValues the doc values type. * @param numeric if this field is numeric * * @return this builder. */ public FlintField docValues(String docValues, boolean numeric) { this._docValues = toDocValues(docValues, numeric); return this; } /** * Sets the term vector. * * @param vector The term vector for the field to build as a string. * @return this builder. */ public FlintField termVector(boolean vector) { this._vector = vector; return this; } /** * Sets the term vector offsets flag. * * @param vectorOffsets The term vector offsets flag for the field to build as a string. * @return this builder. */ public FlintField termVectorOffsets(boolean vectorOffsets) { this._vectorOffsets = vectorOffsets; return this; } /** * Sets the term vector positions flag. * * @param vectorPositions The term vector positions flag for the field to build as a string. * @return this builder. */ public FlintField termVectorPositions(boolean vectorPositions) { this._vectorPositions = vectorPositions; return this; } /** * Sets the term vector payloads flag. * * @param vectorPayloads The term vector payloads flag for the field to build as a string. * @return this builder. */ public FlintField termVectorPayloads(boolean vectorPayloads) { this._vectorPayloads = vectorPayloads; return this; } /** * Sets the term vector. * * @param vector The term vector for the field to build as a string. * @return this builder. */ public FlintField termVector(String vector) { if (vector != null) this._vector = Boolean.parseBoolean(vector); return this; } /** * Sets the term vector offsets flag. * * @param vectorOffsets The term vector offsets flag for the field to build as a string. * @return this builder. */ public FlintField termVectorOffsets(String vectorOffsets) { if (vectorOffsets != null) this._vectorOffsets = Boolean.parseBoolean(vectorOffsets); return this; } /** * Sets the term vector positions flag. * * @param vectorPositions The term vector positions flag for the field to build as a string. * @return this builder. */ public FlintField termVectorPositions(String vectorPositions) { if (vectorPositions != null) this._vectorPositions = Boolean.parseBoolean(vectorPositions); return this; } /** * Sets the term vector payloads flag. * * @param vectorPayloads The term vector payloads flag for the field to build as a string. * @return this builder. */ public FlintField termVectorPayloads(String vectorPayloads) { if (vectorPayloads != null) this._vectorPayloads = Boolean.parseBoolean(vectorPayloads); return this; } /** * Returns the date format for this field. * * @param dateformat A date format to parse dates. * @return this builder. */ public FlintField dateFormat(SimpleDateFormat dateformat) { this._dateformat = dateformat; return this; } /** * Returns the date format for this field. * * @param dateformat A date format to parse dates. * @return this builder. */ public FlintField dateFormat(String dateformat) { this._dateformat = toDateFormat(dateformat); return this; } /** * Returns the date resolution for this field. * * @param resolution A date resolution to parse dates. * @return this builder. */ public FlintField resolution(Resolution resolution) { this._resolution = resolution; return this; } /** * Returns the date resolution for this field. * * @see FlintField#toResolution(String) * * @param resolution A date resolution to parse dates. * @return this builder. */ public FlintField resolution(String resolution) { if (resolution != null) this._resolution = toResolution(resolution); return this; } /** * Sets the number type for a numeric type, set to null for a string. * * @param type The number type for the numeric field to build. * @return this builder. */ public FlintField numeric(NumericType type) { this._numeric = type; return this; } /** * Sets the number type for a numeric type, set to null for a string. * * @param type The number type for the numeric field to build. * @return this builder. */ public FlintField numeric(String type) { if (type != null) this._numeric = toNumeric(type); return this; } /** * Sets the precision step (only applies to numeric fields). * *

Good values depend on usage and data type. *

Suitable values are generally between 1 and 8, see below (copied from Lucene doc) *

    *
  • The default for all data types is 4.
  • *
  • Ideal value in most cases for 64 bit data types (long, double) is 6 or 8.
  • *
  • Ideal value in most cases for 32 bit data types (int, float) is 4.
  • *
  • For low cardinality fields larger precision steps are good.
  • *
* * @param precision The precision step. * @return this builder. */ public FlintField precisionStep(int precision) { this._precisionStep = precision; return this; } /** * Sets the precision step (only applies to numeric fields). * * @see #precisionStep(int) * * @param precision The precision step to be parsed as an integer. * @return this builder. */ public FlintField precisionStep(String precision) { if (precision == null) return this; try { this._precisionStep = Integer.parseInt(precision); } catch (NumberFormatException ex) { LOGGER.error("Unable to parse precision step {} as an integer - ignored and used default", precision); } return this; } // Getters // ---------------------------------------------------------------------------------------------- /** * Returns the name of the field to build. * * @return The name of the field to build. */ public String name() { return this._name; } public String catalog() { return this._catalog; } /** * Returns the value of the field to build. * * @return The value of the field to build. */ public CharSequence value() { return this._value; } /** * Returns the field store for the field to build. * * @return The field store for the field to build. */ public boolean store() { return this._store; } /** * Returns the field tokenize for the field to build. * * @return The field tokenize for the field to build. */ public boolean tokenize() { return this._tokenize; } /** * Returns the field index for the field to build. * * @return The field index for the field to build. */ public IndexOptions index() { return this._index; } /** * Returns the numeric type for this field. * * @return the numeric type for this field. */ public NumericType numericType() { return this._numeric; } public boolean isDocValues() { return (this._docValues != DocValuesType.NONE && this._docValues != DocValuesType.FORCED_NONE); } public boolean isNumeric() { return this._numeric != null; } /** * Returns the doc values type for this field. * * @return the doc values type for this field. */ public DocValuesType docValues() { return this._docValues; } public NumericType numeric() { return this._numeric; } public SimpleDateFormat dateformat() { return this._dateformat; } public Integer precisionStep() { return this._precisionStep; } public Resolution resolution() { return this._resolution; } public boolean omitNorms() { return this._omitNorms; } public boolean termVector() { return this._vector; } public boolean termVectorOffsets() { return this._vectorOffsets; } public boolean termVectorPayloads() { return this._vectorPayloads; } public boolean termVectorPositions() { return this._vectorPositions; } // Static utility methods // ---------------------------------------------------------------------------------------------- /** * Returns the Index Options. * * @param index the index options as a string * * @return The corresponding Lucene 5 constant. */ public static IndexOptions toIndexOptions(String index) { if (index == null) return null; try { return IndexOptions.valueOf(index.toUpperCase().replace('-', '_')); } catch (IllegalArgumentException ex) { LOGGER.warn("Invalid index option: {}", index); return null; } } /** * Returns the resolution for date. * * @param resolution The date tools resolution. * * @return The corresponding Lucene 5 constant. */ public static Resolution toResolution(String resolution) { if (resolution == null) return null; else if ("year".equals(resolution)) return Resolution.YEAR; else if ("month".equals(resolution)) return Resolution.MONTH; else if ("day".equals(resolution)) return Resolution.DAY; else if ("hour".equals(resolution)) return Resolution.HOUR; else if ("minute".equals(resolution)) return Resolution.MINUTE; else if ("second".equals(resolution)) return Resolution.SECOND; else if ("milli".equals(resolution)) return Resolution.MILLISECOND; else if ("millisecond".equals(resolution)) return Resolution.MILLISECOND; LOGGER.warn("Invalid date resolution: {}, defaulting to Resolution.DAY", resolution); return Resolution.DAY; } /** * Returns the numeric type. * *

If the value does not match one of the value number types * this method will return null and the field will be indexed as a normal String field. * * @param type The number type; one of ("int","float","double" or "long"). * * @return The corresponding NumericType instance or null. */ public static NumericType toNumeric(String type) { if (type == null) return null; for (NumericType nt : NumericType.values()) { if (type.equalsIgnoreCase(nt.name())) return nt; } LOGGER.warn("Invalid number type : {}, defaulting to null (string)", type); return null; } /** * Returns the doc values type. * *

If the value does not match one of the doc values types * this method will return null and the field will be indexed as a normal field. * * @param type The doc values type. * * @return The corresponding DocValuesType instance or null. */ public static DocValuesType toDocValues(String type, boolean numeric) { if (type == null) return DocValuesType.NONE; if ("sorted".equals(type)) return numeric ? DocValuesType.SORTED_NUMERIC : DocValuesType.SORTED; if ("sorted-set".equals(type)) return DocValuesType.SORTED_SET; if ("none".equals(type)) return DocValuesType.NONE; LOGGER.warn("Invalid doc values type : {}, defaulting to none", type); return DocValuesType.NONE; } /** * Returns the date format to use, allowing recycling. * *

Set the current date format to null if the format is null. * *

Otherwise retrieve from map or create an instance if it has never been created. * *

Note: we only set the timezone if the date format includes a time component; otherwise we default to GMT to * ensure that Lucene will preserve the date. * * @param format The date format used. * @return the corresponding date format or null. */ public static SimpleDateFormat toDateFormat(String format) { if (format == null) return null; try { SimpleDateFormat df = new SimpleDateFormat(format); df.setTimeZone(GMT); return df; } catch (IllegalArgumentException ex) { LOGGER.warn("Ignoring unusable date format '"+format+"'", ex); } return null; } }