org.apache.solr.schema.FieldType Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr (module: core)
There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.schema;

import static org.apache.lucene.analysis.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharFilterFactory;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenizerFactory;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.DocValuesRewriteMethod;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.query.SolrRangeQuery;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.uninverting.UninvertingReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for all field types used by an index schema.
 *
 * @since 3.1
 */
public abstract class FieldType extends FieldProperties {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /**
   * The default poly field separator.
   *
   * @see #createFields(SchemaField, Object)
   * @see #isPolyField()
   */
  public static final String POLY_FIELD_SEPARATOR = "___";

  /** The name of the type (not the name of the field). */
  protected String typeName;

  /**
   * Additional arguments specified in the field type declaration. Assigned by {@link #setArgs} as
   * an unmodifiable view of the map passed to it; {@code null} until then. Keys and values are
   * strings (see {@link #getClassArg()}, which reads the class name from it).
   */
  protected Map<String, String> args;

  /** Bit mask of the properties explicitly set to true. */
  protected int trueProperties;

  /** Bit mask of the properties explicitly set to false. */
  protected int falseProperties;

  /**
   * Effective property bit mask: schema-version defaults with {@link #falseProperties} cleared and
   * {@link #trueProperties} set (computed in {@link #setArgs}).
   */
  protected int properties;

  /** Backing flag for {@link #isExplicitQueryAnalyzer()}. */
  private boolean isExplicitQueryAnalyzer;

  /** Backing flag for {@link #isExplicitAnalyzer()}. */
  private boolean isExplicitAnalyzer;

  /** Reports whether the {@code TOKENIZED} property bit is set for this type. */
  public boolean isTokenized() {
    final int tokenizedBit = properties & TOKENIZED;
    return tokenizedBit != 0;
  }

  /** Reports whether the {@code MULTIVALUED} property bit is set, i.e. multiple values allowed. */
  public boolean isMultiValued() {
    final int multiValuedBit = properties & MULTIVALUED;
    return multiValuedBit != 0;
  }

  /**
   * Checks the effective property mask against {@code p}.
   *
   * @param p one or more property bits
   * @return true if at least one bit of {@code p} is set in {@code properties}
   */
  protected boolean hasProperty(int p) {
    final int overlap = properties & p;
    return overlap != 0;
  }

  /**
   * A "polyField" is a FieldType that can produce more than one IndexableField instance for a
   * single value, via the {@link #createFields(org.apache.solr.schema.SchemaField, Object)} method.
   * This is useful when hiding the implementation details of a field from the Solr end user. For
   * instance, a spatial point may be represented by multiple different fields.
   *
   * <p>This base implementation always returns false.
   *
   * @return true if the {@link #createFields(org.apache.solr.schema.SchemaField, Object)} method
   *     may return more than one field
   */
  public boolean isPolyField() {
    return false;
  }

  /**
   * Reports whether this type is a point field. This base implementation always returns false;
   * {@link DefaultAnalyzer} casts the enclosing type to {@code PointField} when this is true.
   */
  public boolean isPointField() {
    return false;
  }

  /**
   * Base implementation always returns false. NOTE(review): presumably marks types whose values
   * are held as UTF-8 bytes -- confirm against the overriding subclasses.
   */
  public boolean isUtf8Field() {
    return false;
  }

  /**
   * Reports whether the {@code USE_DOCVALUES_AS_STORED} property bit is set, i.e. the fields'
   * docValues should be used for obtaining the stored value.
   */
  public boolean useDocValuesAsStored() {
    final int docValuesAsStoredBit = properties & USE_DOCVALUES_AS_STORED;
    return docValuesAsStoredBit != 0;
  }

  /**
   * Returns true if a single field value of this type has multiple logical values for the purposes
   * of faceting, sorting, etc. Text fields normally return true since each token/word is a logical
   * value.
   *
   * <p>This base implementation simply returns {@link #isTokenized()}.
   */
  public boolean multiValuedFieldCache() {
    return isTokenized();
  }

  /**
   * Subclasses should initialize themselves with the args provided and remove valid arguments.
   * Leftover arguments will cause an exception. Common boolean properties have already been
   * handled.
   *
   * <p>Called from {@link #setArgs} with a copy of the declared arguments from which the class
   * name and the known property names have already been removed. This base implementation does
   * nothing.
   *
   * @param schema the schema passed to {@link #setArgs}
   * @param args the remaining arguments; entries left in the map after this call (other than the
   *     ones {@link #setArgs} consumes itself) are reported as invalid
   */
  protected void init(IndexSchema schema, Map args) {}

  /**
   * Base implementation ignores {@code itemWriter} and returns false. NOTE(review): presumably a
   * hook letting a type write a value itself and report whether it did -- confirm with callers.
   */
  public boolean write(IteratorWriter.ItemWriter itemWriter) {
    return false;
  }

  /**
   * Initializes the field type from the arguments of its schema declaration. Subclasses should
   * usually override {@link #init(IndexSchema, Map)}, which is called by this method.
   *
   * <p>Default properties are derived from the schema version and then adjusted by the boolean
   * properties found in {@code args}. The class name, the known property names, the position
   * increment gap and the postings/docValues format names are consumed here; anything still left
   * once {@link #init(IndexSchema, Map)} has run is rejected.
   *
   * @param schema the schema this type belongs to; its version selects the default properties
   * @param args the declared arguments. {@code compressThreshold} is removed from this map for
   *     schema versions below 1.3; apart from that the map is wrapped (unmodifiable view kept in
   *     {@code this.args}) and copied, not modified
   * @throws NumberFormatException if the position increment gap is not a parsable integer
   * @throws RuntimeException if a position increment gap is given but the index or query analyzer
   *     is not a {@link SolrAnalyzer}, or if unconsumed arguments remain
   */
  protected void setArgs(IndexSchema schema, Map args) {
    // default to STORED, INDEXED, OMIT_TF_POSITIONS and MULTIVALUED depending on schema version
    properties = (STORED | INDEXED);
    float schemaVersion = schema.getVersion();
    if (schemaVersion < 1.1f) properties |= MULTIVALUED;
    if (schemaVersion > 1.1f) properties |= OMIT_TF_POSITIONS;
    // Compare against a float literal like the neighbouring checks: widened to double, 1.3f is
    // slightly below 1.3d, so "schemaVersion < 1.3" also matched a schema of version 1.3 exactly.
    if (schemaVersion < 1.3f) {
      args.remove("compressThreshold");
    }
    if (schemaVersion >= 1.6f) properties |= USE_DOCVALUES_AS_STORED;

    properties |= UNINVERTIBLE;

    this.args = Collections.unmodifiableMap(args);
    // Typed working copy: values are read back as String below.
    @SuppressWarnings("unchecked")
    Map<String, String> initArgs = new HashMap<>(args);
    initArgs.remove(CLASS_NAME); // consume the class arg

    trueProperties = FieldProperties.parseProperties(initArgs, true, false);
    falseProperties = FieldProperties.parseProperties(initArgs, false, false);

    // explicit "true" wins over explicit "false" if a bit shows up in both masks
    properties &= ~falseProperties;
    properties |= trueProperties;

    for (String prop : FieldProperties.propertyNames) {
      initArgs.remove(prop);
    }

    init(schema, initArgs);

    String positionInc = initArgs.get(POSITION_INCREMENT_GAP);
    if (positionInc != null) {
      applyPositionIncrementGap(getIndexAnalyzer(), positionInc);
      applyPositionIncrementGap(getQueryAnalyzer(), positionInc);
      initArgs.remove(POSITION_INCREMENT_GAP);
    }

    this.postingsFormat = initArgs.remove(POSTINGS_FORMAT);
    this.docValuesFormat = initArgs.remove(DOC_VALUES_FORMAT);

    if (initArgs.size() > 0) {
      throw new RuntimeException(
          "schema fieldtype "
              + typeName
              + "("
              + this.getClass().getName()
              + ")"
              + " invalid arguments:"
              + initArgs);
    }
  }

  /**
   * Sets the position increment gap on one analyzer; only {@link SolrAnalyzer}s can take it. The
   * gap is parsed after the type check so that an unsupported analyzer is reported first.
   */
  private void applyPositionIncrementGap(Analyzer analyzer, String positionInc) {
    if (!(analyzer instanceof SolrAnalyzer)) {
      throw new RuntimeException(
          "Can't set " + POSITION_INCREMENT_GAP + " on custom analyzer " + analyzer.getClass());
    }
    ((SolrAnalyzer) analyzer).setPositionIncrementGap(Integer.parseInt(positionInc));
  }

  /**
   * Rejects properties this type must not have.
   *
   * @param props bit mask of disallowed properties
   * @throws RuntimeException if any bit of {@code props} is set in the effective properties; the
   *     message names the offending properties
   */
  protected void restrictProps(int props) {
    final int offending = properties & props;
    if (offending == 0) {
      return;
    }
    throw new RuntimeException(
        "schema fieldtype "
            + typeName
            + "("
            + this.getClass().getName()
            + ")"
            + " invalid properties:"
            + propertiesToString(offending));
  }

  /**
   * The name of this FieldType as specified in the schema file.
   *
   * @return the value last passed to {@link #setTypeName}, or null if it was never set
   */
  public String getTypeName() {
    return typeName;
  }

  /** Sets the name reported by {@link #getTypeName()}; package-private. */
  void setTypeName(String typeName) {
    this.typeName = typeName;
  }

  /**
   * Renders {@code typeName{class=...,analyzer=...,args=...}}; the analyzer part is omitted when no
   * index analyzer is set.
   */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append(typeName).append("{class=").append(this.getClass().getName());
    if (indexAnalyzer != null) {
      text.append(",analyzer=").append(indexAnalyzer.getClass().getName());
    }
    text.append(",args=").append(args).append("}");
    return text.toString();
  }

  /**
   * Used for adding a document when a field needs to be created from a type and a string.
   *
   * <p>By default, the indexed value is the same as the stored value (taken from toInternal()).
   * Having a different representation for external, internal, and indexed would present quite a few
   * problems given the current Lucene architecture. An analyzer for adding docs would need to
   * translate internal-&gt;indexed while an analyzer for querying would need to translate
   * external-&gt;indexed.
   *
   * <p>The only other alternative to having internal==indexed would be to have internal==external.
   * In this case, toInternal should convert to the indexed representation, toExternal() should do
   * nothing, and createField() should *not* call toInternal, but use the external value and set
   * tokenized=true to get Lucene to convert to the internal(indexed) form. :TODO: clean up and
   * clarify this explanation.
   *
   * @param field the schema field to create a Lucene field for
   * @param value the external value; its {@code toString()} is passed to {@link #toInternal}
   * @return the created field, or null if the schema field is neither indexed nor stored or if
   *     {@link #toInternal} produced null
   * @throws SolrException as thrown by {@link #toInternal}, or with {@code SERVER_ERROR} wrapping
   *     any other {@link RuntimeException} raised during the conversion
   * @see #toInternal
   */
  public IndexableField createField(SchemaField field, Object value) {
    final boolean indexedOrStored = field.indexed() || field.stored();
    if (!indexedOrStored) {
      if (log.isTraceEnabled()) {
        log.trace("Ignoring unindexed/unstored field: {}", field);
      }
      return null;
    }

    final String internal;
    try {
      internal = toInternal(value.toString());
    } catch (SolrException passThrough) {
      // let e.g. BAD_REQUEST reach the caller unchanged
      throw passThrough;
    } catch (RuntimeException cause) {
      throw new SolrException(
          ErrorCode.SERVER_ERROR,
          "Error while creating field '" + field + "' from value '" + value + "'",
          cause);
    }

    return internal == null ? null : createField(field.getName(), internal, field);
  }

  /**
   * Create the field from native Lucene parts. Mostly intended for use by FieldTypes outputting
   * multiple Fields per SchemaField.
   *
   * @param name The name of the field
   * @param val The _internal_ value to index
   * @param type the {@link org.apache.lucene.index.IndexableFieldType} describing how the value is
   *     indexed/stored
   * @return the {@link org.apache.lucene.index.IndexableField}; this base implementation returns a
   *     new {@link Field}
   */
  protected IndexableField createField(
      String name, String val, org.apache.lucene.index.IndexableFieldType type) {
    return new Field(name, val, type);
  }

  /**
   * Given a {@link org.apache.solr.schema.SchemaField}, create one or more {@link
   * org.apache.lucene.index.IndexableField} instances.
   *
   * <p>This base implementation delegates to {@link #createField(SchemaField, Object)} and wraps
   * its result; a null result yields an empty list.
   *
   * @param field the {@link org.apache.solr.schema.SchemaField}
   * @param value The value to add to the field
   * @return a list of {@link org.apache.lucene.index.IndexableField}, possibly empty, never null
   * @throws UnsupportedOperationException if the field requests doc values but this type did not
   *     produce a field carrying them
   * @see #createField(SchemaField, Object)
   * @see #isPolyField()
   */
  public List<IndexableField> createFields(SchemaField field, Object value) {
    IndexableField f = createField(field, value);
    if (f == null) {
      // createField() returns null for fields that are neither indexed nor stored. If such a
      // field still asks for doc values, nothing here can provide them: fail with the intended
      // message instead of dereferencing null below.
      if (field.hasDocValues() && !field.indexed() && !field.stored()) {
        throw new UnsupportedOperationException(
            "This field type does not support doc values: " + this);
      }
      return Collections.emptyList();
    }
    if (field.hasDocValues() && f.fieldType().docValuesType() == null) {
      // field types that support doc values should either override createField
      // to return a field with doc values or extend createFields if this can't
      // be done in a single field instance (see StrField for example)
      throw new UnsupportedOperationException(
          "This field type does not support doc values: " + this);
    }
    return Collections.singletonList(f);
  }

  /**
   * Convert an external value (from XML update command or from query string) into the internal
   * format for both storing and indexing (which can be modified by any analyzers).
   *
   * <p>This base implementation returns {@code val} unchanged.
   *
   * @param val the external value
   * @return the internal value
   * @see #toExternal
   */
  public String toInternal(String val) {
    // - used in delete when a Term needs to be created.
    // - used by the default getTokenizer() and createField()
    return val;
  }

  /**
   * Convert the stored-field format to an external (string, human readable) value.
   *
   * <p>Currently used when writing the XML of a search result (a more efficient {@code
   * toXML(IndexableField f, Writer w)} might be used in the future).
   *
   * @param f the field to read
   * @return the field's string value, or its binary value decoded as UTF-8 when there is no string
   *     value (doc values use the binary value)
   * @see #toInternal
   */
  public String toExternal(IndexableField f) {
    final String text = f.stringValue();
    if (text != null) {
      return text;
    }
    // docValues will use the binary value
    return f.binaryValue().utf8ToString();
  }

  /**
   * Convert the stored-field format to an external object.
   *
   * <p>This base implementation returns the string produced by {@link #toExternal}.
   *
   * @see #toInternal
   * @since solr 1.3
   */
  public Object toObject(IndexableField f) {
    return toExternal(f); // by default use the string
  }

  /**
   * Converts an indexed term to an external object: the term is made readable via {@link
   * #indexedToReadable(BytesRef, CharsRefBuilder)}, turned into a field with {@link
   * #createField(SchemaField, Object)} and that field is passed to {@link
   * #toObject(IndexableField)}.
   *
   * @param sf the schema field the term belongs to
   * @param term the indexed term
   */
  public Object toObject(SchemaField sf, BytesRef term) {
    final CharsRefBuilder readable = new CharsRefBuilder();
    indexedToReadable(term, readable);
    return toObject(createField(sf, readable.toString()));
  }

  /**
   * Given an indexed term, return the human readable representation. This base implementation
   * returns {@code indexedForm} unchanged.
   */
  public String indexedToReadable(String indexedForm) {
    return indexedForm;
  }

  /**
   * Given an indexed term, write the human readable representation into {@code output}. This base
   * implementation decodes {@code input} as UTF-8 via {@code copyUTF8Bytes}.
   *
   * @param input the indexed term
   * @param output the builder receiving the readable form
   * @return {@code output.get()}
   */
  public CharsRef indexedToReadable(BytesRef input, CharsRefBuilder output) {
    output.copyUTF8Bytes(input);
    return output.get();
  }

  /**
   * Given the stored field, return the human readable representation. This base implementation
   * delegates to {@link #toExternal}.
   */
  public String storedToReadable(IndexableField f) {
    return toExternal(f);
  }

  /**
   * Given the stored field, return the indexed form. This base implementation returns {@code
   * f.stringValue()}.
   */
  public String storedToIndexed(IndexableField f) {
    // right now, the transformation of single valued fields like SortableInt
    // is done when the Field is created, not at analysis time... this means
    // that the indexed form is the same as the stored field form.
    return f.stringValue();
  }

  /**
   * Given the readable value, return the term value that will match it. This base implementation
   * delegates to {@link #toInternal}.
   */
  public String readableToIndexed(String val) {
    return toInternal(val);
  }

  /**
   * Given the readable value, return the term value that will match it. This method will modify the
   * size and length of the {@code result} parameter and write from offset 0.
   *
   * @param val the readable value; converted with {@link #readableToIndexed(String)}
   * @param result receives the indexed form
   */
  public void readableToIndexed(CharSequence val, BytesRefBuilder result) {
    result.copyChars(readableToIndexed(val.toString()));
  }

  /**
   * Sets the flag returned by {@link #isExplicitQueryAnalyzer()}. NOTE(review): presumably true
   * when the schema declares a query analyzer explicitly -- confirm with the schema loader.
   */
  public void setIsExplicitQueryAnalyzer(boolean isExplicitQueryAnalyzer) {
    this.isExplicitQueryAnalyzer = isExplicitQueryAnalyzer;
  }

  /** Returns the flag last set through {@link #setIsExplicitQueryAnalyzer}; false by default. */
  public boolean isExplicitQueryAnalyzer() {
    return isExplicitQueryAnalyzer;
  }

  /**
   * Sets the flag returned by {@link #isExplicitAnalyzer()}. NOTE(review): presumably true when
   * the schema declares an (index) analyzer explicitly -- confirm with the schema loader.
   */
  public void setIsExplicitAnalyzer(boolean explicitAnalyzer) {
    isExplicitAnalyzer = explicitAnalyzer;
  }

  /** Returns the flag last set through {@link #setIsExplicitAnalyzer}; false by default. */
  public boolean isExplicitAnalyzer() {
    return isExplicitAnalyzer;
  }

  /**
   * @return the string used to specify the concrete class name in a serialized representation: the
   *     class arg. If the concrete class name was not specified via a class arg (or no args have
   *     been set yet), returns {@code getClass().getName()}.
   */
  public String getClassArg() {
    if (null != args) {
      // Read as Object so this compiles whether args is declared raw or as Map<String, String>;
      // for a String value toString() returns the value itself.
      Object className = args.get(CLASS_NAME);
      if (null != className) {
        return className.toString();
      }
    }
    return getClass().getName();
  }

  /**
   * Returns a Query instance for doing prefix searches on this field type. Also, other QueryParser
   * implementations may have different semantics.
   *
   * <p>Sub-classes should override this method to provide their own prefix query implementation.
   *
   * @param parser the {@link org.apache.solr.search.QParser} calling the method
   * @param sf the schema field
   * @param termStr the term string for the prefix query; if it is the empty string the existence
   *     query of the field is returned instead, matching all docs with this field
   * @return a Query instance to perform prefix search
   */
  public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
    final boolean emptyPrefix = termStr != null && termStr.isEmpty();
    if (emptyPrefix) {
      return getExistenceQuery(parser, sf);
    }
    final Term prefix = new Term(sf.getName(), termStr);
    final PrefixQuery prefixQuery = new PrefixQuery(prefix);
    prefixQuery.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf));
    return prefixQuery;
  }

  /**
   * If DocValues is not enabled for a field, but it's indexed, docvalues can be constructed on the
   * fly (uninverted, aka fieldcache) on the first request to sort, facet, etc. This specifies the
   * structure to use.
   *
   * <p>This method will not be used if the field is (effectively) {@code uninvertible="false"}.
   *
   * @param sf field instance
   * @return type to uninvert, or {@code null} (to disallow uninversion for the field)
   * @see SchemaField#isUninvertible()
   */
  public abstract UninvertingReader.Type getUninversionType(SchemaField sf);

  /**
   * Default analyzer for types that only produce 1 verbatim token... A maximum size of chars to be
   * read must be specified.
   *
   * <p>The tokenizer it builds performs one {@code read} of at most {@code maxChars} characters per
   * {@code incrementToken()} call and emits whatever that read returned as a single token, after
   * converting it with the enclosing type ({@link #toInternal}, or {@code
   * PointField#toInternalByteRef} when {@link #isPointField()} is true).
   */
  protected final class DefaultAnalyzer extends SolrAnalyzer {
    /** Size of the read buffer, i.e. the most characters one token can be built from. */
    final int maxChars;

    DefaultAnalyzer(int maxChars) {
      this.maxChars = maxChars;
    }

    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      Tokenizer ts =
          new Tokenizer() {
            final char[] cbuf = new char[maxChars];
            final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
            // only registered for point fields, whose terms are emitted as bytes
            final BytesTermAttribute bytesAtt =
                isPointField() ? addAttribute(BytesTermAttribute.class) : null;
            final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

            @Override
            public boolean incrementToken() throws IOException {
              clearAttributes();
              // single read: input beyond what this call returns is not part of this token
              int n = input.read(cbuf, 0, maxChars);
              if (n <= 0) return false;
              if (isPointField()) {
                BytesRef b =
                    ((PointField) FieldType.this).toInternalByteRef(new String(cbuf, 0, n));
                bytesAtt.setBytesRef(b);
              } else {
                String s = toInternal(new String(cbuf, 0, n));
                termAtt.setEmpty().append(s);
              }
              // offsets cover the characters read, not the (possibly different) internal form
              offsetAtt.setOffset(correctOffset(0), correctOffset(n));
              return true;
            }
          };

      return new TokenStreamComponents(ts);
    }
  }

  /** Analyzer used when indexing; a {@link DefaultAnalyzer} reading up to 256 chars by default. */
  private Analyzer indexAnalyzer = new DefaultAnalyzer(256);

  /** Analyzer used when querying; initially the very same instance as the index analyzer. */
  private Analyzer queryAnalyzer = indexAnalyzer;

  /**
   * Returns the Analyzer to be used when indexing fields of this type.
   *
   * <p>This method may be called many times, at any time.
   *
   * @return the current index analyzer: the default one unless replaced via {@link
   *     #setIndexAnalyzer}
   * @see #getQueryAnalyzer
   */
  public Analyzer getIndexAnalyzer() {
    return indexAnalyzer;
  }

  /**
   * Returns the Analyzer to be used when searching fields of this type.
   *
   * <p>This method may be called many times, at any time.
   *
   * @return the current query analyzer: the default one unless replaced via {@link
   *     #setQueryAnalyzer}
   * @see #getIndexAnalyzer
   */
  public Analyzer getQueryAnalyzer() {
    return queryAnalyzer;
  }

  /**
   * Returns true if this type supports index and query analyzers, false otherwise. This base
   * implementation returns false, which makes {@link #setIndexAnalyzer} and {@link
   * #setQueryAnalyzer} throw.
   */
  protected boolean supportsAnalyzers() {
    return false;
  }

  /**
   * Sets the Analyzer to be used when indexing fields of this type.
   *
   * <p>Subclasses should override {@link #supportsAnalyzers()} to enable this function.
   *
   * @param analyzer the new index analyzer
   * @throws SolrException with {@code SERVER_ERROR} if {@link #supportsAnalyzers()} is false
   * @see #supportsAnalyzers()
   * @see #setQueryAnalyzer
   * @see #getIndexAnalyzer
   */
  public final void setIndexAnalyzer(Analyzer analyzer) {
    if (!supportsAnalyzers()) {
      throw new SolrException(
          ErrorCode.SERVER_ERROR,
          "FieldType: "
              + this.getClass().getSimpleName()
              + " ("
              + typeName
              + ") does not support specifying an analyzer");
    }
    indexAnalyzer = analyzer;
  }

  /**
   * Sets the Analyzer to be used when querying fields of this type.
   *
   * <p>Subclasses should override {@link #supportsAnalyzers()} to enable this function.
   *
   * @param analyzer the new query analyzer
   * @throws SolrException with {@code SERVER_ERROR} if {@link #supportsAnalyzers()} is false
   * @see #supportsAnalyzers()
   * @see #setIndexAnalyzer
   * @see #getQueryAnalyzer
   */
  public final void setQueryAnalyzer(Analyzer analyzer) {
    if (!supportsAnalyzers()) {
      throw new SolrException(
          ErrorCode.SERVER_ERROR,
          "FieldType: "
              + this.getClass().getSimpleName()
              + " ("
              + typeName
              + ") does not support specifying an analyzer");
    }
    queryAnalyzer = analyzer;
  }

  /**
   * Factory handed to {@link #setSimilarity}; null until then.
   *
   * @lucene.internal
   */
  protected SimilarityFactory similarityFactory;

  /**
   * Similarity obtained from {@link #similarityFactory} in {@link #setSimilarity}; null until then.
   *
   * @lucene.internal
   */
  protected Similarity similarity;

  /**
   * Gets the Similarity used when scoring fields of this type.
   *
   * <p>The default implementation returns null until {@link #setSimilarity} has been called, which
   * means this type has no custom similarity associated with it.
   *
   * @lucene.internal
   */
  public Similarity getSimilarity() {
    return similarity;
  }

  /**
   * Gets the factory for the Similarity used when scoring fields of this type.
   *
   * <p>The default implementation returns null until {@link #setSimilarity} has been called, which
   * means this type has no custom similarity factory associated with it.
   *
   * @lucene.internal
   */
  public SimilarityFactory getSimilarityFactory() {
    return similarityFactory;
  }

  /**
   * Return the numeric type of this field, or null if this field is not a numeric field. This base
   * implementation returns null.
   */
  public NumberType getNumberType() {
    return null;
  }

  /**
   * Sets the Similarity used when scoring fields of this type. Both the factory and the similarity
   * it returns from {@code getSimilarity()} are stored.
   *
   * @param similarityFactory the factory to use; it is dereferenced here, so it must not be null
   * @lucene.internal
   */
  public void setSimilarity(SimilarityFactory similarityFactory) {
    this.similarityFactory = similarityFactory;
    this.similarity = similarityFactory.getSimilarity();
  }

  /**
   * The postings format used for this field type; taken from the declaration's arguments in {@link
   * #setArgs}, null when not specified.
   */
  protected String postingsFormat;

  /** Returns the configured postings format name, or null if none was specified. */
  public String getPostingsFormat() {
    return postingsFormat;
  }

  /**
   * The docvalues format used for this field type; taken from the declaration's arguments in {@link
   * #setArgs}, null when not specified.
   */
  protected String docValuesFormat;

  /** Returns the configured docvalues format name, or null if none was specified. */
  public final String getDocValuesFormat() {
    return docValuesFormat;
  }

  /**
   * Calls back to TextResponseWriter to write the field value.
   *
   * <p>Sub-classes should prefer using {@link #toExternal(IndexableField)} or {@link
   * #toObject(IndexableField)} to get the writeable external value of {@code f} instead of
   * directly using {@code f.stringValue()} or {@code f.binaryValue()}.
   *
   * @param writer the response writer to call back
   * @param name the name to write the value under
   * @param f the field whose value is written
   * @throws IOException if writing fails
   */
  public abstract void write(TextResponseWriter writer, String name, IndexableField f)
      throws IOException;

  /**
   * Returns the SortField instance that should be used to sort fields of this type.
   *
   * @param field the schema field to sort on
   * @param top NOTE(review): presumably true for a reversed (descending) sort, mirroring the
   *     {@code reverse} parameter of the helper methods -- confirm against implementations
   * @see SchemaField#checkSortability
   * @see #getStringSort
   * @see #getNumericSort
   */
  public abstract SortField getSortField(SchemaField field, boolean top);

  /**
   * A Helper utility method for use by subclasses.
   *
   * <p>This method deals with:
   *
   * <ul>
   *   <li>{@link SchemaField#checkSortability}
   *   <li>Creating a {@link SortField} on {@code field} with the specified {@code reverse} &amp;
   *       {@code sortType}
   *   <li>Setting the {@link SortField#setMissingValue} to {@code missingLow} or {@code
   *       missingHigh} as appropriate based on the value of {@code reverse} and the {@code
   *       sortMissingFirst} &amp; {@code sortMissingLast} properties of the {@code field}
   * </ul>
   *
   * @param field The SchemaField to sort on. May use {@code sortMissingFirst} or {@code
   *     sortMissingLast} or neither.
   * @param sortType The sort Type of the underlying values in the {@code field}
   * @param reverse True if natural order of the {@code sortType} should be reversed
   * @param missingLow The {@code missingValue} to be used if the other params indicate that docs
   *     w/o values should sort as "low" as possible.
   * @param missingHigh The {@code missingValue} to be used if the other params indicate that docs
   *     w/o values should sort as "high" as possible.
   * @see #getSortedSetSortField
   */
  protected static SortField getSortField(
      SchemaField field,
      SortField.Type sortType,
      boolean reverse,
      Object missingLow,
      Object missingHigh) {
    field.checkSortability();

    final SortField result = new SortField(field.getName(), sortType, reverse);
    applySetMissingValue(field, result, missingLow, missingHigh);
    return result;
  }

  /**
   * Same as {@link #getSortField} but using {@link SortedSetSortField}.
   *
   * @param field the SchemaField to sort on; its sortability is checked first
   * @param selector which value of a multi-valued document to sort by
   * @param reverse true if the natural order should be reversed
   * @param missingLow missing value used when docs w/o values should sort as "low" as possible
   * @param missingHigh missing value used when docs w/o values should sort as "high" as possible
   */
  protected static SortField getSortedSetSortField(
      SchemaField field,
      SortedSetSelector.Type selector,
      boolean reverse,
      Object missingLow,
      Object missingHigh) {
    field.checkSortability();

    final SortField result = new SortedSetSortField(field.getName(), reverse, selector);
    applySetMissingValue(field, result, missingLow, missingHigh);
    return result;
  }

  /**
   * Same as {@link #getSortField} but using {@link SortedNumericSortField}.
   *
   * @param field the SchemaField to sort on; its sortability is checked first
   * @param sortType the sort Type of the underlying numeric values
   * @param selector which value of a multi-valued document to sort by
   * @param reverse true if the natural order should be reversed
   * @param missingLow missing value used when docs w/o values should sort as "low" as possible
   * @param missingHigh missing value used when docs w/o values should sort as "high" as possible
   */
  protected static SortField getSortedNumericSortField(
      SchemaField field,
      SortField.Type sortType,
      SortedNumericSelector.Type selector,
      boolean reverse,
      Object missingLow,
      Object missingHigh) {
    field.checkSortability();

    final SortField result =
        new SortedNumericSortField(field.getName(), sortType, reverse, selector);
    applySetMissingValue(field, result, missingLow, missingHigh);
    return result;
  }

  /**
   * Sets the missing value of {@code sortField} according to the field's {@code sortMissingLast} /
   * {@code sortMissingFirst} properties and the sort direction; leaves it untouched when neither
   * property is set. {@code sortMissingLast} takes precedence if both are set.
   *
   * @see #getSortField
   * @see #getSortedSetSortField
   */
  private static void applySetMissingValue(
      SchemaField field, SortField sortField, Object missingLow, Object missingHigh) {
    final boolean descending = sortField.getReverse();

    // "last" means high in an ascending sort and low in a descending one; "first" is the mirror
    final boolean useHigh;
    if (field.sortMissingLast()) {
      useHigh = !descending;
    } else if (field.sortMissingFirst()) {
      useHigh = descending;
    } else {
      return;
    }
    sortField.setMissingValue(useHigh ? missingHigh : missingLow);
  }

  /**
   * Utility usable by subclasses when they want to get basic String sorting using common checks.
   *
   * <p>A multi-valued field is sorted through a {@link SortedSetSortField} when its type supplies a
   * default selector for the direction; otherwise (single-valued, or no implicit selector) a plain
   * STRING sort field is requested and {@link #getSortField} reports any error.
   *
   * @param field the field to sort on
   * @param reverse true for a descending sort
   * @see SchemaField#checkSortability
   * @see #getSortedSetSortField
   * @see #getSortField
   */
  protected SortField getStringSort(SchemaField field, boolean reverse) {
    final MultiValueSelector selector =
        field.multiValued()
            ? field.type.getDefaultMultiValueSelectorForSort(field, reverse)
            : null;

    if (selector == null) {
      return getSortField(
          field, SortField.Type.STRING, reverse, SortField.STRING_FIRST, SortField.STRING_LAST);
    }
    return getSortedSetSortField(
        field,
        selector.getSortedSetSelectorType(),
        reverse,
        SortField.STRING_FIRST,
        SortField.STRING_LAST);
  }

  /**
   * Utility usable by subclasses when they want to get basic Numeric sorting using common checks.
   *
   * <p>A multi-valued field is sorted through a {@link SortedNumericSortField} when its type
   * supplies a default selector for the direction; otherwise (single-valued, or no implicit
   * selector) a plain sort field is requested and {@link #getSortField} reports any error.
   *
   * @param field the field to sort on
   * @param type supplies the sort type and the low/high missing values
   * @param reverse true for a descending sort
   * @see SchemaField#checkSortability
   * @see #getSortedNumericSortField
   * @see #getSortField
   */
  protected SortField getNumericSort(SchemaField field, NumberType type, boolean reverse) {
    final MultiValueSelector selector =
        field.multiValued()
            ? field.type.getDefaultMultiValueSelectorForSort(field, reverse)
            : null;

    if (selector == null) {
      return getSortField(
          field, type.sortType, reverse, type.sortMissingLow, type.sortMissingHigh);
    }
    return getSortedNumericSortField(
        field,
        type.sortType,
        selector.getSortedNumericSelectorType(),
        reverse,
        type.sortMissingLow,
        type.sortMissingHigh);
  }

  /**
   * Called to get the default value source (normally, from the Lucene FieldCache). This base
   * implementation first runs {@code field.checkFieldCacheSource()} and then returns a {@code
   * StrFieldSource} on the field's name; {@code parser} is not used here.
   */
  public ValueSource getValueSource(SchemaField field, QParser parser) {
    field.checkFieldCacheSource();
    return new StrFieldSource(field.name);
  }

  /**
   * Method for dynamically building a ValueSource based on a single value of a multivalued field.
   *
   * <p>The default implementation throws an error except in the trivial case where this method is
   * used on a {@link SchemaField} that is in fact not-multivalued, in which case it delegates to
   * {@link #getValueSource}.
   *
   * @param choice the selector; irrelevant for a single-valued field, which matches any selector
   * @param field the field to read
   * @param parser passed on to {@link #getValueSource}
   * @throws SolrException with {@code BAD_REQUEST} if {@code field} is multivalued
   * @see MultiValueSelector
   */
  public ValueSource getSingleValueSource(
      MultiValueSelector choice, SchemaField field, QParser parser) {
    if (field.multiValued()) {
      throw new SolrException(
          ErrorCode.BAD_REQUEST,
          "Selecting a single value from a multivalued field is not supported for this field: "
              + field.getName()
              + " (type: "
              + this.getTypeName()
              + ")");
    }
    // trivial base case: a single value matches any selector
    return getValueSource(field, parser);
  }

  /**
   * Method for indicating which {@link MultiValueSelector} (if any) should be used when sorting on
   * a multivalued field of this type for the specified direction (asc/desc). The default
   * implementation returns null (for all inputs).
   *
   * <p>{@link #getStringSort} and {@link #getNumericSort} fall back to a plain sort field when this
   * returns null.
   *
   * @param field The SchemaField (of this type) in question
   * @param reverse false if this is an ascending sort, true if this is a descending sort.
   * @return the implicit selector to use for this direction, or null if implicit sorting on the
   *     specified direction is not supported and should return an error.
   * @see MultiValueSelector
   */
  public MultiValueSelector getDefaultMultiValueSelectorForSort(
      SchemaField field, boolean reverse) {
    // trivial base case
    return null;
  }

  /**
   * Returns a Query instance for doing range searches on this field type. {@link
   * org.apache.solr.search.SolrQueryParser} currently passes {@code part1} and {@code part2} as
   * null if they are '*' respectively. {@code minInclusive} and {@code maxInclusive} are both true
   * currently by SolrQueryParser but that may change in the future. Also, other QueryParser
   * implementations may have different semantics.
   *
   * <p>By default range queries with '*'s or nulls on both sides are treated as existence queries
   * and are created with {@link #getExistenceQuery}. If unbounded range queries should not be
   * treated as existence queries for a certain fieldType, then {@link
   * #treatUnboundedRangeAsExistence} should be overridden.
   *
   * <p>Sub-classes should override the {@link #getSpecializedRangeQuery} method to provide their
   * own range query implementation.
   *
   * @param parser the {@link org.apache.solr.search.QParser} calling the method
   * @param field the schema field
   * @param part1 the lower boundary of the range, nulls are allowed.
   * @param part2 the upper boundary of the range, nulls are allowed
   * @param minInclusive whether the minimum of the range is inclusive or not
   * @param maxInclusive whether the maximum of the range is inclusive or not
   * @return a Query instance to perform range search according to given parameters
   */
  public Query getRangeQuery(
      QParser parser,
      SchemaField field,
      String part1,
      String part2,
      boolean minInclusive,
      boolean maxInclusive) {
    final boolean unbounded = part1 == null && part2 == null;
    if (unbounded && treatUnboundedRangeAsExistence(field)) {
      return getExistenceQuery(parser, field);
    }
    return getSpecializedRangeQuery(parser, field, part1, part2, minInclusive, maxInclusive);
  }

  /**
   * Tells {@link #getRangeQuery} whether a range query with no lower and no upper bound may be
   * answered with an existence query for the given field.
   *
   * <p>This base implementation always answers {@code true}.
   *
   * @param field the schema field
   * @return {@code true} if unbounded range and existence are equivalent for the given field type
   */
  protected boolean treatUnboundedRangeAsExistence(SchemaField field) {
    // Default: unbounded range == existence.
    return true;
  }

  /**
   * Builds the actual range Query for this field type. {@link
   * org.apache.solr.search.SolrQueryParser} currently passes {@code part1} and {@code part2} as
   * {@code null} when they are '*', and currently always passes {@code true} for both {@code
   * minInclusive} and {@code maxInclusive}; that may change in the future, and other QueryParser
   * implementations may have different semantics.
   *
   * <p>Sub-classes should override this method to provide their own range query implementation.
   * They should strive to handle nulls in {@code part1} and/or {@code part2}, as well as unequal
   * {@code minInclusive} and {@code maxInclusive} parameters, gracefully.
   *
   * <p>This method does not, and should not, check for or handle existence queries; see {@link
   * #getRangeQuery} for that logic.
   *
   * <p>This implementation converts each non-null bound with {@code toInternal}. It returns a
   * slow docValues range query when the field has docValues but is not indexed, and a {@code
   * SolrRangeQuery} otherwise.
   *
   * @param parser the {@link org.apache.solr.search.QParser} calling the method
   * @param field the schema field
   * @param part1 the lower boundary of the range, nulls are allowed
   * @param part2 the upper boundary of the range, nulls are allowed
   * @param minInclusive whether the minimum of the range is inclusive or not
   * @param maxInclusive whether the maximum of the range is inclusive or not
   * @return a Query instance to perform range search according to given parameters
   */
  protected Query getSpecializedRangeQuery(
      QParser parser,
      SchemaField field,
      String part1,
      String part2,
      boolean minInclusive,
      boolean maxInclusive) {
    // TODO: change these all to use readableToIndexed/bytes instead (e.g. for unicode collation)
    BytesRef lowerBound = null;
    if (part1 != null) {
      lowerBound = new BytesRef(toInternal(part1));
    }
    BytesRef upperBound = null;
    if (part2 != null) {
      upperBound = new BytesRef(toInternal(part2));
    }

    final boolean docValuesOnly = field.hasDocValues() && !field.indexed();
    if (docValuesOnly) {
      return SortedSetDocValuesField.newSlowRangeQuery(
          field.getName(), lowerBound, upperBound, minInclusive, maxInclusive);
    }
    return new SolrRangeQuery(field.getName(), lowerBound, upperBound, minInclusive, maxInclusive);
  }

  /**
   * Builds a Query that matches documents having a value in the given field. Fields with docValues
   * get a docValues-based existence query; otherwise, fields that keep norms (and are not point
   * fields) get a norms-based existence query; everything else is delegated to {@link
   * #getSpecializedExistenceQuery}, which defaults to an unbounded range query.
   *
   * <p>This method should only be overridden when a fieldType does not support {@link
   * org.apache.lucene.search.DocValuesFieldExistsQuery} or {@link
   * org.apache.lucene.search.NormsFieldExistsQuery}. If a fieldType does not support an unbounded
   * range query as an existence query (such as double or float fields), {@link
   * #getSpecializedExistenceQuery} should be overridden instead.
   *
   * @param parser The {@link org.apache.solr.search.QParser} calling the method
   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
   * @return The {@link org.apache.lucene.search.Query} instance.
   */
  public Query getExistenceQuery(QParser parser, SchemaField field) {
    if (field.hasDocValues()) {
      return new DocValuesFieldExistsQuery(field.getName());
    }

    // TODO: Remove !isPointField() for SOLR-14199
    final boolean canUseNorms = !field.omitNorms() && !isPointField();
    if (canUseNorms) {
      return new NormsFieldExistsQuery(field.getName());
    }

    // Neither docValues nor norms are available: default to an unbounded range query
    return getSpecializedExistenceQuery(parser, field);
  }

  /**
   * Builds an existence Query for a field that lacks certain options, such as docValues or norms.
   *
   * <p>Field types may override this method to implement specialized existence logic. The default
   * is an unbounded, inclusive range query obtained from {@link #getSpecializedRangeQuery}.
   *
   * @param parser The {@link org.apache.solr.search.QParser} calling the method
   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
   * @return The {@link org.apache.lucene.search.Query} instance.
   */
  protected Query getSpecializedExistenceQuery(QParser parser, SchemaField field) {
    // Both bounds open (null), both ends inclusive.
    final String openBound = null;
    return getSpecializedRangeQuery(parser, field, openBound, openBound, true, true);
  }

  /**
   * Builds a Query for searching a single value against a field.
   *
   * <p>For a field that has docValues but is not indexed, the value is matched with an inclusive
   * range query whose lower and upper bounds are both {@code externalVal}. Otherwise the value is
   * converted with {@code readableToIndexed} and wrapped in a term query.
   *
   * @param parser The {@link org.apache.solr.search.QParser} calling the method
   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
   * @param externalVal The String representation of the value to search
   * @return The {@link org.apache.lucene.search.Query} instance. This implementation returns a
   *     {@link org.apache.lucene.search.TermQuery} for indexed fields, but overriding
   *     implementations may not
   */
  public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
    final boolean docValuesOnly = field.hasDocValues() && !field.indexed();
    if (docValuesOnly) {
      // match-only
      return getRangeQuery(parser, field, externalVal, externalVal, true, true);
    }

    final BytesRefBuilder indexedForm = new BytesRefBuilder();
    readableToIndexed(externalVal, indexedForm);
    final Term term = new Term(field.getName(), indexedForm);
    return new TermQuery(term);
  }

  /**
   * Builds a Query for a single-term search against a field. The term will not be analyzed before
   * searching.
   *
   * <p>This implementation simply delegates to {@link #getFieldQuery}.
   *
   * @param parser The {@link org.apache.solr.search.QParser} calling the method
   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
   * @param externalVal The String representation of the term value to search
   * @return The {@link org.apache.lucene.search.Query} instance.
   */
  public Query getFieldTermQuery(QParser parser, SchemaField field, String externalVal) {
    final Query termQuery = getFieldQuery(parser, field, externalVal);
    return termQuery;
  }

  /**
   * Builds a Query matching documents whose field contains any of the given values.
   *
   * <p>For a field that is not indexed, each value is turned into its own {@link #getFieldQuery}
   * sub-query and the sub-queries are combined as optional ({@code SHOULD}) clauses of a boolean
   * query. For an indexed field, every value is converted with {@code readableToIndexed} and the
   * resulting terms are wrapped in a single {@code TermInSetQuery}.
   *
   * @param parser The {@link org.apache.solr.search.QParser} calling the method
   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
   * @param externalVals The String representations of the values to search
   * @return The {@link org.apache.lucene.search.Query} instance.
   * @lucene.experimental
   */
  public Query getSetQuery(QParser parser, SchemaField field, Collection<String> externalVals) {
    if (!field.indexed()) {
      // TODO: if the field isn't indexed, this feels like the wrong query type to use?
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      for (String externalVal : externalVals) {
        Query subq = getFieldQuery(parser, field, externalVal);
        builder.add(subq, BooleanClause.Occur.SHOULD);
      }
      return QueryUtils.build(builder, parser);
    }

    List<BytesRef> lst = new ArrayList<>(externalVals.size());
    // One builder is reused for every value; toBytesRef() hands out a fresh copy each time.
    BytesRefBuilder br = new BytesRefBuilder();
    for (String externalVal : externalVals) {
      readableToIndexed(externalVal, br);
      lst.add(br.toBytesRef());
    }
    return new TermInSetQuery(field.getName(), lst);
  }

  /**
   * Expert: Selects the rewrite method for multi-term queries such as wildcards.
   *
   * <p>This implementation picks a docValues-based rewrite for fields that are not indexed but
   * have docValues, and {@code MultiTermQuery.CONSTANT_SCORE_REWRITE} for everything else.
   *
   * @param parser The {@link org.apache.solr.search.QParser} calling the method
   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
   * @return A suitable rewrite method for rewriting multi-term queries to primitive queries.
   */
  public MultiTermQuery.RewriteMethod getRewriteMethod(QParser parser, SchemaField field) {
    final boolean docValuesOnly = !field.indexed() && field.hasDocValues();
    return docValuesOnly ? new DocValuesRewriteMethod() : MultiTermQuery.CONSTANT_SCORE_REWRITE;
  }

  /**
   * Checks {@link org.apache.solr.schema.SchemaField} instances constructed using this field type
   * to ensure that they are valid.
   *
   * <p>This method is called by the SchemaField constructor to check that its initialization does
   * not violate any fundamental requirements of the FieldType. Subclasses may choose to throw a
   * {@link SolrException} if invariants are violated by the SchemaField.
   *
   * <p>This implementation calls {@link #checkSupportsDocValues()} when the field has docValues,
   * and rejects 'large' fields that are multiValued or whose type reports a number type.
   *
   * @param field the schema field to validate
   * @throws SolrException if the field violates one of the checks above
   */
  public void checkSchemaField(final SchemaField field) {
    if (field.hasDocValues()) {
      checkSupportsDocValues();
    }

    if (!field.isLarge()) {
      // Remaining checks only concern 'large' fields.
      return;
    }
    if (field.multiValued()) {
      throw new SolrException(
          ErrorCode.SERVER_ERROR, "Field type " + this + " is 'large'; can't support multiValued");
    }
    if (getNumberType() != null) {
      throw new SolrException(
          ErrorCode.SERVER_ERROR, "Field type " + this + " is 'large'; can't support numerics");
    }
  }

  /**
   * Hook invoked by {@link #checkSchemaField(SchemaField)} when the field has docValues. By
   * default no field type supports them, so this base implementation always throws.
   *
   * @throws SolrException always, with {@code SERVER_ERROR}, in this base implementation
   */
  protected void checkSupportsDocValues() {
    final String message = "Field type " + this + " does not support doc values";
    throw new SolrException(ErrorCode.SERVER_ERROR, message);
  }

  // Public string constants.
  // NOTE(review): judging by names and values these look like the attribute/element names used in
  // a field type definition (type, analyzers, analysis chain components) - confirm against the
  // usages, which are outside this part of the file.
  public static final String TYPE = "type";
  public static final String TYPE_NAME = "name";
  public static final String CLASS_NAME = "class";
  public static final String ANALYZER = "analyzer";
  public static final String INDEX = "index";
  public static final String INDEX_ANALYZER = "indexAnalyzer";
  public static final String QUERY = "query";
  public static final String QUERY_ANALYZER = "queryAnalyzer";
  public static final String MULTI_TERM = "multiterm";
  public static final String MULTI_TERM_ANALYZER = "multiTermAnalyzer";
  public static final String SIMILARITY = "similarity";
  public static final String CHAR_FILTER = "charFilter";
  public static final String CHAR_FILTERS = "charFilters";
  public static final String TOKENIZER = "tokenizer";
  public static final String FILTER = "filter";
  public static final String FILTERS = "filters";

  // Non-public string constants (visible to subclasses/package, or private to this class).
  // NOTE(review): presumably names of field type arguments/properties - confirm against usages.
  protected static final String AUTO_GENERATE_PHRASE_QUERIES = "autoGeneratePhraseQueries";
  protected static final String ENABLE_GRAPH_QUERIES = "enableGraphQueries";
  private static final String ARGS = "args";
  private static final String POSITION_INCREMENT_GAP = "positionIncrementGap";
  protected static final String SYNONYM_QUERY_STYLE = "synonymQueryStyle";

  /**
   * Get a map of property name -> value for this field type.
   *
   * @param showDefaults if true, include default properties.
   */
  public SimpleOrderedMap