org.apache.lucene.index.Term Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.servicemix.bundles.lucene
This OSGi bundle wraps ${pkgArtifactId} ${pkgVersion} jar file.
There is a newer version: 6.4.2_1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * A Term represents a word from text. This is the unit of search. It is composed of two elements,
 * the text of the word, as a string, and the name of the field that the text occurred in.
 *
 * <p>Note that terms may represent more than words from text fields, but also things like dates,
 * email addresses, urls, etc.
 *
 * <p>Terms are ordered first by field name and then by their bytes; see {@link #compareTo(Term)}.
 */
public final class Term implements Comparable<Term>, Accountable {
  /** Shallow size of a Term instance plus the shallow size of the BytesRef it references. */
  private static final long BASE_RAM_BYTES =
      RamUsageEstimator.shallowSizeOfInstance(Term.class)
          + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);

  // Both fields are package-private and non-final because set(String, BytesRef) reassigns them.
  String field;
  BytesRef bytes;

  /**
   * Constructs a Term with the given field and bytes.
   *
   * <p>Note that a null field or null bytes value results in undefined behavior for most Lucene
   * APIs that accept a Term parameter.
   *
   * <p>The provided BytesRef is copied when it is non null.
   *
   * @param fld field's name
   * @param bytes the term bytes; deep-copied unless {@code null}
   */
  public Term(String fld, BytesRef bytes) {
    field = fld;
    this.bytes = bytes == null ? null : BytesRef.deepCopyOf(bytes);
  }

  /**
   * Constructs a Term with the given field and the bytes from a builder.
   *
   * <p>Note that a null field value results in undefined behavior for most Lucene APIs that accept
   * a Term parameter.
   *
   * @param fld field's name
   * @param bytesBuilder builder whose {@code toBytesRef()} result becomes this term's bytes
   */
  public Term(String fld, BytesRefBuilder bytesBuilder) {
    field = fld;
    this.bytes = bytesBuilder.toBytesRef();
  }

  /**
   * Constructs a Term with the given field and text.
   *
   * <p>Note that a null field or null text value results in undefined behavior for most Lucene APIs
   * that accept a Term parameter.
   *
   * @param fld field's name
   * @param text the term text, converted to a {@link BytesRef}
   */
  public Term(String fld, String text) {
    this(fld, new BytesRef(text));
  }

  /**
   * Constructs a Term with the given field and empty text. This serves two purposes: 1) reuse of a
   * Term with the same field. 2) pattern for a query.
   *
   * @param fld field's name
   */
  public Term(String fld) {
    this(fld, new BytesRef());
  }

  /**
   * Returns the field of this term. The field indicates the part of a document which this term came
   * from.
   *
   * @return the field name
   */
  public String field() {
    return field;
  }

  /**
   * Returns the text of this term. In the case of words, this is simply the text of the word. In
   * the case of dates and other types, this is an encoding of the object as a string.
   *
   * @return the term bytes rendered through {@link #toString(BytesRef)}
   */
  public String text() {
    return toString(bytes);
  }

  /**
   * Returns human-readable form of the term text. The bytes are decoded strictly as UTF-8; if they
   * are not valid UTF-8, the result of {@link BytesRef#toString()} is returned instead.
   *
   * @param termText the term bytes to render
   * @return the decoded text, or the BytesRef's own string form when decoding fails
   */
  public static String toString(BytesRef termText) {
    // the term might not be text, but usually is. so we make a best effort
    // REPORT makes the decoder throw on bad input instead of silently substituting characters.
    CharsetDecoder decoder =
        StandardCharsets.UTF_8
            .newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
    try {
      return decoder
          .decode(ByteBuffer.wrap(termText.bytes, termText.offset, termText.length))
          .toString();
    } catch (
        @SuppressWarnings("unused")
        CharacterCodingException e) {
      // Deliberate best-effort fallback: the bytes are not valid UTF-8 text.
      return termText.toString();
    }
  }

  /**
   * Returns the bytes of this term, these should not be modified.
   *
   * @return the internal BytesRef (not a copy)
   */
  public BytesRef bytes() {
    return bytes;
  }

  /**
   * Two terms are equal when they are of the same class and both their field and their bytes are
   * equal; a null field or null bytes only equals another null.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null) {
      return false;
    }
    if (getClass() != obj.getClass()) {
      return false;
    }
    Term other = (Term) obj;
    if (field == null) {
      if (other.field != null) {
        return false;
      }
    } else if (!field.equals(other.field)) {
      return false;
    }
    if (bytes == null) {
      if (other.bytes != null) {
        return false;
      }
    } else if (!bytes.equals(other.bytes)) {
      return false;
    }
    return true;
  }

  /** Hash combining the field and the bytes, with a null component contributing 0. */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + ((field == null) ? 0 : field.hashCode());
    result = prime * result + ((bytes == null) ? 0 : bytes.hashCode());
    return result;
  }

  /**
   * Compares two terms, returning a negative integer if this term belongs before the argument, zero
   * if this term is equal to the argument, and a positive integer if this term belongs after the
   * argument.
   *
   * <p>The ordering of terms is first by field, then by text.
   *
   * @param other the term to compare against
   * @return the comparison of the bytes when the fields are equal, otherwise of the fields
   */
  @Override
  public int compareTo(Term other) {
    if (field.equals(other.field)) {
      return bytes.compareTo(other.bytes);
    } else {
      return field.compareTo(other.field);
    }
  }

  /**
   * Resets the field and text of a Term.
   *
   * <p>WARNING: the provided BytesRef is not copied, but used directly. Therefore the bytes should
   * not be modified after construction, for example, you should clone a copy rather than pass
   * reused bytes from a TermsEnum.
   *
   * @param fld the new field name
   * @param bytes the new term bytes, stored by reference
   */
  final void set(String fld, BytesRef bytes) {
    field = fld;
    this.bytes = bytes;
  }

  /** Returns this term as {@code field:text}. */
  @Override
  public String toString() {
    return field + ":" + text();
  }

  /**
   * Estimates the memory used by this term: the base instance sizes, the field string, and the
   * aligned size of the backing byte array (its full length, not just the referenced slice).
   */
  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES
        + RamUsageEstimator.sizeOfObject(field)
        + (bytes != null
            ? RamUsageEstimator.alignObjectSize(
                bytes.bytes.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER)
            : 0L);
  }
}