/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.spelling;

import java.util.Objects;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;

/**
 * A Token is an occurrence of a term from the text of a field. It consists of a term's text, the
 * start and end offset of the term in the text of the field, and a type string.
 *
 * <p>The start and end offsets permit applications to re-associate a token with its source text,
 * e.g., to display highlighted query terms in a document browser, or to show matching text
 * fragments in a KWIC display.
 *
 * <p>The type is a string, assigned by a lexical analyzer (a.k.a. tokenizer), naming the lexical
 * or syntactic class that the token belongs to. For example, an end-of-sentence marker token
 * might be implemented with type "eos". The default token type is "word".
 *
 * <p>A Token can optionally have metadata (a.k.a. payload) in the form of a variable-length byte
 * array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the payloads
 * from the index.
 *
 * <p>A few things to note:
 *
 * <ul>
 *   <li>clear() initializes all of the fields to default values. This was changed in contrast to
 *       Lucene 2.4, but should affect no one.
 *   <li>Because TokenStreams can be chained, one cannot assume that the Token's current type is
 *       correct.
 *   <li>The startOffset and endOffset represent the start and end offset in the source text, so
 *       be careful when adjusting them.
 *   <li>When caching a reusable token, clone it. When injecting a cached token into a stream that
 *       can be reset, clone it again.
 * </ul>
 */
@Deprecated
public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
  // TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack
  private int flags;
  private BytesRef payload;

  /** Constructs a Token with null text. */
  public Token() {}

  /**
   * Constructs a Token with the given term text, start and end offsets. The type defaults to
   * "word." NOTE: for better indexing speed you should instead use the char[] termBuffer methods
   * to set the term text.
   *
   * @param text term text
   * @param start start offset in the source text
   * @param end end offset in the source text
   */
  public Token(CharSequence text, int start, int end) {
    append(text);
    setOffset(start, end);
  }

  /**
   * {@inheritDoc}
   *
   * @see FlagsAttribute
   */
  @Override
  public int getFlags() {
    return flags;
  }

  /**
   * {@inheritDoc}
   *
   * @see FlagsAttribute
   */
  @Override
  public void setFlags(int flags) {
    this.flags = flags;
  }

  /**
   * {@inheritDoc}
   *
   * @see PayloadAttribute
   */
  @Override
  public BytesRef getPayload() {
    return this.payload;
  }

  /**
   * {@inheritDoc}
   *
   * @see PayloadAttribute
   */
  @Override
  public void setPayload(BytesRef payload) {
    this.payload = payload;
  }

  /**
   * Resets the term text, payload, flags, positionIncrement, positionLength, startOffset,
   * endOffset and token type to default values.
   */
  @Override
  public void clear() {
    super.clear();
    flags = 0;
    payload = null;
  }

  @Override
  public boolean equals(Object obj) {
    if (obj == this) return true;
    if (!(obj instanceof Token)) return false;
    final Token other = (Token) obj;
    return (flags == other.flags
        && Objects.equals(payload, other.payload)
        && super.equals(obj));
  }

  @Override
  public int hashCode() {
    int code = super.hashCode();
    code = code * 31 + flags;
    if (payload != null) {
      code = code * 31 + payload.hashCode();
    }
    return code;
  }

  @Override
  public Token clone() {
    final Token t = (Token) super.clone();
    if (payload != null) {
      t.payload = BytesRef.deepCopyOf(payload);
    }
    return t;
  }

  @Override
  public void copyTo(AttributeImpl target) {
    super.copyTo(target);
    ((FlagsAttribute) target).setFlags(flags);
    ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
  }

  @Override
  public void reflectWith(AttributeReflector reflector) {
    super.reflectWith(reflector);
    reflector.reflect(FlagsAttribute.class, "flags", flags);
    reflector.reflect(PayloadAttribute.class, "payload", payload);
  }
}
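
Below is a minimal usage sketch of the class above. It is illustrative only: this deprecated class is an internal bridge for Solr's spellchecker API, and the wrapper class name, sample text, offsets, flag value, and payload bytes are assumptions for demonstration, not part of the original source.

import org.apache.lucene.util.BytesRef;
import org.apache.solr.spelling.Token;

public class TokenUsageSketch {
  public static void main(String[] args) {
    // Hypothetical source text "The quick fox": "quick" spans characters 4 to 9.
    Token token = new Token("quick", 4, 9);
    token.setFlags(1);                                  // application-defined flag bits
    token.setPayload(new BytesRef(new byte[] {0x7f})); // optional per-token metadata

    // Per the class Javadoc: clone a reusable token before caching it, so later
    // mutations of the original do not leak into the cached copy.
    Token cached = token.clone();

    System.out.println(token + " [" + token.startOffset() + "," + token.endOffset()
        + ") type=" + token.type() + " flags=" + cached.getFlags());
  }
}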



