All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.lucene.index.Term Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * A Term represents a word from text. This is the unit of search. It is composed of two elements,
 * the text of the word, as a string, and the name of the field that the text occurred in.
 *
 * 

Note that terms may represent more than words from text fields, but also things like dates, * email addresses, urls, etc. */ public final class Term implements Comparable, Accountable { private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(Term.class) + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class); String field; BytesRef bytes; /** * Constructs a Term with the given field and bytes. * *

Note that a null field or null bytes value results in undefined behavior for most Lucene * APIs that accept a Term parameter. * *

The provided BytesRef is copied when it is non null. */ public Term(String fld, BytesRef bytes) { field = fld; this.bytes = bytes == null ? null : BytesRef.deepCopyOf(bytes); } /** * Constructs a Term with the given field and the bytes from a builder. * *

Note that a null field value results in undefined behavior for most Lucene APIs that accept * a Term parameter. */ public Term(String fld, BytesRefBuilder bytesBuilder) { field = fld; this.bytes = bytesBuilder.toBytesRef(); } /** * Constructs a Term with the given field and text. * *

Note that a null field or null text value results in undefined behavior for most Lucene APIs * that accept a Term parameter. */ public Term(String fld, String text) { this(fld, new BytesRef(text)); } /** * Constructs a Term with the given field and empty text. This serves two purposes: 1) reuse of a * Term with the same field. 2) pattern for a query. * * @param fld field's name */ public Term(String fld) { this(fld, new BytesRef()); } /** * Returns the field of this term. The field indicates the part of a document which this term came * from. */ public String field() { return field; } /** * Returns the text of this term. In the case of words, this is simply the text of the word. In * the case of dates and other types, this is an encoding of the object as a string. */ public String text() { return toString(bytes); } /** * Returns human-readable form of the term text. If the term is not unicode, the raw bytes will be * printed instead. */ public static String toString(BytesRef termText) { // the term might not be text, but usually is. so we make a best effort CharsetDecoder decoder = StandardCharsets.UTF_8 .newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); try { return decoder .decode(ByteBuffer.wrap(termText.bytes, termText.offset, termText.length)) .toString(); } catch ( @SuppressWarnings("unused") CharacterCodingException e) { return termText.toString(); } } /** Returns the bytes of this term, these should not be modified. 
*/ public BytesRef bytes() { return bytes; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; Term other = (Term) obj; if (field == null) { if (other.field != null) return false; } else if (!field.equals(other.field)) return false; if (bytes == null) { if (other.bytes != null) return false; } else if (!bytes.equals(other.bytes)) return false; return true; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((field == null) ? 0 : field.hashCode()); result = prime * result + ((bytes == null) ? 0 : bytes.hashCode()); return result; } /** * Compares two terms, returning a negative integer if this term belongs before the argument, zero * if this term is equal to the argument, and a positive integer if this term belongs after the * argument. * *

The ordering of terms is first by field, then by text. */ @Override public int compareTo(Term other) { if (field.equals(other.field)) { return bytes.compareTo(other.bytes); } else { return field.compareTo(other.field); } } /** * Resets the field and text of a Term. * *

WARNING: the provided BytesRef is not copied, but used directly. Therefore the bytes should * not be modified after construction, for example, you should clone a copy rather than pass * reused bytes from a TermsEnum. */ final void set(String fld, BytesRef bytes) { field = fld; this.bytes = bytes; } @Override public String toString() { return field + ":" + text(); } @Override public long ramBytesUsed() { return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(field) + (bytes != null ? RamUsageEstimator.alignObjectSize( bytes.bytes.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER) : 0L); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy