All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.appengine.api.search.Field Maven / Gradle / Ivy

/*
 * Copyright 2021 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.appengine.api.search;

import com.google.appengine.api.internal.ImmutableCopy;
import com.google.appengine.api.search.checkers.FieldChecker;
import com.google.apphosting.api.search.DocumentPb;
import com.google.apphosting.api.search.DocumentPb.FieldValue;
import com.google.apphosting.api.search.DocumentPb.FieldValue.ContentType;
import com.google.common.base.Preconditions;
import java.io.Serializable;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * Represents a field of a {@link Document}, which is a name, an optional locale, and at most one
 * value: text, HTML, atom, date, GeoPoint, untokenizedPrefix, tokenizedPrefix or vector. Field name
 * lengths are between 1 and
 * {@link com.google.appengine.api.search.checkers.SearchApiLimits#MAXIMUM_NAME_LENGTH} characters,
 * and text and HTML values are limited to
 * {@link com.google.appengine.api.search.checkers.SearchApiLimits#MAXIMUM_TEXT_LENGTH}. Atoms are
 * limited to {@link com.google.appengine.api.search.checkers.SearchApiLimits#MAXIMUM_ATOM_LENGTH}
 * characters, both prefix types are limited to
 * {@link com.google.appengine.api.search.checkers.SearchApiLimits#MAXIMUM_PREFIX_LENGTH}
 * . Vector field size is limited to
 * {@link com.google.appengine.api.search.checkers.SearchApiLimits#VECTOR_FIELD_MAX_SIZE}
 * and dates must not have a time component.
 *
 * 

* There are 5 types of text fields, ATOM, TEXT, HTML, UNTOKENIZED_PREFIX, and TOKENIZED_PREFIX. * Atom fields when queried, are checked for equality. For example, if you add a field with name * {@code code} and an ATOM value of "928A 33B-1", then query {@code code:"928A 33B-1"} would match * the document with this field, while query {@code code:928A} would not. TEXT fields, unlike ATOM, * match both on equality or if any token extracted from the original field matches. Thus if * {@code code} field had the value set with {@link Field.Builder#setText(String)} method, both * queries would match. HTML fields have HTML tags stripped before tokenization. Untokenized prefix * fields match queries that are prefixes containing the contiguous starting characters of the whole * field. For example if the field was "the quick brown fox", the query "the qui" would match * whereas "th qui" would not. On the other hand, Tokenized prefix fields match if the query terms * are prefixes of individual terms in the field. If the query is a phrase of terms, the ordering of * the terms will matter. For example if the field is "the quick brown fox", the query "th qui bro" * would match whereas "bro qui the" would not. Vector fields are only used to compute the dot * product between a given constant vector and the provided vector field for sorting and field * expressions only. for example, if a 3d vector is named "scores" and has a value of (1,2,3) then * the expression {@code dot(scores, vector(3,2,1))} will be evaluated to 10. */ public final class Field implements Serializable { /** * A field builder. Fields must have a name, and optionally a locale * and at most one of text, html, atom or date. */ public static final class Builder { // Mandatory private String name; // Optional @Nullable private Locale locale; // At most one of the following values specified. @Nullable private FieldType type; @Nullable private String text; @Nullable private String html; @Nullable private String atom; @Nullable private Date date; @Nullable private Double number; @Nullable private GeoPoint geoPoint; @Nullable private String untokenizedPrefix; @Nullable private String tokenizedPrefix; private List vector = Collections.emptyList(); /** * Constructs a field builder. */ private Builder() { } /** * Sets a name for the field. The field name length must be * between 1 and {@literal FieldChecker#MAXIMUM_NAME_LENGTH} and it should match * {@link com.google.appengine.api.search.checkers.SearchApiLimits#FIELD_NAME_PATTERN}. * * @param name the name of the field * @return this builder * @throws IllegalArgumentException if the name or value is invalid */ public Builder setName(String name) { this.name = FieldChecker.checkFieldName(name); return this; } /** * Sets a text value for the field. * * @param text the text value of the field * @return this builder * @throws IllegalArgumentException if the text is invalid */ public Builder setText(String text) { Preconditions.checkArgument(type == null, "Field value must not be already set"); this.type = FieldType.TEXT; this.text = FieldChecker.checkText(text); return this; } /** * Sets a HTML value for the field. * * @param html the HTML value of the field * @return this builder * @throws IllegalArgumentException if the HTML is invalid */ public Builder setHTML(String html) { Preconditions.checkArgument(type == null, "Field value must not be already set"); this.type = FieldType.HTML; this.html = FieldChecker.checkHTML(html); return this; } /** * Sets an atomic value, indivisible text, for the field. * * @param atom the indivisible text of the field * @return this builder * @throws IllegalArgumentException if the atom is invalid */ public Builder setAtom(String atom) { Preconditions.checkArgument(type == null, "Field value must not be already set"); this.type = FieldType.ATOM; this.atom = FieldChecker.checkAtom(atom); return this; } /** * Sets a date associated with the field. * * @param date the date of the field * @return this builder * @throws IllegalArgumentException if the date is out of range */ public Builder setDate(Date date) { Preconditions.checkArgument(type == null, "Field value must not be already set"); Preconditions.checkArgument(date != null, "Cannot set date field to null."); this.type = FieldType.DATE; this.date = FieldChecker.checkDate(date); return this; } /** * Sets a numeric value for the field. The {@code number} must be between * {@link com.google.appengine.api.search.checkers.SearchApiLimits#MINIMUM_NUMBER_VALUE} and * {@link com.google.appengine.api.search.checkers.SearchApiLimits#MAXIMUM_NUMBER_VALUE}. * * @param number the numeric value of the field * @return this builder * @throws IllegalArgumentException if the number is outside the valid range */ public Builder setNumber(double number) { Preconditions.checkArgument(type == null, "Field value must not be already set"); this.type = FieldType.NUMBER; this.number = FieldChecker.checkNumber(Double.valueOf(number)); return this; } /** * Sets a {@link GeoPoint} value for the field. * * @param geoPoint the {@link GeoPoint} value of the field * @return this builder */ public Builder setGeoPoint(GeoPoint geoPoint) { Preconditions.checkArgument(type == null, "Field value must not be already set"); Preconditions.checkArgument(geoPoint != null, "Cannot set geo field to null."); this.type = FieldType.GEO_POINT; this.geoPoint = geoPoint; return this; } /** * Sets an untokenized prefix value for the field. * * @param untokenizedPrefix the string value of the field * @return this builder * @throws IllegalArgumentException if the untokenized prefix field is invalid */ public Builder setUntokenizedPrefix(String untokenizedPrefix) { Preconditions.checkArgument(type == null, "Field value must not be already set"); this.type = FieldType.UNTOKENIZED_PREFIX; this.untokenizedPrefix = FieldChecker.checkPrefix(untokenizedPrefix); return this; } /** * Sets a tokenized prefix value for the field. * * @param tokenizedPrefix the string value of the field * @return this builder * @throws IllegalArgumentException if the tokenized prefix field is invalid */ public Builder setTokenizedPrefix(String tokenizedPrefix) { Preconditions.checkArgument(type == null, "Field value must not be already set"); this.type = FieldType.TOKENIZED_PREFIX; this.tokenizedPrefix = FieldChecker.checkPrefix(tokenizedPrefix); return this; } /** * Sets a vector value for the field. * * @param vector a list of Double values forming a vector field value * @return this builder * @throws IllegalArgumentException if the vector field is invalid */ public Builder setVector(List vector) { Preconditions.checkArgument(type == null, "Field value must not be already set"); Preconditions.checkArgument(vector != null, "Cannot set vector field to null"); List v = ImmutableCopy.list(vector); FieldChecker.checkVector(v); this.type = FieldType.VECTOR; this.vector = v; return this; } /** * Sets the Locale of the field value. If none is given, then the locale * of the document will be used. * * @param locale the locale the field value is written in * @return this builder */ public Builder setLocale(Locale locale) { this.locale = locale; return this; } /** * Builds a field using this builder. The field must have a * valid name, string value, type. * * @return a {@link Field} built by this builder * @throws IllegalArgumentException if the field has an invalid * name, text, HTML, atom, date */ public Field build() { return new Field(this); } } /** * The type of the field value. */ public enum FieldType { /** * Text content. */ TEXT, /** * HTML content. */ HTML, /** * An indivisible text content. */ ATOM, /** * A Date with no time component. */ DATE, // TODO: use self descriptive DOUBLE as a name. /** Double precision floating-point number. */ NUMBER, /** * Geographical coordinates of a point, in WGS84. */ GEO_POINT, /** * Untokenized prefix field content. */ UNTOKENIZED_PREFIX, /** * Tokenized prefix field content. */ TOKENIZED_PREFIX, /** * Vector field content. */ VECTOR, } private static final long serialVersionUID = 6829483617830682721L; // Mandatory private final String name; // Optional @Nullable private final Locale locale; @Nullable private final FieldType type; @Nullable private String text; @Nullable private String html; @Nullable private String atom; @Nullable private Date date; @Nullable private Double number; @Nullable private GeoPoint geoPoint; @Nullable private String untokenizedPrefix; @Nullable private String tokenizedPrefix; private List vector = Collections.emptyList(); /** * Constructs a field using the builder. * * @param builder a builder used to construct the Field */ private Field(Builder builder) { name = builder.name; type = builder.type; if (builder.type != null) { switch (builder.type) { case TEXT: text = builder.text; break; case HTML: html = builder.html; break; case ATOM: atom = builder.atom; break; case DATE: date = builder.date; break; case NUMBER: number = builder.number; break; case GEO_POINT: geoPoint = builder.geoPoint; break; case UNTOKENIZED_PREFIX: untokenizedPrefix = builder.untokenizedPrefix; break; case TOKENIZED_PREFIX: tokenizedPrefix = builder.tokenizedPrefix; break; case VECTOR: vector = builder.vector; break; default: throw new IllegalArgumentException(String.format("Unknown field type given %s", builder.type)); } } locale = builder.locale; checkValid(); } /** * @return the name of the field */ public String getName() { return name; } /** * @return the type of value of the field. Can be null */ public FieldType getType() { return type; } /** * @return the text value of the field. Can be null */ public String getText() { return text; } /** * @return the HTML value of the field. Can be null */ public String getHTML() { return html; } /** * @return the atomic value of the field. Can be null */ public String getAtom() { return atom; } /** * @return the date value of the field. Can be null */ public Date getDate() { return date; } /** * @return the numeric value of the field. Can be null */ public Double getNumber() { return number; } /** * @return the {@link GeoPoint} value of the field. Can be null */ public GeoPoint getGeoPoint() { return geoPoint; } /** * @return the String value of the untokenized prefix field. Can be null */ public String getUntokenizedPrefix() { return untokenizedPrefix; } /** * @return the String value of the tokenized prefix field. Can be null */ public String getTokenizedPrefix() { return tokenizedPrefix; } /** * @return the vector value of the field. */ public List getVector() { return vector; } /** * @return the locale the field value is written in. Can be null. If none * is given the locale of the document will be used */ public Locale getLocale() { return locale; } @Override public int hashCode() { return name.hashCode(); } @Override public boolean equals(Object object) { if (object == this) { return true; } if (!(object instanceof Field)) { return false; } Field field = (Field) object; return Util.equalObjects(name, field.name); } /** * Checks whether the field is valid, specifically, * whether the field name, value are valid. * Also that at most one value: text, HTML, atom, date, untokenizedPrefix, tokenizedPrefix, or * vector is set. * * @return this Field * @throws IllegalArgumentException if field name, text, HTML, atom, * date, untokenizedPrefix, tokenizedPrefix, or vector are invalid */ private Field checkValid() { FieldChecker.checkFieldName(name); if (type != null) { switch (type) { case TEXT: FieldChecker.checkText(text); break; case HTML: FieldChecker.checkHTML(html); break; case ATOM: FieldChecker.checkAtom(atom); break; case DATE: FieldChecker.checkDate(date); break; case NUMBER: case GEO_POINT: break; case UNTOKENIZED_PREFIX: FieldChecker.checkPrefix(untokenizedPrefix); break; case TOKENIZED_PREFIX: FieldChecker.checkPrefix(tokenizedPrefix); break; case VECTOR: FieldChecker.checkVector(vector); break; default: throw new IllegalArgumentException(String.format("unknown field type %s", type)); } } return this; } /** * Creates a field builder. * * @return a new builder for creating fields */ public static Builder newBuilder() { return new Builder(); } /** * Creates a builder of a field from the given field. * * @param field the field protocol buffer used to create the builder * @return a field builder created from the given field * @throws SearchException if the field contains invalid name, text, html, * atom, date */ static Builder newBuilder(DocumentPb.Field field) { FieldValue value = field.getValue(); Field.Builder fieldBuilder = Field.newBuilder().setName(field.getName()); if (value.hasLanguage()) { fieldBuilder.setLocale(FieldChecker.parseLocale(value.getLanguage())); } switch (value.getType()) { case TEXT: fieldBuilder.setText(value.getStringValue()); break; case HTML: fieldBuilder.setHTML(value.getStringValue()); break; case ATOM: fieldBuilder.setAtom(value.getStringValue()); break; case NUMBER: try { fieldBuilder.setNumber( NumberFormat.getNumberInstance().parse(value.getStringValue()).doubleValue()); } catch (ParseException e) { throw new SearchException("Failed to parse double: " + value.getStringValue()); } break; case GEO: fieldBuilder.setGeoPoint(GeoPoint.newGeoPoint(value.getGeo())); break; case DATE: String dateString = value.getStringValue(); if (dateString.isEmpty()) { throw new SearchException( String.format("date not specified for field %s", field.getName())); } fieldBuilder.setDate(DateUtil.deserializeDate(dateString)); break; case UNTOKENIZED_PREFIX: fieldBuilder.setUntokenizedPrefix(value.getStringValue()); break; case TOKENIZED_PREFIX: fieldBuilder.setTokenizedPrefix(value.getStringValue()); break; case VECTOR: fieldBuilder.setVector(value.getVectorValueList()); break; default: throw new SearchException( String.format("unknown field value type %s for field %s", value.getType(), field.getName())); } return fieldBuilder; } /** * Copies a {@link Field} object into a {@link com.google.apphosting.api.search.DocumentPb.Field} * protocol buffer. * * @return the field protocol buffer copy of this field object * @throws IllegalArgumentException if the field value type is unknown */ DocumentPb.Field copyToProtocolBuffer() { DocumentPb.FieldValue.Builder fieldValueBuilder = DocumentPb.FieldValue.newBuilder(); if (locale != null) { fieldValueBuilder.setLanguage(locale.toString()); } if (type != null) { switch (type) { case TEXT: if (text != null) { fieldValueBuilder.setStringValue(text); } fieldValueBuilder.setType(ContentType.TEXT); break; case HTML: if (html != null) { fieldValueBuilder.setStringValue(html); } fieldValueBuilder.setType(ContentType.HTML); break; case ATOM: if (atom != null) { fieldValueBuilder.setStringValue(atom); } fieldValueBuilder.setType(ContentType.ATOM); break; case DATE: fieldValueBuilder.setStringValue(DateUtil.serializeDate(date)); fieldValueBuilder.setType(ContentType.DATE); break; case NUMBER: // TODO: use binary number representation instead DecimalFormat format = new DecimalFormat(); format.setDecimalSeparatorAlwaysShown(false); format.setGroupingUsed(false); format.setMaximumFractionDigits(Integer.MAX_VALUE); fieldValueBuilder.setStringValue(format.format(number)); fieldValueBuilder.setType(ContentType.NUMBER); break; case GEO_POINT: fieldValueBuilder.setGeo(geoPoint.copyToProtocolBuffer()); fieldValueBuilder.setType(ContentType.GEO); break; case UNTOKENIZED_PREFIX: if (untokenizedPrefix != null) { fieldValueBuilder.setStringValue(untokenizedPrefix); } fieldValueBuilder.setType(ContentType.UNTOKENIZED_PREFIX); break; case TOKENIZED_PREFIX: if (tokenizedPrefix != null) { fieldValueBuilder.setStringValue(tokenizedPrefix); } fieldValueBuilder.setType(ContentType.TOKENIZED_PREFIX); break; case VECTOR: fieldValueBuilder.addAllVectorValue(vector); fieldValueBuilder.setType(ContentType.VECTOR); break; default: throw new IllegalArgumentException(String.format("unknown field type %s", type)); } } DocumentPb.Field.Builder builder = DocumentPb.Field.newBuilder() .setName(name) .setValue(fieldValueBuilder); return builder.build(); } @Override public String toString() { return new Util.ToStringHelper("Field") .addField("name", name) .addField("value", valueToString()) .addField("type", String.valueOf(type)) .addField("locale", locale) .finish(); } private String valueToString() throws IllegalArgumentException { if (type == null) { return "null"; } switch (type) { case TEXT: return text; case HTML: return html; case ATOM: return atom; case DATE: return DateUtil.formatDateTime(date); case GEO_POINT: return geoPoint.toString(); case NUMBER: DecimalFormat format = new DecimalFormat(); format.setDecimalSeparatorAlwaysShown(false); format.setMaximumFractionDigits(Integer.MAX_VALUE); return format.format(number); case UNTOKENIZED_PREFIX: return untokenizedPrefix; case TOKENIZED_PREFIX: return tokenizedPrefix; case VECTOR: return vector.toString(); default: throw new IllegalArgumentException(String.format("unknown field type %s", type)); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy