org.elasticsearch.common.unit.Fuzziness Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.unit;
import com.google.common.base.Preconditions;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
/**
* A unit class that encapsulates all in-exact search
* parsing and conversion from similarities to edit distances
* etc.
*/
public final class Fuzziness implements ToXContent {
public static final XContentBuilderString X_FIELD_NAME = new XContentBuilderString("fuzziness");
public static final Fuzziness ZERO = new Fuzziness(0);
public static final Fuzziness ONE = new Fuzziness(1);
public static final Fuzziness TWO = new Fuzziness(2);
public static final Fuzziness AUTO = new Fuzziness("AUTO");
public static final ParseField FIELD = new ParseField(X_FIELD_NAME.camelCase().getValue());
private final String fuzziness;
private Fuzziness(int fuzziness) {
Preconditions.checkArgument(fuzziness >= 0 && fuzziness <= 2, "Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]");
this.fuzziness = Integer.toString(fuzziness);
}
private Fuzziness(String fuzziness) {
this.fuzziness = fuzziness;
}
/**
* Creates a {@link Fuzziness} instance from an edit distance. The value must be one of [0, 1, 2]
*/
public static Fuzziness fromEdits(int edits) {
return new Fuzziness(edits);
}
public static Fuzziness build(Object fuzziness) {
if (fuzziness instanceof Fuzziness) {
return (Fuzziness) fuzziness;
}
String string = fuzziness.toString();
if (AUTO.asString().equalsIgnoreCase(string)) {
return AUTO;
}
return new Fuzziness(string);
}
public static Fuzziness parse(XContentParser parser) throws IOException {
XContentParser.Token token = parser.currentToken();
switch (token) {
case VALUE_STRING:
case VALUE_NUMBER:
final String fuzziness = parser.text();
if (AUTO.asString().equalsIgnoreCase(fuzziness)) {
return AUTO;
}
try {
final int minimumSimilarity = Integer.parseInt(fuzziness);
switch (minimumSimilarity) {
case 0:
return ZERO;
case 1:
return ONE;
case 2:
return TWO;
default:
return build(fuzziness);
}
} catch (NumberFormatException ex) {
return build(fuzziness);
}
default:
throw new IllegalArgumentException("Can't parse fuzziness on token: [" + token + "]");
}
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
return toXContent(builder, params, true);
}
public XContentBuilder toXContent(XContentBuilder builder, Params params, boolean includeFieldName) throws IOException {
if (includeFieldName) {
builder.field(X_FIELD_NAME, fuzziness);
} else {
builder.value(fuzziness);
}
return builder;
}
public int asDistance() {
return asDistance(null);
}
public int asDistance(String text) {
if (this == AUTO) { //AUTO
final int len = termLen(text);
if (len <= 2) {
return 0;
} else if (len > 5) {
return 2;
} else {
return 1;
}
}
return Math.min(2, asInt());
}
public TimeValue asTimeValue() {
if (this == AUTO) {
return TimeValue.timeValueMillis(1);
} else {
return TimeValue.parseTimeValue(fuzziness.toString(), null, "fuzziness");
}
}
public long asLong() {
if (this == AUTO) {
return 1;
}
try {
return Long.parseLong(fuzziness.toString());
} catch (NumberFormatException ex) {
return (long) Double.parseDouble(fuzziness.toString());
}
}
public int asInt() {
if (this == AUTO) {
return 1;
}
try {
return Integer.parseInt(fuzziness.toString());
} catch (NumberFormatException ex) {
return (int) Float.parseFloat(fuzziness.toString());
}
}
public short asShort() {
if (this == AUTO) {
return 1;
}
try {
return Short.parseShort(fuzziness.toString());
} catch (NumberFormatException ex) {
return (short) Float.parseFloat(fuzziness.toString());
}
}
public byte asByte() {
if (this == AUTO) {
return 1;
}
try {
return Byte.parseByte(fuzziness.toString());
} catch (NumberFormatException ex) {
return (byte) Float.parseFloat(fuzziness.toString());
}
}
public double asDouble() {
if (this == AUTO) {
return 1d;
}
return Double.parseDouble(fuzziness.toString());
}
public float asFloat() {
if (this == AUTO) {
return 1f;
}
return Float.parseFloat(fuzziness.toString());
}
private int termLen(String text) {
return text == null ? 5 : text.codePointCount(0, text.length()); // 5 avg term length in english
}
public String asString() {
return fuzziness.toString();
}
}