org.elasticsearch.common.unit.Fuzziness Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.unit;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
/**
 * A unit class that encapsulates all inexact (fuzzy) search parsing and the
 * conversion from similarities to edit distances.
 * <p>
 * The value is held internally as a string: either the decimal form of an
 * edit distance, the literal {@code "AUTO"}, or whatever text was supplied to
 * {@link #build(Object)} (upper-cased with {@link Locale#ROOT}).
 */
public final class Fuzziness implements ToXContent, Writeable {

    /** Field name used when rendering this value to XContent. */
    public static final String X_FIELD_NAME = "fuzziness";

    public static final Fuzziness ZERO = new Fuzziness(0);
    public static final Fuzziness ONE = new Fuzziness(1);
    public static final Fuzziness TWO = new Fuzziness(2);

    /** Fuzziness whose edit distance is derived from the term length, see {@link #asDistance(String)}. */
    public static final Fuzziness AUTO = new Fuzziness("AUTO");

    public static final ParseField FIELD = new ParseField(X_FIELD_NAME);

    /** String form of the fuzziness value; never reassigned after construction. */
    private final String fuzziness;

    /**
     * Creates a fuzziness from an explicit edit distance.
     *
     * @param fuzziness the edit distance, must be 0, 1 or 2
     * @throws IllegalArgumentException if the value is not one of [0, 1, 2]
     */
    private Fuzziness(int fuzziness) {
        if (fuzziness != 0 && fuzziness != 1 && fuzziness != 2) {
            throw new IllegalArgumentException("Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]");
        }
        this.fuzziness = Integer.toString(fuzziness);
    }

    /**
     * Creates a fuzziness from its textual form. The text is upper-cased with
     * {@link Locale#ROOT} but otherwise not validated here.
     *
     * @param fuzziness the textual fuzziness value
     * @throws IllegalArgumentException if {@code fuzziness} is {@code null}
     */
    private Fuzziness(String fuzziness) {
        if (fuzziness == null) {
            throw new IllegalArgumentException("fuzziness can't be null!");
        }
        this.fuzziness = fuzziness.toUpperCase(Locale.ROOT);
    }

    /**
     * Read from a stream. The value is taken verbatim from a single string, the
     * counterpart of {@link #writeTo(StreamOutput)}.
     */
    public Fuzziness(StreamInput in) throws IOException {
        fuzziness = in.readString();
    }

    /**
     * Writes the string form of this fuzziness to the stream.
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(fuzziness);
    }

    /**
     * Creates a {@link Fuzziness} instance from an edit distance. The value must be one of [0, 1, 2]
     *
     * Note: Using this method only makes sense if the field you are applying Fuzziness to is some sort of string.
     *
     * @throws IllegalArgumentException if {@code edits} is not one of [0, 1, 2]
     */
    public static Fuzziness fromEdits(int edits) {
        return new Fuzziness(edits);
    }

    /**
     * Builds a {@link Fuzziness} from an arbitrary object.
     * <ul>
     * <li>A {@link Fuzziness} instance is returned as is.</li>
     * <li>Any object whose {@code toString()} equals "AUTO" (ignoring case) yields {@link #AUTO}.</li>
     * <li>Anything else is wrapped using its upper-cased {@code toString()} value; the text is
     * not checked for being numeric at this point.</li>
     * </ul>
     *
     * @param fuzziness the value to convert, must not be {@code null}
     * @return the corresponding fuzziness
     * @throws NullPointerException if {@code fuzziness} is {@code null}
     */
    public static Fuzziness build(Object fuzziness) {
        // Fail with an explicit message instead of an anonymous NPE from toString() below.
        Objects.requireNonNull(fuzziness, "fuzziness can't be null!");
        if (fuzziness instanceof Fuzziness) {
            return (Fuzziness) fuzziness;
        }
        String string = fuzziness.toString();
        if (AUTO.asString().equalsIgnoreCase(string)) {
            return AUTO;
        }
        return new Fuzziness(string);
    }

    /**
     * Parses a fuzziness from the parser's current token, which must be a string or a number.
     * "AUTO" (ignoring case) maps to {@link #AUTO}; the integers 0, 1 and 2 map to the shared
     * {@link #ZERO}, {@link #ONE} and {@link #TWO} constants; every other text is handed to
     * {@link #build(Object)}.
     *
     * @param parser the parser positioned on the fuzziness value
     * @return the parsed fuzziness
     * @throws IllegalArgumentException if the current token is neither a string nor a number
     */
    public static Fuzziness parse(XContentParser parser) throws IOException {
        XContentParser.Token token = parser.currentToken();
        switch (token) {
            case VALUE_STRING:
            case VALUE_NUMBER:
                final String fuzziness = parser.text();
                if (AUTO.asString().equalsIgnoreCase(fuzziness)) {
                    return AUTO;
                }
                try {
                    final int minimumSimilarity = Integer.parseInt(fuzziness);
                    switch (minimumSimilarity) {
                        case 0:
                            return ZERO;
                        case 1:
                            return ONE;
                        case 2:
                            return TWO;
                        default:
                            // Integral but outside [0, 2]: keep the raw text.
                            return build(fuzziness);
                    }
                } catch (NumberFormatException ex) {
                    // Not an integer (e.g. a decimal value): keep the raw text.
                    return build(fuzziness);
                }
            default:
                throw new IllegalArgumentException("Can't parse fuzziness on token: [" + token + "]");
        }
    }

    /**
     * Renders this fuzziness as a field named {@link #X_FIELD_NAME}.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        return toXContent(builder, params, true);
    }

    /**
     * Renders this fuzziness to the given builder using its string form.
     *
     * @param builder          the builder to write to
     * @param params           rendering parameters (not consulted by this method)
     * @param includeFieldName {@code true} to write a {@link #X_FIELD_NAME} field,
     *                         {@code false} to write only the bare value
     * @return the same builder
     */
    public XContentBuilder toXContent(XContentBuilder builder, Params params, boolean includeFieldName) throws IOException {
        if (includeFieldName) {
            builder.field(X_FIELD_NAME, fuzziness);
        } else {
            builder.value(fuzziness);
        }
        return builder;
    }

    /**
     * Returns the edit distance without a concrete term; equivalent to {@code asDistance(null)}.
     */
    public int asDistance() {
        return asDistance(null);
    }

    /**
     * Returns the edit distance for the given term.
     * <p>
     * For {@link #AUTO} the distance depends on the term length in code points: 0 for up to
     * two, 1 for three to five, and 2 for more than five. A {@code null} term is treated as
     * having length 5. For any other value the float form is truncated to an int and capped
     * at 2; no lower bound is applied.
     *
     * @param text the term the fuzziness is applied to, may be {@code null}
     * @return the edit distance
     * @throws NumberFormatException if this is not {@link #AUTO} and the value is not numeric
     */
    public int asDistance(String text) {
        if (this.equals(AUTO)) { //AUTO
            final int len = termLen(text);
            if (len <= 2) {
                return 0;
            } else if (len > 5) {
                return 2;
            } else {
                return 1;
            }
        }
        return Math.min(2, (int) asFloat());
    }

    /**
     * Returns this fuzziness as a float; {@link #AUTO} is reported as {@code 1f}.
     *
     * @throws NumberFormatException if this is not {@link #AUTO} and the value is not numeric
     */
    public float asFloat() {
        if (this.equals(AUTO)) {
            return 1f;
        }
        return Float.parseFloat(fuzziness);
    }

    /**
     * Length of the term in Unicode code points, or 5 when no term is given.
     */
    private int termLen(String text) {
        return text == null ? 5 : text.codePointCount(0, text.length()); // 5 avg term length in english
    }

    /**
     * Returns the string form of this fuzziness.
     */
    public String asString() {
        return fuzziness;
    }

    /**
     * Two instances are equal when their string forms are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        Fuzziness other = (Fuzziness) obj;
        return Objects.equals(fuzziness, other.fuzziness);
    }

    @Override
    public int hashCode() {
        return fuzziness.hashCode();
    }

    /**
     * Returns the string form of this fuzziness, so that log and error messages
     * show the actual value rather than an identity hash.
     */
    @Override
    public String toString() {
        return fuzziness;
    }
}