com.basistech.rosette.dm.LanguageDetection Maven / Gradle / Ivy
/*
* Copyright 2014 Basis Technology Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.basistech.rosette.dm;
import com.basistech.util.ISO15924;
import com.basistech.util.LanguageCode;
import com.google.common.base.Objects;
import com.google.common.collect.Lists;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
/**
* The results of running language detection on a region of text.
* The results are composed as a list of {@link com.basistech.rosette.dm.LanguageDetection.DetectionResult},
* to reflect the multiple alternatives produced by language detectors.
*/
public class LanguageDetection extends Attribute implements Serializable {
private static final long serialVersionUID = 222L;
/**
* A single result from language detection.
* Typically, the language identifier produces multiple results with
* different confidence values.
*/
public static class DetectionResult extends BaseAttribute {
private final LanguageCode language;
private final String encoding;
private final ISO15924 script;
private final Double confidence;
protected DetectionResult(LanguageCode language,
String encoding,
ISO15924 script,
Double confidence,
Map extendedProperties) {
super(extendedProperties);
this.language = language;
this.encoding = encoding;
this.script = script;
this.confidence = confidence;
}
/**
* Returns the detected language.
*
* @return the detected language
*/
public LanguageCode getLanguage() {
return language;
}
/**
* Returns the detected encoding.
*
* @return the detected encoding, or null if none was detected
*/
public String getEncoding() {
return encoding;
}
/**
* Returns the script.
*
* @return the script, or null of none was detected
*/
public ISO15924 getScript() {
return script;
}
/**
* Returns the confidence of this detection.
*
* @return the confidence of this detection alternative, or
* null if not available.
*/
public Double getConfidence() {
return confidence;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!super.equals(o)) {
return false;
}
DetectionResult that = (DetectionResult) o;
if (confidence != null ? !confidence.equals(that.confidence) : that.confidence != null) {
return false;
}
if (encoding != null ? !encoding.equals(that.encoding) : that.encoding != null) {
return false;
}
if (language != that.language) {
return false;
}
return script == that.script;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (language != null ? language.hashCode() : 0);
result = 31 * result + (encoding != null ? encoding.hashCode() : 0);
result = 31 * result + (script != null ? script.hashCode() : 0);
result = 31 * result + (confidence != null ? confidence.hashCode() : 0);
return result;
}
@Override
protected Objects.ToStringHelper toStringHelper() {
return super.toStringHelper()
.add("language", language)
.add("encoding", encoding)
.add("script", script)
.add("confidence", confidence);
}
/**
* Builder for detection results.
*/
public static class Builder extends BaseAttribute.Builder {
private LanguageCode language;
private String encoding;
private ISO15924 script;
private Double confidence;
/**
* Constructs a builder with default values.
*
* @param language the detected language
*/
public Builder(LanguageCode language) {
this.language = language;
}
/**
* Constructs a builder initialized from an existing detection result.
*
* @param toCopy the item to copy
* @adm.ignore
*/
public Builder(DetectionResult toCopy) {
super(toCopy);
language = toCopy.getLanguage();
encoding = toCopy.getEncoding();
script = toCopy.getScript();
confidence = toCopy.getConfidence();
}
/**
* Specifies the language.
*
* @param language the language
* @return this
*/
public Builder language(LanguageCode language) {
this.language = language;
return this;
}
/**
* Specifies the encoding.
*
* @param encoding the encoding
* @return this
*/
public Builder encoding(String encoding) {
this.encoding = encoding;
return this;
}
/**
* Specifies the script.
*
* @param script the script
* @return this
*/
public Builder script(ISO15924 script) {
this.script = script;
return this;
}
/**
* Specifies the confidence.
*
* @param confidence the confidence; null if no confidence is available.
* @return this.
*/
public Builder confidence(Double confidence) {
this.confidence = confidence;
return this;
}
/**
* Build an immutable detection result from the current state of the builder.
*
* @return the detection result
*/
public DetectionResult build() {
return new DetectionResult(language, encoding, script, confidence, buildExtendedProperties());
}
@Override
protected Builder getThis() {
return this;
}
}
}
private final List detectionResults;
protected LanguageDetection(int startOffset, int endOffset, List detectionResults, Map extendedProperties) {
super(startOffset, endOffset, extendedProperties);
this.detectionResults = listOrNull(detectionResults);
}
/**
* Returns the detection results, in order from best to worst confidence.
*
* @return the detection results, in order from best to worst confidence
*/
public List getDetectionResults() {
return detectionResults;
}
@Override
protected Objects.ToStringHelper toStringHelper() {
return super.toStringHelper()
.add("detectionResults", detectionResults);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!super.equals(o)) {
return false;
}
LanguageDetection that = (LanguageDetection) o;
return detectionResults.equals(that.detectionResults);
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + detectionResults.hashCode();
return result;
}
/**
* A builder for language detection results.
*/
public static class Builder extends Attribute.Builder {
private List detectionResults;
/**
* Constructs a builder from the required properties.
*
* @param startOffset the start offset of the region in characters
* @param endOffset the end offset of the region in characters
* @param detectionResults the list of detection results
*/
public Builder(int startOffset, int endOffset, List detectionResults) {
super(startOffset, endOffset);
this.detectionResults = detectionResults;
}
/**
* Constructs a builder by copying the values from an existing language detection.
*
* @param toCopy the object to copy
*/
public Builder(LanguageDetection toCopy) {
super(toCopy);
this.detectionResults = Lists.newArrayList();
addAllToList(this.detectionResults, toCopy.detectionResults);
}
/**
* Constructs an immutable language detection result from the current state of the builder.
*
* @return the new language detection
*/
public LanguageDetection build() {
// we do not null this list when empty. Should we?
return new LanguageDetection(startOffset, endOffset, detectionResults, buildExtendedProperties());
}
@Override
protected Builder getThis() {
return this;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy