
org.languagetool.rules.uk.InflectionHelper Maven / Gradle / Ivy
package org.languagetool.rules.uk;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedToken;
/**
* @since 3.6
*/
public class InflectionHelper {
private InflectionHelper() {
}
public static class Inflection implements Comparable {
private static final Pattern MFN = Pattern.compile("[mfn]");
final String gender;
final String _case;
final String animTag;
Inflection(String gender, String _case, String animTag) {
this.gender = gender;
this._case = _case;
this.animTag = animTag;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((_case == null) ? 0 : _case.hashCode());
// result = prime * result + ((animTag == null) ? 0 : animTag.hashCode());
result = prime * result + ((gender == null) ? 0 : gender.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
Inflection other = (Inflection) obj;
return genderEquals(gender, other.gender)
&& _case.equals(other._case)
&& (animTag == null || other.animTag == null
|| ! animMatters() || ! other.isAnimalSensitive() || animTag.equals(other.animTag));
}
private boolean genderEquals(String gender1, String gender2) {
if( gender1.equals(gender2) )
return true;
if( gender1.equals("s") && MFN.matcher(gender2).matches()
|| gender2.equals("s") && MFN.matcher(gender1).matches())
return true;
return false;
}
public boolean equalsIgnoreGender(Inflection other) {
return //gender.equals(other.gender)
_case.equals(other._case)
&& (animTag == null || other.animTag == null
|| ! animMatters() || animTag.equals(other.animTag));
}
boolean animMatters() {
return animTag != null && ! "unanim".equals(animTag) && _case.equals("v_zna") && isAnimalSensitive();
}
private boolean isAnimalSensitive() {
return "mp".contains(gender);
}
@Override
public String toString() {
return ":" + gender + ":" + _case
+ (animMatters() ? "_"+animTag : "");
}
@Override
public int compareTo(Inflection o) {
if( GEN_ORDER.get(gender) == null ) System.err.println ("unknown gender for " + gender + " for " + o);
int compared = GEN_ORDER.get(gender).compareTo(GEN_ORDER.get(o.gender));
if( compared != 0 )
return compared;
compared = VIDM_ORDER.get(_case).compareTo(VIDM_ORDER.get(o._case));
return compared;
}
}
public static List getAdjInflections(List adjTokenReadings) {
return getAdjInflections(adjTokenReadings, "adj");
}
public static List getNumrInflections(List adjTokenReadings) {
return getAdjInflections(adjTokenReadings, "numr");
}
public static List getAdjInflections(List adjTokenReadings, String postagStart) {
List masterInflections = new ArrayList<>();
for (AnalyzedToken token: adjTokenReadings) {
String posTag = token.getPOSTag();
if( posTag == null || ! posTag.startsWith(postagStart) )
continue;
Matcher matcher = TokenAgreementAdjNounRule.ADJ_INFLECTION_PATTERN.matcher(posTag);
matcher.find();
String gen = matcher.group(1);
String vidm = matcher.group(2);
String animTag = null;
if (matcher.group(3) != null) {
animTag = matcher.group(3).substring(2); // :rinanim/:ranim
}
Inflection inflection = new Inflection(gen, vidm, animTag);
if( ! masterInflections.contains(inflection) ) {
masterInflections.add(inflection);
}
}
return masterInflections;
}
static List getNounInflections(List nounTokenReadings) {
return getNounInflections(nounTokenReadings, null);
}
public static List getNounInflections(List nounTokenReadings, Pattern ignoreTag) {
List slaveInflections = new ArrayList<>();
for (AnalyzedToken token: nounTokenReadings) {
String posTag2 = token.getPOSTag();
if( posTag2 == null )
continue;
if( ignoreTag != null && ignoreTag.matcher(posTag2).find() ) {
continue;
}
Matcher matcher = TokenAgreementAdjNounRule.NOUN_INFLECTION_PATTERN.matcher(posTag2);
if( ! matcher.find() ) {
// System.err.println("Failed to find slave inflection tag in " + posTag2 + " for " + nounTokenReadings);
continue;
}
String gen = matcher.group(2);
String vidm = matcher.group(3);
String animTag = matcher.group(1);
Inflection inflection = new Inflection(gen, vidm, animTag);
if( ! slaveInflections.contains(inflection) ) {
slaveInflections.add(inflection);
}
}
return slaveInflections;
}
static final Map GEN_ORDER = new HashMap<>();
private static final Map VIDM_ORDER = new HashMap<>();
static {
GEN_ORDER.put("m", 0);
GEN_ORDER.put("f", 1);
GEN_ORDER.put("n", 3);
GEN_ORDER.put("s", 4); // for pron
GEN_ORDER.put("p", 5);
GEN_ORDER.put("i", 6); // verb:inf
GEN_ORDER.put("o", 7); // verb:impers
VIDM_ORDER.put("v_naz", 10);
VIDM_ORDER.put("v_rod", 20);
VIDM_ORDER.put("v_dav", 30);
VIDM_ORDER.put("v_zna", 40);
VIDM_ORDER.put("v_oru", 50);
VIDM_ORDER.put("v_mis", 60);
VIDM_ORDER.put("v_kly", 70);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy