com.yahoo.language.simple.SimpleTransformer Maven / Gradle / Ivy
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.simple;
import com.yahoo.language.Language;
import com.yahoo.language.process.Transformer;
import java.text.Normalizer;
import java.util.regex.Pattern;
/**
 * A {@link Transformer} that drops accents in two steps: the input is decomposed to Unicode normalization form NFD,
 * which turns each accented character into its base character followed by combining marks, and every run of
 * characters in the Combining Diacritical Marks block is then deleted.
 *
 * @author Simon Thoresen Hult
 */
public class SimpleTransformer implements Transformer {

    /** Matches one or more consecutive characters from the Combining Diacritical Marks Unicode block. */
    private static final Pattern COMBINING_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /**
     * Returns the given text with its accents removed.
     *
     * @param input the text to process
     * @param language not consulted by this implementation
     * @return the NFD-decomposed form of {@code input} with all combining diacritical marks stripped
     */
    @Override
    public String accentDrop(String input, Language language) {
        // Decompose first so that accents become separate code points the pattern can match.
        String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
        return COMBINING_MARKS.matcher(decomposed).replaceAll("");
    }

}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy