All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.language.simple.SimpleTransformer Maven / Gradle / Ivy

There is a newer version: 8.441.21
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.simple;

import com.yahoo.language.Language;
import com.yahoo.language.process.Transformer;

import java.text.Normalizer;
import java.util.regex.Pattern;

/**
 * A {@link Transformer} that removes accents from text. The input is first decomposed to Unicode
 * normalization form NFD, which splits each accented character into its de-accented base character
 * followed by its combining diacritics; every run of characters from the Combining Diacritical
 * Marks block is then deleted using a precompiled regex.
 *
 * @author Simon Thoresen Hult
 */
public class SimpleTransformer implements Transformer {

    /** Matches one or more consecutive characters in the Combining Diacritical Marks block. */
    private static final Pattern COMBINING_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /**
     * Returns the given text with its diacritics removed.
     *
     * @param input the text to strip accents from
     * @param language not consulted by this implementation
     * @return the NFD-decomposed input with every combining diacritical mark removed
     */
    @Override
    public String accentDrop(String input, Language language) {
        // Decompose first so that accents become standalone code points the regex can match.
        String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
        return COMBINING_MARKS.matcher(decomposed).replaceAll("");
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy