All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.vesalainen.util.fi.Hyphenator Maven / Gradle / Ivy

/*
 * Copyright (C) 2010 Timo Vesalainen
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see .
 */
package org.vesalainen.util.fi;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 *
 * @author tkv
 */
public class Hyphenator
{
    public static final char HYPHEN = 173;  // soft hyphen
    public static final String HYPHENSTR = new String(new char[]{HYPHEN});  // soft hyphen
    private enum State {START, CONSONANT, VOCAL, DIPHTHONG};
    private static final char auml = 228;
    private static final char Auml = 196;
    private static final char ouml = 246;
    private static final char Ouml = 214;
    private static final char aring = 229;
    private static final char Aring = 197;
    private static final char scaron = 353;
    private static final char Scaron = 352;
    private static final char zcaron = 382;
    private static final char Zcaron = 381;
    private static final char[] FINNCHARS = new char[] {'a','A','b','B','c','C',
    'd','D','e','E','f','F','g','G','h','H','i','I','j','J','k','K','l','L','m',
    'M','n','N','o','O','p','P','q','Q','r','R','s','S',353,352,'t','T','u','U',
    'v','V','w','W','x','X','y','Y','z','Z',382, 381,229,197,228,196,246,214 };
    private static final String FINNSTRING = new String(FINNCHARS);
    private static final Pattern FINNWORD = Pattern.compile("["+FINNSTRING+"]+");
    private static final Pattern WS = Pattern.compile("\\p{javaWhitespace}+");
    private static final Map map;
    static
    {
        map = new HashMap();
        map.put("kaivosaukko", "kai"+HYPHEN+"vos"+HYPHEN+"auk"+HYPHEN+"ko");
        map.put("syysolkiperhonen", "syys"+HYPHEN+"ol"+HYPHEN+"ki"+HYPHEN+"per"+HYPHEN+"ho"+HYPHEN+"nen");
    }

    public static final String hyphenate(String text)
    {
        return hyphenate(text, Locale.getDefault());
    }

    public static final String hyphenate(String text, Locale locale)
    {
        text = text.replace(HYPHENSTR, "");
        if (!"fi".equals(locale.getLanguage()))
        {
            return text;
        }
        StringBuilder sb = new StringBuilder();
        StringParser parser = new StringParser(text);
        while (parser.find(FINNWORD))
        {
            sb.append(parser.skipped());
            hyphenateWord(parser.group(), sb);
        }
        sb.append(parser.remaining());
        return sb.toString();
    }
    private static final void hyphenateWord(String word, StringBuilder sb)
    {
        Matcher mm = FINNWORD.matcher(word);
        if (mm.matches())
        {
            if (map.containsKey(word.toLowerCase()))
            {
                String str = map.get(word.toLowerCase());
                int jj=0;
                for (int ii=0;ii 1)
        {
            char c1 = word.charAt(1);
            if (!vocal(c1))
            {
                for (int ii=2;ii 1)
        {
            if (vocal(word.charAt(1)))
            {
                char c1 = word.charAt(0);
                char c2 = word.charAt(1);
                c1 = Character.toLowerCase(c1);
                c2 = Character.toLowerCase(c2);
                return (c1 != c2 && c2 != 'i' && !diphthong(c1, c2));
            }
        }
        return false;
    }
    private static final boolean diphthongRule(String word)
    {
        if (word.length() > 2)
        {
            char c1 = word.charAt(0);
            char c2 = word.charAt(1);
            char c3 = word.charAt(2);
            c1 = Character.toLowerCase(c1);
            c2 = Character.toLowerCase(c2);
            c3 = Character.toLowerCase(c3);
            if (vocal(c1) && vocal(c2) && vocal(c3))
            {
                return c1 == c2 || diphthong(c1, c2);
            }
        }
        return false;
    }
    private static final boolean diphthong(char c1, char c2)
    {
        c1 = Character.toLowerCase(c1);
        c2 = Character.toLowerCase(c2);
        switch (c1)
        {
            case 'a':
                return c2 == 'u';
            case 'e':
                return c2 == 'u' || c2 == 'y';
            case 'i':
                return c2 == 'e' || c2 == 'u';
            case 'o':
                return c2 == 'u';
            case 'u':
                return c2 == 'o';
            case 'y':
                return c2 == ouml;
            case auml:
                return c2 == 'y';
            case ouml:
                return c2 == 'y';
            default:
                return false;
        }
    }
    private static final boolean vocal(char cc)
    {
        switch (Character.toLowerCase(cc))
        {
            case 'a':
            case 'e':
            case 'i':
            case 'o':
            case 'u':
            case 'y':
            case auml:
            case ouml:
            case aring:
                return true;
            default:
                return false;
        }
    }
    /**
     * @param args the command line arguments
     */
    public static void main(String[] args)
    {
        try
        {
            System.err.println(Hyphenator.hyphenate("leffassa kivaa kahdelle"));
            System.err.println(Hyphenator.hyphenate("tragiikkaa sekä horkkatiloja"));
            System.err.println(Hyphenator.hyphenate("luento Aasian kääpiöpuolueista"));
            System.err.println(Hyphenator.hyphenate("raaistunut maailma liuottimet lauantaina tauotta leuan alla"));
            System.err.println(Hyphenator.hyphenate("Kaivosaukko syysolkiperhonen"));
            System.err.println(Hyphenator.hyphenate("venemessuilla kisasuunnitelmia jutun sattuma huomattavasti"));
            System.err.println(Hyphenator.hyphenate("Deepawali, vapaapäivä. ravintolassa: kunniaksi tekoon koristeitaan"));
        }
        catch (Exception ex)
        {
            ex.printStackTrace();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy