All downloads are free. The search and download functionality uses the official Maven repository.

com.atilika.kuromoji.compile.UnknownDictionaryCompiler Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta7
Show newest version
/*-*
 * Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  A copy of the
 * License is distributed with this work in the LICENSE.md file.  You may
 * also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.atilika.kuromoji.compile;

import com.atilika.kuromoji.dict.GenericDictionaryEntry;
import com.atilika.kuromoji.io.IntegerArrayIO;
import com.atilika.kuromoji.io.StringArrayIO;
import com.atilika.kuromoji.util.UnknownDictionaryEntryParser;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Compiles unknown-word dictionary definitions into their binary form.
 *
 * <p>Entries are first collected with {@link #readUnknownDefinition(InputStream, String)}
 * and then serialised by {@link #compile()} as three consecutive 2-D arrays:
 * costs, category references and features.
 */
public class UnknownDictionaryCompiler implements Compiler {

    /** Destination of the compiled dictionary; closed by {@link #compile()}. */
    private final OutputStream output;

    /** Maps a category name to its id; the id is used as a row index in {@link #makeCategoryReferences()}. */
    protected Map<String, Integer> categoryMap;

    /** Entries in the order they were read; the list index is the entry's index in the compiled arrays. */
    protected List<GenericDictionaryEntry> dictionaryEntries = new ArrayList<>();

    /**
     * Creates a compiler writing to the given stream.
     *
     * @param categoryMap category name to category id mapping
     * @param output stream the compiled dictionary is written to
     */
    public UnknownDictionaryCompiler(Map<String, Integer> categoryMap, OutputStream output) {
        this.categoryMap = categoryMap;
        this.output = output;
    }

    /**
     * Parses every line of {@code input} as a dictionary entry and appends it to the
     * collected entries.
     *
     * @param input stream holding one entry definition per line
     * @param encoding name of the character set used to decode {@code input}
     * @throws IOException if reading fails or the encoding is not supported
     */
    public void readUnknownDefinition(InputStream input, String encoding) throws IOException {
        // The reader is not closed here: closing it would also close the caller-supplied stream.
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(input, encoding));
        UnknownDictionaryEntryParser parser = new UnknownDictionaryEntryParser();

        String line;
        while ((line = reader.readLine()) != null) {
            dictionaryEntries.add(parser.parse(line));
        }
    }

    /**
     * Builds the cost table.
     *
     * @return one row per entry, laid out as {@code {leftId, rightId, wordCost}}
     */
    public int[][] makeCosts() {
        int[][] costs = new int[dictionaryEntries.size()][];

        for (int i = 0; i < costs.length; i++) {
            GenericDictionaryEntry entry = dictionaryEntries.get(i);
            costs[i] = new int[] {entry.getLeftId(), entry.getRightId(), entry.getWordCost()};
        }

        return costs;
    }

    /**
     * Builds the feature table.
     *
     * @return one row per entry: its part-of-speech features followed by its other features
     */
    public String[][] makeFeatures() {
        String[][] features = new String[dictionaryEntries.size()][];

        for (int i = 0; i < features.length; i++) {
            GenericDictionaryEntry entry = dictionaryEntries.get(i);

            List<String> combined = new ArrayList<>();
            combined.addAll(entry.getPosFeatures());
            combined.addAll(entry.getFeatures());

            features[i] = combined.toArray(new String[combined.size()]);
        }

        return features;
    }

    /**
     * Builds the category-to-entries index.
     *
     * @return an array indexed by category id whose rows hold the indices of the entries
     *         whose surface equals the category name; rows for ids not present in
     *         {@link #categoryMap} stay {@code null}
     */
    public int[][] makeCategoryReferences() {
        int[][] entries = new int[categoryMap.size()][];

        for (Map.Entry<String, Integer> category : categoryMap.entrySet()) {
            int categoryId = category.getValue();
            entries[categoryId] = getEntryIndices(category.getKey());
        }

        return entries;
    }

    /**
     * Prints each row index on its own line, followed by the row's features, each on a
     * tab-indented line, to standard output.
     *
     * @param features feature table to print
     */
    public void printFeatures(String[][] features) {
        for (int i = 0; i < features.length; i++) {
            System.out.println(i);

            for (String feature : features[i]) {
                System.out.println("\t" + feature);
            }
        }
    }

    /**
     * Finds the entries with the given surface form.
     *
     * @param surface surface form to look up
     * @return indices into the collected entries, in ascending order; empty if none match
     */
    public int[] getEntryIndices(String surface) {
        List<Integer> indices = new ArrayList<>();

        for (int i = 0; i < dictionaryEntries.size(); i++) {
            if (dictionaryEntries.get(i).getSurface().equals(surface)) {
                indices.add(i);
            }
        }

        return toArray(indices);
    }

    /** Unboxes a list of integers into a primitive array of the same length and order. */
    private int[] toArray(List<Integer> list) {
        int[] array = new int[list.size()];

        int i = 0;
        for (Integer value : list) {
            array[i++] = value;
        }

        return array;
    }

    /**
     * Returns the live list of collected entries (not a copy).
     *
     * @return the entries read so far
     */
    public List<GenericDictionaryEntry> getDictionaryEntries() {
        return dictionaryEntries;
    }

    /**
     * Writes the costs, the category references and the features, in that order, and then
     * closes the output stream.
     *
     * @throws IOException if writing or closing the output fails
     */
    @Override
    public void compile() throws IOException {
        // try-with-resources so the stream is closed even when a write fails part-way.
        try (OutputStream out = output) {
            IntegerArrayIO.writeArray2D(out, makeCosts());
            IntegerArrayIO.writeArray2D(out, makeCategoryReferences());
            StringArrayIO.writeArray2D(out, makeFeatures());
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy