All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.atilika.kuromoji.dict.UnknownDictionary Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta7
Show newest version
/*-*
 * Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  A copy of the
 * License is distributed with this work in the LICENSE.md file.  You may
 * also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.atilika.kuromoji.dict;

import com.atilika.kuromoji.io.IntegerArrayIO;
import com.atilika.kuromoji.io.StringArrayIO;
import com.atilika.kuromoji.util.KuromojiBinFilesFetcher;
import com.atilika.kuromoji.util.ResourceResolver;
import com.atilika.kuromoji.util.StringUtils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

/**
 * {@link Dictionary} implementation backed by in-memory arrays: word ids grouped per
 * character category, a cost triple per word id, and a feature array per word id.
 *
 * <p>Instances are created either directly from already-loaded arrays or via
 * {@link #newInstance(ResourceResolver, CharacterDefinitions, int)}, which reads the arrays
 * from the resource named by {@link #UNKNOWN_DICTIONARY_FILENAME}.
 */
public class UnknownDictionary implements Dictionary {

    /**
     * Absolute path of {@code unknownDictionary.bin} inside the directory returned by
     * {@link KuromojiBinFilesFetcher#getKuromojiRoot()}. Replaces an earlier definition that
     * used the bare file name {@code "unknownDictionary.bin"}.
     */
    public static final String UNKNOWN_DICTIONARY_FILENAME =
            new File(KuromojiBinFilesFetcher.getKuromojiRoot(), "unknownDictionary.bin").getAbsolutePath();

    /** Value used to fill feature slots that an entry does not define. */
    private static final String DEFAULT_FEATURE = "*";

    /** Separator placed between features when they are joined into a single string. */
    private static final String FEATURE_SEPARATOR = ",";

    /**
     * Sentinel for {@link #totalFeatures} meaning "return feature rows as stored". Every row
     * length is {@code >= 0}, so no row is ever padded when this value is used.
     */
    private static final int NO_PADDING = 0;

    /** Word ids, indexed by character category id. */
    private final int[][] entries;

    /** Per word id: {@code [0]} left id, {@code [1]} right id, {@code [2]} word cost. */
    private final int[][] costs;

    /** Feature strings, indexed by word id. */
    private final String[][] features;

    /** Minimum number of features reported per entry; shorter rows are padded to this size. */
    private final int totalFeatures;

    private final CharacterDefinitions characterDefinition;

    /**
     * Creates a dictionary whose feature rows are padded to {@code totalFeatures} elements.
     *
     * @param characterDefinition character definitions exposed via {@link #getCharacterDefinition()}
     * @param entries word ids indexed by character category id
     * @param costs per word id, the left id, right id and word cost (in that order)
     * @param features feature strings indexed by word id
     * @param totalFeatures number of features each entry should report; rows that already
     *                      hold at least this many features are returned unchanged
     */
    public UnknownDictionary(CharacterDefinitions characterDefinition, int[][] entries, int[][] costs,
            String[][] features, int totalFeatures) {
        this.characterDefinition = characterDefinition;
        this.entries = entries;
        this.costs = costs;
        this.features = features;
        this.totalFeatures = totalFeatures;
    }

    /**
     * Creates a dictionary that reports every feature row exactly as stored (no padding).
     *
     * @param characterDefinition character definitions exposed via {@link #getCharacterDefinition()}
     * @param entries word ids indexed by character category id
     * @param costs per word id, the left id, right id and word cost (in that order)
     * @param features feature strings indexed by word id
     */
    public UnknownDictionary(CharacterDefinitions characterDefinition, int[][] entries, int[][] costs,
            String[][] features) {
        // An explicit sentinel is used instead of features.length (the number of entries),
        // which is unrelated to the number of features per entry.
        this(characterDefinition, entries, costs, features, NO_PADDING);
    }

    /**
     * Returns the word ids registered for a character category.
     *
     * @param categoryId index into the entries table
     * @return the stored array of word ids for that category (not a copy)
     */
    public int[] lookupWordIds(int categoryId) {
        return entries[categoryId];
    }

    /**
     * Returns the left id of the given word.
     *
     * @param wordId index into the costs table
     * @return element 0 of the word's cost row
     */
    @Override
    public int getLeftId(int wordId) {
        return costs[wordId][0];
    }

    /**
     * Returns the right id of the given word.
     *
     * @param wordId index into the costs table
     * @return element 1 of the word's cost row
     */
    @Override
    public int getRightId(int wordId) {
        return costs[wordId][1];
    }

    /**
     * Returns the word cost of the given word.
     *
     * @param wordId index into the costs table
     * @return element 2 of the word's cost row
     */
    @Override
    public int getWordCost(int wordId) {
        return costs[wordId][2];
    }

    /**
     * Returns all features of the given word joined with {@code ","}.
     *
     * @param wordId index into the features table
     * @return the result of {@link #getAllFeaturesArray(int)} joined into one string
     */
    @Override
    public String getAllFeatures(int wordId) {
        return StringUtils.join(getAllFeaturesArray(wordId), FEATURE_SEPARATOR);
    }

    /**
     * Returns the features of the given word, padded with {@code "*"} up to the configured
     * total number of features.
     *
     * @param wordId index into the features table
     * @return the stored row itself (not a copy) when it already holds at least the configured
     *         number of features; otherwise a new array of that size whose trailing slots are
     *         filled with {@code "*"}
     */
    @Override
    public String[] getAllFeaturesArray(int wordId) {
        String[] basicFeatures = features[wordId];

        // Decide per row. Comparing against features.length (the number of entries) would
        // skip padding by coincidence, and copying a row longer than totalFeatures would
        // overflow the destination array.
        if (basicFeatures.length >= totalFeatures) {
            return basicFeatures;
        }

        String[] allFeatures = new String[totalFeatures];
        System.arraycopy(basicFeatures, 0, allFeatures, 0, basicFeatures.length);

        for (int i = basicFeatures.length; i < totalFeatures; i++) {
            allFeatures[i] = DEFAULT_FEATURE;
        }

        return allFeatures;
    }

    /**
     * Returns the selected features of the given word joined with {@code ","}.
     *
     * @param wordId index into the features table
     * @param fields indices into the array returned by {@link #getAllFeaturesArray(int)},
     *               in the order they should appear in the result
     * @return the selected features joined into one string
     */
    @Override
    public String getFeature(int wordId, int... fields) {
        String[] allFeatures = getAllFeaturesArray(wordId);
        // Named "selected" so the local does not shadow the features field.
        String[] selected = new String[fields.length];

        for (int i = 0; i < fields.length; i++) {
            selected[i] = allFeatures[fields[i]];
        }

        return StringUtils.join(selected, FEATURE_SEPARATOR);
    }

    /**
     * Returns the character definitions this dictionary was constructed with.
     *
     * @return the character definitions passed to the constructor
     */
    public CharacterDefinitions getCharacterDefinition() {
        return characterDefinition;
    }

    /**
     * Loads a dictionary from the resource named by {@link #UNKNOWN_DICTIONARY_FILENAME}.
     *
     * <p>The stream is read in this order: the cost table, the per-category word id table,
     * then the feature table. The stream is closed before this method returns, whether or
     * not reading succeeds.
     *
     * @param resolver resolves the dictionary file name to an input stream
     * @param characterDefinitions character definitions to attach to the new dictionary
     * @param totalFeatures number of features each entry should report
     * @return a dictionary built from the arrays read from the stream
     * @throws IOException if resolving, reading or closing the stream fails
     */
    public static UnknownDictionary newInstance(ResourceResolver resolver, CharacterDefinitions characterDefinitions,
            int totalFeatures) throws IOException {
        InputStream unkDefInput = resolver.resolve(UnknownDictionary.UNKNOWN_DICTIONARY_FILENAME);

        try {
            int[][] costs = IntegerArrayIO.readArray2D(unkDefInput);
            int[][] references = IntegerArrayIO.readArray2D(unkDefInput);
            String[][] features = StringArrayIO.readArray2D(unkDefInput);

            return new UnknownDictionary(characterDefinitions, references, costs, features, totalFeatures);
        } finally {
            // The stream is opened here and never handed to the caller, so release it here.
            unkDefInput.close();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy