All downloads are free. Search and download functionality uses the official Maven repository.

com.atilika.kuromoji.compile.WordIdMapCompiler Maven / Gradle / Ivy

/**
 * Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  A copy of the
 * License is distributed with this work in the LICENSE.md file.  You may
 * also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.atilika.kuromoji.compile;

import com.atilika.kuromoji.io.IntegerArrayIO;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;

/**
 * Collects mappings from a source id to one or more word ids and flattens them
 * into two integer arrays.
 *
 * <p>After {@link #compile()}, {@code indices[sourceId]} holds the offset into the
 * flat word id array at which the entry for that source id starts, or {@code -1}
 * when the source id has no mappings. Each entry consists of the number of word
 * ids followed by the word ids themselves, in the order they were added.
 */
public class WordIdMapCompiler implements Compiler {

    /**
     * Word ids per source id. Slots without mappings are {@code null}; slots at or
     * beyond {@link #sourceCount} are spare capacity and never part of the output.
     */
    private int[][] wordIds = new int[1][];

    /**
     * Number of leading slots of {@link #wordIds} that take part in the compiled
     * output, i.e. the highest source id seen plus one. Starts at one so that a
     * compiler without any mappings still produces a single {@code -1} index.
     */
    private int sourceCount = 1;

    /** Offsets into the flat word id array, rebuilt by every {@link #compile()} call. */
    private int[] indices;

    /** Flat word id array filled by {@link #compile()}. */
    private final GrowableIntArray wordIdArray = new GrowableIntArray();

    /**
     * Appends {@code wordId} to the list of word ids mapped from {@code sourceId}.
     *
     * @param sourceId non-negative id to map from
     * @param wordId   word id to append to the source id's list
     * @throws ArrayIndexOutOfBoundsException if {@code sourceId} is negative
     */
    public void addMapping(int sourceId, int wordId) {
        ensureSourceCapacity(sourceId);
        if (sourceId >= sourceCount) {
            sourceCount = sourceId + 1;
        }

        // Extend this source's list by one slot and store the new word id in it
        int[] current = wordIds[sourceId];
        int[] extended = (current == null)
            ? new int[1]
            : Arrays.copyOf(current, current.length + 1);
        extended[extended.length - 1] = wordId;
        wordIds[sourceId] = extended;
    }

    /**
     * Compiles the mappings and writes the index array followed by the flat word
     * id array to {@code output} using {@code IntegerArrayIO.writeArray}.
     *
     * @param output stream to write both arrays to
     * @throws IOException if writing to {@code output} fails
     */
    public void write(OutputStream output) throws IOException {
        compile();
        IntegerArrayIO.writeArray(output, indices);
        IntegerArrayIO.writeArray(output, wordIdArray.getArray());
    }

    /**
     * Rebuilds the index array and the flat word id array from the mappings added
     * so far.
     */
    public void compile() {
        this.indices = new int[sourceCount];
        int wordIdIndex = 0;

        for (int i = 0; i < sourceCount; i++) {
            int[] inner = wordIds[i];

            if (inner == null) {
                // No mappings for this source id
                indices[i] = -1;
                continue;
            }

            // Entry layout: count first, then the word ids themselves
            indices[i] = wordIdIndex;
            wordIdArray.set(wordIdIndex++, inner.length);

            for (int wordId : inner) {
                wordIdArray.set(wordIdIndex++, wordId);
            }
        }
    }

    /**
     * Makes sure {@link #wordIds} has a slot for {@code sourceId}, growing the
     * table geometrically so that adding ascending source ids does not copy the
     * whole table on every call.
     */
    private void ensureSourceCapacity(int sourceId) {
        if (sourceId < wordIds.length) {
            return;
        }
        long required = (long) sourceId + 1;
        long grown = wordIds.length + (long) (wordIds.length >> 1);
        int newLength = (int) Math.min(Integer.MAX_VALUE, Math.max(required, grown));
        wordIds = Arrays.copyOf(wordIds, newLength);
    }

    /**
     * An {@code int} array that grows on demand when a value is stored beyond its
     * current capacity.
     */
    public static class GrowableIntArray {

        private static final float ARRAY_GROWTH_RATE = 1.25f;

        private static final int ARRAY_INITIAL_SIZE = 1024;

        /** Highest index stored so far; zero when nothing has been stored. */
        private int maxIndex;

        private int[] array;

        /**
         * Creates an array with the given initial capacity.
         *
         * @param size initial capacity; zero is allowed
         * @throws NegativeArraySizeException if {@code size} is negative
         */
        public GrowableIntArray(int size) {
            this.array = new int[size];
            this.maxIndex = 0;
        }

        /** Creates an array with the default initial capacity. */
        public GrowableIntArray() {
            this(ARRAY_INITIAL_SIZE);
        }

        /**
         * Returns a copy of the stored values, from index zero up to and including
         * the highest index stored so far. Positions that were never stored are
         * zero. The result always has at least one element, even when nothing has
         * been stored.
         *
         * @return a new array of length {@code highest stored index + 1}
         */
        public int[] getArray() {
            // copyOf zero-pads, which also covers a zero-capacity backing array
            return Arrays.copyOf(array, maxIndex + 1);
        }

        /**
         * Stores {@code value} at {@code index}, growing the backing array if needed.
         *
         * @param index non-negative position to store at
         * @param value value to store
         * @throws ArrayIndexOutOfBoundsException if {@code index} is negative
         */
        public void set(int index, int value) {
            if (index >= array.length) {
                grow(getNewLength(index));
            }

            if (index > maxIndex) {
                maxIndex = index;
            }

            array[index] = value;
        }

        /** Replaces the backing array with a zero-padded copy of length {@code newLength}. */
        private void grow(int newLength) {
            array = Arrays.copyOf(array, newLength);
        }

        /**
         * Computes the capacity to grow to so that {@code index} fits: the larger
         * of {@code index + 1} and the current capacity scaled by the growth rate.
         * Uses long/double arithmetic because comparing through {@code float}
         * cannot represent every int above 2^24 and could yield a capacity that is
         * too small for {@code index}.
         */
        private int getNewLength(int index) {
            long required = (long) index + 1;
            long grown = (long) (array.length * (double) ARRAY_GROWTH_RATE);
            return (int) Math.min(Integer.MAX_VALUE, Math.max(required, grown));
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy