All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.atilika.kuromoji.util.DictionaryEntryLineParser Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta7
Show newest version
/**
 * Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  A copy of the
 * License is distributed with this work in the LICENSE.md file.  You may
 * also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.atilika.kuromoji.util;

import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers for reading and writing comma-separated dictionary entry lines.
 *
 * <p>A field may be wrapped in double quotes so that it can contain commas; a
 * literal double quote inside a field is written as two consecutive double
 * quotes.
 */
public class DictionaryEntryLineParser {

    private static final char QUOTE = '"';
    private static final char COMMA = ',';
    private static final String QUOTE_ESCAPED = "\"\"";

    /**
     * Splits a CSV line into its fields and unescapes each of them.
     *
     * <p>Commas inside a quoted section do not separate fields. Every field,
     * including the last one, is passed through {@link #unescape(String)}.
     * An empty line yields a single empty field.
     *
     * @param line  line to parse, not null
     * @return String array of parsed values, not null
     * @throws RuntimeException on malformed input (odd number of quote characters)
     */
    public static String[] parseLine(String line) {
        boolean insideQuote = false;
        List<String> result = new ArrayList<>();
        StringBuilder builder = new StringBuilder();
        int quoteCount = 0;

        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);

            if (c == QUOTE) {
                // Every quote toggles the state; an escaped pair ("") toggles
                // twice and therefore leaves the state unchanged.
                insideQuote = !insideQuote;
                quoteCount++;
            }

            if (c == COMMA && !insideQuote) {
                // Field separator: flush the raw field collected so far.
                result.add(unescape(builder.toString()));
                builder = new StringBuilder();
                continue;
            }

            // Quotes are kept in the raw field; unescape() strips them later.
            builder.append(c);
        }

        if (quoteCount % 2 != 0) {
            throw new RuntimeException("Unmatched quote in entry: " + line);
        }

        // The trailing field has no terminating comma, so flush it here. It is
        // unescaped like every other field so quoted last fields are handled
        // consistently.
        result.add(unescape(builder.toString()));

        return result.toArray(new String[result.size()]);
    }

    /**
     * Unescape input for CSV
     *
     * <p>A quote in the first position and a quote in the last position are
     * dropped (each independently of the other). In between, two consecutive
     * quotes produce one literal quote, and a quote that is not followed
     * directly by another quote is dropped.
     *
     * @param text  text to be unescaped, not null
     * @return unescaped value, not null
     */
    public static String unescape(String text) {
        StringBuilder builder = new StringBuilder();
        boolean foundQuote = false;
        int last = text.length() - 1;

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);

            // Enclosing quotes are not part of the value.
            if (i == 0 && c == QUOTE || i == last && c == QUOTE) {
                continue;
            }

            if (c == QUOTE) {
                if (foundQuote) {
                    // Second quote of an escaped pair: emit one literal quote.
                    builder.append(QUOTE);
                    foundQuote = false;
                } else {
                    // Possibly the first half of an escaped pair; wait for the next char.
                    foundQuote = true;
                }
            } else {
                foundQuote = false;
                builder.append(c);
            }
        }

        return builder.toString();
    }

    /**
     * Escape input for CSV
     *
     * <p>Text without quotes or commas is returned unchanged. Otherwise every
     * quote is doubled and the whole value is wrapped in quotes. Wrapping is
     * done whenever a quote or a comma is present, so that a value starting or
     * ending with a quote survives a round trip through
     * {@link #unescape(String)}.
     *
     * @param text  text to be escaped, not null
     * @return escaped value, not null
     */
    public static String escape(String text) {
        boolean hasQuote = text.indexOf(QUOTE) >= 0;
        boolean hasComma = text.indexOf(COMMA) >= 0;

        if (!(hasQuote || hasComma)) {
            return text;
        }

        StringBuilder builder = new StringBuilder(text.length() + 2);
        builder.append(QUOTE);

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);

            if (c == QUOTE) {
                builder.append(QUOTE_ESCAPED);
            } else {
                builder.append(c);
            }
        }

        builder.append(QUOTE);
        return builder.toString();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy