All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.nosqlbench.virtdata.library.basics.shared.util.CharsetMapping Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2022 nosqlbench
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.nosqlbench.virtdata.library.basics.shared.util;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

public class CharsetMapping {

    /**
     * Parse the spec, yielding an array of character arrays. each position in the spec delimited
     * by comma or semicolon is represented by an array. Each array is then constructed from
     * {@link #rangeFor(String)}.
     *
     * @param spec A range set specifier
     * @return An array of char arrays
     */
    public static char[][] parseSpec(String spec) {
        String[] ranges = spec.split("[,;]");
        char[][] cs = new char[ranges.length][];
        for (int i = 0; i < ranges.length; i++) {
            char[] range = rangeFor(ranges[i]);
            cs[i] = range;
        }
        return cs;
    }

    /**
     * Parse the range and return set of characters in an array. Any occurrences of a range specifier
     * like {@code a-z} are expanded into the two characters and every on in between, in ordinal order.
     * Otherwise, the characters are taken as they are presented. Each range is built and sanity
     * checked by {@link #rangeFor} to ensure ordering is valid as well as that the characters are
     * all in the printable range of ordinal 32 to ordinal 126.
     * @param range a character range specifier like 'a-z' or '1357'
     * @return An array of characters
     */
    public static char[] rangeFor(String range) {
        range = range.replaceAll("\\n","\n").replaceAll("\\r","\r");
        List chars = new ArrayList<>();
        int pos = 0;
        while (pos < range.length()) {
            if (range.length() > pos + 2 && range.charAt(pos + 1) == '-') {
                List rangeChars = rangeFor(range.substring(pos, pos + 1), range.substring(pos + 2, pos + 3));
                chars.addAll(rangeChars);
                pos += 3;
            } else {
                chars.add(range.substring(pos, pos + 1).charAt(0));
                pos += 1;
            }
        }
        char[] charAry = new char[chars.size()];
        for (int i = 0; i < chars.size(); i++) {
            charAry[i] = chars.get(i);
        }
        return charAry;
    }

    /**
     * Create a list of characters from the US ASCII plane based on a start and end character.
     * @param startChar A single ASCII character
     * @param endChar A single ASCII character, must be equal to or come after startChar
     * @return A list of characters in the range
     */
    public static List rangeFor(String startChar, String endChar) {
        int start = startChar.getBytes(StandardCharsets.US_ASCII)[0];
        int end = endChar.getBytes(StandardCharsets.US_ASCII)[0];
        assertPrintable(start);
        assertPrintable(end);
        assertOrder(start, end);
        List chars = new ArrayList<>();
        ByteBuffer bb = ByteBuffer.allocate(1);
        for (int i = start; i <= end; i++) {
            bb.clear();
            bb.put(0, (byte) i);
            CharBuffer decoded = StandardCharsets.US_ASCII.decode(bb);
            chars.add(decoded.get(0));
        }
        return chars;
    }

    private static void assertOrder(int start, int end) {
        if (end < start) {
            throw new RuntimeException("char '" + (char) end + "' (" + end + ") occurs after '" + (char) start + "' (" + start + "). Are you sure this is the right spec? (reverse the order)");
        }

    }

    private static void assertPrintable(int asciiCode) {
        if (asciiCode > 126 || asciiCode < 32) {
            throw new RuntimeException("ASCII character for code " + asciiCode + " is outside the range of printable characters.");
        }
    }



}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy