All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jruby.util.ByteListHelper Maven / Gradle / Ivy

package org.jruby.util;

import org.jcodings.Encoding;

/**
 * Helpers for working with bytelists.
 */
public class ByteListHelper {
    public interface CodePoint {
        /**
         * call a command pattern caller with index of and actual codepoint.  If false is
         * returned it will let users of this interface to stop walking the codepoints.
         * @param index the index of which codepoint we are visiting.
         * @param codepoint we are visiting.
         * @return true to continue walking and false to stop.
         */
        boolean call(int index, int codepoint, Encoding encoding);
    }

    // Note: perhaps there is some great way of doing this with streams but I did not want to box index
    // with value or make someone try and deal with that outside of the closure.
    public interface Visit {
        U call(int index, T value, U module);
    }

    /**
     * This method assumes the ByteList will be a valid string for the encoding which it is marked as.
     * It also assumes slow path mbc walking.  If you know you have an ASCII ByteList you should do something
     * else.
     * @param bytelist of the mbc-laden bytes
     * @param each the closure which walks the codepoints
     * @return true if it walks the whole bytelist or false if it stops.
     */
    public static boolean eachCodePoint(ByteList bytelist, CodePoint each) {
        byte[] bytes = bytelist.unsafeBytes();
        int len = bytelist.getRealSize();
        Encoding encoding = bytelist.getEncoding();
        int begin = bytelist.begin();
        int end = begin + len;
        int n;

        for (int i = 0; i < len; i += n) {
            int realIndex = begin + i;
            n = StringSupport.encFastMBCLen(bytes, realIndex, end, encoding);
            if (!each.call(i, encoding.mbcToCode(bytes, realIndex, end), encoding)) return false;
        }

        return true;
    }

    /**
     * This method will split a string and call a visitor for each segment between the split pattern.
     *
     * @param value to be split
     * @param pattern the pattern to split value with
     * @param bodyVisitor visitor for all but last segment
     * @param headVisitor visitor for the last segment (if null if will use bodyVisitor).
     * @param  Return type of visitor
     * @return last T from headVisitor
     */
    public static  T split(ByteList value, ByteList pattern, Visit bodyVisitor, Visit headVisitor) {
        if (headVisitor == null) headVisitor = bodyVisitor;

        Encoding enc = pattern.getEncoding();
        byte[] bytes = value.getUnsafeBytes();
        int begin = value.getBegin();
        int realSize = value.getRealSize();
        int end = begin + realSize;
        int currentOffset = 0;
        int patternIndex;
        int i = 0;
        T current = null;

        for (; currentOffset < realSize && (patternIndex = value.indexOf(pattern, currentOffset)) >= 0; i++) {
            int t = enc.rightAdjustCharHead(bytes, currentOffset + begin, patternIndex + begin, end) - begin;
            if (t != patternIndex) {
                currentOffset = t;
                continue;
            }

            current = bodyVisitor.call(i, value.makeShared(currentOffset, patternIndex - currentOffset), current);
            if (current == null) return null;

            currentOffset = patternIndex + pattern.getRealSize();
        }

        return headVisitor.call(i, value.makeShared(currentOffset, realSize - currentOffset), current);
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy