/*
 * All downloads are FREE. The search and download features use the official Maven repository.
 *
 * ml.alternet.misc.CharRange$ : Maven / Gradle / Ivy
 *
 * Go to download
 *
 * Alternet Tools include discovery service tools, concurrent and locking tools, and more.
 *
 * The newest version!
 */
package ml.alternet.misc;

import static java.util.Spliterator.DISTINCT;
import static java.util.Spliterator.IMMUTABLE;
import static java.util.Spliterator.NONNULL;
import static java.util.Spliterator.ORDERED;
import static java.util.Spliterator.SIZED;
import static java.util.Spliterator.SORTED;
import static ml.alternet.misc.CharRange.range;
import static ml.alternet.util.StringBuilderUtil.collectorOf;

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Spliterator;
import java.util.TreeSet;
import java.util.Spliterators.AbstractSpliterator;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import ml.alternet.encode.BytesEncoder;
import ml.alternet.misc.CharRange.BoundRange;
import ml.alternet.misc.CharRange.Reversible;
import ml.alternet.misc.CharRange.UnboundRange;

/**
 * Internal implementation of kind of ranges.
 *
 * @author Philippe Poulard
 */
class CharRange$ {

    /**
     * Define a range made of a single character, or made of
     * all characters but one.
     *
     * @author Philippe Poulard
     */
    static class Char extends BoundRange {

        int car;
        boolean equal;

        /**
         * Defines a range with a single character.
         *
         * @param equal true to indicate inclusion,
         *      false to indicate exclusion.
         * @param car The actual character.
         *
         * @see Reversible#includes()
         */
        Char(boolean equal, int car) {
            this.car = car;
            this.equal = equal;
        }

        @Override
        public int start() {
            return this.car;
        }

        @Override
        public int end() {
            return this.car;
        }

        @Override
        public boolean includes() {
            return this.equal;
        }

        @Override
        public boolean contains(int codepoint) {
            return this.equal ^ this.car != codepoint;
        }

        @Override
        public Stream asIntervals() {
            if (this.equal) {
                return Stream.of(this);
            } else {
                return Chars.reverse(IntStream.of(start()));
            }
        }

        @Override
        public StringBuilder toPrettyString(StringBuilder buf) {
            if (! this.equal) {
                buf.append("!");
            }
            return super.toPrettyString(buf);
        }

        @Override
        public CharRange except(CharRange range) {
            if (this.equal) {
                if (range.contains(this.car)) {
                    return EMPTY;
                } else {
                    return this;
                }
            } else {
                return new Ranges(this).except(range);
            }
        }

        /**
         * Merges the given range with this range.
         *
         * @param car The character to include in this range.
         *
         * @return The new range of characters, or the same if
         *      the given character already belong to this range.
         */
        public CharRange union(Char car) {
            if ( ! (this.equal ^ car.equal) ) {
                if (car.car == this.car) {
                    return this; // same
                } else {
                    return new Chars(this.equal, this.car, car.car);
                }
            } else if (car.car == this.car) {
                return ANY; // c U !c
            } else {
                return new Ranges(this, car);
            }
        }

        @Override
        public CharRange union(CharRange range) {
            // delegate to Chars, Range, or Ranges
            return range.union(this);
        }

        private static final char[] HEXES = BytesEncoder.ValueSpace.hexa.get();

        public static StringBuilder append(StringBuilder buf, int c) {
            if (Character.isISOControl(c)) {
                switch (c) {
                case '\b' : buf.append("\\b"); break;
                case '\n' : buf.append("\\n"); break;
                case '\t' : buf.append("\\t"); break;
                case '\f' : buf.append("\\f"); break;
                case '\r' : buf.append("\\r"); break;
                case '\'' : buf.append("\\'"); break;
                case '\\' : buf.append("\\\\"); break;
                default :
                    buf.append("\\u");
                    buf.append(HEXES[(c >> 12) & 15]);
                    buf.append(HEXES[(c >> 8) & 15]);
                    buf.append(HEXES[(c >> 4) & 15]);
                    buf.append(HEXES[(c) & 15]);
                }
            } else if (c > 0xffff || c == 0) {
                buf.append("\\u").append(Integer.toHexString(c));
            } else {
                buf.appendCodePoint(c);
            }
            return buf;
        }

        @Override
        public CharRange revert() {
            return new Char(! this.equal, this.car);
        }

    }

    /**
     * Define a range of characters made of the characters of a string,
     * either by inclusion or exclusion.
     *
     * <p>The codepoints are stored sorted and without duplicates.</p>
     *
     * @author Philippe Poulard
     */
    static class Chars extends UnboundRange implements CharRange, Reversible {

        /** The codepoints of this range, sorted and distinct. */
        String chars;
        /** <code>true</code> for inclusion of {@link #chars}, <code>false</code> for exclusion. */
        boolean equal;

        /**
         * Defines a range with the characters given in a string.
         *
         * @param equal true to indicate inclusion,
         *      false to indicate exclusion.
         * @param chars The actual characters.
         *
         * @see Reversible#includes()
         */
        Chars(boolean equal, CharSequence chars) {
            this(equal, chars.codePoints());
        }

        /**
         * Defines a range with the codepoints given.
         *
         * @param equal true to indicate inclusion,
         *      false to indicate exclusion.
         * @param codepoints The actual codepoints.
         */
        Chars(boolean equal, int... codepoints) {
            this(equal, IntStream.of(codepoints));
        }

        /**
         * Defines a range with the codepoints given.
         *
         * @param equal true to indicate inclusion,
         *      false to indicate exclusion.
         * @param codepoints The actual codepoints, in any order,
         *      duplicates are dropped.
         */
        Chars(boolean equal, IntStream codepoints) {
              int[] cp = codepoints.sorted().distinct().toArray();
              this.chars = new String(cp, 0, cp.length);
              this.equal = equal;
        }

        /**
         * @return <code>true</code> if the characters are included,
         *      <code>false</code> if they are excluded.
         */
        @Override
        public boolean includes() {
            return this.equal;
        }

        /**
         * Test membership of a codepoint.
         *
         * @param codepoint The codepoint to test.
         *
         * @return For an inclusion, <code>true</code> if the codepoint is one
         *      of the characters ; for an exclusion, <code>true</code> if it
         *      is none of them.
         */
        @Override
        public boolean contains(int codepoint) {
            // '==' binds tighter than '^' : equal XOR (codepoint not found)
            return this.equal ^ this.chars.indexOf(codepoint) == -1;
        }

        /**
         * Append a printable form of this range : the quoted characters
         * separated by " | " between parenthesis, prefixed by "!" when it
         * is an exclusion.
         *
         * @param buf The target buffer.
         *
         * @return The result of the collector built on the target buffer.
         */
        @Override
        public StringBuilder toPrettyString(StringBuilder buf) {
            if (! this.equal) {
                buf.append("!");
            }
            return this.chars.codePoints()
                .boxed()
                .collect(collectorOf(
                    "( ", " | ", " )", buf,
                    cp -> Char.append(buf.append('\''), cp).append('\''))
                );
        }

        /**
         * Remove a range from this one, by delegation to
         * {@link Ranges#except(CharRange)}.
         *
         * @param range The range to remove.
         *
         * @return The remaining range.
         */
        @Override
        public CharRange except(CharRange range) {
            return new Ranges(this).except(range);
        }

        /**
         * Merges a single character range with this range.
         *
         * @param car The character range to merge.
         *
         * @return The union of both ranges.
         */
        public CharRange union(Char car) {
            boolean listed = this.chars.indexOf(car.car) != -1;
            if (this.equal && car.equal) {
                if (listed) {
                    return this;
                } else {
                    return new Chars(
                        true,
                        IntStream.concat(this.chars.codePoints(), IntStream.of(car.car) )
                    );
                }
            } else if (! this.equal && ! car.equal) {
                // !{a, b, ...} U !c excludes only what both sides exclude :
                // 'c' alone when it is listed here, nothing otherwise
                if (listed) {
                    return car;
                } else {
                    return ANY;
                }
            } else {
                return new Ranges(this, car);
            }
        }

        /**
         * Merges a range of characters with this range.
         *
         * @param chars The characters to merge.
         *
         * @return The union of both ranges.
         */
        public CharRange union(Chars chars) {
            if (this.equal && chars.equal) {
                return new Chars(
                    true,
                    IntStream.concat(this.chars.codePoints(), chars.chars.codePoints() )
                );
            } else if (! this.equal && ! chars.equal) {
                // the union of two exclusions excludes only the characters
                // that are excluded by both
                int[] common = this.chars.codePoints()
                    .filter(cp -> chars.chars.indexOf(cp) != -1)
                    .toArray();
                if (common.length == 0) {
                    return ANY;
                } else {
                    return new Chars(false, common);
                }
            } else {
                return new Ranges(this, chars);
            }
        }

        /**
         * Expose this range as bound intervals.
         *
         * @return For an inclusion, one interval per group of consecutive
         *      codepoints (a {@link Char} for an isolated codepoint, a
         *      {@link Range} otherwise) ; for an exclusion, the intervals
         *      computed by {@link #reverse(IntStream)}.
         */
        @Override
        public Stream<BoundRange> asIntervals() {
            if (this.equal) { // case of inclusion
                if (this.chars.length() == 0) {
                    return Stream.empty();
                } else {
                    int[] codepoints = this.chars.codePoints().sorted().distinct().toArray();
                    // group consecutive characters to a range, and serve individual ones ;
                    // the number of codepoints is only an upper bound of the number of
                    // intervals served, therefore SIZED must not be reported (an exact
                    // size that is not honored breaks toArray() and count())
                    Spliterator<BoundRange> iter = new AbstractSpliterator<BoundRange>(
                        codepoints.length,
                        DISTINCT | IMMUTABLE | NONNULL | ORDERED)
                    {
                        int i = 0;
                        boolean end = false;
                        int firstOfGroup = -1;
                        int lastOfGroup = -1;

                        @Override
                        public boolean tryAdvance(Consumer<? super BoundRange> action) {
                            if (end && firstOfGroup == - 1) {
                                return false;
                            } // else not yet the end OR firstOfGroup not yet consumed
                            while (i < codepoints.length) {
                                int cp = codepoints[i++];
                                if (firstOfGroup == -1) { // first set
                                    firstOfGroup = cp;
                                    lastOfGroup = cp;
                                } else if (cp == lastOfGroup + 1) {
                                    lastOfGroup++; // expand the range
                                    continue;
                                } else {
                                    i--; // cp not consumed in this tryAdvance => reset it
                                    break;
                                }
                            }
                            if (firstOfGroup == lastOfGroup) {
                                action.accept(new Char(equal, firstOfGroup));
                            } else {
                                action.accept(new Range(firstOfGroup, lastOfGroup));
                            }
                            if (i >= codepoints.length) {
                                end = true;
                            }
                            firstOfGroup = - 1; // reinit
                            lastOfGroup = -1;
                            return true;
                        }
                    };
                    return StreamSupport.stream(iter, false);
                }
            } else { // case of exclusion
                if (this.chars.length() == 0) {
                    return Stream.of(ANY);
                } else {
                    // range inversion
                    return reverse(this.chars.codePoints());
                }
            }
        }

        /**
         * Reverse a sequence of chars.
         *
         * <p>The returned stream is lazy and stateful : the lower bound of
         * each interval is carried from one element to the next, and the
         * last interval reads it only when it is consumed.</p>
         *
         * @param chars The chars to reverse.
         *
         * @return The new stream contains all the chars that are
         *      not in the input stream.
         */
        public static Stream<BoundRange> reverse(IntStream chars) {
            // lower bound of the next interval, shared by the lambdas below
            int[] lower = { Character.MIN_CODE_POINT };
            // chars are n points, ranges are n+1 intervals
            return Stream.<BoundRange> concat(
                chars.sorted()
                    .distinct()
                    // filter out consecutive chars
                    .filter(c -> c == lower[0] ? lower[0]++ < 0 /*false*/ : true)
                    .mapToObj( c -> {
                        // the char before because the upper bound must exclude it
                        Range r = new Range(lower[0], c - 1);
                        lower[0] = c + 1; // capture the last
                        return r;
                    }),
                Stream.of(
                    new Range( -1, Character.MAX_CODE_POINT) {
                        @Override
                        public int start() {
                            return lower[0]; // the very last character
                                    // known at the end
                        };
                    })
            ).filter(r -> ! r.isEmpty());
                    // when consecutive chars with MIN_CP or MAX_CP are found
        }

        /**
         * @return A new range on the same characters with the opposite
         *      inclusion flag.
         */
        @Override
        public CharRange revert() {
            return new Chars(! this.equal, this.chars);
        }

    }

    /**
     * Define an atomic range of characters, from a start codepoint
     * to an end codepoint, both included.
     *
     * @author Philippe Poulard
     */
    static class Range extends BoundRange {

        private final int start;
        private final int end;

        /**
         * Create a range.
         *
         * @param start The start codepoint (included)
         * @param end The end codepoint (included)
         */
        Range(int start, int end) {
            this.start = start;
            this.end = end;
        }

        /**
         * @return The start codepoint (included).
         */
        @Override
        public int start() {
            return this.start;
        }

        /**
         * @return The end codepoint (included).
         */
        @Override
        public int end() {
            return this.end;
        }

        /**
         * Test membership of a codepoint.
         *
         * @param codepoint The codepoint to test.
         *
         * @return <code>true</code> if the codepoint is between the
         *      boundaries of this range, both included.
         */
        @Override
        public boolean contains(int codepoint) {
            return codepoint >= start() && codepoint <= end();
        }

        /**
         * @return A stream made of this single range.
         */
        @Override
        public Stream<BoundRange> asIntervals() {
            return Stream.of(this);
        }

        /**
         * Remove a range from this one, by delegation to
         * {@link Ranges#except(CharRange)}.
         *
         * @param range The range to remove.
         *
         * @return The remaining range.
         */
        @Override
        public CharRange except(CharRange range) {
            return new Ranges(this).except(range);
        }

        /**
         * Merges another range with this range.
         *
         * @param range The other range.
         *
         * @return This range if both have the same boundaries, a single
         *      extended range if they overlap, or the result of the parent
         *      implementation if they are disjoint.
         */
        public CharRange union(Range range) {
            if (range.start() == start() && range.end() == end()) {
                return this;
            } else if (range.end() < start() || range.start() > end()) {
                return super.union(range);
            } else {
                // extend a range
                return new Range(Math.min(range.start(), start()), Math.max(range.end(), end()));
            }
        }

        /**
         * Merges a single character range with this range.
         *
         * @param car The character range to merge.
         *
         * @return The union of both ranges.
         */
        public CharRange union(Char car) {
            if (contains(car.car)) {
                if (car.equal) {
                    // the character is already in this range
                    return this;
                } else {
                    // the only character excluded by 'car' is supplied by this range
                    return ANY;
                }
            } else {
                return super.union(car);
            }
        }

        /**
         * Merges a range of characters with this range.
         *
         * @param chars The characters to merge.
         *
         * @return The union of both ranges.
         */
        public CharRange union(Chars chars) {
            if (chars.chars.codePoints().allMatch(c -> contains(c))) {
                if (chars.equal) {
                    // all the characters are already in this range
                    return this;
                } else {
                    // all the characters excluded by 'chars' are supplied by this range
                    return ANY;
                }
            } else {
                return super.union(chars);
            }
        }

        /**
         * Compute the complement of this range.
         *
         * <p>The boundaries are read with {@link #start()} and {@link #end()}
         * rather than from the fields, because a subclass may override
         * them.</p>
         *
         * @return <code>ANY</code> if this range is empty, <code>EMPTY</code>
         *      if it covers all the codepoints, otherwise the codepoints
         *      before and/or after this range.
         */
        @Override
        public CharRange revert() {
            int first = start();
            int last = end();
            if (isEmpty()) {
                return ANY;
            } else if (first == Character.MIN_CODE_POINT) {
                if (last == Character.MAX_CODE_POINT) {
                    return EMPTY;
                } else if (last + 1 == Character.MAX_CODE_POINT) {
                    return new Char(true, Character.MAX_CODE_POINT);
                } else {
                    return new Range(last + 1, Character.MAX_CODE_POINT);
                }
            } else {
                if (last == Character.MAX_CODE_POINT) {
                    if (first == Character.MIN_CODE_POINT + 1) {
                        return new Char(true, Character.MIN_CODE_POINT);
                    } else {
                        return new Range(Character.MIN_CODE_POINT, first - 1);
                    }
                } else {
                    return new Range(Character.MIN_CODE_POINT, first - 1)
                    .union(new Range(last + 1, Character.MAX_CODE_POINT));
                }
            }
        }

    }

    /**
     * Define non-overlapping ranges of characters.
     *
     * @author Philippe Poulard
     */
    static class Ranges extends UnboundRange {

        // set of successive ranges ordered by start char
        // NOTE(review): the ordering is the natural order of BoundRange,
        // which is declared outside this file
        TreeSet<BoundRange> ranges = new TreeSet<>();

        /**
         * Create a non-overlapping ranges of characters.
         *
         * @param ranges The ranges that compose this set may overlap themselves.
         */
        Ranges(CharRange... ranges) {
            this(Stream.of(ranges));
        }

        /**
         * Create a non-overlapping ranges of characters.
         *
         * <p>The intervals of the given ranges are sorted, the empty ones
         * are dropped, and each interval that overlaps the last one stored
         * is merged with it.</p>
         *
         * @param ranges The ranges that compose this set may overlap themselves.
         */
        Ranges(Stream<? extends CharRange> ranges) {
            ranges.flatMap(c -> c.asIntervals())
                .filter(r -> ! r.isEmpty())
                .sorted()
                .distinct()
                .forEach(r -> {
                    BoundRange exist = this.ranges.floor(r);
                    if (exist == null || exist.end() < r.start() || exist.start() > r.end()) {
                        this.ranges.add(r);
                    } else { // extend a range
                        this.ranges.remove(exist);
                        this.ranges.add(new Range(Math.min(exist.start(), r.start()), Math.max(exist.end(), r.end())));
                    }
                });
        }

        /**
         * Test membership of a codepoint.
         *
         * @param codepoint The codepoint to test.
         *
         * @return <code>true</code> if the closest range found at or before
         *      the codepoint contains it.
         */
        @Override
        public boolean contains(int codepoint) {
            BoundRange exist = this.ranges.floor(new Char(true, codepoint));
            return exist != null && exist.contains(codepoint);
        }

        /**
         * Append a printable form of the ranges, separated by " | "
         * between parenthesis.
         *
         * @param buf The target buffer.
         *
         * @return The result of the collector built on the target buffer.
         */
        @Override
        public StringBuilder toPrettyString(StringBuilder buf) {
            return this.ranges.stream()
                .collect(collectorOf(
                    "(", " | ", ")", buf,
                    bcr -> bcr.toPrettyString(buf))
            );
        }

        /**
         * @return A hash code computed from the set of ranges.
         */
        @Override
        public int hashCode() {
            return Objects.hash(this.ranges);
        }

        /**
         * @return The ranges of this set, in the order of the set.
         */
        @Override
        public Stream<BoundRange> asIntervals() {
            return this.ranges.stream();
        }

        /**
         * Merges a range with these ranges.
         *
         * @param range The range to merge.
         *
         * @return This instance if every interval of the given range is
         *      within the boundaries of an existing range, a new set of
         *      ranges otherwise.
         */
        @Override
        public CharRange union(CharRange range) {
            if (range.asIntervals().allMatch(r -> {
                    // 'r' is in the boundaries of an existing range ?
                    BoundRange exist = this.ranges.floor(r);
                    return exist != null && r.end() <= exist.end();
            }))
            { // unchanged
                return this;
            } else {
                return new Ranges(this, range);
            }
        }

        /**
         * Remove a range from these ranges.
         *
         * @param range The range to remove.
         *
         * @return The remaining range.
         *
         * @see #except(CharRange...)
         */
        @Override
        public CharRange except(CharRange range) {
            return except(new CharRange[] { range });
        }

        /**
         * Remove some ranges from these ranges.
         *
         * @param ranges The ranges to remove.
         *
         * @return This instance if none of the given ranges overlaps an
         *      existing range ; otherwise <code>EMPTY</code> if nothing
         *      remains, <code>ANY</code> or the single remaining range if
         *      only one remains, or a new set of ranges.
         */
        @Override
        public CharRange except(CharRange... ranges) {
            List<BoundRange> boundRanges = Arrays.asList(ranges)
                .stream()
                .flatMap(c -> c.asIntervals())
                .filter(r -> ! r.isEmpty())
                .sorted()
                .distinct()
                .collect(Collectors.toList());
            if (boundRanges.stream()
                .anyMatch(exclude -> {
                    // 'exclude' is overlapping an existing range ?
                    BoundRange include = this.ranges.floor(exclude);
                    // that include element is before exclude element
                    if (include != null && include.end() >= exclude.start()) {
                        return true;
                    }
                    // check the next : when nothing is before the exclude element,
                    // the next is the first range after it, that may still
                    // start before the end of the exclude element
                    include = include == null
                        ? this.ranges.ceiling(exclude)
                        : this.ranges.higher(include);
                    // that include element is after exclude element
                    return include != null && include.start() <= exclude.end();
            }))
            { // at least one exclusion has an impact on the ranges
                // => create an iterator that merges 2 sorted cursors
                Spliterator<BoundRange> iter = new AbstractSpliterator<BoundRange>(
                        Long.MAX_VALUE, DISTINCT | IMMUTABLE | NONNULL | ORDERED | SORTED )
                {
                    Spliterator<BoundRange> cursorInclude = asIntervals().sorted().distinct().spliterator();
                    BoundRange include; // current to include
                    Spliterator<BoundRange> cursorExclude;
                    BoundRange exclude; // current to exclude
                    // the fields above must be declared before 'end' :
                    // its initializer reads the first item of both cursors
                    boolean end = initInclusion(() -> {
                        // will be call only if we have something to include
                        cursorExclude = boundRanges.spliterator();
                        cursorExclude.tryAdvance(br -> exclude = br);
                        return false; // end = false
                    });
                    // first read
                    boolean initInclusion(Supplier<Boolean> initExclusion) {
                        return cursorInclude.tryAdvance(br -> include = br) ? initExclusion.get() : true;
                    }

                    // accept currentInclude and read the next
                    boolean includeCurrent(Consumer<? super BoundRange> action) {
                        action.accept(include);
                        // read next
                        end = ! cursorInclude.tryAdvance(br -> include = br);
                        if (end) {
                            include = null;
                        }
                        return true;
                    }

                    @Override
                    public Comparator<? super BoundRange> getComparator() {
                        return null; // mean : sort in natural order
                    }

                    @Override
                    public boolean tryAdvance(Consumer<? super BoundRange> action) {
                        // read items to include while reading items to exclude

                        // below : [                ] => Unicode range
                        // we                  IIIIII => include range
                        // have                EEEEEE => exclude range

                        while (true) {
                            if (end) { // no more to include
                                return false;
                            } else if (exclude == null) { // no more to exclude
                                // accept all
                                return includeCurrent(action);
                            } else // both are present
                            if (include.end() < exclude.start()) {
                                // exclusion not yet reached
                                //     [     IIIIII       ]
                                //     [             EEE  ]
                                // => accept IIIIII
                                return includeCurrent(action);
                            } else {
                                if (exclude.end() < include.start()) {
                                    // inclusion not yet reached
                                    //        [         IIIIII    ]
                                    //        [  EEEE             ]
                                    // => ignore EEEE
                                    if (! cursorExclude.tryAdvance(br -> exclude = br)) {
                                        exclude = null;
                                    }
                                    // loop
                                } else // IIIIII and EEEEE are overlapping ; let's find how...
                                if (exclude.start() <= include.start() && exclude.end() >= include.end()) {
                                    //    [      IIIIII      ]
                                    //    [  EEEEEEEEEEEEE   ]
                                    // ignore => IIIIII
                                    // read next...
                                    end = ! cursorInclude.tryAdvance(br -> include = br);
                                    // ...and loop
                                } else if (exclude.end() <= include.end()) {
                                    if (exclude.start() > include.start()) {
                                        //       [   IIIIIII    ]
                                        //       [     EE       ]
                                        // accept => II
                                        // next =>       III
                                        action.accept(range(include.start(), exclude.start() - 1));
                                        include = range(exclude.end() + 1, include.end());
                                        if (include.isEmpty()) {
                                            end = ! cursorInclude.tryAdvance(br -> include = br);
                                        }
                                        return true;
                                    } else {
                                        //   [       IIIIII   ]
                                        //   [   EEEEEEEE     ]
                                        // cut =>        II
                                        include = new Range(exclude.end() + 1, include.end());
                                        if (include.isEmpty()) {
                                            end = ! cursorInclude.tryAdvance(br -> include = br);
                                        }
                                        // loop
                                    }
                                } else {
                                    //     [     IIIIII       ]
                                    //     [       EEEEEEEE   ]
                                    // accept => II
                                    action.accept(new Range(include.start(), exclude.start() - 1));
                                    // read next
                                    end = ! cursorInclude.tryAdvance(br -> include = br);
                                    return true;
                                }
                            }
                        }
                    }
                };
                Ranges newRanges = new Ranges();
                StreamSupport.stream(iter , false)
                    .filter(elem -> ! elem.isEmpty())
                    .forEach(elem -> newRanges.ranges.add(elem));
                if (newRanges.ranges.size() == 0) {
                    return EMPTY;
                } else if (newRanges.ranges.size() == 1) {
                    BoundRange r = newRanges.ranges.first();
                    if (r.start() == Character.MIN_CODE_POINT && r.end() == Character.MAX_CODE_POINT) {
                        return ANY;
                    } else {
                        return r;
                    }
                } else {
                    return newRanges;
                }
            } else { // unchanged
                return this;
            }
        }

    }

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy