All downloads are free. The search and download features use the official Maven repository.

com.github.javaparser.UnicodeEscapeProcessingProvider Maven / Gradle / Ivy

There is a newer version: 3.26.2
Show newest version
/*
 * Copyright (C) 2007-2010 Júlio Vilmar Gesser.
 * Copyright (C) 2011, 2013-2023 The JavaParser Team.
 *
 * This file is part of JavaParser.
 *
 * JavaParser can be used either under the terms of
 * a) the GNU Lesser General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * b) the terms of the Apache License
 *
 * You should have received a copy of both licenses in LICENCE.LGPL and
 * LICENCE.APACHE. Please refer to those files for details.
 *
 * JavaParser is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 */
package com.github.javaparser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * {@link Provider} un-escaping unicode escape sequences in the input sequence.
 *
 * <p>A backslash followed by one or more {@code u} characters and exactly four hexadecimal digits is
 * replaced by the character with that code. A backslash that directly follows another plain
 * backslash never starts an escape, and malformed escapes are passed through unchanged.</p>
 *
 * <p>While reading, the line and column of both the input and the produced output are tracked, and a
 * {@link PositionMapping} from output positions to input positions is built.</p>
 */
public class UnicodeEscapeProcessingProvider implements Provider {

    private static final char LF = '\n';

    private static final char CR = '\r';

    private static final char BACKSLASH = '\\';

    private static final int EOF = -1;

    /**
     * The number of hexadecimal digits that make up a unicode escape.
     */
    private static final int ESCAPE_DIGITS = 4;

    /**
     * Buffer holding characters read from {@link #_input} and characters pushed back by look-ahead.
     */
    private char[] _data;

    /**
     * The number of characters in {@link #_data}.
     */
    private int _len = 0;

    /**
     * The position in {@link #_data} where to read the next source character from.
     */
    private int _pos = 0;

    /**
     * Whether the most recently produced character was a plain backslash, in which case a directly
     * following backslash must not start an escape.
     */
    private boolean _backslashSeen;

    private final LineCounter _inputLine = new LineCounter();

    private final LineCounter _outputLine = new LineCounter();

    private final PositionMappingBuilder _mappingBuilder = new PositionMappingBuilder(_outputLine, _inputLine);

    private final Provider _input;

    /**
     * Creates a {@link UnicodeEscapeProcessingProvider} with a buffer of 2048 characters.
     *
     * @param input The provider delivering the raw (escaped) characters.
     */
    public UnicodeEscapeProcessingProvider(Provider input) {
        this(2048, input);
    }

    /**
     * Creates a {@link UnicodeEscapeProcessingProvider}.
     *
     * @param bufferSize The initial size of the internal buffer, must be at least 1.
     * @param input The provider delivering the raw (escaped) characters.
     * @throws IllegalArgumentException If {@code bufferSize} is smaller than 1.
     */
    public UnicodeEscapeProcessingProvider(int bufferSize, Provider input) {
        if (bufferSize < 1) {
            // An empty buffer can neither be filled nor take pushed-back characters.
            throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
        }
        _input = input;
        _data = new char[bufferSize];
    }

    /**
     * The {@link LineCounter} of the input file.
     */
    public LineCounter getInputCounter() {
        return _inputLine;
    }

    /**
     * The {@link LineCounter} of the output file.
     */
    public LineCounter getOutputCounter() {
        return _outputLine;
    }

    /**
     * Fills the given buffer with un-escaped characters.
     *
     * @param buffer The buffer to write to.
     * @param offset The index in {@code buffer} of the first character to write.
     * @param len The maximum number of characters to write.
     * @return The number of characters written, or {@code -1} if the end of the input was reached
     *         before any character could be written.
     */
    @Override
    public int read(char[] buffer, final int offset, int len) throws IOException {
        int pos = offset;
        int stop = offset + len;
        while (pos < stop) {
            int ch = _outputLine.process(nextOutputChar());
            if (ch < 0) {
                if (pos == offset) {
                    // Nothing read yet, this is the end of the stream.
                    return EOF;
                }
                break;
            }
            // Record a new delta if input and output positions have drifted apart.
            _mappingBuilder.update();
            buffer[pos++] = (char) ch;
        }
        return pos - offset;
    }

    /**
     * Closes the underlying input provider.
     */
    @Override
    public void close() throws IOException {
        _input.close();
    }

    /**
     * Produces the next un-escaped character to be written to the output.
     *
     * @return The next character or {@code -1} if no more characters are available.
     */
    private int nextOutputChar() throws IOException {
        int next = nextInputChar();
        switch (next) {
            case EOF:
                return EOF;
            case BACKSLASH:
                {
                    if (_backslashSeen) {
                        // Second backslash of a pair: it is passed through and cannot start an escape.
                        return clearBackSlashSeen(next);
                    }
                    return backSlashSeen();
                }
            default:
                {
                    // An arbitrary character.
                    return clearBackSlashSeen(next);
                }
        }
    }

    /**
     * Resets the backslash flag and passes the given character through.
     */
    private int clearBackSlashSeen(int next) {
        _backslashSeen = false;
        return next;
    }

    /**
     * Handles the input directly following a backslash that may start a unicode escape.
     *
     * @return The decoded character of a complete escape, or the backslash itself otherwise.
     */
    private int backSlashSeen() throws IOException {
        _backslashSeen = true;
        // Everything read from here on is look-ahead. If no complete escape follows, the characters
        // are pushed back and read again, so the input position must be rewound to this point to
        // avoid counting them twice.
        _inputLine.mark();
        int next = nextInputChar();
        switch (next) {
            case EOF:
                // End of file after backslash produces the backslash itself.
                return BACKSLASH;
            case 'u':
                {
                    return unicodeStartSeen();
                }
            default:
                {
                    pushBack(next);
                    return abortEscape();
                }
        }
    }

    /**
     * Skips additional {@code u} characters of an escape and hands over to the digit parser.
     *
     * @return The decoded character of a complete escape, or the backslash itself otherwise.
     */
    private int unicodeStartSeen() throws IOException {
        int uCnt = 1;
        while (true) {
            int next = nextInputChar();
            switch (next) {
                case EOF:
                    {
                        pushBackUs(uCnt);
                        return abortEscape();
                    }
                case 'u':
                    {
                        uCnt++;
                        continue;
                    }
                default:
                    {
                        return readDigits(uCnt, next);
                    }
            }
        }
    }

    /**
     * Reads the four hexadecimal digits of an escape.
     *
     * @param uCnt The number of {@code u} characters consumed after the backslash.
     * @param next3 The already consumed character that is expected to be the most significant digit.
     * @return The decoded character, or the backslash itself if fewer than four digits follow. In the
     *         latter case all consumed characters are pushed back in their original order.
     */
    private int readDigits(int uCnt, int next3) throws IOException {
        int[] consumed = new int[ESCAPE_DIGITS];
        int value = 0;
        int next = next3;
        for (int n = 0; n < ESCAPE_DIGITS; n++) {
            if (n > 0) {
                next = nextInputChar();
            }
            consumed[n] = next;
            int digit = digit(next);
            if (digit < 0) {
                // Not an escape: restore the input, latest character first.
                for (int i = n; i >= 0; i--) {
                    pushBack(consumed[i]);
                }
                pushBackUs(uCnt);
                return abortEscape();
            }
            value = value << 4 | digit;
        }
        return clearBackSlashSeen(value);
    }

    /**
     * Completes an abandoned escape after its look-ahead has been pushed back: rewinds the input
     * position to directly behind the backslash and yields the backslash itself.
     */
    private int abortEscape() {
        _inputLine.reset();
        return BACKSLASH;
    }

    /**
     * Pushes back the given number of {@code u} characters.
     */
    private void pushBackUs(int cnt) {
        for (int n = 0; n < cnt; n++) {
            pushBack('u');
        }
    }

    /**
     * The value of the given hexadecimal digit.
     *
     * @return A value from 0 to 15, or {@code -1} if the given character is not a hexadecimal digit.
     */
    private static int digit(int ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'A' && ch <= 'F') {
            return 10 + ch - 'A';
        }
        if (ch >= 'a' && ch <= 'f') {
            return 10 + ch - 'a';
        }
        return -1;
    }

    /**
     * Reads the next input character and processes its column/line information.
     *
     * @return The next character or {@code -1} if no more input is available.
     */
    private int nextInputChar() throws IOException {
        int result = nextBufferedChar();
        return _inputLine.process(result);
    }

    /**
     * Retrieves the next raw character from the buffered {@link #_input}.
     *
     * @return The next character or {@code -1} if no more input is available.
     */
    private int nextBufferedChar() throws IOException {
        while (isBufferEmpty()) {
            int direct = fillBuffer();
            if (direct < 0) {
                return EOF;
            }
        }
        return _data[_pos++];
    }

    private boolean isBufferEmpty() {
        return _pos >= _len;
    }

    /**
     * Replaces the (fully consumed) buffer contents with fresh characters from {@link #_input}.
     *
     * @return The result of the underlying read: the number of characters now available, or a
     *         negative value at the end of the input.
     */
    private int fillBuffer() throws IOException {
        _pos = 0;
        // Mark the buffer as empty first. Otherwise a read delivering no characters (or failing)
        // would leave the old length in place and already consumed characters would be read again.
        _len = 0;
        int direct = _input.read(_data, 0, _data.length);
        if (direct > 0) {
            _len = direct;
        }
        return direct;
    }

    /**
     * Puts the given character back in front of the unread input. Negative values (end of file) are
     * ignored.
     */
    private void pushBack(int ch) {
        if (ch < 0) {
            return;
        }
        if (isBufferEmpty()) {
            // Nothing unread: start filling the buffer from its right end.
            _pos = _data.length;
            _len = _data.length;
        } else if (_pos == 0) {
            if (_len == _data.length) {
                // Buffer is completely full, no push possible, enlarge buffer.
                char[] newData = new char[_data.length + 1024];
                _len = newData.length;
                _pos = newData.length - _data.length;
                System.arraycopy(_data, 0, newData, _pos, _data.length);
                _data = newData;
            } else {
                // Move contents to the right.
                int cnt = _len - _pos;
                _pos = _data.length - _len;
                _len = _data.length;
                System.arraycopy(_data, 0, _data, _pos, cnt);
            }
        }
        _data[--_pos] = (char) ch;
    }

    /**
     * The {@link PositionMapping} being built during processing the file.
     */
    public PositionMapping getPositionMapping() {
        return _mappingBuilder.getMapping();
    }

    /**
     * An algorithm mapping {@link Position} from two corresponding files.
     */
    public static final class PositionMapping {

        /**
         * The recorded deltas in the order they were added. {@link #lookup(Position)} uses a binary
         * search and therefore relies on the entries being added in ascending position order.
         */
        private final List<DeltaInfo> _deltas = new ArrayList<>();

        /**
         * Creates a {@link UnicodeEscapeProcessingProvider.PositionMapping}.
         */
        public PositionMapping() {
            super();
        }

        /**
         * Whether this is the identity transformation.
         */
        public boolean isEmpty() {
            return _deltas.isEmpty();
        }

        /**
         * Records that from the given position on the given deltas apply.
         */
        void add(int line, int column, int lineDelta, int columnDelta) {
            _deltas.add(new DeltaInfo(line, column, lineDelta, columnDelta));
        }

        /**
         * Looks up the {@link PositionUpdate} for the given Position.
         *
         * @return The update recorded at the given position or, if there is none, the closest one
         *         recorded before it. {@link PositionUpdate#NONE} if the position lies before the
         *         first recorded update.
         */
        public PositionUpdate lookup(Position position) {
            int result = Collections.binarySearch(_deltas, position);
            if (result >= 0) {
                return _deltas.get(result);
            }
            int insertIndex = -result - 1;
            if (insertIndex == 0) {
                // Before the first delta info, identity mapping.
                return PositionUpdate.NONE;
            }
            return _deltas.get(insertIndex - 1);
        }

        /**
         * Algorithm updating a {@link Position} from one file to a
         * {@link Position} in a corresponding file.
         */
        public static interface PositionUpdate {

            /**
             * The identity position mapping.
             */
            PositionUpdate NONE = new PositionUpdate() {

                @Override
                public int transformLine(int line) {
                    return line;
                }

                @Override
                public int transformColumn(int column) {
                    return column;
                }

                @Override
                public Position transform(Position pos) {
                    // Returning the same instance lets callers detect "no change" by identity.
                    return pos;
                }
            };

            /**
             * Maps the given line to an original line.
             */
            int transformLine(int line);

            /**
             * Maps the given column to an original column.
             */
            int transformColumn(int column);

            /**
             * The transformed position.
             */
            default Position transform(Position pos) {
                int line = pos.line;
                int column = pos.column;
                int transformedLine = transformLine(line);
                int transformedColumn = transformColumn(column);
                return new Position(transformedLine, transformedColumn);
            }
        }

        /**
         * A {@link PositionUpdate} that is itself the {@link Position} from which on it applies.
         */
        private static final class DeltaInfo extends Position implements PositionUpdate {

            /**
             * The offset to add to the {@link #line} and all following source
             * positions up to the next {@link PositionUpdate}.
             */
            private final int _lineDelta;

            /**
             * The offset to add to the {@link #column} and all following
             * source positions up to the next {@link PositionUpdate}.
             */
            private final int _columnDelta;

            /**
             * Creates a {@link PositionUpdate}.
             */
            public DeltaInfo(int line, int column, int lineDelta, int columnDelta) {
                super(line, column);
                _lineDelta = lineDelta;
                _columnDelta = columnDelta;
            }

            @Override
            public int transformLine(int sourceLine) {
                return sourceLine + _lineDelta;
            }

            @Override
            public int transformColumn(int sourceColumn) {
                return sourceColumn + _columnDelta;
            }

            @Override
            public String toString() {
                return "(" + line + ", " + column + ": " + _lineDelta + ", " + _columnDelta + ")";
            }
        }

        /**
         * Transforms the given {@link Position}.
         */
        public Position transform(Position pos) {
            return lookup(pos).transform(pos);
        }

        /**
         * Transforms the given {@link Range}.
         *
         * @return The given instance if neither end point was changed, a new {@link Range} otherwise.
         */
        public Range transform(Range range) {
            Position begin = transform(range.begin);
            Position end = transform(range.end);
            if (begin == range.begin && end == range.end) {
                // No change.
                return range;
            }
            return new Range(begin, end);
        }
    }

    /**
     * Builds a {@link PositionMapping} by comparing the positions of two {@link LineCounter}s each
     * time {@link #update()} is called.
     */
    private static final class PositionMappingBuilder {

        private final LineCounter _left;

        private final LineCounter _right;

        private final PositionMapping _mapping = new PositionMapping();

        /**
         * The line delta most recently added to the mapping.
         */
        private int _lineDelta = 0;

        /**
         * The column delta most recently added to the mapping.
         */
        private int _columnDelta = 0;

        /**
         * Creates a {@link PositionMappingBuilder}.
         *
         * @param left The source {@link LineCounter}.
         * @param right The target {@link LineCounter}.
         */
        public PositionMappingBuilder(LineCounter left, LineCounter right) {
            _left = left;
            _right = right;
            update();
        }

        /**
         * The built {@link PositionMapping}.
         */
        public PositionMapping getMapping() {
            return _mapping;
        }

        /**
         * Adds an entry at the current position of the left counter if the distance between the two
         * counters differs from the one recorded last.
         */
        public void update() {
            int lineDelta = _right.getLine() - _left.getLine();
            int columnDelta = _right.getColumn() - _left.getColumn();
            if (lineDelta != _lineDelta || columnDelta != _columnDelta) {
                _mapping.add(_left.getLine(), _left.getColumn(), lineDelta, columnDelta);
                _lineDelta = lineDelta;
                _columnDelta = columnDelta;
            }
        }
    }

    /**
     * Processor keeping track of the current line and column in a stream of
     * incoming characters.
     *
     * @see #process(int)
     */
    public static final class LineCounter {

        /**
         * Whether {@link #CR} has been seen on the input as last character.
         */
        private boolean _crSeen;

        private int _line = 1;

        private int _column = 1;

        /**
         * The state saved by the last call to {@link #mark()}.
         */
        private boolean _markedCrSeen;

        private int _markedLine = 1;

        private int _markedColumn = 1;

        /**
         * Creates a {@link UnicodeEscapeProcessingProvider.LineCounter}.
         */
        public LineCounter() {
            super();
        }

        /**
         * The line of the currently processed input character.
         */
        public int getLine() {
            return _line;
        }

        /**
         * The column of the currently processed input character.
         */
        public int getColumn() {
            return _column;
        }

        /**
         * The current position.
         */
        public Position getPosition() {
            return new Position(getLine(), getColumn());
        }

        /**
         * Analyzes the given character for line feed.
         *
         * <p>A carriage return or a line feed starts a new line, a line feed directly following a
         * carriage return is ignored, the end-of-file marker changes nothing and every other
         * character advances the column.</p>
         *
         * @return The given character.
         */
        public int process(int ch) {
            switch (ch) {
                case EOF:
                    {
                        break;
                    }
                case CR:
                    {
                        incLine();
                        _crSeen = true;
                        break;
                    }
                case LF:
                    {
                        // CR LF does only count as a single line terminator.
                        if (_crSeen) {
                            _crSeen = false;
                        } else {
                            incLine();
                        }
                        break;
                    }
                default:
                    {
                        _crSeen = false;
                        _column++;
                    }
            }
            return ch;
        }

        /**
         * Remembers the current state so that it can be restored with {@link #reset()}.
         */
        private void mark() {
            _markedCrSeen = _crSeen;
            _markedLine = _line;
            _markedColumn = _column;
        }

        /**
         * Restores the state remembered by the last call to {@link #mark()}.
         */
        private void reset() {
            _crSeen = _markedCrSeen;
            _line = _markedLine;
            _column = _markedColumn;
        }

        private void incLine() {
            _line++;
            _column = 1;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy