org.codehaus.groovy.antlr.UnicodeEscapingReader Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of groovy-all Show documentation
Groovy: A powerful, dynamic language for the JVM
There is a newer version: 3.0.22
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.codehaus.groovy.antlr;

import java.io.IOException;
import java.io.Reader;

import antlr.CharScanner;
import antlr.Token;
import antlr.TokenStreamException;

/**
 * Translates GLS-defined unicode escapes into characters. Throws an exception
 * in the event of an invalid unicode escape being detected.
 * 
 * No attempt has been made to optimize this class for speed or space.
 */
public class UnicodeEscapingReader extends Reader {

    private final Reader reader;
    private CharScanner lexer;
    private boolean hasNextChar = false;
    private int nextChar;
    private final SourceBuffer sourceBuffer;
    private int previousLine;
    private int numUnicodeEscapesFound = 0;
    private int numUnicodeEscapesFoundOnCurrentLine = 0;

    private static class DummyLexer extends CharScanner {
        private final Token t = new Token();
        public Token nextToken() throws TokenStreamException {
            return t;
        }
        @Override
        public int getColumn() {
            return 0;
        }
        @Override
        public int getLine() {
            return 0;
        }
    }
    
    /**
     * Constructor.
     * @param reader The reader that this reader will filter over.
     */
    public UnicodeEscapingReader(Reader reader,SourceBuffer sourceBuffer) {
        this.reader = reader;
        this.sourceBuffer = sourceBuffer;
        this.lexer = new DummyLexer();
    }

    /**
     * Sets the lexer that is using this reader. Must be called before the
     * lexer is used.
     */
    public void setLexer(CharScanner lexer) {
        this.lexer = lexer;
    }

    /**
     * Reads characters from the underlying reader.
     * @see java.io.Reader#read(char[],int,int)
     */
    public int read(char cbuf[], int off, int len) throws IOException {
        int c = 0;
        int count = 0;
        while (count < len && (c = read())!= -1) {
            cbuf[off + count] = (char) c;
            count++;
        }
        return (count == 0 && c == -1) ? -1 : count;
    }

    /**
     * Gets the next character from the underlying reader,
     * translating escapes as required.
     * @see java.io.Reader#close()
     */
    public int read() throws IOException {
        if (hasNextChar) {
            hasNextChar = false;
            write(nextChar);
            return nextChar;
        }

        if (previousLine != lexer.getLine()) {
            // new line, so reset unicode escapes
            numUnicodeEscapesFoundOnCurrentLine = 0;
            previousLine = lexer.getLine();
        }
        
        int c = reader.read();
        if (c != '\\') {
            write(c);
            return c;
        }

        // Have one backslash, continue if next char is 'u'
        c = reader.read();
        if (c != 'u') {
            hasNextChar = true;
            nextChar = c;
            write('\\');
            return '\\';
        }

        // Swallow multiple 'u's
        int numberOfUChars = 0;
        do {
            numberOfUChars++;
            c = reader.read();
        } while (c == 'u');

        // Get first hex digit
        checkHexDigit(c);
        StringBuilder charNum = new StringBuilder();
        charNum.append((char) c);

        // Must now be three more hex digits
        for (int i = 0; i < 3; i++) {
            c = reader.read();
            checkHexDigit(c);
            charNum.append((char) c);
        }
        int rv = Integer.parseInt(charNum.toString(), 16);
        write(rv);
        
        numUnicodeEscapesFound += 4 + numberOfUChars;
        numUnicodeEscapesFoundOnCurrentLine += 4 + numberOfUChars;

        return rv;
    }
    private void write(int c) {
        if (sourceBuffer != null) {sourceBuffer.write(c);}
    }
    /**
     * Checks that the given character is indeed a hex digit.
     */
    private void checkHexDigit(int c) throws IOException {
        if (c >= '0' && c <= '9') {
            return;
        }
        if (c >= 'a' && c <= 'f') {
            return;
        }
        if (c >= 'A' && c <= 'F') {
            return;
        }
        // Causes the invalid escape to be skipped
        hasNextChar = true;
        nextChar = c;
        throw new IOException("Did not find four digit hex character code."
                + " line: " + lexer.getLine() + " col:" + lexer.getColumn());
    }

    public int getUnescapedUnicodeColumnCount() {
        return numUnicodeEscapesFoundOnCurrentLine;
    }

    public int getUnescapedUnicodeOffsetCount() {
        return numUnicodeEscapesFound;
    }

    /**
     * Closes this reader by calling close on the underlying reader.
     *
     * @see java.io.Reader#close()
     */
    public void close() throws IOException {
        reader.close();
    }
}