All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.codehaus.groovy.antlr.UnicodeEscapingReader Maven / Gradle / Ivy

/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.codehaus.groovy.antlr;

import groovyjarjarantlr.CharScanner;
import groovyjarjarantlr.Token;
import groovyjarjarantlr.TokenStreamException;

import java.io.IOException;
import java.io.Reader;

/**
 * Translates GLS-defined unicode escapes into characters. Throws an exception
 * in the event of an invalid unicode escape being detected.
 * 

* No attempt has been made to optimize this class for speed or space. */ public class UnicodeEscapingReader extends Reader { private final Reader reader; private CharScanner lexer; private boolean hasNextChar = false; private int nextChar; private final SourceBuffer sourceBuffer; private int previousLine; private int numUnicodeEscapesFound = 0; private int numUnicodeEscapesFoundOnCurrentLine = 0; private static class DummyLexer extends CharScanner { private final Token t = new Token(); @Override public Token nextToken() throws TokenStreamException { return t; } @Override public int getColumn() { return 0; } @Override public int getLine() { return 0; } } /** * Constructor. * @param reader The reader that this reader will filter over. */ public UnicodeEscapingReader(Reader reader,SourceBuffer sourceBuffer) { this.reader = reader; this.sourceBuffer = sourceBuffer; // GRECLIPSE add if (sourceBuffer != null) { sourceBuffer.unescaper = this; } // GRECLIPSE end this.lexer = new DummyLexer(); } /** * Sets the lexer that is using this reader. Must be called before the * lexer is used. */ public void setLexer(CharScanner lexer) { this.lexer = lexer; } /** * Reads characters from the underlying reader. * @see java.io.Reader#read(char[],int,int) */ @Override public int read(char cbuf[], int off, int len) throws IOException { int c = 0; int count = 0; while (count < len && (c = read())!= -1) { cbuf[off + count] = (char) c; count++; } return (count == 0 && c == -1) ? -1 : count; } /** * Gets the next character from the underlying reader, * translating escapes as required. * @see java.io.Reader#close() */ @Override public int read() throws IOException { if (hasNextChar) { hasNextChar = false; // GRECLIPSE add checkCodePoint(nextChar); // GRECLIPSE add write(nextChar); return nextChar; } if (previousLine != lexer.getLine()) { // new line, so reset unicode escapes numUnicodeEscapesFoundOnCurrentLine = 0; previousLine = lexer.getLine(); } int c = reader.read(); if (c != '\\') { // GRECLIPSE add checkCodePoint(c); // GRECLIPSE add write(c); return c; } // Have one backslash, continue if next char is 'u' c = reader.read(); if (c != 'u') { hasNextChar = true; nextChar = c; write('\\'); return '\\'; } // Swallow multiple 'u's int numberOfUChars = 0; do { numberOfUChars++; c = reader.read(); } while (c == 'u'); // Get first hex digit checkHexDigit(c); StringBuilder charNum = new StringBuilder(); charNum.append((char) c); // Must now be three more hex digits for (int i = 0; i < 3; i++) { c = reader.read(); checkHexDigit(c); charNum.append((char) c); } int rv = Integer.parseInt(charNum.toString(), 16); write(rv); numUnicodeEscapesFound += 4 + numberOfUChars; numUnicodeEscapesFoundOnCurrentLine += 4 + numberOfUChars; return rv; } private void write(int c) { if (sourceBuffer != null) {sourceBuffer.write(c);} } // GRECLIPSE add private void checkCodePoint(int c) { if (Character.isIdentifierIgnorable(c) || Character.getType(c) == Character.CONTROL && c != '\t' && c != '\r' && c != '\n') { lexer.reportError(String.format("Unexpected character 0x%02X (%s) at column %d", c, Character.getName(c), lexer.getColumn())); } } // GRECLIPSE end /** * Checks that the given character is indeed a hex digit. */ private void checkHexDigit(int c) throws IOException { if (c >= '0' && c <= '9') { return; } if (c >= 'a' && c <= 'f') { return; } if (c >= 'A' && c <= 'F') { return; } // Causes the invalid escape to be skipped hasNextChar = true; nextChar = c; throw new IOException("Did not find four digit hex character code." + " line: " + lexer.getLine() + " col:" + lexer.getColumn()); } public int getUnescapedUnicodeColumnCount() { return numUnicodeEscapesFoundOnCurrentLine; } public int getUnescapedUnicodeOffsetCount() { return numUnicodeEscapesFound; } /** * Closes this reader by calling close on the underlying reader. * * @see java.io.Reader#close() */ @Override public void close() throws IOException { reader.close(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy