All downloads are free. Search and download functionality uses the official Maven repository.

org.openjdk.tools.javac.parser.UnicodeReader Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.openjdk.tools.javac.parser;

import java.nio.CharBuffer;
import java.util.Arrays;

import org.openjdk.tools.javac.file.JavacFileManager;
import org.openjdk.tools.javac.util.ArrayUtils;
import org.openjdk.tools.javac.util.Log;
import org.openjdk.tools.javac.util.Name;
import org.openjdk.tools.javac.util.Names;

import static org.openjdk.tools.javac.util.LayoutCharacters.*;

/** The char reader used by the javac lexer/tokenizer. Returns the sequence of
 * characters contained in the input stream, handling unicode escape accordingly.
 * Additionally, it provides features for saving chars into a buffer and to retrieve
 * them at a later stage.
 *
 *  

This is NOT part of any supported API. * If you write code that depends on this, you do so at your own risk. * This code and its internal interfaces are subject to change or * deletion without notice. */ public class UnicodeReader { /** The input buffer, index of next character to be read, * index of one past last character in buffer. */ protected char[] buf; protected int bp; protected final int buflen; /** The current character. */ protected char ch; /** The buffer index of the last converted unicode character */ protected int unicodeConversionBp = -1; protected Log log; protected Names names; /** A character buffer for saved chars. */ protected char[] sbuf = new char[128]; protected int sp; /** * Create a scanner from the input array. This method might * modify the array. To avoid copying the input array, ensure * that {@code inputLength < input.length} or * {@code input[input.length -1]} is a white space character. * * @param sf the factory which created this Scanner * @param buffer the input, might be modified * Must be positive and less than or equal to input.length. */ protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) { this(sf, JavacFileManager.toArray(buffer), buffer.limit()); } protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) { log = sf.log; names = sf.names; if (inputLength == input.length) { if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { inputLength--; } else { input = Arrays.copyOf(input, inputLength + 1); } } buf = input; buflen = inputLength; buf[buflen] = EOI; bp = -1; scanChar(); } /** Read next character. */ protected void scanChar() { if (bp < buflen) { ch = buf[++bp]; if (ch == '\\') { convertUnicode(); } } } /** Read next character in comment, skipping over double '\' characters. */ protected void scanCommentChar() { scanChar(); if (ch == '\\') { if (peekChar() == '\\' && !isUnicode()) { skipChar(); } else { convertUnicode(); } } } /** Append a character to sbuf. 
*/ protected void putChar(char ch, boolean scan) { sbuf = ArrayUtils.ensureCapacity(sbuf, sp); sbuf[sp++] = ch; if (scan) scanChar(); } protected void putChar(char ch) { putChar(ch, false); } protected void putChar(boolean scan) { putChar(ch, scan); } Name name() { return names.fromChars(sbuf, 0, sp); } String chars() { return new String(sbuf, 0, sp); } /** Convert unicode escape; bp points to initial '\' character * (Spec 3.3). */ protected void convertUnicode() { if (ch == '\\' && unicodeConversionBp != bp) { bp++; ch = buf[bp]; if (ch == 'u') { do { bp++; ch = buf[bp]; } while (ch == 'u'); int limit = bp + 3; if (limit < buflen) { int d = digit(bp, 16); int code = d; while (bp < limit && d >= 0) { bp++; ch = buf[bp]; d = digit(bp, 16); code = (code << 4) + d; } if (d >= 0) { ch = (char)code; unicodeConversionBp = bp; return; } } log.error(bp, "illegal.unicode.esc"); } else { bp--; ch = '\\'; } } } /** Are surrogates supported? */ final static boolean surrogatesSupported = surrogatesSupported(); private static boolean surrogatesSupported() { try { Character.isHighSurrogate('a'); return true; } catch (NoSuchMethodError ex) { return false; } } /** Scan surrogate pairs. If 'ch' is a high surrogate and * the next character is a low surrogate, returns the code point * constructed from these surrogates. Otherwise, returns -1. * This method will not consume any of the characters. */ protected int peekSurrogates() { if (surrogatesSupported && Character.isHighSurrogate(ch)) { char high = ch; int prevBP = bp; scanChar(); char low = ch; ch = high; bp = prevBP; if (Character.isLowSurrogate(low)) { return Character.toCodePoint(high, low); } } return -1; } /** Convert an ASCII digit from its base (8, 10, or 16) * to its value. */ protected int digit(int pos, int base) { char c = ch; if ('0' <= c && c <= '9') return Character.digit(c, base); //a fast common case int codePoint = peekSurrogates(); int result = codePoint >= 0 ? 
Character.digit(codePoint, base) : Character.digit(c, base); if (result >= 0 && c > 0x7f) { log.error(pos + 1, "illegal.nonascii.digit"); if (codePoint >= 0) scanChar(); ch = "0123456789abcdef".charAt(result); } return result; } protected boolean isUnicode() { return unicodeConversionBp == bp; } protected void skipChar() { bp++; } protected char peekChar() { return buf[bp + 1]; } /** * Returns a copy of the input buffer, up to its inputLength. * Unicode escape sequences are not translated. */ public char[] getRawCharacters() { char[] chars = new char[buflen]; System.arraycopy(buf, 0, chars, 0, buflen); return chars; } /** * Returns a copy of a character array subset of the input buffer. * The returned array begins at the {@code beginIndex} and * extends to the character at index {@code endIndex - 1}. * Thus the length of the substring is {@code endIndex-beginIndex}. * This behavior is like * {@code String.substring(beginIndex, endIndex)}. * Unicode escape sequences are not translated. * * @param beginIndex the beginning index, inclusive. * @param endIndex the ending index, exclusive. * @throws ArrayIndexOutOfBoundsException if either offset is outside of the * array bounds */ public char[] getRawCharacters(int beginIndex, int endIndex) { int length = endIndex - beginIndex; char[] chars = new char[length]; System.arraycopy(buf, beginIndex, chars, 0, length); return chars; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy