All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.antlr.v4.misc.EscapeSequenceParsing Maven / Gradle / Ivy

There is a newer version: 4.13.2
Show newest version
/*
 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

package org.antlr.v4.misc;

import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.unicode.UnicodeData;

import java.util.Objects;

/**
 * Utility class to parse escapes like:
 *   \\n
 *   \\uABCD
 *   \\u{10ABCD}
 *   \\p{Foo}
 *   \\P{Bar}
 *   \\p{Baz=Blech}
 *   \\P{Baz=Blech}
 */
public abstract class EscapeSequenceParsing {
	public static class Result {
		public enum Type {
			INVALID,
			CODE_POINT,
			PROPERTY
		};

		public final Type type;
		public final int codePoint;
		public final IntervalSet propertyIntervalSet;
		public final int startOffset;
		public final int parseLength;

		public Result(Type type, int codePoint, IntervalSet propertyIntervalSet, int startOffset, int parseLength) {
			this.type = type;
			this.codePoint = codePoint;
			this.propertyIntervalSet = propertyIntervalSet;
			this.startOffset = startOffset;
			this.parseLength = parseLength;
		}

		@Override
		public String toString() {
			return String.format(
					"%s type=%s codePoint=%d propertyIntervalSet=%s parseLength=%d",
					super.toString(),
					type,
					codePoint,
					propertyIntervalSet,
					parseLength);
		}

		@Override
		public boolean equals(Object other) {
			if (!(other instanceof Result)) {
				return false;
			}
			Result that = (Result) other;
			if (this == that) {
				return true;
			}
			return Objects.equals(this.type, that.type) &&
				Objects.equals(this.codePoint, that.codePoint) &&
				Objects.equals(this.propertyIntervalSet, that.propertyIntervalSet) &&
				Objects.equals(this.parseLength, that.parseLength);
		}

		@Override
		public int hashCode() {
			return Objects.hash(type, codePoint, propertyIntervalSet, parseLength);
		}
	}

	/**
	 * Parses a single escape sequence starting at {@code startOff}.
	 *
	 * Returns a type of INVALID if no valid escape sequence was found, a Result otherwise.
	 */
	public static Result parseEscape(String s, int startOff) {
		int offset = startOff;
		if (offset + 2 > s.length() || s.codePointAt(offset) != '\\') {
			return invalid(startOff, s.length()-1);
		}
		// Move past backslash
		offset++;
		int escaped = s.codePointAt(offset);
		// Move past escaped code point
		offset += Character.charCount(escaped);
		if (escaped == 'u') {
			// \\u{1} is the shortest we support
			if (offset + 3 > s.length()) {
				return invalid(startOff, s.length()-1);
			}
			int hexStartOffset;
			int hexEndOffset; // appears to be exclusive
			if (s.codePointAt(offset) == '{') {
				hexStartOffset = offset + 1;
				hexEndOffset = s.indexOf('}', hexStartOffset);
				if (hexEndOffset == -1) {
					return invalid(startOff, s.length()-1);
				}
				offset = hexEndOffset + 1;
			}
			else {
				if (offset + 4 > s.length()) {
					return invalid(startOff, s.length()-1);
				}
				hexStartOffset = offset;
				hexEndOffset = offset + 4;
				offset = hexEndOffset;
			}
			int codePointValue = CharSupport.parseHexValue(s, hexStartOffset, hexEndOffset);
			if (codePointValue == -1 || codePointValue > Character.MAX_CODE_POINT) {
				return invalid(startOff, startOff+6-1);
			}
			return new Result(
				Result.Type.CODE_POINT,
				codePointValue,
				IntervalSet.EMPTY_SET,
				startOff,
				offset - startOff);
		}
		else if (escaped == 'p' || escaped == 'P') {
			// \p{L} is the shortest we support
			if (offset + 3 > s.length()) {
				return invalid(startOff, s.length()-1);
			}
			if (s.codePointAt(offset) != '{') {
				return invalid(startOff, offset);
			}
			int openBraceOffset = offset;
			int closeBraceOffset = s.indexOf('}', openBraceOffset);
			if (closeBraceOffset == -1) {
				return invalid(startOff, s.length()-1);
			}
			String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset);
			IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName);
			if (propertyIntervalSet == null) {
				return invalid(startOff, closeBraceOffset);
			}
			offset = closeBraceOffset + 1;
			if (escaped == 'P') {
				propertyIntervalSet = propertyIntervalSet.complement(IntervalSet.COMPLETE_CHAR_SET);
			}
			return new Result(
				Result.Type.PROPERTY,
				-1,
				propertyIntervalSet,
				startOff,
				offset - startOff);
		}
		else if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
			int codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
			if (codePoint == 0) {
				if (escaped != ']' && escaped != '-') { // escape ']' and '-' only in char sets.
					return invalid(startOff, startOff+1);
				}
				else {
					codePoint = escaped;
				}
			}
			return new Result(
				Result.Type.CODE_POINT,
				codePoint,
				IntervalSet.EMPTY_SET,
				startOff,
				offset - startOff);
		}
		else {
			return invalid(startOff,s.length()-1);
		}
	}

	private static Result invalid(int start, int stop) { // start..stop is inclusive
		return new Result(
			Result.Type.INVALID,
			0,
			IntervalSet.EMPTY_SET,
			start,
			stop - start + 1);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy