All downloads are free. Search and download functionality uses the official Maven repository.

net.sandius.rembulan.parser.ast.StringLiteral Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2016 Miroslav Janíček
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.sandius.rembulan.parser.ast;

import net.sandius.rembulan.ByteString;
import net.sandius.rembulan.ByteStringBuilder;
import net.sandius.rembulan.LuaFormat;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

/**
 * A literal AST node whose value is a {@link ByteString}.
 *
 * <p>Instances are created through the static factories: {@link #fromString(ByteString)}
 * decodes a quoted literal with escape sequences, whereas {@link #verbatim(String)} and
 * {@link #fromName(Name)} store their argument without any decoding.
 */
public class StringLiteral extends Literal {

	private final ByteString value;

	private StringLiteral(ByteString value) {
		this.value = value;
	}

	/**
	 * Returns the numeric value of the decimal digit {@code c}, or {@code -1}
	 * if {@code c} is not one of {@code '0'..'9'} (this includes the end-of-stream
	 * marker {@code -1}).
	 */
	private static int decValueOf(int c) {
		return c >= '0' && c <= '9' ? c - (int) '0' : -1;
	}

	/**
	 * Returns the numeric value of the hexadecimal digit {@code c} (either case),
	 * or {@code -1} if {@code c} is not a hexadecimal digit.
	 */
	private static int hexValueOf(int c) {
		if (c >= '0' && c <= '9') {
			return c - (int) '0';
		}
		if (c >= 'a' && c <= 'f') {
			return 10 + c - (int) 'a';
		}
		if (c >= 'A' && c <= 'F') {
			return 10 + c - (int) 'A';
		}
		return -1;
	}

	/**
	 * Returns {@code true} iff {@code c} is at most {@code 0xff} and
	 * {@link Character#isWhitespace(int)} holds for it.
	 */
	private static boolean isWhitespace(int c) {
		return c <= 0xff && Character.isWhitespace(c);
	}

	/**
	 * Encodes {@code codePoint} using {@code encoder} and appends the resulting bytes
	 * to {@code builder}.
	 *
	 * @throws IllegalStateException  if the encoder reports a coding error
	 */
	private static void appendBytes(ByteStringBuilder builder, CharsetEncoder encoder, int codePoint) {
		final ByteBuffer bytes;
		try {
			bytes = encoder.encode(CharBuffer.wrap(Character.toChars(codePoint)));
		}
		catch (CharacterCodingException ex) {
			// should not happen
			throw new IllegalStateException(ex);
		}

		while (bytes.hasRemaining()) {
			builder.append(bytes.get());
		}
	}

	/**
	 * Decodes a quoted string literal read from {@code in} into its byte value.
	 *
	 * <p>The first byte is taken to be the quotation mark; every later unescaped
	 * occurrence of that byte is dropped, all other unescaped bytes are copied as they are.
	 * A backslash introduces one of the following escapes:
	 * <ul>
	 *   <li>one to three decimal digits: the byte with that value (at most 255);</li>
	 *   <li>{@code x} and exactly two hexadecimal digits: the byte with that value;</li>
	 *   <li>{@code u}, an opening brace, one or more hexadecimal digits and a closing brace:
	 *     the UTF-8 encoding of that code point (at most {@code Character.MAX_CODE_POINT});</li>
	 *   <li>{@code z}: skips the whitespace that follows;</li>
	 *   <li>a line feed, or a carriage return followed by a line feed: a single line feed;</li>
	 *   <li>one of {@code a b f n r t v}, a backslash or a single or double quote:
	 *     the corresponding single byte.</li>
	 * </ul>
	 *
	 * <p>Indices in error messages are zero-based byte offsets into the input, counting
	 * the opening quotation mark as index 0.
	 *
	 * @param in  the stream to read the literal from, must not be {@code null}
	 * @return  the decoded bytes
	 *
	 * @throws IOException  if reading from {@code in} fails
	 * @throws NullPointerException  if {@code in} is {@code null}
	 * @throws IllegalArgumentException  if the input is empty or contains a malformed
	 *                                   escape sequence
	 */
	private static ByteString stringValueOf(InputStream in) throws IOException {
		Objects.requireNonNull(in);

		// buffered so that mark()/reset() can be used for single-byte lookahead
		BufferedInputStream stream = new BufferedInputStream(in);

		// encoder for UTF-8 byte sequences
		// NOTE(review): with REPLACE, code points the encoder cannot encode (presumably
		// lone surrogates) are substituted rather than rejected -- confirm this is intended
		CharsetEncoder utf8Encoder = StandardCharsets.UTF_8.newEncoder()
				.onMalformedInput(CodingErrorAction.REPLACE)
				.onUnmappableCharacter(CodingErrorAction.REPLACE);

		ByteStringBuilder bld = new ByteStringBuilder();

		// the opening quotation mark; unescaped occurrences of it are not part of the value
		final int qmark = stream.read();
		if (qmark == -1) throw new IllegalArgumentException("string is empty");

		// the number of bytes consumed so far, incl. the quotation mark;
		// the most recently consumed byte is therefore at index (pos - 1)
		int pos = 1;

		int c;
		while ((c = stream.read()) != -1) {
			pos += 1;

			if (c == '\\') {
				// escaped character

				// index of the '\\' character
				final int oldPos = pos - 1;

				// the byte selecting the kind of escape; it is counted in pos so that
				// indices reported for this and all subsequent escapes stay accurate
				c = stream.read();
				pos += 1;

				if (decValueOf(c) != -1) {
					// decimal char specification: at most three such characters

					int dec = decValueOf(c);

					int rem = 2;
					do {
						// peek at the next byte: it is consumed only if it is a digit
						stream.mark(rem);
						c = stream.read();
						pos += 1;

						int digit = decValueOf(c);
						if (digit != -1) {
							dec = dec * 10 + digit;
							rem -= 1;
						}
						else {
							// that was it: un-read the byte and undo the position update
							stream.reset();
							pos -= 1;
							break;
						}
					} while (rem > 0);

					if (dec > 255) {
						throw new IllegalArgumentException("decimal escape too large at index " + oldPos);
					}

					bld.append((byte) dec);
				}
				else if (c == 'x') {
					// hexadecimal escape: exactly two hexadecimal digits

					int hex = 0;
					for (int j = 0; j < 2; j++) {
						c = stream.read();
						pos += 1;

						int digit = hexValueOf(c);

						if (digit == -1) {
							throw new IllegalArgumentException("hexadecimal digit expected at index " + (pos - 1));
						}
						else {
							hex = (hex << 4) + digit;
						}
					}

					bld.append((byte) hex);
				}
				else if (c == 'u') {
					// code point escape: one or more hexadecimal digits enclosed in braces

					if (stream.read() != '{') {
						throw new IllegalArgumentException("missing '{' at index " + (oldPos + 2));
					}

					pos += 1;  // the opening '{' has been read

					int value;

					// at least one digit is required
					{
						c = stream.read();
						pos += 1;

						int digit = hexValueOf(c);
						if (digit == -1) {
							throw new IllegalArgumentException("hexadecimal digit expected at index " + (pos - 1));
						}
						value = digit;
					}

					// remaining digits up to the closing '}'
					do {
						c = stream.read();
						pos += 1;

						int digit = hexValueOf(c);
						if (c != '}') {
							if (digit == -1) {
								throw new IllegalArgumentException("hexadecimal digit expected at index " + (pos - 1));
							}
							else {
								value = (value << 4) + digit;

								// checked after every digit, so value cannot overflow an int;
								// MAX_CODE_POINT itself is a valid code point and is accepted
								if (value > Character.MAX_CODE_POINT) {
									throw new IllegalArgumentException("UTF-8 value too large at index " + (pos - 1));
								}
							}
						}
					} while (c != '}');

					appendBytes(bld, utf8Encoder, value);
				}
				else if (c == 'z') {
					// skip subsequent whitespace; the first non-whitespace byte is only
					// peeked at and pushed back so that the main loop still sees it
					while (true) {
						stream.mark(1);
						if (!isWhitespace(stream.read())) {
							stream.reset();
							break;
						}
						pos += 1;
					}
				}
				else if (c == '\n' || c == '\r') {
					// escaped line break: always yields a single line feed
					if (c == '\r') {
						// a carriage return must be followed by a line feed
						if (stream.read() != '\n') {
							throw new IllegalArgumentException("\\n expected at index " + oldPos);
						}
						else {
							pos += 1;
						}
					}

					bld.append((byte) '\n');
				}
				else {
					// single-character escapes
					final char d;
					switch (c) {
						case 'a': d = LuaFormat.CHAR_BELL; break;
						case 'b': d = '\b'; break;
						case 'f': d = '\f'; break;
						case 'n': d = '\n'; break;
						case 'r': d = '\r'; break;
						case 't': d = '\t'; break;
						case 'v': d = LuaFormat.CHAR_VERTICAL_TAB; break;
						case '\\': d = '\\'; break;
						case '\'': d = '\''; break;
						case '"': d = '"'; break;
						default:
							// report the offending character itself rather than its numeric code
							throw new IllegalArgumentException("invalid escape sequence at index " + oldPos
									+ " (\\" + (c == -1 ? "<eof>" : String.valueOf((char) c)) + ")");
					}
					bld.append((byte) d);
				}
			}
			else {
				// plain byte: copied unless it is the surrounding quotation mark
				if (c != qmark) {
					bld.append((byte) c);
				}
			}
		}

		return bld.toByteString();
	}

	/**
	 * Decodes the quoted string literal {@code s}; see {@link #stringValueOf(InputStream)}.
	 *
	 * @throws IllegalArgumentException  if {@code s} is empty or contains a malformed
	 *                                   escape sequence
	 */
	private static ByteString stringValueOf(ByteString s) {
		try {
			return stringValueOf(s.asInputStream());
		}
		catch (IOException ex) {
			// should not happen!
			throw new RuntimeException(ex);
		}
	}

	/**
	 * Creates a string literal by decoding the quoted literal {@code s}, i.e. stripping
	 * the quotation marks and resolving escape sequences.
	 *
	 * @param s  the literal including its quotation marks, must not be {@code null}
	 * @return  a string literal holding the decoded value
	 *
	 * @throws IllegalArgumentException  if {@code s} is empty or contains a malformed
	 *                                   escape sequence
	 */
	public static StringLiteral fromString(ByteString s) {
		return new StringLiteral(stringValueOf(s));
	}

	/**
	 * Creates a string literal by converting {@code s} with {@code ByteString.of}
	 * and decoding the result as in {@link #fromString(ByteString)}.
	 *
	 * @deprecated use {@link #fromString(ByteString)} instead
	 */
	@Deprecated
	public static StringLiteral fromString(String s) {
		return fromString(ByteString.of(s));
	}

	/**
	 * Creates a string literal whose value is {@code ByteString.of(s)}, without
	 * interpreting quotation marks or escape sequences.
	 */
	// TODO: use a ByteString parameter
	public static StringLiteral verbatim(String s) {
		return new StringLiteral(ByteString.of(s));
	}

	/**
	 * Creates a string literal whose value is the value of the name {@code n},
	 * without interpreting quotation marks or escape sequences.
	 */
	public static StringLiteral fromName(Name n) {
		return new StringLiteral(ByteString.of(n.value()));
	}

	/**
	 * Returns the value of this literal.
	 */
	public ByteString value() {
		return value;
	}

	/**
	 * Passes this literal to {@code tf.transform} and returns the result.
	 */
	@Override
	public Literal accept(Transformer tf) {
		return tf.transform(this);
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy