All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.javaparser.UnicodeEscapeProcessingProvider Maven / Gradle / Ivy

/*
 * Copyright (C) 2007-2010 Júlio Vilmar Gesser.
 * Copyright (C) 2011, 2013-2021 The JavaParser Team.
 *
 * This file is part of JavaParser.
 *
 * JavaParser can be used either under the terms of
 * a) the GNU Lesser General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * b) the terms of the Apache License
 *
 * You should have received a copy of both licenses in LICENCE.LGPL and
 * LICENCE.APACHE. Please refer to those files for details.
 *
 * JavaParser is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 */
package com.github.javaparser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * {@link Provider} un-escaping unicode escape sequences in the input sequence.
 *
 * <p>
 * A sequence consisting of a backslash, one or more {@code u} characters and
 * exactly four hexadecimal digits is replaced by the single character with
 * that code. A backslash that directly follows another (unprocessed)
 * backslash does not start an escape sequence. Anything that looks like the
 * start of an escape but is not completed by four hexadecimal digits is
 * passed through unchanged.
 * </p>
 *
 * <p>
 * While reading, the provider tracks line/column information of both the
 * original input and the produced output and records the differences in a
 * {@link PositionMapping}, see {@link #getPositionMapping()}.
 * </p>
 */
public class UnicodeEscapeProcessingProvider implements Provider {

	private static final char LF = '\n';

	private static final char CR = '\r';

	private static final char BACKSLASH = '\\';

	private static final int EOF = -1;

	/**
	 * Buffer size used by {@link #UnicodeEscapeProcessingProvider(Provider)}.
	 */
	private static final int DEFAULT_BUFFER_SIZE = 2048;

	/**
	 * Number of characters by which {@link #_data} grows if a character must
	 * be pushed back into a completely filled buffer.
	 */
	private static final int BUFFER_INCREMENT = 1024;

	private char[] _data;

	/**
	 * The number of characters in {@link #_data}.
	 */
	private int _len = 0;

	/**
	 * The position in {@link #_data} where to read the next source character from.
	 */
	private int _pos = 0;

	/**
	 * Whether the last character produced was a backslash that has not been
	 * consumed as part of an escape sequence. A directly following backslash
	 * must then not start a new escape sequence.
	 */
	private boolean _backslashSeen;

	private final LineCounter _inputLine = new LineCounter();

	private final LineCounter _outputLine = new LineCounter();

	/**
	 * Copy of the state of {@link #_inputLine} directly after the backslash
	 * that started the escape sequence currently being analyzed. Used to
	 * rewind {@link #_inputLine} if the look-ahead characters are pushed back.
	 */
	private final LineCounter _inputMark = new LineCounter();

	private final PositionMappingBuilder _mappingBuilder = new PositionMappingBuilder(_outputLine, _inputLine);

	private final Provider _input;

	/**
	 * Creates a {@link UnicodeEscapeProcessingProvider} with a default buffer size.
	 *
	 * @param input The provider to read the raw (escaped) characters from.
	 */
	public UnicodeEscapeProcessingProvider(Provider input) {
		this(DEFAULT_BUFFER_SIZE, input);
	}

	/**
	 * Creates a {@link UnicodeEscapeProcessingProvider}.
	 *
	 * @param bufferSize The initial size of the internal read buffer, must be positive.
	 * @param input The provider to read the raw (escaped) characters from.
	 * @throws IllegalArgumentException If the given buffer size is not positive.
	 */
	public UnicodeEscapeProcessingProvider(int bufferSize, Provider input) {
		if (bufferSize <= 0) {
			// An empty buffer can neither be filled nor take a pushed back character.
			throw new IllegalArgumentException("Buffer size must be positive: " + bufferSize);
		}
		_input = input;
		_data = new char[bufferSize];
	}

	/**
	 * The {@link LineCounter} of the input file.
	 */
	public LineCounter getInputCounter() {
		return _inputLine;
	}

	/**
	 * The {@link LineCounter} of the output file.
	 */
	public LineCounter getOutputCounter() {
		return _outputLine;
	}

	/**
	 * Fills the given buffer with un-escaped characters.
	 *
	 * @param buffer The buffer to write to.
	 * @param offset The index in the buffer to write the first character to.
	 * @param len The maximum number of characters to write.
	 * @return The number of characters written, or {@code -1} if the end of
	 *         the input was reached before any character could be written.
	 */
	@Override
	public int read(char[] buffer, final int offset, int len) throws IOException {
		int pos = offset;
		int stop = offset + len;
		while (pos < stop) {
			int ch = _outputLine.process(nextOutputChar());
			if (ch < 0) {
				if (pos == offset) {
					// Nothing read yet, this is the end of the stream.
					return EOF;
				} else {
					break;
				}
			} else {
				// Record the offset between input and output position, if it changed.
				_mappingBuilder.update();
				buffer[pos++] = (char) ch;
			}
		}
		return pos - offset;
	}

	/**
	 * Closes the underlying input provider.
	 */
	@Override
	public void close() throws IOException {
		_input.close();
	}

	/**
	 * Produces the next un-escaped character to be written to the output.
	 *
	 * @return The next character or {@code -1} if no more characters are available.
	 */
	private int nextOutputChar() throws IOException {
		int next = nextInputChar();
		switch (next) {
			case EOF:
				return EOF;
			case BACKSLASH: {
				if (_backslashSeen) {
					// Second backslash of a pair, never starts an escape.
					return clearBackSlashSeen(next);
				} else {
					return backSlashSeen();
				}
			}
			default: {
				// An arbitrary character.
				return clearBackSlashSeen(next);
			}
		}
	}

	/**
	 * Resets the backslash marker and returns the given character unchanged.
	 */
	private int clearBackSlashSeen(int next) {
		_backslashSeen = false;
		return next;
	}

	/**
	 * Handles a backslash that potentially starts a unicode escape sequence.
	 *
	 * @return The un-escaped character, or the backslash itself if no valid
	 *         escape sequence follows.
	 */
	private int backSlashSeen() throws IOException {
		_backslashSeen = true;

		// Remember the input position directly after the backslash. All
		// characters read from here on are look-ahead that is pushed back if
		// the escape sequence turns out to be invalid.
		_inputMark.assign(_inputLine);

		int next = nextInputChar();
		switch (next) {
			case EOF:
				// End of file after backslash produces the backslash itself.
				return escapeRejected();
			case 'u': {
				return unicodeStartSeen();
			}
			default: {
				pushBack(next);
				return escapeRejected();
			}
		}
	}

	/**
	 * Handles the input after a backslash followed by a first {@code u}.
	 * Skips over any number of additional {@code u} characters.
	 */
	private int unicodeStartSeen() throws IOException {
		int uCnt = 1;
		while (true) {
			int next = nextInputChar();
			switch (next) {
				case EOF: {
					pushBackUs(uCnt);
					return escapeRejected();
				}
				case 'u': {
					uCnt++;
					continue;
				}
				default: {
					return readDigits(uCnt, next);
				}
			}
		}
	}

	/**
	 * Reads the four hexadecimal digits of an escape sequence.
	 *
	 * @param uCnt The number of {@code u} characters consumed after the backslash.
	 * @param next3 The first character after the {@code u}s (candidate for
	 *        the most significant digit).
	 * @return The character encoded by the escape, or the backslash itself
	 *         if fewer than four hexadecimal digits follow. In the latter
	 *         case, all consumed characters are pushed back to the input.
	 */
	private int readDigits(int uCnt, int next3) throws IOException {
		int[] consumed = new int[4];
		int value = 0;
		int next = next3;
		for (int n = 0; n < consumed.length; n++) {
			if (n > 0) {
				next = nextInputChar();
			}
			consumed[n] = next;

			int digit = digit(next);
			if (digit < 0) {
				// Push back in reverse reading order, so that the characters
				// are delivered again in their original order.
				for (int i = n; i >= 0; i--) {
					pushBack(consumed[i]);
				}
				pushBackUs(uCnt);
				return escapeRejected();
			}
			value = value << 4 | digit;
		}
		return clearBackSlashSeen(value);
	}

	/**
	 * Finishes the analysis of something that is not a valid escape sequence.
	 *
	 * <p>
	 * Rewinds the input position to the position directly after the
	 * backslash, because all look-ahead characters have been pushed back and
	 * are counted when they are read again. Without the rewind, those
	 * characters would be counted twice.
	 * </p>
	 *
	 * @return The backslash character to output.
	 */
	private int escapeRejected() {
		_inputLine.assign(_inputMark);
		return BACKSLASH;
	}

	/**
	 * Pushes back the given number of {@code u} characters to the input.
	 */
	private void pushBackUs(int cnt) {
		for (int n = 0; n < cnt; n++) {
			pushBack('u');
		}
	}

	/**
	 * The value of the given hexadecimal digit character.
	 *
	 * @return A value from 0 to 15, or {@code -1} if the given character is
	 *         not a hexadecimal digit (including end of file).
	 */
	private static int digit(int ch) {
		if (ch >= '0' && ch <= '9') {
			return ch - '0';
		}
		if (ch >= 'A' && ch <= 'F') {
			return 10 + ch - 'A';
		}
		if (ch >= 'a' && ch <= 'f') {
			return 10 + ch - 'a';
		}
		return -1;
	}

	/**
	 * Reads the next raw character and processes column/line information
	 * from the input file.
	 *
	 * @return The next character or {@code -1} if no more input is available.
	 */
	private int nextInputChar() throws IOException {
		int result = nextBufferedChar();
		return _inputLine.process(result);
	}

	/**
	 * Retrieves the next raw (still escaped) character from the buffered
	 * {@link #_input}, refilling the buffer if necessary.
	 *
	 * @return The next character or {@code -1} if no more input is available.
	 */
	private int nextBufferedChar() throws IOException {
		while (isBufferEmpty()) {
			int direct = fillBuffer();
			if (direct < 0) {
				return EOF;
			}
		}
		return _data[_pos++];
	}

	private boolean isBufferEmpty() {
		return _pos >= _len;
	}

	/**
	 * Replaces the buffer contents with fresh characters from {@link #_input}.
	 * Must only be called if the buffer is empty.
	 *
	 * @return The result of the underlying read operation.
	 */
	private int fillBuffer() throws IOException {
		int direct = _input.read(_data, 0, _data.length);
		_pos = 0;
		// Keep the buffer empty if nothing was read. Retaining the old length
		// would make the stale contents of the buffer readable again.
		_len = Math.max(direct, 0);
		return direct;
	}

	/**
	 * Puts the given character back in front of the unread input, so that it
	 * is the next character delivered by {@link #nextBufferedChar()}.
	 *
	 * @param ch The character to push back; negative values (end of file)
	 *        are ignored.
	 */
	private void pushBack(int ch) {
		if (ch < 0) {
			return;
		}

		if (isBufferEmpty()) {
			// Start filling the buffer from its end.
			_pos = _data.length;
			_len = _data.length;
		} else if (_pos == 0) {
			if (_len == _data.length) {
				// Buffer is completely full, no push possible, enlarge buffer.
				char[] newData = new char[_data.length + BUFFER_INCREMENT];
				_len = newData.length;
				_pos = newData.length - _data.length;
				System.arraycopy(_data, 0, newData, _pos, _data.length);
				_data = newData;
			} else {
				// Move contents to the right.
				int cnt = _len - _pos;
				_pos = _data.length - _len;
				_len = _data.length;
				System.arraycopy(_data, 0, _data, _pos, cnt);
			}
		}
		_data[--_pos] = (char) ch;
	}

	/**
	 * The {@link PositionMapping} being built during processing the file.
	 */
	public PositionMapping getPositionMapping() {
		return _mappingBuilder.getMapping();
	}

	/**
	 * An algorithm mapping {@link Position} from two corresponding files.
	 */
	public static final class PositionMapping {

		/**
		 * The recorded updates. {@link #lookup(Position)} performs a binary
		 * search on this list, so entries must be added in ascending
		 * position order.
		 */
		private final List<DeltaInfo> _deltas = new ArrayList<>();

		/**
		 * Creates a {@link UnicodeEscapeProcessingProvider.PositionMapping}.
		 */
		public PositionMapping() {
			super();
		}

		/**
		 * Whether this is the identity transformation.
		 */
		public boolean isEmpty() {
			return _deltas.isEmpty();
		}

		/**
		 * Records that starting with the given position, the given deltas
		 * have to be added to line and column.
		 */
		void add(int line, int column, int lineDelta, int columnDelta) {
			_deltas.add(new DeltaInfo(line, column, lineDelta, columnDelta));
		}

		/**
		 * Looks up the {@link PositionUpdate} for the given Position.
		 *
		 * @return The update recorded for the greatest position not after
		 *         the given one, or {@link PositionUpdate#NONE} if there is
		 *         no such update.
		 */
		public PositionUpdate lookup(Position position) {
			int result = Collections.binarySearch(_deltas, position);
			if (result >= 0) {
				return _deltas.get(result);
			} else {
				int insertIndex = -result - 1;
				if (insertIndex == 0) {
					// Before the first delta info, identity mapping.
					return PositionUpdate.NONE;
				} else {
					// The relevant update is the one with the position smaller
					// than the requested position.
					return _deltas.get(insertIndex - 1);
				}
			}
		}

		/**
		 * Algorithm updating a {@link Position} from one file to a
		 * {@link Position} in a corresponding file.
		 */
		public interface PositionUpdate {

			/**
			 * The identity position mapping.
			 */
			PositionUpdate NONE = new PositionUpdate() {
				@Override
				public int transformLine(int line) {
					return line;
				}

				@Override
				public int transformColumn(int column) {
					return column;
				}

				@Override
				public Position transform(Position pos) {
					// Returns the identical instance, which allows callers
					// to detect "no change" by reference comparison.
					return pos;
				}
			};

			/**
			 * Maps the given line to an original line.
			 */
			int transformLine(int line);

			/**
			 * Maps the given column to an original column.
			 */
			int transformColumn(int column);

			/**
			 * The transformed position.
			 */
			default Position transform(Position pos) {
				int line = pos.line;
				int column = pos.column;
				int transformedLine = transformLine(line);
				int transformedColumn = transformColumn(column);
				return new Position(transformedLine, transformedColumn);
			}

		}

		/**
		 * A {@link PositionUpdate} that is itself the {@link Position} from
		 * which on it applies, which makes it searchable by position.
		 */
		private static final class DeltaInfo extends Position implements PositionUpdate {

			/**
			 * The offset to add to the {@link #line} and all following source
			 * positions up to the next {@link PositionUpdate}.
			 */
			private final int _lineDelta;

			/**
			 * The offset to add to the {@link #column} and all following
			 * source positions up to the next {@link PositionUpdate}.
			 */
			private final int _columnDelta;

			/**
			 * Creates a {@link PositionUpdate}.
			 */
			public DeltaInfo(int line, int column, int lineDelta,
					int columnDelta) {
				super(line, column);
				_lineDelta = lineDelta;
				_columnDelta = columnDelta;
			}

			@Override
			public int transformLine(int sourceLine) {
				return sourceLine + _lineDelta;
			}

			@Override
			public int transformColumn(int sourceColumn) {
				return sourceColumn + _columnDelta;
			}

			@Override
			public String toString() {
				return "(" + line + ", " + column + ": " + _lineDelta + ", " + _columnDelta + ")";
			}

		}

		/**
		 * Transforms the given {@link Position}.
		 */
		public Position transform(Position pos) {
			return lookup(pos).transform(pos);
		}

		/**
		 * Transforms the given {@link Range}.
		 *
		 * @return The transformed range, or the given instance if neither
		 *         its begin nor its end was changed.
		 */
		public Range transform(Range range) {
			Position begin = transform(range.begin);
			Position end = transform(range.end);
			// Reference comparison is intended: the identity update returns
			// the very same position instance.
			if (begin == range.begin && end == range.end) {
				// No change.
				return range;
			}
			return new Range(begin, end);
		}
	}

	/**
	 * Builder recording a new entry in a {@link PositionMapping} whenever the
	 * distance between two {@link LineCounter}s changes.
	 */
	private static final class PositionMappingBuilder {

		private final LineCounter _left;

		private final LineCounter _right;

		private final PositionMapping _mapping = new PositionMapping();

		/** The line delta of the last recorded entry. */
		private int _lineDelta = 0;

		/** The column delta of the last recorded entry. */
		private int _columnDelta = 0;

		/**
		 * Creates a {@link PositionMappingBuilder}.
		 *
		 * @param left The source {@link LineCounter}.
		 * @param right The target {@link LineCounter}.
		 */
		public PositionMappingBuilder(LineCounter left, LineCounter right) {
			_left = left;
			_right = right;
			update();
		}

		/**
		 * The built {@link PositionMapping}.
		 */
		public PositionMapping getMapping() {
			return _mapping;
		}

		/**
		 * Compares the current positions of both counters and adds an entry
		 * to the mapping, if their distance differs from the last recorded one.
		 */
		public void update() {
			int lineDelta = _right.getLine() - _left.getLine();
			int columnDelta = _right.getColumn() - _left.getColumn();

			if (lineDelta != _lineDelta || columnDelta != _columnDelta) {
				_mapping.add(_left.getLine(), _left.getColumn(), lineDelta, columnDelta);

				_lineDelta = lineDelta;
				_columnDelta = columnDelta;
			}
		}

	}

	/**
	 * Processor keeping track of the current line and column in a stream of
	 * incoming characters.
	 *
	 * @see #process(int)
	 */
	public static final class LineCounter {

		/**
		 * Whether {@link #CR} has been seen on the input as last character.
		 */
		private boolean _crSeen;

		private int _line = 1;

		private int _column = 1;

		/**
		 * Creates a {@link UnicodeEscapeProcessingProvider.LineCounter}.
		 */
		public LineCounter() {
			super();
		}

		/**
		 * The line of the currently processed input character.
		 */
		public int getLine() {
			return _line;
		}

		/**
		 * The column of the currently processed input character.
		 */
		public int getColumn() {
			return _column;
		}

		/**
		 * The current position.
		 */
		public Position getPosition() {
			return new Position(getLine(), getColumn());
		}

		/**
		 * Analyzes the given character for line feed.
		 *
		 * <p>
		 * A {@link #CR}, a {@link #LF} and the sequence {@link #CR}
		 * {@link #LF} each advance to the first column of the next line. Any
		 * other character advances the column by one. End of file
		 * ({@code -1}) does not change the position.
		 * </p>
		 *
		 * @param ch The character to process.
		 * @return The given character.
		 */
		public int process(int ch) {
			switch (ch) {
				case EOF: {
					break;
				}
				case CR: {
					incLine();
					_crSeen = true;
					break;
				}
				case LF: {
					// CR LF does only count as a single line terminator.
					if (_crSeen) {
						_crSeen = false;
					} else {
						incLine();
					}
					break;
				}
				default: {
					_crSeen = false;
					_column++;
				}
			}
			return ch;
		}

		/**
		 * Takes over the complete state of the given counter.
		 */
		private void assign(LineCounter other) {
			_line = other._line;
			_column = other._column;
			_crSeen = other._crSeen;
		}

		private void incLine() {
			_line++;
			_column = 1;
		}

	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy