All downloads are free. The search and download features use the official Maven repository.

org.fhir.ucum.Lexer Maven / Gradle / Ivy

/*******************************************************************************
 * Crown Copyright (c) 2006 - 2014, Copyright (c) 2006 - 2014 Kestral Computing P/L.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 *    Kestral Computing P/L - initial implementation
 *******************************************************************************/

package org.fhir.ucum;


/**
 * Tokenizer for unit expressions. The lexer always holds one "current" token
 * (text plus {@link TokenType}); {@link #consume()} advances to the next one.
 * The first token is read by the constructor.
 *
 * <p>Token kinds produced here: single-character tokens for {@code /}, {@code .},
 * {@code (} and {@code )}; annotations starting with <code>{</code>; signed numbers
 * starting with {@code +} or {@code -}; and unsigned numbers / symbols.
 */
public class Lexer {

	/** Value returned by {@link #nextChar()} and {@link #peekChar()} once the input is exhausted (NUL). */
	private static final char NO_CHAR = Character.UNASSIGNED;

	/** The text being tokenized; never {@code null}. */
	private final String source;
	/** Offset in {@link #source} of the next character to read. */
	private int index;

	/** Text of the current token, or {@code null} when there is none. */
	private String token;
	/** Type of the current token; {@link TokenType#NONE} when there is none. */
	private TokenType type;
	/** Offset in {@link #source} at which the current token started (used in error messages). */
	private int start;

	/**
	 * Creates a lexer and reads the first token.
	 *
	 * @param source the text to tokenize; {@code null} is treated as the empty string
	 * @throws UcumException if the first token is malformed
	 */
	public Lexer(String source) throws UcumException  {
		// Normalize before storing: the field (not the parameter) is what consume() reads.
		this.source = source == null ? "" : source;
		index = 0;
		consume();
	}

	/**
	 * Discards the current token and reads the next one. At end of input the
	 * token becomes {@code null} with type {@link TokenType#NONE}.
	 *
	 * @throws UcumException if the next character cannot start any token, or the token is malformed
	 */
	public void consume() throws UcumException  {
		token = null;
		type = TokenType.NONE;
		start = index;
		if (index >= source.length()) {
			return;
		}
		char ch = nextChar();
		// Order matters: the first recognizer that accepts the character wins.
		boolean recognized =
				checkSingle(ch, '/', TokenType.SOLIDUS)
				|| checkSingle(ch, '.', TokenType.PERIOD)
				|| checkSingle(ch, '(', TokenType.OPEN)
				|| checkSingle(ch, ')', TokenType.CLOSE)
				|| checkAnnotation(ch)
				|| checkNumber(ch)
				|| checkNumberOrSymbol(ch);
		if (!recognized) {
			throw new UcumException("Error processing unit '"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start));
		}
	}

	/**
	 * Recognizes a signed integer: {@code +} or {@code -} followed by one or more digits.
	 *
	 * @param ch the character that was just read
	 * @return {@code true} if a number token was produced, {@code false} if {@code ch} is not a sign
	 * @throws UcumException if the sign is not followed by at least one digit
	 */
	private boolean checkNumber(char ch) throws UcumException  {
		if (ch != '+' && ch != '-') {
			return false;
		}
		StringBuilder text = new StringBuilder().append(ch);
		ch = peekChar();
		while (isDigit(ch)) {
			text.append(ch);
			index++;
			ch = peekChar();
		}
		token = text.toString();
		if (text.length() == 1) {
			throw new UcumException("Error processing unit'"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start)+": a + or - must be followed by at least one digit");
		}
		type = TokenType.NUMBER;
		return true;
	}

	/**
	 * Recognizes an unsigned number or a symbol. The token is a
	 * {@link TokenType#NUMBER} if it consists only of digits, otherwise a
	 * {@link TokenType#SYMBOL}. Once a non-digit has been seen, digits end the
	 * token unless the lexer is inside square brackets.
	 *
	 * @param ch the character that was just read
	 * @return {@code true} if a token was produced, {@code false} if {@code ch} cannot start one
	 * @throws UcumException on a nested {@code [} or a {@code ]} without a matching {@code [}
	 */
	private boolean checkNumberOrSymbol(char ch) throws UcumException  {
		if (!isValidSymbolChar(ch, true, false)) {
			return false;
		}
		StringBuilder text = new StringBuilder().append(ch);
		boolean isSymbol = !isDigit(ch);
		boolean inBrackets = checkBrackets(ch, false);
		ch = peekChar();
		// Bracket state is updated for the peeked character before deciding whether it is accepted.
		inBrackets = checkBrackets(ch, inBrackets);
		while (isValidSymbolChar(ch, !isSymbol || inBrackets, inBrackets)) {
			text.append(ch);
			// ch passed isValidSymbolChar, so it cannot be NO_CHAR here.
			isSymbol = isSymbol || !isDigit(ch);
			index++;
			ch = peekChar();
			inBrackets = checkBrackets(ch, inBrackets);
		}
		token = text.toString();
		type = isSymbol ? TokenType.SYMBOL : TokenType.NUMBER;
		return true;
	}

	/**
	 * Updates the "inside square brackets" state for one character.
	 *
	 * @param ch the character being examined
	 * @param inBrackets whether the lexer is currently inside {@code [...]}
	 * @return the bracket state after {@code ch}
	 * @throws UcumException on {@code [} while already inside brackets, or {@code ]} while outside
	 */
	private boolean checkBrackets(char ch, boolean inBrackets) throws UcumException  {
		if (ch == '[') {
			if (inBrackets) {
				error("Nested [");
			}
			return true;
		}
		if (ch == ']') {
			if (!inBrackets) {
				error("] without [");
			}
			return false;
		}
		return inBrackets;
	}

	/**
	 * Tells whether {@code ch} may be part of a number/symbol token.
	 *
	 * @param ch the character to test
	 * @param allowDigits whether {@code 0}-{@code 9} are acceptable at this point
	 * @param inBrackets whether the lexer is inside {@code [...]}, where {@code .} is also accepted
	 * @return {@code true} if the character is acceptable
	 */
	private boolean isValidSymbolChar(char ch, boolean allowDigits, boolean inBrackets) {
		if (isDigit(ch)) {
			return allowDigits;
		}
		if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
			return true;
		}
		switch (ch) {
			case '[':
			case ']':
			case '%':
			case '*':
			case '^':
			case '\'':
			case '"':
			case '_':
				return true;
			case '.':
				return inBrackets;
			default:
				return false;
		}
	}

	/**
	 * Recognizes an annotation, which starts with <code>{</code> and runs up to and
	 * including the next <code>}</code>. The token text holds everything after the
	 * opening brace, including the closing brace.
	 *
	 * @param ch the character that was just read
	 * @return {@code true} if an annotation token was produced, {@code false} if {@code ch} is not <code>{</code>
	 * @throws UcumException if the input ends before <code>}</code>, or the annotation
	 *         contains a character rejected by {@code Utilities.isAsciiChar}
	 */
	private boolean checkAnnotation(char ch) throws UcumException  {
		if (ch != '{') {
			return false;
		}
		StringBuilder text = new StringBuilder();
		while (ch != '}') {
			ch = nextChar();
			// End-of-input is tested first so that a missing '}' is always reported as
			// "unterminated" and never depends on how Utilities.isAsciiChar (defined
			// elsewhere) classifies the NO_CHAR sentinel.
			if (ch == NO_CHAR) {
				throw new UcumException("Error processing unit'"+source+"': unterminated annotation");
			}
			if (!Utilities.isAsciiChar(ch)) {
				throw new UcumException("Error processing unit'"+source+"': Annotation contains non-ascii characters");
			}
			text.append(ch);
		}
		token = text.toString();
		type = TokenType.ANNOTATION;
		return true;
	}

	/**
	 * Produces a one-character token if {@code ch} equals {@code test}.
	 *
	 * @param ch the character that was just read
	 * @param test the character to match
	 * @param type the token type to assign on a match
	 * @return {@code true} on a match, otherwise {@code false} (state is left untouched)
	 */
	private boolean checkSingle(char ch, char test, TokenType type) {
		if (ch != test) {
			return false;
		}
		token = String.valueOf(ch);
		this.type = type;
		return true;
	}

	/**
	 * Reads the next character and advances. The position is advanced even at
	 * end of input, in which case {@link #NO_CHAR} is returned.
	 */
	private char nextChar() {
		char res = peekChar();
		index++;
		return res;
	}

	/** Returns the next character without advancing, or {@link #NO_CHAR} at end of input. */
	private char peekChar() {
		return index < source.length() ? source.charAt(index) : NO_CHAR;
	}

	/** Tells whether {@code ch} is one of the ASCII digits {@code 0}-{@code 9}. */
	private static boolean isDigit(char ch) {
		return ch >= '0' && ch <= '9';
	}

	/**
	 * @return the text of the current token, or {@code null} if there is none
	 */
	public String getToken() {
		return token;
	}

	/**
	 * @return the type of the current token
	 */
	public TokenType getType() {
		return type;
	}

	/**
	 * Always throws a {@link UcumException} that names the source text, the
	 * given message and the start position of the current token.
	 *
	 * @param errMsg description of the problem
	 * @throws UcumException always
	 */
	public void error(String errMsg) throws UcumException  {
		throw new UcumException("Error processing unit '"+source+"': "+ errMsg +"' at position "+Integer.toString(start));
	}

	/**
	 * Parses the current token as an integer; a leading {@code +} is stripped first.
	 *
	 * @return the integer value of the current token
	 * @throws NumberFormatException if the token text is not a valid integer
	 * @throws NullPointerException if there is no current token
	 */
	public int getTokenAsInt() {
		return token.charAt(0) == '+' ? Integer.parseInt(token.substring(1)) : Integer.parseInt(token);
	}

	/**
	 * @return {@code true} once every character of the source has been read
	 */
	public boolean finished() {
		// >= rather than ==: nextChar() advances the index even when called at end of input.
		return index >= source.length();
	}


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy