All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.ejahns.token.Tokenizer Maven / Gradle / Ivy

The newest version!
package com.github.ejahns.token;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.github.ejahns.Constants.*;
import static com.github.ejahns.Parser.TokenType.*;

class Tokenizer {

	private static final Pattern LANGUAGE_PATTERN = Pattern.compile("^#\\s*language\\s*:\\s*([a-zA-Z\\-_]+)$");

	private KeywordProvider keywordProvider;
	private boolean readingDocString = false;
	private String currentDocStringSeparator = DOCSTRING_SEPARATOR;

	Tokenizer() {
		keywordProvider = LanguageProvider.getKeywordProvider();
	}

	Token tokenize(String line, int lineNum) throws TokenizerException {
		if (null == line) {
			return new Token(null, EOFToken, lineNum);
		}
		String trim = line.trim();
		if (readingDocString) {
			if (trim.startsWith(currentDocStringSeparator)) {
				readingDocString = false;
				return new Token(null, DocStringSeparatorToken, lineNum);
			}
			else {
				return new Token(trim, OtherToken, lineNum);
			}
		}
		if (trim.equals("")) {
			return null;
		}
		Matcher matcher = LANGUAGE_PATTERN.matcher(trim);
		if (matcher.matches()) {
			String language = matcher.group(1);
			keywordProvider = LanguageProvider.getKeywordProvider(language);
			return new Token(language, LanguageToken, lineNum);
		}
		if (trim.startsWith(COMMENT_PREFIX)) {
			return null;
		}
		if (trim.startsWith(DOCSTRING_SEPARATOR)) {
			currentDocStringSeparator = DOCSTRING_SEPARATOR;
			readingDocString = true;
			String type = trim.substring(DOCSTRING_SEPARATOR.length());
			type = type.equals("") ? null : type;
			return new Token(type, DocStringSeparatorToken, lineNum);
		}
		if (trim.startsWith(ALTERNATE_DOCSTRING_SEPARATOR)) {
			currentDocStringSeparator = ALTERNATE_DOCSTRING_SEPARATOR;
			readingDocString = true;
			String type = trim.substring(ALTERNATE_DOCSTRING_SEPARATOR.length());
			type = type.equals("") ? null : type;
			return new Token(type, DocStringSeparatorToken, lineNum);
		}
		if (trim.startsWith(TAG_PREFIX)) {
			return new Token(trim, TagLineToken, lineNum);
		}
		if (trim.startsWith(CELL_SEPARATOR)) {
			if (!trim.endsWith(CELL_SEPARATOR)) {
				throw new TokenizerException(
					String.format(
						"expected final '|' at end of line %s: %s",
						lineNum,
						trim)
				);
			}
			return new Token(trim, TableRowToken, lineNum);
		}
		KeywordProvider.TokenAssociation tokenAssociation = keywordProvider.matchTokenType(trim);
		if (null != tokenAssociation) {
			return new Token(tokenAssociation.keyword, tokenAssociation.line, tokenAssociation.tokenType, lineNum);
		}
		return new Token(trim, OtherToken, lineNum);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy