org.antlr.v4.parse.TokenVocabParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr4 Show documentation
Show all versions of antlr4 Show documentation
The ANTLR 4 grammar compiler.
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.runtime.Token;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.ast.GrammarAST;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads the {@code .tokens} vocabulary file named by a grammar's
 * {@code tokenVocab} option and converts it into a token-name to
 * token-type mapping. Problems are reported through the tool's error
 * manager instead of being thrown to the caller.
 */
public class TokenVocabParser {
    /**
     * One token definition: some text, an {@code =} sign, then a decimal
     * number (for example {@code ID=4}). Group 1 is the text before the
     * {@code =}, group 2 is the run of digits after it.
     */
    private static final Pattern TOKEN_DEF_PATTERN =
        Pattern.compile("([^\n]+?)[ \\t]*?=[ \\t]*?([0-9]+)");

    /** Grammar whose {@code tokenVocab} option selects the file to load. */
    protected final Grammar g;

    /**
     * @param g grammar whose options, AST and tool are consulted when
     *          locating and reading the vocabulary file
     */
    public TokenVocabParser(Grammar g) {
        this.g = g;
    }

    /**
     * Load a vocab file {@code .tokens} and return mapping.
     *
     * <p>The file is read line by line. Each line that contains a token
     * definition adds one entry (token text to {@code Integer} token type)
     * to the result, in file order. Blank lines are skipped; any other
     * non-matching line is reported as a syntax error together with its
     * 1-based line number. A missing file or an I/O failure is reported
     * through {@code tool.errMgr}; whatever was parsed up to that point is
     * still returned.
     *
     * @return insertion-ordered map from token name/literal to token type;
     *         never {@code null}, possibly empty
     */
    public Map load() {
        Map<String, Integer> tokens = new LinkedHashMap<String, Integer>();
        File fullFile = getImportedVocabFile();
        FileInputStream fis = null;
        BufferedReader br = null;
        Tool tool = g.tool;
        String vocabName = g.getOptionString("tokenVocab");
        try {
            fis = new FileInputStream(fullFile);
            InputStreamReader isr;
            if (tool.grammarEncoding != null) {
                isr = new InputStreamReader(fis, tool.grammarEncoding);
            }
            else {
                isr = new InputStreamReader(fis);
            }
            br = new BufferedReader(isr);

            // Count every physical line, not just the well-formed ones, so
            // that reported line numbers stay correct after a blank or
            // malformed line.
            int lineNum = 0;
            String tokenDef;
            while ( (tokenDef = br.readLine())!=null ) {
                lineNum++;
                Matcher matcher = TOKEN_DEF_PATTERN.matcher(tokenDef);
                if ( matcher.find() ) {
                    String tokenID = matcher.group(1);
                    String tokenTypeS = matcher.group(2);
                    int tokenType;
                    try {
                        tokenType = Integer.parseInt(tokenTypeS);
                    }
                    catch (NumberFormatException nfe) {
                        // Group 2 is digits only, so this is reached when the
                        // number does not fit in an int.
                        tool.errMgr.toolError(ErrorType.TOKENS_FILE_SYNTAX_ERROR,
                                              vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                                              " bad token type: "+tokenTypeS,
                                              lineNum);
                        tokenType = Token.INVALID_TOKEN_TYPE;
                    }
                    tool.log("grammar", "import "+tokenID+"="+tokenType);
                    tokens.put(tokenID, tokenType);
                }
                else if ( tokenDef.length()>0 ) { // ignore blank lines
                    tool.errMgr.toolError(ErrorType.TOKENS_FILE_SYNTAX_ERROR,
                                          vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                                          " bad token def: " + tokenDef,
                                          lineNum);
                }
            }
        }
        catch (FileNotFoundException fnfe) {
            GrammarAST inTree = g.ast.getOptionAST("tokenVocab");
            // NOTE(review): the option may be absent from the grammar tree
            // when it was supplied only on the command line; presumably
            // getOptionAST returns null then, so guard before dereferencing.
            if ( inTree!=null && vocabName.equals(inTree.getToken().getText()) ) {
                tool.errMgr.grammarError(ErrorType.CANNOT_FIND_TOKENS_FILE_REFD_IN_GRAMMAR,
                                         g.fileName,
                                         inTree.getToken(),
                                         fullFile);
            }
            else { // must be from -D option on cmd-line not token in tree
                tool.errMgr.toolError(ErrorType.CANNOT_FIND_TOKENS_FILE_GIVEN_ON_CMDLINE,
                                      fullFile,
                                      g.name);
            }
        }
        catch (Exception e) {
            tool.errMgr.toolError(ErrorType.ERROR_READING_TOKENS_FILE,
                                  e,
                                  fullFile,
                                  e.getMessage());
        }
        finally {
            try {
                if ( br!=null ) {
                    // Closing the reader also closes the wrapped stream.
                    br.close();
                }
                else if ( fis!=null ) {
                    // The reader was never built (e.g. the encoding was
                    // rejected); close the raw stream so it does not leak.
                    fis.close();
                }
            }
            catch (IOException ioe) {
                tool.errMgr.toolError(ErrorType.ERROR_READING_TOKENS_FILE,
                                      ioe,
                                      fullFile,
                                      ioe.getMessage());
            }
        }
        return tokens;
    }

    /** Return a File descriptor for vocab file. Look in library or
     *  in -o output path. antlr -o foo T.g4 U.g4 where U needs T.tokens
     *  won't work unless we look in foo too. If we do not find the
     *  file in the lib directory then must assume that the .tokens file
     *  is going to be generated as part of this build and we have defined
     *  .tokens files so that they ALWAYS are generated in the base output
     *  directory, which means the current directory for the command line tool if there
     *  was no output directory specified.
     *
     *  @return the file under the tool's lib directory when it exists there,
     *          otherwise the (possibly not yet existing) file under the
     *          tool's output directory
     */
    public File getImportedVocabFile() {
        String vocabName = g.getOptionString("tokenVocab");
        File f = new File(g.tool.libDirectory,
                          File.separator +
                          vocabName +
                          CodeGenerator.VOCAB_FILE_EXTENSION);
        if (f.exists()) {
            return f;
        }

        // We did not find the vocab file in the lib directory, so we need
        // to look for it in the output directory which is where .tokens
        // files are generated (in the base, not relative to the input
        // location.)
        f = new File(g.tool.outputDirectory, vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
        return f;
    }
}