org.jparsec.Terminals Maven / Gradle / Ivy

/*****************************************************************************
 * Copyright (C) jparsec.org                                                *
 * ------------------------------------------------------------------------- *
 * Licensed under the Apache License, Version 2.0 (the "License");           *
 * you may not use this file except in compliance with the License.          *
 * You may obtain a copy of the License at                                   *
 *                                                                           *
 * http://www.apache.org/licenses/LICENSE-2.0                                *
 *                                                                           *
 * Unless required by applicable law or agreed to in writing, software       *
 * distributed under the License is distributed on an "AS IS" BASIS,         *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  *
 * See the License for the specific language governing permissions and       *
 * limitations under the License.                                            *
 *****************************************************************************/
package org.jparsec;

import static java.util.Arrays.asList;
import static org.jparsec.internal.util.Checks.checkArgument;

import java.util.ArrayList;
import java.util.Collection;
import java.util.function.Function;

import org.jparsec.Tokens.Fragment;
import org.jparsec.Tokens.ScientificNotation;
import org.jparsec.Tokens.Tag;
import org.jparsec.internal.annotations.Private;
import org.jparsec.internal.util.Objects;
import org.jparsec.internal.util.Strings;
import org.jparsec.internal.util.Checks;

/**
 * Provides a convenient API to build lexers and parsers for terminals.
 * The following example is a parser snippet for a Java generic type expression such as
 * {@code List<String>}:
 * <pre>   {@code
 *   Terminals terms = Terminals
 *       .operators("?", "<", ">", ",")
 *       .words(Scanners.IDENTIFIER)
 *       .keywords("super", "extends")
 *       .build();
 *   Parser<String> typeName = Terminals.identifier();
 *   Parser<?> wildcardWithUpperBound = terms.phrase("?", "extends");
 *   ...
 *   parser.from(terms.tokenizer(), Scanners.WHITESPACES.optional()).parse("List<String>");
 * }</pre>
 *
 * @author Ben Yu
 */
public final class Terminals extends Lexicon {

  /**
   * {@link Parser} that recognizes reserved word tokens.
   * i.e. {@link Tokens.Fragment} tokens tagged as {@link Tag#RESERVED}.
   * {@link Fragment#text()} is returned as the parser result.
   */
  public static final Parser<String> RESERVED = fragment(Tag.RESERVED);

  /** Entry point for parsers and tokenizers of character literals. */
  public static final class CharLiteral {

    private CharLiteral() {}

    /** {@link Parser} that recognizes {@link Character} tokens. */
    public static final Parser<Character> PARSER =
        Parsers.tokenType(Character.class, "character literal");

    /**
     * A tokenizer that parses a single-quoted character literal (escaped by {@code '\'}),
     * and then converts the character to a {@link Character} token.
     */
    public static final Parser<Character> SINGLE_QUOTE_TOKENIZER =
        Scanners.SINGLE_QUOTE_CHAR.map(TokenizerMaps.SINGLE_QUOTE_CHAR);
  }

  /** Entry point for parsers and tokenizers of string literals. */
  public static final class StringLiteral {

    private StringLiteral() {}

    /** {@link Parser} that recognizes {@link String} tokens. */
    public static final Parser<String> PARSER = Parsers.tokenType(String.class, "string literal");

    /**
     * A tokenizer that parses a double-quoted string literal (escaped by {@code '\'}),
     * and transforms the quoted content by applying escape characters.
     */
    public static final Parser<String> DOUBLE_QUOTE_TOKENIZER =
        Scanners.DOUBLE_QUOTE_STRING.map(TokenizerMaps.DOUBLE_QUOTE_STRING);

    /**
     * A tokenizer that parses a single-quoted string literal (a single quote is escaped with
     * another single quote), and transforms the quoted content by applying escape characters.
     */
    public static final Parser<String> SINGLE_QUOTE_TOKENIZER =
        Scanners.SINGLE_QUOTE_STRING.map(TokenizerMaps.SINGLE_QUOTE_STRING);
  }

  /** Entry point for parsers and tokenizers of integral number literals represented as {@link Long}. */
  public static final class LongLiteral {

    private LongLiteral() {}

    /** {@link Parser} that recognizes {@link Long} tokens. */
    public static final Parser<Long> PARSER = Parsers.tokenType(Long.class, "integer literal");

    /**
     * A tokenizer that parses a decimal integer number (valid patterns are: {@code 1, 10, 123}),
     * and converts the string to a {@link Long} value.
     */
    public static final Parser<Long> DEC_TOKENIZER =
        Scanners.DEC_INTEGER.map(TokenizerMaps.DEC_AS_LONG);

    /**
     * A tokenizer that parses an octal integer number (valid patterns are:
     * {@code 0, 07, 017, 0371} etc.), and converts the string to a {@link Long} value.
     *
     * <p>An octal number has to start with 0.
     */
    public static final Parser<Long> OCT_TOKENIZER =
        Scanners.OCT_INTEGER.map(TokenizerMaps.OCT_AS_LONG);

    /**
     * A tokenizer that parses a hex integer number (valid patterns are:
     * {@code 0x1, 0Xff, 0xFe1} etc.), and converts the string to a {@link Long} value.
     *
     * <p>A hex number has to start with either 0x or 0X.
     */
    public static final Parser<Long> HEX_TOKENIZER =
        Scanners.HEX_INTEGER.map(TokenizerMaps.HEX_AS_LONG);

    /**
     * A tokenizer that parses decimal, hex, and octal numbers and converts the string to a
     * {@code Long} value.
     */
    public static final Parser<Long> TOKENIZER =
        Parsers.or(HEX_TOKENIZER, DEC_TOKENIZER, OCT_TOKENIZER);
  }

  /** Entry point for any arbitrary integer literal represented as a {@link String}. */
  public static final class IntegerLiteral {

    private IntegerLiteral() {}

    /**
     * {@link Parser} that recognizes {@link Tokens.Fragment} tokens tagged as {@link Tag#INTEGER}.
     */
    public static final Parser<String> PARSER = fragment(Tag.INTEGER);

    /**
     * A tokenizer that parses an integer number (valid patterns are: {@code 0, 00, 1, 10})
     * and returns a {@link Fragment} token tagged as {@link Tag#INTEGER}.
     */
    public static final Parser<Fragment> TOKENIZER =
        Scanners.INTEGER.map(TokenizerMaps.INTEGER_FRAGMENT);
  }

  /** Entry point for parsers and tokenizers of decimal number literals represented as {@link String}. */
  public static final class DecimalLiteral {

    private DecimalLiteral() {}

    /**
     * {@link Parser} that recognizes {@link Tokens.Fragment} tokens tagged as {@link Tag#DECIMAL}.
     */
    public static final Parser<String> PARSER = fragment(Tag.DECIMAL);

    /**
     * A tokenizer that parses a decimal number (valid patterns are: {@code 1, 2.3, 00, 0., .23})
     * and returns a {@link Fragment} token tagged as {@link Tag#DECIMAL}.
     */
    public static final Parser<Fragment> TOKENIZER =
        Scanners.DECIMAL.map(TokenizerMaps.DECIMAL_FRAGMENT);
  }

  /** Entry point for parsers and tokenizers of scientific notation literals. */
  public static final class ScientificNumberLiteral {

    private ScientificNumberLiteral() {}

    /** {@link Parser} that recognizes {@link ScientificNotation} tokens. */
    public static final Parser<ScientificNotation> PARSER =
        Parsers.tokenType(ScientificNotation.class, "scientific number literal");

    /**
     * A tokenizer that parses a scientific notation and converts the string to a
     * {@link ScientificNotation} value.
     */
    public static final Parser<ScientificNotation> TOKENIZER =
        Scanners.SCIENTIFIC_NOTATION.map(TokenizerMaps.SCIENTIFIC_NOTATION);
  }

  /** Entry point for parsers and tokenizers of regular identifiers. */
  public static final class Identifier {

    private Identifier() {}

    /**
     * {@link Parser} that recognizes identifier tokens.
     * i.e. {@link Tokens.Fragment} tokens tagged as {@link Tag#IDENTIFIER}.
     * {@link Fragment#text()} is returned as the parser result.
     */
    public static final Parser<String> PARSER = fragment(Tag.IDENTIFIER);

    /**
     * A tokenizer that parses any identifier and returns a {@link Fragment} token tagged as
     * {@link Tag#IDENTIFIER}.
     *
     * <p>An identifier starts with an alphabetic character or underscore,
     * and is followed by 0 or more alphanumeric characters or underscores.
     */
    public static final Parser<Fragment> TOKENIZER =
        Scanners.IDENTIFIER.map(TokenizerMaps.IDENTIFIER_FRAGMENT);
  }

  private Terminals(Lexicon lexicon) {
    super(lexicon.words, lexicon.tokenizer);
  }

  /**
   * Returns a {@link Terminals} object for lexing and parsing the operators with names specified in
   * {@code ops}, and for lexing and parsing the keywords case insensitively. Parsers for operators
   * and keywords can be obtained through {@link #token}; parsers for identifiers through
   * {@link #identifier}.
   *
   * <p>In detail, keywords and operators are lexed as {@link Tokens.Fragment} with the
   * {@link Tag#RESERVED} tag. Words that are not among {@code keywords} are lexed as
   * {@code Fragment} with the {@link Tag#IDENTIFIER} tag.
   *
   * <p>A word is defined as an alphanumeric string that starts with {@code [_a-zA-Z]},
   * with 0 or more {@code [0-9_a-zA-Z]} following.
   *
   * @param ops the operator names.
   * @param keywords the keyword names.
   * @return the Terminals instance.
   * @deprecated Use {@code operators(ops)
   *     .words(Scanners.IDENTIFIER)
   *     .caseInsensitiveKeywords(keywords)
   *     .build()} instead.
   */
  @Deprecated
  public static Terminals caseInsensitive(String[] ops, String[] keywords) {
    return operators(ops).words(Scanners.IDENTIFIER).caseInsensitiveKeywords(asList(keywords)).build();
  }

  /**
   * Returns a {@link Terminals} object for lexing and parsing the operators with names specified in
   * {@code ops}, and for lexing and parsing the keywords case sensitively. Parsers for operators
   * and keywords can be obtained through {@link #token}; parsers for identifiers through
   * {@link #identifier}.
   *
   * <p>In detail, keywords and operators are lexed as {@link Tokens.Fragment} with the
   * {@link Tag#RESERVED} tag. Words that are not among {@code keywords} are lexed as
   * {@code Fragment} with the {@link Tag#IDENTIFIER} tag.
   *
   * <p>A word is defined as an alphanumeric string that starts with {@code [_a-zA-Z]},
   * with 0 or more {@code [0-9_a-zA-Z]} following.
   *
   * @param ops the operator names.
   * @param keywords the keyword names.
   * @return the Terminals instance.
   * @deprecated Use {@code operators(ops)
   *     .words(Scanners.IDENTIFIER)
   *     .keywords(keywords)
   *     .build()} instead.
   */
  @Deprecated
  public static Terminals caseSensitive(String[] ops, String[] keywords) {
    return operators(ops).words(Scanners.IDENTIFIER).keywords(asList(keywords)).build();
  }

  /**
   * Returns a {@link Terminals} object for lexing and parsing the operators with names specified in
   * {@code ops}, and for lexing and parsing the keywords case insensitively. Parsers for operators
   * and keywords can be obtained through {@link #token}; parsers for identifiers through
   * {@link #identifier}.
   *
   * <p>In detail, keywords and operators are lexed as {@link Tokens.Fragment} with the
   * {@link Tag#RESERVED} tag. Words that are not among {@code keywords} are lexed as
   * {@code Fragment} with the {@link Tag#IDENTIFIER} tag.
   *
   * @param wordScanner the scanner that returns a word in the language.
   * @param ops the operator names.
   * @param keywords the keyword names.
   * @return the Terminals instance.
   * @deprecated Use {@code operators(ops)
   *     .words(wordScanner)
   *     .caseInsensitiveKeywords(keywords)
   *     .build()} instead.
   */
  @Deprecated
  public static Terminals caseInsensitive(
      Parser<String> wordScanner, String[] ops, String[] keywords) {
    return operators(ops)
        .words(wordScanner)
        .caseInsensitiveKeywords(keywords)
        .build();
  }

  /**
   * Returns a {@link Terminals} object for lexing and parsing the operators with names specified in
   * {@code ops}, and for lexing and parsing the keywords case sensitively. Parsers for operators
   * and keywords can be obtained through {@link #token}; parsers for identifiers through
   * {@link #identifier}.
   *
   * <p>In detail, keywords and operators are lexed as {@link Tokens.Fragment} with the
   * {@link Tag#RESERVED} tag. Words that are not among {@code keywords} are lexed as
   * {@code Fragment} with the {@link Tag#IDENTIFIER} tag.
   *
   * @param wordScanner the scanner that returns a word in the language.
   * @param ops the operator names.
   * @param keywords the keyword names.
   * @return the Terminals instance.
   * @deprecated Use {@code operators(ops)
   *     .words(wordScanner)
   *     .keywords(keywords)
   *     .build()} instead.
   */
  @Deprecated
  public static Terminals caseSensitive(
      Parser<String> wordScanner, String[] ops, String[] keywords) {
    return operators(ops)
        .words(wordScanner)
        .keywords(keywords)
        .build();
  }

  /**
   * Returns a {@link Terminals} object for lexing and parsing the operators with names specified in
   * {@code ops}, and for lexing and parsing the keywords case insensitively. Parsers for operators
   * and keywords can be obtained through {@link #token}; parsers for identifiers through
   * {@link #identifier}.
   *
   * <p>In detail, keywords and operators are lexed as {@link Tokens.Fragment} with the
   * {@link Tag#RESERVED} tag. Words that are not among {@code keywords} are lexed as
   * {@code Fragment} with the {@link Tag#IDENTIFIER} tag.
   *
   * @param wordScanner the scanner that returns a word in the language.
   * @param ops the operator names.
   * @param keywords the keyword names.
   * @param wordMap maps the text to a token value for non-keywords recognized by
   *     {@code wordScanner}.
   * @return the Terminals instance.
   * @deprecated Use {@code operators(ops)
   *     .words(wordScanner)
   *     .tokenizeWordsWith(wordMap)
   *     .caseInsensitiveKeywords(keywords)
   *     .build()} instead.
   */
  @Deprecated
  public static Terminals caseInsensitive(
      Parser<String> wordScanner, String[] ops, String[] keywords, Function<String, ?> wordMap) {
    return operators(ops)
        .words(wordScanner)
        .caseInsensitiveKeywords(keywords)
        .tokenizeWordsWith(wordMap)
        .build();
  }

  /**
   * Returns a {@link Terminals} object for lexing and parsing the operators with names specified in
   * {@code ops}, and for lexing and parsing the keywords case sensitively. Parsers for operators
   * and keywords can be obtained through {@link #token}; parsers for identifiers through
   * {@link #identifier}.
   *
   * <p>In detail, keywords and operators are lexed as {@link Tokens.Fragment} with the
   * {@link Tag#RESERVED} tag. Words that are not among {@code keywords} are lexed as
   * {@code Fragment} with the {@link Tag#IDENTIFIER} tag.
   *
   * @param wordScanner the scanner that returns a word in the language.
   * @param ops the operator names.
   * @param keywords the keyword names.
   * @param wordMap maps the text to a token value for non-keywords recognized by
   *     {@code wordScanner}.
   * @return the Terminals instance.
   * @deprecated Use {@code operators(ops)
   *     .words(wordScanner)
   *     .tokenizeWordsWith(wordMap)
   *     .keywords(keywords)
   *     .build()} instead.
   */
  @Deprecated
  public static Terminals caseSensitive(
      Parser<String> wordScanner, String[] ops, String[] keywords, Function<String, ?> wordMap) {
    return operators(ops)
        .words(wordScanner)
        .keywords(keywords)
        .tokenizeWordsWith(wordMap)
        .build();
  }

  /**
   * Returns a {@link Terminals} object for lexing the operators with names specified in
   * {@code ops}. Operators are lexed as {@link Tokens.Fragment} with the {@link Tag#RESERVED} tag.
   * For example, to get the parser for operator "?", simply call {@code token("?")}.
   *
   * <p>If words and keywords need to be parsed, they can be configured via {@link #words}.
   *
   * @param ops the operator names.
   * @return the Terminals instance.
   */
  public static Terminals operators(String... ops) {
    return operators(asList(ops));
  }

  /**
   * Returns a {@link Terminals} object for lexing the operators with names specified in
   * {@code ops}. Operators are lexed as {@link Tokens.Fragment} with the {@link Tag#RESERVED} tag.
   * For example, to get the parser for operator "?", simply call {@code token("?")}.
   *
   * <p>If words and keywords need to be parsed, they can be configured via {@link #words}.
   *
   * @param ops the operator names.
   * @return the Terminals instance.
   * @since 2.2
   */
  public static Terminals operators(Collection<String> ops) {
    return new Terminals(Operators.lexicon(ops));
  }

  /**
   * Starts to build a new {@code Terminals} instance that recognizes words not already recognized
   * by {@code this} {@code Terminals} instance (typically operators).
   *
   * <p>By default identifiers are recognized through {@link #identifier} during the token-level
   * parsing phase. Use {@link Builder#tokenizeWordsWith} to tokenize differently, and choose an
   * alternative token-level parser accordingly.
   *
   * @param wordScanner defines words recognized by the new instance
   * @since 2.2
   */
  public Builder words(Parser<String> wordScanner) {
    return new Builder(wordScanner);
  }

  /**
   * Builds a {@link Terminals} instance by defining the words and keywords recognized.
   * The following example implements a calculator with logical operators:
   *
   * <pre>   {@code
   *   Terminals terms = Terminals
   *       .operators("<", ">", "=", ">=", "<=")
   *       .words(Scanners.IDENTIFIER)
   *       .caseInsensitiveKeywords("and", "or")
   *       .build();
   *   Parser<String> var = Terminals.identifier();
   *   Parser<?> integer = Terminals.IntegerLiteral.PARSER.map(...);
   *   Parser<Token> and = terms.token("and");
   *   Parser<Token> lessThan = terms.token("<");
   *   ...
   *   Parser<?> parser = grammar.from(
   *       terms.tokenizer().or(IntegerLiteral.TOKENIZER), Scanners.WHITESPACES.optional());
   * }</pre>
   *
   * @since 2.2
   */
  public final class Builder {

    private final Parser<String> wordScanner;
    private Collection<String> keywords = new ArrayList<String>();
    private StringCase stringCase = StringCase.CASE_SENSITIVE;
    private Function<String, ?> wordTokenMap = TokenizerMaps.IDENTIFIER_FRAGMENT;

    Builder(Parser<String> wordScanner) {
      this.wordScanner = Checks.checkNotNull(wordScanner);
    }

    /**
     * Defines keywords. Keywords are special words with their own grammar rules.
     * To get the parser for a keyword, call {@code token(keyword)}.
     *
     * <p>Note that if you call {@link #keywords} or {@link #caseInsensitiveKeywords} multiple
     * times on the same {@link Builder} instance, the last call overwrites previous calls.
     */
    public Builder keywords(@SuppressWarnings("hiding") String... keywords) {
      return keywords(asList(keywords));
    }

    /**
     * Defines keywords. Keywords are special words with their own grammar rules.
     * To get the parser for a keyword, call {@code token(keyword)}.
     *
     * <p>Note that if you call {@link #keywords} or {@link #caseInsensitiveKeywords} multiple
     * times on the same {@link Builder} instance, the last call overwrites previous calls.
     */
    public Builder keywords(@SuppressWarnings("hiding") Collection<String> keywords) {
      this.keywords = keywords;
      this.stringCase = StringCase.CASE_SENSITIVE;
      return this;
    }

    /**
     * Defines case insensitive keywords. Keywords are special words with their own grammar
     * rules. To get the parser for a keyword, call {@code token(keyword)}.
     *
     * <p>Note that if you call {@link #keywords} or {@link #caseInsensitiveKeywords} multiple
     * times on the same {@link Builder} instance, the last call overwrites previous calls.
     */
    public Builder caseInsensitiveKeywords(@SuppressWarnings("hiding") String... keywords) {
      return caseInsensitiveKeywords(asList(keywords));
    }

    /**
     * Defines case insensitive keywords. Keywords are special words with their own grammar
     * rules. To get the parser for a keyword, call {@code token(keyword)}.
     *
     * <p>Note that if you call {@link #keywords} or {@link #caseInsensitiveKeywords} multiple
     * times on the same {@link Builder} instance, the last call overwrites previous calls.
     */
    public Builder caseInsensitiveKeywords(@SuppressWarnings("hiding") Collection<String> keywords) {
      this.keywords = keywords;
      this.stringCase = StringCase.CASE_INSENSITIVE;
      return this;
    }

    /** Configures an alternative tokenization strategy for words (except keywords). */
    public Builder tokenizeWordsWith(Function<String, ?> wordMap) {
      this.wordTokenMap = Checks.checkNotNull(wordMap);
      return this;
    }

    /** Builds a new {@link Terminals} instance that recognizes words defined in this builder. */
    public Terminals build() {
      return new Terminals(
          union(Keywords.lexicon(wordScanner, keywords, stringCase, wordTokenMap)));
    }
  }

  /**
   * Returns a {@link Parser} that recognizes identifiers (a.k.a. words, variable names etc.).
   * Equivalent to {@link Identifier#PARSER}.
   *
   * @since 2.2
   */
  public static Parser<String> identifier() {
    return Identifier.PARSER;
  }

  /**
   * Returns a {@link Parser} that recognizes {@link Tokens.Fragment} token values
   * tagged with one of {@code tags}.
   */
  public static Parser<String> fragment(final Object... tags) {
    return Parsers.token(fromFragment(tags));
  }

  /**
   * Returns a {@link TokenMap} object that only recognizes {@link Tokens.Fragment} token values
   * tagged with one of {@code tags}.
   */
  static TokenMap<String> fromFragment(final Object... tags) {
    return new TokenMap<String>() {
      @Override public String map(final Token token) {
        final Object val = token.value();
        if (val instanceof Fragment) {
          Fragment c = (Fragment) val;
          if (!Objects.in(c.tag(), tags)) return null;
          return c.text();
        }
        else return null;
      }
      @Override public String toString() {
        if (tags.length == 0) return "";
        if (tags.length == 1) return String.valueOf(tags[0]);
        return "[" + Strings.join(", ", tags) + "]";
      }
    };
  }

  @Private static void checkDup(Iterable<String> a, Iterable<String> b) {
    for (String s1 : a) {
      for (String s2 : b) {
        Checks.checkArgument(!s1.equals(s2), "%s duplicated", s1);
      }
    }
  }
}
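
Below is a minimal usage sketch, not part of the library source above, showing how the pieces described in the class Javadoc fit together: a Terminals instance supplies the tokenizer, and token-level parsers are glued to it with Parser.from. It assumes a jparsec 3.x artifact on the classpath and mirrors the generic-type example from the class Javadoc; the TerminalsDemo class name and the typeArgs/genericType grammar are illustrative only, not part of jparsec.

import java.util.List;

import org.jparsec.Parser;
import org.jparsec.Parsers;
import org.jparsec.Scanners;
import org.jparsec.Terminals;

public class TerminalsDemo {
  public static void main(String[] args) {
    // Terminal-level setup, mirroring the class Javadoc: operators and keywords are
    // lexed as RESERVED fragments, other words as IDENTIFIER fragments.
    Terminals terms = Terminals
        .operators("?", "<", ">", ",")
        .words(Scanners.IDENTIFIER)
        .keywords("super", "extends")
        .build();

    // Token-level grammar (illustrative): an identifier followed by a comma-separated
    // list of identifiers between '<' and '>', e.g. Map<String, Integer>.
    Parser<List<String>> typeArgs = Terminals.identifier()
        .sepBy1(terms.token(","))
        .between(terms.token("<"), terms.token(">"));
    Parser<List<String>> genericType = Parsers.sequence(Terminals.identifier(), typeArgs);

    // Glue the token-level grammar to the tokenizer, allowing whitespace between tokens,
    // as in the class Javadoc example.
    Parser<List<String>> parser =
        genericType.from(terms.tokenizer(), Scanners.WHITESPACES.optional());

    System.out.println(parser.parse("Map<String, Integer>"));  // prints [String, Integer]
  }
}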




