com.rabbitmq.jms.parse.sql.SqlProduction Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of rabbitmq-jms Show documentation

RabbitMQ JMS Client

There is a newer version: 3.4.0

/* Copyright (c) 2013-2023 Broadcom. All Rights Reserved. The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. */
package com.rabbitmq.jms.parse.sql;

import static com.rabbitmq.jms.parse.sql.SqlTreeType.BINARYOP;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.COLLAPSE1;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.COLLAPSE2;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.CONJUNCTION;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.DISJUNCTION;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.JOINLIST;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.LEAF;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.LIST;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.PATTERN1;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.PATTERN2;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.POSTFIXUNARYOP;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.PREFIXUNARYOP;
import static com.rabbitmq.jms.parse.sql.SqlTreeType.TERNARYOP;

import com.rabbitmq.jms.parse.Multiples.Pair;
import com.rabbitmq.jms.parse.TokenStream;

/**
 * This describes the SQL grammar in terms of the tokens {@link SqlTokenType}.
 * 
 * For each grammar production, we match the alternatives in turn. For each alternative, we match the terms in sequence
 * until we reach the end of the sequence. If we reach the end with all matched we return the resulting tree
 * constructed from the sequence of terms matched (using a {@link SqlTreeType} for that purpose), otherwise we go to the next alternative.
 * If we run out of alternatives, and none of them matched, we return null.
 * 
 * 
 * The top-level expression should check, after matching correctly, that we have consumed all the tokens from the input
 * stream. If so, then we are good to go. If not, the top-level expression has failed to match everything, and we can report the
 * next few tokens to indicate where the failure occurred.
 * 
 * 
 * The order of a production’s alternatives is important: where the prefixes are the same, we match those with more tokens first.
 * 
 * 
 * The grammar, where the uppercase terms are terminals ({@link SqlTokenType}s) and the lowercase terms are non-terminals,
 * is completely defined by the alternatives in the productions declared below.  For example:
 * 
 * or_expr      -> and_expr OR or_expr
 *               | and_expr
 * 
 * corresponds to the production declaration:
 * or_expr   ( cAlt( "and_expr OR or_expr"                              , DISJUNCTION    )
 *           , cAlt( "and_expr"                                         , COLLAPSE1      ) ),
 * 
 * where each alternative is coded. (Because Java will not allow pre-reference of enum constants in enum definitions we cannot
 * use the productions directly but must encode them (in this case as an array of strings); they are decoded during parsing.)
 * 
 * 
 * Each alternative gets a sequence of productions (terminal or non-terminal) and a production action ({@link SqlTreeType}); the sequence
 * is matched in order to match the alternative.  The parser algorithm (for each production) treats the alternatives
 * in declared order and accepts the first one that matches fully.
 * 
 * 
 * Each production {@link SqlProduction} knows how to parse itself. This is the algorithm encoded in {@link #parse(TokenStream)} which recursively
 * calls each potential sub-expression to parse a sequence.  The terminal nodes know how to parse themselves (they are just single tokens),
 * and progress has to be made (in the token sequence) so the recursion will terminate eventually, provided the grammar is well-defined.
 * 
 * 
 * The parse tree is constructed using the {@link SqlTreeType} for a matched alternative (or LEAF in the case of a terminal).  The
 * parsed terms of the alternative are passed to the {@link SqlTreeType} to construct the tree of this type.
 * Each terminal {@link SqlTokenType} is a LEAF in the tree and the token value is the value of the node.
 * 
 * 
 * Non-terminals are associated with an array of coded alternatives; each coded alternative is an {@link SqlTreeType} and a {@link String} array.
 * Each string in the array is the name of a {@link SqlProduction}.
 * 
 * 
 * I would have liked to use {@link SqlProduction} explicitly, but Java doesn't let me use an enumerated type term in
 * the definition of a term, unless it has been defined lexically beforehand (!) so I use strings instead!!
 * 
 * 
 * Terminals are simply proxies for a {@link SqlTokenType} and have no alternatives.
 * 
 * 
 * The root of the grammar is expression, exposed in ROOT.
 * 
 */
enum SqlProduction {
    // non-terminals: grammar_alternative_______________________________   SqlTreeType
    expression( cAlt( "or_expr"                                          , COLLAPSE1      ) ),
    or_expr   ( cAlt( "and_expr OR or_expr"                              , DISJUNCTION    )
              , cAlt( "and_expr"                                         , COLLAPSE1      ) ),
    and_expr  ( cAlt( "not_expr AND and_expr"                            , CONJUNCTION    )
              , cAlt( "not_expr"                                         , COLLAPSE1      ) ),
    not_expr  ( cAlt( "NOT cmp_expr"                                     , PREFIXUNARYOP  )
              , cAlt( "cmp_expr"                                         , COLLAPSE1      ) ),
    cmp_expr  ( cAlt( "arith_expr op_cmp arith_expr"                     , BINARYOP       )
              , cAlt( "arith_expr BETWEEN arith_expr AND arith_expr"     , TERNARYOP      )
              , cAlt( "arith_expr NOT_BETWEEN arith_expr AND arith_expr" , TERNARYOP      )
              , cAlt( "arith_expr"                                       , COLLAPSE1      ) ),
    op_cmp    ( cAlt( "CMP_EQ"                                           , LEAF           )
              , cAlt( "CMP_NEQ"                                          , LEAF           )
              , cAlt( "CMP_LTEQ"                                         , LEAF           )
              , cAlt( "CMP_GTEQ"                                         , LEAF           )
              , cAlt( "CMP_LT"                                           , LEAF           )
              , cAlt( "CMP_GT"                                           , LEAF           ) ),
    op_plus   ( cAlt( "OP_PLUS"                                          , LEAF           )
              , cAlt( "OP_MINUS"                                         , LEAF           ) ),
    op_mult   ( cAlt( "OP_MULT"                                          , LEAF           )
              , cAlt( "OP_DIV"                                           , LEAF           ) ),
    arith_expr( cAlt( "plus_expr"                                        , COLLAPSE1      ) ),
    plus_expr ( cAlt( "mult_expr op_plus plus_expr"                      , BINARYOP       )
              , cAlt( "mult_expr"                                        , COLLAPSE1      ) ),
    mult_expr ( cAlt( "sign_expr op_mult mult_expr"                      , BINARYOP       )
              , cAlt( "sign_expr"                                        , COLLAPSE1      ) ),
    sign_expr ( cAlt( "op_plus sign_expr"                                , PREFIXUNARYOP  )
              , cAlt( "simple"                                           , COLLAPSE1      ) ),
    simple    ( cAlt( "LP expression RP"                                 , COLLAPSE2      )
              , cAlt( "TRUE"                                             , LEAF           )
              , cAlt( "FALSE"                                            , LEAF           )
              , cAlt( "STRING"                                           , LEAF           )
              , cAlt( "number"                                           , COLLAPSE1      )
              , cAlt( "IDENT NULL"                                       , POSTFIXUNARYOP )
              , cAlt( "IDENT NOT_NULL"                                   , POSTFIXUNARYOP )
              , cAlt( "IDENT IN stringlist"                              , BINARYOP       )
              , cAlt( "IDENT NOT_IN stringlist"                          , BINARYOP       )
              , cAlt( "IDENT LIKE pattern"                               , BINARYOP       )
              , cAlt( "IDENT NOT_LIKE pattern"                           , BINARYOP       )
              , cAlt( "IDENT"                                            , LEAF           ) ),
    stringlist( cAlt( "LP strings RP"                                    , COLLAPSE2      ) ),
    strings   ( cAlt( "STRING COMMA strings"                             , JOINLIST       )
              , cAlt( "STRING"                                           , LIST           ) ),
    pattern   ( cAlt( "STRING ESCAPE STRING"                             , PATTERN2       )
              , cAlt( "STRING"                                           , PATTERN1       ) ),
    number    ( cAlt( "HEX"                                              , LEAF           )
              , cAlt( "FLOAT"                                            , LEAF           )
              , cAlt( "INT"                                              , LEAF           ) ),

    // terminals:
    LIKE        (SqlTokenType.LIKE       ),
    NOT_LIKE    (SqlTokenType.NOT_LIKE   ),
    IN          (SqlTokenType.IN         ),
    NOT_IN      (SqlTokenType.NOT_IN     ),
    NULL        (SqlTokenType.NULL       ),
    NOT_NULL    (SqlTokenType.NOT_NULL   ),
    BETWEEN     (SqlTokenType.BETWEEN    ),
    NOT_BETWEEN (SqlTokenType.NOT_BETWEEN),
    AND         (SqlTokenType.AND        ),
    OR          (SqlTokenType.OR         ),
    NOT         (SqlTokenType.NOT        ),
    ESCAPE      (SqlTokenType.ESCAPE     ),
    TRUE        (SqlTokenType.TRUE       ),
    FALSE       (SqlTokenType.FALSE      ),
    CMP_EQ      (SqlTokenType.CMP_EQ     ),
    CMP_NEQ     (SqlTokenType.CMP_NEQ    ),
    CMP_LTEQ    (SqlTokenType.CMP_LTEQ   ),
    CMP_GTEQ    (SqlTokenType.CMP_GTEQ   ),
    CMP_LT      (SqlTokenType.CMP_LT     ),
    CMP_GT      (SqlTokenType.CMP_GT     ),
    OP_PLUS     (SqlTokenType.OP_PLUS    ),
    OP_MINUS    (SqlTokenType.OP_MINUS   ),
    OP_MULT     (SqlTokenType.OP_MULT    ),
    OP_DIV      (SqlTokenType.OP_DIV     ),
    COMMA       (SqlTokenType.COMMA      ),
    LP          (SqlTokenType.LP         ),
    RP          (SqlTokenType.RP         ),
    IDENT       (SqlTokenType.IDENT      ),
    STRING      (SqlTokenType.STRING     ),
    FLOAT       (SqlTokenType.FLOAT      ),
    INT         (SqlTokenType.INT        ),
    HEX         (SqlTokenType.HEX        ),
    ;
    /** The root production of the grammar: {@code expression}. */
    public static final SqlProduction ROOT = expression;

    /** Token type matched by a terminal production; {@code null} for a non-terminal. */
    private final SqlTokenType tokenType;
    /** Coded alternatives of a non-terminal production; {@code null} for a terminal. */
    private final CodedAlternative[] codedAlts;

    /**
     * Creates a non-terminal production.
     * A non-terminal has no token type of its own; it is described entirely by its coded alternatives.
     * @param alts - the coded alternatives of this production (varargs)
     */
    SqlProduction(CodedAlternative ... alts) {
        this.codedAlts = alts;
        this.tokenType = null;   // null token type marks this production as a non-terminal
    }

    /**
     * Creates a terminal production.
     * A terminal stands for a single token type and carries no alternatives.
     * @param tokenType - the token type this terminal matches
     */
    SqlProduction(SqlTokenType tokenType) {
        this.codedAlts = null;   // terminals have no alternatives
        this.tokenType = tokenType;
    }

    /**
     * Parses this production from the token stream by recursive descent.
     * <p>
     * A terminal succeeds when the token returned by {@code ts.readToken()} has this production's token type;
     * the token obtained from {@code ts.getNext()} then becomes a single LEAF node.
     * A non-terminal tries its coded alternatives in declared order and builds the tree of the first
     * alternative that matches fully; if none does, the stream is reset to where this call started.
     * @param ts - token stream to parse
     * @return the tree constructed for this production, or null if it cannot be (fully) matched
     *   at the current position of the stream
     */
    SqlParseTree parse(TokenStream ts) {
        final boolean terminal = (this.tokenType != null);

        if (terminal) {
            // NOTE(review): assumes readToken() inspects without advancing and getNext() consumes — confirm in TokenStream
            if (!isTokenRightType(ts.readToken(), this.tokenType)) {
                return null;
            }
            return new SqlParseTree(new SqlTreeNode(LEAF, ts.getNext()));  // single node tree
        }

        // non-terminal: remember where we began so a total failure can rewind the stream
        final int rewindPosition = ts.position();
        final AlternativeParser matcher = new AlternativeParser(ts);

        for (int i = 0; i < this.codedAlts.length; ++i) {
            final CodedAlternative candidate = this.codedAlts[i];
            if (matcher.match(candidate.alternative())) {
                // first fully matched alternative wins
                return candidate.treeType().tree(matcher.getMatchedChildren());
            }
        }

        // no alternative matched fully
        ts.reset(rewindPosition);
        return null;
    }

    /**
     * Matches the alternatives of one production, one after another, against a token stream.
     * Subtrees already parsed for a previous alternative are kept and reused when the next
     * alternative begins with the same productions, so a shared prefix is not parsed twice.
     */
    private static final class AlternativeParser {

        private static final SqlProduction[] EMPTY_ALTERNATIVE = new SqlProduction[0];
        private static final SqlParseTree[]  EMPTY_SUBTREES    = new SqlParseTree[0];

        private final TokenStream tokenStream;
        private final int startPosition;

        // invariant: alternative.length == subtrees.length
        //            subtrees[i] is the parsed tree of alternative[i], for i in 0..numberMatched-1
        //            tokenStream from startPosition to tokenStream.position() contains the tokens which parse to subtrees[0..numberMatched-1]
        private SqlProduction[] alternative = EMPTY_ALTERNATIVE;
        private SqlParseTree[] subtrees = EMPTY_SUBTREES;
        private int numberMatched = 0;

        /**
         * @param tokenStream - stream to match against; its current position is recorded as the start position
         */
        AlternativeParser(TokenStream tokenStream) {
            this.tokenStream = tokenStream;
            this.startPosition = tokenStream.position();
        }

        /**
         * Try to match an alternative, continuing from any reusable matches of the previous one.
         * @param alt - the term sequence of the alternative
         * @return true if every term of the alternative was matched
         */
        boolean match(SqlProduction[] alt) {
            setNewAlternative(alt);

            while (this.numberMatched < this.subtrees.length) {
                final int term = this.numberMatched;
                final SqlParseTree parsed = this.alternative[term].parse(this.tokenStream);
                this.subtrees[term] = parsed;
                if (parsed == null) {
                    break;  // this term failed; earlier matches are kept for possible reuse
                }
                this.numberMatched = term + 1;
            }
            return isFullyMatched();
        }

        /**
         * Install a new alternative, either carrying over the subtrees matched so far
         * or starting again from the recorded start position.
         */
        private void setNewAlternative(SqlProduction[] alt) {
            final SqlParseTree[] freshSubtrees = new SqlParseTree[alt.length];

            if (!canUsePreviousMatches(alt)) {
                // start over from the beginning
                this.numberMatched = 0;
                this.tokenStream.reset(this.startPosition);
            } else {
                System.arraycopy(this.subtrees, 0, freshSubtrees, 0, this.numberMatched);
            }
            this.alternative = alt;
            this.subtrees = freshSubtrees;
        }

        /**
         * @return true if the new alternative is at least as long as the matched prefix
         *   and starts with exactly the productions already matched
         */
        private boolean canUsePreviousMatches(SqlProduction[] alt) {
            final int matchedSoFar = this.numberMatched;
            if (alt.length < matchedSoFar) {
                return false;
            }
            for (int i = 0; i < matchedSoFar; i++) {
                if (this.alternative[i] != alt[i]) {
                    return false;
                }
            }
            return true;
        }

        /** @return true if all terms of the current alternative have been matched */
        private boolean isFullyMatched() {
            return this.subtrees.length == this.numberMatched;
        }

        /** @return the subtrees of the current alternative if it is fully matched, otherwise null */
        SqlParseTree[] getMatchedChildren() {
            if (isFullyMatched()) {
                return this.subtrees;
            }
            return null;
        }
    }

    /**
     * Checks a token against an expected token type.
     * @param tok - token to check (may be null)
     * @param tokType - the expected type
     * @return true if tok is a non-null {@link SqlToken} whose type is tokType
     */
    private static final boolean isTokenRightType(SqlToken tok, SqlTokenType tokType) {
        if (tok == null) {
            return false;
        }
        return tokType == tok.type();
    }

    /**
     * Non-terminal coded alternative (array of these used to declare non-terminals).
     * The terms are split on runs of whitespace after trimming, so leading, trailing or repeated
     * spaces in a hand-aligned grammar string do not produce empty production names.
     * @param terms - whitespace-separated string of production names
     * @param treeNodeType - type of tree to construct if matched
     * @return a coded grammar alternative for a production
     */
    private static final CodedAlternative cAlt(String terms, SqlTreeType treeNodeType) {
        final String[] productionNames = terms.trim().split("\\s+");
        return new CodedAlternative(treeNodeType, productionNames);
    }

    /**
     * An alternative is coded as a specialisation of a simple {@link Pair} which initially just stores the coded form.
     * The accessor methods {@link #alternative()} and {@link #treeType()}
     * do the necessary decoding (to an array of {@link SqlProduction} and a {@link SqlTreeType} respectively).
     */
    private static final class CodedAlternative extends Pair {
        public CodedAlternative(SqlTreeType l, String[] r) { super(l, r); }
        /** Get array of productions: the term sequence of the alternative */
        public SqlProduction[] alternative() { return aValueOf(this.right()); }
        /** Get the type of tree to construct for this alternative */
        public SqlTreeType treeType()    { return this.left(); }

        /**
         * Convert array of {@link String}s to array of {@link SqlProduction}s
         */
        private static final SqlProduction[] aValueOf(String[] ss) {
            SqlProduction[] scts = new SqlProduction[ss.length];
            for (int i=0; i

    

    

    
            
    
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api