All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.starlink.table.jdbc.SqlSyntax Maven / Gradle / Ivy

package uk.ac.starlink.table.jdbc;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;

/**
 * Summarises some aspects of the syntax of an SQL variant,
 * and provides some utilities based on that.
 *
 * @author   Mark Taylor
 * @since    7 Jun 2013
 */
public class SqlSyntax {

    private final SortedSet reservedWords_;
    private final Pattern identifierRegex_;
    private final char quoteChar_;

    /** Standard regular expression for identifiers, as per SQL92. */
    public static final Pattern SQL92_IDENTIFIER_REGEX =
        Pattern.compile( "[A-Za-z][A-Za-z0-9_]*" );

    /**
     * SQL92 list of reserved words.
     * This list was actually taken from ADQL 2.0 standard.
     */
    public static final String[] SQL92_RESERVED = new String[] {
        "ABSOLUTE", "ACTION", "ADD", "ALL", "ALLOCATE", "ALTER", "AND",
        "ANY", "ARE", "AS", "ASC", "ASSERTION", "AT", "AUTHORIZATION",
        "AVG", "BEGIN", "BETWEEN", "BIT", "BIT_LENGTH", "BOTH", "BY",
        "CASCADE", "CASCADED", "CASE", "CAST", "CATALOG", "CHAR",
        "CHARACTER", "CHARACTER_LENGTH", "CHAR_LENGTH", "CHECK", "CLOSE",
        "COALESCE", "COLLATE", "COLLATION", "COLUMN", "COMMIT", "CONNECT",
        "CONNECTION", "CONSTRAINT", "CONSTRAINTS", "CONTINUE", "CONVERT",
        "CORRESPONDING", "COUNT", "CREATE", "CROSS", "CURRENT",
        "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
        "CURRENT_USER", "CURSOR", "DATE", "DAY", "DEALLOCATE", "DECIMAL",
        "DECLARE", "DEFAULT", "DEFERRABLE", "DEFERRED", "DELETE", "DESC",
        "DESCRIBE", "DESCRIPTOR", "DIAGNOSTICS", "DISCONNECT", "DISTINCT",
        "DOMAIN", "DOUBLE", "DROP", "ELSE", "END", "ESCAPE", "EXCEPT",
        "EXCEPTION", "EXEC", "EXECUTE", "EXISTS", "EXTERNAL", "EXTRACT",
        "FALSE", "FETCH", "FIRST", "FLOAT", "FOR", "FOREIGN", "FOUND",
        "FROM", "FULL", "GET", "GLOBAL", "GO", "GOTO", "GRANT",
        "GROUP", "HAVING", "HOUR", "IDENTITY", "IMMEDIATE", "IN",
        "INDICATOR", "INITIALLY", "INNER", "INPUT", "INSENSITIVE",
        "INSERT", "INT", "INTEGER", "INTERSECT", "INTERVAL", "INTO",
        "IS", "ISOLATION", "JOIN", "KEY", "LANGUAGE", "LAST", "LEADING",
        "LEFT", "LEVEL", "LIKE", "LOCAL", "LOWER", "MATCH", "MAX", "MIN",
        "MINUTE", "MODULE", "MONTH", "NAMES", "NATIONAL", "NATURAL",
        "NCHAR", "NEXT", "NO", "NOT", "NULL", "NULLIF", "NUMERIC",
        "OCTET_LENGTH", "OF", "ON", "ONLY", "OPEN", "OPTION", "OR",
        "ORDER", "OUTER", "OUTPUT", "OVERLAPS", "PAD", "PARTIAL",
        "POSITION", "PRECISION", "PREPARE", "PRESERVE", "PRIMARY",
        "PRIOR", "PRIVILEGES", "PROCEDURE", "PUBLIC", "READ", "REAL",
        "REFERENCES", "RELATIVE", "RESTRICT", "REVOKE", "RIGHT",
        "ROLLBACK", "ROWS", "SCHEMA", "SCROLL", "SECOND", "SECTION",
        "SELECT", "SESSION", "SESSION_USER", "SET", "SIZE", "SMALLINT",
        "SOME", "SPACE", "SQL", "SQLCODE", "SQLERROR", "SQLSTATE",
        "SUBSTRING", "SUM", "SYSTEM_USER", "TABLE", "TEMPORARY", "THEN",
        "TIME", "TIMESTAMP", "TIMEZONE_HOUR", "TIMEZONE_MINUTE", "TO",
        "TRAILING", "TRANSACTION", "TRANSLATE", "TRANSLATION", "TRIM",
        "TRUE", "UNION", "UNIQUE", "UNKNOWN", "UPDATE", "UPPER", "USAGE",
        "USER", "USING", "VALUE", "VALUES", "VARCHAR", "VARYING", "VIEW",
        "WHEN", "WHENEVER", "WHERE", "WITH", "WORK", "WRITE", "YEAR",
        "ZONE",
    };

    /** Reserved words taken from the MySQL 5.0.3 manual.  */
    private static final String[] MYSQL_RESERVED = new String[] {
        "ADD", "ALL", "ALTER", "ANALYZE", "AND", "AS", "ASC", "ASENSITIVE",
        "BEFORE", "BETWEEN", "BIGINT", "BINARY", "BLOB", "BOTH", "BY",
        "CALL", "CASCADE", "CASE", "CHANGE", "CHAR", "CHARACTER", "CHECK",
        "COLLATE", "COLUMN", "CONDITION", "CONNECTION", "CONSTRAINT",
        "CONTINUE", "CONVERT", "CREATE", "CROSS", "CURRENT_DATE",
        "CURRENT_TIME", "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR",
        "DATABASE", "DATABASES", "DAY_HOUR", "DAY_MICROSECOND", "DAY_MINUTE",
        "DAY_SECOND", "DEC", "DECIMAL", "DECLARE", "DEFAULT", "DELAYED",
        "DELETE", "DESC", "DESCRIBE", "DETERMINISTIC", "DISTINCT",
        "DISTINCTROW", "DIV", "DOUBLE", "DROP", "DUAL", "EACH", "ELSE",
        "ELSEIF", "ENCLOSED", "ESCAPED", "EXISTS", "EXIT", "EXPLAIN",
        "FALSE", "FETCH", "FLOAT", "FOR", "FORCE", "FOREIGN", "FROM",
        "FULLTEXT", "GOTO", "GRANT", "GROUP", "HAVING", "HIGH_PRIORITY",
        "HOUR_MICROSECOND", "HOUR_MINUTE", "HOUR_SECOND", "IF", "IGNORE",
        "IN", "INDEX", "INFILE", "INNER", "INOUT", "INSENSITIVE", "INSERT",
        "INT", "INTEGER", "INTERVAL", "INTO", "IS", "ITERATE", "JOIN",
        "KEY", "KEYS", "KILL", "LEADING", "LEAVE", "LEFT", "LIKE",
        "LIMIT", "LINES", "LOAD", "LOCALTIME", "LOCALTIMESTAMP", "LOCK",
        "LONG", "LONGBLOB", "LONGTEXT", "LOOP", "LOW_PRIORITY", "MATCH",
        "MEDIUMBLOB", "MEDIUMINT", "MEDIUMTEXT", "MIDDLEINT",
        "MINUTE_MICROSECOND", "MINUTE_SECOND", "MOD", "MODIFIES", "NATURAL",
        "NOT", "NO_WRITE_TO_BINLOG", "NULL", "NUMERIC", "ON", "OPTIMIZE",
        "OPTION", "OPTIONALLY", "OR", "ORDER", "OUT", "OUTER", "OUTFILE",
        "PRECISION", "PRIMARY", "PROCEDURE", "PURGE", "READ", "READS",
        "REAL", "REFERENCES", "REGEXP", "RENAME", "REPEAT", "REPLACE",
        "REQUIRE", "RESTRICT", "RETURN", "REVOKE", "RIGHT", "RLIKE",
        "SCHEMA", "SCHEMAS", "SECOND_MICROSECOND", "SELECT", "SENSITIVE",
        "SEPARATOR", "SET", "SHOW", "SMALLINT", "SONAME", "SPATIAL",
        "SPECIFIC", "SQL", "SQLEXCEPTION", "SQLSTATE", "SQLWARNING",
        "SQL_BIG_RESULT", "SQL_CALC_FOUND_ROWS", "SQL_SMALL_RESULT",
        "SSL", "STARTING", "STRAIGHT_JOIN", "TABLE", "TERMINATED",
        "THEN", "TINYBLOB", "TINYINT", "TINYTEXT", "TO", "TRAILING",
        "TRIGGER", "TRUE", "UNDO", "UNION", "UNIQUE", "UNLOCK", "UNSIGNED",
        "UPDATE", "USAGE", "USE", "USING", "UTC_DATE", "UTC_TIME",
        "UTC_TIMESTAMP", "VALUES", "VARBINARY", "VARCHAR", "VARCHARACTER",
        "VARYING", "WHEN", "WHERE", "WHILE", "WITH", "WRITE", "XOR",
        "YEAR_MONTH", "ZEROFILL",
    };

    /**
     * Reserved words for Oracle,
     * taken from O'Reilly SQL in a Nutshell, 3rd edition
     * (via http://oreilly.com/sql/excerpts/sql-in-nutshell/shared-platform-specific-keywords.htm)
     */
    private static final String[] ORACLE_RESERVED = new String[] {
        "ACCESS", "ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUDIT", 
        "BETWEEN", "BY", "CHAR", "CHECK", "CLUSTER", "COLUMN", "COMMENT", 
        "COMPRESS", "CONNECT", "CREATE", "CURRENT", "DATE", "DECIMAL", 
        "DEFAULT", "DELETE", "DESC", "DISTINCT", "DROP", "ELSE", "EXCLUSIVE", 
        "EXISTS", "FILE", "FLOAT", "FOR", "FROM", "GRANT", "GROUP", "HAVING", 
        "IDENTIFIED", "IMMEDIATE", "IN", "INCREMENT", "INDEX", "INITIAL", 
        "INSERT", "INTEGER", "INTERSECT", "INTO", "IS", "LEVEL", "LIKE", 
        "LOCK", "LONG", "MAXEXTENTS", "MINUS", "MLSLABEL", "MODE", "MODIFY", 
        "NOAUDIT", "NOCOMPRESS", "NOT", "NOWAIT", "NULL", "NUMBER", "OF", 
        "OFFLINE", "ON", "ONLINE", "OPTION", "OR", "ORDER", "PCTFREE", 
        "PRIOR", "PRIVILEGES", "PUBLIC", "RAW", "RENAME", "RESOURCE", 
        "REVOKE", "ROW", "ROWID", "ROWNUM", "ROWS", "SELECT", "SESSION", 
        "SET", "SHARE", "SIZE", "SMALLINT", "START", "SUCCESSFUL", "SYNONYM", 
        "SYSDATE", "TABLE", "THEN", "TO", "TRIGGER", "UID", "UNION", 
        "UNIQUE", "UPDATE", "USER", "VALIDATE", "VALUES", "VARCHAR", 
    };

    /**
     * Reserved words for PostgreSQL,
     * taken from O'Reilly SQL in a Nutshell, 3rd edition
     * (via http://oreilly.com/sql/excerpts/sql-in-nutshell/shared-platform-specific-keywords.htm)
     */
    private static final String[] POSTGRES_RESERVED = new String[] {
        "ABORT", "ADD", "ALL", "ALLOCATE", "ALTER", "ANALYZE", "AND", "ANY",
        "ARE", "AS", "ASC", "ASSERTION", "AT", "AUTHORIZATION", "AVG", 
        "BEGIN", "BETWEEN", "BINARY", "BIT", "BIT_LENGTH", "BOTH", "BY",
        "CASCADE", "CASCADED", "CASE", "CAST", "CATALOG", "CHAR", 
        "CHAR_LENGTH", "CHARACTER", "CHARACTER_LENGTH", "CHECK", "CLOSE",
        "CLUSTER", "COALESCE", "COLLATE", "COLLATION", "COLUMN", "COMMIT",
        "CONNECT", "CONNECTION", "CONSTRAINT", "CONTINUE", "CONVERT", "COPY",
        "CORRESPONDING", "COUNT", "CREATE", "CROSS", "CURRENT", 
        "CURRENT_DATE", "CURRENT_SESSION", "CURRENT_TIME", 
        "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATE", "DEALLOCATE",
        "DEC", "DECIMAL", "DECLARE", "DEFAULT", "DELETE", "DESC", "DESCRIBE",
        "DESCRIPTOR", "DIAGNOSTICS", "DISCONNECT", "DISTINCT", "DO",
        "DOMAIN", "DROP", "ELSE", "END", "ESCAPE", "EXCEPT", "EXCEPTION",
        "EXEC", "EXECUTE", "EXISTS", "EXPLAIN", "EXTEND", "EXTERNAL",
        "EXTRACT", "FALSE", "FETCH", "FIRST", "FLOAT", "FOR", "FOREIGN",
        "FOUND", "FROM", "FULL", "GET", "GLOBAL", "GO", "GOTO", "GRANT",
        "GROUP", "HAVING", "IDENTITY", "IN", "INDICATOR", "INNER", "INPUT",
        "INSERT", "INTERSECT", "INTERVAL", "INTO", "IS", "JOIN", "LAST",
        "LEADING", "LEFT", "LIKE", "LISTEN", "LOAD", "LOCAL", "LOCK",
        "LOWER", "MAX", "MIN", "MODULE", "MOVE", "NAMES", "NATIONAL", 
        "NATURAL", "NCHAR", "NEW", "NO", "NONE", "NOT", "NOTIFY", "NULL",
        "NULLIF", "NUMERIC", "OCTET_LENGTH", "OFFSET", "ON", "OPEN",
        "OR", "ORDER", "OUTER", "OUTPUT", "OVERLAPS", "PARTIAL", "POSITION",
        "PRECISION", "PREPARE", "PRESERVE", "PRIMARY", "PRIVILEGES",
        "PROCEDURE", "PUBLIC", "REFERENCES", "RESET", "REVOKE", "RIGHT",
        "ROLLBACK", "ROWS", "SCHEMA", "SECTION", "SELECT", "SESSION",
        "SESSION_USER", "SET", "SETOF", "SHOW", "SIZE", "SOME", "SQL",
        "SQLCODE", "SQLERROR", "SQLSTATE", "SUBSTRING", "SUM", "SYSTEM_USER",
        "TABLE", "TEMPORARY", "THEN", "TO", "TRAILING", "TRANSACTION",
        "TRANSLATE", "TRANSLATION", "TRIM", "TRUE", "UNION", "UNIQUE",
        "UNKNOWN", "UNLISTEN", "UNTIL", "UPDATE", "UPPER", "USAGE",
        "USER", "USING", "VACUUM", "VALUE", "VALUES", "VARCHAR", "VARYING",
        "VERBOSE", "VIEW", "WHEN", "WHENEVER", "WHERE", "WITH", "WORK",
        "WRITE",
    };

    /**
     * Reserved words for SQL Server,
     * taken from O'Reilly SQL in a Nutshell, 3rd edition
     * (via http://oreilly.com/sql/excerpts/sql-in-nutshell/shared-platform-specific-keywords.htm)
     */
    private static final String[] SQLSERVER_RESERVED = new String[] {
        "ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUTHORIZATION", 
        "BACKUP", "BEGIN", "BETWEEN", "BREAK", "BROWSE", "BULK", "BY", 
        "CASCADE", "CASE", "CHECK", "CHECKPOINT", "CLOSE", "CLUSTERED", 
        "COALESCE", "COLLATE", "COLUMN", "COMMIT", "COMPUTE", "CONSTRAINT", 
        "CONTAINS", "CONTAINSTABLE", "CONTINUE", "CONVERT", "CREATE", 
        "CROSS", "CURRENT", "CURRENT_DATE", "CURRENT_TIME", 
        "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATABASE", 
        "DBCC", "DEALLOCATE", "DECLARE", "DEFAULT", "DELETE", "DENY", 
        "DESC", "DISK", "DISTINCT", "DISTRIBUTED", "DOUBLE", "DROP", 
        "DUMP", "ELSE", "END", "ERRLVL", "ESCAPE", "EXCEPT", "EXEC", 
        "EXECUTE", "EXISTS", "EXIT", "EXTERNAL", "FETCH", "FILE", 
        "FILLFACTOR", "FOR", "FOREIGN", "FREETEXT", "FREETEXTTABLE", "FROM", 
        "FULL", "FUNCTION", "GOTO", "GRANT", "GROUP", "HAVING", "HOLDLOCK", 
        "IDENTITY", "IDENTITY_INSERT", "IDENTITYCOL", "IF", "IN", "INDEX", 
        "INNER", "INSERT", "INTERSECT", "INTO", "IS", "JOIN", "KEY", "KILL", 
        "LEFT", "LIKE", "LINENO", "LOAD", "NATIONAL", "NOCHECK", 
        "NONCLUSTERED", "NOT", "NULL", "NULLIF", "OF", "OFF", "OFFSETS", 
        "ON", "OPEN", "OPENDATASOURCE", "OPENQUERY", "OPENROWSET", "OPENXML", 
        "OPTION", "OR", "ORDER", "OUTER", "OVER", "PERCENT", "PIVOT", "PLAN", 
        "PRECISION", "PRIMARY", "PRINT", "PROC", "PROCEDURE", "PUBLIC", 
        "RAISERROR", "READ", "READTEXT", "RECONFIGURE", "REFERENCES", 
        "REPLICATION", "RESTORE", "RESTRICT", "RETURN", "REVERT", "REVOKE", 
        "RIGHT", "ROLLBACK", "ROWCOUNT", "ROWGUIDCOL", "RULE", "SAVE", 
        "SCHEMA", "SECURITYAUDIT", "SELECT", "SESSION_USER", "SET", 
        "SETUSER", "SHUTDOWN", "SOME", "STATISTICS", "SYSTEM_USER", "TABLE", 
        "TABLESAMPLE", "TEXTSIZE", "THEN", "TO", "TOP", "TRAN", "TRANSACTION", 
        "TRIGGER", "TRUNCATE", "TSEQUAL", "UNION", "UNIQUE", "UNPIVOT", 
        "UPDATE", "UPDATETEXT", "USE", "USER", "VALUES", "VARYING", "VIEW", 
        "WAITFOR", "WHEN", "WHERE", "WHILE", "WITH", "WRITETEXT", 
    };

    /**
     * Constructor.
     *
     * @param  reservedWords  list of words considered reserved for this dialect
     * @param  identifierRegex  regular expression for an identifier token
     *                          in this dialect
     * @param  quoteChar  character which may be used to quote words in
     *                    this dialect (thus avoiding their usual parsing);
     *                    words are quoted with a copy of this character
     *                    at start and end, doubled if this character is
     *                    embedded
     */
    public SqlSyntax( String[] reservedWords, Pattern identifierRegex,
                      char quoteChar ) {
        TreeSet words = new TreeSet();
        for ( int i = 0; i < reservedWords.length; i++ ) {
            words.add( normalise( reservedWords[ i ] ) );
        }
        reservedWords_ = Collections.unmodifiableSortedSet( words );
        identifierRegex_ = identifierRegex;
        quoteChar_ = quoteChar;
        for ( String word : reservedWords_ ) {
            assert isIdentifier( word );
            assert word.equals( normalise( word ) );
        }
    }

    /**
     * Returns an alphabetical list of the reserved words known by this class,
     * in normalised (upper case) form.
     *
     * @return  unmodifiable set of reserved words
     */
    public SortedSet getReservedWords() {
        return reservedWords_;
    }

    /**
     * Indicates whether a given word is reserved.  The result is not
     * sensitive to the case of the supplied word.
     *
     * @param  word  word to test
     * @return  true iff reserved
     */
    public boolean isReserved( String word ) {
        return reservedWords_.contains( normalise( word ) );
    }

    /**
     * Indicates whether a given word is syntactically permitted to act as
     * an identifier.
     *
     * @param  word  word to test
     * @return  true iff identifier
     */
    public boolean isIdentifier( String word ) {
        return identifierRegex_.matcher( word ).matches();
    }

    /**
     * Returns a string which can be used within an SQL query to refer to
     * an item with the name of a given word.
     * If the given word can be used as it stands, it is returned as given.
     * Otherwise (if it's reserved or syntactically unsuitable) a quoted
     * version of the provided word is returned.
     *
     * @param  word  word to use
     * @return   quoted or unquoted version of word
     */
    public String quoteIfNecessary( String word ) {
        return ( isIdentifier( word ) && ! isReserved( word ) )
             ? word
             : quote( word );
    }

    /**
     * Returns a quoted version of a word.
     *
     * 

Note that quoting words is not harmless - unlike * for instance shell syntax, quotes are not just stripped off * where present before processing, but instead in SQL92 and hence * ADQL they modify the interpretation of what's quoted. * This is something to do with the way case folding is handled, * and I (mbt) didn't know about it until Markus Demleitner * persuaded me it's actually true. As it happens MySQL behaves * contrary to the standard in this respect (quoting of column names - * but not table names?? - is harmless) but (for instance) PostgreSQL * does not. Therefore do not use this method indiscriminately, * use quoteIfNecessary instead. * * @param word word to quote * @return quoted word */ public String quote( String word ) { StringBuilder sbuf = new StringBuilder( word.length() + 2 ) .append( quoteChar_ ); for ( int i = 0; i < word.length(); i++ ) { char c = word.charAt( i ); sbuf.append( c ); /* Double up quote characters if they need quoting. * This is how it works in SQL92 anyway. */ if ( c == quoteChar_ ) { sbuf.append( c ); } } sbuf.append( quoteChar_ ); return sbuf.toString(); } /** * Normalises a word into a canonical form suitable for equality matching. * *

Note this method is called from the constructor, which is why it's * static and not overridable. * * @param word input word * @return normalised form */ private static String normalise( String word ) { return word.toUpperCase(); } /** * Returns a list of words that are known to be reserved in a mixed * bag of popular RDBMSs. * * @return reserved word list */ public static String[] getParanoidReservedWords() { Collection words = new HashSet(); words.addAll( Arrays.asList( SQL92_RESERVED ) ); words.addAll( Arrays.asList( MYSQL_RESERVED ) ); words.addAll( Arrays.asList( ORACLE_RESERVED ) ); words.addAll( Arrays.asList( POSTGRES_RESERVED ) ); words.addAll( Arrays.asList( SQLSERVER_RESERVED ) ); List wordList = new ArrayList( words ); Collections.sort( wordList ); return wordList.toArray( new String[ 0 ] ); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy