All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.huemulsolutions.bigdata.sql_decode.huemul_sql_decode.scala Maven / Gradle / Ivy

Go to download

Obtiene los campos y tablas utilizados en una sentencia SQL. Dada la siguiente consulta:description select campo1 as Id, campo2 as descripcion from Tabla_datos retorna un arregla con Id (obtenido desde campo1 de tabla "tabla_datos"), Descripcion (obtenido desde campo2 de tabla "tabla_datos")

The newest version!
package com.huemulsolutions.bigdata.sql_decode

import scala.collection.mutable._
import util.control.Breaks._
import scala.util.Try

class huemul_SQL_Decode(Symbols_user: ArrayBuffer[String], AutoIncSubQuery: Int ) extends Serializable {
  Symbols_user.append("AS")
  Symbols_user.append(")")
  Symbols_user.append("(")
  Symbols_user.append("CASE")
  Symbols_user.append("THEN")
  Symbols_user.append("END")
  Symbols_user.append("ELSE")
  Symbols_user.append("FROM")
  Symbols_user.append("WHERE")
  Symbols_user.append("GROUP")
  Symbols_user.append("BY")
  Symbols_user.append("HAVING")
  Symbols_user.append("IS")
  Symbols_user.append("NULL")
  //ADD DataTypes
  Symbols_user.append("INT")
  Symbols_user.append("INTEGER")
  Symbols_user.append("STRING")
  Symbols_user.append("VARCHAR")
  Symbols_user.append("VARCHAR2")
  Symbols_user.append("BOOLEAN")
  Symbols_user.append("LONG")
  Symbols_user.append("REAL")
  Symbols_user.append("FLOAT")
  Symbols_user.append("DATE")
  Symbols_user.append("DATETIME")
  Symbols_user.append("TIMESTAMP")
  Symbols_user.append("TINYINT")
  Symbols_user.append("SMALLINT")
  
  val Symbols_joins: ArrayBuffer[String] = new ArrayBuffer[String] 
  Symbols_joins.append("INNER")
  Symbols_joins.append("LEFT")
  Symbols_joins.append("RIGHT")
  Symbols_joins.append("FULL")
  Symbols_joins.append("JOIN")
  
  val Symbols: ArrayBuffer[String] = new ArrayBuffer[String] 
  Symbols.append("+")
  Symbols.append("-")
  Symbols.append("*")
  Symbols.append("/")
  Symbols.append(",")
  Symbols.append(".")
  Symbols.append("%")
  Symbols.append("=")
  Symbols.append(">")
  Symbols.append("<")
  Symbols.append("!")
  
  val SQL_Functions: ArrayBuffer[String] = new ArrayBuffer[String]
  SQL_Functions.append("ABS")
  SQL_Functions.append("ACOS")
  SQL_Functions.append("ADD_MONTHS")
  SQL_Functions.append("AND")
  SQL_Functions.append("APPROX_COUNT_DISTINCT")
  SQL_Functions.append("APPROX_PERCENTILE")
  SQL_Functions.append("ARRAY")
  SQL_Functions.append("ARRAY_CONTAINS")
  SQL_Functions.append("ASCII")
  SQL_Functions.append("ASIN")
  SQL_Functions.append("ASSERT_TRUE")
  SQL_Functions.append("ATAN")
  SQL_Functions.append("ATAN2")
  SQL_Functions.append("AVG")
  SQL_Functions.append("BASE64")
  SQL_Functions.append("BIGINT")
  SQL_Functions.append("BIN")
  SQL_Functions.append("BINARY")
  SQL_Functions.append("BIT_LENGTH")
  SQL_Functions.append("BOOLEAN")
  SQL_Functions.append("BROUND")
  SQL_Functions.append("CAST")
  SQL_Functions.append("CBRT")
  SQL_Functions.append("CEIL")
  SQL_Functions.append("CEILING")
  SQL_Functions.append("CHAR")
  SQL_Functions.append("CHAR_LENGTH")
  SQL_Functions.append("CHARACTER_LENGTH")
  SQL_Functions.append("CHR")
  SQL_Functions.append("COALESCE")
  SQL_Functions.append("COLLECT_LIST")
  SQL_Functions.append("COLLECT_SET")
  SQL_Functions.append("CONCAT")
  SQL_Functions.append("CONCAT_WS")
  SQL_Functions.append("CONV")
  SQL_Functions.append("CORR")
  SQL_Functions.append("COS")
  SQL_Functions.append("COSH")
  SQL_Functions.append("COT")
  SQL_Functions.append("COUNT")
  SQL_Functions.append("COUNT_MIN_SKETCH")
  SQL_Functions.append("COVAR_POP")
  SQL_Functions.append("COVAR_SAMP")
  SQL_Functions.append("CRC32")
  SQL_Functions.append("CUBE")
  SQL_Functions.append("CUME_DIST")
  SQL_Functions.append("CURRENT_DATABASE")
  SQL_Functions.append("CURRENT_DATE")
  SQL_Functions.append("CURRENT_TIMESTAMP")
  SQL_Functions.append("DATE")
  SQL_Functions.append("DATE_ADD")
  SQL_Functions.append("DATE_FORMAT")
  SQL_Functions.append("DATE_SUB")
  SQL_Functions.append("DATE_TRUNC")
  SQL_Functions.append("DATEDIFF")
  SQL_Functions.append("DAY")
  SQL_Functions.append("DAYOFMONTH")
  SQL_Functions.append("DAYOFWEEK")
  SQL_Functions.append("DAYOFYEAR")
  SQL_Functions.append("DECIMAL")
  SQL_Functions.append("DECODE")
  SQL_Functions.append("DEGREES")
  SQL_Functions.append("DENSE_RANK")
  SQL_Functions.append("DOUBLE")
  SQL_Functions.append("E")
  SQL_Functions.append("ELT")
  SQL_Functions.append("ENCODE")
  SQL_Functions.append("EXP")
  SQL_Functions.append("EXPLODE")
  SQL_Functions.append("EXPLODE_OUTER")
  SQL_Functions.append("EXPM1")
  SQL_Functions.append("FACTORIAL")
  SQL_Functions.append("FIND_IN_SET")
  SQL_Functions.append("FIRST")
  SQL_Functions.append("FIRST_VALUE")
  SQL_Functions.append("FLOAT")
  SQL_Functions.append("FLOOR")
  SQL_Functions.append("FORMAT_NUMBER")
  SQL_Functions.append("FORMAT_STRING")
  SQL_Functions.append("FROM_JSON")
  SQL_Functions.append("FROM_UNIXTIME")
  SQL_Functions.append("FROM_UTC_TIMESTAMP")
  SQL_Functions.append("GET_JSON_OBJECT")
  SQL_Functions.append("GREATEST")
  SQL_Functions.append("GROUPING")
  SQL_Functions.append("GROUPING_ID")
  SQL_Functions.append("HASH")
  SQL_Functions.append("HEX")
  SQL_Functions.append("HOUR")
  SQL_Functions.append("HYPOT")
  SQL_Functions.append("IF")
  SQL_Functions.append("IFNULL")
  SQL_Functions.append("IN")
  SQL_Functions.append("INITCAP")
  SQL_Functions.append("INLINE")
  SQL_Functions.append("INLINE_OUTER")
  SQL_Functions.append("INPUT_FILE_BLOCK_LENGTH")
  SQL_Functions.append("INPUT_FILE_BLOCK_START")
  SQL_Functions.append("INPUT_FILE_NAME")
  SQL_Functions.append("INSTR")
  SQL_Functions.append("INT")
  SQL_Functions.append("ISNAN")
  SQL_Functions.append("ISNOTNULL")
  SQL_Functions.append("ISNULL")
  SQL_Functions.append("JAVA_METHOD")
  SQL_Functions.append("JSON_TUPLE")
  SQL_Functions.append("KURTOSIS")
  SQL_Functions.append("LAG")
  SQL_Functions.append("LAST")
  SQL_Functions.append("LAST_DAY")
  SQL_Functions.append("LAST_VALUE")
  SQL_Functions.append("LCASE")
  SQL_Functions.append("LEAD")
  SQL_Functions.append("LEAST")
  SQL_Functions.append("LEFT")
  SQL_Functions.append("LENGTH")
  SQL_Functions.append("LEVENSHTEIN")
  SQL_Functions.append("LIKE")
  SQL_Functions.append("LN")
  SQL_Functions.append("LOCATE")
  SQL_Functions.append("LOG")
  SQL_Functions.append("LOG10")
  SQL_Functions.append("LOG1P")
  SQL_Functions.append("LOG2")
  SQL_Functions.append("LOWER")
  SQL_Functions.append("LPAD")
  SQL_Functions.append("LTRIM")
  SQL_Functions.append("MAP")
  SQL_Functions.append("MAP_KEYS")
  SQL_Functions.append("MAP_VALUES")
  SQL_Functions.append("MAX")
  SQL_Functions.append("MD5")
  SQL_Functions.append("MEAN")
  SQL_Functions.append("MIN")
  SQL_Functions.append("MINUTE")
  SQL_Functions.append("MOD")
  SQL_Functions.append("MONOTONICALLY_INCREASING_ID")
  SQL_Functions.append("MONTH")
  SQL_Functions.append("MONTHS_BETWEEN")
  SQL_Functions.append("NAMED_STRUCT")
  SQL_Functions.append("NANVL")
  SQL_Functions.append("NEGATIVE")
  SQL_Functions.append("NEXT_DAY")
  SQL_Functions.append("NOT")
  SQL_Functions.append("NOW")
  SQL_Functions.append("NTILE")
  SQL_Functions.append("NULLIF")
  SQL_Functions.append("NVL")
  SQL_Functions.append("NVL2")
  SQL_Functions.append("OCTET_LENGTH")
  SQL_Functions.append("OR")
  SQL_Functions.append("PARSE_URL")
  SQL_Functions.append("PERCENT_RANK")
  SQL_Functions.append("PERCENTILE")
  SQL_Functions.append("PERCENTILE_APPROX")
  SQL_Functions.append("PI")
  SQL_Functions.append("PMOD")
  SQL_Functions.append("POSEXPLODE")
  SQL_Functions.append("POSEXPLODE_OUTER")
  SQL_Functions.append("POSITION")
  SQL_Functions.append("POSITIVE")
  SQL_Functions.append("POW")
  SQL_Functions.append("POWER")
  SQL_Functions.append("PRINTF")
  SQL_Functions.append("QUARTER")
  SQL_Functions.append("RADIANS")
  SQL_Functions.append("RAND")
  SQL_Functions.append("RANDN")
  SQL_Functions.append("RANK")
  SQL_Functions.append("REFLECT")
  SQL_Functions.append("REGEXP_EXTRACT")
  SQL_Functions.append("REGEXP_REPLACE")
  SQL_Functions.append("REPEAT")
  SQL_Functions.append("REPLACE")
  SQL_Functions.append("REVERSE")
  SQL_Functions.append("RIGHT")
  SQL_Functions.append("RINT")
  SQL_Functions.append("RLIKE")
  SQL_Functions.append("ROLLUP")
  SQL_Functions.append("ROUND")
  SQL_Functions.append("ROW_NUMBER")
  SQL_Functions.append("RPAD")
  SQL_Functions.append("RTRIM")
  SQL_Functions.append("SECOND")
  SQL_Functions.append("SENTENCES")
  SQL_Functions.append("SHA")
  SQL_Functions.append("SHA1")
  SQL_Functions.append("SHA2")
  SQL_Functions.append("SHIFTLEFT")
  SQL_Functions.append("SHIFTRIGHT")
  SQL_Functions.append("SHIFTRIGHTUNSIGNED")
  SQL_Functions.append("SIGN")
  SQL_Functions.append("SIGNUM")
  SQL_Functions.append("SIN")
  SQL_Functions.append("SINH")
  SQL_Functions.append("SIZE")
  SQL_Functions.append("SKEWNESS")
  SQL_Functions.append("SMALLINT")
  SQL_Functions.append("SORT_ARRAY")
  SQL_Functions.append("SOUNDEX")
  SQL_Functions.append("SPACE")
  SQL_Functions.append("SPARK_PARTITION_ID")
  SQL_Functions.append("SPLIT")
  SQL_Functions.append("SQRT")
  SQL_Functions.append("STACK")
  SQL_Functions.append("STD")
  SQL_Functions.append("STDDEV")
  SQL_Functions.append("STDDEV_POP")
  SQL_Functions.append("STDDEV_SAMP")
  SQL_Functions.append("STR_TO_MAP")
  SQL_Functions.append("STRING")
  SQL_Functions.append("STRUCT")
  SQL_Functions.append("SUBSTR")
  SQL_Functions.append("SUBSTRING")
  SQL_Functions.append("SUBSTRING_INDEX")
  SQL_Functions.append("SUM")
  SQL_Functions.append("TAN")
  SQL_Functions.append("TANH")
  SQL_Functions.append("TIMESTAMP")
  SQL_Functions.append("TINYINT")
  SQL_Functions.append("TO_DATE")
  SQL_Functions.append("TO_JSON")
  SQL_Functions.append("TO_TIMESTAMP")
  SQL_Functions.append("TO_UNIX_TIMESTAMP")
  SQL_Functions.append("TO_UTC_TIMESTAMP")
  SQL_Functions.append("TRANSLATE")
  SQL_Functions.append("TRIM")
  SQL_Functions.append("TRUNC")
  SQL_Functions.append("UCASE")
  SQL_Functions.append("UNBASE64")
  SQL_Functions.append("UNHEX")
  SQL_Functions.append("UNIX_TIMESTAMP")
  SQL_Functions.append("UPPER")
  SQL_Functions.append("UUID")
  SQL_Functions.append("VAR_POP")
  SQL_Functions.append("VAR_SAMP")
  SQL_Functions.append("VARIANCE")
  SQL_Functions.append("WEEKOFYEAR")
  SQL_Functions.append("WHEN")
  SQL_Functions.append("WINDOW")
  SQL_Functions.append("XPATH")
  SQL_Functions.append("XPATH_BOOLEAN")
  SQL_Functions.append("XPATH_DOUBLE")
  SQL_Functions.append("XPATH_FLOAT")
  SQL_Functions.append("XPATH_INT")
  SQL_Functions.append("XPATH_LONG")
  SQL_Functions.append("XPATH_NUMBER")
  SQL_Functions.append("XPATH_SHORT")
  SQL_Functions.append("XPATH_STRING")
  SQL_Functions.append("YEAR")
  
  val Symbols_keys: ArrayBuffer[huemul_sql_symbol] = new ArrayBuffer[huemul_sql_symbol] 
  Symbols_keys.append(new huemul_sql_symbol("(",")",false))
  Symbols_keys.append(new huemul_sql_symbol("'","'",true))
  Symbols_keys.append(new huemul_sql_symbol(""""""",""""""",true))
  
  val Symbols_drops: ArrayBuffer[String] = new ArrayBuffer[String] 
  Symbols_drops.append("\n")
  
  var line: Int = 0
  private var numTempSubQuery: Int = AutoIncSubQuery
  def getNumTempSubQuery(): Int = numTempSubQuery
  
  /**
   * split a string in characters, search for "\n" and others for split in two o more words
   */
  private def splitKeys(text: String, pos_real_start: Int, pos_real_end: Int): ArrayBuffer[huemul_sql_symbol_base] = {
    val Result: ArrayBuffer[huemul_sql_symbol_base] = new ArrayBuffer[huemul_sql_symbol_base] ()
    var rest_of_text: String = text
    var position_word: Integer = 0
    //var cycle: Int = 0
    var text_found: Boolean = false
    var text_found_save: Boolean = true
    var add_line: Boolean = false
    val enter: String = "\n"
    
    var pos_start: Int = pos_real_start
    val pos_end: Int = pos_real_end
    while (position_word < rest_of_text.length()) {
      text_found_save = true
      add_line = false
      text_found = false
      
      if (Symbols.exists { x => x(0) == rest_of_text(position_word) })
        text_found = true
      else if (Symbols_keys.exists { x => x.getsymbol_start(0) == rest_of_text(position_word) || x.getsymbol_end(0) == rest_of_text(position_word) })
        text_found = true
      else if (rest_of_text(position_word) == enter(0) || rest_of_text(position_word).toInt == 13) {
        text_found = true
        text_found_save = false
        add_line = true
      }       
      
      //split text if found special characters
      if (text_found) {
        val text_before = rest_of_text.substring(0,position_word)
        val text_char = rest_of_text.substring(position_word,position_word+1)
        rest_of_text = rest_of_text.substring(position_word+1,rest_of_text.length())
        //Only insert text_before if pos > 0, example  [*campo5] only insert [*] and rest of text [campo5] is for next iteration 
        if (position_word > 0){
          Result.append(new huemul_sql_symbol_base(text_before, line, pos_start, pos_start+position_word))
          pos_start = pos_start+position_word
        }
          
        if (text_found_save) {
          Result.append(new huemul_sql_symbol_base(text_char, line, pos_start, pos_start+1))
          pos_start = pos_start+1
        }
        
        position_word = 0
      } else 
        position_word += 1   
      
      if (add_line) {
        line += 1
        pos_start += 1
      }     
    }
  
    if (rest_of_text.nonEmpty)
      Result.append(new huemul_sql_symbol_base(rest_of_text, line, pos_start,pos_end ))
    
    Result
  }
  
  /**
   * split a text in words, and return only valid words (without spaces and line return)
   */
  private def splitWords(text: String): ArrayBuffer[huemul_sql_symbol_base] = {
    val Result: ArrayBuffer[huemul_sql_symbol_base] = new ArrayBuffer[huemul_sql_symbol_base] ()
    val enter: String = "\n"
    var rest_of_text: String = text
    //println(rest_of_text)
    //Get first word
    var position_end: Integer = 0
    var word: String = null
    var cycle: Int = 0
    line = 1
    var position_real_start: Int = 0
    var position_real_end: Int = 0
    while (position_end >= 0 && cycle <= 20000) {
      position_end = rest_of_text.indexOf(" ")
      position_real_end = position_real_start + position_end
      if (position_end >= 0) {
        word = rest_of_text.substring(0,position_end)
        rest_of_text = rest_of_text.substring(position_end+1, rest_of_text.length())
      } else {
        word = rest_of_text
        position_real_end += rest_of_text.length()+1
        rest_of_text = null
        
      }
            
      if (word != null && word.trim() != "") {
        //split word, searching new characters
        val NewResult = splitKeys(word, position_real_start, position_real_end)
        Result.appendAll(NewResult)                 
      } else if (word == enter || (word.nonEmpty && word.charAt(0).toInt == 13))
        line += 1
      
      //println(s"line: $line pos: [$position_end], word: [$word], word len: [${word.length()}]  real ini: $position_real_start, real end: $position_real_end")
      
      position_real_start = position_real_end + 1
      
      cycle += 1
    }
    
    
    Result
  }
  
  /**
   * remove comments (like --), 
   */
  def Analyze_SQL_RemoveComments(p_words: ArrayBuffer[huemul_sql_symbol_base]): ArrayBuffer[huemul_sql_symbol_base] = {
    val local_words = p_words
    var actual_line = -1
    var position = 0
    var remove_start_pos: Int = -1
    while (position < local_words.length-1) {
      //doesn't have commented lines
      if (actual_line == -1) {
        if (local_words(position).getsymbol == "-" && local_words(position+1).getsymbol == "-") {
          actual_line = local_words(position).getsymbol_line
          remove_start_pos = position
        }
      } else {
        if (actual_line != local_words(position).getsymbol_line) {
          //Remove lines
          local_words.remove(remove_start_pos, position - remove_start_pos )

          //set variables to re-start
          actual_line = -1
          position = remove_start_pos - 1
        }
      }
      
      position += 1
    }
    
    local_words
  }
  
  /**
   * type for return result of get_select_on_select method
   */
  private class class_get_select_on_select {
    var position: Int = 0
    var select_on_select: huemul_sql_decode_result = _
  }
  
  /**
   * find end of subquerys
   */
  private def get_select_on_select(position: Int, position_max: Int, SQL: String, localWords: ArrayBuffer[huemul_sql_symbol_base], TablesAndColumns: ArrayBuffer[huemul_sql_tables_and_columns]): class_get_select_on_select =  {
    //SELECT INSIDE SELECT, GET COLUMN AND TABLES
    val res = new class_get_select_on_select()
      
    var last_pos_close: Int = position + 1
    var NumParen: Int = 1
    var NumText_01: Int = 0
    var NumText_02: Int = 0
    
    //search last position to find close ")"
    while (last_pos_close < position_max && NumParen > 0 ) {
      last_pos_close  += 1
      val word_close = localWords(last_pos_close).getsymbol.toUpperCase()
      
      if (word_close == "'") {
        if (NumText_01 == 0)
          NumText_01 = 1
        else 
          NumText_01 = 0
      }
      else if (word_close == """"""") {
        if (NumText_02 == 0)
          NumText_02 = 1
        else 
          NumText_02 = 0
      }
      else if (word_close == ")" && NumText_01 == 0 && NumText_02 == 0)
        NumParen -= 1
      else if (word_close == "(" && NumText_01 == 0 && NumText_02 == 0)
        NumParen += 1
    }
    
    //if found close
    if (NumParen == 0) {                
      val temp1_sql = SQL.substring(localWords(position+1).getpos_start, localWords(last_pos_close-1).getpos_end )
      res.select_on_select  = decodeSQL(temp1_sql, TablesAndColumns)
      //new position                    
      res.position = last_pos_close
    }
    
    
    res
  }
  
  /**
   * close "FROM", return SQL and find database.table information of columns used
   */
  private def Analyze_SQL_CloseFROM (decode_result: huemul_sql_decode_result, TablesAndColumns: ArrayBuffer[huemul_sql_tables_and_columns], position_start: Int, position_end: Int, SQL: String ): huemul_sql_decode_result = {
    decode_result.from_sql = SQL.substring(position_start, position_end) 
    
    var textTables: String = ""
    var localTablesAndColumns: ArrayBuffer[huemul_sql_tables_and_columns] = new ArrayBuffer[huemul_sql_tables_and_columns]()
    decode_result.tables.foreach { x =>
      
      if (x.database_name == null) {
        val tabRes = TablesAndColumns.filter { y => y.table_name != null && y.table_name.toUpperCase() == x.table_name.toUpperCase()  }  
        if (tabRes.nonEmpty)
          x.database_name = tabRes(0).database_name
      }
      
      if (x.tableAlias_name == null)
        x.tableAlias_name = x.table_name
      
      val dbandtable = s"[${x.database_name}.${x.table_name}]"
      if (!textTables.contains(dbandtable)){
        localTablesAndColumns = localTablesAndColumns ++ TablesAndColumns.filter { y => y.table_name.toUpperCase() == x.table_name.toUpperCase() && y.database_name.toUpperCase() == x.database_name.toUpperCase()  }  
        textTables = textTables.concat(dbandtable)
      }
      //println(s"database_name: [${x.database_name}], table_Name: [${x.table_name}], alias: [${x.tableAlias_name}] [${x.tableAlias_name.length()} (${x.tableAlias_name.charAt(x.tableAlias_name.length()-1).toInt})]")  
    }
    
    //println ("*** columns")
    decode_result.columns.foreach { x => 
      //println(s"name: [${x.column_name}] sql: [${x.column_sql}] ")
      if (x.column_name == null && x.column_sql == "*") {
        //get "select *, add all columns origin from all tables or alias specify
        decode_result.tables.foreach { x_tables => 
            localTablesAndColumns.filter { x_newColumn => x_newColumn.table_name.toUpperCase() == x_tables.table_name.toUpperCase() && x_newColumn.database_name.toUpperCase() == x_tables.database_name.toUpperCase() }.foreach { x_newColumn =>  
              //Add all columns of table
              val addCol = new huemul_sql_columns()
              addCol.column_name = x_newColumn.column_name
              addCol.column_sql = s"${x_tables.tableAlias_name}.${x_newColumn.column_name} --added automatically by huemul_sql_decode"
              val addcolumn_origin = new huemul_sql_columns_origin()
              addcolumn_origin.trace_column_name = x_newColumn.column_name
              addcolumn_origin.trace_database_name = x_newColumn.database_name
              addcolumn_origin.trace_table_name = x_newColumn.table_name
              addcolumn_origin.trace_tableAlias_name = x_tables.tableAlias_name
              addCol.column_origin.append(addcolumn_origin)
              decode_result.columns.append(addCol)
            }
        }
        
      } else if (x.column_name == null && x.column_origin.length == 1 && x.column_origin(0).trace_column_name == "*") {
        //get database & table names
        val localDB = decode_result.tables.filter { x_tables => x_tables.tableAlias_name.toUpperCase() == x.column_origin(0).trace_tableAlias_name.toUpperCase()}
        if (localDB.nonEmpty) {
          //filter table from localtables
          val newColumns = localTablesAndColumns.filter { x_localcols => x_localcols.table_name.toUpperCase() == localDB(0).table_name.toUpperCase() && x_localcols.database_name.toUpperCase() == localDB(0).database_name.toUpperCase() }
          
          newColumns.foreach { x_newcolums =>
            val addCol = new huemul_sql_columns()
            addCol.column_name = x_newcolums.column_name
            addCol.column_sql = x_newcolums.column_name.concat(" --added automatically by huemul")
            val addcolumn_origin = new huemul_sql_columns_origin()
            addcolumn_origin.trace_column_name = x_newcolums.column_name
            addcolumn_origin.trace_database_name = x_newcolums.database_name
            addcolumn_origin.trace_table_name = x_newcolums.table_name
            addcolumn_origin.trace_tableAlias_name = localDB(0).tableAlias_name
            addCol.column_origin.append(addcolumn_origin)
            decode_result.columns.append(addCol) 
          }           
        }
      } else {
        
        x.column_origin.foreach { y_col_orig =>
          //complete database & Table information)
          if (y_col_orig.trace_database_name == null) {          
            //Search for table trace 
            //                                     alias found in "SELECT" sentence              alias of table equal to alias referece in column              
            val RegFound = decode_result.tables.filter { z_tab1 => y_col_orig.trace_tableAlias_name != null  && z_tab1.tableAlias_name.toUpperCase() == y_col_orig.trace_tableAlias_name.toUpperCase() }
            if (RegFound.length == 1) {
              //get tables information of "FROM" sentence
              y_col_orig.trace_table_name = RegFound(0).table_name
              y_col_orig.trace_database_name= RegFound(0).database_name
              //y_col_orig.trace_tableAlias_name = RegFound(0).tableAlias_name
            } else if (y_col_orig.trace_tableAlias_name == null) {              
              //if not found with self query, search in param TablesAndColumns (all columns & tables)
              val ResColAndTable = TablesAndColumns.filter { z_table => z_table.column_name != null && z_table.column_name.toUpperCase() ==  y_col_orig.trace_column_name.toUpperCase() }
              
              //search table name of column found in "FROM" table list  
              var isFound: Boolean = false
              ResColAndTable.foreach { z_table2 => 
                val tabFound = decode_result.tables.filter { a_tabfrom => a_tabfrom.table_name.toUpperCase() == z_table2.table_name.toUpperCase() }
                if (tabFound.length == 1){
                  isFound = true
                  y_col_orig.trace_table_name = z_table2.table_name
                  y_col_orig.trace_database_name = z_table2.database_name
                  y_col_orig.trace_tableAlias_name = z_table2.table_name
                }
              }
              
              if (!isFound){
                y_col_orig.trace_table_name = null //UNKNOWN
                y_col_orig.trace_database_name = null //UNKNOWN
              }
            }
          }
         
        }
      }


      //distinct
      val finalOrigin: ArrayBuffer[huemul_sql_columns_origin] = new ArrayBuffer[huemul_sql_columns_origin]()
      x.column_origin.foreach { x_ori_from =>  
        if (!finalOrigin.exists { x_fin =>
          x_fin.trace_database_name == x_ori_from.trace_database_name &&
            x_fin.trace_table_name == x_ori_from.trace_table_name &&
            x_fin.trace_column_name == x_ori_from.trace_column_name &&
            x_fin.trace_tableAlias_name == x_ori_from.trace_tableAlias_name
        }) {
          finalOrigin.append(x_ori_from)
        }
      }
      
      

      x.column_origin = finalOrigin
      
      //println("final")
      //x.column_origin.foreach { y => println(s"[${y.trace_tableAlias_name}].[${y.trace_column_name}]   trace_table_name:${y.trace_table_name} database:${y.trace_database_name}")}
    }
    
    //Exclude select "*"
    decode_result.columns = decode_result.columns.filter { x => ( 
                                                                       !(x.column_name == null && x.column_sql == "*") 
                                                                    && !(x.column_name == null && x.column_origin.length == 1 && x.column_origin(0).trace_column_name == "*")
                                                                  )}
    
    decode_result
  }
  
   /**
   * close "WHERE", return SQL and find database.table information of columns used IN WHERE
   */
  private def Analyze_SQL_CloseWHERE(decode_result: huemul_sql_decode_result, TablesAndColumns: ArrayBuffer[huemul_sql_tables_and_columns], tables: ArrayBuffer[huemul_sql_tables], position_start: Int, position_end: Int, SQL: String ): huemul_sql_decode_result = {
    decode_result.where_sql = SQL.substring(position_start, position_end) 
    
    //filter only tables used in FROM sentence
    var DataFiltered: ArrayBuffer[huemul_sql_tables_and_columns] = new ArrayBuffer[huemul_sql_tables_and_columns]() 
    tables.foreach { x_tablesUsed => 
           DataFiltered = DataFiltered ++
              TablesAndColumns.filter { y_full => y_full.table_name.toUpperCase() == x_tablesUsed.table_name.toUpperCase() && (   (x_tablesUsed.database_name == null)
                                                                                                                               || (x_tablesUsed.database_name.toUpperCase() == y_full.database_name.toUpperCase())
                                                                                                                              )
                                      }
              
        }
    
    //to complete information, use DataFiltered
    decode_result.columns_where.foreach { x_newreg =>
        //complete table and database name with ALIAS name
        if (x_newreg.trace_tableAlias_name != null) {
          val tablefound = tables.filter { x_table => x_table.tableAlias_name.toUpperCase() == x_newreg.trace_tableAlias_name.toUpperCase()  }
          if (tablefound.nonEmpty) {
            x_newreg.trace_table_name = tablefound(0).table_name
            x_newreg.trace_database_name = tablefound(0).database_name
          } 
        } else {
          //if not found, search in all catalog by column name
          val tableFound = DataFiltered.filter { x_all => x_all.column_name.toUpperCase() == x_newreg.trace_column_name.toUpperCase() }
          if (tableFound.nonEmpty) {
            x_newreg.trace_table_name = tableFound(0).table_name
            x_newreg.trace_database_name = tableFound(0).database_name
          }
        }
    }
    
    /*  
    println("INICIO********************************")
    DataFiltered.foreach { x => println(s"${x.table_name}, ${x.column_name}") }
    println("FIN********************************")
    
    println("INICIO********************************")
    decode_result.columns_where.foreach { x => println(s"x.trace_column_name:${x.trace_column_name}, x.trace_table_name:${x.trace_table_name}, x.trace_tableAlias_name:${x.trace_tableAlias_name}") }
    println("FIN********************************")
    * 
    */
    
    decode_result
      
  }
  
  
  private def Analyze_SQL(p_words: ArrayBuffer[huemul_sql_symbol_base], SQL: String, TablesAndColumns: ArrayBuffer[huemul_sql_tables_and_columns]): huemul_sql_decode_result = {
    var Result: huemul_sql_decode_result = new huemul_sql_decode_result()
    
    
    var position: Int = 0
   
    var stage: String = null
    var substage: String = null
    var search_end_bool: Boolean = false
    var SentenceIsFound: Boolean = false
    //variables to save select fields
    var column = new huemul_sql_columns()
    var table_from = new huemul_sql_tables()
    val cols_functions: ArrayBuffer[String] = new ArrayBuffer[String]()
    var position_sql_begin: Int = 0  //save position start and end from fields (Select)
    //var position_end: Int = 0
    var select_on_select: huemul_sql_decode_result = null
    
    //clean "." character, is used as database and columns specification.
    val Symbols_adhoc = Symbols.filter { x => x != "." }
    val search_end_text: ArrayBuffer[String] = new ArrayBuffer[String]()
    val CanProcess: Boolean = true
    var parenthesis: Int = 0
    var numOfWords: Int = 0
    
    //Drop commented lines with --
    val localWords = Analyze_SQL_RemoveComments(p_words)
    
    
    //var actual_line = -1
    position = 0
    val position_max: Int = localWords.length
    while (position < position_max) {
      var word: String = localWords(position).getsymbol.toUpperCase()
      
      //Get current word, previous word and next word
      var word_next: String = null
      var word_prev: String = null
      if (position + 1 < position_max)
        word_next = localWords(position+1).getsymbol.toUpperCase()
      if (position > 0)
        word_prev = localWords(position-1).getsymbol.toUpperCase()
        
        
      if (CanProcess) {
        //try to determine stage
        if (stage == null) {
          if (word == "SELECT") {
            if (word_next == "DISTINCT")
              position += 1
            stage = "SELECT"
            substage = null
          }
          if (word == "FROM"){
            stage = "FROM"
            position_sql_begin = localWords(position).getpos_start
          }
        } else if (word == "WHERE" && stage == "FROM") {
            stage = "WHERE"
            Result = Analyze_SQL_CloseFROM(Result, TablesAndColumns, position_sql_begin, localWords(position-1).getpos_end, SQL)
            
            position_sql_begin = localWords(position).getpos_start
        } else if (word == "GROUP" && stage == "FROM") {
            stage = "GROUP BY"
            Result = Analyze_SQL_CloseFROM(Result, TablesAndColumns, position_sql_begin, localWords(position-1).getpos_end, SQL)
            
            position_sql_begin = localWords(position).getpos_start
        } else if (word == "ORDER" && stage == "FROM") {
            stage = "ORDER"
            Result = Analyze_SQL_CloseFROM(Result, TablesAndColumns, position_sql_begin, localWords(position-1).getpos_end, SQL)
            
            position_sql_begin = localWords(position).getpos_start
        } else if (word == "GROUP" && stage == "WHERE") {
            stage = "GROUP BY"
            Result = Analyze_SQL_CloseWHERE(Result,TablesAndColumns, Result.tables, position_sql_begin, localWords(position-1).getpos_end, SQL)
            
            position_sql_begin = localWords(position).getpos_start
        } else if (word == "ORDER" && stage == "WHERE") {
            stage = "ORDER"
            Result = Analyze_SQL_CloseWHERE(Result,TablesAndColumns, Result.tables, position_sql_begin, localWords(position-1).getpos_end, SQL)
            
            position_sql_begin = localWords(position).getpos_start
        } else {
          
          //determine substage
          if (stage == "SELECT" && substage == null) {

            column = new huemul_sql_columns()
            val psum = if (word == ",") 1 else 0
            position_sql_begin = localWords(position + psum).getpos_start
            substage = "COLUMN"
            numOfWords = 0
          }
          
          
          //flag off, to determine if sql function or anothers found
          SentenceIsFound = false
          
          /*****************************************************************************************************/
          /***********   R E M O V E   "    ****************************************/
          /*****************************************************************************************************/
          
          //search for end of string sentence like "something" (the second one)
          if (search_end_bool) {
            var pos: Int = -1
            
            //loop for search in array
            breakable {
              for (i <- search_end_text.indices) {
                if (search_end_text(i) == word) {
                  pos = i
                  break  
                }
              }
            }
            
            //if found, remove from array and off flag search_end_bool
            if (pos >= 0) {
              SentenceIsFound = true
              search_end_text.remove(pos)
              if (search_end_text.isEmpty)
                search_end_bool = false
            }
          } else {
            //search for ""
            val sumbol_key_filter = Symbols_keys.filter { x => x.getsymbol_start.toUpperCase() == word.toUpperCase() && x.getsymbol_isForText  }
            if (sumbol_key_filter.nonEmpty) {
              SentenceIsFound = true
              
              //if found, add to array
              search_end_text.append(sumbol_key_filter(0).getsymbol_end)
              search_end_bool = true
            }
          }
          /*****************************************************************************************************/
          /***********   S T A R T   S E L E C T   A N D   C O L U M N  ****************************************/
          /*****************************************************************************************************/
          
          //looking for field}
          if (stage == "SELECT" && substage == "COLUMN") {
            //search for "(" and ")"
            
                        
            if (word == "(" && word_next == "SELECT") {
              SentenceIsFound = true
              val res_sel = get_select_on_select(position, position_max, SQL, localWords, TablesAndColumns)
              
              position = res_sel.position
              select_on_select = res_sel.select_on_select
              numOfWords += 2
            } else if (word == "(")
              parenthesis += 1
            else if (word == ")")
              parenthesis -= 1
            
            //search for sql functions
            if (!SentenceIsFound)  {
              val use_function = SQL_Functions.filter { x => x.toUpperCase() == word.toUpperCase() }
              if (use_function.nonEmpty) {
                SentenceIsFound = true
                cols_functions.append(word)
                numOfWords += 1
              } 
            }
              
            //search for other operations (like +, -, etc)
            if (!SentenceIsFound)  {
              val _Symbol = Symbols_adhoc.filter { x => x.toUpperCase() == word.toUpperCase() }
              if (_Symbol.nonEmpty) {
                SentenceIsFound = true  
              }
            }
              
            //search for user symbols
            if (!SentenceIsFound) {
              val _Symbols_user = Symbols_user.filter { x => x.toUpperCase() == word.toUpperCase() }
              if (_Symbols_user.nonEmpty) {
                SentenceIsFound = true
                numOfWords += 1
              }
            }
             
            //end of columns (ex: select  table.field as myField, table.field2 as myfield2 FROM
            //                                                 ---                         ---
            if (!search_end_bool && position_sql_begin > 0 && (   (word_next == "," && parenthesis == 0)
                                                                        || word_next == "FROM"
                                                                       )) {
              var colName: String = ""
              if (SentenceIsFound)
                colName = null
              else 
                colName = word
                
              //if word is the column name and alias at the same time (for example "select column from table"), then add column origin
              if (column.column_origin.isEmpty && Try(word.toDouble).isFailure &&
                                       (   numOfWords == 1 
                                    || (word_prev == "SELECT")
                                    || (word_prev == "," && numOfWords == 0)  
                                    )) {
                val new_reg =  new huemul_sql_columns_origin()
                new_reg.trace_column_name = word
                column.column_origin.append(new_reg)
              }
                
              //Setting all columns
              column.column_name = colName
              column.sql_pos_start = position_sql_begin
              column.sql_pos_end = localWords(position).getpos_end
              column.column_sql = SQL.substring(position_sql_begin, column.sql_pos_end )
              if (select_on_select != null) {
                column.column_origin = select_on_select.columns(0).column_origin
                select_on_select = null
              }
              Result.columns.append(column)
              
              //Clean for next cycle
              substage = null
              if (word_next == "FROM"){
                stage = null
              }
              
            } else if (!SentenceIsFound && !search_end_bool && word == ".") {
              val new_reg =  new huemul_sql_columns_origin()
              new_reg.trace_column_name = word_next
              new_reg.trace_tableAlias_name = word_prev
              column.column_origin.append(new_reg)
              numOfWords += 1
            } else if (!SentenceIsFound && !search_end_bool && (word != "." && word_next != "." && word_prev != ".") && Try(word.toDouble).isFailure) {
              val new_reg =  new huemul_sql_columns_origin()
              new_reg.trace_column_name = word
              column.column_origin.append(new_reg)
              numOfWords += 1
            }
          } else if (stage == "FROM") {
          /*****************************************************************************************************/
          /***********   S T A R T   F R O M   ****************************************/
          /*****************************************************************************************************/
                        
            if (!search_end_bool) {
              if (word == "(" && word_next == "SELECT") {
                //get subquery info
                val res_sel = get_select_on_select(position, position_max, SQL, localWords, TablesAndColumns)
                
                position = res_sel.position
                
                var l_subquery_word_01: String = null
                //var l_subquery_word_02: String = null
                  
                //get next word to 
                if (position+1 < position_max) {
                  l_subquery_word_01 = localWords(position+1).getsymbol.toUpperCase()
                }
                   
                table_from = new huemul_sql_tables()
                table_from.tableAlias_name = l_subquery_word_01

                table_from.database_name = res_sel.select_on_select.AliasDatabase
                table_from.table_name = res_sel.select_on_select.AliasQuery
                
                //table_from.database_name = "TEMP_HUEMUL"
                //table_from.table_name = s"TEMP_HUEMUL_${numTempSubQuery}"
                //numTempSubQuery += 1
                Result.tables.append(table_from)
                Result.subquery_result.append(res_sel.select_on_select)
                
                //Add temp sub query to table list
                res_sel.select_on_select.columns.foreach { x_subquery => 
                  //println(s"inserta datos ${table_from.database_name}, ${table_from.table_name}, ${x_subquery.column_name}")
                  val tempQuery = new huemul_sql_tables_and_columns().setData(table_from.database_name, table_from.table_name, x_subquery.column_name)
                  TablesAndColumns.append(tempQuery)
                }
                
                
                
              } else if (word_prev == "FROM" || word_prev == "JOIN" || word_prev == ",") {
                table_from = new huemul_sql_tables()
                //try to get database.table form
                if (word_next == ".") {
                  table_from.database_name = word
                  
                  //Get next position
                  if (position+2 < position_max) {
                    word_prev = localWords(position+1).getsymbol.toUpperCase()
                    word = localWords(position+2).getsymbol.toUpperCase()
                    if (position+3 < position_max) 
                      word_next = localWords(position+3).getsymbol.toUpperCase()
                    else
                      word_next = null
                    
                    table_from.table_name = word
                    position += 2
                  }
                } else {
                  table_from.table_name = word
                }
                
                var word_last: String = null
                if (position+2 < position_max){
                  word_last = localWords(position+2).getsymbol.toUpperCase()
                }
                    
                if (   word_next == "ON" 
                    || word_next == "," 
                    || word_next == "ORDER"
                    || word_next == "GROUP"
                    || word_next == "WHERE"
                    || Symbols_joins.exists { x => word_next != null && x.toUpperCase() == word_next.toUpperCase() }
                    ) {
                  table_from.tableAlias_name = table_from.table_name
                  
                } else if (word_next != null && word_next.toUpperCase() == "AS") {
                  table_from.tableAlias_name = word_last
                   if (word_last != null)
                    position += 2
                } else {
                 table_from.tableAlias_name = word_next
                 if (word_last != null) //only add 1
                   position += 1
                }
                   
                Result.tables.append(table_from)
                
              } 
            }              
          } else if (stage == "WHERE") {
            /*****************************************************************************************************/
            /***********   S T A R T   W H E R E   ****************************************/
            /*****************************************************************************************************/
            
            if (word == "(" && word_next == "SELECT") {
              SentenceIsFound = true
              val res_sel = get_select_on_select(position, position_max, SQL, localWords, TablesAndColumns)
              
              position = res_sel.position
              select_on_select = res_sel.select_on_select
            }
            
            //search for sql functions
            if (!SentenceIsFound)  {
              val use_function = SQL_Functions.filter { x => x.toUpperCase() == word.toUpperCase() }
              if (use_function.nonEmpty) {
                SentenceIsFound = true
                cols_functions.append(word)
              } 
            }
              
            //search for other operations (like +, -, etc)
            if (!SentenceIsFound)  {
              val _Symbol = Symbols_adhoc.filter { x => x.toUpperCase() == word.toUpperCase() }
              if (_Symbol.nonEmpty) {
                SentenceIsFound = true  
              }
            }
              
            //search for user symbols
            if (!SentenceIsFound) {
              val _Symbols_user = Symbols_user.filter { x => x.toUpperCase() == word.toUpperCase() }
              if (_Symbols_user.nonEmpty) {
                SentenceIsFound = true
              }
            }
            
            if (Try(word.toDouble).isSuccess) {
              SentenceIsFound = true
            }
             
            if (!SentenceIsFound && !search_end_bool && word == ".") {
              val new_reg =  new huemul_sql_columns_origin()
              new_reg.trace_column_name = word_next
              new_reg.trace_tableAlias_name = word_prev
              Result.columns_where.append(new_reg)
            } else if (!SentenceIsFound && !search_end_bool && (word != "." && word_next != "." && word_prev != ".") && Try(word.toDouble).isFailure ) {
              val new_reg =  new huemul_sql_columns_origin()
              new_reg.trace_column_name = word
              Result.columns_where.append(new_reg)
            }
          }
           
          //END SELECT & FROM
        }
      }
      position += 1
    } 
    
    //if last stage was "FROM" --> SQL sentence does't have WHERE, GROUP BY OR HAVING
    if (stage == "FROM") {
      Result = Analyze_SQL_CloseFROM(Result, TablesAndColumns, position_sql_begin, localWords(position-1).getpos_end, SQL)
    } else if (stage == "WHERE") {
      Result = Analyze_SQL_CloseWHERE(Result,TablesAndColumns, Result.tables, position_sql_begin, localWords(position-1).getpos_end, SQL)
    }
    
    Result
    
  }

  /**
   * Decode SQL
   * @param SQL SQL code
   * @param TablesAndColumns list of tables and columns
   * @return
   */
  def decodeSQL(SQL: String, TablesAndColumns: ArrayBuffer[huemul_sql_tables_and_columns] = new ArrayBuffer[huemul_sql_tables_and_columns] ): huemul_sql_decode_result = {
    
    //split words
    val words = splitWords(SQL)
   
    var i:Int = 0
    var numErrors: Int = 0
    words.foreach { x => 
      //println(s"x.getpos_start: ${x.getpos_start}, x.getpos_end: ${x.getpos_end}")
      
      
      if (x.getsymbol != SQL.substring(x.getpos_start, x.getpos_end)) {
        numErrors += 1
        println(s"ALERT: ERROR READING IN pos: $i line: ${x.getsymbol_line}; Valida:${if (x.getsymbol == SQL.substring(x.getpos_start, x.getpos_end)) "true" else "false" } , word: [${x.getsymbol}], largo: ${x.getsymbol.length()}, Pos Start: ${x.getpos_start}, Pos end: ${x.getpos_end}, comprobar: [${SQL.substring(x.getpos_start, x.getpos_end)}] ")
      }
      
      i += 1
    }

    //Analyze SQL
    val res = Analyze_SQL(words, SQL, TablesAndColumns)
    res.NumErrors = numErrors
    res.AutoIncSubQuery = numTempSubQuery
    numTempSubQuery += 1
    res.AliasQuery = s"TEMP_HUEMUL_$numTempSubQuery"
    res.AliasDatabase = "TEMP_HUEMUL"
    res
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy