/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.hplsql;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.math.RoundingMode;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.antlr.v4.runtime.ParserRuleContext;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hive.hplsql.executor.QueryExecutor;
import org.apache.hive.hplsql.executor.QueryResult;

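/**
 * Implements the HPL/SQL COPY statements. A sketch of the statement forms
 * this class handles, reconstructed from the grammar contexts used below
 * (illustrative, not an authoritative syntax reference):
 *
 * <pre>
 *   COPY { table | (select_stmt) } TO target
 *        [ DELIMITER expr ] [ SQLINSERT [name] ] [ AT conn ] [ BATCHSIZE expr ]
 *
 *   COPY FROM LOCAL source [, source ...] TO target
 *        [ OVERWRITE ] [ DELETE ] [ IGNORE ]
 * </pre>
 */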
public class Copy {
  
  Exec exec;
  Timer timer = new Timer();
  boolean trace = false;
  boolean info = false;
  
  long srcSizeInBytes = 0;
  
  String delimiter = "\t";
  boolean sqlInsert = false;
  String sqlInsertName;
  String targetConn;
  int batchSize = 1000;
  
  boolean overwrite = false;
  boolean delete = false;
  boolean ignore = false;
  private QueryExecutor queryExecutor;

  Copy(Exec e, QueryExecutor queryExecutor) {
    exec = e;
    trace = exec.getTrace();
    info = exec.getInfo();
    this.queryExecutor = queryExecutor;
  }
  
  /**
   * Run COPY command
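   *
   * A table source becomes "SELECT * FROM <table>"; a select source is
   * evaluated as-is. The result set is then routed either to another table
   * (when the AT option set a target connection) or to a local/HDFS file.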
   */
  Integer run(HplsqlParser.Copy_stmtContext ctx) {
    trace(ctx, "COPY");
    initOptions(ctx);
    StringBuilder sql = new StringBuilder();
    if (ctx.table_name() != null) {
      String table = evalPop(ctx.table_name()).toString();
      sql.append("SELECT * FROM ");
      sql.append(table); 
    }
    else {
      sql.append(evalPop(ctx.select_stmt()).toString());
      if (trace) {
        trace(ctx, "Statement:\n" + sql);
      }
    }
    QueryResult query = queryExecutor.executeQuery(sql.toString(), ctx);
    if (query.error()) { 
      exec.signal(query);
      return 1;
    }    
    exec.setSqlSuccess();
    try {
      if (targetConn != null) {
        copyToTable(ctx, query);
      }
      else {
        copyToFile(ctx, query);
      }
    } catch (Exception e) {
      exec.signal(e);
      return 1;
    } finally {
      query.close();
    }
    return 0;
  }
    
  /**
   * Copy the query results to another table
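   * A parameterized INSERT is built from the source column count, e.g. for
   * three columns: INSERT INTO <sqlInsertName> VALUES (?,?,?). Rows are sent
   * through JDBC addBatch()/executeBatch() in groups of batchSize (default
   * 1000), or with executeUpdate() per row when batchSize is 1 or less.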
   * @throws Exception 
   */
  void copyToTable(HplsqlParser.Copy_stmtContext ctx, QueryResult query) throws Exception {
    int cols = query.columnCount();
    int rows = 0;
    if (trace) {
      trace(ctx, "SELECT executed: " + cols + " columns");
    } 
    Connection conn = exec.getConnection(targetConn);
    StringBuilder sql = new StringBuilder();
    sql.append("INSERT INTO " + sqlInsertName + " VALUES (");
    for (int i = 0; i < cols; i++) {
      sql.append("?");
      if (i + 1 < cols) {
        sql.append(",");
      }
    }
    sql.append(")");
    PreparedStatement ps = conn.prepareStatement(sql.toString());
    long start = timer.start();
    long prev = start;
    boolean batchOpen = false;
    while (query.next()) {
      for (int i = 0; i < cols; i++) {
        // JDBC parameter indices are 1-based
        ps.setObject(i + 1, query.column(i, Object.class));
      }
      rows++;
      if (batchSize > 1) {
        ps.addBatch();
        batchOpen = true;
        if (rows % batchSize == 0) {
          ps.executeBatch();
          batchOpen = false;
        }        
      }
      else {
        ps.executeUpdate();
      }      
      if (trace && rows % 100 == 0) {
        long cur = timer.current();
        if (cur - prev > 10000) {
          trace(ctx, "Copying rows: " + rows + " (" + rows/((cur - start)/1000) + " rows/sec)");
          prev = cur;
        }
      }
    }
    if (batchOpen) {
      ps.executeBatch();
    }
    ps.close();
    exec.returnConnection(targetConn, conn);
    exec.setRowCount(rows);
    long elapsed = timer.stop();
    if (info) {
      DecimalFormat df = new DecimalFormat("#,##0.00");
      df.setRoundingMode(RoundingMode.HALF_UP);
      info(ctx, "COPY completed: " + rows + " row(s), " + timer.format() + ", " + df.format(rows/(elapsed/1000.0)) + " rows/sec");
    }
  }
  
  /**
   * Copy the query results to a file
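   * In the default mode each row is written as delimiter-separated text on
   * its own line (NULL columns become empty fields); with SQLINSERT each row
   * becomes a line of the form INSERT INTO <sqlInsertName> VALUES (...);
   * with values quoted and NULLs written literally.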
   * @throws Exception 
   */
  void copyToFile(HplsqlParser.Copy_stmtContext ctx, QueryResult query) throws Exception {
    String filename = null;
    if (ctx.copy_target().expr() != null) {
      filename = evalPop(ctx.copy_target().expr()).toString();
    }
    else {
      filename = ctx.copy_target().getText();
    }
    byte[] del = delimiter.getBytes();
    byte[] rowdel = "\n".getBytes();
    byte[] nullstr = "NULL".getBytes();
    int cols = query.columnCount();
    int rows = 0;
    long bytes = 0;
    if (trace || info) {
      String mes = "Query executed: " + cols + " columns, output file: " + filename;
      if (trace) {
        trace(ctx, mes);
      }
      else {
        info(ctx, mes);
      }
    } 
    java.io.File file = null;
    File hdfsFile = null;
    if (ctx.T_HDFS() == null) {
      file = new java.io.File(filename);
    }
    else {
      hdfsFile = new File();
    }     
    OutputStream out = null;
    timer.start();
    try {      
      if (file != null) {
        if (!file.exists()) {
          file.createNewFile();
        }
        out = new FileOutputStream(file, false /*append*/);
      }
      else {
        out = hdfsFile.create(filename, true /*overwrite*/);
      }
      String col;
      String sql = "";
      if (sqlInsert) {
        sql = "INSERT INTO " + sqlInsertName + " VALUES (";
        rowdel = ");\n".getBytes();
      }
      while (query.next()) {
        if (sqlInsert) {
          out.write(sql.getBytes());
        }
        for (int i = 0; i < cols; i++) {
          if (i > 0) {
            out.write(del);
            bytes += del.length;
          }
          col = query.column(i, String.class);
          if (col != null) {
            if (sqlInsert) {
              col = Utils.quoteString(col);
            }
            byte[] b = col.getBytes();
            out.write(b);
            bytes += b.length;
          }
          else if (sqlInsert) {
            out.write(nullstr);
          }
        }
        out.write(rowdel);
        bytes += rowdel.length;
        rows++;
      }
      exec.setRowCount(rows);
    }
    finally {
      if (out != null) {
        out.close();
      }
    }
    long elapsed = timer.stop();
    if (info) {
      DecimalFormat df = new DecimalFormat("#,##0.00");
      df.setRoundingMode(RoundingMode.HALF_UP);
      info(ctx, "COPY completed: " + rows + " row(s), " + Utils.formatSizeInBytes(bytes) + ", " + timer.format() + ", " + df.format(rows/(elapsed/1000.0)) + " rows/sec");
    }
  }
  
  /**
   * Run COPY FROM LOCAL statement
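   *
   * Mirrors "hadoop fs -put" semantics: a single file source is copied to
   * the target as a file name, while directory or multi-file sources are
   * copied into the target as a directory. OVERWRITE, DELETE (remove the
   * source after copying) and IGNORE (count but skip copy errors) come from
   * initFileOptions.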
   */
  public Integer runFromLocal(HplsqlParser.Copy_from_local_stmtContext ctx) { 
    trace(ctx, "COPY FROM LOCAL");
    initFileOptions(ctx.copy_file_option());
    HashMap<String, Pair<String, Long>> srcFiles = new HashMap<>();
    String src = evalPop(ctx.copy_source(0)).toString();
    String dest = evalPop(ctx.copy_target()).toString();
    int srcItems = ctx.copy_source().size();
    for (int i = 0; i < srcItems; i++) {
      createLocalFileList(srcFiles, evalPop(ctx.copy_source(i)).toString(), null);
    }
    if (info) {
      info(ctx, "Files to copy: " + srcFiles.size() + " (" + Utils.formatSizeInBytes(srcSizeInBytes) + ")");
    }
    if (srcFiles.size() == 0) {
      exec.setHostCode(2);
      return 2;
    }
    timer.start();
    File file = new File();
    FileSystem fs;
    int succeed = 0;
    int failed = 0;
    long copiedSize = 0;
    try {
      fs = file.createFs();
      boolean multi = srcFiles.size() > 1;
      for (Map.Entry<String, Pair<String, Long>> i : srcFiles.entrySet()) {
        try {
          Path s = new Path(i.getKey());           
          Path d;
          if (multi) {
            String relativePath = i.getValue().getLeft();
            if (relativePath == null) {
              d = new Path(dest, s.getName());
            }
            else {
              d = new Path(dest, relativePath + Path.SEPARATOR + s.getName()); 
            }
          }
          else {
            // Path to file is specified (can be relative), so treat target as a file name (hadoop fs -put behavior)
            if (srcItems == 1 && i.getKey().endsWith(src)) {
              d = new Path(dest);
            }
            // Source directory is specified, so treat the target as a directory 
            else {
              d = new Path(dest + Path.SEPARATOR + s.getName());
            }
          }
          fs.copyFromLocalFile(delete, overwrite, s, d);
          succeed++;
          long size = i.getValue().getRight();
          copiedSize += size;
          if (info) {
            info(ctx, "Copied: " + file.resolvePath(d) + " (" + Utils.formatSizeInBytes(size) + ")");
          }
        }
        catch(IOException e) {
          failed++;
          if (!ignore) {
            throw e;
          }
        }
      }
    }
    catch(IOException e) {
      exec.signal(e);
      exec.setHostCode(1);
      return 1;
    }
    finally {
      long elapsed = timer.stop();
      if (info) {
        info(ctx, "COPY completed: " + succeed + " succeeded, " + failed + " failed, " + 
              timer.format() + ", " + Utils.formatSizeInBytes(copiedSize) + ", " +
              Utils.formatBytesPerSec(copiedSize, elapsed));
      }
      if (failed == 0) {
        exec.setHostCode(0);
      }
      else {
        exec.setHostCode(1);
      }
      file.close();
    }
    return 0; 
  }
  
  /**
   * Create the list of local files for the specified path (including subdirectories)
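   * The map is keyed by absolute file path; each value pairs the directory
   * path relative to the copy root (null for top-level files) with the file
   * size in bytes. Sizes also accumulate into srcSizeInBytes.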
   */
  void createLocalFileList(HashMap<String, Pair<String, Long>> list, String path, String relativePath) {
    java.io.File file = new java.io.File(path);
    if (file.exists()) {
      if (file.isDirectory()) {
        for (java.io.File i : file.listFiles()) {
          if (i.isDirectory()) {
            String rel;
            if (relativePath == null) {
              rel = i.getName();
            }
            else {
              rel = relativePath + java.io.File.separator + i.getName(); 
            }
            createLocalFileList(list, i.getAbsolutePath(), rel);
          }
          else {
            long size = i.length();
            list.put(i.getAbsolutePath(), Pair.of(relativePath, size));
            srcSizeInBytes += size;
          }
        }
      }
      else {
        long size = file.length();
        list.put(file.getAbsolutePath(), Pair.of(relativePath, size));
        srcSizeInBytes += size;
      }
    }
  }
  
  /**
   * Initialize COPY command options
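   * Recognized options: DELIMITER expr (Java-unescaped, default tab),
   * SQLINSERT [name] (also switches the delimiter to ", "), AT conn
   * (insert into a table over the named connection, using the copy target
   * as the table name), and BATCHSIZE expr.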
   */
  void initOptions(HplsqlParser.Copy_stmtContext ctx) {
    int cnt = ctx.copy_option().size();
    for (int i = 0; i < cnt; i++) {
      HplsqlParser.Copy_optionContext option = ctx.copy_option(i);
      if (option.T_DELIMITER() != null) {
        delimiter = StringEscapeUtils.unescapeJava(evalPop(option.expr()).toString());
      }
      else if (option.T_SQLINSERT() != null) {
        sqlInsert = true;
        delimiter = ", ";
        if (option.qident() != null) {
          sqlInsertName = option.qident().getText();
        }
      }
      else if (option.T_AT() != null) {
        targetConn = option.qident().getText();
        if (ctx.copy_target().expr() != null) {
          sqlInsertName = evalPop(ctx.copy_target().expr()).toString();
        }
        else {
          sqlInsertName = ctx.copy_target().getText();
        }
      }
      else if (option.T_BATCHSIZE() != null) {
        batchSize = evalPop(option.expr()).intValue();
      }
    }
  }
  
  /**
   * Initialize COPY FILE options
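   * Recognized flags: OVERWRITE, DELETE and IGNORE (see runFromLocal)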
   */
  void initFileOptions(List<HplsqlParser.Copy_file_optionContext> options) {
    srcSizeInBytes = 0;
    for (HplsqlParser.Copy_file_optionContext i : options) {
      if (i.T_OVERWRITE() != null) {
        overwrite = true;
      }
      else if (i.T_DELETE() != null) {
        delete = true;
      }
      else if (i.T_IGNORE() != null) {
        ignore = true;
      }
    }
  }
 
  /**
   * Evaluate the expression and pop value from the stack
   */
  Var evalPop(ParserRuleContext ctx) {
    exec.visit(ctx);
    if (!exec.stack.isEmpty()) { 
      return exec.stackPop();
    }
    return Var.Empty;
  }

  /**
   * Trace and info logging, delegated to Exec
   */
  public void trace(ParserRuleContext ctx, String message) {
    exec.trace(ctx, message);
  }
  
  public void info(ParserRuleContext ctx, String message) {
    exec.info(ctx, message);
  }
}



