All downloads are free. Search and download functionality uses the official Maven repository.

com.clickzetta.client.jdbc.core.CZTextFileResult Maven / Gradle / Ivy

There is a newer version: 2.0.0
Show newest version
package com.clickzetta.client.jdbc.core;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import cz.proto.coordinator.CoordinatorServiceOuterClass;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link CZResult} that serves job result rows from line-oriented result files.
 *
 * <p>The files are taken from a {@code JobResultLocation}: when it carries presigned URLs they
 * are downloaded over HTTP, otherwise its locations are read through a {@link CZStorageClient}.
 * Each file is read eagerly, line by line, when the iteration reaches it; every line is one
 * serialized row that {@link #getNext()} deserializes with {@link CZLazySimpleSerDe}.
 *
 * <p>Opened streams and clients are not closed inline: once {@link #hasNext()} reports
 * exhaustion, a cleanup task is handed to a shared background pool.
 */
class CZTextFileResult implements CZResult {
  private static final Logger logger = LoggerFactory.getLogger(CZTextFileResult.class);

  /**
   * Shared pool that runs the cleanup tasks. Its workers are daemon threads so that idle pool
   * threads cannot keep the JVM alive after the application itself has finished.
   */
  private static final ExecutorService executor = Executors.newFixedThreadPool(10,
      new java.util.concurrent.ThreadFactory() {
        private final java.util.concurrent.ThreadFactory delegate =
            Executors.defaultThreadFactory();

        @Override
        public Thread newThread(Runnable task) {
          Thread worker = delegate.newThread(task);
          worker.setDaemon(true);
          return worker;
        }
      });

  /**
   * Background task that closes the streams opened for a result and then releases the
   * storage client and HTTP client that produced them. Every failure is logged and
   * otherwise ignored so that one bad stream does not prevent the remaining cleanup.
   */
  private static class CloseOssInputStreamTask implements Runnable {
    private final List<InputStream> inputStream;
    private final List<String> files;
    private final CZStorageClient storageClient;
    private final CloseableHttpClient httpClient;

    /**
     * @param files         names of the opened files, index-aligned with {@code is}
     * @param is            streams to close
     * @param client        storage client to shut down afterwards, or {@code null}
     * @param httpClient    HTTP client to close afterwards, or {@code null}
     */
    CloseOssInputStreamTask(List<String> files, List<InputStream> is,
                            CZStorageClient client, CloseableHttpClient httpClient) {
      this.inputStream = is;
      this.files = files;
      this.storageClient = client;
      this.httpClient = httpClient;
    }

    @Override
    public void run() {
      for (int i = 0; i < inputStream.size(); i++) {
        try {
          inputStream.get(i).close();
          logger.info("close oss file {} success", files.get(i));
        } catch (Exception e) {
          // Trailing throwable argument makes SLF4J log the stack trace as well.
          logger.error("close oss file {} fail {}", files.get(i), e.getMessage(), e);
        }
      }
      // The clients are released exactly once, after all streams, and also when no stream
      // was ever opened (previously this sat inside the per-stream loop).
      if (storageClient != null) {
        try {
          storageClient.shutDown();
          logger.info("storage client shutDown.");
        } catch (Exception e) {
          logger.error("storage client shutDown fail {}", e.getMessage(), e);
        }
      }
      if (httpClient != null) {
        try {
          httpClient.close();
        } catch (Exception e) {
          logger.error("http client close fail {}", e.getMessage(), e);
        }
      }
    }
  }

  /** Client used for object-store locations; {@code null} when presigned URLs are used. */
  private final CZStorageClient storageClient;
  /** Client used for presigned URLs; {@code null} when object-store locations are used. */
  private final CloseableHttpClient httpClient;
  private final CZLazySimpleSerDe.FileFormat format;
  private final CZLazySimpleSerDe serDe;
  /** Maximum number of rows to read in total; {@code 0} means no limit. */
  private final int maxRowSize;
  /** Rows counted against {@link #maxRowSize} so far (stays 0 when there is no limit). */
  private int curRowSize = 0;

  /** Files and streams opened since the last cleanup task was submitted (index-aligned). */
  private final List<String> openedFiles = new ArrayList<>();
  private final List<InputStream> inputStreams = new ArrayList<>();

  private final Iterator<String> fileIterator;
  /** Rows of the file currently being served; {@code null} until the first file is read. */
  private Iterator<String> recordIterator;

  /**
   * Creates a result over files in {@link CZLazySimpleSerDe.FileFormat#TEXT} format.
   *
   * @see #CZTextFileResult(CoordinatorServiceOuterClass.JobResultLocation, List, boolean, int,
   *      CZLazySimpleSerDe.FileFormat, boolean)
   */
  CZTextFileResult(CoordinatorServiceOuterClass.JobResultLocation jobResultLocation,
                   List<?> metaData, boolean useInternalEndpoint,
                   int maxRowSize, boolean useObjectStoreHttps) throws Exception {
    this(jobResultLocation, metaData, useInternalEndpoint, maxRowSize,
            CZLazySimpleSerDe.FileFormat.TEXT, useObjectStoreHttps);
  }

  /**
   * Creates a result over the files named by {@code jobResultLocation}.
   *
   * @param jobResultLocation   source of the presigned URLs or object-store locations
   * @param metaData            column metadata; only its size is used, to configure the SerDe
   * @param useInternalEndpoint passed through to {@link CZStorageClient#create}
   * @param maxRowSize          maximum number of rows to read, {@code 0} for no limit
   * @param format              file format that selects the deserialization routine
   * @param useObjectStoreHttps passed through to {@link CZStorageClient#create}
   * @throws Exception if the storage client cannot be created
   */
  CZTextFileResult(CoordinatorServiceOuterClass.JobResultLocation jobResultLocation,
                   List<?> metaData, boolean useInternalEndpoint,
                   int maxRowSize, CZLazySimpleSerDe.FileFormat format, boolean useObjectStoreHttps) throws Exception {
    this.format = format;
    this.maxRowSize = maxRowSize;
    List<String> files = getFiles(jobResultLocation, format);
    this.fileIterator = files.iterator();
    this.recordIterator = null;
    if (jobResultLocation.getPresignedUrlsCount() > 0) {
      this.httpClient = HttpClients.createDefault();
      this.storageClient = null;
    } else {
      this.httpClient = null;
      this.storageClient = CZStorageClient.create(jobResultLocation, useInternalEndpoint, useObjectStoreHttps);
    }
    this.serDe = new CZLazySimpleSerDe(metaData.size());
    logger.info("FileCount: " + files.size());
  }

  /**
   * Collects the files to read: the presigned URLs when any are present, otherwise the plain
   * locations. The {@code format} argument is currently not consulted.
   */
  private List<String> getFiles(
          CoordinatorServiceOuterClass.JobResultLocation jobResultLocation,
          CZLazySimpleSerDe.FileFormat format) {
    List<String> files = new ArrayList<>();
    if (jobResultLocation.getPresignedUrlsCount() > 0) {
      for (int i = 0; i < jobResultLocation.getPresignedUrlsCount(); i++) {
        files.add(jobResultLocation.getPresignedUrls(i));
      }
    } else {
      for (int i = 0; i < jobResultLocation.getLocationCount(); i++) {
        files.add(jobResultLocation.getLocation(i));
      }
    }
    if (files.isEmpty()) {
      logger.info("no file in fileResultLocation.");
    }
    return files;
  }

  /**
   * Reports whether another row is available, reading further files as needed (empty files
   * are skipped). When no row is left, or the row limit has been reached, the opened
   * resources are handed to the background cleanup task and {@code false} is returned.
   *
   * @throws Exception if a result file cannot be opened
   */
  @Override
  public boolean hasNext() throws Exception {
    if (recordIterator != null && recordIterator.hasNext()) {
      return true;
    }
    while ((this.maxRowSize == 0 || this.curRowSize < this.maxRowSize) && fileIterator.hasNext()) {
      String nextFile = fileIterator.next();
      recordIterator = readFile(nextFile).iterator();
      if (recordIterator.hasNext()) {
        return true;
      }
    }
    // NOTE(review): resetting the counter means that a further hasNext() call after the row
    // limit was hit would start reading the remaining files with clients that are being
    // released below -- confirm that callers stop after the first false.
    curRowSize = 0;
    // Hand the task its own snapshot: the instance lists are not thread-safe, and clearing
    // them keeps a repeated hasNext() from re-closing the same streams.
    executor.submit(new CloseOssInputStreamTask(
        new ArrayList<>(openedFiles), new ArrayList<>(inputStreams), storageClient, httpClient));
    openedFiles.clear();
    inputStreams.clear();
    return false;
  }

  /**
   * Deserializes and returns the next row; must only be called after {@link #hasNext()}
   * returned {@code true}.
   *
   * <p>The return type is left raw because the element type is dictated by {@link CZResult}
   * and the SerDe, neither of which is visible from this class.
   *
   * @return the deserialized row, or an empty list when deserialization fails or the format
   *         is neither TEXT nor CSV
   */
  @Override
  public List getNext() {
    String rowStr = recordIterator.next();
    try {
      if (format.equals(CZLazySimpleSerDe.FileFormat.TEXT)) {
        return serDe.deserializeOwn(rowStr);
      } else if (format.equals(CZLazySimpleSerDe.FileFormat.CSV)) {
        return serDe.deserialize(rowStr);
      }
    } catch (Exception e) {
      logger.error("LazySimpleSerDeFail", e);
    }
    return new ArrayList<>();
  }

  /**
   * Opens one result file and returns its lines. The stream is registered for the deferred
   * cleanup rather than closed here.
   *
   * @param nextFile presigned URL or object-store location of the file
   * @throws Exception if the location is not a valid URI, the storage client is missing, or
   *                   the HTTP download does not succeed
   */
  private List<String> readFile(String nextFile) throws Exception {
    logger.info("ReadFileLocation: " + withoutQuery(nextFile));
    URI uri = new URI(nextFile);
    // Drop the leading '/' so the path can be used as the object key.
    String file = uri.getPath().substring(1);
    InputStream inputStream;
    if (httpClient != null) {
      CloseableHttpResponse response = httpClient.execute(new HttpGet(nextFile));
      int status = response.getStatusLine().getStatusCode();
      if (status < 200 || status >= 300 || response.getEntity() == null) {
        // Without this check the error document returned for an expired or rejected
        // presigned URL would be parsed as if it were result rows.
        response.close();
        throw new IOException(
            "Download of result file " + file + " failed, HTTP status " + status);
      }
      inputStream = response.getEntity().getContent();
      inputStreams.add(inputStream);
      openedFiles.add(file);
    } else {
      String bucketName = uri.getHost();

      logger.info("ObjectStorageBucket: " + bucketName);
      logger.info("ObjectStorageFile: " + file);
      if (storageClient == null) {
        throw new Exception("Storage client not init.");
      }
      inputStream = storageClient.getObjectInputStream(bucketName, file);
      openedFiles.add(file);
      inputStreams.add(inputStream);
    }
    List<String> rows = getContent(inputStream);
    logger.info("Rows: " + rows.size());
    logger.info("input stream: {}", file);
    return rows;
  }

  /**
   * Returns {@code url} without its query string. Presigned URLs carry their access
   * signature in the query, so it must not end up in the logs.
   */
  private static String withoutQuery(String url) {
    int queryStart = url.indexOf('?');
    return queryStart < 0 ? url : url.substring(0, queryStart);
  }

  /**
   * Reads lines from {@code inputStream} until it is exhausted or the row limit is reached.
   * The reader is deliberately not closed here; the underlying stream is closed later by the
   * cleanup task.
   *
   * @return the lines read, or an empty list if reading fails part-way (the failure is logged)
   */
  private List<String> getContent(InputStream inputStream) throws IOException {
    // Decode as UTF-8 explicitly instead of relying on the platform default charset.
    // NOTE(review): assumes result files are written as UTF-8 -- confirm with the producer.
    BufferedReader reader = new BufferedReader(
        new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8));
    List<String> rows = new ArrayList<>();
    try {
      while (this.maxRowSize == 0 || (this.curRowSize < this.maxRowSize)) {
        String line = reader.readLine();
        if (line == null) {
          break;
        }
        // Rows are only counted when a limit is set (maxRowSize > 0).
        if (this.maxRowSize > this.curRowSize) {
          this.curRowSize++;
        }
        rows.add(line);
      }
    } catch (IOException e) {
      logger.error("IOException in reading data from file.", e);
      return Collections.emptyList();
    }
    return rows;
  }
}