All downloads are free. Search and download functionality uses the official Maven repository.

com.aliyun.odps.datahub.DatahubReader Maven / Gradle / Ivy

There is a newer version: 0.51.2-public
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.datahub;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.commons.proto.ProtobufRecordStreamReader;
import com.aliyun.odps.commons.proto.XstreamPack.XStreamPack;
import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.commons.util.IOUtils;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.rest.RestClient;


/**
 * Reads records pack by pack from a Datahub resource over REST.
 *
 * <p>The reader keeps a cursor made of {@code nextPackId} (the pack id sent with the next
 * request) and {@code readMode} (whether that request asks for the pack <em>at</em> or
 * <em>after</em> that id). Each successful fetch downloads exactly one pack
 * ({@code PACK_NUM = "1"}) and advances the cursor from the response headers.
 *
 * <p>Instances hold mutable cursor state and use no synchronization.
 *
 * <p>NOTE(review): the {@link Connection} obtained for each request is never explicitly
 * released in this class — confirm whether the transport requires an explicit
 * disconnect/close after the response body has been consumed.
 *
 * <p>NOTE(review): the {@code params}/{@code headers} maps are raw {@link Map}s to keep the
 * public constructor signatures unchanged; presumably they are {@code Map<String, String>} —
 * confirm against {@link RestClient}.
 */
public class DatahubReader {

  private final RestClient datahubServiceClient;
  private final TableSchema tableSchema;
  private final String path;
  // Only referenced by generatorMD5, which currently has no caller in this class.
  private final MessageDigest messageDigest;
  private final Map params;
  private final Map headers;
  // Metadata bytes of the most recently fetched pack; null until a pack has been fetched.
  private byte[] packMeta;
  // Id of the most recently fetched pack; null until a pack has been fetched.
  private String lastPackId;
  // Pack id that will be sent with the next fetch request.
  private String nextPackId;
  // Selects "at pack id" vs "after pack id" iteration for the next fetch request.
  private PackType.ReadMode readMode;
  // Decoder over the current pack's data; null when a new pack must be fetched.
  private ProtobufRecordStreamReader protobufRecordStreamReader;

  /**
   * Creates a reader positioned at {@code PackType.FIRST_PACK_ID}.
   *
   * @param datahubServiceClient client used to issue the REST requests
   * @param tableSchema          schema handed to the record decoder
   * @param path                 resource path requested on every call
   * @param params               base query parameters, copied for every request
   * @param headers              base request headers, copied for every request
   */
  public DatahubReader(RestClient datahubServiceClient, TableSchema tableSchema, String path,
                       Map params, Map headers) {
    this(datahubServiceClient, tableSchema, path, params, headers, PackType.FIRST_PACK_ID);
  }

  /**
   * Creates a reader positioned at the given pack id.
   *
   * @param datahubServiceClient client used to issue the REST requests
   * @param tableSchema          schema handed to the record decoder
   * @param path                 resource path requested on every call
   * @param params               base query parameters, copied for every request
   * @param headers              base request headers, copied for every request
   * @param packId               id of the first pack to read; must be non-null and non-empty
   * @throws IllegalArgumentException if {@code packId} is null or empty
   * @throws RuntimeException         if the MD5 digest algorithm is unavailable
   */
  public DatahubReader(RestClient datahubServiceClient, TableSchema tableSchema, String path,
                       Map params, Map headers,
                       String packId) {
    this.datahubServiceClient = datahubServiceClient;
    this.tableSchema = tableSchema;
    this.path = path;
    this.params = params;
    this.headers = headers;
    this.packMeta = null;
    this.messageDigest = newMd5Digest();
    this.protobufRecordStreamReader = null;

    seek(packId, PackType.ReadMode.SEEK_CUR);
  }

  /**
   * Creates the MD5 digest, keeping the original exception as the cause on failure.
   */
  private static MessageDigest newMd5Digest() {
    try {
      return MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
      // Same message as before, but the cause is no longer discarded.
      throw new RuntimeException(e.getMessage(), e);
    }
  }

  /**
   * Reads one record from the shard.
   *
   * <p>Records are served from the current pack; when it yields {@code null} (or no pack is
   * loaded yet) the next pack is fetched and reading continues there.
   *
   * <p>If decoding fails with an {@link IOException}, the current pack is dropped and the
   * cursor is reset to {@code lastPackId} in {@code SEEK_CUR} mode before the exception is
   * rethrown, so the next call requests that same pack again.
   *
   * @return the next {@link com.aliyun.odps.data.Record}, or {@code null} when the service
   *         reports that no pack is available
   * @throws OdpsException if fetching a pack fails
   * @throws IOException   if decoding a record from the current pack fails
   */
  public Record read() throws OdpsException, IOException {
    Record r = null;
    do {
      if (protobufRecordStreamReader != null) {
        try {
          r = protobufRecordStreamReader.read();
        } catch (IOException e) {
          // Rewind to the pack that failed so a retry re-fetches it.
          protobufRecordStreamReader = null;
          this.nextPackId = this.lastPackId;
          this.readMode = PackType.ReadMode.SEEK_CUR;
          throw e;
        }
      }
    } while (r == null && getPack("all"));

    return r;
  }

  /**
   * Abandons the current pack: any records not yet read from it are discarded and the next
   * fetch asks for the pack after {@link #getLastPackId()}.
   *
   * @throws IllegalArgumentException if no pack has been fetched yet (last pack id is null)
   */
  public void skipPack() {
    seek(this.lastPackId, PackType.ReadMode.SEEK_NEXT);
  }

  /**
   * Returns the id of the pack most recently fetched from the service.
   *
   * @return the pack id, or {@code null} if no pack has been fetched yet
   */
  public String getLastPackId() {
    return this.lastPackId;
  }

  /**
   * Repositions the cursor and discards the current pack.
   *
   * @param rpid pack id to position at; ignored for {@code SEEK_BEGIN} and {@code SEEK_END}
   * @param mode how {@code rpid} is interpreted by the next fetch
   * @throws IllegalArgumentException if {@code rpid} is null/empty for a mode that needs it,
   *                                  or if the mode is not handled
   */
  private void seek(String rpid, PackType.ReadMode mode) {
    boolean packIdMissing = rpid == null || rpid.equals("");
    boolean packIdOptional = mode.equals(PackType.ReadMode.SEEK_BEGIN)
                             || mode.equals(PackType.ReadMode.SEEK_END);
    if (packIdMissing && !packIdOptional) {
      throw new IllegalArgumentException("Invalid pack id.");
    }

    switch (mode) {
      case SEEK_BEGIN:
        nextPackId = PackType.FIRST_PACK_ID;
        break;
      case SEEK_END:
        nextPackId = PackType.LAST_PACK_ID;
        break;
      case SEEK_CUR:
      case SEEK_NEXT:
        nextPackId = rpid;
        break;
      default:
        throw new IllegalArgumentException("Invalid pack read mode.");
    }
    // lastPackId is intentionally left untouched here.
    readMode = mode;
    protobufRecordStreamReader = null;
  }

  /**
   * Asks the service for the pack id corresponding to a timestamp.
   *
   * <p>This only performs the lookup; the reader's own cursor is not modified.
   *
   * @param timeStamp value sent as the {@code SEEK_TIME} query parameter
   * @return a {@link SeekPackResult} built from the {@code "PackId"} field of the JSON reply
   * @throws OdpsException if the service answers with an error, the reply carries no
   *                       {@code "PackId"}, or any other failure occurs during the request
   *                       (all reported as {@link DatahubException})
   * @throws IOException   declared for interface compatibility
   */
  public SeekPackResult seek(long timeStamp) throws OdpsException, IOException {
    HashMap requestParams = new HashMap(this.params);
    HashMap requestHeaders = new HashMap(this.headers);

    try {
      requestParams.put(DatahubConstants.SEEK_TIME, Long.toString(timeStamp));
      Connection conn = datahubServiceClient.connect(path, "GET", requestParams, requestHeaders);
      Response resp = conn.getResponse();

      if (!resp.isOK()) {
        DatahubException ex = new DatahubException(conn.getInputStream());
        ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
        throw ex;
      }

      String json = IOUtils.readStreamAsString(conn.getInputStream());
      JSONObject tree = JSON.parseObject(json);
      // Guard against a reply that parses to no object at all, not just a missing field.
      String packId = (tree == null) ? null : tree.getString("PackId");
      if (packId == null) {
        throw new DatahubException("get pack id fail");
      }
      return new SeekPackResult(packId);
    } catch (DatahubException e) {
      throw e;
    } catch (Exception e) {
      throw new DatahubException(e.getMessage(), e);
    }
  }

  /**
   * Fetches one pack at the current cursor and advances the cursor.
   *
   * <p>Reader state ({@code protobufRecordStreamReader}, {@code packMeta}, {@code lastPackId},
   * {@code nextPackId}, {@code readMode}) is only updated after the whole response has been
   * read and validated, so a failed fetch leaves the cursor where it was (the previously
   * loaded pack is still dropped up front).
   *
   * @param fetchMode value sent as {@code PACK_FETCHMODE}; with {@code "all"} the pack data is
   *                  decoded into a record reader, otherwise only the metadata is kept
   * @return {@code false} if the service reports a pack count of {@code "0"}, {@code true}
   *         once a pack has been loaded
   * @throws OdpsException if the service answers with an error, a required response header
   *                       is missing, or any other failure occurs (all reported as
   *                       {@link DatahubException})
   * @throws IOException   declared for interface compatibility
   */
  private boolean getPack(String fetchMode) throws OdpsException, IOException {
    this.protobufRecordStreamReader = null;

    HashMap requestParams = new HashMap(this.params);
    HashMap requestHeaders = new HashMap(this.headers);

    try {
      String iterateMode;
      if (this.readMode.equals(PackType.ReadMode.SEEK_NEXT)) {
        iterateMode = DatahubConstants.ITER_MODE_AFTER_PACKID;
      } else {
        iterateMode = DatahubConstants.ITER_MODE_AT_PACKID;
      }
      requestParams.put(DatahubConstants.PACK_ID, this.nextPackId);
      requestParams.put(DatahubConstants.ITERATE_MODE, iterateMode);
      requestParams.put(DatahubConstants.PACK_NUM, "1");
      requestParams.put(DatahubConstants.PACK_FETCHMODE, fetchMode);

      Connection conn = datahubServiceClient.connect(path, "GET", requestParams, requestHeaders);
      Response resp = conn.getResponse();

      if (!resp.isOK()) {
        DatahubException ex = new DatahubException(conn.getInputStream());
        ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
        throw ex;
      }

      String packCount = requireHeader(resp, DatahubHttpHeaders.HEADER_ODPS_PACK_NUM);
      if (packCount.equals("0")) {
        return false;
      }

      InputStream in = conn.getInputStream();
      XStreamPack pack = XStreamPack.parseFrom(IOUtils.readFully(in));

      // Validate everything that can fail before touching the reader's state.
      String followingPackId = requireHeader(resp, DatahubHttpHeaders.HEADER_ODPS_NEXT_PACKID);
      String currentPackId = resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_CURRENT_PACKID);

      ProtobufRecordStreamReader reader = null;
      if ("all".equals(fetchMode)) {
        byte[] packData = pack.getPackData().toByteArray();
        reader = new ProtobufRecordStreamReader(tableSchema, new ByteArrayInputStream(packData));
      }
      byte[] meta = pack.hasPackMeta() ? pack.getPackMeta().toByteArray() : new byte[0];

      // Commit the new state in one go.
      this.protobufRecordStreamReader = reader;
      this.packMeta = meta;
      this.lastPackId = currentPackId;
      if (!followingPackId.equals(PackType.LAST_PACK_ID)) {
        this.nextPackId = followingPackId;
        this.readMode = PackType.ReadMode.SEEK_CUR;
      } else {
        // No concrete successor was reported: next time ask for whatever comes after the
        // pack just read.
        this.nextPackId = currentPackId;
        this.readMode = PackType.ReadMode.SEEK_NEXT;
      }

      return true;

    } catch (DatahubException e) {
      throw e;
    } catch (Exception e) {
      throw new DatahubException(e.getMessage(), e);
    }
  }

  /**
   * Returns a response header that the protocol handling depends on.
   *
   * @throws DatahubException naming the header (and carrying the request id) if it is absent
   */
  private static String requireHeader(Response resp, String name) throws DatahubException {
    String value = resp.getHeader(name);
    if (value == null) {
      DatahubException ex = new DatahubException("Missing response header: " + name);
      ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
      throw ex;
    }
    return value;
  }

  /**
   * Fetches the pack at the current cursor in {@code "meta"} fetch mode and returns its
   * metadata.
   *
   * <p>Like any fetch, this drops the pack currently being read and advances the cursor.
   *
   * @return the pack's metadata bytes (empty if the pack has none), or {@code null} when the
   *         service reports that no pack is available
   * @throws OdpsException if fetching the pack fails
   * @throws IOException   declared for interface compatibility
   */
  public byte[] readMeta() throws OdpsException, IOException {
    if (getPack("meta")) {
      return this.packMeta;
    }
    return null;
  }

  /**
   * Computes the MD5 digest of {@code bytes} as an upper-case hex string.
   * Currently has no caller in this class.
   */
  private String generatorMD5(byte[] bytes) {
    byte[] digest = messageDigest.digest(bytes);
    StringBuilder sb = new StringBuilder();
    for (byte b : digest) {
      sb.append(String.format("%02X", b));
    }
    return sb.toString();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy