com.aliyun.odps.datahub.DatahubClient

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.datahub;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Headers;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.commons.util.IOUtils;
import com.aliyun.odps.rest.RestClient;

public class DatahubClient {

  private String projectName;
  private String tableName;

  private DatahubConfiguration conf;
  private RestClient datahubServiceClient;
  private HashMap<String, String> headers;
  private TableSchema schema = new TableSchema();
  private List<Long> shards = new ArrayList<Long>();

  private static final long MAX_WAITING_MILLISECOND = 120000L;

  /**
   * State of a shard.
   */
  public enum ShardState {
    UNLOADED,
    LOADED,
    LOADING
  }

  /**
   * Entry point of the Datahub service.
   *
   * @param odps
   *     the Odps instance
   * @param projectName
   *     name of the target project
   * @param tableName
   *     name of the target table
   * @param datahubEndpoint
   *     Datahub service endpoint; public-network users should use
   *     http://dh.odps.aliyun.com, ECS/intranet users should use
   *     http://dh-ext.odps.aliyun-inc.com
   *
   * @throws OdpsException
   */
  public DatahubClient(Odps odps, String projectName, String tableName, String datahubEndpoint)
      throws OdpsException {
    this.conf = new DatahubConfiguration(odps);
    this.projectName = projectName;
    this.tableName = tableName;
    this.headers = new HashMap<String, String>();
    this.headers.put(Headers.CONTENT_LENGTH, String.valueOf(0));
    this.headers.put(DatahubHttpHeaders.HEADER_STREAM_VERSION, "1");
    setEndpoint(datahubEndpoint);
    initiate();
  }
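
  /*
   * Usage sketch (illustrative; not part of the original source). Assuming an
   * Odps instance `odps` already configured with valid credentials, a client
   * for one table is created per endpoint as described above; "my_project"
   * and "my_table" are placeholder names:
   *
   *   DatahubClient client = new DatahubClient(
   *       odps, "my_project", "my_table", "http://dh.odps.aliyun.com");
   */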

  public String getProjectName() {
    return projectName;
  }

  public String getTableName() {
    return tableName;
  }

  /**
   * Enable (load) shards on the ODPS hub service.
   *
   * @param shardNumber
   *     number of shards to load
   * @throws OdpsException
   */
  public void loadShard(long shardNumber) throws OdpsException {
    if (shardNumber < 0) {
      throw new DatahubException("invalid shard number");
    }

    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    // TODO use conf class
    String path = getResource() + "/shards";
    Connection conn = null;
    try {
      params.put(DatahubConstants.SHARD_NUMBER, Long.toString(shardNumber));
      conn = datahubServiceClient.connect(path, "POST", params, headers);
      Response resp = conn.getResponse();

      if (!resp.isOK()) {
        DatahubException ex = new DatahubException(conn.getInputStream());
        ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
        throw ex;
      }
    } catch (IOException e) {
      throw new DatahubException(e.getMessage(), e);
    } catch (DatahubException e) {
      throw e;
    } catch (OdpsException e) {
      throw new DatahubException(e.getMessage(), e);
    } finally {
      if (conn != null) {
        try {
          conn.disconnect();
        } catch (IOException ignored) {
        }
      }
    }
  }

  /**
   * Wait synchronously until shard loading completes.
   * The default timeout is 120000 ms.
   *
   * @throws OdpsException
   */
  public void waitForShardLoad() throws OdpsException {
    waitForShardLoad(MAX_WAITING_MILLISECOND);
  }

  /**
   * Wait synchronously until shard loading completes.
   * The maximum timeout is 120000 ms.
   *
   * @param timeout
   *     timeout in milliseconds; a value above 120000 ms is capped at 120000 ms
   * @throws OdpsException
   */
  public void waitForShardLoad(long timeout) throws OdpsException {

    if (timeout <= 0) {
      throw new DatahubException("invalid waiting time");
    }

    long waitTime = timeout > MAX_WAITING_MILLISECOND ? MAX_WAITING_MILLISECOND : timeout;

    long now = System.currentTimeMillis();

    long end = now + waitTime;

    while (now < end) {
      try {
        if (isShardLoadCompleted()) {
          return;
        }
        Thread.sleep(10000L);
        now = System.currentTimeMillis();
      } catch (Exception e) {
        throw new DatahubException(e.getMessage(), e);
      }
    }

    if (!isShardLoadCompleted()) {
      throw new DatahubException("load shard timeout");
    }
  }
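
  /*
   * Usage sketch (illustrative; not part of the original source): enable two
   * shards and block until the service reports them LOADED, assuming `client`
   * was built as in the constructor example above.
   *
   *   client.loadShard(2);
   *   client.waitForShardLoad();   // polls every 10 s, up to 120000 ms
   */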

  /**
   * Check whether all shards of the table behind this DatahubClient have been
   * loaded.
   *
   * @return true if every shard is loaded, false otherwise.
   * Note: exceptions raised by getShardStatus are handled internally.
   */
  private boolean isShardLoadCompleted() {
    try {
      HashMap<Long, ShardState> shardStatusMap = getShardStatus();

      for (Map.Entry<Long, ShardState> entry : shardStatusMap.entrySet()) {
        if (entry.getValue() != ShardState.LOADED) {
          return false;
        }
      }

      return true;
    } catch (Exception e) {
      // Swallow errors from getShardStatus (see the javadoc above) and
      // report the load as not yet complete.
    }

    return false;
  }

  /**
   * Query the server-side state of every shard owned by the table behind this
   * DatahubClient.
   *
   * @return a HashMap mapping shard id to ShardState
   * @throws OdpsException
   * @throws IOException
   */
  public HashMap<Long, ShardState> getShardStatus() throws OdpsException, IOException {

    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> hdrs = new HashMap<String, String>(this.headers);

    try {
      String path = getResource() + "/shards";

      hdrs.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));

      params.put(DatahubConstants.SHARD_STATUS, null);

      Connection conn = datahubServiceClient.connect(path, "GET", params, hdrs);
      Response resp = conn.getResponse();

      if (!resp.isOK()) {
        DatahubException ex = new DatahubException(conn.getInputStream());
        ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
        throw ex;
      } else {
        return loadShardStatusFromJson(conn.getInputStream());
      }

    } catch (Exception e) {
      throw new DatahubException(e.getMessage(), e);
    }
  }
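
  /*
   * Usage sketch (illustrative; not part of the original source): print the
   * state of every shard of the table, assuming `client` was built as shown
   * in the constructor example above.
   *
   *   for (Map.Entry<Long, ShardState> e : client.getShardStatus().entrySet()) {
   *     System.out.println("shard " + e.getKey() + " is " + e.getValue());
   *   }
   */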

  /**
   * Query, on the ODPS hub, the state of copying the given partition to the
   * offline cluster.
   *
   * @param shardId
   *     the shardId to query
   * @param partitionSpec
   *     the partition to query; required for partitioned tables, may be null
   *     for non-partitioned tables
   * @return ReplicatorStatus
   * @throws OdpsException
   * @deprecated the returned pack id does not necessarily belong to the given
   *     partition; compare pack ids by order only, never test them for equality
   */
  @Deprecated
  public ReplicatorStatus QueryReplicatorStatus(long shardId, PartitionSpec partitionSpec)
      throws OdpsException {
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    params.put("query", "replicator");
    if (partitionSpec != null && partitionSpec.toString().length() > 0) {
      params.put(DatahubConstants.RES_PARTITION, partitionSpec.toString().replaceAll("'", ""));
    }

    String path = getStreamResource(shardId);
    Connection conn = null;
    try {
      conn = datahubServiceClient.connect(path, "GET", params, headers);
      Response resp = conn.getResponse();

      if (!resp.isOK()) {
        DatahubException ex = new DatahubException(conn.getInputStream());
        ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
        throw ex;
      }

      return new ReplicatorStatus(conn.getInputStream());

    } catch (IOException e) {
      throw new DatahubException(e.getMessage(), e);
    } catch (DatahubException e) {
      throw e;
    } catch (OdpsException e) {
      throw new DatahubException(e.getMessage(), e);
    } finally {
      if (conn != null) {
        try {
          conn.disconnect();
        } catch (IOException ignored) {
        }
      }
    }
  }
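
  /*
   * Usage sketch (illustrative; not part of the original source): check how far
   * shard 0 of a non-partitioned table has been replicated. As the deprecation
   * note warns, compare the returned pack id by order only, never for equality.
   *
   *   ReplicatorStatus rs = client.QueryReplicatorStatus(0);
   *   String lastReplicatedPackId = rs.GetLastReplicatedPackId();
   */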

  /**
   * Set the DatahubServer endpoint.
   *
   * If no DatahubServer endpoint is set, one is selected automatically.
   *
   * @param endpoint
   */
  public void setEndpoint(String endpoint) throws OdpsException {
    try {
      URI u = new URI(endpoint);
      conf.setEndpoint(u);
      datahubServiceClient = conf.newRestClient(projectName);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException("Invalid endpoint.");
    } catch (DatahubException e) {
      throw e;
    } catch (OdpsException e) {
      throw new DatahubException(e.getMessage(), e);
    }
  }

  /**
   * Query, on the ODPS hub, the state of copying a non-partitioned table to
   * the offline cluster.
   *
   * @param shardId
   *     the shardId to query
   * @return ReplicatorStatus
   * @throws OdpsException
   */
  public ReplicatorStatus QueryReplicatorStatus(long shardId) throws OdpsException {
    return QueryReplicatorStatus(shardId, null);
  }

  /**
   * Return the timestamp of a pack written to datahub; datahub guarantees that
   * every pack with a timestamp less than or equal to this one has already
   * reached offline ODPS.
   *
   * @return timestamp
   * @throws IOException
   * @throws OdpsException
   */
  public Date getTableReplicatedTimeStamp() throws IOException, OdpsException {
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    params.put("query", "replicatedtimestamp");

    String path = getStreamResource();
    Connection conn = datahubServiceClient.connect(path, "GET", params, headers);
    Response resp = conn.getResponse();

    if (!resp.isOK()) {
      DatahubException ex = new DatahubException(conn.getInputStream());
      ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
      throw ex;
    }

    String json = IOUtils.readStreamAsString(conn.getInputStream());
    JSONObject tree = JSON.parseObject(json);
    Long node = tree.getLong(DatahubConstants.TABLE_REPLICATED_TIMESTAMP);
    if (node != null) {
      return new Date(node.longValue());
    } else {
      throw new DatahubException("get table replicated timestamp fail");
    }
  }

  /**
   * Return a timestamp; datahub guarantees that every pack with a timestamp
   * less than or equal to this one has already reached offline ODPS.
   *
   * @return timestamp
   * @throws IOException
   * @throws OdpsException
   */
  public Date getTableTimestamp() throws IOException, OdpsException {
    HashMap<Long, ShardState> shardStatus = getShardStatus();
    final long currentTimestamp = System.currentTimeMillis();
    long timestamp = currentTimestamp;

    for (Map.Entry<Long, ShardState> entry : shardStatus.entrySet()) {
      long shardId = entry.getKey();
      PackReader reader = null;
      if (entry.getValue() == ShardState.LOADED) {
        final String loaderReplicatedPackid = QueryReplicatorStatus(shardId).GetLastReplicatedPackId();
        if (loaderReplicatedPackid.equals(PackType.FIRST_PACK_ID)) {
          continue;
        }
        reader = openPackReader(shardId);
        final String brokerLastPackid = reader.seek(currentTimestamp).getPackId();
        if (brokerLastPackid.equals(PackType.LAST_PACK_ID)) {
          continue;
        }
        if (!brokerLastPackid.equals(loaderReplicatedPackid)) {
          reader = openPackReader(shardId, loaderReplicatedPackid);
          ReadPackResult readPackResult = reader.readPackMeta();
          timestamp = Math.min(timestamp, readPackResult.getTimeStamp());
        }
      }
    }
    return new Date(timestamp);
  }

  private void initiate() throws OdpsException {
    HashMap<String, String> params = new HashMap<String, String>();
    params.put("query", "meta");
    /* for odps stream compatibility */
    params.put("type", "stream");

    Connection conn = null;
    try {
      datahubServiceClient = conf.newRestClient(projectName);
      conn = datahubServiceClient.connect(getResource(), "GET", params, headers);
      Response resp = conn.getResponse();

      if (resp.isOK()) {
        loadFromJson(conn.getInputStream());
      } else {
        DatahubException e = new DatahubException(conn.getInputStream());
        e.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
        throw e;
      }
    } catch (IOException e) {
      throw new DatahubException(e.getMessage(), e);
    } catch (DatahubException e) {
      throw e;
    } catch (OdpsException e) {
      throw new DatahubException(e.getMessage(), e);
    } finally {
      if (conn != null) {
        try {
          conn.disconnect();
        } catch (IOException ignored) {
        }
      }
    }
  }

  public TableSchema getStreamSchema() {
    return this.schema;
  }

  public TableSchema getStreamSchemaFromServer() throws OdpsException {
    initiate();
    return this.schema;
  }

  public List<Long> getShardList() {
    return this.shards;
  }

  /**
   * Create a DatahubWriter that writes to the given shard.
   *
   * @param shardId
   *     the shardId to write to
   * @return DatahubWriter
   * @throws OdpsException
   * @throws IOException
   */
  public DatahubWriter openDatahubWriter(long shardId) throws OdpsException, IOException {
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.CONTENT_TYPE, "application/octet-stream");
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    return new DatahubWriter(datahubServiceClient, getStreamResource(shardId), params, headers);
  }

  /**
   * Create a DatahubWriter.
   *
   * @return DatahubWriter
   * @throws OdpsException
   * @throws IOException
   */
  public DatahubWriter openDatahubWriter() throws OdpsException, IOException {
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.CONTENT_TYPE, "application/octet-stream");
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    return new DatahubWriter(datahubServiceClient, getStreamResource(), params, headers);
  }

  /**
   * Create a DatahubReader that reads from the given shard.
   *
   * @param shardId
   *     the shardId to read from
   * @return DatahubReader
   * @throws OdpsException
   * @throws IOException
   */
  public DatahubReader openDatahubReader(long shardId) throws OdpsException, IOException {
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    return new DatahubReader(datahubServiceClient, schema, getStreamResource(shardId), params, headers);
  }

  /**
   * Create a DatahubReader that reads from the given shard, starting at the
   * given pack.
   *
   * @param shardId
   *     the shardId to read from
   * @param packId
   *     the packId to start reading from
   * @return DatahubReader
   * @throws OdpsException
   * @throws IOException
   */
  public DatahubReader openDatahubReader(long shardId, String packId)
      throws OdpsException, IOException {
    if (packId == null || packId.equals("")) {
      throw new IllegalArgumentException("Invalid pack id.");
    }
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    return new DatahubReader(datahubServiceClient, schema, getStreamResource(shardId), params,
                             headers, packId);
  }

  public PackReader openPackReader(long shardId) throws OdpsException, IOException {
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    return new PackReader(datahubServiceClient, schema, getStreamResource(shardId), params, headers);
  }

  public PackReader openPackReader(long shardId, String packId) throws OdpsException, IOException {
    if (packId == null || packId.equals("")) {
      throw new IllegalArgumentException("Invalid pack id.");
    }
    HashMap<String, String> params = new HashMap<String, String>();
    HashMap<String, String> headers = new HashMap<String, String>(this.headers);
    headers.put(DatahubHttpHeaders.HEADER_ODPS_TUNNEL_VERSION, String.valueOf(DatahubConstants.VERSION));
    return new PackReader(datahubServiceClient, schema, getStreamResource(shardId), params,
                          headers, packId);
  }

  private String getResource() {
    return conf.getResource(projectName, tableName);
  }

  private String getStreamResource(long shardId) {
    return conf.getStreamUploadResource(projectName, tableName, shardId);
  }

  private String getStreamResource() {
    return conf.getStreamUploadResource(projectName, tableName);
  }

  private void loadFromJson(InputStream is) throws OdpsException {
    try {
      String json = IOUtils.readStreamAsString(is);
      JSONObject tree = JSON.parseObject(json);

      JSONObject schemaNode = tree.getJSONObject("Schema");
      if (schemaNode != null) {
        schema = new DatahubTableSchema(schemaNode);
      } else {
        throw new DatahubException("get table schema fail");
      }

      JSONArray node = tree.getJSONArray("Shards");
      if (node != null) {
        for (int i = 0; i < node.size(); ++i) {
          long shardId = node.getLongValue(i);
          shards.add(shardId);
        }
      } else {
        throw new DatahubException("get shard fail");
      }
    } catch (Exception e) {
      throw new DatahubException("Invalid json content.", e);
    }
  }

  private HashMap<Long, ShardState> loadShardStatusFromJson(InputStream is) throws OdpsException {
    try {
      HashMap<Long, ShardState> shardStatus = new HashMap<Long, ShardState>();
      String json = IOUtils.readStreamAsString(is);
      JSONObject tree = JSON.parseObject(json);
      JSONArray node = tree.getJSONArray("ShardStatus");
      if (node != null) {
        for (int i = 0; i < node.size(); ++i) {
          JSONObject status = node.getJSONObject(i);
          ShardState state = ShardState.valueOf(status.getString("State").toUpperCase());
          shardStatus.put(Long.parseLong(status.getString("ShardId")), state);
        }
      }
      return shardStatus;
    } catch (Exception e) {
      throw new DatahubException("Invalid json content.", e);
    }
  }
}
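
/*
 * End-to-end usage sketch (illustrative; not part of the original source):
 * create the client, load a shard, wait until it is ready, then open a writer
 * and a reader against the first shard id reported by the service. Writing
 * and reading records is done through the DatahubWriter/DatahubReader APIs,
 * which live outside this file.
 *
 *   DatahubClient client = new DatahubClient(
 *       odps, "my_project", "my_table", "http://dh.odps.aliyun.com");
 *   client.loadShard(1);
 *   client.waitForShardLoad();
 *   long shardId = client.getShardList().get(0);
 *   DatahubWriter writer = client.openDatahubWriter(shardId);
 *   DatahubReader reader = client.openDatahubReader(shardId);
 */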



