com.aliyun.odps.datahub.PackReader Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.datahub;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import com.aliyun.odps.commons.proto.ProtobufRecordStreamReader;
import com.aliyun.odps.TableSchema;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.commons.transport.Connection;
import com.aliyun.odps.commons.transport.Response;
import com.aliyun.odps.commons.util.IOUtils;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.rest.RestClient;
import com.aliyun.odps.commons.proto.XstreamPack.XStreamPack;
import com.aliyun.odps.commons.proto.XstreamPack.BytesPairPB;
public class PackReader {
// REST client used to open the GET connections issued by seek(long) and read(String).
private RestClient datahubServiceClient;
// Schema handed to ProtobufRecordStreamReader when decoding a pack's record data.
private TableSchema tableSchema;
// Resource path passed to every datahubServiceClient.connect(...) call.
private String path;
// MD5 digest created in the constructor; its use is not visible in this part of the file.
private MessageDigest messageDigest;
// Base request parameters; copied into a fresh HashMap for each request.
private Map params;
// Base request headers; copied into a fresh HashMap for each request.
private Map headers;
// Id of the pack most recently returned by the service (taken from the current-pack-id
// response header); reset to null by most seeks and when the end of the stream is reached.
private String currPackId;
// Pack id sent as the PACK_ID parameter of the next read request.
private String nextPackId;
// Decides whether the next read asks for the pack AT nextPackId or the one AFTER it.
private PackType.ReadMode readMode;
// Reader over the record data of the last pack fetched with fetch mode "all"; null otherwise.
private ProtobufRecordStreamReader protobufRecordStreamReader;
/**
 * Creates a reader that starts at {@code PackType.FIRST_PACK_ID}.
 *
 * <p>Simply forwards to the six-argument constructor with the first-pack id as the
 * starting position.
 *
 * @param datahubServiceClient REST client used for all service requests
 * @param tableSchema schema used to decode records
 * @param path resource path the requests are sent to
 * @param params base request parameters
 * @param headers base request headers
 */
public PackReader(
        RestClient datahubServiceClient,
        TableSchema tableSchema,
        String path,
        Map params,
        Map headers) {
    this(datahubServiceClient, tableSchema, path, params, headers, PackType.FIRST_PACK_ID);
}
/**
 * Creates a reader positioned at the given pack id.
 *
 * <p>Stores the collaborators, obtains an MD5 {@link MessageDigest} and then seeks to
 * {@code packId} in {@code SEEK_CUR} mode, so the first read requests exactly that pack.
 *
 * @param datahubServiceClient REST client used for all service requests
 * @param tableSchema schema used to decode records
 * @param path resource path the requests are sent to
 * @param params base request parameters
 * @param headers base request headers
 * @param packId pack id to start reading at; must be neither {@code null} nor empty
 * @throws IllegalArgumentException if {@code packId} is {@code null} or empty
 * @throws RuntimeException if the MD5 algorithm is unavailable; the original
 *         {@link NoSuchAlgorithmException} is attached as the cause
 */
public PackReader(RestClient datahubServiceClient, TableSchema tableSchema, String path,
        Map params, Map headers, String packId) {
    this.datahubServiceClient = datahubServiceClient;
    this.tableSchema = tableSchema;
    this.path = path;
    this.params = params;
    this.headers = headers;
    this.currPackId = null;
    this.nextPackId = null;
    try {
        this.messageDigest = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
        // Same message as before, but chain the cause so the stack trace is not lost.
        throw new RuntimeException(e.getMessage(), e);
    }
    this.protobufRecordStreamReader = null;
    seek(packId, PackType.ReadMode.SEEK_CUR);
}
/**
 * Tells whether {@code pid} names a concrete pack.
 *
 * @param pid pack id to test, may be {@code null}
 * @return {@code false} for {@code null}, {@code PackType.FIRST_PACK_ID} and
 *         {@code PackType.LAST_PACK_ID}; {@code true} for anything else
 */
private boolean isValid(String pid) {
    if (pid == null) {
        return false;
    }
    if (pid.equals(PackType.FIRST_PACK_ID)) {
        return false;
    }
    return !pid.equals(PackType.LAST_PACK_ID);
}
/**
 * Repositions the reader so that the next read starts from the requested place.
 *
 * <p>A pack id is required for every mode except {@code SEEK_BEGIN} and {@code SEEK_END}.
 * When asked to move to the pack after {@code rpid} and {@code rpid} is the pack that was
 * just read (with a known successor), the seek is turned into a direct {@code SEEK_CUR}
 * on that successor; in every other case the remembered current pack id is cleared.
 * The cached record stream reader is always dropped.
 *
 * @param rpid reference pack id; ignored for {@code SEEK_BEGIN} and {@code SEEK_END}
 * @param mode how {@code rpid} is to be interpreted
 * @throws IllegalArgumentException if a pack id is required but {@code rpid} is
 *         {@code null} or empty, or if {@code mode} is not a supported read mode
 */
private void seek(String rpid, PackType.ReadMode mode) {
    boolean idMissing = rpid == null || rpid.equals("");
    boolean idRequired =
        !(mode == PackType.ReadMode.SEEK_BEGIN || mode == PackType.ReadMode.SEEK_END);
    if (idMissing && idRequired) {
        throw new IllegalArgumentException("Invalid pack id.");
    }

    String targetPackId = rpid;
    PackType.ReadMode effectiveMode = mode;

    // "Next after the pack we just read" can be served directly from the remembered successor.
    boolean successorKnown = mode == PackType.ReadMode.SEEK_NEXT
        && isValid(currPackId)
        && currPackId.equals(rpid)
        && isValid(nextPackId);
    if (successorKnown) {
        targetPackId = nextPackId;
        effectiveMode = PackType.ReadMode.SEEK_CUR;
    } else {
        currPackId = null;
    }

    switch (effectiveMode) {
        case SEEK_BEGIN:
            nextPackId = PackType.FIRST_PACK_ID;
            break;
        case SEEK_END:
            nextPackId = PackType.LAST_PACK_ID;
            break;
        case SEEK_CUR:
        case SEEK_NEXT:
            nextPackId = targetPackId;
            break;
        default:
            throw new IllegalArgumentException("Invalid pack read mode.");
    }

    readMode = effectiveMode;
    protobufRecordStreamReader = null;
}
/**
 * Asks the service for the pack id that corresponds to the given timestamp.
 *
 * <p>Issues a GET on {@code path} with the base parameters plus
 * {@code DatahubConstants.SEEK_TIME} and reads the {@code "PackId"} field of the JSON
 * response. This method does not change the reader's own position.
 *
 * @param timeStamp timestamp sent to the service as the seek time
 * @return a {@code SeekPackResult} wrapping the pack id returned by the service
 * @throws OdpsException as a {@code DatahubException} when the response is not OK (the
 *         request id header is attached), when the response carries no pack id, or when
 *         any other failure occurs while talking to the service (wrapped with its cause)
 * @throws IOException declared for interface compatibility
 */
public SeekPackResult seek(long timeStamp) throws OdpsException, IOException {
    HashMap params = new HashMap(this.params);
    HashMap headers = new HashMap(this.headers);
    Connection conn = null;
    try {
        params.put(DatahubConstants.SEEK_TIME, Long.toString(timeStamp));
        conn = datahubServiceClient.connect(path, "GET", params, headers);
        Response resp = conn.getResponse();
        if (!resp.isOK()) {
            DatahubException ex = new DatahubException(conn.getInputStream());
            ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
            throw ex;
        }
        String json = IOUtils.readStreamAsString(conn.getInputStream());
        JSONObject tree = JSON.parseObject(json);
        // An empty body presumably parses to null; report it as a missing pack id rather than an NPE.
        String packId = (tree == null) ? null : tree.getString("PackId");
        if (packId == null) {
            throw new DatahubException("get pack id fail");
        }
        return new SeekPackResult(packId);
    } catch (DatahubException e) {
        throw e;
    } catch (Exception e) {
        throw new DatahubException(e.getMessage(), e);
    } finally {
        // Always release the HTTP connection; previously it was never disconnected.
        if (conn != null) {
            try {
                conn.disconnect();
            } catch (IOException ignored) {
                // Best-effort cleanup: a failure here must not mask the primary outcome.
            }
        }
    }
}
/**
 * Reads the next pack, including its records.
 *
 * <p>If the service rejects the request with error code {@code "OutOfRange"}, the reader
 * is repositioned to {@code PackType.FIRST_PACK_ID} and the read is retried once. Any
 * other {@code DatahubException} is rethrown unchanged.
 *
 * @return the pack that was read, or {@code null} when the service reports that no pack
 *         is available
 * @throws OdpsException if the read fails for a reason other than a recoverable
 *         {@code "OutOfRange"}, or if the retry fails
 * @throws IOException declared for interface compatibility
 */
public ReadPackResult read() throws OdpsException, IOException {
    try {
        return read("all");
    } catch (DatahubException e) {
        // Constant-first comparison: an exception without an error code (presumably the
        // case for locally wrapped failures) must be rethrown as-is, not turned into an NPE.
        if (!"OutOfRange".equals(e.getErrorCode())) {
            throw e;
        }
        seek(PackType.FIRST_PACK_ID, PackType.ReadMode.SEEK_CUR);
        return read("all");
    }
}
/**
 * Reads the next pack using fetch mode {@code "meta"}, so no records are decoded.
 *
 * @return the result of the meta-only read, or {@code null} when the service reports
 *         that no pack is available
 * @throws OdpsException if the request fails
 * @throws IOException declared for interface compatibility
 */
public ReadPackResult readPackMeta() throws OdpsException, IOException {
    final ReadPackResult metaOnlyResult = read("meta");
    return metaOnlyResult;
}
/**
 * Reads the next pack in meta-only mode and returns just its key/value metadata.
 *
 * @return the value of {@code getKvMeta()} on the pack that was read, or {@code null}
 *         when no pack was returned
 * @throws OdpsException if the request fails
 * @throws IOException declared for interface compatibility
 */
public Map readMeta() throws OdpsException, IOException {
    ReadPackResult metaResult = readPackMeta();
    return (metaResult == null) ? null : metaResult.getKvMeta();
}
private ReadPackResult read(String fetchMode) throws OdpsException, IOException {
this.protobufRecordStreamReader = null;
HashMap params = new HashMap(this.params);
HashMap headers = new HashMap(this.headers);
try {
String strMode;
if (readMode == PackType.ReadMode.SEEK_NEXT) {
strMode = DatahubConstants.ITER_MODE_AFTER_PACKID;
} else {
strMode = DatahubConstants.ITER_MODE_AT_PACKID;
}
params.put(DatahubConstants.PACK_ID, this.nextPackId);
params.put(DatahubConstants.ITERATE_MODE, strMode);
params.put(DatahubConstants.PACK_NUM, "1");
params.put(DatahubConstants.PACK_FETCHMODE, fetchMode);
Connection conn = datahubServiceClient.connect(path, "GET", params, headers);
Response resp = conn.getResponse();
if (!resp.isOK()) {
DatahubException ex = new DatahubException(conn.getInputStream());
ex.setRequestId(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_REQUEST_ID));
throw ex;
}
String num = resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_PACK_NUM);
if (num.equals("0")) {
return null;
}
InputStream in = conn.getInputStream();
byte[] bytes = IOUtils.readFully(in);
XStreamPack pack = XStreamPack.parseFrom(bytes);
List records = new ArrayList();
if (fetchMode.equals("all")) {
bytes = pack.getPackData().toByteArray();
this.protobufRecordStreamReader = new ProtobufRecordStreamReader(
tableSchema, new ByteArrayInputStream(bytes));
Record r = null;
while ((r = protobufRecordStreamReader.read()) != null)
{
records.add(r);
}
}
String npid = resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_NEXT_PACKID);
String cpid = resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_CURRENT_PACKID);
long timeStamp = new Long(resp.getHeader(DatahubHttpHeaders.HEADER_ODPS_PACK_TIMESTAMP));
if (!npid.equals(PackType.LAST_PACK_ID)) {
nextPackId = npid;
readMode = PackType.ReadMode.SEEK_CUR;
currPackId = cpid;
} else {
nextPackId = cpid;
readMode = PackType.ReadMode.SEEK_NEXT;
currPackId = null;
}
HashMap kvMap = null;
if (pack.hasKvMeta())
{
kvMap = new HashMap();
for (int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy