All downloads are free. Search and download functionality uses the official Maven repository.

com.epam.deltix.util.s3.S3Reader Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 EPAM Systems, Inc
 *
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership. Licensed under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.epam.deltix.util.s3;

import com.amazonaws.util.StringUtils;
import com.epam.deltix.gflog.api.Log;
import com.epam.deltix.gflog.api.LogFactory;
import com.epam.deltix.util.collections.generated.ObjectArrayList;
import com.epam.deltix.util.lang.Util;
import org.apache.commons.lang3.tuple.Pair;

import java.io.*;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

public abstract class S3Reader implements Closeable, AutoCloseable {

    protected static final Log LOG = LogFactory.getLog(S3Reader.class);

    private S3DataStore dataStore;
    private List batchKeys;
    private int batchIndex = -1;

    private String userMetadata = null;
    private DataOutputStream out = new DataOutputStream();

    public S3Reader(S3DataStore dataStore, String dataKey, long startTime, long endTime) throws IOException {
        this.dataStore = dataStore;

        // get keys with dataKey prefix sorted by name and filtered by timestamp since it is
        // in this form //_.json.gz
        String keyPrefix = dataKey.endsWith(getDataFormat()) || dataKey.endsWith(S3DataStore.KEY_DELIMITER) ? dataKey : dataKey + S3DataStore.KEY_DELIMITER;
        String keySuffix = "." + getDataFormat();
        this.batchKeys = filterAndSort(dataStore.getObjectKeys(keyPrefix, keySuffix), startTime, endTime);
        Collections.sort(this.batchKeys);
        if (this.batchKeys.size() == 0)
            throw new IllegalArgumentException("No data found under " + keyPrefix);

        String mdKey = keyPrefix + S3Writer.METADATA_OBJ_NAME;
        if (dataStore.objectExists(mdKey)) {
            dataStore.download(mdKey, out);
            userMetadata = new String(out.toByteArray(), StringUtils.UTF8);
        }
        out.reset();
    }

    public S3Reader(S3DataStore dataStore, String dataKey) throws IOException {
        this(dataStore, dataKey, Long.MIN_VALUE, Long.MAX_VALUE);
    }

    public String getUserMetadata() {
        return userMetadata;
    }

    public synchronized T read() throws IOException {
        if (batchIndex >= batchKeys.size())
            return null;

        T record = readNextRecord();

        while (record == null) {
            // download the next batch and readNextRecord
            batchIndex++;
            if (batchIndex >= batchKeys.size())
                break;

            out.reset();
            dataStore.download(batchKeys.get(batchIndex), out);
            startBatch(out.getData());

            record = readNextRecord();
        }
        return record;
    }

    /**
     * Filters object keys according to time interval
     *
     * @param keys      list of keys in format DATA_KEY/date=yyyy-MM-dd/HH-mm-ss_1231231231231.json.gz
     * @param startTime start time
     * @param endTime   end time
     * @return filtered values list
     */
    private List filterAndSort(List keys, long startTime, long endTime) {
        final ObjectArrayList> list = keys.stream()
                .map(s -> Pair.of(extractTimestamp(s), s))
                .filter(p -> p.getLeft() >= startTime)
                .sorted(Comparator.comparingLong(Pair::getLeft))
                .collect(Collectors.toCollection(ObjectArrayList::new));
        final ObjectArrayList result = new ObjectArrayList<>();
        if (list.size() > 0)
            result.add(list.get(0).getRight());
        for (int i = 1; i < list.size(); i++) {
            if (list.get(i - 1).getLeft() < endTime)
                result.add(list.get(i).getRight());
        }
        return result;
    }

    public long getLastTimestamp() {
        return extractTimestamp(batchKeys.get(batchKeys.size() - 1));
    }

    /**
     * Extracts timestamp from key
     * @param key string in format DATA_KEY/date=yyyy-MM-dd/HH-mm-ss_1231231231231.json.gz
     * @return extracted timestamp
     */
    protected long extractTimestamp(String key) {
        return Long.parseLong(key.substring(key.lastIndexOf('_') + 1, key.lastIndexOf("." + getDataFormat())));
    }

    protected abstract void startBatch(InputStream batchData) throws IOException;

    protected abstract T readNextRecord() throws IOException;

    protected abstract String getDataFormat();

    protected static class DataOutputStream extends ByteArrayOutputStream {
        protected InputStream getData() {
            return new ByteArrayInputStream(super.buf, 0, size());
        }
    }

    @Override
    public void close() throws IOException {
        Util.close(out);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy