package com.groupon.lex.metrics.history.xdr;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.groupon.lex.metrics.history.TSData;
import com.groupon.lex.metrics.history.v2.Compression;
import com.groupon.lex.metrics.history.v2.list.RWListFile;
import com.groupon.lex.metrics.history.v2.xdr.Util;
import com.groupon.lex.metrics.history.xdr.TSDataScanDir.MetaData;
import com.groupon.lex.metrics.history.xdr.support.FileUtil;
import com.groupon.lex.metrics.history.xdr.support.SequenceTSData;
import com.groupon.lex.metrics.lib.GCCloseable;
import com.groupon.lex.metrics.lib.SimpleMapEntry;
import com.groupon.lex.metrics.lib.sequence.ObjectSequence;
import com.groupon.lex.metrics.timeseries.TimeSeriesCollection;
import java.io.Closeable;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonMap;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static java.util.Objects.requireNonNull;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import lombok.Value;
import org.acplt.oncrpc.OncRpcException;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

/**
 * A writable TSDataFile that uses multiple underlying files.
 *
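 * <p>A minimal usage sketch (the directory path is hypothetical, not from the
 * original docs):
 * <pre>{@code
 * TSDataFileChain chain = TSDataFileChain.openDir(Paths.get("/var/monsoon/history"));
 * chain.add(tsc);            // Append a TimeSeriesCollection to the write file.
 * chain.waitPendingTasks();  // Let background optimization tasks finish.
 * }</pre>
 *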
 * @author ariane
 */
public class TSDataFileChain extends SequenceTSData {
    private static final Logger LOG = Logger.getLogger(TSDataFileChain.class.getName());
    public static long MAX_FILESIZE = 512 * 1024 * 1024;
    public static int MAX_FILERECORDS = 17280;  // 1/5th of number of seconds in a day.
    private final long max_filesize_;
    private final int max_filerecords_ = MAX_FILERECORDS;
    private List<Future<?>> pendingTasks = new ArrayList<>();
    private boolean optimizeOldFiles = false;
    private static final LoadingCache<Key, SequenceTSData> FILES = CacheBuilder.newBuilder()
            .concurrencyLevel(Runtime.getRuntime().availableProcessors())
            .weakKeys()
            .softValues()
            .build(new CacheLoader<Key, SequenceTSData>() {
                @Override
                public SequenceTSData load(Key key) throws IOException {
                    return TSData.readonly(key.getFile());
                }
            });
    private final Path dir_;
    private final Set<Key> readKeys = new HashSet<>();
    private Optional<AppendFile> appendFile = Optional.empty();
    private final ReentrantReadWriteLock guard = new ReentrantReadWriteLock(true);  // Protects readKeys and appendFile.

    @NonNull
    @Getter
    @Setter
    private volatile Compression appendCompression = Compression.DEFAULT_APPEND;
    @NonNull
    @Getter
    @Setter
    private volatile Compression optimizedCompression = Compression.DEFAULT_OPTIMIZED;

    private static SequenceTSData getFile(Key key) throws IOException {
        try {
            return FILES.get(key);
        } catch (ExecutionException ex) {
            if (ex.getCause() instanceof IOException)
                throw (IOException) ex.getCause();
            throw new IllegalStateException("exception not recognized", ex);  // Should never happen.
        }
    }

    /**
     * Tests if there are any pending tasks to be completed; as a side effect,
     * completed tasks are pruned from the pending list.
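     *
     * <p>A hypothetical usage sketch (the {@code chain} variable is assumed):
     * <pre>{@code
     * if (chain.hasPendingTasks())
     *     chain.waitPendingTasks();
     * }</pre>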
     */
    public synchronized boolean hasPendingTasks() {
        Iterator<Future<?>> futIter = pendingTasks.iterator();
        while (futIter.hasNext()) {
            Future<?> fut = futIter.next();
            if (fut.isDone())
                futIter.remove();
        }
        return !pendingTasks.isEmpty();
    }

    /**
     * Blocks until all currently pending background tasks have completed.
     */
    public void waitPendingTasks() {
        final List<Future<?>> copy;
        synchronized (this) {
            copy = new ArrayList<>(pendingTasks);
        }
        for (Future<?> fut : copy) {
            try {
                fut.get();
            } catch (InterruptedException ex) {
                LOG.log(Level.WARNING, "interrupted while waiting for pending task", ex);
            } catch (ExecutionException ex) {
                LOG.log(Level.WARNING, "pending task completed exceptionally", ex);
            }
        }

        hasPendingTasks();  // Clears out the completed futures.
    }

    /**
     * Saved metadata of a single file, used to identify the file and (re)open
     * it.
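     *
     * <p>Keys order by {@code begin}, then {@code end}, then {@code file}
     * path; see {@link #compareTo(Key)}.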
     */
    @Value
    public static class Key implements Comparable<Key> {
        @NonNull
        private final Path file;
        @NonNull
        private final DateTime begin;
        @NonNull
        private final DateTime end;
        /**
         * Indicates the file was created/modified since TSDataFileChain
         * started. Note that the write file is always considered new.
         */
        private final boolean newFile;

        @Override
        public int compareTo(Key othr) {
            int cmp = getBegin().compareTo(othr.getBegin());
            if (cmp == 0)
                cmp = getEnd().compareTo(othr.getEnd());
            if (cmp == 0)
                cmp = getFile().compareTo(othr.getFile());
            return cmp;
        }
    }

    @RequiredArgsConstructor
    @Getter
    private static class AppendFile {
        private final Path filename;
        private final RWListFile tsdata;
    }

    private TSDataFileChain(@NonNull Path dir, @NonNull Optional<Path> wfile, @NonNull Collection<MetaData> rfiles, long max_filesize) throws IOException {
        dir_ = dir;
        if (wfile.isPresent()) {
            try {
                appendFile = Optional.of(new AppendFile(wfile.get(), new RWListFile(new GCCloseable<>(FileChannel.open(wfile.get(), StandardOpenOption.READ, StandardOpenOption.WRITE)), true)));
            } catch (OncRpcException ex) {
                throw new IOException("decoding failed: " + wfile.get(), ex);
            }
        }
        requireNonNull(rfiles).stream()
                .unordered()
                .map((md) -> new Key(md.getFileName(), md.getBegin(), md.getEnd(), false))
                .forEach(readKeys::add);
        max_filesize_ = max_filesize;
    }

    public static Optional<TSDataFileChain> openDirExisting(@NonNull TSDataScanDir scandir) throws IOException {
        return openDirExisting(scandir, MAX_FILESIZE);
    }

    public static Optional<TSDataFileChain> openDirExisting(@NonNull TSDataScanDir scandir, long max_filesize) throws IOException {
        List<MetaData> files = scandir.getFiles();
        if (files.isEmpty())
            return Optional.empty();

        /* If the most recent file is upgradable, use that to append new records to. */
        Optional<MetaData> last = Optional.of(files.get(files.size() - 1))
                .filter((md) -> md.isUpgradable());
        if (last.isPresent())
            files = files.subList(0, files.size() - 1);
        return Optional.of(new TSDataFileChain(scandir.getDir(), last.map(MetaData::getFileName), files, max_filesize));
    }

    public static TSDataFileChain openDir(@NonNull TSDataScanDir scandir) throws IOException {
        return openDir(scandir, MAX_FILESIZE);
    }

    public static TSDataFileChain openDir(@NonNull TSDataScanDir scandir, long max_filesize) throws IOException {
        TSDataFileChain result = openDirExisting(scandir, max_filesize).orElse(null);
        if (result == null)
            result = new TSDataFileChain(scandir.getDir(), Optional.empty(), emptyList(), max_filesize);
        return result;
    }

    public static Optional<TSDataFileChain> openDirExisting(@NonNull Path dir) throws IOException {
        return openDirExisting(dir, MAX_FILESIZE);
    }

    public static Optional<TSDataFileChain> openDirExisting(@NonNull Path dir, long max_filesize) throws IOException {
        return openDirExisting(new TSDataScanDir(dir), max_filesize);
    }

    public static TSDataFileChain openDir(@NonNull Path dir) throws IOException {
        return openDir(dir, MAX_FILESIZE);
    }

    public static TSDataFileChain openDir(@NonNull Path dir, long max_filesize) throws IOException {
        return openDir(new TSDataScanDir(dir), max_filesize);
    }

    private AppendFile getAppendFileForWriting(DateTime ts_trigger) throws IOException {
        assert guard.isWriteLockedByCurrentThread();

        if (appendFile.filter((fd) -> fd.getTsdata().getFileSize() < max_filesize_ && fd.getTsdata().size() < max_filerecords_).isPresent())
            return appendFile.get();

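        // The prefix encodes the trigger timestamp, e.g. "monsoon-20150601-1345"
        // for a collection stamped 2015-06-01T13:45.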
        final String prefix = String.format("monsoon-%04d%02d%02d-%02d%02d", ts_trigger.getYear(), ts_trigger.getMonthOfYear(), ts_trigger.getDayOfMonth(), ts_trigger.getHourOfDay(), ts_trigger.getMinuteOfHour());
        try {
            final FileUtil.NamedFileChannel newFile = FileUtil.createNewFile(dir_, prefix, ".tsd");
            try {
                final RWListFile fd = RWListFile.newFile(new GCCloseable<>(newFile.getFileChannel()), appendCompression);
                final AppendFile newAppendFile = new AppendFile(newFile.getFileName(), fd);
                installAppendFile(newAppendFile);
                return newAppendFile;
            } catch (Error | RuntimeException | IOException ex) {
                try {
                    Files.delete(newFile.getFileName());
                } catch (Error | RuntimeException | IOException ex1) {
                    ex.addSuppressed(ex1);
                }
                throw ex;
            }
        } catch (IOException ex) {
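            // Creating a replacement file failed; rather than dropping data,
            // keep appending to the current file until it reaches twice the limit.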
            if (appendFile.filter((fd) -> fd.getTsdata().getFileSize() < 2 * max_filesize_).isPresent())
                return appendFile.get();
            throw ex;
        }
    }

    @Override
    public ObjectSequence<TimeSeriesCollection> getSequence() {
        return Util.mergeSequences(getRawCollections().stream()
                .map(SequenceTSData::getSequence)
                .toArray(ObjectSequence[]::new));
    }

    @Override
    public boolean canAddSingleRecord() {
        return true;
    }

    @Override
    public boolean isOptimized() {
        return true;
    }

    @Override
    public long getFileSize() {
        final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
        lock.lock();
        try {
            return appendFile.map(fd -> fd.getTsdata().getFileSize()).orElse(0L)
                    + readKeys.stream()
                    .mapToLong(key -> {
                        final TSData tsdata = FILES.getIfPresent(key);
                        try {
                            if (tsdata != null)
                                return tsdata.getFileSize();
                            else
                                return Files.size(key.getFile());
                        } catch (IOException ex) {
                            LOG.log(Level.WARNING, "unable to stat file " + key.getFile(), ex);
                            return 0L;
                        }
                    })
                    .sum();
        } finally {
            lock.unlock();
        }
    }

    @Override
    public DateTime getBegin() {
        final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
        lock.lock();
        try {
            return Stream.concat(appendFile.map(fd -> fd.getTsdata().getBegin()).map(Stream::of).orElseGet(Stream::empty), readKeys.stream().map(Key::getBegin))
                    .min(Comparator.naturalOrder())
                    .orElseGet(() -> new DateTime(0, DateTimeZone.UTC));
        } finally {
            lock.unlock();
        }
    }

    @Override
    public DateTime getEnd() {
        final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
        lock.lock();
        try {
            return Stream.concat(appendFile.map(fd -> fd.getTsdata().getEnd()).map(Stream::of).orElseGet(Stream::empty), readKeys.stream().map(Key::getEnd))
                    .max(Comparator.naturalOrder())
                    .orElseGet(() -> new DateTime(0, DateTimeZone.UTC));
        } finally {
            lock.unlock();
        }
    }

    @Override
    public short getMajor() {
        final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
        lock.lock();
        try {
            return appendFile.map(AppendFile::getTsdata).map(TSData::getMajor).orElse(Const.MAJOR);
        } finally {
            lock.unlock();
        }
    }

    @Override
    public short getMinor() {
        final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
        lock.lock();
        try {
            return appendFile.map(AppendFile::getTsdata).map(TSData::getMinor).orElse(Const.MINOR);
        } finally {
            lock.unlock();
        }
    }

    /**
     * Attempts to compress the file denoted by the key.
     *
     * @param files The keys and associated tsdata for the files to be
     * compressed. Map values may be null.
     */
    private void optimizeFiles(Map<Key, TSData> files) {
        // Fill in all null values in 'files' and remove keys that have been retired.
        files = files.entrySet().stream()
                .map(filesEntry -> {
                    TSData tsdata = filesEntry.getValue();
                    if (tsdata == null) {
                        try {
                            tsdata = getFile(filesEntry.getKey());
                        } catch (IOException ex) {
                            LOG.log(Level.INFO, "skipping optimization of " + filesEntry.getKey().getFile(), ex);
                            tsdata = null;
                        }
                    }
                    return Optional.ofNullable(tsdata) // Null value if the file was removed between request for compression and now.
                            .map(tsd -> SimpleMapEntry.create(filesEntry.getKey(), tsd));
                })
                .flatMap(opt -> opt.map(Stream::of).orElseGet(Stream::empty))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
        if (files.isEmpty())
            return;
        final List<Key> keys = new ArrayList<>(files.keySet());  // Keys for removal later; we don't want to reference the complete map, as it also holds each TSData and holding on would prevent the GC from retiring them early.
        LOG.log(Level.INFO, "kicking off optimization of {0}", keys.stream().map(Key::getFile).collect(Collectors.toList()));

        optimize(files.values(), keys);
    }

    private CompletableFuture<Void> optimize(Collection<TSData> tsdata, Collection<Key> erase) {
        final CompletableFuture<Void> task = new TSDataOptimizerTask(dir_, tsdata)
                .withCompression(optimizedCompression)
                .run()
                .thenAccept((newFile) -> {
                    final ReentrantReadWriteLock.WriteLock lock = guard.writeLock();
                    lock.lock();
                    try {
                        erase.forEach(file -> {
                            if (readKeys.remove(file)) {
                                try {
                                    Files.delete(file.getFile());
                                } catch (IOException ex) {
                                    LOG.log(Level.WARNING, "unable to remove {0}", file.getFile());
                                }
                            }
                        });

                        // Install new key.
                        final Key newKey = new Key(newFile.getName(), newFile.getData().getBegin(), newFile.getData().getEnd(), true);
                        FILES.put(newKey, newFile.getData());
                        readKeys.add(newKey);
                    } finally {
                        lock.unlock();
                    }
                });
        synchronized (this) {
            pendingTasks.add(task);
        }
        // Remove self after completion.
        task
                .whenComplete((result, exc) -> {
                    if (exc != null)
                        LOG.log(Level.WARNING, "unable to optimize " + erase.stream().collect(Collectors.toList()), exc);
                    synchronized (this) {
                        pendingTasks.remove(task);
                    }
                });
        return task;
    }

    /**
     * Start optimization of old files.
     *
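     * <p>A hypothetical usage sketch (the {@code chain} variable is assumed):
     * <pre>{@code
     * CompletableFuture<Void> selection = chain.optimizeOldFiles();
     * selection.cancel(true);  // The selection task checks for cancellation.
     * }</pre>
     *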
     * @return a completable future of the background selection process. This
     * future can be used to wait for the background selecting thread to
     * complete, or to cancel the operation.
     */
    public CompletableFuture<Void> optimizeOldFiles() {
        synchronized (this) {
            if (optimizeOldFiles)
                return CompletableFuture.completedFuture(null);  // Already called.

            CompletableFuture<Void> task = new CompletableFuture<>();
            Thread thr = new Thread(() -> optimizeOldFilesTask(task));
            thr.setDaemon(true);
            thr.start();
            pendingTasks.add(task);
            optimizeOldFiles = true;

            // Make the task remove itself after completion.
            task.whenComplete((obj, err) -> {
                try {
                    if (err != null)
                        LOG.log(Level.WARNING, "failed selecting old files for optimization", err);
                } finally {
                    synchronized (this) {
                        pendingTasks.remove(task);
                    }
                }
            });

            return task;
        }
    }

    /**
     * Task that executes gathering data for background collection.
     *
     * @param handle a CompletableFuture that will be completed when the method
     * ends.
     */
    private void optimizeOldFilesTask(CompletableFuture<Void> handle) {
        try {
            final List<Key> keys;
            final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
            lock.lock();
            try {
                keys = readKeys.stream()
                        .filter(key -> {
                            try {
                                return !key.isNewFile() && !getFile(key).isOptimized();
                            } catch (IOException ex) {
                                return false;  // Ignore files that fail to open.
                            }
                        })
                        .sorted()
                        .collect(Collectors.toList());
            } finally {
                lock.unlock();
            }

            int count = 0;
            Map<Key, TSData> batch = new HashMap<>();
            for (Key key : keys) {
                if (key.isNewFile())
                    continue;  // New files are handled by file rotation logic.
                final TSData value;
                try {
                    value = getFile(key);
                } catch (IOException ex) {
                    continue;  // Ignore files that fail to open.
                }
                if (value == null)
                    continue;  // Key lost.
                if (value.isOptimized())
                    continue;  // Skip already optimized files.

                if (handle.isCancelled())
                    return;
                final int value_size = value.size();  // Takes 2-4 seconds for v0.x and v1.x files!
                count += value_size;
                batch.put(key, value);
                LOG.log(Level.INFO, "added {0} ({1} scrapes) to batch ({2} files, {3} scrapes)", new Object[]{key.getFile(), value_size, batch.size(), count});

                if (count > 50000) {
                    optimizeFiles(batch);
                    // Reset counters.
                    batch.clear();
                    count = 0;
                }
            }
            if (!batch.isEmpty())
                optimizeFiles(batch);
        } catch (Error | Exception ex) {
            handle.completeExceptionally(ex);
        } finally {
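            // Completing normally here is a no-op if completeExceptionally already ran.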
            handle.complete(null);
        }
    }

    private void installAppendFile(@NonNull AppendFile newAppendFile) {
        assert guard.isWriteLockedByCurrentThread();

        if (appendFile.isPresent()) {
            final AppendFile newReadOnlyFile = appendFile.get();
            final Key newKey = new Key(newReadOnlyFile.getFilename(), newReadOnlyFile.getTsdata().getBegin(), newReadOnlyFile.getTsdata().getEnd(), true);
            FILES.put(newKey, newReadOnlyFile.getTsdata());
            readKeys.add(newKey);

            optimizeFiles(singletonMap(newKey, newReadOnlyFile.getTsdata()));
        }

        LOG.log(Level.INFO, "rotating into new file {0}", newAppendFile.getFilename());
        appendFile = Optional.of(newAppendFile);
    }

    @Override
    public boolean add(@NonNull TimeSeriesCollection e) {
        final ReentrantReadWriteLock.WriteLock lock = guard.writeLock();
        lock.lock();
        try {
            return getAppendFileForWriting(e.getTimestamp()).getTsdata().add(e);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        } finally {
            lock.unlock();
        }
    }

    @Override
    public boolean addAll(@NonNull Collection<? extends TimeSeriesCollection> e) {
        if (e.isEmpty())
            return false;

        final ReentrantReadWriteLock.WriteLock lock = guard.writeLock();
        lock.lock();
        try {
            DateTime ts = e.iterator().next().getTimestamp();
            return getAppendFileForWriting(ts).getTsdata().addAll(e);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        } finally {
            lock.unlock();
        }
    }

    /**
     * Retrieves keys (file metadata) for all read-only files in this chain;
     * the active append file is not included.
     */
    public Set<Key> getKeys() {
        final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
        lock.lock();
        try {
            return Collections.unmodifiableSet(new HashSet<>(readKeys));
        } finally {
            lock.unlock();
        }
    }

    /**
     * Removes the file associated with the given key.
     */
    public void delete(@NonNull Key key) {
        final ReentrantReadWriteLock.WriteLock lock = guard.writeLock();
        lock.lock();
        try {
            if (!readKeys.remove(key))
                throw new IllegalArgumentException("key does not exist");
            Files.delete(key.getFile());
        } catch (IOException ex) {
            LOG.log(Level.WARNING, "unable to remove file " + key.getFile(), ex);
        } finally {
            lock.unlock();
        }
    }

    public Collection<SequenceTSData> getRawCollections() {
        final ReentrantReadWriteLock.ReadLock lock = guard.readLock();
        lock.lock();
        try {
            Stream<SequenceTSData> readSequences = readKeys.stream()
                    .flatMap(key -> {
                        try {
                            return Stream.of(getFile(key));
                        } catch (IOException ex) {
                            LOG.log(Level.WARNING, "unable to open {0}, omitting from result", key.getFile());
                            return Stream.empty();
                        }
                    });
            Stream<SequenceTSData> appendSequences = appendFile
                    .map(AppendFile::getTsdata)
                    .map(Stream::of)
                    .orElseGet(Stream::empty);

            return Stream.concat(readSequences, appendSequences)
                    .parallel()
                    .unordered()
                    .collect(Collectors.toList());
        } finally {
            lock.unlock();
        }
    }

    @Override
    public Optional<GCCloseable<FileChannel>> getFileChannel() {
        return Optional.empty();
    }

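    /**
     * Creates a helper for bulk-importing existing TSData files; batches are
     * handed to the optimizer once they reach the size or record limits.
     *
     * <p>A hypothetical usage sketch (the {@code chain} and {@code importFiles}
     * names are assumed, not from the original docs):
     * <pre>{@code
     * try (TSDataFileChain.BatchAdd batch = chain.batchAdd()) {
     *     for (TSData file : importFiles)
     *         batch.add(file);
     *     batch.awaitCompletion();  // Surfaces optimization failures as IOException.
     * }
     * }</pre>
     */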
    public BatchAdd batchAdd() {
        return new BatchAdd();
    }

    public class BatchAdd implements Closeable {
        private final List<TSData> tsdList = new ArrayList<>();
        private long tsdBytes = 0;
        private int tsdRecords = 0;
        private final Collection<CompletableFuture<Void>> outstanding = new ArrayList<>();

        public void add(Collection<? extends TimeSeriesCollection> tsd) {
            if (tsd instanceof TSData)
                add((TSData) tsd);
            else
                TSDataFileChain.this.addAll(tsd);
        }

        public void add(TSData tsd) {
            tsdBytes += tsd.getFileSize();
            tsdRecords += tsd.size();
            tsdList.add(tsd);

            if (tsdBytes >= max_filesize_ || tsdRecords >= max_filerecords_) {
                outstanding.add(optimize(new ArrayList<>(tsdList), emptyList()));  // Copy: tsdList is cleared below.
                tsdList.clear();
                tsdBytes = 0;
                tsdRecords = 0;
            }
        }

        public void awaitCompletion() throws IOException {
            try {
                CompletableFuture.allOf(outstanding.toArray(new CompletableFuture[]{}))
                        .get();
            } catch (InterruptedException ex) {
                Thread.currentThread().interrupt();  // Preserve the interrupt flag rather than swallowing it.
            } catch (ExecutionException ex) {
                // Unpack the cause of the execution exception.
                try {
                    throw ex.getCause();
                } catch (Error | RuntimeException | IOException ex1) {
                    throw ex1;
                } catch (Throwable ex1) {
                    throw new IOException("unable to complete optimization", ex1);
                }
            }
        }

        @Override
        public void close() throws IOException {
            tsdList.forEach(TSDataFileChain.this::addAll);
        }
    }
}