All Downloads are FREE. Search and download functionalities are using the official Maven repository.

au.gov.amsa.risky.format.Formats Maven / Gradle / Ivy

There is a newer version: 0.6.19
Show newest version
package au.gov.amsa.risky.format;

import java.io.File;
import java.text.DecimalFormat;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.github.davidmoten.guavamini.annotations.VisibleForTesting;
import com.github.davidmoten.rx.Functions;
import com.github.davidmoten.util.Preconditions;

import au.gov.amsa.util.Files;
import rx.Observable;
import rx.Observable.Transformer;
import rx.functions.Action1;
import rx.functions.Action2;
import rx.functions.Func1;

public final class Formats {

    private static final Logger log = LoggerFactory.getLogger(Formats.class);

    public static Observable transform(final File input, final File output,
            Pattern pattern, final Transformer transformer,
            final Action2, File> fixesWriter, final Func1 renamer) {
        Preconditions.checkNotNull(input);
        Preconditions.checkNotNull(output);
        Preconditions.checkNotNull(pattern);
        Preconditions.checkNotNull(transformer);
        final List files = Files.find(input, pattern);
        long n = 0;
        for (File file : files)
            n += file.length();
        final long totalSizeBytes = n;
        log.info("transforming " + new DecimalFormat("0.000").format(totalSizeBytes / 1000000.0)
                + "MB");
        final Action1 logger = new Action1() {
            final AtomicInteger count = new AtomicInteger();
            final long startTime = System.currentTimeMillis();
            final AtomicLong size = new AtomicLong();

            @Override
            public void call(File f) {
                long t = System.currentTimeMillis();
                int n = count.incrementAndGet();
                long bytes = size.getAndAdd(f.length());
                double timeToFinishMins;
                if (n > 1) {
                    timeToFinishMins = (t - startTime) / (double) (bytes) * (totalSizeBytes - bytes)
                            / 1000.0 / 60.0;
                } else
                    timeToFinishMins = -1;
                DecimalFormat df = new DecimalFormat("0.000");
                log.info("transforming " + n + " of " + files.size() + ":" + f + ", sizeMB="
                        + df.format(f.length() / 1000000.0) + ", finish in mins="
                        + df.format(timeToFinishMins));
            }
        };

        log.info("converting " + files.size() + " files" + " in " + input);
        return Observable
                // get the files matching the pattern from the directory
                .from(files)
                // replace the file with a transformed version
                .flatMap(file -> {
                    final File outputFile = rebase(file, input, output);
                    outputFile.getParentFile().mkdirs();
                    logger.call(file);
                    return BinaryFixes.from(file, true, BinaryFixesFormat.WITHOUT_MMSI)
                            // to list
                            .toList()
                            // flatten
                            .flatMapIterable(Functions.> identity())
                            // transform the fixes
                            .compose(transformer)
                            // make into a list again
                            .toList()
                            // replace the file with sorted fixes
                            .doOnNext(list -> {
                        File f = new File(outputFile.getParentFile(),
                                renamer.call(outputFile.getName()));
                        fixesWriter.call(list, f);
                    })
                            // count the fixes
                            .count();
                });

    }

    @VisibleForTesting
    static File rebase(File file, File existingParent, File newParent) {
        if (file.getAbsolutePath().equals(existingParent.getAbsolutePath()))
            return newParent;
        else
            return new File(rebase(file.getParentFile(), existingParent, newParent),
                    file.getName());
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy