au.gov.amsa.risky.format.ByMmsiToDailyConverter Maven / Gradle / Ivy
package au.gov.amsa.risky.format;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.text.DecimalFormat;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.github.davidmoten.rx.slf4j.Logging;
import au.gov.amsa.util.Files;
import rx.Observable;
import rx.schedulers.Schedulers;
public class ByMmsiToDailyConverter {
private static final Logger log = LoggerFactory.getLogger(ByMmsiToDailyConverter.class);
public static void convert(File input, File output) {
DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneOffset.UTC);
output.mkdirs();
try {
FileUtils.cleanDirectory(output);
} catch (IOException e1) {
throw new RuntimeException(e1);
}
{
List files = Files.find(input, Pattern.compile(".*\\.track"));
System.out.println("found " + files.size() + " files");
int bufferSize = 1000;
Observable.from(files)
//
.flatMap(file -> BinaryFixes.from(file, true))
//
.lift(Logging. logger().showMemory().every(1000000)
.showCount("recordsMillions").log())
//
.groupBy(fix -> dtf.format(Instant.ofEpochMilli(fix.time())))
//
.flatMap(g -> g.buffer(bufferSize))
//
.doOnNext(list -> {
File file = new File(output,
dtf.format(Instant.ofEpochMilli(list.get(0).time())) + ".fix");
try (OutputStream os = new FileOutputStream(file, true)) {
for (Fix fix : list) {
BinaryFixes.write(fix, os, BinaryFixesFormat.WITH_MMSI);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}).count().toBlocking().single();
}
}
public static void sort(File output) {
// now sort the data in each output file by time and rewrite
List files = Files.find(output, Pattern.compile(".*\\.fix"));
Observable.from(files)
//
.buffer(Math.max(1, files.size() / Runtime.getRuntime().availableProcessors()))
//
.flatMap(list -> Observable.from(list)
//
.doOnNext(file -> sortFixFile(file)).subscribeOn(Schedulers.computation()))
.count().toBlocking().single();
}
static void sortFixFile(File file) {
log.info("sorting " + file.getName() + ", size="
+ new DecimalFormat("0.00").format(file.length() / 1024.0 / 1024.0));
try {
File temp = new File(file.getParent(), file.getName() + ".tmp");
temp.delete();
ArrayList fixes = BinaryFixes.from(file, false, BinaryFixesFormat.WITH_MMSI)
.collect(() -> new ArrayList(20_000_000), (list, fix) -> list.add(fix))
.toBlocking().single();
Collections.sort(fixes, (a, b) -> Long.compare(a.time(), b.time()));
try (OutputStream os = new BufferedOutputStream(new FileOutputStream(temp))) {
for (Fix fix : fixes) {
BinaryFixes.write(fix, os, BinaryFixesFormat.WITH_MMSI);
}
}
file.delete();
temp.renameTo(file);
log.info("sorted");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy