// com.rapid7.armor.columnfile.ColumnFileReader Maven / Gradle / Ivy
package com.rapid7.armor.columnfile;
import static com.rapid7.armor.Constants.MAGIC_HEADER;
import static com.rapid7.armor.Constants.VERSION;
import java.io.DataInputStream;
import java.io.IOException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.ArrayList;
import java.util.List;
import com.rapid7.armor.Constants;
import com.rapid7.armor.io.IOTools;
import com.rapid7.armor.meta.ColumnMetadata;
/**
 * Reads a column file from a stream: checks the magic header, resolves the format version, and then walks the
 * metadata and data sections, handing each data section to a {@link ColumnFileListener}.
 *
 * <p>The metadata parsed during a {@link #read} call is stored on this instance and is available afterwards through
 * {@link #getColumnMetadata()}.
 */
public class ColumnFileReader {
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  /** Size of one table-of-contents record: an int section type id followed by an int section offset. */
  private static final int TOC_RECORD_BYTES = 8;

  /** Size of the two ints (compressed size, uncompressed size) that precede every section body. */
  private static final int SECTION_HEADER_BYTES = 8;

  private ColumnMetadata metadata;

  /**
   * Returns the metadata parsed by the most recent {@link #read} call.
   *
   * @return the parsed metadata, or {@code null} if no metadata section has been read yet
   */
  public ColumnMetadata getColumnMetadata() {
    return metadata;
  }

  /**
   * Determines how many bytes a section body occupies on the stream.
   *
   * @param compressed the compressed size read from the section header
   * @param uncompressed the uncompressed size read from the section header
   * @return {@code compressed} when it is positive, otherwise {@code uncompressed}
   */
  private static int shouldHaveRead(int compressed, int uncompressed) {
    return compressed > 0 ? compressed : uncompressed;
  }

  /**
   * Reads a column file, passing the stream to the listener once per data section. It is up to the listener to
   * consume each section correctly and to store the contents however it sees fit; any section bytes the listener
   * reports it did not consume are skipped by this reader.
   *
   * @param dataInputStream stream positioned at the start of the column file (at the magic header)
   * @param listener receives each non-metadata section; may be {@code null}, in which case sections are skipped
   * @throws IOException if reading from the stream fails
   * @throws IllegalArgumentException if the magic header is missing, the version is unsupported, or the file
   *     structure is inconsistent
   */
  public void read(DataInputStream dataInputStream, ColumnFileListener listener) throws IOException {
    // Header first
    readForMagicHeader(dataInputStream);
    Constants.ColumnFileFormatVersion version = readForFormatVersion(dataInputStream);
    switch (version) {
      case VERSION_1:
        readV1(dataInputStream, listener);
        break;
      case VERSION_2:
        readV2(dataInputStream, listener);
        break;
      default:
        throw new IllegalArgumentException("Unable to read columnfile since version is " + version + " which is unsupported");
    }
  }

  /** One table-of-contents record: which section it describes and the offset recorded for it. */
  static class TableOfContentsEntry {
    final ColumnFileSection sectionType;
    final int offset;

    TableOfContentsEntry(ColumnFileSection sectionType, int sectionOffset) {
      this.sectionType = sectionType;
      this.offset = sectionOffset;
    }

    /**
     * Builds an entry from the raw ints stored in the file.
     *
     * @param sectionType numeric section id, resolved through {@code ColumnFileSection.fromID}
     * @param sectionOffset offset recorded for the section
     * @return the resolved entry
     * @throws IllegalArgumentException if the section id does not resolve to a known section
     */
    static TableOfContentsEntry create(int sectionType, int sectionOffset) {
      ColumnFileSection st = ColumnFileSection.fromID(sectionType);
      if (st == null) {
        throw new IllegalArgumentException("Columnfile contains unknown section type " + sectionType + " in table of contents");
      }
      return new TableOfContentsEntry(st, sectionOffset);
    }
  }

  /**
   * Reads the version 2 table of contents: a compressed-size int, an uncompressed-size int, and then one
   * (section type, offset) int pair per record.
   *
   * @param dataInputStream stream positioned at the start of the table of contents
   * @return the entries in file order
   * @throws IOException if reading from the stream fails
   * @throws IllegalArgumentException if the recorded size is negative or not a whole number of records, or if a
   *     record names an unknown section type
   */
  private List<TableOfContentsEntry> readTableOfContents(DataInputStream dataInputStream)
      throws IOException
  {
    // The compressed size is consumed only to advance the stream; the records are read as plain ints below.
    dataInputStream.readInt();
    int uncompressedSize = dataInputStream.readInt();
    if (uncompressedSize < 0 || uncompressedSize % TOC_RECORD_BYTES != 0) {
      // A partial record would leave stray bytes on the stream and misalign every section that follows.
      throw new IllegalArgumentException("Columnfile table of contents has invalid size " + uncompressedSize);
    }
    int recordCount = uncompressedSize / TOC_RECORD_BYTES;
    // Deliberately not pre-sized: the count comes from the file and a corrupt value must not drive a huge allocation.
    List<TableOfContentsEntry> result = new ArrayList<>();
    for (int i = 0; i < recordCount; ++i) {
      int sectionType = dataInputStream.readInt();
      int sectionOffset = dataInputStream.readInt();
      result.add(TableOfContentsEntry.create(sectionType, sectionOffset));
    }
    return result;
  }

  /**
   * Reads a version 2 file body: the table of contents, then each section in table-of-contents order.
   *
   * @param dataInputStream stream positioned just after the format version
   * @param listener receives each non-metadata section; may be {@code null}
   * @throws IOException if reading from the stream fails
   * @throws IllegalArgumentException if a section's recorded offset differs from the number of bytes read since
   *     the end of the table of contents
   */
  private void readV2(DataInputStream dataInputStream, ColumnFileListener listener) throws IOException {
    // Table of Contents
    List<TableOfContentsEntry> toc = readTableOfContents(dataInputStream);
    // Read sections in their order from the TOC; offsets are compared against bytes consumed after the TOC.
    int totalBytesRead = 0;
    for (TableOfContentsEntry entry : toc) {
      if (entry.offset != totalBytesRead) {
        // should be warning?
        throw new IllegalArgumentException("Columnfile byte offset does not match table of contents for section " + entry.sectionType + " at offset " + entry.offset + " comparison to " + totalBytesRead);
      }
      int readBytes;
      if (entry.sectionType == ColumnFileSection.METADATA) {
        readBytes = readMetadata(dataInputStream);
      } else {
        readBytes = readSection(dataInputStream, listener, entry.sectionType);
      }
      totalBytesRead += readBytes;
    }
  }

  /**
   * Reads one data section: its two size ints, then the body, which is offered to the listener. The listener's
   * return value is treated as the number of body bytes it consumed; any shortfall is skipped so the stream ends
   * up at the start of the next section.
   *
   * @param dataInputStream stream positioned at the section's size header
   * @param listener receives the section; when {@code null} the whole body is skipped
   * @param sectionType which section is being read
   * @return the section's size on the stream, including the 8-byte size header
   * @throws IOException if reading or skipping fails
   */
  private int readSection(DataInputStream dataInputStream, ColumnFileListener listener, ColumnFileSection sectionType)
      throws IOException
  {
    int compressedSize = dataInputStream.readInt();
    int uncompressedSize = dataInputStream.readInt();
    int readBytes = 0;
    if (listener != null) {
      // metadata is whatever has been parsed so far; it is null if no metadata section preceded this one.
      readBytes = listener.columnFileSection(sectionType, metadata, dataInputStream, compressedSize, uncompressedSize);
    }
    int shouldHaveRead = shouldHaveRead(compressedSize, uncompressedSize);
    if (readBytes < shouldHaveRead) {
      int remainingBytes = shouldHaveRead - readBytes;
      IOTools.skipFully(dataInputStream, remainingBytes);
    }
    return shouldHaveRead + SECTION_HEADER_BYTES; // for the 2 readInts for the compressed / uncompressed sizes
  }

  /**
   * Reads the metadata section and stores the parsed result on this reader.
   *
   * @param dataInputStream stream positioned at the metadata section's size header
   * @return the section's size on the stream, including the 8-byte size header
   * @throws IOException if reading from the stream or parsing the metadata fails
   * @throws IllegalArgumentException if the recorded metadata length is negative
   */
  private int readMetadata(DataInputStream dataInputStream)
      throws IOException
  {
    dataInputStream.readInt(); // Skip compressed, always uncompressed for meta
    int metadataLength = dataInputStream.readInt(); // Uncompressed size is the metadata body length
    if (metadataLength < 0) {
      throw new IllegalArgumentException("Columnfile metadata has invalid length " + metadataLength);
    }
    byte[] metadataBytes = new byte[metadataLength];
    dataInputStream.readFully(metadataBytes);
    metadata = OBJECT_MAPPER.readValue(metadataBytes, ColumnMetadata.class);
    return metadataLength + SECTION_HEADER_BYTES; // for the first two readInts for the compressed / uncompressed sizes
  }

  /**
   * Reads a version 1 file body, which has no table of contents: metadata first, then the entity dictionary,
   * value dictionary, entity index and row group sections in that fixed order.
   *
   * @param dataInputStream stream positioned just after the format version
   * @param listener receives each non-metadata section; may be {@code null}
   * @throws IOException if reading from the stream fails
   */
  private void readV1(DataInputStream dataInputStream, ColumnFileListener listener) throws IOException {
    // Metadata
    readMetadata(dataInputStream);
    // Data sections, in the fixed V1 order
    readSection(dataInputStream, listener, ColumnFileSection.ENTITY_DICTIONARY);
    readSection(dataInputStream, listener, ColumnFileSection.VALUE_DICTIONARY);
    readSection(dataInputStream, listener, ColumnFileSection.ENTITY_INDEX);
    readSection(dataInputStream, listener, ColumnFileSection.ROWGROUP);
  }

  /**
   * Consumes the leading short and verifies it equals {@code MAGIC_HEADER}.
   *
   * @throws IOException if reading from the stream fails
   * @throws IllegalArgumentException if the value read is not the magic header
   */
  private void readForMagicHeader(DataInputStream dataInputStream) throws IOException {
    short header = dataInputStream.readShort();
    if (header != MAGIC_HEADER) {
      throw new IllegalArgumentException("The magic header doesn't exist");
    }
  }

  /**
   * Consumes the version int and maps it to a known {@link Constants.ColumnFileFormatVersion}.
   *
   * @return the matching format version
   * @throws IOException if reading from the stream fails
   * @throws IllegalArgumentException if no declared format version has that value
   */
  private Constants.ColumnFileFormatVersion readForFormatVersion(DataInputStream dataInputStream) throws IOException {
    int version = dataInputStream.readInt();
    for (Constants.ColumnFileFormatVersion v : Constants.ColumnFileFormatVersion.values()) {
      if (v.getVal() == version) {
        return v;
      }
    }
    throw new IllegalArgumentException("Unable to read columnfile since version is " + version + " and this lib is only for " + VERSION);
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy