All downloads are free. Search and download functionality uses the official Maven repository.

com.ning.tr13.tools.TrieDumper Maven / Gradle / Ivy

package com.ning.tr13.tools;

import java.io.*;
import java.util.Arrays;

import com.ning.tr13.TrieConstants;
import com.ning.tr13.lookup.TrieHeader;
import com.ning.tr13.util.InputUtil;
import com.ning.tr13.util.VInt;

/**
 * Simple utility class that can read a Trie file, and dump
 * its contents in format that default
 * {@link com.ning.tr13.KeyValueReader} could read.
 *<p>
 * Each entry is written as one line: the raw key bytes, the value separator,
 * the value as a decimal number, and a linefeed.
 *<p>
 * Instances are not safe for concurrent use: all decoding goes through a
 * single per-instance scratch buffer.
 */
public abstract class TrieDumper
    extends TrieConstants
{
    /** Scratch holder through which {@link VInt} hands back decoded values. */
    private final long[] tmpLongValueBuffer = new long[1];

    private final static byte[] LF = new byte[] { '\n' };
    
    /**
     * Character written between key and value; it is written with
     * {@link OutputStream#write(int)}, so only its low 8 bits are output.
     */
    protected final char valueSeparator;
    
    protected TrieDumper(char valueSeparator) {
        this.valueSeparator = valueSeparator;
    }
    
    /**
     * Reads a complete trie (header followed by payload) from given input
     * and writes all entries it contains to given output.
     * This method itself neither flushes nor closes either stream.
     *
     * @param in Stream to read the trie from
     * @param out Stream to write entries to
     *
     * @throws IOException if reading or writing fails, if payload is too
     *   big to be held in a single array, or if trie structure is found
     *   to be corrupt
     */
    public void dump(InputStream in, OutputStream out) throws IOException
    {
        // header:
        byte[] buffer = new byte[TrieHeader.HEADER_LENGTH];
        InputUtil.readFully(in, buffer);
        // First: let's verify signature, header
        TrieHeader header = TrieHeader.read(buffer, 0);
        long len = header.getPayloadLength();
        if (len > Integer.MAX_VALUE) {
            throw new IOException("Too big input file (over 2 gigs)");
        }
        if (len < 0L) { // would otherwise fail with NegativeArraySizeException
            throw new IOException("Corrupt trie header: negative payload length ("+len+")");
        }
        if (len == 0L) { // no nodes to traverse, so no entries to write
            return;
        }
        byte[] payload = new byte[(int) len];
        InputUtil.readFully(in, payload);

        // Ok, let's traverse then
        byte[] keyBuffer = new byte[200];
        readAndDump(out, payload, 0, keyBuffer, 0);
    }

    /**
     * Decodes the node that starts at given offset, writes out the entry it
     * holds (if any), and recursively does the same for all of its children.
     *<p>
     * Node type is determined from the first byte: if bit 0x80 is clear node
     * is a leaf, otherwise a branch. Bit 0x40 then indicates whether a leaf
     * has a key suffix, or whether a branch has a value of its own.
     *
     * @param out Stream to write entries to
     * @param block Buffer that contains the trie payload
     * @param offset Offset of the first byte of the node to decode
     * @param keyBuffer Buffer that contains key bytes of the path leading
     *   to this node
     * @param keyLen Number of valid bytes in <code>keyBuffer</code>
     *
     * @return Offset of the first byte following this node (including all
     *   of its children)
     *
     * @throws IOException if writing fails, or if structure is found to
     *   be corrupt
     */
    protected int readAndDump(OutputStream out,
            byte[] block, int offset, byte[] keyBuffer, int keyLen) throws IOException
    {
        if (offset < 0 || offset >= block.length) { // sanity check
            throw new IOException("Corrupt trie structure: node offset "+offset
                    +" is outside of input (length "+block.length+")");
        }
        // First things first: block type, length
        final int firstByte = block[offset];
        if ((firstByte & 0x80)  == 0) { // leaf
            // leaf types have FIRST_BYTE_BITS_FOR_LEAVES bits in first byte for value
            offset = VInt.bytesToUnsigned(FIRST_BYTE_BITS_FOR_LEAVES, block, offset, tmpLongValueBuffer);
            long value = tmpLongValueBuffer[0];
            if ((firstByte & 0x40)  == 0) { // simple
                // simple leaf only has value, so output stuff as is
                _writeValue(out, keyBuffer, keyLen, value, null, 0, 0);
            } else { // with suffix
                int lenOffset = offset;
                // suffix-leaf has additional key suffix following value
                offset = VInt.bytesToUnsigned(8, block, offset, tmpLongValueBuffer);
                long l = tmpLongValueBuffer[0];
                // let's do some sanity checks
                if (l < 0) {
                    throw new IOException("Corrupt trie structure: negative suffix length at index "+lenOffset);                    
                }
                // compare against remaining length (instead of adding to offset) to avoid overflow
                if (l > ((long) block.length - offset)) {
                    throw new IOException("Corrupt trie structure: leaf suffix length "+l+" (at offset "+lenOffset+") would extend past input end");                    
                }
                int suffixLen = (int) l;
                _writeValue(out, keyBuffer, keyLen, value, block, offset, suffixLen);
                offset += suffixLen;
            }
            return offset;
        }

        // Nope; branch
        // branch types have FIRST_BYTE_BITS_FOR_BRANCHES bits in first byte
        int origOffset = offset;
        offset = VInt.bytesToUnsigned(FIRST_BYTE_BITS_FOR_BRANCHES, block, offset, tmpLongValueBuffer);
        long l = tmpLongValueBuffer[0];
        long blockLen;
        
        if ((firstByte & 0x40)  == 0) { // simple branch
            blockLen = l;
        } else { // branch with value
            // first value, then length
            offset = VInt.bytesToUnsigned(8, block, offset, tmpLongValueBuffer);
            blockLen = tmpLongValueBuffer[0];
            // which we need to output first
            _writeValue(out, keyBuffer, keyLen, l, null, 0, 0);
        }
        if (blockLen < 0L) { // sanity check
            throw new IOException("Corrupt trie structure: branch had negative block length at index "+origOffset);
        }
        // A child takes at least 2 bytes (key byte, node), so empty child block
        // can never be valid; and block must fit within input
        if (blockLen == 0L || blockLen > ((long) block.length - offset)) {
            throw new IOException("Corrupt trie structure: branch child block length "+blockLen
                    +" (branch at offset "+origOffset+") is empty or would extend past input end");
        }
        final long end = offset + blockLen;
        origOffset = offset;
        do {
            // each child is a single key byte followed by the child node itself
            byte nextByte = block[offset++];
            keyBuffer = _appendKey(keyBuffer, nextByte, keyLen);
            offset = readAndDump(out, block, offset, keyBuffer, keyLen+1);
        } while (offset < end);
        if (offset != end) { // sanity check
            throw new IOException("Corrupt trie structure: "
                    +(((firstByte & 0x40)  == 0) ? "simple" : "value")
                    +" branch child block declared to extend from "
                    +origOffset+" to "+(end-1)+"; extended to "+(offset-1));
        }
        return offset;
    }

    /**
     * Writes a single entry: key bytes (with optional suffix), separator,
     * value as decimal number, linefeed.
     *
     * @param extraKey Buffer that contains key suffix to append after
     *   contents of <code>keyBuffer</code>; null if there is no suffix
     */
    private void _writeValue(OutputStream out, byte[] keyBuffer, int keyLen, long value,
            byte[] extraKey, int extraKeyOffset, int extraKeyLen) throws IOException
    {
        out.write(keyBuffer, 0, keyLen);
        if (extraKey != null) {
            out.write(extraKey, extraKeyOffset, extraKeyLen);
        }
        out.write(valueSeparator);
        // numbers are ASCII, so each char maps to exactly one byte; collect
        // them so that we can output with a single call
        final String numStr = String.valueOf(value);
        final int len = numStr.length();
        final byte[] digits = new byte[len];
        for (int i = 0; i < len; ++i) {
            digits[i] = (byte) numStr.charAt(i);
        }
        out.write(digits, 0, len);
        out.write(LF);
    }

    /**
     * Stores given byte at index <code>keyLen</code> of the key buffer,
     * first replacing buffer with a bigger copy if it is full.
     *
     * @return Buffer that contains the appended byte; either the one
     *   passed in, or its bigger copy
     */
    private byte[] _appendKey(byte[] keyBuffer, byte nextKeyByte, int keyLen)
    {
        if (keyLen >= keyBuffer.length) {
            // usually doubles; but must grow even if buffer passed was empty
            int newLen = Math.max(keyLen + 1, keyBuffer.length * 2);
            keyBuffer = Arrays.copyOf(keyBuffer, newLen);
        }
        keyBuffer[keyLen] = nextKeyByte;
        return keyBuffer;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy