All Downloads are FREE. The search and download features use the official Maven repository.

test.TestNameHashing Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
package test;

import java.io.*;

import com.fasterxml.aalto.in.*;

/**
 * Manual command-line test driver: reads names (one per line) from a file,
 * adds each distinct name to a {@link ByteBasedPNameTable}, and then prints
 * a rough estimate of how much heap the populated table occupied.
 *<p>
 * The memory figure is derived from {@link Runtime} heap counters sampled
 * around explicit garbage collection requests, so it is only an approximation.
 */
public final class TestNameHashing
{
    /** Pause (in milliseconds) taken before each garbage collection request. */
    private static final long GC_SETTLE_MSECS = 400L;

    /** Number of sleep/collect rounds performed before each heap sample. */
    private static final int GC_ROUNDS = 2;

    /** Table being populated; cleared again once the "with table" sample is taken. */
    ByteBasedPNameTable mTable = null;

    /** Total number of characters in the names that were actually added. */
    int mCharCount = 0;

    protected TestNameHashing() { }

    /**
     * Runs the whole test: loads the names, then samples the heap with and
     * without the table and prints the difference.
     *
     * @param args command-line arguments; exactly one is expected, the path
     *   of the file to read names from. Otherwise a usage message is printed
     *   and the JVM exits with status 1.
     * @throws Exception if reading the input file fails
     */
    protected void test(String[] args)
        throws Exception
    {
        if (args.length != 1) {
            System.err.println("Usage: java "+getClass().getName()+" <file>");
            System.exit(1);
        }
        test2(args);
        int wordCount = mTable.size();
        // Guard against an empty input, which would otherwise print NaN
        double avgLen = (wordCount == 0) ? 0.0 : (double) mCharCount / (double) wordCount;

        // Let's check memory usage too:
        Runtime rt = Runtime.getRuntime();
        System.out.println("DEBUG: Free1: "+rt.freeMemory()+", total: "+rt.totalMemory()+", max: "+rt.maxMemory());

        settleGc();

        long freeWithTable = rt.freeMemory();
        long totalWithTable = rt.totalMemory();
        System.out.println("DEBUG: Free2: "+freeWithTable+", total: "+totalWithTable+", max: "+rt.maxMemory());

        // Release the table, and drop our reference so it can be collected
        mTable.nuke();
        mTable = null;
        settleGc();

        long freeWithoutTable = rt.freeMemory();
        long totalWithoutTable = rt.totalMemory();
        System.out.println("DEBUG: Free3: "+freeWithoutTable+", total: "+totalWithoutTable+", max: "+rt.maxMemory());

        /* Compare used heap (total - free) rather than free heap alone: the
         * JVM may grow or shrink the heap between the two samples, in which
         * case a plain difference of free memory would be meaningless.
         */
        long usedWithTable = totalWithTable - freeWithTable;
        long usedWithoutTable = totalWithoutTable - freeWithoutTable;
        long tableSize = usedWithTable - usedWithoutTable;
        double avgSize = (wordCount == 0) ? 0.0 : tableSize / (double) wordCount;

        System.out.println("Memory used by table: "+tableSize+" -> "+avgSize+" bytes per word ("+avgLen+" chars/word)");
    }

    /**
     * Gives the garbage collector a chance to run before a heap sample is
     * taken: sleeps, yields and requests a collection, {@link #GC_ROUNDS}
     * times in a row.
     */
    private static void settleGc()
    {
        for (int round = 0; round < GC_ROUNDS; ++round) {
            try {
                Thread.sleep(GC_SETTLE_MSECS);
            } catch (InterruptedException ie) {
                // Carry on with the measurement, but keep the interrupt request visible
                Thread.currentThread().interrupt();
            }
            Thread.yield();
            System.gc();
            Thread.yield();
        }
    }

    /**
     * Creates a fresh table and fills it from the file named by
     * {@code args[0]}, one name per line. A name that is already in the
     * table is not added again (a '.' is printed for it instead); empty
     * lines are skipped.
     *
     * @param args command-line arguments; {@code args[0]} is the input path
     * @throws Exception if the file can not be opened or read
     */
    protected void test2(String[] args)
        throws Exception
    {
        mTable = new ByteBasedPNameTable(64);
        InputStream in = new FileInputStream(args[0]);
        try {
            /* Names are hashed from their UTF-8 bytes (see calcQuads), so
             * decode the file explicitly instead of relying on the platform
             * default charset. NOTE: this assumes the input file is UTF-8.
             */
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));

            System.out.println("Ok, starting to read in names: ");

            String word;
            mCharCount = 0;

            while ((word = br.readLine()) != null) {
                if (word.length() == 0) {
                    // An empty line has no bytes to hash; nothing to look up or add
                    continue;
                }
                if (tryToFind(mTable, word) == null) {
                    addSymbol(mTable, word);
                    mCharCount += word.length();
                } else {
                    System.out.print('.');
                }
            }
            System.out.println(".");
            System.out.println("Done! Table: "+mTable.toString());
        } finally {
            // Close the file even when reading or table insertion fails
            in.close();
        }
    }

    /**
     * Looks up a name in the table.
     *
     * @param table table to search
     * @param word name to look up; must not be empty
     * @return whatever the table's {@code findSymbol} returns for the name;
     *   {@link #test2} treats null as "not present"
     * @throws IllegalArgumentException if {@code word} is empty
     */
    PName tryToFind(ByteBasedPNameTable table, String word)
    {
        int[] quads = calcQuads(word);
        if (quads.length == 0) {
            throw new IllegalArgumentException("Can not look up an empty name");
        }
        int hash = ByteBasedPNameTable.calcHash(quads, quads.length);
        if (quads.length < 3) {
            // One or two quads: use the two-int variant, padding a missing second quad with 0
            return table.findSymbol(hash, quads[0], (quads.length < 2) ? 0 : quads[1]);
        }
        return table.findSymbol(hash, quads, quads.length);
    }

    /**
     * Adds a name to the table.
     *
     * @param table table to add to
     * @param word name to add; must not be empty
     * @return whatever the table's {@code addSymbol} returns for the name
     * @throws IllegalArgumentException if {@code word} is empty
     */
    PName addSymbol(ByteBasedPNameTable table, String word)
    {
        int[] quads = calcQuads(word);
        if (quads.length == 0) {
            throw new IllegalArgumentException("Can not add an empty name");
        }
        // Index of the first ':' in the name (-1 if none), passed through to the table
        int colonIx = word.indexOf(':');
        int hash = ByteBasedPNameTable.calcHash(quads, quads.length);
        if (quads.length < 3) {
            // One or two quads: use the two-int variant, padding a missing second quad with 0
            return table.addSymbol(hash, word, colonIx, quads[0], (quads.length < 2) ? 0 : quads[1]);
        }
        return table.addSymbol(hash, word, colonIx, quads, quads.length);
    }

    /**
     * Packs the UTF-8 encoding of a name into ints, four bytes per int with
     * the earliest byte in the most significant position. If the byte count
     * is not a multiple of four, the last int holds only the remaining one
     * to three bytes, in its low-order bits.
     *
     * @param word name to encode
     * @return the packed bytes; a zero-length array for an empty name
     */
    int[] calcQuads(String word)
    {
        byte[] wordBytes;
        try {
            wordBytes = word.getBytes("UTF-8");
        } catch (java.io.UnsupportedEncodingException ex) {
            throw new Error("Internal error: "+ex, ex); // should never occur
        }
        int blen = wordBytes.length;
        int[] result = new int[(blen + 3) / 4];
        for (int i = 0; i < blen; ++i) {
            int quadIx = i / 4;
            // Shift in one byte at a time; each slot starts out as 0
            result[quadIx] = (result[quadIx] << 8) | (wordBytes[i] & 0xFF);
        }

        return result;
    }

    /**
     * Command-line entry point.
     *
     * @param args a single argument, the path of the file to read names from
     * @throws Exception if the test run fails
     */
    public static void main(String[] args)
        throws Exception
    {
        new TestNameHashing().test(args);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy