All downloads are free. The search and download functionality uses the official Maven repository.

eu.fbk.knowledgestore.tool.TestGenerator Maven / Gradle / Ivy

Go to download

A collection of command line tools for interacting with a KnowledgeStore server, including benchmarking tools to create and perform a performance test of KS retrieval methods as well as a tool for dumping the contents of a KS instance to RDF files.

There is a newer version: 1.7.1
Show newest version
package eu.fbk.knowledgestore.tool;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import javax.annotation.Nullable;

import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import org.openrdf.query.BindingSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.fbk.knowledgestore.OperationException;
import eu.fbk.knowledgestore.Session;
import eu.fbk.knowledgestore.client.Client;
import eu.fbk.knowledgestore.data.Handler;
import eu.fbk.knowledgestore.data.Stream;
import eu.fbk.knowledgestore.internal.CommandLine;
import eu.fbk.knowledgestore.internal.Util;
import eu.fbk.rdfpro.util.IO;
import eu.fbk.rdfpro.util.Tracker;

public final class TestGenerator {

    /** Logger used for the progress and diagnostic messages emitted by the generator. */
    private static final Logger LOGGER = LoggerFactory.getLogger(TestGenerator.class);

    /** Random source, seeded with the current time, used to pick tuples during generation. */
    private static final Random RANDOM = new Random(System.currentTimeMillis());

    /** Dictionary mapping the string values read from result files to int codes and back. */
    private final Dictionary dictionary;

    /** Server URL, read from the mandatory {@code test.url} property. */
    private final String url;

    /** Optional username, read from {@code test.username} (null if not configured). */
    private final String username;

    /** Optional password, read from {@code test.password} (null if not configured). */
    private final String password;

    /** Number of mixes (output tuples) to generate, read from {@code test.mixes} (default 0). */
    private final int mixes;

    /** Output file, resolved from the {@code test.out} property against the base path. */
    private final File outputFile;

    /** Enabled queries, in the order their names are listed in {@code test.queries}. */
    private final Query[] queries;

    /**
     * Command line entry point. Parses the command line, loads the configuration file
     * supplied with option {@code -c}, applies the {@code property=value} overrides given as
     * extra arguments and runs the generator. Any failure is delegated to
     * {@code CommandLine.fail}.
     *
     * @param args
     *            the command line arguments
     */
    public static void main(final String[] args) {
        try {
            final String header = "Generates the request mixes for the test, by querying the "
                    + "KnowledgeStore. Generator parameters and queries are "
                    + "supplied in a .properties file. Output data is written "
                    + "to a .tsv file.";
            final String footer = "Configuration parameters may be overridden by supplying "
                    + "additional property=value\narguments on the command line.";

            // Parse the command line
            final CommandLine cmd = CommandLine.parser() //
                    .withName("ks-test-generator") //
                    .withHeader(header) //
                    .withOption("c", "config", "the configuration file", "FILE",
                            CommandLine.Type.FILE_EXISTING, true, false, true) //
                    .withFooter(footer) //
                    .withLogger(LoggerFactory.getLogger("eu.fbk.knowledgestore")) //
                    .parse(args);

            // Load the configuration from the file specified
            final File configFile = cmd.getOptionValue("c", File.class);
            final Properties config = new Properties();
            try (InputStream in = IO.read(configFile.getAbsolutePath())) {
                config.load(in);
            }

            // Apply name=value overrides; arguments lacking a non-empty name are skipped
            for (final String override : cmd.getArgs(String.class)) {
                final int separator = override.indexOf('=');
                if (separator <= 0) {
                    continue;
                }
                config.setProperty(override.substring(0, separator),
                        override.substring(separator + 1));
            }

            // Relative paths in the configuration are resolved against the config file folder
            final TestGenerator generator = new TestGenerator(config, configFile.getParentFile());
            generator.run();

        } catch (final Throwable ex) {
            CommandLine.fail(ex);
        }
    }

    /**
     * Creates a generator configured by the supplied properties.
     *
     * @param properties
     *            the configuration; {@code test.url}, {@code test.out} and
     *            {@code test.queries} are read without a default, while
     *            {@code test.username}, {@code test.password} and {@code test.mixes} are
     *            optional
     * @param basePath
     *            the directory against which relative file names are resolved; if null, the
     *            current working directory ({@code user.dir}) is used
     * @throws IllegalArgumentException
     *             if {@code test.queries} names a query that is not defined
     */
    public TestGenerator(final Properties properties, @Nullable final File basePath) {

        // Create a global dictionary for mapping values to codes and back
        this.dictionary = new Dictionary();

        // Get base path, falling back to the working directory
        final Path base = (basePath != null ? basePath : new File(System.getProperty("user.dir")))
                .toPath();

        // Parse server URL, username and password
        this.url = TestUtil.read(properties, "test.url", String.class);
        this.username = TestUtil.read(properties, "test.username", String.class, null);
        this.password = TestUtil.read(properties, "test.password", String.class, null);
        LOGGER.info("SUT: {}{}", this.url,
                this.username == null && this.password == null ? " (anonymous access)"
                        : " (authenticated access)");

        // Parse number of mixes to generate and output file
        this.mixes = TestUtil.read(properties, "test.mixes", Integer.class, 0);
        this.outputFile = base.resolve(
                Paths.get(TestUtil.read(properties, "test.out", String.class))).toFile();
        LOGGER.info("{} mix(es) to be written to {}", this.mixes,
                this.outputFile.getAbsolutePath());

        // Parse queries. The resolved base directory is passed on (not the nullable basePath
        // argument) because Query.create dereferences the directory it receives
        final List<Query> allQueries = Query.create(properties, base.toFile());
        final List<Query> enabledQueries = Lists.newArrayList();

        // Names are trimmed as a whole first: the split pattern only strips whitespace
        // around commas, so trailing blanks would otherwise stick to the last name
        final Set<String> enabledNames = Sets.newLinkedHashSet(Arrays.asList(TestUtil
                .read(properties, "test.queries", String.class).trim().split("\\s*[,]\\s*")));
        for (final String name : enabledNames) {
            boolean added = false;
            for (final Query query : allQueries) {
                if (query.name.equals(name)) {
                    enabledQueries.add(query);
                    added = true;
                    break;
                }
            }
            Preconditions.checkArgument(added, "Unknown query " + name);
        }
        this.queries = enabledQueries.toArray(new Query[enabledQueries.size()]);
        LOGGER.info("{} queries enabled ({} defined): {}", enabledQueries.size(),
                allQueries.size(), Joiner.on(", ").join(enabledQueries));
    }

    @SuppressWarnings("resource")
    public void run() throws IOException, OperationException {

        Client client = null;
        Session session = null;

        try {
            // Obtain a session
            client = Client.builder(this.url).compressionEnabled(true).validateServer(false)
                    .build();
            session = client.newSession(this.username, this.password);

            // Read schema and tuples from input files
            final List> fileVars = Lists.newArrayList();
            final List> fileTuples = Lists.newArrayList();
            for (int i = 0; i < this.queries.length; ++i) {
                final List vars = Lists.newArrayList();
                final List tuples = Lists.newArrayList();
                final File file = this.queries[i].download(session);
                read(file, vars, tuples, this.dictionary);
                fileVars.add(vars);
                fileTuples.add(tuples);
            }

            // Compute output schema and mappings from file to output schema
            final int[][] fileMappings = new int[this.queries.length][]; // m_ij -> var j file i
            final List outputVars = Lists.newArrayList();
            for (int i = 0; i < fileVars.size(); ++i) {
                boolean insidePrefix = true;
                fileMappings[i] = new int[fileVars.get(i).size()];
                for (int j = 0; j < fileMappings[i].length; ++j) {
                    final String var = fileVars.get(i).get(j);
                    int index = outputVars.indexOf(var);
                    if (index < 0) {
                        insidePrefix = false;
                        index = outputVars.size();
                        outputVars.add(var);
                    } else if (!insidePrefix) {
                        throw new IllegalArgumentException("Variable " + var + " of query "
                                + this.queries[i] + " matches var in previous files "
                                + "but is preceded by newly intruduced variable ");
                    }
                    fileMappings[i][j] = index;
                }
            }
            LOGGER.info("Output schema: ({})", Joiner.on(", ").join(outputVars));

            // Use a tracker to display the progress of the operation
            final Tracker tracker = new Tracker(LOGGER, null, //
                    "Generated %d tuples (%d tuple/s avg)", //
                    "Generated %d tuples (%d tuple/s, %d tuple/s avg)");
            tracker.start();

            // Generate a set of (unique) joined tuples, of the size specified
            int numFailures = 0;
            int numDuplicates = 0;
            final Set outputTuples = Sets.newLinkedHashSet();
            final int[] outputCodes = new int[outputVars.size()];
            outer: while (outputTuples.size() < this.mixes) {
                Arrays.fill(outputCodes, 0);
                for (int i = 0; i < fileTuples.size(); ++i) {
                    if (!pick(fileTuples.get(i), fileMappings[i], outputCodes)) {
                        ++numFailures;
                        continue outer;
                    }
                }
                if (outputTuples.add(Tuple.create(outputCodes))) {
                    tracker.increment();
                } else {
                    ++numDuplicates;
                }
            }

            // Signal completion
            tracker.end();

            // Log number of failures and number of duplicate tuples during generation
            LOGGER.info("Tuple generation statistics: {} attempts failed, {} duplicates",
                    numFailures, numDuplicates);

            // Write resulting tuples
            write(this.outputFile, outputVars, outputTuples, this.dictionary);

        } finally {
            // Release session
            Util.closeQuietly(session);
            Util.closeQuietly(client);
        }
    }

    /**
     * Picks a random tuple among the ones compatible with the codes already assigned in the
     * output tuple, and copies its codes to the output tuple.
     *
     * @param tuples
     *            the candidate tuples, sorted according to their natural order
     * @param mappings
     *            for each variable of the candidate tuples, the index of the corresponding
     *            slot in {@code outputCodes}
     * @param outputCodes
     *            the codes of the output tuple being built, where 0 denotes a slot not yet
     *            assigned; modified in place on success
     * @return true if a tuple was picked, false if no candidate tuple agrees with the codes
     *         already assigned
     */
    private static boolean pick(final List<Tuple> tuples, final int[] mappings,
            final int[] outputCodes) {

        final int numVariables = mappings.length;
        final int numTuples = tuples.size();

        // The a-priori range where to pick the tuple is the full tuples list
        int start = 0;
        int end = numTuples;

        // Check if range can be constrained based on codes previously assigned (i.e., join)
        boolean constrained = false;
        final int[] searchCodes = new int[numVariables];
        for (int i = 0; i < numVariables; ++i) {
            final int code = outputCodes[mappings[i]];
            if (code != 0) {
                searchCodes[i] = code;
                constrained = true;
            }
        }

        // If range can be constrained, build a 'search' tuple whose first codes are given by
        // variables previously assigned, and remaining variables are zero; then do binary search
        // followed by a scan for matching tuples to determine the range
        if (constrained) {
            final Tuple searchTuple = Tuple.create(searchCodes);
            start = Collections.binarySearch(tuples, searchTuple);
            if (start < 0) {
                start = -start - 1; // in case exact match not found
            }
            if (start >= numTuples || !tuples.get(start).matches(searchTuple)) {
                return false; // if range is empty or cannot join
            }
            end = start + 1;
            while (end < numTuples && tuples.get(end).matches(searchTuple)) {
                ++end;
            }
        }

        // Pick a random index inside the allowed range and use that tuple to augment output
        final int chosenIndex = start + RANDOM.nextInt(end - start);
        final Tuple chosenTuple = tuples.get(chosenIndex);
        for (int i = 0; i < numVariables; ++i) {
            final int slot = mappings[i];
            final int oldValue = outputCodes[slot];
            final int newValue = chosenTuple.get(i);
            if (oldValue != 0 && newValue != oldValue) {
                // Sanity check: the chosen tuple must agree with the codes already assigned
                throw new Error("Join error: " + chosenTuple + " - "
                        + Arrays.toString(outputCodes) + " (search:  "
                        + Arrays.toString(searchCodes) + "; start " + start + "; end " + end + ")");
            }
            outputCodes[slot] = newValue;
        }

        // Return true upon success
        return true;
    }

    /**
     * Reads a tab-separated file of query results. The first line lists the variables, each
     * preceded by a one-character prefix that is dropped; each following line supplies the
     * values of a tuple, which are mapped to codes via the dictionary. Lines that cannot be
     * parsed are logged and skipped. Tuples are sorted once all of them have been read.
     *
     * @param file
     *            the file to read
     * @param vars
     *            the list where to add the variable names read from the header line
     * @param tuples
     *            the list where to add the tuples read, which is sorted before returning
     * @param dictionary
     *            the dictionary used to map values to codes
     * @throws IOException
     *             on I/O failure, or if the header line is missing or malformed
     */
    private static void read(final File file, final List<String> vars, final List<Tuple> tuples,
            final Dictionary dictionary) throws IOException {

        // Read the file specified, populating the supplied vars and tuples list
        try (final BufferedReader reader = new BufferedReader(IO.utf8Reader(IO.buffer(IO.read(file
                .getAbsolutePath()))))) {

            // Read variables, reporting an empty file or header token explicitly
            final String header = reader.readLine();
            if (header == null) {
                throw new IOException("Missing header line in file " + file);
            }
            for (final String token : header.split("\t")) {
                final String var = token.trim();
                if (var.isEmpty()) {
                    throw new IOException("Invalid header line in file " + file + " [" + header
                            + "]");
                }
                vars.add(var.substring(1)); // drop the leading prefix character
            }

            // Use a tracker to show the progress of the operation
            final Tracker tracker = new Tracker(LOGGER, null, //
                    "Parsed " + file.getAbsolutePath() + " (" + Joiner.on(", ").join(vars)
                            + "): %d tuples (%d tuple/s avg)", //
                    "Parsed %d tuples (%d tuple/s, %d tuple/s avg)");
            tracker.start();

            // Read data tuples, mapping values to codes using the dictionary
            int lineNum = 0;
            String line;
            final int[] codes = new int[vars.size()]; // reused: Tuple.create copies the codes
            while ((line = reader.readLine()) != null) {
                try {
                    ++lineNum;
                    final String[] tokens = line.split("\t");
                    for (int j = 0; j < codes.length; ++j) {
                        codes[j] = dictionary.codeFor(tokens[j]);
                    }
                    tuples.add(Tuple.create(codes));
                    tracker.increment();
                } catch (final Throwable ex) {
                    // Best effort: a bad line (e.g., with too few values) is reported and skipped
                    LOGGER.warn("Ignoring invalid line " + lineNum + " of file " + file + " - "
                            + ex.getMessage() + " [" + line + "]");
                }
            }

            // Signal completion
            tracker.end();

            // Sort read tuples
            Collections.sort(tuples);
        }

    }

    /**
     * Writes the supplied tuples to a tab-separated file. The first line lists the variables,
     * each preceded by {@code ?}; each following line holds the values of a tuple, obtained by
     * mapping its codes back to strings via the dictionary.
     *
     * @param file
     *            the file to write
     * @param vars
     *            the variable names, in the order of the tuple components
     * @param tuples
     *            the tuples to write, in iteration order
     * @param dictionary
     *            the dictionary used to map codes back to values
     * @throws IOException
     *             on I/O failure
     */
    private static void write(final File file, final List<String> vars,
            final Collection<Tuple> tuples, final Dictionary dictionary) throws IOException {

        // Use a tracker to show the progress of the operation
        final Tracker tracker = new Tracker(LOGGER, null, //
                "Written " + file.getAbsolutePath() + " (" + Joiner.on(", ").join(vars)
                        + "): %d tuples (%d tuple/s avg)", //
                "Written %d tuples (%d tuple/s, %d tuple/s avg)");
        tracker.start();

        // Write to the file specified one line at a time
        final int numVars = vars.size();
        try (Writer writer = IO.utf8Writer(IO.buffer(IO.write(file.getAbsolutePath())))) {

            // Start writing the header line: ?v1 ?v2 ...
            for (int i = 0; i < numVars; ++i) {
                if (i > 0) {
                    writer.write("\t");
                }
                writer.write("?");
                writer.write(vars.get(i));
            }
            writer.write("\n");

            // Write data lines
            for (final Tuple tuple : tuples) {
                for (int i = 0; i < numVars; ++i) {
                    if (i > 0) {
                        writer.write("\t");
                    }
                    writer.write(dictionary.stringFor(tuple.get(i)));
                }
                writer.write("\n");
                tracker.increment();
            }
        }

        // Signal completion
        tracker.end();
    }

    /**
     * A named SPARQL query whose result tuples are stored in a local file. The file is
     * produced by evaluating the query only if it does not exist yet, so it acts as a cache
     * across runs.
     */
    private static class Query {

        /** The query name, i.e., the key under which the query is configured. */
        private final String name;

        /** The file where the query results are stored. */
        private final File file;

        /** The SPARQL query string. */
        private final String string;

        /**
         * Creates a new query.
         *
         * @param name
         *            the query name
         * @param file
         *            the file where to store the query results
         * @param string
         *            the SPARQL query string
         */
        public Query(final String name, final File file, final String string) {
            this.name = name;
            this.file = file;
            this.string = string;
        }

        /**
         * Returns the file with the query results, evaluating the query and writing the file
         * if it does not exist. If the evaluation fails, the partially written file is
         * deleted, so that a later call does not mistake it for a complete result.
         *
         * @param session
         *            the session used to evaluate the query
         * @return the file holding the query results
         * @throws IOException
         *             on failure writing the file
         * @throws OperationException
         *             on failure evaluating the query
         */
        public File download(final Session session) throws IOException, OperationException {

            if (!this.file.exists()) {

                // Holds the writer lazily created by the handler, so it can be closed here
                final AtomicReference<Writer> writerToClose = new AtomicReference<Writer>(null);

                final Tracker tracker = new Tracker(LOGGER, null, //
                        "Evaluated query " + this.name + ": %d tuples (%d tuple/s avg)", //
                        "Evaluating query " + this.name
                                + ": %d tuples (%d tuple/s, %d tuple/s avg)");
                tracker.start();

                boolean completed = false;
                try {
                    try (Stream<BindingSet> stream = session.sparql(this.string)
                            .timeout(3600 * 1000L).execTuples()) {

                        stream.toHandler(new Handler<BindingSet>() {

                            private Writer writer = null;

                            private List<String> variables;

                            @SuppressWarnings("unchecked")
                            @Override
                            public void handle(final BindingSet bindings) throws Throwable {
                                if (this.writer == null) {
                                    // First call: open the file and emit the header line
                                    this.writer = IO.utf8Writer(IO.buffer(IO
                                            .write(Query.this.file.getAbsolutePath())));
                                    writerToClose.set(this.writer);
                                    this.variables = stream.getProperty("variables", List.class);
                                    for (int i = 0; i < this.variables.size(); ++i) {
                                        this.writer.write(i > 0 ? "\t?" : "?");
                                        this.writer.write(this.variables.get(i));
                                    }
                                    this.writer.write("\n");
                                }
                                if (bindings != null) {
                                    this.writer.write(TestUtil.encode(this.variables, bindings));
                                    this.writer.write("\n");
                                    tracker.increment();
                                }
                            }

                        });

                    } finally {
                        closeAndWait(writerToClose.get());
                    }
                    completed = true;

                } finally {
                    // Do not leave a truncated file behind: it would be reused as is next time
                    if (!completed && this.file.exists() && !this.file.delete()) {
                        LOGGER.warn("Could not delete incomplete file " + this.file);
                    }
                }

                tracker.end();
            }

            return this.file;
        }

        /**
         * Flushes and closes the writer, if any, making sure it is closed even if flushing
         * fails.
         */
        private static void closeAndWait(@Nullable final Writer writer) throws IOException {
            if (writer == null) {
                return;
            }
            try {
                writer.flush();
            } finally {
                Util.closeQuietly(writer);
                try {
                    // TODO: remove this hack, necessary for giving gzip enough time to
                    // complete writing the file (the fix should be added to IO.write())
                    Thread.sleep(250);
                } catch (final InterruptedException ex) {
                    // Restore the interrupt status so that callers can react to it
                    Thread.currentThread().interrupt();
                }
            }
        }

        /**
         * Creates the queries defined in the supplied properties. An entry defines a query
         * only if it supplies both the {@code file} and the {@code query} properties.
         *
         * @param properties
         *            the configuration properties
         * @param basePath
         *            the directory against which file names are resolved; if null, the
         *            current working directory ({@code user.dir}) is used
         * @return the list of queries defined
         */
        public static List<Query> create(final Properties properties,
                @Nullable final File basePath) {
            final Path base = (basePath != null ? basePath : new File(
                    System.getProperty("user.dir"))).toPath();
            final List<Query> queries = Lists.newArrayList();
            for (final Map.Entry<String, Properties> entry : TestUtil.split(properties)
                    .entrySet()) {
                final String name = entry.getKey();
                final Properties props = entry.getValue();
                final String filename = props.getProperty("file");
                final String query = props.getProperty("query");
                if (filename != null && query != null) {
                    final File file = base.resolve(Paths.get(filename)).toFile();
                    queries.add(new Query(name, file, query));
                }
            }
            return queries;
        }

        @Override
        public String toString() {
            return this.name;
        }

    }

    private static class Dictionary {

        private static final int TABLE_SIZE = 32 * 1024 * 1024 - 1;

        private static final int MAX_COLLISIONS = 1024;

        private static final int BUFFER_BITS = 12;

        private static final int BUFFER_SIZE = 1 << BUFFER_BITS;

        private final int[] table;

        private int[] list;

        private final List buffers;

        private int offset;

        private int lastCode;

        Dictionary() {
            this.table = new int[Dictionary.TABLE_SIZE];
            this.list = new int[1024];
            this.buffers = Lists.newArrayList();
            this.offset = BUFFER_SIZE;
            this.lastCode = 0;
        }

        public int codeFor(final String string) {
            final byte[] bytes = string.getBytes(Charsets.UTF_8);
            int bucket = Math.abs(string.hashCode()) % TABLE_SIZE;
            for (int i = 0; i < MAX_COLLISIONS; ++i) {
                final int code = this.table[bucket];
                if (code != 0) {
                    final int pointer = this.list[code - 1];
                    if (match(pointer, bytes)) {
                        return code;
                    }
                } else {
                    final int pointer = store(bytes);
                    if (this.lastCode >= this.list.length) {
                        final int[] oldList = this.list;
                        this.list = Arrays.copyOf(oldList, this.list.length * 2);
                    }
                    this.list[this.lastCode++] = pointer;
                    this.table[bucket] = this.lastCode;

                    // if (lastCode % 100000 == 0) {
                    // System.out.println(buffers.size() * BUFFER_SIZE);
                    // }

                    return this.lastCode;
                }
                bucket = (bucket + 1) % TABLE_SIZE;
            }
            throw new Error("Max number of collisions exceeded - RDF vocabulary too large");
        }

        public String stringFor(final int code) {
            final int pointer = this.list[code - 1];
            return new String(load(pointer), Charsets.UTF_8);
        }

        private byte[] load(final int pointer) {
            final int index = pointer >>> BUFFER_BITS - 2;
            final int offset = pointer << 2 & BUFFER_SIZE - 1;
            final byte[] buffer = this.buffers.get(index);
            int end = offset;
            while (buffer[end] != 0) {
                ++end;
            }
            return Arrays.copyOfRange(buffer, offset, end);
        }

        private int store(final byte[] bytes) {
            if (this.offset + bytes.length + 1 > BUFFER_SIZE) {
                this.buffers.add(new byte[BUFFER_SIZE]);
                this.offset = 0;
            }
            final int index = this.buffers.size() - 1;
            final int pointer = this.offset >> 2 | index << BUFFER_BITS - 2;
            final byte[] buffer = this.buffers.get(index);
            System.arraycopy(bytes, 0, buffer, this.offset, bytes.length);
            this.offset += bytes.length;
            buffer[this.offset++] = 0;
            this.offset = this.offset + 3 & 0xFFFFFFFC;
            return pointer;
        }

        private boolean match(final int pointer, final byte[] bytes) {
            final int index = pointer >>> BUFFER_BITS - 2;
            final int offset = pointer << 2 & BUFFER_SIZE - 1;
            final byte[] buffer = this.buffers.get(index);
            for (int i = 0; i < bytes.length; ++i) {
                if (buffer[offset + i] != bytes[i]) {
                    return false;
                }
            }
            return true;
        }

    }

    private static abstract class Tuple implements Comparable {

        public static Tuple create(final int... codes) {
            switch (codes.length) {
            case 0:
                return Tuple0.INSTANCE;
            case 1:
                return new Tuple1(codes[0]);
            case 2:
                return new Tuple2(codes[0], codes[1]);
            case 3:
                return new Tuple3(codes[0], codes[1], codes[2]);
            case 4:
                return new Tuple4(codes[0], codes[1], codes[2], codes[3]);
            default:
                return new TupleN(codes.clone());
            }
        }

        public abstract int size();

        public abstract int get(int index);

        public boolean matches(final Tuple tuple) {
            final int size = size();
            for (int i = 0; i < size; ++i) {
                final int expected = tuple.get(i);
                if (expected != 0 && get(i) != expected) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public int compareTo(final Tuple other) {
            final int thisSize = size();
            final int otherSize = other.size();
            final int minSize = Math.min(thisSize, otherSize);
            for (int i = 0; i < minSize; ++i) {
                final int result = get(i) - other.get(i);
                if (result != 0) {
                    return result;
                }
            }
            return thisSize - otherSize;
        }

        @Override
        public boolean equals(final Object object) {
            if (object == this) {
                return true;
            }
            if (!(object instanceof Tuple)) {
                return false;
            }
            final Tuple other = (Tuple) object;
            final int size = size();
            if (other.size() != size) {
                return false;
            }
            for (int i = 0; i < size; ++i) {
                if (get(i) != other.get(i)) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public int hashCode() {
            final int size = size();
            int hash = size;
            for (int i = 0; i < size; ++i) {
                hash = 37 * hash + get(i);
            }
            return hash;
        }

        @Override
        public String toString() {
            final int size = size();
            final StringBuilder builder = new StringBuilder();
            builder.append('(');
            for (int i = 0; i < size; ++i) {
                if (i > 0) {
                    builder.append(", ");
                }
                builder.append(get(i));
            }
            builder.append(')');
            return builder.toString();
        }

        private static final class Tuple0 extends Tuple {

            static final Tuple0 INSTANCE = new Tuple0();

            @Override
            public int size() {
                return 0;
            }

            @Override
            public int get(final int index) {
                throw new IndexOutOfBoundsException("Invalid index " + index);
            }

        }

        private static final class Tuple1 extends Tuple {

            private final int code;

            Tuple1(final int code) {
                this.code = code;
            }

            @Override
            public int size() {
                return 1;
            }

            @Override
            public int get(final int index) {
                Preconditions.checkElementIndex(index, 1);
                return this.code;
            }

        }

        private static final class Tuple2 extends Tuple {

            private final int code0;

            private final int code1;

            Tuple2(final int code0, final int code1) {
                this.code0 = code0;
                this.code1 = code1;
            }

            @Override
            public int size() {
                return 2;
            }

            @Override
            public int get(final int index) {
                Preconditions.checkElementIndex(index, 2);
                return index == 0 ? this.code0 : this.code1;
            }

        }

        private static final class Tuple3 extends Tuple {

            private final int code0;

            private final int code1;

            private final int code2;

            Tuple3(final int code0, final int code1, final int code2) {
                this.code0 = code0;
                this.code1 = code1;
                this.code2 = code2;
            }

            @Override
            public int size() {
                return 3;
            }

            @Override
            public int get(final int index) {
                switch (index) {
                case 0:
                    return this.code0;
                case 1:
                    return this.code1;
                case 2:
                    return this.code2;
                default:
                    throw new IndexOutOfBoundsException("Index " + index + ", size 3");
                }
            }

        }

        private static final class Tuple4 extends Tuple {

            private final int code0;

            private final int code1;

            private final int code2;

            private final int code3;

            Tuple4(final int code0, final int code1, final int code2, final int code3) {
                this.code0 = code0;
                this.code1 = code1;
                this.code2 = code2;
                this.code3 = code3;
            }

            @Override
            public int size() {
                return 4;
            }

            @Override
            public int get(final int index) {
                switch (index) {
                case 0:
                    return this.code0;
                case 1:
                    return this.code1;
                case 2:
                    return this.code2;
                case 3:
                    return this.code3;
                default:
                    throw new IndexOutOfBoundsException("Index " + index + ", size 4");
                }
            }

        }

        private static final class TupleN extends Tuple {

            private final int[] codes;

            TupleN(final int[] codes) {
                this.codes = codes;
            }

            @Override
            public int size() {
                return this.codes.length;
            }

            @Override
            public int get(final int index) {
                return this.codes[index];
            }

        }

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy