de.julielab.xmlData.cli.CLI Maven / Gradle / Ivy

Go to download
/**
 * QueryCLI.java
 * 
 * Copyright (c) 2010, JULIE Lab.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * 

 * Author: faessler
 * 

 * Current version: 1.0
 * Since version:   1.0
 * 
 * Creation date: 20.11.2010
 **/

package de.julielab.xmlData.cli;

import de.julielab.medline.ConfigurationConstants;
import de.julielab.medline.MedlineUpdateException;
import de.julielab.medline.Updater;
import de.julielab.xml.JulieXMLConstants;
import de.julielab.xml.JulieXMLTools;
import de.julielab.xmlData.Constants;
import de.julielab.xmlData.config.TableSchemaDoesNotExistException;
import de.julielab.xmlData.dataBase.CoStoSysConnection;
import de.julielab.xmlData.dataBase.DataBaseConnector;
import de.julielab.xmlData.dataBase.SubsetStatus;
import de.julielab.xmlData.dataBase.util.TableSchemaMismatchException;
import org.apache.commons.cli.*;
import org.apache.commons.configuration2.XMLConfiguration;
import org.apache.commons.configuration2.builder.FileBasedConfigurationBuilder;
import org.apache.commons.configuration2.builder.fluent.Parameters;
import org.apache.commons.configuration2.ex.ConfigurationException;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.sql.SQLException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import static de.julielab.xmlData.dataBase.DataBaseConnector.StatusElement.*;

/**
 * Command line interface for interaction with a databases holding e.g. Medline
 * XML data.
 *
 * @author faessler / hellrich
 */
public class CLI {

    private final static String DELIMITER = "\n--------------------------------------------------------------------------------\n";

    private static final Logger LOG = LoggerFactory.getLogger(CLI.class);
    private static final String KEY_PART_SEPERATOR = "\t";
    private static final String FILE_SEPERATOR = System.getProperty("file.separator");
    public static String[] USER_SCHEME_DEFINITION = new String[]{"dbcconfiguration.xml", "costosys.xml", "costosysconfiguration.xml"};
    private static boolean verbose = false;

    private static void logMessage(String msg) {
        if (!verbose)
            return;
        LOG.info(msg);
    }

    public static void main(String[] args) throws Exception {
        long time = System.currentTimeMillis();
        String dbUrl;
        String user;
        String password;
        String dbName;
        String serverName;
        String pgSchema;
        String msg;
        boolean updateMode = false;

        boolean error = false;
        Mode mode = Mode.ERROR;

        Options options = getOptions();

        // What has to be done
        CommandLineParser parser = new DefaultParser();
        CommandLine cmd = null;
        try {
            cmd = parser.parse(options, args);
        } catch (ParseException e) {
            LOG.error("Can't parse arguments: " + e.getMessage());
            printHelp(options);
            System.exit(1);
        }

        verbose = cmd.hasOption('v');
        if (verbose)
            LOG.info("Verbose logging enabled.");

        // selecting the mode
        if (cmd.hasOption("h"))
            error = true; // To show help
        if (cmd.hasOption("i"))
            mode = Mode.IMPORT;
        if (cmd.hasOption("u")) {
            mode = Mode.IMPORT;
            updateMode = true;
        }
        if (cmd.hasOption("q"))
            mode = Mode.QUERY;
        if (cmd.getOptionValue("s") != null)
            mode = Mode.SUBSET;
        if (cmd.getOptionValue("re") != null)
            mode = Mode.RESET;
        if (cmd.getOptionValue("st") != null)
            mode = Mode.STATUS;
        if (cmd.hasOption("t"))
            mode = Mode.TABLES;
        if (cmd.hasOption("lts"))
            mode = Mode.LIST_TABLE_SCHEMAS;
        if (cmd.hasOption("td"))
            mode = Mode.TABLE_DEFINITION;
        if (cmd.hasOption("sch"))
            mode = Mode.SCHEME;
        if (cmd.hasOption("ch"))
            mode = Mode.CHECK;
        if (cmd.hasOption("dc"))
            mode = Mode.DEFAULT_CONFIG;
        if (cmd.hasOption("dt"))
            mode = Mode.DROP_TABLE;
        if (cmd.hasOption("um"))
            mode = Mode.UPDATE_MEDLINE;

        // authentication
        // config file
        String dbcConfigPath = null;
        if (cmd.hasOption("dbc"))
            dbcConfigPath = cmd.getOptionValue("dbc");
        if (dbcConfigPath == null)
            dbcConfigPath = findConfigurationFile();
        File conf = new File(dbcConfigPath);
        dbUrl = cmd.getOptionValue('U');
        if (dbUrl == null) {
            msg = "No database URL given. Using value in configuration file";
            logMessage(msg);
        }
        user = cmd.getOptionValue("n");
        if (user == null) {
            msg = "No database username given. Using value in configuration file";
            logMessage(msg);
        }
        password = cmd.getOptionValue("p");
        if (password == null) {
            msg = "No password given. Using value in configuration file";
            logMessage(msg);
        }
        serverName = cmd.getOptionValue("srv");
        dbName = cmd.getOptionValue("db");
        pgSchema = cmd.getOptionValue("pgs");
        if (!((serverName != null && dbName != null) ^ dbUrl != null)
                && !(serverName == null && dbName == null && dbUrl == null) && !conf.exists()) {
            LOG.error(
                    "No base configuration has been found. Thus, you must specify server name and database name or the complete URL with -u (but not both).");
            System.exit(1);
        }

        DataBaseConnector dbc = null;
        try {
            if (conf.exists()) {
                logMessage(String.format("Using configuration file at %s", conf));
                if (dbUrl == null)
                    dbc = new DataBaseConnector(serverName, dbName, user, password, pgSchema,
                            new FileInputStream(conf));
                else
                    dbc = new DataBaseConnector(dbUrl, user, password, pgSchema, new FileInputStream(conf));
            } else {
                logMessage(String.format(
                        "No custom configuration found (should be located at %s). Using default configuration.",
                        Stream.of(USER_SCHEME_DEFINITION).collect(Collectors.joining(" or "))));
                if (dbUrl == null)
                    dbc = new DataBaseConnector(serverName, dbName, user, password, pgSchema, null);
                else
                    dbc = new DataBaseConnector(dbUrl, user, password, pgSchema, null);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }

        // all those options...
        String tableName = cmd.getOptionValue("td");
        if (tableName == null)
            tableName = cmd.getOptionValue("ch");

        String subsetTableName = cmd.getOptionValue("s");
        if (subsetTableName == null)
            subsetTableName = cmd.getOptionValue("re");
        if (subsetTableName == null)
            subsetTableName = cmd.getOptionValue("renp");
        if (subsetTableName == null)
            subsetTableName = cmd.getOptionValue("st");

        String fileStr = cmd.getOptionValue("f");
        if (fileStr == null)
            fileStr = cmd.getOptionValue("i");
        if (fileStr == null)
            fileStr = cmd.getOptionValue("u");
        if (cmd.hasOption("im")) {
            mode = Mode.IMPORT;
            // For some reasons, multuple versions of some documents have been found in the baseline in the past.
            // Just use the update mode.
            XMLConfiguration importConfig = loadXmlConfiguration(new File(cmd.getOptionValue("im")));
            fileStr = importConfig.getString(ConfigurationConstants.INSERTION_INPUT);
            updateMode = true;
        }

        String superTableName = cmd.getOptionValue("z");
        if (superTableName == null)
            superTableName = dbc.getActiveDataTable();

        String queryStr = cmd.getOptionValue("q");
        String subsetJournalFileName = cmd.getOptionValue("j");
        String subsetQuery = cmd.getOptionValue("o");
        String randomSubsetSize = cmd.getOptionValue("r");
        String whereClause = cmd.getOptionValue("w");
        String xpath = cmd.getOptionValue("x");
        String baseOutDir = cmd.getOptionValue("out");
        String batchSize = cmd.getOptionValue("bs");
        String limit = cmd.getOptionValue("l");
        String tableSchema = cmd.getOptionValue("ts") != null ? cmd.getOptionValue("ts") : dbc.getActiveTableSchema();
        boolean useDelimiter = baseOutDir != null ? false : cmd.hasOption("d");
        boolean returnPubmedArticleSet = cmd.hasOption("pas");
        boolean mirrorSubset = cmd.hasOption("m");
        boolean all4Subset = cmd.hasOption("a");
        Integer numberRefHops = cmd.hasOption("rh") ? Integer.parseInt(cmd.getOptionValue("rh")) : null;

        if (tableSchema.matches("[0-9]+")) {
            tableSchema = dbc.getConfig().getTableSchemaNames().get(Integer.parseInt(tableSchema));
        }

        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
            switch (mode) {
                case QUERY:
                    QueryOptions qo = new QueryOptions();
                    qo.fileStr = fileStr;
                    qo.queryStr = queryStr;
                    qo.useDelimiter = useDelimiter;
                    qo.pubmedArticleSet = returnPubmedArticleSet;
                    qo.xpath = xpath;
                    qo.baseOutDirStr = baseOutDir;
                    qo.batchSizeStr = batchSize;
                    qo.limitStr = limit;
                    qo.tableName = superTableName;
                    qo.tableSchema = tableSchema;
                    qo.whereClause = whereClause;
                    qo.numberRefHops = numberRefHops;
                    error = doQuery(dbc, qo);
                    break;

                case IMPORT:
                    error = doImportOrUpdate(dbc, fileStr, queryStr, superTableName, updateMode);
                    break;

                case SUBSET:
                    error = doSubset(dbc, subsetTableName, fileStr, queryStr, superTableName, subsetJournalFileName,
                            subsetQuery, mirrorSubset, whereClause, all4Subset, randomSubsetSize, numberRefHops);
                    break;

                case RESET:
                    if (subsetTableName == null) {
                        LOG.error("You must provide the name of the subset table to reset.");
                        error = true;
                    } else {
                        boolean files = cmd.hasOption("f");
                        try {
                            if (!files || StringUtils.isBlank(fileStr)) {
                                boolean np = cmd.hasOption("np");
                                boolean ne = cmd.hasOption("ne");
                                String lc = cmd.hasOption("lc") ? cmd.getOptionValue("lc") : null;
                                if (np)
                                    logMessage("table reset is restricted to non-processed table rows");
                                if (ne)
                                    logMessage("table reset is restricted to table row without errors");
                                if (lc != null)
                                    logMessage("table reset is restricted to rows with last component " + lc);
                                if (!np && !ne && lc == null) {
                                    SubsetStatus status = dbc.status(subsetTableName, EnumSet.of(IN_PROCESS, IS_PROCESSED, TOTAL));
                                    long inProcess = status.inProcess;
                                    long isProcessed = status.isProcessed;
                                    long total = status.total;
                                    // We don't bother with too small datasets, worst
                                    // case would be to do it again for 10000 docs which
                                    // is not much.
                                    if (total > 10000 && inProcess + isProcessed >= total / 2) {
                                        String input = getYesNoAnswer("The subset table \"" + subsetTableName
                                                + "\" is in process or already processed over 50%."
                                                + " Do you really wish to reset it completely into an unprocessed state? (yes/no)");
                                        if (input.equals("no"))
                                            System.exit(0);
                                    }
                                }
                                dbc.resetSubset(subsetTableName, np, ne, lc);
                            } else {
                                logMessage("Resetting all documents identified by the IDs in file \"" + fileStr + "\".");
                                try {
                                    List pkValues = asListOfArrays(fileStr);
                                    dbc.resetSubset(subsetTableName, pkValues);
                                } catch (IOException e) {
                                    e.printStackTrace();
                                }
                            }
                        } catch (TableNotFoundException e) {
                            e.printStackTrace();
                        }
                    }
                    break;
                case STATUS:
                    error = doStatus(dbc,
                            subsetTableName,
                            cmd.hasOption("he"),
                            cmd.hasOption("isp"),
                            cmd.hasOption("inp"),
                            cmd.hasOption("to"),
                            cmd.hasOption("lc"));
                    break;

                case TABLES:
                    for (String s : dbc.getTables())
                        System.out.println(s);
                    break;

                case TABLE_DEFINITION:
                    for (String s : dbc.getTableDefinition(tableName))
                        System.out.println(s);
                    break;

                case LIST_TABLE_SCHEMAS:
                    System.out.println("The following table schema names are contained in the current configuration:\n");
                    List tableSchemaNames = dbc.getConfig().getTableSchemaNames();
                    IntStream.range(0, tableSchemaNames.size()).mapToObj(i -> i + " " + tableSchemaNames.get(i))
                            .forEach(System.out::println);
                    break;

                case SCHEME:
                    System.out.println(dbc.getScheme());
                    break;

                case CHECK:
                    dbc.checkTableDefinition(tableName);
                    break;

                case DEFAULT_CONFIG:
                    System.out.println(new String(dbc.getEffectiveConfiguration()));
                    break;

                case DROP_TABLE:
                    dropTableInteractively(dbc, cmd.getOptionValue("dt"));
                    break;

                case UPDATE_MEDLINE:
                    Updater updater = new Updater(loadXmlConfiguration(new File(cmd.getOptionValue("um"))));
                    updater.process(dbc);
                    break;

                case ERROR:
                    break;
            }
        }

        if (error) {
            // printHelp(options);
            System.exit(1);
        }

        time = System.currentTimeMillis() - time;
        LOG.info(String.format("Processing took %d seconds.", time / 1000));
    }

    public static String findConfigurationFile() throws ConfigurationNotFoundException {
        String configFileProperty = System.getProperty(Constants.COSTOSYS_CONFIG_FILE);
        if (configFileProperty != null && new File(configFileProperty).exists())
            return configFileProperty;
        File workingDirectory = new File(".");
        Set possibleConfigFileNames = new HashSet<>(Arrays.asList(USER_SCHEME_DEFINITION));
        for (String file : workingDirectory.list()) {
            if (possibleConfigFileNames.contains(file.toLowerCase()))
                return file;
        }
        throw new ConfigurationNotFoundException("No configuration file with a name in " + Arrays.toString(USER_SCHEME_DEFINITION) + " was found in the current working directory " + new File(".").getAbsolutePath());
    }

    private static void dropTableInteractively(DataBaseConnector dbc, String tableName) {
        try {
            if (!dbc.tableExists(tableName)) {
                if (tableName.contains("."))
                    System.err
                            .println("Table \"" + tableName + "\" does not exist in database " + dbc.getDbURL() + ".");
                else
                    System.err.println("Table \"" + tableName + "\" does not exist in database " + dbc.getDbURL()
                            + " in active schema " + dbc.getActivePGSchema() + ".");
                return;
            } else {
                String unqualifiedTableName = tableName.contains(".") ? tableName.substring(tableName.indexOf(".") + 1)
                        : tableName;
                String schema = tableName.contains(".") ? tableName.substring(0, tableName.indexOf("."))
                        : dbc.getActivePGSchema();
                System.out.println("Found table \"" + unqualifiedTableName + "\" in schema " + schema + " in database "
                        + dbc.getDbURL() + ". Do you really want to drop it (y/n)?");
                BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
                String response = in.readLine().toLowerCase();
                while (!response.equals("y") && !response.equals("yes") && !response.equals("n")
                        && !response.equals("no")) {
                    System.out.println("Please specify y(es) or n(o).");
                    response = in.readLine().toLowerCase();
                }
                if (response.startsWith("y")) {
                    System.out.println("Dropping table \"" + unqualifiedTableName + "\" in Postgres schema \"" + schema
                            + "\" of database " + dbc.getDbURL());
                    dbc.dropTable(String.join(".", schema, unqualifiedTableName));
                } else {
                    System.out.println("User canceled. Aborting process.");
                }
            }
        } catch (IOException | SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Poses question to the user and awaits for a yes or
     * no answer and returns it.
     *
     * @param question the question raised
     * @return the answer yes or no
     */
    private static String getYesNoAnswer(String question) {
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String input = "";
        try {
            while (!input.equals("yes") && !input.equals("no")) {
                System.out.println(question);
                input = br.readLine();
            }
        } catch (IOException e) {
            LOG.error("Something went wrong while reading from STDIN: ", e);
            System.exit(1);
        }
        return input;
    }

    private static boolean doStatus(DataBaseConnector dbc, String subsetTableName, boolean showHasErrors, boolean showIsProcessed, boolean showIsInProcess, boolean showTotal, boolean showLastComponent) {
        boolean error = false;
        try {
            if (subsetTableName == null) {
                LOG.error("You must provide the name of a subset table to display its status.");
                error = true;
            } else {
                EnumSet modes = EnumSet.noneOf(DataBaseConnector.StatusElement.class);
                if (showHasErrors) modes.add(DataBaseConnector.StatusElement.HAS_ERRORS);
                if (showIsProcessed) modes.add(DataBaseConnector.StatusElement.IS_PROCESSED);
                if (showIsInProcess) modes.add(DataBaseConnector.StatusElement.IN_PROCESS);
                if (showTotal) modes.add(DataBaseConnector.StatusElement.TOTAL);
                if (showLastComponent) modes.add(DataBaseConnector.StatusElement.LAST_COMPONENT);
                if (modes.isEmpty())
                    modes = EnumSet.allOf(DataBaseConnector.StatusElement.class);

                try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
                    SubsetStatus status = dbc.status(subsetTableName, modes);
                    System.out.println(status);
                }
            }
        } catch (TableSchemaDoesNotExistException e) {
            LOG.error(e.getMessage());
            error = true;
        } catch (TableNotFoundException e) {
            LOG.error(e.getMessage());
            e.printStackTrace();
        }
        return error;
    }

    private static boolean doSubset(DataBaseConnector dbc, String subsetTableName, String fileStr, String queryStr,
                                    String superTableName, String subsetJournalFileName, String subsetQuery, boolean mirrorSubset,
                                    String whereClause, boolean all4Subset, String randomSubsetSize, Integer numberRefHops)
            throws SQLException {
        String comment = "";
        boolean error;
        ArrayList ids = null;
        String condition = null;

        error = checkSchema(dbc, subsetTableName);
        if (!error) {
            if (subsetJournalFileName != null) {
                try {
                    ids = asList(subsetJournalFileName);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                if (ids.size() == 0) {
                    LOG.error(subsetJournalFileName + " is empty.");
                    error = true;
                }
                StringBuilder sb = new StringBuilder();
                for (String id : ids)
                    sb.append(", ").append(id);
                condition = Constants.NLM_ID_FIELD_NAME;
                comment = "Subset created " + new Date().toString() + " by matching with " + superTableName + " on "
                        + condition + ": " + sb.substring(2);
            } else if (subsetQuery != null) {
                logMessage("Querying PubMed for: " + subsetQuery);
                ids = QueryPubMed.query(subsetQuery);
                if (ids.size() == 0) {
                    LOG.error("No results for your query.");
                    error = true;
                } else
                    LOG.info("PubMed delivered " + ids.size() + " results.");
                condition = Constants.PMID_FIELD_NAME;
                comment = "Subset created " + new Date().toString() + " by matching with " + superTableName
                        + " on PubMed-query: " + subsetQuery;
            } else if (all4Subset) {
                logMessage("Creating subset by matching all entries from table " + superTableName + ".");
                comment = "Subset created " + new Date().toString() + " by matching with " + superTableName;
            } else if (whereClause != null) {
                comment = "Subset created " + new Date().toString() + " by selecting rows from " + superTableName
                        + " with where clause \"" + whereClause + "\"";
            } else if (randomSubsetSize != null) {
                try {
                    new Integer(randomSubsetSize);
                    comment = "Subset created " + new Date().toString() + " by randomly selecting " + randomSubsetSize
                            + " rows from " + superTableName + ".";
                } catch (NumberFormatException e) {
                    LOG.error(randomSubsetSize + " is not a number!");
                    error = true;
                }
            } else if (fileStr != null) {
                try {
                    ids = asList(fileStr);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                if (ids.size() == 0) {
                    LOG.error(fileStr + " is empty.");
                    error = true;
                }
                condition = dbc.getFieldConfiguration(dbc.getActiveTableSchema()).getPrimaryKey()[0];
                comment = "Subset created " + new Date().toString() + " by matching with " + superTableName + " on "
                        + ids.size() + " " + condition + "s;";
            } else if (mirrorSubset) {
                comment = "Subset created " + new Date().toString() + " as to mirror " + superTableName + ";";
            } else {
                error = true;
                LOG.error("You must choose a way to define the subset.");
            }

            comment = escapeSingleQuotes(comment);
        }
        if (!dbc.withConnectionQueryBoolean(c -> dbc.tableExists(superTableName))) {
            logMessage("Checking whether super table " + superTableName + " exists...");
            LOG.error("Table " + superTableName + " doesn't exist!");
            error = true;
        }
        if (!error) {
            try (CoStoSysConnection connPair = dbc.obtainOrReserveConnection()) {
                if (!dbc.tableExists(subsetTableName)) {
                    logMessage("No table with the name \"" + subsetTableName + "\" exists, creating new subset table...");
                    dbc.createSubsetTable(subsetTableName, superTableName, numberRefHops, comment);
                    logMessage("Created table " + subsetTableName);
                } else
                    LOG.error("Table " + subsetTableName + " allready exists.");
                if (dbc.isEmpty(subsetTableName) && !error) {
                    if (all4Subset)
                        dbc.initSubset(subsetTableName, superTableName);
                    else if (whereClause != null)
                        dbc.initSubsetWithWhereClause(subsetTableName, superTableName, whereClause);
                    else if (ids != null && ids.size() > 0)
                        dbc.initSubset(ids, subsetTableName, superTableName, condition);
                    else if (mirrorSubset)
                        dbc.initMirrorSubset(subsetTableName, superTableName, true);
                    else if (randomSubsetSize != null) {
                        dbc.initRandomSubset(new Integer(randomSubsetSize), subsetTableName, superTableName);
                    }
                    logMessage("Subset defined.");
                } else {
                    LOG.error(subsetTableName + " is not empty, please use another table.");
                    error = true;
                }
            }
        }
        return error;
    }

    private static boolean doImportOrUpdate(DataBaseConnector dbc, String fileStr, String queryStr,
                                            String superTableName, boolean updateMode) throws SQLException {
        boolean error = false;
        if (fileStr != null) {

            if (!dbc.withConnectionQueryBoolean(c -> c.tableExists(superTableName))) {
                error = checkSchema(dbc, superTableName);
                final String comment = "Data table created " + new Date().toString() + " by importing data from path " + fileStr;
                if (!error) {
                    dbc.withConnectionExecute(c -> c.createTable(superTableName, comment));
                    logMessage("Created table " + superTableName);

                }
            }

            if (dbc.withConnectionQueryBoolean(c -> c.isEmpty(superTableName)) && !updateMode) {
                dbc.withConnectionExecute(c -> c.importFromXMLFile(fileStr, superTableName));
            } else {
                logMessage("Table is not empty or update mode was explicitly specified, processing Updates.");
                dbc.withConnectionExecute(c -> c.updateFromXML(fileStr, superTableName));
                logMessage("Updates finished.");
            }
        } else {
            LOG.error("You must specify a file or directory to retrieve XML files from.");
            error = true;
        }
        return error;
    }

    private static boolean doQuery(DataBaseConnector dbc, QueryOptions qo) {
        boolean error = false;

        /**
         * The document IDs that should be returned (optional)
         */
        String queryStr = qo.queryStr;
        String fileStr = qo.fileStr;
        String tableName = qo.tableName;
        String tableSchema = qo.tableSchema;
        boolean useDelimiter = qo.useDelimiter;
        boolean pubmedArticleSet = qo.pubmedArticleSet;
        String xpath = qo.xpath;
        // this could be a directory or file name, depending on parameters
        String baseOutFile = qo.baseOutDirStr;
        String batchSizeStr = qo.batchSizeStr;
        String limitStr = qo.limitStr;
        Integer numberRefHops = qo.numberRefHops;

        // In the following algorithm, first of all each possible
        // parameter/resource is acquired. Further down is then one single
        // algorithm iterating over queried documents and treating them
        // accordingly to the parameters which have been found.
        File outfile = null;
        int batchSize = 0;
        BufferedWriter bw = null;
        boolean keysExplicitlyGiven = fileStr != null || queryStr != null;
        long limit = limitStr != null ? Integer.parseInt(limitStr) : -1;

        boolean createDirectory = baseOutFile != null && !pubmedArticleSet;
        if (verbose) {
            logMessage("Creating " + (createDirectory ? "directory" : "file") + " " + baseOutFile
                    + " to write query results to.");
        }

        if (createDirectory) {
            outfile = new File(baseOutFile);
            if (!outfile.exists()) {
                logMessage("Directory " + outfile.getAbsolutePath()
                        + " does not exist and will be created (as well as sub dircetories for file batches if required).");
                outfile.mkdir();
            }
            logMessage("Writing queried documents to " + outfile.getAbsolutePath());

            if (batchSizeStr != null) {
                try {
                    batchSize = Integer.parseInt(batchSizeStr);
                    logMessage("Dividing query result files in batches of " + batchSize);
                    if (batchSize < 1)
                        throw new NumberFormatException();
                } catch (NumberFormatException e) {
                    LOG.error(
                            "Error parsing \"{}\" into an integer. Please deliver a positive numeric value for the batch size of files.");
                }
            }
        }

        if (!error) {
            List keys = new ArrayList();
            if (fileStr != null) {
                try {
                    keys = asListOfArrays(fileStr);
                } catch (IOException e1) {
                    LOG.error("Could not open '" + new File(fileStr).getAbsolutePath() + "'.");
                    error = true;
                }
            }
            if (queryStr != null) {
                for (String pmid : queryStr.split(","))
                    keys.add(pmid.split(KEY_PART_SEPERATOR));
            }

            // Main algorithm iterating over documents.
            try {
                if (!error) {
                    Iterator it;
                    if (!keysExplicitlyGiven) {
                        it = dbc.querySubset(tableName, qo.whereClause, limit, numberRefHops, tableSchema);
                    } else if (keys.size() > 0)
                        it = dbc.retrieveColumnsByTableSchema(keys, tableName, tableSchema);
                    else
                        throw new IllegalStateException(
                                "No query keys have been explicitly given (e.g. in a file) nor should the whole table be queried.");
                    int i = 0;
                    // The name of the sub directories will just be their batch
                    // number. We start at -1 because the batchNumber will be
                    // incremented first of all (0 % x == 0, Ax).
                    int batchNumber = -1;
                    // outDir will be baseOutDir plus the current batch number
                    // of files when
                    // saving the queried files in separate batches is wished.
                    File outDir = outfile;

                    if (pubmedArticleSet) {
                        if (null != baseOutFile) {
                            logMessage(
                                    "Creating a single file with a PubmedArticleSet and writing it to " + baseOutFile);
                            bw = new BufferedWriter(new FileWriter(baseOutFile));
                        }
                        print("\n"
                                + "\n"
                                + "", bw);
                    }

                    while (it.hasNext()) {
                        byte[][] idAndXML = it.next();
                        if (outfile != null) {
                            // if we want batches, create appropriate
                            // subdirectories
                            if (batchSize > 0 && i % batchSize == 0) {
                                ++batchNumber;
                                // Adjust the sub directory for the new batch.
                                String subDirectoryName = (batchNumber > -1 && batchSize > 0
                                        ? Integer.toString(batchNumber) : "");
                                String subDirPath = outfile.getAbsolutePath() + FILE_SEPERATOR + subDirectoryName;
                                outDir = new File(subDirPath);
                                outDir.mkdir();
                            }

                            // Write the current file into the given directory
                            // and use the key as file name
                            String filename = new String(idAndXML[0]);

                            if (!pubmedArticleSet) {
                                if (bw != null)
                                    bw.close();
                                bw = new BufferedWriter(new FileWriter(outDir + FILE_SEPERATOR + filename));
                            }
                        }
                        if (xpath == null) {
                            StringBuilder sb = new StringBuilder();
                            if (pubmedArticleSet)
                                sb.append("\n");
                            sb.append(new String(idAndXML[1], "UTF-8"));
                            if (pubmedArticleSet)
                                sb.append("\n");
                            print(sb.toString(), bw);
                        } else {
                            // 'values' contains for each XPath delivered one
                            // array of Strings holding the values for this
                            // XPath (e.g. the AuthorList mostly yields several
                            // values).
                            String[][] values = getXpathValues(idAndXML[1], xpath);
                            for (String[] valuesOfXpath : values)
                                for (String singleValue : valuesOfXpath)
                                    print(singleValue, bw);
                        }
                        if (useDelimiter)
                            System.out.println(DELIMITER);
                        ++i;

                    }

                    if (pubmedArticleSet) {
                        print("", bw);
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                try {
                    if (bw != null)
                        bw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        return error;
    }

    /**
     * @param string
     * @param bw
     * @throws IOException
     */
    private static void print(String string, BufferedWriter bw) throws IOException {
        if (bw == null)
            System.out.println(string);
        else
            bw.write(string + "\n");
    }

    private static String[][] getXpathValues(byte[] next, String xpaths) {

        String[] xpathArray = xpaths.split(",");
        List> fields = new ArrayList>();
        for (String xpath : xpathArray) {
            Map field = new HashMap();
            field.put(JulieXMLConstants.NAME, xpath);
            field.put(JulieXMLConstants.XPATH, xpath);
            field.put(JulieXMLConstants.RETURN_XML_FRAGMENT, "true");
            field.put(JulieXMLConstants.RETURN_ARRAY, "true");
            fields.add(field);
        }

        String[][] retStrings = new String[xpathArray.length][];

        Iterator> it = JulieXMLTools.constructRowIterator(next, 1024, ".", fields, "your result");
        if (it.hasNext()) {
            Map row = it.next();
            for (int i = 0; i < xpathArray.length; i++) {
                // Get the field "xpath" which was given as field name above; we
                // wanted multiple results to be returned in an array.
                String[] values = (String[]) row.get(xpathArray[i]);
                if (values == null)
                    values = new String[]{"XPath " + xpaths + " does not exist in this document."};
                retStrings[i] = values;
            }
            if (it.hasNext()) {
                // What happened? We wanted all values in one array, so this
                // should not happen.
                LOG.warn(
                        "There are more results for the XPath {} then expected and not all have been returned. Please contact a developer for help.",
                        xpaths);
            }
        }
        return retStrings;
    }

    private static void printHelp(Options options) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(160);
        formatter.printHelp(CLI.class.getName(), options);
    }

    private static Options getOptions() {
        Options options = new Options();

        // -------------------- OptionGroup for available modes --------------
        OptionGroup modes = new OptionGroup();

        modes.addOption(buildOption("i", "import", "Import data into the _data table", "file/dir to import"));
        modes.addOption(buildOption("im", "importmedline", "Import PubMed/MEDLINE data into the _data table. The parameter is a XML file holding information about the PubMed/MEDLINE baseline location. It is the same file format used for the -um mode.", "XML MEDLINE config"));
        modes.addOption(buildOption("u", "update", "Update _data table", "file/dir to update from"));
        modes.addOption(buildOption("um", "updatemedline", "Update _data table from PubMed/MEDLINE update files. Keeps track of already applied update files via an internal table. The parameter is a XML file holding information about the update file location. It is the same file format used for the -im mode.", "XML MEDLINE config"));
        modes.addOption(buildOption("s", "subset",
                "Define a subset table; use -f, -o, -a, -m, -w or -r to specify the subsets source.",
                "name of the new subset table"));
        modes.addOption(buildOption("re", "reset",
                "Resets a subset table to a not-yet-processed state. Flags:\n" + "-np only reset non-processed items\n"
                        + "-ne only reset items without errors\n"
                        + "-lc to reset only those items with the given last component\n"
                        + "-f a partial reset can be achieved by specifying a file containing one primary key value for each document to be resetted",
                "subset table name"));
        modes.addOption(
                buildOption("st", "status", "Show the processing status of a subset table. Generates a small report containing the number of processed and total documents of a subset table. " +
                        "The report can be customized using the -he, -isp, -inp, -to and -slc switches", "subset table name"));

        OptionBuilder.withLongOpt("query");
        OptionBuilder.withDescription("Query a table (default: " + Constants.DEFAULT_DATA_TABLE_NAME
                + ") for XMLs. You can enter the primary keys directly or use -f to specify a file. If you define none of both, the whole table will be returned.\n"
                + "Use -d to display delimiters between the results.\n"
                + "Use -z to specify the target table. If the table is a subset, only documents in this subset will be returned.\n"
                + "Use -l to set a limit of returned documents.\n"
                + "Use -x to specify an XPath expression go extract specific parts of the queried XML documents.\n"
                + "Use -out to save the query results to file.");
        OptionBuilder.hasOptionalArg();
        OptionBuilder.withArgName("your query");
        modes.addOption(OptionBuilder.create("q"));

        modes.addOption(buildOption("h", "help", "Displays all possible parameters."));
        modes.addOption(buildOption("t", "tables", "Displays all tables in the active scheme."));

        modes.addOption(buildOption("td", "tabledefinition", "Displays the columns of a table.", "the table"));

        modes.addOption(buildOption("ds", "displayscheme", "Displays the active scheme."));
        modes.addOption(buildOption("ch", "check",
                "Checks if a table confirms to its definition (for subsets: only primary keys!)", "table"));
        modes.addOption(buildOption("dc", "defaultconfig", "Prints the defaultConfiguration."));
        modes.addOption(buildOption("dt", "droptable", "Drops the given table.", "table"));

        modes.addOption(buildOption("lts", "listtableschemas",
                "Displays all table schema names in the configuration. The showed name index can be used as value for the -ts option."));

        modes.setRequired(true);

        options.addOptionGroup(modes);

        // -------------------- OptionGroup for exclusive parameters--------------
        OptionGroup exclusive = new OptionGroup();

        exclusive.addOption(buildOption("f", "file",
                "Set the file used for query, subset creation or partial subset reset.", "file"));
        exclusive.addOption(buildOption("o", "online",
                "Defines the subset by a PubMed query - remember to wrap it in double quotation marks!", "query"));
        exclusive.addOption(buildOption("a", "all", "Use all entries of the _data table for the subset."));
        exclusive.addOption(buildOption("r", "random",
                "Generates a random subset, you must provide its size as a parameter. Often used with -z.", "size"));
        exclusive.addOption(buildOption("m", "mirror",
                "Creates a subset table which mirrors the database table. I.e. when the data table gets new records, the mirror subset(s) will be updated accordingly."));
        exclusive
                .addOption(buildOption("w", "where", "Uses a SQL WHERE clause during subset definition.", "condition"));
        exclusive.addOption(
                buildOption("j", "journals", "Define a subset by providing a file with journal names.", "file"));
        exclusive.addOption(
                buildOption("l", "limit", "For use with -q. Restricts the number of documents returned.", "limit"));


        options.addOption(buildOption("he", "has errors",
                "Flag for -st(atus) mode to add the 'has errors' statistic to a subset status report."));
        options.addOption(buildOption("isp", "is processed",
                "Flag for -st(atus) mode to add the 'is processed' statistic to a subset status report."));
        options.addOption(buildOption("inp", "is in process",
                "Flag for -st(atus) mode to add the 'is in process' statistic to a subset status report."));
        options.addOption(buildOption("to", "total",
                "Flag for -st(atus) mode to add the 'total' statistic to a subset status report."));
        options.addOption(buildOption("slc", "show last component",
                "Flag for -st(atus) mode to add the 'last component' statistic to a subset status report."));


        options.addOption(buildOption("np", "not processed",
                "Flag for -re(set) mode to restrict to non-processed table rows. May be combined with -ne, -lc."));
        options.addOption(buildOption("ne", "no errors",
                "Flag for -re(set) mode to restrict to table rows without errors. May be combined with -np, -lc."));
        options.addOption(buildOption("lc", "last component",
                "Option for -re(set) mode to restrict to table rows to a given last component identifier. May be combined with -np, -ne.",
                "component name"));

        options.addOptionGroup(exclusive);

        // --------------- optional details for many modes --------------
        options.addOption(buildOption("z", "superset",
                "Provides a superset name for definition of a subset or the name of a data table.",
                "name of the superset table"));
        options.addOption(buildOption("v", "verbose", "Activate verbose informational ouput of the tool's actions"));

        options.addOption(buildOption("d", "delimiter", "Display a line of \"-\" as delimiter between the results."));
        options.addOption(buildOption("pas", "pubmedarticleset",
                "For use with -q. The queried documents will be interpreted as Medline XML documents and will be enclosed in PubmedArticleSet."));
        options.addOption(buildOption("out", "out",
                "The file or directory where query results are written to. By default, a directory will be created and it will be filled with one file per document. The files will have the name of their database primary key. Modifying parameters:\n"
                        + "Use -bs to create subdirectories for batches of files.\n"
                        + "Use -pas to create no directory but a single XML file representing a PubmedArticleSet. This assumes that the queried documents are Medline or Pubmed XML documents.",
                "output directory"));
        options.addOption(buildOption("bs", "batchsize",
                "The number of queried documents (by -q and -out) which should be written in one directory. Subdirectories will be created at need.",
                "batchsize"));
        options.addOption(buildOption("x", "xpath",
                "When querying documents using -q, you may specify one or more XPath expressions to restrict the output to the elements referenced by your XPath expressions. Several XPaths must be delimited by a single comma.",
                "xpath"));
        options.addOption(buildOption("rh", "referencehops",
                "The maximum number of allowed hops to tables referenced with a foreign key when creating subset tables.",
                "max number of hops"));
        options.addOption(buildOption("ts", "tableschema",
                "Table Schema to use; currently only supported by -q mode. The name can be given or the index as retrieved by the -lts mode.",
                "schemaname"));

        // -------------------- authentication --------------------
        options.addOption(buildOption("U", "url",
                "URL to database server (e.g. jdbc:postgresql:///)", "url"));
        options.addOption(buildOption("n", "username", "username for database", "username"));
        options.addOption(buildOption("p", "pass", "password for database", "password"));
        options.addOption(buildOption("pgs", "pgschema", "Postgres Schema to use", "schema"));
        options.addOption(buildOption("srv", "server", "Server name to connect to", "servername"));
        options.addOption(buildOption("db", "database", "Database to connect to", "database"));
        options.addOption(buildOption("dbc", "databaseconfiguration",
                "XML file specifying the user configuration (defaults to dbcConfiguration.xml).", "Config File"));

        return options;
    }

    private static Option buildOption(String shortName, String longName, String description, String... arguments) {
        OptionBuilder.withLongOpt(longName);
        OptionBuilder.withDescription(description);
        OptionBuilder.hasArgs(arguments.length);
        for (String argument : arguments)
            OptionBuilder.withArgName(argument);
        return OptionBuilder.create(shortName);
    }

    /**
     * @param dbc       - databaseconnector
     * @param tableName - name of the table to check
     * @return true - if there was an error, otherwise false
     */
    private static boolean checkSchema(DataBaseConnector dbc, String tableName) {
        boolean error = false;
        String[] tablePath = tableName.split("\\.");
        // if the table name has the form 'schemaname.tablename'
        if (tablePath.length == 2 && !dbc.withConnectionQueryBoolean(c -> c.schemaExists(tablePath[0])))
            dbc.createSchema(tablePath[0]);
        else if (tablePath.length > 2) {
            LOG.error(String.format(
                    "The table path %s is invalid. Only table names of the form 'tablename' or 'schemaname.tablename'are accepted.",
                    tableName));

        }
        return error;
    }

    private static String escapeSingleQuotes(String comment) {
        return comment.replaceAll("'", "\\\\'");

    }

    private static List asListOfArrays(String fileStr) throws IOException {
        List list = new ArrayList();
        File file = new File(fileStr);
        if (file != null) {
            try (BufferedReader br = new BufferedReader(new FileReader(file))) {
                String line = br.readLine();
                while (line != null) {
                    list.add(line.split(KEY_PART_SEPERATOR));
                    line = br.readLine();
                }
            }
        }
        return list;
    }

    private static ArrayList asList(String fileStr) throws IOException {
        ArrayList list = new ArrayList();
        File file = new File(fileStr);
        if (file != null) {
            try (BufferedReader br = new BufferedReader(new FileReader(file))) {
                String line = br.readLine();
                while (line != null) {
                    list.add(line);
                    line = br.readLine();
                }
            }
        }
        return list;
    }

    private enum Mode {
        IMPORT, QUERY, SUBSET, RESET, STATUS, ERROR, TABLES, LIST_TABLE_SCHEMAS, TABLE_DEFINITION, SCHEME, CHECK, DEFAULT_CONFIG, DROP_TABLE, UPDATE_MEDLINE
    }

    public static XMLConfiguration loadXmlConfiguration(File configurationFile) throws ConfigurationException {
        try {
            Parameters params = new Parameters();
            FileBasedConfigurationBuilder configBuilder =
                    new FileBasedConfigurationBuilder<>(XMLConfiguration.class).configure(params
                            .xml()
                            .setFile(configurationFile));
            return configBuilder.getConfiguration();
        } catch (org.apache.commons.configuration2.ex.ConfigurationException e) {
            throw new ConfigurationException(e);
        }
    }
}