be.ugent.rml.cli.Main Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rmlmapper Show documentation
Show all versions of rmlmapper Show documentation
The RMLMapper executes RML rules to generate high quality Linked Data from multiple originally (semi-)structured data sources.
package be.ugent.rml.cli;
import be.ugent.rml.Executor;
import be.ugent.rml.Utils;
import be.ugent.rml.access.DatabaseType;
import be.ugent.rml.conformer.MappingConformer;
import be.ugent.rml.functions.FunctionLoader;
import be.ugent.rml.functions.lib.IDLabFunctions;
import be.ugent.rml.metadata.MetadataGenerator;
import be.ugent.rml.records.RecordsFactory;
import be.ugent.rml.store.QuadStore;
import be.ugent.rml.store.RDF4JStore;
import be.ugent.rml.store.SimpleQuadStore;
import be.ugent.rml.term.NamedNode;
import be.ugent.rml.term.Term;
import ch.qos.logback.classic.Level;
import org.apache.commons.cli.*;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import java.io.*;
import java.time.Instant;
import java.util.*;
import java.util.stream.Collectors;
public class Main {
// SLF4J logger shared by all static methods of this class.
private static final Logger logger = LoggerFactory.getLogger(Main.class);
// Marker named "FATAL", attached to the error logged when making the mapping conformant to RML fails.
private static final Marker fatal = MarkerFactory.getMarker("FATAL");
/**
 * JVM entry point. Delegates to {@link #main(String[], String)} using the
 * value of the {@code user.dir} system property as base path.
 *
 * @param args the CLI arguments
 */
public static void main(String[] args) {
    final String workingDirectory = System.getProperty("user.dir");
    main(args, workingDirectory);
}
/**
 * Main method used by the CLI. Also allows setting the current working directory via the argument basePath.
 * <p>
 * Steps: parse the CLI arguments (optionally backed by a configuration file), read and concatenate the
 * mapping documents, make them conformant to RML, optionally set up metadata generation and custom
 * functions, execute the mapping and write the result (and metadata, if requested).
 * Failures are logged; they are not rethrown to the caller.
 *
 * @param args     the CLI arguments
 * @param basePath the basePath used during the execution; it is handed to the {@link RecordsFactory}.
 */
public static void main(String[] args, String basePath) {
    Options options = new Options();
    Option mappingdocOption = Option.builder("m")
            .longOpt("mappingfile")
            .hasArg()
            .numberOfArgs(Option.UNLIMITED_VALUES)
            .desc("one or more mapping file paths and/or strings (multiple values are concatenated). " +
                    "r2rml is converted to rml if needed using the r2rml arguments.")
            .build();
    Option outputfileOption = Option.builder("o")
            .longOpt("outputfile")
            .hasArg()
            .desc("path to output file (default: stdout)")
            .build();
    Option functionfileOption = Option.builder("f")
            .longOpt("functionfile")
            .hasArg()
            .numberOfArgs(Option.UNLIMITED_VALUES)
            .desc("one or more function file paths (dynamic functions with relative paths are found relative to the cwd)")
            .build();
    Option triplesmapsOption = Option.builder("t")
            .longOpt("triplesmaps")
            .hasArg()
            .desc("IRIs of the triplesmaps that should be executed in order, split by ',' (default is all triplesmaps)")
            .build();
    Option removeduplicatesOption = Option.builder("d")
            .longOpt("duplicates")
            .desc("remove duplicates in the output")
            .build();
    Option configfileOption = Option.builder("c")
            .longOpt("configfile")
            .hasArg()
            .desc("path to configuration file")
            .build();
    Option helpOption = Option.builder("h")
            .longOpt("help")
            .desc("show help info")
            .build();
    Option verboseOption = Option.builder("v")
            .longOpt("verbose")
            .desc("show more details in debugging output")
            .build();
    Option metadataOption = Option.builder("e")
            .longOpt("metadatafile")
            .hasArg()
            .desc("path to output metadata file")
            .build();
    Option metadataDetailLevelOption = Option.builder("l")
            .longOpt("metadataDetailLevel")
            .hasArg()
            .desc("generate metadata on given detail level (dataset - triple - term)")
            .build();
    Option serializationFormatOption = Option.builder("s")
            .longOpt("serialization")
            .desc("serialization format (nquads (default), turtle, trig, trix, jsonld, hdt)")
            .hasArg()
            .build();
    Option jdbcDSNOption = Option.builder("dsn")
            .longOpt("r2rml-jdbcDSN")
            .desc("DSN of the database when using R2RML rules")
            .hasArg()
            .build();
    Option passwordOption = Option.builder("p")
            .longOpt("r2rml-password")
            .desc("password of the database when using R2RML rules")
            .hasArg()
            .build();
    Option usernameOption = Option.builder("u")
            .longOpt("r2rml-username")
            .desc("username of the database when using R2RML rules")
            .hasArg()
            .build();
    options.addOption(mappingdocOption);
    options.addOption(outputfileOption);
    options.addOption(functionfileOption);
    options.addOption(removeduplicatesOption);
    options.addOption(triplesmapsOption);
    options.addOption(configfileOption);
    options.addOption(helpOption);
    options.addOption(verboseOption);
    options.addOption(serializationFormatOption);
    options.addOption(metadataOption);
    options.addOption(metadataDetailLevelOption);
    options.addOption(jdbcDSNOption);
    options.addOption(passwordOption);
    options.addOption(usernameOption);

    CommandLineParser parser = new DefaultParser();
    try {
        // Parse the command line arguments.
        CommandLine lineArgs = parser.parse(options, args);

        // Load the config file, if one is given. Options on the command line take priority over it.
        Properties configFile = null;
        if (lineArgs.hasOption("c")) {
            configFile = new Properties();
            try (Reader configReader = Utils.getReaderFromLocation(lineArgs.getOptionValue("c"))) {
                configFile.load(configReader);
            }
        }

        if (checkOptionPresence(helpOption, lineArgs, configFile)) {
            printHelp(options);
            return;
        }

        if (checkOptionPresence(verboseOption, lineArgs, configFile)) {
            setLoggerLevel(Level.DEBUG);
        } else {
            setLoggerLevel(Level.ERROR);
        }

        String[] mOptionValue = getOptionValues(mappingdocOption, lineArgs, configFile);
        if (mOptionValue == null) {
            // Without a mapping there is nothing to execute.
            printHelp(options);
            return;
        }

        // Concatenate all mapping files.
        InputStream is = openMappingDocuments(mOptionValue);

        // Collect the R2RML database options; the "r2rml-" prefix of the long option name is dropped for the key.
        Map<String, String> mappingOptions = new HashMap<>();
        for (Option option : new Option[]{jdbcDSNOption, passwordOption, usernameOption}) {
            if (checkOptionPresence(option, lineArgs, configFile)) {
                mappingOptions.put(option.getLongOpt().replace("r2rml-", ""), getOptionValues(option, lineArgs, configFile)[0]);
            }
        }

        // Read mapping file.
        RDF4JStore rmlStore = new RDF4JStore();
        rmlStore.read(is, null, RDFFormat.TURTLE);

        // Convert mapping file to RML if needed.
        MappingConformer conformer = new MappingConformer(rmlStore, mappingOptions);
        try {
            boolean conversionNeeded = conformer.conform();
            if (conversionNeeded) {
                logger.info("Conversion to RML was needed.");
            }
        } catch (Exception e) {
            // NOTE(review): execution continues after this failure, as in the original — confirm that is intended.
            logger.error(fatal, "Failed to make mapping file conformant to RML spec.", e);
        }

        RecordsFactory factory = new RecordsFactory(basePath);

        String outputFormat = getPriorityOptionValue(serializationFormatOption, lineArgs, configFile);
        QuadStore outputStore = getStoreForFormat(outputFormat);

        // Extract required information and create the MetadataGenerator.
        MetadataGenerator metadataGenerator = null;
        String metadataFile = getPriorityOptionValue(metadataOption, lineArgs, configFile);
        String requestedDetailLevel = getPriorityOptionValue(metadataDetailLevelOption, lineArgs, configFile);

        if (checkOptionPresence(metadataOption, lineArgs, configFile)) {
            if (requestedDetailLevel != null) {
                MetadataGenerator.DETAIL_LEVEL detailLevel;
                switch (requestedDetailLevel) {
                    case "dataset":
                        detailLevel = MetadataGenerator.DETAIL_LEVEL.DATASET;
                        break;
                    case "triple":
                        detailLevel = MetadataGenerator.DETAIL_LEVEL.TRIPLE;
                        break;
                    case "term":
                        detailLevel = MetadataGenerator.DETAIL_LEVEL.TERM;
                        break;
                    default:
                        logger.error("Unknown metadata detail level option. Use the -h flag for more info.");
                        return;
                }
                QuadStore metadataStore = getStoreForFormat(outputFormat);
                metadataGenerator = new MetadataGenerator(
                        detailLevel,
                        metadataFile,
                        mOptionValue,
                        rmlStore,
                        metadataStore
                );
            } else {
                // The mapping is still executed, only without metadata generation.
                logger.error("Please specify the detail level when requesting metadata generation. Use the -h flag for more info.");
            }
        }

        String[] fOptionValue = getOptionValues(functionfileOption, lineArgs, configFile);
        FunctionLoader functionLoader;

        // Read function description files.
        if (fOptionValue == null) {
            functionLoader = new FunctionLoader();
        } else {
            logger.debug("Using custom path to functions.ttl file: {}", Arrays.toString(fOptionValue));
            RDF4JStore functionDescriptionTriples = new RDF4JStore();
            // The bundled IDLab function descriptions are always loaded first; custom files are added on top.
            functionDescriptionTriples.read(Utils.getInputStreamFromFile(Utils.getFile("functions_idlab.ttl")), null, RDFFormat.TURTLE);
            Map<String, Class> libraryMap = new HashMap<>();
            libraryMap.put("IDLabFunctions", IDLabFunctions.class);
            List<InputStream> lisF = Arrays.stream(fOptionValue)
                    .map(Utils::getInputStreamFromFileOrContentString)
                    .collect(Collectors.toList());
            for (InputStream functionDescription : lisF) {
                functionDescriptionTriples.read(functionDescription, null, RDFFormat.TURTLE);
            }
            functionLoader = new FunctionLoader(functionDescriptionTriples, libraryMap);
        }

        // We have to get the InputStreams of the RML documents again,
        // because we can only use an InputStream once.
        is = openMappingDocuments(mOptionValue);

        Executor executor = new Executor(rmlStore, factory, functionLoader, outputStore, Utils.getBaseDirectiveTurtle(is));

        // Explicitly requested triples maps; an empty list is passed when the option is absent.
        List<Term> triplesMaps = new ArrayList<>();
        String tOptionValue = getPriorityOptionValue(triplesmapsOption, lineArgs, configFile);
        if (tOptionValue != null) {
            for (String iri : tOptionValue.split(",")) {
                // Tolerate "a, b" and trailing commas: surrounding whitespace is not part of an IRI.
                String trimmedIri = iri.trim();
                if (!trimmedIri.isEmpty()) {
                    triplesMaps.add(new NamedNode(trimmedIri));
                }
            }
        }

        if (metadataGenerator != null) {
            metadataGenerator.preMappingGeneration(triplesMaps.isEmpty() ?
                    executor.getTriplesMaps() : triplesMaps, rmlStore);
        }

        // Get start timestamp for post mapping metadata.
        String startTimestamp = Instant.now().toString();

        try {
            QuadStore result = executor.execute(triplesMaps, checkOptionPresence(removeduplicatesOption, lineArgs, configFile),
                    metadataGenerator);

            // Get stop timestamp for post mapping metadata.
            String stopTimestamp = Instant.now().toString();

            // Generate post mapping metadata and output all metadata.
            if (metadataGenerator != null) {
                metadataGenerator.postMappingGeneration(startTimestamp, stopTimestamp,
                        result);
                writeOutput(metadataGenerator.getResult(), metadataFile, outputFormat);
            }

            String outputFile = getPriorityOptionValue(outputfileOption, lineArgs, configFile);
            if (result.isEmpty()) {
                logger.info("No results!");
                // Write even if no results
            }
            result.copyNameSpaces(rmlStore);
            writeOutput(result, outputFile, outputFormat);
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost.
            logger.error(e.getMessage(), e);
        }
    } catch (ParseException exp) {
        // The command line could not be parsed: report why and show the usage.
        logger.error("Parsing failed. Reason: " + exp.getMessage());
        printHelp(options);
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
    }
}

/**
 * Opens every mapping document (file path or content string) and chains them into one stream.
 *
 * @param mappingDocuments the values given for the mapping option
 * @return a single stream that yields the documents one after the other
 */
private static InputStream openMappingDocuments(String[] mappingDocuments) {
    List<InputStream> streams = Arrays.stream(mappingDocuments)
            .map(Utils::getInputStreamFromFileOrContentString)
            .collect(Collectors.toList());
    return new SequenceInputStream(Collections.enumeration(streams));
}
/**
 * Checks whether an option is switched on, either on the command line or in the configuration file.
 * <p>
 * An option given on the command line always counts. An option in the configuration file counts
 * unless its value is "false" (ignoring case and surrounding whitespace), e.g. 'help = false'
 * in the config file shouldn't return the help text.
 *
 * @param option     the option to look for
 * @param lineArgs   the parsed command line
 * @param properties the loaded configuration file, or {@code null} if none was given
 * @return {@code true} if the option is present and not disabled
 */
private static boolean checkOptionPresence(Option option, CommandLine lineArgs, Properties properties) {
    if (lineArgs.hasOption(option.getOpt())) {
        return true;
    }
    if (properties == null) {
        return false;
    }
    String configured = properties.getProperty(option.getLongOpt());
    // Properties keeps trailing whitespace of values, so normalise before comparing.
    return configured != null && !"false".equalsIgnoreCase(configured.trim());
}
/**
 * Looks up the single value of an option, giving the command line priority over the configuration file.
 *
 * @param option     the option to look up
 * @param lineArgs   the parsed command line
 * @param properties the loaded configuration file, or {@code null} if none was given
 * @return the value from the command line if the option is set there, otherwise the value stored
 *         under the option's long name in the configuration file, otherwise {@code null}
 */
private static String getPriorityOptionValue(Option option, CommandLine lineArgs, Properties properties) {
    final String shortName = option.getOpt();
    if (lineArgs.hasOption(shortName)) {
        return lineArgs.getOptionValue(shortName);
    }
    if (properties == null) {
        return null;
    }
    // getProperty already yields null when the key is absent.
    return properties.getProperty(option.getLongOpt());
}
/**
 * Looks up all values of an option, giving the command line priority over the configuration file.
 * <p>
 * In the configuration file multiple values are separated by whitespace. Leading, trailing and
 * repeated whitespace is ignored so that it does not produce empty values.
 *
 * @param option     the option to look up
 * @param lineArgs   the parsed command line
 * @param properties the loaded configuration file, or {@code null} if none was given
 * @return the values of the option, or {@code null} if it is set in neither place
 */
private static String[] getOptionValues(Option option, CommandLine lineArgs, Properties properties) {
    if (lineArgs.hasOption(option.getOpt())) {
        return lineArgs.getOptionValues(option.getOpt());
    }
    if (properties == null) {
        return null;
    }
    String configured = properties.getProperty(option.getLongOpt());
    if (configured == null) {
        return null;
    }
    return configured.trim().split("\\s+");
}
/**
 * Prints the usage text for the given options using a default {@link HelpFormatter}.
 *
 * @param options the options to describe
 */
private static void printHelp(Options options) {
    final String usage = "java -jar mapper.jar \noptions:";
    new HelpFormatter().printHelp(usage, options);
}
/**
 * Sets the level of the root logger.
 * The root logger is cast to the Logback implementation, because the SLF4J interface offers no setter.
 *
 * @param level the level to apply to the root logger
 */
private static void setLoggerLevel(Level level) {
    ch.qos.logback.classic.Logger rootLogger =
            (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME);
    rootLogger.setLevel(level);
}
/**
 * Writes a store to the given location in the requested serialization format.
 * <p>
 * The format name is matched case-insensitively and defaults to "nquads". For "hdt" the store is
 * first written as N-Quads to a temporary file (deleted on JVM exit), which is then converted to
 * HDT at {@code outputFile}. Every other format is written directly.
 *
 * @param store      the store to serialize
 * @param outputFile path of the output file, or {@code null} to write to stdout
 * @param format     name of the serialization format, or {@code null} for the default
 */
private static void writeOutput(QuadStore store, String outputFile, String format) {
    // Normalise first, so that e.g. "HDT" takes the HDT branch instead of being handed to the store as-is.
    String normalizedFormat = (format == null) ? "nquads" : format.toLowerCase(Locale.ROOT);

    if (!normalizedFormat.equals("hdt")) {
        writeOutputUncompressed(store, outputFile, normalizedFormat);
        return;
    }

    // NOTE(review): outputFile is handed to the HDT conversion unchecked; confirm it copes with null (stdout).
    try {
        File tmpFile = File.createTempFile("file", ".nt");
        tmpFile.deleteOnExit();
        String uncompressedOutputFile = tmpFile.getAbsolutePath();
        writeOutputUncompressed(store, uncompressedOutputFile, "nquads");
        Utils.ntriples2hdt(uncompressedOutputFile, outputFile);
    } catch (IOException e) {
        logger.error("Writing HDT output failed. Reason: " + e.getMessage(), e);
    }
}
/**
 * Serializes a store as plain text to a file or, if no file is given, to stdout.
 * <p>
 * Files are written as UTF-8 and closed afterwards, even when serialization fails. Stdout is only
 * flushed, never closed, so the JVM can keep printing after this call. Failures are logged and
 * not rethrown.
 *
 * @param store      the store to serialize
 * @param outputFile path of the output file (relative paths are resolved against {@code user.dir}),
 *                   or {@code null} to write to stdout
 * @param format     name of the serialization format, passed on to the store
 * @return the file that was targeted, or {@code null} when writing to stdout
 */
private static File writeOutputUncompressed(QuadStore store, String outputFile, String format) {
    File targetFile = null;

    if (store.size() == 1) {
        logger.info(store.size() + " quad was generated");
    } else {
        logger.info(store.size() + " quads were generated");
    }

    try {
        if (outputFile != null) {
            // An output file was provided: write the quads there.
            targetFile = new File(outputFile);
            logger.info("Writing quads to " + targetFile.getPath() + "...");

            if (!targetFile.isAbsolute()) {
                targetFile = new File(System.getProperty("user.dir") + "/" + outputFile);
            }

            // Fixed charset instead of the platform default, so the output does not depend on the machine.
            try (Writer out = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(targetFile), java.nio.charset.StandardCharsets.UTF_8))) {
                store.write(out, format);
            }
            logger.info("Writing to " + targetFile.getPath() + " is done.");
        } else {
            Writer out = new BufferedWriter(new OutputStreamWriter(System.out));
            store.write(out, format);
            // Flush only: closing the writer would close System.out for the rest of the JVM.
            out.flush();
        }
    } catch (Exception e) {
        logger.error("Writing output failed. Reason: " + e.getMessage(), e);
    }

    return targetFile;
}
/**
 * Picks the store implementation for an output format.
 * <p>
 * The format name is matched case-insensitively, in line with the writing code, which lower-cases it.
 *
 * @param outputFormat name of the serialization format, or {@code null} for the default
 * @return a {@link SimpleQuadStore} for the default, "nquads" and "hdt"; an {@link RDF4JStore} otherwise
 */
private static QuadStore getStoreForFormat(String outputFormat) {
    boolean quadBased = outputFormat == null
            || outputFormat.equalsIgnoreCase("nquads")
            || outputFormat.equalsIgnoreCase("hdt");
    if (quadBased) {
        return new SimpleQuadStore();
    }
    return new RDF4JStore();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy