be.ugent.rml.cli.Main Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rmlmapper Show documentation
Show all versions of rmlmapper Show documentation
The RMLMapper executes RML rules to generate high-quality Linked Data from multiple, originally (semi-)structured, data sources.
package be.ugent.rml.cli;
import be.ugent.idlab.knows.functions.agent.Agent;
import be.ugent.idlab.knows.functions.agent.AgentFactory;
import be.ugent.knows.idlabFunctions.IDLabFunctions;
import be.ugent.rml.Executor;
import be.ugent.rml.StrictMode;
import be.ugent.rml.Utils;
import be.ugent.rml.conformer.MappingConformer;
import be.ugent.rml.metadata.MetadataGenerator;
import be.ugent.rml.records.RecordsFactory;
import be.ugent.rml.store.Quad;
import be.ugent.rml.store.QuadStore;
import be.ugent.rml.store.RDF4JStore;
import be.ugent.rml.target.Target;
import be.ugent.rml.target.TargetFactory;
import be.ugent.rml.term.NamedNode;
import be.ugent.rml.term.Term;
import ch.qos.logback.classic.Level;
import org.apache.commons.cli.*;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.time.Instant;
import java.util.*;
import java.util.stream.Collectors;
import static be.ugent.rml.StrictMode.BEST_EFFORT;
import static be.ugent.rml.StrictMode.STRICT;
public class Main {
// SLF4J logger shared by all static methods of this class.
private static final Logger logger = LoggerFactory.getLogger(Main.class);
// Marker named "FATAL" that is attached to selected logger.error(...) calls in this class.
private static final Marker fatal = MarkerFactory.getMarker("FATAL");
/**
 * JVM entry point of the command line interface.
 * Runs the mapper relative to the current working directory ("user.dir") and
 * terminates the JVM with exit code 1 when the run throws.
 *
 * @param args the CLI arguments
 */
public static void main(String[] args) {
    try {
        run(args, System.getProperty("user.dir"));
    } catch (Exception e) {
        // Report on stderr: stdout is the default channel for the generated RDF,
        // so an error message must not be mixed into it.
        System.err.println(e);
        System.exit(1);
    }
}
/**
 * Runs the mapper with the JVM's current working directory ("user.dir") as base path.
 *
 * @param args the CLI arguments
 * @throws Exception if the delegated run fails
 */
public static void run(String[] args) throws Exception {
    final String workingDirectory = System.getProperty("user.dir");
    run(args, workingDirectory);
}
/**
 * Main method used for the CLI. Also allows setting the current working directory
 * via the argument basePath.
 *
 * <p>The method defines the CLI options, parses {@code args} (optionally merged with a
 * properties config file), loads the mapping rules, executes them and writes the results
 * and, when requested, the metadata.
 *
 * <p>Command line parse errors and most runtime failures are logged rather than rethrown;
 * only {@link IllegalArgumentException} is propagated to the caller.
 *
 * @param args     the CLI arguments
 * @param basePath the basePath used during the execution.
 * @throws IllegalArgumentException if no mapping is supplied, the output path does not exist,
 *                                  or the mapping / private security data cannot be parsed as Turtle
 * @throws Exception                declared for callers; see above for what is actually propagated
 */
public static void run(String[] args, String basePath) throws Exception {
    Options options = new Options();
    Option mappingdocOption = Option.builder("m")
            .longOpt("mappingfile")
            .hasArg()
            .numberOfArgs(Option.UNLIMITED_VALUES)
            .desc("one or more mapping file paths and/or strings (multiple values are concatenated). " +
                    "r2rml is converted to rml if needed using the r2rml arguments. "
                    + "RDF Format is determined based on extension.")
            .build();
    Option privateSecurityDataOption = Option.builder("psd")
            .longOpt("privatesecuritydata")
            .hasArg()
            .numberOfArgs(Option.UNLIMITED_VALUES)
            .desc("one or more private security files containing all private security information such as " +
                    "usernames, passwords, certificates, etc.")
            .build();
    Option outputfileOption = Option.builder("o")
            .longOpt("outputfile")
            .hasArg()
            .desc("path to output file (default: stdout)")
            .build();
    Option functionfileOption = Option.builder("f")
            .longOpt("functionfile")
            .hasArg()
            .numberOfArgs(Option.UNLIMITED_VALUES)
            .desc("one or more function file paths (dynamic functions with relative paths are found relative to the cwd)")
            .build();
    Option triplesmapsOption = Option.builder("t")
            .longOpt("triplesmaps")
            .hasArg()
            .desc("IRIs of the triplesmaps that should be executed in order, split by ',' (default is all triplesmaps)")
            .build();
    Option removeduplicatesOption = Option.builder("d")
            .longOpt("duplicates")
            .desc("remove duplicates in the HDT, N-Triples, or N-Quads output")
            .build();
    Option configfileOption = Option.builder("c")
            .longOpt("configfile")
            .hasArg()
            .desc("path to configuration file")
            .build();
    Option helpOption = Option.builder("h")
            .longOpt("help")
            .desc("show help info")
            .build();
    Option verboseOption = Option.builder("v")
            .longOpt("verbose")
            .desc("show more details in debugging output")
            .build();
    Option metadataOption = Option.builder("e")
            .longOpt("metadatafile")
            .hasArg()
            .desc("path to output metadata file")
            .build();
    Option metadataDetailLevelOption = Option.builder("l")
            .longOpt("metadataDetailLevel")
            .hasArg()
            .desc("generate metadata on given detail level (dataset - triple - term)")
            .build();
    Option serializationFormatOption = Option.builder("s")
            .longOpt("serialization")
            .desc("serialization format (nquads (default), turtle, trig, trix, jsonld, hdt)")
            .hasArg()
            .build();
    Option jdbcDSNOption = Option.builder("dsn")
            .longOpt("r2rml-jdbcDSN")
            .desc("DSN of the database when using R2RML rules")
            .hasArg()
            .build();
    Option passwordOption = Option.builder("p")
            .longOpt("r2rml-password")
            .desc("password of the database when using R2RML rules")
            .hasArg()
            .build();
    Option usernameOption = Option.builder("u")
            .longOpt("r2rml-username")
            .desc("username of the database when using R2RML rules")
            .hasArg()
            .build();
    Option strictModeOption = Option.builder()
            .longOpt("strict")
            .desc("Enable strict mode. In strict mode, the mapper will fail on invalid IRIs instead of skipping them.")
            .build();
    Option baseIriOption = Option.builder("b")
            .longOpt("base-iri")
            .desc("Base IRI used to expand relative IRIs in generated terms in the output.")
            .hasArg()
            .build();
    Option provideOwnEOFMarkerOption = Option.builder()
            .longOpt("disable-automatic-eof-marker")
            .desc("Setting this option assumes input data has a kind of End-of-File marker. " +
                    "Don't use unless you're absolutely sure what you're doing!")
            .build();

    options.addOption(mappingdocOption);
    options.addOption(privateSecurityDataOption);
    options.addOption(outputfileOption);
    options.addOption(functionfileOption);
    options.addOption(removeduplicatesOption);
    options.addOption(triplesmapsOption);
    options.addOption(configfileOption);
    options.addOption(helpOption);
    options.addOption(verboseOption);
    options.addOption(serializationFormatOption);
    options.addOption(metadataOption);
    options.addOption(metadataDetailLevelOption);
    options.addOption(jdbcDSNOption);
    options.addOption(passwordOption);
    options.addOption(usernameOption);
    options.addOption(strictModeOption);
    options.addOption(baseIriOption);
    options.addOption(provideOwnEOFMarkerOption);

    CommandLineParser parser = new DefaultParser();
    try {
        // parse the command line arguments
        CommandLine lineArgs = parser.parse(options, args);

        // Check if config file is given
        Properties configFile = null;
        if (lineArgs.hasOption("c")) {
            configFile = new Properties();
            try (Reader reader = Utils.getReaderFromLocation(lineArgs.getOptionValue("c"))) {
                configFile.load(reader);
            }
        }

        if (checkOptionPresence(helpOption, lineArgs, configFile)) {
            printHelp(options);
            return;
        }

        if (checkOptionPresence(verboseOption, lineArgs, configFile)) {
            setLoggerLevel(Level.DEBUG);
        } else {
            setLoggerLevel(Level.ERROR);
        }

        String[] mOptionValue = getOptionValues(mappingdocOption, lineArgs, configFile);
        List<InputStream> lis = new ArrayList<>();

        // Without -m the mapping can only come from piped stdin; an attached console means nothing is piped.
        if (mOptionValue == null && System.console() != null) {
            printHelp(options);
            throw new IllegalArgumentException("No mapping file nor via stdin found!");
        }

        String outputFile = getPriorityOptionValue(outputfileOption, lineArgs, configFile);
        // If output path exists and contains 'directory-like' characters
        if (outputFile != null) {
            // Normalize Windows path separators
            outputFile = outputFile.replace('\\', '/');
            if (!Utils.checkPathParent(outputFile, null)) {
                logger.error(fatal, "The given output path does not exist.");
                throw new IllegalArgumentException("The given output path does not exist.");
            }
        }

        if (mOptionValue != null) {
            // Concatenate all mapping files; the list must stay mutable because stdin may be appended
            lis = Arrays.stream(mOptionValue)
                    .map(Utils::getInputStreamFromFileOrContentString)
                    .collect(Collectors.toCollection(ArrayList::new));
        }

        appendStdinIfNotEmpty(lis);

        InputStream is = new SequenceInputStream(Collections.enumeration(lis));

        Map<String, String> mappingOptions = new HashMap<>();
        for (Option option : new Option[]{jdbcDSNOption, passwordOption, usernameOption}) {
            if (checkOptionPresence(option, lineArgs, configFile)) {
                mappingOptions.put(option.getLongOpt().replace("r2rml-", ""), getOptionValues(option, lineArgs, configFile)[0]);
            }
        }

        // Read mapping file.
        RDF4JStore rmlStore = new RDF4JStore();
        try {
            rmlStore.read(is, null, RDFFormat.TURTLE);
        } catch (RDFParseException e) {
            logger.error(fatal, "Unable to parse mapping rules as Turtle. Does the file exist and is it valid Turtle?", e);
            throw new IllegalArgumentException("Unable to parse mapping rules as Turtle. Does the file exist and is it valid Turtle?", e);
        }

        // Private security data is optional
        if (lineArgs.hasOption("psd")) {
            // Read the private security data.
            String[] mOptionValuePrivateSecurityData = getOptionValues(privateSecurityDataOption, lineArgs, configFile);
            List<InputStream> lisPrivateSecurityData = Arrays.stream(mOptionValuePrivateSecurityData)
                    .map(Utils::getInputStreamFromFileOrContentString)
                    .collect(Collectors.toList());
            try (InputStream isPrivateSecurityData = new SequenceInputStream(Collections.enumeration(lisPrivateSecurityData))) {
                rmlStore.read(isPrivateSecurityData, null, RDFFormat.TURTLE);
            } catch (RDFParseException e) {
                logger.debug(e.getMessage());
                logger.error(fatal, "Unable to parse private security data as Turtle. Does the file exist and is it valid Turtle?");
                throw new IllegalArgumentException("Unable to parse private security data as Turtle. Does the file exist and is it valid Turtle?", e);
            }
        }

        // Convert mapping file to RML if needed.
        MappingConformer conformer = new MappingConformer(rmlStore, mappingOptions);
        try {
            boolean conversionNeeded = conformer.conform();
            if (conversionNeeded) {
                logger.info("Conversion to RML was needed.");
            }
        } catch (Exception e) {
            // Best effort: the failure is logged and execution continues with the store as it is.
            logger.error(fatal, "Failed to make mapping file conformant to RML spec.", e);
        }

        RecordsFactory factory = new RecordsFactory(basePath);
        String outputFormat = getPriorityOptionValue(serializationFormatOption, lineArgs, configFile);
        QuadStore outputStore = getStoreForFormat(outputFormat);

        // Extract required information and create the MetadataGenerator
        MetadataGenerator metadataGenerator = null;
        String metadataFile = getPriorityOptionValue(metadataOption, lineArgs, configFile);
        String requestedDetailLevel = getPriorityOptionValue(metadataDetailLevelOption, lineArgs, configFile);
        if (checkOptionPresence(metadataOption, lineArgs, configFile)) {
            if (requestedDetailLevel != null) {
                MetadataGenerator.DETAIL_LEVEL detailLevel;
                switch (requestedDetailLevel) {
                    case "dataset":
                        detailLevel = MetadataGenerator.DETAIL_LEVEL.DATASET;
                        break;
                    case "triple":
                        detailLevel = MetadataGenerator.DETAIL_LEVEL.TRIPLE;
                        break;
                    case "term":
                        detailLevel = MetadataGenerator.DETAIL_LEVEL.TERM;
                        break;
                    default:
                        logger.error("Unknown metadata detail level option. Use the -h flag for more info.");
                        return;
                }
                QuadStore metadataStore = getStoreForFormat(outputFormat);
                metadataGenerator = new MetadataGenerator(
                        detailLevel,
                        metadataFile,
                        mOptionValue,
                        rmlStore,
                        metadataStore
                );
            } else {
                logger.error("Please specify the detail level when requesting metadata generation. Use the -h flag for more info.");
            }
        }

        boolean strict = checkOptionPresence(strictModeOption, lineArgs, configFile);
        StrictMode strictMode = strict ? STRICT : BEST_EFFORT;

        // get the base IRI
        String baseIRI = getPriorityOptionValue(baseIriOption, lineArgs, configFile);
        if (baseIRI == null || baseIRI.isEmpty()) {
            // if no explicit base IRI is set
            if (strictMode.equals(STRICT)) {
                throw new Exception("When running in strict mode, a base IRI argument must be set.");
            } else {
                if (mOptionValue != null) {
                    /*
                     * We have to get the InputStreams of the RML documents again,
                     * because we can only use an InputStream once
                     */
                    lis = Arrays.stream(mOptionValue)
                            .map(Utils::getInputStreamFromFileOrContentString)
                            .collect(Collectors.toList());
                }
                // Best-effort mode, use the @base directive as a fallback
                try (InputStream is2 = new SequenceInputStream(Collections.enumeration(lis))) {
                    baseIRI = Utils.getBaseDirectiveTurtle(is2);
                }
            }
        }

        String[] fOptionValue = getOptionValues(functionfileOption, lineArgs, configFile);
        final Agent functionAgent;

        List<Term> triplesMaps = new ArrayList<>();
        String tOptionValue = getPriorityOptionValue(triplesmapsOption, lineArgs, configFile);
        if (tOptionValue != null) {
            for (String iri : tOptionValue.split(",")) {
                triplesMaps.add(new NamedNode(iri));
            }
        }

        // Read function description files.
        if (fOptionValue == null) {
            // default initialisation with IDLab functions and GREL functions...
            functionAgent = AgentFactory.createFromFnO(
                    "fno/functions_idlab.ttl", "fno/functions_idlab_classes_java_mapping.ttl",
                    "fno_idlab_old/functions_idlab.ttl", "fno_idlab_old/functions_idlab_classes_java_mapping.ttl",
                    "functions_grel.ttl",
                    "grel_java_mapping.ttl");
        } else {
            logger.debug("Using custom path to functions.ttl file: {}", Arrays.toString(fOptionValue));
            // The four IDLab function descriptions always come first, followed by the user-supplied files.
            String[] optionWithIDLabFunctionArgs = new String[fOptionValue.length + 4];
            optionWithIDLabFunctionArgs[0] = "fno/functions_idlab.ttl";
            optionWithIDLabFunctionArgs[1] = "fno/functions_idlab_classes_java_mapping.ttl";
            optionWithIDLabFunctionArgs[2] = "fno_idlab_old/functions_idlab.ttl";
            optionWithIDLabFunctionArgs[3] = "fno_idlab_old/functions_idlab_classes_java_mapping.ttl";
            System.arraycopy(fOptionValue, 0, optionWithIDLabFunctionArgs, 4, fOptionValue.length);
            functionAgent = AgentFactory.createFromFnO(optionWithIDLabFunctionArgs);
        }

        Executor executor = new Executor(rmlStore, factory, outputStore, baseIRI, strictMode, functionAgent);

        if (checkOptionPresence(provideOwnEOFMarkerOption, lineArgs, configFile)) {
            logger.warn("Automatic EOF marker disabled!");
            executor.setEOFProvidedInData();
        }

        executor.verifySources(basePath);

        if (metadataGenerator != null) {
            metadataGenerator.preMappingGeneration(triplesMaps.isEmpty() ?
                    executor.getTriplesMaps() : triplesMaps, rmlStore);
        }

        // Get start timestamp for post mapping metadata
        String startTimestamp = Instant.now().toString();

        try {
            // The returned value is not needed here; the targets are fetched from the executor below.
            executor.execute(triplesMaps, checkOptionPresence(removeduplicatesOption, lineArgs, configFile),
                    metadataGenerator);
        } catch (Exception e) {
            logger.error(e.getMessage());
            throw e;
        } finally {
            functionAgent.close();
        }

        QuadStore result = null;
        Map<Term, QuadStore> targets = executor.getTargets();
        if (targets != null) {
            result = targets.get(new NamedNode("rmlmapper://default.store"));
            // There may be no default store; only copy namespaces when it exists.
            if (result != null) {
                result.copyNameSpaces(rmlStore);
            }
            IDLabFunctions.saveState();
            writeOutputTargets(targets, rmlStore, basePath, outputFile, outputFormat);
        }

        // Get stop timestamp for post mapping metadata
        String stopTimestamp = Instant.now().toString();

        // Generate post mapping metadata and output all metadata
        if (metadataGenerator != null && targets != null) {
            metadataGenerator.postMappingGeneration(startTimestamp, stopTimestamp,
                    result);
            writeOutput(metadataGenerator.getResult(), metadataFile, outputFormat);
        }
    } catch (ParseException exp) {
        // Command line could not be parsed: report and show usage
        logger.error("Parsing failed. Reason: {}", exp.getMessage());
        printHelp(options);
    } catch (IllegalArgumentException exp) {
        // Invalid user input is propagated to the caller
        throw exp;
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
    }
}

/**
 * Appends System.in to the given list when it currently offers real data.
 *
 * The first (up to) 32 bytes are peeked; stdin only counts as input when at least one of
 * them is neither 0 nor 4 (the EOT/EOF byte a detached console may send, e.g. under Maven tests).
 * I/O errors while probing are logged as a warning and otherwise ignored.
 */
private static void appendStdinIfNotEmpty(List<InputStream> streams) {
    try {
        BufferedInputStream bis = new BufferedInputStream(System.in);
        if (bis.available() > 0) {
            byte[] firstBytes = new byte[32];
            bis.mark(32);
            int bytesRead = bis.read(firstBytes);
            // Rewind so the peeked bytes are still delivered to the actual reader
            bis.reset();
            for (int i = 0; i < bytesRead; i++) {
                if (firstBytes[i] != 0 && firstBytes[i] != 4) { // 4 is the EOF / EOT byte
                    streams.add(bis);
                    break;
                }
            }
        }
    } catch (IOException ex) {
        logger.warn("Error trying to check System.in: {}", ex.getMessage());
        // The inputstream is closed when read. Leads to IOExceptions for tests that don't provide their own inputstream
    }
}
/**
 * Writes every target store to its destination.
 *
 * For each entry, quads whose subject or object contains the IDLab magic marker are removed first.
 * The store keyed by the value "rmlmapper://default.store" is written through
 * {@link #writeOutput}; every other store is written to the Target resolved by a
 * TargetFactory, after that target's metadata quads have been added to the store.
 *
 * @param targets             stores to export, keyed by their target term
 * @param rmlStore            store holding the mapping rules, passed to the TargetFactory
 * @param basePath            base path handed to the TargetFactory
 * @param outputFileDefault   output file for the default target (may be null)
 * @param outputFormatDefault serialization format for the default target (may be null)
 * @throws Exception if resolving, writing or closing a target fails
 */
private static void writeOutputTargets(Map<Term, QuadStore> targets, QuadStore rmlStore, String basePath, String outputFileDefault, String outputFormatDefault) throws Exception {
    boolean hasNoResults = true;
    logger.debug("Writing to Targets: {}", targets.keySet());
    TargetFactory targetFactory = new TargetFactory(basePath);

    // Go over each term and export to the Target if needed
    for (Map.Entry<Term, QuadStore> termTargetMapping : targets.entrySet()) {
        Term term = termTargetMapping.getKey();
        QuadStore store = termTargetMapping.getValue();

        if (store.size() > 0) {
            hasNoResults = false;
            logger.info("Target: {} has {} results", term, store.size());
        }

        /* Remove magic marker from output */
        List<Quad> quads = store.getQuads(null, null, null, null);
        for (Quad q : quads) {
            String subject = q.getSubject().toString();
            String object = q.getObject().toString();
            if (subject.contains(IDLabFunctions.MAGIC_MARKER_ENCODED)
                    || subject.contains(IDLabFunctions.MAGIC_MARKER)
                    || object.contains(IDLabFunctions.MAGIC_MARKER_ENCODED)
                    || object.contains(IDLabFunctions.MAGIC_MARKER)) {
                store.removeQuads(q.getSubject(), q.getPredicate(), q.getObject(), q.getGraph());
            }
        }

        // Default target is exported separately for backwards compatibility reasons
        if (term.getValue().equals("rmlmapper://default.store")) {
            logger.debug("Exporting to default Target");
            writeOutput(store, outputFileDefault, outputFormatDefault);
        } else {
            logger.debug("Exporting to Target: {}", term);
            if (store.size() > 1) {
                logger.info("{} quads were generated for {} Target", store.size(), term);
            } else {
                logger.info("{} quad was generated for {} Target", store.size(), term);
            }

            Target target = targetFactory.getTarget(term, rmlStore, store);
            try {
                String serializationFormat = target.getSerializationFormat();
                OutputStream output = target.getOutputStream();
                store.addQuads(target.getMetadata());

                // NOTE(review): uses the platform default charset, whereas the default target is written as UTF-8 — confirm this is intended.
                try (Writer out = new BufferedWriter(new OutputStreamWriter(output, Charset.defaultCharset()))) {
                    // Write store to target
                    store.write(out, serializationFormat);
                }
            } finally {
                // Close OS resources, also when writing failed
                target.close();
            }
        }
    }

    if (hasNoResults) {
        logger.info("No results!");
    }
}
/**
 * Tells whether an option is switched on, either on the command line (by short or long
 * name) or in the config file.
 *
 * A config file entry counts as present unless its value is exactly "false",
 * so e.g. 'help = false' in the config file does not trigger the help text.
 *
 * @param option     the option to look for
 * @param lineArgs   the parsed command line
 * @param properties the loaded config file, or null when none was given
 * @return true when the option is present in either source
 */
private static boolean checkOptionPresence(Option option, CommandLine lineArgs, Properties properties) {
    String shortName = option.getOpt();
    if (shortName != null && lineArgs.hasOption(shortName)) {
        return true;
    }

    String longName = option.getLongOpt();
    if (longName != null && lineArgs.hasOption(longName)) {
        return true;
    }

    if (properties == null) {
        return false;
    }
    String configured = properties.getProperty(longName);
    return configured != null && !configured.equals("false");
}
/**
 * Returns the single value of an option, preferring the command line over the config file.
 *
 * The command line is queried by the option's short name, or by its long name when the
 * option has no short name. The config file is always queried by long name.
 *
 * @param option     the option to resolve
 * @param lineArgs   the parsed command line
 * @param properties the loaded config file, or null when none was given
 * @return the value, or null when the option is set in neither source
 */
private static String getPriorityOptionValue(Option option, CommandLine lineArgs, Properties properties) {
    String longName = option.getLongOpt();
    // Options built without a short name report null from getOpt(); fall back to the long name.
    String cliName = option.getOpt() != null ? option.getOpt() : longName;

    if (cliName != null && lineArgs.hasOption(cliName)) {
        return lineArgs.getOptionValue(cliName);
    }
    if (properties != null && longName != null) {
        // getProperty yields null when the key is absent
        return properties.getProperty(longName);
    }
    return null;
}
/**
 * Returns all values of an option, preferring the command line over the config file.
 *
 * The command line is queried by the option's short name, or by its long name when the
 * option has no short name. A config file value is split on single spaces.
 *
 * @param option     the option to resolve
 * @param lineArgs   the parsed command line
 * @param properties the loaded config file, or null when none was given
 * @return the values, or null when the option is set in neither source
 */
private static String[] getOptionValues(Option option, CommandLine lineArgs, Properties properties) {
    String longName = option.getLongOpt();
    // Options built without a short name report null from getOpt(); fall back to the long name.
    String cliName = option.getOpt() != null ? option.getOpt() : longName;

    if (cliName != null && lineArgs.hasOption(cliName)) {
        return lineArgs.getOptionValues(cliName);
    }
    if (properties != null && longName != null) {
        String configured = properties.getProperty(longName);
        if (configured != null) {
            return configured.split(" ");
        }
    }
    return null;
}
/**
 * Prints the usage text for the given options using a commons-cli HelpFormatter.
 *
 * @param options the options to describe
 */
private static void printHelp(Options options) {
    final String usageHeader = "java -jar mapper.jar \noptions:";
    new HelpFormatter().printHelp(usageHeader, options);
}
/**
 * Sets the level of the root logger, which is cast to the logback implementation.
 *
 * @param level the logback level to apply
 */
private static void setLoggerLevel(Level level) {
    ch.qos.logback.classic.Logger rootLogger =
            (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME);
    rootLogger.setLevel(level);
}
/**
 * Writes a store in the requested serialization format.
 *
 * The format name is matched case-insensitively and defaults to "nquads" when null.
 * For "hdt" the store is first written as N-Quads to a temporary file (removed on JVM exit),
 * which is then converted via {@code Utils.ntriples2hdt}.
 *
 * @param store      the quads to write
 * @param outputFile destination path, or null (the non-HDT path then writes to stdout)
 * @param format     serialization format name, or null for the default
 */
private static void writeOutput(QuadStore store, String outputFile, String format) {
    // Locale.ROOT keeps the lower-casing independent of the platform locale.
    String normalizedFormat = (format == null) ? "nquads" : format.toLowerCase(Locale.ROOT);

    if (!normalizedFormat.equals("hdt")) {
        writeOutputUncompressed(store, outputFile, normalizedFormat);
        return;
    }

    try {
        File tmpFile = File.createTempFile("file", ".nt");
        tmpFile.deleteOnExit();
        String uncompressedOutputFile = tmpFile.getAbsolutePath();
        writeOutputUncompressed(store, uncompressedOutputFile, "nquads");
        Utils.ntriples2hdt(uncompressedOutputFile, outputFile);
    } catch (IOException e) {
        logger.error("Writing HDT output failed. Reason: {}", e.getMessage(), e);
    }
}
/**
 * Serializes a store as UTF-8 text, either to a file or to stdout.
 *
 * A relative output path is resolved against the "user.dir" system property.
 * Failures are logged and not rethrown. System.out is only flushed, never closed.
 *
 * @param store      the quads to write
 * @param outputFile destination path, or null to write to stdout
 * @param format     serialization format name passed to the store
 * @return the file written to, or null when writing to stdout
 */
private static File writeOutputUncompressed(QuadStore store, String outputFile, String format) {
    File targetFile = null;

    if (store.size() > 1) {
        logger.info("{} quads were generated for default Target", store.size());
    } else {
        logger.info("{} quad was generated for default Target", store.size());
    }

    Writer out = null;
    final boolean toFile = outputFile != null;
    try {
        if (toFile) {
            // if output file provided, write to triples output file
            targetFile = new File(outputFile);
            logger.info("Writing quads to {}...", targetFile.getPath());
            if (!targetFile.isAbsolute()) {
                targetFile = new File(System.getProperty("user.dir") + "/" + outputFile);
            }
            out = Files.newBufferedWriter(targetFile.toPath(), StandardCharsets.UTF_8);
        } else {
            out = new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
        }

        store.write(out, format);

        if (toFile) {
            logger.info("Writing to " + targetFile.getPath() + " is done.");
        }
    } catch (Exception e) {
        // Keep the stack trace: the message alone is often not enough to diagnose the failure
        logger.error("Writing output failed. Reason: {}", e.getMessage(), e);
    } finally {
        if (out != null) {
            try {
                if (toFile) {
                    out.close();
                } else {
                    // Never close System.out, not even after a failed write; just push out what was buffered
                    out.flush();
                }
            } catch (IOException e) {
                logger.error("Could not close writer. ", e);
            }
        }
    }

    return targetFile;
}
/**
 * Creates the store used to collect quads.
 * The requested format is currently not consulted: an RDF4JStore is always returned.
 *
 * @param outputFormat the requested serialization format (unused)
 * @return a new, empty RDF4JStore
 */
private static QuadStore getStoreForFormat(String outputFormat) {
    QuadStore store = new RDF4JStore();
    return store;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy