be.ugent.rml.records.CSVW Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rmlmapper Show documentation
Show all versions of rmlmapper Show documentation
The RMLMapper executes RML rules to generate high-quality Linked Data from multiple, originally (semi-)structured, data sources.
The newest version!
package be.ugent.rml.records;
import be.ugent.idlab.knows.dataio.access.Access;
import be.ugent.idlab.knows.dataio.iterators.CSVWSourceIterator;
import be.ugent.idlab.knows.dataio.iterators.csvw.CSVWConfiguration;
import be.ugent.idlab.knows.dataio.iterators.csvw.CSVWConfigurationBuilder;
import be.ugent.idlab.knows.dataio.record.Record;
import be.ugent.rml.NAMESPACES;
import be.ugent.rml.Utils;
import be.ugent.rml.store.QuadStore;
import be.ugent.rml.term.NamedNode;
import be.ugent.rml.term.Term;
import org.apache.commons.io.Charsets;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Builds a {@link CSVWConfiguration} from the CSVW description attached to the source of a
 * Logical Source, and uses that configuration to read the records of the source.
 */
public class CSVW {

    private final QuadStore rmlStore;
    private final Term logicalSource;

    /**
     * @param rmlStore      store holding the RML rules that are queried for the CSVW options
     * @param logicalSource term identifying the Logical Source whose source is described with CSVW
     */
    CSVW(QuadStore rmlStore, Term logicalSource) {
        this.rmlStore = rmlStore;
        this.logicalSource = logicalSource;
    }

    /**
     * Read the records from the given Access, using the CSVW configuration of the first
     * {@code source} object found for the Logical Source.
     *
     * @param access The access containing the records
     * @return The list of records in the Access
     * @throws IllegalStateException if the Logical Source has no {@code source} object in the store
     * @throws Exception             if creating, iterating or closing the underlying iterator fails
     */
    List<Record> getRecords(Access access) throws Exception {
        List<Term> sources = Utils.getObjectsFromQuads(
                this.rmlStore.getQuads(this.logicalSource, new NamedNode(NAMESPACES.RML2 + "source"), null));
        if (sources.isEmpty()) {
            // Fail with a readable message instead of an IndexOutOfBoundsException from get(0).
            throw new IllegalStateException(
                    "No " + NAMESPACES.RML2 + "source found for logical source " + this.logicalSource);
        }

        // Only the first source is considered; any additional ones are ignored.
        Term source = sources.get(0);
        CSVWConfiguration config = getConfiguration(source);

        List<Record> records = new ArrayList<>();
        try (CSVWSourceIterator iterator = new CSVWSourceIterator(access, config)) {
            iterator.forEachRemaining(records::add);
        }
        return records;
    }

    /**
     * Builds the CSVW configuration for the given source term: the {@code null} values set on
     * the source itself plus, when present, the options of its first {@code dialect}.
     *
     * @param source term of the source carrying the CSVW description
     * @return the resulting configuration
     */
    private CSVWConfiguration getConfiguration(Term source) {
        CSVWConfigurationBuilder configBuilder = CSVWConfiguration.builder();
        configBuilder = setOptionList(source, "null", configBuilder, CSVWConfigurationBuilder::withNulls);

        // extract data from dialect
        List<Term> dialectTerms = getOptionTerms(source, "dialect");
        if (!dialectTerms.isEmpty()) {
            Term dialect = dialectTerms.get(0);
            configBuilder = setDialectOptions(dialect, configBuilder);
        }
        return configBuilder.build();
    }

    /**
     * Copies every supported dialect option that is present on the dialect term into the builder.
     *
     * @param dialect       term of the CSVW dialect
     * @param configBuilder builder to set the options in
     * @return the builder with all present dialect options applied
     */
    private CSVWConfigurationBuilder setDialectOptions(Term dialect, CSVWConfigurationBuilder configBuilder) {
        configBuilder = setOptionString(dialect, "commentPrefix", configBuilder, CSVWConfigurationBuilder::withCommentPrefix);
        configBuilder = setOptionChar(dialect, "delimiter", configBuilder, CSVWConfigurationBuilder::withDelimiter);
        // NOTE(review): the first character of the csvw:doubleQuote value is used as the escape
        // character. The CSVW vocabulary defines doubleQuote as a boolean, so a value such as
        // "true" would yield 't' here - confirm what withEscapeCharacter is expected to receive.
        configBuilder = setOptionChar(dialect, "doubleQuote", configBuilder, CSVWConfigurationBuilder::withEscapeCharacter);
        configBuilder = setOptionString(dialect, "trim", configBuilder, CSVWConfigurationBuilder::withTrim);
        configBuilder = setOptionChar(dialect, "quoteChar", configBuilder, CSVWConfigurationBuilder::withQuoteCharacter);
        configBuilder = setOptionCharset(dialect, "encoding", configBuilder, CSVWConfigurationBuilder::withEncoding);
        return configBuilder;
    }

    /**
     * Looks up the objects of all quads in the store with the given subject and the CSVW
     * predicate named by {@code option}.
     *
     * @param term   subject to look the option up on
     * @param option local name of the CSVW property (appended to {@code NAMESPACES.CSVW})
     * @return the matching object terms
     */
    private List<Term> getOptionTerms(Term term, String option) {
        return Utils.getObjectsFromQuads(
                this.rmlStore.getQuads(term, new NamedNode(NAMESPACES.CSVW + option), null));
    }

    /**
     * Sets an option in CSVWConfigurationBuilder that expects a string
     *
     * @param term    Term containing the dialect
     * @param option  option to read from the dialect
     * @param builder CSVWConfigurationBuilder to set the option in
     * @param setter  method of CSVWConfigurationBuilder to call
     * @return a CSVWConfigurationBuilder with the option set if the option is present in the dialect, otherwise the original CSVWConfigurationBuilder is returned
     */
    private CSVWConfigurationBuilder setOptionString(Term term, String option, CSVWConfigurationBuilder builder, StringOptionSetter setter) {
        List<Term> optionTerms = getOptionTerms(term, option);
        if (optionTerms.isEmpty()) {
            return builder;
        }
        return setter.call(builder, optionTerms.get(0).getValue());
    }

    /**
     * Sets the charset option read from the {@code encoding} property of the dialect.
     * Delegates to the four-argument overload so that both share one charset lookup.
     *
     * @param dialect Term containing the dialect
     * @param builder CSVWConfigurationBuilder to set the option in
     * @param setter  method of CSVWConfigurationBuilder to call
     * @return a CSVWConfigurationBuilder with the encoding set if present, otherwise the original CSVWConfigurationBuilder is returned
     */
    private CSVWConfigurationBuilder setOptionCharset(Term dialect, CSVWConfigurationBuilder builder, CharsetOptionSetter setter) {
        return setOptionCharset(dialect, "encoding", builder, setter);
    }

    /**
     * Sets an option in CSVWConfigurationBuilder that expects a character
     *
     * @param term    Term containing the option
     * @param option  option to read from term
     * @param builder CSVWConfigurationBuilder to set the option in
     * @param setter  method of CSVWConfigurationBuilder to call
     * @return a CSVWConfigurationBuilder with the option set if the option is present in the term, otherwise the original CSVWConfigurationBuilder is returned
     * @throws IllegalArgumentException if the option is present but its value is empty
     */
    private CSVWConfigurationBuilder setOptionChar(Term term, String option, CSVWConfigurationBuilder builder, CharacterOptionSetter setter) {
        List<Term> optionTerms = getOptionTerms(term, option);
        if (optionTerms.isEmpty()) {
            return builder;
        }
        String value = optionTerms.get(0).getValue();
        if (value.isEmpty()) {
            // charAt(0) would otherwise fail with an unhelpful StringIndexOutOfBoundsException.
            throw new IllegalArgumentException(
                    "CSVW option '" + option + "' requires a single character but its value is empty");
        }
        // Only the first character is used; any further characters are ignored.
        return setter.call(builder, value.charAt(0));
    }

    /**
     * Sets an option in CSVWConfigurationBuilder that expects a list of strings; the values of
     * all matching terms are passed to the setter.
     *
     * @param term    Term containing the option
     * @param option  option to read from term
     * @param builder CSVWConfigurationBuilder to set the option in
     * @param setter  method of CSVWConfigurationBuilder to call
     * @return a CSVWConfigurationBuilder with the option set if the option is present in the term, otherwise the original CSVWConfigurationBuilder is returned
     */
    private CSVWConfigurationBuilder setOptionList(Term term, String option, CSVWConfigurationBuilder builder, ListOptionSetter setter) {
        List<Term> optionTerms = getOptionTerms(term, option);
        if (optionTerms.isEmpty()) {
            return builder;
        }
        List<String> values = optionTerms.stream().map(Term::getValue).collect(Collectors.toList());
        return setter.call(builder, values);
    }

    /**
     * Sets an option in CSVWConfigurationBuilder that expects a charset; the option value is
     * resolved as a charset name.
     *
     * @param term    Term containing the option
     * @param option  option to read from term
     * @param builder CSVWConfigurationBuilder to set the option in
     * @param setter  method of CSVWConfigurationBuilder to call
     * @return a CSVWConfigurationBuilder with the option set if the option is present in the term, otherwise the original CSVWConfigurationBuilder is returned
     */
    private CSVWConfigurationBuilder setOptionCharset(Term term, String option, CSVWConfigurationBuilder builder, CharsetOptionSetter setter) {
        List<Term> optionTerms = getOptionTerms(term, option);
        if (optionTerms.isEmpty()) {
            return builder;
        }
        String charsetString = optionTerms.get(0).getValue();
        Charset charset = Charsets.toCharset(charsetString);
        return setter.call(builder, charset);
    }

    /**
     * Functional interface to set a value of CSVWConfigurationBuilder that expects a string.
     */
    @FunctionalInterface
    private interface StringOptionSetter {
        CSVWConfigurationBuilder call(CSVWConfigurationBuilder builder, String value);
    }

    /**
     * Functional interface to set a value of CSVWConfigurationBuilder that expects a character.
     */
    @FunctionalInterface
    private interface CharacterOptionSetter {
        CSVWConfigurationBuilder call(CSVWConfigurationBuilder builder, Character value);
    }

    /**
     * Functional interface to set a value of CSVWConfigurationBuilder that expects a list of strings.
     */
    @FunctionalInterface
    private interface ListOptionSetter {
        CSVWConfigurationBuilder call(CSVWConfigurationBuilder builder, List<String> values);
    }

    /**
     * Functional interface to set a value of CSVWConfigurationBuilder that expects a charset.
     */
    @FunctionalInterface
    private interface CharsetOptionSetter {
        CSVWConfigurationBuilder call(CSVWConfigurationBuilder builder, Charset charset);
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy