be.ugent.rml.records.TabularSourceFactory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rmlmapper Show documentation
Show all versions of rmlmapper Show documentation
The RMLMapper executes RML rules to generate high-quality Linked Data from multiple originally (semi-)structured data sources.
The newest version!
package be.ugent.rml.records;
import be.ugent.idlab.knows.dataio.access.Access;
import be.ugent.idlab.knows.dataio.iterators.CSVSourceIterator;
import be.ugent.idlab.knows.dataio.iterators.ExcelSourceIterator;
import be.ugent.idlab.knows.dataio.iterators.ODSSourceIterator;
import be.ugent.idlab.knows.dataio.record.Record;
import be.ugent.rml.store.QuadStore;
import be.ugent.rml.term.Term;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
/**
 * Record factory for tabular data sources.
 *
 * <p>Dispatches on the content type reported by the {@link Access} and reads
 * all records from a CSV, CSVW, Excel (OOXML spreadsheet) or OpenDocument
 * spreadsheet source.
 */
public class TabularSourceFactory implements ReferenceFormulationRecordFactory {

    /**
     * Returns all records of a tabular data source.
     *
     * <p>The content type of the access is compared case-insensitively against
     * the supported tabular content types.
     *
     * @param access        the access from which records need to be fetched.
     * @param logicalSource the used Logical Source (only consulted for CSVW).
     * @param rmlStore      the QuadStore with the RML rules (only consulted for CSVW).
     * @return a list of records.
     * @throws IllegalArgumentException if the content type is missing or is not
     *                                  one of the supported tabular content types.
     * @throws Exception                any failure propagated from the underlying
     *                                  source iterator or CSVW reader.
     */
    @Override
    public List<Record> getRecords(Access access, Term logicalSource, QuadStore rmlStore) throws Exception {
        // Read the content type once so the dispatch and the error message agree.
        String contentType = access.getContentType();
        if (contentType == null) {
            // Report a missing content type the same way as an unsupported one
            // instead of failing with a bare NullPointerException.
            throw new IllegalArgumentException("Unrecognised content type: null");
        }

        // Locale.ROOT keeps the case mapping independent of the JVM default
        // locale (e.g. Turkish maps 'I' to a dotless 'ı', which would make an
        // upper-case "APPLICATION/..." content type miss its case label).
        return switch (contentType.toLowerCase(Locale.ROOT)) {
            case "text/csv" -> getRecordsForCSV(access);
            case "text/csvw" -> getRecordsForCSVW(access, rmlStore, logicalSource);
            case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -> getRecordsForExcel(access);
            case "application/vnd.oasis.opendocument.spreadsheet" -> getRecordsForODS(access);
            default ->
                    throw new IllegalArgumentException(String.format("Unrecognised content type: %s", contentType));
        };
    }

    /**
     * Reads all records of an Excel spreadsheet.
     *
     * @param access Access to consume records from.
     * @return a list of records.
     * @throws Exception any failure propagated while creating, consuming or
     *                   closing the {@link ExcelSourceIterator}.
     */
    private List<Record> getRecordsForExcel(Access access) throws Exception {
        // try-with-resources closes the iterator even if reading fails half-way.
        try (ExcelSourceIterator iterator = new ExcelSourceIterator(access)) {
            List<Record> records = new ArrayList<>();
            iterator.forEachRemaining(records::add);
            return records;
        }
    }

    /**
     * Reads all records of an OpenDocument spreadsheet (ODS).
     *
     * @param access Access to consume records from.
     * @return a list of records.
     * @throws Exception any failure propagated while creating, consuming or
     *                   closing the {@link ODSSourceIterator}.
     */
    private List<Record> getRecordsForODS(Access access) throws Exception {
        try (ODSSourceIterator iterator = new ODSSourceIterator(access)) {
            List<Record> records = new ArrayList<>();
            iterator.forEachRemaining(records::add);
            return records;
        }
    }

    /**
     * Reads all records of a plain CSV source (no CSVW metadata).
     *
     * @param access the used access.
     * @return a list of records.
     * @throws Exception any failure propagated while creating, consuming or
     *                   closing the {@link CSVSourceIterator}.
     */
    private List<Record> getRecordsForCSV(Access access) throws Exception {
        try (CSVSourceIterator iterator = new CSVSourceIterator(access)) {
            List<Record> records = new ArrayList<>();
            iterator.forEachRemaining(records::add);
            return records;
        }
    }

    /**
     * Reads all records of a CSVW source by delegating to {@link CSVW}, which
     * is constructed from the RML rules and the Logical Source.
     *
     * @param access        the used access.
     * @param rmlStore      the QuadStore with the RML rules.
     * @param logicalSource the used Logical Source.
     * @return a list of records.
     * @throws Exception any failure propagated from {@link CSVW}.
     */
    private List<Record> getRecordsForCSVW(Access access, QuadStore rmlStore, Term logicalSource) throws Exception {
        CSVW csvw = new CSVW(rmlStore, logicalSource);
        return csvw.getRecords(access);
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy