// de.sekmi.histream.etl.ETLObservationSupplier (Maven / Gradle / Ivy)
package de.sekmi.histream.etl;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.bind.JAXB;
import de.sekmi.histream.Observation;
import de.sekmi.histream.ObservationFactory;
import de.sekmi.histream.ObservationSupplier;
import de.sekmi.histream.etl.config.DataSource;
import de.sekmi.histream.etl.config.EavTable;
import de.sekmi.histream.etl.config.Meta;
import de.sekmi.histream.etl.config.PatientTable;
import de.sekmi.histream.etl.config.VisitTable;
import de.sekmi.histream.etl.config.WideTable;
import de.sekmi.histream.ext.Patient;
import de.sekmi.histream.ext.Visit;
import de.sekmi.histream.impl.ObservationFactoryImpl;
import de.sekmi.histream.impl.SimplePatientExtension;
import de.sekmi.histream.impl.SimpleVisitExtension;
/**
 * Supplier for observations which are loaded from arbitrary
 * table data.
 * <p>
 * Algorithm:
 * </p>
 * <ol>
 * <li>Read the first patient and the first visit and keep them as
 * {@code currentPatient} and {@code currentVisit}.</li>
 *
 * <li>For each concept table (including the patient and visit tables):
 * read the first row, add all concepts from that row to the concept queue,
 * and sort the concept queue by patid, visitid, start.</li>
 *
 * <li>Process/remove all concepts which belong to {@code currentPatient}
 * and {@code currentVisit}.</li>
 *
 * <li>If all concepts from one concept table have been removed,
 * fetch the next row from that table, add its concepts to the queue and sort.
 * Go to step 3.</li>
 *
 * <li>If no more concepts for {@code currentPatient} and {@code currentVisit}
 * are in the queue, fetch the next visit. Go to step 3.</li>
 *
 * <li>If no more concepts for {@code currentPatient} are in the queue,
 * fetch the next patient. Go to step 3.</li>
 *
 * <li>If the queue is empty (no more patients and visits), then
 * processing is done.</li>
 * </ol>
 *
 * @author marap1
 *
 */
public class ETLObservationSupplier implements ObservationSupplier{
private PatientTable pt;
private VisitTable vt;
private List wt;
private List et;
private RecordSupplier pr;
private RecordSupplier vr;
private List> fr;
private FactGroupingQueue queue;
private DataSource ds;
/**
* Build a new observation supplier with the supplied configuration file.
* Relative URLs within the configuration are resolved against the provided configuration url.
*
* @param configuration location configuration file
* @param factory observation factory
* @return observation supplier
*
* @throws IOException error reading configuration or data tables.
* @throws ParseException configuration error
*
*/
public static ETLObservationSupplier load(URL configuration, ObservationFactory factory) throws IOException, ParseException{
DataSource ds = JAXB.unmarshal(configuration, DataSource.class);
ds.getMeta().setLocation(configuration);
return new ETLObservationSupplier(ds, factory);
}
/**
* Same as {@link #load(URL, ObservationFactory)} with using a default observation factory.
* The default observation factory will only support Patient and Visit extensions.
*
* @param configuration configuration URL
* @return observation factory
* @throws IOException error reading configuration or table data
* @throws ParseException configuration error
*/
public static ETLObservationSupplier load(URL configuration) throws IOException, ParseException{
ObservationFactory of = new ObservationFactoryImpl();
of.registerExtension(new SimplePatientExtension());
of.registerExtension(new SimpleVisitExtension());
return load(configuration, of);
}
/**
* Construct a new observation supplier directly from a {@link DataSource}.
*
* @param ds data source
* @param factory observation factory
* @throws IOException error reading configuration or table data
* @throws ParseException configuration error
*/
public ETLObservationSupplier(DataSource ds, ObservationFactory factory) throws IOException, ParseException {
this.ds = ds;
pt = ds.getPatientTable();
vt = ds.getVisitTable();
wt = ds.getWideTables();
et = ds.getEavTables();
// TODO long tables
Meta meta = ds.getMeta();
// in case of exception, make sure already opened suppliers are closed
Exception error = null;
try{
pr = pt.open(factory, meta);
vr = vt.open(factory, meta);
queue = new FactGroupingQueue(pr, vr,
factory.getExtensionAccessor(Patient.class),
factory.getExtensionAccessor(Visit.class));
// open all tables
fr = new ArrayList<>(wt.size());
for( WideTable t : wt ){
//@SuppressWarnings("resource")
RecordSupplier s = t.open(factory, meta);
queue.addFactTable(s);
fr.add(s);
}
for( EavTable t : et ){
RecordSupplier s = t.open(factory, meta);
queue.addFactTable(s);
fr.add(s);
}
queue.prepare();
}catch( UncheckedIOException e ){
error = e.getCause();
}catch( UncheckedParseException e ){
error = e.getCause();
}catch( ParseException | IOException e ){
error = e;
}
if( error != null ){
try{
this.close();
}catch( IOException f ){
error.addSuppressed(f);
}
if( error instanceof ParseException ){
throw (ParseException)error;
}else{
throw (IOException)error;
}
}
}
@Override
public Observation get() {
return queue.next();
}
@Override
public void close() throws IOException {
IOException error = null;
if( pr != null ){
try{ pr.close(); }
catch( IOException e ){ error = e; }
pr=null;
}
if( vr != null ){
try{ vr.close(); }
catch( IOException e ){
if( error != null )error.addSuppressed(e);
else error = e;
}
vr=null;
}
if( fr != null ){
Iterator> i = fr.iterator();
while( i.hasNext() ){
try{ i.next().close(); }
catch( IOException e ){
if( error != null )error.addSuppressed(e);
else error = e;
}
i.remove();
}
}
if( error != null )throw error;
}
@Override
public String getMeta(String key) {
switch( key ){
case ObservationSupplier.META_ETL_STRATEGY:
return ds.getMeta().getETLStrategy();
case ObservationSupplier.META_SOURCE_ID:
return ds.getMeta().getSourceId();
case ObservationSupplier.META_ORDER_GROUPED:
return "true";
default:
return null;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy