// org.etlunit.io.file.FileDataImpl (Maven / Gradle / Ivy)
package org.etlunit.io.file;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.Semaphore;
/**
 * Exposes the rows of a {@link FlatFile} source, restricted to a selected list of
 * columns, through a single-use iterator.
 *
 * <p>The underlying reader is opened by {@link #iterator()} and is closed either when
 * the iterator reaches the end of the input or when {@link #dispose()} is called,
 * whichever happens first.
 */
class FileDataImpl implements DataFile.FileData
{
    private final FlatFile flatFile;
    private final List columns;
    private BufferedReader bread;

    // Open/closed flag for the reader: one available permit means the reader has not
    // been closed yet, zero permits means it has been closed.
    Semaphore state = new Semaphore(1);

    /**
     * @param flatFile the flat file whose source is read
     * @param columns  the column names to keep in every row; when {@code null} the
     *                 column names of the file's schema are used
     * @throws IllegalArgumentException if the resulting column list is empty
     */
    public FileDataImpl(FlatFile flatFile, List columns)
    {
        this.flatFile = flatFile;
        if (columns == null)
        {
            this.columns = flatFile.getDataFileSchema().getColumnNames();
        }
        else
        {
            this.columns = columns;
        }
        // verify at least one column is selected
        if (this.columns.size() == 0)
        {
            throw new IllegalArgumentException("At least one column must be specified in the select list");
        }
    }

    /**
     * Opens the source and returns the iterator over its rows. May be called only once
     * per instance.
     *
     * @return an iterator producing {@link DataFile.FileRow} objects
     * @throws IOException if an iterator was already requested, or if the source cannot
     *                     be opened
     */
    @Override
    public Iterator iterator() throws IOException
    {
        if (bread != null)
        {
            throw new IOException("One per costumer, please (I.E., don't call iterator() twice)");
        }
        // NOTE(review): FileReader decodes with the default charset - confirm that is intended.
        bread = new BufferedReader(new FileReader(flatFile.getSource()), 16384);
        return new DataIterator();
    }

    /**
     * Closes the reader if it was opened and has not been closed yet. Safe to call when
     * {@link #iterator()} was never invoked, and safe to call more than once.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void dispose() throws IOException
    {
        // Taking the permit marks the data as closed, so a later hasNext() reports
        // false instead of reading from a closed stream.
        if (bread != null && state.tryAcquire())
        {
            bread.close();
        }
    }

    /**
     * Tells whether a raw line carries data, i.e. after trimming it is not empty, is
     * not wrapped in {@code /*} ... {@code *}{@code /}, and does not start with {@code #}.
     */
    private static boolean isDataLine(String line)
    {
        String trim = line.trim();
        if (trim.equals(""))
        {
            return false;
        }
        if (trim.startsWith("/*") && trim.endsWith("*/"))
        {
            return false;
        }
        return !trim.startsWith("#");
    }

    /**
     * Iterator over the data lines of the source. Every returned row exposes the same
     * unmodifiable map view, whose contents are replaced by the following call to
     * {@link #next()}.
     */
    private class DataIterator implements Iterator
    {
        // The next data line, read ahead by hasNext(); null when none is buffered.
        String nextLine;
        private final Map lineData;
        private final Map publicLineData;

        public DataIterator()
        {
            nextLine = null;
            lineData = new HashMap();
            publicLineData = Collections.unmodifiableMap(lineData);
        }

        /**
         * Reads ahead to the next data line, skipping blank and comment lines. Closes
         * the reader once the end of the input is reached.
         *
         * @return {@code true} if a data line is available; {@code false} once the
         *         reader has been closed
         * @throws RuntimeException wrapping any {@link IOException} raised while reading
         */
        public boolean hasNext()
        {
            if (state.availablePermits() == 0)
            {
                return false;
            }
            if (nextLine != null)
            {
                return true;
            }
            while (state.availablePermits() == 1)
            {
                try
                {
                    String line = FlatFile.readLine(bread, flatFile.getDataFileSchema().getRowDelimiter());
                    if (line == null)
                    {
                        // end of input: close and mark as closed, which also ends the loop
                        bread.close();
                        state.tryAcquire();
                    }
                    else if (isDataLine(line))
                    {
                        nextLine = line;
                        break;
                    }
                }
                catch (IOException e)
                {
                    throw new RuntimeException(e);
                }
            }
            return nextLine != null;
        }

        /**
         * Splits the buffered line through the schema, keeps only the selected columns
         * and returns the resulting row.
         *
         * @return the row for the next data line
         * @throws IllegalStateException if no further data line exists, or if the line
         *                               does not yield one value per schema column
         */
        public DataFile.FileRow next()
        {
            // Read ahead here as well so next() works without a preceding hasNext().
            if (nextLine == null && !hasNext())
            {
                throw new IllegalStateException("Next line does not exist");
            }
            // convert the line into columns - mapping columns is deferred to the schema
            lineData.clear();
            lineData.putAll(flatFile.getDataFileSchema().validateAndSplitLine(nextLine));
            if (lineData.size() != flatFile.getDataFileSchema().getColumnNames().size())
            {
                throw new IllegalStateException("Line has the wrong number of columns.  Required["
                        + flatFile.getDataFileSchema().getColumnNames().size()
                        + "], actual["
                        + lineData.size()
                        + "] - source '"
                        + nextLine
                        + "'");
            }
            // remove unwanted columns: the key set is a live view, so this drops every
            // entry whose key is not in the select list
            lineData.keySet().retainAll(columns);
            // this method also needs to know about columns
            final OrderKey orderKey = FlatFile.addOrderKey(flatFile.getDataFileSchema(), lineData, columns);
            nextLine = null;
            return new DataFile.FileRow()
            {
                @Override
                public Map getData()
                {
                    return publicLineData;
                }

                @Override
                public OrderKey getOrderKey()
                {
                    return orderKey;
                }
            };
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        public void remove()
        {
            throw new UnsupportedOperationException();
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy