// org.etlunit.io.file.FlatFileSchema Maven / Gradle / Ivy
package org.etlunit.io.file;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.ArrayNode;
import org.etlunit.json.validator.ClasspathSchemaResolver;
import org.etlunit.json.validator.JsonSchemaValidationException;
import org.etlunit.json.validator.JsonUtils;
import org.etlunit.json.validator.JsonValidator;
import org.etlunit.parser.ETLTestParser;
import org.etlunit.parser.ETLTestValueObject;
import org.etlunit.parser.ParseException;
import org.etlunit.util.IOUtils;
import org.etlunit.util.JSonBuilderProxy;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Schema describing a flat data file that is either delimited or fixed-width.
 *
 * <p>A schema holds the row delimiter, the column delimiter (delimited files only), the token
 * that represents a null value, the ordered list of columns, and the optional order-by and
 * primary-key column lists. It can be built programmatically, or loaded from an ffml JSON
 * document (file, string or classpath resource).
 *
 * <p>NOTE(review): collection-typed public signatures are intentionally left raw, exactly as
 * they appear in this file, so that they remain override-compatible with {@code DataFileSchema}
 * whatever element types that interface declares. Internal state is fully parameterized.
 */
class FlatFileSchema implements DataFileSchema
{
    /**
     * Cache of type name to type validator, filled on the first call to
     * {@link #resolveValidatorForType(String)}.
     * NOTE(review): populated lazily without synchronization - confirm callers are single-threaded.
     */
    private static final Map<String, FlatFileType> typePatterns = new HashMap<String, FlatFileType>();

    private final String id;
    private String rowDelimiter;
    private String columnDelimiter;
    private String nullToken;

    // Each column list is mirrored by a list of the column ids, kept in the same order.
    private final List<Column> columns = new ArrayList<Column>();
    private final List<String> columnNames = new ArrayList<String>();
    private final List<Column> orderColumns = new ArrayList<Column>();
    private final List<String> orderColumnNames = new ArrayList<String>();
    private final List<Column> keyColumns = new ArrayList<Column>();
    private final List<String> keyColumnNames = new ArrayList<String>();

    private format_type formatType;
    private final DataFileManager dataFileManager;

    /** Total width of a fixed-width line; stays -1 until the first fixed-width column is added. */
    private int lineLength = -1;

    /**
     * Creates an empty schema (no columns) with the given attributes.
     *
     * @param resourceId      identifier of this schema
     * @param format          delimited or fixed
     * @param rowDelimiter    row delimiter
     * @param columnDelimiter column delimiter; must be null for fixed and non-null for delimited
     * @param nullDelimiter   token that represents a null value in the data
     * @param dataFileManager manager used for defaults and for creating columns
     * @throws IllegalArgumentException if the column delimiter does not agree with the format
     */
    public FlatFileSchema(
        String resourceId,
        format_type format,
        String rowDelimiter,
        String columnDelimiter,
        String nullDelimiter,
        DataFileManager dataFileManager
    )
    {
        this.dataFileManager = dataFileManager;
        this.formatType = format;
        this.id = resourceId;
        this.rowDelimiter = rowDelimiter;
        this.columnDelimiter = columnDelimiter;
        this.nullToken = nullDelimiter;

        validateInternal();
    }

    /**
     * Creates a schema from the {@code flat-file} node of an ffml document.
     *
     * <p>Reads {@code row-delimiter}, {@code format-type}, the optional {@code column-delimiter}
     * and {@code null-token}, then {@code columns}, and the optional {@code orderBy} and
     * {@code primaryKey} arrays. When {@code orderBy} is absent, every column becomes an
     * order column.
     *
     * @param ffmlSch         the {@code flat-file} JSON node
     * @param resourceId      identifier of this schema
     * @param dataFileManager manager used for the default null token and for creating columns
     * @throws IllegalArgumentException if the format type is not recognised, the column delimiter
     *                                  does not agree with the format, or a column is invalid
     */
    public FlatFileSchema(
        JsonNode ffmlSch,
        String resourceId,
        DataFileManager dataFileManager
    )
    {
        this.dataFileManager = dataFileManager;
        this.id = resourceId;

        rowDelimiter = ffmlSch.get("row-delimiter").asText();

        String form = ffmlSch.get("format-type").asText();
        if (form.equals("delimited"))
        {
            formatType = format_type.delimited;
        }
        else if (form.equals("fixed"))
        {
            formatType = format_type.fixed;
        }
        else
        {
            throw new IllegalArgumentException("Bad format type. Please test before making changes to the schema.");
        }

        // A missing attribute and an explicit JSON null both mean "no column delimiter".
        columnDelimiter = null;
        if (ffmlSch.has("column-delimiter"))
        {
            JsonNode delimiterNode = ffmlSch.get("column-delimiter");
            if (!delimiterNode.isNull())
            {
                columnDelimiter = delimiterNode.asText();
            }
        }

        if (ffmlSch.has("null-token"))
        {
            // An explicit JSON null selects the literal text "null" as the token.
            JsonNode tokenNode = ffmlSch.get("null-token");
            nullToken = tokenNode.isNull() ? "null" : tokenNode.asText();
        }
        else
        {
            nullToken = dataFileManager.getDefaultNullToken();
        }

        validateInternal();

        ArrayNode columnNodes = (ArrayNode) ffmlSch.get("columns");
        for (int i = 0; i < columnNodes.size(); i++)
        {
            addColumn(new SchemaColumn(columnNodes.get(i), dataFileManager));
        }

        ArrayNode orderNodes = (ArrayNode) ffmlSch.get("orderBy");
        if (orderNodes != null)
        {
            for (int i = 0; i < orderNodes.size(); i++)
            {
                // this will fail if the column does not exist
                addOrderColumn(orderNodes.get(i).asText());
            }
        }
        else
        {
            // default is to order by all columns
            orderColumns.addAll(columns);
            orderColumnNames.addAll(columnNames);
        }

        // default is no key, so an absent primaryKey leaves the key lists empty
        ArrayNode keyNodes = (ArrayNode) ffmlSch.get("primaryKey");
        if (keyNodes != null)
        {
            for (int i = 0; i < keyNodes.size(); i++)
            {
                // this will fail if the column does not exist
                addKeyColumn(keyNodes.get(i).asText());
            }
        }
    }

    /**
     * Checks that the column delimiter agrees with the format type.
     *
     * @throws IllegalArgumentException if a fixed schema has a delimiter or a delimited one has none
     */
    private void validateInternal()
    {
        if (columnDelimiter != null && formatType == format_type.fixed)
        {
            throw new IllegalArgumentException("Fixed-width files do not have column delimiters");
        }

        if (columnDelimiter == null && formatType == format_type.delimited)
        {
            throw new IllegalArgumentException("Delimited files must have column delimiters");
        }
    }

    /** @return the identifier this schema was created with */
    public String getId()
    {
        return id;
    }

    /** @return the explicit order columns, or all columns when no order columns are set */
    @Override
    public List getOrderColumns()
    {
        return orderColumns.isEmpty() ? columns : orderColumns;
    }

    /** @return the row delimiter */
    public String getRowDelimiter()
    {
        return rowDelimiter;
    }

    /** @return the current format type */
    public format_type getFormatType()
    {
        return formatType;
    }

    /**
     * Creates a new, unattached column; the caller must still pass it to {@link #addColumn}.
     *
     * @param id identifier of the new column
     * @return the new column
     */
    public Column createColumn(String id)
    {
        return new SchemaColumn(id, null, dataFileManager);
    }

    /**
     * Appends a column to the schema.
     *
     * <p>For a fixed-width schema the column must carry a length; its offset is set to the end
     * of the previous column (0 for the first) and the line length grows by the column length.
     *
     * @param column the column to append
     * @throws IllegalArgumentException if a column with the same id exists, or the schema is
     *                                  fixed-width and the column length is -1
     */
    @Override
    public void addColumn(Column column)
    {
        if (columnNames.contains(column.getId()))
        {
            throw new IllegalArgumentException("Column already added: " + column.getId());
        }

        // fixed-width columns must declare a length; delimited columns are not checked here
        if (formatType == format_type.fixed)
        {
            if (column.getLength() == -1)
            {
                throw new IllegalArgumentException("Columns added to flat files must have a length provided");
            }

            if (columns.isEmpty())
            {
                column.setOffset(0);
                lineLength = 0;
            }
            else
            {
                Column lastCol = columns.get(columns.size() - 1);
                column.setOffset(lastCol.getOffset() + lastCol.getLength());
            }

            lineLength += column.getLength();
        }

        columns.add(column);
        columnNames.add(column.getId());
    }

    /**
     * Adds an existing column to the primary key.
     *
     * @param name id of a column already in the schema
     * @throws IllegalArgumentException if the column is already in the key or does not exist
     */
    @Override
    public void addKeyColumn(String name)
    {
        if (keyColumnNames.contains(name))
        {
            throw new IllegalArgumentException("Column already added to primary key: " + name);
        }

        Column col = getColumn(name);
        keyColumns.add(col);
        keyColumnNames.add(col.getId());
    }

    /**
     * Adds an existing column to the order clause.
     *
     * @param name id of a column already in the schema
     * @throws IllegalArgumentException if the column is already in the order clause or does not exist
     */
    @Override
    public void addOrderColumn(String name)
    {
        if (orderColumnNames.contains(name))
        {
            throw new IllegalArgumentException("Column already added to order clause: " + name);
        }

        Column col = getColumn(name);
        orderColumns.add(col);
        orderColumnNames.add(col.getId());
    }

    /**
     * Replaces the primary key with the named columns, in the given order.
     *
     * @param names column ids (strings)
     * @throws IllegalArgumentException if a name is repeated or does not exist
     */
    @Override
    public void setKeyColumns(List names)
    {
        keyColumnNames.clear();
        keyColumns.clear();

        // the parameter is raw, so each element is cast to the column id it is expected to be
        for (Object name : names)
        {
            addKeyColumn((String) name);
        }
    }

    /**
     * Replaces the order clause with the named columns, in the given order.
     *
     * @param names column ids (strings)
     * @throws IllegalArgumentException if a name is repeated or does not exist
     */
    @Override
    public void setOrderColumns(List names)
    {
        orderColumnNames.clear();
        orderColumns.clear();

        // the parameter is raw, so each element is cast to the column id it is expected to be
        for (Object name : names)
        {
            addOrderColumn((String) name);
        }
    }

    /** @return the column delimiter, or null when none is set */
    public String getColumnDelimiter()
    {
        return columnDelimiter;
    }

    /** @return the live list of columns, in schema order */
    public List getColumns()
    {
        return columns;
    }

    /**
     * Looks a column up by id.
     *
     * @param name column id
     * @return the matching column
     * @throws IllegalArgumentException if no column has that id
     */
    @Override
    public Column getColumn(String name)
    {
        for (Column candidate : columns)
        {
            if (candidate.getId().equals(name))
            {
                return candidate;
            }
        }

        throw new IllegalArgumentException("Column [" + name + "] not found");
    }

    /**
     * Loads and validates a schema from a file.
     *
     * @throws IllegalArgumentException if the file cannot be read or the schema is invalid
     */
    public static FlatFileSchema loadFromFile(File schemaPath, String resourceId, DataFileManager dataFileManager)
    {
        try
        {
            return loadFromString(IOUtils.readFileToString(schemaPath), resourceId, dataFileManager);
        }
        catch (IOException e)
        {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Validates the document against the ffml JSON schema and builds a schema from its
     * {@code flat-file} node.
     *
     * @throws IllegalArgumentException if validation fails
     */
    private static FlatFileSchema validate(JsonNode instance, String resourceId, DataFileManager dataFileManager)
    {
        try
        {
            JsonValidator validator = new JsonValidator(
                "org/etlunit/io/file/ffml/ffml.jsonSchema",
                new ClasspathSchemaResolver(FlatFileSchema.class)
            );

            validator.validate(instance);

            return new FlatFileSchema(instance.get("flat-file"), resourceId, dataFileManager);
        }
        catch (JsonSchemaValidationException e)
        {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Loads and validates a schema from its JSON text.
     *
     * @throws IllegalArgumentException if the text cannot be loaded or the schema is invalid
     */
    public static FlatFileSchema loadFromString(String schema, String resourceId, DataFileManager dataFileManager)
    {
        try
        {
            return validate(JsonUtils.loadJson(schema), resourceId, dataFileManager);
        }
        catch (JsonSchemaValidationException e)
        {
            throw new IllegalArgumentException(e);
        }
    }

    /** Loads a schema from the classpath using this class's class loader. */
    public static FlatFileSchema loadFromResource(String resourceId, DataFileManager dataFileManager)
    {
        return loadFromResource(resourceId, FlatFileSchema.class.getClassLoader(), dataFileManager);
    }

    /**
     * Loads a schema from the classpath, resolving any {@code flat-file.extends} reference.
     *
     * @throws IllegalArgumentException if the resource cannot be parsed or the schema is invalid
     */
    public static FlatFileSchema loadFromResource(String resourceId, ClassLoader classLoader, DataFileManager dataFileManager)
    {
        try
        {
            return validate(getJsonNode(resourceId, classLoader), resourceId, dataFileManager);
        }
        catch (ParseException e)
        {
            throw new IllegalArgumentException(e);
        }
    }

    /** Resolves the resource (including its base schemas) and converts it to a JSON node. */
    private static JsonNode getJsonNode(String resourceId, ClassLoader classLoader) throws ParseException
    {
        return getNode(resourceId, classLoader).getJsonNode();
    }

    /**
     * Parses the resource and, when it declares {@code flat-file.extends}, recursively loads the
     * base resource and left-merges it with this one.
     * NOTE(review): there is no guard against a cyclic extends chain; such input would recurse
     * without bound.
     */
    private static ETLTestValueObject getNode(String resourceId, ClassLoader classLoader) throws ParseException
    {
        String ffml = ClasspathSchemaResolver.resolveClasspath(resourceId, classLoader);

        ETLTestValueObject instance = ETLTestParser.loadObject(ffml);

        ETLTestValueObject extendsQ = instance.query("flat-file.extends");
        if (extendsQ != null)
        {
            // load the base schema named by the extends uri, then merge instance >> base
            ETLTestValueObject extended = getNode(extendsQ.getValueAsString(), classLoader);
            instance = instance.merge(extended, ETLTestValueObject.merge_type.left_merge);
        }

        return instance;
    }

    /**
     * Splits one line of data into column values and validates each value against its column.
     *
     * <p>Delimited lines are split on the literal column delimiter (trailing empty values are
     * kept); fixed-width lines are sliced by column offset and length. A value equal to the null
     * token is stored as {@code null} and is not validated.
     *
     * @param line one row of the file, without its row delimiter
     * @return map of column id to text value (or null)
     * @throws IllegalArgumentException if the line has the wrong length or column count
     * @throws IllegalStateException    if the format type is neither delimited nor fixed
     */
    public Map validateAndSplitLine(String line)
    {
        List<String> colData;

        if (formatType == format_type.delimited)
        {
            // quote the delimiter so it is matched literally; -1 keeps trailing empty columns
            colData = Arrays.asList(line.split(Pattern.quote(columnDelimiter), -1));
        }
        else if (formatType == format_type.fixed)
        {
            colData = splitFixedWidth(line);
        }
        else
        {
            throw new IllegalStateException("Unsupported format type: " + formatType);
        }

        if (colData.size() != columns.size())
        {
            throw new IllegalArgumentException("Line does not have the correct number of columns: expected["
                + columns.size()
                + "], actual["
                + colData.size()
                + "] "
                + line);
        }

        Map<String, String> map = new HashMap<String, String>();

        for (int colNo = 0; colNo < colData.size(); colNo++)
        {
            String token = colData.get(colNo);
            Column schemaCol = columns.get(colNo);

            // token is never null here, so comparing from its side tolerates a null nullToken
            if (token.equals(nullToken))
            {
                map.put(schemaCol.getId(), null);
            }
            else
            {
                schemaCol.validateText(token);
                map.put(schemaCol.getId(), token);
            }
        }

        return map;
    }

    /** Slices a fixed-width line into one text value per column, by column offset and length. */
    private List<String> splitFixedWidth(String line)
    {
        if (line.length() != lineLength)
        {
            throw new IllegalArgumentException("Illegal line - incorrect length. Found["
                + line.length()
                + "] required ["
                + lineLength
                + "]: "
                + line);
        }

        List<String> colData = new ArrayList<String>(columns.size());

        for (Column schemaCol : columns)
        {
            int offset = schemaCol.getOffset();
            int endIndex = offset + schemaCol.getLength();

            if (line.length() < endIndex)
            {
                throw new IllegalArgumentException("Illegal line - length too short: " + line);
            }

            colData.add(line.substring(offset, endIndex));
        }

        return colData;
    }

    /**
     * Returns the validator registered for a type name, loading every
     * {@code reference/ffml/types.ffml} resource on the classpath the first time it is needed.
     *
     * @param type type name
     * @return the registered type, or null when the name is unknown
     * @throws RuntimeException if the type resources cannot be read
     */
    public static FlatFileType resolveValidatorForType(String type)
    {
        if (typePatterns.isEmpty())
        {
            try
            {
                Enumeration<URL> resources =
                    FlatFileSchema.class.getClassLoader().getResources("reference/ffml/types.ffml");

                while (resources.hasMoreElements())
                {
                    String text = IOUtils.readURLToString(resources.nextElement());
                    typePatterns.putAll(FlatFileType.load(text));
                }
            }
            catch (IOException e)
            {
                throw new RuntimeException(e);
            }
        }

        return typePatterns.get(type);
    }

    /**
     * Creates a sub view containing every column except the listed ones.
     *
     * @param columns ids of the columns to leave out
     * @param id      id of the new schema, or null to reuse this schema's id
     * @param format  format of the new schema
     */
    @Override
    public DataFileSchema createSubViewExcludingColumns(List columns, String id, format_type format)
    {
        List<String> incColumns = new ArrayList<String>(columnNames);

        Iterator<String> it = incColumns.iterator();
        while (it.hasNext())
        {
            if (columns.contains(it.next()))
            {
                it.remove();
            }
        }

        return createSubViewIncludingColumns(incColumns, id, format);
    }

    /** Creates a sub view, in this schema's format, containing every column except the listed ones. */
    @Override
    public DataFileSchema createSubViewExcludingColumns(List columns, String id)
    {
        return createSubViewExcludingColumns(columns, id, formatType);
    }

    /** Creates a sub view, in this schema's format, containing only the listed columns. */
    public DataFileSchema createSubViewIncludingColumns(
        List columns,
        String id
    )
    {
        return createSubViewIncludingColumns(columns, id, formatType);
    }

    /**
     * Creates a sub view containing only the listed columns, in this schema's column order.
     * Order and key membership are carried over for the columns that are kept.
     *
     * <p>NOTE(review): columns are shared with this schema and are added without going through
     * {@link #addColumn}, so a fixed-width sub view keeps a line length of -1 and the parent's
     * offsets; {@link #validateAndSplitLine} on such a view will reject lines. Confirm whether
     * fixed-width sub views are ever used for parsing.
     *
     * @param columns ids of the columns to keep, or null to keep all columns
     * @param id      id of the new schema, or null to reuse this schema's id
     * @param format  format of the new schema, or null to reuse this schema's format
     * @return the new schema
     * @throws IllegalArgumentException if the number of matched columns differs from the number requested
     */
    public DataFileSchema createSubViewIncludingColumns
    (
        List columns,
        String id,
        format_type format
    )
    {
        // resolve the effective format first so that a null argument means "same as this schema"
        format_type targetFormat = format != null ? format : this.formatType;

        String delim = this.columnDelimiter;
        if (this.formatType == format_type.delimited && targetFormat == format_type.fixed)
        {
            // fixed-width schemas must not carry a column delimiter
            delim = null;
        }
        else if (this.formatType == format_type.fixed && targetFormat == format_type.delimited)
        {
            // this schema has no delimiter to hand down, so fall back to the default
            delim = dataFileManager.getDefaultColumnDelimiter();
        }

        FlatFileSchema newSchema = new FlatFileSchema(
            id != null ? id : this.id,
            targetFormat,
            this.rowDelimiter,
            delim,
            nullToken,
            dataFileManager
        );

        if (columns != null)
        {
            for (Column col : this.columns)
            {
                if (!columns.contains(col.getId()))
                {
                    continue;
                }

                newSchema.columns.add(col);
                newSchema.columnNames.add(col.getId());

                if (orderColumns.contains(col))
                {
                    newSchema.orderColumns.add(col);
                    newSchema.orderColumnNames.add(col.getId());
                }

                if (keyColumns.contains(col))
                {
                    newSchema.keyColumns.add(col);
                    newSchema.keyColumnNames.add(col.getId());
                }
            }

            // a requested id that matched nothing (or was repeated) leaves the counts unequal
            if (newSchema.columns.size() != columns.size())
            {
                throw new IllegalArgumentException("Unmatched columns in view");
            }
        }
        else
        {
            newSchema.columns.addAll(this.columns);
            newSchema.columnNames.addAll(this.columnNames);
            newSchema.orderColumns.addAll(orderColumns);
            newSchema.orderColumnNames.addAll(orderColumnNames);
            newSchema.keyColumns.addAll(keyColumns);
            newSchema.keyColumnNames.addAll(keyColumnNames);
        }

        return newSchema;
    }

    /** @return the live list of column ids, in schema order */
    @Override
    public List getColumnNames()
    {
        return columnNames;
    }

    /** @return the explicit order column ids, or all column ids when no order columns are set */
    @Override
    public List getOrderColumnNames()
    {
        return orderColumnNames.isEmpty() ? columnNames : orderColumnNames;
    }

    /** @return the live list of primary key columns */
    @Override
    public List getKeyColumns()
    {
        return keyColumns;
    }

    /** @return the live list of primary key column ids */
    @Override
    public List getKeyColumnNames()
    {
        return keyColumnNames;
    }

    /**
     * Serializes this schema as a pretty-printed ffml JSON document.
     *
     * @return the JSON text
     * @throws RuntimeException if the generated JSON cannot be reloaded for printing
     */
    public String toJsonString()
    {
        JSonBuilderProxy jprocs = new JSonBuilderProxy()
            .object()
            .key("flat-file")
            .object()
            .key("format-type")
            // emit the real format; the enum constant names match the values the loader accepts
            .value(getFormatType().name())
            .key("row-delimiter")
            .value(getRowDelimiter())
            .key("column-delimiter")
            .value(getColumnDelimiter())
            .key("null-token")
            .value(getNullToken())
            .key("columns")
            .array();

        for (Column col : columns)
        {
            jprocs =
                jprocs.object()
                    .key("id")
                    .value(col.getId())
                    .key("type")
                    .value(col.getType())
                    .key("length")
                    .value(col.getLength())
                    .key("basic-type")
                    .value(col.getBasicType().name())
                    .endObject();
        }

        jprocs = jprocs.endArray();

        if (getKeyColumnNames().size() != 0)
        {
            jprocs = jprocs.key("primaryKey").value(getKeyColumnNames());
        }

        if (getOrderColumnNames().size() != 0)
        {
            jprocs = jprocs.key("orderBy").value(getOrderColumnNames());
        }

        jprocs = jprocs.endObject().endObject();

        // load into Jackson and pretty-print
        try
        {
            return JsonUtils.printJson(JsonUtils.loadJson(jprocs.toString()));
        }
        catch (JsonSchemaValidationException e)
        {
            throw new RuntimeException(e);
        }
    }

    /** @return the token that represents a null value in the data */
    public String getNullToken()
    {
        return nullToken;
    }

    /**
     * Sets the column delimiter.
     *
     * @throws IllegalArgumentException if this schema is fixed-width
     */
    public void setColumnDelimiter(String columnDelimiter)
    {
        if (formatType == format_type.fixed)
        {
            throw new IllegalArgumentException("Fixed-width files do not have column delimiters");
        }

        this.columnDelimiter = columnDelimiter;
    }

    /** Sets the row delimiter. */
    public void setRowDelimiter(String rowDelimiter)
    {
        this.rowDelimiter = rowDelimiter;
    }

    /** Sets the token that represents a null value in the data. */
    public void setNullToken(String nullToken)
    {
        this.nullToken = nullToken;
    }

    /**
     * Changes the format type. When the type actually changes, the column delimiter is cleared
     * if the current format is delimited, and otherwise reset to the manager's default delimiter.
     *
     * @param type the new format type
     */
    @Override
    public void setFormatType(format_type type)
    {
        if (formatType == type)
        {
            return;
        }

        if (formatType == format_type.delimited)
        {
            columnDelimiter = null;
        }
        else
        {
            // set the column delimiter to the default
            columnDelimiter = dataFileManager.getDefaultColumnDelimiter();
        }

        formatType = type;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy