All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.etlunit.io.file.FlatFileSchema Maven / Gradle / Ivy

package org.etlunit.io.file;

import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.ArrayNode;
import org.etlunit.json.validator.ClasspathSchemaResolver;
import org.etlunit.json.validator.JsonSchemaValidationException;
import org.etlunit.json.validator.JsonUtils;
import org.etlunit.json.validator.JsonValidator;
import org.etlunit.parser.ETLTestParser;
import org.etlunit.parser.ETLTestValueObject;
import org.etlunit.parser.ParseException;
import org.etlunit.util.IOUtils;
import org.etlunit.util.JSonBuilderProxy;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.regex.Pattern;

class FlatFileSchema implements DataFileSchema
{
	/** Cache of type validators keyed by the type string; filled on first use by resolveValidatorForType. */
	private static final Map<String, FlatFileType> typePatterns = new HashMap<String, FlatFileType>();

	/** Identifier this schema was created with. */
	private final String id;
	/** Row delimiter as supplied to the constructor or setRowDelimiter. */
	private String rowDelimiter;
	/** Column delimiter; validateInternal requires it for delimited schemas and forbids it for fixed ones. */
	private String columnDelimiter;
	/** Text that validateAndSplitLine maps to a null column value. */
	private String nullToken;

	/** All columns in the order they were added, with their ids in a parallel list. */
	private final List<Column> columns = new ArrayList<Column>();
	private final List<String> columnNames = new ArrayList<String>();

	/** Columns used for ordering, with their ids in a parallel list. */
	private final List<Column> orderColumns = new ArrayList<Column>();
	private final List<String> orderColumnNames = new ArrayList<String>();

	/** Primary-key columns, with their ids in a parallel list. */
	private final List<Column> keyColumns = new ArrayList<Column>();
	private final List<String> keyColumnNames = new ArrayList<String>();

	/** Current layout of the file (delimited or fixed). */
	private format_type formatType;
	/** Supplies the default null token and default column delimiter. */
	private final DataFileManager dataFileManager;

	/** Sum of column lengths maintained by addColumn for fixed schemas; -1 until the first such column is added. */
	private int lineLength = -1;

	/**
	 * Creates a schema without columns from explicitly supplied settings.
	 *
	 * @param resourceId      identifier reported by {@link #getId()}
	 * @param format          layout of the file
	 * @param rowDelimiter    row delimiter
	 * @param columnDelimiter column delimiter; must be null for fixed and non-null for delimited formats
	 * @param nullDelimiter   token that represents a null column value
	 * @param dataFileManager manager used for defaults and for creating columns
	 * @throws IllegalArgumentException if the format and column delimiter are inconsistent
	 */
	public FlatFileSchema(
			String resourceId,
			format_type format,
			String rowDelimiter,
			String columnDelimiter,
			String nullDelimiter,
			DataFileManager dataFileManager
	)
	{
		this.id = resourceId;
		this.formatType = format;
		this.dataFileManager = dataFileManager;

		this.rowDelimiter = rowDelimiter;
		this.columnDelimiter = columnDelimiter;
		this.nullToken = nullDelimiter;

		// reject an inconsistent format / delimiter pairing before the schema is used
		validateInternal();
	}

	/**
	 * Builds a schema from the JSON object that describes a flat file.
	 *
	 * Reads "row-delimiter", "format-type", the optional "column-delimiter" and
	 * "null-token", then the "columns" array and the optional "orderBy" and
	 * "primaryKey" arrays.
	 *
	 * @param ffmlSch         JSON object holding the schema properties
	 * @param resourceId      identifier reported by {@link #getId()}
	 * @param dataFileManager manager used for the default null token and for creating columns
	 * @throws IllegalArgumentException if the format type is neither "delimited" nor "fixed",
	 *                                  if the format and column delimiter are inconsistent, or
	 *                                  if a column, order column or key column is rejected
	 */
	public FlatFileSchema(
			JsonNode ffmlSch,
			String resourceId,
			DataFileManager dataFileManager
	)
	{
		this.dataFileManager = dataFileManager;
		this.id = resourceId;
		this.rowDelimiter = ffmlSch.get("row-delimiter").asText();

		String declaredFormat = ffmlSch.get("format-type").asText();
		boolean isDelimited = declaredFormat.equals("delimited");

		if (!isDelimited && !declaredFormat.equals("fixed"))
		{
			throw new IllegalArgumentException("Bad format type.  Please test before making changes to the schema.");
		}

		this.formatType = isDelimited ? format_type.delimited : format_type.fixed;

		// a missing property and an explicit JSON null both leave the delimiter unset
		String declaredDelimiter = null;

		if (ffmlSch.has("column-delimiter"))
		{
			JsonNode delimiterNode = ffmlSch.get("column-delimiter");

			if (!delimiterNode.isNull())
			{
				declaredDelimiter = delimiterNode.asText();
			}
		}

		this.columnDelimiter = declaredDelimiter;

		if (!ffmlSch.has("null-token"))
		{
			this.nullToken = dataFileManager.getDefaultNullToken();
		}
		else
		{
			// an explicit JSON null selects the literal text "null" as the token
			JsonNode nullTokenNode = ffmlSch.get("null-token");
			this.nullToken = nullTokenNode.isNull() ? "null" : nullTokenNode.asText();
		}

		validateInternal();

		ArrayNode columnNodes = (ArrayNode) ffmlSch.get("columns");

		for (JsonNode columnNode : columnNodes)
		{
			addColumn(new SchemaColumn(columnNode, dataFileManager));
		}

		ArrayNode orderNodes = (ArrayNode) ffmlSch.get("orderBy");

		if (orderNodes == null)
		{
			// default is to order by all columns
			orderColumns.addAll(columns);
			orderColumnNames.addAll(columnNames);
		}
		else
		{
			for (JsonNode orderNode : orderNodes)
			{
				// this will fail if the column does not exist
				addOrderColumn(orderNode.asText());
			}
		}

		ArrayNode keyNodes = (ArrayNode) ffmlSch.get("primaryKey");

		// without a "primaryKey" array the schema simply has no key
		if (keyNodes != null)
		{
			for (JsonNode keyNode : keyNodes)
			{
				// this will fail if the column does not exist
				addKeyColumn(keyNode.asText());
			}
		}
	}

	/**
	 * Checks that the column delimiter agrees with the format type.
	 *
	 * @throws IllegalArgumentException if a fixed schema has a column delimiter or a
	 *                                  delimited schema lacks one
	 */
	private void validateInternal()
	{
		boolean hasColumnDelimiter = columnDelimiter != null;

		if (formatType == format_type.fixed && hasColumnDelimiter)
		{
			throw new IllegalArgumentException("Fixed-width files do not have column delimiters");
		}

		if (formatType == format_type.delimited && !hasColumnDelimiter)
		{
			throw new IllegalArgumentException("Delimited files must have column delimiters");
		}
	}

	/** @return the identifier this schema was created with */
	public String getId()
	{
		return this.id;
	}

	/** @return the order columns, or every column when no order columns are set */
	@Override
	public List getOrderColumns()
	{
		if (orderColumns.isEmpty())
		{
			return getColumns();
		}

		return orderColumns;
	}

	/** @return the current row delimiter */
	public String getRowDelimiter()
	{
		return this.rowDelimiter;
	}

	/** @return the current format type of this schema */
	public format_type getFormatType()
	{
		return this.formatType;
	}

	/**
	 * Creates a new column object; the column is NOT added to this schema.
	 *
	 * @param id identifier for the new column
	 * @return a SchemaColumn built from the id, a null second argument and this schema's data file manager
	 */
	public Column createColumn(String id)
	{
		Column created = new SchemaColumn(id, null, dataFileManager);

		return created;
	}

	/**
	 * Appends a column to the schema.
	 *
	 * For fixed schemas the column's offset is set to the end of the previously
	 * added column (0 for the first one) and the line length grows by the
	 * column's length.
	 *
	 * @param column column to append
	 * @throws IllegalArgumentException if a column with the same id was already added, or
	 *                                  the schema is fixed and the column's length is -1
	 */
	@Override
	public void addColumn(Column column)
	{
		String columnId = column.getId();

		if (columnNames.contains(columnId))
		{
			throw new IllegalArgumentException("Column already added: " + columnId);
		}

		// fixed files need a length on every column so that offsets can be derived
		if (formatType == format_type.fixed)
		{
			if (column.getLength() == -1)
			{
				throw new IllegalArgumentException("Columns added to flat files must have a length provided");
			}

			if (columns.isEmpty())
			{
				// first column starts the line
				column.setOffset(0);
				lineLength = 0;
			}
			else
			{
				// every later column starts where the previous one ends
				Column previous = columns.get(columns.size() - 1);
				column.setOffset(previous.getOffset() + previous.getLength());
			}

			lineLength += column.getLength();
		}

		columns.add(column);
		columnNames.add(columnId);
	}

	/**
	 * Adds an existing column to the primary key.
	 *
	 * @param name id of a column already in this schema
	 * @throws IllegalArgumentException if the column is already part of the key or does not exist
	 */
	@Override
	public void addKeyColumn(String name)
	{
		boolean alreadyInKey = keyColumnNames.contains(name);

		if (alreadyInKey)
		{
			throw new IllegalArgumentException("Column already added to primary key: " + name);
		}

		// getColumn throws when the name is unknown
		Column keyColumn = getColumn(name);

		keyColumns.add(keyColumn);
		keyColumnNames.add(keyColumn.getId());
	}

	/**
	 * Adds an existing column to the order clause.
	 *
	 * @param name id of a column already in this schema
	 * @throws IllegalArgumentException if the column is already an order column or does not exist
	 */
	@Override
	public void addOrderColumn(String name)
	{
		boolean alreadyOrdered = orderColumnNames.contains(name);

		if (alreadyOrdered)
		{
			throw new IllegalArgumentException("Column already added to order clause: " + name);
		}

		// getColumn throws when the name is unknown
		Column orderColumn = getColumn(name);

		orderColumns.add(orderColumn);
		orderColumnNames.add(orderColumn.getId());
	}

	/**
	 * Replaces the primary key with the named columns.
	 *
	 * The existing key is cleared before the new names are processed.
	 *
	 * @param names ids of the columns that form the new key; each element must be a String
	 * @throws IllegalArgumentException if a name is repeated or does not match a column
	 */
	@Override
	public void setKeyColumns(List names)
	{
		keyColumnNames.clear();
		keyColumns.clear();

		// the parameter is declared as a raw List here, so elements arrive as Object
		for (Object entry : names)
		{
			addKeyColumn((String) entry);
		}
	}

	/**
	 * Replaces the order clause with the named columns.
	 *
	 * The existing order columns are cleared before the new names are processed.
	 *
	 * @param names ids of the columns to order by; each element must be a String
	 * @throws IllegalArgumentException if a name is repeated or does not match a column
	 */
	@Override
	public void setOrderColumns(List names)
	{
		orderColumnNames.clear();
		orderColumns.clear();

		// the parameter is declared as a raw List here, so elements arrive as Object
		for (Object entry : names)
		{
			addOrderColumn((String) entry);
		}
	}

	/** @return the current column delimiter; may be null */
	public String getColumnDelimiter()
	{
		return this.columnDelimiter;
	}

	/** @return the schema's own column list (not a copy), in the order columns were added */
	public List getColumns()
	{
		return this.columns;
	}

	/**
	 * Looks up a column by id.
	 *
	 * @param name id to search for
	 * @return the first column whose id equals the name
	 * @throws IllegalArgumentException if no column has that id
	 */
	@Override
	public Column getColumn(String name)
	{
		for (int index = 0; index < columns.size(); index++)
		{
			Column candidate = columns.get(index);

			if (candidate.getId().equals(name))
			{
				return candidate;
			}
		}

		throw new IllegalArgumentException("Column [" + name + "] not found");
	}

	/**
	 * Reads a schema document from a file and builds a schema from it.
	 *
	 * @param schemaPath      file containing the schema text
	 * @param resourceId      identifier for the resulting schema
	 * @param dataFileManager manager handed to the schema
	 * @return the loaded schema
	 * @throws IllegalArgumentException wrapping the IOException if the file cannot be read
	 */
	public static FlatFileSchema loadFromFile(File schemaPath, String resourceId, DataFileManager dataFileManager)
	{
		final String schemaText;

		try
		{
			schemaText = IOUtils.readFileToString(schemaPath);
		}
		catch (IOException e)
		{
			throw new IllegalArgumentException(e);
		}

		return loadFromString(schemaText, resourceId, dataFileManager);
	}

	/**
	 * Validates a parsed document against the ffml JSON schema and builds a
	 * FlatFileSchema from its "flat-file" member.
	 *
	 * @param instance        parsed document
	 * @param resourceId      identifier for the resulting schema
	 * @param dataFileManager manager handed to the schema
	 * @return the schema built from the document
	 * @throws IllegalArgumentException wrapping the validation failure
	 */
	private static FlatFileSchema validate(JsonNode instance, String resourceId, DataFileManager dataFileManager)
	{
		try
		{
			// the ffml schema is resolved from the classpath relative to this class
			ClasspathSchemaResolver resolver = new ClasspathSchemaResolver(FlatFileSchema.class);
			JsonValidator validator = new JsonValidator("org/etlunit/io/file/ffml/ffml.jsonSchema", resolver);

			validator.validate(instance);

			JsonNode flatFile = instance.get("flat-file");

			return new FlatFileSchema(flatFile, resourceId, dataFileManager);
		}
		catch (JsonSchemaValidationException e)
		{
			throw new IllegalArgumentException(e);
		}
	}

	/**
	 * Parses schema text as JSON, validates it and builds a schema from it.
	 *
	 * @param schema          schema document text
	 * @param resourceId      identifier for the resulting schema
	 * @param dataFileManager manager handed to the schema
	 * @return the loaded schema
	 * @throws IllegalArgumentException wrapping a JsonSchemaValidationException raised while loading the JSON
	 */
	public static FlatFileSchema loadFromString(String schema, String resourceId, DataFileManager dataFileManager)
	{
		try
		{
			JsonNode parsed = JsonUtils.loadJson(schema);

			return validate(parsed, resourceId, dataFileManager);
		}
		catch (JsonSchemaValidationException e)
		{
			throw new IllegalArgumentException(e);
		}
	}

	/**
	 * Loads a schema from a classpath resource using the class loader of FlatFileSchema.
	 *
	 * @param resourceId      resource to load; also used as the schema id
	 * @param dataFileManager manager handed to the schema
	 * @return the loaded schema
	 */
	public static FlatFileSchema loadFromResource(String resourceId, DataFileManager dataFileManager)
	{
		ClassLoader ownLoader = FlatFileSchema.class.getClassLoader();

		return loadFromResource(resourceId, ownLoader, dataFileManager);
	}

	/**
	 * Loads a schema from a classpath resource using the given class loader.
	 *
	 * @param resourceId      resource to load; also used as the schema id
	 * @param classLoader     class loader used to resolve the resource
	 * @param dataFileManager manager handed to the schema
	 * @return the loaded schema
	 * @throws IllegalArgumentException wrapping a ParseException raised while reading the resource
	 */
	public static FlatFileSchema loadFromResource(String resourceId, ClassLoader classLoader, DataFileManager dataFileManager)
	{
		try
		{
			JsonNode document = getJsonNode(resourceId, classLoader);

			return validate(document, resourceId, dataFileManager);
		}
		catch (ParseException e)
		{
			throw new IllegalArgumentException(e);
		}
	}

	/**
	 * Loads the resource through getNode and converts the result to a JsonNode.
	 *
	 * @param resourceId  resource to load
	 * @param classLoader class loader used to resolve the resource
	 * @return the JSON form of the loaded object
	 * @throws ParseException if the resource cannot be parsed
	 */
	private static JsonNode getJsonNode(String resourceId, ClassLoader classLoader) throws ParseException
	{
		ETLTestValueObject loaded = getNode(resourceId, classLoader);

		return loaded.getJsonNode();
	}

	/**
	 * Loads a schema object from the classpath and, when it declares
	 * "flat-file.extends", loads the referenced resource the same way and
	 * merges the two.
	 *
	 * @param resourceId  resource to load
	 * @param classLoader class loader used to resolve the resource
	 * @return the loaded object, merged with whatever it extends
	 * @throws ParseException           if a resource cannot be parsed
	 * @throws IllegalArgumentException if the extends chain refers back to a resource id that is already being loaded
	 */
	private static ETLTestValueObject getNode(String resourceId, ClassLoader classLoader) throws ParseException
	{
		return getNode(resourceId, classLoader, new LinkedHashSet<String>());
	}

	/**
	 * Recursive worker for getNode; chain holds the resource ids currently being resolved.
	 */
	private static ETLTestValueObject getNode(String resourceId, ClassLoader classLoader, Set<String> chain)
			throws ParseException
	{
		// an id that is already in the chain would otherwise recurse until the stack overflows
		if (!chain.add(resourceId))
		{
			throw new IllegalArgumentException("Circular extends chain at [" + resourceId + "]: " + chain);
		}

		String ffml = ClasspathSchemaResolver.resolveClasspath(resourceId, classLoader);

		ETLTestValueObject instance = ETLTestParser.loadObject(ffml);

		ETLTestValueObject extendsQ = instance.query("flat-file.extends");
		if (extendsQ != null)
		{
			// load the l-side (base)
			ETLTestValueObject extended = getNode(extendsQ.getValueAsString(), classLoader, chain);

			// merge from instance >> extended
			instance = instance.merge(extended, ETLTestValueObject.merge_type.left_merge);
		}

		return instance;
	}

	/**
	 * Splits one line into column values and validates each value against its column.
	 *
	 * Delimited lines are split on the literal column delimiter, keeping trailing
	 * empty values. Fixed lines must have exactly the schema's line length and are
	 * cut at each column's offset and length. A value equal to the null token is
	 * stored as null and is not validated.
	 *
	 * @param line line to split, without its row delimiter
	 * @return map from column id to the column's text, or to null for the null token
	 * @throws IllegalArgumentException if the line has the wrong length or the wrong number of columns
	 */
	public Map validateAndSplitLine(String line)
	{
		Map<String, String> map = new HashMap<String, String>();

		List<String> colData = null;

		switch (formatType)
		{
			case delimited:
				// split on the delimiter, quoted so it is matched literally rather than as a regex
				String search = Pattern.quote(columnDelimiter);

				// limit -1 keeps trailing empty columns
				colData = Arrays.asList(line.split(search, -1));

				break;

			case fixed:
				if (line.length() != lineLength)
				{
					throw new IllegalArgumentException("Illegal line - incorrect length.  Found["
							+ line.length()
							+ "] required ["
							+ lineLength
							+ "]: "
							+ line);
				}

				colData = new ArrayList<String>();

				for (int i = 0; i < columns.size(); i++)
				{
					Column schemaCol = (Column) columns.get(i);

					int offset = schemaCol.getOffset();
					int endIndex = offset + schemaCol.getLength();

					if (line.length() < endIndex)
					{
						throw new IllegalArgumentException("Illegal line - length too short: " + line);
					}

					colData.add(line.substring(offset, endIndex));
				}

				break;
		}

		if (colData.size() != columns.size())
		{
			throw new IllegalArgumentException("Line does not have the correct number of columns: expected["
					+ columns.size()
					+ "], actual["
					+ colData.size()
					+ "] "
					+ line);
		}

		for (int colNo = 0; colNo < colData.size(); colNo++)
		{
			String token = colData.get(colNo);

			// explicit cast: getColumns() is declared with a raw List in this file
			Column schemaCol = (Column) getColumns().get(colNo);

			// token is never null here, so comparing from its side also works when no null token is set
			if (token.equals(nullToken))
			{
				map.put(schemaCol.getId(), null);
			}
			else
			{
				schemaCol.validateText(token);

				map.put(schemaCol.getId(), token);
			}
		}

		return map;
	}

	/**
	 * Returns the validator registered for a type name.
	 *
	 * While the cache is empty, every "reference/ffml/types.ffml" resource visible
	 * to this class's class loader is read and its types are added to the cache.
	 *
	 * @param type type name to look up
	 * @return the cached validator, or null if the type is not known
	 * @throws RuntimeException wrapping an IOException raised while reading the resources
	 */
	public static FlatFileType resolveValidatorForType(String type)
	{
		if (typePatterns.isEmpty())
		{
			try
			{
				Enumeration<URL> typeResources =
						FlatFileSchema.class.getClassLoader().getResources("reference/ffml/types.ffml");

				while (typeResources.hasMoreElements())
				{
					URL url = typeResources.nextElement();

					String str = IOUtils.readURLToString(url);

					typePatterns.putAll(FlatFileType.load(str));
				}
			}
			catch (IOException e)
			{
				throw new RuntimeException(e);
			}
		}

		// explicit cast keeps this valid whether or not the cache field carries type arguments
		return (FlatFileType) typePatterns.get(type);
	}

	/**
	 * Creates a view of this schema without the named columns.
	 *
	 * @param columns ids of the columns to leave out
	 * @param id      id for the view
	 * @param format  format type for the view
	 * @return the view produced by createSubViewIncludingColumns for the remaining column ids
	 */
	@Override
	public DataFileSchema createSubViewExcludingColumns(List columns, String id, format_type format)
	{
		List incColumns = new ArrayList();

		// keep every column id that the caller did not ask to exclude
		for (Object columnName : getColumnNames())
		{
			if (!columns.contains(columnName))
			{
				incColumns.add(columnName);
			}
		}

		return createSubViewIncludingColumns(incColumns, id, format);
	}

	/**
	 * Creates a view without the named columns, keeping this schema's format type.
	 *
	 * @param columns ids of the columns to leave out
	 * @param id      id for the view
	 * @return the resulting view
	 */
	@Override
	public DataFileSchema createSubViewExcludingColumns(List columns, String id)
	{
		return createSubViewExcludingColumns(columns, id, this.formatType);
	}

	/**
	 * Creates a view containing only the named columns, keeping this schema's format type.
	 *
	 * @param columns ids of the columns to keep
	 * @param id      id for the view
	 * @return the resulting view
	 */
	public DataFileSchema createSubViewIncludingColumns(
			List columns,
			String id
	)
	{
		return createSubViewIncludingColumns(columns, id, this.formatType);
	}

	/**
	 * Creates a new schema that contains only the named columns of this one.
	 *
	 * Columns keep their relative order, and order and key membership carry over
	 * for the columns that are kept. The Column objects are shared with this
	 * schema, not copied.
	 *
	 * @param columns ids of the columns to keep, or null to keep all columns
	 * @param id      id for the view, or null to reuse this schema's id
	 * @param format  format type for the view, or null to reuse this schema's format type
	 * @return the new schema
	 * @throws IllegalArgumentException if the number of matched columns differs from the number of requested ids
	 */
	public DataFileSchema createSubViewIncludingColumns
			(
					List columns,
					String id,
					format_type format
			)
	{
		// resolve the fallback first so that a null format no longer fails in the delimiter selection
		format_type targetFormat = format != null ? format : this.formatType;

		String delim = this.columnDelimiter;

		if (formatType == format_type.delimited && targetFormat == format_type.fixed)
		{
			// fixed files carry no column delimiter
			delim = null;
		}
		else if (formatType == format_type.fixed && targetFormat == format_type.delimited)
		{
			// this schema has no delimiter to inherit, so use the manager's default
			delim = dataFileManager.getDefaultColumnDelimiter();
		}

		FlatFileSchema newSchema = new FlatFileSchema(
				id != null ? id : this.id,
				targetFormat,
				this.rowDelimiter,
				delim,
				nullToken,
				dataFileManager
		);

		// NOTE(review): columns are added to the lists directly rather than through addColumn,
		// so the view's lineLength stays -1 and column offsets are not recomputed - confirm
		// whether fixed-format views are expected to split lines.
		if (columns != null)
		{
			for (Object entry : getColumns())
			{
				// explicit cast: getColumns() is declared with a raw List in this file
				Column col = (Column) entry;

				if (columns.contains(col.getId()))
				{
					newSchema.columns.add(col);
					newSchema.columnNames.add(col.getId());

					if (orderColumns.contains(col))
					{
						newSchema.orderColumns.add(col);
						newSchema.orderColumnNames.add(col.getId());
					}

					if (keyColumns.contains(col))
					{
						newSchema.keyColumns.add(col);
						newSchema.keyColumnNames.add(col.getId());
					}
				}
			}

			// a size mismatch means a requested id matched no column (or was listed twice)
			if (newSchema.columns.size() != columns.size())
			{
				throw new IllegalArgumentException("Unmatched columns in view");
			}
		}
		else
		{
			newSchema.columns.addAll(this.columns);
			newSchema.columnNames.addAll(this.columnNames);
			newSchema.orderColumns.addAll(orderColumns);
			newSchema.orderColumnNames.addAll(orderColumnNames);
			newSchema.keyColumns.addAll(keyColumns);
			newSchema.keyColumnNames.addAll(keyColumnNames);
		}

		return newSchema;
	}

	/** @return the schema's own list of column ids (not a copy), in the order columns were added */
	@Override
	public List getColumnNames()
	{
		return this.columnNames;
	}

	/** @return the ids of the order columns, or all column ids when no order columns are set */
	@Override
	public List getOrderColumnNames()
	{
		if (orderColumnNames.isEmpty())
		{
			return getColumnNames();
		}

		return orderColumnNames;
	}

	/** @return the schema's own list of primary-key columns (not a copy); empty when no key is set */
	@Override
	public List getKeyColumns()
	{
		return this.keyColumns;
	}

	/** @return the schema's own list of primary-key column ids (not a copy); empty when no key is set */
	@Override
	public List getKeyColumnNames()
	{
		return this.keyColumnNames;
	}

	/**
	 * Serializes this schema as a pretty-printed JSON document with a single
	 * "flat-file" object.
	 *
	 * The object holds the format type, row delimiter, column delimiter, null
	 * token and one entry per column (id, type, length, basic-type). "primaryKey"
	 * and "orderBy" are written only when the corresponding name list is not empty.
	 *
	 * @return the JSON text
	 * @throws RuntimeException wrapping a JsonSchemaValidationException raised while re-loading the generated JSON
	 */
	public String toJsonString()
	{
		JSonBuilderProxy jprocs = new JSonBuilderProxy()
				.object()
				.key("flat-file")
				.object()
				.key("format-type")
				// write the schema's real format; the constant names match the values the JSON constructor accepts
				.value(getFormatType().name())
				.key("row-delimiter")
				.value(getRowDelimiter())
				.key("column-delimiter")
				.value(getColumnDelimiter())
				.key("null-token")
				.value(getNullToken())
				.key("columns")
				.array();

		for (Object entry : getColumns())
		{
			// explicit cast: getColumns() is declared with a raw List in this file
			Column col = (Column) entry;

			jprocs =
					jprocs.object()
							.key("id")
							.value(col.getId())
							.key("type")
							.value(col.getType())
							.key("length")
							.value(col.getLength())
							.key("basic-type")
							.value(col.getBasicType().name())
							.endObject();
		}

		jprocs = jprocs.endArray();

		if (getKeyColumnNames().size() != 0)
		{
			jprocs = jprocs.key("primaryKey").value(getKeyColumnNames());
		}

		if (getOrderColumnNames().size() != 0)
		{
			jprocs = jprocs.key("orderBy").value(getOrderColumnNames());
		}

		// close the "flat-file" object and the enclosing document object
		jprocs = jprocs.endObject().endObject();

		// load into Jackson and pretty-print
		try
		{
			return JsonUtils.printJson(JsonUtils.loadJson(jprocs.toString()));
		}
		catch (JsonSchemaValidationException e)
		{
			throw new RuntimeException(e);
		}
	}

	/** @return the token that validateAndSplitLine reads as a null column value */
	public String getNullToken()
	{
		return this.nullToken;
	}

	/**
	 * Changes the column delimiter of a schema that is not fixed-width.
	 *
	 * @param columnDelimiter new delimiter
	 * @throws IllegalArgumentException if this schema's format type is fixed
	 */
	public void setColumnDelimiter(String columnDelimiter)
	{
		boolean fixedWidth = format_type.fixed == formatType;

		if (fixedWidth)
		{
			throw new IllegalArgumentException("Fixed-width files do not have column delimiters");
		}

		// NOTE(review): a null delimiter is accepted here although validateInternal rejects it
		// for delimited schemas - confirm whether that is intended.
		this.columnDelimiter = columnDelimiter;
	}

	/**
	 * Replaces the row delimiter; no validation is performed.
	 *
	 * @param rowDelimiter new row delimiter
	 */
	public void setRowDelimiter(String rowDelimiter)
	{
		this.rowDelimiter = rowDelimiter;
	}

	/**
	 * Replaces the token that validateAndSplitLine reads as a null column value; no validation is performed.
	 *
	 * @param nullToken new null token
	 */
	public void setNullToken(String nullToken)
	{
		this.nullToken = nullToken;
	}

	/**
	 * Switches the schema to another format type and adjusts the column delimiter.
	 *
	 * Nothing happens when the type is unchanged. When leaving the delimited
	 * format the column delimiter is cleared; otherwise it is set to the data
	 * file manager's default delimiter.
	 *
	 * @param type new format type
	 */
	@Override
	public void setFormatType(format_type type)
	{
		if (formatType == type)
		{
			return;
		}

		// the decision is based on the format being left, not the one being entered
		columnDelimiter = (formatType == format_type.delimited)
				? null
				: dataFileManager.getDefaultColumnDelimiter();

		formatType = type;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy