com.movielabs.mddflib.avails.xml.streaming.StreamingXmlBuilder Maven / Gradle / Ivy

Go to download
/**
 * Copyright (c) 2019 MovieLabs

 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package com.movielabs.mddflib.avails.xml.streaming;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.filter.Filters;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import com.movielabs.mddf.MddfContext;
import com.movielabs.mddf.MddfContext.FILE_FMT;
import com.movielabs.mddflib.avails.xml.Pedigree;
import com.movielabs.mddflib.avails.xml.RowDataSrc;
import com.movielabs.mddflib.avails.xml.AbstractXmlBuilder;
import com.movielabs.mddflib.avails.xml.MetadataBuilder;
import com.movielabs.mddflib.avails.xml.AvailsSheet.Version;
import com.movielabs.mddflib.avails.xml.AvailsWrkBook.RESULT_STATUS;
import com.movielabs.mddflib.logging.LogMgmt;
import com.movielabs.mddflib.util.xml.FormatConverter;
import com.movielabs.mddflib.util.xml.MddfTarget;
import com.movielabs.mddflib.util.xml.SchemaWrapper;

/**
 * Converts an Avails file using the XLSX format to an XML DOM representation
 * using the XSSFReader API. This is an event-driven approach to SAX
 * processing that results in a greatly reduced memory footprint.
 * 
 * @author L. Levin, Critical Architectures LLC
 *
 */
public class StreamingXmlBuilder extends AbstractXmlBuilder {

	/**
	 * ContentHandler that allows termination of the ingest process by the
	 * StreamingXmlBuilder at any time. Termination is initiated by the
	 * handler throwing a XlsxDataTermination exception. The builder
	 * signals to the handler it should initiate termination by setting the field
	 * terminateNow to true.
	 * 
	 * @author L. Levin, Critical Architectures LLC
	 * 
	 */
	public class AvailSheetXMLHandler extends XSSFSheetXMLHandler implements ContentHandler {

		/**
		 * @param styles
		 * @param comments
		 * @param strings
		 * @param sheetContentsHandler
		 * @param dataFormatter
		 * @param formulasNotResults
		 */
		public AvailSheetXMLHandler(StylesTable styles, CommentsTable comments, ReadOnlySharedStringsTable strings,
				SheetContentsHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) {
			super(styles, comments, strings, sheetContentsHandler, dataFormatter, formulasNotResults);
		}

		/*
		 * (non-Javadoc)
		 * 
		 * @see
		 * org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler#startElement(java.lang
		 * .String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
		 */
		public void endElement(String uri, String localName, String qName) throws SAXException {
			if (terminateNow) {
				throw new XlsxDataTermination("Ran out of data on row " + lastDataRow);
			}
			super.endElement(uri, localName, qName);
		}

	}

	/**
	 * A <tt>SheetContentsHandler</tt> that uses the XSSF Event SAX helpers to do
	 * most of the work of parsing the Sheet XML. Data values are accumulated on a
	 * row-by-row basis. When the end of a row is signaled by the invoking of
	 * <tt>endRow()</tt>, the accumulated data is passed to
	 * <tt>StreamingXmlBuilder.processRow()</tt>.
	 * <p>
	 * Row 0 is collected in a list because its width is not known in advance. Every
	 * later row is collected in an array with the same number of columns as row 0;
	 * cells beyond that width are dropped with a warning, and cells never reported
	 * by the parser are left as <tt>null</tt>.
	 * </p>
	 */
	private class ParseByRow implements SheetContentsHandler {
		private boolean haveFirstRow = false;
		private boolean firstCellOfRow;
		private int currentRow = -1;
		/** Column of the last cell seen in the current row, or -1 if none yet. */
		private int currentCol = -1;
		/** Cells of row 0; its size fixes the width of all later rows. */
		private final List<String> rowContentsAsList;
		private String[] rowContentsAsArray;
		/** Index of the next free slot in <tt>rowContentsAsArray</tt>. */
		private int curCell = 0;
		private boolean rowHasData;

		ParseByRow() {
			rowContentsAsList = new ArrayList<>();
		}

		@Override
		public void startRow(int rowNum) {
			// Prepare for this row
			firstCellOfRow = true;
			currentCol = -1;
			currentRow = rowNum; // need if creating missing cellReference
			curCell = 0;
			if (haveFirstRow) {
				rowContentsAsArray = new String[rowContentsAsList.size()];
			} else {
				rowContentsAsArray = null;
			}
			rowHasData = false;
		}

		@Override
		public void endRow(int rowNum) {
			if (rowNum == 0) {
				// width of the header row is now known
				rowContentsAsArray = rowContentsAsList.toArray(new String[0]);
				haveFirstRow = true;
			}

			try {
				processRow(rowContentsAsArray, rowNum, rowHasData);
			} catch (XlsxDataTermination ignored) {
				/*
				 * Deliberately ignored: early termination is driven by the builder's
				 * terminateNow flag, which AvailSheetXMLHandler checks when the next
				 * element end is reported.
				 */
			}
		}

		@Override
		public void cell(String cellReference, String formattedValue, XSSFComment comment) {
			// gracefully handle missing CellRef here in a similar way as XSSFCell does
			if (cellReference == null) {
				/*
				 * A cell without an explicit reference immediately follows the previous
				 * cell. Using currentCol itself would leave the column pointer behind and
				 * cause a spurious empty cell to be inserted before the next referenced
				 * cell.
				 */
				cellReference = new CellAddress(currentRow, currentCol + 1).formatAsString();
			}
			// Did we miss any cells?
			int thisCol = (new CellReference(cellReference)).getCol();
			int missedCols = thisCol - currentCol - 1;
			for (int i = 0; i < missedCols; i++) {
				addDataToRow("");
			}
			currentCol = thisCol;
			addDataToRow(formattedValue);
		}

		/**
		 * Appends one cell value to the row being accumulated and records whether the
		 * row contains any non-empty value.
		 * 
		 * @param data formatted cell value (may be null or empty)
		 */
		private void addDataToRow(String data) {
			if (!haveFirstRow) {
				rowContentsAsList.add(data);
			} else if (curCell >= rowContentsAsArray.length) {
				// row is wider than the header row: drop the cell
				String colID = LogMgmt.mapColNum(curCell);
				int rowID = currentRow + 1;
				String msg = "Ignoring cell " + colID + " in row " + rowID + ": out of bounds";
				logger.log(LogMgmt.LEV_WARN, LogMgmt.TAG_XLATE, msg, null, moduleId);
				return;
			} else {
				rowContentsAsArray[curCell++] = data;
			}
			if (data != null && !data.isEmpty()) {
				rowHasData = true;
			}
		}

		@Override
		public void headerFooter(String text, boolean isHeader, String tagName) {
			// headers and footers carry no Avails data; intentionally ignored
		}
	}

	// ==============================================
	/**
	 * FOR TESTING ONLY!! The body is currently empty, so invoking this entry
	 * point does nothing.
	 * 
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
	}

	public static final String moduleId = "XmlBuilder_2";

	/**
	 * Number of contiguous empty rows that will result in termination of input
	 * processing.
	 */
	public static final int TERMINATION_THRESHOLD = 5;

	private LogMgmt logger;
	private Version templateVersion;

	private File curSrcXslxFile;
	private String shortDesc;

	private String xsdVersion;
	private String mdMecVer;
	private String mdVer;

	private Namespace availsNSpace;
	private Namespace mdNSpace;
	private Namespace mdMecNSpace;

	private SchemaWrapper availsSchema;
	private SchemaWrapper mdSchema;
	private SchemaWrapper mdMecSchema;

	private HashMap headerMap;
	private Map pedigreeMap = null;
	private Map availElRegistry = null;
	private Map> avail2AssetMap;
	private Map> avail2TransMap;
	private Map> avail2EntilementMap;
	private Map> entitlement2IdMap;
	private Map assetElRegistry;
	private Map element2SrcRowMap;

	private Element rootEl;

	private XPathFactory xpfac = XPathFactory.instance();

	private String[] headerRow_0;

	private boolean noPrefix = false;

	private int emptyRowCnt;

	private int lastDataRow;
	boolean terminateNow = false;

	private MetadataBuilder mdBuilder;

	/**
	 * @param logger
	 * @param sstVersion Avail XSLX version (i.e. '1.x')
	 */
	public StreamingXmlBuilder(LogMgmt logger, Version sstVersion) {
		this.logger = logger;
		this.templateVersion = sstVersion;
		switch (templateVersion) {
		case V1_9:
			setVersion("2.5");
			break; 
		case V1_8:
			setVersion("2.4");
			break;
		case V1_7_3:
			setVersion("2.3");
			break;
		case V1_7_2:
			setVersion("2.2.2");
			break;
		case V1_7:
			setVersion("2.2");
			break;
		default:
			logger.log(LogMgmt.LEV_FATAL, LogMgmt.TAG_AVAIL, "Unsupported template version " + templateVersion, null,
					moduleId);
			throw new IllegalArgumentException("Unsupported Avails Schema version " + templateVersion);
		}
		logger.log(LogMgmt.LEV_INFO, LogMgmt.TAG_AVAIL, "Ingesting as XLSX version " + templateVersion, null,
				moduleId);
	}

	/**
	 * Set the Avail XML version to use for output. Loads the Avails schema for the
	 * requested version together with the MD and MDMEC schemas it references, and
	 * sets the matching namespaces. <tt>xsdVersion</tt> is left <tt>null</tt>
	 * unless all three schemas could be loaded.
	 * 
	 * @param availXsdVersion version of the Avails XSD (e.g. "2.4")
	 * @return <tt>true</tt> if all required schemas were loaded
	 * @throws IllegalArgumentException if the version is not a recognized Avails
	 *                                  format
	 */
	private boolean setVersion(String availXsdVersion) {
		// reset so that a failure part-way through leaves no stale state
		availsSchema = null;
		mdSchema = null;
		mdMecSchema = null;
		xsdVersion = null;
		String xsdRsrc = "avails-v" + availXsdVersion;
		availsSchema = SchemaWrapper.factory(xsdRsrc);
		if (availsSchema == null) {
			return false;
		}
		availsNSpace = Namespace.getNamespace("avails",
				"http://www.movielabs.com/schema/avails/v" + availXsdVersion + "/avails");
		// Load supporting schemas:

		FILE_FMT availsFmt = MddfContext.identifyMddfFormat("avails", availXsdVersion);
		if (availsFmt == null) {
			throw new IllegalArgumentException("Unsupported Avails Schema version " + availXsdVersion);
		}
		Map<String, String> uses = MddfContext.getReferencedXsdVersions(availsFmt);

		mdMecVer = uses.get("MDMEC");
		mdVer = uses.get("MD");
		mdMecSchema = SchemaWrapper.factory("mdmec-v" + mdMecVer);
		mdMecNSpace = Namespace.getNamespace("mdmec", "http://www.movielabs.com/schema/mdmec/v" + mdMecVer);

		mdSchema = SchemaWrapper.factory("md-v" + mdVer);
		mdNSpace = Namespace.getNamespace("md", "http://www.movielabs.com/schema/md/v" + mdVer + "/md");

		if (mdMecSchema == null || mdSchema == null) {
			// xsdVersion is still null from the reset above
			return false;
		}
		xsdVersion = availXsdVersion;
		return true;
	}

	/**
	 * Reports which XML (XSD) version is used when XML is generated from the XLSX.
	 * 
	 * @return the xsdVersion, or <tt>null</tt> if no supported version has been
	 *         set
	 */
	public String getVersion() {
		return this.xsdVersion;
	}

	/**
	 * Convert one sheet within the Workbook to an XML representation. The results
	 * are returned in the form of a Map with the following content:
	 * <ul>
	 * <li>results.get("xlsx"): the source File of <tt>mddfTarget</tt></li>
	 * <li>results.get("xml"): the JDom2 Document that was created (<tt>null</tt>
	 * if the XLSX could not be ingested)</li>
	 * <li>results.get("pedigree"): the pedigreeMap instance linking XML elements
	 * to the Avail cell from which they were derived.</li>
	 * <li>results.get("srcFmt"): FILE_FMT of the ingested XLSX</li>
	 * <li>results.get("status"): RESULT_STATUS.COMPLETED</li>
	 * </ul>
	 * The results returned will be <tt>null</tt> if XLSX has a FILE_FMT that is
	 * invalid or that can not be processed by the code as currently implemented.
	 * 
	 * @param mddfTarget identifies the source XLSX file
	 * @param inStream   stream to read the XLSX from; if <tt>null</tt> the source
	 *                   file of <tt>mddfTarget</tt> is opened instead
	 * @param sheetNum   zero-based index of the sheet to convert
	 * @param shortDesc  text used for the ShortDescription of each Avail
	 * @return results, or <tt>null</tt> if the template version is not supported
	 * @throws IllegalStateException if no supported XSD version has been set
	 */
	public Map convert(MddfTarget mddfTarget, InputStream inStream, int sheetNum, String shortDesc)
			throws IllegalStateException {
		File srcXslxFile = mddfTarget.getSrcFile();
		Map<String, Object> results = new HashMap<>();
		FILE_FMT srcMddfFmt = null;
		switch (templateVersion) {
		case V1_9:
			srcMddfFmt = FILE_FMT.AVAILS_1_9;
			break;
		case V1_8:
			srcMddfFmt = FILE_FMT.AVAILS_1_8;
			break;
		case V1_7_3:
			srcMddfFmt = FILE_FMT.AVAILS_1_7_3;
			break;
		case V1_7_2:
			srcMddfFmt = FILE_FMT.AVAILS_1_7_2;
			break;
		case V1_7:
			srcMddfFmt = FILE_FMT.AVAILS_1_7;
			break;
		case V1_6:
			logger.log(LogMgmt.LEV_FATAL, LogMgmt.TAG_AVAIL,
					"Version " + templateVersion + " has been deprecated and is no longer supported", mddfTarget,
					moduleId);
			return null;
		case UNK:
			logger.log(LogMgmt.LEV_FATAL, LogMgmt.TAG_AVAIL, "Unable to identify XLSX format ", mddfTarget, moduleId);
			// an unidentified format can not be processed, same as any unsupported one
			return null;
		default:
			logger.log(LogMgmt.LEV_FATAL, LogMgmt.TAG_AVAIL, "Unsupported template version " + templateVersion,
					mddfTarget, moduleId);
			return null;
		}
		// honor the caller's choice of sheet rather than always using sheet 0
		Document xmlDoc = makeXmlAsJDom(mddfTarget, inStream, sheetNum, shortDesc);
		results.put("xlsx", srcXslxFile);
		results.put("xml", xmlDoc);
		results.put("pedigree", pedigreeMap);
		results.put("srcFmt", srcMddfFmt);
		results.put("status", RESULT_STATUS.COMPLETED);
		return results;
	}

	/**
	 * Ingests one sheet of the XLSX and builds the corresponding JDom2 Document.
	 * 
	 * @param mddfTarget identifies the source XLSX file
	 * @param inStream   stream to read the XLSX from; if <tt>null</tt> the source
	 *                   file of <tt>mddfTarget</tt> is opened read-only instead
	 * @param sheetNum   zero-based index of the sheet to ingest
	 * @param shortDesc  text used for the ShortDescription of each Avail
	 * @return the generated Document, or <tt>null</tt> if the XLSX could not be
	 *         ingested
	 * @throws IllegalStateException if no supported XSD version has been set
	 */
	private Document makeXmlAsJDom(MddfTarget mddfTarget, InputStream inStream, int sheetNum, String shortDesc)
			throws IllegalStateException {
		this.shortDesc = shortDesc;
		this.curSrcXslxFile = mddfTarget.getSrcFile();
		if (xsdVersion == null) {
			String msg = "Unable to generate XML from XLSX: XSD version was not set or is unsupported.";
			logger.log(LogMgmt.LEV_ERR, LogMgmt.TAG_XLATE, msg, null, moduleId);
			throw new IllegalStateException("The XSD version was not set or is unsupported.");
		}
		Document doc = initializeDoc();
		rootEl = doc.getRootElement();
		// initialize data structures used to collect interim structures
		initializeMappings();

		mdBuilder = new MetadataBuilder(logger, this);
		/*
		 * initiate event-driven ingest. The streaming parser (i.e., the ParseByRow
		 * instance 'rowHandler') will invoke the processRow() method whenever it has
		 * ingested an entire row.
		 */
		OPCPackage xlsxPackage = null;
		try {
			if (inStream == null) {
				xlsxPackage = OPCPackage.open(curSrcXslxFile.getPath(), PackageAccess.READ);
			} else {
				xlsxPackage = OPCPackage.open(inStream);
			}
			XSSFReader xssfReader = new XSSFReader(xlsxPackage);
			StylesTable styles = xssfReader.getStylesTable();
			ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(xlsxPackage);
			XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
			int index = 0;
			boolean sheetProcessed = false;
			while (iter.hasNext() && !sheetProcessed) {
				try (InputStream stream = iter.next()) {
					/*
					 * Is this the correct sheet? Note we identify sheet by the index number, not
					 * the name.
					 */
					// TODO: Allow use of sheet-name to ID the desired sheet
					if (index == sheetNum) {
						String sheetName = iter.getSheetName();
						String msg = "Ingesting Sheet " + sheetName;
						logger.log(LogMgmt.LEV_INFO, LogMgmt.TAG_XLATE, msg, null, moduleId);
						processSheet(styles, strings, stream);
						// only one sheet is converted; no need to open the remaining ones
						sheetProcessed = true;
					}
				}
				++index;
			}
		} catch (XlsxDataTermination e) {
			// normal early exit: a run of empty rows was taken as the end of the data
			logger.log(LogMgmt.LEV_INFO, LogMgmt.TAG_XLATE, e.getMessage(), null, moduleId);
		} catch (Exception e) {
			e.printStackTrace();
			String msg = "Unable to ingest XLSX: verify correct version was specified";
			logger.log(LogMgmt.LEV_ERR, LogMgmt.TAG_XLATE, msg, null, moduleId);
			return null;
		} finally {
			if (xlsxPackage != null) {
				// the package is only read, so release it without attempting to save
				xlsxPackage.revert();
			}
		}

		assembleDoc();

		finalizeDocument(doc, templateVersion);
		String msg = "Completed ingesting XLSX file";
		logger.log(LogMgmt.LEV_INFO, LogMgmt.TAG_XLATE, msg, mddfTarget, moduleId);

		return doc;
	}

	/**
	 * Creates an empty output Document whose root is an <tt>AvailList</tt> element
	 * in the Avails namespace, with the md, mdmec and xsi namespaces declared on
	 * it.
	 * 
	 * @return the new Document
	 */
	private Document initializeDoc() {
		// build the fully-declared root first...
		Element availList = new Element("AvailList", availsNSpace);
		availList.addNamespaceDeclaration(mdNSpace);
		availList.addNamespaceDeclaration(mdMecNSpace);
		availList.addNamespaceDeclaration(SchemaWrapper.xsiNSpace);
		// ...then attach it to a fresh Document
		Document result = new Document();
		result.setRootElement(availList);
		return result;
	}

	/**
	 * 
	 */
	private void initializeMappings() {
		pedigreeMap = new HashMap();
		availElRegistry = new HashMap();
		assetElRegistry = new HashMap();
		avail2AssetMap = new HashMap>();
		avail2TransMap = new HashMap>();
		avail2EntilementMap = new HashMap>();
		entitlement2IdMap = new HashMap>();
		element2SrcRowMap = new HashMap();
	}

	/**
	 * Parses the content of one sheet using event-driven ingest. The streaming
	 * parser (i.e., the ParseByRow instance 'rowHandler') will invoke the
	 * <tt>processRow()</tt> method whenever it has ingested an entire row. When
	 * parsing completes, the number of Avails found is logged.
	 *
	 * @param styles           The table of styles that may be referenced by cells
	 * @param strings          The table of strings that may be referenced by cells
	 *                         in the sheet
	 * @param sheetInputStream The stream to read the sheet-data from.
	 * 
	 * @throws IOException         An IO exception from the parser, possibly from a
	 *                             byte stream or character stream supplied by the
	 *                             application.
	 * @throws SAXException        if parsing the XML data fails.
	 * @throws XlsxDataTermination if ingest was terminated before the end of the
	 *                             sheet
	 * @throws RuntimeException    if no XML reader could be configured
	 */
	void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
			throws IOException, SAXException, XlsxDataTermination {

		ParseByRow rowHandler = new ParseByRow();
		DataFormatter formatter = new DataFormatter();
		InputSource sheetSource = new InputSource(sheetInputStream);
		try {
			XMLReader sheetParser = XMLHelper.newXMLReader();
			ContentHandler handler = new AvailSheetXMLHandler(styles, null, strings, rowHandler, formatter, false);
			sheetParser.setContentHandler(handler);
			sheetParser.parse(sheetSource);
		} catch (ParserConfigurationException e) {
			// keep the original exception as the cause so its stack trace is not lost
			throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
		}
		int availCnt = availElRegistry.values().size();
		String msg = "Avail count for WorkSheet = " + availCnt;
		logger.log(LogMgmt.LEV_INFO, LogMgmt.TAG_XLATE, msg, null, moduleId);

	}

	/**
	 * The streaming parser (i.e., the ParseByRow instance 'rowHandler') will invoke
	 * the <tt>processRow()</tt> method whenever it has ingested an entire row.
	 * Rows 0 and 1 are the column headers and are used to build the header map. A
	 * third row that is blank or holds comments (column B starts with "//") is
	 * skipped. Any other row that has data is handed off to an <tt>Ingester</tt>
	 * instance for conversion to XML. Empty rows are ignored.
	 * 
	 * This method also implements the logic that will determine if stream
	 * processing should be terminated prior to the end of file being detected:
	 * <tt>terminateNow</tt> is set once the number of contiguous empty rows
	 * exceeds <tt>TERMINATION_THRESHOLD</tt>.
	 * 
	 * @param row        cell values of the row; cells not reported by the parser
	 *                   are <tt>null</tt>
	 * @param rowNum     zero-based row number
	 * @param rowHasData <tt>true</tt> if at least one cell is non-empty
	 * @throws XlsxDataTermination
	 */
	void processRow(String[] row, int rowNum, boolean rowHasData) throws XlsxDataTermination {
		// track the current run of empty rows
		if (rowHasData) {
			emptyRowCnt = 0;
			lastDataRow = rowNum;
		} else {
			emptyRowCnt++;
		}
		if (emptyRowCnt > TERMINATION_THRESHOLD) {
			terminateNow = true;
		}
		// rows '0' and '1' are the headers with column keys
		switch (rowNum) {
		case 0:
			// save for now
			headerRow_0 = row;
			return;
		case 1:
			headerMap = new HashMap<>();
			for (int i = 0; i < row.length; i++) {
				String colKey = headerRow_0[i] + "/" + row[i];
				Integer colPtr = Integer.valueOf(i);
				headerMap.put(colKey, colPtr);
			}
			headerRow_0 = null; // not needed anymore so GC it
			return;
		case 2:
			/* 3rd row may contain either comments or data */
			if (row == null || (!rowHasData)) {
				// 3rd row has been left blank (i.e., no '//OPT or //REQ comments)
				return;
			}
			/*
			 * Column B may be missing entirely (single-column sheet) or may be null (no
			 * cell was reported for it), so it is checked before being dereferenced.
			 */
			if (row.length > 1 && row[1] != null && row[1].startsWith("//")) {
				// row contains comments
				return;
			}
			// 3rd row holds data: fall through and process it like any other row
		default:
			// ...................
		}
		/*
		 * process a data row
		 */
		if (rowHasData) {
			try {
				// the instance is not used afterwards; the work is done during construction
				new IngesterV1_7(row, rowNum, this, logger);
			} catch (Exception e) {
				e.printStackTrace();
				int rowID = rowNum + 1;
				String msg = "Unable to ingest data in row " + rowID + "; Exception while processing: "
						+ e.getMessage();
				logger.log(LogMgmt.LEV_ERR, LogMgmt.TAG_XLATE, msg, null, moduleId);
			}
		}
	}

	/**
	 * Final assembly in correct order. For every registered Avail, the collected
	 * Assets, Transactions and SharedEntitlements are inserted immediately after
	 * its ShortDescription and the Avail is then appended to the root element.
	 */
	private void assembleDoc() {
		for (Element nextAvailEl : availElRegistry.values()) {
			Element sDescEl = nextAvailEl.getChild("ShortDescription", availsNSpace);
			int index = nextAvailEl.indexOf(sDescEl) + 1;
			/*
			 * Everything is inserted at the same index, so each insertion lands in front
			 * of the previous one. The resulting order is therefore Assets, Transactions,
			 * Entitlements.
			 */
			Map<?, Element> seMap = avail2EntilementMap.get(nextAvailEl);
			if (seMap != null && !seMap.isEmpty()) {
				Collection<Element> seSet = seMap.values();
				nextAvailEl.addContent(index, seSet);
			}
			nextAvailEl.addContent(index, avail2TransMap.get(nextAvailEl));
			nextAvailEl.addContent(index, avail2AssetMap.get(nextAvailEl));
			finalizeAssetMetadata(nextAvailEl);
			rootEl.addContent(nextAvailEl);
		}
	}

	/**
	 * Locates the metadata element of each Asset of an Avail based on the Asset's
	 * WorkType. No further processing of the metadata element is currently done
	 * (the former finalization step is disabled). Assets without a WorkType are
	 * skipped.
	 * 
	 * @param availEl the Avail whose Assets are examined
	 */
	private void finalizeAssetMetadata(Element availEl) {
		List<Element> assetList = availEl.getChildren("Asset", availsNSpace);
		for (Element nextEl : assetList) {
			String assetWorkType = nextEl.getChildText("WorkType", availsNSpace);
			if (assetWorkType == null) {
				// a switch on a null String would throw a NullPointerException
				continue;
			}
			Element mdEl = null;
			switch (assetWorkType) {
			case "Season":
				mdEl = nextEl.getChild("SeasonMetadata", availsNSpace);
				break;
			case "Episode":
				mdEl = nextEl.getChild("EpisodeMetadata", availsNSpace);
				break;
			case "Series":
				mdEl = nextEl.getChild("SeriesMetadata", availsNSpace);
				break;
			default:
				mdEl = nextEl.getChild("Metadata", availsNSpace);
				break;
			}
			if (mdEl != null) {
				/*
				 * Is this still needed given new mode of md building?
				 */
				// mdHelper_basic.finalize(mdEl);
			}
		}
	}

	/**
	 * Performs any finalization that requires multiple rows and/or Avails to be
	 * examined collectively, which is easiest once the entire XML document has
	 * been assembled. Only version 1.8 has such a step (Volume finalization); for
	 * every other version nothing is done.
	 * 
	 * @param doc     the XML generated from the xlsx
	 * @param version the version of the xlsx file
	 */
	protected void finalizeDocument(Document doc, Version version) {
		switch (version) {
		case V1_8:
			finalizeVolumes(doc);
			return;
		default:
			// nothing to finalize for the other versions
		}
	}

	/**
	 * If any <tt>Volumes</tt> are defined then the <tt>VolumeMetadata</tt> needs to
	 * be completed by identifying the correct number of Episodes it contains and
	 * the number of its first Episode. The temporary <tt>volNum</tt> attribute is
	 * removed from every Episode Asset that gets matched to a Volume.
	 * 
	 * @param doc the assembled XML document
	 */
	private void finalizeVolumes(Document doc) {
		String avPrefix = availsNSpace.getPrefix();
		String xpath_VolMD = "//" + avPrefix + ":VolumeMetadata";
		XPathExpression<Element> xpExp_VolMetadata = xpfac.compile(xpath_VolMD, Filters.element(), null,
				availsNSpace);
		String xpath_EpisodeNum = "./" + avPrefix + ":EpisodeMetadata/" + avPrefix + ":EpisodeNumber/"
				+ mdNSpace.getPrefix() + ":Number";
		XPathExpression<Element> xpExp_EpisodeNum = xpfac.compile(xpath_EpisodeNum, Filters.element(), null,
				availsNSpace, mdNSpace);

		List<Element> volList = xpExp_VolMetadata.evaluate(doc.getRootElement());
		if (volList.isEmpty()) {
			logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_DEBUG, null, "No Volumes found", null, null, moduleId);
			return;
		}
		logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_INFO, null, "Finalizing " + volList.size() + " Volume(s)", null,
				null, moduleId);
		/*
		 * For each Volume we need to find all Assets with
		 * 
		 * WorkType of 'Episode' and with SeasonContentID that matches the Volume's
		 * SeasonContentID AND with a @volNum that matches the
		 * VolumeMetadata/VolumeNumber
		 */
		String xpath_SeasonCID = "./" + avPrefix + ":SeasonMetadata/" + avPrefix + ":SeasonContentID";
		String xpath_VolNum = "./" + avPrefix + ":VolumeNumber/" + mdNSpace.getPrefix() + ":Number";
		XPathExpression<Element> xpExp_scid = xpfac.compile(xpath_SeasonCID, Filters.element(), null, availsNSpace);
		XPathExpression<Element> xpExp_vNum = xpfac.compile(xpath_VolNum, Filters.element(), null, availsNSpace,
				mdNSpace);
		for (Element volMdEl : volList) {
			String scid = null;
			String vNum = null;
			Element scidEl = xpExp_scid.evaluateFirst(volMdEl);
			if (scidEl != null) {
				scid = scidEl.getTextNormalize();
			}
			Element vNumEl = xpExp_vNum.evaluateFirst(volMdEl);
			if (vNumEl != null) {
				vNum = vNumEl.getTextNormalize();
			}
			if ((scid == null) || (vNum == null)) {
				// missing key info. This will get flagged downstream when XML is validated
				continue;
			}
			logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_DEBUG, null, "Finalizing Volume " + vNum + ", scid=" + scid,
					null, null, moduleId);
			// find matching Episode Assets
			String xpath_Episodes = "//" + avPrefix + ":Asset[@volNum='" + vNum + "' and ./" + avPrefix
					+ ":EpisodeMetadata/" + avPrefix + ":SeasonMetadata[" + avPrefix + ":SeasonContentID/text()='"
					+ scid + "']]";
			XPathExpression<Element> xpExp_episodes = xpfac.compile(xpath_Episodes, Filters.element(), null,
					availsNSpace);
			List<Element> episodeList = xpExp_episodes.evaluate(doc.getRootElement());
			logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_DEBUG, null,
					"Found " + episodeList.size() + " matching Episode Assets", null, null, moduleId);
			/*
			 * Identify 1st Episode. At the same time remove the temporary @volNum
			 * attribute since it violates the XSD
			 */
			int first = Integer.MAX_VALUE;
			for (Element assetEl : episodeList) {
				assetEl.removeAttribute("volNum");
				Element episodeNumEl = xpExp_EpisodeNum.evaluateFirst(assetEl);
				if (episodeNumEl != null) {
					String eNumText = episodeNumEl.getTextNormalize();
					try {
						first = Integer.min(first, Integer.parseInt(eNumText));
					} catch (NumberFormatException e) {
						// a malformed episode number must not abort the whole conversion
						logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_WARN, null,
								"Ignoring non-numeric EpisodeNumber '" + eNumText + "' while finalizing Volume "
										+ vNum,
								null, null, moduleId);
					}
				}
			}
			Element volNoEpEl = volMdEl.getChild("VolumeNumberOfEpisodes", availsNSpace);
			if (volNoEpEl == null) {
				// nothing to complete; will get flagged downstream when XML is validated
				logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_WARN, null,
						"Volume " + vNum + " has no VolumeNumberOfEpisodes element; not finalized", null, null,
						moduleId);
				continue;
			}
			volNoEpEl.setText(Integer.toString(episodeList.size()));
			if (first == Integer.MAX_VALUE) {
				// no usable episode number was found, so there is no 'first' to report
				logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_WARN, null,
						"Unable to determine first Episode of Volume " + vNum, null, null, moduleId);
				continue;
			}
			// VolumeFirstEpisodeNumber goes immediately before the VolumeNumberOfEpisodes
			int index = volMdEl.indexOf(volNoEpEl);
			Element vFirstEl = new Element("VolumeFirstEpisodeNumber", availsNSpace);
			vFirstEl.setText(Integer.toString(first));
			volMdEl.addContent(index, vFirstEl);
		}
	}

	/**
	 * Returns the (zero-based) column number that matches the key. The key is a
	 * composite of the two header columns (e.g., "AvailTrans/Territory"). When the
	 * builder is in 'noPrefix' mode only the part after the first "/" is used for
	 * the lookup; a key without a "/" is then looked up unchanged.
	 * 
	 * @param key composite column key
	 * @return column number or -1 if key does not match a known column header
	 *         (including when the header rows have not been read yet).
	 */
	public int getColumnIdx(String key) {
		if (key == null || headerMap == null) {
			return -1;
		}
		if (noPrefix) {
			String[] parts = key.split("/");
			if (parts.length > 1) {
				key = parts[1];
			}
		}
		Integer colIdx = headerMap.get(key);
		if (colIdx == null) {
			return -1;
		}
		return colIdx.intValue();
	}

	// =======================================
	// methods used by RowBuilders
	// .........................................

	/**
	 * Returns the JDom Element instantiating the <tt>Avail</tt> associated with the
	 * ALID specified by the row. If this is the first request for the specified
	 * Avail a new element is constructed, registered and returned. Otherwise, the
	 * previously created element is returned after checking that the current row
	 * is consistent with the row that first defined the Avail (any mismatch is
	 * logged as an error). Thus, there is never more than one XML element per ALID
	 * value.
	 * <p>
	 * A newly created element is not attached to the document here; it is only
	 * stored in the registry.
	 * </p>
	 * 
	 * @param curRow the row currently being ingested
	 * @return the Avail element for the row's ALID
	 */
	Element getAvailElement(StreamingRowIngester curRow) {
		Pedigree alidPedigree = curRow.getPedigreedData("Avail/ALID");
		/*
		 * TODO: next line throws a NullPtrException if column is missing. How do we
		 * handle?
		 */
		String alid = alidPedigree.getRawValue();
		logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_DEBUG, curSrcXslxFile,
				"Looking for Avail with ALID=[" + alid + "]", null, null, moduleId);
		Element availEL = availElRegistry.get(alid);
		if (availEL == null) {
			// first row for this ALID: build the Avail element
			logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_DEBUG, curSrcXslxFile,
					"Building Avail with ALID=[" + alid + "]", null, null, moduleId);
			availEL = new Element("Avail", getAvailsNSpace());
			/*
			 * No data value for the Avail element itself but for purposes of error logging
			 * we link it to the ALID
			 */
			addToPedigree(availEL, alidPedigree);
			/*
			 * availEl will get added to document at completion of sheet processing. For
			 * now, just store in HashMap.
			 */
			availElRegistry.put(alid, availEL);
			/*
			 * Keeping track of row will facilitate later check to make sure any other row
			 * for same Avail has identical values where required.
			 */
			element2SrcRowMap.put(availEL, curRow);

			// NOTE: child elements are added in the order in which they must appear
			Element alidEl = mGenericElement("ALID", alid, getAvailsNSpace());
			availEL.addContent(alidEl);
			addToPedigree(alidEl, alidPedigree);
			
			// new for v1.9 and 2.5...
			curRow.process(availEL, "AssociatedALID", getAvailsNSpace(),   "Avail/AssociatedALID", ";");

			availEL.addContent(curRow.mDisposition());
			availEL.addContent(curRow.mPublisher("Licensor", "Avail/DisplayName"));
			availEL.addContent(curRow.mPublisher("ServiceProvider", "Avail/ServiceProvider"));

			// AvailType is derived from the WorkType of the row's asset
			String availType = mapWorkType(curRow);
			Element atEl = mGenericElement("AvailType", availType, getAvailsNSpace());
			availEL.addContent(atEl);
			addToPedigree(atEl, curRow.getPedigreedData("AvailAsset/WorkType"));

			// the same short description (passed to the builder) is used for every Avail
			Element sdEl = mGenericElement("ShortDescription", shortDesc, getAvailsNSpace());
			availEL.addContent(sdEl);

			// Exception Flag
			curRow.process(availEL, "ExceptionFlag", getAvailsNSpace(), "Avail/ExceptionFlag");

			/*
			 * Initialize data structures for collecting Assets, Transactions, and
			 * Entitlements.
			 */
			avail2AssetMap.put(availEL, new ArrayList());
			avail2TransMap.put(availEL, new ArrayList());
			avail2EntilementMap.put(availEL, new HashMap());
		} else {
			/*
			 * Avail already exists: make sure key values are aligned with the row that
			 * first defined it. Mismatches are logged but do not stop processing.
			 */
			RowDataSrc srcRow = element2SrcRowMap.get(availEL);
			checkForMatch("Avail/ALID", srcRow, curRow, "Avail");
			checkForMatch("Avail/DisplayName", srcRow, curRow, "Avail");
			checkForMatch("Avail/ServiceProvider", srcRow, curRow, "Avail");
			checkForMatch("Avail/ExceptionFlag", srcRow, curRow, "Avail");
			/*
			 * AvailAsset/WorkType is special case as different WorkTypes may map to same
			 * AvailType
			 */

			String definedValue = mapWorkType(srcRow);
			String curValue = mapWorkType(curRow);
			if (!definedValue.equals(curValue)) {
				// Generate error msg; row numbers are reported one-based
				String msg = "Inconsistent WorkType; value not compatable with 1st definition of referenced Avail";
				Integer row4log = Integer.valueOf(srcRow.getRowNumber() + 1);
				String details = "AVAIL was 1st defined in row " + row4log + " which specifies AvailAsset/WorkType as "
						+ srcRow.getData("AvailAsset/WorkType") + " and requires WorkType=" + definedValue;
				logger.logIssue(LogMgmt.TAG_XLSX, LogMgmt.LEV_ERR, row4log, msg, details, null, moduleId);
			}
		}
		return availEL;
	}

	/**
	 * Verifies that the current row specifies the same value for a column as the
	 * row that first defined the entity. A mismatch is logged as an error against
	 * the (one-based) number of the defining row.
	 * 
	 * @param colKey     composite key of the column to compare
	 * @param srcRow     row that first defined the entity
	 * @param curRow     row currently being ingested
	 * @param entityName name of the entity, used in the log message
	 * @return <tt>true</tt> if the values match or the defining row has no value
	 *         for the column
	 */
	private boolean checkForMatch(String colKey, RowDataSrc srcRow, RowDataSrc curRow, String entityName) {
		String expected = srcRow.getData(colKey);
		if (expected == null) {
			// col not defined so we consider it a match
			return true;
		}
		String actual = curRow.getData(colKey);
		if (expected.equals(actual)) {
			return true;
		}
		// values differ: report against the row holding the 1st definition
		int firstDefRow = srcRow.getRowNumber() + 1;
		String msg = "Inconsistent specification; value does not match 1st definition of referenced " + entityName;
		String details = entityName + " was 1st defined in row " + firstDefRow + " which specifies " + colKey + " as '"
				+ expected + "'";
		logger.logIssue(LogMgmt.TAG_XLSX, LogMgmt.LEV_ERR, Integer.valueOf(firstDefRow), msg, details, null,
				moduleId);
		return false;
	}

	/**
	 * Maps the <tt>AvailAsset/WorkType</tt> of a row to the corresponding
	 * <tt>AvailType</tt>. 'Movie' and 'Short' map to "single", 'Collection' to
	 * "bundle" and 'Supplemental' to "supplement"; any other WorkType maps to its
	 * lower-case form.
	 * 
	 * @param rowHelper source of the row's data
	 * @return the AvailType
	 * @throws NullPointerException if the row has no WorkType value
	 */
	private String mapWorkType(RowDataSrc rowHelper) {
		String workTypeSS = rowHelper.getData("AvailAsset/WorkType");
		String availType;
		switch (workTypeSS) {
		case "Movie":
		case "Short":
			availType = "single";
			break;
		case "Collection":
			availType = "bundle";
			break;
		case "Supplemental":
			availType = "supplement";
			break;
		case "Volume":
		case "Series":
		case "Season":
		case "Episode":
		default:
			availType = workTypeSS.toLowerCase();
		}
		return availType;
	}

	/**
	 * Accessor for the namespace held in the <code>availsNSpace</code> field.
	 * 
	 * @return the availsNSpace
	 */
	public Namespace getAvailsNSpace() {
		return this.availsNSpace;
	}

	/**
	 * Accessor for the namespace held in the <code>mdNSpace</code> field.
	 * 
	 * @return the mdNSpace
	 */
	public Namespace getMdNSpace() {
		return this.mdNSpace;
	}

	/**
	 * Accessor for the namespace held in the <code>mdMecNSpace</code> field.
	 * 
	 * @return the mdMecNSpace
	 */
	public Namespace getMdMecNSpace() {
		return this.mdMecNSpace;
	}

	/**
	 * Looks up the schema wrapper registered under a schema key.
	 * 
	 * @param schema one of "avails", "mdmec" or "md"
	 * @return the wrapper for the requested schema
	 * @throws IllegalArgumentException if the key is not one of the supported
	 *                                  values
	 * @throws NullPointerException     if <code>schema</code> is <code>null</code>
	 */
	private SchemaWrapper getSchema(String schema) {
		// The key is dereferenced on purpose: a null key raises a
		// NullPointerException instead of being reported as unsupported.
		if (schema.equals("avails")) {
			return availsSchema;
		}
		if (schema.equals("mdmec")) {
			return mdMecSchema;
		}
		if (schema.equals("md")) {
			return mdSchema;
		}
		throw new IllegalArgumentException("Schema '" + schema + "' is unsupported.");
	}

	/*
	 * (non-Javadoc)
	 * 
	 * Reports whether the named element is required according to the selected
	 * schema. Fails with IllegalStateException while no XSD version is set, and
	 * with IllegalArgumentException when the schema key is not supported.
	 * 
	 * @see
	 * com.movielabs.mddflib.avails.xml.AbstractXmlBuilder#isRequired(java.lang.
	 * String, java.lang.String)
	 */
	public boolean isRequired(String elementName, String schema)
			throws IllegalStateException, IllegalArgumentException {
		if (xsdVersion != null) {
			return getSchema(schema).isRequired(elementName);
		}
		throw new IllegalStateException("The XSD version was not set or is unsupported.");
	}

	/**
	 * Accessor for the map that <code>addToPedigree</code> populates. The map
	 * itself is returned, not a copy.
	 * 
	 * @return the pedigreeMap
	 */
	Map getPedigreeMap() {
		return this.pedigreeMap;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * Records the Pedigree from which a piece of generated content originated by
	 * storing it in the pedigree map, keyed by the content object.
	 * 
	 * @see
	 * com.movielabs.mddflib.avails.xml.AbstractXmlBuilder#addToPedigree(java.lang.
	 * Object, com.movielabs.mddflib.avails.xml.Pedigree)
	 */
	public void addToPedigree(Object content, Pedigree source) {
		this.pedigreeMap.put(content, source);
	}

	/*
	 * (non-Javadoc)
	 * 
	 * Creates an element with the given name and namespace whose text is the
	 * supplied value after it has been passed through formatForType().
	 * 
	 * @see
	 * com.movielabs.mddflib.avails.xml.AbstractXmlBuilder#mGenericElement(java.lang
	 * .String, java.lang.String, org.jdom2.Namespace)
	 */
	public Element mGenericElement(String name, String val, Namespace ns) {
		Element genericEl = new Element(name, ns);
		genericEl.setText(formatForType(name, ns, val));
		return genericEl;
	}

	/**
	 * Normalizes a raw input value for the named element. Leading and trailing
	 * whitespace is always removed (a <code>null</code> input is treated as an
	 * empty string). The element's type is then looked up in the schema selected
	 * by the namespace prefix, and values typed <code>xs:duration</code>,
	 * <code>xs:boolean</code> or <code>xs:dateTime</code> are additionally run
	 * through the matching <code>FormatConverter</code> routine. Every other type
	 * is returned trimmed but otherwise unchanged.
	 * 
	 * @param elementName name of the element the value belongs to
	 * @param ns          namespace whose prefix selects the schema
	 * @param inputValue  raw value; may be <code>null</code>
	 * @return the trimmed and, where applicable, converted value
	 * @throws IllegalStateException    if no XSD version has been set
	 * @throws IllegalArgumentException if the namespace prefix does not identify a
	 *                                  supported schema
	 */
	private String formatForType(String elementName, Namespace ns, String inputValue)
			throws IllegalStateException, IllegalArgumentException {
		if (xsdVersion == null) {
			throw new IllegalStateException("The XSD version was not set or is unsupported.");
		}
		String raw = (inputValue == null) ? "" : inputValue;
		// strip trailing whitespace first, then leading whitespace
		String trimmed = raw.replaceAll("[\\s]*$", "").replaceAll("^[\\s]*", "");

		String type = getSchema(ns.getPrefix()).getType(elementName);
		switch (type) {
		case "xs:duration":
			return FormatConverter.durationToXml(trimmed);
		case "xs:boolean":
			return FormatConverter.booleanToXml(trimmed);
		case "xs:dateTime":
			// the flag passed along tells the converter whether the element name
			// begins with "Start"
			return FormatConverter.dateTimeToXml(trimmed, elementName.startsWith("Start"));
		default:
			/*
			 * xs:string, md:id-type, md:string-ContentID-Identifier, xs:anyURI, xs:date
			 * and any type not listed above need no conversion.
			 */
			return trimmed;
		}
	}

	/**
	 * Ensures an Asset element exists for the content referenced by a row.
	 * <p>
	 * Assets are registered under a key built from the row's content ID and its
	 * ALID. If no Asset is registered under that key, one is built from the row,
	 * registered, and added to the Avail registered under the ALID. Otherwise the
	 * row's asset-identifying columns are compared with those of the row that
	 * first defined the Asset: mismatches are logged by
	 * <code>checkForMatch</code>, and when everything matches a debug-level entry
	 * notes that the row's Asset information is redundant.
	 * </p>
	 * 
	 * @param curRow the row being processed
	 */
	public void createAsset(StreamingRowIngester curRow) {
		/*
		 * The column holding the content ID depends on the WorkType: Season, Episode
		 * and Volume use a WorkType-prefixed column, everything else uses plain
		 * ContentID.
		 */
		String workType = curRow.getData("AvailAsset/WorkType");
		String cidSrc;
		switch (workType) {
		case "Season":
		case "Episode":
		case "Volume":
			cidSrc = workType + "ContentID";
			break;
		default:
			cidSrc = "ContentID";
		}
		String contentID = curRow.getData("AvailAsset/" + cidSrc);
		String alid = curRow.getData("Avail/ALID");
		// registry key is the content ID concatenated with the ALID
		String assetKey = contentID + "__" + alid;

		Element assetEl = assetElRegistry.get(assetKey);
		if (assetEl != null) {
			/*
			 * Asset already exists: check each identifying column against the row that
			 * originally defined it. Every column is checked, even after a mismatch, so
			 * that each inconsistency gets logged.
			 */
			RowDataSrc srcRow = element2SrcRowMap.get(assetEl);
			String[] idColumns = { "AvailAsset/WorkType", "AvailAsset/ContentID", "AvailAsset/EpisodeContentID",
					"AvailAsset/SeasonContentID", "AvailAsset/SeriesContentID" };
			boolean consistent = true;
			for (String colKey : idColumns) {
				if (!checkForMatch(colKey, srcRow, curRow, "Asset")) {
					consistent = false;
				}
			}
			if (consistent) {
				int row4log = curRow.getRowNumber() + 1;
				Integer source = Integer.valueOf(row4log);
				String msg = "Ignoring redundant Asset information";
				String details = "An Asset with " + cidSrc + "=" + contentID
						+ " was previously defined. Asset-specific fields in row " + row4log + " will be ignored";
				logger.logIssue(LogMgmt.TAG_XLATE, LogMgmt.LEV_DEBUG, source, msg, details, null, moduleId);
			}
			return;
		}

		// First occurrence: build the Asset, register it, and attach it to its Avail.
		assetEl = curRow.buildAsset();
		assetElRegistry.put(assetKey, assetEl);
		element2SrcRowMap.put(assetEl, curRow);
		addAsset(availElRegistry.get(alid), assetEl);
	}

	/**
	 * Appends an Asset element to the list of assets kept for an Avail. The list
	 * must already be registered for the Avail; none is created here.
	 * 
	 * @param avail   Avail element the asset belongs to
	 * @param assetEl Asset element to append
	 */
	void addAsset(Element avail, Element assetEl) {
		avail2AssetMap.get(avail).add(assetEl);
	}

	/**
	 * Has the metadata builder produce the metadata element for a row and adds it
	 * as content to the given Asset element.
	 * 
	 * @param assetEl  - Asset element the metadata is appended to.
	 * @param workType - passed to the metadata builder along with the row.
	 * @param row      - a RowDataSrc supplying the metadata values
	 */
	public void createAssetMetadata(Element assetEl, String workType, RowDataSrc row) {
		assetEl.addContent(mdBuilder.appendMData(row, workType));
	}

	/**
	 * Adds an entitlement ID element to the SharedEntitlement element that an
	 * Avail holds for the given ecosystem. The SharedEntitlement element is
	 * created and registered on first use. An ID whose text has already been added
	 * for that ecosystem is silently skipped.
	 * 
	 * @param avail    Avail element the entitlement belongs to
	 * @param ecosysId value used for the <code>ecosystem</code> attribute and as
	 *                 the lookup key within the Avail's entitlement map
	 * @param eidEl    element whose text is the entitlement ID
	 */
	void addEntitlement(Element avail, String ecosysId, Element eidEl) {
		// NOTE(review): assumes an entitlement map was registered for this Avail
		// before this call; a missing entry would fail on the next line -- confirm.
		Map entitlmentMap = avail2EntilementMap.get(avail);
		Element seEl = entitlmentMap.get(ecosysId);
		if (seEl == null) {
			// new ecosystem for this Avail
			seEl = new Element("SharedEntitlement", getAvailsNSpace());
			seEl.setAttribute("ecosystem", ecosysId);
			entitlmentMap.put(ecosysId, seEl);
			// start an empty list of the IDs already added to this element
			entitlement2IdMap.put(seEl, new ArrayList());
		}
		/*
		 * Multiple IDs are allowed for any given ecosystem BUT we want to avoid
		 * redundant entries.
		 */
		List idList = entitlement2IdMap.get(seEl);
		// duplicates are detected by comparing the ID text, not the element itself
		String eid = eidEl.getText();
		if (idList.contains(eid)) {
			return;
		} else {
			seEl.addContent(eidEl);
			idList.add(eid);
		}
	}

	/**
	 * Appends a Transaction element to the list of transactions kept for an
	 * Avail. The list must already be registered for the Avail; none is created
	 * here.
	 * 
	 * @param avail   Avail element the transaction belongs to
	 * @param transEl Transaction element to append
	 */
	void addTransaction(Element avail, Element transEl) {
		avail2TransMap.get(avail).add(transEl);
	}
}