All downloads are free. Search and download functionality uses the official Maven repository.

com.okworx.ilcd.validation.reference.ReferenceBuilder Maven / Gradle / Ivy

Go to download

A Java library for performing technical validation of data in ILCD data format.

There is a newer version: 2.7.2
Show newest version
package com.okworx.ilcd.validation.reference;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import net.java.truevfs.access.TFile;
import net.java.truevfs.access.TFileInputStream;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOCase;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

import com.okworx.ilcd.validation.common.DatasetType;
import com.okworx.ilcd.validation.common.DefaultValidationContext;
import com.okworx.ilcd.validation.common.IContextAwareComponent;
import com.okworx.ilcd.validation.common.IValidationContext;
import com.okworx.ilcd.validation.events.EventsList;
import com.okworx.ilcd.validation.events.Severity;
import com.okworx.ilcd.validation.events.ValidationEvent;
import com.okworx.ilcd.validation.util.AbstractDatasetsTask;
import com.okworx.ilcd.validation.util.ILCDNameSpaceContext;
import com.okworx.ilcd.validation.util.PartitionedList;

/**
 * 

ReferenceBuilder class.

* * @author oliver.kusche * @version $Id: $Id */ public class ReferenceBuilder implements IContextAwareComponent { protected final Logger log = org.apache.logging.log4j.LogManager.getLogger(this.getClass()); private HashMap references; /** *

Getter for the field references.

* * @return a {@link java.util.HashMap} object. */ public HashMap getReferences() { return references; } private String aspectName = null; protected EventsList eventsList = new EventsList(this.aspectName); protected IValidationContext validationContext = new DefaultValidationContext(); /** *

Constructor for ReferenceBuilder.

*/ public ReferenceBuilder() { } /** *

Constructor for ReferenceBuilder.

* * @param aspectName a {@link java.lang.String} object. */ public ReferenceBuilder(String aspectName) { this.aspectName = aspectName; } // TODO: improve performance by using a Stax implementation instead /** *

build.

* * @param origSource a {@link java.io.File} object. * @return a {@link java.util.HashMap} object. */ public HashMap build(File origSource) { TFile source = new TFile(origSource); if (!source.isFile() && !source.isArchive() && !source.isDirectory()) throw new IllegalArgumentException(source.getAbsolutePath() + " is neither a file nor a directory nor a ZIP archive"); Collection files = new ArrayList(); if (source.isFile()) { files.add(source); } else { // add all XML datasets // as this will include any other XML files, let's filter out those which we know not to be well-formed Collection xmlfiles = FileUtils.listFiles(source, FileFilterUtils.and(FileFilterUtils.suffixFileFilter(".xml", IOCase.INSENSITIVE), FileFilterUtils.notFileFilter(FileFilterUtils.or( FileFilterUtils.nameFileFilter("compliance1.xml"), FileFilterUtils.nameFileFilter("compliance2.xml"), FileFilterUtils.nameFileFilter("compliance3.xml"), FileFilterUtils.nameFileFilter("compliance4.xml"), FileFilterUtils.nameFileFilter("complianceOur.xml") ))), TrueFileFilter.INSTANCE); // now add any files in external_docs - scan the rest and add only files in a folder named external_docs Collection extfiles = FileUtils.listFiles(source, FileFilterUtils.notFileFilter(FileFilterUtils.suffixFileFilter(".xml", IOCase.INSENSITIVE)), TrueFileFilter.INSTANCE); if (log.isDebugEnabled()) { log.debug("found " + xmlfiles.size() + " XML files and " + extfiles.size() + " others"); } for (File f : xmlfiles) { if (log.isTraceEnabled()) log.trace("adding " + f.getName()); files.add(new TFile(f)); } for (File f : extfiles) { if (f.getParent().endsWith("external_docs") && !f.getName().startsWith(".")) { if (log.isTraceEnabled()) log.trace("adding external file " + f.getName()); files.add(new TFile(f)); } } } this.references = new HashMap(); ConcurrentHashMap map = new ConcurrentHashMap(); PartitionedList partList = new PartitionedList(files); Collection>> tasks = new ArrayList>>(); for (List fileList : partList.getPartitions()) { 
tasks.add(new ExtractReferencesTask(fileList)); } try { ExecutorService executor = Executors.newFixedThreadPool(partList.getNumThreads()); List>> results = executor.invokeAll(tasks); for (Future> result : results) { map.putAll(result.get()); } executor.shutdown(); } catch (Exception e) { log.error(e); } if (log.isDebugEnabled()) log.debug(map.size() + " references extracted"); this.references.putAll(map); return this.references; } final protected class ExtractReferencesTask extends AbstractDatasetsTask implements Callable> { protected final Logger log = org.apache.logging.log4j.LogManager.getLogger(this.getClass()); ExtractReferencesTask(List files) { this.files = files; } public HashMap call() throws Exception { return parse(files); } private final List files; private HashMap parse(List files) { HashMap result = new HashMap(); DocumentBuilder builder; XPathExpression xpRootElement; XPathExpression xpUuid; XPathExpression xpVersion; XPathExpression xpNameProcessFlowModel; XPathExpression xpNameProcessFlowModelLocalized; XPathExpression xpNameSource; XPathExpression xpNameSourceLocalized; XPathExpression xpNameOther; XPathExpression xpNameOtherLocalized; XPathExpression xpTypeFlow; try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); builder = factory.newDocumentBuilder(); // Create XPathFactory object XPathFactory xpathFactory = XPathFactory.newInstance(); // Create XPath object XPath xpath = xpathFactory.newXPath(); xpath.setNamespaceContext(new ILCDNameSpaceContext()); // TODO make sure the file we're reading is an ILCD dataset xpRootElement = xpath.compile("local-name(/*)"); xpUuid = xpath.compile("/*/*/*[local-name()='dataSetInformation']/common:UUID/text()"); xpVersion = xpath.compile("/*/*[local-name()='administrativeInformation']/*[local-name()='publicationAndOwnership']/common:dataSetVersion/text()"); // name can be in multiple languages // if a CustomValidationContext is given, use the language from 
the Context's locale // if not, use the default locale // if no data according to these settings is found, take the first entry String lang = ReferenceBuilder.this.validationContext.getLocale().getLanguage(); xpNameProcessFlowModel = xpath.compile("/*/*/*[local-name()='dataSetInformation']/*[local-name()='name']/*[local-name()='baseName']/text()"); xpNameProcessFlowModelLocalized = xpath.compile("/*/*/*[local-name()='dataSetInformation']/*[local-name()='name']/*[local-name()='baseName' and @xml:lang='" + lang + "']/text()"); xpNameSource = xpath.compile("/*/*/*[local-name()='dataSetInformation']/*[local-name()='shortName']/text()"); xpNameSourceLocalized = xpath.compile("/*/*/*[local-name()='dataSetInformation']/*[local-name()='shortName' and @xml:lang='" + lang + "']/text()"); xpNameOther = xpath.compile("/*/*/*[local-name()='dataSetInformation']/*[local-name()='name']/text()"); xpNameOtherLocalized = xpath.compile("/*/*/*[local-name()='dataSetInformation']/*[local-name()='name' and @xml:lang='" + lang + "']/text()"); xpTypeFlow = xpath.compile("/f:flowDataSet/f:modellingAndValidation/f:LCIMethod/f:typeOfDataSet/text()"); } catch (XPathExpressionException e) { e.printStackTrace(); return null; } catch (ParserConfigurationException e) { e.printStackTrace(); return null; } Document doc = null; for (TFile file : files) { if (log.isDebugEnabled()) log.debug("parsing file " + file.getName()); if (StringUtils.endsWithIgnoreCase(file.getName(), ".xml")) { try { doc = builder.parse(new TFileInputStream(file)); String rootElement = (String) xpRootElement.evaluate(doc, XPathConstants.STRING); DatasetType type = DatasetType.fromRootElementName(rootElement); if (type == null) continue; // evaluate expression result on XML document String uuid = (String) xpUuid.evaluate(doc, XPathConstants.STRING); if (uuid == null) continue; uuid = uuid.toLowerCase(); if (log.isDebugEnabled()) log.debug(" found UUID: "+ uuid); String version = (String) xpVersion.evaluate(doc, 
XPathConstants.STRING); IDatasetReference ref = new DatasetReference(uuid, version, file.getAbsolutePath(), file.getName()); ref.setDatasetType(type); switch (type) { case FLOW: ref = new FlowDatasetReference(uuid, version, file.getAbsolutePath(), file.getName()); ref.setDatasetType(type); ((FlowDatasetReference) ref).setFlowType((String) xpTypeFlow.evaluate(doc, XPathConstants.STRING)); case PROCESS: case LCMODEL: ref.setName(evaluateName(doc, xpNameProcessFlowModel, xpNameProcessFlowModelLocalized)); if (log.isTraceEnabled()) log.trace(" type, name: " + type + " " + ref.getName()); break; case SOURCE: ref.setName(evaluateName(doc, xpNameSource, xpNameSourceLocalized)); if (log.isTraceEnabled()) log.trace(" type, name: " + type + " " + ref.getName()); break; case CONTACT: case FLOWPROPERTY: case LCIAMETHOD: case UNITGROUP: ref.setName(evaluateName(doc, xpNameOther, xpNameOtherLocalized)); if (log.isTraceEnabled()) log.trace(" type, name: " + type + " " + ref.getName()); break; default: break; } result.put(uuid.toLowerCase(), ref); } catch (XPathExpressionException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SAXException e) { ReferenceBuilder.this.eventsList.add(new ValidationEvent("General", Severity.ERROR, new DatasetReference(file.getAbsolutePath(), file.getName()), "error parsing file: " + e.getMessage())); log.warn("possibly invalid XML", e); } catch (IOException e) { ReferenceBuilder.this.eventsList.add(new ValidationEvent("General", Severity.ERROR, new DatasetReference(file.getAbsolutePath(), file.getName()), "error reading file" + e.getMessage())); log.warn("possibly invalid XML", e); } } else { DatasetType type = DatasetType.EXTERNAL_FILE; log.debug("putting " + file.getName() + " in reference cache"); IDatasetReference ref = new DatasetReference(null, null, file.getAbsolutePath(), file.getName()); ref.setDatasetType(type); result.put(file.getName(), ref); } } log.debug("returning " + result.size() + " results"); return result; } 
} private String evaluateName(Document doc, XPathExpression expr, XPathExpression exprLocalized) throws XPathExpressionException { String result = (String) exprLocalized.evaluate(doc, XPathConstants.STRING); if (StringUtils.isBlank(result)) result = (String) expr.evaluate(doc, XPathConstants.STRING); return result; } /** *

Getter for the field aspectName.

* * @return a {@link java.lang.String} object. */ public String getAspectName() { return aspectName; } /** *

Setter for the field aspectName.

* * @param aspectName a {@link java.lang.String} object. */ public void setAspectName(String aspectName) { this.aspectName = aspectName; } /** *

Getter for the field eventsList.

* * @return a {@link com.okworx.ilcd.validation.events.EventsList} object. */ public EventsList getEventsList() { return eventsList; } /** *

Setter for the field eventsList.

* * @param eventsList a {@link com.okworx.ilcd.validation.events.EventsList} object. */ public void setEventsList(EventsList eventsList) { this.eventsList = eventsList; } /** *

Getter for the field validationContext.

* * @return a {@link com.okworx.ilcd.validation.common.IValidationContext} object. */ public IValidationContext getValidationContext() { return validationContext; } /** {@inheritDoc} */ public void setValidationContext(IValidationContext validationContext) { this.validationContext = validationContext; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy