All downloads are free. The search and download functionality uses the official Maven repository.

com.okworx.ilcd.validation.analyze.SummaryExtractor Maven / Gradle / Ivy

Go to download

A Java library for performing technical validation of data in ILCD data format.

There is a newer version: 2.7.2
Show newest version
package com.okworx.ilcd.validation.analyze;

import com.okworx.ilcd.validation.AbstractDatasetsValidator;
import com.okworx.ilcd.validation.common.DatasetType;
import com.okworx.ilcd.validation.events.IValidationEvent;
import com.okworx.ilcd.validation.reference.IDatasetReference;
import com.okworx.ilcd.validation.util.*;
import net.java.truevfs.access.TFileInputStream;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.*;

// Extracts summary metadata from the datasets. Add this validator to a ValidatorChain
// to generate a datasets summary sheet in the batch mode report.
public class SummaryExtractor extends AbstractDatasetsValidator {
    /**
     * Names the aspect handled by this validator.
     *
     * @return the constant aspect name {@code "Summary"}
     */
    @Override
    public String getAspectName() {
        final String aspectName = "Summary";
        return aspectName;
    }

    /**
     * Describes the aspect handled by this validator.
     *
     * @return the constant description {@code "Metadata summary"}
     */
    @Override
    public String getAspectDescription() {
        final String aspectDescription = "Metadata summary";
        return aspectDescription;
    }

    /**
     * Summary entries extracted from process datasets. The extract tasks running on the
     * worker threads add to this collection, hence the concurrent queue.
     */
    protected Collection<ProcessSummaryDTO> summaryProcesses = new ConcurrentLinkedQueue<>();

    /**
     * Summary entries extracted from life cycle model datasets. The extract tasks running on
     * the worker threads add to this collection, hence the concurrent queue.
     */
    protected Collection<LCModelSummaryDTO> summaryLCModels = new ConcurrentLinkedQueue<>();

    /**
     * Extracts the summary metadata from all datasets registered with this validator.
     * <p>
     * The dataset references are split into partitions, one {@code ExtractTask} is created per
     * partition, and all tasks are run on a fixed-size thread pool whose size is taken from the
     * partitioned list. The entries collected by the tasks are available afterwards through
     * {@link #getSummaryProcesses()} and {@link #getSummaryLCModels()}.
     *
     * @return always {@code true}; a failing task is logged but does not change the result
     * @throws InterruptedException propagated from {@code super.validate()} or from the
     *                              interruption handling in {@code interrupted(e)}
     */
    @Override
    public boolean validate() throws InterruptedException {
        super.validate();

        updateStatusValidating();

        this.unitsTotal = this.objectsToValidate.size();

        PartitionedList<IDatasetReference> partList = new PartitionedList<>(
                this.objectsToValidate.values());

        Collection<Callable<TaskResult>> tasks = new ArrayList<>();
        for (List<IDatasetReference> refList : partList.getPartitions()) {
            tasks.add(new ExtractTask(this, refList));
        }

        ExecutorService executor = Executors.newFixedThreadPool(partList.getNumThreads());

        try {
            // invokeAll blocks until every task has completed; get() is only called to
            // surface an exception thrown inside a task.
            List<Future<TaskResult>> taskResults = executor.invokeAll(tasks);
            for (Future<TaskResult> taskResult : taskResults) {
                taskResult.get();
            }
        } catch (InterruptedException e) {
            interrupted(e);
        } catch (Exception e) {
            log.error("Extracting the datasets summary failed", e);
        } finally {
            // Release the pool threads on every exit path, including task failures.
            executor.shutdown();
        }

        updateProgress(1);
        updateStatusDone();

        return true;
    }

    /**
     * Returns the summary entries collected from process datasets.
     *
     * @return the live internal collection (not a copy); it is filled while {@code validate()} runs
     */
    public Collection<ProcessSummaryDTO> getSummaryProcesses() {
        return summaryProcesses;
    }

    /**
     * Returns the summary entries collected from life cycle model datasets.
     *
     * @return the live internal collection (not a copy); it is filled while {@code validate()} runs
     */
    public Collection<LCModelSummaryDTO> getSummaryLCModels() {
        return summaryLCModels;
    }

    final class ExtractTask extends AbstractDatasetsTask implements Callable {

        ExtractTask(AbstractDatasetsValidator validator, Collection files) {
            this.files = files;
            this.validator = validator;
        }

        public TaskResult call() throws Exception {
            return new TaskResult(extract(this.files), this.statistics);
        }

        // extracts metadata for the summary from the process datasets
        private Collection extract(Collection files) throws Exception {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            DocumentBuilder builder = factory.newDocumentBuilder();

            // Create XPathFactory object
            XPathFactory xpathFactory = XPathFactory.newInstance();
            XPath xpath = xpathFactory.newXPath();
            xpath.setNamespaceContext(new ILCDNameSpaceContext());

            // process
            XPathExpression xpProcessDataproviderName = xpath.compile("/p:processDataSet/p:administrativeInformation/p:publicationAndOwnership/common:referenceToOwnershipOfDataSet/common:shortDescription[@xml:lang='en' or not(@xml:lang)]");
//            XPathExpression xpProcessDataproviderUUID = xpath.compile("/p:processDataSet/p:administrativeInformation/p:publicationAndOwnership/common:referenceToOwnershipOfDataSet/@refObjectId");
            XPathExpression xpProcessGeoCode = xpath.compile("/p:processDataSet/p:processInformation/p:geography/p:locationOfOperationSupplyOrProduction/@location");
            XPathExpression xpProcessProcessType = xpath.compile("/p:processDataSet/p:modellingAndValidation/p:LCIMethodAndAllocation/p:typeOfDataSet");
            XPathExpression xpProcessDataCutOffAndCompletenessPrinciples = xpath.compile("/p:processDataSet/p:modellingAndValidation/p:dataSourcesTreatmentAndRepresentativeness/p:dataCutOffAndCompletenessPrinciples[@xml:lang='en' or not(@xml:lang)]");
            XPathExpression xpProcessDeviationsFromLCIMethodApproaches = xpath.compile("/p:processDataSet/p:modellingAndValidation/p:LCIMethodAndAllocation/p:deviationsFromLCIMethodApproaches[@xml:lang='en' or not(@xml:lang)]");

            // lc model
            XPathExpression xpLCModelDataproviderName = xpath.compile("/lcm:lifeCycleModelDataSet/lcm:administrativeInformation/lcm:publicationAndOwnership/common:referenceToOwnershipOfDataSet/common:shortDescription[@xml:lang='en' or not(@xml:lang)]");

            XPathExpression xpLCModelResultingProcess1UUID =    xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:dataSetInformation/lcm:referenceToResultingProcess[1]/@refObjectId");
            XPathExpression xpLCModelResultingProcess1Version = xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:dataSetInformation/lcm:referenceToResultingProcess[1]/@version");
            XPathExpression xpLCModelResultingProcess1Name =    xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:dataSetInformation/lcm:referenceToResultingProcess[1]/common:shortDescription[@xml:lang='en' or not(@xml:lang)]");
            XPathExpression xpLCModelResultingProcess2UUID =    xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:dataSetInformation/lcm:referenceToResultingProcess[2]/@refObjectId");
            XPathExpression xpLCModelResultingProcess2Version = xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:dataSetInformation/lcm:referenceToResultingProcess[2]/@version");
            XPathExpression xpLCModelResultingProcess2Name =    xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:dataSetInformation/lcm:referenceToResultingProcess[2]/common:shortDescription[@xml:lang='en' or not(@xml:lang)]");

            XPathExpression xpLCModelReferenceProcessUUID =     xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:technology/lcm:processes/lcm:processInstance[@dataSetInternalID=/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:quantitativeReference/lcm:referenceToReferenceProcess/text()]/lcm:referenceToProcess/@refObjectId");
            XPathExpression xpLCModelReferenceProcessVersion =  xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:technology/lcm:processes/lcm:processInstance[@dataSetInternalID=/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:quantitativeReference/lcm:referenceToReferenceProcess/text()]/lcm:referenceToProcess/@version");
            XPathExpression xpLCModelReferenceProcessName =     xpath.compile("/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:technology/lcm:processes/lcm:processInstance[@dataSetInternalID=/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:quantitativeReference/lcm:referenceToReferenceProcess/text()]/lcm:referenceToProcess/common:shortDescription[@xml:lang='en' or not(@xml:lang)]");

            XPathExpression xpLCModelProcessInstanceCount =     xpath.compile("count(/lcm:lifeCycleModelDataSet/lcm:lifeCycleModelInformation/lcm:technology/lcm:processes/lcm:processInstance)");

            for (IDatasetReference ref : files) {
                // for now, we're considering only processes
                if (DatasetType.PROCESS.equals(ref.getDatasetType())) {
                    Document doc = builder.parse(new TFileInputStream(ref.getAbsoluteFileName()));

                    String dataProviderName = (String) xpProcessDataproviderName.evaluate(doc, XPathConstants.STRING);
//                    String dataProviderUUID = (String) xpProcessDataproviderUUID.evaluate(doc, XPathConstants.STRING);
                    String processGeoCode = (String) xpProcessGeoCode.evaluate(doc, XPathConstants.STRING);
                    String processType = (String) xpProcessProcessType.evaluate(doc, XPathConstants.STRING);
                    String dataCutOffAndCompletenessPrinciples = (String) xpProcessDataCutOffAndCompletenessPrinciples.evaluate(doc, XPathConstants.STRING);
                    String deviationsFromLCIMethodApproaches = (String) xpProcessDeviationsFromLCIMethodApproaches.evaluate(doc, XPathConstants.STRING);

                    ProcessSummaryDTO summaryEntry = new ProcessSummaryDTO();
                    summaryEntry.uuid = ref.getUuid();
                    summaryEntry.version = ref.getVersion();
                    summaryEntry.name = ref.getName();
                    summaryEntry.geo = processGeoCode;
//                    summaryEntry.owner = (StringUtils.isNotEmpty(dataProviderUUID) ? dataProviderName + "  (" + dataProviderUUID + ")" : dataProviderName);
                    summaryEntry.owner = dataProviderName;
                    summaryEntry.processType = processType;
                    summaryEntry.dataCutOffAndCompletenessPrinciples = StringUtils.replaceEachRepeatedly(dataCutOffAndCompletenessPrinciples, new String[]{"\n", "\t", "  "}, new String[]{" ", " ", " "});
                    summaryEntry.deviationsFromLCIMethodApproaches = StringUtils.replaceEachRepeatedly(deviationsFromLCIMethodApproaches, new String[]{"\n", "\t", "  "}, new String[]{" ", " ", " "});

                    SummaryExtractor.this.summaryProcesses.add(summaryEntry);

                } else if (DatasetType.LCMODEL.equals(ref.getDatasetType())) {
                    Document doc = builder.parse(new TFileInputStream(ref.getAbsoluteFileName()));

                    String dataProviderName = (String) xpLCModelDataproviderName.evaluate(doc, XPathConstants.STRING);
                    String resultingProcess1UUID = (String) xpLCModelResultingProcess1UUID.evaluate(doc, XPathConstants.STRING);
                    String resultingProcess1Version = (String) xpLCModelResultingProcess1Version.evaluate(doc, XPathConstants.STRING);
                    String resultingProcess1Name = (String) xpLCModelResultingProcess1Name.evaluate(doc, XPathConstants.STRING);

                    String resultingProcess2UUID = (String) xpLCModelResultingProcess2UUID.evaluate(doc, XPathConstants.STRING);
                    String resultingProcess2Version = (String) xpLCModelResultingProcess2Version.evaluate(doc, XPathConstants.STRING);
                    String resultingProcess2Name = (String) xpLCModelResultingProcess2Name.evaluate(doc, XPathConstants.STRING);

                    String referenceProcessUUID = (String) xpLCModelReferenceProcessUUID.evaluate(doc, XPathConstants.STRING);
                    String referenceProcessVersion = (String) xpLCModelReferenceProcessVersion.evaluate(doc, XPathConstants.STRING);
                    String referenceProcessName = (String) xpLCModelReferenceProcessName.evaluate(doc, XPathConstants.STRING);

                    Integer processInstancesCount = null;
                    try {
                        String result = (String) xpLCModelProcessInstanceCount.evaluate(doc, XPathConstants.STRING);
                        processInstancesCount = Integer.parseInt(result);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }

                    LCModelSummaryDTO summaryEntry = new LCModelSummaryDTO();
                    summaryEntry.uuid = ref.getUuid();
                    summaryEntry.version = ref.getVersion();
                    summaryEntry.name = ref.getName();
                    summaryEntry.owner = dataProviderName;

                    summaryEntry.resultingProcess1UUID = resultingProcess1UUID;
                    summaryEntry.resultingProcess1Version = resultingProcess1Version;
                    summaryEntry.resultingProcess1Name = resultingProcess1Name;
                    summaryEntry.resultingProcess2UUID = resultingProcess2UUID;
                    summaryEntry.resultingProcess2Version = resultingProcess2Version;
                    summaryEntry.resultingProcess2Name = resultingProcess2Name;

                    summaryEntry.referenceProcessUUID = referenceProcessUUID;
                    summaryEntry.referenceProcessVersion = referenceProcessVersion;
                    summaryEntry.referenceProcessName = referenceProcessName;

                    summaryEntry.processInstancesCount = processInstancesCount;

                    log.debug("{}", summaryEntry);

                    SummaryExtractor.this.summaryLCModels.add(summaryEntry);
                }
            }

            return null;
        }
    }

    /**
     * Common base of the summary entries, holding the attributes that identify a dataset.
     * All fields start out as {@code null}.
     */
    public abstract static class SummaryDTO {
        /** UUID of the dataset. */
        public String uuid;

        /** Version of the dataset. */
        public String version;

        /** Name of the dataset. */
        public String name;

        /** Creates an entry with all fields unset. */
        public SummaryDTO() {
            super();
        }
    }

    /**
     * Summary entry for a process dataset. All fields start out as {@code null}.
     */
    public static class ProcessSummaryDTO extends SummaryDTO {
        /** Value of the location attribute of the process geography. */
        public String geo;

        /** Short description of the referenced data set owner. */
        public String owner;

        /** Type of the data set. */
        public String processType;

        /** Data cut-off and completeness principles, with whitespace collapsed. */
        public String dataCutOffAndCompletenessPrinciples;

        /** Deviations from the LCI method approaches, with whitespace collapsed. */
        public String deviationsFromLCIMethodApproaches;

        /** Creates an entry with all fields unset. */
        public ProcessSummaryDTO() {
            super();
        }

        /**
         * Renders all fields as {@code ProcessSummaryDTO{field='value', ...}}.
         *
         * @return the textual form of this entry
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("ProcessSummaryDTO{");
            text.append("uuid='").append(uuid).append('\'');
            text.append(", version='").append(version).append('\'');
            text.append(", name='").append(name).append('\'');
            text.append(", geo='").append(geo).append('\'');
            text.append(", owner='").append(owner).append('\'');
            text.append(", processType='").append(processType).append('\'');
            text.append(", dataCutOffAndCompletenessPrinciples='")
                    .append(dataCutOffAndCompletenessPrinciples).append('\'');
            text.append(", deviationsFromLCIMethodApproaches='")
                    .append(deviationsFromLCIMethodApproaches).append('\'');
            text.append('}');
            return text.toString();
        }
    }

    /**
     * Summary entry for a life cycle model dataset. All fields start out as {@code null}.
     */
    public static class LCModelSummaryDTO extends SummaryDTO {
        /** Creates an entry with all fields unset. */
        public LCModelSummaryDTO() {
            super();
        }

        /** Short description of the referenced data set owner. */
        public String owner = null;

        /** UUID, version and short description of the first referenced resulting process. */
        public String resultingProcess1UUID = null;
        public String resultingProcess1Version = null;
        public String resultingProcess1Name = null;

        /** UUID, version and short description of the second referenced resulting process. */
        public String resultingProcess2UUID = null;
        public String resultingProcess2Version = null;
        public String resultingProcess2Name = null;

        /** UUID, version and short description of the process referenced by the reference process instance. */
        public String referenceProcessUUID = null;
        public String referenceProcessVersion = null;
        public String referenceProcessName = null;

        /** Number of process instances in the model; {@code null} if it could not be determined. */
        public Integer processInstancesCount = null;

        /**
         * Renders all fields as {@code LCModelSummaryDTO{field='value', ...}}; this includes
         * {@code owner}, in line with {@code ProcessSummaryDTO}.
         *
         * @return the textual form of this entry
         */
        @Override
        public String toString() {
            return "LCModelSummaryDTO{" +
                    "uuid='" + uuid + '\'' +
                    ", version='" + version + '\'' +
                    ", name='" + name + '\'' +
                    ", owner='" + owner + '\'' +
                    ", resultingProcess1UUID='" + resultingProcess1UUID + '\'' +
                    ", resultingProcess1Version='" + resultingProcess1Version + '\'' +
                    ", resultingProcess1Name='" + resultingProcess1Name + '\'' +
                    ", resultingProcess2UUID='" + resultingProcess2UUID + '\'' +
                    ", resultingProcess2Version='" + resultingProcess2Version + '\'' +
                    ", resultingProcess2Name='" + resultingProcess2Name + '\'' +
                    ", referenceProcessUUID='" + referenceProcessUUID + '\'' +
                    ", referenceProcessVersion='" + referenceProcessVersion + '\'' +
                    ", referenceProcessName='" + referenceProcessName + '\'' +
                    ", processInstancesCount='" + processInstancesCount + '\'' +
                    '}';
        }
    }

    /**
     * Sets the status message shown while the summary is being extracted.
     */
    protected void updateStatusValidating() {
        final String statusMessage = "Processing for datasets summary...";
        updateStatus(statusMessage);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy