All downloads are free. Search and download functionality uses the official Maven repository.

com.okworx.ilcd.validation.LinkValidator Maven / Gradle / Ivy

Go to download

A Java library for performing technical validation of data in ILCD data format.

There is a newer version: 2.7.2
Show newest version
package com.okworx.ilcd.validation;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;

import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import net.java.truevfs.access.TFileInputStream;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.okworx.ilcd.validation.common.DatasetType;
import com.okworx.ilcd.validation.events.IValidationEvent;
import com.okworx.ilcd.validation.events.Severity;
import com.okworx.ilcd.validation.events.ValidationEvent;
import com.okworx.ilcd.validation.reference.DatasetReference;
import com.okworx.ilcd.validation.reference.IDatasetReference;
import com.okworx.ilcd.validation.reference.ReferenceCache;
import com.okworx.ilcd.validation.util.AbstractDatasetsTask;
import com.okworx.ilcd.validation.util.ILCDNameSpaceContext;
import com.okworx.ilcd.validation.util.PartitionedList;
import com.okworx.ilcd.validation.util.TaskResult;
import com.okworx.ilcd.validation.util.TransletCache;

/**
 * Checks whether local references (links) within datasets are valid.
 *
 * @author oliver.kusche
 * @version $Id: $Id
 */
public class LinkValidator extends AbstractReferenceObjectsAwareValidator implements IValidator {

	/** Constant PARAM_IGNORE_REFS_TO_LCIAMETHODS="ignoreReferencesToLCIAMethods" */
	public static final String PARAM_IGNORE_REFS_TO_LCIAMETHODS = "ignoreReferencesToLCIAMethods";

	/** Constant PARAM_IGNORE_COMPLEMENTINGPROCESS="ignoreComplementingProcess" */
	public static final String PARAM_IGNORE_COMPLEMENTINGPROCESS = "ignoreComplementingProcess";

	/** Constant PARAM_IGNORE_INCLUDEDPROCESSES="ignoreIncludedProcesses" */
	public static final String PARAM_IGNORE_INCLUDEDPROCESSES = "ignoreIncludedProcesses";

	/** Constant PARAM_IGNORE_PRECEDINGDATASETVERSION="ignorePrecedingDatasetVersion" */
	public static final String PARAM_IGNORE_PRECEDINGDATASETVERSION = "ignorePrecedingDatasetVersion";

	/** Constant PARAM_IGNORE_REFS_WITH_REMOTE_LINKS="ignoreReferencesWithRemoteLinks" */
	public static final String PARAM_IGNORE_REFS_WITH_REMOTE_LINKS = "ignoreReferencesWithRemoteLinks";

	/** {@inheritDoc} */
	@Override
	public String getAspectName() {
		return "Links";
	}

	/**
	 * 

getAspectDescription.

* * @return a {@link java.lang.String} object. */ public String getAspectDescription() { return "Checks whether local references (links) within a set datasets are valid."; } protected ReferenceCache referenceObjectsCache; // the key is the referenced source's UUID protected ConcurrentHashMap referencesToSourceWithImages = new ConcurrentHashMap<>(); // the key is the originating dataset's (source dataset's) UUID protected ConcurrentHashMap referencesToDigitalFile = new ConcurrentHashMap<>(); // TODO refactor out link extraction routine /** *

validate.

* * @return boolean. * @throws java.lang.InterruptedException if any. */ public boolean validate() throws InterruptedException { super.validate(); updateStatusValidating(); this.unitsTotal = this.objectsToValidate.size(); log.debug("initializing reference objects cache..."); this.referenceObjectsCache = new ReferenceCache(); this.referenceObjectsCache.put(this.objectsToValidate); log.debug("done - " + this.referenceObjectsCache.getLinks().size() + " objects in cache."); boolean result = true; try { XPathExpression expr = setupXpathRef(); PartitionedList partList = new PartitionedList<>(this.objectsToValidate.values()); log.debug(this.unitsTotal + " total objects split into " + partList.getPartitions().size() + " chunks"); Collection> tasks = new ArrayList<>(); for (List refList : partList.getPartitions()) { tasks.add(new LinkExtractorTask(refList, TransletCache.getInstance().getTranslet(), expr, this)); } ExecutorService executor = Executors.newFixedThreadPool(partList.getNumThreads()); try { List> taskResults = executor.invokeAll(tasks); for (Future taskResult : taskResults) { if (taskResult.get() != null) { TaskResult res = taskResult.get(); this.eventsList.addAll(res.getValidationEvents()); this.statistics.add(res.getStatistics()); } } executor.shutdown(); } catch (InterruptedException e) { executor.shutdown(); interrupted(e); } catch (Exception e) { log.error(e); e.printStackTrace(); } checkImagesReferences(); log.info(this.eventsList.getEvents().size() + " events"); updateProgress(1); updateStatusDone(); return this.getEventsList().isPositive(); } catch (Exception e) { log.error(e); e.printStackTrace(); } return result; } private XPathExpression setupXpathRef() throws XPathExpressionException { XPathFactory xpathFactory = XPathFactory.newInstance(); XPath xpath = xpathFactory.newXPath(); xpath.setNamespaceContext(new ILCDNameSpaceContext()); return xpath.compile("/*/*"); } // this check whether any local file attachments referenced for 
"referenceToTechnologyFlowDiagrammOrPicture" or // "referenceToTechnologyPictogramme" have a valid extension of either .png, .jpg or .jpeg (case insensitive) private void checkImagesReferences() { if (log.isDebugEnabled()) { log.debug(this.referencesToSourceWithImages.size() + " links to images found"); log.debug("referencesToSourceWithImages: " + referencesToSourceWithImages); log.debug("referencesToDigitalFile: " + referencesToDigitalFile); } for (String key : this.referencesToSourceWithImages.keySet()) { IDatasetReference ref = this.referencesToSourceWithImages.get(key); if (log.isDebugEnabled()) log.debug("source with image: " + key + ": " + ref); if (this.referencesToDigitalFile.containsKey(ref.getUuid())) { IDatasetReference digitalFileRef = this.referencesToDigitalFile.get(ref.getUuid()); String fileName = digitalFileRef.getUri().toLowerCase(); if (log.isDebugEnabled()) log.debug("examining " + fileName); // we allow .png, .jpg, .jpeg regardless of case, otherwise we'll generate a warning if (! (fileName.endsWith(".png") || fileName.endsWith(".jpg") || fileName.endsWith(".jpeg") ) ) { ValidationEvent e = new ValidationEvent(LinkValidator.this.getAspectName(), Severity.WARNING, digitalFileRef, "the image should be in either PNG or JPG format"); e.setAltMessage("the image should be in either PNG or JPG format"); this.eventsList.add(e); } } else if (log.isDebugEnabled()){ log.debug("no referenceToDigitalFile found for key " + ref.getUuid()); } } } final class LinkExtractorTask extends AbstractDatasetsTask implements Callable { private static final String MESSAGE_INVALID_REFERENCE = "Invalid (because empty) reference, neither UUID nor URI specified. 
Reference to "; private static final String MESSAGE_TO = " to "; private static final String MESSAGE_COULD_NOT_RESOLVE_REFERENCE = "could not resolve reference "; private final XPathExpression expr; private final Templates templates; LinkExtractorTask(List files, Templates templates, XPathExpression expr, LinkValidator validator) { this.files = files; this.expr = expr; this.templates = templates; this.validator = validator; } public TaskResult call() throws Exception { return new TaskResult(extract(files), this.statistics); } // this extracts all references from the given datasets using the extractReferences.xsl stylesheet private Collection extract(Collection files) throws Exception { Collection events = new ArrayList<>(); Transformer transformer = templates.newTransformer(); int count = 0; for (IDatasetReference reference : files) { if (Thread.currentThread().isInterrupted()) { log.info("operation was interrupted, aborting"); updateStatusCancelled(); break; } int eventCount = events.size(); if (!reference.getDatasetType().equals(DatasetType.EXTERNAL_FILE)) checkReference(events, transformer, reference); boolean success = (eventCount == events.size()); this.statistics.update(reference, success); if (success && LinkValidator.this.reportSuccesses) events.add(new ValidationEvent(LinkValidator.this.getAspectName(), Severity.SUCCESS, reference, ValidationEvent.SUCCESS_MESSAGE)); count = updateChunkCount(count); } return events; } // this will first extract the links and then check them against the global list of references, // reporting every reference that is given without the corresponding object being available private void checkReference(Collection events, Transformer transformer, IDatasetReference reference) throws TransformerException, XPathExpressionException, IOException { NodeList refs = extractNodeList(transformer, reference); List links = new ArrayList<>(); boolean paramCheckReferencesWithRemoteLink = !BooleanUtils.isTrue((Boolean) 
this.validator.getParameter(PARAM_IGNORE_REFS_WITH_REMOTE_LINKS)); for (int i = 0; i < refs.getLength(); i++) { Node ref = refs.item(i); String uuid = ref.getAttributes().getNamedItem(ATTRIBUTE_REF_OBJECT_ID).getNodeValue().trim(); String uri = ref.getAttributes().getNamedItem(ATTRIBUTE_URI).getNodeValue().trim(); String origin = ref.getAttributes().getNamedItem(ATTRIBUTE_ORIGIN).getNodeValue().trim(); log.trace("{} {} {}", uuid, uri, origin); String name = null; try { name = ref.getAttributes().getNamedItem(ATTRIBUTE_NAME).getNodeValue().trim(); } catch (NullPointerException e1) { } // skip remote links if configured if (!paramCheckReferencesWithRemoteLink && (StringUtils.startsWithIgnoreCase(uri.trim(), URL_PREFIX_HTTP) || StringUtils.startsWithIgnoreCase(uri.trim(), URL_PREFIX_HTTPS))) continue; String type = ref.getAttributes().getNamedItem(ATTRIBUTE_TYPE).getNodeValue(); DatasetType dsType = null; try { dsType = DatasetType.fromValue(type); } catch (Exception e) { log.error("invalid dataset type " + type); } IDatasetReference dsRef = new DatasetReference(uuid, uri, dsType, null, DatasetType.EXTERNAL_FILE.equals(dsType) ? 
FilenameUtils.getName(uri.trim()) : null, origin); dsRef.setName(name); dsRef.setOriginDatasetUUID(reference.getUuid()); links.add(dsRef); if ("referenceToTechnologyFlowDiagrammOrPicture".equals(dsRef.getOrigin()) || "referenceToTechnologyPictogramme".equals(dsRef.getOrigin()) || "referenceToDiagram".equals(dsRef.getOrigin())) { if (log.isDebugEnabled()) log.debug("we've got an image: " + dsRef.getUuid()); referencesToSourceWithImages.put(dsRef.getUuid(), dsRef); } } log.debug("checking " + links.size() + " links"); boolean skipReferenceObjects = BooleanUtils.isTrue((Boolean) this.validator.parameters.get(LinkValidator.PARAM_IGNORE_REFERENCE_OBJECTS)); for (IDatasetReference ref : links) { if (!LinkValidator.this.referenceObjectsCache.contains(ref) && !ref.equals(reference)) { // if skipReferenceObjects param is set, check whether it's listed there if (skipReferenceObjects) { if (((LinkValidator) this.validator).referenceElementaryFlows.containsKey(ref.getUuid()) || ((LinkValidator) this.validator).referenceObjectsOther.containsKey(ref.getUuid())) { if (log.isDebugEnabled()) log.debug("ignoring object found in reference list: " + ref.getUuid()); continue; } } // check whether we might have an external document if (ref.getDatasetType().equals(DatasetType.EXTERNAL_FILE)) { // store the reference in a shared map for checking proper file extensions later // the key of the map is the UUID of the source dataset containing the reference to the ext. 
doc if (ref.getUuid() != null) LinkValidator.this.referencesToDigitalFile.put(reference.getUuid(), ref); else if (log.isDebugEnabled()) log.debug("UUID is null: " + ref); if (log.isDebugEnabled()) log.debug(ref.getOrigin() + " : " + ref.getShortFileName() + " " + LinkValidator.this.referenceObjectsCache.getLinks() .containsKey(ref.getShortFileName())); if (LinkValidator.this.referenceObjectsCache.getLinks() .containsKey(ref.getShortFileName())) continue; } StringBuilder messagePrefix = new StringBuilder(MESSAGE_COULD_NOT_RESOLVE_REFERENCE); messagePrefix.append(ref.getOrigin()); messagePrefix.append(MESSAGE_TO); StringBuilder messageSuffix = new StringBuilder(" ("); messageSuffix.append(ref.getName()).append(")"); if (StringUtils.isBlank(ref.getUuid()) && StringUtils.isBlank(ref.getUri())) { messagePrefix = new StringBuilder(MESSAGE_INVALID_REFERENCE).append(ref.getDatasetType().getValue()); messageSuffix = new StringBuilder(); } StringBuilder message = new StringBuilder(); message.append(" "); message.append(ref.getDatasetType().getValue()); message.append(" "); message.append(StringUtils.isNotBlank(ref.getUuid()) ? 
ref.getUuid() : ref.getUri()); message.insert(0, messagePrefix); message.append(messageSuffix); IValidationEvent event = new ValidationEvent(LinkValidator.this.getAspectName(), Severity.ERROR, reference, message.toString()); event.setMessageReference(ref); if (ref.getDatasetType().equals(DatasetType.EXTERNAL_FILE)) ref.setName(ref.getShortFileName()); event.setAltMessage(messagePrefix.toString()); events.add(event); } } } private NodeList extractNodeList(Transformer transformer, IDatasetReference reference) throws TransformerException, XPathExpressionException, IOException { DOMResult transformResult = new DOMResult(); boolean paramSkipSiam = BooleanUtils.isTrue((Boolean) this.validator.getParameter(PARAM_IGNORE_REFS_TO_LCIAMETHODS)); if (paramSkipSiam) { if (LinkValidator.this.log.isDebugEnabled()) LinkValidator.this.log.debug("setting skipSupportedImpactAssessmentMethods=true"); transformer.setParameter(PARAM_IGNORE_REFS_TO_LCIAMETHODS, Boolean.TRUE); } boolean paramSkipComplementingProcesses = BooleanUtils.isTrue((Boolean) this.validator.getParameter(PARAM_IGNORE_COMPLEMENTINGPROCESS)); if (paramSkipComplementingProcesses) { if (LinkValidator.this.log.isDebugEnabled()) LinkValidator.this.log.debug("setting skipComplementingProcess=true"); transformer.setParameter(PARAM_IGNORE_COMPLEMENTINGPROCESS, Boolean.TRUE); } boolean paramSkipIncludedProcesses = BooleanUtils.isTrue((Boolean) this.validator.getParameter(PARAM_IGNORE_INCLUDEDPROCESSES)); if (paramSkipIncludedProcesses) { if (LinkValidator.this.log.isDebugEnabled()) LinkValidator.this.log.debug("setting skipIncludedProcesses=true"); transformer.setParameter(PARAM_IGNORE_INCLUDEDPROCESSES, Boolean.TRUE); } boolean paramSkipPrecedingDatasetVersion = BooleanUtils.isTrue((Boolean) this.validator.getParameter(PARAM_IGNORE_PRECEDINGDATASETVERSION)); if (paramSkipPrecedingDatasetVersion) { if (LinkValidator.this.log.isDebugEnabled()) LinkValidator.this.log.debug("setting skipPrecedingDatasetVersion=true"); 
transformer.setParameter(PARAM_IGNORE_PRECEDINGDATASETVERSION, Boolean.TRUE); } transformer.transform(new StreamSource(wrapInputStream(new TFileInputStream(reference.getAbsoluteFileName()))), transformResult); Document resultDoc = (Document) transformResult.getNode(); return (NodeList) expr.evaluate(resultDoc, XPathConstants.NODESET); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy