com.okworx.ilcd.validation.LinkValidator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ilcd-validation Show documentation
Show all versions of ilcd-validation Show documentation
A Java library for performing technical validation of data in ILCD data format.
package com.okworx.ilcd.validation;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import net.java.truevfs.access.TFileInputStream;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.okworx.ilcd.validation.common.DatasetType;
import com.okworx.ilcd.validation.events.IValidationEvent;
import com.okworx.ilcd.validation.events.Severity;
import com.okworx.ilcd.validation.events.ValidationEvent;
import com.okworx.ilcd.validation.reference.DatasetReference;
import com.okworx.ilcd.validation.reference.IDatasetReference;
import com.okworx.ilcd.validation.reference.ReferenceCache;
import com.okworx.ilcd.validation.util.AbstractDatasetsTask;
import com.okworx.ilcd.validation.util.ILCDNameSpaceContext;
import com.okworx.ilcd.validation.util.PartitionedList;
import com.okworx.ilcd.validation.util.TaskResult;
import com.okworx.ilcd.validation.util.TransletCache;
/**
* Checks whether local references (links) within datasets are valid.
*
* @author oliver.kusche
* @version $Id: $Id
*/
public class LinkValidator extends AbstractReferenceObjectsAwareValidator implements IValidator {
/**
* Name of the validator parameter that, when set to {@code Boolean.TRUE}, is passed on under the same name
* to the transformer extracting the references (presumably making the stylesheet skip references to LCIA
* methods - confirm against the stylesheet).
* Constant PARAM_IGNORE_REFS_TO_LCIAMETHODS="ignoreReferencesToLCIAMethods"
*/
public static final String PARAM_IGNORE_REFS_TO_LCIAMETHODS = "ignoreReferencesToLCIAMethods";
/**
* Name of the validator parameter that, when set to {@code Boolean.TRUE}, is passed on under the same name
* to the transformer extracting the references (presumably making the stylesheet skip the complementing
* process reference - confirm against the stylesheet).
* Constant PARAM_IGNORE_COMPLEMENTINGPROCESS="ignoreComplementingProcess"
*/
public static final String PARAM_IGNORE_COMPLEMENTINGPROCESS = "ignoreComplementingProcess";
/**
* Name of the validator parameter that, when set to {@code Boolean.TRUE}, is passed on under the same name
* to the transformer extracting the references (presumably making the stylesheet skip references to
* included processes - confirm against the stylesheet).
* Constant PARAM_IGNORE_INCLUDEDPROCESSES="ignoreIncludedProcesses"
*/
public static final String PARAM_IGNORE_INCLUDEDPROCESSES = "ignoreIncludedProcesses";
/**
* Name of the validator parameter that, when set to {@code Boolean.TRUE}, is passed on under the same name
* to the transformer extracting the references (presumably making the stylesheet skip the reference to the
* preceding dataset version - confirm against the stylesheet).
* Constant PARAM_IGNORE_PRECEDINGDATASETVERSION="ignorePrecedingDatasetVersion"
*/
public static final String PARAM_IGNORE_PRECEDINGDATASETVERSION = "ignorePrecedingDatasetVersion";
/**
* Name of the validator parameter that, when set to {@code Boolean.TRUE}, makes the link check skip every
* extracted reference whose URI starts with the HTTP or HTTPS URL prefix (compared case-insensitively).
* Constant PARAM_IGNORE_REFS_WITH_REMOTE_LINKS="ignoreReferencesWithRemoteLinks"
*/
public static final String PARAM_IGNORE_REFS_WITH_REMOTE_LINKS = "ignoreReferencesWithRemoteLinks";
/**
 * {@inheritDoc}
 *
 * @return the constant aspect name {@code "Links"}.
 */
@Override
public String getAspectName() {
    final String aspectName = "Links";
    return aspectName;
}
/**
 * Returns the human-readable description of the aspect covered by this validator.
 *
 * @return a {@link java.lang.String} holding the fixed description text.
 */
public String getAspectDescription() {
    final String description = "Checks whether local references (links) within a set datasets are valid.";
    return description;
}
// cache of the objects to validate; (re)created and filled at the start of validate()
protected ReferenceCache referenceObjectsCache;
// references found under one of the image-type origins (see LinkExtractorTask);
// the key is the referenced source's UUID
protected ConcurrentHashMap<String, IDatasetReference> referencesToSourceWithImages = new ConcurrentHashMap<>();
// references to external files that could not be found in the reference cache;
// the key is the originating dataset's (source dataset's) UUID
protected ConcurrentHashMap<String, IDatasetReference> referencesToDigitalFile = new ConcurrentHashMap<>();
// TODO refactor out link extraction routine
/**
 * Checks the local references (links) of all objects to validate.
 * <p>
 * Fills the reference cache with the objects to validate, runs one {@code LinkExtractorTask} per partition
 * on a fixed thread pool, merges the events and statistics returned by the tasks and finally checks the
 * file extensions of the referenced images.
 *
 * @return {@code true} if the link extraction ran without an unexpected error and the collected events are
 *         positive, {@code false} otherwise.
 * @throws java.lang.InterruptedException if any.
 */
public boolean validate() throws InterruptedException {
    super.validate();
    updateStatusValidating();

    this.unitsTotal = this.objectsToValidate.size();

    log.debug("initializing reference objects cache...");
    this.referenceObjectsCache = new ReferenceCache();
    this.referenceObjectsCache.put(this.objectsToValidate);
    log.debug("done - " + this.referenceObjectsCache.getLinks().size() + " objects in cache.");

    try {
        XPathExpression expr = setupXpathRef();

        PartitionedList<IDatasetReference> partList = new PartitionedList<>(this.objectsToValidate.values());
        log.debug(this.unitsTotal + " total objects split into " + partList.getPartitions().size() + " chunks");

        Collection<Callable<TaskResult>> tasks = new ArrayList<>();
        for (List<IDatasetReference> refList : partList.getPartitions()) {
            tasks.add(new LinkExtractorTask(refList, TransletCache.getInstance().getTranslet(), expr, this));
        }

        // without any task there is nothing to run, so no thread pool has to be created
        boolean completed = tasks.isEmpty() || runLinkExtraction(tasks, partList.getNumThreads());

        checkImagesReferences();

        log.info(this.eventsList.getEvents().size() + " events");

        updateProgress(1);
        updateStatusDone();

        // a run that lost results because of an unexpected error must not be reported as valid
        return completed && this.getEventsList().isPositive();
    } catch (InterruptedException e) {
        // declared by this method, so it is handed to the caller instead of being swallowed below
        throw e;
    } catch (Exception e) {
        log.error("link validation could not be performed", e);
    }
    // validation was aborted by an unexpected error
    return false;
}

// Runs the given tasks on a fixed thread pool and merges their events and statistics into this validator.
// Returns false if an unexpected error occurred while running the tasks or collecting their results.
private boolean runLinkExtraction(Collection<Callable<TaskResult>> tasks, int numThreads)
        throws InterruptedException {
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    try {
        for (Future<TaskResult> future : executor.invokeAll(tasks)) {
            TaskResult res = future.get();
            if (res != null) {
                this.eventsList.addAll(res.getValidationEvents());
                this.statistics.add(res.getStatistics());
            }
        }
        return true;
    } catch (InterruptedException e) {
        // same order as before: stop the pool first, then hand over to the interruption handler
        executor.shutdown();
        interrupted(e);
        return true;
    } catch (Exception e) {
        log.error("link extraction failed", e);
        return false;
    } finally {
        // shutdown() is idempotent; this also releases the pool threads on the failure path
        executor.shutdown();
    }
}
// Compiles the XPath expression "/*/*" (all child elements of the document element) with an
// ILCDNameSpaceContext installed as namespace context.
private XPathExpression setupXpathRef() throws XPathExpressionException {
    final XPath evaluator = XPathFactory.newInstance().newXPath();
    evaluator.setNamespaceContext(new ILCDNameSpaceContext());
    final XPathExpression secondLevelElements = evaluator.compile("/*/*");
    return secondLevelElements;
}
// Checks, for every reference collected in referencesToSourceWithImages, whether a digital file reference
// is registered under the referenced source's UUID and, if so, whether its URI ends with .png, .jpg or
// .jpeg (case insensitive). Any other ending adds a WARNING event.
private void checkImagesReferences() {
    if (log.isDebugEnabled()) {
        log.debug(this.referencesToSourceWithImages.size() + " links to images found");
        log.debug("referencesToSourceWithImages: " + referencesToSourceWithImages);
        log.debug("referencesToDigitalFile: " + referencesToDigitalFile);
    }

    for (Map.Entry<String, IDatasetReference> imageEntry : this.referencesToSourceWithImages.entrySet()) {
        final IDatasetReference sourceRef = imageEntry.getValue();

        if (log.isDebugEnabled()) {
            log.debug("source with image: " + imageEntry.getKey() + ": " + sourceRef);
        }

        // the map holds no null values, so a null result means "no entry for this source"
        final IDatasetReference digitalFileRef = this.referencesToDigitalFile.get(sourceRef.getUuid());
        if (digitalFileRef == null) {
            if (log.isDebugEnabled()) {
                log.debug("no referenceToDigitalFile found for key " + sourceRef.getUuid());
            }
            continue;
        }

        final String fileName = digitalFileRef.getUri().toLowerCase();
        if (log.isDebugEnabled()) {
            log.debug("examining " + fileName);
        }

        // .png, .jpg and .jpeg are accepted regardless of case, everything else yields a warning
        final boolean acceptedFormat = fileName.endsWith(".png")
                || fileName.endsWith(".jpg")
                || fileName.endsWith(".jpeg");
        if (acceptedFormat) {
            continue;
        }

        ValidationEvent warning = new ValidationEvent(LinkValidator.this.getAspectName(), Severity.WARNING, digitalFileRef, "the image should be in either PNG or JPG format");
        warning.setAltMessage("the image should be in either PNG or JPG format");
        this.eventsList.add(warning);
    }
}
final class LinkExtractorTask extends AbstractDatasetsTask implements Callable {
// message prefix used when a reference carries neither a UUID nor a URI; the dataset type value is appended to it
private static final String MESSAGE_INVALID_REFERENCE = "Invalid (because empty) reference, neither UUID nor URI specified. Reference to ";
// text placed after the origin of a reference in the "could not resolve" message
private static final String MESSAGE_TO = " to ";
// message prefix used when a referenced object could not be found
private static final String MESSAGE_COULD_NOT_RESOLVE_REFERENCE = "could not resolve reference ";
// compiled XPath expression evaluated against the transformation result to obtain the reference nodes
private final XPathExpression expr;
// compiled stylesheet from which extract() creates the Transformer it uses for its datasets
private final Templates templates;
/**
 * Creates a task that extracts and checks the links of the given datasets.
 *
 * @param files     the datasets this task works on
 * @param templates the compiled stylesheet used to extract the references
 * @param expr      the XPath expression selecting the reference nodes from the transformation result
 * @param validator the validator this task reads its parameters from
 */
LinkExtractorTask(List<IDatasetReference> files, Templates templates, XPathExpression expr,
        LinkValidator validator) {
    // files and validator are not declared in this class (presumably inherited from AbstractDatasetsTask)
    this.files = files;
    this.expr = expr;
    this.templates = templates;
    this.validator = validator;
}
// Extracts and checks the links of this task's datasets and returns the resulting events together
// with this task's statistics.
public TaskResult call() throws Exception {
    final Collection<IValidationEvent> extractedEvents = extract(files);
    return new TaskResult(extractedEvents, this.statistics);
}
// Checks the references of every given dataset (external files themselves are not inspected) and returns
// the events raised along the way. A dataset counts as successful when checking it added no event; the
// statistics are updated accordingly and, if reportSuccesses is set, a SUCCESS event is added for it.
// Stops early (after marking the status as cancelled) when the current thread has been interrupted.
private Collection<IValidationEvent> extract(Collection<IDatasetReference> files) throws Exception {
    Collection<IValidationEvent> events = new ArrayList<>();

    Transformer transformer = templates.newTransformer();

    int count = 0;

    for (IDatasetReference reference : files) {
        if (Thread.currentThread().isInterrupted()) {
            log.info("operation was interrupted, aborting");
            updateStatusCancelled();
            break;
        }

        int eventsBefore = events.size();

        // constant-first comparison so that a dataset without a type does not raise a NullPointerException
        if (!DatasetType.EXTERNAL_FILE.equals(reference.getDatasetType())) {
            checkReference(events, transformer, reference);
        }

        boolean success = (eventsBefore == events.size());

        this.statistics.update(reference, success);

        if (success && LinkValidator.this.reportSuccesses) {
            events.add(new ValidationEvent(LinkValidator.this.getAspectName(), Severity.SUCCESS, reference, ValidationEvent.SUCCESS_MESSAGE));
        }

        count = updateChunkCount(count);
    }
    return events;
}
// Extracts the links of the given dataset and checks them against the reference cache, adding an ERROR
// event to the given collection for every link whose target object is not available.
private void checkReference(Collection<IValidationEvent> events, Transformer transformer,
        IDatasetReference reference) throws TransformerException, XPathExpressionException, IOException {

    List<IDatasetReference> links = collectLinks(extractNodeList(transformer, reference), reference);

    log.debug("checking " + links.size() + " links");

    boolean skipReferenceObjects = BooleanUtils.isTrue((Boolean) this.validator.parameters.get(LinkValidator.PARAM_IGNORE_REFERENCE_OBJECTS));

    for (IDatasetReference ref : links) {
        // resolvable links and links of the dataset to itself need no further attention
        if (LinkValidator.this.referenceObjectsCache.contains(ref) || ref.equals(reference)) {
            continue;
        }

        // if skipReferenceObjects param is set, check whether it's listed there
        if (skipReferenceObjects && isListedReferenceObject(ref)) {
            if (log.isDebugEnabled()) {
                log.debug("ignoring object found in reference list: " + ref.getUuid());
            }
            continue;
        }

        // constant-first comparison: the type is null when the extracted type value was invalid
        boolean externalFile = DatasetType.EXTERNAL_FILE.equals(ref.getDatasetType());

        // check whether we might have an external document
        if (externalFile) {
            // store the reference in a shared map for checking proper file extensions later
            // the key of the map is the UUID of the source dataset containing the reference to the ext. doc
            if (ref.getUuid() != null) {
                LinkValidator.this.referencesToDigitalFile.put(reference.getUuid(), ref);
            } else if (log.isDebugEnabled()) {
                log.debug("UUID is null: " + ref);
            }

            boolean fileKnown = LinkValidator.this.referenceObjectsCache.getLinks()
                    .containsKey(ref.getShortFileName());
            if (log.isDebugEnabled()) {
                log.debug(ref.getOrigin() + " : " + ref.getShortFileName() + " " + fileKnown);
            }
            if (fileKnown) {
                continue;
            }
        }

        events.add(createUnresolvedReferenceEvent(reference, ref, externalFile));
    }
}

// Turns the extracted reference nodes into dataset references originating from the given dataset.
// References with a remote (http/https) URI are left out if the validator is configured to ignore them;
// references found under one of the image origins are additionally registered in
// referencesToSourceWithImages.
private List<IDatasetReference> collectLinks(NodeList refs, IDatasetReference reference) {
    List<IDatasetReference> links = new ArrayList<>();

    boolean ignoreRemoteLinks = BooleanUtils.isTrue((Boolean) this.validator.getParameter(PARAM_IGNORE_REFS_WITH_REMOTE_LINKS));

    for (int i = 0; i < refs.getLength(); i++) {
        Node ref = refs.item(i);

        String uuid = ref.getAttributes().getNamedItem(ATTRIBUTE_REF_OBJECT_ID).getNodeValue().trim();
        String uri = ref.getAttributes().getNamedItem(ATTRIBUTE_URI).getNodeValue().trim();
        String origin = ref.getAttributes().getNamedItem(ATTRIBUTE_ORIGIN).getNodeValue().trim();

        log.trace("{} {} {}", uuid, uri, origin);

        // the name attribute is optional: a missing attribute simply leaves the name unset
        Node nameAttribute = ref.getAttributes().getNamedItem(ATTRIBUTE_NAME);
        String name = null;
        if (nameAttribute != null && nameAttribute.getNodeValue() != null) {
            name = nameAttribute.getNodeValue().trim();
        }

        // skip remote links if configured (uri has already been trimmed above)
        if (ignoreRemoteLinks && (StringUtils.startsWithIgnoreCase(uri, URL_PREFIX_HTTP)
                || StringUtils.startsWithIgnoreCase(uri, URL_PREFIX_HTTPS))) {
            continue;
        }

        String type = ref.getAttributes().getNamedItem(ATTRIBUTE_TYPE).getNodeValue();

        // an invalid type is logged and the reference is kept with a null dataset type
        DatasetType dsType = null;
        try {
            dsType = DatasetType.fromValue(type);
        } catch (Exception e) {
            log.error("invalid dataset type " + type);
        }

        IDatasetReference dsRef = new DatasetReference(uuid, uri, dsType, null, DatasetType.EXTERNAL_FILE.equals(dsType) ? FilenameUtils.getName(uri) : null, origin);
        dsRef.setName(name);
        dsRef.setOriginDatasetUUID(reference.getUuid());

        links.add(dsRef);

        if (isImageOrigin(dsRef.getOrigin())) {
            if (log.isDebugEnabled()) {
                log.debug("we've got an image: " + dsRef.getUuid());
            }
            referencesToSourceWithImages.put(dsRef.getUuid(), dsRef);
        }
    }

    return links;
}

// Tells whether the given origin is one of the three origins whose references are treated as images.
private boolean isImageOrigin(String origin) {
    return "referenceToTechnologyFlowDiagrammOrPicture".equals(origin)
            || "referenceToTechnologyPictogramme".equals(origin)
            || "referenceToDiagram".equals(origin);
}

// Tells whether the UUID of the given reference is contained in the validator's reference elementary
// flows or in its other reference objects.
private boolean isListedReferenceObject(IDatasetReference ref) {
    LinkValidator linkValidator = (LinkValidator) this.validator;
    return linkValidator.referenceElementaryFlows.containsKey(ref.getUuid())
            || linkValidator.referenceObjectsOther.containsKey(ref.getUuid());
}

// Builds the ERROR event reporting that the link ref found in dataset reference could not be resolved.
// The message texts are the same as before; only a missing dataset type (invalid type value in the
// dataset) is now rendered as "unknown" instead of raising a NullPointerException.
private IValidationEvent createUnresolvedReferenceEvent(IDatasetReference reference, IDatasetReference ref,
        boolean externalFile) {
    String typeLabel = ref.getDatasetType() != null ? String.valueOf(ref.getDatasetType().getValue()) : "unknown";

    String messagePrefix;
    String messageSuffix;
    if (StringUtils.isBlank(ref.getUuid()) && StringUtils.isBlank(ref.getUri())) {
        // empty reference: neither UUID nor URI given
        messagePrefix = MESSAGE_INVALID_REFERENCE + typeLabel;
        messageSuffix = "";
    } else {
        messagePrefix = MESSAGE_COULD_NOT_RESOLVE_REFERENCE + ref.getOrigin() + MESSAGE_TO;
        messageSuffix = " (" + ref.getName() + ")";
    }

    String target = StringUtils.isNotBlank(ref.getUuid()) ? ref.getUuid() : ref.getUri();
    String message = messagePrefix + " " + typeLabel + " " + target + messageSuffix;

    IValidationEvent event = new ValidationEvent(LinkValidator.this.getAspectName(), Severity.ERROR, reference, message);
    event.setMessageReference(ref);
    if (externalFile) {
        // for external files the short file name is used as the name of the reference
        ref.setName(ref.getShortFileName());
    }
    event.setAltMessage(messagePrefix);
    return event;
}
// Transforms the file of the given dataset with the given transformer and returns the nodes selected
// by this task's XPath expression from the transformation result. Before transforming, every "ignore"
// parameter that is set to TRUE on the validator is passed on to the transformer.
private NodeList extractNodeList(Transformer transformer, IDatasetReference reference)
        throws TransformerException, XPathExpressionException, IOException {

    forwardSkipParameter(transformer, PARAM_IGNORE_REFS_TO_LCIAMETHODS, "skipSupportedImpactAssessmentMethods");
    forwardSkipParameter(transformer, PARAM_IGNORE_COMPLEMENTINGPROCESS, "skipComplementingProcess");
    forwardSkipParameter(transformer, PARAM_IGNORE_INCLUDEDPROCESSES, "skipIncludedProcesses");
    forwardSkipParameter(transformer, PARAM_IGNORE_PRECEDINGDATASETVERSION, "skipPrecedingDatasetVersion");

    DOMResult transformResult = new DOMResult();

    // the stream is opened here, so it is closed here as well once the transformation has finished or failed
    try (TFileInputStream datasetStream = new TFileInputStream(reference.getAbsoluteFileName())) {
        transformer.transform(new StreamSource(wrapInputStream(datasetStream)), transformResult);
    }

    Document resultDoc = (Document) transformResult.getNode();

    return (NodeList) expr.evaluate(resultDoc, XPathConstants.NODESET);
}

// Sets the transformer parameter of the given name to TRUE if the validator parameter of the same name
// is TRUE; debugLabel is only used for the debug log message.
private void forwardSkipParameter(Transformer transformer, String parameterName, String debugLabel) {
    if (!BooleanUtils.isTrue((Boolean) this.validator.getParameter(parameterName))) {
        return;
    }
    if (LinkValidator.this.log.isDebugEnabled()) {
        LinkValidator.this.log.debug("setting " + debugLabel + "=true");
    }
    transformer.setParameter(parameterName, Boolean.TRUE);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy