io.uhndata.cards.vocabularies.internal.BioOntologyIndexer Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of cards-vocabularies Show documentation
There is a newer version: 0.9.25
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.uhndata.cards.vocabularies.internal;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.RepositoryException;

import org.apache.commons.io.FileUtils;
import org.apache.sling.api.SlingHttpServletRequest;
import org.apache.sling.api.SlingHttpServletResponse;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.uhndata.cards.vocabularies.spi.RepositoryHandler;
import io.uhndata.cards.vocabularies.spi.SourceParser;
import io.uhndata.cards.vocabularies.spi.VocabularyDescription;
import io.uhndata.cards.vocabularies.spi.VocabularyIndexException;
import io.uhndata.cards.vocabularies.spi.VocabularyIndexer;
import io.uhndata.cards.vocabularies.spi.VocabularyParserUtils;
import io.uhndata.cards.vocabularies.spi.VocabularyTermSource;

/**
 * Generic indexer for vocabularies available on the BioOntology portal.
 * BioOntology is a RESTful server serving a large collection of vocabularies, available as OWL sources, along with
 * meta-information.
 * <p>
 * To be invoked, this indexer requires that:
 * </p>
 * <ul>
 * <li>the {@code source} request parameter is {@code bioontology}</li>
 * <li>the {@code identifier} request parameter is a valid, case-sensitive identifier of a vocabulary available in the
 * BioOntology server</li>
 * </ul>
 * <p>
 * An optional {@code version} parameter can be used to index a specific version of the target vocabulary. If not
 * specified, then the latest available version will be used.
 * </p>
 *
 * @version $Id: 87fcb7971d28597135a4682b0968e257bc3e4c33 $
 */
@Component(
    service = VocabularyIndexer.class,
    name = "VocabularyIndexer.bioontology")
public class BioOntologyIndexer implements VocabularyIndexer
{
    private static final Logger LOGGER = LoggerFactory.getLogger(BioOntologyIndexer.class);

    @Reference
    private VocabularyParserUtils utils;

    @Reference(target = "(component.name=RepositoryHandler.bioontology)")
    private RepositoryHandler repository;

    /**
     * Automatically injected list of all available parsers. A {@code volatile} list dynamically changes when
     * implementations are added, removed, or replaced.
     */
    @Reference
    private volatile List parsers;

    /** The vocabulary node where the indexed data must be placed. */
    private InheritableThreadLocal vocabularyNode = new InheritableThreadLocal<>();

    private InheritableThreadLocal> vocabularyIgnoreURIs = new InheritableThreadLocal<>();

    @Override
    public boolean canIndex(String source)
    {
        return "bioontology".equals(source);
    }

    @Override
    public void index(final String source, final SlingHttpServletRequest request,
        final SlingHttpServletResponse response)
        throws IOException, VocabularyIndexException
    {
        // Obtain relevant request parameters.
        String identifier = request.getParameter("identifier");
        String version = request.getParameter("version");
        String overwrite = request.getParameter("overwrite");

        // Obtain the resource of the request and adapt it to a JCR node. This must be the /Vocabularies homepage node.
        Node homepage = request.getResource().adaptTo(Node.class);
        this.vocabularyIgnoreURIs.set(new ArrayList());
        try {
            Node vocabulariesIgnore = homepage.getSession().getNode("/apps/cards/VocabulariesIgnore/" + identifier);
            NodeIterator vocabulariesIgnoreIter = vocabulariesIgnore.getNodes();
            while (vocabulariesIgnoreIter.hasNext()) {
                String ignoreTerm = vocabulariesIgnoreIter.nextNode().getProperty("value").getString();
                this.vocabularyIgnoreURIs.get().add(ignoreTerm);
            }
        } catch (RepositoryException e) {
            LOGGER.warn("Unable to get the list of vocabulary entity classes to ignore");
        }

        File temporaryFile = null;
        try {
            // Throw exceptions if mandatory parameters are not found or if homepage node cannot be found
            if (identifier == null) {
                throw new VocabularyIndexException("Mandatory [identifier] parameter not provided.");
            }

            if (homepage == null) {
                throw new VocabularyIndexException("Could not access resource of your request.");
            }

            // Delete the Vocabulary node already representing this vocabulary instance if it exists
            this.utils.clearVocabularyNode(homepage, identifier, overwrite);

            // Load the description
            VocabularyDescription description = this.repository.getVocabularyDescription(identifier, version);

            // Check that we have a known parser for this vocabulary
            SourceParser parser =
                this.parsers.stream().filter(p -> p.canParse(description.getSourceFormat())).findFirst()
                    .orElseThrow(() -> new VocabularyIndexException("No known parsers for vocabulary [" + identifier
                        + "] in format [" + description.getSourceFormat() + "]"));

            // Download the source
            temporaryFile = this.repository.downloadVocabularySource(description);

            // Create a new Vocabulary node representing this vocabulary
            this.vocabularyNode.set(OntologyIndexerUtils.createVocabularyNode(homepage, description));

            // Parse the source file and create VocabularyTerm node children
            parser.parse(temporaryFile, description, this::createVocabularyTermNode);

            /*
             * Save the JCR session and check-in nodes. If any errors occur before this step, all proposed changes
             * will not be applied and the repository will remain in its original state. Lucene indexing is
             * automatically performed by the Jackrabbit Oak repository when this is performed.
             */
            OntologyIndexerUtils.finalizeInstall(homepage, this.vocabularyNode);

            // Success response json
            this.utils.writeStatusJson(request, response, true, null);
        } catch (Exception e) {
            // If parsing fails, return an error json with the exception message
            this.utils.writeStatusJson(request, response, false, "Vocabulary indexing error: " + e.getMessage());
            LOGGER.error("Vocabulary indexing error: {}", e.getMessage(), e);
        } finally {
            // Delete temporary source file
            FileUtils.deleteQuietly(temporaryFile);
            this.vocabularyNode.remove();
        }
    }

    private boolean shouldIncludeVocabularyTermNode(VocabularyTermSource term)
    {
        String termURI = term.getURI();
        for (String ignoreTerm : this.vocabularyIgnoreURIs.get()) {
            if (termURI.startsWith(ignoreTerm)) {
                return false;
            }
        }
        return true;
    }

    private void createVocabularyTermNode(VocabularyTermSource term)
    {
        if (shouldIncludeVocabularyTermNode(term)) {
            OntologyIndexerUtils.createVocabularyTermNode(term, this.vocabularyNode);
        }
    }
}