All downloads are free. The search and download functionality uses the official Maven repository.

org.bridgedb.uri.loader.RdfParser Maven / Gradle / Ivy

There is a newer version: 3.1.3
Show newest version
// BridgeDb,
// An abstraction layer for identifier mapping services, both local and online.
//
// Copyright 2006-2009  BridgeDb developers
// Copyright 2012-2013  Christian Y. A. Brenninkmeijer
// Copyright 2012-2013  OpenPhacts
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package org.bridgedb.uri.loader;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

import org.apache.log4j.Logger;
import org.bridgedb.uri.loader.transative.TransativeConfig;
import org.bridgedb.utils.BridgeDBException;
import org.eclipse.rdf4j.common.exception.RDF4JException;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandler;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.RDFParserFactory;
import org.eclipse.rdf4j.rio.RDFParserRegistry;
import org.eclipse.rdf4j.rio.turtle.TurtleParser;

/**
 * Parses RDF from a file, a URI or an input stream and forwards everything that is
 * parsed to the {@link RDFHandler} supplied at construction time.
 *
 * <p>The RDF format is either given explicitly by name or, when no name is given,
 * derived from the file name / address via the {@link RDFParserRegistry}.
 */
public class RdfParser {

    /** Receives the statements produced by every parse call on this instance. */
    private final RDFHandler handler;

    /**
     * Parser returned by {@link #getParser(String, String)} when no format name is
     * given and the registry does not recognise the address.
     *
     * <p>NOTE(review): this is a single shared, reassignable instance and the parse
     * methods call {@code setRDFHandler} on whatever {@code getParser} returns, so
     * concurrent parses that both hit the fallback would share one parser. Left
     * unchanged because the field is public; confirm before making it final.
     */
    public static RDFParser DEFAULT_PARSER = new TurtleParser();

    /** Marker meaning "no explicit format name: work the format out from the address". */
    private static final String GET_FORMAT_FROM_ADDRESS = null;

    /** Formats that can be selected by name through {@link #getParser(String, String)}. */
    private static final List<RDFFormat> KNOWN_FORMATS = List.of(
            RDFFormat.N3,
            RDFFormat.NQUADS,
            RDFFormat.NTRIPLES,
            RDFFormat.JSONLD,
            RDFFormat.RDFA,
            RDFFormat.RDFJSON,
            RDFFormat.RDFXML,
            RDFFormat.TRIG,
            RDFFormat.TRIX,
            RDFFormat.TURTLE);

    /**
     * Creates a parser front end that reports to the given handler.
     *
     * @param handler handler installed on the underlying RDF parser for every parse
     */
    public RdfParser(RDFHandler handler){
        this.handler = handler;
    }

    static final Logger logger = Logger.getLogger(RdfParser.class);

    /**
     * Parses a file, deriving the RDF format from the file name.
     *
     * @param baseURI base URI handed to the underlying parser
     * @param file file to read
     * @throws BridgeDBException if the file cannot be read or parsed
     */
    public void parse(String baseURI, File file) throws BridgeDBException{
        parse(baseURI, file, GET_FORMAT_FROM_ADDRESS);
    }

    /**
     * Parses a file using the named RDF format.
     *
     * @param baseURI base URI handed to the underlying parser
     * @param file file to read
     * @param rdfFormatName name of the RDF format, or null/empty to derive it from the file name
     * @throws BridgeDBException if the format is unknown or the file cannot be read or parsed
     */
    public void parse(String baseURI, File file, String rdfFormatName) throws BridgeDBException{
        logger.info("Parsing: " + file.getAbsolutePath());
        // try-with-resources closes the reader on every path; a failure while closing is
        // attached as a suppressed exception instead of replacing the original error.
        // NOTE(review): FileReader(File) decodes with the platform default charset - confirm
        // that is intended for the RDF files being loaded.
        try (FileReader reader = new FileReader(file)) {
            RDFParser parser = getParser(file.getName(), rdfFormatName);
            parser.setRDFHandler(handler);
            parser.setParseErrorListener(new LinksetParserErrorListener());
            parser.parse(reader, baseURI);
        } catch (IOException ex) {
            throw new BridgeDBException("Error reading " + file.getAbsolutePath() + " " + ex.getMessage(), ex);
        } catch (RDF4JException ex) {
            throw new BridgeDBException("Error parsing " + file.getAbsolutePath() + " " + ex.getMessage(), ex);
        }
    }

    /**
     * Opens the given URI and parses it, deriving the RDF format from the URI.
     *
     * @param uri address to open; also used as the base URI
     * @throws BridgeDBException if the URI cannot be opened or its content cannot be parsed
     */
    public void parse(String uri) throws BridgeDBException {
        parse(uri, GET_FORMAT_FROM_ADDRESS);
    }

    /**
     * Opens the given URI and parses it using the named RDF format.
     *
     * @param uri address to open; also used as the base URI
     * @param rdfFormatName name of the RDF format, or null/empty to derive it from the URI
     * @throws BridgeDBException if the URI cannot be opened or its content cannot be parsed
     */
    public void parse(String uri, String rdfFormatName) throws BridgeDBException {
        InputStream stream = getInputStream(uri);
        parse(stream, uri, rdfFormatName);
    }

    /**
     * Parses an already opened stream and closes it afterwards.
     *
     * @param stream stream to read; it is closed by this method
     * @param mappingSource address the stream came from; used as the base URI, for format
     *        detection when no format name is given, and in log/error messages
     * @param rdfFormatName name of the RDF format, or null/empty to derive it from mappingSource
     * @throws BridgeDBException if the format is unknown or the stream cannot be read or parsed
     */
    public void parse(InputStream stream, String mappingSource, String rdfFormatName) throws BridgeDBException {
        logger.info("Parsing: " + mappingSource);
        // Binding the caller's stream as a resource guarantees it is closed, tolerates a
        // null stream at close time, and keeps a close failure from masking the real error.
        try (InputStream in = stream) {
            RDFParser parser = getParser(mappingSource, rdfFormatName);
            parser.setRDFHandler(handler);
            parser.setParseErrorListener(new LinksetParserErrorListener());
            parser.parse(in, mappingSource);
        } catch (IOException ex) {
            throw new BridgeDBException("Error reading " + mappingSource + " " + ex.getMessage(), ex);
        } catch (RDF4JException ex) {
            throw new BridgeDBException("Error parsing " + mappingSource + " " + ex.getMessage(), ex);
        }
    }

    /**
     * Opens a stream on the given address.
     *
     * @param uri address in a form accepted by {@link URL#URL(String)}
     * @return an open stream; the caller is responsible for closing it
     * @throws BridgeDBException if the address is not a valid URL or cannot be opened
     */
    public InputStream getInputStream(String uri) throws BridgeDBException {
        URL url;
        try {
            url = new URL(uri);
        } catch (MalformedURLException ex) {
            throw new BridgeDBException ("Unable to convert String to Uri:" + uri, ex);
        }
        try {
            return url.openStream();
        } catch (IOException ex) {
            // Distinct message: the URL was well formed, opening it is what failed.
            throw new BridgeDBException ("Unable to open stream for uri:" + uri, ex);
        }
    }

    /**
     * Converts a file to the IRI it should be known by.
     *
     * <p>When a transitive base URI is configured and the file's parent equals the
     * configured transitive directory, the IRI is the base URI followed by the file
     * name; in every other case it is the file's own {@code file:} URI.
     *
     * @param file file to convert
     * @return IRI for the file
     * @throws BridgeDBException if the file cannot be converted
     */
    public static IRI fileToURL(File file) throws BridgeDBException{
        String baseURI = TransativeConfig.getTransitiveBaseUri();
        if (baseURI == null || baseURI.isEmpty()){
            return fileToURI(file);
        }
        // getParent() is null for a path without a parent component; such a file cannot
        // be inside the transitive directory, so it falls through to the plain file URI.
        String parent = file.getParent();
        if (parent != null && parent.equals(TransativeConfig.getTransativeDirectory())){
            return SimpleValueFactory.getInstance().createIRI(baseURI + file.getName());
        }
        return fileToURI(file);
    }

    /**
     * Converts a file to an IRI built from its {@code file:} URL.
     *
     * @param file file to convert
     * @return IRI for the file
     * @throws BridgeDBException if the file's URI cannot be expressed as a URL
     */
    private static IRI fileToURI(File file) throws BridgeDBException{
        try {
            String uri = file.toURI().toURL().toExternalForm();
            return SimpleValueFactory.getInstance().createIRI(uri);
        } catch (MalformedURLException ex) {
            throw new BridgeDBException("Unable to convert file to URI", ex);
        }
    }

    /**
     * Finds an RDF parser for the given address or explicit format name.
     *
     * <p>When {@code rdfFormatName} is null or empty the format is looked up from the
     * address: a trailing {@code .gz} is ignored and {@code .n3} is looked up as if it
     * were {@code .ttl}. If the registry does not recognise the address,
     * {@link #DEFAULT_PARSER} is returned. Otherwise the name is matched, ignoring case,
     * against the names of the known RDF formats.
     *
     * @param address file name or URI; only consulted when no format name is given
     * @param rdfFormatName name of the RDF format, or null/empty to use the address
     * @return parser for the selected format
     * @throws BridgeDBException if the format name is not known or the registry has no
     *         parser factory for the selected format
     */
    public static RDFParser getParser(String address, String rdfFormatName) throws BridgeDBException{
        RDFParserRegistry reg = RDFParserRegistry.getInstance();
        RDFFormat format;
        if (rdfFormatName == null || rdfFormatName.isEmpty()){
            String lookupName = address;
            if (lookupName.endsWith(".gz")){
                lookupName = lookupName.substring(0, lookupName.length()-3);
            }
            if (lookupName.endsWith(".n3")){
                // Look .n3 up under the Turtle extension instead.
                lookupName = "try.ttl";
            }
            Optional<RDFFormat> fileFormat = reg.getFileFormatForFileName(lookupName);
            if (fileFormat.isEmpty()){
                //added bridgeDB/OPS specific extension here if required.
                logger.warn("OpenRDF does not know the RDF Format for " + lookupName);
                logger.warn("Using the default format " + DEFAULT_PARSER);
                return DEFAULT_PARSER;
            }
            format = fileFormat.get();
        } else {
            format = formatForName(rdfFormatName);
        }
        Optional<RDFParserFactory> factory = reg.get(format);
        if (factory.isEmpty()){
            throw new BridgeDBException("No RDF parser available for format " + format.getName());
        }
        return factory.get().getParser();
    }

    /**
     * Looks up one of the known RDF formats by name, ignoring case.
     *
     * @param rdfFormatName non-empty format name
     * @return the matching format
     * @throws BridgeDBException if no known format has that name
     */
    private static RDFFormat formatForName(String rdfFormatName) throws BridgeDBException{
        for (RDFFormat rdfFormat: KNOWN_FORMATS){
            if (rdfFormat.getName().equalsIgnoreCase(rdfFormatName)){
                return rdfFormat;
            }
        }
        // Only reached after every known format has been compared.
        throw new BridgeDBException("No RdfFormat with name " + rdfFormatName + " known");
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy