org.bridgedb.rdf.BridgeDBRdfHandler Maven / Gradle / Ivy
The newest version!
// BridgeDb,
// An abstraction layer for identifier mapping services, both local and online.
//
// Copyright 2006-2009 BridgeDb developers
// Copyright 2012-2013 Christian Y. A. Brenninkmeijer
// Copyright 2012-2013 OpenPhacts
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package org.bridgedb.rdf;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.util.HashMap;
import java.util.Optional;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.bridgedb.DataSource;
import org.bridgedb.DataSourcePatterns;
import org.bridgedb.bio.DataSourceComparator;
import org.bridgedb.bio.Organism;
import org.bridgedb.rdf.constants.BridgeDBConstants;
import org.bridgedb.rdf.constants.DCTermsConstants;
import org.bridgedb.rdf.constants.DCatConstants;
import org.bridgedb.rdf.constants.RdfConstants;
import org.bridgedb.rdf.pairs.RdfBasedCodeMapper;
import org.bridgedb.utils.BridgeDBException;
import org.bridgedb.utils.ConfigReader;
import org.bridgedb.utils.Reporter;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.RepositoryException;
import org.eclipse.rdf4j.repository.RepositoryResult;
import org.eclipse.rdf4j.repository.sail.SailRepository;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParserRegistry;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.turtle.TurtleWriter;
import org.eclipse.rdf4j.sail.memory.MemoryStore;
/**
*
* @author Christian
*/
public class BridgeDBRdfHandler extends RdfBase{
static boolean initialized = false;
public static final String CONFIG_FILE_NAME = "DataSource.ttl";
private static final Logger logger = Logger.getLogger(BridgeDBRdfHandler.class);
private HashMap dataSourceRegister = new HashMap();
private HashMap uriPatternRegister = new HashMap();
private BridgeDBRdfHandler(){
}
private void doParseRdfInputStream(InputStream stream) throws BridgeDBException {
Repository repository = null;
RepositoryConnection repositoryConnection = null;
try {
repository = new SailRepository(new MemoryStore());
repository.init();
repositoryConnection = repository.getConnection();
repositoryConnection.add(stream, DEFAULT_BASE_URI, DEFAULT_FILE_FORMAT);
readAllDataSources(repositoryConnection);
readAllUriPatterns(repositoryConnection);
} catch (Exception ex) {
throw new BridgeDBException ("Error parsing RDF inputStream: " + ex.getMessage(), ex);
} finally {
try {
stream.close();
} catch (IOException ex) {
logger.error("Error closing input Stream", ex);
}
shutDown(repository, repositoryConnection);
}
}
private void readAllDataSources(RepositoryConnection repositoryConnection) throws RepositoryException, BridgeDBException {
RepositoryResult statements =
repositoryConnection.getStatements(null, RdfConstants.TYPE_URI, BridgeDBConstants.DATA_SOURCE_URI, true);
//repositoryConnection.getStatements(null, null, null, true);
while (statements.hasNext()) {
Statement statement = statements.next();
Resource dataSourceResource = statement.getSubject();
DataSource dataSource = getDataSource(repositoryConnection, dataSourceResource);
}
}
private DataSource getDataSource(RepositoryConnection repositoryConnection, Resource dataSourceResource)
throws BridgeDBException, RepositoryException {
DataSource result = dataSourceRegister.get(dataSourceResource);
if (result == null){
result = readDataSource(repositoryConnection, dataSourceResource);
dataSourceRegister.put(dataSourceResource, result);
}
return result;
}
public DataSource readDataSource(RepositoryConnection repositoryConnection, Resource dataSourceId)
throws BridgeDBException, RepositoryException{
String fullName = getSingletonString(repositoryConnection, dataSourceId, BridgeDBConstants.FULL_NAME_URI);
String systemCode = getSingletonString(repositoryConnection, dataSourceId, BridgeDBConstants.SYSTEM_CODE_URI);
DataSource.Builder builder = DataSource.register(systemCode, fullName);
String idExample = getPossibleSingletonString(repositoryConnection, dataSourceId, BridgeDBConstants.ID_EXAMPLE_URI);
if (idExample != null){
builder.idExample(idExample);
}
String mainUrl = getPossibleSingletonString(repositoryConnection, dataSourceId, BridgeDBConstants.MAIN_URL_URI);
if (mainUrl != null){
builder.mainUrl(mainUrl);
}
Value organismId = getPossibleSingleton(repositoryConnection, dataSourceId, BridgeDBConstants.ABOUT_ORGANISM_URI);
if (organismId != null){
Object organism = OrganismRdf.byRdfResource(organismId);
builder.organism(organism);
}
String primary = getPossibleSingletonString(repositoryConnection, dataSourceId, BridgeDBConstants.PRIMARY_URI);
if (primary != null){
builder.primary(Boolean.parseBoolean(primary));
}
String type = getPossibleSingletonString(repositoryConnection, dataSourceId, BridgeDBConstants.TYPE_URI);
if (type != null){
builder.type(type);
}
Value regexValue = getPossibleSingleton(repositoryConnection, dataSourceId, BridgeDBConstants.HAS_REGEX_PATTERN_URI);
Pattern regex = null;
if (regexValue != null){
regex = Pattern.compile(regexValue.stringValue());
DataSourcePatterns.registerPattern(builder.asDataSource(), regex);
} else {
regex = DataSourcePatterns.getPatterns().get(builder.asDataSource());
}
Value urlValue = getPossibleSingleton(repositoryConnection, dataSourceId, BridgeDBConstants.HAS_PRIMARY_URI_PATTERN_URI);
if (urlValue != null){
UriPattern urlPattern = getUriPattern(repositoryConnection, (Resource)urlValue,
systemCode, UriPatternType.mainUrlPattern);
builder.urlPattern(urlPattern.getUriPattern());
}
String urnBase = getPossibleSingletonString(repositoryConnection, dataSourceId, BridgeDBConstants.URN_BASE_URI);
if (urnBase != null){
builder.urnBase(urnBase);
}
Value identifiersOrgSimpleValue = getPossibleSingleton(repositoryConnection, dataSourceId, BridgeDBConstants.HAS_IDENTIFERS_ORG_PATTERN_URI);
if (identifiersOrgSimpleValue != null){
UriPattern identifiersOrgSimplePattern = getUriPattern(repositoryConnection, (Resource)identifiersOrgSimpleValue,
systemCode, UriPatternType.identifiersOrgPatternSimple);
String identifiersOrgInfo = identifiersOrgSimplePattern.getUriPattern().replace("identifiers.org","info.identifiers.org");
UriPattern identifiersOrgInfoPattern = UriPattern.register(identifiersOrgInfo, systemCode, UriPatternType.identifiersOrgPatternInfo);
builder.identifiersOrgBase(identifiersOrgSimplePattern.getUriPattern());
}
Value identifiersOrgInfoValue = getPossibleSingleton(repositoryConnection, dataSourceId, BridgeDBConstants.HAS_IDENTIFERS_ORG_INFO_PATTERN_URI);
if (identifiersOrgInfoValue != null){
UriPattern identifiersOrgInfoPattern = getUriPattern(repositoryConnection, (Resource)identifiersOrgInfoValue,
systemCode, UriPatternType.identifiersOrgPatternInfo);
String identifiersOrgSimple = identifiersOrgInfoPattern.getUriPattern().replace("info.identifiers.org","identifiers.org");
UriPattern identifiersOrgSimplePattern = UriPattern.register(identifiersOrgSimple, systemCode, UriPatternType.identifiersOrgPatternSimple);
builder.identifiersOrgBase(identifiersOrgSimplePattern.getUriPattern());
}
String alternative = getPossibleSingletonString(repositoryConnection, dataSourceId, DCTermsConstants.ALTERNATIVE_URI);
if (alternative != null){
builder.alternative(alternative);
}
String description = getPossibleSingletonString(repositoryConnection, dataSourceId, DCatConstants.DESCRIPTION_URI);
if (description != null){
builder.description(description);
}
readUriPatterns(repositoryConnection, dataSourceId, systemCode, UriPatternType.dataSourceUriPattern);
readCodeMapper (repositoryConnection, systemCode, regex);
DataSourceMetaDataProvidor.setProvidor(systemCode, DataSourceMetaDataProvidor.RDF);
return builder.asDataSource();
}
private void readCodeMapper(RepositoryConnection repositoryConnection, String systemCode, Pattern regex) throws RepositoryException, BridgeDBException {
RepositoryResult statements =
repositoryConnection.getStatements(null, BridgeDBConstants.SYSTEM_CODE_URI, SimpleValueFactory.getInstance().createLiteral(systemCode), true);
// String xrefPrefix = null;
Resource codeMapperReseource = null;
while (statements.hasNext()) {
Statement statement = statements.next();
Resource subject = statement.getSubject();
String xrefPrefix = getPossibleSingletonString(repositoryConnection, subject, BridgeDBConstants.XREF_PREFIX_URI);
if (xrefPrefix != null){
if (regex != null){
if (regex.pattern().startsWith(xrefPrefix)){
regex = Pattern.compile(regex.pattern().substring(xrefPrefix.length()));
} else if (regex.pattern().startsWith("^" + xrefPrefix)){
regex = Pattern.compile("^" + regex.pattern().substring(xrefPrefix.length()+1));
}
}
codeMapperReseource = subject;
RdfBasedCodeMapper.addXrefPrefix(systemCode, xrefPrefix);
this.readUriPatterns(repositoryConnection, codeMapperReseource, systemCode, UriPatternType.codeMapperPattern);
}
}
}
private void readUriPatterns(RepositoryConnection repositoryConnection, Resource subject, String sysCode,
UriPatternType patternType) throws BridgeDBException, RepositoryException {
RepositoryResult statements =
repositoryConnection.getStatements(subject, BridgeDBConstants.HAS_URI_PATTERN_URI, null, true);
//repositoryConnection.getStatements(null, null, null, true);
while (statements.hasNext()) {
Statement statement = statements.next();
Value uriValue = statement.getObject();
UriPattern uriPattern = getUriPattern(repositoryConnection, (Resource)uriValue, sysCode, patternType);
}
}
private UriPattern getUriPattern(RepositoryConnection repositoryConnection, Resource uriPatternResource,
String code, UriPatternType patternType) throws BridgeDBException, RepositoryException {
UriPattern result = uriPatternRegister.get(uriPatternResource);
if (result == null){
result = UriPattern.readUriPattern(repositoryConnection, uriPatternResource, code, patternType);
uriPatternRegister.put(uriPatternResource, result);
} else {
result.getSysCodes().add(code);
}
return result;
}
private void readAllUriPatterns(RepositoryConnection repositoryConnection) throws RepositoryException, BridgeDBException {
RepositoryResult statements =
repositoryConnection.getStatements(null, RdfConstants.TYPE_URI, BridgeDBConstants.URI_PATTERN_URI, true);
//repositoryConnection.getStatements(null, null, null, true);
while (statements.hasNext()) {
Statement statement = statements.next();
Resource uriPatternResource = statement.getSubject();
UriPattern uriPattern = uriPatternRegister.get(uriPatternResource);
if (uriPattern == null){
throw new BridgeDBException ("Found an unused "+ BridgeDBConstants.URI_PATTERN_URI + " " + uriPatternResource);
}
}
}
//Static methods
public static void parseRdfFile(File file) throws BridgeDBException{
try {
InputStream inputStream = new FileInputStream(file);
parseRdfInputStream(inputStream);
} catch (IOException ex) {
throw new BridgeDBException ("Error accessing file " + file.getAbsolutePath(), ex);
}
}
public static void parseRdfInputStream(InputStream stream) throws BridgeDBException {
BridgeDBRdfHandler handler = new BridgeDBRdfHandler();
handler.doParseRdfInputStream(stream);
UriPattern.checkRegexPatterns();
}
public static void main(String[] args) throws RepositoryException, BridgeDBException, IOException, RDFParseException, RDFHandlerException {
ConfigReader.logToConsole();
File file1 = new File ("C:\\OpenPhacts\\BridgeDb\\org.bridgedb.rdf\\resources\\DataSource.ttl");
parseRdfFile(file1);
}
public static void init() throws BridgeDBException{
if (initialized){
return;
}
InputStream stream = ConfigReader.getInputStream(CONFIG_FILE_NAME);
parseRdfInputStream(stream);
initialized = true;
Reporter.println("BridgeDBRdfHandler initialized");
}
public static void writeRdfToFile(File file) throws BridgeDBException{
TreeSet sortedDataSources = new TreeSet(new DataSourceComparator());
sortedDataSources.addAll(DataSource.getDataSources());
writeRdfToFile(file, sortedDataSources);
}
public static void writeRdfToFile(File file, SortedSet dataSources) throws BridgeDBException{
Reporter.println("Writing DataSource RDF to " + file.getAbsolutePath());
Repository repository = null;
RepositoryConnection repositoryConnection = null;
try {
repository = new SailRepository(new MemoryStore());
repository.init();
repositoryConnection = repository.getConnection();
for (DataSource dataSource: dataSources){
writeDataSource(repositoryConnection, dataSource);
}
OrganismRdf.addAll(repositoryConnection);
UriPattern.addAll(repositoryConnection);
writeRDF(repositoryConnection, file);
} catch (Exception ex) {
throw new BridgeDBException ("Error writing RDF to file:" + ex.getMessage(), ex);
} finally {
shutDown(repository, repositoryConnection);
}
}
private static void writeDataSource(RepositoryConnection repositoryConnection, DataSource dataSource) throws RepositoryException, BridgeDBException {
Resource id = asResource(dataSource);
repositoryConnection.add(id, RdfConstants.TYPE_URI, BridgeDBConstants.DATA_SOURCE_URI);
if (dataSource.getFullName() != null){
repositoryConnection.add(id, BridgeDBConstants.FULL_NAME_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getFullName()));
}
if (dataSource.getSystemCode() != null && (!dataSource.getSystemCode().trim().isEmpty())){
repositoryConnection.add(id, BridgeDBConstants.SYSTEM_CODE_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getSystemCode()));
}
if (dataSource.getMainUrl() != null){
repositoryConnection.add(id, BridgeDBConstants.MAIN_URL_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getMainUrl()));
}
if (dataSource.getExample() != null && dataSource.getExample().getId() != null){
repositoryConnection.add(id, BridgeDBConstants.ID_EXAMPLE_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getExample().getId()));
}
repositoryConnection.add(id, BridgeDBConstants.PRIMARY_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.isPrimary()));
if (dataSource.getType() != null){
repositoryConnection.add(id, BridgeDBConstants.TYPE_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getType()));
}
Pattern regex = DataSourcePatterns.getPatterns().get(dataSource);
String url = dataSource.getKnownUrl("$id");
UriPattern urlPattern = UriPattern.byPattern(url);
if (urlPattern != null){
repositoryConnection.add(id, BridgeDBConstants.HAS_PRIMARY_URI_PATTERN_URI, urlPattern.getResourceId());
}
String identifersOrgSimple = dataSource.getIdentifiersOrgUri("$id");
UriPattern identifersOrgSimplePattern = UriPattern.byPattern(identifersOrgSimple);
if (identifersOrgSimplePattern != null){
repositoryConnection.add(id, BridgeDBConstants.HAS_IDENTIFERS_ORG_PATTERN_URI, identifersOrgSimplePattern.getResourceId());
String identifersOrgInfo = identifersOrgSimple.replace("identifiers.org","info.identifiers.org");
UriPattern identifersOrgInfoPattern = UriPattern.byPattern(identifersOrgInfo);
if (identifersOrgInfoPattern != null){
repositoryConnection.add(id, BridgeDBConstants.HAS_IDENTIFERS_ORG_INFO_PATTERN_URI, identifersOrgInfoPattern.getResourceId());
}
}
if (dataSource.getOrganism() != null){
Organism organism = (Organism)dataSource.getOrganism();
repositoryConnection.add(id, BridgeDBConstants.ABOUT_ORGANISM_URI, OrganismRdf.getResourceId(organism));
}
Pattern pattern = DataSourcePatterns.getPatterns().get(dataSource);
if (pattern != null && !pattern.toString().isEmpty()){
Value patternValue = SimpleValueFactory.getInstance().createLiteral(pattern.toString());
repositoryConnection.add(id, BridgeDBConstants.HAS_REGEX_PATTERN_URI, patternValue);
}
if (dataSource.getAlternative() != null){
repositoryConnection.add(id, DCTermsConstants.ALTERNATIVE_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getAlternative()));
}
if (dataSource.getDescription() != null){
repositoryConnection.add(id, DCatConstants.DESCRIPTION_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getDescription()));
}
SortedSet sortedPatterns = UriPattern.byCodeAndType(dataSource.getSystemCode(), UriPatternType.dataSourceUriPattern);
if (sortedPatterns != null){
for (UriPattern uriPattern:sortedPatterns){
repositoryConnection.add(id, BridgeDBConstants.HAS_URI_PATTERN_URI, uriPattern.getResourceId());
}
}
writeCodeMapper(repositoryConnection, dataSource);
}
private static void writeCodeMapper(RepositoryConnection repositoryConnection, DataSource dataSource) throws RepositoryException {
String xrefPrefix = RdfBasedCodeMapper.getXrefPrefix(dataSource.getSystemCode());
if (xrefPrefix == null){
return;
}
Resource id = asCodeMapperResource(dataSource);
repositoryConnection.add(id, RdfConstants.TYPE_URI, BridgeDBConstants.CODE_MAPPER_URI);
repositoryConnection.add(id, BridgeDBConstants.SYSTEM_CODE_URI, SimpleValueFactory.getInstance().createLiteral(dataSource.getSystemCode()));
Value prefixValue = SimpleValueFactory.getInstance().createLiteral(xrefPrefix);
repositoryConnection.add(id, BridgeDBConstants.XREF_PREFIX_URI, prefixValue);
SortedSet sortedPatterns = UriPattern.byCodeAndType(dataSource.getSystemCode(), UriPatternType.codeMapperPattern);
if (sortedPatterns != null){
for (UriPattern pattern:sortedPatterns){
repositoryConnection.add(id, BridgeDBConstants.HAS_URI_PATTERN_URI, pattern.getResourceId());
}
}
}
private static void writeRDF(RepositoryConnection repositoryConnection, File file)
throws IOException, RDFHandlerException, RepositoryException{
Writer writer = new FileWriter (file);
TurtleWriter turtleWriter = new TurtleWriter(writer);
writeRDF(repositoryConnection, turtleWriter);
writer.close();
}
private static void writeRDF(RepositoryConnection repositoryConnection, RDFWriter rdfWriter)
throws IOException, RDFHandlerException, RepositoryException{
rdfWriter.handleNamespace(BridgeDBConstants.PREFIX_NAME, BridgeDBConstants.PREFIX);
rdfWriter.handleNamespace(DCatConstants.PREFIX_NAME, DCatConstants.voidns);
rdfWriter.handleNamespace(DCTermsConstants.PREFIX_NAME, DCTermsConstants.voidns);
rdfWriter.handleNamespace("", DEFAULT_BASE_URI);
rdfWriter.startRDF();
RepositoryResult statements =
repositoryConnection.getStatements(null, null, null, true);
while (statements.hasNext()) {
Statement statement = statements.next();
rdfWriter.handleStatement(statement);
}
rdfWriter.endRDF();
}
private static RDFFormat getFormat(File file){
String fileName = file.getName();
if (fileName.endsWith(".n3")){
fileName = "try.ttl";
}
RDFParserRegistry reg = RDFParserRegistry.getInstance();
Optional fileFormat = reg.getFileFormatForFileName(fileName);
if (fileFormat == null){
//added bridgeDB/OPS specific extension here if required.
logger.warn("OpenRDF does not know the RDF Format for " + fileName);
logger.warn("Using the default format " + DEFAULT_FILE_FORMAT);
return DEFAULT_FILE_FORMAT;
} else {
return fileFormat.get();
}
}
protected static Resource asResource(DataSource dataSource) {
if (dataSource.getFullName() == null){
return SimpleValueFactory.getInstance().createIRI(BridgeDBConstants.DATA_SOURCE1 + "_bysysCode_" + scrub(dataSource.getSystemCode()));
} else {
return SimpleValueFactory.getInstance().createIRI(BridgeDBConstants.DATA_SOURCE1 + "_" + scrub(dataSource.getFullName()));
}
}
protected static Resource asCodeMapperResource(DataSource dataSource) {
if (dataSource.getFullName() == null){
return SimpleValueFactory.getInstance().createIRI(BridgeDBConstants.CODE_MAPPER1 + "_bysysCode_" + scrub(dataSource.getSystemCode()));
} else {
return SimpleValueFactory.getInstance().createIRI(BridgeDBConstants.CODE_MAPPER1 + "_" + scrub(dataSource.getFullName()));
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy