All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.bridgedb.uri.tools.RegexUriPattern Maven / Gradle / Ivy

The newest version!
// BridgeDb,
// An abstraction layer for identifier mapping services, both local and online.
//
// Copyright 2006-2009  BridgeDb developers
// Copyright 2012-2013  Christian Y. A. Brenninkmeijer
// Copyright 2012-2013  OpenPhacts
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package org.bridgedb.uri.tools;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.bridgedb.DataSource;
import org.bridgedb.DataSourcePatterns;
import org.bridgedb.rdf.UriPattern;
import org.bridgedb.rdf.UriPatternType;
import org.bridgedb.rdf.pairs.RdfBasedCodeMapper;
import org.bridgedb.utils.BridgeDBException;

/**
 *
 * @author Christian
 */
public class RegexUriPattern {

    private final String prefix;
    private final String postfix;
    private final String sysCode;
    private final Pattern regex;
    
    static boolean initialized = false;
    
    private RegexUriPattern(String prefix, String postfix, String sysCode, Pattern regex) throws BridgeDBException{
        if (prefix == null || prefix.isEmpty()){
            throw new BridgeDBException ("Illegal prefixe " + prefix);
        }
        this.prefix = prefix;
        if (postfix != null){
            this.postfix = postfix;
        } else {
            this.postfix = "";
        }
        this.sysCode = sysCode;
        this.regex = regex;
    }

    /**
     * @return the prefix
     */
    public String getPrefix() {
        return prefix;
    }

    /**
     * @return the postfix
     */
    public String getPostfix() {
        return postfix;
    }

    /**
     * @return the sysCode
     */
    public String getSysCode() {
        return sysCode;
    }

    public Pattern getRegex() {
        return regex;
    }
    
    public String getUri(String id) {
        return prefix + id + postfix;
    }

    public String toString(){
        String result = getUri("$id");// + " -> " + sysCode;
        if (regex != null){
            result = result + " (" + regex.pattern() + ")";
        }
        result+= " for DataSource " + sysCode;
        return result;
    }
    
    public String getUriPattern() {
        //TODO handle regex
        return getUri("$id");
    }

    public Set mapsTo() throws BridgeDBException{
       TreeSet possibles  = new TreeSet(UriPattern.byCode(sysCode));
       UriPattern asPattern = UriPattern.byPattern(getUriPattern());
       possibles.remove(asPattern);
       return possibles;
    }
    
    private static Pattern shortenRegex(Pattern regex, String sysCode) throws BridgeDBException{
        if (regex == null){
            return null;
        }
        String xrefPrefix = RdfBasedCodeMapper.getXrefPrefix(sysCode);
        String fullPattern = regex.pattern();
        if (fullPattern.startsWith(xrefPrefix)){
            String partPattern = fullPattern.substring(xrefPrefix.length());
            return Pattern.compile(partPattern);
        } else if (fullPattern.startsWith("^" + xrefPrefix)){
            String partPattern = "^" + fullPattern.substring(1 + xrefPrefix.length());
            return Pattern.compile(partPattern);
        } else {
            throw new BridgeDBException ("Unable to convert Pattern " + regex.pattern() + " for code " + sysCode 
                    + " based on xrefprefix " + xrefPrefix);
        }
    }

    public static RegexUriPattern factory(String prefix, String postfix, String sysCode) throws BridgeDBException {
        return new RegexUriPattern(prefix, postfix, sysCode, null);
    }

    public static RegexUriPattern factory(String prefix, String postfix, String sysCode, Pattern regexPattern) throws BridgeDBException {
        return new RegexUriPattern(prefix, postfix, sysCode, regexPattern);
    }

    public static RegexUriPattern factory(UriPattern uriPattern, String sysCode) throws BridgeDBException{
        DataSource dataSource = DataSource.getExistingBySystemCode(sysCode);
        Pattern regex = DataSourcePatterns.getPatterns().get(dataSource);
        String xrefPrefix = RdfBasedCodeMapper.getXrefPrefix(sysCode);
        String prefix;
        String postfix = uriPattern.getPostfix();
        if (xrefPrefix == null){
            prefix = uriPattern.getPrefix();
        } else if (uriPattern.getType() == UriPatternType.codeMapperPattern){
            //prefix should not include the xrefPrefix
            prefix = uriPattern.getPrefix();
            regex = shortenRegex(regex, sysCode);
        } else {
            //prefix should include the xrefPrefix as regex and IDs no longer do
            prefix = uriPattern.getPrefix() + xrefPrefix;
            regex = shortenRegex(regex, sysCode);
        }
        return new RegexUriPattern(prefix, postfix, sysCode, regex);
    }
    
    public static Collection getUriPatterns() throws BridgeDBException {
        HashSet results = new HashSet();
        for (UriPattern pattern:UriPattern.getUriPatterns()){
             for (String sysCode:pattern.getSysCodes()){
                RegexUriPattern regexPattern = factory(pattern, sysCode);
                results.add(regexPattern);
            }
        }
        return results;
    }

    public static Set existingByPattern(String pattern) throws BridgeDBException {
        //todo regex in pattern
        UriPattern uriPattern = UriPattern.existingByPattern(pattern);
        return byPattern(uriPattern);
    }

    public static Set byPattern(UriPattern uriPattern) throws BridgeDBException {
        Set possibleSysCodes = uriPattern.getSysCodes();
        HashSet results = new HashSet();
        for (String possibleSysCode:possibleSysCodes){
            results.add(factory(uriPattern, possibleSysCode));
        }
        return results;
    }
 
    /*private static String extractShortName (String full) throws BridgeDBException {
        String withoutStart;
        if (full.startsWith("ftp://")){
            withoutStart = full.substring(6);
        } else if (full.startsWith("http://")){
            withoutStart = full.substring(7);
        } else if (full.startsWith("https://")){
            withoutStart = full.substring(8);
        } else {
            withoutStart = full;
        }
        if (withoutStart.startsWith("ftp.")){
            withoutStart = withoutStart.substring(4);
        }
        if (withoutStart.startsWith("rdf.")){
            withoutStart = withoutStart.substring(4);
        }
        if (withoutStart.startsWith("www.")){
            withoutStart = withoutStart.substring(4);
        } 
        if (withoutStart.indexOf("/") > 0){
            return withoutStart.substring(0, withoutStart.indexOf("/"));
        } else {
            return withoutStart;
        }
    }*/

    /*public static HashMap getUriGroups() throws BridgeDBException {
        Set patterns = UriPattern.getUriPatterns();
        HashMap results = new HashMap();
        for (UriPattern pattern:UriPattern.getUriPatterns()){
            String mid = extractShortName(pattern.getUriPattern());
            Integer count = results.get(mid);
            if (count == null){
                count = pattern.getSysCodes().size();
            } else {
                count+= pattern.getSysCodes().size();
            }
            results.put(mid, count);
         }
        return results;
    }*/

    /*public static void refreshUriPatterns() throws BridgeDBException{
       if (initialized){
            return;
        }
        UriPattern.refreshUriPatterns();
        init();
        initialized = true;
            
    }*/

    /*public static void init() throws BridgeDBException {
        Reporter.println("RegexUriPattern init");
        Set patterns = UriPattern.getUriPatterns();
        HashMap results = new HashMap();
        for (UriPattern pattern:UriPattern.getUriPatterns()){
            String mid = extractShortName(pattern.getUriPattern());
            Set byShortName = getByShortNames().get(mid);
            if (byShortName == null){
                byShortName = new HashSet();
            }
            byShortName.addAll(byPattern(pattern));
            getByShortNames().put(mid, byShortName);
        }
    }*/

    @Override
    public boolean equals(Object otherObject){
        if (!(otherObject instanceof RegexUriPattern)){
            return false;
        }
        RegexUriPattern other = (RegexUriPattern)otherObject;
        if (!this.prefix.equals(other.prefix)){
            return false;
        }
        if (!this.sysCode.equals(other.sysCode)){
            return false;
        }
        if (this.postfix == null){
            if (other.postfix != null){
                return false;
            }
        } else {
            if (!other.postfix.equals(postfix)){
                return false;
            }
        }
        if (this.regex == null){
            if (other.regex != null){
                return false;
            }
        } else {
            if (other.regex == null){
                return false;
            }
            if (!this.regex.pattern().equals(other.regex.pattern())){
                return false;
            }
        }
        return true;
    }
    
    @Override
    public int hashCode() {
        String full = sysCode + "--" + prefix;
        if (regex != null){
            full = full + regex;
        }
        if (prefix != null){
            full = full + prefix;
        }
        return full.hashCode();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy