All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.scop.Astral Maven / Gradle / Ivy

/**
 * BioJava development code
 *
 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This
 * should be distributed with the code. If you do not have a copy, see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.scop;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.ref.SoftReference;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;


/**
 * Provides programmatic access to ASTRAL representative sets. See the paper by Chandonia et. al. for more information. Example:
 *
 * 
 * Set<String> astralSet = Astral.getRepresentatives(Astral.AstralSet.NINETY_FIVE_175B);
 * 
* * This class uses a multiton pattern with soft references for caching. In short: the first time you call the above, it * will fetch the data from ASTRAL; the second time will (probably) not have to; and the instances can still be * garbage-collected if necessary (meaning they don't require heap memory). * * @author dmyerstu * @since 3.0.6 */ public class Astral { /** * An ASTRAL sequence-identity cutoff with an identifier such as: * *
	 * 1.75A_95
	 * 
* * Also contains a URL pointing to a FASTA file containing the representatives. Every character before the first * whitespace character of each header in the FASTA file is expected to be a representative's name. * * @author dmyersturnbull * */ public static enum AstralSet { FORTY_175("1.75_40", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-40-1.75.fa"), NINETY_FIVE_175("1.75_95", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-95-1.75.fa"), FORTY_175A("1.75A_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"), NINETY_FIVE_175A("1.75A_95","http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"), FORTY_175B("1.75B_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"), NINETY_FIVE_175B("1.75B_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"), FORTY_201("2.01_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"), NINETY_FIVE_201("2.01_95", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"), FORTY_202("2.02_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"), NINETY_FIVE_202("2.02_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"), FORTY_203("2.03_40", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-40-2.03.fa"), NINETY_FIVE_203("2.03_95", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-95-2.03.fa"); private String id; private String url; public static AstralSet parse(String str) { for (AstralSet c : AstralSet.class.getEnumConstants()) { if (c.getId().equals(str)) return c; } throw new IllegalArgumentException("No ASTRAL set with id " + str); } AstralSet(String id, String url) { this.url = url; this.id = id; } public String getId() { return id; } public String getUrl() { return url; } @Override public String toString() { return id; } } private static Map> instances = new HashMap>(); private static final Logger logger = LoggerFactory.getLogger(Astral.class); private Set names; private LinkedHashMap failedLines; /** * Get a list of representatives' names for the specified ASTRAL cutoff. */ public static Set getRepresentatives(AstralSet cutoff) { if (instances.containsKey(cutoff.getId()) && instances.get(cutoff.getId()).get() != null) { return instances.get(cutoff.getId()).get().getNames(); } Astral astral = new Astral(cutoff); instances.put(cutoff.getId(), new SoftReference(astral)); return astral.getNames(); } /** * Get a list of representatives' names for the specified ASTRAL cutoff. * @param id An ASTRAL Id, such as 1.75A_95. */ public static Set getRepresentatives(String id) { return getRepresentatives(AstralSet.parse(id)); } /** * Constructs a new Astral object. Generally, client code should prefer calling * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not * included in {@link #Astral(AstralSet)} is required. * * @param cutoff * The ASTRAL sequence-identity cutoff required * @throws RuntimeException * If the Astral set could not be parsed or accessed for any reason */ public Astral(AstralSet cutoff) { URL url; try { url = new URL(cutoff.getUrl()); } catch (MalformedURLException e) { throw new RuntimeException("The URL was invalid!", e); } Reader reader; try { reader = new InputStreamReader(url.openStream()); } catch (IOException e) { throw new RuntimeException("Couldn't open stream to URL " + url, e); } init(reader); } /** * Constructs a new Astral object. Generally, client code should prefer calling * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not * included in {@link #Astral(AstralSet)} is required. * * @throws RuntimeException * If the Astral set could not be parsed or accessed for any reason */ public Astral(String id, URL url) { Reader reader; try { reader = new InputStreamReader(url.openStream()); } catch (IOException e) { throw new RuntimeException("Couldn't open stream to URL " + url, e); } init(reader); } /** * Constructs a new Astral object. Generally, client code should prefer calling * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not * included in {@link #Astral(AstralSet)} is required. * * @throws RuntimeException * If the Astral set could not be parsed or accessed for any reason */ public Astral(String id, Reader reader) { init(reader); } /** * @return The names of representatives in this ASTRAL set. */ public Set getNames() { return names; } /** * Gets a map describing lines read in the file that weren't understood. * @return A LinkedHashMap mapping line numbers of failures to the lines themselves */ public LinkedHashMap getFailedLines() { return failedLines; } /** * Parses the FASTA file opened by reader. */ private void init(Reader reader) { names = new TreeSet(); failedLines = new LinkedHashMap(); BufferedReader br = null; try { br = new BufferedReader(reader); logger.info("Reading ASTRAL file..."); String line = ""; int i = 0; while ((line = br.readLine()) != null) { if (line.startsWith(">")) { try { String scopId = line.split("\\s")[0].substring(1); names.add(scopId); if (i % 1000 == 0) { logger.debug("Reading ASTRAL line for " + scopId); } i++; } catch (RuntimeException e) { failedLines.put(i, line); logger.warn("Couldn't read line " + line, e); } } } br.close(); } catch (IOException e) { throw new RuntimeException("Couldn't read the input stream ", e); } finally { if (br != null) { try { br.close(); } catch (IOException e) { logger.warn("Could not close stream", e); } } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy