
org.biojava.nbio.structure.symmetry.utils.BlastClustReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
The newest version!
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.symmetry.utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.URL;
import java.util.*;
public class BlastClustReader implements Serializable {
private static final long serialVersionUID = 1L;
private static final Logger logger = LoggerFactory.getLogger(BlastClustReader.class);
private int sequenceIdentity = 0;
private List> clusters = new ArrayList<>();
// https://cdn.rcsb.org/resources/sequence/clusters/bc-95.out
private static final String coreUrl = "https://cdn.rcsb.org/resources/sequence/clusters/";
private static final List seqIdentities = Arrays.asList(30, 40, 50, 70, 90, 95, 100);
public BlastClustReader(int sequenceIdentity) {
this.sequenceIdentity = sequenceIdentity;
}
public List> getPdbChainIdClusters() {
loadClusters(sequenceIdentity);
return clusters;
}
public Map getRepresentatives(String pdbId) {
loadClusters(sequenceIdentity);
String pdbIdUc = pdbId.toUpperCase();
Map representatives = new LinkedHashMap<>();
for (List cluster: clusters) {
// map fist match to representative
for (String chainId: cluster) {
if (chainId.startsWith(pdbIdUc)) {
representatives.put(chainId, cluster.get(0));
break;
}
}
}
return representatives;
}
public String getRepresentativeChain(String pdbId, String chainId) {
loadClusters(sequenceIdentity);
String pdbChainId = pdbId.toUpperCase() + "." + chainId;
for (List cluster: clusters) {
if (cluster.contains(pdbChainId)) {
return cluster.get(0);
}
}
return "";
}
public int indexOf(String pdbId, String chainId) {
loadClusters(sequenceIdentity);
String pdbChainId = pdbId.toUpperCase() + "." + chainId;
for (int i = 0; i < clusters.size(); i++) {
List cluster = clusters.get(i);
if (cluster.contains(pdbChainId)) {
return i;
}
}
return -1;
}
public List> getPdbChainIdClusters(String pdbId) {
loadClusters(sequenceIdentity);
String pdbIdUpper = pdbId.toUpperCase();
List> matches = new ArrayList<>();
for (List cluster: clusters) {
for (String chainId: cluster) {
if (chainId.startsWith(pdbIdUpper)) {
matches.add(cluster);
break;
}
}
}
return matches;
}
public List> getChainIdsInEntry(String pdbId) {
loadClusters(sequenceIdentity);
List> matches = new ArrayList<>();
List match = null;
for (List cluster: clusters) {
for (String chainId: cluster) {
if (chainId.startsWith(pdbId)) {
if (match == null) {
match = new ArrayList<>();
}
match.add(chainId.substring(5));
}
}
if (match != null) {
Collections.sort(match);
matches.add(match);
match = null;
}
}
return matches;
}
private void loadClusters(int sequenceIdentity) {
// load clusters only once
if (clusters.size() > 0) {
return;
}
if (!seqIdentities.contains(sequenceIdentity)) {
logger.error("Representative chains are not available for %sequence identity: {}", sequenceIdentity);
return;
}
String urlString = coreUrl + "bc-" + sequenceIdentity + ".out";
try {
URL u = new URL(urlString);
InputStream stream = u.openStream();
if (stream != null) {
BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
String line = null;
while ((line = reader.readLine()) != null) {
line = line.replaceAll("_", ".");
List cluster = Arrays.asList(line.split(" "));
clusters.add(cluster);
}
reader.close();
stream.close();
} else {
throw new IOException("Got null stream for URL " + urlString);
}
} catch (IOException e) {
logger.error("Could not get sequence clusters from URL " + urlString + ". Error: " + e.getMessage());
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy