
org.biojava.nbio.structure.rcsb.GetRepresentatives Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.rcsb;
import org.biojava.nbio.structure.align.client.JFatCatClient;
import org.biojava.nbio.structure.align.client.StructureName;
import org.biojava.nbio.structure.align.util.HTTPConnectionTools;
import org.biojava.nbio.structure.align.xml.RepresentativeXMLConverter;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
/**
 * Static helpers that download lists of PDB identifiers from the RCSB REST
 * endpoints configured in this class.
 *
 * <p>Both public methods are best-effort: any exception raised while fetching
 * or parsing is printed to standard error and whatever was collected up to
 * that point (possibly an empty set) is returned.
 *
 * TODO Move this to {@link Representatives}.
 */
public class GetRepresentatives {

    /** Base URL of the representatives service; the identity cutoff is appended to it. */
    private static final String CLUSTER_URL = "http://www.rcsb.org/pdb/rest/representatives?cluster=";

    /** URL of the service listing all current entries. */
    private static final String ALL_URL = "http://www.rcsb.org/pdb/rest/getCurrent/";

    /** Available sequence clusters (sequence identity cutoffs accepted by this class). */
    private static final List<Integer> SEQ_IDENTITIES = Arrays.asList(30, 40, 50, 70, 90, 95, 100);

    /**
     * Timeout handed to {@code HTTPConnectionTools.getInputStream}.
     * NOTE(review): presumably milliseconds - confirm against HTTPConnectionTools.
     */
    private static final int TIMEOUT = 60000;

    /** Marker that precedes an identifier in a line of the "all entries" response. */
    private static final String STRUCTURE_ID_KEY = "structureId=";

    /** Number of characters taken as the identifier after the marker. */
    private static final int ID_LENGTH = 4;

    /**
     * Returns a representative set of PDB protein chains at the specified sequence
     * identity cutoff. See http://www.pdb.org/pdb/statistics/clusterStatistics.do
     * for more information.
     *
     * <p>If the cutoff is not one of the available sequence clusters, an error is
     * printed to standard error and an empty set is returned.
     *
     * @param sequenceIdentity sequence identity threshold
     * @return sorted set of {@link StructureName} representatives; never {@code null}
     */
    public static SortedSet<StructureName> getRepresentatives(int sequenceIdentity) {
        SortedSet<StructureName> representatives = new TreeSet<>();
        if (!SEQ_IDENTITIES.contains(sequenceIdentity)) {
            System.err.println("Error: representative chains are not available for %sequence identity: "
                    + sequenceIdentity);
            return representatives;
        }
        try {
            URL u = new URL(CLUSTER_URL + sequenceIdentity);
            // try-with-resources skips close() for a null resource, so the
            // explicit null check below is still what guards the body.
            try (InputStream stream = HTTPConnectionTools.getInputStream(u, TIMEOUT)) {
                if (stream != null) {
                    String xml = JFatCatClient.convertStreamToString(stream);
                    SortedSet<String> reps = RepresentativeXMLConverter.fromXML(xml);
                    for (String s : reps) {
                        representatives.add(new StructureName(s));
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return representatives;
    }

    /**
     * Returns the current list of all PDB IDs.
     *
     * <p>The response is read line by line; every line containing the
     * {@code structureId=} marker contributes one identifier. Lines without the
     * marker, or too short to hold a complete identifier, are skipped.
     *
     * @return sorted set of identifier strings; never {@code null}
     */
    public static SortedSet<String> getAll() {
        SortedSet<String> representatives = new TreeSet<>();
        try {
            URL u = new URL(ALL_URL);
            try (InputStream stream = HTTPConnectionTools.getInputStream(u, TIMEOUT)) {
                if (stream != null) {
                    try (BufferedReader reader = new BufferedReader(
                            new InputStreamReader(stream, StandardCharsets.UTF_8))) {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            String id = extractStructureId(line);
                            if (id != null) {
                                representatives.add(id);
                            }
                        }
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return representatives;
    }

    /**
     * Extracts the identifier that follows the last {@code structureId=} marker in a line.
     *
     * @param line one line of the response
     * @return the identifier, or {@code null} if the marker is absent or the
     *         line ends before a full identifier
     */
    private static String extractStructureId(String line) {
        int index = line.lastIndexOf(STRUCTURE_ID_KEY);
        // lastIndexOf reports "not found" as -1; a match at position 0 is valid.
        if (index < 0) {
            return null;
        }
        // Skip the marker plus one more character (presumably the opening
        // quote of the attribute value - confirm against the service output).
        int start = index + STRUCTURE_ID_KEY.length() + 1;
        int end = start + ID_LENGTH;
        if (end > line.length()) {
            // Truncated line: skip it instead of aborting the whole download.
            return null;
        }
        return line.substring(start, end);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy