org.biojava.nbio.aaproperties.CommandPrompt Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.aaproperties;
import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable;
import org.biojava.nbio.aaproperties.xml.CaseFreeAminoAcidCompoundSet;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.io.*;
import org.biojava.nbio.core.sequence.template.CompoundSet;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
public class CommandPrompt {
/**
* The main method
* @param args
* See showHelp for a list of available arguments
* @throws Exception
* To handle exception thrown by reading of XML files
*/
public static void main(String[] args) throws Exception{
run(args);
}
private static AminoAcidCompositionTable checkForValidityAndObtainAATable(String inputLocation, int propertyListSize, String aminoAcidCompositionLocation,
String elementMassLocation) throws Exception{
if(inputLocation == null) {
showHelp();
throw new Error("Please do provide location of input file.");
}
if(propertyListSize == 0){
showHelp();
throw new Error("Please at least specify a property to compute.");
}
AminoAcidCompositionTable aaTable = null;
if(aminoAcidCompositionLocation != null && elementMassLocation == null){
aaTable = PeptideProperties.obtainAminoAcidCompositionTable(new File(aminoAcidCompositionLocation));
}else if(aminoAcidCompositionLocation != null && elementMassLocation != null){
aaTable = PeptideProperties.obtainAminoAcidCompositionTable(new File(aminoAcidCompositionLocation, elementMassLocation));
}else if(aminoAcidCompositionLocation == null && elementMassLocation != null){
throw new Error("You have define the location of Element Mass XML file. Please also define the location of Amino Acid Composition XML file");
}
return aaTable;
}
private static void readInputAndGenerateOutput(String outputLocation, List propertyList, List specificList,
String delimiter, String inputLocation, AminoAcidCompositionTable aaTable, int decimalPlace) throws Exception{
PrintStream output;
if(outputLocation != null)
output = new PrintStream(new File(outputLocation));
else
output = System.out;
printHeader(output, propertyList, specificList, delimiter);
LinkedHashMap a = readInputFile(inputLocation, aaTable);
//Need for the last sequence
for(Entry entry:a.entrySet()){
compute(output, entry.getValue().getOriginalHeader(), entry.getValue().getSequenceAsString().trim(), delimiter, aaTable, propertyList, specificList,
decimalPlace);
}
output.close();
}
public static void run(String[] args) throws Exception{
/*
* Parse input arguments
*/
List propertyList = new ArrayList();
List specificList = new ArrayList();
String inputLocation = null;
String outputLocation = null;
String aminoAcidCompositionLocation = null;
String elementMassLocation = null;
String delimiter = ",";
int decimalPlace = 4;
for(int i = 0; i < args.length; i++){
if(args[i].charAt(0) != '-' || args[i].length() != 2){
showHelp();
throw new Error("Unknown option: " + args[i]);
}else{
switch(args[i].charAt(1)){
//Required
case 'i': inputLocation = args[++i]; break;
//Optional
case 'o': outputLocation = args[++i]; break;
case 'f':
i++;
if(args[i].equalsIgnoreCase("csv")) delimiter = ",";
else if(args[i].equalsIgnoreCase("tsv")) delimiter = "\t";
else throw new Error("Invalid value for -f: " + args[i] + ". Please choose either csv or tsv only.");
break;
case 'x': aminoAcidCompositionLocation = args[++i]; break;
case 'y': elementMassLocation = args[++i]; break;
case 'd': decimalPlace = Integer.parseInt(args[++i]); break;
//Properties
case 'a':
propertyList.add('1');
propertyList.add('2');
propertyList.add('3');
propertyList.add('4');
propertyList.add('5');
propertyList.add('6');
propertyList.add('7');
propertyList.add('8');
propertyList.add('9');
break;
case '1': propertyList.add('1'); break;
case '2': propertyList.add('2'); break;
case '3': propertyList.add('3'); break;
case '4': propertyList.add('4'); break;
case '5': propertyList.add('5'); break;
case '6': propertyList.add('6'); break;
case '7': propertyList.add('7'); break;
case '8': propertyList.add('8'); break;
case '9': propertyList.add('9'); break;
case '0':
propertyList.add('0');
i++;
if(args[i].length() != 1) throw new Error("Invalid value: " + args[i] + ". Amino Acid Symbol should be of single character");
specificList.add(args[i].toUpperCase().charAt(0));
break;
default:
showHelp();
throw new Error("Unknown option: " + args[i]);
}
}
}
/*
* Check for validity of input arguments
*/
AminoAcidCompositionTable aaTable = checkForValidityAndObtainAATable(inputLocation, propertyList.size(), aminoAcidCompositionLocation,
elementMassLocation);
/*
* Read input file and generate output
*/
readInputAndGenerateOutput(outputLocation, propertyList, specificList, delimiter, inputLocation, aaTable, decimalPlace);
}
private static LinkedHashMap readInputFile(String inputLocation, AminoAcidCompositionTable aaTable) throws Exception{
FileInputStream inStream = new FileInputStream(inputLocation);
CompoundSet set;
if(aaTable == null){
set = CaseFreeAminoAcidCompoundSet.getAminoAcidCompoundSet();
}else{
set = aaTable.getAminoAcidCompoundSet();
}
LinkedHashMap ret;
if ( inputLocation.toLowerCase().contains(".gb")) {
GenbankReader genbankReader = new GenbankReader(
inStream, new GenericGenbankHeaderParser(),
new ProteinSequenceCreator(set));
ret = genbankReader.process();
} else {
FastaReader fastaReader = new FastaReader(
inStream, new GenericFastaHeaderParser(),
new ProteinSequenceCreator(set));
ret = fastaReader.process();
}
return ret;
}
public enum PropertyName{MolecularWeight, Absorbance_True, Absorbance_False, ExtinctionCoefficient_True, ExtinctionCoefficient_False,
InstabilityIndex, ApliphaticIndex, AverageHydropathyValue, IsoelectricPoint, NetCharge_pH_7, A, R,
N, D, C, E, Q, G, H, I, L,
K, M, F, P, S, T, W, Y, V};
private static void printHeader(PrintStream output, List propertyList, List specificList, String delimiter) throws IOException{
int specificCount = 0;
/*
* 1 Molecular weight
* 2 Absorbance (assumed Cys reduced and assume Cys to form cystines)
* 3 Extinction coefficient (assumed Cys reduced and assume Cys to form cystines)
* 4 Instability index
* 5 Apliphatic index
* 6 Average hydropathy value
* 7 Isoelectric point
* 8 Net charge at pH 7
* 9 Composition of the 20 standard amino acid
* 0 Composition of the specific amino acid
*/
List sList = new ArrayList();
sList.add("SequenceName");
for(Character c:propertyList){
switch(c){
case '1': sList.add(PropertyName.MolecularWeight.toString()); break;
case '2': sList.add(PropertyName.Absorbance_True.toString()); sList.add(PropertyName.Absorbance_False.toString()); break;
case '3': sList.add(PropertyName.ExtinctionCoefficient_True.toString()); sList.add(PropertyName.ExtinctionCoefficient_False.toString()); break;
case '4': sList.add(PropertyName.InstabilityIndex.toString()); break;
case '5': sList.add(PropertyName.ApliphaticIndex.toString()); break;
case '6': sList.add(PropertyName.AverageHydropathyValue.toString()); break;
case '7': sList.add(PropertyName.IsoelectricPoint.toString()); break;
case '8': sList.add(PropertyName.NetCharge_pH_7.toString()); break;
case '9':
sList.add(PropertyName.A.toString()); sList.add(PropertyName.R.toString());
sList.add(PropertyName.N.toString()); sList.add(PropertyName.D.toString());
sList.add(PropertyName.C.toString()); sList.add(PropertyName.E.toString());
sList.add(PropertyName.Q.toString()); sList.add(PropertyName.G.toString());
sList.add(PropertyName.H.toString()); sList.add(PropertyName.I.toString());
sList.add(PropertyName.L.toString()); sList.add(PropertyName.K.toString());
sList.add(PropertyName.M.toString()); sList.add(PropertyName.F.toString());
sList.add(PropertyName.P.toString()); sList.add(PropertyName.S.toString());
sList.add(PropertyName.T.toString()); sList.add(PropertyName.W.toString());
sList.add(PropertyName.Y.toString()); sList.add(PropertyName.V.toString());
break;
case '0': sList.add("" + specificList.get(specificCount++)); break;
}
}
for(int i = 0; i < sList.size(); i++){
if(i != 0) output.print(delimiter);
output.print(sList.get(i));
}
output.println();
output.flush();
}
private static void compute(PrintStream output, String header, String sequence, String delimiter,
AminoAcidCompositionTable aaTable, List propertyList, List specificList, int decimalPlace) throws CompoundNotFoundException{
/*
* 1 Molecular weight
* 2 Absorbance (assumed Cys reduced and assume Cys to form cystines)
* 3 Extinction coefficient
* 4 Instability index
* 5 Apliphatic index
* 6 Average hydropathy value
* 7 Isoelectric point
* 8 Net charge at pH 7
* 9 Composition of the 20 standard amino acid
* 0 Composition of the specific amino acid
*/
ProteinSequence pSequence;
CompoundSet aaSet;
if(aaTable != null){
sequence = Utils.checkSequence(sequence, aaTable.getSymbolSet());
pSequence = new ProteinSequence(sequence, aaTable.getAminoAcidCompoundSet());
aaSet = aaTable.getAminoAcidCompoundSet();
}else{
sequence = Utils.checkSequence(sequence);
pSequence = new ProteinSequence(sequence);
aaSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
}
IPeptideProperties pp = new PeptidePropertiesImpl();
int specificCount = 0;
List dList = new ArrayList();
for(Character c:propertyList){
switch(c){
case '1':
if(aaTable == null)
dList.add(pp.getMolecularWeight(pSequence));
else
dList.add(pp.getMolecularWeight(pSequence));
break;
case '2':
dList.add(pp.getAbsorbance(pSequence, true));
dList.add(pp.getAbsorbance(pSequence, false));
break;
case '3':
dList.add(pp.getExtinctionCoefficient(pSequence, true));
dList.add(pp.getExtinctionCoefficient(pSequence, false));
break;
case '4': dList.add(pp.getInstabilityIndex(pSequence)); break;
case '5': dList.add(pp.getApliphaticIndex(pSequence)); break;
case '6': dList.add(pp.getAvgHydropathy(pSequence)); break;
case '7': dList.add(pp.getIsoelectricPoint(pSequence)); break;
case '8': dList.add(pp.getNetCharge(pSequence)); break;
case '9':
Map aaCompound2Double = pp.getAAComposition(pSequence);
//(A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V)
dList.add(aaCompound2Double.get(Constraints.A));
dList.add(aaCompound2Double.get(Constraints.R));
dList.add(aaCompound2Double.get(Constraints.N));
dList.add(aaCompound2Double.get(Constraints.D));
dList.add(aaCompound2Double.get(Constraints.C));
dList.add(aaCompound2Double.get(Constraints.E));
dList.add(aaCompound2Double.get(Constraints.Q));
dList.add(aaCompound2Double.get(Constraints.G));
dList.add(aaCompound2Double.get(Constraints.H));
dList.add(aaCompound2Double.get(Constraints.I));
dList.add(aaCompound2Double.get(Constraints.L));
dList.add(aaCompound2Double.get(Constraints.K));
dList.add(aaCompound2Double.get(Constraints.M));
dList.add(aaCompound2Double.get(Constraints.F));
dList.add(aaCompound2Double.get(Constraints.P));
dList.add(aaCompound2Double.get(Constraints.S));
dList.add(aaCompound2Double.get(Constraints.T));
dList.add(aaCompound2Double.get(Constraints.W));
dList.add(aaCompound2Double.get(Constraints.Y));
dList.add(aaCompound2Double.get(Constraints.V));
break;
case '0': dList.add(pp.getEnrichment(pSequence, aaSet.getCompoundForString("" + specificList.get(specificCount++)))); break;
}
}
output.print(header.replace(delimiter, "_"));
for(int i = 0; i < dList.size(); i++){
output.print(delimiter + Utils.roundToDecimals(dList.get(i), decimalPlace));
}
output.println();
output.flush();
}
private static void showHelp(){
System.err.println("NAME");
System.err.println("\tAn executable to generate physico-chemical properties of protein sequences.");
System.err.println();
System.err.println("EXAMPLES");
System.err.println("\tjava -jar AAProperties.jar -i test.fasta -a");
System.err.println("\t\tGenerates all possible properties.");
System.err.println();
System.err.println("\tjava -jar AAProperties.jar -i test.fasta -1 -3 -7");
System.err.println("\t\tGenerates only molecular weight, extinction coefficient and isoelectric point.");
System.err.println();
System.err.println("\tjava -jar AAProperties.jar -i test.fasta -0 A -0 N -1");
System.err.println("\t\tGenerates composition of two specific amino acid symbol and molecular weight.");
System.err.println();
System.err.println("OPTIONS");
System.err.println("\tRequired");
System.err.println("\t\t-i location of input FASTA file");
System.err.println();
System.err.println("\tOptional");
System.err.println("\t\t-o location of output file [standard output (default)]");
System.err.println("\t\t-f output format [csv (default) or tsv]");
System.err.println("\t\t-x location of Amino Acid Composition XML file for defining amino acid composition");
System.err.println("\t\t-y location of Element Mass XML file for defining mass of elements");
System.err.println("\t\t-d number of decimals (int) [4 (default)]");
System.err.println();
System.err.println("\tProvide at least one of them");
System.err.println("\t\t-a compute properties of option 1-9");
System.err.println("\t\t-1 compute molecular weight");
System.err.println("\t\t-2 compute absorbance");
System.err.println("\t\t-3 compute extinction coefficient");
System.err.println("\t\t-4 compute instability index");
System.err.println("\t\t-5 compute apliphatic index");
System.err.println("\t\t-6 compute average hydropathy value");
System.err.println("\t\t-7 compute isoelectric point");
System.err.println("\t\t-8 compute net charge at pH 7");
System.err.println("\t\t-9 compute composition of 20 standard amino acid (A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V)");
System.err.println("\t\t-0 compute composition of specific amino acid symbol");
System.err.println();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy