com.hfg.bio.seq.alignment.blast.BLAST_Database Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq.alignment.blast;
import java.io.File;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.hfg.bio.seq.BioSequenceType;
import com.hfg.util.Executor;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.xml.XMLName;
import com.hfg.xml.XMLTag;
//==============================================================================
/**
Container for a BLAST database.
@author J. Alex Taylor, hairyfatguy.com
*/
//==============================================================================
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//==============================================================================
public class BLAST_Database extends File
{
   // Type of sequence held by the database. Required when (de)serializing via XML.
   private BioSequenceType mType;

   // XML tag / attribute names used by toXMLTag() and the XMLTag constructor.
   protected static XMLName XML_NAME            = new XMLName("BLAST_Database");
   protected static XMLName XML_NAME_ATT        = new XMLName("name");
   protected static XMLName XML_SEQ_TYPE_ATT    = new XMLName("seqType");
   protected static XMLName XML_DIR_ATT         = new XMLName("dir");
   protected static XMLName XML_DESCRIPTION_ATT = new XMLName("description");

   // Extracts the sequence count (possibly comma-grouped) from 'blastdbcmd -info' output.
   private static final Pattern NUM_SEQS_PATTERN = Pattern.compile("\\s+([\\d\\,]+) sequences");

   private String  mDescription;

   // Time at which mNumSequences was last (re)computed.
   private Date    mCacheDate;

   // Cached sequence count; null while unknown.
   private Integer mNumSequences;

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a database reference from a path string.
    @param inName the path of the BLAST database
    */
   public BLAST_Database(String inName)
   {
      super(inName);
   }

   //---------------------------------------------------------------------------
   /**
    Creates a database reference from a parent directory and a database name.
    @param inParentDir the directory containing the database
    @param inName the name of the database within the directory
    */
   public BLAST_Database(File inParentDir, String inName)
   {
      super(inParentDir, inName);
   }

   //---------------------------------------------------------------------------
   /**
    Restores a database reference from the XML form produced by toXMLTag().
    The 'dir' and 'name' attributes provide the path, 'seqType' is mandatory
    and 'description' is optional.
    @param inXMLTag the XML tag to read from
    @throws RuntimeException if the tag has no 'seqType' attribute
    */
   public BLAST_Database(XMLTag inXMLTag)
   {
      // The superclass constructor has to run first, so the tag name can only be verified afterwards.
      super(inXMLTag.getAttributeValue(XML_DIR_ATT), inXMLTag.getAttributeValue(XML_NAME_ATT));

      inXMLTag.verifyTagName(XML_NAME);

      if (! inXMLTag.hasAttribute(XML_SEQ_TYPE_ATT))
      {
         throw new RuntimeException("No " + StringUtil.singleQuote(XML_SEQ_TYPE_ATT) + " specified for BLAST database " + StringUtil.singleQuote(getName()) + "!");
      }

      setSeqType(BioSequenceType.valueOf(inXMLTag.getAttributeValue(XML_SEQ_TYPE_ATT)));

      if (inXMLTag.hasAttribute(XML_DESCRIPTION_ATT))
      {
         setDescription(inXMLTag.getAttributeValue(XML_DESCRIPTION_ATT));
      }
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Builds the XML representation of this database reference.
    The description attribute is only written if a description has been set.
    @return a new XML tag with the name, directory, sequence type, and (optionally) description
    */
   public XMLTag toXMLTag()
   {
      XMLTag tag = new XMLTag(XML_NAME);
      tag.setAttribute(XML_NAME_ATT, getName());
      tag.setAttribute(XML_DIR_ATT, getParent());
      tag.setAttribute(XML_SEQ_TYPE_ATT, getSeqType());

      if (StringUtil.isSet(getDescription()))
      {
         tag.setAttribute(XML_DESCRIPTION_ATT, getDescription());
      }

      return tag;
   }

   //---------------------------------------------------------------------------
   /**
    Sets the type of sequence held by the database.
    @param inValue the sequence type
    @return this object, to allow method chaining
    */
   public BLAST_Database setSeqType(BioSequenceType inValue)
   {
      mType = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the sequence type previously set, or null if none has been set
    */
   public BioSequenceType getSeqType()
   {
      return mType;
   }

   //---------------------------------------------------------------------------
   /**
    Sets a free-text description for the database.
    @param inValue the description
    @return this object, to allow method chaining
    */
   public BLAST_Database setDescription(String inValue)
   {
      mDescription = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the description previously set, or null if none has been set
    */
   public String getDescription()
   {
      return mDescription;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the number of sequences in the database as reported by 'blastdbcmd -info'.
    The value is cached and recomputed when it is unknown or when this path's
    modification time is newer than the time the cached value was computed.
    If a refresh does not yield a count, the previously cached value is discarded
    rather than being reported as current.
    @return the number of sequences, or null if it could not be determined
    @throws RuntimeException if the blastdbcmd executable does not exist
    */
   public Integer getNumSequences()
   {
      // NOTE(review): lastModified() is evaluated on this exact path and returns 0 for a path
      // that does not exist. If the database base name has no file of its own (only volume /
      // index files with extensions), the staleness check can never trigger - confirm.
      if (null == mNumSequences
          || (mCacheDate != null
              && mCacheDate.getTime() < lastModified()))
      {
         mNumSequences = queryNumSequences();
         mCacheDate = new Date();
      }

      return mNumSequences;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   // Runs 'blastdbcmd -info' for this database and extracts the sequence count from its output.
   // Returns null if the command reports failure or if no count is present in the output.
   private Integer queryNumSequences()
   {
      /* Example:
         /apps/blast/current/bin/blastdbcmd -db /apps/blast/db/swissprot -info
         Database: Non-redundant UniProtKB/SwissProt sequences
                    463,486 sequences; 173,912,377 total residues
         Date: May 12, 2016 10:23 AM Longest sequence: 35,213 residues
         Volumes:
                 /apps/blast/db/swissprot.00
      */
      File exe = new File(BLAST.getDefaultSettings().getExecutableDir(), "blastdbcmd");
      if (! exe.exists())
      {
         throw new RuntimeException("The BLAST executable " + StringUtil.singleQuote(exe) + " does not exist!");
      }

      // NOTE(review): the database path is only wrapped in single quotes; a path that itself
      // contains a single quote would presumably break the command - verify against Executor.
      String cmd = exe.getAbsolutePath() + " -db " + StringUtil.singleQuote(getAbsolutePath()) + " -info";

      Executor executor = new Executor();
      executor.setCommand(cmd);

      int exitStatus = executor.exec();
      if (exitStatus != 0)
      {
         // The command failed, so whatever it wrote to STDOUT is not a trustworthy report.
         return null;
      }

      CharSequence stdout = executor.getSTDOUT();
      if (null == stdout)
      {
         return null;
      }

      Integer numSequences = null;

      Matcher m = NUM_SEQS_PATTERN.matcher(stdout);
      if (m.find())
      {
         // Strip the thousands separators ("463,486" -> "463486") before parsing.
         numSequences = Integer.parseInt(m.group(1).replace(",", ""));
      }

      return numSequences;
   }
}