All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.seq.alignment.blast.BLAST_Database Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.alignment.blast;

import java.io.File;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.bio.seq.BioSequenceType;
import com.hfg.util.Executor;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.xml.XMLName;
import com.hfg.xml.XMLTag;


//==============================================================================
/**
 Container for a BLAST database.

 @author J. Alex Taylor, hairyfatguy.com
 */
//==============================================================================
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//==============================================================================

public class BLAST_Database extends File
{
   private BioSequenceType mType;

   // Tag and attribute names shared by toXMLTag() and the XMLTag constructor.
   protected static XMLName XML_NAME       = new XMLName("BLAST_Database");
   protected static XMLName XML_NAME_ATT   = new XMLName("name");
   protected static XMLName XML_SEQ_TYPE_ATT   = new XMLName("seqType");
   protected static XMLName XML_DIR_ATT    = new XMLName("dir");
   protected static XMLName XML_DESCRIPTION_ATT = new XMLName("description");

   // Captures the (comma-grouped) sequence count from 'blastdbcmd -info' output,
   // e.g. the "463,486" in "\t463,486 sequences; 173,912,377 total residues".
   private static final Pattern NUM_SEQS_PATTERN = Pattern.compile("\\s+([\\d\\,]+) sequences");

   private String  mDescription;
   // Time of the last successful blastdbcmd run; null until the first one.
   private Date    mCacheDate;
   // Cached sequence count; null if never fetched or if the last output could not be parsed.
   private Integer mNumSequences;

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a database reference from a path string.

    @param inName the path passed to the File constructor
    */
   public BLAST_Database(String inName)
   {
      super(inName);
   }

   //---------------------------------------------------------------------------
   /**
    Creates a database reference from a parent directory and a name.

    @param inParentDir the parent directory passed to the File constructor
    @param inName the database name within the parent directory
    */
   public BLAST_Database(File inParentDir, String inName)
   {
      super(inParentDir, inName);
   }

   //---------------------------------------------------------------------------
   /**
    Creates a database reference from an XML tag of the form produced by toXMLTag().
    The 'dir' and 'name' attributes supply the file path, 'seqType' is required,
    and 'description' is optional.

    @param inXMLTag the tag to read the database definition from
    @throws RuntimeException if the tag has no 'seqType' attribute
    */
   public BLAST_Database(XMLTag inXMLTag)
   {
      // The super call must come first, so the tag name can only be verified afterwards.
      // NOTE(review): presumably a missing 'name' attribute yields null here, which
      // File(String, String) rejects with a NullPointerException - confirm.
      super(inXMLTag.getAttributeValue(XML_DIR_ATT), inXMLTag.getAttributeValue(XML_NAME_ATT));

      inXMLTag.verifyTagName(XML_NAME);

      if (! inXMLTag.hasAttribute(XML_SEQ_TYPE_ATT))
      {
         throw new RuntimeException("No " + StringUtil.singleQuote(XML_SEQ_TYPE_ATT) + " specified for BLAST database " + StringUtil.singleQuote(getName()) + "!");
      }

      setSeqType(BioSequenceType.valueOf(inXMLTag.getAttributeValue(XML_SEQ_TYPE_ATT)));

      if (inXMLTag.hasAttribute(XML_DESCRIPTION_ATT))
      {
         setDescription(inXMLTag.getAttributeValue(XML_DESCRIPTION_ATT));
      }
   }


   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Builds an XML tag holding this database's name, parent directory, and sequence type,
    plus the description if one is set.

    @return a new XMLTag describing this database
    */
   public XMLTag toXMLTag()
   {
      XMLTag tag = new XMLTag(XML_NAME);
      tag.setAttribute(XML_NAME_ATT, getName());
      tag.setAttribute(XML_DIR_ATT, getParent());
      tag.setAttribute(XML_SEQ_TYPE_ATT, getSeqType());

      if (StringUtil.isSet(getDescription()))
      {
         tag.setAttribute(XML_DESCRIPTION_ATT, getDescription());
      }

      return tag;
   }

   //---------------------------------------------------------------------------
   /**
    Sets the type of sequence held by this database.

    @param inValue the sequence type
    @return this object, to allow method chaining
    */
   public BLAST_Database setSeqType(BioSequenceType inValue)
   {
      mType = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the sequence type, or null if none has been set
    */
   public BioSequenceType getSeqType()
   {
      return mType;
   }


   //---------------------------------------------------------------------------
   /**
    Sets the free-text description of this database.

    @param inValue the description
    @return this object, to allow method chaining
    */
   public BLAST_Database setDescription(String inValue)
   {
      mDescription = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the description, or null if none has been set
    */
   public String getDescription()
   {
      return mDescription;
   }


   //---------------------------------------------------------------------------
   /**
    Returns the number of sequences in this database as reported by 'blastdbcmd -info'.
    The value is cached and is only fetched again when no value is cached or when
    lastModified() reports a time later than the last fetch.

    @return the number of sequences, or null if the count could not be found in the
            blastdbcmd output
    @throws RuntimeException if the blastdbcmd executable does not exist or if it
            exits with a non-zero status
    */
   public Integer getNumSequences()
   {
      // NOTE(review): File.lastModified() returns 0 when this path does not exist as a
      // file. If databases are referenced by base name (as in the example below), the
      // cache would then never be considered stale - confirm.
      if (null == mNumSequences
            || (mCacheDate != null
                && mCacheDate.getTime() < lastModified()))
      {
         // Forget the old value before refetching so that a failed or unparseable
         // refresh can never leave a stale count looking current.
         mNumSequences = null;

         mNumSequences = queryNumSequences();

         // Only reached when the command ran successfully; a failure above leaves
         // the cache date untouched so the next call tries again.
         mCacheDate = new Date();
      }

      return mNumSequences;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Runs 'blastdbcmd -db [this database] -info' and extracts the sequence count
    from its standard output.

    @return the parsed sequence count, or null if the output contains no match
            for the sequence count pattern
    @throws RuntimeException if the executable is missing or exits with a non-zero status
    */
   private Integer queryNumSequences()
   {
/* Example:
         /apps/blast/current/bin/blastdbcmd -db /apps/blast/db/swissprot -info

Database: Non-redundant UniProtKB/SwissProt sequences
	463,486 sequences; 173,912,377 total residues

Date: May 12, 2016  10:23 AM	Longest sequence: 35,213 residues

Volumes:
	/apps/blast/db/swissprot.00
*/
      File exe = new File(BLAST.getDefaultSettings().getExecutableDir(), "blastdbcmd");
      if (! exe.exists())
      {
         throw new RuntimeException("The BLAST executable " + StringUtil.singleQuote(exe) + " does not exist!");
      }

      String cmd = exe.getAbsolutePath() + " -db " + StringUtil.singleQuote(getAbsolutePath()) + " -info";
      Executor executor = new Executor();
      executor.setCommand(cmd);

      int exitStatus = executor.exec();
      if (exitStatus != 0)
      {
         // Output of a failed run is not trustworthy, so report the failure instead of parsing it.
         throw new RuntimeException("The command " + StringUtil.singleQuote(cmd) + " failed with exit status " + exitStatus + "!");
      }

      Integer numSequences = null;

      Matcher m = NUM_SEQS_PATTERN.matcher(executor.getSTDOUT());
      if (m.find())
      {
         // Strip the thousands separators before parsing ("463,486" -> "463486").
         numSequences = Integer.parseInt(m.group(1).replace(",", ""));
      }

      return numSequences;
   }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy