com.hfg.bio.seq.alignment.blast.BLAST Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq.alignment.blast;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.format.FASTA;
import com.hfg.util.BooleanUtil;
import com.hfg.util.Executor;
import com.hfg.util.OS;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
//==============================================================================
/**
Wrapper for a BLAST search.
Command-line executables are downloadable from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/
@author J. Alex Taylor, hairyfatguy.com
*/
//==============================================================================
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//==============================================================================
public class BLAST
{
   // Settings used by this instance; assigned once in the constructor.
   private final BLAST_Settings mSettings;

   // Optional settings shared by instances created without explicit settings.
   private static BLAST_Settings sDefaultSettings;

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a BLAST wrapper that uses the value of getDefaultSettings().
    */
   public BLAST()
   {
      this(null);
   }

   //---------------------------------------------------------------------------
   /**
    Creates a BLAST wrapper that uses the specified settings.
    @param inSettings the settings to use. If null, the value of getDefaultSettings() is used instead.
    */
   public BLAST(BLAST_Settings inSettings)
   {
      mSettings = inSettings != null ? inSettings : getDefaultSettings();
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Returns the settings object used by this instance.
    @return the settings selected in the constructor
    */
   public BLAST_Settings getSettings()
   {
      return mSettings;
   }

   //---------------------------------------------------------------------------
   /**
    Runs a BLAST search for the specified query.
    The settings are checked first, the command line is built from them, and the
    query is supplied to the command in FASTA format via standard input.
    @param inQuery the query sequence
    @return an output object carrying the exit status, stderr, stdout, and the executed command
    @throws IOException if no BLAST program was specified, the executable (or its directory)
            doesn't exist, or no databases were specified
    */
   public BLAST_Output run(BioSequence inQuery)
      throws IOException
   {
      preflight();

      String cmd = generateCmd();

      Executor executor = new Executor();
      executor.setSTDIN(generateSTDIN(inQuery));
      executor.setCommand(cmd);
      int exitStatus = executor.exec();

      return new BLAST_Output()
            .setExitStatus(exitStatus)
            .setStdErr(executor.getSTDERR())
            .setStdOut(executor.getSTDOUT())
            .setExecutedCmd(cmd);
   }

   //---------------------------------------------------------------------------
   /**
    Specifies the settings to be used by instances that are constructed without settings.
    @param inValue the new default settings (null clears any previously specified default)
    */
   public static void setDefaultSettings(BLAST_Settings inValue)
   {
      sDefaultSettings = inValue;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the default settings.
    @return the settings passed to setDefaultSettings() or, if none were specified,
            a newly constructed BLAST_Settings object (a new object on each call)
    */
   public static BLAST_Settings getDefaultSettings()
   {
      return (sDefaultSettings != null ? sDefaultSettings : new BLAST_Settings());
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Returns the location of the BLAST executable: a file in the configured executable
    directory named after the configured BLAST program.
    @return the executable file or null if no BLAST program has been specified
    */
   private File getExecutable()
   {
      BLAST_Settings settings = getSettings();
      if (null == settings.getBLAST_Program())
      {
         return null;
      }

      return new File(settings.getExecutableDir(), settings.getBLAST_Program().name());
   }

   //---------------------------------------------------------------------------
   /**
    Verifies that the settings are sufficient to attempt a search.
    @throws IOException if no BLAST program was specified, the executable (or its directory)
            doesn't exist, or no databases were specified
    */
   private void preflight()
      throws IOException
   {
      File exe = getExecutable();
      if (null == exe)
      {
         throw new IOException("No BLAST program was specified!");
      }

      if (! exe.exists())
      {
         // Report a missing directory separately since it is the more fundamental problem.
         File exeDir = exe.getParentFile();
         if (exeDir != null
             && ! exeDir.exists())
         {
            throw new IOException("The BLAST executable directory " + StringUtil.singleQuote(exeDir.getPath()) + " doesn't exist!");
         }

         throw new IOException("The BLAST executable " + StringUtil.singleQuote(exe.getPath()) + " doesn't exist!");
      }

      // The element type must be declared; iterating a raw List yields Object
      // and the typed loop below would not compile.
      List<BLAST_Database> dbs = getSettings().getBLAST_Databases();
      if (! CollectionUtil.hasValues(dbs))
      {
         throw new IOException("No BLAST databases specified for searching!");
      }

      // Check that the specified databases have BLAST index files
      for (BLAST_Database db : dbs)
      {
         /* TODO
         if (! db.exists())
         {
            throw new IOException("The BLAST database " + StringUtil.singleQuote(db.getPath()) + " doesn't exist!");
         }
         */
      }
   }

   //---------------------------------------------------------------------------
   /**
    Builds the command line from the settings. The executable path and the -db option
    are always present; every other option is added only if its setting has a value.
    Must only be called after preflight() has confirmed that a BLAST program was specified.
    @return the space-delimited command line
    */
   private String generateCmd()
   {
      BLAST_Settings settings = getSettings();

      // NOTE(review): the executable path and the -out path are appended unquoted;
      // confirm how Executor tokenizes the command if these paths may contain spaces.
      StringBuilderPlus cmd = new StringBuilderPlus(getExecutable().getPath()).setDelimiter(" ");

      cmd.delimitedAppend("-db " + generateDatabaseList());

      if (settings.getEValue() != null)
      {
         cmd.delimitedAppend("-evalue " + settings.getEValue());
      }

      if (settings.getWordSize() != null)
      {
         cmd.delimitedAppend("-word_size " + settings.getWordSize());
      }

      if (settings.getSoftMasking() != null)
      {
         cmd.delimitedAppend("-soft_masking " + settings.getSoftMasking());
      }

      if (settings.getOutputFile() != null)
      {
         cmd.delimitedAppend("-out " + settings.getOutputFile().getPath());
      }

      if (settings.getNumDescriptions() != null)
      {
         cmd.delimitedAppend("-num_descriptions " + settings.getNumDescriptions());
      }

      if (settings.getNumAlignments() != null)
      {
         cmd.delimitedAppend("-num_alignments " + settings.getNumAlignments());
      }

      if (BooleanUtil.valueOf(settings.htmlOutput()))
      {
         cmd.delimitedAppend("-html");
      }

      if (settings.getNumThreads() != null)
      {
         cmd.delimitedAppend("-num_threads " + settings.getNumThreads());
      }

      // Free-form parameters go last.
      if (settings.getCommandLineParams() != null)
      {
         cmd.delimitedAppend(settings.getCommandLineParams());
      }

      return cmd.toString();
   }

   //---------------------------------------------------------------------------
   /*
    From http://www.ncbi.nlm.nih.gov/books/NBK279669/
    To access a BLAST database containing spaces under Microsoft Windows it is necessary to use two sets of double-quotes,
    escaping the innermost quotes with a backslash. For example, Users\joeuser\My Documents\Downloads would be accessed by:
      blastdbcmd -db "\"Users\joeuser\My Documents\Downloads\mydb\"" -info
    The first backslash escapes the beginning inner quote, and the backslash following "mydb" escapes the ending inner quote.
    A second database can be added to this command by including it within the outer pair of quotes:
      blastdbcmd -db "\"Users\joeuser\My Documents\Downloads\mydb\" myotherdb" -info
    If the second database had contained a space, it would have been necessary to surround it by quotes escaped by a backslash.
    Under UNIX systems (including LINUX and Mac OS X) it is preferable to use a single quote (') in place of the escaped double quote:
      blastdbcmd -db ' "path with spaces/mydb" ' -info
    Multiple databases can also be listed within the single quotes, similar to the procedure described for Microsoft Windows.
   */
   /**
    Builds the value for the -db option: the space-delimited database paths, with each
    path that contains a space passed through StringUtil.quote(). The whole list is
    additionally wrapped (StringUtil.quote() on Windows, StringUtil.singleQuote() otherwise)
    when there is more than one database or, on Windows, when any path contains a space.
    @return the database list for the command line
    */
   private String generateDatabaseList()
   {
      List<BLAST_Database> dbs = getSettings().getBLAST_Databases();

      StringBuilderPlus dbList = new StringBuilderPlus().setDelimiter(" ");
      boolean needOuterQuote = false;
      for (BLAST_Database db : dbs)
      {
         boolean hasSpace = db.getPath().contains(" ");

         // NOTE(review): the NCBI guidance above shows backslash-escaped inner quotes on
         // Windows; confirm that StringUtil.quote() combined with Executor yields that form.
         dbList.delimitedAppend(hasSpace ? StringUtil.quote(db.getPath()) : db.getPath());

         // NOTE(review): a single database with a space in its path gets no outer quotes
         // on non-Windows systems although the NCBI guidance shows them; confirm this is intended.
         if (hasSpace
             && OS.value().equals(OS.Windows))
         {
            needOuterQuote = true;
         }
      }

      if (dbs.size() <= 1
          && ! needOuterQuote)
      {
         return dbList.toString();
      }

      return (OS.value().equals(OS.Windows) ? StringUtil.quote(dbList.toString()) : StringUtil.singleQuote(dbList.toString()));
   }

   //---------------------------------------------------------------------------
   /**
    Produces the standard input for the BLAST command: the query written by a FASTA
    formatter and exposed as an in-memory stream.
    @param inQuery the query sequence
    @return a stream over the bytes of the formatted query
    */
   private InputStream generateSTDIN(BioSequence inQuery)
   {
      FASTA fasta = new FASTA();

      // NOTE(review): getBytes() without a charset presumably uses the platform default
      // encoding; confirm that is acceptable for the sequence ids / descriptions written here.
      return new ByteArrayInputStream(fasta.write(inQuery).getBytes());
   }
}