com.hfg.bio.taxonomy.ncbi.NCBITaxon Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.taxonomy.ncbi;
import java.io.*;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
//------------------------------------------------------------------------------
/**
* Species class based on the NCBI taxonomy data.
* A default set of files is included as data sources. To minimize load time and
* memory the initial default data source has a few common values. If a value is
* requested that isn't found in this set, a second, more complete but not fully
* up-to-date data source is loaded and the lightweight data source is discarded.
* An additional more up-to-date data source can be manually added as an
* NCBIRemoteTaxonomyDataSource.
*
* @author J. Alex Taylor, hairyfatguy.com
*
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class NCBITaxon implements Comparable
{
private static final Logger LOGGER = Logger.getLogger(NCBITaxon.class.getPackage().getName());
// Data sources should be arranged such that smaller/lighter sources are tried first
// and only if a match is not found do we move to the next "heavier" implementation.
// Two data sources are provided by default - the first with a few common values and
// the second which is a mostly complete (but old) taxonomy. If an up-to-date taxonomy
// is desired, an NCBIRemoteTaxonomyDataSource should be added as a data source.
// Ordered list of lookup sources; the static lookup methods consult index 0 first.
// The element type is declared so that reads from the list are typed as NCBITaxonomyDataSource.
private static final List<NCBITaxonomyDataSource> mDataSources = new ArrayList<>(3);
static
{
   // Register the bundled sources lightest-first.
   addDataSource(new CommonInternalDataSource());
   addDataSource(new FullInternalDataSource());
}
// Shortcuts to some common organisms.
// Each constant only stores its NCBI taxon id at construction time; the remaining
// fields are filled in lazily the first time a getter is called.
/** Human (taxon id 9606) */
public static final NCBITaxon HOMO_SAPIENS = new NCBITaxon(9606);
/** Mouse (taxon id 10090) */
public static final NCBITaxon MUS_MUSCULUS = new NCBITaxon(10090);
/** Rat (taxon id 10116) */
public static final NCBITaxon RATTUS_NORVEGICUS = new NCBITaxon(10116);
/** Hamster (taxon id 10029) */
public static final NCBITaxon CRICETULUS_GRISEUS = new NCBITaxon(10029);
/** Rabbit (taxon id 9986) */
public static final NCBITaxon ORYCTOLAGUS_CUNICULUS = new NCBITaxon(9986);
/** Cow (taxon id 9913) */
public static final NCBITaxon BOS_TAURUS = new NCBITaxon(9913);
/** Horse (taxon id 9796) */
public static final NCBITaxon EQUUS_CABALLUS = new NCBITaxon(9796);
/** Pig (taxon id 9823) */
public static final NCBITaxon SUS_SCROFA = new NCBITaxon(9823);
/** Sheep (taxon id 9940) */
public static final NCBITaxon OVIS_ARIES = new NCBITaxon(9940);
/** Xenopus (African clawed frog) (taxon id 8355) */
public static final NCBITaxon XENOPUS_LAEVIS = new NCBITaxon(8355);
/** Drosophila (Fruit fly) (taxon id 7227) */
public static final NCBITaxon DROSOPHILA_MELANOGASTER = new NCBITaxon(7227);
/** E. Coli (taxon id 562) */
public static final NCBITaxon ESCHERICHIA_COLI = new NCBITaxon(562);
/** Yeast (taxon id 4932) */
public static final NCBITaxon SACCHAROMYCES_CEREVISIAE = new NCBITaxon(4932);
/** Dog (taxon id 9615) */
public static final NCBITaxon CANIS_FAMILIARIS = new NCBITaxon(9615);
/** Chimpanzee (taxon id 9598) */
public static final NCBITaxon PAN_TROGLODYTES = new NCBITaxon(9598);
/** Rhesus monkey (taxon id 9544) */
public static final NCBITaxon MACACA_MULATTA = new NCBITaxon(9544);
/** Camel (taxon id 9837) */
public static final NCBITaxon CAMELUS_BACTRIANUS = new NCBITaxon(9837);
/** Dromedary (taxon id 9838) */
public static final NCBITaxon CAMELUS_DROMEDARIUS = new NCBITaxon(9838);
/** Llama (taxon id 9844) */
public static final NCBITaxon LAMA_GLAMA = new NCBITaxon(9844);
/** Alpaca (taxon id 30538) */
public static final NCBITaxon VICUGNA_PACOS = new NCBITaxon(30538);
// If you add to these common defs, add to sCommonSet below and regenerate the short dump files.
/** Mammals (taxon id 40674) */
public static final NCBITaxon MAMMALS = new NCBITaxon(40674);
/** Primates (taxon id 9443) */
public static final NCBITaxon PRIMATES = new NCBITaxon(9443);
/** Rodents (taxon id 9989) */
public static final NCBITaxon RODENTS = new NCBITaxon(9989);
/** Unknown / unidentified (taxon id 32644) */
public static final NCBITaxon UNKNOWN = new NCBITaxon(32644);
/** Synthetic construct / artificial sequence (taxon id 32630) */
public static final NCBITaxon SYNTHETIC_CONSTRUCT = new NCBITaxon(32630);
//**************************************************************************
// PRIVATE FIELDS
//**************************************************************************
// True once this taxon's data has been populated (or a lookup has been attempted) so getters skip init().
private boolean mInitialized;
private int mTaxonId;
private int mParentTaxonId;
private String mScientificName;
private String mCommonName;
private String mGenBankCommonName;
// Element type declared so that iteration over the synonyms yields Strings.
private Set<String> mSynonyms;
private NCBITaxonNodeRank mNodeRank = NCBITaxonNodeRank.NO_RANK;
private String mEMBL_Code;
private NCBIGenBankDivision mDivision;
// The flags below are boxed, so they remain null until a setter is called.
private Boolean mInheritedDivisionFlag;
private NCBIGeneticCode mGeneticCode;
private Boolean mInheritedGeneticCodeFlag;
private NCBIGeneticCode mMitochondrialGeneticCode;
private Boolean mInheritedMitochondrialGeneticCodeFlag;
private Boolean mGenBankHiddenFlag;
private Boolean mHiddenSubtreeRootFlag;
private String mComments;
// The taxons exposed as class constants; element type declared so iteration yields NCBITaxon.
private static final Set<NCBITaxon> sCommonSet = new HashSet<>();
// Classpath resources (relative to this class) holding the bundled taxonomy dumps.
private static final String NODES_FILE = "rsrc/nodes.dmp.gz";
private static final String NAMES_FILE = "rsrc/names.dmp.gz";
private static final String COMMON_NODES_FILE = "rsrc/nodes_short.dmp.gz";
private static final String COMMON_NAMES_FILE = "rsrc/names_short.dmp.gz";
private static final String NL = System.getProperty("line.separator");
static
{
   // Register every class-constant taxon in the common set.
   Collections.addAll(sCommonSet,
                      HOMO_SAPIENS,
                      MUS_MUSCULUS,
                      RATTUS_NORVEGICUS,
                      CRICETULUS_GRISEUS,
                      ORYCTOLAGUS_CUNICULUS,
                      BOS_TAURUS,
                      EQUUS_CABALLUS,
                      SUS_SCROFA,
                      OVIS_ARIES,
                      DROSOPHILA_MELANOGASTER,
                      ESCHERICHIA_COLI,
                      XENOPUS_LAEVIS,
                      SACCHAROMYCES_CEREVISIAE,
                      CANIS_FAMILIARIS,
                      PAN_TROGLODYTES,
                      MACACA_MULATTA,
                      CAMELUS_BACTRIANUS,
                      CAMELUS_DROMEDARIUS,
                      LAMA_GLAMA,
                      VICUGNA_PACOS,
                      MAMMALS,
                      PRIMATES,
                      RODENTS,
                      SYNTHETIC_CONSTRUCT,
                      UNKNOWN);
}
//**************************************************************************
// CONSTRUCTORS
//**************************************************************************
//--------------------------------------------------------------------------
/**
 * Creates a taxon that carries only its NCBI taxon id.
 * No other field is populated by the constructor.
 * @param inTaxonId the NCBI taxon id
 */
public NCBITaxon(int inTaxonId)
{
   this.mTaxonId = inTaxonId;
}
//**************************************************************************
// PUBLIC FUNCTIONS
//**************************************************************************
//---------------------------------------------------------------------------
/**
 * Returns the logger shared by this class (named after the class's package).
 * @return the package-level logger
 */
public static Logger getLogger()
{
   return NCBITaxon.LOGGER;
}
//--------------------------------------------------------------------------
/**
 * Appends a data source to the end of the lookup list, so it is consulted after
 * all previously registered sources.
 * @param inValue the data source to register
 */
public static void addDataSource(NCBITaxonomyDataSource inValue)
{
   NCBITaxon.mDataSources.add(inValue);
}
//--------------------------------------------------------------------------
/**
 * Inserts a data source at the given position of the lookup list.
 * @param inIndex the position at which to insert (0 is consulted first)
 * @param inValue the data source to register
 */
public static void addDataSource(int inIndex, NCBITaxonomyDataSource inValue)
{
   NCBITaxon.mDataSources.add(inIndex, inValue);
}
//--------------------------------------------------------------------------
/**
 * Replaces every registered data source with the single specified one.
 * @param inValue the data source that becomes the only lookup source
 */
public static void setDataSource(NCBITaxonomyDataSource inValue)
{
   // Drop everything registered so far, then register the replacement.
   NCBITaxon.mDataSources.clear();
   NCBITaxon.mDataSources.add(inValue);
}
//--------------------------------------------------------------------------
/**
 * Retrieves the NCBITaxon for the specified common name, scientific name,
 * or GenBank common name. Generally there will be a single taxon found for a given
 * name, but there are instances where multiple taxons may be found.
 * <p>
 * Data sources are consulted in order. A source that has no match is removed from
 * the list (unless it is the last one) and the next source is tried.
 * @param inValue the species name (common or scientific) for the taxon object to return
 * @return a Set of taxon objects corresponding to the specified name. Returns null if a match cannot be found.
 */
public static synchronized Set<NCBITaxon> getByName(String inValue)
{
   Set<NCBITaxon> taxons = null;
   if (StringUtil.isSet(inValue))
   {
      // Lowercase the value so we can compare the names case-insensitively.
      // Locale.ROOT keeps the result independent of the JVM's default locale
      // (e.g. the Turkish dotless-i mapping would otherwise alter names containing 'I').
      String name = inValue.toLowerCase(Locale.ROOT);
      for (int i = 0; i < mDataSources.size(); i++)
      {
         NCBITaxonomyDataSource dataSource = mDataSources.get(i);
         taxons = dataSource.getByName(name);
         if (null == taxons
             && i < mDataSources.size() - 1)
         {
            // The requested name wasn't found in the data source.
            // Data sources should be provided in increasing size so
            // if this isn't the last data source, jettison it and move
            // to the next one.
            mDataSources.remove(i--);
            // Same follow-up as getByTaxonId(): the common taxons must re-read
            // their data from the remaining sources.
            resetCommonTaxons();
         }
         else
         {
            break;
         }
      }
   }
   if (taxons != null)
   {
      // Mark the results as populated so their getters don't trigger another lookup.
      for (NCBITaxon taxon : taxons)
      {
         taxon.mInitialized = true;
      }
   }
   return taxons;
}
//--------------------------------------------------------------------------
/**
 * Returns the taxon for the specified NCBI taxon id.
 * <p>
 * The first registered data source is asked first. If it has no match and other
 * sources remain, it is removed from the list, the common taxons are reset, and
 * the next source is asked.
 * @param inValue the taxon id for the taxon object to retrieve
 * @return the taxon object corresponding to the specified id, or null if no data source knows it
 */
public static synchronized NCBITaxon getByTaxonId(int inValue)
{
   NCBITaxon match = null;
   while (! mDataSources.isEmpty())
   {
      NCBITaxonomyDataSource firstSource = mDataSources.get(0);
      match = firstSource.getByTaxonId(inValue);
      if (match != null
          || mDataSources.size() <= 1)
      {
         // Either we found it or there is no heavier source left to fall back on.
         break;
      }
      // Sources are expected to be ordered lightest-first, so discard the one
      // that missed and retry with its successor.
      mDataSources.remove(0);
      resetCommonTaxons();
   }
   if (null == match)
   {
      return null;
   }
   match.mInitialized = true;
   return match;
}
//--------------------------------------------------------------------------
/**
 * Provides a read-only view of the common taxons (those defined as class constants).
 * @return an unmodifiable Collection of the frequently used taxon objects
 */
public static Collection getCommonSet()
{
   Collection readOnlyView = Collections.unmodifiableCollection(sCommonSet);
   return readOnlyView;
}
//--------------------------------------------------------------------------
/**
 * Formats this taxon as "&lt;id&gt; &lt;scientific name&gt;", followed by the GenBank
 * common name in parentheses when one is present. Reads the fields directly and
 * does not trigger a data lookup.
 * @return the display string
 */
@Override
public String toString()
{
   String text = mTaxonId + " " + mScientificName;
   if (null == mGenBankCommonName)
   {
      return text;
   }
   return text + " (" + mGenBankCommonName + ")";
}
//--------------------------------------------------------------------------
/**
 * Returns the NCBI taxon id given at construction.
 * @return the taxon id
 */
public int getTaxonId()
{
   return this.mTaxonId;
}
//--------------------------------------------------------------------------
/**
 * Builds the lineage string for this taxon by recursively prepending the parent's
 * lineage. Each ranked level contributes "&lt;rank&gt; &lt;scientific name&gt;", with levels
 * separated by "; ". Levels whose rank is NO_RANK are omitted.
 * @return the lineage string (empty if no ranked level is found)
 */
public String getFullTaxonomy()
{
   if (! mInitialized)
   {
      init();
   }
   StringBuilderPlus buffer = new StringBuilderPlus().setDelimiter("; ");
   if (mParentTaxonId != 1)
   {
      NCBITaxon parentTaxon = getParentTaxon();
      // The parent may be unresolvable (e.g. the parent id was never set or is not
      // present in any data source); stop climbing instead of dereferencing null.
      if (parentTaxon != null)
      {
         buffer.append(parentTaxon.getFullTaxonomy());
      }
   }
   if (getTaxonomyRank() != NCBITaxonNodeRank.NO_RANK)
   {
      buffer.delimitedAppend(getTaxonomyRank());
      buffer.append(" ");
      buffer.append(getScientificName());
   }
   return buffer.toString();
}
//--------------------------------------------------------------------------
/**
 * Tests whether the specified taxon is an ancestor of this one. The walk goes
 * parent by parent and ends when a parent cannot be resolved or the taxon with
 * id 1 has been reached. A taxon is not considered a subtaxon of itself.
 * @param inTaxon2 the candidate ancestor
 * @return true if inTaxon2 is found among this taxon's ancestors
 */
public boolean isSubtaxonOf(NCBITaxon inTaxon2)
{
   NCBITaxon node = this;
   while (node != null
          && node.getTaxonId() != 1)
   {
      node = node.getParentTaxon();
      if (null == node)
      {
         break;
      }
      if (node.equals(inTaxon2))
      {
         return true;
      }
   }
   return false;
}
//--------------------------------------------------------------------------
/**
 * Finds the closest taxon shared by this taxon's lineage and the specified taxon's
 * lineage (each lineage includes the starting taxon itself).
 * @param inTaxon2 the taxon to compare against
 * @return the first taxon on inTaxon2's upward path that is also on this taxon's path, or null if there is none
 */
public NCBITaxon getFirstCommonTaxon(NCBITaxon inTaxon2)
{
   // Collect this taxon and all of its resolvable ancestors.
   Set<NCBITaxon> lineage = new HashSet<>();
   for (NCBITaxon node = this; node != null; node = node.getParentTaxon())
   {
      lineage.add(node);
   }
   // Climb from the second taxon until we hit a member of that lineage.
   for (NCBITaxon node = inTaxon2; node != null; node = node.getParentTaxon())
   {
      if (lineage.contains(node))
      {
         return node;
      }
   }
   return null;
}
//--------------------------------------------------------------------------
/**
 * Looks up the parent taxon by this taxon's parent id.
 * @return the parent taxon, or null if the lookup finds nothing
 */
public NCBITaxon getParentTaxon()
{
   int parentId = getParentTaxonId();
   return getByTaxonId(parentId);
}
//--------------------------------------------------------------------------
/**
 * Sets the parent taxon id. A value equal to this taxon's own id is ignored,
 * since a taxon can't be its own parent.
 * @param inValue the parent taxon id
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setParentTaxonId(int inValue)
{
   boolean selfReference = (inValue == mTaxonId);
   if (! selfReference)
   {
      mParentTaxonId = inValue;
   }
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the parent taxon id, populating this taxon's data first if needed.
 * @return the parent taxon id
 */
public int getParentTaxonId()
{
   if (!mInitialized) init();
   return this.mParentTaxonId;
}
//--------------------------------------------------------------------------
/**
 * Returns the scientific name, populating this taxon's data first if needed.
 * @return the scientific name, or null if none has been set
 */
public String getScientificName()
{
   if (!mInitialized) init();
   return this.mScientificName;
}
//--------------------------------------------------------------------------
/**
 * Sets the scientific name.
 * @param inValue the scientific name
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setScientificName(String inValue)
{
   this.mScientificName = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the common name, populating this taxon's data first if needed.
 * @return the common name, or null if none has been set
 */
public String getCommonName()
{
   if (!mInitialized) init();
   return this.mCommonName;
}
//--------------------------------------------------------------------------
/**
 * Sets the common name.
 * @param inValue the common name
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setCommonName(String inValue)
{
   this.mCommonName = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the GenBank common name, populating this taxon's data first if needed.
 * @return the GenBank common name, or null if none has been set
 */
public String getGenBankCommonName()
{
   if (!mInitialized) init();
   return this.mGenBankCommonName;
}
//--------------------------------------------------------------------------
/**
 * Sets the GenBank common name.
 * @param inValue the GenBank common name
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setGenBankCommonName(String inValue)
{
   this.mGenBankCommonName = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the synonyms recorded for this taxon, populating this taxon's data first
 * if needed. The returned Set is the internal one, not a copy.
 * @return the Set of synonym names, or null if none have been set
 */
public Set<String> getSynonyms()
{
   if (! mInitialized)
   {
      init();
   }
   // Declared as Set<String> so callers can iterate the synonyms as Strings.
   return mSynonyms;
}
//--------------------------------------------------------------------------
/**
 * Replaces the synonyms with a copy of the specified values.
 * @param inValues the synonym names; null clears the synonyms
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setSynonyms(Collection<String> inValues)
{
   // Copy into a fresh set so later changes to the caller's collection don't leak in.
   mSynonyms = inValues != null ? new HashSet<>(inValues) : null;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Adds one synonym, creating the synonym set on first use.
 * @param inValue the synonym to add
 * @return this taxon, to allow call chaining
 */
public NCBITaxon addSynonym(String inValue)
{
   if (mSynonyms == null) mSynonyms = new HashSet<>(2);
   mSynonyms.add(inValue);
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the taxonomy rank, populating this taxon's data first if needed.
 * @return the node rank (the field defaults to NCBITaxonNodeRank.NO_RANK)
 */
public NCBITaxonNodeRank getTaxonomyRank()
{
   if (!mInitialized) init();
   return this.mNodeRank;
}
//--------------------------------------------------------------------------
/**
 * Sets the taxonomy rank.
 * @param inValue the node rank
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setTaxonomyRank(NCBITaxonNodeRank inValue)
{
   this.mNodeRank = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the EMBL code, populating this taxon's data first if needed.
 * @return the EMBL code, or null if none has been set
 */
public String getEMBL_Code()
{
   if (!mInitialized) init();
   return this.mEMBL_Code;
}
//--------------------------------------------------------------------------
/**
 * Sets the EMBL code.
 * @param inValue the EMBL code
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setEMBL_Code(String inValue)
{
   this.mEMBL_Code = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the GenBank division, populating this taxon's data first if needed.
 * @return the division, or null if none has been set
 */
public NCBIGenBankDivision getDivision()
{
   if (!mInitialized) init();
   return this.mDivision;
}
//--------------------------------------------------------------------------
/**
 * Sets the GenBank division.
 * @param inValue the division
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setDivision(NCBIGenBankDivision inValue)
{
   this.mDivision = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the inherited-division flag, populating this taxon's data first if needed.
 * @return true only if the flag has been set to true; false if it is false or was never set
 */
public boolean getInheritedDivisionFlag()
{
   if (! mInitialized)
   {
      init();
   }
   // The backing field is a nullable Boolean; unboxing it directly would throw a
   // NullPointerException when the flag was never set.
   return Boolean.TRUE.equals(mInheritedDivisionFlag);
}
//--------------------------------------------------------------------------
/**
 * Sets the inherited-division flag.
 * @param inValue the flag value
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setInheritedDivisionFlag(boolean inValue)
{
   this.mInheritedDivisionFlag = Boolean.valueOf(inValue);
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the genetic code, populating this taxon's data first if needed.
 * @return the genetic code, or null if none has been set
 */
public NCBIGeneticCode getGeneticCode()
{
   if (!mInitialized) init();
   return this.mGeneticCode;
}
//--------------------------------------------------------------------------
/**
 * Sets the genetic code.
 * @param inValue the genetic code
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setGeneticCode(NCBIGeneticCode inValue)
{
   this.mGeneticCode = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the inherited-genetic-code flag, populating this taxon's data first if needed.
 * @return true only if the flag has been set to true; false if it is false or was never set
 */
public boolean getInheritedGeneticCodeFlag()
{
   if (! mInitialized)
   {
      init();
   }
   // The backing field is a nullable Boolean; unboxing it directly would throw a
   // NullPointerException when the flag was never set.
   return Boolean.TRUE.equals(mInheritedGeneticCodeFlag);
}
//--------------------------------------------------------------------------
/**
 * Sets the inherited-genetic-code flag.
 * @param inValue the flag value
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setInheritedGeneticCodeFlag(boolean inValue)
{
   this.mInheritedGeneticCodeFlag = Boolean.valueOf(inValue);
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the mitochondrial genetic code, populating this taxon's data first if needed.
 * @return the mitochondrial genetic code, or null if none has been set
 */
public NCBIGeneticCode getMitochondrialGeneticCode()
{
   if (!mInitialized) init();
   return this.mMitochondrialGeneticCode;
}
//--------------------------------------------------------------------------
/**
 * Sets the mitochondrial genetic code.
 * @param inValue the mitochondrial genetic code
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setMitochondrialGeneticCode(NCBIGeneticCode inValue)
{
   this.mMitochondrialGeneticCode = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the inherited-mitochondrial-genetic-code flag, populating this taxon's
 * data first if needed.
 * @return true only if the flag has been set to true; false if it is false or was never set
 */
public boolean getInheritedMitochondrialGeneticCodeFlag()
{
   if (! mInitialized)
   {
      init();
   }
   // The backing field is a nullable Boolean; unboxing it directly would throw a
   // NullPointerException when the flag was never set.
   return Boolean.TRUE.equals(mInheritedMitochondrialGeneticCodeFlag);
}
//--------------------------------------------------------------------------
/**
 * Sets the inherited-mitochondrial-genetic-code flag.
 * @param inValue the flag value
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setInheritedMitochondrialGeneticCodeFlag(boolean inValue)
{
   this.mInheritedMitochondrialGeneticCodeFlag = Boolean.valueOf(inValue);
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the GenBank-hidden flag, populating this taxon's data first if needed.
 * @return true only if the flag has been set to true; false if it is false or was never set
 */
public boolean getGenBankHiddenFlag()
{
   if (! mInitialized)
   {
      init();
   }
   // The backing field is a nullable Boolean; unboxing it directly would throw a
   // NullPointerException when the flag was never set.
   return Boolean.TRUE.equals(mGenBankHiddenFlag);
}
//--------------------------------------------------------------------------
/**
 * Sets the GenBank-hidden flag.
 * @param inValue the flag value
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setGenBankHiddenFlag(boolean inValue)
{
   this.mGenBankHiddenFlag = Boolean.valueOf(inValue);
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the hidden-subtree-root flag, populating this taxon's data first if needed.
 * @return true only if the flag has been set to true; false if it is false or was never set
 */
public boolean getHiddenSubtreeRootFlag()
{
   if (! mInitialized)
   {
      init();
   }
   // The backing field is a nullable Boolean; unboxing it directly would throw a
   // NullPointerException when the flag was never set.
   return Boolean.TRUE.equals(mHiddenSubtreeRootFlag);
}
//--------------------------------------------------------------------------
/**
 * Sets the hidden-subtree-root flag.
 * @param inValue the flag value
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setHiddenSubtreeRootFlag(boolean inValue)
{
   this.mHiddenSubtreeRootFlag = Boolean.valueOf(inValue);
   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the comments, populating this taxon's data first if needed.
 * @return the comments, or null if none have been set
 */
public String getComments()
{
   if (!mInitialized) init();
   return this.mComments;
}
//--------------------------------------------------------------------------
/**
 * Sets the comments.
 * @param inValue the comment text
 * @return this taxon, to allow call chaining
 */
public NCBITaxon setComments(String inValue)
{
   this.mComments = inValue;
   return this;
}
//--------------------------------------------------------------------------
/**
 * Two taxons are equal when they carry the same taxon id.
 * @param inObj the object to compare against
 * @return true if inObj is this object or an NCBITaxon with the same taxon id
 */
@Override
public boolean equals(Object inObj)
{
   if (this == inObj)
   {
      return true;
   }
   // instanceof is false for null, so this also rejects a null argument.
   if (! (inObj instanceof NCBITaxon))
   {
      return false;
   }
   NCBITaxon other = (NCBITaxon) inObj;
   return mTaxonId == other.mTaxonId;
}
//--------------------------------------------------------------------------
/**
 * Uses the taxon id as the hash, matching the id-based equals().
 * @return the taxon id
 */
@Override
public int hashCode()
{
   return this.mTaxonId;
}
//--------------------------------------------------------------------------
/**
 * Orders taxons by GenBank division name, then by taxon id.
 * A taxon without a division name sorts before one that has a name. When neither
 * has one, or both names are equal, the taxon ids decide. This keeps the ordering
 * antisymmetric and returns 0 exactly when the ids match within a division.
 * @param inObj the taxon to compare against
 * @return a negative, zero, or positive value as this taxon sorts before, with, or after inObj; 1 if inObj is null
 */
public int compareTo(NCBITaxon inObj)
{
   if (null == inObj)
   {
      // Existing behavior: a null argument sorts before any taxon.
      return 1;
   }
   NCBIGenBankDivision division = getDivision();
   NCBIGenBankDivision division2 = inObj.getDivision();
   String divisionName = (division != null ? division.name() : null);
   String divisionName2 = (division2 != null ? division2.name() : null);

   int result = 0;
   if (divisionName != null
       && divisionName2 != null)
   {
      result = divisionName.compareTo(divisionName2);
   }
   else if (divisionName != null)
   {
      result = 1;
   }
   else if (divisionName2 != null)
   {
      result = -1;
   }
   // Both names missing leaves result at 0 so the ids break the tie. Previously this
   // case returned -1 in both directions.
   if (0 == result)
   {
      result = Integer.compare(mTaxonId, inObj.mTaxonId);
   }
   return result;
}
//--------------------------------------------------------------------------
/**
 * Writes a nodes file containing one row per specified taxon id, in ascending id
 * order. Each row holds the taxon id, its parent taxon id, rank, EMBL code,
 * division id, genetic code ids, the associated flags (as 1/0), and the comments,
 * separated by tab-pipe-tab.
 * @param inTaxonIds the ids to extract from the taxonomy data
 * @param inDestFile the nodes file to which the extracted taxon data should be written
 * @throws RuntimeException wrapping any IOException raised while writing
 */
public static void exportNodesFile(Set<Integer> inTaxonIds, File inDestFile)
{
   List<Integer> orderedTaxonIds = new ArrayList<>(inTaxonIds);
   Collections.sort(orderedTaxonIds);
   // try-with-resources closes the writer on every exit path.
   try (Writer fileWriter = new FileWriter(inDestFile))
   {
      String delimiter = "\t|\t";
      StringBuilderPlus lineBuffer = new StringBuilderPlus().setDelimiter(delimiter);
      for (Integer taxonId : orderedTaxonIds)
      {
         NCBITaxon taxon = getByTaxonId(taxonId);
         // Reuse one buffer for every row.
         lineBuffer.setLength(0);
         lineBuffer.delimitedAppend(taxon.getTaxonId())
                   .delimitedAppend(taxon.getParentTaxonId())
                   .delimitedAppend(taxon.getTaxonomyRank())
                   .delimitedAppend(taxon.getEMBL_Code())
                   .delimitedAppend(taxon.getDivision().getId())
                   .delimitedAppend(taxon.getInheritedDivisionFlag() ? 1 : 0)
                   .delimitedAppend(taxon.getGeneticCode().getId())
                   .delimitedAppend(taxon.getInheritedGeneticCodeFlag() ? 1 : 0)
                   .delimitedAppend(taxon.getMitochondrialGeneticCode().getId())
                   .delimitedAppend(taxon.getInheritedMitochondrialGeneticCodeFlag() ? 1 : 0)
                   .delimitedAppend(taxon.getGenBankHiddenFlag() ? 1 : 0)
                   .delimitedAppend(taxon.getHiddenSubtreeRootFlag() ? 1 : 0)
                   .delimitedAppend(StringUtil.isSet(taxon.getComments()) ? taxon.getComments() : "");
         fileWriter.write(lineBuffer.toString());
         fileWriter.write(NL);
      }
   }
   catch (IOException e)
   {
      // This method writes the file; the previous message said "parsing".
      throw new RuntimeException("Error writing nodes file.", e);
   }
}
//--------------------------------------------------------------------------
/**
 * Writes a names file for the specified taxon ids, in ascending id order. For each
 * taxon, one row is written for each of its scientific name, common name and
 * GenBank common name that is set, plus one row per synonym.
 * @param inTaxonIds the ids to extract from the taxonomy data
 * @param inDestFile the names file to which the extracted taxon data should be written
 * @throws IOException if the file cannot be opened or written
 */
public static void exportNamesFile(Set<Integer> inTaxonIds, File inDestFile)
   throws IOException
{
   List<Integer> orderedTaxonIds = new ArrayList<>(inTaxonIds);
   Collections.sort(orderedTaxonIds);
   // try-with-resources closes the writer on every exit path.
   try (Writer fileWriter = new FileWriter(inDestFile))
   {
      StringBuilderPlus lineBuffer = new StringBuilderPlus().setDelimiter("\t|\t");
      for (Integer taxonId : orderedTaxonIds)
      {
         NCBITaxon taxon = getByTaxonId(taxonId);
         if (StringUtil.isSet(taxon.getScientificName()))
         {
            writeNameLine(fileWriter, lineBuffer, taxon.getTaxonId(),
                          taxon.getScientificName(), NCBITaxonNameClass.SCIENTIFIC_NAME);
         }
         if (StringUtil.isSet(taxon.getCommonName()))
         {
            writeNameLine(fileWriter, lineBuffer, taxon.getTaxonId(),
                          taxon.getCommonName(), NCBITaxonNameClass.COMMON_NAME);
         }
         if (StringUtil.isSet(taxon.getGenBankCommonName()))
         {
            writeNameLine(fileWriter, lineBuffer, taxon.getTaxonId(),
                          taxon.getGenBankCommonName(), NCBITaxonNameClass.GENBANK_COMMON_NAME);
         }
         // Typed local so the synonyms can be iterated as Strings.
         Set<String> synonyms = taxon.getSynonyms();
         if (CollectionUtil.hasValues(synonyms))
         {
            for (String synonym : synonyms)
            {
               writeNameLine(fileWriter, lineBuffer, taxon.getTaxonId(),
                             synonym, NCBITaxonNameClass.SYNONYM);
            }
         }
      }
   }
}
//--------------------------------------------------------------------------
// Writes a single names-file row (taxon id, name, EMBL-name placeholder, name class) followed by a line separator.
private static void writeNameLine(Writer inWriter, StringBuilderPlus inLineBuffer, int inTaxonId,
                                  String inName, NCBITaxonNameClass inNameClass)
   throws IOException
{
   inLineBuffer.setLength(0);
   inLineBuffer.delimitedAppend(inTaxonId)
               .delimitedAppend(inName)
               .delimitedAppend("\t") // EMBL name
               .delimitedAppend(inNameClass);
   inWriter.write(inLineBuffer.toString());
   inWriter.write(NL);
}
//**************************************************************************
// PRIVATE METHODS
//**************************************************************************
//--------------------------------------------------------------------------
// Clears the loaded state of every common taxon so each one re-reads its data
// on the next getter call.
private static void resetCommonTaxons()
{
   for (NCBITaxon commonTaxon : sCommonSet)
   {
      commonTaxon.clearData();
   }
}
//--------------------------------------------------------------------------
// Marks this taxon as not yet populated; field values are left in place and are
// overwritten by the next init().
private void clearData()
{
   this.mInitialized = false;
}
//--------------------------------------------------------------------------
// Populates this taxon by looking up its id and copying every field from the
// returned template. If the id is unknown the fields are left as they are.
// Either way the taxon is marked initialized so the lookup isn't repeated.
private void init()
{
   NCBITaxon template = getByTaxonId(mTaxonId);
   if (template != null)
   {
      setParentTaxonId(template.getParentTaxonId());
      setScientificName(template.getScientificName());
      setCommonName(template.getCommonName());
      setGenBankCommonName(template.getGenBankCommonName());
      setSynonyms(template.getSynonyms());
      setTaxonomyRank(template.getTaxonomyRank());
      setDivision(template.getDivision());
      // Previously omitted, which left the inherited-division flag unset on lazily loaded taxons.
      setInheritedDivisionFlag(template.getInheritedDivisionFlag());
      setEMBL_Code(template.getEMBL_Code());
      setGeneticCode(template.getGeneticCode());
      setInheritedGeneticCodeFlag(template.getInheritedGeneticCodeFlag());
      setMitochondrialGeneticCode(template.getMitochondrialGeneticCode());
      setInheritedMitochondrialGeneticCodeFlag(template.getInheritedMitochondrialGeneticCodeFlag());
      setGenBankHiddenFlag(template.getGenBankHiddenFlag());
      setHiddenSubtreeRootFlag(template.getHiddenSubtreeRootFlag());
      setComments(template.getComments());
   }
   mInitialized = true;
}
//###########################################################################
// INNER CLASS
//###########################################################################
/**
 * Data source backed by a pair of classpath resources (a nodes dump and a names
 * dump) located relative to NCBITaxon. Resources whose name ends in ".gz" are
 * decompressed on the fly.
 */
private abstract static class InternalDataSource extends NCBITaxonomyDataSourceImpl
{
   private final String mNodesRsrc;
   private final String mNamesRsrc;
   //-----------------------------------------------------------------------
   /**
    * @param inNodesRsrc resource name of the nodes dump
    * @param inNamesRsrc resource name of the names dump
    */
   public InternalDataSource(String inNodesRsrc, String inNamesRsrc)
   {
      mNodesRsrc = inNodesRsrc;
      mNamesRsrc = inNamesRsrc;
   }
   //--------------------------------------------------------------------------
   // Loads the nodes resource first, then the names resource.
   protected synchronized void initialize()
   {
      parseNodesFile();
      parseNamesFile();
      // TODO: Trim the maps to conserve space?
   }
   //--------------------------------------------------------------------------
   // Parses the nodes resource; any IOException is rethrown unchecked.
   private void parseNodesFile()
   {
      try (BufferedReader nodeReader = getNodesReader())
      {
         innerParseNodesFile(nodeReader);
      }
      catch (IOException e)
      {
         throw new RuntimeException("Error parsing node file.", e);
      }
   }
   //--------------------------------------------------------------------------
   // Parses the names resource; any IOException is rethrown unchecked.
   private void parseNamesFile()
   {
      try (BufferedReader namesReader = getNamesReader())
      {
         innerParseNamesFile(namesReader);
      }
      catch (IOException e)
      {
         // The previous message said "node file" here.
         throw new RuntimeException("Error parsing names file.", e);
      }
   }
   //--------------------------------------------------------------------------
   // Opens a buffered reader over the nodes resource.
   private BufferedReader getNodesReader()
      throws IOException
   {
      LOGGER.log(Level.FINE, "Initializing NCBI taxon data source from nodes file "
                             + StringUtil.singleQuote(mNodesRsrc));
      InputStream stream = getResourceStream(mNodesRsrc);
      return new BufferedReader(new InputStreamReader(stream), 1024 * 8);
   }
   //--------------------------------------------------------------------------
   // Opens a buffered reader over the names resource.
   private BufferedReader getNamesReader()
      throws IOException
   {
      InputStream stream = getResourceStream(mNamesRsrc);
      return new BufferedReader(new InputStreamReader(stream), 1024 * 8);
   }
   //--------------------------------------------------------------------------
   // Opens the named resource relative to NCBITaxon, wrapping it in a
   // GZIPInputStream when the name ends in ".gz".
   // Throws a RuntimeException if the resource doesn't exist.
   private static InputStream getResourceStream(String inResource)
      throws IOException
   {
      InputStream stream = NCBITaxon.class.getResourceAsStream(inResource);
      if (null == stream)
      {
         throw new RuntimeException("'" + inResource + "' couldn't be found!");
      }
      if (inResource.endsWith(".gz"))
      {
         try
         {
            stream = new GZIPInputStream(stream);
         }
         catch (IOException e)
         {
            // The GZIP header couldn't be read; release the raw stream before propagating.
            try
            {
               stream.close();
            }
            catch (IOException closeException)
            {
               e.addSuppressed(closeException);
            }
            throw e;
         }
      }
      return stream;
   }
}
//###########################################################################
// INNER CLASS
//###########################################################################
/** Internal data source reading the short nodes/names resources. */
private static class CommonInternalDataSource extends InternalDataSource
{
   //-----------------------------------------------------------------------
   public CommonInternalDataSource()
   {
      super(NCBITaxon.COMMON_NODES_FILE, NCBITaxon.COMMON_NAMES_FILE);
   }
}
//###########################################################################
// INNER CLASS
//###########################################################################
/** Internal data source reading the full nodes/names resources. */
private static class FullInternalDataSource extends InternalDataSource
{
   //-----------------------------------------------------------------------
   public FullInternalDataSource()
   {
      super(NCBITaxon.NODES_FILE, NCBITaxon.NAMES_FILE);
   }
}
}