All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.taxonomy.ncbi.NCBITaxon Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.taxonomy.ncbi;

import java.io.*;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;

import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;

//------------------------------------------------------------------------------
/**
 * Species class based on the NCBI taxonomy data.
 * A default set of files are included as data sources. To minimize load time and
 * memory the initial default data source has a few common values. If a value is
 * requested that isn't found in this set, a second, more complete but not fully
 * up-to-date data source is loaded and the lightweight data source is discarded.
 * An additional more up-to-date data source can be manually added as an
 * NCBIRemoteTaxonomyDataSource.
 * 
* @author J. Alex Taylor, hairyfatguy.com *
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class NCBITaxon implements Comparable { private static final Logger LOGGER = Logger.getLogger(NCBITaxon.class.getPackage().getName()); // Data sources should be arranged such that smaller/lighter sources are tried first // and only if a match is not found do we move to the next "heavier" implementation. // Two data sources are provided by default - the first with a few common values and // the second which is a mostly complete (but old) taxonomy. If an up-to-date taxonomy // is desired, an NCBIRemoteTaxonomyDataSource should be added as a data source. 
private static final List mDataSources = new ArrayList<>(3); static { addDataSource(new CommonInternalDataSource()); addDataSource(new FullInternalDataSource()); } // Shortcuts to some common organisms /** Human */ public static final NCBITaxon HOMO_SAPIENS = new NCBITaxon(9606); /** Mouse */ public static final NCBITaxon MUS_MUSCULUS = new NCBITaxon(10090); /** Rat */ public static final NCBITaxon RATTUS_NORVEGICUS = new NCBITaxon(10116); /** Hamster */ public static final NCBITaxon CRICETULUS_GRISEUS = new NCBITaxon(10029); /** Rabbit */ public static final NCBITaxon ORYCTOLAGUS_CUNICULUS = new NCBITaxon(9986); /** Cow */ public static final NCBITaxon BOS_TAURUS = new NCBITaxon(9913); /** Horse */ public static final NCBITaxon EQUUS_CABALLUS = new NCBITaxon(9796); /** Pig */ public static final NCBITaxon SUS_SCROFA = new NCBITaxon(9823); /** Sheep */ public static final NCBITaxon OVIS_ARIES = new NCBITaxon(9940); /** Xenopus (African clawed frog) */ public static final NCBITaxon XENOPUS_LAEVIS = new NCBITaxon(8355); /** Drosophila (Fruit fly) */ public static final NCBITaxon DROSOPHILA_MELANOGASTER = new NCBITaxon(7227); /** E. 
Coli */ public static final NCBITaxon ESCHERICHIA_COLI = new NCBITaxon(562); /** Yeast */ public static final NCBITaxon SACCHAROMYCES_CEREVISIAE = new NCBITaxon(4932); /** Dog */ public static final NCBITaxon CANIS_FAMILIARIS = new NCBITaxon(9615); /** Chimpanzee */ public static final NCBITaxon PAN_TROGLODYTES = new NCBITaxon(9598); /** Rhesus monkey */ public static final NCBITaxon MACACA_MULATTA = new NCBITaxon(9544); /** Camel */ public static final NCBITaxon CAMELUS_BACTRIANUS = new NCBITaxon(9837); /** Dromedary */ public static final NCBITaxon CAMELUS_DROMEDARIUS = new NCBITaxon(9838); /** Llama */ public static final NCBITaxon LAMA_GLAMA = new NCBITaxon(9844); /** Alpaca */ public static final NCBITaxon VICUGNA_PACOS = new NCBITaxon(30538); // If you add to these common defs, add to sCommonSet below and regenerate the short dump files. /** Mammals */ public static final NCBITaxon MAMMALS = new NCBITaxon(40674); /** Primates */ public static final NCBITaxon PRIMATES = new NCBITaxon(9443); /** Rodents */ public static final NCBITaxon RODENTS = new NCBITaxon(9989); /** Unknown / unidentified */ public static final NCBITaxon UNKNOWN = new NCBITaxon(32644); /** Synthetic construct / artificial sequence */ public static final NCBITaxon SYNTHETIC_CONSTRUCT = new NCBITaxon(32630); //************************************************************************** // PRIVATE FIELDS //************************************************************************** private boolean mInitialized; private int mTaxonId; private int mParentTaxonId; private String mScientificName; private String mCommonName; private String mGenBankCommonName; private Set mSynonyms; private NCBITaxonNodeRank mNodeRank = NCBITaxonNodeRank.NO_RANK; private String mEMBL_Code; private NCBIGenBankDivision mDivision; private Boolean mInheritedDivisionFlag; private NCBIGeneticCode mGeneticCode; private Boolean mInheritedGeneticCodeFlag; private NCBIGeneticCode mMitochondrialGeneticCode; private Boolean 
mInheritedMitochondrialGeneticCodeFlag; private Boolean mGenBankHiddenFlag; private Boolean mHiddenSubtreeRootFlag; private String mComments; private static final Set sCommonSet = new HashSet<>(); private static final String NODES_FILE = "rsrc/nodes.dmp.gz"; private static final String NAMES_FILE = "rsrc/names.dmp.gz"; private static final String COMMON_NODES_FILE = "rsrc/nodes_short.dmp.gz"; private static final String COMMON_NAMES_FILE = "rsrc/names_short.dmp.gz"; private static final String NL = System.getProperty("line.separator"); static { sCommonSet.add(HOMO_SAPIENS); sCommonSet.add(MUS_MUSCULUS); sCommonSet.add(RATTUS_NORVEGICUS); sCommonSet.add(CRICETULUS_GRISEUS); sCommonSet.add(ORYCTOLAGUS_CUNICULUS); sCommonSet.add(BOS_TAURUS); sCommonSet.add(EQUUS_CABALLUS); sCommonSet.add(SUS_SCROFA); sCommonSet.add(OVIS_ARIES); sCommonSet.add(DROSOPHILA_MELANOGASTER); sCommonSet.add(ESCHERICHIA_COLI); sCommonSet.add(XENOPUS_LAEVIS); sCommonSet.add(SACCHAROMYCES_CEREVISIAE); sCommonSet.add(CANIS_FAMILIARIS); sCommonSet.add(PAN_TROGLODYTES); sCommonSet.add(MACACA_MULATTA); sCommonSet.add(CAMELUS_BACTRIANUS); sCommonSet.add(CAMELUS_DROMEDARIUS); sCommonSet.add(LAMA_GLAMA); sCommonSet.add(VICUGNA_PACOS); sCommonSet.add(MAMMALS); sCommonSet.add(PRIMATES); sCommonSet.add(RODENTS); sCommonSet.add(SYNTHETIC_CONSTRUCT); sCommonSet.add(UNKNOWN); } //************************************************************************** // CONSTRUCTORS //************************************************************************** //-------------------------------------------------------------------------- public NCBITaxon(int inTaxonId) { mTaxonId = inTaxonId; } //************************************************************************** // PUBLIC FUNCTIONS //************************************************************************** //--------------------------------------------------------------------------- public static Logger getLogger() { return LOGGER; } 
//-------------------------------------------------------------------------- public static void addDataSource(NCBITaxonomyDataSource inValue) { mDataSources.add(inValue); } //-------------------------------------------------------------------------- public static void addDataSource(int inIndex, NCBITaxonomyDataSource inValue) { mDataSources.add(inIndex, inValue); } //-------------------------------------------------------------------------- public static void setDataSource(NCBITaxonomyDataSource inValue) { mDataSources.clear(); mDataSources.add(inValue); } //-------------------------------------------------------------------------- /** * Retrieves the NCBITaxon for the specified common name, scientific name, * or GenBank common name. Generally there will be a single taxon found for a given * name, but there are instances where multiple taxons may be found. @param inValue the species name (common or scientific) for the taxon object to return @return a Set of taxon objects corresponding to the specified name. Returns null if a match cannot be found. */ public static synchronized Set getByName(String inValue) { Set taxons = null; if (StringUtil.isSet(inValue)) { // Lowercase the value so we can compare the names case-insensitively. inValue = inValue.toLowerCase(); for (int i = 0; i < mDataSources.size(); i++) { NCBITaxonomyDataSource dataSource = mDataSources.get(i); taxons = dataSource.getByName(inValue); if (null == taxons & i < mDataSources.size() - 1) { // The requested id wasn't found in the data source. // Data sources should be provided in increasing size so // if this isn't the last data source, jetison it and move // to the next one. mDataSources.remove(i--); } else { break; } } } if (taxons != null) { for (NCBITaxon taxon : taxons) { taxon.mInitialized = true; } } return taxons; } //-------------------------------------------------------------------------- /** Returns the taxon for the specified NCBI taxon id. 
@param inValue the taxon id for the taxon object to retrieve @return the taxon object corresponding to the specified id */ public static synchronized NCBITaxon getByTaxonId(int inValue) { NCBITaxon taxon = null; for (int i = 0; i < mDataSources.size(); i++) { NCBITaxonomyDataSource dataSource = mDataSources.get(i); taxon = dataSource.getByTaxonId(inValue); if (null == taxon & i < mDataSources.size() - 1) { // The requested id wasn't found in the data source. // Data sources should be provided in increasing size so // if this isn't the last data source, jetison it and move // to the next one. mDataSources.remove(i--); resetCommonTaxons(); } else { break; } } if (taxon != null) { taxon.mInitialized = true; } return taxon; } //-------------------------------------------------------------------------- /** Returns an unmodifiable Collection of the common taxons (those defined as class constants). @return the small collection of frequently used taxon objects */ public static Collection getCommonSet() { return Collections.unmodifiableCollection(sCommonSet); } //-------------------------------------------------------------------------- @Override public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append(mTaxonId); buffer.append(" "); buffer.append(mScientificName); if (mGenBankCommonName != null) { buffer.append(" ("); buffer.append(mGenBankCommonName); buffer.append(")"); } return buffer.toString(); } //-------------------------------------------------------------------------- public int getTaxonId() { return mTaxonId; } //-------------------------------------------------------------------------- public String getFullTaxonomy() { if (! 
mInitialized) { init(); } StringBuilderPlus buffer = new StringBuilderPlus().setDelimiter("; "); if (mParentTaxonId != 1) { NCBITaxon parentTaxon = getParentTaxon(); buffer.append(parentTaxon.getFullTaxonomy()); } if (getTaxonomyRank() != NCBITaxonNodeRank.NO_RANK) { buffer.delimitedAppend(getTaxonomyRank()); buffer.append(" "); buffer.append(getScientificName()); } return buffer.toString(); } //-------------------------------------------------------------------------- public boolean isSubtaxonOf(NCBITaxon inTaxon2) { boolean result = false; NCBITaxon currentTaxon = this; while (currentTaxon != null && currentTaxon.getTaxonId() != 1) { currentTaxon = currentTaxon.getParentTaxon(); if (currentTaxon != null && currentTaxon.equals(inTaxon2)) { result = true; break; } } return result; } //-------------------------------------------------------------------------- public NCBITaxon getFirstCommonTaxon(NCBITaxon inTaxon2) { NCBITaxon firstCommonTaxon = null; Set taxonSet = new HashSet<>(); NCBITaxon currentTaxon = this; while (currentTaxon != null) { taxonSet.add(currentTaxon); currentTaxon = currentTaxon.getParentTaxon(); } // Now walk up the 2nd taxon's branch until we find a taxon in common. currentTaxon = inTaxon2; while (currentTaxon != null) { if (taxonSet.contains(currentTaxon)) { firstCommonTaxon = currentTaxon; break; } currentTaxon = currentTaxon.getParentTaxon(); } return firstCommonTaxon; } //-------------------------------------------------------------------------- public NCBITaxon getParentTaxon() { return getByTaxonId(getParentTaxonId()); } //-------------------------------------------------------------------------- public NCBITaxon setParentTaxonId(int inValue) { // It can't be its own parent. if (inValue != mTaxonId) mParentTaxonId = inValue; return this; } //-------------------------------------------------------------------------- public int getParentTaxonId() { if (! 
mInitialized) { init(); } return mParentTaxonId; } //-------------------------------------------------------------------------- public String getScientificName() { if (! mInitialized) { init(); } return mScientificName; } //-------------------------------------------------------------------------- public NCBITaxon setScientificName(String inValue) { mScientificName = inValue; return this; } //-------------------------------------------------------------------------- public String getCommonName() { if (! mInitialized) { init(); } return mCommonName; } //-------------------------------------------------------------------------- public NCBITaxon setCommonName(String inValue) { mCommonName = inValue; return this; } //-------------------------------------------------------------------------- public String getGenBankCommonName() { if (! mInitialized) { init(); } return mGenBankCommonName; } //-------------------------------------------------------------------------- public NCBITaxon setGenBankCommonName(String inValue) { mGenBankCommonName = inValue; return this; } //-------------------------------------------------------------------------- public Set getSynonyms() { if (! mInitialized) { init(); } return mSynonyms; } //-------------------------------------------------------------------------- public NCBITaxon setSynonyms(Collection inValues) { mSynonyms = inValues != null ? new HashSet<>(inValues) : null; return this; } //-------------------------------------------------------------------------- public NCBITaxon addSynonym(String inValue) { if (null == mSynonyms) { mSynonyms = new HashSet<>(2); } mSynonyms.add(inValue); return this; } //-------------------------------------------------------------------------- public NCBITaxonNodeRank getTaxonomyRank() { if (! 
mInitialized) { init(); } return mNodeRank; } //-------------------------------------------------------------------------- public NCBITaxon setTaxonomyRank(NCBITaxonNodeRank inValue) { mNodeRank = inValue; return this; } //-------------------------------------------------------------------------- public String getEMBL_Code() { if (! mInitialized) { init(); } return mEMBL_Code; } //-------------------------------------------------------------------------- public NCBITaxon setEMBL_Code(String inValue) { mEMBL_Code = inValue; return this; } //-------------------------------------------------------------------------- public NCBIGenBankDivision getDivision() { if (! mInitialized) { init(); } return mDivision; } //-------------------------------------------------------------------------- public NCBITaxon setDivision(NCBIGenBankDivision inValue) { mDivision = inValue; return this; } //-------------------------------------------------------------------------- public boolean getInheritedDivisionFlag() { if (! mInitialized) { init(); } return mInheritedDivisionFlag; } //-------------------------------------------------------------------------- public NCBITaxon setInheritedDivisionFlag(boolean inValue) { mInheritedDivisionFlag = inValue; return this; } //-------------------------------------------------------------------------- public NCBIGeneticCode getGeneticCode() { if (! mInitialized) { init(); } return mGeneticCode; } //-------------------------------------------------------------------------- public NCBITaxon setGeneticCode(NCBIGeneticCode inValue) { mGeneticCode = inValue; return this; } //-------------------------------------------------------------------------- public boolean getInheritedGeneticCodeFlag() { if (! 
mInitialized) { init(); } return mInheritedGeneticCodeFlag; } //-------------------------------------------------------------------------- public NCBITaxon setInheritedGeneticCodeFlag(boolean inValue) { mInheritedGeneticCodeFlag = inValue; return this; } //-------------------------------------------------------------------------- public NCBIGeneticCode getMitochondrialGeneticCode() { if (! mInitialized) { init(); } return mMitochondrialGeneticCode; } //-------------------------------------------------------------------------- public NCBITaxon setMitochondrialGeneticCode(NCBIGeneticCode inValue) { mMitochondrialGeneticCode = inValue; return this; } //-------------------------------------------------------------------------- public boolean getInheritedMitochondrialGeneticCodeFlag() { if (! mInitialized) { init(); } return mInheritedMitochondrialGeneticCodeFlag; } //-------------------------------------------------------------------------- public NCBITaxon setInheritedMitochondrialGeneticCodeFlag(boolean inValue) { mInheritedMitochondrialGeneticCodeFlag = inValue; return this; } //-------------------------------------------------------------------------- public boolean getGenBankHiddenFlag() { if (! mInitialized) { init(); } return mGenBankHiddenFlag; } //-------------------------------------------------------------------------- public NCBITaxon setGenBankHiddenFlag(boolean inValue) { mGenBankHiddenFlag = inValue; return this; } //-------------------------------------------------------------------------- public boolean getHiddenSubtreeRootFlag() { if (! mInitialized) { init(); } return mHiddenSubtreeRootFlag; } //-------------------------------------------------------------------------- public NCBITaxon setHiddenSubtreeRootFlag(boolean inValue) { mHiddenSubtreeRootFlag = inValue; return this; } //-------------------------------------------------------------------------- public String getComments() { if (! 
mInitialized) { init(); } return mComments; } //-------------------------------------------------------------------------- public NCBITaxon setComments(String inValue) { mComments = inValue; return this; } //-------------------------------------------------------------------------- @Override public boolean equals(Object inObj) { boolean result = false; if (inObj != null) { if (this == inObj || (inObj instanceof NCBITaxon && mTaxonId == ((NCBITaxon) inObj).mTaxonId)) { result = true; } } return result; } //-------------------------------------------------------------------------- @Override public int hashCode() { return mTaxonId; } //-------------------------------------------------------------------------- public int compareTo(NCBITaxon inObj) { int result = 0; if (inObj != null) { NCBIGenBankDivision division = getDivision(); if (division != null && division.name() != null) { NCBIGenBankDivision division2 = inObj.getDivision(); if (division2 != null && division2.name() != null) { result = division.name().compareTo(division2.name()); } else { result = 1; } } else { result = -1; } } else { result = 1; } if (0 == result) { if (mTaxonId > inObj.mTaxonId) { result = 1; } else if (mTaxonId < inObj.mTaxonId) { result = -1; } } return result; } //-------------------------------------------------------------------------- /** Creates a subset of the nodes file containing just the specified taxon ids (and their parent taxon ids). 
@param inTaxonIds the list of id to extract from the taxonomy data @param inDestFile the nodes file to which the extracted taxon data should be written */ public static void exportNodesFile(Set inTaxonIds, File inDestFile) { List orderedTaxonIds = new ArrayList<>(inTaxonIds); Collections.sort(orderedTaxonIds); Writer fileWriter = null; try { try { fileWriter = new FileWriter(inDestFile); String delimiter = "\t|\t"; StringBuilderPlus lineBuffer = new StringBuilderPlus().setDelimiter(delimiter); for (Integer taxonId : orderedTaxonIds) { NCBITaxon taxon = getByTaxonId(taxonId); lineBuffer.setLength(0); lineBuffer.delimitedAppend(taxon.getTaxonId()) .delimitedAppend(taxon.getParentTaxonId()) .delimitedAppend(taxon.getTaxonomyRank()) .delimitedAppend(taxon.getEMBL_Code()) .delimitedAppend(taxon.getDivision().getId()) .delimitedAppend(taxon.getInheritedDivisionFlag() ? 1 : 0) .delimitedAppend(taxon.getGeneticCode().getId()) .delimitedAppend(taxon.getInheritedGeneticCodeFlag() ? 1 : 0) .delimitedAppend(taxon.getMitochondrialGeneticCode().getId()) .delimitedAppend(taxon.getInheritedMitochondrialGeneticCodeFlag() ? 1 : 0) .delimitedAppend(taxon.getGenBankHiddenFlag() ? 1 : 0) .delimitedAppend(taxon.getHiddenSubtreeRootFlag() ? 1 : 0) .delimitedAppend(StringUtil.isSet(taxon.getComments()) ? taxon.getComments() : ""); fileWriter.write(lineBuffer.toString()); fileWriter.write(NL); } } finally { if (fileWriter != null) fileWriter.close(); } } catch (IOException e) { throw new RuntimeException("Error parsing node file.", e); } } //-------------------------------------------------------------------------- /** Creates a subset of the names file containing just the specified taxon ids (and their parent taxon ids). 
@param inTaxonIds the list of id to extract from the taxonomy data @param inDestFile the names file to which the extracted taxon data should be written */ public static void exportNamesFile(Set inTaxonIds, File inDestFile) throws IOException { List orderedTaxonIds = new ArrayList<>(inTaxonIds); Collections.sort(orderedTaxonIds); Writer fileWriter = null; try { fileWriter = new FileWriter(inDestFile); StringBuilderPlus lineBuffer = new StringBuilderPlus().setDelimiter("\t|\t"); for (Integer taxonId : orderedTaxonIds) { NCBITaxon taxon = getByTaxonId(taxonId); if (StringUtil.isSet(taxon.getScientificName())) { lineBuffer.setLength(0); lineBuffer.delimitedAppend(taxon.getTaxonId()) .delimitedAppend(taxon.getScientificName()) .delimitedAppend("\t") // EMBL name .delimitedAppend(NCBITaxonNameClass.SCIENTIFIC_NAME); fileWriter.write(lineBuffer.toString()); fileWriter.write(NL); } if (StringUtil.isSet(taxon.getCommonName())) { lineBuffer.setLength(0); lineBuffer.delimitedAppend(taxon.getTaxonId()) .delimitedAppend(taxon.getCommonName()) .delimitedAppend("\t") // EMBL name .delimitedAppend(NCBITaxonNameClass.COMMON_NAME); fileWriter.write(lineBuffer.toString()); fileWriter.write(NL); } if (StringUtil.isSet(taxon.getGenBankCommonName())) { lineBuffer.setLength(0); lineBuffer.delimitedAppend(taxon.getTaxonId()) .delimitedAppend(taxon.getGenBankCommonName()) .delimitedAppend("\t") // EMBL name .delimitedAppend(NCBITaxonNameClass.GENBANK_COMMON_NAME); fileWriter.write(lineBuffer.toString()); fileWriter.write(NL); } if (CollectionUtil.hasValues(taxon.getSynonyms())) { for (String synonym : taxon.getSynonyms()) { lineBuffer.setLength(0); lineBuffer.delimitedAppend(taxon.getTaxonId()) .delimitedAppend(synonym) .delimitedAppend("\t") // EMBL name .delimitedAppend(NCBITaxonNameClass.SYNONYM); fileWriter.write(lineBuffer.toString()); fileWriter.write(NL); } } } } finally { if (fileWriter != null) fileWriter.close(); } } 
//************************************************************************** // PRIVATE METHODS //************************************************************************** //-------------------------------------------------------------------------- private static void resetCommonTaxons() { for (NCBITaxon taxon : sCommonSet) { taxon.clearData(); } } //-------------------------------------------------------------------------- private void clearData() { mInitialized = false; } //-------------------------------------------------------------------------- private void init() { NCBITaxon template = getByTaxonId(mTaxonId); if (template != null) { setParentTaxonId(template.getParentTaxonId()); setScientificName(template.getScientificName()); setCommonName(template.getCommonName()); setGenBankCommonName(template.getGenBankCommonName()); setSynonyms(template.getSynonyms()); setTaxonomyRank(template.getTaxonomyRank()); setDivision(template.getDivision()); setEMBL_Code(template.getEMBL_Code()); setGeneticCode(template.getGeneticCode()); setInheritedGeneticCodeFlag(template.getInheritedGeneticCodeFlag()); setMitochondrialGeneticCode(template.getMitochondrialGeneticCode()); setInheritedMitochondrialGeneticCodeFlag(template.getInheritedMitochondrialGeneticCodeFlag()); setGenBankHiddenFlag(template.getGenBankHiddenFlag()); setHiddenSubtreeRootFlag(template.getHiddenSubtreeRootFlag()); setComments(template.getComments()); } mInitialized = true; } //########################################################################### // INNER CLASS //########################################################################### private abstract static class InternalDataSource extends NCBITaxonomyDataSourceImpl { private String mNodesRsrc; private String mNamesRsrc; //----------------------------------------------------------------------- public InternalDataSource(String inNodesRsrc, String inNamesRsrc) { mNodesRsrc = inNodesRsrc; mNamesRsrc = inNamesRsrc; } 
//-------------------------------------------------------------------------- protected synchronized void initialize() { parseNodesFile(); parseNamesFile(); // TODO: Trim the maps to conserve space? } //-------------------------------------------------------------------------- private void parseNodesFile() { try { BufferedReader nodeReader = null; try { nodeReader = getNodesReader(); innerParseNodesFile(nodeReader); } finally { if (nodeReader != null) nodeReader.close(); } } catch (IOException e) { throw new RuntimeException("Error parsing node file.", e); } } //-------------------------------------------------------------------------- private void parseNamesFile() { try { BufferedReader namesReader = null; try { namesReader = getNamesReader(); innerParseNamesFile(namesReader); } finally { if (namesReader != null) namesReader.close(); } } catch (IOException e) { throw new RuntimeException("Error parsing node file.", e); } } //-------------------------------------------------------------------------- private BufferedReader getNodesReader() throws IOException { LOGGER.log(Level.FINE, "Initializing NCBI taxon data source from nodes file " + StringUtil.singleQuote(mNodesRsrc)); InputStream stream = getResourceStream(mNodesRsrc); return new BufferedReader(new InputStreamReader(stream), 1024 * 8); } //-------------------------------------------------------------------------- private BufferedReader getNamesReader() throws IOException { InputStream stream = getResourceStream(mNamesRsrc); return new BufferedReader(new InputStreamReader(stream), 1024 * 8); } //-------------------------------------------------------------------------- private static InputStream getResourceStream(String inResource) throws IOException { InputStream stream = NCBITaxon.class.getResourceAsStream(inResource); if (null == stream) { throw new RuntimeException("'" + inResource + "' couldn't be found!"); } if (inResource.endsWith(".gz")) { stream = new GZIPInputStream(stream); } return stream; } } 
//########################################################################### // INNER CLASS //########################################################################### private static class CommonInternalDataSource extends InternalDataSource { //----------------------------------------------------------------------- public CommonInternalDataSource() { super(COMMON_NODES_FILE, COMMON_NAMES_FILE); } } //########################################################################### // INNER CLASS //########################################################################### private static class FullInternalDataSource extends InternalDataSource { //----------------------------------------------------------------------- public FullInternalDataSource() { super(NODES_FILE, NAMES_FILE); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy