All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.clulab.reach.grounding.ReachKBConstants.scala Maven / Gradle / Ivy

The newest version!
package org.clulab.reach.grounding

/**
  * Singleton object holding the constants used by grounding and entity checking code.
  *   Written by Tom Hicks. 10/22/2015.
  *   Last Modified: Update for HMS drug KB.
  *
  * All members are immutable values. Filenames are bare names (no directory component);
  * the directory constants below give the locations of the knowledge base directory.
  */
object ReachKBConstants {

  // ---- Namespaces -------------------------------------------------------------------

  /** The default namespace string for KBs. */
  val DefaultNamespace: String = "uaz"

  /** The string used to separate a namespace and an ID. */
  val NamespaceIdSeparator: String = ":"


  // ---- Lookup key construction ------------------------------------------------------

  /** The suffixes to remove from all keys to create a lookup key. */
  val AllKeysStopSuffixes: Seq[String] = Seq("_human")

  /** The suffixes to remove from a key to create a protein family lookup key. */
  val FamilyStopSuffixes: Seq[String] = Seq(" protein family", " family")

  /** The set of characters (space, slash, hyphen) to remove from the text to create a lookup key. */
  val KeyCharactersToRemove: Set[Char] = " /-".toSet

  /** The suffixes to remove from a key to create a protein lookup key. */
  val ProteinStopSuffixes: Seq[String] = Seq(" mutant protein", " protein")


  // ---- Knowledge base locations -----------------------------------------------------

  /** File Path to the directory which holds the entity knowledge bases. */
  val KBDirFilePath: String = "src/main/resources/org/clulab/reach/kb"

  /** Resource Path to the directory which holds the entity knowledge bases. */
  val KBDirResourcePath: String = "/org/clulab/reach/kb"


  // ---- Files and ID prefixes generated by the entity checker ------------------------

  /** Filename of the cellular location file generated by the entity checker. */
  val GendCellLocationFilename: String = "biopax-cellular_component.tsv.gz"
  /** Prefix string for cellular location IDs generated by the entity checker. */
  val GendCellLocationPrefix: String = "UA-BP-CC-"

  /** Filename of the small molecule file generated by the entity checker. */
  val GendChemicalFilename: String = "biopax-simple_chemical.tsv.gz"
  /** Prefix string for small molecule IDs generated by the entity checker. */
  val GendChemicalPrefix: String = "UA-BP-SC-"

  /** Filename of the protein/family file generated by the entity checker. */
  val GendProteinFilename: String = "biopax-gene_or_gene_product.tsv.gz"
  /** Prefix string for protein/family IDs generated by the entity checker. */
  val GendProteinPrefix: String = "UA-BP-GGP-"


  // ---- Static knowledge base files --------------------------------------------------

  /** Filename of the static bio processes file. */
  val StaticBioProcessFilename: String = "bio_process.tsv.gz"

  /** Filename of the static cellular location file. */
  val StaticCellLocationFilename: String = "GO-subcellular-locations.tsv.gz"

  /** Filename of the alternate static cellular location file. */
  val StaticCellLocation2Filename: String = "uniprot-subcellular-locations.tsv.gz"

  /** Filename of the static small molecule (chemical) file. */
  val StaticChemicalFilename: String = "PubChem.tsv.gz"

  /** Filename of the static small molecule (drug) file. */
  val StaticDrugFilename: String = "hms-drugs.tsv.gz"

  /** Filename of the static small molecule (metabolite) file. */
  val StaticMetaboliteFilename: String = "hmdb.tsv.gz"

  /** Filename of the static protein file. */
  val StaticProteinFilename: String = "uniprot-proteins.tsv.gz"

  /** Filename of the static protein family file. */
  val StaticProteinFamilyFilename: String = "PFAM-families.tsv.gz"

  /** Filename of the secondary static protein family file. */
  val StaticProteinFamily2Filename: String = "ProteinFamilies.tsv.gz"


  // ---- Context knowledge base files -------------------------------------------------

  /** Filename of the context species file. */
  val ContextSpeciesFilename: String = "Species.tsv.gz"

  /** Filename of the context cell lines file. */
  val ContextCellLineFilename: String = "Cellosaurus.tsv.gz"

  /** Filename of the secondary context cell lines file. */
  val ContextCellLine2Filename: String = "atcc.tsv.gz"

  /** Filename of the context cell types file. */
  val ContextCellTypeFilename: String = "CellOntology.tsv.gz"

  /** Filename of the context organs file. */
  val ContextOrganFilename: String = "Uberon.tsv.gz"

  /** Filename of the context tissue type file. */
  val ContextTissueTypeFilename: String = "tissue-type.tsv.gz"


  // ---- Auxiliary lookup tables ------------------------------------------------------

  /** Filename of the protein kinases lookup table. */
  val ProteinKinasesFilename: String = "uniprot-kinases.txt.gz"

  /** Filename of a file containing just Protein Domain suffixes; one per line. */
  val ProteinDomainShortNamesFilename: String = "proteinDomains-short.txt.gz"

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy