All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.clulab.reach.grounding.AdHocIMKBFactory.scala Maven / Gradle / Ivy

The newest version!
package org.clulab.reach.grounding

import scala.io.Source
import org.clulab.reach.grounding.ReachKBConstants._
import org.clulab.reach.grounding.ReachKBUtils._

/**
  * Factory class for creating and loading an in-memory KB from a mixed-namespaced TSV file.
  *   Written by: Tom Hicks. 1/19/2016.
  *   Last Modified: Refactor for standardized 2-5 column KB format.
  */
class AdHocIMKBFactory extends Speciated with ReachKBKeyTransforms {

  /**
    * Main factory method to create, fill, and return an encapsulated knowledge base.
    *
    * @param kbFilename name of the TSV file to load into the new KB; when it is the
    *                   empty string (the default) nothing is loaded.
    * @param metaInfo   optional meta information for the new KB; a default
    *                   IMKBMetaInfo is constructed when none is supplied.
    * @return the newly created in-memory KB.
    */
  def make (kbFilename:String = "", metaInfo:Option[IMKBMetaInfo] = None): InMemoryKB = {
    // all adhoc KBs have species info because human is assumed if species not given:
    val imkb: InMemoryKB = new InMemoryKB(true, metaInfo.getOrElse(new IMKBMetaInfo()))
    if (kbFilename != "")
      loadFromKBDir(imkb, kbFilename)     // load new in-memory KB
    // imkb.imkb.foreach { case (k, entries) =>   // for DEBUGGING
    //   entries.foreach { ent => println(ent.toString()) }} // for DEBUGGING
    imkb
  }

  /** Additional factory method to workaround meta info option. */
  def make (kbFilename: String, metaInfo: IMKBMetaInfo): InMemoryKB =
    make(kbFilename, Some(metaInfo))


  /**
    * Load this KB from the given 4-5 column, tab-separated-value (TSV) text file.
    *   1st column (0) is the text string,
    *   2nd column (1) is the ID string,
    *   3rd column (2) is the Species string (optional content),
    *   4th column (3) is the Namespace string (required),
    *   5th column (4) is the Type string (ignored).
    * If filename argument is null or the empty string, skip file loading.
    * Rows which fail field validation are silently skipped.
    * The underlying source is always closed, even if processing a row fails.
    */
  private def loadFromKBDir (imkb:InMemoryKB, filename:String): Unit = {
    if ((filename != null) && !filename.trim.isEmpty) { // skip loading if filename missing
      val kbResourcePath = makePathInKBDir(filename)
      val source = sourceFromResource(kbResourcePath)
      try {
        // the line iterator is lazy, so it must be fully consumed before the source is closed:
        source.getLines()
          .map(tsvRowToFields(_))
          .filter(tsvValidateFields(_))
          .foreach { fields => processFields(imkb, fields) }
      }
      finally {
        source.close()                      // release the resource on success or failure
      }
    }
  }

  /**
    * Extract particular fields and process them as needed.
    * Expects a row which has already passed validation (i.e., has at least 4 fields).
    * An empty species field defaults to human.
    */
  private def processFields (imkb:InMemoryKB, fields: Seq[String]): Unit = {
    val text = fields(0)
    val refId = fields(1)
    val species = if (fields(2) != "") fields(2) else Speciated.Human
    val namespace = fields(3)
    imkb.addCanonicalEntry(text, namespace, refId, species) // store new entry
  }

  /**
    * Check for required fields in one row of the TSV input file.
    * A row is valid if it has at least 4 fields and the text (0), ID (1), and
    * namespace (3) fields are all non-empty. The species field (2) may be empty.
    */
  private def tsvValidateFields (fields:Seq[String]): Boolean =
    (fields.size >= 4) &&                   // sanity check: guards the indexing below
      fields(0).nonEmpty && fields(1).nonEmpty && fields(3).nonEmpty

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy