All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.clulab.assembly.AssemblyManager.scala Maven / Gradle / Ivy

The newest version!
package org.clulab.assembly

import java.io.File
import org.clulab.assembly.representations._
import collection.Map
import collection.immutable
import org.clulab.odin._
import org.clulab.reach.mentions.{MentionOps, CorefMention}
// used to differentiate AssemblyModifications from Modifications on mentions
import org.clulab.reach.mentions


/**
 * Records that one [[EntityEventRepresentation]] precedes another.
 * @param before the [[EntityEventRepresentation]] that precedes [[PrecedenceRelation.after]]
 * @param after the [[EntityEventRepresentation]] that follows [[PrecedenceRelation.before]]
 * @param evidence the mentions that serve as evidence for this precedence relation (declared as a var, so it may be reassigned)
 * @param foundBy the name of the Sieve which found this relation
 */
case class PrecedenceRelation(
  before: EER,
  after: EER,
  var evidence: Set[Mention],
  foundBy: String
) {

  /**
    * Checks whether `other` is a [[PrecedenceRelation]] whose before AND after both equal this relation's.
    * Evidence and foundBy are not compared.
    * @param other Any comparison object
    * @return true only when both endpoints match; false for any non-PrecedenceRelation
    */
  // TODO: Should this be in terms of an Equiv Hash?
  def strictlyEquivalent(other: Any): Boolean = other match {
    case PrecedenceRelation(otherBefore, otherAfter, _, _) =>
      before == otherBefore && after == otherAfter
    case _ =>
      false
  }

  /**
    * Checks whether `other` is a [[PrecedenceRelation]] sharing at least one endpoint (before OR after) with this relation.
    * Evidence and foundBy are not compared.
    * @param other Any comparison object
    * @return true when either endpoint matches; false for any non-PrecedenceRelation
    */
  // TODO: Should this be in terms of an Equiv Hash?
  def isEquivalentTo(other: Any): Boolean = other match {
    case PrecedenceRelation(otherBefore, otherAfter, _, _) =>
      before == otherBefore || after == otherAfter
    case _ =>
      false
  }
}

/**
 * @constructor Creates a new AssemblyManager from two LUTs: (Mention -> [[IDPointer]]) and ([[IDPointer]] -> [[EntityEventRepresentation]]).
 *             These LUTs are used to populate the mentionStateToID and idToEERepresentation LUTs containing the same information.
 *             Subsequent updates to these LUTs create new LUTs.
 *             The motivation for the LUTs was to allow for changes in the mapping of a Mention -...-> [[EntityEventRepresentation]]
 *             to easily propagate in nested cases.
 *             For example, an update to the (Mention -> [[EntityEventRepresentation]]) mapping of a SimpleEvent Mention will propagate
 *             to the (Mention -> [[EntityEventRepresentation]]) mapping of any ComplexEvent Mention containing this SimpleEvent Mention.
 * @param m2id a lookup table from Mention -> [[IDPointer]].  Each key (Mention) should map to a unique [[IDPointer]].
 * @param id2eer a lookup table from [[IDPointer]] -> [[EntityEventRepresentation]].
 *                Keys ([[IDPointer]]) may point to the same value (EntityEventRepresentation)
 */
class AssemblyManager(
  m2id: Map[MentionState, IDPointer],
  id2eer: Map[IDPointer, EER]
) extends Serializable {

  import AssemblyManager._

  // Because modifications don't feature into the hashcode,
  // a mention's identity at assembly consists of both the mention and its mods (i.e., the "state" of the mention)
  // LUT 1a: MentionState -> IDPointer
  private var mentionStateToID: immutable.Map[MentionState, IDPointer] = m2id.toMap
  // LUT 2: IDPointer -> EER
  private var idToEER: immutable.Map[IDPointer, EER] = id2eer.toMap
  // LUT 3: faster lookup of equivalent events (equivalenceHash -> EERs sharing that hash)
  private var ehToEERs: immutable.Map[Int, Set[EER]] = {
    // group by EH
    id2eer.toSeq.groupBy(_._2.equivalenceHash)
      // get EERs; a strict map is used instead of mapValues, which only produces a lazy view
      // (recomputed on every access, and not serializable on Scala 2.11/2.12 although this class is Serializable)
      .map { case (eh, pairs) => eh -> pairs.map(_._2).toSet }
  }

  // LUT 1b: the inverse of LUT 1a
  private var idToMentionState: immutable.Map[IDPointer, MentionState] = mentionStateToID.map{ case (k, v) => (v, k)}
  // PrecedenceRelations associated with a distinct EER
  private var EERtoPrecedenceRelations: immutable.Map[EER, Set[PrecedenceRelation]] =
    Map.empty
      .withDefaultValue(Set.empty[PrecedenceRelation])
  // Next ID to hand out.  Starts at the size of LUT 2 (the historical behavior), but never at or below
  // the largest ID already present, so a manager built from LUTs with non-contiguous IDs cannot reissue a live ID.
  private var nextID: IDPointer =
    if (idToEER.isEmpty) 0 else math.max(idToEER.size, idToEER.keys.max + 1)

  /**
    * Collects the Mention component (first element) of every [[MentionState]] key held in [[mentionStateToID]].
    * @return the Set of Mentions currently tracked by the manager
    */
  def getMentions: Set[Mention] = {
    val trackedStates = mentionStateToID.keysIterator
    trackedStates.map(_._1).toSet
  }

  //
  // Handle PrecedenceRelations
  //

  /**
   * Stores a PrecedenceRelation in [[EERtoPrecedenceRelations]] connecting "before" and "after".
   * Both Mentions are passed through getOrCreateEER to obtain the EERs the relation is stored under.
   * When no evidence is supplied, it is generated with sieves.SieveUtils.createEvidenceForCPR.
   * @param before an Odin Mention that causally precedes "after"
   * @param after an Odin Mention that (causally) follows "before"
   * @param evidence a Set of Odin Mentions serving as evidence for the precedence relation
   * @param foundBy the name of the sieve or procedure that discovered this precedence relation
   */
  def storePrecedenceRelation(
    before: Mention,
    after: Mention,
    evidence: Set[Mention] = Set.empty[Mention],
    foundBy: String
  ): Unit = {

    // resolve both Mentions to EERs ("before" is handled ahead of "after")
    val beforeEER = getOrCreateEER(before)
    val afterEER = getOrCreateEER(after)
    // fall back to generated evidence only when the caller supplied none
    val support: Set[Mention] =
      if (evidence.nonEmpty) evidence
      else sieves.SieveUtils.createEvidenceForCPR(before, after, foundBy)
    storePrecedenceRelation(before = beforeEER, after = afterEER, support, foundBy)
  }

  /**
   * Builds a [[PrecedenceRelation]] from the arguments and registers it in [[EERtoPrecedenceRelations]]
   * under both "before" and "after".
   * @param before an [[EntityEventRepresentation]] that causally precedes "after"
   * @param after an [[EntityEventRepresentation]] that (causally) follows "before"
   * @param evidence the Mentions serving as evidence for the precedence relation
   * @param foundBy the name of the sieve or procedure that discovered this precedence relation
   */
  def storePrecedenceRelation(
    before: EER,
    after: EER,
    evidence: Set[Mention],
    foundBy: String
  ): Unit = {
    updateEERtoPrecedenceRelations(PrecedenceRelation(before, after, evidence, foundBy))
  }

  /**
   * Convenience overload: stores a PrecedenceRelation between two EERs with an empty evidence Set.
   * @param before an [[EntityEventRepresentation]] that causally precedes "after"
   * @param after an [[EntityEventRepresentation]] that (causally) follows "before"
   * @param foundBy the name of the sieve or procedure that discovered this precedence relation
   */
  def storePrecedenceRelation(
    before: EER,
    after: EER,
    foundBy: String
  ): Unit = {
    val noEvidence = Set.empty[Mention]
    storePrecedenceRelation(before, after, noEvidence, foundBy)
  }

  /**
   * Adds pr to the Set of relations stored in [[EERtoPrecedenceRelations]] for pr.before, then for pr.after.
   * @param pr a [[PrecedenceRelation]]
   */
  private def updateEERtoPrecedenceRelations(pr: PrecedenceRelation): Unit = {
    // "before" is processed first, then "after"; each step sees the table left by the previous one
    for (endpoint <- Seq(pr.before, pr.after)) {
      val known = EERtoPrecedenceRelations.getOrElse(endpoint, Set.empty[PrecedenceRelation])
      EERtoPrecedenceRelations = EERtoPrecedenceRelations + (endpoint -> (known + pr))
    }
  }

  // retrieval of PrecedenceRelations
  /**
   * Looks up the PrecedenceRelations stored in [[EERtoPrecedenceRelations]] under the provided EER.
   * The table is initialized with an empty-Set default value, which is what an unknown EER yields.
   * @param eer an [[EntityEventRepresentation]]
   * @return the Set of PrecedenceRelations stored for eer
   */
  def getPrecedenceRelationsFor(eer: EER): Set[PrecedenceRelation] = {
    EERtoPrecedenceRelations(eer)
  }

  /**
   * Retrieves the Set of PrecedenceRelations for the EER obtained from getOrCreateEER(m).
   * @param m an Odin Mention
   * @return the Set of PrecedenceRelations stored for m's EER
   */
  def getPrecedenceRelationsFor(m: Mention): Set[PrecedenceRelation] = {
    val eer = getOrCreateEER(m)
    getPrecedenceRelationsFor(eer)
  }

  /**
   * Gathers, into a single Set, the PrecedenceRelations of every element of distinctEvents.
   * @return the (distinct) Set of PrecedenceRelations for all Events
   */
  def getPrecedenceRelations: Set[PrecedenceRelation] =
    distinctEvents.flatMap(event => getPrecedenceRelationsFor(event))

  /**
   * Retrieves the predecessors of the provided EER (see [[predecessorsOf]]) and reduces them with distinctEERsFromSet.
   * @param eer an [[EntityEventRepresentation]]
   * @return the Set of distinct EntityEventRepresentations known to causally precede eer
   */
  def distinctPredecessorsOf(eer: EER): Set[EER] =
    distinctEERsFromSet(predecessorsOf(eer))

  /**
   * Retrieves the distinct Set of EER predecessors for the provided Mention (m).
   * A Mention rejected by [[AssemblyManager.isValidMention]] yields an empty Set.
   * @param m an Odin Mention
   * @return the Set of distinct EntityEventRepresentations known to causally precede the EER corresponding to m
   */
  def distinctPredecessorsOf(m: Mention): Set[EER] =
    if (AssemblyManager.isValidMention(m)) distinctPredecessorsOf(getOrCreateEER(m))
    else Set.empty[EER]

  /**
   * Retrieves the non-distinct Set of EER predecessors for the provided EER.
   * Every relation stored for eer contributes its "before" side, unless that side shares eer's equivalenceHash.
   * @param eer an [[EntityEventRepresentation]]
   * @return the Set of non-distinct EntityEventRepresentations known to causally precede eer
   */
  def predecessorsOf(eer: EER): Set[EER] =
    EERtoPrecedenceRelations(eer).collect {
      case pr if pr.before.equivalenceHash != eer.equivalenceHash => pr.before
    }

  /**
   * Retrieves the non-distinct Set of EER predecessors for the provided Mention (m).
   * A Mention rejected by [[AssemblyManager.isValidMention]] yields an empty Set.
   * @param m an Odin Mention
   * @return the Set of non-distinct EntityEventRepresentations known to causally precede the EER corresponding to m
   */
  def predecessorsOf(m: Mention): Set[EER] =
    if (AssemblyManager.isValidMention(m)) predecessorsOf(getOrCreateEER(m))
    else Set.empty[EER]

  /**
   * Retrieves the successors of the provided EER (see [[successorsOf]]) and reduces them with distinctEERsFromSet.
   * @param eer an [[EntityEventRepresentation]]
   * @return the Set of distinct EntityEventRepresentations known to causally succeed eer
   */
  def distinctSuccessorsOf(eer: EER): Set[EER] =
    distinctEERsFromSet(successorsOf(eer))

  /**
   * Retrieves the distinct Set of EER successors for the provided Mention (m).
   * A Mention rejected by [[AssemblyManager.isValidMention]] yields an empty Set.
   * @param m an Odin Mention
   * @return the Set of distinct EntityEventRepresentations known to causally succeed the EER corresponding to m
   */
  def distinctSuccessorsOf(m: Mention): Set[EER] =
    if (AssemblyManager.isValidMention(m)) distinctSuccessorsOf(getOrCreateEER(m))
    else Set.empty[EER]

  /**
   * Retrieves the non-distinct Set of EER successors for the provided EER.
   * Every relation stored for eer contributes its "after" side, unless that side shares eer's equivalenceHash
   * (i.e., relations in which eer itself is the "after" are skipped).
   * @param eer an [[EntityEventRepresentation]]
   * @return the Set of non-distinct EntityEventRepresentations known to causally succeed eer
   */
  def successorsOf(eer: EER): Set[EER] = for {
    pr <- getPrecedenceRelationsFor(eer)
    if pr.after.equivalenceHash != eer.equivalenceHash
    // yield the successor itself; yielding the query EER (as before) could never return a real successor
  } yield pr.after

  /**
   * Retrieves the non-distinct Set of EER successors for the provided Mention (m).
   * A Mention rejected by [[AssemblyManager.isValidMention]] yields an empty Set.
   * @param m an Odin Mention
   * @return the Set of non-distinct EntityEventRepresentations known to causally succeed the EER corresponding to m
   */
  def successorsOf(m: Mention): Set[EER] =
    if (AssemblyManager.isValidMention(m)) successorsOf(getOrCreateEER(m))
    else Set.empty[EER]

  //
  // Utils for processing mentions
  //

  /**
   * Gets or creates an [[EntityEventRepresentation]] for m, provided m passes [[isValidMention]].
   * Invalid Mentions are ignored.
   * @param m an Odin Mention
   */
  def trackMention(m: Mention): Unit = m match {
    // do not store Sites, Activations, etc. in LUT 1
    case valid if isValidMention(valid) =>
      getOrCreateEER(valid)
      ()
    case _ => ()
  }

  /**
   * Gets or creates an [[EntityEventRepresentation]] for each valid Mention.
   * See [[isValidMention]] for details on the validation check; invalid Mentions are skipped.
   * @param mentions a sequence of Mention to store in the AssemblyManager LUTs
   */
  def trackMentions(mentions: Seq[Mention]): Unit = {
    // foreach (rather than filter + map) because only the side effect matters:
    // no throwaway collection is built, and every element is visited even if `mentions` is a lazy Seq
    mentions.foreach { m =>
      // do not store Sites, Activations, etc. in LUT 1
      if (isValidMention(m)) getOrCreateEER(m)
    }
  }

  /**
   * Gets the polarity of a mention.  Should only be relevant to ComplexEvents.
   * The Mention is tested against a case-insensitive "positive" prefix pattern first, then "negative".
   * @param m an Odin Mention
   * @return [[AssemblyManager.positive]], [[AssemblyManager.negative]], or [[AssemblyManager.unknown]]
   */
  def getPolarityLabel(m: Mention): String =
    if (m matches "(?i)^positive".r) AssemblyManager.positive
    else if (m matches "(?i)^negative".r) AssemblyManager.negative
    else AssemblyManager.unknown

  //
  // Utils for creating IDs
  //

  /**
   * Issues a fresh [[IDPointer]] from the internal counter and advances the counter by one.
   * The LUTs are not consulted; uniqueness relies solely on the counter.
   * @return the [[IDPointer]] that was issued
   */
  private def createID: IDPointer = {
    val issued = nextID
    nextID = issued + 1
    issued
  }

  /**
   * Looks up the [[IDPointer]] registered for m's MentionState; when there is none, a new one is issued via [[createID]].
   * The new ID is NOT registered in any LUT by this method.
   * @param m an Odin Mention
   * @return the existing or freshly issued [[IDPointer]]
   */
  private def getOrCreateID(m: Mention): IDPointer =
    mentionStateToID.get(getMentionState(m)) match {
      case Some(existing) => existing
      case None => createID
    }

  //
  // Utils for modifying storage tables
  //

  /**
   * Registers the (m, id, eer) triple in every lookup table:
   * [[mentionStateToID]], [[idToMentionState]], [[idToEER]] and [[ehToEERs]].
   * @param id a unique [[IDPointer]] for m
   * @param m an Odin Mention
   * @param eer the [[EntityEventRepresentation]] corresponding to m
   */
  private def updateLUTs(id: IDPointer, m: Mention, eer: EER): Unit = {
    // LUT #1a (MentionState -> id) and LUT #1b (id -> MentionState)
    updateMentionStateToIDTable(m, id)
    updateIDtoMentionStateTable(m, id)
    // LUT #2 (id -> EER); this call also refreshes LUT #3
    updateIDtoEERTable(id, eer)
    // LUT #3 (equivalenceHash -> EERs)
    val eh = eer.equivalenceHash
    updateEHtoEERsTable(eh, eer)
  }

  /**
   * Adds eer to the Set stored under eh in the [[ehToEERs]] LUT (starting from an empty Set when eh is new).
   * @param eh an equivalenceHash
   * @param eer an [[EER]]
   */
  private def updateEHtoEERsTable(eh: Int, eer: EER): Unit = {
    val known = ehToEERs.getOrElse(eh, Set.empty[EER])
    ehToEERs = ehToEERs.updated(eh, known + eer)
  }

  /**
   * Maps m's MentionState to id in the [[mentionStateToID]] LUT, replacing any previous mapping for that state.
   * @param m an Odin Mention
   * @param id an [[IDPointer]] unique to m
   */
  private def updateMentionStateToIDTable(m: Mention, id: IDPointer): Unit = {
    val state = getMentionState(m)
    mentionStateToID = mentionStateToID.updated(state, id)
  }

  /**
   * Maps id to m's MentionState in the [[idToMentionState]] LUT, replacing any previous mapping for that id.
   * @param m an Odin Mention
   * @param id an [[IDPointer]] unique to m
   */
  private def updateIDtoMentionStateTable(m: Mention, id: IDPointer): Unit = {
    val state = getMentionState(m)
    idToMentionState = idToMentionState.updated(id, state)
  }

  /**
   * Maps id to eer in the [[idToEER]] LUT, then registers eer under its equivalenceHash in [[ehToEERs]].
   * @param id a unique [[IDPointer]] pointing to eer
   * @param eer an [[EntityEventRepresentation]] associated with the provided id
   */
  private def updateIDtoEERTable(id: IDPointer, eer: EER): Unit = {
    idToEER = idToEER.updated(id, eer)
    // keep the equivalence-hash index in step with LUT 2
    val eh = eer.equivalenceHash
    updateEHtoEERsTable(eh, eer)
  }

  //
  // Utils for removing IDs
  //

  /**
   * Removes the entries of every [[EntityEventRepresentation]] in [[idToEER]] that reports containing the provided id.
   * For a matching [[SimpleEvent]], the IDs of its outputs are removed as well.
   * @param id an [[IDPointer]] used to identify mentions and EntityEventRepresentations for removal
   */
  def removeEntriesContainingID(id: IDPointer): Unit = {

    val affected: Seq[IDPointer] = idToEER.values.toSeq
      // keep only the representations that contain the given id
      .filter(_.containsID(id))
      .flatMap {
        // if an event, take the ids of the event's outputs plus the event's own id
        case event: SimpleEvent => event.outputPointers ++ Set(event.uniqueID)
        case other => Set(other.uniqueID)
      }

    removeEntriesCorrespondingToIDs(affected)
  }

  /**
   * Removes the Mention and every [[EntityEventRepresentation]] containing the Mention's ID from the AssemblyManager.
   * A Mention that is not tracked (no entry in [[mentionStateToID]]) is ignored: nothing can reference it,
   * so no ID is allocated just to perform an empty removal.
   * @param m the Odin Mention whose entries should be removed
   */
  def removeEntriesContainingIDofMention(m: Mention): Unit = {
    // plain lookup instead of getOrCreateID, which would consume a fresh ID for an untracked mention
    mentionStateToID.get(getMentionState(m)).foreach(id => removeEntriesContainingID(id))
  }

  /**
   * Removes the entries of every [[EntityEventRepresentation]] in [[idToEER]] that reports containing eer's uniqueID.
   * @param eer an [[EntityEventRepresentation]] used to identify mentions and EntityEventRepresentations for removal
   */
  def removeEntriesContainingIDofEER(eer: EER): Unit = {

    // ids of the representations that contain the id of the given representation
    val idsForRemoval: Seq[IDPointer] = idToEER.values.toSeq
      .filter(candidate => candidate.containsID(eer.uniqueID))
      .map(_.uniqueID)

    removeEntriesCorrespondingToIDs(idsForRemoval)
  }

  /**
   * Removes the entries registered under any of the given [[IDPointer]]s from
   * [[mentionStateToID]], [[idToMentionState]] and [[idToEER]].
   *
   * NOTE(review): [[ehToEERs]] and [[EERtoPrecedenceRelations]] are not pruned here (unchanged from the
   * previous behavior); confirm whether removed EERs should also disappear from those tables.
   * @param ids a Seq[IDPointer] used to identify mentions and EntityEventRepresentations for removal
   */
  def removeEntriesCorrespondingToIDs(ids: Seq[IDPointer]): Unit = {
    for (id <- ids) {
      // LUT 1a: drop the mention state registered for this id, if any
      idToMentionState.get(id).foreach { state =>
        mentionStateToID = mentionStateToID - state
      }
      // LUT 1b: drop the reverse entry too, so the two mention tables stay in sync
      idToMentionState = idToMentionState - id
      // LUT 2: drop the representation itself
      idToEER = idToEER - id
    }
  }

  //
  // Utils for handling modifications
  //

  /**
   * Builds a Set[AssemblyModification] from the modifications belonging to a Mention m.
   * Only two kinds of Mention [[org.clulab.reach.mentions.Modification]] are converted; all others are dropped:
   * PTM    -> [[PTM]] (label, optional site text, negation flag)
   * Mutant -> [[MutantEntity]] (label)
   *
   * Additionally, a Mention matching "Entity" gets an [[EntityLabel]] [[AssemblyModification]] encoding its label (ex. Family)
   * @param m an Odin Mention
   * @return Set[AssemblyModification]
   */
  protected def mkAssemblyModifications(m: Mention): Set[AssemblyModification] = {
    // we only care to represent a subset of the Modifications associated with a mention
    val relevant: Set[AssemblyModification] =
      m.toBioMention.modifications flatMap {
        // TODO: is site part of label?
        case mutant: mentions.Mutant => Set(MutantEntity(mutant.label))
        // TODO: should site be handled differently?
        case ptm: mentions.PTM =>
          // the site, when present, is represented by its text
          Set(PTM(ptm.label, ptm.site.map(_.text), ptm.negated))
        case _ => Nil
      }
    if (!(m matches "Entity")) relevant
    else Set(EntityLabel(m.label)) ++ relevant
  }

  //
  // SimpleEntity creation
  //

  /**
   * Create a [[SimpleEntity]] representation from a Mention
   * and an optional set of additional modifications (useful for building output of SimpleEvent).
   *
   * Without mods: the ID comes from [[getOrCreateID]] and all LUTs are updated with the original Mention m.
   * With mods: a brand-new ID is always created, only the ID-keyed EER tables are updated
   * ([[mentionStateToID]] is NOT touched, so as to avoid a conflict with the existing mapping),
   * and the stored source mention is a copy of the resolved Mention whose foundBy is suffixed with "-output-representation".
   * @param m an Odin Mention; its coref-resolved form must match 'Entity' or 'Cellular_component'
   * @param mods an optional set of [[AssemblyModification]].
   *             This is useful for building the output of a [[SimpleEvent]] (any simple event other than a Binding),
   *             where the key [[PTM]] comes from the [[SimpleEvent]]
   *             (i.e., the PTM cannot be recovered by simply examining m out of context)
   * @return a tuple of ([[SimpleEntity]], [[IDPointer]])
   * @throws IllegalArgumentException (via require) if the resolved Mention is neither an 'Entity' nor a 'Cellular_component'
   */
  protected def createSimpleEntityWithID(
    m: Mention,
    mods: Option[Set[AssemblyModification]]
  ): (SimpleEntity, IDPointer) = {

    /** Used to create "new" mention whenever mods are provided **/
    def createEvidence(m: Mention): Mention = m match {
      case tb: TextBoundMention => tb.copy(foundBy = s"${tb.foundBy}-output-representation")
      case rel: RelationMention => rel.copy(foundBy = s"${rel.foundBy}-output-representation")
      case em: EventMention => em.copy(foundBy = s"${em.foundBy}-output-representation")
    }

    // check for coref
    val e = getResolvedForm(m)

    // mention should be an Entity or Cellular_component
    require((e matches "Entity") || (e matches "Cellular_component"), "createSimpleEntity requires an 'Entity' or 'Cellular_component' Mention")

    val modifications = mkAssemblyModifications(e)

    // prepare id
    // if mods have been provided, a new id should be created since createSimpleEvent calls this method
    // and the current representation could be an output of a SimpleEvent
    // for a sentence like "Ras is phosphorylated", the Mention for "Ras" should only point to the PTM-less form;
    // however, when createSimpleEvent calls this method to construct an output representation,
    // it gives it the PTMs to associate with this mention
    // TODO: should this use m or e?
    // NOTE(review): the id is looked up with the resolved form (e) but registered below under m — confirm this is intended
    val id = if (mods.nonEmpty) createID else getOrCreateID(e)

    // modifications relevant to assembly: those of the mention plus any supplied by the caller
    val allModifications: Set[AssemblyModification] = mods match {
      case Some(extra) => modifications ++ extra
      case None => modifications
    }

    // source mention: the original mention, or (only when mods were provided) a relabeled copy of the resolved form.
    // The copy is built lazily here so that it is never computed — and can never fail — when it is not needed.
    // FIXME: not sure if newEvidence should be stored...
    val sourceMention: Mention = if (mods.isEmpty) m else createEvidence(e)

    // prepare SimpleEntity
    val eer =
      new SimpleEntity(
        id,
        // TODO: decide whether or not we should use a richer representation for the grounding ID
        e.nsId,
        allModifications,
        Some(sourceMention),
        this
      )
    // Only update table 1 if no additional mods were provided
    if (mods.isEmpty) updateLUTs(id, m, eer) else updateIDtoEERTable(id, eer)

    // eer and id pair
    (eer, id)
  }

  /**
   * Same as [[createSimpleEntityWithID]], but returns only the [[SimpleEntity]] and discards the [[IDPointer]].
   *
   * Whenever modifications are provided, the [[mentionStateToID]] LUT is NOT updated (see [[createSimpleEntityWithID]]).
   * @param m an Odin Mention
   * @param mods an optional set of [[AssemblyModification]] to add to those derived from m
   * @return a [[SimpleEntity]]
   */
  protected def createSimpleEntity(
    m: Mention,
    mods: Option[Set[AssemblyModification]]
  ): SimpleEntity = {
    val (entity, _) = createSimpleEntityWithID(m, mods)
    entity
  }


  //
  // Complex creation
  //

  /**
   * Creates a [[Complex]] from a Mention whose coref-resolved form matches "Complex",
   * creating a [[SimpleEntity]] for each of its themes, and updates the LUTs.
   * @param m an Odin Mention
   * @return a tuple of ([[Complex]], [[IDPointer]])
   */
  protected def createComplexWithID(m: Mention): (Complex, IDPointer) = {

    // check for coref
    val resolved = getResolvedForm(m)

    require(resolved matches "Complex", "createComplex only handles Mentions with the label 'Complex'.")

    // the id is keyed on the original mention and is obtained before any member is created
    val complexID = getOrCreateID(m)

    // TODO: do binding events have sites?
    // every theme becomes a SimpleEntity (no extra mods); only the ids are kept
    val memberIDs: Set[IDPointer] =
      getAllThemes(resolved).map(theme => createSimpleEntityWithID(theme, None)._2).toSet

    val complex = new Complex(complexID, memberIDs, Some(m), this)

    // use original mention for later lookup
    updateLUTs(complexID, m, complex)

    (complex, complexID)
  }

  /**
   * Same as [[createComplexWithID]], but returns only the [[Complex]] and discards the [[IDPointer]].
   * @param m an Odin Mention
   * @return a [[Complex]]
   */
  private def createComplex(m: Mention): Complex = {
    val (complex, _) = createComplexWithID(m)
    complex
  }

  //
  // SimpleEvent creation
  //

  /**
   * Creates a [[SimpleEvent]] from a Simple Event Mention (excludes Bindings) and updates the [[mentionStateToID]] and [[idToEER]] LUTs
   * @param m an Odin Mention
   * @return a tuple of ([[SimpleEvent]], [[IDPointer]])
   */
  private def createSimpleEventWithID(m: Mention): (SimpleEvent, IDPointer) = {

    //
    // helper functions for label-based dispatch
    //

    /**
     * Creates a [[SimpleEvent]] from a Binding Mention and updates the [[mentionStateToID]] and [[idToEER]] LUTs.
     * The event's input is the set of EERs for its themes (all arguments whose role starts with "theme",
     * gathered under the single role "theme"); its output is a newly created [[Complex]] of those themes.
     * Both require checks throw IllegalArgumentException when violated.
     * @param m an Odin Mention
     * @return a tuple of ([[SimpleEvent]], [[IDPointer]])
     */
    def handleBinding(m: Mention): (SimpleEvent, IDPointer) = {

      // check for coref
      val e = getResolvedForm(m)

      // mention should be a Binding
      require(e matches "Binding", "handleBinding only accepts Binding mentions.")
      // there should not be a cause among the arguments
      require(!(e.arguments contains "cause"), "Binding should not contain a cause!")
      // prepare input (roles -> repr. pointers)

      // construct inputs from themes
      // role names are matched case-insensitively by prefix, so e.g. "theme1"/"theme2" are all collected
      // TODO: how to handle sites?
      val correctedThemes = e.arguments
        .filter(_._1.toLowerCase.startsWith("theme"))
        .values
        .flatten
        .toSeq
      val themeMap: Map[String, Seq[Mention]] = Map("theme" -> correctedThemes)
      // each theme is resolved to an (existing or new) EER; only the ids are kept
      val input: Map[String, Set[IDPointer]] = themeMap map {
        case (role: String, mns: Seq[Mention]) =>
          (role, mns.map(getOrCreateEERwithID).map(_._2).toSet)
      }

      // prepare output
      // the members of the output Complex are SimpleEntities built from the same themes (no extra mods)
      val complexMembers: Set[IDPointer] = correctedThemes
        .map(m => createSimpleEntityWithID(m, None))
        .map(_._2)
        .toSet

      // prepare id for SimpleEvent
      val id = getOrCreateID(m)

      //prepare id for output (Complex)
      // prepare id
      // always a fresh id: the output Complex is not tied to any mention
      val complexPointer = createID

      // prepare Complex
      // (no source mention is attached to the output Complex)
      val complex =
        new Complex(
          complexPointer,
          complexMembers,
          None,
          this
        )

      // update table #2
      // only the id-keyed EER tables are updated for the Complex; the mention tables are left alone
      updateIDtoEERTable(complexPointer, complex)

      // prepare SimpleEvent
      // TODO: throw exception if arguments contains "cause"
      val eer =
        new SimpleEvent(
          id,
          input,
          Set(complexPointer),
          e.label,
          Some(m),
          this
        )

      // update LUTs
      // use original mention for later lookup
      updateLUTs(id, m, eer)

      (eer, id)
    }

    /**
     * Creates a [[SimpleEvent]] from a Translocation Mention and updates the [[mentionStateToID]] and [[idToEER]] LUTs.
     * Input: the themes, each carrying a Location modification for every "source" argument.
     * Output: the same themes, each carrying a Location modification for every "destination" argument.
     * @param m an Odin Mention
     * @return a tuple of ([[SimpleEvent]], [[IDPointer]])
     * @throws IllegalArgumentException (via require) if the resolved Mention is not a Translocation
     */
    def handleTranslocation(m: Mention): (SimpleEvent, IDPointer) = {

      /**
       * Builds one Location modification (from the grounding id) for each Mention filling `role`;
       * empty when the Mention has no such argument.
       */
      def locationModsFor(mn: Mention, role: String): Set[AssemblyModification] =
        mn.arguments.getOrElse(role, Nil).toSet[Mention].map { loc =>
          // the ascription replaces the former asInstanceOf cast
          representations.Location(loc.toBioMention.nsId): AssemblyModification
        }

      /**
       * Gathers every argument whose role starts with "theme" (case-insensitive).
       * Used for both input and output so the two sides always agree on what the themes are.
       */
      def themesOf(mn: Mention): Seq[Mention] = mn.arguments
        .filter(_._1.toLowerCase.startsWith("theme"))
        .values
        .flatten
        .toSeq

      /** Creates a SimpleEntity carrying `mods` for each theme and returns the ids */
      def themeEntitiesWith(mn: Mention, mods: Set[AssemblyModification]): Set[IDPointer] =
        themesOf(mn).map(theme => createSimpleEntityWithID(theme, Some(mods))._2).toSet

      /**
       * Create input Map for Translocation event (themes located at the source)
       */
      def createInputForTranslocation(m: Mention): Map[String, Set[IDPointer]] = {
        val sourceMods = locationModsFor(m, "source")
        val themeIDs: Set[IDPointer] = themeEntitiesWith(m, sourceMods)
        Map("theme" -> themeIDs)
      }

      /**
       * Create output Set for Translocation event (themes located at the destination)
       */
      def createOutputForTranslocation(m: Mention): Set[IDPointer] = {
        val destinationMods = locationModsFor(m, "destination")
        // NOTE: themes are gathered exactly as for the input; previously only the exact role "theme" was read,
        // which failed with NoSuchElementException whenever that key was absent
        themeEntitiesWith(m, destinationMods)
      }

      // check for coref
      val e = getResolvedForm(m)

      // only accept Translocation mention
      require(e matches "Translocation", s"handleTranslocation received Mention of label '${e.label}', but method only accepts a Translocation Mention.")
      // prepare input (roles -> repr. pointers)

      // create input
      val input = createInputForTranslocation(e)

      // prepare output
      val output: Set[IDPointer] = createOutputForTranslocation(e)

      // prepare id
      val id = getOrCreateID(m)

      // prepare SimpleEvent
      val eer =
        new SimpleEvent(
          id,
          input,
          output,
          e.label,
          Some(m),
          this
        )

      // update LUTs
      // use original mention for later lookup
      updateLUTs(id, m, eer)

      (eer, id)
    }

     /**
     * Creates a [[SimpleEvent]] from a SimpleEvent Mention (excluding Bindings) and updates the [[mentionStateToID]] and [[idToEER]] LUTs.
     * The resolved Mention is dispatched on "RemovalEvent" vs. "AdditionEvent":
     * a removal event takes PTM-carrying themes as input and yields the plain themes as output;
     * an addition event takes the plain arguments as input and yields PTM-carrying themes as output.
     * NOTE(review): a resolved Mention matching neither label falls through both matches (MatchError) — confirm callers never pass one.
     * @param m an Odin Mention
     * @return a tuple of ([[SimpleEvent]], [[IDPointer]])
     */
    def handleNBSimpleEvent(m: Mention): (SimpleEvent, IDPointer) = {

       /**
        * Create input Map for removal event:
        * themes become SimpleEntities carrying the PTM being removed; other roles map to their EERs.
        */
       def createInputForRemovalEvent(m: Mention): Map[String, Set[IDPointer]] = {

         val isNegated = hasNegation(m)

         // get input PTM for removal event
         // the PTM name is the event label with a leading "De" stripped, then capitalized
         val ptm: String = m.label.replaceAll("^De", "").capitalize

         // handle sites
         val ptms: Set[AssemblyModification] = m match {
           case hasSites if hasSites.arguments contains "site" =>
             // create a PTM for each site
             for (site <- hasSites.arguments("site").toSet[Mention]) yield representations.PTM(ptm, Some(site.text), isNegated)
           // create a PTM without a site
           case noSites => Set(representations.PTM(ptm, None, isNegated))
         }

         // filter out sites from input
         val siteLessArgs = m.arguments - "site"
         val input: Map[String, Set[IDPointer]] = siteLessArgs map {
           // themes get a fresh SimpleEntity that carries the PTM(s)
           case ("theme", mns: Seq[Mention]) =>
             ("theme", mns.map(m => createSimpleEntityWithID(m, Some(ptms))).map(_._2).toSet)
           // any other role points to the (existing or new) EER of its mentions
           case (role: String, mns: Seq[Mention]) =>
             (role, mns.map(getOrCreateEERwithID).map(_._2).toSet)
         }
         input
       }

       /**
        * Create input Map for addition event:
        * every role except "site" maps to the EERs of its mentions.
        */
       def createInputForAdditionEvent(m: Mention): Map[String, Set[IDPointer]] = {
         // filter out sites from input
         val siteLessArgs = m.arguments - "site"
         val input: Map[String, Set[IDPointer]] = siteLessArgs map {
           case (role: String, mns: Seq[Mention]) =>
             //println(s"\tprocessing mentions for '$role' role of '${e.label}'")
             (role, mns.map(getOrCreateEERwithID).map(_._2).toSet)
         }
         input
       }

       /**
        * Create output Set for removal event:
        * the EERs of the themes, without any added PTM.
        */
       def createOutputForRemovalEvent(m: Mention): Set[IDPointer] = {
         // NOTE: we need to be careful if we use something other than theme
         m.arguments("theme")
           .map(getOrCreateEERwithID).map(_._2)
           .toSet
       }

       /**
        * Create output Set for addition event:
        * themes become SimpleEntities carrying the PTM named by the event label.
        */
       def createOutputForAdditionEvent(m: Mention): Set[IDPointer] = {

         val isNegated = hasNegation(m)

         // handle sites
         val ptms: Set[AssemblyModification] = m match {
           case hasSites if hasSites.arguments contains "site" =>
             // create a PTM for each site
             for (site <- hasSites.arguments("site").toSet[Mention]) yield representations.PTM(m.label, Some(site.text), isNegated)
           // create a PTM without a site
           case noSites => Set(representations.PTM(m.label, None, isNegated))
         }

         // NOTE: we need to be careful if we use something other than theme
         m.arguments("theme")
           .map(m => createSimpleEntityWithID(m, Some(ptms))).map(_._2)
           .toSet
       }

      // check for coref
      val e = getResolvedForm(m)

      // mention should be a SimpleEvent, but not a Binding
      require((e matches "SimpleEvent") && !(e matches "Binding"), s"handleNBSimpleEvent received Mention of label '${e.label}', but method only accepts a SimpleEvent Mention that is NOT a Binding.")
      // prepare input (roles -> repr. pointers)

      // create input
      val input: Map[String, Set[IDPointer]] = e match {
        case removalEvent if removalEvent matches "RemovalEvent" =>
          createInputForRemovalEvent(removalEvent)
        case additionEvent if additionEvent matches "AdditionEvent" =>
          createInputForAdditionEvent(additionEvent)
      }

      // prepare output
      val output: Set[IDPointer] = e match {
        case removalEvent if removalEvent matches "RemovalEvent" =>
          createOutputForRemovalEvent(removalEvent)
        case additionEvent if additionEvent matches "AdditionEvent" =>
          createOutputForAdditionEvent(additionEvent)
      }

      // prepare id
      // keyed on the original mention; obtained after input and output have been created
      val id = getOrCreateID(m)

      // prepare SimpleEvent
      val eer =
        new SimpleEvent(
          id,
          input,
          output,
          e.label,
          Some(m),
          this
        )

      // update LUTs
      // use original mention for later lookup
      updateLUTs(id, m, eer)

      (eer, id)
    }

    //
    // handle dispatch
    //

    val event = getResolvedForm(m.toCorefMention)
    require(event matches "SimpleEvent", s"createSimpleEventWithID requires Mention with the label SimpleEvent, but received Mention with label '${event.label}'")
    // there should not be a cause among the arguments
    require(!(event.arguments contains "cause"), "SimpleEvent should not contain a cause!")
    // SimpleEvent must have theme
    require(event.arguments contains "theme", s"'${event.label}' must have a theme.")
    m match {
      case binding if binding matches "Binding" => handleBinding(binding)
      case translocation if translocation matches "Translocation" => handleTranslocation(translocation)
      case other => handleNBSimpleEvent(other)
    }
  }

  /**
   * Creates a [[SimpleEvent]] from a Simple Event Mention by delegating to createSimpleEventWithID
   * and discarding the [[IDPointer]] half of the result.
   * @param m an Odin Mention
   * @return a [[SimpleEvent]]
   */
  private def createSimpleEvent(m: Mention): SimpleEvent = {
    val (simpleEvent, _) = createSimpleEventWithID(m)
    simpleEvent
  }

  //
  // Regulation creation
  //

  /**
   * Creates a [[Regulation]] from a Regulation Mention and updates the [[mentionStateToID]] and [[idToEER]] LUTs.
   *
   * Label, polarity and arguments are read from the resolved (coref) form of m,
   * while the ID and the stored source mention come from m itself.
   * Fails (via require) if the resolved form is not a Regulation, if its polarity is neither
   * positive nor negative, or if any "controlled" argument is not an Event.
   * @param m an Odin Mention
   * @return a tuple of ([[Regulation]], [[IDPointer]])
   */
  private def createRegulationWithID(m: Mention): (Regulation, IDPointer) = {

    // check for coref
    val resolved = getResolvedForm(m)
    val polarityLabel = getPolarityLabel(resolved)

    // mention should be a Regulation
    require(resolved matches "Regulation", "createRegulation only handles Regulations")
    // mention's polarity should be either positive or negative
    require(polarityLabel == AssemblyManager.positive || polarityLabel == AssemblyManager.negative, "Polarity of Regulation must be positive or negative")
    // all controlled args must be events
    require(resolved.arguments("controlled").forall(_ matches "Event"), "The 'controlled' of any Regulation must be an Event")

    // distinct mentions of a role -> pointers to their representations
    def pointersFor(role: String): Set[IDPointer] =
      resolved.arguments(role).toSet[Mention].map(arg => getOrCreateEERwithID(arg)._2)

    val controllerIDs = pointersFor("controller")
    val controlledIDs = pointersFor("controlled")

    // the ID is keyed on the original (unresolved) mention
    val id = getOrCreateID(m)

    val regulation = new Regulation(id, controllerIDs, controlledIDs, polarityLabel, Some(m), this)

    // update LUTs
    // use original mention for later lookup
    updateLUTs(id, m, regulation)

    (regulation, id)
  }

  /**
   * Creates a [[Regulation]] from a Regulation Mention by delegating to [[createRegulationWithID]]
   * and discarding the [[IDPointer]] half of the result.
   * @param m an Odin Mention
   * @return a [[Regulation]]
   */
  private def createRegulation(m: Mention): Regulation = {
    val (regulation, _) = createRegulationWithID(m)
    regulation
  }

  /**
   * Creates an [[Activation]] from an Activation Mention by delegating to [[createActivationWithID]]
   * and discarding the [[IDPointer]] half of the result.
   * @param m an Odin Mention
   * @return an [[Activation]]
   */
  private def createActivation(m: Mention): Activation = {
    val (activation, _) = createActivationWithID(m)
    activation
  }

  //
  // Activation creation
  //

  /**
   * Creates an [[Activation]] from an Activation Mention and updates the [[mentionStateToID]] and [[idToEER]] LUTs.
   *
   * Label, polarity and arguments are read from the resolved (coref) form of m,
   * while the ID and the stored source mention come from m itself.
   * Fails (via require) if the resolved form is not an ActivationEvent or if its polarity
   * is neither positive nor negative.
   * @param m an Odin Mention
   * @return a tuple of ([[Activation]], [[IDPointer]])
   */
  private def createActivationWithID(m: Mention): (Activation, IDPointer) = {

    // check for coref
    val resolved = getResolvedForm(m)
    val polarityLabel = getPolarityLabel(resolved)

    // mention should be an Activation
    require(resolved matches "ActivationEvent", "createActivation only handles Activations")
    // mention's polarity should be either positive or negative
    require(polarityLabel == AssemblyManager.positive || polarityLabel == AssemblyManager.negative, "Polarity of ComplexEvent must be positive or negative")

    // distinct mentions of a role -> pointers to their representations
    def pointersFor(role: String): Set[IDPointer] =
      resolved.arguments(role).toSet[Mention].map(arg => getOrCreateEERwithID(arg)._2)

    val controllerIDs = pointersFor("controller")
    val controlledIDs = pointersFor("controlled")

    // the ID is keyed on the original (unresolved) mention
    val id = getOrCreateID(m)

    // prepare Activation
    val activation = new Activation(id, controllerIDs, controlledIDs, polarityLabel, Some(m), this)

    // update LUTs
    // use original mention for later lookup
    updateLUTs(id, m, activation)

    (activation, id)
  }

  //
  // EntityEventRepresentation creation
  //

  /**
   * Looks up the [[EntityEventRepresentation]] for m, creating it (see [[createEER]]) when m is not yet tracked.
   * Fails (via require) when m is not a valid mention according to isValidMention.
   * @param m an Odin Mention
   * @return the [[EntityEventRepresentation]] corresponding to m
   */
  private def getOrCreateEER(m: Mention): EER = {
    // ensure this mention should be stored in LUT 1
    require(isValidMention(m), s"mention with the label ${m.label} cannot be tracked by the AssemblyManager")
    if (hasMention(m)) {
      // an ID already exists: retrieve the associated representation
      idToEER(mentionStateToID(getMentionState(m)))
    } else {
      // create new representation
      createEER(m)
    }
  }

  /**
   * Retrieves the ([[EntityEventRepresentation]], [[IDPointer]]) tuple for a Mention m.
   * When m is not present in the [[mentionStateToID]] LUT, the representation is created via [[createEER]]
   * and its uniqueID is used as the pointer.
   * @param m an Odin Mention
   * @return a tuple of ([[EntityEventRepresentation]], [[IDPointer]])
   */
  private def getOrCreateEERwithID(m: Mention): (EER, IDPointer) =
    if (hasMention(m)) {
      val knownID = mentionStateToID(getMentionState(m))
      (getEER(knownID), knownID)
    } else {
      val created = createEER(m)
      (created, created.uniqueID)
    }

  /**
   * Creates a ([[EntityEventRepresentation]], [[IDPointer]]) tuple from a Mention m.
   * The labels of the resolved form of m select the create* method; the unresolved m is what gets passed on.
   * Labels are tested in this order: Complex, Entity, Cellular_component, SimpleEvent, Regulation, ActivationEvent.
   * Throws an Exception when none of them matches.
   * @param m an Odin Mention
   * @return a tuple of ([[EntityEventRepresentation]], [[IDPointer]])
   */
  private def createEERwithID(m: Mention): (EER, IDPointer) = {
    // dispatch on the resolved form, but hand the original mention to the creator
    val resolved = getResolvedForm(m)
    if (resolved matches "Complex") createComplexWithID(m)
    else if (resolved matches "Entity") createSimpleEntityWithID(m, None)
    else if (resolved matches "Cellular_component") createSimpleEntityWithID(m, None)
    else if (resolved matches "SimpleEvent") createSimpleEventWithID(m)
    else if (resolved matches "Regulation") createRegulationWithID(m)
    else if (resolved matches "ActivationEvent") createActivationWithID(m)
    else throw new Exception(s"createEERwithID failed for ${resolved.label}")
  }

  /**
   * Creates an [[EntityEventRepresentation]] for a Mention m by delegating to [[createEERwithID]]
   * and discarding the [[IDPointer]] half of the result.
   * @param m an Odin Mention
   * @return an [[EntityEventRepresentation]]
   */
  private def createEER(m: Mention): EER = {
    val (eer, _) = createEERwithID(m)
    eer
  }

  //
  // Utils for summarization
  //

  /**
   * A (mostly) human readable printout of the (key, value) pairs in the [[mentionStateToID]] LUT.
   * Each entry is rendered as "<mentionSummary of the key's mention> => <id>".
   */
  def mentionIndexSummary: Seq[String] =
    mentionStateToID.toSeq.map { case (state, id) =>
      s"${mentionSummary(state._1)} => $id"
    }

  //
  // Utilities for component retrieval
  //

  /**
   * Retrieves all tracked Mentions, i.e. the first component of every key in [[AssemblyManager.mentionStateToID]]
   * @return the Set of Odin Mentions tracked by this AssemblyManager
   */
  def trackedMentions: Set[Mention] = {
    val mentions = for (state <- mentionStateToID.keys) yield state._1
    mentions.toSet
  }

  /**
   * Retrieves all EntityEventRepresentations found in [[AssemblyManager.idToEER]]
   * @return the Set of EntityEventRepresentations stored as values of the LUT
   */
  def EERs: Set[EER] = {
    val representations = idToEER.values
    representations.toSet
  }

  /**
   * Reads the [[IDPointer]] stored on a representation ([[EntityEventRepresentation.uniqueID]]).
   * @param eer an EntityEventRepresentation
   * @return the IDPointer for the repr
   */
  def getID(eer: EER): IDPointer = {
    val pointer = eer.uniqueID
    pointer
  }

  /**
   * Retrieves an [[EntityEventRepresentation]] for a Mention.
   * The mention's own state is looked up first; if it is not in [[mentionStateToID]],
   * the state of its resolved form is used instead.
   * Assumes an [[EntityEventRepresentation]] for the given Mention already exists.
   * @param m an Odin Mention
   * @return an [[EntityEventRepresentation]]
   */
  def getEER(m: Mention): EER = {
    require(isValidMention(m), s"Mention '${m.label}' is not valid")
    val ownState = getMentionState(m)
    // backoff: attempt to use resolved form
    val resolvedState = getMentionState(getResolvedForm(m))
    val id = mentionStateToID.get(ownState) match {
      case Some(found) => found
      case None => mentionStateToID(resolvedState)
    }
    idToEER(id)
  }

  /**
   * Looks up the [[EntityEventRepresentation]] stored under the given [[IDPointer]] in idToEER.
   * Assumes an [[EntityEventRepresentation]] associated with the provided [[IDPointer]] already exists.
   * @param id an [[IDPointer]]
   * @return an [[EntityEventRepresentation]]
   */
  def getEER(id: IDPointer): EER = {
    idToEER.apply(id)
  }

  /**
   * Retrieves the Set of [[EntityEventRepresentation]] tracked by the manager (the values of idToEER).
   * @return Set[EntityEventRepresentation]
   */
  def getEERs: Set[EER] = {
    val representations = idToEER.values
    representations.toSet
  }

  /**
   * Returns a SimpleEntity for a Mention with the appropriate labels.
   * The representation is retrieved or created through getOrCreateEER and cast to SimpleEntity.
   * @param m an Odin Mention.  Must have the label "Entity" and not the label "Complex".
   */
  def getSimpleEntity(m: Mention): SimpleEntity = {
    require(m matches "Entity", "Mention is not an Entity")
    require(! (m matches "Complex"), "Mention is a Complex")
    val eer = getOrCreateEER(m)
    eer.asInstanceOf[SimpleEntity]
  }

  /**
   * Returns an Event for a Mention.
   * The representation is retrieved or created through getOrCreateEER and cast to Event;
   * no label check is performed on m here.
   * @param m an Odin Mention.
   */
  // TODO: add label check?
  def getEvent(m: Mention): Event = {
    val eer = getOrCreateEER(m)
    eer.asInstanceOf[Event]
  }

  /**
   * Returns a Complex for a Mention m with the appropriate label.
   * The representation is retrieved or created through getOrCreateEER and cast to Complex.
   * @param m an Odin Mention.  Must have the label "Complex".
   */
  def getComplex(m: Mention): Complex = {
    require(m matches "Complex", "Mention is not a Complex")
    val eer = getOrCreateEER(m)
    eer.asInstanceOf[Complex]
  }

  /**
   * Returns a SimpleEvent for a Mention m with the appropriate labels.
   * The representation is retrieved or created through getOrCreateEER and cast to SimpleEvent.
   * @param m an Odin Mention.  Must have the label "SimpleEvent".
   */
  def getSimpleEvent(m: Mention): SimpleEvent = {
    require(m matches "SimpleEvent", "Mention is not a SimpleEvent")
    val eer = getOrCreateEER(m)
    eer.asInstanceOf[SimpleEvent]
  }

  /**
   * Returns a Regulation for a Mention m with the label "Regulation".
   * The representation is retrieved or created through getOrCreateEER and cast to Regulation.
   * @param m an Odin Mention.  Must have the label "Regulation"
   */
  def getRegulation(m: Mention): Regulation = {
    require(m matches "Regulation", "Mention is not a Regulation")
    val eer = getOrCreateEER(m)
    eer.asInstanceOf[Regulation]
  }

  /**
   * Collects mentions pointing to a given [[EntityEventRepresentation]].
   * All representations sharing eer's equivalenceHash are considered; for each one whose
   * uniqueID is present in idToMentionState, the mention of that state is collected.
   * @param eer an [[EntityEventRepresentation]]
   * @return the Set of Mentions serving as textual evidence of the given representation
   */
  def getEvidence(eer: EER): Set[Mention] = {
    val equivalents: Set[EER] = ehToEERs.getOrElse(eer.equivalenceHash, Set.empty[EER])
    equivalents.collect {
      // check is needed, because output of a SimpleEvent has no Mention
      case equiv if idToMentionState contains equiv.uniqueID =>
        val (mention, _) = idToMentionState(equiv.uniqueID)
        mention
    }
  }

  //
  // Grouping utilities
  //

  /**
   * Get non-distinct equivalent EERs matching the provided equivalenceHash (eh)
   * @param eh an [[EntityEventRepresentation.equivalenceHash]]
   * @return the Set stored under eh in ehToEERs, or an empty Set when eh is unknown
   */
  def getEquivalentEERs(eh: Int): Set[EER] = ehToEERs.get(eh) match {
    case Some(group) => group
    case None => Set.empty[EER]
  }
  // same lookup, keyed on the equivalenceHash of the given representation
  def getEquivalentEERs(eer: EER): Set[EER] = ehToEERs.get(eer.equivalenceHash) match {
    case Some(group) => group
    case None => Set.empty[EER]
  }

  /**
   * Returns groups of equivalent [[EntityEventRepresentation]], ignoring differences due to [[IDPointer]] references.
   * The groups are the values of ehToEERs.
   *
   * Mentions may point to (essentially) the same [[EntityEventRepresentation]], which would only differ in terms of the [[IDPointer]], which link an [[EntityEventRepresentation]] to a particular Mention
   */
  def groupedEERs: Seq[Set[EER]] = {
    val groups = ehToEERs.values
    groups.toSeq
  }

  /**
   * Gets distinct members of eers after grouping by [[EntityEventRepresentation.equivalenceHash]].
   * @param eers an [[EntityEventRepresentation]] Set
   * @return a Set holding the head of each equivalence group
   */
  def distinctEERsFromSet(eers: Set[EER]): Set[EER] = {
    val groups = eers.groupBy(_.equivalenceHash).values
    // keep one representative per group
    groups.map(_.head).toSet
  }

  /**
   * Returns head of each group returned by [[groupedEERs]].
   *
   * @return a Set of [[EntityEventRepresentation]]
   */
  def distinctEERs: Set[EER] = {
    val representatives = for (group <- groupedEERs) yield group.head
    representatives.toSet
  }

  /**
   * Returns Set of "distinct" [[EntityEventRepresentation]], each paired with the result of [[getEvidence]] for it.
   */
  def distinctEERsWithEvidence: Set[(EER, Set[Mention])] =
    for (eer <- distinctEERs) yield (eer, getEvidence(eer))

  // Entity

  /**
   * Retrieves all Entities from the manager (the members of getEERs that are instances of Entity).
   * Note that these are non-distinct.
   */
  def getEntities: Set[Entity] =
    getEERs.collect { case entity: Entity => entity }

  /**
   * Returns "distinct" Set of Entities (the members of distinctEERs that are instances of Entity).
   * Ignores multiple instances of the same Entity.
   * @return a Set of Entity
   */
  def distinctEntities: Set[Entity] =
    distinctEERs.collect { case entity: Entity => entity }

  /**
   * Returns "distinct" Set of Entities, each paired with the result of [[getEvidence]] for that [[Entity]].
   * @return Set[(Entity, Set[Mention])]
   */
  def distinctEntitiesWithEvidence: Set[(Entity, Set[Mention])] =
    for (entity <- distinctEntities) yield (entity, getEvidence(entity))

  // Event

  /**
   * Retrieves all Events from the manager (the members of getEERs that are instances of Event).
   * Note that these are non-distinct (Events may differ in terms of their IDPointers).
   */
  def getEvents: Set[Event] =
    getEERs.collect { case event: Event => event }

  /**
   * Returns "distinct" Set of Events (the members of distinctEERs that are instances of Event).
   * Ignores multiple instances of the same Event.
   * @return a Set of Event
   */
  def distinctEvents: Set[Event] =
    distinctEERs.collect { case event: Event => event }

  /**
   * Returns "distinct" Set of Events, each paired with the result of [[getEvidence]] for that [[Event]].
   * @return Set[(Event, Set[Mention])]
   */
  def distinctEventsWithEvidence: Set[(Event, Set[Mention])] =
    for (event <- distinctEvents) yield (event, getEvidence(event))

  // SimpleEntity

  /**
   * Retrieves all SimpleEntities from the manager (the members of getEERs that are instances of SimpleEntity).
   * Note that these are non-distinct.
   */
  def getSimpleEntities: Set[SimpleEntity] =
    getEERs.collect { case entity: SimpleEntity => entity }

  /**
   * Retrieves all SimpleEntities whose modifications contain the given modification.
   * @param mod an [[AssemblyModification]]
   * @return a Set of SimpleEntities sharing mod
   */
  def getSimpleEntitiesByModification[M <: AssemblyModification](mod: M): Set[SimpleEntity] =
    getSimpleEntities.filter(entity => entity.modifications contains mod)

  /**
   * Returns "distinct" Set of SimpleEntity (the members of distinctEERs that are instances of SimpleEntity).
   * Ignores multiple instances of the same SimpleEntity.
   * @return a Set of SimpleEntity
   */
  def distinctSimpleEntities: Set[SimpleEntity] =
    distinctEERs.collect { case entity: SimpleEntity => entity }

  /**
   * Returns "distinct" Set of SimpleEntities, each paired with the result of [[getEvidence]] for that [[SimpleEntity]].
   * @return Set[(SimpleEntity, Set[Mention])]
   */
  def distinctSimpleEntitiesWithEvidence: Set[(SimpleEntity, Set[Mention])] =
    for (entity <- distinctSimpleEntities) yield (entity, getEvidence(entity))

  // Complex

  /**
   * Retrieves all Complexes from the manager (the members of getEERs that are instances of Complex).
   * Note that these are non-distinct (Complexes may differ in terms of their IDPointers).
   */
  def getComplexes: Set[Complex] =
    getEERs.collect { case complex: Complex => complex }

  /**
   * Returns "distinct" Set of Complexes (the members of distinctEERs that are instances of Complex).
   * Ignores differences in IDPointers.
   * @return a Set of Complexes
   */
  def distinctComplexes: Set[Complex] =
    distinctEERs.collect { case complex: Complex => complex }

  /**
   * Returns "distinct" Set of Complexes, each paired with the result of [[getEvidence]] for that Complex.
   * @return Set[(Complex, Set[Mention])]
   */
  def distinctComplexesWithEvidence: Set[(Complex, Set[Mention])] =
    for (complex <- distinctComplexes) yield (complex, getEvidence(complex))

  // SimpleEvents

  /**
   * Retrieves all SimpleEvents from the manager (the members of getEERs that are instances of SimpleEvent).
   * Note that these are non-distinct (SimpleEvents may differ in terms of their IDPointers).
   */
  def getSimpleEvents: Set[SimpleEvent] =
    getEERs.collect { case simpleEvent: SimpleEvent => simpleEvent }

  /**
   * Retrieves all SimpleEvents from the manager whose label equals the provided event label.
   * Note that these are non-distinct (SimpleEvents may differ in terms of their IDPointers).
   * @param label a String to match against each [[SimpleEvent.label]]
   */
  def getSimpleEvents(label: String): Set[SimpleEvent] =
    getEERs.collect {
      case simpleEvent: SimpleEvent if simpleEvent.label == label => simpleEvent
    }

  /**
   * Returns "distinct" Set of SimpleEvents (the members of distinctEERs that are instances of SimpleEvent).
   * Ignores differences in IDPointers.
   * @return a Set of SimpleEvents
   */
  def distinctSimpleEvents: Set[SimpleEvent] =
    distinctEERs.collect { case simpleEvent: SimpleEvent => simpleEvent }

  /**
   * Returns "distinct" Set of SimpleEvents whose label equals the provided event label. Ignores differences in IDPointers.
   * @param label a String to match against each [[SimpleEvent.label]]
   * @return a Set of SimpleEvents
   */
  def distinctSimpleEvents(label: String): Set[SimpleEvent] =
    distinctEERs.collect {
      case simpleEvent: SimpleEvent if simpleEvent.label == label => simpleEvent
    }

  /**
   * Returns "distinct" Set of SimpleEvent matching the provided event label, each paired with the result of [[getEvidence]] for that SimpleEvent.
   * @param label a String to match against each [[SimpleEvent.label]]
   * @return Set[(SimpleEvent, Set[Mention])]
   */
  def distinctSimpleEventsWithEvidence(label: String): Set[(SimpleEvent, Set[Mention])] =
    for (simpleEvent <- distinctSimpleEvents(label)) yield (simpleEvent, getEvidence(simpleEvent))

  // Regulations

  /**
   * Retrieves all Regulations from the manager (the members of getEERs that are instances of Regulation).
   * Note that these are non-distinct (Regulations may differ in terms of their IDPointers).
   */
  def getRegulations: Set[Regulation] =
    getEERs.collect { case regulation: Regulation => regulation }

  /**
   * Retrieves all Regulations from the manager whose polarity equals the provided polarity label.
   * Note that these are non-distinct (Regulations may differ in terms of their IDPointers).
   * @param polarity a String to match against each [[Regulation.polarity]]
   */
  def getRegulations(polarity: String): Set[Regulation] =
    getEERs.collect {
      case regulation: Regulation if regulation.polarity == polarity => regulation
    }

  /**
   * Returns "distinct" Set of Regulation (the members of distinctEERs that are instances of Regulation).
   * Ignores differences in IDPointers.
   * @return a Set of Regulation
   */
  def distinctRegulations: Set[Regulation] =
    distinctEERs.collect { case regulation: Regulation => regulation }

  /**
   * Returns "distinct" Set of Regulations whose polarity equals the provided polarity. Ignores differences in IDPointers.
   * @param polarity a String to match against each [[Regulation.polarity]]
   * @return a Set of Regulations
   */
  def distinctRegulations(polarity: String): Set[Regulation] =
    distinctEERs.collect {
      case regulation: Regulation if regulation.polarity == polarity => regulation
    }

  /**
   * Returns "distinct" Set of Regulations, each paired with the result of [[getEvidence]] for that Regulation.
   * @return Set[(Regulation, Set[Mention])]
   */
  def distinctRegulationsWithEvidence: Set[(Regulation, Set[Mention])] =
    for (regulation <- distinctRegulations) yield (regulation, getEvidence(regulation))

  /**
   * Returns "distinct" Set of Regulations matching the provided polarity, each paired with the result of [[getEvidence]] for that Regulation.
   * @param polarity a String to match against each [[Regulation.polarity]]
   * @return Set[(Regulation, Set[Mention])]
   */
  def distinctRegulationsWithEvidence(polarity: String): Set[(Regulation, Set[Mention])] =
    for (regulation <- distinctRegulations(polarity)) yield (regulation, getEvidence(regulation))

  // Activations

  /**
   * Retrieves all Activations from the manager (the members of getEERs that are instances of Activation).
   * Note that these are non-distinct (Activations may differ in terms of their IDPointers).
   */
  def getActivations: Set[Activation] =
    getEERs.collect { case activation: Activation => activation }

  /**
   * Retrieves all Activations from the manager whose polarity equals the provided polarity label.
   * Note that these are non-distinct (Activations may differ in terms of their IDPointers).
   * @param polarity a String to match against each [[Activation.polarity]]
   */
  def getActivations(polarity: String): Set[Activation] =
    getEERs.collect {
      case activation: Activation if activation.polarity == polarity => activation
    }

  /**
   * Returns "distinct" Set of Activations (the members of distinctEERs that are instances of Activation).
   * Ignores differences in IDPointers.
   * @return a Set of Activation
   */
  def distinctActivations: Set[Activation] =
    distinctEERs.collect { case activation: Activation => activation }

  /**
   * Returns "distinct" Set of Activations whose polarity equals the provided polarity. Ignores differences in IDPointers.
   * @param polarity a String to match against each [[Activation.polarity]]
   * @return a Set of Activations
   */
  def distinctActivations(polarity: String): Set[Activation] =
    distinctEERs.collect {
      case activation: Activation if activation.polarity == polarity => activation
    }

  /**
   * Returns "distinct" Set of Activations and all evidence (Set[Mention]) corresponding to each Activation.
   *
   * NOTE: this method previously iterated over distinctRegulations (and was typed accordingly),
   * so it returned Regulations rather than Activations. It now mirrors the polarity-filtered
   * overload and pairs every member of distinctActivations with the result of [[getEvidence]].
   * @return Set[(Activation, Set[Mention])]
   */
  def distinctActivationsWithEvidence: Set[(Activation, Set[Mention])] = {
    distinctActivations
      .map( act => (act, getEvidence(act)))
  }

  /**
   * Returns "distinct" Set of Activations matching the provided polarity, each paired with the result of [[getEvidence]] for that Activation.
   * @param polarity a String to match against each [[Activation.polarity]]
   * @return Set[(Activation, Set[Mention])]
   */
  def distinctActivationsWithEvidence(polarity: String): Set[(Activation, Set[Mention])] =
    for (activation <- distinctActivations(polarity)) yield (activation, getEvidence(activation))

  //
  // summary utilities
  //

  /**
   * A high-level summary of a Mention m: its label, text, modifications (read from its
   * BioMention form), the id of its document and its sentence index.
   *
   * The document id is optional; when it is absent [[AssemblyManager.unknown]] is printed
   * in its place instead of failing on an empty Option.
   * @param m an Odin Mention
   * @return a high-level String representation of m
   */
  def mentionSummary(m: Mention): String = {
    val bio = m.toBioMention
    // do not call .get on the optional id: documents without an id must still be summarizable
    val docID = m.document.id.getOrElse(AssemblyManager.unknown)
    val docRepr = s"DOC:$docID (sent. ${m.sentence})"
    s"Mention(label=${m.label}, text='${m.text}', modifications=${bio.modifications}, doc=$docRepr)"
  }

  /** Prints the sorted lines of [[mentionIndexSummary]], one per line. */
  def summarizeMentionIndex: Unit = {
    val lines = mentionIndexSummary.sorted
    println(lines.mkString("\n"))
  }

  /** Prints the sorted summaries (summarize) of all SimpleEntities returned by getSimpleEntities, one per line. */
  def summarizeEntities: Unit = {
    val summaries = getSimpleEntities.map(_.summarize).toSeq
    println(summaries.sorted.mkString("\n"))
  }


  //
  // LUT utils
  //

  /** True when the state of m (getMentionState) is a key of the mentionStateToID LUT. */
  def hasMention(m: Mention): Boolean = {
    val state = getMentionState(m)
    mentionStateToID.contains(state)
  }

  //
  // Set diff
  //

  /** Members of eers1 for which no member of eers2 reports isEquivalentTo. */
  def EERdiff(eers1: Set[EER], eers2: Set[EER]): Set[EER] =
    eers1.filter { candidate =>
      !eers2.exists(other => other.isEquivalentTo(candidate))
    }

  /** Members of eers1 for which at least one member of eers2 reports isEquivalentTo. */
  def EERintersection(eers1: Set[EER], eers2: Set[EER]): Set[EER] =
    eers1.filter { candidate =>
      eers2.exists(other => other.isEquivalentTo(candidate))
    }

  //
  // Serialization methods
  //

  /** Saves this manager to the absolute path of f (see the String overload). */
  def saveTo(f: File): Unit = {
    val path = f.getAbsolutePath
    saveTo(path)
  }

  /** Saves this manager under fileName using org.clulab.utils.Serializer.save. */
  def saveTo(fileName: String): Unit =
    org.clulab.utils.Serializer.save[AssemblyManager](this, fileName)
}

object AssemblyManager {
  // polarity labels
  val positive = "Positive"
  val negative = "Negative"
  val unknown = "UNKNOWN"

  /** Instantiate an [[AssemblyManager]] with empty LUTs */
  def apply(): AssemblyManager = new AssemblyManager(Map.empty[MentionState, IDPointer], Map.empty[IDPointer, EER])

  /**
   * Instantiate [[AssemblyManager]] and track the provided Mentions
   * @param mns a sequence of Odin Mentions
   * @return an [[AssemblyManager]]
   */
  def apply(mns: Seq[Mention]): AssemblyManager = {
    val am = new AssemblyManager(Map.empty[MentionState, IDPointer], Map.empty[IDPointer, EER])
    am.trackMentions(mns)
    am
  }

  /** Loads an [[AssemblyManager]] from the absolute path of f (see the String overload) */
  def loadFrom(f: File): AssemblyManager = loadFrom(f.getAbsolutePath)

  /** Loads an [[AssemblyManager]] from fileName using org.clulab.utils.Serializer.load */
  def loadFrom(fileName: String): AssemblyManager = {
    org.clulab.utils.Serializer.load[AssemblyManager](fileName)
  }

  /**
   * Get antecedent if present.  Otherwise return the CorefMention as-is.
   *
   * Used to retrieve the appropriate features of a mention's antecedent.
   * @param m an Odin Mention
   * @return a [[org.clulab.reach.mentions.CorefMention]] (possibly cm)
   */
  def getResolvedForm(m: Mention): CorefMention = {
    val cm = m.toCorefMention
    cm.antecedentOrElse(cm)
  }

  /**
   * Checks whether a mention involves a coreference resolution
   * @param m an Odin Mention
   * @return true if the resolved form of m differs from m, or if this holds (recursively) for any argument of the resolved form
   */
  def involvesCoreference(m: Mention): Boolean = getResolvedForm(m) match {
    // if the resolved form differs from m, this is a case of coref
    case resolved if resolved != m => true
    // ... otherwise check if any arg involves coref
    case checkArgs => checkArgs.arguments.values.flatten.exists(involvesCoreference)
  }

  /**
   * Checks to see if the mention can be safely handled by the AssemblyManager
   * Currently Sites are not stored in the LUTs,
   * though they can appear as part of a modification
   * (see the [[PTM]] [[AssemblyModification]] for an example)
   * @param mention an Odin Mention
   * @return true if the mention can be safely handled by the manager; false otherwise
   */
  def isValidMention(mention: Mention): Boolean = {
    getResolvedForm(mention) match {
      // no generic event
      case gen if gen matches "Generic_event" => false
      // allow entities
      case entity if entity matches "Entity" => true
      // needed for Translocations
      case cc if cc matches "Cellular_component" => true
      // simple events must have a theme and should not have a cause
      case se if se matches "SimpleEvent" =>
        (se.arguments contains "theme") && !(se.arguments contains "cause")
      // activations must have controlled and controller
      case act if act matches "ActivationEvent" =>
        (act.arguments contains "controller") && (act.arguments contains "controlled") &&
          // controllers must be Entities
          act.arguments("controller").forall {
            case entity if entity matches "Entity" => true
            case _ => false
          } &&
          // make sure all controlleds are valid
          act.arguments("controlled").forall(isValidMention)
      // regs must have controlled and controller
      case reg if reg matches "Regulation" =>
        (reg.arguments contains "controller") && (reg.arguments contains "controlled") &&
          // controlled must be an Event (or Complex), but not an Activation
          reg.arguments("controlled").forall {
            // controlled cannot be an entity UNLESS it is a Complex
            case complex if complex matches "Complex" => true
            case entity if entity matches "Entity" => false
            case event if event matches "Event" => isValidMention(event)
            // anything else is invalid (previously this fell through to a MatchError)
            case _ => false
          }
      // assume invalid otherwise
      case _ => false
    }
  }

  /**
   * Retrieves all themes from a Mention
   * @param m an Odin Mention
   * @return a Seq[Mention] produced by a flattening of all values corresponding to theme* keys
   */
  def getAllThemes(m: Mention): Seq[Mention] = {
    m.arguments
      .filter(_._1.toLowerCase.startsWith("theme"))
      .values
      .flatten
      .toSeq
  }

  /**
   * Check if mention is negated,
   * i.e. whether the modifications of its BioMention form include a Negation
   */
  def hasNegation(m: Mention): Boolean = m.toBioMention.modifications exists {
    case mentions.Negation(_) => true
    case _ => false
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy