Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
7b703f5
Added new bio processes related to frailty
enoriega Feb 15, 2021
ccc11e5
Merge branch 'master' into frailty
enoriega Mar 24, 2021
80a9bc4
Progress on the grammar for associations. The use case is frailty
enoriega Mar 27, 2021
a64f4e3
Progress on the grammar for associations. The use case is frailty
enoriega Mar 27, 2021
085ecf2
Progress on the grammar for associations. The use case is frailty
enoriega Mar 27, 2021
19f2c88
Merge remote-tracking branch 'origin/frailty' into frailty
enoriega Mar 27, 2021
d366998
Added more unit tests for association events
enoriega May 3, 2021
8a46796
Merge remote-tracking branch 'origin/decouple_kb' into frailty
enoriega May 3, 2021
ca5bbed
Restored the frailty related entities to the override file
enoriega May 3, 2021
cb948d8
Greatly expanded the association unit tests and grammar
enoriega May 4, 2021
8e7c9dd
Expanded the grammar with more rules and triggers
enoriega May 4, 2021
6388831
Added a term to the hedging lexicon
enoriega May 10, 2021
42ab64c
Merge remote-tracking branch 'origin/master' into frailty
enoriega May 10, 2021
de1b87d
Merge branch 'decouple_kb' into frailty
enoriega May 10, 2021
e177df7
Added support for p values and correlation coefficients
enoriega May 11, 2021
8e5eba6
Changed the Association unit tests to account for Positive and Negati…
enoriega May 11, 2021
826df27
Added support for confidence intervals
enoriega May 11, 2021
b55676c
Process materials and methods too
enoriega May 12, 2021
28b3180
Bug fix for the issue that crashed 1/3 of the papers during assembly
enoriega May 26, 2021
ff301a8
Improved polarity detection for associations
enoriega May 26, 2021
25de135
Extended the assembly manager to allow bypassing of assembly for the …
enoriega Jun 2, 2021
8744a2d
Changed AZ output to add mark up to visualize the elements of the events
enoriega Jun 29, 2021
a4c90a8
Merge branch 'temp' into frailty
enoriega Jun 29, 2021
7426870
Fixed runReachCLI.sh
enoriega Jul 26, 2021
8c767e9
Merge branch 'master' into frailty
enoriega Sep 6, 2021
74dfd62
Merge branch 'master' into frailty
enoriega Mar 21, 2022
ce6e1b9
Added a new use case to NXML searcher for our project
enoriega Mar 21, 2022
3c2323b
Added docstring
enoriega Mar 22, 2022
3ee7dc7
Merge branch 'master' into frailty
enoriega Mar 28, 2022
f81b3ed
Merge branch 'serialization' into frailty
enoriega Mar 28, 2022
4a77c47
Added new bio processes for frailty
enoriega Mar 31, 2022
27a0dab
Updates to allow cell types and organs as participants
enoriega May 31, 2022
5c3a22f
Added chilton use case and a
enoriega Jun 7, 2022
6e87e4b
bugfix
enoriega Jun 7, 2022
064fdd2
Merge branch 'master' into frailty
enoriega Jun 10, 2022
ed01f65
Fixed de-serialization bugs
enoriega Jun 24, 2022
d59f218
Merge remote-tracking branch 'origin/master' into frailty
enoriega Dec 6, 2022
0f07901
Added terms for Skye's search
enoriega Dec 6, 2022
5cb32c5
Bumped up the sbt version and scala version to support apple silicon
enoriega Sep 6, 2023
1eb6aad
Added the "training-data" output format to train classifiers and rela…
enoriega Sep 6, 2023
4bc7480
Fixed a bug that crashed the whole CLI app by throwing an error
enoriega Sep 6, 2023
808d130
Updates from Keith's review
enoriega Sep 22, 2023
ade626a
Removed a comma to make it work on Scala 2.11
enoriega Sep 22, 2023
11e40bc
Added additional utilities for exporting training data. This time: Ru…
enoriega Oct 6, 2023
4893638
Added visual analytics output
enoriega Oct 7, 2023
01e5411
Added negative examples to the training data exporter
enoriega Oct 21, 2023
fe8cf32
Added the missing KB ids to the visual analytics output
enoriega Nov 7, 2023
89936c3
Improved the training data output format
enoriega Feb 11, 2024
f01d71f
Adding article text for the VA project
enoriega Jul 24, 2024
ba4f3b0
Added character span for mention elements in VA output
enoriega Jul 24, 2024
ed0b6a7
Changed the nxml reader version temporarily to a local snapshot
enoriega Jul 25, 2024
1162a7f
Cherry picked the mark up assembly form the frailty branch
enoriega Sep 3, 2024
b54a9d8
Restored AssemblyRow to its previous state and updated build sbt to u…
enoriega Sep 3, 2024
9386700
Merge branch 'refs/heads/frailty' into enoriega/training_output
enoriega Sep 3, 2024
6f07269
Added the "is_negated" flag to the outputs
enoriega Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@ package org.clulab.reach.assembly

import com.typesafe.scalalogging.LazyLogging
import org.clulab.reach.assembly.representations._
import collection.Map
import collection.immutable

import collection.{Map, immutable, mutable}
import org.clulab.odin._
import org.clulab.reach.mentions.{CorefMention, MentionOps}

import scala.collection.mutable.ListBuffer
// used to differentiate AssemblyModifications from Modifications on mentions
import org.clulab.reach.mentions
import java.io.File


/**
* Stores precedence information for two distinct [[EntityEventRepresentation]]
* @param before the [[EntityEventRepresentation] that precedes [[PrecedenceRelation.after]]
* @param before the [[EntityEventRepresentation]] that precedes [[PrecedenceRelation.after]]
* @param after the [[EntityEventRepresentation]] that follows [[PrecedenceRelation.before]]
* @param evidence the mentions that serve as evidence for this precedence relation
* @param foundBy the name of the Sieve which found this relation
Expand Down Expand Up @@ -69,6 +71,7 @@ class AssemblyManager(

import AssemblyManager._

private val nonAssemblyMentions = new mutable.ListBuffer[Mention]()
// Because modifications don't feature into the hashcode,
// a mention's identity at assembly consists of both the mention and its mods (i.e., the "state" of the mention)
private var mentionStateToID: immutable.Map[MentionState, IDPointer] = m2id.toMap
Expand All @@ -90,6 +93,8 @@ class AssemblyManager(
// initialize to size of LUT 2
private var nextID: IDPointer = idToEER.size

/** Returns an immutable List copy of the mentions currently held in `nonAssemblyMentions`. */
def getNonAssemblyMentions: Iterable[Mention] = {
  val snapshot = nonAssemblyMentions.toList
  snapshot
}

/**
* Retrieve the set of mentions currently tracked by the manager
*/
Expand Down Expand Up @@ -293,12 +298,16 @@ class AssemblyManager(
* See [[isValidMention]] for details on validation check
* @param m an Odin Mention
*/
def trackMention(m: Mention): Unit = isValidMention(m) match {
// do not store Sites, Activations, etc. in LUT 1
case true =>
// get or create an EntityEventRepresentation
val _ = getOrCreateEER(m)
case false => ()
def trackMention(m: Mention): Unit = {
  // invalid mentions are ignored entirely
  val trackable = isValidMention(m)
  if (trackable && (m matches "Statistic")) {
    // mentions labelled "Statistic" are set aside instead of becoming an EER
    nonAssemblyMentions += m
  } else if (trackable) {
    // get or create an EntityEventRepresentation; the result itself is not needed here
    val _ = getOrCreateEER(m)
  }
}

/**
Expand Down Expand Up @@ -572,7 +581,7 @@ class AssemblyManager(
new SimpleEntity(
id,
// TODO: decide whether or not we should use a richer representation for the grounding ID
e.nsId,
e.nsId(),
// modifications relevant to assembly
if (mods.isDefined) modifications ++ mods.get else modifications,
// source mention
Expand Down Expand Up @@ -651,6 +660,59 @@ class AssemblyManager(
*/
private def createComplex(m: Mention): Complex = createComplexWithID(m)._1


/**
  * Creates an [[Association]] for the given mention and registers it in the manager's LUTs.
  * The coref-resolved form of the mention supplies the label, polarity and "theme" arguments,
  * while the original mention is used for the ID, as the source mention and for later lookup.
  * No positive/negative restriction is placed on the polarity here.
  * @param m an Odin Mention whose resolved form must match "Association"
  * @return a tuple of ([[Association]], [[IDPointer]])
  * @throws IllegalArgumentException if the resolved form is not an Association
  */
private def createAssociationEventWithID(m: Mention): (Association, IDPointer) = {
  // check for coref
  val assoc = getResolvedForm(m)

  // fail fast: validate the label before deriving anything else from the mention
  require(assoc matches "Association", "createAssociationEventWithID only handles Associations")

  // get polarity
  val polarity = getPolarityLabel(assoc)

  // every "theme" argument is mapped to the ID of its (possibly newly created) EER
  // NOTE(review): presumably arguments("theme") throws if the key is absent — confirm callers validate first
  val themes: Set[IDPointer] =
    assoc.arguments("theme")
      .toSet[Mention]
      .map(theme => getOrCreateEERwithID(theme)._2)

  // prepare id
  val id = getOrCreateID(m)

  // prepare Association
  val eer = Association(id, themes, polarity, Some(m), this)

  // update LUTs
  // use original mention for later lookup
  updateLUTs(id, m, eer)

  // eer and id pair
  (eer, id)
}

//
// SimpleEvent creation
//
Expand Down Expand Up @@ -761,7 +823,7 @@ class AssemblyManager(
for (
src <- hasSource.arguments(src).toSet[Mention]
) yield {
val gid = src.toBioMention.nsId
val gid = src.toBioMention.nsId()
representations.Location(gid).asInstanceOf[AssemblyModification]
}
// no mods
Expand Down Expand Up @@ -796,7 +858,7 @@ class AssemblyManager(
for (
d <- hasSource.arguments(dest).toSet[Mention]
) yield {
val gid = d.toBioMention.nsId
val gid = d.toBioMention.nsId()
representations.Location(gid)
}
// no mods
Expand Down Expand Up @@ -1145,13 +1207,11 @@ class AssemblyManager(
private def getOrCreateEER(m: Mention): EER = {
// ensure this mention should be stored in LUT 1
require(isValidMention(m), s"mention with the label ${m.label} cannot be tracked by the AssemblyManager")
hasMention(m) match {
// if an ID already exists, retrieve the associated representation
case true =>
val id = mentionStateToID(getMentionState(m))
idToEER(id)
// create new representation
case false => createEER(m)
if (hasMention(m)) {
val id = mentionStateToID(getMentionState(m))
idToEER(id)
} else {
createEER(m)
}
}

Expand All @@ -1161,15 +1221,14 @@ class AssemblyManager(
* @param m an Odin Mention
* @return a tuple of ([[EntityEventRepresentation]], [[IDPointer]])
*/
private def getOrCreateEERwithID(m: Mention): (EER, IDPointer) = hasMention(m) match {
case true =>
val id = mentionStateToID(getMentionState(m))
val eer = getEER(id)
(eer, id)
case false =>
val eer = createEER(m)
val id = eer.uniqueID
(eer, id)
private def getOrCreateEERwithID(m: Mention): (EER, IDPointer) = {
  if (!hasMention(m)) {
    // not tracked yet: build a new representation and report its own ID
    val created = createEER(m)
    (created, created.uniqueID)
  } else {
    // already tracked: look up the ID by mention state, then fetch the representation
    val pointer = mentionStateToID(getMentionState(m))
    val existing = getEER(pointer)
    (existing, pointer)
  }
}

/**
Expand All @@ -1188,6 +1247,7 @@ class AssemblyManager(
case se if se matches "SimpleEvent" => createSimpleEventWithID(m)
case regulation if regulation matches "Regulation" => createRegulationWithID(m)
case activation if activation matches "ActivationEvent" => createActivationWithID(m)
case association if association matches "Association" => createAssociationEventWithID(m)
case other => throw new Exception(s"createEERwithID failed for ${other.label}")
}
}
Expand Down Expand Up @@ -1758,9 +1818,9 @@ class AssemblyManager(
s"Mention(label=${m.label}, text='${m.text}', modifications=${bio.modifications}, doc=$docRepr)"
}

def summarizeMentionIndex: Unit = println(mentionIndexSummary.sorted.mkString("\n"))
/** Prints the sorted entries of `mentionIndexSummary`, one per line. */
def summarizeMentionIndex(): Unit = {
  val report = mentionIndexSummary.sorted.mkString("\n")
  println(report)
}

def summarizeEntities: Unit = println(getSimpleEntities.map(_.summarize).toSeq.sorted.mkString("\n"))
/** Prints the sorted `summarize` output of every simple entity, one per line. */
def summarizeEntities(): Unit = {
  val summaries = getSimpleEntities.map(_.summarize).toSeq.sorted
  println(summaries.mkString("\n"))
}


//
Expand Down Expand Up @@ -1888,6 +1948,13 @@ object AssemblyManager {
case event if event matches "Event" => isValidMention(event)
}

// Associations must have two theme arguments
case association if association matches "Association" =>
(association.arguments contains "theme") && (association.arguments("theme").size == 2)

case significance if significance matches "Significance" =>
(significance.arguments contains "kind") && (significance.arguments contains "value")

// assume invalid otherwise
case _ => false
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class AssemblyExporter(val manager: AssemblyManager) extends LazyLogging {
val ignoreMods = false

// distinct EntityEventRepresentations
val distinctEERS = manager.distinctEERs
val distinctEERS: Set[EER] = manager.distinctEERs

// LUT for retrieving IDs to distinct EERs
// TODO: A better version of this should probably belong to the manager
Expand Down Expand Up @@ -121,9 +121,19 @@ class AssemblyExporter(val manager: AssemblyManager) extends LazyLogging {
case se: SimpleEvent =>
se.input.values.flatten.map(m => createInput(m, mods)).mkString(", ")

case assoc: Association =>
assoc.controlled.map{
// get IDs of any events
case event: Event => EERLUT.getOrElse(event.equivalenceHash(ignoreMods = ignoreMods), reportError(assoc, event))
// represent entities directly
case entity: Entity =>
createInput(entity, s"$mods")
}.mkString(", ")


// inputs to an activation are entities
case act: Activation =>
act.controlled.map {
act.controller.map {
// get IDs of any events
case event: Event => EERLUT.getOrElse(event.equivalenceHash(ignoreMods = ignoreMods), reportError(act, event))
// represent entities directly
Expand Down Expand Up @@ -153,6 +163,15 @@ class AssemblyExporter(val manager: AssemblyManager) extends LazyLogging {
case other => createInput(other, mods)
}.mkString(", ")

case assoc: Association =>
assoc.controlled.map{
// get IDs of any events
case event: Event => EERLUT.getOrElse(event.equivalenceHash(ignoreMods = ignoreMods), reportError(assoc, event))
// represent entities directly
case entity: Entity =>
createInput(entity, s"$mods")
}.mkString(", ")

// positive activations produce an activated output entity
case posact: Activation if posact.polarity == AssemblyManager.positive =>
posact.controlled.map(c => createInput(c, s"$mods.a")).mkString(", ")
Expand Down Expand Up @@ -238,10 +257,43 @@ class AssemblyExporter(val manager: AssemblyManager) extends LazyLogging {
precededBy(event),
event.negated,
event.evidence,
event
Some(event)
)
}
rows.toSeq

val statisticRows: Set[AssemblyRow] = (manager.getNonAssemblyMentions collect {
case significance if significance matches "Significance" =>
AssemblyRow(
significance.arguments("kind").head.text,
significance.arguments("value").head.text,
NONE,
NONE,
NONE,
NONE,
significance.label,
Set.empty,
negated = false,
Set(significance),
None
)

case interval if interval matches "Confidence_interval" =>
AssemblyRow(
interval.arguments("start").head.text,
interval.arguments("end").head.text,
NONE,
NONE,
interval.arguments("degree").head.text,
NONE,
interval.label,
Set.empty,
negated = false,
Set(interval),
None
)
}).toSet

rows.toSeq ++ statisticRows.toSeq
}

/** for debugging purposes */
Expand Down Expand Up @@ -293,6 +345,7 @@ object AssemblyExporter {
val ENTITY = "entity"
val REGULATION = "Regulation"
val ACTIVATION = "Activation"
val ASSOCIATION = "Association"
val TRANSLOCATION = "Translocation"

// context types
Expand Down Expand Up @@ -388,6 +441,7 @@ object AssemblyExporter {
def getEventLabel(e: EntityEventRepresentation): String = e match {
case reg: Regulation => s"$REGULATION (${reg.polarity})"
case act: Activation => s"$ACTIVATION (${act.polarity})"
case assoc: Association => s"$ASSOCIATION (${assoc.polarity})"
case se: SimpleEvent => se.label
case ptm: SimpleEntity if ptm.modifications.exists(_.isInstanceOf[representations.PTM]) =>
ptm.modifications.find(_.isInstanceOf[representations.PTM]).get.asInstanceOf[representations.PTM].label
Expand Down
Loading