Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Makefile
project/

# sbt specific
.bsp
dist/*
target/
lib_managed/
Expand Down
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
val scala11 = "2.11.12" // up to 2.11.12
val scala12 = "2.12.13" // up to 2.12.13
val scala13 = "2.13.5" // up to 2.13.5
val scala12 = "2.12.15" // up to 2.12.15
val scala13 = "2.13.8" // up to 2.13.8
// scala13 is waiting on ai.lum %% common.

ThisBuild / crossScalaVersions := Seq(scala12, scala11)
Expand Down
2 changes: 1 addition & 1 deletion corenlp/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name := "processors-corenlp"
description := "processors-corenlp"

libraryDependencies ++= {
val corenlpV = "3.9.2"
val corenlpV = "4.4.0"

Seq (
// this sub-project depends on CoreNLP
Expand Down
23 changes: 23 additions & 0 deletions corenlp/buildinfo.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
enablePlugins(BuildInfoPlugin)

// Exposes the version of the stanford-corenlp dependency to the compiled code as
// BuildInfo.stanfordVersion, a Map with the keys "major", "minor" and "revision".
buildInfoKeys := {
  val stanfordVersion: Map[String, Int] = {
    // Fail the build with a descriptive message rather than a bare
    // NoSuchElementException if the dependency is not declared.
    val versionString = libraryDependencies.value
      .find(_.name == "stanford-corenlp")
      .map(_.revision)
      .getOrElse(sys.error("The stanford-corenlp dependency was not found in libraryDependencies."))

    // The version must consist of exactly three integers separated by dots.
    // Anything else is reported explicitly instead of surfacing as a
    // MatchError or NumberFormatException.
    scala.util.Try(versionString.split('.').map(_.toInt)).toOption match {
      case Some(Array(major, minor, revision)) =>
        Map(
          "major" -> major,
          "minor" -> minor,
          "revision" -> revision
        )
      case _ =>
        sys.error(
          s"""The stanford-corenlp version "$versionString" is not formatted as major.minor.revision."""
        )
    }
  }

  Seq[BuildInfoKey](
    "stanfordVersion" -> stanfordVersion
  )
}
buildInfoPackage := "org.clulab.processors.corenlp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.clulab.processors.corenlp

/** A three-part version number of the form major.minor.revision. */
case class Version(major: Int, minor: Int, revision: Int)

object Version {

  /** Builds a [[Version]] from a map holding the keys "major", "minor" and "revision".
    *
    * A missing key results in the NoSuchElementException thrown by the map lookup.
    */
  def apply(version: Map[String, Int]): Version = {
    val majorPart = version("major")
    val minorPart = version("minor")
    val revisionPart = version("revision")

    new Version(majorPart, minorPart, revisionPart)
  }

  /** The version taken from BuildInfo.stanfordVersion. */
  val stanford: Version = Version(BuildInfo.stanfordVersion)
}
Original file line number Diff line number Diff line change
Expand Up @@ -354,15 +354,15 @@ object ShallowNLPProcessor {
// character offsets and actual text
val sentStartOffset = sentence.startOffsets.head
val sentEndOffset = sentence.endOffsets.last
crtSent.set(classOf[CharacterOffsetBeginAnnotation], new Integer(sentStartOffset))
crtSent.set(classOf[CharacterOffsetEndAnnotation], new Integer(sentEndOffset))
crtSent.set(classOf[CharacterOffsetBeginAnnotation], Integer.valueOf(sentStartOffset))
crtSent.set(classOf[CharacterOffsetEndAnnotation], Integer.valueOf(sentEndOffset))
crtSent.set(classOf[TextAnnotation], doc.text.get.substring(sentStartOffset, sentEndOffset))

// token and sentence offsets
crtSent.set(classOf[TokenBeginAnnotation], new Integer(tokenOffset))
crtSent.set(classOf[TokenBeginAnnotation], Integer.valueOf(tokenOffset))
tokenOffset += crtTokens.size()
crtSent.set(classOf[TokenEndAnnotation], new Integer(tokenOffset))
crtSent.set(classOf[SentenceIndexAnnotation], new Integer(sentOffset)) // Stanford counts sentences starting from 0
crtSent.set(classOf[TokenEndAnnotation], Integer.valueOf(tokenOffset))
crtSent.set(classOf[SentenceIndexAnnotation], Integer.valueOf(sentOffset)) // Stanford counts sentences starting from 0

sentencesAnnotation.add(crtSent)
sentOffset += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ package org.clulab.processors

import org.clulab.processors.shallownlp.ShallowNLPProcessor
import org.scalatest._

import org.clulab.processors.corenlp.CoreNLPProcessor
import org.clulab.processors.corenlp.{CoreNLPProcessor, Version}
import org.clulab.struct.CorefMention

/**
Expand Down Expand Up @@ -123,7 +122,9 @@ class TestCoreNLPProcessor extends FlatSpec with Matchers {
doc.sentences(0).tags.get(0) should be ("NNP")
doc.sentences(0).tags.get(1) should be ("NNP")
doc.sentences(0).tags.get(2) should be ("VBD")
doc.sentences(0).tags.get(3) should be ("TO")
doc.sentences(0).tags.get(3) should be (
if (Version.stanford.major < 4) "TO" else "IN"
)
doc.sentences(0).tags.get(4) should be ("NNP")
doc.sentences(0).tags.get(5) should be (".")
doc.sentences(1).tags.get(0) should be ("RB")
Expand Down Expand Up @@ -164,7 +165,9 @@ class TestCoreNLPProcessor extends FlatSpec with Matchers {

doc.sentences.head.universalBasicDependencies.get.hasEdge(1, 0, "compound") should be (true)
doc.sentences.head.universalBasicDependencies.get.hasEdge(2, 1, "nsubj") should be (true)
doc.sentences.head.universalBasicDependencies.get.hasEdge(2, 4, "nmod") should be (true)
doc.sentences.head.universalBasicDependencies.get.hasEdge(
2, 4, if (Version.stanford.major < 4) "nmod" else "obl"
) should be (true)
doc.sentences.head.universalBasicDependencies.get.hasEdge(4, 3, "case") should be (true)

doc.sentences.head.syntacticTree.foreach(t => {
Expand Down Expand Up @@ -252,9 +255,11 @@ class TestCoreNLPProcessor extends FlatSpec with Matchers {

println(doc.sentences.head.universalBasicDependencies.get)

doc.sentences.head.universalBasicDependencies.get.hasEdge(4, 6, "dep") should be (true) // this probably should be "appos", but oh well...
doc.sentences.head.universalBasicDependencies.get.hasEdge(16, 18, "appos") should be (true)

// TODO: with CoreNLP >= v4, this tree is completely foobar... so it is not tested.
if (Version.stanford.major < 4) {
doc.sentences.head.universalBasicDependencies.get.hasEdge(4, 6, "dep") should be (true) // this probably should be "appos", but oh well...
doc.sentences.head.universalBasicDependencies.get.hasEdge(16, 18, "appos") should be (true)
}
}

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.clulab.processors

import org.clulab.dynet.Utils
import org.clulab.processors.corenlp.Version
import org.clulab.processors.shallownlp.ShallowNLPProcessor
import org.scalatest._
import org.clulab.processors.fastnlp.FastNLPProcessorWithSemanticRoles
Expand All @@ -19,7 +20,9 @@ class TestFastNLPProcessor extends FlatSpec with Matchers {

doc.sentences.head.dependencies.get.hasEdge(1, 0, "compound") should be (true)
doc.sentences.head.dependencies.get.hasEdge(2, 1, "nsubj") should be (true)
doc.sentences.head.dependencies.get.hasEdge(2, 4, "nmod_to") should be (true)
doc.sentences.head.dependencies.get.hasEdge(
2, 4, if (Version.stanford.major < 4) "nmod_to" else "obl_to"
) should be (true)

/*
val it = new DirectedGraphEdgeIterator[String](doc.sentences.head.dependencies.get)
Expand All @@ -35,7 +38,7 @@ class TestFastNLPProcessor extends FlatSpec with Matchers {

//println(doc.sentences.head.dependencies)
doc.sentences.head.dependencies.get.hasEdge(1, 0, "nsubj") should be (true)
doc.sentences.head.dependencies.get.hasEdge(1, 3, "dobj") should be (true)
doc.sentences.head.dependencies.get.hasEdge(1, 3, "obj") should be (true)
doc.sentences.head.dependencies.get.hasEdge(1, 4, "punct") should be (true)
doc.sentences.head.dependencies.get.hasEdge(3, 2, "det") should be (true)
}
Expand Down Expand Up @@ -78,7 +81,10 @@ class TestFastNLPProcessor extends FlatSpec with Matchers {

println(doc.sentences.head.universalBasicDependencies.get)

doc.sentences.head.universalBasicDependencies.get.hasEdge(4, 6, "appos") should be (true)
// TODO: this should be (4, 6, "appos") - CoreNLP is incorrect here
doc.sentences.head.universalBasicDependencies.get.hasEdge(
if (Version.stanford.major < 4) 4 else 2, 6, "appos"
) should be (true)
doc.sentences.head.universalBasicDependencies.get.hasEdge(16, 18, "appos") should be (true)
}

Expand Down
2 changes: 1 addition & 1 deletion main/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pomIncludeRepository := { (repo: MavenRepository) =>
}

// for processors-models
resolvers += "Artifactory" at "http://artifactory.cs.arizona.edu:8081/artifactory/sbt-release"
resolvers += ("Artifactory" at "http://artifactory.cs.arizona.edu:8081/artifactory/sbt-release").withAllowInsecureProtocol(true)

libraryDependencies ++= {
val json4sVersion = "3.5.2"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,19 @@ object WordEmbeddingMapPool {
def loadEmbedding(name: String, fileLocation: String, resourceLocation: String, compact: Boolean): WordEmbeddingMap = {
val StreamResult(inputStream, _, format) = inputStreamer.stream(name, fileLocation, resourceLocation)
.getOrElse(throw new RuntimeException(s"WordEmbeddingMap $name could not be opened."))
val wordEmbeddingMap = inputStream.autoClose { inputStream =>
val binary = format == InputStreamer.Format.Bin
val wordEmbeddingMap = {
// This is intentionally not using autoClose because the inputStream might be a
// JarURLConnection#JarURLInputStream which can't be autoClosed in newer (> 1.8)
// versions of Java. See further explanation in InputStreamer.
try {
val binary = format == InputStreamer.Format.Bin

if (compact) CompactWordEmbeddingMap(inputStream, binary)
else ExplicitWordEmbeddingMap(inputStream, binary)
if (compact) CompactWordEmbeddingMap(inputStream, binary)
else ExplicitWordEmbeddingMap(inputStream, binary)
}
finally {
inputStream.close()
}
}

wordEmbeddingMap
Expand Down
22 changes: 21 additions & 1 deletion main/src/main/scala/org/clulab/utils/InputStreamer.scala
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
package org.clulab.utils

import sun.net.www.protocol.jar.JarURLConnection

import java.io.FileInputStream
import java.io.InputStream

import scala.util.Failure
import scala.util.Try

/** Wraps an InputStream and forwards every operation to it.
  *
  * The wrapper's own close() is a public method of a public class, so it can be
  * reflected upon regardless of the concrete class of the wrapped stream.
  *
  * All of the InputStream operations are delegated, not just the single-byte read().
  * Without that, bulk reads fall back to InputStream's default implementation, which
  * calls read() once per byte, and available/skip/mark/reset would ignore the wrapped
  * stream altogether.
  *
  * @param inputStream the stream to which all calls are forwarded
  */
class PublicCloseInputStream(inputStream: InputStream) extends InputStream {

  override def read(): Int = inputStream.read()

  // Delegating the bulk read lets the wrapped stream fill the buffer in one call.
  // The inherited read(Array[Byte]) routes through this method as well.
  override def read(b: Array[Byte], off: Int, len: Int): Int = inputStream.read(b, off, len)

  override def skip(n: Long): Long = inputStream.skip(n)

  override def available(): Int = inputStream.available()

  override def mark(readlimit: Int): Unit = inputStream.mark(readlimit)

  override def reset(): Unit = inputStream.reset()

  override def markSupported(): Boolean = inputStream.markSupported()

  // This can be reflected upon.
  override def close(): Unit = inputStream.close()
}

class InputStreamer(val provider: AnyRef = InputStreamer, direct: Boolean = true) {
import InputStreamer.Format
import InputStreamer.StreamResult
Expand All @@ -19,6 +28,17 @@ class InputStreamer(val provider: AnyRef = InputStreamer, direct: Boolean = true
else provider.getClass.getClassLoader.getResourceAsStream(name)

Option(inputStream).getOrElse(throw new RuntimeException(s"Resource $name not found."))
// The inputStream may be a JarURLConnection#JarURLInputStream which is Closeable but
// whose close method is not discoverable using reflection at runtime so that an AutoClose
// cannot be constructed without JVM options like --add-opens which we want to avoid.
// The exception is this:
// Cause: java.lang.reflect.InaccessibleObjectException: Unable to make public void
// sun.net.www.protocol.jar.JarURLConnection$JarURLInputStream.close() throws
// java.io.IOException accessible: module java.base does not "opens sun.net.www.protocol.jar"
// to unnamed module @5ffead27.
// Update: Use of PublicCloseInputStream was found to be incredibly slow, so the use of
// autoClose in WordEmbeddingMapPool was replaced by a try and finally on the raw inputStream.
// new PublicCloseInputStream(inputStream)
}

protected def getInputStream(name: String, fileLocation: String, resourceLocation: String,
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version = 1.2.8
sbt.version = 1.6.2
7 changes: 4 additions & 3 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Latest version numbers were updated on 2021 Mar 11.
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.2-1") // up to 1.1.2-1 *
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.3") // up to 3.9.6 *
addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.13") // up to 1.0.13
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.10.0") // up to 0.10.0
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.2-1") // up to 1.1.2-1 *
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.3") // up to 3.9.6 *
addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.13") // up to 1.0.13
// * Held back out of an abundance of caution.