From c75a88635fd7136fee7d563b451a06fe136341d7 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 30 Nov 2022 10:42:33 -0700 Subject: [PATCH 1/6] Combine documents all within Processor --- .../org/clulab/processors/Processor.scala | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala index 6980a9904..7b84ae8ce 100644 --- a/main/src/main/scala/org/clulab/processors/Processor.scala +++ b/main/src/main/scala/org/clulab/processors/Processor.scala @@ -12,6 +12,117 @@ trait Processor { /** Constructs a document of tokens from free text; includes sentence splitting and tokenization. */ def mkDocument (text:String, keepText:Boolean = false): Document + protected def offsetSentence(sentence: Sentence, charOffset: Int): Sentence = { + val raw = sentence.raw + val startOffsets = sentence.startOffsets.map(_ + charOffset) + val endOffsets = sentence.endOffsets.map(_ + charOffset) + val words = sentence.words + val newSentence = Sentence(raw, startOffsets, endOffsets, words) + + newSentence.tags = sentence.tags + newSentence.lemmas = sentence.lemmas + newSentence.entities = sentence.entities + newSentence.norms = sentence.norms + newSentence.chunks = sentence.chunks + newSentence.syntacticTree = sentence.syntacticTree + newSentence.graphs = sentence.graphs + newSentence.relations = sentence.relations + newSentence + } + + protected def offsetDocument(document: Document, offset: Int): Document = { + if (offset == 0) document + else { + val offsetSentences = document.sentences.map(offsetSentence(_, offset)) + val newDocument = replaceSentences(document, offsetSentences) + + newDocument + } + } + + protected def replaceSentences(document: Document, sentences: Array[Sentence]): Document = { + val newDocument = new Document(sentences) + + newDocument.id = document.id + newDocument.text = document.text + + require(newDocument.coreferenceChains.isEmpty) + require(document.coreferenceChains.isEmpty) + + document.getAttachmentKeys.foreach { attachmentKey => + require(newDocument.getAttachment(attachmentKey).forall(_ == document.getAttachment(attachmentKey).get)) + newDocument.addAttachment(attachmentKey, document.getAttachment(attachmentKey).get) + } + + val dctOpt = document.getDCT + dctOpt.foreach(newDocument.setDCT) + + newDocument + } + + // The documents here were created with Processor.mkDocument, which could have created a subclassed + // Document or documents with certain fields already filled in. This implementation only handles + // known document fields and then only performs rudimentary requirement checks to make sure that + // the documents are compatible for combination. In more complicated situations it would be necessary + // to override this method in the Processor subclass. + protected def combineDocuments(documents: IndexedSeq[Document], combinedTextOpt: Option[String]): Document = { + require(documents.length > 1) + val headDocument = documents.head + val tailDocuments = documents.tail + val combinedSentences = documents.flatMap(_.sentences).toArray + val combinedDocument = new Document(combinedSentences) + + val headId = headDocument.id + require(tailDocuments.forall(_.id == headId)) + combinedDocument.id = headId + + require(combinedDocument.text.isEmpty) + combinedDocument.text = combinedTextOpt + + // Coreference chains involve Mentions that include references to documents. The Mentions are being + // moved to a new Document and it would be infeasible to move the chains. + require(combinedDocument.coreferenceChains.isEmpty) + require(documents.forall(_.coreferenceChains.isEmpty)) + + documents.foreach { document => + document.getAttachmentKeys.foreach { attachmentKey => + require(combinedDocument.getAttachment(attachmentKey).forall(_ == document.getAttachment(attachmentKey).get)) + combinedDocument.addAttachment(attachmentKey, document.getAttachment(attachmentKey).get) + } + } + + val headDctOpt = headDocument.getDCT + require(documents.tail.forall(_.getDCT == headDctOpt)) + headDctOpt.foreach(combinedDocument.setDCT) + combinedDocument + } + + def mkDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = { + require(texts.length == separators.length) + texts.length match { + case 0 => mkDocument("", keepText) + case 1 => mkDocument(texts.head, keepText) + case _ => + val documents = texts.map(mkDocument(_, keepText)) + val offsets = texts.zip(separators).scanLeft(0) { case (offset, (text, separator)) => offset + text.length + separator.length } + val offsetDocuments = documents.zip(offsets).map { case (document, offset) => + offsetDocument(document, offset) // charOffset and wordOffset, because some things are counted in words? + } + val combinedTextOpt = + if (keepText) { + val combinedText = texts.zip(separators).foldLeft(new StringBuilder) { case (stringBuilder, (text, separator)) => + stringBuilder.append(text).append(separator) + }.toString + + Some(combinedText) + } + else None + val combinedDocument = combineDocuments(offsetDocuments, combinedTextOpt) + + combinedDocument + } + } + /** Constructs a document of tokens from an array of untokenized sentences. */ def mkDocumentFromSentences (sentences:Iterable[String], keepText:Boolean = false, From 68628db953e6ec51890dd771da62bb1e98990509 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 30 Nov 2022 11:55:35 -0700 Subject: [PATCH 2/6] Test combining documents --- .../org/clulab/processors/Processor.scala | 2 +- .../org/clulab/processors/sentences10.txt | 10 +++ .../processors/TestMkCombinedDocument.scala | 67 +++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 main/src/test/resources/org/clulab/processors/sentences10.txt create mode 100644 main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala index 7b84ae8ce..0fe777ff4 100644 --- a/main/src/main/scala/org/clulab/processors/Processor.scala +++ b/main/src/main/scala/org/clulab/processors/Processor.scala @@ -97,7 +97,7 @@ trait Processor { combinedDocument } - def mkDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = { + def mkCombinedDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = { require(texts.length == separators.length) texts.length match { case 0 => mkDocument("", keepText) diff --git a/main/src/test/resources/org/clulab/processors/sentences10.txt b/main/src/test/resources/org/clulab/processors/sentences10.txt new file mode 100644 index 000000000..d4aa03bec --- /dev/null +++ b/main/src/test/resources/org/clulab/processors/sentences10.txt @@ -0,0 +1,10 @@ +Needed lines of action will be decided on by representatives of some 50 nations . +Scarcity , not only of foodstuffs but of lumber and other forest products , textiles , seeds , fertilizers , draught power , and farm equipment will continue throughout most of Europe and Asia during the coming year . +Hopes of continued recovery in Europe 's indigenous food supplies were checked by last winter 's bad weather . +Diets in Western and Central Europe will be still lower next year , and in Asia they will remain at present very low levels , unless imports can be increased . +Even to hold the present line will require drastic action . +Minimum import needs for Europe , North Africa , and Asia in 1947/48 may be estimated at 34 to 38 million tons without allowing for any improvement in bread rations , any additional livestock feeding , or any increase in working reserves . +Against this need , supplies of grain available for export from the surplus countries may be tentatively estimated at 30 to 34 million tons . +Even with somewhat larger supplies of certain other foods particularly potatoes , sugar , and fats the situation will continue to be grim . +Cessation of UNRRA activities and accumulated foreign exchange difficulties worsen the problem for nations in a weak bargaining position . +Every delay in improving this situation further impairs the working ability of labour , slows up reconstruction , adds to the physical damage caused by prolonged undernourishment , and accelerates social unrest . diff --git a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala new file mode 100644 index 000000000..3e9e97adf --- /dev/null +++ b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala @@ -0,0 +1,67 @@ +package org.clulab.processors + +import org.clulab.processors.clu.CluProcessor +import org.clulab.serialization.DocumentSerializer +import org.clulab.utils.Closer.AutoCloser +import org.clulab.utils.{Sourcer, Test} + +import java.io.{PrintWriter, StringWriter} + +class TestMkCombinedDocument extends Test { + val sentences = Sourcer.sourceFromFilename("./main/src/test/resources/org/clulab/processors/sentences10.txt").autoClose { source => + source.getLines.toArray + } + val manySentenceLengths = Array( + Array(1, 9), + Array(9, 1), + Array(1, 1, 8), + Array(1, 8, 1), + Array(8, 1, 1), + Array(5, 5), + Array(2, 2, 2, 2, 2), + Array(1, 2, 3, 4), + Array(4, 3, 2, 1), + Array(0, 5, 0, 5, 0) + ) + val sep = " " + val documentSerializer = new DocumentSerializer() + + def toString(document: Document): String = { + val stringWriter = new StringWriter() + + new PrintWriter(stringWriter).autoClose { printWriter => + documentSerializer.save(document, printWriter) + } + stringWriter.toString + } + + behavior of "mkCombinedDocument" + + def test(sentenceLengths: Array[Int]): Unit = { + val label = sentenceLengths.mkString("[", ", ", "]") + + it should s"combine $label" in { + val sentenceStarts = sentenceLengths.scanLeft(0) { case (start, split) => start + split } + assert(sentenceStarts.last == 10) + val sentenceGroups = sentenceStarts.zip(sentenceLengths).map { case (start, length) => + sentences.slice(start, start + length).mkString(sep) + } + val separators = sentenceGroups.map { sentenceGroup => if (sentenceGroup.isEmpty) "" else sep } + val document = processor.mkCombinedDocument(sentenceGroups, separators) + val actualResult = toString(document) + + actualResult should be(expectedResult) + } + } + + val processor = new CluProcessor() + val document = processor.mkDocument(sentences.mkString(sep)) + val expectedResult = toString(document) + + manySentenceLengths.foreach { sentenceLengths => + test(sentenceLengths) + } + + + // Do another example with
in the middle. Make sure get two sentences. +} From a4c8d718b26f8a3cf979b4756086305d9fc22fc4 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 30 Nov 2022 12:59:24 -0700 Subject: [PATCH 3/6] Test with br, call them trailers --- .../org/clulab/processors/Processor.scala | 16 +++---- .../processors/TestMkCombinedDocument.scala | 47 ++++++++++++++----- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala index 0fe777ff4..d77b2f4ae 100644 --- a/main/src/main/scala/org/clulab/processors/Processor.scala +++ b/main/src/main/scala/org/clulab/processors/Processor.scala @@ -12,10 +12,10 @@ trait Processor { /** Constructs a document of tokens from free text; includes sentence splitting and tokenization. */ def mkDocument (text:String, keepText:Boolean = false): Document - protected def offsetSentence(sentence: Sentence, charOffset: Int): Sentence = { + protected def offsetSentence(sentence: Sentence, offset: Int): Sentence = { val raw = sentence.raw - val startOffsets = sentence.startOffsets.map(_ + charOffset) - val endOffsets = sentence.endOffsets.map(_ + charOffset) + val startOffsets = sentence.startOffsets.map(_ + offset) + val endOffsets = sentence.endOffsets.map(_ + offset) val words = sentence.words val newSentence = Sentence(raw, startOffsets, endOffsets, words) @@ -97,20 +97,20 @@ trait Processor { combinedDocument } - def mkCombinedDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = { - require(texts.length == separators.length) + def mkCombinedDocument(texts: IndexedSeq[String], trailers: IndexedSeq[String], keepText: Boolean = false): Document = { + require(texts.length == trailers.length) texts.length match { case 0 => mkDocument("", keepText) case 1 => mkDocument(texts.head, keepText) case _ => val documents = texts.map(mkDocument(_, keepText)) - val offsets = texts.zip(separators).scanLeft(0) { case (offset, (text, separator)) => offset + text.length + separator.length } + val offsets = texts.zip(trailers).scanLeft(0) { case (offset, (text, trailer)) => offset + text.length + trailer.length } val offsetDocuments = documents.zip(offsets).map { case (document, offset) => - offsetDocument(document, offset) // charOffset and wordOffset, because some things are counted in words? + offsetDocument(document, offset) } val combinedTextOpt = if (keepText) { - val combinedText = texts.zip(separators).foldLeft(new StringBuilder) { case (stringBuilder, (text, separator)) => + val combinedText = texts.zip(trailers).foldLeft(new StringBuilder) { case (stringBuilder, (text, separator)) => stringBuilder.append(text).append(separator) }.toString diff --git a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala index 3e9e97adf..bc6f3693c 100644 --- a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala +++ b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala @@ -21,47 +21,68 @@ class TestMkCombinedDocument extends Test { Array(2, 2, 2, 2, 2), Array(1, 2, 3, 4), Array(4, 3, 2, 1), - Array(0, 5, 0, 5, 0) + Array(0, 5, 0, 5) ) - val sep = " " + val separator = " " val documentSerializer = new DocumentSerializer() + val processor = new CluProcessor() def toString(document: Document): String = { val stringWriter = new StringWriter() new PrintWriter(stringWriter).autoClose { printWriter => - documentSerializer.save(document, printWriter) + documentSerializer.save(document, printWriter, keepText = true) } stringWriter.toString } behavior of "mkCombinedDocument" - def test(sentenceLengths: Array[Int]): Unit = { + def test(sentenceLengths: Array[Int], expectedResult: String): Unit = { val label = sentenceLengths.mkString("[", ", ", "]") it should s"combine $label" in { val sentenceStarts = sentenceLengths.scanLeft(0) { case (start, split) => start + split } assert(sentenceStarts.last == 10) val sentenceGroups = sentenceStarts.zip(sentenceLengths).map { case (start, length) => - sentences.slice(start, start + length).mkString(sep) + sentences.slice(start, start + length).mkString(separator) + } + // + val trailers = sentenceGroups.zipWithIndex.map { case (sentenceGroup, index) => + if (sentenceGroup.isEmpty || index == sentenceGroups.indices.last) "" + else separator } - val separators = sentenceGroups.map { sentenceGroup => if (sentenceGroup.isEmpty) "" else sep } - val document = processor.mkCombinedDocument(sentenceGroups, separators) + val document = processor.mkCombinedDocument(sentenceGroups, trailers, keepText = true) val actualResult = toString(document) actualResult should be(expectedResult) } } - val processor = new CluProcessor() - val document = processor.mkDocument(sentences.mkString(sep)) - val expectedResult = toString(document) + { + val document = processor.mkDocument(sentences.mkString(separator), keepText = true) + val expectedResult = toString(document) - manySentenceLengths.foreach { sentenceLengths => - test(sentenceLengths) + manySentenceLengths.foreach { sentenceLengths => + test(sentenceLengths, expectedResult) + } } + behavior of "dynamically separated texts" - // Do another example with
in the middle. Make sure get two sentences. + it should "combine as expected" in { + val text = "I found this text
on a web page." + val separator = "
" + val texts = text.split(separator) + val indices = texts.indices + val trailers = indices.map { index => if (index != indices.last) separator else "" } + val document = processor.mkCombinedDocument(texts, trailers, keepText = true) + + document.text.get should be (text) + document.sentences.length should be (2) + + document.sentences.foreach { sentence => + sentence.words should not contain(separator) + } + } } From f56d46765bb430d5b6462be6101539a84890ce1d Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 30 Nov 2022 14:00:47 -0700 Subject: [PATCH 4/6] Test more with br --- .../processors/TestMkCombinedDocument.scala | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala index bc6f3693c..3ce3f6edc 100644 --- a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala +++ b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala @@ -70,7 +70,31 @@ class TestMkCombinedDocument extends Test { behavior of "dynamically separated texts" - it should "combine as expected" in { + it should "include separators in both text and words" in { + val text = "I found this text
on a web page." + val separator = "
" + val texts = text.split(separator) + val dirtyTexts = texts.zipWithIndex.map { case (text, index) => + if (index != texts.indices.last) text + separator + else text + } + val indices = texts.indices + val trailers = indices.map { _ => "" } + val document = processor.mkCombinedDocument(dirtyTexts, trailers, keepText = true) + + document.text.get should be (text) + document.sentences.length should be (indices.length) + + document.sentences.zipWithIndex.foreach { case (sentence, index) => + if (index != indices.last) + sentence.words should contain (separator) + else + sentence.words should not contain (separator) + } + } + + // This is thought to be the standard case. + it should "include separators in text but not words" in { val text = "I found this text
on a web page." val separator = "
" val texts = text.split(separator) @@ -79,10 +103,28 @@ class TestMkCombinedDocument extends Test { val document = processor.mkCombinedDocument(texts, trailers, keepText = true) document.text.get should be (text) - document.sentences.length should be (2) + document.sentences.length should be (indices.length) document.sentences.foreach { sentence => sentence.words should not contain(separator) } } + + it should "include separators in neither text nor words" in { + val text = "I found this text
on a web page." + val separator = "
" + val cleanSeparator = " " + val cleanText = text.replace(separator, cleanSeparator) + val texts = text.split(separator) + val indices = texts.indices + val trailers = indices.map { index => if (index != indices.last) cleanSeparator else "" } + val document = processor.mkCombinedDocument(texts, trailers, keepText = true) + + document.text.get should be(cleanText) + document.sentences.length should be(indices.length) + + document.sentences.foreach { sentence => + sentence.words should not contain (separator) + } + } } From d2349f22d94972ad9e2a9d566879ff72e87efa51 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 9 Jan 2023 20:10:39 -0700 Subject: [PATCH 5/6] Move methods to proper classes --- .../org/clulab/processors/Document.scala | 29 +++++++++++ .../org/clulab/processors/Processor.scala | 50 +------------------ .../org/clulab/processors/Sentence.scala | 19 +++++++ 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/main/src/main/scala/org/clulab/processors/Document.scala b/main/src/main/scala/org/clulab/processors/Document.scala index 140bc2f49..3b71d5813 100644 --- a/main/src/main/scala/org/clulab/processors/Document.scala +++ b/main/src/main/scala/org/clulab/processors/Document.scala @@ -184,7 +184,36 @@ class Document(val sentences: Array[Sentence]) extends Serializable { } } }) + } + + protected def replaceSentences(sentences: Array[Sentence]): Document = { + val newDocument = new Document(sentences) + + newDocument.id = id + newDocument.text = text + + require(newDocument.coreferenceChains.isEmpty) + require(coreferenceChains.isEmpty) + + getAttachmentKeys.foreach { attachmentKey => + require(newDocument.getAttachment(attachmentKey).forall(_ == getAttachment(attachmentKey).get)) + newDocument.addAttachment(attachmentKey, getAttachment(attachmentKey).get) + } + val dctOpt = getDCT + dctOpt.foreach(newDocument.setDCT) + + newDocument + } + + def offset(offset: Int): Document = { + if (offset == 0) this + else { + val offsetSentences = sentences.map(_.offset(offset)) + val newDocument = replaceSentences(offsetSentences) + + newDocument + } } } diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala index d77b2f4ae..52af1288b 100644 --- a/main/src/main/scala/org/clulab/processors/Processor.scala +++ b/main/src/main/scala/org/clulab/processors/Processor.scala @@ -12,54 +12,6 @@ trait Processor { /** Constructs a document of tokens from free text; includes sentence splitting and tokenization. */ def mkDocument (text:String, keepText:Boolean = false): Document - protected def offsetSentence(sentence: Sentence, offset: Int): Sentence = { - val raw = sentence.raw - val startOffsets = sentence.startOffsets.map(_ + offset) - val endOffsets = sentence.endOffsets.map(_ + offset) - val words = sentence.words - val newSentence = Sentence(raw, startOffsets, endOffsets, words) - - newSentence.tags = sentence.tags - newSentence.lemmas = sentence.lemmas - newSentence.entities = sentence.entities - newSentence.norms = sentence.norms - newSentence.chunks = sentence.chunks - newSentence.syntacticTree = sentence.syntacticTree - newSentence.graphs = sentence.graphs - newSentence.relations = sentence.relations - newSentence - } - - protected def offsetDocument(document: Document, offset: Int): Document = { - if (offset == 0) document - else { - val offsetSentences = document.sentences.map(offsetSentence(_, offset)) - val newDocument = replaceSentences(document, offsetSentences) - - newDocument - } - } - - protected def replaceSentences(document: Document, sentences: Array[Sentence]): Document = { - val newDocument = new Document(sentences) - - newDocument.id = document.id - newDocument.text = document.text - - require(newDocument.coreferenceChains.isEmpty) - require(document.coreferenceChains.isEmpty) - - document.getAttachmentKeys.foreach { attachmentKey => - require(newDocument.getAttachment(attachmentKey).forall(_ == document.getAttachment(attachmentKey).get)) - newDocument.addAttachment(attachmentKey, document.getAttachment(attachmentKey).get) - } - - val dctOpt = document.getDCT - dctOpt.foreach(newDocument.setDCT) - - newDocument - } - // The documents here were created with Processor.mkDocument, which could have created a subclassed // Document or documents with certain fields already filled in. This implementation only handles // known document fields and then only performs rudimentary requirement checks to make sure that @@ -106,7 +58,7 @@ trait Processor { val documents = texts.map(mkDocument(_, keepText)) val offsets = texts.zip(trailers).scanLeft(0) { case (offset, (text, trailer)) => offset + text.length + trailer.length } val offsetDocuments = documents.zip(offsets).map { case (document, offset) => - offsetDocument(document, offset) + document.offset(offset) } val combinedTextOpt = if (keepText) { diff --git a/main/src/main/scala/org/clulab/processors/Sentence.scala b/main/src/main/scala/org/clulab/processors/Sentence.scala index c9011e11f..e95bf9a55 100644 --- a/main/src/main/scala/org/clulab/processors/Sentence.scala +++ b/main/src/main/scala/org/clulab/processors/Sentence.scala @@ -173,6 +173,25 @@ class Sentence( reverted } + + def offset(offset: Int): Sentence = { + if (offset == 0) this + else { + val newStartOffsets = startOffsets.map(_ + offset) + val newEndOffsets = endOffsets.map(_ + offset) + val newSentence = Sentence(raw, newStartOffsets, newEndOffsets, words) + + newSentence.tags = tags + newSentence.lemmas = lemmas + newSentence.entities = entities + newSentence.norms = norms + newSentence.chunks = chunks + newSentence.syntacticTree = syntacticTree + newSentence.graphs = graphs + newSentence.relations = relations + newSentence + } + } } object Sentence { From 0f761220855dfc30869cf03af0844974ac0fadd4 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Tue, 10 Jan 2023 12:41:12 -0700 Subject: [PATCH 6/6] Compile for Scala 3 --- main/src/main/scala/org/clulab/processors/Sentence.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main/src/main/scala/org/clulab/processors/Sentence.scala b/main/src/main/scala/org/clulab/processors/Sentence.scala index 284b83928..1787ba1a1 100644 --- a/main/src/main/scala/org/clulab/processors/Sentence.scala +++ b/main/src/main/scala/org/clulab/processors/Sentence.scala @@ -5,7 +5,6 @@ import org.clulab.struct.{DirectedGraph, GraphMap, RelationTriple, Tree} import org.clulab.struct.GraphMap._ import org.clulab.utils.SeqUtils -import scala.collection.immutable.Range import scala.collection.mutable import scala.util.hashing.MurmurHash3._ @@ -178,8 +177,8 @@ class Sentence( def offset(offset: Int): Sentence = { if (offset == 0) this else { - val newStartOffsets = startOffsets.map(_ + offset) - val newEndOffsets = endOffsets.map(_ + offset) + val newStartOffsets = startOffsets.map(_ + offset).toArray + val newEndOffsets = endOffsets.map(_ + offset).toArray val newSentence = Sentence(raw, newStartOffsets, newEndOffsets, words) newSentence.tags = tags