From c75a88635fd7136fee7d563b451a06fe136341d7 Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Wed, 30 Nov 2022 10:42:33 -0700
Subject: [PATCH 1/6] Combine documents all within Processor

---
 .../org/clulab/processors/Processor.scala     | 111 ++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala
index 6980a9904..7b84ae8ce 100644
--- a/main/src/main/scala/org/clulab/processors/Processor.scala
+++ b/main/src/main/scala/org/clulab/processors/Processor.scala
@@ -12,6 +12,117 @@ trait Processor {
   /** Constructs a document of tokens from free text; includes sentence splitting and tokenization. */
   def mkDocument (text:String, keepText:Boolean = false): Document
 
+  protected def offsetSentence(sentence: Sentence, charOffset: Int): Sentence = {
+    val raw = sentence.raw
+    val startOffsets = sentence.startOffsets.map(_ + charOffset)
+    val endOffsets = sentence.endOffsets.map(_ + charOffset)
+    val words = sentence.words
+    val newSentence = Sentence(raw, startOffsets, endOffsets, words)
+
+    newSentence.tags = sentence.tags
+    newSentence.lemmas = sentence.lemmas
+    newSentence.entities = sentence.entities
+    newSentence.norms = sentence.norms
+    newSentence.chunks = sentence.chunks
+    newSentence.syntacticTree = sentence.syntacticTree
+    newSentence.graphs = sentence.graphs
+    newSentence.relations = sentence.relations
+    newSentence
+  }
+
+  protected def offsetDocument(document: Document, offset: Int): Document = {
+    if (offset == 0) document
+    else {
+      val offsetSentences = document.sentences.map(offsetSentence(_, offset))
+      val newDocument = replaceSentences(document, offsetSentences)
+
+      newDocument
+    }
+  }
+
+  protected def replaceSentences(document: Document, sentences: Array[Sentence]): Document = {
+    val newDocument = new Document(sentences)
+
+    newDocument.id = document.id
+    newDocument.text = document.text
+
+    require(newDocument.coreferenceChains.isEmpty)
+    require(document.coreferenceChains.isEmpty)
+
+    document.getAttachmentKeys.foreach { attachmentKey =>
+      require(newDocument.getAttachment(attachmentKey).forall(_ == document.getAttachment(attachmentKey).get))
+      newDocument.addAttachment(attachmentKey, document.getAttachment(attachmentKey).get)
+    }
+
+    val dctOpt = document.getDCT
+    dctOpt.foreach(newDocument.setDCT)
+
+    newDocument
+  }
+
+  // The documents here were created with Processor.mkDocument, which could have created a subclassed
+  // Document or documents with certain fields already filled in.  This implementation only handles
+  // known document fields and then only performs rudimentary requirement checks to make sure that
+  // the documents are compatible for combination.  In more complicated situations it would be necessary
+  // to override this method in the Processor subclass.
+  protected def combineDocuments(documents: IndexedSeq[Document], combinedTextOpt: Option[String]): Document = {
+    require(documents.length > 1)
+    val headDocument = documents.head
+    val tailDocuments = documents.tail
+    val combinedSentences = documents.flatMap(_.sentences).toArray
+    val combinedDocument = new Document(combinedSentences)
+
+    val headId = headDocument.id
+    require(tailDocuments.forall(_.id == headId))
+    combinedDocument.id = headId
+
+    require(combinedDocument.text.isEmpty)
+    combinedDocument.text = combinedTextOpt
+
+    // Coreference chains involve Mentions that include references to documents.  The Mentions are being
+    // moved to a new Document and it would be infeasible to move the chains.
+    require(combinedDocument.coreferenceChains.isEmpty)
+    require(documents.forall(_.coreferenceChains.isEmpty))
+
+    documents.foreach { document =>
+      document.getAttachmentKeys.foreach { attachmentKey =>
+        require(combinedDocument.getAttachment(attachmentKey).forall(_ == document.getAttachment(attachmentKey).get))
+        combinedDocument.addAttachment(attachmentKey, document.getAttachment(attachmentKey).get)
+      }
+    }
+
+    val headDctOpt = headDocument.getDCT
+    require(documents.tail.forall(_.getDCT == headDctOpt))
+    headDctOpt.foreach(combinedDocument.setDCT)
+    combinedDocument
+  }
+
+  def mkDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = {
+    require(texts.length == separators.length)
+    texts.length match {
+      case 0 => mkDocument("", keepText)
+      case 1 => mkDocument(texts.head, keepText)
+      case _ =>
+        val documents = texts.map(mkDocument(_, keepText))
+        val offsets = texts.zip(separators).scanLeft(0) { case (offset, (text, separator)) => offset + text.length + separator.length }
+        val offsetDocuments = documents.zip(offsets).map { case (document, offset) =>
+          offsetDocument(document, offset) // charOffset and wordOffset, because some things are counted in words?
+        }
+        val combinedTextOpt =
+            if (keepText) {
+              val combinedText = texts.zip(separators).foldLeft(new StringBuilder) { case (stringBuilder, (text, separator)) =>
+                stringBuilder.append(text).append(separator)
+              }.toString
+
+              Some(combinedText)
+            }
+            else None
+        val combinedDocument = combineDocuments(offsetDocuments, combinedTextOpt)
+
+        combinedDocument
+    }
+  }
+
   /** Constructs a document of tokens from an array of untokenized sentences. */
   def mkDocumentFromSentences (sentences:Iterable[String],
                                keepText:Boolean = false,

From 68628db953e6ec51890dd771da62bb1e98990509 Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Wed, 30 Nov 2022 11:55:35 -0700
Subject: [PATCH 2/6] Test combining documents

---
 .../org/clulab/processors/Processor.scala     |  2 +-
 .../org/clulab/processors/sentences10.txt     | 10 +++
 .../processors/TestMkCombinedDocument.scala   | 67 +++++++++++++++++++
 3 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 main/src/test/resources/org/clulab/processors/sentences10.txt
 create mode 100644 main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala

diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala
index 7b84ae8ce..0fe777ff4 100644
--- a/main/src/main/scala/org/clulab/processors/Processor.scala
+++ b/main/src/main/scala/org/clulab/processors/Processor.scala
@@ -97,7 +97,7 @@ trait Processor {
     combinedDocument
   }
 
-  def mkDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = {
+  def mkCombinedDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = {
     require(texts.length == separators.length)
     texts.length match {
       case 0 => mkDocument("", keepText)
diff --git a/main/src/test/resources/org/clulab/processors/sentences10.txt b/main/src/test/resources/org/clulab/processors/sentences10.txt
new file mode 100644
index 000000000..d4aa03bec
--- /dev/null
+++ b/main/src/test/resources/org/clulab/processors/sentences10.txt
@@ -0,0 +1,10 @@
+Needed lines of action will be decided on by representatives of some 50 nations .
+Scarcity , not only of foodstuffs but of lumber and other forest products , textiles , seeds , fertilizers , draught power , and farm equipment will continue throughout most of Europe and Asia during the coming year .
+Hopes of continued recovery in Europe 's indigenous food supplies were checked by last winter 's bad weather .
+Diets in Western and Central Europe will be still lower next year , and in Asia they will remain at present very low levels , unless imports can be increased .
+Even to hold the present line will require drastic action .
+Minimum import needs for Europe , North Africa , and Asia in 1947/48 may be estimated at 34 to 38 million tons without allowing for any improvement in bread rations , any additional livestock feeding , or any increase in working reserves .
+Against this need , supplies of grain available for export from the surplus countries may be tentatively estimated at 30 to 34 million tons .
+Even with somewhat larger supplies of certain other foods particularly potatoes , sugar , and fats the situation will continue to be grim .
+Cessation of UNRRA activities and accumulated foreign exchange difficulties worsen the problem for nations in a weak bargaining position .
+Every delay in improving this situation further impairs the working ability of labour , slows up reconstruction , adds to the physical damage caused by prolonged undernourishment , and accelerates social unrest .
diff --git a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
new file mode 100644
index 000000000..3e9e97adf
--- /dev/null
+++ b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
@@ -0,0 +1,67 @@
+package org.clulab.processors
+
+import org.clulab.processors.clu.CluProcessor
+import org.clulab.serialization.DocumentSerializer
+import org.clulab.utils.Closer.AutoCloser
+import org.clulab.utils.{Sourcer, Test}
+
+import java.io.{PrintWriter, StringWriter}
+
+class TestMkCombinedDocument extends Test {
+  val sentences = Sourcer.sourceFromFilename("./main/src/test/resources/org/clulab/processors/sentences10.txt").autoClose { source =>
+    source.getLines.toArray
+  }
+  val manySentenceLengths = Array(
+    Array(1, 9),
+    Array(9, 1),
+    Array(1, 1, 8),
+    Array(1, 8, 1),
+    Array(8, 1, 1),
+    Array(5, 5),
+    Array(2, 2, 2, 2, 2),
+    Array(1, 2, 3, 4),
+    Array(4, 3, 2, 1),
+    Array(0, 5, 0, 5, 0)
+  )
+  val sep = "  "
+  val documentSerializer = new DocumentSerializer()
+
+  def toString(document: Document): String = {
+    val stringWriter = new StringWriter()
+
+    new PrintWriter(stringWriter).autoClose { printWriter =>
+      documentSerializer.save(document, printWriter)
+    }
+    stringWriter.toString
+  }
+
+  behavior of "mkCombinedDocument"
+
+  def test(sentenceLengths: Array[Int]): Unit = {
+    val label = sentenceLengths.mkString("[", ", ", "]")
+
+    it should s"combine $label" in {
+      val sentenceStarts = sentenceLengths.scanLeft(0) { case (start, split) => start + split }
+      assert(sentenceStarts.last == 10)
+      val sentenceGroups = sentenceStarts.zip(sentenceLengths).map { case (start, length) =>
+        sentences.slice(start, start + length).mkString(sep)
+      }
+      val separators = sentenceGroups.map { sentenceGroup => if (sentenceGroup.isEmpty) "" else sep }
+      val document = processor.mkCombinedDocument(sentenceGroups, separators)
+      val actualResult = toString(document)
+
+      actualResult should be(expectedResult)
+    }
+  }
+
+  val processor = new CluProcessor()
+  val document = processor.mkDocument(sentences.mkString(sep))
+  val expectedResult = toString(document)
+
+  manySentenceLengths.foreach { sentenceLengths =>
+    test(sentenceLengths)
+  }
+
+
+  // Do another example with <br> in the middle.  Make sure get two sentences.
+}

From a4c8d718b26f8a3cf979b4756086305d9fc22fc4 Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Wed, 30 Nov 2022 12:59:24 -0700
Subject: [PATCH 3/6] Test with br, call them trailers

---
 .../org/clulab/processors/Processor.scala     | 16 +++----
 .../processors/TestMkCombinedDocument.scala   | 47 ++++++++++++++-----
 2 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala
index 0fe777ff4..d77b2f4ae 100644
--- a/main/src/main/scala/org/clulab/processors/Processor.scala
+++ b/main/src/main/scala/org/clulab/processors/Processor.scala
@@ -12,10 +12,10 @@ trait Processor {
   /** Constructs a document of tokens from free text; includes sentence splitting and tokenization. */
   def mkDocument (text:String, keepText:Boolean = false): Document
 
-  protected def offsetSentence(sentence: Sentence, charOffset: Int): Sentence = {
+  protected def offsetSentence(sentence: Sentence, offset: Int): Sentence = {
     val raw = sentence.raw
-    val startOffsets = sentence.startOffsets.map(_ + charOffset)
-    val endOffsets = sentence.endOffsets.map(_ + charOffset)
+    val startOffsets = sentence.startOffsets.map(_ + offset)
+    val endOffsets = sentence.endOffsets.map(_ + offset)
     val words = sentence.words
     val newSentence = Sentence(raw, startOffsets, endOffsets, words)
 
@@ -97,20 +97,20 @@ trait Processor {
     combinedDocument
   }
 
-  def mkCombinedDocument(texts: IndexedSeq[String], separators: IndexedSeq[String], keepText: Boolean = false): Document = {
-    require(texts.length == separators.length)
+  def mkCombinedDocument(texts: IndexedSeq[String], trailers: IndexedSeq[String], keepText: Boolean = false): Document = {
+    require(texts.length == trailers.length)
     texts.length match {
       case 0 => mkDocument("", keepText)
       case 1 => mkDocument(texts.head, keepText)
       case _ =>
         val documents = texts.map(mkDocument(_, keepText))
-        val offsets = texts.zip(separators).scanLeft(0) { case (offset, (text, separator)) => offset + text.length + separator.length }
+        val offsets = texts.zip(trailers).scanLeft(0) { case (offset, (text, trailer)) => offset + text.length + trailer.length }
         val offsetDocuments = documents.zip(offsets).map { case (document, offset) =>
-          offsetDocument(document, offset) // charOffset and wordOffset, because some things are counted in words?
+          offsetDocument(document, offset)
         }
         val combinedTextOpt =
             if (keepText) {
-              val combinedText = texts.zip(separators).foldLeft(new StringBuilder) { case (stringBuilder, (text, separator)) =>
+              val combinedText = texts.zip(trailers).foldLeft(new StringBuilder) { case (stringBuilder, (text, separator)) =>
                 stringBuilder.append(text).append(separator)
               }.toString
 
diff --git a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
index 3e9e97adf..bc6f3693c 100644
--- a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
+++ b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
@@ -21,47 +21,68 @@ class TestMkCombinedDocument extends Test {
     Array(2, 2, 2, 2, 2),
     Array(1, 2, 3, 4),
     Array(4, 3, 2, 1),
-    Array(0, 5, 0, 5, 0)
+    Array(0, 5, 0, 5)
   )
-  val sep = "  "
+  val separator = "  "
   val documentSerializer = new DocumentSerializer()
+  val processor = new CluProcessor()
 
   def toString(document: Document): String = {
     val stringWriter = new StringWriter()
 
     new PrintWriter(stringWriter).autoClose { printWriter =>
-      documentSerializer.save(document, printWriter)
+      documentSerializer.save(document, printWriter, keepText = true)
     }
     stringWriter.toString
   }
 
   behavior of "mkCombinedDocument"
 
-  def test(sentenceLengths: Array[Int]): Unit = {
+  def test(sentenceLengths: Array[Int], expectedResult: String): Unit = {
     val label = sentenceLengths.mkString("[", ", ", "]")
 
     it should s"combine $label" in {
       val sentenceStarts = sentenceLengths.scanLeft(0) { case (start, split) => start + split }
       assert(sentenceStarts.last == 10)
       val sentenceGroups = sentenceStarts.zip(sentenceLengths).map { case (start, length) =>
-        sentences.slice(start, start + length).mkString(sep)
+        sentences.slice(start, start + length).mkString(separator)
+      }
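+      // Only a non-empty group that is not the last gets a trailer, so that for the groupings above the combined text equals sentences.mkString(separator).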
+      val trailers = sentenceGroups.zipWithIndex.map { case (sentenceGroup, index) =>
+        if (sentenceGroup.isEmpty || index == sentenceGroups.indices.last) ""
+        else separator
       }
-      val separators = sentenceGroups.map { sentenceGroup => if (sentenceGroup.isEmpty) "" else sep }
-      val document = processor.mkCombinedDocument(sentenceGroups, separators)
+      val document = processor.mkCombinedDocument(sentenceGroups, trailers, keepText = true)
       val actualResult = toString(document)
 
       actualResult should be(expectedResult)
     }
   }
 
-  val processor = new CluProcessor()
-  val document = processor.mkDocument(sentences.mkString(sep))
-  val expectedResult = toString(document)
+  {
+    val document = processor.mkDocument(sentences.mkString(separator), keepText = true)
+    val expectedResult = toString(document)
 
-  manySentenceLengths.foreach { sentenceLengths =>
-    test(sentenceLengths)
+    manySentenceLengths.foreach { sentenceLengths =>
+      test(sentenceLengths, expectedResult)
+    }
   }
 
+  behavior of "dynamically separated texts"
 
-  // Do another example with <br> in the middle.  Make sure get two sentences.
+  it should "combine as expected" in {
+    val text = "I found this text<br>on a web page."
+    val separator = "<br>"
+    val texts = text.split(separator)
+    val indices = texts.indices
+    val trailers = indices.map { index => if (index != indices.last) separator else "" }
+    val document = processor.mkCombinedDocument(texts, trailers, keepText = true)
+
+    document.text.get should be (text)
+    document.sentences.length should be (2)
+
+    document.sentences.foreach { sentence =>
+      sentence.words should not contain(separator)
+    }
+  }
 }

From f56d46765bb430d5b6462be6101539a84890ce1d Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Wed, 30 Nov 2022 14:00:47 -0700
Subject: [PATCH 4/6] Test more with br

---
 .../processors/TestMkCombinedDocument.scala   | 46 ++++++++++++++++++-
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
index bc6f3693c..3ce3f6edc 100644
--- a/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
+++ b/main/src/test/scala/org/clulab/processors/TestMkCombinedDocument.scala
@@ -70,7 +70,31 @@ class TestMkCombinedDocument extends Test {
 
   behavior of "dynamically separated texts"
 
-  it should "combine as expected" in {
+  it should "include separators in both text and words" in {
+    val text = "I found this text<br>on a web page."
+    val separator = "<br>"
+    val texts = text.split(separator)
+    val dirtyTexts = texts.zipWithIndex.map { case (text, index) =>
+      if (index != texts.indices.last) text + separator
+      else text
+    }
+    val indices = texts.indices
+    val trailers = indices.map { _ => "" }
+    val document = processor.mkCombinedDocument(dirtyTexts, trailers, keepText = true)
+
+    document.text.get should be (text)
+    document.sentences.length should be (indices.length)
+
+    document.sentences.zipWithIndex.foreach { case (sentence, index) =>
+      if (index != indices.last)
+        sentence.words should contain (separator)
+      else
+        sentence.words should not contain (separator)
+    }
+  }
+
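+  // This is expected to be the typical case: the trailers restore the separators in the combined text, but the separators are never passed to mkDocument.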
+  it should "include separators in text but not words" in {
     val text = "I found this text<br>on a web page."
     val separator = "<br>"
     val texts = text.split(separator)
@@ -79,10 +103,28 @@ class TestMkCombinedDocument extends Test {
     val document = processor.mkCombinedDocument(texts, trailers, keepText = true)
 
     document.text.get should be (text)
-    document.sentences.length should be (2)
+    document.sentences.length should be (indices.length)
 
     document.sentences.foreach { sentence =>
       sentence.words should not contain(separator)
     }
   }
+
+  it should "include separators in neither text nor words" in {
+    val text = "I found this text<br>on a web page."
+    val separator = "<br>"
+    val cleanSeparator = "    "
+    val cleanText = text.replace(separator, cleanSeparator)
+    val texts = text.split(separator)
+    val indices = texts.indices
+    val trailers = indices.map { index => if (index != indices.last) cleanSeparator else "" }
+    val document = processor.mkCombinedDocument(texts, trailers, keepText = true)
+
+    document.text.get should be(cleanText)
+    document.sentences.length should be(indices.length)
+
+    document.sentences.foreach { sentence =>
+      sentence.words should not contain (separator)
+    }
+  }
 }

From d2349f22d94972ad9e2a9d566879ff72e87efa51 Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Mon, 9 Jan 2023 20:10:39 -0700
Subject: [PATCH 5/6] Move methods to proper classes

---
 .../org/clulab/processors/Document.scala      | 29 +++++++++++
 .../org/clulab/processors/Processor.scala     | 50 +------------------
 .../org/clulab/processors/Sentence.scala      | 19 +++++++
 3 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/main/src/main/scala/org/clulab/processors/Document.scala b/main/src/main/scala/org/clulab/processors/Document.scala
index 140bc2f49..3b71d5813 100644
--- a/main/src/main/scala/org/clulab/processors/Document.scala
+++ b/main/src/main/scala/org/clulab/processors/Document.scala
@@ -184,7 +184,36 @@ class Document(val sentences: Array[Sentence]) extends Serializable {
         }
       }
     })
+  }
+
+  protected def replaceSentences(sentences: Array[Sentence]): Document = {
+    val newDocument = new Document(sentences)
+
+    newDocument.id = id
+    newDocument.text = text
+
+    require(newDocument.coreferenceChains.isEmpty)
+    require(coreferenceChains.isEmpty)
+
+    getAttachmentKeys.foreach { attachmentKey =>
+      require(newDocument.getAttachment(attachmentKey).forall(_ == getAttachment(attachmentKey).get))
+      newDocument.addAttachment(attachmentKey, getAttachment(attachmentKey).get)
+    }
 
+    val dctOpt = getDCT
+    dctOpt.foreach(newDocument.setDCT)
+
+    newDocument
+  }
+
+  def offset(offset: Int): Document = {
+    if (offset == 0) this
+    else {
+      val offsetSentences = sentences.map(_.offset(offset))
+      val newDocument = replaceSentences(offsetSentences)
+
+      newDocument
+    }
   }
 }
 
diff --git a/main/src/main/scala/org/clulab/processors/Processor.scala b/main/src/main/scala/org/clulab/processors/Processor.scala
index d77b2f4ae..52af1288b 100644
--- a/main/src/main/scala/org/clulab/processors/Processor.scala
+++ b/main/src/main/scala/org/clulab/processors/Processor.scala
@@ -12,54 +12,6 @@ trait Processor {
   /** Constructs a document of tokens from free text; includes sentence splitting and tokenization. */
   def mkDocument (text:String, keepText:Boolean = false): Document
 
-  protected def offsetSentence(sentence: Sentence, offset: Int): Sentence = {
-    val raw = sentence.raw
-    val startOffsets = sentence.startOffsets.map(_ + offset)
-    val endOffsets = sentence.endOffsets.map(_ + offset)
-    val words = sentence.words
-    val newSentence = Sentence(raw, startOffsets, endOffsets, words)
-
-    newSentence.tags = sentence.tags
-    newSentence.lemmas = sentence.lemmas
-    newSentence.entities = sentence.entities
-    newSentence.norms = sentence.norms
-    newSentence.chunks = sentence.chunks
-    newSentence.syntacticTree = sentence.syntacticTree
-    newSentence.graphs = sentence.graphs
-    newSentence.relations = sentence.relations
-    newSentence
-  }
-
-  protected def offsetDocument(document: Document, offset: Int): Document = {
-    if (offset == 0) document
-    else {
-      val offsetSentences = document.sentences.map(offsetSentence(_, offset))
-      val newDocument = replaceSentences(document, offsetSentences)
-
-      newDocument
-    }
-  }
-
-  protected def replaceSentences(document: Document, sentences: Array[Sentence]): Document = {
-    val newDocument = new Document(sentences)
-
-    newDocument.id = document.id
-    newDocument.text = document.text
-
-    require(newDocument.coreferenceChains.isEmpty)
-    require(document.coreferenceChains.isEmpty)
-
-    document.getAttachmentKeys.foreach { attachmentKey =>
-      require(newDocument.getAttachment(attachmentKey).forall(_ == document.getAttachment(attachmentKey).get))
-      newDocument.addAttachment(attachmentKey, document.getAttachment(attachmentKey).get)
-    }
-
-    val dctOpt = document.getDCT
-    dctOpt.foreach(newDocument.setDCT)
-
-    newDocument
-  }
-
   // The documents here were created with Processor.mkDocument, which could have created a subclassed
   // Document or documents with certain fields already filled in.  This implementation only handles
   // known document fields and then only performs rudimentary requirement checks to make sure that
@@ -106,7 +58,7 @@ trait Processor {
         val documents = texts.map(mkDocument(_, keepText))
         val offsets = texts.zip(trailers).scanLeft(0) { case (offset, (text, trailer)) => offset + text.length + trailer.length }
         val offsetDocuments = documents.zip(offsets).map { case (document, offset) =>
-          offsetDocument(document, offset)
+          document.offset(offset)
         }
         val combinedTextOpt =
             if (keepText) {
diff --git a/main/src/main/scala/org/clulab/processors/Sentence.scala b/main/src/main/scala/org/clulab/processors/Sentence.scala
index c9011e11f..e95bf9a55 100644
--- a/main/src/main/scala/org/clulab/processors/Sentence.scala
+++ b/main/src/main/scala/org/clulab/processors/Sentence.scala
@@ -173,6 +173,25 @@ class Sentence(
 
     reverted
   }
+
+  def offset(offset: Int): Sentence = {
+    if (offset == 0) this
+    else {
+      val newStartOffsets = startOffsets.map(_ + offset)
+      val newEndOffsets = endOffsets.map(_ + offset)
+      val newSentence = Sentence(raw, newStartOffsets, newEndOffsets, words)
+
+      newSentence.tags = tags
+      newSentence.lemmas = lemmas
+      newSentence.entities = entities
+      newSentence.norms = norms
+      newSentence.chunks = chunks
+      newSentence.syntacticTree = syntacticTree
+      newSentence.graphs = graphs
+      newSentence.relations = relations
+      newSentence
+    }
+  }
 }
 
 object Sentence {

From 0f761220855dfc30869cf03af0844974ac0fadd4 Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Tue, 10 Jan 2023 12:41:12 -0700
Subject: [PATCH 6/6] Compile for Scala 3

---
 main/src/main/scala/org/clulab/processors/Sentence.scala | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/main/src/main/scala/org/clulab/processors/Sentence.scala b/main/src/main/scala/org/clulab/processors/Sentence.scala
index 284b83928..1787ba1a1 100644
--- a/main/src/main/scala/org/clulab/processors/Sentence.scala
+++ b/main/src/main/scala/org/clulab/processors/Sentence.scala
@@ -5,7 +5,6 @@ import org.clulab.struct.{DirectedGraph, GraphMap, RelationTriple, Tree}
 import org.clulab.struct.GraphMap._
 import org.clulab.utils.SeqUtils
 
-import scala.collection.immutable.Range
 import scala.collection.mutable
 import scala.util.hashing.MurmurHash3._
 
@@ -178,8 +177,8 @@ class Sentence(
   def offset(offset: Int): Sentence = {
     if (offset == 0) this
     else {
-      val newStartOffsets = startOffsets.map(_ + offset)
-      val newEndOffsets = endOffsets.map(_ + offset)
+      val newStartOffsets = startOffsets.map(_ + offset).toArray
+      val newEndOffsets = endOffsets.map(_ + offset).toArray
       val newSentence = Sentence(raw, newStartOffsets, newEndOffsets, words)
 
       newSentence.tags = tags