Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ A fast path checks for the ADD pattern with `bytes.Contains` before scanning. Me

ADD segments without a field separator (e.g., `ADD\r` with no fields) are not merged and remain as standalone segments. This matches the spec requirement that ADD must carry continuation data.

ADD segments that immediately follow MSH are also left as standalone segments. Per the spec, ADD extends data segments, not the message header. This also enables `Concatenate` to correctly reassemble cross-message continuations where page N+1 begins with `MSH` followed by `ADD` (continuing the last segment of page N): because ADD is still visible as a standalone segment in the parsed page, `Concatenate` preserves it in the assembled buffer, and the final `ParseMessage` call then merges it into the correct preceding segment.

### Null vs Empty

Per the HL7 v2 specification:
Expand Down
47 changes: 47 additions & 0 deletions continuation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,53 @@ func TestConcatenate_nextHasADDSegments(t *testing.T) {
}
}

func TestConcatenate_secondPageStartsWithADD(t *testing.T) {
// HL7 continuation scenario: page1 splits an OBX at a field boundary and
// ends with DSC. page2 starts with MSH (for transport) followed immediately
// by an ADD segment that continues OBX-5 from page1.
//
// After Concatenate the ADD must be merged into the OBX from page1, not
// lost when the page2 MSH is stripped.
page1Raw := []byte(msh + "\rOBX|1|TX|note||First part\rDSC|Q002|I")
page2Raw := []byte(msh + "\rADD|Second part")

page1, err := ParseMessage(page1Raw)
if err != nil {
t.Fatalf("ParseMessage(page1): %v", err)
}
page2, err := ParseMessage(page2Raw)
if err != nil {
t.Fatalf("ParseMessage(page2): %v", err)
}

result, err := page1.Concatenate(page2)
if err != nil {
t.Fatalf("Concatenate: %v", err)
}

segs := result.Segments()
// Expected: MSH OBX. DSC stripped from page1, MSH stripped from page2.
// The ADD from page2 continues OBX so no ADD segment is visible.
if len(segs) != 2 {
t.Fatalf("len(Segments()) = %d, want 2", len(segs))
}
for i, want := range []string{"MSH", "OBX"} {
if segs[i].Type() != want {
t.Errorf("seg[%d].Type() = %q, want %q", i, segs[i].Type(), want)
}
}

// OBX-5: page1's original field value, unchanged.
if got := result.Get("OBX(0)-5"); got != "First part" {
t.Errorf("OBX(0)-5 = %q, want %q", got, "First part")
}
// OBX-6: ADD field 1 from page2. mergeADD appends fields (not within-field
// concatenation), so ADD field 1 becomes the next field of OBX.
if got := result.Get("OBX(0)-6"); got != "Second part" {
t.Errorf("OBX(0)-6 = %q, want %q", got, "Second part")
}
}

func BenchmarkConcatenate(b *testing.B) {
page1Raw := []byte(msh + "\rPID|1||12345^^^MRN||Smith^John|||M\rOBR|1||ORD001|CBC\rOBX|1|ST|8302-2^Body Height^LN||170|cm\rDSC|Q002|I")
page2Raw := []byte(msh + "\rPID|2||67890^^^MRN||Jones^Jane|||F\rOBR|2||ORD002|CMP\rOBX|2|NM|8310-5^Body Temp^LN||37.0|Cel")
Expand Down
52 changes: 47 additions & 5 deletions message.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ func (m *Message) Raw() []byte {
//
// ADD segments without a field separator (e.g., "ADD\\r") are not merged and
// remain as standalone segments.
//
// ADD segments that immediately follow MSH are also left as standalone segments.
// Per HL7v2.5.1, ADD extends data segments, not the message header. Leaving
// ADD visible after MSH allows Concatenate to correctly reassemble cross-message
// continuations where page N+1 starts with MSH followed by an ADD that continues
// the last segment of page N.
func mergeADD(data []byte, fieldSep byte) []byte {
pattern1 := [5]byte{'\r', 'A', 'D', 'D', fieldSep}
pattern2 := [5]byte{'\n', 'A', 'D', 'D', fieldSep}
Expand All @@ -142,30 +148,66 @@ func mergeADD(data []byte, fieldSep byte) []byte {
return owned
}

// Slow path: merge ADD segments by stripping boundaries.
// Slow path: merge ADD segments, skipping merge when the preceding segment
// is MSH. segStart tracks where the current segment begins in owned.
owned := make([]byte, 0, len(data))
segStart := 0
i := 0
for i < len(data) {
// Check for \r\nADD<sep> (must check before \rADD<sep>).
if i+6 <= len(data) && data[i] == '\r' && data[i+1] == '\n' &&
data[i+2] == 'A' && data[i+3] == 'D' && data[i+4] == 'D' && data[i+5] == fieldSep {
i += 5 // skip \r\nADD, keep <sep> as the field boundary
if isMSHSeg(owned[segStart:], fieldSep) {
owned = append(owned, '\r', '\n')
segStart = len(owned)
i += 2 // skip \r\n; ADD<sep> will be copied normally
} else {
i += 5 // skip \r\nADD, keep <sep> as the field boundary
}
continue
}
// Check for \rADD<sep>.
if i+5 <= len(data) && data[i] == '\r' &&
data[i+1] == 'A' && data[i+2] == 'D' && data[i+3] == 'D' && data[i+4] == fieldSep {
i += 4 // skip \rADD, keep <sep> as the field boundary
if isMSHSeg(owned[segStart:], fieldSep) {
owned = append(owned, '\r')
segStart = len(owned)
i++ // skip \r; ADD<sep> will be copied normally
} else {
i += 4 // skip \rADD, keep <sep> as the field boundary
}
continue
}
// Check for \nADD<sep>.
if i+5 <= len(data) && data[i] == '\n' &&
data[i+1] == 'A' && data[i+2] == 'D' && data[i+3] == 'D' && data[i+4] == fieldSep {
i += 4 // skip \nADD, keep <sep> as the field boundary
if isMSHSeg(owned[segStart:], fieldSep) {
owned = append(owned, '\n')
segStart = len(owned)
i++ // skip \n; ADD<sep> will be copied normally
} else {
i += 4 // skip \nADD, keep <sep> as the field boundary
}
continue
}
owned = append(owned, data[i])
// Regular byte: copy to output and track segment boundaries.
b := data[i]
owned = append(owned, b)
i++
if b == '\r' {
if i < len(data) && data[i] == '\n' {
owned = append(owned, '\n')
i++
}
segStart = len(owned)
} else if b == '\n' {
segStart = len(owned)
}
}
return owned
}

// isMSHSeg reports whether seg is an MSH segment (starts with "MSH" + fieldSep).
func isMSHSeg(seg []byte, fieldSep byte) bool {
return len(seg) >= 4 && seg[0] == 'M' && seg[1] == 'S' && seg[2] == 'H' && seg[3] == fieldSep
}
46 changes: 40 additions & 6 deletions message_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,17 +537,34 @@ func TestParseMessageADD_AccessorWorks(t *testing.T) {
}

func TestParseMessageADD_AtStart(t *testing.T) {
// ADD as first segment after MSH — no meaningful preceding segment to merge into.
// Per spec, ADD must follow a data segment. When it follows MSH, it merges into MSH.
raw := []byte("MSH|^~\\&|S|F|R|RF|20240101||ADT^A01|1|P|2.5.1\rADD|extra")
// ADD immediately after MSH is left as a standalone segment. Per HL7v2.5.1,
// ADD extends data segments, not the message header. This also allows
// Concatenate to correctly reassemble cross-message continuations where
// page N+1 begins with MSH followed by an ADD continuing a segment from page N.
//
// The three ADD segments that follow the first ADD have a non-MSH preceding
// segment (ADD itself), so they are merged into it in the normal way. The
// result is a single ADD segment whose fields carry all four values.
raw := []byte("MSH|^~\\&|S|F|R|RF|20240101||ADT^A01|1|P|2.5.1\rADD|extra1\rADD|extra2\rADD|extra3\rADD|extra4")
msg, err := ParseMessage(raw)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

// ADD merges into MSH — only 1 segment.
if got := len(msg.Segments()); got != 1 {
t.Fatalf("len(Segments()) = %d, want 1", got)
// First ADD is standalone (after MSH); subsequent ADDs merge into it.
// Result: MSH + one merged ADD segment.
if got := len(msg.Segments()); got != 2 {
t.Fatalf("len(Segments()) = %d, want 2", got)
}
add := msg.Segments()[1]
if got := add.Type(); got != "ADD" {
t.Fatalf("segment[1].Type() = %q, want ADD", got)
}
// Fields 1–4 carry the values from the four ADD occurrences.
for i, want := range []string{"extra1", "extra2", "extra3", "extra4"} {
if got := add.Field(i + 1).String(); got != want {
t.Errorf("ADD field %d = %q, want %q", i+1, got, want)
}
}
}

Expand Down Expand Up @@ -577,6 +594,23 @@ func BenchmarkParseMessageWithADD(b *testing.B) {
}
}

func BenchmarkParseMessageMultipleADD(b *testing.B) {
// Two OBX segments each continued by two ADD segments — four ADD merges total.
raw := []byte("MSH|^~\\&|S|F|R|RF|20240101||ORU^R01|1|P|2.5.1\r" +
"PID|1||123||Smith^John\r" +
"OBX|1|TX|note||first_part_of_long_value\r" +
"ADD|second_part_of_long_value\r" +
"ADD|third_part_of_long_value\r" +
"OBX|2|TX|note2||alpha\r" +
"ADD|beta\r" +
"ADD|gamma")
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_, _ = ParseMessage(raw)
}
}

func BenchmarkGetAccessor(b *testing.B) {
data, err := os.ReadFile(filepath.Join("testdata", "oru_r01.hl7"))
if err != nil {
Expand Down
Loading