diff --git a/AGENTS.md b/AGENTS.md index b10f2ea..2d059e9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -184,6 +184,8 @@ A fast path checks for the ADD pattern with `bytes.Contains` before scanning. Me ADD segments without a field separator (e.g., `ADD\r` with no fields) are not merged and remain as standalone segments. This matches the spec requirement that ADD must carry continuation data. +ADD segments that immediately follow MSH are also left as standalone segments. Per the spec, ADD extends data segments, not the message header. This also enables `Concatenate` to correctly reassemble cross-message continuations where page N+1 begins with `MSH` followed by `ADD` (continuing the last segment of page N): because ADD is still visible as a standalone segment in the parsed page, `Concatenate` preserves it in the assembled buffer, and the final `ParseMessage` call then merges it into the correct preceding segment. + ### Null vs Empty Per the HL7 v2 specification: diff --git a/continuation_test.go b/continuation_test.go index ef6816b..61f3518 100644 --- a/continuation_test.go +++ b/continuation_test.go @@ -291,6 +291,53 @@ func TestConcatenate_nextHasADDSegments(t *testing.T) { } } +func TestConcatenate_secondPageStartsWithADD(t *testing.T) { + // HL7 continuation scenario: page1 splits an OBX at a field boundary and + // ends with DSC. page2 starts with MSH (for transport) followed immediately + // by an ADD segment that continues OBX-5 from page1. + // + // After Concatenate the ADD must be merged into the OBX from page1, not + // lost when the page2 MSH is stripped. + page1Raw := []byte(msh + "\rOBX|1|TX|note||First part\rDSC|Q002|I") + page2Raw := []byte(msh + "\rADD|Second part") + + page1, err := ParseMessage(page1Raw) + if err != nil { + t.Fatalf("ParseMessage(page1): %v", err) + } + page2, err := ParseMessage(page2Raw) + if err != nil { + t.Fatalf("ParseMessage(page2): %v", err) + } + + result, err := page1.Concatenate(page2) + if err != nil { + t.Fatalf("Concatenate: %v", err) + } + + segs := result.Segments() + // Expected: MSH OBX. DSC stripped from page1, MSH stripped from page2. + // The ADD from page2 continues OBX so no ADD segment is visible. + if len(segs) != 2 { + t.Fatalf("len(Segments()) = %d, want 2", len(segs)) + } + for i, want := range []string{"MSH", "OBX"} { + if segs[i].Type() != want { + t.Errorf("seg[%d].Type() = %q, want %q", i, segs[i].Type(), want) + } + } + + // OBX-5: page1's original field value, unchanged. + if got := result.Get("OBX(0)-5"); got != "First part" { + t.Errorf("OBX(0)-5 = %q, want %q", got, "First part") + } + // OBX-6: ADD field 1 from page2. mergeADD appends fields (not within-field + // concatenation), so ADD field 1 becomes the next field of OBX. + if got := result.Get("OBX(0)-6"); got != "Second part" { + t.Errorf("OBX(0)-6 = %q, want %q", got, "Second part") + } +} + func BenchmarkConcatenate(b *testing.B) { page1Raw := []byte(msh + "\rPID|1||12345^^^MRN||Smith^John|||M\rOBR|1||ORD001|CBC\rOBX|1|ST|8302-2^Body Height^LN||170|cm\rDSC|Q002|I") page2Raw := []byte(msh + "\rPID|2||67890^^^MRN||Jones^Jane|||F\rOBR|2||ORD002|CMP\rOBX|2|NM|8310-5^Body Temp^LN||37.0|Cel") diff --git a/message.go b/message.go index feb54a6..a855c39 100644 --- a/message.go +++ b/message.go @@ -131,6 +131,12 @@ func (m *Message) Raw() []byte { // // ADD segments without a field separator (e.g., "ADD\\r") are not merged and // remain as standalone segments. +// +// ADD segments that immediately follow MSH are also left as standalone segments. +// Per HL7v2.5.1, ADD extends data segments, not the message header. Leaving +// ADD visible after MSH allows Concatenate to correctly reassemble cross-message +// continuations where page N+1 starts with MSH followed by an ADD that continues +// the last segment of page N. func mergeADD(data []byte, fieldSep byte) []byte { pattern1 := [5]byte{'\r', 'A', 'D', 'D', fieldSep} pattern2 := [5]byte{'\n', 'A', 'D', 'D', fieldSep} @@ -142,30 +148,66 @@ func mergeADD(data []byte, fieldSep byte) []byte { return owned } - // Slow path: merge ADD segments by stripping boundaries. + // Slow path: merge ADD segments, skipping merge when the preceding segment + // is MSH. segStart tracks where the current segment begins in owned. owned := make([]byte, 0, len(data)) + segStart := 0 i := 0 for i < len(data) { // Check for \r\nADD (must check before \rADD). if i+6 <= len(data) && data[i] == '\r' && data[i+1] == '\n' && data[i+2] == 'A' && data[i+3] == 'D' && data[i+4] == 'D' && data[i+5] == fieldSep { - i += 5 // skip \r\nADD, keep as the field boundary + if isMSHSeg(owned[segStart:], fieldSep) { + owned = append(owned, '\r', '\n') + segStart = len(owned) + i += 2 // skip \r\n; ADD will be copied normally + } else { + i += 5 // skip \r\nADD, keep as the field boundary + } continue } // Check for \rADD. if i+5 <= len(data) && data[i] == '\r' && data[i+1] == 'A' && data[i+2] == 'D' && data[i+3] == 'D' && data[i+4] == fieldSep { - i += 4 // skip \rADD, keep as the field boundary + if isMSHSeg(owned[segStart:], fieldSep) { + owned = append(owned, '\r') + segStart = len(owned) + i++ // skip \r; ADD will be copied normally + } else { + i += 4 // skip \rADD, keep as the field boundary + } continue } // Check for \nADD. if i+5 <= len(data) && data[i] == '\n' && data[i+1] == 'A' && data[i+2] == 'D' && data[i+3] == 'D' && data[i+4] == fieldSep { - i += 4 // skip \nADD, keep as the field boundary + if isMSHSeg(owned[segStart:], fieldSep) { + owned = append(owned, '\n') + segStart = len(owned) + i++ // skip \n; ADD will be copied normally + } else { + i += 4 // skip \nADD, keep as the field boundary + } continue } - owned = append(owned, data[i]) + // Regular byte: copy to output and track segment boundaries. + b := data[i] + owned = append(owned, b) i++ + if b == '\r' { + if i < len(data) && data[i] == '\n' { + owned = append(owned, '\n') + i++ + } + segStart = len(owned) + } else if b == '\n' { + segStart = len(owned) + } } return owned } + +// isMSHSeg reports whether seg is an MSH segment (starts with "MSH" + fieldSep). +func isMSHSeg(seg []byte, fieldSep byte) bool { + return len(seg) >= 4 && seg[0] == 'M' && seg[1] == 'S' && seg[2] == 'H' && seg[3] == fieldSep +} diff --git a/message_test.go b/message_test.go index 49fe3f3..92ede0c 100644 --- a/message_test.go +++ b/message_test.go @@ -537,17 +537,34 @@ func TestParseMessageADD_AccessorWorks(t *testing.T) { } func TestParseMessageADD_AtStart(t *testing.T) { - // ADD as first segment after MSH — no meaningful preceding segment to merge into. - // Per spec, ADD must follow a data segment. When it follows MSH, it merges into MSH. - raw := []byte("MSH|^~\\&|S|F|R|RF|20240101||ADT^A01|1|P|2.5.1\rADD|extra") + // ADD immediately after MSH is left as a standalone segment. Per HL7v2.5.1, + // ADD extends data segments, not the message header. This also allows + // Concatenate to correctly reassemble cross-message continuations where + // page N+1 begins with MSH followed by an ADD continuing a segment from page N. + // + // The three ADD segments that follow the first ADD have a non-MSH preceding + // segment (ADD itself), so they are merged into it in the normal way. The + // result is a single ADD segment whose fields carry all four values. + raw := []byte("MSH|^~\\&|S|F|R|RF|20240101||ADT^A01|1|P|2.5.1\rADD|extra1\rADD|extra2\rADD|extra3\rADD|extra4") msg, err := ParseMessage(raw) if err != nil { t.Fatalf("unexpected error: %v", err) } - // ADD merges into MSH — only 1 segment. - if got := len(msg.Segments()); got != 1 { - t.Fatalf("len(Segments()) = %d, want 1", got) + // First ADD is standalone (after MSH); subsequent ADDs merge into it. + // Result: MSH + one merged ADD segment. + if got := len(msg.Segments()); got != 2 { + t.Fatalf("len(Segments()) = %d, want 2", got) + } + add := msg.Segments()[1] + if got := add.Type(); got != "ADD" { + t.Fatalf("segment[1].Type() = %q, want ADD", got) + } + // Fields 1–4 carry the values from the four ADD occurrences. + for i, want := range []string{"extra1", "extra2", "extra3", "extra4"} { + if got := add.Field(i + 1).String(); got != want { + t.Errorf("ADD field %d = %q, want %q", i+1, got, want) + } } } @@ -577,6 +594,23 @@ func BenchmarkParseMessageWithADD(b *testing.B) { } } +func BenchmarkParseMessageMultipleADD(b *testing.B) { + // Two OBX segments each continued by two ADD segments — four ADD merges total. + raw := []byte("MSH|^~\\&|S|F|R|RF|20240101||ORU^R01|1|P|2.5.1\r" + + "PID|1||123||Smith^John\r" + + "OBX|1|TX|note||first_part_of_long_value\r" + + "ADD|second_part_of_long_value\r" + + "ADD|third_part_of_long_value\r" + + "OBX|2|TX|note2||alpha\r" + + "ADD|beta\r" + + "ADD|gamma") + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _, _ = ParseMessage(raw) + } +} + func BenchmarkGetAccessor(b *testing.B) { data, err := os.ReadFile(filepath.Join("testdata", "oru_r01.hl7")) if err != nil {