Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Single-package Go library (`package hl7`) for parsing HL7 version 2.x messages i
- **Full serialization or Marshal API.** The library does not provide a general `Marshal` function. Message construction is handled by `MessageBuilder` (from-scratch) and `Transform` (modify existing), both of which produce `*Message` via `ParseMessage`. There are no per-field setters on parsed messages.
- **Built-in schema definitions.** The library does not ship with HL7v2 segment, data type, or table definitions. Users provide their own `Schema` struct to `msg.Validate()`. This keeps the library general-purpose and avoids bundling version-specific definitions.
- **No field location constants.** HL7 field positions (e.g., `PID-5.1`) are not stable across HL7 v2 versions or vendor implementations. The library does not provide named constants for terser-style location strings; callers define their own.
- **No predicate-based segment filtering.** Methods like `SegmentsOfType`, `SegmentsWhere`, or combinator functions (`And`, `Or`) were considered and rejected. The plain `Segments()` loop with an inline `if` or `switch` is idiomatic Go, immediately readable without library knowledge, and handles all filtering cases in a single pass. The abstraction does not eliminate domain knowledge — callers still decide which types and fields matter — it only restructures where that logic lives. The API surface cost exceeds the ergonomic benefit.

## Building and Testing

Expand Down
16 changes: 12 additions & 4 deletions delimiters.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,19 @@ func nthSlice(data []byte, delim byte, n int) []byte {
}
idx := 0
start := 0
for i := 0; i <= len(data); i++ {
if i == len(data) || data[i] == delim {
for i := 0; i < len(data); i++ {
if data[i] == delim {
if idx == n {
return data[start:i]
}
idx++
start = i + 1
}
}
// Final piece (no trailing delimiter).
if idx == n {
return data[start:]
}
return nil
}

Expand All @@ -97,15 +101,19 @@ func nthRange(data []byte, delim byte, n int) (start, end int, found bool) {
}
idx := 0
s := 0
for i := 0; i <= len(data); i++ {
if i == len(data) || data[i] == delim {
for i := 0; i < len(data); i++ {
if data[i] == delim {
if idx == n {
return s, i, true
}
idx++
s = i + 1
}
}
// Final piece (no trailing delimiter).
if idx == n {
return s, len(data), true
}
return 0, 0, false
}

Expand Down
13 changes: 6 additions & 7 deletions escape.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,13 @@ func Escape(data []byte, delims Delimiters) []byte {
}

// containsAny reports whether data contains at least one of the delimiter
// bytes carried by delims: field, component, sub-component, repetition, or
// escape.
//
// Each delimiter is looked up with bytes.IndexByte (SIMD-accelerated on
// amd64/arm64) instead of a hand-written per-byte loop. The || chain
// short-circuits, so scanning stops at the first delimiter kind that is found.
// An empty or nil data slice yields false.
func containsAny(data []byte, delims Delimiters) bool {
	return bytes.IndexByte(data, delims.Field) >= 0 ||
		bytes.IndexByte(data, delims.Component) >= 0 ||
		bytes.IndexByte(data, delims.SubComponent) >= 0 ||
		bytes.IndexByte(data, delims.Repetition) >= 0 ||
		bytes.IndexByte(data, delims.Escape) >= 0
}

// appendHexDecode decodes pairs of hex characters and appends the resulting
Expand Down
106 changes: 106 additions & 0 deletions examples/charset/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Example charset demonstrates decoding a Latin-1 (ISO 8859-1) encoded HL7v2
// message into UTF-8. It shows both selective field decoding with DecodeString
// and whole-message conversion with MapAllValues.
package main

import (
"bytes"
"fmt"
"log"

"github.com/senojj/hl7"
)

// sampleMessage is an ADT^A01 message encoded in Latin-1 (ISO 8859-1). It
// consists of three segments (MSH, PID, PV1), each terminated by a carriage
// return.
//
// MSH-18 declares the charset as "8859/1" per HL7 v2.5.1 Table 0211.
//
// The non-ASCII characters are written as Go \x hex escapes so that the
// literal holds the raw single Latin-1 bytes rather than UTF-8 sequences:
//
//	0xFC = ü (Müller — family name, PID-5.1)
//	0xF6 = ö (Jörg — middle name, PID-5.3)
//	0xDF = ß (Hauptstraße — street, PID-11.1)
var sampleMessage = []byte("" +
"MSH|^~\\&|SENDER|FAC|RECV|FAC|20260101||ADT^A01^ADT_A01|MSG001|P|2.5.1|||NE|AL|DEU|8859/1\r" +
"PID|1||123456||M\xFCller^Hans^J\xF6rg||19620315|M|||Hauptstra\xDFe 5^^Berlin^^10115^DE\r" +
"PV1|1|I\r")

// latin1ToUTF8 is a ValueMapper that re-encodes a Latin-1 (ISO 8859-1) value
// as UTF-8.
//
// ASCII bytes (below 0x80) are copied through as-is. Every byte in the range
// 0x80–0xFF corresponds to the Unicode code point of the same number
// (U+0080–U+00FF) and is written as a two-byte UTF-8 sequence. Values for
// which HasValue reports false (empty or null) are handed back untouched so
// that HL7 null semantics survive the conversion. The returned error is
// always nil.
func latin1ToUTF8(v hl7.Value) ([]byte, error) {
	if !v.HasValue() {
		return v.Bytes(), nil
	}

	src := v.Bytes()
	dst := make([]byte, 0, len(src))
	for i := range src {
		switch c := src[i]; {
		case c >= 0x80:
			// Two-byte form: 110000xx 10xxxxxx, where the payload bits are
			// the top two and bottom six bits of the Latin-1 byte.
			lead, cont := 0xC0|(c>>6), 0x80|(c&0x3F)
			dst = append(dst, lead, cont)
		default:
			dst = append(dst, c)
		}
	}
	return dst, nil
}

// main walks through the two charset-conversion approaches on sampleMessage:
//
//  1. Selective decoding of individual fields with DecodeString.
//  2. Whole-message conversion with Transform and MapAllValues, followed by
//     writing the converted message out and printing it segment by segment.
//
// Any failure terminates the program via log.Fatalf. Fatalf is used rather
// than log.Fatal because Print-style formatting inserts no space between a
// string label and the error operand, which would run the two together
// (e.g. "parse:<error>").
func main() {
	// Parse the Latin-1 encoded message.
	msg, err := hl7.ParseMessage(sampleMessage)
	if err != nil {
		log.Fatalf("parse: %v", err)
	}

	// Read MSH-18 to confirm the declared charset.
	charset := msg.Get("MSH-18").String()
	fmt.Println("Charset declared in MSH-18:", charset)
	fmt.Println()

	// Selective decoding — decode individual fields using DecodeString.
	// This is suitable when only a few fields need charset conversion.
	family, err := msg.Get("PID-5.1").DecodeString(latin1ToUTF8)
	if err != nil {
		log.Fatalf("decode PID-5.1: %v", err)
	}
	given, err := msg.Get("PID-5.2").DecodeString(latin1ToUTF8)
	if err != nil {
		log.Fatalf("decode PID-5.2: %v", err)
	}
	middle, err := msg.Get("PID-5.3").DecodeString(latin1ToUTF8)
	if err != nil {
		log.Fatalf("decode PID-5.3: %v", err)
	}
	addr, err := msg.Get("PID-11.1").DecodeString(latin1ToUTF8)
	if err != nil {
		log.Fatalf("decode PID-11.1: %v", err)
	}

	fmt.Println("Patient name (decoded):", family+", "+given+" "+middle)
	fmt.Println("Address (decoded):", addr)
	fmt.Println()

	// Full-message conversion — re-encode every field value to UTF-8 and
	// update MSH-18 in a single Transform call, so the declared charset
	// matches the new encoding.
	utf8Msg, err := msg.Transform(
		hl7.MapAllValues(latin1ToUTF8),
		hl7.Replace("MSH-18", "UNICODE UTF-8"),
	)
	if err != nil {
		log.Fatalf("transform: %v", err)
	}

	// Write the converted message (no MLLP framing for readability).
	var out bytes.Buffer
	writer := hl7.NewWriter(&out)
	if err := writer.WriteMessage(utf8Msg); err != nil {
		log.Fatalf("write: %v", err)
	}

	// Print one segment per line; segments are separated by '\r', and the
	// empty piece after the final terminator is skipped.
	fmt.Println("Converted message (UTF-8):")
	for _, seg := range bytes.Split(out.Bytes(), []byte{'\r'}) {
		if len(seg) > 0 {
			fmt.Println(" ", string(seg))
		}
	}
}
15 changes: 6 additions & 9 deletions message.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,12 @@ func ParseMessage(data []byte) (*Message, error) {
// the segment terminator (\r). Accepts \r, \n, and \r\n for real-world
// compatibility. Empty lines are skipped.
func splitSegments(data []byte, delims Delimiters) []Segment {
// Pre-count terminators for allocation.
n := 1
for _, b := range data {
if b == '\r' || b == '\n' {
n++
}
}

segments := make([]Segment, 0, n)
// Heuristic capacity: typical HL7 segments are 40-120 bytes; dividing by
// 80 with a +4 floor gives a good estimate in one pass, avoiding a separate
// pre-counting scan. The +4 floor ensures ADD-merged messages with fewer
// bytes-per-segment are still pre-sized adequately. Slight over-allocation
// is an acceptable tradeoff for eliminating the extra buffer scan.
segments := make([]Segment, 0, len(data)/80+4)
start := 0

for i := 0; i < len(data); i++ {
Expand Down
Loading