Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions accessor.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,19 @@ type Value struct {
// String returns the unescaped string value. Returns an empty string for a
// zero (not-found) Value.
func (v Value) String() string {
return string(Unescape(v.raw, v.delims))
return string(v.Bytes())
}

// Bytes returns the raw bytes without escape processing. Returns nil for a
// zero (not-found) Value.
// Bytes returns this value's content after escape processing, as a byte
// slice. It yields the same content as String() without the conversion to
// string, which suits callers that work on binary data.
// Returns nil for a zero (not-found) Value.
func (v Value) Bytes() []byte {
	// Escape sequences are resolved against the delimiters stored on the Value.
	unescaped := Unescape(v.raw, v.delims)
	return unescaped
}

// Raw returns the bytes of this value exactly as stored, with no escape
// processing applied, so any HL7 escape sequences are still present.
// The stored slice itself is returned, not a copy. Returns nil for a zero
// Value.
func (v Value) Raw() []byte {
return v.raw
}

Expand Down Expand Up @@ -250,7 +257,8 @@ func (v Value) HasValue() bool {
// msg.Get("PID-3.1").String() // Patient ID
// msg.Get("PID-5.1").String() // Family name
// msg.Get("OBX(0)-5").String() // First OBX segment, field 5
// msg.Get("PID-3.1").Bytes() // raw bytes without unescaping
// msg.Get("PID-3.1").Bytes() // unescaped bytes
// msg.Get("PID-3.1").Raw() // raw bytes without unescaping
func (m *Message) Get(location string) Value {
loc, err := ParseLocation(location)
if err != nil {
Expand Down
16 changes: 8 additions & 8 deletions ack.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,14 @@ func (m *Message) Ack(code AckCode, controlID string, opts ...AckOption) ([]byte
// Pre-calculate buffer size to avoid growing.
// MSH: "MSH" + separator + encoding chars + 10 delimited fields
// MSA: "MSA" + code + control ID + optional text
sendApp := msh.Field(5).Bytes()
sendFac := msh.Field(6).Bytes()
recvApp := msh.Field(3).Bytes()
recvFac := msh.Field(4).Bytes()
triggerEvent := msh.Field(9).Rep(0).Component(2).Bytes()
procID := msh.Field(11).Bytes()
versionID := msh.Field(12).Bytes()
origControlID := msh.Field(10).Bytes()
sendApp := msh.Field(5).Raw()
sendFac := msh.Field(6).Raw()
recvApp := msh.Field(3).Raw()
recvFac := msh.Field(4).Raw()
triggerEvent := msh.Field(9).Rep(0).Component(2).Raw()
procID := msh.Field(11).Raw()
versionID := msh.Field(12).Raw()
origControlID := msh.Field(10).Raw()

// Estimate size: MSH header + fields + MSA segment.
size := 3 + 1 + 4 + // "MSH" + separator + encoding chars
Expand Down
2 changes: 1 addition & 1 deletion builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ func TestBuilderEscaping(t *testing.T) {

// Raw bytes should contain escape sequences.
pid := findSeg(msg, "PID")
raw := pid.Field(3).Bytes()
raw := pid.Field(3).Raw()
rawStr := string(raw)
for _, seq := range []string{`\F\`, `\S\`, `\R\`, `\E\`, `\T\`} {
if !strings.Contains(rawStr, seq) {
Expand Down
24 changes: 12 additions & 12 deletions charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@

package hl7

// ValueMapper converts field-content bytes to a new representation.
// ValueMapper converts a Value to a new byte representation.
// It is used at read time (DecodeString) to decode stored bytes, and at
// write time (MapAllValues) to transform content before storage.
//
// The mapper receives post-unescape bytes and must return the transformed bytes.
// The mapper receives the full Value, allowing it to inspect IsNull(),
// IsEmpty(), and HasValue() before deciding how to convert. Call v.Bytes()
// to obtain the post-unescape bytes for byte-level conversion.
// When used with DecodeString, returning an error aborts the decode.
// When used with MapAllValues, returning an error aborts the transform.
//
Expand All @@ -27,24 +29,22 @@ package hl7
// DecodeString behaves identically to String() — no conversion is applied and
// no extra allocation is incurred beyond the Unescape fast path.
//
// Note: Unescape runs before the ValueMapper, so the mapper receives
// resolved byte values. Charset escape sequences (\C..\ and \M..\) are passed
// through verbatim by Unescape; a sophisticated ValueMapper may parse them,
// but a simple byte-level mapper will treat them as-is.
type ValueMapper func(data []byte) ([]byte, error)
// Note: v.Bytes() returns post-unescape bytes. Charset escape sequences
// (\C..\ and \M..\) are passed through verbatim by Unescape; a sophisticated
// ValueMapper may parse them, but a simple byte-level mapper will treat them as-is.
type ValueMapper func(v Value) ([]byte, error)

// DecodeString returns the unescaped, charset-decoded string value of the
// Value. Unescape runs first; if decode is nil the unescaped bytes are cast
// to string with no further allocation.
// Value. If decode is nil the unescaped bytes are cast to string with no
// further allocation.
//
// DecodeString is promoted to Field, Repetition, Component, and Subcomponent
// via their embedded Value.
func (v Value) DecodeString(decode ValueMapper) (string, error) {
unescaped := Unescape(v.raw, v.delims)
if decode == nil {
return string(unescaped), nil
return v.String(), nil
}
converted, err := decode(unescaped)
converted, err := decode(v)
if err != nil {
return "", err
}
Expand Down
37 changes: 33 additions & 4 deletions charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ import (
// latin1ToUTF8 is a test-only ValueMapper that converts Latin-1 (ISO 8859-1)
// bytes to UTF-8. Each byte in the range 0x80–0xFF is expanded to its UTF-8
// two-byte sequence; bytes below 0x80 are ASCII and pass through unchanged.
func latin1ToUTF8(data []byte) ([]byte, error) {
func latin1ToUTF8(v Value) ([]byte, error) {
data := v.Bytes()
out := make([]byte, 0, len(data))
for _, b := range data {
if b < 0x80 {
Expand All @@ -35,17 +36,45 @@ func latin1ToUTF8(data []byte) ([]byte, error) {
return out, nil
}

// utf16LEToUTF8 is a test-only ValueMapper that converts UTF-16 little-endian
// bytes to UTF-8.
//
// Values for which v.HasValue() reports false are returned unchanged as
// v.Bytes(), so null ("") and empty fields keep their HL7 meaning. Otherwise
// the post-unescape bytes are read as 16-bit little-endian code units; an odd
// byte count is rejected with an error. A high surrogate immediately followed
// by a low surrogate is combined into one supplementary-plane code point, and
// an unpaired surrogate is emitted as U+FFFD rather than as an invalid
// three-byte sequence.
func utf16LEToUTF8(v Value) ([]byte, error) {
	if !v.HasValue() {
		return v.Bytes(), nil // preserve null ("") and empty unchanged
	}
	data := v.Bytes()
	if len(data)%2 != 0 {
		return nil, errors.New("utf16: odd byte count")
	}
	// Two input bytes expand to at most three output bytes.
	out := make([]byte, 0, len(data)+len(data)/2)
	for i := 0; i < len(data); i += 2 {
		r := rune(data[i]) | rune(data[i+1])<<8
		if r >= 0xD800 && r < 0xDC00 && i+3 < len(data) {
			lo := rune(data[i+2]) | rune(data[i+3])<<8
			if lo >= 0xDC00 && lo < 0xE000 {
				r = 0x10000 + ((r - 0xD800) << 10) + (lo - 0xDC00)
				i += 2 // the low surrogate has been consumed as well
			}
		}
		// Converting a rune to string yields its UTF-8 encoding; values that
		// are not valid code points (unpaired surrogates) become U+FFFD.
		out = append(out, string(r)...)
	}
	return out, nil
}

// errHighBit is returned by errorDecoder when a byte with the high bit set is seen.
var errHighBit = errors.New("high bit set")

// errorDecoder rejects any byte with the high bit set.
func errorDecoder(data []byte) ([]byte, error) {
for _, b := range data {
func errorDecoder(v Value) ([]byte, error) {
for _, b := range v.Bytes() {
if b&0x80 != 0 {
return nil, errHighBit
}
}
return data, nil
return v.Bytes(), nil
}

// --- Value.DecodeString ---
Expand Down
6 changes: 4 additions & 2 deletions doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
// msg.Get("MSH-9.1").String() // Message code (unescaped string)
// msg.Get("PID-3[1].4.2").String() // 2nd repetition of PID-3, component 4, subcomponent 2
// msg.Get("OBX(1)-5").String() // 2nd OBX segment (0-based), field 5
// msg.Get("PID-3.1").Bytes() // raw bytes without unescaping
// msg.Get("PID-3.1").Bytes() // unescaped bytes
// msg.Get("PID-3.1").Raw() // raw bytes without unescaping
//
// Get returns a zero Value when the location is invalid or the value is not
// present — consistent with how Field(n), Rep(n), and Component(n) return zero
Expand All @@ -56,7 +57,8 @@
//
// Field values in non-UTF-8 charsets (e.g. Latin-1 / ISO 8859-1 declared in
// MSH-18) can be decoded with DecodeString. A ValueMapper is a
// func([]byte) ([]byte, error) that converts post-unescape bytes to UTF-8.
// func(Value) ([]byte, error) that converts a Value to UTF-8 bytes; call
// v.Bytes() inside the mapper to get the post-unescape bytes to convert.
// DecodeString is available on Value, Field, Repetition, Component, and
// Subcomponent:
//
Expand Down
6 changes: 3 additions & 3 deletions transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -1064,10 +1064,10 @@ func mapDelimited(raw []byte, sep byte, fn func([]byte) ([]byte, error)) ([]byte
return out, nil
}

// mapLeafValue unescapes raw, applies mapper, and re-escapes the result.
// mapLeafValue applies mapper to raw and re-escapes the result.
// The mapper receives the full Value and calls v.Bytes() for unescaped bytes.
func mapLeafValue(raw []byte, delims Delimiters, mapper ValueMapper) ([]byte, error) {
unescaped := Unescape(raw, delims)
mapped, err := mapper(unescaped)
mapped, err := mapper(Value{raw: raw, delims: delims})
if err != nil {
return nil, err
}
Expand Down
Loading
Loading