Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,32 @@

package hl7

// ValueDecoder converts post-unescape field value bytes to the desired
// encoding, typically UTF-8. The decoder is pre-configured for the
// source → target charset pair; it carries no charset parameter.
// ValueMapper converts field-content bytes to a new representation.
// It is used at read time (DecodeString) to decode stored bytes, and at
// write time (MapAllValues) to transform content before storage.
//
// The mapper receives post-unescape bytes and must return the transformed bytes.
// When used with DecodeString, returning an error aborts the decode.
// When used with MapAllValues, returning an error aborts the transform.
//
// Callers read MSH-18 to determine the source charset, then select an
// appropriate ValueDecoder before calling DecodeString. When decode is nil,
// appropriate ValueMapper before calling DecodeString. When decode is nil,
// DecodeString behaves identically to String() — no conversion is applied and
// no extra allocation is incurred beyond the Unescape fast path.
//
// Note: Unescape runs before the ValueDecoder, so the decoder receives
// Note: Unescape runs before the ValueMapper, so the mapper receives
// resolved byte values. Charset escape sequences (\C..\ and \M..\) are passed
// through verbatim by Unescape; a sophisticated ValueDecoder may parse them,
// but a simple byte-level decoder will treat them as-is.
type ValueDecoder func(data []byte) ([]byte, error)
// through verbatim by Unescape; a sophisticated ValueMapper may parse them,
// but a simple byte-level mapper will treat them as-is.
type ValueMapper func(data []byte) ([]byte, error)

// DecodeString returns the unescaped, charset-decoded string value of the
// Value. Unescape runs first; if decode is nil the unescaped bytes are
// converted to string directly, with no charset conversion applied.
//
// DecodeString is promoted to Field, Repetition, Component, and Subcomponent
// via their embedded Value.
func (v Value) DecodeString(decode ValueDecoder) (string, error) {
func (v Value) DecodeString(decode ValueMapper) (string, error) {
unescaped := Unescape(v.raw, v.delims)
if decode == nil {
return string(unescaped), nil
Expand Down
14 changes: 7 additions & 7 deletions charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
"testing"
)

// latin1ToUTF8 is a test-only ValueDecoder that converts Latin-1 (ISO 8859-1)
// latin1ToUTF8 is a test-only ValueMapper that converts Latin-1 (ISO 8859-1)
// bytes to UTF-8. Each byte in the range 0x80–0xFF is expanded to its UTF-8
// two-byte sequence; bytes below 0x80 are ASCII and pass through unchanged.
func latin1ToUTF8(data []byte) ([]byte, error) {
Expand Down Expand Up @@ -55,7 +55,7 @@ func TestValueDecodeString(t *testing.T) {
tests := []struct {
name string
raw []byte
decode ValueDecoder
decode ValueMapper
want string
wantErr bool
}{
Expand Down Expand Up @@ -142,7 +142,7 @@ func TestFieldDecodeString(t *testing.T) {
tests := []struct {
name string
raw []byte
decode ValueDecoder
decode ValueMapper
want string
wantErr bool
}{
Expand Down Expand Up @@ -205,7 +205,7 @@ func TestRepetitionDecodeString(t *testing.T) {
tests := []struct {
name string
raw []byte
decode ValueDecoder
decode ValueMapper
want string
wantErr bool
}{
Expand Down Expand Up @@ -262,7 +262,7 @@ func TestComponentDecodeString(t *testing.T) {
tests := []struct {
name string
raw []byte
decode ValueDecoder
decode ValueMapper
want string
wantErr bool
}{
Expand Down Expand Up @@ -319,7 +319,7 @@ func TestSubcomponentDecodeString(t *testing.T) {
tests := []struct {
name string
raw []byte
decode ValueDecoder
decode ValueMapper
want string
wantErr bool
}{
Expand Down Expand Up @@ -370,7 +370,7 @@ func TestSubcomponentDecodeString(t *testing.T) {
}

// TestDecodeStringUnescapeFirst verifies that Unescape runs before the
// ValueDecoder — a field containing \F\ (the escape sequence for the field
// ValueMapper — a field containing \F\ (the escape sequence for the field
// separator) gives the mapper a literal | byte, not the raw escape bytes.
func TestDecodeStringUnescapeFirst(t *testing.T) {
delims := DefaultDelimiters()
Expand Down
13 changes: 9 additions & 4 deletions doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
// # Character Set Decoding
//
// Field values in non-UTF-8 charsets (e.g. Latin-1 / ISO 8859-1 declared in
// MSH-18) can be decoded with DecodeString. A ValueDecoder is a
// MSH-18) can be decoded with DecodeString. A ValueMapper is a
// func([]byte) ([]byte, error) that converts post-unescape bytes to UTF-8.
// DecodeString is available on Value, Field, Repetition, Component, and
// Subcomponent:
Expand All @@ -69,9 +69,9 @@
// family, err := seg.Field(5).Rep(0).Component(1).DecodeString(decode)
//
// When decode is nil, DecodeString behaves identically to String() with no
// extra allocation. Unescape always runs before the decoder. The \C..\ and
// extra allocation. Unescape always runs before the mapper. The \C..\ and
// \M..\ charset escape sequences are passed through verbatim by Unescape; a
// sophisticated ValueDecoder may parse them, but a simple byte-level decoder
// sophisticated ValueMapper may parse them, but a simple byte-level mapper
// will treat them as-is.
//
// # Hierarchical Traversal
Expand Down Expand Up @@ -127,11 +127,16 @@
// hl7.Omit("PID-19"),
// hl7.Move("PID-4", "PID-3"),
// hl7.Copy("PID-4", "PID-3"),
// hl7.MapValue("PID-5.1", upperMapper),
// hl7.MapAllValues(latin1ToUTF8Mapper),
// )
//
// Field-level change constructors: Replace sets a value, Null sets the HL7
// null (""), Omit clears a field, Move copies src to dst then clears src,
// and Copy copies src to dst while preserving the source.
// Copy copies src to dst while preserving the source, MapValue applies a
// ValueMapper to a single location, and MapAllValues applies a ValueMapper to
// every leaf in the message (except MSH-1 and MSH-2). The mapper receives
// unescaped bytes; its output is re-escaped before storage.
//
// Segment-level change constructors add, remove, or reorder entire segments:
//
Expand Down
169 changes: 169 additions & 0 deletions transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,17 @@ type copyChange struct {

func (copyChange) applyChange() {}

// mapAllValuesChange is the Change value built by MapAllValues. It carries the
// mapper that applyOneChange hands to applyMapAllValues.
type mapAllValuesChange struct {
	mapper ValueMapper
}

// applyChange has an empty body; it presumably exists only so that
// mapAllValuesChange satisfies the Change interface (declared elsewhere).
func (mapAllValuesChange) applyChange() {}

// mapValueChange is the Change value built by MapValue: a single location
// paired with the mapper to run on the value found there.
type mapValueChange struct {
	// location is the terser-style address; applyOneChange validates it
	// with validateLocation before use.
	location string
	// mapper is applied (via mapLeafValue) to the value read from location.
	mapper ValueMapper
}

// applyChange has an empty body; it presumably exists only so that
// mapValueChange satisfies the Change interface (declared elsewhere).
func (mapValueChange) applyChange() {}

// Replace returns a Change that sets the value at location to the given string.
// The value is plain text and will be escaped for the output delimiter set.
func Replace(location, value string) Change { return replaceChange{location, value} }
Expand Down Expand Up @@ -144,6 +155,23 @@ func InsertSegmentBefore(beforeType string, beforeIndex int, newType string) Cha
// at the end of the message. This change never returns an error.
func AppendSegment(newType string) Change { return appendSegmentChange{newType} }

// MapAllValues builds a Change that runs mapper over every leaf value of the
// message, empty and null ("") leaves included. The delimiter definition
// fields MSH-1 and MSH-2 are never modified.
//
// The mapper is handed unescaped field-content bytes, and whatever it returns
// is escaped before being stored. Should the mapper fail for any value,
// Transform returns that error and the message is not modified.
func MapAllValues(mapper ValueMapper) Change {
	return mapAllValuesChange{mapper: mapper}
}

// MapValue builds a Change that runs mapper over the value found at the given
// terser-style location. The mapper is handed the unescaped bytes stored at
// that location (nil if the location is absent), and whatever it returns is
// escaped before being stored. Should the mapper fail, Transform returns that
// error.
func MapValue(location string, mapper ValueMapper) Change {
	return mapValueChange{
		location: location,
		mapper:   mapper,
	}
}

// Transform applies the given changes to the message and returns a new Message.
// The original message is not modified. The output uses the same delimiters as the source.
func (m *Message) Transform(changes ...Change) (*Message, error) {
Expand Down Expand Up @@ -681,6 +709,21 @@ func applyOneChange(w *workBuf, delims Delimiters, c Change) error {

case appendSegmentChange:
w.insertBefore(len(w.segs), []byte(ch.newType))

case mapAllValuesChange:
return applyMapAllValues(w, delims, ch.mapper)

case mapValueChange:
loc, err := validateLocation(ch.location)
if err != nil {
return err
}
raw := readValueAtLocation(w, delims, loc)
newRaw, err := mapLeafValue(raw, delims, ch.mapper)
if err != nil {
return err
}
applyValueAtLocation(w, delims, loc, newRaw)
}
return nil
}
Expand Down Expand Up @@ -905,6 +948,132 @@ func reencodeData(data []byte, src, dst Delimiters) []byte {
return out
}

// applyMapAllValues walks every segment in w and, field by field, rewrites
// the field with the result of mapFieldValues. For segments whose bytes start
// with "MSH" the walk begins at field 3, so MSH-1 and MSH-2 are left alone;
// every other segment starts at field 1.
//
// The first error reported by the mapper stops the walk and is returned as-is.
func applyMapAllValues(w *workBuf, delims Delimiters, mapper ValueMapper) error {
	for segIdx := range w.segs {
		// Skip the delimiter-definition fields of an MSH segment.
		start := 1
		if bytes.HasPrefix(w.segBytes(segIdx), []byte("MSH")) {
			start = 3
		}

		fieldNum := start
		for {
			// Fetch the segment bytes afresh on every pass: replaceField may
			// reallocate w.data and shift segment offsets.
			current := readFieldBytes(w.segBytes(segIdx), delims, fieldNum)
			if current == nil {
				// A nil result ends the walk over this segment's fields.
				break
			}

			updated, err := mapFieldValues(current, delims, mapper)
			if err != nil {
				return err
			}
			// Only write back when the mapped bytes actually differ.
			if !bytes.Equal(current, updated) {
				w.replaceField(segIdx, delims, fieldNum, updated)
			}
			fieldNum++
		}
	}
	return nil
}

// mapFieldValues runs mapper over every leaf inside one field's raw bytes.
//
// A field that countDelimited reports as anything other than a single
// repetition is split on the repetition delimiter by mapDelimited, which
// returns raw itself when no repetition changed. Otherwise the whole field is
// handed to mapRepValues directly.
func mapFieldValues(raw []byte, delims Delimiters, mapper ValueMapper) ([]byte, error) {
	if countDelimited(raw, delims.Repetition) != 1 {
		return mapDelimited(raw, delims.Repetition, func(rep []byte) ([]byte, error) {
			return mapRepValues(rep, delims, mapper)
		})
	}
	return mapRepValues(raw, delims, mapper)
}

// mapRepValues runs mapper over every leaf inside one repetition's bytes.
//
// A repetition that countDelimited reports as anything other than a single
// component is split on the component delimiter by mapDelimited, which
// returns raw itself when no component changed. Otherwise the whole
// repetition is handed to mapComponentValues directly.
func mapRepValues(raw []byte, delims Delimiters, mapper ValueMapper) ([]byte, error) {
	if countDelimited(raw, delims.Component) != 1 {
		return mapDelimited(raw, delims.Component, func(comp []byte) ([]byte, error) {
			return mapComponentValues(comp, delims, mapper)
		})
	}
	return mapComponentValues(raw, delims, mapper)
}

// mapComponentValues runs mapper over every leaf inside one component's bytes.
//
// A component that countDelimited reports as anything other than a single
// subcomponent is split on the subcomponent delimiter by mapDelimited, which
// returns raw itself when no subcomponent changed. Otherwise the whole
// component is treated as one leaf and handed to mapLeafValue.
func mapComponentValues(raw []byte, delims Delimiters, mapper ValueMapper) ([]byte, error) {
	if countDelimited(raw, delims.SubComponent) != 1 {
		return mapDelimited(raw, delims.SubComponent, func(sub []byte) ([]byte, error) {
			return mapLeafValue(sub, delims, mapper)
		})
	}
	return mapLeafValue(raw, delims, mapper)
}

// mapDelimited calls fn on each sep-delimited piece of raw, in order, and
// joins the results with sep.
//
// The output buffer is allocated lazily: as long as fn returns bytes equal to
// its input, nothing is copied, and if that holds for every piece raw itself
// is returned. At the first piece that differs, the earlier (unchanged)
// pieces are copied into a fresh buffer, each followed by sep, and from then
// on every mapped piece is appended.
//
// An error from fn aborts the walk and is returned with a nil slice.
func mapDelimited(raw []byte, sep byte, fn func([]byte) ([]byte, error)) ([]byte, error) {
	var result []byte
	total := countDelimited(raw, sep)

	for idx := 0; idx < total; idx++ {
		original := nthSlice(raw, sep, idx)
		replaced, err := fn(original)
		if err != nil {
			return nil, err
		}

		switch {
		case result != nil:
			// Already building output: separate from the previous piece.
			result = append(result, sep)
		case bytes.Equal(original, replaced):
			// Nothing has changed yet, so keep deferring the allocation.
			continue
		default:
			// First difference, at idx: replay pieces 0..idx-1, each with
			// its trailing separator, so the changed piece can follow.
			result = make([]byte, 0, len(raw))
			for prev := 0; prev < idx; prev++ {
				result = append(result, nthSlice(raw, sep, prev)...)
				result = append(result, sep)
			}
		}
		result = append(result, replaced...)
	}

	if result == nil {
		// No piece changed.
		return raw, nil
	}
	return result, nil
}

// mapLeafValue unescapes raw, runs mapper over the unescaped bytes, and
// escapes the mapper's output for the given delimiter set.
//
// A nil mapper is treated as the identity: raw is returned untouched instead
// of panicking on a nil function call. This mirrors DecodeString, where a nil
// ValueMapper means "no conversion".
//
// An error from mapper is returned unchanged, together with a nil slice.
//
// NOTE(review): per the ValueMapper docs, Unescape passes \C..\ and \M..\
// sequences through verbatim; confirm that Escape does not re-escape their
// backslashes when such a leaf is round-tripped here.
func mapLeafValue(raw []byte, delims Delimiters, mapper ValueMapper) ([]byte, error) {
	if mapper == nil {
		return raw, nil
	}

	mapped, err := mapper(Unescape(raw, delims))
	if err != nil {
		return nil, err
	}
	return Escape(mapped, delims), nil
}

// appendMaybeEscaped appends a literal byte to out, escaping it if it
// matches a destination delimiter character.
func appendMaybeEscaped(out []byte, b byte, dst Delimiters) []byte {
Expand Down
Loading
Loading