Skip to content

Commit 4eb4cc3

Browse files
chore: support different EOLs in streaming (#11)
1 parent 74f6a63 commit 4eb4cc3

File tree

4 files changed

+63
-20
lines changed

4 files changed

+63
-20
lines changed

lib/finch-api/util.rb

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -577,15 +577,29 @@ class << self
577577
#
578578
# @return [Enumerable]
579579
#
580-
def enum_lines(enum)
580+
def decode_lines(enum)
581+
re = /(\r\n|\r|\n)/
582+
buffer = String.new.b
583+
cr_seen = nil
584+
581585
chain_fused(enum) do |y|
582-
buffer = String.new
583586
enum.each do |row|
584587
buffer << row
585-
while (idx = buffer.index("\n"))
586-
y << buffer.slice!(..idx)
588+
while (match = re.match(buffer, cr_seen.to_i))
589+
case [match.captures.first, cr_seen]
590+
in ["\r", nil]
591+
cr_seen = match.end(1)
592+
next
593+
in ["\r" | "\r\n", Integer]
594+
y << buffer.slice!(..(cr_seen.pred))
595+
else
596+
y << buffer.slice!(..(match.end(1).pred))
597+
end
598+
cr_seen = nil
587599
end
588600
end
601+
602+
y << buffer.slice!(..(cr_seen.pred)) unless cr_seen.nil?
589603
y << buffer unless buffer.empty?
590604
end
591605
end
@@ -598,26 +612,26 @@ def enum_lines(enum)
598612
#
599613
# @return [Hash{Symbol=>Object}]
600614
#
601-
def parse_sse(lines)
615+
def decode_sse(lines)
602616
chain_fused(lines) do |y|
603617
blank = {event: nil, data: nil, id: nil, retry: nil}
604618
current = {}
605619

606620
lines.each do |line|
607-
case line.strip
621+
case line.sub(/\R$/, "")
608622
in ""
609623
next if current.empty?
610624
y << {**blank, **current}
611625
current = {}
612626
in /^:/
613627
next
614628
in /^([^:]+):\s?(.*)$/
615-
_, field, value = Regexp.last_match.to_a
629+
field, value = Regexp.last_match.captures
616630
case field
617631
in "event"
618632
current.merge!(event: value)
619633
in "data"
620-
(current[:data] ||= String.new) << (value << "\n")
634+
(current[:data] ||= String.new.b) << value << "\n"
621635
in "id" unless value.include?("\0")
622636
current.merge!(id: value)
623637
in "retry" if /^\d+$/ =~ value

rbi/lib/finch-api/util.rbi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,11 @@ module FinchAPI
157157
end
158158

159159
sig { params(enum: T::Enumerable[String]).returns(T::Enumerable[String]) }
160-
def self.enum_lines(enum)
160+
def self.decode_lines(enum)
161161
end
162162

163163
sig { params(lines: T::Enumerable[String]).returns(FinchAPI::Util::SSEMessage) }
164-
def self.parse_sse(lines)
164+
def self.decode_sse(lines)
165165
end
166166
end
167167
end

sig/finch-api/util.rbs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ module FinchAPI
100100
type sse_message =
101101
{ event: String?, data: String?, id: String?, retry: Integer? }
102102

103-
def self?.enum_lines: (Enumerable[String] enum) -> Enumerable[String]
103+
def self?.decode_lines: (Enumerable[String] enum) -> Enumerable[String]
104104

105-
def self?.parse_sse: (
105+
def self?.decode_sse: (
106106
Enumerable[String] lines
107107
) -> FinchAPI::Util::sse_message
108108
end

test/finch-api/util_test.rb

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,8 @@ def test_close_fused_sse_chain
320320
.map(&:to_s)
321321

322322
fused_1 = FinchAPI::Util.fused_enum(enum)
323-
fused_2 = FinchAPI::Util.enum_lines(fused_1)
324-
fused_3 = FinchAPI::Util.parse_sse(fused_2)
323+
fused_2 = FinchAPI::Util.decode_lines(fused_1)
324+
fused_3 = FinchAPI::Util.decode_sse(fused_2)
325325

326326
assert_equal(0, taken)
327327
FinchAPI::Util.close_fused!(fused_3)
@@ -330,7 +330,7 @@ def test_close_fused_sse_chain
330330
end
331331

332332
class FinchAPI::Test::UtilSseTest < Minitest::Test
333-
def test_enum_lines
333+
def test_decode_lines
334334
cases = {
335335
%w[] => %w[],
336336
%W[\n\n] => %W[\n \n],
@@ -340,15 +340,37 @@ def test_enum_lines
340340
%W[a\nb\n] => %W[a\n b\n],
341341
%W[\na b\n] => %W[\n ab\n],
342342
%W[\na b\n\n] => %W[\n ab\n \n],
343-
%W[\na b] => %W[\n ab]
343+
%W[\na b] => %W[\n ab],
344+
%W[\u1F62E\u200D\u1F4A8] => %W[\u1F62E\u200D\u1F4A8],
345+
%W[\u1F62E \u200D \u1F4A8] => %W[\u1F62E\u200D\u1F4A8]
344346
}
347+
eols = %W[\n \r \r\n]
345348
cases.each do |enum, expected|
346-
lines = FinchAPI::Util.enum_lines(enum)
349+
eols.each do |eol|
350+
lines = FinchAPI::Util.decode_lines(enum.map { _1.gsub("\n", eol) })
351+
assert_equal(expected.map { _1.gsub("\n", eol) }, lines.to_a, "eol=#{JSON.generate(eol)}")
352+
end
353+
end
354+
end
355+
356+
def test_mixed_decode_lines
357+
cases = {
358+
%w[] => %w[],
359+
%W[\r\r] => %W[\r \r],
360+
%W[\r \r] => %W[\r \r],
361+
%W[\r\r\r] => %W[\r \r \r],
362+
%W[\r\r \r] => %W[\r \r \r],
363+
%W[\r \n] => %W[\r\n],
364+
%W[\r\r\n] => %W[\r \r\n],
365+
%W[\n\r] => %W[\n \r]
366+
}
367+
cases.each do |enum, expected|
368+
lines = FinchAPI::Util.decode_lines(enum)
347369
assert_equal(expected, lines.to_a)
348370
end
349371
end
350372

351-
def test_parse_sse
373+
def test_decode_sse
352374
cases = {
353375
"empty input" => {
354376
[] => []
@@ -372,8 +394,8 @@ def test_parse_sse
372394
},
373395
"complete event" => {
374396
[
375-
"event: update\n",
376397
"id: 123\n",
398+
"event: update\n",
377399
"data: hello world\n",
378400
"retry: 5000\n",
379401
"\n"
@@ -454,12 +476,19 @@ def test_parse_sse
454476
{data: "first\n"},
455477
{data: "second\n"}
456478
]
479+
},
480+
"multibyte unicode" => {
481+
[
482+
"data: \u1F62E\u200D\u1F4A8\n"
483+
] => [
484+
{data: "\u1F62E\u200D\u1F4A8\n"}
485+
]
457486
}
458487
}
459488

460489
cases.each do |name, test_cases|
461490
test_cases.each do |input, expected|
462-
actual = FinchAPI::Util.parse_sse(input).map(&:compact)
491+
actual = FinchAPI::Util.decode_sse(input).map(&:compact)
463492
assert_equal(expected, actual, name)
464493
end
465494
end

0 commit comments

Comments
 (0)