From b3ff745618c90eae59e8edf10c3ce1e846269b09 Mon Sep 17 00:00:00 2001 From: Varun Chawla Date: Fri, 13 Feb 2026 00:52:11 -0800 Subject: [PATCH] Fix Reader.peek() IndexError when index exceeds buffer length When peek() is called with an index beyond the available data, update() may not be able to add enough data to the buffer (e.g. for string inputs where raw_buffer is None). The second buffer access then raises an IndexError instead of returning the '\0' sentinel. Add a bounds check after update() so that peek() returns '\0' when the requested position is past the end of the buffer, consistent with how the null terminator is used throughout the parser. Fixes #904 --- lib/yaml/reader.py | 4 ++- tests/test_reader_peek.py | 56 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 tests/test_reader_peek.py diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py index 774b0219b..d5a6b2893 100644 --- a/lib/yaml/reader.py +++ b/lib/yaml/reader.py @@ -89,7 +89,9 @@ def peek(self, index=0): return self.buffer[self.pointer+index] except IndexError: self.update(index+1) - return self.buffer[self.pointer+index] + if self.pointer+index < len(self.buffer): + return self.buffer[self.pointer+index] + return '\0' def prefix(self, length=1): if self.pointer+length >= len(self.buffer): diff --git a/tests/test_reader_peek.py b/tests/test_reader_peek.py new file mode 100644 index 000000000..28d1d43ac --- /dev/null +++ b/tests/test_reader_peek.py @@ -0,0 +1,56 @@ +import io + +import yaml +import yaml.reader + + +class TestReaderPeekBeyondBuffer: + """Tests for Reader.peek() when index exceeds available data. + + Regression test for https://github.com/yaml/pyyaml/issues/904 + """ + + def test_peek_beyond_end_returns_null(self): + """peek() past end of a string stream should return '\\0', not raise IndexError.""" + reader = yaml.reader.Reader('abc') + assert reader.peek(4) == '\0' + + def test_peek_at_null_terminator(self): + """peek() at the exact position of the null terminator should return '\\0'.""" + reader = yaml.reader.Reader('abc') + # buffer is 'abc\0', so index 3 is the '\0' + assert reader.peek(3) == '\0' + + def test_peek_far_beyond_end(self): + """peek() with a very large index should return '\\0'.""" + reader = yaml.reader.Reader('abc') + assert reader.peek(100) == '\0' + + def test_peek_within_range(self): + """peek() within range should still return the correct character.""" + reader = yaml.reader.Reader('abc') + assert reader.peek(0) == 'a' + assert reader.peek(1) == 'b' + assert reader.peek(2) == 'c' + + def test_peek_empty_string(self): + """peek() on an empty string should return '\\0'.""" + reader = yaml.reader.Reader('') + assert reader.peek(0) == '\0' + assert reader.peek(1) == '\0' + + def test_peek_beyond_end_bytes(self): + """peek() past end of a bytes stream should return '\\0'.""" + reader = yaml.reader.Reader(b'abc') + assert reader.peek(4) == '\0' + + def test_peek_beyond_end_file_stream(self): + """peek() past end of a file-like stream should return '\\0'.""" + reader = yaml.reader.Reader(io.StringIO('abc')) + assert reader.peek(4) == '\0' + + def test_loader_peek_beyond_end(self): + """Original reproducer from issue #904.""" + obj = yaml.loader.Loader('abc') + ret = obj.peek(4) + assert ret == '\0'