From 42e0660be889feb52dd56cb7907602719da3710a Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 7 Apr 2026 09:44:32 +0200 Subject: [PATCH 1/4] string.c: str_subseq propagate coderange when it is 7bit Any substring of a 7bit string will be 7bit. --- string.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/string.c b/string.c index ef0e3864ea31dd..d512717dfe672a 100644 --- a/string.c +++ b/string.c @@ -3140,6 +3140,9 @@ str_subseq(VALUE str, long beg, long len) const int termlen = TERM_LEN(str); if (!SHARABLE_SUBSTRING_P(beg, len, RSTRING_LEN(str))) { str2 = rb_enc_str_new(RSTRING_PTR(str) + beg, len, rb_str_enc_get(str)); + if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) { + ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT); + } RB_GC_GUARD(str); return str2; } @@ -3152,12 +3155,19 @@ str_subseq(VALUE str, long beg, long len) TERM_FILL(ptr2+len, termlen); STR_SET_LEN(str2, len); + if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) { + ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT); + } + RB_GC_GUARD(str); } else { str_replace_shared(str2, str); RUBY_ASSERT(!STR_EMBED_P(str2)); - ENC_CODERANGE_CLEAR(str2); + if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) { + ENC_CODERANGE_CLEAR(str2); + } + RSTRING(str2)->as.heap.ptr += beg; if (RSTRING_LEN(str2) > len) { STR_SET_LEN(str2, len); From 13926009ba829f42037719b9a163a41a8d67abed Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 7 Apr 2026 16:57:55 +0900 Subject: [PATCH 2/4] [Bug #21983] Make `&nil` distinguishable from nonexistent block arg --- ast.c | 2 +- test/ruby/test_ast.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ast.c b/ast.c index b22330f6b26a9f..1ddc2b5791550e 100644 --- a/ast.c +++ b/ast.c @@ -698,7 +698,7 @@ node_children(VALUE ast_value, const NODE *node) : var_name(ainfo->rest_arg)), (ainfo->no_kwarg ? Qfalse : NEW_CHILD(ast_value, (NODE *)ainfo->kw_args)), (ainfo->no_kwarg ? 
Qfalse : NEW_CHILD(ast_value, ainfo->kw_rest_arg)), - var_name(ainfo->block_arg)); + (ainfo->no_blockarg ? Qfalse : var_name(ainfo->block_arg))); } case NODE_SCOPE: { diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb index 6d3999a32aaa16..fe0402cd09f509 100644 --- a/test/ruby/test_ast.rb +++ b/test/ruby/test_ast.rb @@ -720,6 +720,7 @@ def test_block_arg assert_equal(nil, block_arg.call('')) assert_equal(:block, block_arg.call('&block')) assert_equal(:&, block_arg.call('&')) + assert_equal(false, block_arg.call('&nil')) end def test_keyword_rest From fcd210086c82c4e4f2835561d7f7ce81e9edf1c5 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 7 Apr 2026 09:27:54 +0200 Subject: [PATCH 3/4] Allow sharing middle substring if the terminator is present Sharing middle substrings has been behind a compilation flag for a very long time, and it's unclear if we'll ever make it the default. However, we can still share middle substrings without breaking the zero-terminated contract if the source string happens to have the necessary NUL characters at that specific offset. This is the case for some file formats such as FlatBuffers, BSON keys and a few others. 
--- string.c | 34 ++++++++++++++++++++-------------- test/ruby/test_string.rb | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/string.c b/string.c index d512717dfe672a..106e6562eaf390 100644 --- a/string.c +++ b/string.c @@ -198,15 +198,30 @@ VALUE rb_cSymbol; #define STR_ENC_GET(str) get_encoding(str) +static inline bool +zero_filled(const char *s, int n) +{ + for (; n > 0; --n) { + if (*s++) return false; + } + return true; +} + #if !defined SHARABLE_MIDDLE_SUBSTRING # define SHARABLE_MIDDLE_SUBSTRING 0 #endif -#if !SHARABLE_MIDDLE_SUBSTRING -#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end)) + +static inline bool +SHARABLE_SUBSTRING_P(VALUE str, long beg, long len) +{ +#if SHARABLE_MIDDLE_SUBSTRING + return true; #else -#define SHARABLE_SUBSTRING_P(beg, len, end) 1 + long end = beg + len; + long source_len = RSTRING_LEN(str); + return end == source_len || zero_filled(RSTRING_PTR(str) + end, TERM_LEN(str)); #endif - +} static inline long str_embed_capa(VALUE str) @@ -2810,15 +2825,6 @@ rb_string_value_ptr(volatile VALUE *ptr) return RSTRING_PTR(str); } -static int -zero_filled(const char *s, int n) -{ - for (; n > 0; --n) { - if (*s++) return 0; - } - return 1; -} - static const char * str_null_char(const char *s, long len, const int minlen, rb_encoding *enc) { @@ -3138,7 +3144,7 @@ str_subseq(VALUE str, long beg, long len) RUBY_ASSERT(beg+len <= RSTRING_LEN(str)); const int termlen = TERM_LEN(str); - if (!SHARABLE_SUBSTRING_P(beg, len, RSTRING_LEN(str))) { + if (!SHARABLE_SUBSTRING_P(str, beg, len)) { str2 = rb_enc_str_new(RSTRING_PTR(str) + beg, len, rb_str_enc_get(str)); if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) { ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT); diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 0c211bbd7f3c1e..f2aade2aa1a1ec 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -3461,6 +3461,27 @@ def test_byteslice assert_equal(false, 
("\u3042"*10).byteslice(0, 20).valid_encoding?, bug7954) end + def test_shared_middle_string_terminator + ten = "0123456789" + hundred = ten * 10 + str = "#{hundred}\0#{hundred}".freeze + + require 'objspace' + + substr = str.byteslice(0, hundred.bytesize) + assert_equal hundred, substr + assert_includes ObjectSpace.dump(substr), ' "shared":true,' + + # Larger terminator + substr.force_encoding(Encoding::UTF_16BE) + assert_equal hundred.dup.force_encoding(Encoding::UTF_16BE), substr + refute_includes ObjectSpace.dump(substr), ' "shared":true,' + + substr = str.byteslice(0, hundred.bytesize + 1) + assert_equal hundred + "\0", substr + refute_includes ObjectSpace.dump(substr), ' "shared":true,' + end + def test_unknown_string_option str = nil assert_nothing_raised(SyntaxError) do From 2fda295cc9ad71ac91b40241acfb7fc98173278a Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 7 Apr 2026 05:24:43 -0500 Subject: [PATCH 4/4] [DOC] Doc for File::Stat --- file.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/file.c b/file.c index e40f67ec73817a..6d715269f849cf 100644 --- a/file.c +++ b/file.c @@ -5945,13 +5945,48 @@ rb_f_test(int argc, VALUE *argv, VALUE _) /* * Document-class: File::Stat * - * Objects of class File::Stat encapsulate common status information - * for File objects. The information is recorded at the moment the - * File::Stat object is created; changes made to the file after that - * point will not be reflected. File::Stat objects are returned by - * IO#stat, File::stat, File#lstat, and File::lstat. Many of these - * methods return platform-specific values, and not all values are - * meaningful on all systems. See also Kernel#test. + * A \File::Stat object contains information about an entry in the file system. + * + * Each of these methods returns a new \File::Stat object: + * + * - File#lstat. + * - File::Stat.new. + * - File::lstat. + * - File::stat. + * - IO#stat. 
+ * + * === Snapshot + * + * A new \File::Stat object takes an immediate "snapshot" of the entry's information; + * the captured information is never updated, + * regardless of changes in the actual entry. + * + * The entry must exist when File::Stat.new is called: + * + * filepath = 't.tmp' + * File.exist?(filepath) # => false + * File::Stat.new(filepath) # Raises Errno::ENOENT: No such file or directory. + * File.write(filepath, 'foo') # Create the file. + * stat = File::Stat.new(filepath) # Okay. + * + * Later changes to the actual entry do not change the \File::Stat object: + * + * File.atime(filepath) # => 2026-04-01 11:51:38.0014518 -0500 + * stat.atime # => 2026-04-01 11:51:38.0014518 -0500 + * File.write(filepath, 'bar') + * File.atime(filepath) # => 2026-04-01 11:58:11.922614 -0500 + * stat.atime # => 2026-04-01 11:51:38.0014518 -0500 + * File.delete(filepath) + * stat.atime # => 2026-04-01 11:51:38.0014518 -0500 + * + * === OS-Dependencies + * + * Methods in a \File::Stat object may return platform-dependent values, + * and not all values are meaningful on all systems; + * for example, File::Stat#blocks returns +nil+ on Windows, + * but returns an integer on Linux. + * + * See also Kernel#test. */ static VALUE