From 4ad44e52ae7ba1ef49bd96ac6e0ccaad68096700 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Thu, 18 Jun 2026 20:33:37 -0500 Subject: [PATCH 01/13] [ruby/json] Update the json_minefield_parser_test.rb to remove JRUBY_PENDING as the new JRuby parser successfully parsed those files. https://github.com/ruby/json/commit/e954a6fe4e --- test/json/json_minefield_parser_test.rb | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/test/json/json_minefield_parser_test.rb b/test/json/json_minefield_parser_test.rb index 71590325573edf..e6dcb54b8d341b 100644 --- a/test/json/json_minefield_parser_test.rb +++ b/test/json/json_minefield_parser_test.rb @@ -11,22 +11,10 @@ class << self private def define_test(name, &block) - if RUBY_ENGINE == 'jruby' && JRUBY_PENDING.include?(name) - define_method("test_#{name}") do - pend("#{name} doesn't pass on JRuby", &block) - end - else - define_method("test_#{name}", &block) - end + define_method("test_#{name}", &block) end end - JRUBY_PENDING = %w( - n_structure_open_array_object - n_structure_100000_opening_arrays - n_object_trailing_comment_slash_open - ).freeze - INVALID_ENCODING_TESTS = %w( i_string_truncated-utf-8 i_string_overlong_sequence_6_bytes_null From 57d5247f91b63d4c915ed94adae104eace1d0953 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 19 Jun 2026 14:39:51 +0900 Subject: [PATCH 02/13] [ruby/mmtk] Call rb_memerror when OOM https://github.com/ruby/mmtk/commit/6258cfa315 --- gc/mmtk/mmtk.c | 5 +++++ gc/mmtk/src/collection.rs | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index a725432c6e7c3a..0e49fbc6f4f12e 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -909,6 +909,11 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags VALUE *alloc_obj = (VALUE *)rb_mmtk_alloc_fast_path(objspace, ractor_cache, alloc_size, MMTk_MIN_OBJ_ALIGN); if (!alloc_obj) { alloc_obj = mmtk_alloc(ractor_cache->mutator, alloc_size, MMTk_MIN_OBJ_ALIGN, 0, MMTK_ALLOCATION_SEMANTICS_DEFAULT); + + // On heap exhaustion raise NoMemoryError. + if (RB_UNLIKELY(alloc_obj == NULL)) { + rb_memerror(); + } } alloc_obj++; diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs index 648efa4e274783..81b39737fba057 100644 --- a/gc/mmtk/src/collection.rs +++ b/gc/mmtk/src/collection.rs @@ -9,6 +9,7 @@ use crate::upcalls; use crate::Ruby; use mmtk::memory_manager; use mmtk::scheduler::*; +use mmtk::util::alloc::AllocationError; use mmtk::util::heap::GCTriggerPolicy; use mmtk::util::VMMutatorThread; use mmtk::util::VMThread; @@ -63,6 +64,19 @@ impl Collection for VMCollection { (upcalls().block_for_gc)(tls); } + fn out_of_memory(_tls: VMThread, err_kind: AllocationError) { + match err_kind { + // The heap is exhausted and could not be grown. Return normally + // without aborting. + AllocationError::HeapOutOfMemory => {} + // The OS refused an mmap. This is unrecoverable, so abort the + // process via the same panic handler used for GC-thread panics. + AllocationError::MmapOutOfMemory => { + (upcalls().mutator_thread_panic_handler)(); + } + } + } + fn spawn_gc_thread(_tls: VMThread, ctx: GCThreadContext) { let join_handle = match ctx { GCThreadContext::Worker(mut worker) => thread::Builder::new() From ebd2fcf9aa138174d942165c832b83ba8e836527 Mon Sep 17 00:00:00 2001 From: Fletcher Dares Date: Thu, 18 Jun 2026 18:50:46 -0400 Subject: [PATCH 03/13] Test ASCII-8BIT case mapping byte parity --- test/ruby/enc/test_case_mapping.rb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb index a7d1ed0d1663f8..27567bb0b4bfe9 100644 --- a/test/ruby/enc/test_case_mapping.rb +++ b/test/ruby/enc/test_case_mapping.rb @@ -60,6 +60,31 @@ def test_ascii check_swapcase_properties 'yUKIHIRO matsumoto (MAtz)', 'Yukihiro MATSUMOTO (maTZ)' end + def test_ascii_8bit_case_mapping_all_bytes + bytes = (0..255).to_a.pack("C*") + upper_ascii = "A".ord.."Z".ord + lower_ascii = "a".ord.."z".ord + downcase_expected = (0..255).map {|byte| upper_ascii.cover?(byte) ? byte + 32 : byte }.pack("C*") + upcase_expected = (0..255).map {|byte| lower_ascii.cover?(byte) ? byte - 32 : byte }.pack("C*") + + assert_equal Encoding::ASCII_8BIT, bytes.encoding + assert_equal downcase_expected, bytes.downcase + assert_equal downcase_expected, bytes.downcase(:fold) + assert_equal upcase_expected, bytes.upcase + + downcased = bytes.dup + assert_same downcased, downcased.downcase! + assert_equal downcase_expected, downcased + + folded = bytes.dup + assert_same folded, folded.downcase!(:fold) + assert_equal downcase_expected, folded + + upcased = bytes.dup + assert_same upcased, upcased.upcase! + assert_equal upcase_expected, upcased + end + def test_invalid assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').upcase } assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').downcase } From 51d4ad5f9a5da6f602e5d106b627603761a30576 Mon Sep 17 00:00:00 2001 From: Fletcher Dares Date: Thu, 18 Jun 2026 18:50:47 -0400 Subject: [PATCH 04/13] Fast path ASCII-8BIT case mapping --- benchmark/string_downcase.yml | 8 ++++++++ benchmark/string_upcase.yml | 8 ++++++++ string.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/benchmark/string_downcase.yml b/benchmark/string_downcase.yml index 1fea6afbecdf52..c5cd94606dec7b 100644 --- a/benchmark/string_downcase.yml +++ b/benchmark/string_downcase.yml @@ -7,6 +7,10 @@ prelude: | nonascii10 = nonascii1 * 10 nonascii100 = nonascii10 * 10 nonascii1000 = nonascii100 * 10 + ascii8bit256 = (0..255).to_a.pack("C*") + ascii8bit2560 = ascii8bit256 * 10 + ascii8bit25600 = ascii8bit2560 * 10 + ascii8bit256000 = ascii8bit25600 * 10 benchmark: downcase-1: str1.upcase downcase-10: str10.upcase @@ -16,3 +20,7 @@ benchmark: downcase-nonascii10: nonascii10.downcase downcase-nonascii100: nonascii100.downcase downcase-nonascii1000: nonascii1000.downcase + downcase-ascii8bit-256: ascii8bit256.downcase + downcase-ascii8bit-2560: ascii8bit2560.downcase + downcase-ascii8bit-25600: ascii8bit25600.downcase + downcase-ascii8bit-256000: ascii8bit256000.downcase diff --git a/benchmark/string_upcase.yml b/benchmark/string_upcase.yml index dab84bbde23ebd..d2c5f4960b3804 100644 --- a/benchmark/string_upcase.yml +++ b/benchmark/string_upcase.yml @@ -7,6 +7,10 @@ prelude: | nonascii10 = nonascii1 * 10 nonascii100 = nonascii10 * 10 nonascii1000 = nonascii100 * 10 + ascii8bit256 = (0..255).to_a.pack("C*") + ascii8bit2560 = ascii8bit256 * 10 + ascii8bit25600 = ascii8bit2560 * 10 + ascii8bit256000 = ascii8bit25600 * 10 benchmark: upcase-1: str1.upcase upcase-10: str10.upcase @@ -16,3 +20,7 @@ benchmark: upcase-nonascii10: nonascii10.upcase upcase-nonascii100: nonascii100.upcase upcase-nonascii1000: nonascii1000.upcase + upcase-ascii8bit-256: ascii8bit256.upcase + upcase-ascii8bit-2560: ascii8bit2560.upcase + upcase-ascii8bit-25600: ascii8bit25600.upcase + upcase-ascii8bit-256000: ascii8bit256000.upcase diff --git a/string.c b/string.c index 5c6151294d506e..408f899edbad1c 100644 --- a/string.c +++ b/string.c @@ -7836,7 +7836,8 @@ case_option_single_p(OnigCaseFoldType flags, rb_encoding *enc, VALUE str) { if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc) == 1)) return true; - return !(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT; + return !(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI) && + (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT || rb_is_ascii8bit_enc(enc)); } /* 16 should be long enough to absorb any kind of single character length increase */ From 73cd91b8165dd34c66b8e18c0c4e6f76bd1c9087 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 19 Jun 2026 10:18:25 +0200 Subject: [PATCH 05/13] [ruby/json] Implement ResumableParser#parsed_bytes This is intended to make it easier to securely parse untrusted inputs. https://github.com/ruby/json/commit/21c2bbea1c --- ext/json/parser/parser.c | 69 +++++++++++++++++++++++++++--- test/json/resumable_parser_test.rb | 25 ++++++++++- 2 files changed, 86 insertions(+), 8 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 7b028635c31887..59cdc4273503bd 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -2162,6 +2162,9 @@ typedef struct JSON_ResumableParserStruct { rvalue_stack value_stack; json_frame_stack frames; VALUE buffer; + size_t parsed_bytes; + size_t incomplete_bytes; + bool complete; bool in_use; } JSON_ResumableParser; @@ -2282,6 +2285,18 @@ static inline JSON_ResumableParser *cResumableParser_get(VALUE self) * * An incomplete document is buffered in full and there is no size limit, so when reading * from an untrusted source the caller is responsible for bounding how much data is fed. + * For example: + * + * loop do + * if parser.parsed_bytes > DOCUMENT_MAX_SIZE + * raise "document too large" + * end + * + * parser << read_chunk + * while parser.parse + * process(parser.value) + * end + * end */ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) { @@ -2398,6 +2413,13 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) static VALUE cResumableParser_parse(VALUE self) { JSON_ResumableParser *parser = ResumableParser_acquire(self, true); + + if (parser->complete) { + parser->parsed_bytes = 0; + parser->incomplete_bytes = 0; + parser->complete = false; + } + if (!parser->buffer) { parser->in_use = false; return Qfalse; @@ -2427,20 +2449,28 @@ static VALUE cResumableParser_parse(VALUE self) .config = &parser->config, }; int status; - bool complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status); - parser->in_use = false; + const char *initial_cursor = parser->state.cursor; + parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status); if (status) { - complete = false; VALUE error_source = rb_ivar_get(rb_errinfo(), i_at_eos); if (error_source == self) { - complete = false; // is an EOS error raised by ourself + parser->complete = false; // is an EOS error raised by ourself rb_set_errinfo(Qnil); + status = 0; } else { - rb_jump_tag(status); // reraise + parser->complete = true; // a parse error is considered complete } } + + parser->parsed_bytes += parser->state.cursor - initial_cursor; + parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor; + + parser->in_use = false; + if (status) { + rb_jump_tag(status); // reraise + } RB_GC_GUARD(Vsource); - return complete ? Qtrue : Qfalse; + return parser->complete ? Qtrue : Qfalse; } /* @@ -2498,6 +2528,9 @@ static VALUE cResumableParser_clear(VALUE self) { JSON_ResumableParser *parser = ResumableParser_acquire(self, false); parser->buffer = 0; + parser->complete = true; + parser->parsed_bytes = 0; + parser->incomplete_bytes = 0; parser->frames.head = 0; parser->value_stack.head = 0; parser->state.name_cache.length = 0; @@ -2633,6 +2666,29 @@ static VALUE cResumableParser_eos_p(VALUE self) return eos(&parser->state) ? Qtrue : Qfalse; } +/* + * call-seq: parsed_bytes -> integer + * + * Returns the number of bytes parsed since the start of the current partial value. + * This is intended to be used for securing against untrusted input: + * + * loop do + * if parser.parsed_bytes > DOCUMENT_MAX_SIZE + * raise "document too large" + * end + * + * parser << read_chunk + * while parser.parse + * process(parser.value) + * end + * end + */ +static VALUE cResumableParser_parsed_bytes(VALUE self) +{ + JSON_ResumableParser *parser = cResumableParser_get(self); + return ULL2NUM(parser->parsed_bytes + parser->incomplete_bytes); +} + void Init_parser(void) { #ifdef HAVE_RB_EXT_RACTOR_SAFE @@ -2669,6 +2725,7 @@ void Init_parser(void) rb_define_method(cResumableParser, "clear", cResumableParser_clear, 0); rb_define_method(cResumableParser, "rest", cResumableParser_rest, 0); rb_define_method(cResumableParser, "eos?", cResumableParser_eos_p, 0); + rb_define_method(cResumableParser, "parsed_bytes", cResumableParser_parsed_bytes, 0); rb_global_variable(&CNaN); CNaN = rb_const_get(mJSON, rb_intern("NaN")); diff --git a/test/json/resumable_parser_test.rb b/test/json/resumable_parser_test.rb index a720679c210984..3e22a18e0a3313 100644 --- a/test/json/resumable_parser_test.rb +++ b/test/json/resumable_parser_test.rb @@ -156,7 +156,7 @@ def test_parse_byte_by_byte_string end def test_parse_byte_by_byte_numbers - assert_resumed_parsing('123 ') + assert_resumed_parsing('123 ', trailing_bytes: 1) end def test_nul_byte_is_a_syntax_error @@ -364,6 +364,26 @@ def test_buffer_shrink parser.value end + def test_parsed_bytes + chunk = '[1, 2, 3, 4, tru' + @parser << chunk + refute @parser.parse + assert_equal chunk.bytesize, @parser.parsed_bytes + + @parser << 'e][]' + assert @parser.parse + assert_equal chunk.bytesize + 2, @parser.parsed_bytes + + assert @parser.parse + assert_equal 2, @parser.parsed_bytes + + @parser << chunk + refute @parser.parse + assert_equal chunk.bytesize, @parser.parsed_bytes + @parser.clear + assert_equal 0, @parser.parsed_bytes + end + private def assert_parse_error(json) @@ -389,7 +409,7 @@ def assert_partial_value(expected, json) end end - def assert_resumed_parsing(json, parser = @parser) + def assert_resumed_parsing(json, parser = @parser, trailing_bytes: 0) expected = JSON.parse(json) last_parsed_byte_index = 0 @@ -402,6 +422,7 @@ def assert_resumed_parsing(json, parser = @parser) assert_equal expected, actual remaining_bytes = (json.bytesize - last_parsed_byte_index) assert_equal 0, remaining_bytes, "unconsumed bytes: #{actual.inspect}, remaining: #{json.byteslice(-1, remaining_bytes).inspect}" + assert_equal json.bytesize - trailing_bytes, parser.parsed_bytes end def assert_parse_stream(expected, json, parser = @parser) From 46128b15d519e222eb6649a64bed9d61920ec243 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 19 Jun 2026 10:34:15 +0200 Subject: [PATCH 06/13] [ruby/json] ResumableParser: Don't compute lines and columns on parse error Fix: https://github.com/ruby/json/issues/1022 They can't always be accurate because we don't always keep the full document in the buffer. As such it's better never to compute them than to sometimes provide wrong coordinates. In theory we could keep the number of lines since the start of the parse, but that's more book keeping for little utility. Anyway, these are useful to find a syntax error in a file, not so much in a stream of documents. https://github.com/ruby/json/commit/b2372115af --- ext/json/parser/parser.c | 32 +++++++++++++++++++----------- test/json/resumable_parser_test.rb | 14 +++++++++++-- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 59cdc4273503bd..a39b62205d5ceb 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -627,7 +627,7 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state) #define PARSE_ERROR_FRAGMENT_LEN 32 -static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column) +static VALUE build_parse_error_message(const char *format, JSON_ParserState *state) { unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3]; @@ -661,9 +661,7 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta } } - VALUE message = rb_enc_sprintf(enc_utf8, format, ptr); - rb_str_catf(message, " at line %ld column %ld", line, column); - return message; + return rb_enc_sprintf(enc_utf8, format, ptr); } static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, long column, bool eos) @@ -679,10 +677,15 @@ static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state, bool eos) { - long line, column; - cursor_position(state, &line, &column); - VALUE message = build_parse_error_message(format, state, line, column); - rb_exc_raise(parse_error_new(state, message, line, column, eos)); + VALUE message = build_parse_error_message(format, state); + if (state->parser) { // line and columns can't be accurate in resumable + rb_exc_raise(parse_error_new(state, message, 0, 0, eos)); + } else { + long line, column; + cursor_position(state, &line, &column); + rb_str_catf(message, " at line %ld column %ld", line, column); + rb_exc_raise(parse_error_new(state, message, line, column, eos)); + } } NORETURN(static) void raise_eos_error(const char *format, JSON_ParserState *state) @@ -1172,10 +1175,15 @@ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE d rb_inspect(duplicate_key) ); - long line, column; - cursor_position(state, &line, &column); - rb_str_concat(message, build_parse_error_message("", state, line, column)) ; - rb_exc_raise(parse_error_new(state, message, line, column, false)); + rb_str_concat(message, build_parse_error_message("", state)); + if (state->parser) { // line and columns can't be accurate in resumable + rb_exc_raise(parse_error_new(state, message, 0, 0, false)); + } else { + long line, column; + cursor_position(state, &line, &column); + rb_str_catf(message, " at line %ld column %ld", line, column); + rb_exc_raise(parse_error_new(state, message, line, column, false)); + } } NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs) diff --git a/test/json/resumable_parser_test.rb b/test/json/resumable_parser_test.rb index 3e22a18e0a3313..1a981e6b89de4a 100644 --- a/test/json/resumable_parser_test.rb +++ b/test/json/resumable_parser_test.rb @@ -384,14 +384,24 @@ def test_parsed_bytes assert_equal 0, @parser.parsed_bytes end + def test_parse_error_message + error = assert_parse_error("\n\n[plop\nfoo", "unexpected character: 'plop'") + assert_equal 0, error.line + assert_equal 0, error.column + end + private - def assert_parse_error(json) + def assert_parse_error(json, expected_error_message = nil) parser = new_parser parser << json - assert_raise(JSON::ParserError, "expected a parse error for #{json.inspect}") do + error = assert_raise(JSON::ParserError, "expected a parse error for #{json.inspect}") do parser.parse end + if expected_error_message + assert_equal expected_error_message, error.message + end + error end def assert_incomplete(json) From c88430e1878fe8d219a239259728a90098e014d2 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 17 Jun 2026 15:11:55 +0200 Subject: [PATCH 07/13] Fix ar_find_entry_hint() to handle #eql? or another thread changing bound or converting to st_table * While perusing code in hash.c I found it suspicious that ar_find_entry_hint() didn't reread bound in the loop and yet called arbitrary code through #eql?. * ar_find_entry_hint() before this commit would not check if bound or the storage (AR->ST) changed and would return a bin index which is not correct to access. * Check bound in the loop so it's always up-to-date. * Return RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE when converted to st_table and make callers retry the operation as a st_table. [Bug #22120] Co-Authored-By: Claude Opus 4.6 --- hash.c | 30 +++++++++++++++++++++++++----- test/ruby/test_hash.rb | 29 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/hash.c b/hash.c index a694d6bdffb944..cf5d7a3934129d 100644 --- a/hash.c +++ b/hash.c @@ -402,6 +402,7 @@ typedef st_index_t st_hash_t; */ #define RHASH_AR_TABLE_MAX_BOUND RHASH_AR_TABLE_MAX_SIZE +#define RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE (RHASH_AR_TABLE_MAX_BOUND + 1) #define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->pairs[n]) #define RHASH_AR_CLEARED_HINT 0xff @@ -603,18 +604,22 @@ ar_equal(VALUE x, VALUE y) return rb_any_cmp(x, y) == 0; } +// Returns the bin index if found, RHASH_AR_TABLE_MAX_BOUND if not found, +// or RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE if #eql? or a Thread converted the hash to st_table. static unsigned ar_find_entry_hint(VALUE hash, ar_hint_t hint, st_data_t key) { - unsigned i, bound = RHASH_AR_TABLE_BOUND(hash); - const ar_hint_t *hints = RHASH_AR_TABLE(hash)->ar_hint.ary; - /* if table is NULL, then bound also should be 0 */ - for (i = 0; i < bound; i++) { + for (unsigned i = 0; i < RHASH_AR_TABLE_BOUND(hash); i++) { + const ar_hint_t *hints = RHASH_AR_TABLE(hash)->ar_hint.ary; if (hints[i] == hint) { ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i); - if (ar_equal(key, pair->key)) { + int eq = ar_equal(key, pair->key); + if (UNLIKELY(!RHASH_AR_TABLE_P(hash))) { + return RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE; + } + if (eq) { RB_DEBUG_COUNTER_INC(artable_hint_hit); return i; } @@ -898,6 +903,9 @@ ar_foreach_check(VALUE hash, st_foreach_check_callback_func *func, st_data_t arg pair = RHASH_AR_TABLE_REF(hash, i); if (pair->key == never) break; ret = ar_find_entry_hint(hash, hint, key); + if (UNLIKELY(ret == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + ensure_ar_table(hash); + } if (ret == RHASH_AR_TABLE_MAX_BOUND) { (*func)(0, 0, arg, 1); return 2; @@ -937,6 +945,9 @@ ar_update(VALUE hash, st_data_t key, if (RHASH_AR_TABLE_SIZE(hash) > 0) { bin = ar_find_entry(hash, hash_value, key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return -1; + } existing = (bin != RHASH_AR_TABLE_MAX_BOUND) ? TRUE : FALSE; } else { @@ -990,6 +1001,9 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value) } bin = ar_find_entry(hash, hash_value, key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return -1; + } if (bin == RHASH_AR_TABLE_MAX_BOUND) { if (RHASH_AR_TABLE_SIZE(hash) >= RHASH_AR_TABLE_MAX_SIZE) { return -1; @@ -1023,6 +1037,9 @@ ar_lookup(VALUE hash, st_data_t key, st_data_t *value) return st_lookup(RHASH_ST_TABLE(hash), key, value); } unsigned bin = ar_find_entry(hash, hash_value, key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return st_lookup(RHASH_ST_TABLE(hash), key, value); + } if (bin == RHASH_AR_TABLE_MAX_BOUND) { return 0; @@ -1049,6 +1066,9 @@ ar_delete(VALUE hash, st_data_t *key, st_data_t *value) } bin = ar_find_entry(hash, hash_value, *key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return st_delete(RHASH_ST_TABLE(hash), key, value); + } if (bin == RHASH_AR_TABLE_MAX_BOUND) { if (value != 0) *value = 0; diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index 2d1b513c7092c7..a6b7f26dd6e6cd 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -2431,4 +2431,33 @@ def initialize(val) = @hash = val end assert_equal values, hash.values, "[ruby-core:121239] [Bug #21170]" end + + def test_ar_find_entry_hint_eql_mutates_hash + # ar_find_entry_hint caches bound and hints, then calls #eql? which + # can mutate the hash. If #eql? triggers AR->ST conversion the loop + # would read st_table memory as ar_table pairs. + key_class = Class.new do + attr_reader :v + def initialize(v, h = nil) + @v = v + @h = h + end + def hash; 0; end + def eql?(other) + if @h + # Trigger AR->ST conversion + @h[42] = 42 + end + other.is_a?(self.class) && @v == other.v + end + end + + h = {} + 8.times { |i| h[key_class.new(i)] = i } + + # Not in the hash, so ar_find_entry_hint checks every entry. + lookup_key = key_class.new(-1, h) + + assert_equal nil, h[lookup_key] + end end From fe5ec34eb4ec16ca1fcccc4b3ac6a948295d770c Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Thu, 18 Jun 2026 17:08:15 +0200 Subject: [PATCH 08/13] Refactor DO_PTR_EQUAL_CHECK to read entry fields once into locals * The macros would expand `ptr` 3 times which could cause extra reads. * We had reports of many segfaults in this area (e.g. https://github.com/DataDog/dd-trace-rb/issues/5718). Even though the code before looks correct semantically, this makes it definitely OK and much easier to follow, by reading fields from the entry only once and not touching the entry after #eql? has returned. The entry can be a pointer to free'd memory if the table was rebuilt during #eql?. * Separate functions for st_table and set_table avoid unsafe casts. Co-Authored-By: Claude Opus 4.6 --- st.c | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/st.c b/st.c index 6bf83c94cdadf0..550bcc6325dd7c 100644 --- a/st.c +++ b/st.c @@ -184,18 +184,33 @@ static const struct st_hash_type type_strcasehash = { #define free_fixed_ptr(v) free(v) #endif -#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) -#define PTR_EQUAL(tab, ptr, hash_val, key_) \ - ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) +/* Compare an entry's hash and key against given hash_val and key. + Entry fields must be read into locals by the caller before passing + them here, to avoid re-reading from potentially-freed memory after + #eql? triggers a table rebuild. */ +static inline int +entry_equal(const struct st_hash_type *type, + st_hash_t entry_hash, st_data_t entry_key, + st_hash_t hash_val, st_data_t key) +{ + return (entry_hash == hash_val) && + ((entry_key == key) || (*type->compare)(key, entry_key) == 0); +} + +/* As entry_equal, but also checks whether the table was rebuilt + during the comparison (i.e. #eql? mutated it). */ +static inline void +ptr_equal_check(const st_table *tab, const st_table_entry *entry, + st_hash_t hash_val, st_data_t key, + int *res, int *rebuilt_p) +{ + unsigned int old_rebuilds_num = tab->rebuilds_num; + *res = entry_equal(tab->type, entry->hash, entry->key, hash_val, key); + *rebuilt_p = old_rebuilds_num != tab->rebuilds_num; +} -/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set - up to TRUE if the table is rebuilt during the comparison. */ #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \ - do { \ - unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \ - res = PTR_EQUAL(tab, ptr, hash_val, key); \ - rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \ - } while (FALSE) + ptr_equal_check((tab), (ptr), (hash_val), (key), &(res), &(rebuilt_p)) /* Features of a table. */ struct st_features { @@ -2387,6 +2402,19 @@ struct set_table_entry { st_data_t key; }; +static inline void +set_ptr_equal_check(const set_table *tab, const set_table_entry *entry, + st_hash_t hash_val, st_data_t key, + int *res, int *rebuilt_p) +{ + unsigned int old_rebuilds_num = tab->rebuilds_num; + *res = entry_equal(tab->type, entry->hash, entry->key, hash_val, key); + *rebuilt_p = old_rebuilds_num != tab->rebuilds_num; +} + +#define SET_DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \ + set_ptr_equal_check((tab), (ptr), (hash_val), (key), &(res), &(rebuilt_p)) + /* Return hash value of KEY for table TAB. */ static inline st_hash_t set_do_hash(st_data_t key, set_table *tab) @@ -2729,7 +2757,7 @@ set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key) bound = tab->entries_bound; entries = tab->entries; for (i = tab->entries_start; i < bound; i++) { - DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_ENTRY_IND; if (eq_p) @@ -2768,7 +2796,7 @@ set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key) for (;;) { bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind); if (! EMPTY_OR_DELETED_BIN_P(bin)) { - DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_ENTRY_IND; if (eq_p) @@ -2812,7 +2840,7 @@ set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key) for (;;) { bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind); if (! EMPTY_OR_DELETED_BIN_P(bin)) { - DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_BIN_IND; if (eq_p) @@ -2913,7 +2941,7 @@ set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value, break; } else if (! DELETED_BIN_P(entry_index)) { - DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_ENTRY_IND; if (eq_p) From 4be75d8ec870f339fad09c526f02c93a896bbfcf Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 19 Jun 2026 16:57:50 +0900 Subject: [PATCH 09/13] Avoid `-C` with miniruby If `load-relative` is specified, the system uses `dladdr` to locate the executable file path when initializing the load path; however, on some platforms (at least OpenBSD), `dladdr` fails under certain conditions (such as when debug information is missing). In that case, the system falls back to using `argv[0]`; however, if this is a relative path, the executable cannot be found after changing the working directory with the `-C` option, resulting in an error. --- common.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common.mk b/common.mk index 53e832c6370bba..5b16d87e0e479e 100644 --- a/common.mk +++ b/common.mk @@ -1585,10 +1585,10 @@ no-test-bundled-gems-precheck: yes-update-default-gemspecs no-update-default-gemspecs: update-default-gemspecs update-default-gemspecs: $(PREP) $(RBCONFIG) @$(MAKEDIRS) $(srcdir)/.bundle/specifications - $(Q)$(MINIRUBY) -W0 -C "$(srcdir)" -I tool/lib -roptparse -routput -rbundled_gem \ + $(Q)$(MINIRUBY) -W0 -I "$(srcdir)/tool/lib" -roptparse -routput -rbundled_gem \ -e "(out = Output.new).def_options(ARGV.options)" \ -e "BundledGem.update_default_gemspecs(ARGV.parse!, out, quiet: $(V).zero?)" \ - -- -c -o .bundle/specifications lib ext + -- -c -o "$(srcdir)/.bundle/specifications" "$(srcdir)/lib" "$(srcdir)/ext" install-for-test-bundled-gems: $(TEST_RUNNABLE)-install-for-test-bundled-gems no-install-for-test-bundled-gems: no-update-default-gemspecs From ca7a90c41e48e85f6446b598b20fed37f281162e Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 19 Jun 2026 13:30:25 +0200 Subject: [PATCH 10/13] [ruby/json] Don't omit `test_parse_error_snippet` on TruffleRuby https://github.com/ruby/json/commit/181396ca3d --- test/json/json_parser_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index c891dd7c2db99d..9000d1152f7ea8 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -862,7 +862,7 @@ def test_parse_error_incomplete_hash end def test_parse_error_snippet - omit "JRuby errors don't contain positions" unless RUBY_ENGINE == "ruby" + omit "JRuby errors don't contain positions" if RUBY_ENGINE == "jruby" error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") } assert_equal "unexpected character: 'ああああああああああ' at line 1 column 1", error.message From 70abf87ed1603b00e2d4cedad86c1cf727273407 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 19 Jun 2026 11:51:52 +0200 Subject: [PATCH 11/13] [ruby/json] ResumableParser: use throw rather than raise for handled EOS Since the exception will be swallowed, building a message and backtrace is just a waste of time. https://github.com/ruby/json/commit/4bd1e9bce6 --- ext/json/parser/parser.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index a39b62205d5ceb..2c5c4a5a4fb630 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -677,10 +677,17 @@ static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state, bool eos) { - VALUE message = build_parse_error_message(format, state); - if (state->parser) { // line and columns can't be accurate in resumable - rb_exc_raise(parse_error_new(state, message, 0, 0, eos)); + if (state->parser) { + if (eos) { + // the error will be swallowed by ResumableParser#parse, so no + // point building a message or backtrace. + rb_throw_obj(state->parser, state->parser); + } else { + // line and columns can't be accurate in resumable + rb_exc_raise(parse_error_new(state, build_parse_error_message(format, state), 0, 0, eos)); + } } else { + VALUE message = build_parse_error_message(format, state); long line, column; cursor_position(state, &line, &column); rb_str_catf(message, " at line %ld column %ld", line, column); @@ -2379,14 +2386,25 @@ static VALUE cResumableParser_feed(VALUE self, VALUE str) struct json_parse_any_args { JSON_ParserState *state; JSON_ParserConfig *config; + VALUE parser; }; -static VALUE json_parse_any_resumable_safe(VALUE _args) +static VALUE json_parse_any_resumable_safe0(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, _args)) { struct json_parse_any_args *args = (struct json_parse_any_args *)_args; return (VALUE)json_parse_any(args->state, args->config, true); } +static VALUE json_parse_any_resumable_safe(VALUE _args) +{ + struct json_parse_any_args *args = (struct json_parse_any_args *)_args; + VALUE result = rb_catch_obj(args->parser, json_parse_any_resumable_safe0, _args); + if (result == args->parser) { + return (VALUE)false; + } + return result; +} + static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) { JSON_ResumableParser *parser = cResumableParser_get(self); @@ -2455,25 +2473,20 @@ static VALUE cResumableParser_parse(VALUE self) struct json_parse_any_args args = { .state = &parser->state, .config = &parser->config, + .parser = self, }; int status; const char *initial_cursor = parser->state.cursor; parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status); + if (status) { - VALUE error_source = rb_ivar_get(rb_errinfo(), i_at_eos); - if (error_source == self) { - parser->complete = false; // is an EOS error raised by ourself - rb_set_errinfo(Qnil); - status = 0; - } else { - parser->complete = true; // a parse error is considered complete - } + parser->complete = true; // a parse error is considered complete } parser->parsed_bytes += parser->state.cursor - initial_cursor; parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor; - parser->in_use = false; + if (status) { rb_jump_tag(status); // reraise } From b6bcd73e9f788d6299f33d92a696b814b453efe4 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 19 Jun 2026 12:46:23 +0200 Subject: [PATCH 12/13] [ruby/json] Workaround TruffleRuby buggy `rb_catch_obj` implementation Somehow on TruffleRuby `rb_catch_obj` straight out doesn't call the passed function, acting as a noop. https://github.com/ruby/json/commit/9d8efcb08b --- ext/json/parser/extconf.rb | 2 ++ ext/json/parser/parser.c | 47 ++++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index bd537c9c54dfdb..8c5bdb66e9512b 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -2,6 +2,8 @@ require 'mkmf' $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" +$defs << "-DJSON_WORKAROUND_RB_CATCH_BUG" if RUBY_ENGINE == 'truffleruby' + have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 2c5c4a5a4fb630..057367bc5ca469 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -5,7 +5,7 @@ static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column, i_at_eos; +static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column; static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments, sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names, @@ -669,19 +669,46 @@ static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, VALUE exc = rb_exc_new_str(eParserError, message); rb_ivar_set(exc, i_at_line, LONG2NUM(line)); rb_ivar_set(exc, i_at_column, LONG2NUM(column)); - if (eos && state->parser) { - rb_ivar_set(exc, i_at_eos, state->parser); - } return exc; } +#ifdef JSON_WORKAROUND_RB_CATCH_BUG +#define JSON_CATCH_FUNC_ARGLIST(yielded_arg, func_args) VALUE func_args + +NORETURN(static) void parser_throw_eos(VALUE parser) +{ + VALUE exc = rb_exc_new_str(eParserError, rb_utf8_str_new_cstr("EOS")); + rb_ivar_set(exc, rb_intern("@resumable_parser_eos"), parser); + rb_exc_raise(exc); +} + +static VALUE parser_catch_eos(VALUE parser, VALUE (*func)(VALUE args), VALUE func_args) +{ + int status; + VALUE result = rb_protect(func, func_args, &status); + if (status) { + VALUE error_source = rb_ivar_get(rb_errinfo(), rb_intern("@resumable_parser_eos")); + if (error_source == parser) { + rb_set_errinfo(Qnil); + return parser; + } + rb_jump_tag(status); + } + return result; +} +#else +#define JSON_CATCH_FUNC_ARGLIST RB_BLOCK_CALL_FUNC_ARGLIST +#define parser_throw_eos(parser) rb_throw_obj(parser, parser) +#define parser_catch_eos(parser, func, func_args) rb_catch_obj(parser, func, func_args) +#endif + NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state, bool eos) { if (state->parser) { if (eos) { // the error will be swallowed by ResumableParser#parse, so no // point building a message or backtrace. - rb_throw_obj(state->parser, state->parser); + parser_throw_eos(state->parser); } else { // line and columns can't be accurate in resumable rb_exc_raise(parse_error_new(state, build_parse_error_message(format, state), 0, 0, eos)); @@ -2389,7 +2416,7 @@ struct json_parse_any_args { VALUE parser; }; -static VALUE json_parse_any_resumable_safe0(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, _args)) +static VALUE json_parse_any_resumable_safe0(JSON_CATCH_FUNC_ARGLIST(yielded_arg, _args)) { struct json_parse_any_args *args = (struct json_parse_any_args *)_args; return (VALUE)json_parse_any(args->state, args->config, true); @@ -2398,11 +2425,8 @@ static VALUE json_parse_any_resumable_safe0(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_a static VALUE json_parse_any_resumable_safe(VALUE _args) { struct json_parse_any_args *args = (struct json_parse_any_args *)_args; - VALUE result = rb_catch_obj(args->parser, json_parse_any_resumable_safe0, _args); - if (result == args->parser) { - return (VALUE)false; - } - return result; + VALUE result = parser_catch_eos(args->parser, json_parse_any_resumable_safe0, _args); + return result == args->parser ? Qfalse : result; } static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) @@ -2778,7 +2802,6 @@ void Init_parser(void) i_encode = rb_intern("encode"); i_at_line = rb_intern("@line"); i_at_column = rb_intern("@column"); - i_at_eos = rb_intern("@eos"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); From a7022d33576787c2b018b48675d65df275c32d7e Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 19 Jun 2026 14:06:47 +0200 Subject: [PATCH 13/13] [ruby/json] ResumableParser: accept only keyword arguments Fix: https://github.com/ruby/json/pull/1016#issuecomment-4744487710 `json` takes option hashes across the board, mostly because its API predates the introduction of keyword arguments. I'd like to change that to only take keyword arguments and error when an unknown argument is passed, but I'm not yet sure of the backward compatibility consequences, so it might wait for the next major. But in the meantime, `ResumableParser` being a new API, it can safely use keyword arguments. https://github.com/ruby/json/commit/f08c66338c --- ext/json/parser/parser.c | 46 +++++++++++++++++++++--------- test/json/resumable_parser_test.rb | 20 +++++++++++-- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 057367bc5ca469..e4dc3fb01047fb 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1966,6 +1966,8 @@ static VALUE convert_encoding(VALUE source) struct parser_config_init_args { JSON_ParserConfig *config; VALUE self; + VALUE unknown_keywords; + bool strict; }; static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val) @@ -2019,27 +2021,43 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) } } } + else if (args->strict) { + if (!args->unknown_keywords) { + args->unknown_keywords = rb_obj_hide(rb_ary_new()); + } + rb_ary_push(args->unknown_keywords, key); + } return ST_CONTINUE; } -static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self) +static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self, bool strict) { config->max_nesting = 100; struct parser_config_init_args args = { .config = config, .self = self, + .strict = strict, }; - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. - rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args); - } + if (NIL_P(opts)) return; + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) == 0) return; + + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args); + if (RB_UNLIKELY(args.unknown_keywords)) { + if (RARRAY_LEN(args.unknown_keywords) == 1) { + rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, RARRAY_AREF(args.unknown_keywords, 0)); + } + else { + VALUE keywords = rb_ary_join(args.unknown_keywords, rb_utf8_str_new_cstr(", ")); + rb_raise(rb_eArgError, "unknown keywords: %s", RSTRING_PTR(keywords)); + RB_GC_GUARD(keywords); + } } } @@ -2057,7 +2075,7 @@ static VALUE cParserConfig_initialize(VALUE self, VALUE opts) rb_check_frozen(self); GET_PARSER_CONFIG; - parser_config_init(config, opts, self); + parser_config_init(config, opts, self, false); return self; } @@ -2153,7 +2171,7 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) { JSON_ParserConfig _config = {0}; JSON_ParserConfig *config = &_config; - parser_config_init(config, opts, false); + parser_config_init(config, opts, Qfalse, false); return cParser_parse(config, Vsource); } @@ -2342,12 +2360,14 @@ static inline JSON_ResumableParser *cResumableParser_get(VALUE self) */ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) { - rb_check_arity(argc, 0, 1); rb_check_frozen(self); + + VALUE opts = Qfalse; + rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &opts); JSON_ResumableParser *parser = cResumableParser_get(self); - VALUE opts = argc > 0 ? argv[0] : Qnil; - parser_config_init(&parser->config, opts, self); + opts = argc > 0 ? argv[0] : Qnil; + parser_config_init(&parser->config, opts, self, true); return self; } diff --git a/test/json/resumable_parser_test.rb b/test/json/resumable_parser_test.rb index 1a981e6b89de4a..800502f4d46fe4 100644 --- a/test/json/resumable_parser_test.rb +++ b/test/json/resumable_parser_test.rb @@ -9,6 +9,22 @@ def setup @parser = new_parser end + def test_keyword_arguments + new_parser + new_parser({}) + new_parser(allow_nan: true) + + error = assert_raise(ArgumentError) do + new_parser(doesnt_exist: true, allow_nan: true) + end + assert_equal "unknown keyword: doesnt_exist", error.message + + error = assert_raise(ArgumentError) do + new_parser(doesnt_exist: true, allow_nan: true, a: 1, b: 2) + end + assert_equal "unknown keywords: doesnt_exist, a, b", error.message + end + def test_value refute_predicate @parser, :value? assert_raise(ArgumentError) { @parser.value } @@ -444,7 +460,7 @@ def assert_parse_stream(expected, json, parser = @parser) assert_equal(expected, actual) end - def new_parser(options = nil) - JSON::ResumableParser.new(options) + def new_parser(...) + JSON::ResumableParser.new(...) end end