From cde9c8a2629923f5a42bf8684e4bcdd58ed89c48 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 18 Jun 2026 21:24:12 +0200 Subject: [PATCH 1/4] [ruby/json] ResumableParser#parse: don't swallow other parsers errors Fix: https://github.com/ruby/json/issues/1024#issuecomment-4745232736 https://github.com/ruby/json/commit/65549acb9c Co-Authored-By: Yusuke Endoh --- ext/json/parser/parser.c | 37 +++++++++++++++--------------- test/json/resumable_parser_test.rb | 12 ++++++++++ 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 665d3152718b6e..38e24374fdfb5a 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -660,13 +660,13 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta return message; } -static VALUE parse_error_new(VALUE message, long line, long column, bool eos) +static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, long column, bool eos) { VALUE exc = rb_exc_new_str(eParserError, message); rb_ivar_set(exc, i_at_line, LONG2NUM(line)); rb_ivar_set(exc, i_at_column, LONG2NUM(column)); - if (eos) { - rb_ivar_set(exc, i_at_eos, Qtrue); + if (eos && state->value_stack_handle) { + rb_ivar_set(exc, i_at_eos, *state->value_stack_handle); } return exc; } @@ -676,7 +676,7 @@ NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *st long line, column; cursor_position(state, &line, &column); VALUE message = build_parse_error_message(format, state, line, column); - rb_exc_raise(parse_error_new(message, line, column, eos)); + rb_exc_raise(parse_error_new(state, message, line, column, eos)); } NORETURN(static) void raise_eos_error(const char *format, JSON_ParserState *state) @@ -1169,7 +1169,7 @@ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE d long line, column; cursor_position(state, &line, &column); rb_str_concat(message, build_parse_error_message("", state, line, column)) ; - rb_exc_raise(parse_error_new(message, line, column, false)); + rb_exc_raise(parse_error_new(state, message, line, column, false)); } NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs) @@ -2281,7 +2281,7 @@ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) return self; } -static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock); +static JSON_ResumableParser *ResumableParser_acquire(VALUE *self, bool lock); /* * call-seq: self << string -> self @@ -2292,7 +2292,7 @@ static VALUE cResumableParser_feed(VALUE self, VALUE str) { rb_check_frozen(self); - JSON_ResumableParser *parser = ResumableParser_acquire(self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); str = convert_encoding(str); if (!RSTRING_LEN(str)) { @@ -2350,9 +2350,9 @@ static VALUE json_parse_any_resumable_safe(VALUE _args) return (VALUE)json_parse_any(args->state, args->config, true); } -static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) +static JSON_ResumableParser *ResumableParser_acquire(VALUE *self, bool lock) { - JSON_ResumableParser *parser = cResumableParser_get(self); + JSON_ResumableParser *parser = cResumableParser_get(*self); if (parser->in_use) { rb_raise(rb_eArgError, "ResumableParser can't be used recursively"); @@ -2365,8 +2365,8 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) // self may have moved, so we need to update all pointers // Investigate: We might be better off keeping JSON_ParserState on the stack // and only persist what we need. - parser->state.value_stack_handle = &self; - parser->state.frame_stack_handle = &self; + parser->state.value_stack_handle = self; + parser->state.frame_stack_handle = self; parser->state.value_stack = &parser->value_stack; parser->state.frames = &parser->frames; @@ -2385,7 +2385,7 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) */ static VALUE cResumableParser_parse(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(self, true); + JSON_ResumableParser *parser = ResumableParser_acquire(&self, true); if (!parser->buffer) { parser->in_use = false; return Qfalse; @@ -2419,8 +2419,9 @@ static VALUE cResumableParser_parse(VALUE self) parser->in_use = false; if (status) { complete = false; - if (RTEST(rb_ivar_get(rb_errinfo(), rb_intern("@eos")))) { - complete = false; // is an EOS error + VALUE error_source = rb_ivar_get(rb_errinfo(), i_at_eos); + if (error_source == self) { + complete = false; // is an EOS error raised by ourself rb_set_errinfo(Qnil); } else { rb_jump_tag(status); // reraise @@ -2437,7 +2438,7 @@ static VALUE cResumableParser_parse(VALUE self) */ static VALUE cResumableParser_value_p(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); if (parser->value_stack.head > 0) { json_frame *frame = json_frame_stack_peek(&parser->frames); @@ -2461,7 +2462,7 @@ static VALUE cResumableParser_value_p(VALUE self) */ static VALUE cResumableParser_value(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); if (parser->frames.head > 0) { json_frame *frame = json_frame_stack_peek(&parser->frames); @@ -2483,7 +2484,7 @@ static VALUE cResumableParser_value(VALUE self) */ static VALUE cResumableParser_clear(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); parser->buffer = 0; parser->frames.head = 0; parser->value_stack.head = 0; @@ -2575,7 +2576,7 @@ static VALUE cResumableParser_partial_value_body(VALUE self) */ static VALUE cResumableParser_partial_value(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(self, true); + JSON_ResumableParser *parser = ResumableParser_acquire(&self, true); int status; VALUE result = rb_protect(cResumableParser_partial_value_body, self, &status); diff --git a/test/json/resumable_parser_test.rb b/test/json/resumable_parser_test.rb index 2801544c76a60a..a720679c210984 100644 --- a/test/json/resumable_parser_test.rb +++ b/test/json/resumable_parser_test.rb @@ -77,6 +77,18 @@ def test_clear_resets_nesting_depth assert_equal [1], parser.value end + def test_nested_parse_error + parser = new_parser(on_load: ->(o) do + JSON.parse("") #=> raises JSON::ParserError + o + end) + parser << "[1]" + + assert_raise(JSON::ParserError) do + parser.parse + end + end + def test_parse_document_direct @parser << '[true]' assert_equal true, @parser.parse From 75bdfeb67ae7b6f80ba0f16d131da09c0d4f08b6 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 19 Jun 2026 03:39:35 +0900 Subject: [PATCH 2/4] [ruby/json] Document that JSON::ResumableParser does not bound its buffer size An incomplete document is buffered in full with no size limit, so reading from an untrusted source can grow memory without bound. Note in the rdoc that bounding the input is the caller's responsibility. https://github.com/ruby/json/commit/3650e2bf41 Co-Authored-By: Claude Opus 4.8 (1M context) --- ext/json/parser/parser.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 38e24374fdfb5a..289bd4e3714374 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -2268,6 +2268,11 @@ static inline JSON_ResumableParser *cResumableParser_get(VALUE self) * parser << ' ' * parser.parse # => true * parser.value # => 123 + * + * === Security + * + * An incomplete document is buffered in full and there is no size limit, so when reading + * from an untrusted source the caller is responsible for bounding how much data is fed. */ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) { From 82f237837044ad6534f60774206fddf6168a81fe Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 18 Jun 2026 18:12:21 +0200 Subject: [PATCH 3/4] File.expand_path: right size the returned string in common cases For the common cases of: - `File.expand_path('rel/path') - `File.expand_path('rel/path', __FILE__)` It we can make an educated guess that the returned path will fit inside a string of the combined size of both arguments. This in the vast majority of cases allow to return an embedded string and avoid allocating one or two external buffer. Co-Authored-By: John Hawthorn --- file.c | 62 ++++++++++++++++++++++++++++++++++++---------------- win32/file.c | 4 ++++ 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/file.c b/file.c index dfd05452fd555d..45c09ba664c961 100644 --- a/file.c +++ b/file.c @@ -4044,10 +4044,12 @@ ntfs_tail(const char *path, const char *end, rb_encoding *enc) }\ } while (0) -#define BUFINIT() (\ - p = buf = RSTRING_PTR(result),\ - buflen = RSTRING_LEN(result),\ - pend = p + buflen) +#define BUFINIT(result, buf, p, pend) do {\ + if (!result) { result = rb_usascii_str_new(0, 1); } \ + p = buf = RSTRING_PTR(result); \ + buflen = RSTRING_LEN(result); \ + pend = p + buflen; \ +} while (0) #ifdef __APPLE__ # define SKIPPATHSEP(p) ((*(p)) ? 1 : 0) @@ -4244,9 +4246,9 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na mb_enc = enc_mbclen_needed(rb_str_enc_get(dname)); } - BUFINIT(); - if (s < fend && s[0] == '~' && abs_mode == 0) { /* execute only if NOT absolute_path() */ + BUFINIT(result, buf, p, pend); // TOOD: right size the buffer + long userlen = 0; if (s + 1 == fend || isdirsep(s[1])) { buf = 0; @@ -4277,12 +4279,14 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na rb_raise(rb_eArgError, "non-absolute home"); } } - BUFINIT(); + BUFINIT(result, buf, p, pend); p = pend; } #ifdef DOSISH_DRIVE_LETTER /* skip drive letter */ else if (s + 1 < fend && has_drive_letter(s)) { + BUFINIT(result, buf, p, pend); // TOOD: right size the buffer + if (s + 2 < fend && isdirsep(s[2])) { /* specified drive letter, and full path */ /* skip drive letter */ @@ -4297,7 +4301,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na int same = 0; if (!NIL_P(dname) && !not_same_drive(dname, s[0])) { rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); - BUFINIT(); + BUFINIT(result, buf, p, pend); if (has_drive_letter(p) && TOLOWER(p[0]) == TOLOWER(s[0])) { /* ok, same drive */ same = 1; @@ -4305,7 +4309,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } if (!same) { char *e = append_fspath(result, fname, getcwdofdrv(*s), &enc, fsenc); - BUFINIT(); + BUFINIT(result, buf, p, pend); p = e; } else { @@ -4318,15 +4322,35 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } #endif /* DOSISH_DRIVE_LETTER */ else if (s == fend || !rb_is_absolute_path(s)) { + if (!NIL_P(dname)) { - rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); + if (result) { + rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); + } + else { + result = rb_usascii_str_new(0, RSTRING_LEN(dname) + RSTRING_LEN(fname) + 1); + rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); + + if (RB_UNLIKELY(RSTRING_LEN(result) > RSTRING_LEN(dname))) { + VALUE resized_result = rb_usascii_str_new(0, RSTRING_LEN(result) + RSTRING_LEN(fname) + 1); + rb_str_set_len(resized_result, 0); + rb_str_buf_append(resized_result, result); + rb_str_set_len(result, 0); + result = resized_result; + } + } + rb_enc_associate(result, fs_enc_check(result, fname)); - BUFINIT(); + BUFINIT(result, buf, p, pend); p = pend; } else { + VALUE cwd = rb_dir_getwd_ospath(); + if (!result) { + result = rb_usascii_str_new(0, RSTRING_LEN(cwd) + RSTRING_LEN(fname) + 1); + } char *e = append_fspath(result, fname, rb_dir_getwd_ospath(), &enc, fsenc); - BUFINIT(); + BUFINIT(result, buf, p, pend); p = e; } #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC @@ -4340,6 +4364,8 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na p = chompdirsep(skiproot(buf, p), p, mb_enc, enc); } else { + BUFINIT(result, buf, p, pend); + size_t len; b = s; do s++; while (s < fend && isdirsep(*s)); @@ -4564,7 +4590,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na WideCharToMultiByte(CP_UTF8, 0, wfd.cFileName, wlen, RSTRING_PTR(tmp), len + 1, NULL, NULL); rb_str_cat_conv_enc_opts(result, bdiff, RSTRING_PTR(tmp), len, rb_utf8_encoding(), 0, Qnil); - BUFINIT(); + BUFINIT(result, buf, p, pend); rb_str_resize(tmp, 0); } p += len; @@ -4584,8 +4610,6 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } #endif /* !_WIN32 (this ifdef started above rb_default_home_dir) */ -#define EXPAND_PATH_BUFFER() rb_usascii_str_new(0, 1) - static VALUE str_shrink(VALUE str) { @@ -4603,20 +4627,20 @@ str_shrink(VALUE str) static VALUE file_expand_path_1(VALUE fname) { - return rb_file_expand_path_internal(fname, Qnil, 0, 0, EXPAND_PATH_BUFFER()); + return rb_file_expand_path_internal(fname, Qnil, 0, 0, Qfalse); } VALUE rb_file_expand_path(VALUE fname, VALUE dname) { check_expand_path_args(fname, dname); - return expand_path(fname, dname, 0, 1, EXPAND_PATH_BUFFER()); + return expand_path(fname, dname, 0, 1, Qfalse); } VALUE rb_file_expand_path_fast(VALUE fname, VALUE dname) { - return expand_path(fname, dname, 0, 0, EXPAND_PATH_BUFFER()); + return expand_path(fname, dname, 0, 0, Qfalse); } VALUE @@ -4676,7 +4700,7 @@ VALUE rb_file_absolute_path(VALUE fname, VALUE dname) { check_expand_path_args(fname, dname); - return expand_path(fname, dname, 1, 1, EXPAND_PATH_BUFFER()); + return expand_path(fname, dname, 1, 1, Qfalse); } VALUE diff --git a/win32/file.c b/win32/file.c index 26b99715cd6ca8..b5bf663aee3660 100644 --- a/win32/file.c +++ b/win32/file.c @@ -275,6 +275,10 @@ rb_default_home_dir(VALUE result) VALUE rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result) { + if (!result) { + result = rb_usascii_str_new(0, 1); + } + size_t size = 0, whome_len = 0; size_t buffer_len = 0; long wpath_len = 0, wdir_len = 0; From 062228c046e192acf99d19921e4f02ad08ab1017 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 19 Jun 2026 00:03:28 +0200 Subject: [PATCH 4/4] [ruby/json] Refactor ResumableParser Stop updating stack handles, we don't actually need them as we're starting heap allocated. Add an assertion to prove it. Use a dedicated field for storing the parser reference to be used for marking EOS errors. Also stop marking EOS errors when not in resumable mode. https://github.com/ruby/json/commit/004a5c1faa --- ext/json/parser/parser.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 289bd4e3714374..7b028635c31887 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -233,6 +233,8 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) { + JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || handle); + if (RB_UNLIKELY(stack->head >= stack->capa)) { stack = rvalue_stack_grow(stack, handle, stack_ref); } @@ -431,6 +433,7 @@ typedef struct JSON_ParserStateStruct { int in_array; int current_nesting; unsigned int emitted_deprecations; + VALUE parser; } JSON_ParserState; static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref); @@ -451,6 +454,9 @@ static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *h static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame) { json_frame_stack *stack = state->frames; + + JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || state->frame_stack_handle); + if (RB_UNLIKELY(stack->head >= stack->capa)) { stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames); } @@ -665,8 +671,8 @@ static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, VALUE exc = rb_exc_new_str(eParserError, message); rb_ivar_set(exc, i_at_line, LONG2NUM(line)); rb_ivar_set(exc, i_at_column, LONG2NUM(column)); - if (eos && state->value_stack_handle) { - rb_ivar_set(exc, i_at_eos, *state->value_stack_handle); + if (eos && state->parser) { + rb_ivar_set(exc, i_at_eos, state->parser); } return exc; } @@ -2166,6 +2172,7 @@ static void JSON_ResumableParser_mark(void *ptr) rvalue_stack_mark(&parser->value_stack); rvalue_cache_mark(&parser->state.name_cache); rb_gc_mark(parser->buffer); // pin the buffer + rb_gc_mark_movable(parser->state.parser); } static void JSON_ResumableParser_free(void *ptr) @@ -2200,6 +2207,7 @@ static void JSON_ResumableParser_compact(void *ptr) rvalue_stack_compact(&parser->value_stack); rvalue_cache_compact(&parser->state.name_cache); parser->buffer = rb_gc_location(parser->buffer); + parser->state.parser = rb_gc_location(parser->state.parser); } static const rb_data_type_t JSON_ResumableParser_type = { @@ -2221,6 +2229,7 @@ static VALUE cResumableParser_allocate(VALUE klass) JSON_ResumableParser *parser; VALUE obj = TypedData_Make_Struct(klass, JSON_ResumableParser, &JSON_ResumableParser_type, parser); parser->state.in_array++; + parser->state.parser = obj; return obj; } @@ -2286,7 +2295,7 @@ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) return self; } -static JSON_ResumableParser *ResumableParser_acquire(VALUE *self, bool lock); +static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock); /* * call-seq: self << string -> self @@ -2297,7 +2306,7 @@ static VALUE cResumableParser_feed(VALUE self, VALUE str) { rb_check_frozen(self); - JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(self, false); str = convert_encoding(str); if (!RSTRING_LEN(str)) { @@ -2355,9 +2364,9 @@ static VALUE json_parse_any_resumable_safe(VALUE _args) return (VALUE)json_parse_any(args->state, args->config, true); } -static JSON_ResumableParser *ResumableParser_acquire(VALUE *self, bool lock) +static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) { - JSON_ResumableParser *parser = cResumableParser_get(*self); + JSON_ResumableParser *parser = cResumableParser_get(self); if (parser->in_use) { rb_raise(rb_eArgError, "ResumableParser can't be used recursively"); @@ -2370,8 +2379,6 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE *self, bool lock) // self may have moved, so we need to update all pointers // Investigate: We might be better off keeping JSON_ParserState on the stack // and only persist what we need. - parser->state.value_stack_handle = self; - parser->state.frame_stack_handle = self; parser->state.value_stack = &parser->value_stack; parser->state.frames = &parser->frames; @@ -2390,7 +2397,7 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE *self, bool lock) */ static VALUE cResumableParser_parse(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(&self, true); + JSON_ResumableParser *parser = ResumableParser_acquire(self, true); if (!parser->buffer) { parser->in_use = false; return Qfalse; @@ -2443,7 +2450,7 @@ static VALUE cResumableParser_parse(VALUE self) */ static VALUE cResumableParser_value_p(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(self, false); if (parser->value_stack.head > 0) { json_frame *frame = json_frame_stack_peek(&parser->frames); @@ -2467,7 +2474,7 @@ static VALUE cResumableParser_value_p(VALUE self) */ static VALUE cResumableParser_value(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(self, false); if (parser->frames.head > 0) { json_frame *frame = json_frame_stack_peek(&parser->frames); @@ -2489,7 +2496,7 @@ static VALUE cResumableParser_value(VALUE self) */ static VALUE cResumableParser_clear(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(&self, false); + JSON_ResumableParser *parser = ResumableParser_acquire(self, false); parser->buffer = 0; parser->frames.head = 0; parser->value_stack.head = 0; @@ -2581,7 +2588,7 @@ static VALUE cResumableParser_partial_value_body(VALUE self) */ static VALUE cResumableParser_partial_value(VALUE self) { - JSON_ResumableParser *parser = ResumableParser_acquire(&self, true); + JSON_ResumableParser *parser = ResumableParser_acquire(self, true); int status; VALUE result = rb_protect(cResumableParser_partial_value_body, self, &status);