diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 665d3152718b6e..7b028635c31887 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -233,6 +233,8 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) { + JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || handle); + if (RB_UNLIKELY(stack->head >= stack->capa)) { stack = rvalue_stack_grow(stack, handle, stack_ref); } @@ -431,6 +433,7 @@ typedef struct JSON_ParserStateStruct { int in_array; int current_nesting; unsigned int emitted_deprecations; + VALUE parser; } JSON_ParserState; static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref); @@ -451,6 +454,9 @@ static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *h static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame) { json_frame_stack *stack = state->frames; + + JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || state->frame_stack_handle); + if (RB_UNLIKELY(stack->head >= stack->capa)) { stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames); } @@ -660,13 +666,13 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta return message; } -static VALUE parse_error_new(VALUE message, long line, long column, bool eos) +static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, long column, bool eos) { VALUE exc = rb_exc_new_str(eParserError, message); rb_ivar_set(exc, i_at_line, LONG2NUM(line)); rb_ivar_set(exc, i_at_column, LONG2NUM(column)); - if (eos) { - rb_ivar_set(exc, i_at_eos, Qtrue); + if (eos && state->parser) { + rb_ivar_set(exc, i_at_eos, state->parser); } return exc; } @@ -676,7 +682,7 @@ NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *st long line, 
column; cursor_position(state, &line, &column); VALUE message = build_parse_error_message(format, state, line, column); - rb_exc_raise(parse_error_new(message, line, column, eos)); + rb_exc_raise(parse_error_new(state, message, line, column, eos)); } NORETURN(static) void raise_eos_error(const char *format, JSON_ParserState *state) @@ -1169,7 +1175,7 @@ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE d long line, column; cursor_position(state, &line, &column); rb_str_concat(message, build_parse_error_message("", state, line, column)) ; - rb_exc_raise(parse_error_new(message, line, column, false)); + rb_exc_raise(parse_error_new(state, message, line, column, false)); } NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs) @@ -2166,6 +2172,7 @@ static void JSON_ResumableParser_mark(void *ptr) rvalue_stack_mark(&parser->value_stack); rvalue_cache_mark(&parser->state.name_cache); rb_gc_mark(parser->buffer); // pin the buffer + rb_gc_mark_movable(parser->state.parser); } static void JSON_ResumableParser_free(void *ptr) @@ -2200,6 +2207,7 @@ static void JSON_ResumableParser_compact(void *ptr) rvalue_stack_compact(&parser->value_stack); rvalue_cache_compact(&parser->state.name_cache); parser->buffer = rb_gc_location(parser->buffer); + parser->state.parser = rb_gc_location(parser->state.parser); } static const rb_data_type_t JSON_ResumableParser_type = { @@ -2221,6 +2229,7 @@ static VALUE cResumableParser_allocate(VALUE klass) JSON_ResumableParser *parser; VALUE obj = TypedData_Make_Struct(klass, JSON_ResumableParser, &JSON_ResumableParser_type, parser); parser->state.in_array++; + parser->state.parser = obj; return obj; } @@ -2268,6 +2277,11 @@ static inline JSON_ResumableParser *cResumableParser_get(VALUE self) * parser << ' ' * parser.parse # => true * parser.value # => 123 + * + * === Security + * + * An incomplete document is buffered in full and there is no size 
limit, so when reading + * from an untrusted source the caller is responsible for bounding how much data is fed. */ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) { @@ -2365,8 +2379,6 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) // self may have moved, so we need to update all pointers // Investigate: We might be better off keeping JSON_ParserState on the stack // and only persist what we need. - parser->state.value_stack_handle = &self; - parser->state.frame_stack_handle = &self; parser->state.value_stack = &parser->value_stack; parser->state.frames = &parser->frames; @@ -2419,8 +2431,9 @@ static VALUE cResumableParser_parse(VALUE self) parser->in_use = false; if (status) { complete = false; - if (RTEST(rb_ivar_get(rb_errinfo(), rb_intern("@eos")))) { - complete = false; // is an EOS error + VALUE error_source = rb_ivar_get(rb_errinfo(), i_at_eos); + if (error_source == self) { + complete = false; // is an EOS error raised by ourself rb_set_errinfo(Qnil); } else { rb_jump_tag(status); // reraise diff --git a/file.c b/file.c index dfd05452fd555d..45c09ba664c961 100644 --- a/file.c +++ b/file.c @@ -4044,10 +4044,12 @@ ntfs_tail(const char *path, const char *end, rb_encoding *enc) }\ } while (0) -#define BUFINIT() (\ - p = buf = RSTRING_PTR(result),\ - buflen = RSTRING_LEN(result),\ - pend = p + buflen) +#define BUFINIT(result, buf, p, pend) do {\ + if (!result) { result = rb_usascii_str_new(0, 1); } \ + p = buf = RSTRING_PTR(result); \ + buflen = RSTRING_LEN(result); \ + pend = p + buflen; \ +} while (0) #ifdef __APPLE__ # define SKIPPATHSEP(p) ((*(p)) ? 
1 : 0) @@ -4244,9 +4246,9 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na mb_enc = enc_mbclen_needed(rb_str_enc_get(dname)); } - BUFINIT(); - if (s < fend && s[0] == '~' && abs_mode == 0) { /* execute only if NOT absolute_path() */ + BUFINIT(result, buf, p, pend); // TODO: right size the buffer + long userlen = 0; if (s + 1 == fend || isdirsep(s[1])) { buf = 0; @@ -4277,12 +4279,14 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na rb_raise(rb_eArgError, "non-absolute home"); } } - BUFINIT(); + BUFINIT(result, buf, p, pend); p = pend; } #ifdef DOSISH_DRIVE_LETTER /* skip drive letter */ else if (s + 1 < fend && has_drive_letter(s)) { + BUFINIT(result, buf, p, pend); // TODO: right size the buffer + if (s + 2 < fend && isdirsep(s[2])) { /* specified drive letter, and full path */ /* skip drive letter */ @@ -4297,7 +4301,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na int same = 0; if (!NIL_P(dname) && !not_same_drive(dname, s[0])) { rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); - BUFINIT(); + BUFINIT(result, buf, p, pend); if (has_drive_letter(p) && TOLOWER(p[0]) == TOLOWER(s[0])) { /* ok, same drive */ same = 1; @@ -4305,7 +4309,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } if (!same) { char *e = append_fspath(result, fname, getcwdofdrv(*s), &enc, fsenc); - BUFINIT(); + BUFINIT(result, buf, p, pend); p = e; } else { @@ -4318,15 +4322,35 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } #endif /* DOSISH_DRIVE_LETTER */ else if (s == fend || !rb_is_absolute_path(s)) { + if (!NIL_P(dname)) { - rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); + if (result) { + rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); + } + else { + result = rb_usascii_str_new(0, RSTRING_LEN(dname) + RSTRING_LEN(fname) + 1); + 
rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); + + if (RB_UNLIKELY(RSTRING_LEN(result) > RSTRING_LEN(dname))) { + VALUE resized_result = rb_usascii_str_new(0, RSTRING_LEN(result) + RSTRING_LEN(fname) + 1); + rb_str_set_len(resized_result, 0); + rb_str_buf_append(resized_result, result); + rb_str_set_len(result, 0); + result = resized_result; + } + } + rb_enc_associate(result, fs_enc_check(result, fname)); - BUFINIT(); + BUFINIT(result, buf, p, pend); p = pend; } else { + VALUE cwd = rb_dir_getwd_ospath(); + if (!result) { + result = rb_usascii_str_new(0, RSTRING_LEN(cwd) + RSTRING_LEN(fname) + 1); + } char *e = append_fspath(result, fname, rb_dir_getwd_ospath(), &enc, fsenc); - BUFINIT(); + BUFINIT(result, buf, p, pend); p = e; } #if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC @@ -4340,6 +4364,8 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na p = chompdirsep(skiproot(buf, p), p, mb_enc, enc); } else { + BUFINIT(result, buf, p, pend); + size_t len; b = s; do s++; while (s < fend && isdirsep(*s)); @@ -4564,7 +4590,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na WideCharToMultiByte(CP_UTF8, 0, wfd.cFileName, wlen, RSTRING_PTR(tmp), len + 1, NULL, NULL); rb_str_cat_conv_enc_opts(result, bdiff, RSTRING_PTR(tmp), len, rb_utf8_encoding(), 0, Qnil); - BUFINIT(); + BUFINIT(result, buf, p, pend); rb_str_resize(tmp, 0); } p += len; @@ -4584,8 +4610,6 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na } #endif /* !_WIN32 (this ifdef started above rb_default_home_dir) */ -#define EXPAND_PATH_BUFFER() rb_usascii_str_new(0, 1) - static VALUE str_shrink(VALUE str) { @@ -4603,20 +4627,20 @@ str_shrink(VALUE str) static VALUE file_expand_path_1(VALUE fname) { - return rb_file_expand_path_internal(fname, Qnil, 0, 0, EXPAND_PATH_BUFFER()); + return rb_file_expand_path_internal(fname, Qnil, 0, 0, Qfalse); } VALUE rb_file_expand_path(VALUE fname, 
VALUE dname) { check_expand_path_args(fname, dname); - return expand_path(fname, dname, 0, 1, EXPAND_PATH_BUFFER()); + return expand_path(fname, dname, 0, 1, Qfalse); } VALUE rb_file_expand_path_fast(VALUE fname, VALUE dname) { - return expand_path(fname, dname, 0, 0, EXPAND_PATH_BUFFER()); + return expand_path(fname, dname, 0, 0, Qfalse); } VALUE @@ -4676,7 +4700,7 @@ VALUE rb_file_absolute_path(VALUE fname, VALUE dname) { check_expand_path_args(fname, dname); - return expand_path(fname, dname, 1, 1, EXPAND_PATH_BUFFER()); + return expand_path(fname, dname, 1, 1, Qfalse); } VALUE diff --git a/test/json/resumable_parser_test.rb b/test/json/resumable_parser_test.rb index 2801544c76a60a..a720679c210984 100644 --- a/test/json/resumable_parser_test.rb +++ b/test/json/resumable_parser_test.rb @@ -77,6 +77,18 @@ def test_clear_resets_nesting_depth assert_equal [1], parser.value end + def test_nested_parse_error + parser = new_parser(on_load: ->(o) do + JSON.parse("") #=> raises JSON::ParserError + o + end) + parser << "[1]" + + assert_raise(JSON::ParserError) do + parser.parse + end + end + def test_parse_document_direct @parser << '[true]' assert_equal true, @parser.parse diff --git a/win32/file.c b/win32/file.c index 26b99715cd6ca8..b5bf663aee3660 100644 --- a/win32/file.c +++ b/win32/file.c @@ -275,6 +275,10 @@ rb_default_home_dir(VALUE result) VALUE rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result) { + if (!result) { + result = rb_usascii_str_new(0, 1); + } + size_t size = 0, whome_len = 0; size_t buffer_len = 0; long wpath_len = 0, wdir_len = 0;