diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 136aab6ae764f5..665d3152718b6e 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -593,6 +593,7 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum while (cursor >= state->start) { if (*cursor-- == '\n') { + line++; break; } column++; @@ -1642,7 +1643,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo state->cursor++; value = json_decode_array(state, config, 0); break; - } else if (resumable && next == 0) { + } else if (resumable && eos(state)) { state->cursor = value_start; return false; } @@ -1691,8 +1692,14 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo } case 0: - return false; - + // peek() returns 0 both at end-of-stream and for a literal NUL byte in the + // buffer. Only a genuine EOS means "feed me more"; a NUL byte that is not at + // EOS is just an invalid character. + if (eos(state)) { + return false; + } else { + raise_syntax_error("unexpected NULL byte: %s", state); + } default: raise_syntax_error("unexpected character: %s", state); } @@ -1807,7 +1814,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false); case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; } - } else if (resumable && next_char == 0) { + } else if (resumable && eos(state)) { return false; } else { raise_syntax_error("expected ',' or ']' after array value", state); @@ -1858,7 +1865,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false); case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; } - } else if (resumable && next_char == 0) { + } else if (resumable && eos(state)) { return false; } else { raise_syntax_error("expected ',' or '}' after object value, got: %s", state); @@ -2274,6 +2281,8 @@ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) return self; } +static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock); + /* * call-seq: self << string -> self * @@ -2282,13 +2291,14 @@ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) static VALUE cResumableParser_feed(VALUE self, VALUE str) { rb_check_frozen(self); + + JSON_ResumableParser *parser = ResumableParser_acquire(self, false); + str = convert_encoding(str); if (!RSTRING_LEN(str)) { return self; } - JSON_ResumableParser *parser = cResumableParser_get(self); - size_t offset = parser->state.cursor - parser->state.start; const size_t remaining = parser->state.end - parser->state.cursor; @@ -2377,6 +2387,7 @@ static VALUE cResumableParser_parse(VALUE self) { JSON_ResumableParser *parser = ResumableParser_acquire(self, true); if (!parser->buffer) { + parser->in_use = false; return Qfalse; } @@ -2477,22 +2488,16 @@ static VALUE cResumableParser_clear(VALUE self) parser->frames.head = 0; parser->value_stack.head = 0; parser->state.name_cache.length = 0; + parser->state.current_nesting = 0; + parser->state.in_array = 1; + parser->state.emitted_deprecations = 0; parser->state.start = parser->state.cursor = parser->state.end = NULL; return self; } -/* - * call-seq: partial_value -> object - * - * Returns the Ruby objects parsed up to this point: - * parser << '[1, [2, 3,' - * parser.parse # => false - * parser.value # ArgumentError no ready value - * parser.partial_value # => [1, [2, 3]] - */ -static VALUE cResumableParser_partial_value(VALUE self) +static VALUE cResumableParser_partial_value_body(VALUE self) { - JSON_ResumableParser *original_parser = ResumableParser_acquire(self, false); + JSON_ResumableParser *original_parser = cResumableParser_get(self); JSON_ResumableParser parser = *original_parser; parser.state.frames = &parser.frames; @@ -2559,6 +2564,28 @@ static VALUE cResumableParser_partial_value(VALUE self) return partial_result; } +/* + * call-seq: partial_value -> object + * + * Returns the Ruby objects parsed up to this point: + * parser << '[1, [2, 3,' + * parser.parse # => false + * parser.value # ArgumentError no ready value + * parser.partial_value # => [1, [2, 3]] + */ +static VALUE cResumableParser_partial_value(VALUE self) +{ + JSON_ResumableParser *parser = ResumableParser_acquire(self, true); + + int status; + VALUE result = rb_protect(cResumableParser_partial_value_body, self, &status); + parser->in_use = false; + if (status) { + rb_jump_tag(status); + } + return result; +} + /* * call-seq: rest -> string * diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index c1aa2f61567a90..c891dd7c2db99d 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -862,7 +862,7 @@ def test_parse_error_incomplete_hash end def test_parse_error_snippet - omit "C ext only test" unless RUBY_ENGINE == "ruby" + omit "JRuby errors don't contain positions" unless RUBY_ENGINE == "ruby" error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") } assert_equal "unexpected character: 'ああああああああああ' at line 1 column 1", error.message @@ -875,6 +875,15 @@ def test_parse_error_snippet error = assert_raise(JSON::ParserError) { JSON.parse("abcあああああああああああああああああああああああ") } assert_equal "unexpected character: 'abcあああああああああ' at line 1 column 1", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("[1,\n@") } + assert_equal "unexpected character: '@' at line 2 column 1", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("[\n 1,\n @\n]") } + assert_equal "unexpected character: '@' at line 3 column 3", error.message + + error = assert_raise(JSON::ParserError) { JSON.parse("@") } + assert_equal "unexpected character: '@' at line 1 column 1", error.message end def test_parse_leading_slash diff --git a/test/json/resumable_parser_test.rb b/test/json/resumable_parser_test.rb index 52f1356a704f9a..2801544c76a60a 100644 --- a/test/json/resumable_parser_test.rb +++ b/test/json/resumable_parser_test.rb @@ -48,6 +48,35 @@ def test_clear refute_predicate @parser, :value? end + def test_parse_with_empty_buffer_keeps_parser_usable + # parse before any feed must not leak the in_use lock + refute @parser.parse + @parser << '[1, 2, 3]' + assert @parser.parse + assert_equal [1, 2, 3], @parser.value + + # same after a clear with no following feed + @parser.clear + refute @parser.parse + @parser << '[4]' + assert @parser.parse + assert_equal [4], @parser.value + end + + def test_clear_resets_nesting_depth + # An unfinished document leaks a nesting level; #clear must reset it so a later shallow + # document is not rejected with a spurious NestingError. + parser = new_parser(max_nesting: 10) + 10.times do + parser << '[1' # opens an array that is never closed before clear + parser.parse + parser.clear + end + parser << '[1]' + assert parser.parse + assert_equal [1], parser.value + end + def test_parse_document_direct @parser << '[true]' assert_equal true, @parser.parse @@ -118,6 +147,30 @@ def test_parse_byte_by_byte_numbers assert_resumed_parsing('123 ') end + def test_nul_byte_is_a_syntax_error + # A NUL byte in a structural position must raise, not stall forever waiting for more input + # (peek() returns 0 both at EOS and for a literal NUL byte). + assert_parse_error "\x00" # document value + assert_parse_error "[\x00]" # first array element + assert_parse_error "[1\x00]" # after an array element (',' or ']' expected) + assert_parse_error "[1,\x00]" # array element after ',' + assert_parse_error "{\x00}" # object key + assert_parse_error "{\"a\":1\x00}" # after an object value (',' or '}' expected) + assert_parse_error "{\"a\":1,\x00}" # object key after ',' + end + + def test_incomplete_input_at_structural_positions_resumes + # Counterpart of test_nul_byte_is_a_syntax_error: a genuine EOS at the same positions must + # stay incomplete (return false), not raise -- this is what distinguishes EOS from a NUL. + assert_incomplete "[" + assert_incomplete "[1" + assert_incomplete "[1," + assert_incomplete "{" + assert_incomplete "{\"a\"" + assert_incomplete "{\"a\":1" + assert_incomplete "{\"a\":1," + end + def test_rest @parser << '[1, 2, 3, "unterminated string' refute @parser.parse @@ -208,6 +261,46 @@ def test_reentrency_prevented assert_equal "ResumableParser can't be used recursively", error.message end + def test_reentrency_prevented_in_partial_value + parser = nil + callback = ->(o) do + # Arrays are only built while partial_value runs (the scalars were pushed by the + # earlier parse); re-entering here used to corrupt/free the shared frame stack. + parser.parse if o.is_a?(Array) + o + end + parser = new_parser(on_load: callback) + parser << '[1, [2, 3,' + parser.parse + error = assert_raise ArgumentError do + parser.partial_value + end + assert_equal "ResumableParser can't be used recursively", error.message + + # The in_use lock must be released even though partial_value raised. + refute_predicate parser, :value? + end + + def test_feed_during_callback_prevented + parser = nil + callback = ->(o) do + parser << '99' if o == 1 # feeding while a parse is running must be rejected + o + end + parser = new_parser(on_load: callback) + parser << '[1, 2, 3]' + error = assert_raise ArgumentError do + parser.parse + end + assert_equal "ResumableParser can't be used recursively", error.message + + # the lock is released, so the parser stays usable + parser = new_parser + parser << '[1, 2, 3]' + assert parser.parse + assert_equal [1, 2, 3], parser.value + end + def test_exception_unlock_parser called = false parser = nil @@ -261,6 +354,20 @@ def test_buffer_shrink private + def assert_parse_error(json) + parser = new_parser + parser << json + assert_raise(JSON::ParserError, "expected a parse error for #{json.inspect}") do + parser.parse + end + end + + def assert_incomplete(json) + parser = new_parser + parser << json + refute(parser.parse, "expected #{json.inspect} not to produce a value") + end + def assert_partial_value(expected, json) parser = new_parser parser << json diff --git a/weakmap.c b/weakmap.c index 7cef1fd46a63a7..618c88bfa985f8 100644 --- a/weakmap.c +++ b/weakmap.c @@ -842,7 +842,7 @@ wkmap_clear(VALUE self) * call-seq: * map.inspect -> new_string * - * Returns a new String containing informations about the map: + * Returns a new String containing information about the map: * * m = ObjectSpace::WeakKeyMap.new * m[key] = value diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index a418c3008dd491..3d144471fc887f 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -201,6 +201,87 @@ mod hir_opt_tests { "); } + #[test] + fn test_no_fold_fixnum_add_overflow() { + eval(&format!(" + def test + {RUBY_FIXNUM_MAX} + 1 + end + ")); + assert_snapshot!(hir_string("test"), @" + fn test@:3: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + v10:Fixnum[4611686018427387903] = Const Value(4611686018427387903) + v12:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Integer@0x1000, +@0x1008, cme:0x1010) + v23:Fixnum = FixnumAdd v10, v12 + CheckInterrupts + Return v23 + "); + } + + #[test] + fn test_no_fold_fixnum_sub_underflow() { + eval(&format!(" + def test + {RUBY_FIXNUM_MIN} - 1 + end + ")); + assert_snapshot!(hir_string("test"), @" + fn test@:3: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + v10:Fixnum[-4611686018427387904] = Const Value(-4611686018427387904) + v12:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Integer@0x1000, -@0x1008, cme:0x1010) + v23:Fixnum = FixnumSub v10, v12 + CheckInterrupts + Return v23 + "); + } + + #[test] + fn test_no_fold_fixnum_mult_overflow() { + eval(&format!(" + def test + {RUBY_FIXNUM_MAX} * 2 + end + ")); + assert_snapshot!(hir_string("test"), @" + fn test@:3: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb3(v1) + bb2(): + EntryPoint JIT(0) + v4:BasicObject = LoadArg :self@0 + Jump bb3(v4) + bb3(v6:BasicObject): + v10:Fixnum[4611686018427387903] = Const Value(4611686018427387903) + v12:Fixnum[2] = Const Value(2) + PatchPoint MethodRedefined(Integer@0x1000, *@0x1008, cme:0x1010) + v23:Fixnum = FixnumMult v10, v12 + CheckInterrupts + Return v23 + "); + } + #[test] fn test_fold_fixnum_sub_zero() { eval("