Skip to content
63 changes: 45 additions & 18 deletions ext/json/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum

while (cursor >= state->start) {
if (*cursor-- == '\n') {
line++;
break;
}
column++;
Expand Down Expand Up @@ -1642,7 +1643,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
state->cursor++;
value = json_decode_array(state, config, 0);
break;
} else if (resumable && next == 0) {
} else if (resumable && eos(state)) {
state->cursor = value_start;
return false;
}
Expand Down Expand Up @@ -1691,8 +1692,14 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
}

case 0:
return false;

// peek() returns 0 both at end-of-stream and for a literal NUL byte in the
// buffer. Only a genuine EOS means "feed me more"; a NUL byte that is not at
// EOS is just an invalid character.
if (eos(state)) {
return false;
} else {
raise_syntax_error("unexpected NULL byte: %s", state);
}
default:
raise_syntax_error("unexpected character: %s", state);
}
Expand Down Expand Up @@ -1807,7 +1814,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
} else if (resumable && next_char == 0) {
} else if (resumable && eos(state)) {
return false;
} else {
raise_syntax_error("expected ',' or ']' after array value", state);
Expand Down Expand Up @@ -1858,7 +1865,7 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
} else if (resumable && next_char == 0) {
} else if (resumable && eos(state)) {
return false;
} else {
raise_syntax_error("expected ',' or '}' after object value, got: %s", state);
Expand Down Expand Up @@ -2274,6 +2281,8 @@ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
return self;
}

// Forward declaration so that cResumableParser_feed, defined just below, can call it.
// NOTE(review): callers passing lock = true reset parser->in_use themselves afterwards, so
// `lock` presumably marks the parser as in use -- confirm against the definition.
static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock);

/*
* call-seq: self << string -> self
*
Expand All @@ -2282,13 +2291,14 @@ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
static VALUE cResumableParser_feed(VALUE self, VALUE str)
{
rb_check_frozen(self);

JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

str = convert_encoding(str);
if (!RSTRING_LEN(str)) {
return self;
}

JSON_ResumableParser *parser = cResumableParser_get(self);

size_t offset = parser->state.cursor - parser->state.start;
const size_t remaining = parser->state.end - parser->state.cursor;

Expand Down Expand Up @@ -2377,6 +2387,7 @@ static VALUE cResumableParser_parse(VALUE self)
{
JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
if (!parser->buffer) {
parser->in_use = false;
return Qfalse;
}

Expand Down Expand Up @@ -2477,22 +2488,16 @@ static VALUE cResumableParser_clear(VALUE self)
parser->frames.head = 0;
parser->value_stack.head = 0;
parser->state.name_cache.length = 0;
parser->state.current_nesting = 0;
parser->state.in_array = 1;
parser->state.emitted_deprecations = 0;
parser->state.start = parser->state.cursor = parser->state.end = NULL;
return self;
}

/*
* call-seq: partial_value -> object
*
* Returns the Ruby objects parsed up to this point:
* parser << '[1, [2, 3,'
* parser.parse # => false
* parser.value # ArgumentError no ready value
* parser.partial_value # => [1, [2, 3]]
*/
static VALUE cResumableParser_partial_value(VALUE self)
static VALUE cResumableParser_partial_value_body(VALUE self)
{
JSON_ResumableParser *original_parser = ResumableParser_acquire(self, false);
JSON_ResumableParser *original_parser = cResumableParser_get(self);
JSON_ResumableParser parser = *original_parser;

parser.state.frames = &parser.frames;
Expand Down Expand Up @@ -2559,6 +2564,28 @@ static VALUE cResumableParser_partial_value(VALUE self)
return partial_result;
}

/*
 * call-seq: partial_value -> object
 *
 * Returns the Ruby objects that have been parsed so far, even when the
 * document is not complete yet:
 *
 *   parser << '[1, [2, 3,'
 *   parser.parse         # => false
 *   parser.value         # ArgumentError no ready value
 *   parser.partial_value # => [1, [2, 3]]
 */
static VALUE cResumableParser_partial_value(VALUE self)
{
    // Acquire with lock = true for the duration of the body.
    JSON_ResumableParser *acquired = ResumableParser_acquire(self, true);

    // Run the body under rb_protect so the in_use flag is cleared even when it raises.
    int tag = 0;
    VALUE partial = rb_protect(cResumableParser_partial_value_body, self, &tag);
    acquired->in_use = false;

    // A non-zero tag means the body raised (or jumped); resume that jump now.
    if (tag != 0) {
        rb_jump_tag(tag);
    }
    return partial;
}

/*
* call-seq: rest -> string
*
Expand Down
11 changes: 10 additions & 1 deletion test/json/json_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ def test_parse_error_incomplete_hash
end

def test_parse_error_snippet
omit "C ext only test" unless RUBY_ENGINE == "ruby"
omit "JRuby errors don't contain positions" unless RUBY_ENGINE == "ruby"

error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") }
assert_equal "unexpected character: 'ああああああああああ' at line 1 column 1", error.message
Expand All @@ -875,6 +875,15 @@ def test_parse_error_snippet

error = assert_raise(JSON::ParserError) { JSON.parse("abcあああああああああああああああああああああああ") }
assert_equal "unexpected character: 'abcあああああああああ' at line 1 column 1", error.message

error = assert_raise(JSON::ParserError) { JSON.parse("[1,\n@") }
assert_equal "unexpected character: '@' at line 2 column 1", error.message

error = assert_raise(JSON::ParserError) { JSON.parse("[\n 1,\n @\n]") }
assert_equal "unexpected character: '@' at line 3 column 3", error.message

error = assert_raise(JSON::ParserError) { JSON.parse("@") }
assert_equal "unexpected character: '@' at line 1 column 1", error.message
end

def test_parse_leading_slash
Expand Down
107 changes: 107 additions & 0 deletions test/json/resumable_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,35 @@ def test_clear
refute_predicate @parser, :value?
end

def test_parse_with_empty_buffer_keeps_parser_usable
  # Calling parse before anything was fed must not leave the in_use lock held.
  refute(@parser.parse)
  @parser << '[1, 2, 3]'
  assert(@parser.parse)
  assert_equal([1, 2, 3], @parser.value)

  # Likewise when parse is called right after clear, with nothing fed in between.
  @parser.clear
  refute(@parser.parse)
  @parser << '[4]'
  assert(@parser.parse)
  assert_equal([4], @parser.value)
end

def test_clear_resets_nesting_depth
  # Every unfinished document leaves one nesting level open. #clear has to reset that
  # counter, otherwise a later shallow document is rejected with a spurious NestingError.
  depth_limit = 10
  parser = new_parser(max_nesting: depth_limit)

  depth_limit.times do
    parser << '[1' # array opened here is still unclosed when clear is called
    parser.parse
    parser.clear
  end

  parser << '[1]'
  assert(parser.parse)
  assert_equal([1], parser.value)
end

def test_parse_document_direct
@parser << '[true]'
assert_equal true, @parser.parse
Expand Down Expand Up @@ -118,6 +147,30 @@ def test_parse_byte_by_byte_numbers
assert_resumed_parsing('123 ')
end

def test_nul_byte_is_a_syntax_error
  # peek() yields 0 for a literal NUL byte as well as at EOS, so a NUL in a structural
  # position has to raise rather than stall forever as if more input were expected.
  [
    "\x00",            # document value
    "[\x00]",          # first array element
    "[1\x00]",         # after an array element (',' or ']' expected)
    "[1,\x00]",        # array element after ','
    "{\x00}",          # object key
    "{\"a\":1\x00}",   # after an object value (',' or '}' expected)
    "{\"a\":1,\x00}",  # object key after ','
  ].each do |json|
    assert_parse_error json
  end
end

def test_incomplete_input_at_structural_positions_resumes
  # Mirror image of test_nul_byte_is_a_syntax_error: when the input genuinely ends at one of
  # these positions the parser must report "incomplete" (false) instead of raising. That
  # difference is what separates a real EOS from a NUL byte.
  [
    "[",
    "[1",
    "[1,",
    "{",
    "{\"a\"",
    "{\"a\":1",
    "{\"a\":1,",
  ].each do |json|
    assert_incomplete json
  end
end

def test_rest
@parser << '[1, 2, 3, "unterminated string'
refute @parser.parse
Expand Down Expand Up @@ -208,6 +261,46 @@ def test_reentrency_prevented
assert_equal "ResumableParser can't be used recursively", error.message
end

def test_reentrency_prevented_in_partial_value
  parser = nil
  # The scalars were already pushed by the earlier parse, so arrays are only built while
  # partial_value is running. Re-entering the parser at that point used to corrupt/free
  # the shared frame stack.
  reentrant_hook = lambda do |object|
    parser.parse if object.is_a?(Array)
    object
  end

  parser = new_parser(on_load: reentrant_hook)
  parser << '[1, [2, 3,'
  parser.parse

  error = assert_raise(ArgumentError) { parser.partial_value }
  assert_equal "ResumableParser can't be used recursively", error.message

  # Even though partial_value raised, the in_use lock has to be released again.
  refute_predicate parser, :value?
end

def test_feed_during_callback_prevented
  parser = nil
  callback = ->(o) do
    parser << '99' if o == 1 # feeding while a parse is running must be rejected
    o
  end
  parser = new_parser(on_load: callback)
  parser << '[1, 2, 3]'
  error = assert_raise ArgumentError do
    parser.parse
  end
  assert_equal "ResumableParser can't be used recursively", error.message

  # The lock must have been released on the parser that raised, so check that very
  # instance rather than a fresh one. Reset it and parse a document that does not contain
  # 1, so the callback above does not try to feed again.
  parser.clear
  parser << '[2, 3]'
  assert parser.parse
  assert_equal [2, 3], parser.value
end

def test_exception_unlock_parser
called = false
parser = nil
Expand Down Expand Up @@ -261,6 +354,20 @@ def test_buffer_shrink

private

# Feeds +json+ to a fresh parser and asserts that parsing it raises JSON::ParserError.
def assert_parse_error(json)
  failure_message = "expected a parse error for #{json.inspect}"
  parser = new_parser
  parser << json
  assert_raise(JSON::ParserError, failure_message) { parser.parse }
end

# Feeds +json+ to a fresh parser and asserts that parse reports no value (falsy result).
def assert_incomplete(json)
  failure_message = "expected #{json.inspect} not to produce a value"
  parser = new_parser
  parser << json
  refute(parser.parse, failure_message)
end

def assert_partial_value(expected, json)
parser = new_parser
parser << json
Expand Down
2 changes: 1 addition & 1 deletion weakmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ wkmap_clear(VALUE self)
* call-seq:
* map.inspect -> new_string
*
* Returns a new String containing informations about the map:
* Returns a new String containing information about the map:
*
* m = ObjectSpace::WeakKeyMap.new
* m[key] = value
Expand Down
81 changes: 81 additions & 0 deletions zjit/src/hir/opt_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,87 @@ mod hir_opt_tests {
");
}

// `RUBY_FIXNUM_MAX + 1` must not be constant-folded: the snapshot expects a `FixnumAdd`
// instruction on the two constants to remain instead of a single folded constant.
#[test]
fn test_no_fold_fixnum_add_overflow() {
eval(&format!("
def test
{RUBY_FIXNUM_MAX} + 1
end
"));
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:3:
bb1():
EntryPoint interpreter
v1:BasicObject = LoadSelf
Jump bb3(v1)
bb2():
EntryPoint JIT(0)
v4:BasicObject = LoadArg :self@0
Jump bb3(v4)
bb3(v6:BasicObject):
v10:Fixnum[4611686018427387903] = Const Value(4611686018427387903)
v12:Fixnum[1] = Const Value(1)
PatchPoint MethodRedefined(Integer@0x1000, +@0x1008, cme:0x1010)
v23:Fixnum = FixnumAdd v10, v12
CheckInterrupts
Return v23
");
}

// `RUBY_FIXNUM_MIN - 1` must not be constant-folded: the snapshot expects a `FixnumSub`
// instruction on the two constants to remain instead of a single folded constant.
#[test]
fn test_no_fold_fixnum_sub_underflow() {
eval(&format!("
def test
{RUBY_FIXNUM_MIN} - 1
end
"));
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:3:
bb1():
EntryPoint interpreter
v1:BasicObject = LoadSelf
Jump bb3(v1)
bb2():
EntryPoint JIT(0)
v4:BasicObject = LoadArg :self@0
Jump bb3(v4)
bb3(v6:BasicObject):
v10:Fixnum[-4611686018427387904] = Const Value(-4611686018427387904)
v12:Fixnum[1] = Const Value(1)
PatchPoint MethodRedefined(Integer@0x1000, -@0x1008, cme:0x1010)
v23:Fixnum = FixnumSub v10, v12
CheckInterrupts
Return v23
");
}

// `RUBY_FIXNUM_MAX * 2` must not be constant-folded: the snapshot expects a `FixnumMult`
// instruction on the two constants to remain instead of a single folded constant.
#[test]
fn test_no_fold_fixnum_mult_overflow() {
eval(&format!("
def test
{RUBY_FIXNUM_MAX} * 2
end
"));
assert_snapshot!(hir_string("test"), @"
fn test@<compiled>:3:
bb1():
EntryPoint interpreter
v1:BasicObject = LoadSelf
Jump bb3(v1)
bb2():
EntryPoint JIT(0)
v4:BasicObject = LoadArg :self@0
Jump bb3(v4)
bb3(v6:BasicObject):
v10:Fixnum[4611686018427387903] = Const Value(4611686018427387903)
v12:Fixnum[2] = Const Value(2)
PatchPoint MethodRedefined(Integer@0x1000, *@0x1008, cme:0x1010)
v23:Fixnum = FixnumMult v10, v12
CheckInterrupts
Return v23
");
}

#[test]
fn test_fold_fixnum_sub_zero() {
eval("
Expand Down