diff --git a/benchmark/string_downcase.yml b/benchmark/string_downcase.yml index 1fea6afbecdf52..c5cd94606dec7b 100644 --- a/benchmark/string_downcase.yml +++ b/benchmark/string_downcase.yml @@ -7,6 +7,10 @@ prelude: | nonascii10 = nonascii1 * 10 nonascii100 = nonascii10 * 10 nonascii1000 = nonascii100 * 10 + ascii8bit256 = (0..255).to_a.pack("C*") + ascii8bit2560 = ascii8bit256 * 10 + ascii8bit25600 = ascii8bit2560 * 10 + ascii8bit256000 = ascii8bit25600 * 10 benchmark: downcase-1: str1.upcase downcase-10: str10.upcase @@ -16,3 +20,7 @@ benchmark: downcase-nonascii10: nonascii10.downcase downcase-nonascii100: nonascii100.downcase downcase-nonascii1000: nonascii1000.downcase + downcase-ascii8bit-256: ascii8bit256.downcase + downcase-ascii8bit-2560: ascii8bit2560.downcase + downcase-ascii8bit-25600: ascii8bit25600.downcase + downcase-ascii8bit-256000: ascii8bit256000.downcase diff --git a/benchmark/string_upcase.yml b/benchmark/string_upcase.yml index dab84bbde23ebd..d2c5f4960b3804 100644 --- a/benchmark/string_upcase.yml +++ b/benchmark/string_upcase.yml @@ -7,6 +7,10 @@ prelude: | nonascii10 = nonascii1 * 10 nonascii100 = nonascii10 * 10 nonascii1000 = nonascii100 * 10 + ascii8bit256 = (0..255).to_a.pack("C*") + ascii8bit2560 = ascii8bit256 * 10 + ascii8bit25600 = ascii8bit2560 * 10 + ascii8bit256000 = ascii8bit25600 * 10 benchmark: upcase-1: str1.upcase upcase-10: str10.upcase @@ -16,3 +20,7 @@ benchmark: upcase-nonascii10: nonascii10.upcase upcase-nonascii100: nonascii100.upcase upcase-nonascii1000: nonascii1000.upcase + upcase-ascii8bit-256: ascii8bit256.upcase + upcase-ascii8bit-2560: ascii8bit2560.upcase + upcase-ascii8bit-25600: ascii8bit25600.upcase + upcase-ascii8bit-256000: ascii8bit256000.upcase diff --git a/common.mk b/common.mk index 53e832c6370bba..5b16d87e0e479e 100644 --- a/common.mk +++ b/common.mk @@ -1585,10 +1585,10 @@ no-test-bundled-gems-precheck: yes-update-default-gemspecs no-update-default-gemspecs: update-default-gemspecs update-default-gemspecs: $(PREP) $(RBCONFIG) @$(MAKEDIRS) $(srcdir)/.bundle/specifications - $(Q)$(MINIRUBY) -W0 -C "$(srcdir)" -I tool/lib -roptparse -routput -rbundled_gem \ + $(Q)$(MINIRUBY) -W0 -I "$(srcdir)/tool/lib" -roptparse -routput -rbundled_gem \ -e "(out = Output.new).def_options(ARGV.options)" \ -e "BundledGem.update_default_gemspecs(ARGV.parse!, out, quiet: $(V).zero?)" \ - -- -c -o .bundle/specifications lib ext + -- -c -o "$(srcdir)/.bundle/specifications" "$(srcdir)/lib" "$(srcdir)/ext" install-for-test-bundled-gems: $(TEST_RUNNABLE)-install-for-test-bundled-gems no-install-for-test-bundled-gems: no-update-default-gemspecs diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index bd537c9c54dfdb..8c5bdb66e9512b 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -2,6 +2,8 @@ require 'mkmf' $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" +$defs << "-DJSON_WORKAROUND_RB_CATCH_BUG" if RUBY_ENGINE == 'truffleruby' + have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 7b028635c31887..e4dc3fb01047fb 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -5,7 +5,7 @@ static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column, i_at_eos; +static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column; static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments, sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names, @@ -627,7 +627,7 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state) #define PARSE_ERROR_FRAGMENT_LEN 32 -static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column) +static VALUE build_parse_error_message(const char *format, JSON_ParserState *state) { unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3]; @@ -661,9 +661,7 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta } } - VALUE message = rb_enc_sprintf(enc_utf8, format, ptr); - rb_str_catf(message, " at line %ld column %ld", line, column); - return message; + return rb_enc_sprintf(enc_utf8, format, ptr); } static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, long column, bool eos) @@ -671,18 +669,57 @@ static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, VALUE exc = rb_exc_new_str(eParserError, message); rb_ivar_set(exc, i_at_line, LONG2NUM(line)); rb_ivar_set(exc, i_at_column, LONG2NUM(column)); - if (eos && state->parser) { - rb_ivar_set(exc, i_at_eos, state->parser); - } return exc; } +#ifdef JSON_WORKAROUND_RB_CATCH_BUG +#define JSON_CATCH_FUNC_ARGLIST(yielded_arg, func_args) VALUE func_args + +NORETURN(static) void parser_throw_eos(VALUE parser) +{ + VALUE exc = rb_exc_new_str(eParserError, rb_utf8_str_new_cstr("EOS")); + rb_ivar_set(exc, rb_intern("@resumable_parser_eos"), parser); + rb_exc_raise(exc); +} + +static VALUE parser_catch_eos(VALUE parser, VALUE (*func)(VALUE args), VALUE func_args) +{ + int status; + VALUE result = rb_protect(func, func_args, &status); + if (status) { + VALUE error_source = rb_ivar_get(rb_errinfo(), rb_intern("@resumable_parser_eos")); + if (error_source == parser) { + rb_set_errinfo(Qnil); + return parser; + } + rb_jump_tag(status); + } + return result; +} +#else +#define JSON_CATCH_FUNC_ARGLIST RB_BLOCK_CALL_FUNC_ARGLIST +#define parser_throw_eos(parser) rb_throw_obj(parser, parser) +#define parser_catch_eos(parser, func, func_args) rb_catch_obj(parser, func, func_args) +#endif + NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state, bool eos) { - long line, column; - cursor_position(state, &line, &column); - VALUE message = build_parse_error_message(format, state, line, column); - rb_exc_raise(parse_error_new(state, message, line, column, eos)); + if (state->parser) { + if (eos) { + // the error will be swallowed by ResumableParser#parse, so no + // point building a message or backtrace. + parser_throw_eos(state->parser); + } else { + // line and columns can't be accurate in resumable + rb_exc_raise(parse_error_new(state, build_parse_error_message(format, state), 0, 0, eos)); + } + } else { + VALUE message = build_parse_error_message(format, state); + long line, column; + cursor_position(state, &line, &column); + rb_str_catf(message, " at line %ld column %ld", line, column); + rb_exc_raise(parse_error_new(state, message, line, column, eos)); + } } NORETURN(static) void raise_eos_error(const char *format, JSON_ParserState *state) @@ -1172,10 +1209,15 @@ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE d rb_inspect(duplicate_key) ); - long line, column; - cursor_position(state, &line, &column); - rb_str_concat(message, build_parse_error_message("", state, line, column)) ; - rb_exc_raise(parse_error_new(state, message, line, column, false)); + rb_str_concat(message, build_parse_error_message("", state)); + if (state->parser) { // line and columns can't be accurate in resumable + rb_exc_raise(parse_error_new(state, message, 0, 0, false)); + } else { + long line, column; + cursor_position(state, &line, &column); + rb_str_catf(message, " at line %ld column %ld", line, column); + rb_exc_raise(parse_error_new(state, message, line, column, false)); + } } NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs) @@ -1924,6 +1966,8 @@ static VALUE convert_encoding(VALUE source) struct parser_config_init_args { JSON_ParserConfig *config; VALUE self; + VALUE unknown_keywords; + bool strict; }; static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val) @@ -1977,27 +2021,43 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) } } } + else if (args->strict) { + if (!args->unknown_keywords) { + args->unknown_keywords = rb_obj_hide(rb_ary_new()); + } + rb_ary_push(args->unknown_keywords, key); + } return ST_CONTINUE; } -static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self) +static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self, bool strict) { config->max_nesting = 100; struct parser_config_init_args args = { .config = config, .self = self, + .strict = strict, }; - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. - rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args); - } + if (NIL_P(opts)) return; + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) == 0) return; + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args); + + if (RB_UNLIKELY(args.unknown_keywords)) { + if (RARRAY_LEN(args.unknown_keywords) == 1) { + rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, RARRAY_AREF(args.unknown_keywords, 0)); + } + else { + VALUE keywords = rb_ary_join(args.unknown_keywords, rb_utf8_str_new_cstr(", ")); + rb_raise(rb_eArgError, "unknown keywords: %s", RSTRING_PTR(keywords)); + RB_GC_GUARD(keywords); + } } } @@ -2015,7 +2075,7 @@ static VALUE cParserConfig_initialize(VALUE self, VALUE opts) rb_check_frozen(self); GET_PARSER_CONFIG; - parser_config_init(config, opts, self); + parser_config_init(config, opts, self, false); return self; } @@ -2111,7 +2171,7 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) { JSON_ParserConfig _config = {0}; JSON_ParserConfig *config = &_config; - parser_config_init(config, opts, false); + parser_config_init(config, opts, Qfalse, false); return cParser_parse(config, Vsource); } @@ -2162,6 +2222,9 @@ typedef struct JSON_ResumableParserStruct { rvalue_stack value_stack; json_frame_stack frames; VALUE buffer; + size_t parsed_bytes; + size_t incomplete_bytes; + bool complete; bool in_use; } JSON_ResumableParser; @@ -2282,15 +2345,29 @@ static inline JSON_ResumableParser *cResumableParser_get(VALUE self) * * An incomplete document is buffered in full and there is no size limit, so when reading * from an untrusted source the caller is responsible for bounding how much data is fed. + * For example: + * + * loop do + * if parser.parsed_bytes > DOCUMENT_MAX_SIZE + * raise "document too large" + * end + * + * parser << read_chunk + * while parser.parse + * process(parser.value) + * end + * end */ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self) { - rb_check_arity(argc, 0, 1); rb_check_frozen(self); + + VALUE opts = Qfalse; + rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &opts); JSON_ResumableParser *parser = cResumableParser_get(self); - VALUE opts = argc > 0 ? argv[0] : Qnil; - parser_config_init(&parser->config, opts, self); + opts = argc > 0 ? argv[0] : Qnil; + parser_config_init(&parser->config, opts, self, true); return self; } @@ -2356,14 +2433,22 @@ static VALUE cResumableParser_feed(VALUE self, VALUE str) struct json_parse_any_args { JSON_ParserState *state; JSON_ParserConfig *config; + VALUE parser; }; -static VALUE json_parse_any_resumable_safe(VALUE _args) +static VALUE json_parse_any_resumable_safe0(JSON_CATCH_FUNC_ARGLIST(yielded_arg, _args)) { struct json_parse_any_args *args = (struct json_parse_any_args *)_args; return (VALUE)json_parse_any(args->state, args->config, true); } +static VALUE json_parse_any_resumable_safe(VALUE _args) +{ + struct json_parse_any_args *args = (struct json_parse_any_args *)_args; + VALUE result = parser_catch_eos(args->parser, json_parse_any_resumable_safe0, _args); + return result == args->parser ? Qfalse : result; +} + static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) { JSON_ResumableParser *parser = cResumableParser_get(self); @@ -2398,6 +2483,13 @@ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock) static VALUE cResumableParser_parse(VALUE self) { JSON_ResumableParser *parser = ResumableParser_acquire(self, true); + + if (parser->complete) { + parser->parsed_bytes = 0; + parser->incomplete_bytes = 0; + parser->complete = false; + } + if (!parser->buffer) { parser->in_use = false; return Qfalse; @@ -2425,22 +2517,25 @@ static VALUE cResumableParser_parse(VALUE self) struct json_parse_any_args args = { .state = &parser->state, .config = &parser->config, + .parser = self, }; int status; - bool complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status); + const char *initial_cursor = parser->state.cursor; + parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status); + + if (status) { + parser->complete = true; // a parse error is considered complete + } + + parser->parsed_bytes += parser->state.cursor - initial_cursor; + parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor; parser->in_use = false; + if (status) { - complete = false; - VALUE error_source = rb_ivar_get(rb_errinfo(), i_at_eos); - if (error_source == self) { - complete = false; // is an EOS error raised by ourself - rb_set_errinfo(Qnil); - } else { - rb_jump_tag(status); // reraise - } + rb_jump_tag(status); // reraise } RB_GC_GUARD(Vsource); - return complete ? Qtrue : Qfalse; + return parser->complete ? Qtrue : Qfalse; } /* @@ -2498,6 +2593,9 @@ static VALUE cResumableParser_clear(VALUE self) { JSON_ResumableParser *parser = ResumableParser_acquire(self, false); parser->buffer = 0; + parser->complete = true; + parser->parsed_bytes = 0; + parser->incomplete_bytes = 0; parser->frames.head = 0; parser->value_stack.head = 0; parser->state.name_cache.length = 0; @@ -2633,6 +2731,29 @@ static VALUE cResumableParser_eos_p(VALUE self) return eos(&parser->state) ? Qtrue : Qfalse; } +/* + * call-seq: parsed_bytes -> integer + * + * Returns the number of bytes parsed since the start of the current partial value. + * This is intended to be used for securing against untrusted input: + * + * loop do + * if parser.parsed_bytes > DOCUMENT_MAX_SIZE + * raise "document too large" + * end + * + * parser << read_chunk + * while parser.parse + * process(parser.value) + * end + * end + */ +static VALUE cResumableParser_parsed_bytes(VALUE self) +{ + JSON_ResumableParser *parser = cResumableParser_get(self); + return ULL2NUM(parser->parsed_bytes + parser->incomplete_bytes); +} + void Init_parser(void) { #ifdef HAVE_RB_EXT_RACTOR_SAFE @@ -2669,6 +2790,7 @@ void Init_parser(void) rb_define_method(cResumableParser, "clear", cResumableParser_clear, 0); rb_define_method(cResumableParser, "rest", cResumableParser_rest, 0); rb_define_method(cResumableParser, "eos?", cResumableParser_eos_p, 0); + rb_define_method(cResumableParser, "parsed_bytes", cResumableParser_parsed_bytes, 0); rb_global_variable(&CNaN); CNaN = rb_const_get(mJSON, rb_intern("NaN")); @@ -2700,7 +2822,6 @@ void Init_parser(void) i_encode = rb_intern("encode"); i_at_line = rb_intern("@line"); i_at_column = rb_intern("@column"); - i_at_eos = rb_intern("@eos"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index a725432c6e7c3a..0e49fbc6f4f12e 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -909,6 +909,11 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags VALUE *alloc_obj = (VALUE *)rb_mmtk_alloc_fast_path(objspace, ractor_cache, alloc_size, MMTk_MIN_OBJ_ALIGN); if (!alloc_obj) { alloc_obj = mmtk_alloc(ractor_cache->mutator, alloc_size, MMTk_MIN_OBJ_ALIGN, 0, MMTK_ALLOCATION_SEMANTICS_DEFAULT); + + // On heap exhaustion raise NoMemoryError. + if (RB_UNLIKELY(alloc_obj == NULL)) { + rb_memerror(); + } } alloc_obj++; diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs index 648efa4e274783..81b39737fba057 100644 --- a/gc/mmtk/src/collection.rs +++ b/gc/mmtk/src/collection.rs @@ -9,6 +9,7 @@ use crate::upcalls; use crate::Ruby; use mmtk::memory_manager; use mmtk::scheduler::*; +use mmtk::util::alloc::AllocationError; use mmtk::util::heap::GCTriggerPolicy; use mmtk::util::VMMutatorThread; use mmtk::util::VMThread; @@ -63,6 +64,19 @@ impl Collection for VMCollection { (upcalls().block_for_gc)(tls); } + fn out_of_memory(_tls: VMThread, err_kind: AllocationError) { + match err_kind { + // The heap is exhausted and could not be grown. Return normally + // without aborting. + AllocationError::HeapOutOfMemory => {} + // The OS refused an mmap. This is unrecoverable, so abort the + // process via the same panic handler used for GC-thread panics. + AllocationError::MmapOutOfMemory => { + (upcalls().mutator_thread_panic_handler)(); + } + } + } + fn spawn_gc_thread(_tls: VMThread, ctx: GCThreadContext) { let join_handle = match ctx { GCThreadContext::Worker(mut worker) => thread::Builder::new() diff --git a/hash.c b/hash.c index a694d6bdffb944..cf5d7a3934129d 100644 --- a/hash.c +++ b/hash.c @@ -402,6 +402,7 @@ typedef st_index_t st_hash_t; */ #define RHASH_AR_TABLE_MAX_BOUND RHASH_AR_TABLE_MAX_SIZE +#define RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE (RHASH_AR_TABLE_MAX_BOUND + 1) #define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->pairs[n]) #define RHASH_AR_CLEARED_HINT 0xff @@ -603,18 +604,22 @@ ar_equal(VALUE x, VALUE y) return rb_any_cmp(x, y) == 0; } +// Returns the bin index if found, RHASH_AR_TABLE_MAX_BOUND if not found, +// or RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE if #eql? or a Thread converted the hash to st_table. static unsigned ar_find_entry_hint(VALUE hash, ar_hint_t hint, st_data_t key) { - unsigned i, bound = RHASH_AR_TABLE_BOUND(hash); - const ar_hint_t *hints = RHASH_AR_TABLE(hash)->ar_hint.ary; - /* if table is NULL, then bound also should be 0 */ - for (i = 0; i < bound; i++) { + for (unsigned i = 0; i < RHASH_AR_TABLE_BOUND(hash); i++) { + const ar_hint_t *hints = RHASH_AR_TABLE(hash)->ar_hint.ary; if (hints[i] == hint) { ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i); - if (ar_equal(key, pair->key)) { + int eq = ar_equal(key, pair->key); + if (UNLIKELY(!RHASH_AR_TABLE_P(hash))) { + return RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE; + } + if (eq) { RB_DEBUG_COUNTER_INC(artable_hint_hit); return i; } @@ -898,6 +903,9 @@ ar_foreach_check(VALUE hash, st_foreach_check_callback_func *func, st_data_t arg pair = RHASH_AR_TABLE_REF(hash, i); if (pair->key == never) break; ret = ar_find_entry_hint(hash, hint, key); + if (UNLIKELY(ret == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + ensure_ar_table(hash); + } if (ret == RHASH_AR_TABLE_MAX_BOUND) { (*func)(0, 0, arg, 1); return 2; @@ -937,6 +945,9 @@ ar_update(VALUE hash, st_data_t key, if (RHASH_AR_TABLE_SIZE(hash) > 0) { bin = ar_find_entry(hash, hash_value, key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return -1; + } existing = (bin != RHASH_AR_TABLE_MAX_BOUND) ? TRUE : FALSE; } else { @@ -990,6 +1001,9 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value) } bin = ar_find_entry(hash, hash_value, key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return -1; + } if (bin == RHASH_AR_TABLE_MAX_BOUND) { if (RHASH_AR_TABLE_SIZE(hash) >= RHASH_AR_TABLE_MAX_SIZE) { return -1; @@ -1023,6 +1037,9 @@ ar_lookup(VALUE hash, st_data_t key, st_data_t *value) return st_lookup(RHASH_ST_TABLE(hash), key, value); } unsigned bin = ar_find_entry(hash, hash_value, key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return st_lookup(RHASH_ST_TABLE(hash), key, value); + } if (bin == RHASH_AR_TABLE_MAX_BOUND) { return 0; @@ -1049,6 +1066,9 @@ ar_delete(VALUE hash, st_data_t *key, st_data_t *value) } bin = ar_find_entry(hash, hash_value, *key); + if (UNLIKELY(bin == RHASH_AR_TABLE_CONVERTED_TO_ST_TABLE)) { + return st_delete(RHASH_ST_TABLE(hash), key, value); + } if (bin == RHASH_AR_TABLE_MAX_BOUND) { if (value != 0) *value = 0; diff --git a/st.c b/st.c index 6bf83c94cdadf0..550bcc6325dd7c 100644 --- a/st.c +++ b/st.c @@ -184,18 +184,33 @@ static const struct st_hash_type type_strcasehash = { #define free_fixed_ptr(v) free(v) #endif -#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) -#define PTR_EQUAL(tab, ptr, hash_val, key_) \ - ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) +/* Compare an entry's hash and key against given hash_val and key. + Entry fields must be read into locals by the caller before passing + them here, to avoid re-reading from potentially-freed memory after + #eql? triggers a table rebuild. */ +static inline int +entry_equal(const struct st_hash_type *type, + st_hash_t entry_hash, st_data_t entry_key, + st_hash_t hash_val, st_data_t key) +{ + return (entry_hash == hash_val) && + ((entry_key == key) || (*type->compare)(key, entry_key) == 0); +} + +/* As entry_equal, but also checks whether the table was rebuilt + during the comparison (i.e. #eql? mutated it). */ +static inline void +ptr_equal_check(const st_table *tab, const st_table_entry *entry, + st_hash_t hash_val, st_data_t key, + int *res, int *rebuilt_p) +{ + unsigned int old_rebuilds_num = tab->rebuilds_num; + *res = entry_equal(tab->type, entry->hash, entry->key, hash_val, key); + *rebuilt_p = old_rebuilds_num != tab->rebuilds_num; +} -/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set - up to TRUE if the table is rebuilt during the comparison. */ #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \ - do { \ - unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \ - res = PTR_EQUAL(tab, ptr, hash_val, key); \ - rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \ - } while (FALSE) + ptr_equal_check((tab), (ptr), (hash_val), (key), &(res), &(rebuilt_p)) /* Features of a table. */ struct st_features { @@ -2387,6 +2402,19 @@ struct set_table_entry { st_data_t key; }; +static inline void +set_ptr_equal_check(const set_table *tab, const set_table_entry *entry, + st_hash_t hash_val, st_data_t key, + int *res, int *rebuilt_p) +{ + unsigned int old_rebuilds_num = tab->rebuilds_num; + *res = entry_equal(tab->type, entry->hash, entry->key, hash_val, key); + *rebuilt_p = old_rebuilds_num != tab->rebuilds_num; +} + +#define SET_DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \ + set_ptr_equal_check((tab), (ptr), (hash_val), (key), &(res), &(rebuilt_p)) + /* Return hash value of KEY for table TAB. */ static inline st_hash_t set_do_hash(st_data_t key, set_table *tab) @@ -2729,7 +2757,7 @@ set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key) bound = tab->entries_bound; entries = tab->entries; for (i = tab->entries_start; i < bound; i++) { - DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_ENTRY_IND; if (eq_p) @@ -2768,7 +2796,7 @@ set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key) for (;;) { bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind); if (! EMPTY_OR_DELETED_BIN_P(bin)) { - DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_ENTRY_IND; if (eq_p) @@ -2812,7 +2840,7 @@ set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key) for (;;) { bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind); if (! EMPTY_OR_DELETED_BIN_P(bin)) { - DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_BIN_IND; if (eq_p) @@ -2913,7 +2941,7 @@ set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value, break; } else if (! DELETED_BIN_P(entry_index)) { - DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); + SET_DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); if (EXPECT(rebuilt_p, 0)) return REBUILT_TABLE_ENTRY_IND; if (eq_p) diff --git a/string.c b/string.c index 5c6151294d506e..408f899edbad1c 100644 --- a/string.c +++ b/string.c @@ -7836,7 +7836,8 @@ case_option_single_p(OnigCaseFoldType flags, rb_encoding *enc, VALUE str) { if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc) == 1)) return true; - return !(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT; + return !(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI) && + (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT || rb_is_ascii8bit_enc(enc)); } /* 16 should be long enough to absorb any kind of single character length increase */ diff --git a/test/json/json_minefield_parser_test.rb b/test/json/json_minefield_parser_test.rb index 71590325573edf..e6dcb54b8d341b 100644 --- a/test/json/json_minefield_parser_test.rb +++ b/test/json/json_minefield_parser_test.rb @@ -11,22 +11,10 @@ class << self private def define_test(name, &block) - if RUBY_ENGINE == 'jruby' && JRUBY_PENDING.include?(name) - define_method("test_#{name}") do - pend("#{name} doesn't pass on JRuby", &block) - end - else - define_method("test_#{name}", &block) - end + define_method("test_#{name}", &block) end end - JRUBY_PENDING = %w( - n_structure_open_array_object - n_structure_100000_opening_arrays - n_object_trailing_comment_slash_open - ).freeze - INVALID_ENCODING_TESTS = %w( i_string_truncated-utf-8 i_string_overlong_sequence_6_bytes_null diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index c891dd7c2db99d..9000d1152f7ea8 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -862,7 +862,7 @@ def test_parse_error_incomplete_hash end def test_parse_error_snippet - omit "JRuby errors don't contain positions" unless RUBY_ENGINE == "ruby" + omit "JRuby errors don't contain positions" if RUBY_ENGINE == "jruby" error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") } assert_equal "unexpected character: 'ああああああああああ' at line 1 column 1", error.message diff --git a/test/json/resumable_parser_test.rb b/test/json/resumable_parser_test.rb index a720679c210984..800502f4d46fe4 100644 --- a/test/json/resumable_parser_test.rb +++ b/test/json/resumable_parser_test.rb @@ -9,6 +9,22 @@ def setup @parser = new_parser end + def test_keyword_arguments + new_parser + new_parser({}) + new_parser(allow_nan: true) + + error = assert_raise(ArgumentError) do + new_parser(doesnt_exist: true, allow_nan: true) + end + assert_equal "unknown keyword: doesnt_exist", error.message + + error = assert_raise(ArgumentError) do + new_parser(doesnt_exist: true, allow_nan: true, a: 1, b: 2) + end + assert_equal "unknown keywords: doesnt_exist, a, b", error.message + end + def test_value refute_predicate @parser, :value? assert_raise(ArgumentError) { @parser.value } @@ -156,7 +172,7 @@ def test_parse_byte_by_byte_string end def test_parse_byte_by_byte_numbers - assert_resumed_parsing('123 ') + assert_resumed_parsing('123 ', trailing_bytes: 1) end def test_nul_byte_is_a_syntax_error @@ -364,14 +380,44 @@ def test_buffer_shrink parser.value end + def test_parsed_bytes + chunk = '[1, 2, 3, 4, tru' + @parser << chunk + refute @parser.parse + assert_equal chunk.bytesize, @parser.parsed_bytes + + @parser << 'e][]' + assert @parser.parse + assert_equal chunk.bytesize + 2, @parser.parsed_bytes + + assert @parser.parse + assert_equal 2, @parser.parsed_bytes + + @parser << chunk + refute @parser.parse + assert_equal chunk.bytesize, @parser.parsed_bytes + @parser.clear + assert_equal 0, @parser.parsed_bytes + end + + def test_parse_error_message + error = assert_parse_error("\n\n[plop\nfoo", "unexpected character: 'plop'") + assert_equal 0, error.line + assert_equal 0, error.column + end + private - def assert_parse_error(json) + def assert_parse_error(json, expected_error_message = nil) parser = new_parser parser << json - assert_raise(JSON::ParserError, "expected a parse error for #{json.inspect}") do + error = assert_raise(JSON::ParserError, "expected a parse error for #{json.inspect}") do parser.parse end + if expected_error_message + assert_equal expected_error_message, error.message + end + error end def assert_incomplete(json) @@ -389,7 +435,7 @@ def assert_partial_value(expected, json) end end - def assert_resumed_parsing(json, parser = @parser) + def assert_resumed_parsing(json, parser = @parser, trailing_bytes: 0) expected = JSON.parse(json) last_parsed_byte_index = 0 @@ -402,6 +448,7 @@ def assert_resumed_parsing(json, parser = @parser) assert_equal expected, actual remaining_bytes = (json.bytesize - last_parsed_byte_index) assert_equal 0, remaining_bytes, "unconsumed bytes: #{actual.inspect}, remaining: #{json.byteslice(-1, remaining_bytes).inspect}" + assert_equal json.bytesize - trailing_bytes, parser.parsed_bytes end def assert_parse_stream(expected, json, parser = @parser) @@ -413,7 +460,7 @@ def assert_parse_stream(expected, json, parser = @parser) assert_equal(expected, actual) end - def new_parser(options = nil) - JSON::ResumableParser.new(options) + def new_parser(...) + JSON::ResumableParser.new(...) end end diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb index a7d1ed0d1663f8..27567bb0b4bfe9 100644 --- a/test/ruby/enc/test_case_mapping.rb +++ b/test/ruby/enc/test_case_mapping.rb @@ -60,6 +60,31 @@ def test_ascii check_swapcase_properties 'yUKIHIRO matsumoto (MAtz)', 'Yukihiro MATSUMOTO (maTZ)' end + def test_ascii_8bit_case_mapping_all_bytes + bytes = (0..255).to_a.pack("C*") + upper_ascii = "A".ord.."Z".ord + lower_ascii = "a".ord.."z".ord + downcase_expected = (0..255).map {|byte| upper_ascii.cover?(byte) ? byte + 32 : byte }.pack("C*") + upcase_expected = (0..255).map {|byte| lower_ascii.cover?(byte) ? byte - 32 : byte }.pack("C*") + + assert_equal Encoding::ASCII_8BIT, bytes.encoding + assert_equal downcase_expected, bytes.downcase + assert_equal downcase_expected, bytes.downcase(:fold) + assert_equal upcase_expected, bytes.upcase + + downcased = bytes.dup + assert_same downcased, downcased.downcase! + assert_equal downcase_expected, downcased + + folded = bytes.dup + assert_same folded, folded.downcase!(:fold) + assert_equal downcase_expected, folded + + upcased = bytes.dup + assert_same upcased, upcased.upcase! + assert_equal upcase_expected, upcased + end + def test_invalid assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').upcase } assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".dup.force_encoding('UTF-8').downcase } diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index 2d1b513c7092c7..a6b7f26dd6e6cd 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -2431,4 +2431,33 @@ def initialize(val) = @hash = val end assert_equal values, hash.values, "[ruby-core:121239] [Bug #21170]" end + + def test_ar_find_entry_hint_eql_mutates_hash + # ar_find_entry_hint caches bound and hints, then calls #eql? which + # can mutate the hash. If #eql? triggers AR->ST conversion the loop + # would read st_table memory as ar_table pairs. + key_class = Class.new do + attr_reader :v + def initialize(v, h = nil) + @v = v + @h = h + end + def hash; 0; end + def eql?(other) + if @h + # Trigger AR->ST conversion + @h[42] = 42 + end + other.is_a?(self.class) && @v == other.v + end + end + + h = {} + 8.times { |i| h[key_class.new(i)] = i } + + # Not in the hash, so ar_find_entry_hint checks every entry. + lookup_key = key_class.new(-1, h) + + assert_equal nil, h[lookup_key] + end end