From cd20a828b146b426d2b8b0acc59aa863737a1660 Mon Sep 17 00:00:00 2001 From: Arpit Jain Date: Sat, 4 Jul 2026 00:04:16 +0900 Subject: [PATCH] Don't abort scan on non-UTF-8 module dictionary keys convert_dictionary_to_python used PyDict_SetItemString, which decodes the key as strict UTF-8. YARA module dictionary keys are SIZED_STRING values holding arbitrary bytes (for instance pe.version_info keys read straight from the binary), so a non-UTF-8 key raised UnicodeDecodeError. That exception propagated out of the modules_callback as a SystemError and aborted the whole scan. Build the key explicitly with a tolerant decoder (PyUnicode_DecodeUTF8 with the 'replace' handler) and use the key length so embedded NULs are handled too. Keys stay str, so this is not a breaking change: valid keys are unchanged and, on Python 3, invalid bytes become U+FFFD instead of aborting; on Python 2 the key is built with PyString_FromStringAndSize and keeps its raw bytes. Fixes #273 Signed-off-by: Arpit Jain --- yara-python.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/yara-python.c b/yara-python.c index 3547e85..25566e0 100644 --- a/yara-python.c +++ b/yara-python.c @@ -818,10 +818,25 @@ PyObject* convert_dictionary_to_python( if (py_object != NULL) { - PyDict_SetItemString( - py_dict, - dictionary->items->objects[i].key->c_string, - py_object); + // Dictionary keys are SIZED_STRING values holding arbitrary bytes (for + // example the pe.version_info keys come straight from the binary), so + // they are not guaranteed to be valid UTF-8. PyDict_SetItemString would + // decode strictly and raise UnicodeDecodeError on a non-UTF-8 key, which + // aborts the whole scan (see issue #273). Build the key tolerantly and + // use its length so embedded NULs are handled too. 
+ SIZED_STRING* key = dictionary->items->objects[i].key; + + #if PY_MAJOR_VERSION >= 3 + PyObject* py_key = PyUnicode_DecodeUTF8(key->c_string, key->length, "replace"); + #else + PyObject* py_key = PyString_FromStringAndSize(key->c_string, key->length); + #endif + + if (py_key != NULL) + { + PyDict_SetItem(py_dict, py_key, py_object); + Py_DECREF(py_key); + } Py_DECREF(py_object); }