diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 4b1e289c6ff468..91bf3fcbc0eced 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1576,6 +1576,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(aclose)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add_done_callback)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add_note)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(adobe)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_child)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_parent)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 6ee649b59a5c37..73804a531db224 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -299,6 +299,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(aclose) STRUCT_FOR_ID(add) STRUCT_FOR_ID(add_done_callback) + STRUCT_FOR_ID(add_note) STRUCT_FOR_ID(adobe) STRUCT_FOR_ID(after_in_child) STRUCT_FOR_ID(after_in_parent) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 778db946c2a3aa..25c0fde5657fc0 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1574,6 +1574,7 @@ extern "C" { INIT_ID(aclose), \ INIT_ID(add), \ INIT_ID(add_done_callback), \ + INIT_ID(add_note), \ INIT_ID(adobe), \ INIT_ID(after_in_child), \ INIT_ID(after_in_parent), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index bd8f50ff0ee732..ab448dcc1e5853 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ 
b/Include/internal/pycore_unicodeobject_generated.h
@@ -976,6 +976,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(add_note);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(adobe);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index cace780f79f515..aaa91aca36e3c4 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -510,6 +510,34 @@ def _test_exception(self, have_source):
             self.assertIn('call_with_frame("StartElement"', entries[1].line)
 
+    def test_invalid_NotStandalone(self):  # str result -> TypeError with a note
+        parser = expat.ParserCreate()
+        parser.NotStandaloneHandler = mock.Mock(return_value="bad value")
+        parser.ElementDeclHandler = lambda _1, _2: None
+
+        payload = b"""\
+]>
+"""  # NOTE(review): payload looks truncated (only "]>" is left) -- confirm
+        with self.assertRaises(TypeError) as cm:
+            parser.Parse(payload, True)
+        parser.NotStandaloneHandler.assert_called_once()
+
+        notes = ["invalid 'NotStandalone' event handler return value"]
+        self.assertEqual(cm.exception.__notes__, notes)  # exactly one note
+
+    def test_invalid_ExternalEntityRefHandler(self):  # None result -> TypeError
+        parser = expat.ParserCreate()
+        parser.UseForeignDTD()
+        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
+        parser.ExternalEntityRefHandler = mock.Mock(return_value=None)
+
+        with self.assertRaises(TypeError) as cm:
+            parser.Parse(b"", True)  # NOTE(review): empty document? -- confirm
+        parser.ExternalEntityRefHandler.assert_called_once()
+
+        notes = ["invalid 'ExternalEntityRef' event handler return value"]
+        self.assertEqual(cm.exception.__notes__, notes)  # exactly one note
+
 # Test Current* members:
 class PositionTest(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst
b/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst
new file mode 100644
index 00000000000000..2103024b616d4e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst
@@ -0,0 +1,2 @@
+:mod:`xml.parsers.expat`: add an exception note when a custom Expat handler
+return value cannot be properly interpreted. Patch by Bénédikt Tran.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 31b883fe8bd548..e915054143c51d 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -503,6 +503,29 @@ my_StartElementHandler(void *userData,
     }
 }
 
+/* Add a note naming the offending handler to the exception that is currently
+ * raised.  If the note cannot be created or attached, that secondary error is
+ * dropped and the original exception is re-raised without the note. */
+static inline void
+invalid_expat_handler_rv(const char *name)
+{
+    PyObject *exc = PyErr_GetRaisedException();
+    assert(exc != NULL);
+    PyObject *added = NULL;
+    PyObject *note = PyUnicode_FromFormat(
+        "invalid '%s' event handler return value", name);
+    if (note != NULL) {
+        added = PyObject_CallMethodOneArg(exc, &_Py_ID(add_note), note);
+        Py_DECREF(note);
+    }
+    if (added == NULL) {
+        /* Best effort: discard the failure to build or attach the note. */
+        PyErr_Clear();
+    }
+    Py_XDECREF(added);
+    PyErr_SetRaisedException(exc);
+}
+
 #define RC_HANDLER(RETURN_TYPE, NAME, PARAMS, \
                    INIT, PARSE_FORMAT, CONVERSION, \
                    RETURN_VARIABLE, GETUSERDATA) \
@@ -536,6 +559,9 @@ my_ ## NAME ## Handler PARAMS { \
     } \
     CONVERSION \
     Py_DECREF(rv); \
+    if (PyErr_Occurred()) { \
+        invalid_expat_handler_rv(#NAME); \
+    } \
     return RETURN_VARIABLE; \
 }