From ce8c3be2d53a807c6901e7613d27ec9535a1d574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Mar 2026 11:53:15 +0100 Subject: [PATCH 1/5] gh-146563: add exception note for invalid Expat handler return values --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 +++ Lib/test/test_pyexpat.py | 28 +++++++++++++++++++ ...-03-28-11-31-32.gh-issue-146563.cXtSym.rst | 2 ++ Modules/pyexpat.c | 26 +++++++++++++++++ 7 files changed, 63 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 4b1e289c6ff468..91bf3fcbc0eced 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1576,6 +1576,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(aclose)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add_done_callback)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add_note)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(adobe)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_child)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_parent)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 6ee649b59a5c37..73804a531db224 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -299,6 +299,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(aclose) STRUCT_FOR_ID(add) STRUCT_FOR_ID(add_done_callback) + STRUCT_FOR_ID(add_note) STRUCT_FOR_ID(adobe) STRUCT_FOR_ID(after_in_child) STRUCT_FOR_ID(after_in_parent) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 778db946c2a3aa..25c0fde5657fc0 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1574,6 +1574,7 @@ extern "C" { INIT_ID(aclose), \ INIT_ID(add), \ INIT_ID(add_done_callback), \ + INIT_ID(add_note), \ INIT_ID(adobe), \ INIT_ID(after_in_child), \ INIT_ID(after_in_parent), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index bd8f50ff0ee732..ab448dcc1e5853 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -976,6 +976,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(add_note); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(adobe); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index cace780f79f515..aaa91aca36e3c4 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -510,6 +510,34 @@ def _test_exception(self, have_source): self.assertIn('call_with_frame("StartElement"', entries[1].line) + def test_invalid_NotStandalone(self): + parser = expat.ParserCreate() + parser.NotStandaloneHandler = mock.Mock(return_value="bad value") + parser.ElementDeclHandler = lambda _1, _2: None + + payload = b"""\ +]> +""" + with self.assertRaises(TypeError) as cm: + parser.Parse(payload, True) + parser.NotStandaloneHandler.assert_called_once() + + notes = ["invalid 'NotStandalone' event handler return value"] + self.assertEqual(cm.exception.__notes__, notes) + + def test_invalid_ExternalEntityRefHandler(self): + parser = expat.ParserCreate() + parser.UseForeignDTD() + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) + parser.ExternalEntityRefHandler = mock.Mock(return_value=None) + + with self.assertRaises(TypeError) as cm: + parser.Parse(b"", True) + parser.ExternalEntityRefHandler.assert_called_once() + + notes = ["invalid 'ExternalEntityRef' event handler return value"] + self.assertEqual(cm.exception.__notes__, notes) + # Test Current* members: class PositionTest(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst b/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst new file mode 100644 index 00000000000000..e8fd2306def5f1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst @@ -0,0 +1,2 @@ +:mod:`xml.parser.expat`: add an exception note when a custom Expat handler +return value cannot be properly interpreted. Patch by Bénédikt Tran. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 31b883fe8bd548..c51520773d7948 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -503,6 +503,28 @@ my_StartElementHandler(void *userData, } } +static inline void +invalid_expat_handler_rv(const char *name) +{ + PyObject *exc = PyErr_GetRaisedException(); + assert(exc != NULL); + PyObject *note = PyUnicode_FromFormat("invalid '%s' event handler return value", name); + if (note == NULL) { + goto error; + } + PyObject *res = PyObject_CallMethodOneArg(exc, &_Py_ID(add_note), note); + Py_DECREF(note); + if (res == NULL) { + goto error; + } + goto done; + +error: + PyErr_Clear(); +done: + _PyErr_ChainExceptions1(exc); +} + #define RC_HANDLER(RETURN_TYPE, NAME, PARAMS, \ INIT, PARSE_FORMAT, CONVERSION, \ RETURN_VARIABLE, GETUSERDATA) \ @@ -536,6 +558,10 @@ my_ ## NAME ## Handler PARAMS { \ } \ CONVERSION \ Py_DECREF(rv); \ + if (PyErr_Occurred()) { \ + invalid_expat_handler_rv(#NAME); \ + return RETURN_VARIABLE; \ + } \ return RETURN_VARIABLE; \ } From b74a33918443534793182537b5104f26a7230a0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Mar 2026 11:54:27 +0100 Subject: [PATCH 2/5] Update Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst --- .../next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst b/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst index e8fd2306def5f1..2103024b616d4e 100644 --- a/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst +++ b/Misc/NEWS.d/next/Library/2026-03-28-11-31-32.gh-issue-146563.cXtSym.rst @@ -1,2 +1,2 @@ -:mod:`xml.parser.expat`: add an exception note when a custom Expat handler +:mod:`xml.parsers.expat`: add an exception note when a custom Expat handler return value cannot be properly interpreted. Patch by Bénédikt Tran. From d3bf70cc4478a6389a50c3464572645a3d17fb65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Mar 2026 11:54:58 +0100 Subject: [PATCH 3/5] Update Modules/pyexpat.c --- Modules/pyexpat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index c51520773d7948..3659cf0ffee836 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -517,6 +517,7 @@ invalid_expat_handler_rv(const char *name) if (res == NULL) { goto error; } + Py_DECREF(res); goto done; error: From 43145a8bcfb52aeb40d20f2deeacfecb3f455180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Mar 2026 11:55:38 +0100 Subject: [PATCH 4/5] Update Modules/pyexpat.c --- Modules/pyexpat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 3659cf0ffee836..2f87d115ffbe41 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -561,7 +561,6 @@ my_ ## NAME ## Handler PARAMS { \ Py_DECREF(rv); \ if (PyErr_Occurred()) { \ invalid_expat_handler_rv(#NAME); \ - return RETURN_VARIABLE; \ } \ return RETURN_VARIABLE; \ } From 284f66e89ad805b751ebd722e9783c10938066a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:07:27 +0100 Subject: [PATCH 5/5] correctly use `PyErr_SetRaisedException` --- Modules/pyexpat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 2f87d115ffbe41..e915054143c51d 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -523,7 +523,7 @@ invalid_expat_handler_rv(const char *name) error: PyErr_Clear(); done: - _PyErr_ChainExceptions1(exc); + PyErr_SetRaisedException(exc); } #define RC_HANDLER(RETURN_TYPE, NAME, PARAMS, \