python · VaggelisD · May 27, 2026
diff --git a/mypy/typeshed/stubs/librt/librt/strings.pyi b/mypy/typeshed/stubs/librt/librt/strings.pyi
@@ -48,3 +48,12 @@ def isdigit(c: i32, /) -> bool: ...
 def isalnum(c: i32, /) -> bool: ...
 def isalpha(c: i32, /) -> bool: ...
 def isidentifier(c: i32, /) -> bool: ...
+
+# Codepoint case conversion. Each function maps one codepoint to one
+# codepoint. For the rare codepoints whose Unicode uppercase / lowercase
+# expands to multiple codepoints (e.g. U+00DF uppercases to "SS", U+FB01
+# to "FI"), the input is returned unchanged so the signature stays
+# i32 -> i32; use str.upper() / str.lower() when full Unicode case
+# conversion matters. Negative inputs are returned unchanged.
+def toupper(c: i32, /) -> i32: ...
+def tolower(c: i32, /) -> i32: ...
diff --git a/mypyc/lib-rt/strings/librt_strings.c b/mypyc/lib-rt/strings/librt_strings.c
@@ -1191,6 +1191,18 @@ DEFINE_CP_BOOL_WRAPPER(isalnum, LibRTStrings_IsAlnum)
 DEFINE_CP_BOOL_WRAPPER(isalpha, LibRTStrings_IsAlpha)
 DEFINE_CP_BOOL_WRAPPER(isidentifier, LibRTStrings_IsIdentifier)
 
+// Generates cp_<name>, a METH_O entry point: parse `arg` with cp_parse_i32,
+// apply `fn` to the resulting i32, and box the i32 result as a Python int.
+#define DEFINE_CP_I32_WRAPPER(name, fn)                                     \
+    static PyObject *cp_##name(PyObject *module, PyObject *arg) {           \
+        int32_t codepoint;                                                  \
+        if (cp_parse_i32(arg, &codepoint) < 0) return NULL;                 \
+        return PyLong_FromLong((long) fn(codepoint));                       \
+    }
+
+DEFINE_CP_I32_WRAPPER(toupper, LibRTStrings_ToUpper)
+DEFINE_CP_I32_WRAPPER(tolower, LibRTStrings_ToLower)
+
 static PyMethodDef librt_strings_module_methods[] = {
     {"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL,
      PyDoc_STR("Write a 16-bit signed integer to BytesWriter in little-endian format")
@@ -1267,6 +1279,12 @@ static PyMethodDef librt_strings_module_methods[] = {
     {"isidentifier", cp_isidentifier, METH_O,
      PyDoc_STR("Test whether a codepoint (i32) is a valid identifier start (XID_Start).")
     },
+    {"toupper", cp_toupper, METH_O,
+     PyDoc_STR("Single-codepoint uppercase mapping for a codepoint (i32). Returns the input unchanged if the Unicode uppercase expands to multiple codepoints (e.g. U+00DF uppercases to \"SS\"); use str.upper() for full Unicode case conversion.")
+    },
+    {"tolower", cp_tolower, METH_O,
+     PyDoc_STR("Single-codepoint lowercase mapping for a codepoint (i32). Returns the input unchanged if the Unicode lowercase expands to multiple codepoints; use str.lower() for full Unicode case conversion.")
+    },
     {NULL, NULL, 0, NULL}
 };
 

diff --git a/mypyc/lib-rt/strings/librt_strings.h b/mypyc/lib-rt/strings/librt_strings.h
@@ -73,4 +73,48 @@ static inline bool LibRTStrings_IsIdentifier(int32_t c) {
     return r == 1;
 }
 
+// Shared slow path for LibRTStrings_ToUpper / _ToLower. Round-trips the
+// codepoint through CPython's str.upper / str.lower (selected by `method`)
+// on a 1-character string. When the conversion expands to multiple
+// codepoints (e.g. 'ß'.upper() == 'SS'), or when c is not a valid codepoint
+// (outside 0..0x10FFFF), returns c unchanged so the public helpers stay
+// i32 -> i32 and never fail. Aborts via CPyError_OutOfMemory if creating
+// the string or calling the method fails.
+static inline int32_t LibRTStrings_ChangeCase_slow(int32_t c, const char *method) {
+    // PyUnicode_FromOrdinal raises ValueError outside range(0x110000); that
+    // must not be mistaken for an allocation failure and abort the process.
+    if (c < 0 || c > 0x10FFFF) return c;
+    PyObject *s = PyUnicode_FromOrdinal((int)c);
+    if (s == NULL) CPyError_OutOfMemory();
+    PyObject *u = PyObject_CallMethod(s, method, NULL);
+    Py_DECREF(s);
+    if (u == NULL) CPyError_OutOfMemory();
+    int32_t result = c;
+    if (PyUnicode_GET_LENGTH(u) == 1) {
+        result = (int32_t)PyUnicode_READ_CHAR(u, 0);
+    }
+    Py_DECREF(u);
+    return result;
+}
+
+// Uppercase a single codepoint. Values below 128 are resolved inline:
+// `a..z` map to `A..Z`, everything else (including negative inputs) is
+// returned as-is. Non-ASCII codepoints go through str.upper on a
+// 1-character string and come back unchanged if that expands to several
+// codepoints.
+static inline int32_t LibRTStrings_ToUpper(int32_t c) {
+    if (c >= 128) return LibRTStrings_ChangeCase_slow(c, "upper");
+    return (c >= 'a' && c <= 'z') ? c - ('a' - 'A') : c;
+}
+
+// Lowercase a single codepoint. Values below 128 are resolved inline:
+// `A..Z` map to `a..z`, everything else (including negative inputs) is
+// returned as-is. Non-ASCII codepoints go through str.lower on a
+// 1-character string and come back unchanged if that expands to several
+// codepoints.
+static inline int32_t LibRTStrings_ToLower(int32_t c) {
+    if (c >= 128) return LibRTStrings_ChangeCase_slow(c, "lower");
+    return (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c;
+}
+
 #endif  // LIBRT_STRINGS_H
diff --git a/mypyc/primitives/librt_strings_ops.py b/mypyc/primitives/librt_strings_ops.py
@@ -438,3 +438,26 @@
     error_kind=ERR_NEVER,
     dependencies=[LIBRT_STRINGS],
 )
+
+# Codepoint case conversion (i32 -> i32). If the Unicode upper/lowercase of a
+# codepoint expands to multiple codepoints (e.g. U+00DF uppercases to "SS",
+# U+FB01 to "FI"), the input is returned unchanged; use str.upper() /
+# str.lower() for full case conversion. Negative inputs are returned unchanged.
+# ERR_NEVER: the C helpers return a plain int32_t with no error value.
+function_op(
+    name="librt.strings.toupper",
+    arg_types=[int32_rprimitive],
+    return_type=int32_rprimitive,
+    c_function_name="LibRTStrings_ToUpper",
+    error_kind=ERR_NEVER,
+    dependencies=[LIBRT_STRINGS],
+)
+
+function_op(
+    name="librt.strings.tolower",
+    arg_types=[int32_rprimitive],
+    return_type=int32_rprimitive,
+    c_function_name="LibRTStrings_ToLower",
+    error_kind=ERR_NEVER,
+    dependencies=[LIBRT_STRINGS],
+)
diff --git a/mypyc/test-data/irbuild-librt-strings.test b/mypyc/test-data/irbuild-librt-strings.test
@@ -401,3 +401,29 @@ def is_id(c):
 L0:
     r0 = LibRTStrings_IsIdentifier(c)
     return r0
+
+[case testLibrtStringsToUpperIR]
+from librt.strings import toupper
+from mypy_extensions import i32
+
+def up(c: i32) -> i32:
+    return toupper(c)
+[out]
+def up(c):
+    c, r0 :: i32
+L0:
+    r0 = LibRTStrings_ToUpper(c)
+    return r0
+
+[case testLibrtStringsToLowerIR]
+from librt.strings import tolower
+from mypy_extensions import i32
+
+def lo(c: i32) -> i32:
+    return tolower(c)
+[out]
+def lo(c):
+    c, r0 :: i32
+L0:
+    r0 = LibRTStrings_ToLower(c)
+    return r0
diff --git a/mypyc/test-data/run-librt-strings.test b/mypyc/test-data/run-librt-strings.test
@@ -1490,3 +1490,52 @@ def test_codepoint_classifiers_via_any() -> None:
             f(1 << 40)
         with assertRaises(OverflowError, "codepoint out of i32 range"):
             f(-(1 << 40))
+
+[case testLibrtStringsCodepointCaseConversion_librt]
+from typing import Any
+from mypy_extensions import i32
+from librt.strings import toupper, tolower
+
+from testutil import assertRaises
+
+
+def _expect(c: str, method: str) -> int:
+    # The contract: i32 -> i32 when conversion yields exactly one codepoint,
+    # else return the input unchanged.
+    converted = getattr(c, method)()
+    if len(converted) == 1:
+        return ord(converted)
+    return ord(c)
+
+
+def test_codepoint_case_conversion() -> None:
+    # Negative i32 values are passed through unchanged by both helpers.
+    for bad in (i32(-1), i32(-113)):
+        assert toupper(bad) == bad
+        assert tolower(bad) == bad
+    # Exhaustive check over every value chr() accepts: the result must match
+    # _expect(), i.e. str.upper / str.lower when that stays one codepoint.
+    for i in range(0x110000):
+        c = chr(i)
+        o = ord(c)
+        assert toupper(o) == _expect(c, "upper")
+        assert tolower(o) == _expect(c, "lower")
+
+
+def test_codepoint_case_conversion_via_any() -> None:
+    # Loop + Any-typed binding force generic dispatch through the PyMethodDef wrapper.
+    for fn, in_cp, out_cp in (
+        (toupper, ord("a"), ord("A")),
+        (toupper, ord("A"), ord("A")),
+        (tolower, ord("Z"), ord("z")),
+        (tolower, ord("z"), ord("z")),
+    ):
+        f: Any = fn
+        assert f(in_cp) == out_cp
+        # -1 fits in i32, so it is accepted and returned unchanged.
+        assert f(-1) == -1
+        # Values that do not fit in i32 make the wrapper raise OverflowError.
+        with assertRaises(OverflowError, "codepoint out of i32 range"):
+            f(1 << 40)
+        with assertRaises(OverflowError, "codepoint out of i32 range"):
+            f(-(1 << 40))