Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mypy/typeshed/stubs/librt/librt/strings.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ def isspace(c: i32, /) -> bool: ...
def isdigit(c: i32, /) -> bool: ...
def isalnum(c: i32, /) -> bool: ...
def isalpha(c: i32, /) -> bool: ...
def isidentifier(c: i32, /) -> bool: ...
1 change: 0 additions & 1 deletion mypyc/ir/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,4 @@ def get_header(self) -> str:
STRING_WRITER_EXTRA_OPS: Final = SourceDep("stringwriter_extra_ops.c")
BYTEARRAY_EXTRA_OPS: Final = SourceDep("bytearray_extra_ops.c")
STR_EXTRA_OPS: Final = SourceDep("str_extra_ops.c")
CODEPOINT_EXTRA_OPS: Final = SourceDep("codepoint_extra_ops.c")
VECS_EXTRA_OPS: Final = SourceDep("vecs_extra_ops.c")
8 changes: 0 additions & 8 deletions mypyc/lib-rt/codepoint_extra_ops.c

This file was deleted.

28 changes: 0 additions & 28 deletions mypyc/lib-rt/codepoint_extra_ops.h

This file was deleted.

63 changes: 53 additions & 10 deletions mypyc/lib-rt/strings/librt_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <Python.h>
#include <stdint.h>
#include "CPy.h"
#include "codepoint_extra_ops.h"
#include "librt_strings.h"

#define CPY_BOOL_ERROR 2
Expand Down Expand Up @@ -1154,15 +1153,50 @@ read_f64_be(PyObject *module, PyObject *const *args, size_t nargs) {
return PyFloat_FromDouble(CPyBytes_ReadF64BEUnsafe(data + index));
}

// Codepoint classification helpers exposed to interpreted callers.
// The C-side names are prefixed `cp_` to avoid colliding with libc's
// <ctype.h> isspace / isdigit / etc. Compiled callers go through the
// LibRTStrings_* static inlines in codepoint_extra_ops.h instead.
//
// All wrappers parse a single int argument as i32 (codepoint) and
// dispatch to the corresponding LibRTStrings_* function. The parse
// step accepts any int but rejects values outside the i32 range with
// OverflowError, matching the input domain of the compiled fast path.
// Codepoint classification helpers. Inputs are signed i32 for compatibility
// with mypyc's int32_rprimitive; negative values are non-codepoints and
// return false. Mypyc-compiled callers reach these through the librt.strings
// capsule API (see librt_strings_api.h); interpreted callers go through the
// `cp_*` Python wrappers below.

// Whitespace test for a single codepoint. Negative inputs are not
// codepoints and are always reported as non-whitespace.
bool LibRTStrings_IsSpace(int32_t c) {
    if (c < 0) {
        return false;
    }
    return Py_UNICODE_ISSPACE((Py_UCS4)c) != 0;
}

// Digit test for a single codepoint. Negative inputs are not codepoints
// and are always reported as non-digits.
bool LibRTStrings_IsDigit(int32_t c) {
    if (c < 0) {
        return false;
    }
    return Py_UNICODE_ISDIGIT((Py_UCS4)c) != 0;
}

// Alphanumeric test for a single codepoint. Negative inputs are not
// codepoints and are always reported as non-alphanumeric.
bool LibRTStrings_IsAlnum(int32_t c) {
    if (c < 0) {
        return false;
    }
    return Py_UNICODE_ISALNUM((Py_UCS4)c) != 0;
}

// Letter test for a single codepoint. Negative inputs are not codepoints
// and are always reported as non-letters.
bool LibRTStrings_IsAlpha(int32_t c) {
    if (c < 0) {
        return false;
    }
    return Py_UNICODE_ISALPHA((Py_UCS4)c) != 0;
}

// True if c could start a valid identifier (per PEP 3131), i.e. if a
// 1-character string holding c would satisfy PyUnicode_IsIdentifier.
//
// Values that are not Unicode codepoints (negative, or above 0x10FFFF)
// return false. The ASCII fast path covers `[A-Za-z_]`; other codepoints
// delegate to CPython's PyUnicode_IsIdentifier on a 1-character string.
// Aborts via CPyError_OutOfMemory on allocation failure to keep this
// ERR_NEVER.
bool LibRTStrings_IsIdentifier(int32_t c) {
    // PyUnicode_FromOrdinal raises ValueError for ordinals outside
    // range(0x110000). Reject those up front so that an out-of-range
    // input is not mistaken for an allocation failure below.
    if (c < 0 || c > 0x10FFFF) {
        return false;
    }
    if (c < 128) {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || c == '_';
    }
    PyObject *s = PyUnicode_FromOrdinal((int)c);
    if (s == NULL) {
        // With the range check above, failure here means allocation failed.
        CPyError_OutOfMemory();
    }
    int r = PyUnicode_IsIdentifier(s);
    Py_DECREF(s);
    // Only an explicit 1 counts as "is an identifier".
    return r == 1;
}

// Python-level wrappers (`cp_*`) for interpreted callers. The C-side names
// are prefixed `cp_` to avoid colliding with libc's <ctype.h> isspace etc.

// Parse a Python int as i32 codepoint. Returns 0 on success and writes
// the value to *out; returns -1 on error with a Python exception set.
Expand Down Expand Up @@ -1194,6 +1228,7 @@ DEFINE_CP_BOOL_WRAPPER(isspace, LibRTStrings_IsSpace)
DEFINE_CP_BOOL_WRAPPER(isdigit, LibRTStrings_IsDigit)
DEFINE_CP_BOOL_WRAPPER(isalnum, LibRTStrings_IsAlnum)
DEFINE_CP_BOOL_WRAPPER(isalpha, LibRTStrings_IsAlpha)
DEFINE_CP_BOOL_WRAPPER(isidentifier, LibRTStrings_IsIdentifier)

static PyMethodDef librt_strings_module_methods[] = {
{"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL,
Expand Down Expand Up @@ -1268,6 +1303,9 @@ static PyMethodDef librt_strings_module_methods[] = {
{"isalpha", cp_isalpha, METH_O,
PyDoc_STR("Test whether a codepoint (i32) is a Unicode letter.")
},
{"isidentifier", cp_isidentifier, METH_O,
PyDoc_STR("Test whether a codepoint (i32) is a valid identifier start (XID_Start).")
},
{NULL, NULL, 0, NULL}
};

Expand Down Expand Up @@ -1313,6 +1351,11 @@ librt_strings_module_exec(PyObject *m)
(void *)StringWriter_type_internal,
(void *)StringWriter_write_internal,
(void *)grow_string_buffer,
(void *)LibRTStrings_IsSpace,
(void *)LibRTStrings_IsDigit,
(void *)LibRTStrings_IsAlnum,
(void *)LibRTStrings_IsAlpha,
(void *)LibRTStrings_IsIdentifier,
};
PyObject *c_api_object = PyCapsule_New((void *)librt_strings_api, "librt.strings._C_API", NULL);
if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
Expand Down
4 changes: 2 additions & 2 deletions mypyc/lib-rt/strings/librt_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
// API version -- more recent versions must maintain backward compatibility, i.e.
// we can add new features but not remove or change existing features (unless
// ABI version is changed, but see the comment above).
#define LIBRT_STRINGS_API_VERSION 4
#define LIBRT_STRINGS_API_VERSION 5

// Number of functions in the capsule API. If you add a new function, also increase
// LIBRT_STRINGS_API_VERSION.
#define LIBRT_STRINGS_API_LEN 14
#define LIBRT_STRINGS_API_LEN 19

typedef struct {
PyObject_HEAD
Expand Down
6 changes: 6 additions & 0 deletions mypyc/lib-rt/strings/librt_strings_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import_librt_strings(void);

#include <Python.h>
#include <stdbool.h>
#include <stdint.h>
#include "librt_strings.h"

extern void *LibRTStrings_API[LIBRT_STRINGS_API_LEN];
Expand All @@ -24,6 +25,11 @@ extern void *LibRTStrings_API[LIBRT_STRINGS_API_LEN];
#define LibRTStrings_StringWriter_type_internal (*(PyTypeObject* (*)(void)) LibRTStrings_API[11])
#define LibRTStrings_StringWriter_write_internal (*(char (*)(PyObject *source, PyObject *value)) LibRTStrings_API[12])
#define LibRTStrings_grow_string_buffer (*(bool (*)(StringWriterObject *obj, Py_ssize_t n)) LibRTStrings_API[13])
#define LibRTStrings_IsSpace (*(bool (*)(int32_t c)) LibRTStrings_API[14])
#define LibRTStrings_IsDigit (*(bool (*)(int32_t c)) LibRTStrings_API[15])
#define LibRTStrings_IsAlnum (*(bool (*)(int32_t c)) LibRTStrings_API[16])
#define LibRTStrings_IsAlpha (*(bool (*)(int32_t c)) LibRTStrings_API[17])
#define LibRTStrings_IsIdentifier (*(bool (*)(int32_t c)) LibRTStrings_API[18])


static inline bool CPyBytesWriter_Check(PyObject *obj) {
Expand Down
27 changes: 17 additions & 10 deletions mypyc/primitives/librt_strings_ops.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
from mypyc.ir.deps import (
BYTES_WRITER_EXTRA_OPS,
CODEPOINT_EXTRA_OPS,
LIBRT_STRINGS,
STRING_WRITER_EXTRA_OPS,
)
from mypyc.ir.deps import BYTES_WRITER_EXTRA_OPS, LIBRT_STRINGS, STRING_WRITER_EXTRA_OPS
from mypyc.ir.ops import ERR_MAGIC, ERR_MAGIC_OVERLAPPING, ERR_NEVER
from mypyc.ir.rtypes import (
bool_rprimitive,
Expand Down Expand Up @@ -402,7 +397,7 @@
return_type=bool_rprimitive,
c_function_name="LibRTStrings_IsSpace",
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
dependencies=[LIBRT_STRINGS],
)

function_op(
Expand All @@ -411,7 +406,7 @@
return_type=bool_rprimitive,
c_function_name="LibRTStrings_IsDigit",
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
dependencies=[LIBRT_STRINGS],
)

function_op(
Expand All @@ -420,7 +415,7 @@
return_type=bool_rprimitive,
c_function_name="LibRTStrings_IsAlnum",
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
dependencies=[LIBRT_STRINGS],
)

function_op(
Expand All @@ -429,5 +424,17 @@
return_type=bool_rprimitive,
c_function_name="LibRTStrings_IsAlpha",
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
dependencies=[LIBRT_STRINGS],
)

# Primitive for librt.strings.isidentifier: takes a single i32 codepoint and
# lowers to a direct call to the C function LibRTStrings_IsIdentifier.
#
# isidentifier checks XID_Start semantics for a single codepoint, matching
# str.isidentifier() on a 1-character string. The non-ASCII path allocates
# and aborts via CPyError_OutOfMemory on failure, so this stays ERR_NEVER.
function_op(
name="librt.strings.isidentifier",
arg_types=[int32_rprimitive],
return_type=bool_rprimitive,
c_function_name="LibRTStrings_IsIdentifier",
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS],
)
14 changes: 14 additions & 0 deletions mypyc/test-data/irbuild-librt-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,17 @@ def is_a(c):
L0:
r0 = LibRTStrings_IsAlpha(c)
return r0

[case testLibrtStringsIsIdentifierIR]
from librt.strings import isidentifier
from mypy_extensions import i32

def is_id(c: i32) -> bool:
return isidentifier(c)
[out]
def is_id(c):
c :: i32
r0 :: bool
L0:
r0 = LibRTStrings_IsIdentifier(c)
return r0
5 changes: 4 additions & 1 deletion mypyc/test-data/run-librt-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -1443,7 +1443,7 @@ def test_new_without_init_is_usable() -> None:
[case testLibrtStringsCodepointClassifiers_librt]
from typing import Any
from mypy_extensions import i32
from librt.strings import isspace, isdigit, isalnum, isalpha
from librt.strings import isspace, isdigit, isalnum, isalpha, isidentifier

from testutil import assertRaises

Expand All @@ -1455,6 +1455,7 @@ def test_codepoint_classifiers() -> None:
assert not isdigit(bad)
assert not isalnum(bad)
assert not isalpha(bad)
assert not isidentifier(bad)
# Verify each codepoint primitive agrees with the matching str method
# across all Unicode codepoints, including the ord(chr(i)) round-trip.
# Any forces generic dispatch on the str side.
Expand All @@ -1466,6 +1467,7 @@ def test_codepoint_classifiers() -> None:
assert isdigit(o) == isdigit(i) == a.isdigit()
assert isalnum(o) == isalnum(i) == a.isalnum()
assert isalpha(o) == isalpha(i) == a.isalpha()
assert isidentifier(o) == isidentifier(i) == a.isidentifier()


def test_codepoint_classifiers_via_any() -> None:
Expand All @@ -1476,6 +1478,7 @@ def test_codepoint_classifiers_via_any() -> None:
(isdigit, "5", "a"),
(isalnum, "A", " "),
(isalpha, "A", " "),
(isidentifier, "A", "0"),
):
f: Any = fn
assert f(ord(true_input)) is True
Expand Down
Loading