From 3c54f1f92bb6e08442c18c782aebfc4eba09819c Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 1 Jun 2026 14:45:59 -0700 Subject: [PATCH 01/41] add FFI as a requirement for dynamic calls --- plugins/python/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/plugins/python/CMakeLists.txt b/plugins/python/CMakeLists.txt index d21131767..27f41d14a 100644 --- a/plugins/python/CMakeLists.txt +++ b/plugins/python/CMakeLists.txt @@ -7,6 +7,9 @@ if(Python_NumPy_VERSION VERSION_LESS "2.0.0") ) endif() +find_package(PkgConfig REQUIRED) +pkg_check_modules(FFI REQUIRED IMPORTED_TARGET libffi) + # Phlex module to run Python algorithms add_library( pymodule @@ -17,8 +20,9 @@ add_library( src/dciwrap.cpp src/lifelinewrap.cpp src/errorwrap.cpp + src/dyncall.cpp ) -target_link_libraries(pymodule PRIVATE phlex::module Python::Python Python::NumPy) +target_link_libraries(pymodule PRIVATE phlex::module PkgConfig::FFI Python::Python Python::NumPy) target_compile_definitions(pymodule PRIVATE NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) install(TARGETS pymodule LIBRARY DESTINATION lib) From 0a1ae326245a599cbf96bfac548041105e66926e Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 1 Jun 2026 14:46:40 -0700 Subject: [PATCH 02/41] add basic JIT (Numba) test --- test/python/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt index aa79543c4..276180f55 100644 --- a/test/python/CMakeLists.txt +++ b/test/python/CMakeLists.txt @@ -192,6 +192,13 @@ list(APPEND ACTIVE_PY_CPHLEX_TESTS py:reduce) add_test(NAME py:coverage COMMAND phlex::phlex -c ${CMAKE_CURRENT_SOURCE_DIR}/pycoverage.jsonnet) list(APPEND ACTIVE_PY_CPHLEX_TESTS py:coverage) +# numba tests if installed +if(HAS_NUMBA) + # phlex-based tests that require numpy support + add_test(NAME py:jited COMMAND phlex::phlex -c ${CMAKE_CURRENT_SOURCE_DIR}/pyjited.jsonnet) + list(APPEND ACTIVE_PY_CPHLEX_TESTS py:jited) +endif() + add_test( NAME py:mismatch COMMAND ${PROJECT_BINARY_DIR}/bin/phlex -c ${CMAKE_CURRENT_SOURCE_DIR}/pymismatch.jsonnet From 38a02c293a37d7bc6498a6a1b58549bae987c2f9 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 1 Jun 2026 14:49:16 -0700 Subject: [PATCH 03/41] initial Numba/JIT support --- plugins/python/python/phlex/_typing.py | 23 +- plugins/python/src/modulewrap.cpp | 463 ++++++++++++++----------- 2 files changed, 287 insertions(+), 199 deletions(-) diff --git a/plugins/python/python/phlex/_typing.py b/plugins/python/python/phlex/_typing.py index 6a38e3bdb..860751946 100644 --- a/plugins/python/python/phlex/_typing.py +++ b/plugins/python/python/phlex/_typing.py @@ -13,12 +13,18 @@ from typing import Any, Dict, Union import numpy as np +try: + import numba.core.types as nb_types + has_numba = True +except ImportError: + has_numba = False __all__ = [ "normalize_type", ] -# ctypes and numpy types are likely candidates for use in annotations +# ctypes and numpy types are likely candidates for use in annotations; Numba +# types may appear from callback signatures # TODO: should users be allowed to add to these? _PY2CPP: dict[type, str] = { # numpy types @@ -40,6 +46,21 @@ # np.uintp: "size_t", } +if has_numba: + _PY2CPP.update({ + nb_types.bool: "bool", + nb_types.int8: "int8_t", + nb_types.int16: "int16_t", + nb_types.int32: "int32_t", + nb_types.int64: "int64_t", + nb_types.uint8: "uint8_t", + nb_types.uint16: "uint16_t", + nb_types.uint32: "uint32_t", + nb_types.uint64: "uint64_t", + nb_types.Float: "float", + nb_types.double: "double", + }) + # ctypes types that don't map cleanly to intN_t / uintN_t _CTYPES_SPECIAL: dict[type, str] = {} for _attr, _cpp in [ diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 4b89a8ff5..41f6e2d7d 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -1,3 +1,4 @@ +#include "dyncall.hpp" #include "wrap.hpp" #include "phlex/model/data_cell_index.hpp" @@ -11,6 +12,8 @@ #include #include #include +#include +#include #include #define NO_IMPORT_ARRAY @@ -39,6 +42,13 @@ // need that same input translated. This simplifies memory management, but // can cause a performance bottleneck (since all require the GIL). +// This is dumb, but for now, because all templates need to be instantiated, only +// support up to a fixed compile-timee maximum number of arguments. An alternative +// would be to collect the arguments, but that currently suffers from needing a +// "initial" to create the container to collect arguments into. This may all go +// away once converter nodes have better support in phlex' core +constexpr size_t MAX_SUPPORTED_ARGS = 3; + using namespace phlex::experimental; using namespace phlex; using phlex::concurrency; @@ -95,33 +105,36 @@ namespace { return fmt::format("{}_arg{}_py", algname, arg); } - static inline PyObject* lifeline_transform(intptr_t arg) + static inline dcarg lifeline_transform(dcarg arg) { - PyObject* pyobj = reinterpret_cast(arg); + PyObject* pyobj = reinterpret_cast(arg.m_ptr); if (pyobj && PyObject_TypeCheck(pyobj, &PhlexLifeline_Type)) { - return reinterpret_cast(pyobj)->m_view; + return dcarg{reinterpret_cast(pyobj)->m_view}; } - return pyobj; + return arg; } - // callable object managing the callback - template - struct py_callback { + // callable objects managing the callback + struct py_callback_base { PyObject* m_callable; // owned + void* m_ccallback; // C callable (either dispatcher or direct pointer) - explicit py_callback(PyObject* callable) : m_callable(callable) + py_callback_base(PyObject* callable, + void* cb = (void*)PyObject_CallFunctionObjArgs): + m_callable(callable), m_ccallback(cb) { - // callable is always non-null here (validated before py_callback construction) + // callable is always non-null here (validated before construction) PyGILRAII gil; Py_INCREF(m_callable); } - py_callback(py_callback const& pc) : m_callable(pc.m_callable) + py_callback_base(py_callback_base const& pc) : + m_callable(pc.m_callable), m_ccallback(pc.m_ccallback) { // Must hold GIL when manipulating reference counts PyGILRAII gil; Py_INCREF(m_callable); } - py_callback& operator=(py_callback const& pc) + py_callback_base& operator=(py_callback_base const& pc) { if (this != &pc) { // Must hold GIL when manipulating reference counts @@ -129,66 +142,60 @@ namespace { Py_INCREF(pc.m_callable); Py_DECREF(m_callable); m_callable = pc.m_callable; + m_ccallback = pc.m_ccallback; } return *this; } - ~py_callback() + virtual ~py_callback_base() { // TODO: cleanup deferred to Phlex shutdown hook // Cannot safely Py_DECREF during arbitrary destruction due to: // - TOCTOU race on Py_IsInitialized() without GIL // - Module offloading in interpreter cleanup phase 2 } - py_callback(py_callback&&) = default; - py_callback& operator=(py_callback&&) = default; - - template - intptr_t call(Args... args) - { - static_assert(sizeof...(Args) == N, "Argument count mismatch"); - - PyGILRAII gil; - - PyObject* result = - PyObject_CallFunctionObjArgs(m_callable, lifeline_transform(args)..., nullptr); - - std::string error_msg; - if (!result) { - if (!msg_from_py_error(error_msg)) - error_msg = "Unknown python error"; - } + py_callback_base(py_callback_base&&) = default; + py_callback_base& operator=(py_callback_base&&) = default; + }; - decref_all(args...); + // type repeater to automatically instantiate callbacks taking N args + template + using type_repeater = T; - if (!error_msg.empty()) { - throw std::runtime_error(error_msg.c_str()); - } + template + struct py_callback_impl; - return reinterpret_cast(result); - } + template + struct py_callback_impl> : public py_callback_base { + py_callback_impl(PyObject* callable) : py_callback_base(callable, (void*)PyObject_CallFunctionObjArgs) {} - template - void callv(Args... args) + RT operator()(type_repeater... args) { - static_assert(sizeof...(Args) == N, "Argument count mismatch"); + dcargs_t argsv; + argsv.reserve(sizeof...(Is) + 2); + argsv.push_back(dcarg{m_callable}); + (argsv.push_back(lifeline_transform(args)), ...); + argsv.push_back(dcarg{nullptr}); PyGILRAII gil; - PyObject* result = - PyObject_CallFunctionObjArgs(m_callable, lifeline_transform(args)..., nullptr); + dcarg result{nullptr}; + dyncall((void*)m_ccallback, result, argsv, 1); std::string error_msg; - if (!result) { + if (!result.m_ptr) { if (!msg_from_py_error(error_msg)) error_msg = "Unknown python error"; - } else - Py_DECREF(result); + } decref_all(args...); - if (!error_msg.empty()) { + if (!error_msg.empty()) throw std::runtime_error(error_msg.c_str()); - } + + if constexpr (!std::is_void_v) + return result; + else + Py_DECREF((PyObject*)result.m_ptr); } private: @@ -196,45 +203,42 @@ namespace { void decref_all(Args... args) { // helper to decrement reference counts of N arguments - (Py_DECREF((PyObject*)args), ...); + (Py_DECREF((PyObject*)args.m_ptr), ...); } }; - // use explicit instatiations to ensure that the function signature can - // be derived by the graph builder - struct py_callback_1 : public py_callback<1> { - using py_callback<1>::py_callback; - intptr_t operator()(intptr_t arg0) { return call(arg0); } - }; + template + struct jit_callback_impl; - struct py_callback_2 : public py_callback<2> { - using py_callback<2>::py_callback; - intptr_t operator()(intptr_t arg0, intptr_t arg1) { return call(arg0, arg1); } - }; + template + struct jit_callback_impl> : public py_callback_base { + using py_callback_base::py_callback_base; - struct py_callback_3 : public py_callback<3> { - using py_callback<3>::py_callback; - intptr_t operator()(intptr_t arg0, intptr_t arg1, intptr_t arg2) + RT operator()(type_repeater... args) { - return call(arg0, arg1, arg2); + dcarg result{0.};//nullptr}; + dcargs_t argsv; + argsv.reserve(sizeof...(Is)); + (argsv.push_back(args), ...); + + dyncall((void*)m_ccallback, result, argsv); + // TODO: error reporting? + + if constexpr (!std::is_void_v) + return result; } }; - struct py_callback_1v : public py_callback<1> { - using py_callback<1>::py_callback; - void operator()(intptr_t arg0) { callv(arg0); } - }; + // aliases to reduce typing downstream (explicit instatiations used to ensure + // that the function signature can be derived by the graph builder + template + using py_callback = py_callback_impl>; - struct py_callback_2v : public py_callback<2> { - using py_callback<2>::py_callback; - void operator()(intptr_t arg0, intptr_t arg1) { callv(arg0, arg1); } - }; + template + using jit_callback = jit_callback_impl>; - struct py_callback_3v : public py_callback<3> { - using py_callback<3>::py_callback; - void operator()(intptr_t arg0, intptr_t arg1, intptr_t arg2) { callv(arg0, arg1, arg2); } - }; + // input/output validation helpers static inline std::optional validate_query(PyObject* pyquery) { if (!PyDict_Check(pyquery)) { @@ -345,6 +349,31 @@ namespace { namespace { + static bool is_numba_cfunc(PyObject* obj) + { + static PyObject* cfunc_type = nullptr; + static bool checked = false; + if (!checked) { + checked = true; + + PyObject* nbmod = PyImport_ImportModule("numba.core.ccallback"); + if (nbmod) { + cfunc_type = PyObject_GetAttrString(nbmod, "CFunc"); + Py_DECREF(nbmod); + } + + if (!cfunc_type) + PyErr_Clear(); + // hard reference to cfunc_type here if not null + } + + if (!cfunc_type) + return false; + + int result = PyObject_IsInstance(obj, cfunc_type); + return result == 1; + } + static std::string annotation_as_text(PyObject* pyobj) { static PyObject* normalizer = nullptr; @@ -381,26 +410,49 @@ namespace { std::vector& input_types, std::vector& output_types) { + bool conversion_ok = false; + PyObject* sann = PyUnicode_FromString("__annotations__"); PyObject* annot = PyObject_GetAttr(callable, sann); if (!annot) { - // the callable may be an instance with a __call__ method - PyErr_Clear(); - PyObject* callm = PyObject_GetAttrString(callable, "__call__"); - if (callm) { - annot = PyObject_GetAttr(callm, sann); - Py_DECREF(callm); + // the callable may be a Numba CFunc and have a declared signature + PyObject* sig = PyObject_GetAttrString(callable, "_sig"); + if (sig) { + PyObject* ret = PyObject_GetAttrString(sig, "return_type"); + PyObject* args = PyObject_GetAttrString(sig, "args"); + + if (ret && args && PyTuple_CheckExact(args)) { + output_types.push_back(annotation_as_text(ret)); + for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(args); ++i) { + PyObject* item = PyTuple_GET_ITEM(args, i); + input_types.push_back(annotation_as_text(item)); + } + conversion_ok = true; + } else + PyErr_Clear(); + + Py_XDECREF(args); + Py_XDECREF(ret); + } else { + PyErr_Clear(); + // the callable may be an instance with a __call__ method + PyObject* callm = PyObject_GetAttrString(callable, "__call__"); + if (callm) { + annot = PyObject_GetAttr(callm, sann); + Py_DECREF(callm); + } } } Py_DECREF(sann); - bool conversion_ok = true; - if (annot && PyDict_Check(annot)) { + if (!conversion_ok && annot && PyDict_Check(annot)) { // Variant guarantees OrderedDict with "return" last Py_ssize_t pos = 0; PyObject* key = nullptr; PyObject* value = nullptr; + + conversion_ok = true; while (PyDict_Next(annot, &pos, &key, &value)) { std::string const& ann = annotation_as_text(value); if (ann.empty() && PyErr_Occurred()) { @@ -416,7 +468,6 @@ namespace { } } } else { - conversion_ok = false; if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "unknown annotation formatting"); } @@ -500,33 +551,38 @@ namespace { // for expressions, but causes havoc with C++ signatures. We suppress this warning for the block // because the use of continuations makes per-line suppression impossible. #define BASIC_CONVERTER(name, cpptype, topy, frompy) \ - static intptr_t name##_to_py(cpptype a) \ + static dcarg name##_to_py(cpptype a) \ { \ PyGILRAII gil; \ - return reinterpret_cast(topy(a)); \ + return dcarg{topy(a)}; \ } \ \ - static cpptype py_to_##name(intptr_t pyobj) \ + static dcarg name##_to_dcarg(cpptype a) { return dcarg{a}; } \ + \ + static cpptype py_to_##name(dcarg a) \ { \ PyGILRAII gil; \ - cpptype i = static_cast(frompy(reinterpret_cast(pyobj))); \ + PyObject* pyobj = reinterpret_cast(a.m_ptr); \ + cpptype i = static_cast(frompy(pyobj)); \ std::string msg; \ if (msg_from_py_error(msg, true)) { \ - Py_DECREF(reinterpret_cast(pyobj)); \ + Py_DECREF(pyobj); \ throw std::runtime_error("Python conversion error for type " #name ": " + msg); \ } \ - Py_DECREF(reinterpret_cast(pyobj)); \ + Py_DECREF(pyobj); \ return i; \ } \ \ - struct provider_cb_##name : public py_callback<1> { \ - using py_callback<1>::py_callback; \ + static cpptype dcarg_to_##name(dcarg a) { return a.m_##name; } \ + \ + struct provider_cb_##name : public py_callback { \ + using py_callback::py_callback; \ cpptype operator()(data_cell_index const& id) \ { \ PyGILRAII gil; \ PyObject* arg0 = wrap_dci(id); \ - intptr_t const arg0i = reinterpret_cast(arg0); \ - PyObject* pyres = reinterpret_cast(call(arg0i)); /* decrefs arg0 */ \ + dcarg res = this->py_callback::operator()(dcarg{arg0}); /* decrefs arg0 */ \ + PyObject* pyres = reinterpret_cast(res.m_ptr); \ cpptype cres = frompy(pyres); \ Py_DECREF(pyres); \ return cres; \ @@ -536,25 +592,18 @@ namespace { BASIC_CONVERTER(bool, bool, PyBool_FromLong, pylong_as_bool) BASIC_CONVERTER(int, std::int32_t, PyLong_FromLong, PyLong_AsLong) BASIC_CONVERTER(uint, std::uint32_t, PyLong_FromLong, pylong_or_int_as_ulong) -#if defined(__APPLE__) && defined(__MACH__) - // This is a temporary workaround until we have a solution for handling translation of types - // between C++ and Python. - BASIC_CONVERTER(long, long, PyLong_FromLong, pylong_as_strictlong) - BASIC_CONVERTER(ulong, unsigned long, PyLong_FromUnsignedLong, pylong_or_int_as_ulong) -#else - BASIC_CONVERTER(long, std::int64_t, PyLong_FromLong, pylong_as_strictlong) - BASIC_CONVERTER(ulong, std::uint64_t, PyLong_FromUnsignedLong, pylong_or_int_as_ulong) -#endif + BASIC_CONVERTER(long, ph_long_t, PyLong_FromLong, pylong_as_strictlong) + BASIC_CONVERTER(ulong, ph_ulong_t, PyLong_FromUnsignedLong, pylong_or_int_as_ulong) BASIC_CONVERTER(float, float, PyFloat_FromDouble, PyFloat_AsDouble) BASIC_CONVERTER(double, double, PyFloat_FromDouble, PyFloat_AsDouble) #define VECTOR_CONVERTER(name, cpptype, nptype) \ - static intptr_t name##_to_py(std::shared_ptr> const& v) \ + static dcarg name##_to_py(std::shared_ptr> const& v) \ { \ PyGILRAII gil; \ \ if (!v) \ - return 0; \ + return dcarg{nullptr}; \ \ /* use a numpy view with the shared pointer tied up in a lifeline object (note: this */ \ /* is just a demonstrator; alternatives are still being considered) */ \ @@ -567,7 +616,7 @@ namespace { ); \ \ if (!np_view) \ - return 0; \ + return dcarg{nullptr}; \ \ /* make the data read-only by not making it writable */ \ PyArray_CLEARFLAGS(reinterpret_cast(np_view), NPY_ARRAY_WRITEABLE); \ @@ -579,12 +628,12 @@ namespace { PhlexLifeline_Type.tp_new(&PhlexLifeline_Type, nullptr, nullptr)); \ if (!pyll) { \ Py_DECREF(np_view); \ - return 0; \ + return dcarg{nullptr}; \ } \ pyll->m_source = v; \ pyll->m_view = np_view; /* steals reference */ \ \ - return reinterpret_cast(pyll); \ + return dcarg{pyll}; \ } VECTOR_CONVERTER(vint, std::int32_t, NPY_INT32) @@ -595,14 +644,15 @@ namespace { VECTOR_CONVERTER(vdouble, double, NPY_DOUBLE) #define NUMPY_ARRAY_CONVERTER(name, cpptype, nptype, frompy) \ - static std::shared_ptr> py_to_##name(intptr_t pyobj) \ + static std::shared_ptr> py_to_##name(dcarg a) \ { \ PyGILRAII gil; \ \ auto vec = std::make_shared>(); \ + PyObject* pyobj = (PyObject*)a.m_ptr; \ \ /* TODO: because of unresolved ownership issues, copy the full array contents */ \ - if (PyArray_Check(reinterpret_cast(pyobj))) { \ + if (PyArray_Check(pyobj)) { \ PyArrayObject* arr = reinterpret_cast(pyobj); \ \ /* TODO: flattening the array here seems to be the only workable solution */ \ @@ -616,11 +666,11 @@ namespace { cpptype* raw = static_cast(PyArray_DATA(arr)); \ vec->reserve(total); \ vec->insert(vec->end(), raw, raw + total); \ - } else if (PyList_Check(reinterpret_cast(pyobj))) { \ - Py_ssize_t total = PyList_Size(reinterpret_cast(pyobj)); \ + } else if (PyList_Check(pyobj)) { \ + Py_ssize_t total = PyList_Size(pyobj); \ vec->reserve(total); \ for (Py_ssize_t i = 0; i < total; ++i) { \ - PyObject* item = PyList_GetItem(reinterpret_cast(pyobj), i); \ + PyObject* item = PyList_GetItem(pyobj, i); \ vec->push_back(static_cast(frompy(item))); \ if (PyErr_Occurred()) { \ PyErr_Clear(); \ @@ -634,18 +684,18 @@ namespace { } \ } \ \ - Py_DECREF(reinterpret_cast(pyobj)); \ + Py_DECREF(pyobj); \ return vec; \ } \ \ - struct provider_cb_##name : public py_callback<1> { \ - using py_callback<1>::py_callback; \ + struct provider_cb_##name : public py_callback { \ + using py_callback::py_callback; \ std::shared_ptr> operator()(data_cell_index const& id) \ { \ PyGILRAII gil; \ PyObject* arg0 = wrap_dci(id); \ - intptr_t pyres = call(reinterpret_cast(arg0)); /* decrefs arg0 */ \ - auto cres = py_to_##name(pyres); /* decrefs pyres */ \ + dcarg pyres = this->py_callback::operator()(dcarg{arg0}); /* decrefs arg0 */ \ + auto cres = py_to_##name(pyres); /* decrefs pyres */ \ return cres; \ } \ }; @@ -666,7 +716,7 @@ namespace { product_selector pq_in, std::string const& output) { - mod->ph_module->transform(name, converter, concurrency::serial) + mod->ph_module->transform(name, converter, (concurrency)16) //concurrency::serial) // TODO! .input_family(pq_in) .output_product_suffixes(output); } @@ -679,7 +729,8 @@ static PyObject* parse_args(PyObject* args, std::vector& input_queries, std::vector& input_types, std::vector& output_suffixes, - std::vector& output_types) + std::vector& output_types, + concurrency& nconcur) { // Helper function to extract the common names and identifiers needed to insert // any node. (The observer does not require outputs, but they still need to be @@ -689,24 +740,22 @@ static PyObject* parse_args(PyObject* args, kw3[] = "concurrency", kw4[] = "name"; // kwnames can be of type char const*[] once we mandate Python 3.13 or newer static char* kwnames[] = {kw0, kw1, kw2, kw3, kw4, nullptr}; - PyObject *callable = nullptr, *input = nullptr, *output = nullptr, *concurrency = nullptr, - *pyname = nullptr; + PyObject *callable = nullptr, *input = nullptr, *output = nullptr, *pyname = nullptr; + int nconcur_ = -1; if (!PyArg_ParseTupleAndKeywords( - args, kwds, "OO|OOO", kwnames, &callable, &input, &output, &concurrency, &pyname)) { + args, kwds, "OO|OiO", (char**)kwnames, &callable, &input, &output, &nconcur_, &pyname)) { // error already set by argument parser return nullptr; } - if (concurrency && concurrency != Py_None) { - PyErr_SetString(PyExc_TypeError, "only serial concurrency is supported"); - return nullptr; - } - if (!callable || !PyCallable_Check(callable)) { PyErr_SetString(PyExc_TypeError, "provided algorithm is not callable"); return nullptr; } + // set concurrency, or the default of serial if not set + nconcur = nconcur_ > 0 ? (concurrency)nconcur_ : concurrency::serial; + // retrieve function name if (!pyname) { pyname = PyObject_GetAttrString(callable, "__name__"); @@ -744,7 +793,7 @@ static PyObject* parse_args(PyObject* args, return nullptr; } - // retrieve C++ (matching) types from annotations + // retrieve C++ (matching) types if provided input_types.reserve(input_queries.size()); if (!annotations_to_strings(callable, input_types, output_types)) return nullptr; // Python error already set @@ -794,7 +843,8 @@ static std::optional collection_dtype(std::string const& type_ static bool insert_input_converters(py_phlex_module* mod, std::string const& cname, // TODO: shared_ptr std::vector const& input_queries, - std::vector const& input_types) + std::vector const& input_types, + bool ispy) { // insert input converter nodes into the graph for (auto const [i, inp_pq, inp_type] : @@ -807,19 +857,19 @@ static bool insert_input_converters(py_phlex_module* mod, "py_" + (inp_pq.suffix ? std::string{static_cast(*inp_pq.suffix)} : ""); if (inp_type == "bool") - insert_converter(mod, pyname, bool_to_py, inp_pq, output); + insert_converter(mod, pyname, ispy ? bool_to_py : bool_to_dcarg, inp_pq, output); else if (inp_type == "int32_t") - insert_converter(mod, pyname, int_to_py, inp_pq, output); + insert_converter(mod, pyname, ispy ? int_to_py : int_to_dcarg, inp_pq, output); else if (inp_type == "uint32_t") - insert_converter(mod, pyname, uint_to_py, inp_pq, output); + insert_converter(mod, pyname, ispy ? uint_to_py : uint_to_dcarg, inp_pq, output); else if (inp_type == "int64_t") - insert_converter(mod, pyname, long_to_py, inp_pq, output); + insert_converter(mod, pyname, ispy ? long_to_py : long_to_dcarg, inp_pq, output); else if (inp_type == "uint64_t") - insert_converter(mod, pyname, ulong_to_py, inp_pq, output); + insert_converter(mod, pyname, ispy ? ulong_to_py : ulong_to_dcarg, inp_pq, output); else if (inp_type == "float") - insert_converter(mod, pyname, float_to_py, inp_pq, output); + insert_converter(mod, pyname, ispy ? float_to_py : float_to_dcarg, inp_pq, output); else if (inp_type == "double") - insert_converter(mod, pyname, double_to_py, inp_pq, output); + insert_converter(mod, pyname, ispy ? double_to_py : double_to_dcarg, inp_pq, output); else if (inp_type.compare(0, 7, "ndarray") == 0 || inp_type.compare(0, 4, "list") == 0) { // TODO: these are hard-coded std::vector <-> numpy array mappings, which is // way too simplistic for real use. It only exists for demonstration purposes, @@ -858,23 +908,24 @@ static bool insert_output_converter(py_phlex_module* mod, std::string const& cname, product_selector const& out_pq, std::string const& out_type, - std::string const& output) + std::string const& output, + bool ispy) { // insert output converter node into the graph if (out_type == "bool") - insert_converter(mod, cname, py_to_bool, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_bool : dcarg_to_bool, out_pq, output); else if (out_type == "int32_t") - insert_converter(mod, cname, py_to_int, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_int : dcarg_to_int, out_pq, output); else if (out_type == "uint32_t") - insert_converter(mod, cname, py_to_uint, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_uint : dcarg_to_uint, out_pq, output); else if (out_type == "int64_t") - insert_converter(mod, cname, py_to_long, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_long : dcarg_to_long, out_pq, output); else if (out_type == "uint64_t") - insert_converter(mod, cname, py_to_ulong, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_ulong : dcarg_to_ulong, out_pq, output); else if (out_type == "float") - insert_converter(mod, cname, py_to_float, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_float : dcarg_to_float, out_pq, output); else if (out_type == "double") - insert_converter(mod, cname, py_to_double, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_double : dcarg_to_double, out_pq, output); else if (out_type.compare(0, 7, "ndarray") == 0 || out_type.compare(0, 4, "list") == 0) { // TODO: just like for input types, these are hard-coded, but should be handled by // an IDL instead. @@ -907,6 +958,20 @@ static bool insert_output_converter(py_phlex_module* mod, return true; } +template +static bool unroll_switch(size_t rt_size, Cf&& func) { + return [&](std::index_sequence) { + // 1-based sequence (all computational nodes have an input, or they can't be scheduled), + // with the fold expression short-circuited using || + bool matched = ( ... || ( + (rt_size == (Is + 1)) ? + (std::forward(func)(std::make_index_sequence{}), true) : false + )); + + return matched; + }(std::make_index_sequence{}); +} + static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kwds) { // Register a python algorithm by adding the necessary intermediate converter @@ -915,11 +980,24 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw std::string cname; std::vector input_queries; std::vector input_types, output_suffixes, output_types; - PyObject* callable = - parse_args(args, kwds, cname, input_queries, input_types, output_suffixes, output_types); + concurrency nconcur; + PyObject* callable = parse_args( + args, kwds, cname, input_queries, input_types, output_suffixes, output_types, nconcur); + if (!callable) return nullptr; // error already set + // detect numba and extract C function pointer if any, else use default Python + // callable dispatcher + void* ccallf = nullptr; + if (is_numba_cfunc(callable)) { + PyObject* pyaddr = PyObject_GetAttrString(callable, "address"); + if (pyaddr) + ccallf = PyLong_AsVoidPtr(pyaddr); + if (!ccallf) + PyErr_Clear(); + } + if (output_types.empty()) { PyErr_Format(PyExc_TypeError, "transform %s should have an output type", cname.c_str()); Py_DECREF(callable); @@ -939,12 +1017,12 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw } } - if (!insert_input_converters(mod, cname, input_queries, input_types)) { + if (!insert_input_converters(mod, cname, input_queries, input_types, !ccallf)) { Py_DECREF(callable); return nullptr; // error already set } - // register Python transform + // register Python transform callbacks // TODO: only support single output type for now, as there has to be a mapping // onto a std::tuple otherwise, which is a typed object, thus complicating the @@ -957,54 +1035,41 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw std::string suff0 = "py_" + (pq0.suffix ? std::string{static_cast(*pq0.suffix)} : ""); - switch (input_queries.size()) { - case 1: { - mod->ph_module->transform(pyname, py_callback_1{callable}, concurrency::serial) - .input_family(product_selector{ - .creator = identifier(c0), .layer = pq0.layer, .suffix = identifier(suff0)}) - .output_product_suffixes(pyoutput); - break; - } - case 2: { - auto pq1 = input_queries[1]; - std::string c1 = input_converter_name(cname, 1); - std::string suff1 = - "py_" + (pq1.suffix ? std::string{static_cast(*pq1.suffix)} : ""); - mod->ph_module->transform(pyname, py_callback_2{callable}, concurrency::serial) - .input_family(product_selector{.creator = identifier(c0), - .layer = pq0.layer, - .suffix = identifier(suff0)}, - product_selector{ - .creator = identifier(c1), .layer = pq1.layer, .suffix = identifier(suff1)}) - .output_product_suffixes(pyoutput); - break; - } - case 3: { - auto pq1 = input_queries[1]; - std::string c1 = input_converter_name(cname, 1); - std::string suff1 = - "py_" + (pq1.suffix ? std::string{static_cast(*pq1.suffix)} : ""); - auto pq2 = input_queries[2]; - std::string c2 = input_converter_name(cname, 2); - std::string suff2 = - "py_" + (pq2.suffix ? std::string{static_cast(*pq2.suffix)} : ""); - mod->ph_module->transform(pyname, py_callback_3{callable}, concurrency::serial) - .input_family(product_selector{.creator = identifier(c0), - .layer = pq0.layer, - .suffix = identifier(suff0)}, - product_selector{ - .creator = identifier(c1), .layer = pq1.layer, .suffix = identifier(suff1)}, - product_selector{ - .creator = identifier(c2), .layer = pq2.layer, .suffix = identifier(suff2)}) - .output_product_suffixes(pyoutput); - break; - } - default: { + auto transform_N_args = [&](std::index_sequence) { + constexpr size_t N = sizeof...(Is); + + auto make_product_selector = [&](size_t i) { + auto pq = input_queries[i]; + std::string c = (i == 0) ? c0 : input_converter_name(cname, i); + std::string suff = "py_" + (pq.suffix ? std::string{static_cast(*pq.suffix)} : ""); + + return product_selector{ + .creator = identifier(c), + .layer = pq.layer, + .suffix = identifier(suff) + }; + }; + + auto insert_tranform_for_callback = [&](auto& cb) { + mod->ph_module->transform(pyname, cb, nconcur) + .input_family(make_product_selector(Is)...) + .output_product_suffixes(pyoutput); + }; + + if (ccallf) { + jit_callback cb{callable, ccallf}; + insert_tranform_for_callback(cb); + } else { + py_callback cb{callable}; + insert_tranform_for_callback(cb); + } + }; + + if (!unroll_switch(input_queries.size(), transform_N_args)) { PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); Py_DECREF(callable); return nullptr; } - } // insert output converter node into the graph auto out_pq = product_selector{.creator = identifier(pyname), @@ -1012,7 +1077,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw .suffix = identifier(pyoutput)}; std::string const& out_type = output_types[0]; std::string const& output = output_suffixes[0]; - if (!insert_output_converter(mod, cname, out_pq, out_type, output)) { + if (!insert_output_converter(mod, cname, out_pq, out_type, output, !ccallf)) { Py_DECREF(callable); return nullptr; // error already set } @@ -1029,8 +1094,10 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds std::string cname; std::vector input_queries; std::vector input_types, output_suffixes, output_types; - PyObject* callable = - parse_args(args, kwds, cname, input_queries, input_types, output_suffixes, output_types); + concurrency nconcur; + PyObject* callable = parse_args( + args, kwds, cname, input_queries, input_types, output_suffixes, output_types, nconcur); + if (!callable) return nullptr; // error already set @@ -1040,7 +1107,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds return nullptr; } - if (!insert_input_converters(mod, cname, input_queries, input_types)) { + if (!insert_input_converters(mod, cname, input_queries, input_types, true)) { Py_DECREF(callable); return nullptr; // error already set } @@ -1053,7 +1120,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds switch (input_queries.size()) { case 1: { - mod->ph_module->observe(cname, py_callback_1v{callable}, concurrency::serial) + mod->ph_module->observe(cname, py_callback{callable}, nconcur) .input_family(product_selector{ .creator = identifier(c0), .layer = pq0.layer, .suffix = identifier(suff0)}); break; @@ -1063,7 +1130,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds std::string c1 = input_converter_name(cname, 1); std::string suff1 = "py_" + (pq1.suffix ? std::string{static_cast(*pq1.suffix)} : ""); - mod->ph_module->observe(cname, py_callback_2v{callable}, concurrency::serial) + mod->ph_module->observe(cname, py_callback{callable}, nconcur) .input_family(product_selector{.creator = identifier(c0), .layer = pq0.layer, .suffix = identifier(suff0)}, @@ -1080,7 +1147,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds std::string c2 = input_converter_name(cname, 2); std::string suff2 = "py_" + (pq2.suffix ? std::string{static_cast(*pq2.suffix)} : ""); - mod->ph_module->observe(cname, py_callback_3v{callable}, concurrency::serial) + mod->ph_module->observe(cname, py_callback{callable}, nconcur) .input_family(product_selector{.creator = identifier(c0), .layer = pq0.layer, .suffix = identifier(suff0)}, From 8fb74945960f37dba9ddc5dc701336d2877ad734 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 1 Jun 2026 14:51:08 -0700 Subject: [PATCH 04/41] clang-format fixes --- plugins/python/src/modulewrap.cpp | 42 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 41f6e2d7d..7f9a09fde 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -119,8 +119,7 @@ namespace { PyObject* m_callable; // owned void* m_ccallback; // C callable (either dispatcher or direct pointer) - py_callback_base(PyObject* callable, - void* cb = (void*)PyObject_CallFunctionObjArgs): + py_callback_base(PyObject* callable, void* cb = (void*)PyObject_CallFunctionObjArgs) : m_callable(callable), m_ccallback(cb) { // callable is always non-null here (validated before construction) @@ -158,15 +157,18 @@ namespace { }; // type repeater to automatically instantiate callbacks taking N args - template + template using type_repeater = T; template struct py_callback_impl; - template + template struct py_callback_impl> : public py_callback_base { - py_callback_impl(PyObject* callable) : py_callback_base(callable, (void*)PyObject_CallFunctionObjArgs) {} + py_callback_impl(PyObject* callable) : + py_callback_base(callable, (void*)PyObject_CallFunctionObjArgs) + { + } RT operator()(type_repeater... args) { @@ -210,13 +212,13 @@ namespace { template struct jit_callback_impl; - template + template struct jit_callback_impl> : public py_callback_base { using py_callback_base::py_callback_base; RT operator()(type_repeater... args) { - dcarg result{0.};//nullptr}; + dcarg result{0.}; //nullptr}; dcargs_t argsv; argsv.reserve(sizeof...(Is)); (argsv.push_back(args), ...); @@ -231,13 +233,12 @@ namespace { // aliases to reduce typing downstream (explicit instatiations used to ensure // that the function signature can be derived by the graph builder - template + template using py_callback = py_callback_impl>; template using jit_callback = jit_callback_impl>; - // input/output validation helpers static inline std::optional validate_query(PyObject* pyquery) { @@ -695,7 +696,7 @@ namespace { PyGILRAII gil; \ PyObject* arg0 = wrap_dci(id); \ dcarg pyres = this->py_callback::operator()(dcarg{arg0}); /* decrefs arg0 */ \ - auto cres = py_to_##name(pyres); /* decrefs pyres */ \ + auto cres = py_to_##name(pyres); /* decrefs pyres */ \ return cres; \ } \ }; @@ -716,7 +717,8 @@ namespace { product_selector pq_in, std::string const& output) { - mod->ph_module->transform(name, converter, (concurrency)16) //concurrency::serial) // TODO! + mod->ph_module + ->transform(name, converter, (concurrency)16) //concurrency::serial) // TODO! .input_family(pq_in) .output_product_suffixes(output); } @@ -959,14 +961,14 @@ static bool insert_output_converter(py_phlex_module* mod, } template -static bool unroll_switch(size_t rt_size, Cf&& func) { +static bool unroll_switch(size_t rt_size, Cf&& func) +{ return [&](std::index_sequence) { // 1-based sequence (all computational nodes have an input, or they can't be scheduled), // with the fold expression short-circuited using || - bool matched = ( ... || ( - (rt_size == (Is + 1)) ? - (std::forward(func)(std::make_index_sequence{}), true) : false - )); + bool matched = (... || ((rt_size == (Is + 1)) + ? (std::forward(func)(std::make_index_sequence{}), true) + : false)); return matched; }(std::make_index_sequence{}); @@ -1041,13 +1043,11 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw auto make_product_selector = [&](size_t i) { auto pq = input_queries[i]; std::string c = (i == 0) ? c0 : input_converter_name(cname, i); - std::string suff = "py_" + (pq.suffix ? std::string{static_cast(*pq.suffix)} : ""); + std::string suff = + "py_" + (pq.suffix ? std::string{static_cast(*pq.suffix)} : ""); return product_selector{ - .creator = identifier(c), - .layer = pq.layer, - .suffix = identifier(suff) - }; + .creator = identifier(c), .layer = pq.layer, .suffix = identifier(suff)}; }; auto insert_tranform_for_callback = [&](auto& cb) { From fade533cd67e672b6aae11534768c2048d3917cc Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 1 Jun 2026 15:21:18 -0700 Subject: [PATCH 05/41] simplify observer insertion code --- plugins/python/src/modulewrap.cpp | 73 +++++++++---------------------- 1 file changed, 20 insertions(+), 53 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 7f9a09fde..08e0a3986 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -1032,17 +1032,12 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw std::string pyname = "py_" + cname; std::string pyoutput = output_suffixes[0] + "_py"; - auto pq0 = input_queries[0]; - std::string c0 = input_converter_name(cname, 0); - std::string suff0 = - "py_" + (pq0.suffix ? std::string{static_cast(*pq0.suffix)} : ""); - auto transform_N_args = [&](std::index_sequence) { constexpr size_t N = sizeof...(Is); auto make_product_selector = [&](size_t i) { auto pq = input_queries[i]; - std::string c = (i == 0) ? c0 : input_converter_name(cname, i); + std::string c = input_converter_name(cname, i); std::string suff = "py_" + (pq.suffix ? std::string{static_cast(*pq.suffix)} : ""); @@ -1112,57 +1107,29 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds return nullptr; // error already set } - // register Python observer - auto pq0 = input_queries[0]; - std::string c0 = input_converter_name(cname, 0); - std::string suff0 = - "py_" + (pq0.suffix ? std::string{static_cast(*pq0.suffix)} : ""); - - switch (input_queries.size()) { - case 1: { - mod->ph_module->observe(cname, py_callback{callable}, nconcur) - .input_family(product_selector{ - .creator = identifier(c0), .layer = pq0.layer, .suffix = identifier(suff0)}); - break; - } - case 2: { - auto pq1 = input_queries[1]; - std::string c1 = input_converter_name(cname, 1); - std::string suff1 = - "py_" + (pq1.suffix ? std::string{static_cast(*pq1.suffix)} : ""); - mod->ph_module->observe(cname, py_callback{callable}, nconcur) - .input_family(product_selector{.creator = identifier(c0), - .layer = pq0.layer, - .suffix = identifier(suff0)}, - product_selector{ - .creator = identifier(c1), .layer = pq1.layer, .suffix = identifier(suff1)}); - break; - } - case 3: { - auto pq1 = input_queries[1]; - std::string c1 = input_converter_name(cname, 1); - std::string suff1 = - "py_" + (pq1.suffix ? std::string{static_cast(*pq1.suffix)} : ""); - auto pq2 = input_queries[2]; - std::string c2 = input_converter_name(cname, 2); - std::string suff2 = - "py_" + (pq2.suffix ? std::string{static_cast(*pq2.suffix)} : ""); - mod->ph_module->observe(cname, py_callback{callable}, nconcur) - .input_family(product_selector{.creator = identifier(c0), - .layer = pq0.layer, - .suffix = identifier(suff0)}, - product_selector{ - .creator = identifier(c1), .layer = pq1.layer, .suffix = identifier(suff1)}, - product_selector{ - .creator = identifier(c2), .layer = pq2.layer, .suffix = identifier(suff2)}); - break; - } - default: { + // register Python observer callbacks + auto observe_N_args = [&](std::index_sequence) { + constexpr size_t N = sizeof...(Is); + + auto make_product_selector = [&](size_t i) { + auto pq = input_queries[i]; + std::string c = input_converter_name(cname, i); + std::string suff = + "py_" + (pq.suffix ? std::string{static_cast(*pq.suffix)} : ""); + + return product_selector{ + .creator = identifier(c), .layer = pq.layer, .suffix = identifier(suff)}; + }; + + mod->ph_module->observe(cname, py_callback{callable}, nconcur) + .input_family(make_product_selector(Is)...); + }; + + if (!unroll_switch(input_queries.size(), observe_N_args)) { PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); Py_DECREF(callable); return nullptr; } - } Py_DECREF(callable); Py_RETURN_NONE; From fa90040237b3aae7b3a4b36fd45e3b252680e5c5 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 1 Jun 2026 23:27:47 -0700 Subject: [PATCH 06/41] proper dyncall return type --- plugins/python/src/dyncall.cpp | 77 ++++++++++++++++ plugins/python/src/dyncall.hpp | 145 ++++++++++++++++++++++++++++++ plugins/python/src/modulewrap.cpp | 16 ++-- test/python/pyjited.jsonnet | 31 +++++++ 4 files changed, 264 insertions(+), 5 deletions(-) create mode 100644 plugins/python/src/dyncall.cpp create mode 100644 plugins/python/src/dyncall.hpp create mode 100644 test/python/pyjited.jsonnet diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp new file mode 100644 index 000000000..191c6464f --- /dev/null +++ b/plugins/python/src/dyncall.cpp @@ -0,0 +1,77 @@ +// Dynamic dispatcher from generically packaged args to any C or Python function. +// +// Note: this particular implementation is based on libffi, presumed to be for +// now the minimal dependency, but an alternative could be based on JITing +// using Cling or even Numba's llvmlite. + +#include "dyncall.hpp" +#include +#include + +#include + + +using namespace phlex::experimental; + +namespace phlex::experimental { + static ffi_type* ffi_t[] = { + &ffi_type_pointer, + &ffi_type_uint8, + &ffi_type_sint8, + &ffi_type_uint8, + &ffi_type_sint16, + &ffi_type_uint16, + &ffi_type_sint32, + &ffi_type_uint32, + &ffi_type_sint64, + &ffi_type_uint64, + &ffi_type_float, + &ffi_type_double, + &ffi_type_void, + }; +} // namespace phlex::experimental + +phlex::experimental::edctype phlex::experimental::str2edctype(const std::string& stype) +{ + if (stype == "bool") + return edctype::Bool; + else if (stype == "int32_t") + return edctype::I32; + else if (stype == "uint32_t") + return edctype::U32; + else if (stype == "int64_t") + return edctype::I64; + else if (stype == "uint64_t") + return edctype::U64; + else if (stype == "float") + return edctype::F32; + else if (stype == "double") + return edctype::F64; + return edctype::Void; +} + +void phlex::experimental::dyncall(void* fn, dcarg& result, dcargs_t& args, int var_offset) +{ + std::size_t N = (std::size_t)args.size(); + + auto t = std::make_unique(N); + auto p = std::make_unique(N); + + for (dcargs_t::size_type i = 0; i < N; ++i) { + auto& a = args[i]; + t[i] = ffi_t[(int)a.m_type]; + p[i] = a.value_ptr(); + } + + ffi_cif cif; + ffi_status status; + if (0 < var_offset) + status = ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, var_offset, N, ffi_t[(int)result.m_type], t.get()); + else + status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, N, ffi_t[(int)result.m_type], t.get()); + + if (status) + throw std::runtime_error("ffi prep failed"); + + ffi_call(&cif, (void (*)())fn, result.value_ptr(), p.get()); +} diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp new file mode 100644 index 000000000..c8b005f63 --- /dev/null +++ b/plugins/python/src/dyncall.hpp @@ -0,0 +1,145 @@ +#ifndef phlex_python_dyncall_hpp +#define phlex_python_dyncall_hpp + +// ======================================================================================= +// +// Dynamic dispatcher from generically packaged args to any C or Python function. +// +// Design rationale +// ================ +// +// Python code is inserted in the Phlex execution graph using generic types to avoid a +// combinatorial explosion of types. This way, all template instantiations can be done at +// compile time. Callback wrappers are then needed to either pack from generic to Python +// or to unpack from generic to C/C++ and perform the call. This dynamic dispatcher +// provides that functionality. +// +// ======================================================================================= + +#include +#include +#include + +#if defined(__APPLE__) && defined(__MACH__) +// This is a temporary workaround until we have a solution for handling translation of types +// between C++ and Python. +typedef long ph_long_t; +typedef unsigned long ph_ulong_t; +#else +typedef std::int64_t ph_long_t; +typedef std::uint64_t ph_ulong_t; +#endif + +namespace phlex::experimental { + + enum class edctype : std::uint8_t { + Ptr = 0, + Bool, + I8, + U8, + I16, + U16, + I32, + U32, + I64, + U64, + F32, + F64, + Void + }; + + // convenience mapper of human-readable string to edctype + edctype str2edctype(const std::string& stype); + + struct dcarg { + union { + void* m_ptr; + std::uint8_t m_bool; // stored as 0 or 1 + std::int8_t m_int8; + std::uint8_t m_uint8; + std::int16_t m_int16; + std::uint16_t m_uint16; + std::int32_t m_int; + std::uint32_t m_uint; + ph_long_t m_long; + ph_ulong_t m_ulong; + float m_float; + double m_double; + }; + edctype m_type; + + // factory-style constructors to guarante value/typecode match + dcarg() : m_type(edctype::Void) { m_ptr = nullptr; } + explicit dcarg(void* v) : m_type(edctype::Ptr) { m_ptr = v; } + explicit dcarg(bool v) : m_type(edctype::Bool) { m_bool = v ? 1 : 0; } + explicit dcarg(std::int8_t v) : m_type(edctype::I8) { m_int8 = v; } + explicit dcarg(std::uint8_t v) : m_type(edctype::U8) { m_uint8 = v; } + explicit dcarg(std::int16_t v) : m_type(edctype::I16) { m_int16 = v; } + explicit dcarg(std::uint16_t v) : m_type(edctype::U16) { m_uint16 = v; } + explicit dcarg(std::int32_t v) : m_type(edctype::I32) { m_int = v; } + explicit dcarg(std::uint32_t v) : m_type(edctype::U32) { m_uint = v; } + explicit dcarg(ph_long_t v) : m_type(edctype::I64) { m_long = v; } + explicit dcarg(ph_ulong_t v) : m_type(edctype::U64) { m_ulong = v; } + explicit dcarg(float v) : m_type(edctype::F32) { m_float = v; } + explicit dcarg(double v) : m_type(edctype::F64) { m_double = v; } + + // convenience null-constructor from enumerated type + explicit dcarg(edctype etype) : m_type(etype) + { + if (etype == edctype::Bool) m_bool = 0; + else if (etype == edctype::I8) m_int8 = 0; + else if (etype == edctype::U8) m_uint8 = 0; + else if (etype == edctype::I16) m_int16 = 0; + else if (etype == edctype::U16) m_uint16 = 0; + else if (etype == edctype::I32) m_int = 0; + else if (etype == edctype::U32) m_uint = 0; + else if (etype == edctype::I64) m_long = 0l; + else if (etype == edctype::U64) m_ulong = 0ul; + else if (etype == edctype::F32) m_float = 0.f; + else if (etype == edctype::F64) m_double = 0.; + else m_ptr = nullptr; + } + + // pointer access to payload + void* value_ptr() + { + switch (m_type) { + case edctype::Ptr: + return &m_ptr; + case edctype::Bool: + return &m_bool; + case edctype::I8: + return &m_int8; + case edctype::U8: + return &m_uint8; + case edctype::I16: + return &m_int16; + case edctype::U16: + return &m_uint16; + case edctype::I32: + return &m_int; + case edctype::U32: + return &m_uint; + case edctype::I64: + return &m_long; + case edctype::U64: + return &m_ulong; + case edctype::F32: + return &m_float; + case edctype::F64: + return &m_double; + case edctype::Void: + return nullptr; + default: + throw std::invalid_argument("unknown edctype"); + } + } + }; + + typedef std::vector dcargs_t; + + void dyncall(void* fn, dcarg& result, dcargs_t& args, int var_offset = -1); + +} // phlex::experimental + +#endif // phlex_python_dyncall_hpp diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 08e0a3986..6036be62f 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -119,7 +119,7 @@ namespace { PyObject* m_callable; // owned void* m_ccallback; // C callable (either dispatcher or direct pointer) - py_callback_base(PyObject* callable, void* cb = (void*)PyObject_CallFunctionObjArgs) : + py_callback_base(PyObject* callable, void* cb) : m_callable(callable), m_ccallback(cb) { // callable is always non-null here (validated before construction) @@ -214,11 +214,17 @@ namespace { template struct jit_callback_impl> : public py_callback_base { - using py_callback_base::py_callback_base; + edctype m_rtype; // dynamic call return type + + jit_callback_impl(PyObject* callable, void* cb, const std::string& stype) : + py_callback_base(callable, cb) + { + m_rtype = str2edctype(stype); + } RT operator()(type_repeater... args) { - dcarg result{0.}; //nullptr}; + dcarg result{m_rtype}; dcargs_t argsv; argsv.reserve(sizeof...(Is)); (argsv.push_back(args), ...); @@ -1031,6 +1037,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw // template instantiation std::string pyname = "py_" + cname; std::string pyoutput = output_suffixes[0] + "_py"; + std::string const& out_type = output_types[0]; auto transform_N_args = [&](std::index_sequence) { constexpr size_t N = sizeof...(Is); @@ -1052,7 +1059,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw }; if (ccallf) { - jit_callback cb{callable, ccallf}; + jit_callback cb{callable, ccallf, out_type}; insert_tranform_for_callback(cb); } else { py_callback cb{callable}; @@ -1070,7 +1077,6 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw auto out_pq = product_selector{.creator = identifier(pyname), .layer = identifier(output_layer), .suffix = identifier(pyoutput)}; - std::string const& out_type = output_types[0]; std::string const& output = output_suffixes[0]; if (!insert_output_converter(mod, cname, out_pq, out_type, output, !ccallf)) { Py_DECREF(callable); diff --git a/test/python/pyjited.jsonnet b/test/python/pyjited.jsonnet new file mode 100644 index 000000000..127daf914 --- /dev/null +++ b/test/python/pyjited.jsonnet @@ -0,0 +1,31 @@ +{ + driver: { + cpp: 'generate_layers', + layers: { + event: { parent: 'job', total: 100000, starting_number: 1 }, + }, + }, + sources: { + provider: { + cpp: 'cppsource4py', + }, + }, + modules: { + pyadd: { + py: 'jited', + input: [ + { + creator: 'input', + layer: 'event', + suffix: 'i', + }, + { + creator: 'input', + layer: 'event', + suffix: 'j', + }, + ], + output: ['sum'], + }, + }, +} From 879bd533ba918d865e82553f9161016488ed8863 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 1 Jun 2026 23:28:38 -0700 Subject: [PATCH 07/41] basic jit test --- test/python/jited.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 test/python/jited.py diff --git a/test/python/jited.py b/test/python/jited.py new file mode 100644 index 000000000..e3a3e6bf8 --- /dev/null +++ b/test/python/jited.py @@ -0,0 +1,31 @@ +"""A most basic algorithm compiled with Numba. + +This test code implements the smallest possible run that does something +real. It serves as a "Hello, World" equivalent for running Python code +that is compiled with Numba. +""" + +import numba.core.decorators as nb_dec + +from adder import add + + +def PHLEX_REGISTER_ALGORITHMS(m, config): + """Register the `add` algorithm as a transformation. + + Use the standard Phlex `transform` registration to insert a node + in the execution graph that receives two inputs and produces their + sum as an ouput. The labels of inputs and outputs are taken from + the configuration. + + Args: + m (internal): Phlex registrar representation. + config (internal): Phlex configuration representation. + + Returns: + None + """ + nb_add = nb_dec.cfunc("int32(int32, int32)", nogil=True, nopython=True, cache=True)(add) + m.transform(nb_add, input_family=config["input"], output_product_suffixes=config["output"], + concurrency = 128) + From 8c90768bc97d784258fa7a698faa8e027f298148 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 10:47:15 -0700 Subject: [PATCH 08/41] query -> selector, following the API change --- plugins/python/src/modulewrap.cpp | 71 ++++++++++++++++--------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 6036be62f..d20573027 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -246,21 +246,21 @@ namespace { using jit_callback = jit_callback_impl>; // input/output validation helpers - static inline std::optional validate_query(PyObject* pyquery) + static inline std::optional validate_selector(PyObject* pysel) { - if (!PyDict_Check(pyquery)) { - PyErr_Format(PyExc_TypeError, "query should be a product specification"); + if (!PyDict_Check(pysel)) { + PyErr_Format(PyExc_TypeError, "selector should be a product specification"); return std::nullopt; } - PyObject* pyc = PyDict_GetItemString(pyquery, "creator"); + PyObject* pyc = PyDict_GetItemString(pysel, "creator"); if (!pyc || !PyUnicode_Check(pyc)) { PyErr_Format(PyExc_TypeError, "missing \"creator\" or not a string"); return std::nullopt; } char const* c = PyUnicode_AsUTF8(pyc); - PyObject* pyl = PyDict_GetItemString(pyquery, "layer"); + PyObject* pyl = PyDict_GetItemString(pysel, "layer"); if (!pyl || !PyUnicode_Check(pyl)) { PyErr_Format(PyExc_TypeError, "missing \"layer\" or not a string"); return std::nullopt; @@ -268,7 +268,7 @@ namespace { char const* l = PyUnicode_AsUTF8(pyl); std::optional s; - PyObject* pys = PyDict_GetItemString(pyquery, "suffix"); + PyObject* pys = PyDict_GetItemString(pysel, "suffix"); if (pys) { if (!PyUnicode_Check(pys)) { PyErr_Format(PyExc_TypeError, "provided \"suffix\" is not a string"); @@ -299,11 +299,11 @@ namespace { for (Py_ssize_t i = 0; i < len; ++i) { PyObject* item = items[i]; // borrowed reference - auto pq = validate_query(item); + auto pq = validate_selector(item); if (pq.has_value()) { cargs.push_back(pq.value()); } else { - // validate_query will have set a python exception + // validate_selection will have set a python exception break; } } @@ -734,7 +734,7 @@ namespace { static PyObject* parse_args(PyObject* args, PyObject* kwds, std::string& functor_name, - std::vector& input_queries, + std::vector& input_selectors, std::vector& input_types, std::vector& output_suffixes, std::vector& output_types, @@ -784,8 +784,8 @@ static PyObject* parse_args(PyObject* args, } // convert input declarations, to be able to pass them to Phlex - input_queries = validate_input(input); - if (input_queries.empty()) { + input_selectors = validate_input(input); + if (input_selectors.empty()) { if (!PyErr_Occurred()) { PyErr_Format(PyExc_ValueError, "no input provided for %s; node can not be scheduled", @@ -802,7 +802,7 @@ static PyObject* parse_args(PyObject* args, } // retrieve C++ (matching) types if provided - input_types.reserve(input_queries.size()); + input_types.reserve(input_selectors.size()); if (!annotations_to_strings(callable, input_types, output_types)) return nullptr; // Python error already set @@ -811,12 +811,12 @@ static PyObject* parse_args(PyObject* args, output_types.clear(); // if annotations were correct (and correctly parsed), there should be as many - // input types as input product queries - if (input_types.size() != input_queries.size()) { + // input types as input product selectors + if (input_types.size() != input_selectors.size()) { PyErr_Format(PyExc_TypeError, "number of inputs (%d; %s) does not match number of annotation types (%d; %s)", - input_queries.size(), - stringify(input_queries).c_str(), + input_selectors.size(), + stringify(input_selectors).c_str(), input_types.size(), stringify(input_types).c_str()); return nullptr; @@ -850,13 +850,13 @@ static std::optional collection_dtype(std::string const& type_ static bool insert_input_converters(py_phlex_module* mod, std::string const& cname, // TODO: shared_ptr - std::vector const& input_queries, + std::vector const& input_selectors, std::vector const& input_types, bool ispy) { // insert input converter nodes into the graph for (auto const [i, inp_pq, inp_type] : - std::views::zip(std::views::iota(size_t{}), input_queries, input_types)) { + std::views::zip(std::views::iota(size_t{}), input_selectors, input_types)) { // TODO: this seems overly verbose and inefficient, but the function needs // to be properly types, so every option is made explicit @@ -986,11 +986,11 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw // nodes going from C++ to PyObject* and back. std::string cname; - std::vector input_queries; + std::vector input_selectors; std::vector input_types, output_suffixes, output_types; concurrency nconcur; PyObject* callable = parse_args( - args, kwds, cname, input_queries, input_types, output_suffixes, output_types, nconcur); + args, kwds, cname, input_selectors, input_types, output_suffixes, output_types, nconcur); if (!callable) return nullptr; // error already set @@ -1014,9 +1014,9 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw // TODO: it's not clear what the output layer will be if the input layers are not // all the same, so for now, simply raise an error if their is any ambiguity - auto output_layer = static_cast(input_queries[0].layer); - if (1 < input_queries.size()) { - for (auto const& iq_pq : input_queries | std::views::drop(1)) { + auto output_layer = static_cast(input_selectors[0].layer); + if (1 < input_selectors.size()) { + for (auto const& iq_pq : input_selectors | std::views::drop(1)) { if (static_cast(iq_pq.layer) != output_layer) { PyErr_Format(PyExc_ValueError, "transform %s output layer is ambiguous", cname.c_str()); Py_DECREF(callable); @@ -1025,7 +1025,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw } } - if (!insert_input_converters(mod, cname, input_queries, input_types, !ccallf)) { + if (!insert_input_converters(mod, cname, input_selectors, input_types, !ccallf)) { Py_DECREF(callable); return nullptr; // error already set } @@ -1043,7 +1043,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw constexpr size_t N = sizeof...(Is); auto make_product_selector = [&](size_t i) { - auto pq = input_queries[i]; + auto pq = input_selectors[i]; std::string c = input_converter_name(cname, i); std::string suff = "py_" + (pq.suffix ? std::string{static_cast(*pq.suffix)} : ""); @@ -1067,7 +1067,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw } }; - if (!unroll_switch(input_queries.size(), transform_N_args)) { + if (!unroll_switch(input_selectors.size(), transform_N_args)) { PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); Py_DECREF(callable); return nullptr; @@ -1093,11 +1093,11 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds // nodes going from C++ to PyObject* and back. std::string cname; - std::vector input_queries; + std::vector input_selectors; std::vector input_types, output_suffixes, output_types; concurrency nconcur; PyObject* callable = parse_args( - args, kwds, cname, input_queries, input_types, output_suffixes, output_types, nconcur); + args, kwds, cname, input_selectors, input_types, output_suffixes, output_types, nconcur); if (!callable) return nullptr; // error already set @@ -1108,7 +1108,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds return nullptr; } - if (!insert_input_converters(mod, cname, input_queries, input_types, true)) { + if (!insert_input_converters(mod, cname, input_selectors, input_types, true)) { Py_DECREF(callable); return nullptr; // error already set } @@ -1118,7 +1118,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds constexpr size_t N = sizeof...(Is); auto make_product_selector = [&](size_t i) { - auto pq = input_queries[i]; + auto pq = input_selectors[i]; std::string c = input_converter_name(cname, i); std::string suff = "py_" + (pq.suffix ? std::string{static_cast(*pq.suffix)} : ""); @@ -1131,7 +1131,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds .input_family(make_product_selector(Is)...); }; - if (!unroll_switch(input_queries.size(), observe_N_args)) { + if (!unroll_switch(input_selectors.size(), observe_N_args)) { PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); Py_DECREF(callable); return nullptr; @@ -1288,11 +1288,12 @@ static PyObject* sc_provide(py_phlex_source* src, PyObject* args, PyObject* kwds PyErr_Clear(); } - // translate and validate the output "query" - // Since a query in Python is just a dictionary, it isn't called out in the user API as a query - auto opq = validate_query(output); + // translate and validate the output "selectors" + // Since a selector in Python is just a dictionary, it isn't called out in the user + // API as a selector + auto opq = validate_selector(output); if (!opq.has_value()) { - // validate_query has set a python exception with details about the error + // validate_selector has set a python exception with details about the error return nullptr; } From f5a4625f244fc493aeb5b6b536c0e9f913721012 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 11:35:36 -0700 Subject: [PATCH 09/41] apparently, numba has Float and float32 and they aren't the same thing --- plugins/python/python/phlex/_typing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugins/python/python/phlex/_typing.py b/plugins/python/python/phlex/_typing.py index 860751946..626bdd10b 100644 --- a/plugins/python/python/phlex/_typing.py +++ b/plugins/python/python/phlex/_typing.py @@ -58,6 +58,7 @@ nb_types.uint32: "uint32_t", nb_types.uint64: "uint64_t", nb_types.Float: "float", + nb_types.float32: "float", nb_types.double: "double", }) @@ -117,8 +118,11 @@ def _build_ctypes_map() -> dict[type, str]: "unsigned int": _PY2CPP[ctypes.c_uint], "long": _PY2CPP[ctypes.c_long], "unsigned long": _PY2CPP[ctypes.c_ulong], + # special cases; not necessarily correct but as expected on major platforms "long long": "int64_t", "unsigned long long": "uint64_t", + "float32": "float", + "float64": "double", } From 12cbdca8c600e90fc071f2d7a25b6c7bfe2a7907 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 12:12:57 -0700 Subject: [PATCH 10/41] test all supported JIT types --- test/python/jited.py | 56 ++++++++++++++++++++++++++++--------- test/python/pyjited.jsonnet | 15 +--------- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/test/python/jited.py b/test/python/jited.py index e3a3e6bf8..875a270c5 100644 --- a/test/python/jited.py +++ b/test/python/jited.py @@ -1,22 +1,35 @@ -"""A most basic algorithm compiled with Numba. +"""Basic Numba tests for all supported types. -This test code implements the smallest possible run that does something -real. It serves as a "Hello, World" equivalent for running Python code -that is compiled with Numba. +Smallest possible tests with a mixture of Python and Numba: Python +providers to produce data, Numba algorithms to transform them, and Python +observers for verification. """ import numba.core.decorators as nb_dec - +import numpy as np from adder import add +from phlex import Variant + +# arg0 suff, arg1 suff, type, result +specs = ( + ("i", "j", np.int32, 1), + ("u1", "u2", np.uint32, 1), + ("l1", "l2", np.int64, 1), + ("ul1", "ul2", np.uint64, 100), + ("f1", "f2", np.float32, 1.), + ("d1", "d2", np.float64, 1.), +) + def PHLEX_REGISTER_ALGORITHMS(m, config): - """Register the `add` algorithm as a transformation. + """Register Numba-jited `add` algorithm variants as a transformation. - Use the standard Phlex `transform` registration to insert a node - in the execution graph that receives two inputs and produces their - sum as an ouput. The labels of inputs and outputs are taken from - the configuration. + Use the standard Phlex `transform` registration to insert a node in the + execution graph of a Numba-jited Python function that receives two inputs + and produces their sum as an ouput. + + Similarly, use the standard Phlex `observe` to add verifier nodes. Args: m (internal): Phlex registrar representation. @@ -25,7 +38,24 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): Returns: None """ - nb_add = nb_dec.cfunc("int32(int32, int32)", nogil=True, nopython=True, cache=True)(add) - m.transform(nb_add, input_family=config["input"], output_product_suffixes=config["output"], - concurrency = 128) + + def new_o(x): + def o(y): + assert y == x + return o + + for arg0, arg1, t, res in specs: + tn = t.__name__ + + f_a = nb_dec.cfunc(f"{tn}({tn}, {tn})", nogil=True, nopython=True, cache=True)(add) + m.transform(f_a, + name="add_"+tn, + input_family=[{"creator": "input", "layer": "event", "suffix": arg0}, + {"creator": "input", "layer": "event", "suffix": arg1}], + output_product_suffixes=["sum_"+tn], + concurrency=4) + + o = Variant(new_o(res), {"y": t, "return": None}, "observe_" + tn) + m.observe(o, + input_family=[{"creator": "add_" + tn, "layer": "event", "suffix": "sum_"+tn}]) diff --git a/test/python/pyjited.jsonnet b/test/python/pyjited.jsonnet index 127daf914..317633cc1 100644 --- a/test/python/pyjited.jsonnet +++ b/test/python/pyjited.jsonnet @@ -2,7 +2,7 @@ driver: { cpp: 'generate_layers', layers: { - event: { parent: 'job', total: 100000, starting_number: 1 }, + event: { parent: 'job', total: 100, starting_number: 1 }, }, }, sources: { @@ -13,19 +13,6 @@ modules: { pyadd: { py: 'jited', - input: [ - { - creator: 'input', - layer: 'event', - suffix: 'i', - }, - { - creator: 'input', - layer: 'event', - suffix: 'j', - }, - ], - output: ['sum'], }, }, } From 9fa2ba1ced8fd35e235c473c0e8640e275c14765 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 16:38:57 -0700 Subject: [PATCH 11/41] changes to make clang-tidy happy --- plugins/python/src/dyncall.cpp | 11 ++++++++++- plugins/python/src/dyncall.hpp | 6 +++--- plugins/python/src/modulewrap.cpp | 9 ++++----- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index 191c6464f..247076f30 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -14,7 +14,7 @@ using namespace phlex::experimental; namespace phlex::experimental { - static ffi_type* ffi_t[] = { + static ffi_type* const ffi_t[] = { &ffi_type_pointer, &ffi_type_uint8, &ffi_type_sint8, @@ -52,6 +52,14 @@ phlex::experimental::edctype phlex::experimental::str2edctype(const std::string& void phlex::experimental::dyncall(void* fn, dcarg& result, dcargs_t& args, int var_offset) { + // Perform a dynamic call of function fn, taking arguments `args` and returning + // `result`. Set `var_offset` to the appropriate number of positional arguments + // if the other arguments are variational. + + // Except for the memory management unique_ptrs, this code is essentially C, + // because libffi is, and that yields a plethora of warnings from clang-tidy, + // none of which warrant actual changes. + // NOLINTBEGIN std::size_t N = (std::size_t)args.size(); auto t = std::make_unique(N); @@ -74,4 +82,5 @@ void phlex::experimental::dyncall(void* fn, dcarg& result, dcargs_t& args, int v throw std::runtime_error("ffi prep failed"); ffi_call(&cif, (void (*)())fn, result.value_ptr(), p.get()); + // NOLINTEND } diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index c8b005f63..f3fb40389 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -93,8 +93,8 @@ namespace phlex::experimental { else if (etype == edctype::U16) m_uint16 = 0; else if (etype == edctype::I32) m_int = 0; else if (etype == edctype::U32) m_uint = 0; - else if (etype == edctype::I64) m_long = 0l; - else if (etype == edctype::U64) m_ulong = 0ul; + else if (etype == edctype::I64) m_long = 0L; + else if (etype == edctype::U64) m_ulong = 0UL; else if (etype == edctype::F32) m_float = 0.f; else if (etype == edctype::F64) m_double = 0.; else m_ptr = nullptr; @@ -105,7 +105,7 @@ namespace phlex::experimental { { switch (m_type) { case edctype::Ptr: - return &m_ptr; + return reinterpret_cast(&m_ptr); case edctype::Bool: return &m_bool; case edctype::I8: diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index d20573027..173516cb0 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -166,7 +166,7 @@ namespace { template struct py_callback_impl> : public py_callback_base { py_callback_impl(PyObject* callable) : - py_callback_base(callable, (void*)PyObject_CallFunctionObjArgs) + py_callback_base(callable, reinterpret_cast(PyObject_CallFunctionObjArgs)) { } @@ -197,7 +197,7 @@ namespace { if constexpr (!std::is_void_v) return result; else - Py_DECREF((PyObject*)result.m_ptr); + Py_DECREF(reinterpret_cast(result.m_ptr)); } private: @@ -217,9 +217,8 @@ namespace { edctype m_rtype; // dynamic call return type jit_callback_impl(PyObject* callable, void* cb, const std::string& stype) : - py_callback_base(callable, cb) + py_callback_base(callable, cb), m_rtype(str2edctype(stype)) { - m_rtype = str2edctype(stype); } RT operator()(type_repeater... args) @@ -973,7 +972,7 @@ static bool unroll_switch(size_t rt_size, Cf&& func) // 1-based sequence (all computational nodes have an input, or they can't be scheduled), // with the fold expression short-circuited using || bool matched = (... || ((rt_size == (Is + 1)) - ? (std::forward(func)(std::make_index_sequence{}), true) + ? (func(std::make_index_sequence{}), true) : false)); return matched; From 622e7d28600ebb3663c3dba3d0c8be37d79941a4 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 20:35:31 -0700 Subject: [PATCH 12/41] rejig dyncall to use std::variant instead of a union, per clang-tidy --- plugins/python/src/dyncall.cpp | 66 +++++++++---- plugins/python/src/dyncall.hpp | 148 ++++++++++-------------------- plugins/python/src/modulewrap.cpp | 20 ++-- 3 files changed, 107 insertions(+), 127 deletions(-) diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index 247076f30..7ef0ac343 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -31,23 +31,51 @@ namespace phlex::experimental { }; } // namespace phlex::experimental -phlex::experimental::edctype phlex::experimental::str2edctype(const std::string& stype) +phlex::experimental::dcarg phlex::experimental::dcarg::from_str(const std::string& stype) { - if (stype == "bool") - return edctype::Bool; - else if (stype == "int32_t") - return edctype::I32; - else if (stype == "uint32_t") - return edctype::U32; - else if (stype == "int64_t") - return edctype::I64; - else if (stype == "uint64_t") - return edctype::U64; - else if (stype == "float") - return edctype::F32; - else if (stype == "double") - return edctype::F64; - return edctype::Void; + // only types currently used in modulewrap are added, not all ffi types + if (stype == "bool") return dcarg(false); + else if (stype == "int32_t") return dcarg(static_cast(0)); + else if (stype == "uint32_t") return dcarg(static_cast(0)); + else if (stype == "int64_t") return dcarg(static_cast(0)); + else if (stype == "uint64_t") return dcarg(static_cast(0)); + else if (stype == "float") return dcarg(0.0f); + else if (stype == "double") return dcarg(0.0); + + throw std::invalid_argument("unknown type string: " + stype); +} + +void* phlex::experimental::dcarg::value_ptr() { + return std::visit([](auto& val) -> void* { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return nullptr; + } else { + return static_cast(&val); + } + }, m_value); +} + +namespace { + static ffi_type* get_ffi_type(const dcarg& d) { + return std::visit([](auto&& val) -> ffi_type* { + using T = std::decay_t; + + if constexpr (std::is_same_v) return &ffi_type_void; + else if constexpr (std::is_same_v) return &ffi_type_pointer; + else if constexpr (std::is_same_v) return &ffi_type_uint8; + else if constexpr (std::is_same_v) return &ffi_type_sint8; + else if constexpr (std::is_same_v) return &ffi_type_uint8; + else if constexpr (std::is_same_v) return &ffi_type_sint16; + else if constexpr (std::is_same_v) return &ffi_type_uint16; + else if constexpr (std::is_same_v) return &ffi_type_sint32; + else if constexpr (std::is_same_v) return &ffi_type_uint32; + else if constexpr (std::is_same_v) return &ffi_type_sint64; + else if constexpr (std::is_same_v) return &ffi_type_uint64; + else if constexpr (std::is_same_v) return &ffi_type_float; + else if constexpr (std::is_same_v) return &ffi_type_double; + }, d.m_value); + } } void phlex::experimental::dyncall(void* fn, dcarg& result, dcargs_t& args, int var_offset) @@ -67,16 +95,16 @@ void phlex::experimental::dyncall(void* fn, dcarg& result, dcargs_t& args, int v for (dcargs_t::size_type i = 0; i < N; ++i) { auto& a = args[i]; - t[i] = ffi_t[(int)a.m_type]; + t[i] = get_ffi_type(a); p[i] = a.value_ptr(); } ffi_cif cif; ffi_status status; if (0 < var_offset) - status = ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, var_offset, N, ffi_t[(int)result.m_type], t.get()); + status = ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, var_offset, N, get_ffi_type(result), t.get()); else - status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, N, ffi_t[(int)result.m_type], t.get()); + status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, N, get_ffi_type(result), t.get()); if (status) throw std::runtime_error("ffi prep failed"); diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index f3fb40389..e63f6c2a5 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #if defined(__APPLE__) && defined(__MACH__) @@ -30,112 +31,63 @@ typedef std::int64_t ph_long_t; typedef std::uint64_t ph_ulong_t; #endif +// Python (for specialization) +struct _object; +typedef _object PyObject; + namespace phlex::experimental { - enum class edctype : std::uint8_t { - Ptr = 0, - Bool, - I8, - U8, - I16, - U16, - I32, - U32, - I64, - U64, - F32, - F64, - Void - }; + struct dcarg { + using FFIType = std::variant< + std::monostate, // void (default) + void*, + bool, + std::int8_t, + std::uint8_t, + std::int16_t, + std::uint16_t, + std::int32_t, + std::uint32_t, + ph_long_t, + ph_ulong_t, + float, + double + >; - // convenience mapper of human-readable string to edctype - edctype str2edctype(const std::string& stype); + FFIType m_value; - struct dcarg { - union { - void* m_ptr; - std::uint8_t m_bool; // stored as 0 or 1 - std::int8_t m_int8; - std::uint8_t m_uint8; - std::int16_t m_int16; - std::uint16_t m_uint16; - std::int32_t m_int; - std::uint32_t m_uint; - ph_long_t m_long; - ph_ulong_t m_ulong; - float m_float; - double m_double; - }; - edctype m_type; - - // factory-style constructors to guarante value/typecode match - dcarg() : m_type(edctype::Void) { m_ptr = nullptr; } - explicit dcarg(void* v) : m_type(edctype::Ptr) { m_ptr = v; } - explicit dcarg(bool v) : m_type(edctype::Bool) { m_bool = v ? 1 : 0; } - explicit dcarg(std::int8_t v) : m_type(edctype::I8) { m_int8 = v; } - explicit dcarg(std::uint8_t v) : m_type(edctype::U8) { m_uint8 = v; } - explicit dcarg(std::int16_t v) : m_type(edctype::I16) { m_int16 = v; } - explicit dcarg(std::uint16_t v) : m_type(edctype::U16) { m_uint16 = v; } - explicit dcarg(std::int32_t v) : m_type(edctype::I32) { m_int = v; } - explicit dcarg(std::uint32_t v) : m_type(edctype::U32) { m_uint = v; } - explicit dcarg(ph_long_t v) : m_type(edctype::I64) { m_long = v; } - explicit dcarg(ph_ulong_t v) : m_type(edctype::U64) { m_ulong = v; } - explicit dcarg(float v) : m_type(edctype::F32) { m_float = v; } - explicit dcarg(double v) : m_type(edctype::F64) { m_double = v; } - - // convenience null-constructor from enumerated type - explicit dcarg(edctype etype) : m_type(etype) - { - if (etype == edctype::Bool) m_bool = 0; - else if (etype == edctype::I8) m_int8 = 0; - else if (etype == edctype::U8) m_uint8 = 0; - else if (etype == edctype::I16) m_int16 = 0; - else if (etype == edctype::U16) m_uint16 = 0; - else if (etype == edctype::I32) m_int = 0; - else if (etype == edctype::U32) m_uint = 0; - else if (etype == edctype::I64) m_long = 0L; - else if (etype == edctype::U64) m_ulong = 0UL; - else if (etype == edctype::F32) m_float = 0.f; - else if (etype == edctype::F64) m_double = 0.; - else m_ptr = nullptr; - } + // convenience mapper of human-readable string to dcarg + static dcarg from_str(const std::string& stype); + + // factory-style constructors to guarantee value/type match + dcarg() : m_value(std::monostate{}) {} + explicit dcarg(void* v) : m_value(v) {} + explicit dcarg(bool v) : m_value(v) {} + explicit dcarg(std::int8_t v) : m_value(v) {} + explicit dcarg(std::uint8_t v) : m_value(v) {} + explicit dcarg(std::int16_t v) : m_value(v) {} + explicit dcarg(std::uint16_t v) : m_value(v) {} + explicit dcarg(std::int32_t v) : m_value(v) {} + explicit dcarg(std::uint32_t v) : m_value(v) {} + explicit dcarg(ph_long_t v) : m_value(v) {} + explicit dcarg(ph_ulong_t v) : m_value(v) {} + explicit dcarg(float v) : m_value(v) {} + explicit dcarg(double v) : m_value(v) {} // pointer access to payload - void* value_ptr() - { - switch (m_type) { - case edctype::Ptr: - return reinterpret_cast(&m_ptr); - case edctype::Bool: - return &m_bool; - case edctype::I8: - return &m_int8; - case edctype::U8: - return &m_uint8; - case edctype::I16: - return &m_int16; - case edctype::U16: - return &m_uint16; - case edctype::I32: - return &m_int; - case edctype::U32: - return &m_uint; - case edctype::I64: - return &m_long; - case edctype::U64: - return &m_ulong; - case edctype::F32: - return &m_float; - case edctype::F64: - return &m_double; - case edctype::Void: - return nullptr; - default: - throw std::invalid_argument("unknown edctype"); - } - } + void* value_ptr(); + + // value access to payload + template + T get() { return std::get(m_value); } }; + // specialization to simplify a very common case + template<> + inline PyObject* dcarg::get() { + return reinterpret_cast(std::get(m_value)); + } + typedef std::vector dcargs_t; void dyncall(void* fn, dcarg& result, dcargs_t& args, int var_offset = -1); diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 173516cb0..5813ee5e4 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -107,7 +107,7 @@ namespace { static inline dcarg lifeline_transform(dcarg arg) { - PyObject* pyobj = reinterpret_cast(arg.m_ptr); + PyObject* pyobj = arg.get(); if (pyobj && PyObject_TypeCheck(pyobj, &PhlexLifeline_Type)) { return dcarg{reinterpret_cast(pyobj)->m_view}; } @@ -184,7 +184,7 @@ namespace { dyncall((void*)m_ccallback, result, argsv, 1); std::string error_msg; - if (!result.m_ptr) { + if (!result.get()) { if (!msg_from_py_error(error_msg)) error_msg = "Unknown python error"; } @@ -197,7 +197,7 @@ namespace { if constexpr (!std::is_void_v) return result; else - Py_DECREF(reinterpret_cast(result.m_ptr)); + Py_DECREF(result.get()); } private: @@ -205,7 +205,7 @@ namespace { void decref_all(Args... args) { // helper to decrement reference counts of N arguments - (Py_DECREF((PyObject*)args.m_ptr), ...); + (Py_DECREF(reinterpret_cast(std::get(args.m_value))), ...); } }; @@ -214,10 +214,10 @@ namespace { template struct jit_callback_impl> : public py_callback_base { - edctype m_rtype; // dynamic call return type + dcarg m_rtype; // dynamic call return type jit_callback_impl(PyObject* callable, void* cb, const std::string& stype) : - py_callback_base(callable, cb), m_rtype(str2edctype(stype)) + py_callback_base(callable, cb), m_rtype(dcarg::from_str(stype)) { } @@ -568,7 +568,7 @@ namespace { static cpptype py_to_##name(dcarg a) \ { \ PyGILRAII gil; \ - PyObject* pyobj = reinterpret_cast(a.m_ptr); \ + PyObject* pyobj = a.get(); \ cpptype i = static_cast(frompy(pyobj)); \ std::string msg; \ if (msg_from_py_error(msg, true)) { \ @@ -579,7 +579,7 @@ namespace { return i; \ } \ \ - static cpptype dcarg_to_##name(dcarg a) { return a.m_##name; } \ + static cpptype dcarg_to_##name(dcarg a) { return a.get(); } \ \ struct provider_cb_##name : public py_callback { \ using py_callback::py_callback; \ @@ -588,7 +588,7 @@ namespace { PyGILRAII gil; \ PyObject* arg0 = wrap_dci(id); \ dcarg res = this->py_callback::operator()(dcarg{arg0}); /* decrefs arg0 */ \ - PyObject* pyres = reinterpret_cast(res.m_ptr); \ + PyObject* pyres = res.get(); \ cpptype cres = frompy(pyres); \ Py_DECREF(pyres); \ return cres; \ @@ -655,7 +655,7 @@ namespace { PyGILRAII gil; \ \ auto vec = std::make_shared>(); \ - PyObject* pyobj = (PyObject*)a.m_ptr; \ + PyObject* pyobj = a.get(); \ \ /* TODO: because of unresolved ownership issues, copy the full array contents */ \ if (PyArray_Check(pyobj)) { \ From 35d4fed3e2c8f4683ac57525bd38783d64cae190 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 21:35:34 -0700 Subject: [PATCH 13/41] clang-format fixes --- plugins/python/src/dyncall.cpp | 95 +++++++++++++++++++------------ plugins/python/src/dyncall.hpp | 42 +++++++------- plugins/python/src/modulewrap.cpp | 12 ++-- 3 files changed, 87 insertions(+), 62 deletions(-) diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index 7ef0ac343..455ddf0bb 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -10,7 +10,6 @@ #include - using namespace phlex::experimental; namespace phlex::experimental { @@ -31,50 +30,76 @@ namespace phlex::experimental { }; } // namespace phlex::experimental -phlex::experimental::dcarg phlex::experimental::dcarg::from_str(const std::string& stype) +phlex::experimental::dcarg phlex::experimental::dcarg::from_str(std::string const& stype) { // only types currently used in modulewrap are added, not all ffi types - if (stype == "bool") return dcarg(false); - else if (stype == "int32_t") return dcarg(static_cast(0)); - else if (stype == "uint32_t") return dcarg(static_cast(0)); - else if (stype == "int64_t") return dcarg(static_cast(0)); - else if (stype == "uint64_t") return dcarg(static_cast(0)); - else if (stype == "float") return dcarg(0.0f); - else if (stype == "double") return dcarg(0.0); + if (stype == "bool") + return dcarg(false); + else if (stype == "int32_t") + return dcarg(static_cast(0)); + else if (stype == "uint32_t") + return dcarg(static_cast(0)); + else if (stype == "int64_t") + return dcarg(static_cast(0)); + else if (stype == "uint64_t") + return dcarg(static_cast(0)); + else if (stype == "float") + return dcarg(0.0f); + else if (stype == "double") + return dcarg(0.0); throw std::invalid_argument("unknown type string: " + stype); } -void* phlex::experimental::dcarg::value_ptr() { - return std::visit([](auto& val) -> void* { - using T = std::decay_t; - if constexpr (std::is_same_v) { - return nullptr; - } else { +void* phlex::experimental::dcarg::value_ptr() +{ + return std::visit( + [](auto& val) -> void* { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return nullptr; + } else { return static_cast(&val); - } - }, m_value); + } + }, + m_value); } namespace { - static ffi_type* get_ffi_type(const dcarg& d) { - return std::visit([](auto&& val) -> ffi_type* { - using T = std::decay_t; - - if constexpr (std::is_same_v) return &ffi_type_void; - else if constexpr (std::is_same_v) return &ffi_type_pointer; - else if constexpr (std::is_same_v) return &ffi_type_uint8; - else if constexpr (std::is_same_v) return &ffi_type_sint8; - else if constexpr (std::is_same_v) return &ffi_type_uint8; - else if constexpr (std::is_same_v) return &ffi_type_sint16; - else if constexpr (std::is_same_v) return &ffi_type_uint16; - else if constexpr (std::is_same_v) return &ffi_type_sint32; - else if constexpr (std::is_same_v) return &ffi_type_uint32; - else if constexpr (std::is_same_v) return &ffi_type_sint64; - else if constexpr (std::is_same_v) return &ffi_type_uint64; - else if constexpr (std::is_same_v) return &ffi_type_float; - else if constexpr (std::is_same_v) return &ffi_type_double; - }, d.m_value); + static ffi_type* get_ffi_type(dcarg const& d) + { + return std::visit( + [](auto&& val) -> ffi_type* { + using T = std::decay_t; + + if constexpr (std::is_same_v) + return &ffi_type_void; + else if constexpr (std::is_same_v) + return &ffi_type_pointer; + else if constexpr (std::is_same_v) + return &ffi_type_uint8; + else if constexpr (std::is_same_v) + return &ffi_type_sint8; + else if constexpr (std::is_same_v) + return &ffi_type_uint8; + else if constexpr (std::is_same_v) + return &ffi_type_sint16; + else if constexpr (std::is_same_v) + return &ffi_type_uint16; + else if constexpr (std::is_same_v) + return &ffi_type_sint32; + else if constexpr (std::is_same_v) + return &ffi_type_uint32; + else if constexpr (std::is_same_v) + return &ffi_type_sint64; + else if constexpr (std::is_same_v) + return &ffi_type_uint64; + else if constexpr (std::is_same_v) + return &ffi_type_float; + else if constexpr (std::is_same_v) + return &ffi_type_double; + }, + d.m_value); } } diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index e63f6c2a5..b3a40d5cd 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -38,26 +38,24 @@ typedef _object PyObject; namespace phlex::experimental { struct dcarg { - using FFIType = std::variant< - std::monostate, // void (default) - void*, - bool, - std::int8_t, - std::uint8_t, - std::int16_t, - std::uint16_t, - std::int32_t, - std::uint32_t, - ph_long_t, - ph_ulong_t, - float, - double - >; + using FFIType = std::variant; FFIType m_value; // convenience mapper of human-readable string to dcarg - static dcarg from_str(const std::string& stype); + static dcarg from_str(std::string const& stype); // factory-style constructors to guarantee value/type match dcarg() : m_value(std::monostate{}) {} @@ -78,13 +76,17 @@ namespace phlex::experimental { void* value_ptr(); // value access to payload - template - T get() { return std::get(m_value); } + template + T get() + { + return std::get(m_value); + } }; // specialization to simplify a very common case - template<> - inline PyObject* dcarg::get() { + template <> + inline PyObject* dcarg::get() + { return reinterpret_cast(std::get(m_value)); } diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 5813ee5e4..fb413317b 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -119,8 +119,7 @@ namespace { PyObject* m_callable; // owned void* m_ccallback; // C callable (either dispatcher or direct pointer) - py_callback_base(PyObject* callable, void* cb) : - m_callable(callable), m_ccallback(cb) + py_callback_base(PyObject* callable, void* cb) : m_callable(callable), m_ccallback(cb) { // callable is always non-null here (validated before construction) PyGILRAII gil; @@ -214,9 +213,9 @@ namespace { template struct jit_callback_impl> : public py_callback_base { - dcarg m_rtype; // dynamic call return type + dcarg m_rtype; // dynamic call return type - jit_callback_impl(PyObject* callable, void* cb, const std::string& stype) : + jit_callback_impl(PyObject* callable, void* cb, std::string const& stype) : py_callback_base(callable, cb), m_rtype(dcarg::from_str(stype)) { } @@ -971,9 +970,8 @@ static bool unroll_switch(size_t rt_size, Cf&& func) return [&](std::index_sequence) { // 1-based sequence (all computational nodes have an input, or they can't be scheduled), // with the fold expression short-circuited using || - bool matched = (... || ((rt_size == (Is + 1)) - ? (func(std::make_index_sequence{}), true) - : false)); + bool matched = + (... || ((rt_size == (Is + 1)) ? (func(std::make_index_sequence{}), true) : false)); return matched; }(std::make_index_sequence{}); From e4e696f6fb559b33a973c1aab1725b06f0f6b1ec Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 22:01:30 -0700 Subject: [PATCH 14/41] clang-tidy workarounds --- plugins/python/src/dyncall.cpp | 24 ++++++------------------ plugins/python/src/dyncall.hpp | 6 ++---- plugins/python/src/modulewrap.cpp | 9 +++++++-- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index 455ddf0bb..2de935eac 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -12,24 +12,6 @@ using namespace phlex::experimental; -namespace phlex::experimental { - static ffi_type* const ffi_t[] = { - &ffi_type_pointer, - &ffi_type_uint8, - &ffi_type_sint8, - &ffi_type_uint8, - &ffi_type_sint16, - &ffi_type_uint16, - &ffi_type_sint32, - &ffi_type_uint32, - &ffi_type_sint64, - &ffi_type_uint64, - &ffi_type_float, - &ffi_type_double, - &ffi_type_void, - }; -} // namespace phlex::experimental - phlex::experimental::dcarg phlex::experimental::dcarg::from_str(std::string const& stype) { // only types currently used in modulewrap are added, not all ffi types @@ -72,6 +54,11 @@ namespace { [](auto&& val) -> ffi_type* { using T = std::decay_t; + // there are duplicate bodies here b/c bool is represented by uint8, + // just as uint8 is, there being no bool in C; the code is cleaner + // with each type on its own line, however, rather than combining the + // two in a single predicate as a special case + // NOLINTBEGIN(readability-redundant-condition) if constexpr (std::is_same_v) return &ffi_type_void; else if constexpr (std::is_same_v) @@ -98,6 +85,7 @@ namespace { return &ffi_type_float; else if constexpr (std::is_same_v) return &ffi_type_double; + // NOLINTEND(readability-redundant-condition) }, d.m_value); } diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index b3a40d5cd..15e4d1873 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -16,6 +16,8 @@ // // ======================================================================================= +#include "Python.h" // for PyObject* get<> specialization only + #include #include #include @@ -31,10 +33,6 @@ typedef std::int64_t ph_long_t; typedef std::uint64_t ph_ulong_t; #endif -// Python (for specialization) -struct _object; -typedef _object PyObject; - namespace phlex::experimental { struct dcarg { diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index fb413317b..57fac52f7 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -970,8 +970,13 @@ static bool unroll_switch(size_t rt_size, Cf&& func) return [&](std::index_sequence) { // 1-based sequence (all computational nodes have an input, or they can't be scheduled), // with the fold expression short-circuited using || - bool matched = - (... || ((rt_size == (Is + 1)) ? (func(std::make_index_sequence{}), true) : false)); + + // clang-tidy is incorrect here, b/c the condition "rt_size == (Is + 1)" is only ever + // true once, so the forward is only called once, and func is never used after move + // NOLINT(bugprone-use-after-move) + bool matched = (... || ((rt_size == (Is + 1)) + ? (std::forward(func)(std::make_index_sequence{}), true) + : false)); return matched; }(std::make_index_sequence{}); From 2f1d501309d7889ffd4cf545b2b929ca9f3ff453 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 22:18:40 -0700 Subject: [PATCH 15/41] try again to make clang-tidy shut up --- plugins/python/src/dyncall.cpp | 4 ++-- plugins/python/src/modulewrap.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index 2de935eac..c7ca48565 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -58,7 +58,7 @@ namespace { // just as uint8 is, there being no bool in C; the code is cleaner // with each type on its own line, however, rather than combining the // two in a single predicate as a special case - // NOLINTBEGIN(readability-redundant-condition) + // NOLINTBEGIN(bugprone-branch-clone) if constexpr (std::is_same_v) return &ffi_type_void; else if constexpr (std::is_same_v) @@ -85,7 +85,7 @@ namespace { return &ffi_type_float; else if constexpr (std::is_same_v) return &ffi_type_double; - // NOLINTEND(readability-redundant-condition) + // NOLINTEND(bugprone-branch-clone) }, d.m_value); } diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 57fac52f7..2c05f7a6d 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -973,10 +973,11 @@ static bool unroll_switch(size_t rt_size, Cf&& func) // clang-tidy is incorrect here, b/c the condition "rt_size == (Is + 1)" is only ever // true once, so the forward is only called once, and func is never used after move - // NOLINT(bugprone-use-after-move) + // NOLINTBEGIN(bugprone-use-after-move) bool matched = (... || ((rt_size == (Is + 1)) ? (std::forward(func)(std::make_index_sequence{}), true) : false)); + // NOLINTEND(bugprone-use-after-move) return matched; }(std::make_index_sequence{}); From ea7dff708fe020f24023fa82edc56805380c066c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 05:20:09 +0000 Subject: [PATCH 16/41] Apply header-guards fixes --- plugins/python/src/dyncall.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index 15e4d1873..d3785f031 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -1,5 +1,5 @@ -#ifndef phlex_python_dyncall_hpp -#define phlex_python_dyncall_hpp +#ifndef PLUGINS_PYTHON_SRC_DYNCALL_HPP +#define PLUGINS_PYTHON_SRC_DYNCALL_HPP // ======================================================================================= // @@ -94,4 +94,4 @@ namespace phlex::experimental { } // phlex::experimental -#endif // phlex_python_dyncall_hpp +#endif // PLUGINS_PYTHON_SRC_DYNCALL_HPP From 913391ccdf9c6b6428eebe430765f6a330241827 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Tue, 2 Jun 2026 22:22:12 -0700 Subject: [PATCH 17/41] ruff fix --- plugins/python/python/phlex/_typing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/python/python/phlex/_typing.py b/plugins/python/python/phlex/_typing.py index 626bdd10b..e25b43618 100644 --- a/plugins/python/python/phlex/_typing.py +++ b/plugins/python/python/phlex/_typing.py @@ -13,6 +13,7 @@ from typing import Any, Dict, Union import numpy as np + try: import numba.core.types as nb_types has_numba = True From 74dc3487910eb58a060cb7f038e2a5a4f005ca10 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Thu, 18 Jun 2026 14:04:36 -0700 Subject: [PATCH 18/41] Revert "Introduce layer_path class (#640)" This reverts commit 732b4bd1a348005d2c5721589c3f71668d1735e6. --- phlex/core/framework_graph.cpp | 5 +- phlex/core/index_router.cpp | 65 ++++++++++++----- phlex/core/index_router.hpp | 4 +- phlex/core/input_arguments.hpp | 13 ++-- phlex/core/producer_catalog.cpp | 5 +- phlex/model/CMakeLists.txt | 2 - phlex/model/data_cell_index.cpp | 21 +++--- phlex/model/data_cell_index.hpp | 3 +- phlex/model/data_layer_hierarchy.cpp | 36 +++++----- phlex/model/data_layer_hierarchy.hpp | 7 +- phlex/model/fixed_hierarchy.cpp | 47 ++++++------ phlex/model/fixed_hierarchy.hpp | 5 +- phlex/model/handle.hpp | 2 +- phlex/model/identifier.cpp | 16 ++++- phlex/model/identifier.hpp | 23 +----- phlex/model/layer_path.cpp | 103 --------------------------- phlex/model/layer_path.hpp | 61 ---------------- phlex/utilities/CMakeLists.txt | 1 - phlex/utilities/bulleted_list.hpp | 51 ------------- plugins/layer_generator.cpp | 6 +- test/CMakeLists.txt | 1 - test/core_misc_test.cpp | 12 ---- test/layer_path.cpp | 43 ----------- 23 files changed, 139 insertions(+), 393 deletions(-) delete mode 100644 phlex/model/layer_path.cpp delete mode 100644 phlex/model/layer_path.hpp delete mode 100644 phlex/utilities/bulleted_list.hpp delete mode 100644 test/layer_path.cpp diff --git a/phlex/core/framework_graph.cpp b/phlex/core/framework_graph.cpp index aa58af777..9e50fa0d7 100644 --- a/phlex/core/framework_graph.cpp +++ b/phlex/core/framework_graph.cpp @@ -3,7 +3,6 @@ #include "phlex/concurrency.hpp" #include "phlex/core/make_computational_edges.hpp" #include "phlex/model/product_store.hpp" -#include "phlex/utilities/bulleted_list.hpp" #include "fmt/format.h" #include "fmt/ranges.h" @@ -70,7 +69,7 @@ namespace phlex::experimental { std::size_t framework_graph::seen_cell_count(std::string const& layer_name, bool const missing_ok) const { - return hierarchy_.count_for(experimental::layer_path(layer_name), missing_ok); + return hierarchy_.count_for(layer_name, missing_ok); } std::size_t framework_graph::execution_count(std::string const& node_name) const @@ -137,7 +136,7 @@ namespace phlex::experimental { return; } throw std::runtime_error( - fmt::format("\nConfiguration errors:\n{}", bulleted_list(registration_errors_))); + fmt::format("\nConfiguration errors:\n - {}", fmt::join(registration_errors_, "\n - "))); } void framework_graph::make_filter_edges() diff --git a/phlex/core/index_router.cpp b/phlex/core/index_router.cpp index 2482e9446..aeae3db3c 100644 --- a/phlex/core/index_router.cpp +++ b/phlex/core/index_router.cpp @@ -1,7 +1,6 @@ #include "phlex/core/index_router.hpp" #include "phlex/model/flush_gate.hpp" -#include "phlex/utilities/bulleted_list.hpp" #include "phlex/utilities/hashing.hpp" #include "fmt/std.h" @@ -15,6 +14,34 @@ using namespace phlex::experimental; +namespace { + using layer_path_t = std::vector; + + std::size_t layer_hash_for_path(layer_path_t const& layer_path) + { + std::size_t result = "job"_id.hash(); + for (auto const& layer_name : layer_path | std::views::drop(1)) { + result = hash(result, identifier{layer_name}.hash()); + } + return result; + } + + bool is_strict_prefix(layer_path_t const& candidate, layer_path_t const& other) + { + // FIXME: Use std::ranges::starts_with(other, candidate) once the compilers support it (C++23) + return candidate.size() < other.size() and + std::ranges::mismatch(other, candidate).in2 == std::ranges::end(candidate); + } + + std::string delimited_layer_path(std::string_view const layer_path) + { + if (not layer_path.starts_with("/")) { + return fmt::format("/{}", layer_path); + } + return std::string{layer_path}; + } +} + namespace phlex::experimental { //======================================================================================== @@ -30,7 +57,7 @@ namespace phlex::experimental { void put_message(data_cell_index_ptr const& index, std::size_t message_id); void put_end_token(data_cell_index_ptr const& index, flush_gate const& fc); - bool matches_exactly(layer_path const& layer_path) const; + bool matches_exactly(std::string const& layer_path) const; bool is_parent_of(data_cell_index_ptr const& index) const; private: @@ -66,9 +93,9 @@ namespace phlex::experimental { flusher_.try_put({.index = index, .count = static_cast(fc.committed_total_count())}); } - bool multilayer_slot::matches_exactly(layer_path const& layer_path) const + bool multilayer_slot::matches_exactly(std::string const& layer_path) const { - return layer_path.ends_with(layer_); + return layer_path.ends_with(delimited_layer_path(static_cast(layer_))); } bool multilayer_slot::is_parent_of(data_cell_index_ptr const& index) const @@ -99,19 +126,21 @@ namespace phlex::experimental { { } - void index_router::establish_layers(std::vector const& layer_paths_from_driver, - std::vector unfold_input_layer_names, - std::vector unfold_output_layer_names) + void index_router::establish_layers( + std::vector> const& layer_paths_from_driver, + std::vector unfold_input_layer_names, + std::vector unfold_output_layer_names) { auto sorted_layer_paths = layer_paths_from_driver; std::ranges::sort(sorted_layer_paths); // In sorted order, a path can only be a prefix of paths that follow it. - for (std::size_t i = 0; i + 1 < sorted_layer_paths.size(); ++i) { + for (std::size_t i = 0; i < sorted_layer_paths.size(); ++i) { bool const is_not_lowest_layer = - sorted_layer_paths[i].is_strict_prefix_of(sorted_layer_paths[i + 1]); + i + 1 < sorted_layer_paths.size() and + is_strict_prefix(sorted_layer_paths[i], sorted_layer_paths[i + 1]); if (is_not_lowest_layer) { - auto const layer_hash = sorted_layer_paths[i].hash(); + auto const layer_hash = layer_hash_for_path(sorted_layer_paths[i]); is_lowest_layer_hashes_.emplace(layer_hash, false); } } @@ -223,17 +252,19 @@ namespace phlex::experimental { return it->second; } - layer_path const layerish_path{{index->layer_name()}}; + std::string const layerish_path{static_cast(index->layer_name())}; auto broadcaster = index_set_node_for(layerish_path); index_set_node_cache_.insert({layer_hash, broadcaster}); return broadcaster; } - auto index_router::index_set_node_for(layer_path const& layer_path) -> detail::index_set_node_ptr + auto index_router::index_set_node_for(std::string const& layer_path) -> detail::index_set_node_ptr { + std::string const search_token = delimited_layer_path(layer_path); + std::vector candidates; for (auto it = index_set_nodes_.begin(), e = index_set_nodes_.end(); it != e; ++it) { - if (layer_path.ends_with(it->first)) { + if (search_token.ends_with(delimited_layer_path(static_cast(it->first)))) { candidates.push_back(it); } } @@ -246,10 +277,10 @@ namespace phlex::experimental { return nullptr; } - std::string msg = fmt::format( - "Multiple layers match specification {}:\n{}", - layer_path, - bulleted_list(candidates | std::views::transform([](auto const& it) { return it->first; }))); + std::string msg = fmt::format("Multiple layers match specification {}:\n", layer_path); + for (auto const& it : candidates) { + msg += fmt::format("\n- {}", it->first); + } throw std::runtime_error(msg); } diff --git a/phlex/core/index_router.hpp b/phlex/core/index_router.hpp index 8e0fb8bb3..366f1b829 100644 --- a/phlex/core/index_router.hpp +++ b/phlex/core/index_router.hpp @@ -56,7 +56,7 @@ namespace phlex::experimental { explicit index_router(tbb::flow::graph& g); data_cell_index_ptr route(data_cell_index_ptr const& index, index_flushes flushes); - void establish_layers(std::vector const& layer_paths_from_driver, + void establish_layers(std::vector> const& layer_paths_from_driver, std::vector unfold_input_layer_names, std::vector unfold_output_layer_names); @@ -91,7 +91,7 @@ namespace phlex::experimental { // correct for unfold outputs (the only source of unknown hashes) and consistent with // index_is_lowest_layer()'s fall-through default. bool is_lowest_layer_hash(std::size_t layer_hash) const; - detail::index_set_node_ptr index_set_node_for(layer_path const& layer); + detail::index_set_node_ptr index_set_node_for(std::string const& layer); detail::index_set_node_ptr index_set_node_for(data_cell_index_ptr const& index); std::pair multilayer_slots_for( data_cell_index_ptr const& index); diff --git a/phlex/core/input_arguments.hpp b/phlex/core/input_arguments.hpp index 34053ef3a..350146538 100644 --- a/phlex/core/input_arguments.hpp +++ b/phlex/core/input_arguments.hpp @@ -4,7 +4,6 @@ #include "phlex/core/message.hpp" #include "phlex/core/product_selector.hpp" #include "phlex/model/handle.hpp" -#include "phlex/utilities/bulleted_list.hpp" #include "fmt/format.h" @@ -33,16 +32,18 @@ namespace phlex::experimental { std::ranges::to(); if (products.empty()) { throw std::runtime_error(fmt::format( - "No products found matching the query {}\n Store (id {} from {}) contains:\n{}", + "No products found matching the query {}\n Store (id {} from {}) contains:\n - {}", query, store->index()->to_string(), store->source().to_string(), - bulleted_list(all_products, /*indent=*/4))); + fmt::join(all_products | views::transform(&product_specification::to_string), + "\n - "))); } if (products.size() > 1) { - throw std::runtime_error(fmt::format("Multiple products found matching the query {}:\n{}", - query, - bulleted_list(products, /*indent=*/4))); + throw std::runtime_error(fmt::format( + "Multiple products found matching the query {}:\n - {}", + query, + fmt::join(products | views::transform(&product_specification::to_string), "\n - "))); } return store->get_handle(products[0]); } diff --git a/phlex/core/producer_catalog.cpp b/phlex/core/producer_catalog.cpp index 7434a339a..1bf365705 100644 --- a/phlex/core/producer_catalog.cpp +++ b/phlex/core/producer_catalog.cpp @@ -1,5 +1,4 @@ #include "phlex/core/producer_catalog.hpp" -#include "phlex/utilities/bulleted_list.hpp" #include "fmt/format.h" #include "fmt/ranges.h" @@ -76,9 +75,9 @@ namespace phlex::experimental { } if (candidates.size() > 1ull) { - std::string msg = fmt::format("More than one candidate matches the query {}: \n{}\n", + std::string msg = fmt::format("More than one candidate matches the query {}: \n - {}\n", query.to_string(), - bulleted_list(std::views::keys(candidates), /*indent=*/1)); + fmt::join(std::views::keys(candidates), "\n - ")); throw std::runtime_error(msg); } diff --git a/phlex/model/CMakeLists.txt b/phlex/model/CMakeLists.txt index 0539e1468..85bef091d 100644 --- a/phlex/model/CMakeLists.txt +++ b/phlex/model/CMakeLists.txt @@ -11,7 +11,6 @@ cet_make_library( data_layer_hierarchy.cpp data_cell_index.cpp identifier.cpp - layer_path.cpp product_matcher.cpp product_store.cpp products.cpp @@ -40,7 +39,6 @@ install( data_layer_hierarchy.hpp data_cell_index.hpp identifier.hpp - layer_path.hpp product_matcher.hpp product_specification.hpp product_store.hpp diff --git a/phlex/model/data_cell_index.cpp b/phlex/model/data_cell_index.cpp index 98701faab..75fb5c852 100644 --- a/phlex/model/data_cell_index.cpp +++ b/phlex/model/data_cell_index.cpp @@ -1,13 +1,16 @@ #include "phlex/model/data_cell_index.hpp" #include "phlex/utilities/hashing.hpp" +#include "boost/algorithm/string.hpp" #include "fmt/format.h" +#include "fmt/ranges.h" #include -#include #include #include +#include #include +#include #include using namespace std::string_literals; @@ -59,17 +62,15 @@ namespace phlex { return layer_name_; } - experimental::layer_path data_cell_index::layer_path() const + std::string data_cell_index::layer_path() const { - // We know how deep we are so we can pre-allocate and fill in reverse - std::vector layers(depth_ + 1); - auto const* ptr = this; - for (auto& layer : std::views::reverse(layers)) { - assert(ptr); - layer = ptr->layer_name(); - ptr = ptr->parent_.get(); + std::vector layers_in_reverse{std::string_view(layer_name_)}; + auto next_parent = parent(); + while (next_parent) { + layers_in_reverse.push_back(std::string_view(next_parent->layer_name())); + next_parent = next_parent->parent(); } - return experimental::layer_path{std::move(layers)}; + return fmt::format("/{}", fmt::join(std::views::reverse(layers_in_reverse), "/")); } std::size_t data_cell_index::depth() const noexcept { return depth_; } diff --git a/phlex/model/data_cell_index.hpp b/phlex/model/data_cell_index.hpp index 3bd618bc0..084baade9 100644 --- a/phlex/model/data_cell_index.hpp +++ b/phlex/model/data_cell_index.hpp @@ -5,7 +5,6 @@ #include "phlex/model/fwd.hpp" #include "phlex/model/identifier.hpp" -#include "phlex/model/layer_path.hpp" #include #include @@ -24,7 +23,7 @@ namespace phlex { using hash_type = std::size_t; data_cell_index_ptr make_child(std::string layer_name, std::size_t data_cell_number) const; experimental::identifier const& layer_name() const noexcept; - experimental::layer_path layer_path() const; + std::string layer_path() const; std::size_t depth() const noexcept; data_cell_index_ptr parent(experimental::identifier const& layer_name) const; data_cell_index_ptr parent() const noexcept; diff --git a/phlex/model/data_layer_hierarchy.cpp b/phlex/model/data_layer_hierarchy.cpp index fb1d7a6a2..6daf692ea 100644 --- a/phlex/model/data_layer_hierarchy.cpp +++ b/phlex/model/data_layer_hierarchy.cpp @@ -1,12 +1,10 @@ #include "phlex/model/data_layer_hierarchy.hpp" - -#include "phlex/utilities/bulleted_list.hpp" +#include "phlex/model/data_cell_index.hpp" #include "fmt/format.h" +#include "fmt/std.h" #include "spdlog/spdlog.h" -#include - namespace { std::string const& maybe_name(std::string const& name) { @@ -37,32 +35,32 @@ namespace phlex::experimental { ++it->second->count; } - std::size_t data_layer_hierarchy::count_for(layer_path const& layer, bool const missing_ok) const + std::size_t data_layer_hierarchy::count_for(std::string const& layer, bool const missing_ok) const { - // The assumption is that specified layer is a portion of a layer path - // sufficient to uniquely identify a layer + // The assumption is that specified layer is the component of a layer path + std::string search_token = layer; + if (not layer.starts_with("/")) { + search_token = '/' + layer; + } + std::vector candidates; for (auto const& [_, entry] : layers_) { - if (entry->layer_path.ends_with(layer)) { + if (entry->layer_path.ends_with(search_token)) { candidates.push_back(entry.get()); } } if (candidates.empty()) { return missing_ok ? 0ull - : throw std::runtime_error( - fmt::format("No layers match the specification {}", layer)); + : throw std::runtime_error("No layers match the specification " + layer); } if (candidates.size() > 1ull) { - std::string msg = - fmt::format("The following data layers match the specification {}:\n\n{}" - "\n\nPlease specify the full layer path to disambiguate between them.", - layer, - bulleted_list(candidates | std::views::transform([](auto const* entry) { - return entry->layer_path; - }), - /*indent=*/0)); + std::string msg{"The following data layers match the specification " + layer + ":\n"}; + for (auto const* entry : candidates) { + msg += "\n- " + entry->layer_path; + } + msg += "\n\nPlease specify the full layer path to disambiguate between them."; throw std::runtime_error(msg); } @@ -88,7 +86,7 @@ namespace phlex::experimental { auto child_prefix = !at_end ? indent + " ├ " : indent + " └ "; auto const& entry = *layers_.at(child_hash); result += "\n" + indent + " │ "; - result += fmt::format("\n{}{}: {}", child_prefix, maybe_name(child_name), entry.count.load()); + result += fmt::format("\n{}{}: {}", child_prefix, maybe_name(child_name), entry.count); auto new_indent = indent; new_indent += at_end ? " " : " │ "; diff --git a/phlex/model/data_layer_hierarchy.hpp b/phlex/model/data_layer_hierarchy.hpp index ff42bf988..384fb58d9 100644 --- a/phlex/model/data_layer_hierarchy.hpp +++ b/phlex/model/data_layer_hierarchy.hpp @@ -5,7 +5,6 @@ #include "phlex/model/data_cell_index.hpp" #include "phlex/model/fwd.hpp" -#include "phlex/model/layer_path.hpp" #include "oneapi/tbb/concurrent_unordered_map.h" @@ -26,7 +25,7 @@ namespace phlex::experimental { data_layer_hierarchy& operator=(data_layer_hierarchy&&) = delete; void increment_count(data_cell_index_ptr const& id); - std::size_t count_for(layer_path const& layer, bool missing_ok = false) const; + std::size_t count_for(std::string const& layer, bool missing_ok = false) const; void print() const; @@ -40,13 +39,13 @@ namespace phlex::experimental { std::string indent = {}) const; struct layer_entry { - layer_entry(identifier n, layer_path path, std::size_t par_hash) : + layer_entry(identifier n, std::string path, std::size_t par_hash) : name{std::move(n)}, layer_path{std::move(path)}, parent_hash{par_hash} { } identifier name; - experimental::layer_path layer_path; + std::string layer_path; std::size_t parent_hash; std::atomic count{}; }; diff --git a/phlex/model/fixed_hierarchy.cpp b/phlex/model/fixed_hierarchy.cpp index 48e1f7a63..c293a2056 100644 --- a/phlex/model/fixed_hierarchy.cpp +++ b/phlex/model/fixed_hierarchy.cpp @@ -14,6 +14,19 @@ #include namespace { + // Each path must be non-empty and may only contain "job" as the first element. + std::span validated_path(std::vector const& path) + { + if (path.empty()) { + throw std::runtime_error("Layer paths cannot be empty."); + } + auto const rest = std::span{path}.subspan(path[0] == "job" ? 1 : 0); + if (std::ranges::contains(rest, "job")) { + throw std::runtime_error("Layer paths may only contain 'job' as the first element."); + } + return rest; + } + // Builds the set of cumulative layer hashes that define the fixed hierarchy. // For example, if the layer paths are ["job", "run", "subrun"] and ["job", "spill"], // the hashes included will correspond to: @@ -27,31 +40,20 @@ namespace { // - "job/spill" // // Each path must be non-empty and may only contain "job" as the first element. - std::set build_hashes( - std::vector const& layer_paths) + std::set build_hashes(std::vector> const& layer_paths) { using namespace phlex::experimental; - using namespace phlex::experimental::literals; - std::set hashes{"job"_idq.hash}; - for (layer_path const& path : layer_paths) { - // "job" was added explicitly so it doesn't matter whether it does or does not appear in path.hashes() - hashes.merge(path.hashes()); + identifier const job{"job"}; + std::set hashes{job.hash()}; + for (std::vector const& path : layer_paths) { + std::size_t cumulative_hash = job.hash(); + for (auto const& name : validated_path(path)) { + cumulative_hash = hash(cumulative_hash, identifier{name}.hash()); + hashes.insert(cumulative_hash); + } } return hashes; } - - std::vector convert_vector_vector_string( - std::vector>&& layer_paths) - { - using namespace phlex::experimental; - return std::move(layer_paths) | std::views::transform([](std::vector& lp) { - auto lp_as_ids = - lp | std::views::transform([](auto& str) { return identifier(std::move(str)); }) | - std::ranges::to(); - return layer_path(std::move(lp_as_ids)); - }) | - std::ranges::to>(); - } } namespace phlex { @@ -73,7 +75,7 @@ namespace phlex { return data_cell_cursor{child, hierarchy_, driver_}; } - experimental::layer_path data_cell_cursor::layer_path() const { return index_->layer_path(); } + std::string data_cell_cursor::layer_path() const { return index_->layer_path(); } // ================================================================================ // data_cell_yielder implementation @@ -97,8 +99,7 @@ namespace phlex { } fixed_hierarchy::fixed_hierarchy(std::vector> layer_paths) : - layer_paths_(convert_vector_vector_string(std::move(layer_paths))), - layer_hashes_(std::from_range, build_hashes(layer_paths_)) + layer_paths_(std::move(layer_paths)), layer_hashes_(std::from_range, build_hashes(layer_paths_)) { } diff --git a/phlex/model/fixed_hierarchy.hpp b/phlex/model/fixed_hierarchy.hpp index 4d419b05f..943785a28 100644 --- a/phlex/model/fixed_hierarchy.hpp +++ b/phlex/model/fixed_hierarchy.hpp @@ -4,7 +4,6 @@ #include "phlex/phlex_model_export.hpp" #include "phlex/model/fwd.hpp" -#include "phlex/model/layer_path.hpp" #include #include @@ -26,7 +25,7 @@ namespace phlex { // data-cell index to the underlying driver, returning a data_cell_cursor for the child. data_cell_cursor yield_child(std::string const& layer_name, std::size_t number) const; - experimental::layer_path layer_path() const; + std::string layer_path() const; private: friend class fixed_hierarchy; @@ -88,7 +87,7 @@ namespace phlex { data_cell_yielder yielder(experimental::framework_driver& d) const; private: - std::vector layer_paths_; + std::vector> layer_paths_; std::vector layer_hashes_; }; diff --git a/phlex/model/handle.hpp b/phlex/model/handle.hpp index 7acb768f8..1ee4ff5bc 100644 --- a/phlex/model/handle.hpp +++ b/phlex/model/handle.hpp @@ -95,7 +95,7 @@ namespace phlex { } std::string_view suffix() const noexcept { return std::string_view(suffix_); } std::string_view layer() const noexcept { return std::string_view(id_->layer_name()); } - std::string layer_path() const { return id_->layer_path().to_string(); } + std::string layer_path() const { return id_->layer_path(); } template friend class handle; diff --git a/phlex/model/identifier.cpp b/phlex/model/identifier.cpp index 998fc8c0a..5a698fc75 100644 --- a/phlex/model/identifier.cpp +++ b/phlex/model/identifier.cpp @@ -4,6 +4,20 @@ #include namespace phlex::experimental { + identifier_query literals::operator""_idq(char const* lit, std::size_t len) + { + return {identifier::hash_string(std::string_view(lit, len))}; + } + + std::uint64_t identifier::hash_string(std::string_view str) + { + // Hash quality is very important here, since comparisons are done using only the hash + using namespace boost::hash2; + xxhash_64 h; + hash_append(h, {}, str); + return h.result(); + } + identifier::identifier(std::string_view str) : content_(str), hash_(hash_string(content_)) {} identifier::identifier(std::string&& str) : content_(std::move(str)), hash_(hash_string(content_)) { @@ -36,6 +50,4 @@ namespace phlex::experimental { { return identifier{std::string_view(lit, len)}; } - - bool identifier_query::operator()(identifier const& id) const noexcept { return id == (*this); } } diff --git a/phlex/model/identifier.hpp b/phlex/model/identifier.hpp index ade0da0e5..d99757ab0 100644 --- a/phlex/model/identifier.hpp +++ b/phlex/model/identifier.hpp @@ -3,8 +3,6 @@ #include "phlex/phlex_model_export.hpp" -#include -#include #include #include @@ -18,25 +16,15 @@ namespace phlex::experimental { /// If you're comparing to an identifier you know at compile time, you're probably not going to need /// to print it. - class identifier; - struct PHLEX_MODEL_EXPORT identifier_query { + struct identifier_query { std::uint64_t hash; - // This means an identifier_query can be used as a callable to check if it compares equal to an identifier - bool operator()(identifier const& id) const noexcept; }; /// Carries around the string itself (as a shared_ptr to string to make copies lighter) /// along with a precomputed hash used for all comparisons class PHLEX_MODEL_EXPORT identifier { public: - static constexpr std::uint64_t hash_string(std::string_view str) - { - // Hash quality is very important here, since comparisons are done using only the hash - using namespace boost::hash2; - xxhash_64 h; - hash_append(h, {}, str); - return h.result(); - } + static std::uint64_t hash_string(std::string_view str); // The default constructor is necessary so other classes containing identifiers // can have default constructors. identifier() = default; @@ -86,16 +74,11 @@ namespace phlex::experimental { // Identifier UDL namespace literals { PHLEX_MODEL_EXPORT identifier operator""_id(char const* lit, std::size_t len); - consteval PHLEX_MODEL_EXPORT identifier_query operator""_idq(char const* lit, std::size_t len) - { - return {identifier::hash_string(std::string_view(lit, len))}; - } - + PHLEX_MODEL_EXPORT identifier_query operator""_idq(char const* lit, std::size_t len); } // Really trying to avoid the extra function call here inline std::string_view format_as(identifier const& id) { return std::string_view(id); } - inline std::size_t hash_value(identifier const& id) { return id.hash(); } } template <> diff --git a/phlex/model/layer_path.cpp b/phlex/model/layer_path.cpp deleted file mode 100644 index 3c15e506e..000000000 --- a/phlex/model/layer_path.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "layer_path.hpp" - -#include "phlex/utilities/hashing.hpp" - -#include "boost/container_hash/hash.hpp" -#include "fmt/format.h" -#include "fmt/ranges.h" - -#include -#include - -using namespace phlex::experimental::literals; - -namespace phlex::experimental { - layer_path::layer_path(std::string_view path) : - layer_path_{ - std::from_range, - path | std::views::split('/') | - std::views::filter([](auto const& sr) { return not sr.empty(); }) | - std::views::transform([](auto const& sr) { return identifier(std::string_view(sr)); })} - { - if (layer_path_.empty()) { - throw std::runtime_error("Layer paths cannot be empty."); - } - if (path.starts_with("/") and not is_complete()) { - throw std::runtime_error( - fmt::format("A complete layer path must start with '/job'. '{}' does not!", path)); - } - - validate(); - } - - void layer_path::validate() const - { - using namespace literals; - if (layer_path_.empty()) { - throw std::runtime_error("Layer paths cannot be empty."); - } - - // We can use any_of because identifier_query has a function call operator - if (layer_path_.size() > 1 && - std::ranges::any_of(std::span{layer_path_}.subspan(1), "job"_idq)) { - throw std::runtime_error("Layer paths may only contain 'job' as the first element."); - } - } - bool layer_path::is_complete() const noexcept { return layer_path_[0] == "job"_idq; } - - bool layer_path::is_strict_prefix_of(layer_path const& other) const noexcept - { - if (layer_path_.size() >= other.layer_path_.size()) { - // Optimization, and address Codex observation that a path is not a _strict_ prefix of itself - return false; - } - // starts_with / ends_with aren't supported until libstdc++ *16* - // return std::ranges::starts_with(other.layer_path_, layer_path_); - auto const& [it, other_it] = std::ranges::mismatch(layer_path_, other.layer_path_); - return it == layer_path_.end(); - } - - bool layer_path::ends_with(layer_path const& other) const noexcept - { - // starts_with / ends_with aren't supported until libstdc++ *16* - // return std::ranges::ends_with(layer_path_, other.layer_path_); - auto rev_layer_path = std::views::reverse(layer_path_); - auto rev_other_layer_path = std::views::reverse(other.layer_path_); - auto const& [it, other_it] = std::ranges::mismatch(rev_layer_path, rev_other_layer_path); - return other_it == rev_other_layer_path.end(); - } - - bool layer_path::ends_with(identifier const& name) const noexcept - { - return layer_path_.back() == name; - } - - std::string layer_path::to_string() const - { - return fmt::format("{}{}", is_complete() ? "/" : "", fmt::join(layer_path_, "/")); - } - - std::size_t layer_path::hash() const noexcept - { - // incomplete paths have an implied "job" root in this calculation - // Need the experimental:: so it isn't confused for this function - std::size_t seed = - is_complete() ? "job"_idq.hash : experimental::hash("job"_idq.hash, layer_path_[0].hash()); - boost::hash_range(seed, layer_path_.begin() + 1, layer_path_.end()); - return seed; - } - - std::set layer_path::hashes() const - { - std::set hashes; - // Add the appropriate first hash - std::size_t cumulative_hash = - is_complete() ? "job"_idq.hash : experimental::hash("job"_idq.hash, layer_path_[0].hash()); - hashes.insert(cumulative_hash); - for (auto const& name : layer_path_ | std::views::drop(1)) { - cumulative_hash = experimental::hash(cumulative_hash, name.hash()); - hashes.insert(cumulative_hash); - } - return hashes; - } -} diff --git a/phlex/model/layer_path.hpp b/phlex/model/layer_path.hpp deleted file mode 100644 index 112f268af..000000000 --- a/phlex/model/layer_path.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef PHLEX_MODEL_LAYER_PATH_HPP -#define PHLEX_MODEL_LAYER_PATH_HPP -#include "phlex/model/identifier.hpp" -#include "phlex/phlex_model_export.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace phlex::experimental { - class PHLEX_MODEL_EXPORT layer_path { - public: - layer_path(std::vector const& path) : layer_path_{path} { validate(); } - layer_path(std::vector&& path) : layer_path_{std::move(path)} { validate(); } - layer_path(std::string_view path); - - template - requires std::constructible_from && (!std::same_as) - layer_path(T const& path) : layer_path(std::string_view(path)) - { - } - - auto operator<=>(layer_path const&) const noexcept = default; - - /// Is this path complete (does it start with "job") - bool is_complete() const noexcept; - - bool is_strict_prefix_of(layer_path const& other) const noexcept; - - bool ends_with(layer_path const& other) const noexcept; - - bool ends_with(identifier const& name) const noexcept; - - std::string to_string() const; - - /// This function assumes incomplete paths have an implicit job root - std::size_t hash() const noexcept; - - /// Return hash of this path and every parent path - std::set hashes() const; - - private: - std::vector layer_path_; - void validate() const; - }; - - inline std::string format_as(layer_path const& lp) { return lp.to_string(); } - inline std::size_t hash_value(layer_path const& lp) { return lp.hash(); } - - // Required by catch2 - inline std::ostream& operator<<(std::ostream& os, layer_path const& lp) - { - return os << lp.to_string(); - } -} -#endif // PHLEX_MODEL_LAYER_PATH_HPP diff --git a/phlex/utilities/CMakeLists.txt b/phlex/utilities/CMakeLists.txt index c9878734e..9f3fca0db 100644 --- a/phlex/utilities/CMakeLists.txt +++ b/phlex/utilities/CMakeLists.txt @@ -16,7 +16,6 @@ install( FILES resumable_driver.hpp hashing.hpp - bulleted_list.hpp max_allowed_parallelism.hpp resource_usage.hpp simple_ptr_map.hpp diff --git a/phlex/utilities/bulleted_list.hpp b/phlex/utilities/bulleted_list.hpp deleted file mode 100644 index ec67c6034..000000000 --- a/phlex/utilities/bulleted_list.hpp +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef PHLEX_UTILITIES_BULLETED_LIST_HPP -#define PHLEX_UTILITIES_BULLETED_LIST_HPP - -#include -#include - -#include -#include - -namespace phlex::experimental { - namespace detail { - template - concept range_of_formattable = fmt::formattable>; - - template - concept range_of_to_stringable = requires(std::ranges::range_value_t const& val) { - { val.to_string() } -> std::same_as; - }; - } - - template - std::string bulleted_list(R const& rng, std::size_t indent = 2) - { - if (std::ranges::empty(rng)) { - return ""; - } - std::string prefix = - fmt::format("{blank:{indent}s}- ", fmt::arg("blank", ""), fmt::arg("indent", indent)); - std::string prefix_with_newline = fmt::format("\n{}", prefix); - return fmt::format("{}{}", prefix, fmt::join(rng, prefix_with_newline)); - } - - template - requires(!detail::range_of_formattable) - std::string bulleted_list(R const& rng, std::size_t indent = 2) - { - if (std::ranges::empty(rng)) { - return ""; - } - std::string prefix = - fmt::format("{blank:{indent}s}- ", fmt::arg("blank", ""), fmt::arg("indent", indent)); - std::string prefix_with_newline = fmt::format("\n{}", prefix); - return fmt::format( - "{}{}", - prefix, - fmt::join(rng | std::views::transform([](auto&& v) { return v.to_string(); }), - prefix_with_newline)); - } -} - -#endif // PHLEX_UTILITIES_BULLETED_LIST_HPP diff --git a/plugins/layer_generator.cpp b/plugins/layer_generator.cpp index 2b9fd3abd..96a5f08e7 100644 --- a/plugins/layer_generator.cpp +++ b/plugins/layer_generator.cpp @@ -133,13 +133,11 @@ namespace phlex::experimental { index_generator layer_generator::execute(data_cell_index_ptr const cell) { - // Used in drivers which are close to public API --> easier to stick to strings - auto cell_lp = cell->layer_path().to_string(); - auto it = parent_to_children_.find(cell_lp); + auto it = parent_to_children_.find(cell->layer_path()); assert(it != parent_to_children_.cend()); for (auto const& child : it->second) { - auto const full_child_path = fmt::format("{}/{}", cell_lp, child); + auto const full_child_path = cell->layer_path() + "/" + child; auto const& [_, total_per_parent, starting_value] = layers_.at(full_child_path); bool const has_children = parent_to_children_.contains(full_child_path); for (unsigned int i : std::views::iota(starting_value, total_per_parent + starting_value)) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4c2d93575..fba0d349c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -29,7 +29,6 @@ cet_test(type_deduction SOURCE type_deduction.cpp LIBRARIES phlex::metaprogramming ) cet_test(type_id USE_CATCH2_MAIN SOURCE type_id.cpp LIBRARIES phlex::model_internal fmt::fmt) -cet_test(layer_path USE_CATCH2_MAIN SOURCE layer_path.cpp LIBRARIES phlex::model_internal fmt::fmt) cet_test(identifier USE_CATCH2_MAIN SOURCE identifier.cpp LIBRARIES phlex::model phlex::configuration Boost::json) cet_test( yielding_driver diff --git a/test/core_misc_test.cpp b/test/core_misc_test.cpp index 30716c633..1a092a67f 100644 --- a/test/core_misc_test.cpp +++ b/test/core_misc_test.cpp @@ -3,7 +3,6 @@ #include "phlex/core/glue.hpp" #include "phlex/core/registrar.hpp" #include "phlex/model/algorithm_name.hpp" -#include "phlex/utilities/bulleted_list.hpp" #include #include @@ -46,17 +45,6 @@ TEST_CASE("algorithm_name tests", "[model]") CHECK(an.match(algorithm_name::create("p:a"))); CHECK_FALSE(an.match(algorithm_name::create("p:b"))); } - SECTION("Bulleted list of algorithm_name") - { - std::vector ans = {algorithm_name::create("p:a1"), - algorithm_name::create("p:a2")}; - CHECK(bulleted_list(ans) == " - p:a1\n - p:a2"); - } - SECTION("Empty bulleted list") - { - CHECK(bulleted_list(std::vector{}) == ""); - CHECK(bulleted_list(std::vector{}) == ""); - } } TEST_CASE("consumer tests", "[core]") diff --git a/test/layer_path.cpp b/test/layer_path.cpp deleted file mode 100644 index f40966f93..000000000 --- a/test/layer_path.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "phlex/model/layer_path.hpp" - -#include "catch2/catch_test_macros.hpp" - -using namespace phlex::experimental; - -TEST_CASE("Layer path tests", "[layer_path]") -{ - layer_path job = "/job"; - layer_path run = "/job/run"; - layer_path lumiblock = "/job/run/lumiblock"; - layer_path subrun = "/job/run/subrun"; - layer_path event = "/job/run/subrun/event"; - - identifier event_id = "event"; - layer_path partial_event = "subrun/event"; - - CHECK(run.is_complete()); - CHECK(run.is_strict_prefix_of(event)); - CHECK(run.is_strict_prefix_of(subrun)); - CHECK_FALSE(lumiblock.is_strict_prefix_of(event)); - - CHECK(event.ends_with(event_id)); - CHECK_FALSE(partial_event.is_complete()); - CHECK(event.ends_with(partial_event)); - CHECK_FALSE(subrun.ends_with(partial_event)); - - CHECK(subrun.to_string() == "/job/run/subrun"); - - auto event_hashes = event.hashes(); - CHECK(event_hashes.contains(job.hash())); - CHECK(event_hashes.contains(run.hash())); - CHECK(event_hashes.contains(subrun.hash())); - CHECK(event_hashes.contains(event.hash())); - CHECK_FALSE(event_hashes.contains(lumiblock.hash())); - - // Validation - CHECK_THROWS(layer_path("")); - CHECK_THROWS(layer_path("/notajob/notarun")); - CHECK_THROWS(layer_path(std::vector{})); - CHECK_THROWS(layer_path("/job/run/job")); - CHECK_THROWS(layer_path("subrun/job")); -} From f3a101c3b3ba788869a6e756ffd5928834eba820 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Thu, 18 Jun 2026 14:10:19 -0700 Subject: [PATCH 19/41] Revert "Revert "Introduce layer_path class (#640)"" This reverts commit 74dc3487910eb58a060cb7f038e2a5a4f005ca10. --- phlex/core/framework_graph.cpp | 5 +- phlex/core/index_router.cpp | 65 +++++------------ phlex/core/index_router.hpp | 4 +- phlex/core/input_arguments.hpp | 13 ++-- phlex/core/producer_catalog.cpp | 5 +- phlex/model/CMakeLists.txt | 2 + phlex/model/data_cell_index.cpp | 21 +++--- phlex/model/data_cell_index.hpp | 3 +- phlex/model/data_layer_hierarchy.cpp | 36 +++++----- phlex/model/data_layer_hierarchy.hpp | 7 +- phlex/model/fixed_hierarchy.cpp | 47 ++++++------ phlex/model/fixed_hierarchy.hpp | 5 +- phlex/model/handle.hpp | 2 +- phlex/model/identifier.cpp | 16 +---- phlex/model/identifier.hpp | 23 +++++- phlex/model/layer_path.cpp | 103 +++++++++++++++++++++++++++ phlex/model/layer_path.hpp | 61 ++++++++++++++++ phlex/utilities/CMakeLists.txt | 1 + phlex/utilities/bulleted_list.hpp | 51 +++++++++++++ plugins/layer_generator.cpp | 6 +- test/CMakeLists.txt | 1 + test/core_misc_test.cpp | 12 ++++ test/layer_path.cpp | 43 +++++++++++ 23 files changed, 393 insertions(+), 139 deletions(-) create mode 100644 phlex/model/layer_path.cpp create mode 100644 phlex/model/layer_path.hpp create mode 100644 phlex/utilities/bulleted_list.hpp create mode 100644 test/layer_path.cpp diff --git a/phlex/core/framework_graph.cpp b/phlex/core/framework_graph.cpp index 9e50fa0d7..aa58af777 100644 --- a/phlex/core/framework_graph.cpp +++ b/phlex/core/framework_graph.cpp @@ -3,6 +3,7 @@ #include "phlex/concurrency.hpp" #include "phlex/core/make_computational_edges.hpp" #include "phlex/model/product_store.hpp" +#include "phlex/utilities/bulleted_list.hpp" #include "fmt/format.h" #include "fmt/ranges.h" @@ -69,7 +70,7 @@ namespace phlex::experimental { std::size_t framework_graph::seen_cell_count(std::string const& layer_name, bool const missing_ok) const { - return hierarchy_.count_for(layer_name, missing_ok); + return hierarchy_.count_for(experimental::layer_path(layer_name), missing_ok); } std::size_t framework_graph::execution_count(std::string const& node_name) const @@ -136,7 +137,7 @@ namespace phlex::experimental { return; } throw std::runtime_error( - fmt::format("\nConfiguration errors:\n - {}", fmt::join(registration_errors_, "\n - "))); + fmt::format("\nConfiguration errors:\n{}", bulleted_list(registration_errors_))); } void framework_graph::make_filter_edges() diff --git a/phlex/core/index_router.cpp b/phlex/core/index_router.cpp index aeae3db3c..2482e9446 100644 --- a/phlex/core/index_router.cpp +++ b/phlex/core/index_router.cpp @@ -1,6 +1,7 @@ #include "phlex/core/index_router.hpp" #include "phlex/model/flush_gate.hpp" +#include "phlex/utilities/bulleted_list.hpp" #include "phlex/utilities/hashing.hpp" #include "fmt/std.h" @@ -14,34 +15,6 @@ using namespace phlex::experimental; -namespace { - using layer_path_t = std::vector; - - std::size_t layer_hash_for_path(layer_path_t const& layer_path) - { - std::size_t result = "job"_id.hash(); - for (auto const& layer_name : layer_path | std::views::drop(1)) { - result = hash(result, identifier{layer_name}.hash()); - } - return result; - } - - bool is_strict_prefix(layer_path_t const& candidate, layer_path_t const& other) - { - // FIXME: Use std::ranges::starts_with(other, candidate) once the compilers support it (C++23) - return candidate.size() < other.size() and - std::ranges::mismatch(other, candidate).in2 == std::ranges::end(candidate); - } - - std::string delimited_layer_path(std::string_view const layer_path) - { - if (not layer_path.starts_with("/")) { - return fmt::format("/{}", layer_path); - } - return std::string{layer_path}; - } -} - namespace phlex::experimental { //======================================================================================== @@ -57,7 +30,7 @@ namespace phlex::experimental { void put_message(data_cell_index_ptr const& index, std::size_t message_id); void put_end_token(data_cell_index_ptr const& index, flush_gate const& fc); - bool matches_exactly(std::string const& layer_path) const; + bool matches_exactly(layer_path const& layer_path) const; bool is_parent_of(data_cell_index_ptr const& index) const; private: @@ -93,9 +66,9 @@ namespace phlex::experimental { flusher_.try_put({.index = index, .count = static_cast(fc.committed_total_count())}); } - bool multilayer_slot::matches_exactly(std::string const& layer_path) const + bool multilayer_slot::matches_exactly(layer_path const& layer_path) const { - return layer_path.ends_with(delimited_layer_path(static_cast(layer_))); + return layer_path.ends_with(layer_); } bool multilayer_slot::is_parent_of(data_cell_index_ptr const& index) const @@ -126,21 +99,19 @@ namespace phlex::experimental { { } - void index_router::establish_layers( - std::vector> const& layer_paths_from_driver, - std::vector unfold_input_layer_names, - std::vector unfold_output_layer_names) + void index_router::establish_layers(std::vector const& layer_paths_from_driver, + std::vector unfold_input_layer_names, + std::vector unfold_output_layer_names) { auto sorted_layer_paths = layer_paths_from_driver; std::ranges::sort(sorted_layer_paths); // In sorted order, a path can only be a prefix of paths that follow it. - for (std::size_t i = 0; i < sorted_layer_paths.size(); ++i) { + for (std::size_t i = 0; i + 1 < sorted_layer_paths.size(); ++i) { bool const is_not_lowest_layer = - i + 1 < sorted_layer_paths.size() and - is_strict_prefix(sorted_layer_paths[i], sorted_layer_paths[i + 1]); + sorted_layer_paths[i].is_strict_prefix_of(sorted_layer_paths[i + 1]); if (is_not_lowest_layer) { - auto const layer_hash = layer_hash_for_path(sorted_layer_paths[i]); + auto const layer_hash = sorted_layer_paths[i].hash(); is_lowest_layer_hashes_.emplace(layer_hash, false); } } @@ -252,19 +223,17 @@ namespace phlex::experimental { return it->second; } - std::string const layerish_path{static_cast(index->layer_name())}; + layer_path const layerish_path{{index->layer_name()}}; auto broadcaster = index_set_node_for(layerish_path); index_set_node_cache_.insert({layer_hash, broadcaster}); return broadcaster; } - auto index_router::index_set_node_for(std::string const& layer_path) -> detail::index_set_node_ptr + auto index_router::index_set_node_for(layer_path const& layer_path) -> detail::index_set_node_ptr { - std::string const search_token = delimited_layer_path(layer_path); - std::vector candidates; for (auto it = index_set_nodes_.begin(), e = index_set_nodes_.end(); it != e; ++it) { - if (search_token.ends_with(delimited_layer_path(static_cast(it->first)))) { + if (layer_path.ends_with(it->first)) { candidates.push_back(it); } } @@ -277,10 +246,10 @@ namespace phlex::experimental { return nullptr; } - std::string msg = fmt::format("Multiple layers match specification {}:\n", layer_path); - for (auto const& it : candidates) { - msg += fmt::format("\n- {}", it->first); - } + std::string msg = fmt::format( + "Multiple layers match specification {}:\n{}", + layer_path, + bulleted_list(candidates | std::views::transform([](auto const& it) { return it->first; }))); throw std::runtime_error(msg); } diff --git a/phlex/core/index_router.hpp b/phlex/core/index_router.hpp index 366f1b829..8e0fb8bb3 100644 --- a/phlex/core/index_router.hpp +++ b/phlex/core/index_router.hpp @@ -56,7 +56,7 @@ namespace phlex::experimental { explicit index_router(tbb::flow::graph& g); data_cell_index_ptr route(data_cell_index_ptr const& index, index_flushes flushes); - void establish_layers(std::vector> const& layer_paths_from_driver, + void establish_layers(std::vector const& layer_paths_from_driver, std::vector unfold_input_layer_names, std::vector unfold_output_layer_names); @@ -91,7 +91,7 @@ namespace phlex::experimental { // correct for unfold outputs (the only source of unknown hashes) and consistent with // index_is_lowest_layer()'s fall-through default. bool is_lowest_layer_hash(std::size_t layer_hash) const; - detail::index_set_node_ptr index_set_node_for(std::string const& layer); + detail::index_set_node_ptr index_set_node_for(layer_path const& layer); detail::index_set_node_ptr index_set_node_for(data_cell_index_ptr const& index); std::pair multilayer_slots_for( data_cell_index_ptr const& index); diff --git a/phlex/core/input_arguments.hpp b/phlex/core/input_arguments.hpp index 350146538..34053ef3a 100644 --- a/phlex/core/input_arguments.hpp +++ b/phlex/core/input_arguments.hpp @@ -4,6 +4,7 @@ #include "phlex/core/message.hpp" #include "phlex/core/product_selector.hpp" #include "phlex/model/handle.hpp" +#include "phlex/utilities/bulleted_list.hpp" #include "fmt/format.h" @@ -32,18 +33,16 @@ namespace phlex::experimental { std::ranges::to(); if (products.empty()) { throw std::runtime_error(fmt::format( - "No products found matching the query {}\n Store (id {} from {}) contains:\n - {}", + "No products found matching the query {}\n Store (id {} from {}) contains:\n{}", query, store->index()->to_string(), store->source().to_string(), - fmt::join(all_products | views::transform(&product_specification::to_string), - "\n - "))); + bulleted_list(all_products, /*indent=*/4))); } if (products.size() > 1) { - throw std::runtime_error(fmt::format( - "Multiple products found matching the query {}:\n - {}", - query, - fmt::join(products | views::transform(&product_specification::to_string), "\n - "))); + throw std::runtime_error(fmt::format("Multiple products found matching the query {}:\n{}", + query, + bulleted_list(products, /*indent=*/4))); } return store->get_handle(products[0]); } diff --git a/phlex/core/producer_catalog.cpp b/phlex/core/producer_catalog.cpp index 1bf365705..7434a339a 100644 --- a/phlex/core/producer_catalog.cpp +++ b/phlex/core/producer_catalog.cpp @@ -1,4 +1,5 @@ #include "phlex/core/producer_catalog.hpp" +#include "phlex/utilities/bulleted_list.hpp" #include "fmt/format.h" #include "fmt/ranges.h" @@ -75,9 +76,9 @@ namespace phlex::experimental { } if (candidates.size() > 1ull) { - std::string msg = fmt::format("More than one candidate matches the query {}: \n - {}\n", + std::string msg = fmt::format("More than one candidate matches the query {}: \n{}\n", query.to_string(), - fmt::join(std::views::keys(candidates), "\n - ")); + bulleted_list(std::views::keys(candidates), /*indent=*/1)); throw std::runtime_error(msg); } diff --git a/phlex/model/CMakeLists.txt b/phlex/model/CMakeLists.txt index 85bef091d..0539e1468 100644 --- a/phlex/model/CMakeLists.txt +++ b/phlex/model/CMakeLists.txt @@ -11,6 +11,7 @@ cet_make_library( data_layer_hierarchy.cpp data_cell_index.cpp identifier.cpp + layer_path.cpp product_matcher.cpp product_store.cpp products.cpp @@ -39,6 +40,7 @@ install( data_layer_hierarchy.hpp data_cell_index.hpp identifier.hpp + layer_path.hpp product_matcher.hpp product_specification.hpp product_store.hpp diff --git a/phlex/model/data_cell_index.cpp b/phlex/model/data_cell_index.cpp index 75fb5c852..98701faab 100644 --- a/phlex/model/data_cell_index.cpp +++ b/phlex/model/data_cell_index.cpp @@ -1,16 +1,13 @@ #include "phlex/model/data_cell_index.hpp" #include "phlex/utilities/hashing.hpp" -#include "boost/algorithm/string.hpp" #include "fmt/format.h" -#include "fmt/ranges.h" #include +#include #include #include -#include #include -#include #include using namespace std::string_literals; @@ -62,15 +59,17 @@ namespace phlex { return layer_name_; } - std::string data_cell_index::layer_path() const + experimental::layer_path data_cell_index::layer_path() const { - std::vector layers_in_reverse{std::string_view(layer_name_)}; - auto next_parent = parent(); - while (next_parent) { - layers_in_reverse.push_back(std::string_view(next_parent->layer_name())); - next_parent = next_parent->parent(); + // We know how deep we are so we can pre-allocate and fill in reverse + std::vector layers(depth_ + 1); + auto const* ptr = this; + for (auto& layer : std::views::reverse(layers)) { + assert(ptr); + layer = ptr->layer_name(); + ptr = ptr->parent_.get(); } - return fmt::format("/{}", fmt::join(std::views::reverse(layers_in_reverse), "/")); + return experimental::layer_path{std::move(layers)}; } std::size_t data_cell_index::depth() const noexcept { return depth_; } diff --git a/phlex/model/data_cell_index.hpp b/phlex/model/data_cell_index.hpp index 084baade9..3bd618bc0 100644 --- a/phlex/model/data_cell_index.hpp +++ b/phlex/model/data_cell_index.hpp @@ -5,6 +5,7 @@ #include "phlex/model/fwd.hpp" #include "phlex/model/identifier.hpp" +#include "phlex/model/layer_path.hpp" #include #include @@ -23,7 +24,7 @@ namespace phlex { using hash_type = std::size_t; data_cell_index_ptr make_child(std::string layer_name, std::size_t data_cell_number) const; experimental::identifier const& layer_name() const noexcept; - std::string layer_path() const; + experimental::layer_path layer_path() const; std::size_t depth() const noexcept; data_cell_index_ptr parent(experimental::identifier const& layer_name) const; data_cell_index_ptr parent() const noexcept; diff --git a/phlex/model/data_layer_hierarchy.cpp b/phlex/model/data_layer_hierarchy.cpp index 6daf692ea..fb1d7a6a2 100644 --- a/phlex/model/data_layer_hierarchy.cpp +++ b/phlex/model/data_layer_hierarchy.cpp @@ -1,10 +1,12 @@ #include "phlex/model/data_layer_hierarchy.hpp" -#include "phlex/model/data_cell_index.hpp" + +#include "phlex/utilities/bulleted_list.hpp" #include "fmt/format.h" -#include "fmt/std.h" #include "spdlog/spdlog.h" +#include + namespace { std::string const& maybe_name(std::string const& name) { @@ -35,32 +37,32 @@ namespace phlex::experimental { ++it->second->count; } - std::size_t data_layer_hierarchy::count_for(std::string const& layer, bool const missing_ok) const + std::size_t data_layer_hierarchy::count_for(layer_path const& layer, bool const missing_ok) const { - // The assumption is that specified layer is the component of a layer path - std::string search_token = layer; - if (not layer.starts_with("/")) { - search_token = '/' + layer; - } - + // The assumption is that specified layer is a portion of a layer path + // sufficient to uniquely identify a layer std::vector candidates; for (auto const& [_, entry] : layers_) { - if (entry->layer_path.ends_with(search_token)) { + if (entry->layer_path.ends_with(layer)) { candidates.push_back(entry.get()); } } if (candidates.empty()) { return missing_ok ? 0ull - : throw std::runtime_error("No layers match the specification " + layer); + : throw std::runtime_error( + fmt::format("No layers match the specification {}", layer)); } if (candidates.size() > 1ull) { - std::string msg{"The following data layers match the specification " + layer + ":\n"}; - for (auto const* entry : candidates) { - msg += "\n- " + entry->layer_path; - } - msg += "\n\nPlease specify the full layer path to disambiguate between them."; + std::string msg = + fmt::format("The following data layers match the specification {}:\n\n{}" + "\n\nPlease specify the full layer path to disambiguate between them.", + layer, + bulleted_list(candidates | std::views::transform([](auto const* entry) { + return entry->layer_path; + }), + /*indent=*/0)); throw std::runtime_error(msg); } @@ -86,7 +88,7 @@ namespace phlex::experimental { auto child_prefix = !at_end ? indent + " ├ " : indent + " └ "; auto const& entry = *layers_.at(child_hash); result += "\n" + indent + " │ "; - result += fmt::format("\n{}{}: {}", child_prefix, maybe_name(child_name), entry.count); + result += fmt::format("\n{}{}: {}", child_prefix, maybe_name(child_name), entry.count.load()); auto new_indent = indent; new_indent += at_end ? " " : " │ "; diff --git a/phlex/model/data_layer_hierarchy.hpp b/phlex/model/data_layer_hierarchy.hpp index 384fb58d9..ff42bf988 100644 --- a/phlex/model/data_layer_hierarchy.hpp +++ b/phlex/model/data_layer_hierarchy.hpp @@ -5,6 +5,7 @@ #include "phlex/model/data_cell_index.hpp" #include "phlex/model/fwd.hpp" +#include "phlex/model/layer_path.hpp" #include "oneapi/tbb/concurrent_unordered_map.h" @@ -25,7 +26,7 @@ namespace phlex::experimental { data_layer_hierarchy& operator=(data_layer_hierarchy&&) = delete; void increment_count(data_cell_index_ptr const& id); - std::size_t count_for(std::string const& layer, bool missing_ok = false) const; + std::size_t count_for(layer_path const& layer, bool missing_ok = false) const; void print() const; @@ -39,13 +40,13 @@ namespace phlex::experimental { std::string indent = {}) const; struct layer_entry { - layer_entry(identifier n, std::string path, std::size_t par_hash) : + layer_entry(identifier n, layer_path path, std::size_t par_hash) : name{std::move(n)}, layer_path{std::move(path)}, parent_hash{par_hash} { } identifier name; - std::string layer_path; + experimental::layer_path layer_path; std::size_t parent_hash; std::atomic count{}; }; diff --git a/phlex/model/fixed_hierarchy.cpp b/phlex/model/fixed_hierarchy.cpp index c293a2056..48e1f7a63 100644 --- a/phlex/model/fixed_hierarchy.cpp +++ b/phlex/model/fixed_hierarchy.cpp @@ -14,19 +14,6 @@ #include namespace { - // Each path must be non-empty and may only contain "job" as the first element. - std::span validated_path(std::vector const& path) - { - if (path.empty()) { - throw std::runtime_error("Layer paths cannot be empty."); - } - auto const rest = std::span{path}.subspan(path[0] == "job" ? 1 : 0); - if (std::ranges::contains(rest, "job")) { - throw std::runtime_error("Layer paths may only contain 'job' as the first element."); - } - return rest; - } - // Builds the set of cumulative layer hashes that define the fixed hierarchy. // For example, if the layer paths are ["job", "run", "subrun"] and ["job", "spill"], // the hashes included will correspond to: @@ -40,20 +27,31 @@ namespace { // - "job/spill" // // Each path must be non-empty and may only contain "job" as the first element. - std::set build_hashes(std::vector> const& layer_paths) + std::set build_hashes( + std::vector const& layer_paths) { using namespace phlex::experimental; - identifier const job{"job"}; - std::set hashes{job.hash()}; - for (std::vector const& path : layer_paths) { - std::size_t cumulative_hash = job.hash(); - for (auto const& name : validated_path(path)) { - cumulative_hash = hash(cumulative_hash, identifier{name}.hash()); - hashes.insert(cumulative_hash); - } + using namespace phlex::experimental::literals; + std::set hashes{"job"_idq.hash}; + for (layer_path const& path : layer_paths) { + // "job" was added explicitly so it doesn't matter whether it does or does not appear in path.hashes() + hashes.merge(path.hashes()); } return hashes; } + + std::vector convert_vector_vector_string( + std::vector>&& layer_paths) + { + using namespace phlex::experimental; + return std::move(layer_paths) | std::views::transform([](std::vector& lp) { + auto lp_as_ids = + lp | std::views::transform([](auto& str) { return identifier(std::move(str)); }) | + std::ranges::to(); + return layer_path(std::move(lp_as_ids)); + }) | + std::ranges::to>(); + } } namespace phlex { @@ -75,7 +73,7 @@ namespace phlex { return data_cell_cursor{child, hierarchy_, driver_}; } - std::string data_cell_cursor::layer_path() const { return index_->layer_path(); } + experimental::layer_path data_cell_cursor::layer_path() const { return index_->layer_path(); } // ================================================================================ // data_cell_yielder implementation @@ -99,7 +97,8 @@ namespace phlex { } fixed_hierarchy::fixed_hierarchy(std::vector> layer_paths) : - layer_paths_(std::move(layer_paths)), layer_hashes_(std::from_range, build_hashes(layer_paths_)) + layer_paths_(convert_vector_vector_string(std::move(layer_paths))), + layer_hashes_(std::from_range, build_hashes(layer_paths_)) { } diff --git a/phlex/model/fixed_hierarchy.hpp b/phlex/model/fixed_hierarchy.hpp index 943785a28..4d419b05f 100644 --- a/phlex/model/fixed_hierarchy.hpp +++ b/phlex/model/fixed_hierarchy.hpp @@ -4,6 +4,7 @@ #include "phlex/phlex_model_export.hpp" #include "phlex/model/fwd.hpp" +#include "phlex/model/layer_path.hpp" #include #include @@ -25,7 +26,7 @@ namespace phlex { // data-cell index to the underlying driver, returning a data_cell_cursor for the child. data_cell_cursor yield_child(std::string const& layer_name, std::size_t number) const; - std::string layer_path() const; + experimental::layer_path layer_path() const; private: friend class fixed_hierarchy; @@ -87,7 +88,7 @@ namespace phlex { data_cell_yielder yielder(experimental::framework_driver& d) const; private: - std::vector> layer_paths_; + std::vector layer_paths_; std::vector layer_hashes_; }; diff --git a/phlex/model/handle.hpp b/phlex/model/handle.hpp index 1ee4ff5bc..7acb768f8 100644 --- a/phlex/model/handle.hpp +++ b/phlex/model/handle.hpp @@ -95,7 +95,7 @@ namespace phlex { } std::string_view suffix() const noexcept { return std::string_view(suffix_); } std::string_view layer() const noexcept { return std::string_view(id_->layer_name()); } - std::string layer_path() const { return id_->layer_path(); } + std::string layer_path() const { return id_->layer_path().to_string(); } template friend class handle; diff --git a/phlex/model/identifier.cpp b/phlex/model/identifier.cpp index 5a698fc75..998fc8c0a 100644 --- a/phlex/model/identifier.cpp +++ b/phlex/model/identifier.cpp @@ -4,20 +4,6 @@ #include namespace phlex::experimental { - identifier_query literals::operator""_idq(char const* lit, std::size_t len) - { - return {identifier::hash_string(std::string_view(lit, len))}; - } - - std::uint64_t identifier::hash_string(std::string_view str) - { - // Hash quality is very important here, since comparisons are done using only the hash - using namespace boost::hash2; - xxhash_64 h; - hash_append(h, {}, str); - return h.result(); - } - identifier::identifier(std::string_view str) : content_(str), hash_(hash_string(content_)) {} identifier::identifier(std::string&& str) : content_(std::move(str)), hash_(hash_string(content_)) { @@ -50,4 +36,6 @@ namespace phlex::experimental { { return identifier{std::string_view(lit, len)}; } + + bool identifier_query::operator()(identifier const& id) const noexcept { return id == (*this); } } diff --git a/phlex/model/identifier.hpp b/phlex/model/identifier.hpp index d99757ab0..ade0da0e5 100644 --- a/phlex/model/identifier.hpp +++ b/phlex/model/identifier.hpp @@ -3,6 +3,8 @@ #include "phlex/phlex_model_export.hpp" +#include +#include #include #include @@ -16,15 +18,25 @@ namespace phlex::experimental { /// If you're comparing to an identifier you know at compile time, you're probably not going to need /// to print it. - struct identifier_query { + class identifier; + struct PHLEX_MODEL_EXPORT identifier_query { std::uint64_t hash; + // This means an identifier_query can be used as a callable to check if it compares equal to an identifier + bool operator()(identifier const& id) const noexcept; }; /// Carries around the string itself (as a shared_ptr to string to make copies lighter) /// along with a precomputed hash used for all comparisons class PHLEX_MODEL_EXPORT identifier { public: - static std::uint64_t hash_string(std::string_view str); + static constexpr std::uint64_t hash_string(std::string_view str) + { + // Hash quality is very important here, since comparisons are done using only the hash + using namespace boost::hash2; + xxhash_64 h; + hash_append(h, {}, str); + return h.result(); + } // The default constructor is necessary so other classes containing identifiers // can have default constructors. identifier() = default; @@ -74,11 +86,16 @@ namespace phlex::experimental { // Identifier UDL namespace literals { PHLEX_MODEL_EXPORT identifier operator""_id(char const* lit, std::size_t len); - PHLEX_MODEL_EXPORT identifier_query operator""_idq(char const* lit, std::size_t len); + consteval PHLEX_MODEL_EXPORT identifier_query operator""_idq(char const* lit, std::size_t len) + { + return {identifier::hash_string(std::string_view(lit, len))}; + } + } // Really trying to avoid the extra function call here inline std::string_view format_as(identifier const& id) { return std::string_view(id); } + inline std::size_t hash_value(identifier const& id) { return id.hash(); } } template <> diff --git a/phlex/model/layer_path.cpp b/phlex/model/layer_path.cpp new file mode 100644 index 000000000..3c15e506e --- /dev/null +++ b/phlex/model/layer_path.cpp @@ -0,0 +1,103 @@ +#include "layer_path.hpp" + +#include "phlex/utilities/hashing.hpp" + +#include "boost/container_hash/hash.hpp" +#include "fmt/format.h" +#include "fmt/ranges.h" + +#include +#include + +using namespace phlex::experimental::literals; + +namespace phlex::experimental { + layer_path::layer_path(std::string_view path) : + layer_path_{ + std::from_range, + path | std::views::split('/') | + std::views::filter([](auto const& sr) { return not sr.empty(); }) | + std::views::transform([](auto const& sr) { return identifier(std::string_view(sr)); })} + { + if (layer_path_.empty()) { + throw std::runtime_error("Layer paths cannot be empty."); + } + if (path.starts_with("/") and not is_complete()) { + throw std::runtime_error( + fmt::format("A complete layer path must start with '/job'. '{}' does not!", path)); + } + + validate(); + } + + void layer_path::validate() const + { + using namespace literals; + if (layer_path_.empty()) { + throw std::runtime_error("Layer paths cannot be empty."); + } + + // We can use any_of because identifier_query has a function call operator + if (layer_path_.size() > 1 && + std::ranges::any_of(std::span{layer_path_}.subspan(1), "job"_idq)) { + throw std::runtime_error("Layer paths may only contain 'job' as the first element."); + } + } + bool layer_path::is_complete() const noexcept { return layer_path_[0] == "job"_idq; } + + bool layer_path::is_strict_prefix_of(layer_path const& other) const noexcept + { + if (layer_path_.size() >= other.layer_path_.size()) { + // Optimization, and address Codex observation that a path is not a _strict_ prefix of itself + return false; + } + // starts_with / ends_with aren't supported until libstdc++ *16* + // return std::ranges::starts_with(other.layer_path_, layer_path_); + auto const& [it, other_it] = std::ranges::mismatch(layer_path_, other.layer_path_); + return it == layer_path_.end(); + } + + bool layer_path::ends_with(layer_path const& other) const noexcept + { + // starts_with / ends_with aren't supported until libstdc++ *16* + // return std::ranges::ends_with(layer_path_, other.layer_path_); + auto rev_layer_path = std::views::reverse(layer_path_); + auto rev_other_layer_path = std::views::reverse(other.layer_path_); + auto const& [it, other_it] = std::ranges::mismatch(rev_layer_path, rev_other_layer_path); + return other_it == rev_other_layer_path.end(); + } + + bool layer_path::ends_with(identifier const& name) const noexcept + { + return layer_path_.back() == name; + } + + std::string layer_path::to_string() const + { + return fmt::format("{}{}", is_complete() ? "/" : "", fmt::join(layer_path_, "/")); + } + + std::size_t layer_path::hash() const noexcept + { + // incomplete paths have an implied "job" root in this calculation + // Need the experimental:: so it isn't confused for this function + std::size_t seed = + is_complete() ? "job"_idq.hash : experimental::hash("job"_idq.hash, layer_path_[0].hash()); + boost::hash_range(seed, layer_path_.begin() + 1, layer_path_.end()); + return seed; + } + + std::set layer_path::hashes() const + { + std::set hashes; + // Add the appropriate first hash + std::size_t cumulative_hash = + is_complete() ? "job"_idq.hash : experimental::hash("job"_idq.hash, layer_path_[0].hash()); + hashes.insert(cumulative_hash); + for (auto const& name : layer_path_ | std::views::drop(1)) { + cumulative_hash = experimental::hash(cumulative_hash, name.hash()); + hashes.insert(cumulative_hash); + } + return hashes; + } +} diff --git a/phlex/model/layer_path.hpp b/phlex/model/layer_path.hpp new file mode 100644 index 000000000..112f268af --- /dev/null +++ b/phlex/model/layer_path.hpp @@ -0,0 +1,61 @@ +#ifndef PHLEX_MODEL_LAYER_PATH_HPP +#define PHLEX_MODEL_LAYER_PATH_HPP +#include "phlex/model/identifier.hpp" +#include "phlex/phlex_model_export.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace phlex::experimental { + class PHLEX_MODEL_EXPORT layer_path { + public: + layer_path(std::vector const& path) : layer_path_{path} { validate(); } + layer_path(std::vector&& path) : layer_path_{std::move(path)} { validate(); } + layer_path(std::string_view path); + + template + requires std::constructible_from && (!std::same_as) + layer_path(T const& path) : layer_path(std::string_view(path)) + { + } + + auto operator<=>(layer_path const&) const noexcept = default; + + /// Is this path complete (does it start with "job") + bool is_complete() const noexcept; + + bool is_strict_prefix_of(layer_path const& other) const noexcept; + + bool ends_with(layer_path const& other) const noexcept; + + bool ends_with(identifier const& name) const noexcept; + + std::string to_string() const; + + /// This function assumes incomplete paths have an implicit job root + std::size_t hash() const noexcept; + + /// Return hash of this path and every parent path + std::set hashes() const; + + private: + std::vector layer_path_; + void validate() const; + }; + + inline std::string format_as(layer_path const& lp) { return lp.to_string(); } + inline std::size_t hash_value(layer_path const& lp) { return lp.hash(); } + + // Required by catch2 + inline std::ostream& operator<<(std::ostream& os, layer_path const& lp) + { + return os << lp.to_string(); + } +} +#endif // PHLEX_MODEL_LAYER_PATH_HPP diff --git a/phlex/utilities/CMakeLists.txt b/phlex/utilities/CMakeLists.txt index 9f3fca0db..c9878734e 100644 --- a/phlex/utilities/CMakeLists.txt +++ b/phlex/utilities/CMakeLists.txt @@ -16,6 +16,7 @@ install( FILES resumable_driver.hpp hashing.hpp + bulleted_list.hpp max_allowed_parallelism.hpp resource_usage.hpp simple_ptr_map.hpp diff --git a/phlex/utilities/bulleted_list.hpp b/phlex/utilities/bulleted_list.hpp new file mode 100644 index 000000000..ec67c6034 --- /dev/null +++ b/phlex/utilities/bulleted_list.hpp @@ -0,0 +1,51 @@ +#ifndef PHLEX_UTILITIES_BULLETED_LIST_HPP +#define PHLEX_UTILITIES_BULLETED_LIST_HPP + +#include +#include + +#include +#include + +namespace phlex::experimental { + namespace detail { + template + concept range_of_formattable = fmt::formattable>; + + template + concept range_of_to_stringable = requires(std::ranges::range_value_t const& val) { + { val.to_string() } -> std::same_as; + }; + } + + template + std::string bulleted_list(R const& rng, std::size_t indent = 2) + { + if (std::ranges::empty(rng)) { + return ""; + } + std::string prefix = + fmt::format("{blank:{indent}s}- ", fmt::arg("blank", ""), fmt::arg("indent", indent)); + std::string prefix_with_newline = fmt::format("\n{}", prefix); + return fmt::format("{}{}", prefix, fmt::join(rng, prefix_with_newline)); + } + + template + requires(!detail::range_of_formattable) + std::string bulleted_list(R const& rng, std::size_t indent = 2) + { + if (std::ranges::empty(rng)) { + return ""; + } + std::string prefix = + fmt::format("{blank:{indent}s}- ", fmt::arg("blank", ""), fmt::arg("indent", indent)); + std::string prefix_with_newline = fmt::format("\n{}", prefix); + return fmt::format( + "{}{}", + prefix, + fmt::join(rng | std::views::transform([](auto&& v) { return v.to_string(); }), + prefix_with_newline)); + } +} + +#endif // PHLEX_UTILITIES_BULLETED_LIST_HPP diff --git a/plugins/layer_generator.cpp b/plugins/layer_generator.cpp index 96a5f08e7..2b9fd3abd 100644 --- a/plugins/layer_generator.cpp +++ b/plugins/layer_generator.cpp @@ -133,11 +133,13 @@ namespace phlex::experimental { index_generator layer_generator::execute(data_cell_index_ptr const cell) { - auto it = parent_to_children_.find(cell->layer_path()); + // Used in drivers which are close to public API --> easier to stick to strings + auto cell_lp = cell->layer_path().to_string(); + auto it = parent_to_children_.find(cell_lp); assert(it != parent_to_children_.cend()); for (auto const& child : it->second) { - auto const full_child_path = cell->layer_path() + "/" + child; + auto const full_child_path = fmt::format("{}/{}", cell_lp, child); auto const& [_, total_per_parent, starting_value] = layers_.at(full_child_path); bool const has_children = parent_to_children_.contains(full_child_path); for (unsigned int i : std::views::iota(starting_value, total_per_parent + starting_value)) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fba0d349c..4c2d93575 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -29,6 +29,7 @@ cet_test(type_deduction SOURCE type_deduction.cpp LIBRARIES phlex::metaprogramming ) cet_test(type_id USE_CATCH2_MAIN SOURCE type_id.cpp LIBRARIES phlex::model_internal fmt::fmt) +cet_test(layer_path USE_CATCH2_MAIN SOURCE layer_path.cpp LIBRARIES phlex::model_internal fmt::fmt) cet_test(identifier USE_CATCH2_MAIN SOURCE identifier.cpp LIBRARIES phlex::model phlex::configuration Boost::json) cet_test( yielding_driver diff --git a/test/core_misc_test.cpp b/test/core_misc_test.cpp index 1a092a67f..30716c633 100644 --- a/test/core_misc_test.cpp +++ b/test/core_misc_test.cpp @@ -3,6 +3,7 @@ #include "phlex/core/glue.hpp" #include "phlex/core/registrar.hpp" #include "phlex/model/algorithm_name.hpp" +#include "phlex/utilities/bulleted_list.hpp" #include #include @@ -45,6 +46,17 @@ TEST_CASE("algorithm_name tests", "[model]") CHECK(an.match(algorithm_name::create("p:a"))); CHECK_FALSE(an.match(algorithm_name::create("p:b"))); } + SECTION("Bulleted list of algorithm_name") + { + std::vector ans = {algorithm_name::create("p:a1"), + algorithm_name::create("p:a2")}; + CHECK(bulleted_list(ans) == " - p:a1\n - p:a2"); + } + SECTION("Empty bulleted list") + { + CHECK(bulleted_list(std::vector{}) == ""); + CHECK(bulleted_list(std::vector{}) == ""); + } } TEST_CASE("consumer tests", "[core]") diff --git a/test/layer_path.cpp b/test/layer_path.cpp new file mode 100644 index 000000000..f40966f93 --- /dev/null +++ b/test/layer_path.cpp @@ -0,0 +1,43 @@ +#include "phlex/model/layer_path.hpp" + +#include "catch2/catch_test_macros.hpp" + +using namespace phlex::experimental; + +TEST_CASE("Layer path tests", "[layer_path]") +{ + layer_path job = "/job"; + layer_path run = "/job/run"; + layer_path lumiblock = "/job/run/lumiblock"; + layer_path subrun = "/job/run/subrun"; + layer_path event = "/job/run/subrun/event"; + + identifier event_id = "event"; + layer_path partial_event = "subrun/event"; + + CHECK(run.is_complete()); + CHECK(run.is_strict_prefix_of(event)); + CHECK(run.is_strict_prefix_of(subrun)); + CHECK_FALSE(lumiblock.is_strict_prefix_of(event)); + + CHECK(event.ends_with(event_id)); + CHECK_FALSE(partial_event.is_complete()); + CHECK(event.ends_with(partial_event)); + CHECK_FALSE(subrun.ends_with(partial_event)); + + CHECK(subrun.to_string() == "/job/run/subrun"); + + auto event_hashes = event.hashes(); + CHECK(event_hashes.contains(job.hash())); + CHECK(event_hashes.contains(run.hash())); + CHECK(event_hashes.contains(subrun.hash())); + CHECK(event_hashes.contains(event.hash())); + CHECK_FALSE(event_hashes.contains(lumiblock.hash())); + + // Validation + CHECK_THROWS(layer_path("")); + CHECK_THROWS(layer_path("/notajob/notarun")); + CHECK_THROWS(layer_path(std::vector{})); + CHECK_THROWS(layer_path("/job/run/job")); + CHECK_THROWS(layer_path("subrun/job")); +} From 4c4741b7cd11e957324ff1d755338946880407f4 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Thu, 18 Jun 2026 15:56:36 -0700 Subject: [PATCH 20/41] move adding of properties to the end to ensure test is defined first --- test/python/CMakeLists.txt | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt index c869ddb0d..769f06826 100644 --- a/test/python/CMakeLists.txt +++ b/test/python/CMakeLists.txt @@ -85,19 +85,6 @@ else() endif() endif() -if(HAS_CPPYY) - # Export the location of phlex include headers - if(DEFINED ENV{PHLEX_INSTALL}) - set(PYTHON_TEST_PHLEX_INSTALL $ENV{PHLEX_INSTALL}) - else() - set(PYTHON_TEST_PHLEX_INSTALL ${CMAKE_SOURCE_DIR}) - endif() - - set_property(TEST py:phlex PROPERTY ENVIRONMENT "PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}") - - set(PYTHON_TEST_FILES ${PYTHON_TEST_FILES} test_phlex.py) -endif() - set(ACTIVE_PY_CPHLEX_TESTS "") # numpy support if installed @@ -299,3 +286,17 @@ endif() # tests of the python support modules add_test(NAME py:phlex COMMAND ${PYTEST_COMMAND} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) set_tests_properties(py:phlex PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}") + +# additional environment settings if cppyy is available (set after all tests defined) +if(HAS_CPPYY) + # Export the location of phlex include headers + if(DEFINED ENV{PHLEX_INSTALL}) + set(PYTHON_TEST_PHLEX_INSTALL $ENV{PHLEX_INSTALL}) + else() + set(PYTHON_TEST_PHLEX_INSTALL ${CMAKE_SOURCE_DIR}) + endif() + + set_property(TEST py:phlex PROPERTY ENVIRONMENT "PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}") + + set(PYTHON_TEST_FILES ${PYTHON_TEST_FILES} test_phlex.py) +endif() From 90cc0ed330397b98ccc5296fc17f144d2b4ff227 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Thu, 18 Jun 2026 15:57:13 -0700 Subject: [PATCH 21/41] add support for Numba-jited observers --- plugins/python/python/phlex/_typing.py | 1 + plugins/python/src/dyncall.cpp | 2 ++ plugins/python/src/modulewrap.cpp | 30 ++++++++++++++++++++++---- test/python/jited.py | 8 ++++--- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/plugins/python/python/phlex/_typing.py b/plugins/python/python/phlex/_typing.py index e25b43618..a8934fec4 100644 --- a/plugins/python/python/phlex/_typing.py +++ b/plugins/python/python/phlex/_typing.py @@ -61,6 +61,7 @@ nb_types.Float: "float", nb_types.float32: "float", nb_types.double: "double", + nb_types.void: "None", }) # ctypes types that don't map cleanly to intN_t / uintN_t diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index c7ca48565..da7101268 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -29,6 +29,8 @@ phlex::experimental::dcarg phlex::experimental::dcarg::from_str(std::string cons return dcarg(0.0f); else if (stype == "double") return dcarg(0.0); + else if (stype == "void") + return dcarg{}; throw std::invalid_argument("unknown type string: " + stype); } diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 309a36ceb..ce96c77e0 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -1105,13 +1105,26 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds if (!callable) return nullptr; // error already set + // detect numba and extract C function pointer if any, else use default Python + // callable dispatcher + void* ccallf = nullptr; + if (is_numba_cfunc(callable)) { + PyObject* pyaddr = PyObject_GetAttrString(callable, "address"); + if (pyaddr) + ccallf = PyLong_AsVoidPtr(pyaddr); + if (!ccallf) + PyErr_Clear(); + } + if (!output_types.empty()) { - PyErr_Format(PyExc_TypeError, "an observer should not have an output type"); + PyErr_Format(PyExc_TypeError, + "an observer should not have an output type (got: \"%s\")", + output_types[0].c_str()); Py_DECREF(callable); return nullptr; } - if (!insert_input_converters(mod, cname, input_selectors, input_types, true)) { + if (!insert_input_converters(mod, cname, input_selectors, input_types, !ccallf)) { Py_DECREF(callable); return nullptr; // error already set } @@ -1130,8 +1143,17 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds .creator = identifier(c), .layer = pq.layer, .suffix = identifier(suff)}; }; - mod->ph_module->observe(cname, py_callback{callable}, nconcur) - .input_family(make_product_selector(Is)...); + auto insert_observe_for_callback = [&](auto& cb) { + mod->ph_module->observe(cname, cb, nconcur).input_family(make_product_selector(Is)...); + }; + + if (ccallf) { + jit_callback cb{callable, ccallf, "void"}; + insert_observe_for_callback(cb); + } else { + py_callback cb{callable}; + insert_observe_for_callback(cb); + } }; if (!unroll_switch(input_selectors.size(), observe_N_args)) { diff --git a/test/python/jited.py b/test/python/jited.py index 875a270c5..baefafeb4 100644 --- a/test/python/jited.py +++ b/test/python/jited.py @@ -55,7 +55,9 @@ def o(y): output_product_suffixes=["sum_"+tn], concurrency=4) - o = Variant(new_o(res), {"y": t, "return": None}, "observe_" + tn) - m.observe(o, - input_family=[{"creator": "add_" + tn, "layer": "event", "suffix": "sum_"+tn}]) + f_o = nb_dec.cfunc(f"void({tn})", nogil=True, nopython=True, cache=True)(new_o(res)) + m.observe(f_o, + name="obs_"+tn, + input_family=[{"creator": "add_" + tn, "layer": "event", "suffix": "sum_"+tn}], + concurrency=4) From 7b919b9bc8d6e8e5cb2c6c236be48a58e4f9ea09 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Thu, 18 Jun 2026 16:58:20 -0700 Subject: [PATCH 22/41] properly NULL pointers in callback move operators --- plugins/python/src/modulewrap.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index ce96c77e0..d173d487f 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -135,7 +135,6 @@ namespace { py_callback_base& operator=(py_callback_base const& pc) { if (this != &pc) { - // Must hold GIL when manipulating reference counts PyGILRAII gil; Py_INCREF(pc.m_callable); Py_DECREF(m_callable); @@ -144,6 +143,24 @@ namespace { } return *this; } + py_callback_base(py_callback_base&& other) noexcept : + m_callable(other.m_callable), m_ccallback(other.m_ccallback) + { + other.m_callable = nullptr; + other.m_ccallback = nullptr; + } + py_callback_base& operator=(py_callback_base&& other) noexcept + { + if (this != &other) { + PyGILRAII gil; + Py_DECREF(m_callable); + m_callable = other.m_callable; + m_ccallback = other.m_ccallback; + other.m_callable = nullptr; + other.m_ccallback = nullptr; + } + return *this; + } virtual ~py_callback_base() { // TODO: cleanup deferred to Phlex shutdown hook @@ -151,8 +168,6 @@ namespace { // - TOCTOU race on Py_IsInitialized() without GIL // - Module offloading in interpreter cleanup phase 2 } - py_callback_base(py_callback_base&&) = default; - py_callback_base& operator=(py_callback_base&&) = default; }; // type repeater to automatically instantiate callbacks taking N args From 9bf1f5bc7098e877fc6053ff639af01008956e50 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 22 Jun 2026 11:09:30 -0700 Subject: [PATCH 23/41] address formatting issues --- test/python/jited.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/python/jited.py b/test/python/jited.py index baefafeb4..93acc3ee8 100644 --- a/test/python/jited.py +++ b/test/python/jited.py @@ -9,8 +9,6 @@ import numpy as np from adder import add -from phlex import Variant - # arg0 suff, arg1 suff, type, result specs = ( ("i", "j", np.int32, 1), @@ -27,7 +25,7 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): Use the standard Phlex `transform` registration to insert a node in the execution graph of a Numba-jited Python function that receives two inputs - and produces their sum as an ouput. + and produces their sum as an output. Similarly, use the standard Phlex `observe` to add verifier nodes. @@ -58,6 +56,8 @@ def o(y): f_o = nb_dec.cfunc(f"void({tn})", nogil=True, nopython=True, cache=True)(new_o(res)) m.observe(f_o, name="obs_"+tn, - input_family=[{"creator": "add_" + tn, "layer": "event", "suffix": "sum_"+tn}], + input_family=[ + {"creator": "add_" + tn, "layer": "event", "suffix": "sum_"+tn} + ], concurrency=4) From 29ce5f3e331097ffcaa6922693cd17c7a953ac63 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 22 Jun 2026 15:11:45 -0700 Subject: [PATCH 24/41] gersemi format fix --- plugins/python/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/python/CMakeLists.txt b/plugins/python/CMakeLists.txt index f5203ed6f..0c3efee21 100644 --- a/plugins/python/CMakeLists.txt +++ b/plugins/python/CMakeLists.txt @@ -23,7 +23,10 @@ add_library( src/dyncall.cpp ) -target_link_libraries(pymodule PRIVATE phlex::module phlex::source PkgConfig::FFI Python::Python Python::NumPy) +target_link_libraries( + pymodule + PRIVATE phlex::module phlex::source PkgConfig::FFI Python::Python Python::NumPy +) target_compile_definitions(pymodule PRIVATE NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) install(TARGETS pymodule LIBRARY DESTINATION lib) From af07260135a46ecc91b8551bd66f8c834c70fd7f Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 22 Jun 2026 15:12:12 -0700 Subject: [PATCH 25/41] from coderabbit: spelling errors in comments and missing decrefs --- plugins/python/src/modulewrap.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index d173d487f..3202824b2 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -43,7 +43,7 @@ // can cause a performance bottleneck (since all require the GIL). // This is dumb, but for now, because all templates need to be instantiated, only -// support up to a fixed compile-timee maximum number of arguments. An alternative +// support up to a fixed compile-time maximum number of arguments. An alternative // would be to collect the arguments, but that currently suffers from needing a // "initial" to create the container to collect arguments into. This may all go // away once converter nodes have better support in phlex' core @@ -316,7 +316,7 @@ namespace { if (pq.has_value()) { cargs.push_back(pq.value()); } else { - // validate_selection will have set a python exception + // validate_selector will have set a python exception break; } } @@ -453,6 +453,7 @@ namespace { Py_XDECREF(args); Py_XDECREF(ret); + Py_DECREF(sig); } else { PyErr_Clear(); // the callable may be an instance with a __call__ method @@ -1018,8 +1019,10 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw void* ccallf = nullptr; if (is_numba_cfunc(callable)) { PyObject* pyaddr = PyObject_GetAttrString(callable, "address"); - if (pyaddr) + if (pyaddr) { ccallf = PyLong_AsVoidPtr(pyaddr); + Py_DECREF(pyaddr); + } if (!ccallf) PyErr_Clear(); } @@ -1125,8 +1128,10 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds void* ccallf = nullptr; if (is_numba_cfunc(callable)) { PyObject* pyaddr = PyObject_GetAttrString(callable, "address"); - if (pyaddr) + if (pyaddr) { ccallf = PyLong_AsVoidPtr(pyaddr); + Py_DECREF(pyaddr); + } if (!ccallf) PyErr_Clear(); } From 682960479a2f1fbe941f432b74c14c73cc600c3d Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Mon, 22 Jun 2026 15:12:35 -0700 Subject: [PATCH 26/41] from coderabbit: make get() const --- plugins/python/src/dyncall.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index d3785f031..a38db3ef7 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -75,7 +75,7 @@ namespace phlex::experimental { // value access to payload template - T get() + T get() const { return std::get(m_value); } @@ -83,7 +83,7 @@ namespace phlex::experimental { // specialization to simplify a very common case template <> - inline PyObject* dcarg::get() + inline PyObject* dcarg::get() const { return reinterpret_cast(std::get(m_value)); } From 025390cbc6a3c5780c94d2801c8b4e02090c8fd6 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Wed, 24 Jun 2026 08:56:35 -0700 Subject: [PATCH 27/41] make clang-tidy happy by adding some dead writes --- plugins/python/src/modulewrap.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 3202824b2..9666a0785 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -1007,7 +1007,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw std::string cname; std::vector input_selectors; std::vector input_types, output_suffixes, output_types; - concurrency nconcur; + concurrency nconcur = (concurrency)-1; PyObject* callable = parse_args( args, kwds, cname, input_selectors, input_types, output_suffixes, output_types, nconcur); @@ -1116,7 +1116,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds std::string cname; std::vector input_selectors; std::vector input_types, output_suffixes, output_types; - concurrency nconcur; + concurrency nconcur = (concurrency)-1; PyObject* callable = parse_args( args, kwds, cname, input_selectors, input_types, output_suffixes, output_types, nconcur); From 51de0fe720387a703e877e7cb428e564c98c7260 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 12:00:28 -0700 Subject: [PATCH 28/41] use cfunc from the top-level module instead of the decorators one --- test/python/jited.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/python/jited.py b/test/python/jited.py index 93acc3ee8..436bda744 100644 --- a/test/python/jited.py +++ b/test/python/jited.py @@ -5,7 +5,7 @@ observers for verification. """ -import numba.core.decorators as nb_dec +import numba import numpy as np from adder import add @@ -45,7 +45,7 @@ def o(y): for arg0, arg1, t, res in specs: tn = t.__name__ - f_a = nb_dec.cfunc(f"{tn}({tn}, {tn})", nogil=True, nopython=True, cache=True)(add) + f_a = numba.cfunc(f"{tn}({tn}, {tn})", nogil=True, nopython=True, cache=True)(add) m.transform(f_a, name="add_"+tn, input_family=[{"creator": "input", "layer": "event", "suffix": arg0}, @@ -53,7 +53,7 @@ def o(y): output_product_suffixes=["sum_"+tn], concurrency=4) - f_o = nb_dec.cfunc(f"void({tn})", nogil=True, nopython=True, cache=True)(new_o(res)) + f_o = numba.cfunc(f"void({tn})", nogil=True, nopython=True, cache=True)(new_o(res)) m.observe(f_o, name="obs_"+tn, input_family=[ From de38075128be29f0443b0807eee5b78ce7cbfcea Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 12:06:41 -0700 Subject: [PATCH 29/41] Revert "move adding of properties to the end to ensure test is defined first" This reverts commit 4c4741b7cd11e957324ff1d755338946880407f4. --- test/python/CMakeLists.txt | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt index 769f06826..c869ddb0d 100644 --- a/test/python/CMakeLists.txt +++ b/test/python/CMakeLists.txt @@ -85,6 +85,19 @@ else() endif() endif() +if(HAS_CPPYY) + # Export the location of phlex include headers + if(DEFINED ENV{PHLEX_INSTALL}) + set(PYTHON_TEST_PHLEX_INSTALL $ENV{PHLEX_INSTALL}) + else() + set(PYTHON_TEST_PHLEX_INSTALL ${CMAKE_SOURCE_DIR}) + endif() + + set_property(TEST py:phlex PROPERTY ENVIRONMENT "PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}") + + set(PYTHON_TEST_FILES ${PYTHON_TEST_FILES} test_phlex.py) +endif() + set(ACTIVE_PY_CPHLEX_TESTS "") # numpy support if installed @@ -286,17 +299,3 @@ endif() # tests of the python support modules add_test(NAME py:phlex COMMAND ${PYTEST_COMMAND} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) set_tests_properties(py:phlex PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}") - -# additional environment settings if cppyy is available (set after all tests defined) -if(HAS_CPPYY) - # Export the location of phlex include headers - if(DEFINED ENV{PHLEX_INSTALL}) - set(PYTHON_TEST_PHLEX_INSTALL $ENV{PHLEX_INSTALL}) - else() - set(PYTHON_TEST_PHLEX_INSTALL ${CMAKE_SOURCE_DIR}) - endif() - - set_property(TEST py:phlex PROPERTY ENVIRONMENT "PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}") - - set(PYTHON_TEST_FILES ${PYTHON_TEST_FILES} test_phlex.py) -endif() From c6c0121899fa7e086326a101efacd4e07b3b3d6a Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 12:15:00 -0700 Subject: [PATCH 30/41] move setting of py:phlex properties until after it has been created --- test/python/CMakeLists.txt | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt index c869ddb0d..651ff4cab 100644 --- a/test/python/CMakeLists.txt +++ b/test/python/CMakeLists.txt @@ -86,15 +86,6 @@ else() endif() if(HAS_CPPYY) - # Export the location of phlex include headers - if(DEFINED ENV{PHLEX_INSTALL}) - set(PYTHON_TEST_PHLEX_INSTALL $ENV{PHLEX_INSTALL}) - else() - set(PYTHON_TEST_PHLEX_INSTALL ${CMAKE_SOURCE_DIR}) - endif() - - set_property(TEST py:phlex PROPERTY ENVIRONMENT "PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}") - set(PYTHON_TEST_FILES ${PYTHON_TEST_FILES} test_phlex.py) endif() @@ -299,3 +290,14 @@ endif() # tests of the python support modules add_test(NAME py:phlex COMMAND ${PYTEST_COMMAND} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) set_tests_properties(py:phlex PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}") + +if(HAS_CPPYY) + # Export the location of phlex include headers + if(DEFINED ENV{PHLEX_INSTALL}) + set(PYTHON_TEST_PHLEX_INSTALL $ENV{PHLEX_INSTALL}) + else() + set(PYTHON_TEST_PHLEX_INSTALL ${CMAKE_SOURCE_DIR}) + endif() + + set_property(TEST py:phlex PROPERTY ENVIRONMENT "PHLEX_INSTALL=${PYTHON_TEST_PHLEX_INSTALL}") +endif() From 701fb794b3df7271c1bdc99f1e979a4c22652702 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 12:28:36 -0700 Subject: [PATCH 31/41] check for missing output suffixes on transform and report a proper error if it does --- plugins/python/src/modulewrap.cpp | 6 ++++++ test/python/adder.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 9666a0785..0c022c641 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -1033,6 +1033,12 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw return nullptr; } + if (output_suffixes.empty()) { + PyErr_Format(PyExc_TypeError, "transform %s should have an output suffix", cname.c_str()); + Py_DECREF(callable); + return nullptr; + } + // TODO: it's not clear what the output layer will be if the input layers are not // all the same, so for now, simply raise an error if their is any ambiguity auto output_layer = static_cast(input_selectors[0].layer); diff --git a/test/python/adder.py b/test/python/adder.py index 95920696e..1dc0a3ff0 100644 --- a/test/python/adder.py +++ b/test/python/adder.py @@ -55,4 +55,12 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): None """ int_adder = Variant(add, {"i": int, "j": int, "return": int}, "iadd") + + try: + # intentional failure to check error path of missing output suffix + m.transform(int_adder, input_family=config["input"]) + except TypeError as e: + assert "should have an output suffix" in str(e) + + # functional transform registration m.transform(int_adder, input_family=config["input"], output_product_suffixes=config["output"]) From 89b1951a9257000b0d4b1c3babf96f9c00ba1a5b Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 12:40:57 -0700 Subject: [PATCH 32/41] pass transform concurrency on to the converter nodes --- plugins/python/src/modulewrap.cpp | 70 ++++++++++++++++--------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 0c022c641..bd54c0367 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -735,10 +735,10 @@ namespace { std::string const& name, R (*converter)(Args...), product_selector pq_in, - std::string const& output) + std::string const& output, + concurrency nconcur) { - mod->ph_module - ->transform(name, converter, (concurrency)16) //concurrency::serial) // TODO! + mod->ph_module->transform(name, converter, nconcur) .input_family(pq_in) .output_product_suffixes(output); } @@ -866,7 +866,8 @@ static bool insert_input_converters(py_phlex_module* mod, std::string const& cname, // TODO: shared_ptr std::vector const& input_selectors, std::vector const& input_types, - bool ispy) + bool ispy, + concurrency nc) { // insert input converter nodes into the graph for (auto const [i, inp_pq, inp_type] : @@ -879,19 +880,19 @@ static bool insert_input_converters(py_phlex_module* mod, "py_" + (inp_pq.suffix ? std::string{static_cast(*inp_pq.suffix)} : ""); if (inp_type == "bool") - insert_converter(mod, pyname, ispy ? bool_to_py : bool_to_dcarg, inp_pq, output); + insert_converter(mod, pyname, ispy ? bool_to_py : bool_to_dcarg, inp_pq, output, nc); else if (inp_type == "int32_t") - insert_converter(mod, pyname, ispy ? int_to_py : int_to_dcarg, inp_pq, output); + insert_converter(mod, pyname, ispy ? int_to_py : int_to_dcarg, inp_pq, output, nc); else if (inp_type == "uint32_t") - insert_converter(mod, pyname, ispy ? uint_to_py : uint_to_dcarg, inp_pq, output); + insert_converter(mod, pyname, ispy ? uint_to_py : uint_to_dcarg, inp_pq, output, nc); else if (inp_type == "int64_t") - insert_converter(mod, pyname, ispy ? long_to_py : long_to_dcarg, inp_pq, output); + insert_converter(mod, pyname, ispy ? long_to_py : long_to_dcarg, inp_pq, output, nc); else if (inp_type == "uint64_t") - insert_converter(mod, pyname, ispy ? ulong_to_py : ulong_to_dcarg, inp_pq, output); + insert_converter(mod, pyname, ispy ? ulong_to_py : ulong_to_dcarg, inp_pq, output, nc); else if (inp_type == "float") - insert_converter(mod, pyname, ispy ? float_to_py : float_to_dcarg, inp_pq, output); + insert_converter(mod, pyname, ispy ? float_to_py : float_to_dcarg, inp_pq, output, nc); else if (inp_type == "double") - insert_converter(mod, pyname, ispy ? double_to_py : double_to_dcarg, inp_pq, output); + insert_converter(mod, pyname, ispy ? double_to_py : double_to_dcarg, inp_pq, output, nc); else if (inp_type.compare(0, 7, "ndarray") == 0 || inp_type.compare(0, 4, "list") == 0) { // TODO: these are hard-coded std::vector <-> numpy array mappings, which is // way too simplistic for real use. It only exists for demonstration purposes, @@ -902,17 +903,17 @@ static bool insert_input_converters(py_phlex_module* mod, return false; } if (*dtype == "[int32_t]") { - insert_converter(mod, pyname, vint_to_py, inp_pq, output); + insert_converter(mod, pyname, vint_to_py, inp_pq, output, nc); } else if (*dtype == "[uint32_t]") { - insert_converter(mod, pyname, vuint_to_py, inp_pq, output); + insert_converter(mod, pyname, vuint_to_py, inp_pq, output, nc); } else if (*dtype == "[int64_t]") { - insert_converter(mod, pyname, vlong_to_py, inp_pq, output); + insert_converter(mod, pyname, vlong_to_py, inp_pq, output, nc); } else if (*dtype == "[uint64_t]") { - insert_converter(mod, pyname, vulong_to_py, inp_pq, output); + insert_converter(mod, pyname, vulong_to_py, inp_pq, output, nc); } else if (*dtype == "[float]") { - insert_converter(mod, pyname, vfloat_to_py, inp_pq, output); + insert_converter(mod, pyname, vfloat_to_py, inp_pq, output, nc); } else if (*dtype == "[double]") { - insert_converter(mod, pyname, vdouble_to_py, inp_pq, output); + insert_converter(mod, pyname, vdouble_to_py, inp_pq, output, nc); } else { PyErr_Format(PyExc_TypeError, "unsupported collection input type \"%s\"", inp_type.c_str()); return false; @@ -931,23 +932,24 @@ static bool insert_output_converter(py_phlex_module* mod, product_selector const& out_pq, std::string const& out_type, std::string const& output, - bool ispy) + bool ispy, + concurrency nc) { // insert output converter node into the graph if (out_type == "bool") - insert_converter(mod, cname, ispy ? py_to_bool : dcarg_to_bool, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_bool : dcarg_to_bool, out_pq, output, nc); else if (out_type == "int32_t") - insert_converter(mod, cname, ispy ? py_to_int : dcarg_to_int, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_int : dcarg_to_int, out_pq, output, nc); else if (out_type == "uint32_t") - insert_converter(mod, cname, ispy ? py_to_uint : dcarg_to_uint, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_uint : dcarg_to_uint, out_pq, output, nc); else if (out_type == "int64_t") - insert_converter(mod, cname, ispy ? py_to_long : dcarg_to_long, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_long : dcarg_to_long, out_pq, output, nc); else if (out_type == "uint64_t") - insert_converter(mod, cname, ispy ? py_to_ulong : dcarg_to_ulong, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_ulong : dcarg_to_ulong, out_pq, output, nc); else if (out_type == "float") - insert_converter(mod, cname, ispy ? py_to_float : dcarg_to_float, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_float : dcarg_to_float, out_pq, output, nc); else if (out_type == "double") - insert_converter(mod, cname, ispy ? py_to_double : dcarg_to_double, out_pq, output); + insert_converter(mod, cname, ispy ? py_to_double : dcarg_to_double, out_pq, output, nc); else if (out_type.compare(0, 7, "ndarray") == 0 || out_type.compare(0, 4, "list") == 0) { // TODO: just like for input types, these are hard-coded, but should be handled by // an IDL instead. @@ -957,17 +959,17 @@ static bool insert_output_converter(py_phlex_module* mod, return false; } if (*dtype == "[int32_t]") { - insert_converter(mod, cname, py_to_vint, out_pq, output); + insert_converter(mod, cname, py_to_vint, out_pq, output, nc); } else if (*dtype == "[uint32_t]") { - insert_converter(mod, cname, py_to_vuint, out_pq, output); + insert_converter(mod, cname, py_to_vuint, out_pq, output, nc); } else if (*dtype == "[int64_t]") { - insert_converter(mod, cname, py_to_vlong, out_pq, output); + insert_converter(mod, cname, py_to_vlong, out_pq, output, nc); } else if (*dtype == "[uint64_t]") { - insert_converter(mod, cname, py_to_vulong, out_pq, output); + insert_converter(mod, cname, py_to_vulong, out_pq, output, nc); } else if (*dtype == "[float]") { - insert_converter(mod, cname, py_to_vfloat, out_pq, output); + insert_converter(mod, cname, py_to_vfloat, out_pq, output, nc); } else if (*dtype == "[double]") { - insert_converter(mod, cname, py_to_vdouble, out_pq, output); + insert_converter(mod, cname, py_to_vdouble, out_pq, output, nc); } else { PyErr_Format(PyExc_TypeError, "unsupported collection output type \"%s\"", out_type.c_str()); return false; @@ -1052,7 +1054,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw } } - if (!insert_input_converters(mod, cname, input_selectors, input_types, !ccallf)) { + if (!insert_input_converters(mod, cname, input_selectors, input_types, !ccallf, nconcur)) { Py_DECREF(callable); return nullptr; // error already set } @@ -1105,7 +1107,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw .layer = identifier(output_layer), .suffix = identifier(pyoutput)}; std::string const& output = output_suffixes[0]; - if (!insert_output_converter(mod, cname, out_pq, out_type, output, !ccallf)) { + if (!insert_output_converter(mod, cname, out_pq, out_type, output, !ccallf, nconcur)) { Py_DECREF(callable); return nullptr; // error already set } @@ -1150,7 +1152,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds return nullptr; } - if (!insert_input_converters(mod, cname, input_selectors, input_types, !ccallf)) { + if (!insert_input_converters(mod, cname, input_selectors, input_types, !ccallf, nconcur)) { Py_DECREF(callable); return nullptr; // error already set } From cafb828b108cd097d8486dce802f038c79fb116d Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 12:52:42 -0700 Subject: [PATCH 33/41] verify conversion result in python providers --- plugins/python/src/modulewrap.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index bd54c0367..7c5b59573 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -605,6 +605,11 @@ namespace { dcarg res = this->py_callback::operator()(dcarg{arg0}); /* decrefs arg0 */ \ PyObject* pyres = res.get(); \ cpptype cres = frompy(pyres); \ + std::string msg; \ + if (msg_from_py_error(msg, true)) { \ + Py_DECREF(pyres); \ + throw std::runtime_error("Python provider conversion error for type " #name ": " + msg); \ + } \ Py_DECREF(pyres); \ return cres; \ } \ From fe5d983d6e6aeb2a7291357819c811762cb0dddf Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 12:58:12 -0700 Subject: [PATCH 34/41] delete py_callback_base move constructor/assignment --- plugins/python/src/modulewrap.cpp | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 7c5b59573..f5a6af014 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -143,24 +143,8 @@ namespace { } return *this; } - py_callback_base(py_callback_base&& other) noexcept : - m_callable(other.m_callable), m_ccallback(other.m_ccallback) - { - other.m_callable = nullptr; - other.m_ccallback = nullptr; - } - py_callback_base& operator=(py_callback_base&& other) noexcept - { - if (this != &other) { - PyGILRAII gil; - Py_DECREF(m_callable); - m_callable = other.m_callable; - m_ccallback = other.m_ccallback; - other.m_callable = nullptr; - other.m_ccallback = nullptr; - } - return *this; - } + py_callback_base(py_callback_base&& other) = delete; + py_callback_base& operator=(py_callback_base&& other) = delete; virtual ~py_callback_base() { // TODO: cleanup deferred to Phlex shutdown hook From 224eefda73e3dadaa43972b9a9b6dbdca21e0293 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 13:05:01 -0700 Subject: [PATCH 35/41] coding conventions --- plugins/python/src/dyncall.cpp | 1 - plugins/python/src/dyncall.hpp | 31 ++++++++++++++++--------------- plugins/python/src/modulewrap.cpp | 6 +++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index da7101268..cb8e580a8 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -6,7 +6,6 @@ #include "dyncall.hpp" #include -#include #include diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index a38db3ef7..ce4fdf43a 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -36,21 +37,21 @@ typedef std::uint64_t ph_ulong_t; namespace phlex::experimental { struct dcarg { - using FFIType = std::variant; - - FFIType m_value; + using ffi_type = std::variant; + + ffi_type m_value; // convenience mapper of human-readable string to dcarg static dcarg from_str(std::string const& stype); diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index f5a6af014..3c7f369cf 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -47,7 +47,7 @@ // would be to collect the arguments, but that currently suffers from needing a // "initial" to create the container to collect arguments into. This may all go // away once converter nodes have better support in phlex' core -constexpr size_t MAX_SUPPORTED_ARGS = 3; +constexpr size_t max_supported_args = 3; using namespace phlex::experimental; using namespace phlex; @@ -1085,7 +1085,7 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw } }; - if (!unroll_switch(input_selectors.size(), transform_N_args)) { + if (!unroll_switch(input_selectors.size(), transform_N_args)) { PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); Py_DECREF(callable); return nullptr; @@ -1173,7 +1173,7 @@ static PyObject* md_observe(py_phlex_module* mod, PyObject* args, PyObject* kwds } }; - if (!unroll_switch(input_selectors.size(), observe_N_args)) { + if (!unroll_switch(input_selectors.size(), observe_N_args)) { PyErr_SetString(PyExc_TypeError, "unsupported number of inputs"); Py_DECREF(callable); return nullptr; From 73f52ecd843709ba8815dd1baf03bda7bedbf918 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 13:09:16 -0700 Subject: [PATCH 36/41] fix shadowing of ffi_type --- plugins/python/src/dyncall.hpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/plugins/python/src/dyncall.hpp b/plugins/python/src/dyncall.hpp index ce4fdf43a..0ec6f119b 100644 --- a/plugins/python/src/dyncall.hpp +++ b/plugins/python/src/dyncall.hpp @@ -37,21 +37,21 @@ typedef std::uint64_t ph_ulong_t; namespace phlex::experimental { struct dcarg { - using ffi_type = std::variant; - - ffi_type m_value; + using ffi_variant_type = std::variant; + + ffi_variant_type m_value; // convenience mapper of human-readable string to dcarg static dcarg from_str(std::string const& stype); From 1e008e37d2132b47b4510b127dab9b31427ffa72 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 13:10:46 -0700 Subject: [PATCH 37/41] coding convention --- plugins/python/src/dyncall.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/plugins/python/src/dyncall.cpp b/plugins/python/src/dyncall.cpp index cb8e580a8..894f2d97d 100644 --- a/plugins/python/src/dyncall.cpp +++ b/plugins/python/src/dyncall.cpp @@ -102,12 +102,12 @@ void phlex::experimental::dyncall(void* fn, dcarg& result, dcargs_t& args, int v // because libffi is, and that yields a plethora of warnings from clang-tidy, // none of which warrant actual changes. // NOLINTBEGIN - std::size_t N = (std::size_t)args.size(); + std::size_t nargs = (std::size_t)args.size(); - auto t = std::make_unique(N); - auto p = std::make_unique(N); + auto t = std::make_unique(nargs); + auto p = std::make_unique(nargs); - for (dcargs_t::size_type i = 0; i < N; ++i) { + for (dcargs_t::size_type i = 0; i < nargs; ++i) { auto& a = args[i]; t[i] = get_ffi_type(a); p[i] = a.value_ptr(); @@ -116,9 +116,10 @@ void phlex::experimental::dyncall(void* fn, dcarg& result, dcargs_t& args, int v ffi_cif cif; ffi_status status; if (0 < var_offset) - status = ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, var_offset, N, get_ffi_type(result), t.get()); + status = + ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, var_offset, nargs, get_ffi_type(result), t.get()); else - status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, N, get_ffi_type(result), t.get()); + status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, nargs, get_ffi_type(result), t.get()); if (status) throw std::runtime_error("ffi prep failed"); From 5429b2df0f0ea18690a4fe23b4fd3df394ff75ee Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 13:14:30 -0700 Subject: [PATCH 38/41] add missing decref --- plugins/python/src/modulewrap.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 3c7f369cf..44c296f06 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -305,6 +305,8 @@ namespace { } } + Py_DECREF(coll); + if (PyErr_Occurred()) cargs.clear(); // error handled through Python From d61f52d24803ac21f28dc9c744610b7707b15592 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 13:51:34 -0700 Subject: [PATCH 39/41] improve reporting of conversion errors --- plugins/python/src/modulewrap.cpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index 44c296f06..ebce243b1 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -418,10 +418,13 @@ namespace { { bool conversion_ok = false; + // TODO: move to PyObject_GetOptionalAttr and remove PyErr_Clear() in + // the series of calls below, once we upgrade to py3.13 PyObject* sann = PyUnicode_FromString("__annotations__"); PyObject* annot = PyObject_GetAttr(callable, sann); if (!annot) { // the callable may be a Numba CFunc and have a declared signature + PyErr_Clear(); PyObject* sig = PyObject_GetAttrString(callable, "_sig"); if (sig) { PyObject* ret = PyObject_GetAttrString(sig, "return_type"); @@ -614,8 +617,10 @@ namespace { { \ PyGILRAII gil; \ \ - if (!v) \ - return dcarg{nullptr}; \ + if (!v) { \ + Py_INCREF(Py_None); \ + return dcarg{Py_None}; \ + } \ \ /* use a numpy view with the shared pointer tied up in a lifeline object (note: this */ \ /* is just a demonstrator; alternatives are still being considered) */ \ @@ -627,8 +632,10 @@ namespace { (v->data()) /* raw buffer */ \ ); \ \ - if (!np_view) \ + if (!np_view) { \ + std::runtime_error("failed to allocate numpy view object"); \ return dcarg{nullptr}; \ + } \ \ /* make the data read-only by not making it writable */ \ PyArray_CLEARFLAGS(reinterpret_cast(np_view), NPY_ARRAY_WRITEABLE); \ @@ -640,6 +647,7 @@ namespace { PhlexLifeline_Type.tp_new(&PhlexLifeline_Type, nullptr, nullptr)); \ if (!pyll) { \ Py_DECREF(np_view); \ + std::runtime_error("failed to allocate lifeline object"); \ return dcarg{nullptr}; \ } \ pyll->m_source = v; \ @@ -683,11 +691,13 @@ namespace { vec->reserve(total); \ for (Py_ssize_t i = 0; i < total; ++i) { \ PyObject* item = PyList_GetItem(pyobj, i); \ - vec->push_back(static_cast(frompy(item))); \ - if (PyErr_Occurred()) { \ - PyErr_Clear(); \ - break; \ + cpptype value = static_cast(frompy(item)); \ + std::string msg; \ + if (msg_from_py_error(msg, true)) { \ + Py_DECREF(pyobj); \ + throw std::runtime_error("List conversion error for type " #name ": " + msg); \ } \ + vec->push_back(value); \ } \ } else { \ std::string msg; \ From 356718d22bc6584d960c1fd0ae6423274acb28e7 Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 13:55:55 -0700 Subject: [PATCH 40/41] temporary "fix" to make the AI happy; to be removed after release --- plugins/python/src/modulewrap.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/plugins/python/src/modulewrap.cpp b/plugins/python/src/modulewrap.cpp index ebce243b1..9bc8e457a 100644 --- a/plugins/python/src/modulewrap.cpp +++ b/plugins/python/src/modulewrap.cpp @@ -1069,6 +1069,35 @@ static PyObject* md_transform(py_phlex_module* mod, PyObject* args, PyObject* kw std::string pyoutput = output_suffixes[0] + "_py"; std::string const& out_type = output_types[0]; + // TODO: the following makes the AI happy, but should be removed shortly once + // vector support is added for Numba (WIP; release is cut first) + // LCOV_EXCL_START + auto is_collection_type = [](std::string const& type) { + return type.compare(0, 7, "ndarray") == 0 || type.compare(0, 4, "list") == 0; + }; + if (ccallf) { + for (auto const& input_type : input_types) { + if (is_collection_type(input_type)) { + PyErr_Format(PyExc_TypeError, + "Numba transform %s has unsupported collection input type \"%s\"", + cname.c_str(), + input_type.c_str()); + Py_DECREF(callable); + return nullptr; + } + } + if (is_collection_type(out_type)) { + PyErr_Format(PyExc_TypeError, + "Numba transform %s has unsupported collection output type \"%s\"", + cname.c_str(), + out_type.c_str()); + Py_DECREF(callable); + return nullptr; + } + } + // LCOV_EXCL_STOP + // end TODO + auto transform_N_args = [&](std::index_sequence) { constexpr size_t N = sizeof...(Is); From a0fb46852639f237bcccd3ff800d11cdb1db560f Mon Sep 17 00:00:00 2001 From: Wim Lavrijsen Date: Fri, 26 Jun 2026 13:57:17 -0700 Subject: [PATCH 41/41] guarantee that a path that is supposed to fail actually does --- test/python/adder.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/python/adder.py b/test/python/adder.py index 1dc0a3ff0..4205a5736 100644 --- a/test/python/adder.py +++ b/test/python/adder.py @@ -61,6 +61,10 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): m.transform(int_adder, input_family=config["input"]) except TypeError as e: assert "should have an output suffix" in str(e) + else: + raise AssertionError( + "m.transform() should reject registrations without an output suffix" + ) # functional transform registration m.transform(int_adder, input_family=config["input"], output_product_suffixes=config["output"])