Skip to content

Commit e6f7371

Browse files
committed
address review: shared rng dance in linker, xor-fold 128-bit pcg64 seed
1 parent d602268 commit e6f7371

File tree

3 files changed

+146
-8
lines changed

pytensor/link/mlx/dispatch/random.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@
88
from pytensor.link.mlx.dispatch.core import convert_dtype_to_mlx, mlx_to_list_shape
99

1010

11-
def _truncate_pcg64_state_to_uint64(rng: Generator) -> int:
12-
return int(rng.bit_generator.state["state"]["state"]) & 0xFFFFFFFFFFFFFFFF
13-
14-
1511
def numpy_generator_to_mlx_key(rng: Generator) -> mx.array:
1612
"""Convert a NumPy Generator to an MLX random key.
1713
1814
MLX uses a functional RNG model where each random call takes an explicit
19-
key rather than mutating shared state. This extracts the lower 64 bits of
20-
the PCG64 state integer as a seed for the MLX key.
15+
key rather than mutating shared state. The PCG64 state is 128 bits, which
16+
MLX cannot accept directly. We fold both 64-bit halves together via XOR
17+
to use all 128 bits of entropy in a single 64-bit seed.
2118
"""
22-
return mx.random.key(_truncate_pcg64_state_to_uint64(rng))
19+
state_128 = int(rng.bit_generator.state["state"]["state"])
20+
upper = (state_128 >> 64) & 0xFFFFFFFFFFFFFFFF
21+
lower = state_128 & 0xFFFFFFFFFFFFFFFF
22+
return mx.random.key(upper ^ lower)
2323

2424

2525
@mlx_typify.register(Generator)

pytensor/link/mlx/linker.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import warnings
2+
3+
from pytensor.compile.sharedvalue import SharedVariable, shared
14
from pytensor.link.basic import JITLinker
25

36

@@ -17,7 +20,7 @@ def __init__(self, use_compile=True, *args, **kwargs):
1720
self.gen_functors = []
1821
self.use_compile = use_compile
1922

20-
def fgraph_convert(self, fgraph, **kwargs):
23+
def fgraph_convert(self, fgraph, input_storage, storage_map, **kwargs):
2124
"""Convert a PyTensor FunctionGraph to an MLX-compatible function.
2225
2326
Parameters
@@ -31,9 +34,63 @@ def fgraph_convert(self, fgraph, **kwargs):
3134
An MLX-compatible function
3235
"""
3336
from pytensor.link.mlx.dispatch import mlx_funcify
37+
from pytensor.tensor.random.type import RandomType
38+
39+
shared_rng_inputs = [
40+
inp
41+
for inp in fgraph.inputs
42+
if (isinstance(inp, SharedVariable) and isinstance(inp.type, RandomType))
43+
]
44+
45+
# Replace any shared RNG inputs so that their values can be updated in place
46+
# without affecting the original RNG container. This is necessary because
47+
# MLX does not accept Generators as inputs, and they will have to
48+
# be typified
49+
if shared_rng_inputs:
50+
warnings.warn(
51+
f"The RandomType SharedVariables {shared_rng_inputs} will not be used "
52+
f"in the compiled MLX graph. Instead a copy will be used.",
53+
UserWarning,
54+
)
55+
new_shared_rng_inputs = [
56+
shared(inp.get_value(borrow=False)) for inp in shared_rng_inputs
57+
]
58+
59+
fgraph.replace_all(
60+
zip(shared_rng_inputs, new_shared_rng_inputs, strict=True),
61+
import_missing=True,
62+
reason="MLXLinker.fgraph_convert",
63+
)
64+
65+
for old_inp, new_inp in zip(
66+
shared_rng_inputs, new_shared_rng_inputs, strict=True
67+
):
68+
new_inp_storage = [new_inp.get_value(borrow=True)]
69+
storage_map[new_inp] = new_inp_storage
70+
old_inp_storage = storage_map.pop(old_inp)
71+
# Find index of old_inp_storage in input_storage
72+
for input_storage_idx, input_storage_item in enumerate(input_storage):
73+
# We have to establish equality based on identity because input_storage may contain numpy arrays
74+
if input_storage_item is old_inp_storage:
75+
break
76+
else: # no break
77+
raise ValueError()
78+
input_storage[input_storage_idx] = new_inp_storage
79+
# We need to change the order of the inputs of the FunctionGraph
80+
# so that the new input is in the same position as the old one,
81+
# to align with the storage_map. We hope this is safe!
82+
old_inp_fgraph_index = fgraph.inputs.index(old_inp)
83+
fgraph.remove_input(
84+
old_inp_fgraph_index,
85+
reason="MLXLinker.fgraph_convert",
86+
)
87+
fgraph.inputs.remove(new_inp)
88+
fgraph.inputs.insert(old_inp_fgraph_index, new_inp)
3489

3590
return mlx_funcify(
3691
fgraph,
92+
input_storage=input_storage,
93+
storage_map=storage_map,
3794
**kwargs,
3895
)
3996

tests/link/mlx/test_random.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
import pytensor
55
import pytensor.tensor as pt
6+
from pytensor.compile.function import function
67
from pytensor.compile.mode import MLX, Mode
8+
from pytensor.compile.sharedvalue import shared
79
from pytensor.link.mlx.linker import MLXLinker
810
from pytensor.tensor.random.utils import RandomStream
911

@@ -173,3 +175,82 @@ def test_beta_not_implemented():
173175
rv = srng.beta(alpha=2.0, beta=5.0, size=(3,))
174176
with pytest.raises(NotImplementedError, match="No MLX implementation"):
175177
pytensor.function([], rv, mode="MLX", updates=srng.updates())
178+
179+
180+
def compile_shared_rng_function(*args, mode="MLX", **kwargs):
181+
with pytest.warns(
182+
UserWarning, match=r"The RandomType SharedVariables \[.+\] will not be used"
183+
):
184+
return function(*args, mode=mode, **kwargs)
185+
186+
187+
def test_random_updates():
188+
original_value = np.random.default_rng(seed=98)
189+
rng = shared(original_value, name="original_rng", borrow=False)
190+
next_rng, x = pt.random.normal(name="x", rng=rng).owner.outputs
191+
192+
f = compile_shared_rng_function([], [x], updates={rng: next_rng})
193+
assert f() != f()
194+
195+
# Check that the original shared variable was not overwritten when typifying
196+
assert all(
197+
a == b if not isinstance(a, np.ndarray) else np.array_equal(a, b)
198+
for a, b in zip(
199+
rng.get_value().bit_generator.state,
200+
original_value.bit_generator.state,
201+
strict=True,
202+
)
203+
)
204+
205+
206+
@pytest.mark.parametrize("noise_first", (False, True))
207+
def test_replaced_shared_rng_storage_order(noise_first):
208+
# Test that replacing the RNG variable in the linker does not cause
209+
# a misalignment between the compiled graph and the storage_map.
210+
211+
mu = pytensor.shared(np.array(1.0), name="mu")
212+
rng = pytensor.shared(np.random.default_rng(123))
213+
next_rng, noise = pt.random.normal(rng=rng).owner.outputs
214+
215+
out = noise * mu if noise_first else mu * noise
216+
217+
updates = {
218+
mu: pt.grad(out, mu),
219+
rng: next_rng,
220+
}
221+
f = compile_shared_rng_function([], [out], updates=updates)
222+
223+
# Confirm that input_storage type and fgraph input order are aligned
224+
for storage, fgraph_input in zip(
225+
f.input_storage, f.maker.fgraph.inputs, strict=True
226+
):
227+
assert storage.type == fgraph_input.type
228+
229+
assert mu.get_value() == 1
230+
f()
231+
assert mu.get_value() != 1
232+
233+
234+
def test_replaced_shared_rng_storage_ordering_equality():
235+
"""Test that storage identity comparison works when numpy arrays precede
236+
the RNG in input_storage (regression test for issue #314)."""
237+
pt_rng = RandomStream(1)
238+
239+
batchshape = (3, 1, 4, 4)
240+
inp_shared = pytensor.shared(
241+
np.zeros(batchshape, dtype="float64"), name="inp_shared"
242+
)
243+
244+
inp = pt.tensor4(dtype="float64", name="inp")
245+
inp_update = inp + pt_rng.normal(size=inp.shape, loc=5, scale=1e-5)
246+
247+
fn = compile_shared_rng_function(
248+
inputs=[],
249+
outputs=[],
250+
updates={inp_shared: inp_update},
251+
givens={inp: inp_shared},
252+
)
253+
fn()
254+
np.testing.assert_allclose(np.array(inp_shared.get_value()), 5, rtol=1e-2)
255+
fn()
256+
np.testing.assert_allclose(np.array(inp_shared.get_value()), 10, rtol=1e-2)

0 commit comments

Comments
 (0)