Skip to content

Commit 4e6b534

Browse files
committed
fix and update
1 parent 1a9044d commit 4e6b534

File tree

7 files changed

+367
-85
lines changed

7 files changed

+367
-85
lines changed

qdp/qdp-python/pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,12 @@ benchmark = [
4949

5050
[tool.uv.sources]
5151
qumat = { path = "../..", editable = true }
52+
torch = { index = "pytorch" }
5253

54+
# CUDA 12.6 wheels to match driver (libnvJitLink 12_6); cu122 pulls libs that need 12_8 and fail.
5355
[[tool.uv.index]]
5456
name = "pytorch"
55-
url = "https://download.pytorch.org/whl/cu122"
57+
url = "https://download.pytorch.org/whl/cu126"
5658
explicit = true
5759

5860
# Invalidate uv cache when Rust or Cargo changes so extension is rebuilt (run_throughput_pipeline_py etc.).

qdp/qdp-python/qumat_qdp/loader.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@
3131

3232
from collections.abc import Iterator
3333
from functools import lru_cache
34-
from typing import TYPE_CHECKING, Any
34+
from typing import TYPE_CHECKING, Any, cast
3535

3636
import numpy as np
3737

3838
if TYPE_CHECKING:
3939
import _qdp
4040

41-
# Optional torch for as_torch()/as_numpy(); import at use site to avoid hard dependency.
41+
# Optional torch for as_torch(); as_numpy() uses QuantumTensor.to_numpy() (no torch needed).
4242
try:
4343
import torch as _torch
4444
except ImportError:
@@ -145,7 +145,7 @@ def as_torch(self, device: str = "cuda") -> QuantumDataLoader:
145145
return self
146146

147147
def as_numpy(self) -> QuantumDataLoader:
148-
"""Yield batches as NumPy arrays (CPU). Conversion is done inside the loader. Returns self."""
148+
"""Yield batches as NumPy float64 arrays (CPU). Uses QuantumTensor.to_numpy() — no PyTorch required. Returns self."""
149149
self._output_format = ("numpy",)
150150
return self
151151

@@ -371,7 +371,8 @@ def _wrap_iterator(self, raw_iter: Iterator[object]) -> Iterator[Any]:
371371
yield t.cpu() if device == "cpu" else t
372372
elif kind == "numpy":
373373
for qt in raw_iter:
374-
yield _torch.from_dlpack(qt).cpu().numpy()
374+
# Rust QuantumTensor has to_numpy(); raw_iter is Iterator[object]
375+
yield cast(Any, qt).to_numpy()
375376
else:
376377
yield from raw_iter
377378

qdp/qdp-python/src/engine.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,7 @@ impl QdpEngine {
722722
encoding_method,
723723
0,
724724
None,
725+
qdp_core::reader::NullHandling::FillZero,
725726
);
726727
let engine = self.engine.clone();
727728
let iter = py

qdp/qdp-python/src/tensor.rs

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,18 @@
1414
// See the License for the specific language governing permissions and
1515
// limitations under the License.
1616

17+
use numpy::{PyArray2, ndarray::Array2};
1718
use pyo3::exceptions::PyRuntimeError;
1819
use pyo3::ffi;
1920
use pyo3::prelude::*;
2021
use qdp_core::dlpack::DLManagedTensor;
22+
use std::ffi::c_void;
23+
24+
// CUDA Runtime API — already linked transitively by qdp-core.
25+
unsafe extern "C" {
26+
fn cudaMemcpy(dst: *mut c_void, src: *const c_void, count: usize, kind: i32) -> i32;
27+
}
28+
const CUDA_MEMCPY_DEVICE_TO_HOST: i32 = 2;
2129

2230
/// Quantum tensor wrapper implementing DLPack protocol
2331
///
@@ -98,6 +106,100 @@ impl QuantumTensor {
98106
}
99107
}
100108

109+
/// Copy encoded quantum state from GPU to a NumPy array (CPU, float64).
110+
///
111+
/// Performs a synchronous cudaMemcpy D2H without requiring PyTorch.
112+
/// Complex128 output (imaginary parts are always 0.0 per the CUDA kernel)
113+
/// is reduced to float64 by discarding the zero imaginary components.
114+
///
115+
/// Returns:
116+
/// numpy.ndarray of shape (batch_size, state_len), dtype float64.
117+
///
118+
/// Raises:
119+
/// RuntimeError: If the tensor has already been consumed, the pointer is
120+
/// invalid, the dtype is unsupported, or the CUDA copy fails.
121+
#[allow(clippy::wrong_self_convention)] // mut required: sets self.consumed and calls DLPack deleter
122+
fn to_numpy<'py>(&mut self, py: Python<'py>) -> PyResult<Bound<'py, PyArray2<f64>>> {
123+
if self.consumed {
124+
return Err(PyRuntimeError::new_err(
125+
"DLPack tensor already consumed (can only be used once)",
126+
));
127+
}
128+
if self.ptr.is_null() {
129+
return Err(PyRuntimeError::new_err("Invalid DLPack tensor pointer"));
130+
}
131+
132+
let (rows, cols, host_data) = unsafe {
133+
let dl_tensor = &(*self.ptr).dl_tensor;
134+
135+
// Shape — require 1-D or 2-D.
136+
let ndim = dl_tensor.ndim as usize;
137+
if ndim == 0 || ndim > 2 || dl_tensor.shape.is_null() {
138+
return Err(PyRuntimeError::new_err(
139+
"to_numpy() requires a 1-D or 2-D tensor",
140+
));
141+
}
142+
let shape = std::slice::from_raw_parts(dl_tensor.shape, ndim);
143+
let (rows, cols) = if ndim == 1 {
144+
(1usize, shape[0] as usize)
145+
} else {
146+
(shape[0] as usize, shape[1] as usize)
147+
};
148+
149+
// Dtype: complex128 (DL_COMPLEX=5, bits=128) or float64 (DL_FLOAT=2, bits=64).
150+
let dtype = &dl_tensor.dtype;
151+
let (is_complex, elem_bytes) = match (dtype.code, dtype.bits) {
152+
(5, 128) => (true, 16usize),
153+
(2, 64) => (false, 8usize),
154+
_ => {
155+
return Err(PyRuntimeError::new_err(format!(
156+
"to_numpy() unsupported dtype: code={}, bits={}",
157+
dtype.code, dtype.bits
158+
)));
159+
}
160+
};
161+
162+
let n_elems = rows * cols;
163+
// For complex128 each element is two consecutive f64 values.
164+
let host_f64_count = if is_complex { n_elems * 2 } else { n_elems };
165+
let mut host_buf = vec![0.0f64; host_f64_count];
166+
167+
let data_ptr = (dl_tensor.data as *const u8).add(dl_tensor.byte_offset as usize);
168+
169+
let ret = cudaMemcpy(
170+
host_buf.as_mut_ptr() as *mut c_void,
171+
data_ptr as *const c_void,
172+
n_elems * elem_bytes,
173+
CUDA_MEMCPY_DEVICE_TO_HOST,
174+
);
175+
if ret != 0 {
176+
return Err(PyRuntimeError::new_err(format!(
177+
"cudaMemcpy D2H failed with error code {}",
178+
ret
179+
)));
180+
}
181+
182+
// Consumed: GPU memory is ours to free now.
183+
self.consumed = true;
184+
if let Some(deleter) = (*self.ptr).deleter {
185+
deleter(self.ptr);
186+
}
187+
188+
// complex128 → float64: discard imaginary parts (always 0.0).
189+
let host_data: Vec<f64> = if is_complex {
190+
host_buf.into_iter().step_by(2).collect()
191+
} else {
192+
host_buf
193+
};
194+
195+
(rows, cols, host_data)
196+
};
197+
198+
let arr = Array2::from_shape_vec((rows, cols), host_data)
199+
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
200+
Ok(PyArray2::from_owned_array(py, arr))
201+
}
202+
101203
/// Returns DLPack device information
102204
///
103205
/// Returns:
@@ -122,8 +224,8 @@ impl QuantumTensor {
122224

123225
impl Drop for QuantumTensor {
124226
fn drop(&mut self) {
125-
// Only free if not consumed by __dlpack__
126-
// If consumed, PyTorch/consumer will call the deleter
227+
// Only free if not consumed; __dlpack__ leaves freeing to PyTorch,
228+
// to_numpy() calls the deleter itself after the D2H copy.
127229
if !self.consumed && !self.ptr.is_null() {
128230
unsafe {
129231
// Defensive check: qdp-core always provides a deleter

qdp/qdp-python/tests/test_quantum_data_loader.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
"""tests for Quantum Data Loader."""
1818

19+
from unittest.mock import patch
20+
21+
import numpy as np
1922
import pytest
2023

2124
try:
@@ -28,6 +31,15 @@ def _loader_available():
2831
return QuantumDataLoader is not None
2932

3033

34+
def _cuda_available():
35+
try:
36+
import torch
37+
38+
return torch.cuda.is_available()
39+
except ImportError:
40+
return False
41+
42+
3143
@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available")
3244
def test_mutual_exclusion_both_sources_raises() -> None:
3345
"""Calling both .source_synthetic() and .source_file() then __iter__ raises ValueError."""
@@ -238,3 +250,134 @@ def test_source_file_s3_streaming_non_parquet_raises(path):
238250
)
239251
msg = str(exc_info.value).lower()
240252
assert "parquet" in msg or "streaming" in msg
253+
254+
255+
# --- as_torch() / as_numpy() output format tests ---
256+
257+
258+
@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available")
def test_as_torch_raises_at_config_time_when_torch_missing():
    """as_torch() must fail with RuntimeError at configuration time when torch is absent.

    The loader module's ``_torch`` handle is patched to None to simulate a
    missing PyTorch install; the error text must mention torch and how to
    install it.
    """
    with patch("qumat_qdp.loader._torch", None):
        configured = QuantumDataLoader(device_id=0).qubits(4).batches(2, size=4)
        with pytest.raises(RuntimeError) as raised:
            configured.as_torch()
        message = str(raised.value)
        mentions_torch = "PyTorch" in message or "torch" in message.lower()
        assert mentions_torch
        assert "pip install" in message
268+
269+
270+
@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available")
def test_as_numpy_succeeds_at_config_time_without_torch():
    """Selecting NumPy output must not raise while configuring, even with torch absent.

    Only the builder chain is exercised (no iteration); the recorded output
    format is then checked.
    """
    with patch("qumat_qdp.loader._torch", None):
        builder = QuantumDataLoader(device_id=0)
        builder = builder.qubits(4)
        builder = builder.batches(2, size=4)
        builder = builder.source_synthetic()
        configured = builder.as_numpy()
        assert configured._output_format == ("numpy",)
282+
283+
284+
@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available")
@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required")
def test_as_numpy_yields_float64_arrays():
    """Iterating an as_numpy() loader yields 2-D float64 ndarrays of the expected shape.

    The loader module's ``_torch`` handle is patched to None for the whole
    iteration so the NumPy path is exercised without PyTorch.
    """
    num_qubits = 4
    batch_size = 8
    state_len = 2**num_qubits  # 16

    with patch("qumat_qdp.loader._torch", None):
        loader = (
            QuantumDataLoader(device_id=0)
            .qubits(num_qubits)
            .batches(3, size=batch_size)
            .source_synthetic()
            .as_numpy()
        )
        # Drain the loader while the patch is active.
        collected = list(loader)

    assert len(collected) == 3
    for batch in collected:
        assert isinstance(batch, np.ndarray), f"expected ndarray, got {type(batch)}"
        assert batch.dtype == np.float64, f"expected float64, got {batch.dtype}"
        assert batch.ndim == 2
        assert batch.shape == (batch_size, state_len), f"unexpected shape {batch.shape}"
310+
311+
312+
@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available")
@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required")
def test_as_numpy_amplitudes_are_unit_norm():
    """Every row of every as_numpy() batch has Euclidean norm 1 (within 1e-5)."""
    num_qubits = 4
    batch_size = 16

    builder = QuantumDataLoader(device_id=0).qubits(num_qubits)
    builder = builder.batches(2, size=batch_size).source_synthetic()
    loader = builder.as_numpy()

    for batch in loader:
        states = np.asarray(batch, dtype=np.float64)
        # One norm per row, i.e. per encoded sample.
        row_norms = np.linalg.norm(states, axis=1)
        np.testing.assert_allclose(row_norms, 1.0, atol=1e-5)
330+
331+
332+
@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available")
@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required")
def test_as_torch_yields_cuda_tensors():
    """as_torch(device='cuda') yields CUDA torch tensors of shape (batch_size, 2**num_qubits)."""
    try:
        import torch
    except ImportError:
        pytest.skip("torch not installed")

    num_qubits = 4
    batch_size = 8
    expected_shape = (batch_size, 2**num_qubits)

    builder = QuantumDataLoader(device_id=0).qubits(num_qubits)
    builder = builder.batches(2, size=batch_size).source_synthetic()
    loader = builder.as_torch(device="cuda")

    for tensor in loader:
        assert isinstance(tensor, torch.Tensor)
        assert tensor.is_cuda
        assert tensor.shape == expected_shape
356+
357+
358+
@pytest.mark.skipif(not _loader_available(), reason="QuantumDataLoader not available")
@pytest.mark.skipif(not _cuda_available(), reason="CUDA GPU required")
def test_as_numpy_from_source_array():
    """as_numpy() over an in-memory array source yields float64 ndarrays of the right width.

    A seeded random (n_samples, state_len) matrix is amplitude-encoded; the
    test checks the number of batches and each batch's type, dtype and
    column count.
    """
    num_qubits = 3
    state_len = 2**num_qubits  # 8
    n_samples = 12
    batch_size = 4
    expected_batches = n_samples // batch_size

    # Fixed seed keeps the input reproducible between runs.
    samples = np.random.default_rng(42).standard_normal((n_samples, state_len))

    builder = QuantumDataLoader(device_id=0).qubits(num_qubits)
    builder = builder.batches(1, size=batch_size).encoding("amplitude")
    loader = builder.source_array(samples).as_numpy()

    collected = list(loader)
    assert len(collected) == expected_batches
    for batch in collected:
        assert isinstance(batch, np.ndarray)
        assert batch.dtype == np.float64
        assert batch.shape[1] == state_len

0 commit comments

Comments
 (0)