Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 34 additions & 27 deletions qdp/qdp-core/src/gpu/encodings/amplitude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,21 +245,9 @@ impl QuantumEncoder for AmplitudeEncoder {
buffer
};

// Validate norms on host to catch zero or NaN samples early
{
crate::profile_scope!("GPU::NormValidation");
let host_inv_norms = device
.dtoh_sync_copy(&inv_norms_gpu)
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;

if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
return Err(MahoutError::InvalidInput(
"One or more samples have zero or invalid norm".to_string(),
));
}
}

// Launch batch kernel
// Launch batch encode kernel — takes GPU norm buffer directly, no D2H needed yet.
// We defer the norm validation D2H copy until AFTER the encode kernel + sync so that
// the norm kernel → encode kernel sequence runs without an intermediate GPU-CPU roundtrip.
{
crate::profile_scope!("GPU::BatchKernelLaunch");
let state_ptr = batch_state_vector.ptr_f64().ok_or_else(|| {
Expand Down Expand Up @@ -288,14 +276,30 @@ impl QuantumEncoder for AmplitudeEncoder {
}
}

// Synchronize
// Synchronize — all GPU work (norm + encode) complete after this point.
{
crate::profile_scope!("GPU::Synchronize");
device
.synchronize()
.map_err(|e| MahoutError::Cuda(format!("Sync failed: {:?}", e)))?;
}

// Validate norms on host AFTER sync: D2H copy no longer blocks the encode kernel.
// This preserves error detection for zero/NaN samples without adding a mid-pipeline
// GPU-CPU roundtrip between the norm and encode kernels.
{
crate::profile_scope!("GPU::NormValidation");
let host_inv_norms = device
.dtoh_sync_copy(&inv_norms_gpu)
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;

if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
return Err(MahoutError::InvalidInput(
"One or more samples have zero or invalid norm".to_string(),
));
}
}

Ok(batch_state_vector)
}

Expand Down Expand Up @@ -412,17 +416,8 @@ impl QuantumEncoder for AmplitudeEncoder {
}
buffer
};
{
crate::profile_scope!("GPU::NormValidation");
let host_inv_norms = device
.dtoh_sync_copy(&inv_norms_gpu)
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
return Err(MahoutError::InvalidInput(
"One or more samples have zero or invalid norm".to_string(),
));
}
}
// Launch encode kernel before D2H norm validation: GPU norm buffer is passed directly,
// so the encode kernel can run immediately after the norm kernel without a CPU roundtrip.
{
crate::profile_scope!("GPU::BatchKernelLaunch");
use cudarc::driver::DevicePtr;
Expand Down Expand Up @@ -450,10 +445,22 @@ impl QuantumEncoder for AmplitudeEncoder {
)));
}
}
// Synchronize first; then validate norms on host (D2H after all GPU work is done).
{
crate::profile_scope!("GPU::Synchronize");
sync_cuda_stream(stream, "CUDA stream synchronize failed")?;
}
{
crate::profile_scope!("GPU::NormValidation");
let host_inv_norms = device
.dtoh_sync_copy(&inv_norms_gpu)
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
return Err(MahoutError::InvalidInput(
"One or more samples have zero or invalid norm".to_string(),
));
}
}
Ok(batch_state_vector)
}

Expand Down
41 changes: 41 additions & 0 deletions qdp/qdp-core/src/pipeline_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,47 @@ impl PipelineIterator {
})
}

/// Create a pipeline iterator from an in-memory array (e.g. from Python numpy).
/// Data is owned by the iterator; the full encode loop runs in Rust (take_batch + encode_batch).
/// Create a pipeline iterator from an in-memory array (e.g. from Python numpy).
/// Data is owned by the iterator; the full encode loop runs in Rust
/// (take_batch + encode_batch).
///
/// # Errors
/// Returns `MahoutError::InvalidInput` when `sample_size` does not match the
/// vector length implied by `config` (num_qubits + encoding method), when
/// `num_samples * sample_size` overflows `usize`, or when `data.len()` does
/// not equal `num_samples * sample_size`.
pub fn new_from_array(
    engine: QdpEngine,
    data: Vec<f64>,
    num_samples: usize,
    sample_size: usize,
    config: PipelineConfig,
    batch_limit: usize,
) -> Result<Self> {
    let vector_len = vector_len(config.num_qubits, &config.encoding_method);
    if sample_size != vector_len {
        return Err(MahoutError::InvalidInput(format!(
            "Array sample_size {} does not match vector_len {} for num_qubits={}, encoding={}",
            sample_size, vector_len, config.num_qubits, config.encoding_method
        )));
    }
    // Checked multiplication: a huge num_samples * sample_size must not wrap
    // around (possible on 32-bit targets) and accidentally pass the length check.
    let expected_len = num_samples.checked_mul(sample_size).ok_or_else(|| {
        MahoutError::InvalidInput(format!(
            "num_samples ({}) * sample_size ({}) overflows usize",
            num_samples, sample_size
        ))
    })?;
    if data.len() != expected_len {
        return Err(MahoutError::InvalidInput(format!(
            "Array length {} is not num_samples ({}) * sample_size ({})",
            data.len(),
            num_samples,
            sample_size
        )));
    }
    let source = DataSource::InMemory {
        data,
        cursor: 0,
        num_samples,
        sample_size,
        batches_yielded: 0,
        batch_limit,
    };
    Ok(Self {
        engine,
        config,
        source,
        vector_len,
    })
}

/// Create a pipeline iterator from a Parquet file using streaming read (Phase 2b).
/// Only `.parquet` is supported; reduces memory for large files by reading in chunks.
/// Validates sample_size == vector_len after the first chunk.
Expand Down
22 changes: 22 additions & 0 deletions qdp/qdp-python/benchmark/encoding_benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,25 @@ uv run python benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py --hel
uv run python benchmark/encoding_benchmarks/qdp_pipeline/mnist_amplitude.py --help
uv run python benchmark/encoding_benchmarks/qdp_pipeline/svhn_iqp.py --help
```

## Credit Card Fraud amplitude baseline (PennyLane)

Minimal, reproducible steps (run from `qdp/qdp-python`):

1. **Download dataset (once)** — Kaggle `creditcard.csv` mirror:

```bash
mkdir -p benchmark/encoding_benchmarks/pennylane_baseline/data
curl -L -o benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \
https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv
```

2. **Run the PennyLane baseline** — StandardScaler → PCA(16) → L2 norm → 4‑qubit amplitude VQC:

```bash
uv run python benchmark/encoding_benchmarks/pennylane_baseline/creditcardfraud_amplitude.py \
--data-file benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \
--max-samples 300000 --iters 200 --batch-size 512 --trials 1
```

This prints compile time, train time / throughput, and task metrics (AUPRC, F1, precision, recall) on the test set.
Loading
Loading