Skip to content

Commit 693e248

Browse files
Add a benchmarking script.
* Builds "main" as a baseline and uses Hyperfine to compare the current directory against this baseline.
* Testsuite added with four different error correction code groups.
* Creates results directories for each run with whisker plots and stat summaries.
1 parent 1e0cd87 commit 693e248

17 files changed

+24553
-0
lines changed

benchmarking/benchmark.py

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
#!/usr/bin/env python3.13
2+
"""
3+
Tesseract Decoder Benchmarker
4+
5+
This script automates the process of benchmarking the Tesseract decoder using hyperfine.
6+
It compares the performance of your current working directory against a baseline revision.
7+
8+
Basic Usage:
9+
Run the benchmarker with default settings (compares current directory against 'main'):
10+
$ ./benchmarking/benchmark.py
11+
12+
Run a quick benchmark (minimal shots and runs, useful for sanity checking before a long run):
13+
$ ./benchmarking/benchmark.py -q
14+
15+
Compare against a specific baseline revision (e.g., a specific commit or branch):
16+
$ ./benchmarking/benchmark.py -b my-feature-branch
17+
18+
Filter circuits by group name (e.g., only run 'surface_code' circuits) See circuits.json for available groups:
19+
$ ./benchmarking/benchmark.py -g surface_code
20+
21+
Benchmarking Multiple Changes:
22+
You can benchmark multiple working directories simultaneously against the baseline.
23+
This is useful if you have several different implementations across different
24+
directories that you want to compare side-by-side in a single run.
25+
26+
To set up additional directories for your changes:
27+
- Using git: Create a new worktree.
28+
$ git worktree add ../path-to-experiment1 <branch-or-commit>
29+
- Using jj (jujutsu): Add a new workspace.
30+
$ jj workspace add ../path-to-experiment1 -r <revision>
31+
32+
Use the -d or --dir flag for each additional directory you want to include:
33+
$ ./benchmarking/benchmark.py -d ../path-to-experiment1 -d ../path-to-experiment2
34+
35+
You can also provide a label for the plot by using the format label=path:
36+
$ ./benchmarking/benchmark.py -d "experiment1=../path-to-experiment1"
37+
38+
This will benchmark the baseline, the current working directory, and the two
39+
extra directories specified, providing a single cohesive report.
40+
41+
Command Line Flags:
42+
-b, --baseline <rev> : Specify baseline revision (default: main). Can be a branch or commit.
43+
-d, --dir <lbl=path> : Add extra working directories to benchmark against. Format: path or label=path. Can be specified multiple times.
44+
-q, --quick : Enable quick mode (fewer shots, warmup rounds, and runs). Useful for testing.
45+
-g, --group <name> : Filter circuits to benchmark by group name (e.g. 'surface_code').
46+
--skip-build : Skip the bazel build step (assuming binaries are already built).
47+
--loop : Continuously loop the benchmarks. Take a step away from your computer, and grab a Nuka Cola.
48+
--shots <num> : Override the default sample-num-shots (default: 5000). Mutually exclusive with -q.
49+
--warmup <num> : Override the default warmup-rounds (default: 15). Mutually exclusive with -q.
50+
--runs <num> : Override the default num-runs (default: 50). Mutually exclusive with -q.
51+
"""
52+
53+
import argparse
54+
import contextlib
55+
import json
56+
import logging
57+
import shutil
58+
import subprocess
59+
import sys
60+
import time
61+
from datetime import datetime
62+
from pathlib import Path
63+
from zoneinfo import ZoneInfo
64+
import plotting
65+
import workspace
66+
67+
# Configure logging with LA timezone
68+
class Formatter(logging.Formatter):
    """Log formatter that renders timestamps in the America/Los_Angeles zone.

    ``logging.Formatter.formatTime`` calls ``self.converter(record.created)``,
    so overriding ``converter`` is sufficient to localize every log line.
    """

    def converter(self, timestamp):
        # Epoch seconds -> aware datetime in LA -> the struct_time formatTime expects.
        localized = datetime.fromtimestamp(timestamp, tz=ZoneInfo('America/Los_Angeles'))
        return localized.timetuple()
72+
73+
# Wire the root logger to emit INFO and above on stdout, using the
# timezone-aware Formatter so every log line carries an LA-local timestamp.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
formatter = Formatter('[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
handler.setFormatter(formatter)
logger.addHandler(handler)
79+
80+
def print_batch_summary(json_output_files: list[Path], circuit_names: list[str]) -> None:
    """Log a per-circuit baseline-vs-pwd speedup summary from hyperfine JSON exports.

    Args:
        json_output_files: Hyperfine ``--export-json`` result files, one per circuit.
        circuit_names: Circuit labels, parallel to ``json_output_files``.
    """
    logger.info("===================================================")
    logger.info(">>> BATCH RUN SUMMARY")
    logger.info("===================================================")

    for json_path, circuit in zip(json_output_files, circuit_names):
        # Skip circuits whose run never produced a results file.
        if not Path(json_path).exists():
            continue
        try:
            with open(json_path, 'r') as fh:
                report = json.load(fh)

            runs = report.get('results', [])
            # Index 0 is the baseline command, index 1 the working directory.
            if len(runs) < 2:
                continue
            baseline_mean = runs[0].get('mean')
            pwd_mean = runs[1].get('mean')
            if baseline_mean is None or pwd_mean is None or not pwd_mean > 0:
                continue

            logger.info(f"Circuit: {circuit}")
            logger.info(f" Baseline Mean: {baseline_mean:.4f} s")
            logger.info(f" PWD Mean: {pwd_mean:.4f} s")
            logger.info(f" Speedup: {baseline_mean / pwd_mean:.4f}x")
            logger.info("---------------------------------------------------")
        except Exception as e:
            # Best effort: a malformed file should not abort the summary.
            logger.error(f"Failed to parse or summarize {json_path}: {e}")
105+
106+
def run_benchmark_batch(args: argparse.Namespace, workspaces: list[str | Path], workspace_names: list[str]) -> None:
    """Run one full benchmark pass over every selected circuit.

    For each circuit, invokes hyperfine once across all workspace binaries,
    exports the timings as JSON, renders a whisker plot, and finally logs a
    baseline-vs-pwd summary.

    Args:
        args: Parsed CLI namespace; must carry sample_num_shots, warmup_rounds,
            num_runs, quick, and group.
        workspaces: Directories each containing a built bazel-bin/src/tesseract.
        workspace_names: Human-readable labels, parallel to ``workspaces``.
    """
    logger.info("===================================================")
    logger.info(">>> STARTING NEW BATCH RUN SEQUENCE")
    logger.info("===================================================")

    if args.quick:
        logger.info(f">>> Quick mode enabled: Reduced shots ({args.sample_num_shots}), warmup ({args.warmup_rounds}), and runs ({args.num_runs}).")

    # Timestamped directory so repeated runs never clobber each other.
    la_tz = ZoneInfo('America/Los_Angeles')
    timestamp = datetime.now(la_tz).strftime('%Y-%m-%d_%H_%M')
    result_dir = Path(f"benchmarking/results/{timestamp}_{args.num_runs}")

    logger.info(f">>> Output directory: {result_dir}")
    (result_dir / "benchmark_json").mkdir(parents=True, exist_ok=True)
    (result_dir / "benchmark_whiskers").mkdir(parents=True, exist_ok=True)

    try:
        with open("benchmarking/circuits.json", 'r') as f:
            circuits_data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        logger.error(f"Failed to load circuits JSON: {e}")
        sys.exit(1)

    if args.group:
        logger.info(f">>> Filtering circuits by group: {args.group}")
        circuits = [c for c in circuits_data if c.get('group') == args.group]
    else:
        circuits = circuits_data

    if not circuits:
        # An empty selection would silently produce an empty report; surface it.
        logger.error(f"No circuits matched group filter: {args.group!r}")
        return

    json_output_files = []
    circuit_names = []

    # Decoder flags shared by every benchmarked binary.
    tesseract_args = [
        "--sample-num-shots", str(args.sample_num_shots),
        "--print-stats", "--threads", "48", "--beam", "5",
        "--no-revisit-dets", "--num-det-orders", "1",
        "--pqlimit", "100000", "--sample-seed", "123456"
    ]

    for circuit in circuits:
        c_name = circuit['name']
        c_path = circuit['path']

        json_file = result_dir / "benchmark_json" / f"results_{c_name}.json"
        whisker_file = result_dir / "benchmark_whiskers" / f"results_{c_name}.png"

        json_output_files.append(json_file)
        circuit_names.append(c_name)

        logger.info("---------------------------------------------------")
        logger.info(f">>> BENCHMARKING CIRCUIT: {c_name}")
        logger.info(f">>> Path: {c_path}")

        hyperfine_cmd = [
            "hyperfine",
            "--warmup", str(args.warmup_rounds),
            "--runs", str(args.num_runs),
            "--export-json", str(json_file)
        ]

        for name, d in zip(workspace_names, workspaces):
            hyperfine_cmd.extend(["-n", name])

            # Path() normalizes a leading "." away, so the current directory
            # needs no special casing: Path(".") / "bazel-bin" == Path("bazel-bin").
            binary_path = Path(d) / "bazel-bin" / "src" / "tesseract"

            # Single-quote the circuit path: testsuite filenames contain
            # commas, brackets, and '=' characters that the shell would mangle.
            cmd_for_binary = f"{binary_path} --circuit '{c_path}' " + " ".join(tesseract_args)
            hyperfine_cmd.append(cmd_for_binary)

        workspace.run_cmd(hyperfine_cmd)

        plotting.plot_benchmark_results(json_file=str(json_file), labels=workspace_names, output_file=str(whisker_file))

    print_batch_summary(json_output_files, circuit_names)
    logger.info(f">>> Batch Run Complete! Results saved in: {result_dir}")
183+
184+
def main() -> None:
    """Parse CLI flags, materialize the baseline workspace, and run benchmarks."""
    parser = argparse.ArgumentParser(description="Benchmark tesseract decoder using hyperfine.")
    parser.add_argument("-b", "--baseline", default="main", help="Specify baseline revision (default: main)")
    parser.add_argument("-d", "--dir", action="append", default=[], help="Add extra working directories to benchmark against. Format: path or label=path. Can be specified multiple times.")
    parser.add_argument("--skip-build", action="store_true", help="Skip the bazel build step")
    parser.add_argument("--loop", action="store_true", help="Loop runs rather than running once.")

    parser.add_argument("-q", "--quick", action="store_true", help="Enable quick mode (fewer shots/runs)")
    parser.add_argument("-g", "--group", default="", help="Filter circuits by group name")
    parser.add_argument("--shots", type=int, default=5000, help="Override the default sample-num-shots (mutually exclusive with -q)")
    parser.add_argument("--warmup", type=int, default=15, help="Override the default warmup-rounds (mutually exclusive with -q)")
    parser.add_argument("--runs", type=int, default=50, help="Override the default num-runs (mutually exclusive with -q)")

    args = parser.parse_args()

    # Treat any deviation from the documented defaults as an explicit override,
    # which conflicts with quick mode's fixed reduced settings.
    overridden = (args.shots, args.warmup, args.runs) != (5000, 15, 50)
    if args.quick and overridden:
        parser.error("-q/--quick cannot be used with --shots, --warmup, or --runs")

    if args.quick:
        args.sample_num_shots, args.warmup_rounds, args.num_runs = 500, 1, 2
    else:
        args.sample_num_shots, args.warmup_rounds, args.num_runs = args.shots, args.warmup, args.runs

    baseline_dir = "../baseline_bench_tmp"
    vcs = workspace.check_vcs()
    if not vcs:
        logger.error("Error: Neither a jj nor git repository detected.")
        sys.exit(1)
    with workspace.managed_baseline(baseline_dir, args.baseline, vcs):
        # Each -d entry is either "path" or "label=path".
        extra_workspaces = []
        extra_names = []
        for entry in args.dir:
            label, sep, path = entry.partition('=')
            if sep:
                extra_names.append(label)
                extra_workspaces.append(path)
            else:
                extra_names.append(Path(entry).name)
                extra_workspaces.append(entry)

        workspaces = [baseline_dir, "."] + extra_workspaces
        workspace_names = ["baseline", "pwd"] + extra_names

        workspace.build_all(workspaces, args.skip_build)

        # Always run at least once; --loop repeats forever, rebuilding each
        # time so source edits made between passes are picked up.
        run_benchmark_batch(args, workspaces, workspace_names)
        while args.loop:
            logger.info(">>> Restarting in 5 seconds... (Press Ctrl+C to stop)")
            time.sleep(5)
            workspace.build_all(workspaces, args.skip_build)
            run_benchmark_batch(args, workspaces, workspace_names)

if __name__ == "__main__":
    main()

benchmarking/circuits.json

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
[
2+
{
3+
"name": "r12_d12_p0.001_bivariate_bicycle_X",
4+
"group": "bivariate_bicycle_X",
5+
"path": "benchmarking/testsuite/r=12,d=12,p=0.001,noise=si1000,c=bivariate_bicycle_X,nkd=[[144,12,12]],q=288,iscolored=True,A_poly=x^3+y+y^2,B_poly=y^3+x+x^2.stim"
6+
},
7+
{
8+
"name": "r12_d12_p0.002_bivariate_bicycle_X",
9+
"group": "bivariate_bicycle_X",
10+
"path": "benchmarking/testsuite/r=12,d=12,p=0.002,noise=si1000,c=bivariate_bicycle_X,nkd=[[144,12,12]],q=288,iscolored=True,A_poly=x^3+y+y^2,B_poly=y^3+x+x^2.stim"
11+
},
12+
{
13+
"name": "r18_d18_p0.001_bivariate_bicycle_X",
14+
"group": "bivariate_bicycle_X",
15+
"path": "benchmarking/testsuite/r=18,d=18,p=0.001,noise=si1000,c=bivariate_bicycle_X,nkd=[[288,12,18]],q=576,iscolored=True,A_poly=x^3+y^2+y^7,B_poly=y^3+x+x^2.stim"
16+
},
17+
{
18+
"name": "r11_d11_p0.001_superdense_color_code_X",
19+
"group": "superdense_color_code_X",
20+
"path": "benchmarking/testsuite/r=11,d=11,p=0.001,noise=si1000,c=superdense_color_code_X,q=181,gates=cz.stim"
21+
},
22+
{
23+
"name": "r11_d11_p0.002_superdense_color_code_X",
24+
"group": "superdense_color_code_X",
25+
"path": "benchmarking/testsuite/r=11,d=11,p=0.002,noise=si1000,c=superdense_color_code_X,q=181,gates=cz.stim"
26+
},
27+
{
28+
"name": "r9_d9_p0.001_superdense_color_code_X",
29+
"group": "superdense_color_code_X",
30+
"path": "benchmarking/testsuite/r=9,d=9,p=0.001,noise=si1000,c=superdense_color_code_X,q=121,gates=cz.stim"
31+
},
32+
{
33+
"name": "r9_d9_p0.002_superdense_color_code_X",
34+
"group": "superdense_color_code_X",
35+
"path": "benchmarking/testsuite/r=9,d=9,p=0.002,noise=si1000,c=superdense_color_code_X,q=121,gates=cz.stim"
36+
},
37+
{
38+
"name": "r23_d23_p0.001_surface_code",
39+
"group": "surface_code",
40+
"path": "benchmarking/testsuite/r=23,d=23,p=0.001,noise=uniform,c=surface_code,q=2025,gates=cx.stim"
41+
},
42+
{
43+
"name": "r23_d23_p0.008_surface_code",
44+
"group": "surface_code",
45+
"path": "benchmarking/testsuite/r=23,d=23,p=0.008,noise=uniform,c=surface_code,q=2025,gates=cx.stim"
46+
},
47+
{
48+
"name": "r29_d29_p0.001_surface_code",
49+
"group": "surface_code",
50+
"path": "benchmarking/testsuite/r=29,d=29,p=0.001,noise=uniform,c=surface_code,q=3249,gates=cx.stim"
51+
},
52+
{
53+
"name": "r11_d11_p0.001_surface_code_trans_cx_X",
54+
"group": "surface_code_trans_cx_X",
55+
"path": "benchmarking/testsuite/r=11,d=11,p=0.001,noise=si1000,c=surface_code_trans_cx_X,q=482,gates=cz.stim"
56+
},
57+
{
58+
"name": "r9_d9_p0.001_surface_code_trans_cx_X",
59+
"group": "surface_code_trans_cx_X",
60+
"path": "benchmarking/testsuite/r=9,d=9,p=0.001,noise=si1000,c=surface_code_trans_cx_X,q=322,gates=cz.stim"
61+
},
62+
{
63+
"name": "r9_d9_p0.002_surface_code_trans_cx_X",
64+
"group": "surface_code_trans_cx_X",
65+
"path": "benchmarking/testsuite/r=9,d=9,p=0.002,noise=si1000,c=surface_code_trans_cx_X,q=322,gates=cz.stim"
66+
}
67+
]

benchmarking/plotting.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import json
2+
import matplotlib.pyplot as plt
3+
4+
def plot_benchmark_results(
    json_file: str,
    labels: list[str],
    output_file: str | None = None,
    title: str | None = None,
) -> None:
    """Render hyperfine benchmark timings as a box-and-whisker plot.

    Args:
        json_file: Path to a hyperfine ``--export-json`` results file.
        labels: Legend and x-axis labels, one per benchmarked command.
        output_file: Where to save the image; when None the plot is shown interactively.
        title: Optional figure title.
    """
    with open(json_file, encoding="utf-8") as fh:
        benchmark_runs = json.load(fh)["results"]

    samples = [run["times"] for run in benchmark_runs]

    plt.figure(figsize=(10, 6), constrained_layout=True)
    whiskers = plt.boxplot(samples, vert=True, patch_artist=True)

    # One rainbow color per box so each command is visually distinct.
    palette = plt.get_cmap("rainbow")
    for index, box in enumerate(whiskers["boxes"]):
        box.set_facecolor(palette(index / len(samples)))

    if title:
        plt.title(title)

    plt.legend(handles=whiskers["boxes"], labels=labels, loc="best", fontsize="medium")
    plt.ylabel("Time [s]")
    plt.ylim(0, None)
    plt.xticks(list(range(1, len(labels) + 1)), labels, rotation=45)

    if output_file:
        plt.savefig(output_file)
    else:
        plt.show()
    plt.close()

0 commit comments

Comments
 (0)