Skip to content

Commit c105a3b

Browse files
committed
Included OpenMP variations to benchmark OpenCL performance on all devices against OpenMP performance on the CPU
1 parent 2adbc6e commit c105a3b

File tree

8 files changed

+447
-266
lines changed

8 files changed

+447
-266
lines changed

.github/workflows/benchmark.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ name: Windows CPU OpenCL Benchmark
22

33
on:
44
workflow_dispatch:
5+
push:
6+
branches: [ main ]
57

68
jobs:
79
windows-build:
@@ -75,7 +77,7 @@ jobs:
7577
- name: Install dependencies
7678
run: |
7779
sudo apt update
78-
sudo apt-get install -y cmake build-essential ocl-icd-opencl-dev pocl-opencl-icd python3-pip
80+
sudo apt-get install -y cmake build-essential ocl-icd-opencl-dev pocl-opencl-icd libomp-dev python3-pip
7981
python3 -m pip install --upgrade pip
8082
pip install pyopencl numpy pandas matplotlib
8183
@@ -94,7 +96,7 @@ jobs:
9496
run: cmake --build build --config Release
9597

9698
- name: Run Benchmark
97-
run: python3 automation/run_benchmarks.py 0
99+
run: python3 automation/run_benchmarks.py
98100

99101
- name: Upload Benchmark Results
100102
uses: actions/upload-artifact@v4

CMakeLists.txt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
cmake_minimum_required(VERSION 3.10)
22
project(opencl_bench)
33

4-
set(CMAKE_CXX_STANDARD 17)
4+
set(CMAKE_CXX_STANDARD 17) # Use C++17
55
set(CMAKE_CXX_STANDARD_REQUIRED ON)
66

77
# Let CMake find OpenCL automatically
88
find_package(OpenCL REQUIRED)
99

10-
add_executable(benchmark src/benchmark.cpp)
10+
# Find OpenMP
11+
find_package(OpenMP REQUIRED)
1112

12-
# Include OpenCL headers
13-
target_include_directories(benchmark PRIVATE ${OpenCL_INCLUDE_DIRS})
1413

15-
# Link OpenCL library
16-
target_link_libraries(benchmark PRIVATE ${OpenCL_LIBRARIES})
14+
include_directories(${OpenCL_INCLUDE_DIR})
15+
16+
add_executable(benchmark src/benchmark.cpp src/openMP_functions.cpp src/openCL_functions.cpp)
17+
18+
target_link_libraries(benchmark ${OpenCL_LIBRARY} OpenMP::OpenMP_CXX)

automation/run_benchmarks.py

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import pandas as pd
99
import platform
1010

11+
MODES = ['vector add', 'vector multiply', 'relu', 'sigmoid']
12+
1113
# Define path to the benchmark executable
1214
SCRIPT_DIR = Path(__file__).parent.parent
1315
SAVE_DIR = SCRIPT_DIR / "automation" / "benchmark_results.csv"
@@ -19,6 +21,28 @@
1921
else:
2022
BENCHMARK_EXECUTABLE = SCRIPT_DIR / "build" / "benchmark"
2123

24+
def run_openMP_benchmark(N, mode=0):
    """Run the benchmark executable in OpenMP (CPU) mode and return its time.

    Args:
        N: Problem size, passed to the executable as its second argument.
        mode: Index of the function to benchmark, passed as the fourth
            argument.

    Returns:
        The time in milliseconds parsed from the "OpenMP CPU time:" line of
        the executable's stdout, or None if the executable is missing, exits
        with a non-zero status, reports a failed validation, prints no timing
        line, or its output cannot be parsed.
    """
    if not BENCHMARK_EXECUTABLE.exists():
        print(f"Benchmark executable not found at {BENCHMARK_EXECUTABLE}")
        return None

    time_prefix = "OpenMP CPU time:"
    try:
        # framework_mode=1 means CPU (OpenMP); the leading "0" fills the
        # device-index slot that the executable expects as its first argument.
        result = subprocess.run(
            [str(BENCHMARK_EXECUTABLE), "0", str(N), "1", str(mode)],
            text=True,
            capture_output=True,
        )
        if result.returncode != 0:
            print(f"Error running OpenMP benchmark with N={N}: {result.stderr}")
            return None

        output_lines = result.stdout.splitlines()

        # Only look at the text that follows "Correct:" on its own line.
        # Searching the whole output for "no" would flag a passing run as
        # failed whenever any other line happened to contain those letters.
        # NOTE(review): assumes the verdict is printed on the same line as
        # "Correct:" -- confirm against the executable's output format.
        for line in output_lines:
            marker, found, verdict = line.partition("Correct:")
            if found and "no" in verdict:
                print("Validation failed")
                return None

        for line in output_lines:
            if line.startswith(time_prefix):
                # First token after the prefix is the time in ms.
                return float(line[len(time_prefix):].split()[0])

        # Make the "no timing line" case visible instead of returning None
        # silently.
        print(f"No '{time_prefix}' line found in OpenMP benchmark output with N={N}")
        return None
    except (OSError, subprocess.SubprocessError, ValueError, IndexError) as e:
        # OSError / SubprocessError: the process could not be launched or run.
        # ValueError / IndexError: the timing line was present but malformed.
        print(f"Exception occurred while running OpenMP benchmark with N={N}: {e}")
        return None
45+
2246

2347
# Automatically detect all GPU devices
2448
def get_gpu_devices():
@@ -36,12 +60,12 @@ def get_gpu_devices():
3660
return devices
3761

3862

39-
def run_benchmark(device, N, mode=0):
63+
def run_openCL_benchmark(device, N, mode=0):
4064
if not BENCHMARK_EXECUTABLE.exists():
4165
print(f"Benchmark executable not found at {BENCHMARK_EXECUTABLE}")
4266
return None
4367
try:
44-
result = subprocess.run([str(BENCHMARK_EXECUTABLE), str(device), str(N), str(mode)], text=True, capture_output=True)
68+
result = subprocess.run([str(BENCHMARK_EXECUTABLE), str(device), str(N), "0", str(mode)], text=True, capture_output=True)
4569

4670
if result.returncode != 0:
4771
print(f"Error running benchmark for device {device} with N={N}: {result.stderr}")
@@ -58,50 +82,68 @@ def run_benchmark(device, N, mode=0):
5882
print(f"Exception occurred while running benchmark for device {device} with N={N}: {e}")
5983
return None
6084

61-
def plot_results():
85+
def plot_results(mode):
6286
if not SAVE_DIR.exists():
6387
print("No results file. Run benchmarks first.")
6488
return
6589

6690
df = pd.read_csv(SAVE_DIR)
6791

68-
for device_index in df["Device"].unique():
69-
subset = df[df["Device"] == device_index]
70-
plt.plot(subset["N"], subset["Time (ms)"], label=f"Device {device_index}")
92+
for device_name in df["Device"].unique():
93+
subset = df[df["Device"] == device_name]
94+
plt.plot(subset["N"], subset["Time (ms)"], marker='o', label=device_name)
7195

7296
plt.xlabel("N (size)")
7397
plt.ylabel("Time (ms)")
7498
plt.xscale("log", base=2)
7599
plt.yscale("log")
76-
plt.title("GPU Benchmark Results")
100+
plt.title("Benchmark Results for Function: " + MODES[mode])
77101
plt.legend()
78102
plt.grid(True)
79-
plt.savefig(SCRIPT_DIR / "automation" / "benchmark_results.png")
103+
path_str = "benchmark_results_" + MODES[mode] +".png"
104+
plt.savefig(SCRIPT_DIR / "automation" / path_str)
80105
plt.show()
106+
plt.clf()
107+
81108

82-
def main(mode):
109+
def single_benchmark(mode):
83110
devices = get_gpu_devices() # List of device IDs to benchmark
84-
N_values = [1 << 10, 1<< 15, 1 << 22, 1 << 24, 1 << 28] # Different sizes for the benchmark
111+
N_values = [1 << 10, 1<< 15, 1 << 22, 1 << 24, 1 << 26] # Different sizes for the benchmark
85112

113+
# OpenCL benchmarking
86114
results = []
87115
for N in N_values:
88116
for i, dev in enumerate(devices):
89117
print(f"Running benchmark for device {i} with N={N}...")
90-
output = run_benchmark(i, N, mode)
118+
output = run_openCL_benchmark(i, N, mode)
91119
if output is not None:
92-
results.append((i, N, output))
93-
print(f"Result: {output}")
120+
results.append(("OpenCL Device " + str(i), N, output))
121+
print(f"Result: {output} ms")
122+
123+
# CPU (OpenMP)
124+
for N in N_values:
125+
print(f"Running OpenMP benchmark on CPU with N={N}...")
126+
t = run_openMP_benchmark(N, mode)
127+
if t is not None:
128+
results.append(("OpenMP CPU", N, t))
129+
print(f"Result: {t} ms")
94130

95131
# Save results to CSV
96132
with open(SAVE_DIR, "w", newline='') as csvfile:
97133
csvwriter = csv.writer(csvfile)
98134
csvwriter.writerow(["Device", "N", "Time (ms)"])
99135
csvwriter.writerows(results)
100136

101-
print("Benchmarking completed. Results saved to ", SAVE_DIR)
137+
print("OpenCL benchmarking completed. Results saved to ", SAVE_DIR)
102138

103-
plot_results()
139+
plot_results(mode)
104140

105141
if __name__ == "__main__":
106-
mode = sys.argv[1] if len(sys.argv) > 1 else 0
107-
main(mode)
142+
# Single benchmark
143+
# mode = int(sys.argv[1]) if len(sys.argv) > 1 else 0
144+
# single_benchmark(mode)
145+
146+
# Run benchmarks for all modes
147+
for mode in range(len(MODES)):
148+
print(f"Starting benchmarks for mode: {MODES[mode]}")
149+
single_benchmark(mode)

0 commit comments

Comments
 (0)