sheralskumar
diff --git a/‎.github/workflows/benchmark.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/benchmark.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 8 additions & 6 deletions b/‎CMakeLists.txt‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎automation/run_benchmarks.py‎
Lines changed: 58 additions & 17 deletions b/‎automation/run_benchmarks.py‎
Lines changed: 58 additions & 17 deletions
@@ -2,6 +2,8 @@ name: Windows CPU OpenCL Benchmark
 
 on:
   workflow_dispatch:
+  push:
+    branches: [ main ]
 
 jobs:
   windows-build:
 
@@ -1,16 +1,18 @@
 cmake_minimum_required(VERSION 3.10)
 project(opencl_bench)
 
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 17)      # Use C++17
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 # Let CMake find OpenCL automatically
 find_package(OpenCL REQUIRED)
 
-add_executable(benchmark src/benchmark.cpp)
+# Find OpenMP
+find_package(OpenMP REQUIRED)
 
-# Include OpenCL headers
-target_include_directories(benchmark PRIVATE ${OpenCL_INCLUDE_DIRS})
 
-# Link OpenCL library
-target_link_libraries(benchmark PRIVATE ${OpenCL_LIBRARIES})
+# Build the benchmark driver together with the OpenMP and OpenCL sources
+add_executable(benchmark src/benchmark.cpp src/openMP_functions.cpp src/openCL_functions.cpp)
+
+# Imported targets carry their own include paths and flags; PRIVATE keeps them scoped to this target
+target_link_libraries(benchmark PRIVATE OpenCL::OpenCL OpenMP::OpenMP_CXX)
@@ -8,6 +8,8 @@
 import pandas as pd
 import platform
 
+MODES = ['vector add', 'vector multiply', 'relu', 'sigmoid']
+
 # Define path to the benchmark executable
 SCRIPT_DIR = Path(__file__).parent.parent
 SAVE_DIR = SCRIPT_DIR / "automation" / "benchmark_results.csv"
@@ -19,6 +21,28 @@
 else:
     BENCHMARK_EXECUTABLE = SCRIPT_DIR / "build" / "benchmark"
 
+def run_openMP_benchmark(N, mode=0):
+    """Run the benchmark executable on the CPU (OpenMP) path and return its time.
+
+    Args:
+        N: Problem size, forwarded to the executable as its second argument.
+        mode: Mode index, forwarded to the executable as its fourth argument.
+
+    Returns:
+        The value parsed from the "OpenMP CPU time:" output line (in ms) as a
+        float, or None when the executable is missing, exits non-zero,
+        reports a failed validation, prints no timing line, or raises.
+    """
+    if not BENCHMARK_EXECUTABLE.exists():
+        print(f"Benchmark executable not found at {BENCHMARK_EXECUTABLE}")
+        return None
+    try:
+        # framework_mode=1 means CPU (OpenMP)
+        result = subprocess.run([str(BENCHMARK_EXECUTABLE), "0", str(N), "1", str(mode)],
+                                text=True, capture_output=True)
+        if result.returncode != 0:
+            print(f"Error running OpenMP benchmark with N={N}: {result.stderr}")
+            return None
+
+        lines = result.stdout.splitlines()
+
+        # Only look at the text that follows "Correct:" on its own line; a
+        # substring search over all of stdout would also match any unrelated
+        # "no" elsewhere in the output.
+        for line in lines:
+            _, marker, verdict = line.partition("Correct:")
+            if marker and "no" in verdict:
+                print("Validation failed")
+                return None
+
+        for line in lines:
+            if line.startswith("OpenMP CPU time:"):
+                return float(line.split()[3])  # extract time in ms
+
+        # Make the "nothing to parse" case visible instead of falling through.
+        print(f"No OpenMP timing line found in benchmark output for N={N}")
+        return None
+    except Exception as e:
+        print(f"Exception occurred while running OpenMP benchmark with N={N}: {e}")
+        return None
+
 
 # Automatically detect all GPU devices
 def get_gpu_devices():
@@ -36,12 +60,12 @@ def get_gpu_devices():
     return devices
 
 
-def run_benchmark(device, N, mode=0):
+def run_openCL_benchmark(device, N, mode=0):
     if not BENCHMARK_EXECUTABLE.exists():
         print(f"Benchmark executable not found at {BENCHMARK_EXECUTABLE}")
         return None
     try: 
-        result = subprocess.run([str(BENCHMARK_EXECUTABLE), str(device), str(N), str(mode)], text=True, capture_output=True)
+        result = subprocess.run([str(BENCHMARK_EXECUTABLE), str(device), str(N), "0", str(mode)], text=True, capture_output=True)
 
         if result.returncode != 0:
             print(f"Error running benchmark for device {device} with N={N}: {result.stderr}")
@@ -58,50 +82,67 @@ def run_benchmark(device, N, mode=0):
         print(f"Exception occurred while running benchmark for device {device} with N={N}: {e}")
         return None
 
-def plot_results():
+def plot_results(mode):
+    """Plot the saved benchmark results for one mode and write them to a PNG.
+
+    Reads the CSV at SAVE_DIR, draws one time-vs-N curve per "Device" value
+    on log-scaled axes, saves the figure under automation/ with the mode name
+    in the file name, and then shows it.
+
+    Args:
+        mode: Index into MODES; used for the plot title and the file name.
+    """
     if not SAVE_DIR.exists():
         print("No results file. Run benchmarks first.")
         return
 
     df = pd.read_csv(SAVE_DIR)
 
+    # pyplot draws on the implicit "current" figure. Start a fresh one so that
+    # curves from an earlier call cannot carry over when show() leaves the
+    # figure open (e.g. on non-interactive backends).
+    plt.figure()
+
-    for device_index in df["Device"].unique():
-        subset = df[df["Device"] == device_index]
-        plt.plot(subset["N"], subset["Time (ms)"], label=f"Device {device_index}")
+    for device_name in df["Device"].unique():
+        subset = df[df["Device"] == device_name]
+        plt.plot(subset["N"], subset["Time (ms)"], marker='o', label=device_name)
 
     plt.xlabel("N (size)")
     plt.ylabel("Time (ms)")
     plt.xscale("log", base=2)
     plt.yscale("log")
-    plt.title("GPU Benchmark Results")
+    plt.title("Benchmark Results for Function: " + MODES[mode])
     plt.legend()
     plt.grid(True)
-    plt.savefig(SCRIPT_DIR / "automation" / "benchmark_results.png")
+    path_str = "benchmark_results_" + MODES[mode] +".png"
+    plt.savefig(SCRIPT_DIR / "automation" / path_str)
     plt.show()
+    # Release the figure so repeated calls neither stack plots nor leak memory.
+    plt.close()
 
-def main(mode):
+
+def single_benchmark(mode):
+    """Benchmark one mode on every detected OpenCL device and on the CPU (OpenMP).
+
+    Each (device label, N, time) result is collected, written to SAVE_DIR as
+    CSV (the file is opened in "w" mode, so earlier contents are replaced),
+    and then plotted via plot_results(mode).
+
+    Args:
+        mode: Mode index passed through to the benchmark runners and the plot.
+    """
     devices = get_gpu_devices() # List of device IDs to benchmark
-    N_values = [1 << 10, 1<< 15, 1 << 22, 1 << 24, 1 << 28]  # Different sizes for the benchmark
+    N_values = [1 << 10, 1<< 15, 1 << 22, 1 << 24, 1 << 26]  # Different sizes for the benchmark
 
+    # OpenCL benchmarking
     results = []
     for N in N_values:
         for i, dev in enumerate(devices):
             print(f"Running benchmark for device {i} with N={N}...")
-            output = run_benchmark(i, N, mode)
+            output = run_openCL_benchmark(i, N, mode)
             if output is not None:
-                results.append((i, N, output))
-                print(f"Result: {output}")
+                results.append(("OpenCL Device " + str(i), N, output))
+                print(f"Result: {output} ms")
+
+    # CPU (OpenMP)
+    for N in N_values:
+        print(f"Running OpenMP benchmark on CPU with N={N}...")
+        t = run_openMP_benchmark(N, mode)
+        if t is not None:
+            results.append(("OpenMP CPU", N, t))
+            print(f"Result: {t} ms")
 
     # Save results to CSV
     with open(SAVE_DIR, "w", newline='') as csvfile:
         csvwriter = csv.writer(csvfile)
         csvwriter.writerow(["Device", "N", "Time (ms)"])
         csvwriter.writerows(results)
 
+    # The CSV holds both OpenCL and OpenMP rows, so the message stays backend-neutral.
     print("Benchmarking completed. Results saved to ", SAVE_DIR)
 
-    plot_results()
+    plot_results(mode)
 
 if __name__ == "__main__":
-    mode = sys.argv[1] if len(sys.argv) > 1 else 0
-    main(mode)
+    # Single benchmark
+    # mode = int(sys.argv[1]) if len(sys.argv) > 1 else 0
+    # single_benchmark(mode)
+
+    # Run benchmarks for all modes; the index is what single_benchmark expects,
+    # the name is only used for the progress message.
+    for mode, mode_name in enumerate(MODES):
+        print(f"Starting benchmarks for mode: {mode_name}")
+        single_benchmark(mode)