FLAMEGPU · ptheywood · Apr 1, 2026 · Apr 7, 2026 · Apr 1, 2026
diff --git a/.github/scripts/install_cuda_windows.ps1 b/.github/scripts/install_cuda_windows.ps1
@@ -63,6 +63,10 @@ $CUDA_KNOWN_URLS = @{
     "12.9.1" = "https://developer.download.nvidia.com/compute/cuda/12.9.1/network_installers/cuda_12.9.1_windows_network.exe";
     "13.0.0" = "https://developer.download.nvidia.com/compute/cuda/13.0.0/network_installers/cuda_13.0.0_windows_network.exe";
     "13.0.1" = "https://developer.download.nvidia.com/compute/cuda/13.0.1/network_installers/cuda_13.0.1_windows_network.exe";
+    "13.0.2" = "https://developer.download.nvidia.com/compute/cuda/13.0.2/network_installers/cuda_13.0.2_windows_network.exe";
+    "13.1.0" = "https://developer.download.nvidia.com/compute/cuda/13.1.0/network_installers/cuda_13.1.0_windows_network.exe";
+    "13.1.1" = "https://developer.download.nvidia.com/compute/cuda/13.1.1/network_installers/cuda_13.1.1_windows_network.exe";
+    "13.2.0" = "https://developer.download.nvidia.com/compute/cuda/13.2.0/network_installers/cuda_13.2.0_windows_network.exe";
 }
 
 # @todo - change this to be based on _MSC_VER instead, or invert it to be CUDA keyed instead

diff --git a/.github/workflows/Draft-Release.yml b/.github/workflows/Draft-Release.yml
@@ -42,6 +42,10 @@ jobs:
         # CUDA_ARCH values are reduced compared to wheels due to CI memory issues while compiling the test suite.
         cudacxx:
           # CUDA 13
+          - cuda: "13.2"
+            cuda_arch: "75-real;120-real;120-virtual;"
+            hostcxx: gcc-14
+            os: ubuntu-24.04
           - cuda: "13.0"
             cuda_arch: "75-real;120-real;120-virtual;"
             hostcxx: gcc-13
@@ -190,6 +194,10 @@ jobs:
         # CUDA_ARCH values are reduced compared to wheels due to CI memory issues while compiling the test suite.
         cudacxx:
           # Newest and oldest CUDA 13.x we support
+          - cuda: "13.2.0"
+            cuda_arch: "75"
+            hostcxx: "Visual Studio 18 2026"
+            os: windows-2025-vs2026
           - cuda: "13.0.0"
             cuda_arch: "75"
             hostcxx: "Visual Studio 17 2022"

diff --git a/.github/workflows/Ubuntu.yml b/.github/workflows/Ubuntu.yml
@@ -30,6 +30,10 @@ jobs:
       matrix:
         cudacxx:
           # CUDA 13
+          - cuda: "13.2"
+            cuda_arch: "75"
+            hostcxx: gcc-14
+            os: ubuntu-24.04
           - cuda: "13.0"
             cuda_arch: "75"
             hostcxx: gcc-13

diff --git a/.github/workflows/Windows-Tests.yml b/.github/workflows/Windows-Tests.yml
@@ -24,6 +24,10 @@ jobs:
         # CUDA_ARCH values are reduced compared to wheels due to CI memory issues while compiling the test suite.
         cudacxx:
           # CUDA 13
+          - cuda: "13.2.0"
+            cuda_arch: "75"
+            hostcxx: "Visual Studio 18 2026"
+            os: windows-2025-vs2026
           - cuda: "13.0.0"
             cuda_arch: "75"
             hostcxx: "Visual Studio 17 2022"

diff --git a/.github/workflows/Windows.yml b/.github/workflows/Windows.yml
@@ -31,6 +31,10 @@ jobs:
       matrix:
         cudacxx:
           # CUDA 13
+          - cuda: "13.2.0"
+            cuda_arch: "75"
+            hostcxx: "Visual Studio 18 2026"
+            os: windows-2025-vs2026
           - cuda: "13.0.0"
             cuda_arch: "75"
             hostcxx: "Visual Studio 17 2022"

diff --git a/README.md b/README.md
@@ -63,9 +63,10 @@ Building FLAME GPU has the following requirements. There are also optional depen
   + For native Windows builds, CUDA `12.0-12.3` may work for some but not all parts of FLAME GPU due to c++20 compilation issues and MSVC support.
   + A [Compute Capability](https://developer.nvidia.com/cuda-gpus) `>= 5.0` (CUDA 12.x) or `>= 7.5` (CUDA 13.x) NVIDIA GPU is required for execution.
 + C++20 capable C++ compiler (host), compatible with the installed CUDA version
-  + [Microsoft Visual Studio 2022](https://visualstudio.microsoft.com/) (Windows)
+  + [Microsoft Visual Studio 2022/2026](https://visualstudio.microsoft.com/) (Windows)
     + *Note:* Visual Studio must be installed before the CUDA toolkit is installed. See the [CUDA installation guide for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) for more information.
     + *Note:* The Windows 11 SDK (10.0.22000.0) component is required within Visual Studio (in the latest versions this is the default for C++ Desktop Development workloads, even on Windows 10). Windows 10 *must* be updated to build 19045 (22H2) or later to support this at runtime.
+    + *Note:* Visual Studio 2026 requires CUDA `>= 13.2`.
   + [make](https://www.gnu.org/software/make/) and [GCC](https://gcc.gnu.org/) `>= 10` (Linux)
 + [git](https://git-scm.com/)
 

diff --git a/include/flamegpu/runtime/agent/HostAgentAPI.cuh b/include/flamegpu/runtime/agent/HostAgentAPI.cuh
@@ -992,10 +992,10 @@ void HostAgentAPI::sort_async(const std::string & variable1, Order order1, const
         // pair sort values
         if (order2 == Asc) {
             thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var2T>(keys2), thrust::device_ptr<Var2T>(keys2 + agentCount),
-            thrust::device_ptr<unsigned int>(vals), thrust::less<Var2T>());
+            thrust::device_ptr<unsigned int>(vals), std::less<>());
         } else {
             thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var2T>(keys2), thrust::device_ptr<Var2T>(keys2 + agentCount),
-            thrust::device_ptr<unsigned int>(vals), thrust::greater<Var2T>());
+            thrust::device_ptr<unsigned int>(vals), std::greater<>());
         }
         gpuErrchkLaunch();
         // sort keys1 based on this order
@@ -1006,10 +1006,10 @@ void HostAgentAPI::sort_async(const std::string & variable1, Order order1, const
         // pair sort
         if (order1 == Asc) {
             thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var1T>(keys1), thrust::device_ptr<Var1T>(keys1 + agentCount),
-            thrust::device_ptr<unsigned int>(vals), thrust::less<Var1T>());
+            thrust::device_ptr<unsigned int>(vals), std::less<>());
         } else {
             thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var1T>(keys1), thrust::device_ptr<Var1T>(keys1 + agentCount),
-            thrust::device_ptr<unsigned int>(vals), thrust::greater<Var1T>());
+            thrust::device_ptr<unsigned int>(vals), std::greater<>());
         }
         gpuErrchkLaunch();
     }