Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/scripts/install_cuda_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ $CUDA_KNOWN_URLS = @{
"12.9.1" = "https://developer.download.nvidia.com/compute/cuda/12.9.1/network_installers/cuda_12.9.1_windows_network.exe";
"13.0.0" = "https://developer.download.nvidia.com/compute/cuda/13.0.0/network_installers/cuda_13.0.0_windows_network.exe";
"13.0.1" = "https://developer.download.nvidia.com/compute/cuda/13.0.1/network_installers/cuda_13.0.1_windows_network.exe";
"13.0.2" = "https://developer.download.nvidia.com/compute/cuda/13.0.2/network_installers/cuda_13.0.2_windows_network.exe";
"13.1.0" = "https://developer.download.nvidia.com/compute/cuda/13.1.0/network_installers/cuda_13.1.0_windows_network.exe";
"13.1.1" = "https://developer.download.nvidia.com/compute/cuda/13.1.1/network_installers/cuda_13.1.1_windows_network.exe";
"13.2.0" = "https://developer.download.nvidia.com/compute/cuda/13.2.0/network_installers/cuda_13.2.0_windows_network.exe";
}

# @todo - change this to be based on _MSC_VER instead, or invert it to be CUDA keyed instead
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/Draft-Release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ jobs:
# CUDA_ARCH values are reduced compared to wheels due to CI memory issues while compiling the test suite.
cudacxx:
# CUDA 13
- cuda: "13.2"
cuda_arch: "75-real;120-real;120-virtual;"
hostcxx: gcc-14
os: ubuntu-24.04
- cuda: "13.0"
cuda_arch: "75-real;120-real;120-virtual;"
hostcxx: gcc-13
Expand Down Expand Up @@ -190,6 +194,10 @@ jobs:
# CUDA_ARCH values are reduced compared to wheels due to CI memory issues while compiling the test suite.
cudacxx:
# Newest and oldest CUDA 13.x we support
- cuda: "13.2.0"
cuda_arch: "75"
hostcxx: "Visual Studio 18 2026"
os: windows-2025-vs2026
- cuda: "13.0.0"
cuda_arch: "75"
hostcxx: "Visual Studio 17 2022"
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/Ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ jobs:
matrix:
cudacxx:
# CUDA 13
- cuda: "13.2"
cuda_arch: "75"
hostcxx: gcc-14
os: ubuntu-24.04
- cuda: "13.0"
cuda_arch: "75"
hostcxx: gcc-13
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/Windows-Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ jobs:
# CUDA_ARCH values are reduced compared to wheels due to CI memory issues while compiling the test suite.
cudacxx:
# CUDA 13
- cuda: "13.2.0"
cuda_arch: "75"
hostcxx: "Visual Studio 18 2026"
os: windows-2025-vs2026
- cuda: "13.0.0"
cuda_arch: "75"
hostcxx: "Visual Studio 17 2022"
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/Windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ jobs:
matrix:
cudacxx:
# CUDA 13
- cuda: "13.2.0"
cuda_arch: "75"
hostcxx: "Visual Studio 18 2026"
os: windows-2025-vs2026
- cuda: "13.0.0"
cuda_arch: "75"
hostcxx: "Visual Studio 17 2022"
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,10 @@ Building FLAME GPU has the following requirements. There are also optional depen
+ For native Windows builds, CUDA `12.0-12.3` may work for some but not all parts of FLAME GPU due to C++20 compilation issues and MSVC support.
+ A [Compute Capability](https://developer.nvidia.com/cuda-gpus) `>= 5.0` (CUDA 12.x) or `>= 7.5` (CUDA 13.x) NVIDIA GPU is required for execution.
+ C++20 capable C++ compiler (host), compatible with the installed CUDA version
+ [Microsoft Visual Studio 2022](https://visualstudio.microsoft.com/) (Windows)
+ [Microsoft Visual Studio 2022/2026](https://visualstudio.microsoft.com/) (Windows)
+ *Note:* Visual Studio must be installed before the CUDA toolkit is installed. See the [CUDA installation guide for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) for more information.
+ *Note:* The Windows 11 SDK (10.0.22000.0) component is required within Visual Studio (in the latest versions this is the default for C++ Desktop Development workloads, even on Windows 10). Windows 10 *must* be updated to build 19045 (22H2) or later to support this at runtime.
+ *Note:* Visual Studio 2026 requires CUDA >= 13.2
+ [make](https://www.gnu.org/software/make/) and [GCC](https://gcc.gnu.org/) `>= 10` (Linux)
+ [git](https://git-scm.com/)

Expand Down
8 changes: 4 additions & 4 deletions include/flamegpu/runtime/agent/HostAgentAPI.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -992,10 +992,10 @@ void HostAgentAPI::sort_async(const std::string & variable1, Order order1, const
// pair sort values
if (order2 == Asc) {
thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var2T>(keys2), thrust::device_ptr<Var2T>(keys2 + agentCount),
thrust::device_ptr<unsigned int>(vals), thrust::less<Var2T>());
thrust::device_ptr<unsigned int>(vals), std::less<>());
} else {
thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var2T>(keys2), thrust::device_ptr<Var2T>(keys2 + agentCount),
thrust::device_ptr<unsigned int>(vals), thrust::greater<Var2T>());
thrust::device_ptr<unsigned int>(vals), std::greater<>());
}
gpuErrchkLaunch();
// sort keys1 based on this order
Expand All @@ -1006,10 +1006,10 @@ void HostAgentAPI::sort_async(const std::string & variable1, Order order1, const
// pair sort
if (order1 == Asc) {
thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var1T>(keys1), thrust::device_ptr<Var1T>(keys1 + agentCount),
thrust::device_ptr<unsigned int>(vals), thrust::less<Var1T>());
thrust::device_ptr<unsigned int>(vals), std::less<>());
} else {
thrust::stable_sort_by_key(thrust::cuda::par.on(stream), thrust::device_ptr<Var1T>(keys1), thrust::device_ptr<Var1T>(keys1 + agentCount),
thrust::device_ptr<unsigned int>(vals), thrust::greater<Var1T>());
thrust::device_ptr<unsigned int>(vals), std::greater<>());
}
gpuErrchkLaunch();
}
Expand Down
Loading