Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion third_party/flashattn
Submodule flashattn updated 73 files
+1 −0 .gitignore
+6 −7 csrc/flashmask_v2/distributed/ag_semaphore_ops.cuh
+7 −6 csrc/flashmask_v2/distributed/rs_semaphore_ops.cuh
+1 −1 csrc/flashmask_v2/flash_bwd_launch_template.h
+28 −4 csrc/flashmask_v2/flash_fwd_kernel_sm90.h
+83 −50 csrc/flashmask_v2/mainloop_bwd_sm90_tma_gmma_ws.hpp
+5 −3 csrc/flashmask_v2/mainloop_fwd_sm90_tma_gmma_ws.hpp
+4 −3 csrc/flashmask_v2/tile_size.h
+30 −5 flashmask/flash_mask/__init__.py
+7 −1 flashmask/flash_mask/cute/flash_bwd_sm100.py
+1 −0 flashmask/flash_mask/cute/interface.py
+4 −0 flashmask/flash_mask/flash_attn_v4/.flake8
+29 −0 flashmask/flash_mask/flash_attn_v4/LICENSE
+50 −0 flashmask/flash_mask/flash_attn_v4/__init__.py
+104 −0 flashmask/flash_mask/flash_attn_v4/ampere_helpers.py
+73 −0 flashmask/flash_mask/flash_attn_v4/barrier.py
+1,090 −0 flashmask/flash_mask/flash_attn_v4/blackwell_helpers.py
+140 −0 flashmask/flash_mask/flash_attn_v4/block_info.py
+1,436 −0 flashmask/flash_mask/flash_attn_v4/block_sparse_utils.py
+70 −0 flashmask/flash_mask/flash_attn_v4/block_sparsity.py
+287 −0 flashmask/flash_mask/flash_attn_v4/cache_utils.py
+374 −0 flashmask/flash_mask/flash_attn_v4/copy_utils.py
+153 −0 flashmask/flash_mask/flash_attn_v4/cute_dsl_ptxas.py
+49 −0 flashmask/flash_mask/flash_attn_v4/cute_dsl_utils.py
+97 −0 flashmask/flash_mask/flash_attn_v4/fa_logging.py
+21 −0 flashmask/flash_mask/flash_attn_v4/fast_math.py
+1,308 −0 flashmask/flash_mask/flash_attn_v4/flash_bwd.py
+586 −0 flashmask/flash_mask/flash_attn_v4/flash_bwd_postprocess.py
+362 −0 flashmask/flash_mask/flash_attn_v4/flash_bwd_preprocess.py
+3,980 −0 flashmask/flash_mask/flash_attn_v4/flash_bwd_sm100.py
+55 −0 flashmask/flash_mask/flash_attn_v4/flash_bwd_sm120.py
+1,891 −0 flashmask/flash_mask/flash_attn_v4/flash_bwd_sm90.py
+1,230 −0 flashmask/flash_mask/flash_attn_v4/flash_fwd.py
+699 −0 flashmask/flash_mask/flash_attn_v4/flash_fwd_combine.py
+2,998 −0 flashmask/flash_mask/flash_attn_v4/flash_fwd_sm100.py
+60 −0 flashmask/flash_mask/flash_attn_v4/flash_fwd_sm120.py
+1,535 −0 flashmask/flash_mask/flash_attn_v4/flash_fwd_sm90.py
+131 −0 flashmask/flash_mask/flash_attn_v4/hopper_helpers.py
+274 −0 flashmask/flash_mask/flash_attn_v4/layout_utils.py
+1,461 −0 flashmask/flash_mask/flash_attn_v4/mask.py
+297 −0 flashmask/flash_mask/flash_attn_v4/mma_sm100_desc.py
+47 −0 flashmask/flash_mask/flash_attn_v4/named_barrier.py
+263 −0 flashmask/flash_mask/flash_attn_v4/pack_gqa.py
+2 −0 flashmask/flash_mask/flash_attn_v4/paddle/__init__.py
+96 −0 flashmask/flash_mask/flash_attn_v4/paddle/bench_utils.py
+293 −0 flashmask/flash_mask/flash_attn_v4/paddle/benchmark.py
+425 −0 flashmask/flash_mask/flash_attn_v4/paddle/block_sparsity.py
+362 −0 flashmask/flash_mask/flash_attn_v4/paddle/compute_block_sparsity.py
+218 −0 flashmask/flash_mask/flash_attn_v4/paddle/cute_dsl_utils.py
+1,998 −0 flashmask/flash_mask/flash_attn_v4/paddle/interface.py
+479 −0 flashmask/flash_mask/flash_attn_v4/paddle/testing.py
+235 −0 flashmask/flash_mask/flash_attn_v4/paged_kv.py
+403 −0 flashmask/flash_mask/flash_attn_v4/pipeline.py
+288 −0 flashmask/flash_mask/flash_attn_v4/seqlen_info.py
+302 −0 flashmask/flash_mask/flash_attn_v4/sm100_hd256_2cta_fmha_backward.py
+3,155 −0 flashmask/flash_mask/flash_attn_v4/sm100_hd256_2cta_fmha_backward_dkdvkernel.py
+2,145 −0 flashmask/flash_mask/flash_attn_v4/sm100_hd256_2cta_fmha_backward_dqkernel.py
+1,730 −0 flashmask/flash_mask/flash_attn_v4/sm100_hd256_2cta_fmha_forward.py
+403 −0 flashmask/flash_mask/flash_attn_v4/sm90_config_search.py
+591 −0 flashmask/flash_mask/flash_attn_v4/softmax.py
+17 −0 flashmask/flash_mask/flash_attn_v4/testing.py
+1,625 −0 flashmask/flash_mask/flash_attn_v4/tile_scheduler.py
+2 −0 flashmask/flash_mask/flash_attn_v4/torch/__init__.py
+197 −0 flashmask/flash_mask/flash_attn_v4/torch/bench_utils.py
+269 −0 flashmask/flash_mask/flash_attn_v4/torch/benchmark.py
+464 −0 flashmask/flash_mask/flash_attn_v4/torch/block_sparsity.py
+379 −0 flashmask/flash_mask/flash_attn_v4/torch/compute_block_sparsity.py
+206 −0 flashmask/flash_mask/flash_attn_v4/torch/cute_dsl_utils.py
+2,182 −0 flashmask/flash_mask/flash_attn_v4/torch/interface.py
+458 −0 flashmask/flash_mask/flash_attn_v4/torch/testing.py
+944 −0 flashmask/flash_mask/flash_attn_v4/utils.py
+277 −0 flashmask/flash_mask/interface.py
+24 −1 flashmask/setup.py
Loading