ROCm · adityas-amd · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026
@@ -184,7 +184,10 @@ struct nonmaxsuppression
                     "NonMaxSuppression: dynamic input shape with use_dyn_output set to false");
             }
             fixed_shape_error_check();
-            std::vector<std::size_t> out_lens = {max_num_boxes, 3};
+            // actual selected boxes. Previously returned fixed max_num_boxes which caused zero padding bug. https://github.com/ROCm/AMDMIGraphX/issues/4679
+            std::vector<shape::dynamic_dimension> out_lens = {};
+            out_lens.push_back({0, max_num_boxes});
+            out_lens.push_back({3, 3});
             return {shape::int64_type, out_lens};
         }
     }
@@ -321,7 +324,8 @@ struct nonmaxsuppression
                             double iou_threshold,
                             double score_threshold) const
     {
-        std::fill(output.begin(), output.end(), 0);
+        // We trim the output in compute() so zeros are irrelevant
+        // std::fill(output.begin(), output.end(), 0);
         const auto& lens       = scores.get_shape().lens();
         const auto num_batches = lens[0];
         const auto num_classes = lens[1];
@@ -398,14 +402,10 @@ struct nonmaxsuppression
                                            score_threshold);
             });
         });
-        if(use_dyn_output)
-        {
-            return result.reshape({output_shape.type(), {num_selected, 3}});
-        }
-        else
-        {
-            return result;
-        }
+        // Previously only done when use_dyn_output==True
+        // Now done always - correct for both static and dynamic inputs
+        // This makes output ONNX spec compliant https://onnx.ai/onnx/operators/onnx__NonMaxSuppression.html
+        return result.reshape({output_shape.type(), {num_selected, 3}});
     }
 };
 

@@ -639,7 +639,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
            bool skip_unknown_operators,
            bool print_program_on_error,
            int64_t max_loop_iterations,
-           int64_t limit_max_iterations) {
+           int64_t limit_max_iterations,
+           bool use_dyn_output) {//https://github.com/ROCm/AMDMIGraphX/issues/4679
             migraphx::onnx_options options;
             options.default_dim_value      = default_dim_value;
             options.default_dyn_dim_value  = default_dyn_dim_value;
@@ -650,6 +651,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
             options.print_program_on_error = print_program_on_error;
             options.max_loop_iterations    = max_loop_iterations;
             options.limit_max_iterations   = limit_max_iterations;
+            options.use_dyn_output         = use_dyn_output; //https://github.com/ROCm/AMDMIGraphX/issues/4679
             return migraphx::parse_onnx(filename, options);
         },
         "Parse onnx file",
@@ -664,7 +666,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
         py::arg("skip_unknown_operators") = false,
         py::arg("print_program_on_error") = false,
         py::arg("max_loop_iterations")    = 10,
-        py::arg("limit_max_iterations")   = std::numeric_limits<uint16_t>::max());
+        py::arg("limit_max_iterations")   = std::numeric_limits<uint16_t>::max(),
+        py::arg("use_dyn_output") = false); //https://github.com/ROCm/AMDMIGraphX/issues/4679
 
     m.def(
         "parse_onnx_buffer",

@@ -265,7 +265,7 @@ TEST_CASE(nms_not_center_test)
     auto output = p.eval({}).back();
     std::vector<int64_t> result;
     output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
-    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5}; // Only 3 boxes selected, no padding
     EXPECT(migraphx::verify::verify_rms_range(result, gold));
 }
 
@@ -299,7 +299,7 @@ TEST_CASE(nms_test)
     auto output = p.eval({}).back();
     std::vector<int64_t> result;
     output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
-    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
     EXPECT(migraphx::verify::verify_rms_range(result, gold));
 }
 
@@ -337,7 +337,7 @@ TEST_CASE(nms_transpose1_test)
     auto output = p.eval({}).back();
     std::vector<int64_t> result;
     output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
-    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
     EXPECT(migraphx::verify::verify_rms_range(result, gold));
 }
 
@@ -375,6 +375,192 @@ TEST_CASE(nms_transpose2_test)
     auto output = p.eval({}).back();
     std::vector<int64_t> result;
     output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
-    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
+    EXPECT(migraphx::verify::verify_rms_range(result, gold));
+}
+
+// Static inputs + use_dyn_output=False
+// Was the MAIN BUG — used to pad with fake [0,0,0] zeros
+TEST_CASE(nms_static_input_dyn_out_false_test)
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+
+    migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
+    std::vector<float> boxes_vec = {
+        0.0, 0.0,   1.0, 1.0,   
+        0.0, 0.1,   1.0, 1.1,  
+        0.0, -0.1,  1.0, 0.9,   
+        0.0, 10.0,  1.0, 11.0,  
+        0.0, 10.1,  1.0, 11.1,  
+        0.0, 100.0, 1.0, 101.0  
+    };
+
+    migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
+    std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
+
+    auto boxes_l         = mm->add_literal(migraphx::literal(boxes_s, boxes_vec));
+    auto scores_l        = mm->add_literal(migraphx::literal(scores_s, scores_vec));
+    auto max_out_l       = mm->add_literal(int64_t{3});
+    auto iou_threshold   = mm->add_literal(0.5f);
+    auto score_threshold = mm->add_literal(0.0f);
+
+    auto r = mm->add_instruction(
+        migraphx::make_op("nonmaxsuppression",
+                          {{"use_dyn_output", false}}),
+        boxes_l,
+        scores_l,
+        max_out_l,
+        iou_threshold,
+        score_threshold);
+    mm->add_return({r});
+
+    p.compile(migraphx::make_target("ref"));
+    auto output = p.eval({}).back();
+    std::vector<int64_t> result;
+    output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
+
+    // Only 3 selected boxes — NO fake [0,0,0] padding
+    // Before fix: output was (6,3) with 3 fake zero rows
+    // After fix:  output is  (3,3) with only real boxes
+    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
+    EXPECT(result.size() == gold.size()); // must be 9 not 18
+    EXPECT(migraphx::verify::verify_rms_range(result, gold));
+}
+
+
+// Static inputs + use_dyn_output=True 
+// Used to crash with PARSE_EXPAND error https://github.com/ROCm/AMDMIGraphX/issues/4679
+// Now works correctly with static input shapes
+TEST_CASE(nms_static_input_dyn_out_true_test)
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+
+    migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
+    std::vector<float> boxes_vec = {
+        0.0, 0.0,   1.0, 1.0,   
+        0.0, 0.1,   1.0, 1.1,   
+        0.0, -0.1,  1.0, 0.9,   
+        0.0, 10.0,  1.0, 11.0,  
+        0.0, 10.1,  1.0, 11.1,  
+        0.0, 100.0, 1.0, 101.0  
+    };
+
+    migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
+    std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
+
+    auto boxes_l         = mm->add_literal(migraphx::literal(boxes_s, boxes_vec));
+    auto scores_l        = mm->add_literal(migraphx::literal(scores_s, scores_vec));
+    auto max_out_l       = mm->add_literal(int64_t{3});
+    auto iou_threshold   = mm->add_literal(0.5f);
+    auto score_threshold = mm->add_literal(0.0f);
+
+    auto r = mm->add_instruction(
+        migraphx::make_op("nonmaxsuppression",
+                          {{"use_dyn_output", true}}),
+        boxes_l,
+        scores_l,
+        max_out_l,
+        iou_threshold,
+        score_threshold);
+    mm->add_return({r});
+
+    p.compile(migraphx::make_target("ref"));
+    auto output = p.eval({}).back();
+    std::vector<int64_t> result;
+    output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
+
+    // Same correct output as use_dyn_output=False
+    // Before fix: crashed with PARSE_EXPAND error
+    // After fix:  works correctly with static inputs
+    std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
+    EXPECT(result.size() == gold.size());
     EXPECT(migraphx::verify::verify_rms_range(result, gold));
 }
+
+
+TEST_CASE(nms_dyn_out_false_and_true_same_result_test)
+{
+    auto run_nms = [](bool use_dyn_output) {
+        migraphx::program p;
+        auto* mm = p.get_main_module();
+
+        migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
+        std::vector<float> boxes_vec = {
+            0.0, 0.0,   1.0, 1.0,
+            0.0, 0.1,   1.0, 1.1,
+            0.0, -0.1,  1.0, 0.9,
+            0.0, 10.0,  1.0, 11.0,
+            0.0, 10.1,  1.0, 11.1,
+            0.0, 100.0, 1.0, 101.0
+        };
+
+        migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
+        std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
+
+        auto boxes_l         = mm->add_literal(migraphx::literal(boxes_s, boxes_vec));
+        auto scores_l        = mm->add_literal(migraphx::literal(scores_s, scores_vec));
+        auto max_out_l       = mm->add_literal(int64_t{3});
+        auto iou_threshold   = mm->add_literal(0.5f);
+        auto score_threshold = mm->add_literal(0.0f);
+
+        auto r = mm->add_instruction(
+            migraphx::make_op("nonmaxsuppression",
+                              {{"use_dyn_output", use_dyn_output}}),
+            boxes_l,
+            scores_l,
+            max_out_l,
+            iou_threshold,
+            score_threshold);
+        mm->add_return({r});
+
+        p.compile(migraphx::make_target("ref"));
+        auto output = p.eval({}).back();
+        std::vector<int64_t> result;
+        output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
+        return result;
+    };
+
+    auto result_false = run_nms(false);
+    auto result_true  = run_nms(true);
+
+    // Both flags must produce identical output after fix
+    // Before fix: False gave (6,3), True gave (3,3) — different!
+    // After fix:  Both give (3,3) — identical! ✅
+    EXPECT(result_false.size() == result_true.size());
+    EXPECT(result_false == result_true);
+}
+
+
+// Dynamic inputs + use_dyn_output=False must throw
+// This is CORRECT behavior — not a bug
+// Ensures we didn't accidentally break this check
+TEST_CASE(nms_dynamic_input_dyn_out_false_throws_test)
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+
+    // Dynamic input shapes
+    migraphx::shape boxes_s{migraphx::shape::float_type, {{1, 3}, {6, 6}, {4, 4}}};
+    migraphx::shape scores_s{migraphx::shape::float_type, {{1, 3}, {1, 1}, {6, 6}}};
+
+    auto boxes_p         = mm->add_parameter("boxes", boxes_s);
+    auto scores_p        = mm->add_parameter("scores", scores_s);
+    auto max_out_l       = mm->add_literal(int64_t{3});
+    auto iou_threshold   = mm->add_literal(0.5f);
+    auto score_threshold = mm->add_literal(0.0f);
+
+    // Dynamic inputs + use_dyn_output=False MUST throw
+    // This is intentional — dynamic inputs require True
+    EXPECT(test::throws([&] {
+        mm->add_instruction(
+            migraphx::make_op("nonmaxsuppression",
+                              {{"use_dyn_output", false}}),
+            boxes_p,
+            scores_p,
+            max_out_l,
+            iou_threshold,
+            score_threshold);
+    }));
+}