Skip to content
20 changes: 10 additions & 10 deletions src/include/migraphx/op/nonmaxsuppression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,10 @@ struct nonmaxsuppression
"NonMaxSuppression: dynamic input shape with use_dyn_output set to false");
}
fixed_shape_error_check();
std::vector<std::size_t> out_lens = {max_num_boxes, 3};
// actual selected boxes. Previously returned fixed max_num_boxes which caused zero padding bug. https://github.com/ROCm/AMDMIGraphX/issues/4679
std::vector<shape::dynamic_dimension> out_lens = {};
out_lens.push_back({0, max_num_boxes});
out_lens.push_back({3, 3});
return {shape::int64_type, out_lens};
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this change, compute_shape() returns the same dynamic output shape in both the use_dyn_output and !use_dyn_output branches, and compute() also reshapes unconditionally. That makes the use_dyn_output flag behavior misleading (it now mainly controls whether dynamic inputs are allowed). Consider simplifying the branching / documenting this explicitly (or deprecating the flag) so callers don’t assume it controls whether the output shape is dynamic.

Copilot uses AI. Check for mistakes.
Comment on lines +187 to 192
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The output max length used for shape inference/allocation is derived from max_num_boxes = num_batches * num_boxes, but compute_nms() iterates over num_classes and can select up to num_batches * num_classes * num_boxes indices (or * min(num_boxes, max_output_boxes_per_class)), so the current {0, max_num_boxes} upper bound can under-allocate result and make std::copy(selected_indices..., output.begin()) write past the end when num_classes > 1. Consider including scores' max num_classes in the bound (e.g., boxes_max_lens[0] * inputs.at(1).max_lens()[1] * boxes_max_lens[1]) so output_shape.max_lens() is always a safe upper bound.

Copilot uses AI. Check for mistakes.
}
}
Expand Down Expand Up @@ -321,7 +324,8 @@ struct nonmaxsuppression
double iou_threshold,
double score_threshold) const
{
std::fill(output.begin(), output.end(), 0);
// We trim the output in compute() so zeros are irrelevant
// std::fill(output.begin(), output.end(), 0);
Comment thread
adityas-amd marked this conversation as resolved.
const auto& lens = scores.get_shape().lens();
const auto num_batches = lens[0];
const auto num_classes = lens[1];
Expand Down Expand Up @@ -398,14 +402,10 @@ struct nonmaxsuppression
score_threshold);
});
});
if(use_dyn_output)
{
return result.reshape({output_shape.type(), {num_selected, 3}});
}
else
{
return result;
}
// Previously only done when use_dyn_output==True
// Now done always - correct for both static and dynamic inputs
// This makes output ONNX spec compliant https://onnx.ai/onnx/operators/onnx__NonMaxSuppression.html
return result.reshape({output_shape.type(), {num_selected, 3}});
}
Comment thread
adityas-amd marked this conversation as resolved.
};

Expand Down
7 changes: 5 additions & 2 deletions src/py/migraphx_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
bool skip_unknown_operators,
bool print_program_on_error,
int64_t max_loop_iterations,
int64_t limit_max_iterations) {
int64_t limit_max_iterations,
bool use_dyn_output) {//https://github.com/ROCm/AMDMIGraphX/issues/4679
migraphx::onnx_options options;
Comment thread
adityas-amd marked this conversation as resolved.
options.default_dim_value = default_dim_value;
options.default_dyn_dim_value = default_dyn_dim_value;
Expand All @@ -650,6 +651,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
options.print_program_on_error = print_program_on_error;
options.max_loop_iterations = max_loop_iterations;
options.limit_max_iterations = limit_max_iterations;
options.use_dyn_output = use_dyn_output; //https://github.com/ROCm/AMDMIGraphX/issues/4679
return migraphx::parse_onnx(filename, options);
},
"Parse onnx file",
Expand All @@ -664,7 +666,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py::arg("skip_unknown_operators") = false,
py::arg("print_program_on_error") = false,
py::arg("max_loop_iterations") = 10,
py::arg("limit_max_iterations") = std::numeric_limits<uint16_t>::max());
py::arg("limit_max_iterations") = std::numeric_limits<uint16_t>::max(),
py::arg("use_dyn_output") = false); //https://github.com/ROCm/AMDMIGraphX/issues/4679

m.def(
"parse_onnx_buffer",
Expand Down
194 changes: 190 additions & 4 deletions test/ref/nonmaxsuppression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ TEST_CASE(nms_not_center_test)
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5}; // Only 3 boxes selected, no padding
EXPECT(migraphx::verify::verify_rms_range(result, gold));
}

Expand Down Expand Up @@ -299,7 +299,7 @@ TEST_CASE(nms_test)
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
EXPECT(migraphx::verify::verify_rms_range(result, gold));
}

Expand Down Expand Up @@ -337,7 +337,7 @@ TEST_CASE(nms_transpose1_test)
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
EXPECT(migraphx::verify::verify_rms_range(result, gold));
}

Expand Down Expand Up @@ -375,6 +375,192 @@ TEST_CASE(nms_transpose2_test)
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
EXPECT(migraphx::verify::verify_rms_range(result, gold));
}

// Static inputs + use_dyn_output=False
// Was the MAIN BUG — used to pad with fake [0,0,0] zeros
TEST_CASE(nms_static_input_dyn_out_false_test)
{
migraphx::program p;
auto* mm = p.get_main_module();

migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
std::vector<float> boxes_vec = {
0.0, 0.0, 1.0, 1.0,
0.0, 0.1, 1.0, 1.1,
0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0,
0.0, 10.1, 1.0, 11.1,
0.0, 100.0, 1.0, 101.0
};

migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};

auto boxes_l = mm->add_literal(migraphx::literal(boxes_s, boxes_vec));
auto scores_l = mm->add_literal(migraphx::literal(scores_s, scores_vec));
auto max_out_l = mm->add_literal(int64_t{3});
auto iou_threshold = mm->add_literal(0.5f);
auto score_threshold = mm->add_literal(0.0f);

auto r = mm->add_instruction(
migraphx::make_op("nonmaxsuppression",
{{"use_dyn_output", false}}),
boxes_l,
scores_l,
max_out_l,
iou_threshold,
score_threshold);
mm->add_return({r});

p.compile(migraphx::make_target("ref"));
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });

// Only 3 selected boxes — NO fake [0,0,0] padding
// Before fix: output was (6,3) with 3 fake zero rows
// After fix: output is (3,3) with only real boxes
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
EXPECT(result.size() == gold.size()); // must be 9 not 18
EXPECT(migraphx::verify::verify_rms_range(result, gold));
}


// Static inputs + use_dyn_output=True
// Used to crash with PARSE_EXPAND error https://github.com/ROCm/AMDMIGraphX/issues/4679
// Now works correctly with static input shapes
TEST_CASE(nms_static_input_dyn_out_true_test)
{
migraphx::program p;
auto* mm = p.get_main_module();

migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
std::vector<float> boxes_vec = {
0.0, 0.0, 1.0, 1.0,
0.0, 0.1, 1.0, 1.1,
0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0,
0.0, 10.1, 1.0, 11.1,
0.0, 100.0, 1.0, 101.0
};

migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};

auto boxes_l = mm->add_literal(migraphx::literal(boxes_s, boxes_vec));
auto scores_l = mm->add_literal(migraphx::literal(scores_s, scores_vec));
auto max_out_l = mm->add_literal(int64_t{3});
auto iou_threshold = mm->add_literal(0.5f);
auto score_threshold = mm->add_literal(0.0f);

auto r = mm->add_instruction(
migraphx::make_op("nonmaxsuppression",
{{"use_dyn_output", true}}),
boxes_l,
scores_l,
max_out_l,
iou_threshold,
score_threshold);
mm->add_return({r});

p.compile(migraphx::make_target("ref"));
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });

// Same correct output as use_dyn_output=False
// Before fix: crashed with PARSE_EXPAND error
// After fix: works correctly with static inputs
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5};
EXPECT(result.size() == gold.size());
EXPECT(migraphx::verify::verify_rms_range(result, gold));
}


TEST_CASE(nms_dyn_out_false_and_true_same_result_test)
{
auto run_nms = [](bool use_dyn_output) {
migraphx::program p;
auto* mm = p.get_main_module();

migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
std::vector<float> boxes_vec = {
0.0, 0.0, 1.0, 1.0,
0.0, 0.1, 1.0, 1.1,
0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0,
0.0, 10.1, 1.0, 11.1,
0.0, 100.0, 1.0, 101.0
};

migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};

auto boxes_l = mm->add_literal(migraphx::literal(boxes_s, boxes_vec));
auto scores_l = mm->add_literal(migraphx::literal(scores_s, scores_vec));
auto max_out_l = mm->add_literal(int64_t{3});
auto iou_threshold = mm->add_literal(0.5f);
auto score_threshold = mm->add_literal(0.0f);

auto r = mm->add_instruction(
migraphx::make_op("nonmaxsuppression",
{{"use_dyn_output", use_dyn_output}}),
boxes_l,
scores_l,
max_out_l,
iou_threshold,
score_threshold);
mm->add_return({r});

p.compile(migraphx::make_target("ref"));
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
return result;
};

auto result_false = run_nms(false);
auto result_true = run_nms(true);

// Both flags must produce identical output after fix
// Before fix: False gave (6,3), True gave (3,3) — different!
// After fix: Both give (3,3) — identical! ✅
Comment thread
adityas-amd marked this conversation as resolved.
Outdated
EXPECT(result_false.size() == result_true.size());
EXPECT(result_false == result_true);
}


// Dynamic inputs + use_dyn_output=False must throw
// This is CORRECT behavior — not a bug
// Ensures we didn't accidentally break this check
TEST_CASE(nms_dynamic_input_dyn_out_false_throws_test)
{
migraphx::program p;
auto* mm = p.get_main_module();

// Dynamic input shapes
migraphx::shape boxes_s{migraphx::shape::float_type, {{1, 3}, {6, 6}, {4, 4}}};
migraphx::shape scores_s{migraphx::shape::float_type, {{1, 3}, {1, 1}, {6, 6}}};

auto boxes_p = mm->add_parameter("boxes", boxes_s);
auto scores_p = mm->add_parameter("scores", scores_s);
auto max_out_l = mm->add_literal(int64_t{3});
auto iou_threshold = mm->add_literal(0.5f);
auto score_threshold = mm->add_literal(0.0f);

// Dynamic inputs + use_dyn_output=False MUST throw
// This is intentional — dynamic inputs require True
EXPECT(test::throws([&] {
mm->add_instruction(
migraphx::make_op("nonmaxsuppression",
{{"use_dyn_output", false}}),
boxes_p,
scores_p,
max_out_l,
iou_threshold,
score_threshold);
}));
}
Loading