From e4a184aacd33041b9588fa9a085dca83e816bc75 Mon Sep 17 00:00:00 2001 From: RyanJamesStewart Date: Thu, 21 May 2026 05:23:56 -0700 Subject: [PATCH] perf: apply min-alloc split to ByteViewGroupValueBuilder::take_n Follow-up to #22165, which replaced `drain(0..n).collect()` with the `split_vec_min_alloc` helper in the `bytes.rs` and `primitive.rs` `take_n` paths but did not cover the byte-view builder. `ByteViewGroupValueBuilder::take_n_inner` had the same idiom on `self.views`: `drain(0..n).collect()` always allocates `n` elements and leaves the retained vec at its pre-emit capacity. Under an OOM-triggered `EmitTo::First(n)` emit, `n` is close to `len`, so this copies the largest allocation. Routing through `split_vec_min_alloc` allocates `min(n, len - n)` instead, matching the other builders. Behavior is unchanged; existing `test_byte_view_take_n` / `test_byte_view_take_n_partial_completed_nonzero_index` cover both the drain and split_off branches. --- .../src/aggregates/group_values/multi_group_by/bytes_view.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index 9267cf4f27f35..6b3560c2ed26e 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -16,7 +16,7 @@ // under the License. 
use crate::aggregates::group_values::multi_group_by::{ - GroupColumn, Nulls, nulls_equal_to, + GroupColumn, Nulls, nulls_equal_to, split_vec_min_alloc, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ @@ -363,7 +363,7 @@ impl ByteViewGroupValueBuilder { // // - Shift the `buffer index` of remaining non-inlined `views` // - let first_n_views = self.views.drain(0..n).collect::<Vec<_>>(); + let first_n_views = split_vec_min_alloc(&mut self.views, n); let last_non_inlined_view = first_n_views .iter()