refactor: rename is_mutation to is_merge_into, improve comments and tests

dantengsky · dantengsky · commit fddfa19d9c5c · 2026-04-10T14:05:12.000+08:00
- Rename RowFetch::is_mutation to is_merge_into for clarity
- Remove unnecessary serde(default) annotations
- Improve comments: explain why SELECT+LIMIT skips repartition,
  use "reducing" instead of "eliminating" for duplicate reads
- Rewrite test with proper structure, comments, and CREATE OR REPLACE
- Avoid unwrap in lazy_columns handling
diff --git a/src/query/service/src/physical_plans/physical_limit.rs b/src/query/service/src/physical_plans/physical_limit.rs
@@ -296,7 +296,7 @@ impl PhysicalPlanBuilder {
                 cols_to_fetch,
                 fetched_fields,
                 need_wrap_nullable: false,
-                is_mutation: false,
+                is_merge_into: false,
                 stat_info: Some(stat_info.clone()),
             });
         }
diff --git a/src/query/service/src/physical_plans/physical_mutation.rs b/src/query/service/src/physical_plans/physical_mutation.rs
@@ -453,28 +453,23 @@ impl PhysicalPlanBuilder {
 
         // If the mutation type is FullOperation, we use row_id column to split a block
         // into matched and not matched parts.
-        let has_lazy_columns = self
+        let lazy_columns = self
             .metadata
             .read()
             .get_table_lazy_columns(target_table_index)
-            .is_some_and(|cols| !cols.is_empty());
+            .filter(|cols| !cols.is_empty());
 
         if matches!(strategy, MutationStrategy::MixedMatched) {
             plan = PhysicalPlan::new(MutationSplit {
                 input: plan,
                 split_index: row_id_offset,
-                has_row_fetch: has_lazy_columns,
+                has_row_fetch: lazy_columns.is_some(),
                 meta: PhysicalPlanMeta::new("MutationSplit"),
             });
         }
 
         // Construct row fetch plan for lazy columns.
-        if has_lazy_columns {
-            let lazy_columns = self
-                .metadata
-                .read()
-                .get_table_lazy_columns(target_table_index)
-                .unwrap();
+        if let Some(lazy_columns) = lazy_columns {
             plan = build_mutation_row_fetch(
                 plan,
                 metadata.clone(),
@@ -816,7 +811,7 @@ fn build_mutation_row_fetch(
         cols_to_fetch,
         fetched_fields,
         need_wrap_nullable,
-        is_mutation: true,
+        is_merge_into: true,
         stat_info: None,
         meta: PhysicalPlanMeta::new("RowFetch"),
     })
diff --git a/src/query/service/src/physical_plans/physical_mutation_into_split.rs b/src/query/service/src/physical_plans/physical_mutation_into_split.rs
@@ -33,8 +33,8 @@ pub struct MutationSplit {
     pub meta: PhysicalPlanMeta,
     pub input: PhysicalPlan,
     pub split_index: IndexType,
-    /// Whether RowFetch follows this MutationSplit (lazy columns exist).
-    /// Block_id repartition is only beneficial when RowFetch is present.
+    /// True when a RowFetch stage follows this split (lazy columns exist); a block_id
+    /// repartition may then be inserted before the split to reduce duplicate block reads.
     pub has_row_fetch: bool,
 }
 
@@ -79,9 +79,8 @@ impl IPhysicalPlan for MutationSplit {
 
         let max_threads = builder.settings.get_max_threads()? as usize;
 
-        // Add block_id repartition before split so each downstream RowFetch
-        // processor sees rows from a disjoint set of blocks, eliminating
-        // duplicate block reads. Only useful when RowFetch follows.
+        // Repartition by block_id so each downstream RowFetch processor handles
+        // a disjoint set of blocks, reducing duplicate block reads.
         if self.has_row_fetch
             && max_threads > 1
             && builder
diff --git a/src/query/service/src/physical_plans/physical_row_fetch.rs b/src/query/service/src/physical_plans/physical_row_fetch.rs
@@ -51,9 +51,8 @@ pub struct RowFetch {
     pub row_id_col_offset: usize,
     pub fetched_fields: Vec<DataField>,
     pub need_wrap_nullable: bool,
-    /// True when this RowFetch is part of a MERGE INTO pipeline (not SELECT+LIMIT).
-    #[serde(default)]
-    pub is_mutation: bool,
+    /// True when this RowFetch is part of a MERGE INTO pipeline.
+    pub is_merge_into: bool,
 
     /// Only used for explain
     pub stat_info: Option<PlanStatsInfo>,
@@ -113,7 +112,7 @@ impl IPhysicalPlan for RowFetch {
             row_id_col_offset: self.row_id_col_offset,
             fetched_fields: self.fetched_fields.clone(),
             need_wrap_nullable: self.need_wrap_nullable,
-            is_mutation: self.is_mutation,
+            is_merge_into: self.is_merge_into,
             stat_info: self.stat_info.clone(),
         })
     }
@@ -132,10 +131,10 @@ impl IPhysicalPlan for RowFetch {
         if !MutationSplit::check_physical_plan(&self.input) {
             // For MatchedOnly MERGE INTO, add block_id repartition before RowFetch
             // to reduce duplicate block reads.
-            // Not applicable to SELECT+LIMIT: the exchange would destroy the sort
-            // order produced by Sort+Limit (MergePartitionProcessor uses Random
-            // strategy with non-deterministic output order).
-            if self.is_mutation {
+            // Not applicable to SELECT+LIMIT: pipeline.exchange() merges partitions
+            // with non-deterministic output order, which would destroy the sort
+            // order produced by Sort+Limit.
+            if self.is_merge_into {
                 let max_threads = builder.settings.get_max_threads()? as usize;
                 if max_threads > 1
                     && builder
diff --git a/src/query/storages/fuse/src/operations/merge_into/processors/block_id_partition_exchange.rs b/src/query/storages/fuse/src/operations/merge_into/processors/block_id_partition_exchange.rs
@@ -25,11 +25,12 @@ use databend_common_pipeline::basic::Exchange;
 /// Partitions data blocks by block_id extracted from the `_row_id` column.
 ///
 /// This ensures that rows belonging to the same physical block are routed
-/// to the same downstream processor, eliminating duplicate block reads
+/// to the same downstream processor, reducing duplicate block reads
 /// in the RowFetch stage of MERGE INTO.
 pub struct BlockIdPartitionExchange {
     row_id_col_offset: usize,
-    /// Round-robin counter for NULL row_ids (unmatched rows in MixedMatched).
+    /// Incrementing counter used by `partition()` to spread NULL row_ids
+    /// (unmatched rows in MixedMatched) evenly across partitions.
     null_counter: AtomicU64,
 }
 
diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0051_merge_into_block_id_repartition.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0051_merge_into_block_id_repartition.test

Original file line number	Diff line number	Diff line change
`@@ -296,7 +296,7 @@ impl PhysicalPlanBuilder {`
`296`	`296`	`cols_to_fetch,`
`297`	`297`	`fetched_fields,`
`298`	`298`	`need_wrap_nullable: false,`
`299`		`- is_mutation: false,`
	`299`	`+ is_merge_into: false,`
`300`	`300`	`stat_info: Some(stat_info.clone()),`
`301`	`301`	`});`
`302`	`302`	`}`