From ab553d948e0accd1899a516901d7ae11f1a9b39f Mon Sep 17 00:00:00 2001 From: Bhargava Vadlamani Date: Tue, 26 May 2026 15:51:46 -0700 Subject: [PATCH 1/4] zero_col_projection_row_count_fix --- .../src/main/scala/org/apache/spark/sql/comet/util/Utils.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala index 783367c054..2bacf70be8 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala @@ -224,6 +224,7 @@ object Utils extends CometTypeShim with Logging { val (fieldVectors, batchProviderOpt) = getBatchFieldVectors(batch) val root = new VectorSchemaRoot(fieldVectors.asJava) + root.setRowCount(batch.numRows()) val provider = batchProviderOpt.getOrElse(dictionaryProvider) val writer = new ArrowStreamWriter(root, provider, Channels.newChannel(out)) @@ -336,6 +337,8 @@ object Utils extends CometTypeShim with Logging { return (Array.empty, 0L, 0L) } + targetRoot.setRowCount(totalRows.toInt) + assert( targetRoot.getRowCount.toLong == totalRows, s"Row count mismatch after coalesce: ${targetRoot.getRowCount} != $totalRows") From 24d290dea215f32ab03fe510efdd163f7035a310 Mon Sep 17 00:00:00 2001 From: Bhargava Vadlamani Date: Tue, 26 May 2026 20:35:28 -0700 Subject: [PATCH 2/4] zero_col_projection_row_count_fix --- .../spark/sql/comet/util/UtilsSuite.scala | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 spark/src/test/scala/org/apache/spark/sql/comet/util/UtilsSuite.scala diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/util/UtilsSuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/util/UtilsSuite.scala new file mode 100644 index 0000000000..a79b862793 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/comet/util/UtilsSuite.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.comet.util + +import org.apache.spark.sql.CometTestBase +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} + +class UtilsSuite extends CometTestBase { + + test("serializeBatches preserves row count for a zero-column batch") { + val numRows = 5 + val batch = new ColumnarBatch(Array.empty[ColumnVector], numRows) + + val (rowCount, buf) = Utils.serializeBatches(Iterator(batch)).next() + assert(rowCount == numRows) + + val decoded = Utils.decodeBatches(buf, "test").toSeq + assert(decoded.map(_.numRows()).sum == numRows) + } + + test("coalesceBroadcastBatches preserves row count across zero-column inputs") { + val numRows = 5 + val numBatches = 3 + val batches = + (0 until numBatches).map(_ => new ColumnarBatch(Array.empty[ColumnVector], numRows)) + + val bufs = Utils.serializeBatches(batches.iterator).map(_._2).toSeq.iterator + val (coalesced, batchCount, totalRows) = Utils.coalesceBroadcastBatches(bufs) + + val expected = numRows.toLong * numBatches + assert(batchCount == numBatches) + assert(totalRows == expected) + + val decoded = coalesced.iterator.flatMap(b => Utils.decodeBatches(b, "test")).toSeq + assert(decoded.map(_.numRows()).sum == expected) + 
} +} From f87c5eeb794e874bdaf085f9f49eb94c6d67a62c Mon Sep 17 00:00:00 2001 From: Bhargava Vadlamani Date: Tue, 26 May 2026 20:47:15 -0700 Subject: [PATCH 3/4] zero_col_projection_row_count_fix --- .github/workflows/pr_build_linux.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index f7d6c1a73d..0e4988e368 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -383,6 +383,7 @@ jobs: org.apache.spark.sql.comet.CometDppFallbackRepro3949Suite org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite + org.apache.spark.sql.comet.util.UtilsSuite org.apache.comet.objectstore.NativeConfigSuite org.apache.spark.sql.CometToPrettyStringSuite org.apache.spark.sql.CometCollationSuite From c26c1c1f350dc59d87925a70805e30cd3ee24385 Mon Sep 17 00:00:00 2001 From: Bhargava Vadlamani Date: Thu, 28 May 2026 07:35:28 -0700 Subject: [PATCH 4/4] zero_col_projection_row_count_address_pr_comments --- .github/workflows/pr_build_macos.yml | 1 + .../scala/org/apache/spark/sql/comet/util/Utils.scala | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml index 7af77ca2c9..5101f5290c 100644 --- a/.github/workflows/pr_build_macos.yml +++ b/.github/workflows/pr_build_macos.yml @@ -223,6 +223,7 @@ jobs: org.apache.spark.sql.comet.CometDppFallbackRepro3949Suite org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite + org.apache.spark.sql.comet.util.UtilsSuite org.apache.comet.objectstore.NativeConfigSuite org.apache.spark.sql.CometToPrettyStringSuite org.apache.spark.sql.CometCollationSuite diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala index 
2bacf70be8..0343983e11 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala @@ -224,7 +224,10 @@ object Utils extends CometTypeShim with Logging { val (fieldVectors, batchProviderOpt) = getBatchFieldVectors(batch) val root = new VectorSchemaRoot(fieldVectors.asJava) - root.setRowCount(batch.numRows()) + if (fieldVectors.isEmpty) { + // VectorSchemaRoot cannot infer rowCount without field vectors + root.setRowCount(batch.numRows()) + } val provider = batchProviderOpt.getOrElse(dictionaryProvider) val writer = new ArrowStreamWriter(root, provider, Channels.newChannel(out)) @@ -337,7 +340,10 @@ object Utils extends CometTypeShim with Logging { return (Array.empty, 0L, 0L) } - targetRoot.setRowCount(totalRows.toInt) + if (targetRoot.getSchema.getFields.isEmpty) { + // VectorSchemaRootAppender does not update rowCount with no columns + targetRoot.setRowCount(Math.toIntExact(totalRows)) + } assert( targetRoot.getRowCount.toLong == totalRows,