From 467ad5812bab1b3d1f54589739c36bdc0f6558d8 Mon Sep 17 00:00:00 2001
From: Bruno Volpato
Date: Tue, 10 Feb 2026 23:57:56 -0500
Subject: [PATCH] fix: return error instead of panic on schema mismatch in BatchCoalescer::push_batch

Replace assert_eq! with an error return when the batch column count
does not match the coalescer schema. The function already returns
Result<(), ArrowError>, so callers can handle this gracefully.
---
Reviewer notes (free text in this position, after the "---" separator, is not
part of the commit message):
* The new guard compares only arrays.len() with self.in_progress_arrays.len();
  no column data types are compared anywhere in this diff.
* In the added tests, "fewer_columns" / "more_columns" describe the coalescer's
  schema relative to the pushed batch (0 vs 1 and 2 vs 1 columns respectively).

 arrow-select/src/coalesce.rs | 69 ++++++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 2 deletions(-)

diff --git a/arrow-select/src/coalesce.rs b/arrow-select/src/coalesce.rs
index 5ea2d97e78ea..8fe88fb8c377 100644
--- a/arrow-select/src/coalesce.rs
+++ b/arrow-select/src/coalesce.rs
@@ -458,8 +458,14 @@ impl BatchCoalescer {
 
         let (_schema, arrays, mut num_rows) = batch.into_parts();
 
-        // setup input rows
-        assert_eq!(arrays.len(), self.in_progress_arrays.len());
+        // Validate column count matches the expected schema
+        if arrays.len() != self.in_progress_arrays.len() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Batch has {} columns but BatchCoalescer expects {}",
+                arrays.len(),
+                self.in_progress_arrays.len()
+            )));
+        }
         self.in_progress_arrays
             .iter_mut()
             .zip(arrays)
@@ -2178,4 +2184,63 @@ mod tests {
 
         assert_eq!(expected, actual);
     }
+
+    #[test]
+    fn test_push_batch_schema_mismatch_fewer_columns() {
+        // Coalescer expects 0 columns, batch has 1
+        let empty_schema = Arc::new(Schema::empty());
+        let mut coalescer = BatchCoalescer::new(empty_schema, 100);
+        let batch = uint32_batch(0..5);
+        let result = coalescer.push_batch(batch);
+        assert!(result.is_err());
+        let err = result.unwrap_err().to_string();
+        assert!(
+            err.contains("Batch has 1 columns but BatchCoalescer expects 0"),
+            "unexpected error: {err}"
+        );
+    }
+
+    #[test]
+    fn test_push_batch_schema_mismatch_more_columns() {
+        // Coalescer expects 2 columns, batch has 1
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("c0", DataType::UInt32, false),
+            Field::new("c1", DataType::UInt32, false),
+        ]));
+        let mut coalescer = BatchCoalescer::new(schema, 100);
+        let batch = uint32_batch(0..5);
+        let result = coalescer.push_batch(batch);
+        assert!(result.is_err());
+        let err = result.unwrap_err().to_string();
+        assert!(
+            err.contains("Batch has 1 columns but BatchCoalescer expects 2"),
+            "unexpected error: {err}"
+        );
+    }
+
+    #[test]
+    fn test_push_batch_schema_mismatch_two_vs_zero() {
+        // Coalescer expects 0 columns, batch has 2
+        let empty_schema = Arc::new(Schema::empty());
+        let mut coalescer = BatchCoalescer::new(empty_schema, 100);
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("c0", DataType::UInt32, false),
+            Field::new("c1", DataType::UInt32, false),
+        ]));
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(UInt32Array::from(vec![1, 2, 3])),
+                Arc::new(UInt32Array::from(vec![4, 5, 6])),
+            ],
+        )
+        .unwrap();
+        let result = coalescer.push_batch(batch);
+        assert!(result.is_err());
+        let err = result.unwrap_err().to_string();
+        assert!(
+            err.contains("Batch has 2 columns but BatchCoalescer expects 0"),
+            "unexpected error: {err}"
+        );
+    }
 }