From 529ab760d878ff50c48aef2a7889a983189619a6 Mon Sep 17 00:00:00 2001 From: klion26 Date: Tue, 9 Dec 2025 15:24:43 +0800 Subject: [PATCH 1/7] [Variant] Support field access in bracket for VariantPath --- parquet-variant/src/path.rs | 80 ++++++++++++++++++--- parquet-variant/src/utils.rs | 133 +++++++++++++++++++++++++++++------ 2 files changed, 183 insertions(+), 30 deletions(-) diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index 2aeb9df97d82..6fd2b4c9c2df 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -14,9 +14,9 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use std::{borrow::Cow, ops::Deref}; - use crate::utils::parse_path; +use arrow_schema::ArrowError; +use std::{borrow::Cow, ops::Deref}; /// Represents a qualified path to a potential subfield or index of a variant /// value. @@ -112,9 +112,11 @@ impl<'a> From>> for VariantPath<'a> { } /// Create from &str with support for dot notation -impl<'a> From<&'a str> for VariantPath<'a> { - fn from(path: &'a str) -> Self { - VariantPath::new(path.split(".").flat_map(parse_path).collect()) +impl<'a> TryFrom<&'a str> for VariantPath<'a> { + type Error = ArrowError; + + fn try_from(path: &'a str) -> Result { + parse_path(path).map(VariantPath::new) } } @@ -211,7 +213,7 @@ mod tests { #[test] fn test_variant_path_empty_str() { - let path = VariantPath::from(""); + let path = VariantPath::try_from("").unwrap(); assert!(path.is_empty()); } @@ -224,9 +226,10 @@ mod tests { #[test] fn test_variant_path_dot_notation_with_array_index() { - let path = VariantPath::from("city.store.books[3].title"); + let path = VariantPath::try_from("city.store.books[3].title").unwrap(); - let expected = VariantPath::from("city") + let expected = VariantPath::try_from("city") + .unwrap() .join("store") .join("books") .join(3) @@ -237,7 +240,7 @@ mod tests { #[test] fn test_variant_path_dot_notation_with_only_array_index() { - let path = VariantPath::from("[3]"); + let path = VariantPath::try_from("[3]").unwrap(); let expected = VariantPath::from(3); @@ -246,10 +249,67 @@ mod tests { #[test] fn test_variant_path_dot_notation_with_starting_array_index() { - let path = VariantPath::from("[3].title"); + let path = VariantPath::try_from("[3].title").unwrap(); let expected = VariantPath::from(3).join("title"); assert_eq!(path, expected); } + + #[test] + fn test_variant_path_field_in_bracket() { + // field with index + let path = VariantPath::try_from("foo[0].bar").unwrap(); + let expected = VariantPath::from_iter([ + VariantPathElement::field("foo"), + VariantPathElement::index(0), + VariantPathElement::field("bar"), + ]); + assert_eq!(path, expected); + + // index in the end + let path = VariantPath::try_from("foo.bar[42]").unwrap(); + let expected = VariantPath::from_iter([ + VariantPathElement::field("foo"), + VariantPathElement::field("bar"), + VariantPathElement::index(42), + ]); + assert_eq!(path, expected); + + // invalid index will be treated as field + let path = VariantPath::try_from("foo.bar[abc]").unwrap(); + let expected = VariantPath::from_iter([ + VariantPathElement::field("foo"), + VariantPathElement::field("bar"), + VariantPathElement::field("abc"), + ]); + assert_eq!(path, expected); + } + + #[test] + fn test_invalid_path_parse() { + // Leading dot + let err = VariantPath::try_from(".foo.bar").unwrap_err(); + assert_eq!(err.to_string(), "Parser error: Unexpected leading '.'"); + + // Trailing dot + let err = VariantPath::try_from("foo.bar.").unwrap_err(); + assert_eq!(err.to_string(), "Parser error: Unexpected trailing '.'"); + + // No ']' will be treated as error + let err = VariantPath::try_from("foo.bar[2.baz").unwrap_err(); + assert_eq!(err.to_string(), "Parser error: Unclosed '[' at byte 7"); + + // No ']' because of escaped. + let err = VariantPath::try_from("foo.bar[2\\].fds").unwrap_err(); + assert_eq!(err.to_string(), "Parser error: Unclosed '[' at byte 7"); + + // Trailing backslash in bracket + let err = VariantPath::try_from("foo.bar[fdafa\\").unwrap_err(); + assert_eq!(err.to_string(), "Parser error: Unclosed '[' at byte 7"); + + // No '[' before ']' + let err = VariantPath::try_from("foo.bar]baz").unwrap_err(); + assert_eq!(err.to_string(), "Parser error: Unexpected ']' at byte 7"); + } } diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs index 6accbcb36649..0984a601b213 100644 --- a/parquet-variant/src/utils.rs +++ b/parquet-variant/src/utils.rs @@ -150,36 +150,129 @@ pub(crate) fn fits_precision(n: impl Into) -> bool { n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N) } -// Helper fn to parse input segments like foo[0] or foo[0][0] +/// Parse a path string into a vector of [`VariantPathElement`]. +/// +/// # Syntax +/// - `.field` or `field` - access object field (do not support special char) +/// - `[index]` - access array element by index +/// - `[field]` - access object field (support special char with escape `\`) +/// +/// # Escape Rules +/// Inside brackets `[...]`: +/// - `\\` -> literal `\` +/// - `\]` -> literal `]` +/// - Any other `\x` -> literal `x` +/// +/// Outside brackets, no escaping is supported. +/// +/// # Examples +/// - `""` -> empty path +/// - `"foo"` -> single field `foo` +/// - `"foo.bar"` -> nested fields `foo`, `bar` +/// - `"[1]"` -> array index 1 +/// - `"foo[1].bar"` -> field `foo`, index 1, field `bar` +/// - `"[a.b]"` -> field `a.b` (dot is literal inside bracket) +/// - `"[a\\]b]"` -> field `a]b` (escaped `]` +/// - etc. +/// +/// # Errors +/// - Leading `.` (e.g., `".foo"`) +/// - Trailing `.` (e.g., `"foo."`) +/// - Unclosed '[' (e.g., `"foo[1"`) +/// - Unexpected ']' (e.g., `"foo]"`) +/// - Trailing '`' inside bracket (treated as unclosed bracket) #[inline] -pub(crate) fn parse_path<'a>(segment: &'a str) -> Vec> { - if segment.is_empty() { - return Vec::new(); +pub(crate) fn parse_path(s: &str) -> Result>, ArrowError> { + let scan_field = |start: usize| { + s[start..] + .find(['.', '[', ']']) + .map_or_else(|| s.len(), |p| start + p) + }; + + let bytes = s.as_bytes(); + if let Some(b'.') = bytes.first() { + return Err(ArrowError::ParseError("Unexpected leading '.'".into())); } - let mut path_elements = Vec::new(); - let mut base = segment; + let mut elements = Vec::new(); + let mut i = 0; - while let Some(stripped) = base.strip_suffix(']') { - let Some(open_pos) = stripped.rfind('[') else { - return vec![VariantPathElement::field(segment)]; + while i < bytes.len() { + let (elem, end) = match bytes[i] { + b'.' => { + i += 1; // skip the dot; a field must follow + let end = scan_field(i); + if end == i { + return Err(ArrowError::ParseError(match bytes.get(i) { + None => "Unexpected trailing '.'".into(), + Some(&c) => format!("Unexpected '{}' at byte {i}", c as char), + })); + } + (VariantPathElement::field(&s[i..end]), end) + } + b'[' => { + let (element, end) = parse_in_bracket(s, i)?; + (element, end) + } + b']' => { + return Err(ArrowError::ParseError(format!( + "Unexpected ']' at byte {i}" + ))); + } + _ => { + let end = scan_field(i); + (VariantPathElement::field(&s[i..end]), end) + } }; + elements.push(elem); + i = end; + } - let index_str = &stripped[open_pos + 1..]; - let Ok(index) = index_str.parse::() else { - return vec![VariantPathElement::field(segment)]; - }; + Ok(elements) +} - path_elements.push(VariantPathElement::index(index)); - base = &stripped[..open_pos]; - } +/// Parse `[digits | field]` starting at `i` (which points to `[`). +/// Returns (VariantPathElement, position after `]`). +fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, usize), ArrowError> { + let start = i + 1; // skip '[' - if !base.is_empty() { - path_elements.push(VariantPathElement::field(base)); + let mut unescaped = String::new(); + let mut chars = s[start..].char_indices().peekable(); + let mut end = None; + + while let Some((offset, c)) = chars.next() { + match c { + // Escape: take next char literally + '\\' => { + if let Some((_, next)) = chars.next() { + unescaped.push(next); + } + // Trailing backslash will be handled as 'unclosed [' below + } + ']' => { + // Unescaped ']' ends the bracket + end = Some(start + offset); + break; + } + _ => { + unescaped.push(c); + } + } } - path_elements.reverse(); - path_elements + let end = match end { + Some(e) => e, + None => { + return Err(ArrowError::ParseError(format!("Unclosed '[' at byte {i}"))); + } + }; + + let element = match unescaped.parse() { + Ok(idx) => VariantPathElement::index(idx), + Err(_) => VariantPathElement::field(unescaped), + }; + + Ok((element, end + 1)) } #[cfg(test)] From 64f87c8a4981e519cb80a63d0ba6eb02206acf2b Mon Sep 17 00:00:00 2001 From: klion26 Date: Tue, 27 Jan 2026 15:52:39 +0800 Subject: [PATCH 2/7] fix VariantPath usage in tests --- parquet-variant-compute/src/shred_variant.rs | 55 ++++++++++-------- parquet-variant-compute/src/variant_get.rs | 60 ++++++++++---------- parquet-variant/src/path.rs | 11 +++- 3 files changed, 71 insertions(+), 55 deletions(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index d82eb15af598..c5ca7610f59a 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -533,7 +533,7 @@ impl ShreddedSchemaBuilder { /// /// # Arguments /// - /// * `path` - Anything convertible to [`VariantPath`] (e.g., a `&str`) + /// * `path` - Anything convertible to [`VariantPath`] /// * `field` - Anything convertible via [`IntoShreddingField`] (e.g. `FieldRef`, /// `&DataType`, or `(&DataType, bool)` to control nullability) pub fn with_path<'a, P, F>(mut self, path: P, field: F) -> Self @@ -1596,8 +1596,8 @@ mod tests { // Create target schema: struct // Both types are supported for shredding let target_schema = ShreddedSchemaBuilder::default() - .with_path("score", &DataType::Float64) - .with_path("age", &DataType::Int64) + .with_path(VariantPath::from_str_unchecked("score"), &DataType::Float64) + .with_path(VariantPath::from_str_unchecked("age"), &DataType::Int64) .build(); let result = shred_variant(&input, &target_schema).unwrap(); @@ -2008,7 +2008,7 @@ mod tests { // Test with schema containing only id field let schema1 = ShreddedSchemaBuilder::default() - .with_path("id", &DataType::Int32) + .with_path(VariantPath::from_str_unchecked("id"), &DataType::Int32) .build(); let result1 = shred_variant(&input, &schema1).unwrap(); let value_field1 = result1.value_field().unwrap(); @@ -2016,8 +2016,8 @@ mod tests { // Test with schema containing id and age fields let schema2 = ShreddedSchemaBuilder::default() - .with_path("id", &DataType::Int32) - .with_path("age", &DataType::Int64) + .with_path(VariantPath::from_str_unchecked("id"), &DataType::Int32) + .with_path(VariantPath::from_str_unchecked("age"), &DataType::Int64) .build(); let result2 = shred_variant(&input, &schema2).unwrap(); let value_field2 = result2.value_field().unwrap(); @@ -2025,9 +2025,9 @@ mod tests { // Test with schema containing all fields let schema3 = ShreddedSchemaBuilder::default() - .with_path("id", &DataType::Int32) - .with_path("age", &DataType::Int64) - .with_path("score", &DataType::Float64) + .with_path(VariantPath::from_str_unchecked("id"), &DataType::Int32) + .with_path(VariantPath::from_str_unchecked("age"), &DataType::Int64) + .with_path(VariantPath::from_str_unchecked("score"), &DataType::Float64) .build(); let result3 = shred_variant(&input, &schema3).unwrap(); let value_field3 = result3.value_field().unwrap(); @@ -2069,8 +2069,14 @@ mod tests { ]); let target_schema = ShreddedSchemaBuilder::default() - .with_path("id", DataType::FixedSizeBinary(16)) - .with_path("session_id", DataType::FixedSizeBinary(16)) + .with_path( + VariantPath::from_str_unchecked("id"), + DataType::FixedSizeBinary(16), + ) + .with_path( + VariantPath::from_str_unchecked("session_id"), + DataType::FixedSizeBinary(16), + ) .build(); let result = shred_variant(&input, &target_schema).unwrap(); @@ -2253,8 +2259,8 @@ mod tests { #[test] fn test_variant_schema_builder_simple() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path("a", &DataType::Int64) - .with_path("b", &DataType::Float64) + .with_path(VariantPath::from_str_unchecked("a"), &DataType::Int64) + .with_path(VariantPath::from_str_unchecked("b"), &DataType::Float64) .build(); assert_eq!( @@ -2269,9 +2275,9 @@ mod tests { #[test] fn test_variant_schema_builder_nested() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path("a", &DataType::Int64) - .with_path("b.c", &DataType::Utf8) - .with_path("b.d", &DataType::Float64) + .with_path(VariantPath::from_str_unchecked("a"), &DataType::Int64) + .with_path(VariantPath::from_str_unchecked("b.c"), &DataType::Utf8) + .with_path(VariantPath::from_str_unchecked("b.d"), &DataType::Float64) .build(); assert_eq!( @@ -2311,10 +2317,13 @@ mod tests { fn test_variant_schema_builder_custom_nullability() { let shredding_type = ShreddedSchemaBuilder::default() .with_path( - "foo", + VariantPath::from_str_unchecked("foo"), Arc::new(Field::new("should_be_renamed", DataType::Utf8, false)), ) - .with_path("bar", (&DataType::Int64, false)) + .with_path( + VariantPath::from_str_unchecked("bar"), + (&DataType::Int64, false), + ) .build(); let DataType::Struct(fields) = shredding_type else { @@ -2346,8 +2355,8 @@ mod tests { ]); let shredding_type = ShreddedSchemaBuilder::default() - .with_path("time", &DataType::Int64) - .with_path("hostname", &DataType::Utf8) + .with_path(VariantPath::from_str_unchecked("time"), &DataType::Int64) + .with_path(VariantPath::from_str_unchecked("hostname"), &DataType::Utf8) .build(); let result = shred_variant(&input, &shredding_type).unwrap(); @@ -2429,8 +2438,8 @@ mod tests { #[test] fn test_variant_schema_builder_conflicting_path() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path("a", &DataType::Int64) - .with_path("a", &DataType::Float64) + .with_path(VariantPath::from_str_unchecked("a"), &DataType::Int64) + .with_path(VariantPath::from_str_unchecked("a"), &DataType::Float64) .build(); assert_eq!( @@ -2454,7 +2463,7 @@ mod tests { #[test] fn test_variant_schema_builder_empty_path() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path("", &DataType::Int64) + .with_path(VariantPath::from_str_unchecked(""), &DataType::Int64) .build(); assert_eq!(shredding_type, DataType::Int64); diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 35cd5b4e40a6..85808b709f35 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -390,7 +390,7 @@ mod test { fn get_primitive_variant_field() { single_variant_get_test( r#"{"some_field": 1234}"#, - VariantPath::from("some_field"), + VariantPath::from_str_unchecked("some_field"), "1234", ); } @@ -404,7 +404,7 @@ mod test { fn get_primitive_variant_inside_object_of_object() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from("top_level_field").join("inner_field"), + VariantPath::from_str_unchecked("top_level_field").join("inner_field"), "1234", ); } @@ -422,7 +422,7 @@ mod test { fn get_primitive_variant_inside_object_of_list() { single_variant_get_test( r#"{"some_field": [1234]}"#, - VariantPath::from("some_field").join(0), + VariantPath::from_str_unchecked("some_field").join(0), "1234", ); } @@ -431,7 +431,7 @@ mod test { fn get_complex_variant() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from("top_level_field"), + VariantPath::from_str_unchecked("top_level_field"), r#"{"inner_field": 1234}"#, ); } @@ -1818,7 +1818,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test: Extract the "x" field as VariantArray first - let options = GetOptions::new_with_path(VariantPath::from("x")); + let options = GetOptions::new_with_path(VariantPath::from_str_unchecked("x")); let result = variant_get(&array, options).unwrap(); let result_variant = VariantArray::try_new(&result).unwrap(); @@ -1837,7 +1837,7 @@ mod test { // Test: Extract the "x" field as Int32Array (type conversion) let field = Field::new("x", DataType::Int32, false); - let options = GetOptions::new_with_path(VariantPath::from("x")) + let options = GetOptions::new_with_path(VariantPath::from_str_unchecked("x")) .with_as_type(Some(FieldRef::from(field))); let result = variant_get(&array, options).unwrap(); @@ -1930,11 +1930,11 @@ mod test { // Check: How does VariantPath parse different strings? println!("Testing path parsing:"); - let path_x = VariantPath::from("x"); + let path_x = VariantPath::from_str_unchecked("x"); let elements_x: Vec<_> = path_x.iter().collect(); println!(" 'x' -> {} elements: {:?}", elements_x.len(), elements_x); - let path_ax = VariantPath::from("a.x"); + let path_ax = VariantPath::from_str_unchecked("a.x"); let elements_ax: Vec<_> = path_ax.iter().collect(); println!( " 'a.x' -> {} elements: {:?}", @@ -1942,7 +1942,7 @@ mod test { elements_ax ); - let path_ax_alt = VariantPath::from("$.a.x"); + let path_ax_alt = VariantPath::from_str_unchecked("$.a.x"); let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect(); println!( " '$.a.x' -> {} elements: {:?}", @@ -1950,10 +1950,10 @@ mod test { elements_ax_alt ); - let path_nested = VariantPath::from("a").join("x"); + let path_nested = VariantPath::from_str_unchecked("a").join("x"); let elements_nested: Vec<_> = path_nested.iter().collect(); println!( - " VariantPath::from('a').join('x') -> {} elements: {:?}", + " VariantPath::from_str_unchecked('a').join('x') -> {} elements: {:?}", elements_nested.len(), elements_nested ); @@ -1962,7 +1962,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test if variant_get with REAL nested path throws not implemented error - let real_nested_path = VariantPath::from("a").join("x"); + let real_nested_path = VariantPath::from_str_unchecked("a").join("x"); let options = GetOptions::new_with_path(real_nested_path); let result = variant_get(&array, options); @@ -1995,7 +1995,7 @@ mod test { let unshredded_array = create_depth_0_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from("x"); + let path = VariantPath::from_str_unchecked("x"); let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2011,7 +2011,7 @@ mod test { let shredded_array = create_depth_0_shredded_test_data_simple(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from("x"); + let path = VariantPath::from_str_unchecked("x"); let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2033,7 +2033,7 @@ mod test { let unshredded_array = create_nested_path_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from("a.x"); // Dot notation! + let path = VariantPath::from_str_unchecked("a.x"); // Dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2048,7 +2048,7 @@ mod test { let shredded_array = create_depth_1_shredded_test_data_working(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from("a.x"); // Dot notation! + let path = VariantPath::from_str_unchecked("a.x"); // Dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2070,7 +2070,7 @@ mod test { let unshredded_array = create_depth_2_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from("a.b.x"); // Double nested dot notation! + let path = VariantPath::from_str_unchecked("a.b.x"); // Double nested dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2086,7 +2086,7 @@ mod test { let shredded_array = create_depth_2_shredded_test_data_working(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from("a.b.x"); // Double nested dot notation! + let path = VariantPath::from_str_unchecked("a.b.x"); // Double nested dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2108,7 +2108,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test: Extract the "x" field (single level) - this works - let single_path = VariantPath::from("x"); + let single_path = VariantPath::from_str_unchecked("x"); let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new_with_path(single_path).with_as_type(Some(FieldRef::from(field))); @@ -2117,7 +2117,7 @@ mod test { println!("Single path 'x' works - result: {:?}", result); // Test: Try nested path "a.x" - this is what we need to implement - let nested_path = VariantPath::from("a").join("x"); + let nested_path = VariantPath::from_str_unchecked("a").join("x"); let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new_with_path(nested_path).with_as_type(Some(FieldRef::from(field))); @@ -2584,7 +2584,7 @@ mod test { // Try to access a field with safe cast options (should return NULLs) let safe_options = GetOptions { - path: VariantPath::from("nonexistent_field"), + path: VariantPath::from_str_unchecked("nonexistent_field"), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions::default(), // safe = true }; @@ -2601,7 +2601,7 @@ mod test { // Try to access a field with strict cast options (should error) let strict_options = GetOptions { - path: VariantPath::from("nonexistent_field"), + path: VariantPath::from_str_unchecked("nonexistent_field"), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions { safe: false, @@ -2710,7 +2710,7 @@ mod test { // 2. The "a" field's typed_value nulls // 3. The "x" field's typed_value nulls let options = GetOptions { - path: VariantPath::from("a.x"), + path: VariantPath::from_str_unchecked("a.x"), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions::default(), }; @@ -2844,7 +2844,7 @@ mod test { // Test 1: nullable field (should allow nulls from cast failures) let nullable_field = Arc::new(Field::new("result", DataType::Int32, true)); let options_nullable = GetOptions { - path: VariantPath::from("x"), + path: VariantPath::from_str_unchecked("x"), as_type: Some(nullable_field.clone()), cast_options: CastOptions::default(), }; @@ -2897,7 +2897,7 @@ mod test { // Test 2: non-nullable field (behavior should be the same with safe casting) let non_nullable_field = Arc::new(Field::new("result", DataType::Int32, false)); let options_non_nullable = GetOptions { - path: VariantPath::from("x"), + path: VariantPath::from_str_unchecked("x"), as_type: Some(non_nullable_field.clone()), cast_options: CastOptions::default(), // safe=true by default }; @@ -3072,7 +3072,7 @@ mod test { let variant_array = create_comprehensive_nested_shredded_variant(); // Extract "outer" field using path-based variant_get - let path = VariantPath::from("outer"); + let path = VariantPath::from_str_unchecked("outer"); let inner_field = Field::new("inner", DataType::Int32, true); let result_type = DataType::Struct(Fields::from(vec![inner_field])); @@ -3117,7 +3117,7 @@ mod test { let variant_array = create_comprehensive_nested_shredded_variant(); // Extract "outer.inner" field using path-based variant_get - let path = VariantPath::from("outer").join("inner"); + let path = VariantPath::from_str_unchecked("outer").join("inner"); let options = GetOptions { path, @@ -4113,7 +4113,7 @@ mod test { let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); let all_nulls_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from("all_nulls")) + GetOptions::new_with_path(VariantPath::from_str_unchecked("all_nulls")) .with_as_type(Some(all_nulls_field_ref)), ) .unwrap(); @@ -4123,7 +4123,7 @@ mod test { let some_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); let some_nulls_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from("some_nulls")) + GetOptions::new_with_path(VariantPath::from_str_unchecked("some_nulls")) .with_as_type(Some(some_nulls_field_ref)), ) .unwrap(); @@ -4138,7 +4138,7 @@ mod test { )); let struct_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from("struct_field")) + GetOptions::new_with_path(VariantPath::from_str_unchecked("struct_field")) .with_as_type(Some(struct_field_ref)), ) .unwrap(); diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index 6fd2b4c9c2df..c1fc6dced918 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -201,6 +201,13 @@ impl<'a> From for VariantPathElement<'a> { } } +/// This is mainly used for tests, it will panic if the input is invalid. +impl<'a> VariantPath<'a> { + pub fn from_str_unchecked(s: &'a str) -> Self { + VariantPath::try_from(s).unwrap() + } +} + #[cfg(test)] mod tests { use super::*; @@ -213,7 +220,7 @@ mod tests { #[test] fn test_variant_path_empty_str() { - let path = VariantPath::try_from("").unwrap(); + let path = VariantPath::from_str_unchecked(""); assert!(path.is_empty()); } @@ -226,7 +233,7 @@ mod tests { #[test] fn test_variant_path_dot_notation_with_array_index() { - let path = VariantPath::try_from("city.store.books[3].title").unwrap(); + let path = VariantPath::from_str_unchecked("city.store.books[3].title"); let expected = VariantPath::try_from("city") .unwrap() From 1357423d86c0b99317fef4884297f7e3dfe6695c Mon Sep 17 00:00:00 2001 From: klion26 Date: Tue, 27 Jan 2026 16:32:21 +0800 Subject: [PATCH 3/7] fix test --- parquet-variant-compute/src/shred_variant.rs | 6 ++-- parquet-variant/src/path.rs | 33 +++++++++++++------- parquet-variant/src/variant.rs | 6 ++-- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index c5ca7610f59a..ad791cd0d932 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -496,12 +496,12 @@ impl IntoShreddingField for (DataType, bool) { /// // Define the shredding schema using the builder /// let shredding_type = ShreddedSchemaBuilder::default() /// // store the "time" field as a separate UTC timestamp -/// .with_path("time", (&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true)) +/// .with_path(VariantPath::from_str_unchecked("time"), (&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true)) /// // store hostname as non-nullable Utf8 -/// .with_path("hostname", (&DataType::Utf8, false)) +/// .with_path(VariantPath::from_str_unchecked("hostname"), (&DataType::Utf8, false)) /// // pass a FieldRef directly /// .with_path( -/// "metadata.trace_id", +/// VariantPath::from_str_unchecked("metadata.trace_id"), /// Arc::new(Field::new("trace_id", DataType::FixedSizeBinary(16), false)), /// ) /// // field name with a dot: use VariantPath to avoid splitting diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index c1fc6dced918..c5952bbc8e72 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -33,7 +33,7 @@ use std::{borrow::Cow, ops::Deref}; /// ```rust /// # use parquet_variant::{VariantPath, VariantPathElement}; /// // access the field "foo" in a variant object value -/// let path = VariantPath::from("foo"); +/// let path = VariantPath::try_from("foo").unwrap(); /// // access the first element in a variant list vale /// let path = VariantPath::from(0); /// ``` @@ -43,7 +43,7 @@ use std::{borrow::Cow, ops::Deref}; /// # use parquet_variant::{VariantPath, VariantPathElement}; /// /// You can also create a path by joining elements together: /// // access the field "foo" and then the first element in a variant list value -/// let path = VariantPath::from("foo").join(0); +/// let path = VariantPath::try_from("foo").unwrap().join(0); /// // this is the same as the previous one /// let path2 = VariantPath::from_iter(["foo".into(), 0.into()]); /// assert_eq!(path, path2); @@ -59,8 +59,8 @@ use std::{borrow::Cow, ops::Deref}; /// ``` /// # use parquet_variant::{VariantPath, VariantPathElement}; /// /// You can also convert strings directly into paths using dot notation -/// let path = VariantPath::from("foo.bar.baz"); -/// let expected = VariantPath::from("foo").join("bar").join("baz"); +/// let path = VariantPath::try_from("foo.bar.baz").unwrap(); +/// let expected = VariantPath::try_from("foo").unwrap().join("bar").join("baz"); /// assert_eq!(path, expected); /// ``` /// @@ -69,11 +69,21 @@ use std::{borrow::Cow, ops::Deref}; /// # use parquet_variant::{VariantPath, VariantPathElement}; /// /// You can access the paths using slices /// // access the field "foo" and then the first element in a variant list value -/// let path = VariantPath::from("foo") +/// let path = VariantPath::try_from("foo").unwrap() /// .join("bar") /// .join("baz"); /// assert_eq!(path[1], VariantPathElement::field("bar")); /// ``` +/// +/// # Example: Accessing filed with bracket +/// ``` +/// # use parquet_variant::{VariantPath, VariantPathElement}; +/// let path = VariantPath::try_from("a[b.c].d[2]").unwrap(); +/// let expected = VariantPath::from_iter([VariantPathElement::field("a"), +/// VariantPathElement::field("b.c"), +/// VariantPathElement::field("d"), +/// VariantPathElement::index(2)]); +/// assert_eq!(path, expected) #[derive(Debug, Clone, PartialEq, Default)] pub struct VariantPath<'a>(Vec>); @@ -103,6 +113,12 @@ impl<'a> VariantPath<'a> { pub fn is_empty(&self) -> bool { self.0.is_empty() } + + /// Parses a path string, panics on invalid input. + /// Only use for tests for known-valid input. + pub fn from_str_unchecked(s: &'a str) -> Self { + VariantPath::try_from(s).unwrap() + } } impl<'a> From>> for VariantPath<'a> { @@ -201,13 +217,6 @@ impl<'a> From for VariantPathElement<'a> { } } -/// This is mainly used for tests, it will panic if the input is invalid. -impl<'a> VariantPath<'a> { - pub fn from_str_unchecked(s: &'a str) -> Self { - VariantPath::try_from(s).unwrap() - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 819c20d554ce..53fb3c4b1e10 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -1459,9 +1459,9 @@ impl<'m, 'v> Variant<'m, 'v> { /// // given a variant like `{"foo": ["bar", "baz"]}` /// let variant = Variant::new(&metadata, &value); /// // Accessing a non existent path returns None - /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None); + /// assert_eq!(variant.get_path(&VariantPath::try_from("non_existent").unwrap()), None); /// // Access obj["foo"] - /// let path = VariantPath::from("foo"); + /// let path = VariantPath::try_from("foo").unwrap(); /// let foo = variant.get_path(&path).expect("field `foo` should exist"); /// assert!(foo.as_list().is_some(), "field `foo` should be a list"); /// // Access foo[0] @@ -1470,7 +1470,7 @@ impl<'m, 'v> Variant<'m, 'v> { /// // bar is a string /// assert_eq!(bar.as_string(), Some("bar")); /// // You can also access nested paths - /// let path = VariantPath::from("foo").join(0); + /// let path = VariantPath::try_from("foo").unwrap().join(0); /// assert_eq!(variant.get_path(&path).unwrap(), bar); /// ``` pub fn get_path(&self, path: &VariantPath) -> Option> { From 94268264bfd17fe6a30b5b9ec83f97e9a10bc9ae Mon Sep 17 00:00:00 2001 From: klion26 Date: Mon, 9 Feb 2026 19:57:44 +0800 Subject: [PATCH 4/7] address comment --- parquet-variant-compute/src/shred_variant.rs | 50 ++++++++--------- parquet-variant-compute/src/variant_get.rs | 58 ++++++++++---------- parquet-variant/src/path.rs | 6 +- 3 files changed, 57 insertions(+), 57 deletions(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index ad791cd0d932..2ebf20460f01 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -496,12 +496,12 @@ impl IntoShreddingField for (DataType, bool) { /// // Define the shredding schema using the builder /// let shredding_type = ShreddedSchemaBuilder::default() /// // store the "time" field as a separate UTC timestamp -/// .with_path(VariantPath::from_str_unchecked("time"), (&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true)) +/// .with_path(VariantPath::from_str_or_panic("time"), (&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true)) /// // store hostname as non-nullable Utf8 -/// .with_path(VariantPath::from_str_unchecked("hostname"), (&DataType::Utf8, false)) +/// .with_path(VariantPath::from_str_or_panic("hostname"), (&DataType::Utf8, false)) /// // pass a FieldRef directly /// .with_path( -/// VariantPath::from_str_unchecked("metadata.trace_id"), +/// VariantPath::from_str_or_panic("metadata.trace_id"), /// Arc::new(Field::new("trace_id", DataType::FixedSizeBinary(16), false)), /// ) /// // field name with a dot: use VariantPath to avoid splitting @@ -1596,8 +1596,8 @@ mod tests { // Create target schema: struct // Both types are supported for shredding let target_schema = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("score"), &DataType::Float64) - .with_path(VariantPath::from_str_unchecked("age"), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic("score"), &DataType::Float64) + .with_path(VariantPath::from_str_or_panic("age"), &DataType::Int64) .build(); let result = shred_variant(&input, &target_schema).unwrap(); @@ -2008,7 +2008,7 @@ mod tests { // Test with schema containing only id field let schema1 = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("id"), &DataType::Int32) + .with_path(VariantPath::from_str_or_panic("id"), &DataType::Int32) .build(); let result1 = shred_variant(&input, &schema1).unwrap(); let value_field1 = result1.value_field().unwrap(); @@ -2016,8 +2016,8 @@ mod tests { // Test with schema containing id and age fields let schema2 = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("id"), &DataType::Int32) - .with_path(VariantPath::from_str_unchecked("age"), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic("id"), &DataType::Int32) + .with_path(VariantPath::from_str_or_panic("age"), &DataType::Int64) .build(); let result2 = shred_variant(&input, &schema2).unwrap(); let value_field2 = result2.value_field().unwrap(); @@ -2025,9 +2025,9 @@ mod tests { // Test with schema containing all fields let schema3 = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("id"), &DataType::Int32) - .with_path(VariantPath::from_str_unchecked("age"), &DataType::Int64) - .with_path(VariantPath::from_str_unchecked("score"), &DataType::Float64) + .with_path(VariantPath::from_str_or_panic("id"), &DataType::Int32) + .with_path(VariantPath::from_str_or_panic("age"), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic("score"), &DataType::Float64) .build(); let result3 = shred_variant(&input, &schema3).unwrap(); let value_field3 = result3.value_field().unwrap(); @@ -2070,11 +2070,11 @@ mod tests { let target_schema = ShreddedSchemaBuilder::default() .with_path( - VariantPath::from_str_unchecked("id"), + VariantPath::from_str_or_panic("id"), DataType::FixedSizeBinary(16), ) .with_path( - VariantPath::from_str_unchecked("session_id"), + VariantPath::from_str_or_panic("session_id"), DataType::FixedSizeBinary(16), ) .build(); @@ -2259,8 +2259,8 @@ mod tests { #[test] fn test_variant_schema_builder_simple() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("a"), &DataType::Int64) - .with_path(VariantPath::from_str_unchecked("b"), &DataType::Float64) + .with_path(VariantPath::from_str_or_panic("a"), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic("b"), &DataType::Float64) .build(); assert_eq!( @@ -2275,9 +2275,9 @@ mod tests { #[test] fn test_variant_schema_builder_nested() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("a"), &DataType::Int64) - .with_path(VariantPath::from_str_unchecked("b.c"), &DataType::Utf8) - .with_path(VariantPath::from_str_unchecked("b.d"), &DataType::Float64) + .with_path(VariantPath::from_str_or_panic("a"), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic("b.c"), &DataType::Utf8) + .with_path(VariantPath::from_str_or_panic("b.d"), &DataType::Float64) .build(); assert_eq!( @@ -2317,11 +2317,11 @@ mod tests { fn test_variant_schema_builder_custom_nullability() { let shredding_type = ShreddedSchemaBuilder::default() .with_path( - VariantPath::from_str_unchecked("foo"), + VariantPath::from_str_or_panic("foo"), Arc::new(Field::new("should_be_renamed", DataType::Utf8, false)), ) .with_path( - VariantPath::from_str_unchecked("bar"), + VariantPath::from_str_or_panic("bar"), (&DataType::Int64, false), ) .build(); @@ -2355,8 +2355,8 @@ mod tests { ]); let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("time"), &DataType::Int64) - .with_path(VariantPath::from_str_unchecked("hostname"), &DataType::Utf8) + .with_path(VariantPath::from_str_or_panic("time"), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic("hostname"), &DataType::Utf8) .build(); let result = shred_variant(&input, &shredding_type).unwrap(); @@ -2438,8 +2438,8 @@ mod tests { #[test] fn test_variant_schema_builder_conflicting_path() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked("a"), &DataType::Int64) - .with_path(VariantPath::from_str_unchecked("a"), &DataType::Float64) + .with_path(VariantPath::from_str_or_panic("a"), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic("a"), &DataType::Float64) .build(); assert_eq!( @@ -2463,7 +2463,7 @@ mod tests { #[test] fn test_variant_schema_builder_empty_path() { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_unchecked(""), &DataType::Int64) + .with_path(VariantPath::from_str_or_panic(""), &DataType::Int64) .build(); assert_eq!(shredding_type, DataType::Int64); diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 85808b709f35..e4698e739176 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -390,7 +390,7 @@ mod test { fn get_primitive_variant_field() { single_variant_get_test( r#"{"some_field": 1234}"#, - VariantPath::from_str_unchecked("some_field"), + VariantPath::from_str_or_panic("some_field"), "1234", ); } @@ -404,7 +404,7 @@ mod test { fn get_primitive_variant_inside_object_of_object() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from_str_unchecked("top_level_field").join("inner_field"), + VariantPath::from_str_or_panic("top_level_field").join("inner_field"), "1234", ); } @@ -422,7 +422,7 @@ mod test { fn get_primitive_variant_inside_object_of_list() { single_variant_get_test( r#"{"some_field": [1234]}"#, - VariantPath::from_str_unchecked("some_field").join(0), + VariantPath::from_str_or_panic("some_field").join(0), "1234", ); } @@ -431,7 +431,7 @@ mod test { fn get_complex_variant() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from_str_unchecked("top_level_field"), + VariantPath::from_str_or_panic("top_level_field"), r#"{"inner_field": 1234}"#, ); } @@ -1818,7 +1818,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test: Extract the "x" field as VariantArray first - let options = GetOptions::new_with_path(VariantPath::from_str_unchecked("x")); + let options = GetOptions::new_with_path(VariantPath::from_str_or_panic("x")); let result = variant_get(&array, options).unwrap(); let result_variant = VariantArray::try_new(&result).unwrap(); @@ -1837,7 +1837,7 @@ mod test { // Test: Extract the "x" field as Int32Array (type conversion) let field = Field::new("x", DataType::Int32, false); - let options = GetOptions::new_with_path(VariantPath::from_str_unchecked("x")) + let options = GetOptions::new_with_path(VariantPath::from_str_or_panic("x")) .with_as_type(Some(FieldRef::from(field))); let result = variant_get(&array, options).unwrap(); @@ -1930,11 +1930,11 @@ mod test { // Check: How does VariantPath parse different strings? println!("Testing path parsing:"); - let path_x = VariantPath::from_str_unchecked("x"); + let path_x = VariantPath::from_str_or_panic("x"); let elements_x: Vec<_> = path_x.iter().collect(); println!(" 'x' -> {} elements: {:?}", elements_x.len(), elements_x); - let path_ax = VariantPath::from_str_unchecked("a.x"); + let path_ax = VariantPath::from_str_or_panic("a.x"); let elements_ax: Vec<_> = path_ax.iter().collect(); println!( " 'a.x' -> {} elements: {:?}", @@ -1942,7 +1942,7 @@ mod test { elements_ax ); - let path_ax_alt = VariantPath::from_str_unchecked("$.a.x"); + let path_ax_alt = VariantPath::from_str_or_panic("$.a.x"); let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect(); println!( " '$.a.x' -> {} elements: {:?}", @@ -1950,7 +1950,7 @@ mod test { elements_ax_alt ); - let path_nested = VariantPath::from_str_unchecked("a").join("x"); + let path_nested = VariantPath::from_str_or_panic("a").join("x"); let elements_nested: Vec<_> = path_nested.iter().collect(); println!( " VariantPath::from_str_unchecked('a').join('x') -> {} elements: {:?}", @@ -1962,7 +1962,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test if variant_get with REAL nested path throws not implemented error - let real_nested_path = VariantPath::from_str_unchecked("a").join("x"); + let real_nested_path = VariantPath::from_str_or_panic("a").join("x"); let options = GetOptions::new_with_path(real_nested_path); let result = variant_get(&array, options); @@ -1995,7 +1995,7 @@ mod test { let unshredded_array = create_depth_0_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_unchecked("x"); + let path = VariantPath::from_str_or_panic("x"); let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2011,7 +2011,7 @@ mod test { let shredded_array = create_depth_0_shredded_test_data_simple(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_unchecked("x"); + let path = VariantPath::from_str_or_panic("x"); let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2033,7 +2033,7 @@ mod test { let unshredded_array = create_nested_path_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_unchecked("a.x"); // Dot notation! + let path = VariantPath::from_str_or_panic("a.x"); // Dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2048,7 +2048,7 @@ mod test { let shredded_array = create_depth_1_shredded_test_data_working(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_unchecked("a.x"); // Dot notation! + let path = VariantPath::from_str_or_panic("a.x"); // Dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2070,7 +2070,7 @@ mod test { let unshredded_array = create_depth_2_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_unchecked("a.b.x"); // Double nested dot notation! + let path = VariantPath::from_str_or_panic("a.b.x"); // Double nested dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2086,7 +2086,7 @@ mod test { let shredded_array = create_depth_2_shredded_test_data_working(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_unchecked("a.b.x"); // Double nested dot notation! + let path = VariantPath::from_str_or_panic("a.b.x"); // Double nested dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2108,7 +2108,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test: Extract the "x" field (single level) - this works - let single_path = VariantPath::from_str_unchecked("x"); + let single_path = VariantPath::from_str_or_panic("x"); let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new_with_path(single_path).with_as_type(Some(FieldRef::from(field))); @@ -2117,7 +2117,7 @@ mod test { println!("Single path 'x' works - result: {:?}", result); // Test: Try nested path "a.x" - this is what we need to implement - let nested_path = VariantPath::from_str_unchecked("a").join("x"); + let nested_path = VariantPath::from_str_or_panic("a").join("x"); let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new_with_path(nested_path).with_as_type(Some(FieldRef::from(field))); @@ -2584,7 +2584,7 @@ mod test { // Try to access a field with safe cast options (should return NULLs) let safe_options = GetOptions { - path: VariantPath::from_str_unchecked("nonexistent_field"), + path: VariantPath::from_str_or_panic("nonexistent_field"), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions::default(), // safe = true }; @@ -2601,7 +2601,7 @@ mod test { // Try to access a field with strict cast options (should error) let strict_options = GetOptions { - path: VariantPath::from_str_unchecked("nonexistent_field"), + path: VariantPath::from_str_or_panic("nonexistent_field"), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions { safe: false, @@ -2710,7 +2710,7 @@ mod test { // 2. The "a" field's typed_value nulls // 3. The "x" field's typed_value nulls let options = GetOptions { - path: VariantPath::from_str_unchecked("a.x"), + path: VariantPath::from_str_or_panic("a.x"), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions::default(), }; @@ -2844,7 +2844,7 @@ mod test { // Test 1: nullable field (should allow nulls from cast failures) let nullable_field = Arc::new(Field::new("result", DataType::Int32, true)); let options_nullable = GetOptions { - path: VariantPath::from_str_unchecked("x"), + path: VariantPath::from_str_or_panic("x"), as_type: Some(nullable_field.clone()), cast_options: CastOptions::default(), }; @@ -2897,7 +2897,7 @@ mod test { // Test 2: non-nullable field (behavior should be the same with safe casting) let non_nullable_field = Arc::new(Field::new("result", DataType::Int32, false)); let options_non_nullable = GetOptions { - path: VariantPath::from_str_unchecked("x"), + path: VariantPath::from_str_or_panic("x"), as_type: Some(non_nullable_field.clone()), cast_options: CastOptions::default(), // safe=true by default }; @@ -3072,7 +3072,7 @@ mod test { let variant_array = create_comprehensive_nested_shredded_variant(); // Extract "outer" field using path-based variant_get - let path = VariantPath::from_str_unchecked("outer"); + let path = VariantPath::from_str_or_panic("outer"); let inner_field = Field::new("inner", DataType::Int32, true); let result_type = DataType::Struct(Fields::from(vec![inner_field])); @@ -3117,7 +3117,7 @@ mod test { let variant_array = create_comprehensive_nested_shredded_variant(); // Extract "outer.inner" field using path-based variant_get - let path = VariantPath::from_str_unchecked("outer").join("inner"); + let path = VariantPath::from_str_or_panic("outer").join("inner"); let options = GetOptions { path, @@ -4113,7 +4113,7 @@ mod test { let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); let all_nulls_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from_str_unchecked("all_nulls")) + GetOptions::new_with_path(VariantPath::from_str_or_panic("all_nulls")) .with_as_type(Some(all_nulls_field_ref)), ) .unwrap(); @@ -4123,7 +4123,7 @@ mod test { let some_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); let some_nulls_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from_str_unchecked("some_nulls")) + GetOptions::new_with_path(VariantPath::from_str_or_panic("some_nulls")) .with_as_type(Some(some_nulls_field_ref)), ) .unwrap(); @@ -4138,7 +4138,7 @@ mod test { )); let struct_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from_str_unchecked("struct_field")) + GetOptions::new_with_path(VariantPath::from_str_or_panic("struct_field")) .with_as_type(Some(struct_field_ref)), ) .unwrap(); diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index c5952bbc8e72..8cba06ee6e6d 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -116,7 +116,7 @@ impl<'a> VariantPath<'a> { /// Parses a path string, panics on invalid input. /// Only use for tests for known-valid input. - pub fn from_str_unchecked(s: &'a str) -> Self { + pub fn from_str_or_panic(s: &'a str) -> Self { VariantPath::try_from(s).unwrap() } } @@ -229,7 +229,7 @@ mod tests { #[test] fn test_variant_path_empty_str() { - let path = VariantPath::from_str_unchecked(""); + let path = VariantPath::from_str_or_panic(""); assert!(path.is_empty()); } @@ -242,7 +242,7 @@ mod tests { #[test] fn test_variant_path_dot_notation_with_array_index() { - let path = VariantPath::from_str_unchecked("city.store.books[3].title"); + let path = VariantPath::from_str_or_panic("city.store.books[3].title"); let expected = VariantPath::try_from("city") .unwrap() From 634f236cfcf88f6598d4126e5465f1c0dff48575 Mon Sep 17 00:00:00 2001 From: klion26 Date: Mon, 9 Feb 2026 20:35:47 +0800 Subject: [PATCH 5/7] rebase main --- parquet-variant-compute/src/variant_get.rs | 30 ++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index e4698e739176..16ecd40d12ae 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -4237,12 +4237,13 @@ mod test { ]; for (request_type, expected) in expectations { - let options = GetOptions::new_with_path(VariantPath::from("outer").join("list")) - .with_as_type(Some(FieldRef::from(Field::new( - "result", - request_type.clone(), - true, - )))); + let options = + GetOptions::new_with_path(VariantPath::from_str_or_panic("outer").join("list")) + .with_as_type(Some(FieldRef::from(Field::new( + "result", + request_type.clone(), + true, + )))); let result = variant_get(&variant_array, options).unwrap(); assert_eq!(result.data_type(), expected.data_type()); @@ -4254,13 +4255,16 @@ mod test { (1, vec![None, None]), (2, vec![Some(3), None]), ] { - let index_options = - GetOptions::new_with_path(VariantPath::from("outer").join("list").join(idx)) - .with_as_type(Some(FieldRef::from(Field::new( - "result", - DataType::Int64, - true, - )))); + let index_options = GetOptions::new_with_path( + VariantPath::from_str_or_panic("outer") + .join("list") + .join(idx), + ) + .with_as_type(Some(FieldRef::from(Field::new( + "result", + DataType::Int64, + true, + )))); let index_result = variant_get(&variant_array, index_options).unwrap(); let index_expected: ArrayRef = Arc::new(Int64Array::from(expected)); assert_eq!(&index_result, &index_expected); From 7a1d4476ae2d70fb42e3977f7cf645be671447b3 Mon Sep 17 00:00:00 2001 From: klion26 Date: Thu, 12 Feb 2026 23:14:09 +0800 Subject: [PATCH 6/7] address comment --- parquet-variant-compute/src/shred_variant.rs | 130 +++++++++++-------- parquet-variant-compute/src/variant_get.rs | 67 +++++----- parquet-variant/src/path.rs | 10 +- 3 files changed, 110 insertions(+), 97 deletions(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index 2ebf20460f01..5fba2fed3cba 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -493,24 +493,26 @@ impl IntoShreddingField for (DataType, bool) { /// use parquet_variant::{VariantPath, VariantPathElement}; /// use parquet_variant_compute::ShreddedSchemaBuilder; /// -/// // Define the shredding schema using the builder -/// let shredding_type = ShreddedSchemaBuilder::default() +/// fn main() -> Result<(), arrow::error::ArrowError> { +/// // Define the shredding schema using the builder +/// let shredding_type = ShreddedSchemaBuilder::default() /// // store the "time" field as a separate UTC timestamp -/// .with_path(VariantPath::from_str_or_panic("time"), (&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true)) +/// .with_path("time", (&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true))? /// // store hostname as non-nullable Utf8 -/// .with_path(VariantPath::from_str_or_panic("hostname"), (&DataType::Utf8, false)) +/// .with_path("hostname", (&DataType::Utf8, false))? /// // pass a FieldRef directly /// .with_path( -/// VariantPath::from_str_or_panic("metadata.trace_id"), +/// "metadata.trace_id", /// Arc::new(Field::new("trace_id", DataType::FixedSizeBinary(16), false)), -/// ) +/// )? /// // field name with a dot: use VariantPath to avoid splitting /// .with_path( /// VariantPath::from_iter([VariantPathElement::from("metrics.cpu")]), /// &DataType::Float64, -/// ) +/// )? /// .build(); -/// +/// Ok(()) +/// } /// // The shredding_type can now be passed to shred_variant: /// // let shredded = shred_variant(&input, &shredding_type)?; /// ``` @@ -536,14 +538,17 @@ impl ShreddedSchemaBuilder { /// * `path` - Anything convertible to [`VariantPath`] /// * `field` - Anything convertible via [`IntoShreddingField`] (e.g. `FieldRef`, /// `&DataType`, or `(&DataType, bool)` to control nullability) - pub fn with_path<'a, P, F>(mut self, path: P, field: F) -> Self + pub fn with_path<'a, P, F>(mut self, path: P, field: F) -> Result where - P: Into>, + P: TryInto>, + P::Error: std::fmt::Debug, F: IntoShreddingField, { - let path: VariantPath<'a> = path.into(); + let path: VariantPath<'a> = path + .try_into() + .map_err(|e| ArrowError::InvalidArgumentError(format!("{:?}", e)))?; self.root.insert_path(&path, field.into_shredding_field()); - self + Ok(self) } /// Build the final [`DataType`]. @@ -1558,7 +1563,7 @@ mod tests { } #[test] - fn test_object_shredding_comprehensive() { + fn test_object_shredding_comprehensive() -> Result<()> { let input = build_variant_array(vec![ // Row 0: Fully shredded object VariantRow::Object(vec![ @@ -1596,8 +1601,8 @@ mod tests { // Create target schema: struct // Both types are supported for shredding let target_schema = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("score"), &DataType::Float64) - .with_path(VariantPath::from_str_or_panic("age"), &DataType::Int64) + .with_path("score", &DataType::Float64)? + .with_path("age", &DataType::Int64)? .build(); let result = shred_variant(&input, &target_schema).unwrap(); @@ -1903,6 +1908,7 @@ mod tests { }), }), ); + Ok(()) } #[test] @@ -1998,7 +2004,7 @@ mod tests { } #[test] - fn test_object_different_schemas() { + fn test_object_different_schemas() -> Result<()> { // Create object with multiple fields let input = build_variant_array(vec![VariantRow::Object(vec![ ("id", VariantValue::from(123i32)), @@ -2008,7 +2014,7 @@ mod tests { // Test with schema containing only id field let schema1 = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("id"), &DataType::Int32) + .with_path("id", &DataType::Int32)? .build(); let result1 = shred_variant(&input, &schema1).unwrap(); let value_field1 = result1.value_field().unwrap(); @@ -2016,8 +2022,8 @@ mod tests { // Test with schema containing id and age fields let schema2 = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("id"), &DataType::Int32) - .with_path(VariantPath::from_str_or_panic("age"), &DataType::Int64) + .with_path("id", &DataType::Int32)? + .with_path("age", &DataType::Int64)? .build(); let result2 = shred_variant(&input, &schema2).unwrap(); let value_field2 = result2.value_field().unwrap(); @@ -2025,17 +2031,19 @@ mod tests { // Test with schema containing all fields let schema3 = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("id"), &DataType::Int32) - .with_path(VariantPath::from_str_or_panic("age"), &DataType::Int64) - .with_path(VariantPath::from_str_or_panic("score"), &DataType::Float64) + .with_path("id", &DataType::Int32)? + .with_path("age", &DataType::Int64)? + .with_path("score", &DataType::Float64)? .build(); let result3 = shred_variant(&input, &schema3).unwrap(); let value_field3 = result3.value_field().unwrap(); assert!(value_field3.is_null(0)); // fully shredded, no remaining fields + + Ok(()) } #[test] - fn test_uuid_shredding_in_objects() { + fn test_uuid_shredding_in_objects() -> Result<()> { let mock_uuid_1 = Uuid::new_v4(); let mock_uuid_2 = Uuid::new_v4(); let mock_uuid_3 = Uuid::new_v4(); @@ -2069,14 +2077,8 @@ mod tests { ]); let target_schema = ShreddedSchemaBuilder::default() - .with_path( - VariantPath::from_str_or_panic("id"), - DataType::FixedSizeBinary(16), - ) - .with_path( - VariantPath::from_str_or_panic("session_id"), - DataType::FixedSizeBinary(16), - ) + .with_path("id", DataType::FixedSizeBinary(16))? + .with_path("session_id", DataType::FixedSizeBinary(16))? .build(); let result = shred_variant(&input, &target_schema).unwrap(); @@ -2207,6 +2209,8 @@ mod tests { // Row 5: Null assert!(result.is_null(5)); + + Ok(()) } #[test] @@ -2257,10 +2261,10 @@ mod tests { } #[test] - fn test_variant_schema_builder_simple() { + fn test_variant_schema_builder_simple() -> Result<()> { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("a"), &DataType::Int64) - .with_path(VariantPath::from_str_or_panic("b"), &DataType::Float64) + .with_path("a", &DataType::Int64)? + .with_path("b", &DataType::Float64)? .build(); assert_eq!( @@ -2270,14 +2274,16 @@ mod tests { Field::new("b", DataType::Float64, true), ])) ); + + Ok(()) } #[test] - fn test_variant_schema_builder_nested() { + fn test_variant_schema_builder_nested() -> Result<()> { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("a"), &DataType::Int64) - .with_path(VariantPath::from_str_or_panic("b.c"), &DataType::Utf8) - .with_path(VariantPath::from_str_or_panic("b.d"), &DataType::Float64) + .with_path("a", &DataType::Int64)? + .with_path("b.c", &DataType::Utf8)? + .with_path("b.d", &DataType::Float64)? .build(); assert_eq!( @@ -2294,13 +2300,15 @@ mod tests { ), ])) ); + + Ok(()) } #[test] - fn test_variant_schema_builder_with_path_variant_path_arg() { + fn test_variant_schema_builder_with_path_variant_path_arg() -> Result<()> { let path = VariantPath::from_iter([VariantPathElement::from("a.b")]); let shredding_type = ShreddedSchemaBuilder::default() - .with_path(path, &DataType::Int64) + .with_path(path, &DataType::Int64)? .build(); match shredding_type { @@ -2311,19 +2319,18 @@ mod tests { } _ => panic!("expected struct data type"), } + + Ok(()) } #[test] - fn test_variant_schema_builder_custom_nullability() { + fn test_variant_schema_builder_custom_nullability() -> Result<()> { let shredding_type = ShreddedSchemaBuilder::default() .with_path( - VariantPath::from_str_or_panic("foo"), + "foo", Arc::new(Field::new("should_be_renamed", DataType::Utf8, false)), - ) - .with_path( - VariantPath::from_str_or_panic("bar"), - (&DataType::Int64, false), - ) + )? + .with_path("bar", (&DataType::Int64, false))? .build(); let DataType::Struct(fields) = shredding_type else { @@ -2337,10 +2344,12 @@ mod tests { let bar = fields.iter().find(|f| f.name() == "bar").unwrap(); assert_eq!(bar.data_type(), &DataType::Int64); assert!(!bar.is_nullable()); + + Ok(()) } #[test] - fn test_variant_schema_builder_with_shred_variant() { + fn test_variant_schema_builder_with_shred_variant() -> Result<()> { let input = build_variant_array(vec![ VariantRow::Object(vec![ ("time", VariantValue::from(1234567890i64)), @@ -2355,8 +2364,8 @@ mod tests { ]); let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("time"), &DataType::Int64) - .with_path(VariantPath::from_str_or_panic("hostname"), &DataType::Utf8) + .with_path("time", &DataType::Int64)? + .with_path("hostname", &DataType::Utf8)? .build(); let result = shred_variant(&input, &shredding_type).unwrap(); @@ -2433,13 +2442,15 @@ mod tests { // Row 2 assert!(result.is_null(2)); + + Ok(()) } #[test] - fn test_variant_schema_builder_conflicting_path() { + fn test_variant_schema_builder_conflicting_path() -> Result<()> { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic("a"), &DataType::Int64) - .with_path(VariantPath::from_str_or_panic("a"), &DataType::Float64) + .with_path("a", &DataType::Int64)? + .with_path("a", &DataType::Float64)? .build(); assert_eq!( @@ -2448,25 +2459,30 @@ mod tests { vec![Field::new("a", DataType::Float64, true),] )) ); + + Ok(()) } #[test] - fn test_variant_schema_builder_root_path() { + fn test_variant_schema_builder_root_path() -> Result<()> { let path = VariantPath::new(vec![]); let shredding_type = ShreddedSchemaBuilder::default() - .with_path(path, &DataType::Int64) + .with_path(path, &DataType::Int64)? .build(); assert_eq!(shredding_type, DataType::Int64); + + Ok(()) } #[test] - fn test_variant_schema_builder_empty_path() { + fn test_variant_schema_builder_empty_path() -> Result<()> { let shredding_type = ShreddedSchemaBuilder::default() - .with_path(VariantPath::from_str_or_panic(""), &DataType::Int64) + .with_path("", &DataType::Int64)? .build(); assert_eq!(shredding_type, DataType::Int64); + Ok(()) } #[test] diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 16ecd40d12ae..f9985084cc49 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -390,7 +390,7 @@ mod test { fn get_primitive_variant_field() { single_variant_get_test( r#"{"some_field": 1234}"#, - VariantPath::from_str_or_panic("some_field"), + VariantPath::try_from("some_field").unwrap(), "1234", ); } @@ -404,7 +404,9 @@ mod test { fn get_primitive_variant_inside_object_of_object() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from_str_or_panic("top_level_field").join("inner_field"), + VariantPath::try_from("top_level_field") + .unwrap() + .join("inner_field"), "1234", ); } @@ -422,7 +424,7 @@ mod test { fn get_primitive_variant_inside_object_of_list() { single_variant_get_test( r#"{"some_field": [1234]}"#, - VariantPath::from_str_or_panic("some_field").join(0), + VariantPath::try_from("some_field").unwrap().join(0), "1234", ); } @@ -431,7 +433,7 @@ mod test { fn get_complex_variant() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - VariantPath::from_str_or_panic("top_level_field"), + VariantPath::try_from("top_level_field").unwrap(), r#"{"inner_field": 1234}"#, ); } @@ -1818,7 +1820,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test: Extract the "x" field as VariantArray first - let options = GetOptions::new_with_path(VariantPath::from_str_or_panic("x")); + let options = GetOptions::new_with_path(VariantPath::try_from("x").unwrap()); let result = variant_get(&array, options).unwrap(); let result_variant = VariantArray::try_new(&result).unwrap(); @@ -1837,7 +1839,7 @@ mod test { // Test: Extract the "x" field as Int32Array (type conversion) let field = Field::new("x", DataType::Int32, false); - let options = GetOptions::new_with_path(VariantPath::from_str_or_panic("x")) + let options = GetOptions::new_with_path(VariantPath::try_from("x").unwrap()) .with_as_type(Some(FieldRef::from(field))); let result = variant_get(&array, options).unwrap(); @@ -1930,11 +1932,11 @@ mod test { // Check: How does VariantPath parse different strings? println!("Testing path parsing:"); - let path_x = VariantPath::from_str_or_panic("x"); + let path_x = VariantPath::try_from("x").unwrap(); let elements_x: Vec<_> = path_x.iter().collect(); println!(" 'x' -> {} elements: {:?}", elements_x.len(), elements_x); - let path_ax = VariantPath::from_str_or_panic("a.x"); + let path_ax = VariantPath::try_from("a.x").unwrap(); let elements_ax: Vec<_> = path_ax.iter().collect(); println!( " 'a.x' -> {} elements: {:?}", @@ -1942,7 +1944,7 @@ mod test { elements_ax ); - let path_ax_alt = VariantPath::from_str_or_panic("$.a.x"); + let path_ax_alt = VariantPath::try_from("$.a.x").unwrap(); let elements_ax_alt: Vec<_> = path_ax_alt.iter().collect(); println!( " '$.a.x' -> {} elements: {:?}", @@ -1950,10 +1952,10 @@ mod test { elements_ax_alt ); - let path_nested = VariantPath::from_str_or_panic("a").join("x"); + let path_nested = VariantPath::try_from("a").unwrap().join("x"); let elements_nested: Vec<_> = path_nested.iter().collect(); println!( - " VariantPath::from_str_unchecked('a').join('x') -> {} elements: {:?}", + " VariantPath::try_from('a').unwrap().join('x') -> {} elements: {:?}", elements_nested.len(), elements_nested ); @@ -1962,7 +1964,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test if variant_get with REAL nested path throws not implemented error - let real_nested_path = VariantPath::from_str_or_panic("a").join("x"); + let real_nested_path = VariantPath::try_from("a").unwrap().join("x"); let options = GetOptions::new_with_path(real_nested_path); let result = variant_get(&array, options); @@ -1995,7 +1997,7 @@ mod test { let unshredded_array = create_depth_0_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_or_panic("x"); + let path = VariantPath::try_from("x").unwrap(); let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2011,7 +2013,7 @@ mod test { let shredded_array = create_depth_0_shredded_test_data_simple(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_or_panic("x"); + let path = VariantPath::try_from("x").unwrap(); let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2033,7 +2035,7 @@ mod test { let unshredded_array = create_nested_path_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_or_panic("a.x"); // Dot notation! + let path = VariantPath::try_from("a.x").unwrap(); // Dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2048,7 +2050,7 @@ mod test { let shredded_array = create_depth_1_shredded_test_data_working(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_or_panic("a.x"); // Dot notation! + let path = VariantPath::try_from("a.x").unwrap(); // Dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2070,7 +2072,7 @@ mod test { let unshredded_array = create_depth_2_test_data(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_or_panic("a.b.x"); // Double nested dot notation! + let path = VariantPath::try_from("a.b.x").unwrap(); // Double nested dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&unshredded_array, options).unwrap(); @@ -2086,7 +2088,7 @@ mod test { let shredded_array = create_depth_2_shredded_test_data_working(); let field = Field::new("result", DataType::Int32, true); - let path = VariantPath::from_str_or_panic("a.b.x"); // Double nested dot notation! + let path = VariantPath::try_from("a.b.x").unwrap(); // Double nested dot notation! let options = GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))); let result = variant_get(&shredded_array, options).unwrap(); @@ -2108,7 +2110,7 @@ mod test { let array = shredded_object_with_x_field_variant_array(); // Test: Extract the "x" field (single level) - this works - let single_path = VariantPath::from_str_or_panic("x"); + let single_path = VariantPath::try_from("x").unwrap(); let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new_with_path(single_path).with_as_type(Some(FieldRef::from(field))); @@ -2117,7 +2119,7 @@ mod test { println!("Single path 'x' works - result: {:?}", result); // Test: Try nested path "a.x" - this is what we need to implement - let nested_path = VariantPath::from_str_or_panic("a").join("x"); + let nested_path = VariantPath::try_from("a").unwrap().join("x"); let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new_with_path(nested_path).with_as_type(Some(FieldRef::from(field))); @@ -2584,7 +2586,7 @@ mod test { // Try to access a field with safe cast options (should return NULLs) let safe_options = GetOptions { - path: VariantPath::from_str_or_panic("nonexistent_field"), + path: VariantPath::try_from("nonexistent_field").unwrap(), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions::default(), // safe = true }; @@ -2601,7 +2603,7 @@ mod test { // Try to access a field with strict cast options (should error) let strict_options = GetOptions { - path: VariantPath::from_str_or_panic("nonexistent_field"), + path: VariantPath::try_from("nonexistent_field").unwrap(), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions { safe: false, @@ -2710,7 +2712,7 @@ mod test { // 2. The "a" field's typed_value nulls // 3. The "x" field's typed_value nulls let options = GetOptions { - path: VariantPath::from_str_or_panic("a.x"), + path: VariantPath::try_from("a.x").unwrap(), as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))), cast_options: CastOptions::default(), }; @@ -2844,7 +2846,7 @@ mod test { // Test 1: nullable field (should allow nulls from cast failures) let nullable_field = Arc::new(Field::new("result", DataType::Int32, true)); let options_nullable = GetOptions { - path: VariantPath::from_str_or_panic("x"), + path: VariantPath::try_from("x").unwrap(), as_type: Some(nullable_field.clone()), cast_options: CastOptions::default(), }; @@ -2897,7 +2899,7 @@ mod test { // Test 2: non-nullable field (behavior should be the same with safe casting) let non_nullable_field = Arc::new(Field::new("result", DataType::Int32, false)); let options_non_nullable = GetOptions { - path: VariantPath::from_str_or_panic("x"), + path: VariantPath::try_from("x").unwrap(), as_type: Some(non_nullable_field.clone()), cast_options: CastOptions::default(), // safe=true by default }; @@ -3072,7 +3074,7 @@ mod test { let variant_array = create_comprehensive_nested_shredded_variant(); // Extract "outer" field using path-based variant_get - let path = VariantPath::from_str_or_panic("outer"); + let path = VariantPath::try_from("outer").unwrap(); let inner_field = Field::new("inner", DataType::Int32, true); let result_type = DataType::Struct(Fields::from(vec![inner_field])); @@ -3117,7 +3119,7 @@ mod test { let variant_array = create_comprehensive_nested_shredded_variant(); // Extract "outer.inner" field using path-based variant_get - let path = VariantPath::from_str_or_panic("outer").join("inner"); + let path = VariantPath::try_from("outer").unwrap().join("inner"); let options = GetOptions { path, @@ -4113,7 +4115,7 @@ mod test { let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); let all_nulls_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from_str_or_panic("all_nulls")) + GetOptions::new_with_path(VariantPath::try_from("all_nulls").unwrap()) .with_as_type(Some(all_nulls_field_ref)), ) .unwrap(); @@ -4123,7 +4125,7 @@ mod test { let some_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); let some_nulls_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from_str_or_panic("some_nulls")) + GetOptions::new_with_path(VariantPath::try_from("some_nulls").unwrap()) .with_as_type(Some(some_nulls_field_ref)), ) .unwrap(); @@ -4138,7 +4140,7 @@ mod test { )); let struct_result = variant_get( &variant_array, - GetOptions::new_with_path(VariantPath::from_str_or_panic("struct_field")) + GetOptions::new_with_path(VariantPath::try_from("struct_field").unwrap()) .with_as_type(Some(struct_field_ref)), ) .unwrap(); @@ -4238,7 +4240,7 @@ mod test { for (request_type, expected) in expectations { let options = - GetOptions::new_with_path(VariantPath::from_str_or_panic("outer").join("list")) + GetOptions::new_with_path(VariantPath::try_from("outer").unwrap().join("list")) .with_as_type(Some(FieldRef::from(Field::new( "result", request_type.clone(), @@ -4256,7 +4258,8 @@ mod test { (2, vec![Some(3), None]), ] { let index_options = GetOptions::new_with_path( - VariantPath::from_str_or_panic("outer") + VariantPath::try_from("outer") + .unwrap() .join("list") .join(idx), ) diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index 8cba06ee6e6d..fe10d0451d54 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -113,12 +113,6 @@ impl<'a> VariantPath<'a> { pub fn is_empty(&self) -> bool { self.0.is_empty() } - - /// Parses a path string, panics on invalid input. - /// Only use for tests for known-valid input. - pub fn from_str_or_panic(s: &'a str) -> Self { - VariantPath::try_from(s).unwrap() - } } impl<'a> From>> for VariantPath<'a> { @@ -229,7 +223,7 @@ mod tests { #[test] fn test_variant_path_empty_str() { - let path = VariantPath::from_str_or_panic(""); + let path = VariantPath::try_from("").unwrap(); assert!(path.is_empty()); } @@ -242,7 +236,7 @@ mod tests { #[test] fn test_variant_path_dot_notation_with_array_index() { - let path = VariantPath::from_str_or_panic("city.store.books[3].title"); + let path = VariantPath::try_from("city.store.books[3].title").unwrap(); let expected = VariantPath::try_from("city") .unwrap() From bf43f7f8ae6e5249c0fd88385a222f25d19b5ad9 Mon Sep 17 00:00:00 2001 From: klion26 Date: Fri, 13 Feb 2026 00:11:02 +0800 Subject: [PATCH 7/7] add comment back --- parquet-variant-compute/src/shred_variant.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index 5fba2fed3cba..c60c602baa37 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -535,7 +535,7 @@ impl ShreddedSchemaBuilder { /// /// # Arguments /// - /// * `path` - Anything convertible to [`VariantPath`] + /// * `path` - Anything convertible to [`VariantPath`] (e.g., a `&str`) /// * `field` - Anything convertible via [`IntoShreddingField`] (e.g. `FieldRef`, /// `&DataType`, or `(&DataType, bool)` to control nullability) pub fn with_path<'a, P, F>(mut self, path: P, field: F) -> Result