diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index e222c3ac9ccb..2aeb9df97d82 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -16,6 +16,8 @@ // under the License. use std::{borrow::Cow, ops::Deref}; +use crate::utils::parse_path; + /// Represents a qualified path to a potential subfield or index of a variant /// value. /// @@ -112,11 +114,7 @@ impl<'a> From>> for VariantPath<'a> { /// Create from &str with support for dot notation impl<'a> From<&'a str> for VariantPath<'a> { fn from(path: &'a str) -> Self { - if path.is_empty() { - VariantPath::new(vec![]) - } else { - VariantPath::new(path.split('.').map(Into::into).collect()) - } + VariantPath::new(path.split(".").flat_map(parse_path).collect()) } } @@ -223,4 +221,35 @@ mod tests { let path = VariantPath::from_iter([p]); assert!(!path.is_empty()); } + + #[test] + fn test_variant_path_dot_notation_with_array_index() { + let path = VariantPath::from("city.store.books[3].title"); + + let expected = VariantPath::from("city") + .join("store") + .join("books") + .join(3) + .join("title"); + + assert_eq!(path, expected); + } + + #[test] + fn test_variant_path_dot_notation_with_only_array_index() { + let path = VariantPath::from("[3]"); + + let expected = VariantPath::from(3); + + assert_eq!(path, expected); + } + + #[test] + fn test_variant_path_dot_notation_with_starting_array_index() { + let path = VariantPath::from("[3].title"); + + let expected = VariantPath::from(3).join("title"); + + assert_eq!(path, expected); + } } diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs index d28b8685baa2..6accbcb36649 100644 --- a/parquet-variant/src/utils.rs +++ b/parquet-variant/src/utils.rs @@ -16,6 +16,7 @@ // under the License. use std::{array::TryFromSliceError, ops::Range, str}; +use crate::VariantPathElement; use arrow_schema::ArrowError; use std::cmp::Ordering; @@ -149,6 +150,38 @@ pub(crate) fn fits_precision(n: impl Into) -> bool { n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N) } +// Helper fn to parse input segments like foo[0] or foo[0][0] +#[inline] +pub(crate) fn parse_path<'a>(segment: &'a str) -> Vec> { + if segment.is_empty() { + return Vec::new(); + } + + let mut path_elements = Vec::new(); + let mut base = segment; + + while let Some(stripped) = base.strip_suffix(']') { + let Some(open_pos) = stripped.rfind('[') else { + return vec![VariantPathElement::field(segment)]; + }; + + let index_str = &stripped[open_pos + 1..]; + let Ok(index) = index_str.parse::() else { + return vec![VariantPathElement::field(segment)]; + }; + + path_elements.push(VariantPathElement::index(index)); + base = &stripped[..open_pos]; + } + + if !base.is_empty() { + path_elements.push(VariantPathElement::field(base)); + } + + path_elements.reverse(); + path_elements +} + #[cfg(test)] mod test { use super::*;