From 3b3a4ff0c80d16889299b647e17b2797fd57c5d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Sch=C3=A4fer?= Date: Sat, 2 Mar 2024 10:07:34 +0100 Subject: [PATCH] Do not .clone() Element in code generated with generate_element This should improve performance a little. --- minidom/src/element.rs | 34 +++++++++++++++ parsers/src/util/macros.rs | 87 ++++++++++++++++++++++++++++---------- 2 files changed, 99 insertions(+), 22 deletions(-) diff --git a/minidom/src/element.rs b/minidom/src/element.rs index b1f3ed1..5e20a26 100644 --- a/minidom/src/element.rs +++ b/minidom/src/element.rs @@ -491,6 +491,22 @@ impl Element { } } + /// Returns an iterator over the child Elements, draining the element of + /// all its child nodes **including text!** + /// + /// This is a bit of a footgun, so we make this hidden in the docs (it + /// needs to be pub for macro use). Once `extract_if` + /// ([rust#43244](https://github.com/rust-lang/rust/issues/43244)) + /// is stabilized, we can replace this with a take_children which doesn't + /// remove text nodes. + #[inline] + #[doc(hidden)] + pub fn take_contents_as_children(&mut self) -> ContentsAsChildren { + ContentsAsChildren { + iter: self.children.drain(..), + } + } + /// Returns an iterator over references to every text node of this element. /// /// # Examples @@ -757,6 +773,24 @@ impl<'a> Iterator for ChildrenMut<'a> { } } +/// An iterator over references to child elements of an `Element`. +pub struct ContentsAsChildren<'a> { + iter: std::vec::Drain<'a, Node>, +} + +impl<'a> Iterator for ContentsAsChildren<'a> { + type Item = Element; + + fn next(&mut self) -> Option { + for item in &mut self.iter { + if let Node::Element(child) = item { + return Some(child); + } + } + None + } +} + /// An iterator over references to child text nodes of an `Element`. pub struct Texts<'a> { iter: slice::Iter<'a, Node>, diff --git a/parsers/src/util/macros.rs b/parsers/src/util/macros.rs index f7021d0..996d3e2 100644 --- a/parsers/src/util/macros.rs +++ b/parsers/src/util/macros.rs @@ -486,55 +486,74 @@ macro_rules! start_parse_elem { macro_rules! do_parse { ($elem:ident, Element) => { - $elem.clone() + Ok($elem) }; ($elem:ident, String) => { - $elem.text() + Ok($elem.text()) }; ($elem:ident, $constructor:ident) => { - $constructor::try_from($elem.clone())? + $constructor::try_from($elem) }; } macro_rules! do_parse_elem { ($temp:ident: Vec = $constructor:ident => $elem:ident, $name:tt, $parent_name:tt) => { - $temp.push(do_parse!($elem, $constructor)); + match do_parse!($elem, $constructor) { + Ok(v) => Ok($temp.push(v)), + Err(e) => Err(e), + } }; ($temp:ident: Option = $constructor:ident => $elem:ident, $name:tt, $parent_name:tt) => { if $temp.is_some() { - return Err(crate::util::error::Error::ParseError(concat!( + Err(crate::util::error::Error::ParseError(concat!( "Element ", $parent_name, " must not have more than one ", $name, " child." - ))); + ))) + } else { + match do_parse!($elem, $constructor) { + Ok(v) => { + $temp = Some(v); + Ok(()) + } + Err(e) => Err(e), + } } - $temp = Some(do_parse!($elem, $constructor)); }; ($temp:ident: Required = $constructor:ident => $elem:ident, $name:tt, $parent_name:tt) => { if $temp.is_some() { - return Err(crate::util::error::Error::ParseError(concat!( + Err(crate::util::error::Error::ParseError(concat!( "Element ", $parent_name, " must not have more than one ", $name, " child." - ))); + ))) + } else { + match do_parse!($elem, $constructor) { + Ok(v) => { + $temp = Some(v); + Ok(()) + } + Err(e) => Err(e), + } } - $temp = Some(do_parse!($elem, $constructor)); }; ($temp:ident: Present = $constructor:ident => $elem:ident, $name:tt, $parent_name:tt) => { if $temp { - return Err(crate::util::error::Error::ParseError(concat!( + Err(crate::util::error::Error::ParseError(concat!( "Element ", $parent_name, " must not have more than one ", $name, " child." - ))); + ))) + } else { + $temp = true; + Ok(()) } - $temp = true; }; } @@ -645,19 +664,43 @@ macro_rules! generate_element { impl ::std::convert::TryFrom for $elem { type Error = crate::util::error::Error; - fn try_from(elem: crate::Element) -> Result<$elem, crate::util::error::Error> { + fn try_from(mut elem: crate::Element) -> Result<$elem, crate::util::error::Error> { check_self!(elem, $name, $ns); check_no_unknown_attributes!(elem, $name, [$($attr_name),*]); $( start_parse_elem!($child_ident: $coucou); )* - for _child in elem.children() { + // We must pull the texts out of the Element before we pull + // the children, because take_elements_as_children drains + // *all* child nodes, including texts. + // To deal with the genericity of this macro, we need a local + // struct decl to carry the identifier around. + struct Texts { $( - if generate_child_test!(_child, $child_name, $child_ns) { - do_parse_elem!($child_ident: $coucou = $child_constructor => _child, $child_name, $name); - continue; - } + $text_ident: <$codec as Codec>::Decoded, )* + } + #[allow(unused_variables)] + let texts = Texts { + $( + $text_ident: <$codec>::decode(&elem.text())?, + )* + }; + for _child in elem.take_contents_as_children() { + let residual = _child; + $( + // TODO: get rid of the explicit generate_child_test here + // once !295 has landed. + let residual = if generate_child_test!(residual, $child_name, $child_ns) { + match do_parse_elem!($child_ident: $coucou = $child_constructor => residual, $child_name, $name) { + Ok(()) => continue, + Err(other) => return Err(other), + } + } else { + residual + }; + )* + let _ = residual; return Err(crate::util::error::Error::ParseError(concat!("Unknown child in ", $name, " element."))); } Ok($elem { @@ -668,7 +711,7 @@ macro_rules! generate_element { $child_ident: finish_parse_elem!($child_ident: $coucou = $child_name, $name), )* $( - $text_ident: <$codec>::decode(&elem.text())?, + $text_ident: texts.$text_ident, )* }) } @@ -706,10 +749,10 @@ macro_rules! impl_pubsub_item { impl ::std::convert::TryFrom for $item { type Error = Error; - fn try_from(elem: crate::Element) -> Result<$item, Error> { + fn try_from(mut elem: crate::Element) -> Result<$item, Error> { check_self!(elem, "item", $ns); check_no_unknown_attributes!(elem, "item", ["id", "publisher"]); - let mut payloads = elem.children().cloned().collect::>(); + let mut payloads = elem.take_contents_as_children().collect::>(); let payload = payloads.pop(); if !payloads.is_empty() { return Err(Error::ParseError(