xmpp-rs/xso-proc/src/state.rs

657 lines
24 KiB
Rust
Raw Normal View History

// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//! State machines for parsing and serialising of structs and enums.
use proc_macro2::TokenStream;
use quote::{quote, ToTokens};
use syn::*;
/// A single state in a parser or serializer state machine.
///
/// A state is rendered twice when a submachine is compiled: once as a
/// struct-like variant of the generated state enum (from `name` and `decl`)
/// and once as a `match` arm of the generated `advance` function (from
/// `name`, `destructure` and `advance_body`).
pub(crate) struct State {
/// Name of the state enum variant for this state.
name: Ident,
/// Declaration of members of the state enum in this state.
///
/// Holds `name: ty,` entries, each terminated by a comma, as appended by
/// [`State::add_field`].
decl: TokenStream,
/// Destructuring of members of the state enum in this state.
///
/// Holds the field names, each terminated by a comma, in the same order
/// as the entries in `decl`.
destructure: TokenStream,
/// Right-hand-side of the match arm for this state.
advance_body: TokenStream,
}
impl State {
    /// Create a new state whose first field is the builder data.
    ///
    /// This is a convenience wrapper around [`Self::new`] and
    /// [`Self::add_field`]. This wrapper, or its equivalent, **must** be used
    /// for states used in [`FromEventsStateMachine`] state machines, as those
    /// expect that the first field is the builder data at render time.
    pub(crate) fn new_with_builder(
        name: Ident,
        builder_data_ident: &Ident,
        builder_data_ty: &Type,
    ) -> Self {
        Self::new(name).with_field(builder_data_ident, builder_data_ty)
    }

    /// Create a new state without any fields and without an implementation.
    ///
    /// Note that an empty state will generate invalid code. At the very
    /// least, a body must be added using [`Self::with_impl`]. The various
    /// state machines may also have additional requirements.
    pub(crate) fn new(name: Ident) -> Self {
        Self {
            name,
            decl: TokenStream::new(),
            destructure: TokenStream::new(),
            advance_body: TokenStream::new(),
        }
    }

    /// Add a field to this state's data.
    ///
    /// - `name` is the name under which the data will be accessible in the
    ///   state's implementation.
    /// - `ty` must be the data field's type.
    pub(crate) fn add_field(&mut self, name: &Ident, ty: &Type) {
        // The same field shows up in two places: as `name: ty,` in the enum
        // variant declaration and as `name,` in the match arm pattern.
        let declaration = quote! { #name: #ty, };
        let binding = quote! { #name, };
        self.decl.extend(declaration);
        self.destructure.extend(binding);
    }

    /// Consume the state and return it with one more field added.
    ///
    /// This is the consume-and-return-style version of [`Self::add_field`].
    pub(crate) fn with_field(self, name: &Ident, ty: &Type) -> Self {
        let mut extended = self;
        extended.add_field(name, ty);
        extended
    }

    /// Set the `advance` implementation of this state.
    ///
    /// `body` must be the body of the right hand side of the match arm for
    /// the `advance` implementation of the state machine. Any previously set
    /// body is replaced.
    ///
    /// See [`FromEventsStateMachine::advance_match_arms`] and
    /// [`IntoEventsSubmachine::compile`] for the respective
    /// requirements on the implementations.
    pub(crate) fn with_impl(self, body: TokenStream) -> Self {
        Self {
            advance_body: body,
            ..self
        }
    }
}
/// A partial [`FromEventsStateMachine`] which only covers the builder for a
/// single compound.
///
/// See [`FromEventsStateMachine`] for more information on the state machines
/// in general.
pub(crate) struct FromEventsSubmachine {
/// Additional items necessary for the state machine.
///
/// These are carried over unchanged into the full state machine by
/// [`Self::compile`].
pub(crate) defs: TokenStream,
/// States and state transition implementations.
///
/// Every state is expected to have the builder data as its first field
/// (see [`State::new_with_builder`]), because [`Self::compile`] makes the
/// first destructured binding of every state mutable.
pub(crate) states: Vec<State>,
/// Initializer expression.
///
/// This expression must evaluate to a
/// `Result<#state_ty_ident, xso::error::FromEventsError>`.
pub(crate) init: TokenStream,
}
impl FromEventsSubmachine {
    /// Convert a partial state machine into a full state machine.
    ///
    /// Every abstract [`State`] is rendered into two token streams: the
    /// declaration of its enum variant and the match arm implementing its
    /// transition. After this step the states are effectively immutable.
    /// The resulting state machine has exactly one entrypoint, built from
    /// [`Self::init`].
    pub(crate) fn compile(self) -> FromEventsStateMachine {
        let Self { defs, states, init } = self;

        let mut state_defs = TokenStream::default();
        let mut advance_match_arms = TokenStream::default();
        for State {
            name,
            decl,
            destructure,
            advance_body,
        } in states
        {
            state_defs.extend(quote! {
                #name { #decl },
            });

            // XXX: nasty hack, but works: the first member of the enum
            // variant always exists and always is the builder data, which
            // has to be mutably available. Prefixing the destructuring token
            // stream with `mut` makes exactly that first member mutable.
            advance_match_arms.extend(quote! {
                Self::#name { mut #destructure } => {
                    #advance_body
                }
            });
        }

        FromEventsStateMachine {
            defs,
            state_defs,
            advance_match_arms,
            variants: vec![FromEventsEntryPoint { init }],
        }
    }

    /// Replace the [`init`][`Self::init`] expression with one derived from it.
    ///
    /// `f` receives a reference to the current `init` value and returns the
    /// new one, which allows creating "wrappers" around the existing code.
    pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
        mut self,
        f: F,
    ) -> Self {
        self.init = f(&self.init);
        self
    }
}
/// A partial [`IntoEventsStateMachine`] which only covers the builder for a
/// single compound.
///
/// See [`IntoEventsStateMachine`] for more information on the state machines
/// in general.
pub(crate) struct IntoEventsSubmachine {
/// Additional items necessary for the state machine.
pub(crate) defs: TokenStream,
/// States and state transition implementations.
///
/// The order matters: [`Self::compile`] makes every state transition into
/// the state following it in this vector, and the last state terminates
/// the state machine.
pub(crate) states: Vec<State>,
/// A pattern match which destructures the target type into its parts, for
/// use by `init`.
pub(crate) destructure: TokenStream,
/// An expression which uses the names bound in `destructure` to create an
/// instance of the state enum.
///
/// The state enum type is available as `Self` in that context.
pub(crate) init: TokenStream,
}
impl IntoEventsSubmachine {
    /// Convert a partial state machine into a full state machine.
    ///
    /// Every abstract [`State`] is rendered into two token streams: the
    /// declaration of its enum variant and the match arm implementing its
    /// transition. After this step the states are effectively immutable.
    ///
    /// This requires that the [`State::advance_body`] token streams evaluate
    /// to an `Option<rxml::Event>`. If it evaluates to `Some(.)`, that is
    /// emitted from the iterator. If it evaluates to `None`, the `advance`
    /// implementation is called again.
    ///
    /// Each state implementation is augmented to also enter the state which
    /// follows it in [`Self::states`]; the last state leaves the state
    /// machine. This causes the iterator to terminate eventually.
    pub(crate) fn compile(self) -> IntoEventsStateMachine {
        let Self {
            defs,
            states,
            destructure,
            init,
        } = self;

        let mut state_defs = TokenStream::default();
        let mut advance_match_arms = TokenStream::default();

        let mut remaining = states.iter().peekable();
        while let Some(current) = remaining.next() {
            let State {
                name,
                decl,
                destructure: bindings,
                advance_body,
            } = current;

            // The successor is rebuilt from the bindings of the current
            // state, using the field names of the following state.
            let successor = match remaining.peek() {
                Some(next) => {
                    let next_name = &next.name;
                    let construct_next = &next.destructure;
                    quote! {
                        ::core::option::Option::Some(Self::#next_name { #construct_next })
                    }
                }
                // final state -> exit the state machine
                None => quote! {
                    ::core::option::Option::None
                },
            };

            state_defs.extend(quote! {
                #name { #decl },
            });
            advance_match_arms.extend(quote! {
                Self::#name { #bindings } => {
                    let event = #advance_body;
                    ::core::result::Result::Ok((#successor, event))
                }
            });
        }

        IntoEventsStateMachine {
            defs,
            state_defs,
            advance_match_arms,
            variants: vec![IntoEventsEntryPoint { init, destructure }],
        }
    }

    /// Replace the [`init`][`Self::init`] expression with one derived from it.
    ///
    /// `f` receives a reference to the current `init` value and returns the
    /// new one, which allows creating "wrappers" around the existing code.
    pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
        mut self,
        f: F,
    ) -> Self {
        self.init = f(&self.init);
        self
    }
}
/// Container for a single entrypoint into a [`FromEventsStateMachine`].
pub(crate) struct FromEventsEntryPoint {
/// Initializer expression of this entrypoint.
///
/// [`FromEventsStateMachine::render`] matches on the value of this
/// expression inside the generated `new` function of the state enum:
/// `Ok(..)` and `Err(FromEventsError::Invalid(..))` are returned right
/// away, while `Err(FromEventsError::Mismatch { name, attrs })` hands
/// `name` and `attrs` on to the next entrypoint.
pub(crate) init: TokenStream,
}
/// A single variant's entrypoint into the event iterator.
pub(crate) struct IntoEventsEntryPoint {
/// A pattern match which destructures the target type into its parts, for
/// use by `init`.
destructure: TokenStream,
/// An expression which uses the names bound in `destructure` to create an
/// instance of the state enum.
///
/// The state enum type is available as `Self` in that context.
init: TokenStream,
}
/// # State machine to implement `xso::FromEventsBuilder`
///
/// This struct represents a state machine consisting of the following parts:
///
/// - Extra dependencies ([`Self::defs`])
/// - States ([`Self::state_defs`])
/// - Transitions ([`Self::advance_match_arms`])
/// - Entrypoints ([`Self::variants`])
///
/// Such a state machine is best constructed by building a
/// [`FromEventsSubmachine`] and converting it using
/// [`FromEventsSubmachine::compile`].
///
/// NOTE(review): this documentation used to refer to `into()` and a `merge`
/// function for combining several submachines; neither is visible in this
/// file, so confirm how machines with more than one entrypoint are meant to
/// be assembled.
///
/// A state machine has an output type (corresponding to
/// `xso::FromEventsBuilder::Output`), which is however only implicitly defined
/// by the expressions generated in the `advance_match_arms`. That means that
/// combining states with different output types is possible, but will then
/// generate code which will fail to compile.
///
/// When converted to Rust code, the state machine will manifest as (among other
/// things) an enum type which contains all states and which has an `advance`
/// method. That method consumes the enum value and returns either a new enum
/// value, an error, or the output type of the state machine.
#[derive(Default)]
pub(crate) struct FromEventsStateMachine {
/// Extra items which are needed for the state machine implementation.
defs: TokenStream,
/// A sequence of enum variant declarations, separated and terminated by
/// commas.
state_defs: TokenStream,
/// A sequence of `match self { .. }` arms, where `self` is the state
/// enumeration type.
///
/// Each match arm must either diverge or evaluate to a
/// `Result<ControlFlow<State, Output>, xso::error::Error>`, where `State`
/// is the state enumeration and `Output` is the state machine's output
/// type.
advance_match_arms: TokenStream,
/// The different entrypoints for the state machine.
///
/// This may only contain more than one element if an enumeration is being
/// constructed by the resulting state machine.
variants: Vec<FromEventsEntryPoint>,
}
impl FromEventsStateMachine {
    /// Render the state machine as a token stream.
    ///
    /// The token stream contains the following pieces:
    /// - Any definitions necessary for the statemachine to operate
    /// - The state enum, with its `advance` and `new` functions
    /// - The builder struct
    /// - The `xso::FromEventsBuilder` impl on the builder struct
    /// - A `fn new(rxml::QName, rxml::AttrMap) -> Result<Self>` on the
    ///   builder struct.
    ///
    /// Arguments:
    /// - `vis` is the visibility of the generated builder struct.
    /// - `builder_ty_ident` is the name of the generated builder struct.
    /// - `state_ty_ident` is the name of the generated state enum.
    /// - `output_ty` is the type produced by the builder.
    ///
    /// This function currently always returns `Ok`.
    pub(crate) fn render(
        self,
        vis: &Visibility,
        builder_ty_ident: &Ident,
        state_ty_ident: &Ident,
        output_ty: &Type,
    ) -> Result<TokenStream> {
        let Self {
            defs,
            state_defs,
            advance_match_arms,
            variants,
        } = self;

        // Chain the entrypoints: each one either returns from the generated
        // `new` function or, on a mismatch, rebinds `name` and `attrs` so
        // that the next entrypoint can have a go at them.
        let mut init_body = TokenStream::default();
        for FromEventsEntryPoint { init } in variants {
            init_body.extend(quote! {
                let (name, mut attrs) = match { { let _ = &mut attrs; } #init } {
                    ::core::result::Result::Ok(v) => return ::core::result::Result::Ok(v),
                    ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)),
                    ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
                };
            });
        }

        let docstr = format!(
            "Build a {0} from XML events.\n\nThis type is generated using the [`macro@xso::FromXml`] derive macro and implements [`xso::FromEventsBuilder`] for {0}.",
            make_ty_ref(output_ty)
        );

        let rendered = quote! {
            #defs

            enum #state_ty_ident {
                #state_defs
            }

            impl #state_ty_ident {
                fn advance(mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::std::ops::ControlFlow<Self, #output_ty>, ::xso::error::Error> {
                    match self {
                        #advance_match_arms
                    }.and_then(|__ok| {
                        match __ok {
                            ::std::ops::ControlFlow::Break(st) => ::core::result::Result::Ok(::std::ops::ControlFlow::Break(st)),
                            ::std::ops::ControlFlow::Continue(result) => {
                                ::core::result::Result::Ok(::std::ops::ControlFlow::Continue(result))
                            }
                        }
                    })
                }
            }

            impl #builder_ty_ident {
                fn new(
                    name: ::xso::exports::rxml::QName,
                    attrs: ::xso::exports::rxml::AttrMap,
                ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
                    #state_ty_ident::new(name, attrs).map(|ok| Self(::core::option::Option::Some(ok)))
                }
            }

            #[doc = #docstr]
            #vis struct #builder_ty_ident(::core::option::Option<#state_ty_ident>);

            impl ::xso::FromEventsBuilder for #builder_ty_ident {
                type Output = #output_ty;

                fn feed(&mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::core::option::Option<Self::Output>, ::xso::error::Error> {
                    let inner = self.0.take().expect("feed called after completion");
                    match inner.advance(ev)? {
                        ::std::ops::ControlFlow::Continue(value) => ::core::result::Result::Ok(::core::option::Option::Some(value)),
                        ::std::ops::ControlFlow::Break(st) => {
                            self.0 = ::core::option::Option::Some(st);
                            ::core::result::Result::Ok(::core::option::Option::None)
                        }
                    }
                }
            }

            impl #state_ty_ident {
                fn new(
                    name: ::xso::exports::rxml::QName,
                    mut attrs: ::xso::exports::rxml::AttrMap,
                ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
                    #init_body
                    { let _ = &mut attrs; }
                    ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs })
                }
            }
        };
        Ok(rendered)
    }
}
/// # State machine to implement an `Iterator<Item = Result<rxml::Event, xso::error::Error>>`
///
/// This struct represents a state machine consisting of the following parts:
///
/// - Extra dependencies ([`Self::defs`])
/// - States ([`Self::state_defs`])
/// - Transitions ([`Self::advance_match_arms`])
/// - Entrypoints ([`Self::variants`])
///
/// Such a state machine is best constructed by building an
/// [`IntoEventsSubmachine`] and converting it using
/// [`IntoEventsSubmachine::compile`].
///
/// The type which is being serialised is not stored in this struct; it is
/// passed to [`Self::render`] as `input_ty`.
///
/// When converted to Rust code, the state machine will manifest as (among other
/// things) an enum type which contains all states and which has an `advance`
/// method. That method consumes the enum value and returns either an error
/// or a pair consisting of the optional next enum value and an optional
/// event.
#[derive(Default)]
pub(crate) struct IntoEventsStateMachine {
/// Extra items which are needed for the state machine implementation.
defs: TokenStream,
/// A sequence of enum variant declarations, separated and terminated by
/// commas.
state_defs: TokenStream,
/// A sequence of `match self { .. }` arms, where `self` is the state
/// enumeration type.
///
/// Each match arm must either diverge or evaluate to a
/// `Result<(Option<State>, Option<Event>), xso::error::Error>`, where
/// `State` is the state enumeration.
///
/// If `Some(.)` is returned for the event, that event is emitted. If
/// `None` is returned for the event, the advance implementation is called
/// again after switching to the state returned in the `Option<State>`
/// field.
///
/// If `None` is returned for the `Option<State>`, the iterator
/// terminates yielding the `Option<Event>` value directly (even if it is
/// `None`). After the iterator has terminated, it yields `None`
/// indefinitely.
advance_match_arms: TokenStream,
/// The different entrypoints for the state machine.
///
/// This may only contain more than one element if an enumeration is being
/// serialised by the resulting state machine.
variants: Vec<IntoEventsEntryPoint>,
}
impl IntoEventsStateMachine {
    /// Render the state machine as a token stream.
    ///
    /// The token stream contains the following pieces:
    /// - Any definitions necessary for the statemachine to operate
    /// - The state enum, with its `advance` and `new` functions
    /// - The iterator struct
    /// - The `Iterator` impl on the iterator struct
    /// - A `fn new(T) -> Result<Self>` on the iterator struct.
    ///
    /// Arguments:
    /// - `vis` is the visibility of the generated iterator struct.
    /// - `input_ty` is the type which is converted into events.
    /// - `state_ty_ident` is the name of the generated state enum.
    /// - `event_iter_ty_ident` is the name of the generated iterator struct.
    ///
    /// This function currently always returns `Ok`.
    pub(crate) fn render(
        self,
        vis: &Visibility,
        input_ty: &Type,
        state_ty_ident: &Ident,
        event_iter_ty_ident: &Ident,
    ) -> Result<TokenStream> {
        let Self {
            defs,
            state_defs,
            advance_match_arms,
            variants,
        } = self;

        // The path to `Iterator` needs the full `::` separator; with a
        // single colon rustdoc cannot resolve the intra-doc link in the
        // generated documentation.
        let docstr = format!(
            "Convert a {0} into XML events.\n\nThis type is generated using the [`macro@xso::IntoXml`] derive macro and implements [`std::iter::Iterator`] for {0}.",
            make_ty_ref(input_ty)
        );

        let init_body = if let [IntoEventsEntryPoint { destructure, init }] = variants.as_slice() {
            // A single entrypoint: destructure with an irrefutable `let`.
            quote! {
                {
                    let #destructure = value;
                    #init
                }
            }
        } else {
            // Zero or several entrypoints: pick the matching one at runtime.
            let mut match_arms = TokenStream::default();
            for IntoEventsEntryPoint { destructure, init } in &variants {
                match_arms.extend(quote! {
                    #destructure => #init,
                });
            }
            quote! {
                match value {
                    #match_arms
                }
            }
        };

        Ok(quote! {
            #defs

            enum #state_ty_ident {
                #state_defs
            }

            impl #state_ty_ident {
                fn advance(mut self) -> ::core::result::Result<(::core::option::Option<Self>, ::core::option::Option<::xso::exports::rxml::Event>), ::xso::error::Error> {
                    match self {
                        #advance_match_arms
                    }
                }

                fn new(
                    value: #input_ty,
                ) -> ::core::result::Result<Self, ::xso::error::Error> {
                    ::core::result::Result::Ok(#init_body)
                }
            }

            #[doc = #docstr]
            #vis struct #event_iter_ty_ident(::core::option::Option<#state_ty_ident>);

            impl ::std::iter::Iterator for #event_iter_ty_ident {
                type Item = ::core::result::Result<::xso::exports::rxml::Event, ::xso::error::Error>;

                fn next(&mut self) -> ::core::option::Option<Self::Item> {
                    let mut state = self.0.take()?;
                    loop {
                        let (next_state, ev) = match state.advance() {
                            ::core::result::Result::Ok(v) => v,
                            ::core::result::Result::Err(e) => return ::core::option::Option::Some(::core::result::Result::Err(e)),
                        };
                        if let ::core::option::Option::Some(ev) = ev {
                            self.0 = next_state;
                            return ::core::option::Option::Some(::core::result::Result::Ok(ev));
                        }
                        // no event, do we have a state?
                        if let ::core::option::Option::Some(st) = next_state {
                            // we do: try again!
                            state = st;
                            continue;
                        } else {
                            // we don't: end of iterator!
                            self.0 = ::core::option::Option::None;
                            return ::core::option::Option::None;
                        }
                    }
                }
            }

            impl #event_iter_ty_ident {
                fn new(value: #input_ty) -> ::core::result::Result<Self, ::xso::error::Error> {
                    #state_ty_ident::new(value).map(|ok| Self(::core::option::Option::Some(ok)))
                }
            }
        })
    }
}
/// Construct a path for an intradoc link from a given type.
///
/// Only [`Type::Path`] types can be converted; `None` is returned for any
/// other kind of type, as well as for qualified paths whose self type
/// cannot be converted.
///
/// The result consists of the identifiers of the path segments joined by
/// `::`; generic arguments of the segments are not included. For a
/// qualified path, the link path of the self type takes the place of the
/// leading segments covered by the qualification.
fn doc_link_path(ty: &Type) -> Option<String> {
    let type_path = match ty {
        Type::Path(type_path) => type_path,
        _ => return None,
    };

    let (mut buf, offset) = match &type_path.qself {
        Some(qself) => {
            let mut buf = doc_link_path(&qself.ty)?;
            buf.push_str("::");
            (buf, qself.position)
        }
        None => {
            let prefix = if type_path.path.leading_colon.is_some() {
                "::"
            } else {
                ""
            };
            (String::from(prefix), 0)
        }
    };

    // Join by putting the separator in front of every segment but the
    // first. This needs no `len() - 1` arithmetic and therefore cannot
    // underflow on a path without segments.
    for (idx, segment) in type_path.path.segments.iter().skip(offset).enumerate() {
        if idx > 0 {
            buf.push_str("::");
        }
        buf.push_str(&segment.ident.to_string());
    }
    Some(buf)
}
/// Create a markdown snippet which references the given type as cleanly as
/// possible.
///
/// This is used in documentation generation functions.
///
/// Types for which an intra-doc link path can be constructed are rendered
/// as a link (the path in backticks and square brackets). All other types
/// are simply wrapped in backticks.
fn make_ty_ref(ty: &Type) -> String {
    if let Some(path) = doc_link_path(ty) {
        return format!("[`{}`]", path);
    }
    format!("`{}`", ty.to_token_stream())
}