// 2024-06-22 13:35:56 +00:00
// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//! State machines for parsing and serialising of structs and enums.
use proc_macro2 ::TokenStream ;
use quote ::{ quote , ToTokens } ;
use syn ::* ;
/// One state of a generated parser or serialiser state machine.
///
/// A state is rendered as one variant of the state enum plus one match arm
/// of the corresponding `advance` implementation.
pub(crate) struct State {
    /// Identifier of the state enum variant which represents this state.
    name: Ident,

    /// Field declarations (`name: Type,`) of the enum variant.
    decl: TokenStream,

    /// Field names (`name,`) used to destructure the enum variant.
    destructure: TokenStream,

    /// Tokens forming the right-hand side of this state's match arm.
    advance_body: TokenStream,
}
impl State {
    /// Create a new state which carries a builder data field.
    ///
    /// This is shorthand for [`Self::new`] followed by [`Self::add_field`].
    /// States which end up in a [`FromEventsStateMachine`] **must** be
    /// created through this function (or an equivalent sequence of calls),
    /// because rendering relies on the builder data being the first field.
    pub(crate) fn new_with_builder(
        name: Ident,
        builder_data_ident: &Ident,
        builder_data_ty: &Type,
    ) -> Self {
        Self::new(name).with_field(builder_data_ident, builder_data_ty)
    }

    /// Create a state without any fields and without an implementation.
    ///
    /// A state left in this condition renders to invalid code: at minimum a
    /// body has to be supplied through [`Self::set_impl`] or
    /// [`Self::with_impl`]. The state machine the state is used in may
    /// impose further requirements.
    pub(crate) fn new(name: Ident) -> Self {
        Self {
            name,
            decl: TokenStream::new(),
            destructure: TokenStream::new(),
            advance_body: TokenStream::new(),
        }
    }

    /// Append a data field to this state.
    ///
    /// - `name` is the identifier under which the state's implementation
    ///   can access the data.
    /// - `ty` is the type of the data field.
    pub(crate) fn add_field(&mut self, name: &Ident, ty: &Type) {
        let declaration = quote! { #name : #ty , };
        let binding = quote! { #name , };
        self.decl.extend(declaration);
        self.destructure.extend(binding);
    }

    /// Append a data field and hand the state back.
    ///
    /// Consume-and-return flavour of [`Self::add_field`].
    pub(crate) fn with_field(mut self, name: &Ident, ty: &Type) -> Self {
        self.add_field(name, ty);
        self
    }

    /// Install the `advance` implementation and hand the state back.
    ///
    /// `body` becomes the right-hand side of the match arm generated for
    /// this state in the state machine's `advance` function.
    ///
    /// The requirements on `body` depend on the state machine; see
    /// [`FromEventsStateMachine::advance_match_arms`] and
    /// [`IntoEventsSubmachine::compile`].
    pub(crate) fn with_impl(mut self, body: TokenStream) -> Self {
        self.set_impl(body);
        self
    }

    /// Replace the `advance` implementation of this state.
    ///
    /// In-place flavour of [`Self::with_impl`].
    pub(crate) fn set_impl(&mut self, body: TokenStream) {
        self.advance_body = body;
    }
}
/// Building block of a [`FromEventsStateMachine`], covering the builder of
/// exactly one compound.
///
/// Refer to [`FromEventsStateMachine`] for a description of the state
/// machines as a whole.
pub(crate) struct FromEventsSubmachine {
    /// Extra items the state machine depends on.
    pub(crate) defs: TokenStream,

    /// The states, including their transition implementations.
    pub(crate) states: Vec<State>,

    /// Expression which creates the initial state.
    ///
    /// It has to evaluate to a
    /// `Result<#state_ty_ident, xso::error::FromEventsError>`.
    pub(crate) init: TokenStream,
}
impl FromEventsSubmachine {
    /// Turn this partial state machine into a complete one.
    ///
    /// Every [`State`] is lowered into two token streams: the declaration
    /// of its enum variant and the match arm of its `advance`
    /// implementation. Once lowered, the states can no longer be modified.
    pub(crate) fn compile(self) -> FromEventsStateMachine {
        let Self {
            defs,
            states,
            init,
        } = self;

        let mut state_defs = TokenStream::default();
        let mut advance_match_arms = TokenStream::default();

        for State {
            name,
            decl,
            destructure,
            advance_body,
        } in states
        {
            state_defs.extend(quote! {
                #name { #decl },
            });

            // The builder data is always the first field of a state and the
            // implementation always needs it mutably. Because it comes first,
            // a `mut` placed in front of the destructuring tokens applies to
            // exactly that field. Hacky, but effective.
            advance_match_arms.extend(quote! {
                Self::#name { mut #destructure } => {
                    #advance_body
                }
            });
        }

        FromEventsStateMachine {
            defs,
            state_defs,
            advance_match_arms,
            variants: vec![FromEventsEntryPoint { init }],
        }
    }

    /// Replace the [`init`][`Self::init`] expression based on its current
    /// value.
    ///
    /// `f` is called with a reference to the current `init` tokens and its
    /// return value becomes the new `init`. This allows wrapping the
    /// existing initializer in additional code.
    pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
        mut self,
        f: F,
    ) -> Self {
        self.init = f(&self.init);
        self
    }
}
/// Building block of an [`IntoEventsStateMachine`], covering the
/// serialisation of exactly one compound.
///
/// Refer to [`IntoEventsStateMachine`] for a description of the state
/// machines as a whole.
pub(crate) struct IntoEventsSubmachine {
    /// Extra items the state machine depends on.
    pub(crate) defs: TokenStream,

    /// The states, including their transition implementations.
    pub(crate) states: Vec<State>,

    /// Pattern which takes the target type apart; the names it binds are
    /// consumed by `init`.
    pub(crate) destructure: TokenStream,

    /// Expression which builds the initial value of the state enum from the
    /// names bound by `destructure`.
    ///
    /// Inside this expression, `Self` refers to the state enum type.
    pub(crate) init: TokenStream,
}
impl IntoEventsSubmachine {
    /// Turn this partial state machine into a complete one.
    ///
    /// Every [`State`] is lowered into two token streams: the declaration
    /// of its enum variant and the match arm of its `advance`
    /// implementation. Once lowered, the states can no longer be modified.
    ///
    /// The [`State::advance_body`] of each state has to evaluate to an
    /// `Option<rxml::Event>`. `Some(.)` is emitted from the iterator, while
    /// `None` causes `advance` to be invoked once more.
    ///
    /// Each generated match arm is completed with a footer which moves on
    /// to the state following it in `states` (constructed from the field
    /// names of that following state), or which leaves the state machine
    /// after the last state. This guarantees that the iterator terminates
    /// eventually.
    pub(crate) fn compile(self) -> IntoEventsStateMachine {
        let Self {
            defs,
            states,
            destructure: entry_destructure,
            init,
        } = self;

        let mut state_defs = TokenStream::default();
        let mut advance_match_arms = TokenStream::default();

        let mut remaining = states.iter().peekable();
        while let Some(current) = remaining.next() {
            let State {
                name,
                decl,
                destructure,
                advance_body,
            } = current;

            let footer = match remaining.peek() {
                // There is a successor: transition into it.
                Some(State {
                    name: next_name,
                    destructure: construct_next,
                    ..
                }) => quote! {
                    ::core::result::Result::Ok((::core::option::Option::Some(Self::#next_name { #construct_next }), event))
                },
                // This was the last state: leave the state machine.
                None => quote! {
                    ::core::result::Result::Ok((::core::option::Option::None, event))
                },
            };

            state_defs.extend(quote! {
                #name { #decl },
            });

            advance_match_arms.extend(quote! {
                Self::#name { #destructure } => {
                    let event = #advance_body;
                    #footer
                }
            });
        }

        IntoEventsStateMachine {
            defs,
            state_defs,
            advance_match_arms,
            variants: vec![IntoEventsEntryPoint {
                init,
                destructure: entry_destructure,
            }],
        }
    }

    /// Replace the [`init`][`Self::init`] expression based on its current
    /// value.
    ///
    /// `f` is called with a reference to the current `init` tokens and its
    /// return value becomes the new `init`. This allows wrapping the
    /// existing initializer in additional code.
    pub(crate) fn with_augmented_init<F: FnOnce(&TokenStream) -> TokenStream>(
        mut self,
        f: F,
    ) -> Self {
        self.init = f(&self.init);
        self
    }
}
/// One entrypoint into a [`FromEventsStateMachine`].
pub(crate) struct FromEventsEntryPoint {
    /// Expression which attempts to create the initial state for this
    /// entrypoint.
    pub(crate) init: TokenStream,
}
/// The entrypoint of one variant into the event iterator.
pub(crate) struct IntoEventsEntryPoint {
    /// Pattern which takes the target type apart; the names it binds are
    /// consumed by `init`.
    destructure: TokenStream,

    /// Expression which builds an instance of the state enum from the names
    /// bound by `destructure`.
    ///
    /// Inside this expression, `Self` refers to the state enum type.
    init: TokenStream,
}
/// # State machine to implement `xso::FromEventsBuilder`
///
/// The state machine is made up of these parts:
///
/// - Extra dependencies ([`Self::defs`])
/// - States ([`Self::state_defs`])
/// - Transitions ([`Self::advance_match_arms`])
/// - Entrypoints ([`Self::variants`])
///
/// The usual way to obtain one is to assemble a [`FromEventsSubmachine`]
/// and to convert it using [`FromEventsSubmachine::compile`].
///
/// The output type of the state machine (which corresponds to
/// `xso::FromEventsBuilder::Output`) is not stored anywhere; it is defined
/// only implicitly by the expressions in `advance_match_arms`. Combining
/// match arms which produce different output types is therefore not
/// rejected here, but the generated code will then fail to compile.
///
/// In the generated Rust code, the state machine shows up as (among other
/// things) an enum type with one variant per state and an `advance` method.
/// That method consumes the enum value and produces either the next enum
/// value, an error, or the output of the state machine.
#[derive(Default)]
pub(crate) struct FromEventsStateMachine {
    /// Additional items required by the state machine implementation.
    defs: TokenStream,

    /// Declarations of the enum variants, each one followed by a comma.
    state_defs: TokenStream,

    /// Arms of a `match self { .. }`, with `self` being a value of the
    /// state enumeration type.
    ///
    /// Every arm has to diverge or to evaluate to a
    /// `Result<ControlFlow<State, Output>, xso::error::Error>`. `State` is
    /// the state enumeration and `Output` is the output type of the state
    /// machine.
    advance_match_arms: TokenStream,

    /// The entrypoints into the state machine.
    ///
    /// More than one entrypoint is only permitted if the state machine
    /// constructs an enumeration.
    variants: Vec<FromEventsEntryPoint>,
}
impl FromEventsStateMachine {
    /// Render the state machine into Rust code.
    ///
    /// The resulting token stream consists of:
    /// - the definitions the state machine needs to operate,
    /// - the state enum named `state_ty_ident`, with an `advance` method,
    /// - the builder struct named `builder_ty_ident` (declared with `vis`),
    ///   which wraps an optional state,
    /// - the `xso::FromEventsBuilder` impl on the builder struct, with
    ///   `output_ty` as `Output`,
    /// - a `fn new(rxml::QName, rxml::AttrMap) -> Result<Self>` on both the
    ///   builder struct and the state enum.
    ///
    /// The generated `new` on the state enum tries the entrypoints in order:
    /// the first one which succeeds or reports an invalid input decides the
    /// result, while a mismatch hands `name` and `attrs` on to the next
    /// entrypoint. If no entrypoint matches, a mismatch is reported.
    ///
    /// In the generated `advance`, `ControlFlow::Break` carries the next
    /// state and `ControlFlow::Continue` carries the finished output; the
    /// generated `feed` relies on that assignment.
    ///
    /// As visible in this function, the returned `Result` is always `Ok`.
    pub(crate) fn render(
        self,
        vis: &Visibility,
        builder_ty_ident: &Ident,
        state_ty_ident: &Ident,
        output_ty: &Type,
    ) -> Result<TokenStream> {
        let Self {
            defs,
            state_defs,
            advance_match_arms,
            variants,
        } = self;

        // One attempt per entrypoint. Each attempt either returns from the
        // generated function or re-binds `name` and `attrs` for the next one.
        let init_body: TokenStream = variants
            .into_iter()
            .map(|FromEventsEntryPoint { init }| {
                quote! {
                    let (name, mut attrs) = match { { let _ = &mut attrs; } #init } {
                        ::core::result::Result::Ok(v) => return ::core::result::Result::Ok(v),
                        ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)),
                        ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
                    };
                }
            })
            .collect();

        let output_ty_ref = make_ty_ref(output_ty);
        let docstr = format!(" Build a {0} from XML events. \n \n This type is generated using the [`macro@xso::FromXml`] derive macro and implements [`xso::FromEventsBuilder`] for {0} . ", output_ty_ref);

        let rendered = quote! {
            #defs

            enum #state_ty_ident {
                #state_defs
            }

            impl #state_ty_ident {
                fn advance(mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::std::ops::ControlFlow<Self, #output_ty>, ::xso::error::Error> {
                    match self {
                        #advance_match_arms
                    }.and_then(|__ok| {
                        match __ok {
                            ::std::ops::ControlFlow::Break(st) => ::core::result::Result::Ok(::std::ops::ControlFlow::Break(st)),
                            ::std::ops::ControlFlow::Continue(result) => {
                                ::core::result::Result::Ok(::std::ops::ControlFlow::Continue(result))
                            }
                        }
                    })
                }
            }

            impl #builder_ty_ident {
                fn new(
                    name: ::xso::exports::rxml::QName,
                    attrs: ::xso::exports::rxml::AttrMap,
                ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
                    #state_ty_ident::new(name, attrs).map(|ok| Self(::core::option::Option::Some(ok)))
                }
            }

            #[doc = #docstr]
            #vis struct #builder_ty_ident(::core::option::Option<#state_ty_ident>);

            impl ::xso::FromEventsBuilder for #builder_ty_ident {
                type Output = #output_ty;

                fn feed(&mut self, ev: ::xso::exports::rxml::Event) -> ::core::result::Result<::core::option::Option<Self::Output>, ::xso::error::Error> {
                    let inner = self.0.take().expect(" feed called after completion ");
                    match inner.advance(ev)? {
                        ::std::ops::ControlFlow::Continue(value) => ::core::result::Result::Ok(::core::option::Option::Some(value)),
                        ::std::ops::ControlFlow::Break(st) => {
                            self.0 = ::core::option::Option::Some(st);
                            ::core::result::Result::Ok(::core::option::Option::None)
                        }
                    }
                }
            }

            impl #state_ty_ident {
                fn new(
                    name: ::xso::exports::rxml::QName,
                    mut attrs: ::xso::exports::rxml::AttrMap,
                ) -> ::core::result::Result<Self, ::xso::error::FromEventsError> {
                    #init_body
                    { let _ = &mut attrs; }
                    ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs })
                }
            }
        };

        Ok(rendered)
    }
}
/// # State machine to implement an `Iterator` over `rxml::Event` results
///
/// The state machine is made up of these parts:
///
/// - Extra dependencies ([`Self::defs`])
/// - States ([`Self::state_defs`])
/// - Transitions ([`Self::advance_match_arms`])
/// - Entrypoints ([`Self::variants`])
///
/// The usual way to obtain one is to assemble an [`IntoEventsSubmachine`]
/// and to convert it using [`IntoEventsSubmachine::compile`].
///
/// In the generated Rust code, the state machine shows up as (among other
/// things) an enum type with one variant per state and an `advance` method.
/// That method consumes the enum value and produces either an error or a
/// pair of the optional next state and an optional event; see
/// [`Self::advance_match_arms`] for how that pair is interpreted.
#[derive(Default)]
pub(crate) struct IntoEventsStateMachine {
    /// Additional items required by the state machine implementation.
    defs: TokenStream,

    /// Declarations of the enum variants, each one followed by a comma.
    state_defs: TokenStream,

    /// Arms of a `match self { .. }`, with `self` being a value of the
    /// state enumeration type.
    ///
    /// Every arm has to diverge or to evaluate to a
    /// `Result<(Option<State>, Option<Event>), xso::error::Error>`, with
    /// `State` being the state enumeration.
    ///
    /// An event of `Some(.)` is emitted from the iterator. If the event is
    /// `None`, the state machine switches to the state from the
    /// `Option<State>` field and the advance implementation is invoked
    /// again.
    ///
    /// A state of `None` terminates the iterator: the `Option<Event>` value
    /// is yielded as-is (even if it is `None`), and from then on the
    /// iterator yields `None` forever.
    advance_match_arms: TokenStream,

    /// The entrypoints into the state machine.
    ///
    /// More than one entrypoint is only permitted if the state machine
    /// serialises an enumeration.
    variants: Vec<IntoEventsEntryPoint>,
}
impl IntoEventsStateMachine {
/// Render the state machine as a token stream.
///
/// The token stream contains the following pieces:
/// - Any definitions necessary for the statemachine to operate
/// - The state enum
/// - The iterator struct
/// - The `Iterator` impl on the builder struct
/// - A `fn new(T) -> Result<Self>` on the iterator struct.
pub ( crate ) fn render (
self ,
vis : & Visibility ,
input_ty : & Type ,
state_ty_ident : & Ident ,
event_iter_ty_ident : & Ident ,
) -> Result < TokenStream > {
let Self {
defs ,
state_defs ,
advance_match_arms ,
mut variants ,
} = self ;
let input_ty_ref = make_ty_ref ( input_ty ) ;
let docstr = format! ( " Convert a {0} into XML events. \n \n This type is generated using the [`macro@xso::IntoXml`] derive macro and implements [`std::iter:Iterator`] for {0} . " , input_ty_ref ) ;
let init_body = if variants . len ( ) = = 1 {
let IntoEventsEntryPoint { destructure , init } = variants . remove ( 0 ) ;
quote! {
{
let #destructure = value ;
#init
}
}
} else {
let mut match_arms = TokenStream ::default ( ) ;
for IntoEventsEntryPoint { destructure , init } in variants {
match_arms . extend ( quote! {
#destructure = > #init ,
} ) ;
}
quote! {
match value {
#match_arms
}
}
} ;
Ok ( quote! {
#defs
enum #state_ty_ident {
#state_defs
}
impl #state_ty_ident {
fn advance ( mut self ) -> ::core ::result ::Result < ( ::core ::option ::Option < Self > , ::core ::option ::Option < ::xso ::exports ::rxml ::Event > ) , ::xso ::error ::Error > {
match self {
#advance_match_arms
}
}
fn new (
value : #input_ty ,
) -> ::core ::result ::Result < Self , ::xso ::error ::Error > {
::core ::result ::Result ::Ok ( #init_body )
}
}
#[ doc = #docstr ]
#vis struct #event_iter_ty_ident ( ::core ::option ::Option < #state_ty_ident > ) ;
impl ::std ::iter ::Iterator for #event_iter_ty_ident {
type Item = ::core ::result ::Result < ::xso ::exports ::rxml ::Event , ::xso ::error ::Error > ;
fn next ( & mut self ) -> ::core ::option ::Option < Self ::Item > {
let mut state = self . 0. take ( ) ? ;
loop {
let ( next_state , ev ) = match state . advance ( ) {
::core ::result ::Result ::Ok ( v ) = > v ,
::core ::result ::Result ::Err ( e ) = > return ::core ::option ::Option ::Some ( ::core ::result ::Result ::Err ( e ) ) ,
} ;
if let ::core ::option ::Option ::Some ( ev ) = ev {
self . 0 = next_state ;
return ::core ::option ::Option ::Some ( ::core ::result ::Result ::Ok ( ev ) ) ;
}
// no event, do we have a state?
if let ::core ::option ::Option ::Some ( st ) = next_state {
// we do: try again!
state = st ;
continue ;
} else {
// we don't: end of iterator!
self . 0 = ::core ::option ::Option ::None ;
return ::core ::option ::Option ::None ;
}
}
}
}
impl #event_iter_ty_ident {
fn new ( value : #input_ty ) -> ::core ::result ::Result < Self , ::xso ::error ::Error > {
#state_ty_ident ::new ( value ) . map ( | ok | Self ( ::core ::option ::Option ::Some ( ok ) ) )
}
}
} )
}
}
/// Construct a path for an intradoc link from a given type.
fn doc_link_path ( ty : & Type ) -> Option < String > {
match ty {
Type ::Path ( ref ty ) = > {
let ( mut buf , offset ) = match ty . qself {
Some ( ref qself ) = > {
let mut buf = doc_link_path ( & qself . ty ) ? ;
buf . push_str ( " :: " ) ;
( buf , qself . position )
}
None = > {
let mut buf = String ::new ( ) ;
if ty . path . leading_colon . is_some ( ) {
buf . push_str ( " :: " ) ;
}
( buf , 0 )
}
} ;
let last = ty . path . segments . len ( ) - 1 ;
for i in offset .. ty . path . segments . len ( ) {
let segment = & ty . path . segments [ i ] ;
buf . push_str ( & segment . ident . to_string ( ) ) ;
if i < last {
buf . push_str ( " :: " ) ;
}
}
Some ( buf )
}
_ = > None ,
}
}
/// Produce a markdown snippet which refers to the given type in the
/// cleanest way available.
///
/// This is a helper for the functions which generate documentation.
///
/// If a link path can be derived for the type (see [`doc_link_path`]), the
/// snippet is an intradoc link to it. Types for which that is not possible
/// are rendered as their token stream, wrapped in backticks.
fn make_ty_ref(ty: &Type) -> String {
    if let Some(path) = doc_link_path(ty) {
        format!(" [` {} `] ", path)
    } else {
        format!(" ` {} ` ", ty.to_token_stream())
    }
}