Merge #1384
1384: rp: optimize rom-func-cache for runtime (r=Dirbaio, a=pennae). Storing a full function pointer initialized to a resolver trampoline lets us avoid the runtime cost of checking whether we need to do the initialization. This also slightly reduces flash usage due to a slightly more space-efficient initialization procedure. Co-authored-by: pennae <github@quasiparticle.net>
This commit is contained in:
		
						commit
						54fe50c685
					
				@ -56,50 +56,11 @@ macro_rules! declare_rom_function {
 | 
				
			|||||||
        fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
 | 
					        fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
 | 
				
			||||||
        $lookup:block
 | 
					        $lookup:block
 | 
				
			||||||
    ) => {
 | 
					    ) => {
 | 
				
			||||||
        #[doc = r"Additional access for the `"]
 | 
					        declare_rom_function!{
 | 
				
			||||||
        #[doc = stringify!($name)]
 | 
					            __internal ,
 | 
				
			||||||
        #[doc = r"` ROM function."]
 | 
					 | 
				
			||||||
        pub mod $name {
 | 
					 | 
				
			||||||
            /// Retrieve a function pointer.
 | 
					 | 
				
			||||||
            #[cfg(not(feature = "rom-func-cache"))]
 | 
					 | 
				
			||||||
            pub fn ptr() -> extern "C" fn( $($argname: $ty),* ) -> $ret {
 | 
					 | 
				
			||||||
                let p: *const u32 = $lookup;
 | 
					 | 
				
			||||||
                unsafe {
 | 
					 | 
				
			||||||
                    let func : extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
 | 
					 | 
				
			||||||
                    func
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            /// Retrieve a function pointer.
 | 
					 | 
				
			||||||
            #[cfg(feature = "rom-func-cache")]
 | 
					 | 
				
			||||||
            pub fn ptr() -> extern "C" fn( $($argname: $ty),* ) -> $ret {
 | 
					 | 
				
			||||||
                use core::sync::atomic::{AtomicU16, Ordering};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                // All pointers in the ROM fit in 16 bits, so we don't need a
 | 
					 | 
				
			||||||
                // full width word to store the cached value.
 | 
					 | 
				
			||||||
                static CACHED_PTR: AtomicU16 = AtomicU16::new(0);
 | 
					 | 
				
			||||||
                // This is safe because the lookup will always resolve
 | 
					 | 
				
			||||||
                // to the same value.  So even if an interrupt or another
 | 
					 | 
				
			||||||
                // core starts at the same time, it just repeats some
 | 
					 | 
				
			||||||
                // work and eventually writes back the correct value.
 | 
					 | 
				
			||||||
                let p: *const u32 = match CACHED_PTR.load(Ordering::Relaxed) {
 | 
					 | 
				
			||||||
                    0 => {
 | 
					 | 
				
			||||||
                        let raw: *const u32 = $lookup;
 | 
					 | 
				
			||||||
                        CACHED_PTR.store(raw as u16, Ordering::Relaxed);
 | 
					 | 
				
			||||||
                        raw
 | 
					 | 
				
			||||||
                    },
 | 
					 | 
				
			||||||
                    val => val as *const u32,
 | 
					 | 
				
			||||||
                };
 | 
					 | 
				
			||||||
                unsafe {
 | 
					 | 
				
			||||||
                    let func : extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
 | 
					 | 
				
			||||||
                    func
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            $(#[$outer])*
 | 
					            $(#[$outer])*
 | 
				
			||||||
        pub extern "C" fn $name( $($argname: $ty),* ) -> $ret {
 | 
					            fn $name( $($argname: $ty),* ) -> $ret
 | 
				
			||||||
            $name::ptr()($($argname),*)
 | 
					            $lookup
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -107,6 +68,21 @@ macro_rules! declare_rom_function {
 | 
				
			|||||||
        $(#[$outer:meta])*
 | 
					        $(#[$outer:meta])*
 | 
				
			||||||
        unsafe fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
 | 
					        unsafe fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
 | 
				
			||||||
        $lookup:block
 | 
					        $lookup:block
 | 
				
			||||||
 | 
					    ) => {
 | 
				
			||||||
 | 
					        declare_rom_function!{
 | 
				
			||||||
 | 
					            __internal unsafe ,
 | 
				
			||||||
 | 
					            $(#[$outer])*
 | 
				
			||||||
 | 
					            fn $name( $($argname: $ty),* ) -> $ret
 | 
				
			||||||
 | 
					            $lookup
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    (
 | 
				
			||||||
 | 
					        __internal
 | 
				
			||||||
 | 
					        $( $maybe_unsafe:ident )? ,
 | 
				
			||||||
 | 
					        $(#[$outer:meta])*
 | 
				
			||||||
 | 
					        fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
 | 
				
			||||||
 | 
					        $lookup:block
 | 
				
			||||||
    ) => {
 | 
					    ) => {
 | 
				
			||||||
        #[doc = r"Additional access for the `"]
 | 
					        #[doc = r"Additional access for the `"]
 | 
				
			||||||
        #[doc = stringify!($name)]
 | 
					        #[doc = stringify!($name)]
 | 
				
			||||||
@ -114,43 +90,58 @@ macro_rules! declare_rom_function {
 | 
				
			|||||||
        pub mod $name {
 | 
					        pub mod $name {
 | 
				
			||||||
            /// Retrieve a function pointer.
 | 
					            /// Retrieve a function pointer.
 | 
				
			||||||
            #[cfg(not(feature = "rom-func-cache"))]
 | 
					            #[cfg(not(feature = "rom-func-cache"))]
 | 
				
			||||||
            pub fn ptr() -> unsafe extern "C" fn( $($argname: $ty),* ) -> $ret {
 | 
					            pub fn ptr() -> $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret {
 | 
				
			||||||
                let p: *const u32 = $lookup;
 | 
					                let p: *const u32 = $lookup;
 | 
				
			||||||
                unsafe {
 | 
					                unsafe {
 | 
				
			||||||
                    let func : unsafe extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
 | 
					                    let func : $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret
 | 
				
			||||||
 | 
					                        = core::mem::transmute(p);
 | 
				
			||||||
                    func
 | 
					                    func
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            #[cfg(feature = "rom-func-cache")]
 | 
				
			||||||
 | 
					            // unlike rp2040-hal we store a full word, containing the full function pointer.
 | 
				
			||||||
 | 
					            // rp2040-hal saves two bytes by storing only the rom offset, at the cost of
 | 
				
			||||||
 | 
					            // having to do an indirection and an atomic operation on every rom call.
 | 
				
			||||||
 | 
					            static mut CACHE: $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret
 | 
				
			||||||
 | 
					                = trampoline;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            #[cfg(feature = "rom-func-cache")]
 | 
				
			||||||
 | 
					            $( $maybe_unsafe )? extern "C" fn trampoline( $($argname: $ty),* ) -> $ret {
 | 
				
			||||||
 | 
					                use core::sync::atomic::{compiler_fence, Ordering};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let p: *const u32 = $lookup;
 | 
				
			||||||
 | 
					                #[allow(unused_unsafe)]
 | 
				
			||||||
 | 
					                unsafe {
 | 
				
			||||||
 | 
					                    CACHE = core::mem::transmute(p);
 | 
				
			||||||
 | 
					                    compiler_fence(Ordering::Release);
 | 
				
			||||||
 | 
					                    CACHE($($argname),*)
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            /// Retrieve a function pointer.
 | 
					            /// Retrieve a function pointer.
 | 
				
			||||||
            #[cfg(feature = "rom-func-cache")]
 | 
					            #[cfg(feature = "rom-func-cache")]
 | 
				
			||||||
            pub fn ptr() -> unsafe extern "C" fn( $($argname: $ty),* ) -> $ret {
 | 
					            pub fn ptr() -> $( $maybe_unsafe )? extern "C" fn( $($argname: $ty),* ) -> $ret {
 | 
				
			||||||
                use core::sync::atomic::{AtomicU16, Ordering};
 | 
					                use core::sync::atomic::{compiler_fence, Ordering};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                // All pointers in the ROM fit in 16 bits, so we don't need a
 | 
					 | 
				
			||||||
                // full width word to store the cached value.
 | 
					 | 
				
			||||||
                static CACHED_PTR: AtomicU16 = AtomicU16::new(0);
 | 
					 | 
				
			||||||
                // This is safe because the lookup will always resolve
 | 
					                // This is safe because the lookup will always resolve
 | 
				
			||||||
                // to the same value.  So even if an interrupt or another
 | 
					                // to the same value.  So even if an interrupt or another
 | 
				
			||||||
                // core starts at the same time, it just repeats some
 | 
					                // core starts at the same time, it just repeats some
 | 
				
			||||||
                // work and eventually writes back the correct value.
 | 
					                // work and eventually writes back the correct value.
 | 
				
			||||||
                let p: *const u32 = match CACHED_PTR.load(Ordering::Relaxed) {
 | 
					                //
 | 
				
			||||||
                    0 => {
 | 
					                // We easily get away with using only compiler fences here
 | 
				
			||||||
                        let raw: *const u32 = $lookup;
 | 
					                // because RP2040 SRAM is not cached. If it were we'd need
 | 
				
			||||||
                        CACHED_PTR.store(raw as u16, Ordering::Relaxed);
 | 
					                // to make sure updates propagate quickly, or just take the
 | 
				
			||||||
                        raw
 | 
					                // hit and let each core resolve every function once.
 | 
				
			||||||
                    },
 | 
					                compiler_fence(Ordering::Acquire);
 | 
				
			||||||
                    val => val as *const u32,
 | 
					 | 
				
			||||||
                };
 | 
					 | 
				
			||||||
                unsafe {
 | 
					                unsafe {
 | 
				
			||||||
                    let func : unsafe extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
 | 
					                    CACHE
 | 
				
			||||||
                    func
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        $(#[$outer])*
 | 
					        $(#[$outer])*
 | 
				
			||||||
        pub unsafe extern "C" fn $name( $($argname: $ty),* ) -> $ret {
 | 
					        pub $( $maybe_unsafe )? extern "C" fn $name( $($argname: $ty),* ) -> $ret {
 | 
				
			||||||
            $name::ptr()($($argname),*)
 | 
					            $name::ptr()($($argname),*)
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
@ -369,6 +360,7 @@ pub fn fplib_start() -> *const u8 {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// See Table 180 in the RP2040 datasheet for the contents of this table.
 | 
					/// See Table 180 in the RP2040 datasheet for the contents of this table.
 | 
				
			||||||
 | 
					#[cfg_attr(feature = "rom-func-cache", inline(never))]
 | 
				
			||||||
pub fn soft_float_table() -> *const usize {
 | 
					pub fn soft_float_table() -> *const usize {
 | 
				
			||||||
    rom_table_lookup(DATA_TABLE, *b"SF")
 | 
					    rom_table_lookup(DATA_TABLE, *b"SF")
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -379,6 +371,7 @@ pub fn fplib_end() -> *const u8 {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// This entry is only present in the V2 bootrom. See Table 182 in the RP2040 datasheet for the contents of this table.
 | 
					/// This entry is only present in the V2 bootrom. See Table 182 in the RP2040 datasheet for the contents of this table.
 | 
				
			||||||
 | 
					#[cfg_attr(feature = "rom-func-cache", inline(never))]
 | 
				
			||||||
pub fn soft_double_table() -> *const usize {
 | 
					pub fn soft_double_table() -> *const usize {
 | 
				
			||||||
    if rom_version_number() < 2 {
 | 
					    if rom_version_number() < 2 {
 | 
				
			||||||
        panic!(
 | 
					        panic!(
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user