From 6e9ddd46267fd0fce2333af4f15bfd86f6f17f4d Mon Sep 17 00:00:00 2001 From: Caleb Garrett <47389035+caleb-garrett@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:21:36 -0500 Subject: [PATCH 01/14] Added hash module with blocking implementation. Included SHA256 example. --- embassy-stm32/Cargo.toml | 4 +- embassy-stm32/src/hash/mod.rs | 260 +++++++++++++++++++++++++++++++ embassy-stm32/src/lib.rs | 2 + examples/stm32f7/Cargo.toml | 5 +- examples/stm32f7/src/bin/hash.rs | 49 ++++++ 5 files changed, 316 insertions(+), 4 deletions(-) create mode 100644 embassy-stm32/src/hash/mod.rs create mode 100644 examples/stm32f7/src/bin/hash.rs diff --git a/embassy-stm32/Cargo.toml b/embassy-stm32/Cargo.toml index 70d4daf09..d8a4c65fa 100644 --- a/embassy-stm32/Cargo.toml +++ b/embassy-stm32/Cargo.toml @@ -68,7 +68,7 @@ rand_core = "0.6.3" sdio-host = "0.5.0" critical-section = "1.1" #stm32-metapac = { version = "15" } -stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-ab2bc2a739324793656ca1640e1caee2d88df72d" } +stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-0cb3a4fcaec702c93b3700715de796636d562b15" } vcell = "0.1.3" bxcan = "0.7.0" nb = "1.0.0" @@ -87,7 +87,7 @@ critical-section = { version = "1.1", features = ["std"] } proc-macro2 = "1.0.36" quote = "1.0.15" #stm32-metapac = { version = "15", default-features = false, features = ["metadata"]} -stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-ab2bc2a739324793656ca1640e1caee2d88df72d", default-features = false, features = ["metadata"]} +stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-0cb3a4fcaec702c93b3700715de796636d562b15", default-features = false, features = ["metadata"]} [features] diff --git a/embassy-stm32/src/hash/mod.rs b/embassy-stm32/src/hash/mod.rs new file mode 100644 index 000000000..e3d2d7b16 --- /dev/null +++ 
b/embassy-stm32/src/hash/mod.rs @@ -0,0 +1,260 @@ +//! Hash generator (HASH) +use core::cmp::min; + +use embassy_hal_internal::{into_ref, PeripheralRef}; +use stm32_metapac::hash::regs::*; + +use crate::pac::HASH as PAC_HASH; +use crate::peripherals::HASH; +use crate::rcc::sealed::RccPeripheral; +use crate::Peripheral; + +const NUM_CONTEXT_REGS: usize = 54; +const HASH_BUFFER_LEN: usize = 68; +const DIGEST_BLOCK_SIZE: usize = 64; + +///Hash algorithm selection +#[derive(PartialEq)] +pub enum Algorithm { + /// SHA-1 Algorithm + SHA1 = 0, + /// MD5 Algorithm + MD5 = 1, + /// SHA-224 Algorithm + SHA224 = 2, + /// SHA-256 Algorithm + SHA256 = 3, +} + +/// Input data width selection +#[repr(u8)] +#[derive(Clone, Copy)] +pub enum DataType { + ///32-bit data, no data is swapped. + Width32 = 0, + ///16-bit data, each half-word is swapped. + Width16 = 1, + ///8-bit data, all bytes are swapped. + Width8 = 2, + ///1-bit data, all bits are swapped. + Width1 = 3, +} + +/// Stores the state of the HASH peripheral for suspending/resuming +/// digest calculation. +pub struct Context { + first_word_sent: bool, + buffer: [u8; HASH_BUFFER_LEN], + buflen: usize, + algo: Algorithm, + format: DataType, + imr: u32, + str: u32, + cr: u32, + csr: [u32; NUM_CONTEXT_REGS], +} + +/// HASH driver. +pub struct Hash<'d> { + _peripheral: PeripheralRef<'d, HASH>, +} + +impl<'d> Hash<'d> { + /// Instantiates, resets, and enables the HASH peripheral. + pub fn new(peripheral: impl Peripheral
 + 'd) -> Self {
+        HASH::enable_and_reset();
+        into_ref!(peripheral);
+        let instance = Self {
+            _peripheral: peripheral,
+        };
+        instance
+    }
+
+    /// Starts computation of a new hash and returns the saved peripheral state.
+    pub fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context {
+        // Define a context for this new computation.
+        let mut ctx = Context {
+            first_word_sent: false,
+            buffer: [0; 68],
+            buflen: 0,
+            algo: algorithm,
+            format: format,
+            imr: 0,
+            str: 0,
+            cr: 0,
+            csr: [0; NUM_CONTEXT_REGS],
+        };
+
+        // Set the data type in the peripheral.
+        PAC_HASH.cr().modify(|w| w.set_datatype(ctx.format as u8));
+
+        // Select the algorithm.
+        let mut algo0 = false;
+        let mut algo1 = false;
+        if ctx.algo == Algorithm::MD5 || ctx.algo == Algorithm::SHA256 {
+            algo0 = true;
+        }
+        if ctx.algo == Algorithm::SHA224 || ctx.algo == Algorithm::SHA256 {
+            algo1 = true;
+        }
+        PAC_HASH.cr().modify(|w| w.set_algo0(algo0));
+        PAC_HASH.cr().modify(|w| w.set_algo1(algo1));
+        PAC_HASH.cr().modify(|w| w.set_init(true));
+
+        // Store and return the state of the peripheral.
+        self.store_context(&mut ctx);
+        ctx
+    }
+
+    /// Restores the peripheral state using the given context,
+    /// then updates the state with the provided data.
+    pub fn update(&mut self, ctx: &mut Context, input: &[u8]) {
+        let mut data_waiting = input.len() + ctx.buflen;
+        if data_waiting < DIGEST_BLOCK_SIZE || (data_waiting < ctx.buffer.len() && !ctx.first_word_sent) {
+            // There isn't enough data to digest a block, so append it to the buffer.
+            ctx.buffer[ctx.buflen..ctx.buflen + input.len()].copy_from_slice(input);
+            ctx.buflen += input.len();
+            return;
+        }
+
+        //Restore the peripheral state.
+        self.load_context(&ctx);
+
+        let mut ilen_remaining = input.len();
+        let mut input_start = 0;
+
+        // Handle first block.
+        if !ctx.first_word_sent {
+            let empty_len = ctx.buffer.len() - ctx.buflen;
+            let copy_len = min(empty_len, ilen_remaining);
+            // Fill the buffer.
+            if copy_len > 0 {
+                ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[0..copy_len]);
+                ctx.buflen += copy_len;
+                ilen_remaining -= copy_len;
+                input_start += copy_len;
+            }
+            assert_eq!(ctx.buflen, HASH_BUFFER_LEN);
+            self.accumulate(ctx.buffer.as_slice());
+            data_waiting -= ctx.buflen;
+            ctx.buflen = 0;
+            ctx.first_word_sent = true;
+        }
+
+        if data_waiting < 64 {
+            // There isn't enough data remaining to process another block, so store it.
+            assert_eq!(ctx.buflen, 0);
+            ctx.buffer[0..ilen_remaining].copy_from_slice(&input[input_start..input_start + ilen_remaining]);
+            ctx.buflen += ilen_remaining;
+        } else {
+            let mut total_data_sent = 0;
+            // First ingest the data in the buffer.
+            let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen;
+            if empty_len > 0 {
+                let copy_len = min(empty_len, ilen_remaining);
+                ctx.buffer[ctx.buflen..ctx.buflen + copy_len]
+                    .copy_from_slice(&input[input_start..input_start + copy_len]);
+                ctx.buflen += copy_len;
+                ilen_remaining -= copy_len;
+                input_start += copy_len;
+            }
+            assert_eq!(ctx.buflen % 64, 0);
+            self.accumulate(&ctx.buffer[0..64]);
+            total_data_sent += ctx.buflen;
+            ctx.buflen = 0;
+
+            // Move any extra data to the now-empty buffer.
+            let leftovers = ilen_remaining % 64;
+            if leftovers > 0 {
+                assert!(ilen_remaining >= leftovers);
+                ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]);
+                ctx.buflen += leftovers;
+                ilen_remaining -= leftovers;
+            }
+            assert_eq!(ilen_remaining % 64, 0);
+
+            // Hash the remaining data.
+            self.accumulate(&input[input_start..input_start + ilen_remaining]);
+
+            total_data_sent += ilen_remaining;
+            assert_eq!(total_data_sent % 64, 0);
+            assert!(total_data_sent >= 64);
+        }
+
+        // Save the peripheral context.
+        self.store_context(ctx);
+    }
+
+    /// Computes a digest for the given context. A slice of the provided digest buffer is returned.
+    /// The length of the returned slice is dependent on the digest length of the selected algorithm.
+    pub fn finish<'a>(&mut self, mut ctx: Context, digest: &'a mut [u8; 32]) -> &'a [u8] {
+        // Restore the peripheral state.
+        self.load_context(&ctx);
+        // Hash the leftover bytes, if any.
+        self.accumulate(&ctx.buffer[0..ctx.buflen]);
+        ctx.buflen = 0;
+
+        //Start the digest calculation.
+        PAC_HASH.str().write(|w| w.set_dcal(true));
+
+        //Wait for completion.
+        while !PAC_HASH.sr().read().dcis() {}
+
+        //Return the digest.
+        let digest_words = match ctx.algo {
+            Algorithm::SHA1 => 5,
+            Algorithm::MD5 => 4,
+            Algorithm::SHA224 => 7,
+            Algorithm::SHA256 => 8,
+        };
+        let mut i = 0;
+        while i < digest_words {
+            let word = PAC_HASH.hr(i).read();
+            digest[(i * 4)..((i * 4) + 4)].copy_from_slice(word.to_be_bytes().as_slice());
+            i += 1;
+        }
+        &digest[0..digest_words * 4]
+    }
+
+    fn accumulate(&mut self, input: &[u8]) {
+        //Set the number of valid bits.
+        let num_valid_bits: u8 = (8 * (input.len() % 4)) as u8;
+        PAC_HASH.str().modify(|w| w.set_nblw(num_valid_bits));
+
+        let mut i = 0;
+        while i < input.len() {
+            let mut word: [u8; 4] = [0; 4];
+            let copy_idx = min(i + 4, input.len());
+            word[0..copy_idx - i].copy_from_slice(&input[i..copy_idx]);
+            PAC_HASH.din().write_value(u32::from_ne_bytes(word));
+            i += 4;
+        }
+    }
+
+    /// Save the peripheral state to a context.
+    fn store_context(&mut self, ctx: &mut Context) {
+        while !PAC_HASH.sr().read().dinis() {}
+        ctx.imr = PAC_HASH.imr().read().0;
+        ctx.str = PAC_HASH.str().read().0;
+        ctx.cr = PAC_HASH.cr().read().0;
+        let mut i = 0;
+        while i < NUM_CONTEXT_REGS {
+            ctx.csr[i] = PAC_HASH.csr(i).read();
+            i += 1;
+        }
+    }
+
+    /// Restore the peripheral state from a context.
+    fn load_context(&mut self, ctx: &Context) {
+        // Restore the peripheral state from the context.
+        PAC_HASH.imr().write_value(Imr { 0: ctx.imr });
+        PAC_HASH.str().write_value(Str { 0: ctx.str });
+        PAC_HASH.cr().write_value(Cr { 0: ctx.cr });
+        PAC_HASH.cr().modify(|w| w.set_init(true));
+        let mut i = 0;
+        while i < NUM_CONTEXT_REGS {
+            PAC_HASH.csr(i).write_value(ctx.csr[i]);
+            i += 1;
+        }
+    }
+}
diff --git a/embassy-stm32/src/lib.rs b/embassy-stm32/src/lib.rs
index a465fccd8..cd1ede0fa 100644
--- a/embassy-stm32/src/lib.rs
+++ b/embassy-stm32/src/lib.rs
@@ -45,6 +45,8 @@ pub mod exti;
 pub mod flash;
 #[cfg(fmc)]
 pub mod fmc;
+#[cfg(hash)]
+pub mod hash;
 #[cfg(hrtim)]
 pub mod hrtim;
 #[cfg(i2c)]
diff --git a/examples/stm32f7/Cargo.toml b/examples/stm32f7/Cargo.toml
index 941ba38cd..a612c2554 100644
--- a/examples/stm32f7/Cargo.toml
+++ b/examples/stm32f7/Cargo.toml
@@ -5,8 +5,8 @@ version = "0.1.0"
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-# Change stm32f767zi to your chip name, if necessary.
-embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["defmt", "stm32f767zi", "memory-x", "unstable-pac", "time-driver-any", "exti"]  }
+# Change stm32f777zi to your chip name, if necessary.
+embassy-stm32 = { version = "0.1.0", path = "../../embassy-stm32", features = ["defmt", "stm32f777zi", "memory-x", "unstable-pac", "time-driver-any", "exti"]  }
 embassy-sync = { version = "0.5.0", path = "../../embassy-sync", features = ["defmt"] }
 embassy-executor = { version = "0.5.0", path = "../../embassy-executor", features = ["task-arena-size-32768", "arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] }
 embassy-time = { version = "0.3.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime", "tick-hz-32_768"] }
@@ -28,6 +28,7 @@ rand_core = "0.6.3"
 critical-section = "1.1"
 embedded-storage = "0.3.1"
 static_cell = "2"
+sha2 = { version = "0.10.8", default-features = false }
 
 [profile.release]
 debug = 2
diff --git a/examples/stm32f7/src/bin/hash.rs b/examples/stm32f7/src/bin/hash.rs
new file mode 100644
index 000000000..1fd0e87eb
--- /dev/null
+++ b/examples/stm32f7/src/bin/hash.rs
@@ -0,0 +1,49 @@
+#![no_std]
+#![no_main]
+
+use defmt::info;
+use embassy_executor::Spawner;
+use embassy_stm32::Config;
+use embassy_time::{Duration, Instant};
+use {defmt_rtt as _, panic_probe as _};
+
+use embassy_stm32::hash::*;
+use sha2::{Digest, Sha256};
+
+const TEST_STRING_1: &[u8] = b"hello world";
+
+#[embassy_executor::main]
+async fn main(_spawner: Spawner) -> ! {
+    let config = Config::default();
+    let p = embassy_stm32::init(config);
+
+    let hw_start_time = Instant::now();
+
+    // Compute a digest in hardware.
+    let mut hw_hasher = Hash::new(p.HASH);
+    let mut context = hw_hasher.start(Algorithm::SHA256, DataType::Width8);
+    hw_hasher.update(&mut context, TEST_STRING_1);
+    let mut buffer: [u8; 32] = [0; 32];
+    let hw_digest = hw_hasher.finish(context, &mut buffer);
+
+    let hw_end_time = Instant::now();
+    let hw_execution_time = hw_end_time - hw_start_time;
+
+    let sw_start_time = Instant::now();
+
+    // Compute a digest in software.
+    let mut sw_hasher = Sha256::new();
+    sw_hasher.update(TEST_STRING_1);
+    let sw_digest = sw_hasher.finalize();
+
+    let sw_end_time = Instant::now();
+    let sw_execution_time = sw_end_time - sw_start_time;
+
+    info!("Hardware Digest: {:?}", hw_digest);
+    info!("Software Digest: {:?}", sw_digest[..]);
+    info!("Hardware Execution Time: {:?}", hw_execution_time);
+    info!("Software Execution Time: {:?}", sw_execution_time);
+    assert_eq!(*hw_digest, sw_digest[..]);
+
+    loop {}
+}
From 1dbfa5ab72e3596932ccb6bd258fac70d2efa563 Mon Sep 17 00:00:00 2001
From: Caleb Garrett <47389035+caleb-garrett@users.noreply.github.com>
Date: Thu, 1 Feb 2024 10:28:12 -0500
Subject: [PATCH 02/14] Added hash v1/v2 configs.
---
 embassy-stm32/src/hash/mod.rs | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/embassy-stm32/src/hash/mod.rs b/embassy-stm32/src/hash/mod.rs
index e3d2d7b16..622777b02 100644
--- a/embassy-stm32/src/hash/mod.rs
+++ b/embassy-stm32/src/hash/mod.rs
@@ -9,7 +9,11 @@ use crate::peripherals::HASH;
 use crate::rcc::sealed::RccPeripheral;
 use crate::Peripheral;
 
+#[cfg(hash_v1)]
+const NUM_CONTEXT_REGS: usize = 51;
+#[cfg(hash_v2)]
 const NUM_CONTEXT_REGS: usize = 54;
+
 const HASH_BUFFER_LEN: usize = 68;
 const DIGEST_BLOCK_SIZE: usize = 64;
 
@@ -20,8 +24,10 @@ pub enum Algorithm {
     SHA1 = 0,
     /// MD5 Algorithm
     MD5 = 1,
+    #[cfg(hash_v2)]
     /// SHA-224 Algorithm
     SHA224 = 2,
+    #[cfg(hash_v2)]
     /// SHA-256 Algorithm
     SHA256 = 3,
 }
From 10275309021d933d5dfe4c0a96928432e11cd8b4 Mon Sep 17 00:00:00 2001
From: Caleb Garrett <47389035+caleb-garrett@users.noreply.github.com>
Date: Thu, 1 Feb 2024 17:27:25 -0500
Subject: [PATCH 03/14] Added hash interrupts for async.
---
 embassy-stm32/src/hash/mod.rs | 171 ++++++++++++++++++++++++++--------
 1 file changed, 134 insertions(+), 37 deletions(-)
diff --git a/embassy-stm32/src/hash/mod.rs b/embassy-stm32/src/hash/mod.rs
index 622777b02..4e37e60e1 100644
--- a/embassy-stm32/src/hash/mod.rs
+++ b/embassy-stm32/src/hash/mod.rs
@@ -1,22 +1,47 @@
 //! Hash generator (HASH)
 use core::cmp::min;
+use core::future::poll_fn;
+use core::marker::PhantomData;
+use core::task::Poll;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
+
+use crate::peripherals::HASH;
 use stm32_metapac::hash::regs::*;
 
-use crate::pac::HASH as PAC_HASH;
-use crate::peripherals::HASH;
+use crate::interrupt::typelevel::Interrupt;
 use crate::rcc::sealed::RccPeripheral;
-use crate::Peripheral;
+use crate::{interrupt, pac, peripherals, Peripheral};
 
 #[cfg(hash_v1)]
 const NUM_CONTEXT_REGS: usize = 51;
 #[cfg(hash_v2)]
 const NUM_CONTEXT_REGS: usize = 54;
-
 const HASH_BUFFER_LEN: usize = 68;
 const DIGEST_BLOCK_SIZE: usize = 64;
 
+static HASH_WAKER: AtomicWaker = AtomicWaker::new();
+
+/// HASH interrupt handler.
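+pub struct InterruptHandler<T: Instance> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T: Instance> interrupt::typelevel::Handler<T::Interrupt> for InterruptHandler<T> {
+    unsafe fn on_interrupt() {
+        let bits = T::regs().sr().read();
+        if bits.dinis() {
+            T::regs().imr().modify(|reg| reg.set_dinie(false));
+            HASH_WAKER.wake();
+        }
+        if bits.dcis() {
+            T::regs().imr().modify(|reg| reg.set_dcie(false));
+            HASH_WAKER.wake();
+        }
+    }
+}
+
 ///Hash algorithm selection
 #[derive(PartialEq)]
 pub enum Algorithm {
@@ -61,23 +86,27 @@ pub struct Context {
 }
 
 /// HASH driver.
-pub struct Hash<'d> {
-    _peripheral: PeripheralRef<'d, HASH>,
+pub struct Hash<'d, T: Instance> {
+    _peripheral: PeripheralRef<'d, T>,
 }
 
-impl<'d> Hash<'d> {
+impl<'d, T: Instance> Hash<'d, T> {
     /// Instantiates, resets, and enables the HASH peripheral.
-    pub fn new(peripheral: impl Peripheral<P = HASH> + 'd) -> Self {
+    pub fn new(peripheral: impl Peripheral<P = T> + 'd) -> Self {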
         HASH::enable_and_reset();
         into_ref!(peripheral);
         let instance = Self {
             _peripheral: peripheral,
         };
+
+        T::Interrupt::unpend();
+        unsafe { T::Interrupt::enable() };
+
         instance
     }
 
     /// Starts computation of a new hash and returns the saved peripheral state.
-    pub fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context {
+    pub async fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context {
         // Define a context for this new computation.
         let mut ctx = Context {
             first_word_sent: false,
@@ -92,7 +121,7 @@ impl<'d> Hash<'d> {
         };
 
         // Set the data type in the peripheral.
-        PAC_HASH.cr().modify(|w| w.set_datatype(ctx.format as u8));
+        T::regs().cr().modify(|w| w.set_datatype(ctx.format as u8));
 
         // Select the algorithm.
         let mut algo0 = false;
@@ -103,18 +132,19 @@ impl<'d> Hash<'d> {
         if ctx.algo == Algorithm::SHA224 || ctx.algo == Algorithm::SHA256 {
             algo1 = true;
         }
-        PAC_HASH.cr().modify(|w| w.set_algo0(algo0));
-        PAC_HASH.cr().modify(|w| w.set_algo1(algo1));
-        PAC_HASH.cr().modify(|w| w.set_init(true));
+        T::regs().cr().modify(|w| w.set_algo0(algo0));
+        T::regs().cr().modify(|w| w.set_algo1(algo1));
+        T::regs().cr().modify(|w| w.set_init(true));
 
         // Store and return the state of the peripheral.
-        self.store_context(&mut ctx);
+        self.store_context(&mut ctx).await;
         ctx
     }
 
     /// Restores the peripheral state using the given context,
     /// then updates the state with the provided data.
-    pub fn update(&mut self, ctx: &mut Context, input: &[u8]) {
+    /// Peripheral state is saved upon return.
+    pub async fn update(&mut self, ctx: &mut Context, input: &[u8]) {
         let mut data_waiting = input.len() + ctx.buflen;
         if data_waiting < DIGEST_BLOCK_SIZE || (data_waiting < ctx.buffer.len() && !ctx.first_word_sent) {
             // There isn't enough data to digest a block, so append it to the buffer.
@@ -123,7 +153,7 @@ impl<'d> Hash<'d> {
             return;
         }
 
-        //Restore the peripheral state.
+        // Restore the peripheral state.
         self.load_context(&ctx);
 
         let mut ilen_remaining = input.len();
@@ -154,6 +184,7 @@ impl<'d> Hash<'d> {
             ctx.buflen += ilen_remaining;
         } else {
             let mut total_data_sent = 0;
+
             // First ingest the data in the buffer.
             let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen;
             if empty_len > 0 {
@@ -188,25 +219,43 @@ impl<'d> Hash<'d> {
         }
 
         // Save the peripheral context.
-        self.store_context(ctx);
+        self.store_context(ctx).await;
     }
 
     /// Computes a digest for the given context. A slice of the provided digest buffer is returned.
     /// The length of the returned slice is dependent on the digest length of the selected algorithm.
-    pub fn finish<'a>(&mut self, mut ctx: Context, digest: &'a mut [u8; 32]) -> &'a [u8] {
+    pub async fn finish<'a>(&mut self, mut ctx: Context, digest: &'a mut [u8; 32]) -> &'a [u8] {
         // Restore the peripheral state.
         self.load_context(&ctx);
+
         // Hash the leftover bytes, if any.
         self.accumulate(&ctx.buffer[0..ctx.buflen]);
         ctx.buflen = 0;
 
         //Start the digest calculation.
-        PAC_HASH.str().write(|w| w.set_dcal(true));
+        T::regs().str().write(|w| w.set_dcal(true));
 
-        //Wait for completion.
-        while !PAC_HASH.sr().read().dcis() {}
+        // Wait for completion.
+        poll_fn(|cx| {
+            // Check if already done.
+            let bits = T::regs().sr().read();
+            if bits.dcis() {
+                return Poll::Ready(());
+            }
+            // Register waker, then enable interrupts.
+            HASH_WAKER.register(cx.waker());
+            T::regs().imr().modify(|reg| reg.set_dinie(true));
+            // Check for completion.
+            let bits = T::regs().sr().read();
+            if bits.dcis() {
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await;
 
-        //Return the digest.
+        // Return the digest.
         let digest_words = match ctx.algo {
             Algorithm::SHA1 => 5,
             Algorithm::MD5 => 4,
@@ -215,37 +264,57 @@ impl<'d> Hash<'d> {
         };
         let mut i = 0;
         while i < digest_words {
-            let word = PAC_HASH.hr(i).read();
+            let word = T::regs().hr(i).read();
             digest[(i * 4)..((i * 4) + 4)].copy_from_slice(word.to_be_bytes().as_slice());
             i += 1;
         }
         &digest[0..digest_words * 4]
     }
 
+    /// Push data into the hash core.
     fn accumulate(&mut self, input: &[u8]) {
-        //Set the number of valid bits.
+        // Set the number of valid bits.
         let num_valid_bits: u8 = (8 * (input.len() % 4)) as u8;
-        PAC_HASH.str().modify(|w| w.set_nblw(num_valid_bits));
+        T::regs().str().modify(|w| w.set_nblw(num_valid_bits));
 
         let mut i = 0;
         while i < input.len() {
             let mut word: [u8; 4] = [0; 4];
             let copy_idx = min(i + 4, input.len());
             word[0..copy_idx - i].copy_from_slice(&input[i..copy_idx]);
-            PAC_HASH.din().write_value(u32::from_ne_bytes(word));
+            T::regs().din().write_value(u32::from_ne_bytes(word));
             i += 4;
         }
     }
 
     /// Save the peripheral state to a context.
-    fn store_context(&mut self, ctx: &mut Context) {
-        while !PAC_HASH.sr().read().dinis() {}
-        ctx.imr = PAC_HASH.imr().read().0;
-        ctx.str = PAC_HASH.str().read().0;
-        ctx.cr = PAC_HASH.cr().read().0;
+    async fn store_context(&mut self, ctx: &mut Context) {
+        // Wait for interrupt.
+        poll_fn(|cx| {
+            // Check if already done.
+            let bits = T::regs().sr().read();
+            if bits.dinis() {
+                return Poll::Ready(());
+            }
+            // Register waker, then enable interrupts.
+            HASH_WAKER.register(cx.waker());
+            T::regs().imr().modify(|reg| reg.set_dinie(true));
+            // Check for completion.
+            let bits = T::regs().sr().read();
+            if bits.dinis() {
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await;
+
+        ctx.imr = T::regs().imr().read().0;
+        ctx.str = T::regs().str().read().0;
+        ctx.cr = T::regs().cr().read().0;
         let mut i = 0;
         while i < NUM_CONTEXT_REGS {
-            ctx.csr[i] = PAC_HASH.csr(i).read();
+            ctx.csr[i] = T::regs().csr(i).read();
             i += 1;
         }
     }
@@ -253,14 +322,42 @@ impl<'d> Hash<'d> {
     /// Restore the peripheral state from a context.
     fn load_context(&mut self, ctx: &Context) {
         // Restore the peripheral state from the context.
-        PAC_HASH.imr().write_value(Imr { 0: ctx.imr });
-        PAC_HASH.str().write_value(Str { 0: ctx.str });
-        PAC_HASH.cr().write_value(Cr { 0: ctx.cr });
-        PAC_HASH.cr().modify(|w| w.set_init(true));
+        T::regs().imr().write_value(Imr { 0: ctx.imr });
+        T::regs().str().write_value(Str { 0: ctx.str });
+        T::regs().cr().write_value(Cr { 0: ctx.cr });
+        T::regs().cr().modify(|w| w.set_init(true));
         let mut i = 0;
         while i < NUM_CONTEXT_REGS {
-            PAC_HASH.csr(i).write_value(ctx.csr[i]);
+            T::regs().csr(i).write_value(ctx.csr[i]);
             i += 1;
         }
     }
 }
+
+pub(crate) mod sealed {
+    use super::*;
+
+    pub trait Instance {
+        fn regs() -> pac::hash::Hash;
+    }
+}
+
+/// HASH instance trait.
+pub trait Instance: sealed::Instance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
+    /// Interrupt for this HASH instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
+
+foreach_interrupt!(
+    ($inst:ident, hash, HASH, GLOBAL, $irq:ident) => {
+        impl Instance for peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
+
+        impl sealed::Instance for peripherals::$inst {
+            fn regs() -> crate::pac::hash::Hash {
+                crate::pac::$inst
+            }
+        }
+    };
+);
From 72bbfec39d3f826c1a8dd485af2da4bcbdd32e35 Mon Sep 17 00:00:00 2001
From: Caleb Garrett <47389035+caleb-garrett@users.noreply.github.com>
Date: Sat, 3 Feb 2024 16:10:00 -0500
Subject: [PATCH 04/14] Added hash DMA implementation.
---
 embassy-stm32/build.rs           |   1 +
 embassy-stm32/src/hash/mod.rs    | 143 ++++++++++++++-----------------
 examples/stm32f7/src/bin/hash.rs |  20 +++--
 3 files changed, 79 insertions(+), 85 deletions(-)
diff --git a/embassy-stm32/build.rs b/embassy-stm32/build.rs
index 948ce3aff..1a68dfc9d 100644
--- a/embassy-stm32/build.rs
+++ b/embassy-stm32/build.rs
@@ -1015,6 +1015,7 @@ fn main() {
         (("dac", "CH1"), quote!(crate::dac::DacDma1)),
         (("dac", "CH2"), quote!(crate::dac::DacDma2)),
         (("timer", "UP"), quote!(crate::timer::UpDma)),
+        (("hash", "IN"), quote!(crate::hash::Dma)),
     ]
     .into();
 
diff --git a/embassy-stm32/src/hash/mod.rs b/embassy-stm32/src/hash/mod.rs
index 4e37e60e1..ac4854f80 100644
--- a/embassy-stm32/src/hash/mod.rs
+++ b/embassy-stm32/src/hash/mod.rs
@@ -2,11 +2,13 @@
 use core::cmp::min;
 use core::future::poll_fn;
 use core::marker::PhantomData;
+use core::ptr;
 use core::task::Poll;
 
 use embassy_hal_internal::{into_ref, PeripheralRef};
 use embassy_sync::waitqueue::AtomicWaker;
 
+use crate::dma::Transfer;
 use crate::peripherals::HASH;
 use stm32_metapac::hash::regs::*;
 
@@ -18,7 +20,6 @@ use crate::{interrupt, pac, peripherals, Peripheral};
 const NUM_CONTEXT_REGS: usize = 51;
 #[cfg(hash_v2)]
 const NUM_CONTEXT_REGS: usize = 54;
-const HASH_BUFFER_LEN: usize = 68;
 const DIGEST_BLOCK_SIZE: usize = 64;
 
 static HASH_WAKER: AtomicWaker = AtomicWaker::new();
@@ -74,8 +75,7 @@ pub enum DataType {
 /// Stores the state of the HASH peripheral for suspending/resuming
 /// digest calculation.
 pub struct Context {
-    first_word_sent: bool,
-    buffer: [u8; HASH_BUFFER_LEN],
+    buffer: [u8; DIGEST_BLOCK_SIZE],
     buflen: usize,
     algo: Algorithm,
     format: DataType,
@@ -86,17 +86,19 @@ pub struct Context {
 }
 
 /// HASH driver.
-pub struct Hash<'d, T: Instance> {
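+pub struct Hash<'d, T: Instance, D: Dma<T>> {
     _peripheral: PeripheralRef<'d, T>,
+    dma: PeripheralRef<'d, D>,
 }
 
-impl<'d, T: Instance> Hash<'d, T> {
+impl<'d, T: Instance, D: Dma<T>> Hash<'d, T, D> {
     /// Instantiates, resets, and enables the HASH peripheral.
-    pub fn new(peripheral: impl Peripheral<P = T> + 'd) -> Self {
+    pub fn new(peripheral: impl Peripheral<P = T> + 'd, dma: impl Peripheral<P = D> + 'd) -> Self {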
         HASH::enable_and_reset();
-        into_ref!(peripheral);
+        into_ref!(peripheral, dma);
         let instance = Self {
             _peripheral: peripheral,
+            dma: dma,
         };
 
         T::Interrupt::unpend();
@@ -109,8 +111,7 @@ impl<'d, T: Instance> Hash<'d, T> {
     pub async fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context {
         // Define a context for this new computation.
         let mut ctx = Context {
-            first_word_sent: false,
-            buffer: [0; 68],
+            buffer: [0; DIGEST_BLOCK_SIZE],
             buflen: 0,
             algo: algorithm,
             format: format,
@@ -134,6 +135,11 @@ impl<'d, T: Instance> Hash<'d, T> {
         }
         T::regs().cr().modify(|w| w.set_algo0(algo0));
         T::regs().cr().modify(|w| w.set_algo1(algo1));
+
+        // Enable multiple DMA transfers.
+        T::regs().cr().modify(|w| w.set_mdmat(true));
+
+        // Set init to load the context registers. Necessary before storing context.
         T::regs().cr().modify(|w| w.set_init(true));
 
         // Store and return the state of the peripheral.
@@ -145,8 +151,8 @@ impl<'d, T: Instance> Hash<'d, T> {
     /// then updates the state with the provided data.
     /// Peripheral state is saved upon return.
     pub async fn update(&mut self, ctx: &mut Context, input: &[u8]) {
-        let mut data_waiting = input.len() + ctx.buflen;
-        if data_waiting < DIGEST_BLOCK_SIZE || (data_waiting < ctx.buffer.len() && !ctx.first_word_sent) {
+        let data_waiting = input.len() + ctx.buflen;
+        if data_waiting < DIGEST_BLOCK_SIZE {
             // There isn't enough data to digest a block, so append it to the buffer.
             ctx.buffer[ctx.buflen..ctx.buflen + input.len()].copy_from_slice(input);
             ctx.buflen += input.len();
@@ -159,65 +165,35 @@ impl<'d, T: Instance> Hash<'d, T> {
         let mut ilen_remaining = input.len();
         let mut input_start = 0;
 
-        // Handle first block.
-        if !ctx.first_word_sent {
-            let empty_len = ctx.buffer.len() - ctx.buflen;
+        // First ingest the data in the buffer.
+        let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen;
+        if empty_len > 0 {
             let copy_len = min(empty_len, ilen_remaining);
-            // Fill the buffer.
-            if copy_len > 0 {
-                ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[0..copy_len]);
-                ctx.buflen += copy_len;
-                ilen_remaining -= copy_len;
-                input_start += copy_len;
-            }
-            assert_eq!(ctx.buflen, HASH_BUFFER_LEN);
-            self.accumulate(ctx.buffer.as_slice());
-            data_waiting -= ctx.buflen;
-            ctx.buflen = 0;
-            ctx.first_word_sent = true;
+            ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[input_start..input_start + copy_len]);
+            ctx.buflen += copy_len;
+            ilen_remaining -= copy_len;
+            input_start += copy_len;
         }
+        self.accumulate(&ctx.buffer).await;
+        ctx.buflen = 0;
 
-        if data_waiting < 64 {
-            // There isn't enough data remaining to process another block, so store it.
-            assert_eq!(ctx.buflen, 0);
-            ctx.buffer[0..ilen_remaining].copy_from_slice(&input[input_start..input_start + ilen_remaining]);
-            ctx.buflen += ilen_remaining;
+        // Move any extra data to the now-empty buffer.
+        let leftovers = ilen_remaining % DIGEST_BLOCK_SIZE;
+        if leftovers > 0 {
+            assert!(ilen_remaining >= leftovers);
+            ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]);
+            ctx.buflen += leftovers;
+            ilen_remaining -= leftovers;
         } else {
-            let mut total_data_sent = 0;
-
-            // First ingest the data in the buffer.
-            let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen;
-            if empty_len > 0 {
-                let copy_len = min(empty_len, ilen_remaining);
-                ctx.buffer[ctx.buflen..ctx.buflen + copy_len]
-                    .copy_from_slice(&input[input_start..input_start + copy_len]);
-                ctx.buflen += copy_len;
-                ilen_remaining -= copy_len;
-                input_start += copy_len;
-            }
-            assert_eq!(ctx.buflen % 64, 0);
-            self.accumulate(&ctx.buffer[0..64]);
-            total_data_sent += ctx.buflen;
-            ctx.buflen = 0;
-
-            // Move any extra data to the now-empty buffer.
-            let leftovers = ilen_remaining % 64;
-            if leftovers > 0 {
-                assert!(ilen_remaining >= leftovers);
-                ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]);
-                ctx.buflen += leftovers;
-                ilen_remaining -= leftovers;
-            }
-            assert_eq!(ilen_remaining % 64, 0);
-
-            // Hash the remaining data.
-            self.accumulate(&input[input_start..input_start + ilen_remaining]);
-
-            total_data_sent += ilen_remaining;
-            assert_eq!(total_data_sent % 64, 0);
-            assert!(total_data_sent >= 64);
+            ctx.buffer
+                .copy_from_slice(&input[input.len() - DIGEST_BLOCK_SIZE..input.len()]);
+            ctx.buflen += DIGEST_BLOCK_SIZE;
+            ilen_remaining -= DIGEST_BLOCK_SIZE;
         }
 
+        // Hash the remaining data.
+        self.accumulate(&input[input_start..input_start + ilen_remaining]).await;
+
         // Save the peripheral context.
         self.store_context(ctx).await;
     }
@@ -228,12 +204,12 @@ impl<'d, T: Instance> Hash<'d, T> {
         // Restore the peripheral state.
         self.load_context(&ctx);
 
-        // Hash the leftover bytes, if any.
-        self.accumulate(&ctx.buffer[0..ctx.buflen]);
-        ctx.buflen = 0;
+        // Must be cleared prior to the last DMA transfer.
+        T::regs().cr().modify(|w| w.set_mdmat(false));
 
-        //Start the digest calculation.
-        T::regs().str().write(|w| w.set_dcal(true));
+        // Hash the leftover bytes, if any.
+        self.accumulate(&ctx.buffer[0..ctx.buflen]).await;
+        ctx.buflen = 0;
 
         // Wait for completion.
         poll_fn(|cx| {
@@ -272,19 +248,30 @@ impl<'d, T: Instance> Hash<'d, T> {
     }
 
     /// Push data into the hash core.
-    fn accumulate(&mut self, input: &[u8]) {
+    async fn accumulate(&mut self, input: &[u8]) {
+        // Ignore an input length of 0.
+        if input.len() == 0 {
+            return;
+        }
+
         // Set the number of valid bits.
         let num_valid_bits: u8 = (8 * (input.len() % 4)) as u8;
         T::regs().str().modify(|w| w.set_nblw(num_valid_bits));
 
-        let mut i = 0;
-        while i < input.len() {
-            let mut word: [u8; 4] = [0; 4];
-            let copy_idx = min(i + 4, input.len());
-            word[0..copy_idx - i].copy_from_slice(&input[i..copy_idx]);
-            T::regs().din().write_value(u32::from_ne_bytes(word));
-            i += 4;
+        // Configure DMA to transfer input to hash core.
+        let dma_request = self.dma.request();
+        let dst_ptr = T::regs().din().as_ptr();
+        let mut num_words = input.len() / 4;
+        if input.len() % 4 > 0 {
+            num_words += 1;
         }
+        let src_ptr = ptr::slice_from_raw_parts(input.as_ptr().cast(), num_words);
+        let dma_transfer =
+            unsafe { Transfer::new_write_raw(&mut self.dma, dma_request, src_ptr, dst_ptr, Default::default()) };
+        T::regs().cr().modify(|w| w.set_dmae(true));
+
+        // Wait for the transfer to complete.
+        dma_transfer.await;
     }
 
     /// Save the peripheral state to a context.
@@ -361,3 +348,5 @@ foreach_interrupt!(
         }
     };
 );
+
+dma_trait!(Dma, Instance);
diff --git a/examples/stm32f7/src/bin/hash.rs b/examples/stm32f7/src/bin/hash.rs
index 1fd0e87eb..a9f5aa197 100644
--- a/examples/stm32f7/src/bin/hash.rs
+++ b/examples/stm32f7/src/bin/hash.rs
@@ -4,27 +4,30 @@
 use defmt::info;
 use embassy_executor::Spawner;
 use embassy_stm32::Config;
-use embassy_time::{Duration, Instant};
+use embassy_time::Instant;
 use {defmt_rtt as _, panic_probe as _};
 
 use embassy_stm32::hash::*;
 use sha2::{Digest, Sha256};
 
-const TEST_STRING_1: &[u8] = b"hello world";
-
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) -> ! {
     let config = Config::default();
     let p = embassy_stm32::init(config);
 
+    let test_1: &[u8] = b"as;dfhaslfhas;oifvnasd;nifvnhasd;nifvhndlkfghsd;nvfnahssdfgsdafgsasdfasdfasdfasdfasdfghjklmnbvcalskdjghalskdjgfbaslkdjfgbalskdjgbalskdjbdfhsdfhsfghsfghfgh";
+    let test_2: &[u8] = b"fdhalksdjfhlasdjkfhalskdjfhgal;skdjfgalskdhfjgalskdjfglafgadfgdfgdafgaadsfgfgdfgadrgsyfthxfgjfhklhjkfgukhulkvhlvhukgfhfsrghzdhxyfufynufyuszeradrtydyytserr";
+
+    let mut hw_hasher = Hash::new(p.HASH, p.DMA2_CH7);
+
     let hw_start_time = Instant::now();
 
     // Compute a digest in hardware.
-    let mut hw_hasher = Hash::new(p.HASH);
-    let mut context = hw_hasher.start(Algorithm::SHA256, DataType::Width8);
-    hw_hasher.update(&mut context, TEST_STRING_1);
+    let mut context = hw_hasher.start(Algorithm::SHA256, DataType::Width8).await;
+    hw_hasher.update(&mut context, test_1).await;
+    hw_hasher.update(&mut context, test_2).await;
     let mut buffer: [u8; 32] = [0; 32];
-    let hw_digest = hw_hasher.finish(context, &mut buffer);
+    let hw_digest = hw_hasher.finish(context, &mut buffer).await;
 
     let hw_end_time = Instant::now();
     let hw_execution_time = hw_end_time - hw_start_time;
@@ -33,7 +36,8 @@ async fn main(_spawner: Spawner) -> ! {
 
     // Compute a digest in software.
     let mut sw_hasher = Sha256::new();
-    sw_hasher.update(TEST_STRING_1);
+    sw_hasher.update(test_1);
+    sw_hasher.update(test_2);
     let sw_digest = sw_hasher.finalize();
 
     let sw_end_time = Instant::now();
From 66f44b95d70547be8e32daac1ab611eec5fbe28a Mon Sep 17 00:00:00 2001
From: Caleb Garrett <47389035+caleb-garrett@users.noreply.github.com>
Date: Sun, 4 Feb 2024 17:16:33 -0500
Subject: [PATCH 05/14] Addressed hash CI build issues.
---
 embassy-stm32/Cargo.toml         |   4 +-
 embassy-stm32/src/hash/mod.rs    | 357 +---------------------------
 embassy-stm32/src/hash/v1.rs     | 334 +++++++++++++++++++++++++++
 embassy-stm32/src/hash/v2v3.rs   | 385 +++++++++++++++++++++++++++++++
 examples/stm32f7/src/bin/eth.rs  |   2 +-
 examples/stm32f7/src/bin/hash.rs |   6 +-
 6 files changed, 731 insertions(+), 357 deletions(-)
 create mode 100644 embassy-stm32/src/hash/v1.rs
 create mode 100644 embassy-stm32/src/hash/v2v3.rs
diff --git a/embassy-stm32/Cargo.toml b/embassy-stm32/Cargo.toml
index d8a4c65fa..00d8a5f63 100644
--- a/embassy-stm32/Cargo.toml
+++ b/embassy-stm32/Cargo.toml
@@ -68,7 +68,7 @@ rand_core = "0.6.3"
 sdio-host = "0.5.0"
 critical-section = "1.1"
 #stm32-metapac = { version = "15" }
-stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-0cb3a4fcaec702c93b3700715de796636d562b15" }
+stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-aa5dbf859fae743306f5d816905f166de824241f" }
 vcell = "0.1.3"
 bxcan = "0.7.0"
 nb = "1.0.0"
@@ -87,7 +87,7 @@ critical-section = { version = "1.1", features = ["std"] }
 proc-macro2 = "1.0.36"
 quote = "1.0.15"
 #stm32-metapac = { version = "15", default-features = false, features = ["metadata"]}
-stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-0cb3a4fcaec702c93b3700715de796636d562b15", default-features = false, features = ["metadata"]}
+stm32-metapac = { git = "https://github.com/embassy-rs/stm32-data-generated", tag = "stm32-data-aa5dbf859fae743306f5d816905f166de824241f", default-features = false, features = ["metadata"]}
 
 
 [features]
diff --git a/embassy-stm32/src/hash/mod.rs b/embassy-stm32/src/hash/mod.rs
index ac4854f80..6b23f3b55 100644
--- a/embassy-stm32/src/hash/mod.rs
+++ b/embassy-stm32/src/hash/mod.rs
@@ -1,352 +1,7 @@
-//! Hash generator (HASH)
-use core::cmp::min;
-use core::future::poll_fn;
-use core::marker::PhantomData;
-use core::ptr;
-use core::task::Poll;
+//! Hash Accelerator (HASH)
+#[cfg_attr(hash_v1, path = "v1.rs")]
+#[cfg_attr(hash_v2, path = "v2v3.rs")]
+#[cfg_attr(hash_v3, path = "v2v3.rs")]
+mod _version;
 
-use embassy_hal_internal::{into_ref, PeripheralRef};
-use embassy_sync::waitqueue::AtomicWaker;
-
-use crate::dma::Transfer;
-use crate::peripherals::HASH;
-use stm32_metapac::hash::regs::*;
-
-use crate::interrupt::typelevel::Interrupt;
-use crate::rcc::sealed::RccPeripheral;
-use crate::{interrupt, pac, peripherals, Peripheral};
-
-#[cfg(hash_v1)]
-const NUM_CONTEXT_REGS: usize = 51;
-#[cfg(hash_v2)]
-const NUM_CONTEXT_REGS: usize = 54;
-const DIGEST_BLOCK_SIZE: usize = 64;
-
-static HASH_WAKER: AtomicWaker = AtomicWaker::new();
-
-/// HASH interrupt handler.
-pub struct InterruptHandler  + 'd, dma: impl Peripheral  + 'd) -> Self {
-        HASH::enable_and_reset();
-        into_ref!(peripheral, dma);
-        let instance = Self {
-            _peripheral: peripheral,
-            dma: dma,
-        };
-
-        T::Interrupt::unpend();
-        unsafe { T::Interrupt::enable() };
-
-        instance
-    }
-
-    /// Starts computation of a new hash and returns the saved peripheral state.
-    pub async fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context {
-        // Define a context for this new computation.
-        let mut ctx = Context {
-            buffer: [0; DIGEST_BLOCK_SIZE],
-            buflen: 0,
-            algo: algorithm,
-            format: format,
-            imr: 0,
-            str: 0,
-            cr: 0,
-            csr: [0; NUM_CONTEXT_REGS],
-        };
-
-        // Set the data type in the peripheral.
-        T::regs().cr().modify(|w| w.set_datatype(ctx.format as u8));
-
-        // Select the algorithm.
-        let mut algo0 = false;
-        let mut algo1 = false;
-        if ctx.algo == Algorithm::MD5 || ctx.algo == Algorithm::SHA256 {
-            algo0 = true;
-        }
-        if ctx.algo == Algorithm::SHA224 || ctx.algo == Algorithm::SHA256 {
-            algo1 = true;
-        }
-        T::regs().cr().modify(|w| w.set_algo0(algo0));
-        T::regs().cr().modify(|w| w.set_algo1(algo1));
-
-        // Enable multiple DMA transfers.
-        T::regs().cr().modify(|w| w.set_mdmat(true));
-
-        // Set init to load the context registers. Necessary before storing context.
-        T::regs().cr().modify(|w| w.set_init(true));
-
-        // Store and return the state of the peripheral.
-        self.store_context(&mut ctx).await;
-        ctx
-    }
-
-    /// Restores the peripheral state using the given context,
-    /// then updates the state with the provided data.
-    /// Peripheral state is saved upon return.
-    pub async fn update(&mut self, ctx: &mut Context, input: &[u8]) {
-        let data_waiting = input.len() + ctx.buflen;
-        if data_waiting < DIGEST_BLOCK_SIZE {
-            // There isn't enough data to digest a block, so append it to the buffer.
-            ctx.buffer[ctx.buflen..ctx.buflen + input.len()].copy_from_slice(input);
-            ctx.buflen += input.len();
-            return;
-        }
-
-        // Restore the peripheral state.
-        self.load_context(&ctx);
-
-        let mut ilen_remaining = input.len();
-        let mut input_start = 0;
-
-        // First ingest the data in the buffer.
-        let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen;
-        if empty_len > 0 {
-            let copy_len = min(empty_len, ilen_remaining);
-            ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[input_start..input_start + copy_len]);
-            ctx.buflen += copy_len;
-            ilen_remaining -= copy_len;
-            input_start += copy_len;
-        }
-        self.accumulate(&ctx.buffer).await;
-        ctx.buflen = 0;
-
-        // Move any extra data to the now-empty buffer.
-        let leftovers = ilen_remaining % DIGEST_BLOCK_SIZE;
-        if leftovers > 0 {
-            assert!(ilen_remaining >= leftovers);
-            ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]);
-            ctx.buflen += leftovers;
-            ilen_remaining -= leftovers;
-        } else {
-            ctx.buffer
-                .copy_from_slice(&input[input.len() - DIGEST_BLOCK_SIZE..input.len()]);
-            ctx.buflen += DIGEST_BLOCK_SIZE;
-            ilen_remaining -= DIGEST_BLOCK_SIZE;
-        }
-
-        // Hash the remaining data.
-        self.accumulate(&input[input_start..input_start + ilen_remaining]).await;
-
-        // Save the peripheral context.
-        self.store_context(ctx).await;
-    }
-
-    /// Computes a digest for the given context. A slice of the provided digest buffer is returned.
-    /// The length of the returned slice is dependent on the digest length of the selected algorithm.
-    pub async fn finish<'a>(&mut self, mut ctx: Context, digest: &'a mut [u8; 32]) -> &'a [u8] {
-        // Restore the peripheral state.
-        self.load_context(&ctx);
-
-        // Must be cleared prior to the last DMA transfer.
-        T::regs().cr().modify(|w| w.set_mdmat(false));
-
-        // Hash the leftover bytes, if any.
-        self.accumulate(&ctx.buffer[0..ctx.buflen]).await;
-        ctx.buflen = 0;
-
-        // Wait for completion.
-        poll_fn(|cx| {
-            // Check if already done.
-            let bits = T::regs().sr().read();
-            if bits.dcis() {
-                return Poll::Ready(());
-            }
-            // Register waker, then enable interrupts.
-            HASH_WAKER.register(cx.waker());
-            T::regs().imr().modify(|reg| reg.set_dinie(true));
-            // Check for completion.
-            let bits = T::regs().sr().read();
-            if bits.dcis() {
-                Poll::Ready(())
-            } else {
-                Poll::Pending
-            }
-        })
-        .await;
-
-        // Return the digest.
-        let digest_words = match ctx.algo {
-            Algorithm::SHA1 => 5,
-            Algorithm::MD5 => 4,
-            Algorithm::SHA224 => 7,
-            Algorithm::SHA256 => 8,
-        };
-        let mut i = 0;
-        while i < digest_words {
-            let word = T::regs().hr(i).read();
-            digest[(i * 4)..((i * 4) + 4)].copy_from_slice(word.to_be_bytes().as_slice());
-            i += 1;
-        }
-        &digest[0..digest_words * 4]
-    }
-
-    /// Push data into the hash core.
-    async fn accumulate(&mut self, input: &[u8]) {
-        // Ignore an input length of 0.
-        if input.len() == 0 {
-            return;
-        }
-
-        // Set the number of valid bits.
-        let num_valid_bits: u8 = (8 * (input.len() % 4)) as u8;
-        T::regs().str().modify(|w| w.set_nblw(num_valid_bits));
-
-        // Configure DMA to transfer input to hash core.
-        let dma_request = self.dma.request();
-        let dst_ptr = T::regs().din().as_ptr();
-        let mut num_words = input.len() / 4;
-        if input.len() % 4 > 0 {
-            num_words += 1;
-        }
-        let src_ptr = ptr::slice_from_raw_parts(input.as_ptr().cast(), num_words);
-        let dma_transfer =
-            unsafe { Transfer::new_write_raw(&mut self.dma, dma_request, src_ptr, dst_ptr, Default::default()) };
-        T::regs().cr().modify(|w| w.set_dmae(true));
-
-        // Wait for the transfer to complete.
-        dma_transfer.await;
-    }
-
-    /// Save the peripheral state to a context.
-    async fn store_context(&mut self, ctx: &mut Context) {
-        // Wait for interrupt.
-        poll_fn(|cx| {
-            // Check if already done.
-            let bits = T::regs().sr().read();
-            if bits.dinis() {
-                return Poll::Ready(());
-            }
-            // Register waker, then enable interrupts.
-            HASH_WAKER.register(cx.waker());
-            T::regs().imr().modify(|reg| reg.set_dinie(true));
-            // Check for completion.
-            let bits = T::regs().sr().read();
-            if bits.dinis() {
-                Poll::Ready(())
-            } else {
-                Poll::Pending
-            }
-        })
-        .await;
-
-        ctx.imr = T::regs().imr().read().0;
-        ctx.str = T::regs().str().read().0;
-        ctx.cr = T::regs().cr().read().0;
-        let mut i = 0;
-        while i < NUM_CONTEXT_REGS {
-            ctx.csr[i] = T::regs().csr(i).read();
-            i += 1;
-        }
-    }
-
-    /// Restore the peripheral state from a context.
-    fn load_context(&mut self, ctx: &Context) {
-        // Restore the peripheral state from the context.
-        T::regs().imr().write_value(Imr { 0: ctx.imr });
-        T::regs().str().write_value(Str { 0: ctx.str });
-        T::regs().cr().write_value(Cr { 0: ctx.cr });
-        T::regs().cr().modify(|w| w.set_init(true));
-        let mut i = 0;
-        while i < NUM_CONTEXT_REGS {
-            T::regs().csr(i).write_value(ctx.csr[i]);
-            i += 1;
-        }
-    }
-}
-
-pub(crate) mod sealed {
-    use super::*;
-
-    pub trait Instance {
-        fn regs() -> pac::hash::Hash;
-    }
-}
-
-/// HASH instance trait.
-pub trait Instance: sealed::Instance + Peripheral  + crate::rcc::RccPeripheral + 'static + Send {
-    /// Interrupt for this HASH instance.
-    type Interrupt: interrupt::typelevel::Interrupt;
-}
-
-foreach_interrupt!(
-    ($inst:ident, hash, HASH, GLOBAL, $irq:ident) => {
-        impl Instance for peripherals::$inst {
-            type Interrupt = crate::interrupt::typelevel::$irq;
-        }
-
-        impl sealed::Instance for peripherals::$inst {
-            fn regs() -> crate::pac::hash::Hash {
-                crate::pac::$inst
-            }
-        }
-    };
-);
-
-dma_trait!(Dma, Instance);
+pub use _version::*;
diff --git a/embassy-stm32/src/hash/v1.rs b/embassy-stm32/src/hash/v1.rs
new file mode 100644
index 000000000..50f9adc83
--- /dev/null
+++ b/embassy-stm32/src/hash/v1.rs
@@ -0,0 +1,334 @@
+//! Hash generator (HASH)
+use core::cmp::min;
+use core::future::poll_fn;
+use core::marker::PhantomData;
+use core::task::Poll;
+
+use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
+use stm32_metapac::hash::regs::*;
+
+use crate::interrupt::typelevel::Interrupt;
+use crate::peripherals::HASH;
+use crate::rcc::sealed::RccPeripheral;
+use crate::{interrupt, pac, peripherals, Peripheral};
+
+const NUM_CONTEXT_REGS: usize = 51;
+const HASH_BUFFER_LEN: usize = 68;
+const DIGEST_BLOCK_SIZE: usize = 64;
+const MAX_DIGEST_SIZE: usize = 20;
+
+static HASH_WAKER: AtomicWaker = AtomicWaker::new();
+
+/// HASH interrupt handler.
+pub struct InterruptHandler  + 'd) -> Self {
+        HASH::enable_and_reset();
+        into_ref!(peripheral);
+        let instance = Self {
+            _peripheral: peripheral,
+        };
+
+        T::Interrupt::unpend();
+        unsafe { T::Interrupt::enable() };
+
+        instance
+    }
+
+    /// Starts computation of a new hash and returns the saved peripheral state.
+    pub async fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context {
+        // Define a context for this new computation.
+        let mut ctx = Context {
+            first_word_sent: false,
+            buffer: [0; HASH_BUFFER_LEN],
+            buflen: 0,
+            algo: algorithm,
+            format: format,
+            imr: 0,
+            str: 0,
+            cr: 0,
+            csr: [0; NUM_CONTEXT_REGS],
+        };
+
+        // Set the data type in the peripheral.
+        T::regs().cr().modify(|w| w.set_datatype(ctx.format as u8));
+
+        // Select the algorithm. The bit is written unconditionally so that an
+        // MD5 selection left over from an earlier `start` is cleared again.
+        let use_md5 = ctx.algo == Algorithm::MD5;
+        T::regs().cr().modify(|w| w.set_algo(use_md5));
+
+        // Store and return the state of the peripheral.
+        self.store_context(&mut ctx).await;
+        ctx
+    }
+
+    /// Restores the peripheral state from `ctx` and feeds `input` into the hash core.
+    /// Input that does not yet fill a block is only buffered in `ctx` (no peripheral access).
+    /// When data is sent, the peripheral state is saved back into `ctx` before returning.
+    pub async fn update(&mut self, ctx: &mut Context, input: &[u8]) {
+        let mut data_waiting = input.len() + ctx.buflen;
+        if data_waiting < DIGEST_BLOCK_SIZE || (data_waiting < ctx.buffer.len() && !ctx.first_word_sent) {
+            // Not enough data for a block (or for the larger first transfer), so just buffer it.
+            ctx.buffer[ctx.buflen..ctx.buflen + input.len()].copy_from_slice(input);
+            ctx.buflen += input.len();
+            return;
+        }
+
+        // Restore the peripheral state.
+        self.load_context(&ctx);
+
+        let mut ilen_remaining = input.len();
+        let mut input_start = 0;
+
+        // Handle the first transfer, which sends the whole buffer (`ctx.buffer.len()` bytes).
+        if !ctx.first_word_sent {
+            let empty_len = ctx.buffer.len() - ctx.buflen;
+            let copy_len = min(empty_len, ilen_remaining);
+            // Top up the buffer from the start of the input.
+            if copy_len > 0 {
+                ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[0..copy_len]);
+                ctx.buflen += copy_len;
+                ilen_remaining -= copy_len;
+                input_start += copy_len;
+            }
+            self.accumulate(ctx.buffer.as_slice());
+            data_waiting -= ctx.buflen;
+            ctx.buflen = 0;
+            ctx.first_word_sent = true;
+        }
+
+        if data_waiting < DIGEST_BLOCK_SIZE {
+            // Less than one block is left after the first transfer, so keep it for later.
+            ctx.buffer[0..ilen_remaining].copy_from_slice(&input[input_start..input_start + ilen_remaining]);
+            ctx.buflen += ilen_remaining;
+        } else {
+            // Complete the buffered block with input bytes, then send it.
+            let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen;
+            if empty_len > 0 {
+                let copy_len = min(empty_len, ilen_remaining);
+                ctx.buffer[ctx.buflen..ctx.buflen + copy_len]
+                    .copy_from_slice(&input[input_start..input_start + copy_len]);
+                ctx.buflen += copy_len;
+                ilen_remaining -= copy_len;
+                input_start += copy_len;
+            }
+            self.accumulate(&ctx.buffer[0..64]);
+            ctx.buflen = 0;
+
+            // Keep the trailing partial block (taken from the end of the input) in the buffer.
+            let leftovers = ilen_remaining % 64;
+            if leftovers > 0 {
+                ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]);
+                ctx.buflen += leftovers;
+                ilen_remaining -= leftovers;
+            }
+
+            // Send the whole blocks in between straight from the input slice.
+            self.accumulate(&input[input_start..input_start + ilen_remaining]);
+        }
+
+        // Save the peripheral context.
+        self.store_context(ctx).await;
+    }
+
+    /// Computes a digest for the given context. A slice of the provided digest buffer is returned.
+    /// The length of the returned slice is dependent on the digest length of the selected algorithm.
+    pub async fn finish<'a>(&mut self, mut ctx: Context, digest: &'a mut [u8; MAX_DIGEST_SIZE]) -> &'a [u8] {
+        // Restore the peripheral state.
+        self.load_context(&ctx);
+
+        // Hash the leftover bytes, if any.
+        self.accumulate(&ctx.buffer[0..ctx.buflen]);
+        ctx.buflen = 0;
+
+        // Start the digest calculation.
+        T::regs().str().write(|w| w.set_dcal(true));
+
+        // Wait for the digest-complete status flag (DCIS).
+        poll_fn(|cx| {
+            // Check if already done.
+            let bits = T::regs().sr().read();
+            if bits.dcis() {
+                return Poll::Ready(());
+            }
+            // Register waker, then enable the digest-complete interrupt that matches DCIS.
+            HASH_WAKER.register(cx.waker());
+            T::regs().imr().modify(|reg| reg.set_dcie(true));
+            // Re-check in case the digest completed before the interrupt was enabled.
+            let bits = T::regs().sr().read();
+            if bits.dcis() {
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await;
+
+        // Copy the digest out of the HR registers, one big-endian word at a time.
+        let digest_words = match ctx.algo {
+            Algorithm::SHA1 => 5,
+            Algorithm::MD5 => 4,
+        };
+        let mut i = 0;
+        while i < digest_words {
+            let word = T::regs().hr(i).read();
+            digest[(i * 4)..((i * 4) + 4)].copy_from_slice(word.to_be_bytes().as_slice());
+            i += 1;
+        }
+        &digest[0..digest_words * 4]
+    }
+
+    /// Push data into the hash core one 32-bit word at a time; a short final word is zero-padded.
+    fn accumulate(&mut self, input: &[u8]) {
+        // Set NBLW to the number of valid bits in the final word (0 when the length is a multiple of 4).
+        let num_valid_bits: u8 = (8 * (input.len() % 4)) as u8;
+        T::regs().str().modify(|w| w.set_nblw(num_valid_bits));
+
+        let mut i = 0;
+        while i < input.len() {
+            let mut word: [u8; 4] = [0; 4];
+            let copy_idx = min(i + 4, input.len());
+            word[0..copy_idx - i].copy_from_slice(&input[i..copy_idx]);
+            T::regs().din().write_value(u32::from_ne_bytes(word));
+            i += 4;
+        }
+    }
+
+    /// Save the peripheral state (IMR, STR, CR and the CSR bank) into `ctx` once DINIS is set.
+    async fn store_context(&mut self, ctx: &mut Context) {
+        // Wait until the DINIS status flag is set.
+        poll_fn(|cx| {
+            // Check if already done.
+            let bits = T::regs().sr().read();
+            if bits.dinis() {
+                return Poll::Ready(());
+            }
+            // Register waker, then enable the DINIE interrupt.
+            HASH_WAKER.register(cx.waker());
+            T::regs().imr().modify(|reg| reg.set_dinie(true));
+            // Re-check in case the flag was raised before the interrupt was enabled.
+            let bits = T::regs().sr().read();
+            if bits.dinis() {
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await;
+
+        ctx.imr = T::regs().imr().read().0;
+        ctx.str = T::regs().str().read().0;
+        ctx.cr = T::regs().cr().read().0;
+        let mut i = 0;
+        while i < NUM_CONTEXT_REGS {
+            ctx.csr[i] = T::regs().csr(i).read();
+            i += 1;
+        }
+    }
+
+    /// Restore the peripheral state (IMR, STR, CR and the CSR bank) from a context.
+    fn load_context(&mut self, ctx: &Context) {
+        // Write IMR, STR and CR, set INIT, then the CSRs. NOTE(review): presumably order-sensitive — confirm.
+        T::regs().imr().write_value(Imr { 0: ctx.imr });
+        T::regs().str().write_value(Str { 0: ctx.str });
+        T::regs().cr().write_value(Cr { 0: ctx.cr });
+        T::regs().cr().modify(|w| w.set_init(true));
+        let mut i = 0;
+        while i < NUM_CONTEXT_REGS {
+            T::regs().csr(i).write_value(ctx.csr[i]);
+            i += 1;
+        }
+    }
+}
+
+pub(crate) mod sealed {
+    use super::*;
+
+    pub trait Instance {
+        fn regs() -> pac::hash::Hash;
+    }
+}
+
+/// HASH instance trait.
+pub trait Instance: sealed::Instance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
+    /// Interrupt for this HASH instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
+
+foreach_interrupt!(
+    ($inst:ident, hash, HASH, GLOBAL, $irq:ident) => {
+        impl Instance for peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
+
+        impl sealed::Instance for peripherals::$inst {
+            fn regs() -> crate::pac::hash::Hash {
+                crate::pac::$inst
+            }
+        }
+    };
+);
+
+dma_trait!(Dma, Instance);
diff --git a/embassy-stm32/src/hash/v2v3.rs b/embassy-stm32/src/hash/v2v3.rs
new file mode 100644
index 000000000..058864568
--- /dev/null
+++ b/embassy-stm32/src/hash/v2v3.rs
@@ -0,0 +1,385 @@
+//! Hash generator (HASH)
+use core::cmp::min;
+use core::future::poll_fn;
+use core::marker::PhantomData;
+use core::ptr;
+use core::task::Poll;
+
+use embassy_hal_internal::{into_ref, PeripheralRef};
+use embassy_sync::waitqueue::AtomicWaker;
+use stm32_metapac::hash::regs::*;
+
+use crate::dma::Transfer;
+use crate::interrupt::typelevel::Interrupt;
+use crate::peripherals::HASH;
+use crate::rcc::sealed::RccPeripheral;
+use crate::{interrupt, pac, peripherals, Peripheral};
+
+#[cfg(hash_v2)]
+const NUM_CONTEXT_REGS: usize = 54;
+#[cfg(hash_v3)]
+const NUM_CONTEXT_REGS: usize = 103;
+const DIGEST_BLOCK_SIZE: usize = 64;
+const MAX_DIGEST_SIZE: usize = 64;
+
+static HASH_WAKER: AtomicWaker = AtomicWaker::new();
+
+/// HASH interrupt handler.
+pub struct InterruptHandler  + 'd, dma: impl Peripheral  + 'd) -> Self {
+        HASH::enable_and_reset();
+        into_ref!(peripheral, dma);
+        let instance = Self {
+            _peripheral: peripheral,
+            dma: dma,
+        };
+
+        T::Interrupt::unpend();
+        unsafe { T::Interrupt::enable() };
+
+        instance
+    }
+
+    /// Starts computation of a new hash and returns the saved peripheral state.
+    pub async fn start(&mut self, algorithm: Algorithm, format: DataType) -> Context {
+        // Define a context for this new computation.
+        let mut ctx = Context {
+            buffer: [0; DIGEST_BLOCK_SIZE],
+            buflen: 0,
+            algo: algorithm,
+            format: format,
+            imr: 0,
+            str: 0,
+            cr: 0,
+            csr: [0; NUM_CONTEXT_REGS],
+        };
+
+        // Set the data type in the peripheral.
+        T::regs().cr().modify(|w| w.set_datatype(ctx.format as u8));
+
+        #[cfg(hash_v2)]
+        {
+            // Select the algorithm via two bits: ALGO0 for MD5/SHA-256, ALGO1 for SHA-224/SHA-256.
+            let mut algo0 = false;
+            let mut algo1 = false;
+            if ctx.algo == Algorithm::MD5 || ctx.algo == Algorithm::SHA256 {
+                algo0 = true;
+            }
+            if ctx.algo == Algorithm::SHA224 || ctx.algo == Algorithm::SHA256 {
+                algo1 = true;
+            }
+            T::regs().cr().modify(|w| w.set_algo0(algo0));
+            T::regs().cr().modify(|w| w.set_algo1(algo1));
+        }
+
+        #[cfg(hash_v3)]
+        T::regs().cr().modify(|w| w.set_algo(ctx.algo as u8));
+
+        // Enable multiple DMA transfers (`finish` clears this again before the last one).
+        T::regs().cr().modify(|w| w.set_mdmat(true));
+
+        // Set init to load the context registers. Necessary before storing context.
+        T::regs().cr().modify(|w| w.set_init(true));
+
+        // Store and return the state of the peripheral.
+        self.store_context(&mut ctx).await;
+        ctx
+    }
+
+    /// Restores the peripheral state from `ctx` and feeds `input` to the hash core via DMA.
+    /// Up to one full block is only buffered in `ctx`, so that data always remains for `finish`.
+    /// When data is sent, the peripheral state is saved back into `ctx` before returning.
+    pub async fn update(&mut self, ctx: &mut Context, input: &[u8]) {
+        let data_waiting = input.len() + ctx.buflen;
+        if data_waiting <= DIGEST_BLOCK_SIZE {
+            // At most one block is pending: buffer it (`<=` avoids an underflow panic at exactly one block).
+            ctx.buffer[ctx.buflen..ctx.buflen + input.len()].copy_from_slice(input);
+            ctx.buflen += input.len();
+            return;
+        }
+
+        // Restore the peripheral state.
+        self.load_context(&ctx);
+
+        let mut ilen_remaining = input.len();
+        let mut input_start = 0;
+
+        // Complete the buffered block with input bytes, then send it.
+        let empty_len = DIGEST_BLOCK_SIZE - ctx.buflen;
+        if empty_len > 0 {
+            let copy_len = min(empty_len, ilen_remaining);
+            ctx.buffer[ctx.buflen..ctx.buflen + copy_len].copy_from_slice(&input[input_start..input_start + copy_len]);
+            ctx.buflen += copy_len;
+            ilen_remaining -= copy_len;
+            input_start += copy_len;
+        }
+        self.accumulate(&ctx.buffer).await;
+        ctx.buflen = 0;
+
+        // Hold back the tail of the input: the partial block, or the last full block if there is none.
+        let leftovers = ilen_remaining % DIGEST_BLOCK_SIZE;
+        if leftovers > 0 {
+            assert!(ilen_remaining >= leftovers);
+            ctx.buffer[0..leftovers].copy_from_slice(&input[input.len() - leftovers..input.len()]);
+            ctx.buflen += leftovers;
+            ilen_remaining -= leftovers;
+        } else {
+            ctx.buffer
+                .copy_from_slice(&input[input.len() - DIGEST_BLOCK_SIZE..input.len()]);
+            ctx.buflen += DIGEST_BLOCK_SIZE;
+            ilen_remaining -= DIGEST_BLOCK_SIZE;
+        }
+
+        // Send the whole blocks in between straight from the input slice.
+        self.accumulate(&input[input_start..input_start + ilen_remaining]).await;
+
+        // Save the peripheral context.
+        self.store_context(ctx).await;
+    }
+
+    /// Computes a digest for the given context. A slice of the provided digest buffer is returned.
+    /// The length of the returned slice is dependent on the digest length of the selected algorithm.
+    pub async fn finish<'a>(&mut self, mut ctx: Context, digest: &'a mut [u8; MAX_DIGEST_SIZE]) -> &'a [u8] {
+        // Restore the peripheral state.
+        self.load_context(&ctx);
+
+        // Must be cleared prior to the last DMA transfer.
+        T::regs().cr().modify(|w| w.set_mdmat(false));
+
+        // Hash the leftover bytes, if any.
+        self.accumulate(&ctx.buffer[0..ctx.buflen]).await;
+        ctx.buflen = 0;
+
+        // Wait for the digest-complete status flag (DCIS).
+        poll_fn(|cx| {
+            // Check if already done.
+            let bits = T::regs().sr().read();
+            if bits.dcis() {
+                return Poll::Ready(());
+            }
+            // Register waker, then enable the digest-complete interrupt that matches DCIS.
+            HASH_WAKER.register(cx.waker());
+            T::regs().imr().modify(|reg| reg.set_dcie(true));
+            // Re-check in case the digest completed before the interrupt was enabled.
+            let bits = T::regs().sr().read();
+            if bits.dcis() {
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await;
+
+        // Copy the digest out of the HR registers, one big-endian word at a time.
+        let digest_words = match ctx.algo {
+            Algorithm::SHA1 => 5,
+            #[cfg(hash_v2)]
+            Algorithm::MD5 => 4,
+            Algorithm::SHA224 => 7,
+            Algorithm::SHA256 => 8,
+            #[cfg(hash_v3)]
+            Algorithm::SHA384 => 12,
+            #[cfg(hash_v3)]
+            Algorithm::SHA512_224 => 7,
+            #[cfg(hash_v3)]
+            Algorithm::SHA512_256 => 8,
+            #[cfg(hash_v3)]
+            Algorithm::SHA512 => 16,
+        };
+        let mut i = 0;
+        while i < digest_words {
+            let word = T::regs().hr(i).read();
+            digest[(i * 4)..((i * 4) + 4)].copy_from_slice(word.to_be_bytes().as_slice());
+            i += 1;
+        }
+        &digest[0..digest_words * 4]
+    }
+
+    /// Feeds `input` to the hash core's DIN register through a DMA write and awaits the transfer.
+    async fn accumulate(&mut self, input: &[u8]) {
+        // Nothing to transfer for an empty slice; return before touching any register.
+        if input.len() == 0 {
+            return;
+        }
+
+        // NBLW = valid bits in the last word: 8 * (len % 4), which is 0 when len is a multiple of 4.
+        let num_valid_bits: u8 = (8 * (input.len() % 4)) as u8;
+        T::regs().str().modify(|w| w.set_nblw(num_valid_bits));
+
+        // Set up a DMA write from `input` to DIN, with the length rounded up to whole 4-byte words.
+        let dma_request = self.dma.request();
+        let dst_ptr = T::regs().din().as_ptr();
+        let mut num_words = input.len() / 4;
+        if input.len() % 4 > 0 {
+            num_words += 1; // NOTE(review): the word slice below then spans up to 3 bytes past `input` — confirm
+        }
+        let src_ptr = ptr::slice_from_raw_parts(input.as_ptr().cast(), num_words);
+        let dma_transfer =
+            unsafe { Transfer::new_write_raw(&mut self.dma, dma_request, src_ptr, dst_ptr, Default::default()) };
+        T::regs().cr().modify(|w| w.set_dmae(true));
+
+        // Suspend until the DMA transfer future resolves.
+        dma_transfer.await;
+    }
+
+    /// Waits for the SR.DINIS flag, then copies IMR, STR, CR and all CSR registers into `ctx`.
+    async fn store_context(&mut self, ctx: &mut Context) {
+        // Suspend until SR.DINIS is set.
+        poll_fn(|cx| {
+            // Fast path: the flag is already set.
+            let bits = T::regs().sr().read();
+            if bits.dinis() {
+                return Poll::Ready(());
+            }
+            // Register the waker first, then enable the DINIE interrupt.
+            HASH_WAKER.register(cx.waker());
+            T::regs().imr().modify(|reg| reg.set_dinie(true));
+            // Re-read the flag: it may have become set since the first check.
+            let bits = T::regs().sr().read();
+            if bits.dinis() {
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await;
+
+        ctx.imr = T::regs().imr().read().0; // raw register value
+        ctx.str = T::regs().str().read().0; // raw register value
+        ctx.cr = T::regs().cr().read().0; // raw register value
+        let mut i = 0;
+        while i < NUM_CONTEXT_REGS {
+            ctx.csr[i] = T::regs().csr(i).read(); // context-swap register i
+            i += 1;
+        }
+    }
+
+    /// Writes the IMR, STR, CR and CSR values saved in `ctx` back to the peripheral.
+    fn load_context(&mut self, ctx: &Context) {
+        // IMR, STR and CR are written first; INIT is then set in CR before the CSR registers are rewritten.
+        T::regs().imr().write_value(Imr { 0: ctx.imr });
+        T::regs().str().write_value(Str { 0: ctx.str });
+        T::regs().cr().write_value(Cr { 0: ctx.cr });
+        T::regs().cr().modify(|w| w.set_init(true));
+        let mut i = 0;
+        while i < NUM_CONTEXT_REGS {
+            T::regs().csr(i).write_value(ctx.csr[i]); // context-swap register i
+            i += 1;
+        }
+    }
+}
+
+pub(crate) mod sealed {
+    use super::*;
+
+    pub trait Instance { // supertrait of the public `Instance`; the module itself is only pub(crate)
+        fn regs() -> pac::hash::Hash; // accessor for the PAC HASH register block
+    }
+}
+
+/// HASH instance trait; requires the crate-private `sealed::Instance` as a supertrait.
+pub trait Instance: sealed::Instance + Peripheral<P = Self> + crate::rcc::RccPeripheral + 'static + Send {
+    /// Interrupt type associated with this HASH instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
+
+foreach_interrupt!(
+    ($inst:ident, hash, HASH, GLOBAL, $irq:ident) => { // arm for HASH GLOBAL interrupt entries
+        impl Instance for peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq; // interrupt type named by the matched `$irq`
+        }
+
+        impl sealed::Instance for peripherals::$inst {
+            fn regs() -> crate::pac::hash::Hash {
+                crate::pac::$inst // PAC item with the same name as the peripheral
+            }
+        }
+    };
+);
+
+dma_trait!(Dma, Instance); // NOTE(review): presumably declares the `Dma` trait for `Instance` — macro body not shown here
diff --git a/examples/stm32f7/src/bin/eth.rs b/examples/stm32f7/src/bin/eth.rs
index 5bff48197..9a608e909 100644
--- a/examples/stm32f7/src/bin/eth.rs
+++ b/examples/stm32f7/src/bin/eth.rs
@@ -19,7 +19,7 @@ use {defmt_rtt as _, panic_probe as _};
 
 bind_interrupts!(struct Irqs {
     ETH => eth::InterruptHandler;
-    RNG => rng::InterruptHandler  + 'd) -> Self {
+    pub fn new(
+        peripheral: impl Peripheral  + 'd,
+        _irq: impl interrupt::typelevel::Binding  + 'd, dma: impl Peripheral  + 'd) -> Self {
+    pub fn new(
+        peripheral: impl Peripheral  + 'd,
+        dma: impl Peripheral  + 'd,
+        _irq: impl interrupt::typelevel::Binding  + 'd,
+        dma: impl Peripheral  + 'd,
+        _irq: impl interrupt::typelevel::Binding  + crate::rcc::RccPeripheral + 'static + Send {
+    /// Interrupt for this HASH instance.
+    type Interrupt: interrupt::typelevel::Interrupt;
+}
+
+foreach_interrupt!(
+    ($inst:ident, hash, HASH, GLOBAL, $irq:ident) => {
+        impl Instance for peripherals::$inst {
+            type Interrupt = crate::interrupt::typelevel::$irq;
+        }
+
+        impl sealed::Instance for peripherals::$inst {
+            fn regs() -> crate::pac::hash::Hash {
+                crate::pac::$inst
+            }
+        }
+    };
+);
+
+dma_trait!(Dma, Instance);
diff --git a/embassy-stm32/src/hash/v1v3v4.rs b/embassy-stm32/src/hash/v1v3v4.rs
deleted file mode 100644
index 771144b11..000000000
--- a/embassy-stm32/src/hash/v1v3v4.rs
+++ /dev/null
@@ -1,399 +0,0 @@
-//! Hash generator (HASH)
-use core::cmp::min;
-use core::future::poll_fn;
-use core::marker::PhantomData;
-use core::task::Poll;
-
-use embassy_hal_internal::{into_ref, PeripheralRef};
-use embassy_sync::waitqueue::AtomicWaker;
-use stm32_metapac::hash::regs::*;
-
-use crate::interrupt::typelevel::Interrupt;
-use crate::peripherals::HASH;
-use crate::rcc::sealed::RccPeripheral;
-use crate::{interrupt, pac, peripherals, Peripheral};
-
-#[cfg(hash_v1)]
-const NUM_CONTEXT_REGS: usize = 51;
-#[cfg(hash_v3)]
-const NUM_CONTEXT_REGS: usize = 103;
-#[cfg(hash_v4)]
-const NUM_CONTEXT_REGS: usize = 54;
-
-const HASH_BUFFER_LEN: usize = 132;
-const DIGEST_BLOCK_SIZE: usize = 128;
-const MAX_DIGEST_SIZE: usize = 128;
-
-static HASH_WAKER: AtomicWaker = AtomicWaker::new();
-
-/// HASH interrupt handler.
-pub struct InterruptHandler  + 'd,
-        _irq: impl interrupt::typelevel::Binding  + crate::rcc::RccPeripheral + 'static + Send {
-    /// Interrupt for this HASH instance.
-    type Interrupt: interrupt::typelevel::Interrupt;
-}
-
-foreach_interrupt!(
-    ($inst:ident, hash, HASH, GLOBAL, $irq:ident) => {
-        impl Instance for peripherals::$inst {
-            type Interrupt = crate::interrupt::typelevel::$irq;
-        }
-
-        impl sealed::Instance for peripherals::$inst {
-            fn regs() -> crate::pac::hash::Hash {
-                crate::pac::$inst
-            }
-        }
-    };
-);
-
-dma_trait!(Dma, Instance);
diff --git a/embassy-stm32/src/hash/v2.rs b/embassy-stm32/src/hash/v2.rs
deleted file mode 100644
index b8104c825..000000000
--- a/embassy-stm32/src/hash/v2.rs
+++ /dev/null
@@ -1,389 +0,0 @@
-//! Hash generator (HASH)
-use core::cmp::min;
-use core::future::poll_fn;
-use core::marker::PhantomData;
-use core::ptr;
-use core::task::Poll;
-
-use embassy_hal_internal::{into_ref, PeripheralRef};
-use embassy_sync::waitqueue::AtomicWaker;
-use stm32_metapac::hash::regs::*;
-
-use crate::dma::Transfer;
-use crate::interrupt::typelevel::Interrupt;
-use crate::peripherals::HASH;
-use crate::rcc::sealed::RccPeripheral;
-use crate::{interrupt, pac, peripherals, Peripheral};
-
-#[cfg(hash_v2)]
-const NUM_CONTEXT_REGS: usize = 54;
-#[cfg(hash_v3)]
-const NUM_CONTEXT_REGS: usize = 103;
-const DIGEST_BLOCK_SIZE: usize = 64;
-const MAX_DIGEST_SIZE: usize = 64;
-
-static HASH_WAKER: AtomicWaker = AtomicWaker::new();
-
-/// HASH interrupt handler.
-pub struct InterruptHandler  + 'd,
-        dma: impl Peripheral  + 'd,
-        _irq: impl interrupt::typelevel::Binding  + crate::rcc::RccPeripheral + 'static + Send {
-    /// Interrupt for this HASH instance.
-    type Interrupt: interrupt::typelevel::Interrupt;
-}
-
-foreach_interrupt!(
-    ($inst:ident, hash, HASH, GLOBAL, $irq:ident) => {
-        impl Instance for peripherals::$inst {
-            type Interrupt = crate::interrupt::typelevel::$irq;
-        }
-
-        impl sealed::Instance for peripherals::$inst {
-            fn regs() -> crate::pac::hash::Hash {
-                crate::pac::$inst
-            }
-        }
-    };
-);
-
-dma_trait!(Dma, Instance);
diff --git a/examples/stm32f7/src/bin/hash.rs b/examples/stm32f7/src/bin/hash.rs
index 7d96bd49c..31f8d32a7 100644
--- a/examples/stm32f7/src/bin/hash.rs
+++ b/examples/stm32f7/src/bin/hash.rs
@@ -3,7 +3,7 @@
 
 use defmt::info;
 use embassy_executor::Spawner;
-use embassy_stm32::{bind_interrupts, Config, hash, hash::*, peripherals};
+use embassy_stm32::{bind_interrupts, hash, hash::*, peripherals, Config};
 use embassy_time::Instant;
 use sha2::{Digest, Sha256};
 use {defmt_rtt as _, panic_probe as _};
@@ -25,11 +25,11 @@ async fn main(_spawner: Spawner) -> ! {
     let hw_start_time = Instant::now();
 
     // Compute a digest in hardware.
-    let mut context = hw_hasher.start(Algorithm::SHA256, DataType::Width8).await;
+    let mut context = hw_hasher.start(Algorithm::SHA256, DataType::Width8);
     hw_hasher.update(&mut context, test_1).await;
     hw_hasher.update(&mut context, test_2).await;
-    let mut buffer: [u8; 64] = [0; 64];
-    let hw_digest = hw_hasher.finish(context, &mut buffer).await;
+    let mut hw_digest: [u8; 32] = [0; 32];
+    hw_hasher.finish(context, &mut hw_digest).await;
 
     let hw_end_time = Instant::now();
     let hw_execution_time = hw_end_time - hw_start_time;
@@ -49,7 +49,7 @@ async fn main(_spawner: Spawner) -> ! {
     info!("Software Digest: {:?}", sw_digest[..]);
     info!("Hardware Execution Time: {:?}", hw_execution_time);
     info!("Software Execution Time: {:?}", sw_execution_time);
-    assert_eq!(*hw_digest, sw_digest[..]);
+    assert_eq!(hw_digest, sw_digest[..]);
 
     loop {}
 }
diff --git a/tests/stm32/Cargo.toml b/tests/stm32/Cargo.toml
index d02f1a253..fc4420687 100644
--- a/tests/stm32/Cargo.toml
+++ b/tests/stm32/Cargo.toml
@@ -26,7 +26,7 @@ stm32l4a6zg = ["embassy-stm32/stm32l4a6zg", "chrono", "not-gpdma", "rng", "hash"
 stm32l4r5zi = ["embassy-stm32/stm32l4r5zi", "chrono", "not-gpdma", "rng"]
 stm32l552ze = ["embassy-stm32/stm32l552ze", "not-gpdma", "rng", "hash"]
 stm32u585ai = ["embassy-stm32/stm32u585ai", "chrono", "rng", "hash"]
-stm32u5a5zj = ["embassy-stm32/stm32u5a5zj", "chrono", "rng", "hash"]
+stm32u5a5zj = ["embassy-stm32/stm32u5a5zj", "chrono", "rng"]
 stm32wb55rg = ["embassy-stm32/stm32wb55rg", "chrono", "not-gpdma", "ble", "mac" , "rng"]
 stm32wba52cg = ["embassy-stm32/stm32wba52cg", "chrono", "rng", "hash"]
 stm32wl55jc = ["embassy-stm32/stm32wl55jc-cm4", "not-gpdma", "rng", "chrono"]
diff --git a/tests/stm32/src/bin/hash.rs b/tests/stm32/src/bin/hash.rs
index 2867115dc..cfcf3d976 100644
--- a/tests/stm32/src/bin/hash.rs
+++ b/tests/stm32/src/bin/hash.rs
@@ -6,6 +6,7 @@
 mod common;
 use common::*;
 use embassy_executor::Spawner;
+use embassy_stm32::dma::NoDma;
 use embassy_stm32::hash::*;
 use embassy_stm32::{bind_interrupts, hash, peripherals};
 use sha2::{Digest, Sha224, Sha256};
@@ -30,27 +31,26 @@ bind_interrupts!(struct Irqs {
 #[embassy_executor::main]
 async fn main(_spawner: Spawner) {
     let p: embassy_stm32::Peripherals = embassy_stm32::init(config());
-    let dma = peri!(p, HASH_DMA);
-    let mut hw_hasher = Hash::new(p.HASH, dma);
+    let mut hw_hasher = Hash::new(p.HASH, NoDma, Irqs);
 
     let test_1: &[u8] = b"as;dfhaslfhas;oifvnasd;nifvnhasd;nifvhndlkfghsd;nvfnahssdfgsdafgsasdfasdfasdfasdfasdfghjklmnbvcalskdjghalskdjgfbaslkdjfgbalskdjgbalskdjbdfhsdfhsfghsfghfgh";
     let test_2: &[u8] = b"fdhalksdjfhlasdjkfhalskdjfhgal;skdjfgalskdhfjgalskdjfglafgadfgdfgdafgaadsfgfgdfgadrgsyfthxfgjfhklhjkfgukhulkvhlvhukgfhfsrghzdhxyfufynufyuszeradrtydyytserr";
     let test_3: &[u8] = b"a.ewtkluGWEBR.KAJRBTA,RMNRBG,FDMGB.kger.tkasjrbt.akrjtba.krjtba.ktmyna,nmbvtyliasd;gdrtba,sfvs.kgjzshd.gkbsr.tksejb.SDkfBSE.gkfgb>ESkfbSE>gkJSBESE>kbSE>fk";
 
     // Start an SHA-256 digest.
-    let mut sha256context = hw_hasher.start(Algorithm::SHA256, DataType::Width8).await;
-    hw_hasher.update(&mut sha256context, test_1).await;
+    let mut sha256context = hw_hasher.start(Algorithm::SHA256, DataType::Width8);
+    hw_hasher.update_blocking(&mut sha256context, test_1);
 
     // Interrupt the SHA-256 digest to compute an SHA-224 digest.
-    let mut sha224context = hw_hasher.start(Algorithm::SHA224, DataType::Width8).await;
-    hw_hasher.update(&mut sha224context, test_3).await;
-    let mut sha224_digest_buffer: [u8; 64] = [0; 64];
-    let sha224_digest = hw_hasher.finish(sha224context, &mut sha224_digest_buffer).await;
+    let mut sha224context = hw_hasher.start(Algorithm::SHA224, DataType::Width8);
+    hw_hasher.update_blocking(&mut sha224context, test_3);
+    let mut sha224_digest_buffer: [u8; 28] = [0; 28];
+    let _ = hw_hasher.finish_blocking(sha224context, &mut sha224_digest_buffer);
 
     // Finish the SHA-256 digest.
-    hw_hasher.update(&mut sha256context, test_2).await;
-    let mut sha_256_digest_buffer: [u8; 64] = [0; 64];
-    let sha256_digest = hw_hasher.finish(sha256context, &mut sha_256_digest_buffer).await;
+    hw_hasher.update_blocking(&mut sha256context, test_2);
+    let mut sha256_digest_buffer: [u8; 32] = [0; 32];
+    let _ = hw_hasher.finish_blocking(sha256context, &mut sha256_digest_buffer);
 
     // Compute the SHA-256 digest in software.
     let mut sw_sha256_hasher = Sha256::new();
@@ -64,14 +64,14 @@ async fn main(_spawner: Spawner) {
     let sw_sha224_digest = sw_sha224_hasher.finalize();
 
     // Compare the SHA-256 digests.
-    info!("Hardware SHA-256 Digest: {:?}", sha256_digest);
+    info!("Hardware SHA-256 Digest: {:?}", sha256_digest_buffer);
     info!("Software SHA-256 Digest: {:?}", sw_sha256_digest[..]);
-    defmt::assert!(*sha256_digest == sw_sha256_digest[..]);
+    defmt::assert!(sha256_digest_buffer == sw_sha256_digest[..]);
 
     // Compare the SHA-224 digests.
-    info!("Hardware SHA-256 Digest: {:?}", sha224_digest);
+    info!("Hardware SHA-256 Digest: {:?}", sha224_digest_buffer);
     info!("Software SHA-256 Digest: {:?}", sw_sha224_digest[..]);
-    defmt::assert!(*sha224_digest == sw_sha224_digest[..]);
+    defmt::assert!(sha224_digest_buffer == sw_sha224_digest[..]);
 
     info!("Test OK");
     cortex_m::asm::bkpt();
diff --git a/tests/stm32/src/common.rs b/tests/stm32/src/common.rs
index 14d5b6d7b..fefe72c86 100644
--- a/tests/stm32/src/common.rs
+++ b/tests/stm32/src/common.rs
@@ -128,7 +128,6 @@ define_peris!(
 );
 #[cfg(any(feature = "stm32h755zi", feature = "stm32h753zi"))]
 define_peris!(
-    HASH_DMA = DMA1_CH0,
     UART = USART1, UART_TX = PB6, UART_RX = PB7, UART_TX_DMA = DMA1_CH0, UART_RX_DMA = DMA1_CH1,
     SPI = SPI1, SPI_SCK = PA5, SPI_MOSI = PB5, SPI_MISO = PA6, SPI_TX_DMA = DMA1_CH0, SPI_RX_DMA = DMA1_CH1,
     ADC = ADC1, DAC = DAC1, DAC_PIN = PA4,
@@ -142,21 +141,18 @@ define_peris!(
 );
 #[cfg(feature = "stm32u585ai")]
 define_peris!(
-    HASH_DMA = GPDMA1_CH0,
     UART = USART3, UART_TX = PD8, UART_RX = PD9, UART_TX_DMA = GPDMA1_CH0, UART_RX_DMA = GPDMA1_CH1,
     SPI = SPI1, SPI_SCK = PE13, SPI_MOSI = PE15, SPI_MISO = PE14, SPI_TX_DMA = GPDMA1_CH0, SPI_RX_DMA = GPDMA1_CH1,
     @irq UART = {USART3 => embassy_stm32::usart::InterruptHandler