Mirror of https://github.com/serai-dex/serai.git, synced 2025-12-08 04:09:23 +00:00
Add a cosigning protocol to ensure finalizations are unique (#433)
* Add a function to deterministically decide which Serai blocks should be co-signed

  Has a 5-minute latency between co-signs, which is also used as the maximum latency before a co-sign is started (a scheduling rule sketched after this list).

* Get all active tributaries we're in at a specific block

* Add and route CosignSubstrateBlock, a new provided TX

* Split queued cosigns per network

* Rename BatchSignId to SubstrateSignId

* Add SubstrateSignableId, a meta-type for either Batch or Block, and modularize around it

* Handle the CosignSubstrateBlock provided TX

* Revert substrate_signer.rs to develop (and patch to still work)

  Due to SubstrateSigner moving when the prior multisig closes, yet cosigning occurring with the most recent key, a single SubstrateSigner can be reused. We could manage multiple SubstrateSigners, yet considering the much lower specifications for cosigning, I'd rather treat it distinctly.

* Route cosigning through the processor

* Add note to rename SubstrateSigner post-PR

  I don't want to do so now in order to preserve the diff's clarity.

* Implement cosign evaluation into the coordinator

* Get tests to compile

* Bug fixes; mark blocks without cosigners available as cosigned

* Correct the ID Batch preprocesses are saved under; add log statements

* Create a dedicated function to handle cosigns

* Correct the flow around Batch verification/queueing

  Verifying `Batch`s could stall when a `Batch` was signed before its predecessors, or before the block it's contained in was cosigned (the latter being inevitable, as we can't sign a block containing a signed batch before signing the batch).

  Now, Batch verification happens on a distinct async task in order to not block the handling of processor messages. This task is the sole caller of verify, in order to ensure last_verified_batch isn't unexpectedly mutated. When the processor message handler needs to access it, or needs to queue a Batch, it associates the DB TXN with a lock preventing the other task from doing so.

  This lock, as currently implemented, is a poor and inefficient design. It should be modified to the pattern used for cosign management. Additionally, a new primitive of a DB-backed channel may be immensely valuable (see the sketch after this list).

  Fixes a standing potential deadlock and a deadlock introduced with the cosigning protocol.

* Working full-stack tests

  After the last commit, this only required extending a timeout.

* Replace "co-sign" with "cosign" to make finding text easier

* Update the coordinator tests to support cosigning

* Inline prior_batch calculation to prevent panic on rotation

  Noticed when doing a final review of the branch.
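The first bullet's scheduling rule can be captured in a few lines. This is a minimal sketch under stated assumptions: the constant name, the 6-second block time, and the exact trigger condition are inferred from the commit message, not taken from the commit's code.

// Sketch of the deterministic cosign-selection rule: cosign a finalized block
// once 5 minutes' worth of blocks have elapsed since the last cosigned block,
// so cosigns are spaced 5 minutes apart and no finalized block waits longer
// than that before a cosign covering it is started.
const BLOCK_TIME_SECS: u64 = 6; // assumed Serai block time
const COSIGN_DISTANCE: u64 = (5 * 60) / BLOCK_TIME_SECS; // 5 minutes, in blocks

fn should_cosign(last_cosigned_block: u64, block: u64) -> bool {
  block >= (last_cosigned_block + COSIGN_DISTANCE)
}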
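The "DB-backed channel" primitive floated in the Batch-verification bullet might look like the following. Everything here is a hypothetical sketch of the idea, not code from the commit: the names, the key layout, and the in-memory Kv standing in for serai-db's transaction traits are all assumptions.

use std::collections::HashMap;

// Stand-in key-value store; in practice this would be serai-db, with both
// writes in `send` performed inside the sender's existing DB transaction so a
// message is queued atomically with the state change that produced it.
#[derive(Default)]
struct Kv(HashMap<Vec<u8>, Vec<u8>>);

impl Kv {
  fn get(&self, key: &[u8]) -> Option<Vec<u8>> {
    self.0.get(key).cloned()
  }
  fn put(&mut self, key: Vec<u8>, value: Vec<u8>) {
    self.0.insert(key, value);
  }
}

// Reads a little-endian u64 counter, defaulting to 0 if unset.
fn counter(db: &Kv, key: &[u8]) -> u64 {
  db.get(key).map_or(0, |v| u64::from_le_bytes(v.try_into().unwrap()))
}

// The sender appends under a sequential key and bumps the head pointer.
fn send(db: &mut Kv, channel: &[u8], msg: &[u8]) {
  let head_key = [channel, b"/head".as_slice()].concat();
  let head = counter(db, &head_key);
  let idx = head.to_le_bytes();
  db.put([channel, b"/".as_slice(), &idx[..]].concat(), msg.to_vec());
  db.put(head_key, (head + 1).to_le_bytes().to_vec());
}

// The receiver reads at its own tail pointer, advancing it once handled, so
// neither side blocks the other and nothing is lost across restarts.
fn recv(db: &mut Kv, channel: &[u8]) -> Option<Vec<u8>> {
  let tail_key = [channel, b"/tail".as_slice()].concat();
  let tail = counter(db, &tail_key);
  let idx = tail.to_le_bytes();
  let msg = db.get(&[channel, b"/".as_slice(), &idx[..]].concat())?;
  db.put(tail_key, (tail + 1).to_le_bytes().to_vec());
  Some(msg)
}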
@@ -223,9 +223,11 @@ impl Processor {
   /// Receive a message from the coordinator as a processor.
   pub async fn recv_message(&mut self) -> CoordinatorMessage {
-    let msg = tokio::time::timeout(Duration::from_secs(10), self.queue.next(Service::Coordinator))
-      .await
-      .unwrap();
+    // Set a timeout of an entire 6 minutes as cosigning may be delayed by up to 5 minutes
+    let msg =
+      tokio::time::timeout(Duration::from_secs(6 * 60), self.queue.next(Service::Coordinator))
+        .await
+        .unwrap();
     assert_eq!(msg.from, Service::Coordinator);
     assert_eq!(msg.id, self.next_recv_id);
     self.queue.ack(Service::Coordinator, msg.id).await;
 
@@ -23,7 +23,10 @@ use serai_client::{
     InInstructionsEvent,
   },
 };
-use messages::{coordinator::BatchSignId, SubstrateContext, CoordinatorMessage};
+use messages::{
+  coordinator::{SubstrateSignableId, SubstrateSignId},
+  SubstrateContext, CoordinatorMessage,
+};
 
 use crate::{*, tests::*};
 
@@ -35,9 +38,9 @@ pub async fn batch(
 ) -> u64 {
   let mut id = [0; 5];
   OsRng.fill_bytes(&mut id);
-  let id = BatchSignId {
+  let id = SubstrateSignId {
     key: (<Ristretto as Ciphersuite>::generator() * **substrate_key).to_bytes(),
-    id,
+    id: SubstrateSignableId::Batch(id),
     attempt: 0,
   };
 
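For reference, a plausible shape for the types this hunk switches to, inferred from their usage across these tests; the real definitions live in the messages crate and may differ in derives and exact field types.

// Inferred sketch, not the commit's actual definitions.
pub enum SubstrateSignableId {
  // (network, batch_id) SCALE-encoded: one discriminant byte plus a u32.
  Batch([u8; 5]),
  // The hash of the Serai block being cosigned.
  CosigningSubstrateBlock([u8; 32]),
}

pub struct SubstrateSignId {
  // Compressed Ristretto point of the validator set's Substrate key.
  pub key: [u8; 32],
  pub id: SubstrateSignableId,
  pub attempt: u32,
}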
@@ -83,7 +86,10 @@ pub async fn batch(
   let first_preprocesses = processors[known_signer].recv_message().await;
   let participants = match first_preprocesses {
     CoordinatorMessage::Coordinator(
-      messages::coordinator::CoordinatorMessage::BatchPreprocesses { id: this_id, preprocesses },
+      messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
+        id: this_id,
+        preprocesses,
+      },
     ) => {
       assert_eq!(&id, &this_id);
       assert_eq!(preprocesses.len(), THRESHOLD - 1);
@@ -97,7 +103,7 @@ pub async fn batch(
       participants.insert(known_signer_i);
       participants
     }
-    _ => panic!("coordinator didn't send back BatchPreprocesses"),
+    _ => panic!("coordinator didn't send back SubstratePreprocesses"),
   };
 
   for i in participants.clone() {
@@ -117,7 +123,7 @@ pub async fn batch(
     assert_eq!(
       processor.recv_message().await,
       CoordinatorMessage::Coordinator(
-        messages::coordinator::CoordinatorMessage::BatchPreprocesses {
+        messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
           id: id.clone(),
           preprocesses
         }
@@ -129,7 +135,7 @@ pub async fn batch(
     let processor =
       &mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
     processor
-      .send_message(messages::coordinator::ProcessorMessage::BatchShare {
+      .send_message(messages::coordinator::ProcessorMessage::SubstrateShare {
         id: id.clone(),
         shares: vec![[u8::try_from(u16::from(i)).unwrap(); 32]],
       })
@@ -148,7 +154,7 @@ pub async fn batch(
 
     assert_eq!(
       processor.recv_message().await,
-      CoordinatorMessage::Coordinator(messages::coordinator::CoordinatorMessage::BatchShares {
+      CoordinatorMessage::Coordinator(messages::coordinator::CoordinatorMessage::SubstrateShares {
         id: id.clone(),
         shares,
       })
@@ -174,7 +180,10 @@ pub async fn batch(
   let serai = processors[0].serai().await;
   let mut last_serai_block = serai.latest_block().await.unwrap().number();
 
-  for processor in processors.iter_mut() {
+  for (i, processor) in processors.iter_mut().enumerate() {
+    if i == excluded_signer {
+      continue;
+    }
     processor
       .send_message(messages::substrate::ProcessorMessage::SignedBatch { batch: batch.clone() })
       .await;
@@ -214,9 +223,9 @@ pub async fn batch(
 
   // Verify the coordinator sends SubstrateBlock to all processors
   let last_block = serai.block_by_number(last_serai_block).await.unwrap().unwrap();
-  for processor in processors.iter_mut() {
+  for i in 0 .. processors.len() {
     assert_eq!(
-      processor.recv_message().await,
+      potentially_cosign(processors, i, processor_is, substrate_key).await,
       messages::CoordinatorMessage::Substrate(
         messages::substrate::CoordinatorMessage::SubstrateBlock {
           context: SubstrateContext {
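These loops now receive via potentially_cosign (added in the new cosign.rs below): it returns the first message received unless that message is a CosignSubstrateBlock, in which case it drives the whole cosign round to completion and returns the message after it. Since a cosign may be scheduled at any point, every place that previously called recv_message directly has to tolerate one being interleaved.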
@@ -232,7 +241,7 @@ pub async fn batch(
     );
 
     // Send the ack as expected, though it shouldn't trigger any observable behavior
-    processor
+    processors[i]
       .send_message(messages::ProcessorMessage::Coordinator(
         messages::coordinator::ProcessorMessage::SubstrateBlockAck {
           network: batch.batch.network,
tests/coordinator/src/tests/cosign.rs (new file, 172 lines)
@@ -0,0 +1,172 @@
+use std::collections::{HashSet, HashMap};
+
+use zeroize::Zeroizing;
+use rand_core::{RngCore, OsRng};
+
+use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto};
+use dkg::Participant;
+
+use serai_client::primitives::Signature;
+use messages::{
+  coordinator::{SubstrateSignableId, cosign_block_msg},
+  CoordinatorMessage,
+};
+
+use crate::{*, tests::*};
+
+pub async fn potentially_cosign(
+  processors: &mut [Processor],
+  primary_processor: usize,
+  processor_is: &[u8],
+  substrate_key: &Zeroizing<<Ristretto as Ciphersuite>::F>,
+) -> CoordinatorMessage {
+  let msg = processors[primary_processor].recv_message().await;
+  let messages::CoordinatorMessage::Coordinator(
+    messages::coordinator::CoordinatorMessage::CosignSubstrateBlock { id },
+  ) = msg.clone()
+  else {
+    return msg;
+  };
+  let SubstrateSignableId::CosigningSubstrateBlock(block) = id.id else {
+    panic!("CosignSubstrateBlock didn't have CosigningSubstrateBlock id")
+  };
+
+  for (i, processor) in processors.iter_mut().enumerate() {
+    if i == primary_processor {
+      continue;
+    }
+    assert_eq!(msg, processor.recv_message().await);
+  }
+
+  // Select a random participant to exclude, so we know for sure who *is* participating
+  assert_eq!(COORDINATORS - THRESHOLD, 1);
+  let excluded_signer =
+    usize::try_from(OsRng.next_u64() % u64::try_from(processors.len()).unwrap()).unwrap();
+  for (i, processor) in processors.iter_mut().enumerate() {
+    if i == excluded_signer {
+      continue;
+    }
+
+    processor
+      .send_message(messages::coordinator::ProcessorMessage::CosignPreprocess {
+        id: id.clone(),
+        preprocesses: vec![[processor_is[i]; 64].to_vec()],
+      })
+      .await;
+  }
+
+  // Send from the excluded signer so they don't stay stuck
+  processors[excluded_signer]
+    .send_message(messages::coordinator::ProcessorMessage::CosignPreprocess {
+      id: id.clone(),
+      preprocesses: vec![[processor_is[excluded_signer]; 64].to_vec()],
+    })
+    .await;
+
+  // Read from a known signer to find out who was selected to sign
+  let known_signer = (excluded_signer + 1) % COORDINATORS;
+  let first_preprocesses = processors[known_signer].recv_message().await;
+  let participants = match first_preprocesses {
+    CoordinatorMessage::Coordinator(
+      messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
+        id: this_id,
+        preprocesses,
+      },
+    ) => {
+      assert_eq!(&id, &this_id);
+      assert_eq!(preprocesses.len(), THRESHOLD - 1);
+      let known_signer_i = Participant::new(u16::from(processor_is[known_signer])).unwrap();
+      assert!(!preprocesses.contains_key(&known_signer_i));
+
+      let mut participants = preprocesses.keys().cloned().collect::<HashSet<_>>();
+      for (p, preprocess) in preprocesses {
+        assert_eq!(preprocess, vec![u8::try_from(u16::from(p)).unwrap(); 64]);
+      }
+      participants.insert(known_signer_i);
+      participants
+    }
+    _ => panic!("coordinator didn't send back SubstratePreprocesses"),
+  };
+
+  for i in participants.clone() {
+    if u16::from(i) == u16::from(processor_is[known_signer]) {
+      continue;
+    }
+
+    let processor =
+      &mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
+    let mut preprocesses = participants
+      .clone()
+      .into_iter()
+      .map(|i| (i, [u8::try_from(u16::from(i)).unwrap(); 64].to_vec()))
+      .collect::<HashMap<_, _>>();
+    preprocesses.remove(&i);
+
+    assert_eq!(
+      processor.recv_message().await,
+      CoordinatorMessage::Coordinator(
+        messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
+          id: id.clone(),
+          preprocesses
+        }
+      )
+    );
+  }
+
+  for i in participants.clone() {
+    let processor =
+      &mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
+    processor
+      .send_message(messages::coordinator::ProcessorMessage::SubstrateShare {
+        id: id.clone(),
+        shares: vec![[u8::try_from(u16::from(i)).unwrap(); 32]],
+      })
+      .await;
+  }
+  for i in participants.clone() {
+    let processor =
+      &mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
+    let mut shares = participants
+      .clone()
+      .into_iter()
+      .map(|i| (i, [u8::try_from(u16::from(i)).unwrap(); 32]))
+      .collect::<HashMap<_, _>>();
+    shares.remove(&i);
+
+    assert_eq!(
+      processor.recv_message().await,
+      CoordinatorMessage::Coordinator(messages::coordinator::CoordinatorMessage::SubstrateShares {
+        id: id.clone(),
+        shares,
+      })
+    );
+  }
+
+  // Expand to a key pair as Schnorrkel expects
+  // It's the private key + 32-bytes of entropy for nonces + the public key
+  let mut schnorrkel_key_pair = [0; 96];
+  schnorrkel_key_pair[.. 32].copy_from_slice(&substrate_key.to_repr());
+  OsRng.fill_bytes(&mut schnorrkel_key_pair[32 .. 64]);
+  schnorrkel_key_pair[64 ..]
+    .copy_from_slice(&(<Ristretto as Ciphersuite>::generator() * **substrate_key).to_bytes());
+  let signature = Signature(
+    schnorrkel::keys::Keypair::from_bytes(&schnorrkel_key_pair)
+      .unwrap()
+      .sign_simple(b"substrate", &cosign_block_msg(block))
+      .to_bytes(),
+  );
+
+  for (i, processor) in processors.iter_mut().enumerate() {
+    if i == excluded_signer {
+      continue;
+    }
+    processor
+      .send_message(messages::coordinator::ProcessorMessage::CosignedBlock {
+        block,
+        signature: signature.0.to_vec(),
+      })
+      .await;
+  }
+
+  processors[primary_processor].recv_message().await
+}
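This file signs cosign_block_msg(block) with Schnorrkel under the b"substrate" context, but the diff doesn't show that helper (it's imported from the messages crate). A minimal sketch of a plausible implementation, assuming simple domain separation over the block hash; the "Cosign" tag and the exact signature are assumptions, not the commit's code.

// Hypothetical sketch of cosign_block_msg. The key property is that cosign
// signatures can't be confused with any other Schnorrkel signature over the
// same 32-byte block hash.
pub fn cosign_block_msg(block: [u8; 32]) -> Vec<u8> {
  let mut res = b"Cosign".to_vec(); // assumed domain-separation tag
  res.extend(block);
  res
}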
@@ -9,6 +9,9 @@ use crate::*;
 mod key_gen;
 pub use key_gen::key_gen;
 
+mod cosign;
+pub use cosign::potentially_cosign;
+
 mod batch;
 pub use batch::batch;
 
@@ -328,9 +328,9 @@ async fn sign_test() {
   let plan_id = plan_id;
 
   // We should now get a SubstrateBlock
-  for processor in processors.iter_mut() {
+  for i in 0 .. processors.len() {
     assert_eq!(
-      processor.recv_message().await,
+      potentially_cosign(&mut processors, i, &participant_is, &substrate_key).await,
       messages::CoordinatorMessage::Substrate(
         messages::substrate::CoordinatorMessage::SubstrateBlock {
           context: SubstrateContext {
@@ -346,7 +346,7 @@ async fn sign_test() {
     );
 
     // Send the ACK, claiming there's a plan to sign
-    processor
+    processors[i]
       .send_message(messages::ProcessorMessage::Coordinator(
         messages::coordinator::ProcessorMessage::SubstrateBlockAck {
           network: NetworkId::Bitcoin,
@@ -555,7 +555,7 @@ async fn mint_and_burn_test() {
   // Check for up to 5 minutes
   let mut found = false;
   let mut i = 0;
-  while i < (5 * 6) {
+  while i < (15 * 6) {
     if let Ok(hash) = rpc.get_block_hash(start_bitcoin_block).await {
       let block = rpc.get_block(&hash).await.unwrap();
       start_bitcoin_block += 1;
@@ -26,10 +26,10 @@ pub(crate) async fn recv_batch_preprocesses(
   substrate_key: &[u8; 32],
   batch: &Batch,
   attempt: u32,
-) -> (BatchSignId, HashMap<Participant, Vec<u8>>) {
-  let id = BatchSignId {
+) -> (SubstrateSignId, HashMap<Participant, Vec<u8>>) {
+  let id = SubstrateSignId {
     key: *substrate_key,
-    id: (batch.network, batch.id).encode().try_into().unwrap(),
+    id: SubstrateSignableId::Batch((batch.network, batch.id).encode().try_into().unwrap()),
     attempt,
   };
 
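Why `(batch.network, batch.id).encode()` fits the 5-byte id: SCALE encodes a fieldless enum as a single discriminant byte and a u32 as four little-endian bytes, so the tuple is exactly 5 bytes. A sketch assuming the parity-scale-codec crate; this NetworkId is a stand-in for serai-client's primitive.

use parity_scale_codec::Encode;

// Stand-in for serai's NetworkId; a fieldless enum encodes to one byte.
#[derive(Encode)]
enum NetworkId {
  Serai,
  Bitcoin,
  Ethereum,
  Monero,
}

// One discriminant byte + four little-endian u32 bytes = 5 bytes total.
fn batch_sign_id(network: NetworkId, batch_id: u32) -> [u8; 5] {
  (network, batch_id).encode().try_into().unwrap()
}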
@@ -86,7 +86,7 @@ pub(crate) async fn recv_batch_preprocesses(
 pub(crate) async fn sign_batch(
   coordinators: &mut [Coordinator],
   key: [u8; 32],
-  id: BatchSignId,
+  id: SubstrateSignId,
   preprocesses: HashMap<Participant, Vec<u8>>,
 ) -> SignedBatch {
   assert_eq!(preprocesses.len(), THRESHOLD);
@@ -96,7 +96,7 @@ pub(crate) async fn sign_batch(
 
     if preprocesses.contains_key(&i) {
       coordinator
-        .send_message(messages::coordinator::CoordinatorMessage::BatchPreprocesses {
+        .send_message(messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
          id: id.clone(),
          preprocesses: clone_without(&preprocesses, &i),
        })
@@ -111,7 +111,7 @@ pub(crate) async fn sign_batch(
     if preprocesses.contains_key(&i) {
       match coordinator.recv_message().await {
         messages::ProcessorMessage::Coordinator(
-          messages::coordinator::ProcessorMessage::BatchShare {
+          messages::coordinator::ProcessorMessage::SubstrateShare {
             id: this_id,
             shares: mut these_shares,
           },
@@ -130,7 +130,7 @@ pub(crate) async fn sign_batch(
 
     if preprocesses.contains_key(&i) {
       coordinator
-        .send_message(messages::coordinator::CoordinatorMessage::BatchShares {
+        .send_message(messages::coordinator::CoordinatorMessage::SubstrateShares {
          id: id.clone(),
          shares: clone_without(&shares, &i),
        })