Add a cosigning protocol to ensure finalizations are unique (#433)

* Add a function to deterministically decide which Serai blocks should be co-signed

This has a 5-minute latency between co-signs, which is also used as the maximal
latency before a co-sign is started.
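
As a sketch of the shape of such a rule (the names `should_cosign` and `COSIGN_DISTANCE_SECS`, and the exact inputs, are illustrative, not the actual coordinator API), the decision can be a pure function of on-chain data, so every validator selects the same blocks:

```rust
/// Sketch only: hypothetical names, not the actual coordinator code.
const COSIGN_DISTANCE_SECS: u64 = 5 * 60;

/// Deterministically decide whether a block (with on-chain timestamp `block_time`,
/// in seconds) should start a new co-sign, given the timestamp of the last block
/// selected for co-signing. Since the rule only reads on-chain data, every honest
/// validator reaches the same answer.
fn should_cosign(last_cosigned_block_time: u64, block_time: u64) -> bool {
  block_time.saturating_sub(last_cosigned_block_time) >= COSIGN_DISTANCE_SECS
}
```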

* Get all active tributaries we're in at a specific block

* Add and route CosignSubstrateBlock, a new provided TX
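
A minimal sketch of what such a provided transaction could look like on the Tributary, assuming an invented `Transaction` enum and a 32-byte Serai block hash (the real type and its encoding differ):

```rust
/// Illustrative only; the actual Tributary transaction type is richer.
#[derive(Clone, Debug, PartialEq, Eq)]
enum Transaction {
  /// Provided transaction instructing the Tributary's participants to cosign the
  /// Serai block with this hash. Provided transactions are expected to be locally
  /// generated by every honest validator, so no individual signer is attached.
  CosignSubstrateBlock([u8; 32]),
}

fn handle_provided(tx: &Transaction) {
  match tx {
    Transaction::CosignSubstrateBlock(hash) => {
      // Start a cosign signing session for this Serai block with the most recent key.
      println!("cosigning Serai block {:02x?}", hash);
    }
  }
}
```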

* Split queued cosigns per network

* Rename BatchSignId to SubstrateSignId

* Add SubstrateSignableId, a meta-type for either Batch or Block, and modularize around it
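
For orientation, the rough shape of the new types as they appear in the diffs below; payloads the diff doesn't pin down (notably the cosigning variant's contents) are assumptions:

```rust
/// Reconstructed from the diffs in this commit; treat payload types as approximate.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum SubstrateSignableId {
  /// A `Batch`, identified by its encoded (network, batch id) pair.
  Batch([u8; 5]),
  /// A Serai block to cosign (assumed here to be identified by its hash).
  CosigningSubstrateBlock([u8; 32]),
}

/// The generalization of the former `BatchSignId`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct SubstrateSignId {
  pub key: [u8; 32],
  pub id: SubstrateSignableId,
  pub attempt: u32,
}
```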

* Handle the CosignSubstrateBlock provided TX

* Revert substrate_signer.rs to develop (and patch to still work)

Due to SubstrateSigner moving when the prior multisig closes, yet cosigning
occurring with the most recent key, a single SubstrateSigner can be reused.
We could manage multiple SubstrateSigners, yet considering the much lower
specifications for cosigning, I'd rather treat it distinctly.

* Route cosigning through the processor

* Add note to rename SubstrateSigner post-PR

I don't want to do so now in order to preserve the diff's clarity.

* Implement cosign evaluation into the coordinator

* Get tests to compile

* Bug fixes, mark blocks without cosigners available as cosigned

* Correct the ID Batch preprocesses are saved under, add log statements

* Create a dedicated function to handle cosigns

* Correct the flow around Batch verification/queueing

Verifying `Batch`es could stall when a `Batch` was signed before its
predecessors, or before the block it's contained in was cosigned (the latter
being inevitable, as we can't sign a block containing a signed batch before
signing the batch).

Now, `Batch` verification happens on a distinct async task in order to not block
the handling of processor messages. This task is the sole caller of `verify` in
order to ensure `last_verified_batch` isn't unexpectedly mutated.

When the processor message handler needs to access it, or needs to queue a
Batch, it associates the DB TXN with a lock preventing the other task from
doing so.
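
A rough sketch of that coordination under invented names (`BatchVerifier`, `verify_batches_task`); it only illustrates the locking relationship, not the coordinator's actual types or DB layer:

```rust
use std::{sync::Arc, time::Duration};
use tokio::sync::Mutex;

// Hypothetical names; this only sketches the locking relationship described above.
struct BatchVerifier {
  last_verified_batch: Option<u32>,
}

// Dedicated task: the sole caller of verification, so `last_verified_batch` only
// ever advances here.
async fn verify_batches_task(state: Arc<Mutex<BatchVerifier>>) {
  loop {
    {
      let mut state = state.lock().await;
      let next = state.last_verified_batch.map_or(0, |b| b + 1);
      // ... verify batch `next` if its predecessors are verified and its block is
      // cosigned; on success: ...
      state.last_verified_batch = Some(next);
    }
    tokio::time::sleep(Duration::from_secs(5)).await;
  }
}

// Processor-message handler: holds the same lock for the lifetime of its DB TXN,
// so verification can't advance `last_verified_batch` mid-transaction.
async fn handle_processor_message(state: Arc<Mutex<BatchVerifier>>) {
  let guard = state.lock().await;
  // ... open the DB TXN, read `guard.last_verified_batch`, queue the Batch, commit ...
  drop(guard);
}
```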

This lock, as currently implemented, is a poor and inefficient design. It
should be modified to the pattern used for cosign management. Additionally, a
new primitive of a DB-backed channel may be immensely valuable.
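
One reading of that suggestion, as a sketch with assumed names (`DbChannel`, plus a stand-in `Txn` type; none of this is existing Serai code): messages are written inside the producer's transaction and consumed inside the consumer's, so queueing is atomic with the work that produced it and pending messages survive restarts.

```rust
use std::collections::BTreeMap;

// Stand-in for a key-value DB transaction; purely illustrative.
type Txn = BTreeMap<Vec<u8>, Vec<u8>>;

// A channel whose queue lives in the DB: sends are atomic with the transaction
// that produced them, and unread messages persist across restarts.
struct DbChannel {
  prefix: &'static [u8],
}

impl DbChannel {
  fn key(&self, index: u64) -> Vec<u8> {
    let mut key = self.prefix.to_vec();
    key.extend_from_slice(&index.to_le_bytes());
    key
  }

  // Write message `index` within the producer's transaction.
  fn send(&self, txn: &mut Txn, index: u64, msg: &[u8]) {
    txn.insert(self.key(index), msg.to_vec());
  }

  // Consume message `index`, if present, within the consumer's transaction.
  fn try_recv(&self, txn: &mut Txn, index: u64) -> Option<Vec<u8>> {
    txn.remove(&self.key(index))
  }
}
```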

Fixes a standing potential deadlock and a deadlock introduced with the
cosigning protocol.

* Working full-stack tests

After the last commit, this only required extending a timeout.

* Replace "co-sign" with "cosign" to make finding text easier

* Update the coordinator tests to support cosigning

* Inline prior_batch calculation to prevent panic on rotation

Noticed when doing a final review of the branch.
Commit 96f1d26f7a by Luke Parker, 2023-11-15 16:57:21 -05:00 (committed by GitHub).
Parent 79e4cce2f6. 29 changed files with 1900 additions and 348 deletions.


@@ -223,9 +223,11 @@ impl Processor {
/// Receive a message from the coordinator as a processor.
pub async fn recv_message(&mut self) -> CoordinatorMessage {
let msg = tokio::time::timeout(Duration::from_secs(10), self.queue.next(Service::Coordinator))
.await
.unwrap();
// Set a timeout of an entire 6 minutes as cosigning may be delayed by up to 5 minutes
let msg =
tokio::time::timeout(Duration::from_secs(6 * 60), self.queue.next(Service::Coordinator))
.await
.unwrap();
assert_eq!(msg.from, Service::Coordinator);
assert_eq!(msg.id, self.next_recv_id);
self.queue.ack(Service::Coordinator, msg.id).await;


@@ -23,7 +23,10 @@ use serai_client::{
InInstructionsEvent,
},
};
use messages::{coordinator::BatchSignId, SubstrateContext, CoordinatorMessage};
use messages::{
coordinator::{SubstrateSignableId, SubstrateSignId},
SubstrateContext, CoordinatorMessage,
};
use crate::{*, tests::*};
@@ -35,9 +38,9 @@ pub async fn batch(
) -> u64 {
let mut id = [0; 5];
OsRng.fill_bytes(&mut id);
let id = BatchSignId {
let id = SubstrateSignId {
key: (<Ristretto as Ciphersuite>::generator() * **substrate_key).to_bytes(),
id,
id: SubstrateSignableId::Batch(id),
attempt: 0,
};
@@ -83,7 +86,10 @@ pub async fn batch(
let first_preprocesses = processors[known_signer].recv_message().await;
let participants = match first_preprocesses {
CoordinatorMessage::Coordinator(
messages::coordinator::CoordinatorMessage::BatchPreprocesses { id: this_id, preprocesses },
messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
id: this_id,
preprocesses,
},
) => {
assert_eq!(&id, &this_id);
assert_eq!(preprocesses.len(), THRESHOLD - 1);
@@ -97,7 +103,7 @@ pub async fn batch(
participants.insert(known_signer_i);
participants
}
_ => panic!("coordinator didn't send back BatchPreprocesses"),
_ => panic!("coordinator didn't send back SubstratePreprocesses"),
};
for i in participants.clone() {
@@ -117,7 +123,7 @@ pub async fn batch(
assert_eq!(
processor.recv_message().await,
CoordinatorMessage::Coordinator(
messages::coordinator::CoordinatorMessage::BatchPreprocesses {
messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
id: id.clone(),
preprocesses
}
@@ -129,7 +135,7 @@ pub async fn batch(
let processor =
&mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
processor
.send_message(messages::coordinator::ProcessorMessage::BatchShare {
.send_message(messages::coordinator::ProcessorMessage::SubstrateShare {
id: id.clone(),
shares: vec![[u8::try_from(u16::from(i)).unwrap(); 32]],
})
@@ -148,7 +154,7 @@ pub async fn batch(
assert_eq!(
processor.recv_message().await,
CoordinatorMessage::Coordinator(messages::coordinator::CoordinatorMessage::BatchShares {
CoordinatorMessage::Coordinator(messages::coordinator::CoordinatorMessage::SubstrateShares {
id: id.clone(),
shares,
})
@@ -174,7 +180,10 @@ pub async fn batch(
let serai = processors[0].serai().await;
let mut last_serai_block = serai.latest_block().await.unwrap().number();
for processor in processors.iter_mut() {
for (i, processor) in processors.iter_mut().enumerate() {
if i == excluded_signer {
continue;
}
processor
.send_message(messages::substrate::ProcessorMessage::SignedBatch { batch: batch.clone() })
.await;
@@ -214,9 +223,9 @@ pub async fn batch(
// Verify the coordinator sends SubstrateBlock to all processors
let last_block = serai.block_by_number(last_serai_block).await.unwrap().unwrap();
for processor in processors.iter_mut() {
for i in 0 .. processors.len() {
assert_eq!(
processor.recv_message().await,
potentially_cosign(processors, i, processor_is, substrate_key).await,
messages::CoordinatorMessage::Substrate(
messages::substrate::CoordinatorMessage::SubstrateBlock {
context: SubstrateContext {
@@ -232,7 +241,7 @@ pub async fn batch(
);
// Send the ack as expected, though it shouldn't trigger any observable behavior
processor
processors[i]
.send_message(messages::ProcessorMessage::Coordinator(
messages::coordinator::ProcessorMessage::SubstrateBlockAck {
network: batch.batch.network,


@@ -0,0 +1,172 @@
use std::collections::{HashSet, HashMap};
use zeroize::Zeroizing;
use rand_core::{RngCore, OsRng};
use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto};
use dkg::Participant;
use serai_client::primitives::Signature;
use messages::{
coordinator::{SubstrateSignableId, cosign_block_msg},
CoordinatorMessage,
};
use crate::{*, tests::*};
pub async fn potentially_cosign(
processors: &mut [Processor],
primary_processor: usize,
processor_is: &[u8],
substrate_key: &Zeroizing<<Ristretto as Ciphersuite>::F>,
) -> CoordinatorMessage {
let msg = processors[primary_processor].recv_message().await;
let messages::CoordinatorMessage::Coordinator(
messages::coordinator::CoordinatorMessage::CosignSubstrateBlock { id },
) = msg.clone()
else {
return msg;
};
let SubstrateSignableId::CosigningSubstrateBlock(block) = id.id else {
panic!("CosignSubstrateBlock didn't have CosigningSubstrateBlock id")
};
for (i, processor) in processors.iter_mut().enumerate() {
if i == primary_processor {
continue;
}
assert_eq!(msg, processor.recv_message().await);
}
// Select a random participant to exclude, so we know for sure who *is* participating
assert_eq!(COORDINATORS - THRESHOLD, 1);
let excluded_signer =
usize::try_from(OsRng.next_u64() % u64::try_from(processors.len()).unwrap()).unwrap();
for (i, processor) in processors.iter_mut().enumerate() {
if i == excluded_signer {
continue;
}
processor
.send_message(messages::coordinator::ProcessorMessage::CosignPreprocess {
id: id.clone(),
preprocesses: vec![[processor_is[i]; 64].to_vec()],
})
.await;
}
// Send from the excluded signer so they don't stay stuck
processors[excluded_signer]
.send_message(messages::coordinator::ProcessorMessage::CosignPreprocess {
id: id.clone(),
preprocesses: vec![[processor_is[excluded_signer]; 64].to_vec()],
})
.await;
// Read from a known signer to find out who was selected to sign
let known_signer = (excluded_signer + 1) % COORDINATORS;
let first_preprocesses = processors[known_signer].recv_message().await;
let participants = match first_preprocesses {
CoordinatorMessage::Coordinator(
messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
id: this_id,
preprocesses,
},
) => {
assert_eq!(&id, &this_id);
assert_eq!(preprocesses.len(), THRESHOLD - 1);
let known_signer_i = Participant::new(u16::from(processor_is[known_signer])).unwrap();
assert!(!preprocesses.contains_key(&known_signer_i));
let mut participants = preprocesses.keys().cloned().collect::<HashSet<_>>();
for (p, preprocess) in preprocesses {
assert_eq!(preprocess, vec![u8::try_from(u16::from(p)).unwrap(); 64]);
}
participants.insert(known_signer_i);
participants
}
_ => panic!("coordinator didn't send back SubstratePreprocesses"),
};
for i in participants.clone() {
if u16::from(i) == u16::from(processor_is[known_signer]) {
continue;
}
let processor =
&mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
let mut preprocesses = participants
.clone()
.into_iter()
.map(|i| (i, [u8::try_from(u16::from(i)).unwrap(); 64].to_vec()))
.collect::<HashMap<_, _>>();
preprocesses.remove(&i);
assert_eq!(
processor.recv_message().await,
CoordinatorMessage::Coordinator(
messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
id: id.clone(),
preprocesses
}
)
);
}
for i in participants.clone() {
let processor =
&mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
processor
.send_message(messages::coordinator::ProcessorMessage::SubstrateShare {
id: id.clone(),
shares: vec![[u8::try_from(u16::from(i)).unwrap(); 32]],
})
.await;
}
for i in participants.clone() {
let processor =
&mut processors[processor_is.iter().position(|p_i| u16::from(*p_i) == u16::from(i)).unwrap()];
let mut shares = participants
.clone()
.into_iter()
.map(|i| (i, [u8::try_from(u16::from(i)).unwrap(); 32]))
.collect::<HashMap<_, _>>();
shares.remove(&i);
assert_eq!(
processor.recv_message().await,
CoordinatorMessage::Coordinator(messages::coordinator::CoordinatorMessage::SubstrateShares {
id: id.clone(),
shares,
})
);
}
// Expand to a key pair as Schnorrkel expects
// It's the private key + 32-bytes of entropy for nonces + the public key
let mut schnorrkel_key_pair = [0; 96];
schnorrkel_key_pair[.. 32].copy_from_slice(&substrate_key.to_repr());
OsRng.fill_bytes(&mut schnorrkel_key_pair[32 .. 64]);
schnorrkel_key_pair[64 ..]
.copy_from_slice(&(<Ristretto as Ciphersuite>::generator() * **substrate_key).to_bytes());
let signature = Signature(
schnorrkel::keys::Keypair::from_bytes(&schnorrkel_key_pair)
.unwrap()
.sign_simple(b"substrate", &cosign_block_msg(block))
.to_bytes(),
);
for (i, processor) in processors.iter_mut().enumerate() {
if i == excluded_signer {
continue;
}
processor
.send_message(messages::coordinator::ProcessorMessage::CosignedBlock {
block,
signature: signature.0.to_vec(),
})
.await;
}
processors[primary_processor].recv_message().await
}


@@ -9,6 +9,9 @@ use crate::*;
mod key_gen;
pub use key_gen::key_gen;
mod cosign;
pub use cosign::potentially_cosign;
mod batch;
pub use batch::batch;


@@ -328,9 +328,9 @@ async fn sign_test() {
let plan_id = plan_id;
// We should now get a SubstrateBlock
for processor in processors.iter_mut() {
for i in 0 .. processors.len() {
assert_eq!(
processor.recv_message().await,
potentially_cosign(&mut processors, i, &participant_is, &substrate_key).await,
messages::CoordinatorMessage::Substrate(
messages::substrate::CoordinatorMessage::SubstrateBlock {
context: SubstrateContext {
@@ -346,7 +346,7 @@ async fn sign_test() {
);
// Send the ACK, claiming there's a plan to sign
processor
processors[i]
.send_message(messages::ProcessorMessage::Coordinator(
messages::coordinator::ProcessorMessage::SubstrateBlockAck {
network: NetworkId::Bitcoin,


@@ -555,7 +555,7 @@ async fn mint_and_burn_test() {
// Check for up to 5 minutes
let mut found = false;
let mut i = 0;
while i < (5 * 6) {
while i < (15 * 6) {
if let Ok(hash) = rpc.get_block_hash(start_bitcoin_block).await {
let block = rpc.get_block(&hash).await.unwrap();
start_bitcoin_block += 1;


@@ -26,10 +26,10 @@ pub(crate) async fn recv_batch_preprocesses(
substrate_key: &[u8; 32],
batch: &Batch,
attempt: u32,
) -> (BatchSignId, HashMap<Participant, Vec<u8>>) {
let id = BatchSignId {
) -> (SubstrateSignId, HashMap<Participant, Vec<u8>>) {
let id = SubstrateSignId {
key: *substrate_key,
id: (batch.network, batch.id).encode().try_into().unwrap(),
id: SubstrateSignableId::Batch((batch.network, batch.id).encode().try_into().unwrap()),
attempt,
};
@@ -86,7 +86,7 @@ pub(crate) async fn recv_batch_preprocesses(
pub(crate) async fn sign_batch(
coordinators: &mut [Coordinator],
key: [u8; 32],
id: BatchSignId,
id: SubstrateSignId,
preprocesses: HashMap<Participant, Vec<u8>>,
) -> SignedBatch {
assert_eq!(preprocesses.len(), THRESHOLD);
@@ -96,7 +96,7 @@ pub(crate) async fn sign_batch(
if preprocesses.contains_key(&i) {
coordinator
.send_message(messages::coordinator::CoordinatorMessage::BatchPreprocesses {
.send_message(messages::coordinator::CoordinatorMessage::SubstratePreprocesses {
id: id.clone(),
preprocesses: clone_without(&preprocesses, &i),
})
@@ -111,7 +111,7 @@ pub(crate) async fn sign_batch(
if preprocesses.contains_key(&i) {
match coordinator.recv_message().await {
messages::ProcessorMessage::Coordinator(
messages::coordinator::ProcessorMessage::BatchShare {
messages::coordinator::ProcessorMessage::SubstrateShare {
id: this_id,
shares: mut these_shares,
},
@@ -130,7 +130,7 @@ pub(crate) async fn sign_batch(
if preprocesses.contains_key(&i) {
coordinator
.send_message(messages::coordinator::CoordinatorMessage::BatchShares {
.send_message(messages::coordinator::CoordinatorMessage::SubstrateShares {
id: id.clone(),
shares: clone_without(&shares, &i),
})