Add a cosigning protocol to ensure finalizations are unique (#433)

* Add a function to deterministically decide which Serai blocks should be co-signed

Has a 5 minute latency between co-signs, also used as the maximal latency
before a co-sign is started.

* Get all active tributaries we're in at a specific block

* Add and route CosignSubstrateBlock, a new provided TX

* Split queued cosigns per network

* Rename BatchSignId to SubstrateSignId

* Add SubstrateSignableId, a meta-type for either Batch or Block, and modularize around it

* Handle the CosignSubstrateBlock provided TX

* Revert substrate_signer.rs to develop (and patch to still work)

Due to SubstrateSigner moving when the prior multisig closes, yet cosigning
occurring with the most recent key, a single SubstrateSigner can be reused.
We could manage multiple SubstrateSigners, yet considering the much lower
specifications for cosigning, I'd rather treat it distinctly.

* Route cosigning through the processor

* Add note to rename SubstrateSigner post-PR

I don't want to do so now in order to preserve the diff's clarity.

* Implement cosign evaluation into the coordinator

* Get tests to compile

* Bug fixes, mark blocks without cosigners available as cosigned

* Correct the ID Batch preprocesses are saved under, add log statements

* Create a dedicated function to handle cosigns

* Correct the flow around Batch verification/queueing

Verifying `Batch`s could stall when a `Batch` was signed before its
predecessors/before the block it's contained in was cosigned (the latter being
inevitable as we can't sign a block containing a signed batch before signing
the batch).

Now, Batch verification happens on a distinct async task in order to not block
the handling of processor messages. This task is the sole caller of verify in
order to ensure last_verified_batch isn't unexpectedly mutated.

When the processor message handler needs to access it, or needs to queue a
Batch, it associates the DB TXN with a lock preventing the other task from
doing so.

This lock, as currently implemented, is a poor and inefficient design. It
should be modified to the pattern used for cosign management. Additionally, a
new primitive of a DB-backed channel may be immensely valuable.

Fixes a standing potential deadlock and a deadlock introduced with the
cosigning protocol.

* Working full-stack tests

After the last commit, this only required extending a timeout.

* Replace "co-sign" with "cosign" to make finding text easier

* Update the coordinator tests to support cosigning

* Inline prior_batch calculation to prevent panic on rotation

Noticed when doing a final review of the branch.
This commit is contained in:
Luke Parker
2023-11-15 16:57:21 -05:00
committed by GitHub
parent 79e4cce2f6
commit 96f1d26f7a
29 changed files with 1900 additions and 348 deletions

View File

@@ -3,7 +3,10 @@ use std::sync::Arc;
use rand_core::OsRng;
use tokio::{sync::broadcast, time::sleep};
use tokio::{
sync::{mpsc, broadcast},
time::sleep,
};
use serai_db::MemDb;
@@ -32,7 +35,8 @@ async fn handle_p2p_test() {
let tributary = Arc::new(tributary);
tributary_arcs.push(tributary.clone());
let (new_tributary_send, new_tributary_recv) = broadcast::channel(5);
tokio::spawn(handle_p2p_task(p2p, new_tributary_recv));
let (cosign_send, _) = mpsc::unbounded_channel();
tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv));
new_tributary_send
.send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary }))
.map_err(|_| "failed to send ActiveTributary")

View File

@@ -2,6 +2,9 @@ use core::fmt::Debug;
use rand_core::{RngCore, OsRng};
use scale::{Encode, Decode};
use processor_messages::coordinator::SubstrateSignableId;
use tributary::{ReadWrite, tests::random_signed};
use crate::tributary::{SignData, Transaction};
@@ -28,10 +31,10 @@ fn random_vec<R: RngCore>(rng: &mut R, limit: usize) -> Vec<u8> {
res
}
fn random_sign_data<R: RngCore, const N: usize>(rng: &mut R) -> SignData<N> {
let mut plan = [0; N];
rng.fill_bytes(&mut plan);
fn random_sign_data<R: RngCore, Id: Clone + PartialEq + Eq + Debug + Encode + Decode>(
rng: &mut R,
plan: Id,
) -> SignData<Id> {
SignData {
plan,
attempt: random_u32(&mut OsRng),
@@ -80,10 +83,18 @@ fn tx_size_limit() {
#[test]
fn serialize_sign_data() {
test_read_write(random_sign_data::<_, 3>(&mut OsRng));
test_read_write(random_sign_data::<_, 8>(&mut OsRng));
test_read_write(random_sign_data::<_, 16>(&mut OsRng));
test_read_write(random_sign_data::<_, 24>(&mut OsRng));
let mut plan = [0; 3];
OsRng.fill_bytes(&mut plan);
test_read_write(random_sign_data::<_, _>(&mut OsRng, plan));
let mut plan = [0; 5];
OsRng.fill_bytes(&mut plan);
test_read_write(random_sign_data::<_, _>(&mut OsRng, plan));
let mut plan = [0; 8];
OsRng.fill_bytes(&mut plan);
test_read_write(random_sign_data::<_, _>(&mut OsRng, plan));
let mut plan = [0; 24];
OsRng.fill_bytes(&mut plan);
test_read_write(random_sign_data::<_, _>(&mut OsRng, plan));
}
#[test]
@@ -168,6 +179,12 @@ fn serialize_transaction() {
random_signed(&mut OsRng),
));
{
let mut block = [0; 32];
OsRng.fill_bytes(&mut block);
test_read_write(Transaction::CosignSubstrateBlock(block));
}
{
let mut block = [0; 32];
OsRng.fill_bytes(&mut block);
@@ -177,11 +194,33 @@ fn serialize_transaction() {
}
test_read_write(Transaction::SubstrateBlock(OsRng.next_u64()));
test_read_write(Transaction::BatchPreprocess(random_sign_data(&mut OsRng)));
test_read_write(Transaction::BatchShare(random_sign_data(&mut OsRng)));
{
let mut plan = [0; 5];
OsRng.fill_bytes(&mut plan);
test_read_write(Transaction::SubstratePreprocess(random_sign_data(
&mut OsRng,
SubstrateSignableId::Batch(plan),
)));
}
{
let mut plan = [0; 5];
OsRng.fill_bytes(&mut plan);
test_read_write(Transaction::SubstrateShare(random_sign_data(
&mut OsRng,
SubstrateSignableId::Batch(plan),
)));
}
test_read_write(Transaction::SignPreprocess(random_sign_data(&mut OsRng)));
test_read_write(Transaction::SignShare(random_sign_data(&mut OsRng)));
{
let mut plan = [0; 32];
OsRng.fill_bytes(&mut plan);
test_read_write(Transaction::SignPreprocess(random_sign_data(&mut OsRng, plan)));
}
{
let mut plan = [0; 32];
OsRng.fill_bytes(&mut plan);
test_read_write(Transaction::SignShare(random_sign_data(&mut OsRng, plan)));
}
{
let mut plan = [0; 32];

View File

@@ -5,7 +5,10 @@ use rand_core::OsRng;
use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto};
use tokio::{sync::broadcast, time::sleep};
use tokio::{
sync::{mpsc, broadcast},
time::sleep,
};
use serai_db::MemDb;
@@ -42,7 +45,8 @@ async fn sync_test() {
let tributary = Arc::new(tributary);
tributary_arcs.push(tributary.clone());
let (new_tributary_send, new_tributary_recv) = broadcast::channel(5);
let thread = tokio::spawn(handle_p2p_task(p2p, new_tributary_recv));
let (cosign_send, _) = mpsc::unbounded_channel();
let thread = tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv));
new_tributary_send
.send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary }))
.map_err(|_| "failed to send ActiveTributary")
@@ -77,7 +81,8 @@ async fn sync_test() {
let syncer_key = Ristretto::generator() * *syncer_key;
let syncer_tributary = Arc::new(syncer_tributary);
let (syncer_tributary_send, syncer_tributary_recv) = broadcast::channel(5);
tokio::spawn(handle_p2p_task(syncer_p2p.clone(), syncer_tributary_recv));
let (cosign_send, _) = mpsc::unbounded_channel();
tokio::spawn(handle_p2p_task(syncer_p2p.clone(), cosign_send, syncer_tributary_recv));
syncer_tributary_send
.send(TributaryEvent::NewTributary(ActiveTributary {
spec: spec.clone(),