Add a cosigning protocol to ensure finalizations are unique (#433)

* Add a function to deterministically decide which Serai blocks should be co-signed

There is a 5-minute latency between co-signs, which is also used as the maximal
latency before a co-sign is started.
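
A minimal sketch of what such a deterministic rule could look like, assuming only the 5-minute interval described above (the names and the exact condition are illustrative, not the actual coordinator code):

```rust
// Illustrative only: a fixed 5-minute interval between co-signs, applied as a
// pure function of block data so every validator derives the same answer.
// Names and the exact rule are assumptions, not the actual coordinator code.
const COSIGN_INTERVAL_SECS: u64 = 5 * 60;

struct BlockMeta {
  // Unix timestamp of the Serai block, in seconds.
  time: u64,
}

fn should_cosign(last_cosigned: &BlockMeta, candidate: &BlockMeta) -> bool {
  // Start a co-sign once the interval has elapsed since the last co-signed block.
  candidate.time >= last_cosigned.time + COSIGN_INTERVAL_SECS
}
```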

* Get all active tributaries we're in at a specific block

* Add and route CosignSubstrateBlock, a new provided TX

* Split queued cosigns per network

* Rename BatchSignId to SubstrateSignId

* Add SubstrateSignableId, a meta-type for either Batch or Block, and modularize around it
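
The hunks below construct and encode this meta-type; its approximate shape, inferred from that usage (the real definition lives in processor_messages::coordinator and its exact derives aren't shown here), is:

```rust
use scale::{Encode, Decode};

// Approximate shape inferred from the usage in the diff below: a Batch is keyed
// by its 5-byte ID, a co-signed block by its 32-byte hash. The real definition
// may differ in derives and variant ordering.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Encode, Decode)]
pub enum SubstrateSignableId {
  CosigningSubstrateBlock([u8; 32]),
  Batch([u8; 5]),
}
```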

* Handle the CosignSubstrateBlock provided TX

* Revert substrate_signer.rs to develop (and patch to still work)

Due to SubstrateSigner moving when the prior multisig closes, yet cosigning
occurring with the most recent key, a single SubstrateSigner can't be reused.
We could manage multiple SubstrateSigners, yet considering the much lower
specifications for cosigning, I'd rather treat it distinctly.

* Route cosigning through the processor

* Add note to rename SubstrateSigner post-PR

I don't want to do so now in order to preserve the diff's clarity.

* Implement cosign evaluation into the coordinator

* Get tests to compile

* Bug fixes, mark blocks without cosigners available as cosigned

* Correct the ID Batch preprocesses are saved under, add log statements

* Create a dedicated function to handle cosigns

* Correct the flow around Batch verification/queueing

Verifying `Batch`s could stall when a `Batch` was signed before its
predecessors, or before the block containing it was cosigned (the latter being
inevitable, as we can't cosign a block containing a signed batch before signing
the batch).

Now, Batch verification happens on a distinct async task in order to not block
the handling of processor messages. This task is the sole caller of verify in
order to ensure last_verified_batch isn't unexpectedly mutated.

When the processor message handler needs to access it, or needs to queue a
Batch, it associates the DB TXN with a lock preventing the other task from
doing so.

This lock, as currently implemented, is a poor and inefficient design. It
should be modified to the pattern used for cosign management. Additionally, a
new primitive of a DB-backed channel may be immensely valuable.

Fixes a standing potential deadlock and a deadlock introduced with the
cosigning protocol.
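
A rough sketch of the arrangement described above, with hypothetical names and a plain async mutex standing in for the DB-TXN-associated lock (which, per the note above, is itself slated to be redesigned):

```rust
use std::sync::Arc;
use core::time::Duration;
use tokio::sync::Mutex;

// Hypothetical sketch of the described coordination. Batch verification runs on
// its own task, which is the sole caller of verification, so last_verified_batch
// only ever advances there. The processor-message handler takes the same lock
// for the lifetime of its DB transaction when it needs that state or needs to
// queue a Batch.
#[derive(Clone)]
struct BatchVerifierLock(Arc<Mutex<()>>);

async fn batch_verification_task(lock: BatchVerifierLock) {
  loop {
    {
      let _guard = lock.0.lock().await;
      // Verify any queued Batches whose predecessors are verified and whose
      // containing Serai block has been cosigned.
    }
    tokio::time::sleep(Duration::from_secs(5)).await;
  }
}

async fn handle_processor_message(lock: &BatchVerifierLock) {
  // Hold the lock across the DB TXN so the verification task can't mutate
  // last_verified_batch while this handler queues a Batch against it.
  let _guard = lock.0.lock().await;
  // ... open the TXN, queue the Batch, commit, then drop the guard ...
}
```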

* Working full-stack tests

After the last commit, this only required extending a timeout.

* Replace "co-sign" with "cosign" to make finding text easier

* Update the coordinator tests to support cosigning

* Inline prior_batch calculation to prevent panic on rotation

Noticed when doing a final review of the branch.
Authored by Luke Parker on 2023-11-15 16:57:21 -05:00, committed by GitHub
parent 79e4cce2f6, commit 96f1d26f7a
29 changed files with 1900 additions and 348 deletions

View File

@@ -9,6 +9,8 @@ use frost::Participant;
use serai_client::validator_sets::primitives::{ValidatorSet, KeyPair};
use processor_messages::coordinator::SubstrateSignableId;
pub use serai_db::*;
use crate::tributary::TributarySpec;
@@ -16,16 +18,21 @@ use crate::tributary::TributarySpec;
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Topic {
Dkg,
Batch([u8; 5]),
SubstrateSign(SubstrateSignableId),
Sign([u8; 32]),
}
impl Topic {
fn as_key(&self, genesis: [u8; 32]) -> Vec<u8> {
let mut res = genesis.to_vec();
#[allow(unused_assignments)] // False positive
let mut id_buf = vec![];
let (label, id) = match self {
Topic::Dkg => (b"dkg".as_slice(), [].as_slice()),
Topic::Batch(id) => (b"batch".as_slice(), id.as_slice()),
Topic::SubstrateSign(id) => {
id_buf = id.encode();
(b"substrate_sign".as_slice(), id_buf.as_slice())
}
Topic::Sign(id) => (b"sign".as_slice(), id.as_slice()),
};
res.push(u8::try_from(label.len()).unwrap());

View File

@@ -18,7 +18,7 @@ use tributary::{Signed, TransactionKind, TransactionTrait};
use processor_messages::{
key_gen::{self, KeyGenId},
coordinator::{self, BatchSignId},
coordinator::{self, SubstrateSignableId, SubstrateSignId},
sign::{self, SignId},
};
@@ -498,10 +498,50 @@ pub(crate) async fn handle_application_tx<
}
}
Transaction::CosignSubstrateBlock(hash) => {
TributaryDb::<D>::recognize_topic(
txn,
genesis,
Topic::SubstrateSign(SubstrateSignableId::CosigningSubstrateBlock(hash)),
);
NonceDecider::handle_substrate_signable(
txn,
genesis,
SubstrateSignableId::CosigningSubstrateBlock(hash),
);
let key = loop {
let Some(key_pair) = TributaryDb::<D>::key_pair(txn, spec.set()) else {
// This can happen based on a timing condition
log::warn!("CosignSubstrateBlock yet keys weren't set yet");
tokio::time::sleep(core::time::Duration::from_secs(1)).await;
continue;
};
break key_pair.0.into();
};
processors
.send(
spec.set().network,
coordinator::CoordinatorMessage::CosignSubstrateBlock {
id: SubstrateSignId {
key,
id: SubstrateSignableId::CosigningSubstrateBlock(hash),
attempt: 0,
},
},
)
.await;
}
Transaction::Batch(_, batch) => {
// Because this Batch has achieved synchrony, its batch ID should be authorized
TributaryDb::<D>::recognize_topic(txn, genesis, Topic::Batch(batch));
let nonce = NonceDecider::handle_batch(txn, genesis, batch);
TributaryDb::<D>::recognize_topic(
txn,
genesis,
Topic::SubstrateSign(SubstrateSignableId::Batch(batch)),
);
let nonce =
NonceDecider::handle_substrate_signable(txn, genesis, SubstrateSignableId::Batch(batch));
recognized_id(spec.set(), genesis, RecognizedIdType::Batch, batch.to_vec(), nonce).await;
}
@@ -518,14 +558,14 @@ pub(crate) async fn handle_application_tx<
}
}
Transaction::BatchPreprocess(data) => {
Transaction::SubstratePreprocess(data) => {
let Ok(_) = check_sign_data_len::<D>(txn, spec, data.signed.signer, data.data.len()) else {
return;
};
match handle(
txn,
&DataSpecification {
topic: Topic::Batch(data.plan),
topic: Topic::SubstrateSign(data.plan),
label: BATCH_PREPROCESS,
attempt: data.attempt,
},
@@ -534,13 +574,13 @@ pub(crate) async fn handle_application_tx<
) {
Accumulation::Ready(DataSet::Participating(mut preprocesses)) => {
unflatten(spec, &mut preprocesses);
NonceDecider::selected_for_signing_batch(txn, genesis, data.plan);
NonceDecider::selected_for_signing_substrate(txn, genesis, data.plan);
let key = TributaryDb::<D>::key_pair(txn, spec.set()).unwrap().0 .0;
processors
.send(
spec.set().network,
coordinator::CoordinatorMessage::BatchPreprocesses {
id: BatchSignId { key, id: data.plan, attempt: data.attempt },
coordinator::CoordinatorMessage::SubstratePreprocesses {
id: SubstrateSignId { key, id: data.plan, attempt: data.attempt },
preprocesses,
},
)
@@ -550,14 +590,14 @@ pub(crate) async fn handle_application_tx<
Accumulation::NotReady => {}
}
}
Transaction::BatchShare(data) => {
Transaction::SubstrateShare(data) => {
let Ok(_) = check_sign_data_len::<D>(txn, spec, data.signed.signer, data.data.len()) else {
return;
};
match handle(
txn,
&DataSpecification {
topic: Topic::Batch(data.plan),
topic: Topic::SubstrateSign(data.plan),
label: BATCH_SHARE,
attempt: data.attempt,
},
@@ -570,8 +610,8 @@ pub(crate) async fn handle_application_tx<
processors
.send(
spec.set().network,
coordinator::CoordinatorMessage::BatchShares {
id: BatchSignId { key, id: data.plan, attempt: data.attempt },
coordinator::CoordinatorMessage::SubstrateShares {
id: SubstrateSignId { key, id: data.plan, attempt: data.attempt },
shares: shares
.into_iter()
.map(|(validator, share)| (validator, share.try_into().unwrap()))

View File

@@ -1,4 +1,7 @@
use core::ops::{Deref, Range};
use core::{
ops::{Deref, Range},
fmt::Debug,
};
use std::io::{self, Read, Write};
use zeroize::Zeroizing;
@@ -15,6 +18,7 @@ use schnorr::SchnorrSignature;
use frost::Participant;
use scale::{Encode, Decode};
use processor_messages::coordinator::SubstrateSignableId;
use serai_client::{
primitives::{NetworkId, PublicKey},
@@ -167,8 +171,8 @@ impl TributarySpec {
}
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct SignData<const N: usize> {
pub plan: [u8; N],
pub struct SignData<Id: Clone + PartialEq + Eq + Debug + Encode + Decode> {
pub plan: Id,
pub attempt: u32,
pub data: Vec<Vec<u8>>,
@@ -176,10 +180,10 @@ pub struct SignData<const N: usize> {
pub signed: Signed,
}
impl<const N: usize> ReadWrite for SignData<N> {
impl<Id: Clone + PartialEq + Eq + Debug + Encode + Decode> ReadWrite for SignData<Id> {
fn read<R: io::Read>(reader: &mut R) -> io::Result<Self> {
let mut plan = [0; N];
reader.read_exact(&mut plan)?;
let plan = Id::decode(&mut scale::IoReader(&mut *reader))
.map_err(|_| io::Error::new(io::ErrorKind::Other, "invalid plan in SignData"))?;
let mut attempt = [0; 4];
reader.read_exact(&mut attempt)?;
@@ -208,7 +212,7 @@ impl<const N: usize> ReadWrite for SignData<N> {
}
fn write<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_all(&self.plan)?;
writer.write_all(&self.plan.encode())?;
writer.write_all(&self.attempt.to_le_bytes())?;
writer.write_all(&[u8::try_from(self.data.len()).unwrap()])?;
@@ -253,6 +257,9 @@ pub enum Transaction {
},
DkgConfirmed(u32, [u8; 32], Signed),
// Co-sign a Substrate block.
CosignSubstrateBlock([u8; 32]),
// When we have synchrony on a batch, we can allow signing it
// TODO (never?): This is less efficient compared to an ExternalBlock provided transaction,
// which would be binding over the block hash and automatically achieve synchrony on all
@@ -263,11 +270,11 @@ pub enum Transaction {
// IDs
SubstrateBlock(u64),
BatchPreprocess(SignData<5>),
BatchShare(SignData<5>),
SubstratePreprocess(SignData<SubstrateSignableId>),
SubstrateShare(SignData<SubstrateSignableId>),
SignPreprocess(SignData<32>),
SignShare(SignData<32>),
SignPreprocess(SignData<[u8; 32]>),
SignShare(SignData<[u8; 32]>),
// This is defined as an Unsigned transaction in order to de-duplicate SignCompleted amongst
// reporters (who should all report the same thing)
// We do still track the signer in order to prevent a single signer from publishing arbitrarily
@@ -415,6 +422,12 @@ impl ReadWrite for Transaction {
}
5 => {
let mut block = [0; 32];
reader.read_exact(&mut block)?;
Ok(Transaction::CosignSubstrateBlock(block))
}
6 => {
let mut block = [0; 32];
reader.read_exact(&mut block)?;
let mut batch = [0; 5];
@@ -422,19 +435,19 @@ impl ReadWrite for Transaction {
Ok(Transaction::Batch(block, batch))
}
6 => {
7 => {
let mut block = [0; 8];
reader.read_exact(&mut block)?;
Ok(Transaction::SubstrateBlock(u64::from_le_bytes(block)))
}
7 => SignData::read(reader).map(Transaction::BatchPreprocess),
8 => SignData::read(reader).map(Transaction::BatchShare),
8 => SignData::read(reader).map(Transaction::SubstratePreprocess),
9 => SignData::read(reader).map(Transaction::SubstrateShare),
9 => SignData::read(reader).map(Transaction::SignPreprocess),
10 => SignData::read(reader).map(Transaction::SignShare),
10 => SignData::read(reader).map(Transaction::SignPreprocess),
11 => SignData::read(reader).map(Transaction::SignShare),
11 => {
12 => {
let mut plan = [0; 32];
reader.read_exact(&mut plan)?;
@@ -534,36 +547,41 @@ impl ReadWrite for Transaction {
signed.write(writer)
}
Transaction::Batch(block, batch) => {
Transaction::CosignSubstrateBlock(block) => {
writer.write_all(&[5])?;
writer.write_all(block)
}
Transaction::Batch(block, batch) => {
writer.write_all(&[6])?;
writer.write_all(block)?;
writer.write_all(batch)
}
Transaction::SubstrateBlock(block) => {
writer.write_all(&[6])?;
writer.write_all(&[7])?;
writer.write_all(&block.to_le_bytes())
}
Transaction::BatchPreprocess(data) => {
writer.write_all(&[7])?;
Transaction::SubstratePreprocess(data) => {
writer.write_all(&[8])?;
data.write(writer)
}
Transaction::BatchShare(data) => {
writer.write_all(&[8])?;
Transaction::SubstrateShare(data) => {
writer.write_all(&[9])?;
data.write(writer)
}
Transaction::SignPreprocess(data) => {
writer.write_all(&[9])?;
data.write(writer)
}
Transaction::SignShare(data) => {
writer.write_all(&[10])?;
data.write(writer)
}
Transaction::SignCompleted { plan, tx_hash, first_signer, signature } => {
Transaction::SignShare(data) => {
writer.write_all(&[11])?;
data.write(writer)
}
Transaction::SignCompleted { plan, tx_hash, first_signer, signature } => {
writer.write_all(&[12])?;
writer.write_all(plan)?;
writer
.write_all(&[u8::try_from(tx_hash.len()).expect("tx hash length exceed 255 bytes")])?;
@@ -585,11 +603,13 @@ impl TransactionTrait for Transaction {
Transaction::InvalidDkgShare { signed, .. } => TransactionKind::Signed(signed),
Transaction::DkgConfirmed(_, _, signed) => TransactionKind::Signed(signed),
Transaction::CosignSubstrateBlock(_) => TransactionKind::Provided("cosign"),
Transaction::Batch(_, _) => TransactionKind::Provided("batch"),
Transaction::SubstrateBlock(_) => TransactionKind::Provided("serai"),
Transaction::BatchPreprocess(data) => TransactionKind::Signed(&data.signed),
Transaction::BatchShare(data) => TransactionKind::Signed(&data.signed),
Transaction::SubstratePreprocess(data) => TransactionKind::Signed(&data.signed),
Transaction::SubstrateShare(data) => TransactionKind::Signed(&data.signed),
Transaction::SignPreprocess(data) => TransactionKind::Signed(&data.signed),
Transaction::SignShare(data) => TransactionKind::Signed(&data.signed),
@@ -607,7 +627,7 @@ impl TransactionTrait for Transaction {
}
fn verify(&self) -> Result<(), TransactionError> {
if let Transaction::BatchShare(data) = self {
if let Transaction::SubstrateShare(data) = self {
for data in &data.data {
if data.len() != 32 {
Err(TransactionError::InvalidContent)?;
@@ -655,11 +675,13 @@ impl Transaction {
Transaction::InvalidDkgShare { ref mut signed, .. } => signed,
Transaction::DkgConfirmed(_, _, ref mut signed) => signed,
Transaction::CosignSubstrateBlock(_) => panic!("signing CosignSubstrateBlock"),
Transaction::Batch(_, _) => panic!("signing Batch"),
Transaction::SubstrateBlock(_) => panic!("signing SubstrateBlock"),
Transaction::BatchPreprocess(ref mut data) => &mut data.signed,
Transaction::BatchShare(ref mut data) => &mut data.signed,
Transaction::SubstratePreprocess(ref mut data) => &mut data.signed,
Transaction::SubstrateShare(ref mut data) => &mut data.signed,
Transaction::SignPreprocess(ref mut data) => &mut data.signed,
Transaction::SignShare(ref mut data) => &mut data.signed,

View File

@@ -1,11 +1,13 @@
use serai_db::{Get, DbTxn, create_db};
use processor_messages::coordinator::SubstrateSignableId;
use crate::tributary::Transaction;
use scale::Encode;
const BATCH_CODE: u8 = 0;
const BATCH_SIGNING_CODE: u8 = 1;
const SUBSTRATE_CODE: u8 = 0;
const SUBSTRATE_SIGNING_CODE: u8 = 1;
const PLAN_CODE: u8 = 2;
const PLAN_SIGNING_CODE: u8 = 3;
@@ -30,9 +32,13 @@ impl NextNonceDb {
/// transactions in response. Enables rebooting/rebuilding validators with full safety.
pub struct NonceDecider;
impl NonceDecider {
pub fn handle_batch(txn: &mut impl DbTxn, genesis: [u8; 32], batch: [u8; 5]) -> u32 {
pub fn handle_substrate_signable(
txn: &mut impl DbTxn,
genesis: [u8; 32],
id: SubstrateSignableId,
) -> u32 {
let nonce_for = NextNonceDb::allocate_nonce(txn, genesis);
ItemNonceDb::set(txn, genesis, BATCH_CODE, &batch, &nonce_for);
ItemNonceDb::set(txn, genesis, SUBSTRATE_CODE, &id.encode(), &nonce_for);
nonce_for
}
@@ -53,12 +59,16 @@ impl NonceDecider {
// TODO: The processor won't yield shares for this if the signing protocol aborts. We need to
// detect when we're expecting shares for an aborted protocol and insert a dummy transaction
// there.
pub fn selected_for_signing_batch(txn: &mut impl DbTxn, genesis: [u8; 32], batch: [u8; 5]) {
pub fn selected_for_signing_substrate(
txn: &mut impl DbTxn,
genesis: [u8; 32],
id: SubstrateSignableId,
) {
let nonce_for = NextNonceDb::allocate_nonce(txn, genesis);
ItemNonceDb::set(txn, genesis, BATCH_SIGNING_CODE, &batch, &nonce_for);
ItemNonceDb::set(txn, genesis, SUBSTRATE_SIGNING_CODE, &id.encode(), &nonce_for);
}
// TODO: Same TODO as selected_for_signing_batch
// TODO: Same TODO as selected_for_signing_substrate
pub fn selected_for_signing_plan(txn: &mut impl DbTxn, genesis: [u8; 32], plan: [u8; 32]) {
let nonce_for = NextNonceDb::allocate_nonce(txn, genesis);
ItemNonceDb::set(txn, genesis, PLAN_SIGNING_CODE, &plan, &nonce_for);
@@ -86,23 +96,26 @@ impl NonceDecider {
assert_eq!(*attempt, 0);
Some(Some(2))
}
Transaction::CosignSubstrateBlock(_) => None,
Transaction::Batch(_, _) => None,
Transaction::SubstrateBlock(_) => None,
Transaction::BatchPreprocess(data) => {
Transaction::SubstratePreprocess(data) => {
assert_eq!(data.attempt, 0);
Some(ItemNonceDb::get(getter, genesis, BATCH_CODE, &data.plan))
Some(ItemNonceDb::get(getter, genesis, SUBSTRATE_CODE, &data.plan.encode()))
}
Transaction::BatchShare(data) => {
Transaction::SubstrateShare(data) => {
assert_eq!(data.attempt, 0);
Some(ItemNonceDb::get(getter, genesis, BATCH_SIGNING_CODE, &data.plan))
Some(ItemNonceDb::get(getter, genesis, SUBSTRATE_SIGNING_CODE, &data.plan.encode()))
}
Transaction::SignPreprocess(data) => {
assert_eq!(data.attempt, 0);
Some(ItemNonceDb::get(getter, genesis, PLAN_CODE, &data.plan))
Some(ItemNonceDb::get(getter, genesis, PLAN_CODE, &data.plan.encode()))
}
Transaction::SignShare(data) => {
assert_eq!(data.attempt, 0);
Some(ItemNonceDb::get(getter, genesis, PLAN_SIGNING_CODE, &data.plan))
Some(ItemNonceDb::get(getter, genesis, PLAN_SIGNING_CODE, &data.plan.encode()))
}
Transaction::SignCompleted { .. } => None,
}