serai/coordinator/src/tests/tributary/sync.rs
Luke Parker 11fdb6da1d Coordinator Cleanup (#481)
* Move logic for evaluating if a cosign should occur to its own file

Cleans it up and makes it more robust.

* Have expected_next_batch return an error instead of retrying

While it was convenient to offer an error-free implementation, it potentially caused
very long-lived lock acquisitions in handle_processor_message.
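
A rough sketch of the new shape (the names and signature here are illustrative, not the
actual coordinator API):

// Hypothetical sketch: return an error when the value isn't yet known, instead of
// looping internally and holding locks in handle_processor_message for the duration.
#[derive(Debug)]
struct NotYetKnown;

fn expected_next_batch(known: Option<u32>) -> Result<u32, NotYetKnown> {
  known.ok_or(NotYetKnown)
}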

* Unify and clean DkgConfirmer and DkgRemoval

Does so via adding a new file for the common code, SigningProtocol.

Modifies from_cache to return the preprocess with the machine, as there's no
reason not to. Also removes an unused Result around the type.

Clarifies the security around deterministic nonces, removing them for
saved-to-disk cached preprocesses. The cached preprocesses are encrypted as the
DB is not a proper secret store.

Moves arguments always present in the protocol from function arguments into the
struct itself.

Removes the horribly ugly code in DkgRemoval, fixing multiple issues present
with it which would cause it to fail on use.
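
As a rough illustration of the cached-preprocess handling (the cipher choice and the
helper's name and signature are assumptions for this sketch, not necessarily what
signing_protocol.rs does):

use chacha20poly1305::{
  aead::{Aead, KeyInit},
  ChaCha20Poly1305, Key, Nonce,
};

// Hypothetical helper: encrypt a serialized preprocess before persisting it, since the DB
// isn't a proper secret store. A fresh random nonce accompanies each cache entry, avoiding
// deterministic nonces for anything written to disk.
fn cache_preprocess(key: &[u8; 32], nonce: &[u8; 12], preprocess: &[u8]) -> Vec<u8> {
  let cipher = ChaCha20Poly1305::new(Key::from_slice(key));
  let mut entry = nonce.to_vec();
  entry.extend(cipher.encrypt(Nonce::from_slice(nonce), preprocess).unwrap());
  entry
}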

* Set SeraiBlockNumber in cosign.rs as it's used by the cosigning protocol

* Remove unnecessary Clone from lambdas in coordinator

* Remove the EventDb from Tributary scanner

We used per-Transaction DB TXNs so that, on error, we don't have to rescan the entire
block, only the rest of it. We prevented scanning transactions multiple times by tracking
which we had already handled.

This is over-engineered and not worth it.

* Implement borsh for HasEvents, removing the manual encoding
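
Roughly this shape (the variant names are assumptions for the sketch):

use borsh::{BorshSerialize, BorshDeserialize};

// Deriving borsh replaces the hand-written byte encoding/decoding.
#[derive(Clone, Copy, PartialEq, Eq, Debug, BorshSerialize, BorshDeserialize)]
enum HasEvents {
  KeyGen,
  Yes,
  No,
}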

* Merge DkgConfirmer and DkgRemoval into signing_protocol.rs

Fixes a bug in DkgConfirmer which would cause it to improperly handle indexes
if any validator had multiple key shares.

* Strictly type DataSpecification's Label

* Correct threshold_i_map_to_keys_and_musig_i_map

It didn't include the participant's own index and accordingly was offset.
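
A simplified sketch of the fix, with byte arrays standing in for the actual Ristretto
public keys and an invented function name:

use std::collections::HashMap;

// When assigning 1-based MuSig indexes, the participant's own key must be included before
// sorting, else every index is offset.
fn musig_i_map(our_key: [u8; 32], mut keys: Vec<[u8; 32]>) -> HashMap<[u8; 32], u16> {
  keys.push(our_key); // previously omitted, causing the offset
  keys.sort();
  keys.into_iter().zip(1u16 ..).collect()
}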

* Create TributaryBlockHandler

This struct contains all variables prior passed to handle_block and stops them
from being passed around again and again.

This also ensures fatal_slash is only called while handling a block, as needed
as it expects to operate under perfect consensus.
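
Illustrative sketch of the pattern (the field set is invented, not the exact definition):

// Bundle the state previously threaded through handle_block into one struct, so
// fatal_slash can only be called from within block handling, where perfect consensus
// is assumed.
struct TributaryBlockHandler<'a, Txn> {
  txn: &'a mut Txn,
  block_number: u64,
  fatally_slashed: Vec<[u8; 32]>,
}

impl<'a, Txn> TributaryBlockHandler<'a, Txn> {
  fn fatal_slash(&mut self, validator: [u8; 32], _reason: &str) {
    self.fatally_slashed.push(validator);
  }
}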

* Inline accumulate, store confirmation nonces with shares

Inlining accumulate makes sense due to the amount of data which needed to be passed to
accumulate.

Storing confirmation nonces with shares ensures that either both are available or neither
is. Previously, one could be present while the other wasn't (requiring a runtime assert to
ensure we didn't bungle it somehow).
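
A hypothetical sketch of the invariant (types and names invented for illustration):

use std::collections::HashMap;

// Confirmation nonces are stored in the same entry as the shares, so a read yields both
// or neither, with no runtime assert needed to check that they're in sync.
struct ConfirmationEntry {
  nonces: Vec<u8>,
  shares: Vec<u8>,
}

struct ConfirmationDb(HashMap<u32, ConfirmationEntry>);

impl ConfirmationDb {
  fn set(&mut self, attempt: u32, nonces: Vec<u8>, shares: Vec<u8>) {
    self.0.insert(attempt, ConfirmationEntry { nonces, shares });
  }
  fn get(&self, attempt: u32) -> Option<&ConfirmationEntry> {
    self.0.get(&attempt)
  }
}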

* Create helper functions for handling DkgRemoval/SubstrateSign/Sign Tributary TXs

* Move Label into SignData

All of our transactions which use SignData end up with the same common usage
pattern for Label, justifying this.

Removes 3 transactions, explicitly de-duplicating their handlers.
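
Approximately this shape (the fields surrounding the label are assumptions, not the exact
definition):

// The strictly-typed Label now carried inside SignData.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Label {
  Preprocess,
  Share,
}

struct SignData {
  plan: [u8; 32],
  attempt: u32,
  label: Label,
  data: Vec<Vec<u8>>,
}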

* Remove CurrentlyCompletingKeyPair for the non-contextual DkgKeyPair

* Remove the manual read/write for TributarySpec for borsh

This struct doesn't gain any optimizations from the manual impl. Using borsh reduces
our scope.

* Use temporary variables to further minimize LoC in tributary handler

* Remove usage of tuples for non-trivial Tributary transactions

* Remove serde from dkg

serde could be used to deserialize internally inconsistent objects, which could lead to
panics or faults.

The BorshDeserialize derives have been replaced with a manual implementation
which won't produce inconsistent objects.
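
A minimal sketch of the pattern, assuming borsh 1.x's deserialize_reader and a simplified
two-field stand-in for the real dkg types:

use std::io;
use borsh::{BorshSerialize, BorshDeserialize};

// An object which is internally inconsistent if threshold is 0 or exceeds participants.
#[derive(BorshSerialize)]
struct ThresholdParams {
  participants: u16,
  threshold: u16,
}

impl BorshDeserialize for ThresholdParams {
  fn deserialize_reader<R: io::Read>(reader: &mut R) -> io::Result<Self> {
    let participants = u16::deserialize_reader(reader)?;
    let threshold = u16::deserialize_reader(reader)?;
    // Reject inconsistent encodings instead of constructing an object which may later panic
    if (threshold == 0) || (threshold > participants) {
      return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid threshold"));
    }
    Ok(ThresholdParams { participants, threshold })
  }
}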

* Abstract Future generics using new trait definitions in coordinator
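
The general trait-alias pattern, shown with invented names rather than the coordinator's
actual traits:

use core::future::Future;

// Define a trait capturing the repeated Future bounds once, with a blanket impl, rather
// than restating the generics on every signature.
trait TaskFuture: Send + Future<Output = ()> {}
impl<F: Send + Future<Output = ()>> TaskFuture for F {}

fn describe_task(_task: impl TaskFuture + 'static) {
  // Functions can now take `impl TaskFuture` instead of spelling out the full bound.
}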

* Move published_signed_transaction to tributary/mod.rs to reduce the size of main.rs

* Split coordinator/src/tributary/mod.rs into spec.rs and transaction.rs
2023-12-10 20:21:44 -05:00


use core::time::Duration;
use std::{sync::Arc, collections::HashSet};

use rand_core::OsRng;

use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto};

use tokio::{
  sync::{mpsc, broadcast},
  time::sleep,
};

use serai_db::MemDb;

use tributary::Tributary;

use crate::{
  tributary::Transaction,
  ActiveTributary, TributaryEvent,
  p2p::{heartbeat_tributaries_task, handle_p2p_task},
  tests::{
    LocalP2p,
    tributary::{new_keys, new_spec, new_tributaries},
  },
};

#[tokio::test]
async fn sync_test() {
  let mut keys = new_keys(&mut OsRng);
  let spec = new_spec(&mut OsRng, &keys);
  // Ensure this can have a node fail
  assert!(spec.n() > spec.t());

  let mut tributaries = new_tributaries(&keys, &spec)
    .await
    .into_iter()
    .map(|(_, p2p, tributary)| (p2p, tributary))
    .collect::<Vec<_>>();

  // Keep a Tributary back, effectively having it offline
  let syncer_key = keys.pop().unwrap();
  let (syncer_p2p, syncer_tributary) = tributaries.pop().unwrap();

  // Have the rest form a P2P net
  let mut tributary_senders = vec![];
  let mut tributary_arcs = vec![];
  let mut p2p_threads = vec![];
  for (p2p, tributary) in tributaries.drain(..) {
    let tributary = Arc::new(tributary);
    tributary_arcs.push(tributary.clone());
    let (new_tributary_send, new_tributary_recv) = broadcast::channel(5);
    let (cosign_send, _) = mpsc::unbounded_channel();
    let thread = tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv));
    new_tributary_send
      .send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary }))
      .map_err(|_| "failed to send ActiveTributary")
      .unwrap();
    tributary_senders.push(new_tributary_send);
    p2p_threads.push(thread);
  }
  let tributaries = tributary_arcs;

  // After four blocks of time, we should have a new block
  // We don't wait one block of time as we may have missed the chance for the first block
  // We don't wait two blocks because we may have missed the chance, and then had a failure to
  // propose by our 'offline' validator, which would cause the Tendermint round time to increase,
  // requiring a longer delay
  let block_time = u64::from(Tributary::<MemDb, Transaction, LocalP2p>::block_time());
  sleep(Duration::from_secs(4 * block_time)).await;
  let tip = tributaries[0].tip().await;
  assert!(tip != spec.genesis());

  // Sleep one second to make sure this block propagates
  sleep(Duration::from_secs(1)).await;
  // Make sure every tributary has it
  for tributary in &tributaries {
    assert!(tributary.reader().block(&tip).is_some());
  }

  // Now that we've confirmed the other tributaries formed a net without issue, drop the syncer's
  // pending P2P messages
  syncer_p2p.1.write().await.1.last_mut().unwrap().clear();

  // Have it join the net
  let syncer_key = Ristretto::generator() * *syncer_key;
  let syncer_tributary = Arc::new(syncer_tributary);
  let (syncer_tributary_send, syncer_tributary_recv) = broadcast::channel(5);
  let (cosign_send, _) = mpsc::unbounded_channel();
  tokio::spawn(handle_p2p_task(syncer_p2p.clone(), cosign_send, syncer_tributary_recv));
  syncer_tributary_send
    .send(TributaryEvent::NewTributary(ActiveTributary {
      spec: spec.clone(),
      tributary: syncer_tributary.clone(),
    }))
    .map_err(|_| "failed to send ActiveTributary to syncer")
    .unwrap();

  // It shouldn't automatically catch up. If it somehow was, our test would be broken
  // Sanity check this
  let tip = tributaries[0].tip().await;
  // Wait until a new block occurs
  sleep(Duration::from_secs(3 * block_time)).await;
  // Make sure a new block actually occurred
  assert!(tributaries[0].tip().await != tip);
  // Make sure the new block alone didn't trigger catching up
  assert_eq!(syncer_tributary.tip().await, spec.genesis());

  // Start the heartbeat protocol
  let (syncer_heartbeat_tributary_send, syncer_heartbeat_tributary_recv) = broadcast::channel(5);
  tokio::spawn(heartbeat_tributaries_task(syncer_p2p, syncer_heartbeat_tributary_recv));
  syncer_heartbeat_tributary_send
    .send(TributaryEvent::NewTributary(ActiveTributary {
      spec: spec.clone(),
      tributary: syncer_tributary.clone(),
    }))
    .map_err(|_| "failed to send ActiveTributary to heartbeat")
    .unwrap();

  // The heartbeat is once every 10 blocks
  sleep(Duration::from_secs(10 * block_time)).await;
  assert!(syncer_tributary.tip().await != spec.genesis());

  // Verify it synced to the tip
  let syncer_tip = {
    let tributary = &tributaries[0];

    let tip = tributary.tip().await;
    let syncer_tip = syncer_tributary.tip().await;
    // Allow a one block tolerance in case of race conditions
    assert!(
      HashSet::from([tip, tributary.reader().block(&tip).unwrap().parent()]).contains(&syncer_tip)
    );
    syncer_tip
  };

  sleep(Duration::from_secs(block_time)).await;

  // Verify it's now keeping up
  assert!(syncer_tributary.tip().await != syncer_tip);

  // Verify it's now participating in consensus
  // Because only `t` validators are used in a commit, take n - t nodes offline
  // leaving only `t` nodes. Which should force it to participate in the consensus
  // of next blocks.
  let spares = usize::from(spec.n() - spec.t());
  for thread in p2p_threads.iter().take(spares) {
    thread.abort();
  }

  // wait for a block
  sleep(Duration::from_secs(block_time)).await;

  if syncer_tributary
    .reader()
    .parsed_commit(&syncer_tributary.tip().await)
    .unwrap()
    .validators
    .iter()
    .any(|signer| signer == &syncer_key.to_bytes())
  {
    return;
  }

  panic!("synced tributary didn't start participating in consensus");
}