diff --git a/Cargo.lock b/Cargo.lock index 7e26d78a..c2dc5284 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8332,6 +8332,7 @@ dependencies = [ "serai-env", "serai-message-queue", "serai-processor-messages", + "serai-task", "sp-application-crypto", "sp-runtime", "tokio", diff --git a/coordinator/Cargo.toml b/coordinator/Cargo.toml index 7d48fcc7..8dcd4cd5 100644 --- a/coordinator/Cargo.toml +++ b/coordinator/Cargo.toml @@ -37,6 +37,7 @@ scale = { package = "parity-scale-codec", version = "3", default-features = fals zalloc = { path = "../common/zalloc" } serai-db = { path = "../common/db" } serai-env = { path = "../common/env" } +serai-task = { path = "../common/task", version = "0.1" } processor-messages = { package = "serai-processor-messages", path = "../processor/messages" } message-queue = { package = "serai-message-queue", path = "../message-queue" } diff --git a/coordinator/src/db.rs b/coordinator/src/db.rs deleted file mode 100644 index 04ee9d35..00000000 --- a/coordinator/src/db.rs +++ /dev/null @@ -1,134 +0,0 @@ -use blake2::{ - digest::{consts::U32, Digest}, - Blake2b, -}; - -use scale::Encode; -use borsh::{BorshSerialize, BorshDeserialize}; -use serai_client::{ - primitives::NetworkId, - validator_sets::primitives::{Session, ValidatorSet}, - in_instructions::primitives::{Batch, SignedBatch}, -}; - -pub use serai_db::*; - -use ::tributary::ReadWrite; -use crate::tributary::{TributarySpec, Transaction, scanner::RecognizedIdType}; - -create_db!( - MainDb { - HandledMessageDb: (network: NetworkId) -> u64, - ActiveTributaryDb: () -> Vec, - RetiredTributaryDb: (set: ValidatorSet) -> (), - FirstPreprocessDb: ( - network: NetworkId, - id_type: RecognizedIdType, - id: &[u8] - ) -> Vec>, - LastReceivedBatchDb: (network: NetworkId) -> u32, - ExpectedBatchDb: (network: NetworkId, id: u32) -> [u8; 32], - BatchDb: (network: NetworkId, id: u32) -> SignedBatch, - LastVerifiedBatchDb: (network: NetworkId) -> u32, - HandoverBatchDb: (set: ValidatorSet) -> u32, - LookupHandoverBatchDb: (network: NetworkId, batch: u32) -> Session, - QueuedBatchesDb: (set: ValidatorSet) -> Vec - } -); - -impl ActiveTributaryDb { - pub fn active_tributaries(getter: &G) -> (Vec, Vec) { - let bytes = Self::get(getter).unwrap_or_default(); - let mut bytes_ref: &[u8] = bytes.as_ref(); - - let mut tributaries = vec![]; - while !bytes_ref.is_empty() { - tributaries.push(TributarySpec::deserialize_reader(&mut bytes_ref).unwrap()); - } - - (bytes, tributaries) - } - - pub fn add_participating_in_tributary(txn: &mut impl DbTxn, spec: &TributarySpec) { - let (mut existing_bytes, existing) = ActiveTributaryDb::active_tributaries(txn); - for tributary in &existing { - if tributary == spec { - return; - } - } - - spec.serialize(&mut existing_bytes).unwrap(); - ActiveTributaryDb::set(txn, &existing_bytes); - } - - pub fn retire_tributary(txn: &mut impl DbTxn, set: ValidatorSet) { - let mut active = Self::active_tributaries(txn).1; - for i in 0 .. 
active.len() { - if active[i].set() == set { - active.remove(i); - break; - } - } - - let mut bytes = vec![]; - for active in active { - active.serialize(&mut bytes).unwrap(); - } - Self::set(txn, &bytes); - RetiredTributaryDb::set(txn, set, &()); - } -} - -impl FirstPreprocessDb { - pub fn save_first_preprocess( - txn: &mut impl DbTxn, - network: NetworkId, - id_type: RecognizedIdType, - id: &[u8], - preprocess: &Vec>, - ) { - if let Some(existing) = FirstPreprocessDb::get(txn, network, id_type, id) { - assert_eq!(&existing, preprocess, "saved a distinct first preprocess"); - return; - } - FirstPreprocessDb::set(txn, network, id_type, id, preprocess); - } -} - -impl ExpectedBatchDb { - pub fn save_expected_batch(txn: &mut impl DbTxn, batch: &Batch) { - LastReceivedBatchDb::set(txn, batch.network, &batch.id); - Self::set( - txn, - batch.network, - batch.id, - &Blake2b::::digest(batch.instructions.encode()).into(), - ); - } -} - -impl HandoverBatchDb { - pub fn set_handover_batch(txn: &mut impl DbTxn, set: ValidatorSet, batch: u32) { - Self::set(txn, set, &batch); - LookupHandoverBatchDb::set(txn, set.network, batch, &set.session); - } -} -impl QueuedBatchesDb { - pub fn queue(txn: &mut impl DbTxn, set: ValidatorSet, batch: &Transaction) { - let mut batches = Self::get(txn, set).unwrap_or_default(); - batch.write(&mut batches).unwrap(); - Self::set(txn, set, &batches); - } - - pub fn take(txn: &mut impl DbTxn, set: ValidatorSet) -> Vec { - let batches_vec = Self::get(txn, set).unwrap_or_default(); - txn.del(Self::key(set)); - - let mut batches: &[u8] = &batches_vec; - let mut res = vec![]; - while !batches.is_empty() { - res.push(Transaction::read(&mut batches).unwrap()); - } - res - } -} diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index 87db0135..c3eb8d80 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -1,1286 +1,5 @@ -use core::ops::Deref; -use std::{ - sync::{OnceLock, Arc}, - time::Duration, - collections::{VecDeque, HashSet, HashMap}, -}; - -use zeroize::{Zeroize, Zeroizing}; -use rand_core::OsRng; - -use ciphersuite::{ - group::{ - ff::{Field, PrimeField}, - GroupEncoding, - }, - Ciphersuite, Ristretto, -}; -use schnorr::SchnorrSignature; - -use serai_db::{DbTxn, Db}; - -use scale::Encode; -use borsh::BorshSerialize; -use serai_client::{ - primitives::NetworkId, - validator_sets::primitives::{Session, ValidatorSet, KeyPair}, - Public, Serai, SeraiInInstructions, -}; - -use message_queue::{Service, client::MessageQueue}; - -use tokio::{ - sync::{Mutex, RwLock, mpsc, broadcast}, - time::sleep, -}; - -use ::tributary::{ProvidedError, TransactionKind, TransactionTrait, Block, Tributary}; - mod tributary; -use crate::tributary::{ - TributarySpec, Label, SignData, Transaction, scanner::RecognizedIdType, PlanIds, -}; -mod db; -use db::*; - -mod p2p; -pub use p2p::*; - -use processor_messages::{ - key_gen, sign, - coordinator::{self, SubstrateSignableId}, - ProcessorMessage, -}; - -pub mod processors; -use processors::Processors; - -mod substrate; -use substrate::CosignTransactions; - -mod cosign_evaluator; -use cosign_evaluator::CosignEvaluator; - -#[cfg(test)] -pub mod tests; - -#[global_allocator] -static ALLOCATOR: zalloc::ZeroizingAlloc = - zalloc::ZeroizingAlloc(std::alloc::System); - -#[derive(Clone)] -pub struct ActiveTributary { - pub spec: TributarySpec, - pub tributary: Arc>, -} - -#[derive(Clone)] -pub enum TributaryEvent { - NewTributary(ActiveTributary), - TributaryRetired(ValidatorSet), -} - -// Creates a new tributary and sends it to 
all listeners. -async fn add_tributary( - db: D, - key: Zeroizing<::F>, - processors: &Pro, - p2p: P, - tributaries: &broadcast::Sender>, - spec: TributarySpec, -) { - if RetiredTributaryDb::get(&db, spec.set()).is_some() { - log::info!("not adding tributary {:?} since it's been retired", spec.set()); - } - - log::info!("adding tributary {:?}", spec.set()); - - let tributary = Tributary::<_, Transaction, _>::new( - // TODO2: Use a db on a distinct volume to protect against DoS attacks - // TODO2: Delete said db once the Tributary is dropped - db, - spec.genesis(), - spec.start_time(), - key.clone(), - spec.validators(), - p2p, - ) - .await - .unwrap(); - - // Trigger a DKG for the newly added Tributary - // If we're rebooting, we'll re-fire this message - // This is safe due to the message-queue deduplicating based off the intent system - let set = spec.set(); - - processors - .send( - set.network, - processor_messages::key_gen::CoordinatorMessage::GenerateKey { - session: set.session, - threshold: spec.t(), - evrf_public_keys: spec.evrf_public_keys(), - // TODO - // params: frost::ThresholdParams::new(spec.t(), spec.n(&[]), our_i.start).unwrap(), - // shares: u16::from(our_i.end) - u16::from(our_i.start), - }, - ) - .await; - - tributaries - .send(TributaryEvent::NewTributary(ActiveTributary { spec, tributary: Arc::new(tributary) })) - .map_err(|_| "all ActiveTributary recipients closed") - .unwrap(); -} - -// TODO: Find a better pattern for this -static HANDOVER_VERIFY_QUEUE_LOCK: OnceLock> = OnceLock::new(); - -#[allow(clippy::too_many_arguments)] -async fn handle_processor_message( - db: &mut D, - key: &Zeroizing<::F>, - serai: &Serai, - p2p: &P, - cosign_channel: &mpsc::UnboundedSender, - tributaries: &HashMap>, - network: NetworkId, - msg: &processors::Message, -) -> bool { - #[allow(clippy::nonminimal_bool)] - if let Some(already_handled) = HandledMessageDb::get(db, msg.network) { - assert!(!(already_handled > msg.id)); - assert!((already_handled == msg.id) || (already_handled == msg.id - 1)); - if already_handled == msg.id { - return true; - } - } else { - assert_eq!(msg.id, 0); - } - - let _hvq_lock = HANDOVER_VERIFY_QUEUE_LOCK.get_or_init(|| Mutex::new(())).lock().await; - let mut txn = db.txn(); - - let mut relevant_tributary = match &msg.msg { - // We'll only receive these if we fired GenerateKey, which we'll only do if if we're - // in-set, making the Tributary relevant - ProcessorMessage::KeyGen(inner_msg) => match inner_msg { - key_gen::ProcessorMessage::Participation { session, .. } | - key_gen::ProcessorMessage::GeneratedKeyPair { session, .. } | - key_gen::ProcessorMessage::Blame { session, .. } => Some(*session), - }, - ProcessorMessage::Sign(inner_msg) => match inner_msg { - // We'll only receive InvalidParticipant/Preprocess/Share if we're actively signing - sign::ProcessorMessage::InvalidParticipant { id, .. } | - sign::ProcessorMessage::Preprocess { id, .. } | - sign::ProcessorMessage::Share { id, .. } => Some(id.session), - // While the Processor's Scanner will always emit Completed, that's routed through the - // Signer and only becomes a ProcessorMessage::Completed if the Signer is present and - // confirms it - sign::ProcessorMessage::Completed { session, .. 
} => Some(*session), - }, - ProcessorMessage::Coordinator(inner_msg) => match inner_msg { - // This is a special case as it's relevant to *all* Tributaries for this network we're - // signing in - // It doesn't return a Tributary to become `relevant_tributary` though - coordinator::ProcessorMessage::SubstrateBlockAck { block, plans } => { - // Get the sessions for these keys - let sessions = plans - .iter() - .map(|plan| plan.session) - .filter(|session| { - RetiredTributaryDb::get(&txn, ValidatorSet { network, session: *session }).is_none() - }) - .collect::>(); - - // Ensure we have the Tributaries - for session in &sessions { - if !tributaries.contains_key(session) { - return false; - } - } - - for session in sessions { - let tributary = &tributaries[&session]; - let plans = plans - .iter() - .filter_map(|plan| Some(plan.id).filter(|_| plan.session == session)) - .collect::>(); - PlanIds::set(&mut txn, &tributary.spec.genesis(), *block, &plans); - - let tx = Transaction::SubstrateBlock(*block); - log::trace!( - "processor message effected transaction {} {:?}", - hex::encode(tx.hash()), - &tx - ); - log::trace!("providing transaction {}", hex::encode(tx.hash())); - let res = tributary.tributary.provide_transaction(tx).await; - if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) { - if res == Err(ProvidedError::LocalMismatchesOnChain) { - // Spin, since this is a crit for this Tributary - loop { - log::error!( - "{}. tributary: {}, provided: SubstrateBlock({})", - "tributary added distinct provided to delayed locally provided TX", - hex::encode(tributary.spec.genesis()), - block, - ); - sleep(Duration::from_secs(60)).await; - } - } - panic!("provided an invalid transaction: {res:?}"); - } - } - - None - } - // We'll only fire these if we are the Substrate signer, making the Tributary relevant - coordinator::ProcessorMessage::InvalidParticipant { id, .. } | - coordinator::ProcessorMessage::CosignPreprocess { id, .. } | - coordinator::ProcessorMessage::BatchPreprocess { id, .. } | - coordinator::ProcessorMessage::SlashReportPreprocess { id, .. } | - coordinator::ProcessorMessage::SubstrateShare { id, .. 
} => Some(id.session), - // This causes an action on our P2P net yet not on any Tributary - coordinator::ProcessorMessage::CosignedBlock { block_number, block, signature } => { - let cosigned_block = CosignedBlock { - network, - block_number: *block_number, - block: *block, - signature: { - let mut arr = [0; 64]; - arr.copy_from_slice(signature); - arr - }, - }; - cosign_channel.send(cosigned_block).unwrap(); - let mut buf = vec![]; - cosigned_block.serialize(&mut buf).unwrap(); - P2p::broadcast(p2p, GossipMessageKind::CosignedBlock, buf).await; - None - } - // This causes an action on Substrate yet not on any Tributary - coordinator::ProcessorMessage::SignedSlashReport { session, signature } => { - let set = ValidatorSet { network, session: *session }; - let signature: &[u8] = signature.as_ref(); - let signature = serai_client::Signature(signature.try_into().unwrap()); - - let slashes = crate::tributary::SlashReport::get(&txn, set) - .expect("signed slash report despite not having slash report locally"); - let slashes_pubs = - slashes.iter().map(|(address, points)| (Public(*address), *points)).collect::>(); - - let tx = serai_client::SeraiValidatorSets::report_slashes( - network, - slashes - .into_iter() - .map(|(address, points)| (serai_client::SeraiAddress(address), points)) - .collect::>() - .try_into() - .unwrap(), - signature.clone(), - ); - - loop { - if serai.publish(&tx).await.is_ok() { - break None; - } - - // Check if the slashes shouldn't still be reported. If not, break. - let Ok(serai) = serai.as_of_latest_finalized_block().await else { - tokio::time::sleep(core::time::Duration::from_secs(5)).await; - continue; - }; - let Ok(key) = serai.validator_sets().key_pending_slash_report(network).await else { - tokio::time::sleep(core::time::Duration::from_secs(5)).await; - continue; - }; - let Some(key) = key else { - break None; - }; - // If this is the key for this slash report, then this will verify - use sp_application_crypto::RuntimePublic; - if !key.verify( - &serai_client::validator_sets::primitives::report_slashes_message(&set, &slashes_pubs), - &signature, - ) { - break None; - } - } - } - }, - // These don't return a relevant Tributary as there's no Tributary with action expected - ProcessorMessage::Substrate(inner_msg) => match inner_msg { - processor_messages::substrate::ProcessorMessage::Batch { batch } => { - assert_eq!( - batch.network, msg.network, - "processor sent us a batch for a different network than it was for", - ); - ExpectedBatchDb::save_expected_batch(&mut txn, batch); - None - } - // If this is a new Batch, immediately publish it (if we can) - processor_messages::substrate::ProcessorMessage::SignedBatch { batch } => { - assert_eq!( - batch.batch.network, msg.network, - "processor sent us a signed batch for a different network than it was for", - ); - - log::debug!("received batch {:?} {}", batch.batch.network, batch.batch.id); - - // Save this batch to the disk - BatchDb::set(&mut txn, batch.batch.network, batch.batch.id, &batch.clone()); - - // Get the next-to-execute batch ID - let Ok(mut next) = substrate::expected_next_batch(serai, network).await else { - return false; - }; - - // Since we have a new batch, publish all batches yet to be published to Serai - // This handles the edge-case where batch n+1 is signed before batch n is - let mut batches = VecDeque::new(); - while let Some(batch) = BatchDb::get(&txn, network, next) { - batches.push_back(batch); - next += 1; - } - - while let Some(batch) = batches.pop_front() { - // If this Batch should no 
longer be published, continue - let Ok(expected_next_batch) = substrate::expected_next_batch(serai, network).await else { - return false; - }; - if expected_next_batch > batch.batch.id { - continue; - } - - let tx = SeraiInInstructions::execute_batch(batch.clone()); - log::debug!("attempting to publish batch {:?} {}", batch.batch.network, batch.batch.id,); - // This publish may fail if this transactions already exists in the mempool, which is - // possible, or if this batch was already executed on-chain - // Either case will have eventual resolution and be handled by the above check on if - // this batch should execute - let res = serai.publish(&tx).await; - if res.is_ok() { - log::info!( - "published batch {network:?} {} (block {})", - batch.batch.id, - hex::encode(batch.batch.block), - ); - } else { - log::debug!( - "couldn't publish batch {:?} {}: {:?}", - batch.batch.network, - batch.batch.id, - res, - ); - // If we failed to publish it, restore it - batches.push_front(batch); - // Sleep for a few seconds before retrying to prevent hammering the node - sleep(Duration::from_secs(5)).await; - } - } - - None - } - }, - }; - - // If we have a relevant Tributary, check it's actually still relevant and has yet to be retired - if let Some(relevant_tributary_value) = relevant_tributary { - if RetiredTributaryDb::get( - &txn, - ValidatorSet { network: msg.network, session: relevant_tributary_value }, - ) - .is_some() - { - relevant_tributary = None; - } - } - - // If there's a relevant Tributary... - if let Some(relevant_tributary) = relevant_tributary { - // Make sure we have it - // Per the reasoning above, we only return a Tributary as relevant if we're a participant - // Accordingly, we do *need* to have this Tributary now to handle it UNLESS the Tributary has - // already completed and this is simply an old message (which we prior checked) - let Some(ActiveTributary { spec, tributary }) = tributaries.get(&relevant_tributary) else { - // Since we don't, sleep for a fraction of a second and return false, signaling we didn't - // handle this message - // At the start of the loop which calls this function, we'll check for new tributaries, - // making this eventually resolve - sleep(Duration::from_millis(100)).await; - return false; - }; - - let genesis = spec.genesis(); - let pub_key = Ristretto::generator() * key.deref(); - - let txs = match msg.msg.clone() { - ProcessorMessage::KeyGen(inner_msg) => match inner_msg { - key_gen::ProcessorMessage::Participation { session, participation } => { - assert_eq!(session, spec.set().session); - vec![Transaction::DkgParticipation { participation, signed: Transaction::empty_signed() }] - } - key_gen::ProcessorMessage::GeneratedKeyPair { session, substrate_key, network_key } => { - assert_eq!(session, spec.set().session); - crate::tributary::generated_key_pair::( - &mut txn, - genesis, - &KeyPair(Public(substrate_key), network_key.try_into().unwrap()), - ); - - // Create a MuSig-based machine to inform Substrate of this key generation - let confirmation_nonces = - crate::tributary::dkg_confirmation_nonces(key, spec, &mut txn, 0); - - vec![Transaction::DkgConfirmationNonces { - attempt: 0, - confirmation_nonces, - signed: Transaction::empty_signed(), - }] - } - key_gen::ProcessorMessage::Blame { session, participant } => { - assert_eq!(session, spec.set().session); - let participant = spec.reverse_lookup_i(participant).unwrap(); - vec![Transaction::RemoveParticipant { participant, signed: Transaction::empty_signed() }] - } - }, - 
ProcessorMessage::Sign(msg) => match msg { - sign::ProcessorMessage::InvalidParticipant { .. } => { - // TODO: Locally increase slash points to maximum (distinct from an explicitly fatal - // slash) and censor transactions (yet don't explicitly ban) - vec![] - } - sign::ProcessorMessage::Preprocess { id, preprocesses } => { - if id.attempt == 0 { - FirstPreprocessDb::save_first_preprocess( - &mut txn, - network, - RecognizedIdType::Plan, - &id.id, - &preprocesses, - ); - - vec![] - } else { - vec![Transaction::Sign(SignData { - plan: id.id, - attempt: id.attempt, - label: Label::Preprocess, - data: preprocesses, - signed: Transaction::empty_signed(), - })] - } - } - sign::ProcessorMessage::Share { id, shares } => { - vec![Transaction::Sign(SignData { - plan: id.id, - attempt: id.attempt, - label: Label::Share, - data: shares, - signed: Transaction::empty_signed(), - })] - } - sign::ProcessorMessage::Completed { session: _, id, tx } => { - let r = Zeroizing::new(::F::random(&mut OsRng)); - #[allow(non_snake_case)] - let R = ::generator() * r.deref(); - let mut tx = Transaction::SignCompleted { - plan: id, - tx_hash: tx, - first_signer: pub_key, - signature: SchnorrSignature { R, s: ::F::ZERO }, - }; - let signed = SchnorrSignature::sign(key, r, tx.sign_completed_challenge()); - match &mut tx { - Transaction::SignCompleted { signature, .. } => { - *signature = signed; - } - _ => unreachable!(), - } - vec![tx] - } - }, - ProcessorMessage::Coordinator(inner_msg) => match inner_msg { - coordinator::ProcessorMessage::SubstrateBlockAck { .. } => unreachable!(), - coordinator::ProcessorMessage::InvalidParticipant { .. } => { - // TODO: Locally increase slash points to maximum (distinct from an explicitly fatal - // slash) and censor transactions (yet don't explicitly ban) - vec![] - } - coordinator::ProcessorMessage::CosignPreprocess { id, preprocesses } | - coordinator::ProcessorMessage::SlashReportPreprocess { id, preprocesses } => { - vec![Transaction::SubstrateSign(SignData { - plan: id.id, - attempt: id.attempt, - label: Label::Preprocess, - data: preprocesses.into_iter().map(Into::into).collect(), - signed: Transaction::empty_signed(), - })] - } - coordinator::ProcessorMessage::BatchPreprocess { id, block, preprocesses } => { - log::info!( - "informed of batch (sign ID {}, attempt {}) for block {}", - hex::encode(id.id.encode()), - id.attempt, - hex::encode(block), - ); - - // If this is the first attempt instance, wait until we synchronize around the batch - // first - if id.attempt == 0 { - FirstPreprocessDb::save_first_preprocess( - &mut txn, - spec.set().network, - RecognizedIdType::Batch, - &{ - let SubstrateSignableId::Batch(id) = id.id else { - panic!("BatchPreprocess SubstrateSignableId wasn't Batch") - }; - id.to_le_bytes() - }, - &preprocesses.into_iter().map(Into::into).collect::>(), - ); - - let intended = Transaction::Batch { - block: block.0, - batch: match id.id { - SubstrateSignableId::Batch(id) => id, - _ => panic!("BatchPreprocess did not contain Batch ID"), - }, - }; - - // If this is the new key's first Batch, only create this TX once we verify all - // all prior published `Batch`s - // TODO: This assumes BatchPreprocess is immediately after Batch - // Ensure that assumption - let last_received = LastReceivedBatchDb::get(&txn, msg.network).unwrap(); - let handover_batch = HandoverBatchDb::get(&txn, spec.set()); - let mut queue = false; - if let Some(handover_batch) = handover_batch { - // There is a race condition here. 
We may verify all `Batch`s from the prior set, - // start signing the handover `Batch` `n`, start signing `n+1`, have `n+1` signed - // before `n` (or at the same time), yet then the prior set forges a malicious - // `Batch` `n`. - // - // The malicious `Batch` `n` would be publishable to Serai, as Serai can't - // distinguish what's intended to be a handover `Batch`, yet then anyone could - // publish the new set's `n+1`, causing their acceptance of the handover. - // - // To fix this, if this is after the handover `Batch` and we have yet to verify - // publication of the handover `Batch`, don't yet yield the provided. - if last_received > handover_batch { - if let Some(last_verified) = LastVerifiedBatchDb::get(&txn, msg.network) { - if last_verified < handover_batch { - queue = true; - } - } else { - queue = true; - } - } - } else { - HandoverBatchDb::set_handover_batch(&mut txn, spec.set(), last_received); - // If this isn't the first batch, meaning we do have to verify all prior batches, and - // the prior Batch hasn't been verified yet... - if (last_received != 0) && - LastVerifiedBatchDb::get(&txn, msg.network) - .map_or(true, |last_verified| last_verified < (last_received - 1)) - { - // Withhold this TX until we verify all prior `Batch`s - queue = true; - } - } - - if queue { - QueuedBatchesDb::queue(&mut txn, spec.set(), &intended); - vec![] - } else { - // Because this is post-verification of the handover batch, take all queued `Batch`s - // now to ensure we don't provide this before an already queued Batch - // This *may* be an unreachable case due to how last_verified_batch is set, yet it - // doesn't hurt to have as a defensive pattern - let mut res = QueuedBatchesDb::take(&mut txn, spec.set()); - res.push(intended); - res - } - } else { - vec![Transaction::SubstrateSign(SignData { - plan: id.id, - attempt: id.attempt, - label: Label::Preprocess, - data: preprocesses.into_iter().map(Into::into).collect(), - signed: Transaction::empty_signed(), - })] - } - } - coordinator::ProcessorMessage::SubstrateShare { id, shares } => { - vec![Transaction::SubstrateSign(SignData { - plan: id.id, - attempt: id.attempt, - label: Label::Share, - data: shares.into_iter().map(|share| share.to_vec()).collect(), - signed: Transaction::empty_signed(), - })] - } - #[allow(clippy::match_same_arms)] // Allowed to preserve layout - coordinator::ProcessorMessage::CosignedBlock { .. } => unreachable!(), - #[allow(clippy::match_same_arms)] - coordinator::ProcessorMessage::SignedSlashReport { .. } => unreachable!(), - }, - ProcessorMessage::Substrate(inner_msg) => match inner_msg { - processor_messages::substrate::ProcessorMessage::Batch { .. } | - processor_messages::substrate::ProcessorMessage::SignedBatch { .. } => unreachable!(), - }, - }; - - // If this created transactions, publish them - for mut tx in txs { - log::trace!("processor message effected transaction {} {:?}", hex::encode(tx.hash()), &tx); - - match tx.kind() { - TransactionKind::Provided(_) => { - log::trace!("providing transaction {}", hex::encode(tx.hash())); - let res = tributary.provide_transaction(tx.clone()).await; - if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) { - if res == Err(ProvidedError::LocalMismatchesOnChain) { - // Spin, since this is a crit for this Tributary - loop { - log::error!( - "{}. 
tributary: {}, provided: {:?}", - "tributary added distinct provided to delayed locally provided TX", - hex::encode(spec.genesis()), - &tx, - ); - sleep(Duration::from_secs(60)).await; - } - } - panic!("provided an invalid transaction: {res:?}"); - } - } - TransactionKind::Unsigned => { - log::trace!("publishing unsigned transaction {}", hex::encode(tx.hash())); - match tributary.add_transaction(tx.clone()).await { - Ok(_) => {} - Err(e) => panic!("created an invalid unsigned transaction: {e:?}"), - } - } - TransactionKind::Signed(_, _) => { - tx.sign(&mut OsRng, genesis, key); - tributary::publish_signed_transaction(&mut txn, tributary, tx).await; - } - } - } - } - - HandledMessageDb::set(&mut txn, msg.network, &msg.id); - txn.commit(); - - true -} - -#[allow(clippy::too_many_arguments)] -async fn handle_processor_messages( - mut db: D, - key: Zeroizing<::F>, - serai: Arc, - processors: Pro, - p2p: P, - cosign_channel: mpsc::UnboundedSender, - network: NetworkId, - mut tributary_event: mpsc::UnboundedReceiver>, -) { - let mut tributaries = HashMap::new(); - loop { - match tributary_event.try_recv() { - Ok(event) => match event { - TributaryEvent::NewTributary(tributary) => { - let set = tributary.spec.set(); - assert_eq!(set.network, network); - tributaries.insert(set.session, tributary); - } - TributaryEvent::TributaryRetired(set) => { - tributaries.remove(&set.session); - } - }, - Err(mpsc::error::TryRecvError::Empty) => {} - Err(mpsc::error::TryRecvError::Disconnected) => { - panic!("handle_processor_messages tributary_event sender closed") - } - } - - // TODO: Check this ID is sane (last handled ID or expected next ID) - let Ok(msg) = tokio::time::timeout(Duration::from_secs(1), processors.recv(network)).await - else { - continue; - }; - log::trace!("entering handle_processor_message for {:?}", network); - if handle_processor_message( - &mut db, - &key, - &serai, - &p2p, - &cosign_channel, - &tributaries, - network, - &msg, - ) - .await - { - processors.ack(msg).await; - } - log::trace!("exited handle_processor_message for {:?}", network); - } -} - -#[allow(clippy::too_many_arguments)] -async fn handle_cosigns_and_batch_publication( - mut db: D, - network: NetworkId, - mut tributary_event: mpsc::UnboundedReceiver>, -) { - let mut tributaries = HashMap::new(); - 'outer: loop { - // TODO: Create a better async flow for this - tokio::time::sleep(core::time::Duration::from_millis(100)).await; - - match tributary_event.try_recv() { - Ok(event) => match event { - TributaryEvent::NewTributary(tributary) => { - let set = tributary.spec.set(); - assert_eq!(set.network, network); - tributaries.insert(set.session, tributary); - } - TributaryEvent::TributaryRetired(set) => { - tributaries.remove(&set.session); - } - }, - Err(mpsc::error::TryRecvError::Empty) => {} - Err(mpsc::error::TryRecvError::Disconnected) => { - panic!("handle_processor_messages tributary_event sender closed") - } - } - - // Handle pending cosigns - { - let mut txn = db.txn(); - while let Some((session, block, hash)) = CosignTransactions::try_recv(&mut txn, network) { - let Some(ActiveTributary { spec, tributary }) = tributaries.get(&session) else { - log::warn!("didn't yet have tributary we're supposed to cosign with"); - break; - }; - log::info!( - "{network:?} {session:?} cosigning block #{block} (hash {}...)", - hex::encode(&hash[.. 
8]) - ); - let tx = Transaction::CosignSubstrateBlock(hash); - let res = tributary.provide_transaction(tx.clone()).await; - if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) { - if res == Err(ProvidedError::LocalMismatchesOnChain) { - // Spin, since this is a crit for this Tributary - loop { - log::error!( - "{}. tributary: {}, provided: {:?}", - "tributary added distinct CosignSubstrateBlock", - hex::encode(spec.genesis()), - &tx, - ); - sleep(Duration::from_secs(60)).await; - } - } - panic!("provided an invalid CosignSubstrateBlock: {res:?}"); - } - } - txn.commit(); - } - - // Verify any publifshed `Batch`s - { - let _hvq_lock = HANDOVER_VERIFY_QUEUE_LOCK.get_or_init(|| Mutex::new(())).lock().await; - let mut txn = db.txn(); - let mut to_publish = vec![]; - let start_id = - LastVerifiedBatchDb::get(&txn, network).map_or(0, |already_verified| already_verified + 1); - if let Some(last_id) = - substrate::verify_published_batches::(&mut txn, network, u32::MAX).await - { - // Check if any of these `Batch`s were a handover `Batch` or the `Batch` before a handover - // `Batch` - // If so, we need to publish queued provided `Batch` transactions - for batch in start_id ..= last_id { - let is_pre_handover = LookupHandoverBatchDb::get(&txn, network, batch + 1); - if let Some(session) = is_pre_handover { - let set = ValidatorSet { network, session }; - let mut queued = QueuedBatchesDb::take(&mut txn, set); - // is_handover_batch is only set for handover `Batch`s we're participating in, making - // this safe - if queued.is_empty() { - panic!("knew the next Batch was a handover yet didn't queue it"); - } - - // Only publish the handover Batch - to_publish.push((set.session, queued.remove(0))); - // Re-queue the remaining batches - for remaining in queued { - QueuedBatchesDb::queue(&mut txn, set, &remaining); - } - } - - let is_handover = LookupHandoverBatchDb::get(&txn, network, batch); - if let Some(session) = is_handover { - for queued in QueuedBatchesDb::take(&mut txn, ValidatorSet { network, session }) { - to_publish.push((session, queued)); - } - } - } - } - - for (session, tx) in to_publish { - let Some(ActiveTributary { spec, tributary }) = tributaries.get(&session) else { - log::warn!("didn't yet have tributary we're supposed to provide a queued Batch for"); - // Safe since this will drop the txn updating the most recently queued batch - continue 'outer; - }; - log::debug!("providing Batch transaction {:?}", &tx); - let res = tributary.provide_transaction(tx.clone()).await; - if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) { - if res == Err(ProvidedError::LocalMismatchesOnChain) { - // Spin, since this is a crit for this Tributary - loop { - log::error!( - "{}. 
tributary: {}, provided: {:?}", - "tributary added distinct Batch", - hex::encode(spec.genesis()), - &tx, - ); - sleep(Duration::from_secs(60)).await; - } - } - panic!("provided an invalid Batch: {res:?}"); - } - } - - txn.commit(); - } - } -} - -pub async fn handle_processors( - db: D, - key: Zeroizing<::F>, - serai: Arc, - processors: Pro, - p2p: P, - cosign_channel: mpsc::UnboundedSender, - mut tributary_event: broadcast::Receiver>, -) { - let mut channels = HashMap::new(); - for network in serai_client::primitives::NETWORKS { - if network == NetworkId::Serai { - continue; - } - let (processor_send, processor_recv) = mpsc::unbounded_channel(); - tokio::spawn(handle_processor_messages( - db.clone(), - key.clone(), - serai.clone(), - processors.clone(), - p2p.clone(), - cosign_channel.clone(), - network, - processor_recv, - )); - let (cosign_send, cosign_recv) = mpsc::unbounded_channel(); - tokio::spawn(handle_cosigns_and_batch_publication(db.clone(), network, cosign_recv)); - channels.insert(network, (processor_send, cosign_send)); - } - - // Listen to new tributary events - loop { - match tributary_event.recv().await.unwrap() { - TributaryEvent::NewTributary(tributary) => { - let (c1, c2) = &channels[&tributary.spec.set().network]; - c1.send(TributaryEvent::NewTributary(tributary.clone())).unwrap(); - c2.send(TributaryEvent::NewTributary(tributary)).unwrap(); - } - TributaryEvent::TributaryRetired(set) => { - let (c1, c2) = &channels[&set.network]; - c1.send(TributaryEvent::TributaryRetired(set)).unwrap(); - c2.send(TributaryEvent::TributaryRetired(set)).unwrap(); - } - }; - } -} - -pub async fn run( - raw_db: D, - key: Zeroizing<::F>, - p2p: P, - processors: Pro, - serai: Arc, -) { - let (new_tributary_spec_send, mut new_tributary_spec_recv) = mpsc::unbounded_channel(); - // Reload active tributaries from the database - for spec in ActiveTributaryDb::active_tributaries(&raw_db).1 { - new_tributary_spec_send.send(spec).unwrap(); - } - - let (perform_slash_report_send, mut perform_slash_report_recv) = mpsc::unbounded_channel(); - - let (tributary_retired_send, mut tributary_retired_recv) = mpsc::unbounded_channel(); - - // Handle new Substrate blocks - tokio::spawn(crate::substrate::scan_task( - raw_db.clone(), - key.clone(), - processors.clone(), - serai.clone(), - new_tributary_spec_send, - perform_slash_report_send, - tributary_retired_send, - )); - - // Handle the Tributaries - - // This should be large enough for an entire rotation of all tributaries - // If it's too small, the coordinator fail to boot, which is a decent sanity check - let (tributary_event, mut tributary_event_listener_1) = broadcast::channel(32); - let tributary_event_listener_2 = tributary_event.subscribe(); - let tributary_event_listener_3 = tributary_event.subscribe(); - let tributary_event_listener_4 = tributary_event.subscribe(); - let tributary_event_listener_5 = tributary_event.subscribe(); - - // Emit TributaryEvent::TributaryRetired - tokio::spawn({ - let tributary_event = tributary_event.clone(); - async move { - loop { - let retired = tributary_retired_recv.recv().await.unwrap(); - tributary_event.send(TributaryEvent::TributaryRetired(retired)).map_err(|_| ()).unwrap(); - } - } - }); - - // Spawn a task to further add Tributaries as needed - tokio::spawn({ - let raw_db = raw_db.clone(); - let key = key.clone(); - let processors = processors.clone(); - let p2p = p2p.clone(); - async move { - loop { - let spec = new_tributary_spec_recv.recv().await.unwrap(); - // Uses an inner task as Tributary::new may 
take several seconds - tokio::spawn({ - let raw_db = raw_db.clone(); - let key = key.clone(); - let processors = processors.clone(); - let p2p = p2p.clone(); - let tributary_event = tributary_event.clone(); - async move { - add_tributary(raw_db, key, &processors, p2p, &tributary_event, spec).await; - } - }); - } - } - }); - - // When we reach synchrony on an event requiring signing, send our preprocess for it - // TODO: Properly place this into the Tributary scanner, as it's a mess out here - let recognized_id = { - let raw_db = raw_db.clone(); - let key = key.clone(); - - let specs = Arc::new(RwLock::new(HashMap::new())); - let tributaries = Arc::new(RwLock::new(HashMap::new())); - // Spawn a task to maintain a local view of the tributaries for whenever recognized_id is - // called - tokio::spawn({ - let specs = specs.clone(); - let tributaries = tributaries.clone(); - let mut set_to_genesis = HashMap::new(); - async move { - loop { - match tributary_event_listener_1.recv().await { - Ok(TributaryEvent::NewTributary(tributary)) => { - set_to_genesis.insert(tributary.spec.set(), tributary.spec.genesis()); - tributaries.write().await.insert(tributary.spec.genesis(), tributary.tributary); - specs.write().await.insert(tributary.spec.set(), tributary.spec); - } - Ok(TributaryEvent::TributaryRetired(set)) => { - if let Some(genesis) = set_to_genesis.remove(&set) { - specs.write().await.remove(&set); - tributaries.write().await.remove(&genesis); - } - } - Err(broadcast::error::RecvError::Lagged(_)) => { - panic!("recognized_id lagged to handle tributary_event") - } - Err(broadcast::error::RecvError::Closed) => panic!("tributary_event sender closed"), - } - } - } - }); - - // Also spawn a task to handle slash reports, as this needs such a view of tributaries - tokio::spawn({ - let mut raw_db = raw_db.clone(); - let key = key.clone(); - let tributaries = tributaries.clone(); - async move { - 'task_loop: loop { - match perform_slash_report_recv.recv().await { - Some(set) => { - let (genesis, validators) = loop { - let specs = specs.read().await; - let Some(spec) = specs.get(&set) else { - // If we don't have this Tributary because it's retired, break and move on - if RetiredTributaryDb::get(&raw_db, set).is_some() { - continue 'task_loop; - } - - // This may happen if the task above is simply slow - log::warn!("tributary we don't have yet is supposed to perform a slash report"); - continue; - }; - break (spec.genesis(), spec.validators()); - }; - - let mut slashes = vec![]; - for (validator, _) in validators { - if validator == (::generator() * key.deref()) { - continue; - } - let validator = validator.to_bytes(); - - let fatally = tributary::FatallySlashed::get(&raw_db, genesis, validator).is_some(); - // TODO: Properly type this - let points = if fatally { - u32::MAX - } else { - tributary::SlashPoints::get(&raw_db, genesis, validator).unwrap_or(0) - }; - slashes.push(points); - } - - let mut tx = Transaction::SlashReport(slashes, Transaction::empty_signed()); - tx.sign(&mut OsRng, genesis, &key); - - let mut first = true; - loop { - if !first { - sleep(Duration::from_millis(100)).await; - } - first = false; - - let tributaries = tributaries.read().await; - let Some(tributary) = tributaries.get(&genesis) else { - // If we don't have this Tributary because it's retired, break and move on - if RetiredTributaryDb::get(&raw_db, set).is_some() { - break; - } - - // This may happen if the task above is simply slow - log::warn!("tributary we don't have yet is supposed to perform a slash report"); - 
continue; - }; - // This is safe to perform multiple times and solely needs atomicity with regards - // to itself - // TODO: Should this not take a txn accordingly? It's best practice to take a txn, - // yet taking a txn fails to declare its achieved independence - let mut txn = raw_db.txn(); - tributary::publish_signed_transaction(&mut txn, tributary, tx).await; - txn.commit(); - break; - } - } - None => panic!("perform slash report sender closed"), - } - } - } - }); - - move |set: ValidatorSet, genesis, id_type, id: Vec| { - log::debug!("recognized ID {:?} {}", id_type, hex::encode(&id)); - let mut raw_db = raw_db.clone(); - let key = key.clone(); - let tributaries = tributaries.clone(); - async move { - // The transactions for these are fired before the preprocesses are actually - // received/saved, creating a race between Tributary ack and the availability of all - // Preprocesses - // This waits until the necessary preprocess is available 0, - let get_preprocess = |raw_db, id_type, id| async move { - loop { - let Some(preprocess) = FirstPreprocessDb::get(raw_db, set.network, id_type, id) else { - log::warn!("waiting for preprocess for recognized ID"); - sleep(Duration::from_millis(100)).await; - continue; - }; - return preprocess; - } - }; - - let mut tx = match id_type { - RecognizedIdType::Batch => Transaction::SubstrateSign(SignData { - data: get_preprocess(&raw_db, id_type, &id).await, - plan: SubstrateSignableId::Batch(u32::from_le_bytes(id.try_into().unwrap())), - label: Label::Preprocess, - attempt: 0, - signed: Transaction::empty_signed(), - }), - - RecognizedIdType::Plan => Transaction::Sign(SignData { - data: get_preprocess(&raw_db, id_type, &id).await, - plan: id.try_into().unwrap(), - label: Label::Preprocess, - attempt: 0, - signed: Transaction::empty_signed(), - }), - }; - - tx.sign(&mut OsRng, genesis, &key); - - let mut first = true; - loop { - if !first { - sleep(Duration::from_millis(100)).await; - } - first = false; - - let tributaries = tributaries.read().await; - let Some(tributary) = tributaries.get(&genesis) else { - // If we don't have this Tributary because it's retired, break and move on - if RetiredTributaryDb::get(&raw_db, set).is_some() { - break; - } - - // This may happen if the task above is simply slow - log::warn!("tributary we don't have yet came to consensus on an Batch"); - continue; - }; - // This is safe to perform multiple times and solely needs atomicity with regards to - // itself - // TODO: Should this not take a txn accordingly? 
It's best practice to take a txn, yet - // taking a txn fails to declare its achieved independence - let mut txn = raw_db.txn(); - tributary::publish_signed_transaction(&mut txn, tributary, tx).await; - txn.commit(); - break; - } - } - } - }; - - // Handle new blocks for each Tributary - { - let raw_db = raw_db.clone(); - tokio::spawn(tributary::scanner::scan_tributaries_task( - raw_db, - key.clone(), - recognized_id, - processors.clone(), - serai.clone(), - tributary_event_listener_2, - )); - } - - // Spawn the heartbeat task, which will trigger syncing if there hasn't been a Tributary block - // in a while (presumably because we're behind) - tokio::spawn(p2p::heartbeat_tributaries_task(p2p.clone(), tributary_event_listener_3)); - - // Create the Cosign evaluator - let cosign_channel = CosignEvaluator::new(raw_db.clone(), p2p.clone(), serai.clone()); - - // Handle P2P messages - tokio::spawn(p2p::handle_p2p_task( - p2p.clone(), - cosign_channel.clone(), - tributary_event_listener_4, - )); - - // Handle all messages from processors - handle_processors( - raw_db, - key, - serai, - processors, - p2p, - cosign_channel, - tributary_event_listener_5, - ) - .await; -} - -#[tokio::main] -async fn main() { - // Override the panic handler with one which will panic if any tokio task panics - { - let existing = std::panic::take_hook(); - std::panic::set_hook(Box::new(move |panic| { - existing(panic); - const MSG: &str = "exiting the process due to a task panicking"; - println!("{MSG}"); - log::error!("{MSG}"); - std::process::exit(1); - })); - } - - if std::env::var("RUST_LOG").is_err() { - std::env::set_var("RUST_LOG", serai_env::var("RUST_LOG").unwrap_or_else(|| "info".to_string())); - } - env_logger::init(); - - log::info!("starting coordinator service..."); - - #[allow(unused_variables, unreachable_code)] - let db = { - #[cfg(all(feature = "parity-db", feature = "rocksdb"))] - panic!("built with parity-db and rocksdb"); - #[cfg(all(feature = "parity-db", not(feature = "rocksdb")))] - let db = - serai_db::new_parity_db(&serai_env::var("DB_PATH").expect("path to DB wasn't specified")); - #[cfg(feature = "rocksdb")] - let db = - serai_db::new_rocksdb(&serai_env::var("DB_PATH").expect("path to DB wasn't specified")); - db - }; - - let key = { - let mut key_hex = serai_env::var("SERAI_KEY").expect("Serai key wasn't provided"); - let mut key_vec = hex::decode(&key_hex).map_err(|_| ()).expect("Serai key wasn't hex-encoded"); - key_hex.zeroize(); - if key_vec.len() != 32 { - key_vec.zeroize(); - panic!("Serai key had an invalid length"); - } - let mut key_bytes = [0; 32]; - key_bytes.copy_from_slice(&key_vec); - key_vec.zeroize(); - let key = Zeroizing::new(::F::from_repr(key_bytes).unwrap()); - key_bytes.zeroize(); - key - }; - - let processors = Arc::new(MessageQueue::from_env(Service::Coordinator)); - - let serai = (async { - loop { - let Ok(serai) = Serai::new(format!( - "http://{}:9944", - serai_env::var("SERAI_HOSTNAME").expect("Serai hostname wasn't provided") - )) - .await - else { - log::error!("couldn't connect to the Serai node"); - sleep(Duration::from_secs(5)).await; - continue; - }; - log::info!("made initial connection to Serai node"); - return Arc::new(serai); - } - }) - .await; - let p2p = LibP2p::new(serai.clone()); - run(db, key, p2p, processors, serai).await +fn main() { + todo!("TODO") } diff --git a/coordinator/src/p2p.rs b/coordinator/src/p2p.rs deleted file mode 100644 index cecb3517..00000000 --- a/coordinator/src/p2p.rs +++ /dev/null @@ -1,1042 +0,0 @@ -use 
core::{time::Duration, fmt}; -use std::{ - sync::Arc, - io::{self, Read}, - collections::{HashSet, HashMap}, - time::{SystemTime, Instant}, -}; - -use async_trait::async_trait; -use rand_core::{RngCore, OsRng}; - -use scale::{Decode, Encode}; -use borsh::{BorshSerialize, BorshDeserialize}; -use serai_client::{primitives::NetworkId, validator_sets::primitives::ValidatorSet, Serai}; - -use serai_db::Db; - -use futures_util::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, StreamExt}; -use tokio::{ - sync::{Mutex, RwLock, mpsc, broadcast}, - time::sleep, -}; - -use libp2p::{ - core::multiaddr::{Protocol, Multiaddr}, - identity::Keypair, - PeerId, - tcp::Config as TcpConfig, - noise, yamux, - request_response::{ - Codec as RrCodecTrait, Message as RrMessage, Event as RrEvent, Config as RrConfig, - Behaviour as RrBehavior, ProtocolSupport, - }, - gossipsub::{ - IdentTopic, FastMessageId, MessageId, MessageAuthenticity, ValidationMode, ConfigBuilder, - IdentityTransform, AllowAllSubscriptionFilter, Event as GsEvent, PublishError, - Behaviour as GsBehavior, - }, - swarm::{NetworkBehaviour, SwarmEvent}, - SwarmBuilder, -}; - -pub(crate) use tributary::{ReadWrite, P2p as TributaryP2p}; - -use crate::{Transaction, Block, Tributary, ActiveTributary, TributaryEvent}; - -// Block size limit + 1 KB of space for signatures/metadata -const MAX_LIBP2P_GOSSIP_MESSAGE_SIZE: usize = tributary::BLOCK_SIZE_LIMIT + 1024; - -const MAX_LIBP2P_REQRES_MESSAGE_SIZE: usize = - (tributary::BLOCK_SIZE_LIMIT * BLOCKS_PER_BATCH) + 1024; - -const MAX_LIBP2P_MESSAGE_SIZE: usize = { - // Manual `max` since `max` isn't a const fn - if MAX_LIBP2P_GOSSIP_MESSAGE_SIZE > MAX_LIBP2P_REQRES_MESSAGE_SIZE { - MAX_LIBP2P_GOSSIP_MESSAGE_SIZE - } else { - MAX_LIBP2P_REQRES_MESSAGE_SIZE - } -}; - -const LIBP2P_TOPIC: &str = "serai-coordinator"; - -// Amount of blocks in a minute -const BLOCKS_PER_MINUTE: usize = (60 / (tributary::tendermint::TARGET_BLOCK_TIME / 1000)) as usize; - -// Maximum amount of blocks to send in a batch -const BLOCKS_PER_BATCH: usize = BLOCKS_PER_MINUTE + 1; - -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, BorshSerialize, BorshDeserialize)] -pub struct CosignedBlock { - pub network: NetworkId, - pub block_number: u64, - pub block: [u8; 32], - pub signature: [u8; 64], -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub enum ReqResMessageKind { - KeepAlive, - Heartbeat([u8; 32]), - Block([u8; 32]), -} - -impl ReqResMessageKind { - pub fn read(reader: &mut R) -> Option { - let mut kind = [0; 1]; - reader.read_exact(&mut kind).ok()?; - match kind[0] { - 0 => Some(ReqResMessageKind::KeepAlive), - 1 => Some({ - let mut genesis = [0; 32]; - reader.read_exact(&mut genesis).ok()?; - ReqResMessageKind::Heartbeat(genesis) - }), - 2 => Some({ - let mut genesis = [0; 32]; - reader.read_exact(&mut genesis).ok()?; - ReqResMessageKind::Block(genesis) - }), - _ => None, - } - } - - pub fn serialize(&self) -> Vec { - match self { - ReqResMessageKind::KeepAlive => vec![0], - ReqResMessageKind::Heartbeat(genesis) => { - let mut res = vec![1]; - res.extend(genesis); - res - } - ReqResMessageKind::Block(genesis) => { - let mut res = vec![2]; - res.extend(genesis); - res - } - } - } -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub enum GossipMessageKind { - Tributary([u8; 32]), - CosignedBlock, -} - -impl GossipMessageKind { - pub fn read(reader: &mut R) -> Option { - let mut kind = [0; 1]; - reader.read_exact(&mut kind).ok()?; - match kind[0] { - 0 => Some({ - let mut genesis = [0; 32]; - 
reader.read_exact(&mut genesis).ok()?; - GossipMessageKind::Tributary(genesis) - }), - 1 => Some(GossipMessageKind::CosignedBlock), - _ => None, - } - } - - pub fn serialize(&self) -> Vec { - match self { - GossipMessageKind::Tributary(genesis) => { - let mut res = vec![0]; - res.extend(genesis); - res - } - GossipMessageKind::CosignedBlock => { - vec![1] - } - } - } -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub enum P2pMessageKind { - ReqRes(ReqResMessageKind), - Gossip(GossipMessageKind), -} - -impl P2pMessageKind { - fn genesis(&self) -> Option<[u8; 32]> { - match self { - P2pMessageKind::ReqRes(ReqResMessageKind::KeepAlive) | - P2pMessageKind::Gossip(GossipMessageKind::CosignedBlock) => None, - P2pMessageKind::ReqRes( - ReqResMessageKind::Heartbeat(genesis) | ReqResMessageKind::Block(genesis), - ) | - P2pMessageKind::Gossip(GossipMessageKind::Tributary(genesis)) => Some(*genesis), - } - } -} - -impl From for P2pMessageKind { - fn from(kind: ReqResMessageKind) -> P2pMessageKind { - P2pMessageKind::ReqRes(kind) - } -} - -impl From for P2pMessageKind { - fn from(kind: GossipMessageKind) -> P2pMessageKind { - P2pMessageKind::Gossip(kind) - } -} - -#[derive(Clone, Debug)] -pub struct Message { - pub sender: P::Id, - pub kind: P2pMessageKind, - pub msg: Vec, -} - -#[derive(Clone, Debug, Encode, Decode)] -pub struct BlockCommit { - pub block: Vec, - pub commit: Vec, -} - -#[derive(Clone, Debug, Encode, Decode)] -pub struct HeartbeatBatch { - pub blocks: Vec, - pub timestamp: u64, -} - -#[async_trait] -pub trait P2p: Send + Sync + Clone + fmt::Debug + TributaryP2p { - type Id: Send + Sync + Clone + Copy + fmt::Debug; - - async fn subscribe(&self, set: ValidatorSet, genesis: [u8; 32]); - async fn unsubscribe(&self, set: ValidatorSet, genesis: [u8; 32]); - - async fn send_raw(&self, to: Self::Id, msg: Vec); - async fn broadcast_raw(&self, kind: P2pMessageKind, msg: Vec); - async fn receive(&self) -> Message; - - async fn send(&self, to: Self::Id, kind: ReqResMessageKind, msg: Vec) { - let mut actual_msg = kind.serialize(); - actual_msg.extend(msg); - self.send_raw(to, actual_msg).await; - } - async fn broadcast(&self, kind: impl Send + Into, msg: Vec) { - let kind = kind.into(); - let mut actual_msg = match kind { - P2pMessageKind::ReqRes(kind) => kind.serialize(), - P2pMessageKind::Gossip(kind) => kind.serialize(), - }; - actual_msg.extend(msg); - /* - log::trace!( - "broadcasting p2p message (kind {})", - match kind { - P2pMessageKind::KeepAlive => "KeepAlive".to_string(), - P2pMessageKind::Tributary(genesis) => format!("Tributary({})", hex::encode(genesis)), - P2pMessageKind::Heartbeat(genesis) => format!("Heartbeat({})", hex::encode(genesis)), - P2pMessageKind::Block(genesis) => format!("Block({})", hex::encode(genesis)), - P2pMessageKind::CosignedBlock => "CosignedBlock".to_string(), - } - ); - */ - self.broadcast_raw(kind, actual_msg).await; - } -} - -#[derive(Default, Clone, Copy, PartialEq, Eq, Debug)] -struct RrCodec; -#[async_trait] -impl RrCodecTrait for RrCodec { - type Protocol = &'static str; - type Request = Vec; - type Response = Vec; - - async fn read_request( - &mut self, - _: &Self::Protocol, - io: &mut R, - ) -> io::Result> { - let mut len = [0; 4]; - io.read_exact(&mut len).await?; - let len = usize::try_from(u32::from_le_bytes(len)).expect("not at least a 32-bit platform?"); - if len > MAX_LIBP2P_REQRES_MESSAGE_SIZE { - Err(io::Error::other("request length exceeded MAX_LIBP2P_REQRES_MESSAGE_SIZE"))?; - } - // This may be a non-trivial allocation easily 
causable - // While we could chunk the read, meaning we only perform the allocation as bandwidth is used, - // the max message size should be sufficiently sane - let mut buf = vec![0; len]; - io.read_exact(&mut buf).await?; - Ok(buf) - } - async fn read_response( - &mut self, - proto: &Self::Protocol, - io: &mut R, - ) -> io::Result> { - self.read_request(proto, io).await - } - async fn write_request( - &mut self, - _: &Self::Protocol, - io: &mut W, - req: Vec, - ) -> io::Result<()> { - io.write_all( - &u32::try_from(req.len()) - .map_err(|_| io::Error::other("request length exceeded 2**32"))? - .to_le_bytes(), - ) - .await?; - io.write_all(&req).await - } - async fn write_response( - &mut self, - proto: &Self::Protocol, - io: &mut W, - res: Vec, - ) -> io::Result<()> { - self.write_request(proto, io, res).await - } -} - -#[derive(NetworkBehaviour)] -struct Behavior { - reqres: RrBehavior, - gossipsub: GsBehavior, -} - -#[allow(clippy::type_complexity)] -#[derive(Clone)] -pub struct LibP2p { - subscribe: Arc>>, - send: Arc)>>>, - broadcast: Arc)>>>, - receive: Arc>>>, -} -impl fmt::Debug for LibP2p { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt.debug_struct("LibP2p").finish_non_exhaustive() - } -} - -impl LibP2p { - #[allow(clippy::new_without_default)] - pub fn new(serai: Arc) -> Self { - log::info!("creating a libp2p instance"); - - let throwaway_key_pair = Keypair::generate_ed25519(); - - let behavior = Behavior { - reqres: { RrBehavior::new([("/coordinator", ProtocolSupport::Full)], RrConfig::default()) }, - gossipsub: { - let heartbeat_interval = tributary::tendermint::LATENCY_TIME / 2; - let heartbeats_per_block = - usize::try_from(tributary::tendermint::TARGET_BLOCK_TIME / heartbeat_interval).unwrap(); - - use blake2::{Digest, Blake2s256}; - let config = ConfigBuilder::default() - .heartbeat_interval(Duration::from_millis(heartbeat_interval.into())) - .history_length(heartbeats_per_block * 2) - .history_gossip(heartbeats_per_block) - .max_transmit_size(MAX_LIBP2P_GOSSIP_MESSAGE_SIZE) - // We send KeepAlive after 80s - .idle_timeout(Duration::from_secs(85)) - .validation_mode(ValidationMode::Strict) - // Uses a content based message ID to avoid duplicates as much as possible - .message_id_fn(|msg| { - MessageId::new(&Blake2s256::digest([msg.topic.as_str().as_bytes(), &msg.data].concat())) - }) - // Re-defines for fast ID to prevent needing to convert into a Message to run - // message_id_fn - // This function is valid for both - .fast_message_id_fn(|msg| { - FastMessageId::new(&Blake2s256::digest( - [msg.topic.as_str().as_bytes(), &msg.data].concat(), - )) - }) - .build(); - let mut gossipsub = GsBehavior::::new( - MessageAuthenticity::Signed(throwaway_key_pair.clone()), - config.unwrap(), - ) - .unwrap(); - - // Subscribe to the base topic - let topic = IdentTopic::new(LIBP2P_TOPIC); - gossipsub.subscribe(&topic).unwrap(); - - gossipsub - }, - }; - - // Uses noise for authentication, yamux for multiplexing - // TODO: Do we want to add a custom authentication protocol to only accept connections from - // fellow validators? Doing so would reduce the potential for spam - // TODO: Relay client? 
- let mut swarm = SwarmBuilder::with_existing_identity(throwaway_key_pair) - .with_tokio() - .with_tcp(TcpConfig::default().nodelay(true), noise::Config::new, || { - let mut config = yamux::Config::default(); - // 1 MiB default + max message size - config.set_max_buffer_size((1024 * 1024) + MAX_LIBP2P_MESSAGE_SIZE); - // 256 KiB default + max message size - config - .set_receive_window_size(((256 * 1024) + MAX_LIBP2P_MESSAGE_SIZE).try_into().unwrap()); - config - }) - .unwrap() - .with_behaviour(|_| behavior) - .unwrap() - .build(); - const PORT: u16 = 30563; // 5132 ^ (('c' << 8) | 'o') - swarm.listen_on(format!("/ip4/0.0.0.0/tcp/{PORT}").parse().unwrap()).unwrap(); - - let (send_send, mut send_recv) = mpsc::unbounded_channel(); - let (broadcast_send, mut broadcast_recv) = mpsc::unbounded_channel(); - let (receive_send, receive_recv) = mpsc::unbounded_channel(); - let (subscribe_send, mut subscribe_recv) = mpsc::unbounded_channel(); - - fn topic_for_set(set: ValidatorSet) -> IdentTopic { - IdentTopic::new(format!("{LIBP2P_TOPIC}-{}", hex::encode(set.encode()))) - } - - // TODO: If a network has less than TARGET_PEERS, this will cause retries ad infinitum - const TARGET_PEERS: usize = 5; - - // The addrs we're currently dialing, and the networks associated with them - let dialing_peers = Arc::new(RwLock::new(HashMap::new())); - // The peers we're currently connected to, and the networks associated with them - let connected_peers = Arc::new(RwLock::new(HashMap::>::new())); - - // Find and connect to peers - let (connect_to_network_send, mut connect_to_network_recv) = - tokio::sync::mpsc::unbounded_channel(); - let (to_dial_send, mut to_dial_recv) = tokio::sync::mpsc::unbounded_channel(); - tokio::spawn({ - let dialing_peers = dialing_peers.clone(); - let connected_peers = connected_peers.clone(); - - let connect_to_network_send = connect_to_network_send.clone(); - async move { - loop { - let connect = |network: NetworkId, addr: Multiaddr| { - let dialing_peers = dialing_peers.clone(); - let connected_peers = connected_peers.clone(); - let to_dial_send = to_dial_send.clone(); - let connect_to_network_send = connect_to_network_send.clone(); - async move { - log::info!("found peer from substrate: {addr}"); - - let protocols = addr.iter().filter_map(|piece| match piece { - // Drop PeerIds from the Substrate P2p network - Protocol::P2p(_) => None, - // Use our own TCP port - Protocol::Tcp(_) => Some(Protocol::Tcp(PORT)), - other => Some(other), - }); - - let mut new_addr = Multiaddr::empty(); - for protocol in protocols { - new_addr.push(protocol); - } - let addr = new_addr; - log::debug!("transformed found peer: {addr}"); - - let (is_fresh_dial, nets) = { - let mut dialing_peers = dialing_peers.write().await; - let is_fresh_dial = !dialing_peers.contains_key(&addr); - if is_fresh_dial { - dialing_peers.insert(addr.clone(), HashSet::new()); - } - // Associate this network with this peer - dialing_peers.get_mut(&addr).unwrap().insert(network); - - let nets = dialing_peers.get(&addr).unwrap().clone(); - (is_fresh_dial, nets) - }; - - // Spawn a task to remove this peer from 'dialing' in sixty seconds, in case dialing - // fails - // This performs cleanup and bounds the size of the map to whatever growth occurs - // within a temporal window - tokio::spawn({ - let dialing_peers = dialing_peers.clone(); - let connected_peers = connected_peers.clone(); - let connect_to_network_send = connect_to_network_send.clone(); - let addr = addr.clone(); - async move { - 
tokio::time::sleep(core::time::Duration::from_secs(60)).await; - let mut dialing_peers = dialing_peers.write().await; - if let Some(expected_nets) = dialing_peers.remove(&addr) { - log::debug!("removed addr from dialing upon timeout: {addr}"); - - // TODO: De-duplicate this below instance - // If we failed to dial and haven't gotten enough actual connections, retry - let connected_peers = connected_peers.read().await; - for net in expected_nets { - let mut remaining_peers = 0; - for nets in connected_peers.values() { - if nets.contains(&net) { - remaining_peers += 1; - } - } - // If we do not, start connecting to this network again - if remaining_peers < TARGET_PEERS { - connect_to_network_send.send(net).expect( - "couldn't send net to connect to due to disconnects (receiver dropped?)", - ); - } - } - } - } - }); - - if is_fresh_dial { - to_dial_send.send((addr, nets)).unwrap(); - } - } - }; - - // TODO: We should also connect to random peers from random nets as needed for - // cosigning - - // Drain the chainnel, de-duplicating any networks in it - let mut connect_to_network_networks = HashSet::new(); - while let Ok(network) = connect_to_network_recv.try_recv() { - connect_to_network_networks.insert(network); - } - for network in connect_to_network_networks { - if let Ok(mut nodes) = serai.p2p_validators(network).await { - // If there's an insufficient amount of nodes known, connect to all yet add it - // back and break - if nodes.len() < TARGET_PEERS { - log::warn!( - "insufficient amount of P2P nodes known for {:?}: {}", - network, - nodes.len() - ); - // Retry this later - connect_to_network_send.send(network).unwrap(); - for node in nodes { - connect(network, node).await; - } - continue; - } - - // Randomly select up to 150% of the TARGET_PEERS - for _ in 0 .. ((3 * TARGET_PEERS) / 2) { - if !nodes.is_empty() { - let to_connect = nodes.swap_remove( - usize::try_from(OsRng.next_u64() % u64::try_from(nodes.len()).unwrap()) - .unwrap(), - ); - connect(network, to_connect).await; - } - } - } - } - // Sleep 60 seconds before moving to the next iteration - tokio::time::sleep(core::time::Duration::from_secs(60)).await; - } - } - }); - - // Manage the actual swarm - tokio::spawn({ - let mut time_of_last_p2p_message = Instant::now(); - - async move { - let connected_peers = connected_peers.clone(); - - let mut set_for_genesis = HashMap::new(); - loop { - let time_since_last = Instant::now().duration_since(time_of_last_p2p_message); - tokio::select! { - biased; - - // Subscribe to any new topics - set = subscribe_recv.recv() => { - let (subscribe, set, genesis): (_, ValidatorSet, [u8; 32]) = - set.expect("subscribe_recv closed. are we shutting down?"); - let topic = topic_for_set(set); - if subscribe { - log::info!("subscribing to p2p messages for {set:?}"); - connect_to_network_send.send(set.network).unwrap(); - set_for_genesis.insert(genesis, set); - swarm.behaviour_mut().gossipsub.subscribe(&topic).unwrap(); - } else { - log::info!("unsubscribing to p2p messages for {set:?}"); - set_for_genesis.remove(&genesis); - swarm.behaviour_mut().gossipsub.unsubscribe(&topic).unwrap(); - } - } - - msg = send_recv.recv() => { - let (peer, msg): (PeerId, Vec) = - msg.expect("send_recv closed. 
are we shutting down?"); - swarm.behaviour_mut().reqres.send_request(&peer, msg); - }, - - // Handle any queued outbound messages - msg = broadcast_recv.recv() => { - // Update the time of last message - time_of_last_p2p_message = Instant::now(); - - let (kind, msg): (P2pMessageKind, Vec) = - msg.expect("broadcast_recv closed. are we shutting down?"); - - if matches!(kind, P2pMessageKind::ReqRes(_)) { - // Use request/response, yet send to all connected peers - for peer_id in swarm.connected_peers().copied().collect::>() { - swarm.behaviour_mut().reqres.send_request(&peer_id, msg.clone()); - } - } else { - // Use gossipsub - - let set = - kind.genesis().and_then(|genesis| set_for_genesis.get(&genesis).copied()); - let topic = if let Some(set) = set { - topic_for_set(set) - } else { - IdentTopic::new(LIBP2P_TOPIC) - }; - - match swarm.behaviour_mut().gossipsub.publish(topic, msg.clone()) { - Err(PublishError::SigningError(e)) => { - panic!("signing error when broadcasting: {e}") - }, - Err(PublishError::InsufficientPeers) => { - log::warn!("failed to send p2p message due to insufficient peers") - } - Err(PublishError::MessageTooLarge) => { - panic!("tried to send a too large message: {}", hex::encode(msg)) - } - Err(PublishError::TransformFailed(e)) => panic!("IdentityTransform failed: {e}"), - Err(PublishError::Duplicate) | Ok(_) => {} - } - } - } - - // Handle new incoming messages - event = swarm.next() => { - match event { - Some(SwarmEvent::Dialing { connection_id, .. }) => { - log::debug!("dialing to peer in connection ID {}", &connection_id); - } - Some(SwarmEvent::ConnectionEstablished { - peer_id, - connection_id, - endpoint, - .. - }) => { - if &peer_id == swarm.local_peer_id() { - log::warn!("established a libp2p connection to ourselves"); - swarm.close_connection(connection_id); - continue; - } - - let addr = endpoint.get_remote_address(); - let nets = { - let mut dialing_peers = dialing_peers.write().await; - if let Some(nets) = dialing_peers.remove(addr) { - nets - } else { - log::debug!("connected to a peer who we didn't have within dialing"); - HashSet::new() - } - }; - { - let mut connected_peers = connected_peers.write().await; - connected_peers.insert(addr.clone(), nets); - - log::debug!( - "connection established to peer {} in connection ID {}, connected peers: {}", - &peer_id, - &connection_id, - connected_peers.len(), - ); - } - } - Some(SwarmEvent::ConnectionClosed { peer_id, endpoint, .. }) => { - let mut connected_peers = connected_peers.write().await; - let Some(nets) = connected_peers.remove(endpoint.get_remote_address()) else { - log::debug!("closed connection to peer which wasn't in connected_peers"); - continue; - }; - // Downgrade to a read lock - let connected_peers = connected_peers.downgrade(); - - // For each net we lost a peer for, check if we still have sufficient peers - // overall - for net in nets { - let mut remaining_peers = 0; - for nets in connected_peers.values() { - if nets.contains(&net) { - remaining_peers += 1; - } - } - // If we do not, start connecting to this network again - if remaining_peers < TARGET_PEERS { - connect_to_network_send - .send(net) - .expect( - "couldn't send net to connect to due to disconnects (receiver dropped?)" - ); - } - } - - log::debug!( - "connection with peer {peer_id} closed, connected peers: {}", - connected_peers.len(), - ); - } - Some(SwarmEvent::Behaviour(BehaviorEvent::Reqres( - RrEvent::Message { peer, message }, - ))) => { - let message = match message { - RrMessage::Request { request, .. 
} => request, - RrMessage::Response { response, .. } => response, - }; - - let mut msg_ref = message.as_slice(); - let Some(kind) = ReqResMessageKind::read(&mut msg_ref) else { continue }; - let message = Message { - sender: peer, - kind: P2pMessageKind::ReqRes(kind), - msg: msg_ref.to_vec(), - }; - receive_send.send(message).expect("receive_send closed. are we shutting down?"); - } - Some(SwarmEvent::Behaviour(BehaviorEvent::Gossipsub( - GsEvent::Message { propagation_source, message, .. }, - ))) => { - let mut msg_ref = message.data.as_slice(); - let Some(kind) = GossipMessageKind::read(&mut msg_ref) else { continue }; - let message = Message { - sender: propagation_source, - kind: P2pMessageKind::Gossip(kind), - msg: msg_ref.to_vec(), - }; - receive_send.send(message).expect("receive_send closed. are we shutting down?"); - } - _ => {} - } - } - - // Handle peers to dial - addr_and_nets = to_dial_recv.recv() => { - let (addr, nets) = - addr_and_nets.expect("received address was None (sender dropped?)"); - // If we've already dialed and connected to this address, don't further dial them - // Just associate these networks with them - if let Some(existing_nets) = connected_peers.write().await.get_mut(&addr) { - for net in nets { - existing_nets.insert(net); - } - continue; - } - - if let Err(e) = swarm.dial(addr) { - log::warn!("dialing peer failed: {e:?}"); - } - } - - // If it's been >80s since we've published a message, publish a KeepAlive since we're - // still an active service - // This is useful when we have no active tributaries and accordingly aren't sending - // heartbeats - // If we are sending heartbeats, we should've sent one after 60s of no finalized blocks - // (where a finalized block only occurs due to network activity), meaning this won't be - // run - () = tokio::time::sleep(Duration::from_secs(80).saturating_sub(time_since_last)) => { - time_of_last_p2p_message = Instant::now(); - for peer_id in swarm.connected_peers().copied().collect::>() { - swarm - .behaviour_mut() - .reqres - .send_request(&peer_id, ReqResMessageKind::KeepAlive.serialize()); - } - } - } - } - } - }); - - LibP2p { - subscribe: Arc::new(Mutex::new(subscribe_send)), - send: Arc::new(Mutex::new(send_send)), - broadcast: Arc::new(Mutex::new(broadcast_send)), - receive: Arc::new(Mutex::new(receive_recv)), - } - } -} - -#[async_trait] -impl P2p for LibP2p { - type Id = PeerId; - - async fn subscribe(&self, set: ValidatorSet, genesis: [u8; 32]) { - self - .subscribe - .lock() - .await - .send((true, set, genesis)) - .expect("subscribe_send closed. are we shutting down?"); - } - - async fn unsubscribe(&self, set: ValidatorSet, genesis: [u8; 32]) { - self - .subscribe - .lock() - .await - .send((false, set, genesis)) - .expect("subscribe_send closed. are we shutting down?"); - } - - async fn send_raw(&self, peer: Self::Id, msg: Vec) { - self.send.lock().await.send((peer, msg)).expect("send_send closed. are we shutting down?"); - } - - async fn broadcast_raw(&self, kind: P2pMessageKind, msg: Vec) { - self - .broadcast - .lock() - .await - .send((kind, msg)) - .expect("broadcast_send closed. are we shutting down?"); - } - - // TODO: We only have a single handle call this. Differentiate Send/Recv to remove this constant - // lock acquisition? - async fn receive(&self) -> Message { - self.receive.lock().await.recv().await.expect("receive_recv closed. 
are we shutting down?") - } -} - -#[async_trait] -impl TributaryP2p for LibP2p { - async fn broadcast(&self, genesis: [u8; 32], msg: Vec) { - ::broadcast(self, GossipMessageKind::Tributary(genesis), msg).await - } -} - -pub async fn heartbeat_tributaries_task( - p2p: P, - mut tributary_event: broadcast::Receiver>, -) { - let ten_blocks_of_time = - Duration::from_secs((10 * Tributary::::block_time()).into()); - - let mut readers = HashMap::new(); - loop { - loop { - match tributary_event.try_recv() { - Ok(TributaryEvent::NewTributary(ActiveTributary { spec, tributary })) => { - readers.insert(spec.set(), tributary.reader()); - } - Ok(TributaryEvent::TributaryRetired(set)) => { - readers.remove(&set); - } - Err(broadcast::error::TryRecvError::Empty) => break, - Err(broadcast::error::TryRecvError::Lagged(_)) => { - panic!("heartbeat_tributaries lagged to handle tributary_event") - } - Err(broadcast::error::TryRecvError::Closed) => panic!("tributary_event sender closed"), - } - } - - for tributary in readers.values() { - let tip = tributary.tip(); - let block_time = - SystemTime::UNIX_EPOCH + Duration::from_secs(tributary.time_of_block(&tip).unwrap_or(0)); - - // Only trigger syncing if the block is more than a minute behind - if SystemTime::now() > (block_time + Duration::from_secs(60)) { - log::warn!("last known tributary block was over a minute ago"); - let mut msg = tip.to_vec(); - let time: u64 = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .expect("system clock is wrong") - .as_secs(); - msg.extend(time.to_le_bytes()); - P2p::broadcast(&p2p, ReqResMessageKind::Heartbeat(tributary.genesis()), msg).await; - } - } - - // Only check once every 10 blocks of time - sleep(ten_blocks_of_time).await; - } -} - -pub async fn handle_p2p_task( - p2p: P, - cosign_channel: mpsc::UnboundedSender, - mut tributary_event: broadcast::Receiver>, -) { - let channels = Arc::new(RwLock::new(HashMap::<_, mpsc::UnboundedSender>>::new())); - tokio::spawn({ - let p2p = p2p.clone(); - let channels = channels.clone(); - let mut set_to_genesis = HashMap::new(); - async move { - loop { - match tributary_event.recv().await.unwrap() { - TributaryEvent::NewTributary(tributary) => { - let genesis = tributary.spec.genesis(); - set_to_genesis.insert(tributary.spec.set(), genesis); - - let (send, mut recv) = mpsc::unbounded_channel(); - channels.write().await.insert(genesis, send); - - // Subscribe to the topic for this tributary - p2p.subscribe(tributary.spec.set(), genesis).await; - - let spec_set = tributary.spec.set(); - - // Per-Tributary P2P message handler - tokio::spawn({ - let p2p = p2p.clone(); - async move { - loop { - let Some(msg) = recv.recv().await else { - // Channel closure happens when the tributary retires - break; - }; - match msg.kind { - P2pMessageKind::ReqRes(ReqResMessageKind::KeepAlive) => {} - - // TODO: Slash on Heartbeat which justifies a response, since the node - // obviously was offline and we must now use our bandwidth to compensate for - // them? - P2pMessageKind::ReqRes(ReqResMessageKind::Heartbeat(msg_genesis)) => { - assert_eq!(msg_genesis, genesis); - if msg.msg.len() != 40 { - log::error!("validator sent invalid heartbeat"); - continue; - } - // Only respond to recent heartbeats - let msg_time = u64::from_le_bytes(msg.msg[32 .. 
40].try_into().expect( - "length-checked heartbeat message didn't have 8 bytes for the u64", - )); - if SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .expect("system clock is wrong") - .as_secs() - .saturating_sub(msg_time) > - 10 - { - continue; - } - - log::debug!("received heartbeat with a recent timestamp"); - - let reader = tributary.tributary.reader(); - - let p2p = p2p.clone(); - // Spawn a dedicated task as this may require loading large amounts of data - // from disk and take a notable amount of time - tokio::spawn(async move { - let mut latest = msg.msg[.. 32].try_into().unwrap(); - let mut to_send = vec![]; - while let Some(next) = reader.block_after(&latest) { - to_send.push(next); - latest = next; - } - if to_send.len() > 3 { - // prepare the batch to sends - let mut blocks = vec![]; - for (i, next) in to_send.iter().enumerate() { - if i >= BLOCKS_PER_BATCH { - break; - } - - blocks.push(BlockCommit { - block: reader.block(next).unwrap().serialize(), - commit: reader.commit(next).unwrap(), - }); - } - let batch = HeartbeatBatch { blocks, timestamp: msg_time }; - - p2p - .send(msg.sender, ReqResMessageKind::Block(genesis), batch.encode()) - .await; - } - }); - } - - P2pMessageKind::ReqRes(ReqResMessageKind::Block(msg_genesis)) => { - assert_eq!(msg_genesis, genesis); - // decode the batch - let Ok(batch) = HeartbeatBatch::decode(&mut msg.msg.as_ref()) else { - log::error!( - "received HeartBeatBatch message with an invalidly serialized batch" - ); - continue; - }; - - // sync blocks - for bc in batch.blocks { - // TODO: why do we use ReadWrite instead of Encode/Decode for blocks? - // Should we use the same for batches so we can read both at the same time? - let Ok(block) = Block::::read(&mut bc.block.as_slice()) else { - log::error!("received block message with an invalidly serialized block"); - continue; - }; - - let res = tributary.tributary.sync_block(block, bc.commit).await; - log::debug!( - "received block from {:?}, sync_block returned {}", - msg.sender, - res - ); - } - } - - P2pMessageKind::Gossip(GossipMessageKind::Tributary(msg_genesis)) => { - assert_eq!(msg_genesis, genesis); - log::trace!("handling message for tributary {:?}", spec_set); - if tributary.tributary.handle_message(&msg.msg).await { - P2p::broadcast(&p2p, msg.kind, msg.msg).await; - } - } - - P2pMessageKind::Gossip(GossipMessageKind::CosignedBlock) => unreachable!(), - } - } - } - }); - } - TributaryEvent::TributaryRetired(set) => { - if let Some(genesis) = set_to_genesis.remove(&set) { - p2p.unsubscribe(set, genesis).await; - channels.write().await.remove(&genesis); - } - } - } - } - } - }); - - loop { - let msg = p2p.receive().await; - match msg.kind { - P2pMessageKind::ReqRes(ReqResMessageKind::KeepAlive) => {} - P2pMessageKind::Gossip(GossipMessageKind::Tributary(genesis)) | - P2pMessageKind::ReqRes( - ReqResMessageKind::Heartbeat(genesis) | ReqResMessageKind::Block(genesis), - ) => { - if let Some(channel) = channels.read().await.get(&genesis) { - channel.send(msg).unwrap(); - } - } - P2pMessageKind::Gossip(GossipMessageKind::CosignedBlock) => { - let Ok(msg) = CosignedBlock::deserialize_reader(&mut msg.msg.as_slice()) else { - log::error!("received CosignedBlock message with invalidly serialized contents"); - continue; - }; - cosign_channel.send(msg).unwrap(); - } - } - } -} diff --git a/coordinator/src/processors.rs b/coordinator/src/processors.rs deleted file mode 100644 index 9157e2a6..00000000 --- a/coordinator/src/processors.rs +++ /dev/null @@ -1,46 +0,0 @@ -use 
std::sync::Arc; - -use serai_client::primitives::NetworkId; -use processor_messages::{ProcessorMessage, CoordinatorMessage}; - -use message_queue::{Service, Metadata, client::MessageQueue}; - -#[derive(Clone, PartialEq, Eq, Debug)] -pub struct Message { - pub id: u64, - pub network: NetworkId, - pub msg: ProcessorMessage, -} - -#[async_trait::async_trait] -pub trait Processors: 'static + Send + Sync + Clone { - async fn send(&self, network: NetworkId, msg: impl Send + Into); - async fn recv(&self, network: NetworkId) -> Message; - async fn ack(&self, msg: Message); -} - -#[async_trait::async_trait] -impl Processors for Arc { - async fn send(&self, network: NetworkId, msg: impl Send + Into) { - let msg: CoordinatorMessage = msg.into(); - let metadata = - Metadata { from: self.service, to: Service::Processor(network), intent: msg.intent() }; - let msg = borsh::to_vec(&msg).unwrap(); - self.queue(metadata, msg).await; - } - async fn recv(&self, network: NetworkId) -> Message { - let msg = self.next(Service::Processor(network)).await; - assert_eq!(msg.from, Service::Processor(network)); - - let id = msg.id; - - // Deserialize it into a ProcessorMessage - let msg: ProcessorMessage = - borsh::from_slice(&msg.msg).expect("message wasn't a borsh-encoded ProcessorMessage"); - - return Message { id, network, msg }; - } - async fn ack(&self, msg: Message) { - MessageQueue::ack(self, Service::Processor(msg.network), msg.id).await - } -} diff --git a/coordinator/src/tests/mod.rs b/coordinator/src/tests/mod.rs deleted file mode 100644 index db4c158f..00000000 --- a/coordinator/src/tests/mod.rs +++ /dev/null @@ -1,125 +0,0 @@ -use core::fmt::Debug; -use std::{ - sync::Arc, - collections::{VecDeque, HashSet, HashMap}, -}; - -use serai_client::{primitives::NetworkId, validator_sets::primitives::ValidatorSet}; - -use processor_messages::CoordinatorMessage; - -use async_trait::async_trait; - -use tokio::sync::RwLock; - -use crate::{ - processors::{Message, Processors}, - TributaryP2p, ReqResMessageKind, GossipMessageKind, P2pMessageKind, Message as P2pMessage, P2p, -}; - -pub mod tributary; - -#[derive(Clone)] -pub struct MemProcessors(pub Arc>>>); -impl MemProcessors { - #[allow(clippy::new_without_default)] - pub fn new() -> MemProcessors { - MemProcessors(Arc::new(RwLock::new(HashMap::new()))) - } -} - -#[async_trait::async_trait] -impl Processors for MemProcessors { - async fn send(&self, network: NetworkId, msg: impl Send + Into) { - let mut processors = self.0.write().await; - let processor = processors.entry(network).or_insert_with(VecDeque::new); - processor.push_back(msg.into()); - } - async fn recv(&self, _: NetworkId) -> Message { - todo!() - } - async fn ack(&self, _: Message) { - todo!() - } -} - -#[allow(clippy::type_complexity)] -#[derive(Clone, Debug)] -pub struct LocalP2p( - usize, - pub Arc>, Vec)>>)>>, -); - -impl LocalP2p { - pub fn new(validators: usize) -> Vec { - let shared = Arc::new(RwLock::new((HashSet::new(), vec![VecDeque::new(); validators]))); - let mut res = vec![]; - for i in 0 .. 
validators { - res.push(LocalP2p(i, shared.clone())); - } - res - } -} - -#[async_trait] -impl P2p for LocalP2p { - type Id = usize; - - async fn subscribe(&self, _set: ValidatorSet, _genesis: [u8; 32]) {} - async fn unsubscribe(&self, _set: ValidatorSet, _genesis: [u8; 32]) {} - - async fn send_raw(&self, to: Self::Id, msg: Vec) { - let mut msg_ref = msg.as_slice(); - let kind = ReqResMessageKind::read(&mut msg_ref).unwrap(); - self.1.write().await.1[to].push_back((self.0, P2pMessageKind::ReqRes(kind), msg_ref.to_vec())); - } - - async fn broadcast_raw(&self, kind: P2pMessageKind, msg: Vec) { - // Content-based deduplication - let mut lock = self.1.write().await; - { - let already_sent = &mut lock.0; - if already_sent.contains(&msg) { - return; - } - already_sent.insert(msg.clone()); - } - let queues = &mut lock.1; - - let kind_len = (match kind { - P2pMessageKind::ReqRes(kind) => kind.serialize(), - P2pMessageKind::Gossip(kind) => kind.serialize(), - }) - .len(); - let msg = msg[kind_len ..].to_vec(); - - for (i, msg_queue) in queues.iter_mut().enumerate() { - if i == self.0 { - continue; - } - msg_queue.push_back((self.0, kind, msg.clone())); - } - } - - async fn receive(&self) -> P2pMessage { - // This is a cursed way to implement an async read from a Vec - loop { - if let Some((sender, kind, msg)) = self.1.write().await.1[self.0].pop_front() { - return P2pMessage { sender, kind, msg }; - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } - } -} - -#[async_trait] -impl TributaryP2p for LocalP2p { - async fn broadcast(&self, genesis: [u8; 32], msg: Vec) { - ::broadcast( - self, - P2pMessageKind::Gossip(GossipMessageKind::Tributary(genesis)), - msg, - ) - .await - } -} diff --git a/coordinator/src/tests/tributary/chain.rs b/coordinator/src/tests/tributary/chain.rs deleted file mode 100644 index 746c611b..00000000 --- a/coordinator/src/tests/tributary/chain.rs +++ /dev/null @@ -1,243 +0,0 @@ -use std::{ - time::{Duration, SystemTime}, - collections::HashSet, -}; - -use zeroize::Zeroizing; -use rand_core::{RngCore, CryptoRng, OsRng}; -use futures_util::{task::Poll, poll}; - -use ciphersuite::{group::ff::Field, Ciphersuite, Ristretto}; - -use borsh::BorshDeserialize; -use serai_client::{ - primitives::NetworkId, - validator_sets::primitives::{Session, ValidatorSet}, -}; - -use tokio::time::sleep; - -use serai_db::MemDb; - -use tributary::Tributary; - -use crate::{ - GossipMessageKind, P2pMessageKind, P2p, - tributary::{Transaction, TributarySpec}, - tests::LocalP2p, -}; - -pub fn new_keys( - rng: &mut R, -) -> Vec::F>> { - let mut keys = vec![]; - for _ in 0 .. 5 { - keys.push(Zeroizing::new(::F::random(&mut *rng))); - } - keys -} - -pub fn new_spec( - rng: &mut R, - keys: &[Zeroizing<::F>], -) -> TributarySpec { - let mut serai_block = [0; 32]; - rng.fill_bytes(&mut serai_block); - - let start_time = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs(); - - let set = ValidatorSet { session: Session(0), network: NetworkId::Bitcoin }; - - let validators = keys - .iter() - .map(|key| ((::generator() * **key), 1)) - .collect::>(); - - // Generate random eVRF keys as none of these test rely on them to have any structure - let mut evrf_keys = vec![]; - for _ in 0 .. 
keys.len() { - let mut substrate = [0; 32]; - OsRng.fill_bytes(&mut substrate); - let mut network = vec![0; 64]; - OsRng.fill_bytes(&mut network); - evrf_keys.push((substrate, network)); - } - - let res = TributarySpec::new(serai_block, start_time, set, validators, evrf_keys); - assert_eq!( - TributarySpec::deserialize_reader(&mut borsh::to_vec(&res).unwrap().as_slice()).unwrap(), - res, - ); - res -} - -pub async fn new_tributaries( - keys: &[Zeroizing<::F>], - spec: &TributarySpec, -) -> Vec<(MemDb, LocalP2p, Tributary)> { - let p2p = LocalP2p::new(keys.len()); - let mut res = vec![]; - for (i, key) in keys.iter().enumerate() { - let db = MemDb::new(); - res.push(( - db.clone(), - p2p[i].clone(), - Tributary::<_, Transaction, _>::new( - db, - spec.genesis(), - spec.start_time(), - key.clone(), - spec.validators(), - p2p[i].clone(), - ) - .await - .unwrap(), - )); - } - res -} - -pub async fn run_tributaries( - mut tributaries: Vec<(LocalP2p, Tributary)>, -) { - loop { - for (p2p, tributary) in &mut tributaries { - while let Poll::Ready(msg) = poll!(p2p.receive()) { - match msg.kind { - P2pMessageKind::Gossip(GossipMessageKind::Tributary(genesis)) => { - assert_eq!(genesis, tributary.genesis()); - if tributary.handle_message(&msg.msg).await { - p2p.broadcast(msg.kind, msg.msg).await; - } - } - _ => panic!("unexpected p2p message found"), - } - } - } - - sleep(Duration::from_millis(100)).await; - } -} - -pub async fn wait_for_tx_inclusion( - tributary: &Tributary, - mut last_checked: [u8; 32], - hash: [u8; 32], -) -> [u8; 32] { - let reader = tributary.reader(); - loop { - let tip = tributary.tip().await; - if tip == last_checked { - sleep(Duration::from_secs(1)).await; - continue; - } - - let mut queue = vec![reader.block(&tip).unwrap()]; - let mut block = None; - while { - let parent = queue.last().unwrap().parent(); - if parent == tributary.genesis() { - false - } else { - block = Some(reader.block(&parent).unwrap()); - block.as_ref().unwrap().hash() != last_checked - } - } { - queue.push(block.take().unwrap()); - } - - while let Some(block) = queue.pop() { - for tx in &block.transactions { - if tx.hash() == hash { - return block.hash(); - } - } - } - - last_checked = tip; - } -} - -#[tokio::test] -async fn tributary_test() { - let keys = new_keys(&mut OsRng); - let spec = new_spec(&mut OsRng, &keys); - - let mut tributaries = new_tributaries(&keys, &spec) - .await - .into_iter() - .map(|(_, p2p, tributary)| (p2p, tributary)) - .collect::>(); - - let mut blocks = 0; - let mut last_block = spec.genesis(); - - // Doesn't use run_tributaries as we want to wind these down at a certain point - // run_tributaries will run them ad infinitum - let timeout = SystemTime::now() + Duration::from_secs(65); - while (blocks < 10) && (SystemTime::now().duration_since(timeout).is_err()) { - for (p2p, tributary) in &mut tributaries { - while let Poll::Ready(msg) = poll!(p2p.receive()) { - match msg.kind { - P2pMessageKind::Gossip(GossipMessageKind::Tributary(genesis)) => { - assert_eq!(genesis, tributary.genesis()); - tributary.handle_message(&msg.msg).await; - } - _ => panic!("unexpected p2p message found"), - } - } - } - - let tip = tributaries[0].1.tip().await; - if tip != last_block { - last_block = tip; - blocks += 1; - } - - sleep(Duration::from_millis(100)).await; - } - - if blocks != 10 { - panic!("tributary chain test hit timeout"); - } - - // Handle all existing messages - for (p2p, tributary) in &mut tributaries { - while let Poll::Ready(msg) = poll!(p2p.receive()) { - match msg.kind { - 
P2pMessageKind::Gossip(GossipMessageKind::Tributary(genesis)) => { - assert_eq!(genesis, tributary.genesis()); - tributary.handle_message(&msg.msg).await; - } - _ => panic!("unexpected p2p message found"), - } - } - } - - // handle_message informed the Tendermint machine, yet it still has to process it - // Sleep for a second accordingly - // TODO: Is there a better way to handle this? - sleep(Duration::from_secs(1)).await; - - // All tributaries should agree on the tip, within a block - let mut tips = HashSet::new(); - for (_, tributary) in &tributaries { - tips.insert(tributary.tip().await); - } - assert!(tips.len() <= 2); - if tips.len() == 2 { - for tip in &tips { - // Find a Tributary where this isn't the tip - for (_, tributary) in &tributaries { - let Some(after) = tributary.reader().block_after(tip) else { continue }; - // Make sure the block after is the other tip - assert!(tips.contains(&after)); - return; - } - } - } else { - assert_eq!(tips.len(), 1); - return; - } - panic!("tributary had different tip with a variance exceeding one block"); -} diff --git a/coordinator/src/tests/tributary/dkg.rs b/coordinator/src/tests/tributary/dkg.rs deleted file mode 100644 index aafa9a33..00000000 --- a/coordinator/src/tests/tributary/dkg.rs +++ /dev/null @@ -1,282 +0,0 @@ -use core::time::Duration; - -use zeroize::Zeroizing; -use rand_core::{RngCore, OsRng}; - -use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; -use frost::Participant; - -use sp_runtime::traits::Verify; -use serai_client::{ - primitives::Signature, - validator_sets::primitives::{ValidatorSet, KeyPair}, -}; - -use tokio::time::sleep; - -use serai_db::{Get, DbTxn, Db, MemDb}; - -use processor_messages::{key_gen, CoordinatorMessage}; - -use tributary::{TransactionTrait, Tributary}; - -use crate::{ - tributary::{ - Transaction, TributarySpec, - scanner::{PublishSeraiTransaction, handle_new_blocks}, - }, - tests::{ - MemProcessors, LocalP2p, - tributary::{new_keys, new_spec, new_tributaries, run_tributaries, wait_for_tx_inclusion}, - }, -}; - -#[tokio::test] -async fn dkg_test() { - env_logger::init(); - - let keys = new_keys(&mut OsRng); - let spec = new_spec(&mut OsRng, &keys); - - let full_tributaries = new_tributaries(&keys, &spec).await; - let mut dbs = vec![]; - let mut tributaries = vec![]; - for (db, p2p, tributary) in full_tributaries { - dbs.push(db); - tributaries.push((p2p, tributary)); - } - - // Run the tributaries in the background - tokio::spawn(run_tributaries(tributaries.clone())); - - let mut txs = vec![]; - // Create DKG participation for each key - for key in &keys { - let mut participation = vec![0; 4096]; - OsRng.fill_bytes(&mut participation); - - let mut tx = - Transaction::DkgParticipation { participation, signed: Transaction::empty_signed() }; - tx.sign(&mut OsRng, spec.genesis(), key); - txs.push(tx); - } - - let block_before_tx = tributaries[0].1.tip().await; - - // Publish t-1 participations - let t = ((keys.len() * 2) / 3) + 1; - for (i, tx) in txs.iter().take(t - 1).enumerate() { - assert_eq!(tributaries[i].1.add_transaction(tx.clone()).await, Ok(true)); - wait_for_tx_inclusion(&tributaries[0].1, block_before_tx, tx.hash()).await; - } - - let expected_participations = txs - .iter() - .enumerate() - .map(|(i, tx)| { - if let Transaction::DkgParticipation { participation, .. 
} = tx { - CoordinatorMessage::KeyGen(key_gen::CoordinatorMessage::Participation { - session: spec.set().session, - participant: Participant::new((i + 1).try_into().unwrap()).unwrap(), - participation: participation.clone(), - }) - } else { - panic!("txs wasn't a DkgParticipation"); - } - }) - .collect::>(); - - async fn new_processors( - db: &mut MemDb, - key: &Zeroizing<::F>, - spec: &TributarySpec, - tributary: &Tributary, - ) -> MemProcessors { - let processors = MemProcessors::new(); - handle_new_blocks::<_, _, _, _, _, LocalP2p>( - db, - key, - &|_, _, _, _| async { - panic!("provided TX caused recognized_id to be called in new_processors") - }, - &processors, - &(), - &|_| async { - panic!( - "test tried to publish a new Tributary TX from handle_application_tx in new_processors" - ) - }, - spec, - &tributary.reader(), - ) - .await; - processors - } - - // Instantiate a scanner and verify it has the first two participations to report (and isn't - // waiting for `t`) - let processors = new_processors(&mut dbs[0], &keys[0], &spec, &tributaries[0].1).await; - assert_eq!(processors.0.read().await.get(&spec.set().network).unwrap().len(), t - 1); - - // Publish the rest of the participations - let block_before_tx = tributaries[0].1.tip().await; - for tx in txs.iter().skip(t - 1) { - assert_eq!(tributaries[0].1.add_transaction(tx.clone()).await, Ok(true)); - wait_for_tx_inclusion(&tributaries[0].1, block_before_tx, tx.hash()).await; - } - - // Verify the scanner emits all KeyGen::Participations messages - handle_new_blocks::<_, _, _, _, _, LocalP2p>( - &mut dbs[0], - &keys[0], - &|_, _, _, _| async { - panic!("provided TX caused recognized_id to be called after DkgParticipation") - }, - &processors, - &(), - &|_| async { - panic!( - "test tried to publish a new Tributary TX from handle_application_tx after DkgParticipation" - ) - }, - &spec, - &tributaries[0].1.reader(), - ) - .await; - { - let mut msgs = processors.0.write().await; - let msgs = msgs.get_mut(&spec.set().network).unwrap(); - assert_eq!(msgs.len(), keys.len()); - for expected in &expected_participations { - assert_eq!(&msgs.pop_front().unwrap(), expected); - } - assert!(msgs.is_empty()); - } - - // Verify all keys exhibit this scanner behavior - for (i, key) in keys.iter().enumerate().skip(1) { - let processors = new_processors(&mut dbs[i], key, &spec, &tributaries[i].1).await; - let mut msgs = processors.0.write().await; - let msgs = msgs.get_mut(&spec.set().network).unwrap(); - assert_eq!(msgs.len(), keys.len()); - for expected in &expected_participations { - assert_eq!(&msgs.pop_front().unwrap(), expected); - } - assert!(msgs.is_empty()); - } - - let mut substrate_key = [0; 32]; - OsRng.fill_bytes(&mut substrate_key); - let mut network_key = vec![0; usize::try_from((OsRng.next_u64() % 32) + 32).unwrap()]; - OsRng.fill_bytes(&mut network_key); - let key_pair = KeyPair(serai_client::Public(substrate_key), network_key.try_into().unwrap()); - - let mut txs = vec![]; - for (i, key) in keys.iter().enumerate() { - let mut txn = dbs[i].txn(); - - // Claim we've generated the key pair - crate::tributary::generated_key_pair::(&mut txn, spec.genesis(), &key_pair); - - // Publish the nonces - let attempt = 0; - let mut tx = Transaction::DkgConfirmationNonces { - attempt, - confirmation_nonces: crate::tributary::dkg_confirmation_nonces(key, &spec, &mut txn, 0), - signed: Transaction::empty_signed(), - }; - txn.commit(); - tx.sign(&mut OsRng, spec.genesis(), key); - txs.push(tx); - } - let block_before_tx = tributaries[0].1.tip().await; 
- for (i, tx) in txs.iter().enumerate() { - assert_eq!(tributaries[i].1.add_transaction(tx.clone()).await, Ok(true)); - } - for tx in &txs { - wait_for_tx_inclusion(&tributaries[0].1, block_before_tx, tx.hash()).await; - } - - // This should not cause any new processor event as the processor doesn't handle DKG confirming - for (i, key) in keys.iter().enumerate() { - handle_new_blocks::<_, _, _, _, _, LocalP2p>( - &mut dbs[i], - key, - &|_, _, _, _| async { - panic!("provided TX caused recognized_id to be called after DkgConfirmationNonces") - }, - &processors, - &(), - // The Tributary handler should publish ConfirmationShare itself after ConfirmationNonces - &|tx| async { assert_eq!(tributaries[i].1.add_transaction(tx).await, Ok(true)) }, - &spec, - &tributaries[i].1.reader(), - ) - .await; - { - assert!(processors.0.read().await.get(&spec.set().network).unwrap().is_empty()); - } - } - - // Yet once these TXs are on-chain, the tributary should itself publish the confirmation shares - // This means in the block after the next block, the keys should be set onto Serai - // Sleep twice as long as two blocks, in case there's some stability issue - sleep(Duration::from_secs( - 2 * 2 * u64::from(Tributary::::block_time()), - )) - .await; - - struct CheckPublishSetKeys { - spec: TributarySpec, - key_pair: KeyPair, - } - #[async_trait::async_trait] - impl PublishSeraiTransaction for CheckPublishSetKeys { - async fn publish_set_keys( - &self, - _db: &(impl Sync + Get), - set: ValidatorSet, - key_pair: KeyPair, - signature_participants: bitvec::vec::BitVec, - signature: Signature, - ) { - assert_eq!(set, self.spec.set()); - assert_eq!(self.key_pair, key_pair); - assert!(signature.verify( - &*serai_client::validator_sets::primitives::set_keys_message(&set, &key_pair), - &serai_client::Public( - frost::dkg::musig::musig_key::( - &serai_client::validator_sets::primitives::musig_context(set), - &self - .spec - .validators() - .into_iter() - .zip(signature_participants) - .filter_map(|((validator, _), included)| included.then_some(validator)) - .collect::>() - ) - .unwrap() - .to_bytes() - ), - )); - } - } - - // The scanner should successfully try to publish a transaction with a validly signed signature - handle_new_blocks::<_, _, _, _, _, LocalP2p>( - &mut dbs[0], - &keys[0], - &|_, _, _, _| async { - panic!("provided TX caused recognized_id to be called after DKG confirmation") - }, - &processors, - &CheckPublishSetKeys { spec: spec.clone(), key_pair: key_pair.clone() }, - &|_| async { panic!("test tried to publish a new Tributary TX from handle_application_tx") }, - &spec, - &tributaries[0].1.reader(), - ) - .await; - { - assert!(processors.0.read().await.get(&spec.set().network).unwrap().is_empty()); - } -} diff --git a/coordinator/src/tests/tributary/handle_p2p.rs b/coordinator/src/tests/tributary/handle_p2p.rs deleted file mode 100644 index 756f4561..00000000 --- a/coordinator/src/tests/tributary/handle_p2p.rs +++ /dev/null @@ -1,74 +0,0 @@ -use core::time::Duration; -use std::sync::Arc; - -use rand_core::OsRng; - -use tokio::{ - sync::{mpsc, broadcast}, - time::sleep, -}; - -use serai_db::MemDb; - -use tributary::Tributary; - -use crate::{ - tributary::Transaction, - ActiveTributary, TributaryEvent, - p2p::handle_p2p_task, - tests::{ - LocalP2p, - tributary::{new_keys, new_spec, new_tributaries}, - }, -}; - -#[tokio::test] -async fn handle_p2p_test() { - let keys = new_keys(&mut OsRng); - let spec = new_spec(&mut OsRng, &keys); - - let mut tributaries = new_tributaries(&keys, &spec) - .await - 
.into_iter() - .map(|(_, p2p, tributary)| (p2p, tributary)) - .collect::>(); - - let mut tributary_senders = vec![]; - let mut tributary_arcs = vec![]; - for (p2p, tributary) in tributaries.drain(..) { - let tributary = Arc::new(tributary); - tributary_arcs.push(tributary.clone()); - let (new_tributary_send, new_tributary_recv) = broadcast::channel(5); - let (cosign_send, _) = mpsc::unbounded_channel(); - tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv)); - new_tributary_send - .send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary })) - .map_err(|_| "failed to send ActiveTributary") - .unwrap(); - tributary_senders.push(new_tributary_send); - } - let tributaries = tributary_arcs; - - // After two blocks of time, we should have a new block - // We don't wait one block of time as we may have missed the chance for this block - sleep(Duration::from_secs((2 * Tributary::::block_time()).into())) - .await; - let tip = tributaries[0].tip().await; - assert!(tip != spec.genesis()); - - // Sleep one second to make sure this block propagates - sleep(Duration::from_secs(1)).await; - // Make sure every tributary has it - for tributary in &tributaries { - assert!(tributary.reader().block(&tip).is_some()); - } - - // Then after another block of time, we should have yet another new block - sleep(Duration::from_secs(Tributary::::block_time().into())).await; - let new_tip = tributaries[0].tip().await; - assert!(new_tip != tip); - sleep(Duration::from_secs(1)).await; - for tributary in tributaries { - assert!(tributary.reader().block(&new_tip).is_some()); - } -} diff --git a/coordinator/src/tests/tributary/mod.rs b/coordinator/src/tests/tributary/mod.rs deleted file mode 100644 index 340809e1..00000000 --- a/coordinator/src/tests/tributary/mod.rs +++ /dev/null @@ -1,245 +0,0 @@ -use core::fmt::Debug; - -use rand_core::{RngCore, OsRng}; - -use ciphersuite::{group::Group, Ciphersuite, Ristretto}; - -use scale::{Encode, Decode}; -use serai_client::{ - primitives::Signature, - validator_sets::primitives::{MAX_KEY_SHARES_PER_SET, ValidatorSet, KeyPair}, -}; -use processor_messages::coordinator::SubstrateSignableId; - -use tributary::{ReadWrite, tests::random_signed_with_nonce}; - -use crate::tributary::{Label, SignData, Transaction, scanner::PublishSeraiTransaction}; - -mod chain; -pub use chain::*; - -mod tx; - -mod dkg; -// TODO: Test the other transactions - -mod handle_p2p; -mod sync; - -#[async_trait::async_trait] -impl PublishSeraiTransaction for () { - async fn publish_set_keys( - &self, - _db: &(impl Sync + serai_db::Get), - _set: ValidatorSet, - _key_pair: KeyPair, - _signature_participants: bitvec::vec::BitVec, - _signature: Signature, - ) { - panic!("publish_set_keys was called in test") - } -} - -fn random_u32(rng: &mut R) -> u32 { - u32::try_from(rng.next_u64() >> 32).unwrap() -} - -fn random_vec(rng: &mut R, limit: usize) -> Vec { - let len = usize::try_from(rng.next_u64() % u64::try_from(limit).unwrap()).unwrap(); - let mut res = vec![0; len]; - rng.fill_bytes(&mut res); - res -} - -fn random_sign_data( - rng: &mut R, - plan: Id, - label: Label, -) -> SignData { - SignData { - plan, - attempt: random_u32(&mut OsRng), - label, - - data: { - let mut res = vec![]; - for _ in 0 ..= (rng.next_u64() % 255) { - res.push(random_vec(&mut OsRng, 512)); - } - res - }, - - signed: random_signed_with_nonce(&mut OsRng, label.nonce()), - } -} - -fn test_read_write(value: &RW) { - assert_eq!(value, &RW::read::<&[u8]>(&mut value.serialize().as_ref()).unwrap()); -} - 
-#[test] -fn tx_size_limit() { - use serai_client::validator_sets::primitives::MAX_KEY_LEN; - - use tributary::TRANSACTION_SIZE_LIMIT; - - let max_dkg_coefficients = (MAX_KEY_SHARES_PER_SET * 2).div_ceil(3) + 1; - // n coefficients - // 2 ECDH values per recipient, and the encrypted share - let elements_outside_of_proof = max_dkg_coefficients + ((2 + 1) * MAX_KEY_SHARES_PER_SET); - // Then Pedersen Vector Commitments for each DH done, and the associated overhead in the proof - // It's handwaved as one commitment per DH, where we do 2 per coefficient and 1 for the explicit - // ECDHs - let vector_commitments = (2 * max_dkg_coefficients) + (2 * MAX_KEY_SHARES_PER_SET); - // Then we have commitments to the `t` polynomial of length 2 + 2 nc, where nc is the amount of - // commitments - let t_commitments = 2 + (2 * vector_commitments); - // The remainder of the proof should be ~30 elements - let proof_elements = 30; - - let handwaved_dkg_size = - ((elements_outside_of_proof + vector_commitments + t_commitments + proof_elements) * - MAX_KEY_LEN) + - 1024; - // Further scale by two in case of any errors in the above - assert!(u32::try_from(TRANSACTION_SIZE_LIMIT).unwrap() >= (2 * handwaved_dkg_size)); -} - -#[test] -fn serialize_sign_data() { - fn test_read_write(value: &SignData) { - let mut buf = vec![]; - value.write(&mut buf).unwrap(); - assert_eq!(value, &SignData::read(&mut buf.as_slice()).unwrap()) - } - - let mut plan = [0; 3]; - OsRng.fill_bytes(&mut plan); - test_read_write(&random_sign_data::<_, _>( - &mut OsRng, - plan, - if (OsRng.next_u64() % 2) == 0 { Label::Preprocess } else { Label::Share }, - )); - let mut plan = [0; 5]; - OsRng.fill_bytes(&mut plan); - test_read_write(&random_sign_data::<_, _>( - &mut OsRng, - plan, - if (OsRng.next_u64() % 2) == 0 { Label::Preprocess } else { Label::Share }, - )); - let mut plan = [0; 8]; - OsRng.fill_bytes(&mut plan); - test_read_write(&random_sign_data::<_, _>( - &mut OsRng, - plan, - if (OsRng.next_u64() % 2) == 0 { Label::Preprocess } else { Label::Share }, - )); - let mut plan = [0; 24]; - OsRng.fill_bytes(&mut plan); - test_read_write(&random_sign_data::<_, _>( - &mut OsRng, - plan, - if (OsRng.next_u64() % 2) == 0 { Label::Preprocess } else { Label::Share }, - )); -} - -#[test] -fn serialize_transaction() { - test_read_write(&Transaction::RemoveParticipant { - participant: ::G::random(&mut OsRng), - signed: random_signed_with_nonce(&mut OsRng, 0), - }); - - test_read_write(&Transaction::DkgParticipation { - participation: random_vec(&mut OsRng, 4096), - signed: random_signed_with_nonce(&mut OsRng, 0), - }); - - test_read_write(&Transaction::DkgConfirmationNonces { - attempt: random_u32(&mut OsRng), - confirmation_nonces: { - let mut nonces = [0; 64]; - OsRng.fill_bytes(&mut nonces); - nonces - }, - signed: random_signed_with_nonce(&mut OsRng, 0), - }); - - test_read_write(&Transaction::DkgConfirmationShare { - attempt: random_u32(&mut OsRng), - confirmation_share: { - let mut share = [0; 32]; - OsRng.fill_bytes(&mut share); - share - }, - signed: random_signed_with_nonce(&mut OsRng, 1), - }); - - { - let mut block = [0; 32]; - OsRng.fill_bytes(&mut block); - test_read_write(&Transaction::CosignSubstrateBlock(block)); - } - - { - let mut block = [0; 32]; - OsRng.fill_bytes(&mut block); - let batch = u32::try_from(OsRng.next_u64() >> 32).unwrap(); - test_read_write(&Transaction::Batch { block, batch }); - } - test_read_write(&Transaction::SubstrateBlock(OsRng.next_u64())); - - { - let batch = u32::try_from(OsRng.next_u64() >> 
32).unwrap(); - test_read_write(&Transaction::SubstrateSign(random_sign_data( - &mut OsRng, - SubstrateSignableId::Batch(batch), - Label::Preprocess, - ))); - } - { - let batch = u32::try_from(OsRng.next_u64() >> 32).unwrap(); - test_read_write(&Transaction::SubstrateSign(random_sign_data( - &mut OsRng, - SubstrateSignableId::Batch(batch), - Label::Share, - ))); - } - - { - let mut plan = [0; 32]; - OsRng.fill_bytes(&mut plan); - test_read_write(&Transaction::Sign(random_sign_data(&mut OsRng, plan, Label::Preprocess))); - } - { - let mut plan = [0; 32]; - OsRng.fill_bytes(&mut plan); - test_read_write(&Transaction::Sign(random_sign_data(&mut OsRng, plan, Label::Share))); - } - - { - let mut plan = [0; 32]; - OsRng.fill_bytes(&mut plan); - let mut tx_hash = vec![0; (OsRng.next_u64() % 64).try_into().unwrap()]; - OsRng.fill_bytes(&mut tx_hash); - test_read_write(&Transaction::SignCompleted { - plan, - tx_hash, - first_signer: random_signed_with_nonce(&mut OsRng, 2).signer, - signature: random_signed_with_nonce(&mut OsRng, 2).signature, - }); - } - - test_read_write(&Transaction::SlashReport( - { - let amount = - usize::try_from(OsRng.next_u64() % u64::from(MAX_KEY_SHARES_PER_SET - 1)).unwrap(); - let mut points = vec![]; - for _ in 0 .. amount { - points.push((OsRng.next_u64() >> 32).try_into().unwrap()); - } - points - }, - random_signed_with_nonce(&mut OsRng, 0), - )); -} diff --git a/coordinator/src/tests/tributary/sync.rs b/coordinator/src/tests/tributary/sync.rs deleted file mode 100644 index a0b68839..00000000 --- a/coordinator/src/tests/tributary/sync.rs +++ /dev/null @@ -1,165 +0,0 @@ -use core::time::Duration; -use std::{sync::Arc, collections::HashSet}; - -use rand_core::OsRng; - -use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; - -use tokio::{ - sync::{mpsc, broadcast}, - time::sleep, -}; - -use serai_db::MemDb; - -use tributary::Tributary; - -use crate::{ - tributary::Transaction, - ActiveTributary, TributaryEvent, - p2p::{heartbeat_tributaries_task, handle_p2p_task}, - tests::{ - LocalP2p, - tributary::{new_keys, new_spec, new_tributaries}, - }, -}; - -#[tokio::test] -async fn sync_test() { - let mut keys = new_keys(&mut OsRng); - let spec = new_spec(&mut OsRng, &keys); - // Ensure this can have a node fail - assert!(spec.n() > spec.t()); - - let mut tributaries = new_tributaries(&keys, &spec) - .await - .into_iter() - .map(|(_, p2p, tributary)| (p2p, tributary)) - .collect::>(); - - // Keep a Tributary back, effectively having it offline - let syncer_key = keys.pop().unwrap(); - let (syncer_p2p, syncer_tributary) = tributaries.pop().unwrap(); - - // Have the rest form a P2P net - let mut tributary_senders = vec![]; - let mut tributary_arcs = vec![]; - let mut p2p_threads = vec![]; - for (p2p, tributary) in tributaries.drain(..) 
{ - let tributary = Arc::new(tributary); - tributary_arcs.push(tributary.clone()); - let (new_tributary_send, new_tributary_recv) = broadcast::channel(5); - let (cosign_send, _) = mpsc::unbounded_channel(); - let thread = tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv)); - new_tributary_send - .send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary })) - .map_err(|_| "failed to send ActiveTributary") - .unwrap(); - tributary_senders.push(new_tributary_send); - p2p_threads.push(thread); - } - let tributaries = tributary_arcs; - - // After four blocks of time, we should have a new block - // We don't wait one block of time as we may have missed the chance for the first block - // We don't wait two blocks because we may have missed the chance, and then had a failure to - // propose by our 'offline' validator, which would cause the Tendermint round time to increase, - // requiring a longer delay - let block_time = u64::from(Tributary::::block_time()); - sleep(Duration::from_secs(4 * block_time)).await; - let tip = tributaries[0].tip().await; - assert!(tip != spec.genesis()); - - // Sleep one second to make sure this block propagates - sleep(Duration::from_secs(1)).await; - // Make sure every tributary has it - for tributary in &tributaries { - assert!(tributary.reader().block(&tip).is_some()); - } - - // Now that we've confirmed the other tributaries formed a net without issue, drop the syncer's - // pending P2P messages - syncer_p2p.1.write().await.1.last_mut().unwrap().clear(); - - // Have it join the net - let syncer_key = Ristretto::generator() * *syncer_key; - let syncer_tributary = Arc::new(syncer_tributary); - let (syncer_tributary_send, syncer_tributary_recv) = broadcast::channel(5); - let (cosign_send, _) = mpsc::unbounded_channel(); - tokio::spawn(handle_p2p_task(syncer_p2p.clone(), cosign_send, syncer_tributary_recv)); - syncer_tributary_send - .send(TributaryEvent::NewTributary(ActiveTributary { - spec: spec.clone(), - tributary: syncer_tributary.clone(), - })) - .map_err(|_| "failed to send ActiveTributary to syncer") - .unwrap(); - - // It shouldn't automatically catch up. 
If it somehow was, our test would be broken - // Sanity check this - let tip = tributaries[0].tip().await; - // Wait until a new block occurs - sleep(Duration::from_secs(3 * block_time)).await; - // Make sure a new block actually occurred - assert!(tributaries[0].tip().await != tip); - // Make sure the new block alone didn't trigger catching up - assert_eq!(syncer_tributary.tip().await, spec.genesis()); - - // Start the heartbeat protocol - let (syncer_heartbeat_tributary_send, syncer_heartbeat_tributary_recv) = broadcast::channel(5); - tokio::spawn(heartbeat_tributaries_task(syncer_p2p, syncer_heartbeat_tributary_recv)); - syncer_heartbeat_tributary_send - .send(TributaryEvent::NewTributary(ActiveTributary { - spec: spec.clone(), - tributary: syncer_tributary.clone(), - })) - .map_err(|_| "failed to send ActiveTributary to heartbeat") - .unwrap(); - - // The heartbeat is once every 10 blocks, with some limitations - sleep(Duration::from_secs(20 * block_time)).await; - assert!(syncer_tributary.tip().await != spec.genesis()); - - // Verify it synced to the tip - let syncer_tip = { - let tributary = &tributaries[0]; - - let tip = tributary.tip().await; - let syncer_tip = syncer_tributary.tip().await; - // Allow a one block tolerance in case of race conditions - assert!( - HashSet::from([tip, tributary.reader().block(&tip).unwrap().parent()]).contains(&syncer_tip) - ); - syncer_tip - }; - - sleep(Duration::from_secs(block_time)).await; - - // Verify it's now keeping up - assert!(syncer_tributary.tip().await != syncer_tip); - - // Verify it's now participating in consensus - // Because only `t` validators are used in a commit, take n - t nodes offline - // leaving only `t` nodes. Which should force it to participate in the consensus - // of next blocks. 
- let spares = usize::from(spec.n() - spec.t()); - for thread in p2p_threads.iter().take(spares) { - thread.abort(); - } - - // wait for a block - sleep(Duration::from_secs(block_time)).await; - - if syncer_tributary - .reader() - .parsed_commit(&syncer_tributary.tip().await) - .unwrap() - .validators - .iter() - .any(|signer| signer == &syncer_key.to_bytes()) - { - return; - } - - panic!("synced tributary didn't start participating in consensus"); -} diff --git a/coordinator/src/tests/tributary/tx.rs b/coordinator/src/tests/tributary/tx.rs deleted file mode 100644 index 9b948f36..00000000 --- a/coordinator/src/tests/tributary/tx.rs +++ /dev/null @@ -1,62 +0,0 @@ -use core::time::Duration; - -use rand_core::{RngCore, OsRng}; - -use tokio::time::sleep; - -use serai_db::MemDb; - -use tributary::{ - transaction::Transaction as TransactionTrait, Transaction as TributaryTransaction, Tributary, -}; - -use crate::{ - tributary::Transaction, - tests::{ - LocalP2p, - tributary::{new_keys, new_spec, new_tributaries, run_tributaries, wait_for_tx_inclusion}, - }, -}; - -#[tokio::test] -async fn tx_test() { - let keys = new_keys(&mut OsRng); - let spec = new_spec(&mut OsRng, &keys); - - let tributaries = new_tributaries(&keys, &spec) - .await - .into_iter() - .map(|(_, p2p, tributary)| (p2p, tributary)) - .collect::>(); - - // Run the tributaries in the background - tokio::spawn(run_tributaries(tributaries.clone())); - - // Send a TX from a random Tributary - let sender = - usize::try_from(OsRng.next_u64() % u64::try_from(tributaries.len()).unwrap()).unwrap(); - let key = keys[sender].clone(); - - let block_before_tx = tributaries[sender].1.tip().await; - // Create the TX with a null signature so we can get its sig hash - let mut tx = Transaction::DkgParticipation { - participation: { - let mut participation = vec![0; 4096]; - OsRng.fill_bytes(&mut participation); - participation - }, - signed: Transaction::empty_signed(), - }; - tx.sign(&mut OsRng, spec.genesis(), &key); - - assert_eq!(tributaries[sender].1.add_transaction(tx.clone()).await, Ok(true)); - let included_in = wait_for_tx_inclusion(&tributaries[sender].1, block_before_tx, tx.hash()).await; - // Also sleep for the block time to ensure the block is synced around before we run checks on it - sleep(Duration::from_secs(Tributary::::block_time().into())).await; - - // All tributaries should have acknowledged this transaction in a block - for (_, tributary) in tributaries { - let block = tributary.reader().block(&included_in).unwrap(); - assert_eq!(block.transactions, vec![TributaryTransaction::Application(tx.clone())]); - } -} diff --git a/coordinator/src/tributary/db.rs b/coordinator/src/tributary/db.rs index 095f18af..008cd5c8 100644 --- a/coordinator/src/tributary/db.rs +++ b/coordinator/src/tributary/db.rs @@ -3,186 +3,344 @@ use std::collections::HashMap; use scale::Encode; use borsh::{BorshSerialize, BorshDeserialize}; -use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; -use frost::Participant; +use serai_client::{primitives::SeraiAddress, validator_sets::primitives::ValidatorSet}; -use serai_client::validator_sets::primitives::{KeyPair, ValidatorSet}; +use processor_messages::sign::VariantSignId; -use processor_messages::coordinator::SubstrateSignableId; +use serai_db::*; -pub use serai_db::*; - -use tributary::ReadWrite; - -use crate::tributary::{Label, Transaction}; +use crate::tributary::transaction::SigningProtocolRound; +/// A topic within the database which the group participates in #[derive(Clone, Copy, PartialEq, Eq, 
Debug, Encode, BorshSerialize, BorshDeserialize)] pub enum Topic { - DkgConfirmation, - SubstrateSign(SubstrateSignableId), - Sign([u8; 32]), + /// Vote to remove a participant + RemoveParticipant { participant: SeraiAddress }, + + // DkgParticipation isn't represented here as participations are immediately sent to the + // processor, not accumulated within this database + /// Participation in the signing protocol to confirm the DKG results on Substrate + DkgConfirmation { attempt: u32, label: SigningProtocolRound }, + + /// The local view of the SlashReport, to be aggregated into the final SlashReport + SlashReport, + + /// Participation in a signing protocol + Sign { id: VariantSignId, attempt: u32, label: SigningProtocolRound }, } -// A struct to refer to a piece of data all validators will presumably provide a value for. -#[derive(Clone, Copy, PartialEq, Eq, Debug, Encode)] -pub struct DataSpecification { - pub topic: Topic, - pub label: Label, - pub attempt: u32, +enum Participating { + Participated, + Everyone, } -pub enum DataSet { - Participating(HashMap>), - NotParticipating, +impl Topic { + // The topic used by the next attempt of this protocol + fn next_attempt_topic(self) -> Option<Topic> { + #[allow(clippy::match_same_arms)] + match self { + Topic::RemoveParticipant { .. } => None, + Topic::DkgConfirmation { attempt, label: _ } => Some(Topic::DkgConfirmation { + attempt: attempt + 1, + label: SigningProtocolRound::Preprocess, + }), + Topic::SlashReport { .. } => None, + Topic::Sign { id, attempt, label: _ } => { + Some(Topic::Sign { id, attempt: attempt + 1, label: SigningProtocolRound::Preprocess }) + } + } + } + + // The topic for the re-attempt to schedule + fn reattempt_topic(self) -> Option<(u32, Topic)> { + #[allow(clippy::match_same_arms)] + match self { + Topic::RemoveParticipant { .. } => None, + Topic::DkgConfirmation { attempt, label } => match label { + SigningProtocolRound::Preprocess => { + let attempt = attempt + 1; + Some(( + attempt, + Topic::DkgConfirmation { attempt, label: SigningProtocolRound::Preprocess }, + )) + } + SigningProtocolRound::Share => None, + }, + Topic::SlashReport { .. } => None, + Topic::Sign { id, attempt, label } => match label { + SigningProtocolRound::Preprocess => { + let attempt = attempt + 1; + Some((attempt, Topic::Sign { id, attempt, label: SigningProtocolRound::Preprocess })) + } + SigningProtocolRound::Share => None, + }, + } + } + + /// The topic which precedes this topic as a prerequisite + /// + /// The preceding topic must define this topic as succeeding + fn preceding_topic(self) -> Option<Topic> { + #[allow(clippy::match_same_arms)] + match self { + Topic::RemoveParticipant { .. } => None, + Topic::DkgConfirmation { attempt, label } => match label { + SigningProtocolRound::Preprocess => None, + SigningProtocolRound::Share => { + Some(Topic::DkgConfirmation { attempt, label: SigningProtocolRound::Preprocess }) + } + }, + Topic::SlashReport { .. } => None, + Topic::Sign { id, attempt, label } => match label { + SigningProtocolRound::Preprocess => None, + SigningProtocolRound::Share => { + Some(Topic::Sign { id, attempt, label: SigningProtocolRound::Preprocess }) + } + }, + } + } + + /// The topic which succeeds this topic, with this topic as a prerequisite + /// + /// The succeeding topic must define this topic as preceding + fn succeeding_topic(self) -> Option<Topic> { + #[allow(clippy::match_same_arms)] + match self { + Topic::RemoveParticipant { ..
} => None, + Topic::DkgConfirmation { attempt, label } => match label { + SigningProtocolRound::Preprocess => { + Some(Topic::DkgConfirmation { attempt, label: SigningProtocolRound::Share }) + } + SigningProtocolRound::Share => None, + }, + Topic::SlashReport { .. } => None, + Topic::Sign { id, attempt, label } => match label { + SigningProtocolRound::Preprocess => { + Some(Topic::Sign { id, attempt, label: SigningProtocolRound::Share }) + } + SigningProtocolRound::Share => None, + }, + } + } + + fn requires_whitelisting(&self) -> bool { + #[allow(clippy::match_same_arms)] + match self { + // We don't require whitelisting to remove a participant + Topic::RemoveParticipant { .. } => false, + // We don't require whitelisting for the first attempt, solely the re-attempts + Topic::DkgConfirmation { attempt, .. } => *attempt != 0, + // We don't require whitelisting for the slash report + Topic::SlashReport { .. } => false, + // We do require whitelisting for every sign protocol + Topic::Sign { .. } => true, + } + } + + fn required_participation(&self, n: u64) -> u64 { + let _ = self; + // All of our topics require 2/3rds participation + ((2 * n) / 3) + 1 + } + + fn participating(&self) -> Participating { + #[allow(clippy::match_same_arms)] + match self { + Topic::RemoveParticipant { .. } => Participating::Everyone, + Topic::DkgConfirmation { .. } => Participating::Participated, + Topic::SlashReport { .. } => Participating::Everyone, + Topic::Sign { .. } => Participating::Participated, + } + } } -pub enum Accumulation { - Ready(DataSet), - NotReady, +/// The resulting data set from an accumulation +pub enum DataSet { + /// Accumulating this did not produce a data set to act on + /// (non-existent, not ready, prior handled, not participating, etc.) + None, + /// The data set was ready and we are participating in this event + Participating(HashMap), } -// TODO: Move from genesis to set for indexing +trait Borshy: BorshSerialize + BorshDeserialize {} +impl Borshy for T {} + create_db!( - Tributary { - SeraiBlockNumber: (hash: [u8; 32]) -> u64, - SeraiDkgCompleted: (set: ValidatorSet) -> [u8; 32], + CoordinatorTributary { + // The last handled tributary block's (number, hash) + LastHandledTributaryBlock: (set: ValidatorSet) -> (u64, [u8; 32]), - TributaryBlockNumber: (block: [u8; 32]) -> u32, - LastHandledBlock: (genesis: [u8; 32]) -> [u8; 32], + // The slash points a validator has accrued, with u64::MAX representing a fatal slash. + SlashPoints: (set: ValidatorSet, validator: SeraiAddress) -> u64, - // TODO: Revisit the point of this - FatalSlashes: (genesis: [u8; 32]) -> Vec<[u8; 32]>, - // TODO: Combine these two - FatallySlashed: (genesis: [u8; 32], account: [u8; 32]) -> (), - SlashPoints: (genesis: [u8; 32], account: [u8; 32]) -> u32, + // The latest Substrate block to cosign. + LatestSubstrateBlockToCosign: (set: ValidatorSet) -> [u8; 32], - VotedToRemove: (genesis: [u8; 32], voter: [u8; 32], to_remove: [u8; 32]) -> (), - VotesToRemove: (genesis: [u8; 32], to_remove: [u8; 32]) -> u16, + // The weight accumulated for a topic. + AccumulatedWeight: (set: ValidatorSet, topic: Topic) -> u64, + // The entries accumulated for a topic, by validator. 
+ Accumulated: (set: ValidatorSet, topic: Topic, validator: SeraiAddress) -> D, - AttemptDb: (genesis: [u8; 32], topic: &Topic) -> u32, - ReattemptDb: (genesis: [u8; 32], block: u32) -> Vec, - DataReceived: (genesis: [u8; 32], data_spec: &DataSpecification) -> u16, - DataDb: (genesis: [u8; 32], data_spec: &DataSpecification, signer_bytes: &[u8; 32]) -> Vec, - - DkgParticipation: (genesis: [u8; 32], from: u16) -> Vec, - ConfirmationNonces: (genesis: [u8; 32], attempt: u32) -> HashMap>, - DkgKeyPair: (genesis: [u8; 32]) -> KeyPair, - - PlanIds: (genesis: &[u8], block: u64) -> Vec<[u8; 32]>, - - SignedTransactionDb: (order: &[u8], nonce: u32) -> Vec, - - SlashReports: (genesis: [u8; 32], signer: [u8; 32]) -> Vec, - SlashReported: (genesis: [u8; 32]) -> u16, - SlashReportCutOff: (genesis: [u8; 32]) -> u64, - SlashReport: (set: ValidatorSet) -> Vec<([u8; 32], u32)>, + // Topics to be recognized as of a certain block number due to the reattempt protocol. + Reattempt: (set: ValidatorSet, block_number: u64) -> Vec, } ); -impl FatalSlashes { - pub fn get_as_keys(getter: &impl Get, genesis: [u8; 32]) -> Vec<::G> { - FatalSlashes::get(getter, genesis) - .unwrap_or(vec![]) - .iter() - .map(|key| ::G::from_bytes(key).unwrap()) - .collect::>() +pub struct TributaryDb; +impl TributaryDb { + pub fn last_handled_tributary_block( + getter: &impl Get, + set: ValidatorSet, + ) -> Option<(u64, [u8; 32])> { + LastHandledTributaryBlock::get(getter, set) } -} - -impl FatallySlashed { - pub fn set_fatally_slashed(txn: &mut impl DbTxn, genesis: [u8; 32], account: [u8; 32]) { - Self::set(txn, genesis, account, &()); - let mut existing = FatalSlashes::get(txn, genesis).unwrap_or_default(); - - // Don't append if we already have it, which can occur upon multiple faults - if existing.iter().any(|existing| existing == &account) { - return; - } - - existing.push(account); - FatalSlashes::set(txn, genesis, &existing); - } -} - -impl AttemptDb { - pub fn recognize_topic(txn: &mut impl DbTxn, genesis: [u8; 32], topic: Topic) { - Self::set(txn, genesis, &topic, &0u32); - } - - pub fn start_next_attempt(txn: &mut impl DbTxn, genesis: [u8; 32], topic: Topic) -> u32 { - let next = - Self::attempt(txn, genesis, topic).expect("starting next attempt for unknown topic") + 1; - Self::set(txn, genesis, &topic, &next); - next - } - - pub fn attempt(getter: &impl Get, genesis: [u8; 32], topic: Topic) -> Option { - let attempt = Self::get(getter, genesis, &topic); - // Don't require explicit recognition of the DkgConfirmation topic as it starts when the chain - // does - // Don't require explicit recognition of the SlashReport topic as it isn't a DoS risk and it - // should always happen (eventually) - if attempt.is_none() && - ((topic == Topic::DkgConfirmation) || - (topic == Topic::SubstrateSign(SubstrateSignableId::SlashReport))) - { - return Some(0); - } - attempt - } -} - -impl ReattemptDb { - pub fn schedule_reattempt( + pub fn set_last_handled_tributary_block( txn: &mut impl DbTxn, - genesis: [u8; 32], - current_block_number: u32, - topic: Topic, + set: ValidatorSet, + block_number: u64, + block_hash: [u8; 32], ) { - // 5 minutes - #[cfg(not(feature = "longer-reattempts"))] - const BASE_REATTEMPT_DELAY: u32 = (5 * 60 * 1000) / tributary::tendermint::TARGET_BLOCK_TIME; - - // 10 minutes, intended for latent environments like the GitHub CI - #[cfg(feature = "longer-reattempts")] - const BASE_REATTEMPT_DELAY: u32 = (10 * 60 * 1000) / tributary::tendermint::TARGET_BLOCK_TIME; - - // 5 minutes for attempts 0 ..= 2, 10 minutes for 
attempts 3 ..= 5, 15 minutes for attempts > 5 - Assumes no event will take longer than 15 minutes, yet grows the time in case there are - network bandwidth issues - let reattempt_delay = BASE_REATTEMPT_DELAY * - ((AttemptDb::attempt(txn, genesis, topic) - .expect("scheduling re-attempt for unknown topic") / - 3) + - 1) - .min(3); - let upon_block = current_block_number + reattempt_delay; - - let mut reattempts = Self::get(txn, genesis, upon_block).unwrap_or(vec![]); - reattempts.push(topic); - Self::set(txn, genesis, upon_block, &reattempts); + LastHandledTributaryBlock::set(txn, set, &(block_number, block_hash)); } - pub fn take(txn: &mut impl DbTxn, genesis: [u8; 32], block_number: u32) -> Vec { - let res = Self::get(txn, genesis, block_number).unwrap_or(vec![]); - if !res.is_empty() { - Self::del(txn, genesis, block_number); + pub fn recognize_topic(txn: &mut impl DbTxn, set: ValidatorSet, topic: Topic) { + AccumulatedWeight::set(txn, set, topic, &0); + } + + pub fn start_of_block(txn: &mut impl DbTxn, set: ValidatorSet, block_number: u64) { + for topic in Reattempt::take(txn, set, block_number).unwrap_or(vec![]) { + // TODO: Slash all people who preprocessed but didn't share + Self::recognize_topic(txn, set, topic); } - res } -} -impl SignedTransactionDb { - pub fn take_signed_transaction( + pub fn fatal_slash( txn: &mut impl DbTxn, - order: &[u8], - nonce: u32, - ) -> Option { - let res = SignedTransactionDb::get(txn, order, nonce) - .map(|bytes| Transaction::read(&mut bytes.as_slice()).unwrap()); - if res.is_some() { - Self::del(txn, order, nonce); + set: ValidatorSet, + validator: SeraiAddress, + reason: &str, + ) { + log::warn!("{validator} fatally slashed: {reason}"); + SlashPoints::set(txn, set, validator, &u64::MAX); + } + + pub fn is_fatally_slashed(getter: &impl Get, set: ValidatorSet, validator: SeraiAddress) -> bool { + SlashPoints::get(getter, set, validator).unwrap_or(0) == u64::MAX + } + + #[allow(clippy::too_many_arguments)] + pub fn accumulate( + txn: &mut impl DbTxn, + set: ValidatorSet, + validators: &[SeraiAddress], + total_weight: u64, + block_number: u64, + topic: Topic, + validator: SeraiAddress, + validator_weight: u64, + data: &D, + ) -> DataSet { + // This function will only be called once for a (validator, topic) tuple due to how we handle + // nonces on transactions (deterministically to the topic) + + let accumulated_weight = AccumulatedWeight::get(txn, set, topic); + if topic.requires_whitelisting() && accumulated_weight.is_none() { + Self::fatal_slash(txn, set, validator, "participated in unrecognized topic"); + return DataSet::None; + } + let mut accumulated_weight = accumulated_weight.unwrap_or(0); + + // If there's a preceding topic, check this validator participated in it + let preceding_topic = topic.preceding_topic(); + if let Some(preceding_topic) = preceding_topic { + if Accumulated::::get(txn, set, preceding_topic, validator).is_none() { + Self::fatal_slash( + txn, + set, + validator, + "participated in topic without participating in prior", + ); + return DataSet::None; + } + } + + // The complete lack of validation on the data by these NOPs opens the potential for spam here + + // If we've already accumulated past the threshold, NOP + if accumulated_weight >= topic.required_participation(total_weight) { + return DataSet::None; + } + // If this is for an old attempt, NOP + if let Some(next_attempt_topic) = topic.next_attempt_topic() { + if AccumulatedWeight::get(txn, set, next_attempt_topic).is_some() { + return DataSet::None; + } + } + + // 
Accumulate the data + accumulated_weight += validator_weight; + AccumulatedWeight::set(txn, set, topic, &accumulated_weight); + Accumulated::set(txn, set, topic, validator, data); + + // Check if we now cross the weight threshold + if accumulated_weight >= topic.required_participation(total_weight) { + // Queue this for re-attempt after enough time passes + if let Some((attempt, reattempt_topic)) = topic.reattempt_topic() { + // 5 minutes + #[cfg(not(feature = "longer-reattempts"))] + const BASE_REATTEMPT_DELAY: u32 = + (5u32 * 60 * 1000).div_ceil(tributary::tendermint::TARGET_BLOCK_TIME); + + // 10 minutes, intended for latent environments like the GitHub CI + #[cfg(feature = "longer-reattempts")] + const BASE_REATTEMPT_DELAY: u32 = + (10u32 * 60 * 1000).div_ceil(tributary::tendermint::TARGET_BLOCK_TIME); + + // Linearly scale the time for the protocol with the attempt number + let blocks_till_reattempt = u64::from(attempt * BASE_REATTEMPT_DELAY); + + let recognize_at = block_number + blocks_till_reattempt; + let mut queued = Reattempt::get(txn, set, recognize_at).unwrap_or(Vec::with_capacity(1)); + queued.push(reattempt_topic); + Reattempt::set(txn, set, recognize_at, &queued); + } + + // Register the succeeding topic + let succeeding_topic = topic.succeeding_topic(); + if let Some(succeeding_topic) = succeeding_topic { + Self::recognize_topic(txn, set, succeeding_topic); + } + + // Fetch and return all participations + let mut data_set = HashMap::with_capacity(validators.len()); + for validator in validators { + if let Some(data) = Accumulated::::get(txn, set, topic, *validator) { + // Clean this data up if there's not a succeeding topic + // If there is, we wait as the succeeding topic checks our participation in this topic + if succeeding_topic.is_none() { + Accumulated::::del(txn, set, topic, *validator); + } + // If this *was* the succeeding topic, clean up the preceding topic's data + if let Some(preceding_topic) = preceding_topic { + Accumulated::::del(txn, set, preceding_topic, *validator); + } + data_set.insert(*validator, data); + } + } + let participated = data_set.contains_key(&validator); + match topic.participating() { + Participating::Participated => { + if participated { + DataSet::Participating(data_set) + } else { + DataSet::None + } + } + Participating::Everyone => DataSet::Participating(data_set), + } + } else { + DataSet::None } - res } } diff --git a/coordinator/src/tributary/handle.rs b/coordinator/src/tributary/handle.rs deleted file mode 100644 index c5378cc7..00000000 --- a/coordinator/src/tributary/handle.rs +++ /dev/null @@ -1,554 +0,0 @@ -use core::ops::Deref; -use std::collections::HashMap; - -use zeroize::Zeroizing; -use rand_core::OsRng; - -use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; -use frost::dkg::Participant; - -use scale::{Encode, Decode}; -use serai_client::{Signature, validator_sets::primitives::KeyPair}; - -use tributary::{Signed, TransactionKind, TransactionTrait}; - -use processor_messages::{ - key_gen::self, - coordinator::{self, SubstrateSignableId, SubstrateSignId}, - sign::{self, SignId}, -}; - -use serai_db::*; - -use crate::{ - processors::Processors, - tributary::{ - *, - signing_protocol::DkgConfirmer, - scanner::{ - RecognizedIdType, RIDTrait, PublishSeraiTransaction, PTTTrait, TributaryBlockHandler, - }, - }, - P2p, -}; - -pub fn dkg_confirmation_nonces( - key: &Zeroizing<::F>, - spec: &TributarySpec, - txn: &mut impl DbTxn, - attempt: u32, -) -> [u8; 64] { - DkgConfirmer::new(key, spec, txn, attempt).preprocess() 
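For readers tracking the new accumulation model in coordinator/src/tributary/db.rs above: a topic's Preprocess round precedes its Share round, only Preprocess rounds schedule re-attempts, and a data set is only returned once the accumulated key-share weight reaches the ((2 * n) / 3) + 1 threshold. Below is a minimal, self-contained sketch of that arithmetic with made-up numbers; the 6000ms block time is an illustrative assumption, not the actual tributary::tendermint::TARGET_BLOCK_TIME.

fn main() {
  // Participation threshold from `Topic::required_participation`: strictly more than 2/3rds of
  // the total key-share weight. With a total weight of 7, 5 shares are required.
  let total_weight: u64 = 7;
  let required = ((2 * total_weight) / 3) + 1;
  assert_eq!(required, 5);

  // Re-attempt scheduling from `TributaryDb::accumulate`: the five-minute base delay is
  // converted to blocks, then scaled linearly by the attempt number.
  let target_block_time_ms: u32 = 6000; // assumed value, purely for illustration
  let base_reattempt_delay = (5u32 * 60 * 1000).div_ceil(target_block_time_ms);
  let attempt: u32 = 2;
  let blocks_till_reattempt = u64::from(attempt * base_reattempt_delay);
  assert_eq!(blocks_till_reattempt, 100);
}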
-} - -pub fn generated_key_pair( - txn: &mut D::Transaction<'_>, - genesis: [u8; 32], - key_pair: &KeyPair, -) { - DkgKeyPair::set(txn, genesis, key_pair); -} - -fn unflatten(spec: &TributarySpec, data: &mut HashMap>) { - for (validator, _) in spec.validators() { - let Some(range) = spec.i(validator) else { continue }; - let Some(all_segments) = data.remove(&range.start) else { - continue; - }; - let mut data_vec = Vec::<_>::decode(&mut all_segments.as_slice()).unwrap(); - for i in u16::from(range.start) .. u16::from(range.end) { - let i = Participant::new(i).unwrap(); - data.insert(i, data_vec.remove(0)); - } - } -} - -impl< - D: Db, - T: DbTxn, - Pro: Processors, - PST: PublishSeraiTransaction, - PTT: PTTTrait, - RID: RIDTrait, - P: P2p, - > TributaryBlockHandler<'_, D, T, Pro, PST, PTT, RID, P> -{ - fn accumulate( - &mut self, - data_spec: &DataSpecification, - signer: ::G, - data: &Vec, - ) -> Accumulation { - log::debug!("accumulating entry for {:?} attempt #{}", &data_spec.topic, &data_spec.attempt); - let genesis = self.spec.genesis(); - if DataDb::get(self.txn, genesis, data_spec, &signer.to_bytes()).is_some() { - panic!("accumulating data for a participant multiple times"); - } - let signer_shares = { - let signer_i = self.spec.i(signer).expect("transaction signer wasn't a member of the set"); - u16::from(signer_i.end) - u16::from(signer_i.start) - }; - - let prior_received = DataReceived::get(self.txn, genesis, data_spec).unwrap_or_default(); - let now_received = prior_received + signer_shares; - DataReceived::set(self.txn, genesis, data_spec, &now_received); - DataDb::set(self.txn, genesis, data_spec, &signer.to_bytes(), data); - - let received_range = (prior_received + 1) ..= now_received; - - // If 2/3rds of the network participated in this preprocess, queue it for an automatic - // re-attempt - if (data_spec.label == Label::Preprocess) && received_range.contains(&self.spec.t()) { - // Double check the attempt on this entry, as we don't want to schedule a re-attempt if this - // is an old entry - // This is an assert, not part of the if check, as old data shouldn't be here in the first - // place - assert_eq!(AttemptDb::attempt(self.txn, genesis, data_spec.topic), Some(data_spec.attempt)); - ReattemptDb::schedule_reattempt(self.txn, genesis, self.block_number, data_spec.topic); - } - - // If we have all the needed commitments/preprocesses/shares, tell the processor - if received_range.contains(&self.spec.t()) { - log::debug!( - "accumulation for entry {:?} attempt #{} is ready", - &data_spec.topic, - &data_spec.attempt - ); - - let mut data = HashMap::new(); - for validator in self.spec.validators().iter().map(|validator| validator.0) { - let Some(i) = self.spec.i(validator) else { continue }; - data.insert( - i.start, - if let Some(data) = DataDb::get(self.txn, genesis, data_spec, &validator.to_bytes()) { - data - } else { - continue; - }, - ); - } - - assert_eq!(data.len(), usize::from(self.spec.t())); - - // Remove our own piece of data, if we were involved - if let Some(i) = self.spec.i(Ristretto::generator() * self.our_key.deref()) { - if data.remove(&i.start).is_some() { - return Accumulation::Ready(DataSet::Participating(data)); - } - } - return Accumulation::Ready(DataSet::NotParticipating); - } - Accumulation::NotReady - } - - fn handle_data( - &mut self, - data_spec: &DataSpecification, - bytes: &Vec, - signed: &Signed, - ) -> Accumulation { - let genesis = self.spec.genesis(); - - let Some(curr_attempt) = AttemptDb::attempt(self.txn, genesis, data_spec.topic) else { 
- // Premature publication of a valid ID/publication of an invalid ID - self.fatal_slash(signed.signer.to_bytes(), "published data for ID without an attempt"); - return Accumulation::NotReady; - }; - - // If they've already published a TX for this attempt, slash - // This shouldn't be reachable since nonces were made inserted by the coordinator, yet it's a - // cheap check to leave in for safety - if DataDb::get(self.txn, genesis, data_spec, &signed.signer.to_bytes()).is_some() { - self.fatal_slash(signed.signer.to_bytes(), "published data multiple times"); - return Accumulation::NotReady; - } - - // If the attempt is lesser than the blockchain's, return - if data_spec.attempt < curr_attempt { - log::debug!( - "dated attempt published onto tributary for topic {:?} (used attempt {}, current {})", - data_spec.topic, - data_spec.attempt, - curr_attempt - ); - return Accumulation::NotReady; - } - // If the attempt is greater, this is a premature publication, full slash - if data_spec.attempt > curr_attempt { - self.fatal_slash( - signed.signer.to_bytes(), - "published data with an attempt which hasn't started", - ); - return Accumulation::NotReady; - } - - // TODO: We can also full slash if shares before all commitments, or share before the - // necessary preprocesses - - // TODO: If this is shares, we need to check they are part of the selected signing set - - // Accumulate this data - self.accumulate(data_spec, signed.signer, bytes) - } - - fn check_sign_data_len( - &mut self, - signer: ::G, - len: usize, - ) -> Result<(), ()> { - let signer_i = self.spec.i(signer).expect("signer wasn't a member of the set"); - if len != usize::from(u16::from(signer_i.end) - u16::from(signer_i.start)) { - self.fatal_slash( - signer.to_bytes(), - "signer published a distinct amount of sign data than they had shares", - ); - Err(())?; - } - Ok(()) - } - - // TODO: Don't call fatal_slash in here, return the party to fatal_slash to ensure no further - // execution occurs - pub(crate) async fn handle_application_tx(&mut self, tx: Transaction) { - let genesis = self.spec.genesis(); - - // Don't handle transactions from fatally slashed participants - // This prevents removed participants from sabotaging the removal signing sessions and so on - // TODO: Because fatally slashed participants can still publish onto the blockchain, they have - // a notable DoS ability - if let TransactionKind::Signed(_, signed) = tx.kind() { - if FatallySlashed::get(self.txn, genesis, signed.signer.to_bytes()).is_some() { - return; - } - } - - match tx { - Transaction::RemoveParticipant { participant, signed } => { - if self.spec.i(participant).is_none() { - self.fatal_slash(participant.to_bytes(), "RemoveParticipant vote for non-validator"); - return; - } - - let participant = participant.to_bytes(); - let signer = signed.signer.to_bytes(); - - assert!( - VotedToRemove::get(self.txn, genesis, signer, participant).is_none(), - "VotedToRemove multiple times despite a single nonce being allocated", - ); - VotedToRemove::set(self.txn, genesis, signer, participant, &()); - - let prior_votes = VotesToRemove::get(self.txn, genesis, participant).unwrap_or(0); - let signer_votes = - self.spec.i(signed.signer).expect("signer wasn't a validator for this network?"); - let new_votes = prior_votes + u16::from(signer_votes.end) - u16::from(signer_votes.start); - VotesToRemove::set(self.txn, genesis, participant, &new_votes); - if ((prior_votes + 1) ..= new_votes).contains(&self.spec.t()) { - self.fatal_slash(participant, "RemoveParticipant vote") - } 
- } - - Transaction::DkgParticipation { participation, signed } => { - // Send the participation to the processor - self - .processors - .send( - self.spec.set().network, - key_gen::CoordinatorMessage::Participation { - session: self.spec.set().session, - participant: self - .spec - .i(signed.signer) - .expect("signer wasn't a validator for this network?") - .start, - participation, - }, - ) - .await; - } - - Transaction::DkgConfirmationNonces { attempt, confirmation_nonces, signed } => { - let data_spec = - DataSpecification { topic: Topic::DkgConfirmation, label: Label::Preprocess, attempt }; - match self.handle_data(&data_spec, &confirmation_nonces.to_vec(), &signed) { - Accumulation::Ready(DataSet::Participating(confirmation_nonces)) => { - log::info!( - "got all DkgConfirmationNonces for {}, attempt {attempt}", - hex::encode(genesis) - ); - - ConfirmationNonces::set(self.txn, genesis, attempt, &confirmation_nonces); - - // Send the expected DkgConfirmationShare - // TODO: Slight race condition here due to set, publish tx, then commit txn - let key_pair = DkgKeyPair::get(self.txn, genesis) - .expect("participating in confirming key we don't have"); - let mut tx = match DkgConfirmer::new(self.our_key, self.spec, self.txn, attempt) - .share(confirmation_nonces, &key_pair) - { - Ok(confirmation_share) => Transaction::DkgConfirmationShare { - attempt, - confirmation_share, - signed: Transaction::empty_signed(), - }, - Err(participant) => Transaction::RemoveParticipant { - participant: self.spec.reverse_lookup_i(participant).unwrap(), - signed: Transaction::empty_signed(), - }, - }; - tx.sign(&mut OsRng, genesis, self.our_key); - self.publish_tributary_tx.publish_tributary_tx(tx).await; - } - Accumulation::Ready(DataSet::NotParticipating) | Accumulation::NotReady => {} - } - } - - Transaction::DkgConfirmationShare { attempt, confirmation_share, signed } => { - let data_spec = - DataSpecification { topic: Topic::DkgConfirmation, label: Label::Share, attempt }; - match self.handle_data(&data_spec, &confirmation_share.to_vec(), &signed) { - Accumulation::Ready(DataSet::Participating(shares)) => { - log::info!( - "got all DkgConfirmationShare for {}, attempt {attempt}", - hex::encode(genesis) - ); - - let preprocesses = ConfirmationNonces::get(self.txn, genesis, attempt).unwrap(); - - // TODO: This can technically happen under very very very specific timing as the txn - // put happens before DkgConfirmationShare, yet the txn isn't guaranteed to be - // committed - let key_pair = DkgKeyPair::get(self.txn, genesis).expect( - "in DkgConfirmationShare handling, which happens after everyone \ - (including us) fires DkgConfirmationShare, yet no confirming key pair", - ); - - // Determine the bitstring representing who participated before we move `shares` - let validators = self.spec.validators(); - let mut signature_participants = bitvec::vec::BitVec::with_capacity(validators.len()); - for (participant, _) in validators { - signature_participants.push( - (participant == (::generator() * self.our_key.deref())) || - shares.contains_key(&self.spec.i(participant).unwrap().start), - ); - } - - // Produce the final signature - let mut confirmer = DkgConfirmer::new(self.our_key, self.spec, self.txn, attempt); - let sig = match confirmer.complete(preprocesses, &key_pair, shares) { - Ok(sig) => sig, - Err(p) => { - let mut tx = Transaction::RemoveParticipant { - participant: self.spec.reverse_lookup_i(p).unwrap(), - signed: Transaction::empty_signed(), - }; - tx.sign(&mut OsRng, genesis, self.our_key); - 
self.publish_tributary_tx.publish_tributary_tx(tx).await; - return; - } - }; - - self - .publish_serai_tx - .publish_set_keys( - self.db, - self.spec.set(), - key_pair, - signature_participants, - Signature(sig), - ) - .await; - } - Accumulation::Ready(DataSet::NotParticipating) | Accumulation::NotReady => {} - } - } - - Transaction::CosignSubstrateBlock(hash) => { - AttemptDb::recognize_topic( - self.txn, - genesis, - Topic::SubstrateSign(SubstrateSignableId::CosigningSubstrateBlock(hash)), - ); - - let block_number = SeraiBlockNumber::get(self.txn, hash) - .expect("CosignSubstrateBlock yet didn't save Serai block number"); - let msg = coordinator::CoordinatorMessage::CosignSubstrateBlock { - id: SubstrateSignId { - session: self.spec.set().session, - id: SubstrateSignableId::CosigningSubstrateBlock(hash), - attempt: 0, - }, - block_number, - }; - self.processors.send(self.spec.set().network, msg).await; - } - - Transaction::Batch { block: _, batch } => { - // Because this Batch has achieved synchrony, its batch ID should be authorized - AttemptDb::recognize_topic( - self.txn, - genesis, - Topic::SubstrateSign(SubstrateSignableId::Batch(batch)), - ); - self - .recognized_id - .recognized_id( - self.spec.set(), - genesis, - RecognizedIdType::Batch, - batch.to_le_bytes().to_vec(), - ) - .await; - } - - Transaction::SubstrateBlock(block) => { - let plan_ids = PlanIds::get(self.txn, &genesis, block).expect( - "synced a tributary block finalizing a substrate block in a provided transaction \ - despite us not providing that transaction", - ); - - for id in plan_ids { - AttemptDb::recognize_topic(self.txn, genesis, Topic::Sign(id)); - self - .recognized_id - .recognized_id(self.spec.set(), genesis, RecognizedIdType::Plan, id.to_vec()) - .await; - } - } - - Transaction::SubstrateSign(data) => { - let signer = data.signed.signer; - let Ok(()) = self.check_sign_data_len(signer, data.data.len()) else { - return; - }; - let expected_len = match data.label { - Label::Preprocess => 64, - Label::Share => 32, - }; - for data in &data.data { - if data.len() != expected_len { - self.fatal_slash( - signer.to_bytes(), - "unexpected length data for substrate signing protocol", - ); - return; - } - } - - let data_spec = DataSpecification { - topic: Topic::SubstrateSign(data.plan), - label: data.label, - attempt: data.attempt, - }; - let Accumulation::Ready(DataSet::Participating(mut results)) = - self.handle_data(&data_spec, &data.data.encode(), &data.signed) - else { - return; - }; - unflatten(self.spec, &mut results); - - let id = SubstrateSignId { - session: self.spec.set().session, - id: data.plan, - attempt: data.attempt, - }; - let msg = match data.label { - Label::Preprocess => coordinator::CoordinatorMessage::SubstratePreprocesses { - id, - preprocesses: results.into_iter().map(|(v, p)| (v, p.try_into().unwrap())).collect(), - }, - Label::Share => coordinator::CoordinatorMessage::SubstrateShares { - id, - shares: results.into_iter().map(|(v, p)| (v, p.try_into().unwrap())).collect(), - }, - }; - self.processors.send(self.spec.set().network, msg).await; - } - - Transaction::Sign(data) => { - let Ok(()) = self.check_sign_data_len(data.signed.signer, data.data.len()) else { - return; - }; - - let data_spec = DataSpecification { - topic: Topic::Sign(data.plan), - label: data.label, - attempt: data.attempt, - }; - if let Accumulation::Ready(DataSet::Participating(mut results)) = - self.handle_data(&data_spec, &data.data.encode(), &data.signed) - { - unflatten(self.spec, &mut results); - let id = - SignId { 
session: self.spec.set().session, id: data.plan, attempt: data.attempt }; - self - .processors - .send( - self.spec.set().network, - match data.label { - Label::Preprocess => { - sign::CoordinatorMessage::Preprocesses { id, preprocesses: results } - } - Label::Share => sign::CoordinatorMessage::Shares { id, shares: results }, - }, - ) - .await; - } - } - - Transaction::SignCompleted { plan, tx_hash, first_signer, signature: _ } => { - log::info!( - "on-chain SignCompleted claims {} completes {}", - hex::encode(&tx_hash), - hex::encode(plan) - ); - - if AttemptDb::attempt(self.txn, genesis, Topic::Sign(plan)).is_none() { - self.fatal_slash(first_signer.to_bytes(), "claimed an unrecognized plan was completed"); - return; - }; - - // TODO: Confirm this signer hasn't prior published a completion - - let msg = sign::CoordinatorMessage::Completed { - session: self.spec.set().session, - id: plan, - tx: tx_hash, - }; - self.processors.send(self.spec.set().network, msg).await; - } - - Transaction::SlashReport(points, signed) => { - let signer_range = self.spec.i(signed.signer).unwrap(); - let signer_len = u16::from(signer_range.end) - u16::from(signer_range.start); - if points.len() != (self.spec.validators().len() - 1) { - self.fatal_slash( - signed.signer.to_bytes(), - "submitted a distinct amount of slash points to participants", - ); - return; - } - - if SlashReports::get(self.txn, genesis, signed.signer.to_bytes()).is_some() { - self.fatal_slash(signed.signer.to_bytes(), "submitted multiple slash points"); - return; - } - SlashReports::set(self.txn, genesis, signed.signer.to_bytes(), &points); - - let prior_reported = SlashReported::get(self.txn, genesis).unwrap_or(0); - let now_reported = prior_reported + signer_len; - SlashReported::set(self.txn, genesis, &now_reported); - - if (prior_reported < self.spec.t()) && (now_reported >= self.spec.t()) { - SlashReportCutOff::set( - self.txn, - genesis, - // 30 minutes into the future - &(u64::from(self.block_number) + - ((30 * 60 * 1000) / u64::from(tributary::tendermint::TARGET_BLOCK_TIME))), - ); - } - } - } - } -} diff --git a/coordinator/src/tributary/mod.rs b/coordinator/src/tributary/mod.rs index 6e2f2661..6d748940 100644 --- a/coordinator/src/tributary/mod.rs +++ b/coordinator/src/tributary/mod.rs @@ -1,63 +1,6 @@ -use tributary::{ - ReadWrite, - transaction::{TransactionError, TransactionKind, Transaction as TransactionTrait}, - Tributary, -}; +mod transaction; +pub use transaction::Transaction; mod db; -pub use db::*; -mod spec; -pub use spec::TributarySpec; - -mod transaction; -pub use transaction::{Label, SignData, Transaction}; - -mod signing_protocol; - -mod handle; -pub use handle::*; - -pub mod scanner; - -pub async fn publish_signed_transaction( - txn: &mut D::Transaction<'_>, - tributary: &Tributary, - tx: Transaction, -) { - log::debug!("publishing transaction {}", hex::encode(tx.hash())); - - let (order, signer) = if let TransactionKind::Signed(order, signed) = tx.kind() { - let signer = signed.signer; - - // Safe as we should deterministically create transactions, meaning if this is already on-disk, - // it's what we're saving now - SignedTransactionDb::set(txn, &order, signed.nonce, &tx.serialize()); - - (order, signer) - } else { - panic!("non-signed transaction passed to publish_signed_transaction"); - }; - - // If we're trying to publish 5, when the last transaction published was 3, this will delay - // publication until the point in time we publish 4 - while let Some(tx) = SignedTransactionDb::take_signed_transaction( - txn, 
- &order, - tributary - .next_nonce(&signer, &order) - .await - .expect("we don't have a nonce, meaning we aren't a participant on this tributary"), - ) { - // We need to return a proper error here to enable that, due to a race condition around - // multiple publications - match tributary.add_transaction(tx.clone()).await { - Ok(_) => {} - // Some asynchonicity if InvalidNonce, assumed safe to deterministic nonces - Err(TransactionError::InvalidNonce) => { - log::warn!("publishing TX {tx:?} returned InvalidNonce. was it already added?") - } - Err(e) => panic!("created an invalid transaction: {e:?}"), - } - } -} +mod scan; diff --git a/coordinator/src/tributary/scan.rs b/coordinator/src/tributary/scan.rs new file mode 100644 index 00000000..47e1103d --- /dev/null +++ b/coordinator/src/tributary/scan.rs @@ -0,0 +1,203 @@ +use core::future::Future; +use std::collections::HashMap; + +use ciphersuite::group::GroupEncoding; + +use serai_client::{primitives::SeraiAddress, validator_sets::primitives::ValidatorSet}; + +use tributary::{ + Signed as TributarySigned, TransactionError, TransactionKind, TransactionTrait, + Transaction as TributaryTransaction, Block, TributaryReader, + tendermint::{ + tx::{TendermintTx, Evidence, decode_signed_message}, + TendermintNetwork, + }, +}; + +use serai_db::*; +use serai_task::ContinuallyRan; + +use crate::tributary::{ + db::*, + transaction::{Signed, Transaction}, +}; + +struct ScanBlock<'a, D: DbTxn, TD: Db> { + txn: &'a mut D, + set: ValidatorSet, + validators: &'a [SeraiAddress], + total_weight: u64, + validator_weights: &'a HashMap, + tributary: &'a TributaryReader, +} +impl<'a, D: DbTxn, TD: Db> ScanBlock<'a, D, TD> { + fn handle_application_tx(&mut self, block_number: u64, tx: Transaction) { + let signer = |signed: Signed| SeraiAddress(signed.signer.to_bytes()); + + if let TransactionKind::Signed(_, TributarySigned { signer, .. 
}) = tx.kind() { + // Don't handle transactions from those fatally slashed + // TODO: The fact they can publish these TXs makes this a notable spam vector + if TributaryDb::is_fatally_slashed(self.txn, self.set, SeraiAddress(signer.to_bytes())) { + return; + } + } + + match tx { + Transaction::RemoveParticipant { participant, signed } => { + // Accumulate this vote and fatally slash the participant if past the threshold + let signer = signer(signed); + match TributaryDb::accumulate( + self.txn, + self.set, + self.validators, + self.total_weight, + block_number, + Topic::RemoveParticipant { participant }, + signer, + self.validator_weights[&signer], + &(), + ) { + DataSet::None => {} + DataSet::Participating(_) => { + TributaryDb::fatal_slash(self.txn, self.set, participant, "voted to remove") + } + } + } + + Transaction::DkgParticipation { participation, signed } => { + // Send the participation to the processor + todo!("TODO") + } + Transaction::DkgConfirmationPreprocess { attempt, preprocess, signed } => { + // Accumulate the preprocesses into our own FROST attempt manager + todo!("TODO") + } + Transaction::DkgConfirmationShare { attempt, share, signed } => { + // Accumulate the shares into our own FROST attempt manager + todo!("TODO") + } + + Transaction::Cosign { substrate_block_hash } => { + // Update the latest intended-to-be-cosigned Substrate block + todo!("TODO") + } + Transaction::Cosigned { substrate_block_hash } => { + // Start cosigning the latest intended-to-be-cosigned block + todo!("TODO") + } + Transaction::SubstrateBlock { hash } => { + // Whitelist all of the IDs this Substrate block causes to be signed + todo!("TODO") + } + Transaction::Batch { hash } => { + // Whitelist the signing of this batch, publishing our own preprocess + todo!("TODO") + } + + Transaction::SlashReport { slash_points, signed } => { + // Accumulate, and if past the threshold, calculate *the* slash report and start signing it + todo!("TODO") + } + + Transaction::Sign { id, attempt, label, data, signed } => todo!("TODO"), + } + } + + fn handle_block(mut self, block_number: u64, block: Block) { + TributaryDb::start_of_block(self.txn, self.set, block_number); + + for tx in block.transactions { + match tx { + TributaryTransaction::Tendermint(TendermintTx::SlashEvidence(ev)) => { + // Since the evidence is on the chain, it will have already been validated + // We can just punish the signer + let data = match ev { + Evidence::ConflictingMessages(first, second) => (first, Some(second)), + Evidence::InvalidPrecommit(first) | Evidence::InvalidValidRound(first) => (first, None), + }; + /* TODO + let msgs = ( + decode_signed_message::>(&data.0).unwrap(), + if data.1.is_some() { + Some( + decode_signed_message::>(&data.1.unwrap()) + .unwrap(), + ) + } else { + None + }, + ); + + // Since anything with evidence is fundamentally faulty behavior, not just temporal + // errors, mark the node as fatally slashed + TributaryDb::fatal_slash( + self.txn, msgs.0.msg.sender, &format!("invalid tendermint messages: {msgs:?}")); + */ + todo!("TODO") + } + TributaryTransaction::Application(tx) => { + self.handle_application_tx(block_number, tx); + } + } + } + } +} + +struct ScanTributaryTask { + db: D, + set: ValidatorSet, + validators: Vec, + total_weight: u64, + validator_weights: HashMap, + tributary: TributaryReader, +} +impl ContinuallyRan for ScanTributaryTask { + fn run_iteration(&mut self) -> impl Send + Future> { + async move { + let (mut last_block_number, mut last_block_hash) = + 
TributaryDb::last_handled_tributary_block(&self.db, self.set) + .unwrap_or((0, self.tributary.genesis())); + + let mut made_progress = false; + while let Some(next) = self.tributary.block_after(&last_block_hash) { + let block = self.tributary.block(&next).unwrap(); + let block_number = last_block_number + 1; + let block_hash = block.hash(); + + // Make sure we have all of the provided transactions for this block + for tx in &block.transactions { + let TransactionKind::Provided(order) = tx.kind() else { + continue; + }; + + // make sure we have all the provided txs in this block locally + if !self.tributary.locally_provided_txs_in_block(&block_hash, order) { + return Err(format!( + "didn't have the provided Transactions on-chain for set (ephemeral error): {:?}", + self.set + )); + } + } + + let mut txn = self.db.txn(); + (ScanBlock { + txn: &mut txn, + set: self.set, + validators: &self.validators, + total_weight: self.total_weight, + validator_weights: &self.validator_weights, + tributary: &self.tributary, + }) + .handle_block(block_number, block); + TributaryDb::set_last_handled_tributary_block(&mut txn, self.set, block_number, block_hash); + last_block_number = block_number; + last_block_hash = block_hash; + txn.commit(); + + made_progress = true; + } + + Ok(made_progress) + } + } +} diff --git a/coordinator/src/tributary/scanner.rs b/coordinator/src/tributary/scanner.rs deleted file mode 100644 index c0b906ed..00000000 --- a/coordinator/src/tributary/scanner.rs +++ /dev/null @@ -1,685 +0,0 @@ -use core::{marker::PhantomData, future::Future, time::Duration}; -use std::sync::Arc; - -use zeroize::Zeroizing; - -use rand_core::OsRng; - -use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; - -use tokio::sync::broadcast; - -use scale::{Encode, Decode}; -use serai_client::{ - primitives::Signature, - validator_sets::primitives::{KeyPair, ValidatorSet}, - Serai, -}; - -use serai_db::DbTxn; - -use processor_messages::coordinator::{SubstrateSignId, SubstrateSignableId}; - -use tributary::{ - TransactionKind, Transaction as TributaryTransaction, TransactionError, Block, TributaryReader, - tendermint::{ - tx::{TendermintTx, Evidence, decode_signed_message}, - TendermintNetwork, - }, -}; - -use crate::{Db, processors::Processors, substrate::BatchInstructionsHashDb, tributary::*, P2p}; - -#[derive(Clone, Copy, PartialEq, Eq, Debug, Encode, Decode)] -pub enum RecognizedIdType { - Batch, - Plan, -} - -#[async_trait::async_trait] -pub trait RIDTrait { - async fn recognized_id( - &self, - set: ValidatorSet, - genesis: [u8; 32], - kind: RecognizedIdType, - id: Vec, - ); -} -#[async_trait::async_trait] -impl< - FRid: Send + Future, - F: Sync + Fn(ValidatorSet, [u8; 32], RecognizedIdType, Vec) -> FRid, - > RIDTrait for F -{ - async fn recognized_id( - &self, - set: ValidatorSet, - genesis: [u8; 32], - kind: RecognizedIdType, - id: Vec, - ) { - (self)(set, genesis, kind, id).await - } -} - -#[async_trait::async_trait] -pub trait PublishSeraiTransaction { - async fn publish_set_keys( - &self, - db: &(impl Sync + Get), - set: ValidatorSet, - key_pair: KeyPair, - signature_participants: bitvec::vec::BitVec, - signature: Signature, - ); -} - -mod impl_pst_for_serai { - use super::*; - - use serai_client::SeraiValidatorSets; - - // Uses a macro because Rust can't resolve the lifetimes/generics around the check function - // check is expected to return true if the effect has already occurred - // The generated publish function will return true if *we* published the transaction - macro_rules! 
common_pst { - ($Meta: ty, $check: ident) => { - async fn publish( - serai: &Serai, - db: &impl Get, - set: ValidatorSet, - tx: serai_client::Transaction, - meta: $Meta, - ) -> bool { - loop { - match serai.publish(&tx).await { - Ok(_) => return true, - // This is assumed to be some ephemeral error due to the assumed fault-free - // creation - // TODO2: Differentiate connection errors from invariants - Err(e) => { - // The following block is irrelevant, and can/likely will fail, if we're publishing - // a TX for an old session - // If we're on a newer session, move on - if crate::RetiredTributaryDb::get(db, set).is_some() { - log::warn!("trying to publish a TX relevant to set {set:?} which isn't the latest"); - return false; - } - - if let Ok(serai) = serai.as_of_latest_finalized_block().await { - let serai = serai.validator_sets(); - - // Check if someone else published the TX in question - if $check(serai, set, meta).await { - return false; - } - } - - log::error!("couldn't connect to Serai node to publish TX: {e:?}"); - tokio::time::sleep(core::time::Duration::from_secs(5)).await; - } - } - } - } - }; - } - - #[async_trait::async_trait] - impl PublishSeraiTransaction for Serai { - async fn publish_set_keys( - &self, - db: &(impl Sync + Get), - set: ValidatorSet, - key_pair: KeyPair, - signature_participants: bitvec::vec::BitVec, - signature: Signature, - ) { - let tx = - SeraiValidatorSets::set_keys(set.network, key_pair, signature_participants, signature); - async fn check(serai: SeraiValidatorSets<'_>, set: ValidatorSet, (): ()) -> bool { - if matches!(serai.keys(set).await, Ok(Some(_))) { - log::info!("another coordinator set key pair for {:?}", set); - return true; - } - false - } - common_pst!((), check); - if publish(self, db, set, tx, ()).await { - log::info!("published set keys for {set:?}"); - } - } - } -} - -#[async_trait::async_trait] -pub trait PTTTrait { - async fn publish_tributary_tx(&self, tx: Transaction); -} -#[async_trait::async_trait] -impl, F: Sync + Fn(Transaction) -> FPtt> PTTTrait for F { - async fn publish_tributary_tx(&self, tx: Transaction) { - (self)(tx).await - } -} - -pub struct TributaryBlockHandler< - 'a, - D: Db, - T: DbTxn, - Pro: Processors, - PST: PublishSeraiTransaction, - PTT: PTTTrait, - RID: RIDTrait, - P: P2p, -> { - pub db: &'a D, - pub txn: &'a mut T, - pub our_key: &'a Zeroizing<::F>, - pub recognized_id: &'a RID, - pub processors: &'a Pro, - pub publish_serai_tx: &'a PST, - pub publish_tributary_tx: &'a PTT, - pub spec: &'a TributarySpec, - block: Block, - pub block_number: u32, - _p2p: PhantomData

, -} - -impl< - D: Db, - T: DbTxn, - Pro: Processors, - PST: PublishSeraiTransaction, - PTT: PTTTrait, - RID: RIDTrait, - P: P2p, - > TributaryBlockHandler<'_, D, T, Pro, PST, PTT, RID, P> -{ - pub fn fatal_slash(&mut self, slashing: [u8; 32], reason: &str) { - let genesis = self.spec.genesis(); - - log::warn!("fatally slashing {}. reason: {}", hex::encode(slashing), reason); - FatallySlashed::set_fatally_slashed(self.txn, genesis, slashing); - - // TODO: disconnect the node from network/ban from further participation in all Tributaries - } - - // TODO: Once Substrate confirms a key, we need to rotate our validator set OR form a second - // Tributary post-DKG - // https://github.com/serai-dex/serai/issues/426 - - async fn handle(mut self) { - log::info!("found block for Tributary {:?}", self.spec.set()); - - let transactions = self.block.transactions.clone(); - for tx in transactions { - match tx { - TributaryTransaction::Tendermint(TendermintTx::SlashEvidence(ev)) => { - // Since the evidence is on the chain, it should already have been validated - // We can just punish the signer - let data = match ev { - Evidence::ConflictingMessages(first, second) => (first, Some(second)), - Evidence::InvalidPrecommit(first) | Evidence::InvalidValidRound(first) => (first, None), - }; - let msgs = ( - decode_signed_message::>(&data.0).unwrap(), - if data.1.is_some() { - Some( - decode_signed_message::>(&data.1.unwrap()) - .unwrap(), - ) - } else { - None - }, - ); - - // Since anything with evidence is fundamentally faulty behavior, not just temporal - // errors, mark the node as fatally slashed - self.fatal_slash(msgs.0.msg.sender, &format!("invalid tendermint messages: {msgs:?}")); - } - TributaryTransaction::Application(tx) => { - self.handle_application_tx(tx).await; - } - } - } - - let genesis = self.spec.genesis(); - - // Calculate the shares still present, spinning if not enough are - { - // Start with the original n value - let mut present_shares = self.spec.n(); - // Remove everyone fatally slashed - let current_fatal_slashes = FatalSlashes::get_as_keys(self.txn, genesis); - for removed in ¤t_fatal_slashes { - let original_i_for_removed = - self.spec.i(*removed).expect("removed party was never present"); - let removed_shares = - u16::from(original_i_for_removed.end) - u16::from(original_i_for_removed.start); - present_shares -= removed_shares; - } - - // Spin if the present shares don't satisfy the required threshold - if present_shares < self.spec.t() { - loop { - log::error!( - "fatally slashed so many participants for {:?} we no longer meet the threshold", - self.spec.set() - ); - tokio::time::sleep(core::time::Duration::from_secs(60)).await; - } - } - } - - for topic in ReattemptDb::take(self.txn, genesis, self.block_number) { - let attempt = AttemptDb::start_next_attempt(self.txn, genesis, topic); - log::info!("potentially re-attempting {topic:?} with attempt {attempt}"); - - // Slash people who failed to participate as expected in the prior attempt - { - let prior_attempt = attempt - 1; - // TODO: If 67% sent preprocesses, this should be them. 
Else, this should be vec![] - let expected_participants: Vec<::G> = vec![]; - - let mut did_not_participate = vec![]; - for expected_participant in expected_participants { - if DataDb::get( - self.txn, - genesis, - &DataSpecification { - topic, - // Since we got the preprocesses, we were supposed to get the shares - label: Label::Share, - attempt: prior_attempt, - }, - &expected_participant.to_bytes(), - ) - .is_none() - { - did_not_participate.push(expected_participant); - } - } - - // If a supermajority didn't participate as expected, the protocol was likely aborted due - // to detection of a completion or some larger networking error - // Accordingly, clear did_not_participate - // TODO - - // TODO: Increment the slash points of people who didn't preprocess in some expected window - // of time - - // Slash everyone who didn't participate as expected - // This may be overzealous as if a minority detects a completion, they'll abort yet the - // supermajority will cause the above allowance to not trigger, causing an honest minority - // to be slashed - // At the end of the protocol, the accumulated slashes are reduced by the amount obtained - // by the worst-performing member of the supermajority, and this is expected to - // sufficiently compensate for slashes which occur under normal operation - // TODO - } - - /* - All of these have the same common flow: - - 1) Check if this re-attempt is actually needed - 2) If so, dispatch whatever events as needed - - This is because we *always* re-attempt any protocol which had participation. That doesn't - mean we *should* re-attempt this protocol. - - The alternatives were: - 1) Note on-chain we completed a protocol, halting re-attempts upon 34%. - 2) Vote on-chain to re-attempt a protocol. - - This schema doesn't have any additional messages upon the success case (whereas - alternative #1 does) and doesn't have overhead (as alternative #2 does, sending votes and - then preprocesses. This only sends preprocesses). 
- */ - match topic { - Topic::DkgConfirmation => { - if SeraiDkgCompleted::get(self.txn, self.spec.set()).is_none() { - log::info!("re-attempting DKG confirmation with attempt {attempt}"); - - // Since it wasn't completed, publish our nonces for the next attempt - let confirmation_nonces = - crate::tributary::dkg_confirmation_nonces(self.our_key, self.spec, self.txn, attempt); - let mut tx = Transaction::DkgConfirmationNonces { - attempt, - confirmation_nonces, - signed: Transaction::empty_signed(), - }; - tx.sign(&mut OsRng, genesis, self.our_key); - self.publish_tributary_tx.publish_tributary_tx(tx).await; - } - } - Topic::SubstrateSign(inner_id) => { - let id = processor_messages::coordinator::SubstrateSignId { - session: self.spec.set().session, - id: inner_id, - attempt, - }; - match inner_id { - SubstrateSignableId::CosigningSubstrateBlock(block) => { - let block_number = SeraiBlockNumber::get(self.txn, block) - .expect("couldn't get the block number for prior attempted cosign"); - - // Check if the cosigner has a signature from our set for this block/a newer one - let latest_cosign = - crate::cosign_evaluator::LatestCosign::get(self.txn, self.spec.set().network) - .map_or(0, |cosign| cosign.block_number); - if latest_cosign < block_number { - log::info!("re-attempting cosigning {block_number:?} with attempt {attempt}"); - - // Instruct the processor to start the next attempt - self - .processors - .send( - self.spec.set().network, - processor_messages::coordinator::CoordinatorMessage::CosignSubstrateBlock { - id, - block_number, - }, - ) - .await; - } - } - SubstrateSignableId::Batch(batch) => { - // If the Batch hasn't appeared on-chain... - if BatchInstructionsHashDb::get(self.txn, self.spec.set().network, batch).is_none() { - log::info!("re-attempting signing batch {batch:?} with attempt {attempt}"); - - // Instruct the processor to start the next attempt - // The processor won't continue if it's already signed a Batch - // Prior checking if the Batch is on-chain just may reduce the non-participating - // 33% from publishing their re-attempt messages - self - .processors - .send( - self.spec.set().network, - processor_messages::coordinator::CoordinatorMessage::BatchReattempt { id }, - ) - .await; - } - } - SubstrateSignableId::SlashReport => { - // If this Tributary hasn't been retired... 
- // (published SlashReport/took too long to do so) - if crate::RetiredTributaryDb::get(self.txn, self.spec.set()).is_none() { - log::info!( - "re-attempting signing slash report for {:?} with attempt {attempt}", - self.spec.set() - ); - - let report = SlashReport::get(self.txn, self.spec.set()) - .expect("re-attempting signing a SlashReport we don't have?"); - self - .processors - .send( - self.spec.set().network, - processor_messages::coordinator::CoordinatorMessage::SignSlashReport { - id, - report, - }, - ) - .await; - } - } - } - } - Topic::Sign(id) => { - // Instruct the processor to start the next attempt - // If it has already noted a completion, it won't send a preprocess and will simply drop - // the re-attempt message - self - .processors - .send( - self.spec.set().network, - processor_messages::sign::CoordinatorMessage::Reattempt { - id: processor_messages::sign::SignId { - session: self.spec.set().session, - id, - attempt, - }, - }, - ) - .await; - } - } - } - - if Some(u64::from(self.block_number)) == SlashReportCutOff::get(self.txn, genesis) { - // Grab every slash report - let mut all_reports = vec![]; - for (i, (validator, _)) in self.spec.validators().into_iter().enumerate() { - let Some(mut report) = SlashReports::get(self.txn, genesis, validator.to_bytes()) else { - continue; - }; - // Assign them 0 points for themselves - report.insert(i, 0); - let signer_i = self.spec.i(validator).unwrap(); - let signer_len = u16::from(signer_i.end) - u16::from(signer_i.start); - // Push `n` copies, one for each of their shares - for _ in 0 .. signer_len { - all_reports.push(report.clone()); - } - } - - // For each participant, grab their median - let mut medians = vec![]; - for p in 0 .. self.spec.validators().len() { - let mut median_calc = vec![]; - for report in &all_reports { - median_calc.push(report[p]); - } - median_calc.sort_unstable(); - medians.push(median_calc[median_calc.len() / 2]); - } - - // Grab the points of the last party within the best-performing threshold - // This is done by first expanding the point values by the amount of shares - let mut sorted_medians = vec![]; - for (i, (_, shares)) in self.spec.validators().into_iter().enumerate() { - for _ in 0 .. 
shares { - sorted_medians.push(medians[i]); - } - } - // Then performing the sort - sorted_medians.sort_unstable(); - let worst_points_by_party_within_threshold = sorted_medians[usize::from(self.spec.t()) - 1]; - - // Reduce everyone's points by this value - for median in &mut medians { - *median = median.saturating_sub(worst_points_by_party_within_threshold); - } - - // The threshold now has the proper incentive to report this as they no longer suffer - // negative effects - // - // Additionally, if all validators had degraded performance, they don't all get penalized for - // what's likely outside their control (as it occurred universally) - - // Mark everyone fatally slashed with u32::MAX - for (i, (validator, _)) in self.spec.validators().into_iter().enumerate() { - if FatallySlashed::get(self.txn, genesis, validator.to_bytes()).is_some() { - medians[i] = u32::MAX; - } - } - - let mut report = vec![]; - for (i, (validator, _)) in self.spec.validators().into_iter().enumerate() { - if medians[i] != 0 { - report.push((validator.to_bytes(), medians[i])); - } - } - - // This does lock in the report, meaning further slash point accumulations won't be reported - // They still have value to be locally tracked due to local decisions made based off - // accumulated slash reports - SlashReport::set(self.txn, self.spec.set(), &report); - - // Start a signing protocol for this - self - .processors - .send( - self.spec.set().network, - processor_messages::coordinator::CoordinatorMessage::SignSlashReport { - id: SubstrateSignId { - session: self.spec.set().session, - id: SubstrateSignableId::SlashReport, - attempt: 0, - }, - report, - }, - ) - .await; - } - } -} - -#[allow(clippy::too_many_arguments)] -pub(crate) async fn handle_new_blocks< - D: Db, - Pro: Processors, - PST: PublishSeraiTransaction, - PTT: PTTTrait, - RID: RIDTrait, - P: P2p, ->( - db: &mut D, - key: &Zeroizing<::F>, - recognized_id: &RID, - processors: &Pro, - publish_serai_tx: &PST, - publish_tributary_tx: &PTT, - spec: &TributarySpec, - tributary: &TributaryReader, -) { - let genesis = tributary.genesis(); - let mut last_block = LastHandledBlock::get(db, genesis).unwrap_or(genesis); - let mut block_number = TributaryBlockNumber::get(db, last_block).unwrap_or(0); - while let Some(next) = tributary.block_after(&last_block) { - let block = tributary.block(&next).unwrap(); - block_number += 1; - - // Make sure we have all of the provided transactions for this block - for tx in &block.transactions { - // Provided TXs will appear first in the Block, so we can break after we hit a non-Provided - let TransactionKind::Provided(order) = tx.kind() else { - break; - }; - - // make sure we have all the provided txs in this block locally - if !tributary.locally_provided_txs_in_block(&block.hash(), order) { - return; - } - } - - let mut db_clone = db.clone(); - let mut txn = db_clone.txn(); - TributaryBlockNumber::set(&mut txn, next, &block_number); - (TributaryBlockHandler { - db, - txn: &mut txn, - spec, - our_key: key, - recognized_id, - processors, - publish_serai_tx, - publish_tributary_tx, - block, - block_number, - _p2p: PhantomData::

, - }) - .handle() - .await; - last_block = next; - LastHandledBlock::set(&mut txn, genesis, &next); - txn.commit(); - } -} - -pub(crate) async fn scan_tributaries_task< - D: Db, - Pro: Processors, - P: P2p, - RID: 'static + Send + Sync + Clone + RIDTrait, ->( - raw_db: D, - key: Zeroizing<::F>, - recognized_id: RID, - processors: Pro, - serai: Arc, - mut tributary_event: broadcast::Receiver>, -) { - log::info!("scanning tributaries"); - - loop { - match tributary_event.recv().await { - Ok(crate::TributaryEvent::NewTributary(crate::ActiveTributary { spec, tributary })) => { - // For each Tributary, spawn a dedicated scanner task - tokio::spawn({ - let raw_db = raw_db.clone(); - let key = key.clone(); - let recognized_id = recognized_id.clone(); - let processors = processors.clone(); - let serai = serai.clone(); - async move { - let spec = &spec; - let reader = tributary.reader(); - let mut tributary_db = raw_db.clone(); - loop { - // Check if the set was retired, and if so, don't further operate - if crate::db::RetiredTributaryDb::get(&raw_db, spec.set()).is_some() { - break; - } - - // Obtain the next block notification now to prevent obtaining it immediately after - // the next block occurs - let next_block_notification = tributary.next_block_notification().await; - - handle_new_blocks::<_, _, _, _, _, P>( - &mut tributary_db, - &key, - &recognized_id, - &processors, - &*serai, - &|tx: Transaction| { - let tributary = tributary.clone(); - async move { - match tributary.add_transaction(tx.clone()).await { - Ok(_) => {} - // Can happen as this occurs on a distinct DB TXN - Err(TransactionError::InvalidNonce) => { - log::warn!( - "publishing TX {tx:?} returned InvalidNonce. was it already added?" - ) - } - Err(e) => panic!("created an invalid transaction: {e:?}"), - } - } - }, - spec, - &reader, - ) - .await; - - // Run either when the notification fires, or every interval of block_time - let _ = tokio::time::timeout( - Duration::from_secs(tributary::Tributary::::block_time().into()), - next_block_notification, - ) - .await; - } - } - }); - } - // The above loop simply checks the DB every few seconds, voiding the need for this event - Ok(crate::TributaryEvent::TributaryRetired(_)) => {} - Err(broadcast::error::RecvError::Lagged(_)) => { - panic!("scan_tributaries lagged to handle tributary_event") - } - Err(broadcast::error::RecvError::Closed) => panic!("tributary_event sender closed"), - } - } -} diff --git a/coordinator/src/tributary/signing_protocol.rs b/coordinator/src/tributary/signing_protocol.rs deleted file mode 100644 index af334149..00000000 --- a/coordinator/src/tributary/signing_protocol.rs +++ /dev/null @@ -1,361 +0,0 @@ -/* - A MuSig-based signing protocol executed with the validators' keys. - - This is used for confirming the results of a DKG on-chain, an operation requiring all validators - which aren't specified as removed while still satisfying a supermajority. - - Since we're using the validator's keys, as needed for their being the root of trust, the - coordinator must perform the signing. This is distinct from all other group-signing operations, - as they're all done by the processor. - - The MuSig-aggregation achieves on-chain efficiency and enables a more secure design pattern. - While we could individually tack votes, that'd require logic to prevent voting multiple times and - tracking the accumulated votes. MuSig-aggregation simply requires checking the list is sorted and - the list's weight exceeds the threshold. 
- - Instead of maintaining state in memory, a combination of the DB and re-execution are used. This - is deemed acceptable re: performance as: - - 1) This is only done prior to a DKG being confirmed on Substrate and is assumed infrequent. - 2) This is an O(n) algorithm. - 3) The size of the validator set is bounded by MAX_KEY_SHARES_PER_SET. - - Accordingly, this should be tolerable. - - As for safety, it is explicitly unsafe to reuse nonces across signing sessions. This raises - concerns regarding our re-execution which is dependent on fixed nonces. Safety is derived from - the nonces being context-bound under a BFT protocol. The flow is as follows: - - 1) Decide the nonce. - 2) Publish the nonces' commitments, receiving everyone elses *and potentially the message to be - signed*. - 3) Sign and publish the signature share. - - In order for nonce re-use to occur, the received nonce commitments (or the message to be signed) - would have to be distinct and sign would have to be called again. - - Before we act on any received messages, they're ordered and finalized by a BFT algorithm. The - only way to operate on distinct received messages would be if: - - 1) A logical flaw exists, letting new messages over write prior messages - 2) A reorganization occurred from chain A to chain B, and with it, different messages - - Reorganizations are not supported, as BFT is assumed by the presence of a BFT algorithm. While - a significant amount of processes may be byzantine, leading to BFT being broken, that still will - not trigger a reorganization. The only way to move to a distinct chain, with distinct messages, - would be by rebuilding the local process (this time following chain B). Upon any complete - rebuild, we'd re-decide nonces, achieving safety. This does set a bound preventing partial - rebuilds which is accepted. - - Additionally, to ensure a rebuilt service isn't flagged as malicious, we have to check the - commitments generated from the decided nonces are in fact its commitments on-chain (TODO). - - TODO: We also need to review how we're handling Processor preprocesses and likely implement the - same on-chain-preprocess-matches-presumed-preprocess check before publishing shares. 
-*/ - -use core::ops::Deref; -use std::collections::{HashSet, HashMap}; - -use zeroize::{Zeroize, Zeroizing}; - -use rand_core::OsRng; - -use blake2::{Digest, Blake2s256}; - -use ciphersuite::{group::ff::PrimeField, Ciphersuite, Ristretto}; -use frost::{ - FrostError, - dkg::{Participant, musig::musig}, - ThresholdKeys, - sign::*, -}; -use frost_schnorrkel::Schnorrkel; - -use scale::Encode; - -#[rustfmt::skip] -use serai_client::validator_sets::primitives::{ValidatorSet, KeyPair, musig_context, set_keys_message}; - -use serai_db::*; - -use crate::tributary::TributarySpec; - -create_db!( - SigningProtocolDb { - CachedPreprocesses: (context: &impl Encode) -> [u8; 32] - DataSignedWith: (context: &impl Encode) -> (Vec, HashMap>), - } -); - -struct SigningProtocol<'a, T: DbTxn, C: Encode> { - pub(crate) key: &'a Zeroizing<::F>, - pub(crate) spec: &'a TributarySpec, - pub(crate) txn: &'a mut T, - pub(crate) context: C, -} - -impl SigningProtocol<'_, T, C> { - fn preprocess_internal( - &mut self, - participants: &[::G], - ) -> (AlgorithmSignMachine, [u8; 64]) { - // Encrypt the cached preprocess as recovery of it will enable recovering the private key - // While the DB isn't expected to be arbitrarily readable, it isn't a proper secret store and - // shouldn't be trusted as one - let mut encryption_key = { - let mut encryption_key_preimage = - Zeroizing::new(b"Cached Preprocess Encryption Key".to_vec()); - encryption_key_preimage.extend(self.context.encode()); - let repr = Zeroizing::new(self.key.to_repr()); - encryption_key_preimage.extend(repr.deref()); - Blake2s256::digest(&encryption_key_preimage) - }; - let encryption_key_slice: &mut [u8] = encryption_key.as_mut(); - - // Create the MuSig keys - let keys: ThresholdKeys = - musig(&musig_context(self.spec.set()), self.key, participants) - .expect("signing for a set we aren't in/validator present multiple times") - .into(); - - // Define the algorithm - let algorithm = Schnorrkel::new(b"substrate"); - - // Check if we've prior preprocessed - if CachedPreprocesses::get(self.txn, &self.context).is_none() { - // If we haven't, we create a machine solely to obtain the preprocess with - let (machine, _) = - AlgorithmMachine::new(algorithm.clone(), keys.clone()).preprocess(&mut OsRng); - - // Cache and save the preprocess to disk - let mut cache = machine.cache(); - assert_eq!(cache.0.len(), 32); - #[allow(clippy::needless_range_loop)] - for b in 0 .. 32 { - cache.0[b] ^= encryption_key_slice[b]; - } - - CachedPreprocesses::set(self.txn, &self.context, &cache.0); - } - - // We're now guaranteed to have the preprocess, hence why this `unwrap` is safe - let cached = CachedPreprocesses::get(self.txn, &self.context).unwrap(); - let mut cached = Zeroizing::new(cached); - #[allow(clippy::needless_range_loop)] - for b in 0 .. 
32 { - cached[b] ^= encryption_key_slice[b]; - } - encryption_key_slice.zeroize(); - // Create the machine from the cached preprocess - let (machine, preprocess) = - AlgorithmSignMachine::from_cache(algorithm, keys, CachedPreprocess(cached)); - - (machine, preprocess.serialize().try_into().unwrap()) - } - - fn share_internal( - &mut self, - participants: &[::G], - mut serialized_preprocesses: HashMap>, - msg: &[u8], - ) -> Result<(AlgorithmSignatureMachine, [u8; 32]), Participant> { - // We can't clear the preprocess as we sitll need it to accumulate all of the shares - // We do save the message we signed so any future calls with distinct messages panic - // This assumes the txn deciding this data is committed before the share is broaadcast - if let Some((existing_msg, existing_preprocesses)) = - DataSignedWith::get(self.txn, &self.context) - { - assert_eq!(msg, &existing_msg, "obtaining a signature share for a distinct message"); - assert_eq!( - &serialized_preprocesses, &existing_preprocesses, - "obtaining a signature share with a distinct set of preprocesses" - ); - } else { - DataSignedWith::set( - self.txn, - &self.context, - &(msg.to_vec(), serialized_preprocesses.clone()), - ); - } - - // Get the preprocessed machine - let (machine, _) = self.preprocess_internal(participants); - - // Deserialize all the preprocesses - let mut participants = serialized_preprocesses.keys().copied().collect::>(); - participants.sort(); - let mut preprocesses = HashMap::new(); - for participant in participants { - preprocesses.insert( - participant, - machine - .read_preprocess(&mut serialized_preprocesses.remove(&participant).unwrap().as_slice()) - .map_err(|_| participant)?, - ); - } - - // Sign the share - let (machine, share) = machine.sign(preprocesses, msg).map_err(|e| match e { - FrostError::InternalError(e) => unreachable!("FrostError::InternalError {e}"), - FrostError::InvalidParticipant(_, _) | - FrostError::InvalidSigningSet(_) | - FrostError::InvalidParticipantQuantity(_, _) | - FrostError::DuplicatedParticipant(_) | - FrostError::MissingParticipant(_) => panic!("unexpected error during sign: {e:?}"), - FrostError::InvalidPreprocess(p) | FrostError::InvalidShare(p) => p, - })?; - - Ok((machine, share.serialize().try_into().unwrap())) - } - - fn complete_internal( - machine: AlgorithmSignatureMachine, - shares: HashMap>, - ) -> Result<[u8; 64], Participant> { - let shares = shares - .into_iter() - .map(|(p, share)| { - machine.read_share(&mut share.as_slice()).map(|share| (p, share)).map_err(|_| p) - }) - .collect::, _>>()?; - let signature = machine.complete(shares).map_err(|e| match e { - FrostError::InternalError(e) => unreachable!("FrostError::InternalError {e}"), - FrostError::InvalidParticipant(_, _) | - FrostError::InvalidSigningSet(_) | - FrostError::InvalidParticipantQuantity(_, _) | - FrostError::DuplicatedParticipant(_) | - FrostError::MissingParticipant(_) => unreachable!("{e:?}"), - FrostError::InvalidPreprocess(p) | FrostError::InvalidShare(p) => p, - })?; - Ok(signature.to_bytes()) - } -} - -// Get the keys of the participants, noted by their threshold is, and return a new map indexed by -// their MuSig is. 
-fn threshold_i_map_to_keys_and_musig_i_map( - spec: &TributarySpec, - our_key: &Zeroizing<::F>, - mut map: HashMap>, -) -> (Vec<::G>, HashMap>) { - // Insert our own index so calculations aren't offset - let our_threshold_i = spec - .i(::generator() * our_key.deref()) - .expect("not in a set we're signing for") - .start; - // Asserts we weren't unexpectedly already present - assert!(map.insert(our_threshold_i, vec![]).is_none()); - - let spec_validators = spec.validators(); - let key_from_threshold_i = |threshold_i| { - for (key, _) in &spec_validators { - if threshold_i == spec.i(*key).expect("validator wasn't in a set they're in").start { - return *key; - } - } - panic!("requested info for threshold i which doesn't exist") - }; - - let mut sorted = vec![]; - let mut threshold_is = map.keys().copied().collect::>(); - threshold_is.sort(); - for threshold_i in threshold_is { - sorted.push(( - threshold_i, - key_from_threshold_i(threshold_i), - map.remove(&threshold_i).unwrap(), - )); - } - - // Now that signers are sorted, with their shares, create a map with the is needed for MuSig - let mut participants = vec![]; - let mut map = HashMap::new(); - let mut our_musig_i = None; - for (raw_i, (threshold_i, key, share)) in sorted.into_iter().enumerate() { - let musig_i = Participant::new(u16::try_from(raw_i).unwrap() + 1).unwrap(); - if threshold_i == our_threshold_i { - our_musig_i = Some(musig_i); - } - participants.push(key); - map.insert(musig_i, share); - } - - map.remove(&our_musig_i.unwrap()).unwrap(); - - (participants, map) -} - -type DkgConfirmerSigningProtocol<'a, T> = - SigningProtocol<'a, T, (&'static [u8; 12], ValidatorSet, u32)>; - -pub(crate) struct DkgConfirmer<'a, T: DbTxn> { - key: &'a Zeroizing<::F>, - spec: &'a TributarySpec, - txn: &'a mut T, - attempt: u32, -} - -impl DkgConfirmer<'_, T> { - pub(crate) fn new<'a>( - key: &'a Zeroizing<::F>, - spec: &'a TributarySpec, - txn: &'a mut T, - attempt: u32, - ) -> DkgConfirmer<'a, T> { - DkgConfirmer { key, spec, txn, attempt } - } - - fn signing_protocol(&mut self) -> DkgConfirmerSigningProtocol<'_, T> { - let context = (b"DkgConfirmer", self.spec.set(), self.attempt); - SigningProtocol { key: self.key, spec: self.spec, txn: self.txn, context } - } - - fn preprocess_internal(&mut self) -> (AlgorithmSignMachine, [u8; 64]) { - // This preprocesses with just us as we only decide the participants after obtaining - // preprocesses - let participants = vec![::generator() * self.key.deref()]; - self.signing_protocol().preprocess_internal(&participants) - } - // Get the preprocess for this confirmation. - pub(crate) fn preprocess(&mut self) -> [u8; 64] { - self.preprocess_internal().1 - } - - fn share_internal( - &mut self, - preprocesses: HashMap>, - key_pair: &KeyPair, - ) -> Result<(AlgorithmSignatureMachine, [u8; 32]), Participant> { - let (participants, preprocesses) = - threshold_i_map_to_keys_and_musig_i_map(self.spec, self.key, preprocesses); - let msg = set_keys_message(&self.spec.set(), key_pair); - self.signing_protocol().share_internal(&participants, preprocesses, &msg) - } - // Get the share for this confirmation, if the preprocesses are valid. 
- pub(crate) fn share( - &mut self, - preprocesses: HashMap>, - key_pair: &KeyPair, - ) -> Result<[u8; 32], Participant> { - self.share_internal(preprocesses, key_pair).map(|(_, share)| share) - } - - pub(crate) fn complete( - &mut self, - preprocesses: HashMap>, - key_pair: &KeyPair, - shares: HashMap>, - ) -> Result<[u8; 64], Participant> { - assert_eq!(preprocesses.keys().collect::>(), shares.keys().collect::>()); - - let shares = threshold_i_map_to_keys_and_musig_i_map(self.spec, self.key, shares).1; - - let machine = self - .share_internal(preprocesses, key_pair) - .expect("trying to complete a machine which failed to preprocess") - .0; - - DkgConfirmerSigningProtocol::<'_, T>::complete_internal(machine, shares) - } -} diff --git a/coordinator/src/tributary/spec.rs b/coordinator/src/tributary/spec.rs deleted file mode 100644 index efc792e6..00000000 --- a/coordinator/src/tributary/spec.rs +++ /dev/null @@ -1,124 +0,0 @@ -use core::{ops::Range, fmt::Debug}; -use std::{io, collections::HashMap}; - -use transcript::{Transcript, RecommendedTranscript}; - -use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; -use frost::Participant; - -use scale::Encode; -use borsh::{BorshSerialize, BorshDeserialize}; - -use serai_client::validator_sets::primitives::ValidatorSet; - -fn borsh_serialize_validators( - validators: &Vec<(::G, u16)>, - writer: &mut W, -) -> Result<(), io::Error> { - let len = u16::try_from(validators.len()).unwrap(); - BorshSerialize::serialize(&len, writer)?; - for validator in validators { - BorshSerialize::serialize(&validator.0.to_bytes(), writer)?; - BorshSerialize::serialize(&validator.1, writer)?; - } - Ok(()) -} - -fn borsh_deserialize_validators( - reader: &mut R, -) -> Result::G, u16)>, io::Error> { - let len: u16 = BorshDeserialize::deserialize_reader(reader)?; - let mut res = vec![]; - for _ in 0 .. len { - let compressed: [u8; 32] = BorshDeserialize::deserialize_reader(reader)?; - let point = Option::from(::G::from_bytes(&compressed)) - .ok_or_else(|| io::Error::other("invalid point for validator"))?; - let weight: u16 = BorshDeserialize::deserialize_reader(reader)?; - res.push((point, weight)); - } - Ok(res) -} - -#[derive(Clone, PartialEq, Eq, Debug, BorshSerialize, BorshDeserialize)] -pub struct TributarySpec { - serai_block: [u8; 32], - start_time: u64, - set: ValidatorSet, - #[borsh( - serialize_with = "borsh_serialize_validators", - deserialize_with = "borsh_deserialize_validators" - )] - validators: Vec<(::G, u16)>, - evrf_public_keys: Vec<([u8; 32], Vec)>, -} - -impl TributarySpec { - pub fn new( - serai_block: [u8; 32], - start_time: u64, - set: ValidatorSet, - validators: Vec<(::G, u16)>, - evrf_public_keys: Vec<([u8; 32], Vec)>, - ) -> TributarySpec { - Self { serai_block, start_time, set, validators, evrf_public_keys } - } - - pub fn set(&self) -> ValidatorSet { - self.set - } - - pub fn genesis(&self) -> [u8; 32] { - // Calculate the genesis for this Tributary - let mut genesis = RecommendedTranscript::new(b"Serai Tributary Genesis"); - // This locks it to a specific Serai chain - genesis.append_message(b"serai_block", self.serai_block); - genesis.append_message(b"session", self.set.session.0.to_le_bytes()); - genesis.append_message(b"network", self.set.network.encode()); - let genesis = genesis.challenge(b"genesis"); - let genesis_ref: &[u8] = genesis.as_ref(); - genesis_ref[.. 
32].try_into().unwrap() - } - - pub fn start_time(&self) -> u64 { - self.start_time - } - - pub fn n(&self) -> u16 { - self.validators.iter().map(|(_, weight)| *weight).sum() - } - - pub fn t(&self) -> u16 { - ((2 * self.n()) / 3) + 1 - } - - pub fn i(&self, key: ::G) -> Option> { - let mut all_is = HashMap::new(); - let mut i = 1; - for (validator, weight) in &self.validators { - all_is.insert( - *validator, - Range { start: Participant::new(i).unwrap(), end: Participant::new(i + weight).unwrap() }, - ); - i += weight; - } - - Some(all_is.get(&key)?.clone()) - } - - pub fn reverse_lookup_i(&self, i: Participant) -> Option<::G> { - for (validator, _) in &self.validators { - if self.i(*validator).map_or(false, |range| range.contains(&i)) { - return Some(*validator); - } - } - None - } - - pub fn validators(&self) -> Vec<(::G, u64)> { - self.validators.iter().map(|(validator, weight)| (*validator, u64::from(*weight))).collect() - } - - pub fn evrf_public_keys(&self) -> Vec<([u8; 32], Vec)> { - self.evrf_public_keys.clone() - } -} diff --git a/coordinator/src/tributary/transaction.rs b/coordinator/src/tributary/transaction.rs index fd8126ce..c5d00e30 100644 --- a/coordinator/src/tributary/transaction.rs +++ b/coordinator/src/tributary/transaction.rs @@ -11,10 +11,10 @@ use ciphersuite::{ }; use schnorr::SchnorrSignature; -use scale::{Encode, Decode}; +use scale::Encode; use borsh::{BorshSerialize, BorshDeserialize}; -use serai_client::primitives::PublicKey; +use serai_client::primitives::SeraiAddress; use processor_messages::sign::VariantSignId; @@ -27,33 +27,22 @@ use tributary::{ /// The label for data from a signing protocol. #[derive(Clone, Copy, PartialEq, Eq, Debug, Encode, BorshSerialize, BorshDeserialize)] -pub enum Label { +pub enum SigningProtocolRound { /// A preprocess. Preprocess, /// A signature share. Share, } -impl Label { +impl SigningProtocolRound { fn nonce(&self) -> u32 { match self { - Label::Preprocess => 0, - Label::Share => 1, + SigningProtocolRound::Preprocess => 0, + SigningProtocolRound::Share => 1, } } } -fn borsh_serialize_public( - public: &PublicKey, - writer: &mut W, -) -> Result<(), io::Error> { - // This doesn't use `encode_to` as `encode_to` panics if the writer returns an error - writer.write_all(&public.encode()) -} -fn borsh_deserialize_public(reader: &mut R) -> Result { - Decode::decode(&mut scale::IoReader(reader)).map_err(io::Error::other) -} - /// `tributary::Signed` without the nonce. /// /// All of our nonces are deterministic to the type of transaction and fields within. @@ -90,11 +79,7 @@ pub enum Transaction { /// A vote to remove a participant for invalid behavior RemoveParticipant { /// The participant to remove - #[borsh( - serialize_with = "borsh_serialize_public", - deserialize_with = "borsh_deserialize_public" - )] - participant: PublicKey, + participant: SeraiAddress, /// The transaction's signer and signature signed: Signed, }, @@ -119,7 +104,7 @@ pub enum Transaction { /// The attempt number of this signing protocol attempt: u32, // The signature share - confirmation_share: [u8; 32], + share: [u8; 32], /// The transaction's signer and signature signed: Signed, }, @@ -128,11 +113,46 @@ pub enum Transaction { /// /// When the time comes to start a new co-signing protocol, the most recent Substrate block will /// be the one selected to be cosigned. 
- CosignSubstrateBlock { - /// THe hash of the Substrate block to sign - hash: [u8; 32], + Cosign { + /// The hash of the Substrate block to sign + substrate_block_hash: [u8; 32], }, + /// The cosign for a Substrate block + /// + /// After producing this cosign, we need to start work on the latest intended-to-be cosigned + /// block. That requires agreement on when this cosign was produced, which we solve by embedding + /// this cosign on chain. + /// + /// We ideally don't have this transaction at all. The coordinator, without access to any of the + /// key shares, could observe the FROST signing session and determine a successful completion. + /// Unfortunately, that functionality is not present in modular-frost, so we do need to support + /// *some* asynchronous flow (where the processor or P2P network informs us of the successful + /// completion). + /// + /// If we use a `Provided` transaction, that requires everyone observe this cosign. + /// + /// If we use an `Unsigned` transaction, we can't verify the cosign signature inside + /// `Transaction::verify` unless we embedded the full `SignedCosign` on-chain. The issue is since + /// a Tributary is stateless with regards to the on-chain logic, including `Transaction::verify`, + /// we can't verify the signature against the group's public key unless we also include that (but + /// then we open a DoS where arbitrary group keys are specified to cause inclusion of arbitrary + /// blobs on chain). + /// + /// If we use a `Signed` transaction, we mitigate the DoS risk by having someone to fatally + /// slash. We have horrible performance though as for 100 validators, all 100 will publish this + /// transaction. + /// + /// We could use a signed `Unsigned` transaction, where it includes a signer and signature but + /// isn't technically a Signed transaction. This lets us de-duplicate the transaction premised on + /// its contents. + /// + /// The optimal choice is likely to use a `Provided` transaction. We don't actually need to + /// observe the produced cosign (which is ephemeral). As long as it's agreed the cosign in + /// question no longer needs to produced, which would mean the cosigning protocol at-large + /// cosigning the block in question, it'd be safe to provide this and move on to the next cosign. + Cosigned { substrate_block_hash: [u8; 32] }, + /// Acknowledge a Substrate block /// /// This is provided after the block has been cosigned. @@ -156,21 +176,14 @@ pub enum Transaction { hash: [u8; 32], }, - /// The local view of slashes observed by the transaction's sender - SlashReport { - /// The slash points accrued by each validator - slash_points: Vec, - /// The transaction's signer and signature - signed: Signed, - }, - + /// Data from a signing protocol. 
Sign { /// The ID of the object being signed id: VariantSignId, /// The attempt number of this signing protocol attempt: u32, /// The label for this data within the signing protocol - label: Label, + label: SigningProtocolRound, /// The data itself /// /// There will be `n` blobs of data where `n` is the amount of key shares the validator sending @@ -179,6 +192,14 @@ pub enum Transaction { /// The transaction's signer and signature signed: Signed, }, + + /// The local view of slashes observed by the transaction's sender + SlashReport { + /// The slash points accrued by each validator + slash_points: Vec, + /// The transaction's signer and signature + signed: Signed, + }, } impl ReadWrite for Transaction { @@ -208,7 +229,8 @@ impl TransactionTrait for Transaction { TransactionKind::Signed((b"DkgConfirmation", attempt).encode(), signed.nonce(1)) } - Transaction::CosignSubstrateBlock { .. } => TransactionKind::Provided("CosignSubstrateBlock"), + Transaction::Cosign { .. } => TransactionKind::Provided("CosignSubstrateBlock"), + Transaction::Cosigned { .. } => TransactionKind::Provided("Cosigned"), Transaction::SubstrateBlock { .. } => TransactionKind::Provided("SubstrateBlock"), Transaction::Batch { .. } => TransactionKind::Provided("Batch"), @@ -240,6 +262,8 @@ impl TransactionTrait for Transaction { impl Transaction { // Sign a transaction + // + // Panics if signing a transaction type which isn't `TransactionKind::Signed` pub fn sign( &mut self, rng: &mut R, @@ -254,7 +278,8 @@ impl Transaction { Transaction::DkgConfirmationPreprocess { ref mut signed, .. } => signed, Transaction::DkgConfirmationShare { ref mut signed, .. } => signed, - Transaction::CosignSubstrateBlock { .. } => panic!("signing CosignSubstrateBlock"), + Transaction::Cosign { .. } => panic!("signing CosignSubstrateBlock"), + Transaction::Cosigned { .. } => panic!("signing Cosigned"), Transaction::SubstrateBlock { .. } => panic!("signing SubstrateBlock"), Transaction::Batch { .. } => panic!("signing Batch"), diff --git a/crypto/dkg/src/evrf/mod.rs b/crypto/dkg/src/evrf/mod.rs index 3d043138..343c6141 100644 --- a/crypto/dkg/src/evrf/mod.rs +++ b/crypto/dkg/src/evrf/mod.rs @@ -106,7 +106,7 @@ pub struct Participation { impl Participation { pub fn read(reader: &mut R, n: u16) -> io::Result { - // TODO: Replace `len` with some calculcation deterministic to the params + // TODO: Replace `len` with some calculation deterministic to the params let mut len = [0; 4]; reader.read_exact(&mut len)?; let len = usize::try_from(u32::from_le_bytes(len)).expect("<32-bit platform?"); diff --git a/processor/messages/src/lib.rs b/processor/messages/src/lib.rs index 438beb5b..7c964ebc 100644 --- a/processor/messages/src/lib.rs +++ b/processor/messages/src/lib.rs @@ -83,7 +83,7 @@ pub mod sign { #[derive(Clone, Copy, PartialEq, Eq, Hash, Encode, Decode, BorshSerialize, BorshDeserialize)] pub enum VariantSignId { Cosign(u64), - Batch(u32), + Batch([u8; 32]), SlashReport, Transaction([u8; 32]), } diff --git a/substrate/primitives/src/account.rs b/substrate/primitives/src/account.rs index 5c77c28f..61940f29 100644 --- a/substrate/primitives/src/account.rs +++ b/substrate/primitives/src/account.rs @@ -52,7 +52,7 @@ pub fn borsh_deserialize_signature( // TODO: Remove this for solely Public? 
#[derive( - Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Encode, Decode, MaxEncodedLen, TypeInfo, + Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Encode, Decode, MaxEncodedLen, TypeInfo, )] #[cfg_attr(feature = "std", derive(Zeroize))] #[cfg_attr(feature = "borsh", derive(BorshSerialize, BorshDeserialize))]
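
The `Hash` derive added in this final hunk is presumably what allows the coordinator to key in-memory maps and sets directly by a validator's account key (e.g. the `SeraiAddress` now used in `Transaction::RemoveParticipant`), rather than by its byte encoding. A minimal sketch of the pattern this enables follows; the `SeraiAddress` stand-in struct and the slash-points map are illustrative assumptions, not part of this diff.

use std::collections::HashMap;

// Hypothetical stand-in for the account key type; the real type now derives
// Hash per the hunk above, which is what makes it usable as a HashMap key.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct SeraiAddress([u8; 32]);

fn main() {
  // With `Hash` derived, validator keys can index a HashMap directly,
  // for example when accumulating per-validator slash points in memory.
  let mut slash_points: HashMap<SeraiAddress, u32> = HashMap::new();
  let validator = SeraiAddress([0; 32]);
  *slash_points.entry(validator).or_insert(0) += 5;
  assert_eq!(slash_points[&validator], 5);
}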