Slash reports (#523)

* report_slashes plumbing in Substrate

Notably delays the SetRetired event until it provides a slash report or the set
after it becomes the set to report its slashes.

* Add dedicated AcceptedHandover event

* Add SlashReport TX to Tributary

* Create SlashReport TXs

* Handle SlashReport TXs

* Add logic to generate a SlashReport to the coordinator

* Route SlashReportSigner into the processor

* Finish routing the SlashReport signing/TX publication

* Add serai feature to processor's serai-client
This commit is contained in:
Luke Parker
2024-01-29 03:48:53 -05:00
committed by GitHub
parent 0b8c7ade6e
commit 4913873b10
17 changed files with 917 additions and 67 deletions

View File

@@ -9,7 +9,10 @@ use zeroize::{Zeroize, Zeroizing};
use rand_core::OsRng;
use ciphersuite::{
group::ff::{Field, PrimeField},
group::{
ff::{Field, PrimeField},
GroupEncoding,
},
Ciphersuite, Ristretto,
};
use schnorr::SchnorrSignature;
@@ -240,7 +243,9 @@ async fn handle_processor_message<D: Db, P: P2p>(
coordinator::ProcessorMessage::InvalidParticipant { id, .. } |
coordinator::ProcessorMessage::CosignPreprocess { id, .. } |
coordinator::ProcessorMessage::BatchPreprocess { id, .. } |
coordinator::ProcessorMessage::SlashReportPreprocess { id, .. } |
coordinator::ProcessorMessage::SubstrateShare { id, .. } => Some(id.session),
// This causes an action on our P2P net yet not on any Tributary
coordinator::ProcessorMessage::CosignedBlock { block_number, block, signature } => {
let cosigned_block = CosignedBlock {
network,
@@ -258,6 +263,55 @@ async fn handle_processor_message<D: Db, P: P2p>(
P2p::broadcast(p2p, P2pMessageKind::CosignedBlock, buf).await;
None
}
// This causes an action on Substrate yet not on any Tributary
coordinator::ProcessorMessage::SignedSlashReport { session, signature } => {
let set = ValidatorSet { network, session: *session };
let signature: &[u8] = signature.as_ref();
let signature = serai_client::Signature(signature.try_into().unwrap());
let slashes = crate::tributary::SlashReport::get(&txn, set)
.expect("signed slash report despite not having slash report locally");
let slashes_pubs =
slashes.iter().map(|(address, points)| (Public(*address), *points)).collect::<Vec<_>>();
let tx = serai_client::SeraiValidatorSets::report_slashes(
network,
slashes
.into_iter()
.map(|(address, points)| (serai_client::SeraiAddress(address), points))
.collect::<Vec<_>>()
.try_into()
.unwrap(),
signature.clone(),
);
loop {
if serai.publish(&tx).await.is_ok() {
break None;
}
// Check if the slashes shouldn't still be reported. If not, break.
let Ok(serai) = serai.as_of_latest_finalized_block().await else {
tokio::time::sleep(core::time::Duration::from_secs(5)).await;
continue;
};
let Ok(key) = serai.validator_sets().key_pending_slash_report(network).await else {
tokio::time::sleep(core::time::Duration::from_secs(5)).await;
continue;
};
let Some(key) = key else {
break None;
};
// If this is the key for this slash report, then this will verify
use sp_application_crypto::RuntimePublic;
if !key.verify(
&serai_client::validator_sets::primitives::report_slashes_message(&set, &slashes_pubs),
&signature,
) {
break None;
}
}
}
},
// These don't return a relevant Tributary as there's no Tributary with action expected
ProcessorMessage::Substrate(inner_msg) => match inner_msg {
@@ -550,7 +604,8 @@ async fn handle_processor_message<D: Db, P: P2p>(
// slash) and censor transactions (yet don't explicitly ban)
vec![]
}
coordinator::ProcessorMessage::CosignPreprocess { id, preprocesses } => {
coordinator::ProcessorMessage::CosignPreprocess { id, preprocesses } |
coordinator::ProcessorMessage::SlashReportPreprocess { id, preprocesses } => {
vec![Transaction::SubstrateSign(SignData {
plan: id.id,
attempt: id.attempt,
@@ -665,6 +720,8 @@ async fn handle_processor_message<D: Db, P: P2p>(
}
#[allow(clippy::match_same_arms)] // Allowed to preserve layout
coordinator::ProcessorMessage::CosignedBlock { .. } => unreachable!(),
#[allow(clippy::match_same_arms)]
coordinator::ProcessorMessage::SignedSlashReport { .. } => unreachable!(),
},
ProcessorMessage::Substrate(inner_msg) => match inner_msg {
processor_messages::substrate::ProcessorMessage::Batch { .. } |
@@ -963,6 +1020,8 @@ pub async fn run<D: Db, Pro: Processors, P: P2p>(
new_tributary_spec_send.send(spec).unwrap();
}
let (perform_slash_report_send, mut perform_slash_report_recv) = mpsc::unbounded_channel();
let (tributary_retired_send, mut tributary_retired_recv) = mpsc::unbounded_channel();
// Handle new Substrate blocks
@@ -972,6 +1031,7 @@ pub async fn run<D: Db, Pro: Processors, P: P2p>(
processors.clone(),
serai.clone(),
new_tributary_spec_send,
perform_slash_report_send,
tributary_retired_send,
));
@@ -1026,10 +1086,12 @@ pub async fn run<D: Db, Pro: Processors, P: P2p>(
let raw_db = raw_db.clone();
let key = key.clone();
let specs = Arc::new(RwLock::new(HashMap::new()));
let tributaries = Arc::new(RwLock::new(HashMap::new()));
// Spawn a task to maintain a local view of the tributaries for whenever recognized_id is
// called
tokio::spawn({
let specs = specs.clone();
let tributaries = tributaries.clone();
let mut set_to_genesis = HashMap::new();
async move {
@@ -1038,9 +1100,11 @@ pub async fn run<D: Db, Pro: Processors, P: P2p>(
Ok(TributaryEvent::NewTributary(tributary)) => {
set_to_genesis.insert(tributary.spec.set(), tributary.spec.genesis());
tributaries.write().await.insert(tributary.spec.genesis(), tributary.tributary);
specs.write().await.insert(tributary.spec.set(), tributary.spec);
}
Ok(TributaryEvent::TributaryRetired(set)) => {
if let Some(genesis) = set_to_genesis.remove(&set) {
specs.write().await.remove(&set);
tributaries.write().await.remove(&genesis);
}
}
@@ -1053,6 +1117,84 @@ pub async fn run<D: Db, Pro: Processors, P: P2p>(
}
});
// Also spawn a task to handle slash reports, as this needs such a view of tributaries
tokio::spawn({
let mut raw_db = raw_db.clone();
let key = key.clone();
let tributaries = tributaries.clone();
async move {
'task_loop: loop {
match perform_slash_report_recv.recv().await {
Some(set) => {
let (genesis, validators) = loop {
let specs = specs.read().await;
let Some(spec) = specs.get(&set) else {
// If we don't have this Tributary because it's retired, break and move on
if RetiredTributaryDb::get(&raw_db, set).is_some() {
continue 'task_loop;
}
// This may happen if the task above is simply slow
log::warn!("tributary we don't have yet is supposed to perform a slash report");
continue;
};
break (spec.genesis(), spec.validators());
};
let mut slashes = vec![];
for (validator, _) in validators {
if validator == (<Ristretto as Ciphersuite>::generator() * key.deref()) {
continue;
}
let validator = validator.to_bytes();
let fatally = tributary::FatallySlashed::get(&raw_db, genesis, validator).is_some();
// TODO: Properly type this
let points = if fatally {
u32::MAX
} else {
tributary::SlashPoints::get(&raw_db, genesis, validator).unwrap_or(0)
};
slashes.push(points);
}
let mut tx = Transaction::SlashReport(slashes, Transaction::empty_signed());
tx.sign(&mut OsRng, genesis, &key);
let mut first = true;
loop {
if !first {
sleep(Duration::from_millis(100)).await;
}
first = false;
let tributaries = tributaries.read().await;
let Some(tributary) = tributaries.get(&genesis) else {
// If we don't have this Tributary because it's retired, break and move on
if RetiredTributaryDb::get(&raw_db, set).is_some() {
break;
}
// This may happen if the task above is simply slow
log::warn!("tributary we don't have yet is supposed to perform a slash report");
continue;
};
// This is safe to perform multiple times and solely needs atomicity with regards
// to itself
// TODO: Should this not take a txn accordingly? It's best practice to take a txn,
// yet taking a txn fails to declare its achieved independence
let mut txn = raw_db.txn();
tributary::publish_signed_transaction(&mut txn, tributary, tx).await;
txn.commit();
break;
}
}
None => panic!("perform slash report sender closed"),
}
}
}
});
move |set: ValidatorSet, genesis, id_type, id: Vec<u8>| {
log::debug!("recognized ID {:?} {}", id_type, hex::encode(&id));
let mut raw_db = raw_db.clone();

View File

@@ -208,10 +208,12 @@ async fn handle_batch_and_burns<Pro: Processors>(
// Handle a specific Substrate block, returning an error when it fails to get data
// (not blocking / holding)
#[allow(clippy::too_many_arguments)]
async fn handle_block<D: Db, Pro: Processors>(
db: &mut D,
key: &Zeroizing<<Ristretto as Ciphersuite>::F>,
new_tributary_spec: &mpsc::UnboundedSender<TributarySpec>,
perform_slash_report: &mpsc::UnboundedSender<ValidatorSet>,
tributary_retired: &mpsc::UnboundedSender<ValidatorSet>,
processors: &Pro,
serai: &Serai,
@@ -287,6 +289,27 @@ async fn handle_block<D: Db, Pro: Processors>(
event_id += 1;
}
for accepted_handover in serai.as_of(hash).validator_sets().accepted_handover_events().await? {
let ValidatorSetsEvent::AcceptedHandover { set } = accepted_handover else {
panic!("AcceptedHandover event wasn't AcceptedHandover: {accepted_handover:?}");
};
if set.network == NetworkId::Serai {
continue;
}
if HandledEvent::is_unhandled(db, hash, event_id) {
log::info!("found fresh accepted handover event {:?}", accepted_handover);
// TODO: This isn't atomic with the event handling
// Send a oneshot receiver so we can await the response?
perform_slash_report.send(set).unwrap();
let mut txn = db.txn();
HandledEvent::handle_event(&mut txn, hash, event_id);
txn.commit();
}
event_id += 1;
}
for retired_set in serai.as_of(hash).validator_sets().set_retired_events().await? {
let ValidatorSetsEvent::SetRetired { set } = retired_set else {
panic!("SetRetired event wasn't SetRetired: {retired_set:?}");
@@ -320,10 +343,12 @@ async fn handle_block<D: Db, Pro: Processors>(
Ok(())
}
#[allow(clippy::too_many_arguments)]
async fn handle_new_blocks<D: Db, Pro: Processors>(
db: &mut D,
key: &Zeroizing<<Ristretto as Ciphersuite>::F>,
new_tributary_spec: &mpsc::UnboundedSender<TributarySpec>,
perform_slash_report: &mpsc::UnboundedSender<ValidatorSet>,
tributary_retired: &mpsc::UnboundedSender<ValidatorSet>,
processors: &Pro,
serai: &Serai,
@@ -349,7 +374,17 @@ async fn handle_new_blocks<D: Db, Pro: Processors>(
.expect("couldn't get block before the latest finalized block");
log::info!("handling substrate block {b}");
handle_block(db, key, new_tributary_spec, tributary_retired, processors, serai, block).await?;
handle_block(
db,
key,
new_tributary_spec,
perform_slash_report,
tributary_retired,
processors,
serai,
block,
)
.await?;
*next_block += 1;
let mut txn = db.txn();
@@ -368,6 +403,7 @@ pub async fn scan_task<D: Db, Pro: Processors>(
processors: Pro,
serai: Arc<Serai>,
new_tributary_spec: mpsc::UnboundedSender<TributarySpec>,
perform_slash_report: mpsc::UnboundedSender<ValidatorSet>,
tributary_retired: mpsc::UnboundedSender<ValidatorSet>,
) {
log::info!("scanning substrate");
@@ -443,6 +479,7 @@ pub async fn scan_task<D: Db, Pro: Processors>(
&mut db,
&key,
&new_tributary_spec,
&perform_slash_report,
&tributary_retired,
&processors,
&serai,

View File

@@ -7,7 +7,7 @@ use ciphersuite::{group::Group, Ciphersuite, Ristretto};
use scale::{Encode, Decode};
use serai_client::{
primitives::{SeraiAddress, Signature},
validator_sets::primitives::{ValidatorSet, KeyPair},
validator_sets::primitives::{MAX_KEY_SHARES_PER_SET, ValidatorSet, KeyPair},
};
use processor_messages::coordinator::SubstrateSignableId;
@@ -79,7 +79,7 @@ fn test_read_write<RW: Eq + Debug + ReadWrite>(value: &RW) {
#[test]
fn tx_size_limit() {
use serai_client::validator_sets::primitives::{MAX_KEY_SHARES_PER_SET, MAX_KEY_LEN};
use serai_client::validator_sets::primitives::MAX_KEY_LEN;
use tributary::TRANSACTION_SIZE_LIMIT;
@@ -277,4 +277,17 @@ fn serialize_transaction() {
signature: random_signed_with_nonce(&mut OsRng, 2).signature,
});
}
test_read_write(&Transaction::SlashReport(
{
let amount =
usize::try_from(OsRng.next_u64() % u64::from(MAX_KEY_SHARES_PER_SET - 1)).unwrap();
let mut points = vec![];
for _ in 0 .. amount {
points.push((OsRng.next_u64() >> 32).try_into().unwrap());
}
points
},
random_signed_with_nonce(&mut OsRng, 0),
));
}

View File

@@ -51,10 +51,13 @@ create_db!(
TributaryBlockNumber: (block: [u8; 32]) -> u32,
LastHandledBlock: (genesis: [u8; 32]) -> [u8; 32],
// TODO: Revisit the point of this
FatalSlashes: (genesis: [u8; 32]) -> Vec<[u8; 32]>,
RemovedAsOfDkgAttempt: (genesis: [u8; 32], attempt: u32) -> Vec<[u8; 32]>,
OfflineDuringDkg: (genesis: [u8; 32]) -> Vec<[u8; 32]>,
// TODO: Combine these two
FatallySlashed: (genesis: [u8; 32], account: [u8; 32]) -> (),
SlashPoints: (genesis: [u8; 32], account: [u8; 32]) -> u32,
VotedToRemove: (genesis: [u8; 32], voter: [u8; 32], to_remove: [u8; 32]) -> (),
VotesToRemove: (genesis: [u8; 32], to_remove: [u8; 32]) -> u16,
@@ -73,6 +76,11 @@ create_db!(
PlanIds: (genesis: &[u8], block: u64) -> Vec<[u8; 32]>,
SignedTransactionDb: (order: &[u8], nonce: u32) -> Vec<u8>,
SlashReports: (genesis: [u8; 32], signer: [u8; 32]) -> Vec<u32>,
SlashReported: (genesis: [u8; 32]) -> u16,
SlashReportCutOff: (genesis: [u8; 32]) -> u64,
SlashReport: (set: ValidatorSet) -> Vec<([u8; 32], u32)>,
}
);
@@ -116,7 +124,13 @@ impl AttemptDb {
pub fn attempt(getter: &impl Get, genesis: [u8; 32], topic: Topic) -> Option<u32> {
let attempt = Self::get(getter, genesis, &topic);
// Don't require explicit recognition of the Dkg topic as it starts when the chain does
if attempt.is_none() && ((topic == Topic::Dkg) || (topic == Topic::DkgConfirmation)) {
// Don't require explicit recognition of the SlashReport topic as it isn't a DoS risk and it
// should always happen (eventually)
if attempt.is_none() &&
((topic == Topic::Dkg) ||
(topic == Topic::DkgConfirmation) ||
(topic == Topic::SubstrateSign(SubstrateSignableId::SlashReport)))
{
return Some(0);
}
attempt

View File

@@ -738,6 +738,39 @@ impl<
};
self.processors.send(self.spec.set().network, msg).await;
}
Transaction::SlashReport(points, signed) => {
// Uses &[] as we only need the length which is independent to who else was removed
let signer_range = self.spec.i(&[], signed.signer).unwrap();
let signer_len = u16::from(signer_range.end) - u16::from(signer_range.start);
if points.len() != (self.spec.validators().len() - 1) {
self.fatal_slash(
signed.signer.to_bytes(),
"submitted a distinct amount of slash points to participants",
);
return;
}
if SlashReports::get(self.txn, genesis, signed.signer.to_bytes()).is_some() {
self.fatal_slash(signed.signer.to_bytes(), "submitted multiple slash points");
return;
}
SlashReports::set(self.txn, genesis, signed.signer.to_bytes(), &points);
let prior_reported = SlashReported::get(self.txn, genesis).unwrap_or(0);
let now_reported = prior_reported + signer_len;
SlashReported::set(self.txn, genesis, &now_reported);
if (prior_reported < self.spec.t()) && (now_reported >= self.spec.t()) {
SlashReportCutOff::set(
self.txn,
genesis,
// 30 minutes into the future
&(u64::from(self.block_number) +
((30 * 60 * 1000) / u64::from(tributary::tendermint::TARGET_BLOCK_TIME))),
);
}
}
}
}
}

View File

@@ -16,7 +16,7 @@ use serai_client::{
use serai_db::DbTxn;
use processor_messages::coordinator::SubstrateSignableId;
use processor_messages::coordinator::{SubstrateSignId, SubstrateSignableId};
use tributary::{
TransactionKind, Transaction as TributaryTransaction, TransactionError, Block, TributaryReader,
@@ -520,6 +520,24 @@ impl<
.await;
}
}
SubstrateSignableId::SlashReport => {
// If this Tributary hasn't been retired...
// (published SlashReport/took too long to do so)
if crate::RetiredTributaryDb::get(self.txn, self.spec.set()).is_none() {
let report = SlashReport::get(self.txn, self.spec.set())
.expect("re-attempting signing a SlashReport we don't have?");
self
.processors
.send(
self.spec.set().network,
processor_messages::coordinator::CoordinatorMessage::SignSlashReport {
id,
report,
},
)
.await;
}
}
}
}
Topic::Sign(id) => {
@@ -542,6 +560,94 @@ impl<
}
}
}
if Some(u64::from(self.block_number)) == SlashReportCutOff::get(self.txn, genesis) {
// Grab every slash report
let mut all_reports = vec![];
for (i, (validator, _)) in self.spec.validators().into_iter().enumerate() {
let Some(mut report) = SlashReports::get(self.txn, genesis, validator.to_bytes()) else {
continue;
};
// Assign them 0 points for themselves
report.insert(i, 0);
// Uses &[] as we only need the length which is independent to who else was removed
let signer_i = self.spec.i(&[], validator).unwrap();
let signer_len = u16::from(signer_i.end) - u16::from(signer_i.start);
// Push `n` copies, one for each of their shares
for _ in 0 .. signer_len {
all_reports.push(report.clone());
}
}
// For each participant, grab their median
let mut medians = vec![];
for p in 0 .. self.spec.validators().len() {
let mut median_calc = vec![];
for report in &all_reports {
median_calc.push(report[p]);
}
median_calc.sort_unstable();
medians.push(median_calc[median_calc.len() / 2]);
}
// Grab the points of the last party within the best-performing threshold
// This is done by first expanding the point values by the amount of shares
let mut sorted_medians = vec![];
for (i, (_, shares)) in self.spec.validators().into_iter().enumerate() {
for _ in 0 .. shares {
sorted_medians.push(medians[i]);
}
}
// Then performing the sort
sorted_medians.sort_unstable();
let worst_points_by_party_within_threshold = sorted_medians[usize::from(self.spec.t()) - 1];
// Reduce everyone's points by this value
for median in &mut medians {
*median = median.saturating_sub(worst_points_by_party_within_threshold);
}
// The threshold now has the proper incentive to report this as they no longer suffer
// negative effects
//
// Additionally, if all validators had degraded performance, they don't all get penalized for
// what's likely outside their control (as it occurred universally)
// Mark everyone fatally slashed with u32::MAX
for (i, (validator, _)) in self.spec.validators().into_iter().enumerate() {
if FatallySlashed::get(self.txn, genesis, validator.to_bytes()).is_some() {
medians[i] = u32::MAX;
}
}
let mut report = vec![];
for (i, (validator, _)) in self.spec.validators().into_iter().enumerate() {
if medians[i] != 0 {
report.push((validator.to_bytes(), medians[i]));
}
}
// This does lock in the report, meaning further slash point accumulations won't be reported
// They still have value to be locally tracked due to local decisions made based off
// accumulated slash reports
SlashReport::set(self.txn, self.spec.set(), &report);
// Start a signing protocol for this
self
.processors
.send(
self.spec.set().network,
processor_messages::coordinator::CoordinatorMessage::SignSlashReport {
id: SubstrateSignId {
session: self.spec.set().session,
id: SubstrateSignableId::SlashReport,
attempt: 0,
},
report,
},
)
.await;
}
}
}

View File

@@ -190,6 +190,8 @@ pub enum Transaction {
first_signer: <Ristretto as Ciphersuite>::G,
signature: SchnorrSignature<Ristretto>,
},
SlashReport(Vec<u32>, Signed),
}
impl Debug for Transaction {
@@ -244,6 +246,11 @@ impl Debug for Transaction {
.field("plan", &hex::encode(plan))
.field("tx_hash", &hex::encode(tx_hash))
.finish_non_exhaustive(),
Transaction::SlashReport(points, signed) => fmt
.debug_struct("Transaction::SignCompleted")
.field("points", points)
.field("signed", signed)
.finish(),
}
}
}
@@ -413,6 +420,25 @@ impl ReadWrite for Transaction {
Ok(Transaction::SignCompleted { plan, tx_hash, first_signer, signature })
}
11 => {
let mut len = [0];
reader.read_exact(&mut len)?;
let len = len[0];
// If the set has as many validators as MAX_KEY_SHARES_PER_SET, then the amount of distinct
// validators (the amount of validators reported on) will be at most
// `MAX_KEY_SHARES_PER_SET - 1`
if u32::from(len) > (serai_client::validator_sets::primitives::MAX_KEY_SHARES_PER_SET - 1) {
Err(io::Error::other("more points reported than allowed validator"))?;
}
let mut points = vec![0u32; len.into()];
for points in &mut points {
let mut these_points = [0; 4];
reader.read_exact(&mut these_points)?;
*points = u32::from_le_bytes(these_points);
}
Ok(Transaction::SlashReport(points, Signed::read_without_nonce(reader, 0)?))
}
_ => Err(io::Error::other("invalid transaction type")),
}
}
@@ -529,6 +555,14 @@ impl ReadWrite for Transaction {
writer.write_all(&first_signer.to_bytes())?;
signature.write(writer)
}
Transaction::SlashReport(points, signed) => {
writer.write_all(&[11])?;
writer.write_all(&[u8::try_from(points.len()).unwrap()])?;
for points in points {
writer.write_all(&points.to_le_bytes())?;
}
signed.write_without_nonce(writer)
}
}
}
}
@@ -559,6 +593,10 @@ impl TransactionTrait for Transaction {
TransactionKind::Signed((b"sign", data.plan, data.attempt).encode(), &data.signed)
}
Transaction::SignCompleted { .. } => TransactionKind::Unsigned,
Transaction::SlashReport(_, signed) => {
TransactionKind::Signed(b"slash_report".to_vec(), signed)
}
}
}
@@ -622,10 +660,13 @@ impl Transaction {
Transaction::Sign(data) => data.label.nonce(),
Transaction::SignCompleted { .. } => panic!("signing SignCompleted"),
Transaction::SlashReport(_, _) => 0,
};
(
nonce,
#[allow(clippy::match_same_arms)]
match tx {
Transaction::RemoveParticipantDueToDkg { ref mut signed, .. } |
Transaction::DkgCommitments { ref mut signed, .. } |
@@ -642,6 +683,8 @@ impl Transaction {
Transaction::Sign(ref mut data) => &mut data.signed,
Transaction::SignCompleted { .. } => panic!("signing SignCompleted"),
Transaction::SlashReport(_, ref mut signed) => signed,
},
)
}