Spawn PublishSlashReportTask

Updates the task so that it attempts to publish for every network, instead of
returning as soon as any single network fails.

Uses the SlashReport type throughout the codebase.
This commit is contained in:
Luke Parker
2025-01-15 12:08:28 -05:00
parent 92a4cceeeb
commit 7312fa8d3c
10 changed files with 132 additions and 101 deletions

View File

@@ -6,8 +6,8 @@ use scale::{Encode, Decode};
use borsh::{io, BorshSerialize, BorshDeserialize};
use serai_client::{
primitives::{NetworkId, PublicKey, Signature, SeraiAddress},
validator_sets::primitives::{Session, ValidatorSet, KeyPair},
primitives::{NetworkId, PublicKey, Signature},
validator_sets::primitives::{Session, ValidatorSet, KeyPair, SlashReport},
in_instructions::primitives::SignedBatch,
Transaction,
};
@@ -183,10 +183,6 @@ impl SignedBatches {
}
}
/// The slash report was invalid.
#[derive(Debug)]
pub struct InvalidSlashReport;
/// The slash reports to publish onto Serai.
pub struct SlashReports;
impl SlashReports {
@@ -194,30 +190,25 @@ impl SlashReports {
///
/// This only saves the most recent slashes as only a single session is eligible to have its
/// slashes reported at once.
///
/// Returns Err if the slashes are invalid. Returns Ok if the slashes weren't detected as
/// invalid. Slashes may be considered invalid by the Serai blockchain later even if not detected
/// as invalid here.
pub fn set(
txn: &mut impl DbTxn,
set: ValidatorSet,
slashes: Vec<(SeraiAddress, u32)>,
slash_report: SlashReport,
signature: Signature,
) -> Result<(), InvalidSlashReport> {
) {
// If we have a more recent slash report, don't write this historic one
if let Some((existing_session, _)) = _public_db::SlashReports::get(txn, set.network) {
if existing_session.0 >= set.session.0 {
return Ok(());
return;
}
}
let tx = serai_client::validator_sets::SeraiValidatorSets::report_slashes(
set.network,
slashes.try_into().map_err(|_| InvalidSlashReport)?,
slash_report,
signature,
);
_public_db::SlashReports::set(txn, set.network, &(set.session, tx.encode()));
Ok(())
}
pub(crate) fn take(txn: &mut impl DbTxn, network: NetworkId) -> Option<(Session, Transaction)> {
let (session, tx) = _public_db::SlashReports::take(txn, network)?;

View File

@@ -22,66 +22,80 @@ impl<D: Db> PublishSlashReportTask<D> {
}
}
impl<D: Db> PublishSlashReportTask<D> {
  /// Attempt to publish the slash report queued for `network`, if there is one.
  ///
  /// Yields `Ok(true)` only when the slash report transaction was published. `Ok(false)` is
  /// yielded when nothing is queued, when the session able to publish the report has already been
  /// passed, or when Serai has no key pending a slash report for this network.
  ///
  /// # Errors
  ///
  /// Any failure while querying or publishing to Serai is returned as its `Debug` rendering, as is
  /// Serai reporting a current session older than the one expected to publish this report. The
  /// database transaction is not committed on any of these paths.
  async fn publish(&mut self, network: NetworkId) -> Result<bool, String> {
    let mut db_txn = self.db.txn();
    let Some((reported_session, report_tx)) = SlashReports::take(&mut db_txn, network) else {
      // Nothing is queued for this network
      return Ok(false);
    };

    let finalized = self
      .serai
      .as_of_latest_finalized_block()
      .await
      .map_err(|e| format!("{e:?}"))?;
    let validator_sets = finalized.validator_sets();

    // The slash report for session #n may only be published while session #n+1 is still active
    let publishing_session = Some(reported_session.0 + 1);
    let active_session = validator_sets
      .session(network)
      .await
      .map_err(|e| format!("{e:?}"))?
      .map(|active| active.0);

    if active_session > publishing_session {
      // The window has passed. Commit so this slash report is drained from the database and isn't
      // tried again later
      db_txn.commit();
      return Ok(false);
    }
    if active_session != publishing_session {
      // The active session wasn't greater, and isn't equal, so it must be behind
      assert!(active_session < publishing_session);
      // This would mean the Serai node is resyncing and is behind where it prior was
      return Err("have a slash report for a session Serai has yet to retire".to_string());
    }

    // If the session which should publish a slash report already has, move on
    let already_reported = validator_sets
      .key_pending_slash_report(network)
      .await
      .map_err(|e| format!("{e:?}"))?
      .is_none();
    if already_reported {
      db_txn.commit();
      return Ok(false);
    }

    // A failure here could be specific to this TX (such as an already in mempool error). We
    // assume such an error is ephemeral, and that the latency incurred for it to resolve is
    // minuscule compared to the window available to publish the slash report, making it a
    // non-issue to simply surface the error to the caller.
    self
      .serai
      .publish(&report_tx)
      .await
      .map_err(|e| format!("couldn't publish slash report transaction: {e:?}"))?;
    db_txn.commit();
    Ok(true)
  }
}
impl<D: Db> ContinuallyRan for PublishSlashReportTask<D> {
type Error = String;
fn run_iteration(&mut self) -> impl Send + Future<Output = Result<bool, Self::Error>> {
async move {
let mut made_progress = false;
let mut error = None;
for network in serai_client::primitives::NETWORKS {
if network == NetworkId::Serai {
continue;
};
let mut txn = self.db.txn();
let Some((session, slash_report)) = SlashReports::take(&mut txn, network) else {
// No slash report to publish
continue;
};
let serai =
self.serai.as_of_latest_finalized_block().await.map_err(|e| format!("{e:?}"))?;
let serai = serai.validator_sets();
let session_after_slash_report = Session(session.0 + 1);
let current_session = serai.session(network).await.map_err(|e| format!("{e:?}"))?;
let current_session = current_session.map(|session| session.0);
// Only attempt to publish the slash report for session #n while session #n+1 is still
// active
let session_after_slash_report_retired =
current_session > Some(session_after_slash_report.0);
if session_after_slash_report_retired {
// Commit the txn to drain this slash report from the database and not try it again later
txn.commit();
continue;
}
if Some(session_after_slash_report.0) != current_session {
// We already checked the current session wasn't greater, and they're not equal
assert!(current_session < Some(session_after_slash_report.0));
// This would mean the Serai node is resyncing and is behind where it prior was
Err("have a slash report for a session Serai has yet to retire".to_string())?;
}
// If this session which should publish a slash report already has, move on
let key_pending_slash_report =
serai.key_pending_slash_report(network).await.map_err(|e| format!("{e:?}"))?;
if key_pending_slash_report.is_none() {
txn.commit();
continue;
};
match self.serai.publish(&slash_report).await {
Ok(()) => {
txn.commit();
made_progress = true;
}
// This could be specific to this TX (such as an already in mempool error) and it may be
// worthwhile to continue iteration with the other pending slash reports. We assume this
// error ephemeral and that the latency incurred for this ephemeral error to resolve is
// minuscule compared to the window available to publish the slash report. That makes
// this a non-issue.
Err(e) => Err(format!("couldn't publish slash report transaction: {e:?}"))?,
}
let network_res = self.publish(network).await;
// We made progress if any network successfully published their slash report
made_progress |= network_res == Ok(true);
// We want to yield the first error *after* attempting for every network
error = error.or(network_res.err());
}
// Yield the error
if let Some(error) = error {
Err(error)?
}
Ok(made_progress)
}