Finish routing eventualities

Also corrects some misc TODOs and tidies up some log statements.
This commit is contained in:
Luke Parker
2023-04-11 05:49:27 -04:00
parent 9e78c8fc9e
commit 90f2b03595
8 changed files with 171 additions and 50 deletions

View File

@@ -148,6 +148,21 @@ impl<E: Eventuality> EventualitiesTracker<E> {
// If our self tracker already went past this block number, set it back // If our self tracker already went past this block number, set it back
self.block_number = self.block_number.min(block_number); self.block_number = self.block_number.min(block_number);
} }
/// Stop tracking the entry whose stored ID equals `id`.
///
/// At most one entry is removed. This is a no-op if no tracked entry carries `id`.
pub fn drop(&mut self, id: [u8; 32]) {
  // The map isn't keyed by this ID, so finding the entry requires a linear scan
  let matching_key =
    self.map.iter().find(|(_, entry)| entry.0 == id).map(|(lookup, _)| lookup.clone());

  // The key is cloned above so the map is no longer borrowed when we mutate it
  if let Some(lookup) = matching_key {
    self.map.remove(&lookup);
  }
}
} }
impl<E: Eventuality> Default for EventualitiesTracker<E> { impl<E: Eventuality> Default for EventualitiesTracker<E> {

View File

@@ -5,7 +5,6 @@ use std::{
use messages::{ProcessorMessage, CoordinatorMessage}; use messages::{ProcessorMessage, CoordinatorMessage};
// TODO: Also include the coin block height here so we can delay handling if not synced?
#[derive(Clone, PartialEq, Eq, Debug)] #[derive(Clone, PartialEq, Eq, Debug)]
pub struct Message { pub struct Message {
pub id: u64, pub id: u64,

View File

@@ -109,7 +109,10 @@ async fn get_fee<C: Coin>(coin: &C, block_number: usize) -> C::Fee {
return block.median_fee(); return block.median_fee();
} }
Err(e) => { Err(e) => {
error!("couldn't get block {}: {e}", block_number); error!(
"couldn't get block {block_number} in get_fee. {} {}",
"this should only happen if the node is offline. error: ", e
);
// Since this block is considered finalized, we shouldn't be unable to get it unless the // Since this block is considered finalized, we shouldn't be unable to get it unless the
// node is offline, hence the long sleep // node is offline, hence the long sleep
sleep(Duration::from_secs(60)).await; sleep(Duration::from_secs(60)).await;
@@ -455,8 +458,6 @@ async fn run<C: Coin, D: Db, Co: Coordinator>(raw_db: D, coin: C, mut coordinato
}, },
msg = scanner.events.recv() => { msg = scanner.events.recv() => {
// These need to be sent to the coordinator which needs to check they aren't replayed
// TODO
match msg.unwrap() { match msg.unwrap() {
ScannerEvent::Block(key, block, time, outputs) => { ScannerEvent::Block(key, block, time, outputs) => {
let key = key.to_bytes().as_ref().to_vec(); let key = key.to_bytes().as_ref().to_vec();
@@ -500,6 +501,13 @@ async fn run<C: Coin, D: Db, Co: Coordinator>(raw_db: D, coin: C, mut coordinato
substrate_signers[&key].sign(time, batch).await; substrate_signers[&key].sign(time, batch).await;
}, },
ScannerEvent::Completed(id, tx) => {
// We don't know which signer had this plan, so inform all of them
for (_, signer) in signers.iter_mut() {
signer.eventuality_completion(id, &tx).await;
}
},
} }
}, },
@@ -526,7 +534,10 @@ async fn run<C: Coin, D: Db, Co: Coordinator>(raw_db: D, coin: C, mut coordinato
}, },
SignerEvent::SignedTransaction { id, tx } => { SignerEvent::SignedTransaction { id, tx } => {
// If we die after calling finish_signing, we'll never fire Completed
// TODO: Is that acceptable? Do we need to fire Completed before firing finish_signing?
main_db.finish_signing(&key, id); main_db.finish_signing(&key, id);
scanner.drop_eventuality(id).await;
coordinator coordinator
.send(ProcessorMessage::Sign(messages::sign::ProcessorMessage::Completed { .send(ProcessorMessage::Sign(messages::sign::ProcessorMessage::Completed {
key: key.to_vec(), key: key.to_vec(),

View File

@@ -16,13 +16,15 @@ use tokio::{
use crate::{ use crate::{
DbTxn, Db, DbTxn, Db,
coins::{Output, EventualitiesTracker, Block, Coin}, coins::{Output, Transaction, EventualitiesTracker, Block, Coin},
}; };
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum ScannerEvent<C: Coin> { pub enum ScannerEvent<C: Coin> {
// Block scanned // Block scanned
Block(<C::Curve as Ciphersuite>::G, <C::Block as Block<C>>::Id, SystemTime, Vec<C::Output>), Block(<C::Curve as Ciphersuite>::G, <C::Block as Block<C>>::Id, SystemTime, Vec<C::Output>),
// Eventuality completion found on-chain
Completed([u8; 32], <C::Transaction as Transaction<C>>::Id),
} }
pub type ScannerEventChannel<C> = mpsc::UnboundedReceiver<ScannerEvent<C>>; pub type ScannerEventChannel<C> = mpsc::UnboundedReceiver<ScannerEvent<C>>;
@@ -68,15 +70,31 @@ impl<C: Coin, D: Db> ScannerDb<C, D> {
} }
fn add_active_key(&mut self, txn: &mut D::Transaction, key: <C::Curve as Ciphersuite>::G) { fn add_active_key(&mut self, txn: &mut D::Transaction, key: <C::Curve as Ciphersuite>::G) {
let mut keys = self.0.get(Self::active_keys_key()).unwrap_or(vec![]); let mut keys = self.0.get(Self::active_keys_key()).unwrap_or(vec![]);
// TODO: Don't do this if the key is already marked active (which can happen based on reboot
// timing) let key_bytes = key.to_bytes();
keys.extend(key.to_bytes().as_ref());
// Don't add this key if it's already present
let key_len = key_bytes.as_ref().len();
let mut i = 0;
while i < keys.len() {
if keys[i .. (i + key_len)].as_ref() == key_bytes.as_ref() {
debug!("adding {} as an active key yet it was already present", hex::encode(key_bytes));
return;
}
i += key_len;
}
keys.extend(key_bytes.as_ref());
txn.put(Self::active_keys_key(), keys); txn.put(Self::active_keys_key(), keys);
} }
fn active_keys(&self) -> Vec<<C::Curve as Ciphersuite>::G> { fn active_keys(&self) -> Vec<<C::Curve as Ciphersuite>::G> {
let bytes_vec = self.0.get(Self::active_keys_key()).unwrap_or(vec![]); let bytes_vec = self.0.get(Self::active_keys_key()).unwrap_or(vec![]);
let mut bytes: &[u8] = bytes_vec.as_ref(); let mut bytes: &[u8] = bytes_vec.as_ref();
// Assumes keys will be 32 bytes when calculating the capacity
// If keys are larger, this may allocate more memory than needed
// If keys are smaller, this may require additional allocations
// Either are fine
let mut res = Vec::with_capacity(bytes.len() / 32); let mut res = Vec::with_capacity(bytes.len() / 32);
while !bytes.is_empty() { while !bytes.is_empty() {
res.push(C::Curve::read_G(&mut bytes).unwrap()); res.push(C::Curve::read_G(&mut bytes).unwrap());
@@ -210,6 +228,10 @@ impl<C: Coin, D: Db> ScannerHandle<C, D> {
self.scanner.write().await.eventualities.register(block_number, id, eventuality) self.scanner.write().await.eventualities.register(block_number, id, eventuality)
} }
/// Remove the eventuality with the given `id` from this scanner's eventualities tracker.
pub async fn drop_eventuality(&self, id: [u8; 32]) {
  // Take the scanner's write guard, as dropping mutates the tracker
  let mut scanner = self.scanner.write().await;
  scanner.eventualities.drop(id);
}
/// Rotate the key being scanned for. /// Rotate the key being scanned for.
/// ///
/// If no key has been prior set, this will become the key with no further actions. /// If no key has been prior set, this will become the key with no further actions.
@@ -362,9 +384,17 @@ impl<C: Coin, D: Db> Scanner<C, D> {
for (id, tx) in for (id, tx) in
coin.get_eventuality_completions(&mut scanner.eventualities, &block).await coin.get_eventuality_completions(&mut scanner.eventualities, &block).await
{ {
// TODO: Fire Completed // This should only happen if there's a P2P net desync or there's a malicious
let _ = id; // validator
let _ = tx; warn!(
"eventuality {} resolved by {}, as found on chain. this should not happen",
hex::encode(id),
hex::encode(&tx)
);
if !scanner.emit(ScannerEvent::Completed(id, tx)) {
return;
}
} }
let outputs = match scanner.coin.get_outputs(&block, key).await { let outputs = match scanner.coin.get_outputs(&block, key).await {

View File

@@ -49,7 +49,7 @@ impl<C: Coin, D: Db> SignerDb<C, D> {
&mut self, &mut self,
txn: &mut D::Transaction, txn: &mut D::Transaction,
id: [u8; 32], id: [u8; 32],
tx: <C::Transaction as Transaction<C>>::Id, tx: &<C::Transaction as Transaction<C>>::Id,
) { ) {
// Transactions can be completed by multiple signatures // Transactions can be completed by multiple signatures
// Save every solution in order to be robust // Save every solution in order to be robust
@@ -165,7 +165,11 @@ impl<C: Coin, D: Db> Signer<C, D> {
// If we don't have an attempt logged, it's because the coordinator is faulty OR // If we don't have an attempt logged, it's because the coordinator is faulty OR
// because we rebooted // because we rebooted
None => { None => {
warn!("not attempting {:?}. this is an error if we didn't reboot", id); warn!(
"not attempting {} #{}. this is an error if we didn't reboot",
hex::encode(id.id),
id.attempt
);
// Don't panic on the assumption we rebooted // Don't panic on the assumption we rebooted
Err(())?; Err(())?;
} }
@@ -191,6 +195,57 @@ impl<C: Coin, D: Db> Signer<C, D> {
} }
} }
/// Handle a claim that the plan `id` was completed by the transaction `tx_id`.
///
/// If this signer has an eventuality saved for `id`, the transaction is fetched and checked
/// against it. On confirmation, the transaction and completion are written to the DB, all
/// in-memory signing state for `id` is cleared, and `SignerEvent::SignedTransaction` is emitted.
/// In every other case, this only logs and returns.
async fn eventuality_completion(
  &mut self,
  id: [u8; 32],
  tx_id: &<C::Transaction as Transaction<C>>::Id,
) {
  // Without an eventuality for this ID, there's nothing to check the claim against
  let Some(eventuality) = self.db.eventuality(id) else {
    debug!(
      "signer {} informed of the completion of {}. {}",
      hex::encode(self.keys.group_key().to_bytes()),
      hex::encode(id),
      "this signer did not have/has already completed that plan",
    );
    return;
  };

  // Transaction hasn't hit our mempool/was dropped for a different signature
  // The latter can happen given certain latency conditions/a single malicious signer
  // In the case of a single malicious signer, they can drag multiple honest
  // validators down with them, so we unfortunately can't slash on this case
  let Ok(tx) = self.coin.get_transaction(tx_id).await else {
    warn!(
      "a validator claimed {} completed {} yet we didn't have that TX in our mempool",
      hex::encode(tx_id),
      hex::encode(id),
    );
    return;
  };

  // The TX exists, yet it must actually resolve the eventuality to count as a completion
  if !self.coin.confirm_completion(&eventuality, &tx) {
    warn!(
      "a validator claimed {} completed {} when it did not",
      hex::encode(tx_id),
      hex::encode(id)
    );
    return;
  }

  debug!("eventuality for {} resolved in TX {}", hex::encode(id), hex::encode(tx_id));

  // Persist the completing transaction and mark the plan as completed by it
  let mut txn = self.db.0.txn();
  self.db.save_transaction(&mut txn, &tx);
  self.db.complete(&mut txn, id, tx_id);
  txn.commit();

  // Stop trying to sign for this TX
  self.signable.remove(&id);
  self.attempt.remove(&id);
  self.preprocessing.remove(&id);
  self.signing.remove(&id);

  self.emit(SignerEvent::SignedTransaction { id, tx: tx.id() });
}
async fn handle(&mut self, msg: CoordinatorMessage) { async fn handle(&mut self, msg: CoordinatorMessage) {
match msg { match msg {
CoordinatorMessage::Preprocesses { id, mut preprocesses } => { CoordinatorMessage::Preprocesses { id, mut preprocesses } => {
@@ -201,7 +256,10 @@ impl<C: Coin, D: Db> Signer<C, D> {
let machine = match self.preprocessing.remove(&id.id) { let machine = match self.preprocessing.remove(&id.id) {
// Either rebooted or RPC error, or some invariant // Either rebooted or RPC error, or some invariant
None => { None => {
warn!("not preprocessing for {:?}. this is an error if we didn't reboot", id); warn!(
"not preprocessing for {}. this is an error if we didn't reboot",
hex::encode(id.id)
);
return; return;
} }
Some(machine) => machine, Some(machine) => machine,
@@ -248,7 +306,10 @@ impl<C: Coin, D: Db> Signer<C, D> {
panic!("never preprocessed yet signing?"); panic!("never preprocessed yet signing?");
} }
warn!("not preprocessing for {:?}. this is an error if we didn't reboot", id); warn!(
"not preprocessing for {}. this is an error if we didn't reboot",
hex::encode(id.id)
);
return; return;
} }
Some(machine) => machine, Some(machine) => machine,
@@ -273,14 +334,15 @@ impl<C: Coin, D: Db> Signer<C, D> {
// Save the transaction in case it's needed for recovery // Save the transaction in case it's needed for recovery
let mut txn = self.db.0.txn(); let mut txn = self.db.0.txn();
self.db.save_transaction(&mut txn, &tx); self.db.save_transaction(&mut txn, &tx);
self.db.complete(&mut txn, id.id, tx.id()); let tx_id = tx.id();
self.db.complete(&mut txn, id.id, &tx_id);
txn.commit(); txn.commit();
// Publish it // Publish it
if let Err(e) = self.coin.publish_transaction(&tx).await { if let Err(e) = self.coin.publish_transaction(&tx).await {
error!("couldn't publish {:?}: {:?}", tx, e); error!("couldn't publish {:?}: {:?}", tx, e);
} else { } else {
info!("published {:?}", hex::encode(tx.id())); info!("published {}", hex::encode(&tx_id));
} }
// Stop trying to sign for this TX // Stop trying to sign for this TX
@@ -289,46 +351,23 @@ impl<C: Coin, D: Db> Signer<C, D> {
assert!(self.preprocessing.remove(&id.id).is_none()); assert!(self.preprocessing.remove(&id.id).is_none());
assert!(self.signing.remove(&id.id).is_none()); assert!(self.signing.remove(&id.id).is_none());
self.emit(SignerEvent::SignedTransaction { id: id.id, tx: tx.id() }); self.emit(SignerEvent::SignedTransaction { id: id.id, tx: tx_id });
} }
CoordinatorMessage::Completed { key: _, id, tx: tx_vec } => { CoordinatorMessage::Completed { key: _, id, tx: mut tx_vec } => {
let mut tx = <C::Transaction as Transaction<C>>::Id::default(); let mut tx = <C::Transaction as Transaction<C>>::Id::default();
if tx.as_ref().len() != tx_vec.len() { if tx.as_ref().len() != tx_vec.len() {
tx_vec.truncate(2 * tx.as_ref().len());
warn!( warn!(
"a validator claimed {} completed {id:?} yet that's not a valid TX ID", "a validator claimed {} completed {} yet that's not a valid TX ID",
hex::encode(&tx) hex::encode(&tx),
hex::encode(id),
); );
return; return;
} }
tx.as_mut().copy_from_slice(&tx_vec); tx.as_mut().copy_from_slice(&tx_vec);
if let Some(eventuality) = self.db.eventuality(id) { self.eventuality_completion(id, &tx).await;
// Transaction hasn't hit our mempool/was dropped for a different signature
// The latter can happen given certain latency conditions/a single malicious signer
// In the case of a single malicious signer, they can drag multiple honest
// validators down with them, so we unfortunately can't slash on this case
let Ok(tx) = self.coin.get_transaction(&tx).await else {
todo!("queue checking eventualities"); // or give up here?
};
if self.coin.confirm_completion(&eventuality, &tx) {
// Stop trying to sign for this TX
let mut txn = self.db.0.txn();
self.db.save_transaction(&mut txn, &tx);
self.db.complete(&mut txn, id, tx.id());
txn.commit();
self.signable.remove(&id);
self.attempt.remove(&id);
self.preprocessing.remove(&id);
self.signing.remove(&id);
self.emit(SignerEvent::SignedTransaction { id, tx: tx.id() });
} else {
warn!("a validator claimed {} completed {id:?} when it did not", hex::encode(&tx.id()));
}
}
} }
} }
} }
@@ -406,7 +445,7 @@ impl<C: Coin, D: Db> Signer<C, D> {
if !id.signing_set(&signer.keys.params()).contains(&signer.keys.params().i()) { if !id.signing_set(&signer.keys.params()).contains(&signer.keys.params().i()) {
continue; continue;
} }
info!("selected to sign {:?}", id); info!("selected to sign {} #{}", hex::encode(id.id), id.attempt);
// If we reboot mid-sign, the current design has us abort all signs and wait for latter // If we reboot mid-sign, the current design has us abort all signs and wait for latter
// attempts/new signing protocols // attempts/new signing protocols
@@ -421,7 +460,11 @@ impl<C: Coin, D: Db> Signer<C, D> {
// //
// Only run if this hasn't already been attempted // Only run if this hasn't already been attempted
if signer.db.has_attempt(&id) { if signer.db.has_attempt(&id) {
warn!("already attempted {:?}. this is an error if we didn't reboot", id); warn!(
"already attempted {} #{}. this is an error if we didn't reboot",
hex::encode(id.id),
id.attempt
);
continue; continue;
} }
@@ -432,7 +475,7 @@ impl<C: Coin, D: Db> Signer<C, D> {
// Attempt to create the TX // Attempt to create the TX
let machine = match signer.coin.attempt_send(tx).await { let machine = match signer.coin.attempt_send(tx).await {
Err(e) => { Err(e) => {
error!("failed to attempt {:?}: {:?}", id, e); error!("failed to attempt {}, #{}: {:?}", hex::encode(id.id), id.attempt, e);
continue; continue;
} }
Ok(machine) => machine, Ok(machine) => machine,
@@ -503,6 +546,14 @@ impl<C: Coin, D: Db> SignerHandle<C, D> {
signer.signable.insert(id, (start, tx)); signer.signable.insert(id, (start, tx));
} }
/// Inform the underlying signer that the plan `id` was claimed to be completed by `tx`.
///
/// Takes the signer's write guard and forwards to its `eventuality_completion`.
pub async fn eventuality_completion(
  &self,
  id: [u8; 32],
  tx: &<C::Transaction as Transaction<C>>::Id,
) {
  let mut signer = self.signer.write().await;
  signer.eventuality_completion(id, tx).await;
}
pub async fn handle(&self, msg: CoordinatorMessage) { pub async fn handle(&self, msg: CoordinatorMessage) {
self.signer.write().await.handle(msg).await; self.signer.write().await.handle(msg).await;
} }

View File

@@ -56,6 +56,9 @@ async fn spend<C: Coin, D: Db>(
assert_eq!(outputs[0].kind(), OutputType::Change); assert_eq!(outputs[0].kind(), OutputType::Change);
outputs outputs
} }
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
} }
} }
@@ -89,6 +92,9 @@ pub async fn test_addresses<C: Coin>(coin: C) {
assert_eq!(outputs[0].kind(), OutputType::Branch); assert_eq!(outputs[0].kind(), OutputType::Branch);
outputs outputs
} }
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
}; };
// Spend the branch output, creating a change output and ensuring we actually get change // Spend the branch output, creating a change output and ensuring we actually get change

View File

@@ -56,6 +56,9 @@ pub async fn test_scanner<C: Coin>(coin: C) {
assert_eq!(outputs[0].kind(), OutputType::External); assert_eq!(outputs[0].kind(), OutputType::External);
outputs outputs
} }
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
}; };
(scanner, outputs) (scanner, outputs)
}; };

View File

@@ -39,6 +39,9 @@ pub async fn test_wallet<C: Coin>(coin: C) {
assert_eq!(outputs.len(), 1); assert_eq!(outputs.len(), 1);
(block_id, outputs) (block_id, outputs)
} }
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
} }
}; };
@@ -105,6 +108,9 @@ pub async fn test_wallet<C: Coin>(coin: C) {
assert_eq!(time, block.time()); assert_eq!(time, block.time());
assert_eq!(these_outputs, outputs); assert_eq!(these_outputs, outputs);
} }
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
} }
// Check the Scanner DB can reload the outputs // Check the Scanner DB can reload the outputs