Don't return from sync_block until the Tendermint machine reports whether the block is valid

We had a race condition where we'd be informed of blocks 1 .. 3, and
immediately add 1 .. 3. Because we immediately tried to add 2 after 1, it'd
fail since the tip was still the genesis, yet 2 needs the tip to be 1.

Adding a channel, while ugly, was the simplest way to accomplish this.

Also has any added block be broadcast. Otherwise, there's a race condition
where a node which syncs up to the most recent block does so, yet fails to add
the next block when it's committed to.
This commit is contained in:
Luke Parker
2023-04-24 02:44:21 -04:00
parent 14388e746c
commit cc491ee1e1
4 changed files with 86 additions and 32 deletions

View File

@@ -9,7 +9,7 @@ use std::{
use parity_scale_codec::{Encode, Decode};
use futures::{
FutureExt, StreamExt,
FutureExt, StreamExt, SinkExt,
future::{self, Fuse},
channel::mpsc,
};
@@ -135,6 +135,7 @@ pub struct TendermintMachine<N: Network> {
queue: VecDeque<MessageFor<N>>,
msg_recv: mpsc::UnboundedReceiver<SignedMessageFor<N>>,
synced_block_recv: mpsc::UnboundedReceiver<SyncedBlock<N>>,
synced_block_result_send: mpsc::UnboundedSender<bool>,
block: BlockData<N>,
}
@@ -146,6 +147,7 @@ pub struct SyncedBlock<N: Network> {
}
pub type SyncedBlockSender<N> = mpsc::UnboundedSender<SyncedBlock<N>>;
pub type SyncedBlockResultReceiver = mpsc::UnboundedReceiver<bool>;
pub type MessageSender<N> = mpsc::UnboundedSender<SignedMessageFor<N>>;
@@ -154,6 +156,8 @@ pub struct TendermintHandle<N: Network> {
/// Channel to trigger the machine to move to the next block.
/// Takes in the previous block's commit, along with the new proposal.
pub synced_block: SyncedBlockSender<N>,
/// A channel to communicate the result of a synced_block message.
pub synced_block_result: SyncedBlockResultReceiver,
/// Channel to send messages received from the P2P layer.
pub messages: MessageSender<N>,
/// Tendermint machine to be run on an asynchronous task.
@@ -253,8 +257,10 @@ impl<N: Network + 'static> TendermintMachine<N> {
) -> TendermintHandle<N> {
let (msg_send, msg_recv) = mpsc::unbounded();
let (synced_block_send, synced_block_recv) = mpsc::unbounded();
let (synced_block_result_send, synced_block_result_recv) = mpsc::unbounded();
TendermintHandle {
synced_block: synced_block_send,
synced_block_result: synced_block_result_recv,
messages: msg_send,
machine: {
let sys_time = sys_time(last_time);
@@ -275,6 +281,7 @@ impl<N: Network + 'static> TendermintMachine<N> {
queue: VecDeque::new(),
msg_recv,
synced_block_recv,
synced_block_result_send,
block: BlockData::new(
weights,
@@ -313,16 +320,19 @@ impl<N: Network + 'static> TendermintMachine<N> {
if let Some(SyncedBlock { number, block, commit }) = msg {
// Commit is for a block we've already moved past
if number != self.block.number {
self.synced_block_result_send.send(false).await.unwrap();
continue;
}
// Commit is invalid
if !self.network.verify_commit(block.id(), &commit) {
self.synced_block_result_send.send(false).await.unwrap();
continue;
}
let proposal = self.network.add_block(block, commit.clone()).await;
self.reset_by_commit(commit, proposal).await;
self.synced_block_result_send.send(true).await.unwrap();
None
} else {
break;

View File

@@ -11,7 +11,8 @@ use futures::SinkExt;
use tokio::{sync::RwLock, time::sleep};
use tendermint_machine::{
ext::*, SignedMessageFor, SyncedBlockSender, MessageSender, TendermintMachine, TendermintHandle,
ext::*, SignedMessageFor, SyncedBlockSender, SyncedBlockResultReceiver, MessageSender,
TendermintMachine, TendermintHandle,
};
type TestValidatorId = u16;
@@ -97,7 +98,10 @@ impl Block for TestBlock {
}
#[allow(clippy::type_complexity)]
struct TestNetwork(u16, Arc<RwLock<Vec<(MessageSender<Self>, SyncedBlockSender<Self>)>>>);
struct TestNetwork(
u16,
Arc<RwLock<Vec<(MessageSender<Self>, SyncedBlockSender<Self>, SyncedBlockResultReceiver)>>>,
);
#[async_trait]
impl Network for TestNetwork {
@@ -122,7 +126,7 @@ impl Network for TestNetwork {
}
async fn broadcast(&mut self, msg: SignedMessageFor<Self>) {
for (messages, _) in self.1.write().await.iter_mut() {
for (messages, _, _) in self.1.write().await.iter_mut() {
messages.send(msg.clone()).await.unwrap();
}
}
@@ -151,21 +155,23 @@ impl Network for TestNetwork {
impl TestNetwork {
async fn new(
validators: usize,
) -> Arc<RwLock<Vec<(MessageSender<Self>, SyncedBlockSender<Self>)>>> {
) -> Arc<RwLock<Vec<(MessageSender<Self>, SyncedBlockSender<Self>, SyncedBlockResultReceiver)>>>
{
let arc = Arc::new(RwLock::new(vec![]));
{
let mut write = arc.write().await;
for i in 0 .. validators {
let i = u16::try_from(i).unwrap();
let TendermintHandle { messages, synced_block, machine } = TendermintMachine::new(
TestNetwork(i, arc.clone()),
BlockNumber(1),
SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
TestBlock { id: 1u32.to_le_bytes(), valid: Ok(()) },
)
.await;
let TendermintHandle { messages, synced_block, synced_block_result, machine } =
TendermintMachine::new(
TestNetwork(i, arc.clone()),
BlockNumber(1),
SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(),
TestBlock { id: 1u32.to_le_bytes(), valid: Ok(()) },
)
.await;
tokio::task::spawn(machine.run());
write.push((messages, synced_block));
write.push((messages, synced_block, synced_block_result));
}
}
arc