From 8ec0582237590be510810db9e22fb7e7a60a8cc3 Mon Sep 17 00:00:00 2001 From: Luke Parker Date: Tue, 25 Nov 2025 20:21:45 -0500 Subject: [PATCH] Add module to calculate medians --- .github/workflows/tests.yml | 1 + Cargo.lock | 10 + Cargo.toml | 2 + deny.toml | 1 + substrate/median/Cargo.toml | 28 ++ substrate/median/LICENSE | 15 + substrate/median/README.md | 3 + substrate/median/src/average.rs | 51 +++ substrate/median/src/lexicographic.rs | 143 +++++++ substrate/median/src/lib.rs | 520 ++++++++++++++++++++++++++ substrate/median/src/policy.rs | 62 +++ 11 files changed, 836 insertions(+) create mode 100644 substrate/median/Cargo.toml create mode 100644 substrate/median/LICENSE create mode 100644 substrate/median/README.md create mode 100644 substrate/median/src/average.rs create mode 100644 substrate/median/src/lexicographic.rs create mode 100644 substrate/median/src/lib.rs create mode 100644 substrate/median/src/policy.rs diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9befee7c..7dfda669 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -84,6 +84,7 @@ jobs: GITHUB_CI=true RUST_BACKTRACE=1 cargo test --all-features \ -p serai-primitives \ -p serai-abi \ + -p substrate-median \ -p serai-core-pallet \ -p serai-coins-pallet \ -p serai-validator-sets-pallet \ diff --git a/Cargo.lock b/Cargo.lock index 16e17f82..7fefa1af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10150,6 +10150,16 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "substrate-median" +version = "0.1.0" +dependencies = [ + "frame-support", + "parity-scale-codec", + "rand_core 0.6.4", + "sp-io", +] + [[package]] name = "substrate-prometheus-endpoint" version = "0.17.7" diff --git a/Cargo.toml b/Cargo.toml index d5124426..d563ee36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,8 @@ members = [ "substrate/primitives", "substrate/abi", + "substrate/median", + "substrate/core", "substrate/coins", "substrate/validator-sets", diff --git 
a/deny.toml b/deny.toml index 1052a71e..3ff53ad2 100644 --- a/deny.toml +++ b/deny.toml @@ -80,6 +80,7 @@ exceptions = [ { allow = ["AGPL-3.0-only"], name = "serai-coordinator" }, { allow = ["AGPL-3.0-only"], name = "pallet-session" }, + { allow = ["AGPL-3.0-only"], name = "substrate-median" }, { allow = ["AGPL-3.0-only"], name = "serai-core-pallet" }, { allow = ["AGPL-3.0-only"], name = "serai-coins-pallet" }, diff --git a/substrate/median/Cargo.toml b/substrate/median/Cargo.toml new file mode 100644 index 00000000..39fc6fbc --- /dev/null +++ b/substrate/median/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "substrate-median" +version = "0.1.0" +description = "Efficient calculation of medians within a Substrate (FRAME) runtime" +license = "AGPL-3.0-only" +repository = "https://github.com/serai-dex/serai/tree/develop/substrate/median" +authors = ["Luke Parker "] +edition = "2021" +rust-version = "1.85" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[lints] +workspace = true + +[dependencies] +scale = { package = "parity-scale-codec", version = "3", default-features = false, features = ["derive"] } +frame-support = { git = "https://github.com/serai-dex/patch-polkadot-sdk", rev = "8c36534bb0bd5a02979f94bb913d11d55fe7eadc", default-features = false } + +[dev-dependencies] +rand_core = { version = "0.6", default-features = false, features = ["std"] } +sp-io = { git = "https://github.com/serai-dex/patch-polkadot-sdk", rev = "8c36534bb0bd5a02979f94bb913d11d55fe7eadc", default-features = false } + +[features] +std = ["scale/std", "frame-support/std"] +default = ["std"] diff --git a/substrate/median/LICENSE b/substrate/median/LICENSE new file mode 100644 index 00000000..621233a9 --- /dev/null +++ b/substrate/median/LICENSE @@ -0,0 +1,15 @@ +AGPL-3.0-only license + +Copyright (c) 2023-2025 Luke Parker + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License 
Version 3 as +published by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/substrate/median/README.md b/substrate/median/README.md new file mode 100644 index 00000000..8ec484a4 --- /dev/null +++ b/substrate/median/README.md @@ -0,0 +1,3 @@ +# Substrate Median + +An efficient implementation of a median algorithm within a Substrate runtime. diff --git a/substrate/median/src/average.rs b/substrate/median/src/average.rs new file mode 100644 index 00000000..8b6736e8 --- /dev/null +++ b/substrate/median/src/average.rs @@ -0,0 +1,51 @@ +use core::cmp::Ord; + +/// A trait to take the average of two values +pub trait Average { + /// Calculate the average of two values. + fn average(value: Self, other: Self) -> Self; +} + +macro_rules! impl_prim_uint { + ($type: ident) => { + impl Average for $type { + /// This rounds as integer division does: by flooring the result. + fn average(value: Self, other: Self) -> Self { + /* + Since `value + other` may overflow, without promotion to a wider integer, we perform the + halving as the first operation. Then we add back the truncated bit as necessary. This + methodology doesn't overflow and doesn't require the existence of a wider integer type. 
+ */ + (value / 2) + (other / 2) + (value & other & 1) + } + } + }; +} +impl_prim_uint!(u8); +impl_prim_uint!(u16); +impl_prim_uint!(u32); +impl_prim_uint!(u64); +impl_prim_uint!(u128); + +#[test] +fn average() { + use rand_core::{RngCore, OsRng}; + + // Basic sanity checks + { + assert_eq!(u64::average(0, 0), 0); + assert_eq!(u64::average(0, 1), 0); + assert_eq!(u64::average(0, 2), 1); + assert_eq!(u64::average(u64::MAX, u64::MAX), u64::MAX); + assert_eq!(u64::average(u64::MAX - 1, u64::MAX), u64::MAX - 1); + assert_eq!(u64::average(u64::MAX - 2, u64::MAX), u64::MAX - 1); + assert_eq!(u64::average(u64::MAX - 1, u64::MAX - 1), u64::MAX - 1); + } + + // Fuzz test the function + for _ in 0 .. 100 { + let a = OsRng.next_u64(); + let b = OsRng.next_u64(); + assert_eq!(u64::average(a, b), u64::try_from((u128::from(a) + u128::from(b)) / 2).unwrap()); + } +} diff --git a/substrate/median/src/lexicographic.rs b/substrate/median/src/lexicographic.rs new file mode 100644 index 00000000..228ba0a1 --- /dev/null +++ b/substrate/median/src/lexicographic.rs @@ -0,0 +1,143 @@ +use core::cmp::Ord; + +use scale::FullCodec; + +/// A trait to obtain an encoding whose lexicographic order corresponds to the value's. +pub trait LexicographicEncoding: Ord + FullCodec { + /// The representation of the encoding. + /// + /// This SHOULD be `[u8; N]` or similar. + type Encoding: AsMut<[u8]> + Ord + FullCodec; + /// Encode such that `cmp(e(A), e(B)) == cmp(A, B)`. + fn lexicographic_encode(&self) -> Self::Encoding; + /// Decode such that `d(e(A)) == A`. + fn lexicographic_decode(encoding: Self::Encoding) -> Self; +} + +macro_rules! 
impl_prim_uint { + ($type: ident) => { + impl LexicographicEncoding for $type { + type Encoding = [u8; core::mem::size_of::()]; + fn lexicographic_encode(&self) -> Self::Encoding { + self.to_be_bytes() + } + fn lexicographic_decode(encoding: Self::Encoding) -> Self { + Self::from_be_bytes(encoding) + } + } + }; +} +impl_prim_uint!(u8); +impl_prim_uint!(u16); +impl_prim_uint!(u32); +impl_prim_uint!(u64); +impl_prim_uint!(u128); + +/// A wrapper such that +/// `a.cmp(&b).reverse() == e(LexicographicReverse(a)).cmp(&e(LexicographicReverse(b)))`. +/// +/// This allows systems which only allow iterating forwards to iterate backwards by inverting the +/// direction of values via this derivative encoding. +pub struct LexicographicReverse(V::Encoding); + +impl scale::Decode for LexicographicReverse { + fn decode(input: &mut I) -> Result { + V::Encoding::decode(input).map(Self) + } +} + +impl scale::Encode for LexicographicReverse { + fn size_hint(&self) -> usize { + self.0.size_hint() + } + fn encode_to(&self, dest: &mut T) { + self.0.encode_to(dest) + } + fn encode(&self) -> Vec { + self.0.encode() + } + fn using_encoded R>(&self, f: F) -> R { + self.0.using_encoded(f) + } + fn encoded_size(&self) -> usize { + self.0.encoded_size() + } +} + +impl scale::EncodeLike for LexicographicReverse {} + +/// This is a bijective mapping such that `reverse(reverse(encoding)) == encoding`. 
+fn reverse>(mut encoding: E) -> E { + for byte in encoding.as_mut().iter_mut() { + *byte = !*byte; + } + encoding +} + +impl LexicographicReverse { + pub(super) fn from_encoding(encoding: V::Encoding) -> Self { + Self(reverse(encoding)) + } + pub(super) fn from(value: &V) -> Self { + Self::from_encoding(value.lexicographic_encode()) + } + pub(super) fn into(self) -> V { + V::lexicographic_decode(reverse(self.0)) + } +} + +#[test] +fn lexicographic_uint() { + use rand_core::{RngCore, OsRng}; + + // Basic sanity checks + { + assert_eq!(0u64.lexicographic_encode(), 0u64.lexicographic_encode()); + assert!(0u64.lexicographic_encode() < 1u64.lexicographic_encode()); + assert!(0u64.lexicographic_encode() <= 1u64.lexicographic_encode()); + assert!(1u64.lexicographic_encode() > 0u64.lexicographic_encode()); + assert!(1u64.lexicographic_encode() >= 0u64.lexicographic_encode()); + } + + // `lexicographic_decode` + for _ in 0 .. 100 { + let value = OsRng.next_u64(); + assert_eq!(u64::lexicographic_decode(value.lexicographic_encode()), value); + } + + // Fuzz test the ordinality + for _ in 0 .. 100 { + let a = OsRng.next_u64(); + let b = OsRng.next_u64(); + assert_eq!(a.cmp(&b), a.lexicographic_encode().cmp(&b.lexicographic_encode())); + } +} + +#[test] +fn lexicographic_reverse() { + use rand_core::{RngCore, OsRng}; + + for _ in 0 .. 
100 { + let a = OsRng.next_u64(); + let b = loop { + let b = OsRng.next_u64(); + if a != b { + break b; + } + }; + let mut a_enc = a.lexicographic_encode(); + let mut b_enc = b.lexicographic_encode(); + assert_eq!(a_enc, a_enc); + assert_eq!(a.cmp(&b), a_enc.cmp(&b_enc)); + + assert_eq!(a.cmp(&b).reverse(), reverse(a_enc).cmp(&reverse(b_enc))); + + // This should be a bijective encoding + assert_eq!(reverse(reverse(a_enc)), a_enc); + assert_eq!(reverse(reverse(b_enc)), b_enc); + assert_eq!(LexicographicReverse::::from_encoding(a.lexicographic_encode()).into(), a); + assert_eq!(LexicographicReverse::::from_encoding(b.lexicographic_encode()).into(), b); + assert_eq!(LexicographicReverse::from(&a).into(), a); + assert_eq!(LexicographicReverse::from(&b).into(), b); + } +} diff --git a/substrate/median/src/lib.rs b/substrate/median/src/lib.rs new file mode 100644 index 00000000..86937a61 --- /dev/null +++ b/substrate/median/src/lib.rs @@ -0,0 +1,520 @@ +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] +#![cfg_attr(not(feature = "std"), no_std)] +#![deny(missing_docs)] + +use core::cmp::Ordering; + +use scale::{EncodeLike, FullCodec}; +use frame_support::storage::*; + +mod lexicographic; +pub use lexicographic::*; + +mod average; +pub use average::*; + +mod policy; +pub use policy::*; + +/// The store for a median. +/// +/// `KeyPrefix` is accepted so a single set of storage values may be used to back multiple medians. +/// For all `StorageDoubleMap`s however, the hasher of the second key MUST be the identity hasher. +/// +/// For all storage values present, they MUST be considered opaque to the caller and left +/// undisturbed. No assumptions may be made about their internal representation nor usage. +/// Any names or documentation comments are solely for the review of the implementation +/// itself, and are not intended to signify any potential layout nor use cases to the caller. +/// ANY external usage has undefined behavior. 
+pub trait MedianStore { + /// The policy to use when there are multiple candidate values. + const POLICY: Policy; + + /// The amount of items currently present within the median's list. + type Length: StorageMap; + + /// A store for the values currently present within the median. + /// + /// The value is the amount of instances of this value within the median's list. + type Store: IterableStorageDoubleMap; + + /// A secondary store for the values currently present within the median. + type ReverseStore: IterableStorageDoubleMap< + KeyPrefix, + LexicographicReverse, + (), + Query = (), + >; + + /// The position of the saved median within the list of values. + /// + /// This is necessary as when a value selected as the current median is present multiple times + /// within the list of values, the code does not know _which_ instance was selected as the + /// median, as necessary to know when to advance to a lesser/greater value. To resolve this, once + /// we know a value is the median value, we always set the position to the _first instance_ of + /// the value. This gives us a consistent frame of reference to decide the next steps of the + /// algorithm upon. + type Position: StorageMap>; + + /// The median value. + /// + /// This may drift from the actual median while an update is performed. + type Median: StorageMap>; +} + +const KEY_PREFIX_ASSERT: &str = "next value in storage had a different prefix associated"; +const AFTER_ASSERT: &str = "iterator yielding *after* key yielded key itself"; + +/// Update the median. +/// +/// This function may be called at any point to correctly calculate the current median. It will do +/// so in an amount of operations linear to the distance from the stored median to the new median. 
+/// +/// Since the distance is bounded by the amount of insertions to/removals from the median's list +/// which have yet to be handled, the following `push` and `pop` functions achieve a constant +/// amount of operations by calling this function _upon each and every invocation_. This leaves +/// solely a singular insertion/removal needing to be handled, and a maximum distance of one. +fn update_median< + KeyPrefix: FullCodec, + MedianValue: Average + LexicographicEncoding, + S: MedianStore, +>( + key_prefix: impl Copy + EncodeLike, +) { + let Some(mut existing_median_pos) = S::Position::get(key_prefix) else { + return; + }; + let length = S::Length::get(key_prefix); + let target_median_pos = S::POLICY.target_median_pos(length); + + let mut existing_median = + S::Median::get(key_prefix).expect("current position yet not current median"); + + // We first iterate up to the desired median position + { + let mut iter = { + let existing_median_key = + S::Store::hashed_key_for(key_prefix, existing_median.lexicographic_encode()); + S::Store::iter_from(existing_median_key) + }; + + let mut existing_median_instances = + S::Store::get(key_prefix, existing_median.lexicographic_encode()); + let mut next_value_first_pos; + while { + next_value_first_pos = existing_median_pos + existing_median_instances; + next_value_first_pos <= target_median_pos + } { + existing_median_pos = next_value_first_pos; + let (_key_prefix, next_value_encoding, next_value_instances) = iter + .next() + .expect("stored median was before the actual median yet no values were after it"); + debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}"); + debug_assert!( + existing_median.lexicographic_encode() != next_value_encoding, + "{AFTER_ASSERT}", + ); + existing_median = MedianValue::lexicographic_decode(next_value_encoding); + existing_median_instances = next_value_instances; + } + } + + // Then, we iterate down to the desired median position + { + let mut iter = { + let 
existing_median_key = + S::ReverseStore::hashed_key_for(key_prefix, LexicographicReverse::from(&existing_median)); + S::ReverseStore::iter_keys_from(existing_median_key) + }; + + while existing_median_pos > target_median_pos { + let (_key_prefix, prior_value_encoding) = iter + .next() + .expect("stored median was after the actual median yet no values were before it"); + debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}"); + let prior_value = prior_value_encoding.into(); + debug_assert!(prior_value != existing_median, "{AFTER_ASSERT}"); + let prior_value_instances = S::Store::get(key_prefix, prior_value.lexicographic_encode()); + existing_median = prior_value; + existing_median_pos -= prior_value_instances; + } + } + + S::Position::set(key_prefix, Some(existing_median_pos)); + S::Median::set(key_prefix, Some(existing_median)); +} + +/// A median. +/// +/// The implementation only uses a constant amount of database operations to implement insertion +/// and removal. When instantiated over a database with logarithmic complexities (such as a radix +/// trie), this effects a median with logarithmic memory/computation complexities (not requiring +/// loading all values into memory). +/// +/// This SHOULD NOT be used for small collections where the linear (or even quadratic) complexities +/// still out-perform how expensive database operations are. In those cases, the collection should +/// be written to a single storage slot, read entirely, sorted, and the median should be +/// immediately taken via indexing the value halfway through the collection. +pub trait Median: + MedianStore +{ + /// The current length of the median's list. + fn length(key_prefix: impl Copy + EncodeLike) -> u64; + + /// The current median value. + /// + /// This returns `None` if no values are present. + fn median(key_prefix: impl Copy + EncodeLike) -> Option; + + /// Push a new value onto the median. 
+ /// + /// If the value is already present within the existing values, the amount of times it will be + /// considered present will be incremented. + fn push(key_prefix: impl Copy + EncodeLike, value: MedianValue); + + /// Pop a value from the median. + /// + /// This returns `true` if the value was present and `false` otherwise. + /// + /// If the value is present within the existing values multiple times, only a single instance + /// will be removed. + fn pop(key_prefix: impl Copy + EncodeLike, value: MedianValue) -> bool; +} + +impl< + KeyPrefix: FullCodec, + MedianValue: Average + LexicographicEncoding, + S: MedianStore, + > Median for S +{ + fn length(key_prefix: impl Copy + EncodeLike) -> u64 { + Self::Length::get(key_prefix) + } + + /* + This function assumes `Position`, `Median` are up to date. This is guaranteed by + `update_median` being called after every single `push`, `pop` call, the only defined ways to + mutate the state. + */ + fn median(key_prefix: impl Copy + EncodeLike) -> Option { + let mut current_median = Self::Median::get(key_prefix)?; + // If we're supposed to take the average, do so now + if matches!(S::POLICY, Policy::Average) { + let length = Self::length(key_prefix); + if (length % 2) == 0 { + // This will yield the target position for the lesser value in the pair + let target_median_pos_lo = Self::POLICY.target_median_pos(length); + let target_median_pos_hi = target_median_pos_lo + 1; + + /* + We need to take the average of the current value and the next value, due to + `Policy::Average` internally being considered `Policy::Lesser` and solely differing here + when the median is fetched. + + To fetch the next value, we first need to identify if `target_median_pos` points to the + _last instance_ of the currently selected median value. If it does not, then the next + value is another instance of this value, the average of them themselves, and we can + return now. 
+ + If `target_median_pos` does point to the last instance of the currently selected median + value, then we fetch the next key in our trie to learn the next value in order to take the + average. + */ + let current_median_pos = + Self::Position::get(key_prefix).expect("current median yet no position"); + let current_median_encoding = current_median.lexicographic_encode(); + let inclusions = Self::Store::get(key_prefix, &current_median_encoding); + let start_pos_of_next_value = current_median_pos + inclusions; + + // Short-circuit if we are averaging two of the same value + if target_median_pos_hi < start_pos_of_next_value { + return Some(current_median); + } + + let current_median_key = Self::Store::hashed_key_for(key_prefix, &current_median_encoding); + let (_key_prefix, next_encoding) = Self::Store::iter_keys_from(current_median_key) + .next() + .expect("last value in storage yet looking for value after it"); + debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}"); + debug_assert!(current_median_encoding != next_encoding, "{AFTER_ASSERT}"); + let next_value = MedianValue::lexicographic_decode(next_encoding); + + current_median = MedianValue::average(current_median, next_value); + } + } + Some(current_median) + } + + fn push(key_prefix: impl Copy + EncodeLike, value: MedianValue) { + // Update the length + let existing_length = Self::Length::get(key_prefix); + let new_length = existing_length + 1; + Self::Length::set(key_prefix, new_length); + + // Update the amount of inclusions + let encoding = value.lexicographic_encode(); + { + let existing_presences = Self::Store::get(key_prefix, &encoding); + let new_presences = existing_presences + 1; + Self::Store::set(key_prefix, &encoding, new_presences); + if existing_presences == 0 { + Self::ReverseStore::set(key_prefix, LexicographicReverse::from_encoding(encoding), ()); + } + } + + // If this was the first value inserted, initialize and immediately return + if existing_length == 0 { + 
Self::Position::set(key_prefix, Some(0)); + Self::Median::set(key_prefix, Some(value)); + return; + } + + // Fetch the current median + let existing_median = + Self::Median::get(key_prefix).expect("values within median yet no median"); + + // If this value was inserted before the current median, the current median's position has + // increased + if value < existing_median { + let mut existing_median_pos = + Self::Position::get(key_prefix).expect("values within median yet no current position"); + existing_median_pos += 1; + Self::Position::set(key_prefix, Some(existing_median_pos)); + } + + // Update the median + update_median::<_, _, Self>(key_prefix); + } + + fn pop(key_prefix: impl Copy + EncodeLike, value: MedianValue) -> bool { + let encoding = value.lexicographic_encode(); + let mut inclusions = Self::Store::get(key_prefix, &encoding); + if inclusions == 0 { + return false; + } + + // Update the length + let existing_length = Self::Length::get(key_prefix); + let new_length = existing_length - 1; + Self::Length::set(key_prefix, new_length); + + // Update the presence within the median's list + inclusions -= 1; + if inclusions == 0 { + Self::Store::remove(key_prefix, &encoding); + Self::ReverseStore::remove(key_prefix, LexicographicReverse::from_encoding(encoding)); + } else { + Self::Store::set(key_prefix, encoding, inclusions); + } + + let existing_median = + Self::Median::get(key_prefix).expect("values within median yet no median"); + match value.cmp(&existing_median) { + Ordering::Less => { + let mut existing_median_pos = + Self::Position::get(key_prefix).expect("values within median yet no current position"); + existing_median_pos -= 1; + Self::Position::set(key_prefix, Some(existing_median_pos)); + } + + Ordering::Equal if inclusions == 0 => { + /* + This value was the median, then removed, leaving `Median` and `Position` in an + ill-defined state. We attempt to consider `Position` as well-defined and solely update + `Median` to also be well-defined. 
+ + This works so long as `Position` still refers to a valid position within the median's list. + It may not if the median's list started with length 1 or 2, where the current position + could have referred to the last element in the list, now popped. + + If the length was 1, the list is now empty, triggering its own special case. + + If the length was 2, we create a well-defined (and also accurate) definition for + `Position` and `Median` by setting them to the first (and only) item within + the list. + */ + if new_length == 0 { + Self::Position::remove(key_prefix); + Self::Median::remove(key_prefix); + } else { + let existing_median_pos = + Self::Position::get(key_prefix).expect("values within median yet no current position"); + + let new_median_encoding = if existing_median_pos >= new_length { + /* + While resetting the declared median to the first item is always safe, so long as + `update_median` is called after (as done here), `update_median` has an algorithmic + complexity linear to the distance from the declared median to the correct median. + That means this can only be done, while maintaining the desired complexities, when a + bound is known on the distance from `0` to `target_median_pos`. + + Since the list length is 1 in this case, per the reasoning above, the distance here + is `0`, making this a safe operation which also respects the desired complexities. 
+ */ + Self::Position::set(key_prefix, Some(0)); + Self::Store::iter_key_prefix(key_prefix) + .next() + .expect("median list isn't empty yet has no values") + } else { + let existing_median_key = + Self::Store::hashed_key_for(key_prefix, existing_median.lexicographic_encode()); + let (_key_prefix, next_value_encoding) = + Self::Store::iter_keys_from(existing_median_key) + .next() + .expect("current median wasn't the last value yet no value was after"); + debug_assert_eq!(key_prefix.encode(), _key_prefix.encode(), "{KEY_PREFIX_ASSERT}"); + debug_assert!( + existing_median.lexicographic_encode() != next_value_encoding, + "{AFTER_ASSERT}", + ); + next_value_encoding + }; + + Self::Median::set( + key_prefix, + Some(MedianValue::lexicographic_decode(new_median_encoding)), + ); + } + } + + /* + If this value is an instance of the current median, for which some remain, we consider this + as removing an instance other than the first instance which is what the position refers to. + Accordingly, we don't have to update the position. + + If this is greater than the current median, then its removal does not affect the position + of the current median. + */ + Ordering::Equal | Ordering::Greater => {} + } + + // Update the median + update_median::<_, _, Self>(key_prefix); + + true + } +} + +#[test] +fn test_median() { + use frame_support::{ + Blake2_128Concat, Identity, + storage::types::{self, ValueQuery, OptionQuery}, + }; + + use rand_core::{RngCore, OsRng}; + + macro_rules! 
prefix { + ($name: ident, $prefix: expr) => { + struct $name; + impl frame_support::traits::StorageInstance for $name { + const STORAGE_PREFIX: &'static str = $prefix; + fn pallet_prefix() -> &'static str { + "median" + } + } + }; + } + prefix!(PrefixLength, "Length"); + prefix!(PrefixStore, "Store"); + prefix!(PrefixReverse, "Reverse"); + prefix!(PrefixPosition, "Position"); + prefix!(PrefixMedian, "Median"); + + type StorageMapStruct = + types::StorageMap; + type StorageDoubleMapStruct = + types::StorageDoubleMap; + + macro_rules! test { + ($name: ident, $policy: expr) => { + struct $name; + impl MedianStore<(), u32> for $name { + const POLICY: Policy = $policy; + type Length = StorageMapStruct; + type Store = + StorageDoubleMapStruct::Encoding, u64>; + type ReverseStore = StorageDoubleMapStruct, ()>; + type Position = StorageMapStruct; + type Median = StorageMapStruct; + } + + sp_io::TestExternalities::default().execute_with(|| { + assert_eq!($name::length(()), 0); + assert_eq!($name::median(()), None); + + let mut current_list = vec![]; + for i in 0 .. 
1000 { + 'reselect: loop { + // This chooses a modulus low enough this `match` will in fact match, yet high enough + // more cases can be added without forgetting to update it being an issue + match OsRng.next_u64() % 8 { + // Push a freshly sampled value + 0 => { + #[allow(clippy::cast_possible_truncation)] + let push = OsRng.next_u64() as u32; + current_list.push(push); + current_list.sort(); + $name::push((), push); + } + // Push an existing value + 1 if !current_list.is_empty() => { + let i = + usize::try_from(OsRng.next_u64() % u64::try_from(current_list.len()).unwrap()) + .unwrap(); + let push = current_list[i]; + current_list.push(push); + current_list.sort(); + $name::push((), push); + } + // Remove an existing value + 2 if !current_list.is_empty() => { + let i = + usize::try_from(OsRng.next_u64() % u64::try_from(current_list.len()).unwrap()) + .unwrap(); + let pop = current_list.remove(i); + assert!($name::pop((), pop)); + } + // Remove a value which is not present + 3 => { + #[allow(clippy::cast_possible_truncation)] + let pop = OsRng.next_u64() as u32; + if current_list.contains(&pop) { + continue 'reselect; + } + assert!(!$name::pop((), pop)); + } + _ => continue 'reselect, + } + break 'reselect; + } + + assert_eq!( + $name::length(()), + u64::try_from(current_list.len()).unwrap(), + "length differs on iteration: {i}", + ); + let target_median_pos = + $policy.target_median_pos(u64::try_from(current_list.len()).unwrap()); + let target_median_pos = usize::try_from(target_median_pos).unwrap(); + let expected = (!current_list.is_empty()).then(|| match $policy { + Policy::Greater | Policy::Lesser => current_list[target_median_pos], + Policy::Average => { + if (current_list.len() % 2) == 0 { + u32::average(current_list[target_median_pos], current_list[target_median_pos + 1]) + } else { + current_list[target_median_pos] + } + } + }); + assert_eq!($name::median(()), expected, "median differs on iteration: {i}"); + } + }); + }; + } + + test!(Greater, 
Policy::Greater); + test!(Lesser, Policy::Lesser); + test!(Average, Policy::Average); +} diff --git a/substrate/median/src/policy.rs b/substrate/median/src/policy.rs new file mode 100644 index 00000000..50460eee --- /dev/null +++ b/substrate/median/src/policy.rs @@ -0,0 +1,62 @@ +/// The policy to use to select the median when the list of candidates is of even length. +pub enum Policy { + /// When two values are equally considerable, choose the greater value. + Greater, + /// When two values are equally considerable, choose the lesser value. + Lesser, + /// When two values are equally considerable, choose their average. + /// + /// The average will be the sum of the two values divided by two. For how the division may or may + /// not round, please defer to the `Average` implementation for the value operated over. + /* + Internally, this is functionally equivalent to `Policy::Lesser`. It is only when the median is + finally requested that if the policy is to take the average, we look and see if we need to + before applying the relevant logic. From the lesser value, finding the complementary greater + value is trivial. + */ + Average, +} + +impl Policy { + /// Calculate the position of the median within the sorted list of values. + /// + /// If there are two candidates, `Policy::Greater` and `Policy::Lesser` decide which is chosen, + /// with `Policy::Average` being interpreted as `Policy::Lesser` _by this function_. + /// + /// This will return `0` if the median's list is empty, despite `0` being an invalid position in + /// that context. + pub(super) fn target_median_pos(&self, list_length: u64) -> u64 { + let mut target_median_pos = list_length / 2; + /* + If this could be two possible indexes, we use the policy to determine the target index. When + an odd amount of values are present in the median's list, the integer division by two is + inherently correct. 
When there's an even amount of values present, the integer division favors + the higher value, leading us to correct for when we want the lower value. + */ + if matches!(self, Policy::Lesser | Policy::Average) && ((list_length % 2) == 0) { + target_median_pos = target_median_pos.saturating_sub(1); + } + target_median_pos + } +} + +#[test] +fn policy() { + assert_eq!(Policy::Greater.target_median_pos(0), 0); + assert_eq!(Policy::Greater.target_median_pos(1), 0); + assert_eq!(Policy::Greater.target_median_pos(2), 1); + assert_eq!(Policy::Greater.target_median_pos(3), 1); + assert_eq!(Policy::Greater.target_median_pos(4), 2); + + assert_eq!(Policy::Lesser.target_median_pos(0), 0); + assert_eq!(Policy::Lesser.target_median_pos(1), 0); + assert_eq!(Policy::Lesser.target_median_pos(2), 0); + assert_eq!(Policy::Lesser.target_median_pos(3), 1); + assert_eq!(Policy::Lesser.target_median_pos(4), 1); + + assert_eq!(Policy::Average.target_median_pos(0), 0); + assert_eq!(Policy::Average.target_median_pos(1), 0); + assert_eq!(Policy::Average.target_median_pos(2), 0); + assert_eq!(Policy::Average.target_median_pos(3), 1); + assert_eq!(Policy::Average.target_median_pos(4), 1); +}