From 49c4acffbbd5732b87f33493ba4fbd94c6c903a5 Mon Sep 17 00:00:00 2001
From: Luke Parker <lukeparker5132@gmail.com>
Date: Sun, 1 Jan 2023 05:09:22 -0500
Subject: [PATCH] Use a more efficient challenge function in the dleq

The prior one did 64 scalar additions for Ed25519. The new one does 8.
It now parses the challenge u64-by-u64 instead of byte-by-byte.

Improves perf by ~10-15%.
---
 crypto/dleq/src/lib.rs | 59 ++++++++++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 13 deletions(-)

diff --git a/crypto/dleq/src/lib.rs b/crypto/dleq/src/lib.rs
index 5372a2d5..360791c4 100644
--- a/crypto/dleq/src/lib.rs
+++ b/crypto/dleq/src/lib.rs
@@ -28,22 +28,55 @@ pub(crate) fn challenge<T: Transcript, F: PrimeField>(transcript: &mut T) -> F {
   //    and loading it in
   // 3: Iterating over each byte and manually doubling/adding. This is simplest
 
-  // Get a wide amount of bytes to safely reduce without bias
-  let target = ((usize::try_from(F::NUM_BITS).unwrap() + 7) / 8) * 2;
-  let mut challenge_bytes = transcript.challenge(b"challenge").as_ref().to_vec();
-  while challenge_bytes.len() < target {
-    // Secure given transcripts updating on challenge
-    challenge_bytes.extend(transcript.challenge(b"challenge_extension").as_ref());
-  }
-  challenge_bytes.truncate(target);
-
   let mut challenge = F::zero();
-  for b in challenge_bytes {
-    for _ in 0 .. 8 {
-      challenge = challenge.double();
+
+  // Get a wide amount of bytes to safely reduce without bias
+  // In most cases, <=1.5x bytes is enough. 2x is still standard and there are some theoretical
+  // groups which may technically require more than 1.5x bytes for this to work as intended
+  let target_bytes = ((usize::try_from(F::NUM_BITS).unwrap() + 7) / 8) * 2;
+  let mut challenge_bytes = transcript.challenge(b"challenge");
+  let challenge_bytes_len = challenge_bytes.as_ref().len();
+  // If the challenge is 32 bytes, and we need 64, we need two challenges
+  let needed_challenges = (target_bytes + (challenge_bytes_len - 1)) / challenge_bytes_len;
+
+  // The following algorithm should be equivalent to a wide reduction of the challenges,
+  // interpreted as a concatenated, big-endian byte string
+  let mut handled_bytes = 0;
+  'outer: for _ in 0 ..= needed_challenges {
+    // Cursor of which byte of the challenge to use next
+    let mut b = 0;
+    while b < challenge_bytes_len {
+      // Get the next amount of bytes to attempt
+      // Only grabs the needed amount of bytes, up to 8 at a time (u64), so long as they're
+      // available in the challenge
+      let chunk_bytes = (target_bytes - handled_bytes).min(8).min(challenge_bytes_len - b);
+
+      let mut chunk = 0;
+      for _ in 0 .. chunk_bytes {
+        chunk <<= 8;
+        chunk |= u64::from(challenge_bytes.as_ref()[b]);
+        b += 1;
+      }
+      // Add this chunk
+      challenge += F::from(chunk);
+
+      handled_bytes += chunk_bytes;
+      // If we've reached the target amount of bytes, break
+      if handled_bytes == target_bytes {
+        break 'outer;
+      }
+
+      // Shift over by however many bits will be in the next chunk
+      let next_chunk_bytes = (target_bytes - handled_bytes).min(8).min(challenge_bytes_len);
+      for _ in 0 .. (next_chunk_bytes * 8) {
+        challenge = challenge.double();
+      }
     }
-    challenge += F::from(u64::from(b));
+
+    // Secure thanks to the Transcript trait having a bound of updating on challenge
+    challenge_bytes = transcript.challenge(b"challenge_extension");
   }
+
   challenge
 }