From 3e26e0b9317338746df33de811e3ec4f9a48c0d1 Mon Sep 17 00:00:00 2001 From: MesTTo Date: Tue, 23 Jun 2026 16:25:44 +1000 Subject: [PATCH] Skip dense meet joined-mask scans --- src/dense_byte_node.rs | 162 ++++++++++++++++++++--------------------- 1 file changed, 79 insertions(+), 83 deletions(-) diff --git a/src/dense_byte_node.rs b/src/dense_byte_node.rs index dd846a2..22698df 100644 --- a/src/dense_byte_node.rs +++ b/src/dense_byte_node.rs @@ -110,6 +110,15 @@ impl> ByteNode alloc, } } + #[inline(always)] + fn slot_in_word(mask_word: u64, word_base: usize, bit_idx: u32) -> usize { + let preceding_bits = if bit_idx == 0 { + 0 + } else { + (1u64 << bit_idx) - 1 + }; + word_base + (mask_word & preceding_bits).count_ones() as usize + } #[inline] pub fn reserve_capacity(&mut self, additional: usize) { self.values.reserve(additional) @@ -2043,10 +2052,8 @@ impl, Other } fn pmeet(&self, other: &ByteNode) -> AlgebraicResult { - // TODO this technically doesn't need to calculate and iterate over jm - // iterating over mm and calculating m such that the following suffices - // c_{self,other} += popcnt(m & {self,other}) - let jm: ByteMask = self.mask | other.mask; + // Iterate the overlap mask directly. Slot indexes are recovered with + // prefix popcounts in each dense-mask word. let mut mm: ByteMask = self.mask & other.mask; let mut is_identity = self.mask == mm; @@ -2058,58 +2065,55 @@ impl, Other let mut v = ValuesVec::with_capacity_in(len, self.alloc.clone()); let new_v = v.v.spare_capacity_mut(); - let mut l = 0; - let mut r = 0; let mut c = 0; + let mut self_word_base = 0; + let mut other_word_base = 0; for i in 0..4 { - let mut lm = jm.0[i]; + let self_word = self.mask.0[i]; + let other_word = other.mask.0[i]; + let mut lm = mm.0[i]; while lm != 0 { let index = lm.trailing_zeros(); - - if ((1u64 << index) & mm.0[i]) != 0 { - //This runs for cofrees that exist in both nodes - - let lv = unsafe { self.values.get_unchecked(l) }; - let rv = unsafe { other.values.get_unchecked(r) }; - match lv.pmeet(rv) { - AlgebraicResult::None => { - is_counter_identity = false; - is_identity = false; - mm.0[i] ^= 1u64 << index; - }, - AlgebraicResult::Identity(mask) => { - debug_assert!((mask & SELF_IDENT > 0) || (mask & COUNTER_IDENT > 0)); - if mask & SELF_IDENT == 0 { - is_identity = false; - } - if mask & COUNTER_IDENT == 0 { - is_counter_identity = false; - } - if mask & SELF_IDENT > 0 { - unsafe { new_v.get_unchecked_mut(c).write(lv.clone()) }; - } else { - let new_cf = Cf::from_cf(rv.clone()); - unsafe { new_v.get_unchecked_mut(c).write(new_cf) }; - } - c += 1; - }, - AlgebraicResult::Element(jv) => { + let l = Self::slot_in_word(self_word, self_word_base, index); + let r = Self::slot_in_word(other_word, other_word_base, index); + + //This runs for cofrees that exist in both nodes + let lv = unsafe { self.values.get_unchecked(l) }; + let rv = unsafe { other.values.get_unchecked(r) }; + match lv.pmeet(rv) { + AlgebraicResult::None => { + is_counter_identity = false; + is_identity = false; + mm.0[i] ^= 1u64 << index; + }, + AlgebraicResult::Identity(mask) => { + debug_assert!((mask & SELF_IDENT > 0) || (mask & COUNTER_IDENT > 0)); + if mask & SELF_IDENT == 0 { is_identity = false; + } + if mask & COUNTER_IDENT == 0 { is_counter_identity = false; - unsafe { new_v.get_unchecked_mut(c).write(jv) }; - c += 1; - }, - } - l += 1; - r += 1; - } else if ((1u64 << index) & self.mask.0[i]) != 0 { - l += 1; - } else { - r += 1; + } + if mask & SELF_IDENT > 0 { + unsafe { new_v.get_unchecked_mut(c).write(lv.clone()) }; + } else { + let new_cf = Cf::from_cf(rv.clone()); + unsafe { new_v.get_unchecked_mut(c).write(new_cf) }; + } + c += 1; + }, + AlgebraicResult::Element(jv) => { + is_identity = false; + is_counter_identity = false; + unsafe { new_v.get_unchecked_mut(c).write(jv) }; + c += 1; + }, } lm ^= 1u64 << index; } + self_word_base += self_word.count_ones() as usize; + other_word_base += other_word.count_ones() as usize; } unsafe{ v.v.set_len(c); } @@ -2222,13 +2226,10 @@ impl> ByteNode { fn prestrict>(&self, other: &ByteNode) -> AlgebraicResult where Self: Sized { - let mut is_identity = true; - - // TODO this technically doesn't need to calculate and iterate over jm - // iterating over mm and calculating m such that the following suffices - // c_{self,other} += popcnt(m & {self,other}) - let jm: ByteMask = self.mask | other.mask; + // Iterate the overlap mask directly. Slot indexes are recovered with + // prefix popcounts in each dense-mask word. let mut mm: ByteMask = self.mask & other.mask; + let mut is_identity = self.mask == mm && other.mask == mm; let mmc = [mm.0[0].count_ones(), mm.0[1].count_ones(), mm.0[2].count_ones(), mm.0[3].count_ones()]; @@ -2236,48 +2237,43 @@ impl> ByteNode let mut v = ValuesVec::with_capacity_in(len, self.alloc.clone()); let new_v = v.v.spare_capacity_mut(); - let mut l = 0; - let mut r = 0; let mut c = 0; + let mut self_word_base = 0; + let mut other_word_base = 0; for i in 0..4 { - let mut lm = jm.0[i]; + let self_word = self.mask.0[i]; + let other_word = other.mask.0[i]; + let mut lm = mm.0[i]; while lm != 0 { let index = lm.trailing_zeros(); + let l = Self::slot_in_word(self_word, self_word_base, index); + let r = Self::slot_in_word(other_word, other_word_base, index); - if ((1u64 << index) & mm.0[i]) != 0 { - let lv = unsafe { self.values.get_unchecked(l) }; - let rv = unsafe { other.values.get_unchecked(r) }; - // println!("dense prestrict {}", index as usize + i*64); + let lv = unsafe { self.values.get_unchecked(l) }; + let rv = unsafe { other.values.get_unchecked(r) }; + // println!("dense prestrict {}", index as usize + i*64); - match lv.prestrict(rv) { - AlgebraicResult::None => { - is_identity = false; - mm.0[i] ^= 1u64 << index; - } - AlgebraicResult::Identity(mask) => { - debug_assert_eq!(mask, SELF_IDENT); //restrict is non-commutative - unsafe { new_v.get_unchecked_mut(c).write(lv.clone()) }; - c += 1; - }, - AlgebraicResult::Element(jv) => { - is_identity = false; - unsafe { new_v.get_unchecked_mut(c).write(jv) }; - c += 1; - }, - } - l += 1; - r += 1; - } else { - is_identity = false; - if ((1u64 << index) & self.mask.0[i]) != 0 { - l += 1; - } else { - r += 1; + match lv.prestrict(rv) { + AlgebraicResult::None => { + is_identity = false; + mm.0[i] ^= 1u64 << index; } + AlgebraicResult::Identity(mask) => { + debug_assert_eq!(mask, SELF_IDENT); //restrict is non-commutative + unsafe { new_v.get_unchecked_mut(c).write(lv.clone()) }; + c += 1; + }, + AlgebraicResult::Element(jv) => { + is_identity = false; + unsafe { new_v.get_unchecked_mut(c).write(jv) }; + c += 1; + }, } lm ^= 1u64 << index; } + self_word_base += self_word.count_ones() as usize; + other_word_base += other_word.count_ones() as usize; } unsafe{ v.v.set_len(c); }