diff --git a/src/dense_byte_node.rs b/src/dense_byte_node.rs index dd846a2..06b6453 100644 --- a/src/dense_byte_node.rs +++ b/src/dense_byte_node.rs @@ -698,6 +698,10 @@ impl> TrieNode requested_mask.clear_bit(byte); match self.get(byte) { Some(cf) => { + if key.len() == 1 && *expect_val && cf.has_rec() { + unrequested_cofree_half = true; + } + //A key longer than 1 byte or an explicit request for a rec link can be answered with a Child if key.len() > 1 || !*expect_val { match cf.rec() { @@ -1850,7 +1854,12 @@ impl, OtherCf: CoFree if new_mask > 0 { AlgebraicResult::Identity(new_mask) } else { - AlgebraicResult::Element(Self::new(None, self.val().cloned())) + let val = if val_mask & SELF_IDENT > 0 { + self.val().cloned() + } else { + other.val().cloned() + }; + AlgebraicResult::Element(Self::new(None, val)) } }, (AlgebraicResult::Identity(rec_mask), AlgebraicResult::None) => { @@ -1864,7 +1873,12 @@ impl, OtherCf: CoFree if new_mask > 0 { AlgebraicResult::Identity(new_mask) } else { - AlgebraicResult::Element(Self::new(self.rec().cloned(), None)) + let rec = if rec_mask & SELF_IDENT > 0 { + self.rec().cloned() + } else { + other.rec().cloned() + }; + AlgebraicResult::Element(Self::new(rec, None)) } }, (rec_el, val_el) => { diff --git a/src/experimental/serialization.rs b/src/experimental/serialization.rs index 55ab71f..21d992b 100644 --- a/src/experimental/serialization.rs +++ b/src/experimental/serialization.rs @@ -817,8 +817,8 @@ pub fn deserialize_file(file_path : impl AsRef, d let [path_idx, node_idx] = node_buf.map(|x| x as usize); - let Deserialized::Path(path) = &deserialized[path_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected path")); }; - let Deserialized::Node(node) = &deserialized[node_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); }; + let Deserialized::Path(path) = deserialized.get(path_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, path offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected path")); }; + let Deserialized::Node(node) = deserialized.get(node_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, node offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); }; let mut path_node = PathMap::new(); @@ -838,8 +838,8 @@ pub fn deserialize_file(file_path : impl AsRef, d let [val_idx, node_idx] = node_buf.map(|x| x as usize); - let Deserialized::Value(value) = &deserialized[val_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected value")); }; - let Deserialized::Node(node) = &deserialized[node_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); }; + let Deserialized::Value(value) = deserialized.get(val_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, value offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected value")); }; + let Deserialized::Node(node) = deserialized.get(node_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, node offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); }; let mut value_node = node.clone(); value_node.set_val_at(&[], value.clone()); @@ -852,10 +852,10 @@ pub fn deserialize_file(file_path : impl AsRef, d let [mask_idx, branches_idx] = node_buf.map(|x| x as usize); - let Deserialized::ChildMask(mask) = &deserialized[mask_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected childmask as `(/?)*`")); }; + let Deserialized::ChildMask(mask) = deserialized.get(mask_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, childmask offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected childmask as `(/?)*`")); }; let iter = crate::utils::ByteMaskIter::new(*mask); - let Deserialized::Branches(r) = &deserialized[branches_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected branches")); }; + let Deserialized::Branches(r) = deserialized.get(branches_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, branches offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected branches")); }; let branches = &branches_buffer[r.start..r.end]; core::debug_assert_eq!(mask.into_iter().copied().map(u64::count_ones).sum::() as usize, branches.len()); @@ -864,7 +864,7 @@ pub fn deserialize_file(file_path : impl AsRef, d let mut wz = branch_node.write_zipper(); for (byte, &idx) in iter.into_iter().zip(branches) { - let Deserialized::Node(node) = &deserialized[idx as usize] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); }; + let Deserialized::Node(node) = deserialized.get(idx as usize).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, child node offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); }; core::debug_assert!(!node.is_empty()); @@ -986,6 +986,38 @@ mod test { use super::*; use std::sync::Arc; + fn write_serialized_fixture(dir : &tempfile::TempDir, name : &str, data : &[u8])->PathBuf { + let path = dir.path().join(name); + std::fs::write(&path, data).unwrap(); + path + } + + #[test] + fn deserialize_rejects_malformed_records() { + let temp_dir = tempfile::tempdir().unwrap(); + + let bad_tag = write_serialized_fixture(&temp_dir, "bad_tag.data", b"header\n? bad\n"); + let err = deserialize_file::>(&bad_tag, |b| Arc::<[u8]>::from(b)).unwrap_err(); + assert!(err.to_string().contains("expected ``")); + + let odd_path_hex = write_serialized_fixture(&temp_dir, "odd_path_hex.data", b"header\np A\n"); + let err = deserialize_file::>(&odd_path_hex, |b| Arc::<[u8]>::from(b)).unwrap_err(); + assert!(err.to_string().contains("expected path")); + } + + #[test] + fn deserialize_rejects_forward_offsets_without_panic() { + let temp_dir = tempfile::tempdir().unwrap(); + let path = write_serialized_fixture( + &temp_dir, + "forward_offset.data", + b"header\nP x0000000000000001x0000000000000002\n" + ); + + let err = deserialize_file::>(&path, |b| Arc::<[u8]>::from(b)).unwrap_err(); + assert!(err.to_string().contains("offset out of bounds")); + } + #[test] fn serialization_trivial_test() { const LEN : usize = 0x_80; @@ -1598,4 +1630,4 @@ pub fn dbg_hex_line_numbers(f : &std::fs::File, path : impl AsRef>()] = + [(); 64]; + +#[cfg(all(not(feature = "slim_ptrs"), target_arch = "x86_64", not(miri)))] +const _: [(); core::mem::size_of::>()] = + [(); 48]; + const SLOT_0_USED_MASK: u16 = 1 << 15; const SLOT_1_USED_MASK: u16 = 1 << 14; const BOTH_SLOTS_USED_MASK: u16 = SLOT_0_USED_MASK | SLOT_1_USED_MASK; diff --git a/tests/pathmap_algebra_differential.rs b/tests/pathmap_algebra_differential.rs new file mode 100644 index 0000000..d42066b --- /dev/null +++ b/tests/pathmap_algebra_differential.rs @@ -0,0 +1,180 @@ +use std::collections::BTreeSet; + +use pathmap::PathMap; + +type KeySet = BTreeSet>; + +fn next_u64(state: &mut u64) -> u64 { + *state = state + .wrapping_mul(6_364_136_223_846_793_005) + .wrapping_add(1_442_695_040_888_963_407); + *state +} + +fn fixed_width_set(seed: u64, salt: u64) -> KeySet { + let mut state = seed ^ salt; + let mut keys = KeySet::new(); + for ordinal in 0..72_u64 { + let mut key = vec![0_u8; 8]; + for byte in &mut key { + *byte = (next_u64(&mut state) >> 32) as u8; + } + key[0] ^= ordinal as u8; + keys.insert(key); + } + keys +} + +fn prefix_heavy_set(seed: u64, salt: u64) -> KeySet { + let mut state = seed ^ salt; + let mut keys = KeySet::new(); + if next_u64(&mut state) & 7 == 0 { + keys.insert(Vec::new()); + } + for index in 0..48_u8 { + let length = (next_u64(&mut state) % 9) as usize; + let mut key = Vec::with_capacity(length); + for position in 0..length { + let selector = next_u64(&mut state); + key.push(match selector % 5 { + 0 => index, + 1 => position as u8, + 2 => (selector >> 32) as u8, + 3 => b'a' + (selector % 7) as u8, + _ => 0xff_u8.wrapping_sub(index), + }); + } + keys.insert(key.clone()); + if key.len() > 1 && index % 3 == 0 { + keys.insert(key[..key.len() - 1].to_vec()); + } + if index % 7 == 0 { + key.extend_from_slice(&[0, index]); + keys.insert(key); + } + } + keys +} + +fn map_from_set(keys: &KeySet) -> PathMap<()> { + let mut map = PathMap::new(); + for key in keys { + map.insert(key, ()); + } + map +} + +fn set_from_map(map: &PathMap<()>) -> KeySet { + map.iter().map(|(key, ())| key).collect() +} + +#[test] +fn seeded_prefix_free_algebra_matches_btreeset_oracle() { + for seed in 0_u64..256 { + let a = fixed_width_set(seed, 0x243f_6a88_85a3_08d3); + let b = fixed_width_set(seed, 0x1319_8a2e_0370_7344); + let c = fixed_width_set(seed, 0xa409_3822_299f_31d0); + let ma = map_from_set(&a); + let mb = map_from_set(&b); + let mc = map_from_set(&c); + + let union = a.union(&b).cloned().collect::(); + let intersection = a.intersection(&b).cloned().collect::(); + let difference = a.difference(&b).cloned().collect::(); + assert_eq!(set_from_map(&ma.join(&mb)), union, "join seed {seed}"); + assert_eq!( + set_from_map(&ma.meet(&mb)), + intersection, + "meet seed {seed}" + ); + assert_eq!( + set_from_map(&ma.subtract(&mb)), + difference, + "subtract seed {seed}" + ); + + assert_eq!(set_from_map(&ma.join(&mb)), set_from_map(&mb.join(&ma))); + assert_eq!(set_from_map(&ma.meet(&mb)), set_from_map(&mb.meet(&ma))); + assert_eq!(set_from_map(&ma.join(&ma)), a); + assert_eq!(set_from_map(&ma.meet(&ma)), a); + assert!(set_from_map(&ma.subtract(&ma)).is_empty()); + assert_eq!( + set_from_map(&ma.join(&mb).join(&mc)), + set_from_map(&ma.join(&mb.join(&mc))) + ); + assert_eq!( + set_from_map(&ma.meet(&mb).meet(&mc)), + set_from_map(&ma.meet(&mb.meet(&mc))) + ); + assert_eq!( + set_from_map(&ma.meet(&mb.join(&mc))), + set_from_map(&ma.meet(&mb).join(&ma.meet(&mc))) + ); + } +} + +#[test] +fn cloned_prefix_heavy_maps_are_logically_isolated_under_mutation() { + for seed in 0_u64..128 { + let original_set = prefix_heavy_set(seed, 0x082e_fa98_ec4e_6c89); + let original = map_from_set(&original_set); + let mut changed = original.clone(); + let removed = original_set.iter().next().cloned(); + if let Some(key) = &removed { + assert!(changed.remove(key).is_some()); + } + let inserted = vec![0xfe, (seed >> 8) as u8, seed as u8, 0x01]; + changed.insert(&inserted, ()); + assert_eq!( + set_from_map(&original), + original_set, + "original seed {seed}" + ); + let mut expected = original_set; + if let Some(key) = removed { + expected.remove(&key); + } + expected.insert(inserted); + assert_eq!(set_from_map(&changed), expected, "clone seed {seed}"); + } +} + +#[test] +fn prefix_valued_meet_is_associative_seed_44() { + let seed = 44; + let a = map_from_set(&prefix_heavy_set(seed, 0x243f_6a88_85a3_08d3)); + let b = map_from_set(&prefix_heavy_set(seed, 0x1319_8a2e_0370_7344)); + let c = map_from_set(&prefix_heavy_set(seed, 0xa409_3822_299f_31d0)); + assert_eq!( + set_from_map(&a.meet(&b).meet(&c)), + set_from_map(&a.meet(&b.meet(&c))) + ); +} + +#[test] +fn seeded_prefix_heavy_dual_distributivity_matches_btreeset_oracle() { + // Seeds 10, 77, and 287 are focused regressions for CoFree identity + // operand selection and mixed value/onward-link exhaustiveness. + for seed in 0_u64..512 { + let a = prefix_heavy_set(seed, 0x243f_6a88_85a3_08d3); + let b = prefix_heavy_set(seed, 0x1319_8a2e_0370_7344); + let c = prefix_heavy_set(seed, 0xa409_3822_299f_31d0); + let ma = map_from_set(&a); + let mb = map_from_set(&b); + let mc = map_from_set(&c); + + let b_meet_c = b.intersection(&c).cloned().collect::(); + let expected = a.union(&b_meet_c).cloned().collect::(); + + assert_eq!( + set_from_map(&ma.join(&mb.meet(&mc))), + expected, + "left dual-distributive form seed {seed}" + ); + assert_eq!( + set_from_map(&ma.join(&mb).meet(&ma.join(&mc))), + expected, + "right dual-distributive form seed {seed}" + ); + } +}