Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions src/dense_byte_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,10 @@ impl<V: Clone + Send + Sync, A: Allocator, Cf: CoFree<V=V, A=A>> TrieNode<V, A>
requested_mask.clear_bit(byte);
match self.get(byte) {
Some(cf) => {
if key.len() == 1 && *expect_val && cf.has_rec() {
unrequested_cofree_half = true;
}

//A key longer than 1 byte or an explicit request for a rec link can be answered with a Child
if key.len() > 1 || !*expect_val {
match cf.rec() {
Expand Down Expand Up @@ -1850,7 +1854,12 @@ impl<V: Clone + Send + Sync, A: Allocator, Cf: CoFree<V=V, A=A>, OtherCf: CoFree
if new_mask > 0 {
AlgebraicResult::Identity(new_mask)
} else {
AlgebraicResult::Element(Self::new(None, self.val().cloned()))
let val = if val_mask & SELF_IDENT > 0 {
self.val().cloned()
} else {
other.val().cloned()
};
AlgebraicResult::Element(Self::new(None, val))
}
},
(AlgebraicResult::Identity(rec_mask), AlgebraicResult::None) => {
Expand All @@ -1864,7 +1873,12 @@ impl<V: Clone + Send + Sync, A: Allocator, Cf: CoFree<V=V, A=A>, OtherCf: CoFree
if new_mask > 0 {
AlgebraicResult::Identity(new_mask)
} else {
AlgebraicResult::Element(Self::new(self.rec().cloned(), None))
let rec = if rec_mask & SELF_IDENT > 0 {
self.rec().cloned()
} else {
other.rec().cloned()
};
AlgebraicResult::Element(Self::new(rec, None))
Comment on lines 701 to +1881

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I meant DenseNode — i.e., the motivation behind these changes.

}
},
(rec_el, val_el) => {
Expand Down
48 changes: 40 additions & 8 deletions src/experimental/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -817,8 +817,8 @@ pub fn deserialize_file<V: TrieValue>(file_path : impl AsRef<std::path::Path>, d

let [path_idx, node_idx] = node_buf.map(|x| x as usize);

let Deserialized::Path(path) = &deserialized[path_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected path")); };
let Deserialized::Node(node) = &deserialized[node_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); };
let Deserialized::Path(path) = deserialized.get(path_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, path offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected path")); };
let Deserialized::Node(node) = deserialized.get(node_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, node offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); };

let mut path_node = PathMap::new();

Expand All @@ -838,8 +838,8 @@ pub fn deserialize_file<V: TrieValue>(file_path : impl AsRef<std::path::Path>, d

let [val_idx, node_idx] = node_buf.map(|x| x as usize);

let Deserialized::Value(value) = &deserialized[val_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected value")); };
let Deserialized::Node(node) = &deserialized[node_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); };
let Deserialized::Value(value) = deserialized.get(val_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, value offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected value")); };
let Deserialized::Node(node) = deserialized.get(node_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, node offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); };

let mut value_node = node.clone();
value_node.set_val_at(&[], value.clone());
Expand All @@ -852,10 +852,10 @@ pub fn deserialize_file<V: TrieValue>(file_path : impl AsRef<std::path::Path>, d

let [mask_idx, branches_idx] = node_buf.map(|x| x as usize);

let Deserialized::ChildMask(mask) = &deserialized[mask_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected childmask as `(/?<hex_top><Hex_bot>)*`")); };
let Deserialized::ChildMask(mask) = deserialized.get(mask_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, childmask offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected childmask as `(/?<hex_top><Hex_bot>)*`")); };
let iter = crate::utils::ByteMaskIter::new(*mask);

let Deserialized::Branches(r) = &deserialized[branches_idx] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected branches")); };
let Deserialized::Branches(r) = deserialized.get(branches_idx).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, branches offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected branches")); };
let branches = &branches_buffer[r.start..r.end];

core::debug_assert_eq!(mask.into_iter().copied().map(u64::count_ones).sum::<u32>() as usize, branches.len());
Expand All @@ -864,7 +864,7 @@ pub fn deserialize_file<V: TrieValue>(file_path : impl AsRef<std::path::Path>, d
let mut wz = branch_node.write_zipper();

for (byte, &idx) in iter.into_iter().zip(branches) {
let Deserialized::Node(node) = &deserialized[idx as usize] else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); };
let Deserialized::Node(node) = deserialized.get(idx as usize).ok_or_else(|| std::io::Error::other("Malformed serialized ByteTrie, child node offset out of bounds"))? else { return Err(std::io::Error::other("Malformed serialized ByteTrie, expected node")); };

core::debug_assert!(!node.is_empty());

Expand Down Expand Up @@ -986,6 +986,38 @@ mod test {
use super::*;
use std::sync::Arc;

/// Writes `data` to a file called `name` inside `dir` and returns the full path of that file.
///
/// Panics if the file cannot be written.
fn write_serialized_fixture(dir : &tempfile::TempDir, name : &str, data : &[u8])->PathBuf {
    let fixture_path = dir.path().join(name);
    std::fs::write(fixture_path.as_path(), data).unwrap();
    fixture_path
}

/// Feeds two malformed fixture files to `deserialize_file` and checks that each one is
/// rejected with an error whose message contains the expected fragment.
#[test]
fn deserialize_rejects_malformed_records() {
    let temp_dir = tempfile::tempdir().unwrap();

    // (fixture file name, file contents, fragment the error message must contain)
    let cases: [(&str, &[u8], &str); 2] = [
        ("bad_tag.data", &b"header\n? bad\n"[..], "expected `<tag byte><space>`"),
        ("odd_path_hex.data", &b"header\np A\n"[..], "expected path"),
    ];

    for (file_name, contents, fragment) in cases {
        let fixture = write_serialized_fixture(&temp_dir, file_name, contents);
        let err = deserialize_file::<Arc<[u8]>>(&fixture, |b| Arc::<[u8]>::from(b)).unwrap_err();
        assert!(err.to_string().contains(fragment));
    }
}

/// Checks that a `P` record carrying the offsets 1 and 2 makes `deserialize_file` return an
/// "offset out of bounds" error instead of panicking.
// NOTE(review): presumably those offsets point past the records deserialized so far — confirm
// against the record format handled by `deserialize_file`.
#[test]
fn deserialize_rejects_forward_offsets_without_panic() {
    let temp_dir = tempfile::tempdir().unwrap();
    let contents: &[u8] = b"header\nP x0000000000000001x0000000000000002\n";
    let fixture = write_serialized_fixture(&temp_dir, "forward_offset.data", contents);

    let outcome = deserialize_file::<Arc<[u8]>>(&fixture, |b| Arc::<[u8]>::from(b));
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("offset out of bounds"));
}

#[test]
fn serialization_trivial_test() {
const LEN : usize = 0x_80;
Expand Down Expand Up @@ -1598,4 +1630,4 @@ pub fn dbg_hex_line_numbers(f : &std::fs::File, path : impl AsRef<std::path::Pat
}

Ok(out.into_inner().unwrap())
}
}
8 changes: 8 additions & 0 deletions src/line_list_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ pub(crate) const KEY_BYTES_CNT: usize = 42;
// Value of `KEY_BYTES_CNT` used when the `slim_ptrs` feature is disabled.
#[cfg(not(feature = "slim_ptrs"))]
pub(crate) const KEY_BYTES_CNT: usize = 14;

// Compile-time layout checks (x86_64 only, skipped under miri): the build fails unless
// `LineListNode<[u8; 1024], GlobalAlloc>` occupies exactly 64 bytes with `slim_ptrs`
// enabled, or exactly 48 bytes with it disabled.
#[cfg(all(feature = "slim_ptrs", target_arch = "x86_64", not(miri)))]
const _: () = assert!(
    core::mem::size_of::<LineListNode<[u8; 1024], crate::alloc::GlobalAlloc>>() == 64
);

#[cfg(all(not(feature = "slim_ptrs"), target_arch = "x86_64", not(miri)))]
const _: () = assert!(
    core::mem::size_of::<LineListNode<[u8; 1024], crate::alloc::GlobalAlloc>>() == 48
);

// Flag bits within a 16-bit word: bit 15 and bit 14.
// NOTE(review): going by the names, these mark slot 0 / slot 1 as occupied — confirm at the use sites.
const SLOT_0_USED_MASK: u16 = 1 << 15;
const SLOT_1_USED_MASK: u16 = 1 << 14;
// Both flag bits combined.
const BOTH_SLOTS_USED_MASK: u16 = SLOT_0_USED_MASK | SLOT_1_USED_MASK;
Expand Down
180 changes: 180 additions & 0 deletions tests/pathmap_algebra_differential.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
use std::collections::BTreeSet;

use pathmap::PathMap;

/// Ordered set of byte-string keys; the tests in this file use it as the reference model
/// that `PathMap` results are compared against.
type KeySet = BTreeSet<Vec<u8>>;

/// Advances a 64-bit linear congruential generator by one step.
///
/// The state is updated in place as `state * MULTIPLIER + INCREMENT` using wrapping
/// arithmetic, and the new state is returned.
fn next_u64(state: &mut u64) -> u64 {
    const MULTIPLIER: u64 = 6_364_136_223_846_793_005;
    const INCREMENT: u64 = 1_442_695_040_888_963_407;

    let advanced = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
    *state = advanced;
    advanced
}

/// Builds a deterministic set of 8-byte keys from the generator state `seed ^ salt`.
///
/// 72 keys are generated; duplicates collapse, so the set may hold fewer. Every key has the
/// same length, so no key in the set is a proper prefix of another.
fn fixed_width_set(seed: u64, salt: u64) -> KeySet {
    let mut rng = seed ^ salt;
    let mut out = KeySet::new();
    for ordinal in 0..72_u64 {
        // Each key byte is bits 32-39 of the next generator output.
        let mut key: Vec<u8> = (0..8)
            .map(|_| (next_u64(&mut rng) >> 32) as u8)
            .collect();
        // Fold the (truncated) ordinal into the leading byte.
        key[0] ^= ordinal as u8;
        out.insert(key);
    }
    out
}

/// Builds a deterministic set of variable-length keys (0 to 8 bytes, plus some extended ones)
/// from the generator state `seed ^ salt`, deliberately including keys that are prefixes of
/// other keys.
///
/// * The first generator draw decides whether the empty key is included (low three bits zero).
/// * For each `index` in `0..48` a key of random length `0..=8` is generated and inserted.
/// * For every third `index`, a key longer than one byte also contributes itself minus its
///   last byte.
/// * For every seventh `index`, the key extended by `[0, index]` is inserted as well.
fn prefix_heavy_set(seed: u64, salt: u64) -> KeySet {
    let mut rng = seed ^ salt;
    let mut out = KeySet::new();

    let include_empty = next_u64(&mut rng) & 7 == 0;
    if include_empty {
        out.insert(Vec::new());
    }

    for index in 0..48_u8 {
        let length = (next_u64(&mut rng) % 9) as usize;
        let mut key: Vec<u8> = Vec::with_capacity(length);
        for position in 0..length {
            let selector = next_u64(&mut rng);
            // Pick each byte from one of five sources, chosen by the draw itself.
            let byte = match selector % 5 {
                0 => index,
                1 => position as u8,
                2 => (selector >> 32) as u8,
                3 => b'a' + (selector % 7) as u8,
                _ => 0xff_u8.wrapping_sub(index),
            };
            key.push(byte);
        }

        // Proper prefix: the key without its final byte.
        if index % 3 == 0 && key.len() > 1 {
            out.insert(key[..key.len() - 1].to_vec());
        }
        // Extension: the key becomes a prefix of a longer key.
        if index % 7 == 0 {
            let mut extended = key.clone();
            extended.extend_from_slice(&[0, index]);
            out.insert(extended);
        }
        out.insert(key);
    }
    out
}

/// Creates a `PathMap<()>` containing every key of `keys`, each mapped to the unit value.
fn map_from_set(keys: &KeySet) -> PathMap<()> {
    let mut trie: PathMap<()> = PathMap::new();
    keys.iter().for_each(|path| {
        trie.insert(path, ());
    });
    trie
}

fn set_from_map(map: &PathMap<()>) -> KeySet {
map.iter().map(|(key, ())| key).collect()
}

/// Differential test over 256 seeds using fixed-width (hence prefix-free) key sets.
///
/// `join`, `meet` and `subtract` on `PathMap` are compared against `BTreeSet` union,
/// intersection and difference, followed by checks of commutativity, idempotence,
/// self-subtraction, associativity and distributivity of meet over join.
#[test]
fn seeded_prefix_free_algebra_matches_btreeset_oracle() {
    for seed in 0_u64..256 {
        let a = fixed_width_set(seed, 0x243f_6a88_85a3_08d3);
        let b = fixed_width_set(seed, 0x1319_8a2e_0370_7344);
        let c = fixed_width_set(seed, 0xa409_3822_299f_31d0);
        let (ma, mb, mc) = (map_from_set(&a), map_from_set(&b), map_from_set(&c));

        // Oracle results computed on the plain sets.
        let union: KeySet = a.union(&b).cloned().collect();
        let intersection: KeySet = a.intersection(&b).cloned().collect();
        let difference: KeySet = a.difference(&b).cloned().collect();

        assert_eq!(set_from_map(&ma.join(&mb)), union, "join seed {seed}");
        assert_eq!(set_from_map(&ma.meet(&mb)), intersection, "meet seed {seed}");
        assert_eq!(set_from_map(&ma.subtract(&mb)), difference, "subtract seed {seed}");

        // Commutativity.
        assert_eq!(set_from_map(&ma.join(&mb)), set_from_map(&mb.join(&ma)));
        assert_eq!(set_from_map(&ma.meet(&mb)), set_from_map(&mb.meet(&ma)));

        // Idempotence and self-subtraction.
        assert_eq!(set_from_map(&ma.join(&ma)), a);
        assert_eq!(set_from_map(&ma.meet(&ma)), a);
        assert!(set_from_map(&ma.subtract(&ma)).is_empty());

        // Associativity.
        let join_left = set_from_map(&ma.join(&mb).join(&mc));
        let join_right = set_from_map(&ma.join(&mb.join(&mc)));
        assert_eq!(join_left, join_right);
        let meet_left = set_from_map(&ma.meet(&mb).meet(&mc));
        let meet_right = set_from_map(&ma.meet(&mb.meet(&mc)));
        assert_eq!(meet_left, meet_right);

        // Meet distributes over join.
        let factored = set_from_map(&ma.meet(&mb.join(&mc)));
        let expanded = set_from_map(&ma.meet(&mb).join(&ma.meet(&mc)));
        assert_eq!(factored, expanded);
    }
}

/// For 128 seeds: clones a prefix-heavy map, removes the smallest key from the clone and
/// inserts a new key into it, then checks that the original map still holds exactly the
/// original keys and that the clone holds exactly the edited key set.
#[test]
fn cloned_prefix_heavy_maps_are_logically_isolated_under_mutation() {
    for seed in 0_u64..128 {
        let baseline_keys = prefix_heavy_set(seed, 0x082e_fa98_ec4e_6c89);
        let baseline = map_from_set(&baseline_keys);
        let mut mutated = baseline.clone();

        // Smallest key in set order, if any; it is removed from the clone only.
        let dropped = baseline_keys.first().cloned();
        if let Some(key) = dropped.as_ref() {
            assert!(mutated.remove(key).is_some());
        }

        let added = vec![0xfe, (seed >> 8) as u8, seed as u8, 0x01];
        mutated.insert(&added, ());

        // The original map must be unaffected by edits made to its clone.
        assert_eq!(set_from_map(&baseline), baseline_keys, "original seed {seed}");

        // Apply the same edits to the oracle set and compare against the clone.
        let mut expected = baseline_keys;
        if let Some(key) = dropped.as_ref() {
            expected.remove(key);
        }
        expected.insert(added);
        assert_eq!(set_from_map(&mutated), expected, "clone seed {seed}");
    }
}

/// Checks that `meet` is associative on the three prefix-heavy maps generated for seed 44.
#[test]
fn prefix_valued_meet_is_associative_seed_44() {
    let seed = 44;
    let salts = [
        0x243f_6a88_85a3_08d3,
        0x1319_8a2e_0370_7344,
        0xa409_3822_299f_31d0,
    ];
    let [a, b, c] = salts.map(|salt| map_from_set(&prefix_heavy_set(seed, salt)));

    let left_grouped = set_from_map(&a.meet(&b).meet(&c));
    let right_grouped = set_from_map(&a.meet(&b.meet(&c)));
    assert_eq!(left_grouped, right_grouped);
}

/// For 512 seeds of prefix-heavy key sets, checks that both `a ∪ (b ∩ c)` and
/// `(a ∪ b) ∩ (a ∪ c)`, computed with `PathMap::join` / `PathMap::meet`, equal the same
/// expression evaluated on `BTreeSet`s.
#[test]
fn seeded_prefix_heavy_dual_distributivity_matches_btreeset_oracle() {
    // Seeds 10, 77, and 287 are focused regressions for CoFree identity
    // operand selection and mixed value/onward-link exhaustiveness.
    for seed in 0_u64..512 {
        let a = prefix_heavy_set(seed, 0x243f_6a88_85a3_08d3);
        let b = prefix_heavy_set(seed, 0x1319_8a2e_0370_7344);
        let c = prefix_heavy_set(seed, 0xa409_3822_299f_31d0);
        let (ma, mb, mc) = (map_from_set(&a), map_from_set(&b), map_from_set(&c));

        // Oracle: a ∪ (b ∩ c) on plain sets.
        let b_meet_c: KeySet = b.intersection(&c).cloned().collect();
        let expected: KeySet = a.union(&b_meet_c).cloned().collect();

        let factored = set_from_map(&ma.join(&mb.meet(&mc)));
        assert_eq!(factored, expected, "left dual-distributive form seed {seed}");

        let expanded = set_from_map(&ma.join(&mb).meet(&ma.join(&mc)));
        assert_eq!(expanded, expected, "right dual-distributive form seed {seed}");
    }
}