diff --git a/crates/brk_vecs/examples/main.rs b/crates/brk_vecs/examples/main.rs index 89bf6949a..bc508d123 100644 --- a/crates/brk_vecs/examples/main.rs +++ b/crates/brk_vecs/examples/main.rs @@ -6,7 +6,7 @@ use brk_vecs::{File, PAGE_SIZE}; fn main() -> Result<()> { let file = File::open(Path::new("vecs"))?; - file.grow_if_needed(PAGE_SIZE * 1_000_000)?; + file.set_min_len(PAGE_SIZE * 1_000_000)?; Ok(()) } diff --git a/crates/brk_vecs/src/file/layout.rs b/crates/brk_vecs/src/file/layout.rs index 1193ea8ec..fb884a000 100644 --- a/crates/brk_vecs/src/file/layout.rs +++ b/crates/brk_vecs/src/file/layout.rs @@ -1,11 +1,12 @@ +use std::collections::BTreeMap; use std::fs::OpenOptions; use std::sync::Arc; use std::{collections::HashMap, fs, io::BufReader, path::Path}; use bincode::decode_from_std_read; use bincode::{Decode, Encode, config}; -use brk_core::Result; -use parking_lot::{RwLock, RwLockReadGuard}; +use brk_core::{Error, Result}; +use parking_lot::RwLock; use crate::PAGE_SIZE; @@ -14,9 +15,11 @@ use super::Region; #[derive(Debug)] pub struct Layout { file: fs::File, - pub id_to_index: HashMap, - pub index_to_region: Vec>>, - // holes + id_to_index: HashMap, + start_to_index: BTreeMap, + index_to_region: Vec>>>, + /// key: start, value: gap + start_to_hole: BTreeMap, } impl Layout { @@ -35,46 +38,188 @@ impl Layout { let serialized: SerializedRegions = decode_from_std_read(&mut reader, config)?; let mut id_to_index = HashMap::new(); + let mut start_to_index = BTreeMap::new(); let mut index_to_region = vec![]; serialized.0.into_iter().for_each(|(str, region)| { - id_to_index.insert(str, index_to_region.len()); - index_to_region.push(Arc::new(RwLock::new(region))); + let index = index_to_region.len(); + id_to_index.insert(str, index); + start_to_index.insert(region.start(), index); + index_to_region.push(Some(Arc::new(RwLock::new(region)))); }); Self { file, id_to_index, + start_to_index, index_to_region, + start_to_hole: BTreeMap::new(), } } else { Self { file, id_to_index: HashMap::new(), index_to_region: Vec::new(), + start_to_index: BTreeMap::new(), + start_to_hole: BTreeMap::new(), } }) } - pub fn get_or_create_region_from_id(&mut self, id: String) -> Result { - if let Some(v) = self.id_to_index.get(&id) { - return Ok(*v); - } - let index = self.create_region()?; - self.id_to_index.insert(id, index); - Ok(index) + pub fn get_region_from_index(&self, index: usize) -> Option>> { + self.index_to_region.get(index).cloned().flatten() } - fn create_region(&mut self) -> Result { + pub fn get_region_index_from_id(&self, id: String) -> Option { + self.id_to_index.get(&id).copied() + } + + pub fn create_region_from_hole(&mut self, id: String) -> Option { let index = self.index_to_region.len(); - let length = PAGE_SIZE; + let start = self.find_smallest_adequate_hole(PAGE_SIZE)?; - Ok(0) + self.remove_or_compress_hole_to_right(start, PAGE_SIZE); + + self.id_to_index.insert(id, index); + self.start_to_index.insert(start, index); + + self.index_to_region + .push(Some(Arc::new(RwLock::new(Region::new( + start, PAGE_SIZE, PAGE_SIZE, + ))))); + + Some(index) } - pub fn get(&self, region: usize) -> Option> { - self.index_to_region.get(region).map(|r| r.read()) + pub fn find_smallest_adequate_hole(&self, reserved: u64) -> Option { + self.start_to_hole + .iter() + .filter(|(_, gap)| **gap >= reserved) + .map(|(start, gap)| (gap, start)) + .collect::>() + .pop_first() + .map(|(_, s)| *s) + } + + pub fn push_region(&mut self, id: String) -> (usize, Region) { + let index = self.index_to_region.len(); + + self.id_to_index.insert(id, index); + + let start = self + .start_to_index + .last_key_value() + .map(|(_, index)| { + let region = self + .index_to_region + .get(*index) + .unwrap() + .as_ref() + .unwrap() + .read(); + region.start() + region.reserved() + }) + .unwrap_or_default(); + + let region = Region::new(start, PAGE_SIZE, PAGE_SIZE); + + self.index_to_region + .push(Some(Arc::new(RwLock::new(region.clone())))); + + (index, region) + } + + pub fn remove_region(&mut self, index: usize) -> Option>> { + let region = self.index_to_region.get_mut(index).and_then(Option::take)?; + + self.id_to_index + .remove(&self.find_id_from_index(index).unwrap().to_owned()); + self.start_to_index.remove(®ion.read().start()); + + let lock = region.read(); + let start = lock.start(); + let reserved = lock.reserved(); + + if self + .widen_hole_to_the_left_if_any(start + reserved, reserved) + .is_none() + && let Some((&hole_start, gap)) = self.start_to_hole.range(..start).next_back() + && hole_start + *gap == start + { + self.widen_hole_to_the_right_if_any(hole_start, reserved); + } + + drop(lock); + + Some(region) + } + + pub fn has_hole_and_is_big_enough(&self, start: u64, gap_needed: u64) -> bool { + self.start_to_hole + .get(&start) + .is_some_and(|gap| *gap >= gap_needed) + } + + pub fn remove_or_compress_hole_to_right(&mut self, start: u64, compress_by: u64) { + if let Some(gap) = self.start_to_hole.remove(&start) + && gap != compress_by + { + if gap > compress_by { + self.start_to_hole + .insert(start + compress_by, gap - compress_by); + } else { + panic!("Hole too small"); + } + } + } + + fn widen_hole_to_the_left_if_any(&mut self, start: u64, widen_by: u64) -> Option { + assert!(start % PAGE_SIZE == 0); + + if widen_by > start { + panic!("Hole too small") + } + + let gap = self.start_to_hole.remove(&start)?; + assert!(widen_by % PAGE_SIZE == 0); + let start = start - widen_by; + let gap = gap + widen_by; + + if let Some((&prev_start, prev_gap)) = self.start_to_hole.range_mut(..start).next_back() + && prev_start + *prev_gap == start + { + *prev_gap += gap; + } else { + assert!(self.start_to_hole.insert(start, gap).is_none()); + } + + Some(start) + } + + fn widen_hole_to_the_right_if_any(&mut self, start: u64, widen_by: u64) -> Option { + assert!(start % PAGE_SIZE == 0); + + let gap = self.start_to_hole.get_mut(&start)?; + assert!(widen_by % PAGE_SIZE == 0); + *gap += widen_by; + + let next_hole_start = start + *gap; + if let Some(next_gap) = self.start_to_hole.remove(&next_hole_start) { + *self.start_to_hole.get_mut(&start).unwrap() += next_gap; + } + + Some(start) + } + + fn find_id_from_index(&self, index: usize) -> Option<&String> { + Some( + self.id_to_index + .iter() + .find(|(_, v)| **v == index) + .unwrap() + .0, + ) } } diff --git a/crates/brk_vecs/src/file/mod.rs b/crates/brk_vecs/src/file/mod.rs index 988542506..07cdba095 100644 --- a/crates/brk_vecs/src/file/mod.rs +++ b/crates/brk_vecs/src/file/mod.rs @@ -19,12 +19,12 @@ use region::*; use crate::file::reader::Reader; -pub const PAGE_SIZE: usize = 4096; +pub const PAGE_SIZE: u64 = 4096; pub struct File { - layout: Arc>, - file: Arc>, - mmap: Arc>, + layout: RwLock, + file: RwLock, + mmap: RwLock, } impl File { @@ -43,62 +43,193 @@ impl File { let mmap = Self::mmap(&file)?; Ok(Self { - file: Arc::new(RwLock::new(file)), - mmap: Arc::new(RwLock::new(mmap)), - layout: Arc::new(RwLock::new(layout)), + file: RwLock::new(file), + mmap: RwLock::new(mmap), + layout: RwLock::new(layout), }) } /// len % PAGE_SIZE == 0 - pub fn grow_if_needed(&self, len: usize) -> Result<()> { + pub fn set_min_len(&self, len: u64) -> Result<()> { assert!(len % PAGE_SIZE == 0); - let file = self.file.write(); - let len = len as u64; - if file.metadata()?.len() < len { + if self.file.read().metadata()?.len() < len { + let mut mmap = self.mmap.write(); + let file = self.file.write(); file.set_len(len)?; - self.remap_(&file) + *mmap = Self::mmap(&file)?; + Ok(()) } else { Ok(()) } } - pub fn get_or_create_region_from_id(&mut self, id: String) -> Result { - self.layout.write().get_or_create_region_from_id(id) + pub fn get_or_create(&self, id: String) -> Result { + if let Some(index) = self.layout.read().get_region_index_from_id(id.clone()) { + return Ok(index); + } + let mut layout = self.layout.write(); + if let Some(index) = layout.create_region_from_hole(id.clone()) { + return Ok(index); + } + let (index, region) = layout.push_region(id); + self.set_min_len(region.start() + region.reserved())?; + Ok(index) } - pub fn create_reader<'a, 'b>(&'a self, region_id: usize) -> Result> - where - 'a: 'b, - { - let layout: RwLockReadGuard<'a, Layout> = self.layout.read(); + pub fn read<'a>(&'a self, index: usize) -> Result> { let mmap: RwLockReadGuard<'a, MmapMut> = self.mmap.read(); - - let region: RwLockReadGuard<'b, Region> = - layout.get(region_id).ok_or(Error::Str("Unknown region"))?; - - Ok(Reader::new(mmap, layout, region)) + let region: RwLockReadGuard<'static, Region> = unsafe { + std::mem::transmute( + self.layout + .read() + .get_region_from_index(index) + .ok_or(Error::Str("Unknown region"))? + .read(), + ) + }; + Ok(Reader::new(mmap, region)) } - fn remap(&self) -> Result<()> { - *self.mmap.write() = Self::mmap(&self.file.read())?; + #[inline] + pub fn write_all(&mut self, region: usize, data: &[u8]) -> Result<()> { + self.write_all_at_(region, data, None) + } + + #[inline] + pub fn write_all_at(&mut self, region: usize, data: &[u8], at: u64) -> Result<()> { + self.write_all_at_(region, data, Some(at)) + } + + fn write_all_at_(&mut self, region: usize, data: &[u8], at: Option) -> Result<()> { + let Some(region) = self.layout.read().get_region_from_index(region) else { + return Err(Error::Str("Unknown region")); + }; + let region_lock = region.read(); + let start = region_lock.start(); + let reserved = region_lock.reserved(); + let left = region_lock.left(); + let data_len = data.len() as u64; + + let new_left = at.map_or_else(|| left, |at| reserved - (at - start)); + let new_len = reserved - new_left; + + if new_left >= data_len { + drop(region_lock); + let mut region_lock = region.write(); + region_lock.set_len(new_len); + + Self::write_to_mmap(&self.mmap.read(), at.unwrap_or(start), data); + return Ok(()); + } + + let layout_lock = self.layout.read(); + + let hole_start = start + reserved; + if layout_lock.has_hole_and_is_big_enough(hole_start, reserved) { + drop(layout_lock); + let mut layout_lock = self.layout.write(); + layout_lock.remove_or_compress_hole_to_right(hole_start, reserved); + drop(layout_lock); + + drop(region_lock); + let mut region_lock = region.write(); + region_lock.set_len(new_len); + region_lock.set_reserved(reserved * 2); + + Self::write_to_mmap(&self.mmap.read(), at.unwrap_or(start), data); + return Ok(()); + } + + let reserved = reserved * 2; + + if let Some(hole_start) = layout_lock.find_smallest_adequate_hole(reserved) { + drop(layout_lock); + let mut layout_lock = self.layout.write(); + layout_lock.remove_or_compress_hole_to_right(hole_start, reserved); + drop(layout_lock); + + drop(region_lock); + let mut region_lock = region.write(); + region_lock.set_start(hole_start); + region_lock.set_len(new_len); + region_lock.set_reserved(reserved * 2); + + // TODO: create hole in prev position + + Self::write_to_mmap(&self.mmap.read(), at.unwrap_or(start), data); + } + + // copy region to new position then lock and update region meta then remove + + // let old_length = region_lock.len(); + // let new_length = old_length + data_len as u64; + + // self.layout.ho + + todo!(); + Ok(()) } - fn remap_(&self, file: &fs::File) -> Result<()> { - *self.mmap.write() = Self::mmap(file)?; - Ok(()) + + fn write_to_mmap(mmap: &MmapMut, start: u64, data: &[u8]) { + let data_len = data.len(); + let start = start as usize; + let end = start + data_len; + + let slice = unsafe { std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut u8, mmap.len()) }; + + slice[start..end].copy_from_slice(data); } + + pub fn truncate(&self, index: usize, from: u64) -> Result<()> { + let layout = self.layout.read(); + let Some(region) = layout.get_region_from_index(index) else { + return Err(Error::Str("Unknown region")); + }; + let mut region_ = region.write(); + let start = region_.start(); + let len = region_.len(); + + if from <= start { + return Err(Error::Str("Truncating too much")); + } else if from >= len { + return Err(Error::Str("Not truncating enough")); + } + + region_.set_len(from); + + // TODO: Widen hole if present and needed (if truncating a big portion) + // Not needed in BRK and with hole punching it's not a big deal but good to have nonetheless + + self.punch_hole(from, region_.left()) + } + + pub fn remove(&self, index: usize) -> Result>>> { + let mut layout = self.layout.write(); + let Some(region) = layout.remove_region(index) else { + return Ok(None); + }; + let region_ = region.write(); + self.punch_hole(region_.start(), region_.len())?; + drop(region_); + Ok(Some(region)) + } + fn mmap(file: &fs::File) -> Result { Ok(unsafe { MmapOptions::new().map_mut(file)? }) } - pub fn delete() {} + fn punch_hole(&self, start: u64, length: u64) -> Result<()> { + let file = self.file.write(); + Self::punch_hole_macos(&file, start, length) + } #[cfg(target_os = "macos")] - fn punch_hole(file: &fs::File, offset: u64, length: u64) -> Result<()> { + fn punch_hole_macos(file: &fs::File, start: u64, length: u64) -> Result<()> { let fpunchhole = FPunchhole { fp_flags: 0, reserved: 0, - fp_offset: offset as libc::off_t, + fp_offset: start as libc::off_t, fp_length: length as libc::off_t, }; diff --git a/crates/brk_vecs/src/file/reader.rs b/crates/brk_vecs/src/file/reader.rs index 8a2032463..431e639be 100644 --- a/crates/brk_vecs/src/file/reader.rs +++ b/crates/brk_vecs/src/file/reader.rs @@ -1,41 +1,26 @@ use memmap2::MmapMut; use parking_lot::RwLockReadGuard; -use crate::file::layout::Layout; - use super::Region; -pub struct Reader<'a, 'b> -where - 'a: 'b, -{ - layout: RwLockReadGuard<'a, Layout>, +pub struct Reader<'a> { mmap: RwLockReadGuard<'a, MmapMut>, - region: RwLockReadGuard<'b, Region>, + region: RwLockReadGuard<'static, Region>, } -impl<'a, 'b> Reader<'a, 'b> -where - 'a: 'b, -{ +impl<'a> Reader<'a> { pub fn new( mmap: RwLockReadGuard<'a, MmapMut>, - layout: RwLockReadGuard<'a, Layout>, - region: RwLockReadGuard<'b, Region>, + region: RwLockReadGuard<'static, Region>, ) -> Self { - Self { - mmap, - layout, - region, - } + Self { mmap, region } } - pub fn read(&self, offset: usize, len: usize) -> &[u8] { - assert!(offset + len < self.region.length()); - + pub fn read(&self, offset: u64, len: u64) -> &[u8] { + assert!(offset + len < self.region.len()); let start = self.region.start() + offset; let end = start + len; - &self.mmap[start..end] + &self.mmap[start as usize..end as usize] } pub fn region(&self) -> &Region { diff --git a/crates/brk_vecs/src/file/region.rs b/crates/brk_vecs/src/file/region.rs index cf8e1ff36..2ff002379 100644 --- a/crates/brk_vecs/src/file/region.rs +++ b/crates/brk_vecs/src/file/region.rs @@ -1,25 +1,18 @@ -// use std::sync::Arc; - use bincode::{Decode, Encode}; -// use parking_lot::{RwLock, RwLockReadGuard}; use crate::PAGE_SIZE; -// #[derive(Debug, Encode, Decode)] -#[derive(Debug, Encode, Decode)] +#[derive(Debug, Clone, Encode, Decode)] pub struct Region { - // Bad name /// Must be multiple of 4096 - start: usize, - length: usize, + start: u64, + length: u64, /// Must be multiple of 4096 - reserved: usize, - // lock: Arc>, - // variant: usize, // Raw or Compressed or something else ? to know if there is a header ? Since blocks 4096, storing headers individually would be dumb + reserved: u64, } impl Region { - pub fn new(start: usize, length: usize, reserved: usize) -> Self { + pub fn new(start: u64, length: u64, reserved: u64) -> Self { assert!(reserved > 0); assert!(start % PAGE_SIZE == 0); assert!(reserved % PAGE_SIZE == 0); @@ -29,51 +22,35 @@ impl Region { start, length, reserved, - // lock: Arc::new(RwLock::new(())), } } - pub fn start(&self) -> usize { + pub fn start(&self) -> u64 { self.start } - pub fn length(&self) -> usize { + pub fn set_start(&mut self, start: u64) { + assert!(start % PAGE_SIZE == 0); + self.start = start + } + + pub fn len(&self) -> u64 { self.length } - pub fn reserved(&self) -> usize { + pub fn set_len(&mut self, len: u64) { + self.length = len + } + + pub fn reserved(&self) -> u64 { self.reserved } - // pub fn lock(&self) -> RwLockReadGuard<'_, ()> { - // self.lock.read() - // } + pub fn set_reserved(&mut self, reserved: u64) { + self.reserved = reserved; + } + + pub fn left(&self) -> u64 { + self.reserved - self.length + } } - -// #[derive(Debug, Encode, Decode)] -// pub struct RegionInner { -// start: usize, -// length: usize, -// reserved: usize, -// } - -// impl From for RegionInner { -// fn from(value: Region) -> Self { -// Self { -// start: value.start, -// length: value.length, -// reserved: value.reserved, -// } -// } -// } - -// impl From for Region { -// fn from(value: RegionInner) -> Self { -// Self { -// start: value.start, -// length: value.length, -// reserved: value.reserved, -// lock: Arc::new(RwLock::new(())), -// } -// } -// } diff --git a/crates/brk_vecs/src/variants/raw.rs b/crates/brk_vecs/src/variants/raw.rs index 00ab1f1ac..172eb1c8d 100644 --- a/crates/brk_vecs/src/variants/raw.rs +++ b/crates/brk_vecs/src/variants/raw.rs @@ -1,3 +1,8 @@ +use std::sync::Arc; + +use crate::File; + pub struct RawVec { region: usize, + file: Arc, }