vecs: part 2

This commit is contained in:
nym21
2025-07-22 13:19:19 +02:00
parent 5347523921
commit 73ebcdf0d6
6 changed files with 365 additions and 122 deletions

View File

@@ -6,7 +6,7 @@ use brk_vecs::{File, PAGE_SIZE};
/// Opens the `vecs` storage file and makes sure it is at least one million
/// pages long.
fn main() -> Result<()> {
    let file = File::open(Path::new("vecs"))?;
    // `grow_if_needed` was renamed to `set_min_len`; only the new name is called.
    file.set_min_len(PAGE_SIZE * 1_000_000)?;
    Ok(())
}

View File

@@ -1,11 +1,12 @@
use std::collections::BTreeMap;
use std::fs::OpenOptions;
use std::sync::Arc;
use std::{collections::HashMap, fs, io::BufReader, path::Path};
use bincode::decode_from_std_read;
use bincode::{Decode, Encode, config};
use brk_core::Result;
use parking_lot::{RwLock, RwLockReadGuard};
use brk_core::{Error, Result};
use parking_lot::RwLock;
use crate::PAGE_SIZE;
@@ -14,9 +15,11 @@ use super::Region;
#[derive(Debug)]
pub struct Layout {
file: fs::File,
pub id_to_index: HashMap<String, usize>,
pub index_to_region: Vec<Arc<RwLock<Region>>>,
// holes
id_to_index: HashMap<String, usize>,
start_to_index: BTreeMap<u64, usize>,
index_to_region: Vec<Option<Arc<RwLock<Region>>>>,
/// key: start, value: gap
start_to_hole: BTreeMap<u64, u64>,
}
impl Layout {
@@ -35,46 +38,188 @@ impl Layout {
let serialized: SerializedRegions = decode_from_std_read(&mut reader, config)?;
let mut id_to_index = HashMap::new();
let mut start_to_index = BTreeMap::new();
let mut index_to_region = vec![];
serialized.0.into_iter().for_each(|(str, region)| {
id_to_index.insert(str, index_to_region.len());
index_to_region.push(Arc::new(RwLock::new(region)));
let index = index_to_region.len();
id_to_index.insert(str, index);
start_to_index.insert(region.start(), index);
index_to_region.push(Some(Arc::new(RwLock::new(region))));
});
Self {
file,
id_to_index,
start_to_index,
index_to_region,
start_to_hole: BTreeMap::new(),
}
} else {
Self {
file,
id_to_index: HashMap::new(),
index_to_region: Vec::new(),
start_to_index: BTreeMap::new(),
start_to_hole: BTreeMap::new(),
}
})
}
pub fn get_or_create_region_from_id(&mut self, id: String) -> Result<usize> {
if let Some(v) = self.id_to_index.get(&id) {
return Ok(*v);
}
let index = self.create_region()?;
self.id_to_index.insert(id, index);
Ok(index)
/// Returns a shared handle to the region stored in slot `index`.
///
/// Yields `None` when `index` is out of bounds or when the slot is empty.
pub fn get_region_from_index(&self, index: usize) -> Option<Arc<RwLock<Region>>> {
    match self.index_to_region.get(index) {
        Some(Some(region)) => Some(Arc::clone(region)),
        _ => None,
    }
}
fn create_region(&mut self) -> Result<usize> {
/// Looks up the slot index registered for `id`, if any.
pub fn get_region_index_from_id(&self, id: String) -> Option<usize> {
    let index = self.id_to_index.get(id.as_str())?;
    Some(*index)
}
/// Creates a one-page region for `id` inside the smallest hole that can hold it.
///
/// Returns the new region's slot index, or `None` (leaving the layout
/// untouched) when no hole of at least `PAGE_SIZE` bytes exists.
pub fn create_region_from_hole(&mut self, id: String) -> Option<usize> {
    // Bail out before mutating anything if there is no usable hole.
    let start = self.find_smallest_adequate_hole(PAGE_SIZE)?;
    // Consume the first page of the hole; any remainder stays registered.
    self.remove_or_compress_hole_to_right(start, PAGE_SIZE);

    let index = self.index_to_region.len();
    self.id_to_index.insert(id, index);
    self.start_to_index.insert(start, index);
    self.index_to_region
        .push(Some(Arc::new(RwLock::new(Region::new(
            start, PAGE_SIZE, PAGE_SIZE,
        )))));
    Some(index)
}
pub fn get(&self, region: usize) -> Option<RwLockReadGuard<'_, Region>> {
self.index_to_region.get(region).map(|r| r.read())
pub fn find_smallest_adequate_hole(&self, reserved: u64) -> Option<u64> {
self.start_to_hole
.iter()
.filter(|(_, gap)| **gap >= reserved)
.map(|(start, gap)| (gap, start))
.collect::<BTreeMap<_, _>>()
.pop_first()
.map(|(_, s)| *s)
}
/// Appends a new one-page region for `id` right after the region with the
/// highest start offset (or at offset 0 when there is none).
///
/// Returns the new slot index together with a copy of the region metadata.
///
/// # Panics
///
/// Panics if `start_to_index` points at a slot that is missing or empty.
pub fn push_region(&mut self, id: String) -> (usize, Region) {
    let index = self.index_to_region.len();

    // End offset of the right-most region, or 0 for an empty layout.
    let start = self
        .start_to_index
        .last_key_value()
        .map(|(_, &last_index)| {
            let last = self
                .index_to_region
                .get(last_index)
                .expect("start_to_index refers to an existing slot")
                .as_ref()
                .expect("start_to_index refers to a live region")
                .read();
            last.start() + last.reserved()
        })
        .unwrap_or_default();

    // NOTE(review): the region is created with length == reserved (one full
    // page); confirm that a brand-new region is meant to start out full.
    let region = Region::new(start, PAGE_SIZE, PAGE_SIZE);

    self.id_to_index.insert(id, index);
    // Register the start offset as well: without this entry the next call
    // would compute the same `start` and hand out an overlapping region.
    self.start_to_index.insert(start, index);
    self.index_to_region
        .push(Some(Arc::new(RwLock::new(region.clone()))));

    (index, region)
}
pub fn remove_region(&mut self, index: usize) -> Option<Arc<RwLock<Region>>> {
let region = self.index_to_region.get_mut(index).and_then(Option::take)?;
self.id_to_index
.remove(&self.find_id_from_index(index).unwrap().to_owned());
self.start_to_index.remove(&region.read().start());
let lock = region.read();
let start = lock.start();
let reserved = lock.reserved();
if self
.widen_hole_to_the_left_if_any(start + reserved, reserved)
.is_none()
&& let Some((&hole_start, gap)) = self.start_to_hole.range(..start).next_back()
&& hole_start + *gap == start
{
self.widen_hole_to_the_right_if_any(hole_start, reserved);
}
drop(lock);
Some(region)
}
/// Tells whether a hole begins exactly at `start` and spans at least
/// `gap_needed` bytes.
pub fn has_hole_and_is_big_enough(&self, start: u64, gap_needed: u64) -> bool {
    matches!(self.start_to_hole.get(&start), Some(&gap) if gap >= gap_needed)
}
pub fn remove_or_compress_hole_to_right(&mut self, start: u64, compress_by: u64) {
if let Some(gap) = self.start_to_hole.remove(&start)
&& gap != compress_by
{
if gap > compress_by {
self.start_to_hole
.insert(start + compress_by, gap - compress_by);
} else {
panic!("Hole too small");
}
}
}
fn widen_hole_to_the_left_if_any(&mut self, start: u64, widen_by: u64) -> Option<u64> {
assert!(start % PAGE_SIZE == 0);
if widen_by > start {
panic!("Hole too small")
}
let gap = self.start_to_hole.remove(&start)?;
assert!(widen_by % PAGE_SIZE == 0);
let start = start - widen_by;
let gap = gap + widen_by;
if let Some((&prev_start, prev_gap)) = self.start_to_hole.range_mut(..start).next_back()
&& prev_start + *prev_gap == start
{
*prev_gap += gap;
} else {
assert!(self.start_to_hole.insert(start, gap).is_none());
}
Some(start)
}
/// Extends the hole that begins at `start` by `widen_by` bytes towards higher
/// offsets, absorbing the hole that starts right at its new end, if any.
///
/// Returns `None` when no hole begins at `start`; otherwise returns `start`.
///
/// # Panics
///
/// Panics when `start` is not page-aligned, or when a hole exists at `start`
/// and `widen_by` is not a multiple of `PAGE_SIZE`.
fn widen_hole_to_the_right_if_any(&mut self, start: u64, widen_by: u64) -> Option<u64> {
    assert!(start % PAGE_SIZE == 0);

    let widened_gap = {
        let gap = self.start_to_hole.get_mut(&start)?;
        assert!(widen_by % PAGE_SIZE == 0);
        *gap += widen_by;
        *gap
    };

    let absorbed = self.start_to_hole.remove(&(start + widened_gap));
    if let Some(next_gap) = absorbed {
        *self.start_to_hole.get_mut(&start).unwrap() += next_gap;
    }

    Some(start)
}
/// Reverse lookup: finds the id registered for slot `index`.
///
/// Scans every entry of `id_to_index`. Returns `None` when no id maps to
/// `index`, instead of panicking as the signature's `Option` would suggest.
fn find_id_from_index(&self, index: usize) -> Option<&String> {
    self.id_to_index
        .iter()
        .find(|(_, v)| **v == index)
        .map(|(id, _)| id)
}
}

View File

@@ -19,12 +19,12 @@ use region::*;
use crate::file::reader::Reader;
pub const PAGE_SIZE: usize = 4096;
pub const PAGE_SIZE: u64 = 4096;
pub struct File {
layout: Arc<RwLock<Layout>>,
file: Arc<RwLock<fs::File>>,
mmap: Arc<RwLock<MmapMut>>,
layout: RwLock<Layout>,
file: RwLock<fs::File>,
mmap: RwLock<MmapMut>,
}
impl File {
@@ -43,62 +43,193 @@ impl File {
let mmap = Self::mmap(&file)?;
Ok(Self {
file: Arc::new(RwLock::new(file)),
mmap: Arc::new(RwLock::new(mmap)),
layout: Arc::new(RwLock::new(layout)),
file: RwLock::new(file),
mmap: RwLock::new(mmap),
layout: RwLock::new(layout),
})
}
/// len % PAGE_SIZE == 0
pub fn grow_if_needed(&self, len: usize) -> Result<()> {
pub fn set_min_len(&self, len: u64) -> Result<()> {
assert!(len % PAGE_SIZE == 0);
let file = self.file.write();
let len = len as u64;
if file.metadata()?.len() < len {
if self.file.read().metadata()?.len() < len {
let mut mmap = self.mmap.write();
let file = self.file.write();
file.set_len(len)?;
self.remap_(&file)
*mmap = Self::mmap(&file)?;
Ok(())
} else {
Ok(())
}
}
pub fn get_or_create_region_from_id(&mut self, id: String) -> Result<usize> {
self.layout.write().get_or_create_region_from_id(id)
/// Returns the slot index of the region registered for `id`, creating the
/// region when it does not exist yet.
///
/// A new region is placed in an existing hole when one is available;
/// otherwise it is appended and the file is grown to cover it.
///
/// # Errors
///
/// Returns any error raised while growing the file.
pub fn get_or_create(&self, id: String) -> Result<usize> {
    // Fast path: the id is already known, a read lock is enough.
    if let Some(index) = self.layout.read().get_region_index_from_id(id.clone()) {
        return Ok(index);
    }

    let mut layout = self.layout.write();

    // Re-check under the write lock: another caller may have created the
    // region between the read above and acquiring the write lock.
    if let Some(index) = layout.get_region_index_from_id(id.clone()) {
        return Ok(index);
    }

    if let Some(index) = layout.create_region_from_hole(id.clone()) {
        return Ok(index);
    }

    let (index, region) = layout.push_region(id);
    // NOTE(review): the layout write lock is still held while `set_min_len`
    // waits for the mmap write lock; confirm that no caller can hold an mmap
    // read guard while waiting for the layout lock.
    self.set_min_len(region.start() + region.reserved())?;
    Ok(index)
}
pub fn create_reader<'a, 'b>(&'a self, region_id: usize) -> Result<Reader<'a, 'b>>
where
'a: 'b,
{
let layout: RwLockReadGuard<'a, Layout> = self.layout.read();
pub fn read<'a>(&'a self, index: usize) -> Result<Reader<'a>> {
let mmap: RwLockReadGuard<'a, MmapMut> = self.mmap.read();
let region: RwLockReadGuard<'b, Region> =
layout.get(region_id).ok_or(Error::Str("Unknown region"))?;
Ok(Reader::new(mmap, layout, region))
let region: RwLockReadGuard<'static, Region> = unsafe {
std::mem::transmute(
self.layout
.read()
.get_region_from_index(index)
.ok_or(Error::Str("Unknown region"))?
.read(),
)
};
Ok(Reader::new(mmap, region))
}
fn remap(&self) -> Result<()> {
*self.mmap.write() = Self::mmap(&self.file.read())?;
/// Writes `data` to the region in slot `region` without an explicit offset.
///
/// Thin wrapper that forwards to `write_all_at_` with no offset.
#[inline]
pub fn write_all(&mut self, region: usize, data: &[u8]) -> Result<()> {
    let at = None;
    self.write_all_at_(region, data, at)
}
/// Writes `data` to the region in slot `region` at the offset `at`.
///
/// Thin wrapper that forwards to `write_all_at_` with `Some(at)`.
#[inline]
pub fn write_all_at(&mut self, region: usize, data: &[u8], at: u64) -> Result<()> {
    let at = Some(at);
    self.write_all_at_(region, data, at)
}
/// Shared implementation behind `write_all` and `write_all_at`.
///
/// Tries, in order: writing into the space the region already has reserved,
/// growing the region into a hole that starts right after it, and moving the
/// region into another hole. The function is unfinished: every path that does
/// not return early ends in `todo!()`, which panics.
///
/// Returns `Error::Str("Unknown region")` when the slot is out of bounds or
/// empty.
fn write_all_at_(&mut self, region: usize, data: &[u8], at: Option<u64>) -> Result<()> {
let Some(region) = self.layout.read().get_region_from_index(region) else {
return Err(Error::Str("Unknown region"));
};
let region_lock = region.read();
let start = region_lock.start();
let reserved = region_lock.reserved();
let left = region_lock.left();
let data_len = data.len() as u64;
// With an explicit `at`, the free space is measured from `at`, which is
// compared against `start` and therefore treated as a file offset.
let new_left = at.map_or_else(|| left, |at| reserved - (at - start));
let new_len = reserved - new_left;
// Case 1: the data fits in the space that is already reserved.
if new_left >= data_len {
// Swap the read guard for a write guard before mutating the metadata.
drop(region_lock);
let mut region_lock = region.write();
// NOTE(review): `new_len` does not include `data_len`, and without `at` the
// bytes are written at `start` rather than after the existing data; confirm
// this is intended.
region_lock.set_len(new_len);
Self::write_to_mmap(&self.mmap.read(), at.unwrap_or(start), data);
return Ok(());
}
let layout_lock = self.layout.read();
let hole_start = start + reserved;
// Case 2: a hole at least as large as the current reservation starts right
// after the region, so the reservation can be doubled in place.
if layout_lock.has_hole_and_is_big_enough(hole_start, reserved) {
// NOTE(review): the hole is checked under a read lock that is released
// before the write lock is taken; another writer could claim it in between.
drop(layout_lock);
let mut layout_lock = self.layout.write();
layout_lock.remove_or_compress_hole_to_right(hole_start, reserved);
drop(layout_lock);
drop(region_lock);
let mut region_lock = region.write();
region_lock.set_len(new_len);
region_lock.set_reserved(reserved * 2);
Self::write_to_mmap(&self.mmap.read(), at.unwrap_or(start), data);
return Ok(());
}
// From here on `reserved` is the doubled reservation.
let reserved = reserved * 2;
// Case 3: move the region into the smallest hole that fits the doubled
// reservation.
if let Some(hole_start) = layout_lock.find_smallest_adequate_hole(reserved) {
drop(layout_lock);
let mut layout_lock = self.layout.write();
layout_lock.remove_or_compress_hole_to_right(hole_start, reserved);
drop(layout_lock);
drop(region_lock);
let mut region_lock = region.write();
region_lock.set_start(hole_start);
region_lock.set_len(new_len);
// NOTE(review): `reserved` was already doubled above, so this records four
// times the original reservation while only `reserved` bytes were taken from
// the hole; confirm.
region_lock.set_reserved(reserved * 2);
// TODO: create hole in prev position
// NOTE(review): the write still targets the old `start`, and this branch
// does not return, so execution continues to the `todo!()` below.
Self::write_to_mmap(&self.mmap.read(), at.unwrap_or(start), data);
}
// copy region to new position then lock and update region meta then remove
// let old_length = region_lock.len();
// let new_length = old_length + data_len as u64;
// self.layout.ho
// Unfinished: `todo!()` panics, so the `Ok(())` after it is never reached.
todo!();
Ok(())
}
fn remap_(&self, file: &fs::File) -> Result<()> {
*self.mmap.write() = Self::mmap(file)?;
Ok(())
/// Copies `data` into the memory map, starting at byte offset `start`.
///
/// # Panics
///
/// Panics when `start` does not fit in `usize`, when `start + data.len()`
/// overflows, or when the target range does not lie inside the mapping.
fn write_to_mmap(mmap: &MmapMut, start: u64, data: &[u8]) {
    let start = usize::try_from(start).expect("write offset does not fit in usize");
    let end = start
        .checked_add(data.len())
        .expect("write range overflows usize");
    assert!(
        end <= mmap.len(),
        "write range {start}..{end} is out of bounds for a mapping of {} bytes",
        mmap.len()
    );

    // SAFETY: `start..end` was just checked to lie inside the mapping, so the
    // destination is valid for `data.len()` bytes. Only raw pointers are used:
    // no `&mut [u8]` over the whole mapping is created while other threads may
    // hold shared slices of it. `ptr::copy` tolerates `data` overlapping the
    // destination.
    // NOTE(review): this still writes through a pointer obtained from a shared
    // reference to the map; callers must ensure no reader observes the bytes
    // being written.
    unsafe {
        std::ptr::copy(
            data.as_ptr(),
            (mmap.as_ptr() as *mut u8).add(start),
            data.len(),
        );
    }
}
/// Shortens the region in slot `index` and punches a hole over the freed
/// bytes.
///
/// Returns `Error::Str("Unknown region")` when the slot is out of bounds or
/// empty, `Error::Str("Truncating too much")` when `from <= start`, and
/// `Error::Str("Not truncating enough")` when `from >= len`.
///
/// NOTE(review): `from` is compared against the region's `start` (a file
/// offset) and against its `len` (a length), then stored as the new length
/// and also used as the file offset of the punched hole. These uses only
/// agree when the region starts at offset 0; confirm whether `from` is meant
/// to be a file offset or an offset inside the region.
pub fn truncate(&self, index: usize, from: u64) -> Result<()> {
let layout = self.layout.read();
let Some(region) = layout.get_region_from_index(index) else {
return Err(Error::Str("Unknown region"));
};
// Write guard: the region metadata is updated below.
let mut region_ = region.write();
let start = region_.start();
let len = region_.len();
if from <= start {
return Err(Error::Str("Truncating too much"));
} else if from >= len {
return Err(Error::Str("Not truncating enough"));
}
region_.set_len(from);
// TODO: Widen hole if present and needed (if truncating a big portion)
// Not needed in BRK and with hole punching it's not a big deal but good to have nonetheless
// `left()` is read after `set_len`, so it already reflects the new length.
self.punch_hole(from, region_.left())
}
/// Removes the region in slot `index` from the layout and punches a hole
/// over its data in the file.
///
/// Returns `Ok(None)` when the slot holds no region, otherwise the removed
/// region.
///
/// # Errors
///
/// Returns any error raised while punching the hole.
pub fn remove(&self, index: usize) -> Result<Option<Arc<RwLock<Region>>>> {
    let mut layout = self.layout.write();

    match layout.remove_region(index) {
        None => Ok(None),
        Some(region) => {
            {
                // The guard is scoped so it is released before the region
                // is handed back to the caller.
                let guard = region.write();
                self.punch_hole(guard.start(), guard.len())?;
            }
            Ok(Some(region))
        }
    }
}
/// Creates a writable memory map of `file`.
///
/// # Errors
///
/// Returns the error reported by the mapping call.
fn mmap(file: &fs::File) -> Result<MmapMut> {
    let mapping = unsafe { MmapOptions::new().map_mut(file) }?;
    Ok(mapping)
}
/// Placeholder: the body is empty, so calling this currently does nothing.
pub fn delete() {}
fn punch_hole(&self, start: u64, length: u64) -> Result<()> {
let file = self.file.write();
Self::punch_hole_macos(&file, start, length)
}
#[cfg(target_os = "macos")]
fn punch_hole(file: &fs::File, offset: u64, length: u64) -> Result<()> {
fn punch_hole_macos(file: &fs::File, start: u64, length: u64) -> Result<()> {
let fpunchhole = FPunchhole {
fp_flags: 0,
reserved: 0,
fp_offset: offset as libc::off_t,
fp_offset: start as libc::off_t,
fp_length: length as libc::off_t,
};

View File

@@ -1,41 +1,26 @@
use memmap2::MmapMut;
use parking_lot::RwLockReadGuard;
use crate::file::layout::Layout;
use super::Region;
pub struct Reader<'a, 'b>
where
'a: 'b,
{
layout: RwLockReadGuard<'a, Layout>,
pub struct Reader<'a> {
mmap: RwLockReadGuard<'a, MmapMut>,
region: RwLockReadGuard<'b, Region>,
region: RwLockReadGuard<'static, Region>,
}
impl<'a, 'b> Reader<'a, 'b>
where
'a: 'b,
{
impl<'a> Reader<'a> {
pub fn new(
mmap: RwLockReadGuard<'a, MmapMut>,
layout: RwLockReadGuard<'a, Layout>,
region: RwLockReadGuard<'b, Region>,
region: RwLockReadGuard<'static, Region>,
) -> Self {
Self {
mmap,
layout,
region,
}
Self { mmap, region }
}
pub fn read(&self, offset: usize, len: usize) -> &[u8] {
assert!(offset + len < self.region.length());
/// Returns the `len` bytes located `offset` bytes into the region.
///
/// # Panics
///
/// Panics when `offset + len` overflows or extends past the region's length,
/// or when the resulting range lies outside the memory map.
pub fn read(&self, offset: u64, len: u64) -> &[u8] {
    let end_in_region = offset
        .checked_add(len)
        .expect("offset + len overflows u64");
    // `<=`, not `<`: a read that ends exactly at the region's end is valid,
    // otherwise the last byte of a region could never be read.
    assert!(end_in_region <= self.region.len());

    let start = self.region.start() + offset;
    let end = start + len;
    &self.mmap[start as usize..end as usize]
}
pub fn region(&self) -> &Region {

View File

@@ -1,25 +1,18 @@
// use std::sync::Arc;
use bincode::{Decode, Encode};
// use parking_lot::{RwLock, RwLockReadGuard};
use crate::PAGE_SIZE;
// #[derive(Debug, Encode, Decode)]
#[derive(Debug, Encode, Decode)]
#[derive(Debug, Clone, Encode, Decode)]
pub struct Region {
// Bad name
/// Must be multiple of 4096
start: usize,
length: usize,
start: u64,
length: u64,
/// Must be multiple of 4096
reserved: usize,
// lock: Arc<RwLock<()>>,
// variant: usize, // Raw or Compressed or something else ? to know if there is a header ? Since blocks 4096, storing headers individually would be dumb
reserved: u64,
}
impl Region {
pub fn new(start: usize, length: usize, reserved: usize) -> Self {
pub fn new(start: u64, length: u64, reserved: u64) -> Self {
assert!(reserved > 0);
assert!(start % PAGE_SIZE == 0);
assert!(reserved % PAGE_SIZE == 0);
@@ -29,51 +22,35 @@ impl Region {
start,
length,
reserved,
// lock: Arc::new(RwLock::new(())),
}
}
pub fn start(&self) -> usize {
/// Returns the region's start offset.
pub fn start(&self) -> u64 {
self.start
}
pub fn length(&self) -> usize {
/// Sets the region's start offset.
///
/// Panics when `start` is not a multiple of `PAGE_SIZE`.
pub fn set_start(&mut self, start: u64) {
assert!(start % PAGE_SIZE == 0);
self.start = start
}
/// Returns the region's current length.
pub fn len(&self) -> u64 {
self.length
}
pub fn reserved(&self) -> usize {
/// Sets the region's length. The value is stored as given, without any
/// check against `reserved`.
pub fn set_len(&mut self, len: u64) {
self.length = len
}
/// Returns the number of bytes reserved for the region.
pub fn reserved(&self) -> u64 {
self.reserved
}
// pub fn lock(&self) -> RwLockReadGuard<'_, ()> {
// self.lock.read()
// }
pub fn set_reserved(&mut self, reserved: u64) {
self.reserved = reserved;
}
/// Returns the reserved space not covered by the current length
/// (`reserved - length`).
pub fn left(&self) -> u64 {
// NOTE(review): this subtraction underflows if `length` ever exceeds
// `reserved`; `set_len` does not prevent that.
self.reserved - self.length
}
}
// #[derive(Debug, Encode, Decode)]
// pub struct RegionInner {
// start: usize,
// length: usize,
// reserved: usize,
// }
// impl From<Region> for RegionInner {
// fn from(value: Region) -> Self {
// Self {
// start: value.start,
// length: value.length,
// reserved: value.reserved,
// }
// }
// }
// impl From<RegionInner> for Region {
// fn from(value: RegionInner) -> Self {
// Self {
// start: value.start,
// length: value.length,
// reserved: value.reserved,
// lock: Arc::new(RwLock::new(())),
// }
// }
// }

View File

@@ -1,3 +1,8 @@
use std::sync::Arc;
use crate::File;
/// Pairs a region identifier with a shared handle to a [`File`].
pub struct RawVec {
// presumably the slot index of this vector's region inside `file`; confirm
region: usize,
// Shared, reference-counted handle to the file.
file: Arc<File>,
}