mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-05-08 05:09:10 -07:00
computer: convert stores to vecs part 1
This commit is contained in:
@@ -1,11 +1,12 @@
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
fs::{File, OpenOptions},
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
fs::{self, File, OpenOptions},
|
||||
io::{self, Seek, SeekFrom, Write},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use brk_core::Result;
|
||||
use brk_core::{Error, Result};
|
||||
use memmap2::Mmap;
|
||||
|
||||
use crate::{AnyVec, HEADER_OFFSET, Header};
|
||||
@@ -44,10 +45,22 @@ where
|
||||
if j >= pushed.len() {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(pushed.get(j).map(Cow::Borrowed))
|
||||
} else {
|
||||
Ok(self.read_(index, mmap)?.map(Cow::Owned))
|
||||
return Ok(pushed.get(j).map(Cow::Borrowed));
|
||||
}
|
||||
|
||||
let updated = self.updated();
|
||||
if !updated.is_empty()
|
||||
&& let Some(updated) = updated.get(&index)
|
||||
{
|
||||
return Ok(Some(Cow::Borrowed(updated)));
|
||||
}
|
||||
|
||||
let holes = self.holes();
|
||||
if !holes.is_empty() && holes.contains(&index) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(self.read_(index, mmap)?.map(Cow::Owned))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -72,6 +85,47 @@ where
|
||||
self.mut_pushed().push(value)
|
||||
}
|
||||
|
||||
/// Positions currently marked as holes; `get_or_read` yields `None` for a
/// stored position found in this set, and `take` adds to it.
fn holes(&self) -> &BTreeSet<usize>;
|
||||
/// Mutable access to the hole set (`take` inserts into it, `update` removes from it).
fn mut_holes(&mut self) -> &mut BTreeSet<usize>;
|
||||
/// Removes the value at `index` and hands it back, leaving a hole behind.
///
/// The value is looked up through `get_or_read`. When nothing is found,
/// `Ok(None)` is returned and no state is touched. Otherwise any pending
/// update recorded for that position is dropped and the position is added
/// to the hole set.
///
/// # Errors
/// Propagates whatever error `get_or_read` returns.
///
/// NOTE(review): this never touches the pushed buffer, so an index that
/// resolves to a still-pushed value is recorded as a hole while the pushed
/// entry stays in place — confirm that is intended.
fn take(&mut self, index: I, mmap: &Mmap) -> Result<Option<T>> {
    let Some(found) = self.get_or_read(index, mmap)? else {
        return Ok(None);
    };
    // Detach from `self` before mutating the bookkeeping sets.
    let taken = found.into_owned();

    let slot = index.unwrap_to_usize();
    self.mut_updated().remove(&slot);
    self.mut_holes().insert(slot);

    Ok(Some(taken))
}
|
||||
|
||||
/// Pending overwrites keyed by position; `get_or_read` consults this map
/// before falling back to reading from the mmap.
fn updated(&self) -> &BTreeMap<usize, T>;
|
||||
/// Mutable access to the pending-overwrite map (`update` inserts, `take` removes).
fn mut_updated(&mut self) -> &mut BTreeMap<usize, T>;
|
||||
#[inline]
|
||||
fn update(&mut self, index: I, value: T) -> Result<()> {
|
||||
let uindex = index.unwrap_to_usize();
|
||||
let stored_len = self.stored_len();
|
||||
|
||||
if uindex >= stored_len {
|
||||
if let Some(prev) = self.mut_pushed().get_mut(uindex - stored_len) {
|
||||
*prev = value;
|
||||
return Ok(());
|
||||
} else {
|
||||
return Err(Error::IndexTooHigh);
|
||||
}
|
||||
}
|
||||
|
||||
let holes = self.mut_holes();
|
||||
if !holes.is_empty() {
|
||||
holes.remove(&index.unwrap_to_usize());
|
||||
}
|
||||
|
||||
self.mut_updated().insert(index.unwrap_to_usize(), value);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Shared access to this vec's [`Header`].
fn header(&self) -> &Header;
|
||||
/// Mutable access to this vec's [`Header`].
fn mut_header(&mut self) -> &mut Header;
|
||||
|
||||
@@ -85,14 +139,24 @@ where
|
||||
parent.join(name)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn path(&self) -> PathBuf {
|
||||
Self::path_(self.parent(), self.name())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn path_(parent: &Path, name: &str) -> PathBuf {
|
||||
Self::folder_(parent, name).join(I::to_string())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn holes_path(&self) -> PathBuf {
|
||||
Self::holes_path_(self.parent(), self.name())
|
||||
}
|
||||
#[inline]
|
||||
fn holes_path_(parent: &Path, name: &str) -> PathBuf {
|
||||
Self::folder_(parent, name).join(format!("{}_holes", I::to_string()))
|
||||
}
|
||||
|
||||
// ---
|
||||
|
||||
fn open_file(&self) -> io::Result<File> {
|
||||
@@ -134,6 +198,10 @@ where
|
||||
|
||||
#[inline]
|
||||
fn reset_(&mut self) -> Result<()> {
|
||||
let holes_path = self.holes_path();
|
||||
if fs::exists(&holes_path)? {
|
||||
fs::remove_file(&holes_path)?;
|
||||
}
|
||||
let mut file = self.open_file()?;
|
||||
self.file_truncate_and_write_all(&mut file, HEADER_OFFSET as u64, &[])
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::{fmt::Debug, ops::Add};
|
||||
|
||||
use brk_core::{Error, Printable, Result};
|
||||
use zerocopy::{Immutable, IntoBytes, KnownLayout, TryFromBytes};
|
||||
|
||||
pub trait StoredIndex
|
||||
where
|
||||
@@ -15,6 +16,10 @@ where
|
||||
+ TryInto<usize>
|
||||
+ From<usize>
|
||||
+ Add<usize, Output = Self>
|
||||
+ TryFromBytes
|
||||
+ IntoBytes
|
||||
+ Immutable
|
||||
+ KnownLayout
|
||||
+ Send
|
||||
+ Sync
|
||||
+ Printable,
|
||||
@@ -37,6 +42,10 @@ where
|
||||
+ TryInto<usize>
|
||||
+ From<usize>
|
||||
+ Add<usize, Output = Self>
|
||||
+ TryFromBytes
|
||||
+ IntoBytes
|
||||
+ Immutable
|
||||
+ KnownLayout
|
||||
+ Send
|
||||
+ Sync
|
||||
+ Printable,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
fs, mem,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
@@ -57,6 +58,8 @@ where
|
||||
}
|
||||
|
||||
pub fn import(parent: &Path, name: &str, version: Version) -> Result<Self> {
|
||||
panic!("Compressed vecs are a work in progress right now, please use raw vecs instead");
|
||||
|
||||
let mut inner = RawVec::import(parent, name, version)?;
|
||||
|
||||
let pages_meta = {
|
||||
@@ -196,6 +199,22 @@ where
|
||||
fn mut_pushed(&mut self) -> &mut Vec<T> {
|
||||
self.inner.mut_pushed()
|
||||
}
|
||||
#[inline]
|
||||
fn holes(&self) -> &BTreeSet<usize> {
|
||||
self.inner.holes()
|
||||
}
|
||||
#[inline]
|
||||
fn mut_holes(&mut self) -> &mut BTreeSet<usize> {
|
||||
self.inner.mut_holes()
|
||||
}
|
||||
#[inline]
|
||||
fn updated(&self) -> &BTreeMap<usize, T> {
|
||||
self.inner.updated()
|
||||
}
|
||||
#[inline]
|
||||
fn mut_updated(&mut self) -> &mut BTreeMap<usize, T> {
|
||||
self.inner.mut_updated()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn path(&self) -> PathBuf {
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
fs::{self, File},
|
||||
io,
|
||||
marker::PhantomData,
|
||||
mem,
|
||||
os::unix::fs::FileExt,
|
||||
path::{Path, PathBuf},
|
||||
sync::{
|
||||
Arc,
|
||||
@@ -24,12 +26,17 @@ const VERSION: Version = Version::ONE;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RawVec<I, T> {
|
||||
// --- Needed for &, TODO: Weak copy ?
|
||||
header: Header,
|
||||
parent: PathBuf,
|
||||
name: &'static str,
|
||||
pushed: Vec<T>,
|
||||
local_stored_len: Option<usize>,
|
||||
shared_stored_len: Arc<AtomicUsize>,
|
||||
// --- Needed for &mut
|
||||
pushed: Vec<T>,
|
||||
has_stored_holes: bool,
|
||||
holes: BTreeSet<usize>,
|
||||
updated: BTreeMap<usize, T>,
|
||||
local_stored_len: Option<usize>,
|
||||
phantom: PhantomData<I>,
|
||||
}
|
||||
|
||||
@@ -90,17 +97,36 @@ where
|
||||
0
|
||||
};
|
||||
|
||||
let mut has_stored_holes = false;
|
||||
let holes_path = Self::holes_path_(parent, name);
|
||||
let holes = if fs::exists(&holes_path)? {
|
||||
has_stored_holes = true;
|
||||
let bytes = fs::read(&holes_path)?;
|
||||
bytes
|
||||
.chunks(size_of::<usize>())
|
||||
.map(|b| -> Result<usize> {
|
||||
Ok(usize::from_ne_bytes(brk_core::copy_first_8bytes(b)?))
|
||||
})
|
||||
.collect::<Result<BTreeSet<usize>>>()?
|
||||
} else {
|
||||
BTreeSet::new()
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
header,
|
||||
name: Box::leak(Box::new(name.to_string())),
|
||||
parent: parent.to_owned(),
|
||||
pushed: vec![],
|
||||
has_stored_holes,
|
||||
holes,
|
||||
updated: BTreeMap::new(),
|
||||
local_stored_len: Some(stored_len),
|
||||
shared_stored_len: Arc::new(AtomicUsize::new(stored_len)),
|
||||
phantom: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn set_stored_len(&mut self, len: usize) {
|
||||
self.local_stored_len.replace(len);
|
||||
self.shared_stored_len.store(len, Ordering::Relaxed);
|
||||
@@ -171,6 +197,24 @@ where
|
||||
&mut self.pushed
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn holes(&self) -> &BTreeSet<usize> {
|
||||
&self.holes
|
||||
}
|
||||
#[inline]
|
||||
fn mut_holes(&mut self) -> &mut BTreeSet<usize> {
|
||||
&mut self.holes
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn updated(&self) -> &BTreeMap<usize, T> {
|
||||
&self.updated
|
||||
}
|
||||
#[inline]
|
||||
fn mut_updated(&mut self) -> &mut BTreeMap<usize, T> {
|
||||
&mut self.updated
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn parent(&self) -> &Path {
|
||||
&self.parent
|
||||
@@ -181,33 +225,70 @@ where
|
||||
|
||||
let pushed_len = self.pushed_len();
|
||||
|
||||
if pushed_len == 0 {
|
||||
let has_new_data = pushed_len != 0;
|
||||
let has_updated_data = !self.updated.is_empty();
|
||||
let has_holes = !self.holes.is_empty();
|
||||
let had_holes = self.has_stored_holes && !has_holes;
|
||||
|
||||
if !has_new_data && !has_updated_data && !has_holes && !had_holes {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let bytes = {
|
||||
let pushed = &mut self.pushed;
|
||||
if has_new_data || has_updated_data {
|
||||
let mut file = file_opt.unwrap_or(self.open_file()?);
|
||||
|
||||
let mut bytes: Vec<u8> = vec![0; pushed.len() * Self::SIZE_OF_T];
|
||||
if has_new_data {
|
||||
let bytes = {
|
||||
let mut bytes: Vec<u8> = vec![0; pushed_len * Self::SIZE_OF_T];
|
||||
|
||||
let unsafe_bytes = UnsafeSlice::new(&mut bytes);
|
||||
let unsafe_bytes = UnsafeSlice::new(&mut bytes);
|
||||
|
||||
mem::take(pushed)
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.for_each(|(i, v)| unsafe_bytes.copy_slice(i * Self::SIZE_OF_T, v.as_bytes()));
|
||||
mem::take(&mut self.pushed)
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.for_each(|(i, v)| {
|
||||
unsafe_bytes.copy_slice(i * Self::SIZE_OF_T, v.as_bytes())
|
||||
});
|
||||
|
||||
bytes
|
||||
};
|
||||
bytes
|
||||
};
|
||||
|
||||
let mut file = file_opt.unwrap_or(self.open_file()?);
|
||||
self.file_write_all(&mut file, &bytes)?;
|
||||
self.file_write_all(&mut file, &bytes)?;
|
||||
|
||||
if let Some(local_stored_len) = self.local_stored_len.as_mut() {
|
||||
*local_stored_len += pushed_len;
|
||||
if let Some(local_stored_len) = self.local_stored_len.as_mut() {
|
||||
*local_stored_len += pushed_len;
|
||||
}
|
||||
self.shared_stored_len
|
||||
.fetch_add(pushed_len, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
if has_updated_data {
|
||||
mem::take(&mut self.updated)
|
||||
.into_iter()
|
||||
.try_for_each(|(i, v)| -> Result<()> {
|
||||
file.write_all_at(
|
||||
v.as_bytes(),
|
||||
((i * Self::SIZE_OF_T) + HEADER_OFFSET) as u64,
|
||||
)?;
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
if has_holes || had_holes {
|
||||
let holes_path = self.holes_path();
|
||||
if has_holes {
|
||||
fs::write(
|
||||
&holes_path,
|
||||
self.holes
|
||||
.iter()
|
||||
.flat_map(|i| i.to_ne_bytes())
|
||||
.collect::<Vec<_>>(),
|
||||
)?;
|
||||
} else if had_holes {
|
||||
let _ = fs::remove_file(&holes_path);
|
||||
}
|
||||
}
|
||||
self.shared_stored_len
|
||||
.fetch_add(pushed_len, Ordering::Relaxed);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -278,9 +359,12 @@ impl<I, T> Clone for RawVec<I, T> {
|
||||
parent: self.parent.clone(),
|
||||
name: self.name,
|
||||
pushed: vec![],
|
||||
phantom: PhantomData,
|
||||
updated: BTreeMap::new(),
|
||||
has_stored_holes: false,
|
||||
holes: BTreeSet::new(),
|
||||
local_stored_len: None,
|
||||
shared_stored_len: self.shared_stored_len.clone(),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
@@ -105,6 +106,36 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn holes(&self) -> &BTreeSet<usize> {
|
||||
match self {
|
||||
StoredVec::Raw(v) => v.holes(),
|
||||
StoredVec::Compressed(v) => v.holes(),
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
fn mut_holes(&mut self) -> &mut BTreeSet<usize> {
|
||||
match self {
|
||||
StoredVec::Raw(v) => v.mut_holes(),
|
||||
StoredVec::Compressed(v) => v.mut_holes(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn updated(&self) -> &BTreeMap<usize, T> {
|
||||
match self {
|
||||
StoredVec::Raw(v) => v.updated(),
|
||||
StoredVec::Compressed(v) => v.updated(),
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
fn mut_updated(&mut self) -> &mut BTreeMap<usize, T> {
|
||||
match self {
|
||||
StoredVec::Raw(v) => v.mut_updated(),
|
||||
StoredVec::Compressed(v) => v.mut_updated(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn path(&self) -> PathBuf {
|
||||
match self {
|
||||
|
||||
Reference in New Issue
Block a user