diff --git a/crates/ltk_rst/Cargo.toml b/crates/ltk_rst/Cargo.toml
index 8c412490..d9222f7d 100644
--- a/crates/ltk_rst/Cargo.toml
+++ b/crates/ltk_rst/Cargo.toml
@@ -12,7 +12,7 @@ serde = ["dep:serde"]
 [dependencies]
 thiserror = { workspace = true }
 byteorder = { workspace = true }
-ltk_hash = { version = "0.2.6", path = "../ltk_hash" }
+xxhash-rust.workspace = true
 ltk_io_ext = { version = "0.4.2", path = "../ltk_io_ext" }
 serde = { workspace = true, optional = true }
 
diff --git a/crates/ltk_rst/src/hash.rs b/crates/ltk_rst/src/hash.rs
index 635c3d69..47455a6a 100644
--- a/crates/ltk_rst/src/hash.rs
+++ b/crates/ltk_rst/src/hash.rs
@@ -1,30 +1,98 @@
-use ltk_hash::xxhash::xxhash64;
+use std::fmt::LowerHex;
+
+use xxhash_rust::xxh64::xxh64;
 
 use crate::version::RstHashType;
+
+/// Hash portion of an RST entry key, without the string-offset component.
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub struct RstHash(pub u64);
+
+impl LowerHex for RstHash {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        LowerHex::fmt(&self.0, f)
+    }
+}
+
+impl std::ops::Deref for RstHash {
+    type Target = u64;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl From<u64> for RstHash {
+    fn from(value: u64) -> Self {
+        Self(value)
+    }
+}
+
+impl RstHash {
+    /// Computes the masked XXHash64 of `key` (ASCII-lowercased), suitable for use
+    /// as an RST entry hash (without the string-offset component).
+    ///
+    /// The result is masked as defined by the [`RstHashType`]
+    #[must_use]
+    #[inline]
+    pub fn new(key: impl AsRef<str>, hash_type: RstHashType) -> Self {
+        let lowered = key.as_ref().to_ascii_lowercase();
+        let raw = xxh64(lowered.as_bytes(), 0);
+        Self(raw & hash_type.hash_mask())
+    }
+
+    /// Packs this hash together with a string `offset`; see [`PackedHash::pack`].
+    #[must_use]
+    #[inline]
+    pub fn pack_entry(self, offset: u64, hash_type: RstHashType) -> PackedHash {
+        PackedHash::pack(self, offset, hash_type)
+    }
+}
+
+/// Raw RST hash-table entry: a hash combined with a string offset in one `u64`.
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub struct PackedHash(pub u64);
+
+impl LowerHex for PackedHash {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        LowerHex::fmt(&self.0, f)
+    }
+}
+
+impl From<u64> for PackedHash {
+    fn from(value: u64) -> Self {
+        Self(value)
+    }
+}
+
+impl std::ops::Deref for PackedHash {
+    type Target = u64;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+// TODO: make hash_type mismatching impossible via type system
+impl PackedHash {
+    /// Packs a pre-computed masked `hash` together with a string `offset` into the
+    /// single `u64` value, for use in the RST hash table.
+    ///
+    /// Hash bits outside the `hash_type` mask are dropped so they cannot clobber the offset.
+    ///
+    /// NOTE: hash_type MUST match what was given when creating your [`RstHash`]
+    #[must_use]
+    #[inline]
+    pub fn pack(hash: RstHash, offset: u64, hash_type: RstHashType) -> Self {
+        Self((hash.0 & hash_type.hash_mask()) | (offset << hash_type.offset_shift()))
+    }
 
-/// Computes the masked XXHash64 of `key` lowercased as UTF-8, suitable for use
-/// as an RST entry hash (without the string-offset component).
-///
-/// The result is masked to the bit-width defined by `hash_type`:
-/// - [`RstHashType::Complex`] → lower 40 bits
-/// - [`RstHashType::Simple`] → lower 39 bits
-pub fn compute_hash(key: &str, hash_type: RstHashType) -> u64 {
-    let lowered = key.to_lowercase();
-    let raw = xxhash64(lowered.as_bytes(), 0);
-    raw & hash_type.hash_mask()
-}
-
-/// Packs a pre-computed masked `hash` together with a string `offset` into the
-/// single `u64` value written into the RST hash table.
-#[inline]
-pub fn pack_entry(hash: u64, offset: u64, hash_type: RstHashType) -> u64 {
-    hash | (offset << hash_type.offset_shift())
-}
-
-/// Unpacks a raw RST hash-table entry into `(hash, offset)`.
-#[inline]
-pub fn unpack_entry(entry: u64, hash_type: RstHashType) -> (u64, u64) {
-    let hash = entry & hash_type.hash_mask();
-    let offset = entry >> hash_type.offset_shift();
-    (hash, offset)
+    /// Unpacks a raw RST hash-table entry into `(hash, offset)`.
+    #[must_use]
+    #[inline]
+    pub fn unpack_entry(self, hash_type: RstHashType) -> (RstHash, u64) {
+        let hash = self.0 & hash_type.hash_mask();
+        let offset = self.0 >> hash_type.offset_shift();
+        (RstHash(hash), offset)
+    }
 }
diff --git a/crates/ltk_rst/src/lib.rs b/crates/ltk_rst/src/lib.rs
index 2f249b85..1a51d2e1 100644
--- a/crates/ltk_rst/src/lib.rs
+++ b/crates/ltk_rst/src/lib.rs
@@ -36,9 +36,9 @@
 //! # Hashing keys manually
 //!
 //! ```
-//! use ltk_rst::{RstHashType, compute_hash};
+//! use ltk_rst::{RstHashType, RstHash};
 //!
-//! let hash = compute_hash("game_client_quit", RstHashType::Simple);
+//! let hash = RstHash::new("game_client_quit", RstHashType::Simple);
 //! println!("{hash:#018x}");
 //! ```
 
diff --git a/crates/ltk_rst/src/rst.rs b/crates/ltk_rst/src/rst.rs
index 88ae86aa..fd69cbdd 100644
--- a/crates/ltk_rst/src/rst.rs
+++ b/crates/ltk_rst/src/rst.rs
@@ -5,8 +5,8 @@ use byteorder::{ReadBytesExt as _, WriteBytesExt as _, LE};
 use ltk_io_ext::ReaderExt as _;
 
 use crate::error::RstError;
-use crate::hash::{compute_hash, pack_entry, unpack_entry};
 use crate::version::RstVersion;
+use crate::{PackedHash, RstHash};
 
 /// Magic bytes at the start of every RST file: `"RST"`.
 pub const MAGIC: &[u8; 3] = b"RST";
@@ -50,7 +50,7 @@ pub const MAGIC: &[u8; 3] = b"RST";
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Stringtable {
     /// Hash → string mapping.
-    pub entries: HashMap<u64, String>,
+    pub entries: HashMap<RstHash, String>,
 }
 
 impl Stringtable {
@@ -72,26 +72,23 @@ impl Stringtable {
     }
 
     /// Returns an iterator over the entries in the table.
-    pub fn iter(&self) -> impl Iterator<Item = (&u64, &String)> {
+    pub fn iter(&self) -> impl Iterator<Item = (&RstHash, &String)> {
         self.entries.iter()
     }
 
     /// Returns the string associated with `hash`, if any.
-    pub fn get(&self, hash: u64) -> Option<&str> {
-        self.entries.get(&hash).map(|s| s.as_str())
+    pub fn get(&self, hash: impl Into<RstHash>) -> Option<&str> {
+        self.entries.get(&hash.into()).map(|s| s.as_str())
     }
 
     /// Inserts an entry by pre-computed hash.
-    ///
-    /// The hash must already be masked to the bit-width of the desired
-    /// [`RstHashType`] — use [`compute_hash`] to produce it.
-    pub fn insert(&mut self, hash: u64, value: impl Into<String>) {
+    pub fn insert(&mut self, hash: RstHash, value: impl Into<String>) {
         self.entries.insert(hash, value.into());
     }
 
     /// Hashes `key` using the latest version's hash type and inserts the entry.
-    pub fn insert_str(&mut self, key: &str, value: impl Into<String>) {
-        let hash = compute_hash(key, RstVersion::V5.hash_type());
+    pub fn insert_str(&mut self, key: impl AsRef<str>, value: impl Into<String>) {
+        let hash = RstHash::new(key, RstVersion::V5.hash_type());
         self.insert(hash, value);
     }
 
@@ -121,10 +118,10 @@ impl Stringtable {
         }
 
         let count = reader.read_i32::<LE>()? as usize;
-        let mut pairs: Vec<(u64, u64)> = Vec::with_capacity(count);
+        let mut pairs: Vec<(RstHash, u64)> = Vec::with_capacity(count);
         for _ in 0..count {
-            let raw = reader.read_u64::<LE>()?;
-            pairs.push(unpack_entry(raw, hash_type));
+            let raw = PackedHash(reader.read_u64::<LE>()?);
+            pairs.push(raw.unpack_entry(hash_type));
         }
 
         // V2–V4 have a mode byte (read and discard).
@@ -134,7 +131,7 @@ impl Stringtable {
 
         let data_start = reader.stream_position()?;
         let mut offset_cache: HashMap<u64, String> = HashMap::with_capacity(count);
-        let mut entries: HashMap<u64, String> = HashMap::with_capacity(count);
+        let mut entries: HashMap<_, String> = HashMap::with_capacity(count);
 
         for (hash, offset) in pairs {
             let text = if let Some(cached) = offset_cache.get(&offset) {
@@ -164,7 +161,7 @@ impl Stringtable {
         // Build string data blob with deduplication, and collect packed entries
         let mut data: Vec<u8> = Vec::new();
         let mut text_to_offset: HashMap<&str, u64> = HashMap::with_capacity(self.entries.len());
-        let mut packed_entries: Vec<u64> = Vec::with_capacity(self.entries.len());
+        let mut packed_entries: Vec<PackedHash> = Vec::with_capacity(self.entries.len());
 
         for (hash, text) in &self.entries {
             let offset = if let Some(&off) = text_to_offset.get(text.as_str()) {
@@ -176,12 +173,12 @@ impl Stringtable {
                 off
             };
 
-            let packed = pack_entry(*hash, offset, hash_type);
+            let packed = hash.pack_entry(offset, hash_type);
             packed_entries.push(packed);
         }
 
         for packed in &packed_entries {
-            writer.write_u64::<LE>(*packed)?;
+            writer.write_u64::<LE>(**packed)?;
         }
 
         writer.write_all(&data)?;
diff --git a/crates/ltk_rst/src/version.rs b/crates/ltk_rst/src/version.rs
index 62c48c30..d1196e15 100644
--- a/crates/ltk_rst/src/version.rs
+++ b/crates/ltk_rst/src/version.rs
@@ -59,21 +59,29 @@ impl RstVersion {
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RstHashType {
     /// 40-bit hash key (`(1 << 40) - 1`). Used by RST v2 and v3.
-    Complex = 40,
+    Complex,
     /// 39-bit hash key (`(1 << 39) - 1`). Used by RST v4 and v5.
-    Simple = 39,
+    Simple,
 }
 
 impl RstHashType {
     /// Returns the bitmask for the hash portion of a packed entry.
-    #[inline]
+    ///
+    /// - [`RstHashType::Complex`] → preserve lower 40 bits
+    /// - [`RstHashType::Simple`] → preserve lower 39 bits
+    #[must_use]
+    #[inline]
     pub fn hash_mask(self) -> u64 {
-        (1u64 << (self as u8)) - 1
+        (1u64 << self.offset_shift()) - 1
     }
 
     /// Returns the bit-shift used when packing or unpacking the string offset.
-    #[inline]
+    #[must_use]
+    #[inline]
     pub fn offset_shift(self) -> u8 {
-        self as u8
+        match self {
+            Self::Complex => 40,
+            Self::Simple => 39,
+        }
     }
 }
diff --git a/crates/ltk_rst/tests/parse_files.rs b/crates/ltk_rst/tests/parse_files.rs
index b2b99b9d..5724d9ec 100644
--- a/crates/ltk_rst/tests/parse_files.rs
+++ b/crates/ltk_rst/tests/parse_files.rs
@@ -8,7 +8,7 @@ use std::fs::File;
 use std::io::{BufReader, Cursor};
 use std::path::Path;
 
-use ltk_rst::{compute_hash, RstError, RstHashType, Stringtable};
+use ltk_rst::{RstError, RstHash, RstHashType, Stringtable};
 
 const TEST_FILES_ROOT: &str =
     concat!(env!("CARGO_MANIFEST_DIR"), "/../../../test-files/data/menu");
@@ -129,9 +129,9 @@ fn round_trip_bootstrap() {
 /// result.
 #[test]
 fn compute_hash_is_case_insensitive() {
-    let lower = compute_hash("game_client_quit", RstHashType::Simple);
-    let upper = compute_hash("GAME_CLIENT_QUIT", RstHashType::Simple);
-    let mixed = compute_hash("Game_Client_Quit", RstHashType::Simple);
+    let lower = RstHash::new("game_client_quit", RstHashType::Simple);
+    let upper = RstHash::new("GAME_CLIENT_QUIT", RstHashType::Simple);
+    let mixed = RstHash::new("Game_Client_Quit", RstHashType::Simple);
 
     assert_eq!(lower, upper);
     assert_eq!(lower, mixed);
@@ -144,11 +144,11 @@ fn compute_hash_respects_bit_width() {
     let simple_mask = (1u64 << 39) - 1;
     let complex_mask = (1u64 << 40) - 1;
 
-    let simple_hash = compute_hash("some_key", RstHashType::Simple);
-    let complex_hash = compute_hash("some_key", RstHashType::Complex);
+    let simple_hash = RstHash::new("some_key", RstHashType::Simple);
+    let complex_hash = RstHash::new("some_key", RstHashType::Complex);
 
-    assert_eq!(simple_hash & simple_mask, simple_hash);
-    assert_eq!(complex_hash & complex_mask, complex_hash);
+    assert_eq!(simple_hash.0 & simple_mask, *simple_hash);
+    assert_eq!(complex_hash.0 & complex_mask, *complex_hash);
 }
 
 #[test]
@@ -187,8 +187,8 @@ fn insert_str_round_trips() {
     let mut cursor = Cursor::new(&buf);
     let loaded = Stringtable::from_rst_reader(&mut cursor).expect("re-parse failed");
 
-    let quit_hash = compute_hash("game_client_quit", RstHashType::Simple);
-    let play_hash = compute_hash("game_client_play", RstHashType::Simple);
+    let quit_hash = RstHash::new("game_client_quit", RstHashType::Simple);
+    let play_hash = RstHash::new("game_client_play", RstHashType::Simple);
 
     assert_eq!(loaded.get(quit_hash), Some("Quit"));
     assert_eq!(loaded.get(play_hash), Some("Play"));
@@ -202,7 +202,7 @@ fn to_writer_deduplicates_strings() {
     let shared_value = "Shared string value";
 
     for i in 0u64..10 {
-        table.insert(i, shared_value);
+        table.insert(i.into(), shared_value);
     }
 
     let mut buf = Vec::new();