From 7f50fe0360175b74f0ff0302a597b676e854fe75 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 5 Dec 2019 05:11:10 -0800 Subject: [PATCH 01/56] Make serde dependency optional, but enabled by default --- .travis.yml | 1 + Cargo.toml | 6 +++++- src/trivial_impls.rs | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ad47308..180d855 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ rust: os: - linux script: + - cargo build --no-default-features - cargo build - cargo test --all - "cd string-cache-codegen && cargo build && cd .." diff --git a/Cargo.toml b/Cargo.toml index eec2438..364131a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,14 @@ edition = "2018" [lib] name = "string_cache" +[features] +serde_support = ["serde"] +default = ["serde_support"] + [dependencies] precomputed-hash = "0.1" lazy_static = "1" -serde = "1" +serde = { version = "1", optional = true } phf_shared = "0.8" new_debug_unreachable = "1.0" diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index ed53be2..4c055fd 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -8,6 +8,7 @@ // except according to those terms. use crate::{Atom, StaticAtomSet}; +#[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::borrow::Cow; use std::fmt; @@ -69,6 +70,7 @@ impl AsRef for Atom { } } +#[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result where @@ -79,6 +81,7 @@ impl Serialize for Atom { } } +#[cfg(feature = "serde_support")] impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { fn deserialize(deserializer: D) -> Result where From 82ac0d955d2d1785eb5634cdbf61465adc0d759e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Nov 2020 19:09:37 +0100 Subject: [PATCH 02/56] Bump to 0.8.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 364131a..01b9282 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.0" # Also update README.md when making a semver-breaking change +version = "0.8.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From c057dde7fdbdb9618d15374d74642b8267c0c837 Mon Sep 17 00:00:00 2001 From: l3ops Date: Tue, 20 Apr 2021 13:48:57 +0200 Subject: [PATCH 03/56] Add an Atom::try_static method to create an Atom only if it exists in the static table --- integration-tests/src/lib.rs | 6 ++++++ src/atom.rs | 26 ++++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 3aa2a44..1f2be87 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -290,6 +290,12 @@ fn test_from_string() { assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); } +#[test] +fn test_try_static() { + assert!(Atom::try_static("head").is_some()); + assert!(Atom::try_static("not in the static table").is_none()); +} + #[cfg(all(test, feature = "unstable"))] #[path = "bench.rs"] mod bench; diff --git a/src/atom.rs b/src/atom.rs index 5011d40..6da0044 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -149,6 +149,22 @@ impl Atom { _ => unsafe { debug_unreachable!() }, } } + + pub fn try_static(string_to_add: &str) -> Option { + Self::try_static_internal(string_to_add).ok() + } + + fn try_static_internal(string_to_add: &str) -> Result { + let static_set = Static::get(); + let hash = phf_shared::hash(&*string_to_add, &static_set.key); + let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); + + if static_set.atoms[index as usize] == string_to_add { + Ok(Self::pack_static(index)) + } else { + Err(hash) + } + } } impl Default for Atom { @@ -170,13 +186,7 @@ impl Hash for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { - let static_set = Static::get(); - let hash = phf_shared::hash(&*string_to_add, &static_set.key); - let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); - - if static_set.atoms[index as usize] == string_to_add { - Self::pack_static(index) - } else { + Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { let len = string_to_add.len(); if len <= MAX_INLINE_LEN { let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); @@ -200,7 +210,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } - } + }) } } From 609a59f19273d6e261ee47543b4e0d576ca3707e Mon Sep 17 00:00:00 2001 From: Russell Mull Date: Tue, 31 Aug 2021 09:30:20 -0700 Subject: [PATCH 04/56] Update phf, rand dependencies --- Cargo.toml | 2 +- integration-tests/Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01b9282..88c09e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ default = ["serde_support"] precomputed-hash = "0.1" lazy_static = "1" serde = { version = "1", optional = true } -phf_shared = "0.8" +phf_shared = "0.10" new_debug_unreachable = "1.0" [[test]] diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 736e34a..a0b047c 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -19,7 +19,7 @@ unstable = [] string_cache = { version = "0.8", path = ".." } [dev-dependencies] -rand = "0.7" +rand = "0.8" string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } [build-dependencies] diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index d212d7e..5ae3a52 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.8" -phf_shared = "0.8" +phf_generator = "0.10" +phf_shared = "0.10" proc-macro2 = "1" quote = "1" From 72f38cf1836292c1533e662d6741f7e433e3cdbf Mon Sep 17 00:00:00 2001 From: Nathan West Date: Mon, 13 Sep 2021 23:48:39 -0400 Subject: [PATCH 05/56] Use a custom Visitor in Deserialize --- src/trivial_impls.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 4c055fd..3f2d75e 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -87,7 +87,33 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { where D: Deserializer<'a>, { - let string: String = Deserialize::deserialize(deserializer)?; - Ok(Atom::from(string)) + use serde::de; + use std::marker::PhantomData; + + struct AtomVisitor(PhantomData); + + impl<'de, Static: StaticAtomSet> de::Visitor<'de> for AtomVisitor { + type Value = Atom; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "an Atom") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + Ok(Atom::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: de::Error, + { + Ok(Atom::from(v)) + } + } + + deserializer.deserialize_string(AtomVisitor(PhantomData)) } } From 58d0c572db093c751797bb50c482f8375396cc7c Mon Sep 17 00:00:00 2001 From: Nathan West Date: Tue, 14 Sep 2021 00:26:04 -0400 Subject: [PATCH 06/56] Use deserialize_str instead of deserialize_string --- src/trivial_impls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 3f2d75e..c0119ca 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -114,6 +114,6 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { } } - deserializer.deserialize_string(AtomVisitor(PhantomData)) + deserializer.deserialize_str(AtomVisitor(PhantomData)) } } From f95e39b2cf9e6610659c22c5c3e4ac271f886eba Mon Sep 17 00:00:00 2001 From: Donny Date: Fri, 8 Oct 2021 16:14:03 +0900 Subject: [PATCH 07/56] Use parking_lot --- Cargo.toml | 1 + src/atom.rs | 3 +-- src/dynamic_set.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01b9282..a54b134 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ lazy_static = "1" serde = { version = "1", optional = true } phf_shared = "0.8" new_debug_unreachable = "1.0" +parking_lot = "0.11" [[test]] name = "small-stack" diff --git a/src/atom.rs b/src/atom.rs index 6da0044..0d74408 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -201,7 +201,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } else { let ptr: std::ptr::NonNull = - DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash.g); + DYNAMIC_SET.lock().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -239,7 +239,6 @@ impl Drop for Atom { fn drop_slow(this: &mut Atom) { DYNAMIC_SET .lock() - .unwrap() .remove(this.unsafe_data.get() as *mut Entry); } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 08c9dcd..f926f1e 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -8,12 +8,12 @@ // except according to those terms. use lazy_static::lazy_static; +use parking_lot::Mutex; use std::borrow::Cow; use std::mem; use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use std::sync::Mutex; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; From bcf15b9269e8cff3560087c45d3ee66f8bf4fc76 Mon Sep 17 00:00:00 2001 From: cybai Date: Sat, 9 Oct 2021 01:18:28 +0900 Subject: [PATCH 08/56] Move CI to GitHub Actions --- .github/workflows/ci.yml | 46 ++++++++++++++++++++++++++++++++++++++++ .travis.yml | 15 ------------- 2 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d1c124a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: Run CI +on: + push: + branches: ["master"] + pull_request: + branches: ["**"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +env: + RUST_BACKTRACE: 1 + SHELL: /bin/bash + +jobs: + ci: + name: Run CI + runs-on: ubuntu-20.04 + + strategy: + matrix: + rust: [1.36.0, nightly, beta, stable] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 2 + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + default: true + override: true + - name: Build + run: | + cargo build --no-default-features + cargo build + - name: Tests + run: cargo test --all + - name: Build codegen + run: | + cd string-cache-codegen && cargo build && cd .. + + if [ ${{ matrix.rust }} = nightly ]; then + cd integration-tests && cargo test --features unstable && cd ..; + fi diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 180d855..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -sudo: false -language: rust -rust: - - 1.36.0 - - nightly - - beta - - stable -os: - - linux -script: - - cargo build --no-default-features - - cargo build - - cargo test --all - - "cd string-cache-codegen && cargo build && cd .." - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi" From 0806c6f08f086d9033f62ff95865a9690dc35101 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 9 Oct 2021 12:15:46 -0400 Subject: [PATCH 09/56] Add homu result. --- .github/workflows/ci.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1c124a..ee596b5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,3 +44,18 @@ jobs: if [ ${{ matrix.rust }} = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi + + + build_result: + name: homu build finished + runs-on: ubuntu-latest + needs: + - "ci" + + steps: + - name: Mark the job as successful + run: exit 0 + if: success() + - name: Mark the job as unsuccessful + run: exit 1 + if: "!success()" From 3a35e765552a22c54aefce9588ba8bf6de86457b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 9 Oct 2021 12:22:12 -0400 Subject: [PATCH 10/56] Run CI on auto branch. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee596b5..d043617 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,7 +1,7 @@ name: Run CI on: push: - branches: ["master"] + branches: ["auto"] pull_request: branches: ["**"] From ddaf1bd4074369c698235b5e196f506ae0d21c0a Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Mon, 11 Oct 2021 21:44:41 -0400 Subject: [PATCH 11/56] Publish 0.8.2. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a54b134..2636de6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.1" # Also update README.md when making a semver-breaking change +version = "0.8.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From dd6a60115991e13c21ce77fe12cf11245c04ddcb Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Thu, 14 Oct 2021 17:15:32 -0700 Subject: [PATCH 12/56] Replace Travis CI badge with GitHub Actions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c9c8ac..fdf4c0a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # string-cache -[![Build Status](https://travis-ci.com/servo/string-cache.svg?branch=master)](https://travis-ci.com/servo/string-cache) +[![Build Status](https://github.com/servo/string-cache/actions/workflows/ci.yml/badge.svg)](https://github.com/servo/string-cache/actions) [Documentation](https://docs.rs/string_cache/) From edce5ddd8b0740609b51440e4d7d5ba752b376d4 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 21 Nov 2021 15:36:17 -0500 Subject: [PATCH 13/56] Update MSRV to 1.38.0. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d043617..e8485a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.36.0, nightly, beta, stable] + rust: [1.38.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 5c3d60334a496402d78d906e7661dd4b72931a44 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 21 Nov 2021 16:06:28 -0500 Subject: [PATCH 14/56] Update MSRV to 1.40.0 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8485a9..39b55c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,11 +15,11 @@ env: jobs: ci: name: Run CI - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: - rust: [1.38.0, nightly, beta, stable] + rust: [1.40.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 7934bf9eb7784962d5af8f96258756fc480d4c22 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 16 Dec 2021 16:16:49 -0700 Subject: [PATCH 15/56] Fix warning and typo in small stack regression test --- tests/small-stack.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/small-stack.rs b/tests/small-stack.rs index 269cad7..bb607af 100644 --- a/tests/small-stack.rs +++ b/tests/small-stack.rs @@ -1,6 +1,6 @@ // Regression test for https://github.com/servo/html5ever/issues/393 // -// Create a dynamic atom − causing initialization of the golbal hash map − +// Create a dynamic atom − causing initialization of the global hash map − // in a thread that has a small stack. // // This is a separate test program rather than a `#[test] fn` among others @@ -9,7 +9,7 @@ fn main() { std::thread::Builder::new() .stack_size(50_000) .spawn(|| { - string_cache::DefaultAtom::from("12345678"); + let _atom = string_cache::DefaultAtom::from("12345678"); }) .unwrap() .join() From 474d27785f1339fa3056a8d320da2aa8cfec3d19 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:17:01 -0700 Subject: [PATCH 16/56] Add a Clippy exception for `derive_hash_xor_eq` This is a string interning library. It does some weird things related to hashing. This is fine. --- src/lib.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index b4a8fd5..441cb4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,6 +103,19 @@ #![cfg_attr(test, deny(warnings))] +// Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match +// with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while +// manually impl-ing the other, because it seems easy for the two to drift apart, causing the +// invariant to be violated. +// +// But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and +// copying are this library's purpose. So we know what the PartialEq comparison is going to do. +// +// The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner +// tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to +// differ would be if the table entry changed between invocations, and that would be really bad. +#![allow(clippy::derive_hash_xor_eq)] + mod atom; mod dynamic_set; mod static_sets; From c2afb8bbc3104867c5d89b79cc83bec1fae9fabd Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:19:12 -0700 Subject: [PATCH 17/56] Remove redundant `use` line --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 0d74408..c02651b 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -10,7 +10,7 @@ use crate::dynamic_set::{Entry, DYNAMIC_SET}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; -use phf_shared; + use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; From 78c516d82f48f215ec214979ed3b3a4b874c3dd9 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:20:25 -0700 Subject: [PATCH 18/56] Remove redundant reference operands --- src/dynamic_set.rs | 2 +- src/trivial_impls.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index f926f1e..2eab9da 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -56,7 +56,7 @@ impl Set { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { - if entry.hash == hash && &*entry.string == &*string { + if entry.hash == hash && *entry.string == *string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { return NonNull::from(&mut **entry); } diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index c0119ca..960dde0 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -39,7 +39,7 @@ impl PartialEq> for str { impl PartialEq for Atom { fn eq(&self, other: &String) -> bool { - &self[..] == &other[..] + self[..] == other[..] } } @@ -66,7 +66,7 @@ impl fmt::Display for Atom { impl AsRef for Atom { fn as_ref(&self) -> &str { - &self + self } } From 71925156c4ec58bc6cfc1f31f8d6a9ee58a4cfc0 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:22:12 -0700 Subject: [PATCH 19/56] Convert manual loop to `while let` --- src/dynamic_set.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 2eab9da..602b700 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -94,11 +94,8 @@ impl Set { let mut current: &mut Option> = &mut self.buckets[bucket_index]; - loop { - let entry_ptr: *mut Entry = match current.as_mut() { - Some(entry) => &mut **entry, - None => break, - }; + while let Some(entry_ptr) = current.as_mut() { + let entry_ptr: *mut Entry = &mut **entry_ptr; if entry_ptr == ptr { mem::drop(mem::replace(current, unsafe { (*entry_ptr).next_in_bucket.take() From 36da1fd4d86b5803082ec27155d43b0ca0c68709 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 12:34:49 +0100 Subject: [PATCH 20/56] Bump new_debug_unreachable to 1.0.2 Previous versions are incompatible with -Z minimal-versions builds. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d3d2093..2b0b466 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ precomputed-hash = "0.1" lazy_static = "1" serde = { version = "1", optional = true } phf_shared = "0.10" -new_debug_unreachable = "1.0" +new_debug_unreachable = "1.0.2" parking_lot = "0.11" [[test]] From 97a4f6d1c457fc45151216d7b54446e573cdf322 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 12:36:52 +0100 Subject: [PATCH 21/56] Bump lazy_static to 1.1.0 This version builds with `-Z minimal-versions` when warnings are denied. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2b0b466..747d499 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -lazy_static = "1" +lazy_static = "1.1.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" From ab30960f33f1a5e87ccb62e1e9cfd9677fe84cbb Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 13:08:40 +0100 Subject: [PATCH 22/56] Bump to 0.8.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 747d499..9ba5095 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.2" # Also update README.md when making a semver-breaking change +version = "0.8.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From cbd1dc3b7f2085a93945e29c000723d232aec61b Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 14 Feb 2022 11:18:26 -0500 Subject: [PATCH 23/56] fix: bump parking lot to 0.12 in order to not create wasm export --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9ba5095..5d94040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ lazy_static = "1.1.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" -parking_lot = "0.11" +parking_lot = "0.12" [[test]] name = "small-stack" From 6c0f8253813ea24b4f8be07658159fc6edb7acd2 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 14 Feb 2022 13:11:15 -0500 Subject: [PATCH 24/56] Bump CI to rust 1.49 (not sure if desired though) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39b55c5..fe17a69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.40.0, nightly, beta, stable] + rust: [1.49.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 019118878fc994417edfd6d7a389c317b0654aa0 Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Mon, 28 Feb 2022 14:43:55 -0800 Subject: [PATCH 25/56] Use SPDX license format --- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d94040..6ca2f39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "string_cache" version = "0.8.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." -license = "MIT / Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" edition = "2018" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 5ae3a52..f207e21 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -3,7 +3,7 @@ name = "string_cache_codegen" version = "0.5.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." -license = "MIT / Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache_codegen/" edition = "2018" From 9ae0f889bb96bc890b59eef1cd271c603b5a690f Mon Sep 17 00:00:00 2001 From: Xidorn Quan Date: Fri, 18 Mar 2022 22:48:05 +1100 Subject: [PATCH 26/56] Replace lazy_static with once_cell --- Cargo.toml | 2 +- src/dynamic_set.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d94040..e47eb8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -lazy_static = "1.1.0" +once_cell = "1.10.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 602b700..229a79f 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use parking_lot::Mutex; use std::borrow::Cow; use std::mem; @@ -38,16 +38,16 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -lazy_static! { - pub(crate) static ref DYNAMIC_SET: Mutex = Mutex::new({ +pub(crate) static DYNAMIC_SET: Lazy> = Lazy::new(|| { + Mutex::new({ type T = Option>; let _static_assert_size_eq = std::mem::transmute::; let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); Set { buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, } - }); -} + }) +}); impl Set { pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { From aa644096ae26aba2e7192d67a74101e6a21469ac Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 21 Mar 2022 14:24:45 -0400 Subject: [PATCH 27/56] 0.8.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e47eb8a..db1e95d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.3" # Also update README.md when making a semver-breaking change +version = "0.8.4" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 5034bde5c45b40af8e1c40672263193fa7050b32 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 27 Mar 2022 18:09:47 -0400 Subject: [PATCH 28/56] Publish string-cache-codegen 0.5.2. --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index f207e21..5eb5125 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.1" # Also update ../README.md when making a semver-breaking change +version = "0.5.2" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" From c58ac06272bf59135e58fdf8df0dd5e696019382 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Tue, 12 Jul 2022 11:49:26 +0100 Subject: [PATCH 29/56] Fix comment typo --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index c02651b..3fad611 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -105,7 +105,7 @@ impl Atom { } impl Atom { - /// Return the internal repersentation. For testing. + /// Return the internal representation. For testing. #[doc(hidden)] pub fn unsafe_data(&self) -> u64 { self.unsafe_data.get() From 8f5bed52d29a4d201705665fc9599c0d06bb4427 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Wed, 14 Dec 2022 21:33:36 -0500 Subject: [PATCH 30/56] Update MSRV. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe17a69..f99d2e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.49.0, nightly, beta, stable] + rust: [1.56.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 37b459f8ce1ec694e8218ebdeef30c06f68e6205 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 14 Dec 2022 10:44:50 +0100 Subject: [PATCH 31/56] Add trivial impl of Borrow for Atom This enables Atom to be used in methods like HashMap::entry_ref. --- src/trivial_impls.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 960dde0..24baaf1 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -10,7 +10,7 @@ use crate::{Atom, StaticAtomSet}; #[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; use std::fmt; impl ::precomputed_hash::PrecomputedHash for Atom { @@ -70,6 +70,12 @@ impl AsRef for Atom { } } +impl Borrow for Atom { + fn borrow(&self) -> &str { + self + } +} + #[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result From b473a4ad3be989166031f56976f7ce54ae79ac05 Mon Sep 17 00:00:00 2001 From: Boshen Date: Thu, 16 Feb 2023 21:58:39 +0800 Subject: [PATCH 32/56] feat: use bucket mutex instead of global mutex for dynamic set This implementation uses bucket level mutex with linear probing. --- src/atom.rs | 7 ++----- src/dynamic_set.rs | 36 +++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c02651b..7856947 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -200,8 +200,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } else { - let ptr: std::ptr::NonNull = - DYNAMIC_SET.lock().insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -237,9 +236,7 @@ impl Drop for Atom { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - DYNAMIC_SET - .lock() - .remove(this.unsafe_data.get() as *mut Entry); + DYNAMIC_SET.remove(this.unsafe_data.get() as *mut Entry); } } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 229a79f..6ea4ba6 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -19,7 +19,7 @@ const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; pub(crate) struct Set { - buckets: Box<[Option>; NB_BUCKETS]>, + buckets: Box<[Mutex>>]>, } pub(crate) struct Entry { @@ -38,22 +38,24 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -pub(crate) static DYNAMIC_SET: Lazy> = Lazy::new(|| { - Mutex::new({ - type T = Option>; - let _static_assert_size_eq = std::mem::transmute::; - let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); - Set { - buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, - } - }) +pub(crate) static DYNAMIC_SET: Lazy = Lazy::new(|| { + // NOTE: Using const initialization for buckets breaks the small-stack test. + // ``` + // // buckets: [Mutex>>; NB_BUCKETS], + // const MUTEX: Mutex>> = Mutex::new(None); + // let buckets = Box::new([MUTEX; NB_BUCKETS]); + // ``` + let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); + Set { buckets } }); impl Set { - pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { + pub(crate) fn insert(&self, string: Cow, hash: u32) -> NonNull { let bucket_index = (hash & BUCKET_MASK) as usize; + let mut linked_list = self.buckets[bucket_index].lock(); + { - let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); + let mut ptr: Option<&mut Box> = linked_list.as_mut(); while let Some(entry) = ptr.take() { if entry.hash == hash && *entry.string == *string { @@ -74,25 +76,25 @@ impl Set { debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let string = string.into_owned(); let mut entry = Box::new(Entry { - next_in_bucket: self.buckets[bucket_index].take(), + next_in_bucket: linked_list.take(), hash, ref_count: AtomicIsize::new(1), string: string.into_boxed_str(), }); let ptr = NonNull::from(&mut *entry); - self.buckets[bucket_index] = Some(entry); - + *linked_list = Some(entry); ptr } - pub(crate) fn remove(&mut self, ptr: *mut Entry) { + pub(crate) fn remove(&self, ptr: *mut Entry) { let bucket_index = { let value: &Entry = unsafe { &*ptr }; debug_assert!(value.ref_count.load(SeqCst) == 0); (value.hash & BUCKET_MASK) as usize }; - let mut current: &mut Option> = &mut self.buckets[bucket_index]; + let mut linked_list = self.buckets[bucket_index].lock(); + let mut current: &mut Option> = &mut linked_list; while let Some(entry_ptr) = current.as_mut() { let entry_ptr: *mut Entry = &mut **entry_ptr; From e01688eb974ac52b24f19c69fb8ca398a07cdd32 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Wed, 22 Feb 2023 11:50:52 +0100 Subject: [PATCH 33/56] Bump version to 0.8.5 This will allow dependent packages to pick up improvements to mutex performance. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 16ef966..2d29863 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.4" # Also update README.md when making a semver-breaking change +version = "0.8.5" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 9c7b0aa84a5d862f57c88e59f07d3c66efe58908 Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Thu, 23 Feb 2023 02:35:49 +0200 Subject: [PATCH 34/56] Revert trivial impl of Borrow for Atom --- src/trivial_impls.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 24baaf1..0b2c98b 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -70,12 +70,6 @@ impl AsRef for Atom { } } -impl Borrow for Atom { - fn borrow(&self) -> &str { - self - } -} - #[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result From 4e45fde044657fe94a00d5cbd0d2910a1f827d0e Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Thu, 23 Feb 2023 15:34:07 +0200 Subject: [PATCH 35/56] remove unused import --- src/trivial_impls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 0b2c98b..960dde0 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -10,7 +10,7 @@ use crate::{Atom, StaticAtomSet}; #[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::{Borrow, Cow}; +use std::borrow::Cow; use std::fmt; impl ::precomputed_hash::PrecomputedHash for Atom { From 448bf6b9c14e1f6f81a3f820006508fab8c4388b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Thu, 23 Feb 2023 22:04:50 -0500 Subject: [PATCH 36/56] Publish 0.8.6. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2d29863..6067114 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.5" # Also update README.md when making a semver-breaking change +version = "0.8.6" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 126c173ac3ba18888821038be28aeba44af1023c Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Tue, 28 Feb 2023 18:00:46 +0200 Subject: [PATCH 37/56] test: add common dependency usage --- integration-tests/src/common-usage.rs | 19 +++++++++++++++++++ integration-tests/src/lib.rs | 4 ++++ 2 files changed, 23 insertions(+) create mode 100644 integration-tests/src/common-usage.rs diff --git a/integration-tests/src/common-usage.rs b/integration-tests/src/common-usage.rs new file mode 100644 index 0000000..7b7380a --- /dev/null +++ b/integration-tests/src/common-usage.rs @@ -0,0 +1,19 @@ +/// Test common usage by popular dependents (html5ever, lalrpop, browserlists-rs), to ensure no API-surface breaking changes +/// Created after https://github.com/servo/string-cache/issues/271 +use std::collections::HashMap; + +use crate::Atom; +use crate::TestAtom; + +#[test] +fn usage_with_hashmap() { + let mut map: HashMap = HashMap::new(); + + map.insert(test_atom!("area"), 1); + map.insert("str_into".into(), 2); + map.insert("atom_from".into(), 3); + + assert_eq!(map.get(&"area".into()).unwrap(), &1); + assert_eq!(map.get(&"str_into".into()).unwrap(), &2); + assert_eq!(map.get(&Atom::from("atom_from")).unwrap(), &3); +} diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 1f2be87..aaacdff 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -296,6 +296,10 @@ fn test_try_static() { assert!(Atom::try_static("not in the static table").is_none()); } +#[cfg(test)] +#[path = "common-usage.rs"] +mod common_usage; + #[cfg(all(test, feature = "unstable"))] #[path = "bench.rs"] mod bench; From 120ba6c88e9337a810149b5afa4eecf32d8006d8 Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Fri, 3 Mar 2023 22:22:19 +0200 Subject: [PATCH 38/56] fix: move debug_assert check --- src/dynamic_set.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 6ea4ba6..46e7a54 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -87,13 +87,11 @@ impl Set { } pub(crate) fn remove(&self, ptr: *mut Entry) { - let bucket_index = { - let value: &Entry = unsafe { &*ptr }; - debug_assert!(value.ref_count.load(SeqCst) == 0); - (value.hash & BUCKET_MASK) as usize - }; + let value: &Entry = unsafe { &*ptr }; + let bucket_index = (value.hash & BUCKET_MASK) as usize; let mut linked_list = self.buckets[bucket_index].lock(); + debug_assert!(value.ref_count.load(SeqCst) == 0); let mut current: &mut Option> = &mut linked_list; while let Some(entry_ptr) = current.as_mut() { From 34f914c99e8bdc5f2fa842fd04f190c7c9e4df3b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Tue, 7 Mar 2023 08:54:42 -0500 Subject: [PATCH 39/56] Publish 0.8.7 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6067114..b0f4957 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.6" # Also update README.md when making a semver-breaking change +version = "0.8.7" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 1ae3d0dcbf2c50f7d4a6f8c6e5f16a410ecc40d6 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 10 Jul 2023 14:14:57 +0200 Subject: [PATCH 40/56] Enable the GitHub merge queue (#280) --- .github/workflows/ci.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f99d2e9..a4615f2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,11 @@ -name: Run CI +name: CI on: push: - branches: ["auto"] + branches: ["master"] pull_request: branches: ["**"] + merge_group: + types: [checks_requested] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -14,7 +16,7 @@ env: jobs: ci: - name: Run CI + name: Build and Test runs-on: ubuntu-latest strategy: @@ -35,8 +37,10 @@ jobs: run: | cargo build --no-default-features cargo build - - name: Tests - run: cargo test --all + - uses: actions-rs/cargo@v1 + with: + command: test + args: --all - name: Build codegen run: | cd string-cache-codegen && cargo build && cd .. @@ -47,7 +51,7 @@ jobs: build_result: - name: homu build finished + name: Result runs-on: ubuntu-latest needs: - "ci" From b46a64fa8c74fbef9b297ab05cace66da536e8a1 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 11 Mar 2024 10:37:57 +0100 Subject: [PATCH 41/56] Rename `master` branch to `main` (#283) --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4615f2..8b48d1a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,8 @@ name: CI on: push: - branches: ["master"] + branches: ["main"] pull_request: - branches: ["**"] merge_group: types: [checks_requested] From a7793f0e6739bb4976c80db2351163c9a7d005c2 Mon Sep 17 00:00:00 2001 From: Matthew Martin Date: Mon, 11 Mar 2024 04:44:34 -0500 Subject: [PATCH 42/56] Update phf to 0.11 (#281) --- .github/workflows/ci.yml | 2 +- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b48d1a..b133023 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.56.0, nightly, beta, stable] + rust: [1.60.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 diff --git a/Cargo.toml b/Cargo.toml index b0f4957..df159f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ default = ["serde_support"] precomputed-hash = "0.1" once_cell = "1.10.0" serde = { version = "1", optional = true } -phf_shared = "0.10" +phf_shared = "0.11" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 5eb5125..b059bfc 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.10" -phf_shared = "0.10" +phf_generator = "0.11" +phf_shared = "0.11" proc-macro2 = "1" quote = "1" From 1b636e99cb1bd8dff31bb4fc5be089002a635c12 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Wed, 17 Jul 2024 22:35:24 +0100 Subject: [PATCH 43/56] Skip bounds check for inline slices (#277) --- src/atom.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 321b0a4..d1bd7b8 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -254,8 +254,9 @@ impl ops::Deref for Atom { } INLINE_TAG => { let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET; + debug_assert!(len as usize <= MAX_INLINE_LEN); let src = inline_atom_slice(&self.unsafe_data); - str::from_utf8_unchecked(&src[..(len as usize)]) + str::from_utf8_unchecked(src.get_unchecked(..(len as usize))) } STATIC_TAG => Static::get().atoms[self.static_index() as usize], _ => debug_unreachable!(), From c8fed62876eed738b16f7e0ae3d9974391e8f59f Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Wed, 31 Jul 2024 23:13:14 +0100 Subject: [PATCH 44/56] Prefer inline representation over static (#278) * Benchmarks use longer static strings * Use inline for short strings Closes #276. --- integration-tests/build.rs | 4 +++ integration-tests/src/bench.rs | 8 ++--- integration-tests/src/lib.rs | 25 +++++++++++----- src/atom.rs | 53 ++++++++++++++++++++++++---------- string-cache-codegen/lib.rs | 51 +++++++++++++++++++++++++------- 5 files changed, 104 insertions(+), 37 deletions(-) diff --git a/integration-tests/build.rs b/integration-tests/build.rs index da40873..6293e4c 100644 --- a/integration-tests/build.rs +++ b/integration-tests/build.rs @@ -9,6 +9,7 @@ fn main() { "a", "b", "address", + "defaults", "area", "body", "font-weight", @@ -16,6 +17,9 @@ fn main() { "html", "head", "id", + "❤", + "❤💯", + "❤💯❤💯", ]) .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) .unwrap() diff --git a/integration-tests/src/bench.rs b/integration-tests/src/bench.rs index 4d8f012..45e7199 100644 --- a/integration-tests/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -153,7 +153,7 @@ bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_ref clone is_static lt] - for static_atom = test_atom!("a"), test_atom!("b")); + for static_atom = test_atom!("defaults"), test_atom!("font-weight")); bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); @@ -168,13 +168,13 @@ bench_all!([eq ne intern as_ref clone is_dynamic lt] for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b)); bench_all!([intern as_ref clone is_static] - for static_at_runtime = mk("a"), mk("b")); + for static_at_runtime = mk("defaults"), mk("font-weight")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = test_atom!("a"), mk("f")); + for static_vs_inline = test_atom!("defaults"), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = test_atom!("a"), mk(super::longer_dynamic_b)); + for static_vs_dynamic = test_atom!("defaults"), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index aaacdff..a788d93 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -45,9 +45,12 @@ fn test_as_slice() { #[test] fn test_types() { assert!(Atom::from("").is_static()); - assert!(Atom::from("id").is_static()); - assert!(Atom::from("body").is_static()); - assert!(Atom::from("a").is_static()); + assert!(Atom::from("defaults").is_static()); + assert!(Atom::from("font-weight").is_static()); + assert!(Atom::from("id").is_inline()); + assert!(Atom::from("body").is_inline()); + assert!(Atom::from("a").is_inline()); + assert!(Atom::from("address").is_inline()); assert!(Atom::from("c").is_inline()); assert!(Atom::from("zz").is_inline()); assert!(Atom::from("zzz").is_inline()); @@ -168,11 +171,13 @@ fn repr() { // static atom table, the tag values, etc. // Static atoms - check_static("a", test_atom!("a")); - check_static("address", test_atom!("address")); - check_static("area", test_atom!("area")); + check_static("defaults", test_atom!("defaults")); + check_static("font-weight", test_atom!("font-weight")); // Inline atoms + check("a", 0x0000_0000_0000_6111); + check("address", 0x7373_6572_6464_6171); + check("area", 0x0000_0061_6572_6141); check("e", 0x0000_0000_0000_6511); check("xyzzy", 0x0000_797A_7A79_7851); check("xyzzy01", 0x3130_797A_7A79_7871); @@ -193,8 +198,13 @@ fn test_threads() { #[test] fn atom_macro() { + assert_eq!(test_atom!("a"), Atom::from("a")); assert_eq!(test_atom!("body"), Atom::from("body")); + assert_eq!(test_atom!("address"), Atom::from("address")); + assert_eq!(test_atom!("❤"), Atom::from("❤")); + assert_eq!(test_atom!("❤💯"), Atom::from("❤💯")); assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); + assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯")); } #[test] @@ -292,7 +302,8 @@ fn test_from_string() { #[test] fn test_try_static() { - assert!(Atom::try_static("head").is_some()); + assert!(Atom::try_static("defaults").is_some()); + assert!(Atom::try_static("head").is_none()); assert!(Atom::try_static("not in the static table").is_none()); } diff --git a/src/atom.rs b/src/atom.rs index d1bd7b8..7a3dea9 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -99,6 +99,25 @@ impl Atom { } } + /// For the atom!() macros + #[inline(always)] + #[doc(hidden)] + pub const fn pack_inline(mut n: u64, len: u8) -> Self { + if cfg!(target_endian = "big") { + // Reverse order of top 7 bytes. + // Bottom 8 bits of `n` are zero, and we need that to remain so. + // String data is stored in top 7 bytes, tag and length in bottom byte. + n = n.to_le() << 8; + } + + let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n; + Self { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } + fn tag(&self) -> u8 { (self.unsafe_data.get() & TAG_MASK) as u8 } @@ -186,20 +205,22 @@ impl Hash for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { - Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { - let len = string_to_add.len(); - if len <= MAX_INLINE_LEN { - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); - { - let dest = inline_atom_slice_mut(&mut data); - dest[..len].copy_from_slice(string_to_add.as_bytes()) - } - Atom { - // INLINE_TAG ensures this is never zero - unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, - phantom: PhantomData, - } - } else { + let len = string_to_add.len(); + if len == 0 { + Self::pack_static(Static::empty_string_index()) + } else if len <= MAX_INLINE_LEN { + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); + { + let dest = inline_atom_slice_mut(&mut data); + dest[..len].copy_from_slice(string_to_add.as_bytes()); + } + Atom { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } else { + Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); @@ -208,8 +229,8 @@ impl<'a, Static: StaticAtomSet> From> for Atom { unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, phantom: PhantomData, } - } - }) + }) + } } } diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 0fe4819..3228946 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -187,11 +187,19 @@ impl AtomType { // which would cause divisions by zero in rust-phf. self.atoms.insert(String::new()); - let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); - let hash_state = phf_generator::generate_hash(&atoms); + // Strings over 7 bytes + empty string added to static set. + // Otherwise stored inline. + let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self + .atoms + .iter() + .map(String::as_str) + .partition(|s| s.len() > 7 || s.is_empty()); + + // Static strings + let hash_state = phf_generator::generate_hash(&static_strs); let phf_generator::HashState { key, disps, map } = hash_state; let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); - let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); + let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect(); let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let indices = 0..atoms.len() as u32; @@ -228,16 +236,33 @@ impl AtomType { let macro_name = new_term(&*self.macro_name); let module = module.parse::().unwrap(); let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase()); - let const_names: Vec<_> = atoms + let new_const_name = |atom: &str| { + let mut name = atom_prefix.clone(); + for c in atom.chars() { + name.push_str(&format!("_{:02X}", c as u32)) + } + new_term(&name) + }; + let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect(); + + // Inline strings + let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs .iter() - .map(|atom| { - let mut name = atom_prefix.clone(); - for c in atom.chars() { - name.push_str(&format!("_{:02X}", c as u32)) + .map(|s| { + let const_name = new_const_name(s); + + let mut value = 0u64; + for (index, c) in s.bytes().enumerate() { + value = value | ((c as u64) << (index * 8 + 8)); } - new_term(&name) + + let len = s.len() as u8; + + (const_name, (value, len)) }) - .collect(); + .unzip(); + let (inline_values, inline_lengths): (Vec<_>, Vec<_>) = + inline_values_and_lengths.into_iter().unzip(); quote! { #atom_doc @@ -265,6 +290,9 @@ impl AtomType { #( pub const #const_names: #type_name = #type_name::pack_static(#indices); )* + #( + pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths); + )* #macro_doc #[macro_export] @@ -272,6 +300,9 @@ impl AtomType { #( (#atoms) => { #module::#const_names }; )* + #( + (#inline_strs) => { #module::#inline_const_names }; + )* } } } From 021012ea995461efdec6ddb5eb30a1bf9481d7c4 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Thu, 1 Aug 2024 01:23:58 -0400 Subject: [PATCH 45/56] Update MSRV to 1.61. (#284) Signed-off-by: Josh Matthews --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b133023..aa6f952 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.60.0, nightly, beta, stable] + rust: [1.61.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 471ca0d8978cf6ce7dbfd170e67a103cfe62b975 Mon Sep 17 00:00:00 2001 From: cactter <109739451+cactter@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:53:57 +0800 Subject: [PATCH 46/56] The scope of the unsafe block can be appropriately reduced (#263) * Shrink unsafe block * Remove empty lines --------- Co-authored-by: Martin Robinson --- src/atom.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 7a3dea9..7e15357 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -383,28 +383,24 @@ impl Atom { #[inline(always)] fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { - unsafe { let x: *const NonZeroU64 = x; let mut data = x as *const u8; // All except the lowest byte, which is first in little-endian, last in big-endian. if cfg!(target_endian = "little") { - data = data.offset(1); + data = unsafe { data.offset(1) }; } let len = 7; - slice::from_raw_parts(data, len) - } + unsafe { slice::from_raw_parts(data, len) } } #[inline(always)] -fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { - unsafe { +fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { let x: *mut u64 = x; let mut data = x as *mut u8; // All except the lowest byte, which is first in little-endian, last in big-endian. if cfg!(target_endian = "little") { - data = data.offset(1); + data = unsafe { data.offset(1) }; } let len = 7; - slice::from_raw_parts_mut(data, len) - } + unsafe { slice::from_raw_parts_mut(data, len) } } From e03f29061d1359adae2741f3d20f1184a636221c Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 1 Feb 2025 14:43:21 +0100 Subject: [PATCH 47/56] MSRV 1.70 + Replace `once_cell::sync::Lazy` with `std::sync::OnceLock` (#287) * Update MSRV to 1.70 * Replace `once_cell::sync::Lazy` with `std::sync::OnceLock` --- .github/workflows/ci.yml | 2 +- Cargo.toml | 1 - src/atom.rs | 6 +++--- src/dynamic_set.rs | 14 +++++++++----- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa6f952..c5cdfa7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.61.0, nightly, beta, stable] + rust: [1.70.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 diff --git a/Cargo.toml b/Cargo.toml index df159f1..678eda3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,6 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -once_cell = "1.10.0" serde = { version = "1", optional = true } phf_shared = "0.11" new_debug_unreachable = "1.0.2" diff --git a/src/atom.rs b/src/atom.rs index 7e15357..adf5f62 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use crate::dynamic_set::{Entry, DYNAMIC_SET}; +use crate::dynamic_set::{dynamic_set, Entry}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; @@ -221,7 +221,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } else { Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { - let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = dynamic_set().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -257,7 +257,7 @@ impl Drop for Atom { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - DYNAMIC_SET.remove(this.unsafe_data.get() as *mut Entry); + dynamic_set().remove(this.unsafe_data.get() as *mut Entry); } } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 46e7a54..4442b4d 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -7,13 +7,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use once_cell::sync::Lazy; use parking_lot::Mutex; use std::borrow::Cow; use std::mem; use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; +use std::sync::OnceLock; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; @@ -38,16 +38,20 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -pub(crate) static DYNAMIC_SET: Lazy = Lazy::new(|| { +pub(crate) fn dynamic_set() -> &'static Set { // NOTE: Using const initialization for buckets breaks the small-stack test. // ``` // // buckets: [Mutex>>; NB_BUCKETS], // const MUTEX: Mutex>> = Mutex::new(None); // let buckets = Box::new([MUTEX; NB_BUCKETS]); // ``` - let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); - Set { buckets } -}); + static DYNAMIC_SET: OnceLock = OnceLock::new(); + + DYNAMIC_SET.get_or_init(|| { + let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); + Set { buckets } + }) +} impl Set { pub(crate) fn insert(&self, string: Cow, hash: u32) -> NonNull { From 27221cec100d883420ef0fd06a7af5593262e4d8 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 1 Feb 2025 08:45:33 -0500 Subject: [PATCH 48/56] Publish 0.8.8. Signed-off-by: Josh Matthews --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 678eda3..338eeba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.7" # Also update README.md when making a semver-breaking change +version = "0.8.8" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 4a5bb75adaa35d4076ab30091b0dbc433f369c45 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 1 Feb 2025 13:09:13 -0500 Subject: [PATCH 49/56] Publish string-cache-codegen 0.5.3. Signed-off-by: Josh Matthews --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index b059bfc..53c1f0f 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.2" # Also update ../README.md when making a semver-breaking change +version = "0.5.3" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 14ae86af8fbe7a14229296473e9ac18b67228f52 Mon Sep 17 00:00:00 2001 From: Alex Touchet <26315797+atouchet@users.noreply.github.com> Date: Sat, 1 Feb 2025 10:10:09 -0800 Subject: [PATCH 50/56] Set rust-version in Cargo.toml (#288) --- Cargo.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 338eeba..6ceabfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,13 @@ [package] name = "string_cache" version = "0.8.8" # Also update README.md when making a semver-breaking change -authors = [ "The Servo Project Developers" ] +authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" -documentation = "https://docs.rs/string_cache/" +documentation = "https://docs.rs/string_cache" edition = "2018" +rust-version = "1.70.0" # Do not `exclude` ./string-cache-codegen because we want to include # ./string-cache-codegen/shared.rs, and `include` is a pain to use From d9e888f2f61d43c1868849a506104ee02d79027c Mon Sep 17 00:00:00 2001 From: Ygg01 Date: Wed, 19 Feb 2025 11:59:59 +0100 Subject: [PATCH 51/56] Add test for atom order stability (#290) * Add test for atom order stability. * Made test for iteration order a unit test # Conflicts: # string-cache-codegen/tests/reproducibility_test.rs --- string-cache-codegen/lib.rs | 39 ++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 3228946..c703cf7 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -69,7 +69,7 @@ #![recursion_limit = "128"] use quote::quote; -use std::collections::HashSet; +use std::collections::BTreeSet; use std::fs::File; use std::io::{self, BufWriter, Write}; use std::path::Path; @@ -81,7 +81,7 @@ pub struct AtomType { static_set_doc: Option, macro_name: String, macro_doc: Option, - atoms: HashSet, + atoms: BTreeSet, } impl AtomType { @@ -114,7 +114,7 @@ impl AtomType { atom_doc: None, static_set_doc: None, macro_doc: None, - atoms: HashSet::new(), + atoms: BTreeSet::new(), } } @@ -181,6 +181,26 @@ impl AtomType { ) } + #[cfg(test)] + /// Write generated code to destination [`Vec`] and return it as [`String`] + /// + /// Used mostly for testing or displaying a value. + pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result + { + destination.write_all( + self.to_tokens() + .to_string() + // Insert some newlines to make the generated code slightly easier to read. + .replace(" [ \"", "[\n\"") + .replace("\" , ", "\",\n") + .replace(" ( \"", "\n( \"") + .replace("; ", ";\n") + .as_bytes(), + )?; + let str = String::from_utf8(destination).unwrap(); + Ok(str) + } + fn to_tokens(&mut self) -> proc_macro2::TokenStream { // `impl Default for Atom` requires the empty string to be in the static set. // This also makes sure the set in non-empty, @@ -315,3 +335,16 @@ impl AtomType { self.write_to(BufWriter::new(File::create(path)?)) } } + +#[test] +fn test_iteration_order() { + let x1 = crate::AtomType::new("foo::Atom", "foo_atom!") + .atoms(&["x", "xlink", "svg", "test"]) + .write_to_string(Vec::new()).expect("write to string cache x1"); + + let x2 = crate::AtomType::new("foo::Atom", "foo_atom!") + .atoms(&["x", "xlink", "svg", "test"]) + .write_to_string(Vec::new()).expect("write to string cache x2"); + + assert_eq!(x1, x2); +} \ No newline at end of file From 88600346b44ae50afaf52a31d6a0db1c37b94b02 Mon Sep 17 00:00:00 2001 From: Ygg01 Date: Thu, 20 Feb 2025 13:40:19 +0100 Subject: [PATCH 52/56] Publish 0.5.4 string cache codegen (#292) --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 53c1f0f..a9660bd 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.3" # Also update ../README.md when making a semver-breaking change +version = "0.5.4" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" From b92f7eb3ff504034ec58c0154f9a9b053e23da4f Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 28 Mar 2025 07:15:11 +1300 Subject: [PATCH 53/56] Implement MallocSizeOf for Atom (#289) Signed-off-by: Nico Burns --- .github/workflows/ci.yml | 1 + Cargo.toml | 3 ++- src/atom.rs | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c5cdfa7..74ade77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: run: | cargo build --no-default-features cargo build + cargo build --features malloc_size_of - uses: actions-rs/cargo@v1 with: command: test diff --git a/Cargo.toml b/Cargo.toml index 6ceabfe..287bd07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.8" # Also update README.md when making a semver-breaking change +version = "0.8.9" # Also update README.md when making a semver-breaking change authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" @@ -25,6 +25,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" serde = { version = "1", optional = true } +malloc_size_of = { version = "0.1", default-features = false, optional = true } phf_shared = "0.11" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/src/atom.rs b/src/atom.rs index adf5f62..5a8aa7f 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -82,6 +82,15 @@ pub struct Atom { phantom: PhantomData, } +// This isn't really correct as the Atoms can technically take up space. But I guess it's ok +// as it is possible to measure the size of the atom set separately/ +#[cfg(feature = "malloc_size_of")] +impl malloc_size_of::MallocSizeOf for Atom { + fn size_of(&self, _ops: &mut malloc_size_of::MallocSizeOfOps) -> usize { + 0 + } +} + // FIXME: bound removed from the struct definition before of this error for pack_static: // "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" // https://github.com/rust-lang/rust/issues/57563 From eb5ad11b53a8e132fa09781f062c47ce352f80d6 Mon Sep 17 00:00:00 2001 From: Cheng Xu <3105373+xu-cheng@users.noreply.github.com> Date: Tue, 26 Aug 2025 00:47:01 -0700 Subject: [PATCH 54/56] Update phf to 0.13 (#295) --- Cargo.toml | 4 ++-- README.md | 6 +++--- integration-tests/Cargo.toml | 6 +++--- src/lib.rs | 4 ++-- string-cache-codegen/Cargo.toml | 6 +++--- string-cache-codegen/lib.rs | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 287bd07..e73215e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.9" # Also update README.md when making a semver-breaking change +version = "0.9.0" # Also update README.md when making a semver-breaking change authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" @@ -26,7 +26,7 @@ default = ["serde_support"] precomputed-hash = "0.1" serde = { version = "1", optional = true } malloc_size_of = { version = "0.1", default-features = false, optional = true } -phf_shared = "0.11" +phf_shared = "0.13" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/README.md b/README.md index fdf4c0a..429d1ec 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.8" +string_cache = "0.9" ``` In `lib.rs`: @@ -31,10 +31,10 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.8" +string_cache = "0.9" [build-dependencies] -string_cache_codegen = "0.5" +string_cache_codegen = "0.6" ``` In `build.rs`: diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index a0b047c..12c0ad0 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -16,11 +16,11 @@ test = true unstable = [] [dependencies] -string_cache = { version = "0.8", path = ".." } +string_cache = { version = "0.9", path = ".." } [dev-dependencies] rand = "0.8" -string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } [build-dependencies] -string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } diff --git a/src/lib.rs b/src/lib.rs index 441cb4e..3cc29b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,10 +25,10 @@ //! In `Cargo.toml`: //! ```toml //! [dependencies] -//! string_cache = "0.8" +//! string_cache = "0.9" //! //! [dev-dependencies] -//! string_cache_codegen = "0.5" +//! string_cache_codegen = "0.6" //! ``` //! //! In `build.rs`: diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a9660bd..a6e9da0 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.4" # Also update ../README.md when making a semver-breaking change +version = "0.6.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.11" -phf_shared = "0.11" +phf_generator = "0.13" +phf_shared = "0.13" proc-macro2 = "1" quote = "1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index c703cf7..69ff612 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -19,10 +19,10 @@ //! build = "build.rs" //! //! [dependencies] -//! string_cache = "0.8" +//! string_cache = "0.9" //! //! [build-dependencies] -//! string_cache_codegen = "0.5" +//! string_cache_codegen = "0.6" //! ``` //! //! In `build.rs`: @@ -183,8 +183,8 @@ impl AtomType { #[cfg(test)] /// Write generated code to destination [`Vec`] and return it as [`String`] - /// - /// Used mostly for testing or displaying a value. + /// + /// Used mostly for testing or displaying a value. pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result { destination.write_all( @@ -347,4 +347,4 @@ fn test_iteration_order() { .write_to_string(Vec::new()).expect("write to string cache x2"); assert_eq!(x1, x2); -} \ No newline at end of file +} From 533b64e132ec65a616317d2607f536da024d19a9 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 5 Sep 2025 13:01:12 +0100 Subject: [PATCH 55/56] Make macros accept idents where atom value is a valid ident (#296) * Make macros accept idents where atom is a valid atom This means that local_name!(html) will work as well as local_name!("html") Signed-off-by: Nico Burns * Fix tests build Signed-off-by: Nico Burns --------- Signed-off-by: Nico Burns --- integration-tests/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 55 ++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 12c0ad0..4562747 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -19,7 +19,7 @@ unstable = [] string_cache = { version = "0.9", path = ".." } [dev-dependencies] -rand = "0.8" +rand = { version = "0.8", features = ["small_rng"] } string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } [build-dependencies] diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 69ff612..525ef3a 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -68,6 +68,7 @@ #![recursion_limit = "128"] +use proc_macro2::Ident; use quote::quote; use std::collections::BTreeSet; use std::fs::File; @@ -185,8 +186,7 @@ impl AtomType { /// Write generated code to destination [`Vec`] and return it as [`String`] /// /// Used mostly for testing or displaying a value. - pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result - { + pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result { destination.write_all( self.to_tokens() .to_string() @@ -223,6 +223,30 @@ impl AtomType { let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let indices = 0..atoms.len() as u32; + fn is_valid_ident(name: &str) -> bool { + let begins_with_letter_or_underscore = name + .chars() + .next() + .is_some_and(|c| c.is_alphabetic() || c == '_'); + let is_alphanumeric = name.chars().all(|c| c.is_alphanumeric() || c == '_'); + + begins_with_letter_or_underscore && is_alphanumeric + } + + let atoms_for_idents: Vec<&str> = atoms + .iter() + .copied() + .filter(|x| is_valid_ident(x)) + .collect(); + let atom_idents: Vec = atoms_for_idents.iter().map(|atom| new_term(atom)).collect(); + + let istrs_for_idents: Vec<&str> = inline_strs + .iter() + .copied() + .filter(|x| is_valid_ident(x)) + .collect(); + let istr_idents: Vec = istrs_for_idents.iter().map(|atom| new_term(atom)).collect(); + let hashes: Vec = atoms .iter() .map(|string| { @@ -249,8 +273,9 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!(), }; - let new_term = - |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); + fn new_term(string: &str) -> Ident { + Ident::new(string, proc_macro2::Span::call_site()) + } let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); @@ -264,6 +289,16 @@ impl AtomType { new_term(&name) }; let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect(); + let ident_const_names: Vec<_> = atoms_for_idents + .iter() + .copied() + .map(new_const_name) + .collect(); + let ident_inline_const_names: Vec<_> = istrs_for_idents + .iter() + .copied() + .map(new_const_name) + .collect(); // Inline strings let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs @@ -323,6 +358,12 @@ impl AtomType { #( (#inline_strs) => { #module::#inline_const_names }; )* + #( + (#atom_idents) => { #module::#ident_const_names }; + )* + #( + (#istr_idents) => { #module::#ident_inline_const_names }; + )* } } } @@ -340,11 +381,13 @@ impl AtomType { fn test_iteration_order() { let x1 = crate::AtomType::new("foo::Atom", "foo_atom!") .atoms(&["x", "xlink", "svg", "test"]) - .write_to_string(Vec::new()).expect("write to string cache x1"); + .write_to_string(Vec::new()) + .expect("write to string cache x1"); let x2 = crate::AtomType::new("foo::Atom", "foo_atom!") .atoms(&["x", "xlink", "svg", "test"]) - .write_to_string(Vec::new()).expect("write to string cache x2"); + .write_to_string(Vec::new()) + .expect("write to string cache x2"); assert_eq!(x1, x2); } From fd1475d07d5ff30e5d2ca343671577c77ab5679a Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 14:50:19 +0100 Subject: [PATCH 56/56] Bump version of string-cache-codegen to v0.6.1 (#297) Signed-off-by: Nico Burns --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a6e9da0..20eced9 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.6.0" # Also update ../README.md when making a semver-breaking change +version = "0.6.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0"