From 2dd32b7b994e5aebb920709697ec9387087bc94c Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 27 Nov 2015 12:26:00 -0800 Subject: [PATCH 001/212] Add "novalidate" to atom list. --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 17131e2..2efd5c9 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -740,6 +740,7 @@ pub static ATOMS: &'static [&'static str] = &[ "notin", "notprsubset", "notsubset", + "novalidate", "nowrap", "number", "numoctaves", From 1bd99f16042ef58d0d06ad0a28e61bf9d4382797 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Fri, 27 Nov 2015 16:15:27 -0500 Subject: [PATCH 002/212] Bump the version number. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c6390f2..7448f19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.0" +version = "0.2.1" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 9a2f529a4fe9a4ceb0a9eff42591b1531584263a Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sun, 29 Nov 2015 02:21:08 -0800 Subject: [PATCH 003/212] Add formnovalidate to atom list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 2efd5c9..ac7e7be 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -494,6 +494,7 @@ pub static ATOMS: &'static [&'static str] = &[ "format", "formenctype", "formmethod", + "formnovalidate", "formtarget", "frameborder", "framespacing", From be84acff1664ed906d84ffd25466376f2cba601c Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sun, 29 Nov 2015 12:34:38 -0800 Subject: [PATCH 004/212] Bump version number to v0.2.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7448f19..870f113 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.1" +version = "0.2.2" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 9ce5e1984abafcea5cd4fb6700009791320c618a Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 20:38:24 -0500 Subject: [PATCH 005/212] Add 'change' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/change --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ac7e7be..6a64ef4 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -258,6 +258,7 @@ pub static ATOMS: &'static [&'static str] = &[ "cellpadding", "cellspacing", "center", + "change", "char", "charoff", "charset", From f0e2f66116c3b110c9a7c6c2c60fc6fbab4d697a Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 20:39:43 -0500 Subject: [PATCH 006/212] Add 'readystatechange' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/readystatechange --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 6a64ef4..ba938ce 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -927,6 +927,7 @@ pub static ATOMS: &'static [&'static str] = &[ "rationals", "rb", "readonly", + "readystatechange", "real", "reals", "rect", From 47c268b6e2d4512e9b238f8b76dac932baac702d Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 21:56:47 -0500 Subject: [PATCH 007/212] Add 'DOMContentLoaded' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/DOMContentLoaded --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ba938ce..6f65583 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -368,6 +368,7 @@ pub static ATOMS: &'static [&'static str] = &[ "dl", "domain", "domainofapplication", + "DOMContentLoaded", "dominant-baseline", "draggable", "dur", From 3ddd8f1592c956caa8b4b4cbf84f97ff4926a332 Mon Sep 17 00:00:00 2001 
From: Corey Farwell Date: Thu, 10 Dec 2015 21:59:46 -0500 Subject: [PATCH 008/212] Add 'load' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/load --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 6f65583..9ceb1d9 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -616,6 +616,7 @@ pub static ATOMS: &'static [&'static str] = &[ "list-style-position", "list-style-type", "ln", + "load", "local", "log", "logbase", From 2f83b7f9df9f57979d874475972c2b5d02016761 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:09:35 -0500 Subject: [PATCH 009/212] Add 'afterscriptexecute' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/afterscriptexecute --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 9ceb1d9..bc36d14 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -103,6 +103,7 @@ pub static ATOMS: &'static [&'static str] = &[ "active", "actuate", "additive", + "afterscriptexecute", "align", "alignment-baseline", "alignmentscope", From 15a51ffc88a4411ebe032526e50202f135767391 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:10:34 -0500 Subject: [PATCH 010/212] Add 'beforescriptexecute' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/beforescriptexecute --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index bc36d14..a71f205 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -215,6 +215,7 @@ pub static ATOMS: &'static [&'static str] = &[ "bbox", "bdi", "bdo", + "beforescriptexecute", "beforeunload", "begin", "bevelled", From 2388dc68b746df1ef86864cd6e3f5e7e786c3ee4 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:33:07 -0500 Subject: [PATCH 011/212] 
Add 'loadend' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/loadend --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index a71f205..defc5ea 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -619,6 +619,7 @@ pub static ATOMS: &'static [&'static str] = &[ "list-style-type", "ln", "load", + "loadend", "local", "log", "logbase", From 4eb03dc4bb90138f43bf120af49c36c485e27950 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:42:30 -0500 Subject: [PATCH 012/212] Add 'loadstart' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/loadstart --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index defc5ea..aab11ad 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -619,6 +619,7 @@ pub static ATOMS: &'static [&'static str] = &[ "list-style-type", "ln", "load", + "loadstart", "loadend", "local", "log", From 466899f5819650b91e74b640031627e5a60cf6b4 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:51:12 -0500 Subject: [PATCH 013/212] Add 'webglcontextcreationerror' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/webglcontextcreationerror --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index aab11ad..cd45d06 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -1168,6 +1168,7 @@ pub static ATOMS: &'static [&'static str] = &[ "v-mathematical", "vspace", "wbr", + "webglcontextcreationerror", "week", "when", "white-space", From fc91e33e70cf527981d71f1406726c073fba336b Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:07:13 -0500 Subject: [PATCH 014/212] Add 'storage' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/storage --- 
src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index cd45d06..6eb4178 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -1049,6 +1049,7 @@ pub static ATOMS: &'static [&'static str] = &[ "stop", "stop-color", "stop-opacity", + "storage", "stretchy", "strike", "strikethrough-position", From 085b999d846c6fe40a1ec03d7b21407effa4906d Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:10:53 -0500 Subject: [PATCH 015/212] Add 'message' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/message --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 6eb4178..3950558 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -683,6 +683,7 @@ pub static ATOMS: &'static [&'static str] = &[ "menu", "menuitem", "merror", + "message", "metadata", "meter", "method", From 92b83cae2767bc304724f35aa736cba998b3cf43 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:16:03 -0500 Subject: [PATCH 016/212] Add 'abort' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/abort --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 3950558..58614ca 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -89,6 +89,7 @@ pub static ATOMS: &'static [&'static str] = &[ "http://www.w3.org/1998/Math/MathML", "abbr", + "abort", "abs", "accent", "accent-height", From 6a07c773b8019f03e7c3ec6a714268c2f745f0f1 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:23:56 -0500 Subject: [PATCH 017/212] Bump version: 0.2.2 -> 0.2.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 870f113..145a480 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = 
"string_cache" -version = "0.2.2" +version = "0.2.3" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From dc5433833d44c99046e6f27c1e9c8110b4b53fb3 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 11 Dec 2015 11:07:36 -0800 Subject: [PATCH 018/212] Add invalid to the list and bump version number --- Cargo.toml | 2 +- src/static_atom_list.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 145a480..7a6e41b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.3" +version = "0.2.4" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 58614ca..1bcaf98 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -565,6 +565,7 @@ pub static ATOMS: &'static [&'static str] = &[ "intercept", "intersect", "interval", + "invalid", "inverse", "irrelevant", "isindex", From aa43810dddcd5830464c50cf65303ced400050f5 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Mon, 30 Nov 2015 11:17:32 -0600 Subject: [PATCH 019/212] Added more atoms to static_atom_list. The list now includes: * all the CSS attributes and DOM events used by Servo, * the element ids such as #text used by Servo, * the User Agent strings such as Mozilla used by Servo. At this point, every string constant in the Servo code base can be atomized. 
--- src/static_atom_list.rs | 51 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 1bcaf98..e97b51f 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -88,6 +88,20 @@ pub static ATOMS: &'static [&'static str] = &[ "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML", + "#text", + "#comment", + "#document", + "#document-fragment", + + // User agent strings + "4.0", + "Gecko", + "Linux", + "Mac", + "Mozilla", + "Netscape", + "Win32", + "abbr", "abort", "abs", @@ -219,10 +233,12 @@ pub static ATOMS: &'static [&'static str] = &[ "beforescriptexecute", "beforeunload", "begin", + "bevel", "bevelled", "bgcolor", "bias", "blink", + "blob", "border", "border-bottom", "border-bottom-color", @@ -249,6 +265,7 @@ pub static ATOMS: &'static [&'static str] = &[ "border-top-width", "border-width", "bottom", + "butt", "bvar", "by", "calcmode", @@ -327,6 +344,7 @@ pub static ATOMS: &'static [&'static str] = &[ "csymbol", "curl", "cursor", + "customevent", "cx", "cy", "d", @@ -396,6 +414,7 @@ pub static ATOMS: &'static [&'static str] = &[ "error", "eulergamma", "event", + "events", "exists", "exp", "exponent", @@ -469,6 +488,8 @@ pub static ATOMS: &'static [&'static str] = &[ "filterRes", "filterunits", "filterUnits", + "float", + "flood", "flood-color", "flood-opacity", "floor", @@ -541,6 +562,7 @@ pub static ATOMS: &'static [&'static str] = &[ "href", "hreflang", "hspace", + "htmlevents", "http-equiv", "i", "icon", @@ -554,6 +576,7 @@ pub static ATOMS: &'static [&'static str] = &[ "imaginaryi", "img", "implies", + "important", "in", "in2", "index", @@ -581,14 +604,18 @@ pub static ATOMS: &'static [&'static str] = &[ "kernelunitlength", "kernelUnitLength", "kerning", + "keyboardevent", "keydown", + "keyevents", "keygen", "keypoints", "keyPoints", + "keypress", "keysplines", "keySplines", "keytimes", "keyTimes", + "keyup", "label", "lambda", 
"lang", @@ -686,6 +713,7 @@ pub static ATOMS: &'static [&'static str] = &[ "menuitem", "merror", "message", + "messageevent", "metadata", "meter", "method", @@ -699,16 +727,21 @@ pub static ATOMS: &'static [&'static str] = &[ "minus", "min-width", "missing-glyph", + "miter", "mlabeledtr", "mmultiscripts", "mn", "mo", "mode", - "month", "moment", "momentabout", - "movablelimits", + "month", + "mousedown", + "mouseevent", + "mouseevents", "mouseover", + "mouseup", + "movablelimits", "mover", "mozbrowser", "mpadded", @@ -729,6 +762,7 @@ pub static ATOMS: &'static [&'static str] = &[ "mtext", "mtr", "multicol", + "multipart/form-data", "multiple", "munder", "munderover", @@ -739,6 +773,7 @@ pub static ATOMS: &'static [&'static str] = &[ "neq", "nest", "nextid", + "no message", "nobr", "noembed", "nohref", @@ -757,8 +792,10 @@ pub static ATOMS: &'static [&'static str] = &[ "numoctaves", "numOctaves", "occurrence", + "off", "offset", "ol", + "on", "onabort", "onactivate", "onafterprint", @@ -910,6 +947,7 @@ pub static ATOMS: &'static [&'static str] = &[ "polygon", "polyline", "position", + "post", "poster", "power", "prefetch", @@ -933,6 +971,7 @@ pub static ATOMS: &'static [&'static str] = &[ "radio", "radiogroup", "radius", + "range", "rationals", "rb", "readonly", @@ -964,6 +1003,7 @@ pub static ATOMS: &'static [&'static str] = &[ "requiredfeatures", "requiredFeatures", "reset", + "resize", "restart", "result", "rev", @@ -971,6 +1011,7 @@ pub static ATOMS: &'static [&'static str] = &[ "role", "root", "rotate", + "round", "rowalign", "rowlines", "rows", @@ -1036,6 +1077,7 @@ pub static ATOMS: &'static [&'static str] = &[ "speed", "spreadmethod", "spreadMethod", + "square", "src", "srcdoc", "standby", @@ -1097,6 +1139,7 @@ pub static ATOMS: &'static [&'static str] = &[ "tel", "tendsto", "text", + "text/plain", "text-align", "text-anchor", "text-decoration", @@ -1112,6 +1155,7 @@ pub static ATOMS: &'static [&'static str] = &[ "times", "to", "top", + "touchevent", 
"transform", "transition-delay", "transition-duration", @@ -1127,6 +1171,8 @@ pub static ATOMS: &'static [&'static str] = &[ "u", "u1", "u2", + "uievent", + "uievents", "ul", "underline-position", "underline-thickness", @@ -1140,6 +1186,7 @@ pub static ATOMS: &'static [&'static str] = &[ "url", "use", "usemap", + "UTF-8", "valign", "v-alphabetic", "value", From 73c022344ba818d672f01b49d77fa4d58a0ee115 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Tue, 15 Dec 2015 13:29:18 -0600 Subject: [PATCH 020/212] Add non-mutating AsciiExt methods to Atom. We add the following methods from AsciiExt: ``` pub fn is_ascii(&self) -> bool; pub fn to_ascii_uppercase(&self) -> Atom; pub fn to_ascii_lowercase(&self) -> Atom; pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool; ``` We can't implement AsciiExt in full because it requires mutable access. --- src/atom/mod.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index cc0a85e..dd01db3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -17,6 +17,7 @@ use std::ops; use std::ptr; use std::slice; use std::str; +use std::ascii::AsciiExt; use std::cmp::Ordering::{self, Equal}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; @@ -294,6 +295,31 @@ impl Deserialize for Atom { } } +// AsciiExt requires mutating methods, so we just implement the non-mutating ones. +// We don't need to implement is_ascii because there's no performance improvement +// over the one from &str. 
+impl Atom { + pub fn to_ascii_uppercase(&self) -> Atom { + if self.chars().all(char::is_uppercase) { + self.clone() + } else { + Atom::from(&*((&**self).to_ascii_uppercase())) + } + } + + pub fn to_ascii_lowercase(&self) -> Atom { + if self.chars().all(char::is_lowercase) { + self.clone() + } else { + Atom::from(&*((&**self).to_ascii_lowercase())) + } + } + + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + (self == other) || (&**self).eq_ignore_ascii_case(&**other) + } +} + // Atoms use a compact representation which fits this enum in a single u64. // Inlining avoids actually constructing the unpacked representation in memory. #[allow(missing_copy_implementations)] @@ -655,4 +681,35 @@ mod tests { fn string_cache_entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } + + #[test] + fn test_ascii_lowercase() { + assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); + assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); + assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); + } + + #[test] + fn test_ascii_uppercase() { + assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); + assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN FOX!")); + assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); + } + + #[test] + fn test_eq_ignore_ascii_case() { + assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); + assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); + assert!(Atom::from("Je vais à 
Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); + assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); + assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); + assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); + } + } From 02f022def51ccf6c0d9383e8468246bf4f1b2242 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Tue, 15 Dec 2015 14:57:32 -0600 Subject: [PATCH 021/212] Version bump. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7a6e41b..7854557 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.4" +version = "0.2.5" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 4c331c6f4d8c7a2000b0d29045d7645975ad4ba6 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 2 Feb 2016 10:29:29 +0100 Subject: [PATCH 022/212] Allow heapsize 0.2 --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7854557..21e981c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.5" +version = "0.2.6" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -41,7 +41,7 @@ version = "0" optional = true [dependencies.heapsize] -version = "0.1.1" +version = ">=0.1.1, <0.3" optional = true [dependencies.heapsize_plugin] From a4abe871b56f7fef35af7bf9cdd10fd593fe4485 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Thu, 4 Feb 2016 01:30:56 +0100 Subject: [PATCH 023/212] Derive HeapSizeOf for QualName --- Cargo.toml | 2 +- src/namespace.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 21e981c..9c0f6fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.6" +version = "0.2.7" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/namespace.rs b/src/namespace.rs index 6fe0564..4b4d142 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -20,6 +20,7 @@ pub struct Namespace(pub Atom); /// A name with a namespace. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] +#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct QualName { pub ns: Namespace, pub local: Atom, From 4daa491cb9e369ec07ba32b75e8243983b6a8ee9 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Fri, 5 Feb 2016 17:10:52 -0500 Subject: [PATCH 024/212] Add preload to static list. 
--- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index e97b51f..a169d2e 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -56,6 +56,7 @@ pub static ATOMS: &'static [&'static str] = &[ "param", "plaintext", "pre", + "preload", "rp", "rt", "script", From 2e5a0365babf7c2554c99c40551a80fadb40166d Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 5 Feb 2016 17:33:22 -0500 Subject: [PATCH 025/212] Version bump to 0.2.8 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9c0f6fc..2395fc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.7" +version = "0.2.8" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From d1e0142e857b496dcbb2ffe5d0248773afb8a443 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 10 Feb 2016 16:17:47 +0100 Subject: [PATCH 026/212] Allow heapsize 0.3 --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2395fc8..8e06f29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.8" +version = "0.2.9" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -41,7 +41,7 @@ version = "0" optional = true [dependencies.heapsize] -version = ">=0.1.1, <0.3" +version = ">=0.1.1, <0.4" optional = true [dependencies.heapsize_plugin] From 37c7ec607a8dd1d35ca92c661e810b5683823e1b Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 02:07:45 +0100 Subject: [PATCH 027/212] Implement From for Atom --- src/atom/mod.rs | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index dd01db3..ab680a4 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -11,14 +11,15 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::ascii::AsciiExt; +use std::borrow::Cow; +use std::cmp::Ordering::{self, Equal}; use std::fmt; use std::mem; use std::ops; use std::ptr; use std::slice; use std::str; -use std::ascii::AsciiExt; -use std::cmp::Ordering::{self, Equal}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; @@ -53,13 +54,13 @@ struct StringCacheEntry { } impl StringCacheEntry { - fn new(next: Option>, hash: u64, string_to_add: &str) + fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { StringCacheEntry { next_in_bucket: next, hash: hash, ref_count: AtomicIsize::new(1), - string: String::from(string_to_add), + string: string, } } } @@ -71,14 +72,14 @@ impl StringCache { } } - fn add(&mut self, string_to_add: &str, hash: u64) -> *mut StringCacheEntry { + fn add(&mut self, string: Cow, hash: u64) -> *mut StringCacheEntry { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { - if entry.hash == hash && entry.string == string_to_add { + if entry.hash == hash && entry.string == &*string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { return &mut **entry; } @@ -94,11 +95,17 @@ impl StringCache { } } 
debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let string = string.into_owned(); + let _string_clone = if cfg!(feature = "log-events") { + string.clone() + } else { + "".to_owned() + }; let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index].take(), hash, string_to_add)); + self.buckets[bucket_index].take(), hash, string)); let ptr: *mut StringCacheEntry = &mut *entry; self.buckets[bucket_index] = Some(entry); - log!(Event::Insert(ptr as u64, String::from(string_to_add))); + log!(Event::Insert(ptr as u64, _string_clone)); ptr } @@ -148,10 +155,10 @@ impl Atom { } } -impl<'a> From<&'a str> for Atom { +impl<'a> From> for Atom { #[inline] - fn from(string_to_add: &str) -> Atom { - let unpacked = match STATIC_ATOM_SET.get_index_or_hash(string_to_add) { + fn from(string_to_add: Cow<'a, str>) -> Atom { + let unpacked = match STATIC_ATOM_SET.get_index_or_hash(&*string_to_add) { Ok(id) => Static(id as u32), Err(hash) => { let len = string_to_add.len(); @@ -171,6 +178,20 @@ impl<'a> From<&'a str> for Atom { } } +impl<'a> From<&'a str> for Atom { + #[inline] + fn from(string_to_add: &str) -> Atom { + Atom::from(Cow::Borrowed(string_to_add)) + } +} + +impl From for Atom { + #[inline] + fn from(string_to_add: String) -> Atom { + Atom::from(Cow::Owned(string_to_add)) + } +} + impl Clone for Atom { #[inline(always)] fn clone(&self) -> Atom { @@ -712,4 +733,8 @@ mod tests { assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); } + #[test] + fn test_from_string() { + assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); + } } From 17279dd59910ba1412034364df2e512cd47c0b94 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 10:58:17 +0100 Subject: [PATCH 028/212] Allow to retrieve the heap size of the dynamic string cache --- src/atom/mod.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index dd01db3..086181a 100644 
--- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -9,6 +9,9 @@ #![allow(non_upper_case_globals)] +#[cfg(feature = "heap_size")] +use heapsize::HeapSizeOf; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; @@ -37,14 +40,34 @@ macro_rules! log (($e:expr) => (())); const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; + struct StringCache { buckets: [Option>; NB_BUCKETS], } +#[cfg(feature = "heap_size")] +impl HeapSizeOf for StringCache { + fn heap_size_of_children(&self) -> usize { + self.buckets.iter().fold(0, |size, bucket| size + bucket.heap_size_of_children()) + } +} + lazy_static! { static ref STRING_CACHE: Mutex = Mutex::new(StringCache::new()); } +/// A token that represents the heap used by the dynamic string cache. +#[cfg(feature = "heap_size")] +pub struct StringCacheHeap; + +#[cfg(feature = "heap_size")] +impl HeapSizeOf for StringCacheHeap { + fn heap_size_of_children(&self) -> usize { + STRING_CACHE.lock().unwrap().heap_size_of_children() + } +} + +#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] struct StringCacheEntry { next_in_bucket: Option>, hash: u64, From d053d557821fa62d9746f2cf47961a2331b36b4c Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 14:49:41 +0100 Subject: [PATCH 029/212] Test on OS X through Travis --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 3c4848a..75b5bc3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,9 @@ rust: - nightly - beta - stable +os: + - linux + - osx script: - cargo build - cargo test From dda983bcf9ff4fb1f3523e43330b4606bb6c6b7e Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 15:59:49 +0100 Subject: [PATCH 030/212] Bump to 0.2.10 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8e06f29..19c6bbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = 
"0.2.9" +version = "0.2.10" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 1466ce68d7a0b4082a529fa42be29fb74fa8fa65 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 27 Feb 2016 19:22:06 +0100 Subject: [PATCH 031/212] Update to Rust 2016-02-26 --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 19c6bbc..a5a966e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.10" +version = "0.2.11" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -29,7 +29,7 @@ heap_size = ["heapsize", "heapsize_plugin"] [dependencies] lazy_static = "0.1.10" -serde = "0.6" +serde = ">=0.6, <0.8" phf_shared = "0.7.4" debug_unreachable = "0.0.6" @@ -45,7 +45,7 @@ version = ">=0.1.1, <0.4" optional = true [dependencies.heapsize_plugin] -version = "0.1.1" +version = "0.1.4" optional = true [build-dependencies] From 805296783f926595e043a53167600165298adc18 Mon Sep 17 00:00:00 2001 From: Arnaud Marant Date: Sat, 9 Apr 2016 21:49:09 +0200 Subject: [PATCH 032/212] add dirname atom for HTMLInputElement attribute related to servo issue : https://github.com/servo/servo/issues/10491 --- Cargo.toml | 2 +- src/static_atom_list.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a5a966e..02b0425 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.11" +version = "0.2.12" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index a169d2e..ebb4965 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -379,6 +379,7 @@ pub static ATOMS: &'static [&'static str] = &[ "diffuseConstant", "dir", "direction", + "dirname", "disabled", "discard", "display", From dc892173fcb6367f813aa6538dc23edd1b631608 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 12 Apr 2016 09:56:08 +0200 Subject: [PATCH 033/212] Define Atom::get_hash(). This is already used by rust-selectors for its Bloom filter, and is implemented there by accessing the data field directly. Also, in a Gecko-based Atom implementation, the implementation will need to be different, so it's better to have it here. --- src/atom/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 80dc68e..f4d087a 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -176,6 +176,10 @@ impl Atom { unsafe fn unpack(&self) -> UnpackedAtom { UnpackedAtom::from_packed(self.data) } + + pub fn get_hash(&self) -> u32 { + ((self.data >> 32) ^ self.data) as u32 + } } impl<'a> From> for Atom { From 6b9ffe9205ef611f07d85ecf646f7fb9d062ceb1 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 12 Apr 2016 10:05:29 +0200 Subject: [PATCH 034/212] Bump version. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 02b0425..04860f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.12" +version = "0.2.13" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From f86afb6a88a4ec08faf3ec38acc5ad03d1d04e6c Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Thu, 14 Apr 2016 15:57:47 +0200 Subject: [PATCH 035/212] Update lazy_static. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 04860f4..292dbe9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ unstable = [] heap_size = ["heapsize", "heapsize_plugin"] [dependencies] -lazy_static = "0.1.10" +lazy_static = "0.2" serde = ">=0.6, <0.8" phf_shared = "0.7.4" debug_unreachable = "0.0.6" From 986df64f19d73033c61a6ed0b4304beef75bc50e Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Wed, 27 Apr 2016 18:55:11 +0100 Subject: [PATCH 036/212] Fix event log example --- .travis.yml | 3 ++- examples/event-log/src/main.rs | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 75b5bc3..e99d9a3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ script: - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features heap_size; fi" - - "cd examples/summarize-events/ && cargo build" + - "cd examples/event-log/ && cargo build && cd ../.." + - "cd examples/summarize-events/ && cargo build && cd ../.." 
notifications: webhooks: http://build.servo.org:54856/travis diff --git a/examples/event-log/src/main.rs b/examples/event-log/src/main.rs index 6efff10..89adfdf 100644 --- a/examples/event-log/src/main.rs +++ b/examples/event-log/src/main.rs @@ -13,19 +13,21 @@ use string_cache::Atom; use string_cache::event; use std::io; +use std::io::prelude::*; fn main() { println!("Reading stdin to end of file"); - let stdin = io::stdin().read_to_string().unwrap(); + let mut stdin = String::new(); + io::stdin().read_to_string(&mut stdin).unwrap(); let mut atoms = vec![]; - for word in stdin.as_slice().split(|c: char| c.is_whitespace()) { - atoms.push(Atom::from_slice(word)); + for word in stdin.split(|c: char| c.is_whitespace()) { + atoms.push(Atom::from(word)); } - let log = event::LOG.lock(); + let log = event::LOG.lock().unwrap(); - println!("Created {:u} atoms, logged {:u} events:", atoms.len(), log.len()); + println!("Created {} atoms, logged {} events:", atoms.len(), log.len()); for e in log.iter() { - println!("{}", e); + println!("{:?}", e); } } From 7fd2514e56612e4319090224bc3791e5866b46a3 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Mon, 2 May 2016 09:32:12 -0700 Subject: [PATCH 037/212] Update/specify dependency versions --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 292dbe9..98fe44d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,13 +31,13 @@ heap_size = ["heapsize", "heapsize_plugin"] lazy_static = "0.2" serde = ">=0.6, <0.8" phf_shared = "0.7.4" -debug_unreachable = "0.0.6" +debug_unreachable = "0.1.1" [dev-dependencies] -rand = "0" +rand = "0.3" [dependencies.rustc-serialize] -version = "0" +version = "0.3" optional = true [dependencies.heapsize] From 1a7316db9160e5bc1423687bb252c3e72819f313 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Mon, 2 May 2016 11:36:05 -0700 Subject: [PATCH 038/212] Release version 0.2.14 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/Cargo.toml b/Cargo.toml index 98fe44d..93c620e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.13" +version = "0.2.14" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 478c24c360947b93ae77abf535f6d9b39e0f0e3f Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Wed, 11 May 2016 16:18:46 +0200 Subject: [PATCH 039/212] Implement comparison between &Atom and &str. --- Cargo.toml | 2 +- src/atom/mod.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 93c620e..1aa67e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.14" +version = "0.2.15" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index f4d087a..2136a8b 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -182,6 +182,18 @@ impl Atom { } } +impl PartialEq for Atom { + fn eq(&self, other: &str) -> bool { + &self[..] == other + } +} + +impl PartialEq for str { + fn eq(&self, other: &Atom) -> bool { + self == &other[..] + } +} + impl<'a> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Atom { From 2acc85255007c76c5cbb5a597504e6ce34775cb9 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Tue, 17 May 2016 15:01:23 -0700 Subject: [PATCH 040/212] Add font families to the static atoms. 
--- src/static_atom_list.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ebb4965..00433fa 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -103,6 +103,13 @@ pub static ATOMS: &'static [&'static str] = &[ "Netscape", "Win32", + // Font families + "serif", + "sans-serif", + "cursive", + "fantasy", + "monospace", + "abbr", "abort", "abs", @@ -1054,7 +1061,6 @@ pub static ATOMS: &'static [&'static str] = &[ "sep", "separator", "separators", - "serif", "set", "setdiff", "shape", From c5d1e4ec9275f24c944d1d166eafa2c00fd4c2f0 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Fri, 13 May 2016 19:49:05 -0700 Subject: [PATCH 041/212] Implement trivial BorrowedAtom, BorrowedNamespace, with_str, and eq_str_ignore_ascii_case. --- Cargo.toml | 2 +- src/atom/mod.rs | 26 +++++++++++++++++++++++++- src/lib.rs | 4 ++-- src/namespace.rs | 16 ++++++++++++++++ 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1aa67e4..4e7135e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.15" +version = "0.2.16" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 2136a8b..23ea24e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -171,6 +171,21 @@ pub struct Atom { pub data: u64, } +pub struct BorrowedAtom<'a>(pub &'a Atom); + +impl<'a> ops::Deref for BorrowedAtom<'a> { + type Target = Atom; + fn deref(&self) -> &Atom { + self.0 + } +} + +impl<'a> PartialEq for BorrowedAtom<'a> { + fn eq(&self, other: &Atom) -> bool { + self.0 == other + } +} + impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { @@ -180,6 +195,11 @@ impl Atom { pub fn get_hash(&self) -> u32 { ((self.data >> 32) ^ self.data) as u32 } + + pub fn with_str(&self, cb: F) -> Output + where F: FnOnce(&str) -> Output { + cb(self) + } } impl PartialEq for Atom { @@ -376,7 +396,11 @@ impl Atom { } pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { - (self == other) || (&**self).eq_ignore_ascii_case(&**other) + (self == other) || self.eq_str_ignore_ascii_case(&**other) + } + + pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool { + (&**self).eq_ignore_ascii_case(other) } } diff --git a/src/lib.rs b/src/lib.rs index 65ad039..7c5bdac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,8 +25,8 @@ extern crate serde; extern crate phf_shared; -pub use atom::Atom; -pub use namespace::{Namespace, QualName}; +pub use atom::{Atom, BorrowedAtom}; +pub use namespace::{BorrowedNamespace, Namespace, QualName}; #[macro_export] macro_rules! qualname { diff --git a/src/namespace.rs b/src/namespace.rs index 4b4d142..cae98dd 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -10,6 +10,7 @@ //! **Note:** This may move as string-cache becomes less Web-specific. use atom::Atom; +use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. 
/// Whether a given string represents a namespace is contextual, so this is @@ -18,6 +19,21 @@ use atom::Atom; #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); +pub struct BorrowedNamespace<'a>(pub &'a Namespace); + +impl<'a> ops::Deref for BorrowedNamespace<'a> { + type Target = Namespace; + fn deref(&self) -> &Namespace { + self.0 + } +} + +impl<'a> PartialEq for BorrowedNamespace<'a> { + fn eq(&self, other: &Namespace) -> bool { + self.0 == other + } +} + /// A name with a namespace. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] From e43cf3d5774d5e55328776cf7449df4a07466236 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 21 May 2016 18:45:51 -0400 Subject: [PATCH 042/212] Add nonce to the atom list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 00433fa..c45ebb8 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -49,6 +49,7 @@ pub static ATOMS: &'static [&'static str] = &[ "marquee", "meta", "noframes", + "nonce", "noscript", "object", "optgroup", From 90c0c4e58dccca61f8b5b8cd8d5149d604b126e1 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 21 May 2016 18:46:13 -0400 Subject: [PATCH 043/212] Bump version to 0.2.17 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4e7135e..bd1d435 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.16" +version = "0.2.17" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From beb496756067f66e47f3c5a14db2d399fbc66270 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Tue, 24 May 2016 12:10:05 +0530 Subject: [PATCH 044/212] add onstatechange to static_atom_list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index c45ebb8..84ff334 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -892,6 +892,7 @@ pub static ATOMS: &'static [&'static str] = &[ "onscroll", "onselect", "onselectstart", + "onstatechange", "onstart", "onstop", "onstorage", From 97b55762a83226b2640db53dcc385fbda8659874 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 24 May 2016 11:03:24 +0200 Subject: [PATCH 045/212] Update to 0.2.18 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bd1d435..bc0e534 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.17" +version = "0.2.18" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 20a2ca86b0298f2bd094dbec8853eb2ccc8e0bf2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 26 May 2016 15:39:08 +0200 Subject: [PATCH 046/212] Rename Atom::data to unsafe_data and hide it in docs. --- build.rs | 2 +- examples/summarize-events/src/main.rs | 2 +- src/atom/mod.rs | 33 ++++++++++++++------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/build.rs b/build.rs index 7571868..65358f6 100644 --- a/build.rs +++ b/build.rs @@ -61,7 +61,7 @@ fn write_atom_macro(hash_state: &phf_generator::HashState) { writeln!(file, r"macro_rules! 
atom {{").unwrap(); for &s in set.iter() { let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); - writeln!(file, r"({:?}) => {{ $crate::Atom {{ data: 0x{:x} }} }};", s, data).unwrap(); + writeln!(file, r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x} }} }};", s, data).unwrap(); } writeln!(file, r"}}").unwrap(); } diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 1b1aa64..8a44389 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -88,7 +88,7 @@ fn main() { // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. - _ => Atom { data: ev.id }.to_string(), + _ => Atom { unsafe_data: ev.id }.to_string(), }; match summary.entry(string) { diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 23ea24e..5ccf8a6 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -168,7 +168,8 @@ impl StringCache { pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
- pub data: u64, + #[doc(hidden)] + pub unsafe_data: u64, } pub struct BorrowedAtom<'a>(pub &'a Atom); @@ -189,11 +190,11 @@ impl<'a> PartialEq for BorrowedAtom<'a> { impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { - UnpackedAtom::from_packed(self.data) + UnpackedAtom::from_packed(self.unsafe_data) } pub fn get_hash(&self) -> u32 { - ((self.data >> 32) ^ self.data) as u32 + ((self.unsafe_data >> 32) ^ self.unsafe_data) as u32 } pub fn with_str(&self, cb: F) -> Output @@ -233,7 +234,7 @@ impl<'a> From> for Atom { let data = unsafe { unpacked.pack() }; log!(Event::Intern(data)); - Atom { data: data } + Atom { unsafe_data: data } } } @@ -255,7 +256,7 @@ impl Clone for Atom { #[inline(always)] fn clone(&self) -> Atom { unsafe { - match from_packed_dynamic(self.data) { + match from_packed_dynamic(self.unsafe_data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); @@ -264,7 +265,7 @@ impl Clone for Atom { } } Atom { - data: self.data + unsafe_data: self.unsafe_data } } } @@ -274,11 +275,11 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - STRING_CACHE.lock().unwrap().remove(this.data); + STRING_CACHE.lock().unwrap().remove(this.unsafe_data); } unsafe { - match from_packed_dynamic(self.data) { + match from_packed_dynamic(self.unsafe_data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { @@ -300,7 +301,7 @@ impl ops::Deref for Atom { unsafe { match self.unpack() { Inline(..) 
=> { - let buf = inline_orig_bytes(&self.data); + let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8(buf).unwrap() }, Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), @@ -338,7 +339,7 @@ impl fmt::Debug for Atom { impl PartialOrd for Atom { #[inline] fn partial_cmp(&self, other: &Atom) -> Option { - if self.data == other.data { + if self.unsafe_data == other.unsafe_data { return Some(Equal); } self.as_ref().partial_cmp(other.as_ref()) @@ -348,7 +349,7 @@ impl PartialOrd for Atom { impl Ord for Atom { #[inline] fn cmp(&self, other: &Atom) -> Ordering { - if self.data == other.data { + if self.unsafe_data == other.unsafe_data { return Equal; } self.as_ref().cmp(other.as_ref()) @@ -662,14 +663,14 @@ mod tests { #[test] fn repr() { fn check(s: &str, data: u64) { - assert_eq_fmt!("0x{:016X}", Atom::from(s).data, data); + assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); } fn check_static(s: &str, x: Atom) { - assert_eq_fmt!("0x{:016X}", x.data, Atom::from(s).data); - assert_eq!(0x2, x.data & 0xFFFF_FFFF); + assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); + assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); + assert!((x.unsafe_data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); } // This test is here to make sure we don't change atom representation @@ -687,7 +688,7 @@ mod tests { check("xyzzy01", 0x3130_797A_7A79_7871); // Dynamic atoms. This is a pointer so we can't verify every bit. 
- assert_eq!(0x00, Atom::from("a dynamic string").data & 0xf); + assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); } #[test] From 3175848b066668f6e37257084bea3578b99baa30 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 27 May 2016 23:10:58 +0200 Subject: [PATCH 047/212] Store Box instead of String --- Cargo.toml | 2 +- src/atom/mod.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bc0e534..2f248e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.18" +version = "0.2.19" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 5ccf8a6..89beb4e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -73,7 +73,7 @@ struct StringCacheEntry { next_in_bucket: Option>, hash: u64, ref_count: AtomicIsize, - string: String, + string: Box, } impl StringCacheEntry { @@ -83,7 +83,7 @@ impl StringCacheEntry { next_in_bucket: next, hash: hash, ref_count: AtomicIsize::new(1), - string: string, + string: string.into_boxed_str(), } } } @@ -102,7 +102,7 @@ impl StringCache { self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { - if entry.hash == hash && entry.string == &*string { + if entry.hash == hash && &*entry.string == &*string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { return &mut **entry; } @@ -696,7 +696,7 @@ mod tests { // Guard against accidental changes to the sizes of things. 
use std::mem; assert_eq!(if cfg!(feature = "unstable") { 8 } else { 16 }, mem::size_of::()); - assert_eq!(48, mem::size_of::()); + assert_eq!(40, mem::size_of::()); } #[test] From ede5dceb8f394ee501b7caea77ebf3a6f1f82af1 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 18 Jun 2016 23:49:55 +0800 Subject: [PATCH 048/212] Add CORS settings and referrer policy atoms to the list --- src/static_atom_list.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 84ff334..5ceff3a 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -154,6 +154,7 @@ pub static ATOMS: &'static [&'static str] = &[ "animation", "annotation", "annotation-xml", + "anonymous", "apply", "approx", "arabic-form", @@ -784,6 +785,8 @@ pub static ATOMS: &'static [&'static str] = &[ "nest", "nextid", "no message", + "no-referrer", + "no-referrer-when-downgrade", "nobr", "noembed", "nohref", @@ -908,6 +911,7 @@ pub static ATOMS: &'static [&'static str] = &[ "orient", "orientation", "origin", + "origin-when-cross-origin", "other", "otherwise", "outerproduct", @@ -1191,10 +1195,12 @@ pub static ATOMS: &'static [&'static str] = &[ "unicode-range", "union", "units-per-em", + "unsafe-url", "unselectable", "uplimit", "url", "use", + "use-credentials", "usemap", "UTF-8", "valign", From 77dc9ccb742b59e3de52d1f30676d593d1554def Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 18 Jun 2016 23:50:34 +0800 Subject: [PATCH 049/212] Bump version to 0.2.20 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2f248e3..fb6b579 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.19" +version = "0.2.20" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 09f955fd7608cf7beca1b4c5068b347b65e082f5 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 5 Jul 2016 13:28:49 +0200 Subject: [PATCH 050/212] Add some more static atoms. --- src/static_atom_list.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 5ceff3a..ec91b3f 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -134,6 +134,7 @@ pub static ATOMS: &'static [&'static str] = &[ "alink", "alphabetic", "alt", + "alternate", "altglyph", "altGlyph", "altglyphdef", @@ -155,6 +156,7 @@ pub static ATOMS: &'static [&'static str] = &[ "annotation", "annotation-xml", "anonymous", + "apple-touch-icon", "apply", "approx", "arabic-form", @@ -1123,6 +1125,7 @@ pub static ATOMS: &'static [&'static str] = &[ "stroke-opacity", "stroke-width", "strong", + "stylesheet", "sub", "submit", "subscriptshift", From bb130c31f73388e2564d91bfb431ce3812a6693c Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 5 Jul 2016 13:48:24 +0200 Subject: [PATCH 051/212] Bump to 0.2.21. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fb6b579..57c8cf8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.20" +version = "0.2.21" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From d19558f0afe0162ca289949ebe3694d9bc609ac4 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 22 Jul 2016 03:41:24 +0200 Subject: [PATCH 052/212] Implement Default and PartialEq. 
--- Cargo.toml | 2 +- src/atom/mod.rs | 6 ++++++ src/namespace.rs | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 57c8cf8..0ddc506 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.21" +version = "0.2.22" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 89beb4e..fd0a58e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -215,6 +215,12 @@ impl PartialEq for str { } } +impl PartialEq for Atom { + fn eq(&self, other: &String) -> bool { + &self[..] == &other[..] + } +} + impl<'a> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Atom { diff --git a/src/namespace.rs b/src/namespace.rs index cae98dd..fe2cbae 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -19,6 +19,12 @@ use std::ops; #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); +impl Default for Namespace { + fn default() -> Self { + ns!() + } +} + pub struct BorrowedNamespace<'a>(pub &'a Namespace); impl<'a> ops::Deref for BorrowedNamespace<'a> { From 91314e0e656e2eb4ea5c2b8526dafb148c47e2dc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 22 Jul 2016 16:08:26 +0200 Subject: [PATCH 053/212] Implement Default for Atom --- src/atom/mod.rs | 6 ++++++ src/namespace.rs | 8 +------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index fd0a58e..4878320 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -203,6 +203,12 @@ impl Atom { } } +impl Default for Atom { + fn default() -> Self { + atom!("") + } +} + impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] 
== other diff --git a/src/namespace.rs b/src/namespace.rs index fe2cbae..9ae836b 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -15,16 +15,10 @@ use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. /// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Default)] #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); -impl Default for Namespace { - fn default() -> Self { - ns!() - } -} - pub struct BorrowedNamespace<'a>(pub &'a Namespace); impl<'a> ops::Deref for BorrowedNamespace<'a> { From 31dea0b1d1fa86657b321f9089cacc637cc5ea57 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:26:44 +0200 Subject: [PATCH 054/212] Allow serde 0.8 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0ddc506..2608b83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ heap_size = ["heapsize", "heapsize_plugin"] [dependencies] lazy_static = "0.2" -serde = ">=0.6, <0.8" +serde = ">=0.6, <0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" From ba9bde9c2d3c91a3b6a3319cfc84d1487d425b01 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:30:39 +0200 Subject: [PATCH 055/212] Remove use of heapsize_plugin --- .travis.yml | 2 +- Cargo.toml | 6 +----- src/atom/mod.rs | 21 +++++++++++++++------ src/lib.rs | 4 +--- src/namespace.rs | 8 ++++++-- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index e99d9a3..485a91f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ script: - cargo test - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo 
test --features heap_size; fi" + - cargo test --features heapsize - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." notifications: diff --git a/Cargo.toml b/Cargo.toml index 2608b83..3bb0269 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ log-events = ["rustc-serialize"] unstable = [] # HeapSizeOf support -heap_size = ["heapsize", "heapsize_plugin"] +heap_size = ["heapsize"] [dependencies] lazy_static = "0.2" @@ -44,10 +44,6 @@ optional = true version = ">=0.1.1, <0.4" optional = true -[dependencies.heapsize_plugin] -version = "0.1.4" -optional = true - [build-dependencies] phf_generator = "0.7.4" phf_shared = "0.7.4" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 4878320..85a6402 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -9,7 +9,7 @@ #![allow(non_upper_case_globals)] -#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -46,7 +46,7 @@ struct StringCache { buckets: [Option>; NB_BUCKETS], } -#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] impl HeapSizeOf for StringCache { fn heap_size_of_children(&self) -> usize { self.buckets.iter().fold(0, |size, bucket| size + bucket.heap_size_of_children()) @@ -58,17 +58,16 @@ lazy_static! { } /// A token that represents the heap used by the dynamic string cache. 
-#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] pub struct StringCacheHeap; -#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] impl HeapSizeOf for StringCacheHeap { fn heap_size_of_children(&self) -> usize { STRING_CACHE.lock().unwrap().heap_size_of_children() } } -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] struct StringCacheEntry { next_in_bucket: Option>, hash: u64, @@ -76,6 +75,14 @@ struct StringCacheEntry { string: Box, } +#[cfg(feature = "heapsize")] +impl HeapSizeOf for StringCacheEntry { + fn heap_size_of_children(&self) -> usize { + self.next_in_bucket.heap_size_of_children() + + self.string.heap_size_of_children() + } +} + impl StringCacheEntry { fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { @@ -163,7 +170,6 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. #[cfg_attr(feature = "unstable", unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. 
@@ -172,6 +178,9 @@ pub struct Atom { pub unsafe_data: u64, } +#[cfg(feature = "heapsize")] +known_heap_size!(0, Atom); + pub struct BorrowedAtom<'a>(pub &'a Atom); impl<'a> ops::Deref for BorrowedAtom<'a> { diff --git a/src/lib.rs b/src/lib.rs index 7c5bdac..471207b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,12 +13,10 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag))] -#![cfg_attr(feature = "heap_size", feature(plugin, custom_derive))] -#![cfg_attr(feature = "heap_size", plugin(heapsize_plugin))] #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "log-events")] extern crate rustc_serialize; -#[cfg(feature = "heap_size")] extern crate heapsize; +#[cfg(feature = "heapsize")] #[macro_use] extern crate heapsize; #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; diff --git a/src/namespace.rs b/src/namespace.rs index 9ae836b..6af1557 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -16,9 +16,11 @@ use std::ops; /// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Default)] -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); +#[cfg(feature = "heapsize")] +known_heap_size!(0, Namespace); + pub struct BorrowedNamespace<'a>(pub &'a Namespace); impl<'a> ops::Deref for BorrowedNamespace<'a> { @@ -36,12 +38,14 @@ impl<'a> PartialEq for BorrowedNamespace<'a> { /// A name with a namespace. 
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct QualName { pub ns: Namespace, pub local: Atom, } +#[cfg(feature = "heapsize")] +known_heap_size!(0, QualName); + impl QualName { #[inline] pub fn new(ns: Namespace, local: Atom) -> QualName { From cc945ed2d8f45f50b8b70bf01d5937cd38fc5878 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:33:08 +0200 Subject: [PATCH 056/212] Enable doctests --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3bb0269..3954c45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,9 +12,6 @@ build = "build.rs" [lib] name = "string_cache" -# https://github.com/rust-lang/cargo/issues/1512 -doctest = false - [features] # Enable event logging for generating benchmark traces. From 07abb7b51ad97f69f6509dd65699d23775510236 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:31:05 +0200 Subject: [PATCH 057/212] Bump version to 0.2.23 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3954c45..9b5f882 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.22" +version = "0.2.23" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 9abce7858a7d9d231cc236216297e7b5c0628331 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 18 Aug 2016 14:03:52 +0200 Subject: [PATCH 058/212] Display for Namespace --- Cargo.toml | 2 +- src/namespace.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9b5f882..e29218d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.23" +version = "0.2.24" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/namespace.rs b/src/namespace.rs index 6af1557..12bd718 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -10,6 +10,7 @@ //! **Note:** This may move as string-cache becomes less Web-specific. use atom::Atom; +use std::fmt; use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. @@ -36,6 +37,13 @@ impl<'a> PartialEq for BorrowedNamespace<'a> { } } +impl fmt::Display for Namespace { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + ::fmt(&self.0, f) + } +} + /// A name with a namespace. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] pub struct QualName { From 4c0ee074c66671cc5cdc1b883b4d8f449c53f4c2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 26 Aug 2016 14:33:24 +0200 Subject: [PATCH 059/212] Drop flags are dead, long live MIR! --- Cargo.toml | 2 +- src/atom/mod.rs | 24 +++++++++--------------- src/lib.rs | 3 +-- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e29218d..5d707cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.24" +version = "0.2.25" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 85a6402..e3f6b8b 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -169,7 +169,6 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. -#[cfg_attr(feature = "unstable", unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -714,9 +713,16 @@ mod tests { #[test] fn assert_sizes() { - // Guard against accidental changes to the sizes of things. use std::mem; - assert_eq!(if cfg!(feature = "unstable") { 8 } else { 16 }, mem::size_of::()); + struct EmptyWithDrop; + impl Drop for EmptyWithDrop { + fn drop(&mut self) {} + } + let compiler_uses_inline_drop_flags = mem::size_of::() > 0; + + // Guard against accidental changes to the sizes of things. + assert_eq!(mem::size_of::(), + if compiler_uses_inline_drop_flags { 16 } else { 8 }); assert_eq!(40, mem::size_of::()); } @@ -771,18 +777,6 @@ mod tests { let _: &str = atom.as_ref(); } - /// Atom uses #[unsafe_no_drop_flag] to stay small, so drop() may be called more than once. - /// In calls after the first one, the atom will be filled with a POST_DROP value. - /// drop() must be a no-op in this case. 
- #[cfg(feature = "unstable")] - #[test] - fn atom_drop_is_idempotent() { - use super::from_packed_dynamic; - unsafe { - assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); - } - } - #[test] fn string_cache_entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); diff --git a/src/lib.rs b/src/lib.rs index 471207b..b466a77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,8 +11,7 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] -#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag))] +#![cfg_attr(all(test, feature = "unstable"), feature(test))] #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "log-events")] extern crate rustc_serialize; From 38eb8f31d728c984c6a2911045931aa1c0ff4baa Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 26 Aug 2016 14:46:17 -0700 Subject: [PATCH 060/212] Add transitionend atoms to the static list --- Cargo.toml | 2 +- src/static_atom_list.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d707cd..bfec76e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.25" +version = "0.2.26" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ec91b3f..7f06a4a 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -902,6 +902,7 @@ pub static ATOMS: &'static [&'static str] = &[ "onstop", "onstorage", "onsubmit", + "ontransitionend", "onunload", "onzoom", "opacity", @@ -1177,8 +1178,9 @@ pub static ATOMS: &'static [&'static str] = &[ "transition-delay", "transition-duration", "transition-property", - "transitions", "transition-timing-function", + "transitionend", + "transitions", "transpose", "tref", "true", From a9e439509a81c0e669b1da5cc6c02d4b0d588939 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 3 Sep 2016 14:47:57 -0700 Subject: [PATCH 061/212] Add missing animatable property names --- src/static_atom_list.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 7f06a4a..57561e5 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -277,6 +277,7 @@ pub static ATOMS: &'static [&'static str] = &[ "border-top-width", "border-width", "bottom", + "box-shadow", "butt", "bvar", "by", @@ -501,6 +502,8 @@ pub static ATOMS: &'static [&'static str] = &[ "filterRes", "filterunits", "filterUnits", + "flex-grow", + "flex-shrink", "float", "flood", "flood-color", @@ -920,6 +923,7 @@ pub static ATOMS: &'static [&'static str] = &[ "outerproduct", "outline", "outline-color", + "outline-offset", "outline-style", "outline-width", "output", @@ -948,6 +952,7 @@ pub static ATOMS: &'static [&'static str] = &[ "patternTransform", "patternunits", "patternUnits", + "perspective", "pi", "piece", "piecewise", @@ -1157,16 +1162,18 @@ pub static ATOMS: &'static [&'static str] = &[ "tel", "tendsto", "text", - "text/plain", "text-align", "text-anchor", "text-decoration", + "text-indent", + "text-orientation", + "text-rendering", + "text-shadow", + "text/plain", "textlength", "textLength", - "text-orientation", 
"textpath", "textPath", - "text-rendering", "thickmathspace", "thinmathspace", "time", @@ -1175,6 +1182,7 @@ pub static ATOMS: &'static [&'static str] = &[ "top", "touchevent", "transform", + "transform-origin", "transition-delay", "transition-duration", "transition-property", @@ -1277,6 +1285,7 @@ pub static ATOMS: &'static [&'static str] = &[ "ychannelselector", "yChannelSelector", "z", + "z-index", "zoomandpan", "zoomAndPan", ]; From a64b0611633f3898e129b4bf0241aa0878450adb Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 3 Sep 2016 14:48:15 -0700 Subject: [PATCH 062/212] Version bump to 0.2.27 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bfec76e..dc0d94e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.26" +version = "0.2.27" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 5fb7cef6c6e92cdb9eeb3dce620d7a2b06c7a180 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Fri, 16 Sep 2016 20:20:36 +0530 Subject: [PATCH 063/212] Add activate string --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 57561e5..2519838 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -124,6 +124,7 @@ pub static ATOMS: &'static [&'static str] = &[ "acronym", "action", "actiontype", + "activate", "active", "actuate", "additive", From ca1bbf653b310b1fc6f90cb10a5fdefd180eb525 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 16 Sep 2016 10:28:13 -0700 Subject: [PATCH 064/212] Version bump to 0.2.28 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dc0d94e..4595af5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.27" +version = "0.2.28" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 952a6b4ab78e7ccee85ad8768bfa58c3a2db85f0 Mon Sep 17 00:00:00 2001 From: Taryn Hill Date: Sun, 18 Sep 2016 22:10:22 -0500 Subject: [PATCH 065/212] Add minlength to static_atom_list Bump to 0.2.29 --- Cargo.toml | 2 +- src/static_atom_list.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4595af5..f7d56bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.28" +version = "0.2.29" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 2519838..c016768 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -740,6 +740,7 @@ pub static ATOMS: &'static [&'static str] = &[ "mi", "min", "min-height", + "minlength", "minsize", "minus", "min-width", From 948c453ce3e3ff2caac0cb793a3ff1e4f1180a30 Mon Sep 17 00:00:00 2001 From: "Ying-Ruei Liang(KK)" Date: Fri, 23 Sep 2016 14:58:32 +0800 Subject: [PATCH 066/212] Add "referrerpolicy" to static_atom_list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index c016768..f90ca6a 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -1004,6 +1004,7 @@ pub static ATOMS: &'static [&'static str] = &[ "real", "reals", "rect", + "referrerpolicy", "refx", "refX", "refy", From de67df7eb667652fcbee1c06f21242b70b32a414 Mon Sep 17 00:00:00 2001 From: "Ying-Ruei Liang(KK)" Date: Fri, 23 Sep 2016 21:04:02 +0800 Subject: [PATCH 067/212] Version bump to 0.2.30 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f7d56bf..0ece410 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.29" +version = "0.2.30" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From c3229388c8cb046c1fadf255aed3b1616aebeb98 Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Fri, 21 Oct 2016 18:48:40 +0100 Subject: [PATCH 068/212] Make cmp massively faster for inline atoms --- src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index e3f6b8b..0182242 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -322,7 +322,7 @@ impl ops::Deref for Atom { match self.unpack() { Inline(..) 
=> { let buf = inline_orig_bytes(&self.unsafe_data); - str::from_utf8(buf).unwrap() + str::from_utf8_unchecked(buf) }, Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), Dynamic(entry) => { From 40d8629f293c24cbbd9a3289fe796496b0b2e970 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 21:22:00 +0200 Subject: [PATCH 069/212] Make tests pass on a 32-bit system. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Testable on 64-bit Linux with: ``` rustup target add i686-unknown-linux-gnu cargo test --target i686-unknown-linux-gnu ``` (or similarly on other 64-bit platforms), assuming a linker and libc for this target are available on the system. Leaving #162 open to add CI for this. (Unfortunately Travis doesn’t use rustup out of the box.) --- src/atom/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index e3f6b8b..6ff7e5f 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -723,7 +723,8 @@ mod tests { // Guard against accidental changes to the sizes of things. assert_eq!(mem::size_of::(), if compiler_uses_inline_drop_flags { 16 } else { 8 }); - assert_eq!(40, mem::size_of::()); + assert_eq!(mem::size_of::(), + 8 + 4 * mem::size_of::()); } #[test] From 90bdc783146149924c3f5ba0b5d3f9d68803f74d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 17:59:53 +0200 Subject: [PATCH 070/212] Use more compact TOML syntax.
--- Cargo.toml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ece410..8ec53bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,18 +29,12 @@ lazy_static = "0.2" serde = ">=0.6, <0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" +rustc-serialize = { version = "0.3", optional = true } +heapsize = { version = ">=0.1.1, <0.4", optional = true } [dev-dependencies] rand = "0.3" -[dependencies.rustc-serialize] -version = "0.3" -optional = true - -[dependencies.heapsize] -version = ">=0.1.1, <0.4" -optional = true - [build-dependencies] phf_generator = "0.7.4" phf_shared = "0.7.4" From 85816177ce9e2eec2c294d0c22aa630eead6e3bc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 17:01:08 +0200 Subject: [PATCH 071/212] Breaking changes are coming. --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8ec53bc..c896ee7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.30" +version = "0.3.0" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -26,11 +26,11 @@ heap_size = ["heapsize"] [dependencies] lazy_static = "0.2" -serde = ">=0.6, <0.9" +serde = "0.8" phf_shared = "0.7.4" debug_unreachable = "0.1.1" rustc-serialize = { version = "0.3", optional = true } -heapsize = { version = ">=0.1.1, <0.4", optional = true } +heapsize = { version = "0.3", optional = true } [dev-dependencies] rand = "0.3" From 0c82b2c704b7b742e8844559269a8d62bc5a9455 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:01:35 +0200 Subject: [PATCH 072/212] Remove BorrowedAtom. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit selectors doesn’t use it anymore. 
--- src/atom/mod.rs | 15 --------------- src/lib.rs | 4 ++-- src/namespace.rs | 16 ---------------- 3 files changed, 2 insertions(+), 33 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 0e8f7ef..6749db7 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -180,21 +180,6 @@ pub struct Atom { #[cfg(feature = "heapsize")] known_heap_size!(0, Atom); -pub struct BorrowedAtom<'a>(pub &'a Atom); - -impl<'a> ops::Deref for BorrowedAtom<'a> { - type Target = Atom; - fn deref(&self) -> &Atom { - self.0 - } -} - -impl<'a> PartialEq for BorrowedAtom<'a> { - fn eq(&self, other: &Atom) -> bool { - self.0 == other - } -} - impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { diff --git a/src/lib.rs b/src/lib.rs index b466a77..f6390c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,8 +22,8 @@ extern crate serde; extern crate phf_shared; -pub use atom::{Atom, BorrowedAtom}; -pub use namespace::{BorrowedNamespace, Namespace, QualName}; +pub use atom::Atom; +pub use namespace::{Namespace, QualName}; #[macro_export] macro_rules! qualname { diff --git a/src/namespace.rs b/src/namespace.rs index 12bd718..0a415fa 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -11,7 +11,6 @@ use atom::Atom; use std::fmt; -use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. 
/// Whether a given string represents a namespace is contextual, so this is @@ -22,21 +21,6 @@ pub struct Namespace(pub Atom); #[cfg(feature = "heapsize")] known_heap_size!(0, Namespace); -pub struct BorrowedNamespace<'a>(pub &'a Namespace); - -impl<'a> ops::Deref for BorrowedNamespace<'a> { - type Target = Namespace; - fn deref(&self) -> &Namespace { - self.0 - } -} - -impl<'a> PartialEq for BorrowedNamespace<'a> { - fn eq(&self, other: &Namespace) -> bool { - self.0 == other - } -} - impl fmt::Display for Namespace { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { From fef1fa6cf47deda487f76d9e95b473314941f4c4 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:28:35 +0200 Subject: [PATCH 073/212] Remove macro hack rendered unneeded by `$crate`. --- src/lib.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f6390c5..febeea6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,13 +61,3 @@ pub mod event; pub mod atom; pub mod namespace; pub mod shared; - -// A private module so that macro-expanded idents like -// `::string_cache::atom::Atom` will also work in this crate. -// -// `libstd` uses the same trick. 
-#[doc(hidden)] -mod string_cache { - pub use atom; - pub use namespace; -} From dbf6ad786175c7ff3f89798c3c3f827b4e9b4760 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:29:09 +0200 Subject: [PATCH 074/212] =?UTF-8?q?Remove=20namespaces.=20They=E2=80=99re?= =?UTF-8?q?=20going=20into=20html5ever.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 29 ----------------- src/namespace.rs | 81 ------------------------------------------------ 2 files changed, 110 deletions(-) delete mode 100644 src/namespace.rs diff --git a/src/lib.rs b/src/lib.rs index febeea6..44bade8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,34 +23,6 @@ extern crate serde; extern crate phf_shared; pub use atom::Atom; -pub use namespace::{Namespace, QualName}; - -#[macro_export] -macro_rules! qualname { - ("", $local:tt) => { - $crate::namespace::QualName { - ns: ns!(), - local: atom!($local), - } - }; - ($ns:tt, $local:tt) => { - $crate::namespace::QualName { - ns: ns!($ns), - local: atom!($local), - } - } -} - -#[macro_export] -macro_rules!
ns { - () => { $crate::Namespace(atom!("")) }; - (html) => { $crate::Namespace(atom!("http://www.w3.org/1999/xhtml")) }; - (xml) => { $crate::Namespace(atom!("http://www.w3.org/XML/1998/namespace")) }; - (xmlns) => { $crate::Namespace(atom!("http://www.w3.org/2000/xmlns/")) }; - (xlink) => { $crate::Namespace(atom!("http://www.w3.org/1999/xlink")) }; - (svg) => { $crate::Namespace(atom!("http://www.w3.org/2000/svg")) }; - (mathml) => { $crate::Namespace(atom!("http://www.w3.org/1998/Math/MathML")) }; -} include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); @@ -59,5 +31,4 @@ include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); pub mod event; pub mod atom; -pub mod namespace; pub mod shared; diff --git a/src/namespace.rs b/src/namespace.rs deleted file mode 100644 index 0a415fa..0000000 --- a/src/namespace.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! **Note:** This may move as string-cache becomes less Web-specific. - -use atom::Atom; -use std::fmt; - -/// An atom that is meant to represent a namespace in the HTML / XML sense. -/// Whether a given string represents a namespace is contextual, so this is -/// a transparent wrapper that will not catch all mistakes. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Default)] -pub struct Namespace(pub Atom); - -#[cfg(feature = "heapsize")] -known_heap_size!(0, Namespace); - -impl fmt::Display for Namespace { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::fmt(&self.0, f) - } -} - -/// A name with a namespace. 
-#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] -pub struct QualName { - pub ns: Namespace, - pub local: Atom, -} - -#[cfg(feature = "heapsize")] -known_heap_size!(0, QualName); - -impl QualName { - #[inline] - pub fn new(ns: Namespace, local: Atom) -> QualName { - QualName { - ns: ns, - local: local, - } - } -} - -#[cfg(test)] -mod tests { - use super::{Namespace, QualName}; - use Atom; - - #[test] - fn ns_macro() { - assert_eq!(ns!(), Namespace(Atom::from(""))); - - assert_eq!(ns!(html), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xml), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(xmlns), Namespace(Atom::from("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xlink), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(svg), Namespace(Atom::from("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mathml), Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); - } - - #[test] - fn qualname() { - assert_eq!(QualName::new(ns!(), atom!("")), - QualName { ns: ns!(), local: Atom::from("") }); - assert_eq!(QualName::new(ns!(xml), atom!("base")), - QualName { ns: ns!(xml), local: atom!("base") }); - } - - #[test] - fn qualname_macro() { - assert_eq!(qualname!("", ""), QualName { ns: ns!(), local: atom!("") }); - assert_eq!(qualname!(xml, "base"), QualName { ns: ns!(xml), local: atom!("base") }); - } -} From 135c895c7d606a1eb6e89f372669eff407d3ab60 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:10:06 +0200 Subject: [PATCH 075/212] Stop using derive. Prepare for adding a type parameter that derive would require bounds on. 
--- src/atom/mod.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 6749db7..4469ca9 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -18,6 +18,7 @@ use std::ascii::AsciiExt; use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; +use std::hash::{Hash, Hasher}; use std::mem; use std::ops; use std::ptr; @@ -167,9 +168,6 @@ impl StringCache { } } -// NOTE: Deriving Eq here implies that a given string must always -// be interned the same way. -#[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. @@ -202,6 +200,23 @@ impl Default for Atom { } } +impl Hash for Atom { + #[inline] + fn hash(&self, state: &mut H) where H: Hasher { + self.unsafe_data.hash(state) + } +} + +impl Eq for Atom {} + +impl PartialEq for Atom { +// NOTE: This impl requires that a given string must always be interned the same way. + #[inline] + fn eq(&self, other: &Atom) -> bool { + self.unsafe_data == other.unsafe_data + } +} + impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] == other @@ -297,7 +312,6 @@ impl Drop for Atom { } } - impl ops::Deref for Atom { type Target = str; From 046b0447414297db9d54ea85e8c16002aaa32f17 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 15:05:44 +0200 Subject: [PATCH 076/212] Rename StaticAtomSet struct to PhfStrSet. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We’re about to introduce a trait named StaticAtomSet. --- build.rs | 4 ++-- src/atom/mod.rs | 2 +- src/shared.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/build.rs b/build.rs index 65358f6..f530b0d 100644 --- a/build.rs +++ b/build.rs @@ -33,7 +33,7 @@ fn write_static_atom_set(hash_state: &phf_generator::HashState) { macro_rules!
w { ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } } - w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); + w!("pub static STATIC_ATOM_SET: PhfStrSet = PhfStrSet {{"); w!(" key: {},", hash_state.key); w!(" disps: &["); for &(d1, d2) in &hash_state.disps { @@ -49,7 +49,7 @@ fn write_static_atom_set(hash_state: &phf_generator::HashState) { } fn write_atom_macro(hash_state: &phf_generator::HashState) { - let set = shared::StaticAtomSet { + let set = shared::PhfStrSet { key: hash_state.key, disps: leak(hash_state.disps.clone()), atoms: leak(hash_state.map.iter().map(|&idx| static_atom_list::ATOMS[idx]).collect()), diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 4469ca9..6d14cee 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -29,7 +29,7 @@ use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - ENTRY_ALIGNMENT, pack_static, StaticAtomSet}; + ENTRY_ALIGNMENT, pack_static, PhfStrSet}; use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] diff --git a/src/shared.rs b/src/shared.rs index a653872..8b9133b 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -24,13 +24,13 @@ pub fn pack_static(n: u32) -> u64 { (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) } -pub struct StaticAtomSet { +pub struct PhfStrSet { pub key: u64, pub disps: &'static [(u32, u32)], pub atoms: &'static [&'static str], } -impl StaticAtomSet { +impl PhfStrSet { #[inline] pub fn get_index_or_hash(&self, s: &str) -> Result { let hash = phf_shared::hash(s, self.key); From 367bf9f252e6a2b965f7f97bcd958b270ea66ea2 Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Mon, 24 Oct 2016 18:41:32 +0200 Subject: [PATCH 077/212] Add a type parameter to Atom. 
--- build.rs | 7 +++- src/atom/bench.rs | 18 ++++++--- src/atom/mod.rs | 99 +++++++++++++++++++++++++++-------------------- 3 files changed, 77 insertions(+), 47 deletions(-) diff --git a/build.rs b/build.rs index f530b0d..6515570 100644 --- a/build.rs +++ b/build.rs @@ -61,7 +61,12 @@ fn write_atom_macro(hash_state: &phf_generator::HashState) { writeln!(file, r"macro_rules! atom {{").unwrap(); for &s in set.iter() { let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); - writeln!(file, r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x} }} }};", s, data).unwrap(); + writeln!( + file, + r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", + s, + data + ).unwrap(); } writeln!(file, r"}}").unwrap(); } diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 96b0790..585d1c1 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -27,9 +27,17 @@ and cheap to move around, which isn't reflected in these tests. */ -use atom::Atom; +use atom::tests::Atom; use test::{Bencher, black_box}; +macro_rules! test_atom { + ($tt: tt) => {{ + // Add type annotation to help inference + let atom: Atom = atom!($tt); + atom + }} +} + // Just shorthand fn mk(x: &str) -> Atom { Atom::from(x) @@ -134,7 +142,7 @@ macro_rules! 
bench_all ( use std::string::ToString; use std::iter::repeat; - use atom::Atom; + use atom::tests::Atom; use atom::UnpackedAtom::{Static, Inline, Dynamic}; use super::mk; @@ -157,7 +165,7 @@ bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_ref clone is_static lt] - for static_atom = atom!("a"), atom!("b")); + for static_atom = test_atom!("a"), test_atom!("b")); bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); @@ -175,10 +183,10 @@ bench_all!([intern as_ref clone is_static] for static_at_runtime = mk("a"), mk("b")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = atom!("a"), mk("f")); + for static_vs_inline = test_atom!("a"), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = atom!("a"), mk(super::longer_dynamic_b)); + for static_vs_dynamic = test_atom!("a"), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 6d14cee..4021a81 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -19,6 +19,7 @@ use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; use std::mem; use std::ops; use std::ptr; @@ -168,17 +169,27 @@ impl StringCache { } } -pub struct Atom { +pub trait StaticAtomSet {} + +pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
#[doc(hidden)] pub unsafe_data: u64, + + #[doc(hidden)] + pub phantom: PhantomData, } #[cfg(feature = "heapsize")] -known_heap_size!(0, Atom); +impl HeapSizeOf for Atom { + #[inline(always)] + fn heap_size_of_children(&self) -> usize { + 0 + } +} -impl Atom { +impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { UnpackedAtom::from_packed(self.unsafe_data) @@ -194,50 +205,50 @@ impl Atom { } } -impl Default for Atom { +impl Default for Atom { fn default() -> Self { atom!("") } } -impl Hash for Atom { +impl Hash for Atom { #[inline] fn hash(&self, state: &mut H) where H: Hasher { self.unsafe_data.hash(state) } } -impl Eq for Atom {} +impl Eq for Atom {} -impl PartialEq for Atom { // NOTE: This impl requires that a given string must always be interned the same way. +impl PartialEq for Atom { #[inline] - fn eq(&self, other: &Atom) -> bool { + fn eq(&self, other: &Self) -> bool { self.unsafe_data == other.unsafe_data } } -impl PartialEq for Atom { +impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] == other } } -impl PartialEq for str { - fn eq(&self, other: &Atom) -> bool { +impl PartialEq> for str { + fn eq(&self, other: &Atom) -> bool { self == &other[..] } } -impl PartialEq for Atom { +impl PartialEq for Atom { fn eq(&self, other: &String) -> bool { &self[..] == &other[..] 
} } -impl<'a> From> for Atom { +impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] - fn from(string_to_add: Cow<'a, str>) -> Atom { + fn from(string_to_add: Cow<'a, str>) -> Self { let unpacked = match STATIC_ATOM_SET.get_index_or_hash(&*string_to_add) { Ok(id) => Static(id as u32), Err(hash) => { @@ -254,27 +265,27 @@ impl<'a> From> for Atom { let data = unsafe { unpacked.pack() }; log!(Event::Intern(data)); - Atom { unsafe_data: data } + Atom { unsafe_data: data, phantom: PhantomData } } } -impl<'a> From<&'a str> for Atom { +impl<'a, Static: StaticAtomSet> From<&'a str> for Atom { #[inline] - fn from(string_to_add: &str) -> Atom { + fn from(string_to_add: &str) -> Self { Atom::from(Cow::Borrowed(string_to_add)) } } -impl From for Atom { +impl From for Atom { #[inline] - fn from(string_to_add: String) -> Atom { + fn from(string_to_add: String) -> Self { Atom::from(Cow::Owned(string_to_add)) } } -impl Clone for Atom { +impl Clone for Atom { #[inline(always)] - fn clone(&self) -> Atom { + fn clone(&self) -> Self { unsafe { match from_packed_dynamic(self.unsafe_data) { Some(entry) => { @@ -285,16 +296,17 @@ impl Clone for Atom { } } Atom { - unsafe_data: self.unsafe_data + unsafe_data: self.unsafe_data, + phantom: PhantomData, } } } -impl Drop for Atom { +impl Drop for Atom { #[inline] fn drop(&mut self) { // Out of line to guide inlining. 
- fn drop_slow(this: &mut Atom) { + fn drop_slow(this: &mut Atom) { STRING_CACHE.lock().unwrap().remove(this.unsafe_data); } @@ -312,7 +324,7 @@ impl Drop for Atom { } } -impl ops::Deref for Atom { +impl ops::Deref for Atom { type Target = str; #[inline] @@ -333,14 +345,14 @@ impl ops::Deref for Atom { } } -impl fmt::Display for Atom { +impl fmt::Display for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { ::fmt(self, f) } } -impl fmt::Debug for Atom { +impl fmt::Debug for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { @@ -355,9 +367,9 @@ impl fmt::Debug for Atom { } } -impl PartialOrd for Atom { +impl PartialOrd for Atom { #[inline] - fn partial_cmp(&self, other: &Atom) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { if self.unsafe_data == other.unsafe_data { return Some(Equal); } @@ -365,9 +377,9 @@ impl PartialOrd for Atom { } } -impl Ord for Atom { +impl Ord for Atom { #[inline] - fn cmp(&self, other: &Atom) -> Ordering { + fn cmp(&self, other: &Self) -> Ordering { if self.unsafe_data == other.unsafe_data { return Equal; } @@ -375,21 +387,21 @@ impl Ord for Atom { } } -impl AsRef for Atom { +impl AsRef for Atom { fn as_ref(&self) -> &str { &self } } -impl Serialize for Atom { - fn serialize(&self, serializer: &mut S) -> Result<(),S::Error> where S: Serializer { +impl Serialize for Atom { + fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: Serializer { let string: &str = self.as_ref(); string.serialize(serializer) } } -impl Deserialize for Atom { - fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { +impl Deserialize for Atom { + fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { let string: String = try!(Deserialize::deserialize(deserializer)); Ok(Atom::from(&*string)) } @@ -398,8 +410,8 @@ impl Deserialize for Atom { // AsciiExt requires mutating methods, so we just implement the non-mutating ones. 
// We don't need to implement is_ascii because there's no performance improvement // over the one from &str. -impl Atom { - pub fn to_ascii_uppercase(&self) -> Atom { +impl Atom { + pub fn to_ascii_uppercase(&self) -> Self { if self.chars().all(char::is_uppercase) { self.clone() } else { @@ -407,7 +419,7 @@ impl Atom { } } - pub fn to_ascii_lowercase(&self) -> Atom { + pub fn to_ascii_lowercase(&self) -> Self { if self.chars().all(char::is_lowercase) { self.clone() } else { @@ -546,10 +558,15 @@ mod bench; mod tests { use std::mem; use std::thread; - use super::{Atom, StringCacheEntry, STATIC_ATOM_SET}; + use super::Atom as GenericAtom; + use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET}; use super::UnpackedAtom::{Dynamic, Inline, Static}; use shared::ENTRY_ALIGNMENT; + pub type Atom = GenericAtom; + pub struct DummyStatic; + impl StaticAtomSet for DummyStatic {} + #[test] fn test_as_slice() { let s0 = Atom::from(""); @@ -720,7 +737,7 @@ mod tests { let compiler_uses_inline_drop_flags = mem::size_of::() > 0; // Guard against accidental changes to the sizes of things. - assert_eq!(mem::size_of::(), + assert_eq!(mem::size_of::(), if compiler_uses_inline_drop_flags { 16 } else { 8 }); assert_eq!(mem::size_of::(), 8 + 4 * mem::size_of::()); From 0ad43eab58e052ca56972e18de26f762ecb4233a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 15:27:13 +0200 Subject: [PATCH 078/212] Make StaticAtomSet impls provide a PhfStrSet. --- src/atom/mod.rs | 22 ++++++++++++++-------- src/lib.rs | 3 ++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 4021a81..8eea9ec 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -169,7 +169,9 @@ impl StringCache { } } -pub trait StaticAtomSet {} +pub trait StaticAtomSet { + fn get() -> &'static PhfStrSet; +} pub struct Atom { /// This field is public so that the `atom!()` macro can use it. 
@@ -249,7 +251,7 @@ impl PartialEq for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { - let unpacked = match STATIC_ATOM_SET.get_index_or_hash(&*string_to_add) { + let unpacked = match Static::get().get_index_or_hash(&*string_to_add) { Ok(id) => Static(id as u32), Err(hash) => { let len = string_to_add.len(); @@ -335,7 +337,7 @@ impl ops::Deref for Atom { let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8_unchecked(buf) }, - Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), + Static(idx) => Static::get().index(idx).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -561,11 +563,15 @@ mod tests { use super::Atom as GenericAtom; use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET}; use super::UnpackedAtom::{Dynamic, Inline, Static}; - use shared::ENTRY_ALIGNMENT; + use shared::{ENTRY_ALIGNMENT, PhfStrSet}; - pub type Atom = GenericAtom; - pub struct DummyStatic; - impl StaticAtomSet for DummyStatic {} + pub type Atom = GenericAtom; + pub struct DefaultStatic; + impl StaticAtomSet for DefaultStatic { + fn get() -> &'static PhfStrSet { + &STATIC_ATOM_SET + } + } #[test] fn test_as_slice() { @@ -706,7 +712,7 @@ mod tests { assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); // The index is unspecified by phf. 
- assert!((x.unsafe_data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); + assert!((x.unsafe_data >> 32) <= DefaultStatic::get().iter().len() as u64); } // This test is here to make sure we don't change atom representation diff --git a/src/lib.rs b/src/lib.rs index 44bade8..60d67d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,8 @@ extern crate serde; extern crate phf_shared; -pub use atom::Atom; +pub use atom::{Atom, StaticAtomSet}; +pub use shared::PhfStrSet; include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); From c3eac36425ba12daa1b526cfc2912426921d4a3b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 15:49:18 +0200 Subject: [PATCH 079/212] Inline PhfStrSet methods. --- Cargo.toml | 1 - build.rs | 22 +++------------------- src/atom/mod.rs | 42 ++++++++++++++++++++++++++---------------- src/lib.rs | 3 +-- src/shared.rs | 31 ------------------------------- 5 files changed, 30 insertions(+), 69 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c896ee7..80266d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,4 +37,3 @@ rand = "0.3" [build-dependencies] phf_generator = "0.7.4" -phf_shared = "0.7.4" diff --git a/build.rs b/build.rs index 6515570..ea87b0f 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,3 @@ -extern crate phf_shared; extern crate phf_generator; #[path = "src/shared.rs"] #[allow(dead_code)] mod shared; @@ -7,9 +6,7 @@ extern crate phf_generator; use std::env; use std::fs::File; use std::io::{BufWriter, Write}; -use std::mem; use std::path::Path; -use std::slice; fn main() { let hash_state = generate(); @@ -49,30 +46,17 @@ fn write_static_atom_set(hash_state: &phf_generator::HashState) { } fn write_atom_macro(hash_state: &phf_generator::HashState) { - let set = shared::PhfStrSet { - key: hash_state.key, - disps: leak(hash_state.disps.clone()), - atoms: leak(hash_state.map.iter().map(|&idx| static_atom_list::ATOMS[idx]).collect()), - }; - let path = Path::new(&env::var("OUT_DIR").unwrap()).join("atom_macro.rs"); let mut 
file = BufWriter::new(File::create(&path).unwrap()); writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! atom {{").unwrap(); - for &s in set.iter() { - let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); + for (i, &idx) in hash_state.map.iter().enumerate() { writeln!( file, r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", - s, - data + static_atom_list::ATOMS[idx], + shared::pack_static(i as u32), ).unwrap(); } writeln!(file, r"}}").unwrap(); } - -fn leak(v: Vec) -> &'static [T] { - let slice = unsafe { slice::from_raw_parts(v.as_ptr(), v.len()) }; - mem::forget(v); - slice -} diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 8eea9ec..6be6cb6 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -12,6 +12,7 @@ #[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::ascii::AsciiExt; @@ -30,7 +31,7 @@ use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - ENTRY_ALIGNMENT, pack_static, PhfStrSet}; + ENTRY_ALIGNMENT, pack_static}; use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] @@ -173,6 +174,12 @@ pub trait StaticAtomSet { fn get() -> &'static PhfStrSet; } +pub struct PhfStrSet { + pub key: u64, + pub disps: &'static [(u32, u32)], + pub atoms: &'static [&'static str], +} + pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
@@ -251,17 +258,20 @@ impl PartialEq for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { - let unpacked = match Static::get().get_index_or_hash(&*string_to_add) { - Ok(id) => Static(id as u32), - Err(hash) => { - let len = string_to_add.len(); - if len <= MAX_INLINE_LEN { - let mut buf: [u8; 7] = [0; 7]; - copy_memory(string_to_add.as_bytes(), &mut buf); - Inline(len as u8, buf) - } else { - Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) - } + let static_set = Static::get(); + let hash = phf_shared::hash(&*string_to_add, static_set.key); + let index = phf_shared::get_index(hash, static_set.disps, static_set.atoms.len()); + + let unpacked = if static_set.atoms[index as usize] == string_to_add { + Static(index) + } else { + let len = string_to_add.len(); + if len <= MAX_INLINE_LEN { + let mut buf: [u8; 7] = [0; 7]; + copy_memory(string_to_add.as_bytes(), &mut buf); + Inline(len as u8, buf) + } else { + Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) } }; @@ -337,7 +347,7 @@ impl ops::Deref for Atom { let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8_unchecked(buf) }, - Static(idx) => Static::get().index(idx).expect("bad static atom"), + Static(idx) => Static::get().atoms.get(idx as usize).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -561,9 +571,9 @@ mod tests { use std::mem; use std::thread; use super::Atom as GenericAtom; - use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET}; + use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET, PhfStrSet}; use super::UnpackedAtom::{Dynamic, Inline, Static}; - use shared::{ENTRY_ALIGNMENT, PhfStrSet}; + use shared::ENTRY_ALIGNMENT; pub type Atom = GenericAtom; pub struct DefaultStatic; @@ -712,7 +722,7 @@ mod tests { assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); assert_eq!(0x2, x.unsafe_data & 
0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= DefaultStatic::get().iter().len() as u64); + assert!((x.unsafe_data >> 32) <= DefaultStatic::get().atoms.len() as u64); } // This test is here to make sure we don't change atom representation diff --git a/src/lib.rs b/src/lib.rs index 60d67d1..da777cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,8 +22,7 @@ extern crate serde; extern crate phf_shared; -pub use atom::{Atom, StaticAtomSet}; -pub use shared::PhfStrSet; +pub use atom::{Atom, StaticAtomSet, PhfStrSet}; include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); diff --git a/src/shared.rs b/src/shared.rs index 8b9133b..f69a4f4 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -7,8 +7,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use phf_shared; - // FIXME(rust-lang/rust#18153): generate these from an enum pub const DYNAMIC_TAG: u8 = 0b_00; pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble @@ -23,32 +21,3 @@ pub const STATIC_SHIFT_BITS: usize = 32; pub fn pack_static(n: u32) -> u64 { (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) } - -pub struct PhfStrSet { - pub key: u64, - pub disps: &'static [(u32, u32)], - pub atoms: &'static [&'static str], -} - -impl PhfStrSet { - #[inline] - pub fn get_index_or_hash(&self, s: &str) -> Result { - let hash = phf_shared::hash(s, self.key); - let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); - if self.atoms[index as usize] == s { - Ok(index) - } else { - Err(hash) - } - } - - #[inline] - pub fn index(&self, i: u32) -> Option<&'static str> { - self.atoms.get(i as usize).map(|&s| s) - } - - #[inline] - pub fn iter(&self) -> ::std::slice::Iter<&'static str> { - self.atoms.iter() - } -} From 684191e1a1fc5a7ab5d19e6ce676636159f02cc5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 16:46:38 +0200 Subject: [PATCH 080/212] Flatten the src/atom directory --- src/{atom/mod.rs => 
atom.rs} | 1 + src/{atom => }/bench.rs | 0 2 files changed, 1 insertion(+) rename src/{atom/mod.rs => atom.rs} (99%) rename src/{atom => }/bench.rs (100%) diff --git a/src/atom/mod.rs b/src/atom.rs similarity index 99% rename from src/atom/mod.rs rename to src/atom.rs index 6be6cb6..399328a 100644 --- a/src/atom/mod.rs +++ b/src/atom.rs @@ -564,6 +564,7 @@ fn copy_memory(src: &[u8], dst: &mut [u8]) { } #[cfg(all(test, feature = "unstable"))] +#[path = "bench.rs"] mod bench; #[cfg(test)] diff --git a/src/atom/bench.rs b/src/bench.rs similarity index 100% rename from src/atom/bench.rs rename to src/bench.rs From fabedb09fe03e548ad252accab7f16f3c70b34e5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 16:56:55 +0200 Subject: [PATCH 081/212] Add EmptyStaticAtomSet and DefaultAtom. --- src/atom.rs | 18 ++++++++++++++++++ src/lib.rs | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 399328a..14614af 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -180,6 +180,24 @@ pub struct PhfStrSet { pub atoms: &'static [&'static str], } +pub struct EmptyStaticAtomSet; + +impl StaticAtomSet for EmptyStaticAtomSet { + fn get() -> &'static PhfStrSet { + // The name is a lie: this set is not empty (it contains the empty string) + // but that’s only to avoid divisions by zero in rust-phf. + static SET: PhfStrSet = PhfStrSet { + key: 0, + disps: &[(0, 0)], + atoms: &[""], + }; + &SET + } +} + +/// Use this if you don’t care about static atoms. +pub type DefaultAtom = Atom; + pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
diff --git a/src/lib.rs b/src/lib.rs index da777cf..f5ea9d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,7 @@ extern crate serde; extern crate phf_shared; -pub use atom::{Atom, StaticAtomSet, PhfStrSet}; +pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); From e9a82f7a71148328685b67edc07e6557d9cebada Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 17:12:51 +0200 Subject: [PATCH 082/212] Make examples/* crates build again. --- examples/event-log/src/main.rs | 2 +- examples/summarize-events/src/main.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/event-log/src/main.rs b/examples/event-log/src/main.rs index 89adfdf..7a25e29 100644 --- a/examples/event-log/src/main.rs +++ b/examples/event-log/src/main.rs @@ -9,7 +9,7 @@ extern crate string_cache; -use string_cache::Atom; +use string_cache::DefaultAtom as Atom; use string_cache::event; use std::io; diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 8a44389..66773e2 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -16,10 +16,11 @@ extern crate phf_shared; #[allow(dead_code)] mod shared; -use string_cache::Atom; +use string_cache::DefaultAtom as Atom; use std::{env, cmp}; use std::collections::hash_map::{HashMap, Entry}; +use std::marker::PhantomData; use std::path::Path; #[derive(RustcDecodable, Debug)] @@ -88,7 +89,7 @@ fn main() { // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. 
- _ => Atom { unsafe_data: ev.id }.to_string(), + _ => Atom { unsafe_data: ev.id, phantom: PhantomData }.to_string(), }; match summary.entry(string) { From 4a6d5f1e50631872c7221816a2582858da0c8e4a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 17:32:32 +0200 Subject: [PATCH 083/212] Link to docs.rs rather than doc.servo.org. --- Cargo.toml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 80266d8..074ef79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" -documentation = "http://doc.servo.org/string_cache/" +documentation = "https://docs.rs/string_cache/" build = "build.rs" [lib] diff --git a/README.md b/README.md index f18a7dd..6d020ac 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,6 @@ [![Build Status](https://travis-ci.org/servo/string-cache.svg?branch=master)](https://travis-ci.org/servo/string-cache) -[Documentation](http://doc.servo.org/string_cache/) +[Documentation](https://docs.rs/string_cache/) A string interning library for Rust, developed as part of the [Servo](https://github.com/servo/servo) project. 
From f054f9b201dcae543ef2c955374c5ed54d291cc1 Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Tue, 25 Oct 2016 19:53:28 +0200 Subject: [PATCH 084/212] Add and use new string-cache-codegen crate --- .travis.yml | 1 + Cargo.toml | 8 +- build.rs | 62 ++---------- examples/summarize-events/src/main.rs | 2 +- src/atom.rs | 51 +++++----- src/bench.rs | 20 ++-- src/lib.rs | 12 ++- string-cache-codegen/Cargo.toml | 16 ++++ string-cache-codegen/lib.rs | 120 ++++++++++++++++++++++++ {src => string-cache-codegen}/shared.rs | 0 10 files changed, 189 insertions(+), 103 deletions(-) create mode 100644 string-cache-codegen/Cargo.toml create mode 100644 string-cache-codegen/lib.rs rename {src => string-cache-codegen}/shared.rs (100%) diff --git a/.travis.yml b/.travis.yml index 485a91f..b7ee145 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ script: - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - cargo test --features heapsize + - "cd string-cache-codegen/ && cargo build && cd .." - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." notifications: diff --git a/Cargo.toml b/Cargo.toml index 074ef79..8443c73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,12 @@ repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" build = "build.rs" +# Do not `exclude` ./string-cache-codegen because we want to include +# ./string-cache-codegen/shared.rs, and `include` is a pain to use +# (It has to be exhaustive.) +# This means that packages for this crate include some unused files, +# but they’re not too big so that shouldn’t be a problem. 
+ [lib] name = "string_cache" @@ -36,4 +42,4 @@ heapsize = { version = "0.3", optional = true } rand = "0.3" [build-dependencies] -phf_generator = "0.7.4" +string_cache_codegen = { version = "0.3", path = "./string-cache-codegen" } diff --git a/build.rs b/build.rs index ea87b0f..8cd89df 100644 --- a/build.rs +++ b/build.rs @@ -1,62 +1,14 @@ -extern crate phf_generator; +extern crate string_cache_codegen; -#[path = "src/shared.rs"] #[allow(dead_code)] mod shared; -#[path = "src/static_atom_list.rs"] mod static_atom_list; +#[path = "src/static_atom_list.rs"] +mod static_atom_list; use std::env; -use std::fs::File; -use std::io::{BufWriter, Write}; use std::path::Path; fn main() { - let hash_state = generate(); - write_static_atom_set(&hash_state); - write_atom_macro(&hash_state); -} - -fn generate() -> phf_generator::HashState { - let mut set = std::collections::HashSet::new(); - for atom in static_atom_list::ATOMS { - if !set.insert(atom) { - panic!("duplicate static atom `{:?}`", atom); - } - } - phf_generator::generate_hash(static_atom_list::ATOMS) -} - -fn write_static_atom_set(hash_state: &phf_generator::HashState) { - let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - macro_rules! w { - ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } - } - w!("pub static STATIC_ATOM_SET: PhfStrSet = PhfStrSet {{"); - w!(" key: {},", hash_state.key); - w!(" disps: &["); - for &(d1, d2) in &hash_state.disps { - w!(" ({}, {}),", d1, d2); - } - w!(" ],"); - w!(" atoms: &["); - for &idx in &hash_state.map { - w!(" {:?},", static_atom_list::ATOMS[idx]); - } - w!(" ],"); - w!("}};"); -} - -fn write_atom_macro(hash_state: &phf_generator::HashState) { - let path = Path::new(&env::var("OUT_DIR").unwrap()).join("atom_macro.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - writeln!(file, r"#[macro_export]").unwrap(); - writeln!(file, r"macro_rules! 
atom {{").unwrap(); - for (i, &idx) in hash_state.map.iter().enumerate() { - writeln!( - file, - r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", - static_atom_list::ATOMS[idx], - shared::pack_static(i as u32), - ).unwrap(); - } - writeln!(file, r"}}").unwrap(); + string_cache_codegen::AtomType::new("atom::tests::TestAtom", "test_atom!") + .atoms(static_atom_list::ATOMS) + .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) + .unwrap() } diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 66773e2..70ab6be 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -12,7 +12,7 @@ extern crate string_cache; extern crate rustc_serialize; extern crate phf_shared; -#[path = "../../../src/shared.rs"] +#[path = "../../../string-cache-codegen/shared.rs"] #[allow(dead_code)] mod shared; diff --git a/src/atom.rs b/src/atom.rs index 14614af..64124cb 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -37,8 +37,6 @@ use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] use event::Event; -include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); - #[cfg(not(feature = "log-events"))] macro_rules! log (($e:expr) => (())); @@ -199,7 +197,7 @@ impl StaticAtomSet for EmptyStaticAtomSet { pub type DefaultAtom = Atom; pub struct Atom { - /// This field is public so that the `atom!()` macro can use it. + /// This field is public so that the `atom!()` macros can use it. /// You should not otherwise access this field. 
#[doc(hidden)] pub unsafe_data: u64, @@ -234,7 +232,7 @@ impl Atom { impl Default for Atom { fn default() -> Self { - atom!("") + Self::from("") } } @@ -581,26 +579,17 @@ fn copy_memory(src: &[u8], dst: &mut [u8]) { } } -#[cfg(all(test, feature = "unstable"))] -#[path = "bench.rs"] -mod bench; - #[cfg(test)] +#[macro_use] mod tests { use std::mem; use std::thread; - use super::Atom as GenericAtom; - use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET, PhfStrSet}; + use super::{StaticAtomSet, StringCacheEntry}; use super::UnpackedAtom::{Dynamic, Inline, Static}; use shared::ENTRY_ALIGNMENT; - pub type Atom = GenericAtom; - pub struct DefaultStatic; - impl StaticAtomSet for DefaultStatic { - fn get() -> &'static PhfStrSet { - &STATIC_ATOM_SET - } - } + include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); + pub type Atom = TestAtom; #[test] fn test_as_slice() { @@ -741,7 +730,7 @@ mod tests { assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= DefaultStatic::get().atoms.len() as u64); + assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); } // This test is here to make sure we don't change atom representation @@ -749,9 +738,9 @@ mod tests { // static atom table, the tag values, etc. 
// Static atoms - check_static("a", atom!("a")); - check_static("address", atom!("address")); - check_static("area", atom!("area")); + check_static("a", test_atom!("a")); + check_static("address", test_atom!("address")); + check_static("area", test_atom!("area")); // Inline atoms check("e", 0x0000_0000_0000_6511); @@ -790,27 +779,27 @@ mod tests { #[test] fn atom_macro() { - assert_eq!(atom!("body"), Atom::from("body")); - assert_eq!(atom!("font-weight"), Atom::from("font-weight")); + assert_eq!(test_atom!("body"), Atom::from("body")); + assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); } #[test] fn match_atom() { assert_eq!(2, match Atom::from("head") { - atom!("br") => 1, - atom!("html") | atom!("head") => 2, + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("body") { - atom!("br") => 1, - atom!("html") | atom!("head") => 2, + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("zzzzzz") { - atom!("br") => 1, - atom!("html") | atom!("head") => 2, + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, _ => 3, }); } @@ -869,3 +858,7 @@ mod tests { assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); } } + +#[cfg(all(test, feature = "unstable"))] +#[path = "bench.rs"] +mod bench; diff --git a/src/bench.rs b/src/bench.rs index 585d1c1..f6f5248 100644 --- a/src/bench.rs +++ b/src/bench.rs @@ -27,20 +27,12 @@ and cheap to move around, which isn't reflected in these tests. */ -use atom::tests::Atom; +use atom::tests::TestAtom; use test::{Bencher, black_box}; -macro_rules! test_atom { - ($tt: tt) => {{ - // Add type annotation to help inference - let atom: Atom = atom!($tt); - atom - }} -} - // Just shorthand -fn mk(x: &str) -> Atom { - Atom::from(x) +fn mk(x: &str) -> TestAtom { + TestAtom::from(x) } macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( @@ -89,7 +81,7 @@ macro_rules! 
bench_one ( fn intern(b: &mut Bencher) { let x = $x.to_string(); b.iter(|| { - black_box(Atom::from(&*x)); + black_box(TestAtom::from(&*x)); }); } ); @@ -142,7 +134,7 @@ macro_rules! bench_all ( use std::string::ToString; use std::iter::repeat; - use atom::tests::Atom; + use atom::tests::TestAtom; use atom::UnpackedAtom::{Static, Inline, Dynamic}; use super::mk; @@ -213,7 +205,7 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( *n = (*n % 0x40) + 0x20; } let s = str::from_utf8(&buf[..]).unwrap(); - black_box(Atom::from(s)); + black_box(TestAtom::from(s)); }); } )); diff --git a/src/lib.rs b/src/lib.rs index f5ea9d4..ed72634 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,11 +24,17 @@ extern crate phf_shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; -include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); - #[cfg(feature = "log-events")] #[macro_use] pub mod event; pub mod atom; -pub mod shared; + +#[path = "../string-cache-codegen/shared.rs"] +mod shared; + +// Make test_atom! macro work in this crate. +// `$crate` would not be appropriate for other crates creating such macros +mod string_cache { + pub use {Atom, StaticAtomSet, PhfStrSet}; +} diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml new file mode 100644 index 0000000..7e721a2 --- /dev/null +++ b/string-cache-codegen/Cargo.toml @@ -0,0 +1,16 @@ +[package] + +name = "string_cache_codegen" +version = "0.3.0" +authors = [ "The Servo Project Developers" ] +description = "A codegen library for string-cache, developed as part of the Servo project." 
+license = "MIT / Apache-2.0" +repository = "https://github.com/servo/string-cache" +documentation = "https://docs.rs/string_cache_codegen/" + +[lib] +name = "string_cache_codegen" +path = "lib.rs" + +[dependencies] +phf_generator = "0.7.15" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs new file mode 100644 index 0000000..c0ac564 --- /dev/null +++ b/string-cache-codegen/lib.rs @@ -0,0 +1,120 @@ +// Copyright 2016 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate phf_generator; + +use std::collections::HashSet; +use std::fs::File; +use std::io::{self, Write, BufWriter}; +use std::path::Path; + +#[allow(dead_code)] +mod shared; + +/// A builder for a static atom set and relevant macros +pub struct AtomType { + path: String, + macro_name: String, + atoms: HashSet, +} + +impl AtomType { + /// Constructs a new static atom set builder + /// + /// `path` is a path within a crate of the atom type that will be created. + /// e.g. `"FooAtom"` at the crate root or `"foo::Atom"` if the generated code + /// is included in a `foo` module. + /// + /// `macro_name` must end with `!`. + /// + /// For example, `AtomType::new("foo::FooAtom", "foo_atom!")` will generate: + /// + /// ```rust + /// pub type FooAtom = ::string_cache::Atom; + /// pub struct FooAtomStaticSet; + /// impl ::string_cache::StaticAtomSet for FooAtomStaticSet { + /// // ... 
+ /// } + /// #[macro_export] + /// macro_rules foo_atom { + /// // Expands to: $crate::foo::FooAtom { … } + /// } + pub fn new(path: &str, macro_name: &str) -> Self { + let mut set = HashSet::new(); + set.insert(String::new()); // rust-phf requires a non-empty set + assert!(macro_name.ends_with('!')); + AtomType { + path: path.to_owned(), + macro_name: macro_name[..macro_name.len() - 1].to_owned(), + atoms: set, + } + } + + /// Adds an atom to the builder + pub fn atom(&mut self, s: &str) -> &mut Self { + self.atoms.insert(s.to_owned()); + self + } + + /// Adds multiple atoms to the builder + pub fn atoms(&mut self, iter: I) -> &mut Self + where I: IntoIterator, I::Item: AsRef { + self.atoms.extend(iter.into_iter().map(|s| s.as_ref().to_owned())); + self + } + + /// Write generated code to `destination`. + pub fn write_to(&self, mut destination: W) -> io::Result<()> where W: Write { + let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); + let hash_state = phf_generator::generate_hash(&atoms); + let atoms: Vec<&str> = hash_state.map.iter().map(|&idx| atoms[idx]).collect(); + + let type_name = if let Some(position) = self.path.rfind("::") { + &self.path[position + "::".len() ..] + } else { + &self.path + }; + + macro_rules! w { + ($($arg: expr),+) => { try!(writeln!(destination, $($arg),+)) } + } + + w!("pub type {} = ::string_cache::Atom<{}StaticSet>;", type_name, type_name); + w!("pub struct {}StaticSet;", type_name); + w!("impl ::string_cache::StaticAtomSet for {}StaticSet {{", type_name); + w!(" fn get() -> &'static ::string_cache::PhfStrSet {{"); + w!(" static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet {{"); + w!(" key: {:#?},", hash_state.key); + w!(" disps: &{:?},", hash_state.disps); + w!(" atoms: &{:#?},", atoms); + w!(" }};"); + w!(" &SET"); + w!(" }}"); + w!("}}"); + w!("#[macro_export]"); + w!("macro_rules! 
{} {{", self.macro_name); + for (i, atom) in atoms.iter().enumerate() { + w!("({:?}) => {{ $crate::{} {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", + atom, + self.path, + shared::pack_static(i as u32) + ); + } + w!("}}"); + Ok(()) + } + + /// Create a new file at `path` and write generated code there. + /// + /// Typical usage: + /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))` + pub fn write_to_file(&self, path: &Path) -> io::Result<()> { + self.write_to(BufWriter::new(try!(File::create(path)))) + } +} diff --git a/src/shared.rs b/string-cache-codegen/shared.rs similarity index 100% rename from src/shared.rs rename to string-cache-codegen/shared.rs From 570fea040ab3c964add1b2c74916c4c49dd7610d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 20:01:31 +0200 Subject: [PATCH 085/212] Remove the big static atom list. Fix #22 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s going into various atom types in html5ever and Servo. 
--- build.rs | 7 +- src/static_atom_list.rs | 1294 --------------------------------------- 2 files changed, 3 insertions(+), 1298 deletions(-) delete mode 100644 src/static_atom_list.rs diff --git a/build.rs b/build.rs index 8cd89df..4b0bb9e 100644 --- a/build.rs +++ b/build.rs @@ -1,14 +1,13 @@ extern crate string_cache_codegen; -#[path = "src/static_atom_list.rs"] -mod static_atom_list; - use std::env; use std::path::Path; fn main() { string_cache_codegen::AtomType::new("atom::tests::TestAtom", "test_atom!") - .atoms(static_atom_list::ATOMS) + .atoms(&[ + "a", "b", "address", "area", "body", "font-weight", "br", "html", "head", "id", + ]) .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) .unwrap() } diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs deleted file mode 100644 index f90ca6a..0000000 --- a/src/static_atom_list.rs +++ /dev/null @@ -1,1294 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -pub static ATOMS: &'static [&'static str] = &[ - - // The order is not preserved by phf. 
- - "a", - "address", - "applet", - "area", - "article", - "aside", - "b", - "base", - "basefont", - "bgsound", - "big", - "blockquote", - "body", - "br", - "button", - "caption", - "col", - "colgroup", - "dd", - "dt", - "embed", - "form", - "frame", - "frameset", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "head", - "html", - "input", - "li", - "link", - "marquee", - "meta", - "noframes", - "nonce", - "noscript", - "object", - "optgroup", - "option", - "param", - "plaintext", - "pre", - "preload", - "rp", - "rt", - "script", - "select", - "source", - "style", - "svg", - "table", - "tbody", - "td", - "template", - "textarea", - "tfoot", - "th", - "thead", - "title", - "tr", - "track", - "xmp", - - "", - - "*", - - // XML namespaces known to the HTML syntax spec - "http://www.w3.org/1999/xhtml", - "http://www.w3.org/XML/1998/namespace", - "http://www.w3.org/2000/xmlns/", - "http://www.w3.org/1999/xlink", - "http://www.w3.org/2000/svg", - "http://www.w3.org/1998/Math/MathML", - - "#text", - "#comment", - "#document", - "#document-fragment", - - // User agent strings - "4.0", - "Gecko", - "Linux", - "Mac", - "Mozilla", - "Netscape", - "Win32", - - // Font families - "serif", - "sans-serif", - "cursive", - "fantasy", - "monospace", - - "abbr", - "abort", - "abs", - "accent", - "accent-height", - "accentunder", - "accept", - "accept-charset", - "accesskey", - "accumulate", - "acronym", - "action", - "actiontype", - "activate", - "active", - "actuate", - "additive", - "afterscriptexecute", - "align", - "alignment-baseline", - "alignmentscope", - "alink", - "alphabetic", - "alt", - "alternate", - "altglyph", - "altGlyph", - "altglyphdef", - "altGlyphDef", - "altglyphitem", - "altGlyphItem", - "altimg", - "alttext", - "amplitude", - "and", - "animate", - "animatecolor", - "animateColor", - "animatemotion", - "animateMotion", - "animatetransform", - "animateTransform", - "animation", - "annotation", - "annotation-xml", - "anonymous", - "apple-touch-icon", - "apply", - 
"approx", - "arabic-form", - "arccos", - "arccosh", - "arccot", - "arccoth", - "arccsc", - "arccsch", - "archive", - "arcrole", - "arcsec", - "arcsech", - "arcsin", - "arcsinh", - "arctan", - "arctanh", - "arg", - "aria-activedescendant", - "aria-atomic", - "aria-autocomplete", - "aria-busy", - "aria-channel", - "aria-checked", - "aria-controls", - "aria-datatype", - "aria-describedby", - "aria-disabled", - "aria-dropeffect", - "aria-expanded", - "aria-flowto", - "aria-grab", - "aria-haspopup", - "aria-hidden", - "aria-invalid", - "aria-labelledby", - "aria-level", - "aria-live", - "aria-multiline", - "aria-multiselectable", - "aria-owns", - "aria-posinset", - "aria-pressed", - "aria-readonly", - "aria-relevant", - "aria-required", - "aria-secret", - "aria-selected", - "aria-setsize", - "aria-sort", - "aria-templateid", - "aria-valuemax", - "aria-valuemin", - "aria-valuenow", - "ascent", - "async", - "attributename", - "attributeName", - "attributetype", - "attributeType", - "audio", - "autocomplete", - "autofocus", - "autoplay", - "autosubmit", - "axis", - "azimuth", - "background", - "background-attachment", - "background-clip", - "background-color", - "background-image", - "background-origin", - "background-position", - "background-repeat", - "background-size", - "basefrequency", - "baseFrequency", - "baseline", - "baseline-shift", - "baseprofile", - "baseProfile", - "bbox", - "bdi", - "bdo", - "beforescriptexecute", - "beforeunload", - "begin", - "bevel", - "bevelled", - "bgcolor", - "bias", - "blink", - "blob", - "border", - "border-bottom", - "border-bottom-color", - "border-bottom-left-radius", - "border-bottom-right-radius", - "border-bottom-style", - "border-bottom-width", - "border-color", - "border-left", - "border-left-color", - "border-left-style", - "border-left-width", - "border-radius", - "border-right", - "border-right-color", - "border-right-style", - "border-right-width", - "border-style", - "border-top", - "border-top-color", - 
"border-top-left-radius", - "border-top-right-radius", - "border-top-style", - "border-top-width", - "border-width", - "bottom", - "box-shadow", - "butt", - "bvar", - "by", - "calcmode", - "calcMode", - "canvas", - "cap-height", - "card", - "cartesianproduct", - "ceiling", - "cellpadding", - "cellspacing", - "center", - "change", - "char", - "charoff", - "charset", - "checkbox", - "checked", - "ci", - "circle", - "cite", - "class", - "classid", - "clear", - "click", - "clip", - "clip-path", - "clippath", - "clipPath", - "clippathunits", - "clipPathUnits", - "clip-rule", - "close", - "closure", - "cn", - "code", - "codebase", - "codetype", - "codomain", - "color", - "color-interpolation", - "color-interpolation-filters", - "color-profile", - "color-rendering", - "cols", - "colspan", - "columnalign", - "column-count", - "columnlines", - "columns", - "columnspacing", - "columnspan", - "column-width", - "columnwidth", - "compact", - "complexes", - "compose", - "condition", - "conjugate", - "content", - "contenteditable", - "contentscripttype", - "contentScriptType", - "contentstyletype", - "contentStyleType", - "contextmenu", - "controls", - "coords", - "cos", - "cosh", - "cot", - "coth", - "crossorigin", - "csc", - "csch", - "csymbol", - "curl", - "cursor", - "customevent", - "cx", - "cy", - "d", - "data", - "datafld", - "dataformatas", - "datalist", - "datasrc", - "datatemplate", - "date", - "datetime", - "datetime-local", - "declare", - "default", - "defer", - "definition-src", - "definitionurl", - "definitionURL", - "defs", - "degree", - "del", - "depth", - "desc", - "descent", - "details", - "determinant", - "dfn", - "dialog", - "diff", - "diffuseconstant", - "diffuseConstant", - "dir", - "direction", - "dirname", - "disabled", - "discard", - "display", - "displaystyle", - "div", - "divergence", - "divide", - "divisor", - "dl", - "domain", - "domainofapplication", - "DOMContentLoaded", - "dominant-baseline", - "draggable", - "dur", - "dx", - "dy", - "edge", - 
"edgemode", - "edgeMode", - "elevation", - "ellipse", - "em", - "email", - "emptyset", - "enable-background", - "encoding", - "enctype", - "end", - "eq", - "equalcolumns", - "equalrows", - "equivalent", - "error", - "eulergamma", - "event", - "events", - "exists", - "exp", - "exponent", - "exponentiale", - "externalresourcesrequired", - "externalResourcesRequired", - "face", - "factorial", - "factorof", - "false", - "feblend", - "feBlend", - "fecolormatrix", - "feColorMatrix", - "fecomponenttransfer", - "feComponentTransfer", - "fecomposite", - "feComposite", - "feconvolvematrix", - "feConvolveMatrix", - "fediffuselighting", - "feDiffuseLighting", - "fedisplacementmap", - "feDisplacementMap", - "fedistantlight", - "feDistantLight", - "fedropshadow", - "feDropShadow", - "feflood", - "feFlood", - "fefunca", - "feFuncA", - "fefuncb", - "feFuncB", - "fefuncg", - "feFuncG", - "fefuncr", - "feFuncR", - "fegaussianblur", - "feGaussianBlur", - "feimage", - "feImage", - "femerge", - "feMerge", - "femergenode", - "feMergeNode", - "femorphology", - "feMorphology", - "fence", - "feoffset", - "feOffset", - "fepointlight", - "fePointLight", - "fespecularlighting", - "feSpecularLighting", - "fespotlight", - "feSpotLight", - "fetile", - "feTile", - "feturbulence", - "feTurbulence", - "fieldset", - "figcaption", - "figure", - "file", - "fill", - "fill-opacity", - "fill-rule", - "filter", - "filterres", - "filterRes", - "filterunits", - "filterUnits", - "flex-grow", - "flex-shrink", - "float", - "flood", - "flood-color", - "flood-opacity", - "floor", - "fn", - "font", - "font-face", - "font-face-format", - "font-face-name", - "font-face-src", - "font-face-uri", - "font-family", - "fontfamily", - "font-size", - "fontsize", - "font-size-adjust", - "font-stretch", - "font-style", - "fontstyle", - "font-variant", - "font-weight", - "fontweight", - "footer", - "for", - "forall", - "foreignobject", - "foreignObject", - "formaction", - "format", - "formenctype", - "formmethod", - 
"formnovalidate", - "formtarget", - "frameborder", - "framespacing", - "from", - "fx", - "fy", - "g", - "g1", - "g2", - "gcd", - "geq", - "glyph", - "glyph-name", - "glyph-orientation-horizontal", - "glyph-orientation-vertical", - "glyphref", - "glyphRef", - "grad", - "gradienttransform", - "gradientTransform", - "gradientunits", - "gradientUnits", - "groupalign", - "gt", - "handler", - "hanging", - "header", - "headers", - "height", - "hgroup", - "hidden", - "hidefocus", - "high", - "hkern", - "horiz-adv-x", - "horiz-origin-x", - "horiz-origin-y", - "hr", - "href", - "hreflang", - "hspace", - "htmlevents", - "http-equiv", - "i", - "icon", - "id", - "ident", - "ideographic", - "iframe", - "image", - "image-rendering", - "imaginary", - "imaginaryi", - "img", - "implies", - "important", - "in", - "in2", - "index", - "infinity", - "inputmode", - "ins", - "int", - "integers", - "intercept", - "intersect", - "interval", - "invalid", - "inverse", - "irrelevant", - "isindex", - "ismap", - "k", - "k1", - "k2", - "k3", - "k4", - "kbd", - "kernelmatrix", - "kernelMatrix", - "kernelunitlength", - "kernelUnitLength", - "kerning", - "keyboardevent", - "keydown", - "keyevents", - "keygen", - "keypoints", - "keyPoints", - "keypress", - "keysplines", - "keySplines", - "keytimes", - "keyTimes", - "keyup", - "label", - "lambda", - "lang", - "language", - "laplacian", - "largeop", - "lcm", - "left", - "legend", - "lengthadjust", - "lengthAdjust", - "leq", - "letter-spacing", - "lighting-color", - "limit", - "limitingconeangle", - "limitingConeAngle", - "line", - "lineargradient", - "linearGradient", - "linebreak", - "line-height", - "linethickness", - "list", - "listener", - "listing", - "list-style", - "list-style-image", - "list-style-position", - "list-style-type", - "ln", - "load", - "loadstart", - "loadend", - "local", - "log", - "logbase", - "longdesc", - "loop", - "low", - "lowlimit", - "lowsrc", - "lquote", - "lspace", - "lt", - "macros", - "maction", - "main", - 
"maligngroup", - "malignmark", - "manifest", - "map", - "margin", - "margin-bottom", - "marginheight", - "margin-left", - "margin-right", - "margin-top", - "marginwidth", - "mark", - "marker", - "marker-end", - "markerheight", - "markerHeight", - "marker-mid", - "marker-start", - "markerunits", - "markerUnits", - "markerwidth", - "markerWidth", - "mask", - "maskcontentunits", - "maskContentUnits", - "maskunits", - "maskUnits", - "math", - "mathbackground", - "mathcolor", - "mathematical", - "mathsize", - "mathvariant", - "matrix", - "matrixrow", - "max", - "max-height", - "maxlength", - "maxsize", - "max-width", - "mean", - "media", - "median", - "mediummathspace", - "menclose", - "menu", - "menuitem", - "merror", - "message", - "messageevent", - "metadata", - "meter", - "method", - "mfenced", - "mfrac", - "mglyph", - "mi", - "min", - "min-height", - "minlength", - "minsize", - "minus", - "min-width", - "missing-glyph", - "miter", - "mlabeledtr", - "mmultiscripts", - "mn", - "mo", - "mode", - "moment", - "momentabout", - "month", - "mousedown", - "mouseevent", - "mouseevents", - "mouseover", - "mouseup", - "movablelimits", - "mover", - "mozbrowser", - "mpadded", - "mpath", - "mphantom", - "mprescripts", - "mroot", - "mrow", - "ms", - "mspace", - "msqrt", - "mstyle", - "msub", - "msubsup", - "msup", - "mtable", - "mtd", - "mtext", - "mtr", - "multicol", - "multipart/form-data", - "multiple", - "munder", - "munderover", - "name", - "nargs", - "naturalnumbers", - "nav", - "neq", - "nest", - "nextid", - "no message", - "no-referrer", - "no-referrer-when-downgrade", - "nobr", - "noembed", - "nohref", - "none", - "noresize", - "noshade", - "not", - "notanumber", - "notation", - "notin", - "notprsubset", - "notsubset", - "novalidate", - "nowrap", - "number", - "numoctaves", - "numOctaves", - "occurrence", - "off", - "offset", - "ol", - "on", - "onabort", - "onactivate", - "onafterprint", - "onafterupdate", - "onbefordeactivate", - "onbeforeactivate", - "onbeforecopy", - 
"onbeforecut", - "onbeforeeditfocus", - "onbeforepaste", - "onbeforeprint", - "onbeforeunload", - "onbeforeupdate", - "onbegin", - "onblur", - "onbounce", - "oncellchange", - "onchange", - "onclick", - "oncontextmenu", - "oncontrolselect", - "oncopy", - "oncut", - "ondataavailable", - "ondatasetchanged", - "ondatasetcomplete", - "ondblclick", - "ondeactivate", - "ondrag", - "ondragdrop", - "ondragend", - "ondragenter", - "ondragleave", - "ondragover", - "ondragstart", - "ondrop", - "onend", - "onerror", - "onerrorupdate", - "onfilterchange", - "onfinish", - "onfocus", - "onfocusin", - "onfocusout", - "onformchange", - "onforminput", - "onhashchange", - "onhelp", - "oninput", - "oninvalid", - "onkeydown", - "onkeypress", - "onkeyup", - "onlanguagechange", - "onload", - "onlosecapture", - "onmessage", - "onmousedown", - "onmouseenter", - "onmouseleave", - "onmousemove", - "onmouseout", - "onmouseover", - "onmouseup", - "onmousewheel", - "onmove", - "onmoveend", - "onmovestart", - "onoffline", - "ononline", - "onpagehide", - "onpageshow", - "onpaste", - "onpopstate", - "onpropertychange", - "onreadystatechange", - "onrepeat", - "onreset", - "onresize", - "onrowenter", - "onrowexit", - "onrowsdelete", - "onrowsinserted", - "onscroll", - "onselect", - "onselectstart", - "onstatechange", - "onstart", - "onstop", - "onstorage", - "onsubmit", - "ontransitionend", - "onunload", - "onzoom", - "opacity", - "open", - "operator", - "optimum", - "or", - "order", - "orient", - "orientation", - "origin", - "origin-when-cross-origin", - "other", - "otherwise", - "outerproduct", - "outline", - "outline-color", - "outline-offset", - "outline-style", - "outline-width", - "output", - "overflow", - "overflow-wrap", - "overflow-x", - "overflow-y", - "overline-position", - "overline-thickness", - "p", - "padding", - "padding-bottom", - "padding-left", - "padding-right", - "padding-top", - "panose-1", - "partialdiff", - "password", - "path", - "pathlength", - "pathLength", - "pattern", - 
"patterncontentunits", - "patternContentUnits", - "patterntransform", - "patternTransform", - "patternunits", - "patternUnits", - "perspective", - "pi", - "piece", - "piecewise", - "ping", - "placeholder", - "plus", - "pointer-events", - "points", - "pointsatx", - "pointsAtX", - "pointsaty", - "pointsAtY", - "pointsatz", - "pointsAtZ", - "polygon", - "polyline", - "position", - "post", - "poster", - "power", - "prefetch", - "preservealpha", - "preserveAlpha", - "preserveaspectratio", - "preserveAspectRatio", - "primes", - "primitiveunits", - "primitiveUnits", - "product", - "profile", - "progress", - "prompt", - "prsubset", - "q", - "quotient", - "r", - "radialgradient", - "radialGradient", - "radio", - "radiogroup", - "radius", - "range", - "rationals", - "rb", - "readonly", - "readystatechange", - "real", - "reals", - "rect", - "referrerpolicy", - "refx", - "refX", - "refy", - "refY", - "rel", - "reln", - "rem", - "rendering-intent", - "repeat", - "repeatcount", - "repeatCount", - "repeatdur", - "repeatDur", - "repeat-max", - "repeat-min", - "repeat-start", - "repeat-template", - "replace", - "required", - "requiredextensions", - "requiredExtensions", - "requiredfeatures", - "requiredFeatures", - "reset", - "resize", - "restart", - "result", - "rev", - "right", - "role", - "root", - "rotate", - "round", - "rowalign", - "rowlines", - "rows", - "rowspacing", - "rowspan", - "rquote", - "rspace", - "rtc", - "ruby", - "rule", - "rules", - "rx", - "ry", - "s", - "samp", - "sandbox", - "scalarproduct", - "scale", - "scheme", - "scope", - "scoped", - "scriptlevel", - "scriptminsize", - "scriptsizemultiplier", - "scrolldelay", - "scrolling", - "sdev", - "seamless", - "search", - "sec", - "sech", - "section", - "seed", - "selected", - "selection", - "selector", - "semantics", - "sep", - "separator", - "separators", - "set", - "setdiff", - "shape", - "shape-rendering", - "show", - "sin", - "sinh", - "size", - "sizes", - "slope", - "small", - "solidcolor", - "space", - 
"spacer", - "spacing", - "span", - "specification", - "specularconstant", - "specularConstant", - "specularexponent", - "specularExponent", - "speed", - "spreadmethod", - "spreadMethod", - "square", - "src", - "srcdoc", - "standby", - "start", - "startoffset", - "startOffset", - "stddeviation", - "stdDeviation", - "stemh", - "stemv", - "step", - "stitchtiles", - "stitchTiles", - "stop", - "stop-color", - "stop-opacity", - "storage", - "stretchy", - "strike", - "strikethrough-position", - "strikethrough-thickness", - "string", - "stroke", - "stroke-dasharray", - "stroke-dashoffset", - "stroke-linecap", - "stroke-linejoin", - "stroke-miterlimit", - "stroke-opacity", - "stroke-width", - "strong", - "stylesheet", - "sub", - "submit", - "subscriptshift", - "subset", - "sum", - "summary", - "sup", - "superscriptshift", - "surfacescale", - "surfaceScale", - "switch", - "symbol", - "symmetric", - "systemlanguage", - "systemLanguage", - "tabindex", - "table-layout", - "tablevalues", - "tableValues", - "tan", - "tanh", - "target", - "targetx", - "targetX", - "targety", - "targetY", - "tbreak", - "tel", - "tendsto", - "text", - "text-align", - "text-anchor", - "text-decoration", - "text-indent", - "text-orientation", - "text-rendering", - "text-shadow", - "text/plain", - "textlength", - "textLength", - "textpath", - "textPath", - "thickmathspace", - "thinmathspace", - "time", - "times", - "to", - "top", - "touchevent", - "transform", - "transform-origin", - "transition-delay", - "transition-duration", - "transition-property", - "transition-timing-function", - "transitionend", - "transitions", - "transpose", - "tref", - "true", - "tspan", - "tt", - "type", - "u", - "u1", - "u2", - "uievent", - "uievents", - "ul", - "underline-position", - "underline-thickness", - "unicode", - "unicode-bidi", - "unicode-range", - "union", - "units-per-em", - "unsafe-url", - "unselectable", - "uplimit", - "url", - "use", - "use-credentials", - "usemap", - "UTF-8", - "valign", - "v-alphabetic", - 
"value", - "values", - "valuetype", - "var", - "variance", - "vector", - "vectorproduct", - "version", - "vert-adv-y", - "vertical-align", - "vert-origin-x", - "vert-origin-y", - "verythickmathspace", - "verythinmathspace", - "veryverythickmathspace", - "veryverythinmathspace", - "v-hanging", - "video", - "v-ideographic", - "view", - "viewbox", - "viewBox", - "viewtarget", - "viewTarget", - "visibility", - "vkern", - "vlink", - "v-mathematical", - "vspace", - "wbr", - "webglcontextcreationerror", - "week", - "when", - "white-space", - "width", - "widths", - "word-spacing", - "word-wrap", - "wrap", - "writing-mode", - "x", - "x1", - "x2", - "xchannelselector", - "xChannelSelector", - "x-height", - "xlink", - "xlink:actuate", - "xlink:arcrole", - "xlink:href", - "xlink:role", - "xlink:show", - "xlink:title", - "xlink:type", - "xml:base", - "xml:lang", - "xmlns", - "xmlns:xlink", - "xml:space", - "xor", - "xref", - "y", - "y1", - "y2", - "ychannelselector", - "yChannelSelector", - "z", - "z-index", - "zoomandpan", - "zoomAndPan", -]; From 06b2116e3b5fba398881c249fb1df2e959514eba Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 21:02:46 +0200 Subject: [PATCH 086/212] Make the Default impl use a static atom. --- src/atom.rs | 17 ++++++++++++++++- string-cache-codegen/lib.rs | 23 +++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 64124cb..1b12148 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -170,6 +170,7 @@ impl StringCache { pub trait StaticAtomSet { fn get() -> &'static PhfStrSet; + fn empty_string_index() -> u32; } pub struct PhfStrSet { @@ -191,6 +192,10 @@ impl StaticAtomSet for EmptyStaticAtomSet { }; &SET } + + fn empty_string_index() -> u32 { + 0 + } } /// Use this if you don’t care about static atoms. 
@@ -231,8 +236,12 @@ impl Atom { } impl Default for Atom { + #[inline] fn default() -> Self { - Self::from("") + Atom { + unsafe_data: pack_static(Static::empty_string_index()), + phantom: PhantomData + } } } @@ -663,6 +672,12 @@ mod tests { assert!(i0 != d0); } + #[test] + fn default() { + assert_eq!(TestAtom::default(), test_atom!("")); + assert_eq!(&*TestAtom::default(), ""); + } + #[test] fn ord() { fn check(x: &str, y: &str) { diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index c0ac564..eadd445 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -46,13 +46,11 @@ impl AtomType { /// // Expands to: $crate::foo::FooAtom { … } /// } pub fn new(path: &str, macro_name: &str) -> Self { - let mut set = HashSet::new(); - set.insert(String::new()); // rust-phf requires a non-empty set - assert!(macro_name.ends_with('!')); + assert!(macro_name.ends_with("!")); AtomType { path: path.to_owned(), - macro_name: macro_name[..macro_name.len() - 1].to_owned(), - atoms: set, + macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(), + atoms: HashSet::new(), } } @@ -70,10 +68,16 @@ impl AtomType { } /// Write generated code to `destination`. - pub fn write_to(&self, mut destination: W) -> io::Result<()> where W: Write { + pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { + // `impl Default for Atom` requires the empty string to be in the static set. + // This also makes sure the set in non-empty, + // which would cause divisions by zero in rust-phf. + self.atoms.insert(String::new()); + let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); let hash_state = phf_generator::generate_hash(&atoms); let atoms: Vec<&str> = hash_state.map.iter().map(|&idx| atoms[idx]).collect(); + let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap(); let type_name = if let Some(position) = self.path.rfind("::") { &self.path[position + "::".len() ..] 
@@ -90,12 +94,15 @@ impl AtomType { w!("impl ::string_cache::StaticAtomSet for {}StaticSet {{", type_name); w!(" fn get() -> &'static ::string_cache::PhfStrSet {{"); w!(" static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet {{"); - w!(" key: {:#?},", hash_state.key); + w!(" key: {},", hash_state.key); w!(" disps: &{:?},", hash_state.disps); w!(" atoms: &{:#?},", atoms); w!(" }};"); w!(" &SET"); w!(" }}"); + w!(" fn empty_string_index() -> u32 {{"); + w!(" {}", empty_string_index); + w!(" }}"); w!("}}"); w!("#[macro_export]"); w!("macro_rules! {} {{", self.macro_name); @@ -114,7 +121,7 @@ impl AtomType { /// /// Typical usage: /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))` - pub fn write_to_file(&self, path: &Path) -> io::Result<()> { + pub fn write_to_file(&mut self, path: &Path) -> io::Result<()> { self.write_to(BufWriter::new(try!(File::create(path)))) } } From 1d256d0c3a6be5dbcd6e9b5188743e2154ed1680 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 27 Oct 2016 17:42:10 +0200 Subject: [PATCH 087/212] Add usage example in README. --- README.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/README.md b/README.md index 6d020ac..d244f69 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,74 @@ [Documentation](https://docs.rs/string_cache/) A string interning library for Rust, developed as part of the [Servo](https://github.com/servo/servo) project. 
+ +## Simple usage + +In `Cargo.toml`: + +```toml +[dependencies] +string_cache = "0.3" +``` + +In `lib.rs`: + +```rust +extern crate string_cache; +use string_cache::DefaultAtom as Atom; +``` + +## With static atoms + +In `Cargo.toml`: + +```toml +[package] +build = "build.rs" + +[dependencies] +string_cache = "0.3" + +[build-dependencies] +string_cache_codegen = "0.3" +``` + +In `build.rs`: + +```rust +extern crate string_cache_codegen; + +use std::env; +use std::path::Path; + +fn main() { + string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!") + .atoms(&["foo", "bar"]) + .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs")) + .unwrap() +} +``` + +In `lib.rs`: + +```rust +extern crate string_cache; + +mod foo { + include!(concat!(env!("OUT_DIR"), "/foo_atom.rs")); +} +``` + +The generated code will define a `FooAtom` type and a `foo_atom!` macro. +The macro can be used in expression or patterns, with strings listed in `build.rs`. +For example: + +```rust +fn compute_something(input: &foo::FooAtom) -> u32 { + match *input { + foo_atom!("foo") => 1, + foo_atom!("bar") => 2, + _ => 3, + } +} +``` From 4528d77fe46ce41df019d851e9d50af18fb94963 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 28 Oct 2016 17:49:42 +0200 Subject: [PATCH 088/212] Note in toml files to also update README --- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8443c73..efbf981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.3.0" +version = "0.3.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 7e721a2..c829d64 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.3.0" +version = "0.3.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" From 122d793bbbd85e2a8b6532f4606a3014afdea780 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 2 Nov 2016 14:44:17 +0100 Subject: [PATCH 089/212] Cargo insists on no shared files between crates. --- Cargo.toml | 1 + shared/Cargo.toml | 11 +++++++++++ string-cache-codegen/shared.rs => shared/lib.rs | 0 src/lib.rs | 4 +--- string-cache-codegen/Cargo.toml | 3 ++- string-cache-codegen/lib.rs | 4 +--- 6 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 shared/Cargo.toml rename string-cache-codegen/shared.rs => shared/lib.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index efbf981..ae233cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ phf_shared = "0.7.4" debug_unreachable = "0.1.1" rustc-serialize = { version = "0.3", optional = true } heapsize = { version = "0.3", optional = true } +string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] rand = "0.3" diff --git a/shared/Cargo.toml b/shared/Cargo.toml new file mode 100644 index 0000000..6f8286d --- /dev/null +++ b/shared/Cargo.toml @@ -0,0 +1,11 @@ +[package] + +name = "string_cache_shared" +version = "0.3.0" +authors = [ "The Servo Project Developers" ] +description = "Code share between string_cache and string_cache_codegen." 
+license = "MIT / Apache-2.0" +repository = "https://github.com/servo/string-cache" + +[lib] +path = "lib.rs" diff --git a/string-cache-codegen/shared.rs b/shared/lib.rs similarity index 100% rename from string-cache-codegen/shared.rs rename to shared/lib.rs diff --git a/src/lib.rs b/src/lib.rs index ed72634..6229344 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ #[macro_use] extern crate debug_unreachable; extern crate serde; extern crate phf_shared; +extern crate string_cache_shared as shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; @@ -30,9 +31,6 @@ pub mod event; pub mod atom; -#[path = "../string-cache-codegen/shared.rs"] -mod shared; - // Make test_atom! macro work in this crate. // `$crate` would not be appropriate for other crates creating such macros mod string_cache { diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index c829d64..710aa15 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.3.0" # Also update ../README.md when making a semver-breaking change +version = "0.3.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -13,4 +13,5 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] +string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index eadd445..f16beed 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -8,15 +8,13 @@ // except according to those terms. 
extern crate phf_generator; +extern crate string_cache_shared as shared; use std::collections::HashSet; use std::fs::File; use std::io::{self, Write, BufWriter}; use std::path::Path; -#[allow(dead_code)] -mod shared; - /// A builder for a static atom set and relevant macros pub struct AtomType { path: String, From ae2e6547fc7387ec5a2d228a5578e6b99332253b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 25 Nov 2016 14:20:08 +0100 Subject: [PATCH 090/212] Use the quote crate for code generation --- string-cache-codegen/Cargo.toml | 1 + string-cache-codegen/lib.rs | 80 ++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 710aa15..b4fd102 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -15,3 +15,4 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" +quote = "0.3.9" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index f16beed..1505a37 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -9,10 +9,12 @@ extern crate phf_generator; extern crate string_cache_shared as shared; +#[macro_use] extern crate quote; use std::collections::HashSet; use std::fs::File; use std::io::{self, Write, BufWriter}; +use std::iter; use std::path::Path; /// A builder for a static atom set and relevant macros @@ -67,6 +69,18 @@ impl AtomType { /// Write generated code to `destination`. pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { + destination.write_all( + self.to_tokens() + .as_str() + // Insert some newlines to make the generated code slightly easier to read. 
+ .replace(" [ \"", "[\n\"") + .replace("\" , ", "\",\n") + .replace(" ( \"", "\n( \"") + .replace("; ", ";\n") + .as_bytes()) + } + + fn to_tokens(&mut self) -> quote::Tokens { // `impl Default for Atom` requires the empty string to be in the static set. // This also makes sure the set in non-empty, // which would cause divisions by zero in rust-phf. @@ -74,45 +88,49 @@ impl AtomType { let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); let hash_state = phf_generator::generate_hash(&atoms); - let atoms: Vec<&str> = hash_state.map.iter().map(|&idx| atoms[idx]).collect(); - let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap(); + let phf_generator::HashState { key, disps, map } = hash_state; + let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); + let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; + let data = (0..atoms.len()).map(|i| quote::Hex(shared::pack_static(i as u32))); let type_name = if let Some(position) = self.path.rfind("::") { &self.path[position + "::".len() ..] } else { &self.path }; + let static_set_name = quote::Ident::from(format!("{}StaticSet", type_name)); + let type_name = quote::Ident::from(type_name); + let macro_name = quote::Ident::from(&*self.macro_name); + let path = iter::repeat(quote::Ident::from(&*self.path)); - macro_rules! 
w { - ($($arg: expr),+) => { try!(writeln!(destination, $($arg),+)) } - } - - w!("pub type {} = ::string_cache::Atom<{}StaticSet>;", type_name, type_name); - w!("pub struct {}StaticSet;", type_name); - w!("impl ::string_cache::StaticAtomSet for {}StaticSet {{", type_name); - w!(" fn get() -> &'static ::string_cache::PhfStrSet {{"); - w!(" static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet {{"); - w!(" key: {},", hash_state.key); - w!(" disps: &{:?},", hash_state.disps); - w!(" atoms: &{:#?},", atoms); - w!(" }};"); - w!(" &SET"); - w!(" }}"); - w!(" fn empty_string_index() -> u32 {{"); - w!(" {}", empty_string_index); - w!(" }}"); - w!("}}"); - w!("#[macro_export]"); - w!("macro_rules! {} {{", self.macro_name); - for (i, atom) in atoms.iter().enumerate() { - w!("({:?}) => {{ $crate::{} {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", - atom, - self.path, - shared::pack_static(i as u32) - ); + quote! { + pub type #type_name = ::string_cache::Atom<#static_set_name>; + pub struct #static_set_name; + impl ::string_cache::StaticAtomSet for #static_set_name { + fn get() -> &'static ::string_cache::PhfStrSet { + static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet { + key: #key, + disps: &#disps, + atoms: &#atoms, + }; + &SET + } + fn empty_string_index() -> u32 { + #empty_string_index + } + } + #[macro_export] + macro_rules! #macro_name { + #( + (#atoms) => { + $crate::#path { + unsafe_data: #data, + phantom: ::std::marker::PhantomData, + } + }; + )* + } } - w!("}}"); - Ok(()) } /// Create a new file at `path` and write generated code there. 
From b305af0b62c46ec2384faf403330f430933943f2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 26 Nov 2016 13:08:34 +0100 Subject: [PATCH 091/212] Fix examples build --- examples/summarize-events/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 70ab6be..a74e659 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -12,7 +12,7 @@ extern crate string_cache; extern crate rustc_serialize; extern crate phf_shared; -#[path = "../../../string-cache-codegen/shared.rs"] +#[path = "../../../shared/lib.rs"] #[allow(dead_code)] mod shared; From cf2302533faa11679b8ff6179d21c67c90506884 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 14:56:13 +0100 Subject: [PATCH 092/212] Remove heap_size feature --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ae233cd..c84a746 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,9 +27,6 @@ log-events = ["rustc-serialize"] # Use unstable features to optimize space and time (memory and CPU usage). 
unstable = [] -# HeapSizeOf support -heap_size = ["heapsize"] - [dependencies] lazy_static = "0.2" serde = "0.8" From 6db3edbe30c760d9c8d44b1fb6ea26f77a4e3259 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Thu, 16 Feb 2017 18:29:34 +0100 Subject: [PATCH 093/212] Silence a warning --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 6229344..b7961c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "log-events")] extern crate rustc_serialize; -#[cfg(feature = "heapsize")] #[macro_use] extern crate heapsize; +#[cfg(feature = "heapsize")] extern crate heapsize; #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; From 9e3c85c3ad3441629f88434ca1a290406a6bf797 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 14:57:05 +0100 Subject: [PATCH 094/212] Remove Atom::with_str --- src/atom.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 1b12148..b03c5ac 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -228,11 +228,6 @@ impl Atom { pub fn get_hash(&self) -> u32 { ((self.unsafe_data >> 32) ^ self.unsafe_data) as u32 } - - pub fn with_str(&self, cb: F) -> Output - where F: FnOnce(&str) -> Output { - cb(self) - } } impl Default for Atom { From 1e5790b6a96b8b8c9aaed515a3d4b1abe6c51ebb Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 14:58:53 +0100 Subject: [PATCH 095/212] Update serde to 0.9 --- Cargo.toml | 2 +- src/atom.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c84a746..94f8891 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ unstable = [] [dependencies] lazy_static = "0.2" -serde = "0.8" +serde = "0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" rustc-serialize = { version = "0.3", optional = true } diff --git 
a/src/atom.rs b/src/atom.rs index b03c5ac..25e258c 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -426,16 +426,16 @@ impl AsRef for Atom { } impl Serialize for Atom { - fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: Serializer { + fn serialize(&self, serializer: S) -> Result where S: Serializer { let string: &str = self.as_ref(); string.serialize(serializer) } } impl Deserialize for Atom { - fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { + fn deserialize(deserializer: D) -> Result where D: Deserializer { let string: String = try!(Deserialize::deserialize(deserializer)); - Ok(Atom::from(&*string)) + Ok(Atom::from(string)) } } From c39ce5849ae75669a83432a6a0765af4ef7ff9e1 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 17 Feb 2017 00:52:33 +0100 Subject: [PATCH 096/212] Disable macOS builds Travis is backlogged into oblivion. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b7ee145..05ea9e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ rust: - stable os: - linux - - osx script: - cargo build - cargo test From 0ec0bfec9fe533250308ca6d4e9b1ec5a9f39906 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 15:01:39 +0100 Subject: [PATCH 097/212] Bump version to 0.4.0 --- Cargo.toml | 2 +- README.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 94f8891..6bee177 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.3.0" # Also update README.md when making a semver-breaking change +version = "0.4.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/README.md b/README.md index d244f69..43cf6fb 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.3" +string_cache = "0.4" ``` In `lib.rs`: @@ -31,7 +31,7 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.3" +string_cache = "0.4" [build-dependencies] string_cache_codegen = "0.3" From f7ce84308369359362a4e59caa7d563bc99fddb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Tue, 21 Mar 2017 22:38:04 +0100 Subject: [PATCH 098/212] Expose the precomputed hash using a trait so that I can use it from rust-selectors. This allows us to get rid of the extra hashing overhead every time we check the bloom filter. --- Cargo.toml | 1 + src/atom.rs | 28 +++++++++++++++++++++++++++- src/lib.rs | 3 ++- string-cache-codegen/Cargo.toml | 1 + string-cache-codegen/lib.rs | 10 ++++++++++ 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6bee177..2d7b3a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ log-events = ["rustc-serialize"] unstable = [] [dependencies] +precomputed-hash = "0.1" lazy_static = "0.2" serde = "0.9" phf_shared = "0.7.4" diff --git a/src/atom.rs b/src/atom.rs index 25e258c..32542b7 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -177,6 +177,7 @@ pub struct PhfStrSet { pub key: u64, pub disps: &'static [(u32, u32)], pub atoms: &'static [&'static str], + pub hashes: &'static [u32], } pub struct EmptyStaticAtomSet; @@ -189,6 +190,8 @@ impl StaticAtomSet for EmptyStaticAtomSet { key: 0, disps: &[(0, 0)], atoms: &[""], + // "" SipHash'd, and xored with u64_hash_to_u32. + hashes: &[0x3ddddef3], }; &SET } @@ -219,6 +222,17 @@ impl HeapSizeOf for Atom { } } +impl ::precomputed_hash::PrecomputedHash for Atom { + fn precomputed_hash(&self) -> u32 { + self.get_hash() + } +} + +fn u64_hash_as_u32(h: u64) -> u32 { + // This may or may not be great... 
+ ((h >> 32) ^ h) as u32 +} + impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { @@ -226,7 +240,19 @@ impl Atom { } pub fn get_hash(&self) -> u32 { - ((self.unsafe_data >> 32) ^ self.unsafe_data) as u32 + match unsafe { self.unpack() } { + Static(index) => { + let static_set = Static::get(); + static_set.hashes[index as usize] + } + Dynamic(entry) => { + let entry = entry as *mut StringCacheEntry; + u64_hash_as_u32(unsafe { (*entry).hash }) + } + Inline(..) => { + u64_hash_as_u32(self.unsafe_data) + } + } } } diff --git a/src/lib.rs b/src/lib.rs index b7961c3..16f531c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,8 +19,9 @@ #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; -extern crate serde; extern crate phf_shared; +extern crate precomputed_hash; +extern crate serde; extern crate string_cache_shared as shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index b4fd102..a81644c 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -15,4 +15,5 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" +phf_shared = "0.7.4" quote = "0.3.9" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 1505a37..dabd0bb 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -7,7 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
+#![recursion_limit = "128"] + extern crate phf_generator; +extern crate phf_shared; extern crate string_cache_shared as shared; #[macro_use] extern crate quote; @@ -93,6 +96,12 @@ impl AtomType { let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let data = (0..atoms.len()).map(|i| quote::Hex(shared::pack_static(i as u32))); + let hashes: Vec = + atoms.iter().map(|string| { + let hash = phf_shared::hash(string, key); + ((hash >> 32) ^ hash) as u32 + }).collect(); + let type_name = if let Some(position) = self.path.rfind("::") { &self.path[position + "::".len() ..] } else { @@ -112,6 +121,7 @@ impl AtomType { key: #key, disps: &#disps, atoms: &#atoms, + hashes: &#hashes }; &SET } From 8518b44e00bc3b8038f0968dfa57d1f49e3246fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Fri, 7 Apr 2017 12:32:59 +0200 Subject: [PATCH 099/212] Version bump. --- Cargo.toml | 7 +++---- README.md | 6 +++--- src/event.rs | 27 --------------------------- src/lib.rs | 1 - string-cache-codegen/Cargo.toml | 2 +- 5 files changed, 7 insertions(+), 36 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2d7b3a5..5450a37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.4.0" # Also update README.md when making a semver-breaking change +version = "0.5.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -22,7 +22,7 @@ name = "string_cache" # Enable event logging for generating benchmark traces. # See examples/event-log. -log-events = ["rustc-serialize"] +log-events = [] # Use unstable features to optimize space and time (memory and CPU usage). 
unstable = [] @@ -33,7 +33,6 @@ lazy_static = "0.2" serde = "0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" -rustc-serialize = { version = "0.3", optional = true } heapsize = { version = "0.3", optional = true } string_cache_shared = {path = "./shared", version = "0.3"} @@ -41,4 +40,4 @@ string_cache_shared = {path = "./shared", version = "0.3"} rand = "0.3" [build-dependencies] -string_cache_codegen = { version = "0.3", path = "./string-cache-codegen" } +string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } diff --git a/README.md b/README.md index 43cf6fb..5b58e51 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.4" +string_cache = "0.5" ``` In `lib.rs`: @@ -31,10 +31,10 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.4" +string_cache = "0.5" [build-dependencies] -string_cache_codegen = "0.3" +string_cache_codegen = "0.4" ``` In `build.rs`: diff --git a/src/event.rs b/src/event.rs index 79af4a1..1b777d3 100644 --- a/src/event.rs +++ b/src/event.rs @@ -8,7 +8,6 @@ // except according to those terms. use std::sync::Mutex; -use rustc_serialize::{Encoder, Encodable}; #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Debug)] pub enum Event { @@ -27,29 +26,3 @@ pub fn log(e: Event) { } macro_rules! log (($e:expr) => (::event::log($e))); - -// Serialize by converting to this private struct, -// which produces more convenient output. 
- -#[derive(RustcEncodable)] -struct SerializeEvent<'a> { - event: &'static str, - id: u64, - string: Option<&'a String>, -} - -impl Encodable for Event { - fn encode(&self, s: &mut S) -> Result<(), S::Error> { - let (event, id, string) = match *self { - Event::Intern(id) => ("intern", id, None), - Event::Insert(id, ref s) => ("insert", id, Some(s)), - Event::Remove(id) => ("remove", id, None), - }; - - SerializeEvent { - event: event, - id: id, - string: string - }.encode(s) - } -} diff --git a/src/lib.rs b/src/lib.rs index 16f531c..447a399 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ #![cfg_attr(all(test, feature = "unstable"), feature(test))] #[cfg(all(test, feature = "unstable"))] extern crate test; -#[cfg(feature = "log-events")] extern crate rustc_serialize; #[cfg(feature = "heapsize")] extern crate heapsize; #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a81644c..14b93a6 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.3.1" # Also update ../README.md when making a semver-breaking change +version = "0.4.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" From 1dff0d8f1d5b1d0b347113827ce0e8988de25c65 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 1 May 2017 19:53:23 +0200 Subject: [PATCH 100/212] Make to_ascii_{upper,lower}case more efficient. 
* Use `From for Atom` * Use the fast path with non-letters too --- Cargo.toml | 2 +- src/atom.rs | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5450a37..af58a60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.5.0" # Also update README.md when making a semver-breaking change +version = "0.5.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom.rs b/src/atom.rs index 32542b7..9a92e5f 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -470,19 +470,21 @@ impl Deserialize for Atom { // over the one from &str. impl Atom { pub fn to_ascii_uppercase(&self) -> Self { - if self.chars().all(char::is_uppercase) { - self.clone() - } else { - Atom::from(&*((&**self).to_ascii_uppercase())) + for b in self.bytes() { + if let b'a' ... b'z' = b { + return Atom::from((&**self).to_ascii_uppercase()) + } } + self.clone() } pub fn to_ascii_lowercase(&self) -> Self { - if self.chars().all(char::is_lowercase) { - self.clone() - } else { - Atom::from(&*((&**self).to_ascii_lowercase())) + for b in self.bytes() { + if let b'A' ... 
b'Z' = b { + return Atom::from((&**self).to_ascii_lowercase()) + } } + self.clone() } pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { From 287754ac87b2289f9a27621c30b25a408cd9ad4f Mon Sep 17 00:00:00 2001 From: Ashley Mannix Date: Mon, 22 May 2017 21:16:16 +1000 Subject: [PATCH 101/212] update to serde 1.0 --- Cargo.toml | 2 +- src/atom.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index af58a60..fdef822 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ unstable = [] [dependencies] precomputed-hash = "0.1" lazy_static = "0.2" -serde = "0.9" +serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" heapsize = { version = "0.3", optional = true } diff --git a/src/atom.rs b/src/atom.rs index 9a92e5f..b96c73c 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -458,8 +458,8 @@ impl Serialize for Atom { } } -impl Deserialize for Atom { - fn deserialize(deserializer: D) -> Result where D: Deserializer { +impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { + fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { let string: String = try!(Deserialize::deserialize(deserializer)); Ok(Atom::from(string)) } From 28bce79df00dbcd196d1f322ade9bca5e957fd17 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Sun, 28 May 2017 23:25:56 +0200 Subject: [PATCH 102/212] Allow heapsize 0.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index af58a60..c7d1492 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ lazy_static = "0.2" serde = "0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" -heapsize = { version = "0.3", optional = true } +heapsize = { version = ">= 0.3, < 0.5", optional = true } string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] From 830835a047fb07052634a1a0a0043bd575a3fc70 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Jun 2017 12:54:03 +0200 Subject: [PATCH 103/212] Avoid some 
allocations in to_ascii_{upper,lower}case --- Cargo.toml | 2 +- src/atom.rs | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c7d1492..c9f61ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.5.1" # Also update README.md when making a semver-breaking change +version = "0.5.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom.rs b/src/atom.rs index 9a92e5f..5012227 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -469,19 +469,37 @@ impl Deserialize for Atom { // We don't need to implement is_ascii because there's no performance improvement // over the one from &str. impl Atom { + fn from_mutated_str(s: &str, f: F) -> Self { + let mut buffer: [u8; 64] = unsafe { mem::uninitialized() }; + if let Some(buffer_prefix) = buffer.get_mut(..s.len()) { + buffer_prefix.copy_from_slice(s.as_bytes()); + // FIXME: use from std::str when stable https://github.com/rust-lang/rust/issues/41119 + pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { + mem::transmute(v) + } + let as_str = unsafe { from_utf8_unchecked_mut(buffer_prefix) }; + f(as_str); + Atom::from(&*as_str) + } else { + let mut string = s.to_owned(); + f(&mut string); + Atom::from(string) + } + } + pub fn to_ascii_uppercase(&self) -> Self { - for b in self.bytes() { + for (i, b) in self.bytes().enumerate() { if let b'a' ... b'z' = b { - return Atom::from((&**self).to_ascii_uppercase()) + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()) } } self.clone() } pub fn to_ascii_lowercase(&self) -> Self { - for b in self.bytes() { + for (i, b) in self.bytes().enumerate() { if let b'A' ... 
b'Z' = b { - return Atom::from((&**self).to_ascii_lowercase()) + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()) } } self.clone() From 1ae39ead7892aaa1365eb4f1e622abb8607842c2 Mon Sep 17 00:00:00 2001 From: Ashley Mannix Date: Mon, 22 May 2017 21:16:16 +1000 Subject: [PATCH 104/212] update to serde 1.0 --- Cargo.toml | 2 +- src/atom.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c9f61ee..d1e797a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ unstable = [] [dependencies] precomputed-hash = "0.1" lazy_static = "0.2" -serde = "0.9" +serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" heapsize = { version = ">= 0.3, < 0.5", optional = true } diff --git a/src/atom.rs b/src/atom.rs index 5012227..9adc4da 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -458,8 +458,8 @@ impl Serialize for Atom { } } -impl Deserialize for Atom { - fn deserialize(deserializer: D) -> Result where D: Deserializer { +impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { + fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { let string: String = try!(Deserialize::deserialize(deserializer)); Ok(Atom::from(string)) } From cf74a86c66b2d5469e119b211be9c77032ce52c7 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jun 2017 10:36:49 +0200 Subject: [PATCH 105/212] Serde update is a breaking change --- Cargo.toml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d1e797a..b5de7a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.5.2" # Also update README.md when making a semver-breaking change +version = "0.6.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/README.md b/README.md index 5b58e51..191c935 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.5" +string_cache = "0.6" ``` In `lib.rs`: From 83942971f4502a80a0449116434aef895d624732 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 7 Jul 2017 16:31:54 +0200 Subject: [PATCH 106/212] Remove RawSlice, support big-endian platforms --- src/atom.rs | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 9adc4da..66f8666 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -528,18 +528,31 @@ enum UnpackedAtom { Static(u32), } -struct RawSlice { - data: *const u8, - len: usize, +#[inline(always)] +fn inline_atom_slice(x: &u64) -> &[u8] { + unsafe { + let x: *const u64 = x; + let mut data = x as *const u8; + // All except the lowest byte, which is first in little-endian, last in big-endian. + if cfg!(target_endian = "little") { + data = data.offset(1); + } + let len = 7; + slice::from_raw_parts(data, len) + } } -#[cfg(target_endian = "little")] // Not implemented yet for big-endian #[inline(always)] -unsafe fn inline_atom_slice(x: &u64) -> RawSlice { - let x: *const u64 = x; - RawSlice { - data: (x as *const u8).offset(1), - len: 7, +fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { + unsafe { + let x: *mut u64 = x; + let mut data = x as *mut u8; + // All except the lowest byte, which is first in little-endian, last in big-endian. 
+ if cfg!(target_endian = "little") { + data = data.offset(1); + } + let len = 7; + slice::from_raw_parts_mut(data, len) } } @@ -557,9 +570,7 @@ impl UnpackedAtom { debug_assert!((len as usize) <= MAX_INLINE_LEN); let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let raw_slice = inline_atom_slice(&mut data); - let dest: &mut [u8] = slice::from_raw_parts_mut( - raw_slice.data as *mut u8, raw_slice.len); + let dest = inline_atom_slice_mut(&mut data); copy_memory(&buf[..], dest); } data @@ -578,8 +589,7 @@ impl UnpackedAtom { let len = ((data & 0xf0) >> 4) as usize; debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + let src = inline_atom_slice(&data); copy_memory(src, &mut buf[..]); Inline(len as u8, buf) }, @@ -606,8 +616,7 @@ unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + let src = inline_atom_slice(&data); &src[..(len as usize)] } _ => debug_unreachable!(), From c8ebbfb15bd1526918dfc5afae95cbf37ba9fd85 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 7 Jul 2017 16:36:36 +0200 Subject: [PATCH 107/212] Replace ad-hoc copy_memory function with [T]::copy_from_slice --- src/atom.rs | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 66f8666..e269391 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -23,7 +23,6 @@ use std::hash::{Hash, Hasher}; use std::marker::PhantomData; use std::mem; use std::ops; -use std::ptr; use std::slice; use std::str; use std::sync::Mutex; @@ -314,7 +313,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { let len = string_to_add.len(); if len <= MAX_INLINE_LEN { let mut 
buf: [u8; 7] = [0; 7]; - copy_memory(string_to_add.as_bytes(), &mut buf); + buf[..len].copy_from_slice(string_to_add.as_bytes()); Inline(len as u8, buf) } else { Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) @@ -571,7 +570,7 @@ impl UnpackedAtom { let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { let dest = inline_atom_slice_mut(&mut data); - copy_memory(&buf[..], dest); + dest.copy_from_slice(&buf) } data } @@ -590,7 +589,7 @@ impl UnpackedAtom { debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; let src = inline_atom_slice(&data); - copy_memory(src, &mut buf[..]); + buf.copy_from_slice(src); Inline(len as u8, buf) }, _ => debug_unreachable!(), @@ -623,21 +622,6 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { } } - -/// Copy of std::slice::bytes::copy_memory, which is unstable. -#[inline] -fn copy_memory(src: &[u8], dst: &mut [u8]) { - let len_src = src.len(); - assert!(dst.len() >= len_src); - // `dst` is unaliasable, so we know statically it doesn't overlap - // with `src`. - unsafe { - ptr::copy_nonoverlapping(src.as_ptr(), - dst.as_mut_ptr(), - len_src); - } -} - #[cfg(test)] #[macro_use] mod tests { From 88d65e7d4331795e789758ee65c001e811dc8225 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 7 Jul 2017 19:06:34 +0200 Subject: [PATCH 108/212] v0.6.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b5de7a4..4410f9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.0" # Also update README.md when making a semver-breaking change +version = "0.6.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From c5585bd446ec98de76ef7b7c80e63d232368946d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Mon, 7 Aug 2017 12:43:59 +0200 Subject: [PATCH 109/212] atom: Use the same hash everywhere. This will allow me to write a "no-op" hasher that expects a single u32 in the style system, to avoid hashing the hash again. --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index e269391..cb9f28a 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -268,7 +268,7 @@ impl Default for Atom { impl Hash for Atom { #[inline] fn hash(&self, state: &mut H) where H: Hasher { - self.unsafe_data.hash(state) + state.write_u32(self.get_hash()) } } From e468085594cdfa014db6f873b5b9c9901a22f6cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Mon, 7 Aug 2017 12:44:58 +0200 Subject: [PATCH 110/212] v0.6.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4410f9e..71f1444 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.1" # Also update README.md when making a semver-breaking change +version = "0.6.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From a955aba2b1d6ede679c34b022a6bc4c6560677ee Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 29 Aug 2017 03:13:51 +0200 Subject: [PATCH 111/212] Correct cfg for optional rand dependency --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 447a399..3bbfa2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "heapsize")] extern crate heapsize; -#[cfg(test)] extern crate rand; +#[cfg(all(test, feature = "unstable"))] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; extern crate phf_shared; From 673f8ea4df010ccf4b5e94a2fa02bdf704b90598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Wed, 4 Oct 2017 23:34:02 +0200 Subject: [PATCH 112/212] Implement From<&Atom> for Atom. Needed to do more complex stuff in selectors with local names and namespace urls. --- src/atom.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/atom.rs b/src/atom.rs index cb9f28a..9790198 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -227,6 +227,12 @@ impl ::precomputed_hash::PrecomputedHash for Atom } } +impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { + fn from(atom: &'a Self) -> Self { + atom.clone() + } +} + fn u64_hash_as_u32(h: u64) -> u32 { // This may or may not be great... ((h >> 32) ^ h) as u32 From 84ed420ba098bf35d12b1a75c70f040295550326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Wed, 4 Oct 2017 23:35:18 +0200 Subject: [PATCH 113/212] Bump version. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 71f1444..07366e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.2" # Also update README.md when making a semver-breaking change +version = "0.6.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From f48bdff01d34f833349325a2485f86472290556d Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 24 Oct 2017 11:19:36 +1100 Subject: [PATCH 114/212] Remove `heapsize` dependency. The heapsize crate is being deprecated in favour of the malloc_size_of crate within Servo. --- .travis.yml | 2 +- Cargo.toml | 3 +-- README.md | 4 ++-- src/atom.rs | 37 ------------------------------------- src/lib.rs | 1 - 5 files changed, 4 insertions(+), 43 deletions(-) diff --git a/.travis.yml b/.travis.yml index 05ea9e7..438e39d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ script: - cargo test - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - cargo test --features heapsize + - cargo test - "cd string-cache-codegen/ && cargo build && cd .." - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." diff --git a/Cargo.toml b/Cargo.toml index 07366e8..64ea6f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.3" # Also update README.md when making a semver-breaking change +version = "0.7.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -33,7 +33,6 @@ lazy_static = "0.2" serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" -heapsize = { version = ">= 0.3, < 0.5", optional = true } string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] diff --git a/README.md b/README.md index 191c935..657f646 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.6" +string_cache = "0.7" ``` In `lib.rs`: @@ -31,7 +31,7 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.5" +string_cache = "0.7" [build-dependencies] string_cache_codegen = "0.4" diff --git a/src/atom.rs b/src/atom.rs index 9790198..10e9943 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -9,9 +9,6 @@ #![allow(non_upper_case_globals)] -#[cfg(feature = "heapsize")] -use heapsize::HeapSizeOf; - use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -46,28 +43,10 @@ struct StringCache { buckets: [Option>; NB_BUCKETS], } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for StringCache { - fn heap_size_of_children(&self) -> usize { - self.buckets.iter().fold(0, |size, bucket| size + bucket.heap_size_of_children()) - } -} - lazy_static! { static ref STRING_CACHE: Mutex = Mutex::new(StringCache::new()); } -/// A token that represents the heap used by the dynamic string cache. 
-#[cfg(feature = "heapsize")] -pub struct StringCacheHeap; - -#[cfg(feature = "heapsize")] -impl HeapSizeOf for StringCacheHeap { - fn heap_size_of_children(&self) -> usize { - STRING_CACHE.lock().unwrap().heap_size_of_children() - } -} - struct StringCacheEntry { next_in_bucket: Option>, hash: u64, @@ -75,14 +54,6 @@ struct StringCacheEntry { string: Box, } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for StringCacheEntry { - fn heap_size_of_children(&self) -> usize { - self.next_in_bucket.heap_size_of_children() + - self.string.heap_size_of_children() - } -} - impl StringCacheEntry { fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { @@ -213,14 +184,6 @@ pub struct Atom { pub phantom: PhantomData, } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Atom { - #[inline(always)] - fn heap_size_of_children(&self) -> usize { - 0 - } -} - impl ::precomputed_hash::PrecomputedHash for Atom { fn precomputed_hash(&self) -> u32 { self.get_hash() diff --git a/src/lib.rs b/src/lib.rs index 3bbfa2e..04890e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ #![cfg_attr(all(test, feature = "unstable"), feature(test))] #[cfg(all(test, feature = "unstable"))] extern crate test; -#[cfg(feature = "heapsize")] extern crate heapsize; #[cfg(all(test, feature = "unstable"))] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; From 83bd6cb2a1dcb5574042452e51e6c84c3020c042 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 10 Nov 2017 07:41:55 +0100 Subject: [PATCH 115/212] Fixed denied "unused import" warning on Nigthly --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 10e9943..add3e55 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -12,7 +12,7 @@ use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::ascii::AsciiExt; +#[allow(unused_imports)] use std::ascii::AsciiExt; use std::borrow::Cow; use 
std::cmp::Ordering::{self, Equal}; use std::fmt; From 8b33173f274bd943ad9a8751a0417952dca03bb5 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 22 Nov 2017 16:42:40 +0000 Subject: [PATCH 116/212] Allow the generated macros to have documentation --- string-cache-codegen/lib.rs | 76 ++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index dabd0bb..83a63a3 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -6,6 +6,65 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +//! A crate to create static string caches at compiletime. +//! +//! # Examples +//! +//! With static atoms: +//! +//! In `Cargo.toml`: +//! +//! ```toml +//! [package] +//! build = "build.rs" +//! +//! [dependencies] +//! string_cache = "0.7" +//! +//! [build-dependencies] +//! string_cache_codegen = "0.4" +//! ``` +//! +//! In `build.rs`: +//! +//! ```no_run +//! extern crate string_cache_codegen; +//! +//! use std::env; +//! use std::path::Path; +//! +//! fn main() { +//! string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!") +//! .atoms(&["foo", "bar"]) +//! .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs")) +//! .unwrap() +//! } +//! ``` +//! +//! In `lib.rs`: +//! +//! ```ignore +//! extern crate string_cache; +//! +//! mod foo { +//! include!(concat!(env!("OUT_DIR"), "/foo_atom.rs")); +//! } +//! ``` +//! +//! The generated code will define a `FooAtom` type and a `foo_atom!` macro. +//! The macro can be used in expression or patterns, with strings listed in `build.rs`. +//! For example: +//! +//! ```ignore +//! fn compute_something(input: &foo::FooAtom) -> u32 { +//! match *input { +//! foo_atom!("foo") => 1, +//! foo_atom!("bar") => 2, +//! _ => 3, +//! } +//! } +//! ``` +//! 
#![recursion_limit = "128"] @@ -24,6 +83,7 @@ use std::path::Path; pub struct AtomType { path: String, macro_name: String, + macro_doc: Option, atoms: HashSet, } @@ -38,7 +98,7 @@ impl AtomType { /// /// For example, `AtomType::new("foo::FooAtom", "foo_atom!")` will generate: /// - /// ```rust + /// ```ignore /// pub type FooAtom = ::string_cache::Atom; /// pub struct FooAtomStaticSet; /// impl ::string_cache::StaticAtomSet for FooAtomStaticSet { @@ -53,10 +113,19 @@ impl AtomType { AtomType { path: path.to_owned(), macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(), + macro_doc: None, atoms: HashSet::new(), } } + /// Add some documentation to the generated macro. + /// + /// Note that `docs` should not contain the `///` at the front of normal docs. + pub fn with_macro_doc(&mut self, docs: &str) -> &mut Self { + self.macro_doc = Some(docs.to_owned()); + self + } + /// Adds an atom to the builder pub fn atom(&mut self, s: &str) -> &mut Self { self.atoms.insert(s.to_owned()); @@ -107,6 +176,10 @@ impl AtomType { } else { &self.path }; + let macro_doc = match self.macro_doc { + Some(ref doc) => quote!(#[doc = #doc]), + None => quote!() + }; let static_set_name = quote::Ident::from(format!("{}StaticSet", type_name)); let type_name = quote::Ident::from(type_name); let macro_name = quote::Ident::from(&*self.macro_name); @@ -129,6 +202,7 @@ impl AtomType { #empty_string_index } } + #macro_doc #[macro_export] macro_rules! 
#macro_name { #( From 8b38b6f3af0a3a473b12444bcd3add4b9e1e3a26 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Thu, 23 Nov 2017 23:06:41 +0000 Subject: [PATCH 117/212] Multiple changes - Add documentation to types & methods in atom.rs - Add ability to add documentation to generated types --- src/atom.rs | 44 ++++++++++++++++++++++++++++++++----- string-cache-codegen/lib.rs | 38 +++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index add3e55..c8b2f3c 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -138,11 +138,28 @@ impl StringCache { } } +/// A static `PhfStrSet` +/// +/// This trait is implemented by static sets of interned strings generated using +/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically. +/// +/// It is used by the methods of [`Atom`] to check if a string is present in the static set. +/// +/// [`Atom`]: struct.Atom.html pub trait StaticAtomSet { + /// Get the location of the static string set in the binary. fn get() -> &'static PhfStrSet; + /// Get the index of the empty string, which is in every set and is used for `Atom::default`. fn empty_string_index() -> u32; } +/// A string set created using a [perfect hash function], specifically +/// [Hash, Displace and Compress]. +/// +/// See the CHD document for the meaning of the struct fields. +/// +/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function +/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf pub struct PhfStrSet { pub key: u64, pub disps: &'static [(u32, u32)], @@ -150,6 +167,7 @@ pub struct PhfStrSet { pub hashes: &'static [u32], } +/// An empty static atom set for when only dynamic strings will be added pub struct EmptyStaticAtomSet; impl StaticAtomSet for EmptyStaticAtomSet { @@ -174,6 +192,10 @@ impl StaticAtomSet for EmptyStaticAtomSet { /// Use this if you don’t care about static atoms. 
pub type DefaultAtom = Atom; +/// Represents a string that has been interned. +/// +/// In reality this contains a complex packed datastructure and the methods to extract information +/// from it, along with type information to tell the compiler which static set it corresponds to. pub struct Atom { /// This field is public so that the `atom!()` macros can use it. /// You should not otherwise access this field. @@ -207,6 +229,7 @@ impl Atom { UnpackedAtom::from_packed(self.unsafe_data) } + /// Get the hash of the string as it is stored in the set. pub fn get_hash(&self) -> u32 { match unsafe { self.unpack() } { Static(index) => { @@ -441,11 +464,7 @@ impl Atom { let mut buffer: [u8; 64] = unsafe { mem::uninitialized() }; if let Some(buffer_prefix) = buffer.get_mut(..s.len()) { buffer_prefix.copy_from_slice(s.as_bytes()); - // FIXME: use from std::str when stable https://github.com/rust-lang/rust/issues/41119 - pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { - mem::transmute(v) - } - let as_str = unsafe { from_utf8_unchecked_mut(buffer_prefix) }; + let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) }; f(as_str); Atom::from(&*as_str) } else { @@ -455,6 +474,9 @@ impl Atom { } } + /// Like [`to_ascii_uppercase`]. + /// + /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase pub fn to_ascii_uppercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { if let b'a' ... b'z' = b { @@ -464,6 +486,9 @@ impl Atom { self.clone() } + /// Like [`to_ascii_lowercase`]. + /// + /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase pub fn to_ascii_lowercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { if let b'A' ... b'Z' = b { @@ -473,10 +498,16 @@ impl Atom { self.clone() } + /// Like [`eq_ignore_ascii_case`]. 
+ /// + /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { (self == other) || self.eq_str_ignore_ascii_case(&**other) } + /// Like [`eq_ignore_ascii_case`], but takes an unhashed string as `other`. + /// + /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool { (&**self).eq_ignore_ascii_case(other) } @@ -525,6 +556,8 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { } impl UnpackedAtom { + /// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for + /// hints for the layout. #[inline(always)] unsafe fn pack(self) -> u64 { match self { @@ -546,6 +579,7 @@ impl UnpackedAtom { } } + /// Unpack a key, extracting information from a single u64 into useable structs. #[inline(always)] unsafe fn from_packed(data: u64) -> UnpackedAtom { debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 83a63a3..e92f8a0 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -82,6 +82,8 @@ use std::path::Path; /// A builder for a static atom set and relevant macros pub struct AtomType { path: String, + atom_doc: Option, + static_set_doc: Option, macro_name: String, macro_doc: Option, atoms: HashSet, @@ -108,16 +110,40 @@ impl AtomType { /// macro_rules foo_atom { /// // Expands to: $crate::foo::FooAtom { … } /// } + /// ``` pub fn new(path: &str, macro_name: &str) -> Self { - assert!(macro_name.ends_with("!")); + assert!(macro_name.ends_with("!"), "`macro_name` must end with '!'"); AtomType { path: path.to_owned(), macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(), + atom_doc: None, + static_set_doc: None, macro_doc: None, atoms: HashSet::new(), } } + /// Add some documentation to the generated 
Atom type alias. + /// + /// This can help the user know that the type uses interned strings. + /// + /// Note that `docs` should not contain the `///` at the front of normal docs. + pub fn with_atom_doc(&mut self, docs: &str) -> &mut Self { + self.atom_doc = Some(docs.to_owned()); + self + } + + /// Add some documentation to the generated static set. + /// + /// This can help the user know that this type is zero-sized and just references a static + /// lookup table, or point them to the `Atom` type alias for more info. + /// + /// Note that `docs` should not contain the `///` at the front of normal docs. + pub fn with_static_set_doc(&mut self, docs: &str) -> &mut Self { + self.static_set_doc = Some(docs.to_owned()); + self + } + /// Add some documentation to the generated macro. /// /// Note that `docs` should not contain the `///` at the front of normal docs. @@ -176,6 +202,14 @@ impl AtomType { } else { &self.path }; + let atom_doc = match self.atom_doc { + Some(ref doc) => quote!(#[doc = #doc]), + None => quote!() + }; + let static_set_doc = match self.static_set_doc { + Some(ref doc) => quote!(#[doc = #doc]), + None => quote!() + }; let macro_doc = match self.macro_doc { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() @@ -186,7 +220,9 @@ impl AtomType { let path = iter::repeat(quote::Ident::from(&*self.path)); quote! 
{ + #atom_doc pub type #type_name = ::string_cache::Atom<#static_set_name>; + #static_set_doc pub struct #static_set_name; impl ::string_cache::StaticAtomSet for #static_set_name { fn get() -> &'static ::string_cache::PhfStrSet { From abb9c61222e587eb60c6b168a614b5b20a52cd61 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Mon, 4 Dec 2017 21:55:42 +0100 Subject: [PATCH 118/212] Update lazy_static to 1.0 and bump version --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 64ea6f6..14adf72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.0" # Also update README.md when making a semver-breaking change +version = "0.7.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -29,7 +29,7 @@ unstable = [] [dependencies] precomputed-hash = "0.1" -lazy_static = "0.2" +lazy_static = "1" serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" From 42f4684796ea90c4b81844a76d824089300383ed Mon Sep 17 00:00:00 2001 From: hcpl Date: Sun, 1 Apr 2018 14:07:22 +0300 Subject: [PATCH 119/212] Update dependencies --- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 3 ++- string-cache-codegen/lib.rs | 24 ++++++++++++++---------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 14adf72..8b0a204 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ debug_unreachable = "0.1.1" string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] -rand = "0.3" +rand = "0.4" [build-dependencies] string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 14b93a6..afb724d 100644 --- a/string-cache-codegen/Cargo.toml +++ 
b/string-cache-codegen/Cargo.toml @@ -16,4 +16,5 @@ path = "lib.rs" string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" phf_shared = "0.7.4" -quote = "0.3.9" +proc-macro2 = "0.3.1" +quote = "0.5.1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index e92f8a0..c61da73 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -72,6 +72,7 @@ extern crate phf_generator; extern crate phf_shared; extern crate string_cache_shared as shared; #[macro_use] extern crate quote; +extern crate proc_macro2; use std::collections::HashSet; use std::fs::File; @@ -169,7 +170,7 @@ impl AtomType { pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { destination.write_all( self.to_tokens() - .as_str() + .to_string() // Insert some newlines to make the generated code slightly easier to read. .replace(" [ \"", "[\n\"") .replace("\" , ", "\",\n") @@ -187,9 +188,11 @@ impl AtomType { let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); let hash_state = phf_generator::generate_hash(&atoms); let phf_generator::HashState { key, disps, map } = hash_state; + let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); + let atoms_ref = &atoms; let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; - let data = (0..atoms.len()).map(|i| quote::Hex(shared::pack_static(i as u32))); + let data = (0..atoms.len()).map(|i| proc_macro2::Literal::u64_suffixed(shared::pack_static(i as u32))); let hashes: Vec = atoms.iter().map(|string| { @@ -214,10 +217,11 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() }; - let static_set_name = quote::Ident::from(format!("{}StaticSet", type_name)); - let type_name = quote::Ident::from(type_name); - let macro_name = quote::Ident::from(&*self.macro_name); - let path = iter::repeat(quote::Ident::from(&*self.path)); + let 
produce_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); + let static_set_name = produce_term(&format!("{}StaticSet", type_name)); + let type_name = produce_term(type_name); + let macro_name = produce_term(&*self.macro_name); + let path = iter::repeat(produce_term(&*self.path)); quote! { #atom_doc @@ -228,9 +232,9 @@ impl AtomType { fn get() -> &'static ::string_cache::PhfStrSet { static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet { key: #key, - disps: &#disps, - atoms: &#atoms, - hashes: &#hashes + disps: &[#((#disps0, #disps1)),*], + atoms: &[#(#atoms_ref),*], + hashes: &[#(#hashes),*] }; &SET } @@ -242,7 +246,7 @@ impl AtomType { #[macro_export] macro_rules! #macro_name { #( - (#atoms) => { + (#atoms_ref) => { $crate::#path { unsafe_data: #data, phantom: ::std::marker::PhantomData, From f392c9b14dfbf6eb9fa22dd5ad223b14e2a4bd58 Mon Sep 17 00:00:00 2001 From: hcpl Date: Tue, 10 Apr 2018 17:00:30 +0300 Subject: [PATCH 120/212] Increment version numbers of affected crates Also fix an unnoticed bug and render data as hex numbers. --- Cargo.toml | 2 +- src/atom.rs | 1 - string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 19 +++++++++++++------ 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8b0a204..95ad89a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.1" # Also update README.md when making a semver-breaking change +version = "0.7.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/atom.rs b/src/atom.rs index c8b2f3c..7a48031 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -12,7 +12,6 @@ use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -#[allow(unused_imports)] use std::ascii::AsciiExt; use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index afb724d..c11952b 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.0" # Also update ../README.md when making a semver-breaking change +version = "0.4.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index c61da73..7312bcf 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -192,7 +192,14 @@ impl AtomType { let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); let atoms_ref = &atoms; let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; - let data = (0..atoms.len()).map(|i| proc_macro2::Literal::u64_suffixed(shared::pack_static(i as u32))); + let data = (0..atoms.len()).map(|i| { + format!("0x{:X}u64", shared::pack_static(i as u32)) + .parse::() + .unwrap() + .into_iter() + .next() + .unwrap() + }); let hashes: Vec = atoms.iter().map(|string| { @@ -217,11 +224,11 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() }; - let produce_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); - let static_set_name = produce_term(&format!("{}StaticSet", type_name)); - let type_name = produce_term(type_name); - let macro_name = produce_term(&*self.macro_name); - let 
path = iter::repeat(produce_term(&*self.path)); + let new_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); + let static_set_name = new_term(&format!("{}StaticSet", type_name)); + let type_name = new_term(type_name); + let macro_name = new_term(&*self.macro_name); + let path = iter::repeat(self.path.parse::().unwrap()); quote! { #atom_doc From d5b6071b949e69793ed1659bb4a1d81a70780a0f Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Mon, 16 Apr 2018 16:43:19 +0100 Subject: [PATCH 121/212] Add a simple example of library use. Specifically for internment without any static values. --- examples/simple.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 examples/simple.rs diff --git a/examples/simple.rs b/examples/simple.rs new file mode 100644 index 0000000..89f7369 --- /dev/null +++ b/examples/simple.rs @@ -0,0 +1,26 @@ +extern crate string_cache; + +use string_cache::{DefaultAtom, Atom}; + +fn main() { + + let mut interned_stuff = Vec::new(); + let text = "here is a sentence of text that will be tokenised and interned and some repeated \ + tokens is of text and"; + for word in text.split_whitespace() { + let seen_before = interned_stuff.iter() + // We can use impl PartialEq where T is anything string-like to compare to + // interned strings to either other interned strings, or actual strings Comparing two + // interned strings is very fast (normally a single cpu operation). 
+ .filter(|interned_word| interned_word == &word) + .count(); + if seen_before > 0 { + println!(r#"Seen the word "{}" {} times"#, word, seen_before); + } else { + println!(r#"Not seen the word "{}" before"#, word); + } + // We use the impl From<(Cow<'a, str>, or &'a str, or String) for Atom to intern a + // new string + interned_stuff.push(DefaultAtom::from(word)); + } +} From 035ad182c67527876de5a5acd5cbde993db8724d Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Mon, 16 Apr 2018 16:58:31 +0100 Subject: [PATCH 122/212] Added some module-level docs --- src/lib.rs | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 04890e7..3571da9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,96 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +//! +//! A library for interning things that are `AsRef`. +//! +//! Some strings may be interned at compile time using the `string-cache-codegen` crate, or the +//! `EmptyStaticAtomSet` may be used that has no comiple-time interned strings. An `Atom` is an +//! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`). +//! +//! Generated `Atom`s will have assocated macros to intern static strings at compile-time. +//! +//! # Examples +//! +//! Here are two examples, one with compile-time `Atom`s, and one without. +//! +//! ## With compile-time atoms +//! +//! In `Cargo.toml`: +//! ```toml +//! [dependencies] +//! string_cache = "0.7" +//! +//! [dev-dependencies] +//! string_cache_codegen = "0.4" +//! ``` +//! +//! In `build.rs`: +//! ```rust +//! extern crate string_cache_codegen; +//! +//! use std::env; +//! use std::path::Path; +//! +//! fn main() { +//! string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!") +//! .atoms(&["foo", "bar"]) +//! .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs")) +//! .unwrap() +//! } +//! 
``` +//! +//! In `lib.rs`: +//! ```rust +//! extern crate string_cache; +//! +//! mod foo { +//! include!(concat!(env!("OUT_DIR"), "/foo_atom.rs")); +//! } +//! +//! fn use_the_atom(t: &str) { +//! match *t { +//! foo_atom!("foo") => println!("Found foo!"), +//! foo_atom!("bar") => println!("Found bar!"), +//! // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error +//! _ => { +//! println!("String not interned"); +//! // We can intern strings at runtime as well +//! foo::FooAtom::from(t) +//! } +//! } +//! } +//! ``` +//! +//! ## No compile-time atoms +//! +//! ```rust +//! extern crate string_cache; +//! +//! +//! let mut interned_stuff = Vec::new(); +//! let text = "here is a sentence of text that will be tokenised and +//! interned and some repeated tokens is of text and"; +//! for word in text.split_whitespace() { +//! let seen_before = interned_stuff.iter() +//! // We can use impl PartialEq where T is anything string-like +//! // to compare to interned strings to either other interned strings, +//! // or actual strings Comparing two interned strings is very fast +//! // (normally a single cpu operation). +//! .filter(|interned_word| interned_word == &word) +//! .count(); +//! if seen_before > 0 { +//! println!(r#"Seen the word "{}" {} times"#, word, seen_before); +//! } else { +//! println!(r#"Not seen the word "{}" before"#, word); +//! } +//! // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for +//! // Atom to intern a new string. +//! interned_stuff.push(DefaultAtom::from(word)); +//! } +//! ``` +//! 
+ #![crate_name = "string_cache"] #![crate_type = "rlib"] From 19cbfa9a718331d3313e9de281e4f7c63310e5a6 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Tue, 17 Apr 2018 09:55:38 +0100 Subject: [PATCH 123/212] Fix tests --- Cargo.toml | 1 + examples/simple.rs | 2 +- src/lib.rs | 8 +++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 95ad89a..0eafa8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] rand = "0.4" +string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } [build-dependencies] string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } diff --git a/examples/simple.rs b/examples/simple.rs index 89f7369..b375049 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -1,6 +1,6 @@ extern crate string_cache; -use string_cache::{DefaultAtom, Atom}; +use string_cache::DefaultAtom; fn main() { diff --git a/src/lib.rs b/src/lib.rs index 3571da9..2912cab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,7 @@ //! ``` //! //! In `build.rs`: -//! ```rust +//! ``` //! extern crate string_cache_codegen; //! //! use std::env; @@ -47,7 +47,7 @@ //! ``` //! //! In `lib.rs`: -//! ```rust +//! ```ignore //! extern crate string_cache; //! //! mod foo { @@ -70,9 +70,11 @@ //! //! ## No compile-time atoms //! -//! ```rust +//! ``` //! extern crate string_cache; //! +//! use string_cache::DefaultAtom; +//! //! //! let mut interned_stuff = Vec::new(); //! let text = "here is a sentence of text that will be tokenised and From 9b41702503fc59d9abaec9f852b7c0b8c0f35d3f Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Tue, 17 Apr 2018 10:18:58 +0100 Subject: [PATCH 124/212] Try fix examples --- src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2912cab..86deee3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,7 @@ //! ``` //! //! 
In `build.rs`: +//! //! ``` //! extern crate string_cache_codegen; //! @@ -47,6 +48,7 @@ //! ``` //! //! In `lib.rs`: +//! //! ```ignore //! extern crate string_cache; //! @@ -71,11 +73,10 @@ //! ## No compile-time atoms //! //! ``` -//! extern crate string_cache; -//! +//! # extern crate string_cache; //! use string_cache::DefaultAtom; //! -//! +//! # fn main() { //! let mut interned_stuff = Vec::new(); //! let text = "here is a sentence of text that will be tokenised and //! interned and some repeated tokens is of text and"; @@ -96,6 +97,7 @@ //! // Atom to intern a new string. //! interned_stuff.push(DefaultAtom::from(word)); //! } +//! # } //! ``` //! From 007c8260f0727038774e133b880521300fade5a4 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Sat, 2 Jun 2018 21:37:01 -0700 Subject: [PATCH 125/212] Switch to fork of debug_unreachable Because the original debug_unreachable is abandoned and doesn't work correctly in modern Rust. --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 95ad89a..fda2c99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.2" # Also update README.md when making a semver-breaking change +version = "0.7.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -32,7 +32,7 @@ precomputed-hash = "0.1" lazy_static = "1" serde = "1" phf_shared = "0.7.4" -debug_unreachable = "0.1.1" +new_debug_unreachable = "1.0" string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] From f26c0f6294af7a5554309961884a9095a97451f7 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Tue, 5 Jun 2018 15:05:40 +0200 Subject: [PATCH 126/212] Update quote, proc-macro2 and bump version in codegen --- string-cache-codegen/Cargo.toml | 6 +++--- string-cache-codegen/lib.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index c11952b..a3d78ed 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.1" # Also update ../README.md when making a semver-breaking change +version = "0.4.2" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -16,5 +16,5 @@ path = "lib.rs" string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" phf_shared = "0.7.4" -proc-macro2 = "0.3.1" -quote = "0.5.1" +proc-macro2 = "0.4" +quote = "0.6" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 7312bcf..b675712 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -179,7 +179,7 @@ impl AtomType { .as_bytes()) } - fn to_tokens(&mut self) -> quote::Tokens { + fn to_tokens(&mut self) -> proc_macro2::TokenStream { // `impl Default for Atom` requires the empty string to be in the static set. // This also makes sure the set in non-empty, // which would cause divisions by zero in rust-phf. 
@@ -224,7 +224,7 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() }; - let new_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); + let new_term = |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); From 94883855ea265775492ae4bcbf9b78accab4544d Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 25 Jul 2018 18:30:49 +0100 Subject: [PATCH 127/212] Fix typo --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 86deee3..7eb8216 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,7 @@ //! A library for interning things that are `AsRef`. //! //! Some strings may be interned at compile time using the `string-cache-codegen` crate, or the -//! `EmptyStaticAtomSet` may be used that has no comiple-time interned strings. An `Atom` is an +//! `EmptyStaticAtomSet` may be used that has no compile-time interned strings. An `Atom` is an //! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`). //! //! Generated `Atom`s will have assocated macros to intern static strings at compile-time. From 48917bae5a7a2341529aa553a1c38f1f29deed80 Mon Sep 17 00:00:00 2001 From: Jan Andre Ikenmeyer Date: Sat, 10 Nov 2018 17:10:56 +0100 Subject: [PATCH 128/212] Remove unneeded webhook notification --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 438e39d..f0c2e36 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,5 +15,3 @@ script: - "cd string-cache-codegen/ && cargo build && cd .." - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." 
-notifications: - webhooks: http://build.servo.org:54856/travis From 8114d01592a7f388416491eb1aad1dc4e47eb242 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Mon, 11 Mar 2019 16:38:42 -0400 Subject: [PATCH 129/212] Add licenses to shared and codegen crates. --- shared/Cargo.toml | 2 +- shared/LICENSE-APACHE | 201 ++++++++++++++++++++++++++++ shared/LICENSE-MIT | 25 ++++ string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/LICENSE-APACHE | 201 ++++++++++++++++++++++++++++ string-cache-codegen/LICENSE-MIT | 25 ++++ 6 files changed, 454 insertions(+), 2 deletions(-) create mode 100644 shared/LICENSE-APACHE create mode 100644 shared/LICENSE-MIT create mode 100644 string-cache-codegen/LICENSE-APACHE create mode 100644 string-cache-codegen/LICENSE-MIT diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 6f8286d..ce8d53f 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.3.0" +version = "0.3.1" authors = [ "The Servo Project Developers" ] description = "Code share between string_cache and string_cache_codegen." license = "MIT / Apache-2.0" diff --git a/shared/LICENSE-APACHE b/shared/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/shared/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/shared/LICENSE-MIT b/shared/LICENSE-MIT new file mode 100644 index 0000000..807526f --- /dev/null +++ b/shared/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2012-2013 Mozilla Foundation + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a3d78ed..d6e1400 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.2" # Also update ../README.md when making a semver-breaking change +version = "0.4.3" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/LICENSE-APACHE b/string-cache-codegen/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/string-cache-codegen/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/string-cache-codegen/LICENSE-MIT b/string-cache-codegen/LICENSE-MIT new file mode 100644 index 0000000..807526f --- /dev/null +++ b/string-cache-codegen/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2012-2013 Mozilla Foundation + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
From 3238aee8967bebd635234a4859a315d5cce1210e Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Wed, 31 Jul 2019 13:54:20 +0200 Subject: [PATCH 130/212] Bump phf_generator generator version to 0.7.22 This is useful for cargo minimal version builds. `phf_generator` in versions before 0.7.22 depended on rand 0.3 which itself depended on libc 0.1.1 that no longer compiles with the newest Rust version. --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index d6e1400..a7241de 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -14,7 +14,7 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} -phf_generator = "0.7.15" +phf_generator = "0.7.22" phf_shared = "0.7.4" proc-macro2 = "0.4" quote = "0.6" From a98e06372aca511760fae08a85f91e5b5d1bc9fd Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Wed, 31 Jul 2019 13:59:42 +0200 Subject: [PATCH 131/212] Replace ... operator with ..= --- src/atom.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 7a48031..c05ef1d 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -478,7 +478,7 @@ impl Atom { /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase pub fn to_ascii_uppercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'a' ... b'z' = b { + if let b'a' ..= b'z' = b { return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()) } } @@ -490,7 +490,7 @@ impl Atom { /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase pub fn to_ascii_lowercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'A' ... 
b'Z' = b { + if let b'A' ..= b'Z' = b { return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()) } } From db1eda55bce00f1b94c0aa2174c986ed619965f5 Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Wed, 31 Jul 2019 08:12:53 -0700 Subject: [PATCH 132/212] Update Travis CI URL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 657f646..ddd3bc0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # string-cache -[![Build Status](https://travis-ci.org/servo/string-cache.svg?branch=master)](https://travis-ci.org/servo/string-cache) +[![Build Status](https://travis-ci.com/servo/string-cache.svg?branch=master)](https://travis-ci.com/servo/string-cache) [Documentation](https://docs.rs/string_cache/) From 45c3dae8f7fb203fb385ec66117c881bc14de3f3 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Sat, 17 Aug 2019 16:28:03 +0200 Subject: [PATCH 133/212] Update syn related dependencies to 1.0 and bump version --- string-cache-codegen/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index d6e1400..0a828f3 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.3" # Also update ../README.md when making a semver-breaking change +version = "0.4.4" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -16,5 +16,5 @@ path = "lib.rs" string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" phf_shared = "0.7.4" -proc-macro2 = "0.4" -quote = "0.6" +proc-macro2 = "1" +quote = "1" From 904fb633625149e1930dab5a6ff6a65986ba258a Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Sun, 18 Aug 2019 14:58:46 +0200 Subject: [PATCH 134/212] Fix nightly build by fixing deprecation warnings --- src/atom.rs | 6 ++++-- string-cache-codegen/lib.rs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c05ef1d..11acf2a 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -450,7 +450,7 @@ impl Serialize for Atom { impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { - let string: String = try!(Deserialize::deserialize(deserializer)); + let string: String = Deserialize::deserialize(deserializer)?; Ok(Atom::from(string)) } } @@ -460,7 +460,9 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { // over the one from &str. 
impl Atom { fn from_mutated_str(s: &str, f: F) -> Self { - let mut buffer: [u8; 64] = unsafe { mem::uninitialized() }; + let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit(); + let buffer = unsafe { &mut *buffer.as_mut_ptr() }; + if let Some(buffer_prefix) = buffer.get_mut(..s.len()) { buffer_prefix.copy_from_slice(s.as_bytes()); let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) }; diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index b675712..8a06d33 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -269,6 +269,6 @@ impl AtomType { /// Typical usage: /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))` pub fn write_to_file(&mut self, path: &Path) -> io::Result<()> { - self.write_to(BufWriter::new(try!(File::create(path)))) + self.write_to(BufWriter::new(File::create(path)?)) } } From 80e59c1741117a69c5611f3a4c2bfe6cf0ef1ca1 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Mon, 26 Aug 2019 16:50:06 +0300 Subject: [PATCH 135/212] Document the semantics of Atom's generic parameter Provide a short example that shows why interning temporary atoms will not blow up memory consumption. Fixes #212 --- src/atom.rs | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c05ef1d..21f9e64 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -193,8 +193,46 @@ pub type DefaultAtom = Atom; /// Represents a string that has been interned. /// -/// In reality this contains a complex packed datastructure and the methods to extract information -/// from it, along with type information to tell the compiler which static set it corresponds to. +/// While the type definition for `Atom` indicates that it generic on a particular +/// implementation of an atom set, you don't need to worry about this. 
Atoms can be static +/// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they +/// can be dynamic and created by you on an `EmptyStaticAtomSet`. +/// +/// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted; +/// this means that you may need to `.clone()` an atom to keep copies to it in different +/// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`. +/// +/// ## Creating an atom at runtime +/// +/// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code +/// may then do something like read data from somewhere and extract tokens that need to be +/// compared to the atoms. In this case, you can use `Atom::from(&str)` or +/// `Atom::from(String)`. These create a reference-counted atom which will be +/// automatically freed when all references to it are dropped. +/// +/// This means that your application can safely have a loop which tokenizes data, creates +/// atoms from the tokens, and compares the atoms to a predefined set of keywords, without +/// running the risk of arbitrary memory consumption from creating large numbers of atoms — +/// as long as your application does not store clones of the atoms it creates along the +/// way. +/// +/// For example, the following is safe and will not consume arbitrary amounts of memory: +/// +/// ```ignore +/// let untrusted_data = "large amounts of text ..."; +/// +/// for token in untrusted_data.split_whitespace() { +/// let atom = Atom::from(token); // interns the string +/// +/// if atom == Atom::from("keyword") { +/// // handle that keyword +/// } else if atom == Atom::from("another_keyword") { +/// // handle that keyword +/// } else { +/// println!("unknown keyword"); +/// } +/// } // atom is dropped here, so it is not kept around in memory +/// ``` pub struct Atom { /// This field is public so that the `atom!()` macros can use it. /// You should not otherwise access this field. 
From af7a9c1d19254c367d4ce123f78cb883738abe95 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 30 Sep 2019 15:26:17 +0200 Subject: [PATCH 136/212] Fix initializing the global hash map with a small stack Fix https://github.com/servo/html5ever/issues/393 --- Cargo.toml | 6 +++++- src/atom.rs | 9 +++++++-- tests/small-stack.rs | 12 ++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 tests/small-stack.rs diff --git a/Cargo.toml b/Cargo.toml index dbdad57..d98a065 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.3" # Also update README.md when making a semver-breaking change +version = "0.7.4" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -41,3 +41,7 @@ string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } [build-dependencies] string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } + +[[test]] +name = "small-stack" +harness = false diff --git a/src/atom.rs b/src/atom.rs index f575c02..ac1f397 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -39,7 +39,7 @@ const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { - buckets: [Option>; NB_BUCKETS], + buckets: Box<[Option>; NB_BUCKETS]>, } lazy_static! 
{ @@ -67,8 +67,13 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { + type T = Option>; + let _static_assert_size_eq = std::mem::transmute::; + let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); StringCache { - buckets: unsafe { mem::zeroed() }, + buckets: unsafe { + Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) + }, } } diff --git a/tests/small-stack.rs b/tests/small-stack.rs new file mode 100644 index 0000000..300b142 --- /dev/null +++ b/tests/small-stack.rs @@ -0,0 +1,12 @@ +// Regression test for https://github.com/servo/html5ever/issues/393 +// +// Create a dynamic atom − causing initialization of the golbal hash map − +// in a thread that has a small stack. +// +// This is a separate test program rather than a `#[test] fn` among others +// to make sure that nothing else has already initialized the map in this process. +fn main() { + std::thread::Builder::new().stack_size(50_000).spawn(|| { + string_cache::DefaultAtom::from("12345678"); + }).unwrap().join().unwrap() +} From b94483f90f54a6a08ae082958077d640d26ffe19 Mon Sep 17 00:00:00 2001 From: Brendan Zabarauskas Date: Tue, 8 Oct 2019 15:12:20 +1100 Subject: [PATCH 137/212] Pin rustc version in .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index f0c2e36..bcb5f68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ sudo: false language: rust rust: + - 1.36.0 - nightly - beta - stable From d56ba88cd94acd485d69965f03bd025f33745b35 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 11:23:29 +0200 Subject: [PATCH 138/212] Remove log-events --- .travis.yml | 4 - Cargo.toml | 6 - examples/event-log/Cargo.toml | 9 -- examples/event-log/README.md | 4 - examples/event-log/src/main.rs | 33 ----- examples/summarize-events/Cargo.toml | 13 -- examples/summarize-events/src/main.rs | 170 -------------------------- src/atom.rs | 15 --- src/lib.rs | 4 - 9 files changed, 258 deletions(-) delete mode 
100644 examples/event-log/Cargo.toml delete mode 100644 examples/event-log/README.md delete mode 100644 examples/event-log/src/main.rs delete mode 100644 examples/summarize-events/Cargo.toml delete mode 100644 examples/summarize-events/src/main.rs diff --git a/.travis.yml b/.travis.yml index bcb5f68..fa09a1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,10 +9,6 @@ os: - linux script: - cargo build - - cargo test - - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - cargo test - "cd string-cache-codegen/ && cargo build && cd .." - - "cd examples/event-log/ && cargo build && cd ../.." - - "cd examples/summarize-events/ && cargo build && cd ../.." diff --git a/Cargo.toml b/Cargo.toml index d98a065..61a853f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,4 @@ [package] - name = "string_cache" version = "0.7.4" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] @@ -19,11 +18,6 @@ build = "build.rs" name = "string_cache" [features] - -# Enable event logging for generating benchmark traces. -# See examples/event-log. -log-events = [] - # Use unstable features to optimize space and time (memory and CPU usage). unstable = [] diff --git a/examples/event-log/Cargo.toml b/examples/event-log/Cargo.toml deleted file mode 100644 index 7edd5cf..0000000 --- a/examples/event-log/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] - -name = "string-cache-event-log-example" -version = "0.0.0" -authors = [ "The Servo Project Developers" ] - -[dependencies.string_cache] -path = "../.." -features = ["log-events"] diff --git a/examples/event-log/README.md b/examples/event-log/README.md deleted file mode 100644 index b2deb39..0000000 --- a/examples/event-log/README.md +++ /dev/null @@ -1,4 +0,0 @@ -string-cache can record logs of what it's doing, which can be useful for -guiding future changes to the library. 
This project demonstrates how to build -string-cache with logging enabled (see `Cargo.toml`), and how to access the log -at runtime. diff --git a/examples/event-log/src/main.rs b/examples/event-log/src/main.rs deleted file mode 100644 index 7a25e29..0000000 --- a/examples/event-log/src/main.rs +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -extern crate string_cache; - -use string_cache::DefaultAtom as Atom; -use string_cache::event; - -use std::io; -use std::io::prelude::*; - -fn main() { - println!("Reading stdin to end of file"); - let mut stdin = String::new(); - io::stdin().read_to_string(&mut stdin).unwrap(); - let mut atoms = vec![]; - for word in stdin.split(|c: char| c.is_whitespace()) { - atoms.push(Atom::from(word)); - } - - let log = event::LOG.lock().unwrap(); - - println!("Created {} atoms, logged {} events:", atoms.len(), log.len()); - for e in log.iter() { - println!("{:?}", e); - } -} diff --git a/examples/summarize-events/Cargo.toml b/examples/summarize-events/Cargo.toml deleted file mode 100644 index 7d2e7ba..0000000 --- a/examples/summarize-events/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] - -name = "string-cache-summarize-events" -version = "0.0.0" -authors = [ "The Servo Project Developers" ] - -[dependencies] -csv = "0" -rustc-serialize = "0" -phf_shared = "0.7.4" - -[dependencies.string_cache] -path = "../.." diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs deleted file mode 100644 index a74e659..0000000 --- a/examples/summarize-events/src/main.rs +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2014 The Servo Project Developers. 
See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -extern crate csv; -extern crate string_cache; -extern crate rustc_serialize; -extern crate phf_shared; - -#[path = "../../../shared/lib.rs"] -#[allow(dead_code)] -mod shared; - -use string_cache::DefaultAtom as Atom; - -use std::{env, cmp}; -use std::collections::hash_map::{HashMap, Entry}; -use std::marker::PhantomData; -use std::path::Path; - -#[derive(RustcDecodable, Debug)] -struct Event { - event: String, - id: u64, - string: Option, -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -enum Kind { - Dynamic, - Inline, - Static, -} - -impl Kind { - fn from_tag(tag: u8) -> Kind { - match tag { - shared::DYNAMIC_TAG => Kind::Dynamic, - shared::INLINE_TAG => Kind::Inline, - shared::STATIC_TAG => Kind::Static, - _ => panic!() - } - } - - fn to_tag(self) -> u8 { - match self { - Kind::Dynamic => shared::DYNAMIC_TAG, - Kind::Inline => shared::INLINE_TAG, - Kind::Static => shared::STATIC_TAG, - } - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -struct Summary { - kind: Kind, - times: usize, -} - -fn main() { - let filename = env::args().skip(1).next() - .expect("Usage: string-cache-summarize-events foo.csv"); - let path = &Path::new(&filename); - let mut file = csv::Reader::from_file(path).unwrap(); - - // Over the lifetime of a program, one dynamic atom might get interned at - // several addresses, and one address may be used to intern several - // different strings. For this reason we must separately track the - // currently-allocated atoms and the summary of all atoms ever created. 
- let mut dynamic: HashMap = HashMap::new(); - let mut peak_dynamic = 0; - let mut summary: HashMap = HashMap::new(); - let mut inserts = 0; - - for record in file.decode() { - let ev: Event = record.unwrap(); - match &ev.event[..] { - "intern" => { - let tag = (ev.id & 0xf) as u8; - assert!(tag <= shared::STATIC_TAG); - - let string = match tag { - shared::DYNAMIC_TAG => dynamic[&ev.id].clone(), - - // FIXME: We really shouldn't be allowed to do this. It's a memory-safety - // hazard; the field is only public for the atom!() macro. - _ => Atom { unsafe_data: ev.id, phantom: PhantomData }.to_string(), - }; - - match summary.entry(string) { - Entry::Occupied(entry) => entry.into_mut().times += 1, - Entry::Vacant(entry) => { - entry.insert(Summary { - kind: Kind::from_tag(tag), - times: 1, - }); - } - } - }, - - "insert" => { - assert!(!dynamic.contains_key(&ev.id)); - dynamic.insert(ev.id, ev.string.expect("no string to insert")); - peak_dynamic = cmp::max(peak_dynamic, dynamic.len()); - inserts += 1; - } - - "remove" => { - assert!(dynamic.contains_key(&ev.id)); - dynamic.remove(&ev.id); - } - - e => panic!("unknown event {}", e), - } - } - - // Get all records, in a stable order. - let mut summary: Vec<_> = summary.into_iter().collect(); - summary.sort_by(|&(ref a, _), &(ref b, _)| a.cmp(b)); - - // Sort by number of occurrences, descending. 
- summary.sort_by(|&(_, a), &(_, b)| b.times.cmp(&a.times)); - let longest_atom = summary.iter().map(|&(ref k, _)| k.len()) - .max().unwrap_or(0); - - let pad = |c, n| { - for _ in n..longest_atom { - print!("{}", c); - } - }; - - let mut total = 0; - let mut by_kind = [0, 0, 0]; - for &(_, Summary { kind, times }) in &summary { - total += times; - by_kind[kind.to_tag() as usize] += times; - } - - println!("\n"); - println!("kind times pct"); - println!("------- ------- ----"); - for (k, &n) in by_kind.iter().enumerate() { - let k: Kind = Kind::from_tag(k as u8); - print!("{:7?} {:7} {:4.1}", - k, n, 100.0 * (n as f64) / (total as f64)); - - match k { - Kind::Dynamic => println!(" {} inserts, peak size {}, miss rate {:4.1}%", - inserts, peak_dynamic, 100.0 * (inserts as f64) / (n as f64)), - _ => println!(""), - } - } - println!(""); - println!("total {:7}", total); - println!("\n"); - - pad(' ', 4); - println!("atom times kind"); - pad('-', 4); - println!("---- ------ -------"); - for (string, Summary { kind, times }) in summary { - pad(' ', string.chars().count()); - println!("{} {:6} {:?}", string, times, kind); - } -} diff --git a/src/atom.rs b/src/atom.rs index ac1f397..3aef7e2 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -29,12 +29,6 @@ use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STAT ENTRY_ALIGNMENT, pack_static}; use self::UnpackedAtom::{Dynamic, Inline, Static}; -#[cfg(feature = "log-events")] -use event::Event; - -#[cfg(not(feature = "log-events"))] -macro_rules! 
log (($e:expr) => (())); - const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; @@ -101,16 +95,10 @@ impl StringCache { } debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let string = string.into_owned(); - let _string_clone = if cfg!(feature = "log-events") { - string.clone() - } else { - "".to_owned() - }; let mut entry = Box::new(StringCacheEntry::new( self.buckets[bucket_index].take(), hash, string)); let ptr: *mut StringCacheEntry = &mut *entry; self.buckets[bucket_index] = Some(entry); - log!(Event::Insert(ptr as u64, _string_clone)); ptr } @@ -137,8 +125,6 @@ impl StringCache { } current = unsafe { &mut (*entry_ptr).next_in_bucket }; } - - log!(Event::Remove(key)); } } @@ -355,7 +341,6 @@ impl<'a, Static: StaticAtomSet> From> for Atom { }; let data = unsafe { unpacked.pack() }; - log!(Event::Intern(data)); Atom { unsafe_data: data, phantom: PhantomData } } } diff --git a/src/lib.rs b/src/lib.rs index 7eb8216..fc00f3a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,10 +118,6 @@ extern crate string_cache_shared as shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; -#[cfg(feature = "log-events")] -#[macro_use] -pub mod event; - pub mod atom; // Make test_atom! macro work in this crate. 
From 5f6ad92839a87d3c445e228f458f26625db8db6f Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 16:13:52 +0200 Subject: [PATCH 139/212] Update phf to 0.8 --- Cargo.toml | 2 +- src/atom.rs | 5 +++-- string-cache-codegen/Cargo.toml | 4 ++-- string-cache-codegen/lib.rs | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 61a853f..46e9117 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ unstable = [] precomputed-hash = "0.1" lazy_static = "1" serde = "1" -phf_shared = "0.7.4" +phf_shared = "0.8" new_debug_unreachable = "1.0" string_cache_shared = {path = "./shared", version = "0.3"} diff --git a/src/atom.rs b/src/atom.rs index 3aef7e2..7dce151 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -324,8 +324,8 @@ impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { let static_set = Static::get(); - let hash = phf_shared::hash(&*string_to_add, static_set.key); - let index = phf_shared::get_index(hash, static_set.disps, static_set.atoms.len()); + let hash = phf_shared::hash(&*string_to_add, &static_set.key); + let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); let unpacked = if static_set.atoms[index as usize] == string_to_add { Static(index) @@ -336,6 +336,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { buf[..len].copy_from_slice(string_to_add.as_bytes()); Inline(len as u8, buf) } else { + let hash = (hash.g as u64) << 32 | (hash.f1 as u64); Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) } }; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index e088413..4bcd3ee 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -14,7 +14,7 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} -phf_generator = "0.7.22" -phf_shared = "0.7.4" +phf_generator = "0.8" +phf_shared = "0.8" proc-macro2 = 
"1" quote = "1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 8a06d33..e878b0f 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -203,8 +203,8 @@ impl AtomType { let hashes: Vec = atoms.iter().map(|string| { - let hash = phf_shared::hash(string, key); - ((hash >> 32) ^ hash) as u32 + let hash = phf_shared::hash(string, &key); + (hash.g ^ hash.f1) as u32 }).collect(); let type_name = if let Some(position) = self.path.rfind("::") { From a337b617011dc201975bc8ad3e3c4eaa7e123022 Mon Sep 17 00:00:00 2001 From: David Kellum Date: Fri, 27 Sep 2019 12:18:14 -0700 Subject: [PATCH 140/212] Move tests, bench and codegen deps to sub-package Closes #225, #226. This removes the codegen build dependency of the string_cache crate, thereby minimizing dep for users that don't need codegen. To allow the now external integration tests and benchmarks to test the same things, public Atom::is_(static|dynamic|inline) -> bool methods were also added. --- .travis.yml | 6 +- Cargo.toml | 18 +- integration-tests/Cargo.toml | 25 ++ build.rs => integration-tests/build.rs | 2 +- {src => integration-tests/src}/bench.rs | 24 +- integration-tests/src/lib.rs | 276 ++++++++++++++++++++++ src/atom.rs | 296 +++--------------------- src/lib.rs | 5 +- 8 files changed, 353 insertions(+), 299 deletions(-) create mode 100644 integration-tests/Cargo.toml rename build.rs => integration-tests/build.rs (79%) rename {src => integration-tests/src}/bench.rs (94%) create mode 100644 integration-tests/src/lib.rs diff --git a/.travis.yml b/.travis.yml index fa09a1e..ad47308 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,6 @@ os: - linux script: - cargo build - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - cargo test - - "cd string-cache-codegen/ && cargo build && cd .." + - cargo test --all + - "cd string-cache-codegen && cargo build && cd .." 
+ - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi" diff --git a/Cargo.toml b/Cargo.toml index 46e9117..8c40c1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ description = "A string interning library for Rust, developed as part of the Ser license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" -build = "build.rs" # Do not `exclude` ./string-cache-codegen because we want to include # ./string-cache-codegen/shared.rs, and `include` is a pain to use @@ -17,10 +16,6 @@ build = "build.rs" [lib] name = "string_cache" -[features] -# Use unstable features to optimize space and time (memory and CPU usage). -unstable = [] - [dependencies] precomputed-hash = "0.1" lazy_static = "1" @@ -29,13 +24,12 @@ phf_shared = "0.8" new_debug_unreachable = "1.0" string_cache_shared = {path = "./shared", version = "0.3"} -[dev-dependencies] -rand = "0.4" -string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } - -[build-dependencies] -string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } - [[test]] name = "small-stack" harness = false + +[workspace] +members = [ + "string-cache-codegen", + "integration-tests", +] diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml new file mode 100644 index 0000000..1a92ecc --- /dev/null +++ b/integration-tests/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "integration_tests" +version = "0.0.1" +authors = [ "The Servo Project Developers" ] +build = "build.rs" +publish = false + +[lib] +doctest = false +test = true + +[features] + +# Use unstable features to optimize space and time (memory and CPU usage). +unstable = [] + +[dependencies] +string_cache = { version = "0.7", path = ".." 
} + +[dev-dependencies] +rand = "0.4" +string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } + +[build-dependencies] +string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } diff --git a/build.rs b/integration-tests/build.rs similarity index 79% rename from build.rs rename to integration-tests/build.rs index 4b0bb9e..2cb93b1 100644 --- a/build.rs +++ b/integration-tests/build.rs @@ -4,7 +4,7 @@ use std::env; use std::path::Path; fn main() { - string_cache_codegen::AtomType::new("atom::tests::TestAtom", "test_atom!") + string_cache_codegen::AtomType::new("TestAtom", "test_atom!") .atoms(&[ "a", "b", "address", "area", "body", "font-weight", "br", "html", "head", "id", ]) diff --git a/src/bench.rs b/integration-tests/src/bench.rs similarity index 94% rename from src/bench.rs rename to integration-tests/src/bench.rs index f6f5248..7866ae9 100644 --- a/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -26,8 +26,8 @@ Furthermore, a large part of the point of interning is to make strings small and cheap to move around, which isn't reflected in these tests. */ +use crate::TestAtom; -use atom::tests::TestAtom; use test::{Bencher, black_box}; // Just shorthand @@ -35,14 +35,11 @@ fn mk(x: &str) -> TestAtom { TestAtom::from(x) } -macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( +macro_rules! check_type (($name:ident, $x:expr) => ( // NB: "cargo bench" does not run these! #[test] fn $name() { - match unsafe { $x.unpack() } { - $p => (), - _ => panic!("atom has wrong type"), - } + assert!($x, "atom has wrong type"); } )); @@ -62,12 +59,12 @@ macro_rules! bench_tiny_op (($name:ident, $op:ident, $ctor_x:expr, $ctor_y:expr) )); macro_rules! 
bench_one ( - (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x, Static(..));); - (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x, Inline(..));); - (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x, Dynamic(..));); - (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y, Static(..));); - (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y, Inline(..));); - (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y, Dynamic(..));); + (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x.is_static());); + (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x.is_inline());); + (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x.is_dynamic());); + (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y.is_static());); + (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y.is_inline());); + (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y.is_dynamic());); (is_static $x:expr, $y:expr) => (bench_one!(x_static $x, $y); bench_one!(y_static $x, $y);); (is_inline $x:expr, $y:expr) => (bench_one!(x_inline $x, $y); bench_one!(y_inline $x, $y);); (is_dynamic $x:expr, $y:expr) => (bench_one!(x_dynamic $x, $y); bench_one!(y_dynamic $x, $y);); @@ -134,8 +131,7 @@ macro_rules! bench_all ( use std::string::ToString; use std::iter::repeat; - use atom::tests::TestAtom; - use atom::UnpackedAtom::{Static, Inline, Dynamic}; + use crate::TestAtom; use super::mk; diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs new file mode 100644 index 0000000..d993e57 --- /dev/null +++ b/integration-tests/src/lib.rs @@ -0,0 +1,276 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +#![cfg(test)] + +#![deny(warnings)] +#![allow(non_upper_case_globals)] + +#![cfg_attr(feature = "unstable", feature(test))] + +extern crate string_cache; + +#[cfg(feature = "unstable")] extern crate test; +#[cfg(feature = "unstable")] extern crate rand; + +use std::thread; +use string_cache::atom::StaticAtomSet; + +include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); +pub type Atom = TestAtom; + +#[test] +fn test_as_slice() { + let s0 = Atom::from(""); + assert!(s0.as_ref() == ""); + + let s1 = Atom::from("class"); + assert!(s1.as_ref() == "class"); + + let i0 = Atom::from("blah"); + assert!(i0.as_ref() == "blah"); + + let s0 = Atom::from("BLAH"); + assert!(s0.as_ref() == "BLAH"); + + let d0 = Atom::from("zzzzzzzzzz"); + assert!(d0.as_ref() == "zzzzzzzzzz"); + + let d1 = Atom::from("ZZZZZZZZZZ"); + assert!(d1.as_ref() == "ZZZZZZZZZZ"); +} + +#[test] +fn test_types() { + assert!(Atom::from("").is_static()); + assert!(Atom::from("id").is_static()); + assert!(Atom::from("body").is_static()); + assert!(Atom::from("a").is_static()); + assert!(Atom::from("c").is_inline()); + assert!(Atom::from("zz").is_inline()); + assert!(Atom::from("zzz").is_inline()); + assert!(Atom::from("zzzz").is_inline()); + assert!(Atom::from("zzzzz").is_inline()); + assert!(Atom::from("zzzzzz").is_inline()); + assert!(Atom::from("zzzzzzz").is_inline()); + assert!(Atom::from("zzzzzzzz").is_dynamic()); + assert!(Atom::from("zzzzzzzzzzzzz").is_dynamic()); +} + +#[test] +fn test_equality() { + let s0 = Atom::from("fn"); + let s1 = Atom::from("fn"); + let s2 = Atom::from("loop"); + + let i0 = Atom::from("blah"); + let i1 = Atom::from("blah"); + let i2 = Atom::from("blah2"); + + let d0 = Atom::from("zzzzzzzz"); + let d1 = Atom::from("zzzzzzzz"); + let d2 = Atom::from("zzzzzzzzz"); + + assert!(s0 == s1); + assert!(s0 != s2); + + assert!(i0 == i1); + assert!(i0 != i2); + + assert!(d0 == d1); + assert!(d0 != d2); + + assert!(s0 != i0); + assert!(s0 != d0); + assert!(i0 != d0); +} + +#[test] +fn 
default() { + assert_eq!(TestAtom::default(), test_atom!("")); + assert_eq!(&*TestAtom::default(), ""); +} + +#[test] +fn ord() { + fn check(x: &str, y: &str) { + assert_eq!(x < y, Atom::from(x) < Atom::from(y)); + assert_eq!(x.cmp(y), Atom::from(x).cmp(&Atom::from(y))); + assert_eq!(x.partial_cmp(y), Atom::from(x).partial_cmp(&Atom::from(y))); + } + + check("a", "body"); + check("asdf", "body"); + check("zasdf", "body"); + check("z", "body"); + + check("a", "bbbbb"); + check("asdf", "bbbbb"); + check("zasdf", "bbbbb"); + check("z", "bbbbb"); +} + +#[test] +fn clone() { + let s0 = Atom::from("fn"); + let s1 = s0.clone(); + let s2 = Atom::from("loop"); + + let i0 = Atom::from("blah"); + let i1 = i0.clone(); + let i2 = Atom::from("blah2"); + + let d0 = Atom::from("zzzzzzzz"); + let d1 = d0.clone(); + let d2 = Atom::from("zzzzzzzzz"); + + assert!(s0 == s1); + assert!(s0 != s2); + + assert!(i0 == i1); + assert!(i0 != i2); + + assert!(d0 == d1); + assert!(d0 != d2); + + assert!(s0 != i0); + assert!(s0 != d0); + assert!(i0 != d0); +} + +macro_rules! assert_eq_fmt (($fmt:expr, $x:expr, $y:expr) => ({ + let x = $x; + let y = $y; + if x != y { + panic!("assertion failed: {} != {}", + format_args!($fmt, x), + format_args!($fmt, y)); + } +})); + +#[test] +fn repr() { + fn check(s: &str, data: u64) { + assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); + } + + fn check_static(s: &str, x: Atom) { + assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); + assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); + // The index is unspecified by phf. + assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); + } + + // This test is here to make sure we don't change atom representation + // by accident. It may need adjusting if there are changes to the + // static atom table, the tag values, etc. 
+ + // Static atoms + check_static("a", test_atom!("a")); + check_static("address", test_atom!("address")); + check_static("area", test_atom!("area")); + + // Inline atoms + check("e", 0x0000_0000_0000_6511); + check("xyzzy", 0x0000_797A_7A79_7851); + check("xyzzy01", 0x3130_797A_7A79_7871); + + // Dynamic atoms. This is a pointer so we can't verify every bit. + assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); +} + +#[test] +fn test_threads() { + for _ in 0_u32..100 { + thread::spawn(move || { + let _ = Atom::from("a dynamic string"); + let _ = Atom::from("another string"); + }); + } +} + +#[test] +fn atom_macro() { + assert_eq!(test_atom!("body"), Atom::from("body")); + assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); +} + +#[test] +fn match_atom() { + assert_eq!(2, match Atom::from("head") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + }); + + assert_eq!(3, match Atom::from("body") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + }); + + assert_eq!(3, match Atom::from("zzzzzz") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + }); +} + +#[test] +fn ensure_deref() { + // Ensure we can Deref to a &str + let atom = Atom::from("foobar"); + let _: &str = &atom; +} + +#[test] +fn ensure_as_ref() { + // Ensure we can as_ref to a &str + let atom = Atom::from("foobar"); + let _: &str = atom.as_ref(); +} + +#[test] +fn test_ascii_lowercase() { + assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); + assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); + assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); +} + +#[test] +fn test_ascii_uppercase() { + assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); + 
assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN FOX!")); + assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); +} + +#[test] +fn test_eq_ignore_ascii_case() { + assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); + assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); + assert!(Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); + assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); + assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); + assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); +} + +#[test] +fn test_from_string() { + assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); +} + +#[cfg(all(test, feature = "unstable"))] +#[path = "bench.rs"] +mod bench; diff --git a/src/atom.rs b/src/atom.rs index 7dce151..8b48218 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -111,7 +111,6 @@ impl StringCache { (value.hash & BUCKET_MASK) as usize }; - let mut current: &mut Option> = &mut self.buckets[bucket_index]; loop { @@ -257,6 +256,33 @@ impl Atom { UnpackedAtom::from_packed(self.unsafe_data) } + /// Return true if this is a static Atom. + #[doc(hidden)] + pub fn is_static(&self) -> bool { + match unsafe { self.unpack() } { + Static(..) => true, + _ => false + } + } + + /// Return true if this is a dynamic Atom. + #[doc(hidden)] + pub fn is_dynamic(&self) -> bool { + match unsafe { self.unpack() } { + Dynamic(..) 
=> true, + _ => false + } + } + + /// Return true if this is an inline Atom. + #[doc(hidden)] + pub fn is_inline(&self) -> bool { + match unsafe { self.unpack() } { + Inline(..) => true, + _ => false + } + } + /// Get the hash of the string as it is stored in the set. pub fn get_hash(&self) -> u32 { match unsafe { self.unpack() } { @@ -655,184 +681,14 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { } } +// Some minor tests of internal layout here. See ../integration-tests for much +// more. #[cfg(test)] -#[macro_use] mod tests { use std::mem; - use std::thread; - use super::{StaticAtomSet, StringCacheEntry}; - use super::UnpackedAtom::{Dynamic, Inline, Static}; + use super::{DefaultAtom, StringCacheEntry}; use shared::ENTRY_ALIGNMENT; - include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); - pub type Atom = TestAtom; - - #[test] - fn test_as_slice() { - let s0 = Atom::from(""); - assert!(s0.as_ref() == ""); - - let s1 = Atom::from("class"); - assert!(s1.as_ref() == "class"); - - let i0 = Atom::from("blah"); - assert!(i0.as_ref() == "blah"); - - let s0 = Atom::from("BLAH"); - assert!(s0.as_ref() == "BLAH"); - - let d0 = Atom::from("zzzzzzzzzz"); - assert!(d0.as_ref() == "zzzzzzzzzz"); - - let d1 = Atom::from("ZZZZZZZZZZ"); - assert!(d1.as_ref() == "ZZZZZZZZZZ"); - } - - macro_rules! 
unpacks_to (($e:expr, $t:pat) => ( - match unsafe { Atom::from($e).unpack() } { - $t => (), - _ => panic!("atom has wrong type"), - } - )); - - #[test] - fn test_types() { - unpacks_to!("", Static(..)); - unpacks_to!("id", Static(..)); - unpacks_to!("body", Static(..)); - unpacks_to!("c", Inline(..)); // "z" is a static atom - unpacks_to!("zz", Inline(..)); - unpacks_to!("zzz", Inline(..)); - unpacks_to!("zzzz", Inline(..)); - unpacks_to!("zzzzz", Inline(..)); - unpacks_to!("zzzzzz", Inline(..)); - unpacks_to!("zzzzzzz", Inline(..)); - unpacks_to!("zzzzzzzz", Dynamic(..)); - unpacks_to!("zzzzzzzzzzzzz", Dynamic(..)); - } - - #[test] - fn test_equality() { - let s0 = Atom::from("fn"); - let s1 = Atom::from("fn"); - let s2 = Atom::from("loop"); - - let i0 = Atom::from("blah"); - let i1 = Atom::from("blah"); - let i2 = Atom::from("blah2"); - - let d0 = Atom::from("zzzzzzzz"); - let d1 = Atom::from("zzzzzzzz"); - let d2 = Atom::from("zzzzzzzzz"); - - assert!(s0 == s1); - assert!(s0 != s2); - - assert!(i0 == i1); - assert!(i0 != i2); - - assert!(d0 == d1); - assert!(d0 != d2); - - assert!(s0 != i0); - assert!(s0 != d0); - assert!(i0 != d0); - } - - #[test] - fn default() { - assert_eq!(TestAtom::default(), test_atom!("")); - assert_eq!(&*TestAtom::default(), ""); - } - - #[test] - fn ord() { - fn check(x: &str, y: &str) { - assert_eq!(x < y, Atom::from(x) < Atom::from(y)); - assert_eq!(x.cmp(y), Atom::from(x).cmp(&Atom::from(y))); - assert_eq!(x.partial_cmp(y), Atom::from(x).partial_cmp(&Atom::from(y))); - } - - check("a", "body"); - check("asdf", "body"); - check("zasdf", "body"); - check("z", "body"); - - check("a", "bbbbb"); - check("asdf", "bbbbb"); - check("zasdf", "bbbbb"); - check("z", "bbbbb"); - } - - #[test] - fn clone() { - let s0 = Atom::from("fn"); - let s1 = s0.clone(); - let s2 = Atom::from("loop"); - - let i0 = Atom::from("blah"); - let i1 = i0.clone(); - let i2 = Atom::from("blah2"); - - let d0 = Atom::from("zzzzzzzz"); - let d1 = d0.clone(); - let d2 = 
Atom::from("zzzzzzzzz"); - - assert!(s0 == s1); - assert!(s0 != s2); - - assert!(i0 == i1); - assert!(i0 != i2); - - assert!(d0 == d1); - assert!(d0 != d2); - - assert!(s0 != i0); - assert!(s0 != d0); - assert!(i0 != d0); - } - - macro_rules! assert_eq_fmt (($fmt:expr, $x:expr, $y:expr) => ({ - let x = $x; - let y = $y; - if x != y { - panic!("assertion failed: {} != {}", - format_args!($fmt, x), - format_args!($fmt, y)); - } - })); - - #[test] - fn repr() { - fn check(s: &str, data: u64) { - assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); - } - - fn check_static(s: &str, x: Atom) { - assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); - assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); - // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); - } - - // This test is here to make sure we don't change atom representation - // by accident. It may need adjusting if there are changes to the - // static atom table, the tag values, etc. - - // Static atoms - check_static("a", test_atom!("a")); - check_static("address", test_atom!("address")); - check_static("area", test_atom!("area")); - - // Inline atoms - check("e", 0x0000_0000_0000_6511); - check("xyzzy", 0x0000_797A_7A79_7851); - check("xyzzy01", 0x3130_797A_7A79_7871); - - // Dynamic atoms. This is a pointer so we can't verify every bit. - assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); - } - #[test] fn assert_sizes() { use std::mem; @@ -843,104 +699,14 @@ mod tests { let compiler_uses_inline_drop_flags = mem::size_of::() > 0; // Guard against accidental changes to the sizes of things. 
- assert_eq!(mem::size_of::(), + assert_eq!(mem::size_of::(), if compiler_uses_inline_drop_flags { 16 } else { 8 }); assert_eq!(mem::size_of::(), 8 + 4 * mem::size_of::()); } - #[test] - fn test_threads() { - for _ in 0_u32..100 { - thread::spawn(move || { - let _ = Atom::from("a dynamic string"); - let _ = Atom::from("another string"); - }); - } - } - - #[test] - fn atom_macro() { - assert_eq!(test_atom!("body"), Atom::from("body")); - assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); - } - - #[test] - fn match_atom() { - assert_eq!(2, match Atom::from("head") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("body") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("zzzzzz") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - } - - #[test] - fn ensure_deref() { - // Ensure we can Deref to a &str - let atom = Atom::from("foobar"); - let _: &str = &atom; - } - - #[test] - fn ensure_as_ref() { - // Ensure we can as_ref to a &str - let atom = Atom::from("foobar"); - let _: &str = atom.as_ref(); - } - #[test] fn string_cache_entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } - - #[test] - fn test_ascii_lowercase() { - assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); - assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); - assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); - } - - #[test] - fn test_ascii_uppercase() { - assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); - assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN 
FOX!")); - assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); - } - - #[test] - fn test_eq_ignore_ascii_case() { - assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); - assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); - assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); - assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); - assert!(Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); - assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); - assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); - assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); - assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); - assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); - } - - #[test] - fn test_from_string() { - assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); - } } - -#[cfg(all(test, feature = "unstable"))] -#[path = "bench.rs"] -mod bench; diff --git a/src/lib.rs b/src/lib.rs index fc00f3a..7de917c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,7 +33,7 @@ //! //! In `build.rs`: //! -//! ``` +//! ```ignore //! extern crate string_cache_codegen; //! //! 
use std::env; @@ -105,10 +105,7 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#![cfg_attr(all(test, feature = "unstable"), feature(test))] -#[cfg(all(test, feature = "unstable"))] extern crate test; -#[cfg(all(test, feature = "unstable"))] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; extern crate phf_shared; From d182bbfdbaf7515856e88d629a92e3f65e6b0e41 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 16:29:33 +0200 Subject: [PATCH 141/212] Update rand to 0.7 --- integration-tests/Cargo.toml | 2 +- integration-tests/src/bench.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 1a92ecc..7bd1772 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -18,7 +18,7 @@ unstable = [] string_cache = { version = "0.7", path = ".." } [dev-dependencies] -rand = "0.4" +rand = "0.7" string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } [build-dependencies] diff --git a/integration-tests/src/bench.rs b/integration-tests/src/bench.rs index 7866ae9..459c913 100644 --- a/integration-tests/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -184,9 +184,9 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( fn $name(b: &mut Bencher) { use std::str; use rand; - use rand::Rng; + use rand::{RngCore, SeedableRng}; - let mut gen = rand::weak_rng(); + let mut gen = rand::rngs::SmallRng::from_entropy(); b.iter(|| { // We have to generate new atoms on every iter, because // the dynamic atom table isn't reset. 
From 07a74fa9bf90285d0a9d8383a483a02b7d571dae Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 16:31:15 +0200 Subject: [PATCH 142/212] Update version to 0.8 --- Cargo.toml | 2 +- README.md | 6 +++--- integration-tests/Cargo.toml | 6 +++--- src/lib.rs | 4 ++-- string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8c40c1e..3ade61d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.7.4" # Also update README.md when making a semver-breaking change +version = "0.8.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/README.md b/README.md index ddd3bc0..9c9c8ac 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.7" +string_cache = "0.8" ``` In `lib.rs`: @@ -31,10 +31,10 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.7" +string_cache = "0.8" [build-dependencies] -string_cache_codegen = "0.4" +string_cache_codegen = "0.5" ``` In `build.rs`: diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 7bd1772..7f1c60b 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -15,11 +15,11 @@ test = true unstable = [] [dependencies] -string_cache = { version = "0.7", path = ".." } +string_cache = { version = "0.8", path = ".." 
} [dev-dependencies] rand = "0.7" -string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } [build-dependencies] -string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } diff --git a/src/lib.rs b/src/lib.rs index 7de917c..0191bd8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,10 +25,10 @@ //! In `Cargo.toml`: //! ```toml //! [dependencies] -//! string_cache = "0.7" +//! string_cache = "0.8" //! //! [dev-dependencies] -//! string_cache_codegen = "0.4" +//! string_cache_codegen = "0.5" //! ``` //! //! In `build.rs`: diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 4bcd3ee..1dc5493 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.4" # Also update ../README.md when making a semver-breaking change +version = "0.5.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index e878b0f..7f5cec2 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -19,10 +19,10 @@ //! build = "build.rs" //! //! [dependencies] -//! string_cache = "0.7" +//! string_cache = "0.8" //! //! [build-dependencies] -//! string_cache_codegen = "0.4" +//! string_cache_codegen = "0.5" //! ``` //! //! 
In `build.rs`: From 2e1d42a9e4b98eddd1a5bfb127ba4ed083302374 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 16:58:11 +0200 Subject: [PATCH 143/212] Run rustfmt --- examples/simple.rs | 4 +- integration-tests/build.rs | 11 +++- integration-tests/src/bench.rs | 14 ++--- integration-tests/src/lib.rs | 85 ++++++++++++++++++----------- shared/lib.rs | 4 +- src/atom.rs | 98 +++++++++++++++++++++------------- src/lib.rs | 11 ++-- string-cache-codegen/lib.rs | 52 +++++++++++------- tests/small-stack.rs | 11 ++-- 9 files changed, 182 insertions(+), 108 deletions(-) diff --git a/examples/simple.rs b/examples/simple.rs index b375049..8a524ff 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -3,12 +3,12 @@ extern crate string_cache; use string_cache::DefaultAtom; fn main() { - let mut interned_stuff = Vec::new(); let text = "here is a sentence of text that will be tokenised and interned and some repeated \ tokens is of text and"; for word in text.split_whitespace() { - let seen_before = interned_stuff.iter() + let seen_before = interned_stuff + .iter() // We can use impl PartialEq where T is anything string-like to compare to // interned strings to either other interned strings, or actual strings Comparing two // interned strings is very fast (normally a single cpu operation). 
diff --git a/integration-tests/build.rs b/integration-tests/build.rs index 2cb93b1..e7e89d4 100644 --- a/integration-tests/build.rs +++ b/integration-tests/build.rs @@ -6,7 +6,16 @@ use std::path::Path; fn main() { string_cache_codegen::AtomType::new("TestAtom", "test_atom!") .atoms(&[ - "a", "b", "address", "area", "body", "font-weight", "br", "html", "head", "id", + "a", + "b", + "address", + "area", + "body", + "font-weight", + "br", + "html", + "head", + "id", ]) .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) .unwrap() diff --git a/integration-tests/src/bench.rs b/integration-tests/src/bench.rs index 459c913..4d8f012 100644 --- a/integration-tests/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -28,7 +28,7 @@ and cheap to move around, which isn't reflected in these tests. */ use crate::TestAtom; -use test::{Bencher, black_box}; +use test::{black_box, Bencher}; // Just shorthand fn mk(x: &str) -> TestAtom { @@ -142,10 +142,10 @@ macro_rules! bench_all ( ); ); -pub const longer_dynamic_a: &'static str - = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Band"; -pub const longer_dynamic_b: &'static str - = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Ban!"; +pub const longer_dynamic_a: &'static str = + "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Band"; +pub const longer_dynamic_b: &'static str = + "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Ban!"; bench_all!([eq ne lt clone_string] for short_string = "e", "f"); bench_all!([eq ne lt clone_string] for medium_string = "xyzzy01", "xyzzy02"); @@ -206,7 +206,7 @@ macro_rules! 
bench_rand ( ($name:ident, $len:expr) => ( } )); -bench_rand!(intern_rand_008, 8); -bench_rand!(intern_rand_032, 32); +bench_rand!(intern_rand_008, 8); +bench_rand!(intern_rand_032, 32); bench_rand!(intern_rand_128, 128); bench_rand!(intern_rand_512, 512); diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index d993e57..c6b9980 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -8,16 +8,16 @@ // except according to those terms. #![cfg(test)] - #![deny(warnings)] #![allow(non_upper_case_globals)] - #![cfg_attr(feature = "unstable", feature(test))] extern crate string_cache; -#[cfg(feature = "unstable")] extern crate test; -#[cfg(feature = "unstable")] extern crate rand; +#[cfg(feature = "unstable")] +extern crate rand; +#[cfg(feature = "unstable")] +extern crate test; use std::thread; use string_cache::atom::StaticAtomSet; @@ -172,13 +172,13 @@ fn repr() { // static atom table, the tag values, etc. // Static atoms - check_static("a", test_atom!("a")); + check_static("a", test_atom!("a")); check_static("address", test_atom!("address")); - check_static("area", test_atom!("area")); + check_static("area", test_atom!("area")); // Inline atoms - check("e", 0x0000_0000_0000_6511); - check("xyzzy", 0x0000_797A_7A79_7851); + check("e", 0x0000_0000_0000_6511); + check("xyzzy", 0x0000_797A_7A79_7851); check("xyzzy01", 0x3130_797A_7A79_7871); // Dynamic atoms. This is a pointer so we can't verify every bit. 
@@ -203,23 +203,32 @@ fn atom_macro() { #[test] fn match_atom() { - assert_eq!(2, match Atom::from("head") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("body") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("zzzzzz") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); + assert_eq!( + 2, + match Atom::from("head") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + } + ); + + assert_eq!( + 3, + match Atom::from("body") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + } + ); + + assert_eq!( + 3, + match Atom::from("zzzzzz") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + } + ); } #[test] @@ -240,16 +249,28 @@ fn ensure_as_ref() { fn test_ascii_lowercase() { assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); - assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); + assert_eq!( + Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), + Atom::from("the quick brown fox!") + ); + assert_eq!( + Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), + Atom::from("je vais À paris") + ); } #[test] fn test_ascii_uppercase() { assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN FOX!")); - assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); + assert_eq!( + Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), + 
Atom::from("THE QUICK BROWN FOX!") + ); + assert_eq!( + Atom::from("Je vais à Paris").to_ascii_uppercase(), + Atom::from("JE VAIS à PARIS") + ); } #[test] @@ -257,12 +278,14 @@ fn test_eq_ignore_ascii_case() { assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); - assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); + assert!(Atom::from("The Quick Brown Fox!") + .eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); assert!(Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); - assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); + assert!(!Atom::from("The Quick Brown Fox!") + .eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); } diff --git a/shared/lib.rs b/shared/lib.rs index f69a4f4..75c21d0 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -9,10 +9,10 @@ // FIXME(rust-lang/rust#18153): generate these from an enum pub const DYNAMIC_TAG: u8 = 0b_00; -pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble +pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble pub const STATIC_TAG: u8 = 0b_10; pub const TAG_MASK: u64 = 0b_11; -pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. +pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. 
pub const MAX_INLINE_LEN: usize = 7; diff --git a/src/atom.rs b/src/atom.rs index 8b48218..8f4e999 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -21,15 +21,17 @@ use std::mem; use std::ops; use std::slice; use std::str; -use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; +use std::sync::Mutex; -use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - ENTRY_ALIGNMENT, pack_static}; use self::UnpackedAtom::{Dynamic, Inline, Static}; +use shared::{ + pack_static, DYNAMIC_TAG, ENTRY_ALIGNMENT, INLINE_TAG, MAX_INLINE_LEN, STATIC_SHIFT_BITS, + STATIC_TAG, TAG_MASK, +}; -const NB_BUCKETS: usize = 1 << 12; // 4096 +const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { @@ -48,8 +50,7 @@ struct StringCacheEntry { } impl StringCacheEntry { - fn new(next: Option>, hash: u64, string: String) - -> StringCacheEntry { + fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { StringCacheEntry { next_in_bucket: next, hash: hash, @@ -65,17 +66,14 @@ impl StringCache { let _static_assert_size_eq = std::mem::transmute::; let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); StringCache { - buckets: unsafe { - Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) - }, + buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, } } fn add(&mut self, string: Cow, hash: u64) -> *mut StringCacheEntry { let bucket_index = (hash & BUCKET_MASK) as usize; { - let mut ptr: Option<&mut Box> = - self.buckets[bucket_index].as_mut(); + let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { if entry.hash == hash && &*entry.string == &*string { @@ -96,7 +94,10 @@ impl StringCache { debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let string = string.into_owned(); let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index].take(), hash, string)); + 
self.buckets[bucket_index].take(), + hash, + string, + )); let ptr: *mut StringCacheEntry = &mut *entry; self.buckets[bucket_index] = Some(entry); @@ -119,7 +120,9 @@ impl StringCache { None => break, }; if entry_ptr == ptr { - mem::drop(mem::replace(current, unsafe { (*entry_ptr).next_in_bucket.take() })); + mem::drop(mem::replace(current, unsafe { + (*entry_ptr).next_in_bucket.take() + })); break; } current = unsafe { &mut (*entry_ptr).next_in_bucket }; @@ -261,7 +264,7 @@ impl Atom { pub fn is_static(&self) -> bool { match unsafe { self.unpack() } { Static(..) => true, - _ => false + _ => false, } } @@ -270,7 +273,7 @@ impl Atom { pub fn is_dynamic(&self) -> bool { match unsafe { self.unpack() } { Dynamic(..) => true, - _ => false + _ => false, } } @@ -279,7 +282,7 @@ impl Atom { pub fn is_inline(&self) -> bool { match unsafe { self.unpack() } { Inline(..) => true, - _ => false + _ => false, } } @@ -294,9 +297,7 @@ impl Atom { let entry = entry as *mut StringCacheEntry; u64_hash_as_u32(unsafe { (*entry).hash }) } - Inline(..) => { - u64_hash_as_u32(self.unsafe_data) - } + Inline(..) => u64_hash_as_u32(self.unsafe_data), } } } @@ -306,14 +307,17 @@ impl Default for Atom { fn default() -> Self { Atom { unsafe_data: pack_static(Static::empty_string_index()), - phantom: PhantomData + phantom: PhantomData, } } } impl Hash for Atom { #[inline] - fn hash(&self, state: &mut H) where H: Hasher { + fn hash(&self, state: &mut H) + where + H: Hasher, + { state.write_u32(self.get_hash()) } } @@ -368,7 +372,10 @@ impl<'a, Static: StaticAtomSet> From> for Atom { }; let data = unsafe { unpacked.pack() }; - Atom { unsafe_data: data, phantom: PhantomData } + Atom { + unsafe_data: data, + phantom: PhantomData, + } } } @@ -394,7 +401,7 @@ impl Clone for Atom { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); - }, + } None => (), } } @@ -437,8 +444,11 @@ impl ops::Deref for Atom { Inline(..) 
=> { let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8_unchecked(buf) - }, - Static(idx) => Static::get().atoms.get(idx as usize).expect("bad static atom"), + } + Static(idx) => Static::get() + .atoms + .get(idx as usize) + .expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -497,14 +507,20 @@ impl AsRef for Atom { } impl Serialize for Atom { - fn serialize(&self, serializer: S) -> Result where S: Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { let string: &str = self.as_ref(); string.serialize(serializer) } } impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'a>, + { let string: String = Deserialize::deserialize(deserializer)?; Ok(Atom::from(string)) } @@ -535,8 +551,8 @@ impl Atom { /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase pub fn to_ascii_uppercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'a' ..= b'z' = b { - return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()) + if let b'a'..=b'z' = b { + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()); } } self.clone() @@ -547,8 +563,8 @@ impl Atom { /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase pub fn to_ascii_lowercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'A' ..= b'Z' = b { - return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()) + if let b'A'..=b'Z' = b { + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()); } } self.clone() @@ -650,7 +666,7 @@ impl UnpackedAtom { let src = inline_atom_slice(&data); buf.copy_from_slice(src); Inline(len as u8, buf) - }, + } _ => debug_unreachable!(), } } @@ -685,9 
+701,9 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { // more. #[cfg(test)] mod tests { - use std::mem; use super::{DefaultAtom, StringCacheEntry}; use shared::ENTRY_ALIGNMENT; + use std::mem; #[test] fn assert_sizes() { @@ -699,10 +715,18 @@ mod tests { let compiler_uses_inline_drop_flags = mem::size_of::() > 0; // Guard against accidental changes to the sizes of things. - assert_eq!(mem::size_of::(), - if compiler_uses_inline_drop_flags { 16 } else { 8 }); - assert_eq!(mem::size_of::(), - 8 + 4 * mem::size_of::()); + assert_eq!( + mem::size_of::(), + if compiler_uses_inline_drop_flags { + 16 + } else { + 8 + } + ); + assert_eq!( + mem::size_of::(), + 8 + 4 * mem::size_of::() + ); } #[test] diff --git a/src/lib.rs b/src/lib.rs index 0191bd8..058b833 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,22 +103,23 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] - #![cfg_attr(test, deny(warnings))] -#[macro_use] extern crate lazy_static; -#[macro_use] extern crate debug_unreachable; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate debug_unreachable; extern crate phf_shared; extern crate precomputed_hash; extern crate serde; extern crate string_cache_shared as shared; -pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; +pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; pub mod atom; // Make test_atom! macro work in this crate. 
// `$crate` would not be appropriate for other crates creating such macros mod string_cache { - pub use {Atom, StaticAtomSet, PhfStrSet}; + pub use {Atom, PhfStrSet, StaticAtomSet}; } diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 7f5cec2..86af84f 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -71,12 +71,13 @@ extern crate phf_generator; extern crate phf_shared; extern crate string_cache_shared as shared; -#[macro_use] extern crate quote; +#[macro_use] +extern crate quote; extern crate proc_macro2; use std::collections::HashSet; use std::fs::File; -use std::io::{self, Write, BufWriter}; +use std::io::{self, BufWriter, Write}; use std::iter; use std::path::Path; @@ -161,22 +162,30 @@ impl AtomType { /// Adds multiple atoms to the builder pub fn atoms(&mut self, iter: I) -> &mut Self - where I: IntoIterator, I::Item: AsRef { - self.atoms.extend(iter.into_iter().map(|s| s.as_ref().to_owned())); + where + I: IntoIterator, + I::Item: AsRef, + { + self.atoms + .extend(iter.into_iter().map(|s| s.as_ref().to_owned())); self } /// Write generated code to `destination`. - pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { + pub fn write_to(&mut self, mut destination: W) -> io::Result<()> + where + W: Write, + { destination.write_all( self.to_tokens() - .to_string() - // Insert some newlines to make the generated code slightly easier to read. - .replace(" [ \"", "[\n\"") - .replace("\" , ", "\",\n") - .replace(" ( \"", "\n( \"") - .replace("; ", ";\n") - .as_bytes()) + .to_string() + // Insert some newlines to make the generated code slightly easier to read. 
+ .replace(" [ \"", "[\n\"") + .replace("\" , ", "\",\n") + .replace(" ( \"", "\n( \"") + .replace("; ", ";\n") + .as_bytes(), + ) } fn to_tokens(&mut self) -> proc_macro2::TokenStream { @@ -201,30 +210,33 @@ impl AtomType { .unwrap() }); - let hashes: Vec = - atoms.iter().map(|string| { + let hashes: Vec = atoms + .iter() + .map(|string| { let hash = phf_shared::hash(string, &key); (hash.g ^ hash.f1) as u32 - }).collect(); + }) + .collect(); let type_name = if let Some(position) = self.path.rfind("::") { - &self.path[position + "::".len() ..] + &self.path[position + "::".len()..] } else { &self.path }; let atom_doc = match self.atom_doc { Some(ref doc) => quote!(#[doc = #doc]), - None => quote!() + None => quote!(), }; let static_set_doc = match self.static_set_doc { Some(ref doc) => quote!(#[doc = #doc]), - None => quote!() + None => quote!(), }; let macro_doc = match self.macro_doc { Some(ref doc) => quote!(#[doc = #doc]), - None => quote!() + None => quote!(), }; - let new_term = |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); + let new_term = + |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); diff --git a/tests/small-stack.rs b/tests/small-stack.rs index 300b142..269cad7 100644 --- a/tests/small-stack.rs +++ b/tests/small-stack.rs @@ -6,7 +6,12 @@ // This is a separate test program rather than a `#[test] fn` among others // to make sure that nothing else has already initialized the map in this process. 
fn main() { - std::thread::Builder::new().stack_size(50_000).spawn(|| { - string_cache::DefaultAtom::from("12345678"); - }).unwrap().join().unwrap() + std::thread::Builder::new() + .stack_size(50_000) + .spawn(|| { + string_cache::DefaultAtom::from("12345678"); + }) + .unwrap() + .join() + .unwrap() } From a317539e722ca7b173f984a168dd5159ff6a8b2b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 17:45:42 +0200 Subject: [PATCH 144/212] Make fields of Atom private atom!() macros create values through a `const fn` constructor --- Cargo.toml | 1 - integration-tests/src/lib.rs | 10 +- shared/Cargo.toml | 11 -- shared/LICENSE-APACHE | 201 -------------------------------- shared/LICENSE-MIT | 25 ---- shared/lib.rs | 23 ---- src/atom.rs | 107 +++++++++-------- src/lib.rs | 1 - string-cache-codegen/Cargo.toml | 1 - string-cache-codegen/lib.rs | 50 ++++---- 10 files changed, 92 insertions(+), 338 deletions(-) delete mode 100644 shared/Cargo.toml delete mode 100644 shared/LICENSE-APACHE delete mode 100644 shared/LICENSE-MIT delete mode 100644 shared/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 3ade61d..5ba5775 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,6 @@ lazy_static = "1" serde = "1" phf_shared = "0.8" new_debug_unreachable = "1.0" -string_cache_shared = {path = "./shared", version = "0.3"} [[test]] name = "small-stack" diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index c6b9980..614d068 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -157,14 +157,14 @@ macro_rules! 
assert_eq_fmt (($fmt:expr, $x:expr, $y:expr) => ({ #[test] fn repr() { fn check(s: &str, data: u64) { - assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); + assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data(), data); } fn check_static(s: &str, x: Atom) { - assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); - assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); + assert_eq_fmt!("0x{:016X}", x.unsafe_data(), Atom::from(s).unsafe_data()); + assert_eq!(0x2, x.unsafe_data() & 0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); + assert!((x.unsafe_data() >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); } // This test is here to make sure we don't change atom representation @@ -182,7 +182,7 @@ fn repr() { check("xyzzy01", 0x3130_797A_7A79_7871); // Dynamic atoms. This is a pointer so we can't verify every bit. - assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); + assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data() & 0xf); } #[test] diff --git a/shared/Cargo.toml b/shared/Cargo.toml deleted file mode 100644 index ce8d53f..0000000 --- a/shared/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] - -name = "string_cache_shared" -version = "0.3.1" -authors = [ "The Servo Project Developers" ] -description = "Code share between string_cache and string_cache_codegen." -license = "MIT / Apache-2.0" -repository = "https://github.com/servo/string-cache" - -[lib] -path = "lib.rs" diff --git a/shared/LICENSE-APACHE b/shared/LICENSE-APACHE deleted file mode 100644 index 16fe87b..0000000 --- a/shared/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the 
following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/shared/LICENSE-MIT b/shared/LICENSE-MIT deleted file mode 100644 index 807526f..0000000 --- a/shared/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2012-2013 Mozilla Foundation - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/shared/lib.rs b/shared/lib.rs deleted file mode 100644 index 75c21d0..0000000 --- a/shared/lib.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2015 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// FIXME(rust-lang/rust#18153): generate these from an enum -pub const DYNAMIC_TAG: u8 = 0b_00; -pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble -pub const STATIC_TAG: u8 = 0b_10; -pub const TAG_MASK: u64 = 0b_11; -pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. 
- -pub const MAX_INLINE_LEN: usize = 7; - -pub const STATIC_SHIFT_BITS: usize = 32; - -pub fn pack_static(n: u32) -> u64 { - (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) -} diff --git a/src/atom.rs b/src/atom.rs index 8f4e999..8f6a983 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -26,10 +26,16 @@ use std::sync::atomic::Ordering::SeqCst; use std::sync::Mutex; use self::UnpackedAtom::{Dynamic, Inline, Static}; -use shared::{ - pack_static, DYNAMIC_TAG, ENTRY_ALIGNMENT, INLINE_TAG, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - STATIC_TAG, TAG_MASK, -}; + +const DYNAMIC_TAG: u8 = 0b_00; +const INLINE_TAG: u8 = 0b_01; // len in upper nybble +const STATIC_TAG: u8 = 0b_10; +const TAG_MASK: u64 = 0b_11; +const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. + +const MAX_INLINE_LEN: usize = 7; + +const STATIC_SHIFT_BITS: usize = 32; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; @@ -138,7 +144,7 @@ impl StringCache { /// It is used by the methods of [`Atom`] to check if a string is present in the static set. /// /// [`Atom`]: struct.Atom.html -pub trait StaticAtomSet { +pub trait StaticAtomSet: Ord { /// Get the location of the static string set in the binary. fn get() -> &'static PhfStrSet; /// Get the index of the empty string, which is in every set and is used for `Atom::default`. @@ -160,6 +166,7 @@ pub struct PhfStrSet { } /// An empty static atom set for when only dynamic strings will be added +#[derive(PartialEq, Eq, PartialOrd, Ord)] pub struct EmptyStaticAtomSet; impl StaticAtomSet for EmptyStaticAtomSet { @@ -226,14 +233,11 @@ pub type DefaultAtom = Atom; /// } /// } // atom is dropped here, so it is not kept around in memory /// ``` -pub struct Atom { - /// This field is public so that the `atom!()` macros can use it. - /// You should not otherwise access this field. 
- #[doc(hidden)] - pub unsafe_data: u64, - - #[doc(hidden)] - pub phantom: PhantomData, +#[derive(PartialEq, Eq)] +// NOTE: Deriving PartialEq requires that a given string must always be interned the same way. +pub struct Atom { + unsafe_data: u64, + phantom: PhantomData, } impl ::precomputed_hash::PrecomputedHash for Atom { @@ -253,13 +257,34 @@ fn u64_hash_as_u32(h: u64) -> u32 { ((h >> 32) ^ h) as u32 } +// FIXME: bound removed from the struct definition before of this error for pack_static: +// "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" +// https://github.com/rust-lang/rust/issues/57563 +impl Atom { + /// For the atom!() macros + #[inline(always)] + #[doc(hidden)] + pub const fn pack_static(n: u32) -> Self { + Self { + unsafe_data: (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS), + phantom: PhantomData, + } + } +} + impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { UnpackedAtom::from_packed(self.unsafe_data) } - /// Return true if this is a static Atom. + /// Return the internal repersentation. For testing. + #[doc(hidden)] + pub fn unsafe_data(&self) -> u64 { + self.unsafe_data + } + + /// Return true if this is a static Atom. For testing. #[doc(hidden)] pub fn is_static(&self) -> bool { match unsafe { self.unpack() } { @@ -268,7 +293,7 @@ impl Atom { } } - /// Return true if this is a dynamic Atom. + /// Return true if this is a dynamic Atom. For testing. #[doc(hidden)] pub fn is_dynamic(&self) -> bool { match unsafe { self.unpack() } { @@ -277,7 +302,7 @@ impl Atom { } } - /// Return true if this is an inline Atom. + /// Return true if this is an inline Atom. For testing. 
#[doc(hidden)] pub fn is_inline(&self) -> bool { match unsafe { self.unpack() } { @@ -305,10 +330,7 @@ impl Atom { impl Default for Atom { #[inline] fn default() -> Self { - Atom { - unsafe_data: pack_static(Static::empty_string_index()), - phantom: PhantomData, - } + Atom::pack_static(Static::empty_string_index()) } } @@ -322,16 +344,6 @@ impl Hash for Atom { } } -impl Eq for Atom {} - -// NOTE: This impl requires that a given string must always be interned the same way. -impl PartialEq for Atom { - #[inline] - fn eq(&self, other: &Self) -> bool { - self.unsafe_data == other.unsafe_data - } -} - impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] == other @@ -371,11 +383,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } }; - let data = unsafe { unpacked.pack() }; - Atom { - unsafe_data: data, - phantom: PhantomData, - } + unsafe { unpacked.pack() } } } @@ -412,11 +420,11 @@ impl Clone for Atom { } } -impl Drop for Atom { +impl Drop for Atom { #[inline] fn drop(&mut self) { // Out of line to guide inlining. - fn drop_slow(this: &mut Atom) { + fn drop_slow(this: &mut Atom) { STRING_CACHE.lock().unwrap().remove(this.unsafe_data); } @@ -631,22 +639,28 @@ impl UnpackedAtom { /// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for /// hints for the layout. 
#[inline(always)] - unsafe fn pack(self) -> u64 { + unsafe fn pack(self) -> Atom { match self { - Static(n) => pack_static(n), + Static(n) => Atom::pack_static(n), Dynamic(p) => { - let n = p as u64; - debug_assert!(0 == n & TAG_MASK); - n + let unsafe_data = p as u64; + debug_assert!(0 == unsafe_data & TAG_MASK); + Atom { + unsafe_data, + phantom: PhantomData, + } } Inline(len, buf) => { debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); + let mut unsafe_data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let dest = inline_atom_slice_mut(&mut data); + let dest = inline_atom_slice_mut(&mut unsafe_data); dest.copy_from_slice(&buf) } - data + Atom { + unsafe_data, + phantom: PhantomData, + } } } } @@ -701,8 +715,7 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { // more. #[cfg(test)] mod tests { - use super::{DefaultAtom, StringCacheEntry}; - use shared::ENTRY_ALIGNMENT; + use super::{DefaultAtom, StringCacheEntry, ENTRY_ALIGNMENT}; use std::mem; #[test] diff --git a/src/lib.rs b/src/lib.rs index 058b833..484f606 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,7 +112,6 @@ extern crate debug_unreachable; extern crate phf_shared; extern crate precomputed_hash; extern crate serde; -extern crate string_cache_shared as shared; pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 1dc5493..847bace 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,6 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.8" phf_shared = "0.8" proc-macro2 = "1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 86af84f..f22b88c 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -70,7 +70,6 @@ extern crate 
phf_generator; extern crate phf_shared; -extern crate string_cache_shared as shared; #[macro_use] extern crate quote; extern crate proc_macro2; @@ -78,7 +77,6 @@ extern crate proc_macro2; use std::collections::HashSet; use std::fs::File; use std::io::{self, BufWriter, Write}; -use std::iter; use std::path::Path; /// A builder for a static atom set and relevant macros @@ -199,16 +197,8 @@ impl AtomType { let phf_generator::HashState { key, disps, map } = hash_state; let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); - let atoms_ref = &atoms; let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; - let data = (0..atoms.len()).map(|i| { - format!("0x{:X}u64", shared::pack_static(i as u32)) - .parse::() - .unwrap() - .into_iter() - .next() - .unwrap() - }); + let indices = 0..atoms.len() as u32; let hashes: Vec = atoms .iter() @@ -218,10 +208,11 @@ impl AtomType { }) .collect(); - let type_name = if let Some(position) = self.path.rfind("::") { - &self.path[position + "::".len()..] - } else { - &self.path + let mut path_parts = self.path.rsplitn(2, "::"); + let type_name = path_parts.next().unwrap(); + let module = match path_parts.next() { + Some(m) => format!("$crate::{}", m), + None => format!("$crate"), }; let atom_doc = match self.atom_doc { Some(ref doc) => quote!(#[doc = #doc]), @@ -240,19 +231,32 @@ impl AtomType { let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); - let path = iter::repeat(self.path.parse::().unwrap()); + let module = module.parse::().unwrap(); + let const_names: Vec<_> = atoms + .iter() + .map(|atom| { + let mut name = String::from("ATOM"); + for c in atom.chars() { + name.push_str(&format!("_{:02X}", c as u32)) + } + new_term(&name) + }) + .collect(); quote! 
{ #atom_doc pub type #type_name = ::string_cache::Atom<#static_set_name>; + #static_set_doc + #[derive(PartialEq, Eq, PartialOrd, Ord)] pub struct #static_set_name; + impl ::string_cache::StaticAtomSet for #static_set_name { fn get() -> &'static ::string_cache::PhfStrSet { static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet { key: #key, disps: &[#((#disps0, #disps1)),*], - atoms: &[#(#atoms_ref),*], + atoms: &[#(#atoms),*], hashes: &[#(#hashes),*] }; &SET @@ -261,16 +265,16 @@ impl AtomType { #empty_string_index } } + + #( + pub const #const_names: #type_name = #type_name::pack_static(#indices); + )* + #macro_doc #[macro_export] macro_rules! #macro_name { #( - (#atoms_ref) => { - $crate::#path { - unsafe_data: #data, - phantom: ::std::marker::PhantomData, - } - }; + (#atoms) => { #module::#const_names }; )* } } From d653207e6cebfc5965b7dac778da1b95376a27fc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:10:28 +0200 Subject: [PATCH 145/212] Add the non-zero optimization to Atom --- src/atom.rs | 58 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 8f6a983..095140e 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -18,6 +18,7 @@ use std::fmt; use std::hash::{Hash, Hasher}; use std::marker::PhantomData; use std::mem; +use std::num::NonZeroU64; use std::ops; use std::slice; use std::str; @@ -110,8 +111,7 @@ impl StringCache { ptr } - fn remove(&mut self, key: u64) { - let ptr = key as *mut StringCacheEntry; + fn remove(&mut self, ptr: *mut StringCacheEntry) { let bucket_index = { let value: &StringCacheEntry = unsafe { &*ptr }; debug_assert!(value.ref_count.load(SeqCst) == 0); @@ -236,7 +236,7 @@ pub type DefaultAtom = Atom; #[derive(PartialEq, Eq)] // NOTE: Deriving PartialEq requires that a given string must always be interned the same way. 
pub struct Atom { - unsafe_data: u64, + unsafe_data: NonZeroU64, phantom: PhantomData, } @@ -266,7 +266,10 @@ impl Atom { #[doc(hidden)] pub const fn pack_static(n: u32) -> Self { Self { - unsafe_data: (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS), + unsafe_data: unsafe { + // STATIC_TAG ensure this is non-zero + NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS)) + }, phantom: PhantomData, } } @@ -281,7 +284,7 @@ impl Atom { /// Return the internal repersentation. For testing. #[doc(hidden)] pub fn unsafe_data(&self) -> u64 { - self.unsafe_data + self.unsafe_data.get() } /// Return true if this is a static Atom. For testing. @@ -322,7 +325,7 @@ impl Atom { let entry = entry as *mut StringCacheEntry; u64_hash_as_u32(unsafe { (*entry).hash }) } - Inline(..) => u64_hash_as_u32(self.unsafe_data), + Inline(..) => u64_hash_as_u32(self.unsafe_data.get()), } } } @@ -405,7 +408,7 @@ impl Clone for Atom { #[inline(always)] fn clone(&self) -> Self { unsafe { - match from_packed_dynamic(self.unsafe_data) { + match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); @@ -425,11 +428,14 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. 
fn drop_slow(this: &mut Atom) { - STRING_CACHE.lock().unwrap().remove(this.unsafe_data); + STRING_CACHE + .lock() + .unwrap() + .remove(this.unsafe_data.get() as *mut StringCacheEntry); } unsafe { - match from_packed_dynamic(self.unsafe_data) { + match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { @@ -608,9 +614,9 @@ enum UnpackedAtom { } #[inline(always)] -fn inline_atom_slice(x: &u64) -> &[u8] { +fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { unsafe { - let x: *const u64 = x; + let x: *const NonZeroU64 = x; let mut data = x as *const u8; // All except the lowest byte, which is first in little-endian, last in big-endian. if cfg!(target_endian = "little") { @@ -643,22 +649,24 @@ impl UnpackedAtom { match self { Static(n) => Atom::pack_static(n), Dynamic(p) => { - let unsafe_data = p as u64; - debug_assert!(0 == unsafe_data & TAG_MASK); + let data = p as u64; + debug_assert!(0 == data & TAG_MASK); Atom { - unsafe_data, + // Callers are responsible for calling this with a valid, non-null pointer + unsafe_data: NonZeroU64::new_unchecked(data), phantom: PhantomData, } } Inline(len, buf) => { debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut unsafe_data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let dest = inline_atom_slice_mut(&mut unsafe_data); + let dest = inline_atom_slice_mut(&mut data); dest.copy_from_slice(&buf) } Atom { - unsafe_data, + // INLINE_TAG ensures this is never zero + unsafe_data: NonZeroU64::new_unchecked(data), phantom: PhantomData, } } @@ -667,14 +675,14 @@ impl UnpackedAtom { /// Unpack a key, extracting information from a single u64 into useable structs. 
#[inline(always)] - unsafe fn from_packed(data: u64) -> UnpackedAtom { + unsafe fn from_packed(data: NonZeroU64) -> UnpackedAtom { debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - match (data & TAG_MASK) as u8 { - DYNAMIC_TAG => Dynamic(data as *mut ()), - STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), + match (data.get() & TAG_MASK) as u8 { + DYNAMIC_TAG => Dynamic(data.get() as *mut ()), + STATIC_TAG => Static((data.get() >> STATIC_SHIFT_BITS) as u32), INLINE_TAG => { - let len = ((data & 0xf0) >> 4) as usize; + let len = ((data.get() & 0xf0) >> 4) as usize; debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; let src = inline_atom_slice(&data); @@ -701,7 +709,7 @@ unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { /// /// It's undefined behavior to call this on a non-inline atom!! #[inline(always)] -unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { +unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { let src = inline_atom_slice(&data); @@ -736,6 +744,10 @@ mod tests { 8 } ); + assert_eq!( + mem::size_of::>(), + mem::size_of::(), + ); assert_eq!( mem::size_of::(), 8 + 4 * mem::size_of::() From 09abcfbab37b08152b347e3c7062bf564cfd46c8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:29:50 +0200 Subject: [PATCH 146/212] Remove unused events.rs --- src/event.rs | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 src/event.rs diff --git a/src/event.rs b/src/event.rs deleted file mode 100644 index 1b777d3..0000000 --- a/src/event.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -use std::sync::Mutex; - -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Debug)] -pub enum Event { - Intern(u64), - Insert(u64, String), - Remove(u64), -} - -lazy_static! { - pub static ref LOG: Mutex> - = Mutex::new(Vec::with_capacity(50_000)); -} - -pub fn log(e: Event) { - LOG.lock().unwrap().push(e); -} - -macro_rules! log (($e:expr) => (::event::log($e))); From d968f8c0f4734bca285a39fd2cf93f5041568959 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:33:50 +0200 Subject: [PATCH 147/212] test_atom! is in another crate now --- src/lib.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 484f606..bbbcf00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,9 +116,3 @@ extern crate serde; pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; pub mod atom; - -// Make test_atom! macro work in this crate. -// `$crate` would not be appropriate for other crates creating such macros -mod string_cache { - pub use {Atom, PhfStrSet, StaticAtomSet}; -} From d191aae845c370e052a1cf74ec079672241a69dd Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:39:50 +0200 Subject: [PATCH 148/212] Switch to the 2018 edition --- Cargo.toml | 1 + examples/simple.rs | 2 +- integration-tests/Cargo.toml | 1 + integration-tests/build.rs | 2 +- integration-tests/src/lib.rs | 4 ---- src/atom.rs | 2 ++ src/lib.rs | 10 +--------- string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 7 +------ 9 files changed, 9 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5ba5775..eec2438 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ description = "A string interning library for Rust, developed as part of the Ser license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" +edition = "2018" # Do not `exclude` ./string-cache-codegen because we want to include # 
./string-cache-codegen/shared.rs, and `include` is a pain to use diff --git a/examples/simple.rs b/examples/simple.rs index 8a524ff..f063b06 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -1,4 +1,4 @@ -extern crate string_cache; + use string_cache::DefaultAtom; diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 7f1c60b..736e34a 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" authors = [ "The Servo Project Developers" ] build = "build.rs" publish = false +edition = "2018" [lib] doctest = false diff --git a/integration-tests/build.rs b/integration-tests/build.rs index e7e89d4..da40873 100644 --- a/integration-tests/build.rs +++ b/integration-tests/build.rs @@ -1,4 +1,4 @@ -extern crate string_cache_codegen; +use string_cache_codegen; use std::env; use std::path::Path; diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 614d068..28a5836 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -12,10 +12,6 @@ #![allow(non_upper_case_globals)] #![cfg_attr(feature = "unstable", feature(test))] -extern crate string_cache; - -#[cfg(feature = "unstable")] -extern crate rand; #[cfg(feature = "unstable")] extern crate test; diff --git a/src/atom.rs b/src/atom.rs index 095140e..4c508e0 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -9,6 +9,8 @@ #![allow(non_upper_case_globals)] +use debug_unreachable::debug_unreachable; +use lazy_static::lazy_static; use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; diff --git a/src/lib.rs b/src/lib.rs index bbbcf00..fbfacb6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,14 +105,6 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate debug_unreachable; -extern crate phf_shared; -extern crate precomputed_hash; -extern crate serde; - -pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, 
PhfStrSet, StaticAtomSet}; +pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; pub mod atom; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 847bace..42cda70 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,5 +1,4 @@ [package] - name = "string_cache_codegen" version = "0.5.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] @@ -7,6 +6,7 @@ description = "A codegen library for string-cache, developed as part of the Serv license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache_codegen/" +edition = "2018" [lib] name = "string_cache_codegen" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index f22b88c..0d90271 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -68,12 +68,7 @@ #![recursion_limit = "128"] -extern crate phf_generator; -extern crate phf_shared; -#[macro_use] -extern crate quote; -extern crate proc_macro2; - +use quote::quote; use std::collections::HashSet; use std::fs::File; use std::io::{self, BufWriter, Write}; From a578c80406dc687a16b93f819bf33e9043c66a95 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:41:07 +0200 Subject: [PATCH 149/212] The atom module does not need to be public --- integration-tests/src/lib.rs | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 28a5836..3aa2a44 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -16,7 +16,7 @@ extern crate test; use std::thread; -use string_cache::atom::StaticAtomSet; +use string_cache::StaticAtomSet; include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); pub type Atom = TestAtom; diff --git a/src/lib.rs b/src/lib.rs index fbfacb6..3409c4d 100644 --- a/src/lib.rs 
+++ b/src/lib.rs @@ -107,4 +107,4 @@ pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; -pub mod atom; +mod atom; From b5174eaa3baabf7f2008ddcff27e8b02fdbf22cf Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 19:05:04 +0200 Subject: [PATCH 150/212] Move the global hash map to its own module --- src/atom.rs | 172 +++++++-------------------------------------- src/dynamic_set.rs | 110 +++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 138 insertions(+), 145 deletions(-) create mode 100644 src/dynamic_set.rs diff --git a/src/atom.rs b/src/atom.rs index 4c508e0..6e2027a 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -9,11 +9,10 @@ #![allow(non_upper_case_globals)] +use crate::dynamic_set::{Entry, DYNAMIC_SET}; use debug_unreachable::debug_unreachable; -use lazy_static::lazy_static; use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; - use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; @@ -24,9 +23,7 @@ use std::num::NonZeroU64; use std::ops; use std::slice; use std::str; -use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use std::sync::Mutex; use self::UnpackedAtom::{Dynamic, Inline, Static}; @@ -34,110 +31,10 @@ const DYNAMIC_TAG: u8 = 0b_00; const INLINE_TAG: u8 = 0b_01; // len in upper nybble const STATIC_TAG: u8 = 0b_10; const TAG_MASK: u64 = 0b_11; -const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. const MAX_INLINE_LEN: usize = 7; - const STATIC_SHIFT_BITS: usize = 32; -const NB_BUCKETS: usize = 1 << 12; // 4096 -const BUCKET_MASK: u64 = (1 << 12) - 1; - -struct StringCache { - buckets: Box<[Option>; NB_BUCKETS]>, -} - -lazy_static! 
{ - static ref STRING_CACHE: Mutex = Mutex::new(StringCache::new()); -} - -struct StringCacheEntry { - next_in_bucket: Option>, - hash: u64, - ref_count: AtomicIsize, - string: Box, -} - -impl StringCacheEntry { - fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { - StringCacheEntry { - next_in_bucket: next, - hash: hash, - ref_count: AtomicIsize::new(1), - string: string.into_boxed_str(), - } - } -} - -impl StringCache { - fn new() -> StringCache { - type T = Option>; - let _static_assert_size_eq = std::mem::transmute::; - let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); - StringCache { - buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, - } - } - - fn add(&mut self, string: Cow, hash: u64) -> *mut StringCacheEntry { - let bucket_index = (hash & BUCKET_MASK) as usize; - { - let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); - - while let Some(entry) = ptr.take() { - if entry.hash == hash && &*entry.string == &*string { - if entry.ref_count.fetch_add(1, SeqCst) > 0 { - return &mut **entry; - } - // Uh-oh. The pointer's reference count was zero, which means someone may try - // to free it. (Naive attempts to defend against this, for example having the - // destructor check to see whether the reference count is indeed zero, don't - // work due to ABA.) Thus we need to temporarily add a duplicate string to the - // list. 
- entry.ref_count.fetch_sub(1, SeqCst); - break; - } - ptr = entry.next_in_bucket.as_mut(); - } - } - debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); - let string = string.into_owned(); - let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index].take(), - hash, - string, - )); - let ptr: *mut StringCacheEntry = &mut *entry; - self.buckets[bucket_index] = Some(entry); - - ptr - } - - fn remove(&mut self, ptr: *mut StringCacheEntry) { - let bucket_index = { - let value: &StringCacheEntry = unsafe { &*ptr }; - debug_assert!(value.ref_count.load(SeqCst) == 0); - (value.hash & BUCKET_MASK) as usize - }; - - let mut current: &mut Option> = &mut self.buckets[bucket_index]; - - loop { - let entry_ptr: *mut StringCacheEntry = match current.as_mut() { - Some(entry) => &mut **entry, - None => break, - }; - if entry_ptr == ptr { - mem::drop(mem::replace(current, unsafe { - (*entry_ptr).next_in_bucket.take() - })); - break; - } - current = unsafe { &mut (*entry_ptr).next_in_bucket }; - } - } -} - /// A static `PhfStrSet` /// /// This trait is implemented by static sets of interned strings generated using @@ -324,7 +221,7 @@ impl Atom { static_set.hashes[index as usize] } Dynamic(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; u64_hash_as_u32(unsafe { (*entry).hash }) } Inline(..) 
=> u64_hash_as_u32(self.unsafe_data.get()), @@ -384,7 +281,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { Inline(len as u8, buf) } else { let hash = (hash.g as u64) << 32 | (hash.f1 as u64); - Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) + Dynamic(DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash) as *mut ()) } }; @@ -412,7 +309,7 @@ impl Clone for Atom { unsafe { match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; (*entry).ref_count.fetch_add(1, SeqCst); } None => (), @@ -430,16 +327,16 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - STRING_CACHE + DYNAMIC_SET .lock() .unwrap() - .remove(this.unsafe_data.get() as *mut StringCacheEntry); + .remove(this.unsafe_data.get() as *mut Entry); } unsafe { match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { drop_slow(self); } @@ -466,7 +363,7 @@ impl ops::Deref for Atom { .get(idx as usize) .expect("bad static atom"), Dynamic(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; &(*entry).string } } @@ -723,41 +620,26 @@ unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] { // Some minor tests of internal layout here. See ../integration-tests for much // more. -#[cfg(test)] -mod tests { - use super::{DefaultAtom, StringCacheEntry, ENTRY_ALIGNMENT}; +#[test] +fn assert_sizes() { use std::mem; - - #[test] - fn assert_sizes() { - use std::mem; - struct EmptyWithDrop; - impl Drop for EmptyWithDrop { - fn drop(&mut self) {} - } - let compiler_uses_inline_drop_flags = mem::size_of::() > 0; - - // Guard against accidental changes to the sizes of things. 
- assert_eq!( - mem::size_of::(), - if compiler_uses_inline_drop_flags { - 16 - } else { - 8 - } - ); - assert_eq!( - mem::size_of::>(), - mem::size_of::(), - ); - assert_eq!( - mem::size_of::(), - 8 + 4 * mem::size_of::() - ); + struct EmptyWithDrop; + impl Drop for EmptyWithDrop { + fn drop(&mut self) {} } + let compiler_uses_inline_drop_flags = mem::size_of::() > 0; - #[test] - fn string_cache_entry_alignment_is_sufficient() { - assert!(mem::align_of::() >= ENTRY_ALIGNMENT); - } + // Guard against accidental changes to the sizes of things. + assert_eq!( + mem::size_of::(), + if compiler_uses_inline_drop_flags { + 16 + } else { + 8 + } + ); + assert_eq!( + mem::size_of::>(), + mem::size_of::(), + ); } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs new file mode 100644 index 0000000..5bdeb68 --- /dev/null +++ b/src/dynamic_set.rs @@ -0,0 +1,110 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use lazy_static::lazy_static; +use std::borrow::Cow; +use std::mem; +use std::sync::atomic::AtomicIsize; +use std::sync::atomic::Ordering::SeqCst; +use std::sync::Mutex; + +const NB_BUCKETS: usize = 1 << 12; // 4096 +const BUCKET_MASK: u64 = (1 << 12) - 1; + +pub(crate) struct Set { + buckets: Box<[Option>; NB_BUCKETS]>, +} + +pub(crate) struct Entry { + pub(crate) string: Box, + pub(crate) hash: u64, + pub(crate) ref_count: AtomicIsize, + next_in_bucket: Option>, +} + +// Addresses are a multiples of this, +// and therefore have have TAG_MASK bits unset, available for tagging. +pub(crate) const ENTRY_ALIGNMENT: usize = 4; + +#[test] +fn entry_alignment_is_sufficient() { + assert!(mem::align_of::() >= ENTRY_ALIGNMENT); +} + +lazy_static! 
{ + pub(crate) static ref DYNAMIC_SET: Mutex = Mutex::new({ + type T = Option>; + let _static_assert_size_eq = std::mem::transmute::; + let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); + Set { + buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, + } + }); +} + +impl Set { + pub(crate) fn insert(&mut self, string: Cow, hash: u64) -> *mut Entry { + let bucket_index = (hash & BUCKET_MASK) as usize; + { + let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); + + while let Some(entry) = ptr.take() { + if entry.hash == hash && &*entry.string == &*string { + if entry.ref_count.fetch_add(1, SeqCst) > 0 { + return &mut **entry; + } + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. (Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. 
+ entry.ref_count.fetch_sub(1, SeqCst); + break; + } + ptr = entry.next_in_bucket.as_mut(); + } + } + debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let string = string.into_owned(); + let mut entry = Box::new(Entry { + next_in_bucket: self.buckets[bucket_index].take(), + hash, + ref_count: AtomicIsize::new(1), + string: string.into_boxed_str(), + }); + let ptr: *mut Entry = &mut *entry; + self.buckets[bucket_index] = Some(entry); + + ptr + } + + pub(crate) fn remove(&mut self, ptr: *mut Entry) { + let bucket_index = { + let value: &Entry = unsafe { &*ptr }; + debug_assert!(value.ref_count.load(SeqCst) == 0); + (value.hash & BUCKET_MASK) as usize + }; + + let mut current: &mut Option> = &mut self.buckets[bucket_index]; + + loop { + let entry_ptr: *mut Entry = match current.as_mut() { + Some(entry) => &mut **entry, + None => break, + }; + if entry_ptr == ptr { + mem::drop(mem::replace(current, unsafe { + (*entry_ptr).next_in_bucket.take() + })); + break; + } + current = unsafe { &mut (*entry_ptr).next_in_bucket }; + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 3409c4d..5751b25 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -108,3 +108,4 @@ pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; mod atom; +mod dynamic_set; From e25b4c664f3df2647b745f7ea90d4b47dd336906 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 19:13:57 +0200 Subject: [PATCH 151/212] =?UTF-8?q?Keep=20a=20u32=20hash=20rather=20than?= =?UTF-8?q?=20u64=20in=20dynamic=5Fatoms=5Fmap::Entry,=20since=20that?= =?UTF-8?q?=E2=80=99s=20all=20we=20ever=20use?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/atom.rs | 10 +++++++--- src/dynamic_set.rs | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 6e2027a..5476d7e 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -222,7 +222,7 @@ impl Atom { } Dynamic(entry) => { let entry = entry as *mut Entry; 
- u64_hash_as_u32(unsafe { (*entry).hash }) + unsafe { (*entry).hash } } Inline(..) => u64_hash_as_u32(self.unsafe_data.get()), } @@ -280,8 +280,12 @@ impl<'a, Static: StaticAtomSet> From> for Atom { buf[..len].copy_from_slice(string_to_add.as_bytes()); Inline(len as u8, buf) } else { - let hash = (hash.g as u64) << 32 | (hash.f1 as u64); - Dynamic(DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash) as *mut ()) + Dynamic( + DYNAMIC_SET + .lock() + .unwrap() + .insert(string_to_add, hash.g) as *mut (), + ) } }; diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 5bdeb68..0a2044b 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -15,7 +15,7 @@ use std::sync::atomic::Ordering::SeqCst; use std::sync::Mutex; const NB_BUCKETS: usize = 1 << 12; // 4096 -const BUCKET_MASK: u64 = (1 << 12) - 1; +const BUCKET_MASK: u32 = (1 << 12) - 1; pub(crate) struct Set { buckets: Box<[Option>; NB_BUCKETS]>, @@ -23,7 +23,7 @@ pub(crate) struct Set { pub(crate) struct Entry { pub(crate) string: Box, - pub(crate) hash: u64, + pub(crate) hash: u32, pub(crate) ref_count: AtomicIsize, next_in_bucket: Option>, } @@ -49,7 +49,7 @@ lazy_static! 
{ } impl Set { - pub(crate) fn insert(&mut self, string: Cow, hash: u64) -> *mut Entry { + pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> *mut Entry { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); From 1a92659fdf7f198f3632f30200d6da93fa43c3c1 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 19:48:59 +0200 Subject: [PATCH 152/212] Remove UnpackedAtom, inline its code --- src/atom.rs | 253 +++++++++++++-------------------------------- src/dynamic_set.rs | 7 +- 2 files changed, 78 insertions(+), 182 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 5476d7e..da41899 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -25,12 +25,12 @@ use std::slice; use std::str; use std::sync::atomic::Ordering::SeqCst; -use self::UnpackedAtom::{Dynamic, Inline, Static}; - const DYNAMIC_TAG: u8 = 0b_00; const INLINE_TAG: u8 = 0b_01; // len in upper nybble const STATIC_TAG: u8 = 0b_10; const TAG_MASK: u64 = 0b_11; +const LEN_OFFSET: u64 = 4; +const LEN_MASK: u64 = 0xF0; const MAX_INLINE_LEN: usize = 7; const STATIC_SHIFT_BITS: usize = 32; @@ -151,11 +151,6 @@ impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { } } -fn u64_hash_as_u32(h: u64) -> u32 { - // This may or may not be great... 
- ((h >> 32) ^ h) as u32 -} - // FIXME: bound removed from the struct definition before of this error for pack_static: // "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" // https://github.com/rust-lang/rust/issues/57563 @@ -166,20 +161,19 @@ impl Atom { pub const fn pack_static(n: u32) -> Self { Self { unsafe_data: unsafe { - // STATIC_TAG ensure this is non-zero + // STATIC_TAG ensures this is non-zero NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS)) }, phantom: PhantomData, } } -} -impl Atom { - #[inline(always)] - unsafe fn unpack(&self) -> UnpackedAtom { - UnpackedAtom::from_packed(self.unsafe_data) + fn tag(&self) -> u8 { + (self.unsafe_data.get() & TAG_MASK) as u8 } +} +impl Atom { /// Return the internal repersentation. For testing. #[doc(hidden)] pub fn unsafe_data(&self) -> u64 { @@ -189,42 +183,39 @@ impl Atom { /// Return true if this is a static Atom. For testing. #[doc(hidden)] pub fn is_static(&self) -> bool { - match unsafe { self.unpack() } { - Static(..) => true, - _ => false, - } + self.tag() == STATIC_TAG } /// Return true if this is a dynamic Atom. For testing. #[doc(hidden)] pub fn is_dynamic(&self) -> bool { - match unsafe { self.unpack() } { - Dynamic(..) => true, - _ => false, - } + self.tag() == DYNAMIC_TAG } /// Return true if this is an inline Atom. For testing. #[doc(hidden)] pub fn is_inline(&self) -> bool { - match unsafe { self.unpack() } { - Inline(..) => true, - _ => false, - } + self.tag() == INLINE_TAG + } + + fn static_index(&self) -> u64 { + self.unsafe_data.get() >> STATIC_SHIFT_BITS } /// Get the hash of the string as it is stored in the set. 
pub fn get_hash(&self) -> u32 { - match unsafe { self.unpack() } { - Static(index) => { - let static_set = Static::get(); - static_set.hashes[index as usize] - } - Dynamic(entry) => { - let entry = entry as *mut Entry; + match self.tag() { + DYNAMIC_TAG => { + let entry = self.unsafe_data.get() as *const Entry; unsafe { (*entry).hash } } - Inline(..) => u64_hash_as_u32(self.unsafe_data.get()), + STATIC_TAG => Static::get().hashes[self.static_index() as usize], + INLINE_TAG => { + let data = self.unsafe_data.get(); + // This may or may not be great... + ((data >> 32) ^ data) as u32 + } + _ => unsafe { debug_unreachable!() }, } } } @@ -265,31 +256,40 @@ impl PartialEq for Atom { } impl<'a, Static: StaticAtomSet> From> for Atom { - #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { let static_set = Static::get(); let hash = phf_shared::hash(&*string_to_add, &static_set.key); let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); - let unpacked = if static_set.atoms[index as usize] == string_to_add { - Static(index) + if static_set.atoms[index as usize] == string_to_add { + Self::pack_static(index) } else { let len = string_to_add.len(); if len <= MAX_INLINE_LEN { - let mut buf: [u8; 7] = [0; 7]; - buf[..len].copy_from_slice(string_to_add.as_bytes()); - Inline(len as u8, buf) + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); + { + let dest = inline_atom_slice_mut(&mut data); + dest[..len].copy_from_slice(string_to_add.as_bytes()) + } + Atom { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } } else { - Dynamic( - DYNAMIC_SET - .lock() - .unwrap() - .insert(string_to_add, hash.g) as *mut (), - ) + let ptr: std::ptr::NonNull = DYNAMIC_SET + .lock() + .unwrap() + .insert(string_to_add, hash.g); + let data = ptr.as_ptr() as u64; + debug_assert!(0 == data & TAG_MASK); + Atom { + // The address of a ptr::NonNull is non-zero + unsafe_data: 
unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } } - }; - - unsafe { unpacked.pack() } + } } } @@ -310,25 +310,24 @@ impl From for Atom { impl Clone for Atom { #[inline(always)] fn clone(&self) -> Self { - unsafe { - match from_packed_dynamic(self.unsafe_data.get()) { - Some(entry) => { - let entry = entry as *mut Entry; - (*entry).ref_count.fetch_add(1, SeqCst); - } - None => (), - } - } - Atom { - unsafe_data: self.unsafe_data, - phantom: PhantomData, + if self.tag() == DYNAMIC_TAG { + let entry = self.unsafe_data.get() as *const Entry; + unsafe { &*entry }.ref_count.fetch_add(1, SeqCst); } + Atom { ..*self } } } impl Drop for Atom { #[inline] fn drop(&mut self) { + if self.tag() == DYNAMIC_TAG { + let entry = self.unsafe_data.get() as *const Entry; + if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 { + drop_slow(self) + } + } + // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { DYNAMIC_SET @@ -336,18 +335,6 @@ impl Drop for Atom { .unwrap() .remove(this.unsafe_data.get() as *mut Entry); } - - unsafe { - match from_packed_dynamic(self.unsafe_data.get()) { - Some(entry) => { - let entry = entry as *mut Entry; - if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { - drop_slow(self); - } - } - _ => (), - } - } } } @@ -357,19 +344,18 @@ impl ops::Deref for Atom { #[inline] fn deref(&self) -> &str { unsafe { - match self.unpack() { - Inline(..) 
=> { - let buf = inline_orig_bytes(&self.unsafe_data); - str::from_utf8_unchecked(buf) - } - Static(idx) => Static::get() - .atoms - .get(idx as usize) - .expect("bad static atom"), - Dynamic(entry) => { - let entry = entry as *mut Entry; + match self.tag() { + DYNAMIC_TAG => { + let entry = self.unsafe_data.get() as *const Entry; &(*entry).string } + INLINE_TAG => { + let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET; + let src = inline_atom_slice(&self.unsafe_data); + str::from_utf8_unchecked(&src[..(len as usize)]) + } + STATIC_TAG => Static::get().atoms[self.static_index() as usize], + _ => debug_unreachable!(), } } } @@ -386,10 +372,11 @@ impl fmt::Debug for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { - match self.unpack() { - Dynamic(..) => "dynamic", - Inline(..) => "inline", - Static(..) => "static", + match self.tag() { + DYNAMIC_TAG => "dynamic", + INLINE_TAG => "inline", + STATIC_TAG => "static", + _ => debug_unreachable!(), } }; @@ -502,20 +489,6 @@ impl Atom { } } -// Atoms use a compact representation which fits this enum in a single u64. -// Inlining avoids actually constructing the unpacked representation in memory. -#[allow(missing_copy_implementations)] -enum UnpackedAtom { - /// Pointer to a dynamic table entry. Must be 16-byte aligned! - Dynamic(*mut ()), - - /// Length + bytes of string. - Inline(u8, [u8; 7]), - - /// Index in static interning table. - Static(u32), -} - #[inline(always)] fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { unsafe { @@ -544,84 +517,6 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { } } -impl UnpackedAtom { - /// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for - /// hints for the layout. 
- #[inline(always)] - unsafe fn pack(self) -> Atom { - match self { - Static(n) => Atom::pack_static(n), - Dynamic(p) => { - let data = p as u64; - debug_assert!(0 == data & TAG_MASK); - Atom { - // Callers are responsible for calling this with a valid, non-null pointer - unsafe_data: NonZeroU64::new_unchecked(data), - phantom: PhantomData, - } - } - Inline(len, buf) => { - debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); - { - let dest = inline_atom_slice_mut(&mut data); - dest.copy_from_slice(&buf) - } - Atom { - // INLINE_TAG ensures this is never zero - unsafe_data: NonZeroU64::new_unchecked(data), - phantom: PhantomData, - } - } - } - } - - /// Unpack a key, extracting information from a single u64 into useable structs. - #[inline(always)] - unsafe fn from_packed(data: NonZeroU64) -> UnpackedAtom { - debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - - match (data.get() & TAG_MASK) as u8 { - DYNAMIC_TAG => Dynamic(data.get() as *mut ()), - STATIC_TAG => Static((data.get() >> STATIC_SHIFT_BITS) as u32), - INLINE_TAG => { - let len = ((data.get() & 0xf0) >> 4) as usize; - debug_assert!(len <= MAX_INLINE_LEN); - let mut buf: [u8; 7] = [0; 7]; - let src = inline_atom_slice(&data); - buf.copy_from_slice(src); - Inline(len as u8, buf) - } - _ => debug_unreachable!(), - } - } -} - -/// Used for a fast path in Clone and Drop. -#[inline(always)] -unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { - if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { - Some(data as *mut ()) - } else { - None - } -} - -/// For as_slice on inline atoms, we need a pointer into the original -/// string contents. -/// -/// It's undefined behavior to call this on a non-inline atom!! 
-#[inline(always)] -unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] { - match UnpackedAtom::from_packed(*data) { - Inline(len, _) => { - let src = inline_atom_slice(&data); - &src[..(len as usize)] - } - _ => debug_unreachable!(), - } -} - // Some minor tests of internal layout here. See ../integration-tests for much // more. #[test] diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 0a2044b..08c9dcd 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -10,6 +10,7 @@ use lazy_static::lazy_static; use std::borrow::Cow; use std::mem; +use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use std::sync::Mutex; @@ -49,7 +50,7 @@ lazy_static! { } impl Set { - pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> *mut Entry { + pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); @@ -57,7 +58,7 @@ impl Set { while let Some(entry) = ptr.take() { if entry.hash == hash && &*entry.string == &*string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { - return &mut **entry; + return NonNull::from(&mut **entry); } // Uh-oh. The pointer's reference count was zero, which means someone may try // to free it. 
(Naive attempts to defend against this, for example having the @@ -78,7 +79,7 @@ impl Set { ref_count: AtomicIsize::new(1), string: string.into_boxed_str(), }); - let ptr: *mut Entry = &mut *entry; + let ptr = NonNull::from(&mut *entry); self.buckets[bucket_index] = Some(entry); ptr From 4837db4fb68dcea7824973da5ba08e967e06defe Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 20:19:53 +0200 Subject: [PATCH 153/212] Move static sets to their own module --- src/atom.rs | 88 ++-------------------------------------------- src/lib.rs | 20 +++++++++-- src/static_sets.rs | 64 +++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 87 deletions(-) create mode 100644 src/static_sets.rs diff --git a/src/atom.rs b/src/atom.rs index da41899..a04abee 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -10,6 +10,7 @@ #![allow(non_upper_case_globals)] use crate::dynamic_set::{Entry, DYNAMIC_SET}; +use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -35,61 +36,6 @@ const LEN_MASK: u64 = 0xF0; const MAX_INLINE_LEN: usize = 7; const STATIC_SHIFT_BITS: usize = 32; -/// A static `PhfStrSet` -/// -/// This trait is implemented by static sets of interned strings generated using -/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically. -/// -/// It is used by the methods of [`Atom`] to check if a string is present in the static set. -/// -/// [`Atom`]: struct.Atom.html -pub trait StaticAtomSet: Ord { - /// Get the location of the static string set in the binary. - fn get() -> &'static PhfStrSet; - /// Get the index of the empty string, which is in every set and is used for `Atom::default`. - fn empty_string_index() -> u32; -} - -/// A string set created using a [perfect hash function], specifically -/// [Hash, Displace and Compress]. -/// -/// See the CHD document for the meaning of the struct fields. 
-/// -/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function -/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf -pub struct PhfStrSet { - pub key: u64, - pub disps: &'static [(u32, u32)], - pub atoms: &'static [&'static str], - pub hashes: &'static [u32], -} - -/// An empty static atom set for when only dynamic strings will be added -#[derive(PartialEq, Eq, PartialOrd, Ord)] -pub struct EmptyStaticAtomSet; - -impl StaticAtomSet for EmptyStaticAtomSet { - fn get() -> &'static PhfStrSet { - // The name is a lie: this set is not empty (it contains the empty string) - // but that’s only to avoid divisions by zero in rust-phf. - static SET: PhfStrSet = PhfStrSet { - key: 0, - disps: &[(0, 0)], - atoms: &[""], - // "" SipHash'd, and xored with u64_hash_to_u32. - hashes: &[0x3ddddef3], - }; - &SET - } - - fn empty_string_index() -> u32 { - 0 - } -} - -/// Use this if you don’t care about static atoms. -pub type DefaultAtom = Atom; - /// Represents a string that has been interned. /// /// While the type definition for `Atom` indicates that it generic on a particular @@ -277,10 +223,8 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } else { - let ptr: std::ptr::NonNull = DYNAMIC_SET - .lock() - .unwrap() - .insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = + DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -516,29 +460,3 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { slice::from_raw_parts_mut(data, len) } } - -// Some minor tests of internal layout here. See ../integration-tests for much -// more. -#[test] -fn assert_sizes() { - use std::mem; - struct EmptyWithDrop; - impl Drop for EmptyWithDrop { - fn drop(&mut self) {} - } - let compiler_uses_inline_drop_flags = mem::size_of::() > 0; - - // Guard against accidental changes to the sizes of things. 
- assert_eq!( - mem::size_of::(), - if compiler_uses_inline_drop_flags { - 16 - } else { - 8 - } - ); - assert_eq!( - mem::size_of::>(), - mem::size_of::(), - ); -} diff --git a/src/lib.rs b/src/lib.rs index 5751b25..b83c81c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,7 +105,23 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; - mod atom; mod dynamic_set; +mod static_sets; + +pub use atom::Atom; +pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; + +/// Use this if you don’t care about static atoms. +pub type DefaultAtom = Atom; + +// Some minor tests of internal layout here. +// See ../integration-tests for much more. + +/// Guard against accidental changes to the sizes of things. +#[test] +fn assert_sizes() { + use std::mem::size_of; + assert_eq!(size_of::(), 8); + assert_eq!(size_of::>(), size_of::(),); +} diff --git a/src/static_sets.rs b/src/static_sets.rs new file mode 100644 index 0000000..f7f1799 --- /dev/null +++ b/src/static_sets.rs @@ -0,0 +1,64 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// A static `PhfStrSet` +/// +/// This trait is implemented by static sets of interned strings generated using +/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically. +/// +/// It is used by the methods of [`Atom`] to check if a string is present in the static set. +/// +/// [`Atom`]: struct.Atom.html +pub trait StaticAtomSet: Ord { + /// Get the location of the static string set in the binary. + fn get() -> &'static PhfStrSet; + /// Get the index of the empty string, which is in every set and is used for `Atom::default`. 
+ fn empty_string_index() -> u32; +} + +/// A string set created using a [perfect hash function], specifically +/// [Hash, Displace and Compress]. +/// +/// See the CHD document for the meaning of the struct fields. +/// +/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function +/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf +pub struct PhfStrSet { + #[doc(hidden)] + pub key: u64, + #[doc(hidden)] + pub disps: &'static [(u32, u32)], + #[doc(hidden)] + pub atoms: &'static [&'static str], + #[doc(hidden)] + pub hashes: &'static [u32], +} + +/// An empty static atom set for when only dynamic strings will be added +#[derive(PartialEq, Eq, PartialOrd, Ord)] +pub struct EmptyStaticAtomSet; + +impl StaticAtomSet for EmptyStaticAtomSet { + fn get() -> &'static PhfStrSet { + // The name is a lie: this set is not empty (it contains the empty string) + // but that’s only to avoid divisions by zero in rust-phf. + static SET: PhfStrSet = PhfStrSet { + key: 0, + disps: &[(0, 0)], + atoms: &[""], + // "" SipHash'd, and xored with u64_hash_to_u32. + hashes: &[0x3ddddef3], + }; + &SET + } + + fn empty_string_index() -> u32 { + 0 + } +} From 57ff374f19232678f64611e22aac4af2129d58a8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 20:20:19 +0200 Subject: [PATCH 154/212] Remove attributes redundant with Cargo --- src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b83c81c..fd5e677 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,8 +101,6 @@ //! ``` //! 
-#![crate_name = "string_cache"] -#![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] mod atom; From 8bedc3ee1406d5d0b7b202c3b067e7fce58a26cf Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 20:24:41 +0200 Subject: [PATCH 155/212] Move trait impls that simply forward to something else to their own module --- src/atom.rs | 80 --------------------------------------- src/lib.rs | 1 + src/trivial_impls.rs | 90 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 80 deletions(-) create mode 100644 src/trivial_impls.rs diff --git a/src/atom.rs b/src/atom.rs index a04abee..5011d40 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,13 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![allow(non_upper_case_globals)] - use crate::dynamic_set::{Entry, DYNAMIC_SET}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; use phf_shared; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; @@ -85,18 +82,6 @@ pub struct Atom { phantom: PhantomData, } -impl ::precomputed_hash::PrecomputedHash for Atom { - fn precomputed_hash(&self) -> u32 { - self.get_hash() - } -} - -impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { - fn from(atom: &'a Self) -> Self { - atom.clone() - } -} - // FIXME: bound removed from the struct definition before of this error for pack_static: // "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" // https://github.com/rust-lang/rust/issues/57563 @@ -183,24 +168,6 @@ impl Hash for Atom { } } -impl PartialEq for Atom { - fn eq(&self, other: &str) -> bool { - &self[..] == other - } -} - -impl PartialEq> for str { - fn eq(&self, other: &Atom) -> bool { - self == &other[..] - } -} - -impl PartialEq for Atom { - fn eq(&self, other: &String) -> bool { - &self[..] == &other[..] 
- } -} - impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { let static_set = Static::get(); @@ -237,20 +204,6 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } -impl<'a, Static: StaticAtomSet> From<&'a str> for Atom { - #[inline] - fn from(string_to_add: &str) -> Self { - Atom::from(Cow::Borrowed(string_to_add)) - } -} - -impl From for Atom { - #[inline] - fn from(string_to_add: String) -> Self { - Atom::from(Cow::Owned(string_to_add)) - } -} - impl Clone for Atom { #[inline(always)] fn clone(&self) -> Self { @@ -305,13 +258,6 @@ impl ops::Deref for Atom { } } -impl fmt::Display for Atom { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::fmt(self, f) - } -} - impl fmt::Debug for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -348,32 +294,6 @@ impl Ord for Atom { } } -impl AsRef for Atom { - fn as_ref(&self) -> &str { - &self - } -} - -impl Serialize for Atom { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let string: &str = self.as_ref(); - string.serialize(serializer) - } -} - -impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'a>, - { - let string: String = Deserialize::deserialize(deserializer)?; - Ok(Atom::from(string)) - } -} - // AsciiExt requires mutating methods, so we just implement the non-mutating ones. // We don't need to implement is_ascii because there's no performance improvement // over the one from &str. 
diff --git a/src/lib.rs b/src/lib.rs index fd5e677..b4a8fd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -106,6 +106,7 @@ mod atom; mod dynamic_set; mod static_sets; +mod trivial_impls; pub use atom::Atom; pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs new file mode 100644 index 0000000..ed53be2 --- /dev/null +++ b/src/trivial_impls.rs @@ -0,0 +1,90 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::{Atom, StaticAtomSet}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::Cow; +use std::fmt; + +impl ::precomputed_hash::PrecomputedHash for Atom { + fn precomputed_hash(&self) -> u32 { + self.get_hash() + } +} + +impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { + fn from(atom: &'a Self) -> Self { + atom.clone() + } +} + +impl PartialEq for Atom { + fn eq(&self, other: &str) -> bool { + &self[..] == other + } +} + +impl PartialEq> for str { + fn eq(&self, other: &Atom) -> bool { + self == &other[..] + } +} + +impl PartialEq for Atom { + fn eq(&self, other: &String) -> bool { + &self[..] == &other[..] 
+ } +} + +impl<'a, Static: StaticAtomSet> From<&'a str> for Atom { + #[inline] + fn from(string_to_add: &str) -> Self { + Atom::from(Cow::Borrowed(string_to_add)) + } +} + +impl From for Atom { + #[inline] + fn from(string_to_add: String) -> Self { + Atom::from(Cow::Owned(string_to_add)) + } +} + +impl fmt::Display for Atom { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + ::fmt(self, f) + } +} + +impl AsRef for Atom { + fn as_ref(&self) -> &str { + &self + } +} + +impl Serialize for Atom { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let string: &str = self.as_ref(); + string.serialize(serializer) + } +} + +impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'a>, + { + let string: String = Deserialize::deserialize(deserializer)?; + Ok(Atom::from(string)) + } +} From b78c9da2e8b446f1d8a22172003531d9673fb055 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 21 Oct 2019 12:48:08 +0200 Subject: [PATCH 156/212] Use unique const names in string_cache_codegen --- string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 42cda70..d212d7e 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.0" # Also update ../README.md when making a semver-breaking change +version = "0.5.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 0d90271..0fe4819 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -227,10 +227,11 @@ impl AtomType { let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); let module = module.parse::().unwrap(); + let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase()); let const_names: Vec<_> = atoms .iter() .map(|atom| { - let mut name = String::from("ATOM"); + let mut name = atom_prefix.clone(); for c in atom.chars() { name.push_str(&format!("_{:02X}", c as u32)) } From 7f50fe0360175b74f0ff0302a597b676e854fe75 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 5 Dec 2019 05:11:10 -0800 Subject: [PATCH 157/212] Make serde dependency optional, but enabled by default --- .travis.yml | 1 + Cargo.toml | 6 +++++- src/trivial_impls.rs | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ad47308..180d855 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ rust: os: - linux script: + - cargo build --no-default-features - cargo build - cargo test --all - "cd string-cache-codegen && cargo build && cd .." diff --git a/Cargo.toml b/Cargo.toml index eec2438..364131a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,14 @@ edition = "2018" [lib] name = "string_cache" +[features] +serde_support = ["serde"] +default = ["serde_support"] + [dependencies] precomputed-hash = "0.1" lazy_static = "1" -serde = "1" +serde = { version = "1", optional = true } phf_shared = "0.8" new_debug_unreachable = "1.0" diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index ed53be2..4c055fd 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -8,6 +8,7 @@ // except according to those terms. 
use crate::{Atom, StaticAtomSet}; +#[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::borrow::Cow; use std::fmt; @@ -69,6 +70,7 @@ impl AsRef for Atom { } } +#[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result where @@ -79,6 +81,7 @@ impl Serialize for Atom { } } +#[cfg(feature = "serde_support")] impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { fn deserialize(deserializer: D) -> Result where From 82ac0d955d2d1785eb5634cdbf61465adc0d759e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Nov 2020 19:09:37 +0100 Subject: [PATCH 158/212] Bump to 0.8.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 364131a..01b9282 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.0" # Also update README.md when making a semver-breaking change +version = "0.8.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From c057dde7fdbdb9618d15374d74642b8267c0c837 Mon Sep 17 00:00:00 2001 From: l3ops Date: Tue, 20 Apr 2021 13:48:57 +0200 Subject: [PATCH 159/212] Add an Atom::try_static method to create an Atom only if it exists in the static table --- integration-tests/src/lib.rs | 6 ++++++ src/atom.rs | 26 ++++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 3aa2a44..1f2be87 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -290,6 +290,12 @@ fn test_from_string() { assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); } +#[test] +fn test_try_static() { + assert!(Atom::try_static("head").is_some()); + assert!(Atom::try_static("not in the static table").is_none()); +} + #[cfg(all(test, feature = "unstable"))] #[path = "bench.rs"] mod bench; diff --git a/src/atom.rs b/src/atom.rs index 5011d40..6da0044 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -149,6 +149,22 @@ impl Atom { _ => unsafe { debug_unreachable!() }, } } + + pub fn try_static(string_to_add: &str) -> Option { + Self::try_static_internal(string_to_add).ok() + } + + fn try_static_internal(string_to_add: &str) -> Result { + let static_set = Static::get(); + let hash = phf_shared::hash(&*string_to_add, &static_set.key); + let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); + + if static_set.atoms[index as usize] == string_to_add { + Ok(Self::pack_static(index)) + } else { + Err(hash) + } + } } impl Default for Atom { @@ -170,13 +186,7 @@ impl Hash for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { - let static_set = Static::get(); - let hash = phf_shared::hash(&*string_to_add, &static_set.key); - let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); - - if static_set.atoms[index as usize] == string_to_add { - 
Self::pack_static(index) - } else { + Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { let len = string_to_add.len(); if len <= MAX_INLINE_LEN { let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); @@ -200,7 +210,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } - } + }) } } From 609a59f19273d6e261ee47543b4e0d576ca3707e Mon Sep 17 00:00:00 2001 From: Russell Mull Date: Tue, 31 Aug 2021 09:30:20 -0700 Subject: [PATCH 160/212] Update phf, rand dependencies --- Cargo.toml | 2 +- integration-tests/Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01b9282..88c09e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ default = ["serde_support"] precomputed-hash = "0.1" lazy_static = "1" serde = { version = "1", optional = true } -phf_shared = "0.8" +phf_shared = "0.10" new_debug_unreachable = "1.0" [[test]] diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 736e34a..a0b047c 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -19,7 +19,7 @@ unstable = [] string_cache = { version = "0.8", path = ".." 
} [dev-dependencies] -rand = "0.7" +rand = "0.8" string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } [build-dependencies] diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index d212d7e..5ae3a52 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.8" -phf_shared = "0.8" +phf_generator = "0.10" +phf_shared = "0.10" proc-macro2 = "1" quote = "1" From 72f38cf1836292c1533e662d6741f7e433e3cdbf Mon Sep 17 00:00:00 2001 From: Nathan West Date: Mon, 13 Sep 2021 23:48:39 -0400 Subject: [PATCH 161/212] Use a custom Visitor in Deserialize --- src/trivial_impls.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 4c055fd..3f2d75e 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -87,7 +87,33 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { where D: Deserializer<'a>, { - let string: String = Deserialize::deserialize(deserializer)?; - Ok(Atom::from(string)) + use serde::de; + use std::marker::PhantomData; + + struct AtomVisitor(PhantomData); + + impl<'de, Static: StaticAtomSet> de::Visitor<'de> for AtomVisitor { + type Value = Atom; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "an Atom") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + Ok(Atom::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: de::Error, + { + Ok(Atom::from(v)) + } + } + + deserializer.deserialize_string(AtomVisitor(PhantomData)) } } From 58d0c572db093c751797bb50c482f8375396cc7c Mon Sep 17 00:00:00 2001 From: Nathan West Date: Tue, 14 Sep 2021 00:26:04 -0400 Subject: [PATCH 162/212] Use deserialize_str instead of deserialize_string --- src/trivial_impls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/trivial_impls.rs b/src/trivial_impls.rs index 3f2d75e..c0119ca 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -114,6 +114,6 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { } } - deserializer.deserialize_string(AtomVisitor(PhantomData)) + deserializer.deserialize_str(AtomVisitor(PhantomData)) } } From f95e39b2cf9e6610659c22c5c3e4ac271f886eba Mon Sep 17 00:00:00 2001 From: Donny Date: Fri, 8 Oct 2021 16:14:03 +0900 Subject: [PATCH 163/212] Use parking_lot --- Cargo.toml | 1 + src/atom.rs | 3 +-- src/dynamic_set.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01b9282..a54b134 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ lazy_static = "1" serde = { version = "1", optional = true } phf_shared = "0.8" new_debug_unreachable = "1.0" +parking_lot = "0.11" [[test]] name = "small-stack" diff --git a/src/atom.rs b/src/atom.rs index 6da0044..0d74408 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -201,7 +201,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } else { let ptr: std::ptr::NonNull = - DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash.g); + DYNAMIC_SET.lock().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -239,7 +239,6 @@ impl Drop for Atom { fn drop_slow(this: &mut Atom) { DYNAMIC_SET .lock() - .unwrap() .remove(this.unsafe_data.get() as *mut Entry); } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 08c9dcd..f926f1e 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -8,12 +8,12 @@ // except according to those terms. 
use lazy_static::lazy_static; +use parking_lot::Mutex; use std::borrow::Cow; use std::mem; use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use std::sync::Mutex; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; From bcf15b9269e8cff3560087c45d3ee66f8bf4fc76 Mon Sep 17 00:00:00 2001 From: cybai Date: Sat, 9 Oct 2021 01:18:28 +0900 Subject: [PATCH 164/212] Move CI to GitHub Actions --- .github/workflows/ci.yml | 46 ++++++++++++++++++++++++++++++++++++++++ .travis.yml | 15 ------------- 2 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d1c124a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: Run CI +on: + push: + branches: ["master"] + pull_request: + branches: ["**"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +env: + RUST_BACKTRACE: 1 + SHELL: /bin/bash + +jobs: + ci: + name: Run CI + runs-on: ubuntu-20.04 + + strategy: + matrix: + rust: [1.36.0, nightly, beta, stable] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 2 + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + default: true + override: true + - name: Build + run: | + cargo build --no-default-features + cargo build + - name: Tests + run: cargo test --all + - name: Build codegen + run: | + cd string-cache-codegen && cargo build && cd .. 
+ + if [ ${{ matrix.rust }} = nightly ]; then + cd integration-tests && cargo test --features unstable && cd ..; + fi diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 180d855..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -sudo: false -language: rust -rust: - - 1.36.0 - - nightly - - beta - - stable -os: - - linux -script: - - cargo build --no-default-features - - cargo build - - cargo test --all - - "cd string-cache-codegen && cargo build && cd .." - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi" From 0806c6f08f086d9033f62ff95865a9690dc35101 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 9 Oct 2021 12:15:46 -0400 Subject: [PATCH 165/212] Add homu result. --- .github/workflows/ci.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1c124a..ee596b5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,3 +44,18 @@ jobs: if [ ${{ matrix.rust }} = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi + + + build_result: + name: homu build finished + runs-on: ubuntu-latest + needs: + - "ci" + + steps: + - name: Mark the job as successful + run: exit 0 + if: success() + - name: Mark the job as unsuccessful + run: exit 1 + if: "!success()" From 3a35e765552a22c54aefce9588ba8bf6de86457b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 9 Oct 2021 12:22:12 -0400 Subject: [PATCH 166/212] Run CI on auto branch. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee596b5..d043617 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,7 +1,7 @@ name: Run CI on: push: - branches: ["master"] + branches: ["auto"] pull_request: branches: ["**"] From ddaf1bd4074369c698235b5e196f506ae0d21c0a Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Mon, 11 Oct 2021 21:44:41 -0400 Subject: [PATCH 167/212] Publish 0.8.2. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a54b134..2636de6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.1" # Also update README.md when making a semver-breaking change +version = "0.8.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From dd6a60115991e13c21ce77fe12cf11245c04ddcb Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Thu, 14 Oct 2021 17:15:32 -0700 Subject: [PATCH 168/212] Replace Travis CI badge with GitHub Actions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c9c8ac..fdf4c0a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # string-cache -[![Build Status](https://travis-ci.com/servo/string-cache.svg?branch=master)](https://travis-ci.com/servo/string-cache) +[![Build Status](https://github.com/servo/string-cache/actions/workflows/ci.yml/badge.svg)](https://github.com/servo/string-cache/actions) [Documentation](https://docs.rs/string_cache/) From edce5ddd8b0740609b51440e4d7d5ba752b376d4 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 21 Nov 2021 15:36:17 -0500 Subject: [PATCH 169/212] Update MSRV to 1.38.0. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d043617..e8485a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.36.0, nightly, beta, stable] + rust: [1.38.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 5c3d60334a496402d78d906e7661dd4b72931a44 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 21 Nov 2021 16:06:28 -0500 Subject: [PATCH 170/212] Update MSRV to 1.40.0 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8485a9..39b55c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,11 +15,11 @@ env: jobs: ci: name: Run CI - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: - rust: [1.38.0, nightly, beta, stable] + rust: [1.40.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 7934bf9eb7784962d5af8f96258756fc480d4c22 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 16 Dec 2021 16:16:49 -0700 Subject: [PATCH 171/212] Fix warning and typo in small stack regression test --- tests/small-stack.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/small-stack.rs b/tests/small-stack.rs index 269cad7..bb607af 100644 --- a/tests/small-stack.rs +++ b/tests/small-stack.rs @@ -1,6 +1,6 @@ // Regression test for https://github.com/servo/html5ever/issues/393 // -// Create a dynamic atom − causing initialization of the golbal hash map − +// Create a dynamic atom − causing initialization of the global hash map − // in a thread that has a small stack. 
// // This is a separate test program rather than a `#[test] fn` among others @@ -9,7 +9,7 @@ fn main() { std::thread::Builder::new() .stack_size(50_000) .spawn(|| { - string_cache::DefaultAtom::from("12345678"); + let _atom = string_cache::DefaultAtom::from("12345678"); }) .unwrap() .join() From 474d27785f1339fa3056a8d320da2aa8cfec3d19 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:17:01 -0700 Subject: [PATCH 172/212] Add a Clippy exception for `derive_hash_xor_eq` This is a string interning library. It does some weird things related to hashing. This is fine. --- src/lib.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index b4a8fd5..441cb4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,6 +103,19 @@ #![cfg_attr(test, deny(warnings))] +// Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match +// with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while +// manually impl-ing the other, because it seems easy for the two to drift apart, causing the +// invariant to be violated. +// +// But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and +// copying are this library's purpose. So we know what the PartialEq comparison is going to do. +// +// The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner +// tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to +// differ would be if the table entry changed between invocations, and that would be really bad. 
+#![allow(clippy::derive_hash_xor_eq)] + mod atom; mod dynamic_set; mod static_sets; From c2afb8bbc3104867c5d89b79cc83bec1fae9fabd Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:19:12 -0700 Subject: [PATCH 173/212] Remove redundant `use` line --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 0d74408..c02651b 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -10,7 +10,7 @@ use crate::dynamic_set::{Entry, DYNAMIC_SET}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; -use phf_shared; + use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; From 78c516d82f48f215ec214979ed3b3a4b874c3dd9 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:20:25 -0700 Subject: [PATCH 174/212] Remove redundant reference operands --- src/dynamic_set.rs | 2 +- src/trivial_impls.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index f926f1e..2eab9da 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -56,7 +56,7 @@ impl Set { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { - if entry.hash == hash && &*entry.string == &*string { + if entry.hash == hash && *entry.string == *string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { return NonNull::from(&mut **entry); } diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index c0119ca..960dde0 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -39,7 +39,7 @@ impl PartialEq> for str { impl PartialEq for Atom { fn eq(&self, other: &String) -> bool { - &self[..] == &other[..] + self[..] == other[..] 
} } @@ -66,7 +66,7 @@ impl fmt::Display for Atom { impl AsRef for Atom { fn as_ref(&self) -> &str { - &self + self } } From 71925156c4ec58bc6cfc1f31f8d6a9ee58a4cfc0 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:22:12 -0700 Subject: [PATCH 175/212] Convert manual loop to `while let` --- src/dynamic_set.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 2eab9da..602b700 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -94,11 +94,8 @@ impl Set { let mut current: &mut Option> = &mut self.buckets[bucket_index]; - loop { - let entry_ptr: *mut Entry = match current.as_mut() { - Some(entry) => &mut **entry, - None => break, - }; + while let Some(entry_ptr) = current.as_mut() { + let entry_ptr: *mut Entry = &mut **entry_ptr; if entry_ptr == ptr { mem::drop(mem::replace(current, unsafe { (*entry_ptr).next_in_bucket.take() From 36da1fd4d86b5803082ec27155d43b0ca0c68709 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 12:34:49 +0100 Subject: [PATCH 176/212] Bump new_debug_unreachable to 1.0.2 Previous versions are incompatible with -Z minimal-versions builds. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d3d2093..2b0b466 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ precomputed-hash = "0.1" lazy_static = "1" serde = { version = "1", optional = true } phf_shared = "0.10" -new_debug_unreachable = "1.0" +new_debug_unreachable = "1.0.2" parking_lot = "0.11" [[test]] From 97a4f6d1c457fc45151216d7b54446e573cdf322 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 12:36:52 +0100 Subject: [PATCH 177/212] Bump lazy_static to 1.1.0 This version builds with `-Z minimal-versions` when warnings are denied. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2b0b466..747d499 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -lazy_static = "1" +lazy_static = "1.1.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" From ab30960f33f1a5e87ccb62e1e9cfd9677fe84cbb Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 13:08:40 +0100 Subject: [PATCH 178/212] Bump to 0.8.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 747d499..9ba5095 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.2" # Also update README.md when making a semver-breaking change +version = "0.8.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From cbd1dc3b7f2085a93945e29c000723d232aec61b Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 14 Feb 2022 11:18:26 -0500 Subject: [PATCH 179/212] fix: bump parking lot to 0.12 in order to not create wasm export --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9ba5095..5d94040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ lazy_static = "1.1.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" -parking_lot = "0.11" +parking_lot = "0.12" [[test]] name = "small-stack" From 6c0f8253813ea24b4f8be07658159fc6edb7acd2 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 14 Feb 2022 13:11:15 -0500 Subject: [PATCH 180/212] Bump CI to rust 1.49 (not sure if desired though) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39b55c5..fe17a69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.40.0, nightly, beta, stable] + rust: [1.49.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 019118878fc994417edfd6d7a389c317b0654aa0 Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Mon, 28 Feb 2022 14:43:55 -0800 Subject: [PATCH 181/212] Use SPDX license format --- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d94040..6ca2f39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "string_cache" version = "0.8.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
-license = "MIT / Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" edition = "2018" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 5ae3a52..f207e21 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -3,7 +3,7 @@ name = "string_cache_codegen" version = "0.5.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." -license = "MIT / Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache_codegen/" edition = "2018" From 9ae0f889bb96bc890b59eef1cd271c603b5a690f Mon Sep 17 00:00:00 2001 From: Xidorn Quan Date: Fri, 18 Mar 2022 22:48:05 +1100 Subject: [PATCH 182/212] Replace lazy_static with once_cell --- Cargo.toml | 2 +- src/dynamic_set.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d94040..e47eb8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -lazy_static = "1.1.0" +once_cell = "1.10.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 602b700..229a79f 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use parking_lot::Mutex; use std::borrow::Cow; use std::mem; @@ -38,16 +38,16 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -lazy_static! 
{ - pub(crate) static ref DYNAMIC_SET: Mutex = Mutex::new({ +pub(crate) static DYNAMIC_SET: Lazy> = Lazy::new(|| { + Mutex::new({ type T = Option>; let _static_assert_size_eq = std::mem::transmute::; let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); Set { buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, } - }); -} + }) +}); impl Set { pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { From aa644096ae26aba2e7192d67a74101e6a21469ac Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 21 Mar 2022 14:24:45 -0400 Subject: [PATCH 183/212] 0.8.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e47eb8a..db1e95d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.3" # Also update README.md when making a semver-breaking change +version = "0.8.4" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 5034bde5c45b40af8e1c40672263193fa7050b32 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 27 Mar 2022 18:09:47 -0400 Subject: [PATCH 184/212] Publish string-cache-codegen 0.5.2. --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index f207e21..5eb5125 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.1" # Also update ../README.md when making a semver-breaking change +version = "0.5.2" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" From c58ac06272bf59135e58fdf8df0dd5e696019382 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Tue, 12 Jul 2022 11:49:26 +0100 Subject: [PATCH 185/212] Fix comment typo --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index c02651b..3fad611 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -105,7 +105,7 @@ impl Atom { } impl Atom { - /// Return the internal repersentation. For testing. + /// Return the internal representation. For testing. #[doc(hidden)] pub fn unsafe_data(&self) -> u64 { self.unsafe_data.get() From 8f5bed52d29a4d201705665fc9599c0d06bb4427 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Wed, 14 Dec 2022 21:33:36 -0500 Subject: [PATCH 186/212] Update MSRV. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe17a69..f99d2e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.49.0, nightly, beta, stable] + rust: [1.56.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 37b459f8ce1ec694e8218ebdeef30c06f68e6205 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 14 Dec 2022 10:44:50 +0100 Subject: [PATCH 187/212] Add trivial impl of Borrow for Atom This enables Atom to be used in methods like HashMap::entry_ref. 
--- src/trivial_impls.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 960dde0..24baaf1 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -10,7 +10,7 @@ use crate::{Atom, StaticAtomSet}; #[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; use std::fmt; impl ::precomputed_hash::PrecomputedHash for Atom { @@ -70,6 +70,12 @@ impl AsRef for Atom { } } +impl Borrow for Atom { + fn borrow(&self) -> &str { + self + } +} + #[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result From b473a4ad3be989166031f56976f7ce54ae79ac05 Mon Sep 17 00:00:00 2001 From: Boshen Date: Thu, 16 Feb 2023 21:58:39 +0800 Subject: [PATCH 188/212] feat: use bucket mutex instead of global mutex for dynamic set This implementation uses bucket level mutex with linear probing. --- src/atom.rs | 7 ++----- src/dynamic_set.rs | 36 +++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c02651b..7856947 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -200,8 +200,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } else { - let ptr: std::ptr::NonNull = - DYNAMIC_SET.lock().insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -237,9 +236,7 @@ impl Drop for Atom { // Out of line to guide inlining. 
fn drop_slow(this: &mut Atom) { - DYNAMIC_SET - .lock() - .remove(this.unsafe_data.get() as *mut Entry); + DYNAMIC_SET.remove(this.unsafe_data.get() as *mut Entry); } } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 229a79f..6ea4ba6 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -19,7 +19,7 @@ const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; pub(crate) struct Set { - buckets: Box<[Option>; NB_BUCKETS]>, + buckets: Box<[Mutex>>]>, } pub(crate) struct Entry { @@ -38,22 +38,24 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -pub(crate) static DYNAMIC_SET: Lazy> = Lazy::new(|| { - Mutex::new({ - type T = Option>; - let _static_assert_size_eq = std::mem::transmute::; - let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); - Set { - buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, - } - }) +pub(crate) static DYNAMIC_SET: Lazy = Lazy::new(|| { + // NOTE: Using const initialization for buckets breaks the small-stack test. 
+ // ``` + // // buckets: [Mutex>>; NB_BUCKETS], + // const MUTEX: Mutex>> = Mutex::new(None); + // let buckets = Box::new([MUTEX; NB_BUCKETS]); + // ``` + let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); + Set { buckets } }); impl Set { - pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { + pub(crate) fn insert(&self, string: Cow, hash: u32) -> NonNull { let bucket_index = (hash & BUCKET_MASK) as usize; + let mut linked_list = self.buckets[bucket_index].lock(); + { - let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); + let mut ptr: Option<&mut Box> = linked_list.as_mut(); while let Some(entry) = ptr.take() { if entry.hash == hash && *entry.string == *string { @@ -74,25 +76,25 @@ impl Set { debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let string = string.into_owned(); let mut entry = Box::new(Entry { - next_in_bucket: self.buckets[bucket_index].take(), + next_in_bucket: linked_list.take(), hash, ref_count: AtomicIsize::new(1), string: string.into_boxed_str(), }); let ptr = NonNull::from(&mut *entry); - self.buckets[bucket_index] = Some(entry); - + *linked_list = Some(entry); ptr } - pub(crate) fn remove(&mut self, ptr: *mut Entry) { + pub(crate) fn remove(&self, ptr: *mut Entry) { let bucket_index = { let value: &Entry = unsafe { &*ptr }; debug_assert!(value.ref_count.load(SeqCst) == 0); (value.hash & BUCKET_MASK) as usize }; - let mut current: &mut Option> = &mut self.buckets[bucket_index]; + let mut linked_list = self.buckets[bucket_index].lock(); + let mut current: &mut Option> = &mut linked_list; while let Some(entry_ptr) = current.as_mut() { let entry_ptr: *mut Entry = &mut **entry_ptr; From e01688eb974ac52b24f19c69fb8ca398a07cdd32 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Wed, 22 Feb 2023 11:50:52 +0100 Subject: [PATCH 189/212] Bump version to 0.8.5 This will allow dependent packages to pick up improvements to mutex performance. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 16ef966..2d29863 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.4" # Also update README.md when making a semver-breaking change +version = "0.8.5" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 9c7b0aa84a5d862f57c88e59f07d3c66efe58908 Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Thu, 23 Feb 2023 02:35:49 +0200 Subject: [PATCH 190/212] Revert trivial impl of Borrow for Atom --- src/trivial_impls.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 24baaf1..0b2c98b 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -70,12 +70,6 @@ impl AsRef for Atom { } } -impl Borrow for Atom { - fn borrow(&self) -> &str { - self - } -} - #[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result From 4e45fde044657fe94a00d5cbd0d2910a1f827d0e Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Thu, 23 Feb 2023 15:34:07 +0200 Subject: [PATCH 191/212] remove unused import --- src/trivial_impls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 0b2c98b..960dde0 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -10,7 +10,7 @@ use crate::{Atom, StaticAtomSet}; #[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::{Borrow, Cow}; +use std::borrow::Cow; use std::fmt; impl ::precomputed_hash::PrecomputedHash for Atom { From 448bf6b9c14e1f6f81a3f820006508fab8c4388b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Thu, 23 Feb 2023 22:04:50 -0500 Subject: [PATCH 192/212] Publish 
0.8.6. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2d29863..6067114 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.5" # Also update README.md when making a semver-breaking change +version = "0.8.6" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 126c173ac3ba18888821038be28aeba44af1023c Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Tue, 28 Feb 2023 18:00:46 +0200 Subject: [PATCH 193/212] test: add common dependency usage --- integration-tests/src/common-usage.rs | 19 +++++++++++++++++++ integration-tests/src/lib.rs | 4 ++++ 2 files changed, 23 insertions(+) create mode 100644 integration-tests/src/common-usage.rs diff --git a/integration-tests/src/common-usage.rs b/integration-tests/src/common-usage.rs new file mode 100644 index 0000000..7b7380a --- /dev/null +++ b/integration-tests/src/common-usage.rs @@ -0,0 +1,19 @@ +/// Test common usage by popular dependents (html5ever, lalrpop, browserlists-rs), to ensure no API-surface breaking changes +/// Created after https://github.com/servo/string-cache/issues/271 +use std::collections::HashMap; + +use crate::Atom; +use crate::TestAtom; + +#[test] +fn usage_with_hashmap() { + let mut map: HashMap = HashMap::new(); + + map.insert(test_atom!("area"), 1); + map.insert("str_into".into(), 2); + map.insert("atom_from".into(), 3); + + assert_eq!(map.get(&"area".into()).unwrap(), &1); + assert_eq!(map.get(&"str_into".into()).unwrap(), &2); + assert_eq!(map.get(&Atom::from("atom_from")).unwrap(), &3); +} diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 1f2be87..aaacdff 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -296,6 +296,10 @@ fn test_try_static() { 
assert!(Atom::try_static("not in the static table").is_none()); } +#[cfg(test)] +#[path = "common-usage.rs"] +mod common_usage; + #[cfg(all(test, feature = "unstable"))] #[path = "bench.rs"] mod bench; From 120ba6c88e9337a810149b5afa4eecf32d8006d8 Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Fri, 3 Mar 2023 22:22:19 +0200 Subject: [PATCH 194/212] fix: move debug_assert check --- src/dynamic_set.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 6ea4ba6..46e7a54 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -87,13 +87,11 @@ impl Set { } pub(crate) fn remove(&self, ptr: *mut Entry) { - let bucket_index = { - let value: &Entry = unsafe { &*ptr }; - debug_assert!(value.ref_count.load(SeqCst) == 0); - (value.hash & BUCKET_MASK) as usize - }; + let value: &Entry = unsafe { &*ptr }; + let bucket_index = (value.hash & BUCKET_MASK) as usize; let mut linked_list = self.buckets[bucket_index].lock(); + debug_assert!(value.ref_count.load(SeqCst) == 0); let mut current: &mut Option> = &mut linked_list; while let Some(entry_ptr) = current.as_mut() { From 34f914c99e8bdc5f2fa842fd04f190c7c9e4df3b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Tue, 7 Mar 2023 08:54:42 -0500 Subject: [PATCH 195/212] Publish 0.8.7 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6067114..b0f4957 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.6" # Also update README.md when making a semver-breaking change +version = "0.8.7" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" From 1ae3d0dcbf2c50f7d4a6f8c6e5f16a410ecc40d6 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 10 Jul 2023 14:14:57 +0200 Subject: [PATCH 196/212] Enable the GitHub merge queue (#280) --- .github/workflows/ci.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f99d2e9..a4615f2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,11 @@ -name: Run CI +name: CI on: push: - branches: ["auto"] + branches: ["master"] pull_request: branches: ["**"] + merge_group: + types: [checks_requested] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -14,7 +16,7 @@ env: jobs: ci: - name: Run CI + name: Build and Test runs-on: ubuntu-latest strategy: @@ -35,8 +37,10 @@ jobs: run: | cargo build --no-default-features cargo build - - name: Tests - run: cargo test --all + - uses: actions-rs/cargo@v1 + with: + command: test + args: --all - name: Build codegen run: | cd string-cache-codegen && cargo build && cd .. 
@@ -47,7 +51,7 @@ jobs: build_result: - name: homu build finished + name: Result runs-on: ubuntu-latest needs: - "ci" From b46a64fa8c74fbef9b297ab05cace66da536e8a1 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 11 Mar 2024 10:37:57 +0100 Subject: [PATCH 197/212] Rename `master` branch to `main` (#283) --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4615f2..8b48d1a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,8 @@ name: CI on: push: - branches: ["master"] + branches: ["main"] pull_request: - branches: ["**"] merge_group: types: [checks_requested] From a7793f0e6739bb4976c80db2351163c9a7d005c2 Mon Sep 17 00:00:00 2001 From: Matthew Martin Date: Mon, 11 Mar 2024 04:44:34 -0500 Subject: [PATCH 198/212] Update phf to 0.11 (#281) --- .github/workflows/ci.yml | 2 +- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b48d1a..b133023 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.56.0, nightly, beta, stable] + rust: [1.60.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 diff --git a/Cargo.toml b/Cargo.toml index b0f4957..df159f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ default = ["serde_support"] precomputed-hash = "0.1" once_cell = "1.10.0" serde = { version = "1", optional = true } -phf_shared = "0.10" +phf_shared = "0.11" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 5eb5125..b059bfc 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.10" -phf_shared = "0.10" +phf_generator 
= "0.11" +phf_shared = "0.11" proc-macro2 = "1" quote = "1" From 1b636e99cb1bd8dff31bb4fc5be089002a635c12 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Wed, 17 Jul 2024 22:35:24 +0100 Subject: [PATCH 199/212] Skip bounds check for inline slices (#277) --- src/atom.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 321b0a4..d1bd7b8 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -254,8 +254,9 @@ impl ops::Deref for Atom { } INLINE_TAG => { let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET; + debug_assert!(len as usize <= MAX_INLINE_LEN); let src = inline_atom_slice(&self.unsafe_data); - str::from_utf8_unchecked(&src[..(len as usize)]) + str::from_utf8_unchecked(src.get_unchecked(..(len as usize))) } STATIC_TAG => Static::get().atoms[self.static_index() as usize], _ => debug_unreachable!(), From c8fed62876eed738b16f7e0ae3d9974391e8f59f Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Wed, 31 Jul 2024 23:13:14 +0100 Subject: [PATCH 200/212] Prefer inline representation over static (#278) * Benchmarks use longer static strings * Use inline for short strings Closes #276. 
--- integration-tests/build.rs | 4 +++ integration-tests/src/bench.rs | 8 ++--- integration-tests/src/lib.rs | 25 +++++++++++----- src/atom.rs | 53 ++++++++++++++++++++++++---------- string-cache-codegen/lib.rs | 51 +++++++++++++++++++++++++------- 5 files changed, 104 insertions(+), 37 deletions(-) diff --git a/integration-tests/build.rs b/integration-tests/build.rs index da40873..6293e4c 100644 --- a/integration-tests/build.rs +++ b/integration-tests/build.rs @@ -9,6 +9,7 @@ fn main() { "a", "b", "address", + "defaults", "area", "body", "font-weight", @@ -16,6 +17,9 @@ fn main() { "html", "head", "id", + "❤", + "❤💯", + "❤💯❤💯", ]) .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) .unwrap() diff --git a/integration-tests/src/bench.rs b/integration-tests/src/bench.rs index 4d8f012..45e7199 100644 --- a/integration-tests/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -153,7 +153,7 @@ bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_ref clone is_static lt] - for static_atom = test_atom!("a"), test_atom!("b")); + for static_atom = test_atom!("defaults"), test_atom!("font-weight")); bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); @@ -168,13 +168,13 @@ bench_all!([eq ne intern as_ref clone is_dynamic lt] for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b)); bench_all!([intern as_ref clone is_static] - for static_at_runtime = mk("a"), mk("b")); + for static_at_runtime = mk("defaults"), mk("font-weight")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = test_atom!("a"), mk("f")); + for static_vs_inline = test_atom!("defaults"), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = test_atom!("a"), mk(super::longer_dynamic_b)); + for static_vs_dynamic = test_atom!("defaults"), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] for 
inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index aaacdff..a788d93 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -45,9 +45,12 @@ fn test_as_slice() { #[test] fn test_types() { assert!(Atom::from("").is_static()); - assert!(Atom::from("id").is_static()); - assert!(Atom::from("body").is_static()); - assert!(Atom::from("a").is_static()); + assert!(Atom::from("defaults").is_static()); + assert!(Atom::from("font-weight").is_static()); + assert!(Atom::from("id").is_inline()); + assert!(Atom::from("body").is_inline()); + assert!(Atom::from("a").is_inline()); + assert!(Atom::from("address").is_inline()); assert!(Atom::from("c").is_inline()); assert!(Atom::from("zz").is_inline()); assert!(Atom::from("zzz").is_inline()); @@ -168,11 +171,13 @@ fn repr() { // static atom table, the tag values, etc. // Static atoms - check_static("a", test_atom!("a")); - check_static("address", test_atom!("address")); - check_static("area", test_atom!("area")); + check_static("defaults", test_atom!("defaults")); + check_static("font-weight", test_atom!("font-weight")); // Inline atoms + check("a", 0x0000_0000_0000_6111); + check("address", 0x7373_6572_6464_6171); + check("area", 0x0000_0061_6572_6141); check("e", 0x0000_0000_0000_6511); check("xyzzy", 0x0000_797A_7A79_7851); check("xyzzy01", 0x3130_797A_7A79_7871); @@ -193,8 +198,13 @@ fn test_threads() { #[test] fn atom_macro() { + assert_eq!(test_atom!("a"), Atom::from("a")); assert_eq!(test_atom!("body"), Atom::from("body")); + assert_eq!(test_atom!("address"), Atom::from("address")); + assert_eq!(test_atom!("❤"), Atom::from("❤")); + assert_eq!(test_atom!("❤💯"), Atom::from("❤💯")); assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); + assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯")); } #[test] @@ -292,7 +302,8 @@ fn test_from_string() { #[test] fn test_try_static() { - 
assert!(Atom::try_static("head").is_some()); + assert!(Atom::try_static("defaults").is_some()); + assert!(Atom::try_static("head").is_none()); assert!(Atom::try_static("not in the static table").is_none()); } diff --git a/src/atom.rs b/src/atom.rs index d1bd7b8..7a3dea9 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -99,6 +99,25 @@ impl Atom { } } + /// For the atom!() macros + #[inline(always)] + #[doc(hidden)] + pub const fn pack_inline(mut n: u64, len: u8) -> Self { + if cfg!(target_endian = "big") { + // Reverse order of top 7 bytes. + // Bottom 8 bits of `n` are zero, and we need that to remain so. + // String data is stored in top 7 bytes, tag and length in bottom byte. + n = n.to_le() << 8; + } + + let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n; + Self { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } + fn tag(&self) -> u8 { (self.unsafe_data.get() & TAG_MASK) as u8 } @@ -186,20 +205,22 @@ impl Hash for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { - Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { - let len = string_to_add.len(); - if len <= MAX_INLINE_LEN { - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); - { - let dest = inline_atom_slice_mut(&mut data); - dest[..len].copy_from_slice(string_to_add.as_bytes()) - } - Atom { - // INLINE_TAG ensures this is never zero - unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, - phantom: PhantomData, - } - } else { + let len = string_to_add.len(); + if len == 0 { + Self::pack_static(Static::empty_string_index()) + } else if len <= MAX_INLINE_LEN { + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); + { + let dest = inline_atom_slice_mut(&mut data); + dest[..len].copy_from_slice(string_to_add.as_bytes()); + } + Atom { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { 
NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } else { + Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); @@ -208,8 +229,8 @@ impl<'a, Static: StaticAtomSet> From> for Atom { unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, phantom: PhantomData, } - } - }) + }) + } } } diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 0fe4819..3228946 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -187,11 +187,19 @@ impl AtomType { // which would cause divisions by zero in rust-phf. self.atoms.insert(String::new()); - let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); - let hash_state = phf_generator::generate_hash(&atoms); + // Strings over 7 bytes + empty string added to static set. + // Otherwise stored inline. + let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self + .atoms + .iter() + .map(String::as_str) + .partition(|s| s.len() > 7 || s.is_empty()); + + // Static strings + let hash_state = phf_generator::generate_hash(&static_strs); let phf_generator::HashState { key, disps, map } = hash_state; let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); - let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); + let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect(); let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let indices = 0..atoms.len() as u32; @@ -228,16 +236,33 @@ impl AtomType { let macro_name = new_term(&*self.macro_name); let module = module.parse::().unwrap(); let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase()); - let const_names: Vec<_> = atoms + let new_const_name = |atom: &str| { + let mut name = atom_prefix.clone(); + for c in atom.chars() { + name.push_str(&format!("_{:02X}", c as u32)) + } + new_term(&name) 
+ }; + let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect(); + + // Inline strings + let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs .iter() - .map(|atom| { - let mut name = atom_prefix.clone(); - for c in atom.chars() { - name.push_str(&format!("_{:02X}", c as u32)) + .map(|s| { + let const_name = new_const_name(s); + + let mut value = 0u64; + for (index, c) in s.bytes().enumerate() { + value = value | ((c as u64) << (index * 8 + 8)); } - new_term(&name) + + let len = s.len() as u8; + + (const_name, (value, len)) }) - .collect(); + .unzip(); + let (inline_values, inline_lengths): (Vec<_>, Vec<_>) = + inline_values_and_lengths.into_iter().unzip(); quote! { #atom_doc @@ -265,6 +290,9 @@ impl AtomType { #( pub const #const_names: #type_name = #type_name::pack_static(#indices); )* + #( + pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths); + )* #macro_doc #[macro_export] @@ -272,6 +300,9 @@ impl AtomType { #( (#atoms) => { #module::#const_names }; )* + #( + (#inline_strs) => { #module::#inline_const_names }; + )* } } } From 021012ea995461efdec6ddb5eb30a1bf9481d7c4 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Thu, 1 Aug 2024 01:23:58 -0400 Subject: [PATCH 201/212] Update MSRV to 1.61. 
(#284) Signed-off-by: Josh Matthews --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b133023..aa6f952 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.60.0, nightly, beta, stable] + rust: [1.61.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 471ca0d8978cf6ce7dbfd170e67a103cfe62b975 Mon Sep 17 00:00:00 2001 From: cactter <109739451+cactter@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:53:57 +0800 Subject: [PATCH 202/212] The scope of the unsafe block can be appropriately reduced (#263) * Shrink unsafe block * Remove empty lines --------- Co-authored-by: Martin Robinson --- src/atom.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 7a3dea9..7e15357 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -383,28 +383,24 @@ impl Atom { #[inline(always)] fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { - unsafe { let x: *const NonZeroU64 = x; let mut data = x as *const u8; // All except the lowest byte, which is first in little-endian, last in big-endian. if cfg!(target_endian = "little") { - data = data.offset(1); + data = unsafe { data.offset(1) }; } let len = 7; - slice::from_raw_parts(data, len) - } + unsafe { slice::from_raw_parts(data, len) } } #[inline(always)] -fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { - unsafe { +fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { let x: *mut u64 = x; let mut data = x as *mut u8; // All except the lowest byte, which is first in little-endian, last in big-endian. 
if cfg!(target_endian = "little") { - data = data.offset(1); + data = unsafe { data.offset(1) }; } let len = 7; - slice::from_raw_parts_mut(data, len) - } + unsafe { slice::from_raw_parts_mut(data, len) } } From e03f29061d1359adae2741f3d20f1184a636221c Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 1 Feb 2025 14:43:21 +0100 Subject: [PATCH 203/212] MSRV 1.70 + Replace `once_cell::sync::Lazy` with `std::sync::OnceLock` (#287) * Update MSRV to 1.70 * Replace `once_cell::sync::Lazy` with `std::sync::OnceLock` --- .github/workflows/ci.yml | 2 +- Cargo.toml | 1 - src/atom.rs | 6 +++--- src/dynamic_set.rs | 14 +++++++++----- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa6f952..c5cdfa7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.61.0, nightly, beta, stable] + rust: [1.70.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 diff --git a/Cargo.toml b/Cargo.toml index df159f1..678eda3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,6 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -once_cell = "1.10.0" serde = { version = "1", optional = true } phf_shared = "0.11" new_debug_unreachable = "1.0.2" diff --git a/src/atom.rs b/src/atom.rs index 7e15357..adf5f62 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use crate::dynamic_set::{Entry, DYNAMIC_SET}; +use crate::dynamic_set::{dynamic_set, Entry}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; @@ -221,7 +221,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } else { Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { - let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = dynamic_set().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -257,7 +257,7 @@ impl Drop for Atom { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - DYNAMIC_SET.remove(this.unsafe_data.get() as *mut Entry); + dynamic_set().remove(this.unsafe_data.get() as *mut Entry); } } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 46e7a54..4442b4d 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -7,13 +7,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use once_cell::sync::Lazy; use parking_lot::Mutex; use std::borrow::Cow; use std::mem; use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; +use std::sync::OnceLock; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; @@ -38,16 +38,20 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -pub(crate) static DYNAMIC_SET: Lazy = Lazy::new(|| { +pub(crate) fn dynamic_set() -> &'static Set { // NOTE: Using const initialization for buckets breaks the small-stack test. 
// ``` // // buckets: [Mutex>>; NB_BUCKETS], // const MUTEX: Mutex>> = Mutex::new(None); // let buckets = Box::new([MUTEX; NB_BUCKETS]); // ``` - let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); - Set { buckets } -}); + static DYNAMIC_SET: OnceLock = OnceLock::new(); + + DYNAMIC_SET.get_or_init(|| { + let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); + Set { buckets } + }) +} impl Set { pub(crate) fn insert(&self, string: Cow, hash: u32) -> NonNull { From 27221cec100d883420ef0fd06a7af5593262e4d8 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 1 Feb 2025 08:45:33 -0500 Subject: [PATCH 204/212] Publish 0.8.8. Signed-off-by: Josh Matthews --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 678eda3..338eeba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.7" # Also update README.md when making a semver-breaking change +version = "0.8.8" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 4a5bb75adaa35d4076ab30091b0dbc433f369c45 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 1 Feb 2025 13:09:13 -0500 Subject: [PATCH 205/212] Publish string-cache-codegen 0.5.3. 
Signed-off-by: Josh Matthews --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index b059bfc..53c1f0f 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.2" # Also update ../README.md when making a semver-breaking change +version = "0.5.3" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 14ae86af8fbe7a14229296473e9ac18b67228f52 Mon Sep 17 00:00:00 2001 From: Alex Touchet <26315797+atouchet@users.noreply.github.com> Date: Sat, 1 Feb 2025 10:10:09 -0800 Subject: [PATCH 206/212] Set rust-version in Cargo.toml (#288) --- Cargo.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 338eeba..6ceabfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,13 @@ [package] name = "string_cache" version = "0.8.8" # Also update README.md when making a semver-breaking change -authors = [ "The Servo Project Developers" ] +authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" -documentation = "https://docs.rs/string_cache/" +documentation = "https://docs.rs/string_cache" edition = "2018" +rust-version = "1.70.0" # Do not `exclude` ./string-cache-codegen because we want to include # ./string-cache-codegen/shared.rs, and `include` is a pain to use From d9e888f2f61d43c1868849a506104ee02d79027c Mon Sep 17 00:00:00 2001 From: Ygg01 Date: Wed, 19 Feb 2025 11:59:59 +0100 Subject: [PATCH 207/212] Add test for atom order stability (#290) * Add test for atom order stability. 
* Made test for iteration order a unit test # Conflicts: # string-cache-codegen/tests/reproducibility_test.rs --- string-cache-codegen/lib.rs | 39 ++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 3228946..c703cf7 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -69,7 +69,7 @@ #![recursion_limit = "128"] use quote::quote; -use std::collections::HashSet; +use std::collections::BTreeSet; use std::fs::File; use std::io::{self, BufWriter, Write}; use std::path::Path; @@ -81,7 +81,7 @@ pub struct AtomType { static_set_doc: Option, macro_name: String, macro_doc: Option, - atoms: HashSet, + atoms: BTreeSet, } impl AtomType { @@ -114,7 +114,7 @@ impl AtomType { atom_doc: None, static_set_doc: None, macro_doc: None, - atoms: HashSet::new(), + atoms: BTreeSet::new(), } } @@ -181,6 +181,26 @@ impl AtomType { ) } + #[cfg(test)] + /// Write generated code to destination [`Vec`] and return it as [`String`] + /// + /// Used mostly for testing or displaying a value. + pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result + { + destination.write_all( + self.to_tokens() + .to_string() + // Insert some newlines to make the generated code slightly easier to read. + .replace(" [ \"", "[\n\"") + .replace("\" , ", "\",\n") + .replace(" ( \"", "\n( \"") + .replace("; ", ";\n") + .as_bytes(), + )?; + let str = String::from_utf8(destination).unwrap(); + Ok(str) + } + fn to_tokens(&mut self) -> proc_macro2::TokenStream { // `impl Default for Atom` requires the empty string to be in the static set. 
// This also makes sure the set in non-empty, @@ -315,3 +335,16 @@ impl AtomType { self.write_to(BufWriter::new(File::create(path)?)) } } + +#[test] +fn test_iteration_order() { + let x1 = crate::AtomType::new("foo::Atom", "foo_atom!") + .atoms(&["x", "xlink", "svg", "test"]) + .write_to_string(Vec::new()).expect("write to string cache x1"); + + let x2 = crate::AtomType::new("foo::Atom", "foo_atom!") + .atoms(&["x", "xlink", "svg", "test"]) + .write_to_string(Vec::new()).expect("write to string cache x2"); + + assert_eq!(x1, x2); +} \ No newline at end of file From 88600346b44ae50afaf52a31d6a0db1c37b94b02 Mon Sep 17 00:00:00 2001 From: Ygg01 Date: Thu, 20 Feb 2025 13:40:19 +0100 Subject: [PATCH 208/212] Publish 0.5.4 string cache codegen (#292) --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 53c1f0f..a9660bd 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.3" # Also update ../README.md when making a semver-breaking change +version = "0.5.4" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" From b92f7eb3ff504034ec58c0154f9a9b053e23da4f Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 28 Mar 2025 07:15:11 +1300 Subject: [PATCH 209/212] Implement MallocSizeOf for Atom (#289) Signed-off-by: Nico Burns --- .github/workflows/ci.yml | 1 + Cargo.toml | 3 ++- src/atom.rs | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c5cdfa7..74ade77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: run: | cargo build --no-default-features cargo build + cargo build --features malloc_size_of - uses: actions-rs/cargo@v1 with: command: test diff --git a/Cargo.toml b/Cargo.toml index 6ceabfe..287bd07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.8" # Also update README.md when making a semver-breaking change +version = "0.8.9" # Also update README.md when making a semver-breaking change authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" @@ -25,6 +25,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" serde = { version = "1", optional = true } +malloc_size_of = { version = "0.1", default-features = false, optional = true } phf_shared = "0.11" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/src/atom.rs b/src/atom.rs index adf5f62..5a8aa7f 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -82,6 +82,15 @@ pub struct Atom { phantom: PhantomData, } +// This isn't really correct as the Atoms can technically take up space. 
But I guess it's ok +// as it is possible to measure the size of the atom set separately/ +#[cfg(feature = "malloc_size_of")] +impl malloc_size_of::MallocSizeOf for Atom { + fn size_of(&self, _ops: &mut malloc_size_of::MallocSizeOfOps) -> usize { + 0 + } +} + // FIXME: bound removed from the struct definition before of this error for pack_static: // "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" // https://github.com/rust-lang/rust/issues/57563 From eb5ad11b53a8e132fa09781f062c47ce352f80d6 Mon Sep 17 00:00:00 2001 From: Cheng Xu <3105373+xu-cheng@users.noreply.github.com> Date: Tue, 26 Aug 2025 00:47:01 -0700 Subject: [PATCH 210/212] Update phf to 0.13 (#295) --- Cargo.toml | 4 ++-- README.md | 6 +++--- integration-tests/Cargo.toml | 6 +++--- src/lib.rs | 4 ++-- string-cache-codegen/Cargo.toml | 6 +++--- string-cache-codegen/lib.rs | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 287bd07..e73215e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.9" # Also update README.md when making a semver-breaking change +version = "0.9.0" # Also update README.md when making a semver-breaking change authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" @@ -26,7 +26,7 @@ default = ["serde_support"] precomputed-hash = "0.1" serde = { version = "1", optional = true } malloc_size_of = { version = "0.1", default-features = false, optional = true } -phf_shared = "0.11" +phf_shared = "0.13" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/README.md b/README.md index fdf4c0a..429d1ec 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.8" +string_cache = "0.9" ``` In `lib.rs`: @@ -31,10 +31,10 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.8" +string_cache = "0.9" [build-dependencies] -string_cache_codegen = "0.5" +string_cache_codegen = "0.6" ``` In `build.rs`: diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index a0b047c..12c0ad0 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -16,11 +16,11 @@ test = true unstable = [] [dependencies] -string_cache = { version = "0.8", path = ".." } +string_cache = { version = "0.9", path = ".." } [dev-dependencies] rand = "0.8" -string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } [build-dependencies] -string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } diff --git a/src/lib.rs b/src/lib.rs index 441cb4e..3cc29b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,10 +25,10 @@ //! In `Cargo.toml`: //! ```toml //! [dependencies] -//! string_cache = "0.8" +//! string_cache = "0.9" //! //! [dev-dependencies] -//! string_cache_codegen = "0.5" +//! string_cache_codegen = "0.6" //! ``` //! //! 
In `build.rs`: diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a9660bd..a6e9da0 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.4" # Also update ../README.md when making a semver-breaking change +version = "0.6.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.11" -phf_shared = "0.11" +phf_generator = "0.13" +phf_shared = "0.13" proc-macro2 = "1" quote = "1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index c703cf7..69ff612 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -19,10 +19,10 @@ //! build = "build.rs" //! //! [dependencies] -//! string_cache = "0.8" +//! string_cache = "0.9" //! //! [build-dependencies] -//! string_cache_codegen = "0.5" +//! string_cache_codegen = "0.6" //! ``` //! //! In `build.rs`: @@ -183,8 +183,8 @@ impl AtomType { #[cfg(test)] /// Write generated code to destination [`Vec`] and return it as [`String`] - /// - /// Used mostly for testing or displaying a value. + /// + /// Used mostly for testing or displaying a value. 
pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result { destination.write_all( @@ -347,4 +347,4 @@ fn test_iteration_order() { .write_to_string(Vec::new()).expect("write to string cache x2"); assert_eq!(x1, x2); -} \ No newline at end of file +} From 533b64e132ec65a616317d2607f536da024d19a9 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 5 Sep 2025 13:01:12 +0100 Subject: [PATCH 211/212] Make macros accept idents where atom value is a valid ident (#296) * Make macros accept idents where atom is a valid ident This means that local_name!(html) will work as well as local_name!("html") Signed-off-by: Nico Burns * Fix tests build Signed-off-by: Nico Burns --------- Signed-off-by: Nico Burns --- integration-tests/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 55 ++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 12c0ad0..4562747 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -19,7 +19,7 @@ unstable = [] string_cache = { version = "0.9", path = ".." } [dev-dependencies] -rand = "0.8" +rand = { version = "0.8", features = ["small_rng"] } string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } [build-dependencies] diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 69ff612..525ef3a 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -68,6 +68,7 @@ #![recursion_limit = "128"] +use proc_macro2::Ident; use quote::quote; use std::collections::BTreeSet; use std::fs::File; @@ -185,8 +186,7 @@ impl AtomType { /// Write generated code to destination [`Vec`] and return it as [`String`] /// /// Used mostly for testing or displaying a value.
- pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result - { + pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result { destination.write_all( self.to_tokens() .to_string() @@ -223,6 +223,30 @@ impl AtomType { let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let indices = 0..atoms.len() as u32; + fn is_valid_ident(name: &str) -> bool { + let begins_with_letter_or_underscore = name + .chars() + .next() + .is_some_and(|c| c.is_alphabetic() || c == '_'); + let is_alphanumeric = name.chars().all(|c| c.is_alphanumeric() || c == '_'); + + begins_with_letter_or_underscore && is_alphanumeric + } + + let atoms_for_idents: Vec<&str> = atoms + .iter() + .copied() + .filter(|x| is_valid_ident(x)) + .collect(); + let atom_idents: Vec = atoms_for_idents.iter().map(|atom| new_term(atom)).collect(); + + let istrs_for_idents: Vec<&str> = inline_strs + .iter() + .copied() + .filter(|x| is_valid_ident(x)) + .collect(); + let istr_idents: Vec = istrs_for_idents.iter().map(|atom| new_term(atom)).collect(); + let hashes: Vec = atoms .iter() .map(|string| { @@ -249,8 +273,9 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!(), }; - let new_term = - |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); + fn new_term(string: &str) -> Ident { + Ident::new(string, proc_macro2::Span::call_site()) + } let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); @@ -264,6 +289,16 @@ impl AtomType { new_term(&name) }; let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect(); + let ident_const_names: Vec<_> = atoms_for_idents + .iter() + .copied() + .map(new_const_name) + .collect(); + let ident_inline_const_names: Vec<_> = istrs_for_idents + .iter() + .copied() + .map(new_const_name) + .collect(); // Inline strings let (inline_const_names, inline_values_and_lengths): (Vec<_>, 
Vec<_>) = inline_strs @@ -323,6 +358,12 @@ impl AtomType { #( (#inline_strs) => { #module::#inline_const_names }; )* + #( + (#atom_idents) => { #module::#ident_const_names }; + )* + #( + (#istr_idents) => { #module::#ident_inline_const_names }; + )* } } } @@ -340,11 +381,13 @@ impl AtomType { fn test_iteration_order() { let x1 = crate::AtomType::new("foo::Atom", "foo_atom!") .atoms(&["x", "xlink", "svg", "test"]) - .write_to_string(Vec::new()).expect("write to string cache x1"); + .write_to_string(Vec::new()) + .expect("write to string cache x1"); let x2 = crate::AtomType::new("foo::Atom", "foo_atom!") .atoms(&["x", "xlink", "svg", "test"]) - .write_to_string(Vec::new()).expect("write to string cache x2"); + .write_to_string(Vec::new()) + .expect("write to string cache x2"); assert_eq!(x1, x2); } From fd1475d07d5ff30e5d2ca343671577c77ab5679a Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 14:50:19 +0100 Subject: [PATCH 212/212] Bump version of string-cache-codegen to v0.6.1 (#297) Signed-off-by: Nico Burns --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a6e9da0..20eced9 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.6.0" # Also update ../README.md when making a semver-breaking change +version = "0.6.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0"