From e4c20800418f9bd0a8f15c3ffa92b93075950910 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 26 Aug 2014 14:22:41 -0700 Subject: [PATCH 001/379] Add the option of building with Cargo --- .gitignore | 3 +++ Cargo.toml | 10 ++++++++++ Makefile.in | 8 ++++---- atom.rs => src/atom.rs | 0 lib.rs => src/lib.rs | 0 static_atoms.rs => src/static_atoms.rs | 0 6 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 Cargo.toml rename atom.rs => src/atom.rs (100%) rename lib.rs => src/lib.rs (100%) rename static_atoms.rs => src/static_atoms.rs (100%) diff --git a/.gitignore b/.gitignore index 5df5866..fafa631 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /doc +/Makefile +/Cargo.lock +/target diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..0268c52 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] + +name = "string_cache" +version = "0.0.0" +authors = [ "The Servo Project Developers" ] + +[dependencies.phf] +git = "https://github.com/sfackler/rust-phf" +[dependencies.phf_mac] +git = "https://github.com/sfackler/rust-phf" diff --git a/Makefile.in b/Makefile.in index 6e5f2ee..5c80d4c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7,16 +7,16 @@ RUSTDOC ?= rustdoc RUSTDOC_FLAGS ?= RUSTDOC_TARGET ?= doc -RUST_SRC=$(shell find $(VPATH)/. -type f -name '*.rs') +RUST_SRC=$(shell find $(VPATH)/src -type f -name '*.rs') .PHONY: all all: libstring-cache.dummy -libstring-cache.dummy: lib.rs $(RUST_SRC) $(EXT_DEPS) +libstring-cache.dummy: src/lib.rs $(RUST_SRC) $(EXT_DEPS) $(RUSTC) $(RUSTFLAGS) $< --out-dir . 
touch $@ -string-cache-test: lib.rs $(RUST_SRC) +string-cache-test: src/lib.rs $(RUST_SRC) $(RUSTC) $(RUSTFLAGS) $< -o $@ --test .PHONY: check @@ -26,7 +26,7 @@ check: string-cache-test .PHONY: doc doc: $(RUSTDOC_TARGET)/string_cache/index.html -$(RUSTDOC_TARGET)/string_cache/index.html: lib.rs $(RUST_SRC) $(EXT_DEPS) +$(RUSTDOC_TARGET)/string_cache/index.html: src/lib.rs $(RUST_SRC) $(EXT_DEPS) $(RUSTDOC) $(RUSTDOC_FLAGS) $< -o $(RUSTDOC_TARGET) .PHONY: clean diff --git a/atom.rs b/src/atom.rs similarity index 100% rename from atom.rs rename to src/atom.rs diff --git a/lib.rs b/src/lib.rs similarity index 100% rename from lib.rs rename to src/lib.rs diff --git a/static_atoms.rs b/src/static_atoms.rs similarity index 100% rename from static_atoms.rs rename to src/static_atoms.rs From a5a7bb638cb9e1988263c6d6f9a80a6c28be5a97 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 26 Aug 2014 15:23:20 -0700 Subject: [PATCH 002/379] Import macro-based static atoms from html5ever --- Cargo.toml | 3 + Makefile.in | 13 +- macros/Cargo.toml | 10 + macros/src/data.rs | 1072 +++++++++++++++++++++++++++++++++++++++++ macros/src/lib.rs | 136 ++++++ shared/static_atom.rs | 28 ++ src/atom.rs | 78 +-- src/lib.rs | 13 +- src/static_atoms.rs | 70 --- 9 files changed, 1319 insertions(+), 104 deletions(-) create mode 100644 macros/Cargo.toml create mode 100644 macros/src/data.rs create mode 100644 macros/src/lib.rs create mode 100644 shared/static_atom.rs delete mode 100644 src/static_atoms.rs diff --git a/Cargo.toml b/Cargo.toml index 0268c52..00e057b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,6 @@ authors = [ "The Servo Project Developers" ] git = "https://github.com/sfackler/rust-phf" [dependencies.phf_mac] git = "https://github.com/sfackler/rust-phf" + +[dependencies.string_cache_macros] +path = "macros" diff --git a/Makefile.in b/Makefile.in index 5c80d4c..033472a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7,17 +7,18 @@ RUSTDOC ?= rustdoc RUSTDOC_FLAGS ?= 
RUSTDOC_TARGET ?= doc -RUST_SRC=$(shell find $(VPATH)/src -type f -name '*.rs') +RUST_SRC=$(shell find $(VPATH)/src $(VPATH)/macros $(VPATH)/shared -type f -name '*.rs') .PHONY: all all: libstring-cache.dummy -libstring-cache.dummy: src/lib.rs $(RUST_SRC) $(EXT_DEPS) - $(RUSTC) $(RUSTFLAGS) $< --out-dir . +libstring-cache.dummy: $(RUST_SRC) $(EXT_DEPS) + $(RUSTC) $(RUSTFLAGS) $(VPATH)/macros/src/lib.rs --out-dir . + $(RUSTC) $(RUSTFLAGS) -L . $(VPATH)/src/lib.rs --out-dir . touch $@ -string-cache-test: src/lib.rs $(RUST_SRC) - $(RUSTC) $(RUSTFLAGS) $< -o $@ --test +string-cache-test: $(RUST_SRC) + $(RUSTC) $(RUSTFLAGS) -L . $< -o $@ --test .PHONY: check check: string-cache-test @@ -26,7 +27,7 @@ check: string-cache-test .PHONY: doc doc: $(RUSTDOC_TARGET)/string_cache/index.html -$(RUSTDOC_TARGET)/string_cache/index.html: src/lib.rs $(RUST_SRC) $(EXT_DEPS) +$(RUSTDOC_TARGET)/string_cache/index.html: $(RUST_SRC) $(EXT_DEPS) $(RUSTDOC) $(RUSTDOC_FLAGS) $< -o $(RUSTDOC_TARGET) .PHONY: clean diff --git a/macros/Cargo.toml b/macros/Cargo.toml new file mode 100644 index 0000000..7fb85ae --- /dev/null +++ b/macros/Cargo.toml @@ -0,0 +1,10 @@ +[package] + +name = "string_cache_macros" +version = "0.0.0" +authors = [ "The Servo Project Developers" ] + +[lib] + +name = "string_cache_macros" +plugin = true diff --git a/macros/src/data.rs b/macros/src/data.rs new file mode 100644 index 0000000..ec4b975 --- /dev/null +++ b/macros/src/data.rs @@ -0,0 +1,1072 @@ +// Copyright 2014 The Servo Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// The first 64 atoms are special: we can quickly check membership +// in sets of these, using a bitmask. 
This includes every tag that +// appears in more than one set in the tree builder spec, plus a +// few others (arbitrarily chosen). +// +// FIXME(kmc): check if this is really true with the packed tag bits +// +// This list must remain sorted. +pub static fast_set_atoms: [&'static str, ..64] = [ + "a", + "address", + "applet", + "area", + "article", + "aside", + "b", + "base", + "basefont", + "bgsound", + "big", + "blockquote", + "body", + "br", + "button", + "caption", + "col", + "colgroup", + "dd", + "dt", + "embed", + "form", + "frame", + "frameset", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "html", + "input", + "li", + "link", + "marquee", + "meta", + "noframes", + "noscript", + "object", + "optgroup", + "option", + "param", + "plaintext", + "pre", + "rp", + "rt", + "script", + "select", + "source", + "style", + "svg", + "table", + "tbody", + "td", + "template", + "textarea", + "tfoot", + "th", + "thead", + "title", + "tr", + "track", + "xmp", +]; + +// The rest. +// +// This list must remain sorted. 
+pub static other_atoms: &'static [&'static str] = &[ + "", + "abbr", + "abs", + "accent", + "accent-height", + "accentunder", + "accept", + "accept-charset", + "accesskey", + "accumulate", + "acronym", + "action", + "actiontype", + "active", + "actuate", + "additive", + "align", + "alignment-baseline", + "alignmentscope", + "alink", + "alphabetic", + "alt", + "altGlyph", + "altGlyphDef", + "altGlyphItem", + "altglyph", + "altglyphdef", + "altglyphitem", + "altimg", + "alttext", + "amplitude", + "and", + "animate", + "animateColor", + "animateMotion", + "animateTransform", + "animatecolor", + "animatemotion", + "animatetransform", + "animation", + "annotation", + "annotation-xml", + "apply", + "approx", + "arabic-form", + "arccos", + "arccosh", + "arccot", + "arccoth", + "arccsc", + "arccsch", + "archive", + "arcrole", + "arcsec", + "arcsech", + "arcsin", + "arcsinh", + "arctan", + "arctanh", + "arg", + "aria-activedescendant", + "aria-atomic", + "aria-autocomplete", + "aria-busy", + "aria-channel", + "aria-checked", + "aria-controls", + "aria-datatype", + "aria-describedby", + "aria-disabled", + "aria-dropeffect", + "aria-expanded", + "aria-flowto", + "aria-grab", + "aria-haspopup", + "aria-hidden", + "aria-invalid", + "aria-labelledby", + "aria-level", + "aria-live", + "aria-multiline", + "aria-multiselectable", + "aria-owns", + "aria-posinset", + "aria-pressed", + "aria-readonly", + "aria-relevant", + "aria-required", + "aria-secret", + "aria-selected", + "aria-setsize", + "aria-sort", + "aria-templateid", + "aria-valuemax", + "aria-valuemin", + "aria-valuenow", + "ascent", + "async", + "attributeName", + "attributeType", + "attributename", + "attributetype", + "audio", + "autocomplete", + "autofocus", + "autoplay", + "autosubmit", + "axis", + "azimuth", + "background", + "baseFrequency", + "baseProfile", + "basefrequency", + "baseline", + "baseline-shift", + "baseprofile", + "bbox", + "bdo", + "begin", + "bevelled", + "bgcolor", + "bias", + "border", + "bvar", 
+ "by", + "calcMode", + "calcmode", + "canvas", + "cap-height", + "card", + "cartesianproduct", + "ceiling", + "cellpadding", + "cellspacing", + "center", + "char", + "charoff", + "charset", + "checked", + "ci", + "circle", + "cite", + "class", + "classid", + "clear", + "clip", + "clip-path", + "clip-rule", + "clipPath", + "clipPathUnits", + "clippath", + "clippathunits", + "close", + "closure", + "cn", + "code", + "codebase", + "codetype", + "codomain", + "color", + "color-interpolation", + "color-interpolation-filters", + "color-profile", + "color-rendering", + "cols", + "colspan", + "columnalign", + "columnlines", + "columnspacing", + "columnspan", + "columnwidth", + "compact", + "complexes", + "compose", + "condition", + "conjugate", + "content", + "contentScriptType", + "contentStyleType", + "contenteditable", + "contentscripttype", + "contentstyletype", + "contextmenu", + "controls", + "coords", + "cos", + "cosh", + "cot", + "coth", + "crossorigin", + "csc", + "csch", + "csymbol", + "curl", + "cursor", + "cx", + "cy", + "d", + "data", + "datafld", + "dataformatas", + "datasrc", + "datatemplate", + "datetime", + "declare", + "default", + "defer", + "definition-src", + "definitionURL", + "definitionurl", + "defs", + "degree", + "del", + "depth", + "desc", + "descent", + "details", + "determinant", + "dfn", + "dialog", + "diff", + "diffuseConstant", + "diffuseconstant", + "dir", + "direction", + "disabled", + "discard", + "display", + "displaystyle", + "div", + "divergence", + "divide", + "divisor", + "dl", + "domain", + "domainofapplication", + "dominant-baseline", + "draggable", + "dur", + "dx", + "dy", + "edge", + "edgeMode", + "edgemode", + "elevation", + "ellipse", + "em", + "emptyset", + "enable-background", + "encoding", + "enctype", + "end", + "eq", + "equalcolumns", + "equalrows", + "equivalent", + "eulergamma", + "exists", + "exp", + "exponent", + "exponentiale", + "externalResourcesRequired", + "externalresourcesrequired", + "face", + "factorial", + 
"factorof", + "false", + "feBlend", + "feColorMatrix", + "feComponentTransfer", + "feComposite", + "feConvolveMatrix", + "feDiffuseLighting", + "feDisplacementMap", + "feDistantLight", + "feFlood", + "feFuncA", + "feFuncB", + "feFuncG", + "feFuncR", + "feGaussianBlur", + "feImage", + "feMerge", + "feMergeNode", + "feMorphology", + "feOffset", + "fePointLight", + "feSpecularLighting", + "feSpotLight", + "feTile", + "feTurbulence", + "feblend", + "fecolormatrix", + "fecomponenttransfer", + "fecomposite", + "feconvolvematrix", + "fediffuselighting", + "fedisplacementmap", + "fedistantlight", + "feflood", + "fefunca", + "fefuncb", + "fefuncg", + "fefuncr", + "fegaussianblur", + "feimage", + "femerge", + "femergenode", + "femorphology", + "fence", + "feoffset", + "fepointlight", + "fespecularlighting", + "fespotlight", + "fetile", + "feturbulence", + "fieldset", + "figcaption", + "figure", + "fill", + "fill-opacity", + "fill-rule", + "filter", + "filterRes", + "filterUnits", + "filterres", + "filterunits", + "flood-color", + "flood-opacity", + "floor", + "fn", + "font", + "font-face", + "font-face-format", + "font-face-name", + "font-face-src", + "font-face-uri", + "font-family", + "font-size", + "font-size-adjust", + "font-stretch", + "font-style", + "font-variant", + "font-weight", + "fontfamily", + "fontsize", + "fontstyle", + "fontweight", + "footer", + "for", + "forall", + "foreignObject", + "foreignobject", + "format", + "frameborder", + "framespacing", + "from", + "fx", + "fy", + "g", + "g1", + "g2", + "gcd", + "geq", + "glyph", + "glyph-name", + "glyph-orientation-horizontal", + "glyph-orientation-vertical", + "glyphRef", + "glyphref", + "grad", + "gradientTransform", + "gradientUnits", + "gradienttransform", + "gradientunits", + "groupalign", + "gt", + "handler", + "hanging", + "header", + "headers", + "height", + "hgroup", + "hidden", + "hidefocus", + "high", + "hkern", + "horiz-adv-x", + "horiz-origin-x", + "horiz-origin-y", + "hr", + "href", + "hreflang", + 
"hspace", + "http-equiv", + "i", + "icon", + "id", + "ident", + "ideographic", + "iframe", + "image", + "image-rendering", + "imaginary", + "imaginaryi", + "img", + "implies", + "in", + "in2", + "index", + "infinity", + "inputmode", + "ins", + "int", + "integers", + "intercept", + "intersect", + "interval", + "inverse", + "irrelevant", + "isindex", + "ismap", + "k", + "k1", + "k2", + "k3", + "k4", + "kbd", + "kernelMatrix", + "kernelUnitLength", + "kernelmatrix", + "kernelunitlength", + "kerning", + "keyPoints", + "keySplines", + "keyTimes", + "keygen", + "keypoints", + "keysplines", + "keytimes", + "label", + "lambda", + "lang", + "language", + "laplacian", + "largeop", + "lcm", + "legend", + "lengthAdjust", + "lengthadjust", + "leq", + "letter-spacing", + "lighting-color", + "limit", + "limitingConeAngle", + "limitingconeangle", + "line", + "linearGradient", + "lineargradient", + "linebreak", + "linethickness", + "list", + "listener", + "listing", + "ln", + "local", + "log", + "logbase", + "longdesc", + "loop", + "low", + "lowlimit", + "lowsrc", + "lquote", + "lspace", + "lt", + "macros", + "maction", + "main", + "maligngroup", + "malignmark", + "manifest", + "map", + "marginheight", + "marginwidth", + "mark", + "marker", + "marker-end", + "marker-mid", + "marker-start", + "markerHeight", + "markerUnits", + "markerWidth", + "markerheight", + "markerunits", + "markerwidth", + "mask", + "maskContentUnits", + "maskUnits", + "maskcontentunits", + "maskunits", + "math", + "mathbackground", + "mathcolor", + "mathematical", + "mathsize", + "mathvariant", + "matrix", + "matrixrow", + "max", + "maxlength", + "maxsize", + "mean", + "media", + "median", + "mediummathspace", + "menclose", + "menu", + "menuitem", + "merror", + "metadata", + "meter", + "method", + "mfenced", + "mfrac", + "mglyph", + "mi", + "min", + "minsize", + "minus", + "missing-glyph", + "mlabeledtr", + "mmultiscripts", + "mn", + "mo", + "mode", + "moment", + "momentabout", + "movablelimits", + "mover", + 
"mpadded", + "mpath", + "mphantom", + "mprescripts", + "mroot", + "mrow", + "ms", + "mspace", + "msqrt", + "mstyle", + "msub", + "msubsup", + "msup", + "mtable", + "mtd", + "mtext", + "mtr", + "multiple", + "munder", + "munderover", + "name", + "nargs", + "naturalnumbers", + "nav", + "neq", + "nest", + "nobr", + "noembed", + "nohref", + "none", + "noresize", + "noshade", + "not", + "notanumber", + "notation", + "notin", + "notprsubset", + "notsubset", + "nowrap", + "numOctaves", + "numoctaves", + "occurrence", + "offset", + "ol", + "onabort", + "onactivate", + "onafterprint", + "onafterupdate", + "onbefordeactivate", + "onbeforeactivate", + "onbeforecopy", + "onbeforecut", + "onbeforeeditfocus", + "onbeforepaste", + "onbeforeprint", + "onbeforeunload", + "onbeforeupdate", + "onbegin", + "onblur", + "onbounce", + "oncellchange", + "onchange", + "onclick", + "oncontextmenu", + "oncontrolselect", + "oncopy", + "oncut", + "ondataavailable", + "ondatasetchanged", + "ondatasetcomplete", + "ondblclick", + "ondeactivate", + "ondrag", + "ondragdrop", + "ondragend", + "ondragenter", + "ondragleave", + "ondragover", + "ondragstart", + "ondrop", + "onend", + "onerror", + "onerrorupdate", + "onfilterchange", + "onfinish", + "onfocus", + "onfocusin", + "onfocusout", + "onformchange", + "onforminput", + "onhelp", + "oninput", + "oninvalid", + "onkeydown", + "onkeypress", + "onkeyup", + "onload", + "onlosecapture", + "onmessage", + "onmousedown", + "onmouseenter", + "onmouseleave", + "onmousemove", + "onmouseout", + "onmouseover", + "onmouseup", + "onmousewheel", + "onmove", + "onmoveend", + "onmovestart", + "onpaste", + "onpropertychange", + "onreadystatechange", + "onrepeat", + "onreset", + "onresize", + "onrowenter", + "onrowexit", + "onrowsdelete", + "onrowsinserted", + "onscroll", + "onselect", + "onselectstart", + "onstart", + "onstop", + "onsubmit", + "onunload", + "onzoom", + "opacity", + "open", + "operator", + "optimum", + "or", + "order", + "orient", + "orientation", + 
"origin", + "other", + "otherwise", + "outerproduct", + "output", + "overflow", + "overline-position", + "overline-thickness", + "p", + "panose-1", + "partialdiff", + "path", + "pathLength", + "pathlength", + "pattern", + "patternContentUnits", + "patternTransform", + "patternUnits", + "patterncontentunits", + "patterntransform", + "patternunits", + "pi", + "piece", + "piecewise", + "ping", + "plus", + "pointer-events", + "points", + "pointsAtX", + "pointsAtY", + "pointsAtZ", + "pointsatx", + "pointsaty", + "pointsatz", + "polygon", + "polyline", + "poster", + "power", + "prefetch", + "preserveAlpha", + "preserveAspectRatio", + "preservealpha", + "preserveaspectratio", + "primes", + "primitiveUnits", + "primitiveunits", + "product", + "profile", + "progress", + "prompt", + "prsubset", + "q", + "quotient", + "r", + "radialGradient", + "radialgradient", + "radiogroup", + "radius", + "rationals", + "readonly", + "real", + "reals", + "rect", + "refX", + "refY", + "refx", + "refy", + "rel", + "reln", + "rem", + "rendering-intent", + "repeat", + "repeat-max", + "repeat-min", + "repeat-start", + "repeat-template", + "repeatCount", + "repeatDur", + "repeatcount", + "repeatdur", + "replace", + "required", + "requiredExtensions", + "requiredFeatures", + "requiredextensions", + "requiredfeatures", + "restart", + "result", + "rev", + "role", + "root", + "rotate", + "rowalign", + "rowlines", + "rows", + "rowspacing", + "rowspan", + "rquote", + "rspace", + "ruby", + "rule", + "rules", + "rx", + "ry", + "s", + "samp", + "sandbox", + "scalarproduct", + "scale", + "scheme", + "scope", + "scoped", + "scriptlevel", + "scriptminsize", + "scriptsizemultiplier", + "scrolldelay", + "scrolling", + "sdev", + "seamless", + "sec", + "sech", + "section", + "seed", + "selected", + "selection", + "selector", + "semantics", + "sep", + "separator", + "separators", + "set", + "setdiff", + "shape", + "shape-rendering", + "show", + "sin", + "sinh", + "size", + "slope", + "small", + "solidcolor", + 
"space", + "spacing", + "span", + "specification", + "specularConstant", + "specularExponent", + "specularconstant", + "specularexponent", + "speed", + "spreadMethod", + "spreadmethod", + "src", + "srcdoc", + "standby", + "start", + "startOffset", + "startoffset", + "stdDeviation", + "stddeviation", + "stemh", + "stemv", + "step", + "stitchTiles", + "stitchtiles", + "stop", + "stop-color", + "stop-opacity", + "stretchy", + "strike", + "strikethrough-position", + "strikethrough-thickness", + "string", + "stroke", + "stroke-dasharray", + "stroke-dashoffset", + "stroke-linecap", + "stroke-linejoin", + "stroke-miterlimit", + "stroke-opacity", + "stroke-width", + "strong", + "sub", + "subscriptshift", + "subset", + "sum", + "summary", + "sup", + "superscriptshift", + "surfaceScale", + "surfacescale", + "switch", + "symbol", + "symmetric", + "systemLanguage", + "systemlanguage", + "tabindex", + "tableValues", + "tablevalues", + "tan", + "tanh", + "target", + "targetX", + "targetY", + "targetx", + "targety", + "tbreak", + "tendsto", + "text", + "text-anchor", + "text-decoration", + "text-rendering", + "textLength", + "textPath", + "textlength", + "textpath", + "thickmathspace", + "thinmathspace", + "time", + "times", + "to", + "transform", + "transpose", + "tref", + "true", + "tspan", + "tt", + "type", + "u", + "u1", + "u2", + "ul", + "underline-position", + "underline-thickness", + "unicode", + "unicode-bidi", + "unicode-range", + "union", + "units-per-em", + "unselectable", + "uplimit", + "use", + "usemap", + "v-alphabetic", + "v-hanging", + "v-ideographic", + "v-mathematical", + "valign", + "value", + "values", + "valuetype", + "var", + "variance", + "vector", + "vectorproduct", + "version", + "vert-adv-y", + "vert-origin-x", + "vert-origin-y", + "verythickmathspace", + "verythinmathspace", + "veryverythickmathspace", + "veryverythinmathspace", + "video", + "view", + "viewBox", + "viewTarget", + "viewbox", + "viewtarget", + "visibility", + "vkern", + "vlink", + 
"vspace", + "wbr", + "when", + "width", + "widths", + "word-spacing", + "wrap", + "writing-mode", + "x", + "x-height", + "x1", + "x2", + "xChannelSelector", + "xchannelselector", + "xlink:actuate", + "xlink:arcrole", + "xlink:href", + "xlink:role", + "xlink:show", + "xlink:type", + "xml:base", + "xml:lang", + "xml:space", + "xmlns", + "xmlns:xlink", + "xor", + "xref", + "y", + "y1", + "y2", + "yChannelSelector", + "ychannelselector", + "z", + "zoomAndPan", + "zoomandpan", +]; diff --git a/macros/src/lib.rs b/macros/src/lib.rs new file mode 100644 index 0000000..29520f6 --- /dev/null +++ b/macros/src/lib.rs @@ -0,0 +1,136 @@ +// Copyright 2014 The Servo Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![crate_name="string_cache_macros"] +#![crate_type="dylib"] + +#![feature(macro_rules, plugin_registrar, quote, managed_boxes)] +#![allow(unused_imports)] // for quotes + +extern crate syntax; +extern crate rustc; + +use rustc::plugin::Registry; +use syntax::codemap::Span; +use syntax::ast::{TokenTree, TTTok}; +use syntax::ast; +use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; +use syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; + +use std::iter::Chain; +use std::slice::{Items, Found, NotFound}; +use std::gc::Gc; + +mod data; + +#[path="../../shared/static_atom.rs"] +mod static_atom; + +macro_rules! bail ( ($cx:expr, $sp:expr, $msg:expr) => ({ + $cx.span_err($sp, $msg); + return ::syntax::ext::base::DummyResult::any($sp); +})) + +macro_rules! bail_if ( ($e:expr, $cx:expr, $sp:expr, $msg:expr) => ( + if $e { bail!($cx, $sp, $msg) } +)) + +macro_rules! 
expect ( ($cx:expr, $sp:expr, $e:expr, $msg:expr) => ( + match $e { + Some(x) => x, + None => bail!($cx, $sp, $msg), + } +)) + +fn all_atoms<'a>() -> Chain, Items<'a, &'static str>> { + data::fast_set_atoms.iter().chain(data::other_atoms.iter()) +} + +// Build a PhfMap yielding static atom IDs. +// Takes no arguments. +fn expand_static_atom_map(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { + bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); + let tts: Vec = all_atoms().enumerate().flat_map(|(i, k)| { + let i = i as u32; + (quote_tokens!(&mut *cx, $k => $i,)).move_iter() + }).collect(); + MacExpr::new(quote_expr!(&mut *cx, phf_map!($tts))) +} + +// Build the array to convert IDs back to strings. +// FIXME: share storage with the PhfMap keys. +fn expand_static_atom_array(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { + bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_array!()"); + let tts: Vec = all_atoms().flat_map(|k| + quote_tokens!(&mut *cx, $k,).move_iter() + ).collect(); + MacExpr::new(quote_expr!(&mut *cx, &[$tts])) +} + +fn atom_tok_to_str(t: &TokenTree) -> Option { + Some(get_ident(match *t { + TTTok(_, IDENT(s, _)) => s, + TTTok(_, LIT_STR(s)) => s.ident(), + _ => return None, + })) +} + +fn find_atom(name: InternedString) -> Option { + // Use bsearch instead of bsearch_elem because of type mismatch + // between &'t str and &'static str. + match data::fast_set_atoms.binary_search(|&x| x.cmp(&name.get())) { + Found(i) => Some(i), + NotFound(_) => match data::other_atoms.binary_search(|&x| x.cmp(&name.get())) { + Found(i) => Some(i+64), + NotFound(_) => None, + }, + } +} + +struct AtomResult { + expr: Gc, + pat: Gc, +} + +impl MacResult for AtomResult { + fn make_expr(&self) -> Option> { + Some(self.expr) + } + + fn make_pat(&self) -> Option> { + Some(self.pat) + } +} + +// Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. 
+fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { + let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; + let name = match tt { + [ref t] => expect!(cx, sp, atom_tok_to_str(t), usage), + _ => bail!(cx, sp, usage), + }; + + let i = expect!(cx, sp, find_atom(name.clone()), + format!("Unknown static atom {:s}", name.get()).as_slice()); + + let data = static_atom::add_tag(i as u32); + + box AtomResult { + expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), + pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), + } as Box +} + +// NB: This needs to be public or we get a linker error. +#[plugin_registrar] +pub fn plugin_registrar(reg: &mut Registry) { + reg.register_macro("static_atom_map", expand_static_atom_map); + reg.register_macro("static_atom_array", expand_static_atom_array); + reg.register_macro("atom", expand_atom); +} diff --git a/shared/static_atom.rs b/shared/static_atom.rs new file mode 100644 index 0000000..e50e669 --- /dev/null +++ b/shared/static_atom.rs @@ -0,0 +1,28 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! This code is compiled into both the macros crate and the run-time +//! library, in order to guarantee consistency. + +#![allow(dead_code)] + +pub static STATIC_TAG: u8 = 2; + +static STATIC_SHIFT_BITS: uint = 32; + +#[inline(always)] +pub fn add_tag(atom_id: u32) -> u64 { + (atom_id as u64 << STATIC_SHIFT_BITS) | (STATIC_TAG as u64) +} + +/// Undefined to call this on a non-static atom! 
+#[inline(always)] +pub fn remove_tag(atom_data: u64) -> u32 { + (atom_data >> STATIC_SHIFT_BITS) as u32 +} diff --git a/src/atom.rs b/src/atom.rs index 036ae16..07365e1 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,7 +7,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use static_atoms::atom::StaticAtom; +use phf::PhfMap; + use std::fmt; use std::hash::{Hash, Hasher, sip}; use std::mem; @@ -20,6 +21,8 @@ use sync::Mutex; use sync::one::{Once, ONCE_INIT}; use std::rt::heap; +#[path="../shared/static_atom.rs"] +mod static_atom; // Inline atoms are probably buggy on big-endian architectures. #[allow(dead_code)] @@ -29,9 +32,12 @@ static IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); static mut global_string_cache_ptr: *mut Mutex = 0 as *mut _; -static STATIC_SHIFT_BITS: uint = 32; static ENTRY_ALIGNMENT: uint = 16; +// Macro-generated tables for static atoms. +static static_atom_map: PhfMap<&'static str, u32> = static_atom_map!(); +static static_atom_array: &'static [&'static str] = static_atom_array!(); + // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. #[repr(u8)] @@ -39,7 +45,7 @@ static ENTRY_ALIGNMENT: uint = 16; enum AtomType { Dynamic = 0, Inline = 1, - Static = 2, + Static = static_atom::STATIC_TAG, } struct StringCache { @@ -139,19 +145,21 @@ impl StringCache { #[deriving(Eq, Hash, PartialEq)] pub struct Atom { - data: u64 + /// This field is public so that the `atom!()` macro can use it. + /// You should not otherwise access this field. 
+ pub data: u64, } impl Atom { - pub fn from_static(atom_id: StaticAtom) -> Atom { + pub fn from_static(atom_id: u32) -> Atom { Atom { - data: (atom_id as u64 << STATIC_SHIFT_BITS) | (Static as u64) + data: static_atom::add_tag(atom_id), } } pub fn from_slice(string_to_add: &str) -> Atom { - match from_str::(string_to_add) { - Some(atom_id) => { + match static_atom_map.find_equiv(&string_to_add) { + Some(&atom_id) => { Atom::from_static(atom_id) }, None => { @@ -175,8 +183,8 @@ impl Atom { } }, Static => { - let key: StaticAtom = unsafe { mem::transmute((self.data >> STATIC_SHIFT_BITS) as u32) }; - key.as_slice() + *static_atom_array.get(static_atom::remove_tag(self.data) as uint) + .expect("bad static atom") }, Dynamic => { let hash_value = unsafe { &*(self.data as *const StringCacheEntry) }; @@ -246,12 +254,6 @@ impl Clone for Atom { } } -impl Equiv for Atom { - fn equiv(&self, atom_id: &StaticAtom) -> bool { - self.get_type() == Static && self.data >> STATIC_SHIFT_BITS == *atom_id as u64 - } -} - impl Drop for Atom { fn drop(&mut self) { match self.get_type() { @@ -296,7 +298,6 @@ impl Ord for Atom { mod tests { use std::task::spawn; use super::{Atom, Static, Inline, Dynamic}; - use static_atoms::atom; use test::Bencher; #[test] @@ -328,7 +329,11 @@ mod tests { let s1 = Atom::from_slice("id"); assert!(s1.get_type_and_inline_len() == (Static, 0)); - let i0 = Atom::from_slice("z"); + let s1 = Atom::from_slice("body"); + assert!(s1.get_type_and_inline_len() == (Static, 0)); + + // "z" is a static atom + let i0 = Atom::from_slice("c"); assert!(i0.get_type_and_inline_len() == (Inline, 1)); let i1 = Atom::from_slice("zz"); @@ -430,15 +435,6 @@ mod tests { assert!(i0 != d0); } - #[test] - fn test_equiv() { - let s0 = Atom::from_slice("div"); - assert!(s0.equiv(&atom::Div)); - - let s1 = Atom::from_slice("Div"); - assert!(!s1.equiv(&atom::Div)); - } - #[test] fn test_threads() { for _ in range(0u32, 100u32) { @@ -490,4 +486,32 @@ mod tests { } }); } + + #[test] + fn 
atom_macro() { + assert_eq!(atom!(body), Atom::from_slice("body")); + assert_eq!(atom!("body"), Atom::from_slice("body")); + assert_eq!(atom!("font-weight"), Atom::from_slice("font-weight")); + } + + #[test] + fn match_atom() { + assert_eq!(2u, match Atom::from_slice("head") { + atom!(br) => 1u, + atom!(html) | atom!(head) => 2u, + _ => 3u, + }); + + assert_eq!(3u, match Atom::from_slice("body") { + atom!(br) => 1u, + atom!(html) | atom!(head) => 2u, + _ => 3u, + }); + + assert_eq!(3u, match Atom::from_slice("zzzzzz") { + atom!(br) => 1u, + atom!(html) | atom!(head) => 2u, + _ => 3u, + }); + } } diff --git a/src/lib.rs b/src/lib.rs index 0f958fc..874c42b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,5 +22,16 @@ extern crate test; extern crate phf_mac; extern crate phf; +#[phase(plugin)] +extern crate string_cache_macros; + pub mod atom; -pub mod static_atoms; + +// A private module so that macro-expanded idents like +// `::string_cache::atom::Atom` will also work in this crate. +// +// `libstd` uses the same trick. +#[doc(hidden)] +mod string_cache { + pub use atom; +} diff --git a/src/static_atoms.rs b/src/static_atoms.rs deleted file mode 100644 index 6fc5e41..0000000 --- a/src/static_atoms.rs +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! A list of static atoms that are pre-hashed at compile time. 
- -pub mod atom { - use phf::PhfOrderedMap; - use std::from_str::FromStr; - - #[repr(u32)] - #[deriving(Eq, PartialEq)] - pub enum StaticAtom { - EmptyString, - Id, - Class, - Href, - Style, - Span, - Width, - Height, - Type, - Data, - New, - Name, - Src, - Rel, - Div, - } - - static STATIC_ATOMS: PhfOrderedMap<&'static str, StaticAtom> = phf_ordered_map!( - "" => EmptyString, - "id" => Id, - "class" => Class, - "href" => Href, - "style" => Style, - "span" => Span, - "width" => Width, - "height" => Height, - "type" => Type, - "data" => Data, - "new" => New, - "name" => Name, - "src" => Src, - "rel" => Rel, - "div" => Div, - ); - - impl FromStr for StaticAtom { - #[inline] - fn from_str(string: &str) -> Option { - match STATIC_ATOMS.find_equiv(&string) { - None => None, - Some(&k) => Some(k) - } - } - } - - impl StaticAtom { - pub fn as_slice(&self) -> &'static str { - let &(string, _) = STATIC_ATOMS.entries().idx(*self as uint).unwrap(); - string - } - } -} From 692123495ad27a2bce16880786d97e992a82239d Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 26 Aug 2014 15:32:42 -0700 Subject: [PATCH 003/379] Re-export atom::Atom at top level --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 874c42b..a5bd608 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,8 @@ extern crate phf; #[phase(plugin)] extern crate string_cache_macros; +pub use atom::Atom; + pub mod atom; // A private module so that macro-expanded idents like From 3d940fd2d6ce1786d92fc950340cbc3fac8c2d5e Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 26 Aug 2014 16:22:37 -0700 Subject: [PATCH 004/379] Fixes for Servo's older Rust --- macros/src/lib.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 29520f6..24e160e 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -24,7 +24,7 @@ use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; use 
syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; use std::iter::Chain; -use std::slice::{Items, Found, NotFound}; +use std::slice::Items; use std::gc::Gc; mod data; @@ -84,13 +84,9 @@ fn atom_tok_to_str(t: &TokenTree) -> Option { fn find_atom(name: InternedString) -> Option { // Use bsearch instead of bsearch_elem because of type mismatch // between &'t str and &'static str. - match data::fast_set_atoms.binary_search(|&x| x.cmp(&name.get())) { - Found(i) => Some(i), - NotFound(_) => match data::other_atoms.binary_search(|&x| x.cmp(&name.get())) { - Found(i) => Some(i+64), - NotFound(_) => None, - }, - } + data::fast_set_atoms.bsearch(|&x| x.cmp(&name.get())).or_else(|| + data::other_atoms.bsearch(|&x| x.cmp(&name.get())).map(|i| i+64)) + } struct AtomResult { From 6ac3bf4f58426ab1e82401c7d59e9c485635cb4b Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Fri, 5 Sep 2014 13:47:50 -0700 Subject: [PATCH 005/379] update to make FNV hashing work --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index a5bd608..3e50408 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(phase, macro_rules)] +#![feature(phase, macro_rules, default_type_params)] extern crate sync; extern crate debug; From b9a8cb63dbe2c76b1387c7f786c35c3406a9ed22 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 1 Sep 2014 17:44:03 -0700 Subject: [PATCH 006/379] Upgrade to rustc 0.12.0-pre (0bdac78da 2014-09-01 21:31:00 +0000) --- macros/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 24e160e..ef563e8 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -54,7 +54,7 @@ fn all_atoms<'a>() -> Chain, Items<'a, &'static str>> { // Build a PhfMap yielding static atom IDs. // Takes no arguments. 
-fn expand_static_atom_map(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { +fn expand_static_atom_map(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); let tts: Vec = all_atoms().enumerate().flat_map(|(i, k)| { let i = i as u32; @@ -65,7 +65,7 @@ fn expand_static_atom_map(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box Box { +fn expand_static_atom_array(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_array!()"); let tts: Vec = all_atoms().flat_map(|k| quote_tokens!(&mut *cx, $k,).move_iter() @@ -105,7 +105,7 @@ impl MacResult for AtomResult { } // Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. -fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { +fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; let name = match tt { [ref t] => expect!(cx, sp, atom_tok_to_str(t), usage), From 357e023d81cbf951eff3f549ec5ada375881c0c8 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 27 Aug 2014 20:10:50 -0700 Subject: [PATCH 007/379] Build with #![no_std] This helps us make a C-friendly library, which is important for html5ever. --- src/atom.rs | 35 +++++++++++++++++++++-------------- src/lib.rs | 21 ++++++++++++++++++++- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 07365e1..760b2bb 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,19 +7,22 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
+use core::prelude::*; + use phf::PhfMap; -use std::fmt; -use std::hash::{Hash, Hasher, sip}; -use std::mem; -use std::ptr; -use std::slice; -use std::slice::bytes; -use std::str; -use std::sync::atomics::{AtomicInt, SeqCst}; +use core::fmt; +use core::mem; +use core::ptr; +use core::slice; +use core::slice::bytes; +use core::str; +use core::atomic::{AtomicInt, SeqCst}; +use alloc::heap; +use collections::string::String; +use collections::hash::{Hash, Hasher, sip}; use sync::Mutex; use sync::one::{Once, ONCE_INIT}; -use std::rt::heap; #[path="../shared/static_atom.rs"] mod static_atom; @@ -66,7 +69,7 @@ impl StringCacheEntry { next_in_bucket: next, hash: hash, ref_count: AtomicInt::new(1), - string: string_to_add.to_string(), + string: String::from_str(string_to_add), } } } @@ -296,9 +299,13 @@ impl Ord for Atom { #[cfg(test)] mod tests { + use core::prelude::*; + use std::task::spawn; use super::{Atom, Static, Inline, Dynamic}; use test::Bencher; + use collections::MutableSeq; + use collections::vec::Vec; #[test] fn test_as_slice() { @@ -447,8 +454,8 @@ mod tests { #[bench] fn bench_strings(b: &mut Bencher) { - let mut strings0 = vec!(); - let mut strings1 = vec!(); + let mut strings0 = Vec::new(); + let mut strings1 = Vec::new(); for _ in range(0u32, 1000u32) { strings0.push("a"); @@ -468,8 +475,8 @@ mod tests { #[bench] fn bench_atoms(b: &mut Bencher) { - let mut atoms0 = vec!(); - let mut atoms1 = vec!(); + let mut atoms0 = Vec::new(); + let mut atoms1 = Vec::new(); for _ in range(0u32, 1000u32) { atoms0.push(Atom::from_slice("a")); diff --git a/src/lib.rs b/src/lib.rs index 3e50408..5100df8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,14 +10,26 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(phase, macro_rules, default_type_params)] +#![feature(phase, macro_rules, default_type_params, globs)] +#![no_std] +#[phase(plugin, link)] +extern crate core; + +extern crate alloc; +extern crate collections; extern crate sync; extern crate debug; 
#[cfg(test)] extern crate test; +#[cfg(test)] +extern crate native; + +#[cfg(test)] +extern crate std; + #[phase(plugin)] extern crate phf_mac; extern crate phf; @@ -37,3 +49,10 @@ pub mod atom; mod string_cache { pub use atom; } + +// For macros and deriving. +#[cfg(not(test))] +mod std { + pub use core::{cmp, fmt}; + pub use collections::hash; +} From 86d0017234ddb4c3531418441eaea47ad96ab55f Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 3 Sep 2014 15:50:31 -0700 Subject: [PATCH 008/379] Use a single structure for both directions of static atom mapping --- macros/src/lib.rs | 29 +++++++---------------------- src/atom.rs | 15 +++++++-------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index ef563e8..03db560 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -48,29 +48,15 @@ macro_rules! expect ( ($cx:expr, $sp:expr, $e:expr, $msg:expr) => ( } )) -fn all_atoms<'a>() -> Chain, Items<'a, &'static str>> { - data::fast_set_atoms.iter().chain(data::other_atoms.iter()) -} - -// Build a PhfMap yielding static atom IDs. +// Build a PhfOrderedSet of static atoms. // Takes no arguments. -fn expand_static_atom_map(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { +fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); - let tts: Vec = all_atoms().enumerate().flat_map(|(i, k)| { - let i = i as u32; - (quote_tokens!(&mut *cx, $k => $i,)).move_iter() + let all_atoms = data::fast_set_atoms.iter().chain(data::other_atoms.iter()); + let tts: Vec = all_atoms.flat_map(|k| { + (quote_tokens!(&mut *cx, $k,)).move_iter() }).collect(); - MacExpr::new(quote_expr!(&mut *cx, phf_map!($tts))) -} - -// Build the array to convert IDs back to strings. -// FIXME: share storage with the PhfMap keys. 
-fn expand_static_atom_array(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_array!()"); - let tts: Vec = all_atoms().flat_map(|k| - quote_tokens!(&mut *cx, $k,).move_iter() - ).collect(); - MacExpr::new(quote_expr!(&mut *cx, &[$tts])) + MacExpr::new(quote_expr!(&mut *cx, phf_ordered_set!($tts))) } fn atom_tok_to_str(t: &TokenTree) -> Option { @@ -126,7 +112,6 @@ fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box = 0 as *mut _; static ENTRY_ALIGNMENT: uint = 16; -// Macro-generated tables for static atoms. -static static_atom_map: PhfMap<&'static str, u32> = static_atom_map!(); -static static_atom_array: &'static [&'static str] = static_atom_array!(); +// Macro-generated table for static atoms. +static static_atom_set: PhfOrderedSet<&'static str> = static_atom_set!(); // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. @@ -161,9 +160,9 @@ impl Atom { } pub fn from_slice(string_to_add: &str) -> Atom { - match static_atom_map.find_equiv(&string_to_add) { - Some(&atom_id) => { - Atom::from_static(atom_id) + match static_atom_set.find_index_equiv(&string_to_add) { + Some(atom_id) => { + Atom::from_static(atom_id as u32) }, None => { if string_to_add.len() < 8 { @@ -186,7 +185,7 @@ impl Atom { } }, Static => { - *static_atom_array.get(static_atom::remove_tag(self.data) as uint) + *static_atom_set.iter().idx(static_atom::remove_tag(self.data) as uint) .expect("bad static atom") }, Dynamic => { From 7c923bd0bc2f35c8a246ece994f538bc0a454978 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 3 Sep 2014 16:09:08 -0700 Subject: [PATCH 009/379] Eliminate the requirement for macros/src/data.rs to remain sorted --- macros/Cargo.toml | 3 +++ macros/src/data.rs | 25 +++++++++++-------------- macros/src/lib.rs | 29 +++++++++++++++++------------ 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/macros/Cargo.toml b/macros/Cargo.toml index 
7fb85ae..1030717 100644 --- a/macros/Cargo.toml +++ b/macros/Cargo.toml @@ -8,3 +8,6 @@ authors = [ "The Servo Project Developers" ] name = "string_cache_macros" plugin = true + +[dependencies.lazy_static] +git = "https://github.com/Kimundi/lazy-static.rs" diff --git a/macros/src/data.rs b/macros/src/data.rs index ec4b975..22e4313 100644 --- a/macros/src/data.rs +++ b/macros/src/data.rs @@ -7,15 +7,15 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// The first 64 atoms are special: we can quickly check membership -// in sets of these, using a bitmask. This includes every tag that -// appears in more than one set in the tree builder spec, plus a -// few others (arbitrarily chosen). -// -// FIXME(kmc): check if this is really true with the packed tag bits -// -// This list must remain sorted. -pub static fast_set_atoms: [&'static str, ..64] = [ +pub static atoms: &'static [&'static str] = &[ + + // The first 64 atoms are special: we can quickly check membership + // in sets of these, using a bitmask. This includes every tag that + // appears in more than one set in the tree builder spec, plus a + // few others (arbitrarily chosen). + // + // FIXME(kmc): check if this is really true with the packed tag bits + "a", "address", "applet", @@ -80,12 +80,9 @@ pub static fast_set_atoms: [&'static str, ..64] = [ "tr", "track", "xmp", -]; -// The rest. -// -// This list must remain sorted. -pub static other_atoms: &'static [&'static str] = &[ + // End of first 64 atoms. 
+ "", "abbr", "abs", diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 03db560..2e9a5b6 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -10,12 +10,15 @@ #![crate_name="string_cache_macros"] #![crate_type="dylib"] -#![feature(macro_rules, plugin_registrar, quote, managed_boxes)] +#![feature(macro_rules, plugin_registrar, quote, managed_boxes, phase)] #![allow(unused_imports)] // for quotes extern crate syntax; extern crate rustc; +#[phase(plugin)] +extern crate lazy_static; + use rustc::plugin::Registry; use syntax::codemap::Span; use syntax::ast::{TokenTree, TTTok}; @@ -24,8 +27,8 @@ use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; use syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; use std::iter::Chain; -use std::slice::Items; use std::gc::Gc; +use std::collections::HashMap; mod data; @@ -52,8 +55,7 @@ macro_rules! expect ( ($cx:expr, $sp:expr, $e:expr, $msg:expr) => ( // Takes no arguments. fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); - let all_atoms = data::fast_set_atoms.iter().chain(data::other_atoms.iter()); - let tts: Vec = all_atoms.flat_map(|k| { + let tts: Vec = data::atoms.iter().flat_map(|k| { (quote_tokens!(&mut *cx, $k,)).move_iter() }).collect(); MacExpr::new(quote_expr!(&mut *cx, phf_ordered_set!($tts))) @@ -67,12 +69,15 @@ fn atom_tok_to_str(t: &TokenTree) -> Option { })) } -fn find_atom(name: InternedString) -> Option { - // Use bsearch instead of bsearch_elem because of type mismatch - // between &'t str and &'static str. - data::fast_set_atoms.bsearch(|&x| x.cmp(&name.get())).or_else(|| - data::other_atoms.bsearch(|&x| x.cmp(&name.get())).map(|i| i+64)) - +// Build a map from atoms to IDs for use in implementing the atom!() macro. +lazy_static! 
{ + static ref STATIC_ATOM_MAP: HashMap<&'static str, uint> = { + let mut m = HashMap::new(); + for (i, x) in data::atoms.iter().enumerate() { + m.insert(*x, i); + } + m + }; } struct AtomResult { @@ -98,10 +103,10 @@ fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box bail!(cx, sp, usage), }; - let i = expect!(cx, sp, find_atom(name.clone()), + let i = expect!(cx, sp, STATIC_ATOM_MAP.find_equiv(&name.get()), format!("Unknown static atom {:s}", name.get()).as_slice()); - let data = static_atom::add_tag(i as u32); + let data = static_atom::add_tag(*i as u32); box AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), From ee9250e3524b767a3f5af985cef58fb1dfba0704 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 3 Sep 2014 16:16:00 -0700 Subject: [PATCH 010/379] Move crate-level macros stuff into its own file --- macros/src/{ => atom}/data.rs | 0 macros/src/atom/mod.rs | 87 +++++++++++++++++++++++++++++++++++ macros/src/lib.rs | 81 ++------------------------------ 3 files changed, 90 insertions(+), 78 deletions(-) rename macros/src/{ => atom}/data.rs (100%) create mode 100644 macros/src/atom/mod.rs diff --git a/macros/src/data.rs b/macros/src/atom/data.rs similarity index 100% rename from macros/src/data.rs rename to macros/src/atom/data.rs diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs new file mode 100644 index 0000000..885308a --- /dev/null +++ b/macros/src/atom/mod.rs @@ -0,0 +1,87 @@ +// Copyright 2014 The Servo Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use syntax::codemap::Span; +use syntax::ast::{TokenTree, TTTok}; +use syntax::ast; +use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; +use syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; + +use std::iter::Chain; +use std::slice::{Items, Found, NotFound}; +use std::gc::Gc; +use std::collections::HashMap; + +mod data; + +#[path="../../../shared/static_atom.rs"] +mod static_atom; + +// Build a PhfOrderedSet of static atoms. +// Takes no arguments. +pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { + bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); + let tts: Vec = data::atoms.iter().flat_map(|k| { + (quote_tokens!(&mut *cx, $k,)).move_iter() + }).collect(); + MacExpr::new(quote_expr!(&mut *cx, phf_ordered_set!($tts))) +} + +fn atom_tok_to_str(t: &TokenTree) -> Option { + Some(get_ident(match *t { + TTTok(_, IDENT(s, _)) => s, + TTTok(_, LIT_STR(s)) => s.ident(), + _ => return None, + })) +} + +// Build a map from atoms to IDs for use in implementing the atom!() macro. +lazy_static! { + static ref STATIC_ATOM_MAP: HashMap<&'static str, uint> = { + let mut m = HashMap::new(); + for (i, x) in data::atoms.iter().enumerate() { + m.insert(*x, i); + } + m + }; +} + +struct AtomResult { + expr: Gc, + pat: Gc, +} + +impl MacResult for AtomResult { + fn make_expr(&self) -> Option> { + Some(self.expr) + } + + fn make_pat(&self) -> Option> { + Some(self.pat) + } +} + +// Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. 
+pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { + let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; + let name = match tt { + [ref t] => expect!(cx, sp, atom_tok_to_str(t), usage), + _ => bail!(cx, sp, usage), + }; + + let i = expect!(cx, sp, STATIC_ATOM_MAP.find_equiv(&name.get()), + format!("Unknown static atom {:s}", name.get()).as_slice()); + + let data = static_atom::add_tag(*i as u32); + + box AtomResult { + expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), + pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), + } as Box +} diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 2e9a5b6..6e0322a 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -20,20 +20,6 @@ extern crate rustc; extern crate lazy_static; use rustc::plugin::Registry; -use syntax::codemap::Span; -use syntax::ast::{TokenTree, TTTok}; -use syntax::ast; -use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; -use syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; - -use std::iter::Chain; -use std::gc::Gc; -use std::collections::HashMap; - -mod data; - -#[path="../../shared/static_atom.rs"] -mod static_atom; macro_rules! bail ( ($cx:expr, $sp:expr, $msg:expr) => ({ $cx.span_err($sp, $msg); @@ -51,72 +37,11 @@ macro_rules! expect ( ($cx:expr, $sp:expr, $e:expr, $msg:expr) => ( } )) -// Build a PhfOrderedSet of static atoms. -// Takes no arguments. 
-fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); - let tts: Vec = data::atoms.iter().flat_map(|k| { - (quote_tokens!(&mut *cx, $k,)).move_iter() - }).collect(); - MacExpr::new(quote_expr!(&mut *cx, phf_ordered_set!($tts))) -} - -fn atom_tok_to_str(t: &TokenTree) -> Option { - Some(get_ident(match *t { - TTTok(_, IDENT(s, _)) => s, - TTTok(_, LIT_STR(s)) => s.ident(), - _ => return None, - })) -} - -// Build a map from atoms to IDs for use in implementing the atom!() macro. -lazy_static! { - static ref STATIC_ATOM_MAP: HashMap<&'static str, uint> = { - let mut m = HashMap::new(); - for (i, x) in data::atoms.iter().enumerate() { - m.insert(*x, i); - } - m - }; -} - -struct AtomResult { - expr: Gc, - pat: Gc, -} - -impl MacResult for AtomResult { - fn make_expr(&self) -> Option> { - Some(self.expr) - } - - fn make_pat(&self) -> Option> { - Some(self.pat) - } -} - -// Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. -fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; - let name = match tt { - [ref t] => expect!(cx, sp, atom_tok_to_str(t), usage), - _ => bail!(cx, sp, usage), - }; - - let i = expect!(cx, sp, STATIC_ATOM_MAP.find_equiv(&name.get()), - format!("Unknown static atom {:s}", name.get()).as_slice()); - - let data = static_atom::add_tag(*i as u32); - - box AtomResult { - expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - } as Box -} +mod atom; // NB: This needs to be public or we get a linker error. 
#[plugin_registrar] pub fn plugin_registrar(reg: &mut Registry) { - reg.register_macro("static_atom_set", expand_static_atom_set); - reg.register_macro("atom", expand_atom); + reg.register_macro("static_atom_set", atom::expand_static_atom_set); + reg.register_macro("atom", atom::expand_atom); } From dcc418709bd3beb5568ee4c20090a196c21b490f Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 3 Sep 2014 16:52:38 -0700 Subject: [PATCH 011/379] Statically intern namespaces, and provide a shorthand macro We can't make these simple Atom constants, because the Atom type has a destructor. --- macros/src/atom/data.rs | 9 ++++++++ macros/src/atom/mod.rs | 51 +++++++++++++++++++++++++++++++++++------ macros/src/lib.rs | 1 + src/atom.rs | 17 ++++++++++++++ 4 files changed, 71 insertions(+), 7 deletions(-) diff --git a/macros/src/atom/data.rs b/macros/src/atom/data.rs index 22e4313..5ab7a76 100644 --- a/macros/src/atom/data.rs +++ b/macros/src/atom/data.rs @@ -84,6 +84,15 @@ pub static atoms: &'static [&'static str] = &[ // End of first 64 atoms. 
"", + + // XML namespaces known to the HTML syntax spec + "http://www.w3.org/1999/xhtml", + "http://www.w3.org/XML/1998/namespace", + "http://www.w3.org/2000/xmlns/", + "http://www.w3.org/1999/xlink", + "http://www.w3.org/2000/svg", + "http://www.w3.org/1998/Math/MathML", + "abbr", "abs", "accent", diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 885308a..cb85d92 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -17,6 +17,7 @@ use std::iter::Chain; use std::slice::{Items, Found, NotFound}; use std::gc::Gc; use std::collections::HashMap; +use std::ascii::AsciiExt; mod data; @@ -67,6 +68,18 @@ impl MacResult for AtomResult { } } +fn expand_atom_str(cx: &mut ExtCtxt, sp: Span, name: &str) -> Box { + let i = expect!(cx, sp, STATIC_ATOM_MAP.find_equiv(&name), + format!("Unknown static atom {:s}", name).as_slice()); + + let data = static_atom::add_tag(*i as u32); + + box AtomResult { + expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), + pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), + } as Box +} + // Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; @@ -74,14 +87,38 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box expect!(cx, sp, atom_tok_to_str(t), usage), _ => bail!(cx, sp, usage), }; + expand_atom_str(cx, sp, name.get()) +} - let i = expect!(cx, sp, STATIC_ATOM_MAP.find_equiv(&name.get()), - format!("Unknown static atom {:s}", name.get()).as_slice()); +// Translate `ns!(HTML)` into `atom!("http://www.w3.org/1999/xhtml")`. +// The argument is ASCII-case-insensitive. 
+pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { + static all_ns: &'static [(&'static str, &'static str)] = [ + ("html", "http://www.w3.org/1999/xhtml"), + ("xml", "http://www.w3.org/XML/1998/namespace"), + ("xmlns", "http://www.w3.org/2000/xmlns/"), + ("xlink", "http://www.w3.org/1999/xlink"), + ("svg", "http://www.w3.org/2000/svg"), + ("mathml", "http://www.w3.org/1998/Math/MathML"), + ]; - let data = static_atom::add_tag(*i as u32); + let name = match tt { + [ref t] => atom_tok_to_str(t), + _ => None, + }; - box AtomResult { - expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - } as Box + match name { + Some(name) => { + for &(short, url) in all_ns.iter() { + if short.eq_ignore_ascii_case(name.get()) { + return expand_atom_str(cx, sp, url); + } + } + } + None => (), + } + + let ns_names: Vec<&'static str> = all_ns.iter().map(|&(x, _)| x).collect(); + bail!(cx, sp, format!("Usage: ns!(HTML), case-insensitive. 
Known namespaces: {:s}", + ns_names.connect(" ")).as_slice()); } diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 6e0322a..d300001 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -44,4 +44,5 @@ mod atom; pub fn plugin_registrar(reg: &mut Registry) { reg.register_macro("static_atom_set", atom::expand_static_atom_set); reg.register_macro("atom", atom::expand_atom); + reg.register_macro("ns", atom::expand_ns); } diff --git a/src/atom.rs b/src/atom.rs index d24cf32..623db54 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -520,4 +520,21 @@ mod tests { _ => 3u, }); } + + #[test] + fn ns_macro() { + assert_eq!(ns!(html), atom!("http://www.w3.org/1999/xhtml")); + assert_eq!(ns!(xml), atom!("http://www.w3.org/XML/1998/namespace")); + assert_eq!(ns!(xmlns), atom!("http://www.w3.org/2000/xmlns/")); + assert_eq!(ns!(xlink), atom!("http://www.w3.org/1999/xlink")); + assert_eq!(ns!(svg), atom!("http://www.w3.org/2000/svg")); + assert_eq!(ns!(mathml), atom!("http://www.w3.org/1998/Math/MathML")); + + assert_eq!(ns!(HtMl), atom!("http://www.w3.org/1999/xhtml")); + assert_eq!(ns!(xMl), atom!("http://www.w3.org/XML/1998/namespace")); + assert_eq!(ns!(XmLnS), atom!("http://www.w3.org/2000/xmlns/")); + assert_eq!(ns!(xLiNk), atom!("http://www.w3.org/1999/xlink")); + assert_eq!(ns!(SvG), atom!("http://www.w3.org/2000/svg")); + assert_eq!(ns!(mAtHmL), atom!("http://www.w3.org/1998/Math/MathML")); + } } From c2939d54e910aad735441ecbf02922a4b92c80af Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 12 Sep 2014 13:17:41 -0700 Subject: [PATCH 012/379] Only take the lock when the ref count hits zero --- src/atom.rs | 61 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 07365e1..b279835 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -114,31 +114,31 @@ impl StringCache { let ptr = key as *mut StringCacheEntry; let value: &mut StringCacheEntry = unsafe { mem::transmute(ptr) 
}; - if value.ref_count.fetch_sub(1, SeqCst) == 1 { - let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as uint; - - let mut current = self.buckets[bucket_index]; - let mut prev: *mut StringCacheEntry = ptr::mut_null(); - - while current != ptr::mut_null() { - if current == ptr { - if prev != ptr::mut_null() { - unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; - } else { - unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; - } - break; + debug_assert!(value.ref_count.load(SeqCst) == 0); + + let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as uint; + + let mut current = self.buckets[bucket_index]; + let mut prev: *mut StringCacheEntry = ptr::mut_null(); + + while current != ptr::mut_null() { + if current == ptr { + if prev != ptr::mut_null() { + unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; + } else { + unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; } - prev = current; - unsafe { current = (*current).next_in_bucket }; + break; } - assert!(current != ptr::mut_null()); + prev = current; + unsafe { current = (*current).next_in_bucket }; + } + assert!(current != ptr::mut_null()); - unsafe { - ptr::read(ptr as *const StringCacheEntry); - heap::deallocate(ptr as *mut u8, - mem::size_of::(), ENTRY_ALIGNMENT); - } + unsafe { + ptr::read(ptr as *const StringCacheEntry); + heap::deallocate(ptr as *mut u8, + mem::size_of::(), ENTRY_ALIGNMENT); } } } @@ -239,6 +239,7 @@ impl Atom { } impl Clone for Atom { + #[inline] fn clone(&self) -> Atom { let atom_type = self.get_type(); match atom_type { @@ -255,11 +256,23 @@ impl Clone for Atom { } impl Drop for Atom { + #[inline] fn drop(&mut self) { + // Out of line to guide inlining. 
+ fn drop_slow(this: &mut Atom) { + let mut string_cache = unsafe { + &*global_string_cache_ptr + }.lock(); + string_cache.remove(this.data); + } + match self.get_type() { Dynamic => { - let mut string_cache = unsafe { &*global_string_cache_ptr }.lock(); - string_cache.remove(self.data); + let ptr = self.data as *mut StringCacheEntry; + let value: &mut StringCacheEntry = unsafe { mem::transmute(ptr) }; + if value.ref_count.fetch_sub(1, SeqCst) == 1 { + drop_slow(self); + } }, _ => {} } From 7a7e9ea9075d6babdc85f3acbfb64ae7f12fe609 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 12 Sep 2014 18:07:39 -0700 Subject: [PATCH 013/379] Fix race in ref count manipulation in slow-drop --- src/atom.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index b279835..ff9c97d 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -263,7 +263,18 @@ impl Drop for Atom { let mut string_cache = unsafe { &*global_string_cache_ptr }.lock(); - string_cache.remove(this.data); + + // Note that we need a second check here. The problem we are trying to defend against + // is thus: + // + // 1. Thread B calls Atom::new() and takes mutex. + // 2. Thread A calls drop(), ref count drops to 0. + // 3. Thread B bumps ref count to 1. + // + // In this case we need thread A to perform a separate check. 
+ if value.ref_count.fetch(SeqCst) == 0 { + string_cache.remove(this.data); + } } match self.get_type() { From b8bd87f2843d675a4052bf704959fc0d2fa0198d Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 16 Sep 2014 10:58:02 -0700 Subject: [PATCH 014/379] Fix race in ref count manipulation harder --- src/atom.rs | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index ff9c97d..844d4e3 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -92,7 +92,24 @@ impl StringCache { ptr = value.next_in_bucket; } - if ptr == ptr::mut_null() { + let mut should_add = false; + if ptr != ptr::mut_null() { + unsafe { + if (*ptr).ref_count.fetch_add(1, SeqCst) == 0 { + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. (Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. + should_add = true; + (*ptr).ref_count.fetch_sub(1, SeqCst); + } + } + } else { + should_add = true + } + + if should_add { unsafe { ptr = heap::allocate(mem::size_of::(), ENTRY_ALIGNMENT) as *mut StringCacheEntry; @@ -100,10 +117,6 @@ impl StringCache { StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); } self.buckets[bucket_index] = ptr; - } else { - unsafe { - (*ptr).ref_count.fetch_add(1, SeqCst); - } } assert!(ptr != ptr::mut_null()); @@ -215,7 +228,9 @@ impl Atom { }); } - let mut string_cache = unsafe { &*global_string_cache_ptr }.lock(); + let mut string_cache = unsafe { + (&*global_string_cache_ptr).lock() + }; let hash_value_address = string_cache.add(string); Atom { data: hash_value_address | Dynamic as u64 @@ -261,20 +276,9 @@ impl Drop for Atom { // Out of line to guide inlining. 
fn drop_slow(this: &mut Atom) { let mut string_cache = unsafe { - &*global_string_cache_ptr - }.lock(); - - // Note that we need a second check here. The problem we are trying to defend against - // is thus: - // - // 1. Thread B calls Atom::new() and takes mutex. - // 2. Thread A calls drop(), ref count drops to 0. - // 3. Thread B bumps ref count to 1. - // - // In this case we need thread A to perform a separate check. - if value.ref_count.fetch(SeqCst) == 0 { - string_cache.remove(this.data); - } + (&*global_string_cache_ptr).lock() + }; + string_cache.remove(this.data); } match self.get_type() { From e7848140fe6a5b7ee08dcfc7c9ad9d98e3216115 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 16 Sep 2014 09:53:55 -0700 Subject: [PATCH 015/379] Upgrade to rustc 2014-09-15 --- macros/src/atom/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index cb85d92..5c390db 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -7,6 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use syntax::ptr::P; use syntax::codemap::Span; use syntax::ast::{TokenTree, TTTok}; use syntax::ast; @@ -15,7 +16,6 @@ use syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; use std::iter::Chain; use std::slice::{Items, Found, NotFound}; -use std::gc::Gc; use std::collections::HashMap; use std::ascii::AsciiExt; @@ -54,16 +54,16 @@ lazy_static! 
{ } struct AtomResult { - expr: Gc, - pat: Gc, + expr: P, + pat: P, } impl MacResult for AtomResult { - fn make_expr(&self) -> Option> { + fn make_expr(self: Box) -> Option> { Some(self.expr) } - fn make_pat(&self) -> Option> { + fn make_pat(self: Box) -> Option> { Some(self.pat) } } From f5b09bd7d290dfc04a8d15c206ad6a3da2dc538e Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Fri, 19 Sep 2014 18:46:06 -0700 Subject: [PATCH 016/379] Add more static atoms Servo's interface to the HTML parser needs these. --- macros/src/atom/data.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/macros/src/atom/data.rs b/macros/src/atom/data.rs index 5ab7a76..172fb9a 100644 --- a/macros/src/atom/data.rs +++ b/macros/src/atom/data.rs @@ -209,6 +209,7 @@ pub static atoms: &'static [&'static str] = &[ "baseline-shift", "baseprofile", "bbox", + "bdi", "bdo", "begin", "bevelled", @@ -293,6 +294,7 @@ pub static atoms: &'static [&'static str] = &[ "data", "datafld", "dataformatas", + "datalist", "datasrc", "datatemplate", "datetime", @@ -910,6 +912,7 @@ pub static atoms: &'static [&'static str] = &[ "small", "solidcolor", "space", + "spacer", "spacing", "span", "specification", From 91532048e4b41cc85796d06a6e07b94ea47d06e8 Mon Sep 17 00:00:00 2001 From: Tetsuharu OHZEKI Date: Sun, 21 Sep 2014 22:00:03 +0900 Subject: [PATCH 017/379] Use ptr::null_mut() instead of ptr::mut_null() which is deprecated. 
--- src/atom.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 2a75283..0dfffd8 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -86,7 +86,7 @@ impl StringCache { let bucket_index = (hash & (self.buckets.len()-1) as u64) as uint; let mut ptr = self.buckets[bucket_index]; - while ptr != ptr::mut_null() { + while ptr != ptr::null_mut() { let value = unsafe { &*ptr }; if value.hash == hash && value.string.as_slice() == string_to_add { break; @@ -95,7 +95,7 @@ impl StringCache { } let mut should_add = false; - if ptr != ptr::mut_null() { + if ptr != ptr::null_mut() { unsafe { if (*ptr).ref_count.fetch_add(1, SeqCst) == 0 { // Uh-oh. The pointer's reference count was zero, which means someone may try @@ -121,7 +121,7 @@ impl StringCache { self.buckets[bucket_index] = ptr; } - assert!(ptr != ptr::mut_null()); + assert!(ptr != ptr::null_mut()); ptr as u64 } @@ -134,11 +134,11 @@ impl StringCache { let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as uint; let mut current = self.buckets[bucket_index]; - let mut prev: *mut StringCacheEntry = ptr::mut_null(); + let mut prev: *mut StringCacheEntry = ptr::null_mut(); - while current != ptr::mut_null() { + while current != ptr::null_mut() { if current == ptr { - if prev != ptr::mut_null() { + if prev != ptr::null_mut() { unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; } else { unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; @@ -148,7 +148,7 @@ impl StringCache { prev = current; unsafe { current = (*current).next_in_bucket }; } - assert!(current != ptr::mut_null()); + assert!(current != ptr::null_mut()); unsafe { ptr::read(ptr as *const StringCacheEntry); From d5adb12dafc619ebb43e01e0be386e52bd0ea769 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 29 Sep 2014 15:40:01 -0700 Subject: [PATCH 018/379] Add a wrapper for atoms that represent namespaces --- macros/src/atom/mod.rs | 63 
++++++++++++++++++++++------------- src/atom.rs | 17 ---------- src/lib.rs | 5 ++- src/namespace.rs | 74 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 40 deletions(-) create mode 100644 src/namespace.rs diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 5c390db..f88080e 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -53,6 +53,7 @@ lazy_static! { }; } +// FIXME: libsyntax should provide this (rust-lang/rust#17637) struct AtomResult { expr: P, pat: P, @@ -68,16 +69,18 @@ impl MacResult for AtomResult { } } -fn expand_atom_str(cx: &mut ExtCtxt, sp: Span, name: &str) -> Box { - let i = expect!(cx, sp, STATIC_ATOM_MAP.find_equiv(&name), - format!("Unknown static atom {:s}", name).as_slice()); +fn make_atom_result(cx: &mut ExtCtxt, sp: Span, name: &str) -> Option { + let i = match STATIC_ATOM_MAP.find_equiv(&name) { + Some(i) => i, + None => return None, + }; let data = static_atom::add_tag(*i as u32); - box AtomResult { + Some(AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - } as Box + }) } // Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. @@ -87,13 +90,15 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box expect!(cx, sp, atom_tok_to_str(t), usage), _ => bail!(cx, sp, usage), }; - expand_atom_str(cx, sp, name.get()) + box expect!(cx, sp, make_atom_result(cx, sp, name.get()), + format!("Unknown static atom {:s}", name.get()).as_slice()) } -// Translate `ns!(HTML)` into `atom!("http://www.w3.org/1999/xhtml")`. +// Translate `ns!(HTML)` into `Namespace { atom: atom!("http://www.w3.org/1999/xhtml") }`. // The argument is ASCII-case-insensitive. 
pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { static all_ns: &'static [(&'static str, &'static str)] = [ + ("", ""), ("html", "http://www.w3.org/1999/xhtml"), ("xml", "http://www.w3.org/XML/1998/namespace"), ("xmlns", "http://www.w3.org/2000/xmlns/"), @@ -102,23 +107,37 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box String { + let ns_names: Vec<&'static str> = all_ns.slice_from(1).iter() + .map(|&(x, _)| x).collect(); + format!("Usage: ns!(HTML), case-insensitive. \ + Known namespaces: {:s}", + ns_names.connect(" ")) + } + + let name = expect!(cx, sp, match tt { [ref t] => atom_tok_to_str(t), _ => None, - }; + }, usage().as_slice()); - match name { - Some(name) => { - for &(short, url) in all_ns.iter() { - if short.eq_ignore_ascii_case(name.get()) { - return expand_atom_str(cx, sp, url); - } - } - } - None => (), - } + let &(short, url) = expect!(cx, sp, + all_ns.iter().find(|&&(short, _)| short.eq_ignore_ascii_case(name.get())), + usage().as_slice()); + + // All of the URLs should be in the static atom table. + let AtomResult { expr, pat } = expect!(cx, sp, make_atom_result(cx, sp, url), + format!("internal plugin error: can't find namespace url {:s}", url).as_slice()); - let ns_names: Vec<&'static str> = all_ns.iter().map(|&(x, _)| x).collect(); - bail!(cx, sp, format!("Usage: ns!(HTML), case-insensitive. Known namespaces: {:s}", - ns_names.connect(" ")).as_slice()); + box AtomResult { + expr: quote_expr!(&mut *cx, ::string_cache::namespace::Namespace($expr)), + pat: quote_pat!(&mut *cx, ::string_cache::namespace::Namespace($pat)), + } } + +#[macro_export] +macro_rules! 
qualname (($ns:tt, $local:tt) => ( + ::string_cache::namespace::QualName { + ns: ns!($ns), + local: atom!($local), + } +)) diff --git a/src/atom.rs b/src/atom.rs index 0dfffd8..ddeb8a2 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -548,21 +548,4 @@ mod tests { _ => 3u, }); } - - #[test] - fn ns_macro() { - assert_eq!(ns!(html), atom!("http://www.w3.org/1999/xhtml")); - assert_eq!(ns!(xml), atom!("http://www.w3.org/XML/1998/namespace")); - assert_eq!(ns!(xmlns), atom!("http://www.w3.org/2000/xmlns/")); - assert_eq!(ns!(xlink), atom!("http://www.w3.org/1999/xlink")); - assert_eq!(ns!(svg), atom!("http://www.w3.org/2000/svg")); - assert_eq!(ns!(mathml), atom!("http://www.w3.org/1998/Math/MathML")); - - assert_eq!(ns!(HtMl), atom!("http://www.w3.org/1999/xhtml")); - assert_eq!(ns!(xMl), atom!("http://www.w3.org/XML/1998/namespace")); - assert_eq!(ns!(XmLnS), atom!("http://www.w3.org/2000/xmlns/")); - assert_eq!(ns!(xLiNk), atom!("http://www.w3.org/1999/xlink")); - assert_eq!(ns!(SvG), atom!("http://www.w3.org/2000/svg")); - assert_eq!(ns!(mAtHmL), atom!("http://www.w3.org/1998/Math/MathML")); - } } diff --git a/src/lib.rs b/src/lib.rs index 5100df8..b36a783 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,8 +38,10 @@ extern crate phf; extern crate string_cache_macros; pub use atom::Atom; +pub use namespace::{Namespace, QualName}; pub mod atom; +pub mod namespace; // A private module so that macro-expanded idents like // `::string_cache::atom::Atom` will also work in this crate. @@ -48,11 +50,12 @@ pub mod atom; #[doc(hidden)] mod string_cache { pub use atom; + pub use namespace; } // For macros and deriving. #[cfg(not(test))] mod std { - pub use core::{cmp, fmt}; + pub use core::{cmp, fmt, clone, option}; pub use collections::hash; } diff --git a/src/namespace.rs b/src/namespace.rs new file mode 100644 index 0000000..f1efa7e --- /dev/null +++ b/src/namespace.rs @@ -0,0 +1,74 @@ +// Copyright 2014 The Servo Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![experimental="This may move as string-cache becomes less Web-specific."] + +use core::prelude::*; + +use atom::Atom; + +/// An atom that is meant to represent a namespace in the HTML / XML sense. +/// Whether a given string represents a namespace is contextual, so this is +/// a transparent wrapper that will not catch all mistakes. +#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +pub struct Namespace(pub Atom); + +/// A name with a namespace. +#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +pub struct QualName { + pub ns: Namespace, + pub local: Atom, +} + +impl QualName { + pub fn new(ns: Namespace, local: Atom) -> QualName { + QualName { + ns: ns, + local: local, + } + } +} + +#[cfg(test)] +mod tests { + use super::{Namespace, QualName}; + + #[test] + fn ns_macro() { + assert_eq!(ns!(""), Namespace(atom!(""))); + + assert_eq!(ns!(html), Namespace(atom!("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xml), Namespace(atom!("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(xmlns), Namespace(atom!("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xlink), Namespace(atom!("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(svg), Namespace(atom!("http://www.w3.org/2000/svg"))); + assert_eq!(ns!(mathml), Namespace(atom!("http://www.w3.org/1998/Math/MathML"))); + + assert_eq!(ns!(HtMl), Namespace(atom!("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xMl), Namespace(atom!("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(XmLnS), Namespace(atom!("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xLiNk), Namespace(atom!("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(SvG), Namespace(atom!("http://www.w3.org/2000/svg"))); + 
assert_eq!(ns!(mAtHmL), Namespace(atom!("http://www.w3.org/1998/Math/MathML"))); + } + + #[test] + fn qualname() { + assert_eq!(QualName::new(ns!(""), atom!("")), + QualName { ns: ns!(""), local: atom!("") }); + assert_eq!(QualName::new(ns!(XML), atom!(base)), + QualName { ns: ns!(XML), local: atom!(base) }); + } + + #[test] + fn qualname_macro() { + assert_eq!(qualname!("", ""), QualName { ns: ns!(""), local: atom!("") }); + assert_eq!(qualname!(XML, base), QualName { ns: ns!(XML), local: atom!(base) }); + } +} From b7fa84656ee4162aec9fc16db368d21ae6853e68 Mon Sep 17 00:00:00 2001 From: Glenn Watson Date: Thu, 2 Oct 2014 16:23:53 +1000 Subject: [PATCH 019/379] Add some CSS keywords as static atoms. --- macros/src/atom/data.rs | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/macros/src/atom/data.rs b/macros/src/atom/data.rs index 172fb9a..b011709 100644 --- a/macros/src/atom/data.rs +++ b/macros/src/atom/data.rs @@ -202,6 +202,11 @@ pub static atoms: &'static [&'static str] = &[ "axis", "azimuth", "background", + "background-attachment", + "background-color", + "background-image", + "background-position", + "background-repeat", "baseFrequency", "baseProfile", "basefrequency", @@ -216,6 +221,26 @@ pub static atoms: &'static [&'static str] = &[ "bgcolor", "bias", "border", + "border-bottom", + "border-bottom-color", + "border-bottom-style", + "border-bottom-width", + "border-color", + "border-left", + "border-left-color", + "border-left-style", + "border-left-width", + "border-right", + "border-right-color", + "border-right-style", + "border-right-width", + "border-style", + "border-top", + "border-top-color", + "border-top-style", + "border-top-width", + "border-width", + "bottom", "bvar", "by", "calcMode", @@ -541,6 +566,7 @@ pub static atoms: &'static [&'static str] = &[ "laplacian", "largeop", "lcm", + "left", "legend", "lengthAdjust", "lengthadjust", @@ -551,6 +577,7 @@ pub static atoms: &'static [&'static str] = &[ 
"limitingConeAngle", "limitingconeangle", "line", + "line-height", "linearGradient", "lineargradient", "linebreak", @@ -577,6 +604,11 @@ pub static atoms: &'static [&'static str] = &[ "malignmark", "manifest", "map", + "margin", + "margin-bottom", + "margin-left", + "margin-right", + "margin-top", "marginheight", "marginwidth", "mark", @@ -604,6 +636,8 @@ pub static atoms: &'static [&'static str] = &[ "matrix", "matrixrow", "max", + "max-height", + "max-width", "maxlength", "maxsize", "mean", @@ -622,6 +656,8 @@ pub static atoms: &'static [&'static str] = &[ "mglyph", "mi", "min", + "min-height", + "min-width", "minsize", "minus", "missing-glyph", @@ -779,6 +815,11 @@ pub static atoms: &'static [&'static str] = &[ "overline-position", "overline-thickness", "p", + "padding", + "padding-bottom", + "padding-left", + "padding-right", + "padding-top", "panose-1", "partialdiff", "path", @@ -806,6 +847,7 @@ pub static atoms: &'static [&'static str] = &[ "pointsatz", "polygon", "polyline", + "position", "poster", "power", "prefetch", @@ -859,6 +901,7 @@ pub static atoms: &'static [&'static str] = &[ "restart", "result", "rev", + "right", "role", "root", "rotate", @@ -968,6 +1011,7 @@ pub static atoms: &'static [&'static str] = &[ "systemLanguage", "systemlanguage", "tabindex", + "table-layout", "tableValues", "tablevalues", "tan", @@ -980,8 +1024,10 @@ pub static atoms: &'static [&'static str] = &[ "tbreak", "tendsto", "text", + "text-align", "text-anchor", "text-decoration", + "text-orientation", "text-rendering", "textLength", "textPath", @@ -992,6 +1038,7 @@ pub static atoms: &'static [&'static str] = &[ "time", "times", "to", + "top", "transform", "transpose", "tref", @@ -1030,6 +1077,7 @@ pub static atoms: &'static [&'static str] = &[ "vert-adv-y", "vert-origin-x", "vert-origin-y", + "vertical-align", "verythickmathspace", "verythinmathspace", "veryverythickmathspace", @@ -1046,6 +1094,7 @@ pub static atoms: &'static [&'static str] = &[ "vspace", "wbr", "when", + 
"white-space", "width", "widths", "word-spacing", From ac27c7f530e2076679edcdd032dc2f2ddd51e1bd Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Sat, 4 Oct 2014 15:23:48 +0200 Subject: [PATCH 020/379] Remove the managed_boxes feature (fixes #23). --- macros/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index d300001..c0c1f19 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name="string_cache_macros"] #![crate_type="dylib"] -#![feature(macro_rules, plugin_registrar, quote, managed_boxes, phase)] +#![feature(macro_rules, plugin_registrar, quote, phase)] #![allow(unused_imports)] // for quotes extern crate syntax; From 03e90c11ea89ed06e7fc918583762b481745756b Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sun, 12 Oct 2014 13:20:21 -0700 Subject: [PATCH 021/379] Upgrade to rustc 0.13.0-dev (78a767689 2014-10-10 14:57:03 +0000) Needed for html5ever master. --- shared/static_atom.rs | 4 ++-- src/atom.rs | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/shared/static_atom.rs b/shared/static_atom.rs index e50e669..56dfe95 100644 --- a/shared/static_atom.rs +++ b/shared/static_atom.rs @@ -12,9 +12,9 @@ #![allow(dead_code)] -pub static STATIC_TAG: u8 = 2; +pub const STATIC_TAG: u8 = 2; -static STATIC_SHIFT_BITS: uint = 32; +const STATIC_SHIFT_BITS: uint = 32; #[inline(always)] pub fn add_tag(atom_id: u32) -> u64 { diff --git a/src/atom.rs b/src/atom.rs index ddeb8a2..e1e27e0 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,6 +7,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +#![allow(non_uppercase_statics)] + use core::prelude::*; use phf::PhfOrderedSet; @@ -30,12 +32,12 @@ mod static_atom; // Inline atoms are probably buggy on big-endian architectures. 
#[allow(dead_code)] #[static_assert] -static IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); +const IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); static mut global_string_cache_ptr: *mut Mutex = 0 as *mut _; -static ENTRY_ALIGNMENT: uint = 16; +const ENTRY_ALIGNMENT: uint = 16; // Macro-generated table for static atoms. static static_atom_set: PhfOrderedSet<&'static str> = static_atom_set!(); From 0c940f6750c2724ec046bf2598055a5aaaf4cb47 Mon Sep 17 00:00:00 2001 From: Daniel Fath Date: Sat, 18 Oct 2014 20:27:40 +0200 Subject: [PATCH 022/379] Upgrade to rustc 0.13.0-nightly (222ae8b9b 2014-10-18 00:47:22 +0000) - Need this working for html5ever. --- src/atom.rs | 4 ++-- src/lib.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index e1e27e0..c3ecaaa 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -45,7 +45,7 @@ static static_atom_set: PhfOrderedSet<&'static str> = static_atom_set!(); // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. 
#[repr(u8)] -#[deriving(Eq, PartialEq)] +#[deriving(Eq, PartialEq, Show)] enum AtomType { Dynamic = 0, Inline = 1, @@ -300,7 +300,7 @@ impl Drop for Atom { impl fmt::Show for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Atom('{:s}' type={:?})", self.as_slice(), self.get_type()) + write!(f, "Atom('{:s}' type={})", self.as_slice(), self.get_type()) } } diff --git a/src/lib.rs b/src/lib.rs index b36a783..197dea5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,6 @@ extern crate core; extern crate alloc; extern crate collections; extern crate sync; -extern crate debug; #[cfg(test)] extern crate test; From 3e4a64211daf7e687eeb71de3a3c4cfc1b20e38a Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 22 Oct 2014 10:09:29 +1100 Subject: [PATCH 023/379] Replace removed deprecated method call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `move_iter` was deprecated, now it’s gone altogether. --- macros/src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index f88080e..651c58c 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -29,7 +29,7 @@ mod static_atom; pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); let tts: Vec = data::atoms.iter().flat_map(|k| { - (quote_tokens!(&mut *cx, $k,)).move_iter() + (quote_tokens!(&mut *cx, $k,)).into_iter() }).collect(); MacExpr::new(quote_expr!(&mut *cx, phf_ordered_set!($tts))) } From 724232d1b1c40fd44501c33024f840b033f816fb Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 21 Oct 2014 18:31:09 -0700 Subject: [PATCH 024/379] atom: Slight efficiency optimization in string interning --- src/atom.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c3ecaaa..e55d87c 100644 --- a/src/atom.rs +++ 
b/src/atom.rs @@ -135,20 +135,20 @@ impl StringCache { let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as uint; - let mut current = self.buckets[bucket_index]; - let mut prev: *mut StringCacheEntry = ptr::null_mut(); + let mut prevp = &self.buckets[bucket_index]; + let mut current = *prevp; while current != ptr::null_mut() { if current == ptr { - if prev != ptr::null_mut() { - unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; - } else { - unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; - } + unsafe { + (**prevp).next_in_bucket = (*current).next_in_bucket + }; break; } - prev = current; - unsafe { current = (*current).next_in_bucket }; + unsafe { + prevp = &(*current).next_in_bucket; + current = *prevp + }; } assert!(current != ptr::null_mut()); From a906b4df770e135fff14512c5b65356ff0289f31 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sun, 19 Oct 2014 20:53:35 -0700 Subject: [PATCH 025/379] Fix PartialOrd for Atom This snuck in during a Rust upgrade. 
--- src/atom.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index e55d87c..5aec526 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -306,14 +306,10 @@ impl fmt::Show for Atom { impl PartialOrd for Atom { fn partial_cmp(&self, other: &Atom) -> Option { - self.data.partial_cmp(&other.data) - } - - fn lt(&self, other: &Atom) -> bool { if self.data == other.data { - return false; + return Some(Equal); } - self.as_slice() < other.as_slice() + self.as_slice().partial_cmp(&other.as_slice()) } } @@ -430,6 +426,7 @@ mod tests { fn check(x: &str, y: &str) { assert_eq!(x < y, Atom::from_slice(x) < Atom::from_slice(y)); assert_eq!(x.cmp(&y), Atom::from_slice(x).cmp(&Atom::from_slice(y))); + assert_eq!(x.partial_cmp(&y), Atom::from_slice(x).partial_cmp(&Atom::from_slice(y))); } check("a", "body"); From b28311dcccc125199e0134a1112ced23390c28eb Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sun, 19 Oct 2014 08:45:19 -0700 Subject: [PATCH 026/379] Add tests for low-level representation --- src/atom.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/atom.rs b/src/atom.rs index 5aec526..650b7bd 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -326,6 +326,7 @@ impl Ord for Atom { mod tests { use core::prelude::*; + use std::fmt; use std::task::spawn; use super::{Atom, Static, Inline, Dynamic}; use test::Bencher; @@ -468,6 +469,52 @@ mod tests { assert!(i0 != d0); } + macro_rules! 
assert_eq_fmt (($fmt:expr, $x:expr, $y:expr) => ({ + let x = $x; + let y = $y; + if x != y { + fail!("assertion failed: {} != {}", + format_args!(fmt::format, $fmt, x).as_slice(), + format_args!(fmt::format, $fmt, y).as_slice()); + } + })) + + #[test] + fn repr() { + fn check(s: &str, data: u64) { + assert_eq_fmt!("0x{:016X}", Atom::from_slice(s).data, data); + } + + fn check_static(s: &str, x: Atom, data: u64) { + check(s, data); + assert_eq_fmt!("0x{:016X}", x.data, data); + } + + // This test is here to make sure we don't change atom representation + // by accident. It may need adjusting if there are changes to the + // static atom table, the tag values, etc. + + // Static atoms + check_static("a", atom!(a), 0x0000_0000_0000_0002); + check_static("address", atom!(address), 0x0000_0001_0000_0002); + check_static("area", atom!(area), 0x0000_0003_0000_0002); + + // Inline atoms + check("e", 0x0000_0000_0000_6511); + check("xyzzy", 0x0000_797A_7A79_7851); + check("xyzzy01", 0x3130_797A_7A79_7871); + + // Dynamic atoms. This is a pointer so we can't verify every bit. + assert_eq!(0x00, Atom::from_slice("a dynamic string").data & 0xf); + } + + #[test] + fn assert_entry_size() { + // Guard against accidental changes to the size of StringCacheEntry. 
+ use core::mem; + assert_eq!(48, mem::size_of::()); + } + #[test] fn test_threads() { for _ in range(0u32, 100u32) { From dac017de631985c83d3acc5e27243e90e2e48346 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sat, 18 Oct 2014 18:27:20 -0700 Subject: [PATCH 027/379] Add more comprehensive benchmarks --- src/atom/bench.rs | 217 +++++++++++++++++++++++++++++++++++ src/{atom.rs => atom/mod.rs} | 50 +------- 2 files changed, 221 insertions(+), 46 deletions(-) create mode 100644 src/atom/bench.rs rename src/{atom.rs => atom/mod.rs} (93%) diff --git a/src/atom/bench.rs b/src/atom/bench.rs new file mode 100644 index 0000000..7424164 --- /dev/null +++ b/src/atom/bench.rs @@ -0,0 +1,217 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/* + +A cautionary note about these benchmarks: + +Many of the operations we're attempting to measure take less than one +nanosecond. That's why we run them thousands of times in a loop just to get a +single iteration that Rust's statistical benchmarking can work with. At that +scale, any change anywhere in the library can produce durable performance +regressions on the order of half a nanosecond, i.e. "500 ns" in the output for +a test like eq_x_1000. + +We can't get anything done if we rachet on these numbers! They are more useful +for selecting between alternatives, and for noticing large regressions or +inconsistencies. + +Furthermore, a large part of the point of interning is to make strings small +and cheap to move around, which isn't reflected in these tests. + +*/ + +use atom::Atom; +use test::{Bencher, black_box}; + +// Just shorthand +fn mk(x: &str) -> Atom { + Atom::from_slice(x) +} + +macro_rules! 
check_type (($name:ident, $x:expr, $p:pat) => ( + // NB: "cargo bench" does not run these! + #[test] + fn $name() { + match $x.get_type() { + $p => (), + _ => fail!("atom has wrong type"), + } + } +)) + +macro_rules! bench_tiny_op (($name:ident, $op:ident, $ctor_x:expr, $ctor_y:expr) => ( + #[bench] + fn $name(b: &mut Bencher) { + const n: uint = 1000; + let xs = Vec::from_elem(n, $ctor_x); + let ys = Vec::from_elem(n, $ctor_y); + + b.iter(|| { + for (x, y) in xs.iter().zip(ys.iter()) { + black_box(x.$op(y)); + } + }); + } +)) + +macro_rules! bench_one ( + (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x, Static)); + (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x, Inline)); + (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x, Dynamic)); + (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y, Static)); + (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y, Inline)); + (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y, Dynamic)); + (is_static $x:expr, $y:expr) => (bench_one!(x_static $x, $y) bench_one!(y_static $x, $y)); + (is_inline $x:expr, $y:expr) => (bench_one!(x_inline $x, $y) bench_one!(y_inline $x, $y)); + (is_dynamic $x:expr, $y:expr) => (bench_one!(x_dynamic $x, $y) bench_one!(y_dynamic $x, $y)); + + (eq $x:expr, $_y:expr) => (bench_tiny_op!(eq_x_1000, eq, $x, $x)); + (ne $x:expr, $y:expr) => (bench_tiny_op!(ne_x_1000, ne, $x, $y)); + (lt $x:expr, $y:expr) => (bench_tiny_op!(lt_x_1000, lt, $x, $y)); + + (intern $x:expr, $_y:expr) => ( + #[bench] + fn intern(b: &mut Bencher) { + let x = $x.as_slice().to_string(); + let x = x.as_slice(); + b.iter(|| { + black_box(Atom::from_slice(x)); + }); + } + ); + + (as_slice $x:expr, $_y:expr) => ( + #[bench] + fn as_slice_x_1000(b: &mut Bencher) { + let x = $x; + b.iter(|| { + for _ in range(0, 1000u) { + black_box(x.as_slice()); + } + }); + } + ); + + (clone $x:expr, $_y:expr) => ( + #[bench] + fn clone_x_1000(b: &mut Bencher) { + let x = $x; + 
b.iter(|| { + for _ in range(0, 1000u) { + black_box(x.clone()); + } + }); + } + ); + + (clone_string $x:expr, $_y:expr) => ( + #[bench] + fn clone_x_1000(b: &mut Bencher) { + let x = $x.to_string(); + b.iter(|| { + for _ in range(0, 1000u) { + black_box(x.clone()); + } + }); + } + ); +) + +macro_rules! bench_all ( + ([ $($which:ident)+ ] for $name:ident = $x:expr, $y:expr) => ( + // FIXME: This module works around rust-lang/rust#12249 so we don't + // have to repeat the names for eq and neq. + mod $name { + #![allow(unused_imports)] + + use core::prelude::*; + use collections::vec::Vec; + use test::{Bencher, black_box}; + use std::to_string::ToString; + + use atom::{Atom, Static, Inline, Dynamic}; + + use super::mk; + + $( + bench_one!($which $x, $y) + )+ + } + ); +) + +pub const longer_dynamic_a: &'static str + = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Band"; +pub const longer_dynamic_b: &'static str + = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Ban!"; + +bench_all!([eq ne lt clone_string] for short_string = "e", "f") +bench_all!([eq ne lt clone_string] for medium_string = "xyzzy01", "xyzzy02") +bench_all!([eq ne lt clone_string] + for longer_string = super::longer_dynamic_a, super::longer_dynamic_b) + +bench_all!([eq ne intern as_slice clone is_static lt] + for static_atom = atom!(a), atom!(b)) + +bench_all!([intern as_slice clone is_inline] + for short_inline_atom = mk("e"), mk("f")) + +bench_all!([eq ne intern as_slice clone is_inline lt] + for medium_inline_atom = mk("xyzzy01"), mk("xyzzy02")) + +bench_all!([intern as_slice clone is_dynamic] + for min_dynamic_atom = mk("xyzzy001"), mk("xyzzy002")) + +bench_all!([eq ne intern as_slice clone is_dynamic lt] + for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b)) + +bench_all!([intern as_slice clone is_static] + for static_at_runtime = mk("a"), mk("b")) + +bench_all!([ne lt x_static y_inline] + for static_vs_inline = atom!(a), mk("f")) + +bench_all!([ne lt 
x_static y_dynamic] + for static_vs_dynamic = atom!(a), mk(super::longer_dynamic_b)) + +bench_all!([ne lt x_inline y_dynamic] + for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)) + +macro_rules! bench_rand ( ($name:ident, $len:expr) => ( + #[bench] + fn $name(b: &mut Bencher) { + use std::{str, rand}; + use std::slice::MutableSlice; + use std::rand::Rng; + + let mut gen = rand::weak_rng(); + b.iter(|| { + // We have to generate new atoms on every iter, because + // the dynamic atom table isn't reset. + // + // I measured the overhead of random string generation + // as about 3-12% at one point. + + let mut buf: [u8, ..$len] = [0, ..$len]; + gen.fill_bytes(buf); + for n in buf.iter_mut() { + // shift into printable ASCII + *n = (*n % 0x40) + 0x20; + } + let s = unsafe { str::raw::from_utf8(buf) }; + black_box(Atom::from_slice(s)); + }); + } +)) + +bench_rand!(intern_rand_008, 8) +bench_rand!(intern_rand_032, 32) +bench_rand!(intern_rand_128, 128) +bench_rand!(intern_rand_512, 512) diff --git a/src/atom.rs b/src/atom/mod.rs similarity index 93% rename from src/atom.rs rename to src/atom/mod.rs index 650b7bd..6d89346 100644 --- a/src/atom.rs +++ b/src/atom/mod.rs @@ -26,7 +26,7 @@ use collections::hash::{Hash, Hasher, sip}; use sync::Mutex; use sync::one::{Once, ONCE_INIT}; -#[path="../shared/static_atom.rs"] +#[path="../../shared/static_atom.rs"] mod static_atom; // Inline atoms are probably buggy on big-endian architectures. 
@@ -322,6 +322,9 @@ impl Ord for Atom { } } +#[cfg(test)] +mod bench; + #[cfg(test)] mod tests { use core::prelude::*; @@ -329,9 +332,6 @@ mod tests { use std::fmt; use std::task::spawn; use super::{Atom, Static, Inline, Dynamic}; - use test::Bencher; - use collections::MutableSeq; - use collections::vec::Vec; #[test] fn test_as_slice() { @@ -525,48 +525,6 @@ mod tests { } } - #[bench] - fn bench_strings(b: &mut Bencher) { - let mut strings0 = Vec::new(); - let mut strings1 = Vec::new(); - - for _ in range(0u32, 1000u32) { - strings0.push("a"); - strings1.push("b"); - } - - let mut eq_count = 0u32; - - b.iter(|| { - for (s0, s1) in strings0.iter().zip(strings1.iter()) { - if s0 == s1 { - eq_count += 1; - } - } - }); - } - - #[bench] - fn bench_atoms(b: &mut Bencher) { - let mut atoms0 = Vec::new(); - let mut atoms1 = Vec::new(); - - for _ in range(0u32, 1000u32) { - atoms0.push(Atom::from_slice("a")); - atoms1.push(Atom::from_slice("b")); - } - - let mut eq_count = 0u32; - - b.iter(|| { - for (a0, a1) in atoms0.iter().zip(atoms1.iter()) { - if a0 == a1 { - eq_count += 1; - } - } - }); - } - #[test] fn atom_macro() { assert_eq!(atom!(body), Atom::from_slice("body")); From 62a130f74921ccb585f582a7e67951016bca2cbb Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 21 Oct 2014 21:04:59 -0700 Subject: [PATCH 028/379] Revert "atom: Slight efficiency optimization in string interning" It causes segfaults in the new benchmark suite. This reverts commit 724232d1b1c40fd44501c33024f840b033f816fb. 
--- src/atom/mod.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 6d89346..37b92d8 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -135,20 +135,20 @@ impl StringCache { let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as uint; - let mut prevp = &self.buckets[bucket_index]; - let mut current = *prevp; + let mut current = self.buckets[bucket_index]; + let mut prev: *mut StringCacheEntry = ptr::null_mut(); while current != ptr::null_mut() { if current == ptr { - unsafe { - (**prevp).next_in_bucket = (*current).next_in_bucket - }; + if prev != ptr::null_mut() { + unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; + } else { + unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; + } break; } - unsafe { - prevp = &(*current).next_in_bucket; - current = *prevp - }; + prev = current; + unsafe { current = (*current).next_in_bucket }; } assert!(current != ptr::null_mut()); From d7bf22ead1a90fb596401d9262994477aa3a0ee9 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sat, 18 Oct 2014 20:07:15 -0700 Subject: [PATCH 029/379] Consolidate packing / unpacking code Mostly for safety and maintainability. This makes it easy to see the overall encoding scheme, between repr.rs and the representation tests in atom/mod.rs. Thanks to the magic of LLVM's inlining and SROA, this code is faster on some benchmarks and not significantly slower on any. Also added debug_assert!s and changed the existing non-test assert!s. I believe they're all safe to omit in release builds. 
--- macros/src/atom/mod.rs | 7 +- macros/src/lib.rs | 1 + shared/repr.rs | 124 ++++++++++++++++++++ shared/static_atom.rs | 28 ----- src/atom/bench.rs | 17 +-- src/atom/mod.rs | 256 ++++++++++++++++------------------------- 6 files changed, 240 insertions(+), 193 deletions(-) create mode 100644 shared/repr.rs delete mode 100644 shared/static_atom.rs diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 651c58c..a839715 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -21,8 +21,8 @@ use std::ascii::AsciiExt; mod data; -#[path="../../../shared/static_atom.rs"] -mod static_atom; +#[path="../../../shared/repr.rs"] +mod repr; // Build a PhfOrderedSet of static atoms. // Takes no arguments. @@ -75,7 +75,8 @@ fn make_atom_result(cx: &mut ExtCtxt, sp: Span, name: &str) -> Option return None, }; - let data = static_atom::add_tag(*i as u32); + // In the case of static atoms, the call to pack() doesn't use any unsafe code. + let data = unsafe { repr::Static(*i as u32).pack() }; Some(AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), diff --git a/macros/src/lib.rs b/macros/src/lib.rs index c0c1f19..b2ae589 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -13,6 +13,7 @@ #![feature(macro_rules, plugin_registrar, quote, phase)] #![allow(unused_imports)] // for quotes +extern crate core; extern crate syntax; extern crate rustc; diff --git a/shared/repr.rs b/shared/repr.rs new file mode 100644 index 0000000..8402b05 --- /dev/null +++ b/shared/repr.rs @@ -0,0 +1,124 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Details of the atom representation that need to be shared between +//! 
the macros crate and the run-time library, in order to guarantee +//! consistency. + +#![allow(dead_code, unused_imports)] + +use core::{mem, raw, intrinsics}; +use core::option::{Option, Some, None}; +use core::ptr::RawPtr; +use core::slice::{ImmutableSlice, AsSlice}; +use core::slice::bytes; + +// FIXME(rust-lang/rust#18153): generate these from an enum +pub const DYNAMIC_TAG: u8 = 0u8; +pub const INLINE_TAG: u8 = 1u8; // len in upper nybble +pub const STATIC_TAG: u8 = 2u8; + +pub const MAX_INLINE_LEN: uint = 7; + +// Atoms use a compact representation which fits this enum in a single u64. +// Inlining avoids actually constructing the unpacked representation in memory. +pub enum UnpackedAtom { + /// Pointer to a dynamic table entry. Must be 16-byte aligned! + Dynamic(*mut ()), + + /// Length + bytes of string. + Inline(u8, [u8, ..7]), + + /// Index in static interning table. + Static(u32), +} + +const STATIC_SHIFT_BITS: uint = 32; + +#[inline(always)] +unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { + #[static_assert] + const IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); + + raw::Slice { + data: ((x as *const u64) as *const u8).offset(1), + len: 7, + } +} + +impl UnpackedAtom { + #[inline(always)] + pub unsafe fn pack(self) -> u64 { + match self { + Static(n) => (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS), + Dynamic(p) => { + let n = p as u64; + debug_assert!(0 == n & 0xf); + n + } + Inline(len, buf) => { + debug_assert!((len as uint) <= MAX_INLINE_LEN); + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); + { + let dest: &mut [u8] = mem::transmute(inline_atom_slice(&mut data)); + bytes::copy_memory(dest, buf.as_slice()); + } + data + } + } + } + + #[inline(always)] + pub unsafe fn from_packed(data: u64) -> UnpackedAtom { + #[static_assert] + const DYNAMIC_IS_UNTAGGED: bool = DYNAMIC_TAG == 0; + + match (data & 0xf) as u8 { + DYNAMIC_TAG => Dynamic(data as *mut ()), + STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as 
u32), + INLINE_TAG => { + let len = ((data & 0xf0) >> 4) as uint; + debug_assert!(len <= MAX_INLINE_LEN); + let mut buf: [u8, ..7] = [0, ..7]; + let src: &[u8] = mem::transmute(inline_atom_slice(&data)); + bytes::copy_memory(buf, src); + Inline(len as u8, buf) + }, + + // intrinsics::unreachable() in release builds? + // See rust-lang/rust#18152. + _ => fail!("impossible"), + } + } +} + +/// Used for a fast path in Clone and Drop. +#[inline(always)] +pub unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { + if (DYNAMIC_TAG as u64) == (data & 0xf) { + Some(data as *mut ()) + } else { + None + } +} + +/// For as_slice on inline atoms, we need a pointer into the original +/// string contents. +/// +/// It's undefined behavior to call this on a non-inline atom!! +#[inline(always)] +pub unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { + match UnpackedAtom::from_packed(*data) { + Inline(len, _) => { + let src: &[u8] = mem::transmute(inline_atom_slice(data)); + src.slice_to(len as uint) + } + _ => intrinsics::unreachable(), + } +} diff --git a/shared/static_atom.rs b/shared/static_atom.rs deleted file mode 100644 index 56dfe95..0000000 --- a/shared/static_atom.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! This code is compiled into both the macros crate and the run-time -//! library, in order to guarantee consistency. - -#![allow(dead_code)] - -pub const STATIC_TAG: u8 = 2; - -const STATIC_SHIFT_BITS: uint = 32; - -#[inline(always)] -pub fn add_tag(atom_id: u32) -> u64 { - (atom_id as u64 << STATIC_SHIFT_BITS) | (STATIC_TAG as u64) -} - -/// Undefined to call this on a non-static atom! 
-#[inline(always)] -pub fn remove_tag(atom_data: u64) -> u32 { - (atom_data >> STATIC_SHIFT_BITS) as u32 -} diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 7424164..7b5d5a9 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -39,7 +39,7 @@ macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( // NB: "cargo bench" does not run these! #[test] fn $name() { - match $x.get_type() { + match unsafe { $x.unpack() } { $p => (), _ => fail!("atom has wrong type"), } @@ -62,12 +62,12 @@ macro_rules! bench_tiny_op (($name:ident, $op:ident, $ctor_x:expr, $ctor_y:expr) )) macro_rules! bench_one ( - (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x, Static)); - (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x, Inline)); - (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x, Dynamic)); - (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y, Static)); - (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y, Inline)); - (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y, Dynamic)); + (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x, Static(..))); + (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x, Inline(..))); + (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x, Dynamic(..))); + (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y, Static(..))); + (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y, Inline(..))); + (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y, Dynamic(..))); (is_static $x:expr, $y:expr) => (bench_one!(x_static $x, $y) bench_one!(y_static $x, $y)); (is_inline $x:expr, $y:expr) => (bench_one!(x_inline $x, $y) bench_one!(y_inline $x, $y)); (is_dynamic $x:expr, $y:expr) => (bench_one!(x_dynamic $x, $y) bench_one!(y_dynamic $x, $y)); @@ -136,7 +136,8 @@ macro_rules! 
bench_all ( use test::{Bencher, black_box}; use std::to_string::ToString; - use atom::{Atom, Static, Inline, Dynamic}; + use atom::Atom; + use atom::repr::{Static, Inline, Dynamic}; use super::mk; diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 37b92d8..57cc073 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -16,47 +16,48 @@ use phf::PhfOrderedSet; use core::fmt; use core::mem; use core::ptr; -use core::slice; use core::slice::bytes; use core::str; use core::atomic::{AtomicInt, SeqCst}; use alloc::heap; use collections::string::String; use collections::hash::{Hash, Hasher, sip}; -use sync::Mutex; +use sync::{Mutex, MutexGuard}; use sync::one::{Once, ONCE_INIT}; -#[path="../../shared/static_atom.rs"] -mod static_atom; +use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; -// Inline atoms are probably buggy on big-endian architectures. -#[allow(dead_code)] -#[static_assert] -const IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); - - -static mut global_string_cache_ptr: *mut Mutex = 0 as *mut _; +#[path="../../shared/repr.rs"] +pub mod repr; +// Needed for memory safety of the tagging scheme! const ENTRY_ALIGNMENT: uint = 16; // Macro-generated table for static atoms. static static_atom_set: PhfOrderedSet<&'static str> = static_atom_set!(); -// NOTE: Deriving Eq here implies that a given string must always -// be interned the same way. 
-#[repr(u8)] -#[deriving(Eq, PartialEq, Show)] -enum AtomType { - Dynamic = 0, - Inline = 1, - Static = static_atom::STATIC_TAG, -} - struct StringCache { hasher: sip::SipHasher, buckets: [*mut StringCacheEntry, ..4096], } +impl StringCache { + #[inline] + fn lock<'a>() -> MutexGuard<'a, StringCache> { + static mut global_string_cache_ptr: *mut Mutex = 0 as *mut _; + static mut START: Once = ONCE_INIT; + + unsafe { + START.doit(|| { + let cache = box Mutex::new(StringCache::new()); + global_string_cache_ptr = mem::transmute(cache); + }); + + (&*global_string_cache_ptr).lock() + } + } +} + struct StringCacheEntry { next_in_bucket: *mut StringCacheEntry, hash: u64, @@ -83,7 +84,7 @@ impl StringCache { } } - fn add(&mut self, string_to_add: &str) -> u64 { + fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { let hash = self.hasher.hash(&string_to_add); let bucket_index = (hash & (self.buckets.len()-1) as u64) as uint; let mut ptr = self.buckets[bucket_index]; @@ -123,8 +124,8 @@ impl StringCache { self.buckets[bucket_index] = ptr; } - assert!(ptr != ptr::null_mut()); - ptr as u64 + debug_assert!(ptr != ptr::null_mut()); + ptr } fn remove(&mut self, key: u64) { @@ -150,7 +151,7 @@ impl StringCache { prev = current; unsafe { current = (*current).next_in_bucket }; } - assert!(current != ptr::null_mut()); + debug_assert!(current != ptr::null_mut()); unsafe { ptr::read(ptr as *const StringCacheEntry); @@ -160,6 +161,8 @@ impl StringCache { } } +// NOTE: Deriving Eq here implies that a given string must always +// be interned the same way. #[deriving(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. 
@@ -168,105 +171,60 @@ pub struct Atom { } impl Atom { - pub fn from_static(atom_id: u32) -> Atom { - Atom { - data: static_atom::add_tag(atom_id), - } + #[inline(always)] + unsafe fn unpack(&self) -> UnpackedAtom { + UnpackedAtom::from_packed(self.data) } pub fn from_slice(string_to_add: &str) -> Atom { - match static_atom_set.find_index_equiv(&string_to_add) { - Some(atom_id) => { - Atom::from_static(atom_id as u32) - }, + let unpacked = match static_atom_set.find_index_equiv(&string_to_add) { + Some(id) => Static(id as u32), None => { - if string_to_add.len() < 8 { - Atom::from_inline(string_to_add) + let len = string_to_add.len(); + if len <= repr::MAX_INLINE_LEN { + let mut buf: [u8, ..7] = [0, ..7]; + bytes::copy_memory(buf, string_to_add.as_bytes()); + Inline(len as u8, buf) } else { - Atom::from_dynamic(string_to_add) - } - } - } - } - - pub fn as_slice<'t>(&'t self) -> &'t str { - let (atom_type, string_len) = self.get_type_and_inline_len(); - let ptr = self as *const Atom as *const u8; - match atom_type { - Inline => { - unsafe { - let data = ptr.offset(1) as *const [u8, ..7]; - str::raw::from_utf8((*data).slice_to(string_len)) + Dynamic(StringCache::lock().add(string_to_add) as *mut ()) } - }, - Static => { - *static_atom_set.iter().idx(static_atom::remove_tag(self.data) as uint) - .expect("bad static atom") - }, - Dynamic => { - let hash_value = unsafe { &*(self.data as *const StringCacheEntry) }; - hash_value.string.as_slice() } - } - } + }; - #[inline] - fn from_inline(string: &str) -> Atom { - assert!(string.len() < 8); - let mut string_data: u64 = 0; - unsafe { slice::raw::mut_buf_as_slice(&mut string_data as *mut u64 as *mut u8, 7, - |b| bytes::copy_memory(b, string.as_bytes())) }; Atom { - data: (Inline as u64) | (string.len() as u64 << 4) | (string_data << 8), + data: unsafe { unpacked.pack() }, } } - #[inline] - fn from_dynamic(string: &str) -> Atom { - static mut START: Once = ONCE_INIT; - + pub fn as_slice<'t>(&'t self) -> &'t str { unsafe { - 
START.doit(|| { - let cache = box Mutex::new(StringCache::new()); - global_string_cache_ptr = mem::transmute(cache); - }); - } - - let mut string_cache = unsafe { - (&*global_string_cache_ptr).lock() - }; - let hash_value_address = string_cache.add(string); - Atom { - data: hash_value_address | Dynamic as u64 + match self.unpack() { + Inline(..) => { + let buf = repr::inline_orig_bytes(&self.data); + debug_assert!(str::is_utf8(buf)); + str::raw::from_utf8(buf) + }, + Static(idx) => *static_atom_set.iter().idx(idx as uint).expect("bad static atom"), + Dynamic(entry) => { + let entry = entry as *mut StringCacheEntry; + (*entry).string.as_slice() + } + } } } - - #[inline] - fn get_type(&self) -> AtomType { - unsafe { mem::transmute((self.data & 0xf) as u8) } - } - - #[inline] - fn get_type_and_inline_len(&self) -> (AtomType, uint) { - let atom_type = self.get_type(); - let len = match atom_type { - Static | Dynamic => 0, - Inline => ((self.data & 0xf0) >> 4) as uint - }; - (atom_type, len) - } } impl Clone for Atom { - #[inline] + #[inline(always)] fn clone(&self) -> Atom { - let atom_type = self.get_type(); - match atom_type { - Dynamic => { - let hash_value = unsafe { &mut *(self.data as *mut StringCacheEntry) }; - hash_value.ref_count.fetch_add(1, SeqCst); + unsafe { + match repr::from_packed_dynamic(self.data) { + Some(entry) => { + let entry = entry as *mut StringCacheEntry; + (*entry).ref_count.fetch_add(1, SeqCst); + }, + None => (), } - _ => {} } Atom { data: self.data @@ -279,28 +237,34 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. 
fn drop_slow(this: &mut Atom) { - let mut string_cache = unsafe { - (&*global_string_cache_ptr).lock() - }; - string_cache.remove(this.data); + StringCache::lock().remove(this.data); } - match self.get_type() { - Dynamic => { - let ptr = self.data as *mut StringCacheEntry; - let value: &mut StringCacheEntry = unsafe { mem::transmute(ptr) }; - if value.ref_count.fetch_sub(1, SeqCst) == 1 { - drop_slow(self); + unsafe { + match repr::from_packed_dynamic(self.data) { + Some(entry) => { + let entry = entry as *mut StringCacheEntry; + if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { + drop_slow(self); + } } - }, - _ => {} + None => (), + } } } } impl fmt::Show for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Atom('{:s}' type={})", self.as_slice(), self.get_type()) + let ty_str = unsafe { + match self.unpack() { + Dynamic(..) => "dynamic", + Inline(..) => "inline", + Static(..) => "static", + } + }; + + write!(f, "Atom('{:s}' type={:s})", self.as_slice(), ty_str) } } @@ -331,7 +295,8 @@ mod tests { use std::fmt; use std::task::spawn; - use super::{Atom, Static, Inline, Dynamic}; + use super::Atom; + use super::repr::{Static, Inline, Dynamic}; #[test] fn test_as_slice() { @@ -354,44 +319,27 @@ mod tests { assert!(d1.as_slice() == "ZZZZZZZZZZ"); } + macro_rules! 
unpacks_to (($e:expr, $t:pat) => ( + match unsafe { Atom::from_slice($e).unpack() } { + $t => (), + _ => fail!("atom has wrong type"), + } + )) + #[test] fn test_types() { - let s0 = Atom::from_slice(""); - assert!(s0.get_type_and_inline_len() == (Static, 0)); - - let s1 = Atom::from_slice("id"); - assert!(s1.get_type_and_inline_len() == (Static, 0)); - - let s1 = Atom::from_slice("body"); - assert!(s1.get_type_and_inline_len() == (Static, 0)); - - // "z" is a static atom - let i0 = Atom::from_slice("c"); - assert!(i0.get_type_and_inline_len() == (Inline, 1)); - - let i1 = Atom::from_slice("zz"); - assert!(i1.get_type_and_inline_len() == (Inline, 2)); - - let i2 = Atom::from_slice("zzz"); - assert!(i2.get_type_and_inline_len() == (Inline, 3)); - - let i3 = Atom::from_slice("zzzz"); - assert!(i3.get_type_and_inline_len() == (Inline, 4)); - - let i4 = Atom::from_slice("zzzzz"); - assert!(i4.get_type_and_inline_len() == (Inline, 5)); - - let i5 = Atom::from_slice("zzzzzz"); - assert!(i5.get_type_and_inline_len() == (Inline, 6)); - - let i6 = Atom::from_slice("zzzzzzz"); - assert!(i6.get_type_and_inline_len() == (Inline, 7)); - - let d0 = Atom::from_slice("zzzzzzzz"); - assert!(d0.get_type_and_inline_len() == (Dynamic, 0)); - - let d1 = Atom::from_slice("zzzzzzzzzzzzz"); - assert!(d1.get_type_and_inline_len() == (Dynamic, 0)); + unpacks_to!("", Static(..)); + unpacks_to!("id", Static(..)); + unpacks_to!("body", Static(..)); + unpacks_to!("c", Inline(..)); // "z" is a static atom + unpacks_to!("zz", Inline(..)); + unpacks_to!("zzz", Inline(..)); + unpacks_to!("zzzz", Inline(..)); + unpacks_to!("zzzzz", Inline(..)); + unpacks_to!("zzzzzz", Inline(..)); + unpacks_to!("zzzzzzz", Inline(..)); + unpacks_to!("zzzzzzzz", Dynamic(..)); + unpacks_to!("zzzzzzzzzzzzz", Dynamic(..)); } #[test] From 40bf9d2440a06e71d23c68f184e072bfd5f78c68 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 20 Oct 2014 12:19:54 -0700 Subject: [PATCH 030/379] Use #[unsafe_no_drop_flag] so 
that Atom can be only 64 bits Drop flags are going away, hopefully, but in the mean time here's a simple fix. --- src/atom/mod.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 57cc073..8f545da 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -163,6 +163,7 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. +#[unsafe_no_drop_flag] #[deriving(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -242,13 +243,16 @@ impl Drop for Atom { unsafe { match repr::from_packed_dynamic(self.data) { - Some(entry) => { + // We use #[unsafe_no_drop_flag] so that Atom will be only 64 + // bits. That means we need to ignore a NULL pointer here, + // which represents a value that was moved out. + Some(entry) if entry.is_not_null() => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { drop_slow(self); } } - None => (), + _ => (), } } } @@ -457,9 +461,10 @@ mod tests { } #[test] - fn assert_entry_size() { - // Guard against accidental changes to the size of StringCacheEntry. + fn assert_sizes() { + // Guard against accidental changes to the sizes of things. 
use core::mem; + assert_eq!(8, mem::size_of::()); assert_eq!(48, mem::size_of::()); } From 63171520ef333433dfb8aebb4286688e5e48ff35 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 20 Oct 2014 13:33:03 -0700 Subject: [PATCH 031/379] Use lazy-static for the global hash table --- Cargo.toml | 7 +++++-- src/atom/mod.rs | 25 ++++++------------------- src/lib.rs | 9 ++++++++- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 00e057b..9dd2291 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,10 +4,13 @@ name = "string_cache" version = "0.0.0" authors = [ "The Servo Project Developers" ] +[dependencies.string_cache_macros] +path = "macros" + [dependencies.phf] git = "https://github.com/sfackler/rust-phf" [dependencies.phf_mac] git = "https://github.com/sfackler/rust-phf" -[dependencies.string_cache_macros] -path = "macros" +[dependencies.lazy_static] +git = "https://github.com/Kimundi/lazy-static.rs" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 8f545da..cb740da 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -20,10 +20,10 @@ use core::slice::bytes; use core::str; use core::atomic::{AtomicInt, SeqCst}; use alloc::heap; +use alloc::boxed::Box; use collections::string::String; use collections::hash::{Hash, Hasher, sip}; -use sync::{Mutex, MutexGuard}; -use sync::one::{Once, ONCE_INIT}; +use sync::Mutex; use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; @@ -41,21 +41,8 @@ struct StringCache { buckets: [*mut StringCacheEntry, ..4096], } -impl StringCache { - #[inline] - fn lock<'a>() -> MutexGuard<'a, StringCache> { - static mut global_string_cache_ptr: *mut Mutex = 0 as *mut _; - static mut START: Once = ONCE_INIT; - - unsafe { - START.doit(|| { - let cache = box Mutex::new(StringCache::new()); - global_string_cache_ptr = mem::transmute(cache); - }); - - (&*global_string_cache_ptr).lock() - } - } +lazy_static! 
{ + static ref STRING_CACHE: Mutex = Mutex::new(StringCache::new()); } struct StringCacheEntry { @@ -187,7 +174,7 @@ impl Atom { bytes::copy_memory(buf, string_to_add.as_bytes()); Inline(len as u8, buf) } else { - Dynamic(StringCache::lock().add(string_to_add) as *mut ()) + Dynamic(STRING_CACHE.lock().add(string_to_add) as *mut ()) } } }; @@ -238,7 +225,7 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - StringCache::lock().remove(this.data); + STRING_CACHE.lock().remove(this.data); } unsafe { diff --git a/src/lib.rs b/src/lib.rs index 197dea5..390cbcf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,6 +33,9 @@ extern crate std; extern crate phf_mac; extern crate phf; +#[phase(plugin)] +extern crate lazy_static; + #[phase(plugin)] extern crate string_cache_macros; @@ -55,6 +58,10 @@ mod string_cache { // For macros and deriving. #[cfg(not(test))] mod std { - pub use core::{cmp, fmt, clone, option}; + pub use core::{cmp, fmt, clone, option, mem}; pub use collections::hash; + + pub mod sync { + pub use sync::one::{Once, ONCE_INIT}; + } } From 2ce893d63b7f8e022c74b637615004a8759bc9b3 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 20 Oct 2014 13:51:33 -0700 Subject: [PATCH 032/379] Use XXHash instead of SipHash This is an approximately 8% speedup on interning a 56-byte string. 
--- Cargo.toml | 3 +++ src/atom/mod.rs | 7 ++++--- src/lib.rs | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9dd2291..5397918 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,6 @@ git = "https://github.com/sfackler/rust-phf" [dependencies.lazy_static] git = "https://github.com/Kimundi/lazy-static.rs" + +[dependencies.xxhash] +git = "https://github.com/Jurily/rust-xxhash" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index cb740da..47b3c65 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -12,6 +12,7 @@ use core::prelude::*; use phf::PhfOrderedSet; +use xxhash::XXHasher; use core::fmt; use core::mem; @@ -22,7 +23,7 @@ use core::atomic::{AtomicInt, SeqCst}; use alloc::heap; use alloc::boxed::Box; use collections::string::String; -use collections::hash::{Hash, Hasher, sip}; +use collections::hash::{Hash, Hasher}; use sync::Mutex; use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; @@ -37,7 +38,7 @@ const ENTRY_ALIGNMENT: uint = 16; static static_atom_set: PhfOrderedSet<&'static str> = static_atom_set!(); struct StringCache { - hasher: sip::SipHasher, + hasher: XXHasher, buckets: [*mut StringCacheEntry, ..4096], } @@ -66,7 +67,7 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { StringCache { - hasher: sip::SipHasher::new(), + hasher: XXHasher::new(), buckets: unsafe { mem::zeroed() }, } } diff --git a/src/lib.rs b/src/lib.rs index 390cbcf..bd40364 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,6 +36,8 @@ extern crate phf; #[phase(plugin)] extern crate lazy_static; +extern crate xxhash; + #[phase(plugin)] extern crate string_cache_macros; From 82e1af05ba19b99223df47feb34436a1ca9ef268 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 21 Oct 2014 20:51:25 -0700 Subject: [PATCH 033/379] Fix warnings --- macros/src/atom/data.rs | 2 +- macros/src/atom/mod.rs | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/macros/src/atom/data.rs 
b/macros/src/atom/data.rs index b011709..44bb5f7 100644 --- a/macros/src/atom/data.rs +++ b/macros/src/atom/data.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -pub static atoms: &'static [&'static str] = &[ +pub static ATOMS: &'static [&'static str] = &[ // The first 64 atoms are special: we can quickly check membership // in sets of these, using a bitmask. This includes every tag that diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index a839715..8aa9ed3 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -28,7 +28,7 @@ mod repr; // Takes no arguments. pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); - let tts: Vec = data::atoms.iter().flat_map(|k| { + let tts: Vec = data::ATOMS.iter().flat_map(|k| { (quote_tokens!(&mut *cx, $k,)).into_iter() }).collect(); MacExpr::new(quote_expr!(&mut *cx, phf_ordered_set!($tts))) @@ -46,7 +46,7 @@ fn atom_tok_to_str(t: &TokenTree) -> Option { lazy_static! 
{ static ref STATIC_ATOM_MAP: HashMap<&'static str, uint> = { let mut m = HashMap::new(); - for (i, x) in data::atoms.iter().enumerate() { + for (i, x) in data::ATOMS.iter().enumerate() { m.insert(*x, i); } m @@ -69,7 +69,7 @@ impl MacResult for AtomResult { } } -fn make_atom_result(cx: &mut ExtCtxt, sp: Span, name: &str) -> Option { +fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { let i = match STATIC_ATOM_MAP.find_equiv(&name) { Some(i) => i, None => return None, @@ -91,14 +91,14 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box expect!(cx, sp, atom_tok_to_str(t), usage), _ => bail!(cx, sp, usage), }; - box expect!(cx, sp, make_atom_result(cx, sp, name.get()), + box expect!(cx, sp, make_atom_result(cx, name.get()), format!("Unknown static atom {:s}", name.get()).as_slice()) } // Translate `ns!(HTML)` into `Namespace { atom: atom!("http://www.w3.org/1999/xhtml") }`. // The argument is ASCII-case-insensitive. pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - static all_ns: &'static [(&'static str, &'static str)] = [ + static ALL_NS: &'static [(&'static str, &'static str)] = [ ("", ""), ("html", "http://www.w3.org/1999/xhtml"), ("xml", "http://www.w3.org/XML/1998/namespace"), @@ -109,7 +109,7 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box String { - let ns_names: Vec<&'static str> = all_ns.slice_from(1).iter() + let ns_names: Vec<&'static str> = ALL_NS.slice_from(1).iter() .map(|&(x, _)| x).collect(); format!("Usage: ns!(HTML), case-insensitive. 
\ Known namespaces: {:s}", @@ -121,12 +121,12 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box None, }, usage().as_slice()); - let &(short, url) = expect!(cx, sp, - all_ns.iter().find(|&&(short, _)| short.eq_ignore_ascii_case(name.get())), + let &(_, url) = expect!(cx, sp, + ALL_NS.iter().find(|&&(short, _)| short.eq_ignore_ascii_case(name.get())), usage().as_slice()); // All of the URLs should be in the static atom table. - let AtomResult { expr, pat } = expect!(cx, sp, make_atom_result(cx, sp, url), + let AtomResult { expr, pat } = expect!(cx, sp, make_atom_result(cx, url), format!("internal plugin error: can't find namespace url {:s}", url).as_slice()); box AtomResult { From e4f39097a2428eac8922daabca6530218b8bb704 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 21 Oct 2014 17:54:16 -0700 Subject: [PATCH 034/379] Add optional event logging We can use this with Servo to generate traces from real websites for optimization purposes. --- Cargo.toml | 3 +++ src/atom/mod.rs | 15 ++++++++++++--- src/event.rs | 36 ++++++++++++++++++++++++++++++++++++ src/lib.rs | 8 +++++++- 4 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 src/event.rs diff --git a/Cargo.toml b/Cargo.toml index 5397918..577945a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,9 @@ name = "string_cache" version = "0.0.0" authors = [ "The Servo Project Developers" ] +[features] +log-events = [] + [dependencies.string_cache_macros] path = "macros" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 47b3c65..0863d81 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -28,6 +28,12 @@ use sync::Mutex; use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; +#[cfg(feature = "log-events")] +use event; + +#[cfg(not(feature = "log-events"))] +macro_rules! 
log (($e:expr) => (())) + #[path="../../shared/repr.rs"] pub mod repr; @@ -110,6 +116,7 @@ impl StringCache { StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); } self.buckets[bucket_index] = ptr; + log!(event::Insert(ptr as u64, String::from_str(string_to_add))); } debug_assert!(ptr != ptr::null_mut()); @@ -146,6 +153,8 @@ impl StringCache { heap::deallocate(ptr as *mut u8, mem::size_of::(), ENTRY_ALIGNMENT); } + + log!(event::Remove(key)); } } @@ -180,9 +189,9 @@ impl Atom { } }; - Atom { - data: unsafe { unpacked.pack() }, - } + let data = unsafe { unpacked.pack() }; + log!(event::Intern(data)) + Atom { data: data } } pub fn as_slice<'t>(&'t self) -> &'t str { diff --git a/src/event.rs b/src/event.rs new file mode 100644 index 0000000..87f03df --- /dev/null +++ b/src/event.rs @@ -0,0 +1,36 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![macro_escape] + +use core::prelude::*; + +use alloc::boxed::Box; +use collections::MutableSeq; +use collections::vec::Vec; +use collections::string::String; +use sync::Mutex; + +#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Show, Encodable)] +pub enum Event { + Intern(u64), + Insert(u64, String), + Remove(u64), +} + +lazy_static! { + pub static ref LOG: Mutex> + = Mutex::new(Vec::with_capacity(50_000)); +} + +pub fn log(e: Event) { + LOG.lock().push(e); +} + +macro_rules! 
log (($e:expr) => (::event::log($e))) diff --git a/src/lib.rs b/src/lib.rs index bd40364..724283f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,9 +41,15 @@ extern crate xxhash; #[phase(plugin)] extern crate string_cache_macros; +#[cfg(feature = "log-events")] +extern crate serialize; + pub use atom::Atom; pub use namespace::{Namespace, QualName}; +#[cfg(feature = "log-events")] +pub mod event; + pub mod atom; pub mod namespace; @@ -60,7 +66,7 @@ mod string_cache { // For macros and deriving. #[cfg(not(test))] mod std { - pub use core::{cmp, fmt, clone, option, mem}; + pub use core::{cmp, fmt, clone, option, mem, result}; pub use collections::hash; pub mod sync { From 9dceba69a7e5d540e6e51fa8cfc3635e45ff96d4 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 21 Oct 2014 19:53:30 -0700 Subject: [PATCH 035/379] Add an example for event logging --- Cargo.toml | 3 +++ examples/event-log/Cargo.toml | 9 +++++++++ examples/event-log/README.md | 4 ++++ examples/event-log/src/main.rs | 31 +++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) create mode 100644 examples/event-log/Cargo.toml create mode 100644 examples/event-log/README.md create mode 100644 examples/event-log/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 577945a..3de6de2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,9 @@ version = "0.0.0" authors = [ "The Servo Project Developers" ] [features] + +# Enable event logging for generating benchmark traces. +# See examples/event-log. 
log-events = [] [dependencies.string_cache_macros] diff --git a/examples/event-log/Cargo.toml b/examples/event-log/Cargo.toml new file mode 100644 index 0000000..2a28bf9 --- /dev/null +++ b/examples/event-log/Cargo.toml @@ -0,0 +1,9 @@ +[package] + +name = "string-cache-event-log-example" +version = "0.0.0" +authors = [ "The Servo Project Developers" ] + +[dependencies.string_cache] +git = "https://github.com/servo/string-cache" +features = ["log-events"] diff --git a/examples/event-log/README.md b/examples/event-log/README.md new file mode 100644 index 0000000..b2deb39 --- /dev/null +++ b/examples/event-log/README.md @@ -0,0 +1,4 @@ +string-cache can record logs of what it's doing, which can be useful for +guiding future changes to the library. This project demonstrates how to build +string-cache with logging enabled (see `Cargo.toml`), and how to access the log +at runtime. diff --git a/examples/event-log/src/main.rs b/examples/event-log/src/main.rs new file mode 100644 index 0000000..6efff10 --- /dev/null +++ b/examples/event-log/src/main.rs @@ -0,0 +1,31 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +extern crate string_cache; + +use string_cache::Atom; +use string_cache::event; + +use std::io; + +fn main() { + println!("Reading stdin to end of file"); + let stdin = io::stdin().read_to_string().unwrap(); + let mut atoms = vec![]; + for word in stdin.as_slice().split(|c: char| c.is_whitespace()) { + atoms.push(Atom::from_slice(word)); + } + + let log = event::LOG.lock(); + + println!("Created {:u} atoms, logged {:u} events:", atoms.len(), log.len()); + for e in log.iter() { + println!("{}", e); + } +} From f8e80e0f44de36fe2218aae497f2b181b9347605 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 22 Oct 2014 13:21:20 -0700 Subject: [PATCH 036/379] Add a little info to the README --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cba6fc1..36e27e2 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ -string-cache -============ +# string-cache + +A string interning library for Rust, developed as part of the [Servo](https://github.com/servo/servo) project. From 533230bb83f9712de5c15ab3c41ccb763d04af01 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Fri, 24 Oct 2014 15:08:58 -0700 Subject: [PATCH 037/379] Get rid of an unneeded unsafe in macros code --- macros/src/atom/mod.rs | 3 +-- shared/repr.rs | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 8aa9ed3..2fd57c7 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -75,8 +75,7 @@ fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { None => return None, }; - // In the case of static atoms, the call to pack() doesn't use any unsafe code. 
- let data = unsafe { repr::Static(*i as u32).pack() }; + let data = repr::pack_static(*i as u32); Some(AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), diff --git a/shared/repr.rs b/shared/repr.rs index 8402b05..63483c1 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -52,11 +52,15 @@ unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { } } +pub fn pack_static(n: u32) -> u64 { + (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) +} + impl UnpackedAtom { #[inline(always)] pub unsafe fn pack(self) -> u64 { match self { - Static(n) => (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS), + Static(n) => pack_static(n), Dynamic(p) => { let n = p as u64; debug_assert!(0 == n & 0xf); From 3d398f08559ef9881fa113ee5f7d9e1c46ec72ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Andrasek?= Date: Wed, 29 Oct 2014 19:01:54 +0100 Subject: [PATCH 038/379] TTTok => TtToken --- macros/src/atom/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 2fd57c7..40ac8eb 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -9,7 +9,7 @@ use syntax::ptr::P; use syntax::codemap::Span; -use syntax::ast::{TokenTree, TTTok}; +use syntax::ast::{TokenTree, TtToken}; use syntax::ast; use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; use syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; @@ -36,8 +36,8 @@ pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> B fn atom_tok_to_str(t: &TokenTree) -> Option { Some(get_ident(match *t { - TTTok(_, IDENT(s, _)) => s, - TTTok(_, LIT_STR(s)) => s.ident(), + TtToken(_, IDENT(s, _)) => s, + TtToken(_, LIT_STR(s)) => s.ident(), _ => return None, })) } From 6d48be9b97a0194f00c09cd9281760d819ed5a98 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 29 Oct 2014 16:42:33 -0700 Subject: [PATCH 039/379] Upgrade to rustc 0.13.0-dev (77f44d4a7 2014-10-29 20:16:57 
+0000) --- macros/src/atom/mod.rs | 6 +++--- shared/repr.rs | 2 +- src/atom/bench.rs | 2 +- src/atom/mod.rs | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 40ac8eb..178ddda 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -12,7 +12,7 @@ use syntax::codemap::Span; use syntax::ast::{TokenTree, TtToken}; use syntax::ast; use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; -use syntax::parse::token::{get_ident, InternedString, LIT_STR, IDENT}; +use syntax::parse::token::{get_ident, InternedString, LitStr, Ident}; use std::iter::Chain; use std::slice::{Items, Found, NotFound}; @@ -36,8 +36,8 @@ pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> B fn atom_tok_to_str(t: &TokenTree) -> Option { Some(get_ident(match *t { - TtToken(_, IDENT(s, _)) => s, - TtToken(_, LIT_STR(s)) => s.ident(), + TtToken(_, Ident(s, _)) => s, + TtToken(_, LitStr(s)) => s.ident(), _ => return None, })) } diff --git a/shared/repr.rs b/shared/repr.rs index 63483c1..764e342 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -97,7 +97,7 @@ impl UnpackedAtom { // intrinsics::unreachable() in release builds? // See rust-lang/rust#18152. - _ => fail!("impossible"), + _ => panic!("impossible"), } } } diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 7b5d5a9..b86646f 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -41,7 +41,7 @@ macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( fn $name() { match unsafe { $x.unpack() } { $p => (), - _ => fail!("atom has wrong type"), + _ => panic!("atom has wrong type"), } } )) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 0863d81..194408c 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -323,7 +323,7 @@ mod tests { macro_rules! 
unpacks_to (($e:expr, $t:pat) => ( match unsafe { Atom::from_slice($e).unpack() } { $t => (), - _ => fail!("atom has wrong type"), + _ => panic!("atom has wrong type"), } )) @@ -422,7 +422,7 @@ mod tests { let x = $x; let y = $y; if x != y { - fail!("assertion failed: {} != {}", + panic!("assertion failed: {} != {}", format_args!(fmt::format, $fmt, x).as_slice(), format_args!(fmt::format, $fmt, y).as_slice()); } From 8851e5ec46ce21c3816e432dd22e29da3dc9e43a Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Wed, 5 Nov 2014 11:12:59 -0800 Subject: [PATCH 040/379] rustup --- macros/src/atom/mod.rs | 2 +- src/atom/mod.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 178ddda..5a21d26 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -70,7 +70,7 @@ impl MacResult for AtomResult { } fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match STATIC_ATOM_MAP.find_equiv(&name) { + let i = match STATIC_ATOM_MAP.find_equiv(name) { Some(i) => i, None => return None, }; diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 194408c..2dfaf5f 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -7,11 +7,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![allow(non_uppercase_statics)] +#![allow(non_upper_case_globals)] use core::prelude::*; -use phf::PhfOrderedSet; +use phf::OrderedSet; use xxhash::XXHasher; use core::fmt; @@ -41,7 +41,7 @@ pub mod repr; const ENTRY_ALIGNMENT: uint = 16; // Macro-generated table for static atoms. 
-static static_atom_set: PhfOrderedSet<&'static str> = static_atom_set!(); +static static_atom_set: OrderedSet<&'static str> = static_atom_set!(); struct StringCache { hasher: XXHasher, @@ -175,7 +175,7 @@ impl Atom { } pub fn from_slice(string_to_add: &str) -> Atom { - let unpacked = match static_atom_set.find_index_equiv(&string_to_add) { + let unpacked = match static_atom_set.find_index_equiv(string_to_add) { Some(id) => Static(id as u32), None => { let len = string_to_add.len(); From 110cffdd4e592984998f83dbe260e6af88673953 Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Fri, 7 Nov 2014 12:12:56 -0800 Subject: [PATCH 041/379] rustup --- shared/repr.rs | 2 +- src/atom/bench.rs | 2 +- src/atom/mod.rs | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/shared/repr.rs b/shared/repr.rs index 764e342..ef12056 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -16,7 +16,7 @@ use core::{mem, raw, intrinsics}; use core::option::{Option, Some, None}; use core::ptr::RawPtr; -use core::slice::{ImmutableSlice, AsSlice}; +use core::slice::{SlicePrelude, AsSlice}; use core::slice::bytes; // FIXME(rust-lang/rust#18153): generate these from an enum diff --git a/src/atom/bench.rs b/src/atom/bench.rs index b86646f..d54beb3 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -189,7 +189,7 @@ macro_rules! 
bench_rand ( ($name:ident, $len:expr) => ( #[bench] fn $name(b: &mut Bencher) { use std::{str, rand}; - use std::slice::MutableSlice; + use std::slice::SlicePrelude; use std::rand::Rng; let mut gen = rand::weak_rng(); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 2dfaf5f..a026254 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -274,7 +274,7 @@ impl PartialOrd for Atom { if self.data == other.data { return Some(Equal); } - self.as_slice().partial_cmp(&other.as_slice()) + self.as_slice().partial_cmp(other.as_slice()) } } @@ -283,7 +283,7 @@ impl Ord for Atom { if self.data == other.data { return Equal; } - self.as_slice().cmp(&other.as_slice()) + self.as_slice().cmp(other.as_slice()) } } @@ -375,8 +375,8 @@ mod tests { fn ord() { fn check(x: &str, y: &str) { assert_eq!(x < y, Atom::from_slice(x) < Atom::from_slice(y)); - assert_eq!(x.cmp(&y), Atom::from_slice(x).cmp(&Atom::from_slice(y))); - assert_eq!(x.partial_cmp(&y), Atom::from_slice(x).partial_cmp(&Atom::from_slice(y))); + assert_eq!(x.cmp(y), Atom::from_slice(x).cmp(&Atom::from_slice(y))); + assert_eq!(x.partial_cmp(y), Atom::from_slice(x).partial_cmp(&Atom::from_slice(y))); } check("a", "body"); From 91717ae69a52fc8d1c7404381ce3b7f165dcc477 Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Tue, 18 Nov 2014 11:34:12 -0800 Subject: [PATCH 042/379] rustup --- shared/repr.rs | 2 ++ src/atom/mod.rs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/shared/repr.rs b/shared/repr.rs index ef12056..e4eadaf 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -19,6 +19,8 @@ use core::ptr::RawPtr; use core::slice::{SlicePrelude, AsSlice}; use core::slice::bytes; +pub use self::UnpackedAtom::{Dynamic, Inline, Static}; + // FIXME(rust-lang/rust#18153): generate these from an enum pub const DYNAMIC_TAG: u8 = 0u8; pub const INLINE_TAG: u8 = 1u8; // len in upper nybble diff --git a/src/atom/mod.rs b/src/atom/mod.rs index a026254..c3cc61f 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -175,7 
+175,7 @@ impl Atom { } pub fn from_slice(string_to_add: &str) -> Atom { - let unpacked = match static_atom_set.find_index_equiv(string_to_add) { + let unpacked = match static_atom_set.get_index_equiv(string_to_add) { Some(id) => Static(id as u32), None => { let len = string_to_add.len(); From 2c8ad1f2c06fd6fdb362ed9a9393b18c8cde2929 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 18 Nov 2014 15:05:40 -0800 Subject: [PATCH 043/379] Upgrade to rustc 0.13.0-dev (c8d6e3b2c 2014-11-18 19:11:43 +0000) --- macros/src/atom/mod.rs | 4 ++-- shared/repr.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 5a21d26..698a688 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -70,7 +70,7 @@ impl MacResult for AtomResult { } fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match STATIC_ATOM_MAP.find_equiv(name) { + let i = match STATIC_ATOM_MAP.get(name) { Some(i) => i, None => return None, }; @@ -97,7 +97,7 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box Box { - static ALL_NS: &'static [(&'static str, &'static str)] = [ + static ALL_NS: &'static [(&'static str, &'static str)] = &[ ("", ""), ("html", "http://www.w3.org/1999/xhtml"), ("xml", "http://www.w3.org/XML/1998/namespace"), diff --git a/shared/repr.rs b/shared/repr.rs index e4eadaf..76ec070 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -93,7 +93,7 @@ impl UnpackedAtom { debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8, ..7] = [0, ..7]; let src: &[u8] = mem::transmute(inline_atom_slice(&data)); - bytes::copy_memory(buf, src); + bytes::copy_memory(buf.as_mut_slice(), src); Inline(len as u8, buf) }, From e34c2095c7dd2279c218b39b59715700c3711e91 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 18 Nov 2014 16:40:53 -0800 Subject: [PATCH 044/379] Complete Rust upgrade --- src/atom/bench.rs | 8 ++++---- src/atom/mod.rs | 2 +- 2 files changed, 5 
insertions(+), 5 deletions(-) diff --git a/src/atom/bench.rs b/src/atom/bench.rs index d54beb3..3ea3ec8 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -134,7 +134,7 @@ macro_rules! bench_all ( use core::prelude::*; use collections::vec::Vec; use test::{Bencher, black_box}; - use std::to_string::ToString; + use std::string::ToString; use atom::Atom; use atom::repr::{Static, Inline, Dynamic}; @@ -189,7 +189,7 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( #[bench] fn $name(b: &mut Bencher) { use std::{str, rand}; - use std::slice::SlicePrelude; + use std::slice::{SlicePrelude, AsSlice}; use std::rand::Rng; let mut gen = rand::weak_rng(); @@ -201,12 +201,12 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( // as about 3-12% at one point. let mut buf: [u8, ..$len] = [0, ..$len]; - gen.fill_bytes(buf); + gen.fill_bytes(buf.as_mut_slice()); for n in buf.iter_mut() { // shift into printable ASCII *n = (*n % 0x40) + 0x20; } - let s = unsafe { str::raw::from_utf8(buf) }; + let s = unsafe { str::raw::from_utf8(buf.as_slice()) }; black_box(Atom::from_slice(s)); }); } diff --git a/src/atom/mod.rs b/src/atom/mod.rs index c3cc61f..3f81438 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -181,7 +181,7 @@ impl Atom { let len = string_to_add.len(); if len <= repr::MAX_INLINE_LEN { let mut buf: [u8, ..7] = [0, ..7]; - bytes::copy_memory(buf, string_to_add.as_bytes()); + bytes::copy_memory(buf.as_mut_slice(), string_to_add.as_bytes()); Inline(len as u8, buf) } else { Dynamic(STRING_CACHE.lock().add(string_to_add) as *mut ()) From fe7894addaab5f884018747f6cff60537dfafd28 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Wed, 19 Nov 2014 17:11:29 +0100 Subject: [PATCH 045/379] Remove redundant build files I think these were leftovers from before Cargo. `cargo build` works fine without them, and `make all` and `make check` fail currently. 
--- .gitignore | 1 - Makefile.in | 35 ----------------------------------- configure | 4 ---- 3 files changed, 40 deletions(-) delete mode 100644 Makefile.in delete mode 100755 configure diff --git a/.gitignore b/.gitignore index fafa631..d67d766 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ /doc -/Makefile /Cargo.lock /target diff --git a/Makefile.in b/Makefile.in deleted file mode 100644 index 033472a..0000000 --- a/Makefile.in +++ /dev/null @@ -1,35 +0,0 @@ -VPATH=%VPATH% - -RUSTC ?= rustc -RUSTFLAGS += -L ../../phf/rust-phf -EXT_DEPS ?= -RUSTDOC ?= rustdoc -RUSTDOC_FLAGS ?= -RUSTDOC_TARGET ?= doc - -RUST_SRC=$(shell find $(VPATH)/src $(VPATH)/macros $(VPATH)/shared -type f -name '*.rs') - -.PHONY: all -all: libstring-cache.dummy - -libstring-cache.dummy: $(RUST_SRC) $(EXT_DEPS) - $(RUSTC) $(RUSTFLAGS) $(VPATH)/macros/src/lib.rs --out-dir . - $(RUSTC) $(RUSTFLAGS) -L . $(VPATH)/src/lib.rs --out-dir . - touch $@ - -string-cache-test: $(RUST_SRC) - $(RUSTC) $(RUSTFLAGS) -L . $< -o $@ --test - -.PHONY: check -check: string-cache-test - ./string-cache-test $(TEST) - -.PHONY: doc -doc: $(RUSTDOC_TARGET)/string_cache/index.html - -$(RUSTDOC_TARGET)/string_cache/index.html: $(RUST_SRC) $(EXT_DEPS) - $(RUSTDOC) $(RUSTDOC_FLAGS) $< -o $(RUSTDOC_TARGET) - -.PHONY: clean -clean: - rm -f *.o *.a *.so *.dylib *.rlib *.dll *.dummy *-test diff --git a/configure b/configure deleted file mode 100755 index 62a0f4c..0000000 --- a/configure +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -SRCDIR="$(cd $(dirname $0) && pwd)" -sed "s#%VPATH%#${SRCDIR}#" ${SRCDIR}/Makefile.in > Makefile From 75521e65aea222297303039bedf4400a06ec1dbb Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Thu, 20 Nov 2014 16:06:10 -0800 Subject: [PATCH 046/379] rustup (and rust-phf-up) --- macros/src/atom/mod.rs | 10 +++++----- src/atom/mod.rs | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 698a688..618daa6 100644 --- 
a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -12,7 +12,7 @@ use syntax::codemap::Span; use syntax::ast::{TokenTree, TtToken}; use syntax::ast; use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; -use syntax::parse::token::{get_ident, InternedString, LitStr, Ident}; +use syntax::parse::token::{get_ident, InternedString, Ident, Literal, Lit}; use std::iter::Chain; use std::slice::{Items, Found, NotFound}; @@ -37,7 +37,7 @@ pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> B fn atom_tok_to_str(t: &TokenTree) -> Option { Some(get_ident(match *t { TtToken(_, Ident(s, _)) => s, - TtToken(_, LitStr(s)) => s.ident(), + TtToken(_, Literal(Lit::Str_(s), _)) => s.ident(), _ => return None, })) } @@ -91,7 +91,7 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box bail!(cx, sp, usage), }; box expect!(cx, sp, make_atom_result(cx, name.get()), - format!("Unknown static atom {:s}", name.get()).as_slice()) + format!("Unknown static atom {}", name.get()).as_slice()) } // Translate `ns!(HTML)` into `Namespace { atom: atom!("http://www.w3.org/1999/xhtml") }`. @@ -111,7 +111,7 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box = ALL_NS.slice_from(1).iter() .map(|&(x, _)| x).collect(); format!("Usage: ns!(HTML), case-insensitive. 
\ - Known namespaces: {:s}", + Known namespaces: {}", ns_names.connect(" ")) } @@ -126,7 +126,7 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box Atom { - let unpacked = match static_atom_set.get_index_equiv(string_to_add) { + let unpacked = match static_atom_set.get_index(string_to_add) { Some(id) => Static(id as u32), None => { let len = string_to_add.len(); @@ -265,7 +265,7 @@ impl fmt::Show for Atom { } }; - write!(f, "Atom('{:s}' type={:s})", self.as_slice(), ty_str) + write!(f, "Atom('{}' type={})", self.as_slice(), ty_str) } } From e0ff12d3d4ab246157f31ed084f59ff556601c13 Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Sat, 22 Nov 2014 16:19:53 -0800 Subject: [PATCH 047/379] move to crates.io --- Cargo.toml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3de6de2..a9cfa56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,16 +10,13 @@ authors = [ "The Servo Project Developers" ] # See examples/event-log. log-events = [] +[dependencies] +phf = "0" +phf_mac = "0" +xxhash = "0" + [dependencies.string_cache_macros] path = "macros" -[dependencies.phf] -git = "https://github.com/sfackler/rust-phf" -[dependencies.phf_mac] -git = "https://github.com/sfackler/rust-phf" - [dependencies.lazy_static] git = "https://github.com/Kimundi/lazy-static.rs" - -[dependencies.xxhash] -git = "https://github.com/Jurily/rust-xxhash" From be61096d6481ce02f2b09ab01edc9ccd3d867d18 Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Mon, 24 Nov 2014 11:00:52 -0800 Subject: [PATCH 048/379] rustup --- src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 80f2835..850faf2 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -200,7 +200,7 @@ impl Atom { Inline(..) 
=> { let buf = repr::inline_orig_bytes(&self.data); debug_assert!(str::is_utf8(buf)); - str::raw::from_utf8(buf) + str::raw::from_utf8_unchecked(buf) }, Static(idx) => *static_atom_set.iter().idx(idx as uint).expect("bad static atom"), Dynamic(entry) => { From ec45b73217dae1590f9af6eb42864e185a956600 Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Mon, 24 Nov 2014 11:02:26 -0800 Subject: [PATCH 049/379] rustup --- src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 850faf2..3f43e17 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -200,7 +200,7 @@ impl Atom { Inline(..) => { let buf = repr::inline_orig_bytes(&self.data); debug_assert!(str::is_utf8(buf)); - str::raw::from_utf8_unchecked(buf) + str::from_utf8_unchecked(buf) }, Static(idx) => *static_atom_set.iter().idx(idx as uint).expect("bad static atom"), Dynamic(entry) => { From 1ed7e97f7f3c795b6bb74e9ca331990dbe43ca24 Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Tue, 25 Nov 2014 09:46:43 -0800 Subject: [PATCH 050/379] rustup --- src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 724283f..b637e32 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,9 +23,6 @@ extern crate sync; #[cfg(test)] extern crate test; -#[cfg(test)] -extern crate native; - #[cfg(test)] extern crate std; From bf93b93cee570ccb0bd40e3c7f6c4f7a77b94631 Mon Sep 17 00:00:00 2001 From: Michael Layzell Date: Sat, 6 Dec 2014 22:47:03 -0500 Subject: [PATCH 051/379] Switch to using std::sync instead of libsync libsync has been removed from rust, which means that for sync primitives, std::sync is required. This switches string-cache to using std::sync. Unfortunately, this means that string-cache now depends on both std and core. 
--- macros/src/atom/mod.rs | 1 - src/atom/mod.rs | 2 +- src/lib.rs | 13 ------------- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 618daa6..a6cba35 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -15,7 +15,6 @@ use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; use syntax::parse::token::{get_ident, InternedString, Ident, Literal, Lit}; use std::iter::Chain; -use std::slice::{Items, Found, NotFound}; use std::collections::HashMap; use std::ascii::AsciiExt; diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 3f43e17..81f1693 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -24,7 +24,7 @@ use alloc::heap; use alloc::boxed::Box; use collections::string::String; use collections::hash::{Hash, Hasher}; -use sync::Mutex; +use std::sync::Mutex; use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; diff --git a/src/lib.rs b/src/lib.rs index b637e32..45008b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,12 +18,10 @@ extern crate core; extern crate alloc; extern crate collections; -extern crate sync; #[cfg(test)] extern crate test; -#[cfg(test)] extern crate std; #[phase(plugin)] @@ -59,14 +57,3 @@ mod string_cache { pub use atom; pub use namespace; } - -// For macros and deriving. -#[cfg(not(test))] -mod std { - pub use core::{cmp, fmt, clone, option, mem, result}; - pub use collections::hash; - - pub mod sync { - pub use sync::one::{Once, ONCE_INIT}; - } -} From 07c2c728e12c321c33a07f74c924be3127b479e9 Mon Sep 17 00:00:00 2001 From: Akos Kiss Date: Tue, 6 Jan 2015 18:05:27 +0000 Subject: [PATCH 052/379] Update to build with latest rustc nightly * Macro syntax changed (mandatory closing semicolon). * Array syntax changed (`[ty, ..n]` replaced by `[ty; n]`). * Enums became namespaced. * `proc()` syntax got replaced by `move ||`. * `#[deriving(x)]` became deprecated and got replaced by `#[derive(x)]`.
* Some APIs changed (around pointers, slices, vectors, hashes, locks, strings, threads). --- macros/src/atom/mod.rs | 2 +- macros/src/lib.rs | 6 ++-- shared/repr.rs | 10 +++--- src/atom/bench.rs | 81 +++++++++++++++++++++--------------------- src/atom/mod.rs | 40 +++++++++++---------- src/event.rs | 2 +- src/lib.rs | 2 +- src/namespace.rs | 4 +-- 8 files changed, 76 insertions(+), 71 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index a6cba35..e697354 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -139,4 +139,4 @@ macro_rules! qualname (($ns:tt, $local:tt) => ( ns: ns!($ns), local: atom!($local), } -)) +)); diff --git a/macros/src/lib.rs b/macros/src/lib.rs index b2ae589..be15804 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -25,18 +25,18 @@ use rustc::plugin::Registry; macro_rules! bail ( ($cx:expr, $sp:expr, $msg:expr) => ({ $cx.span_err($sp, $msg); return ::syntax::ext::base::DummyResult::any($sp); -})) +})); macro_rules! bail_if ( ($e:expr, $cx:expr, $sp:expr, $msg:expr) => ( if $e { bail!($cx, $sp, $msg) } -)) +)); macro_rules! expect ( ($cx:expr, $sp:expr, $e:expr, $msg:expr) => ( match $e { Some(x) => x, None => bail!($cx, $sp, $msg), } -)) +)); mod atom; diff --git a/shared/repr.rs b/shared/repr.rs index 76ec070..ef3ecf7 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -14,9 +14,9 @@ #![allow(dead_code, unused_imports)] use core::{mem, raw, intrinsics}; -use core::option::{Option, Some, None}; -use core::ptr::RawPtr; -use core::slice::{SlicePrelude, AsSlice}; +use core::option::Option::{self, Some, None}; +use core::ptr::PtrExt; +use core::slice::{AsSlice, SliceExt}; use core::slice::bytes; pub use self::UnpackedAtom::{Dynamic, Inline, Static}; @@ -35,7 +35,7 @@ pub enum UnpackedAtom { Dynamic(*mut ()), /// Length + bytes of string. - Inline(u8, [u8, ..7]), + Inline(u8, [u8; 7]), /// Index in static interning table. 
Static(u32), @@ -91,7 +91,7 @@ impl UnpackedAtom { INLINE_TAG => { let len = ((data & 0xf0) >> 4) as uint; debug_assert!(len <= MAX_INLINE_LEN); - let mut buf: [u8, ..7] = [0, ..7]; + let mut buf: [u8; 7] = [0; 7]; let src: &[u8] = mem::transmute(inline_atom_slice(&data)); bytes::copy_memory(buf.as_mut_slice(), src); Inline(len as u8, buf) diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 3ea3ec8..4559e5a 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -44,14 +44,14 @@ macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( _ => panic!("atom has wrong type"), } } -)) +)); macro_rules! bench_tiny_op (($name:ident, $op:ident, $ctor_x:expr, $ctor_y:expr) => ( #[bench] fn $name(b: &mut Bencher) { const n: uint = 1000; - let xs = Vec::from_elem(n, $ctor_x); - let ys = Vec::from_elem(n, $ctor_y); + let xs: Vec<_> = repeat($ctor_x).take(n).collect(); + let ys: Vec<_> = repeat($ctor_y).take(n).collect(); b.iter(|| { for (x, y) in xs.iter().zip(ys.iter()) { @@ -59,22 +59,22 @@ macro_rules! bench_tiny_op (($name:ident, $op:ident, $ctor_x:expr, $ctor_y:expr) } }); } -)) +)); macro_rules! 
bench_one ( - (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x, Static(..))); - (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x, Inline(..))); - (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x, Dynamic(..))); - (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y, Static(..))); - (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y, Inline(..))); - (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y, Dynamic(..))); - (is_static $x:expr, $y:expr) => (bench_one!(x_static $x, $y) bench_one!(y_static $x, $y)); - (is_inline $x:expr, $y:expr) => (bench_one!(x_inline $x, $y) bench_one!(y_inline $x, $y)); - (is_dynamic $x:expr, $y:expr) => (bench_one!(x_dynamic $x, $y) bench_one!(y_dynamic $x, $y)); - - (eq $x:expr, $_y:expr) => (bench_tiny_op!(eq_x_1000, eq, $x, $x)); - (ne $x:expr, $y:expr) => (bench_tiny_op!(ne_x_1000, ne, $x, $y)); - (lt $x:expr, $y:expr) => (bench_tiny_op!(lt_x_1000, lt, $x, $y)); + (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x, Static(..));); + (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x, Inline(..));); + (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x, Dynamic(..));); + (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y, Static(..));); + (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y, Inline(..));); + (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y, Dynamic(..));); + (is_static $x:expr, $y:expr) => (bench_one!(x_static $x, $y); bench_one!(y_static $x, $y);); + (is_inline $x:expr, $y:expr) => (bench_one!(x_inline $x, $y); bench_one!(y_inline $x, $y);); + (is_dynamic $x:expr, $y:expr) => (bench_one!(x_dynamic $x, $y); bench_one!(y_dynamic $x, $y);); + + (eq $x:expr, $_y:expr) => (bench_tiny_op!(eq_x_1000, eq, $x, $x);); + (ne $x:expr, $y:expr) => (bench_tiny_op!(ne_x_1000, ne, $x, $y);); + (lt $x:expr, $y:expr) => (bench_tiny_op!(lt_x_1000, lt, $x, $y);); (intern $x:expr, $_y:expr) => ( #[bench] @@ -122,7 
+122,7 @@ macro_rules! bench_one ( }); } ); -) +); macro_rules! bench_all ( ([ $($which:ident)+ ] for $name:ident = $x:expr, $y:expr) => ( @@ -135,6 +135,7 @@ macro_rules! bench_all ( use collections::vec::Vec; use test::{Bencher, black_box}; use std::string::ToString; + use std::iter::repeat; use atom::Atom; use atom::repr::{Static, Inline, Dynamic}; @@ -142,54 +143,54 @@ macro_rules! bench_all ( use super::mk; $( - bench_one!($which $x, $y) + bench_one!($which $x, $y); )+ } ); -) +); pub const longer_dynamic_a: &'static str = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Band"; pub const longer_dynamic_b: &'static str = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Ban!"; -bench_all!([eq ne lt clone_string] for short_string = "e", "f") -bench_all!([eq ne lt clone_string] for medium_string = "xyzzy01", "xyzzy02") +bench_all!([eq ne lt clone_string] for short_string = "e", "f"); +bench_all!([eq ne lt clone_string] for medium_string = "xyzzy01", "xyzzy02"); bench_all!([eq ne lt clone_string] - for longer_string = super::longer_dynamic_a, super::longer_dynamic_b) + for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_slice clone is_static lt] - for static_atom = atom!(a), atom!(b)) + for static_atom = atom!(a), atom!(b)); bench_all!([intern as_slice clone is_inline] - for short_inline_atom = mk("e"), mk("f")) + for short_inline_atom = mk("e"), mk("f")); bench_all!([eq ne intern as_slice clone is_inline lt] - for medium_inline_atom = mk("xyzzy01"), mk("xyzzy02")) + for medium_inline_atom = mk("xyzzy01"), mk("xyzzy02")); bench_all!([intern as_slice clone is_dynamic] - for min_dynamic_atom = mk("xyzzy001"), mk("xyzzy002")) + for min_dynamic_atom = mk("xyzzy001"), mk("xyzzy002")); bench_all!([eq ne intern as_slice clone is_dynamic lt] - for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b)) + for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b)); 
bench_all!([intern as_slice clone is_static] - for static_at_runtime = mk("a"), mk("b")) + for static_at_runtime = mk("a"), mk("b")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = atom!(a), mk("f")) + for static_vs_inline = atom!(a), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = atom!(a), mk(super::longer_dynamic_b)) + for static_vs_dynamic = atom!(a), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] - for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)) + for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); macro_rules! bench_rand ( ($name:ident, $len:expr) => ( #[bench] fn $name(b: &mut Bencher) { use std::{str, rand}; - use std::slice::{SlicePrelude, AsSlice}; + use std::slice::{AsSlice, SliceExt}; use std::rand::Rng; let mut gen = rand::weak_rng(); @@ -200,19 +201,19 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( // I measured the overhead of random string generation // as about 3-12% at one point. 
- let mut buf: [u8, ..$len] = [0, ..$len]; + let mut buf: [u8; $len] = [0; $len]; gen.fill_bytes(buf.as_mut_slice()); for n in buf.iter_mut() { // shift into printable ASCII *n = (*n % 0x40) + 0x20; } - let s = unsafe { str::raw::from_utf8(buf.as_slice()) }; + let s = unsafe { str::from_utf8(buf.as_slice()).unwrap() }; black_box(Atom::from_slice(s)); }); } -)) +)); -bench_rand!(intern_rand_008, 8) -bench_rand!(intern_rand_032, 32) -bench_rand!(intern_rand_128, 128) -bench_rand!(intern_rand_512, 512) +bench_rand!(intern_rand_008, 8); +bench_rand!(intern_rand_032, 32); +bench_rand!(intern_rand_128, 128); +bench_rand!(intern_rand_512, 512); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 81f1693..0309d36 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -15,16 +15,19 @@ use phf::OrderedSet; use xxhash::XXHasher; use core::fmt; +use core::iter::RandomAccessIterator; use core::mem; use core::ptr; use core::slice::bytes; use core::str; -use core::atomic::{AtomicInt, SeqCst}; use alloc::heap; use alloc::boxed::Box; use collections::string::String; -use collections::hash::{Hash, Hasher}; +use std::cmp::Ordering::{self, Equal}; +use std::hash::{Hash, Hasher}; use std::sync::Mutex; +use std::sync::atomic::AtomicInt; +use std::sync::atomic::Ordering::SeqCst; use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; @@ -32,7 +35,7 @@ use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; use event; #[cfg(not(feature = "log-events"))] -macro_rules! log (($e:expr) => (())) +macro_rules! log (($e:expr) => (())); #[path="../../shared/repr.rs"] pub mod repr; @@ -45,7 +48,7 @@ static static_atom_set: OrderedSet<&'static str> = static_atom_set!(); struct StringCache { hasher: XXHasher, - buckets: [*mut StringCacheEntry, ..4096], + buckets: [*mut StringCacheEntry; 4096], } lazy_static! 
{ @@ -59,6 +62,8 @@ struct StringCacheEntry { string: String, } +unsafe impl Send for *mut StringCacheEntry { } + impl StringCacheEntry { fn new(next: *mut StringCacheEntry, hash: u64, string_to_add: &str) -> StringCacheEntry { StringCacheEntry { @@ -161,7 +166,7 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. #[unsafe_no_drop_flag] -#[deriving(Eq, Hash, PartialEq)] +#[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. @@ -180,17 +185,17 @@ impl Atom { None => { let len = string_to_add.len(); if len <= repr::MAX_INLINE_LEN { - let mut buf: [u8, ..7] = [0, ..7]; + let mut buf: [u8; 7] = [0; 7]; bytes::copy_memory(buf.as_mut_slice(), string_to_add.as_bytes()); Inline(len as u8, buf) } else { - Dynamic(STRING_CACHE.lock().add(string_to_add) as *mut ()) + Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ()) } } }; let data = unsafe { unpacked.pack() }; - log!(event::Intern(data)) + log!(event::Intern(data)); Atom { data: data } } @@ -199,8 +204,7 @@ impl Atom { match self.unpack() { Inline(..) => { let buf = repr::inline_orig_bytes(&self.data); - debug_assert!(str::is_utf8(buf)); - str::from_utf8_unchecked(buf) + str::from_utf8(buf).unwrap() }, Static(idx) => *static_atom_set.iter().idx(idx as uint).expect("bad static atom"), Dynamic(entry) => { @@ -235,7 +239,7 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - STRING_CACHE.lock().remove(this.data); + STRING_CACHE.lock().unwrap().remove(this.data); } unsafe { @@ -243,7 +247,7 @@ impl Drop for Atom { // We use #[unsafe_no_drop_flag] so that Atom will be only 64 // bits. That means we need to ignore a NULL pointer here, // which represents a value that was moved out. 
- Some(entry) if entry.is_not_null() => { + Some(entry) if !entry.is_null() => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { drop_slow(self); @@ -295,7 +299,7 @@ mod tests { use core::prelude::*; use std::fmt; - use std::task::spawn; + use std::thread::Thread; use super::Atom; use super::repr::{Static, Inline, Dynamic}; @@ -325,7 +329,7 @@ mod tests { $t => (), _ => panic!("atom has wrong type"), } - )) + )); #[test] fn test_types() { @@ -423,10 +427,10 @@ mod tests { let y = $y; if x != y { panic!("assertion failed: {} != {}", - format_args!(fmt::format, $fmt, x).as_slice(), - format_args!(fmt::format, $fmt, y).as_slice()); + format_args!($fmt, x), + format_args!($fmt, y)); } - })) + })); #[test] fn repr() { @@ -468,7 +472,7 @@ mod tests { #[test] fn test_threads() { for _ in range(0u32, 100u32) { - spawn(proc() { + Thread::spawn(move || { let _ = Atom::from_slice("a dynamic string"); let _ = Atom::from_slice("another string"); }); diff --git a/src/event.rs b/src/event.rs index 87f03df..39dcb5d 100644 --- a/src/event.rs +++ b/src/event.rs @@ -33,4 +33,4 @@ pub fn log(e: Event) { LOG.lock().push(e); } -macro_rules! log (($e:expr) => (::event::log($e))) +macro_rules! log (($e:expr) => (::event::log($e))); diff --git a/src/lib.rs b/src/lib.rs index 45008b4..eebafed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(phase, macro_rules, default_type_params, globs)] +#![feature(phase, macro_rules, default_type_params, globs, old_orphan_check)] #![no_std] #[phase(plugin, link)] diff --git a/src/namespace.rs b/src/namespace.rs index f1efa7e..1c78916 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -16,11 +16,11 @@ use atom::Atom; /// An atom that is meant to represent a namespace in the HTML / XML sense. /// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. 
-#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] pub struct Namespace(pub Atom); /// A name with a namespace. -#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] pub struct QualName { pub ns: Namespace, pub local: Atom, From 302103ebf52fa1fb43905bc399b5b5d0cdf6effe Mon Sep 17 00:00:00 2001 From: Akos Kiss Date: Thu, 8 Jan 2015 23:33:57 +0000 Subject: [PATCH 053/379] Update * Features changed (became deprecated or added to Rust) * Fixed warnings (unnecessary `unsafe`, unused import) --- macros/src/lib.rs | 4 ++-- src/atom/bench.rs | 2 +- src/atom/mod.rs | 1 - src/lib.rs | 11 ++++++----- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index be15804..ee6b86d 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -10,14 +10,14 @@ #![crate_name="string_cache_macros"] #![crate_type="dylib"] -#![feature(macro_rules, plugin_registrar, quote, phase)] +#![feature(plugin_registrar, quote)] #![allow(unused_imports)] // for quotes extern crate core; extern crate syntax; extern crate rustc; -#[phase(plugin)] +#[macro_use] extern crate lazy_static; use rustc::plugin::Registry; diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 4559e5a..bc56d31 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -207,7 +207,7 @@ macro_rules! 
bench_rand ( ($name:ident, $len:expr) => ( // shift into printable ASCII *n = (*n % 0x40) + 0x20; } - let s = unsafe { str::from_utf8(buf.as_slice()).unwrap() }; + let s = str::from_utf8(buf.as_slice()).unwrap(); black_box(Atom::from_slice(s)); }); } diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 0309d36..73db860 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -298,7 +298,6 @@ mod bench; mod tests { use core::prelude::*; - use std::fmt; use std::thread::Thread; use super::Atom; use super::repr::{Static, Inline, Dynamic}; diff --git a/src/lib.rs b/src/lib.rs index eebafed..8a0938a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,10 +10,10 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(phase, macro_rules, default_type_params, globs, old_orphan_check)] +#![feature(plugin, old_orphan_check)] #![no_std] -#[phase(plugin, link)] +#[macro_use] extern crate core; extern crate alloc; @@ -24,16 +24,17 @@ extern crate test; extern crate std; -#[phase(plugin)] +#[plugin] extern crate phf_mac; extern crate phf; -#[phase(plugin)] +#[macro_use] extern crate lazy_static; extern crate xxhash; -#[phase(plugin)] +#[plugin] +#[macro_use] extern crate string_cache_macros; #[cfg(feature = "log-events")] From ce93ce58f1ecebeaa9e1f61bb318441575abfeb4 Mon Sep 17 00:00:00 2001 From: Akos Kiss Date: Sat, 10 Jan 2015 01:46:56 +0000 Subject: [PATCH 054/379] Update * Added feature gate required by box syntax. * `int`/`uint` types are getting deprecated, replaced some occurrences and added feature gate. * Adapted the usage of `XXHasher` to redesigned hashing. 
--- macros/src/lib.rs | 2 +- shared/repr.rs | 8 ++++---- src/atom/mod.rs | 6 ++---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index ee6b86d..73e2f8a 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name="string_cache_macros"] #![crate_type="dylib"] -#![feature(plugin_registrar, quote)] +#![feature(plugin_registrar, quote, int_uint, box_syntax)] #![allow(unused_imports)] // for quotes extern crate core; diff --git a/shared/repr.rs b/shared/repr.rs index ef3ecf7..6893730 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -26,7 +26,7 @@ pub const DYNAMIC_TAG: u8 = 0u8; pub const INLINE_TAG: u8 = 1u8; // len in upper nybble pub const STATIC_TAG: u8 = 2u8; -pub const MAX_INLINE_LEN: uint = 7; +pub const MAX_INLINE_LEN: usize = 7; // Atoms use a compact representation which fits this enum in a single u64. // Inlining avoids actually constructing the unpacked representation in memory. @@ -41,7 +41,7 @@ pub enum UnpackedAtom { Static(u32), } -const STATIC_SHIFT_BITS: uint = 32; +const STATIC_SHIFT_BITS: usize = 32; #[inline(always)] unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { @@ -69,7 +69,7 @@ impl UnpackedAtom { n } Inline(len, buf) => { - debug_assert!((len as uint) <= MAX_INLINE_LEN); + debug_assert!((len as usize) <= MAX_INLINE_LEN); let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { let dest: &mut [u8] = mem::transmute(inline_atom_slice(&mut data)); @@ -89,7 +89,7 @@ impl UnpackedAtom { DYNAMIC_TAG => Dynamic(data as *mut ()), STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), INLINE_TAG => { - let len = ((data & 0xf0) >> 4) as uint; + let len = ((data & 0xf0) >> 4) as usize; debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; let src: &[u8] = mem::transmute(inline_atom_slice(&data)); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 73db860..587db95 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -12,7 +12,7 @@ use 
core::prelude::*; use phf::OrderedSet; -use xxhash::XXHasher; +use xxhash; use core::fmt; use core::iter::RandomAccessIterator; @@ -47,7 +47,6 @@ const ENTRY_ALIGNMENT: uint = 16; static static_atom_set: OrderedSet<&'static str> = static_atom_set!(); struct StringCache { - hasher: XXHasher, buckets: [*mut StringCacheEntry; 4096], } @@ -78,13 +77,12 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { StringCache { - hasher: XXHasher::new(), buckets: unsafe { mem::zeroed() }, } } fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { - let hash = self.hasher.hash(&string_to_add); + let hash = xxhash::hash(&string_to_add); let bucket_index = (hash & (self.buckets.len()-1) as u64) as uint; let mut ptr = self.buckets[bucket_index]; From fe7ce8c56526a55ba358c8dda1471ed9f800cce8 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 15 Jan 2015 10:41:29 -0800 Subject: [PATCH 055/379] Warnings cleanup --- macros/src/lib.rs | 2 ++ shared/repr.rs | 3 ++- src/atom/bench.rs | 8 ++++---- src/atom/mod.rs | 34 +++++++++++++++++----------------- src/lib.rs | 2 ++ 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 73e2f8a..e625ee2 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -11,7 +11,9 @@ #![crate_type="dylib"] #![feature(plugin_registrar, quote, int_uint, box_syntax)] +#![deny(warnings)] #![allow(unused_imports)] // for quotes +#![allow(unstable)] extern crate core; extern crate syntax; diff --git a/shared/repr.rs b/shared/repr.rs index 6893730..aa8f8b5 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -30,6 +30,7 @@ pub const MAX_INLINE_LEN: usize = 7; // Atoms use a compact representation which fits this enum in a single u64. // Inlining avoids actually constructing the unpacked representation in memory. +#[allow(missing_copy_implementations)] pub enum UnpackedAtom { /// Pointer to a dynamic table entry. Must be 16-byte aligned! 
Dynamic(*mut ()), @@ -123,7 +124,7 @@ pub unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { let src: &[u8] = mem::transmute(inline_atom_slice(data)); - src.slice_to(len as uint) + src.slice_to(len as usize) } _ => intrinsics::unreachable(), } diff --git a/src/atom/bench.rs b/src/atom/bench.rs index bc56d31..d4f746d 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -49,7 +49,7 @@ macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( macro_rules! bench_tiny_op (($name:ident, $op:ident, $ctor_x:expr, $ctor_y:expr) => ( #[bench] fn $name(b: &mut Bencher) { - const n: uint = 1000; + const n: usize = 1000; let xs: Vec<_> = repeat($ctor_x).take(n).collect(); let ys: Vec<_> = repeat($ctor_y).take(n).collect(); @@ -92,7 +92,7 @@ macro_rules! bench_one ( fn as_slice_x_1000(b: &mut Bencher) { let x = $x; b.iter(|| { - for _ in range(0, 1000u) { + for _ in range(0, 1000) { black_box(x.as_slice()); } }); @@ -104,7 +104,7 @@ macro_rules! bench_one ( fn clone_x_1000(b: &mut Bencher) { let x = $x; b.iter(|| { - for _ in range(0, 1000u) { + for _ in range(0, 1000) { black_box(x.clone()); } }); @@ -116,7 +116,7 @@ macro_rules! bench_one ( fn clone_x_1000(b: &mut Bencher) { let x = $x.to_string(); b.iter(|| { - for _ in range(0, 1000u) { + for _ in range(0, 1000) { black_box(x.clone()); } }); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 587db95..c129eb7 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -24,7 +24,7 @@ use alloc::heap; use alloc::boxed::Box; use collections::string::String; use std::cmp::Ordering::{self, Equal}; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Mutex; use std::sync::atomic::AtomicInt; use std::sync::atomic::Ordering::SeqCst; @@ -41,7 +41,7 @@ macro_rules! log (($e:expr) => (())); pub mod repr; // Needed for memory safety of the tagging scheme! 
-const ENTRY_ALIGNMENT: uint = 16; +const ENTRY_ALIGNMENT: usize = 16; // Macro-generated table for static atoms. static static_atom_set: OrderedSet<&'static str> = static_atom_set!(); @@ -83,7 +83,7 @@ impl StringCache { fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { let hash = xxhash::hash(&string_to_add); - let bucket_index = (hash & (self.buckets.len()-1) as u64) as uint; + let bucket_index = (hash & (self.buckets.len()-1) as u64) as usize; let mut ptr = self.buckets[bucket_index]; while ptr != ptr::null_mut() { @@ -132,7 +132,7 @@ impl StringCache { debug_assert!(value.ref_count.load(SeqCst) == 0); - let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as uint; + let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as usize; let mut current = self.buckets[bucket_index]; let mut prev: *mut StringCacheEntry = ptr::null_mut(); @@ -204,7 +204,7 @@ impl Atom { let buf = repr::inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, - Static(idx) => *static_atom_set.iter().idx(idx as uint).expect("bad static atom"), + Static(idx) => *static_atom_set.iter().idx(idx as usize).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).string.as_slice() @@ -485,22 +485,22 @@ mod tests { #[test] fn match_atom() { - assert_eq!(2u, match Atom::from_slice("head") { - atom!(br) => 1u, - atom!(html) | atom!(head) => 2u, - _ => 3u, + assert_eq!(2, match Atom::from_slice("head") { + atom!(br) => 1, + atom!(html) | atom!(head) => 2, + _ => 3, }); - assert_eq!(3u, match Atom::from_slice("body") { - atom!(br) => 1u, - atom!(html) | atom!(head) => 2u, - _ => 3u, + assert_eq!(3, match Atom::from_slice("body") { + atom!(br) => 1, + atom!(html) | atom!(head) => 2, + _ => 3, }); - assert_eq!(3u, match Atom::from_slice("zzzzzz") { - atom!(br) => 1u, - atom!(html) | atom!(head) => 2u, - _ => 3u, + assert_eq!(3, match Atom::from_slice("zzzzzz") { + atom!(br) => 1, + atom!(html) | atom!(head) => 2, + _ 
=> 3, }); } } diff --git a/src/lib.rs b/src/lib.rs index 8a0938a..ea4aac5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,8 @@ #![feature(plugin, old_orphan_check)] #![no_std] +#![deny(warnings)] +#![allow(unstable)] #[macro_use] extern crate core; From 43a1e5d0d0f2a45e2b96160c8fbe6e1d9602cfa9 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 15 Jan 2015 10:58:03 -0800 Subject: [PATCH 056/379] Don't link plugin crates --- src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ea4aac5..7601fb2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,7 @@ extern crate test; extern crate std; -#[plugin] +#[plugin] #[no_link] extern crate phf_mac; extern crate phf; @@ -35,8 +35,7 @@ extern crate lazy_static; extern crate xxhash; -#[plugin] -#[macro_use] +#[plugin] #[no_link] #[macro_use] extern crate string_cache_macros; #[cfg(feature = "log-events")] From 23ebee6fe5fd5842d6c2acf95faf8193cd1c67a6 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Sat, 24 Jan 2015 15:38:26 +0530 Subject: [PATCH 057/379] Fix deprecated stuff so it builds again --- macros/src/atom/mod.rs | 2 +- shared/repr.rs | 2 +- src/atom/mod.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index e697354..6b4004d 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -107,7 +107,7 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box String { - let ns_names: Vec<&'static str> = ALL_NS.slice_from(1).iter() + let ns_names: Vec<&'static str> = ALL_NS[1..].iter() .map(|&(x, _)| x).collect(); format!("Usage: ns!(HTML), case-insensitive. 
\ Known namespaces: {}", diff --git a/shared/repr.rs b/shared/repr.rs index aa8f8b5..91f2c39 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -124,7 +124,7 @@ pub unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { let src: &[u8] = mem::transmute(inline_atom_slice(data)); - src.slice_to(len as usize) + &src[..(len as usize)] } _ => intrinsics::unreachable(), } diff --git a/src/atom/mod.rs b/src/atom/mod.rs index c129eb7..adabea7 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -257,7 +257,7 @@ impl Drop for Atom { } } -impl fmt::Show for Atom { +impl fmt::Debug for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { match self.unpack() { From b73b45983df820af9b713f98e7a0154ff45739d3 Mon Sep 17 00:00:00 2001 From: Adam Roben Date: Thu, 29 Jan 2015 12:13:31 -0500 Subject: [PATCH 058/379] Set up Travis CI support This was heavily based on servo/html5ever's .travis.yml file. --- .travis.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..f2acd45 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,11 @@ +sudo: false +env: + global: + - secure: uytPp0Fs+LT3QDEhDM3LJWiLvT2AHbdWnXrPEs+bYshQwt9wST+KQnYLyRfBuGg2ux3pkZwsRUFexvN8pQ3ab4aU2P21Xo98TzdXRwXurNYePgk/3tykEH+JrL52DfjCWB1VsjzzFrP02XU0XtB30qWC/n+fxeMWT7JT2GVh/OE= +language: rust +script: + - cargo build + - cargo test +after_script: + - cd target + - curl http://www.rust-ci.org/artifacts/put?t=$RUSTCI_TOKEN | sh From f3cb67bc217ccb53a7ae8b6290c12cb2c2873180 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 29 Jan 2015 17:17:59 -0800 Subject: [PATCH 059/379] Get rid of doc upload for now This is based on the h5e config, which is also broken -- see servo/html5ever#76. 
--- .travis.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index f2acd45..28692ac 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,5 @@ sudo: false -env: - global: - - secure: uytPp0Fs+LT3QDEhDM3LJWiLvT2AHbdWnXrPEs+bYshQwt9wST+KQnYLyRfBuGg2ux3pkZwsRUFexvN8pQ3ab4aU2P21Xo98TzdXRwXurNYePgk/3tykEH+JrL52DfjCWB1VsjzzFrP02XU0XtB30qWC/n+fxeMWT7JT2GVh/OE= language: rust script: - cargo build - cargo test -after_script: - - cd target - - curl http://www.rust-ci.org/artifacts/put?t=$RUSTCI_TOKEN | sh From 76a5705a6bc7b208d4e347c8035c70cd1171f8ee Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 29 Jan 2015 17:19:58 -0800 Subject: [PATCH 060/379] Add Travis status to README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 36e27e2..616dfcc 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ # string-cache +[![Build Status](https://travis-ci.org/servo/string-cache.svg?branch=master)](https://travis-ci.org/servo/string-cache) + A string interning library for Rust, developed as part of the [Servo](https://github.com/servo/servo) project. From aa0482aea5f165b7f360e78338740cded114602b Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 29 Jan 2015 17:28:10 -0800 Subject: [PATCH 061/379] Upgrade to rustc 1.0.0-dev (c5961ad06 2015-01-28 21:49:38 +0000) Fixes #53. 
--- macros/src/lib.rs | 3 +-- shared/repr.rs | 7 ++----- src/atom/bench.rs | 3 --- src/atom/mod.rs | 22 ++++++++-------------- src/event.rs | 7 +------ src/lib.rs | 11 +---------- src/namespace.rs | 5 ++--- 7 files changed, 15 insertions(+), 43 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index e625ee2..adacbcd 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -11,11 +11,10 @@ #![crate_type="dylib"] #![feature(plugin_registrar, quote, int_uint, box_syntax)] +#![feature(rustc_private, core, std_misc)] #![deny(warnings)] #![allow(unused_imports)] // for quotes -#![allow(unstable)] -extern crate core; extern crate syntax; extern crate rustc; diff --git a/shared/repr.rs b/shared/repr.rs index 91f2c39..8e69d2e 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -13,11 +13,8 @@ #![allow(dead_code, unused_imports)] -use core::{mem, raw, intrinsics}; -use core::option::Option::{self, Some, None}; -use core::ptr::PtrExt; -use core::slice::{AsSlice, SliceExt}; -use core::slice::bytes; +use std::{mem, raw, intrinsics}; +use std::slice::bytes; pub use self::UnpackedAtom::{Dynamic, Inline, Static}; diff --git a/src/atom/bench.rs b/src/atom/bench.rs index d4f746d..3c61c36 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -131,8 +131,6 @@ macro_rules! bench_all ( mod $name { #![allow(unused_imports)] - use core::prelude::*; - use collections::vec::Vec; use test::{Bencher, black_box}; use std::string::ToString; use std::iter::repeat; @@ -190,7 +188,6 @@ macro_rules! 
bench_rand ( ($name:ident, $len:expr) => ( #[bench] fn $name(b: &mut Bencher) { use std::{str, rand}; - use std::slice::{AsSlice, SliceExt}; use std::rand::Rng; let mut gen = rand::weak_rng(); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index adabea7..8d20877 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -9,20 +9,16 @@ #![allow(non_upper_case_globals)] -use core::prelude::*; - use phf::OrderedSet; use xxhash; -use core::fmt; -use core::iter::RandomAccessIterator; -use core::mem; -use core::ptr; -use core::slice::bytes; -use core::str; -use alloc::heap; -use alloc::boxed::Box; -use collections::string::String; +use std::fmt; +use std::iter::RandomAccessIterator; +use std::mem; +use std::ptr; +use std::slice::bytes; +use std::str; +use std::rt::heap; use std::cmp::Ordering::{self, Equal}; use std::hash::Hash; use std::sync::Mutex; @@ -294,8 +290,6 @@ mod bench; #[cfg(test)] mod tests { - use core::prelude::*; - use std::thread::Thread; use super::Atom; use super::repr::{Static, Inline, Dynamic}; @@ -461,7 +455,7 @@ mod tests { #[test] fn assert_sizes() { // Guard against accidental changes to the sizes of things. 
- use core::mem; + use std::mem; assert_eq!(8, mem::size_of::()); assert_eq!(48, mem::size_of::()); } diff --git a/src/event.rs b/src/event.rs index 39dcb5d..3f81705 100644 --- a/src/event.rs +++ b/src/event.rs @@ -9,12 +9,7 @@ #![macro_escape] -use core::prelude::*; - -use alloc::boxed::Box; -use collections::MutableSeq; -use collections::vec::Vec; -use collections::string::String; +use std::MutableSeq; use sync::Mutex; #[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Show, Encodable)] diff --git a/src/lib.rs b/src/lib.rs index 7601fb2..ae1fb02 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,21 +11,12 @@ #![crate_type = "rlib"] #![feature(plugin, old_orphan_check)] -#![no_std] +#![feature(core, collections, alloc, hash)] #![deny(warnings)] -#![allow(unstable)] - -#[macro_use] -extern crate core; - -extern crate alloc; -extern crate collections; #[cfg(test)] extern crate test; -extern crate std; - #[plugin] #[no_link] extern crate phf_mac; extern crate phf; diff --git a/src/namespace.rs b/src/namespace.rs index 1c78916..c2d2c14 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -7,9 +7,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#![experimental="This may move as string-cache becomes less Web-specific."] - -use core::prelude::*; +#![unstable(feature = "string_cache_namespace", + reason = "This may move as string-cache becomes less Web-specific.")] use atom::Atom; From ce292efaa7b3665c2bce6df8347d685d7c4605ac Mon Sep 17 00:00:00 2001 From: Adam Roben Date: Thu, 29 Jan 2015 12:09:51 -0500 Subject: [PATCH 062/379] Fix warnings during `cargo test` too --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index ae1fb02..f43a61e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ #![feature(plugin, old_orphan_check)] #![feature(core, collections, alloc, hash)] #![deny(warnings)] +#![cfg_attr(test, feature(test, std_misc))] #[cfg(test)] extern crate test; From 2c5f7e49b5b8b71ada1ee917ba96a6d852e85cbe Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Sat, 31 Jan 2015 17:30:19 +0530 Subject: [PATCH 063/379] old_orphan_check gate unnecessary --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f43a61e..00d7b5e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(plugin, old_orphan_check)] +#![feature(plugin)] #![feature(core, collections, alloc, hash)] #![deny(warnings)] #![cfg_attr(test, feature(test, std_misc))] From 3e06ac9700be9a78e412310efc0e1f2183cbb788 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Sat, 31 Jan 2015 17:31:19 +0530 Subject: [PATCH 064/379] Show -> Debug --- src/namespace.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/namespace.rs b/src/namespace.rs index c2d2c14..e1d5f44 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -15,11 +15,11 @@ use atom::Atom; /// An atom that is meant to represent a namespace in the HTML / XML sense. 
/// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] pub struct Namespace(pub Atom); /// A name with a namespace. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] pub struct QualName { pub ns: Namespace, pub local: Atom, From 926aeff8e7199f3ef91b5041ad215b6b4e62b064 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Sun, 8 Feb 2015 02:55:35 +0530 Subject: [PATCH 065/379] InternedString changes --- macros/src/atom/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 6b4004d..1b5524f 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -89,8 +89,8 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box expect!(cx, sp, atom_tok_to_str(t), usage), _ => bail!(cx, sp, usage), }; - box expect!(cx, sp, make_atom_result(cx, name.get()), - format!("Unknown static atom {}", name.get()).as_slice()) + box expect!(cx, sp, make_atom_result(cx, &*name), + format!("Unknown static atom {}", &*name).as_slice()) } // Translate `ns!(HTML)` into `Namespace { atom: atom!("http://www.w3.org/1999/xhtml") }`. 
@@ -120,7 +120,7 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box Date: Sun, 8 Feb 2015 02:55:45 +0530 Subject: [PATCH 066/379] phf_mac renamed --- Cargo.toml | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a9cfa56..6930987 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ log-events = [] [dependencies] phf = "0" -phf_mac = "0" +phf_macros = "0" xxhash = "0" [dependencies.string_cache_macros] diff --git a/src/lib.rs b/src/lib.rs index f43a61e..9a9caba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ extern crate test; #[plugin] #[no_link] -extern crate phf_mac; +extern crate phf_macros; extern crate phf; #[macro_use] From bc4a897d039ca21d32cff1ad4e3fd2f83dc4f5d7 Mon Sep 17 00:00:00 2001 From: Eunchong Yu Date: Sun, 15 Feb 2015 03:02:13 +0900 Subject: [PATCH 067/379] Update to rustc 1.0.0-nightly (b9ba643b7 2015-02-13 21:15:39 +0000) --- Cargo.toml | 1 + src/atom/bench.rs | 5 +++-- src/atom/mod.rs | 6 +++--- src/lib.rs | 9 ++++++--- src/namespace.rs | 4 ++-- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6930987..1e3a4d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ log-events = [] [dependencies] phf = "0" phf_macros = "0" +rand = "0" xxhash = "0" [dependencies.string_cache_macros] diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 3c61c36..9589128 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -187,8 +187,9 @@ bench_all!([ne lt x_inline y_dynamic] macro_rules! 
bench_rand ( ($name:ident, $len:expr) => ( #[bench] fn $name(b: &mut Bencher) { - use std::{str, rand}; - use std::rand::Rng; + use std::str; + use rand; + use rand::Rng; let mut gen = rand::weak_rng(); b.iter(|| { diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 8d20877..7e1c042 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -22,7 +22,7 @@ use std::rt::heap; use std::cmp::Ordering::{self, Equal}; use std::hash::Hash; use std::sync::Mutex; -use std::sync::atomic::AtomicInt; +use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; @@ -53,7 +53,7 @@ lazy_static! { struct StringCacheEntry { next_in_bucket: *mut StringCacheEntry, hash: u64, - ref_count: AtomicInt, + ref_count: AtomicIsize, string: String, } @@ -64,7 +64,7 @@ impl StringCacheEntry { StringCacheEntry { next_in_bucket: next, hash: hash, - ref_count: AtomicInt::new(1), + ref_count: AtomicIsize::new(1), string: String::from_str(string_to_add), } } diff --git a/src/lib.rs b/src/lib.rs index 9a9caba..7ae402a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,24 +10,27 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(plugin, old_orphan_check)] +#![feature(plugin, unsafe_no_drop_flag)] #![feature(core, collections, alloc, hash)] #![deny(warnings)] #![cfg_attr(test, feature(test, std_misc))] +#![cfg_attr(bench, feature(rand))] +#![plugin(phf_macros, string_cache_macros)] #[cfg(test)] extern crate test; -#[plugin] #[no_link] +#[no_link] extern crate phf_macros; extern crate phf; #[macro_use] extern crate lazy_static; +extern crate rand; extern crate xxhash; -#[plugin] #[no_link] #[macro_use] +#[no_link] #[macro_use] extern crate string_cache_macros; #[cfg(feature = "log-events")] diff --git a/src/namespace.rs b/src/namespace.rs index c2d2c14..e1d5f44 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -15,11 +15,11 @@ use atom::Atom; /// An atom that is meant to represent a namespace in the HTML / XML 
sense. /// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] pub struct Namespace(pub Atom); /// A name with a namespace. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Show, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] pub struct QualName { pub ns: Namespace, pub local: Atom, From 6d6e19da971f2d68b1b095e965bb3289ad133db8 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sat, 14 Feb 2015 10:12:39 -0800 Subject: [PATCH 068/379] Remove unneeded `extern crate` --- src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7ae402a..7092ef3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,8 +20,6 @@ #[cfg(test)] extern crate test; -#[no_link] -extern crate phf_macros; extern crate phf; #[macro_use] From e9ade750ba4eaaeaaea7e38be735beda0b11115a Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sat, 14 Feb 2015 10:13:14 -0800 Subject: [PATCH 069/379] Move non-procedural macro out of string_cache_macros There is no need for #[macro_use] extern crate string_cache_macros; anymore; it's purely a compiler plugin. To get the qualname! 
macro, use #[macro_use] extern crate string_cache; --- macros/src/atom/mod.rs | 8 -------- src/lib.rs | 11 ++++++++--- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/macros/src/atom/mod.rs b/macros/src/atom/mod.rs index 1b5524f..946d823 100644 --- a/macros/src/atom/mod.rs +++ b/macros/src/atom/mod.rs @@ -132,11 +132,3 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box ( - ::string_cache::namespace::QualName { - ns: ns!($ns), - local: atom!($local), - } -)); diff --git a/src/lib.rs b/src/lib.rs index 7092ef3..38ff59a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,15 +28,20 @@ extern crate lazy_static; extern crate rand; extern crate xxhash; -#[no_link] #[macro_use] -extern crate string_cache_macros; - #[cfg(feature = "log-events")] extern crate serialize; pub use atom::Atom; pub use namespace::{Namespace, QualName}; +#[macro_export] +macro_rules! qualname (($ns:tt, $local:tt) => ( + ::string_cache::namespace::QualName { + ns: ns!($ns), + local: atom!($local), + } +)); + #[cfg(feature = "log-events")] pub mod event; From 99e484c4889559746ac97612a2c5a0d2bfe165c3 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sat, 14 Feb 2015 10:15:35 -0800 Subject: [PATCH 070/379] Rename string_cache_macros to string_cache_plugin See https://github.com/rust-lang/rust/issues/22199 --- Cargo.toml | 4 ++-- {macros => plugin}/Cargo.toml | 4 ++-- {macros => plugin}/src/atom/data.rs | 0 {macros => plugin}/src/atom/mod.rs | 0 {macros => plugin}/src/lib.rs | 2 +- src/lib.rs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) rename {macros => plugin}/Cargo.toml (75%) rename {macros => plugin}/src/atom/data.rs (100%) rename {macros => plugin}/src/atom/mod.rs (100%) rename {macros => plugin}/src/lib.rs (97%) diff --git a/Cargo.toml b/Cargo.toml index 1e3a4d9..3ee08be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,8 +16,8 @@ phf_macros = "0" rand = "0" xxhash = "0" -[dependencies.string_cache_macros] -path = "macros" 
+[dependencies.string_cache_plugin] +path = "plugin" [dependencies.lazy_static] git = "https://github.com/Kimundi/lazy-static.rs" diff --git a/macros/Cargo.toml b/plugin/Cargo.toml similarity index 75% rename from macros/Cargo.toml rename to plugin/Cargo.toml index 1030717..996fe17 100644 --- a/macros/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,12 +1,12 @@ [package] -name = "string_cache_macros" +name = "string_cache_plugin" version = "0.0.0" authors = [ "The Servo Project Developers" ] [lib] -name = "string_cache_macros" +name = "string_cache_plugin" plugin = true [dependencies.lazy_static] diff --git a/macros/src/atom/data.rs b/plugin/src/atom/data.rs similarity index 100% rename from macros/src/atom/data.rs rename to plugin/src/atom/data.rs diff --git a/macros/src/atom/mod.rs b/plugin/src/atom/mod.rs similarity index 100% rename from macros/src/atom/mod.rs rename to plugin/src/atom/mod.rs diff --git a/macros/src/lib.rs b/plugin/src/lib.rs similarity index 97% rename from macros/src/lib.rs rename to plugin/src/lib.rs index adacbcd..32ebc84 100644 --- a/macros/src/lib.rs +++ b/plugin/src/lib.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![crate_name="string_cache_macros"] +#![crate_name="string_cache_plugin"] #![crate_type="dylib"] #![feature(plugin_registrar, quote, int_uint, box_syntax)] diff --git a/src/lib.rs b/src/lib.rs index 38ff59a..e4b97b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ #![deny(warnings)] #![cfg_attr(test, feature(test, std_misc))] #![cfg_attr(bench, feature(rand))] -#![plugin(phf_macros, string_cache_macros)] +#![plugin(phf_macros, string_cache_plugin)] #[cfg(test)] extern crate test; From c57d501ed158791a634ea5deba0e60a4a425b1be Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 23 Feb 2015 14:16:52 -0800 Subject: [PATCH 071/379] Switch back to SipHash XXHash hasn't been updated in more than a month, and it performs only a little better. 
Let's switch back to SipHash for now. This reverts commit 2ce893d63b7f8e022c74b637615004a8759bc9b3. --- Cargo.toml | 1 - src/atom/mod.rs | 5 ++--- src/lib.rs | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3ee08be..9c6b791 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,6 @@ log-events = [] phf = "0" phf_macros = "0" rand = "0" -xxhash = "0" [dependencies.string_cache_plugin] path = "plugin" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 7e1c042..a398fb1 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -10,7 +10,6 @@ #![allow(non_upper_case_globals)] use phf::OrderedSet; -use xxhash; use std::fmt; use std::iter::RandomAccessIterator; @@ -20,7 +19,7 @@ use std::slice::bytes; use std::str; use std::rt::heap; use std::cmp::Ordering::{self, Equal}; -use std::hash::Hash; +use std::hash::{self, Hash}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; @@ -78,7 +77,7 @@ impl StringCache { } fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { - let hash = xxhash::hash(&string_to_add); + let hash = hash::hash::<_, SipHasher>(&string_to_add); let bucket_index = (hash & (self.buckets.len()-1) as u64) as usize; let mut ptr = self.buckets[bucket_index]; diff --git a/src/lib.rs b/src/lib.rs index e4b97b9..c048544 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,6 @@ extern crate phf; extern crate lazy_static; extern crate rand; -extern crate xxhash; #[cfg(feature = "log-events")] extern crate serialize; From 2e0c07c83e0e9a6b1c0cc9a60ad8f7354f4c43c6 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 23 Feb 2015 14:49:33 -0800 Subject: [PATCH 072/379] Upgrade to rustc 1.0.0-dev (2b01a37ec 2015-02-21) (built 2015-02-23) --- plugin/src/lib.rs | 2 +- src/atom/mod.rs | 6 +++--- src/lib.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index 32ebc84..0a37bab 100644 --- 
a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -11,7 +11,7 @@ #![crate_type="dylib"] #![feature(plugin_registrar, quote, int_uint, box_syntax)] -#![feature(rustc_private, core, std_misc)] +#![feature(rustc_private, core)] #![deny(warnings)] #![allow(unused_imports)] // for quotes diff --git a/src/atom/mod.rs b/src/atom/mod.rs index a398fb1..3287d51 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -19,7 +19,7 @@ use std::slice::bytes; use std::str; use std::rt::heap; use std::cmp::Ordering::{self, Equal}; -use std::hash::{self, Hash}; +use std::hash::{self, Hash, SipHasher}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; @@ -289,7 +289,7 @@ mod bench; #[cfg(test)] mod tests { - use std::thread::Thread; + use std::thread; use super::Atom; use super::repr::{Static, Inline, Dynamic}; @@ -462,7 +462,7 @@ mod tests { #[test] fn test_threads() { for _ in range(0u32, 100u32) { - Thread::spawn(move || { + thread::spawn(move || { let _ = Atom::from_slice("a dynamic string"); let _ = Atom::from_slice("another string"); }); diff --git a/src/lib.rs b/src/lib.rs index c048544..e1ee840 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ #![feature(plugin, unsafe_no_drop_flag)] #![feature(core, collections, alloc, hash)] #![deny(warnings)] -#![cfg_attr(test, feature(test, std_misc))] +#![cfg_attr(test, feature(test))] #![cfg_attr(bench, feature(rand))] #![plugin(phf_macros, string_cache_plugin)] From 95ee6329a86bf3d09ca4ff36a74b2fe321d2faea Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sat, 25 Oct 2014 13:09:44 -0700 Subject: [PATCH 073/379] Provide a nicer serialization for Event --- src/event.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/event.rs b/src/event.rs index 3f81705..3a0df8d 100644 --- a/src/event.rs +++ b/src/event.rs @@ -12,7 +12,7 @@ use std::MutableSeq; use sync::Mutex; -#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Show, 
Encodable)] +#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Show)] pub enum Event { Intern(u64), Insert(u64, String), @@ -29,3 +29,29 @@ pub fn log(e: Event) { } macro_rules! log (($e:expr) => (::event::log($e))); + +// Serialize by converting to this private struct, +// which produces more convenient output. + +#[deriving(Encodable)] +struct SerializeEvent<'a> { + event: &'static str, + id: u64, + string: Option<&'a String>, +} + +impl> Encodable for Event { + fn encode(&self, s: &mut S) -> Result<(), E> { + let (event, id, string) = match *self { + Intern(id) => ("intern", id, None), + Insert(id, ref s) => ("insert", id, Some(s)), + Remove(id) => ("remove", id, None), + }; + + SerializeEvent { + event: event, + id: id, + string: string + }.encode(s) + } +} From 10da0cfcea3efdb5223caa1909d5d5f4eee7143b Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sat, 25 Oct 2014 16:12:17 -0700 Subject: [PATCH 074/379] Add a program to analyze CSV event dumps --- examples/summarize-events/Cargo.toml | 12 +++ examples/summarize-events/src/main.rs | 150 ++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 examples/summarize-events/Cargo.toml create mode 100644 examples/summarize-events/src/main.rs diff --git a/examples/summarize-events/Cargo.toml b/examples/summarize-events/Cargo.toml new file mode 100644 index 0000000..2749da0 --- /dev/null +++ b/examples/summarize-events/Cargo.toml @@ -0,0 +1,12 @@ +[package] + +name = "string-cache-summarize-events" +version = "0.0.0" +authors = [ "The Servo Project Developers" ] + +[dependencies] +csv = "0" +rustc-serialize = "0" + +[dependencies.string_cache] +git = "https://github.com/servo/string-cache" diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs new file mode 100644 index 0000000..a771d51 --- /dev/null +++ b/examples/summarize-events/src/main.rs @@ -0,0 +1,150 @@ +// Copyright 2014 The Servo Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(std_misc, core, env, old_path)] + +extern crate csv; +extern crate string_cache; +extern crate "rustc-serialize" as rustc_serialize; + +use string_cache::Atom; +use string_cache::atom::repr; + +use std::{env, cmp}; +use std::num::FromPrimitive; +use std::collections::hash_map::{HashMap, Entry}; + +#[derive(RustcDecodable, Debug)] +struct Event { + event: String, + id: u64, + string: Option, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] +#[repr(u8)] +enum Kind { + Dynamic, + Inline, + Static, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +struct Summary { + kind: Kind, + times: usize, +} + +fn main() { + let filename = env::args().skip(1).next() + .expect("Usage: string-cache-summarize-events foo.csv"); + let path = &Path::new(filename); + let mut file = csv::Reader::from_file(path); + + // Over the lifetime of a program, one dynamic atom might get interned at + // several addresses, and one address may be used to intern several + // different strings. For this reason we must separately track the + // currently-allocated atoms and the summary of all atoms ever created. + let mut dynamic: HashMap = HashMap::new(); + let mut peak_dynamic = 0; + let mut summary: HashMap = HashMap::new(); + let mut inserts = 0; + + for record in file.decode() { + let ev: Event = record.unwrap(); + match &ev.event[..] { + "intern" => { + let tag = (ev.id & 0xf) as u8; + assert!(tag <= repr::STATIC_TAG); + + let string = match tag { + repr::DYNAMIC_TAG => dynamic[ev.id].clone(), + + // FIXME: We really shouldn't be allowed to do this. It's a memory-safety + // hazard; the field is only public for the atom!() macro. 
+ _ => Atom { data: ev.id }.as_slice().to_string(), + }; + + match summary.entry(string) { + Entry::Occupied(entry) => entry.into_mut().times += 1, + Entry::Vacant(entry) => { + entry.insert(Summary { + kind: FromPrimitive::from_u8(tag).unwrap(), + times: 1, + }); + } + } + }, + + "insert" => { + assert!(!dynamic.contains_key(&ev.id)); + dynamic.insert(ev.id, ev.string.expect("no string to insert")); + peak_dynamic = cmp::max(peak_dynamic, dynamic.len()); + inserts += 1; + } + + "remove" => { + assert!(dynamic.contains_key(&ev.id)); + dynamic.remove(&ev.id); + } + + e => panic!("unknown event {}", e), + } + } + + // Get all records, in a stable order. + let mut summary: Vec<_> = summary.into_iter().collect(); + summary.sort_by(|&(ref a, _), &(ref b, _)| a.cmp(b)); + + // Sort by number of occurrences, descending. + summary.sort_by(|&(_, a), &(_, b)| b.times.cmp(&a.times)); + let longest_atom = summary.iter().map(|&(ref k, _)| k.len()) + .max().unwrap_or(0); + + let pad = |c, n| { + for _ in range(n, longest_atom) { + print!("{}", c); + } + }; + + let mut total = 0; + let mut by_kind = [0, 0, 0]; + for &(_, Summary { kind, times }) in &summary { + total += times; + by_kind[kind as usize] += times; + } + + println!("\n"); + println!("kind times pct"); + println!("------- ------- ----"); + for (k, &n) in by_kind.iter().enumerate() { + let k: Kind = FromPrimitive::from_uint(k).unwrap(); + print!("{:7?} {:7} {:4.1}", + k, n, 100.0 * (n as f64) / (total as f64)); + + match k { + Kind::Dynamic => println!(" {} inserts, peak size {}, miss rate {:4.1}%", + inserts, peak_dynamic, 100.0 * (inserts as f64) / (n as f64)), + _ => println!(""), + } + } + println!(""); + println!("total {:7}", total); + println!("\n"); + + pad(' ', 4); + println!("atom times kind"); + pad('-', 4); + println!("---- ------ -------"); + for (string, Summary { kind, times }) in summary { + use std::str::StrExt; + pad(' ', string.chars().count()); + println!("{} {:6} {:?}", string, times, kind); + } +} 
From 535813897de46e1c95360f360b2c32f93ed64252 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 23 Feb 2015 16:34:45 -0800 Subject: [PATCH 075/379] Un-bitrot the event logging stuff --- .travis.yml | 5 ++++- Cargo.toml | 6 +++++- src/atom/mod.rs | 8 ++++---- src/event.rs | 22 ++++++++++------------ src/lib.rs | 3 ++- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/.travis.yml b/.travis.yml index 28692ac..02760d0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,8 @@ sudo: false language: rust script: - - cargo build - cargo test + - cargo clean + - cargo test --features log-events + - cd examples/summarize-events/ + - cargo build diff --git a/Cargo.toml b/Cargo.toml index 9c6b791..94e4703 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,13 +8,17 @@ authors = [ "The Servo Project Developers" ] # Enable event logging for generating benchmark traces. # See examples/event-log. -log-events = [] +log-events = ["rustc-serialize"] [dependencies] phf = "0" phf_macros = "0" rand = "0" +[dependencies.rustc-serialize] +version = "0" +optional = true + [dependencies.string_cache_plugin] path = "plugin" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 3287d51..9f4a07e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -27,7 +27,7 @@ use std::sync::atomic::Ordering::SeqCst; use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; #[cfg(feature = "log-events")] -use event; +use event::Event; #[cfg(not(feature = "log-events"))] macro_rules! 
log (($e:expr) => (())); @@ -114,7 +114,7 @@ impl StringCache { StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); } self.buckets[bucket_index] = ptr; - log!(event::Insert(ptr as u64, String::from_str(string_to_add))); + log!(Event::Insert(ptr as u64, String::from_str(string_to_add))); } debug_assert!(ptr != ptr::null_mut()); @@ -152,7 +152,7 @@ impl StringCache { mem::size_of::(), ENTRY_ALIGNMENT); } - log!(event::Remove(key)); + log!(Event::Remove(key)); } } @@ -188,7 +188,7 @@ impl Atom { }; let data = unsafe { unpacked.pack() }; - log!(event::Intern(data)); + log!(Event::Intern(data)); Atom { data: data } } diff --git a/src/event.rs b/src/event.rs index 3a0df8d..79af4a1 100644 --- a/src/event.rs +++ b/src/event.rs @@ -7,12 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![macro_escape] +use std::sync::Mutex; +use rustc_serialize::{Encoder, Encodable}; -use std::MutableSeq; -use sync::Mutex; - -#[deriving(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Show)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Debug)] pub enum Event { Intern(u64), Insert(u64, String), @@ -25,7 +23,7 @@ lazy_static! { } pub fn log(e: Event) { - LOG.lock().push(e); + LOG.lock().unwrap().push(e); } macro_rules! log (($e:expr) => (::event::log($e))); @@ -33,19 +31,19 @@ macro_rules! log (($e:expr) => (::event::log($e))); // Serialize by converting to this private struct, // which produces more convenient output. 
-#[deriving(Encodable)] +#[derive(RustcEncodable)] struct SerializeEvent<'a> { event: &'static str, id: u64, string: Option<&'a String>, } -impl> Encodable for Event { - fn encode(&self, s: &mut S) -> Result<(), E> { +impl Encodable for Event { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { let (event, id, string) = match *self { - Intern(id) => ("intern", id, None), - Insert(id, ref s) => ("insert", id, Some(s)), - Remove(id) => ("remove", id, None), + Event::Intern(id) => ("intern", id, None), + Event::Insert(id, ref s) => ("insert", id, Some(s)), + Event::Remove(id) => ("remove", id, None), }; SerializeEvent { diff --git a/src/lib.rs b/src/lib.rs index e1ee840..893f724 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ extern crate lazy_static; extern crate rand; #[cfg(feature = "log-events")] -extern crate serialize; +extern crate "rustc-serialize" as rustc_serialize; pub use atom::Atom; pub use namespace::{Namespace, QualName}; @@ -42,6 +42,7 @@ macro_rules! qualname (($ns:tt, $local:tt) => ( )); #[cfg(feature = "log-events")] +#[macro_use] pub mod event; pub mod atom; From c1d67a39a1dc2655013f9a3ff4670424715621ac Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 26 Feb 2015 15:00:26 +0100 Subject: [PATCH 076/379] Upgrade to rustc 1.0.0-nightly (4db0b3246 2015-02-25) (built 2015-02-26) --- src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9f4a07e..f4483f5 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -56,7 +56,7 @@ struct StringCacheEntry { string: String, } -unsafe impl Send for *mut StringCacheEntry { } +unsafe impl Send for StringCache { } impl StringCacheEntry { fn new(next: *mut StringCacheEntry, hash: u64, string_to_add: &str) -> StringCacheEntry { From ac14cb2e1055708325a0170a92455e2587f198a0 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 26 Feb 2015 20:05:29 +0100 Subject: [PATCH 077/379] Use local string-cache (not from git) in 
examples/summarize-events --- examples/summarize-events/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/summarize-events/Cargo.toml b/examples/summarize-events/Cargo.toml index 2749da0..fc9e345 100644 --- a/examples/summarize-events/Cargo.toml +++ b/examples/summarize-events/Cargo.toml @@ -9,4 +9,4 @@ csv = "0" rustc-serialize = "0" [dependencies.string_cache] -git = "https://github.com/servo/string-cache" +path = "../.." From b3f27354664caf6362419b9e001288d19a1e9c7a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 26 Feb 2015 20:26:18 +0100 Subject: [PATCH 078/379] Use local string-cache (not from git) in examples/event-log --- examples/event-log/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/event-log/Cargo.toml b/examples/event-log/Cargo.toml index 2a28bf9..7edd5cf 100644 --- a/examples/event-log/Cargo.toml +++ b/examples/event-log/Cargo.toml @@ -5,5 +5,5 @@ version = "0.0.0" authors = [ "The Servo Project Developers" ] [dependencies.string_cache] -git = "https://github.com/servo/string-cache" +path = "../.."
features = ["log-events"] From 3f2b9fe059ceb6049ed06658a1f79a5b08bb47ed Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Thu, 26 Feb 2015 19:50:57 -0800 Subject: [PATCH 079/379] Use macros from rust-mac --- plugin/Cargo.toml | 3 +++ plugin/src/atom/mod.rs | 14 +++++++------- plugin/src/lib.rs | 19 +++---------------- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 996fe17..2f408d4 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -11,3 +11,6 @@ plugin = true [dependencies.lazy_static] git = "https://github.com/Kimundi/lazy-static.rs" + +[dependencies.mac] +git = "https://github.com/reem/rust-mac" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 946d823..4fd97c7 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -26,7 +26,7 @@ mod repr; // Build a PhfOrderedSet of static atoms. // Takes no arguments. pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); + ext_bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); let tts: Vec = data::ATOMS.iter().flat_map(|k| { (quote_tokens!(&mut *cx, $k,)).into_iter() }).collect(); @@ -86,10 +86,10 @@ fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; let name = match tt { - [ref t] => expect!(cx, sp, atom_tok_to_str(t), usage), - _ => bail!(cx, sp, usage), + [ref t] => ext_expect!(cx, sp, atom_tok_to_str(t), usage), + _ => ext_bail!(cx, sp, usage), }; - box expect!(cx, sp, make_atom_result(cx, &*name), + box ext_expect!(cx, sp, make_atom_result(cx, &*name), format!("Unknown static atom {}", &*name).as_slice()) } @@ -114,17 +114,17 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box atom_tok_to_str(t), _ => None, }, usage().as_slice()); - let &(_, url) = 
expect!(cx, sp, + let &(_, url) = ext_expect!(cx, sp, ALL_NS.iter().find(|&&(short, _)| short.eq_ignore_ascii_case(&*name)), usage().as_slice()); // All of the URLs should be in the static atom table. - let AtomResult { expr, pat } = expect!(cx, sp, make_atom_result(cx, url), + let AtomResult { expr, pat } = ext_expect!(cx, sp, make_atom_result(cx, url), format!("internal plugin error: can't find namespace url {}", url).as_slice()); box AtomResult { diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index 0a37bab..c6d5ee7 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -21,23 +21,10 @@ extern crate rustc; #[macro_use] extern crate lazy_static; -use rustc::plugin::Registry; +#[macro_use] +extern crate mac; -macro_rules! bail ( ($cx:expr, $sp:expr, $msg:expr) => ({ - $cx.span_err($sp, $msg); - return ::syntax::ext::base::DummyResult::any($sp); -})); - -macro_rules! bail_if ( ($e:expr, $cx:expr, $sp:expr, $msg:expr) => ( - if $e { bail!($cx, $sp, $msg) } -)); - -macro_rules! expect ( ($cx:expr, $sp:expr, $e:expr, $msg:expr) => ( - match $e { - Some(x) => x, - None => bail!($cx, $sp, $msg), - } -)); +use rustc::plugin::Registry; mod atom; From db6e0729b3fce5ce75b7eb28ed7a137646f3ea41 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 2 Mar 2015 11:30:00 +0100 Subject: [PATCH 080/379] Upgrade to rustc 1.0.0-nightly (890293655 2015-02-28) (built 2015-03-01) --- plugin/src/atom/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 946d823..bb9aabc 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -11,7 +11,7 @@ use syntax::ptr::P; use syntax::codemap::Span; use syntax::ast::{TokenTree, TtToken}; use syntax::ast; -use syntax::ext::base::{ExtCtxt, MacResult, MacExpr}; +use syntax::ext::base::{ExtCtxt, MacResult, MacEager}; use syntax::parse::token::{get_ident, InternedString, Ident, Literal, Lit}; use std::iter::Chain; @@ -30,7 +30,7 @@ pub fn 
expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> B let tts: Vec = data::ATOMS.iter().flat_map(|k| { (quote_tokens!(&mut *cx, $k,)).into_iter() }).collect(); - MacExpr::new(quote_expr!(&mut *cx, phf_ordered_set!($tts))) + MacEager::expr(quote_expr!(&mut *cx, phf_ordered_set!($tts))) } fn atom_tok_to_str(t: &TokenTree) -> Option { From 26017d2dfe649c27eb2d3b654927c450f10babb0 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 4 Mar 2015 14:28:50 -0800 Subject: [PATCH 081/379] Upgrade to rustc 1.0.0-dev (fed12499e 2015-03-03) (built 2015-03-04) --- examples/summarize-events/src/main.rs | 6 +++--- plugin/src/lib.rs | 2 +- src/lib.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index a771d51..284e8c6 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#![feature(std_misc, core, env, old_path)] +#![feature(std_misc, core, old_path)] extern crate csv; extern crate string_cache; @@ -45,7 +45,7 @@ fn main() { let filename = env::args().skip(1).next() .expect("Usage: string-cache-summarize-events foo.csv"); let path = &Path::new(filename); - let mut file = csv::Reader::from_file(path); + let mut file = csv::Reader::from_file(path).unwrap(); // Over the lifetime of a program, one dynamic atom might get interned at // several addresses, and one address may be used to intern several @@ -124,7 +124,7 @@ fn main() { println!("kind times pct"); println!("------- ------- ----"); for (k, &n) in by_kind.iter().enumerate() { - let k: Kind = FromPrimitive::from_uint(k).unwrap(); + let k: Kind = FromPrimitive::from_usize(k).unwrap(); print!("{:7?} {:7} {:4.1}", k, n, 100.0 * (n as f64) / (total as f64)); diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index c6d5ee7..ee2981b 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name="string_cache_plugin"] #![crate_type="dylib"] -#![feature(plugin_registrar, quote, int_uint, box_syntax)] +#![feature(plugin_registrar, quote, int_uint, box_syntax, static_assert)] #![feature(rustc_private, core)] #![deny(warnings)] #![allow(unused_imports)] // for quotes diff --git a/src/lib.rs b/src/lib.rs index 893f724..5d30514 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(plugin, unsafe_no_drop_flag)] +#![feature(plugin, unsafe_no_drop_flag, static_assert)] #![feature(core, collections, alloc, hash)] #![deny(warnings)] #![cfg_attr(test, feature(test))] From 654e690fa7f1ce9ff74a8636dd4b7427ce520bb3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 26 Mar 2015 09:31:16 +0100 Subject: [PATCH 082/379] Upgrade to rustc 1.0.0-nightly (123a754cb 2015-03-24) (built 2015-03-25) --- plugin/src/atom/mod.rs | 8 ++++---- shared/repr.rs | 2 +- src/atom/bench.rs | 11 +++++------ src/atom/mod.rs | 
6 +++--- src/lib.rs | 2 +- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index a4c797b..11988f5 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -90,7 +90,7 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box ext_bail!(cx, sp, usage), }; box ext_expect!(cx, sp, make_atom_result(cx, &*name), - format!("Unknown static atom {}", &*name).as_slice()) + &format!("Unknown static atom {}", &*name)) } // Translate `ns!(HTML)` into `Namespace { atom: atom!("http://www.w3.org/1999/xhtml") }`. @@ -117,15 +117,15 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box atom_tok_to_str(t), _ => None, - }, usage().as_slice()); + }, &usage()); let &(_, url) = ext_expect!(cx, sp, ALL_NS.iter().find(|&&(short, _)| short.eq_ignore_ascii_case(&*name)), - usage().as_slice()); + &usage()); // All of the URLs should be in the static atom table. let AtomResult { expr, pat } = ext_expect!(cx, sp, make_atom_result(cx, url), - format!("internal plugin error: can't find namespace url {}", url).as_slice()); + &format!("internal plugin error: can't find namespace url {}", url)); box AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::namespace::Namespace($expr)), diff --git a/shared/repr.rs b/shared/repr.rs index 8e69d2e..64d962c 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -71,7 +71,7 @@ impl UnpackedAtom { let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { let dest: &mut [u8] = mem::transmute(inline_atom_slice(&mut data)); - bytes::copy_memory(dest, buf.as_slice()); + bytes::copy_memory(dest, &buf[..]); } data } diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 9589128..1476636 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -80,9 +80,8 @@ macro_rules! 
bench_one ( #[bench] fn intern(b: &mut Bencher) { let x = $x.as_slice().to_string(); - let x = x.as_slice(); b.iter(|| { - black_box(Atom::from_slice(x)); + black_box(Atom::from_slice(&x)); }); } ); @@ -92,7 +91,7 @@ macro_rules! bench_one ( fn as_slice_x_1000(b: &mut Bencher) { let x = $x; b.iter(|| { - for _ in range(0, 1000) { + for _ in 0..1000 { black_box(x.as_slice()); } }); @@ -104,7 +103,7 @@ macro_rules! bench_one ( fn clone_x_1000(b: &mut Bencher) { let x = $x; b.iter(|| { - for _ in range(0, 1000) { + for _ in 0..1000 { black_box(x.clone()); } }); @@ -116,7 +115,7 @@ macro_rules! bench_one ( fn clone_x_1000(b: &mut Bencher) { let x = $x.to_string(); b.iter(|| { - for _ in range(0, 1000) { + for _ in 0..1000 { black_box(x.clone()); } }); @@ -205,7 +204,7 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( // shift into printable ASCII *n = (*n % 0x40) + 0x20; } - let s = str::from_utf8(buf.as_slice()).unwrap(); + let s = str::from_utf8(&buf[..]).unwrap(); black_box(Atom::from_slice(s)); }); } diff --git a/src/atom/mod.rs b/src/atom/mod.rs index f4483f5..70053e5 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -83,7 +83,7 @@ impl StringCache { while ptr != ptr::null_mut() { let value = unsafe { &*ptr }; - if value.hash == hash && value.string.as_slice() == string_to_add { + if value.hash == hash && value.string == string_to_add { break; } ptr = value.next_in_bucket; @@ -202,7 +202,7 @@ impl Atom { Static(idx) => *static_atom_set.iter().idx(idx as usize).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; - (*entry).string.as_slice() + &(*entry).string } } } @@ -461,7 +461,7 @@ mod tests { #[test] fn test_threads() { - for _ in range(0u32, 100u32) { + for _ in 0_u32..100 { thread::spawn(move || { let _ = Atom::from_slice("a dynamic string"); let _ = Atom::from_slice("another string"); diff --git a/src/lib.rs b/src/lib.rs index 5d30514..6c93ee9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ 
extern crate lazy_static; extern crate rand; #[cfg(feature = "log-events")] -extern crate "rustc-serialize" as rustc_serialize; +extern crate rustc_serialize; pub use atom::Atom; pub use namespace::{Namespace, QualName}; From 1ec2b387ded31cd122ab1548bd358dd7866e01a8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 26 Mar 2015 09:52:46 +0100 Subject: [PATCH 083/379] Upgrade examples/summarize-events --- examples/summarize-events/src/main.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 284e8c6..9da1445 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -7,11 +7,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![feature(std_misc, core, old_path)] +#![feature(core)] extern crate csv; extern crate string_cache; -extern crate "rustc-serialize" as rustc_serialize; +extern crate rustc_serialize; use string_cache::Atom; use string_cache::atom::repr; @@ -19,6 +19,7 @@ use string_cache::atom::repr; use std::{env, cmp}; use std::num::FromPrimitive; use std::collections::hash_map::{HashMap, Entry}; +use std::path::Path; #[derive(RustcDecodable, Debug)] struct Event { @@ -44,7 +45,7 @@ struct Summary { fn main() { let filename = env::args().skip(1).next() .expect("Usage: string-cache-summarize-events foo.csv"); - let path = &Path::new(filename); + let path = &Path::new(&filename); let mut file = csv::Reader::from_file(path).unwrap(); // Over the lifetime of a program, one dynamic atom might get interned at @@ -64,7 +65,7 @@ fn main() { assert!(tag <= repr::STATIC_TAG); let string = match tag { - repr::DYNAMIC_TAG => dynamic[ev.id].clone(), + repr::DYNAMIC_TAG => dynamic[&ev.id].clone(), // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. 
@@ -108,7 +109,7 @@ fn main() { .max().unwrap_or(0); let pad = |c, n| { - for _ in range(n, longest_atom) { + for _ in n..longest_atom { print!("{}", c); } }; @@ -143,7 +144,6 @@ fn main() { pad('-', 4); println!("---- ------ -------"); for (string, Summary { kind, times }) in summary { - use std::str::StrExt; pad(' ', string.chars().count()); println!("{} {:6} {:?}", string, times, kind); } From d2bcdc92f99198633de49802424013d4229ff524 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 26 Mar 2015 18:48:49 -0400 Subject: [PATCH 084/379] Fix compilation with latest Rust --- shared/repr.rs | 3 ++- src/atom/mod.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/shared/repr.rs b/shared/repr.rs index 64d962c..c114222 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -46,8 +46,9 @@ unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { #[static_assert] const IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); + let x: *const u64 = x; raw::Slice { - data: ((x as *const u64) as *const u8).offset(1), + data: (x as *const u8).offset(1), len: 7, } } diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 70053e5..57dedfb 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -147,7 +147,7 @@ impl StringCache { debug_assert!(current != ptr::null_mut()); unsafe { - ptr::read(ptr as *const StringCacheEntry); + ptr::read(ptr); heap::deallocate(ptr as *mut u8, mem::size_of::(), ENTRY_ALIGNMENT); } From 0127325b00a1e41e98155fa470691882c5f0a2dd Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 26 Mar 2015 18:58:25 -0400 Subject: [PATCH 085/379] Allow Atom Deref to &str Fixes #75 --- src/atom/mod.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 70053e5..13ca97e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -14,6 +14,7 @@ use phf::OrderedSet; use std::fmt; use std::iter::RandomAccessIterator; use std::mem; +use std::ops; use std::ptr; use std::slice::bytes; use std::str; @@ -252,6 
+253,15 @@ impl Drop for Atom { } } +impl ops::Deref for Atom { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + self.as_slice() + } +} + impl fmt::Debug for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { @@ -496,4 +506,11 @@ mod tests { _ => 3, }); } + + #[test] + fn ensure_deref() { + // Ensure we can Deref to a &str + let atom = Atom::from_slice("foobar"); + let _: &str = &atom; + } } From 4a5660d39f634eee5953bec042a29c7974445d8e Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Sun, 29 Mar 2015 00:13:40 -0400 Subject: [PATCH 086/379] Fix some compilation issues with latest Rust --- plugin/src/atom/mod.rs | 2 +- plugin/src/lib.rs | 4 ++-- shared/repr.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 11988f5..1123b1e 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -43,7 +43,7 @@ fn atom_tok_to_str(t: &TokenTree) -> Option { // Build a map from atoms to IDs for use in implementing the atom!() macro. lazy_static! 
{ - static ref STATIC_ATOM_MAP: HashMap<&'static str, uint> = { + static ref STATIC_ATOM_MAP: HashMap<&'static str, usize> = { let mut m = HashMap::new(); for (i, x) in data::ATOMS.iter().enumerate() { m.insert(*x, i); diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index ee2981b..6f8c7eb 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -10,8 +10,8 @@ #![crate_name="string_cache_plugin"] #![crate_type="dylib"] -#![feature(plugin_registrar, quote, int_uint, box_syntax, static_assert)] -#![feature(rustc_private, core)] +#![feature(plugin_registrar, quote, box_syntax, static_assert)] +#![feature(rustc_private, core, slice_patterns)] #![deny(warnings)] #![allow(unused_imports)] // for quotes diff --git a/shared/repr.rs b/shared/repr.rs index c114222..b4bbe61 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -92,7 +92,7 @@ impl UnpackedAtom { debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; let src: &[u8] = mem::transmute(inline_atom_slice(&data)); - bytes::copy_memory(buf.as_mut_slice(), src); + bytes::copy_memory(&mut buf[..], src); Inline(len as u8, buf) }, From 7bd40d5fe448d7c5ac8c5a81a82223d477dff800 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Sun, 29 Mar 2015 22:24:44 -0700 Subject: [PATCH 087/379] Fix test suite --- src/atom/bench.rs | 2 +- src/atom/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 1476636..a5d6af2 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -199,7 +199,7 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( // as about 3-12% at one point. 
let mut buf: [u8; $len] = [0; $len]; - gen.fill_bytes(buf.as_mut_slice()); + gen.fill_bytes(&mut buf); for n in buf.iter_mut() { // shift into printable ASCII *n = (*n % 0x40) + 0x20; diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 436cde2..99b220a 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -180,7 +180,7 @@ impl Atom { let len = string_to_add.len(); if len <= repr::MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; - bytes::copy_memory(buf.as_mut_slice(), string_to_add.as_bytes()); + bytes::copy_memory(&mut buf, string_to_add.as_bytes()); Inline(len as u8, buf) } else { Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ()) From 1d6bf3d9638e9454ea52a5e99c235a6e34b0059a Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Tue, 31 Mar 2015 15:42:51 -0700 Subject: [PATCH 088/379] Peg to an old version of phf temporarily --- Cargo.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 94e4703..6fd5701 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,10 +11,12 @@ authors = [ "The Servo Project Developers" ] log-events = ["rustc-serialize"] [dependencies] -phf = "0" -phf_macros = "0" rand = "0" +# FIXME(#80): upgrade to newer version +phf = "0.6" +phf_macros = "0.6" + [dependencies.rustc-serialize] version = "0" optional = true From ace97c7e5cca7b92fa5e28ff1e2739df1d9bcbfa Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Wed, 1 Apr 2015 09:08:29 -0700 Subject: [PATCH 089/379] Upgrade to rustc 1.0.0-nightly (d754722a0 2015-03-31) (built 2015-04-01) --- shared/repr.rs | 4 ++-- src/atom/mod.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/shared/repr.rs b/shared/repr.rs index b4bbe61..99dd010 100644 --- a/shared/repr.rs +++ b/shared/repr.rs @@ -72,7 +72,7 @@ impl UnpackedAtom { let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { let dest: &mut [u8] = mem::transmute(inline_atom_slice(&mut data)); - bytes::copy_memory(dest, &buf[..]); + 
bytes::copy_memory(&buf[..], dest); } data } @@ -92,7 +92,7 @@ impl UnpackedAtom { debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; let src: &[u8] = mem::transmute(inline_atom_slice(&data)); - bytes::copy_memory(&mut buf[..], src); + bytes::copy_memory(src, &mut buf[..]); Inline(len as u8, buf) }, diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 99b220a..906bad8 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -180,7 +180,7 @@ impl Atom { let len = string_to_add.len(); if len <= repr::MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; - bytes::copy_memory(&mut buf, string_to_add.as_bytes()); + bytes::copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) } else { Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ()) From fc1204034f623bc4040aee2ee3bd024c74b28a85 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Fri, 3 Apr 2015 21:53:48 -0700 Subject: [PATCH 090/379] Add some more static atoms --- plugin/src/atom/data.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugin/src/atom/data.rs b/plugin/src/atom/data.rs index 44bb5f7..50c35a3 100644 --- a/plugin/src/atom/data.rs +++ b/plugin/src/atom/data.rs @@ -394,6 +394,7 @@ pub static ATOMS: &'static [&'static str] = &[ "feDiffuseLighting", "feDisplacementMap", "feDistantLight", + "feDropShadow", "feFlood", "feFuncA", "feFuncB", @@ -418,6 +419,7 @@ pub static ATOMS: &'static [&'static str] = &[ "fediffuselighting", "fedisplacementmap", "fedistantlight", + "fedropshadow", "feflood", "fefunca", "fefuncb", @@ -1111,12 +1113,14 @@ pub static ATOMS: &'static [&'static str] = &[ "xlink:href", "xlink:role", "xlink:show", + "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", + "xlink", "xor", "xref", "y", From 2eca78a33ae9b292b4c32be8b6bf5495c3148490 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 15:32:36 -0700 Subject: [PATCH 091/379] impl Display for Atom --- src/atom/mod.rs | 6 ++++++ 1 file 
changed, 6 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 906bad8..1e2f0da 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -262,6 +262,12 @@ impl ops::Deref for Atom { } } +impl fmt::Display for Atom { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + ::fmt(self, f) + } +} + impl fmt::Debug for Atom { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { From b12ab4473b22fd38fd1516c0651c21edacf17174 Mon Sep 17 00:00:00 2001 From: Keegan McAllister Date: Mon, 6 Apr 2015 15:37:57 -0700 Subject: [PATCH 092/379] Make more functions eligible for cross-crate inlining --- src/atom/mod.rs | 6 ++++++ src/namespace.rs | 1 + 2 files changed, 7 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 1e2f0da..9963f8a 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -173,6 +173,7 @@ impl Atom { UnpackedAtom::from_packed(self.data) } + #[inline] pub fn from_slice(string_to_add: &str) -> Atom { let unpacked = match static_atom_set.get_index(string_to_add) { Some(id) => Static(id as u32), @@ -193,6 +194,7 @@ impl Atom { Atom { data: data } } + #[inline] pub fn as_slice<'t>(&'t self) -> &'t str { unsafe { match self.unpack() { @@ -263,12 +265,14 @@ impl ops::Deref for Atom { } impl fmt::Display for Atom { + #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { ::fmt(self, f) } } impl fmt::Debug for Atom { + #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { match self.unpack() { @@ -283,6 +287,7 @@ impl fmt::Debug for Atom { } impl PartialOrd for Atom { + #[inline] fn partial_cmp(&self, other: &Atom) -> Option { if self.data == other.data { return Some(Equal); @@ -292,6 +297,7 @@ impl PartialOrd for Atom { } impl Ord for Atom { + #[inline] fn cmp(&self, other: &Atom) -> Ordering { if self.data == other.data { return Equal; diff --git a/src/namespace.rs b/src/namespace.rs index e1d5f44..4bc229f 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -26,6 +26,7 @@ 
pub struct QualName { } impl QualName { + #[inline] pub fn new(ns: Namespace, local: Atom) -> QualName { QualName { ns: ns, From 6b8216e193a2e175f547212d9410ecd0bb44d2f6 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 11 Apr 2015 08:38:57 +0200 Subject: [PATCH 093/379] Upgrade phf. Fix #80. Depends on https://github.com/sfackler/rust-phf/pull/55 being merged and published on crates.io. --- .gitignore | 1 + Cargo.toml | 6 ++---- src/atom/mod.rs | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index d67d766..8280aff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /doc /Cargo.lock /target +.cargo/config diff --git a/Cargo.toml b/Cargo.toml index 6fd5701..405e5ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,10 +12,8 @@ log-events = ["rustc-serialize"] [dependencies] rand = "0" - -# FIXME(#80): upgrade to newer version -phf = "0.6" -phf_macros = "0.6" +phf = "0.7" +phf_macros = "0.7" [dependencies.rustc-serialize] version = "0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9963f8a..ae4fb43 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -12,7 +12,6 @@ use phf::OrderedSet; use std::fmt; -use std::iter::RandomAccessIterator; use std::mem; use std::ops; use std::ptr; @@ -202,7 +201,7 @@ impl Atom { let buf = repr::inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, - Static(idx) => *static_atom_set.iter().idx(idx as usize).expect("bad static atom"), + Static(idx) => *static_atom_set.index(idx as usize).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string From fbc920ac9a5af0d70ce9ca1a0789b5ebc7b7b65a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 12 Apr 2015 16:39:08 +0200 Subject: [PATCH 094/379] Disable doctests because of https://github.com/rust-lang/cargo/issues/1512 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note that there weren’t any doctest to run anyway. 
--- Cargo.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 405e5ac..ee3c84d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,12 @@ name = "string_cache" version = "0.0.0" authors = [ "The Servo Project Developers" ] +[lib] +name = "string_cache" + +# https://github.com/rust-lang/cargo/issues/1512 +doctest = false + [features] # Enable event logging for generating benchmark traces. From 385c4ac197742e355631209be3613c8a9c7594b6 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 24 Apr 2015 07:12:48 +0200 Subject: [PATCH 095/379] Upgrade to rustc 1.1.0-nightly (90cc83015 2015-04-22) (built 2015-04-23) --- src/namespace.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/namespace.rs b/src/namespace.rs index 4bc229f..6ae5904 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -7,8 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![unstable(feature = "string_cache_namespace", - reason = "This may move as string-cache becomes less Web-specific.")] +//! **Note:** This may move as string-cache becomes less Web-specific. use atom::Atom; From 32d7a8104e6a0ba29bfb8a58100db0180de24909 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 29 Apr 2015 22:34:53 +0200 Subject: [PATCH 096/379] Also ignore Cargo.lock and target in examples/summarize-events/ --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 8280aff..c17061b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ /doc -/Cargo.lock -/target +Cargo.lock +target .cargo/config From b6edafb232012ed293654b4da1417ec9d5755c47 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 29 Apr 2015 22:37:49 +0200 Subject: [PATCH 097/379] Use dependencies from crates.io instead of git. 
--- Cargo.toml | 4 +--- plugin/Cargo.toml | 8 +++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ee3c84d..ece88de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ log-events = ["rustc-serialize"] rand = "0" phf = "0.7" phf_macros = "0.7" +lazy_static = "0.1.10" [dependencies.rustc-serialize] version = "0" @@ -27,6 +28,3 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" - -[dependencies.lazy_static] -git = "https://github.com/Kimundi/lazy-static.rs" diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 2f408d4..472d13e 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -9,8 +9,6 @@ authors = [ "The Servo Project Developers" ] name = "string_cache_plugin" plugin = true -[dependencies.lazy_static] -git = "https://github.com/Kimundi/lazy-static.rs" - -[dependencies.mac] -git = "https://github.com/reem/rust-mac" +[dependencies] +lazy_static = "0.1.10" +mac = "0.0.2" From c5912f925e9c1db7dbe4a7980fbc3eed08eef51d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 29 Apr 2015 23:22:09 +0200 Subject: [PATCH 098/379] Publish on cartes.io --- Cargo.toml | 10 +++++++++- plugin/Cargo.toml | 9 ++++++++- plugin/src/atom/mod.rs | 5 +---- plugin/src/lib.rs | 4 +++- shared/Cargo.toml | 13 +++++++++++++ shared/{repr.rs => lib.rs} | 7 ++++--- src/atom/bench.rs | 2 +- src/atom/mod.rs | 14 ++++++-------- src/lib.rs | 2 ++ 9 files changed, 47 insertions(+), 19 deletions(-) create mode 100644 shared/Cargo.toml rename shared/{repr.rs => lib.rs} (95%) diff --git a/Cargo.toml b/Cargo.toml index ece88de..f15b549 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,11 @@ [package] name = "string_cache" -version = "0.0.0" +version = "0.1.0" authors = [ "The Servo Project Developers" ] +description = "A string interning library for Rust, developed as part of the Servo project." 
+license = "MIT / Apache-2.0" +repository = "https://github.com/servo/string-cache" [lib] name = "string_cache" @@ -28,3 +31,8 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" +version = "0.1.1" + +[dependencies.string_cache_shared] +path = "shared" +version = "0.1.0" diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 472d13e..4e579fa 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,14 +1,21 @@ [package] name = "string_cache_plugin" -version = "0.0.0" +version = "0.1.1" authors = [ "The Servo Project Developers" ] +description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." +license = "MIT / Apache-2.0" +repository = "https://github.com/servo/string-cache" [lib] name = "string_cache_plugin" plugin = true +[dependencies.string_cache_shared] +path = "../shared" +version = "0.1.0" + [dependencies] lazy_static = "0.1.10" mac = "0.0.2" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 1123b1e..ea1a9af 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -20,9 +20,6 @@ use std::ascii::AsciiExt; mod data; -#[path="../../../shared/repr.rs"] -mod repr; - // Build a PhfOrderedSet of static atoms. // Takes no arguments. 
pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { @@ -74,7 +71,7 @@ fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { None => return None, }; - let data = repr::pack_static(*i as u32); + let data = ::string_cache_shared::pack_static(*i as u32); Some(AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index 6f8c7eb..25bf327 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -11,7 +11,7 @@ #![crate_type="dylib"] #![feature(plugin_registrar, quote, box_syntax, static_assert)] -#![feature(rustc_private, core, slice_patterns)] +#![feature(rustc_private, slice_patterns)] #![deny(warnings)] #![allow(unused_imports)] // for quotes @@ -24,6 +24,8 @@ extern crate lazy_static; #[macro_use] extern crate mac; +extern crate string_cache_shared; + use rustc::plugin::Registry; mod atom; diff --git a/shared/Cargo.toml b/shared/Cargo.toml new file mode 100644 index 0000000..d09c317 --- /dev/null +++ b/shared/Cargo.toml @@ -0,0 +1,13 @@ +[package] + +name = "string_cache_shared" +version = "0.1.0" +authors = [ "The Servo Project Developers" ] +description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." +license = "MIT / Apache-2.0" +repository = "https://github.com/servo/string-cache" + +[lib] + +name = "string_cache_shared" +path = "lib.rs" diff --git a/shared/repr.rs b/shared/lib.rs similarity index 95% rename from shared/repr.rs rename to shared/lib.rs index 99dd010..432fbc4 100644 --- a/shared/repr.rs +++ b/shared/lib.rs @@ -11,7 +11,8 @@ //! the macros crate and the run-time library, in order to guarantee //! consistency. 
-#![allow(dead_code, unused_imports)] +#![feature(core, static_assert)] +#![deny(warnings)] use std::{mem, raw, intrinsics}; use std::slice::bytes; @@ -44,7 +45,7 @@ const STATIC_SHIFT_BITS: usize = 32; #[inline(always)] unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { #[static_assert] - const IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); + const _IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); let x: *const u64 = x; raw::Slice { @@ -82,7 +83,7 @@ impl UnpackedAtom { #[inline(always)] pub unsafe fn from_packed(data: u64) -> UnpackedAtom { #[static_assert] - const DYNAMIC_IS_UNTAGGED: bool = DYNAMIC_TAG == 0; + const _DYNAMIC_IS_UNTAGGED: bool = DYNAMIC_TAG == 0; match (data & 0xf) as u8 { DYNAMIC_TAG => Dynamic(data as *mut ()), diff --git a/src/atom/bench.rs b/src/atom/bench.rs index a5d6af2..365ec48 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -135,7 +135,7 @@ macro_rules! bench_all ( use std::iter::repeat; use atom::Atom; - use atom::repr::{Static, Inline, Dynamic}; + use string_cache_shared::{Static, Inline, Dynamic}; use super::mk; diff --git a/src/atom/mod.rs b/src/atom/mod.rs index ae4fb43..e9ef451 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -24,7 +24,7 @@ use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use self::repr::{UnpackedAtom, Static, Inline, Dynamic}; +use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic}; #[cfg(feature = "log-events")] use event::Event; @@ -32,8 +32,6 @@ use event::Event; #[cfg(not(feature = "log-events"))] macro_rules! log (($e:expr) => (())); -#[path="../../shared/repr.rs"] -pub mod repr; // Needed for memory safety of the tagging scheme! 
const ENTRY_ALIGNMENT: usize = 16; @@ -178,7 +176,7 @@ impl Atom { Some(id) => Static(id as u32), None => { let len = string_to_add.len(); - if len <= repr::MAX_INLINE_LEN { + if len <= string_cache_shared::MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; bytes::copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) @@ -198,7 +196,7 @@ impl Atom { unsafe { match self.unpack() { Inline(..) => { - let buf = repr::inline_orig_bytes(&self.data); + let buf = string_cache_shared::inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, Static(idx) => *static_atom_set.index(idx as usize).expect("bad static atom"), @@ -215,7 +213,7 @@ impl Clone for Atom { #[inline(always)] fn clone(&self) -> Atom { unsafe { - match repr::from_packed_dynamic(self.data) { + match string_cache_shared::from_packed_dynamic(self.data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); @@ -238,7 +236,7 @@ impl Drop for Atom { } unsafe { - match repr::from_packed_dynamic(self.data) { + match string_cache_shared::from_packed_dynamic(self.data) { // We use #[unsafe_no_drop_flag] so that Atom will be only 64 // bits. That means we need to ignore a NULL pointer here, // which represents a value that was moved out. 
@@ -312,7 +310,7 @@ mod bench; mod tests { use std::thread; use super::Atom; - use super::repr::{Static, Inline, Dynamic}; + use string_cache_shared::{Static, Inline, Dynamic}; #[test] fn test_as_slice() { diff --git a/src/lib.rs b/src/lib.rs index 6c93ee9..edc297d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,8 @@ extern crate rand; #[cfg(feature = "log-events")] extern crate rustc_serialize; +extern crate string_cache_shared; + pub use atom::Atom; pub use namespace::{Namespace, QualName}; From 49da4030b9c94c6f72b44319794b96d0fd6471d2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 11 May 2015 18:23:54 +0200 Subject: [PATCH 099/379] Remove usage of #[static_assert] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s probably going away: https://github.com/rust-lang/rfcs/pull/1096 --- plugin/src/lib.rs | 2 +- shared/lib.rs | 9 +++------ src/lib.rs | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index 25bf327..b5136dd 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name="string_cache_plugin"] #![crate_type="dylib"] -#![feature(plugin_registrar, quote, box_syntax, static_assert)] +#![feature(plugin_registrar, quote, box_syntax)] #![feature(rustc_private, slice_patterns)] #![deny(warnings)] #![allow(unused_imports)] // for quotes diff --git a/shared/lib.rs b/shared/lib.rs index 432fbc4..6c5ce7d 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -11,7 +11,7 @@ //! the macros crate and the run-time library, in order to guarantee //! consistency. 
-#![feature(core, static_assert)] +#![feature(core)] #![deny(warnings)] use std::{mem, raw, intrinsics}; @@ -42,11 +42,9 @@ pub enum UnpackedAtom { const STATIC_SHIFT_BITS: usize = 32; +#[cfg(target_endian = "little")] // Not implemented yet for big-endian #[inline(always)] unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { - #[static_assert] - const _IS_LITTLE_ENDIAN: bool = cfg!(target_endian = "little"); - let x: *const u64 = x; raw::Slice { data: (x as *const u8).offset(1), @@ -82,8 +80,7 @@ impl UnpackedAtom { #[inline(always)] pub unsafe fn from_packed(data: u64) -> UnpackedAtom { - #[static_assert] - const _DYNAMIC_IS_UNTAGGED: bool = DYNAMIC_TAG == 0; + debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged match (data & 0xf) as u8 { DYNAMIC_TAG => Dynamic(data as *mut ()), diff --git a/src/lib.rs b/src/lib.rs index edc297d..34bb932 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(plugin, unsafe_no_drop_flag, static_assert)] +#![feature(plugin, unsafe_no_drop_flag)] #![feature(core, collections, alloc, hash)] #![deny(warnings)] #![cfg_attr(test, feature(test))] From 0c840717b4fa693d8abe18fe9a1301f87ba4a837 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Sun, 7 Jun 2015 08:01:16 -0400 Subject: [PATCH 100/379] Link to documentation --- Cargo.toml | 1 + README.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index f15b549..94b43eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" +documentation = "http://doc.servo.org/string_cache/" [lib] name = "string_cache" diff --git a/README.md b/README.md index 616dfcc..f18a7dd 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,6 @@ [![Build Status](https://travis-ci.org/servo/string-cache.svg?branch=master)](https://travis-ci.org/servo/string-cache) +[Documentation](http://doc.servo.org/string_cache/) + A string interning library for Rust, developed as part of the [Servo](https://github.com/servo/servo) project. From e66ba2020fb3f6874dab26813bfe5a52cc62904b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 9 Jun 2015 11:50:03 +0200 Subject: [PATCH 101/379] Travis-CI now defaults to Rust stable. Use nightly. --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 02760d0..63f10e3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ sudo: false language: rust +rust: nightly script: - cargo test - cargo clean From d16898a8f78ec771f40f06490a7ddfa88ead5d71 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 9 Jun 2015 12:00:21 +0200 Subject: [PATCH 102/379] Update examples/summarize-events --- examples/summarize-events/Cargo.toml | 3 +++ examples/summarize-events/src/main.rs | 37 +++++++++++++++++++-------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/examples/summarize-events/Cargo.toml b/examples/summarize-events/Cargo.toml index fc9e345..27d173f 100644 --- a/examples/summarize-events/Cargo.toml +++ b/examples/summarize-events/Cargo.toml @@ -10,3 +10,6 @@ rustc-serialize = "0" [dependencies.string_cache] path = "../.." + +[dependencies.string_cache_shared] +path = "../../shared" diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 9da1445..db50fcd 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -7,17 +7,14 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. -#![feature(core)] - extern crate csv; extern crate string_cache; +extern crate string_cache_shared; extern crate rustc_serialize; use string_cache::Atom; -use string_cache::atom::repr; use std::{env, cmp}; -use std::num::FromPrimitive; use std::collections::hash_map::{HashMap, Entry}; use std::path::Path; @@ -28,14 +25,32 @@ struct Event { string: Option, } -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] -#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] enum Kind { Dynamic, Inline, Static, } +impl Kind { + fn from_tag(tag: u8) -> Kind { + match tag { + string_cache_shared::DYNAMIC_TAG => Kind::Dynamic, + string_cache_shared::INLINE_TAG => Kind::Inline, + string_cache_shared::STATIC_TAG => Kind::Static, + _ => panic!() + } + } + + fn to_tag(self) -> u8 { + match self { + Kind::Dynamic => string_cache_shared::DYNAMIC_TAG, + Kind::Inline => string_cache_shared::INLINE_TAG, + Kind::Static => string_cache_shared::STATIC_TAG, + } + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] struct Summary { kind: Kind, @@ -62,10 +77,10 @@ fn main() { match &ev.event[..] { "intern" => { let tag = (ev.id & 0xf) as u8; - assert!(tag <= repr::STATIC_TAG); + assert!(tag <= string_cache_shared::STATIC_TAG); let string = match tag { - repr::DYNAMIC_TAG => dynamic[&ev.id].clone(), + string_cache_shared::DYNAMIC_TAG => dynamic[&ev.id].clone(), // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. 
@@ -76,7 +91,7 @@ fn main() { Entry::Occupied(entry) => entry.into_mut().times += 1, Entry::Vacant(entry) => { entry.insert(Summary { - kind: FromPrimitive::from_u8(tag).unwrap(), + kind: Kind::from_tag(tag), times: 1, }); } @@ -118,14 +133,14 @@ fn main() { let mut by_kind = [0, 0, 0]; for &(_, Summary { kind, times }) in &summary { total += times; - by_kind[kind as usize] += times; + by_kind[kind.to_tag() as usize] += times; } println!("\n"); println!("kind times pct"); println!("------- ------- ----"); for (k, &n) in by_kind.iter().enumerate() { - let k: Kind = FromPrimitive::from_usize(k).unwrap(); + let k: Kind = Kind::from_tag(k as u8); print!("{:7?} {:7} {:4.1}", k, n, 100.0 * (n as f64) / (total as f64)); From 1fa8d0317a327d43846139281520e9061cab151f Mon Sep 17 00:00:00 2001 From: Jack Moffitt Date: Tue, 9 Jun 2015 16:43:15 -0600 Subject: [PATCH 103/379] Minor changes for new Rust. --- src/atom/mod.rs | 4 ++-- src/lib.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index e9ef451..31ec94a 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -62,7 +62,7 @@ impl StringCacheEntry { next_in_bucket: next, hash: hash, ref_count: AtomicIsize::new(1), - string: String::from_str(string_to_add), + string: String::from(string_to_add), } } } @@ -112,7 +112,7 @@ impl StringCache { StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); } self.buckets[bucket_index] = ptr; - log!(Event::Insert(ptr as u64, String::from_str(string_to_add))); + log!(Event::Insert(ptr as u64, String::from(string_to_add))); } debug_assert!(ptr != ptr::null_mut()); diff --git a/src/lib.rs b/src/lib.rs index 34bb932..568c027 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,7 @@ #![crate_type = "rlib"] #![feature(plugin, unsafe_no_drop_flag)] -#![feature(core, collections, alloc, hash)] +#![feature(core, alloc, hash)] #![deny(warnings)] #![cfg_attr(test, feature(test))] #![cfg_attr(bench, feature(rand))] From 
01388d25b54ecd85fb64e46750b1cda9ccffee33 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jun 2015 23:36:37 +0200 Subject: [PATCH 104/379] v0.1.2 --- Cargo.toml | 2 +- plugin/Cargo.toml | 2 +- shared/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 94b43eb..cb0b541 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.0" +version = "0.1.2" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 4e579fa..398e5a2 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.1" +version = "0.1.2" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." license = "MIT / Apache-2.0" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index d09c317..06a4852 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.0" +version = "0.1.2" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." 
license = "MIT / Apache-2.0" From 41b267f339048fd90f63073b2203a94f74060284 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 15 Jun 2015 23:40:01 +0200 Subject: [PATCH 105/379] v0.1.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cb0b541..893ddea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.2" +version = "0.1.3" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 68c931a9a2ee7792ba7e376a146523b11ec07db2 Mon Sep 17 00:00:00 2001 From: Jack Moffitt Date: Mon, 15 Jun 2015 15:45:05 -0600 Subject: [PATCH 106/379] Notify homu of travis changes --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 63f10e3..3c6e71e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,3 +7,5 @@ script: - cargo test --features log-events - cd examples/summarize-events/ - cargo build +notifications: + webhooks: http://build.servo.org:54856/travis From c188d6649b2a946a7152d3cbe8b48f5d76a9f0ce Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 19 Jun 2015 12:14:47 +0200 Subject: [PATCH 107/379] Upgrade to rustc 1.2.0-nightly (2f5683913 2015-06-18) --- Cargo.toml | 2 +- shared/Cargo.toml | 2 +- shared/lib.rs | 2 +- src/lib.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 893ddea..e7fff28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.3" +version = "0.1.4" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 06a4852..d0bc2ef 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.2" +version = "0.1.3" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" diff --git a/shared/lib.rs b/shared/lib.rs index 6c5ce7d..896015d 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -11,7 +11,7 @@ //! the macros crate and the run-time library, in order to guarantee //! consistency. -#![feature(core)] +#![feature(raw, slice_bytes, core_intrinsics)] #![deny(warnings)] use std::{mem, raw, intrinsics}; diff --git a/src/lib.rs b/src/lib.rs index 568c027..1d7f146 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,7 @@ #![crate_type = "rlib"] #![feature(plugin, unsafe_no_drop_flag)] -#![feature(core, alloc, hash)] +#![feature(slice_bytes, heap_api, hash_default)] #![deny(warnings)] #![cfg_attr(test, feature(test))] #![cfg_attr(bench, feature(rand))] From 09876f7d6f46cda981cfd017349b80bd82c2f2c5 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Mon, 6 Jul 2015 10:31:00 -0700 Subject: [PATCH 108/379] Implement AsRef for Atom https://github.com/servo/servo/pull/6555#issuecomment-118786436 --- src/atom/mod.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 31ec94a..4abef9a 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -303,6 +303,12 @@ impl Ord for Atom { } } +impl AsRef for Atom { + fn as_ref(&self) -> &str { + &self + } +} + #[cfg(test)] mod bench; @@ -522,4 +528,11 @@ mod tests { let atom = Atom::from_slice("foobar"); let _: &str = &atom; } + + #[test] + fn ensure_as_ref() { + // Ensure we can as_ref to a &str + let atom = Atom::from_slice("foobar"); + let _: &str = atom.as_ref(); + } } From 
b66c3e51fdae228d85e607efa55cc114f6c939d4 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Mon, 6 Jul 2015 10:47:42 -0700 Subject: [PATCH 109/379] Bump version: 0.1.4 -> 0.1.5 I need this in a released version: https://github.com/servo/string-cache/pull/90 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e7fff28..5566d83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.4" +version = "0.1.5" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 6caf30a06b2462174647620f6143bf5803d15e2b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 13 Jul 2015 16:17:33 +0200 Subject: [PATCH 110/379] Upgrade to rustc 1.3.0-nightly (7ea2674c7 2015-07-13) --- plugin/Cargo.toml | 2 +- plugin/src/atom/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 398e5a2..90c396f 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.2" +version = "0.1.3" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." 
license = "MIT / Apache-2.0" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index ea1a9af..91474b4 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -108,7 +108,7 @@ pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box Date: Mon, 13 Jul 2015 09:53:31 -0700 Subject: [PATCH 111/379] Add support for serialization via `serde` --- Cargo.toml | 1 + src/atom/mod.rs | 15 +++++++++++++++ src/lib.rs | 2 ++ 3 files changed, 18 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 5566d83..1921ade 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ rand = "0" phf = "0.7" phf_macros = "0.7" lazy_static = "0.1.10" +serde = "0.4.2" [dependencies.rustc-serialize] version = "0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 4abef9a..9763ed2 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -10,6 +10,7 @@ #![allow(non_upper_case_globals)] use phf::OrderedSet; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::mem; @@ -309,6 +310,20 @@ impl AsRef for Atom { } } +impl Serialize for Atom { + fn serialize(&self, serializer: &mut S) -> Result<(),S::Error> where S: Serializer { + let string: &str = self.as_ref(); + string.serialize(serializer) + } +} + +impl Deserialize for Atom { + fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { + let string: String = try!(Deserialize::deserialize(deserializer)); + Ok(Atom::from_slice(&*string)) + } +} + #[cfg(test)] mod bench; diff --git a/src/lib.rs b/src/lib.rs index 1d7f146..e52992a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,8 @@ extern crate rand; #[cfg(feature = "log-events")] extern crate rustc_serialize; +extern crate serde; + extern crate string_cache_shared; pub use atom::Atom; From 81f680c72d623b0a29e7f1738c1a352729298576 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Wed, 15 Jul 2015 18:57:58 -0700 Subject: [PATCH 112/379] v0.1.16 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/Cargo.toml b/Cargo.toml index 1921ade..e2b28c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.5" +version = "0.1.6" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From c136ab8b18b30f3e65e35076cb44b92037405d1a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 24 Jul 2015 21:12:06 +0200 Subject: [PATCH 113/379] Add static atoms for names of CSS properties recently added to Servo. --- plugin/Cargo.toml | 2 +- plugin/src/atom/data.rs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 90c396f..4db662e 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.3" +version = "0.1.4" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." 
license = "MIT / Apache-2.0" diff --git a/plugin/src/atom/data.rs b/plugin/src/atom/data.rs index 50c35a3..18c14a5 100644 --- a/plugin/src/atom/data.rs +++ b/plugin/src/atom/data.rs @@ -1131,4 +1131,34 @@ pub static ATOMS: &'static [&'static str] = &[ "z", "zoomAndPan", "zoomandpan", + + "background-size", + "background-origin", + "background-clip", + "border-top-left-radius", + "border-top-right-radius", + "border-bottom-right-radius", + "border-bottom-left-radius", + "outline-color", + "outline-style", + "outline-width", + "overflow-wrap", + "list-style-image", + "list-style-position", + "list-style-type", + "column-count", + "column-width", + "overflow-x", + "overflow-y", + "transition-property", + "transition-duration", + "transition-timing-function", + "transition-delay", + "border-radius", + "outline", + "word-wrap", + "list-style", + "columns", + "transitions", + ]; From 5169a5b02a3211c26d7204c9829576d40d9be31c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 18:22:57 +0200 Subject: [PATCH 114/379] Remove usage of unstable features in string_cache_shared --- shared/Cargo.toml | 5 ++++- shared/lib.rs | 52 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/shared/Cargo.toml b/shared/Cargo.toml index d0bc2ef..f669443 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.3" +version = "0.1.2" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" @@ -11,3 +11,6 @@ repository = "https://github.com/servo/string-cache" name = "string_cache_shared" path = "lib.rs" + +[dependencies] +debug_unreachable = "0.0.5" diff --git a/shared/lib.rs b/shared/lib.rs index 896015d..c48ed70 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -11,11 +11,12 @@ //! 
the macros crate and the run-time library, in order to guarantee //! consistency. -#![feature(raw, slice_bytes, core_intrinsics)] #![deny(warnings)] -use std::{mem, raw, intrinsics}; -use std::slice::bytes; +#[macro_use] extern crate debug_unreachable; + +use std::ptr; +use std::slice; pub use self::UnpackedAtom::{Dynamic, Inline, Static}; @@ -42,11 +43,16 @@ pub enum UnpackedAtom { const STATIC_SHIFT_BITS: usize = 32; +struct RawSlice { + data: *const u8, + len: usize, +} + #[cfg(target_endian = "little")] // Not implemented yet for big-endian #[inline(always)] -unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { +unsafe fn inline_atom_slice(x: &u64) -> RawSlice { let x: *const u64 = x; - raw::Slice { + RawSlice { data: (x as *const u8).offset(1), len: 7, } @@ -70,8 +76,10 @@ impl UnpackedAtom { debug_assert!((len as usize) <= MAX_INLINE_LEN); let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let dest: &mut [u8] = mem::transmute(inline_atom_slice(&mut data)); - bytes::copy_memory(&buf[..], dest); + let raw_slice = inline_atom_slice(&mut data); + let dest: &mut [u8] = slice::from_raw_parts_mut( + raw_slice.data as *mut u8, raw_slice.len); + copy_memory(&buf[..], dest); } data } @@ -89,14 +97,12 @@ impl UnpackedAtom { let len = ((data & 0xf0) >> 4) as usize; debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; - let src: &[u8] = mem::transmute(inline_atom_slice(&data)); - bytes::copy_memory(src, &mut buf[..]); + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + copy_memory(src, &mut buf[..]); Inline(len as u8, buf) }, - - // intrinsics::unreachable() in release builds? - // See rust-lang/rust#18152. 
- _ => panic!("impossible"), + _ => debug_unreachable!(), } } } @@ -119,9 +125,25 @@ pub unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { pub unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { - let src: &[u8] = mem::transmute(inline_atom_slice(data)); + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); &src[..(len as usize)] } - _ => intrinsics::unreachable(), + _ => debug_unreachable!(), + } +} + + +/// Copy of std::slice::bytes::copy_memory, which is unstable. +#[inline] +pub fn copy_memory(src: &[u8], dst: &mut [u8]) { + let len_src = src.len(); + assert!(dst.len() >= len_src); + // `dst` is unaliasable, so we know statically it doesn't overlap + // with `src`. + unsafe { + ptr::copy_nonoverlapping(src.as_ptr(), + dst.as_mut_ptr(), + len_src); } } From 917dd80098ba44462321ce721576c798f276d6d8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 18:23:41 +0200 Subject: [PATCH 115/379] Use phf_codegen instead of phf_macros --- Cargo.toml | 4 +-- plugin/src/atom/mod.rs | 26 ++----------------- plugin/src/lib.rs | 1 - shared/Cargo.toml | 5 ++++ shared/build.rs | 20 ++++++++++++++ shared/lib.rs | 3 +++ .../data.rs => shared/static_atom_list.rs | 0 src/atom/mod.rs | 9 +++---- src/lib.rs | 4 +-- 9 files changed, 35 insertions(+), 37 deletions(-) create mode 100644 shared/build.rs rename plugin/src/atom/data.rs => shared/static_atom_list.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index e2b28c4..f47b41b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.6" +version = "0.1.7" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -22,8 +22,6 @@ log-events = ["rustc-serialize"] [dependencies] rand = "0" -phf = "0.7" -phf_macros = "0.7" lazy_static = "0.1.10" serde = "0.4.2" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 91474b4..93cdeec 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -18,17 +18,6 @@ use std::iter::Chain; use std::collections::HashMap; use std::ascii::AsciiExt; -mod data; - -// Build a PhfOrderedSet of static atoms. -// Takes no arguments. -pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - ext_bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); - let tts: Vec = data::ATOMS.iter().flat_map(|k| { - (quote_tokens!(&mut *cx, $k,)).into_iter() - }).collect(); - MacEager::expr(quote_expr!(&mut *cx, phf_ordered_set!($tts))) -} fn atom_tok_to_str(t: &TokenTree) -> Option { Some(get_ident(match *t { @@ -38,17 +27,6 @@ fn atom_tok_to_str(t: &TokenTree) -> Option { })) } -// Build a map from atoms to IDs for use in implementing the atom!() macro. -lazy_static! 
{ - static ref STATIC_ATOM_MAP: HashMap<&'static str, usize> = { - let mut m = HashMap::new(); - for (i, x) in data::ATOMS.iter().enumerate() { - m.insert(*x, i); - } - m - }; -} - // FIXME: libsyntax should provide this (rust-lang/rust#17637) struct AtomResult { expr: P, @@ -66,12 +44,12 @@ impl MacResult for AtomResult { } fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match STATIC_ATOM_MAP.get(name) { + let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index(name) { Some(i) => i, None => return None, }; - let data = ::string_cache_shared::pack_static(*i as u32); + let data = ::string_cache_shared::pack_static(i as u32); Some(AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index b5136dd..138a0f9 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -33,7 +33,6 @@ mod atom; // NB: This needs to be public or we get a linker error. #[plugin_registrar] pub fn plugin_registrar(reg: &mut Registry) { - reg.register_macro("static_atom_set", atom::expand_static_atom_set); reg.register_macro("atom", atom::expand_atom); reg.register_macro("ns", atom::expand_ns); } diff --git a/shared/Cargo.toml b/shared/Cargo.toml index f669443..60437a8 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -6,6 +6,7 @@ authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." 
license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" +build = "build.rs" [lib] @@ -14,3 +15,7 @@ path = "lib.rs" [dependencies] debug_unreachable = "0.0.5" +phf = "0.7.3" + +[build-dependencies] +phf_codegen = "0.7.3" diff --git a/shared/build.rs b/shared/build.rs new file mode 100644 index 0000000..0aafd27 --- /dev/null +++ b/shared/build.rs @@ -0,0 +1,20 @@ +extern crate phf_codegen; + +mod static_atom_list; + +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +fn main() { + let mut set = phf_codegen::OrderedSet::new(); + for &atom in static_atom_list::ATOMS { + set.entry(atom); + } + + let path = Path::new(env!("OUT_DIR")).join("static_atom_set.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + write!(&mut file, "pub static STATIC_ATOM_SET: phf::OrderedSet<&'static str> = ").unwrap(); + set.build(&mut file).unwrap(); + write!(&mut file, ";\n").unwrap(); +} diff --git a/shared/lib.rs b/shared/lib.rs index c48ed70..8ff9db6 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -14,12 +14,15 @@ #![deny(warnings)] #[macro_use] extern crate debug_unreachable; +extern crate phf; use std::ptr; use std::slice; pub use self::UnpackedAtom::{Dynamic, Inline, Static}; +include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); + // FIXME(rust-lang/rust#18153): generate these from an enum pub const DYNAMIC_TAG: u8 = 0u8; pub const INLINE_TAG: u8 = 1u8; // len in upper nybble diff --git a/plugin/src/atom/data.rs b/shared/static_atom_list.rs similarity index 100% rename from plugin/src/atom/data.rs rename to shared/static_atom_list.rs diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9763ed2..8b101a3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -9,7 +9,6 @@ #![allow(non_upper_case_globals)] -use phf::OrderedSet; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; @@ -25,7 +24,7 @@ use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use 
std::sync::atomic::Ordering::SeqCst; -use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic}; +use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET}; #[cfg(feature = "log-events")] use event::Event; @@ -37,8 +36,6 @@ macro_rules! log (($e:expr) => (())); // Needed for memory safety of the tagging scheme! const ENTRY_ALIGNMENT: usize = 16; -// Macro-generated table for static atoms. -static static_atom_set: OrderedSet<&'static str> = static_atom_set!(); struct StringCache { buckets: [*mut StringCacheEntry; 4096], @@ -173,7 +170,7 @@ impl Atom { #[inline] pub fn from_slice(string_to_add: &str) -> Atom { - let unpacked = match static_atom_set.get_index(string_to_add) { + let unpacked = match STATIC_ATOM_SET.get_index(string_to_add) { Some(id) => Static(id as u32), None => { let len = string_to_add.len(); @@ -200,7 +197,7 @@ impl Atom { let buf = string_cache_shared::inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, - Static(idx) => *static_atom_set.index(idx as usize).expect("bad static atom"), + Static(idx) => *STATIC_ATOM_SET.index(idx as usize).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string diff --git a/src/lib.rs b/src/lib.rs index e52992a..f568312 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,13 +15,11 @@ #![deny(warnings)] #![cfg_attr(test, feature(test))] #![cfg_attr(bench, feature(rand))] -#![plugin(phf_macros, string_cache_plugin)] +#![plugin(string_cache_plugin)] #[cfg(test)] extern crate test; -extern crate phf; - #[macro_use] extern crate lazy_static; From 747bf2479f32c75069956e7bd2fd25de68cc00c5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 19:34:36 +0200 Subject: [PATCH 116/379] Lower ENTRY_ALIGNMENT from 16 to 4 We only need 2 bits to store the tag (which is 0b_00, 0b_01, or 0b_10). This will enable using `Box::new` instead of `heap::allocate`. @gw, was there another reason to have it at 16? 
--- shared/lib.rs | 14 ++++++++------ src/atom/mod.rs | 17 +++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/shared/lib.rs b/shared/lib.rs index 8ff9db6..466ab18 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -24,9 +24,11 @@ pub use self::UnpackedAtom::{Dynamic, Inline, Static}; include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); // FIXME(rust-lang/rust#18153): generate these from an enum -pub const DYNAMIC_TAG: u8 = 0u8; -pub const INLINE_TAG: u8 = 1u8; // len in upper nybble -pub const STATIC_TAG: u8 = 2u8; +pub const DYNAMIC_TAG: u8 = 0b_00; +pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble +pub const STATIC_TAG: u8 = 0b_10; +pub const TAG_MASK: u64 = 0b_11; +pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. pub const MAX_INLINE_LEN: usize = 7; @@ -72,7 +74,7 @@ impl UnpackedAtom { Static(n) => pack_static(n), Dynamic(p) => { let n = p as u64; - debug_assert!(0 == n & 0xf); + debug_assert!(0 == n & TAG_MASK); n } Inline(len, buf) => { @@ -93,7 +95,7 @@ impl UnpackedAtom { pub unsafe fn from_packed(data: u64) -> UnpackedAtom { debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - match (data & 0xf) as u8 { + match (data & TAG_MASK) as u8 { DYNAMIC_TAG => Dynamic(data as *mut ()), STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), INLINE_TAG => { @@ -113,7 +115,7 @@ impl UnpackedAtom { /// Used for a fast path in Clone and Drop. 
#[inline(always)] pub unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { - if (DYNAMIC_TAG as u64) == (data & 0xf) { + if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { Some(data as *mut ()) } else { None diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 8b101a3..ac198e3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -11,6 +11,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::cmp::max; use std::fmt; use std::mem; use std::ops; @@ -24,7 +25,8 @@ use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET}; +use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET, + ENTRY_ALIGNMENT}; #[cfg(feature = "log-events")] use event::Event; @@ -33,10 +35,6 @@ use event::Event; macro_rules! log (($e:expr) => (())); -// Needed for memory safety of the tagging scheme! -const ENTRY_ALIGNMENT: usize = 16; - - struct StringCache { buckets: [*mut StringCacheEntry; 4096], } @@ -104,8 +102,10 @@ impl StringCache { if should_add { unsafe { - ptr = heap::allocate(mem::size_of::(), ENTRY_ALIGNMENT) - as *mut StringCacheEntry; + ptr = heap::allocate( + mem::size_of::(), + max(mem::align_of::(), ENTRY_ALIGNMENT) + ) as *mut StringCacheEntry; ptr::write(ptr, StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); } @@ -145,7 +145,8 @@ impl StringCache { unsafe { ptr::read(ptr); heap::deallocate(ptr as *mut u8, - mem::size_of::(), ENTRY_ALIGNMENT); + mem::size_of::(), + max(mem::align_of::(), ENTRY_ALIGNMENT)); } log!(Event::Remove(key)); From 36fb3113f2731b6c8b445b7a7e7446d7c4ed7ac9 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 19:35:28 +0200 Subject: [PATCH 117/379] Update to filling drop --- src/atom/mod.rs | 19 ++++++++++++++----- src/lib.rs | 2 +- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 
ac198e3..b0de01e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -155,7 +155,7 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. -#[unsafe_no_drop_flag] +#[unsafe_no_drop_flag] // See tests::atom_drop_is_idempotent #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -236,10 +236,7 @@ impl Drop for Atom { unsafe { match string_cache_shared::from_packed_dynamic(self.data) { - // We use #[unsafe_no_drop_flag] so that Atom will be only 64 - // bits. That means we need to ignore a NULL pointer here, - // which represents a value that was moved out. - Some(entry) if !entry.is_null() => { + Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { drop_slow(self); @@ -251,6 +248,7 @@ impl Drop for Atom { } } + impl ops::Deref for Atom { type Target = str; @@ -548,4 +546,15 @@ mod tests { let atom = Atom::from_slice("foobar"); let _: &str = atom.as_ref(); } + + /// Atom uses #[unsafe_no_drop_flag] to stay small, so drop() may be called more than once. + /// In calls after the first one, the atom will be filled with a POST_DROP value. + /// drop() must be a no-op in this case. 
+ #[test] + fn atom_drop_is_idempotent() { + unsafe { + assert_eq!(::string_cache_shared::from_packed_dynamic(::std::mem::POST_DROP_U64), None); + } + } + } diff --git a/src/lib.rs b/src/lib.rs index f568312..27a588a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ #![feature(plugin, unsafe_no_drop_flag)] #![feature(slice_bytes, heap_api, hash_default)] #![deny(warnings)] -#![cfg_attr(test, feature(test))] +#![cfg_attr(test, feature(test, filling_drop))] #![cfg_attr(bench, feature(rand))] #![plugin(string_cache_plugin)] From 8a3f35c11e59a02f1380aaf51eaa177b3a2a4f57 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 22:53:17 +0200 Subject: [PATCH 118/379] Move remaining usage of unstable features behind a `unstable` Cargo feature flag. Without `--features unstable`: * `Atom` is 16 bytes instead of 8. (It has a drop flag.) * `ns!` and `atom!` are giant generated macros instead of plugins, and so may increase compile time. --- Cargo.toml | 9 +++++++ build.rs | 59 ++++++++++++++++++++++++++++++++++++++++++ plugin/src/atom/mod.rs | 10 +------ shared/lib.rs | 10 +++++++ src/atom/mod.rs | 53 ++++++++++++++++++++----------------- src/lib.rs | 8 +++--- src/namespace.rs | 29 +++++++++++---------- 7 files changed, 128 insertions(+), 50 deletions(-) create mode 100644 build.rs diff --git a/Cargo.toml b/Cargo.toml index f47b41b..2af53cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ description = "A string interning library for Rust, developed as part of the Ser license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "http://doc.servo.org/string_cache/" +build = "build.rs" [lib] name = "string_cache" @@ -20,6 +21,9 @@ doctest = false # See examples/event-log. log-events = ["rustc-serialize"] +# Use unstable features to optimize space and time (memory and CPU usage). 
+unstable = ["string_cache_plugin"] + [dependencies] rand = "0" lazy_static = "0.1.10" @@ -32,7 +36,12 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" version = "0.1.1" +optional = true [dependencies.string_cache_shared] path = "shared" version = "0.1.0" + +[build-dependencies.string_cache_shared] +path = "shared" +version = "0.1.0" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..7434cfc --- /dev/null +++ b/build.rs @@ -0,0 +1,59 @@ +extern crate string_cache_shared; + +use string_cache_shared::{STATIC_ATOM_SET, ALL_NS, pack_static}; + +use std::ascii::AsciiExt; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +fn main() { + let path = Path::new(env!("OUT_DIR")).join("ns_macro_without_plugin.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + writeln!(file, r"#[macro_export]").unwrap(); + writeln!(file, r"macro_rules! ns {{").unwrap(); + writeln!(file, "(\"\") => {{ $crate::Namespace({}) }};", atom("")).unwrap(); + for &(prefix, url) in ALL_NS { + if !prefix.is_empty() { + generate_combination("".to_owned(), prefix, url, &mut file); + } + } + writeln!(file, r"}}").unwrap(); + + writeln!(file, r"#[macro_export]").unwrap(); + writeln!(file, r"macro_rules! 
atom {{").unwrap(); + for &s in STATIC_ATOM_SET.iter() { + if is_ident(s) { + writeln!(file, r"( {} ) => {{ {} }};", s, atom(s)).unwrap(); + } + writeln!(file, r"({:?}) => {{ {} }};", s, atom(s)).unwrap(); + } + writeln!(file, r"}}").unwrap(); +} + +fn generate_combination(prefix1: String, suffix: &str, url: &str, file: &mut BufWriter) { + if suffix.is_empty() { + writeln!(file, r"({:?}) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); + writeln!(file, r"( {} ) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); + } else { + let prefix2 = prefix1.clone(); + generate_combination(prefix1 + &*suffix[..1].to_ascii_lowercase(), &suffix[1..], url, file); + generate_combination(prefix2 + &*suffix[..1].to_ascii_uppercase(), &suffix[1..], url, file); + } +} + +fn atom(s: &str) -> String { + let data = pack_static(STATIC_ATOM_SET.get_index(s).unwrap() as u32); + format!("$crate::Atom {{ data: 0x{:x} }}", data) +} + +fn is_ident(s: &str) -> bool { + let mut chars = s.chars(); + !s.is_empty() && match chars.next().unwrap() { + 'a'...'z' | 'A'...'Z' | '_' => true, + _ => false + } && chars.all(|c| match c { + 'a'...'z' | 'A'...'Z' | '_' | '0'...'9' => true, + _ => false + }) +} diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 93cdeec..ee0a89e 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -71,15 +71,7 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box Box { - static ALL_NS: &'static [(&'static str, &'static str)] = &[ - ("", ""), - ("html", "http://www.w3.org/1999/xhtml"), - ("xml", "http://www.w3.org/XML/1998/namespace"), - ("xmlns", "http://www.w3.org/2000/xmlns/"), - ("xlink", "http://www.w3.org/1999/xlink"), - ("svg", "http://www.w3.org/2000/svg"), - ("mathml", "http://www.w3.org/1998/Math/MathML"), - ]; + use string_cache_shared::ALL_NS; fn usage() -> String { let ns_names: Vec<&'static str> = ALL_NS[1..].iter() diff --git a/shared/lib.rs b/shared/lib.rs index 466ab18..d0e3f09 100644 
--- a/shared/lib.rs +++ b/shared/lib.rs @@ -48,6 +48,16 @@ pub enum UnpackedAtom { const STATIC_SHIFT_BITS: usize = 32; +pub static ALL_NS: &'static [(&'static str, &'static str)] = &[ + ("", ""), + ("html", "http://www.w3.org/1999/xhtml"), + ("xml", "http://www.w3.org/XML/1998/namespace"), + ("xmlns", "http://www.w3.org/2000/xmlns/"), + ("xlink", "http://www.w3.org/1999/xlink"), + ("svg", "http://www.w3.org/2000/svg"), + ("mathml", "http://www.w3.org/1998/Math/MathML"), +]; + struct RawSlice { data: *const u8, len: usize, diff --git a/src/atom/mod.rs b/src/atom/mod.rs index b0de01e..9df47be 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -11,22 +11,19 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::cmp::max; use std::fmt; use std::mem; use std::ops; use std::ptr; -use std::slice::bytes; use std::str; -use std::rt::heap; use std::cmp::Ordering::{self, Equal}; -use std::hash::{self, Hash, SipHasher}; +use std::hash::{Hash, SipHasher, Hasher}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET, - ENTRY_ALIGNMENT}; + ENTRY_ALIGNMENT, copy_memory}; #[cfg(feature = "log-events")] use event::Event; @@ -71,7 +68,11 @@ impl StringCache { } fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { - let hash = hash::hash::<_, SipHasher>(&string_to_add); + let hash = { + let mut hasher = SipHasher::default(); + string_to_add.hash(&mut hasher); + hasher.finish() + }; let bucket_index = (hash & (self.buckets.len()-1) as u64) as usize; let mut ptr = self.buckets[bucket_index]; @@ -101,14 +102,11 @@ impl StringCache { } if should_add { - unsafe { - ptr = heap::allocate( - mem::size_of::(), - max(mem::align_of::(), ENTRY_ALIGNMENT) - ) as *mut StringCacheEntry; - ptr::write(ptr, - StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); - } + debug_assert!(mem::align_of::() >= 
ENTRY_ALIGNMENT); + let mut entry = Box::new(StringCacheEntry::new( + self.buckets[bucket_index], hash, string_to_add)); + ptr = &mut *entry; + mem::forget(entry); self.buckets[bucket_index] = ptr; log!(Event::Insert(ptr as u64, String::from(string_to_add))); } @@ -143,19 +141,21 @@ impl StringCache { debug_assert!(current != ptr::null_mut()); unsafe { - ptr::read(ptr); - heap::deallocate(ptr as *mut u8, - mem::size_of::(), - max(mem::align_of::(), ENTRY_ALIGNMENT)); + box_from_raw(ptr); } log!(Event::Remove(key)); } } +// Box::from_raw is not stable yet +unsafe fn box_from_raw(raw: *mut T) -> Box { + mem::transmute(raw) +} + // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. -#[unsafe_no_drop_flag] // See tests::atom_drop_is_idempotent +#[cfg_attr(unstable, unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -177,7 +177,7 @@ impl Atom { let len = string_to_add.len(); if len <= string_cache_shared::MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; - bytes::copy_memory(string_to_add.as_bytes(), &mut buf); + copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) } else { Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ()) @@ -325,9 +325,10 @@ mod bench; #[cfg(test)] mod tests { + use std::mem; use std::thread; - use super::Atom; - use string_cache_shared::{Static, Inline, Dynamic}; + use super::{Atom, StringCacheEntry}; + use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT, from_packed_dynamic}; #[test] fn test_as_slice() { @@ -491,7 +492,7 @@ mod tests { fn assert_sizes() { // Guard against accidental changes to the sizes of things. 
use std::mem; - assert_eq!(8, mem::size_of::()); + assert_eq!(if cfg!(unstable) { 8 } else { 16 }, mem::size_of::()); assert_eq!(48, mem::size_of::()); } @@ -553,8 +554,12 @@ mod tests { #[test] fn atom_drop_is_idempotent() { unsafe { - assert_eq!(::string_cache_shared::from_packed_dynamic(::std::mem::POST_DROP_U64), None); + assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); } } + #[test] + fn string_cache_entry_alignment_is_sufficient() { + assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + } } diff --git a/src/lib.rs b/src/lib.rs index 27a588a..82c6128 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,12 +10,11 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(plugin, unsafe_no_drop_flag)] -#![feature(slice_bytes, heap_api, hash_default)] #![deny(warnings)] #![cfg_attr(test, feature(test, filling_drop))] #![cfg_attr(bench, feature(rand))] -#![plugin(string_cache_plugin)] +#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] +#![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] #[cfg(test)] extern crate test; @@ -43,6 +42,9 @@ macro_rules! 
qualname (($ns:tt, $local:tt) => ( } )); +#[cfg(not(feature = "unstable"))] +include!(concat!(env!("OUT_DIR"), "/ns_macro_without_plugin.rs")); + #[cfg(feature = "log-events")] #[macro_use] pub mod event; diff --git a/src/namespace.rs b/src/namespace.rs index 6ae5904..cd5543b 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -37,30 +37,31 @@ impl QualName { #[cfg(test)] mod tests { use super::{Namespace, QualName}; + use Atom; #[test] fn ns_macro() { - assert_eq!(ns!(""), Namespace(atom!(""))); + assert_eq!(ns!(""), Namespace(Atom::from_slice(""))); - assert_eq!(ns!(html), Namespace(atom!("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xml), Namespace(atom!("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(xmlns), Namespace(atom!("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xlink), Namespace(atom!("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(svg), Namespace(atom!("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mathml), Namespace(atom!("http://www.w3.org/1998/Math/MathML"))); + assert_eq!(ns!(html), Namespace(Atom::from_slice("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xml), Namespace(Atom::from_slice("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(xmlns), Namespace(Atom::from_slice("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xlink), Namespace(Atom::from_slice("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(svg), Namespace(Atom::from_slice("http://www.w3.org/2000/svg"))); + assert_eq!(ns!(mathml), Namespace(Atom::from_slice("http://www.w3.org/1998/Math/MathML"))); - assert_eq!(ns!(HtMl), Namespace(atom!("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xMl), Namespace(atom!("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(XmLnS), Namespace(atom!("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xLiNk), Namespace(atom!("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(SvG), Namespace(atom!("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mAtHmL), 
Namespace(atom!("http://www.w3.org/1998/Math/MathML"))); + assert_eq!(ns!(HtMl), Namespace(Atom::from_slice("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xMl), Namespace(Atom::from_slice("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(XmLnS), Namespace(Atom::from_slice("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xLiNk), Namespace(Atom::from_slice("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(SvG), Namespace(Atom::from_slice("http://www.w3.org/2000/svg"))); + assert_eq!(ns!(mAtHmL), Namespace(Atom::from_slice("http://www.w3.org/1998/Math/MathML"))); } #[test] fn qualname() { assert_eq!(QualName::new(ns!(""), atom!("")), - QualName { ns: ns!(""), local: atom!("") }); + QualName { ns: ns!(""), local: Atom::from_slice("") }); assert_eq!(QualName::new(ns!(XML), atom!(base)), QualName { ns: ns!(XML), local: atom!(base) }); } From d75004fe6f15a769e94dbaa016cac897f51e704d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 00:11:28 +0200 Subject: [PATCH 119/379] Have Travis run `cargo build` on Rust stable and beta --- .travis.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3c6e71e..a666452 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,14 @@ sudo: false language: rust -rust: nightly +rust: + - nightly + - beta + - stable script: - - cargo test - - cargo clean - - cargo test --features log-events - - cd examples/summarize-events/ - - cargo build + - "if [ $TRAVIS_RUST_VERSION != nightly ]; then cargo build; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo clean; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features log-events; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd examples/summarize-events/ && cargo build; fi" notifications: webhooks: http://build.servo.org:54856/travis From ba28f5173f5a82410e5ca723185b304ab0641bbf Mon Sep 17 00:00:00 
2001 From: Simon Sapin Date: Sun, 19 Jul 2015 00:12:52 +0200 Subject: [PATCH 120/379] Only make warnings fatal when testing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That way, a deprecation warning won’t break dependants. --- plugin/src/lib.rs | 2 +- shared/lib.rs | 2 +- src/lib.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index 138a0f9..ac4f6f3 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -12,7 +12,7 @@ #![feature(plugin_registrar, quote, box_syntax)] #![feature(rustc_private, slice_patterns)] -#![deny(warnings)] +#![cfg_attr(test, deny(warnings))] #![allow(unused_imports)] // for quotes extern crate syntax; diff --git a/shared/lib.rs b/shared/lib.rs index d0e3f09..a8226b7 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -11,7 +11,7 @@ //! the macros crate and the run-time library, in order to guarantee //! consistency. -#![deny(warnings)] +#![cfg_attr(test, deny(warnings))] #[macro_use] extern crate debug_unreachable; extern crate phf; diff --git a/src/lib.rs b/src/lib.rs index 82c6128..af35aed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![deny(warnings)] +#![cfg_attr(test, deny(warnings))] #![cfg_attr(test, feature(test, filling_drop))] #![cfg_attr(bench, feature(rand))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] From 4706e16f50b7e19638d23e7c87b1add94ebda503 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 00:52:29 +0200 Subject: [PATCH 121/379] Also test with unstable features on Travis. 
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a666452..5aefa64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ script: - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo clean; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features log-events; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd examples/summarize-events/ && cargo build; fi" notifications: webhooks: http://build.servo.org:54856/travis From 2d146b55b9a3e3e3ae598ce7830d78ef85c0bb48 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 08:34:26 +0200 Subject: [PATCH 122/379] =?UTF-8?q?Make=20rand=20a=20dev-dependency.=20(It?= =?UTF-8?q?=E2=80=99s=20only=20used=20in=20tests.)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 4 +++- src/lib.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2af53cc..5831418 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,10 +25,12 @@ log-events = ["rustc-serialize"] unstable = ["string_cache_plugin"] [dependencies] -rand = "0" lazy_static = "0.1.10" serde = "0.4.2" +[dev-dependencies] +rand = "0" + [dependencies.rustc-serialize] version = "0" optional = true diff --git a/src/lib.rs b/src/lib.rs index af35aed..7ae017b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,6 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(test, feature(test, filling_drop))] -#![cfg_attr(bench, feature(rand))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] #![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] @@ -22,6 +21,7 @@ extern crate test; #[macro_use] extern crate lazy_static; +#[cfg(test)] extern crate rand; #[cfg(feature = "log-events")] From 9de43cc0d792f27c0e848312e625a91da7fb307c Mon Sep 17 
00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 09:39:44 +0200 Subject: [PATCH 123/379] Explicitly drop the result of box_from_raw() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s not necessary, but makes it clearer to readers what’s going on. --- src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9df47be..a3ea4b8 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -141,7 +141,7 @@ impl StringCache { debug_assert!(current != ptr::null_mut()); unsafe { - box_from_raw(ptr); + mem::drop(box_from_raw(ptr)); } log!(Event::Remove(key)); From 7452b763fc827642e7f2b46fcf5b5954e3c5f4af Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 20 Jul 2015 15:08:32 +0200 Subject: [PATCH 124/379] Gratuitious refactoring. --- build.rs | 2 +- src/atom/mod.rs | 61 +++++++++++++++++++++++-------------------------- src/lib.rs | 2 +- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/build.rs b/build.rs index 7434cfc..0efc595 100644 --- a/build.rs +++ b/build.rs @@ -8,7 +8,7 @@ use std::io::{BufWriter, Write}; use std::path::Path; fn main() { - let path = Path::new(env!("OUT_DIR")).join("ns_macro_without_plugin.rs"); + let path = Path::new(env!("OUT_DIR")).join("ns_atom_macros_without_plugin.rs"); let mut file = BufWriter::new(File::create(&path).unwrap()); writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! ns {{").unwrap(); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index a3ea4b8..f759ad8 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -31,9 +31,10 @@ use event::Event; #[cfg(not(feature = "log-events"))] macro_rules! log (($e:expr) => (())); - +const NB_BUCKETS: usize = 1 << 12; // 4096 +const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { - buckets: [*mut StringCacheEntry; 4096], + buckets: [*mut StringCacheEntry; NB_BUCKETS], } lazy_static! 
{ @@ -63,7 +64,7 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { StringCache { - buckets: unsafe { mem::zeroed() }, + buckets: [ptr::null_mut(); NB_BUCKETS], } } @@ -73,10 +74,10 @@ impl StringCache { string_to_add.hash(&mut hasher); hasher.finish() }; - let bucket_index = (hash & (self.buckets.len()-1) as u64) as usize; + let bucket_index = (hash & BUCKET_MASK) as usize; let mut ptr = self.buckets[bucket_index]; - while ptr != ptr::null_mut() { + while !ptr.is_null() { let value = unsafe { &*ptr }; if value.hash == hash && value.string == string_to_add { break; @@ -84,34 +85,28 @@ impl StringCache { ptr = value.next_in_bucket; } - let mut should_add = false; - if ptr != ptr::null_mut() { + if !ptr.is_null() { unsafe { - if (*ptr).ref_count.fetch_add(1, SeqCst) == 0 { - // Uh-oh. The pointer's reference count was zero, which means someone may try - // to free it. (Naive attempts to defend against this, for example having the - // destructor check to see whether the reference count is indeed zero, don't - // work due to ABA.) Thus we need to temporarily add a duplicate string to the - // list. - should_add = true; - (*ptr).ref_count.fetch_sub(1, SeqCst); + if (*ptr).ref_count.fetch_add(1, SeqCst) > 0 { + return ptr; } + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. (Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. 
+ (*ptr).ref_count.fetch_sub(1, SeqCst); } - } else { - should_add = true } - if should_add { - debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); - let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index], hash, string_to_add)); - ptr = &mut *entry; - mem::forget(entry); - self.buckets[bucket_index] = ptr; - log!(Event::Insert(ptr as u64, String::from(string_to_add))); - } + debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let mut entry = Box::new(StringCacheEntry::new( + self.buckets[bucket_index], hash, string_to_add)); + ptr = &mut *entry; + mem::forget(entry); + self.buckets[bucket_index] = ptr; + log!(Event::Insert(ptr as u64, String::from(string_to_add))); - debug_assert!(ptr != ptr::null_mut()); ptr } @@ -121,24 +116,24 @@ impl StringCache { debug_assert!(value.ref_count.load(SeqCst) == 0); - let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as usize; + let bucket_index = (value.hash & BUCKET_MASK) as usize; let mut current = self.buckets[bucket_index]; let mut prev: *mut StringCacheEntry = ptr::null_mut(); - while current != ptr::null_mut() { + while !current.is_null() { if current == ptr { - if prev != ptr::null_mut() { - unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; - } else { + if prev.is_null() { unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; + } else { + unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; } break; } prev = current; unsafe { current = (*current).next_in_bucket }; } - debug_assert!(current != ptr::null_mut()); + debug_assert!(!current.is_null()); unsafe { mem::drop(box_from_raw(ptr)); diff --git a/src/lib.rs b/src/lib.rs index 7ae017b..446584a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,7 @@ macro_rules! 
qualname (($ns:tt, $local:tt) => ( )); #[cfg(not(feature = "unstable"))] -include!(concat!(env!("OUT_DIR"), "/ns_macro_without_plugin.rs")); +include!(concat!(env!("OUT_DIR"), "/ns_atom_macros_without_plugin.rs")); #[cfg(feature = "log-events")] #[macro_use] From 66ad520292e0f9eef38730357bb4d6ca87300114 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 21 Jul 2015 01:07:31 +0200 Subject: [PATCH 125/379] Have Travis run (some) tests on stable Rust. --- .travis.yml | 9 ++++----- src/atom/mod.rs | 6 ++++-- src/lib.rs | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5aefa64..c8e34eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,11 +5,10 @@ rust: - beta - stable script: - - "if [ $TRAVIS_RUST_VERSION != nightly ]; then cargo build; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo clean; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features log-events; fi" + - cargo build + - cargo test + - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd examples/summarize-events/ && cargo build; fi" + - "cd examples/summarize-events/ && cargo build" notifications: webhooks: http://build.servo.org:54856/travis diff --git a/src/atom/mod.rs b/src/atom/mod.rs index f759ad8..58ad41f 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -315,7 +315,7 @@ impl Deserialize for Atom { } } -#[cfg(test)] +#[cfg(all(test, feature = "unstable"))] mod bench; #[cfg(test)] @@ -323,7 +323,7 @@ mod tests { use std::mem; use std::thread; use super::{Atom, StringCacheEntry}; - use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT, from_packed_dynamic}; + use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT}; #[test] fn test_as_slice() { @@ -546,8 +546,10 @@ mod tests { /// Atom uses #[unsafe_no_drop_flag] to 
stay small, so drop() may be called more than once. /// In calls after the first one, the atom will be filled with a POST_DROP value. /// drop() must be a no-op in this case. + #[cfg(feature = "unstable")] #[test] fn atom_drop_is_idempotent() { + use string_cache_shared::from_packed_dynamic; unsafe { assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); } diff --git a/src/lib.rs b/src/lib.rs index 446584a..10f4b26 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,11 +11,11 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#![cfg_attr(test, feature(test, filling_drop))] +#![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] #![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] -#[cfg(test)] +#[cfg(all(test, feature = "unstable"))] extern crate test; #[macro_use] From f7fd9640988f9963189631eaaf433caa21c38541 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 25 Jul 2015 09:44:07 +0200 Subject: [PATCH 126/379] Fix #70 using Option> instead of *mut StringCacheEntry --- src/atom/mod.rs | 96 ++++++++++++++++++++----------------------------- 1 file changed, 39 insertions(+), 57 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 58ad41f..eb19793 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -14,7 +14,6 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::mem; use std::ops; -use std::ptr; use std::str; use std::cmp::Ordering::{self, Equal}; use std::hash::{Hash, SipHasher, Hasher}; @@ -34,7 +33,7 @@ macro_rules! log (($e:expr) => (())); const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { - buckets: [*mut StringCacheEntry; NB_BUCKETS], + buckets: [Option>; NB_BUCKETS], } lazy_static! { @@ -42,16 +41,15 @@ lazy_static! 
{ } struct StringCacheEntry { - next_in_bucket: *mut StringCacheEntry, + next_in_bucket: Option>, hash: u64, ref_count: AtomicIsize, string: String, } -unsafe impl Send for StringCache { } - impl StringCacheEntry { - fn new(next: *mut StringCacheEntry, hash: u64, string_to_add: &str) -> StringCacheEntry { + fn new(next: Option>, hash: u64, string_to_add: &str) + -> StringCacheEntry { StringCacheEntry { next_in_bucket: next, hash: hash, @@ -64,7 +62,7 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { StringCache { - buckets: [ptr::null_mut(); NB_BUCKETS], + buckets: unsafe { mem::zeroed() }, } } @@ -75,36 +73,31 @@ impl StringCache { hasher.finish() }; let bucket_index = (hash & BUCKET_MASK) as usize; - let mut ptr = self.buckets[bucket_index]; - - while !ptr.is_null() { - let value = unsafe { &*ptr }; - if value.hash == hash && value.string == string_to_add { - break; - } - ptr = value.next_in_bucket; - } - - if !ptr.is_null() { - unsafe { - if (*ptr).ref_count.fetch_add(1, SeqCst) > 0 { - return ptr; + { + let mut ptr: Option<&mut Box> = + self.buckets[bucket_index].as_mut(); + + while let Some(entry) = ptr.take() { + if entry.hash == hash && entry.string == string_to_add { + if entry.ref_count.fetch_add(1, SeqCst) > 0 { + return &mut **entry; + } + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. (Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. + entry.ref_count.fetch_sub(1, SeqCst); + break; } - // Uh-oh. The pointer's reference count was zero, which means someone may try - // to free it. (Naive attempts to defend against this, for example having the - // destructor check to see whether the reference count is indeed zero, don't - // work due to ABA.) 
Thus we need to temporarily add a duplicate string to the - // list. - (*ptr).ref_count.fetch_sub(1, SeqCst); + ptr = entry.next_in_bucket.as_mut(); } } - debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index], hash, string_to_add)); - ptr = &mut *entry; - mem::forget(entry); - self.buckets[bucket_index] = ptr; + self.buckets[bucket_index].take(), hash, string_to_add)); + let ptr: *mut StringCacheEntry = &mut *entry; + self.buckets[bucket_index] = Some(entry); log!(Event::Insert(ptr as u64, String::from(string_to_add))); ptr @@ -112,42 +105,31 @@ impl StringCache { fn remove(&mut self, key: u64) { let ptr = key as *mut StringCacheEntry; - let value: &mut StringCacheEntry = unsafe { mem::transmute(ptr) }; - - debug_assert!(value.ref_count.load(SeqCst) == 0); + let bucket_index = { + let value: &StringCacheEntry = unsafe { &*ptr }; + debug_assert!(value.ref_count.load(SeqCst) == 0); + (value.hash & BUCKET_MASK) as usize + }; - let bucket_index = (value.hash & BUCKET_MASK) as usize; - let mut current = self.buckets[bucket_index]; - let mut prev: *mut StringCacheEntry = ptr::null_mut(); + let mut current: &mut Option> = &mut self.buckets[bucket_index]; - while !current.is_null() { - if current == ptr { - if prev.is_null() { - unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; - } else { - unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; - } + loop { + let entry_ptr: *mut StringCacheEntry = match current.as_mut() { + Some(entry) => &mut **entry, + None => break, + }; + if entry_ptr == ptr { + mem::drop(mem::replace(current, unsafe { (*entry_ptr).next_in_bucket.take() })); break; } - prev = current; - unsafe { current = (*current).next_in_bucket }; - } - debug_assert!(!current.is_null()); - - unsafe { - mem::drop(box_from_raw(ptr)); + current = unsafe { &mut (*entry_ptr).next_in_bucket }; } log!(Event::Remove(key)); } } -// Box::from_raw is not stable yet -unsafe fn 
box_from_raw(raw: *mut T) -> Box { - mem::transmute(raw) -} - // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. #[cfg_attr(unstable, unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent From 337c5fbe2b0c5a10ed76596d5832fa6154925bd0 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 27 Jul 2015 10:37:54 +0200 Subject: [PATCH 127/379] v0.1.7 --- plugin/Cargo.toml | 2 +- shared/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 4db662e..9b32407 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.4" +version = "0.1.5" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." license = "MIT / Apache-2.0" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 60437a8..0bcc3f6 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.2" +version = "0.1.4" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" From 0b26fab4ab785c70aaf64fe3f271c52b2cea7153 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 27 Jul 2015 10:40:43 +0200 Subject: [PATCH 128/379] Depend on updated sub-crates. --- Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5831418..26c85c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.7" +version = "0.1.8" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -37,13 +37,13 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" -version = "0.1.1" +version = "0.1.5" optional = true [dependencies.string_cache_shared] path = "shared" -version = "0.1.0" +version = "0.1.4" [build-dependencies.string_cache_shared] path = "shared" -version = "0.1.0" +version = "0.1.4" From 2870dbc8316cfbc7d6ed8ab3b2ce446d59983e8e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 28 Jul 2015 10:16:39 +0200 Subject: [PATCH 129/379] Fix #[cfg(feature = "unstable")] syntax --- Cargo.toml | 2 +- src/atom/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 26c85c8..099d90b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.8" +version = "0.1.9" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index eb19793..82589ce 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -132,7 +132,7 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. -#[cfg_attr(unstable, unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent +#[cfg_attr(feature = "unstable", unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -469,7 +469,7 @@ mod tests { fn assert_sizes() { // Guard against accidental changes to the sizes of things. 
use std::mem; - assert_eq!(if cfg!(unstable) { 8 } else { 16 }, mem::size_of::()); + assert_eq!(if cfg!(feature = "unstable") { 8 } else { 16 }, mem::size_of::()); assert_eq!(48, mem::size_of::()); } From b02261aef56d3e9a36ad7f7104d2eab7b7cb941f Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Tue, 28 Jul 2015 18:31:25 -0400 Subject: [PATCH 130/379] Add form atoms --- shared/static_atom_list.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 18c14a5..231c22a 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -474,7 +474,11 @@ pub static ATOMS: &'static [&'static str] = &[ "forall", "foreignObject", "foreignobject", + "formaction", "format", + "formenctype", + "formmethod", + "formtarget", "frameborder", "framespacing", "from", From 94660ab7c7c61dcf89ee442fa927df88386b1345 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 29 Jul 2015 15:41:39 +0200 Subject: [PATCH 131/379] Upgrade to rustc 1.3.0-nightly (8d432fbf1 2015-07-29) --- plugin/Cargo.toml | 2 +- plugin/src/atom/mod.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 9b32407..089082d 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.5" +version = "0.1.6" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." 
license = "MIT / Apache-2.0" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index ee0a89e..a9568ef 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -12,7 +12,7 @@ use syntax::codemap::Span; use syntax::ast::{TokenTree, TtToken}; use syntax::ast; use syntax::ext::base::{ExtCtxt, MacResult, MacEager}; -use syntax::parse::token::{get_ident, InternedString, Ident, Literal, Lit}; +use syntax::parse::token::{InternedString, Ident, Literal, Lit}; use std::iter::Chain; use std::collections::HashMap; @@ -20,11 +20,11 @@ use std::ascii::AsciiExt; fn atom_tok_to_str(t: &TokenTree) -> Option { - Some(get_ident(match *t { - TtToken(_, Ident(s, _)) => s, - TtToken(_, Literal(Lit::Str_(s), _)) => s.ident(), + Some(match *t { + TtToken(_, Ident(s, _)) => s.name.as_str(), + TtToken(_, Literal(Lit::Str_(s), _)) => s.as_str(), _ => return None, - })) + }) } // FIXME: libsyntax should provide this (rust-lang/rust#17637) From 79f7743265972551c820f0010a71d8b15ad972f4 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Thu, 30 Jul 2015 10:57:53 -0700 Subject: [PATCH 132/379] Fix cross-compilation error in shared/build.rs --- shared/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/build.rs b/shared/build.rs index 0aafd27..157dc77 100644 --- a/shared/build.rs +++ b/shared/build.rs @@ -12,7 +12,7 @@ fn main() { set.entry(atom); } - let path = Path::new(env!("OUT_DIR")).join("static_atom_set.rs"); + let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); let mut file = BufWriter::new(File::create(&path).unwrap()); write!(&mut file, "pub static STATIC_ATOM_SET: phf::OrderedSet<&'static str> = ").unwrap(); set.build(&mut file).unwrap(); From 28c2db776115bf5edcc4bc3e53dbe206a03ba1d1 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 30 Jul 2015 20:00:31 +0200 Subject: [PATCH 133/379] Fix cross-compilation. 
--- Cargo.toml | 2 +- build.rs | 3 ++- shared/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 099d90b..8c67da8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.9" +version = "0.1.10" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/build.rs b/build.rs index 0efc595..88ce1f1 100644 --- a/build.rs +++ b/build.rs @@ -2,13 +2,14 @@ extern crate string_cache_shared; use string_cache_shared::{STATIC_ATOM_SET, ALL_NS, pack_static}; +use std::env; use std::ascii::AsciiExt; use std::fs::File; use std::io::{BufWriter, Write}; use std::path::Path; fn main() { - let path = Path::new(env!("OUT_DIR")).join("ns_atom_macros_without_plugin.rs"); + let path = Path::new(&env::var("OUT_DIR").unwrap()).join("ns_atom_macros_without_plugin.rs"); let mut file = BufWriter::new(File::create(&path).unwrap()); writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! ns {{").unwrap(); diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 0bcc3f6..6667df4 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.4" +version = "0.1.5" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" @@ -14,7 +14,7 @@ name = "string_cache_shared" path = "lib.rs" [dependencies] -debug_unreachable = "0.0.5" +debug_unreachable = "0.0.6" phf = "0.7.3" [build-dependencies] From 2997df477d63edf18a8e55f6f3ae56de1cc48445 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 3 Aug 2015 21:29:01 +0200 Subject: [PATCH 134/379] Use phf_shared and phf_generator directly instead of phf. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to bypass the indirection of `phf::OrderedSet`: we don’t care about the order, only about getting numeric indices. This will also allow us to re-use the phf hash for the dynamic table, to avoid hashing strings twice. --- shared/Cargo.toml | 4 ++-- shared/build.rs | 31 ++++++++++++++++++++++++------- shared/lib.rs | 31 ++++++++++++++++++++++++++++++- shared/static_atom_list.rs | 9 +-------- src/atom/mod.rs | 17 ++++++++++------- 5 files changed, 67 insertions(+), 25 deletions(-) diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 6667df4..04d3c92 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -15,7 +15,7 @@ path = "lib.rs" [dependencies] debug_unreachable = "0.0.6" -phf = "0.7.3" +phf_shared = "0.7.3" [build-dependencies] -phf_codegen = "0.7.3" +phf_generator = "0.7.3" diff --git a/shared/build.rs b/shared/build.rs index 157dc77..9a0cbeb 100644 --- a/shared/build.rs +++ b/shared/build.rs @@ -1,4 +1,4 @@ -extern crate phf_codegen; +extern crate phf_generator; mod static_atom_list; @@ -7,14 +7,31 @@ use std::io::{BufWriter, Write}; use std::path::Path; fn main() { - let mut set = phf_codegen::OrderedSet::new(); - for &atom in static_atom_list::ATOMS { - set.entry(atom); + let mut set = std::collections::HashSet::new(); + for atom in static_atom_list::ATOMS { + if !set.insert(atom) { + panic!("duplicate static atom `{:?}`", atom); + } } + let state = phf_generator::generate_hash(static_atom_list::ATOMS); + let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); let mut file = BufWriter::new(File::create(&path).unwrap()); - write!(&mut file, "pub static STATIC_ATOM_SET: phf::OrderedSet<&'static str> = ").unwrap(); - set.build(&mut file).unwrap(); - write!(&mut file, ";\n").unwrap(); + macro_rules! 
w { + ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } + } + w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); + w!(" key: {},", state.key); + w!(" disps: &["); + for &(d1, d2) in &state.disps { + w!(" ({}, {}),", d1, d2); + } + w!(" ],"); + w!(" atoms: &["); + for &idx in &state.map { + w!(" {:?},", static_atom_list::ATOMS[idx]); + } + w!(" ],"); + w!("}};"); } diff --git a/shared/lib.rs b/shared/lib.rs index a8226b7..a346c61 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -14,7 +14,7 @@ #![cfg_attr(test, deny(warnings))] #[macro_use] extern crate debug_unreachable; -extern crate phf; +extern crate phf_shared; use std::ptr; use std::slice; @@ -32,6 +32,35 @@ pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, av pub const MAX_INLINE_LEN: usize = 7; +pub struct StaticAtomSet { + key: u64, + disps: &'static [(u32, u32)], + atoms: &'static [&'static str], +} + +impl StaticAtomSet { + #[inline] + pub fn get_index(&self, s: &str) -> Option { + let hash = phf_shared::hash(s, self.key); + let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); + if self.atoms[index as usize] == s { + Some(index) + } else { + None + } + } + + #[inline] + pub fn index(&self, i: u32) -> Option<&'static str> { + self.atoms.get(i as usize).map(|&s| s) + } + + #[inline] + pub fn iter(&self) -> slice::Iter<&'static str> { + self.atoms.iter() + } +} + // Atoms use a compact representation which fits this enum in a single u64. // Inlining avoids actually constructing the unpacked representation in memory. #[allow(missing_copy_implementations)] diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 231c22a..855718b 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -9,12 +9,7 @@ pub static ATOMS: &'static [&'static str] = &[ - // The first 64 atoms are special: we can quickly check membership - // in sets of these, using a bitmask. 
This includes every tag that - // appears in more than one set in the tree builder spec, plus a - // few others (arbitrarily chosen). - // - // FIXME(kmc): check if this is really true with the packed tag bits + // The order is not preserved by phf. "a", "address", @@ -81,8 +76,6 @@ pub static ATOMS: &'static [&'static str] = &[ "track", "xmp", - // End of first 64 atoms. - "", // XML namespaces known to the HTML syntax spec diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 82589ce..9859ef7 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -175,7 +175,7 @@ impl Atom { let buf = string_cache_shared::inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, - Static(idx) => *STATIC_ATOM_SET.index(idx as usize).expect("bad static atom"), + Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -442,9 +442,12 @@ mod tests { assert_eq_fmt!("0x{:016X}", Atom::from_slice(s).data, data); } - fn check_static(s: &str, x: Atom, data: u64) { - check(s, data); - assert_eq_fmt!("0x{:016X}", x.data, data); + fn check_static(s: &str, x: Atom) { + use string_cache_shared::STATIC_ATOM_SET; + assert_eq_fmt!("0x{:016X}", x.data, Atom::from_slice(s).data); + assert_eq!(0x2, x.data & 0xFFFF_FFFF); + // The index is unspecified by phf. + assert!((x.data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); } // This test is here to make sure we don't change atom representation @@ -452,9 +455,9 @@ mod tests { // static atom table, the tag values, etc. 
// Static atoms - check_static("a", atom!(a), 0x0000_0000_0000_0002); - check_static("address", atom!(address), 0x0000_0001_0000_0002); - check_static("area", atom!(area), 0x0000_0003_0000_0002); + check_static("a", atom!(a)); + check_static("address", atom!(address)); + check_static("area", atom!(area)); // Inline atoms check("e", 0x0000_0000_0000_6511); From 4c812795fa16604f3e6ff241d7b58e0f56facfb9 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 3 Aug 2015 21:35:56 +0200 Subject: [PATCH 135/379] Reuse the phf hash for the dynamic map. --- build.rs | 2 +- plugin/src/atom/mod.rs | 6 +++--- shared/lib.rs | 6 +++--- src/atom/mod.rs | 16 +++++----------- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/build.rs b/build.rs index 88ce1f1..87c226e 100644 --- a/build.rs +++ b/build.rs @@ -44,7 +44,7 @@ fn generate_combination(prefix1: String, suffix: &str, url: &str, file: &mut Buf } fn atom(s: &str) -> String { - let data = pack_static(STATIC_ATOM_SET.get_index(s).unwrap() as u32); + let data = pack_static(STATIC_ATOM_SET.get_index_or_hash(s).unwrap() as u32); format!("$crate::Atom {{ data: 0x{:x} }}", data) } diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index a9568ef..bff6189 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -44,9 +44,9 @@ impl MacResult for AtomResult { } fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index(name) { - Some(i) => i, - None => return None, + let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index_or_hash(name) { + Ok(i) => i, + Err(_hash) => return None, }; let data = ::string_cache_shared::pack_static(i as u32); diff --git a/shared/lib.rs b/shared/lib.rs index a346c61..9475bd6 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -40,13 +40,13 @@ pub struct StaticAtomSet { impl StaticAtomSet { #[inline] - pub fn get_index(&self, s: &str) -> Option { + pub fn get_index_or_hash(&self, s: &str) -> Result { let 
hash = phf_shared::hash(s, self.key); let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); if self.atoms[index as usize] == s { - Some(index) + Ok(index) } else { - None + Err(hash) } } diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9859ef7..ec9eef1 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -16,7 +16,6 @@ use std::mem; use std::ops; use std::str; use std::cmp::Ordering::{self, Equal}; -use std::hash::{Hash, SipHasher, Hasher}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; @@ -66,12 +65,7 @@ impl StringCache { } } - fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { - let hash = { - let mut hasher = SipHasher::default(); - string_to_add.hash(&mut hasher); - hasher.finish() - }; + fn add(&mut self, string_to_add: &str, hash: u64) -> *mut StringCacheEntry { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = @@ -148,16 +142,16 @@ impl Atom { #[inline] pub fn from_slice(string_to_add: &str) -> Atom { - let unpacked = match STATIC_ATOM_SET.get_index(string_to_add) { - Some(id) => Static(id as u32), - None => { + let unpacked = match STATIC_ATOM_SET.get_index_or_hash(string_to_add) { + Ok(id) => Static(id as u32), + Err(hash) => { let len = string_to_add.len(); if len <= string_cache_shared::MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) } else { - Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ()) + Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) } } }; From 573161f67d9522a137dd1d063c23b6d15aac8a96 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 4 Aug 2015 08:52:08 +0200 Subject: [PATCH 136/379] Bump phf requirement and version numbers. 
--- Cargo.toml | 8 ++++---- plugin/Cargo.toml | 4 ++-- shared/Cargo.toml | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8c67da8..0301a7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.10" +version = "0.1.11" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -37,13 +37,13 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" -version = "0.1.5" +version = "0.1.7" optional = true [dependencies.string_cache_shared] path = "shared" -version = "0.1.4" +version = "0.1.6" [build-dependencies.string_cache_shared] path = "shared" -version = "0.1.4" +version = "0.1.6" diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 089082d..25f0155 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.6" +version = "0.1.7" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." license = "MIT / Apache-2.0" @@ -14,7 +14,7 @@ plugin = true [dependencies.string_cache_shared] path = "../shared" -version = "0.1.0" +version = "0.1.6" [dependencies] lazy_static = "0.1.10" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 04d3c92..c5c0bf4 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.5" +version = "0.1.6" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." 
license = "MIT / Apache-2.0" @@ -15,7 +15,7 @@ path = "lib.rs" [dependencies] debug_unreachable = "0.0.6" -phf_shared = "0.7.3" +phf_shared = "0.7.4" [build-dependencies] -phf_generator = "0.7.3" +phf_generator = "0.7.4" From 07d9bf057efc9199ca8293ceacf96ef33963b6f3 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Sat, 8 Aug 2015 09:09:08 -0700 Subject: [PATCH 137/379] Upgrade serde to pick up bincode support --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8c67da8..889548e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ unstable = ["string_cache_plugin"] [dependencies] lazy_static = "0.1.10" -serde = "0.4.2" +serde = "0.5" [dev-dependencies] rand = "0" From 18ac6e1ec9375795835addaf081b4cbe038af573 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Sat, 8 Aug 2015 09:38:36 -0700 Subject: [PATCH 138/379] 0.1.11 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 889548e..1022fa8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.10" +version = "0.1.11" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 783f0ec12fdc49d35a567d6ab55dafa772b9c6be Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sun, 9 Aug 2015 09:50:07 -0700 Subject: [PATCH 139/379] derive(HeapSizeOf). Part of servo/heapsize#5. 
--- .travis.yml | 1 + Cargo.toml | 11 +++++++++++ src/atom/mod.rs | 1 + src/lib.rs | 5 +++++ src/namespace.rs | 1 + 5 files changed, 19 insertions(+) diff --git a/.travis.yml b/.travis.yml index c8e34eb..3c4848a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ script: - cargo test - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features heap_size; fi" - "cd examples/summarize-events/ && cargo build" notifications: webhooks: http://build.servo.org:54856/travis diff --git a/Cargo.toml b/Cargo.toml index 1022fa8..b4ea1e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,9 @@ log-events = ["rustc-serialize"] # Use unstable features to optimize space and time (memory and CPU usage). unstable = ["string_cache_plugin"] +# HeapSizeOf support +heap_size = ["heapsize", "heapsize_plugin"] + [dependencies] lazy_static = "0.1.10" serde = "0.5" @@ -44,6 +47,14 @@ optional = true path = "shared" version = "0.1.4" +[dependencies.heapsize] +git = "https://github.com/servo/heapsize.git" +optional = true + +[dependencies.heapsize_plugin] +git = "https://github.com/servo/heapsize.git" +optional = true + [build-dependencies.string_cache_shared] path = "shared" version = "0.1.4" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 82589ce..5dee884 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -133,6 +133,7 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. #[cfg_attr(feature = "unstable", unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent +#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. 
diff --git a/src/lib.rs b/src/lib.rs index 10f4b26..1f9c4b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,9 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] +#![cfg_attr(feature = "heap_size", feature(plugin, custom_derive))] #![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] +#![cfg_attr(feature = "heap_size", plugin(heapsize_plugin))] #[cfg(all(test, feature = "unstable"))] extern crate test; @@ -27,6 +29,9 @@ extern crate rand; #[cfg(feature = "log-events")] extern crate rustc_serialize; +#[cfg(feature = "heap_size")] +extern crate heapsize; + extern crate serde; extern crate string_cache_shared; diff --git a/src/namespace.rs b/src/namespace.rs index cd5543b..5603114 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -15,6 +15,7 @@ use atom::Atom; /// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] +#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); /// A name with a namespace. From 730382c73d5e1a0fade0d610ad659f4e3493e15a Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 11 Aug 2015 09:32:21 -0700 Subject: [PATCH 140/379] Switch to crates.io heapsize. 
--- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b4ea1e0..0dadad5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,11 +48,11 @@ path = "shared" version = "0.1.4" [dependencies.heapsize] -git = "https://github.com/servo/heapsize.git" +version = "0.1.1" optional = true [dependencies.heapsize_plugin] -git = "https://github.com/servo/heapsize.git" +version = "0.0.1" optional = true [build-dependencies.string_cache_shared] From 6d4e0fb3c1a8b7b7d2345ed6b2ba6d077a4c22d3 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 1 Sep 2015 14:53:15 +0200 Subject: [PATCH 141/379] Sort the static atoms below the namespaces. --- shared/static_atom_list.rs | 298 ++++++++++++++++++------------------- 1 file changed, 148 insertions(+), 150 deletions(-) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 231c22a..2ff5993 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -114,23 +114,23 @@ pub static ATOMS: &'static [&'static str] = &[ "alink", "alphabetic", "alt", - "altGlyph", - "altGlyphDef", - "altGlyphItem", "altglyph", + "altGlyph", "altglyphdef", + "altGlyphDef", "altglyphitem", + "altGlyphItem", "altimg", "alttext", "amplitude", "and", "animate", - "animateColor", - "animateMotion", - "animateTransform", "animatecolor", + "animateColor", "animatemotion", + "animateMotion", "animatetransform", + "animateTransform", "animation", "annotation", "annotation-xml", @@ -190,10 +190,10 @@ pub static ATOMS: &'static [&'static str] = &[ "aria-valuenow", "ascent", "async", - "attributeName", - "attributeType", "attributename", + "attributeName", "attributetype", + "attributeType", "audio", "autocomplete", "autofocus", @@ -203,16 +203,19 @@ pub static ATOMS: &'static [&'static str] = &[ "azimuth", "background", "background-attachment", + "background-clip", "background-color", "background-image", + "background-origin", "background-position", "background-repeat", - "baseFrequency", - 
"baseProfile", + "background-size", "basefrequency", + "baseFrequency", "baseline", "baseline-shift", "baseprofile", + "baseProfile", "bbox", "bdi", "bdo", @@ -223,6 +226,8 @@ pub static ATOMS: &'static [&'static str] = &[ "border", "border-bottom", "border-bottom-color", + "border-bottom-left-radius", + "border-bottom-right-radius", "border-bottom-style", "border-bottom-width", "border-color", @@ -230,6 +235,7 @@ pub static ATOMS: &'static [&'static str] = &[ "border-left-color", "border-left-style", "border-left-width", + "border-radius", "border-right", "border-right-color", "border-right-style", @@ -237,14 +243,16 @@ pub static ATOMS: &'static [&'static str] = &[ "border-style", "border-top", "border-top-color", + "border-top-left-radius", + "border-top-right-radius", "border-top-style", "border-top-width", "border-width", "bottom", "bvar", "by", - "calcMode", "calcmode", + "calcMode", "canvas", "cap-height", "card", @@ -265,11 +273,11 @@ pub static ATOMS: &'static [&'static str] = &[ "clear", "clip", "clip-path", - "clip-rule", - "clipPath", - "clipPathUnits", "clippath", + "clipPath", "clippathunits", + "clipPathUnits", + "clip-rule", "close", "closure", "cn", @@ -285,9 +293,12 @@ pub static ATOMS: &'static [&'static str] = &[ "cols", "colspan", "columnalign", + "column-count", "columnlines", + "columns", "columnspacing", "columnspan", + "column-width", "columnwidth", "compact", "complexes", @@ -295,11 +306,11 @@ pub static ATOMS: &'static [&'static str] = &[ "condition", "conjugate", "content", - "contentScriptType", - "contentStyleType", "contenteditable", "contentscripttype", + "contentScriptType", "contentstyletype", + "contentStyleType", "contextmenu", "controls", "coords", @@ -327,8 +338,8 @@ pub static ATOMS: &'static [&'static str] = &[ "default", "defer", "definition-src", - "definitionURL", "definitionurl", + "definitionURL", "defs", "degree", "del", @@ -340,8 +351,8 @@ pub static ATOMS: &'static [&'static str] = &[ "dfn", "dialog", "diff", - 
"diffuseConstant", "diffuseconstant", + "diffuseConstant", "dir", "direction", "disabled", @@ -361,8 +372,8 @@ pub static ATOMS: &'static [&'static str] = &[ "dx", "dy", "edge", - "edgeMode", "edgemode", + "edgeMode", "elevation", "ellipse", "em", @@ -380,63 +391,63 @@ pub static ATOMS: &'static [&'static str] = &[ "exp", "exponent", "exponentiale", - "externalResourcesRequired", "externalresourcesrequired", + "externalResourcesRequired", "face", "factorial", "factorof", "false", - "feBlend", - "feColorMatrix", - "feComponentTransfer", - "feComposite", - "feConvolveMatrix", - "feDiffuseLighting", - "feDisplacementMap", - "feDistantLight", - "feDropShadow", - "feFlood", - "feFuncA", - "feFuncB", - "feFuncG", - "feFuncR", - "feGaussianBlur", - "feImage", - "feMerge", - "feMergeNode", - "feMorphology", - "feOffset", - "fePointLight", - "feSpecularLighting", - "feSpotLight", - "feTile", - "feTurbulence", "feblend", + "feBlend", "fecolormatrix", + "feColorMatrix", "fecomponenttransfer", + "feComponentTransfer", "fecomposite", + "feComposite", "feconvolvematrix", + "feConvolveMatrix", "fediffuselighting", + "feDiffuseLighting", "fedisplacementmap", + "feDisplacementMap", "fedistantlight", + "feDistantLight", "fedropshadow", + "feDropShadow", "feflood", + "feFlood", "fefunca", + "feFuncA", "fefuncb", + "feFuncB", "fefuncg", + "feFuncG", "fefuncr", + "feFuncR", "fegaussianblur", + "feGaussianBlur", "feimage", + "feImage", "femerge", + "feMerge", "femergenode", + "feMergeNode", "femorphology", + "feMorphology", "fence", "feoffset", + "feOffset", "fepointlight", + "fePointLight", "fespecularlighting", + "feSpecularLighting", "fespotlight", + "feSpotLight", "fetile", + "feTile", "feturbulence", + "feTurbulence", "fieldset", "figcaption", "figure", @@ -444,10 +455,10 @@ pub static ATOMS: &'static [&'static str] = &[ "fill-opacity", "fill-rule", "filter", - "filterRes", - "filterUnits", "filterres", + "filterRes", "filterunits", + "filterUnits", "flood-color", "flood-opacity", 
"floor", @@ -459,21 +470,21 @@ pub static ATOMS: &'static [&'static str] = &[ "font-face-src", "font-face-uri", "font-family", + "fontfamily", "font-size", + "fontsize", "font-size-adjust", "font-stretch", "font-style", + "fontstyle", "font-variant", "font-weight", - "fontfamily", - "fontsize", - "fontstyle", "fontweight", "footer", "for", "forall", - "foreignObject", "foreignobject", + "foreignObject", "formaction", "format", "formenctype", @@ -493,13 +504,13 @@ pub static ATOMS: &'static [&'static str] = &[ "glyph-name", "glyph-orientation-horizontal", "glyph-orientation-vertical", - "glyphRef", "glyphref", + "glyphRef", "grad", - "gradientTransform", - "gradientUnits", "gradienttransform", + "gradientTransform", "gradientunits", + "gradientUnits", "groupalign", "gt", "handler", @@ -553,18 +564,18 @@ pub static ATOMS: &'static [&'static str] = &[ "k3", "k4", "kbd", - "kernelMatrix", - "kernelUnitLength", "kernelmatrix", + "kernelMatrix", "kernelunitlength", + "kernelUnitLength", "kerning", - "keyPoints", - "keySplines", - "keyTimes", "keygen", "keypoints", + "keyPoints", "keysplines", + "keySplines", "keytimes", + "keyTimes", "label", "lambda", "lang", @@ -574,23 +585,27 @@ pub static ATOMS: &'static [&'static str] = &[ "lcm", "left", "legend", - "lengthAdjust", "lengthadjust", + "lengthAdjust", "leq", "letter-spacing", "lighting-color", "limit", - "limitingConeAngle", "limitingconeangle", + "limitingConeAngle", "line", - "line-height", - "linearGradient", "lineargradient", + "linearGradient", "linebreak", + "line-height", "linethickness", "list", "listener", "listing", + "list-style", + "list-style-image", + "list-style-position", + "list-style-type", "ln", "local", "log", @@ -612,27 +627,27 @@ pub static ATOMS: &'static [&'static str] = &[ "map", "margin", "margin-bottom", + "marginheight", "margin-left", "margin-right", "margin-top", - "marginheight", "marginwidth", "mark", "marker", "marker-end", + "markerheight", + "markerHeight", "marker-mid", 
"marker-start", - "markerHeight", - "markerUnits", - "markerWidth", - "markerheight", "markerunits", + "markerUnits", "markerwidth", + "markerWidth", "mask", - "maskContentUnits", - "maskUnits", "maskcontentunits", + "maskContentUnits", "maskunits", + "maskUnits", "math", "mathbackground", "mathcolor", @@ -643,9 +658,9 @@ pub static ATOMS: &'static [&'static str] = &[ "matrixrow", "max", "max-height", - "max-width", "maxlength", "maxsize", + "max-width", "mean", "media", "median", @@ -663,9 +678,9 @@ pub static ATOMS: &'static [&'static str] = &[ "mi", "min", "min-height", - "min-width", "minsize", "minus", + "min-width", "missing-glyph", "mlabeledtr", "mmultiscripts", @@ -715,8 +730,8 @@ pub static ATOMS: &'static [&'static str] = &[ "notprsubset", "notsubset", "nowrap", - "numOctaves", "numoctaves", + "numOctaves", "occurrence", "offset", "ol", @@ -816,8 +831,15 @@ pub static ATOMS: &'static [&'static str] = &[ "other", "otherwise", "outerproduct", + "outline", + "outline-color", + "outline-style", + "outline-width", "output", "overflow", + "overflow-wrap", + "overflow-x", + "overflow-y", "overline-position", "overline-thickness", "p", @@ -829,15 +851,15 @@ pub static ATOMS: &'static [&'static str] = &[ "panose-1", "partialdiff", "path", - "pathLength", "pathlength", + "pathLength", "pattern", - "patternContentUnits", - "patternTransform", - "patternUnits", "patterncontentunits", + "patternContentUnits", "patterntransform", + "patternTransform", "patternunits", + "patternUnits", "pi", "piece", "piecewise", @@ -845,25 +867,25 @@ pub static ATOMS: &'static [&'static str] = &[ "plus", "pointer-events", "points", - "pointsAtX", - "pointsAtY", - "pointsAtZ", "pointsatx", + "pointsAtX", "pointsaty", + "pointsAtY", "pointsatz", + "pointsAtZ", "polygon", "polyline", "position", "poster", "power", "prefetch", - "preserveAlpha", - "preserveAspectRatio", "preservealpha", + "preserveAlpha", "preserveaspectratio", + "preserveAspectRatio", "primes", - "primitiveUnits", 
"primitiveunits", + "primitiveUnits", "product", "profile", "progress", @@ -872,8 +894,8 @@ pub static ATOMS: &'static [&'static str] = &[ "q", "quotient", "r", - "radialGradient", "radialgradient", + "radialGradient", "radiogroup", "radius", "rationals", @@ -881,29 +903,29 @@ pub static ATOMS: &'static [&'static str] = &[ "real", "reals", "rect", - "refX", - "refY", "refx", + "refX", "refy", + "refY", "rel", "reln", "rem", "rendering-intent", "repeat", + "repeatcount", + "repeatCount", + "repeatdur", + "repeatDur", "repeat-max", "repeat-min", "repeat-start", "repeat-template", - "repeatCount", - "repeatDur", - "repeatcount", - "repeatdur", "replace", "required", - "requiredExtensions", - "requiredFeatures", "requiredextensions", + "requiredExtensions", "requiredfeatures", + "requiredFeatures", "restart", "result", "rev", @@ -965,26 +987,26 @@ pub static ATOMS: &'static [&'static str] = &[ "spacing", "span", "specification", - "specularConstant", - "specularExponent", "specularconstant", + "specularConstant", "specularexponent", + "specularExponent", "speed", - "spreadMethod", "spreadmethod", + "spreadMethod", "src", "srcdoc", "standby", "start", - "startOffset", "startoffset", - "stdDeviation", + "startOffset", "stddeviation", + "stdDeviation", "stemh", "stemv", "step", - "stitchTiles", "stitchtiles", + "stitchTiles", "stop", "stop-color", "stop-opacity", @@ -1009,36 +1031,36 @@ pub static ATOMS: &'static [&'static str] = &[ "summary", "sup", "superscriptshift", - "surfaceScale", "surfacescale", + "surfaceScale", "switch", "symbol", "symmetric", - "systemLanguage", "systemlanguage", + "systemLanguage", "tabindex", "table-layout", - "tableValues", "tablevalues", + "tableValues", "tan", "tanh", "target", - "targetX", - "targetY", "targetx", + "targetX", "targety", + "targetY", "tbreak", "tendsto", "text", "text-align", "text-anchor", "text-decoration", - "text-orientation", - "text-rendering", - "textLength", - "textPath", "textlength", + "textLength", + 
"text-orientation", "textpath", + "textPath", + "text-rendering", "thickmathspace", "thinmathspace", "time", @@ -1046,6 +1068,11 @@ pub static ATOMS: &'static [&'static str] = &[ "to", "top", "transform", + "transition-delay", + "transition-duration", + "transition-property", + "transitions", + "transition-timing-function", "transpose", "tref", "true", @@ -1067,11 +1094,8 @@ pub static ATOMS: &'static [&'static str] = &[ "uplimit", "use", "usemap", - "v-alphabetic", - "v-hanging", - "v-ideographic", - "v-mathematical", "valign", + "v-alphabetic", "value", "values", "valuetype", @@ -1081,22 +1105,25 @@ pub static ATOMS: &'static [&'static str] = &[ "vectorproduct", "version", "vert-adv-y", + "vertical-align", "vert-origin-x", "vert-origin-y", - "vertical-align", "verythickmathspace", "verythinmathspace", "veryverythickmathspace", "veryverythinmathspace", + "v-hanging", "video", + "v-ideographic", "view", - "viewBox", - "viewTarget", "viewbox", + "viewBox", "viewtarget", + "viewTarget", "visibility", "vkern", "vlink", + "v-mathematical", "vspace", "wbr", "when", @@ -1104,14 +1131,16 @@ pub static ATOMS: &'static [&'static str] = &[ "width", "widths", "word-spacing", + "word-wrap", "wrap", "writing-mode", "x", - "x-height", "x1", "x2", - "xChannelSelector", "xchannelselector", + "xChannelSelector", + "x-height", + "xlink", "xlink:actuate", "xlink:arcrole", "xlink:href", @@ -1121,48 +1150,17 @@ pub static ATOMS: &'static [&'static str] = &[ "xlink:type", "xml:base", "xml:lang", - "xml:space", "xmlns", "xmlns:xlink", - "xlink", + "xml:space", "xor", "xref", "y", "y1", "y2", - "yChannelSelector", "ychannelselector", + "yChannelSelector", "z", - "zoomAndPan", "zoomandpan", - - "background-size", - "background-origin", - "background-clip", - "border-top-left-radius", - "border-top-right-radius", - "border-bottom-right-radius", - "border-bottom-left-radius", - "outline-color", - "outline-style", - "outline-width", - "overflow-wrap", - "list-style-image", - 
"list-style-position", - "list-style-type", - "column-count", - "column-width", - "overflow-x", - "overflow-y", - "transition-property", - "transition-duration", - "transition-timing-function", - "transition-delay", - "border-radius", - "outline", - "word-wrap", - "list-style", - "columns", - "transitions", - + "zoomAndPan", ]; From 2f8b88a1ec24418e9a302285d280c3c0d404080e Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 1 Sep 2015 15:58:30 +0200 Subject: [PATCH 142/379] Add some more static atoms. --- shared/static_atom_list.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 2ff5993..de97b3c 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -387,6 +387,7 @@ pub static ATOMS: &'static [&'static str] = &[ "equalrows", "equivalent", "eulergamma", + "event", "exists", "exp", "exponent", @@ -691,6 +692,7 @@ pub static ATOMS: &'static [&'static str] = &[ "momentabout", "movablelimits", "mover", + "mozbrowser", "mpadded", "mpath", "mphantom", @@ -781,12 +783,14 @@ pub static ATOMS: &'static [&'static str] = &[ "onfocusout", "onformchange", "onforminput", + "onhashchange", "onhelp", "oninput", "oninvalid", "onkeydown", "onkeypress", "onkeyup", + "onlanguagechange", "onload", "onlosecapture", "onmessage", @@ -801,7 +805,12 @@ pub static ATOMS: &'static [&'static str] = &[ "onmove", "onmoveend", "onmovestart", + "onoffline", + "ononline", + "onpagehide", + "onpageshow", "onpaste", + "onpopstate", "onpropertychange", "onreadystatechange", "onrepeat", @@ -816,6 +825,7 @@ pub static ATOMS: &'static [&'static str] = &[ "onselectstart", "onstart", "onstop", + "onstorage", "onsubmit", "onunload", "onzoom", @@ -864,6 +874,7 @@ pub static ATOMS: &'static [&'static str] = &[ "piece", "piecewise", "ping", + "placeholder", "plus", "pointer-events", "points", @@ -971,6 +982,7 @@ pub static ATOMS: &'static [&'static str] = &[ "sep", "separator", "separators", + "serif", "set", 
"setdiff", "shape", From 62492df27de8ffc35dbaf69980561cd68f9a0421 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 1 Sep 2015 16:17:12 +0200 Subject: [PATCH 143/379] Update heapsize_plugin --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0dadad5..4883f10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,7 +52,7 @@ version = "0.1.1" optional = true [dependencies.heapsize_plugin] -version = "0.0.1" +version = "0.1.0" optional = true [build-dependencies.string_cache_shared] From 604ee24828eb0ea7f4b85fcb5b1724fb05b52718 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 8 Sep 2015 17:37:22 +0200 Subject: [PATCH 144/379] Add missing Ruby elements "rb" and "rtc" --- shared/static_atom_list.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 0004bda..20591d8 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -944,6 +944,8 @@ pub static ATOMS: &'static [&'static str] = &[ "rowspan", "rquote", "rspace", + "rb", + "rtc", "ruby", "rule", "rules", From fd1a17ec947b84ea3457637c30b955b16e672ef1 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 8 Sep 2015 17:58:19 +0200 Subject: [PATCH 145/379] Put our atoms back into order --- shared/static_atom_list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 20591d8..a72df96 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -903,6 +903,7 @@ pub static ATOMS: &'static [&'static str] = &[ "radiogroup", "radius", "rationals", + "rb", "readonly", "real", "reals", @@ -944,7 +945,6 @@ pub static ATOMS: &'static [&'static str] = &[ "rowspan", "rquote", "rspace", - "rb", "rtc", "ruby", "rule", From eb2fee1270c230d156afa7f73fd93ca0f8d9be9e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 8 Sep 2015 18:07:06 +0200 Subject: [PATCH 146/379] v0.1.12 --- Cargo.toml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 008ed10..a71bc23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.11" +version = "0.1.12" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From d3cee6feb4c23af7aaa435e2cfa8703857b32953 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Sat, 12 Sep 2015 21:38:38 +0100 Subject: [PATCH 147/379] Add a few atoms to ATOMS static slice https://html.spec.whatwg.org/multipage/#other-elements,-attributes-and-apis:blink https://html.spec.whatwg.org/multipage/#other-elements,-attributes-and-apis:multicol https://html.spec.whatwg.org/multipage/#other-elements,-attributes-and-apis:nextid --- shared/static_atom_list.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index a72df96..5f95997 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -216,6 +216,7 @@ pub static ATOMS: &'static [&'static str] = &[ "bevelled", "bgcolor", "bias", + "blink", "border", "border-bottom", "border-bottom-color", @@ -703,6 +704,7 @@ pub static ATOMS: &'static [&'static str] = &[ "mtd", "mtext", "mtr", + "multicol", "multiple", "munder", "munderover", @@ -712,6 +714,7 @@ pub static ATOMS: &'static [&'static str] = &[ "nav", "neq", "nest", + "nextid", "nobr", "noembed", "nohref", From a04d0ea5fd7d249a81c32e6244c4894529290759 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 13 Sep 2015 05:02:36 +0200 Subject: [PATCH 148/379] string_cache_shared v0.1.7 --- shared/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/Cargo.toml b/shared/Cargo.toml index c5c0bf4..23e03c7 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.6" +version = "0.1.7" authors = [ "The 
Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" From 40d5c57701b66507ae39c6afc7016446c6e294e8 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 21 Sep 2015 19:47:33 +0530 Subject: [PATCH 149/379] Bump serde --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a71bc23..2ae7e6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.12" +version = "0.1.13" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -29,7 +29,7 @@ heap_size = ["heapsize", "heapsize_plugin"] [dependencies] lazy_static = "0.1.10" -serde = "0.5" +serde = "0.6" [dev-dependencies] rand = "0" From dd474cd374585a1e851d7599df2d9e2d1f94ce66 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Sun, 25 Oct 2015 10:15:30 -0700 Subject: [PATCH 150/379] Add event types --- shared/static_atom_list.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 5f95997..8e68dfe 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -265,6 +265,7 @@ pub static ATOMS: &'static [&'static str] = &[ "class", "classid", "clear", + "click", "clip", "clip-path", "clippath", @@ -564,6 +565,7 @@ pub static ATOMS: &'static [&'static str] = &[ "kernelunitlength", "kernelUnitLength", "kerning", + "keydown", "keygen", "keypoints", "keyPoints", From c83d753d7fcfff471973372cd912905d869a7b6f Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 26 Oct 2015 16:57:32 +0530 Subject: [PATCH 151/379] bump --- Cargo.toml | 6 +++--- plugin/Cargo.toml | 4 ++-- shared/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml 
b/Cargo.toml index 2ae7e6d..1892af4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.13" +version = "0.1.14" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -40,12 +40,12 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" -version = "0.1.7" +version = "0.1.8" optional = true [dependencies.string_cache_shared] path = "shared" -version = "0.1.6" +version = "0.1.8" [dependencies.heapsize] version = "0.1.1" diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 25f0155..8214635 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.7" +version = "0.1.8" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." license = "MIT / Apache-2.0" @@ -14,7 +14,7 @@ plugin = true [dependencies.string_cache_shared] path = "../shared" -version = "0.1.6" +version = "0.1.8" [dependencies] lazy_static = "0.1.10" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 23e03c7..b2423ae 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.7" +version = "0.1.8" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" From ca6af9ce806cbce62c79884ae5c91b036b1acb5a Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Mon, 26 Oct 2015 10:02:01 -0500 Subject: [PATCH 152/379] Added "*" atom. 
--- shared/static_atom_list.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 8e68dfe..022f529 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -78,6 +78,8 @@ pub static ATOMS: &'static [&'static str] = &[ "", + "*", + // XML namespaces known to the HTML syntax spec "http://www.w3.org/1999/xhtml", "http://www.w3.org/XML/1998/namespace", From 145f5332901fe238b7b43961cdf34e5c253f6474 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Mon, 26 Oct 2015 10:57:57 -0500 Subject: [PATCH 153/379] Version bump. --- Cargo.toml | 8 ++++---- plugin/Cargo.toml | 4 ++-- shared/Cargo.toml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1892af4..13c51a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.14" +version = "0.1.15" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -40,12 +40,12 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" -version = "0.1.8" +version = "0.1.9" optional = true [dependencies.string_cache_shared] path = "shared" -version = "0.1.8" +version = "0.1.9" [dependencies.heapsize] version = "0.1.1" @@ -57,4 +57,4 @@ optional = true [build-dependencies.string_cache_shared] path = "shared" -version = "0.1.6" +version = "0.1.9" diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 8214635..a8a8084 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.8" +version = "0.1.9" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." 
license = "MIT / Apache-2.0" @@ -14,7 +14,7 @@ plugin = true [dependencies.string_cache_shared] path = "../shared" -version = "0.1.8" +version = "0.1.9" [dependencies] lazy_static = "0.1.10" diff --git a/shared/Cargo.toml b/shared/Cargo.toml index b2423ae..0b32fd9 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.8" +version = "0.1.9" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" From 5cf72fcf6467aac0821b6c0b7553f6e06d1e6c7e Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Wed, 4 Nov 2015 11:34:03 -0500 Subject: [PATCH 154/379] Add static strings from 'type' values https://html.spec.whatwg.org/multipage/#attr-input-type --- shared/static_atom_list.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 022f529..25d6cac 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -330,7 +330,9 @@ pub static ATOMS: &'static [&'static str] = &[ "datalist", "datasrc", "datatemplate", + "date", "datetime", + "datetime-local", "declare", "default", "defer", @@ -374,6 +376,7 @@ pub static ATOMS: &'static [&'static str] = &[ "elevation", "ellipse", "em", + "email", "emptyset", "enable-background", "encoding", @@ -686,6 +689,7 @@ pub static ATOMS: &'static [&'static str] = &[ "mn", "mo", "mode", + "month", "moment", "momentabout", "movablelimits", @@ -732,6 +736,7 @@ pub static ATOMS: &'static [&'static str] = &[ "notprsubset", "notsubset", "nowrap", + "number", "numoctaves", "numOctaves", "occurrence", @@ -860,6 +865,7 @@ pub static ATOMS: &'static [&'static str] = &[ "padding-top", "panose-1", "partialdiff", + "password", "path", "pathlength", "pathLength", @@ -973,6 +979,7 @@ pub static ATOMS: &'static [&'static str] = &[ "scrolling", 
"sdev", "seamless", + "search", "sec", "sech", "section", @@ -1064,6 +1071,7 @@ pub static ATOMS: &'static [&'static str] = &[ "targety", "targetY", "tbreak", + "tel", "tendsto", "text", "text-align", @@ -1106,6 +1114,7 @@ pub static ATOMS: &'static [&'static str] = &[ "units-per-em", "unselectable", "uplimit", + "url", "use", "usemap", "valign", @@ -1140,6 +1149,7 @@ pub static ATOMS: &'static [&'static str] = &[ "v-mathematical", "vspace", "wbr", + "week", "when", "white-space", "width", From d3b7e41b33e927e330b7f36e990b67599cc2cee3 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Wed, 4 Nov 2015 11:39:20 -0500 Subject: [PATCH 155/379] Bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 13c51a9..dbf248d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.15" +version = "0.1.16" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 3f396412dc6249675ea4061782bbc4d4fde39186 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Wed, 4 Nov 2015 13:34:16 -0500 Subject: [PATCH 156/379] Bump string_cache_shared After completing #117, I bumped string_cache, but I should have bumped string_cache_shared --- shared/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 0b32fd9..18e528f 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.9" +version = "0.1.10" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." 
license = "MIT / Apache-2.0" From 2aaa61f1bca8609e4016f3c9e7288874bd2e864c Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 10 Nov 2015 07:11:49 +0530 Subject: [PATCH 157/379] Rustup to rustc 1.6.0-nightly (5b4986fa5 2015-11-08) --- plugin/Cargo.toml | 2 +- plugin/src/atom/mod.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index a8a8084..9ba2dc1 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.9" +version = "0.1.10" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." license = "MIT / Apache-2.0" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index bff6189..3176508 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -9,7 +9,7 @@ use syntax::ptr::P; use syntax::codemap::Span; -use syntax::ast::{TokenTree, TtToken}; +use syntax::ast::TokenTree; use syntax::ast; use syntax::ext::base::{ExtCtxt, MacResult, MacEager}; use syntax::parse::token::{InternedString, Ident, Literal, Lit}; @@ -21,8 +21,8 @@ use std::ascii::AsciiExt; fn atom_tok_to_str(t: &TokenTree) -> Option { Some(match *t { - TtToken(_, Ident(s, _)) => s.name.as_str(), - TtToken(_, Literal(Lit::Str_(s), _)) => s.as_str(), + TokenTree::Token(_, Ident(s, _)) => s.name.as_str(), + TokenTree::Token(_, Literal(Lit::Str_(s), _)) => s.as_str(), _ => return None, }) } From c466010c366e2da7b4312b792c9a4246d2d7a7d5 Mon Sep 17 00:00:00 2001 From: Paul Rouget Date: Wed, 11 Nov 2015 13:16:43 +0100 Subject: [PATCH 158/379] add 'sizes' atom --- shared/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 25d6cac..29464c0 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -1000,6 +1000,7 @@ pub static ATOMS: &'static [&'static str] = &[ 
"sin", "sinh", "size", + "sizes", "slope", "small", "solidcolor", From 461e94230952718d1ee413292a3883253db71d62 Mon Sep 17 00:00:00 2001 From: Paul Rouget Date: Wed, 11 Nov 2015 13:16:57 +0100 Subject: [PATCH 159/379] version bump --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dbf248d..ab8b45a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.16" +version = "0.1.17" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 4536270a900ec43b801296c50cefcf4525568460 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 12 Nov 2015 23:24:12 -0500 Subject: [PATCH 160/379] Add more types --- shared/Cargo.toml | 2 +- shared/static_atom_list.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 18e528f..4be0054 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.10" +version = "0.1.11" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." 
license = "MIT / Apache-2.0" diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 29464c0..84aa376 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -260,6 +260,7 @@ pub static ATOMS: &'static [&'static str] = &[ "char", "charoff", "charset", + "checkbox", "checked", "ci", "circle", @@ -452,6 +453,7 @@ pub static ATOMS: &'static [&'static str] = &[ "fieldset", "figcaption", "figure", + "file", "fill", "fill-opacity", "fill-rule", @@ -913,6 +915,7 @@ pub static ATOMS: &'static [&'static str] = &[ "r", "radialgradient", "radialGradient", + "radio", "radiogroup", "radius", "rationals", @@ -944,6 +947,7 @@ pub static ATOMS: &'static [&'static str] = &[ "requiredExtensions", "requiredfeatures", "requiredFeatures", + "reset", "restart", "result", "rev", @@ -1047,6 +1051,7 @@ pub static ATOMS: &'static [&'static str] = &[ "stroke-width", "strong", "sub", + "submit", "subscriptshift", "subset", "sum", From 993b83f652f03639773f881a96e4459da69fd7bb Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Fri, 13 Nov 2015 17:22:35 +0100 Subject: [PATCH 161/379] Add "error" to the static atoms. --- shared/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 84aa376..6ca0c77 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -387,6 +387,7 @@ pub static ATOMS: &'static [&'static str] = &[ "equalcolumns", "equalrows", "equivalent", + "error", "eulergamma", "event", "exists", From 21a210526d8e1949476c117e5f3d691c4f957576 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Fri, 13 Nov 2015 17:23:53 +0100 Subject: [PATCH 162/379] Bump the version number. 
--- shared/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 4be0054..60ab570 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.11" +version = "0.1.12" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" From de5ea4deeedc035d63a8b8d14d51856793bf4711 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Nov 2015 20:17:13 +0100 Subject: [PATCH 163/379] Remove Atom::as_slice in favor of Deref and AsRef --- Cargo.toml | 2 +- examples/summarize-events/src/main.rs | 2 +- src/atom/bench.rs | 20 +++++------ src/atom/mod.rs | 49 ++++++++++++--------------- 4 files changed, 34 insertions(+), 39 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ab8b45a..f85e6a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.17" +version = "0.2.0" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index db50fcd..b5f08fe 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -84,7 +84,7 @@ fn main() { // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. - _ => Atom { data: ev.id }.as_slice().to_string(), + _ => Atom { data: ev.id }.to_string(), }; match summary.entry(string) { diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 365ec48..be88c8e 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -79,20 +79,20 @@ macro_rules! 
bench_one ( (intern $x:expr, $_y:expr) => ( #[bench] fn intern(b: &mut Bencher) { - let x = $x.as_slice().to_string(); + let x = $x.to_string(); b.iter(|| { black_box(Atom::from_slice(&x)); }); } ); - (as_slice $x:expr, $_y:expr) => ( + (as_ref $x:expr, $_y:expr) => ( #[bench] - fn as_slice_x_1000(b: &mut Bencher) { + fn as_ref_x_1000(b: &mut Bencher) { let x = $x; b.iter(|| { for _ in 0..1000 { - black_box(x.as_slice()); + black_box(x.as_ref()); } }); } @@ -156,22 +156,22 @@ bench_all!([eq ne lt clone_string] for medium_string = "xyzzy01", "xyzzy02"); bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); -bench_all!([eq ne intern as_slice clone is_static lt] +bench_all!([eq ne intern as_ref clone is_static lt] for static_atom = atom!(a), atom!(b)); -bench_all!([intern as_slice clone is_inline] +bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); -bench_all!([eq ne intern as_slice clone is_inline lt] +bench_all!([eq ne intern as_ref clone is_inline lt] for medium_inline_atom = mk("xyzzy01"), mk("xyzzy02")); -bench_all!([intern as_slice clone is_dynamic] +bench_all!([intern as_ref clone is_dynamic] for min_dynamic_atom = mk("xyzzy001"), mk("xyzzy002")); -bench_all!([eq ne intern as_slice clone is_dynamic lt] +bench_all!([eq ne intern as_ref clone is_dynamic lt] for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b)); -bench_all!([intern as_slice clone is_static] +bench_all!([intern as_ref clone is_static] for static_at_runtime = mk("a"), mk("b")); bench_all!([ne lt x_static y_inline] diff --git a/src/atom/mod.rs b/src/atom/mod.rs index e3e315c..9aa49c2 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -161,23 +161,6 @@ impl Atom { log!(Event::Intern(data)); Atom { data: data } } - - #[inline] - pub fn as_slice<'t>(&'t self) -> &'t str { - unsafe { - match self.unpack() { - Inline(..) 
=> { - let buf = string_cache_shared::inline_orig_bytes(&self.data); - str::from_utf8(buf).unwrap() - }, - Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), - Dynamic(entry) => { - let entry = entry as *mut StringCacheEntry; - &(*entry).string - } - } - } - } } impl Clone for Atom { @@ -226,7 +209,19 @@ impl ops::Deref for Atom { #[inline] fn deref(&self) -> &str { - self.as_slice() + unsafe { + match self.unpack() { + Inline(..) => { + let buf = string_cache_shared::inline_orig_bytes(&self.data); + str::from_utf8(buf).unwrap() + }, + Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), + Dynamic(entry) => { + let entry = entry as *mut StringCacheEntry; + &(*entry).string + } + } + } } } @@ -248,7 +243,7 @@ impl fmt::Debug for Atom { } }; - write!(f, "Atom('{}' type={})", self.as_slice(), ty_str) + write!(f, "Atom('{}' type={})", &*self, ty_str) } } @@ -258,7 +253,7 @@ impl PartialOrd for Atom { if self.data == other.data { return Some(Equal); } - self.as_slice().partial_cmp(other.as_slice()) + self.as_ref().partial_cmp(other.as_ref()) } } @@ -268,7 +263,7 @@ impl Ord for Atom { if self.data == other.data { return Equal; } - self.as_slice().cmp(other.as_slice()) + self.as_ref().cmp(other.as_ref()) } } @@ -305,22 +300,22 @@ mod tests { #[test] fn test_as_slice() { let s0 = Atom::from_slice(""); - assert!(s0.as_slice() == ""); + assert!(s0.as_ref() == ""); let s1 = Atom::from_slice("class"); - assert!(s1.as_slice() == "class"); + assert!(s1.as_ref() == "class"); let i0 = Atom::from_slice("blah"); - assert!(i0.as_slice() == "blah"); + assert!(i0.as_ref() == "blah"); let s0 = Atom::from_slice("BLAH"); - assert!(s0.as_slice() == "BLAH"); + assert!(s0.as_ref() == "BLAH"); let d0 = Atom::from_slice("zzzzzzzzzz"); - assert!(d0.as_slice() == "zzzzzzzzzz"); + assert!(d0.as_ref() == "zzzzzzzzzz"); let d1 = Atom::from_slice("ZZZZZZZZZZ"); - assert!(d1.as_slice() == "ZZZZZZZZZZ"); + assert!(d1.as_ref() == "ZZZZZZZZZZ"); } macro_rules! 
unpacks_to (($e:expr, $t:pat) => ( From d9f3133d47f105d32419c3965acb9a2d347a76f7 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Nov 2015 20:26:04 +0100 Subject: [PATCH 164/379] Replace Atom::from_slice with From trait impls. --- src/atom/bench.rs | 6 ++-- src/atom/mod.rs | 82 ++++++++++++++++++++++++----------------------- src/namespace.rs | 28 ++++++++-------- 3 files changed, 59 insertions(+), 57 deletions(-) diff --git a/src/atom/bench.rs b/src/atom/bench.rs index be88c8e..d7b18b9 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -32,7 +32,7 @@ use test::{Bencher, black_box}; // Just shorthand fn mk(x: &str) -> Atom { - Atom::from_slice(x) + Atom::from(x) } macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( @@ -81,7 +81,7 @@ macro_rules! bench_one ( fn intern(b: &mut Bencher) { let x = $x.to_string(); b.iter(|| { - black_box(Atom::from_slice(&x)); + black_box(Atom::from(&*x)); }); } ); @@ -205,7 +205,7 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( *n = (*n % 0x40) + 0x20; } let s = str::from_utf8(&buf[..]).unwrap(); - black_box(Atom::from_slice(s)); + black_box(Atom::from(s)); }); } )); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9aa49c2..a6e361d 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -140,9 +140,11 @@ impl Atom { unsafe fn unpack(&self) -> UnpackedAtom { UnpackedAtom::from_packed(self.data) } +} +impl<'a> From<&'a str> for Atom { #[inline] - pub fn from_slice(string_to_add: &str) -> Atom { + fn from(string_to_add: &str) -> Atom { let unpacked = match STATIC_ATOM_SET.get_index_or_hash(string_to_add) { Ok(id) => Static(id as u32), Err(hash) => { @@ -283,7 +285,7 @@ impl Serialize for Atom { impl Deserialize for Atom { fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { let string: String = try!(Deserialize::deserialize(deserializer)); - Ok(Atom::from_slice(&*string)) + Ok(Atom::from(&*string)) } } @@ -299,27 +301,27 @@ mod tests { #[test] fn test_as_slice() { - let s0 = 
Atom::from_slice(""); + let s0 = Atom::from(""); assert!(s0.as_ref() == ""); - let s1 = Atom::from_slice("class"); + let s1 = Atom::from("class"); assert!(s1.as_ref() == "class"); - let i0 = Atom::from_slice("blah"); + let i0 = Atom::from("blah"); assert!(i0.as_ref() == "blah"); - let s0 = Atom::from_slice("BLAH"); + let s0 = Atom::from("BLAH"); assert!(s0.as_ref() == "BLAH"); - let d0 = Atom::from_slice("zzzzzzzzzz"); + let d0 = Atom::from("zzzzzzzzzz"); assert!(d0.as_ref() == "zzzzzzzzzz"); - let d1 = Atom::from_slice("ZZZZZZZZZZ"); + let d1 = Atom::from("ZZZZZZZZZZ"); assert!(d1.as_ref() == "ZZZZZZZZZZ"); } macro_rules! unpacks_to (($e:expr, $t:pat) => ( - match unsafe { Atom::from_slice($e).unpack() } { + match unsafe { Atom::from($e).unpack() } { $t => (), _ => panic!("atom has wrong type"), } @@ -343,17 +345,17 @@ mod tests { #[test] fn test_equality() { - let s0 = Atom::from_slice("fn"); - let s1 = Atom::from_slice("fn"); - let s2 = Atom::from_slice("loop"); + let s0 = Atom::from("fn"); + let s1 = Atom::from("fn"); + let s2 = Atom::from("loop"); - let i0 = Atom::from_slice("blah"); - let i1 = Atom::from_slice("blah"); - let i2 = Atom::from_slice("blah2"); + let i0 = Atom::from("blah"); + let i1 = Atom::from("blah"); + let i2 = Atom::from("blah2"); - let d0 = Atom::from_slice("zzzzzzzz"); - let d1 = Atom::from_slice("zzzzzzzz"); - let d2 = Atom::from_slice("zzzzzzzzz"); + let d0 = Atom::from("zzzzzzzz"); + let d1 = Atom::from("zzzzzzzz"); + let d2 = Atom::from("zzzzzzzzz"); assert!(s0 == s1); assert!(s0 != s2); @@ -372,9 +374,9 @@ mod tests { #[test] fn ord() { fn check(x: &str, y: &str) { - assert_eq!(x < y, Atom::from_slice(x) < Atom::from_slice(y)); - assert_eq!(x.cmp(y), Atom::from_slice(x).cmp(&Atom::from_slice(y))); - assert_eq!(x.partial_cmp(y), Atom::from_slice(x).partial_cmp(&Atom::from_slice(y))); + assert_eq!(x < y, Atom::from(x) < Atom::from(y)); + assert_eq!(x.cmp(y), Atom::from(x).cmp(&Atom::from(y))); + assert_eq!(x.partial_cmp(y), 
Atom::from(x).partial_cmp(&Atom::from(y))); } check("a", "body"); @@ -390,17 +392,17 @@ mod tests { #[test] fn clone() { - let s0 = Atom::from_slice("fn"); + let s0 = Atom::from("fn"); let s1 = s0.clone(); - let s2 = Atom::from_slice("loop"); + let s2 = Atom::from("loop"); - let i0 = Atom::from_slice("blah"); + let i0 = Atom::from("blah"); let i1 = i0.clone(); - let i2 = Atom::from_slice("blah2"); + let i2 = Atom::from("blah2"); - let d0 = Atom::from_slice("zzzzzzzz"); + let d0 = Atom::from("zzzzzzzz"); let d1 = d0.clone(); - let d2 = Atom::from_slice("zzzzzzzzz"); + let d2 = Atom::from("zzzzzzzzz"); assert!(s0 == s1); assert!(s0 != s2); @@ -429,12 +431,12 @@ mod tests { #[test] fn repr() { fn check(s: &str, data: u64) { - assert_eq_fmt!("0x{:016X}", Atom::from_slice(s).data, data); + assert_eq_fmt!("0x{:016X}", Atom::from(s).data, data); } fn check_static(s: &str, x: Atom) { use string_cache_shared::STATIC_ATOM_SET; - assert_eq_fmt!("0x{:016X}", x.data, Atom::from_slice(s).data); + assert_eq_fmt!("0x{:016X}", x.data, Atom::from(s).data); assert_eq!(0x2, x.data & 0xFFFF_FFFF); // The index is unspecified by phf. assert!((x.data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); @@ -455,7 +457,7 @@ mod tests { check("xyzzy01", 0x3130_797A_7A79_7871); // Dynamic atoms. This is a pointer so we can't verify every bit. 
- assert_eq!(0x00, Atom::from_slice("a dynamic string").data & 0xf); + assert_eq!(0x00, Atom::from("a dynamic string").data & 0xf); } #[test] @@ -470,34 +472,34 @@ mod tests { fn test_threads() { for _ in 0_u32..100 { thread::spawn(move || { - let _ = Atom::from_slice("a dynamic string"); - let _ = Atom::from_slice("another string"); + let _ = Atom::from("a dynamic string"); + let _ = Atom::from("another string"); }); } } #[test] fn atom_macro() { - assert_eq!(atom!(body), Atom::from_slice("body")); - assert_eq!(atom!("body"), Atom::from_slice("body")); - assert_eq!(atom!("font-weight"), Atom::from_slice("font-weight")); + assert_eq!(atom!(body), Atom::from("body")); + assert_eq!(atom!("body"), Atom::from("body")); + assert_eq!(atom!("font-weight"), Atom::from("font-weight")); } #[test] fn match_atom() { - assert_eq!(2, match Atom::from_slice("head") { + assert_eq!(2, match Atom::from("head") { atom!(br) => 1, atom!(html) | atom!(head) => 2, _ => 3, }); - assert_eq!(3, match Atom::from_slice("body") { + assert_eq!(3, match Atom::from("body") { atom!(br) => 1, atom!(html) | atom!(head) => 2, _ => 3, }); - assert_eq!(3, match Atom::from_slice("zzzzzz") { + assert_eq!(3, match Atom::from("zzzzzz") { atom!(br) => 1, atom!(html) | atom!(head) => 2, _ => 3, @@ -507,14 +509,14 @@ mod tests { #[test] fn ensure_deref() { // Ensure we can Deref to a &str - let atom = Atom::from_slice("foobar"); + let atom = Atom::from("foobar"); let _: &str = &atom; } #[test] fn ensure_as_ref() { // Ensure we can as_ref to a &str - let atom = Atom::from_slice("foobar"); + let atom = Atom::from("foobar"); let _: &str = atom.as_ref(); } diff --git a/src/namespace.rs b/src/namespace.rs index 5603114..e69eda3 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -42,27 +42,27 @@ mod tests { #[test] fn ns_macro() { - assert_eq!(ns!(""), Namespace(Atom::from_slice(""))); + assert_eq!(ns!(""), Namespace(Atom::from(""))); - assert_eq!(ns!(html), 
Namespace(Atom::from_slice("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xml), Namespace(Atom::from_slice("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(xmlns), Namespace(Atom::from_slice("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xlink), Namespace(Atom::from_slice("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(svg), Namespace(Atom::from_slice("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mathml), Namespace(Atom::from_slice("http://www.w3.org/1998/Math/MathML"))); + assert_eq!(ns!(html), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xml), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(xmlns), Namespace(Atom::from("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xlink), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(svg), Namespace(Atom::from("http://www.w3.org/2000/svg"))); + assert_eq!(ns!(mathml), Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); - assert_eq!(ns!(HtMl), Namespace(Atom::from_slice("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xMl), Namespace(Atom::from_slice("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(XmLnS), Namespace(Atom::from_slice("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xLiNk), Namespace(Atom::from_slice("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(SvG), Namespace(Atom::from_slice("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mAtHmL), Namespace(Atom::from_slice("http://www.w3.org/1998/Math/MathML"))); + assert_eq!(ns!(HtMl), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xMl), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(XmLnS), Namespace(Atom::from("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xLiNk), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(SvG), Namespace(Atom::from("http://www.w3.org/2000/svg"))); + assert_eq!(ns!(mAtHmL), 
Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); } #[test] fn qualname() { assert_eq!(QualName::new(ns!(""), atom!("")), - QualName { ns: ns!(""), local: Atom::from_slice("") }); + QualName { ns: ns!(""), local: Atom::from("") }); assert_eq!(QualName::new(ns!(XML), atom!(base)), QualName { ns: ns!(XML), local: atom!(base) }); } From eb85a9a38003b018a282abcfb7118399fc28c23a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Nov 2015 20:12:25 +0100 Subject: [PATCH 165/379] Merge into a single crate. Use macros even on unstable. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes: * `ns!("")` should be written `ns!()` * Other `ns!(…)` macros should be lowercase, unquoted. --- Cargo.toml | 19 +-- build.rs | 83 +++++++---- examples/summarize-events/Cargo.toml | 4 +- examples/summarize-events/src/main.rs | 22 +-- plugin/Cargo.toml | 21 --- plugin/src/atom/mod.rs | 101 -------------- plugin/src/lib.rs | 38 ----- shared/Cargo.toml | 21 --- shared/build.rs | 37 ----- shared/lib.rs | 193 -------------------------- src/atom/bench.rs | 2 +- src/atom/mod.rs | 140 +++++++++++++++++-- src/lib.rs | 60 ++++---- src/namespace.rs | 21 +-- src/shared.rs | 54 +++++++ {shared => src}/static_atom_list.rs | 0 16 files changed, 303 insertions(+), 513 deletions(-) delete mode 100644 plugin/Cargo.toml delete mode 100644 plugin/src/atom/mod.rs delete mode 100644 plugin/src/lib.rs delete mode 100644 shared/Cargo.toml delete mode 100644 shared/build.rs delete mode 100644 shared/lib.rs create mode 100644 src/shared.rs rename {shared => src}/static_atom_list.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index f85e6a6..3345041 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ doctest = false log-events = ["rustc-serialize"] # Use unstable features to optimize space and time (memory and CPU usage). 
-unstable = ["string_cache_plugin"] +unstable = [] # HeapSizeOf support heap_size = ["heapsize", "heapsize_plugin"] @@ -30,6 +30,8 @@ heap_size = ["heapsize", "heapsize_plugin"] [dependencies] lazy_static = "0.1.10" serde = "0.6" +phf_shared = "0.7.4" +debug_unreachable = "0.0.6" [dev-dependencies] rand = "0" @@ -38,15 +40,6 @@ rand = "0" version = "0" optional = true -[dependencies.string_cache_plugin] -path = "plugin" -version = "0.1.9" -optional = true - -[dependencies.string_cache_shared] -path = "shared" -version = "0.1.9" - [dependencies.heapsize] version = "0.1.1" optional = true @@ -55,6 +48,6 @@ optional = true version = "0.1.0" optional = true -[build-dependencies.string_cache_shared] -path = "shared" -version = "0.1.9" +[build-dependencies] +phf_generator = "0.7.4" +phf_shared = "0.7.4" diff --git a/build.rs b/build.rs index 87c226e..b3f870c 100644 --- a/build.rs +++ b/build.rs @@ -1,50 +1,81 @@ -extern crate string_cache_shared; +extern crate phf_shared; +extern crate phf_generator; -use string_cache_shared::{STATIC_ATOM_SET, ALL_NS, pack_static}; +#[path = "src/shared.rs"] #[allow(dead_code)] mod shared; +#[path = "src/static_atom_list.rs"] mod static_atom_list; use std::env; -use std::ascii::AsciiExt; use std::fs::File; use std::io::{BufWriter, Write}; +use std::mem; use std::path::Path; +use std::slice; fn main() { - let path = Path::new(&env::var("OUT_DIR").unwrap()).join("ns_atom_macros_without_plugin.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - writeln!(file, r"#[macro_export]").unwrap(); - writeln!(file, r"macro_rules! 
ns {{").unwrap(); - writeln!(file, "(\"\") => {{ $crate::Namespace({}) }};", atom("")).unwrap(); - for &(prefix, url) in ALL_NS { - if !prefix.is_empty() { - generate_combination("".to_owned(), prefix, url, &mut file); + let hash_state = generate(); + write_static_atom_set(&hash_state); + write_atom_macro(&hash_state); +} + +fn generate() -> phf_generator::HashState { + let mut set = std::collections::HashSet::new(); + for atom in static_atom_list::ATOMS { + if !set.insert(atom) { + panic!("duplicate static atom `{:?}`", atom); } } - writeln!(file, r"}}").unwrap(); + phf_generator::generate_hash(static_atom_list::ATOMS) +} +fn write_static_atom_set(hash_state: &phf_generator::HashState) { + let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + macro_rules! w { + ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } + } + w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); + w!(" key: {},", hash_state.key); + w!(" disps: &["); + for &(d1, d2) in &hash_state.disps { + w!(" ({}, {}),", d1, d2); + } + w!(" ],"); + w!(" atoms: &["); + for &idx in &hash_state.map { + w!(" {:?},", static_atom_list::ATOMS[idx]); + } + w!(" ],"); + w!("}};"); +} + +fn write_atom_macro(hash_state: &phf_generator::HashState) { + let set = shared::StaticAtomSet { + key: hash_state.key, + disps: leak(hash_state.disps.clone()), + atoms: leak(hash_state.map.iter().map(|&idx| static_atom_list::ATOMS[idx]).collect()), + }; + + let path = Path::new(&env::var("OUT_DIR").unwrap()).join("atom_macro.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! 
atom {{").unwrap(); - for &s in STATIC_ATOM_SET.iter() { + for &s in set.iter() { if is_ident(s) { - writeln!(file, r"( {} ) => {{ {} }};", s, atom(s)).unwrap(); + writeln!(file, r"( {} ) => {{ {} }};", s, atom(&set, s)).unwrap(); } - writeln!(file, r"({:?}) => {{ {} }};", s, atom(s)).unwrap(); + writeln!(file, r"({:?}) => {{ {} }};", s, atom(&set, s)).unwrap(); } writeln!(file, r"}}").unwrap(); } -fn generate_combination(prefix1: String, suffix: &str, url: &str, file: &mut BufWriter) { - if suffix.is_empty() { - writeln!(file, r"({:?}) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); - writeln!(file, r"( {} ) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); - } else { - let prefix2 = prefix1.clone(); - generate_combination(prefix1 + &*suffix[..1].to_ascii_lowercase(), &suffix[1..], url, file); - generate_combination(prefix2 + &*suffix[..1].to_ascii_uppercase(), &suffix[1..], url, file); - } +fn leak(v: Vec) -> &'static [T] { + let slice = unsafe { slice::from_raw_parts(v.as_ptr(), v.len()) }; + mem::forget(v); + slice } -fn atom(s: &str) -> String { - let data = pack_static(STATIC_ATOM_SET.get_index_or_hash(s).unwrap() as u32); +fn atom(set: &shared::StaticAtomSet, s: &str) -> String { + let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); format!("$crate::Atom {{ data: 0x{:x} }}", data) } diff --git a/examples/summarize-events/Cargo.toml b/examples/summarize-events/Cargo.toml index 27d173f..7d2e7ba 100644 --- a/examples/summarize-events/Cargo.toml +++ b/examples/summarize-events/Cargo.toml @@ -7,9 +7,7 @@ authors = [ "The Servo Project Developers" ] [dependencies] csv = "0" rustc-serialize = "0" +phf_shared = "0.7.4" [dependencies.string_cache] path = "../.." 
- -[dependencies.string_cache_shared] -path = "../../shared" diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index b5f08fe..1b1aa64 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -9,8 +9,12 @@ extern crate csv; extern crate string_cache; -extern crate string_cache_shared; extern crate rustc_serialize; +extern crate phf_shared; + +#[path = "../../../src/shared.rs"] +#[allow(dead_code)] +mod shared; use string_cache::Atom; @@ -35,18 +39,18 @@ enum Kind { impl Kind { fn from_tag(tag: u8) -> Kind { match tag { - string_cache_shared::DYNAMIC_TAG => Kind::Dynamic, - string_cache_shared::INLINE_TAG => Kind::Inline, - string_cache_shared::STATIC_TAG => Kind::Static, + shared::DYNAMIC_TAG => Kind::Dynamic, + shared::INLINE_TAG => Kind::Inline, + shared::STATIC_TAG => Kind::Static, _ => panic!() } } fn to_tag(self) -> u8 { match self { - Kind::Dynamic => string_cache_shared::DYNAMIC_TAG, - Kind::Inline => string_cache_shared::INLINE_TAG, - Kind::Static => string_cache_shared::STATIC_TAG, + Kind::Dynamic => shared::DYNAMIC_TAG, + Kind::Inline => shared::INLINE_TAG, + Kind::Static => shared::STATIC_TAG, } } } @@ -77,10 +81,10 @@ fn main() { match &ev.event[..] { "intern" => { let tag = (ev.id & 0xf) as u8; - assert!(tag <= string_cache_shared::STATIC_TAG); + assert!(tag <= shared::STATIC_TAG); let string = match tag { - string_cache_shared::DYNAMIC_TAG => dynamic[&ev.id].clone(), + shared::DYNAMIC_TAG => dynamic[&ev.id].clone(), // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. 
diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml deleted file mode 100644 index 9ba2dc1..0000000 --- a/plugin/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] - -name = "string_cache_plugin" -version = "0.1.10" -authors = [ "The Servo Project Developers" ] -description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." -license = "MIT / Apache-2.0" -repository = "https://github.com/servo/string-cache" - -[lib] - -name = "string_cache_plugin" -plugin = true - -[dependencies.string_cache_shared] -path = "../shared" -version = "0.1.9" - -[dependencies] -lazy_static = "0.1.10" -mac = "0.0.2" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs deleted file mode 100644 index 3176508..0000000 --- a/plugin/src/atom/mod.rs +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -use syntax::ptr::P; -use syntax::codemap::Span; -use syntax::ast::TokenTree; -use syntax::ast; -use syntax::ext::base::{ExtCtxt, MacResult, MacEager}; -use syntax::parse::token::{InternedString, Ident, Literal, Lit}; - -use std::iter::Chain; -use std::collections::HashMap; -use std::ascii::AsciiExt; - - -fn atom_tok_to_str(t: &TokenTree) -> Option { - Some(match *t { - TokenTree::Token(_, Ident(s, _)) => s.name.as_str(), - TokenTree::Token(_, Literal(Lit::Str_(s), _)) => s.as_str(), - _ => return None, - }) -} - -// FIXME: libsyntax should provide this (rust-lang/rust#17637) -struct AtomResult { - expr: P, - pat: P, -} - -impl MacResult for AtomResult { - fn make_expr(self: Box) -> Option> { - Some(self.expr) - } - - fn make_pat(self: Box) -> Option> { - Some(self.pat) - } -} - -fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index_or_hash(name) { - Ok(i) => i, - Err(_hash) => return None, - }; - - let data = ::string_cache_shared::pack_static(i as u32); - - Some(AtomResult { - expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - }) -} - -// Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. -pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; - let name = match tt { - [ref t] => ext_expect!(cx, sp, atom_tok_to_str(t), usage), - _ => ext_bail!(cx, sp, usage), - }; - box ext_expect!(cx, sp, make_atom_result(cx, &*name), - &format!("Unknown static atom {}", &*name)) -} - -// Translate `ns!(HTML)` into `Namespace { atom: atom!("http://www.w3.org/1999/xhtml") }`. -// The argument is ASCII-case-insensitive. 
-pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - use string_cache_shared::ALL_NS; - - fn usage() -> String { - let ns_names: Vec<&'static str> = ALL_NS[1..].iter() - .map(|&(x, _)| x).collect(); - format!("Usage: ns!(HTML), case-insensitive. \ - Known namespaces: {}", - ns_names.join(" ")) - } - - let name = ext_expect!(cx, sp, match tt { - [ref t] => atom_tok_to_str(t), - _ => None, - }, &usage()); - - let &(_, url) = ext_expect!(cx, sp, - ALL_NS.iter().find(|&&(short, _)| short.eq_ignore_ascii_case(&*name)), - &usage()); - - // All of the URLs should be in the static atom table. - let AtomResult { expr, pat } = ext_expect!(cx, sp, make_atom_result(cx, url), - &format!("internal plugin error: can't find namespace url {}", url)); - - box AtomResult { - expr: quote_expr!(&mut *cx, ::string_cache::namespace::Namespace($expr)), - pat: quote_pat!(&mut *cx, ::string_cache::namespace::Namespace($pat)), - } -} diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs deleted file mode 100644 index ac4f6f3..0000000 --- a/plugin/src/lib.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#![crate_name="string_cache_plugin"] -#![crate_type="dylib"] - -#![feature(plugin_registrar, quote, box_syntax)] -#![feature(rustc_private, slice_patterns)] -#![cfg_attr(test, deny(warnings))] -#![allow(unused_imports)] // for quotes - -extern crate syntax; -extern crate rustc; - -#[macro_use] -extern crate lazy_static; - -#[macro_use] -extern crate mac; - -extern crate string_cache_shared; - -use rustc::plugin::Registry; - -mod atom; - -// NB: This needs to be public or we get a linker error. 
-#[plugin_registrar] -pub fn plugin_registrar(reg: &mut Registry) { - reg.register_macro("atom", atom::expand_atom); - reg.register_macro("ns", atom::expand_ns); -} diff --git a/shared/Cargo.toml b/shared/Cargo.toml deleted file mode 100644 index 60ab570..0000000 --- a/shared/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] - -name = "string_cache_shared" -version = "0.1.12" -authors = [ "The Servo Project Developers" ] -description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." -license = "MIT / Apache-2.0" -repository = "https://github.com/servo/string-cache" -build = "build.rs" - -[lib] - -name = "string_cache_shared" -path = "lib.rs" - -[dependencies] -debug_unreachable = "0.0.6" -phf_shared = "0.7.4" - -[build-dependencies] -phf_generator = "0.7.4" diff --git a/shared/build.rs b/shared/build.rs deleted file mode 100644 index 9a0cbeb..0000000 --- a/shared/build.rs +++ /dev/null @@ -1,37 +0,0 @@ -extern crate phf_generator; - -mod static_atom_list; - -use std::fs::File; -use std::io::{BufWriter, Write}; -use std::path::Path; - -fn main() { - let mut set = std::collections::HashSet::new(); - for atom in static_atom_list::ATOMS { - if !set.insert(atom) { - panic!("duplicate static atom `{:?}`", atom); - } - } - - let state = phf_generator::generate_hash(static_atom_list::ATOMS); - - let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - macro_rules! 
w { - ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } - } - w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); - w!(" key: {},", state.key); - w!(" disps: &["); - for &(d1, d2) in &state.disps { - w!(" ({}, {}),", d1, d2); - } - w!(" ],"); - w!(" atoms: &["); - for &idx in &state.map { - w!(" {:?},", static_atom_list::ATOMS[idx]); - } - w!(" ],"); - w!("}};"); -} diff --git a/shared/lib.rs b/shared/lib.rs deleted file mode 100644 index 9475bd6..0000000 --- a/shared/lib.rs +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Details of the atom representation that need to be shared between -//! the macros crate and the run-time library, in order to guarantee -//! consistency. - -#![cfg_attr(test, deny(warnings))] - -#[macro_use] extern crate debug_unreachable; -extern crate phf_shared; - -use std::ptr; -use std::slice; - -pub use self::UnpackedAtom::{Dynamic, Inline, Static}; - -include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); - -// FIXME(rust-lang/rust#18153): generate these from an enum -pub const DYNAMIC_TAG: u8 = 0b_00; -pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble -pub const STATIC_TAG: u8 = 0b_10; -pub const TAG_MASK: u64 = 0b_11; -pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. 
- -pub const MAX_INLINE_LEN: usize = 7; - -pub struct StaticAtomSet { - key: u64, - disps: &'static [(u32, u32)], - atoms: &'static [&'static str], -} - -impl StaticAtomSet { - #[inline] - pub fn get_index_or_hash(&self, s: &str) -> Result { - let hash = phf_shared::hash(s, self.key); - let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); - if self.atoms[index as usize] == s { - Ok(index) - } else { - Err(hash) - } - } - - #[inline] - pub fn index(&self, i: u32) -> Option<&'static str> { - self.atoms.get(i as usize).map(|&s| s) - } - - #[inline] - pub fn iter(&self) -> slice::Iter<&'static str> { - self.atoms.iter() - } -} - -// Atoms use a compact representation which fits this enum in a single u64. -// Inlining avoids actually constructing the unpacked representation in memory. -#[allow(missing_copy_implementations)] -pub enum UnpackedAtom { - /// Pointer to a dynamic table entry. Must be 16-byte aligned! - Dynamic(*mut ()), - - /// Length + bytes of string. - Inline(u8, [u8; 7]), - - /// Index in static interning table. 
- Static(u32), -} - -const STATIC_SHIFT_BITS: usize = 32; - -pub static ALL_NS: &'static [(&'static str, &'static str)] = &[ - ("", ""), - ("html", "http://www.w3.org/1999/xhtml"), - ("xml", "http://www.w3.org/XML/1998/namespace"), - ("xmlns", "http://www.w3.org/2000/xmlns/"), - ("xlink", "http://www.w3.org/1999/xlink"), - ("svg", "http://www.w3.org/2000/svg"), - ("mathml", "http://www.w3.org/1998/Math/MathML"), -]; - -struct RawSlice { - data: *const u8, - len: usize, -} - -#[cfg(target_endian = "little")] // Not implemented yet for big-endian -#[inline(always)] -unsafe fn inline_atom_slice(x: &u64) -> RawSlice { - let x: *const u64 = x; - RawSlice { - data: (x as *const u8).offset(1), - len: 7, - } -} - -pub fn pack_static(n: u32) -> u64 { - (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) -} - -impl UnpackedAtom { - #[inline(always)] - pub unsafe fn pack(self) -> u64 { - match self { - Static(n) => pack_static(n), - Dynamic(p) => { - let n = p as u64; - debug_assert!(0 == n & TAG_MASK); - n - } - Inline(len, buf) => { - debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); - { - let raw_slice = inline_atom_slice(&mut data); - let dest: &mut [u8] = slice::from_raw_parts_mut( - raw_slice.data as *mut u8, raw_slice.len); - copy_memory(&buf[..], dest); - } - data - } - } - } - - #[inline(always)] - pub unsafe fn from_packed(data: u64) -> UnpackedAtom { - debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - - match (data & TAG_MASK) as u8 { - DYNAMIC_TAG => Dynamic(data as *mut ()), - STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), - INLINE_TAG => { - let len = ((data & 0xf0) >> 4) as usize; - debug_assert!(len <= MAX_INLINE_LEN); - let mut buf: [u8; 7] = [0; 7]; - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); - copy_memory(src, &mut buf[..]); - Inline(len as u8, buf) - }, - _ => debug_unreachable!(), - } - } -} - -/// 
Used for a fast path in Clone and Drop. -#[inline(always)] -pub unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { - if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { - Some(data as *mut ()) - } else { - None - } -} - -/// For as_slice on inline atoms, we need a pointer into the original -/// string contents. -/// -/// It's undefined behavior to call this on a non-inline atom!! -#[inline(always)] -pub unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { - match UnpackedAtom::from_packed(*data) { - Inline(len, _) => { - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); - &src[..(len as usize)] - } - _ => debug_unreachable!(), - } -} - - -/// Copy of std::slice::bytes::copy_memory, which is unstable. -#[inline] -pub fn copy_memory(src: &[u8], dst: &mut [u8]) { - let len_src = src.len(); - assert!(dst.len() >= len_src); - // `dst` is unaliasable, so we know statically it doesn't overlap - // with `src`. - unsafe { - ptr::copy_nonoverlapping(src.as_ptr(), - dst.as_mut_ptr(), - len_src); - } -} diff --git a/src/atom/bench.rs b/src/atom/bench.rs index d7b18b9..864945c 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -135,7 +135,7 @@ macro_rules! 
bench_all ( use std::iter::repeat; use atom::Atom; - use string_cache_shared::{Static, Inline, Dynamic}; + use atom::UnpackedAtom::{Static, Inline, Dynamic}; use super::mk; diff --git a/src/atom/mod.rs b/src/atom/mod.rs index a6e361d..1b143e3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -14,18 +14,23 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::mem; use std::ops; +use std::ptr; +use std::slice; use std::str; use std::cmp::Ordering::{self, Equal}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET, - ENTRY_ALIGNMENT, copy_memory}; +use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, + ENTRY_ALIGNMENT, pack_static, StaticAtomSet}; +use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] use event::Event; +include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); + #[cfg(not(feature = "log-events"))] macro_rules! 
log (($e:expr) => (())); @@ -149,7 +154,7 @@ impl<'a> From<&'a str> for Atom { Ok(id) => Static(id as u32), Err(hash) => { let len = string_to_add.len(); - if len <= string_cache_shared::MAX_INLINE_LEN { + if len <= MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) @@ -169,7 +174,7 @@ impl Clone for Atom { #[inline(always)] fn clone(&self) -> Atom { unsafe { - match string_cache_shared::from_packed_dynamic(self.data) { + match from_packed_dynamic(self.data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); @@ -192,7 +197,7 @@ impl Drop for Atom { } unsafe { - match string_cache_shared::from_packed_dynamic(self.data) { + match from_packed_dynamic(self.data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { @@ -214,7 +219,7 @@ impl ops::Deref for Atom { unsafe { match self.unpack() { Inline(..) => { - let buf = string_cache_shared::inline_orig_bytes(&self.data); + let buf = inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), @@ -289,6 +294,121 @@ impl Deserialize for Atom { } } +// Atoms use a compact representation which fits this enum in a single u64. +// Inlining avoids actually constructing the unpacked representation in memory. +#[allow(missing_copy_implementations)] +enum UnpackedAtom { + /// Pointer to a dynamic table entry. Must be 16-byte aligned! + Dynamic(*mut ()), + + /// Length + bytes of string. + Inline(u8, [u8; 7]), + + /// Index in static interning table. 
+ Static(u32), +} + +struct RawSlice { + data: *const u8, + len: usize, +} + +#[cfg(target_endian = "little")] // Not implemented yet for big-endian +#[inline(always)] +unsafe fn inline_atom_slice(x: &u64) -> RawSlice { + let x: *const u64 = x; + RawSlice { + data: (x as *const u8).offset(1), + len: 7, + } +} + +impl UnpackedAtom { + #[inline(always)] + unsafe fn pack(self) -> u64 { + match self { + Static(n) => pack_static(n), + Dynamic(p) => { + let n = p as u64; + debug_assert!(0 == n & TAG_MASK); + n + } + Inline(len, buf) => { + debug_assert!((len as usize) <= MAX_INLINE_LEN); + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); + { + let raw_slice = inline_atom_slice(&mut data); + let dest: &mut [u8] = slice::from_raw_parts_mut( + raw_slice.data as *mut u8, raw_slice.len); + copy_memory(&buf[..], dest); + } + data + } + } + } + + #[inline(always)] + unsafe fn from_packed(data: u64) -> UnpackedAtom { + debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged + + match (data & TAG_MASK) as u8 { + DYNAMIC_TAG => Dynamic(data as *mut ()), + STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), + INLINE_TAG => { + let len = ((data & 0xf0) >> 4) as usize; + debug_assert!(len <= MAX_INLINE_LEN); + let mut buf: [u8; 7] = [0; 7]; + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + copy_memory(src, &mut buf[..]); + Inline(len as u8, buf) + }, + _ => debug_unreachable!(), + } + } +} + +/// Used for a fast path in Clone and Drop. +#[inline(always)] +unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { + if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { + Some(data as *mut ()) + } else { + None + } +} + +/// For as_slice on inline atoms, we need a pointer into the original +/// string contents. +/// +/// It's undefined behavior to call this on a non-inline atom!! 
+#[inline(always)] +unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { + match UnpackedAtom::from_packed(*data) { + Inline(len, _) => { + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + &src[..(len as usize)] + } + _ => debug_unreachable!(), + } +} + + +/// Copy of std::slice::bytes::copy_memory, which is unstable. +#[inline] +fn copy_memory(src: &[u8], dst: &mut [u8]) { + let len_src = src.len(); + assert!(dst.len() >= len_src); + // `dst` is unaliasable, so we know statically it doesn't overlap + // with `src`. + unsafe { + ptr::copy_nonoverlapping(src.as_ptr(), + dst.as_mut_ptr(), + len_src); + } +} + #[cfg(all(test, feature = "unstable"))] mod bench; @@ -296,8 +416,9 @@ mod bench; mod tests { use std::mem; use std::thread; - use super::{Atom, StringCacheEntry}; - use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT}; + use super::{Atom, StringCacheEntry, STATIC_ATOM_SET}; + use super::UnpackedAtom::{Dynamic, Inline, Static}; + use shared::ENTRY_ALIGNMENT; #[test] fn test_as_slice() { @@ -435,7 +556,6 @@ mod tests { } fn check_static(s: &str, x: Atom) { - use string_cache_shared::STATIC_ATOM_SET; assert_eq_fmt!("0x{:016X}", x.data, Atom::from(s).data); assert_eq!(0x2, x.data & 0xFFFF_FFFF); // The index is unspecified by phf. 
@@ -526,7 +646,7 @@ mod tests { #[cfg(feature = "unstable")] #[test] fn atom_drop_is_idempotent() { - use string_cache_shared::from_packed_dynamic; + use super::from_packed_dynamic; unsafe { assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); } diff --git a/src/lib.rs b/src/lib.rs index 1f9c4b2..65ad039 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,43 +12,50 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] -#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] +#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag))] #![cfg_attr(feature = "heap_size", feature(plugin, custom_derive))] -#![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] #![cfg_attr(feature = "heap_size", plugin(heapsize_plugin))] -#[cfg(all(test, feature = "unstable"))] -extern crate test; - -#[macro_use] -extern crate lazy_static; - -#[cfg(test)] -extern crate rand; - -#[cfg(feature = "log-events")] -extern crate rustc_serialize; - -#[cfg(feature = "heap_size")] -extern crate heapsize; - +#[cfg(all(test, feature = "unstable"))] extern crate test; +#[cfg(feature = "log-events")] extern crate rustc_serialize; +#[cfg(feature = "heap_size")] extern crate heapsize; +#[cfg(test)] extern crate rand; +#[macro_use] extern crate lazy_static; +#[macro_use] extern crate debug_unreachable; extern crate serde; - -extern crate string_cache_shared; +extern crate phf_shared; pub use atom::Atom; pub use namespace::{Namespace, QualName}; #[macro_export] -macro_rules! qualname (($ns:tt, $local:tt) => ( - ::string_cache::namespace::QualName { - ns: ns!($ns), - local: atom!($local), +macro_rules! qualname { + ("", $local:tt) => { + $crate::namespace::QualName { + ns: ns!(), + local: atom!($local), + } + }; + ($ns:tt, $local:tt) => { + $crate::namespace::QualName { + ns: ns!($ns), + local: atom!($local), + } } -)); +} + +#[macro_export] +macro_rules! 
ns { + () => { $crate::Namespace(atom!("")) }; + (html) => { $crate::Namespace(atom!("http://www.w3.org/1999/xhtml")) }; + (xml) => { $crate::Namespace(atom!("http://www.w3.org/XML/1998/namespace")) }; + (xmlns) => { $crate::Namespace(atom!("http://www.w3.org/2000/xmlns/")) }; + (xlink) => { $crate::Namespace(atom!("http://www.w3.org/1999/xlink")) }; + (svg) => { $crate::Namespace(atom!("http://www.w3.org/2000/svg")) }; + (mathml) => { $crate::Namespace(atom!("http://www.w3.org/1998/Math/MathML")) }; +} -#[cfg(not(feature = "unstable"))] -include!(concat!(env!("OUT_DIR"), "/ns_atom_macros_without_plugin.rs")); +include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); #[cfg(feature = "log-events")] #[macro_use] @@ -56,6 +63,7 @@ pub mod event; pub mod atom; pub mod namespace; +pub mod shared; // A private module so that macro-expanded idents like // `::string_cache::atom::Atom` will also work in this crate. diff --git a/src/namespace.rs b/src/namespace.rs index e69eda3..b80dbb0 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -42,7 +42,7 @@ mod tests { #[test] fn ns_macro() { - assert_eq!(ns!(""), Namespace(Atom::from(""))); + assert_eq!(ns!(), Namespace(Atom::from(""))); assert_eq!(ns!(html), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); assert_eq!(ns!(xml), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); @@ -50,26 +50,19 @@ mod tests { assert_eq!(ns!(xlink), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); assert_eq!(ns!(svg), Namespace(Atom::from("http://www.w3.org/2000/svg"))); assert_eq!(ns!(mathml), Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); - - assert_eq!(ns!(HtMl), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xMl), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(XmLnS), Namespace(Atom::from("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xLiNk), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(SvG), 
Namespace(Atom::from("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mAtHmL), Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); } #[test] fn qualname() { - assert_eq!(QualName::new(ns!(""), atom!("")), - QualName { ns: ns!(""), local: Atom::from("") }); - assert_eq!(QualName::new(ns!(XML), atom!(base)), - QualName { ns: ns!(XML), local: atom!(base) }); + assert_eq!(QualName::new(ns!(), atom!("")), + QualName { ns: ns!(), local: Atom::from("") }); + assert_eq!(QualName::new(ns!(xml), atom!(base)), + QualName { ns: ns!(xml), local: atom!(base) }); } #[test] fn qualname_macro() { - assert_eq!(qualname!("", ""), QualName { ns: ns!(""), local: atom!("") }); - assert_eq!(qualname!(XML, base), QualName { ns: ns!(XML), local: atom!(base) }); + assert_eq!(qualname!("", ""), QualName { ns: ns!(), local: atom!("") }); + assert_eq!(qualname!(xml, base), QualName { ns: ns!(xml), local: atom!(base) }); } } diff --git a/src/shared.rs b/src/shared.rs new file mode 100644 index 0000000..a653872 --- /dev/null +++ b/src/shared.rs @@ -0,0 +1,54 @@ +// Copyright 2015 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use phf_shared; + +// FIXME(rust-lang/rust#18153): generate these from an enum +pub const DYNAMIC_TAG: u8 = 0b_00; +pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble +pub const STATIC_TAG: u8 = 0b_10; +pub const TAG_MASK: u64 = 0b_11; +pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. 
+ +pub const MAX_INLINE_LEN: usize = 7; + +pub const STATIC_SHIFT_BITS: usize = 32; + +pub fn pack_static(n: u32) -> u64 { + (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) +} + +pub struct StaticAtomSet { + pub key: u64, + pub disps: &'static [(u32, u32)], + pub atoms: &'static [&'static str], +} + +impl StaticAtomSet { + #[inline] + pub fn get_index_or_hash(&self, s: &str) -> Result { + let hash = phf_shared::hash(s, self.key); + let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); + if self.atoms[index as usize] == s { + Ok(index) + } else { + Err(hash) + } + } + + #[inline] + pub fn index(&self, i: u32) -> Option<&'static str> { + self.atoms.get(i as usize).map(|&s| s) + } + + #[inline] + pub fn iter(&self) -> ::std::slice::Iter<&'static str> { + self.atoms.iter() + } +} diff --git a/shared/static_atom_list.rs b/src/static_atom_list.rs similarity index 100% rename from shared/static_atom_list.rs rename to src/static_atom_list.rs From 4d505d679f3a1ba8ca243907093f30a71c9dccb3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Nov 2015 00:39:21 +0100 Subject: [PATCH 166/379] Drop support for unquoted identifiers in atom!() --- build.rs | 22 ++-------------------- src/atom/bench.rs | 6 +++--- src/atom/mod.rs | 19 +++++++++---------- src/namespace.rs | 6 +++--- 4 files changed, 17 insertions(+), 36 deletions(-) diff --git a/build.rs b/build.rs index b3f870c..7571868 100644 --- a/build.rs +++ b/build.rs @@ -60,10 +60,8 @@ fn write_atom_macro(hash_state: &phf_generator::HashState) { writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! 
atom {{").unwrap(); for &s in set.iter() { - if is_ident(s) { - writeln!(file, r"( {} ) => {{ {} }};", s, atom(&set, s)).unwrap(); - } - writeln!(file, r"({:?}) => {{ {} }};", s, atom(&set, s)).unwrap(); + let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); + writeln!(file, r"({:?}) => {{ $crate::Atom {{ data: 0x{:x} }} }};", s, data).unwrap(); } writeln!(file, r"}}").unwrap(); } @@ -73,19 +71,3 @@ fn leak(v: Vec) -> &'static [T] { mem::forget(v); slice } - -fn atom(set: &shared::StaticAtomSet, s: &str) -> String { - let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); - format!("$crate::Atom {{ data: 0x{:x} }}", data) -} - -fn is_ident(s: &str) -> bool { - let mut chars = s.chars(); - !s.is_empty() && match chars.next().unwrap() { - 'a'...'z' | 'A'...'Z' | '_' => true, - _ => false - } && chars.all(|c| match c { - 'a'...'z' | 'A'...'Z' | '_' | '0'...'9' => true, - _ => false - }) -} diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 864945c..96b0790 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -157,7 +157,7 @@ bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_ref clone is_static lt] - for static_atom = atom!(a), atom!(b)); + for static_atom = atom!("a"), atom!("b")); bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); @@ -175,10 +175,10 @@ bench_all!([intern as_ref clone is_static] for static_at_runtime = mk("a"), mk("b")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = atom!(a), mk("f")); + for static_vs_inline = atom!("a"), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = atom!(a), mk(super::longer_dynamic_b)); + for static_vs_dynamic = atom!("a"), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 1b143e3..cc0a85e 
100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -567,9 +567,9 @@ mod tests { // static atom table, the tag values, etc. // Static atoms - check_static("a", atom!(a)); - check_static("address", atom!(address)); - check_static("area", atom!(area)); + check_static("a", atom!("a")); + check_static("address", atom!("address")); + check_static("area", atom!("area")); // Inline atoms check("e", 0x0000_0000_0000_6511); @@ -600,7 +600,6 @@ mod tests { #[test] fn atom_macro() { - assert_eq!(atom!(body), Atom::from("body")); assert_eq!(atom!("body"), Atom::from("body")); assert_eq!(atom!("font-weight"), Atom::from("font-weight")); } @@ -608,20 +607,20 @@ mod tests { #[test] fn match_atom() { assert_eq!(2, match Atom::from("head") { - atom!(br) => 1, - atom!(html) | atom!(head) => 2, + atom!("br") => 1, + atom!("html") | atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("body") { - atom!(br) => 1, - atom!(html) | atom!(head) => 2, + atom!("br") => 1, + atom!("html") | atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("zzzzzz") { - atom!(br) => 1, - atom!(html) | atom!(head) => 2, + atom!("br") => 1, + atom!("html") | atom!("head") => 2, _ => 3, }); } diff --git a/src/namespace.rs b/src/namespace.rs index b80dbb0..6fe0564 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -56,13 +56,13 @@ mod tests { fn qualname() { assert_eq!(QualName::new(ns!(), atom!("")), QualName { ns: ns!(), local: Atom::from("") }); - assert_eq!(QualName::new(ns!(xml), atom!(base)), - QualName { ns: ns!(xml), local: atom!(base) }); + assert_eq!(QualName::new(ns!(xml), atom!("base")), + QualName { ns: ns!(xml), local: atom!("base") }); } #[test] fn qualname_macro() { assert_eq!(qualname!("", ""), QualName { ns: ns!(), local: atom!("") }); - assert_eq!(qualname!(xml, base), QualName { ns: ns!(xml), local: atom!(base) }); + assert_eq!(qualname!(xml, "base"), QualName { ns: ns!(xml), local: atom!("base") }); } } From c15bbf16c7945192a82e5d3180eb3aa80f631643 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Oliveira?= Date: Fri, 27 Nov 2015 18:19:05 +0000 Subject: [PATCH 167/379] add mouseover and beforeunload to atom list --- Cargo.toml | 2 +- src/static_atom_list.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3345041..c6390f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ version = "0.1.1" optional = true [dependencies.heapsize_plugin] -version = "0.1.0" +version = "0.1.1" optional = true [build-dependencies] diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 6ca0c77..17131e2 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -214,6 +214,7 @@ pub static ATOMS: &'static [&'static str] = &[ "bbox", "bdi", "bdo", + "beforeunload", "begin", "bevelled", "bgcolor", @@ -696,6 +697,7 @@ pub static ATOMS: &'static [&'static str] = &[ "moment", "momentabout", "movablelimits", + "mouseover", "mover", "mozbrowser", "mpadded", From 2dd32b7b994e5aebb920709697ec9387087bc94c Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 27 Nov 2015 12:26:00 -0800 Subject: [PATCH 168/379] Add "novalidate" to atom list. --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 17131e2..2efd5c9 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -740,6 +740,7 @@ pub static ATOMS: &'static [&'static str] = &[ "notin", "notprsubset", "notsubset", + "novalidate", "nowrap", "number", "numoctaves", From 1bd99f16042ef58d0d06ad0a28e61bf9d4382797 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Fri, 27 Nov 2015 16:15:27 -0500 Subject: [PATCH 169/379] Bump the version number. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c6390f2..7448f19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.0" +version = "0.2.1" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 9a2f529a4fe9a4ceb0a9eff42591b1531584263a Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sun, 29 Nov 2015 02:21:08 -0800 Subject: [PATCH 170/379] Add formnovalidate to atom list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 2efd5c9..ac7e7be 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -494,6 +494,7 @@ pub static ATOMS: &'static [&'static str] = &[ "format", "formenctype", "formmethod", + "formnovalidate", "formtarget", "frameborder", "framespacing", From be84acff1664ed906d84ffd25466376f2cba601c Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sun, 29 Nov 2015 12:34:38 -0800 Subject: [PATCH 171/379] Bump version number to v0.2.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7448f19..870f113 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.1" +version = "0.2.2" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 9ce5e1984abafcea5cd4fb6700009791320c618a Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 20:38:24 -0500 Subject: [PATCH 172/379] Add 'change' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/change --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ac7e7be..6a64ef4 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -258,6 +258,7 @@ pub static ATOMS: &'static [&'static str] = &[ "cellpadding", "cellspacing", "center", + "change", "char", "charoff", "charset", From f0e2f66116c3b110c9a7c6c2c60fc6fbab4d697a Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 20:39:43 -0500 Subject: [PATCH 173/379] Add 'readystatechange' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/readystatechange --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 6a64ef4..ba938ce 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -927,6 +927,7 @@ pub static ATOMS: &'static [&'static str] = &[ "rationals", "rb", "readonly", + "readystatechange", "real", "reals", "rect", From 47c268b6e2d4512e9b238f8b76dac932baac702d Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 21:56:47 -0500 Subject: [PATCH 174/379] Add 'DOMContentLoaded' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/DOMContentLoaded --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ba938ce..6f65583 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -368,6 +368,7 @@ pub static ATOMS: &'static [&'static str] = &[ "dl", "domain", "domainofapplication", + "DOMContentLoaded", "dominant-baseline", "draggable", "dur", From 3ddd8f1592c956caa8b4b4cbf84f97ff4926a332 Mon Sep 17 00:00:00 2001 
From: Corey Farwell Date: Thu, 10 Dec 2015 21:59:46 -0500 Subject: [PATCH 175/379] Add 'load' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/load --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 6f65583..9ceb1d9 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -616,6 +616,7 @@ pub static ATOMS: &'static [&'static str] = &[ "list-style-position", "list-style-type", "ln", + "load", "local", "log", "logbase", From 2f83b7f9df9f57979d874475972c2b5d02016761 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:09:35 -0500 Subject: [PATCH 176/379] Add 'afterscriptexecute' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/afterscriptexecute --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 9ceb1d9..bc36d14 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -103,6 +103,7 @@ pub static ATOMS: &'static [&'static str] = &[ "active", "actuate", "additive", + "afterscriptexecute", "align", "alignment-baseline", "alignmentscope", From 15a51ffc88a4411ebe032526e50202f135767391 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:10:34 -0500 Subject: [PATCH 177/379] Add 'beforescriptexecute' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/beforescriptexecute --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index bc36d14..a71f205 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -215,6 +215,7 @@ pub static ATOMS: &'static [&'static str] = &[ "bbox", "bdi", "bdo", + "beforescriptexecute", "beforeunload", "begin", "bevelled", From 2388dc68b746df1ef86864cd6e3f5e7e786c3ee4 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:33:07 -0500 Subject: [PATCH 178/379] 
Add 'loadend' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/loadend --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index a71f205..defc5ea 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -619,6 +619,7 @@ pub static ATOMS: &'static [&'static str] = &[ "list-style-type", "ln", "load", + "loadend", "local", "log", "logbase", From 4eb03dc4bb90138f43bf120af49c36c485e27950 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:42:30 -0500 Subject: [PATCH 179/379] Add 'loadstart' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/loadstart --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index defc5ea..aab11ad 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -619,6 +619,7 @@ pub static ATOMS: &'static [&'static str] = &[ "list-style-type", "ln", "load", + "loadstart", "loadend", "local", "log", From 466899f5819650b91e74b640031627e5a60cf6b4 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 22:51:12 -0500 Subject: [PATCH 180/379] Add 'webglcontextcreationerror' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/webglcontextcreationerror --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index aab11ad..cd45d06 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -1168,6 +1168,7 @@ pub static ATOMS: &'static [&'static str] = &[ "v-mathematical", "vspace", "wbr", + "webglcontextcreationerror", "week", "when", "white-space", From fc91e33e70cf527981d71f1406726c073fba336b Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:07:13 -0500 Subject: [PATCH 181/379] Add 'storage' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/storage --- 
src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index cd45d06..6eb4178 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -1049,6 +1049,7 @@ pub static ATOMS: &'static [&'static str] = &[ "stop", "stop-color", "stop-opacity", + "storage", "stretchy", "strike", "strikethrough-position", From 085b999d846c6fe40a1ec03d7b21407effa4906d Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:10:53 -0500 Subject: [PATCH 182/379] Add 'message' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/message --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 6eb4178..3950558 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -683,6 +683,7 @@ pub static ATOMS: &'static [&'static str] = &[ "menu", "menuitem", "merror", + "message", "metadata", "meter", "method", From 92b83cae2767bc304724f35aa736cba998b3cf43 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:16:03 -0500 Subject: [PATCH 183/379] Add 'abort' to static atom list https://developer.mozilla.org/en-US/docs/Web/Events/abort --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 3950558..58614ca 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -89,6 +89,7 @@ pub static ATOMS: &'static [&'static str] = &[ "http://www.w3.org/1998/Math/MathML", "abbr", + "abort", "abs", "accent", "accent-height", From 6a07c773b8019f03e7c3ec6a714268c2f745f0f1 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Thu, 10 Dec 2015 23:23:56 -0500 Subject: [PATCH 184/379] Bump version: 0.2.2 -> 0.2.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 870f113..145a480 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = 
"string_cache" -version = "0.2.2" +version = "0.2.3" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From dc5433833d44c99046e6f27c1e9c8110b4b53fb3 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 11 Dec 2015 11:07:36 -0800 Subject: [PATCH 185/379] Add invalid to the list and bump version number --- Cargo.toml | 2 +- src/static_atom_list.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 145a480..7a6e41b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.3" +version = "0.2.4" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 58614ca..1bcaf98 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -565,6 +565,7 @@ pub static ATOMS: &'static [&'static str] = &[ "intercept", "intersect", "interval", + "invalid", "inverse", "irrelevant", "isindex", From aa43810dddcd5830464c50cf65303ced400050f5 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Mon, 30 Nov 2015 11:17:32 -0600 Subject: [PATCH 186/379] Added more atoms to static_atom_list. The list now includes: * all the CSS attributes and DOM events used by Servo, * the element ids such as #text used by Servo, * the User Agent strings such as Mozilla used by Servo. At this point, every string constant in the Servo code base can be atomized. 
--- src/static_atom_list.rs | 51 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 1bcaf98..e97b51f 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -88,6 +88,20 @@ pub static ATOMS: &'static [&'static str] = &[ "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML", + "#text", + "#comment", + "#document", + "#document-fragment", + + // User agent strings + "4.0", + "Gecko", + "Linux", + "Mac", + "Mozilla", + "Netscape", + "Win32", + "abbr", "abort", "abs", @@ -219,10 +233,12 @@ pub static ATOMS: &'static [&'static str] = &[ "beforescriptexecute", "beforeunload", "begin", + "bevel", "bevelled", "bgcolor", "bias", "blink", + "blob", "border", "border-bottom", "border-bottom-color", @@ -249,6 +265,7 @@ pub static ATOMS: &'static [&'static str] = &[ "border-top-width", "border-width", "bottom", + "butt", "bvar", "by", "calcmode", @@ -327,6 +344,7 @@ pub static ATOMS: &'static [&'static str] = &[ "csymbol", "curl", "cursor", + "customevent", "cx", "cy", "d", @@ -396,6 +414,7 @@ pub static ATOMS: &'static [&'static str] = &[ "error", "eulergamma", "event", + "events", "exists", "exp", "exponent", @@ -469,6 +488,8 @@ pub static ATOMS: &'static [&'static str] = &[ "filterRes", "filterunits", "filterUnits", + "float", + "flood", "flood-color", "flood-opacity", "floor", @@ -541,6 +562,7 @@ pub static ATOMS: &'static [&'static str] = &[ "href", "hreflang", "hspace", + "htmlevents", "http-equiv", "i", "icon", @@ -554,6 +576,7 @@ pub static ATOMS: &'static [&'static str] = &[ "imaginaryi", "img", "implies", + "important", "in", "in2", "index", @@ -581,14 +604,18 @@ pub static ATOMS: &'static [&'static str] = &[ "kernelunitlength", "kernelUnitLength", "kerning", + "keyboardevent", "keydown", + "keyevents", "keygen", "keypoints", "keyPoints", + "keypress", "keysplines", "keySplines", "keytimes", "keyTimes", + "keyup", "label", "lambda", 
"lang", @@ -686,6 +713,7 @@ pub static ATOMS: &'static [&'static str] = &[ "menuitem", "merror", "message", + "messageevent", "metadata", "meter", "method", @@ -699,16 +727,21 @@ pub static ATOMS: &'static [&'static str] = &[ "minus", "min-width", "missing-glyph", + "miter", "mlabeledtr", "mmultiscripts", "mn", "mo", "mode", - "month", "moment", "momentabout", - "movablelimits", + "month", + "mousedown", + "mouseevent", + "mouseevents", "mouseover", + "mouseup", + "movablelimits", "mover", "mozbrowser", "mpadded", @@ -729,6 +762,7 @@ pub static ATOMS: &'static [&'static str] = &[ "mtext", "mtr", "multicol", + "multipart/form-data", "multiple", "munder", "munderover", @@ -739,6 +773,7 @@ pub static ATOMS: &'static [&'static str] = &[ "neq", "nest", "nextid", + "no message", "nobr", "noembed", "nohref", @@ -757,8 +792,10 @@ pub static ATOMS: &'static [&'static str] = &[ "numoctaves", "numOctaves", "occurrence", + "off", "offset", "ol", + "on", "onabort", "onactivate", "onafterprint", @@ -910,6 +947,7 @@ pub static ATOMS: &'static [&'static str] = &[ "polygon", "polyline", "position", + "post", "poster", "power", "prefetch", @@ -933,6 +971,7 @@ pub static ATOMS: &'static [&'static str] = &[ "radio", "radiogroup", "radius", + "range", "rationals", "rb", "readonly", @@ -964,6 +1003,7 @@ pub static ATOMS: &'static [&'static str] = &[ "requiredfeatures", "requiredFeatures", "reset", + "resize", "restart", "result", "rev", @@ -971,6 +1011,7 @@ pub static ATOMS: &'static [&'static str] = &[ "role", "root", "rotate", + "round", "rowalign", "rowlines", "rows", @@ -1036,6 +1077,7 @@ pub static ATOMS: &'static [&'static str] = &[ "speed", "spreadmethod", "spreadMethod", + "square", "src", "srcdoc", "standby", @@ -1097,6 +1139,7 @@ pub static ATOMS: &'static [&'static str] = &[ "tel", "tendsto", "text", + "text/plain", "text-align", "text-anchor", "text-decoration", @@ -1112,6 +1155,7 @@ pub static ATOMS: &'static [&'static str] = &[ "times", "to", "top", + "touchevent", 
"transform", "transition-delay", "transition-duration", @@ -1127,6 +1171,8 @@ pub static ATOMS: &'static [&'static str] = &[ "u", "u1", "u2", + "uievent", + "uievents", "ul", "underline-position", "underline-thickness", @@ -1140,6 +1186,7 @@ pub static ATOMS: &'static [&'static str] = &[ "url", "use", "usemap", + "UTF-8", "valign", "v-alphabetic", "value", From 73c022344ba818d672f01b49d77fa4d58a0ee115 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Tue, 15 Dec 2015 13:29:18 -0600 Subject: [PATCH 187/379] Add non-mutating AsciiExt methods to Atom. We add the following methods from AsciiExt: ``` pub fn is_ascii(&self) -> bool; pub fn to_ascii_uppercase(&self) -> Atom; pub fn to_ascii_lowercase(&self) -> Atom; pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool; ``` We can't implement AsciiExt in full because it requires mutable access. --- src/atom/mod.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index cc0a85e..dd01db3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -17,6 +17,7 @@ use std::ops; use std::ptr; use std::slice; use std::str; +use std::ascii::AsciiExt; use std::cmp::Ordering::{self, Equal}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; @@ -294,6 +295,31 @@ impl Deserialize for Atom { } } +// AsciiExt requires mutating methods, so we just implement the non-mutating ones. +// We don't need to implement is_ascii because there's no performance improvement +// over the one from &str. 
+impl Atom { + pub fn to_ascii_uppercase(&self) -> Atom { + if self.chars().all(char::is_uppercase) { + self.clone() + } else { + Atom::from(&*((&**self).to_ascii_uppercase())) + } + } + + pub fn to_ascii_lowercase(&self) -> Atom { + if self.chars().all(char::is_lowercase) { + self.clone() + } else { + Atom::from(&*((&**self).to_ascii_lowercase())) + } + } + + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + (self == other) || (&**self).eq_ignore_ascii_case(&**other) + } +} + // Atoms use a compact representation which fits this enum in a single u64. // Inlining avoids actually constructing the unpacked representation in memory. #[allow(missing_copy_implementations)] @@ -655,4 +681,35 @@ mod tests { fn string_cache_entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } + + #[test] + fn test_ascii_lowercase() { + assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); + assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); + assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); + } + + #[test] + fn test_ascii_uppercase() { + assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); + assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN FOX!")); + assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); + } + + #[test] + fn test_eq_ignore_ascii_case() { + assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); + assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); + assert!(Atom::from("Je vais à 
Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); + assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); + assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); + assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); + } + } From 02f022def51ccf6c0d9383e8468246bf4f1b2242 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Tue, 15 Dec 2015 14:57:32 -0600 Subject: [PATCH 188/379] Version bump. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7a6e41b..7854557 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.4" +version = "0.2.5" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 4c331c6f4d8c7a2000b0d29045d7645975ad4ba6 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 2 Feb 2016 10:29:29 +0100 Subject: [PATCH 189/379] Allow heapsize 0.2 --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7854557..21e981c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.5" +version = "0.2.6" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -41,7 +41,7 @@ version = "0" optional = true [dependencies.heapsize] -version = "0.1.1" +version = ">=0.1.1, <0.3" optional = true [dependencies.heapsize_plugin] From a4abe871b56f7fef35af7bf9cdd10fd593fe4485 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Thu, 4 Feb 2016 01:30:56 +0100 Subject: [PATCH 190/379] Derive HeapSizeOf for QualName --- Cargo.toml | 2 +- src/namespace.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 21e981c..9c0f6fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.6" +version = "0.2.7" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/namespace.rs b/src/namespace.rs index 6fe0564..4b4d142 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -20,6 +20,7 @@ pub struct Namespace(pub Atom); /// A name with a namespace. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] +#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct QualName { pub ns: Namespace, pub local: Atom, From 4daa491cb9e369ec07ba32b75e8243983b6a8ee9 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Fri, 5 Feb 2016 17:10:52 -0500 Subject: [PATCH 191/379] Add preload to static list. 
--- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index e97b51f..a169d2e 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -56,6 +56,7 @@ pub static ATOMS: &'static [&'static str] = &[ "param", "plaintext", "pre", + "preload", "rp", "rt", "script", From 2e5a0365babf7c2554c99c40551a80fadb40166d Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 5 Feb 2016 17:33:22 -0500 Subject: [PATCH 192/379] Version bump to 0.2.8 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9c0f6fc..2395fc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.7" +version = "0.2.8" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From d1e0142e857b496dcbb2ffe5d0248773afb8a443 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 10 Feb 2016 16:17:47 +0100 Subject: [PATCH 193/379] Allow heapsize 0.3 --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2395fc8..8e06f29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.8" +version = "0.2.9" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -41,7 +41,7 @@ version = "0" optional = true [dependencies.heapsize] -version = ">=0.1.1, <0.3" +version = ">=0.1.1, <0.4" optional = true [dependencies.heapsize_plugin] From 37c7ec607a8dd1d35ca92c661e810b5683823e1b Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 02:07:45 +0100 Subject: [PATCH 194/379] Implement From for Atom --- src/atom/mod.rs | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index dd01db3..ab680a4 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -11,14 +11,15 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::ascii::AsciiExt; +use std::borrow::Cow; +use std::cmp::Ordering::{self, Equal}; use std::fmt; use std::mem; use std::ops; use std::ptr; use std::slice; use std::str; -use std::ascii::AsciiExt; -use std::cmp::Ordering::{self, Equal}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; @@ -53,13 +54,13 @@ struct StringCacheEntry { } impl StringCacheEntry { - fn new(next: Option>, hash: u64, string_to_add: &str) + fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { StringCacheEntry { next_in_bucket: next, hash: hash, ref_count: AtomicIsize::new(1), - string: String::from(string_to_add), + string: string, } } } @@ -71,14 +72,14 @@ impl StringCache { } } - fn add(&mut self, string_to_add: &str, hash: u64) -> *mut StringCacheEntry { + fn add(&mut self, string: Cow, hash: u64) -> *mut StringCacheEntry { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { - if entry.hash == hash && entry.string == string_to_add { + if entry.hash == hash && entry.string == &*string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { return &mut **entry; } @@ -94,11 +95,17 @@ impl StringCache { } } 
debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let string = string.into_owned(); + let _string_clone = if cfg!(feature = "log-events") { + string.clone() + } else { + "".to_owned() + }; let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index].take(), hash, string_to_add)); + self.buckets[bucket_index].take(), hash, string)); let ptr: *mut StringCacheEntry = &mut *entry; self.buckets[bucket_index] = Some(entry); - log!(Event::Insert(ptr as u64, String::from(string_to_add))); + log!(Event::Insert(ptr as u64, _string_clone)); ptr } @@ -148,10 +155,10 @@ impl Atom { } } -impl<'a> From<&'a str> for Atom { +impl<'a> From> for Atom { #[inline] - fn from(string_to_add: &str) -> Atom { - let unpacked = match STATIC_ATOM_SET.get_index_or_hash(string_to_add) { + fn from(string_to_add: Cow<'a, str>) -> Atom { + let unpacked = match STATIC_ATOM_SET.get_index_or_hash(&*string_to_add) { Ok(id) => Static(id as u32), Err(hash) => { let len = string_to_add.len(); @@ -171,6 +178,20 @@ impl<'a> From<&'a str> for Atom { } } +impl<'a> From<&'a str> for Atom { + #[inline] + fn from(string_to_add: &str) -> Atom { + Atom::from(Cow::Borrowed(string_to_add)) + } +} + +impl From for Atom { + #[inline] + fn from(string_to_add: String) -> Atom { + Atom::from(Cow::Owned(string_to_add)) + } +} + impl Clone for Atom { #[inline(always)] fn clone(&self) -> Atom { @@ -712,4 +733,8 @@ mod tests { assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); } + #[test] + fn test_from_string() { + assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); + } } From 17279dd59910ba1412034364df2e512cd47c0b94 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 10:58:17 +0100 Subject: [PATCH 195/379] Allow to retrieve the heap size of the dynamic string cache --- src/atom/mod.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index dd01db3..086181a 100644 
--- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -9,6 +9,9 @@ #![allow(non_upper_case_globals)] +#[cfg(feature = "heap_size")] +use heapsize::HeapSizeOf; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; @@ -37,14 +40,34 @@ macro_rules! log (($e:expr) => (())); const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; + struct StringCache { buckets: [Option>; NB_BUCKETS], } +#[cfg(feature = "heap_size")] +impl HeapSizeOf for StringCache { + fn heap_size_of_children(&self) -> usize { + self.buckets.iter().fold(0, |size, bucket| size + bucket.heap_size_of_children()) + } +} + lazy_static! { static ref STRING_CACHE: Mutex = Mutex::new(StringCache::new()); } +/// A token that represents the heap used by the dynamic string cache. +#[cfg(feature = "heap_size")] +pub struct StringCacheHeap; + +#[cfg(feature = "heap_size")] +impl HeapSizeOf for StringCacheHeap { + fn heap_size_of_children(&self) -> usize { + STRING_CACHE.lock().unwrap().heap_size_of_children() + } +} + +#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] struct StringCacheEntry { next_in_bucket: Option>, hash: u64, From d053d557821fa62d9746f2cf47961a2331b36b4c Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 14:49:41 +0100 Subject: [PATCH 196/379] Test on OS X through Travis --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 3c4848a..75b5bc3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,9 @@ rust: - nightly - beta - stable +os: + - linux + - osx script: - cargo build - cargo test From dda983bcf9ff4fb1f3523e43330b4606bb6c6b7e Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 24 Feb 2016 15:59:49 +0100 Subject: [PATCH 197/379] Bump to 0.2.10 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8e06f29..19c6bbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = 
"0.2.9" +version = "0.2.10" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 1466ce68d7a0b4082a529fa42be29fb74fa8fa65 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 27 Feb 2016 19:22:06 +0100 Subject: [PATCH 198/379] Update to Rust 2016-02-26 --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 19c6bbc..a5a966e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.10" +version = "0.2.11" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -29,7 +29,7 @@ heap_size = ["heapsize", "heapsize_plugin"] [dependencies] lazy_static = "0.1.10" -serde = "0.6" +serde = ">=0.6, <0.8" phf_shared = "0.7.4" debug_unreachable = "0.0.6" @@ -45,7 +45,7 @@ version = ">=0.1.1, <0.4" optional = true [dependencies.heapsize_plugin] -version = "0.1.1" +version = "0.1.4" optional = true [build-dependencies] From 805296783f926595e043a53167600165298adc18 Mon Sep 17 00:00:00 2001 From: Arnaud Marant Date: Sat, 9 Apr 2016 21:49:09 +0200 Subject: [PATCH 199/379] add dirname atom for HTMLInputElement attribute related to servo issue : https://github.com/servo/servo/issues/10491 --- Cargo.toml | 2 +- src/static_atom_list.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a5a966e..02b0425 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.11" +version = "0.2.12" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index a169d2e..ebb4965 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -379,6 +379,7 @@ pub static ATOMS: &'static [&'static str] = &[ "diffuseConstant", "dir", "direction", + "dirname", "disabled", "discard", "display", From dc892173fcb6367f813aa6538dc23edd1b631608 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 12 Apr 2016 09:56:08 +0200 Subject: [PATCH 200/379] Define Atom::get_hash(). This is already used by rust-selectors for its Bloom filter, and is implemented there by accessing the data field directly. Also, in a Gecko-based Atom implementation, the implementation will need to be different, so it's better to have it here. --- src/atom/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 80dc68e..f4d087a 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -176,6 +176,10 @@ impl Atom { unsafe fn unpack(&self) -> UnpackedAtom { UnpackedAtom::from_packed(self.data) } + + pub fn get_hash(&self) -> u32 { + ((self.data >> 32) ^ self.data) as u32 + } } impl<'a> From> for Atom { From 6b9ffe9205ef611f07d85ecf646f7fb9d062ceb1 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 12 Apr 2016 10:05:29 +0200 Subject: [PATCH 201/379] Bump version. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 02b0425..04860f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.12" +version = "0.2.13" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From f86afb6a88a4ec08faf3ec38acc5ad03d1d04e6c Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Thu, 14 Apr 2016 15:57:47 +0200 Subject: [PATCH 202/379] Update lazy_static. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 04860f4..292dbe9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ unstable = [] heap_size = ["heapsize", "heapsize_plugin"] [dependencies] -lazy_static = "0.1.10" +lazy_static = "0.2" serde = ">=0.6, <0.8" phf_shared = "0.7.4" debug_unreachable = "0.0.6" From 986df64f19d73033c61a6ed0b4304beef75bc50e Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Wed, 27 Apr 2016 18:55:11 +0100 Subject: [PATCH 203/379] Fix event log example --- .travis.yml | 3 ++- examples/event-log/src/main.rs | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 75b5bc3..e99d9a3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ script: - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features heap_size; fi" - - "cd examples/summarize-events/ && cargo build" + - "cd examples/event-log/ && cargo build && cd ../.." + - "cd examples/summarize-events/ && cargo build && cd ../.." 
notifications: webhooks: http://build.servo.org:54856/travis diff --git a/examples/event-log/src/main.rs b/examples/event-log/src/main.rs index 6efff10..89adfdf 100644 --- a/examples/event-log/src/main.rs +++ b/examples/event-log/src/main.rs @@ -13,19 +13,21 @@ use string_cache::Atom; use string_cache::event; use std::io; +use std::io::prelude::*; fn main() { println!("Reading stdin to end of file"); - let stdin = io::stdin().read_to_string().unwrap(); + let mut stdin = String::new(); + io::stdin().read_to_string(&mut stdin).unwrap(); let mut atoms = vec![]; - for word in stdin.as_slice().split(|c: char| c.is_whitespace()) { - atoms.push(Atom::from_slice(word)); + for word in stdin.split(|c: char| c.is_whitespace()) { + atoms.push(Atom::from(word)); } - let log = event::LOG.lock(); + let log = event::LOG.lock().unwrap(); - println!("Created {:u} atoms, logged {:u} events:", atoms.len(), log.len()); + println!("Created {} atoms, logged {} events:", atoms.len(), log.len()); for e in log.iter() { - println!("{}", e); + println!("{:?}", e); } } From 7fd2514e56612e4319090224bc3791e5866b46a3 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Mon, 2 May 2016 09:32:12 -0700 Subject: [PATCH 204/379] Update/specify dependency versions --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 292dbe9..98fe44d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,13 +31,13 @@ heap_size = ["heapsize", "heapsize_plugin"] lazy_static = "0.2" serde = ">=0.6, <0.8" phf_shared = "0.7.4" -debug_unreachable = "0.0.6" +debug_unreachable = "0.1.1" [dev-dependencies] -rand = "0" +rand = "0.3" [dependencies.rustc-serialize] -version = "0" +version = "0.3" optional = true [dependencies.heapsize] From 1a7316db9160e5bc1423687bb252c3e72819f313 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Mon, 2 May 2016 11:36:05 -0700 Subject: [PATCH 205/379] Release version 0.2.14 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/Cargo.toml b/Cargo.toml index 98fe44d..93c620e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.13" +version = "0.2.14" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 478c24c360947b93ae77abf535f6d9b39e0f0e3f Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Wed, 11 May 2016 16:18:46 +0200 Subject: [PATCH 206/379] Implement comparison between &Atom and &str. --- Cargo.toml | 2 +- src/atom/mod.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 93c620e..1aa67e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.14" +version = "0.2.15" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index f4d087a..2136a8b 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -182,6 +182,18 @@ impl Atom { } } +impl PartialEq for Atom { + fn eq(&self, other: &str) -> bool { + &self[..] == other + } +} + +impl PartialEq for str { + fn eq(&self, other: &Atom) -> bool { + self == &other[..] + } +} + impl<'a> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Atom { From 2acc85255007c76c5cbb5a597504e6ce34775cb9 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Tue, 17 May 2016 15:01:23 -0700 Subject: [PATCH 207/379] Add font families to the static atoms. 
--- src/static_atom_list.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ebb4965..00433fa 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -103,6 +103,13 @@ pub static ATOMS: &'static [&'static str] = &[ "Netscape", "Win32", + // Font families + "serif", + "sans-serif", + "cursive", + "fantasy", + "monospace", + "abbr", "abort", "abs", @@ -1054,7 +1061,6 @@ pub static ATOMS: &'static [&'static str] = &[ "sep", "separator", "separators", - "serif", "set", "setdiff", "shape", From c5d1e4ec9275f24c944d1d166eafa2c00fd4c2f0 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Fri, 13 May 2016 19:49:05 -0700 Subject: [PATCH 208/379] Implement trivial BorrowedAtom, BorrowedNamespace, with_str, and eq_str_ignore_ascii_case. --- Cargo.toml | 2 +- src/atom/mod.rs | 26 +++++++++++++++++++++++++- src/lib.rs | 4 ++-- src/namespace.rs | 16 ++++++++++++++++ 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1aa67e4..4e7135e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.15" +version = "0.2.16" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 2136a8b..23ea24e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -171,6 +171,21 @@ pub struct Atom { pub data: u64, } +pub struct BorrowedAtom<'a>(pub &'a Atom); + +impl<'a> ops::Deref for BorrowedAtom<'a> { + type Target = Atom; + fn deref(&self) -> &Atom { + self.0 + } +} + +impl<'a> PartialEq for BorrowedAtom<'a> { + fn eq(&self, other: &Atom) -> bool { + self.0 == other + } +} + impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { @@ -180,6 +195,11 @@ impl Atom { pub fn get_hash(&self) -> u32 { ((self.data >> 32) ^ self.data) as u32 } + + pub fn with_str(&self, cb: F) -> Output + where F: FnOnce(&str) -> Output { + cb(self) + } } impl PartialEq for Atom { @@ -376,7 +396,11 @@ impl Atom { } pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { - (self == other) || (&**self).eq_ignore_ascii_case(&**other) + (self == other) || self.eq_str_ignore_ascii_case(&**other) + } + + pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool { + (&**self).eq_ignore_ascii_case(other) } } diff --git a/src/lib.rs b/src/lib.rs index 65ad039..7c5bdac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,8 +25,8 @@ extern crate serde; extern crate phf_shared; -pub use atom::Atom; -pub use namespace::{Namespace, QualName}; +pub use atom::{Atom, BorrowedAtom}; +pub use namespace::{BorrowedNamespace, Namespace, QualName}; #[macro_export] macro_rules! qualname { diff --git a/src/namespace.rs b/src/namespace.rs index 4b4d142..cae98dd 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -10,6 +10,7 @@ //! **Note:** This may move as string-cache becomes less Web-specific. use atom::Atom; +use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. 
/// Whether a given string represents a namespace is contextual, so this is @@ -18,6 +19,21 @@ use atom::Atom; #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); +pub struct BorrowedNamespace<'a>(pub &'a Namespace); + +impl<'a> ops::Deref for BorrowedNamespace<'a> { + type Target = Namespace; + fn deref(&self) -> &Namespace { + self.0 + } +} + +impl<'a> PartialEq for BorrowedNamespace<'a> { + fn eq(&self, other: &Namespace) -> bool { + self.0 == other + } +} + /// A name with a namespace. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] From e43cf3d5774d5e55328776cf7449df4a07466236 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 21 May 2016 18:45:51 -0400 Subject: [PATCH 209/379] Add nonce to the atom list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 00433fa..c45ebb8 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -49,6 +49,7 @@ pub static ATOMS: &'static [&'static str] = &[ "marquee", "meta", "noframes", + "nonce", "noscript", "object", "optgroup", From 90c0c4e58dccca61f8b5b8cd8d5149d604b126e1 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 21 May 2016 18:46:13 -0400 Subject: [PATCH 210/379] Bump version to 0.2.17 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4e7135e..bd1d435 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.16" +version = "0.2.17" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From beb496756067f66e47f3c5a14db2d399fbc66270 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Tue, 24 May 2016 12:10:05 +0530 Subject: [PATCH 211/379] add onstatechange to static_atom_list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index c45ebb8..84ff334 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -892,6 +892,7 @@ pub static ATOMS: &'static [&'static str] = &[ "onscroll", "onselect", "onselectstart", + "onstatechange", "onstart", "onstop", "onstorage", From 97b55762a83226b2640db53dcc385fbda8659874 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Tue, 24 May 2016 11:03:24 +0200 Subject: [PATCH 212/379] Update to 0.2.18 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bd1d435..bc0e534 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.17" +version = "0.2.18" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 20a2ca86b0298f2bd094dbec8853eb2ccc8e0bf2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 26 May 2016 15:39:08 +0200 Subject: [PATCH 213/379] Rename Atom::data to unsafe_data and hide it in docs. --- build.rs | 2 +- examples/summarize-events/src/main.rs | 2 +- src/atom/mod.rs | 33 ++++++++++++++------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/build.rs b/build.rs index 7571868..65358f6 100644 --- a/build.rs +++ b/build.rs @@ -61,7 +61,7 @@ fn write_atom_macro(hash_state: &phf_generator::HashState) { writeln!(file, r"macro_rules! 
atom {{").unwrap(); for &s in set.iter() { let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); - writeln!(file, r"({:?}) => {{ $crate::Atom {{ data: 0x{:x} }} }};", s, data).unwrap(); + writeln!(file, r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x} }} }};", s, data).unwrap(); } writeln!(file, r"}}").unwrap(); } diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 1b1aa64..8a44389 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -88,7 +88,7 @@ fn main() { // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. - _ => Atom { data: ev.id }.to_string(), + _ => Atom { unsafe_data: ev.id }.to_string(), }; match summary.entry(string) { diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 23ea24e..5ccf8a6 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -168,7 +168,8 @@ impl StringCache { pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
- pub data: u64, + #[doc(hidden)] + pub unsafe_data: u64, } pub struct BorrowedAtom<'a>(pub &'a Atom); @@ -189,11 +190,11 @@ impl<'a> PartialEq for BorrowedAtom<'a> { impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { - UnpackedAtom::from_packed(self.data) + UnpackedAtom::from_packed(self.unsafe_data) } pub fn get_hash(&self) -> u32 { - ((self.data >> 32) ^ self.data) as u32 + ((self.unsafe_data >> 32) ^ self.unsafe_data) as u32 } pub fn with_str(&self, cb: F) -> Output @@ -233,7 +234,7 @@ impl<'a> From> for Atom { let data = unsafe { unpacked.pack() }; log!(Event::Intern(data)); - Atom { data: data } + Atom { unsafe_data: data } } } @@ -255,7 +256,7 @@ impl Clone for Atom { #[inline(always)] fn clone(&self) -> Atom { unsafe { - match from_packed_dynamic(self.data) { + match from_packed_dynamic(self.unsafe_data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); @@ -264,7 +265,7 @@ impl Clone for Atom { } } Atom { - data: self.data + unsafe_data: self.unsafe_data } } } @@ -274,11 +275,11 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - STRING_CACHE.lock().unwrap().remove(this.data); + STRING_CACHE.lock().unwrap().remove(this.unsafe_data); } unsafe { - match from_packed_dynamic(self.data) { + match from_packed_dynamic(self.unsafe_data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { @@ -300,7 +301,7 @@ impl ops::Deref for Atom { unsafe { match self.unpack() { Inline(..) 
=> { - let buf = inline_orig_bytes(&self.data); + let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8(buf).unwrap() }, Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), @@ -338,7 +339,7 @@ impl fmt::Debug for Atom { impl PartialOrd for Atom { #[inline] fn partial_cmp(&self, other: &Atom) -> Option { - if self.data == other.data { + if self.unsafe_data == other.unsafe_data { return Some(Equal); } self.as_ref().partial_cmp(other.as_ref()) @@ -348,7 +349,7 @@ impl PartialOrd for Atom { impl Ord for Atom { #[inline] fn cmp(&self, other: &Atom) -> Ordering { - if self.data == other.data { + if self.unsafe_data == other.unsafe_data { return Equal; } self.as_ref().cmp(other.as_ref()) @@ -662,14 +663,14 @@ mod tests { #[test] fn repr() { fn check(s: &str, data: u64) { - assert_eq_fmt!("0x{:016X}", Atom::from(s).data, data); + assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); } fn check_static(s: &str, x: Atom) { - assert_eq_fmt!("0x{:016X}", x.data, Atom::from(s).data); - assert_eq!(0x2, x.data & 0xFFFF_FFFF); + assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); + assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); + assert!((x.unsafe_data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); } // This test is here to make sure we don't change atom representation @@ -687,7 +688,7 @@ mod tests { check("xyzzy01", 0x3130_797A_7A79_7871); // Dynamic atoms. This is a pointer so we can't verify every bit. 
- assert_eq!(0x00, Atom::from("a dynamic string").data & 0xf); + assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); } #[test] From 3175848b066668f6e37257084bea3578b99baa30 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 27 May 2016 23:10:58 +0200 Subject: [PATCH 214/379] Store Box instead of String --- Cargo.toml | 2 +- src/atom/mod.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bc0e534..2f248e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.18" +version = "0.2.19" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 5ccf8a6..89beb4e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -73,7 +73,7 @@ struct StringCacheEntry { next_in_bucket: Option>, hash: u64, ref_count: AtomicIsize, - string: String, + string: Box, } impl StringCacheEntry { @@ -83,7 +83,7 @@ impl StringCacheEntry { next_in_bucket: next, hash: hash, ref_count: AtomicIsize::new(1), - string: string, + string: string.into_boxed_str(), } } } @@ -102,7 +102,7 @@ impl StringCache { self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { - if entry.hash == hash && entry.string == &*string { + if entry.hash == hash && &*entry.string == &*string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { return &mut **entry; } @@ -696,7 +696,7 @@ mod tests { // Guard against accidental changes to the sizes of things. 
use std::mem; assert_eq!(if cfg!(feature = "unstable") { 8 } else { 16 }, mem::size_of::()); - assert_eq!(48, mem::size_of::()); + assert_eq!(40, mem::size_of::()); } #[test] From ede5dceb8f394ee501b7caea77ebf3a6f1f82af1 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 18 Jun 2016 23:49:55 +0800 Subject: [PATCH 215/379] Add CORS settings and referrer policy atoms to the list --- src/static_atom_list.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 84ff334..5ceff3a 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -154,6 +154,7 @@ pub static ATOMS: &'static [&'static str] = &[ "animation", "annotation", "annotation-xml", + "anonymous", "apply", "approx", "arabic-form", @@ -784,6 +785,8 @@ pub static ATOMS: &'static [&'static str] = &[ "nest", "nextid", "no message", + "no-referrer", + "no-referrer-when-downgrade", "nobr", "noembed", "nohref", @@ -908,6 +911,7 @@ pub static ATOMS: &'static [&'static str] = &[ "orient", "orientation", "origin", + "origin-when-cross-origin", "other", "otherwise", "outerproduct", @@ -1191,10 +1195,12 @@ pub static ATOMS: &'static [&'static str] = &[ "unicode-range", "union", "units-per-em", + "unsafe-url", "unselectable", "uplimit", "url", "use", + "use-credentials", "usemap", "UTF-8", "valign", From 77dc9ccb742b59e3de52d1f30676d593d1554def Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 18 Jun 2016 23:50:34 +0800 Subject: [PATCH 216/379] Bump version to 0.2.20 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2f248e3..fb6b579 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.19" +version = "0.2.20" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 09f955fd7608cf7beca1b4c5068b347b65e082f5 Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 5 Jul 2016 13:28:49 +0200 Subject: [PATCH 217/379] Add some more static atoms. --- src/static_atom_list.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 5ceff3a..ec91b3f 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -134,6 +134,7 @@ pub static ATOMS: &'static [&'static str] = &[ "alink", "alphabetic", "alt", + "alternate", "altglyph", "altGlyph", "altglyphdef", @@ -155,6 +156,7 @@ pub static ATOMS: &'static [&'static str] = &[ "annotation", "annotation-xml", "anonymous", + "apple-touch-icon", "apply", "approx", "arabic-form", @@ -1123,6 +1125,7 @@ pub static ATOMS: &'static [&'static str] = &[ "stroke-opacity", "stroke-width", "strong", + "stylesheet", "sub", "submit", "subscriptshift", From bb130c31f73388e2564d91bfb431ce3812a6693c Mon Sep 17 00:00:00 2001 From: Ms2ger Date: Tue, 5 Jul 2016 13:48:24 +0200 Subject: [PATCH 218/379] Bump to 0.2.21. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fb6b579..57c8cf8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.20" +version = "0.2.21" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From d19558f0afe0162ca289949ebe3694d9bc609ac4 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 22 Jul 2016 03:41:24 +0200 Subject: [PATCH 219/379] Implement Default and PartialEq. 
--- Cargo.toml | 2 +- src/atom/mod.rs | 6 ++++++ src/namespace.rs | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 57c8cf8..0ddc506 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.21" +version = "0.2.22" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 89beb4e..fd0a58e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -215,6 +215,12 @@ impl PartialEq for str { } } +impl PartialEq for Atom { + fn eq(&self, other: &String) -> bool { + &self[..] == &other[..] + } +} + impl<'a> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Atom { diff --git a/src/namespace.rs b/src/namespace.rs index cae98dd..fe2cbae 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -19,6 +19,12 @@ use std::ops; #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); +impl Default for Namespace { + fn default() -> Self { + ns!() + } +} + pub struct BorrowedNamespace<'a>(pub &'a Namespace); impl<'a> ops::Deref for BorrowedNamespace<'a> { From 91314e0e656e2eb4ea5c2b8526dafb148c47e2dc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 22 Jul 2016 16:08:26 +0200 Subject: [PATCH 220/379] Implement Default for Atom --- src/atom/mod.rs | 6 ++++++ src/namespace.rs | 8 +------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index fd0a58e..4878320 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -203,6 +203,12 @@ impl Atom { } } +impl Default for Atom { + fn default() -> Self { + atom!("") + } +} + impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] 
== other diff --git a/src/namespace.rs b/src/namespace.rs index fe2cbae..9ae836b 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -15,16 +15,10 @@ use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. /// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Default)] #[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); -impl Default for Namespace { - fn default() -> Self { - ns!() - } -} - pub struct BorrowedNamespace<'a>(pub &'a Namespace); impl<'a> ops::Deref for BorrowedNamespace<'a> { From 31dea0b1d1fa86657b321f9089cacc637cc5ea57 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:26:44 +0200 Subject: [PATCH 221/379] Allow serde 0.8 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0ddc506..2608b83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ heap_size = ["heapsize", "heapsize_plugin"] [dependencies] lazy_static = "0.2" -serde = ">=0.6, <0.8" +serde = ">=0.6, <0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" From ba9bde9c2d3c91a3b6a3319cfc84d1487d425b01 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:30:39 +0200 Subject: [PATCH 222/379] Remove use of heapsize_plugin --- .travis.yml | 2 +- Cargo.toml | 6 +----- src/atom/mod.rs | 21 +++++++++++++++------ src/lib.rs | 4 +--- src/namespace.rs | 8 ++++++-- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index e99d9a3..485a91f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ script: - cargo test - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo 
test --features heap_size; fi" + - cargo test --features heapsize - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." notifications: diff --git a/Cargo.toml b/Cargo.toml index 2608b83..3bb0269 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ log-events = ["rustc-serialize"] unstable = [] # HeapSizeOf support -heap_size = ["heapsize", "heapsize_plugin"] +heap_size = ["heapsize"] [dependencies] lazy_static = "0.2" @@ -44,10 +44,6 @@ optional = true version = ">=0.1.1, <0.4" optional = true -[dependencies.heapsize_plugin] -version = "0.1.4" -optional = true - [build-dependencies] phf_generator = "0.7.4" phf_shared = "0.7.4" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 4878320..85a6402 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -9,7 +9,7 @@ #![allow(non_upper_case_globals)] -#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -46,7 +46,7 @@ struct StringCache { buckets: [Option>; NB_BUCKETS], } -#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] impl HeapSizeOf for StringCache { fn heap_size_of_children(&self) -> usize { self.buckets.iter().fold(0, |size, bucket| size + bucket.heap_size_of_children()) @@ -58,17 +58,16 @@ lazy_static! { } /// A token that represents the heap used by the dynamic string cache. 
-#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] pub struct StringCacheHeap; -#[cfg(feature = "heap_size")] +#[cfg(feature = "heapsize")] impl HeapSizeOf for StringCacheHeap { fn heap_size_of_children(&self) -> usize { STRING_CACHE.lock().unwrap().heap_size_of_children() } } -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] struct StringCacheEntry { next_in_bucket: Option>, hash: u64, @@ -76,6 +75,14 @@ struct StringCacheEntry { string: Box, } +#[cfg(feature = "heapsize")] +impl HeapSizeOf for StringCacheEntry { + fn heap_size_of_children(&self) -> usize { + self.next_in_bucket.heap_size_of_children() + + self.string.heap_size_of_children() + } +} + impl StringCacheEntry { fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { @@ -163,7 +170,6 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. #[cfg_attr(feature = "unstable", unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. 
@@ -172,6 +178,9 @@ pub struct Atom { pub unsafe_data: u64, } +#[cfg(feature = "heapsize")] +known_heap_size!(0, Atom); + pub struct BorrowedAtom<'a>(pub &'a Atom); impl<'a> ops::Deref for BorrowedAtom<'a> { diff --git a/src/lib.rs b/src/lib.rs index 7c5bdac..471207b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,12 +13,10 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag))] -#![cfg_attr(feature = "heap_size", feature(plugin, custom_derive))] -#![cfg_attr(feature = "heap_size", plugin(heapsize_plugin))] #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "log-events")] extern crate rustc_serialize; -#[cfg(feature = "heap_size")] extern crate heapsize; +#[cfg(feature = "heapsize")] #[macro_use] extern crate heapsize; #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; diff --git a/src/namespace.rs b/src/namespace.rs index 9ae836b..6af1557 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -16,9 +16,11 @@ use std::ops; /// Whether a given string represents a namespace is contextual, so this is /// a transparent wrapper that will not catch all mistakes. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Default)] -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct Namespace(pub Atom); +#[cfg(feature = "heapsize")] +known_heap_size!(0, Namespace); + pub struct BorrowedNamespace<'a>(pub &'a Namespace); impl<'a> ops::Deref for BorrowedNamespace<'a> { @@ -36,12 +38,14 @@ impl<'a> PartialEq for BorrowedNamespace<'a> { /// A name with a namespace. 
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] -#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct QualName { pub ns: Namespace, pub local: Atom, } +#[cfg(feature = "heapsize")] +known_heap_size!(0, QualName); + impl QualName { #[inline] pub fn new(ns: Namespace, local: Atom) -> QualName { From cc945ed2d8f45f50b8b70bf01d5937cd38fc5878 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:33:08 +0200 Subject: [PATCH 223/379] Enable doctests --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3bb0269..3954c45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,9 +12,6 @@ build = "build.rs" [lib] name = "string_cache" -# https://github.com/rust-lang/cargo/issues/1512 -doctest = false - [features] # Enable event logging for generating benchmark traces. From 07abb7b51ad97f69f6509dd65699d23775510236 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Sat, 30 Jul 2016 15:31:05 +0200 Subject: [PATCH 224/379] Bump version to 0.2.23 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3954c45..9b5f882 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.22" +version = "0.2.23" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 9abce7858a7d9d231cc236216297e7b5c0628331 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 18 Aug 2016 14:03:52 +0200 Subject: [PATCH 225/379] Display for Namespace --- Cargo.toml | 2 +- src/namespace.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9b5f882..e29218d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.23" +version = "0.2.24" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/namespace.rs b/src/namespace.rs index 6af1557..12bd718 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -10,6 +10,7 @@ //! **Note:** This may move as string-cache becomes less Web-specific. use atom::Atom; +use std::fmt; use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. @@ -36,6 +37,13 @@ impl<'a> PartialEq for BorrowedNamespace<'a> { } } +impl fmt::Display for Namespace { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + ::fmt(&self.0, f) + } +} + /// A name with a namespace. #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] pub struct QualName { From 4c0ee074c66671cc5cdc1b883b4d8f449c53f4c2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 26 Aug 2016 14:33:24 +0200 Subject: [PATCH 226/379] Drop flags are dead, long live MIR! --- Cargo.toml | 2 +- src/atom/mod.rs | 24 +++++++++--------------- src/lib.rs | 3 +-- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e29218d..5d707cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.24" +version = "0.2.25" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 85a6402..e3f6b8b 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -169,7 +169,6 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. -#[cfg_attr(feature = "unstable", unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -714,9 +713,16 @@ mod tests { #[test] fn assert_sizes() { - // Guard against accidental changes to the sizes of things. use std::mem; - assert_eq!(if cfg!(feature = "unstable") { 8 } else { 16 }, mem::size_of::()); + struct EmptyWithDrop; + impl Drop for EmptyWithDrop { + fn drop(&mut self) {} + } + let compiler_uses_inline_drop_flags = mem::size_of::() > 0; + + // Guard against accidental changes to the sizes of things. + assert_eq!(mem::size_of::(), + if compiler_uses_inline_drop_flags { 16 } else { 8 }); assert_eq!(40, mem::size_of::()); } @@ -771,18 +777,6 @@ mod tests { let _: &str = atom.as_ref(); } - /// Atom uses #[unsafe_no_drop_flag] to stay small, so drop() may be called more than once. - /// In calls after the first one, the atom will be filled with a POST_DROP value. - /// drop() must be a no-op in this case. 
- #[cfg(feature = "unstable")] - #[test] - fn atom_drop_is_idempotent() { - use super::from_packed_dynamic; - unsafe { - assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); - } - } - #[test] fn string_cache_entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); diff --git a/src/lib.rs b/src/lib.rs index 471207b..b466a77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,8 +11,7 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] -#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag))] +#![cfg_attr(all(test, feature = "unstable"), feature(test))] #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "log-events")] extern crate rustc_serialize; From 38eb8f31d728c984c6a2911045931aa1c0ff4baa Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 26 Aug 2016 14:46:17 -0700 Subject: [PATCH 227/379] Add transitionend atoms to the static list --- Cargo.toml | 2 +- src/static_atom_list.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d707cd..bfec76e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.25" +version = "0.2.26" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index ec91b3f..7f06a4a 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -902,6 +902,7 @@ pub static ATOMS: &'static [&'static str] = &[ "onstop", "onstorage", "onsubmit", + "ontransitionend", "onunload", "onzoom", "opacity", @@ -1177,8 +1178,9 @@ pub static ATOMS: &'static [&'static str] = &[ "transition-delay", "transition-duration", "transition-property", - "transitions", "transition-timing-function", + "transitionend", + "transitions", "transpose", "tref", "true", From a9e439509a81c0e669b1da5cc6c02d4b0d588939 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 3 Sep 2016 14:47:57 -0700 Subject: [PATCH 228/379] Add missing animatable property names --- src/static_atom_list.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 7f06a4a..57561e5 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -277,6 +277,7 @@ pub static ATOMS: &'static [&'static str] = &[ "border-top-width", "border-width", "bottom", + "box-shadow", "butt", "bvar", "by", @@ -501,6 +502,8 @@ pub static ATOMS: &'static [&'static str] = &[ "filterRes", "filterunits", "filterUnits", + "flex-grow", + "flex-shrink", "float", "flood", "flood-color", @@ -920,6 +923,7 @@ pub static ATOMS: &'static [&'static str] = &[ "outerproduct", "outline", "outline-color", + "outline-offset", "outline-style", "outline-width", "output", @@ -948,6 +952,7 @@ pub static ATOMS: &'static [&'static str] = &[ "patternTransform", "patternunits", "patternUnits", + "perspective", "pi", "piece", "piecewise", @@ -1157,16 +1162,18 @@ pub static ATOMS: &'static [&'static str] = &[ "tel", "tendsto", "text", - "text/plain", "text-align", "text-anchor", "text-decoration", + "text-indent", + "text-orientation", + "text-rendering", + "text-shadow", + "text/plain", "textlength", "textLength", - "text-orientation", 
"textpath", "textPath", - "text-rendering", "thickmathspace", "thinmathspace", "time", @@ -1175,6 +1182,7 @@ pub static ATOMS: &'static [&'static str] = &[ "top", "touchevent", "transform", + "transform-origin", "transition-delay", "transition-duration", "transition-property", @@ -1277,6 +1285,7 @@ pub static ATOMS: &'static [&'static str] = &[ "ychannelselector", "yChannelSelector", "z", + "z-index", "zoomandpan", "zoomAndPan", ]; From a64b0611633f3898e129b4bf0241aa0878450adb Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Sat, 3 Sep 2016 14:48:15 -0700 Subject: [PATCH 229/379] Version bump to 0.2.27 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bfec76e..dc0d94e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.26" +version = "0.2.27" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From 5fb7cef6c6e92cdb9eeb3dce620d7a2b06c7a180 Mon Sep 17 00:00:00 2001 From: Rahul Sharma Date: Fri, 16 Sep 2016 20:20:36 +0530 Subject: [PATCH 230/379] Add activate string --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 57561e5..2519838 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -124,6 +124,7 @@ pub static ATOMS: &'static [&'static str] = &[ "acronym", "action", "actiontype", + "activate", "active", "actuate", "additive", From ca1bbf653b310b1fc6f90cb10a5fdefd180eb525 Mon Sep 17 00:00:00 2001 From: Keith Yeung Date: Fri, 16 Sep 2016 10:28:13 -0700 Subject: [PATCH 231/379] Version bump to 0.2.28 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index dc0d94e..4595af5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.27" +version = "0.2.28" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 952a6b4ab78e7ccee85ad8768bfa58c3a2db85f0 Mon Sep 17 00:00:00 2001 From: Taryn Hill Date: Sun, 18 Sep 2016 22:10:22 -0500 Subject: [PATCH 232/379] Add minlength to static_atom_list Bump to 0.2.29 --- Cargo.toml | 2 +- src/static_atom_list.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4595af5..f7d56bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.28" +version = "0.2.29" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index 2519838..c016768 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -740,6 +740,7 @@ pub static ATOMS: &'static [&'static str] = &[ "mi", "min", "min-height", + "minlength", "minsize", "minus", "min-width", From 948c453ce3e3ff2caac0cb793a3ff1e4f1180a30 Mon Sep 17 00:00:00 2001 From: "Ying-Ruei Liang(KK)" Date: Fri, 23 Sep 2016 14:58:32 +0800 Subject: [PATCH 233/379] Add "referrerpolicy" to static_atom_list --- src/static_atom_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs index c016768..f90ca6a 100644 --- a/src/static_atom_list.rs +++ b/src/static_atom_list.rs @@ -1004,6 +1004,7 @@ pub static ATOMS: &'static [&'static str] = &[ "real", "reals", "rect", + "referrerpolicy", "refx", "refX", "refy", From de67df7eb667652fcbee1c06f21242b70b32a414 Mon Sep 17 00:00:00 2001 From: "Ying-Ruei Liang(KK)" Date: Fri, 23 Sep 2016 21:04:02 +0800 Subject: [PATCH 234/379] Version bump to 0.2.30 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f7d56bf..0ece410 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.29" +version = "0.2.30" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From c3229388c8cb046c1fadf255aed3b1616aebeb98 Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Fri, 21 Oct 2016 18:48:40 +0100 Subject: [PATCH 235/379] Make cmp massively faster for inline atoms --- src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index e3f6b8b..0182242 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -322,7 +322,7 @@ impl ops::Deref for Atom { match self.unpack() { Inline(..) 
=> { let buf = inline_orig_bytes(&self.unsafe_data); - str::from_utf8(buf).unwrap() + str::from_utf8_unchecked(buf) }, Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), Dynamic(entry) => { From 40d8629f293c24cbbd9a3289fe796496b0b2e970 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 21:22:00 +0200 Subject: [PATCH 236/379] Make tests pass on a 32-bit system. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Testable on 64-bit Linux with: ``` rustup target add i686-unknown-linux-gnu cargo test --target i686-unknown-linux-gnu ``` (or similarly on other 64-bit platforms), assuming a linker and libc for this target are available on the system. Leaving #162 open to add CI for this. (Unfortunately Travis doesn’t use rustup out of the box.) --- src/atom/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index e3f6b8b..6ff7e5f 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -723,7 +723,8 @@ mod tests { // Guard against accidental changes to the sizes of things. assert_eq!(mem::size_of::(), if compiler_uses_inline_drop_flags { 16 } else { 8 }); - assert_eq!(40, mem::size_of::()); + assert_eq!(mem::size_of::(), + 8 + 4 * mem::size_of::()); } #[test] From 90bdc783146149924c3f5ba0b5d3f9d68803f74d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 17:59:53 +0200 Subject: [PATCH 237/379] Use more compact TOML syntax. 
--- Cargo.toml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ece410..8ec53bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,18 +29,12 @@ lazy_static = "0.2" serde = ">=0.6, <0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" +rustc-serialize = { version = "0.3", optional = true } +heapsize = { version = ">=0.1.1, <0.4", optional = true } [dev-dependencies] rand = "0.3" -[dependencies.rustc-serialize] -version = "0.3" -optional = true - -[dependencies.heapsize] -version = ">=0.1.1, <0.4" -optional = true - [build-dependencies] phf_generator = "0.7.4" phf_shared = "0.7.4" From 85816177ce9e2eec2c294d0c22aa630eead6e3bc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 17:01:08 +0200 Subject: [PATCH 238/379] Breaking changes are coming. --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8ec53bc..c896ee7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.2.30" +version = "0.3.0" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -26,11 +26,11 @@ heap_size = ["heapsize"] [dependencies] lazy_static = "0.2" -serde = ">=0.6, <0.9" +serde = "0.8" phf_shared = "0.7.4" debug_unreachable = "0.1.1" rustc-serialize = { version = "0.3", optional = true } -heapsize = { version = ">=0.1.1, <0.4", optional = true } +heapsize = { version = "0.3", optional = true } [dev-dependencies] rand = "0.3" From 0c82b2c704b7b742e8844559269a8d62bc5a9455 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:01:35 +0200 Subject: [PATCH 239/379] Remove BorrowedAtom. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit selectors doesn’t use it anymore. 
--- src/atom/mod.rs | 15 --------------- src/lib.rs | 4 ++-- src/namespace.rs | 16 ---------------- 3 files changed, 2 insertions(+), 33 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 0e8f7ef..6749db7 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -180,21 +180,6 @@ pub struct Atom { #[cfg(feature = "heapsize")] known_heap_size!(0, Atom); -pub struct BorrowedAtom<'a>(pub &'a Atom); - -impl<'a> ops::Deref for BorrowedAtom<'a> { - type Target = Atom; - fn deref(&self) -> &Atom { - self.0 - } -} - -impl<'a> PartialEq for BorrowedAtom<'a> { - fn eq(&self, other: &Atom) -> bool { - self.0 == other - } -} - impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { diff --git a/src/lib.rs b/src/lib.rs index b466a77..f6390c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,8 +22,8 @@ extern crate serde; extern crate phf_shared; -pub use atom::{Atom, BorrowedAtom}; -pub use namespace::{BorrowedNamespace, Namespace, QualName}; +pub use atom::Atom; +pub use namespace::{Namespace, QualName}; #[macro_export] macro_rules! qualname { diff --git a/src/namespace.rs b/src/namespace.rs index 12bd718..0a415fa 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -11,7 +11,6 @@ use atom::Atom; use std::fmt; -use std::ops; /// An atom that is meant to represent a namespace in the HTML / XML sense. 
/// Whether a given string represents a namespace is contextual, so this is @@ -22,21 +21,6 @@ pub struct Namespace(pub Atom); #[cfg(feature = "heapsize")] known_heap_size!(0, Namespace); -pub struct BorrowedNamespace<'a>(pub &'a Namespace); - -impl<'a> ops::Deref for BorrowedNamespace<'a> { - type Target = Namespace; - fn deref(&self) -> &Namespace { - self.0 - } -} - -impl<'a> PartialEq for BorrowedNamespace<'a> { - fn eq(&self, other: &Namespace) -> bool { - self.0 == other - } -} - impl fmt::Display for Namespace { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { From fef1fa6cf47deda487f76d9e95b473314941f4c4 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:28:35 +0200 Subject: [PATCH 240/379] Remove macro hack rendered unneeded by `$crate`. --- src/lib.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f6390c5..febeea6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,13 +61,3 @@ pub mod event; pub mod atom; pub mod namespace; pub mod shared; - -// A private module so that macro-expanded idents like -// `::string_cache::atom::Atom` will also work in this crate. -// -// `libstd` uses the same trick. 
-#[doc(hidden)] -mod string_cache { - pub use atom; - pub use namespace; -} From dbf6ad786175c7ff3f89798c3c3f827b4e9b4760 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:29:09 +0200 Subject: [PATCH 241/379] =?UTF-8?q?Remove=20namespaces.=20They=E2=80=99re?= =?UTF-8?q?=20going=20into=20htmlever.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib.rs | 29 ----------------- src/namespace.rs | 81 ------------------------------------------------ 2 files changed, 110 deletions(-) delete mode 100644 src/namespace.rs diff --git a/src/lib.rs b/src/lib.rs index febeea6..44bade8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,34 +23,6 @@ extern crate serde; extern crate phf_shared; pub use atom::Atom; -pub use namespace::{Namespace, QualName}; - -#[macro_export] -macro_rules! qualname { - ("", $local:tt) => { - $crate::namespace::QualName { - ns: ns!(), - local: atom!($local), - } - }; - ($ns:tt, $local:tt) => { - $crate::namespace::QualName { - ns: ns!($ns), - local: atom!($local), - } - } -} - -#[macro_export] -macro_rules! 
ns { - () => { $crate::Namespace(atom!("")) }; - (html) => { $crate::Namespace(atom!("http://www.w3.org/1999/xhtml")) }; - (xml) => { $crate::Namespace(atom!("http://www.w3.org/XML/1998/namespace")) }; - (xmlns) => { $crate::Namespace(atom!("http://www.w3.org/2000/xmlns/")) }; - (xlink) => { $crate::Namespace(atom!("http://www.w3.org/1999/xlink")) }; - (svg) => { $crate::Namespace(atom!("http://www.w3.org/2000/svg")) }; - (mathml) => { $crate::Namespace(atom!("http://www.w3.org/1998/Math/MathML")) }; -} include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); @@ -59,5 +31,4 @@ include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); pub mod event; pub mod atom; -pub mod namespace; pub mod shared; diff --git a/src/namespace.rs b/src/namespace.rs deleted file mode 100644 index 0a415fa..0000000 --- a/src/namespace.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! **Note:** This may move as string-cache becomes less Web-specific. - -use atom::Atom; -use std::fmt; - -/// An atom that is meant to represent a namespace in the HTML / XML sense. -/// Whether a given string represents a namespace is contextual, so this is -/// a transparent wrapper that will not catch all mistakes. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Default)] -pub struct Namespace(pub Atom); - -#[cfg(feature = "heapsize")] -known_heap_size!(0, Namespace); - -impl fmt::Display for Namespace { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::fmt(&self.0, f) - } -} - -/// A name with a namespace. 
-#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] -pub struct QualName { - pub ns: Namespace, - pub local: Atom, -} - -#[cfg(feature = "heapsize")] -known_heap_size!(0, QualName); - -impl QualName { - #[inline] - pub fn new(ns: Namespace, local: Atom) -> QualName { - QualName { - ns: ns, - local: local, - } - } -} - -#[cfg(test)] -mod tests { - use super::{Namespace, QualName}; - use Atom; - - #[test] - fn ns_macro() { - assert_eq!(ns!(), Namespace(Atom::from(""))); - - assert_eq!(ns!(html), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xml), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(xmlns), Namespace(Atom::from("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xlink), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(svg), Namespace(Atom::from("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mathml), Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); - } - - #[test] - fn qualname() { - assert_eq!(QualName::new(ns!(), atom!("")), - QualName { ns: ns!(), local: Atom::from("") }); - assert_eq!(QualName::new(ns!(xml), atom!("base")), - QualName { ns: ns!(xml), local: atom!("base") }); - } - - #[test] - fn qualname_macro() { - assert_eq!(qualname!("", ""), QualName { ns: ns!(), local: atom!("") }); - assert_eq!(qualname!(xml, "base"), QualName { ns: ns!(xml), local: atom!("base") }); - } -} From 135c895c7d606a1eb6e89f372669eff407d3ab60 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 24 Oct 2016 18:10:06 +0200 Subject: [PATCH 242/379] Stop using derive. Prepare for adding a type parameter that derive would require bounds on. 
--- src/atom/mod.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 6749db7..4469ca9 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -18,6 +18,7 @@ use std::ascii::AsciiExt; use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; +use std::hash::{Hash, Hasher}; use std::mem; use std::ops; use std::ptr; @@ -167,9 +168,6 @@ impl StringCache { } } -// NOTE: Deriving Eq here implies that a given string must always -// be interned the same way. -#[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. @@ -202,6 +200,23 @@ impl Default for Atom { } } +impl Hash for Atom { + #[inline] + fn hash(&self, state: &mut H) where H: Hasher { + self.unsafe_data.hash(state) + } +} + +impl Eq for Atom {} + +impl PartialEq for Atom { +// NOTE: This impl requires that a given string must always be interned the same way. + #[inline] + fn eq(&self, other: &Atom) -> bool { + self.unsafe_data == other.unsafe_data + } +} + impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] == other @@ -297,7 +312,6 @@ impl Drop for Atom { } } - impl ops::Deref for Atom { type Target = str; From 046b0447414297db9d54ea85e8c16002aaa32f17 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 15:05:44 +0200 Subject: [PATCH 243/379] Rename StaticAtomSet struct to PhfStrSet. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We’re about to introduce a trait name StaticAtomSet. --- build.rs | 4 ++-- src/atom/mod.rs | 2 +- src/shared.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/build.rs b/build.rs index 65358f6..f530b0d 100644 --- a/build.rs +++ b/build.rs @@ -33,7 +33,7 @@ fn write_static_atom_set(hash_state: &phf_generator::HashState) { macro_rules! 
w { ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } } - w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); + w!("pub static STATIC_ATOM_SET: PhfStrSet = PhfStrSet {{"); w!(" key: {},", hash_state.key); w!(" disps: &["); for &(d1, d2) in &hash_state.disps { @@ -49,7 +49,7 @@ fn write_static_atom_set(hash_state: &phf_generator::HashState) { } fn write_atom_macro(hash_state: &phf_generator::HashState) { - let set = shared::StaticAtomSet { + let set = shared::PhfStrSet { key: hash_state.key, disps: leak(hash_state.disps.clone()), atoms: leak(hash_state.map.iter().map(|&idx| static_atom_list::ATOMS[idx]).collect()), diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 4469ca9..6d14cee 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -29,7 +29,7 @@ use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - ENTRY_ALIGNMENT, pack_static, StaticAtomSet}; + ENTRY_ALIGNMENT, pack_static, PhfStrSet}; use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] diff --git a/src/shared.rs b/src/shared.rs index a653872..8b9133b 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -24,13 +24,13 @@ pub fn pack_static(n: u32) -> u64 { (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) } -pub struct StaticAtomSet { +pub struct PhfStrSet { pub key: u64, pub disps: &'static [(u32, u32)], pub atoms: &'static [&'static str], } -impl StaticAtomSet { +impl PhfStrSet { #[inline] pub fn get_index_or_hash(&self, s: &str) -> Result { let hash = phf_shared::hash(s, self.key); From 367bf9f252e6a2b965f7f97bcd958b270ea66ea2 Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Mon, 24 Oct 2016 18:41:32 +0200 Subject: [PATCH 244/379] Add a type parameter to Atom. 
--- build.rs | 7 +++- src/atom/bench.rs | 18 ++++++--- src/atom/mod.rs | 99 +++++++++++++++++++++++++++-------------------- 3 files changed, 77 insertions(+), 47 deletions(-) diff --git a/build.rs b/build.rs index f530b0d..6515570 100644 --- a/build.rs +++ b/build.rs @@ -61,7 +61,12 @@ fn write_atom_macro(hash_state: &phf_generator::HashState) { writeln!(file, r"macro_rules! atom {{").unwrap(); for &s in set.iter() { let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); - writeln!(file, r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x} }} }};", s, data).unwrap(); + writeln!( + file, + r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", + s, + data + ).unwrap(); } writeln!(file, r"}}").unwrap(); } diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 96b0790..585d1c1 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -27,9 +27,17 @@ and cheap to move around, which isn't reflected in these tests. */ -use atom::Atom; +use atom::tests::Atom; use test::{Bencher, black_box}; +macro_rules! test_atom { + ($tt: tt) => {{ + // Add type annotation to help inference + let atom: Atom = atom!($tt); + atom + }} +} + // Just shorthand fn mk(x: &str) -> Atom { Atom::from(x) @@ -134,7 +142,7 @@ macro_rules! 
bench_all ( use std::string::ToString; use std::iter::repeat; - use atom::Atom; + use atom::tests::Atom; use atom::UnpackedAtom::{Static, Inline, Dynamic}; use super::mk; @@ -157,7 +165,7 @@ bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_ref clone is_static lt] - for static_atom = atom!("a"), atom!("b")); + for static_atom = test_atom!("a"), test_atom!("b")); bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); @@ -175,10 +183,10 @@ bench_all!([intern as_ref clone is_static] for static_at_runtime = mk("a"), mk("b")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = atom!("a"), mk("f")); + for static_vs_inline = test_atom!("a"), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = atom!("a"), mk(super::longer_dynamic_b)); + for static_vs_dynamic = test_atom!("a"), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 6d14cee..4021a81 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -19,6 +19,7 @@ use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; use std::mem; use std::ops; use std::ptr; @@ -168,17 +169,27 @@ impl StringCache { } } -pub struct Atom { +pub trait StaticAtomSet {} + +pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
#[doc(hidden)] pub unsafe_data: u64, + + #[doc(hidden)] + pub phantom: PhantomData, } #[cfg(feature = "heapsize")] -known_heap_size!(0, Atom); +impl HeapSizeOf for Atom { + #[inline(always)] + fn heap_size_of_children(&self) -> usize { + 0 + } +} -impl Atom { +impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { UnpackedAtom::from_packed(self.unsafe_data) @@ -194,50 +205,50 @@ impl Atom { } } -impl Default for Atom { +impl Default for Atom { fn default() -> Self { atom!("") } } -impl Hash for Atom { +impl Hash for Atom { #[inline] fn hash(&self, state: &mut H) where H: Hasher { self.unsafe_data.hash(state) } } -impl Eq for Atom {} +impl Eq for Atom {} -impl PartialEq for Atom { // NOTE: This impl requires that a given string must always be interned the same way. +impl PartialEq for Atom { #[inline] - fn eq(&self, other: &Atom) -> bool { + fn eq(&self, other: &Self) -> bool { self.unsafe_data == other.unsafe_data } } -impl PartialEq for Atom { +impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] == other } } -impl PartialEq for str { - fn eq(&self, other: &Atom) -> bool { +impl PartialEq> for str { + fn eq(&self, other: &Atom) -> bool { self == &other[..] } } -impl PartialEq for Atom { +impl PartialEq for Atom { fn eq(&self, other: &String) -> bool { &self[..] == &other[..] 
} } -impl<'a> From> for Atom { +impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] - fn from(string_to_add: Cow<'a, str>) -> Atom { + fn from(string_to_add: Cow<'a, str>) -> Self { let unpacked = match STATIC_ATOM_SET.get_index_or_hash(&*string_to_add) { Ok(id) => Static(id as u32), Err(hash) => { @@ -254,27 +265,27 @@ impl<'a> From> for Atom { let data = unsafe { unpacked.pack() }; log!(Event::Intern(data)); - Atom { unsafe_data: data } + Atom { unsafe_data: data, phantom: PhantomData } } } -impl<'a> From<&'a str> for Atom { +impl<'a, Static: StaticAtomSet> From<&'a str> for Atom { #[inline] - fn from(string_to_add: &str) -> Atom { + fn from(string_to_add: &str) -> Self { Atom::from(Cow::Borrowed(string_to_add)) } } -impl From for Atom { +impl From for Atom { #[inline] - fn from(string_to_add: String) -> Atom { + fn from(string_to_add: String) -> Self { Atom::from(Cow::Owned(string_to_add)) } } -impl Clone for Atom { +impl Clone for Atom { #[inline(always)] - fn clone(&self) -> Atom { + fn clone(&self) -> Self { unsafe { match from_packed_dynamic(self.unsafe_data) { Some(entry) => { @@ -285,16 +296,17 @@ impl Clone for Atom { } } Atom { - unsafe_data: self.unsafe_data + unsafe_data: self.unsafe_data, + phantom: PhantomData, } } } -impl Drop for Atom { +impl Drop for Atom { #[inline] fn drop(&mut self) { // Out of line to guide inlining. 
- fn drop_slow(this: &mut Atom) { + fn drop_slow(this: &mut Atom) { STRING_CACHE.lock().unwrap().remove(this.unsafe_data); } @@ -312,7 +324,7 @@ impl Drop for Atom { } } -impl ops::Deref for Atom { +impl ops::Deref for Atom { type Target = str; #[inline] @@ -333,14 +345,14 @@ impl ops::Deref for Atom { } } -impl fmt::Display for Atom { +impl fmt::Display for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { ::fmt(self, f) } } -impl fmt::Debug for Atom { +impl fmt::Debug for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { @@ -355,9 +367,9 @@ impl fmt::Debug for Atom { } } -impl PartialOrd for Atom { +impl PartialOrd for Atom { #[inline] - fn partial_cmp(&self, other: &Atom) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { if self.unsafe_data == other.unsafe_data { return Some(Equal); } @@ -365,9 +377,9 @@ impl PartialOrd for Atom { } } -impl Ord for Atom { +impl Ord for Atom { #[inline] - fn cmp(&self, other: &Atom) -> Ordering { + fn cmp(&self, other: &Self) -> Ordering { if self.unsafe_data == other.unsafe_data { return Equal; } @@ -375,21 +387,21 @@ impl Ord for Atom { } } -impl AsRef for Atom { +impl AsRef for Atom { fn as_ref(&self) -> &str { &self } } -impl Serialize for Atom { - fn serialize(&self, serializer: &mut S) -> Result<(),S::Error> where S: Serializer { +impl Serialize for Atom { + fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: Serializer { let string: &str = self.as_ref(); string.serialize(serializer) } } -impl Deserialize for Atom { - fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { +impl Deserialize for Atom { + fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { let string: String = try!(Deserialize::deserialize(deserializer)); Ok(Atom::from(&*string)) } @@ -398,8 +410,8 @@ impl Deserialize for Atom { // AsciiExt requires mutating methods, so we just implement the non-mutating ones. 
// We don't need to implement is_ascii because there's no performance improvement // over the one from &str. -impl Atom { - pub fn to_ascii_uppercase(&self) -> Atom { +impl Atom { + pub fn to_ascii_uppercase(&self) -> Self { if self.chars().all(char::is_uppercase) { self.clone() } else { @@ -407,7 +419,7 @@ impl Atom { } } - pub fn to_ascii_lowercase(&self) -> Atom { + pub fn to_ascii_lowercase(&self) -> Self { if self.chars().all(char::is_lowercase) { self.clone() } else { @@ -546,10 +558,15 @@ mod bench; mod tests { use std::mem; use std::thread; - use super::{Atom, StringCacheEntry, STATIC_ATOM_SET}; + use super::Atom as GenericAtom; + use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET}; use super::UnpackedAtom::{Dynamic, Inline, Static}; use shared::ENTRY_ALIGNMENT; + pub type Atom = GenericAtom; + pub struct DummyStatic; + impl StaticAtomSet for DummyStatic {} + #[test] fn test_as_slice() { let s0 = Atom::from(""); @@ -720,7 +737,7 @@ mod tests { let compiler_uses_inline_drop_flags = mem::size_of::() > 0; // Guard against accidental changes to the sizes of things. - assert_eq!(mem::size_of::(), + assert_eq!(mem::size_of::(), if compiler_uses_inline_drop_flags { 16 } else { 8 }); assert_eq!(mem::size_of::(), 8 + 4 * mem::size_of::()); From 0ad43eab58e052ca56972e18de26f762ecb4233a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 15:27:13 +0200 Subject: [PATCH 245/379] Make StaticAtomSet impls provide a PhfStrSet. --- src/atom/mod.rs | 22 ++++++++++++++-------- src/lib.rs | 3 ++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 4021a81..8eea9ec 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -169,7 +169,9 @@ impl StringCache { } } -pub trait StaticAtomSet {} +pub trait StaticAtomSet { + fn get() -> &'static PhfStrSet; +} pub struct Atom { /// This field is public so that the `atom!()` macro can use it. 
@@ -249,7 +251,7 @@ impl PartialEq for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { - let unpacked = match STATIC_ATOM_SET.get_index_or_hash(&*string_to_add) { + let unpacked = match Static::get().get_index_or_hash(&*string_to_add) { Ok(id) => Static(id as u32), Err(hash) => { let len = string_to_add.len(); @@ -335,7 +337,7 @@ impl ops::Deref for Atom { let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8_unchecked(buf) }, - Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), + Static(idx) => Static::get().index(idx).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -561,11 +563,15 @@ mod tests { use super::Atom as GenericAtom; use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET}; use super::UnpackedAtom::{Dynamic, Inline, Static}; - use shared::ENTRY_ALIGNMENT; + use shared::{ENTRY_ALIGNMENT, PhfStrSet}; - pub type Atom = GenericAtom; - pub struct DummyStatic; - impl StaticAtomSet for DummyStatic {} + pub type Atom = GenericAtom; + pub struct DefaultStatic; + impl StaticAtomSet for DefaultStatic { + fn get() -> &'static PhfStrSet { + &STATIC_ATOM_SET + } + } #[test] fn test_as_slice() { @@ -706,7 +712,7 @@ mod tests { assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); // The index is unspecified by phf. 
- assert!((x.unsafe_data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); + assert!((x.unsafe_data >> 32) <= DefaultStatic::get().iter().len() as u64); } // This test is here to make sure we don't change atom representation diff --git a/src/lib.rs b/src/lib.rs index 44bade8..60d67d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,8 @@ extern crate serde; extern crate phf_shared; -pub use atom::Atom; +pub use atom::{Atom, StaticAtomSet}; +pub use shared::PhfStrSet; include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); From c3eac36425ba12daa1b526cfc2912426921d4a3b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 15:49:18 +0200 Subject: [PATCH 246/379] Inline PhfStrSet methods. --- Cargo.toml | 1 - build.rs | 22 +++------------------- src/atom/mod.rs | 42 ++++++++++++++++++++++++++---------------- src/lib.rs | 3 +-- src/shared.rs | 31 ------------------------------- 5 files changed, 30 insertions(+), 69 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c896ee7..80266d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,4 +37,3 @@ rand = "0.3" [build-dependencies] phf_generator = "0.7.4" -phf_shared = "0.7.4" diff --git a/build.rs b/build.rs index 6515570..ea87b0f 100644 --- a/build.rs +++ b/build.rs @@ -1,4 +1,3 @@ -extern crate phf_shared; extern crate phf_generator; #[path = "src/shared.rs"] #[allow(dead_code)] mod shared; @@ -7,9 +6,7 @@ extern crate phf_generator; use std::env; use std::fs::File; use std::io::{BufWriter, Write}; -use std::mem; use std::path::Path; -use std::slice; fn main() { let hash_state = generate(); @@ -49,30 +46,17 @@ fn write_static_atom_set(hash_state: &phf_generator::HashState) { } fn write_atom_macro(hash_state: &phf_generator::HashState) { - let set = shared::PhfStrSet { - key: hash_state.key, - disps: leak(hash_state.disps.clone()), - atoms: leak(hash_state.map.iter().map(|&idx| static_atom_list::ATOMS[idx]).collect()), - }; - let path = Path::new(&env::var("OUT_DIR").unwrap()).join("atom_macro.rs"); let mut 
file = BufWriter::new(File::create(&path).unwrap()); writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! atom {{").unwrap(); - for &s in set.iter() { - let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); + for (i, &idx) in hash_state.map.iter().enumerate() { writeln!( file, r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", - s, - data + static_atom_list::ATOMS[idx], + shared::pack_static(i as u32), ).unwrap(); } writeln!(file, r"}}").unwrap(); } - -fn leak(v: Vec) -> &'static [T] { - let slice = unsafe { slice::from_raw_parts(v.as_ptr(), v.len()) }; - mem::forget(v); - slice -} diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 8eea9ec..6be6cb6 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -12,6 +12,7 @@ #[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::ascii::AsciiExt; @@ -30,7 +31,7 @@ use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - ENTRY_ALIGNMENT, pack_static, PhfStrSet}; + ENTRY_ALIGNMENT, pack_static}; use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] @@ -173,6 +174,12 @@ pub trait StaticAtomSet { fn get() -> &'static PhfStrSet; } +pub struct PhfStrSet { + pub key: u64, + pub disps: &'static [(u32, u32)], + pub atoms: &'static [&'static str], +} + pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
@@ -251,17 +258,20 @@ impl PartialEq for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { - let unpacked = match Static::get().get_index_or_hash(&*string_to_add) { - Ok(id) => Static(id as u32), - Err(hash) => { - let len = string_to_add.len(); - if len <= MAX_INLINE_LEN { - let mut buf: [u8; 7] = [0; 7]; - copy_memory(string_to_add.as_bytes(), &mut buf); - Inline(len as u8, buf) - } else { - Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) - } + let static_set = Static::get(); + let hash = phf_shared::hash(&*string_to_add, static_set.key); + let index = phf_shared::get_index(hash, static_set.disps, static_set.atoms.len()); + + let unpacked = if static_set.atoms[index as usize] == string_to_add { + Static(index) + } else { + let len = string_to_add.len(); + if len <= MAX_INLINE_LEN { + let mut buf: [u8; 7] = [0; 7]; + copy_memory(string_to_add.as_bytes(), &mut buf); + Inline(len as u8, buf) + } else { + Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) } }; @@ -337,7 +347,7 @@ impl ops::Deref for Atom { let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8_unchecked(buf) }, - Static(idx) => Static::get().index(idx).expect("bad static atom"), + Static(idx) => Static::get().atoms.get(idx as usize).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -561,9 +571,9 @@ mod tests { use std::mem; use std::thread; use super::Atom as GenericAtom; - use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET}; + use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET, PhfStrSet}; use super::UnpackedAtom::{Dynamic, Inline, Static}; - use shared::{ENTRY_ALIGNMENT, PhfStrSet}; + use shared::ENTRY_ALIGNMENT; pub type Atom = GenericAtom; pub struct DefaultStatic; @@ -712,7 +722,7 @@ mod tests { assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); assert_eq!(0x2, x.unsafe_data & 
0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= DefaultStatic::get().iter().len() as u64); + assert!((x.unsafe_data >> 32) <= DefaultStatic::get().atoms.len() as u64); } // This test is here to make sure we don't change atom representation diff --git a/src/lib.rs b/src/lib.rs index 60d67d1..da777cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,8 +22,7 @@ extern crate serde; extern crate phf_shared; -pub use atom::{Atom, StaticAtomSet}; -pub use shared::PhfStrSet; +pub use atom::{Atom, StaticAtomSet, PhfStrSet}; include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); diff --git a/src/shared.rs b/src/shared.rs index 8b9133b..f69a4f4 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -7,8 +7,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use phf_shared; - // FIXME(rust-lang/rust#18153): generate these from an enum pub const DYNAMIC_TAG: u8 = 0b_00; pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble @@ -23,32 +21,3 @@ pub const STATIC_SHIFT_BITS: usize = 32; pub fn pack_static(n: u32) -> u64 { (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) } - -pub struct PhfStrSet { - pub key: u64, - pub disps: &'static [(u32, u32)], - pub atoms: &'static [&'static str], -} - -impl PhfStrSet { - #[inline] - pub fn get_index_or_hash(&self, s: &str) -> Result { - let hash = phf_shared::hash(s, self.key); - let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); - if self.atoms[index as usize] == s { - Ok(index) - } else { - Err(hash) - } - } - - #[inline] - pub fn index(&self, i: u32) -> Option<&'static str> { - self.atoms.get(i as usize).map(|&s| s) - } - - #[inline] - pub fn iter(&self) -> ::std::slice::Iter<&'static str> { - self.atoms.iter() - } -} From 684191e1a1fc5a7ab5d19e6ce676636159f02cc5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 16:46:38 +0200 Subject: [PATCH 247/379] Flatten the src/atom directory --- src/{atom/mod.rs => 
atom.rs} | 1 + src/{atom => }/bench.rs | 0 2 files changed, 1 insertion(+) rename src/{atom/mod.rs => atom.rs} (99%) rename src/{atom => }/bench.rs (100%) diff --git a/src/atom/mod.rs b/src/atom.rs similarity index 99% rename from src/atom/mod.rs rename to src/atom.rs index 6be6cb6..399328a 100644 --- a/src/atom/mod.rs +++ b/src/atom.rs @@ -564,6 +564,7 @@ fn copy_memory(src: &[u8], dst: &mut [u8]) { } #[cfg(all(test, feature = "unstable"))] +#[path = "bench.rs"] mod bench; #[cfg(test)] diff --git a/src/atom/bench.rs b/src/bench.rs similarity index 100% rename from src/atom/bench.rs rename to src/bench.rs From fabedb09fe03e548ad252accab7f16f3c70b34e5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 16:56:55 +0200 Subject: [PATCH 248/379] Add EmptyStaticAtomSet and DefaultAtom. --- src/atom.rs | 18 ++++++++++++++++++ src/lib.rs | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 399328a..14614af 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -180,6 +180,24 @@ pub struct PhfStrSet { pub atoms: &'static [&'static str], } +pub struct EmptyStaticAtomSet; + +impl StaticAtomSet for EmptyStaticAtomSet { + fn get() -> &'static PhfStrSet { + // The name is a lie: this set is not empty (it contains the empty string) + // but that’s only to avoid divisions by zero in rust-phf. + static SET: PhfStrSet = PhfStrSet { + key: 0, + disps: &[(0, 0)], + atoms: &[""], + }; + &SET + } +} + +/// Use this if you don’t care about static atoms. +pub type DefaultAtom = Atom; + pub struct Atom { /// This field is public so that the `atom!()` macro can use it. /// You should not otherwise access this field. 
diff --git a/src/lib.rs b/src/lib.rs index da777cf..f5ea9d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,7 @@ extern crate serde; extern crate phf_shared; -pub use atom::{Atom, StaticAtomSet, PhfStrSet}; +pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); From e9a82f7a71148328685b67edc07e6557d9cebada Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 17:12:51 +0200 Subject: [PATCH 249/379] Make examples/* crates build again. --- examples/event-log/src/main.rs | 2 +- examples/summarize-events/src/main.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/event-log/src/main.rs b/examples/event-log/src/main.rs index 89adfdf..7a25e29 100644 --- a/examples/event-log/src/main.rs +++ b/examples/event-log/src/main.rs @@ -9,7 +9,7 @@ extern crate string_cache; -use string_cache::Atom; +use string_cache::DefaultAtom as Atom; use string_cache::event; use std::io; diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 8a44389..66773e2 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -16,10 +16,11 @@ extern crate phf_shared; #[allow(dead_code)] mod shared; -use string_cache::Atom; +use string_cache::DefaultAtom as Atom; use std::{env, cmp}; use std::collections::hash_map::{HashMap, Entry}; +use std::marker::PhantomData; use std::path::Path; #[derive(RustcDecodable, Debug)] @@ -88,7 +89,7 @@ fn main() { // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. 
- _ => Atom { unsafe_data: ev.id }.to_string(), + _ => Atom { unsafe_data: ev.id, phantom: PhantomData }.to_string(), }; match summary.entry(string) { From 4a6d5f1e50631872c7221816a2582858da0c8e4a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 17:32:32 +0200 Subject: [PATCH 250/379] Link to docs.rs rather than doc.servo.org. --- Cargo.toml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 80266d8..074ef79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" -documentation = "http://doc.servo.org/string_cache/" +documentation = "https://docs.rs/string_cache/" build = "build.rs" [lib] diff --git a/README.md b/README.md index f18a7dd..6d020ac 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,6 @@ [![Build Status](https://travis-ci.org/servo/string-cache.svg?branch=master)](https://travis-ci.org/servo/string-cache) -[Documentation](http://doc.servo.org/string_cache/) +[Documentation](https://docs.rs/string_cache/) A string interning library for Rust, developed as part of the [Servo](https://github.com/servo/servo) project. 
From f054f9b201dcae543ef2c955374c5ed54d291cc1 Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Tue, 25 Oct 2016 19:53:28 +0200 Subject: [PATCH 251/379] Add and use new string-cache-codegen crate --- .travis.yml | 1 + Cargo.toml | 8 +- build.rs | 62 ++---------- examples/summarize-events/src/main.rs | 2 +- src/atom.rs | 51 +++++----- src/bench.rs | 20 ++-- src/lib.rs | 12 ++- string-cache-codegen/Cargo.toml | 16 ++++ string-cache-codegen/lib.rs | 120 ++++++++++++++++++++++++ {src => string-cache-codegen}/shared.rs | 0 10 files changed, 189 insertions(+), 103 deletions(-) create mode 100644 string-cache-codegen/Cargo.toml create mode 100644 string-cache-codegen/lib.rs rename {src => string-cache-codegen}/shared.rs (100%) diff --git a/.travis.yml b/.travis.yml index 485a91f..b7ee145 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ script: - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - cargo test --features heapsize + - "cd string-cache-codegen/ && cargo build && cd .." - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." notifications: diff --git a/Cargo.toml b/Cargo.toml index 074ef79..8443c73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,12 @@ repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" build = "build.rs" +# Do not `exclude` ./string-cache-codegen because we want to include +# ./string-cache-codegen/shared.rs, and `include` is a pain to use +# (It has to be exhaustive.) +# This means that packages for this crate include some unused files, +# but they’re not too big so that shouldn’t be a problem. 
+ [lib] name = "string_cache" @@ -36,4 +42,4 @@ heapsize = { version = "0.3", optional = true } rand = "0.3" [build-dependencies] -phf_generator = "0.7.4" +string_cache_codegen = { version = "0.3", path = "./string-cache-codegen" } diff --git a/build.rs b/build.rs index ea87b0f..8cd89df 100644 --- a/build.rs +++ b/build.rs @@ -1,62 +1,14 @@ -extern crate phf_generator; +extern crate string_cache_codegen; -#[path = "src/shared.rs"] #[allow(dead_code)] mod shared; -#[path = "src/static_atom_list.rs"] mod static_atom_list; +#[path = "src/static_atom_list.rs"] +mod static_atom_list; use std::env; -use std::fs::File; -use std::io::{BufWriter, Write}; use std::path::Path; fn main() { - let hash_state = generate(); - write_static_atom_set(&hash_state); - write_atom_macro(&hash_state); -} - -fn generate() -> phf_generator::HashState { - let mut set = std::collections::HashSet::new(); - for atom in static_atom_list::ATOMS { - if !set.insert(atom) { - panic!("duplicate static atom `{:?}`", atom); - } - } - phf_generator::generate_hash(static_atom_list::ATOMS) -} - -fn write_static_atom_set(hash_state: &phf_generator::HashState) { - let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - macro_rules! w { - ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } - } - w!("pub static STATIC_ATOM_SET: PhfStrSet = PhfStrSet {{"); - w!(" key: {},", hash_state.key); - w!(" disps: &["); - for &(d1, d2) in &hash_state.disps { - w!(" ({}, {}),", d1, d2); - } - w!(" ],"); - w!(" atoms: &["); - for &idx in &hash_state.map { - w!(" {:?},", static_atom_list::ATOMS[idx]); - } - w!(" ],"); - w!("}};"); -} - -fn write_atom_macro(hash_state: &phf_generator::HashState) { - let path = Path::new(&env::var("OUT_DIR").unwrap()).join("atom_macro.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - writeln!(file, r"#[macro_export]").unwrap(); - writeln!(file, r"macro_rules! 
atom {{").unwrap(); - for (i, &idx) in hash_state.map.iter().enumerate() { - writeln!( - file, - r"({:?}) => {{ $crate::Atom {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", - static_atom_list::ATOMS[idx], - shared::pack_static(i as u32), - ).unwrap(); - } - writeln!(file, r"}}").unwrap(); + string_cache_codegen::AtomType::new("atom::tests::TestAtom", "test_atom!") + .atoms(static_atom_list::ATOMS) + .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) + .unwrap() } diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 66773e2..70ab6be 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -12,7 +12,7 @@ extern crate string_cache; extern crate rustc_serialize; extern crate phf_shared; -#[path = "../../../src/shared.rs"] +#[path = "../../../string-cache-codegen/shared.rs"] #[allow(dead_code)] mod shared; diff --git a/src/atom.rs b/src/atom.rs index 14614af..64124cb 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -37,8 +37,6 @@ use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] use event::Event; -include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); - #[cfg(not(feature = "log-events"))] macro_rules! log (($e:expr) => (())); @@ -199,7 +197,7 @@ impl StaticAtomSet for EmptyStaticAtomSet { pub type DefaultAtom = Atom; pub struct Atom { - /// This field is public so that the `atom!()` macro can use it. + /// This field is public so that the `atom!()` macros can use it. /// You should not otherwise access this field. 
#[doc(hidden)] pub unsafe_data: u64, @@ -234,7 +232,7 @@ impl Atom { impl Default for Atom { fn default() -> Self { - atom!("") + Self::from("") } } @@ -581,26 +579,17 @@ fn copy_memory(src: &[u8], dst: &mut [u8]) { } } -#[cfg(all(test, feature = "unstable"))] -#[path = "bench.rs"] -mod bench; - #[cfg(test)] +#[macro_use] mod tests { use std::mem; use std::thread; - use super::Atom as GenericAtom; - use super::{StaticAtomSet, StringCacheEntry, STATIC_ATOM_SET, PhfStrSet}; + use super::{StaticAtomSet, StringCacheEntry}; use super::UnpackedAtom::{Dynamic, Inline, Static}; use shared::ENTRY_ALIGNMENT; - pub type Atom = GenericAtom; - pub struct DefaultStatic; - impl StaticAtomSet for DefaultStatic { - fn get() -> &'static PhfStrSet { - &STATIC_ATOM_SET - } - } + include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); + pub type Atom = TestAtom; #[test] fn test_as_slice() { @@ -741,7 +730,7 @@ mod tests { assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= DefaultStatic::get().atoms.len() as u64); + assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); } // This test is here to make sure we don't change atom representation @@ -749,9 +738,9 @@ mod tests { // static atom table, the tag values, etc. 
// Static atoms - check_static("a", atom!("a")); - check_static("address", atom!("address")); - check_static("area", atom!("area")); + check_static("a", test_atom!("a")); + check_static("address", test_atom!("address")); + check_static("area", test_atom!("area")); // Inline atoms check("e", 0x0000_0000_0000_6511); @@ -790,27 +779,27 @@ mod tests { #[test] fn atom_macro() { - assert_eq!(atom!("body"), Atom::from("body")); - assert_eq!(atom!("font-weight"), Atom::from("font-weight")); + assert_eq!(test_atom!("body"), Atom::from("body")); + assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); } #[test] fn match_atom() { assert_eq!(2, match Atom::from("head") { - atom!("br") => 1, - atom!("html") | atom!("head") => 2, + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("body") { - atom!("br") => 1, - atom!("html") | atom!("head") => 2, + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("zzzzzz") { - atom!("br") => 1, - atom!("html") | atom!("head") => 2, + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, _ => 3, }); } @@ -869,3 +858,7 @@ mod tests { assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); } } + +#[cfg(all(test, feature = "unstable"))] +#[path = "bench.rs"] +mod bench; diff --git a/src/bench.rs b/src/bench.rs index 585d1c1..f6f5248 100644 --- a/src/bench.rs +++ b/src/bench.rs @@ -27,20 +27,12 @@ and cheap to move around, which isn't reflected in these tests. */ -use atom::tests::Atom; +use atom::tests::TestAtom; use test::{Bencher, black_box}; -macro_rules! test_atom { - ($tt: tt) => {{ - // Add type annotation to help inference - let atom: Atom = atom!($tt); - atom - }} -} - // Just shorthand -fn mk(x: &str) -> Atom { - Atom::from(x) +fn mk(x: &str) -> TestAtom { + TestAtom::from(x) } macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( @@ -89,7 +81,7 @@ macro_rules! 
bench_one ( fn intern(b: &mut Bencher) { let x = $x.to_string(); b.iter(|| { - black_box(Atom::from(&*x)); + black_box(TestAtom::from(&*x)); }); } ); @@ -142,7 +134,7 @@ macro_rules! bench_all ( use std::string::ToString; use std::iter::repeat; - use atom::tests::Atom; + use atom::tests::TestAtom; use atom::UnpackedAtom::{Static, Inline, Dynamic}; use super::mk; @@ -213,7 +205,7 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( *n = (*n % 0x40) + 0x20; } let s = str::from_utf8(&buf[..]).unwrap(); - black_box(Atom::from(s)); + black_box(TestAtom::from(s)); }); } )); diff --git a/src/lib.rs b/src/lib.rs index f5ea9d4..ed72634 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,11 +24,17 @@ extern crate phf_shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; -include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); - #[cfg(feature = "log-events")] #[macro_use] pub mod event; pub mod atom; -pub mod shared; + +#[path = "../string-cache-codegen/shared.rs"] +mod shared; + +// Make test_atom! macro work in this crate. +// `$crate` would not be appropriate for other crates creating such macros +mod string_cache { + pub use {Atom, StaticAtomSet, PhfStrSet}; +} diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml new file mode 100644 index 0000000..7e721a2 --- /dev/null +++ b/string-cache-codegen/Cargo.toml @@ -0,0 +1,16 @@ +[package] + +name = "string_cache_codegen" +version = "0.3.0" +authors = [ "The Servo Project Developers" ] +description = "A codegen library for string-cache, developed as part of the Servo project." 
+license = "MIT / Apache-2.0" +repository = "https://github.com/servo/string-cache" +documentation = "https://docs.rs/string_cache_codegen/" + +[lib] +name = "string_cache_codegen" +path = "lib.rs" + +[dependencies] +phf_generator = "0.7.15" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs new file mode 100644 index 0000000..c0ac564 --- /dev/null +++ b/string-cache-codegen/lib.rs @@ -0,0 +1,120 @@ +// Copyright 2016 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate phf_generator; + +use std::collections::HashSet; +use std::fs::File; +use std::io::{self, Write, BufWriter}; +use std::path::Path; + +#[allow(dead_code)] +mod shared; + +/// A builder for a static atom set and relevant macros +pub struct AtomType { + path: String, + macro_name: String, + atoms: HashSet, +} + +impl AtomType { + /// Constructs a new static atom set builder + /// + /// `path` is a path within a crate of the atom type that will be created. + /// e.g. `"FooAtom"` at the crate root or `"foo::Atom"` if the generated code + /// is included in a `foo` module. + /// + /// `macro_name` must end with `!`. + /// + /// For example, `AtomType::new("foo::FooAtom", "foo_atom!")` will generate: + /// + /// ```rust + /// pub type FooAtom = ::string_cache::Atom; + /// pub struct FooAtomStaticSet; + /// impl ::string_cache::StaticAtomSet for FooAtomStaticSet { + /// // ... 
+ /// } + /// #[macro_export] + /// macro_rules! foo_atom { + /// // Expands to: $crate::foo::FooAtom { … } + /// } + pub fn new(path: &str, macro_name: &str) -> Self { + let mut set = HashSet::new(); + set.insert(String::new()); // rust-phf requires a non-empty set + assert!(macro_name.ends_with('!')); + AtomType { + path: path.to_owned(), + macro_name: macro_name[..macro_name.len() - 1].to_owned(), + atoms: set, + } + } + + /// Adds an atom to the builder + pub fn atom(&mut self, s: &str) -> &mut Self { + self.atoms.insert(s.to_owned()); + self + } + + /// Adds multiple atoms to the builder + pub fn atoms(&mut self, iter: I) -> &mut Self + where I: IntoIterator, I::Item: AsRef { + self.atoms.extend(iter.into_iter().map(|s| s.as_ref().to_owned())); + self + } + + /// Write generated code to `destination`. + pub fn write_to(&self, mut destination: W) -> io::Result<()> where W: Write { + let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); + let hash_state = phf_generator::generate_hash(&atoms); + let atoms: Vec<&str> = hash_state.map.iter().map(|&idx| atoms[idx]).collect(); + + let type_name = if let Some(position) = self.path.rfind("::") { + &self.path[position + "::".len() ..] + } else { + &self.path + }; + + macro_rules! w { + ($($arg: expr),+) => { try!(writeln!(destination, $($arg),+)) } + } + + w!("pub type {} = ::string_cache::Atom<{}StaticSet>;", type_name, type_name); + w!("pub struct {}StaticSet;", type_name); + w!("impl ::string_cache::StaticAtomSet for {}StaticSet {{", type_name); + w!(" fn get() -> &'static ::string_cache::PhfStrSet {{"); + w!(" static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet {{"); + w!(" key: {:#?},", hash_state.key); + w!(" disps: &{:?},", hash_state.disps); + w!(" atoms: &{:#?},", atoms); + w!(" }};"); + w!(" &SET"); + w!(" }}"); + w!("}}"); + w!("#[macro_export]"); + w!("macro_rules! 
{} {{", self.macro_name); + for (i, atom) in atoms.iter().enumerate() { + w!("({:?}) => {{ $crate::{} {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", + atom, + self.path, + shared::pack_static(i as u32) + ); + } + w!("}}"); + Ok(()) + } + + /// Create a new file at `path` and write generated code there. + /// + /// Typical usage: + /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))` + pub fn write_to_file(&self, path: &Path) -> io::Result<()> { + self.write_to(BufWriter::new(try!(File::create(path)))) + } +} diff --git a/src/shared.rs b/string-cache-codegen/shared.rs similarity index 100% rename from src/shared.rs rename to string-cache-codegen/shared.rs From 570fea040ab3c964add1b2c74916c4c49dd7610d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 20:01:31 +0200 Subject: [PATCH 252/379] Remove the big static atom list. Fix #22 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s going into various atom types in html5ever and Servo. 
--- build.rs | 7 +- src/static_atom_list.rs | 1294 --------------------------------------- 2 files changed, 3 insertions(+), 1298 deletions(-) delete mode 100644 src/static_atom_list.rs diff --git a/build.rs b/build.rs index 8cd89df..4b0bb9e 100644 --- a/build.rs +++ b/build.rs @@ -1,14 +1,13 @@ extern crate string_cache_codegen; -#[path = "src/static_atom_list.rs"] -mod static_atom_list; - use std::env; use std::path::Path; fn main() { string_cache_codegen::AtomType::new("atom::tests::TestAtom", "test_atom!") - .atoms(static_atom_list::ATOMS) + .atoms(&[ + "a", "b", "address", "area", "body", "font-weight", "br", "html", "head", "id", + ]) .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) .unwrap() } diff --git a/src/static_atom_list.rs b/src/static_atom_list.rs deleted file mode 100644 index f90ca6a..0000000 --- a/src/static_atom_list.rs +++ /dev/null @@ -1,1294 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -pub static ATOMS: &'static [&'static str] = &[ - - // The order is not preserved by phf. 
- - "a", - "address", - "applet", - "area", - "article", - "aside", - "b", - "base", - "basefont", - "bgsound", - "big", - "blockquote", - "body", - "br", - "button", - "caption", - "col", - "colgroup", - "dd", - "dt", - "embed", - "form", - "frame", - "frameset", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "head", - "html", - "input", - "li", - "link", - "marquee", - "meta", - "noframes", - "nonce", - "noscript", - "object", - "optgroup", - "option", - "param", - "plaintext", - "pre", - "preload", - "rp", - "rt", - "script", - "select", - "source", - "style", - "svg", - "table", - "tbody", - "td", - "template", - "textarea", - "tfoot", - "th", - "thead", - "title", - "tr", - "track", - "xmp", - - "", - - "*", - - // XML namespaces known to the HTML syntax spec - "http://www.w3.org/1999/xhtml", - "http://www.w3.org/XML/1998/namespace", - "http://www.w3.org/2000/xmlns/", - "http://www.w3.org/1999/xlink", - "http://www.w3.org/2000/svg", - "http://www.w3.org/1998/Math/MathML", - - "#text", - "#comment", - "#document", - "#document-fragment", - - // User agent strings - "4.0", - "Gecko", - "Linux", - "Mac", - "Mozilla", - "Netscape", - "Win32", - - // Font families - "serif", - "sans-serif", - "cursive", - "fantasy", - "monospace", - - "abbr", - "abort", - "abs", - "accent", - "accent-height", - "accentunder", - "accept", - "accept-charset", - "accesskey", - "accumulate", - "acronym", - "action", - "actiontype", - "activate", - "active", - "actuate", - "additive", - "afterscriptexecute", - "align", - "alignment-baseline", - "alignmentscope", - "alink", - "alphabetic", - "alt", - "alternate", - "altglyph", - "altGlyph", - "altglyphdef", - "altGlyphDef", - "altglyphitem", - "altGlyphItem", - "altimg", - "alttext", - "amplitude", - "and", - "animate", - "animatecolor", - "animateColor", - "animatemotion", - "animateMotion", - "animatetransform", - "animateTransform", - "animation", - "annotation", - "annotation-xml", - "anonymous", - "apple-touch-icon", - "apply", - 
"approx", - "arabic-form", - "arccos", - "arccosh", - "arccot", - "arccoth", - "arccsc", - "arccsch", - "archive", - "arcrole", - "arcsec", - "arcsech", - "arcsin", - "arcsinh", - "arctan", - "arctanh", - "arg", - "aria-activedescendant", - "aria-atomic", - "aria-autocomplete", - "aria-busy", - "aria-channel", - "aria-checked", - "aria-controls", - "aria-datatype", - "aria-describedby", - "aria-disabled", - "aria-dropeffect", - "aria-expanded", - "aria-flowto", - "aria-grab", - "aria-haspopup", - "aria-hidden", - "aria-invalid", - "aria-labelledby", - "aria-level", - "aria-live", - "aria-multiline", - "aria-multiselectable", - "aria-owns", - "aria-posinset", - "aria-pressed", - "aria-readonly", - "aria-relevant", - "aria-required", - "aria-secret", - "aria-selected", - "aria-setsize", - "aria-sort", - "aria-templateid", - "aria-valuemax", - "aria-valuemin", - "aria-valuenow", - "ascent", - "async", - "attributename", - "attributeName", - "attributetype", - "attributeType", - "audio", - "autocomplete", - "autofocus", - "autoplay", - "autosubmit", - "axis", - "azimuth", - "background", - "background-attachment", - "background-clip", - "background-color", - "background-image", - "background-origin", - "background-position", - "background-repeat", - "background-size", - "basefrequency", - "baseFrequency", - "baseline", - "baseline-shift", - "baseprofile", - "baseProfile", - "bbox", - "bdi", - "bdo", - "beforescriptexecute", - "beforeunload", - "begin", - "bevel", - "bevelled", - "bgcolor", - "bias", - "blink", - "blob", - "border", - "border-bottom", - "border-bottom-color", - "border-bottom-left-radius", - "border-bottom-right-radius", - "border-bottom-style", - "border-bottom-width", - "border-color", - "border-left", - "border-left-color", - "border-left-style", - "border-left-width", - "border-radius", - "border-right", - "border-right-color", - "border-right-style", - "border-right-width", - "border-style", - "border-top", - "border-top-color", - 
"border-top-left-radius", - "border-top-right-radius", - "border-top-style", - "border-top-width", - "border-width", - "bottom", - "box-shadow", - "butt", - "bvar", - "by", - "calcmode", - "calcMode", - "canvas", - "cap-height", - "card", - "cartesianproduct", - "ceiling", - "cellpadding", - "cellspacing", - "center", - "change", - "char", - "charoff", - "charset", - "checkbox", - "checked", - "ci", - "circle", - "cite", - "class", - "classid", - "clear", - "click", - "clip", - "clip-path", - "clippath", - "clipPath", - "clippathunits", - "clipPathUnits", - "clip-rule", - "close", - "closure", - "cn", - "code", - "codebase", - "codetype", - "codomain", - "color", - "color-interpolation", - "color-interpolation-filters", - "color-profile", - "color-rendering", - "cols", - "colspan", - "columnalign", - "column-count", - "columnlines", - "columns", - "columnspacing", - "columnspan", - "column-width", - "columnwidth", - "compact", - "complexes", - "compose", - "condition", - "conjugate", - "content", - "contenteditable", - "contentscripttype", - "contentScriptType", - "contentstyletype", - "contentStyleType", - "contextmenu", - "controls", - "coords", - "cos", - "cosh", - "cot", - "coth", - "crossorigin", - "csc", - "csch", - "csymbol", - "curl", - "cursor", - "customevent", - "cx", - "cy", - "d", - "data", - "datafld", - "dataformatas", - "datalist", - "datasrc", - "datatemplate", - "date", - "datetime", - "datetime-local", - "declare", - "default", - "defer", - "definition-src", - "definitionurl", - "definitionURL", - "defs", - "degree", - "del", - "depth", - "desc", - "descent", - "details", - "determinant", - "dfn", - "dialog", - "diff", - "diffuseconstant", - "diffuseConstant", - "dir", - "direction", - "dirname", - "disabled", - "discard", - "display", - "displaystyle", - "div", - "divergence", - "divide", - "divisor", - "dl", - "domain", - "domainofapplication", - "DOMContentLoaded", - "dominant-baseline", - "draggable", - "dur", - "dx", - "dy", - "edge", - 
"edgemode", - "edgeMode", - "elevation", - "ellipse", - "em", - "email", - "emptyset", - "enable-background", - "encoding", - "enctype", - "end", - "eq", - "equalcolumns", - "equalrows", - "equivalent", - "error", - "eulergamma", - "event", - "events", - "exists", - "exp", - "exponent", - "exponentiale", - "externalresourcesrequired", - "externalResourcesRequired", - "face", - "factorial", - "factorof", - "false", - "feblend", - "feBlend", - "fecolormatrix", - "feColorMatrix", - "fecomponenttransfer", - "feComponentTransfer", - "fecomposite", - "feComposite", - "feconvolvematrix", - "feConvolveMatrix", - "fediffuselighting", - "feDiffuseLighting", - "fedisplacementmap", - "feDisplacementMap", - "fedistantlight", - "feDistantLight", - "fedropshadow", - "feDropShadow", - "feflood", - "feFlood", - "fefunca", - "feFuncA", - "fefuncb", - "feFuncB", - "fefuncg", - "feFuncG", - "fefuncr", - "feFuncR", - "fegaussianblur", - "feGaussianBlur", - "feimage", - "feImage", - "femerge", - "feMerge", - "femergenode", - "feMergeNode", - "femorphology", - "feMorphology", - "fence", - "feoffset", - "feOffset", - "fepointlight", - "fePointLight", - "fespecularlighting", - "feSpecularLighting", - "fespotlight", - "feSpotLight", - "fetile", - "feTile", - "feturbulence", - "feTurbulence", - "fieldset", - "figcaption", - "figure", - "file", - "fill", - "fill-opacity", - "fill-rule", - "filter", - "filterres", - "filterRes", - "filterunits", - "filterUnits", - "flex-grow", - "flex-shrink", - "float", - "flood", - "flood-color", - "flood-opacity", - "floor", - "fn", - "font", - "font-face", - "font-face-format", - "font-face-name", - "font-face-src", - "font-face-uri", - "font-family", - "fontfamily", - "font-size", - "fontsize", - "font-size-adjust", - "font-stretch", - "font-style", - "fontstyle", - "font-variant", - "font-weight", - "fontweight", - "footer", - "for", - "forall", - "foreignobject", - "foreignObject", - "formaction", - "format", - "formenctype", - "formmethod", - 
"formnovalidate", - "formtarget", - "frameborder", - "framespacing", - "from", - "fx", - "fy", - "g", - "g1", - "g2", - "gcd", - "geq", - "glyph", - "glyph-name", - "glyph-orientation-horizontal", - "glyph-orientation-vertical", - "glyphref", - "glyphRef", - "grad", - "gradienttransform", - "gradientTransform", - "gradientunits", - "gradientUnits", - "groupalign", - "gt", - "handler", - "hanging", - "header", - "headers", - "height", - "hgroup", - "hidden", - "hidefocus", - "high", - "hkern", - "horiz-adv-x", - "horiz-origin-x", - "horiz-origin-y", - "hr", - "href", - "hreflang", - "hspace", - "htmlevents", - "http-equiv", - "i", - "icon", - "id", - "ident", - "ideographic", - "iframe", - "image", - "image-rendering", - "imaginary", - "imaginaryi", - "img", - "implies", - "important", - "in", - "in2", - "index", - "infinity", - "inputmode", - "ins", - "int", - "integers", - "intercept", - "intersect", - "interval", - "invalid", - "inverse", - "irrelevant", - "isindex", - "ismap", - "k", - "k1", - "k2", - "k3", - "k4", - "kbd", - "kernelmatrix", - "kernelMatrix", - "kernelunitlength", - "kernelUnitLength", - "kerning", - "keyboardevent", - "keydown", - "keyevents", - "keygen", - "keypoints", - "keyPoints", - "keypress", - "keysplines", - "keySplines", - "keytimes", - "keyTimes", - "keyup", - "label", - "lambda", - "lang", - "language", - "laplacian", - "largeop", - "lcm", - "left", - "legend", - "lengthadjust", - "lengthAdjust", - "leq", - "letter-spacing", - "lighting-color", - "limit", - "limitingconeangle", - "limitingConeAngle", - "line", - "lineargradient", - "linearGradient", - "linebreak", - "line-height", - "linethickness", - "list", - "listener", - "listing", - "list-style", - "list-style-image", - "list-style-position", - "list-style-type", - "ln", - "load", - "loadstart", - "loadend", - "local", - "log", - "logbase", - "longdesc", - "loop", - "low", - "lowlimit", - "lowsrc", - "lquote", - "lspace", - "lt", - "macros", - "maction", - "main", - 
"maligngroup", - "malignmark", - "manifest", - "map", - "margin", - "margin-bottom", - "marginheight", - "margin-left", - "margin-right", - "margin-top", - "marginwidth", - "mark", - "marker", - "marker-end", - "markerheight", - "markerHeight", - "marker-mid", - "marker-start", - "markerunits", - "markerUnits", - "markerwidth", - "markerWidth", - "mask", - "maskcontentunits", - "maskContentUnits", - "maskunits", - "maskUnits", - "math", - "mathbackground", - "mathcolor", - "mathematical", - "mathsize", - "mathvariant", - "matrix", - "matrixrow", - "max", - "max-height", - "maxlength", - "maxsize", - "max-width", - "mean", - "media", - "median", - "mediummathspace", - "menclose", - "menu", - "menuitem", - "merror", - "message", - "messageevent", - "metadata", - "meter", - "method", - "mfenced", - "mfrac", - "mglyph", - "mi", - "min", - "min-height", - "minlength", - "minsize", - "minus", - "min-width", - "missing-glyph", - "miter", - "mlabeledtr", - "mmultiscripts", - "mn", - "mo", - "mode", - "moment", - "momentabout", - "month", - "mousedown", - "mouseevent", - "mouseevents", - "mouseover", - "mouseup", - "movablelimits", - "mover", - "mozbrowser", - "mpadded", - "mpath", - "mphantom", - "mprescripts", - "mroot", - "mrow", - "ms", - "mspace", - "msqrt", - "mstyle", - "msub", - "msubsup", - "msup", - "mtable", - "mtd", - "mtext", - "mtr", - "multicol", - "multipart/form-data", - "multiple", - "munder", - "munderover", - "name", - "nargs", - "naturalnumbers", - "nav", - "neq", - "nest", - "nextid", - "no message", - "no-referrer", - "no-referrer-when-downgrade", - "nobr", - "noembed", - "nohref", - "none", - "noresize", - "noshade", - "not", - "notanumber", - "notation", - "notin", - "notprsubset", - "notsubset", - "novalidate", - "nowrap", - "number", - "numoctaves", - "numOctaves", - "occurrence", - "off", - "offset", - "ol", - "on", - "onabort", - "onactivate", - "onafterprint", - "onafterupdate", - "onbefordeactivate", - "onbeforeactivate", - "onbeforecopy", - 
"onbeforecut", - "onbeforeeditfocus", - "onbeforepaste", - "onbeforeprint", - "onbeforeunload", - "onbeforeupdate", - "onbegin", - "onblur", - "onbounce", - "oncellchange", - "onchange", - "onclick", - "oncontextmenu", - "oncontrolselect", - "oncopy", - "oncut", - "ondataavailable", - "ondatasetchanged", - "ondatasetcomplete", - "ondblclick", - "ondeactivate", - "ondrag", - "ondragdrop", - "ondragend", - "ondragenter", - "ondragleave", - "ondragover", - "ondragstart", - "ondrop", - "onend", - "onerror", - "onerrorupdate", - "onfilterchange", - "onfinish", - "onfocus", - "onfocusin", - "onfocusout", - "onformchange", - "onforminput", - "onhashchange", - "onhelp", - "oninput", - "oninvalid", - "onkeydown", - "onkeypress", - "onkeyup", - "onlanguagechange", - "onload", - "onlosecapture", - "onmessage", - "onmousedown", - "onmouseenter", - "onmouseleave", - "onmousemove", - "onmouseout", - "onmouseover", - "onmouseup", - "onmousewheel", - "onmove", - "onmoveend", - "onmovestart", - "onoffline", - "ononline", - "onpagehide", - "onpageshow", - "onpaste", - "onpopstate", - "onpropertychange", - "onreadystatechange", - "onrepeat", - "onreset", - "onresize", - "onrowenter", - "onrowexit", - "onrowsdelete", - "onrowsinserted", - "onscroll", - "onselect", - "onselectstart", - "onstatechange", - "onstart", - "onstop", - "onstorage", - "onsubmit", - "ontransitionend", - "onunload", - "onzoom", - "opacity", - "open", - "operator", - "optimum", - "or", - "order", - "orient", - "orientation", - "origin", - "origin-when-cross-origin", - "other", - "otherwise", - "outerproduct", - "outline", - "outline-color", - "outline-offset", - "outline-style", - "outline-width", - "output", - "overflow", - "overflow-wrap", - "overflow-x", - "overflow-y", - "overline-position", - "overline-thickness", - "p", - "padding", - "padding-bottom", - "padding-left", - "padding-right", - "padding-top", - "panose-1", - "partialdiff", - "password", - "path", - "pathlength", - "pathLength", - "pattern", - 
"patterncontentunits", - "patternContentUnits", - "patterntransform", - "patternTransform", - "patternunits", - "patternUnits", - "perspective", - "pi", - "piece", - "piecewise", - "ping", - "placeholder", - "plus", - "pointer-events", - "points", - "pointsatx", - "pointsAtX", - "pointsaty", - "pointsAtY", - "pointsatz", - "pointsAtZ", - "polygon", - "polyline", - "position", - "post", - "poster", - "power", - "prefetch", - "preservealpha", - "preserveAlpha", - "preserveaspectratio", - "preserveAspectRatio", - "primes", - "primitiveunits", - "primitiveUnits", - "product", - "profile", - "progress", - "prompt", - "prsubset", - "q", - "quotient", - "r", - "radialgradient", - "radialGradient", - "radio", - "radiogroup", - "radius", - "range", - "rationals", - "rb", - "readonly", - "readystatechange", - "real", - "reals", - "rect", - "referrerpolicy", - "refx", - "refX", - "refy", - "refY", - "rel", - "reln", - "rem", - "rendering-intent", - "repeat", - "repeatcount", - "repeatCount", - "repeatdur", - "repeatDur", - "repeat-max", - "repeat-min", - "repeat-start", - "repeat-template", - "replace", - "required", - "requiredextensions", - "requiredExtensions", - "requiredfeatures", - "requiredFeatures", - "reset", - "resize", - "restart", - "result", - "rev", - "right", - "role", - "root", - "rotate", - "round", - "rowalign", - "rowlines", - "rows", - "rowspacing", - "rowspan", - "rquote", - "rspace", - "rtc", - "ruby", - "rule", - "rules", - "rx", - "ry", - "s", - "samp", - "sandbox", - "scalarproduct", - "scale", - "scheme", - "scope", - "scoped", - "scriptlevel", - "scriptminsize", - "scriptsizemultiplier", - "scrolldelay", - "scrolling", - "sdev", - "seamless", - "search", - "sec", - "sech", - "section", - "seed", - "selected", - "selection", - "selector", - "semantics", - "sep", - "separator", - "separators", - "set", - "setdiff", - "shape", - "shape-rendering", - "show", - "sin", - "sinh", - "size", - "sizes", - "slope", - "small", - "solidcolor", - "space", - 
"spacer", - "spacing", - "span", - "specification", - "specularconstant", - "specularConstant", - "specularexponent", - "specularExponent", - "speed", - "spreadmethod", - "spreadMethod", - "square", - "src", - "srcdoc", - "standby", - "start", - "startoffset", - "startOffset", - "stddeviation", - "stdDeviation", - "stemh", - "stemv", - "step", - "stitchtiles", - "stitchTiles", - "stop", - "stop-color", - "stop-opacity", - "storage", - "stretchy", - "strike", - "strikethrough-position", - "strikethrough-thickness", - "string", - "stroke", - "stroke-dasharray", - "stroke-dashoffset", - "stroke-linecap", - "stroke-linejoin", - "stroke-miterlimit", - "stroke-opacity", - "stroke-width", - "strong", - "stylesheet", - "sub", - "submit", - "subscriptshift", - "subset", - "sum", - "summary", - "sup", - "superscriptshift", - "surfacescale", - "surfaceScale", - "switch", - "symbol", - "symmetric", - "systemlanguage", - "systemLanguage", - "tabindex", - "table-layout", - "tablevalues", - "tableValues", - "tan", - "tanh", - "target", - "targetx", - "targetX", - "targety", - "targetY", - "tbreak", - "tel", - "tendsto", - "text", - "text-align", - "text-anchor", - "text-decoration", - "text-indent", - "text-orientation", - "text-rendering", - "text-shadow", - "text/plain", - "textlength", - "textLength", - "textpath", - "textPath", - "thickmathspace", - "thinmathspace", - "time", - "times", - "to", - "top", - "touchevent", - "transform", - "transform-origin", - "transition-delay", - "transition-duration", - "transition-property", - "transition-timing-function", - "transitionend", - "transitions", - "transpose", - "tref", - "true", - "tspan", - "tt", - "type", - "u", - "u1", - "u2", - "uievent", - "uievents", - "ul", - "underline-position", - "underline-thickness", - "unicode", - "unicode-bidi", - "unicode-range", - "union", - "units-per-em", - "unsafe-url", - "unselectable", - "uplimit", - "url", - "use", - "use-credentials", - "usemap", - "UTF-8", - "valign", - "v-alphabetic", - 
"value", - "values", - "valuetype", - "var", - "variance", - "vector", - "vectorproduct", - "version", - "vert-adv-y", - "vertical-align", - "vert-origin-x", - "vert-origin-y", - "verythickmathspace", - "verythinmathspace", - "veryverythickmathspace", - "veryverythinmathspace", - "v-hanging", - "video", - "v-ideographic", - "view", - "viewbox", - "viewBox", - "viewtarget", - "viewTarget", - "visibility", - "vkern", - "vlink", - "v-mathematical", - "vspace", - "wbr", - "webglcontextcreationerror", - "week", - "when", - "white-space", - "width", - "widths", - "word-spacing", - "word-wrap", - "wrap", - "writing-mode", - "x", - "x1", - "x2", - "xchannelselector", - "xChannelSelector", - "x-height", - "xlink", - "xlink:actuate", - "xlink:arcrole", - "xlink:href", - "xlink:role", - "xlink:show", - "xlink:title", - "xlink:type", - "xml:base", - "xml:lang", - "xmlns", - "xmlns:xlink", - "xml:space", - "xor", - "xref", - "y", - "y1", - "y2", - "ychannelselector", - "yChannelSelector", - "z", - "z-index", - "zoomandpan", - "zoomAndPan", -]; From 06b2116e3b5fba398881c249fb1df2e959514eba Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 25 Oct 2016 21:02:46 +0200 Subject: [PATCH 253/379] Make the Default impl use a static atom. --- src/atom.rs | 17 ++++++++++++++++- string-cache-codegen/lib.rs | 23 +++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 64124cb..1b12148 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -170,6 +170,7 @@ impl StringCache { pub trait StaticAtomSet { fn get() -> &'static PhfStrSet; + fn empty_string_index() -> u32; } pub struct PhfStrSet { @@ -191,6 +192,10 @@ impl StaticAtomSet for EmptyStaticAtomSet { }; &SET } + + fn empty_string_index() -> u32 { + 0 + } } /// Use this if you don’t care about static atoms. 
@@ -231,8 +236,12 @@ impl Atom { } impl Default for Atom { + #[inline] fn default() -> Self { - Self::from("") + Atom { + unsafe_data: pack_static(Static::empty_string_index()), + phantom: PhantomData + } } } @@ -663,6 +672,12 @@ mod tests { assert!(i0 != d0); } + #[test] + fn default() { + assert_eq!(TestAtom::default(), test_atom!("")); + assert_eq!(&*TestAtom::default(), ""); + } + #[test] fn ord() { fn check(x: &str, y: &str) { diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index c0ac564..eadd445 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -46,13 +46,11 @@ impl AtomType { /// // Expands to: $crate::foo::FooAtom { … } /// } pub fn new(path: &str, macro_name: &str) -> Self { - let mut set = HashSet::new(); - set.insert(String::new()); // rust-phf requires a non-empty set - assert!(macro_name.ends_with('!')); + assert!(macro_name.ends_with("!")); AtomType { path: path.to_owned(), - macro_name: macro_name[..macro_name.len() - 1].to_owned(), - atoms: set, + macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(), + atoms: HashSet::new(), } } @@ -70,10 +68,16 @@ impl AtomType { } /// Write generated code to `destination`. - pub fn write_to(&self, mut destination: W) -> io::Result<()> where W: Write { + pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { + // `impl Default for Atom` requires the empty string to be in the static set. + // This also makes sure the set in non-empty, + // which would cause divisions by zero in rust-phf. + self.atoms.insert(String::new()); + let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); let hash_state = phf_generator::generate_hash(&atoms); let atoms: Vec<&str> = hash_state.map.iter().map(|&idx| atoms[idx]).collect(); + let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap(); let type_name = if let Some(position) = self.path.rfind("::") { &self.path[position + "::".len() ..] 
@@ -90,12 +94,15 @@ impl AtomType { w!("impl ::string_cache::StaticAtomSet for {}StaticSet {{", type_name); w!(" fn get() -> &'static ::string_cache::PhfStrSet {{"); w!(" static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet {{"); - w!(" key: {:#?},", hash_state.key); + w!(" key: {},", hash_state.key); w!(" disps: &{:?},", hash_state.disps); w!(" atoms: &{:#?},", atoms); w!(" }};"); w!(" &SET"); w!(" }}"); + w!(" fn empty_string_index() -> u32 {{"); + w!(" {}", empty_string_index); + w!(" }}"); w!("}}"); w!("#[macro_export]"); w!("macro_rules! {} {{", self.macro_name); @@ -114,7 +121,7 @@ impl AtomType { /// /// Typical usage: /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))` - pub fn write_to_file(&self, path: &Path) -> io::Result<()> { + pub fn write_to_file(&mut self, path: &Path) -> io::Result<()> { self.write_to(BufWriter::new(try!(File::create(path)))) } } From 1d256d0c3a6be5dbcd6e9b5188743e2154ed1680 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 27 Oct 2016 17:42:10 +0200 Subject: [PATCH 254/379] Add usage example in README. --- README.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/README.md b/README.md index 6d020ac..d244f69 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,74 @@ [Documentation](https://docs.rs/string_cache/) A string interning library for Rust, developed as part of the [Servo](https://github.com/servo/servo) project. 
+ +## Simple usage + +In `Cargo.toml`: + +```toml +[dependencies] +string_cache = "0.3" +``` + +In `lib.rs`: + +```rust +extern crate string_cache; +use string_cache::DefaultAtom as Atom; +``` + +## With static atoms + +In `Cargo.toml`: + +```toml +[package] +build = "build.rs" + +[dependencies] +string_cache = "0.3" + +[build-dependencies] +string_cache_codegen = "0.3" +``` + +In `build.rs`: + +```rust +extern crate string_cache_codegen; + +use std::env; +use std::path::Path; + +fn main() { + string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!") + .atoms(&["foo", "bar"]) + .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs")) + .unwrap() +} +``` + +In `lib.rs`: + +```rust +extern crate string_cache; + +mod foo { + include!(concat!(env!("OUT_DIR"), "/foo_atom.rs")); +} +``` + +The generated code will define a `FooAtom` type and a `foo_atom!` macro. +The macro can be used in expression or patterns, with strings listed in `build.rs`. +For example: + +```rust +fn compute_something(input: &foo::FooAtom) -> u32 { + match *input { + foo_atom!("foo") => 1, + foo_atom!("bar") => 2, + _ => 3, + } +} +``` From 4528d77fe46ce41df019d851e9d50af18fb94963 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 28 Oct 2016 17:49:42 +0200 Subject: [PATCH 255/379] Note in toml files to also update README --- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8443c73..efbf981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.3.0" +version = "0.3.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 7e721a2..c829d64 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.3.0" +version = "0.3.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" From 122d793bbbd85e2a8b6532f4606a3014afdea780 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 2 Nov 2016 14:44:17 +0100 Subject: [PATCH 256/379] Cargo insists on no shared files between crates. --- Cargo.toml | 1 + shared/Cargo.toml | 11 +++++++++++ string-cache-codegen/shared.rs => shared/lib.rs | 0 src/lib.rs | 4 +--- string-cache-codegen/Cargo.toml | 3 ++- string-cache-codegen/lib.rs | 4 +--- 6 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 shared/Cargo.toml rename string-cache-codegen/shared.rs => shared/lib.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index efbf981..ae233cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ phf_shared = "0.7.4" debug_unreachable = "0.1.1" rustc-serialize = { version = "0.3", optional = true } heapsize = { version = "0.3", optional = true } +string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] rand = "0.3" diff --git a/shared/Cargo.toml b/shared/Cargo.toml new file mode 100644 index 0000000..6f8286d --- /dev/null +++ b/shared/Cargo.toml @@ -0,0 +1,11 @@ +[package] + +name = "string_cache_shared" +version = "0.3.0" +authors = [ "The Servo Project Developers" ] +description = "Code share between string_cache and string_cache_codegen." 
+license = "MIT / Apache-2.0" +repository = "https://github.com/servo/string-cache" + +[lib] +path = "lib.rs" diff --git a/string-cache-codegen/shared.rs b/shared/lib.rs similarity index 100% rename from string-cache-codegen/shared.rs rename to shared/lib.rs diff --git a/src/lib.rs b/src/lib.rs index ed72634..6229344 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ #[macro_use] extern crate debug_unreachable; extern crate serde; extern crate phf_shared; +extern crate string_cache_shared as shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; @@ -30,9 +31,6 @@ pub mod event; pub mod atom; -#[path = "../string-cache-codegen/shared.rs"] -mod shared; - // Make test_atom! macro work in this crate. // `$crate` would not be appropriate for other crates creating such macros mod string_cache { diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index c829d64..710aa15 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.3.0" # Also update ../README.md when making a semver-breaking change +version = "0.3.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -13,4 +13,5 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] +string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index eadd445..f16beed 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -8,15 +8,13 @@ // except according to those terms. 
extern crate phf_generator; +extern crate string_cache_shared as shared; use std::collections::HashSet; use std::fs::File; use std::io::{self, Write, BufWriter}; use std::path::Path; -#[allow(dead_code)] -mod shared; - /// A builder for a static atom set and relevant macros pub struct AtomType { path: String, From ae2e6547fc7387ec5a2d228a5578e6b99332253b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 25 Nov 2016 14:20:08 +0100 Subject: [PATCH 257/379] Use the quote crate for code generation --- string-cache-codegen/Cargo.toml | 1 + string-cache-codegen/lib.rs | 80 ++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 710aa15..b4fd102 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -15,3 +15,4 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" +quote = "0.3.9" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index f16beed..1505a37 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -9,10 +9,12 @@ extern crate phf_generator; extern crate string_cache_shared as shared; +#[macro_use] extern crate quote; use std::collections::HashSet; use std::fs::File; use std::io::{self, Write, BufWriter}; +use std::iter; use std::path::Path; /// A builder for a static atom set and relevant macros @@ -67,6 +69,18 @@ impl AtomType { /// Write generated code to `destination`. pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { + destination.write_all( + self.to_tokens() + .as_str() + // Insert some newlines to make the generated code slightly easier to read. 
+ .replace(" [ \"", "[\n\"") + .replace("\" , ", "\",\n") + .replace(" ( \"", "\n( \"") + .replace("; ", ";\n") + .as_bytes()) + } + + fn to_tokens(&mut self) -> quote::Tokens { // `impl Default for Atom` requires the empty string to be in the static set. // This also makes sure the set in non-empty, // which would cause divisions by zero in rust-phf. @@ -74,45 +88,49 @@ impl AtomType { let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); let hash_state = phf_generator::generate_hash(&atoms); - let atoms: Vec<&str> = hash_state.map.iter().map(|&idx| atoms[idx]).collect(); - let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap(); + let phf_generator::HashState { key, disps, map } = hash_state; + let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); + let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; + let data = (0..atoms.len()).map(|i| quote::Hex(shared::pack_static(i as u32))); let type_name = if let Some(position) = self.path.rfind("::") { &self.path[position + "::".len() ..] } else { &self.path }; + let static_set_name = quote::Ident::from(format!("{}StaticSet", type_name)); + let type_name = quote::Ident::from(type_name); + let macro_name = quote::Ident::from(&*self.macro_name); + let path = iter::repeat(quote::Ident::from(&*self.path)); - macro_rules! 
w { - ($($arg: expr),+) => { try!(writeln!(destination, $($arg),+)) } - } - - w!("pub type {} = ::string_cache::Atom<{}StaticSet>;", type_name, type_name); - w!("pub struct {}StaticSet;", type_name); - w!("impl ::string_cache::StaticAtomSet for {}StaticSet {{", type_name); - w!(" fn get() -> &'static ::string_cache::PhfStrSet {{"); - w!(" static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet {{"); - w!(" key: {},", hash_state.key); - w!(" disps: &{:?},", hash_state.disps); - w!(" atoms: &{:#?},", atoms); - w!(" }};"); - w!(" &SET"); - w!(" }}"); - w!(" fn empty_string_index() -> u32 {{"); - w!(" {}", empty_string_index); - w!(" }}"); - w!("}}"); - w!("#[macro_export]"); - w!("macro_rules! {} {{", self.macro_name); - for (i, atom) in atoms.iter().enumerate() { - w!("({:?}) => {{ $crate::{} {{ unsafe_data: 0x{:x}, phantom: ::std::marker::PhantomData }} }};", - atom, - self.path, - shared::pack_static(i as u32) - ); + quote! { + pub type #type_name = ::string_cache::Atom<#static_set_name>; + pub struct #static_set_name; + impl ::string_cache::StaticAtomSet for #static_set_name { + fn get() -> &'static ::string_cache::PhfStrSet { + static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet { + key: #key, + disps: &#disps, + atoms: &#atoms, + }; + &SET + } + fn empty_string_index() -> u32 { + #empty_string_index + } + } + #[macro_export] + macro_rules! #macro_name { + #( + (#atoms) => { + $crate::#path { + unsafe_data: #data, + phantom: ::std::marker::PhantomData, + } + }; + )* + } } - w!("}}"); - Ok(()) } /// Create a new file at `path` and write generated code there. 
From b305af0b62c46ec2384faf403330f430933943f2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 26 Nov 2016 13:08:34 +0100 Subject: [PATCH 258/379] Fix examples build --- examples/summarize-events/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index 70ab6be..a74e659 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -12,7 +12,7 @@ extern crate string_cache; extern crate rustc_serialize; extern crate phf_shared; -#[path = "../../../string-cache-codegen/shared.rs"] +#[path = "../../../shared/lib.rs"] #[allow(dead_code)] mod shared; From cf2302533faa11679b8ff6179d21c67c90506884 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 14:56:13 +0100 Subject: [PATCH 259/379] Remove heap_size feature --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ae233cd..c84a746 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,9 +27,6 @@ log-events = ["rustc-serialize"] # Use unstable features to optimize space and time (memory and CPU usage). 
unstable = [] -# HeapSizeOf support -heap_size = ["heapsize"] - [dependencies] lazy_static = "0.2" serde = "0.8" From 6db3edbe30c760d9c8d44b1fb6ea26f77a4e3259 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Thu, 16 Feb 2017 18:29:34 +0100 Subject: [PATCH 260/379] Silence a warning --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 6229344..b7961c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "log-events")] extern crate rustc_serialize; -#[cfg(feature = "heapsize")] #[macro_use] extern crate heapsize; +#[cfg(feature = "heapsize")] extern crate heapsize; #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; From 9e3c85c3ad3441629f88434ca1a290406a6bf797 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 14:57:05 +0100 Subject: [PATCH 261/379] Remove Atom::with_str --- src/atom.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 1b12148..b03c5ac 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -228,11 +228,6 @@ impl Atom { pub fn get_hash(&self) -> u32 { ((self.unsafe_data >> 32) ^ self.unsafe_data) as u32 } - - pub fn with_str(&self, cb: F) -> Output - where F: FnOnce(&str) -> Output { - cb(self) - } } impl Default for Atom { From 1e5790b6a96b8b8c9aaed515a3d4b1abe6c51ebb Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 14:58:53 +0100 Subject: [PATCH 262/379] Update serde to 0.9 --- Cargo.toml | 2 +- src/atom.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c84a746..94f8891 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ unstable = [] [dependencies] lazy_static = "0.2" -serde = "0.8" +serde = "0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" rustc-serialize = { version = "0.3", optional = true } diff --git 
a/src/atom.rs b/src/atom.rs index b03c5ac..25e258c 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -426,16 +426,16 @@ impl AsRef for Atom { } impl Serialize for Atom { - fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: Serializer { + fn serialize(&self, serializer: S) -> Result where S: Serializer { let string: &str = self.as_ref(); string.serialize(serializer) } } impl Deserialize for Atom { - fn deserialize(deserializer: &mut D) -> Result where D: Deserializer { + fn deserialize(deserializer: D) -> Result where D: Deserializer { let string: String = try!(Deserialize::deserialize(deserializer)); - Ok(Atom::from(&*string)) + Ok(Atom::from(string)) } } From c39ce5849ae75669a83432a6a0765af4ef7ff9e1 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 17 Feb 2017 00:52:33 +0100 Subject: [PATCH 263/379] Disable macOS builds Travis is backlogged into oblivion. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b7ee145..05ea9e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ rust: - stable os: - linux - - osx script: - cargo build - cargo test From 0ec0bfec9fe533250308ca6d4e9b1ec5a9f39906 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Wed, 1 Feb 2017 15:01:39 +0100 Subject: [PATCH 264/379] Bump version to 0.4.0 --- Cargo.toml | 2 +- README.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 94f8891..6bee177 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.3.0" # Also update README.md when making a semver-breaking change +version = "0.4.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/README.md b/README.md index d244f69..43cf6fb 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.3" +string_cache = "0.4" ``` In `lib.rs`: @@ -31,7 +31,7 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.3" +string_cache = "0.4" [build-dependencies] string_cache_codegen = "0.3" From f7ce84308369359362a4e59caa7d563bc99fddb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Tue, 21 Mar 2017 22:38:04 +0100 Subject: [PATCH 265/379] Expose the precomputed hash using a trait so that I can use it from rust-selectors. This allows us to get rid of the extra hashing overhead every time we check the bloom filter. --- Cargo.toml | 1 + src/atom.rs | 28 +++++++++++++++++++++++++++- src/lib.rs | 3 ++- string-cache-codegen/Cargo.toml | 1 + string-cache-codegen/lib.rs | 10 ++++++++++ 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6bee177..2d7b3a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ log-events = ["rustc-serialize"] unstable = [] [dependencies] +precomputed-hash = "0.1" lazy_static = "0.2" serde = "0.9" phf_shared = "0.7.4" diff --git a/src/atom.rs b/src/atom.rs index 25e258c..32542b7 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -177,6 +177,7 @@ pub struct PhfStrSet { pub key: u64, pub disps: &'static [(u32, u32)], pub atoms: &'static [&'static str], + pub hashes: &'static [u32], } pub struct EmptyStaticAtomSet; @@ -189,6 +190,8 @@ impl StaticAtomSet for EmptyStaticAtomSet { key: 0, disps: &[(0, 0)], atoms: &[""], + // "" SipHash'd, and xored with u64_hash_to_u32. + hashes: &[0x3ddddef3], }; &SET } @@ -219,6 +222,17 @@ impl HeapSizeOf for Atom { } } +impl ::precomputed_hash::PrecomputedHash for Atom { + fn precomputed_hash(&self) -> u32 { + self.get_hash() + } +} + +fn u64_hash_as_u32(h: u64) -> u32 { + // This may or may not be great... 
+ ((h >> 32) ^ h) as u32 +} + impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { @@ -226,7 +240,19 @@ impl Atom { } pub fn get_hash(&self) -> u32 { - ((self.unsafe_data >> 32) ^ self.unsafe_data) as u32 + match unsafe { self.unpack() } { + Static(index) => { + let static_set = Static::get(); + static_set.hashes[index as usize] + } + Dynamic(entry) => { + let entry = entry as *mut StringCacheEntry; + u64_hash_as_u32(unsafe { (*entry).hash }) + } + Inline(..) => { + u64_hash_as_u32(self.unsafe_data) + } + } } } diff --git a/src/lib.rs b/src/lib.rs index b7961c3..16f531c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,8 +19,9 @@ #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; -extern crate serde; extern crate phf_shared; +extern crate precomputed_hash; +extern crate serde; extern crate string_cache_shared as shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index b4fd102..a81644c 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -15,4 +15,5 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" +phf_shared = "0.7.4" quote = "0.3.9" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 1505a37..dabd0bb 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -7,7 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
+#![recursion_limit = "128"] + extern crate phf_generator; +extern crate phf_shared; extern crate string_cache_shared as shared; #[macro_use] extern crate quote; @@ -93,6 +96,12 @@ impl AtomType { let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let data = (0..atoms.len()).map(|i| quote::Hex(shared::pack_static(i as u32))); + let hashes: Vec = + atoms.iter().map(|string| { + let hash = phf_shared::hash(string, key); + ((hash >> 32) ^ hash) as u32 + }).collect(); + let type_name = if let Some(position) = self.path.rfind("::") { &self.path[position + "::".len() ..] } else { @@ -112,6 +121,7 @@ impl AtomType { key: #key, disps: &#disps, atoms: &#atoms, + hashes: &#hashes }; &SET } From 8518b44e00bc3b8038f0968dfa57d1f49e3246fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Fri, 7 Apr 2017 12:32:59 +0200 Subject: [PATCH 266/379] Version bump. --- Cargo.toml | 7 +++---- README.md | 6 +++--- src/event.rs | 27 --------------------------- src/lib.rs | 1 - string-cache-codegen/Cargo.toml | 2 +- 5 files changed, 7 insertions(+), 36 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2d7b3a5..5450a37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.4.0" # Also update README.md when making a semver-breaking change +version = "0.5.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -22,7 +22,7 @@ name = "string_cache" # Enable event logging for generating benchmark traces. # See examples/event-log. -log-events = ["rustc-serialize"] +log-events = [] # Use unstable features to optimize space and time (memory and CPU usage). 
unstable = [] @@ -33,7 +33,6 @@ lazy_static = "0.2" serde = "0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" -rustc-serialize = { version = "0.3", optional = true } heapsize = { version = "0.3", optional = true } string_cache_shared = {path = "./shared", version = "0.3"} @@ -41,4 +40,4 @@ string_cache_shared = {path = "./shared", version = "0.3"} rand = "0.3" [build-dependencies] -string_cache_codegen = { version = "0.3", path = "./string-cache-codegen" } +string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } diff --git a/README.md b/README.md index 43cf6fb..5b58e51 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.4" +string_cache = "0.5" ``` In `lib.rs`: @@ -31,10 +31,10 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.4" +string_cache = "0.5" [build-dependencies] -string_cache_codegen = "0.3" +string_cache_codegen = "0.4" ``` In `build.rs`: diff --git a/src/event.rs b/src/event.rs index 79af4a1..1b777d3 100644 --- a/src/event.rs +++ b/src/event.rs @@ -8,7 +8,6 @@ // except according to those terms. use std::sync::Mutex; -use rustc_serialize::{Encoder, Encodable}; #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Debug)] pub enum Event { @@ -27,29 +26,3 @@ pub fn log(e: Event) { } macro_rules! log (($e:expr) => (::event::log($e))); - -// Serialize by converting to this private struct, -// which produces more convenient output. 
- -#[derive(RustcEncodable)] -struct SerializeEvent<'a> { - event: &'static str, - id: u64, - string: Option<&'a String>, -} - -impl Encodable for Event { - fn encode(&self, s: &mut S) -> Result<(), S::Error> { - let (event, id, string) = match *self { - Event::Intern(id) => ("intern", id, None), - Event::Insert(id, ref s) => ("insert", id, Some(s)), - Event::Remove(id) => ("remove", id, None), - }; - - SerializeEvent { - event: event, - id: id, - string: string - }.encode(s) - } -} diff --git a/src/lib.rs b/src/lib.rs index 16f531c..447a399 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ #![cfg_attr(all(test, feature = "unstable"), feature(test))] #[cfg(all(test, feature = "unstable"))] extern crate test; -#[cfg(feature = "log-events")] extern crate rustc_serialize; #[cfg(feature = "heapsize")] extern crate heapsize; #[cfg(test)] extern crate rand; #[macro_use] extern crate lazy_static; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a81644c..14b93a6 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.3.1" # Also update ../README.md when making a semver-breaking change +version = "0.4.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" From 1dff0d8f1d5b1d0b347113827ce0e8988de25c65 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 1 May 2017 19:53:23 +0200 Subject: [PATCH 267/379] Make to_ascii_{upper,lower}case more efficent. 
* Use `From for Atom` * Use the fast path with non-letters too --- Cargo.toml | 2 +- src/atom.rs | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5450a37..af58a60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.5.0" # Also update README.md when making a semver-breaking change +version = "0.5.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom.rs b/src/atom.rs index 32542b7..9a92e5f 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -470,19 +470,21 @@ impl Deserialize for Atom { // over the one from &str. impl Atom { pub fn to_ascii_uppercase(&self) -> Self { - if self.chars().all(char::is_uppercase) { - self.clone() - } else { - Atom::from(&*((&**self).to_ascii_uppercase())) + for b in self.bytes() { + if let b'a' ... b'z' = b { + return Atom::from((&**self).to_ascii_uppercase()) + } } + self.clone() } pub fn to_ascii_lowercase(&self) -> Self { - if self.chars().all(char::is_lowercase) { - self.clone() - } else { - Atom::from(&*((&**self).to_ascii_lowercase())) + for b in self.bytes() { + if let b'A' ... 
b'Z' = b { + return Atom::from((&**self).to_ascii_lowercase()) + } } + self.clone() } pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { From 287754ac87b2289f9a27621c30b25a408cd9ad4f Mon Sep 17 00:00:00 2001 From: Ashley Mannix Date: Mon, 22 May 2017 21:16:16 +1000 Subject: [PATCH 268/379] update to serde 1.0 --- Cargo.toml | 2 +- src/atom.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index af58a60..fdef822 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ unstable = [] [dependencies] precomputed-hash = "0.1" lazy_static = "0.2" -serde = "0.9" +serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" heapsize = { version = "0.3", optional = true } diff --git a/src/atom.rs b/src/atom.rs index 9a92e5f..b96c73c 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -458,8 +458,8 @@ impl Serialize for Atom { } } -impl Deserialize for Atom { - fn deserialize(deserializer: D) -> Result where D: Deserializer { +impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { + fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { let string: String = try!(Deserialize::deserialize(deserializer)); Ok(Atom::from(string)) } From 28bce79df00dbcd196d1f322ade9bca5e957fd17 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Sun, 28 May 2017 23:25:56 +0200 Subject: [PATCH 269/379] Allow heapsize 0.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index af58a60..c7d1492 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ lazy_static = "0.2" serde = "0.9" phf_shared = "0.7.4" debug_unreachable = "0.1.1" -heapsize = { version = "0.3", optional = true } +heapsize = { version = ">= 0.3, < 0.5", optional = true } string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] From 830835a047fb07052634a1a0a0043bd575a3fc70 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Jun 2017 12:54:03 +0200 Subject: [PATCH 270/379] Avoid some 
allocations in to_ascii_{upper,lower}case --- Cargo.toml | 2 +- src/atom.rs | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c7d1492..c9f61ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.5.1" # Also update README.md when making a semver-breaking change +version = "0.5.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/src/atom.rs b/src/atom.rs index 9a92e5f..5012227 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -469,19 +469,37 @@ impl Deserialize for Atom { // We don't need to implement is_ascii because there's no performance improvement // over the one from &str. impl Atom { + fn from_mutated_str(s: &str, f: F) -> Self { + let mut buffer: [u8; 64] = unsafe { mem::uninitialized() }; + if let Some(buffer_prefix) = buffer.get_mut(..s.len()) { + buffer_prefix.copy_from_slice(s.as_bytes()); + // FIXME: use from std::str when stable https://github.com/rust-lang/rust/issues/41119 + pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { + mem::transmute(v) + } + let as_str = unsafe { from_utf8_unchecked_mut(buffer_prefix) }; + f(as_str); + Atom::from(&*as_str) + } else { + let mut string = s.to_owned(); + f(&mut string); + Atom::from(string) + } + } + pub fn to_ascii_uppercase(&self) -> Self { - for b in self.bytes() { + for (i, b) in self.bytes().enumerate() { if let b'a' ... b'z' = b { - return Atom::from((&**self).to_ascii_uppercase()) + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()) } } self.clone() } pub fn to_ascii_lowercase(&self) -> Self { - for b in self.bytes() { + for (i, b) in self.bytes().enumerate() { if let b'A' ... 
b'Z' = b { - return Atom::from((&**self).to_ascii_lowercase()) + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()) } } self.clone() From 1ae39ead7892aaa1365eb4f1e622abb8607842c2 Mon Sep 17 00:00:00 2001 From: Ashley Mannix Date: Mon, 22 May 2017 21:16:16 +1000 Subject: [PATCH 271/379] update to serde 1.0 --- Cargo.toml | 2 +- src/atom.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c9f61ee..d1e797a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ unstable = [] [dependencies] precomputed-hash = "0.1" lazy_static = "0.2" -serde = "0.9" +serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" heapsize = { version = ">= 0.3, < 0.5", optional = true } diff --git a/src/atom.rs b/src/atom.rs index 5012227..9adc4da 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -458,8 +458,8 @@ impl Serialize for Atom { } } -impl Deserialize for Atom { - fn deserialize(deserializer: D) -> Result where D: Deserializer { +impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { + fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { let string: String = try!(Deserialize::deserialize(deserializer)); Ok(Atom::from(string)) } From cf74a86c66b2d5469e119b211be9c77032ce52c7 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 16 Jun 2017 10:36:49 +0200 Subject: [PATCH 272/379] Serde update is a breaking change --- Cargo.toml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d1e797a..b5de7a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.5.2" # Also update README.md when making a semver-breaking change +version = "0.6.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/README.md b/README.md index 5b58e51..191c935 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.5" +string_cache = "0.6" ``` In `lib.rs`: From 83942971f4502a80a0449116434aef895d624732 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 7 Jul 2017 16:31:54 +0200 Subject: [PATCH 273/379] Remove RawSlice, support big-endian platforms --- src/atom.rs | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 9adc4da..66f8666 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -528,18 +528,31 @@ enum UnpackedAtom { Static(u32), } -struct RawSlice { - data: *const u8, - len: usize, +#[inline(always)] +fn inline_atom_slice(x: &u64) -> &[u8] { + unsafe { + let x: *const u64 = x; + let mut data = x as *const u8; + // All except the lowest byte, which is first in little-endian, last in big-endian. + if cfg!(target_endian = "little") { + data = data.offset(1); + } + let len = 7; + slice::from_raw_parts(data, len) + } } -#[cfg(target_endian = "little")] // Not implemented yet for big-endian #[inline(always)] -unsafe fn inline_atom_slice(x: &u64) -> RawSlice { - let x: *const u64 = x; - RawSlice { - data: (x as *const u8).offset(1), - len: 7, +fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { + unsafe { + let x: *mut u64 = x; + let mut data = x as *mut u8; + // All except the lowest byte, which is first in little-endian, last in big-endian. 
+ if cfg!(target_endian = "little") { + data = data.offset(1); + } + let len = 7; + slice::from_raw_parts_mut(data, len) } } @@ -557,9 +570,7 @@ impl UnpackedAtom { debug_assert!((len as usize) <= MAX_INLINE_LEN); let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let raw_slice = inline_atom_slice(&mut data); - let dest: &mut [u8] = slice::from_raw_parts_mut( - raw_slice.data as *mut u8, raw_slice.len); + let dest = inline_atom_slice_mut(&mut data); copy_memory(&buf[..], dest); } data @@ -578,8 +589,7 @@ impl UnpackedAtom { let len = ((data & 0xf0) >> 4) as usize; debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + let src = inline_atom_slice(&data); copy_memory(src, &mut buf[..]); Inline(len as u8, buf) }, @@ -606,8 +616,7 @@ unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + let src = inline_atom_slice(&data); &src[..(len as usize)] } _ => debug_unreachable!(), From c8ebbfb15bd1526918dfc5afae95cbf37ba9fd85 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 7 Jul 2017 16:36:36 +0200 Subject: [PATCH 274/379] Replace ad-hoc copy_memory function with [T]::copy_from_slice --- src/atom.rs | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 66f8666..e269391 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -23,7 +23,6 @@ use std::hash::{Hash, Hasher}; use std::marker::PhantomData; use std::mem; use std::ops; -use std::ptr; use std::slice; use std::str; use std::sync::Mutex; @@ -314,7 +313,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { let len = string_to_add.len(); if len <= MAX_INLINE_LEN { let mut 
buf: [u8; 7] = [0; 7]; - copy_memory(string_to_add.as_bytes(), &mut buf); + buf[..len].copy_from_slice(string_to_add.as_bytes()); Inline(len as u8, buf) } else { Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) @@ -571,7 +570,7 @@ impl UnpackedAtom { let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { let dest = inline_atom_slice_mut(&mut data); - copy_memory(&buf[..], dest); + dest.copy_from_slice(&buf) } data } @@ -590,7 +589,7 @@ impl UnpackedAtom { debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; let src = inline_atom_slice(&data); - copy_memory(src, &mut buf[..]); + buf.copy_from_slice(src); Inline(len as u8, buf) }, _ => debug_unreachable!(), @@ -623,21 +622,6 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { } } - -/// Copy of std::slice::bytes::copy_memory, which is unstable. -#[inline] -fn copy_memory(src: &[u8], dst: &mut [u8]) { - let len_src = src.len(); - assert!(dst.len() >= len_src); - // `dst` is unaliasable, so we know statically it doesn't overlap - // with `src`. - unsafe { - ptr::copy_nonoverlapping(src.as_ptr(), - dst.as_mut_ptr(), - len_src); - } -} - #[cfg(test)] #[macro_use] mod tests { From 88d65e7d4331795e789758ee65c001e811dc8225 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 7 Jul 2017 19:06:34 +0200 Subject: [PATCH 275/379] v0.6.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b5de7a4..4410f9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.0" # Also update README.md when making a semver-breaking change +version = "0.6.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From c5585bd446ec98de76ef7b7c80e63d232368946d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Mon, 7 Aug 2017 12:43:59 +0200 Subject: [PATCH 276/379] atom: Use the same hash everywhere. This will allow me to write a "no-op" hasher that expects a single u32 in the style system, to avoid hashing the hash again. --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index e269391..cb9f28a 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -268,7 +268,7 @@ impl Default for Atom { impl Hash for Atom { #[inline] fn hash(&self, state: &mut H) where H: Hasher { - self.unsafe_data.hash(state) + state.write_u32(self.get_hash()) } } From e468085594cdfa014db6f873b5b9c9901a22f6cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Mon, 7 Aug 2017 12:44:58 +0200 Subject: [PATCH 277/379] v0.6.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4410f9e..71f1444 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.1" # Also update README.md when making a semver-breaking change +version = "0.6.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From a955aba2b1d6ede679c34b022a6bc4c6560677ee Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 29 Aug 2017 03:13:51 +0200 Subject: [PATCH 278/379] Correct cfg for optional rand dependency --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 447a399..3bbfa2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ #[cfg(all(test, feature = "unstable"))] extern crate test; #[cfg(feature = "heapsize")] extern crate heapsize; -#[cfg(test)] extern crate rand; +#[cfg(all(test, feature = "unstable"))] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; extern crate phf_shared; From 673f8ea4df010ccf4b5e94a2fa02bdf704b90598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Wed, 4 Oct 2017 23:34:02 +0200 Subject: [PATCH 279/379] Implement From<&Atom> for Atom. Needed to do more complex stuff in selectors with local names and namespace urls. --- src/atom.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/atom.rs b/src/atom.rs index cb9f28a..9790198 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -227,6 +227,12 @@ impl ::precomputed_hash::PrecomputedHash for Atom } } +impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { + fn from(atom: &'a Self) -> Self { + atom.clone() + } +} + fn u64_hash_as_u32(h: u64) -> u32 { // This may or may not be great... ((h >> 32) ^ h) as u32 From 84ed420ba098bf35d12b1a75c70f040295550326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Cobos=20=C3=81lvarez?= Date: Wed, 4 Oct 2017 23:35:18 +0200 Subject: [PATCH 280/379] Bump version. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 71f1444..07366e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.2" # Also update README.md when making a semver-breaking change +version = "0.6.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From f48bdff01d34f833349325a2485f86472290556d Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 24 Oct 2017 11:19:36 +1100 Subject: [PATCH 281/379] Remove `heapsize` dependency. The heapsize crate is being deprecated in favour of the malloc_size_of crate within Servo. --- .travis.yml | 2 +- Cargo.toml | 3 +-- README.md | 4 ++-- src/atom.rs | 37 ------------------------------------- src/lib.rs | 1 - 5 files changed, 4 insertions(+), 43 deletions(-) diff --git a/.travis.yml b/.travis.yml index 05ea9e7..438e39d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ script: - cargo test - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - cargo test --features heapsize + - cargo test - "cd string-cache-codegen/ && cargo build && cd .." - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." diff --git a/Cargo.toml b/Cargo.toml index 07366e8..64ea6f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.6.3" # Also update README.md when making a semver-breaking change +version = "0.7.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -33,7 +33,6 @@ lazy_static = "0.2" serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" -heapsize = { version = ">= 0.3, < 0.5", optional = true } string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] diff --git a/README.md b/README.md index 191c935..657f646 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.6" +string_cache = "0.7" ``` In `lib.rs`: @@ -31,7 +31,7 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.5" +string_cache = "0.7" [build-dependencies] string_cache_codegen = "0.4" diff --git a/src/atom.rs b/src/atom.rs index 9790198..10e9943 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -9,9 +9,6 @@ #![allow(non_upper_case_globals)] -#[cfg(feature = "heapsize")] -use heapsize::HeapSizeOf; - use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -46,28 +43,10 @@ struct StringCache { buckets: [Option>; NB_BUCKETS], } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for StringCache { - fn heap_size_of_children(&self) -> usize { - self.buckets.iter().fold(0, |size, bucket| size + bucket.heap_size_of_children()) - } -} - lazy_static! { static ref STRING_CACHE: Mutex = Mutex::new(StringCache::new()); } -/// A token that represents the heap used by the dynamic string cache. 
-#[cfg(feature = "heapsize")] -pub struct StringCacheHeap; - -#[cfg(feature = "heapsize")] -impl HeapSizeOf for StringCacheHeap { - fn heap_size_of_children(&self) -> usize { - STRING_CACHE.lock().unwrap().heap_size_of_children() - } -} - struct StringCacheEntry { next_in_bucket: Option>, hash: u64, @@ -75,14 +54,6 @@ struct StringCacheEntry { string: Box, } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for StringCacheEntry { - fn heap_size_of_children(&self) -> usize { - self.next_in_bucket.heap_size_of_children() + - self.string.heap_size_of_children() - } -} - impl StringCacheEntry { fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { @@ -213,14 +184,6 @@ pub struct Atom { pub phantom: PhantomData, } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Atom { - #[inline(always)] - fn heap_size_of_children(&self) -> usize { - 0 - } -} - impl ::precomputed_hash::PrecomputedHash for Atom { fn precomputed_hash(&self) -> u32 { self.get_hash() diff --git a/src/lib.rs b/src/lib.rs index 3bbfa2e..04890e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ #![cfg_attr(all(test, feature = "unstable"), feature(test))] #[cfg(all(test, feature = "unstable"))] extern crate test; -#[cfg(feature = "heapsize")] extern crate heapsize; #[cfg(all(test, feature = "unstable"))] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; From 83bd6cb2a1dcb5574042452e51e6c84c3020c042 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 10 Nov 2017 07:41:55 +0100 Subject: [PATCH 282/379] Fixed denied "unused import" warning on Nigthly --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 10e9943..add3e55 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -12,7 +12,7 @@ use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::ascii::AsciiExt; +#[allow(unused_imports)] use std::ascii::AsciiExt; use std::borrow::Cow; use 
std::cmp::Ordering::{self, Equal}; use std::fmt; From 8b33173f274bd943ad9a8751a0417952dca03bb5 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 22 Nov 2017 16:42:40 +0000 Subject: [PATCH 283/379] Allow the generated macros to have documentation --- string-cache-codegen/lib.rs | 76 ++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index dabd0bb..83a63a3 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -6,6 +6,65 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +//! A crate to create static string caches at compiletime. +//! +//! # Examples +//! +//! With static atoms: +//! +//! In `Cargo.toml`: +//! +//! ```toml +//! [package] +//! build = "build.rs" +//! +//! [dependencies] +//! string_cache = "0.7" +//! +//! [build-dependencies] +//! string_cache_codegen = "0.4" +//! ``` +//! +//! In `build.rs`: +//! +//! ```no_run +//! extern crate string_cache_codegen; +//! +//! use std::env; +//! use std::path::Path; +//! +//! fn main() { +//! string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!") +//! .atoms(&["foo", "bar"]) +//! .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs")) +//! .unwrap() +//! } +//! ``` +//! +//! In `lib.rs`: +//! +//! ```ignore +//! extern crate string_cache; +//! +//! mod foo { +//! include!(concat!(env!("OUT_DIR"), "/foo_atom.rs")); +//! } +//! ``` +//! +//! The generated code will define a `FooAtom` type and a `foo_atom!` macro. +//! The macro can be used in expression or patterns, with strings listed in `build.rs`. +//! For example: +//! +//! ```ignore +//! fn compute_something(input: &foo::FooAtom) -> u32 { +//! match *input { +//! foo_atom!("foo") => 1, +//! foo_atom!("bar") => 2, +//! _ => 3, +//! } +//! } +//! ``` +//! 
#![recursion_limit = "128"] @@ -24,6 +83,7 @@ use std::path::Path; pub struct AtomType { path: String, macro_name: String, + macro_doc: Option, atoms: HashSet, } @@ -38,7 +98,7 @@ impl AtomType { /// /// For example, `AtomType::new("foo::FooAtom", "foo_atom!")` will generate: /// - /// ```rust + /// ```ignore /// pub type FooAtom = ::string_cache::Atom; /// pub struct FooAtomStaticSet; /// impl ::string_cache::StaticAtomSet for FooAtomStaticSet { @@ -53,10 +113,19 @@ impl AtomType { AtomType { path: path.to_owned(), macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(), + macro_doc: None, atoms: HashSet::new(), } } + /// Add some documentation to the generated macro. + /// + /// Note that `docs` should not contain the `///` at the front of normal docs. + pub fn with_macro_doc(&mut self, docs: &str) -> &mut Self { + self.macro_doc = Some(docs.to_owned()); + self + } + /// Adds an atom to the builder pub fn atom(&mut self, s: &str) -> &mut Self { self.atoms.insert(s.to_owned()); @@ -107,6 +176,10 @@ impl AtomType { } else { &self.path }; + let macro_doc = match self.macro_doc { + Some(ref doc) => quote!(#[doc = #doc]), + None => quote!() + }; let static_set_name = quote::Ident::from(format!("{}StaticSet", type_name)); let type_name = quote::Ident::from(type_name); let macro_name = quote::Ident::from(&*self.macro_name); @@ -129,6 +202,7 @@ impl AtomType { #empty_string_index } } + #macro_doc #[macro_export] macro_rules! 
#macro_name { #( From 8b38b6f3af0a3a473b12444bcd3add4b9e1e3a26 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Thu, 23 Nov 2017 23:06:41 +0000 Subject: [PATCH 284/379] Multiple changes - Add documentation to types & methods in atom.rs - Add ability to add documentation to generated types --- src/atom.rs | 44 ++++++++++++++++++++++++++++++++----- string-cache-codegen/lib.rs | 38 +++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index add3e55..c8b2f3c 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -138,11 +138,28 @@ impl StringCache { } } +/// A static `PhfStrSet` +/// +/// This trait is implemented by static sets of interned strings generated using +/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically. +/// +/// It is used by the methods of [`Atom`] to check if a string is present in the static set. +/// +/// [`Atom`]: struct.Atom.html pub trait StaticAtomSet { + /// Get the location of the static string set in the binary. fn get() -> &'static PhfStrSet; + /// Get the index of the empty string, which is in every set and is used for `Atom::default`. fn empty_string_index() -> u32; } +/// A string set created using a [perfect hash function], specifically +/// [Hash, Displace and Compress]. +/// +/// See the CHD document for the meaning of the struct fields. +/// +/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function +/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf pub struct PhfStrSet { pub key: u64, pub disps: &'static [(u32, u32)], @@ -150,6 +167,7 @@ pub struct PhfStrSet { pub hashes: &'static [u32], } +/// An empty static atom set for when only dynamic strings will be added pub struct EmptyStaticAtomSet; impl StaticAtomSet for EmptyStaticAtomSet { @@ -174,6 +192,10 @@ impl StaticAtomSet for EmptyStaticAtomSet { /// Use this if you don’t care about static atoms. 
pub type DefaultAtom = Atom; +/// Represents a string that has been interned. +/// +/// In reality this contains a complex packed datastructure and the methods to extract information +/// from it, along with type information to tell the compiler which static set it corresponds to. pub struct Atom { /// This field is public so that the `atom!()` macros can use it. /// You should not otherwise access this field. @@ -207,6 +229,7 @@ impl Atom { UnpackedAtom::from_packed(self.unsafe_data) } + /// Get the hash of the string as it is stored in the set. pub fn get_hash(&self) -> u32 { match unsafe { self.unpack() } { Static(index) => { @@ -441,11 +464,7 @@ impl Atom { let mut buffer: [u8; 64] = unsafe { mem::uninitialized() }; if let Some(buffer_prefix) = buffer.get_mut(..s.len()) { buffer_prefix.copy_from_slice(s.as_bytes()); - // FIXME: use from std::str when stable https://github.com/rust-lang/rust/issues/41119 - pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { - mem::transmute(v) - } - let as_str = unsafe { from_utf8_unchecked_mut(buffer_prefix) }; + let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) }; f(as_str); Atom::from(&*as_str) } else { @@ -455,6 +474,9 @@ impl Atom { } } + /// Like [`to_ascii_uppercase`]. + /// + /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase pub fn to_ascii_uppercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { if let b'a' ... b'z' = b { @@ -464,6 +486,9 @@ impl Atom { self.clone() } + /// Like [`to_ascii_lowercase`]. + /// + /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase pub fn to_ascii_lowercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { if let b'A' ... b'Z' = b { @@ -473,10 +498,16 @@ impl Atom { self.clone() } + /// Like [`eq_ignore_ascii_case`]. 
+ /// + /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { (self == other) || self.eq_str_ignore_ascii_case(&**other) } + /// Like [`eq_ignore_ascii_case`], but takes an unhashed string as `other`. + /// + /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool { (&**self).eq_ignore_ascii_case(other) } @@ -525,6 +556,8 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { } impl UnpackedAtom { + /// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for + /// hints for the layout. #[inline(always)] unsafe fn pack(self) -> u64 { match self { @@ -546,6 +579,7 @@ impl UnpackedAtom { } } + /// Unpack a key, extracting information from a single u64 into useable structs. #[inline(always)] unsafe fn from_packed(data: u64) -> UnpackedAtom { debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 83a63a3..e92f8a0 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -82,6 +82,8 @@ use std::path::Path; /// A builder for a static atom set and relevant macros pub struct AtomType { path: String, + atom_doc: Option, + static_set_doc: Option, macro_name: String, macro_doc: Option, atoms: HashSet, @@ -108,16 +110,40 @@ impl AtomType { /// macro_rules foo_atom { /// // Expands to: $crate::foo::FooAtom { … } /// } + /// ``` pub fn new(path: &str, macro_name: &str) -> Self { - assert!(macro_name.ends_with("!")); + assert!(macro_name.ends_with("!"), "`macro_name` must end with '!'"); AtomType { path: path.to_owned(), macro_name: macro_name[..macro_name.len() - "!".len()].to_owned(), + atom_doc: None, + static_set_doc: None, macro_doc: None, atoms: HashSet::new(), } } + /// Add some documentation to the generated 
Atom type alias. + /// + /// This can help the user know that the type uses interned strings. + /// + /// Note that `docs` should not contain the `///` at the front of normal docs. + pub fn with_atom_doc(&mut self, docs: &str) -> &mut Self { + self.atom_doc = Some(docs.to_owned()); + self + } + + /// Add some documentation to the generated static set. + /// + /// This can help the user know that this type is zero-sized and just references a static + /// lookup table, or point them to the `Atom` type alias for more info. + /// + /// Note that `docs` should not contain the `///` at the front of normal docs. + pub fn with_static_set_doc(&mut self, docs: &str) -> &mut Self { + self.static_set_doc = Some(docs.to_owned()); + self + } + /// Add some documentation to the generated macro. /// /// Note that `docs` should not contain the `///` at the front of normal docs. @@ -176,6 +202,14 @@ impl AtomType { } else { &self.path }; + let atom_doc = match self.atom_doc { + Some(ref doc) => quote!(#[doc = #doc]), + None => quote!() + }; + let static_set_doc = match self.static_set_doc { + Some(ref doc) => quote!(#[doc = #doc]), + None => quote!() + }; let macro_doc = match self.macro_doc { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() @@ -186,7 +220,9 @@ impl AtomType { let path = iter::repeat(quote::Ident::from(&*self.path)); quote! 
{ + #atom_doc pub type #type_name = ::string_cache::Atom<#static_set_name>; + #static_set_doc pub struct #static_set_name; impl ::string_cache::StaticAtomSet for #static_set_name { fn get() -> &'static ::string_cache::PhfStrSet { From abb9c61222e587eb60c6b168a614b5b20a52cd61 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Mon, 4 Dec 2017 21:55:42 +0100 Subject: [PATCH 285/379] Update lazy_static to 1.0 and bump version --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 64ea6f6..14adf72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.0" # Also update README.md when making a semver-breaking change +version = "0.7.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -29,7 +29,7 @@ unstable = [] [dependencies] precomputed-hash = "0.1" -lazy_static = "0.2" +lazy_static = "1" serde = "1" phf_shared = "0.7.4" debug_unreachable = "0.1.1" From 42f4684796ea90c4b81844a76d824089300383ed Mon Sep 17 00:00:00 2001 From: hcpl Date: Sun, 1 Apr 2018 14:07:22 +0300 Subject: [PATCH 286/379] Update dependencies --- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 3 ++- string-cache-codegen/lib.rs | 24 ++++++++++++++---------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 14adf72..8b0a204 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ debug_unreachable = "0.1.1" string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] -rand = "0.3" +rand = "0.4" [build-dependencies] string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 14b93a6..afb724d 100644 --- a/string-cache-codegen/Cargo.toml +++ 
b/string-cache-codegen/Cargo.toml @@ -16,4 +16,5 @@ path = "lib.rs" string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" phf_shared = "0.7.4" -quote = "0.3.9" +proc-macro2 = "0.3.1" +quote = "0.5.1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index e92f8a0..c61da73 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -72,6 +72,7 @@ extern crate phf_generator; extern crate phf_shared; extern crate string_cache_shared as shared; #[macro_use] extern crate quote; +extern crate proc_macro2; use std::collections::HashSet; use std::fs::File; @@ -169,7 +170,7 @@ impl AtomType { pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { destination.write_all( self.to_tokens() - .as_str() + .to_string() // Insert some newlines to make the generated code slightly easier to read. .replace(" [ \"", "[\n\"") .replace("\" , ", "\",\n") @@ -187,9 +188,11 @@ impl AtomType { let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); let hash_state = phf_generator::generate_hash(&atoms); let phf_generator::HashState { key, disps, map } = hash_state; + let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); + let atoms_ref = &atoms; let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; - let data = (0..atoms.len()).map(|i| quote::Hex(shared::pack_static(i as u32))); + let data = (0..atoms.len()).map(|i| proc_macro2::Literal::u64_suffixed(shared::pack_static(i as u32))); let hashes: Vec = atoms.iter().map(|string| { @@ -214,10 +217,11 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() }; - let static_set_name = quote::Ident::from(format!("{}StaticSet", type_name)); - let type_name = quote::Ident::from(type_name); - let macro_name = quote::Ident::from(&*self.macro_name); - let path = iter::repeat(quote::Ident::from(&*self.path)); + let 
produce_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); + let static_set_name = produce_term(&format!("{}StaticSet", type_name)); + let type_name = produce_term(type_name); + let macro_name = produce_term(&*self.macro_name); + let path = iter::repeat(produce_term(&*self.path)); quote! { #atom_doc @@ -228,9 +232,9 @@ impl AtomType { fn get() -> &'static ::string_cache::PhfStrSet { static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet { key: #key, - disps: &#disps, - atoms: &#atoms, - hashes: &#hashes + disps: &[#((#disps0, #disps1)),*], + atoms: &[#(#atoms_ref),*], + hashes: &[#(#hashes),*] }; &SET } @@ -242,7 +246,7 @@ impl AtomType { #[macro_export] macro_rules! #macro_name { #( - (#atoms) => { + (#atoms_ref) => { $crate::#path { unsafe_data: #data, phantom: ::std::marker::PhantomData, From f392c9b14dfbf6eb9fa22dd5ad223b14e2a4bd58 Mon Sep 17 00:00:00 2001 From: hcpl Date: Tue, 10 Apr 2018 17:00:30 +0300 Subject: [PATCH 287/379] Increment version numbers of affected crates Also fix an unnoticed bug and render data as hex numbers. --- Cargo.toml | 2 +- src/atom.rs | 1 - string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 19 +++++++++++++------ 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8b0a204..95ad89a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.1" # Also update README.md when making a semver-breaking change +version = "0.7.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/src/atom.rs b/src/atom.rs index c8b2f3c..7a48031 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -12,7 +12,6 @@ use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -#[allow(unused_imports)] use std::ascii::AsciiExt; use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index afb724d..c11952b 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.0" # Also update ../README.md when making a semver-breaking change +version = "0.4.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index c61da73..7312bcf 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -192,7 +192,14 @@ impl AtomType { let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); let atoms_ref = &atoms; let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; - let data = (0..atoms.len()).map(|i| proc_macro2::Literal::u64_suffixed(shared::pack_static(i as u32))); + let data = (0..atoms.len()).map(|i| { + format!("0x{:X}u64", shared::pack_static(i as u32)) + .parse::() + .unwrap() + .into_iter() + .next() + .unwrap() + }); let hashes: Vec = atoms.iter().map(|string| { @@ -217,11 +224,11 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() }; - let produce_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); - let static_set_name = produce_term(&format!("{}StaticSet", type_name)); - let type_name = produce_term(type_name); - let macro_name = produce_term(&*self.macro_name); - let 
path = iter::repeat(produce_term(&*self.path)); + let new_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); + let static_set_name = new_term(&format!("{}StaticSet", type_name)); + let type_name = new_term(type_name); + let macro_name = new_term(&*self.macro_name); + let path = iter::repeat(self.path.parse::<proc_macro2::TokenStream>().unwrap()); quote! { #atom_doc From d5b6071b949e69793ed1659bb4a1d81a70780a0f Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Mon, 16 Apr 2018 16:43:19 +0100 Subject: [PATCH 288/379] Add a simple example of library use. Specifically for interning without any static values. --- examples/simple.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 examples/simple.rs diff --git a/examples/simple.rs b/examples/simple.rs new file mode 100644 index 0000000..89f7369 --- /dev/null +++ b/examples/simple.rs @@ -0,0 +1,26 @@ +extern crate string_cache; + +use string_cache::{DefaultAtom, Atom}; + +fn main() { + + let mut interned_stuff = Vec::new(); + let text = "here is a sentence of text that will be tokenised and interned and some repeated \ + tokens is of text and"; + for word in text.split_whitespace() { + let seen_before = interned_stuff.iter() + // We can use impl PartialEq<T> where T is anything string-like to compare to + // interned strings to either other interned strings, or actual strings Comparing two + // interned strings is very fast (normally a single cpu operation). 
+ .filter(|interned_word| interned_word == &word) + .count(); + if seen_before > 0 { + println!(r#"Seen the word "{}" {} times"#, word, seen_before); + } else { + println!(r#"Not seen the word "{}" before"#, word); + } + // We use the impl From<(Cow<'a, str>, or &'a str, or String) for Atom to intern a + // new string + interned_stuff.push(DefaultAtom::from(word)); + } +} From 035ad182c67527876de5a5acd5cbde993db8724d Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Mon, 16 Apr 2018 16:58:31 +0100 Subject: [PATCH 289/379] Added some module-level docs --- src/lib.rs | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 04890e7..3571da9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,96 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +//! +//! A library for interning things that are `AsRef`. +//! +//! Some strings may be interned at compile time using the `string-cache-codegen` crate, or the +//! `EmptyStaticAtomSet` may be used that has no comiple-time interned strings. An `Atom` is an +//! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`). +//! +//! Generated `Atom`s will have assocated macros to intern static strings at compile-time. +//! +//! # Examples +//! +//! Here are two examples, one with compile-time `Atom`s, and one without. +//! +//! ## With compile-time atoms +//! +//! In `Cargo.toml`: +//! ```toml +//! [dependencies] +//! string_cache = "0.7" +//! +//! [dev-dependencies] +//! string_cache_codegen = "0.4" +//! ``` +//! +//! In `build.rs`: +//! ```rust +//! extern crate string_cache_codegen; +//! +//! use std::env; +//! use std::path::Path; +//! +//! fn main() { +//! string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!") +//! .atoms(&["foo", "bar"]) +//! .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs")) +//! .unwrap() +//! } +//! 
``` +//! +//! In `lib.rs`: +//! ```rust +//! extern crate string_cache; +//! +//! mod foo { +//! include!(concat!(env!("OUT_DIR"), "/foo_atom.rs")); +//! } +//! +//! fn use_the_atom(t: &str) { +//! match *t { +//! foo_atom!("foo") => println!("Found foo!"), +//! foo_atom!("bar") => println!("Found bar!"), +//! // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error +//! _ => { +//! println!("String not interned"); +//! // We can intern strings at runtime as well +//! foo::FooAtom::from(t) +//! } +//! } +//! } +//! ``` +//! +//! ## No compile-time atoms +//! +//! ```rust +//! extern crate string_cache; +//! +//! +//! let mut interned_stuff = Vec::new(); +//! let text = "here is a sentence of text that will be tokenised and +//! interned and some repeated tokens is of text and"; +//! for word in text.split_whitespace() { +//! let seen_before = interned_stuff.iter() +//! // We can use impl PartialEq where T is anything string-like +//! // to compare to interned strings to either other interned strings, +//! // or actual strings Comparing two interned strings is very fast +//! // (normally a single cpu operation). +//! .filter(|interned_word| interned_word == &word) +//! .count(); +//! if seen_before > 0 { +//! println!(r#"Seen the word "{}" {} times"#, word, seen_before); +//! } else { +//! println!(r#"Not seen the word "{}" before"#, word); +//! } +//! // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for +//! // Atom to intern a new string. +//! interned_stuff.push(DefaultAtom::from(word)); +//! } +//! ``` +//! 
+ #![crate_name = "string_cache"] #![crate_type = "rlib"] From 19cbfa9a718331d3313e9de281e4f7c63310e5a6 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Tue, 17 Apr 2018 09:55:38 +0100 Subject: [PATCH 290/379] Fix tests --- Cargo.toml | 1 + examples/simple.rs | 2 +- src/lib.rs | 8 +++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 95ad89a..0eafa8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] rand = "0.4" +string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } [build-dependencies] string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } diff --git a/examples/simple.rs b/examples/simple.rs index 89f7369..b375049 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -1,6 +1,6 @@ extern crate string_cache; -use string_cache::{DefaultAtom, Atom}; +use string_cache::DefaultAtom; fn main() { diff --git a/src/lib.rs b/src/lib.rs index 3571da9..2912cab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,7 @@ //! ``` //! //! In `build.rs`: -//! ```rust +//! ``` //! extern crate string_cache_codegen; //! //! use std::env; @@ -47,7 +47,7 @@ //! ``` //! //! In `lib.rs`: -//! ```rust +//! ```ignore //! extern crate string_cache; //! //! mod foo { @@ -70,9 +70,11 @@ //! //! ## No compile-time atoms //! -//! ```rust +//! ``` //! extern crate string_cache; //! +//! use string_cache::DefaultAtom; +//! //! //! let mut interned_stuff = Vec::new(); //! let text = "here is a sentence of text that will be tokenised and From 9b41702503fc59d9abaec9f852b7c0b8c0f35d3f Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Tue, 17 Apr 2018 10:18:58 +0100 Subject: [PATCH 291/379] Try fix examples --- src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2912cab..86deee3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,7 @@ //! ``` //! //! 
In `build.rs`: +//! //! ``` //! extern crate string_cache_codegen; //! @@ -47,6 +48,7 @@ //! ``` //! //! In `lib.rs`: +//! //! ```ignore //! extern crate string_cache; //! @@ -71,11 +73,10 @@ //! ## No compile-time atoms //! //! ``` -//! extern crate string_cache; -//! +//! # extern crate string_cache; //! use string_cache::DefaultAtom; //! -//! +//! # fn main() { //! let mut interned_stuff = Vec::new(); //! let text = "here is a sentence of text that will be tokenised and //! interned and some repeated tokens is of text and"; @@ -96,6 +97,7 @@ //! // Atom to intern a new string. //! interned_stuff.push(DefaultAtom::from(word)); //! } +//! # } //! ``` //! From 007c8260f0727038774e133b880521300fade5a4 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Sat, 2 Jun 2018 21:37:01 -0700 Subject: [PATCH 292/379] Switch to fork of debug_unreachable Because the original debug_unreachable is abandoned and doesn't work correctly in modern Rust. --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 95ad89a..fda2c99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.2" # Also update README.md when making a semver-breaking change +version = "0.7.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -32,7 +32,7 @@ precomputed-hash = "0.1" lazy_static = "1" serde = "1" phf_shared = "0.7.4" -debug_unreachable = "0.1.1" +new_debug_unreachable = "1.0" string_cache_shared = {path = "./shared", version = "0.3"} [dev-dependencies] From f26c0f6294af7a5554309961884a9095a97451f7 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Tue, 5 Jun 2018 15:05:40 +0200 Subject: [PATCH 293/379] Update quote, proc-macro2 and bump version in codegen --- string-cache-codegen/Cargo.toml | 6 +++--- string-cache-codegen/lib.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index c11952b..a3d78ed 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.1" # Also update ../README.md when making a semver-breaking change +version = "0.4.2" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -16,5 +16,5 @@ path = "lib.rs" string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" phf_shared = "0.7.4" -proc-macro2 = "0.3.1" -quote = "0.5.1" +proc-macro2 = "0.4" +quote = "0.6" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 7312bcf..b675712 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -179,7 +179,7 @@ impl AtomType { .as_bytes()) } - fn to_tokens(&mut self) -> quote::Tokens { + fn to_tokens(&mut self) -> proc_macro2::TokenStream { // `impl Default for Atom` requires the empty string to be in the static set. // This also makes sure the set in non-empty, // which would cause divisions by zero in rust-phf. 
@@ -224,7 +224,7 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!() }; - let new_term = |string: &str| proc_macro2::Term::new(string, proc_macro2::Span::call_site()); + let new_term = |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); From 94883855ea265775492ae4bcbf9b78accab4544d Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 25 Jul 2018 18:30:49 +0100 Subject: [PATCH 294/379] Fix typo --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 86deee3..7eb8216 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,7 @@ //! A library for interning things that are `AsRef`. //! //! Some strings may be interned at compile time using the `string-cache-codegen` crate, or the -//! `EmptyStaticAtomSet` may be used that has no comiple-time interned strings. An `Atom` is an +//! `EmptyStaticAtomSet` may be used that has no compile-time interned strings. An `Atom` is an //! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`). //! //! Generated `Atom`s will have assocated macros to intern static strings at compile-time. From 48917bae5a7a2341529aa553a1c38f1f29deed80 Mon Sep 17 00:00:00 2001 From: Jan Andre Ikenmeyer Date: Sat, 10 Nov 2018 17:10:56 +0100 Subject: [PATCH 295/379] Remove unneeded webhook notification --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 438e39d..f0c2e36 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,5 +15,3 @@ script: - "cd string-cache-codegen/ && cargo build && cd .." - "cd examples/event-log/ && cargo build && cd ../.." - "cd examples/summarize-events/ && cargo build && cd ../.." 
-notifications: - webhooks: http://build.servo.org:54856/travis From 8114d01592a7f388416491eb1aad1dc4e47eb242 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Mon, 11 Mar 2019 16:38:42 -0400 Subject: [PATCH 296/379] Add licenses to shared and codegen crates. --- shared/Cargo.toml | 2 +- shared/LICENSE-APACHE | 201 ++++++++++++++++++++++++++++ shared/LICENSE-MIT | 25 ++++ string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/LICENSE-APACHE | 201 ++++++++++++++++++++++++++++ string-cache-codegen/LICENSE-MIT | 25 ++++ 6 files changed, 454 insertions(+), 2 deletions(-) create mode 100644 shared/LICENSE-APACHE create mode 100644 shared/LICENSE-MIT create mode 100644 string-cache-codegen/LICENSE-APACHE create mode 100644 string-cache-codegen/LICENSE-MIT diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 6f8286d..ce8d53f 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.3.0" +version = "0.3.1" authors = [ "The Servo Project Developers" ] description = "Code share between string_cache and string_cache_codegen." license = "MIT / Apache-2.0" diff --git a/shared/LICENSE-APACHE b/shared/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/shared/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/shared/LICENSE-MIT b/shared/LICENSE-MIT new file mode 100644 index 0000000..807526f --- /dev/null +++ b/shared/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2012-2013 Mozilla Foundation + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a3d78ed..d6e1400 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.2" # Also update ../README.md when making a semver-breaking change +version = "0.4.3" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/LICENSE-APACHE b/string-cache-codegen/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/string-cache-codegen/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/string-cache-codegen/LICENSE-MIT b/string-cache-codegen/LICENSE-MIT new file mode 100644 index 0000000..807526f --- /dev/null +++ b/string-cache-codegen/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2012-2013 Mozilla Foundation + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
From 3238aee8967bebd635234a4859a315d5cce1210e Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Wed, 31 Jul 2019 13:54:20 +0200 Subject: [PATCH 297/379] Bump phf_generator generator version to 0.7.22 This is useful for cargo minimal version builds. `phf_generator` in versions before 0.7.22 depended on rand 0.3 which itself depended on libc 0.1.1 that no longer compiles with the newest Rust version. --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index d6e1400..a7241de 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -14,7 +14,7 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} -phf_generator = "0.7.15" +phf_generator = "0.7.22" phf_shared = "0.7.4" proc-macro2 = "0.4" quote = "0.6" From a98e06372aca511760fae08a85f91e5b5d1bc9fd Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Wed, 31 Jul 2019 13:59:42 +0200 Subject: [PATCH 298/379] Replace ... operator with ..= --- src/atom.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 7a48031..c05ef1d 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -478,7 +478,7 @@ impl Atom { /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase pub fn to_ascii_uppercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'a' ... b'z' = b { + if let b'a' ..= b'z' = b { return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()) } } @@ -490,7 +490,7 @@ impl Atom { /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase pub fn to_ascii_lowercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'A' ... 
b'Z' = b { + if let b'A' ..= b'Z' = b { return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()) } } From db1eda55bce00f1b94c0aa2174c986ed619965f5 Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Wed, 31 Jul 2019 08:12:53 -0700 Subject: [PATCH 299/379] Update Travis CI URL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 657f646..ddd3bc0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # string-cache -[![Build Status](https://travis-ci.org/servo/string-cache.svg?branch=master)](https://travis-ci.org/servo/string-cache) +[![Build Status](https://travis-ci.com/servo/string-cache.svg?branch=master)](https://travis-ci.com/servo/string-cache) [Documentation](https://docs.rs/string_cache/) From 45c3dae8f7fb203fb385ec66117c881bc14de3f3 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Sat, 17 Aug 2019 16:28:03 +0200 Subject: [PATCH 300/379] Update syn related dependencies to 1.0 and bump version --- string-cache-codegen/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index d6e1400..0a828f3 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.3" # Also update ../README.md when making a semver-breaking change +version = "0.4.4" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT / Apache-2.0" @@ -16,5 +16,5 @@ path = "lib.rs" string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.7.15" phf_shared = "0.7.4" -proc-macro2 = "0.4" -quote = "0.6" +proc-macro2 = "1" +quote = "1" From 904fb633625149e1930dab5a6ff6a65986ba258a Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Sun, 18 Aug 2019 14:58:46 +0200 Subject: [PATCH 301/379] Fix nightly build by fixing deprecation warnings --- src/atom.rs | 6 ++++-- string-cache-codegen/lib.rs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c05ef1d..11acf2a 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -450,7 +450,7 @@ impl Serialize for Atom { impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { - let string: String = try!(Deserialize::deserialize(deserializer)); + let string: String = Deserialize::deserialize(deserializer)?; Ok(Atom::from(string)) } } @@ -460,7 +460,9 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { // over the one from &str. 
impl Atom { fn from_mutated_str(s: &str, f: F) -> Self { - let mut buffer: [u8; 64] = unsafe { mem::uninitialized() }; + let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit(); + let buffer = unsafe { &mut *buffer.as_mut_ptr() }; + if let Some(buffer_prefix) = buffer.get_mut(..s.len()) { buffer_prefix.copy_from_slice(s.as_bytes()); let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) }; diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index b675712..8a06d33 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -269,6 +269,6 @@ impl AtomType { /// Typical usage: /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))` pub fn write_to_file(&mut self, path: &Path) -> io::Result<()> { - self.write_to(BufWriter::new(try!(File::create(path)))) + self.write_to(BufWriter::new(File::create(path)?)) } } From 80e59c1741117a69c5611f3a4c2bfe6cf0ef1ca1 Mon Sep 17 00:00:00 2001 From: Federico Mena Quintero Date: Mon, 26 Aug 2019 16:50:06 +0300 Subject: [PATCH 302/379] Document the semantics of Atom's generic parameter Provide a short example that shows why interning temporary atoms will not blow up memory consumption. Fixes #212 --- src/atom.rs | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c05ef1d..21f9e64 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -193,8 +193,46 @@ pub type DefaultAtom = Atom; /// Represents a string that has been interned. /// -/// In reality this contains a complex packed datastructure and the methods to extract information -/// from it, along with type information to tell the compiler which static set it corresponds to. +/// While the type definition for `Atom` indicates that it is generic on a particular +/// implementation of an atom set, you don't need to worry about this.
Atoms can be static +/// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they +/// can be dynamic and created by you on an `EmptyStaticAtomSet`. +/// +/// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted; +/// this means that you may need to `.clone()` an atom to keep copies to it in different +/// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`. +/// +/// ## Creating an atom at runtime +/// +/// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code +/// may then do something like read data from somewhere and extract tokens that need to be +/// compared to the atoms. In this case, you can use `Atom::from(&str)` or +/// `Atom::from(String)`. These create a reference-counted atom which will be +/// automatically freed when all references to it are dropped. +/// +/// This means that your application can safely have a loop which tokenizes data, creates +/// atoms from the tokens, and compares the atoms to a predefined set of keywords, without +/// running the risk of arbitrary memory consumption from creating large numbers of atoms — +/// as long as your application does not store clones of the atoms it creates along the +/// way. +/// +/// For example, the following is safe and will not consume arbitrary amounts of memory: +/// +/// ```ignore +/// let untrusted_data = "large amounts of text ..."; +/// +/// for token in untrusted_data.split_whitespace() { +/// let atom = Atom::from(token); // interns the string +/// +/// if atom == Atom::from("keyword") { +/// // handle that keyword +/// } else if atom == Atom::from("another_keyword") { +/// // handle that keyword +/// } else { +/// println!("unknown keyword"); +/// } +/// } // atom is dropped here, so it is not kept around in memory +/// ``` pub struct Atom { /// This field is public so that the `atom!()` macros can use it. /// You should not otherwise access this field. 
From af7a9c1d19254c367d4ce123f78cb883738abe95 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 30 Sep 2019 15:26:17 +0200 Subject: [PATCH 303/379] Fix initializing the global hash map with a small stack Fix https://github.com/servo/html5ever/issues/393 --- Cargo.toml | 6 +++++- src/atom.rs | 9 +++++++-- tests/small-stack.rs | 12 ++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 tests/small-stack.rs diff --git a/Cargo.toml b/Cargo.toml index dbdad57..d98a065 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.7.3" # Also update README.md when making a semver-breaking change +version = "0.7.4" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -41,3 +41,7 @@ string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } [build-dependencies] string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } + +[[test]] +name = "small-stack" +harness = false diff --git a/src/atom.rs b/src/atom.rs index f575c02..ac1f397 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -39,7 +39,7 @@ const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { - buckets: [Option>; NB_BUCKETS], + buckets: Box<[Option>; NB_BUCKETS]>, } lazy_static! 
{ @@ -67,8 +67,13 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { + type T = Option>; + let _static_assert_size_eq = std::mem::transmute::; + let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); StringCache { - buckets: unsafe { mem::zeroed() }, + buckets: unsafe { + Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) + }, } } diff --git a/tests/small-stack.rs b/tests/small-stack.rs new file mode 100644 index 0000000..300b142 --- /dev/null +++ b/tests/small-stack.rs @@ -0,0 +1,12 @@ +// Regression test for https://github.com/servo/html5ever/issues/393 +// +// Create a dynamic atom − causing initialization of the global hash map − +// in a thread that has a small stack. +// +// This is a separate test program rather than a `#[test] fn` among others +// to make sure that nothing else has already initialized the map in this process. +fn main() { + std::thread::Builder::new().stack_size(50_000).spawn(|| { + string_cache::DefaultAtom::from("12345678"); + }).unwrap().join().unwrap() +} From b94483f90f54a6a08ae082958077d640d26ffe19 Mon Sep 17 00:00:00 2001 From: Brendan Zabarauskas Date: Tue, 8 Oct 2019 15:12:20 +1100 Subject: [PATCH 304/379] Pin rustc version in .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index f0c2e36..bcb5f68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ sudo: false language: rust rust: + - 1.36.0 - nightly - beta - stable From d56ba88cd94acd485d69965f03bd025f33745b35 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 11:23:29 +0200 Subject: [PATCH 305/379] Remove log-events --- .travis.yml | 4 - Cargo.toml | 6 - examples/event-log/Cargo.toml | 9 -- examples/event-log/README.md | 4 - examples/event-log/src/main.rs | 33 ----- examples/summarize-events/Cargo.toml | 13 -- examples/summarize-events/src/main.rs | 170 -------------------------- src/atom.rs | 15 --- src/lib.rs | 4 - 9 files changed, 258 deletions(-) delete mode
100644 examples/event-log/Cargo.toml delete mode 100644 examples/event-log/README.md delete mode 100644 examples/event-log/src/main.rs delete mode 100644 examples/summarize-events/Cargo.toml delete mode 100644 examples/summarize-events/src/main.rs diff --git a/.travis.yml b/.travis.yml index bcb5f68..fa09a1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,10 +9,6 @@ os: - linux script: - cargo build - - cargo test - - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - cargo test - "cd string-cache-codegen/ && cargo build && cd .." - - "cd examples/event-log/ && cargo build && cd ../.." - - "cd examples/summarize-events/ && cargo build && cd ../.." diff --git a/Cargo.toml b/Cargo.toml index d98a065..61a853f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,4 @@ [package] - name = "string_cache" version = "0.7.4" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] @@ -19,11 +18,6 @@ build = "build.rs" name = "string_cache" [features] - -# Enable event logging for generating benchmark traces. -# See examples/event-log. -log-events = [] - # Use unstable features to optimize space and time (memory and CPU usage). unstable = [] diff --git a/examples/event-log/Cargo.toml b/examples/event-log/Cargo.toml deleted file mode 100644 index 7edd5cf..0000000 --- a/examples/event-log/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] - -name = "string-cache-event-log-example" -version = "0.0.0" -authors = [ "The Servo Project Developers" ] - -[dependencies.string_cache] -path = "../.." -features = ["log-events"] diff --git a/examples/event-log/README.md b/examples/event-log/README.md deleted file mode 100644 index b2deb39..0000000 --- a/examples/event-log/README.md +++ /dev/null @@ -1,4 +0,0 @@ -string-cache can record logs of what it's doing, which can be useful for -guiding future changes to the library. 
This project demonstrates how to build -string-cache with logging enabled (see `Cargo.toml`), and how to access the log -at runtime. diff --git a/examples/event-log/src/main.rs b/examples/event-log/src/main.rs deleted file mode 100644 index 7a25e29..0000000 --- a/examples/event-log/src/main.rs +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -extern crate string_cache; - -use string_cache::DefaultAtom as Atom; -use string_cache::event; - -use std::io; -use std::io::prelude::*; - -fn main() { - println!("Reading stdin to end of file"); - let mut stdin = String::new(); - io::stdin().read_to_string(&mut stdin).unwrap(); - let mut atoms = vec![]; - for word in stdin.split(|c: char| c.is_whitespace()) { - atoms.push(Atom::from(word)); - } - - let log = event::LOG.lock().unwrap(); - - println!("Created {} atoms, logged {} events:", atoms.len(), log.len()); - for e in log.iter() { - println!("{:?}", e); - } -} diff --git a/examples/summarize-events/Cargo.toml b/examples/summarize-events/Cargo.toml deleted file mode 100644 index 7d2e7ba..0000000 --- a/examples/summarize-events/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] - -name = "string-cache-summarize-events" -version = "0.0.0" -authors = [ "The Servo Project Developers" ] - -[dependencies] -csv = "0" -rustc-serialize = "0" -phf_shared = "0.7.4" - -[dependencies.string_cache] -path = "../.." diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs deleted file mode 100644 index a74e659..0000000 --- a/examples/summarize-events/src/main.rs +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2014 The Servo Project Developers. 
See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -extern crate csv; -extern crate string_cache; -extern crate rustc_serialize; -extern crate phf_shared; - -#[path = "../../../shared/lib.rs"] -#[allow(dead_code)] -mod shared; - -use string_cache::DefaultAtom as Atom; - -use std::{env, cmp}; -use std::collections::hash_map::{HashMap, Entry}; -use std::marker::PhantomData; -use std::path::Path; - -#[derive(RustcDecodable, Debug)] -struct Event { - event: String, - id: u64, - string: Option, -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -enum Kind { - Dynamic, - Inline, - Static, -} - -impl Kind { - fn from_tag(tag: u8) -> Kind { - match tag { - shared::DYNAMIC_TAG => Kind::Dynamic, - shared::INLINE_TAG => Kind::Inline, - shared::STATIC_TAG => Kind::Static, - _ => panic!() - } - } - - fn to_tag(self) -> u8 { - match self { - Kind::Dynamic => shared::DYNAMIC_TAG, - Kind::Inline => shared::INLINE_TAG, - Kind::Static => shared::STATIC_TAG, - } - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -struct Summary { - kind: Kind, - times: usize, -} - -fn main() { - let filename = env::args().skip(1).next() - .expect("Usage: string-cache-summarize-events foo.csv"); - let path = &Path::new(&filename); - let mut file = csv::Reader::from_file(path).unwrap(); - - // Over the lifetime of a program, one dynamic atom might get interned at - // several addresses, and one address may be used to intern several - // different strings. For this reason we must separately track the - // currently-allocated atoms and the summary of all atoms ever created. 
- let mut dynamic: HashMap = HashMap::new(); - let mut peak_dynamic = 0; - let mut summary: HashMap = HashMap::new(); - let mut inserts = 0; - - for record in file.decode() { - let ev: Event = record.unwrap(); - match &ev.event[..] { - "intern" => { - let tag = (ev.id & 0xf) as u8; - assert!(tag <= shared::STATIC_TAG); - - let string = match tag { - shared::DYNAMIC_TAG => dynamic[&ev.id].clone(), - - // FIXME: We really shouldn't be allowed to do this. It's a memory-safety - // hazard; the field is only public for the atom!() macro. - _ => Atom { unsafe_data: ev.id, phantom: PhantomData }.to_string(), - }; - - match summary.entry(string) { - Entry::Occupied(entry) => entry.into_mut().times += 1, - Entry::Vacant(entry) => { - entry.insert(Summary { - kind: Kind::from_tag(tag), - times: 1, - }); - } - } - }, - - "insert" => { - assert!(!dynamic.contains_key(&ev.id)); - dynamic.insert(ev.id, ev.string.expect("no string to insert")); - peak_dynamic = cmp::max(peak_dynamic, dynamic.len()); - inserts += 1; - } - - "remove" => { - assert!(dynamic.contains_key(&ev.id)); - dynamic.remove(&ev.id); - } - - e => panic!("unknown event {}", e), - } - } - - // Get all records, in a stable order. - let mut summary: Vec<_> = summary.into_iter().collect(); - summary.sort_by(|&(ref a, _), &(ref b, _)| a.cmp(b)); - - // Sort by number of occurrences, descending. 
- summary.sort_by(|&(_, a), &(_, b)| b.times.cmp(&a.times)); - let longest_atom = summary.iter().map(|&(ref k, _)| k.len()) - .max().unwrap_or(0); - - let pad = |c, n| { - for _ in n..longest_atom { - print!("{}", c); - } - }; - - let mut total = 0; - let mut by_kind = [0, 0, 0]; - for &(_, Summary { kind, times }) in &summary { - total += times; - by_kind[kind.to_tag() as usize] += times; - } - - println!("\n"); - println!("kind times pct"); - println!("------- ------- ----"); - for (k, &n) in by_kind.iter().enumerate() { - let k: Kind = Kind::from_tag(k as u8); - print!("{:7?} {:7} {:4.1}", - k, n, 100.0 * (n as f64) / (total as f64)); - - match k { - Kind::Dynamic => println!(" {} inserts, peak size {}, miss rate {:4.1}%", - inserts, peak_dynamic, 100.0 * (inserts as f64) / (n as f64)), - _ => println!(""), - } - } - println!(""); - println!("total {:7}", total); - println!("\n"); - - pad(' ', 4); - println!("atom times kind"); - pad('-', 4); - println!("---- ------ -------"); - for (string, Summary { kind, times }) in summary { - pad(' ', string.chars().count()); - println!("{} {:6} {:?}", string, times, kind); - } -} diff --git a/src/atom.rs b/src/atom.rs index ac1f397..3aef7e2 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -29,12 +29,6 @@ use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STAT ENTRY_ALIGNMENT, pack_static}; use self::UnpackedAtom::{Dynamic, Inline, Static}; -#[cfg(feature = "log-events")] -use event::Event; - -#[cfg(not(feature = "log-events"))] -macro_rules! 
log (($e:expr) => (())); - const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; @@ -101,16 +95,10 @@ impl StringCache { } debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let string = string.into_owned(); - let _string_clone = if cfg!(feature = "log-events") { - string.clone() - } else { - "".to_owned() - }; let mut entry = Box::new(StringCacheEntry::new( self.buckets[bucket_index].take(), hash, string)); let ptr: *mut StringCacheEntry = &mut *entry; self.buckets[bucket_index] = Some(entry); - log!(Event::Insert(ptr as u64, _string_clone)); ptr } @@ -137,8 +125,6 @@ impl StringCache { } current = unsafe { &mut (*entry_ptr).next_in_bucket }; } - - log!(Event::Remove(key)); } } @@ -355,7 +341,6 @@ impl<'a, Static: StaticAtomSet> From> for Atom { }; let data = unsafe { unpacked.pack() }; - log!(Event::Intern(data)); Atom { unsafe_data: data, phantom: PhantomData } } } diff --git a/src/lib.rs b/src/lib.rs index 7eb8216..fc00f3a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,10 +118,6 @@ extern crate string_cache_shared as shared; pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; -#[cfg(feature = "log-events")] -#[macro_use] -pub mod event; - pub mod atom; // Make test_atom! macro work in this crate. 
From 5f6ad92839a87d3c445e228f458f26625db8db6f Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 16:13:52 +0200 Subject: [PATCH 306/379] Update phf to 0.8 --- Cargo.toml | 2 +- src/atom.rs | 5 +++-- string-cache-codegen/Cargo.toml | 4 ++-- string-cache-codegen/lib.rs | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 61a853f..46e9117 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ unstable = [] precomputed-hash = "0.1" lazy_static = "1" serde = "1" -phf_shared = "0.7.4" +phf_shared = "0.8" new_debug_unreachable = "1.0" string_cache_shared = {path = "./shared", version = "0.3"} diff --git a/src/atom.rs b/src/atom.rs index 3aef7e2..7dce151 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -324,8 +324,8 @@ impl<'a, Static: StaticAtomSet> From> for Atom { #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { let static_set = Static::get(); - let hash = phf_shared::hash(&*string_to_add, static_set.key); - let index = phf_shared::get_index(hash, static_set.disps, static_set.atoms.len()); + let hash = phf_shared::hash(&*string_to_add, &static_set.key); + let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); let unpacked = if static_set.atoms[index as usize] == string_to_add { Static(index) @@ -336,6 +336,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { buf[..len].copy_from_slice(string_to_add.as_bytes()); Inline(len as u8, buf) } else { + let hash = (hash.g as u64) << 32 | (hash.f1 as u64); Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) } }; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index e088413..4bcd3ee 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -14,7 +14,7 @@ path = "lib.rs" [dependencies] string_cache_shared = {path = "../shared", version = "0.3"} -phf_generator = "0.7.22" -phf_shared = "0.7.4" +phf_generator = "0.8" +phf_shared = "0.8" proc-macro2 = 
"1" quote = "1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 8a06d33..e878b0f 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -203,8 +203,8 @@ impl AtomType { let hashes: Vec = atoms.iter().map(|string| { - let hash = phf_shared::hash(string, key); - ((hash >> 32) ^ hash) as u32 + let hash = phf_shared::hash(string, &key); + (hash.g ^ hash.f1) as u32 }).collect(); let type_name = if let Some(position) = self.path.rfind("::") { From a337b617011dc201975bc8ad3e3c4eaa7e123022 Mon Sep 17 00:00:00 2001 From: David Kellum Date: Fri, 27 Sep 2019 12:18:14 -0700 Subject: [PATCH 307/379] Move tests, bench and codegen deps to sub-package Closes #225, #226. This removes the codegen build dependency of the string_cache crate, thereby minimizing dep for users that don't need codegen. To allow the now external integration tests and benchmarks to test the same things, public Atom::is_(static|dynamic|inline) -> bool methods were also added. --- .travis.yml | 6 +- Cargo.toml | 18 +- integration-tests/Cargo.toml | 25 ++ build.rs => integration-tests/build.rs | 2 +- {src => integration-tests/src}/bench.rs | 24 +- integration-tests/src/lib.rs | 276 ++++++++++++++++++++++ src/atom.rs | 296 +++--------------------- src/lib.rs | 5 +- 8 files changed, 353 insertions(+), 299 deletions(-) create mode 100644 integration-tests/Cargo.toml rename build.rs => integration-tests/build.rs (79%) rename {src => integration-tests/src}/bench.rs (94%) create mode 100644 integration-tests/src/lib.rs diff --git a/.travis.yml b/.travis.yml index fa09a1e..ad47308 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,6 @@ os: - linux script: - cargo build - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - cargo test - - "cd string-cache-codegen/ && cargo build && cd .." + - cargo test --all + - "cd string-cache-codegen && cargo build && cd .." 
+ - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi" diff --git a/Cargo.toml b/Cargo.toml index 46e9117..8c40c1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ description = "A string interning library for Rust, developed as part of the Ser license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" -build = "build.rs" # Do not `exclude` ./string-cache-codegen because we want to include # ./string-cache-codegen/shared.rs, and `include` is a pain to use @@ -17,10 +16,6 @@ build = "build.rs" [lib] name = "string_cache" -[features] -# Use unstable features to optimize space and time (memory and CPU usage). -unstable = [] - [dependencies] precomputed-hash = "0.1" lazy_static = "1" @@ -29,13 +24,12 @@ phf_shared = "0.8" new_debug_unreachable = "1.0" string_cache_shared = {path = "./shared", version = "0.3"} -[dev-dependencies] -rand = "0.4" -string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } - -[build-dependencies] -string_cache_codegen = { version = "0.4", path = "./string-cache-codegen" } - [[test]] name = "small-stack" harness = false + +[workspace] +members = [ + "string-cache-codegen", + "integration-tests", +] diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml new file mode 100644 index 0000000..1a92ecc --- /dev/null +++ b/integration-tests/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "integration_tests" +version = "0.0.1" +authors = [ "The Servo Project Developers" ] +build = "build.rs" +publish = false + +[lib] +doctest = false +test = true + +[features] + +# Use unstable features to optimize space and time (memory and CPU usage). +unstable = [] + +[dependencies] +string_cache = { version = "0.7", path = ".." 
} + +[dev-dependencies] +rand = "0.4" +string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } + +[build-dependencies] +string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } diff --git a/build.rs b/integration-tests/build.rs similarity index 79% rename from build.rs rename to integration-tests/build.rs index 4b0bb9e..2cb93b1 100644 --- a/build.rs +++ b/integration-tests/build.rs @@ -4,7 +4,7 @@ use std::env; use std::path::Path; fn main() { - string_cache_codegen::AtomType::new("atom::tests::TestAtom", "test_atom!") + string_cache_codegen::AtomType::new("TestAtom", "test_atom!") .atoms(&[ "a", "b", "address", "area", "body", "font-weight", "br", "html", "head", "id", ]) diff --git a/src/bench.rs b/integration-tests/src/bench.rs similarity index 94% rename from src/bench.rs rename to integration-tests/src/bench.rs index f6f5248..7866ae9 100644 --- a/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -26,8 +26,8 @@ Furthermore, a large part of the point of interning is to make strings small and cheap to move around, which isn't reflected in these tests. */ +use crate::TestAtom; -use atom::tests::TestAtom; use test::{Bencher, black_box}; // Just shorthand @@ -35,14 +35,11 @@ fn mk(x: &str) -> TestAtom { TestAtom::from(x) } -macro_rules! check_type (($name:ident, $x:expr, $p:pat) => ( +macro_rules! check_type (($name:ident, $x:expr) => ( // NB: "cargo bench" does not run these! #[test] fn $name() { - match unsafe { $x.unpack() } { - $p => (), - _ => panic!("atom has wrong type"), - } + assert!($x, "atom has wrong type"); } )); @@ -62,12 +59,12 @@ macro_rules! bench_tiny_op (($name:ident, $op:ident, $ctor_x:expr, $ctor_y:expr) )); macro_rules! 
bench_one ( - (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x, Static(..));); - (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x, Inline(..));); - (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x, Dynamic(..));); - (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y, Static(..));); - (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y, Inline(..));); - (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y, Dynamic(..));); + (x_static $x:expr, $y:expr) => (check_type!(check_type_x, $x.is_static());); + (x_inline $x:expr, $y:expr) => (check_type!(check_type_x, $x.is_inline());); + (x_dynamic $x:expr, $y:expr) => (check_type!(check_type_x, $x.is_dynamic());); + (y_static $x:expr, $y:expr) => (check_type!(check_type_y, $y.is_static());); + (y_inline $x:expr, $y:expr) => (check_type!(check_type_y, $y.is_inline());); + (y_dynamic $x:expr, $y:expr) => (check_type!(check_type_y, $y.is_dynamic());); (is_static $x:expr, $y:expr) => (bench_one!(x_static $x, $y); bench_one!(y_static $x, $y);); (is_inline $x:expr, $y:expr) => (bench_one!(x_inline $x, $y); bench_one!(y_inline $x, $y);); (is_dynamic $x:expr, $y:expr) => (bench_one!(x_dynamic $x, $y); bench_one!(y_dynamic $x, $y);); @@ -134,8 +131,7 @@ macro_rules! bench_all ( use std::string::ToString; use std::iter::repeat; - use atom::tests::TestAtom; - use atom::UnpackedAtom::{Static, Inline, Dynamic}; + use crate::TestAtom; use super::mk; diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs new file mode 100644 index 0000000..d993e57 --- /dev/null +++ b/integration-tests/src/lib.rs @@ -0,0 +1,276 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +#![cfg(test)] + +#![deny(warnings)] +#![allow(non_upper_case_globals)] + +#![cfg_attr(feature = "unstable", feature(test))] + +extern crate string_cache; + +#[cfg(feature = "unstable")] extern crate test; +#[cfg(feature = "unstable")] extern crate rand; + +use std::thread; +use string_cache::atom::StaticAtomSet; + +include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); +pub type Atom = TestAtom; + +#[test] +fn test_as_slice() { + let s0 = Atom::from(""); + assert!(s0.as_ref() == ""); + + let s1 = Atom::from("class"); + assert!(s1.as_ref() == "class"); + + let i0 = Atom::from("blah"); + assert!(i0.as_ref() == "blah"); + + let s0 = Atom::from("BLAH"); + assert!(s0.as_ref() == "BLAH"); + + let d0 = Atom::from("zzzzzzzzzz"); + assert!(d0.as_ref() == "zzzzzzzzzz"); + + let d1 = Atom::from("ZZZZZZZZZZ"); + assert!(d1.as_ref() == "ZZZZZZZZZZ"); +} + +#[test] +fn test_types() { + assert!(Atom::from("").is_static()); + assert!(Atom::from("id").is_static()); + assert!(Atom::from("body").is_static()); + assert!(Atom::from("a").is_static()); + assert!(Atom::from("c").is_inline()); + assert!(Atom::from("zz").is_inline()); + assert!(Atom::from("zzz").is_inline()); + assert!(Atom::from("zzzz").is_inline()); + assert!(Atom::from("zzzzz").is_inline()); + assert!(Atom::from("zzzzzz").is_inline()); + assert!(Atom::from("zzzzzzz").is_inline()); + assert!(Atom::from("zzzzzzzz").is_dynamic()); + assert!(Atom::from("zzzzzzzzzzzzz").is_dynamic()); +} + +#[test] +fn test_equality() { + let s0 = Atom::from("fn"); + let s1 = Atom::from("fn"); + let s2 = Atom::from("loop"); + + let i0 = Atom::from("blah"); + let i1 = Atom::from("blah"); + let i2 = Atom::from("blah2"); + + let d0 = Atom::from("zzzzzzzz"); + let d1 = Atom::from("zzzzzzzz"); + let d2 = Atom::from("zzzzzzzzz"); + + assert!(s0 == s1); + assert!(s0 != s2); + + assert!(i0 == i1); + assert!(i0 != i2); + + assert!(d0 == d1); + assert!(d0 != d2); + + assert!(s0 != i0); + assert!(s0 != d0); + assert!(i0 != d0); +} + +#[test] +fn 
default() { + assert_eq!(TestAtom::default(), test_atom!("")); + assert_eq!(&*TestAtom::default(), ""); +} + +#[test] +fn ord() { + fn check(x: &str, y: &str) { + assert_eq!(x < y, Atom::from(x) < Atom::from(y)); + assert_eq!(x.cmp(y), Atom::from(x).cmp(&Atom::from(y))); + assert_eq!(x.partial_cmp(y), Atom::from(x).partial_cmp(&Atom::from(y))); + } + + check("a", "body"); + check("asdf", "body"); + check("zasdf", "body"); + check("z", "body"); + + check("a", "bbbbb"); + check("asdf", "bbbbb"); + check("zasdf", "bbbbb"); + check("z", "bbbbb"); +} + +#[test] +fn clone() { + let s0 = Atom::from("fn"); + let s1 = s0.clone(); + let s2 = Atom::from("loop"); + + let i0 = Atom::from("blah"); + let i1 = i0.clone(); + let i2 = Atom::from("blah2"); + + let d0 = Atom::from("zzzzzzzz"); + let d1 = d0.clone(); + let d2 = Atom::from("zzzzzzzzz"); + + assert!(s0 == s1); + assert!(s0 != s2); + + assert!(i0 == i1); + assert!(i0 != i2); + + assert!(d0 == d1); + assert!(d0 != d2); + + assert!(s0 != i0); + assert!(s0 != d0); + assert!(i0 != d0); +} + +macro_rules! assert_eq_fmt (($fmt:expr, $x:expr, $y:expr) => ({ + let x = $x; + let y = $y; + if x != y { + panic!("assertion failed: {} != {}", + format_args!($fmt, x), + format_args!($fmt, y)); + } +})); + +#[test] +fn repr() { + fn check(s: &str, data: u64) { + assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); + } + + fn check_static(s: &str, x: Atom) { + assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); + assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); + // The index is unspecified by phf. + assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); + } + + // This test is here to make sure we don't change atom representation + // by accident. It may need adjusting if there are changes to the + // static atom table, the tag values, etc. 
+ + // Static atoms + check_static("a", test_atom!("a")); + check_static("address", test_atom!("address")); + check_static("area", test_atom!("area")); + + // Inline atoms + check("e", 0x0000_0000_0000_6511); + check("xyzzy", 0x0000_797A_7A79_7851); + check("xyzzy01", 0x3130_797A_7A79_7871); + + // Dynamic atoms. This is a pointer so we can't verify every bit. + assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); +} + +#[test] +fn test_threads() { + for _ in 0_u32..100 { + thread::spawn(move || { + let _ = Atom::from("a dynamic string"); + let _ = Atom::from("another string"); + }); + } +} + +#[test] +fn atom_macro() { + assert_eq!(test_atom!("body"), Atom::from("body")); + assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); +} + +#[test] +fn match_atom() { + assert_eq!(2, match Atom::from("head") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + }); + + assert_eq!(3, match Atom::from("body") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + }); + + assert_eq!(3, match Atom::from("zzzzzz") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + }); +} + +#[test] +fn ensure_deref() { + // Ensure we can Deref to a &str + let atom = Atom::from("foobar"); + let _: &str = &atom; +} + +#[test] +fn ensure_as_ref() { + // Ensure we can as_ref to a &str + let atom = Atom::from("foobar"); + let _: &str = atom.as_ref(); +} + +#[test] +fn test_ascii_lowercase() { + assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); + assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); + assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); +} + +#[test] +fn test_ascii_uppercase() { + assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); + 
assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); + assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN FOX!")); + assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); +} + +#[test] +fn test_eq_ignore_ascii_case() { + assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); + assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); + assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); + assert!(Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); + assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); + assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); + assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); + assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); +} + +#[test] +fn test_from_string() { + assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); +} + +#[cfg(all(test, feature = "unstable"))] +#[path = "bench.rs"] +mod bench; diff --git a/src/atom.rs b/src/atom.rs index 7dce151..8b48218 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -111,7 +111,6 @@ impl StringCache { (value.hash & BUCKET_MASK) as usize }; - let mut current: &mut Option> = &mut self.buckets[bucket_index]; loop { @@ -257,6 +256,33 @@ impl Atom { UnpackedAtom::from_packed(self.unsafe_data) } + /// Return true if this is a static Atom. + #[doc(hidden)] + pub fn is_static(&self) -> bool { + match unsafe { self.unpack() } { + Static(..) => true, + _ => false + } + } + + /// Return true if this is a dynamic Atom. + #[doc(hidden)] + pub fn is_dynamic(&self) -> bool { + match unsafe { self.unpack() } { + Dynamic(..) 
=> true, + _ => false + } + } + + /// Return true if this is an inline Atom. + #[doc(hidden)] + pub fn is_inline(&self) -> bool { + match unsafe { self.unpack() } { + Inline(..) => true, + _ => false + } + } + /// Get the hash of the string as it is stored in the set. pub fn get_hash(&self) -> u32 { match unsafe { self.unpack() } { @@ -655,184 +681,14 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { } } +// Some minor tests of internal layout here. See ../integration-tests for much +// more. #[cfg(test)] -#[macro_use] mod tests { use std::mem; - use std::thread; - use super::{StaticAtomSet, StringCacheEntry}; - use super::UnpackedAtom::{Dynamic, Inline, Static}; + use super::{DefaultAtom, StringCacheEntry}; use shared::ENTRY_ALIGNMENT; - include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); - pub type Atom = TestAtom; - - #[test] - fn test_as_slice() { - let s0 = Atom::from(""); - assert!(s0.as_ref() == ""); - - let s1 = Atom::from("class"); - assert!(s1.as_ref() == "class"); - - let i0 = Atom::from("blah"); - assert!(i0.as_ref() == "blah"); - - let s0 = Atom::from("BLAH"); - assert!(s0.as_ref() == "BLAH"); - - let d0 = Atom::from("zzzzzzzzzz"); - assert!(d0.as_ref() == "zzzzzzzzzz"); - - let d1 = Atom::from("ZZZZZZZZZZ"); - assert!(d1.as_ref() == "ZZZZZZZZZZ"); - } - - macro_rules! 
unpacks_to (($e:expr, $t:pat) => ( - match unsafe { Atom::from($e).unpack() } { - $t => (), - _ => panic!("atom has wrong type"), - } - )); - - #[test] - fn test_types() { - unpacks_to!("", Static(..)); - unpacks_to!("id", Static(..)); - unpacks_to!("body", Static(..)); - unpacks_to!("c", Inline(..)); // "z" is a static atom - unpacks_to!("zz", Inline(..)); - unpacks_to!("zzz", Inline(..)); - unpacks_to!("zzzz", Inline(..)); - unpacks_to!("zzzzz", Inline(..)); - unpacks_to!("zzzzzz", Inline(..)); - unpacks_to!("zzzzzzz", Inline(..)); - unpacks_to!("zzzzzzzz", Dynamic(..)); - unpacks_to!("zzzzzzzzzzzzz", Dynamic(..)); - } - - #[test] - fn test_equality() { - let s0 = Atom::from("fn"); - let s1 = Atom::from("fn"); - let s2 = Atom::from("loop"); - - let i0 = Atom::from("blah"); - let i1 = Atom::from("blah"); - let i2 = Atom::from("blah2"); - - let d0 = Atom::from("zzzzzzzz"); - let d1 = Atom::from("zzzzzzzz"); - let d2 = Atom::from("zzzzzzzzz"); - - assert!(s0 == s1); - assert!(s0 != s2); - - assert!(i0 == i1); - assert!(i0 != i2); - - assert!(d0 == d1); - assert!(d0 != d2); - - assert!(s0 != i0); - assert!(s0 != d0); - assert!(i0 != d0); - } - - #[test] - fn default() { - assert_eq!(TestAtom::default(), test_atom!("")); - assert_eq!(&*TestAtom::default(), ""); - } - - #[test] - fn ord() { - fn check(x: &str, y: &str) { - assert_eq!(x < y, Atom::from(x) < Atom::from(y)); - assert_eq!(x.cmp(y), Atom::from(x).cmp(&Atom::from(y))); - assert_eq!(x.partial_cmp(y), Atom::from(x).partial_cmp(&Atom::from(y))); - } - - check("a", "body"); - check("asdf", "body"); - check("zasdf", "body"); - check("z", "body"); - - check("a", "bbbbb"); - check("asdf", "bbbbb"); - check("zasdf", "bbbbb"); - check("z", "bbbbb"); - } - - #[test] - fn clone() { - let s0 = Atom::from("fn"); - let s1 = s0.clone(); - let s2 = Atom::from("loop"); - - let i0 = Atom::from("blah"); - let i1 = i0.clone(); - let i2 = Atom::from("blah2"); - - let d0 = Atom::from("zzzzzzzz"); - let d1 = d0.clone(); - let d2 = 
Atom::from("zzzzzzzzz"); - - assert!(s0 == s1); - assert!(s0 != s2); - - assert!(i0 == i1); - assert!(i0 != i2); - - assert!(d0 == d1); - assert!(d0 != d2); - - assert!(s0 != i0); - assert!(s0 != d0); - assert!(i0 != d0); - } - - macro_rules! assert_eq_fmt (($fmt:expr, $x:expr, $y:expr) => ({ - let x = $x; - let y = $y; - if x != y { - panic!("assertion failed: {} != {}", - format_args!($fmt, x), - format_args!($fmt, y)); - } - })); - - #[test] - fn repr() { - fn check(s: &str, data: u64) { - assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); - } - - fn check_static(s: &str, x: Atom) { - assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); - assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); - // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); - } - - // This test is here to make sure we don't change atom representation - // by accident. It may need adjusting if there are changes to the - // static atom table, the tag values, etc. - - // Static atoms - check_static("a", test_atom!("a")); - check_static("address", test_atom!("address")); - check_static("area", test_atom!("area")); - - // Inline atoms - check("e", 0x0000_0000_0000_6511); - check("xyzzy", 0x0000_797A_7A79_7851); - check("xyzzy01", 0x3130_797A_7A79_7871); - - // Dynamic atoms. This is a pointer so we can't verify every bit. - assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); - } - #[test] fn assert_sizes() { use std::mem; @@ -843,104 +699,14 @@ mod tests { let compiler_uses_inline_drop_flags = mem::size_of::() > 0; // Guard against accidental changes to the sizes of things. 
- assert_eq!(mem::size_of::(), + assert_eq!(mem::size_of::(), if compiler_uses_inline_drop_flags { 16 } else { 8 }); assert_eq!(mem::size_of::(), 8 + 4 * mem::size_of::()); } - #[test] - fn test_threads() { - for _ in 0_u32..100 { - thread::spawn(move || { - let _ = Atom::from("a dynamic string"); - let _ = Atom::from("another string"); - }); - } - } - - #[test] - fn atom_macro() { - assert_eq!(test_atom!("body"), Atom::from("body")); - assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); - } - - #[test] - fn match_atom() { - assert_eq!(2, match Atom::from("head") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("body") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("zzzzzz") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - } - - #[test] - fn ensure_deref() { - // Ensure we can Deref to a &str - let atom = Atom::from("foobar"); - let _: &str = &atom; - } - - #[test] - fn ensure_as_ref() { - // Ensure we can as_ref to a &str - let atom = Atom::from("foobar"); - let _: &str = atom.as_ref(); - } - #[test] fn string_cache_entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } - - #[test] - fn test_ascii_lowercase() { - assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); - assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); - assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); - } - - #[test] - fn test_ascii_uppercase() { - assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); - assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN 
FOX!")); - assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); - } - - #[test] - fn test_eq_ignore_ascii_case() { - assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); - assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); - assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); - assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); - assert!(Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); - assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); - assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); - assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); - assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); - assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); - } - - #[test] - fn test_from_string() { - assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); - } } - -#[cfg(all(test, feature = "unstable"))] -#[path = "bench.rs"] -mod bench; diff --git a/src/lib.rs b/src/lib.rs index fc00f3a..7de917c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,7 +33,7 @@ //! //! In `build.rs`: //! -//! ``` +//! ```ignore //! extern crate string_cache_codegen; //! //! 
use std::env; @@ -105,10 +105,7 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#![cfg_attr(all(test, feature = "unstable"), feature(test))] -#[cfg(all(test, feature = "unstable"))] extern crate test; -#[cfg(all(test, feature = "unstable"))] extern crate rand; #[macro_use] extern crate lazy_static; #[macro_use] extern crate debug_unreachable; extern crate phf_shared; From d182bbfdbaf7515856e88d629a92e3f65e6b0e41 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 16:29:33 +0200 Subject: [PATCH 308/379] Update rand to 0.7 --- integration-tests/Cargo.toml | 2 +- integration-tests/src/bench.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 1a92ecc..7bd1772 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -18,7 +18,7 @@ unstable = [] string_cache = { version = "0.7", path = ".." } [dev-dependencies] -rand = "0.4" +rand = "0.7" string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } [build-dependencies] diff --git a/integration-tests/src/bench.rs b/integration-tests/src/bench.rs index 7866ae9..459c913 100644 --- a/integration-tests/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -184,9 +184,9 @@ macro_rules! bench_rand ( ($name:ident, $len:expr) => ( fn $name(b: &mut Bencher) { use std::str; use rand; - use rand::Rng; + use rand::{RngCore, SeedableRng}; - let mut gen = rand::weak_rng(); + let mut gen = rand::rngs::SmallRng::from_entropy(); b.iter(|| { // We have to generate new atoms on every iter, because // the dynamic atom table isn't reset. 
From 07a74fa9bf90285d0a9d8383a483a02b7d571dae Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Fri, 18 Oct 2019 16:31:15 +0200 Subject: [PATCH 309/379] Update version to 0.8 --- Cargo.toml | 2 +- README.md | 6 +++--- integration-tests/Cargo.toml | 6 +++--- src/lib.rs | 4 ++-- string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8c40c1e..3ade61d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.7.4" # Also update README.md when making a semver-breaking change +version = "0.8.0" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/README.md b/README.md index ddd3bc0..9c9c8ac 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.7" +string_cache = "0.8" ``` In `lib.rs`: @@ -31,10 +31,10 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.7" +string_cache = "0.8" [build-dependencies] -string_cache_codegen = "0.4" +string_cache_codegen = "0.5" ``` In `build.rs`: diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 7bd1772..7f1c60b 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -15,11 +15,11 @@ test = true unstable = [] [dependencies] -string_cache = { version = "0.7", path = ".." } +string_cache = { version = "0.8", path = ".." 
} [dev-dependencies] rand = "0.7" -string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } [build-dependencies] -string_cache_codegen = { version = "0.4", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } diff --git a/src/lib.rs b/src/lib.rs index 7de917c..0191bd8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,10 +25,10 @@ //! In `Cargo.toml`: //! ```toml //! [dependencies] -//! string_cache = "0.7" +//! string_cache = "0.8" //! //! [dev-dependencies] -//! string_cache_codegen = "0.4" +//! string_cache_codegen = "0.5" //! ``` //! //! In `build.rs`: diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 4bcd3ee..1dc5493 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_codegen" -version = "0.4.4" # Also update ../README.md when making a semver-breaking change +version = "0.5.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index e878b0f..7f5cec2 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -19,10 +19,10 @@ //! build = "build.rs" //! //! [dependencies] -//! string_cache = "0.7" +//! string_cache = "0.8" //! //! [build-dependencies] -//! string_cache_codegen = "0.4" +//! string_cache_codegen = "0.5" //! ``` //! //! 
In `build.rs`: From 2e1d42a9e4b98eddd1a5bfb127ba4ed083302374 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 16:58:11 +0200 Subject: [PATCH 310/379] Run rustfmt --- examples/simple.rs | 4 +- integration-tests/build.rs | 11 +++- integration-tests/src/bench.rs | 14 ++--- integration-tests/src/lib.rs | 85 ++++++++++++++++++----------- shared/lib.rs | 4 +- src/atom.rs | 98 +++++++++++++++++++++------------- src/lib.rs | 11 ++-- string-cache-codegen/lib.rs | 52 +++++++++++------- tests/small-stack.rs | 11 ++-- 9 files changed, 182 insertions(+), 108 deletions(-) diff --git a/examples/simple.rs b/examples/simple.rs index b375049..8a524ff 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -3,12 +3,12 @@ extern crate string_cache; use string_cache::DefaultAtom; fn main() { - let mut interned_stuff = Vec::new(); let text = "here is a sentence of text that will be tokenised and interned and some repeated \ tokens is of text and"; for word in text.split_whitespace() { - let seen_before = interned_stuff.iter() + let seen_before = interned_stuff + .iter() // We can use impl PartialEq where T is anything string-like to compare to // interned strings to either other interned strings, or actual strings Comparing two // interned strings is very fast (normally a single cpu operation). 
diff --git a/integration-tests/build.rs b/integration-tests/build.rs index 2cb93b1..e7e89d4 100644 --- a/integration-tests/build.rs +++ b/integration-tests/build.rs @@ -6,7 +6,16 @@ use std::path::Path; fn main() { string_cache_codegen::AtomType::new("TestAtom", "test_atom!") .atoms(&[ - "a", "b", "address", "area", "body", "font-weight", "br", "html", "head", "id", + "a", + "b", + "address", + "area", + "body", + "font-weight", + "br", + "html", + "head", + "id", ]) .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) .unwrap() diff --git a/integration-tests/src/bench.rs b/integration-tests/src/bench.rs index 459c913..4d8f012 100644 --- a/integration-tests/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -28,7 +28,7 @@ and cheap to move around, which isn't reflected in these tests. */ use crate::TestAtom; -use test::{Bencher, black_box}; +use test::{black_box, Bencher}; // Just shorthand fn mk(x: &str) -> TestAtom { @@ -142,10 +142,10 @@ macro_rules! bench_all ( ); ); -pub const longer_dynamic_a: &'static str - = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Band"; -pub const longer_dynamic_b: &'static str - = "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Ban!"; +pub const longer_dynamic_a: &'static str = + "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Band"; +pub const longer_dynamic_b: &'static str = + "Thee Silver Mt. Zion Memorial Orchestra & Tra-La-La Ban!"; bench_all!([eq ne lt clone_string] for short_string = "e", "f"); bench_all!([eq ne lt clone_string] for medium_string = "xyzzy01", "xyzzy02"); @@ -206,7 +206,7 @@ macro_rules! 
bench_rand ( ($name:ident, $len:expr) => ( } )); -bench_rand!(intern_rand_008, 8); -bench_rand!(intern_rand_032, 32); +bench_rand!(intern_rand_008, 8); +bench_rand!(intern_rand_032, 32); bench_rand!(intern_rand_128, 128); bench_rand!(intern_rand_512, 512); diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index d993e57..c6b9980 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -8,16 +8,16 @@ // except according to those terms. #![cfg(test)] - #![deny(warnings)] #![allow(non_upper_case_globals)] - #![cfg_attr(feature = "unstable", feature(test))] extern crate string_cache; -#[cfg(feature = "unstable")] extern crate test; -#[cfg(feature = "unstable")] extern crate rand; +#[cfg(feature = "unstable")] +extern crate rand; +#[cfg(feature = "unstable")] +extern crate test; use std::thread; use string_cache::atom::StaticAtomSet; @@ -172,13 +172,13 @@ fn repr() { // static atom table, the tag values, etc. // Static atoms - check_static("a", test_atom!("a")); + check_static("a", test_atom!("a")); check_static("address", test_atom!("address")); - check_static("area", test_atom!("area")); + check_static("area", test_atom!("area")); // Inline atoms - check("e", 0x0000_0000_0000_6511); - check("xyzzy", 0x0000_797A_7A79_7851); + check("e", 0x0000_0000_0000_6511); + check("xyzzy", 0x0000_797A_7A79_7851); check("xyzzy01", 0x3130_797A_7A79_7871); // Dynamic atoms. This is a pointer so we can't verify every bit. 
@@ -203,23 +203,32 @@ fn atom_macro() { #[test] fn match_atom() { - assert_eq!(2, match Atom::from("head") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("body") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); - - assert_eq!(3, match Atom::from("zzzzzz") { - test_atom!("br") => 1, - test_atom!("html") | test_atom!("head") => 2, - _ => 3, - }); + assert_eq!( + 2, + match Atom::from("head") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + } + ); + + assert_eq!( + 3, + match Atom::from("body") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + } + ); + + assert_eq!( + 3, + match Atom::from("zzzzzz") { + test_atom!("br") => 1, + test_atom!("html") | test_atom!("head") => 2, + _ => 3, + } + ); } #[test] @@ -240,16 +249,28 @@ fn ensure_as_ref() { fn test_ascii_lowercase() { assert_eq!(Atom::from("").to_ascii_lowercase(), Atom::from("")); assert_eq!(Atom::from("aZ9").to_ascii_lowercase(), Atom::from("az9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), Atom::from("the quick brown fox!")); - assert_eq!(Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), Atom::from("je vais À paris")); + assert_eq!( + Atom::from("The Quick Brown Fox!").to_ascii_lowercase(), + Atom::from("the quick brown fox!") + ); + assert_eq!( + Atom::from("JE VAIS À PARIS").to_ascii_lowercase(), + Atom::from("je vais À paris") + ); } #[test] fn test_ascii_uppercase() { assert_eq!(Atom::from("").to_ascii_uppercase(), Atom::from("")); assert_eq!(Atom::from("aZ9").to_ascii_uppercase(), Atom::from("AZ9")); - assert_eq!(Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), Atom::from("THE QUICK BROWN FOX!")); - assert_eq!(Atom::from("Je vais à Paris").to_ascii_uppercase(), Atom::from("JE VAIS à PARIS")); + assert_eq!( + Atom::from("The Quick Brown Fox!").to_ascii_uppercase(), + 
Atom::from("THE QUICK BROWN FOX!") + ); + assert_eq!( + Atom::from("Je vais à Paris").to_ascii_uppercase(), + Atom::from("JE VAIS à PARIS") + ); } #[test] @@ -257,12 +278,14 @@ fn test_eq_ignore_ascii_case() { assert!(Atom::from("").eq_ignore_ascii_case(&Atom::from(""))); assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("aZ9"))); assert!(Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("Az9"))); - assert!(Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); + assert!(Atom::from("The Quick Brown Fox!") + .eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!"))); assert!(Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("je VAIS à PARIS"))); assert!(!Atom::from("").eq_ignore_ascii_case(&Atom::from("az9"))); assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from(""))); assert!(!Atom::from("aZ9").eq_ignore_ascii_case(&Atom::from("9Za"))); - assert!(!Atom::from("The Quick Brown Fox!").eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); + assert!(!Atom::from("The Quick Brown Fox!") + .eq_ignore_ascii_case(&Atom::from("THE quick BROWN fox!!"))); assert!(!Atom::from("Je vais à Paris").eq_ignore_ascii_case(&Atom::from("JE vais À paris"))); } diff --git a/shared/lib.rs b/shared/lib.rs index f69a4f4..75c21d0 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -9,10 +9,10 @@ // FIXME(rust-lang/rust#18153): generate these from an enum pub const DYNAMIC_TAG: u8 = 0b_00; -pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble +pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble pub const STATIC_TAG: u8 = 0b_10; pub const TAG_MASK: u64 = 0b_11; -pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. +pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. 
pub const MAX_INLINE_LEN: usize = 7; diff --git a/src/atom.rs b/src/atom.rs index 8b48218..8f4e999 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -21,15 +21,17 @@ use std::mem; use std::ops; use std::slice; use std::str; -use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; +use std::sync::Mutex; -use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - ENTRY_ALIGNMENT, pack_static}; use self::UnpackedAtom::{Dynamic, Inline, Static}; +use shared::{ + pack_static, DYNAMIC_TAG, ENTRY_ALIGNMENT, INLINE_TAG, MAX_INLINE_LEN, STATIC_SHIFT_BITS, + STATIC_TAG, TAG_MASK, +}; -const NB_BUCKETS: usize = 1 << 12; // 4096 +const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { @@ -48,8 +50,7 @@ struct StringCacheEntry { } impl StringCacheEntry { - fn new(next: Option>, hash: u64, string: String) - -> StringCacheEntry { + fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { StringCacheEntry { next_in_bucket: next, hash: hash, @@ -65,17 +66,14 @@ impl StringCache { let _static_assert_size_eq = std::mem::transmute::; let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); StringCache { - buckets: unsafe { - Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) - }, + buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, } } fn add(&mut self, string: Cow, hash: u64) -> *mut StringCacheEntry { let bucket_index = (hash & BUCKET_MASK) as usize; { - let mut ptr: Option<&mut Box> = - self.buckets[bucket_index].as_mut(); + let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { if entry.hash == hash && &*entry.string == &*string { @@ -96,7 +94,10 @@ impl StringCache { debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let string = string.into_owned(); let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index].take(), hash, string)); + 
self.buckets[bucket_index].take(), + hash, + string, + )); let ptr: *mut StringCacheEntry = &mut *entry; self.buckets[bucket_index] = Some(entry); @@ -119,7 +120,9 @@ impl StringCache { None => break, }; if entry_ptr == ptr { - mem::drop(mem::replace(current, unsafe { (*entry_ptr).next_in_bucket.take() })); + mem::drop(mem::replace(current, unsafe { + (*entry_ptr).next_in_bucket.take() + })); break; } current = unsafe { &mut (*entry_ptr).next_in_bucket }; @@ -261,7 +264,7 @@ impl Atom { pub fn is_static(&self) -> bool { match unsafe { self.unpack() } { Static(..) => true, - _ => false + _ => false, } } @@ -270,7 +273,7 @@ impl Atom { pub fn is_dynamic(&self) -> bool { match unsafe { self.unpack() } { Dynamic(..) => true, - _ => false + _ => false, } } @@ -279,7 +282,7 @@ impl Atom { pub fn is_inline(&self) -> bool { match unsafe { self.unpack() } { Inline(..) => true, - _ => false + _ => false, } } @@ -294,9 +297,7 @@ impl Atom { let entry = entry as *mut StringCacheEntry; u64_hash_as_u32(unsafe { (*entry).hash }) } - Inline(..) => { - u64_hash_as_u32(self.unsafe_data) - } + Inline(..) => u64_hash_as_u32(self.unsafe_data), } } } @@ -306,14 +307,17 @@ impl Default for Atom { fn default() -> Self { Atom { unsafe_data: pack_static(Static::empty_string_index()), - phantom: PhantomData + phantom: PhantomData, } } } impl Hash for Atom { #[inline] - fn hash(&self, state: &mut H) where H: Hasher { + fn hash(&self, state: &mut H) + where + H: Hasher, + { state.write_u32(self.get_hash()) } } @@ -368,7 +372,10 @@ impl<'a, Static: StaticAtomSet> From> for Atom { }; let data = unsafe { unpacked.pack() }; - Atom { unsafe_data: data, phantom: PhantomData } + Atom { + unsafe_data: data, + phantom: PhantomData, + } } } @@ -394,7 +401,7 @@ impl Clone for Atom { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); - }, + } None => (), } } @@ -437,8 +444,11 @@ impl ops::Deref for Atom { Inline(..) 
=> { let buf = inline_orig_bytes(&self.unsafe_data); str::from_utf8_unchecked(buf) - }, - Static(idx) => Static::get().atoms.get(idx as usize).expect("bad static atom"), + } + Static(idx) => Static::get() + .atoms + .get(idx as usize) + .expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -497,14 +507,20 @@ impl AsRef for Atom { } impl Serialize for Atom { - fn serialize(&self, serializer: S) -> Result where S: Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { let string: &str = self.as_ref(); string.serialize(serializer) } } impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'a> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'a>, + { let string: String = Deserialize::deserialize(deserializer)?; Ok(Atom::from(string)) } @@ -535,8 +551,8 @@ impl Atom { /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase pub fn to_ascii_uppercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'a' ..= b'z' = b { - return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()) + if let b'a'..=b'z' = b { + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()); } } self.clone() @@ -547,8 +563,8 @@ impl Atom { /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase pub fn to_ascii_lowercase(&self) -> Self { for (i, b) in self.bytes().enumerate() { - if let b'A' ..= b'Z' = b { - return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()) + if let b'A'..=b'Z' = b { + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()); } } self.clone() @@ -650,7 +666,7 @@ impl UnpackedAtom { let src = inline_atom_slice(&data); buf.copy_from_slice(src); Inline(len as u8, buf) - }, + } _ => debug_unreachable!(), } } @@ -685,9 
+701,9 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { // more. #[cfg(test)] mod tests { - use std::mem; use super::{DefaultAtom, StringCacheEntry}; use shared::ENTRY_ALIGNMENT; + use std::mem; #[test] fn assert_sizes() { @@ -699,10 +715,18 @@ mod tests { let compiler_uses_inline_drop_flags = mem::size_of::() > 0; // Guard against accidental changes to the sizes of things. - assert_eq!(mem::size_of::(), - if compiler_uses_inline_drop_flags { 16 } else { 8 }); - assert_eq!(mem::size_of::(), - 8 + 4 * mem::size_of::()); + assert_eq!( + mem::size_of::(), + if compiler_uses_inline_drop_flags { + 16 + } else { + 8 + } + ); + assert_eq!( + mem::size_of::(), + 8 + 4 * mem::size_of::() + ); } #[test] diff --git a/src/lib.rs b/src/lib.rs index 0191bd8..058b833 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,22 +103,23 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] - #![cfg_attr(test, deny(warnings))] -#[macro_use] extern crate lazy_static; -#[macro_use] extern crate debug_unreachable; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate debug_unreachable; extern crate phf_shared; extern crate precomputed_hash; extern crate serde; extern crate string_cache_shared as shared; -pub use atom::{Atom, StaticAtomSet, PhfStrSet, EmptyStaticAtomSet, DefaultAtom}; +pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; pub mod atom; // Make test_atom! macro work in this crate. 
// `$crate` would not be appropriate for other crates creating such macros mod string_cache { - pub use {Atom, StaticAtomSet, PhfStrSet}; + pub use {Atom, PhfStrSet, StaticAtomSet}; } diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 7f5cec2..86af84f 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -71,12 +71,13 @@ extern crate phf_generator; extern crate phf_shared; extern crate string_cache_shared as shared; -#[macro_use] extern crate quote; +#[macro_use] +extern crate quote; extern crate proc_macro2; use std::collections::HashSet; use std::fs::File; -use std::io::{self, Write, BufWriter}; +use std::io::{self, BufWriter, Write}; use std::iter; use std::path::Path; @@ -161,22 +162,30 @@ impl AtomType { /// Adds multiple atoms to the builder pub fn atoms(&mut self, iter: I) -> &mut Self - where I: IntoIterator, I::Item: AsRef { - self.atoms.extend(iter.into_iter().map(|s| s.as_ref().to_owned())); + where + I: IntoIterator, + I::Item: AsRef, + { + self.atoms + .extend(iter.into_iter().map(|s| s.as_ref().to_owned())); self } /// Write generated code to `destination`. - pub fn write_to(&mut self, mut destination: W) -> io::Result<()> where W: Write { + pub fn write_to(&mut self, mut destination: W) -> io::Result<()> + where + W: Write, + { destination.write_all( self.to_tokens() - .to_string() - // Insert some newlines to make the generated code slightly easier to read. - .replace(" [ \"", "[\n\"") - .replace("\" , ", "\",\n") - .replace(" ( \"", "\n( \"") - .replace("; ", ";\n") - .as_bytes()) + .to_string() + // Insert some newlines to make the generated code slightly easier to read. 
+ .replace(" [ \"", "[\n\"") + .replace("\" , ", "\",\n") + .replace(" ( \"", "\n( \"") + .replace("; ", ";\n") + .as_bytes(), + ) } fn to_tokens(&mut self) -> proc_macro2::TokenStream { @@ -201,30 +210,33 @@ impl AtomType { .unwrap() }); - let hashes: Vec = - atoms.iter().map(|string| { + let hashes: Vec = atoms + .iter() + .map(|string| { let hash = phf_shared::hash(string, &key); (hash.g ^ hash.f1) as u32 - }).collect(); + }) + .collect(); let type_name = if let Some(position) = self.path.rfind("::") { - &self.path[position + "::".len() ..] + &self.path[position + "::".len()..] } else { &self.path }; let atom_doc = match self.atom_doc { Some(ref doc) => quote!(#[doc = #doc]), - None => quote!() + None => quote!(), }; let static_set_doc = match self.static_set_doc { Some(ref doc) => quote!(#[doc = #doc]), - None => quote!() + None => quote!(), }; let macro_doc = match self.macro_doc { Some(ref doc) => quote!(#[doc = #doc]), - None => quote!() + None => quote!(), }; - let new_term = |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); + let new_term = + |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); diff --git a/tests/small-stack.rs b/tests/small-stack.rs index 300b142..269cad7 100644 --- a/tests/small-stack.rs +++ b/tests/small-stack.rs @@ -6,7 +6,12 @@ // This is a separate test program rather than a `#[test] fn` among others // to make sure that nothing else has already initialized the map in this process. 
fn main() { - std::thread::Builder::new().stack_size(50_000).spawn(|| { - string_cache::DefaultAtom::from("12345678"); - }).unwrap().join().unwrap() + std::thread::Builder::new() + .stack_size(50_000) + .spawn(|| { + string_cache::DefaultAtom::from("12345678"); + }) + .unwrap() + .join() + .unwrap() } From a317539e722ca7b173f984a168dd5159ff6a8b2b Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 17:45:42 +0200 Subject: [PATCH 311/379] Make fields of Atom private atom!() macros create values through a `const fn` constructor --- Cargo.toml | 1 - integration-tests/src/lib.rs | 10 +- shared/Cargo.toml | 11 -- shared/LICENSE-APACHE | 201 -------------------------------- shared/LICENSE-MIT | 25 ---- shared/lib.rs | 23 ---- src/atom.rs | 107 +++++++++-------- src/lib.rs | 1 - string-cache-codegen/Cargo.toml | 1 - string-cache-codegen/lib.rs | 50 ++++---- 10 files changed, 92 insertions(+), 338 deletions(-) delete mode 100644 shared/Cargo.toml delete mode 100644 shared/LICENSE-APACHE delete mode 100644 shared/LICENSE-MIT delete mode 100644 shared/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 3ade61d..5ba5775 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,6 @@ lazy_static = "1" serde = "1" phf_shared = "0.8" new_debug_unreachable = "1.0" -string_cache_shared = {path = "./shared", version = "0.3"} [[test]] name = "small-stack" diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index c6b9980..614d068 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -157,14 +157,14 @@ macro_rules! 
assert_eq_fmt (($fmt:expr, $x:expr, $y:expr) => ({ #[test] fn repr() { fn check(s: &str, data: u64) { - assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data, data); + assert_eq_fmt!("0x{:016X}", Atom::from(s).unsafe_data(), data); } fn check_static(s: &str, x: Atom) { - assert_eq_fmt!("0x{:016X}", x.unsafe_data, Atom::from(s).unsafe_data); - assert_eq!(0x2, x.unsafe_data & 0xFFFF_FFFF); + assert_eq_fmt!("0x{:016X}", x.unsafe_data(), Atom::from(s).unsafe_data()); + assert_eq!(0x2, x.unsafe_data() & 0xFFFF_FFFF); // The index is unspecified by phf. - assert!((x.unsafe_data >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); + assert!((x.unsafe_data() >> 32) <= TestAtomStaticSet::get().atoms.len() as u64); } // This test is here to make sure we don't change atom representation @@ -182,7 +182,7 @@ fn repr() { check("xyzzy01", 0x3130_797A_7A79_7871); // Dynamic atoms. This is a pointer so we can't verify every bit. - assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data & 0xf); + assert_eq!(0x00, Atom::from("a dynamic string").unsafe_data() & 0xf); } #[test] diff --git a/shared/Cargo.toml b/shared/Cargo.toml deleted file mode 100644 index ce8d53f..0000000 --- a/shared/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] - -name = "string_cache_shared" -version = "0.3.1" -authors = [ "The Servo Project Developers" ] -description = "Code share between string_cache and string_cache_codegen." -license = "MIT / Apache-2.0" -repository = "https://github.com/servo/string-cache" - -[lib] -path = "lib.rs" diff --git a/shared/LICENSE-APACHE b/shared/LICENSE-APACHE deleted file mode 100644 index 16fe87b..0000000 --- a/shared/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the 
following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/shared/LICENSE-MIT b/shared/LICENSE-MIT deleted file mode 100644 index 807526f..0000000 --- a/shared/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2012-2013 Mozilla Foundation - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/shared/lib.rs b/shared/lib.rs deleted file mode 100644 index 75c21d0..0000000 --- a/shared/lib.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2015 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// FIXME(rust-lang/rust#18153): generate these from an enum -pub const DYNAMIC_TAG: u8 = 0b_00; -pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble -pub const STATIC_TAG: u8 = 0b_10; -pub const TAG_MASK: u64 = 0b_11; -pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. 
- -pub const MAX_INLINE_LEN: usize = 7; - -pub const STATIC_SHIFT_BITS: usize = 32; - -pub fn pack_static(n: u32) -> u64 { - (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) -} diff --git a/src/atom.rs b/src/atom.rs index 8f4e999..8f6a983 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -26,10 +26,16 @@ use std::sync::atomic::Ordering::SeqCst; use std::sync::Mutex; use self::UnpackedAtom::{Dynamic, Inline, Static}; -use shared::{ - pack_static, DYNAMIC_TAG, ENTRY_ALIGNMENT, INLINE_TAG, MAX_INLINE_LEN, STATIC_SHIFT_BITS, - STATIC_TAG, TAG_MASK, -}; + +const DYNAMIC_TAG: u8 = 0b_00; +const INLINE_TAG: u8 = 0b_01; // len in upper nybble +const STATIC_TAG: u8 = 0b_10; +const TAG_MASK: u64 = 0b_11; +const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. + +const MAX_INLINE_LEN: usize = 7; + +const STATIC_SHIFT_BITS: usize = 32; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; @@ -138,7 +144,7 @@ impl StringCache { /// It is used by the methods of [`Atom`] to check if a string is present in the static set. /// /// [`Atom`]: struct.Atom.html -pub trait StaticAtomSet { +pub trait StaticAtomSet: Ord { /// Get the location of the static string set in the binary. fn get() -> &'static PhfStrSet; /// Get the index of the empty string, which is in every set and is used for `Atom::default`. @@ -160,6 +166,7 @@ pub struct PhfStrSet { } /// An empty static atom set for when only dynamic strings will be added +#[derive(PartialEq, Eq, PartialOrd, Ord)] pub struct EmptyStaticAtomSet; impl StaticAtomSet for EmptyStaticAtomSet { @@ -226,14 +233,11 @@ pub type DefaultAtom = Atom; /// } /// } // atom is dropped here, so it is not kept around in memory /// ``` -pub struct Atom { - /// This field is public so that the `atom!()` macros can use it. - /// You should not otherwise access this field. 
- #[doc(hidden)] - pub unsafe_data: u64, - - #[doc(hidden)] - pub phantom: PhantomData, +#[derive(PartialEq, Eq)] +// NOTE: Deriving PartialEq requires that a given string must always be interned the same way. +pub struct Atom { + unsafe_data: u64, + phantom: PhantomData, } impl ::precomputed_hash::PrecomputedHash for Atom { @@ -253,13 +257,34 @@ fn u64_hash_as_u32(h: u64) -> u32 { ((h >> 32) ^ h) as u32 } +// FIXME: bound removed from the struct definition before of this error for pack_static: +// "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" +// https://github.com/rust-lang/rust/issues/57563 +impl Atom { + /// For the atom!() macros + #[inline(always)] + #[doc(hidden)] + pub const fn pack_static(n: u32) -> Self { + Self { + unsafe_data: (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS), + phantom: PhantomData, + } + } +} + impl Atom { #[inline(always)] unsafe fn unpack(&self) -> UnpackedAtom { UnpackedAtom::from_packed(self.unsafe_data) } - /// Return true if this is a static Atom. + /// Return the internal repersentation. For testing. + #[doc(hidden)] + pub fn unsafe_data(&self) -> u64 { + self.unsafe_data + } + + /// Return true if this is a static Atom. For testing. #[doc(hidden)] pub fn is_static(&self) -> bool { match unsafe { self.unpack() } { @@ -268,7 +293,7 @@ impl Atom { } } - /// Return true if this is a dynamic Atom. + /// Return true if this is a dynamic Atom. For testing. #[doc(hidden)] pub fn is_dynamic(&self) -> bool { match unsafe { self.unpack() } { @@ -277,7 +302,7 @@ impl Atom { } } - /// Return true if this is an inline Atom. + /// Return true if this is an inline Atom. For testing. 
#[doc(hidden)] pub fn is_inline(&self) -> bool { match unsafe { self.unpack() } { @@ -305,10 +330,7 @@ impl Atom { impl Default for Atom { #[inline] fn default() -> Self { - Atom { - unsafe_data: pack_static(Static::empty_string_index()), - phantom: PhantomData, - } + Atom::pack_static(Static::empty_string_index()) } } @@ -322,16 +344,6 @@ impl Hash for Atom { } } -impl Eq for Atom {} - -// NOTE: This impl requires that a given string must always be interned the same way. -impl PartialEq for Atom { - #[inline] - fn eq(&self, other: &Self) -> bool { - self.unsafe_data == other.unsafe_data - } -} - impl PartialEq for Atom { fn eq(&self, other: &str) -> bool { &self[..] == other @@ -371,11 +383,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } }; - let data = unsafe { unpacked.pack() }; - Atom { - unsafe_data: data, - phantom: PhantomData, - } + unsafe { unpacked.pack() } } } @@ -412,11 +420,11 @@ impl Clone for Atom { } } -impl Drop for Atom { +impl Drop for Atom { #[inline] fn drop(&mut self) { // Out of line to guide inlining. - fn drop_slow(this: &mut Atom) { + fn drop_slow(this: &mut Atom) { STRING_CACHE.lock().unwrap().remove(this.unsafe_data); } @@ -631,22 +639,28 @@ impl UnpackedAtom { /// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for /// hints for the layout. 
#[inline(always)] - unsafe fn pack(self) -> u64 { + unsafe fn pack(self) -> Atom { match self { - Static(n) => pack_static(n), + Static(n) => Atom::pack_static(n), Dynamic(p) => { - let n = p as u64; - debug_assert!(0 == n & TAG_MASK); - n + let unsafe_data = p as u64; + debug_assert!(0 == unsafe_data & TAG_MASK); + Atom { + unsafe_data, + phantom: PhantomData, + } } Inline(len, buf) => { debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); + let mut unsafe_data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let dest = inline_atom_slice_mut(&mut data); + let dest = inline_atom_slice_mut(&mut unsafe_data); dest.copy_from_slice(&buf) } - data + Atom { + unsafe_data, + phantom: PhantomData, + } } } } @@ -701,8 +715,7 @@ unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { // more. #[cfg(test)] mod tests { - use super::{DefaultAtom, StringCacheEntry}; - use shared::ENTRY_ALIGNMENT; + use super::{DefaultAtom, StringCacheEntry, ENTRY_ALIGNMENT}; use std::mem; #[test] diff --git a/src/lib.rs b/src/lib.rs index 058b833..484f606 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,7 +112,6 @@ extern crate debug_unreachable; extern crate phf_shared; extern crate precomputed_hash; extern crate serde; -extern crate string_cache_shared as shared; pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 1dc5493..847bace 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,6 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -string_cache_shared = {path = "../shared", version = "0.3"} phf_generator = "0.8" phf_shared = "0.8" proc-macro2 = "1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 86af84f..f22b88c 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -70,7 +70,6 @@ extern crate 
phf_generator; extern crate phf_shared; -extern crate string_cache_shared as shared; #[macro_use] extern crate quote; extern crate proc_macro2; @@ -78,7 +77,6 @@ extern crate proc_macro2; use std::collections::HashSet; use std::fs::File; use std::io::{self, BufWriter, Write}; -use std::iter; use std::path::Path; /// A builder for a static atom set and relevant macros @@ -199,16 +197,8 @@ impl AtomType { let phf_generator::HashState { key, disps, map } = hash_state; let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); - let atoms_ref = &atoms; let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; - let data = (0..atoms.len()).map(|i| { - format!("0x{:X}u64", shared::pack_static(i as u32)) - .parse::() - .unwrap() - .into_iter() - .next() - .unwrap() - }); + let indices = 0..atoms.len() as u32; let hashes: Vec = atoms .iter() @@ -218,10 +208,11 @@ impl AtomType { }) .collect(); - let type_name = if let Some(position) = self.path.rfind("::") { - &self.path[position + "::".len()..] - } else { - &self.path + let mut path_parts = self.path.rsplitn(2, "::"); + let type_name = path_parts.next().unwrap(); + let module = match path_parts.next() { + Some(m) => format!("$crate::{}", m), + None => format!("$crate"), }; let atom_doc = match self.atom_doc { Some(ref doc) => quote!(#[doc = #doc]), @@ -240,19 +231,32 @@ impl AtomType { let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); - let path = iter::repeat(self.path.parse::().unwrap()); + let module = module.parse::().unwrap(); + let const_names: Vec<_> = atoms + .iter() + .map(|atom| { + let mut name = String::from("ATOM"); + for c in atom.chars() { + name.push_str(&format!("_{:02X}", c as u32)) + } + new_term(&name) + }) + .collect(); quote! 
{ #atom_doc pub type #type_name = ::string_cache::Atom<#static_set_name>; + #static_set_doc + #[derive(PartialEq, Eq, PartialOrd, Ord)] pub struct #static_set_name; + impl ::string_cache::StaticAtomSet for #static_set_name { fn get() -> &'static ::string_cache::PhfStrSet { static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet { key: #key, disps: &[#((#disps0, #disps1)),*], - atoms: &[#(#atoms_ref),*], + atoms: &[#(#atoms),*], hashes: &[#(#hashes),*] }; &SET @@ -261,16 +265,16 @@ impl AtomType { #empty_string_index } } + + #( + pub const #const_names: #type_name = #type_name::pack_static(#indices); + )* + #macro_doc #[macro_export] macro_rules! #macro_name { #( - (#atoms_ref) => { - $crate::#path { - unsafe_data: #data, - phantom: ::std::marker::PhantomData, - } - }; + (#atoms) => { #module::#const_names }; )* } } From d653207e6cebfc5965b7dac778da1b95376a27fc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:10:28 +0200 Subject: [PATCH 312/379] Add the non-zero optimization to Atom --- src/atom.rs | 58 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 8f6a983..095140e 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -18,6 +18,7 @@ use std::fmt; use std::hash::{Hash, Hasher}; use std::marker::PhantomData; use std::mem; +use std::num::NonZeroU64; use std::ops; use std::slice; use std::str; @@ -110,8 +111,7 @@ impl StringCache { ptr } - fn remove(&mut self, key: u64) { - let ptr = key as *mut StringCacheEntry; + fn remove(&mut self, ptr: *mut StringCacheEntry) { let bucket_index = { let value: &StringCacheEntry = unsafe { &*ptr }; debug_assert!(value.ref_count.load(SeqCst) == 0); @@ -236,7 +236,7 @@ pub type DefaultAtom = Atom; #[derive(PartialEq, Eq)] // NOTE: Deriving PartialEq requires that a given string must always be interned the same way. 
pub struct Atom { - unsafe_data: u64, + unsafe_data: NonZeroU64, phantom: PhantomData, } @@ -266,7 +266,10 @@ impl Atom { #[doc(hidden)] pub const fn pack_static(n: u32) -> Self { Self { - unsafe_data: (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS), + unsafe_data: unsafe { + // STATIC_TAG ensure this is non-zero + NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS)) + }, phantom: PhantomData, } } @@ -281,7 +284,7 @@ impl Atom { /// Return the internal repersentation. For testing. #[doc(hidden)] pub fn unsafe_data(&self) -> u64 { - self.unsafe_data + self.unsafe_data.get() } /// Return true if this is a static Atom. For testing. @@ -322,7 +325,7 @@ impl Atom { let entry = entry as *mut StringCacheEntry; u64_hash_as_u32(unsafe { (*entry).hash }) } - Inline(..) => u64_hash_as_u32(self.unsafe_data), + Inline(..) => u64_hash_as_u32(self.unsafe_data.get()), } } } @@ -405,7 +408,7 @@ impl Clone for Atom { #[inline(always)] fn clone(&self) -> Self { unsafe { - match from_packed_dynamic(self.unsafe_data) { + match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); @@ -425,11 +428,14 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. 
fn drop_slow(this: &mut Atom) { - STRING_CACHE.lock().unwrap().remove(this.unsafe_data); + STRING_CACHE + .lock() + .unwrap() + .remove(this.unsafe_data.get() as *mut StringCacheEntry); } unsafe { - match from_packed_dynamic(self.unsafe_data) { + match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { @@ -608,9 +614,9 @@ enum UnpackedAtom { } #[inline(always)] -fn inline_atom_slice(x: &u64) -> &[u8] { +fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { unsafe { - let x: *const u64 = x; + let x: *const NonZeroU64 = x; let mut data = x as *const u8; // All except the lowest byte, which is first in little-endian, last in big-endian. if cfg!(target_endian = "little") { @@ -643,22 +649,24 @@ impl UnpackedAtom { match self { Static(n) => Atom::pack_static(n), Dynamic(p) => { - let unsafe_data = p as u64; - debug_assert!(0 == unsafe_data & TAG_MASK); + let data = p as u64; + debug_assert!(0 == data & TAG_MASK); Atom { - unsafe_data, + // Callers are responsible for calling this with a valid, non-null pointer + unsafe_data: NonZeroU64::new_unchecked(data), phantom: PhantomData, } } Inline(len, buf) => { debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut unsafe_data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let dest = inline_atom_slice_mut(&mut unsafe_data); + let dest = inline_atom_slice_mut(&mut data); dest.copy_from_slice(&buf) } Atom { - unsafe_data, + // INLINE_TAG ensures this is never zero + unsafe_data: NonZeroU64::new_unchecked(data), phantom: PhantomData, } } @@ -667,14 +675,14 @@ impl UnpackedAtom { /// Unpack a key, extracting information from a single u64 into useable structs. 
#[inline(always)] - unsafe fn from_packed(data: u64) -> UnpackedAtom { + unsafe fn from_packed(data: NonZeroU64) -> UnpackedAtom { debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - match (data & TAG_MASK) as u8 { - DYNAMIC_TAG => Dynamic(data as *mut ()), - STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), + match (data.get() & TAG_MASK) as u8 { + DYNAMIC_TAG => Dynamic(data.get() as *mut ()), + STATIC_TAG => Static((data.get() >> STATIC_SHIFT_BITS) as u32), INLINE_TAG => { - let len = ((data & 0xf0) >> 4) as usize; + let len = ((data.get() & 0xf0) >> 4) as usize; debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; let src = inline_atom_slice(&data); @@ -701,7 +709,7 @@ unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { /// /// It's undefined behavior to call this on a non-inline atom!! #[inline(always)] -unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { +unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { let src = inline_atom_slice(&data); @@ -736,6 +744,10 @@ mod tests { 8 } ); + assert_eq!( + mem::size_of::>(), + mem::size_of::(), + ); assert_eq!( mem::size_of::(), 8 + 4 * mem::size_of::() From 09abcfbab37b08152b347e3c7062bf564cfd46c8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:29:50 +0200 Subject: [PATCH 313/379] Remove unused events.rs --- src/event.rs | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 src/event.rs diff --git a/src/event.rs b/src/event.rs deleted file mode 100644 index 1b777d3..0000000 --- a/src/event.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -use std::sync::Mutex; - -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Debug)] -pub enum Event { - Intern(u64), - Insert(u64, String), - Remove(u64), -} - -lazy_static! { - pub static ref LOG: Mutex> - = Mutex::new(Vec::with_capacity(50_000)); -} - -pub fn log(e: Event) { - LOG.lock().unwrap().push(e); -} - -macro_rules! log (($e:expr) => (::event::log($e))); From d968f8c0f4734bca285a39fd2cf93f5041568959 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:33:50 +0200 Subject: [PATCH 314/379] test_atom! is in another crate now --- src/lib.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 484f606..bbbcf00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,9 +116,3 @@ extern crate serde; pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; pub mod atom; - -// Make test_atom! macro work in this crate. -// `$crate` would not be appropriate for other crates creating such macros -mod string_cache { - pub use {Atom, PhfStrSet, StaticAtomSet}; -} From d191aae845c370e052a1cf74ec079672241a69dd Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:39:50 +0200 Subject: [PATCH 315/379] Switch to the 2018 edition --- Cargo.toml | 1 + examples/simple.rs | 2 +- integration-tests/Cargo.toml | 1 + integration-tests/build.rs | 2 +- integration-tests/src/lib.rs | 4 ---- src/atom.rs | 2 ++ src/lib.rs | 10 +--------- string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 7 +------ 9 files changed, 9 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5ba5775..eec2438 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ description = "A string interning library for Rust, developed as part of the Ser license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" +edition = "2018" # Do not `exclude` ./string-cache-codegen because we want to include # 
./string-cache-codegen/shared.rs, and `include` is a pain to use diff --git a/examples/simple.rs b/examples/simple.rs index 8a524ff..f063b06 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -1,4 +1,4 @@ -extern crate string_cache; + use string_cache::DefaultAtom; diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 7f1c60b..736e34a 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.1" authors = [ "The Servo Project Developers" ] build = "build.rs" publish = false +edition = "2018" [lib] doctest = false diff --git a/integration-tests/build.rs b/integration-tests/build.rs index e7e89d4..da40873 100644 --- a/integration-tests/build.rs +++ b/integration-tests/build.rs @@ -1,4 +1,4 @@ -extern crate string_cache_codegen; +use string_cache_codegen; use std::env; use std::path::Path; diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 614d068..28a5836 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -12,10 +12,6 @@ #![allow(non_upper_case_globals)] #![cfg_attr(feature = "unstable", feature(test))] -extern crate string_cache; - -#[cfg(feature = "unstable")] -extern crate rand; #[cfg(feature = "unstable")] extern crate test; diff --git a/src/atom.rs b/src/atom.rs index 095140e..4c508e0 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -9,6 +9,8 @@ #![allow(non_upper_case_globals)] +use debug_unreachable::debug_unreachable; +use lazy_static::lazy_static; use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; diff --git a/src/lib.rs b/src/lib.rs index bbbcf00..fbfacb6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,14 +105,6 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#[macro_use] -extern crate lazy_static; -#[macro_use] -extern crate debug_unreachable; -extern crate phf_shared; -extern crate precomputed_hash; -extern crate serde; - -pub use atom::{Atom, DefaultAtom, EmptyStaticAtomSet, 
PhfStrSet, StaticAtomSet}; +pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; pub mod atom; diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 847bace..42cda70 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,5 +1,4 @@ [package] - name = "string_cache_codegen" version = "0.5.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] @@ -7,6 +6,7 @@ description = "A codegen library for string-cache, developed as part of the Serv license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache_codegen/" +edition = "2018" [lib] name = "string_cache_codegen" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index f22b88c..0d90271 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -68,12 +68,7 @@ #![recursion_limit = "128"] -extern crate phf_generator; -extern crate phf_shared; -#[macro_use] -extern crate quote; -extern crate proc_macro2; - +use quote::quote; use std::collections::HashSet; use std::fs::File; use std::io::{self, BufWriter, Write}; From a578c80406dc687a16b93f819bf33e9043c66a95 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 18:41:07 +0200 Subject: [PATCH 316/379] The atom module does not need to be public --- integration-tests/src/lib.rs | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 28a5836..3aa2a44 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -16,7 +16,7 @@ extern crate test; use std::thread; -use string_cache::atom::StaticAtomSet; +use string_cache::StaticAtomSet; include!(concat!(env!("OUT_DIR"), "/test_atom.rs")); pub type Atom = TestAtom; diff --git a/src/lib.rs b/src/lib.rs index fbfacb6..3409c4d 100644 --- a/src/lib.rs 
+++ b/src/lib.rs @@ -107,4 +107,4 @@ pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; -pub mod atom; +mod atom; From b5174eaa3baabf7f2008ddcff27e8b02fdbf22cf Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 19:05:04 +0200 Subject: [PATCH 317/379] Move the global hash map to its own module --- src/atom.rs | 172 +++++++-------------------------------------- src/dynamic_set.rs | 110 +++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 138 insertions(+), 145 deletions(-) create mode 100644 src/dynamic_set.rs diff --git a/src/atom.rs b/src/atom.rs index 4c508e0..6e2027a 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -9,11 +9,10 @@ #![allow(non_upper_case_globals)] +use crate::dynamic_set::{Entry, DYNAMIC_SET}; use debug_unreachable::debug_unreachable; -use lazy_static::lazy_static; use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; - use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; @@ -24,9 +23,7 @@ use std::num::NonZeroU64; use std::ops; use std::slice; use std::str; -use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use std::sync::Mutex; use self::UnpackedAtom::{Dynamic, Inline, Static}; @@ -34,110 +31,10 @@ const DYNAMIC_TAG: u8 = 0b_00; const INLINE_TAG: u8 = 0b_01; // len in upper nybble const STATIC_TAG: u8 = 0b_10; const TAG_MASK: u64 = 0b_11; -const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. const MAX_INLINE_LEN: usize = 7; - const STATIC_SHIFT_BITS: usize = 32; -const NB_BUCKETS: usize = 1 << 12; // 4096 -const BUCKET_MASK: u64 = (1 << 12) - 1; - -struct StringCache { - buckets: Box<[Option>; NB_BUCKETS]>, -} - -lazy_static! 
{ - static ref STRING_CACHE: Mutex = Mutex::new(StringCache::new()); -} - -struct StringCacheEntry { - next_in_bucket: Option>, - hash: u64, - ref_count: AtomicIsize, - string: Box, -} - -impl StringCacheEntry { - fn new(next: Option>, hash: u64, string: String) -> StringCacheEntry { - StringCacheEntry { - next_in_bucket: next, - hash: hash, - ref_count: AtomicIsize::new(1), - string: string.into_boxed_str(), - } - } -} - -impl StringCache { - fn new() -> StringCache { - type T = Option>; - let _static_assert_size_eq = std::mem::transmute::; - let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); - StringCache { - buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, - } - } - - fn add(&mut self, string: Cow, hash: u64) -> *mut StringCacheEntry { - let bucket_index = (hash & BUCKET_MASK) as usize; - { - let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); - - while let Some(entry) = ptr.take() { - if entry.hash == hash && &*entry.string == &*string { - if entry.ref_count.fetch_add(1, SeqCst) > 0 { - return &mut **entry; - } - // Uh-oh. The pointer's reference count was zero, which means someone may try - // to free it. (Naive attempts to defend against this, for example having the - // destructor check to see whether the reference count is indeed zero, don't - // work due to ABA.) Thus we need to temporarily add a duplicate string to the - // list. 
- entry.ref_count.fetch_sub(1, SeqCst); - break; - } - ptr = entry.next_in_bucket.as_mut(); - } - } - debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); - let string = string.into_owned(); - let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index].take(), - hash, - string, - )); - let ptr: *mut StringCacheEntry = &mut *entry; - self.buckets[bucket_index] = Some(entry); - - ptr - } - - fn remove(&mut self, ptr: *mut StringCacheEntry) { - let bucket_index = { - let value: &StringCacheEntry = unsafe { &*ptr }; - debug_assert!(value.ref_count.load(SeqCst) == 0); - (value.hash & BUCKET_MASK) as usize - }; - - let mut current: &mut Option> = &mut self.buckets[bucket_index]; - - loop { - let entry_ptr: *mut StringCacheEntry = match current.as_mut() { - Some(entry) => &mut **entry, - None => break, - }; - if entry_ptr == ptr { - mem::drop(mem::replace(current, unsafe { - (*entry_ptr).next_in_bucket.take() - })); - break; - } - current = unsafe { &mut (*entry_ptr).next_in_bucket }; - } - } -} - /// A static `PhfStrSet` /// /// This trait is implemented by static sets of interned strings generated using @@ -324,7 +221,7 @@ impl Atom { static_set.hashes[index as usize] } Dynamic(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; u64_hash_as_u32(unsafe { (*entry).hash }) } Inline(..) 
=> u64_hash_as_u32(self.unsafe_data.get()), @@ -384,7 +281,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { Inline(len as u8, buf) } else { let hash = (hash.g as u64) << 32 | (hash.f1 as u64); - Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) + Dynamic(DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash) as *mut ()) } }; @@ -412,7 +309,7 @@ impl Clone for Atom { unsafe { match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; (*entry).ref_count.fetch_add(1, SeqCst); } None => (), @@ -430,16 +327,16 @@ impl Drop for Atom { fn drop(&mut self) { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - STRING_CACHE + DYNAMIC_SET .lock() .unwrap() - .remove(this.unsafe_data.get() as *mut StringCacheEntry); + .remove(this.unsafe_data.get() as *mut Entry); } unsafe { match from_packed_dynamic(self.unsafe_data.get()) { Some(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { drop_slow(self); } @@ -466,7 +363,7 @@ impl ops::Deref for Atom { .get(idx as usize) .expect("bad static atom"), Dynamic(entry) => { - let entry = entry as *mut StringCacheEntry; + let entry = entry as *mut Entry; &(*entry).string } } @@ -723,41 +620,26 @@ unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] { // Some minor tests of internal layout here. See ../integration-tests for much // more. -#[cfg(test)] -mod tests { - use super::{DefaultAtom, StringCacheEntry, ENTRY_ALIGNMENT}; +#[test] +fn assert_sizes() { use std::mem; - - #[test] - fn assert_sizes() { - use std::mem; - struct EmptyWithDrop; - impl Drop for EmptyWithDrop { - fn drop(&mut self) {} - } - let compiler_uses_inline_drop_flags = mem::size_of::() > 0; - - // Guard against accidental changes to the sizes of things. 
- assert_eq!( - mem::size_of::(), - if compiler_uses_inline_drop_flags { - 16 - } else { - 8 - } - ); - assert_eq!( - mem::size_of::>(), - mem::size_of::(), - ); - assert_eq!( - mem::size_of::(), - 8 + 4 * mem::size_of::() - ); + struct EmptyWithDrop; + impl Drop for EmptyWithDrop { + fn drop(&mut self) {} } + let compiler_uses_inline_drop_flags = mem::size_of::() > 0; - #[test] - fn string_cache_entry_alignment_is_sufficient() { - assert!(mem::align_of::() >= ENTRY_ALIGNMENT); - } + // Guard against accidental changes to the sizes of things. + assert_eq!( + mem::size_of::(), + if compiler_uses_inline_drop_flags { + 16 + } else { + 8 + } + ); + assert_eq!( + mem::size_of::>(), + mem::size_of::(), + ); } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs new file mode 100644 index 0000000..5bdeb68 --- /dev/null +++ b/src/dynamic_set.rs @@ -0,0 +1,110 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use lazy_static::lazy_static; +use std::borrow::Cow; +use std::mem; +use std::sync::atomic::AtomicIsize; +use std::sync::atomic::Ordering::SeqCst; +use std::sync::Mutex; + +const NB_BUCKETS: usize = 1 << 12; // 4096 +const BUCKET_MASK: u64 = (1 << 12) - 1; + +pub(crate) struct Set { + buckets: Box<[Option>; NB_BUCKETS]>, +} + +pub(crate) struct Entry { + pub(crate) string: Box, + pub(crate) hash: u64, + pub(crate) ref_count: AtomicIsize, + next_in_bucket: Option>, +} + +// Addresses are a multiples of this, +// and therefore have have TAG_MASK bits unset, available for tagging. +pub(crate) const ENTRY_ALIGNMENT: usize = 4; + +#[test] +fn entry_alignment_is_sufficient() { + assert!(mem::align_of::() >= ENTRY_ALIGNMENT); +} + +lazy_static! 
{ + pub(crate) static ref DYNAMIC_SET: Mutex = Mutex::new({ + type T = Option>; + let _static_assert_size_eq = std::mem::transmute::; + let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); + Set { + buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, + } + }); +} + +impl Set { + pub(crate) fn insert(&mut self, string: Cow, hash: u64) -> *mut Entry { + let bucket_index = (hash & BUCKET_MASK) as usize; + { + let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); + + while let Some(entry) = ptr.take() { + if entry.hash == hash && &*entry.string == &*string { + if entry.ref_count.fetch_add(1, SeqCst) > 0 { + return &mut **entry; + } + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. (Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. 
+ entry.ref_count.fetch_sub(1, SeqCst); + break; + } + ptr = entry.next_in_bucket.as_mut(); + } + } + debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let string = string.into_owned(); + let mut entry = Box::new(Entry { + next_in_bucket: self.buckets[bucket_index].take(), + hash, + ref_count: AtomicIsize::new(1), + string: string.into_boxed_str(), + }); + let ptr: *mut Entry = &mut *entry; + self.buckets[bucket_index] = Some(entry); + + ptr + } + + pub(crate) fn remove(&mut self, ptr: *mut Entry) { + let bucket_index = { + let value: &Entry = unsafe { &*ptr }; + debug_assert!(value.ref_count.load(SeqCst) == 0); + (value.hash & BUCKET_MASK) as usize + }; + + let mut current: &mut Option> = &mut self.buckets[bucket_index]; + + loop { + let entry_ptr: *mut Entry = match current.as_mut() { + Some(entry) => &mut **entry, + None => break, + }; + if entry_ptr == ptr { + mem::drop(mem::replace(current, unsafe { + (*entry_ptr).next_in_bucket.take() + })); + break; + } + current = unsafe { &mut (*entry_ptr).next_in_bucket }; + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 3409c4d..5751b25 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -108,3 +108,4 @@ pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; mod atom; +mod dynamic_set; From e25b4c664f3df2647b745f7ea90d4b47dd336906 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 19:13:57 +0200 Subject: [PATCH 318/379] =?UTF-8?q?Keep=20a=20u32=20hash=20rather=20than?= =?UTF-8?q?=20u64=20in=20dynamic=5Fatoms=5Fmap::Entry,=20since=20that?= =?UTF-8?q?=E2=80=99s=20all=20we=20ever=20use?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/atom.rs | 10 +++++++--- src/dynamic_set.rs | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 6e2027a..5476d7e 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -222,7 +222,7 @@ impl Atom { } Dynamic(entry) => { let entry = entry as *mut Entry; 
- u64_hash_as_u32(unsafe { (*entry).hash }) + unsafe { (*entry).hash } } Inline(..) => u64_hash_as_u32(self.unsafe_data.get()), } @@ -280,8 +280,12 @@ impl<'a, Static: StaticAtomSet> From> for Atom { buf[..len].copy_from_slice(string_to_add.as_bytes()); Inline(len as u8, buf) } else { - let hash = (hash.g as u64) << 32 | (hash.f1 as u64); - Dynamic(DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash) as *mut ()) + Dynamic( + DYNAMIC_SET + .lock() + .unwrap() + .insert(string_to_add, hash.g) as *mut (), + ) } }; diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 5bdeb68..0a2044b 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -15,7 +15,7 @@ use std::sync::atomic::Ordering::SeqCst; use std::sync::Mutex; const NB_BUCKETS: usize = 1 << 12; // 4096 -const BUCKET_MASK: u64 = (1 << 12) - 1; +const BUCKET_MASK: u32 = (1 << 12) - 1; pub(crate) struct Set { buckets: Box<[Option>; NB_BUCKETS]>, @@ -23,7 +23,7 @@ pub(crate) struct Set { pub(crate) struct Entry { pub(crate) string: Box, - pub(crate) hash: u64, + pub(crate) hash: u32, pub(crate) ref_count: AtomicIsize, next_in_bucket: Option>, } @@ -49,7 +49,7 @@ lazy_static! 
{ } impl Set { - pub(crate) fn insert(&mut self, string: Cow, hash: u64) -> *mut Entry { + pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> *mut Entry { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); From 1a92659fdf7f198f3632f30200d6da93fa43c3c1 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 19:48:59 +0200 Subject: [PATCH 319/379] Remove UnpackedAtom, inline its code --- src/atom.rs | 253 +++++++++++++-------------------------------- src/dynamic_set.rs | 7 +- 2 files changed, 78 insertions(+), 182 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 5476d7e..da41899 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -25,12 +25,12 @@ use std::slice; use std::str; use std::sync::atomic::Ordering::SeqCst; -use self::UnpackedAtom::{Dynamic, Inline, Static}; - const DYNAMIC_TAG: u8 = 0b_00; const INLINE_TAG: u8 = 0b_01; // len in upper nybble const STATIC_TAG: u8 = 0b_10; const TAG_MASK: u64 = 0b_11; +const LEN_OFFSET: u64 = 4; +const LEN_MASK: u64 = 0xF0; const MAX_INLINE_LEN: usize = 7; const STATIC_SHIFT_BITS: usize = 32; @@ -151,11 +151,6 @@ impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { } } -fn u64_hash_as_u32(h: u64) -> u32 { - // This may or may not be great... 
- ((h >> 32) ^ h) as u32 -} - // FIXME: bound removed from the struct definition before of this error for pack_static: // "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" // https://github.com/rust-lang/rust/issues/57563 @@ -166,20 +161,19 @@ impl Atom { pub const fn pack_static(n: u32) -> Self { Self { unsafe_data: unsafe { - // STATIC_TAG ensure this is non-zero + // STATIC_TAG ensures this is non-zero NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS)) }, phantom: PhantomData, } } -} -impl Atom { - #[inline(always)] - unsafe fn unpack(&self) -> UnpackedAtom { - UnpackedAtom::from_packed(self.unsafe_data) + fn tag(&self) -> u8 { + (self.unsafe_data.get() & TAG_MASK) as u8 } +} +impl Atom { /// Return the internal repersentation. For testing. #[doc(hidden)] pub fn unsafe_data(&self) -> u64 { @@ -189,42 +183,39 @@ impl Atom { /// Return true if this is a static Atom. For testing. #[doc(hidden)] pub fn is_static(&self) -> bool { - match unsafe { self.unpack() } { - Static(..) => true, - _ => false, - } + self.tag() == STATIC_TAG } /// Return true if this is a dynamic Atom. For testing. #[doc(hidden)] pub fn is_dynamic(&self) -> bool { - match unsafe { self.unpack() } { - Dynamic(..) => true, - _ => false, - } + self.tag() == DYNAMIC_TAG } /// Return true if this is an inline Atom. For testing. #[doc(hidden)] pub fn is_inline(&self) -> bool { - match unsafe { self.unpack() } { - Inline(..) => true, - _ => false, - } + self.tag() == INLINE_TAG + } + + fn static_index(&self) -> u64 { + self.unsafe_data.get() >> STATIC_SHIFT_BITS } /// Get the hash of the string as it is stored in the set. 
pub fn get_hash(&self) -> u32 { - match unsafe { self.unpack() } { - Static(index) => { - let static_set = Static::get(); - static_set.hashes[index as usize] - } - Dynamic(entry) => { - let entry = entry as *mut Entry; + match self.tag() { + DYNAMIC_TAG => { + let entry = self.unsafe_data.get() as *const Entry; unsafe { (*entry).hash } } - Inline(..) => u64_hash_as_u32(self.unsafe_data.get()), + STATIC_TAG => Static::get().hashes[self.static_index() as usize], + INLINE_TAG => { + let data = self.unsafe_data.get(); + // This may or may not be great... + ((data >> 32) ^ data) as u32 + } + _ => unsafe { debug_unreachable!() }, } } } @@ -265,31 +256,40 @@ impl PartialEq for Atom { } impl<'a, Static: StaticAtomSet> From> for Atom { - #[inline] fn from(string_to_add: Cow<'a, str>) -> Self { let static_set = Static::get(); let hash = phf_shared::hash(&*string_to_add, &static_set.key); let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); - let unpacked = if static_set.atoms[index as usize] == string_to_add { - Static(index) + if static_set.atoms[index as usize] == string_to_add { + Self::pack_static(index) } else { let len = string_to_add.len(); if len <= MAX_INLINE_LEN { - let mut buf: [u8; 7] = [0; 7]; - buf[..len].copy_from_slice(string_to_add.as_bytes()); - Inline(len as u8, buf) + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); + { + let dest = inline_atom_slice_mut(&mut data); + dest[..len].copy_from_slice(string_to_add.as_bytes()) + } + Atom { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } } else { - Dynamic( - DYNAMIC_SET - .lock() - .unwrap() - .insert(string_to_add, hash.g) as *mut (), - ) + let ptr: std::ptr::NonNull = DYNAMIC_SET + .lock() + .unwrap() + .insert(string_to_add, hash.g); + let data = ptr.as_ptr() as u64; + debug_assert!(0 == data & TAG_MASK); + Atom { + // The address of a ptr::NonNull is non-zero + unsafe_data: 
unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } } - }; - - unsafe { unpacked.pack() } + } } } @@ -310,25 +310,24 @@ impl From for Atom { impl Clone for Atom { #[inline(always)] fn clone(&self) -> Self { - unsafe { - match from_packed_dynamic(self.unsafe_data.get()) { - Some(entry) => { - let entry = entry as *mut Entry; - (*entry).ref_count.fetch_add(1, SeqCst); - } - None => (), - } - } - Atom { - unsafe_data: self.unsafe_data, - phantom: PhantomData, + if self.tag() == DYNAMIC_TAG { + let entry = self.unsafe_data.get() as *const Entry; + unsafe { &*entry }.ref_count.fetch_add(1, SeqCst); } + Atom { ..*self } } } impl Drop for Atom { #[inline] fn drop(&mut self) { + if self.tag() == DYNAMIC_TAG { + let entry = self.unsafe_data.get() as *const Entry; + if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 { + drop_slow(self) + } + } + // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { DYNAMIC_SET @@ -336,18 +335,6 @@ impl Drop for Atom { .unwrap() .remove(this.unsafe_data.get() as *mut Entry); } - - unsafe { - match from_packed_dynamic(self.unsafe_data.get()) { - Some(entry) => { - let entry = entry as *mut Entry; - if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { - drop_slow(self); - } - } - _ => (), - } - } } } @@ -357,19 +344,18 @@ impl ops::Deref for Atom { #[inline] fn deref(&self) -> &str { unsafe { - match self.unpack() { - Inline(..) 
=> { - let buf = inline_orig_bytes(&self.unsafe_data); - str::from_utf8_unchecked(buf) - } - Static(idx) => Static::get() - .atoms - .get(idx as usize) - .expect("bad static atom"), - Dynamic(entry) => { - let entry = entry as *mut Entry; + match self.tag() { + DYNAMIC_TAG => { + let entry = self.unsafe_data.get() as *const Entry; &(*entry).string } + INLINE_TAG => { + let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET; + let src = inline_atom_slice(&self.unsafe_data); + str::from_utf8_unchecked(&src[..(len as usize)]) + } + STATIC_TAG => Static::get().atoms[self.static_index() as usize], + _ => debug_unreachable!(), } } } @@ -386,10 +372,11 @@ impl fmt::Debug for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ty_str = unsafe { - match self.unpack() { - Dynamic(..) => "dynamic", - Inline(..) => "inline", - Static(..) => "static", + match self.tag() { + DYNAMIC_TAG => "dynamic", + INLINE_TAG => "inline", + STATIC_TAG => "static", + _ => debug_unreachable!(), } }; @@ -502,20 +489,6 @@ impl Atom { } } -// Atoms use a compact representation which fits this enum in a single u64. -// Inlining avoids actually constructing the unpacked representation in memory. -#[allow(missing_copy_implementations)] -enum UnpackedAtom { - /// Pointer to a dynamic table entry. Must be 16-byte aligned! - Dynamic(*mut ()), - - /// Length + bytes of string. - Inline(u8, [u8; 7]), - - /// Index in static interning table. - Static(u32), -} - #[inline(always)] fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { unsafe { @@ -544,84 +517,6 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { } } -impl UnpackedAtom { - /// Pack a key, fitting it into a u64 with flags and data. See `string_cache_shared` for - /// hints for the layout. 
- #[inline(always)] - unsafe fn pack(self) -> Atom { - match self { - Static(n) => Atom::pack_static(n), - Dynamic(p) => { - let data = p as u64; - debug_assert!(0 == data & TAG_MASK); - Atom { - // Callers are responsible for calling this with a valid, non-null pointer - unsafe_data: NonZeroU64::new_unchecked(data), - phantom: PhantomData, - } - } - Inline(len, buf) => { - debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); - { - let dest = inline_atom_slice_mut(&mut data); - dest.copy_from_slice(&buf) - } - Atom { - // INLINE_TAG ensures this is never zero - unsafe_data: NonZeroU64::new_unchecked(data), - phantom: PhantomData, - } - } - } - } - - /// Unpack a key, extracting information from a single u64 into useable structs. - #[inline(always)] - unsafe fn from_packed(data: NonZeroU64) -> UnpackedAtom { - debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - - match (data.get() & TAG_MASK) as u8 { - DYNAMIC_TAG => Dynamic(data.get() as *mut ()), - STATIC_TAG => Static((data.get() >> STATIC_SHIFT_BITS) as u32), - INLINE_TAG => { - let len = ((data.get() & 0xf0) >> 4) as usize; - debug_assert!(len <= MAX_INLINE_LEN); - let mut buf: [u8; 7] = [0; 7]; - let src = inline_atom_slice(&data); - buf.copy_from_slice(src); - Inline(len as u8, buf) - } - _ => debug_unreachable!(), - } - } -} - -/// Used for a fast path in Clone and Drop. -#[inline(always)] -unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { - if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { - Some(data as *mut ()) - } else { - None - } -} - -/// For as_slice on inline atoms, we need a pointer into the original -/// string contents. -/// -/// It's undefined behavior to call this on a non-inline atom!! 
-#[inline(always)] -unsafe fn inline_orig_bytes<'a>(data: &'a NonZeroU64) -> &'a [u8] { - match UnpackedAtom::from_packed(*data) { - Inline(len, _) => { - let src = inline_atom_slice(&data); - &src[..(len as usize)] - } - _ => debug_unreachable!(), - } -} - // Some minor tests of internal layout here. See ../integration-tests for much // more. #[test] diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 0a2044b..08c9dcd 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -10,6 +10,7 @@ use lazy_static::lazy_static; use std::borrow::Cow; use std::mem; +use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use std::sync::Mutex; @@ -49,7 +50,7 @@ lazy_static! { } impl Set { - pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> *mut Entry { + pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); @@ -57,7 +58,7 @@ impl Set { while let Some(entry) = ptr.take() { if entry.hash == hash && &*entry.string == &*string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { - return &mut **entry; + return NonNull::from(&mut **entry); } // Uh-oh. The pointer's reference count was zero, which means someone may try // to free it. 
(Naive attempts to defend against this, for example having the @@ -78,7 +79,7 @@ impl Set { ref_count: AtomicIsize::new(1), string: string.into_boxed_str(), }); - let ptr: *mut Entry = &mut *entry; + let ptr = NonNull::from(&mut *entry); self.buckets[bucket_index] = Some(entry); ptr From 4837db4fb68dcea7824973da5ba08e967e06defe Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 20:19:53 +0200 Subject: [PATCH 320/379] Move static sets to their own module --- src/atom.rs | 88 ++-------------------------------------------- src/lib.rs | 20 +++++++++-- src/static_sets.rs | 64 +++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 87 deletions(-) create mode 100644 src/static_sets.rs diff --git a/src/atom.rs b/src/atom.rs index da41899..a04abee 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -10,6 +10,7 @@ #![allow(non_upper_case_globals)] use crate::dynamic_set::{Entry, DYNAMIC_SET}; +use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; use phf_shared; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -35,61 +36,6 @@ const LEN_MASK: u64 = 0xF0; const MAX_INLINE_LEN: usize = 7; const STATIC_SHIFT_BITS: usize = 32; -/// A static `PhfStrSet` -/// -/// This trait is implemented by static sets of interned strings generated using -/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically. -/// -/// It is used by the methods of [`Atom`] to check if a string is present in the static set. -/// -/// [`Atom`]: struct.Atom.html -pub trait StaticAtomSet: Ord { - /// Get the location of the static string set in the binary. - fn get() -> &'static PhfStrSet; - /// Get the index of the empty string, which is in every set and is used for `Atom::default`. - fn empty_string_index() -> u32; -} - -/// A string set created using a [perfect hash function], specifically -/// [Hash, Displace and Compress]. -/// -/// See the CHD document for the meaning of the struct fields. 
-/// -/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function -/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf -pub struct PhfStrSet { - pub key: u64, - pub disps: &'static [(u32, u32)], - pub atoms: &'static [&'static str], - pub hashes: &'static [u32], -} - -/// An empty static atom set for when only dynamic strings will be added -#[derive(PartialEq, Eq, PartialOrd, Ord)] -pub struct EmptyStaticAtomSet; - -impl StaticAtomSet for EmptyStaticAtomSet { - fn get() -> &'static PhfStrSet { - // The name is a lie: this set is not empty (it contains the empty string) - // but that’s only to avoid divisions by zero in rust-phf. - static SET: PhfStrSet = PhfStrSet { - key: 0, - disps: &[(0, 0)], - atoms: &[""], - // "" SipHash'd, and xored with u64_hash_to_u32. - hashes: &[0x3ddddef3], - }; - &SET - } - - fn empty_string_index() -> u32 { - 0 - } -} - -/// Use this if you don’t care about static atoms. -pub type DefaultAtom = Atom; - /// Represents a string that has been interned. /// /// While the type definition for `Atom` indicates that it generic on a particular @@ -277,10 +223,8 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } else { - let ptr: std::ptr::NonNull = DYNAMIC_SET - .lock() - .unwrap() - .insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = + DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -516,29 +460,3 @@ fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { slice::from_raw_parts_mut(data, len) } } - -// Some minor tests of internal layout here. See ../integration-tests for much -// more. -#[test] -fn assert_sizes() { - use std::mem; - struct EmptyWithDrop; - impl Drop for EmptyWithDrop { - fn drop(&mut self) {} - } - let compiler_uses_inline_drop_flags = mem::size_of::() > 0; - - // Guard against accidental changes to the sizes of things. 
- assert_eq!( - mem::size_of::(), - if compiler_uses_inline_drop_flags { - 16 - } else { - 8 - } - ); - assert_eq!( - mem::size_of::>(), - mem::size_of::(), - ); -} diff --git a/src/lib.rs b/src/lib.rs index 5751b25..b83c81c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,7 +105,23 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -pub use crate::atom::{Atom, DefaultAtom, EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; - mod atom; mod dynamic_set; +mod static_sets; + +pub use atom::Atom; +pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; + +/// Use this if you don’t care about static atoms. +pub type DefaultAtom = Atom; + +// Some minor tests of internal layout here. +// See ../integration-tests for much more. + +/// Guard against accidental changes to the sizes of things. +#[test] +fn assert_sizes() { + use std::mem::size_of; + assert_eq!(size_of::(), 8); + assert_eq!(size_of::>(), size_of::(),); +} diff --git a/src/static_sets.rs b/src/static_sets.rs new file mode 100644 index 0000000..f7f1799 --- /dev/null +++ b/src/static_sets.rs @@ -0,0 +1,64 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// A static `PhfStrSet` +/// +/// This trait is implemented by static sets of interned strings generated using +/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically. +/// +/// It is used by the methods of [`Atom`] to check if a string is present in the static set. +/// +/// [`Atom`]: struct.Atom.html +pub trait StaticAtomSet: Ord { + /// Get the location of the static string set in the binary. + fn get() -> &'static PhfStrSet; + /// Get the index of the empty string, which is in every set and is used for `Atom::default`. 
+ fn empty_string_index() -> u32; +} + +/// A string set created using a [perfect hash function], specifically +/// [Hash, Displace and Compress]. +/// +/// See the CHD document for the meaning of the struct fields. +/// +/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function +/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf +pub struct PhfStrSet { + #[doc(hidden)] + pub key: u64, + #[doc(hidden)] + pub disps: &'static [(u32, u32)], + #[doc(hidden)] + pub atoms: &'static [&'static str], + #[doc(hidden)] + pub hashes: &'static [u32], +} + +/// An empty static atom set for when only dynamic strings will be added +#[derive(PartialEq, Eq, PartialOrd, Ord)] +pub struct EmptyStaticAtomSet; + +impl StaticAtomSet for EmptyStaticAtomSet { + fn get() -> &'static PhfStrSet { + // The name is a lie: this set is not empty (it contains the empty string) + // but that’s only to avoid divisions by zero in rust-phf. + static SET: PhfStrSet = PhfStrSet { + key: 0, + disps: &[(0, 0)], + atoms: &[""], + // "" SipHash'd, and xored with u64_hash_to_u32. + hashes: &[0x3ddddef3], + }; + &SET + } + + fn empty_string_index() -> u32 { + 0 + } +} From 57ff374f19232678f64611e22aac4af2129d58a8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 20:20:19 +0200 Subject: [PATCH 321/379] Remove attributes redundant with Cargo --- src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b83c81c..fd5e677 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,8 +101,6 @@ //! ``` //! 
-#![crate_name = "string_cache"] -#![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] mod atom; From 8bedc3ee1406d5d0b7b202c3b067e7fce58a26cf Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 18 Oct 2019 20:24:41 +0200 Subject: [PATCH 322/379] Move trait impls that simply forward to something else to their own module --- src/atom.rs | 80 --------------------------------------- src/lib.rs | 1 + src/trivial_impls.rs | 90 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 80 deletions(-) create mode 100644 src/trivial_impls.rs diff --git a/src/atom.rs b/src/atom.rs index a04abee..5011d40 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,13 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![allow(non_upper_case_globals)] - use crate::dynamic_set::{Entry, DYNAMIC_SET}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; use phf_shared; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; @@ -85,18 +82,6 @@ pub struct Atom { phantom: PhantomData, } -impl ::precomputed_hash::PrecomputedHash for Atom { - fn precomputed_hash(&self) -> u32 { - self.get_hash() - } -} - -impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { - fn from(atom: &'a Self) -> Self { - atom.clone() - } -} - // FIXME: bound removed from the struct definition before of this error for pack_static: // "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" // https://github.com/rust-lang/rust/issues/57563 @@ -183,24 +168,6 @@ impl Hash for Atom { } } -impl PartialEq for Atom { - fn eq(&self, other: &str) -> bool { - &self[..] == other - } -} - -impl PartialEq> for str { - fn eq(&self, other: &Atom) -> bool { - self == &other[..] - } -} - -impl PartialEq for Atom { - fn eq(&self, other: &String) -> bool { - &self[..] == &other[..] 
- } -} - impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { let static_set = Static::get(); @@ -237,20 +204,6 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } -impl<'a, Static: StaticAtomSet> From<&'a str> for Atom { - #[inline] - fn from(string_to_add: &str) -> Self { - Atom::from(Cow::Borrowed(string_to_add)) - } -} - -impl From for Atom { - #[inline] - fn from(string_to_add: String) -> Self { - Atom::from(Cow::Owned(string_to_add)) - } -} - impl Clone for Atom { #[inline(always)] fn clone(&self) -> Self { @@ -305,13 +258,6 @@ impl ops::Deref for Atom { } } -impl fmt::Display for Atom { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::fmt(self, f) - } -} - impl fmt::Debug for Atom { #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -348,32 +294,6 @@ impl Ord for Atom { } } -impl AsRef for Atom { - fn as_ref(&self) -> &str { - &self - } -} - -impl Serialize for Atom { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let string: &str = self.as_ref(); - string.serialize(serializer) - } -} - -impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'a>, - { - let string: String = Deserialize::deserialize(deserializer)?; - Ok(Atom::from(string)) - } -} - // AsciiExt requires mutating methods, so we just implement the non-mutating ones. // We don't need to implement is_ascii because there's no performance improvement // over the one from &str. 
diff --git a/src/lib.rs b/src/lib.rs index fd5e677..b4a8fd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -106,6 +106,7 @@ mod atom; mod dynamic_set; mod static_sets; +mod trivial_impls; pub use atom::Atom; pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs new file mode 100644 index 0000000..ed53be2 --- /dev/null +++ b/src/trivial_impls.rs @@ -0,0 +1,90 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::{Atom, StaticAtomSet}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::Cow; +use std::fmt; + +impl ::precomputed_hash::PrecomputedHash for Atom { + fn precomputed_hash(&self) -> u32 { + self.get_hash() + } +} + +impl<'a, Static: StaticAtomSet> From<&'a Atom> for Atom { + fn from(atom: &'a Self) -> Self { + atom.clone() + } +} + +impl PartialEq for Atom { + fn eq(&self, other: &str) -> bool { + &self[..] == other + } +} + +impl PartialEq> for str { + fn eq(&self, other: &Atom) -> bool { + self == &other[..] + } +} + +impl PartialEq for Atom { + fn eq(&self, other: &String) -> bool { + &self[..] == &other[..] 
+ } +} + +impl<'a, Static: StaticAtomSet> From<&'a str> for Atom { + #[inline] + fn from(string_to_add: &str) -> Self { + Atom::from(Cow::Borrowed(string_to_add)) + } +} + +impl From for Atom { + #[inline] + fn from(string_to_add: String) -> Self { + Atom::from(Cow::Owned(string_to_add)) + } +} + +impl fmt::Display for Atom { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + ::fmt(self, f) + } +} + +impl AsRef for Atom { + fn as_ref(&self) -> &str { + &self + } +} + +impl Serialize for Atom { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let string: &str = self.as_ref(); + string.serialize(serializer) + } +} + +impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'a>, + { + let string: String = Deserialize::deserialize(deserializer)?; + Ok(Atom::from(string)) + } +} From b78c9da2e8b446f1d8a22172003531d9673fb055 Mon Sep 17 00:00:00 2001 From: Anthony Ramine Date: Mon, 21 Oct 2019 12:48:08 +0200 Subject: [PATCH 323/379] Use unique const names in string_cache_codegen --- string-cache-codegen/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 42cda70..d212d7e 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.0" # Also update ../README.md when making a semver-breaking change +version = "0.5.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT / Apache-2.0" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 0d90271..0fe4819 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -227,10 +227,11 @@ impl AtomType { let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); let module = module.parse::().unwrap(); + let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase()); let const_names: Vec<_> = atoms .iter() .map(|atom| { - let mut name = String::from("ATOM"); + let mut name = atom_prefix.clone(); for c in atom.chars() { name.push_str(&format!("_{:02X}", c as u32)) } From 7f50fe0360175b74f0ff0302a597b676e854fe75 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Thu, 5 Dec 2019 05:11:10 -0800 Subject: [PATCH 324/379] Make serde dependency optional, but enabled by default --- .travis.yml | 1 + Cargo.toml | 6 +++++- src/trivial_impls.rs | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ad47308..180d855 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ rust: os: - linux script: + - cargo build --no-default-features - cargo build - cargo test --all - "cd string-cache-codegen && cargo build && cd .." diff --git a/Cargo.toml b/Cargo.toml index eec2438..364131a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,14 @@ edition = "2018" [lib] name = "string_cache" +[features] +serde_support = ["serde"] +default = ["serde_support"] + [dependencies] precomputed-hash = "0.1" lazy_static = "1" -serde = "1" +serde = { version = "1", optional = true } phf_shared = "0.8" new_debug_unreachable = "1.0" diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index ed53be2..4c055fd 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -8,6 +8,7 @@ // except according to those terms. 
use crate::{Atom, StaticAtomSet}; +#[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::borrow::Cow; use std::fmt; @@ -69,6 +70,7 @@ impl AsRef for Atom { } } +#[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result where @@ -79,6 +81,7 @@ impl Serialize for Atom { } } +#[cfg(feature = "serde_support")] impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { fn deserialize(deserializer: D) -> Result where From 82ac0d955d2d1785eb5634cdbf61465adc0d759e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Nov 2020 19:09:37 +0100 Subject: [PATCH 325/379] Bump to 0.8.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 364131a..01b9282 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.0" # Also update README.md when making a semver-breaking change +version = "0.8.1" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From c057dde7fdbdb9618d15374d74642b8267c0c837 Mon Sep 17 00:00:00 2001 From: l3ops Date: Tue, 20 Apr 2021 13:48:57 +0200 Subject: [PATCH 326/379] Add an Atom::try_static method to create an Atom only if it exists in the static table --- integration-tests/src/lib.rs | 6 ++++++ src/atom.rs | 26 ++++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 3aa2a44..1f2be87 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -290,6 +290,12 @@ fn test_from_string() { assert!(Atom::from("camembert".to_owned()) == Atom::from("camembert")); } +#[test] +fn test_try_static() { + assert!(Atom::try_static("head").is_some()); + assert!(Atom::try_static("not in the static table").is_none()); +} + #[cfg(all(test, feature = "unstable"))] #[path = "bench.rs"] mod bench; diff --git a/src/atom.rs b/src/atom.rs index 5011d40..6da0044 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -149,6 +149,22 @@ impl Atom { _ => unsafe { debug_unreachable!() }, } } + + pub fn try_static(string_to_add: &str) -> Option { + Self::try_static_internal(string_to_add).ok() + } + + fn try_static_internal(string_to_add: &str) -> Result { + let static_set = Static::get(); + let hash = phf_shared::hash(&*string_to_add, &static_set.key); + let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); + + if static_set.atoms[index as usize] == string_to_add { + Ok(Self::pack_static(index)) + } else { + Err(hash) + } + } } impl Default for Atom { @@ -170,13 +186,7 @@ impl Hash for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { - let static_set = Static::get(); - let hash = phf_shared::hash(&*string_to_add, &static_set.key); - let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); - - if static_set.atoms[index as usize] == string_to_add { - 
Self::pack_static(index) - } else { + Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { let len = string_to_add.len(); if len <= MAX_INLINE_LEN { let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); @@ -200,7 +210,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } - } + }) } } From 609a59f19273d6e261ee47543b4e0d576ca3707e Mon Sep 17 00:00:00 2001 From: Russell Mull Date: Tue, 31 Aug 2021 09:30:20 -0700 Subject: [PATCH 327/379] Update phf, rand dependencies --- Cargo.toml | 2 +- integration-tests/Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01b9282..88c09e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ default = ["serde_support"] precomputed-hash = "0.1" lazy_static = "1" serde = { version = "1", optional = true } -phf_shared = "0.8" +phf_shared = "0.10" new_debug_unreachable = "1.0" [[test]] diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 736e34a..a0b047c 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -19,7 +19,7 @@ unstable = [] string_cache = { version = "0.8", path = ".." 
} [dev-dependencies] -rand = "0.7" +rand = "0.8" string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } [build-dependencies] diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index d212d7e..5ae3a52 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.8" -phf_shared = "0.8" +phf_generator = "0.10" +phf_shared = "0.10" proc-macro2 = "1" quote = "1" From 72f38cf1836292c1533e662d6741f7e433e3cdbf Mon Sep 17 00:00:00 2001 From: Nathan West Date: Mon, 13 Sep 2021 23:48:39 -0400 Subject: [PATCH 328/379] Use a custom Visitor in Deserialize --- src/trivial_impls.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 4c055fd..3f2d75e 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -87,7 +87,33 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { where D: Deserializer<'a>, { - let string: String = Deserialize::deserialize(deserializer)?; - Ok(Atom::from(string)) + use serde::de; + use std::marker::PhantomData; + + struct AtomVisitor(PhantomData); + + impl<'de, Static: StaticAtomSet> de::Visitor<'de> for AtomVisitor { + type Value = Atom; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "an Atom") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + Ok(Atom::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: de::Error, + { + Ok(Atom::from(v)) + } + } + + deserializer.deserialize_string(AtomVisitor(PhantomData)) } } From 58d0c572db093c751797bb50c482f8375396cc7c Mon Sep 17 00:00:00 2001 From: Nathan West Date: Tue, 14 Sep 2021 00:26:04 -0400 Subject: [PATCH 329/379] Use deserialize_str instead of deserialize_string --- src/trivial_impls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/trivial_impls.rs b/src/trivial_impls.rs index 3f2d75e..c0119ca 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -114,6 +114,6 @@ impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom { } } - deserializer.deserialize_string(AtomVisitor(PhantomData)) + deserializer.deserialize_str(AtomVisitor(PhantomData)) } } From f95e39b2cf9e6610659c22c5c3e4ac271f886eba Mon Sep 17 00:00:00 2001 From: Donny Date: Fri, 8 Oct 2021 16:14:03 +0900 Subject: [PATCH 330/379] Use parking_lot --- Cargo.toml | 1 + src/atom.rs | 3 +-- src/dynamic_set.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01b9282..a54b134 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ lazy_static = "1" serde = { version = "1", optional = true } phf_shared = "0.8" new_debug_unreachable = "1.0" +parking_lot = "0.11" [[test]] name = "small-stack" diff --git a/src/atom.rs b/src/atom.rs index 6da0044..0d74408 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -201,7 +201,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } else { let ptr: std::ptr::NonNull = - DYNAMIC_SET.lock().unwrap().insert(string_to_add, hash.g); + DYNAMIC_SET.lock().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -239,7 +239,6 @@ impl Drop for Atom { fn drop_slow(this: &mut Atom) { DYNAMIC_SET .lock() - .unwrap() .remove(this.unsafe_data.get() as *mut Entry); } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 08c9dcd..f926f1e 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -8,12 +8,12 @@ // except according to those terms. 
use lazy_static::lazy_static; +use parking_lot::Mutex; use std::borrow::Cow; use std::mem; use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use std::sync::Mutex; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; From bcf15b9269e8cff3560087c45d3ee66f8bf4fc76 Mon Sep 17 00:00:00 2001 From: cybai Date: Sat, 9 Oct 2021 01:18:28 +0900 Subject: [PATCH 331/379] Move CI to GitHub Actions --- .github/workflows/ci.yml | 46 ++++++++++++++++++++++++++++++++++++++++ .travis.yml | 15 ------------- 2 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d1c124a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: Run CI +on: + push: + branches: ["master"] + pull_request: + branches: ["**"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +env: + RUST_BACKTRACE: 1 + SHELL: /bin/bash + +jobs: + ci: + name: Run CI + runs-on: ubuntu-20.04 + + strategy: + matrix: + rust: [1.36.0, nightly, beta, stable] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 2 + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + default: true + override: true + - name: Build + run: | + cargo build --no-default-features + cargo build + - name: Tests + run: cargo test --all + - name: Build codegen + run: | + cd string-cache-codegen && cargo build && cd .. 
+ + if [ ${{ matrix.rust }} = nightly ]; then + cd integration-tests && cargo test --features unstable && cd ..; + fi diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 180d855..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -sudo: false -language: rust -rust: - - 1.36.0 - - nightly - - beta - - stable -os: - - linux -script: - - cargo build --no-default-features - - cargo build - - cargo test --all - - "cd string-cache-codegen && cargo build && cd .." - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi" From 0806c6f08f086d9033f62ff95865a9690dc35101 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 9 Oct 2021 12:15:46 -0400 Subject: [PATCH 332/379] Add homu result. --- .github/workflows/ci.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1c124a..ee596b5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,3 +44,18 @@ jobs: if [ ${{ matrix.rust }} = nightly ]; then cd integration-tests && cargo test --features unstable && cd ..; fi + + + build_result: + name: homu build finished + runs-on: ubuntu-latest + needs: + - "ci" + + steps: + - name: Mark the job as successful + run: exit 0 + if: success() + - name: Mark the job as unsuccessful + run: exit 1 + if: "!success()" From 3a35e765552a22c54aefce9588ba8bf6de86457b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 9 Oct 2021 12:22:12 -0400 Subject: [PATCH 333/379] Run CI on auto branch. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee596b5..d043617 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,7 +1,7 @@ name: Run CI on: push: - branches: ["master"] + branches: ["auto"] pull_request: branches: ["**"] From ddaf1bd4074369c698235b5e196f506ae0d21c0a Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Mon, 11 Oct 2021 21:44:41 -0400 Subject: [PATCH 334/379] Publish 0.8.2. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a54b134..2636de6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.1" # Also update README.md when making a semver-breaking change +version = "0.8.2" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From dd6a60115991e13c21ce77fe12cf11245c04ddcb Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Thu, 14 Oct 2021 17:15:32 -0700 Subject: [PATCH 335/379] Replace Travis CI badge with GitHub Actions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c9c8ac..fdf4c0a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # string-cache -[![Build Status](https://travis-ci.com/servo/string-cache.svg?branch=master)](https://travis-ci.com/servo/string-cache) +[![Build Status](https://github.com/servo/string-cache/actions/workflows/ci.yml/badge.svg)](https://github.com/servo/string-cache/actions) [Documentation](https://docs.rs/string_cache/) From edce5ddd8b0740609b51440e4d7d5ba752b376d4 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 21 Nov 2021 15:36:17 -0500 Subject: [PATCH 336/379] Update MSRV to 1.38.0. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d043617..e8485a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.36.0, nightly, beta, stable] + rust: [1.38.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 5c3d60334a496402d78d906e7661dd4b72931a44 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 21 Nov 2021 16:06:28 -0500 Subject: [PATCH 337/379] Update MSRV to 1.40.0 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8485a9..39b55c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,11 +15,11 @@ env: jobs: ci: name: Run CI - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: - rust: [1.38.0, nightly, beta, stable] + rust: [1.40.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 7934bf9eb7784962d5af8f96258756fc480d4c22 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 16 Dec 2021 16:16:49 -0700 Subject: [PATCH 338/379] Fix warning and typo in small stack regression test --- tests/small-stack.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/small-stack.rs b/tests/small-stack.rs index 269cad7..bb607af 100644 --- a/tests/small-stack.rs +++ b/tests/small-stack.rs @@ -1,6 +1,6 @@ // Regression test for https://github.com/servo/html5ever/issues/393 // -// Create a dynamic atom − causing initialization of the golbal hash map − +// Create a dynamic atom − causing initialization of the global hash map − // in a thread that has a small stack. 
// // This is a separate test program rather than a `#[test] fn` among others @@ -9,7 +9,7 @@ fn main() { std::thread::Builder::new() .stack_size(50_000) .spawn(|| { - string_cache::DefaultAtom::from("12345678"); + let _atom = string_cache::DefaultAtom::from("12345678"); }) .unwrap() .join() From 474d27785f1339fa3056a8d320da2aa8cfec3d19 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:17:01 -0700 Subject: [PATCH 339/379] Add a Clippy exception for `derive_hash_xor_eq` This is a string interning library. It does some weird things related to hashing. This is fine. --- src/lib.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index b4a8fd5..441cb4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,6 +103,19 @@ #![cfg_attr(test, deny(warnings))] +// Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match +// with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while +// manually impl-ing the other, because it seems easy for the two to drift apart, causing the +// invariant to be violated. +// +// But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and +// copying are this library's purpose. So we know what the PartialEq comparison is going to do. +// +// The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner +// tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to +// differ would be if the table entry changed between invocations, and that would be really bad. 
+#![allow(clippy::derive_hash_xor_eq)] + mod atom; mod dynamic_set; mod static_sets; From c2afb8bbc3104867c5d89b79cc83bec1fae9fabd Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:19:12 -0700 Subject: [PATCH 340/379] Remove redundant `use` line --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 0d74408..c02651b 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -10,7 +10,7 @@ use crate::dynamic_set::{Entry, DYNAMIC_SET}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; -use phf_shared; + use std::borrow::Cow; use std::cmp::Ordering::{self, Equal}; use std::fmt; From 78c516d82f48f215ec214979ed3b3a4b874c3dd9 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:20:25 -0700 Subject: [PATCH 341/379] Remove redundant reference operands --- src/dynamic_set.rs | 2 +- src/trivial_impls.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index f926f1e..2eab9da 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -56,7 +56,7 @@ impl Set { let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); while let Some(entry) = ptr.take() { - if entry.hash == hash && &*entry.string == &*string { + if entry.hash == hash && *entry.string == *string { if entry.ref_count.fetch_add(1, SeqCst) > 0 { return NonNull::from(&mut **entry); } diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index c0119ca..960dde0 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -39,7 +39,7 @@ impl PartialEq> for str { impl PartialEq for Atom { fn eq(&self, other: &String) -> bool { - &self[..] == &other[..] + self[..] == other[..] 
} } @@ -66,7 +66,7 @@ impl fmt::Display for Atom { impl AsRef for Atom { fn as_ref(&self) -> &str { - &self + self } } From 71925156c4ec58bc6cfc1f31f8d6a9ee58a4cfc0 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 21 Dec 2021 10:22:12 -0700 Subject: [PATCH 342/379] Convert manual loop to `while let` --- src/dynamic_set.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 2eab9da..602b700 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -94,11 +94,8 @@ impl Set { let mut current: &mut Option> = &mut self.buckets[bucket_index]; - loop { - let entry_ptr: *mut Entry = match current.as_mut() { - Some(entry) => &mut **entry, - None => break, - }; + while let Some(entry_ptr) = current.as_mut() { + let entry_ptr: *mut Entry = &mut **entry_ptr; if entry_ptr == ptr { mem::drop(mem::replace(current, unsafe { (*entry_ptr).next_in_bucket.take() From 36da1fd4d86b5803082ec27155d43b0ca0c68709 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 12:34:49 +0100 Subject: [PATCH 343/379] Bump new_debug_unreachable to 1.0.2 Previous versions are incompatible with -Z minimal-versions builds. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index d3d2093..2b0b466 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ precomputed-hash = "0.1" lazy_static = "1" serde = { version = "1", optional = true } phf_shared = "0.10" -new_debug_unreachable = "1.0" +new_debug_unreachable = "1.0.2" parking_lot = "0.11" [[test]] From 97a4f6d1c457fc45151216d7b54446e573cdf322 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 12:36:52 +0100 Subject: [PATCH 344/379] Bump lazy_static to 1.1.0 This version builds with `-Z minimal-versions` when warnings are denied. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2b0b466..747d499 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -lazy_static = "1" +lazy_static = "1.1.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" From ab30960f33f1a5e87ccb62e1e9cfd9677fe84cbb Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Fri, 4 Feb 2022 13:08:40 +0100 Subject: [PATCH 345/379] Bump to 0.8.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 747d499..9ba5095 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.2" # Also update README.md when making a semver-breaking change +version = "0.8.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT / Apache-2.0" From cbd1dc3b7f2085a93945e29c000723d232aec61b Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 14 Feb 2022 11:18:26 -0500 Subject: [PATCH 346/379] fix: bump parking lot to 0.12 in order to not create wasm export --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9ba5095..5d94040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ lazy_static = "1.1.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" -parking_lot = "0.11" +parking_lot = "0.12" [[test]] name = "small-stack" From 6c0f8253813ea24b4f8be07658159fc6edb7acd2 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 14 Feb 2022 13:11:15 -0500 Subject: [PATCH 347/379] Bump CI to rust 1.49 (not sure if desired though) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39b55c5..fe17a69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.40.0, nightly, beta, stable] + rust: [1.49.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 019118878fc994417edfd6d7a389c317b0654aa0 Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Mon, 28 Feb 2022 14:43:55 -0800 Subject: [PATCH 348/379] Use SPDX license format --- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d94040..6ca2f39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "string_cache" version = "0.8.3" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
-license = "MIT / Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache/" edition = "2018" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 5ae3a52..f207e21 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -3,7 +3,7 @@ name = "string_cache_codegen" version = "0.5.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." -license = "MIT / Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "https://docs.rs/string_cache_codegen/" edition = "2018" From 9ae0f889bb96bc890b59eef1cd271c603b5a690f Mon Sep 17 00:00:00 2001 From: Xidorn Quan Date: Fri, 18 Mar 2022 22:48:05 +1100 Subject: [PATCH 349/379] Replace lazy_static with once_cell --- Cargo.toml | 2 +- src/dynamic_set.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d94040..e47eb8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -lazy_static = "1.1.0" +once_cell = "1.10.0" serde = { version = "1", optional = true } phf_shared = "0.10" new_debug_unreachable = "1.0.2" diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 602b700..229a79f 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use parking_lot::Mutex; use std::borrow::Cow; use std::mem; @@ -38,16 +38,16 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -lazy_static! 
{ - pub(crate) static ref DYNAMIC_SET: Mutex = Mutex::new({ +pub(crate) static DYNAMIC_SET: Lazy> = Lazy::new(|| { + Mutex::new({ type T = Option>; let _static_assert_size_eq = std::mem::transmute::; let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); Set { buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, } - }); -} + }) +}); impl Set { pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { From aa644096ae26aba2e7192d67a74101e6a21469ac Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 21 Mar 2022 14:24:45 -0400 Subject: [PATCH 350/379] 0.8.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e47eb8a..db1e95d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.3" # Also update README.md when making a semver-breaking change +version = "0.8.4" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" From 5034bde5c45b40af8e1c40672263193fa7050b32 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sun, 27 Mar 2022 18:09:47 -0400 Subject: [PATCH 351/379] Publish string-cache-codegen 0.5.2. --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index f207e21..5eb5125 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.1" # Also update ../README.md when making a semver-breaking change +version = "0.5.2" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" From c58ac06272bf59135e58fdf8df0dd5e696019382 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Tue, 12 Jul 2022 11:49:26 +0100 Subject: [PATCH 352/379] Fix comment typo --- src/atom.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index c02651b..3fad611 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -105,7 +105,7 @@ impl Atom { } impl Atom { - /// Return the internal repersentation. For testing. + /// Return the internal representation. For testing. #[doc(hidden)] pub fn unsafe_data(&self) -> u64 { self.unsafe_data.get() From 8f5bed52d29a4d201705665fc9599c0d06bb4427 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Wed, 14 Dec 2022 21:33:36 -0500 Subject: [PATCH 353/379] Update MSRV. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe17a69..f99d2e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: - rust: [1.49.0, nightly, beta, stable] + rust: [1.56.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 37b459f8ce1ec694e8218ebdeef30c06f68e6205 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 14 Dec 2022 10:44:50 +0100 Subject: [PATCH 354/379] Add trivial impl of Borrow for Atom This enables Atom to be used in methods like HashMap::entry_ref. 
--- src/trivial_impls.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 960dde0..24baaf1 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -10,7 +10,7 @@ use crate::{Atom, StaticAtomSet}; #[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; use std::fmt; impl ::precomputed_hash::PrecomputedHash for Atom { @@ -70,6 +70,12 @@ impl AsRef for Atom { } } +impl Borrow for Atom { + fn borrow(&self) -> &str { + self + } +} + #[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result From b473a4ad3be989166031f56976f7ce54ae79ac05 Mon Sep 17 00:00:00 2001 From: Boshen Date: Thu, 16 Feb 2023 21:58:39 +0800 Subject: [PATCH 355/379] feat: use bucket mutex instead of global mutex for dynamic set This implementation uses bucket level mutex with linear probing. --- src/atom.rs | 7 ++----- src/dynamic_set.rs | 36 +++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index c02651b..7856947 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -200,8 +200,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { phantom: PhantomData, } } else { - let ptr: std::ptr::NonNull = - DYNAMIC_SET.lock().insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -237,9 +236,7 @@ impl Drop for Atom { // Out of line to guide inlining. 
fn drop_slow(this: &mut Atom) { - DYNAMIC_SET - .lock() - .remove(this.unsafe_data.get() as *mut Entry); + DYNAMIC_SET.remove(this.unsafe_data.get() as *mut Entry); } } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 229a79f..6ea4ba6 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -19,7 +19,7 @@ const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; pub(crate) struct Set { - buckets: Box<[Option>; NB_BUCKETS]>, + buckets: Box<[Mutex>>]>, } pub(crate) struct Entry { @@ -38,22 +38,24 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -pub(crate) static DYNAMIC_SET: Lazy> = Lazy::new(|| { - Mutex::new({ - type T = Option>; - let _static_assert_size_eq = std::mem::transmute::; - let vec = std::mem::ManuallyDrop::new(vec![0_usize; NB_BUCKETS]); - Set { - buckets: unsafe { Box::from_raw(vec.as_ptr() as *mut [T; NB_BUCKETS]) }, - } - }) +pub(crate) static DYNAMIC_SET: Lazy = Lazy::new(|| { + // NOTE: Using const initialization for buckets breaks the small-stack test. 
+ // ``` + // // buckets: [Mutex>>; NB_BUCKETS], + // const MUTEX: Mutex>> = Mutex::new(None); + // let buckets = Box::new([MUTEX; NB_BUCKETS]); + // ``` + let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); + Set { buckets } }); impl Set { - pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { + pub(crate) fn insert(&self, string: Cow, hash: u32) -> NonNull { let bucket_index = (hash & BUCKET_MASK) as usize; + let mut linked_list = self.buckets[bucket_index].lock(); + { - let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); + let mut ptr: Option<&mut Box> = linked_list.as_mut(); while let Some(entry) = ptr.take() { if entry.hash == hash && *entry.string == *string { @@ -74,25 +76,25 @@ impl Set { debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let string = string.into_owned(); let mut entry = Box::new(Entry { - next_in_bucket: self.buckets[bucket_index].take(), + next_in_bucket: linked_list.take(), hash, ref_count: AtomicIsize::new(1), string: string.into_boxed_str(), }); let ptr = NonNull::from(&mut *entry); - self.buckets[bucket_index] = Some(entry); - + *linked_list = Some(entry); ptr } - pub(crate) fn remove(&mut self, ptr: *mut Entry) { + pub(crate) fn remove(&self, ptr: *mut Entry) { let bucket_index = { let value: &Entry = unsafe { &*ptr }; debug_assert!(value.ref_count.load(SeqCst) == 0); (value.hash & BUCKET_MASK) as usize }; - let mut current: &mut Option> = &mut self.buckets[bucket_index]; + let mut linked_list = self.buckets[bucket_index].lock(); + let mut current: &mut Option> = &mut linked_list; while let Some(entry_ptr) = current.as_mut() { let entry_ptr: *mut Entry = &mut **entry_ptr; From e01688eb974ac52b24f19c69fb8ca398a07cdd32 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Wed, 22 Feb 2023 11:50:52 +0100 Subject: [PATCH 356/379] Bump version to 0.8.5 This will allow dependent packages to pick up improvements to mutex performance. 
--- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 16ef966..2d29863 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.4" # Also update README.md when making a semver-breaking change +version = "0.8.5" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 9c7b0aa84a5d862f57c88e59f07d3c66efe58908 Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Thu, 23 Feb 2023 02:35:49 +0200 Subject: [PATCH 357/379] Revert trivial impl of Borrow for Atom --- src/trivial_impls.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 24baaf1..0b2c98b 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -70,12 +70,6 @@ impl AsRef for Atom { } } -impl Borrow for Atom { - fn borrow(&self) -> &str { - self - } -} - #[cfg(feature = "serde_support")] impl Serialize for Atom { fn serialize(&self, serializer: S) -> Result From 4e45fde044657fe94a00d5cbd0d2910a1f827d0e Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Thu, 23 Feb 2023 15:34:07 +0200 Subject: [PATCH 358/379] remove unused import --- src/trivial_impls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trivial_impls.rs b/src/trivial_impls.rs index 0b2c98b..960dde0 100644 --- a/src/trivial_impls.rs +++ b/src/trivial_impls.rs @@ -10,7 +10,7 @@ use crate::{Atom, StaticAtomSet}; #[cfg(feature = "serde_support")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::{Borrow, Cow}; +use std::borrow::Cow; use std::fmt; impl ::precomputed_hash::PrecomputedHash for Atom { From 448bf6b9c14e1f6f81a3f820006508fab8c4388b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Thu, 23 Feb 2023 22:04:50 -0500 Subject: [PATCH 359/379] Publish 
0.8.6. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2d29863..6067114 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.5" # Also update README.md when making a semver-breaking change +version = "0.8.6" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 126c173ac3ba18888821038be28aeba44af1023c Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Tue, 28 Feb 2023 18:00:46 +0200 Subject: [PATCH 360/379] test: add common dependency usage --- integration-tests/src/common-usage.rs | 19 +++++++++++++++++++ integration-tests/src/lib.rs | 4 ++++ 2 files changed, 23 insertions(+) create mode 100644 integration-tests/src/common-usage.rs diff --git a/integration-tests/src/common-usage.rs b/integration-tests/src/common-usage.rs new file mode 100644 index 0000000..7b7380a --- /dev/null +++ b/integration-tests/src/common-usage.rs @@ -0,0 +1,19 @@ +/// Test common usage by popular dependents (html5ever, lalrpop, browserlists-rs), to ensure no API-surface breaking changes +/// Created after https://github.com/servo/string-cache/issues/271 +use std::collections::HashMap; + +use crate::Atom; +use crate::TestAtom; + +#[test] +fn usage_with_hashmap() { + let mut map: HashMap = HashMap::new(); + + map.insert(test_atom!("area"), 1); + map.insert("str_into".into(), 2); + map.insert("atom_from".into(), 3); + + assert_eq!(map.get(&"area".into()).unwrap(), &1); + assert_eq!(map.get(&"str_into".into()).unwrap(), &2); + assert_eq!(map.get(&Atom::from("atom_from")).unwrap(), &3); +} diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index 1f2be87..aaacdff 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -296,6 +296,10 @@ fn test_try_static() { 
assert!(Atom::try_static("not in the static table").is_none()); } +#[cfg(test)] +#[path = "common-usage.rs"] +mod common_usage; + #[cfg(all(test, feature = "unstable"))] #[path = "bench.rs"] mod bench; From 120ba6c88e9337a810149b5afa4eecf32d8006d8 Mon Sep 17 00:00:00 2001 From: Yoni Feigelson Date: Fri, 3 Mar 2023 22:22:19 +0200 Subject: [PATCH 361/379] fix: move debug_assert check --- src/dynamic_set.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 6ea4ba6..46e7a54 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -87,13 +87,11 @@ impl Set { } pub(crate) fn remove(&self, ptr: *mut Entry) { - let bucket_index = { - let value: &Entry = unsafe { &*ptr }; - debug_assert!(value.ref_count.load(SeqCst) == 0); - (value.hash & BUCKET_MASK) as usize - }; + let value: &Entry = unsafe { &*ptr }; + let bucket_index = (value.hash & BUCKET_MASK) as usize; let mut linked_list = self.buckets[bucket_index].lock(); + debug_assert!(value.ref_count.load(SeqCst) == 0); let mut current: &mut Option> = &mut linked_list; while let Some(entry_ptr) = current.as_mut() { From 34f914c99e8bdc5f2fa842fd04f190c7c9e4df3b Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Tue, 7 Mar 2023 08:54:42 -0500 Subject: [PATCH 362/379] Publish 0.8.7 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6067114..b0f4957 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.6" # Also update README.md when making a semver-breaking change +version = "0.8.7" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" From 1ae3d0dcbf2c50f7d4a6f8c6e5f16a410ecc40d6 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 10 Jul 2023 14:14:57 +0200 Subject: [PATCH 363/379] Enable the GitHub merge queue (#280) --- .github/workflows/ci.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f99d2e9..a4615f2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,11 @@ -name: Run CI +name: CI on: push: - branches: ["auto"] + branches: ["master"] pull_request: branches: ["**"] + merge_group: + types: [checks_requested] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -14,7 +16,7 @@ env: jobs: ci: - name: Run CI + name: Build and Test runs-on: ubuntu-latest strategy: @@ -35,8 +37,10 @@ jobs: run: | cargo build --no-default-features cargo build - - name: Tests - run: cargo test --all + - uses: actions-rs/cargo@v1 + with: + command: test + args: --all - name: Build codegen run: | cd string-cache-codegen && cargo build && cd .. 
@@ -47,7 +51,7 @@ jobs: build_result: - name: homu build finished + name: Result runs-on: ubuntu-latest needs: - "ci" From b46a64fa8c74fbef9b297ab05cace66da536e8a1 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 11 Mar 2024 10:37:57 +0100 Subject: [PATCH 364/379] Rename `master` branch to `main` (#283) --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4615f2..8b48d1a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,8 @@ name: CI on: push: - branches: ["master"] + branches: ["main"] pull_request: - branches: ["**"] merge_group: types: [checks_requested] From a7793f0e6739bb4976c80db2351163c9a7d005c2 Mon Sep 17 00:00:00 2001 From: Matthew Martin Date: Mon, 11 Mar 2024 04:44:34 -0500 Subject: [PATCH 365/379] Update phf to 0.11 (#281) --- .github/workflows/ci.yml | 2 +- Cargo.toml | 2 +- string-cache-codegen/Cargo.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b48d1a..b133023 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.56.0, nightly, beta, stable] + rust: [1.60.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 diff --git a/Cargo.toml b/Cargo.toml index b0f4957..df159f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ default = ["serde_support"] precomputed-hash = "0.1" once_cell = "1.10.0" serde = { version = "1", optional = true } -phf_shared = "0.10" +phf_shared = "0.11" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 5eb5125..b059bfc 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.10" -phf_shared = "0.10" +phf_generator 
= "0.11" +phf_shared = "0.11" proc-macro2 = "1" quote = "1" From 1b636e99cb1bd8dff31bb4fc5be089002a635c12 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Wed, 17 Jul 2024 22:35:24 +0100 Subject: [PATCH 366/379] Skip bounds check for inline slices (#277) --- src/atom.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atom.rs b/src/atom.rs index 321b0a4..d1bd7b8 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -254,8 +254,9 @@ impl ops::Deref for Atom { } INLINE_TAG => { let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET; + debug_assert!(len as usize <= MAX_INLINE_LEN); let src = inline_atom_slice(&self.unsafe_data); - str::from_utf8_unchecked(&src[..(len as usize)]) + str::from_utf8_unchecked(src.get_unchecked(..(len as usize))) } STATIC_TAG => Static::get().atoms[self.static_index() as usize], _ => debug_unreachable!(), From c8fed62876eed738b16f7e0ae3d9974391e8f59f Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Wed, 31 Jul 2024 23:13:14 +0100 Subject: [PATCH 367/379] Prefer inline representation over static (#278) * Benchmarks use longer static strings * Use inline for short strings Closes #276. 
--- integration-tests/build.rs | 4 +++ integration-tests/src/bench.rs | 8 ++--- integration-tests/src/lib.rs | 25 +++++++++++----- src/atom.rs | 53 ++++++++++++++++++++++++---------- string-cache-codegen/lib.rs | 51 +++++++++++++++++++++++++------- 5 files changed, 104 insertions(+), 37 deletions(-) diff --git a/integration-tests/build.rs b/integration-tests/build.rs index da40873..6293e4c 100644 --- a/integration-tests/build.rs +++ b/integration-tests/build.rs @@ -9,6 +9,7 @@ fn main() { "a", "b", "address", + "defaults", "area", "body", "font-weight", @@ -16,6 +17,9 @@ fn main() { "html", "head", "id", + "❤", + "❤💯", + "❤💯❤💯", ]) .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs")) .unwrap() diff --git a/integration-tests/src/bench.rs b/integration-tests/src/bench.rs index 4d8f012..45e7199 100644 --- a/integration-tests/src/bench.rs +++ b/integration-tests/src/bench.rs @@ -153,7 +153,7 @@ bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_ref clone is_static lt] - for static_atom = test_atom!("a"), test_atom!("b")); + for static_atom = test_atom!("defaults"), test_atom!("font-weight")); bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); @@ -168,13 +168,13 @@ bench_all!([eq ne intern as_ref clone is_dynamic lt] for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b)); bench_all!([intern as_ref clone is_static] - for static_at_runtime = mk("a"), mk("b")); + for static_at_runtime = mk("defaults"), mk("font-weight")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = test_atom!("a"), mk("f")); + for static_vs_inline = test_atom!("defaults"), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = test_atom!("a"), mk(super::longer_dynamic_b)); + for static_vs_dynamic = test_atom!("defaults"), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] for 
inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); diff --git a/integration-tests/src/lib.rs b/integration-tests/src/lib.rs index aaacdff..a788d93 100644 --- a/integration-tests/src/lib.rs +++ b/integration-tests/src/lib.rs @@ -45,9 +45,12 @@ fn test_as_slice() { #[test] fn test_types() { assert!(Atom::from("").is_static()); - assert!(Atom::from("id").is_static()); - assert!(Atom::from("body").is_static()); - assert!(Atom::from("a").is_static()); + assert!(Atom::from("defaults").is_static()); + assert!(Atom::from("font-weight").is_static()); + assert!(Atom::from("id").is_inline()); + assert!(Atom::from("body").is_inline()); + assert!(Atom::from("a").is_inline()); + assert!(Atom::from("address").is_inline()); assert!(Atom::from("c").is_inline()); assert!(Atom::from("zz").is_inline()); assert!(Atom::from("zzz").is_inline()); @@ -168,11 +171,13 @@ fn repr() { // static atom table, the tag values, etc. // Static atoms - check_static("a", test_atom!("a")); - check_static("address", test_atom!("address")); - check_static("area", test_atom!("area")); + check_static("defaults", test_atom!("defaults")); + check_static("font-weight", test_atom!("font-weight")); // Inline atoms + check("a", 0x0000_0000_0000_6111); + check("address", 0x7373_6572_6464_6171); + check("area", 0x0000_0061_6572_6141); check("e", 0x0000_0000_0000_6511); check("xyzzy", 0x0000_797A_7A79_7851); check("xyzzy01", 0x3130_797A_7A79_7871); @@ -193,8 +198,13 @@ fn test_threads() { #[test] fn atom_macro() { + assert_eq!(test_atom!("a"), Atom::from("a")); assert_eq!(test_atom!("body"), Atom::from("body")); + assert_eq!(test_atom!("address"), Atom::from("address")); + assert_eq!(test_atom!("❤"), Atom::from("❤")); + assert_eq!(test_atom!("❤💯"), Atom::from("❤💯")); assert_eq!(test_atom!("font-weight"), Atom::from("font-weight")); + assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯")); } #[test] @@ -292,7 +302,8 @@ fn test_from_string() { #[test] fn test_try_static() { - 
assert!(Atom::try_static("head").is_some()); + assert!(Atom::try_static("defaults").is_some()); + assert!(Atom::try_static("head").is_none()); assert!(Atom::try_static("not in the static table").is_none()); } diff --git a/src/atom.rs b/src/atom.rs index d1bd7b8..7a3dea9 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -99,6 +99,25 @@ impl Atom { } } + /// For the atom!() macros + #[inline(always)] + #[doc(hidden)] + pub const fn pack_inline(mut n: u64, len: u8) -> Self { + if cfg!(target_endian = "big") { + // Reverse order of top 7 bytes. + // Bottom 8 bits of `n` are zero, and we need that to remain so. + // String data is stored in top 7 bytes, tag and length in bottom byte. + n = n.to_le() << 8; + } + + let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n; + Self { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } + fn tag(&self) -> u8 { (self.unsafe_data.get() & TAG_MASK) as u8 } @@ -186,20 +205,22 @@ impl Hash for Atom { impl<'a, Static: StaticAtomSet> From> for Atom { fn from(string_to_add: Cow<'a, str>) -> Self { - Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { - let len = string_to_add.len(); - if len <= MAX_INLINE_LEN { - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); - { - let dest = inline_atom_slice_mut(&mut data); - dest[..len].copy_from_slice(string_to_add.as_bytes()) - } - Atom { - // INLINE_TAG ensures this is never zero - unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, - phantom: PhantomData, - } - } else { + let len = string_to_add.len(); + if len == 0 { + Self::pack_static(Static::empty_string_index()) + } else if len <= MAX_INLINE_LEN { + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); + { + let dest = inline_atom_slice_mut(&mut data); + dest[..len].copy_from_slice(string_to_add.as_bytes()); + } + Atom { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { 
NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } else { + Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); @@ -208,8 +229,8 @@ impl<'a, Static: StaticAtomSet> From> for Atom { unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, phantom: PhantomData, } - } - }) + }) + } } } diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 0fe4819..3228946 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -187,11 +187,19 @@ impl AtomType { // which would cause divisions by zero in rust-phf. self.atoms.insert(String::new()); - let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect(); - let hash_state = phf_generator::generate_hash(&atoms); + // Strings over 7 bytes + empty string added to static set. + // Otherwise stored inline. + let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self + .atoms + .iter() + .map(String::as_str) + .partition(|s| s.len() > 7 || s.is_empty()); + + // Static strings + let hash_state = phf_generator::generate_hash(&static_strs); let phf_generator::HashState { key, disps, map } = hash_state; let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); - let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect(); + let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect(); let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let indices = 0..atoms.len() as u32; @@ -228,16 +236,33 @@ impl AtomType { let macro_name = new_term(&*self.macro_name); let module = module.parse::().unwrap(); let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase()); - let const_names: Vec<_> = atoms + let new_const_name = |atom: &str| { + let mut name = atom_prefix.clone(); + for c in atom.chars() { + name.push_str(&format!("_{:02X}", c as u32)) + } + new_term(&name) 
+ }; + let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect(); + + // Inline strings + let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs .iter() - .map(|atom| { - let mut name = atom_prefix.clone(); - for c in atom.chars() { - name.push_str(&format!("_{:02X}", c as u32)) + .map(|s| { + let const_name = new_const_name(s); + + let mut value = 0u64; + for (index, c) in s.bytes().enumerate() { + value = value | ((c as u64) << (index * 8 + 8)); } - new_term(&name) + + let len = s.len() as u8; + + (const_name, (value, len)) }) - .collect(); + .unzip(); + let (inline_values, inline_lengths): (Vec<_>, Vec<_>) = + inline_values_and_lengths.into_iter().unzip(); quote! { #atom_doc @@ -265,6 +290,9 @@ impl AtomType { #( pub const #const_names: #type_name = #type_name::pack_static(#indices); )* + #( + pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths); + )* #macro_doc #[macro_export] @@ -272,6 +300,9 @@ impl AtomType { #( (#atoms) => { #module::#const_names }; )* + #( + (#inline_strs) => { #module::#inline_const_names }; + )* } } } From 021012ea995461efdec6ddb5eb30a1bf9481d7c4 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Thu, 1 Aug 2024 01:23:58 -0400 Subject: [PATCH 368/379] Update MSRV to 1.61. 
(#284) Signed-off-by: Josh Matthews --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b133023..aa6f952 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.60.0, nightly, beta, stable] + rust: [1.61.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 From 471ca0d8978cf6ce7dbfd170e67a103cfe62b975 Mon Sep 17 00:00:00 2001 From: cactter <109739451+cactter@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:53:57 +0800 Subject: [PATCH 369/379] The scope of the unsafe block can be appropriately reduced (#263) * Shrink unsafe block * Remove empty lines --------- Co-authored-by: Martin Robinson --- src/atom.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/atom.rs b/src/atom.rs index 7a3dea9..7e15357 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -383,28 +383,24 @@ impl Atom { #[inline(always)] fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { - unsafe { let x: *const NonZeroU64 = x; let mut data = x as *const u8; // All except the lowest byte, which is first in little-endian, last in big-endian. if cfg!(target_endian = "little") { - data = data.offset(1); + data = unsafe { data.offset(1) }; } let len = 7; - slice::from_raw_parts(data, len) - } + unsafe { slice::from_raw_parts(data, len) } } #[inline(always)] -fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { - unsafe { +fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { let x: *mut u64 = x; let mut data = x as *mut u8; // All except the lowest byte, which is first in little-endian, last in big-endian. 
if cfg!(target_endian = "little") { - data = data.offset(1); + data = unsafe { data.offset(1) }; } let len = 7; - slice::from_raw_parts_mut(data, len) - } + unsafe { slice::from_raw_parts_mut(data, len) } } From e03f29061d1359adae2741f3d20f1184a636221c Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 1 Feb 2025 14:43:21 +0100 Subject: [PATCH 370/379] MSRV 1.70 + Replace `once_cell::sync::Lazy` with `std::sync::OnceLock` (#287) * Update MSRV to 1.70 * Replace `once_cell::sync::Lazy` with `std::sync::OnceLock` --- .github/workflows/ci.yml | 2 +- Cargo.toml | 1 - src/atom.rs | 6 +++--- src/dynamic_set.rs | 14 +++++++++----- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aa6f952..c5cdfa7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - rust: [1.61.0, nightly, beta, stable] + rust: [1.70.0, nightly, beta, stable] steps: - uses: actions/checkout@v2 diff --git a/Cargo.toml b/Cargo.toml index df159f1..678eda3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,6 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" -once_cell = "1.10.0" serde = { version = "1", optional = true } phf_shared = "0.11" new_debug_unreachable = "1.0.2" diff --git a/src/atom.rs b/src/atom.rs index 7e15357..adf5f62 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use crate::dynamic_set::{Entry, DYNAMIC_SET}; +use crate::dynamic_set::{dynamic_set, Entry}; use crate::static_sets::StaticAtomSet; use debug_unreachable::debug_unreachable; @@ -221,7 +221,7 @@ impl<'a, Static: StaticAtomSet> From> for Atom { } } else { Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { - let ptr: std::ptr::NonNull = DYNAMIC_SET.insert(string_to_add, hash.g); + let ptr: std::ptr::NonNull = dynamic_set().insert(string_to_add, hash.g); let data = ptr.as_ptr() as u64; debug_assert!(0 == data & TAG_MASK); Atom { @@ -257,7 +257,7 @@ impl Drop for Atom { // Out of line to guide inlining. fn drop_slow(this: &mut Atom) { - DYNAMIC_SET.remove(this.unsafe_data.get() as *mut Entry); + dynamic_set().remove(this.unsafe_data.get() as *mut Entry); } } } diff --git a/src/dynamic_set.rs b/src/dynamic_set.rs index 46e7a54..4442b4d 100644 --- a/src/dynamic_set.rs +++ b/src/dynamic_set.rs @@ -7,13 +7,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use once_cell::sync::Lazy; use parking_lot::Mutex; use std::borrow::Cow; use std::mem; use std::ptr::NonNull; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; +use std::sync::OnceLock; const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u32 = (1 << 12) - 1; @@ -38,16 +38,20 @@ fn entry_alignment_is_sufficient() { assert!(mem::align_of::() >= ENTRY_ALIGNMENT); } -pub(crate) static DYNAMIC_SET: Lazy = Lazy::new(|| { +pub(crate) fn dynamic_set() -> &'static Set { // NOTE: Using const initialization for buckets breaks the small-stack test. 
// ``` // // buckets: [Mutex>>; NB_BUCKETS], // const MUTEX: Mutex>> = Mutex::new(None); // let buckets = Box::new([MUTEX; NB_BUCKETS]); // ``` - let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); - Set { buckets } -}); + static DYNAMIC_SET: OnceLock = OnceLock::new(); + + DYNAMIC_SET.get_or_init(|| { + let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); + Set { buckets } + }) +} impl Set { pub(crate) fn insert(&self, string: Cow, hash: u32) -> NonNull { From 27221cec100d883420ef0fd06a7af5593262e4d8 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 1 Feb 2025 08:45:33 -0500 Subject: [PATCH 371/379] Publish 0.8.8. Signed-off-by: Josh Matthews --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 678eda3..338eeba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.7" # Also update README.md when making a semver-breaking change +version = "0.8.8" # Also update README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 4a5bb75adaa35d4076ab30091b0dbc433f369c45 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Sat, 1 Feb 2025 13:09:13 -0500 Subject: [PATCH 372/379] Publish string-cache-codegen 0.5.3. 
Signed-off-by: Josh Matthews --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index b059bfc..53c1f0f 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.2" # Also update ../README.md when making a semver-breaking change +version = "0.5.3" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" From 14ae86af8fbe7a14229296473e9ac18b67228f52 Mon Sep 17 00:00:00 2001 From: Alex Touchet <26315797+atouchet@users.noreply.github.com> Date: Sat, 1 Feb 2025 10:10:09 -0800 Subject: [PATCH 373/379] Set rust-version in Cargo.toml (#288) --- Cargo.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 338eeba..6ceabfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,13 @@ [package] name = "string_cache" version = "0.8.8" # Also update README.md when making a semver-breaking change -authors = [ "The Servo Project Developers" ] +authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" repository = "https://github.com/servo/string-cache" -documentation = "https://docs.rs/string_cache/" +documentation = "https://docs.rs/string_cache" edition = "2018" +rust-version = "1.70.0" # Do not `exclude` ./string-cache-codegen because we want to include # ./string-cache-codegen/shared.rs, and `include` is a pain to use From d9e888f2f61d43c1868849a506104ee02d79027c Mon Sep 17 00:00:00 2001 From: Ygg01 Date: Wed, 19 Feb 2025 11:59:59 +0100 Subject: [PATCH 374/379] Add test for atom order stability (#290) * Add test for atom order stability. 
* Made test for iteration order a unit test # Conflicts: # string-cache-codegen/tests/reproducibility_test.rs --- string-cache-codegen/lib.rs | 39 ++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 3228946..c703cf7 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -69,7 +69,7 @@ #![recursion_limit = "128"] use quote::quote; -use std::collections::HashSet; +use std::collections::BTreeSet; use std::fs::File; use std::io::{self, BufWriter, Write}; use std::path::Path; @@ -81,7 +81,7 @@ pub struct AtomType { static_set_doc: Option, macro_name: String, macro_doc: Option, - atoms: HashSet, + atoms: BTreeSet, } impl AtomType { @@ -114,7 +114,7 @@ impl AtomType { atom_doc: None, static_set_doc: None, macro_doc: None, - atoms: HashSet::new(), + atoms: BTreeSet::new(), } } @@ -181,6 +181,26 @@ impl AtomType { ) } + #[cfg(test)] + /// Write generated code to destination [`Vec`] and return it as [`String`] + /// + /// Used mostly for testing or displaying a value. + pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result + { + destination.write_all( + self.to_tokens() + .to_string() + // Insert some newlines to make the generated code slightly easier to read. + .replace(" [ \"", "[\n\"") + .replace("\" , ", "\",\n") + .replace(" ( \"", "\n( \"") + .replace("; ", ";\n") + .as_bytes(), + )?; + let str = String::from_utf8(destination).unwrap(); + Ok(str) + } + fn to_tokens(&mut self) -> proc_macro2::TokenStream { // `impl Default for Atom` requires the empty string to be in the static set. 
// This also makes sure the set in non-empty, @@ -315,3 +335,16 @@ impl AtomType { self.write_to(BufWriter::new(File::create(path)?)) } } + +#[test] +fn test_iteration_order() { + let x1 = crate::AtomType::new("foo::Atom", "foo_atom!") + .atoms(&["x", "xlink", "svg", "test"]) + .write_to_string(Vec::new()).expect("write to string cache x1"); + + let x2 = crate::AtomType::new("foo::Atom", "foo_atom!") + .atoms(&["x", "xlink", "svg", "test"]) + .write_to_string(Vec::new()).expect("write to string cache x2"); + + assert_eq!(x1, x2); +} \ No newline at end of file From 88600346b44ae50afaf52a31d6a0db1c37b94b02 Mon Sep 17 00:00:00 2001 From: Ygg01 Date: Thu, 20 Feb 2025 13:40:19 +0100 Subject: [PATCH 375/379] Publish 0.5.4 string cache codegen (#292) --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index 53c1f0f..a9660bd 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.3" # Also update ../README.md when making a semver-breaking change +version = "0.5.4" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" From b92f7eb3ff504034ec58c0154f9a9b053e23da4f Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 28 Mar 2025 07:15:11 +1300 Subject: [PATCH 376/379] Implement MallocSizeOf for Atom (#289) Signed-off-by: Nico Burns --- .github/workflows/ci.yml | 1 + Cargo.toml | 3 ++- src/atom.rs | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c5cdfa7..74ade77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: run: | cargo build --no-default-features cargo build + cargo build --features malloc_size_of - uses: actions-rs/cargo@v1 with: command: test diff --git a/Cargo.toml b/Cargo.toml index 6ceabfe..287bd07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.8" # Also update README.md when making a semver-breaking change +version = "0.8.9" # Also update README.md when making a semver-breaking change authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT OR Apache-2.0" @@ -25,6 +25,7 @@ default = ["serde_support"] [dependencies] precomputed-hash = "0.1" serde = { version = "1", optional = true } +malloc_size_of = { version = "0.1", default-features = false, optional = true } phf_shared = "0.11" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/src/atom.rs b/src/atom.rs index adf5f62..5a8aa7f 100644 --- a/src/atom.rs +++ b/src/atom.rs @@ -82,6 +82,15 @@ pub struct Atom { phantom: PhantomData, } +// This isn't really correct as the Atoms can technically take up space. 
But I guess it's ok +// as it is possible to measure the size of the atom set separately/ +#[cfg(feature = "malloc_size_of")] +impl malloc_size_of::MallocSizeOf for Atom { + fn size_of(&self, _ops: &mut malloc_size_of::MallocSizeOfOps) -> usize { + 0 + } +} + // FIXME: bound removed from the struct definition before of this error for pack_static: // "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" // https://github.com/rust-lang/rust/issues/57563 From eb5ad11b53a8e132fa09781f062c47ce352f80d6 Mon Sep 17 00:00:00 2001 From: Cheng Xu <3105373+xu-cheng@users.noreply.github.com> Date: Tue, 26 Aug 2025 00:47:01 -0700 Subject: [PATCH 377/379] Update phf to 0.13 (#295) --- Cargo.toml | 4 ++-- README.md | 6 +++--- integration-tests/Cargo.toml | 6 +++--- src/lib.rs | 4 ++-- string-cache-codegen/Cargo.toml | 6 +++--- string-cache-codegen/lib.rs | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 287bd07..e73215e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache" -version = "0.8.9" # Also update README.md when making a semver-breaking change +version = "0.9.0" # Also update README.md when making a semver-breaking change authors = ["The Servo Project Developers"] description = "A string interning library for Rust, developed as part of the Servo project." 
license = "MIT OR Apache-2.0" @@ -26,7 +26,7 @@ default = ["serde_support"] precomputed-hash = "0.1" serde = { version = "1", optional = true } malloc_size_of = { version = "0.1", default-features = false, optional = true } -phf_shared = "0.11" +phf_shared = "0.13" new_debug_unreachable = "1.0.2" parking_lot = "0.12" diff --git a/README.md b/README.md index fdf4c0a..429d1ec 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In `Cargo.toml`: ```toml [dependencies] -string_cache = "0.8" +string_cache = "0.9" ``` In `lib.rs`: @@ -31,10 +31,10 @@ In `Cargo.toml`: build = "build.rs" [dependencies] -string_cache = "0.8" +string_cache = "0.9" [build-dependencies] -string_cache_codegen = "0.5" +string_cache_codegen = "0.6" ``` In `build.rs`: diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index a0b047c..12c0ad0 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -16,11 +16,11 @@ test = true unstable = [] [dependencies] -string_cache = { version = "0.8", path = ".." } +string_cache = { version = "0.9", path = ".." } [dev-dependencies] rand = "0.8" -string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } [build-dependencies] -string_cache_codegen = { version = "0.5", path = "../string-cache-codegen" } +string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } diff --git a/src/lib.rs b/src/lib.rs index 441cb4e..3cc29b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,10 +25,10 @@ //! In `Cargo.toml`: //! ```toml //! [dependencies] -//! string_cache = "0.8" +//! string_cache = "0.9" //! //! [dev-dependencies] -//! string_cache_codegen = "0.5" +//! string_cache_codegen = "0.6" //! ``` //! //! 
In `build.rs`: diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a9660bd..a6e9da0 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.5.4" # Also update ../README.md when making a semver-breaking change +version = "0.6.0" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0" @@ -13,7 +13,7 @@ name = "string_cache_codegen" path = "lib.rs" [dependencies] -phf_generator = "0.11" -phf_shared = "0.11" +phf_generator = "0.13" +phf_shared = "0.13" proc-macro2 = "1" quote = "1" diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index c703cf7..69ff612 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -19,10 +19,10 @@ //! build = "build.rs" //! //! [dependencies] -//! string_cache = "0.8" +//! string_cache = "0.9" //! //! [build-dependencies] -//! string_cache_codegen = "0.5" +//! string_cache_codegen = "0.6" //! ``` //! //! In `build.rs`: @@ -183,8 +183,8 @@ impl AtomType { #[cfg(test)] /// Write generated code to destination [`Vec`] and return it as [`String`] - /// - /// Used mostly for testing or displaying a value. + /// + /// Used mostly for testing or displaying a value. 
pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result { destination.write_all( @@ -347,4 +347,4 @@ fn test_iteration_order() { .write_to_string(Vec::new()).expect("write to string cache x2"); assert_eq!(x1, x2); -} \ No newline at end of file +} From 533b64e132ec65a616317d2607f536da024d19a9 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Fri, 5 Sep 2025 13:01:12 +0100 Subject: [PATCH 378/379] Make macros accept idents where atom value is a valid ident (#296) * Make macros accept idents where atom is a valid atom This means that local_name!(html) will work as well as local_name!("html") Signed-off-by: Nico Burns * Fix tests build Signed-off-by: Nico Burns --------- Signed-off-by: Nico Burns --- integration-tests/Cargo.toml | 2 +- string-cache-codegen/lib.rs | 55 ++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index 12c0ad0..4562747 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -19,7 +19,7 @@ unstable = [] string_cache = { version = "0.9", path = ".." } [dev-dependencies] -rand = "0.8" +rand = { version = "0.8", features = ["small_rng"] } string_cache_codegen = { version = "0.6", path = "../string-cache-codegen" } [build-dependencies] diff --git a/string-cache-codegen/lib.rs b/string-cache-codegen/lib.rs index 69ff612..525ef3a 100644 --- a/string-cache-codegen/lib.rs +++ b/string-cache-codegen/lib.rs @@ -68,6 +68,7 @@ #![recursion_limit = "128"] +use proc_macro2::Ident; use quote::quote; use std::collections::BTreeSet; use std::fs::File; @@ -185,8 +186,7 @@ impl AtomType { /// Write generated code to destination [`Vec`] and return it as [`String`] /// /// Used mostly for testing or displaying a value. 
- pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result - { + pub fn write_to_string(&mut self, mut destination: Vec) -> io::Result { destination.write_all( self.to_tokens() .to_string() @@ -223,6 +223,30 @@ impl AtomType { let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; let indices = 0..atoms.len() as u32; + fn is_valid_ident(name: &str) -> bool { + let begins_with_letter_or_underscore = name + .chars() + .next() + .is_some_and(|c| c.is_alphabetic() || c == '_'); + let is_alphanumeric = name.chars().all(|c| c.is_alphanumeric() || c == '_'); + + begins_with_letter_or_underscore && is_alphanumeric + } + + let atoms_for_idents: Vec<&str> = atoms + .iter() + .copied() + .filter(|x| is_valid_ident(x)) + .collect(); + let atom_idents: Vec = atoms_for_idents.iter().map(|atom| new_term(atom)).collect(); + + let istrs_for_idents: Vec<&str> = inline_strs + .iter() + .copied() + .filter(|x| is_valid_ident(x)) + .collect(); + let istr_idents: Vec = istrs_for_idents.iter().map(|atom| new_term(atom)).collect(); + let hashes: Vec = atoms .iter() .map(|string| { @@ -249,8 +273,9 @@ impl AtomType { Some(ref doc) => quote!(#[doc = #doc]), None => quote!(), }; - let new_term = - |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); + fn new_term(string: &str) -> Ident { + Ident::new(string, proc_macro2::Span::call_site()) + } let static_set_name = new_term(&format!("{}StaticSet", type_name)); let type_name = new_term(type_name); let macro_name = new_term(&*self.macro_name); @@ -264,6 +289,16 @@ impl AtomType { new_term(&name) }; let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect(); + let ident_const_names: Vec<_> = atoms_for_idents + .iter() + .copied() + .map(new_const_name) + .collect(); + let ident_inline_const_names: Vec<_> = istrs_for_idents + .iter() + .copied() + .map(new_const_name) + .collect(); // Inline strings let (inline_const_names, inline_values_and_lengths): (Vec<_>, 
Vec<_>) = inline_strs @@ -323,6 +358,12 @@ impl AtomType { #( (#inline_strs) => { #module::#inline_const_names }; )* + #( + (#atom_idents) => { #module::#ident_const_names }; + )* + #( + (#istr_idents) => { #module::#ident_inline_const_names }; + )* } } } @@ -340,11 +381,13 @@ impl AtomType { fn test_iteration_order() { let x1 = crate::AtomType::new("foo::Atom", "foo_atom!") .atoms(&["x", "xlink", "svg", "test"]) - .write_to_string(Vec::new()).expect("write to string cache x1"); + .write_to_string(Vec::new()) + .expect("write to string cache x1"); let x2 = crate::AtomType::new("foo::Atom", "foo_atom!") .atoms(&["x", "xlink", "svg", "test"]) - .write_to_string(Vec::new()).expect("write to string cache x2"); + .write_to_string(Vec::new()) + .expect("write to string cache x2"); assert_eq!(x1, x2); } From fd1475d07d5ff30e5d2ca343671577c77ab5679a Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Mon, 8 Sep 2025 14:50:19 +0100 Subject: [PATCH 379/379] Bump version of string-cache-codegen to v0.6.1 (#297) Signed-off-by: Nico Burns --- string-cache-codegen/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string-cache-codegen/Cargo.toml b/string-cache-codegen/Cargo.toml index a6e9da0..20eced9 100644 --- a/string-cache-codegen/Cargo.toml +++ b/string-cache-codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "string_cache_codegen" -version = "0.6.0" # Also update ../README.md when making a semver-breaking change +version = "0.6.1" # Also update ../README.md when making a semver-breaking change authors = [ "The Servo Project Developers" ] description = "A codegen library for string-cache, developed as part of the Servo project." license = "MIT OR Apache-2.0"