diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bd16a1e53..8e9005900e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +1.12.3 (2026-02-03) +=================== +This release excludes some unnecessary things from the archive published to +crates.io. Specifically, fuzzing data and various shell scripts are now +excluded. If you run into problems, please file an issue. + +Improvements: + +* [#1319](https://github.com/rust-lang/regex/pull/1319): +Switch from a Cargo `exclude` list to an `include` list, and exclude some +unnecessary stuff. + + 1.12.2 (2025-10-13) =================== This release fixes a `cargo doc` breakage on nightly when `--cfg docsrs` is diff --git a/Cargo.toml b/Cargo.toml index 2b17e7a6ce..ab2671adee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.12.1" #:version +version = "1.12.3" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -13,7 +13,7 @@ finite automata and guarantees linear time matching on all inputs. 
""" categories = ["text-processing"] autotests = false -exclude = ["/fuzz/*", "/record/*", "/scripts/*", "tests/fuzz/*", "/.github/*"] +include.workspace = true edition = "2021" rust-version = "1.65" @@ -27,6 +27,24 @@ members = [ "regex-test", ] +[workspace.package] +include = [ + "/CHANGELOG.md", + "/Cargo.toml", + "/LICENSE-MIT", + "/LICENSE-APACHE", + "/README.md", + "/UNICODE.md", + "bench/README.md", + "src/**/*.rs", + "testdata/**.toml", + "tests/**/*.rs", + "bench/**/*.rs", + "benches/**/*.rs", + "LICENSE-UNICODE", + "!/tests/fuzz/mod.rs", +] + # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex/*/#crate-features [features] diff --git a/regex-automata/Cargo.toml b/regex-automata/Cargo.toml index 67d603287a..e2a764492f 100644 --- a/regex-automata/Cargo.toml +++ b/regex-automata/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-automata" -version = "0.4.12" #:version +version = "0.4.14" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] description = "Automata construction and matching using regular expressions." documentation = "https://docs.rs/regex-automata" @@ -13,6 +13,7 @@ categories = ["text-processing"] edition = "2021" autoexamples = false rust-version = "1.65" +include.workspace = true [lib] bench = false diff --git a/regex-automata/src/dfa/onepass.rs b/regex-automata/src/dfa/onepass.rs index 85f820ef54..5b6a0bd88a 100644 --- a/regex-automata/src/dfa/onepass.rs +++ b/regex-automata/src/dfa/onepass.rs @@ -2093,9 +2093,20 @@ impl DFA { // be bad. In theory, we could avoid all this slot clearing if we knew // that every slot was always activated for every match. Then we would // know they would always be overwritten when a match is found. + // + // NOTE: We have to be careful here to avoid setting a length that + // exceeds the number of slots in our cache. Otherwise copying into + // the cache later will fail. 
This can happen when the number of + // caller provided slots is bigger than the number of slots in the + // compiled regex. (It's a bit of a weird case, but for simplicity and + // flexibility reasons, it is an API guarantee that the caller can + // provide any number of slots that they want.) let explicit_slots_len = core::cmp::min( Slots::LIMIT, - slots.len().saturating_sub(self.explicit_slot_start), + core::cmp::min( + slots.len().saturating_sub(self.explicit_slot_start), + cache.explicit_slots.len(), + ), ); cache.setup_search(explicit_slots_len); for slot in cache.explicit_slots() { @@ -2216,10 +2227,15 @@ impl DFA { // the path to the match state. if self.explicit_slot_start < slots.len() { // NOTE: The 'cache.explicit_slots()' slice is setup at the - // beginning of every search such that it is guaranteed to return a - // slice of length equivalent to 'slots[explicit_slot_start..]'. - slots[self.explicit_slot_start..] - .copy_from_slice(cache.explicit_slots()); + // beginning of every search such that it is guaranteed + // to return a slice that is at most equal in length to + // 'slots[explicit_slot_start..]'. It may be smaller in + // cases where the caller provided more slots than there + // are in the compiled regex. In which case, we limit the + // length of `slots` to what we actually have. 
+ let cache_slots = cache.explicit_slots(); + slots[self.explicit_slot_start..][..cache_slots.len()] + .copy_from_slice(cache_slots); epsilons.slots().apply(at, &mut slots[self.explicit_slot_start..]); } *matched_pid = Some(pid); diff --git a/regex-automata/tests/dfa/onepass/mod.rs b/regex-automata/tests/dfa/onepass/mod.rs index 9d6ab475ef..5a71144e65 100644 --- a/regex-automata/tests/dfa/onepass/mod.rs +++ b/regex-automata/tests/dfa/onepass/mod.rs @@ -1,2 +1,3 @@ +mod regression; #[cfg(not(miri))] mod suite; diff --git a/regex-automata/tests/dfa/onepass/regression.rs b/regex-automata/tests/dfa/onepass/regression.rs new file mode 100644 index 0000000000..7c72db3c1e --- /dev/null +++ b/regex-automata/tests/dfa/onepass/regression.rs @@ -0,0 +1,61 @@ +// Regression test for zero-repetition capture groups, +// which caused a panic when the Vec passed into search_slots +// contained space for the capture group which would never +// have any results. +// +// See: https://github.com/rust-lang/regex/issues/1327 +#[test] +fn zero_repetition_capture_group() { + use regex_automata::{ + dfa::onepass::DFA, util::primitives::NonMaxUsize, Anchored, Input, + }; + + let expr = DFA::new(r"(abc)(ABC){0}").unwrap(); + let s = "abcABC"; + let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes); + + // Test with 4 slots, so the whole match plus the first capture group. + let mut cache = expr.create_cache(); + let mut slots: Vec> = vec![None; 4]; + let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap(); + assert_eq!(pid, Some(regex_automata::PatternID::must(0))); + assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap())); + assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap())); + assert_eq!(slots[2], Some(NonMaxUsize::new(0).unwrap())); + assert_eq!(slots[3], Some(NonMaxUsize::new(3).unwrap())); + + // Test with larger slot array, which would fit the + // zero-repetition capture group. 
+ slots.resize(6, None); + let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap(); + assert_eq!(pid, Some(regex_automata::PatternID::must(0))); + // First capture group should match + assert_eq!(slots[2], Some(NonMaxUsize::new(0).unwrap())); + assert_eq!(slots[3], Some(NonMaxUsize::new(3).unwrap())); + // Second capture group with {0} should be None. + assert_eq!(slots[4], None); + assert_eq!(slots[5], None); +} + +// Another regression test for the same case as +// `zero_repetition_capture_group`, but uses a simpler pattern. That +// is, a zero-repetition capture group is a red herring. The actual bug +// is simpler: it happens whenever too many slots are provided by the +// caller. +#[test] +fn too_many_slots_normal_pattern() { + use regex_automata::{ + dfa::onepass::DFA, util::primitives::NonMaxUsize, Anchored, Input, + }; + + let expr = DFA::new(r"abc").unwrap(); + let s = "abc"; + let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes); + + let mut cache = expr.create_cache(); + let mut slots: Vec> = vec![None; 4]; + let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap(); + assert_eq!(pid, Some(regex_automata::PatternID::must(0))); + assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap())); + assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap())); +} diff --git a/regex-automata/tests/nfa/thompson/backtrack/mod.rs b/regex-automata/tests/nfa/thompson/backtrack/mod.rs index 9d6ab475ef..5a71144e65 100644 --- a/regex-automata/tests/nfa/thompson/backtrack/mod.rs +++ b/regex-automata/tests/nfa/thompson/backtrack/mod.rs @@ -1,2 +1,3 @@ +mod regression; #[cfg(not(miri))] mod suite; diff --git a/regex-automata/tests/nfa/thompson/backtrack/regression.rs b/regex-automata/tests/nfa/thompson/backtrack/regression.rs new file mode 100644 index 0000000000..b392f7a841 --- /dev/null +++ b/regex-automata/tests/nfa/thompson/backtrack/regression.rs @@ -0,0 +1,20 @@ +// Tests that we can call the backtracker with more slots +// than 
are actually in the compiled regex. +#[test] +fn too_many_slots_normal_pattern() { + use regex_automata::{ + nfa::thompson::backtrack::BoundedBacktracker, + util::primitives::NonMaxUsize, Anchored, Input, + }; + + let expr = BoundedBacktracker::new(r"abc").unwrap(); + let s = "abc"; + let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes); + + let mut cache = expr.create_cache(); + let mut slots: Vec> = vec![None; 4]; + let pid = expr.try_search_slots(&mut cache, &input, &mut slots).unwrap(); + assert_eq!(pid, Some(regex_automata::PatternID::must(0))); + assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap())); + assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap())); +} diff --git a/regex-automata/tests/nfa/thompson/pikevm/mod.rs b/regex-automata/tests/nfa/thompson/pikevm/mod.rs index 9d6ab475ef..5a71144e65 100644 --- a/regex-automata/tests/nfa/thompson/pikevm/mod.rs +++ b/regex-automata/tests/nfa/thompson/pikevm/mod.rs @@ -1,2 +1,3 @@ +mod regression; #[cfg(not(miri))] mod suite; diff --git a/regex-automata/tests/nfa/thompson/pikevm/regression.rs b/regex-automata/tests/nfa/thompson/pikevm/regression.rs new file mode 100644 index 0000000000..b5db6349f8 --- /dev/null +++ b/regex-automata/tests/nfa/thompson/pikevm/regression.rs @@ -0,0 +1,20 @@ +// Tests that we can call the PikeVM with more slots +// than are actually in the compiled regex. 
+#[test] +fn too_many_slots_normal_pattern() { + use regex_automata::{ + nfa::thompson::pikevm::PikeVM, util::primitives::NonMaxUsize, + Anchored, Input, + }; + + let expr = PikeVM::new(r"abc").unwrap(); + let s = "abc"; + let input = Input::new(s).span(0..s.len()).anchored(Anchored::Yes); + + let mut cache = expr.create_cache(); + let mut slots: Vec> = vec![None; 4]; + let pid = expr.search_slots(&mut cache, &input, &mut slots); + assert_eq!(pid, Some(regex_automata::PatternID::must(0))); + assert_eq!(slots[0], Some(NonMaxUsize::new(0).unwrap())); + assert_eq!(slots[1], Some(NonMaxUsize::new(3).unwrap())); +} diff --git a/regex-capi/Cargo.toml b/regex-capi/Cargo.toml index 9d55258db9..ef3d762ad3 100644 --- a/regex-capi/Cargo.toml +++ b/regex-capi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rure" -version = "0.2.4" #:version +version = "0.2.5" #:version authors = ["The Rust Project Developers"] license = "MIT OR Apache-2.0" readme = "README.md" @@ -13,6 +13,19 @@ A C API for Rust's regular expression library. workspace = ".." edition = "2021" rust-version = "1.65" +include = [ + "/Cargo.toml", + "/LICENSE-MIT", + "/LICENSE-APACHE", + "/README.md", + "/UNICODE.md", + "bench/README.md", + "src/**/*.rs", + "tests/**/*.rs", + "include/**/*.h", + "ctest/**/*.c", + "examples/**/*.{c,txt}", +] [lib] name = "rure" diff --git a/regex-lite/Cargo.toml b/regex-lite/Cargo.toml index 16469f6feb..b230501b92 100644 --- a/regex-lite/Cargo.toml +++ b/regex-lite/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-lite" -version = "0.1.8" #:version +version = "0.1.9" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" homepage = "https://github.com/rust-lang/regex/tree/master/regex-lite" @@ -13,6 +13,7 @@ workspace = ".." 
edition = "2021" rust-version = "1.65" autotests = false +include.workspace = true # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex-lite/*/#crate-features diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index e6dd1bc3d5..71755d8a66 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-syntax" -version = "0.8.8" #:version +version = "0.8.10" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" homepage = "https://github.com/rust-lang/regex/tree/master/regex-syntax" @@ -10,6 +10,7 @@ description = "A regular expression parser." workspace = ".." edition = "2021" rust-version = "1.65" +include.workspace = true # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex-syntax/*/#crate-features diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 48469f9e16..91a5e179e7 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -1047,7 +1047,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { if let Ok(ref mut class) = result { self.unicode_fold_and_negate( &ast_class.span, - ast_class.negated, + ast_class.is_negated(), class, )?; } @@ -2470,6 +2470,10 @@ mod tests { t(r"\p{gc=Separator}"), hir_uclass_query(ClassQuery::Binary("Z")) ); + assert_eq!( + t(r"\p{gc!=Separator}"), + hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) + ); assert_eq!( t(r"\p{Other}"), hir_uclass_query(ClassQuery::Binary("Other")) @@ -2486,7 +2490,7 @@ mod tests { ); assert_eq!( t(r"\P{gc!=separator}"), - hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) + hir_uclass_query(ClassQuery::Binary("Z")) ); assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));