From f47dad7b6595b8c65d6f15b057ad59eafd363d25 Mon Sep 17 00:00:00 2001 From: anilbey Date: Sat, 13 Jul 2019 14:07:58 +0200 Subject: [PATCH 1/4] bpo7940: add support for negative end positions to re.finditer and re.findall --- Lib/test/test_re.py | 26 +++++++++++++++++++ Misc/ACKS | 10 +++++-- .../2019-07-13-15-23-34.bpo-7940.NDghRj.rst | 2 ++ Modules/_sre/sre.c | 4 +++ 4 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-07-13-15-23-34.bpo-7940.NDghRj.rst diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index a6f5af17d7d51ba..c41e196f86a7f7d 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -441,6 +441,32 @@ def test_bug_117612(self): self.assertEqual(re.findall(r"(a|(b))", "aba"), [("a", ""),("b", "b"),("a", "")]) + def test_bug_7940(self): + # Issue 7940: re.finditer and re.findall should support negative end positions + pat = re.compile(".") + self.assertEqual(pat.findall("abcd", 1, 3), ['b', 'c']) + self.assertEqual(pat.findall("abcd", 1, -1), ['b', 'c']) + self.assertEqual(pat.findall("abcd", -3, -1), ['b', 'c']) + self.assertEqual(pat.findall("abcd", -3, 3), ['b', 'c']) + self.assertEqual(pat.findall("abcd", -1, 1), []) + self.assertEqual(pat.findall("abcd", -1, -3), []) + self.assertEqual(pat.findall("abcd", pos=1, endpos=-1), ['b', 'c']) + + self.assertEqual([m[0] for m in pat.finditer("abcd", 1, 3)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", 1, -1)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -3, -1)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -3, 3)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -1, 1)], + []) + self.assertEqual([m[0] for m in pat.finditer("abcd", -1, -3)], + []) + self.assertEqual([m[0] for m in pat.finditer("abcd", pos=1, + endpos=-1)], ['b', 'c']) + def test_re_match(self): for string in 'a', S('a'): self.assertEqual(re.match('a', string).groups(), ()) diff --git 
a/Misc/ACKS b/Misc/ACKS index fadf488888aa8ba..981034983354822 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -2074,5 +2074,11 @@ Doug Zongker Peter Åstrand Vlad Emelianov Andrey Doroschenko - -(Entries should be added in rough alphabetical order by last names) +Zheao Li +Carsten Klein +Diego Rojas +Edison Abahurire +Geoff Shannon +Batuhan Taskaya +Aleksandr Balezin +Mustafa Anıl Tuncel diff --git a/Misc/NEWS.d/next/Library/2019-07-13-15-23-34.bpo-7940.NDghRj.rst b/Misc/NEWS.d/next/Library/2019-07-13-15-23-34.bpo-7940.NDghRj.rst new file mode 100644 index 000000000000000..bad823178b79bdf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-13-15-23-34.bpo-7940.NDghRj.rst @@ -0,0 +1,2 @@ +Add support for negative end positions to re.finditer and re.findall +(original patch by Matthew Barnett, tests and PR by Mustafa Anıl Tuncel). diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index ddbdc9f478aab3f..bb5ab89f2f94ff4 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -453,11 +453,15 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, } /* adjust boundaries */ + if (start < 0) + start += length; if (start < 0) start = 0; else if (start > length) start = length; + if (end < 0) + end += length; if (end < 0) end = 0; else if (end > length) From 75e7c5f50abe6139b324a69f5ec79a5450e0dbf4 Mon Sep 17 00:00:00 2001 From: anilbey Date: Sat, 13 Jul 2019 16:45:42 +0200 Subject: [PATCH 2/4] bpo7940: added test cases addressing the review --- Lib/test/test_re.py | 6 ++++++ Misc/ACKS | 11 +++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index c41e196f86a7f7d..e1e6d9e26146e72 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -450,6 +450,8 @@ def test_bug_7940(self): self.assertEqual(pat.findall("abcd", -3, 3), ['b', 'c']) self.assertEqual(pat.findall("abcd", -1, 1), []) self.assertEqual(pat.findall("abcd", -1, -3), []) + self.assertEqual(pat.findall("abcd", -200, -1), ['a', 
'b', 'c']) + self.assertEqual(pat.findall("abcd", -200, -100), []) self.assertEqual(pat.findall("abcd", pos=1, endpos=-1), ['b', 'c']) self.assertEqual([m[0] for m in pat.finditer("abcd", 1, 3)], @@ -464,6 +466,10 @@ def test_bug_7940(self): []) self.assertEqual([m[0] for m in pat.finditer("abcd", -1, -3)], []) + self.assertEqual([m[0] for m in pat.finditer("abcd", -200, -1)], + ['a', 'b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -200, -100)], + []) self.assertEqual([m[0] for m in pat.finditer("abcd", pos=1, endpos=-1)], ['b', 'c']) diff --git a/Misc/ACKS b/Misc/ACKS index 981034983354822..e0efbe6cfe29270 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1865,6 +1865,7 @@ Steven Troxler Brent Tubbs Anthony Tuininga Erno Tukia +Anıl Tuncel David Turner Stephen Turner Itamar Turner-Trauring @@ -2074,11 +2075,5 @@ Doug Zongker Peter Åstrand Vlad Emelianov Andrey Doroschenko -Zheao Li -Carsten Klein -Diego Rojas -Edison Abahurire -Geoff Shannon -Batuhan Taskaya -Aleksandr Balezin -Mustafa Anıl Tuncel + +(Entries should be added in rough alphabetical order by last names) From 530afdf05f2b7d556fc468fc061551aacdf29afc Mon Sep 17 00:00:00 2001 From: Anil Tuncel Date: Wed, 15 Mar 2023 00:01:08 +0100 Subject: [PATCH 3/4] emit FutureWarning for negative start/end indices --- Modules/_sre/sre.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index bb5ab89f2f94ff4..8d3bc1e8066101d 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -453,15 +453,23 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, } /* adjust boundaries */ - if (start < 0) + if (start < 0) { + PyErr_WarnEx(PyExc_FutureWarning, + "Negative start index will not be truncated to zero in the future", + 1); start += length; + } if (start < 0) start = 0; else if (start > length) start = length; - if (end < 0) + if (end < 0) { + PyErr_WarnEx(PyExc_FutureWarning, + "Negative end index will not be truncated 
to zero in the future", + 1); end += length; + } if (end < 0) end = 0; else if (end > length) From 8bc3fb88ed1dd8a8621268453d61f2f577cc8f5c Mon Sep 17 00:00:00 2001 From: Anil Tuncel Date: Wed, 15 Mar 2023 00:10:25 +0100 Subject: [PATCH 4/4] test_bug_7940: ensure warnings are effective via assertWarns --- Lib/test/test_re.py | 83 ++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index e1e6d9e26146e72..35144246479d56d 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -444,34 +444,61 @@ def test_bug_117612(self): def test_bug_7940(self): # Issue 7940: re.finditer and re.findall should support negative end positions pat = re.compile(".") - self.assertEqual(pat.findall("abcd", 1, 3), ['b', 'c']) - self.assertEqual(pat.findall("abcd", 1, -1), ['b', 'c']) - self.assertEqual(pat.findall("abcd", -3, -1), ['b', 'c']) - self.assertEqual(pat.findall("abcd", -3, 3), ['b', 'c']) - self.assertEqual(pat.findall("abcd", -1, 1), []) - self.assertEqual(pat.findall("abcd", -1, -3), []) - self.assertEqual(pat.findall("abcd", -200, -1), ['a', 'b', 'c']) - self.assertEqual(pat.findall("abcd", -200, -100), []) - self.assertEqual(pat.findall("abcd", pos=1, endpos=-1), ['b', 'c']) - - self.assertEqual([m[0] for m in pat.finditer("abcd", 1, 3)], - ['b', 'c']) - self.assertEqual([m[0] for m in pat.finditer("abcd", 1, -1)], - ['b', 'c']) - self.assertEqual([m[0] for m in pat.finditer("abcd", -3, -1)], - ['b', 'c']) - self.assertEqual([m[0] for m in pat.finditer("abcd", -3, 3)], - ['b', 'c']) - self.assertEqual([m[0] for m in pat.finditer("abcd", -1, 1)], - []) - self.assertEqual([m[0] for m in pat.finditer("abcd", -1, -3)], - []) - self.assertEqual([m[0] for m in pat.finditer("abcd", -200, -1)], - ['a', 'b', 'c']) - self.assertEqual([m[0] for m in pat.finditer("abcd", -200, -100)], - []) - self.assertEqual([m[0] for m in pat.finditer("abcd", pos=1, - endpos=-1)], ['b', 'c']) + 
with self.assertWarns(FutureWarning) as cm: + self.assertEqual(pat.findall("abcd", -1, 1), []) + self.assertEqual(str(cm.warning), "Negative start index will not " + "be truncated to zero in the future") + + with self.assertWarns(FutureWarning) as cm: + self.assertEqual(pat.findall("abcd", 1, -1), ['b', 'c']) + self.assertEqual(str(cm.warning), "Negative end index will not " + "be truncated to zero in the future") + + with self.assertWarns(FutureWarning) as cm: + self.assertEqual(pat.findall("abcd", 1, 3), ['b', 'c']) + self.assertEqual(pat.findall("abcd", 1, -1), ['b', 'c']) + self.assertEqual(pat.findall("abcd", -3, -1), ['b', 'c']) + self.assertEqual(pat.findall("abcd", -3, 3), ['b', 'c']) + self.assertEqual(pat.findall("abcd", -1, 1), []) + self.assertEqual(pat.findall("abcd", -1, -3), []) + self.assertEqual(pat.findall("abcd", -200, -1), ['a', 'b', 'c']) + self.assertEqual(pat.findall("abcd", -200, -100), []) + self.assertEqual(pat.findall("abcd", pos=1, endpos=-1), ['b', 'c']) + self.assertEqual(len(cm.warnings), 12) + + with self.assertWarns(FutureWarning) as cm: + self.assertEqual([m[0] for m in pat.finditer("abcd", -1, 1)], + []) + self.assertEqual(str(cm.warning), "Negative start index will not " + "be truncated to zero in the future") + + with self.assertWarns(FutureWarning) as cm: + self.assertEqual([m[0] for m in pat.finditer("abcd", 1, -1)], + ['b', 'c']) + self.assertEqual(str(cm.warning), "Negative end index will not " + "be truncated to zero in the future") + + with self.assertWarns(FutureWarning) as cm: + self.assertEqual([m[0] for m in pat.finditer("abcd", 1, 3)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", 1, -1)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -3, -1)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -3, 3)], + ['b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -1, 1)], + []) + self.assertEqual([m[0] for m in pat.finditer("abcd", -1, -3)], + []) 
+ self.assertEqual([m[0] for m in pat.finditer("abcd", -200, -1)], + ['a', 'b', 'c']) + self.assertEqual([m[0] for m in pat.finditer("abcd", -200, -100)], + []) + self.assertEqual([m[0] for m in pat.finditer("abcd", pos=1, + endpos=-1)], ['b', 'c']) + self.assertEqual(len(cm.warnings), 12) def test_re_match(self): for string in 'a', S('a'):