From 7f503d8d1b56dcb5dc8cf96d381494bceda29dda Mon Sep 17 00:00:00 2001 From: Daniel Weber Date: Sat, 11 Nov 2023 14:43:01 +1100 Subject: [PATCH 1/6] Add European Community (EC) Number --- stdnum/eu/ecnumber.py | 85 +++++++++++++++ tests/test_eu_ecnumber.doctest | 186 +++++++++++++++++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 stdnum/eu/ecnumber.py create mode 100644 tests/test_eu_ecnumber.doctest diff --git a/stdnum/eu/ecnumber.py b/stdnum/eu/ecnumber.py new file mode 100644 index 00000000..d61147b6 --- /dev/null +++ b/stdnum/eu/ecnumber.py @@ -0,0 +1,85 @@ +# ecnumber.py - functions for handling European Community Numbers + +# Copyright (C) 2023 Daniel Weber +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301 USA + +"""EC Number (European Community Number). + +The EC Number is a unique seven-digit number assigned to chemical substances +for regulatory purposes within the European Union by the European Commision. + +More information: + +* https://en.wikipedia.org/wiki/European_Community_number + +>>> validate('200-001-8') +'200-001-8' +>>> validate('200-001-9') +Traceback (most recent call last): + ... +InvalidChecksum: ... +>>> validate('20-0001-8') +Traceback (most recent call last): + ... +InvalidFormat: ... 
+""" + +import re + +from stdnum.exceptions import * +from stdnum.util import clean + + +_ec_number_re = re.compile(r'^[0-9]{3}-[0-9]{3}-[0-9]$') + + +def compact(number): + """Convert the number to the minimal representation.""" + number = clean(number, ' ').strip() + if '-' not in number: + number = '-'.join((number[:3], number[3:6], number[6:])) + return number + + +def calc_check_digit(number): + """Calculate the check digit for the number. The passed number should not + have the check digit included.""" + number = number.replace('-', '') + return str( + sum((i + 1) * int(n) for i, n in enumerate(number)) % 11) + + +def validate(number): + """Check if the number provided is a valid EC Number.""" + number = compact(number) + if not len(number) == 9: + raise InvalidLength() + if not _ec_number_re.match(number): + raise InvalidFormat() + if number[3] != '-' or number[7] != '-': + raise InvalidFormat() + if number[-1] != calc_check_digit(number[:-1]): + raise InvalidChecksum() + return number + + +def is_valid(number): + """Check if the number provided is a valid EC Number.""" + try: + return bool(validate(number)) + except ValidationError: + return False \ No newline at end of file diff --git a/tests/test_eu_ecnumber.doctest b/tests/test_eu_ecnumber.doctest new file mode 100644 index 00000000..078d446f --- /dev/null +++ b/tests/test_eu_ecnumber.doctest @@ -0,0 +1,186 @@ +test_eu_ecnumber.doctest - more detailed doctests for the stdnum.eu.ecnumber module + +Copyright (C) 2023 Daniel Weber + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA + + +This file contains more detailed doctests for the stdnum.casrn module. It +contains some corner case tests and tries to validate numbers that have been +found online. + +>>> from stdnum.eu import ecnumber +>>> from stdnum.exceptions import * + + +EC Numbers always include separators, which will be introduced if they +are not present. Validation will fail if separators are in the incorrect +place. + +>>> ecnumber.validate('200-112-1') +'200-112-1' +>>> ecnumber.validate('2001121') +'200-112-1' +>>> ecnumber.validate('20-0112-1') +Traceback (most recent call last): + ... +InvalidFormat: ... + + +The number should only have two separators. + +>>> ecnumber.validate('20--112-1') +Traceback (most recent call last): + ... +InvalidFormat: ... + + +EC Numbers are always nine characters long (including hyphens). + +>>> ecnumber.validate('2000-112-1') +Traceback (most recent call last): + ... +InvalidLength: ... +>>> ecnumber.validate('20001121') +Traceback (most recent call last): + ... +InvalidLength: ... +>>> ecnumber.validate('201121') +Traceback (most recent call last): + ... +InvalidLength: ... + + +The final character must have the correct check digit. + +>>> ecnumber.validate('200-112-2') +Traceback (most recent call last): + ... +InvalidChecksum: ... +>>> ecnumber.validate('2001122') +Traceback (most recent call last): + ... +InvalidChecksum: ... + + +These are randomly selected from the EC Inventory and should be valid EC Numbers. + +>>> numbers = ''' +... +... 257-228-0 +... 207-296-2 +... 297-119-5 +... 287-900-9 +... 271-104-3 +... 280-851-4 +... 281-919-6 +... 252-552-9 +... 215-429-0 +... 249-469-5 +... 421-750-9 +... 301-916-6 +... 217-931-5 +... 295-191-2 +... 207-952-8 +... 211-043-1 +... 
262-758-0 +... 255-597-2 +... 255-524-4 +... 226-307-1 +... 276-796-0 +... 274-741-5 +... 250-046-2 +... 287-761-4 +... 200-662-2 +... 200-897-0 +... 250-140-3 +... 222-729-5 +... 212-948-4 +... 239-367-9 +... 249-213-2 +... 263-543-4 +... 259-660-5 +... 254-324-4 +... 241-289-5 +... 228-426-4 +... 207-631-2 +... 245-704-0 +... 282-848-3 +... 204-282-8 +... 284-690-0 +... 282-944-5 +... 302-175-1 +... 251-412-4 +... 257-308-5 +... 220-575-3 +... 249-244-1 +... 266-556-3 +... 288-360-7 +... 266-708-9 +... 221-531-6 +... 274-747-8 +... 238-769-1 +... 251-186-7 +... 254-323-9 +... 247-214-2 +... 304-902-8 +... 307-415-9 +... 256-980-7 +... 296-057-6 +... 222-700-7 +... 223-550-5 +... 239-530-4 +... 219-941-5 +... 216-155-4 +... 236-325-1 +... 244-886-9 +... 248-170-7 +... 203-499-5 +... 280-279-5 +... 267-064-1 +... 280-947-6 +... 266-597-7 +... 281-719-9 +... 250-478-1 +... 243-154-6 +... 271-556-1 +... 263-157-6 +... 301-691-4 +... 274-112-5 +... 302-331-9 +... 235-556-5 +... 244-556-4 +... 217-593-9 +... 206-777-4 +... 286-712-4 +... 414-380-4 +... 242-807-2 +... 500-464-9 +... 252-796-6 +... 304-512-8 +... 307-269-6 +... 238-475-3 +... 304-680-2 +... 273-972-9 +... 297-362-7 +... 233-748-3 +... 307-692-6 +... 300-706-1 +... 310-159-0 +... +... ''' +>>> [x for x in numbers.splitlines() if x and not ecnumber.is_valid(x)] +[] From a8e6d1615f0f4ccc41f0f99a4306c1463f470514 Mon Sep 17 00:00:00 2001 From: Daniel Weber Date: Sun, 12 Nov 2023 21:04:15 +1100 Subject: [PATCH 2/6] Add tests for single hyphen and non-numeric characters --- tests/test_eu_ecnumber.doctest | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_eu_ecnumber.doctest b/tests/test_eu_ecnumber.doctest index 078d446f..d29fcf1e 100644 --- a/tests/test_eu_ecnumber.doctest +++ b/tests/test_eu_ecnumber.doctest @@ -38,6 +38,10 @@ place. Traceback (most recent call last): ... InvalidFormat: ... +>>> ecnumber.validate('2000112-1') +Traceback (most recent call last): + ... 
+InvalidFormat: ... The number should only have two separators. @@ -48,6 +52,14 @@ Traceback (most recent call last): InvalidFormat: ... +Only numeric characters between separators. + +>>> ecnumber.validate('20A-112-1') +Traceback (most recent call last): + ... +InvalidFormat: ... + + EC Numbers are always nine characters long (including hyphens). >>> ecnumber.validate('2000-112-1') From 97e24e74731bd22f14940fff8c5b56283cb88979 Mon Sep 17 00:00:00 2001 From: Daniel Weber Date: Sun, 12 Nov 2023 21:21:17 +1100 Subject: [PATCH 3/6] Remove redundant hyphen check --- stdnum/eu/ecnumber.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/stdnum/eu/ecnumber.py b/stdnum/eu/ecnumber.py index d61147b6..75027f78 100644 --- a/stdnum/eu/ecnumber.py +++ b/stdnum/eu/ecnumber.py @@ -70,8 +70,6 @@ def validate(number): raise InvalidLength() if not _ec_number_re.match(number): raise InvalidFormat() - if number[3] != '-' or number[7] != '-': - raise InvalidFormat() if number[-1] != calc_check_digit(number[:-1]): raise InvalidChecksum() return number From fc4480ca77872aa1db3a1e3b82ee1ef3eee2b1d7 Mon Sep 17 00:00:00 2001 From: Daniel Weber Date: Sun, 12 Nov 2023 21:28:28 +1100 Subject: [PATCH 4/6] Add newline and remove trailing whitespace --- stdnum/eu/ecnumber.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdnum/eu/ecnumber.py b/stdnum/eu/ecnumber.py index 75027f78..2056f364 100644 --- a/stdnum/eu/ecnumber.py +++ b/stdnum/eu/ecnumber.py @@ -19,7 +19,7 @@ """EC Number (European Community Number). -The EC Number is a unique seven-digit number assigned to chemical substances +The EC Number is a unique seven-digit number assigned to chemical substances for regulatory purposes within the European Union by the European Commision. 
More information: @@ -80,4 +80,4 @@ def is_valid(number): try: return bool(validate(number)) except ValidationError: - return False \ No newline at end of file + return False From e082341e78373f9a3e00e1d4099dc3ec67af05a3 Mon Sep 17 00:00:00 2001 From: Daniel Weber Date: Sun, 12 Nov 2023 21:32:16 +1100 Subject: [PATCH 5/6] Remove typo --- tests/test_eu_ecnumber.doctest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_eu_ecnumber.doctest b/tests/test_eu_ecnumber.doctest index d29fcf1e..838fcc4b 100644 --- a/tests/test_eu_ecnumber.doctest +++ b/tests/test_eu_ecnumber.doctest @@ -18,7 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -This file contains more detailed doctests for the stdnum.casrn module. It +This file contains more detailed doctests for the stdnum.eu.ecnumber module. It contains some corner case tests and tries to validate numbers that have been found online. From 81007b01dcf9efe1ef1920331e8ed0d40ccc7634 Mon Sep 17 00:00:00 2001 From: Daniel Weber Date: Sun, 19 Nov 2023 18:04:29 +1100 Subject: [PATCH 6/6] Handle check digit of 10 --- stdnum/eu/ecnumber.py | 5 +++-- tests/test_eu_ecnumber.doctest | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/stdnum/eu/ecnumber.py b/stdnum/eu/ecnumber.py index 2056f364..f27b3ebb 100644 --- a/stdnum/eu/ecnumber.py +++ b/stdnum/eu/ecnumber.py @@ -57,10 +57,11 @@ def compact(number): def calc_check_digit(number): """Calculate the check digit for the number. The passed number should not - have the check digit included.""" + have the check digit included. 
In instances where the check digit is + '10', it is truncated to its first character, '1'.""" number = number.replace('-', '') return str( - sum((i + 1) * int(n) for i, n in enumerate(number)) % 11) + sum((i + 1) * int(n) for i, n in enumerate(number)) % 11)[0] def validate(number): diff --git a/tests/test_eu_ecnumber.doctest b/tests/test_eu_ecnumber.doctest index 838fcc4b..284fe688 100644 --- a/tests/test_eu_ecnumber.doctest +++ b/tests/test_eu_ecnumber.doctest @@ -192,6 +192,7 @@ These are randomly selected from the EC Inventory should be valid EC Numbers. ... 307-692-6 ... 300-706-1 ... 310-159-0 +... 424-870-1 ... ... ''' >>> [x for x in numbers.splitlines() if x and not ecnumber.is_valid(x)]