diff --git a/README b/README index c0953cf9..d9bbfd20 100644 --- a/README +++ b/README @@ -127,6 +127,7 @@ Currently this package supports the following formats: * ISIL (International Standard Identifier for Libraries) * ISIN (International Securities Identification Number) * ISMN (International Standard Music Number) + * ISRC (International Standard Recording Code) * ISO 11649 (Structured Creditor Reference) * ISO 6346 (International standard for container identification) * ISSN (International Standard Serial Number) diff --git a/docs/stdnum.isrc.rst b/docs/stdnum.isrc.rst new file mode 100644 index 00000000..c8346987 --- /dev/null +++ b/docs/stdnum.isrc.rst @@ -0,0 +1,5 @@ +stdnum.isrc +=========== + +.. automodule:: stdnum.isrc + :members: diff --git a/stdnum/isrc.py b/stdnum/isrc.py new file mode 100644 index 00000000..c27c3bc8 --- /dev/null +++ b/stdnum/isrc.py @@ -0,0 +1,83 @@ +# isrc.py - functions for International Standard Recording Codes (ISRC) +# +# Copyright (C) 2021 Nuno André Novo +# Copyright (C) 2014-2021 Arthur de Jong +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301 USA + +"""ISRC (International Standard Recording Code). + +The ISRC is an international standard code (ISO 3901) for uniquely +identifying sound recordings and music video recordings. 
# --- stdnum/isrc.py -------------------------------------------------------
# The functions below rely on module-level names from the surrounding
# module: clean, segment, the stdnum exception classes, ascii_uppercase,
# digits, _country_codes and _chunks.


def compact(number):
    """Convert the ISRC to the minimal representation. This strips the
    number of any valid separators and removes surrounding whitespace."""
    return clean(number, ' -').strip().upper()


def validate(number):
    """Check if the number provided is a valid ISRC. This checks the length,
    the alphabet, and the country code but does not check if the registrant
    code is known.

    Returns the compact form of the number. Raises InvalidLength if the
    compact number is not 12 characters long, InvalidFormat if the
    registrant part is not alphanumeric or the year and record parts are
    not numeric, and InvalidComponent if the country part is not a known
    code."""
    number = compact(number)
    # Reject wrong lengths before splitting so the parts below are always
    # complete.
    if len(number) != 12:
        raise InvalidLength()
    country, registrant, year, record = segment(number, *_chunks)
    alphanumeric = ascii_uppercase + digits
    if not all(c in alphanumeric for c in registrant):
        raise InvalidFormat()
    if not all(c in digits for c in year + record):
        raise InvalidFormat()
    if country not in _country_codes:
        raise InvalidComponent()
    return number


def is_valid(number):
    """Check if the number provided is a valid ISRC."""
    try:
        return bool(validate(number))
    except ValidationError:
        return False


def format(number, separator='-'):
    """Reformat the number to the common representation.

    The number is compacted and split into the groups described by _chunks,
    joined by separator. The last group takes everything that remains so
    that over-long input is not silently truncated."""
    number = compact(number)
    leading = _chunks[:-1]
    parts = segment(number, *leading)
    # Keep every remaining character in the final group instead of cutting
    # it to the nominal size.
    parts.append(number[sum(leading):])
    return separator.join(parts)


# --- stdnum/util.py -------------------------------------------------------


def segment(string, *sizes, start=0):
    """Split the string into the indicated number and size of pieces.

    Pieces are taken consecutively, beginning at offset start. A piece that
    reaches past the end of the string is truncated (possibly to an empty
    string) and anything after the last requested piece is not returned.

    >>> segment('0123456789ABCDEF', 3, 4, 5, 4)
    ['012', '3456', '789AB', 'CDEF']
    >>> segment('0123456789', 2, 3, start=4)
    ['45', '678']
    """
    pieces = []
    offset = start
    for size in sizes:
        pieces.append(string[offset:offset + size])
        offset += size
    return pieces
+>>> isrc.validate('US-SKG-19-1234*') +Traceback (most recent call last): + ... +InvalidFormat: ... +>>> isrc.validate('XX-SKG-19-12345') +Traceback (most recent call last): + ... +InvalidComponent: ... + + +Regrouping tests. + +>>> isrc.format('USSKG1912345') +'US-SKG-19-12345'