From 215e8ad08357e1e4af50e3dc6a7df9430e13b840 Mon Sep 17 00:00:00 2001 From: David Arnold Date: Tue, 30 Jun 2020 19:38:39 -0500 Subject: [PATCH 001/139] Add mdx extension https://github.com/mdx-js/mdx --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 4710ac65..4d50fcdf 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -93,6 +93,7 @@ 'map': {'text', 'map'}, 'markdown': {'text', 'markdown'}, 'md': {'text', 'markdown'}, + 'mdx': {'text', 'mdx'}, 'mib': {'text', 'mib'}, 'mk': {'text', 'makefile'}, 'ml': {'text', 'ocaml'}, From 0f22757bf6b40ad9d8317621f3399c4ab179ce98 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 30 Jun 2020 17:41:15 -0700 Subject: [PATCH 002/139] v1.4.21 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ee16c49f..69f64966 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.20 +version = 1.4.21 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From fb6315381357c49107b84a1fba6aa9efd956845e Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 10 Jul 2020 09:41:00 -0700 Subject: [PATCH 003/139] Add detection for binary pyz files --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 4d50fcdf..9ef40e31 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -127,6 +127,7 @@ 'py': {'text', 'python'}, 'pyi': {'text', 'pyi'}, 'pyx': {'text', 'cython'}, + 'pyz': {'binary', 'pyz'}, 'pxd': {'text', 'cython'}, 'pxi': {'text', 'cython'}, 'r': {'text', 'r'}, From e9955a044af68cf764f1f6f79a5ee9860c988be4 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 10 Jul 2020 09:46:05 -0700 Subject: [PATCH 004/139] v1.4.22 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/setup.cfg b/setup.cfg index 69f64966..83e3e447 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.21 +version = 1.4.22 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 32996544f4f685b5fc4ba31ea7cf88eaef6b730c Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Fri, 10 Jul 2020 19:38:19 +0100 Subject: [PATCH 005/139] add pyzw as a pyz alias --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 9ef40e31..535b5788 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -128,6 +128,7 @@ 'pyi': {'text', 'pyi'}, 'pyx': {'text', 'cython'}, 'pyz': {'binary', 'pyz'}, + 'pyzw': {'binary', 'pyz'}, 'pxd': {'text', 'cython'}, 'pxi': {'text', 'cython'}, 'r': {'text', 'r'}, From 8b7d4e90c80676630d952c4c927bf0891086ebd4 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 10 Jul 2020 11:40:54 -0700 Subject: [PATCH 006/139] v1.4.23 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 83e3e447..897dbc8b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.22 +version = 1.4.23 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From ca18757330d4d39499ee0dcd0b016f47ff423dd1 Mon Sep 17 00:00:00 2001 From: Leonidas Loucas Date: Sat, 18 Jul 2020 09:22:31 -0700 Subject: [PATCH 007/139] feat: Detect common haskell suffixes (hs,lhs,chs) hs -> haskell lhs -> literate-haskell https://wiki.haskell.org/Literate_programming chs -> c2hs https://github.com/haskell/c2hs/wiki Fixes #116 --- identify/extensions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 535b5788..07587785 100644 --- 
a/identify/extensions.py +++ b/identify/extensions.py @@ -16,6 +16,7 @@ 'cc': {'text', 'c++'}, 'cu': {'text', 'cuda'}, 'cfg': {'text'}, + 'chs': {'text', 'c2hs'}, 'clj': {'text', 'clojure'}, 'cljc': {'text', 'clojure'}, 'cljs': {'text', 'clojure', 'clojurescript'}, @@ -58,6 +59,7 @@ 'gz': {'binary', 'gzip'}, 'h': {'text', 'header', 'c', 'c++'}, 'hpp': {'text', 'header', 'c++'}, + 'hs': {'text', 'haskell'}, 'htm': {'text', 'html'}, 'html': {'text', 'html'}, 'hxx': {'text', 'header', 'c++'}, @@ -86,6 +88,7 @@ 'kml': {'text', 'kml', 'xml'}, 'kt': {'text', 'kotlin'}, 'less': {'text', 'less'}, + 'lhs': {'text', 'literate-haskell'}, 'lidr': {'text', 'idris'}, 'lua': {'text', 'lua'}, 'm': {'text', 'c', 'objective-c'}, From 5c9d860b0ef258bbc2c3946ab059b388f683881e Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 19 Jul 2020 11:18:58 -0700 Subject: [PATCH 008/139] v1.4.24 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 897dbc8b..08663963 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.23 +version = 1.4.24 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From f10ea6a2d96553059f5c12d63e61f07a3a9286ff Mon Sep 17 00:00:00 2001 From: Matt Walker Date: Mon, 20 Jul 2020 18:17:25 -0400 Subject: [PATCH 009/139] Add common HDL language extensions Add support for detecting files of the common hardware description languages of verilog and VHDL. 
Suffix references, see the filename extension field of: * Verilog: https://en.wikipedia.org/w/index.php?title=Verilog&oldid=964006711 * SystemVerilog: https://en.wikipedia.org/w/index.php?title=SystemVerilog&oldid=962682338 * VHDL: https://en.wikipedia.org/w/index.php?title=VHDL&oldid=959179032 See also: * Xilinx UG895: Vivado User Guide, System Level Design Entry, Page 13 https://www.xilinx.com/support/documentation/sw_manuals/xilinx2018_3/ug895-vivado-system-level-design-entry.pdf --- identify/extensions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 07587785..e62000ee 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -152,7 +152,9 @@ 'ss': {'text', 'scheme'}, 'styl': {'text', 'stylus'}, 'sql': {'text', 'sql'}, + 'sv': {'text', 'system-verilog'}, 'svg': {'text', 'image', 'svg'}, + 'svh': {'text', 'system-verilog'}, 'swf': {'binary', 'swf'}, 'swift': {'text', 'swift'}, 'swiftdeps': {'text', 'swiftdeps'}, @@ -168,7 +170,10 @@ 'tsx': {'text', 'tsx'}, 'ttf': {'binary', 'ttf'}, 'txt': {'text', 'plain-text'}, + 'v': {'text', 'verilog'}, 'vdx': {'text', 'vdx'}, + 'vh': {'text', 'verilog'}, + 'vhd': {'text', 'vhdl'}, 'vim': {'text', 'vim'}, 'vue': {'text', 'vue'}, 'war': {'binary', 'zip', 'jar'}, From 8bdaf8e824de80357156f62c62486bd4feb2366c Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 21 Jul 2020 15:22:39 -0700 Subject: [PATCH 010/139] v1.4.25 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 08663963..63a8be8f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.24 +version = 1.4.25 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From dbf38aa61239b53dc53b88ea7428cd43b878135a Mon Sep 17 00:00:00 2001 From: Yannick Vaucher Date: Thu, 13 Aug 2020 23:03:06 +0200 Subject: [PATCH 011/139] Add gd 
extension for GDScript from Godot --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index e62000ee..8d8274eb 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -45,6 +45,7 @@ 'eyaml': {'text', 'yaml'}, 'feature': {'text', 'gherkin'}, 'fish': {'text', 'fish'}, + 'gd': {'text', 'gdscript'}, 'gemspec': {'text', 'ruby'}, 'gif': {'binary', 'image', 'gif'}, 'go': {'text', 'go'}, From d6b5769cbb780d35625b5e6aebcf38d1f045b155 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 14 Aug 2020 13:09:53 -0700 Subject: [PATCH 012/139] v1.4.26 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 63a8be8f..a5135296 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.25 +version = 1.4.26 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From f762d136a78cfe57d23085fec9508a1e348afdc4 Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Sat, 15 Aug 2020 09:33:06 +0200 Subject: [PATCH 013/139] Add 'inx'->'xml' to 'extensions.py' inx is an xml file used by Inkscape and Adobe InDesign --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 8d8274eb..bae00853 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -71,6 +71,7 @@ 'idr': {'text', 'idris'}, 'inc': {'text', 'inc'}, 'ini': {'text', 'ini'}, + 'inx': {'text', 'xml', 'inx'}, 'ipynb': {'text', 'jupyter'}, 'j2': {'text', 'jinja'}, 'jade': {'text', 'jade'}, From de3e6716162dc45a40a41a7efdb8048acc173c64 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 15 Aug 2020 17:26:05 -0700 Subject: [PATCH 014/139] v1.4.27 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a5135296..3c3c241e 100644 --- a/setup.cfg
+++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.26 +version = 1.4.27 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 9ba85f4f44e71575adbd2961e0cc9df328d9e453 Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Sun, 16 Aug 2020 13:14:29 +0200 Subject: [PATCH 015/139] Add 'xml' type to 'svg' extension SVG is an XML-based vector image format --- identify/extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/identify/extensions.py b/identify/extensions.py index bae00853..d79f5b4a 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -155,7 +155,7 @@ 'styl': {'text', 'stylus'}, 'sql': {'text', 'sql'}, 'sv': {'text', 'system-verilog'}, - 'svg': {'text', 'image', 'svg'}, + 'svg': {'text', 'image', 'svg', 'xml'}, 'svh': {'text', 'system-verilog'}, 'swf': {'binary', 'swf'}, 'swift': {'text', 'swift'}, From bcf64ebdb22e826266a7db83995ce79b931bac58 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 16 Aug 2020 11:28:36 -0700 Subject: [PATCH 016/139] v1.4.28 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 3c3c241e..3d131085 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.27 +version = 1.4.28 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 4cb804e16baab3b8a78276c9120aa9c59ce5c30c Mon Sep 17 00:00:00 2001 From: Filippos Chalvatzoglou Date: Mon, 24 Aug 2020 14:30:56 +0100 Subject: [PATCH 017/139] Add libsonnet extension as jsonnet Per https://jsonnet.org/learning/tutorial.html: "Files designed for import by convention end with .libsonnet" --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index d79f5b4a..4475a41a 100644 --- 
a/identify/extensions.py +++ b/identify/extensions.py @@ -91,6 +91,7 @@ 'kt': {'text', 'kotlin'}, 'less': {'text', 'less'}, 'lhs': {'text', 'literate-haskell'}, + 'libsonnet': {'text', 'jsonnet'}, 'lidr': {'text', 'idris'}, 'lua': {'text', 'lua'}, 'm': {'text', 'c', 'objective-c'}, From a1caa07e6c6f636546d3df66f5fccfc465435709 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 24 Aug 2020 09:14:00 -0700 Subject: [PATCH 018/139] v1.4.29 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 3d131085..ea0b1377 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.28 +version = 1.4.29 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 9b1f48023414859eb277c32b0e42fe3efcea29c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Mond=C3=A9jar?= Date: Mon, 31 Aug 2020 10:00:29 +0200 Subject: [PATCH 019/139] Add 'sass' files extension --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 4475a41a..fd218013 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -142,6 +142,7 @@ 'rs': {'text', 'rust'}, 'rst': {'text', 'rst'}, 's': {'text', 'asm'}, + 'sass': {'text', 'sass'}, 'sbt': {'text', 'sbt', 'scala'}, 'sc': {'text', 'scala'}, 'scala': {'text', 'scala'}, From 62e98d7bee975f686bab8257c4fe95ff0ff7ab80 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 31 Aug 2020 22:25:31 -0700 Subject: [PATCH 020/139] v1.4.30 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ea0b1377..35cab12c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.29 +version = 1.4.30 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 
1eeaa05e46e7c9829adf120cf7029f6d7eebe392 Mon Sep 17 00:00:00 2001 From: David Arnold Date: Fri, 4 Sep 2020 11:57:10 -0500 Subject: [PATCH 021/139] add: interpreters the least known might be: https://linux.die.net/man/1/expect --- identify/interpreters.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/identify/interpreters.py b/identify/interpreters.py index bcffb5ac..7feb4b1b 100644 --- a/identify/interpreters.py +++ b/identify/interpreters.py @@ -3,9 +3,14 @@ from __future__ import unicode_literals INTERPRETERS = { + 'ash': {'shell', 'ash'}, + 'awk': {'awk'}, 'bash': {'shell', 'bash'}, + 'bats': {'shell', 'bash', 'bats'}, 'csh': {'shell', 'csh'}, 'dash': {'shell', 'dash'}, + 'expect': {'expect'}, + 'ksh': {'shell', 'ksh'}, 'node': {'javascript'}, 'nodejs': {'javascript'}, 'perl': {'perl'}, From fe6380cf24d9ffacfa827c6b921169b5561e5245 Mon Sep 17 00:00:00 2001 From: David Arnold Date: Fri, 4 Sep 2020 12:54:40 -0500 Subject: [PATCH 022/139] imp: add nix-shell interpreter parser nix-shell uses the second line to pass relevant arguments to the interpreter. Linux (unlike BSD) -- see: https://unix.stackexchange.com/a/63981 -- only passes a single argument to the shebang command (in this case env or nix-shell). On the second line, if specified, the interpreter is passed to nix-shell via the -i flag, so we parse this interpreter if the second line is specified. If not (nix-shell called w/o arguments) the implicit interpreter is bash.
-- see also: #127 -- --- identify/identify.py | 27 ++++++++++++++++++++ tests/identify_test.py | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/identify/identify.py b/identify/identify.py index 8a21d8b5..24d82c7b 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -159,6 +159,33 @@ def parse_shebang(bytesio): cmd = tuple(_shebang_split(first_line.strip())) if cmd and cmd[0] == '/usr/bin/env': cmd = cmd[1:] + if cmd[:1] == ('nix-shell',): + if cmd[1:]: + return () + multiple_shebangs = False + while bytesio.read(2) == b'#!': + multiple_shebangs = True + next_line = bytesio.readline() + try: + next_line = next_line.decode('UTF-8') + except UnicodeDecodeError: + return () + + for c in next_line: + if c not in printable: + return () + + line_tokens = tuple(shlex.split(next_line.strip())) + for i, token in enumerate(line_tokens): + if not token == '-i': + continue + try: + # the argument to -i flag + cmd = (line_tokens[i + 1],) + except IndexError: + return () + if not multiple_shebangs: + return () return cmd diff --git a/tests/identify_test.py b/tests/identify_test.py index 44406a12..f6636dba 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -217,6 +217,62 @@ def test_file_is_text_does_not_exist(tmpdir): (b"#!/path'with/quotes y", ("/path'with/quotes", 'y')), # Don't regress on leading/trailing ws (b"#! /path'with/quotes y ", ("/path'with/quotes", 'y')), + # Test nix-shell specialites with shebang on second line + ( + b'#! /usr/bin/env nix-shell\n' + + b'#! nix-shell -i bash -p python', + ('bash',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + + b'#! nix-shell -i python -p coreutils', + ('python',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + + b'#! nix-shell -p coreutils -i python', + ('python',), + ), + # multi-line and no whitespace variation + ( + b'#! /usr/bin/env nix-shell\n' + + b'#! nix-shell -p coreutils\n' + + b'#! nix-shell -i python', + ('python',), + ), + ( + b'#! 
/usr/bin/env nix-shell\n' + + b'#!nix-shell -p coreutils\n' + + b'#!nix-shell -i python', + ('python',), + ), + ( + b'#! /usr/bin/env nix-shell\n' + + b'#!\xf9\x93\x01\x42\xcd', + (), + ), + ( + b'#! /usr/bin/env nix-shell\n' + + b'#!\x00\x00\x00\x00', + (), + ), + # non-proper nix-shell + # (b'#! /usr/bin/nix-shell', ()), # out of scope test + (b'#! /usr/bin/env nix-shell', ()), + (b'#! /usr/bin/env nix-shell non-portable-argument', ()), + ( + b'#! /usr/bin/env nix-shell\n' + + b'#! nix-shell -i', (), # guard against index error + ), + # interpret quotes correctly + ( + b'#!/usr/bin/env nix-shell\n' + + b'#!nix-shell --argstr x "a -i python3 p"\n' + + b'#!nix-shell -p hello\n' + + b'#!nix-shell -i bash\n' + + b'#!nix-shell --argstr y "b -i runhaskell q"', + ('bash',), + ), (b'\xf9\x93\x01\x42\xcd', ()), (b'#!\xf9\x93\x01\x42\xcd', ()), (b'#!\x00\x00\x00\x00', ()), From e67ccb6400f9815bf74968eb1d6e8ea4b20b770b Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 4 Sep 2020 22:38:07 -0700 Subject: [PATCH 023/139] fix a few small things --- identify/identify.py | 50 +++++++++++++++++++----------------------- tests/identify_test.py | 44 ++++++++++++++++++++----------------- 2 files changed, 47 insertions(+), 47 deletions(-) diff --git a/identify/identify.py b/identify/identify.py index 24d82c7b..1c0e6777 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -141,6 +141,27 @@ def _shebang_split(line): return line.split() +def _parse_nix_shebang(bytesio, cmd): + while bytesio.read(2) == b'#!': + next_line = bytesio.readline() + try: + next_line = next_line.decode('UTF-8') + except UnicodeDecodeError: + return cmd + + for c in next_line: + if c not in printable: + return cmd + + line_tokens = tuple(_shebang_split(next_line.strip())) + for i, token in enumerate(line_tokens[:-1]): + if token != '-i': + continue + # the argument to -i flag + cmd = (line_tokens[i + 1],) + return cmd + + def parse_shebang(bytesio): """Parse the shebang from a file opened for 
reading binary.""" if bytesio.read(2) != b'#!': @@ -159,33 +180,8 @@ def parse_shebang(bytesio): cmd = tuple(_shebang_split(first_line.strip())) if cmd and cmd[0] == '/usr/bin/env': cmd = cmd[1:] - if cmd[:1] == ('nix-shell',): - if cmd[1:]: - return () - multiple_shebangs = False - while bytesio.read(2) == b'#!': - multiple_shebangs = True - next_line = bytesio.readline() - try: - next_line = next_line.decode('UTF-8') - except UnicodeDecodeError: - return () - - for c in next_line: - if c not in printable: - return () - - line_tokens = tuple(shlex.split(next_line.strip())) - for i, token in enumerate(line_tokens): - if not token == '-i': - continue - try: - # the argument to -i flag - cmd = (line_tokens[i + 1],) - except IndexError: - return () - if not multiple_shebangs: - return () + if cmd == ('nix-shell',): + return _parse_nix_shebang(bytesio, cmd) return cmd diff --git a/tests/identify_test.py b/tests/identify_test.py index f6636dba..a80c4015 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -219,57 +219,61 @@ def test_file_is_text_does_not_exist(tmpdir): (b"#! /path'with/quotes y ", ("/path'with/quotes", 'y')), # Test nix-shell specialites with shebang on second line ( - b'#! /usr/bin/env nix-shell\n' + + b'#! /usr/bin/env nix-shell\n' b'#! nix-shell -i bash -p python', ('bash',), ), ( - b'#! /usr/bin/env nix-shell\n' + + b'#! /usr/bin/env nix-shell\n' b'#! nix-shell -i python -p coreutils', ('python',), ), ( - b'#! /usr/bin/env nix-shell\n' + + b'#! /usr/bin/env nix-shell\n' b'#! nix-shell -p coreutils -i python', ('python',), ), # multi-line and no whitespace variation ( - b'#! /usr/bin/env nix-shell\n' + - b'#! nix-shell -p coreutils\n' + + b'#! /usr/bin/env nix-shell\n' + b'#! nix-shell -p coreutils\n' b'#! nix-shell -i python', ('python',), ), ( - b'#! /usr/bin/env nix-shell\n' + - b'#!nix-shell -p coreutils\n' + + b'#! /usr/bin/env nix-shell\n' + b'#!nix-shell -p coreutils\n' b'#!nix-shell -i python', ('python',), ), ( - b'#! 
/usr/bin/env nix-shell\n' + + b'#! /usr/bin/env nix-shell\n' b'#!\xf9\x93\x01\x42\xcd', - (), + ('nix-shell',), ), ( - b'#! /usr/bin/env nix-shell\n' + + b'#! /usr/bin/env nix-shell\n' b'#!\x00\x00\x00\x00', - (), + ('nix-shell',), ), # non-proper nix-shell - # (b'#! /usr/bin/nix-shell', ()), # out of scope test - (b'#! /usr/bin/env nix-shell', ()), - (b'#! /usr/bin/env nix-shell non-portable-argument', ()), + (b'#! /usr/bin/nix-shell', ('/usr/bin/nix-shell',)), + (b'#! /usr/bin/env nix-shell', ('nix-shell',)), ( - b'#! /usr/bin/env nix-shell\n' + - b'#! nix-shell -i', (), # guard against index error + b'#! /usr/bin/env nix-shell non-portable-argument', + ('nix-shell', 'non-portable-argument'), + ), + ( + b'#! /usr/bin/env nix-shell\n' + b'#! nix-shell -i', + ('nix-shell',), # guard against index error ), # interpret quotes correctly ( - b'#!/usr/bin/env nix-shell\n' + - b'#!nix-shell --argstr x "a -i python3 p"\n' + - b'#!nix-shell -p hello\n' + - b'#!nix-shell -i bash\n' + + b'#!/usr/bin/env nix-shell\n' + b'#!nix-shell --argstr x "a -i python3 p"\n' + b'#!nix-shell -p hello\n' + b'#!nix-shell -i bash\n' b'#!nix-shell --argstr y "b -i runhaskell q"', ('bash',), ), From 00ca0a20015a8106c2cd007cb4989ee035936b99 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 5 Sep 2020 12:22:04 -0700 Subject: [PATCH 024/139] v1.5.0 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 35cab12c..a60d2eb7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.4.30 +version = 1.5.0 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 61deb722cf4dd6a82d57f2e242b208da2fb84bc2 Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Sat, 12 Sep 2020 17:40:52 +0200 Subject: [PATCH 025/139] Add 'txsprofile'->'ini' to 'extensions.py' `txsprofile` is an `ini` file used by TeXstudio --- 
identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index fd218013..69c5fcc8 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -173,6 +173,7 @@ 'ts': {'text', 'ts'}, 'tsx': {'text', 'tsx'}, 'ttf': {'binary', 'ttf'}, + 'txsprofile': {'text', 'ini', 'txsprofile'}, 'txt': {'text', 'plain-text'}, 'v': {'text', 'verilog'}, 'vdx': {'text', 'vdx'}, From dfd92d166ad4eb40693dcd0d0cbf7ab86cec0f27 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 12 Sep 2020 10:24:38 -0700 Subject: [PATCH 026/139] v1.5.1 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a60d2eb7..46f2acb0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.0 +version = 1.5.1 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 242762dad72b95259dfcf0c78dc8f279bd53d8de Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Sun, 13 Sep 2020 02:22:39 +0200 Subject: [PATCH 027/139] Add '.rstcheck.cfg'->'ini' '.rstcheck.cfg' is used by 'rstcheck' and is like 'setup.cfg' an 'ini' like file --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 69c5fcc8..e370ddb3 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -230,6 +230,7 @@ '.npmignore': {'text', 'npmignore'}, '.pdbrc': EXTENSIONS['py'] | {'pdbrc'}, '.pypirc': EXTENSIONS['ini'] | {'pypirc'}, + '.rstcheck.cfg': EXTENSIONS['ini'], '.yamllint': EXTENSIONS['yaml'] | {'yamllint'}, '.zshrc': EXTENSIONS['zsh'], 'AUTHORS': EXTENSIONS['txt'], From 00c3f8fc62d2f5f5e4d488fc4197af323a319b2b Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Sun, 13 Sep 2020 02:31:06 +0200 Subject: [PATCH 028/139] Add '.flake8'->'ini' '.flake8' is an 'ini' file used to configure 'Flake8' --- 
identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index e370ddb3..5c6f27eb 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -219,6 +219,7 @@ '.cshrc': EXTENSIONS['csh'], '.dockerignore': {'text', 'dockerignore'}, '.editorconfig': {'text', 'editorconfig'}, + '.flake8': EXTENSIONS['ini'] | {'flake8'}, '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'}, '.hgrc': EXTENSIONS['ini'] | {'hgrc'}, '.gitattributes': {'text', 'gitattributes'}, From fb70f9adbf6780bfa628a8f3684255567c4765ac Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Sun, 13 Sep 2020 02:41:55 +0200 Subject: [PATCH 029/139] Add 'xhtml'->'xml' --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 5c6f27eb..cc4f0cee 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -188,6 +188,7 @@ 'woff': {'binary', 'woff'}, 'woff2': {'binary', 'woff2'}, 'wsgi': {'text', 'wsgi', 'python'}, + 'xhtml': {'text', 'xml', 'xhtml'}, 'xml': {'text', 'xml'}, 'xq': {'text', 'xquery'}, 'xql': {'text', 'xquery'}, From 1cf9486e7c302f13a5ca9be291c71ba9d6aa8452 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 12 Sep 2020 20:14:32 -0700 Subject: [PATCH 030/139] v1.5.2 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 46f2acb0..a54ed4af 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.1 +version = 1.5.2 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 4f6f70b625b2c3605de2b1686fb9189db6140e2e Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Sun, 13 Sep 2020 02:33:29 +0200 Subject: [PATCH 031/139] Sort entries Sorted alphabetically while leaving entries with same values next to each other --- identify/extensions.py | 24 
++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/identify/extensions.py b/identify/extensions.py index cc4f0cee..ba41e9cc 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -14,7 +14,6 @@ 'bz2': {'binary', 'bzip2'}, 'c': {'text', 'c'}, 'cc': {'text', 'c++'}, - 'cu': {'text', 'cuda'}, 'cfg': {'text'}, 'chs': {'text', 'c2hs'}, 'clj': {'text', 'clojure'}, @@ -31,6 +30,7 @@ 'cson': {'text', 'cson'}, 'css': {'text', 'css'}, 'csv': {'text', 'csv'}, + 'cu': {'text', 'cuda'}, 'cxx': {'text', 'c++'}, 'dart': {'text', 'dart'}, 'def': {'text', 'def'}, @@ -130,13 +130,13 @@ 'proto': {'text', 'proto'}, 'puml': {'text', 'plantuml'}, 'purs': {'text', 'purescript'}, + 'pxd': {'text', 'cython'}, + 'pxi': {'text', 'cython'}, 'py': {'text', 'python'}, 'pyi': {'text', 'pyi'}, 'pyx': {'text', 'cython'}, 'pyz': {'binary', 'pyz'}, 'pyzw': {'binary', 'pyz'}, - 'pxd': {'text', 'cython'}, - 'pxi': {'text', 'cython'}, 'r': {'text', 'r'}, 'rb': {'text', 'ruby'}, 'rs': {'text', 'rust'}, @@ -146,16 +146,16 @@ 'sbt': {'text', 'sbt', 'scala'}, 'sc': {'text', 'scala'}, 'scala': {'text', 'scala'}, - 'scss': {'text', 'scss'}, 'scm': {'text', 'scheme'}, + 'scss': {'text', 'scss'}, 'sh': {'text', 'shell'}, 'sls': {'text', 'salt'}, 'so': {'binary'}, 'sol': {'text', 'solidity'}, 'spec': {'text', 'spec'}, + 'sql': {'text', 'sql'}, 'ss': {'text', 'scheme'}, 'styl': {'text', 'stylus'}, - 'sql': {'text', 'sql'}, 'sv': {'text', 'system-verilog'}, 'svg': {'text', 'image', 'svg', 'xml'}, 'svh': {'text', 'system-verilog'}, @@ -164,12 +164,12 @@ 'swiftdeps': {'text', 'swiftdeps'}, 'tac': {'text', 'twisted', 'python'}, 'tar': {'binary', 'tar'}, + 'tf': {'text', 'terraform'}, + 'tfvars': {'text', 'terraform'}, 'tgz': {'binary', 'gzip'}, 'thrift': {'text', 'thrift'}, 'tiff': {'binary', 'image', 'tiff'}, 'toml': {'text', 'toml'}, - 'tf': {'text', 'terraform'}, - 'tfvars': {'text', 'terraform'}, 'ts': {'text', 'ts'}, 'tsx': {'text', 'tsx'}, 'ttf': {'binary', 
'ttf'}, @@ -183,8 +183,8 @@ 'vue': {'text', 'vue'}, 'war': {'binary', 'zip', 'jar'}, 'wav': {'binary', 'audio', 'wav'}, - 'wkt': {'text', 'wkt'}, 'whl': {'binary', 'wheel', 'zip'}, + 'wkt': {'text', 'wkt'}, 'woff': {'binary', 'woff'}, 'woff2': {'binary', 'woff2'}, 'wsgi': {'text', 'wsgi', 'python'}, @@ -212,20 +212,20 @@ NAMES = { '.babelrc': EXTENSIONS['json'] | {'babelrc'}, - '.bashrc': EXTENSIONS['bash'], '.bash_aliases': EXTENSIONS['bash'], '.bash_profile': EXTENSIONS['bash'], + '.bashrc': EXTENSIONS['bash'], '.bowerrc': EXTENSIONS['json'] | {'bowerrc'}, '.coveragerc': EXTENSIONS['ini'] | {'coveragerc'}, '.cshrc': EXTENSIONS['csh'], '.dockerignore': {'text', 'dockerignore'}, '.editorconfig': {'text', 'editorconfig'}, '.flake8': EXTENSIONS['ini'] | {'flake8'}, - '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'}, - '.hgrc': EXTENSIONS['ini'] | {'hgrc'}, '.gitattributes': {'text', 'gitattributes'}, + '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'}, '.gitignore': {'text', 'gitignore'}, '.gitmodules': {'text', 'gitmodules'}, + '.hgrc': EXTENSIONS['ini'] | {'hgrc'}, '.jshintrc': EXTENSIONS['json'] | {'jshintrc'}, '.mailmap': {'text', 'mailmap'}, '.mention-bot': EXTENSIONS['json'] | {'mention-bot'}, @@ -236,8 +236,8 @@ '.yamllint': EXTENSIONS['yaml'] | {'yamllint'}, '.zshrc': EXTENSIONS['zsh'], 'AUTHORS': EXTENSIONS['txt'], - 'BUILD.bazel': {'text', 'bazel'}, 'BUILD': {'text', 'bazel'}, + 'BUILD.bazel': {'text', 'bazel'}, 'CMakeLists.txt': EXTENSIONS['cmake'], 'COPYING': EXTENSIONS['txt'], 'Dockerfile': {'text', 'dockerfile'}, From 9e39167df7c31854e8d032003c20ce1dd137757b Mon Sep 17 00:00:00 2001 From: Christian Knittl-Frank Date: Tue, 15 Sep 2020 10:06:59 +0200 Subject: [PATCH 032/139] Add 'html' to 'xhtml' --- identify/extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/identify/extensions.py b/identify/extensions.py index ba41e9cc..43466f1a 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -188,7 +188,7 @@ 'woff': 
{'binary', 'woff'}, 'woff2': {'binary', 'woff2'}, 'wsgi': {'text', 'wsgi', 'python'}, - 'xhtml': {'text', 'xml', 'xhtml'}, + 'xhtml': {'text', 'xml', 'html', 'xhtml'}, 'xml': {'text', 'xml'}, 'xq': {'text', 'xquery'}, 'xql': {'text', 'xquery'}, From c7dfc6e0f9f2e6d8c36094305352e9f551774d4d Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 17 Sep 2020 12:09:32 -0700 Subject: [PATCH 033/139] v1.5.3 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a54ed4af..f14b23a5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.2 +version = 1.5.3 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 60a1fc1f2a78bf8d56d88cef43dec4eb9874508c Mon Sep 17 00:00:00 2001 From: Jens Heinrich <59469646+JensHeinrich@users.noreply.github.com> Date: Mon, 21 Sep 2020 21:25:00 +0200 Subject: [PATCH 034/139] Add support for tex and bib extensions To enable easier handling of LaTeX projects --- identify/extensions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 43466f1a..4afe47b9 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -10,6 +10,7 @@ 'asar': {'binary', 'asar'}, 'bash': {'text', 'shell', 'bash'}, 'bat': {'text', 'batch'}, + 'bib': {'text', 'bib'}, 'bmp': {'binary', 'image', 'bitmap'}, 'bz2': {'binary', 'bzip2'}, 'c': {'text', 'c'}, @@ -164,6 +165,7 @@ 'swiftdeps': {'text', 'swiftdeps'}, 'tac': {'text', 'twisted', 'python'}, 'tar': {'binary', 'tar'}, + 'tex': {'text', 'tex'}, 'tf': {'text', 'terraform'}, 'tfvars': {'text', 'terraform'}, 'tgz': {'binary', 'gzip'}, From 2afe765db560a49bdcdfeb0ae7bb0b437dd36685 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 21 Sep 2020 13:31:40 -0700 Subject: [PATCH 035/139] v1.5.4 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg 
b/setup.cfg index f14b23a5..fa124b8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.3 +version = 1.5.4 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 70f096acfc6187fb718ffce39c96360f671b177e Mon Sep 17 00:00:00 2001 From: Julian Berman Date: Wed, 23 Sep 2020 20:43:50 -0400 Subject: [PATCH 036/139] Make the first README example slightly more self-contained Saves new users like me from having to hunt down that there's a module within the package that needed importing. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7a58a1fc..17aabd24 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ If you have an actual file on disk, you can get the most information possible (a superset of all other methods): ```python +>>> from identify import identify >>> identify.tags_from_path('/path/to/file.py') {'file', 'text', 'python', 'non-executable'} >>> identify.tags_from_path('/path/to/file-with-shebang') From bdbf4f760e3bc7f4fdc6376f401aca1e9a677d74 Mon Sep 17 00:00:00 2001 From: Julian Berman Date: Wed, 23 Sep 2020 20:50:40 -0400 Subject: [PATCH 037/139] Add support for Lean. Lean is https://leanprover-community.github.io/, an interactive theorem prover / programming language. 
--- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 4afe47b9..1110ccbd 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -90,6 +90,7 @@ 'key': {'text', 'pem'}, 'kml': {'text', 'kml', 'xml'}, 'kt': {'text', 'kotlin'}, + 'lean': {'text', 'lean'}, 'less': {'text', 'less'}, 'lhs': {'text', 'literate-haskell'}, 'libsonnet': {'text', 'jsonnet'}, From f9019ed99ddce06808ed34bc95a023e4a8c7cbf6 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 23 Sep 2020 17:54:57 -0700 Subject: [PATCH 038/139] v1.5.5 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index fa124b8e..c742f5a5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.4 +version = 1.5.5 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 9d9166d50f1cb318d9dd3e9083e1de5f7cf92e3f Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 23 Sep 2020 19:22:35 -0700 Subject: [PATCH 039/139] travis-ci -> azure pipelines --- .travis.yml | 19 ------------------- README.md | 4 ++-- azure-pipelines.yml | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 21 deletions(-) delete mode 100644 .travis.yml create mode 100644 azure-pipelines.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2f270681..00000000 --- a/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -language: python -matrix: - include: - - env: TOXENV=py27 - - env: TOXENV=py35 - python: 3.5 - - env: TOXENV=py36 - python: 3.6 - - env: TOXENV=pypy - python: pypy - - env: TOXENV=pre-commit - python: 3.6 -install: pip install coveralls tox -script: tox -after_success: coveralls -cache: - directories: - - $HOME/.cache/pip - - $HOME/.cache/pre-commit diff --git a/README.md b/README.md index 17aabd24..87b4b731 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ 
identify ======== -[![Build Status](https://travis-ci.org/chriskuehl/identify.svg?branch=master)](https://travis-ci.org/chriskuehl/identify) -[![Coverage Status](https://coveralls.io/repos/github/chriskuehl/identify/badge.svg?branch=master)](https://coveralls.io/github/chriskuehl/identify?branch=master) +[![Build Status](https://dev.azure.com/asottile/asottile/_apis/build/status/pre-commit.identify?branchName=master)](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master) +[![Azure DevOps coverage](https://img.shields.io/azure-devops/coverage/asottile/asottile/67/master.svg)](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master) [![PyPI version](https://badge.fury.io/py/identify.svg)](https://pypi.python.org/pypi/identify) File identification library for Python. diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000..bf08d5e4 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,20 @@ +trigger: + branches: + include: [master, test-me-*] + tags: + include: ['*'] + +resources: + repositories: + - repository: asottile + type: github + endpoint: github + name: asottile/azure-pipeline-templates + ref: refs/tags/v2.0.0 + +jobs: +- template: job--pre-commit.yml@asottile +- template: job--python-tox.yml@asottile + parameters: + toxenvs: [pypy3, py36, py37, py38] + os: linux From 827d9911e3b1fc9fd9125bdf42ee20d5c7ddb96e Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 23 Sep 2020 19:27:33 -0700 Subject: [PATCH 040/139] adjust setuptools url --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c742f5a5..11ce6c2d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ version = 1.5.5 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown -url = https://github.com/chriskuehl/identify +url = https://github.com/pre-commit/identify author = 
Chris Kuehl author_email = ckuehl@ocf.berkeley.edu license = MIT From 6c06709237a4c9197659c913bb20939e7ca69462 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 23 Sep 2020 19:30:27 -0700 Subject: [PATCH 041/139] use covdefaults --- .coveragerc | 29 ----------------------------- Makefile | 16 ---------------- requirements-dev.txt | 1 + setup.cfg | 5 ++++- tox.ini | 1 - 5 files changed, 5 insertions(+), 47 deletions(-) delete mode 100644 .coveragerc delete mode 100644 Makefile diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index f995964c..00000000 --- a/.coveragerc +++ /dev/null @@ -1,29 +0,0 @@ -[run] -branch = True -source = - . -omit = - .tox/* - /usr/* - setup.py - -[report] -show_missing = True - -exclude_lines = - # Have to re-enable the standard pragma - \#\s*pragma: no cover - - # Don't complain if tests don't hit defensive assertion code: - ^\s*raise AssertionError\b - ^\s*raise NotImplementedError\b - ^\s*return NotImplemented\b - ^\s*raise$ - - # Don't complain if non-runnable code isn't run: - ^if __name__ == ['"]__main__['"]:$ - -[html] -directory = coverage-html - -# vim:ft=dosini diff --git a/Makefile b/Makefile deleted file mode 100644 index 0ce294d6..00000000 --- a/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -.PHONY: minimal -minimal: venv - -venv: setup.py requirements-dev.txt tox.ini - tox -e venv - -.PHONY: test -test: - tox - -.PHONY: clean -clean: - find -name '*.pyc' -delete - find -name '__pycache__' -delete - rm -rf .tox - rm -rf venv diff --git a/requirements-dev.txt b/requirements-dev.txt index 6732f9ce..e2e6f92c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ +covdefaults coverage pre-commit>=0.12.0 pytest diff --git a/setup.cfg b/setup.cfg index c742f5a5..81eec28a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,5 +37,8 @@ exclude = tests* testing* -[wheel] +[bdist_wheel] universal = True + +[coverage:run] +plugins = covdefaults diff --git a/tox.ini b/tox.ini index 33cae3c9..39ab8235 100644 
--- a/tox.ini +++ b/tox.ini @@ -1,6 +1,5 @@ [tox] envlist = py27,py35,py36,pypy,pre-commit -tox_pip_extensions_ext_venv_update = true [testenv] deps = -rrequirements-dev.txt From 85365e7f5cafa13b1f843921c1a4111d6fcb1fd5 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 23 Sep 2020 19:35:02 -0700 Subject: [PATCH 042/139] use the default flake8 setup --- tests/identify_test.py | 10 ++++++---- tox.ini | 3 --- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/identify_test.py b/tests/identify_test.py index a80c4015..8e00f606 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -105,7 +105,7 @@ def test_tags_from_path_plist_text(tmpdir): x = tmpdir.join('t.plist') x.write( '\n' - '\n' + '\n' # noqa: E501 '\n' '\n' '\tLast Login Name\n' @@ -179,12 +179,14 @@ def test_tags_from_interpreter(interpreter, expected): (b'', True), ('éóñəå ⊂(◉‿◉)つ(ノ≥∇≤)ノ'.encode('utf8'), True), (r'¯\_(ツ)_/¯'.encode('utf8'), True), - ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪┏(・o・)┛♪'.encode('utf8'), True), + ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪'.encode('utf8'), True), ('éóñå'.encode('latin1'), True), (b'hello world\x00', False), - (b'\x7f\x45\x4c\x46\x02\x01\x01', False), # first few bytes of /bin/bash - (b'\x43\x92\xd9\x0f\xaf\x32\x2c', False), # some /dev/urandom output + # first few bytes of /bin/bash + (b'\x7f\x45\x4c\x46\x02\x01\x01', False), + # some /dev/urandom output + (b'\x43\x92\xd9\x0f\xaf\x32\x2c', False), ), ) def test_is_text(data, expected): diff --git a/tox.ini b/tox.ini index 39ab8235..2876390d 100644 --- a/tox.ini +++ b/tox.ini @@ -14,8 +14,5 @@ skip_install = true deps = pre-commit commands = pre-commit run --all-files --show-diff-on-failure -[flake8] -max-line-length = 119 - [pep8] ignore = E265,E501,W504 From dad9e9a67c41e0c2bd0f83d675b12de3b3b697c0 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 25 Sep 2020 12:07:27 -0700 Subject: [PATCH 043/139] remove unencessary install (not needed at test time) --- 
requirements-dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index e2e6f92c..0c5a37eb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,3 @@ covdefaults coverage -pre-commit>=0.12.0 pytest From bff7b18cc1862c0c3ae29cb840d22cd40d4af437 Mon Sep 17 00:00:00 2001 From: Raphael Boidol Date: Sat, 10 Oct 2020 13:46:55 +0200 Subject: [PATCH 044/139] Add 'webp' file extension --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 1110ccbd..21cd5c80 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -186,6 +186,7 @@ 'vue': {'text', 'vue'}, 'war': {'binary', 'zip', 'jar'}, 'wav': {'binary', 'audio', 'wav'}, + 'webp': {'binary', 'image', 'webp'}, 'whl': {'binary', 'wheel', 'zip'}, 'wkt': {'text', 'wkt'}, 'woff': {'binary', 'woff'}, From faa211d9331057a6d8e30cc73324e5aa343a5dc6 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 10 Oct 2020 09:12:59 -0700 Subject: [PATCH 045/139] v1.5.6 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 1eed32f2..887d1912 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.5 +version = 1.5.6 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From d1a7500b3003d4f96a4a5d7c8c6718af4a0e8a34 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 29 Oct 2020 11:37:04 -0700 Subject: [PATCH 046/139] use pre-commit.ci --- README.md | 1 + azure-pipelines.yml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87b4b731..accf29ba 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ identify [![Build 
Status](https://dev.azure.com/asottile/asottile/_apis/build/status/pre-commit.identify?branchName=master)](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master) [![Azure DevOps coverage](https://img.shields.io/azure-devops/coverage/asottile/asottile/67/master.svg)](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master) +[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/pre-commit/identify/master.svg)](https://results.pre-commit.ci/latest/github/pre-commit/identify/master) [![PyPI version](https://badge.fury.io/py/identify.svg)](https://pypi.python.org/pypi/identify) File identification library for Python. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index bf08d5e4..7bb7600a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,7 +13,6 @@ resources: ref: refs/tags/v2.0.0 jobs: -- template: job--pre-commit.yml@asottile - template: job--python-tox.yml@asottile parameters: toxenvs: [pypy3, py36, py37, py38] From 6ddb7f729513a2684b87af012e06e551d30efbf1 Mon Sep 17 00:00:00 2001 From: Ruairidh MacLeod Date: Mon, 2 Nov 2020 13:46:53 +0000 Subject: [PATCH 047/139] add c# file types --- identify/extensions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 21cd5c80..5d5a6854 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -27,6 +27,7 @@ 'cpp': {'text', 'c++'}, 'crt': {'text', 'pem'}, 'cs': {'text', 'c#'}, + 'csproj': {'text', 'xml', 'csproj'}, 'csh': {'text', 'shell', 'csh'}, 'cson': {'text', 'cson'}, 'css': {'text', 'css'}, @@ -151,6 +152,7 @@ 'scm': {'text', 'scheme'}, 'scss': {'text', 'scss'}, 'sh': {'text', 'shell'}, + 'sln': {'text', 'sln'}, 'sls': {'text', 'salt'}, 'so': {'binary'}, 'sol': {'text', 'solidity'}, From 08875d3edc90affa2061c8307f1b0eed2ae7dbc1 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 2 Nov 2020 09:20:59 -0800 Subject: [PATCH 048/139] v1.5.7 --- setup.cfg 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 887d1912..2b90b9fd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.6 +version = 1.5.7 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 4553f2bc128086c295f1073085fa7edd521d149f Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Tue, 3 Nov 2020 12:21:58 +0200 Subject: [PATCH 049/139] Add .twig extension --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 5d5a6854..d72a5898 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -178,6 +178,7 @@ 'ts': {'text', 'ts'}, 'tsx': {'text', 'tsx'}, 'ttf': {'binary', 'ttf'}, + 'twig': {'text', 'twig'}, 'txsprofile': {'text', 'ini', 'txsprofile'}, 'txt': {'text', 'plain-text'}, 'v': {'text', 'verilog'}, From 7e8e5088ed4ad9b67930a845c3d0d3727672f2fc Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 3 Nov 2020 08:46:03 -0800 Subject: [PATCH 050/139] v1.5.8 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 2b90b9fd..99134dd4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.7 +version = 1.5.8 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 1f7a1e2312055a3ffb9a50ded533e2c27eab544f Mon Sep 17 00:00:00 2001 From: axk Date: Tue, 3 Nov 2020 16:05:11 +0100 Subject: [PATCH 051/139] Adds support for Arch Linux packaging descriptions. 
--- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index d72a5898..22b85c50 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -257,6 +257,7 @@ 'PATENTS': EXTENSIONS['txt'], 'Pipfile': EXTENSIONS['toml'], 'Pipfile.lock': EXTENSIONS['json'], + 'PKGBUILD': {'text', 'bash', 'pkgbuild', 'alpm'}, 'README': EXTENSIONS['txt'], 'Rakefile': EXTENSIONS['rb'], 'setup.cfg': EXTENSIONS['ini'], From 17e9b5359ae5cd4e9fce8c89a9ff3826fd63b74e Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 3 Nov 2020 10:44:49 -0800 Subject: [PATCH 052/139] v1.5.9 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 99134dd4..722410c7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.8 +version = 1.5.9 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From c8d7fb5d595957486fa1ee271e37da02d38f7564 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Nov 2020 17:05:21 +0000 Subject: [PATCH 053/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71873800..331f2c2c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.5.0 + rev: v3.3.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -14,16 +14,16 @@ repos: - id: check-byte-order-marker - id: fix-encoding-pragma - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.0 + rev: 3.8.4 hooks: - id: flake8 exclude: ^identify/vendor/licenses\.py$ - repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.5.2 + rev: v1.5.4 hooks: - id: autopep8 - repo: 
https://github.com/asottile/reorder_python_imports - rev: v2.3.0 + rev: v2.3.6 hooks: - id: reorder-python-imports args: [ From a146c1259c656299037d458a17172f60beb238d3 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 19 Nov 2020 17:11:35 -0800 Subject: [PATCH 054/139] Add link to GitHub Sponsors + Open Collective at the time of writing I am currently unemployed. I'd love to make open source a full time career. if you or your company is deriving value from this free software, please consider [sponsoring] or [supporting]. [sponsoring]: https://github.com/sponsors/asottile [supporting]: https://opencollective.com/pre-commit Committed via https://github.com/asottile/all-repos --- .github/FUNDING.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..9408e44d --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: asottile +open_collective: pre-commit From 0dc6b33ce24d626869207153a7975bb580373230 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 22 Nov 2020 13:11:34 -0800 Subject: [PATCH 055/139] enforce all names have a type --- tests/extensions_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/extensions_test.py b/tests/extensions_test.py index 44f16a81..4527a58c 100644 --- a/tests/extensions_test.py +++ b/tests/extensions_test.py @@ -13,6 +13,12 @@ def test_extensions_have_binary_or_text(extension): assert len({'text', 'binary'} & tags) == 1, tags +@pytest.mark.parametrize('name', extensions.NAMES) +def test_names_have_binary_or_text(name): + tags = extensions.NAMES[name] + assert len({'text', 'binary'} & tags) == 1, tags + + @pytest.mark.parametrize('extension', extensions.EXTENSIONS_NEED_BINARY_CHECK) def test_need_binary_check_do_not_specify_text_binary(extension): tags = extensions.EXTENSIONS_NEED_BINARY_CHECK[extension] From 0aef7d9078972f9d3c8feb072a8cb4a4459a0bf4 Mon Sep 17 00:00:00 2001 From: 
"C.A.M. Gerlach" Date: Sun, 22 Nov 2020 13:38:41 -0600 Subject: [PATCH 056/139] Add support for lektor file extensions --- identify/extensions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 22b85c50..2a4040c3 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -92,10 +92,12 @@ 'kml': {'text', 'kml', 'xml'}, 'kt': {'text', 'kotlin'}, 'lean': {'text', 'lean'}, + 'lektorproject': {'text', 'ini', 'lektorproject'}, 'less': {'text', 'less'}, 'lhs': {'text', 'literate-haskell'}, 'libsonnet': {'text', 'jsonnet'}, 'lidr': {'text', 'idris'}, + 'lr': {'text', 'lektor'}, 'lua': {'text', 'lua'}, 'm': {'text', 'c', 'objective-c'}, 'manifest': {'text', 'manifest'}, From a3da59e3b01687c471849fe143c837d7d6d17d18 Mon Sep 17 00:00:00 2001 From: "C.A.M. Gerlach" Date: Sun, 22 Nov 2020 13:42:24 -0600 Subject: [PATCH 057/139] Add support for additional tool config files without extensions --- identify/extensions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 2a4040c3..59169108 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -225,14 +225,18 @@ '.bash_profile': EXTENSIONS['bash'], '.bashrc': EXTENSIONS['bash'], '.bowerrc': EXTENSIONS['json'] | {'bowerrc'}, + '.browserslistrc': {'text', 'browserslistrc'}, + '.codespellrc': EXTENSIONS['ini'] | {'codespellrc'}, '.coveragerc': EXTENSIONS['ini'] | {'coveragerc'}, '.cshrc': EXTENSIONS['csh'], + '.csslintrc': EXTENSIONS['json'] | {'csslintrc'}, '.dockerignore': {'text', 'dockerignore'}, '.editorconfig': {'text', 'editorconfig'}, '.flake8': EXTENSIONS['ini'] | {'flake8'}, '.gitattributes': {'text', 'gitattributes'}, '.gitconfig': EXTENSIONS['ini'] | {'gitconfig'}, '.gitignore': {'text', 'gitignore'}, + '.gitlint': EXTENSIONS['ini'] | {'gitlint'}, '.gitmodules': {'text', 'gitmodules'}, '.hgrc': EXTENSIONS['ini'] | {'hgrc'}, '.jshintrc': EXTENSIONS['json'] | {'jshintrc'}, @@ -260,6 +264,7 
@@ 'Pipfile': EXTENSIONS['toml'], 'Pipfile.lock': EXTENSIONS['json'], 'PKGBUILD': {'text', 'bash', 'pkgbuild', 'alpm'}, + 'pylintrc': EXTENSIONS['ini'] | {'pylintrc'}, 'README': EXTENSIONS['txt'], 'Rakefile': EXTENSIONS['rb'], 'setup.cfg': EXTENSIONS['ini'], From 2ac0455f6a4acf39e0aa2b12e5caaa1c31c1c7c5 Mon Sep 17 00:00:00 2001 From: "C.A.M. Gerlach" Date: Sun, 22 Nov 2020 13:43:53 -0600 Subject: [PATCH 058/139] Add support for additional common text metafiles lacking extensions --- identify/extensions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 59169108..0ec03c0c 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -252,6 +252,8 @@ 'BUILD': {'text', 'bazel'}, 'BUILD.bazel': {'text', 'bazel'}, 'CMakeLists.txt': EXTENSIONS['cmake'], + 'CHANGELOG': EXTENSIONS['txt'], + 'CONTRIBUTING': EXTENSIONS['txt'], 'COPYING': EXTENSIONS['txt'], 'Dockerfile': {'text', 'dockerfile'}, 'Gemfile': EXTENSIONS['rb'], @@ -259,6 +261,7 @@ 'LICENSE': EXTENSIONS['txt'], 'MAINTAINERS': EXTENSIONS['txt'], 'Makefile': EXTENSIONS['mk'], + 'NEWS': EXTENSIONS['txt'], 'NOTICE': EXTENSIONS['txt'], 'PATENTS': EXTENSIONS['txt'], 'Pipfile': EXTENSIONS['toml'], From ede19ff562dddfbd0b9580ee4cf1bfd520f250e1 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 22 Nov 2020 17:19:04 -0800 Subject: [PATCH 059/139] v1.5.10 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 722410c7..51540dd7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.9 +version = 1.5.10 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 1f6098366456a539a3539a35ef99ff8f3dcac7e8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Dec 2020 16:43:40 +0000 Subject: [PATCH 060/139] [pre-commit.ci] 
pre-commit autoupdate --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 331f2c2c..06d55efb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.3.0 + rev: v3.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer From c6af994bcdb3dc9922f91d20ccb887d5a2dac97a Mon Sep 17 00:00:00 2001 From: John de Rooij Date: Thu, 31 Dec 2020 10:51:32 +0100 Subject: [PATCH 061/139] Also identify other MS Visualstudio project files as XML The .csproj was already present in the list. Now also the .vbproj, .pyproj and .vcxproj are added and identified as XML files. --- identify/extensions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 0ec03c0c..e4941527 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -139,6 +139,7 @@ 'pxi': {'text', 'cython'}, 'py': {'text', 'python'}, 'pyi': {'text', 'pyi'}, + 'pyproj': {'text', 'xml', 'pyproj'}, 'pyx': {'text', 'cython'}, 'pyz': {'binary', 'pyz'}, 'pyzw': {'binary', 'pyz'}, @@ -184,6 +185,8 @@ 'txsprofile': {'text', 'ini', 'txsprofile'}, 'txt': {'text', 'plain-text'}, 'v': {'text', 'verilog'}, + 'vbproj': {'text', 'xml', 'vbproj'}, + 'vcxproj': {'text', 'xml', 'vcxproj'}, 'vdx': {'text', 'vdx'}, 'vh': {'text', 'verilog'}, 'vhd': {'text', 'vhdl'}, From c67d7b4c7b0ea97d5a4109297ff9334f0a871e9c Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 31 Dec 2020 11:03:35 -0800 Subject: [PATCH 062/139] v1.5.11 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 51540dd7..c3f27ee2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.10 +version = 1.5.11 description = File identification library for Python long_description = file: README.md long_description_content_type = 
text/markdown From 1a4c9efe1e02c41bc90b697f80c547192af80169 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Jan 2021 16:45:08 +0000 Subject: [PATCH 063/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 06d55efb..59813df2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,6 @@ repos: '--add-import', 'from __future__ import unicode_literals', ] - repo: https://github.com/asottile/add-trailing-comma - rev: v2.0.1 + rev: v2.0.2 hooks: - id: add-trailing-comma From fd6936fac91a118dac5b5f214f2ee0bbe7f63363 Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Sat, 9 Jan 2021 11:51:38 -0500 Subject: [PATCH 064/139] Add .myst files to extensions.py --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index e4941527..9c418030 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -111,6 +111,7 @@ 'mli': {'text', 'ocaml'}, 'mm': {'text', 'c++', 'objective-c++'}, 'modulemap': {'text', 'modulemap'}, + 'myst': {'text', 'myst'}, 'ngdoc': {'text', 'ngdoc'}, 'nim': {'text', 'nim'}, 'nims': {'text', 'nim'}, From 6aa47acdd7c887a5ba6ee22d7122873777616d3f Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 9 Jan 2021 08:59:38 -0800 Subject: [PATCH 065/139] v1.5.12 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c3f27ee2..ee799ab8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.11 +version = 1.5.12 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 13da8745cb38f2f727359cb447762d233d829ec3 Mon Sep 17 00:00:00 2001 From: Jon Dufresne Date: Sat, 16 Jan 2021 18:05:32 
-0800 Subject: [PATCH 066/139] Add '.rake' extension as a Ruby file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The '.rake' extension is used to break large Rakefiles into smaller files. Rakefiles are Ruby files. https://ruby.github.io/rake/doc/rakefile_rdoc.html#label-Multiple+Rake+Files > Multiple Rake Files > > Not all tasks need to be included in a single Rakefile. Additional > rake files (with the file extension “.rake”) may be placed in rakelib > directory located at the top level of a project (i.e. the same > directory that contains the main Rakefile). > > Also, rails projects may include additional rake files in the > lib/tasks directory. --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 9c418030..0098ec68 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -145,6 +145,7 @@ 'pyz': {'binary', 'pyz'}, 'pyzw': {'binary', 'pyz'}, 'r': {'text', 'r'}, + 'rake': {'text', 'ruby'}, 'rb': {'text', 'ruby'}, 'rs': {'text', 'rust'}, 'rst': {'text', 'rst'}, From ec7d2c87f7a833f5dc014cf58316391b6d7f4712 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 16 Jan 2021 19:59:01 -0800 Subject: [PATCH 067/139] v1.5.13 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ee799ab8..9b919f1c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.12 +version = 1.5.13 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 800e17876a02ea9efe9f6eda5aca5a99816f1fcc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Jan 2021 16:40:19 +0000 Subject: [PATCH 068/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 59813df2..01c9da5a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,6 @@ repos: '--add-import', 'from __future__ import unicode_literals', ] - repo: https://github.com/asottile/add-trailing-comma - rev: v2.0.2 + rev: v2.1.0 hooks: - id: add-trailing-comma From 17a62402da37910b3fd188f94b9936a9f810656f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Feb 2021 16:49:17 +0000 Subject: [PATCH 069/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 01c9da5a..c3381ffa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: hooks: - id: autopep8 - repo: https://github.com/asottile/reorder_python_imports - rev: v2.3.6 + rev: v2.4.0 hooks: - id: reorder-python-imports args: [ From 9bb588c195843a3256a26a3503d7eebf15f6456d Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Sat, 20 Feb 2021 13:33:03 -0500 Subject: [PATCH 070/139] Add support for .clang-tidy and .clang-format These two config files are very common for C++ commit hooks and are yaml formats. 
--- identify/extensions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 0098ec68..62a7f5b6 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -231,6 +231,8 @@ '.bashrc': EXTENSIONS['bash'], '.bowerrc': EXTENSIONS['json'] | {'bowerrc'}, '.browserslistrc': {'text', 'browserslistrc'}, + '.clang-format': EXTENSIONS['yaml'], + '.clang-tidy': EXTENSIONS['yaml'], '.codespellrc': EXTENSIONS['ini'] | {'codespellrc'}, '.coveragerc': EXTENSIONS['ini'] | {'coveragerc'}, '.cshrc': EXTENSIONS['csh'], From 32cd354f5fc1a19457eea3aa29cbfb1ea3d9ee63 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 20 Feb 2021 12:04:30 -0800 Subject: [PATCH 071/139] v1.5.14 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 9b919f1c..c5dce83c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.13 +version = 1.5.14 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From bf64fcdbe5f5a6f818fe087fb26886296cb2f35a Mon Sep 17 00:00:00 2001 From: "C.A.M. 
Gerlach" Date: Thu, 25 Feb 2021 13:39:22 -0600 Subject: [PATCH 072/139] Add new global constants for different tag types --- identify/identify.py | 11 +++++++---- tests/identify_test.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/identify/identify.py b/identify/identify.py index 1c0e6777..45ad3beb 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -25,7 +25,10 @@ TEXT = 'text' BINARY = 'binary' -ALL_TAGS = {DIRECTORY, SYMLINK, FILE, EXECUTABLE, NON_EXECUTABLE, TEXT, BINARY} +TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK)) +MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE)) +ENCODING_TAGS = frozenset((BINARY, TEXT)) +ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS} ALL_TAGS.update(*extensions.EXTENSIONS.values()) ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values()) ALL_TAGS.update(*extensions.NAMES.values()) @@ -62,14 +65,14 @@ def tags_from_path(path): # some extensions can be both binary and text # see EXTENSIONS_NEED_BINARY_CHECK - if not {TEXT, BINARY} & tags: + if not ENCODING_TAGS & tags: if file_is_text(path): tags.add(TEXT) else: tags.add(BINARY) - assert {TEXT, BINARY} & tags, tags - assert {EXECUTABLE, NON_EXECUTABLE} & tags, tags + assert ENCODING_TAGS & tags, tags + assert MODE_TAGS & tags, tags return tags diff --git a/tests/identify_test.py b/tests/identify_test.py index 8e00f606..6f9b9966 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -14,6 +14,20 @@ def test_all_tags_includes_basic_ones(): assert 'file' in identify.ALL_TAGS assert 'directory' in identify.ALL_TAGS + assert 'executable' in identify.ALL_TAGS + assert 'text' in identify.ALL_TAGS + + +@pytest.mark.parametrize( + 'tag_group', + ( + identify.TYPE_TAGS, + identify.MODE_TAGS, + identify.ENCODING_TAGS, + ), +) +def test_all_tags_contains_all_groups(tag_group): + assert tag_group < identify.ALL_TAGS def test_all_tags_contains_each_type(): From 2daf98bc5500ce738e266f3013e7515a34062691 Mon Sep 17 00:00:00 
2001 From: Anthony Sottile Date: Fri, 26 Feb 2021 18:01:25 -0800 Subject: [PATCH 073/139] v1.6.0 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c5dce83c..e4b96ef2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.5.14 +version = 1.6.0 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 4180e5b7ed858002c7cad851239e63d5a4575625 Mon Sep 17 00:00:00 2001 From: Raphael Boidol Date: Sat, 27 Feb 2021 17:56:37 +0100 Subject: [PATCH 074/139] Add `avif` file extension --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 62a7f5b6..523f3f14 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -8,6 +8,7 @@ 'asciidoc': {'text', 'asciidoc'}, 'apinotes': {'text', 'apinotes'}, 'asar': {'binary', 'asar'}, + 'avif': {'binary', 'image', 'avif'}, 'bash': {'text', 'shell', 'bash'}, 'bat': {'text', 'batch'}, 'bib': {'text', 'bib'}, From f27d6ebd9b73aa3724f6469aef2fd982d9948d44 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 27 Feb 2021 09:10:32 -0800 Subject: [PATCH 075/139] v1.6.1 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index e4b96ef2..2e2bef35 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.6.0 +version = 1.6.1 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 31be9199c2a4d807c16241fafad59a2b58e34c75 Mon Sep 17 00:00:00 2001 From: gabrielf-eb <53191391+gabrielf-eb@users.noreply.github.com> Date: Mon, 1 Mar 2021 09:56:46 -0300 Subject: [PATCH 076/139] Fix frozenset usage, not compatible with Python2.7 --- identify/identify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/identify/identify.py b/identify/identify.py index 45ad3beb..f284542d 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -28,7 +28,7 @@ TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK)) MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE)) ENCODING_TAGS = frozenset((BINARY, TEXT)) -ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS} +ALL_TAGS = set(TYPE_TAGS).union(MODE_TAGS).union(ENCODING_TAGS) ALL_TAGS.update(*extensions.EXTENSIONS.values()) ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values()) ALL_TAGS.update(*extensions.NAMES.values()) From bc830b53670d15965709a4b2000ab7a9bf520975 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 1 Mar 2021 07:05:55 -0800 Subject: [PATCH 077/139] v1.6.2 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 2e2bef35..4f7058db 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.6.1 +version = 1.6.2 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 4fa0eadf2438a8e1b20e2aa5bb3d7231f968ab25 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 1 Mar 2021 07:06:48 -0800 Subject: [PATCH 078/139] Revert "Merge pull request #171 from gabrielf-eb/patch-1" This reverts commit 0dd163a3e6013e2ea7258166ee98952eccd9c9ce, reversing changes made to f27d6ebd9b73aa3724f6469aef2fd982d9948d44. 
--- identify/identify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/identify/identify.py b/identify/identify.py index f284542d..45ad3beb 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -28,7 +28,7 @@ TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK)) MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE)) ENCODING_TAGS = frozenset((BINARY, TEXT)) -ALL_TAGS = set(TYPE_TAGS).union(MODE_TAGS).union(ENCODING_TAGS) +ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS} ALL_TAGS.update(*extensions.EXTENSIONS.values()) ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values()) ALL_TAGS.update(*extensions.NAMES.values()) From 589a7dd6937054a8f6e3e07c1db0bcb64dd8f92f Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 1 Mar 2021 07:18:49 -0800 Subject: [PATCH 079/139] drop python 2 --- .pre-commit-config.yaml | 29 ++++++++++------ bin/vendor-licenses | 9 ++--- identify/cli.py | 8 ++--- identify/extensions.py | 5 --- identify/identify.py | 66 +++++++++++++++++++------------------ identify/interpreters.py | 4 --- identify/vendor/licenses.py | 3 -- setup.cfg | 25 ++++++++++---- setup.py | 4 --- tests/cli_test.py | 4 --- tests/extensions_test.py | 4 --- tests/identify_test.py | 10 ++---- tox.ini | 2 +- 13 files changed, 82 insertions(+), 91 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c3381ffa..a1dc351b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,22 +2,24 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.4.0 hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - id: check-docstring-first - - id: check-merge-conflict - id: check-yaml - id: debug-statements - id: double-quote-string-fixer + - id: end-of-file-fixer - id: name-tests-test - - id: check-added-large-files - - id: check-byte-order-marker - - id: fix-encoding-pragma + - id: requirements-txt-fixer + - id: trailing-whitespace +- repo: https://github.com/asottile/setup-cfg-fmt + 
rev: v1.16.0 + hooks: + - id: setup-cfg-fmt - repo: https://gitlab.com/pycqa/flake8 rev: 3.8.4 hooks: - id: flake8 exclude: ^identify/vendor/licenses\.py$ + additional_dependencies: [flake8-typing-imports==1.10.1] - repo: https://github.com/pre-commit/mirrors-autopep8 rev: v1.5.4 hooks: @@ -26,11 +28,18 @@ repos: rev: v2.4.0 hooks: - id: reorder-python-imports - args: [ - '--add-import', 'from __future__ import absolute_import', - '--add-import', 'from __future__ import unicode_literals', - ] + args: [--py3-plus] - repo: https://github.com/asottile/add-trailing-comma rev: v2.1.0 hooks: - id: add-trailing-comma + args: [--py36-plus] +- repo: https://github.com/asottile/pyupgrade + rev: v2.10.0 + hooks: + - id: pyupgrade + args: [--py36-plus] +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.812 + hooks: + - id: mypy diff --git a/bin/vendor-licenses b/bin/vendor-licenses index 83c0c4e9..33ce89a1 100755 --- a/bin/vendor-licenses +++ b/bin/vendor-licenses @@ -1,19 +1,15 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- """Usage: ./bin/vendor-licenses > identify/vendor/licenses.py """ -from __future__ import absolute_import -from __future__ import unicode_literals - import argparse import os.path import subprocess import tempfile -def main(): +def main() -> int: parser = argparse.ArgumentParser() parser.add_argument('--revision', default='HEAD') args = parser.parse_args() @@ -51,12 +47,13 @@ def main(): print('LICENSES = (') for spdx, text in sorted(licenses): print(' (') - print(' {!r},'.format(spdx)) + print(f' {spdx!r},') print(" '''\\") print(text.replace('\t', ' ').replace(' \n', '').strip()) print("''',") print(' ),') print(')') + return 0 if __name__ == '__main__': diff --git a/identify/cli.py b/identify/cli.py index 511caf69..28e61550 100644 --- a/identify/cli.py +++ b/identify/cli.py @@ -1,14 +1,12 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals - import argparse import json +from typing 
import Optional +from typing import Sequence from identify import identify -def main(argv=None): +def main(argv: Optional[Sequence[str]] = None) -> int: parser = argparse.ArgumentParser() parser.add_argument('--filename-only', action='store_true') parser.add_argument('path') diff --git a/identify/extensions.py b/identify/extensions.py index 523f3f14..778b695c 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -1,8 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals - - EXTENSIONS = { 'adoc': {'text', 'asciidoc'}, 'asciidoc': {'text', 'asciidoc'}, diff --git a/identify/identify.py b/identify/identify.py index 45ad3beb..0f936a8f 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -1,14 +1,13 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import io import os.path import re import shlex import string import sys +from typing import IO +from typing import List +from typing import Optional +from typing import Set +from typing import Tuple from identify import extensions from identify import interpreters @@ -28,17 +27,17 @@ TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK)) MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE)) ENCODING_TAGS = frozenset((BINARY, TEXT)) -ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS} -ALL_TAGS.update(*extensions.EXTENSIONS.values()) -ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values()) -ALL_TAGS.update(*extensions.NAMES.values()) -ALL_TAGS.update(*interpreters.INTERPRETERS.values()) -ALL_TAGS = frozenset(ALL_TAGS) +_ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS} +_ALL_TAGS.update(*extensions.EXTENSIONS.values()) +_ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values()) +_ALL_TAGS.update(*extensions.NAMES.values()) +_ALL_TAGS.update(*interpreters.INTERPRETERS.values()) +ALL_TAGS = frozenset(_ALL_TAGS) -def 
tags_from_path(path): +def tags_from_path(path: str) -> Set[str]: if not os.path.lexists(path): - raise ValueError('{} does not exist.'.format(path)) + raise ValueError(f'{path} does not exist.') if os.path.isdir(path): return {DIRECTORY} if os.path.islink(path): @@ -76,8 +75,8 @@ def tags_from_path(path): return tags -def tags_from_filename(filename): - _, filename = os.path.split(filename) +def tags_from_filename(path: str) -> Set[str]: + _, filename = os.path.split(path) _, ext = os.path.splitext(filename) ret = set() @@ -98,7 +97,7 @@ def tags_from_filename(filename): return ret -def tags_from_interpreter(interpreter): +def tags_from_interpreter(interpreter: str) -> Set[str]: _, _, interpreter = interpreter.rpartition('/') # Try "python3.5.2" => "python3.5" => "python3" until one matches. @@ -111,7 +110,7 @@ def tags_from_interpreter(interpreter): return set() -def is_text(bytesio): +def is_text(bytesio: IO[bytes]) -> bool: """Return whether the first KB of contents seems to be binary. 
This is roughly based on libmagic's binary/text detection: @@ -125,14 +124,14 @@ def is_text(bytesio): return not bool(bytesio.read(1024).translate(None, text_chars)) -def file_is_text(path): +def file_is_text(path: str) -> bool: if not os.path.lexists(path): - raise ValueError('{} does not exist.'.format(path)) + raise ValueError(f'{path} does not exist.') with open(path, 'rb') as f: return is_text(f) -def _shebang_split(line): +def _shebang_split(line: str) -> List[str]: try: # shebangs aren't supposed to be quoted, though some tools such as # setuptools will write them with quotes so we'll best-guess parse @@ -144,11 +143,14 @@ def _shebang_split(line): return line.split() -def _parse_nix_shebang(bytesio, cmd): +def _parse_nix_shebang( + bytesio: IO[bytes], + cmd: Tuple[str, ...], +) -> Tuple[str, ...]: while bytesio.read(2) == b'#!': - next_line = bytesio.readline() + next_line_b = bytesio.readline() try: - next_line = next_line.decode('UTF-8') + next_line = next_line_b.decode('UTF-8') except UnicodeDecodeError: return cmd @@ -165,13 +167,13 @@ def _parse_nix_shebang(bytesio, cmd): return cmd -def parse_shebang(bytesio): +def parse_shebang(bytesio: IO[bytes]) -> Tuple[str, ...]: """Parse the shebang from a file opened for reading binary.""" if bytesio.read(2) != b'#!': return () - first_line = bytesio.readline() + first_line_b = bytesio.readline() try: - first_line = first_line.decode('UTF-8') + first_line = first_line_b.decode('UTF-8') except UnicodeDecodeError: return () @@ -188,10 +190,10 @@ def parse_shebang(bytesio): return cmd -def parse_shebang_from_file(path): +def parse_shebang_from_file(path: str) -> Tuple[str, ...]: """Parse the shebang given a file path.""" if not os.path.lexists(path): - raise ValueError('{} does not exist.'.format(path)) + raise ValueError(f'{path} does not exist.') if not os.access(path, os.X_OK): return () @@ -203,13 +205,13 @@ def parse_shebang_from_file(path): WS_RE = re.compile(r'\s+') -def _norm_license(s): +def 
_norm_license(s: str) -> str: s = COPYRIGHT_RE.sub('', s) s = WS_RE.sub(' ', s) return s.strip() -def license_id(filename): +def license_id(filename: str) -> Optional[str]: """Return the spdx id for the license contained in `filename`. If no license is detected, returns `None`. @@ -225,7 +227,7 @@ def license_id(filename): """ import editdistance # `pip install identify[license]` - with io.open(filename, encoding='UTF-8') as f: + with open(filename, encoding='UTF-8') as f: contents = f.read() norm = _norm_license(contents) diff --git a/identify/interpreters.py b/identify/interpreters.py index 7feb4b1b..dabf36cf 100644 --- a/identify/interpreters.py +++ b/identify/interpreters.py @@ -1,7 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals - INTERPRETERS = { 'ash': {'shell', 'ash'}, 'awk': {'awk'}, diff --git a/identify/vendor/licenses.py b/identify/vendor/licenses.py index 912b5c83..3478d0f2 100644 --- a/identify/vendor/licenses.py +++ b/identify/vendor/licenses.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals LICENSES = ( ( '0BSD', diff --git a/setup.cfg b/setup.cfg index 4f7058db..a7fad3e8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,26 +11,26 @@ license = MIT license_file = LICENSE classifiers = License :: OSI Approved :: MIT License - Programming Language :: Python :: 2 - Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 - Programming Language :: Python :: 3.4 - Programming Language :: Python :: 3.5 + Programming Language :: Python :: 3 :: Only Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy [options] packages = find: -python_requires = >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* 
+python_requires = >=3.6.1 [options.entry_points] console_scripts = identify-cli=identify.cli:main [options.extras_require] -license = editdistance +license = + editdistance [options.packages.find] exclude = @@ -42,3 +42,16 @@ universal = True [coverage:run] plugins = covdefaults + +[mypy] +check_untyped_defs = true +disallow_any_generics = true +disallow_incomplete_defs = true +disallow_untyped_defs = true +no_implicit_optional = true + +[mypy-testing.*] +disallow_untyped_defs = false + +[mypy-tests.*] +disallow_untyped_defs = false diff --git a/setup.py b/setup.py index acf1ad4c..8bf1ba93 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,2 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals - from setuptools import setup setup() diff --git a/tests/cli_test.py b/tests/cli_test.py index 9369a5ee..94fb8ae7 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -1,7 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals - from identify import cli diff --git a/tests/extensions_test.py b/tests/extensions_test.py index 4527a58c..c2a828cf 100644 --- a/tests/extensions_test.py +++ b/tests/extensions_test.py @@ -1,7 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals - import pytest from identify import extensions diff --git a/tests/identify_test.py b/tests/identify_test.py index 6f9b9966..1fc532e6 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -1,7 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals - import io import os import stat @@ -191,9 +187,9 @@ def test_tags_from_interpreter(interpreter, expected): ( (b'hello world', True), (b'', True), - ('éóñəå ⊂(◉‿◉)つ(ノ≥∇≤)ノ'.encode('utf8'), True), - (r'¯\_(ツ)_/¯'.encode('utf8'), True), - ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪'.encode('utf8'), True), + ('éóñəå 
⊂(◉‿◉)つ(ノ≥∇≤)ノ'.encode(), True), + (r'¯\_(ツ)_/¯'.encode(), True), + ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪'.encode(), True), ('éóñå'.encode('latin1'), True), (b'hello world\x00', False), diff --git a/tox.ini b/tox.ini index 2876390d..a063c94e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py35,py36,pypy,pre-commit +envlist = py36,pypy3,pre-commit [testenv] deps = -rrequirements-dev.txt From 89d9e7500e59c7eb62ca8eef2cccde8e0f825375 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 1 Mar 2021 07:24:37 -0800 Subject: [PATCH 080/139] v2.0.0 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a7fad3e8..153cba37 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 1.6.2 +version = 2.0.0 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From d619133cda5439bf4c385d62fd4a811041b6dc5d Mon Sep 17 00:00:00 2001 From: Max R Date: Mon, 1 Mar 2021 21:43:23 -0500 Subject: [PATCH 081/139] Do not add py2-compat lines into vendored licenses --- bin/vendor-licenses | 3 --- 1 file changed, 3 deletions(-) diff --git a/bin/vendor-licenses b/bin/vendor-licenses index 33ce89a1..2dbde362 100755 --- a/bin/vendor-licenses +++ b/bin/vendor-licenses @@ -41,9 +41,6 @@ def main() -> int: licenses.append((spdx, license_text)) - print('# -*- coding: utf-8 -*-') - print('from __future__ import absolute_import') - print('from __future__ import unicode_literals') print('LICENSES = (') for spdx, text in sorted(licenses): print(' (') From 8afd489d0483b9c5ef5222f3cdb64d330691858e Mon Sep 17 00:00:00 2001 From: Max R Date: Mon, 1 Mar 2021 21:55:22 -0500 Subject: [PATCH 082/139] Formatting nits --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index accf29ba..388bdd8d 100644 --- a/README.md +++ b/README.md @@ -76,11 +76,11 @@ optional 
arguments: --filename-only ``` -```bash +```console $ identify-cli setup.py; echo $? ["file", "non-executable", "python", "text"] 0 -identify setup.py --filename-only; echo $? +$ identify setup.py --filename-only; echo $? ["python", "text"] 0 $ identify-cli wat.wat; echo $? From 2e41f34a3e343e3c547c1e228de832a343b95e0b Mon Sep 17 00:00:00 2001 From: Max R Date: Tue, 2 Mar 2021 10:54:00 -0500 Subject: [PATCH 083/139] Identify sockets --- README.md | 2 +- identify/identify.py | 16 ++++++++++++---- tests/identify_test.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index accf29ba..68a6893d 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ If you have an actual file on disk, you can get the most information possible When using a file on disk, the checks performed are: -* File type (file, symlink, directory) +* File type (file, symlink, directory, socket) * Mode (is it executable?) * File name (mostly based on extension) * If executable, the shebang is read and the interpreter interpreted diff --git a/identify/identify.py b/identify/identify.py index 0f936a8f..51c12889 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -1,6 +1,7 @@ import os.path import re import shlex +import stat import string import sys from typing import IO @@ -18,13 +19,14 @@ DIRECTORY = 'directory' SYMLINK = 'symlink' +SOCKET = 'socket' FILE = 'file' EXECUTABLE = 'executable' NON_EXECUTABLE = 'non-executable' TEXT = 'text' BINARY = 'binary' -TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK)) +TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK, SOCKET)) MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE)) ENCODING_TAGS = frozenset((BINARY, TEXT)) _ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS} @@ -36,12 +38,18 @@ def tags_from_path(path: str) -> Set[str]: - if not os.path.lexists(path): + try: + sr = os.lstat(path) + except (OSError, ValueError): # same error-handling as `os.lexists()` raise ValueError(f'{path} does not 
exist.') - if os.path.isdir(path): + + mode = sr.st_mode + if stat.S_ISDIR(mode): return {DIRECTORY} - if os.path.islink(path): + if stat.S_ISLNK(mode): return {SYMLINK} + if stat.S_ISSOCK(mode): + return {SOCKET} tags = {FILE} diff --git a/tests/identify_test.py b/tests/identify_test.py index 1fc532e6..8cc58562 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -1,6 +1,8 @@ import io import os +import socket import stat +from tempfile import TemporaryDirectory import pytest @@ -12,6 +14,7 @@ def test_all_tags_includes_basic_ones(): assert 'directory' in identify.ALL_TAGS assert 'executable' in identify.ALL_TAGS assert 'text' in identify.ALL_TAGS + assert 'socket' in identify.ALL_TAGS @pytest.mark.parametrize( @@ -51,6 +54,17 @@ def test_tags_from_path_symlink(tmpdir): assert identify.tags_from_path(x.strpath) == {'symlink'} +def test_tags_from_path_socket(): + tmproot = '/tmp' # short path avoids `OSError: AF_UNIX path too long` + with TemporaryDirectory(dir=tmproot) as tmpdir: + socket_path = os.path.join(tmpdir, 'socket') + with socket.socket(socket.AF_UNIX) as sock: + sock.bind(socket_path) + tags = identify.tags_from_path(socket_path) + + assert tags == {'socket'} + + def test_tags_from_path_broken_symlink(tmpdir): x = tmpdir.join('foo') x.mksymlinkto(tmpdir.join('lol')) From 587cbd360a2d68009d62491416294a5a54b1a520 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 3 Mar 2021 04:33:01 -0800 Subject: [PATCH 084/139] v2.1.0 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 153cba37..4aafd641 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.0.0 +version = 2.1.0 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 038f00b7cfe4e7ba4610f2af52e38d17a9f8b7ed Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 8 Mar 2021 18:39:08 -0800 Subject: [PATCH 
085/139] fix parse_shebang_from_file for windows store python --- identify/identify.py | 11 +++++++++-- tests/identify_test.py | 12 ++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/identify/identify.py b/identify/identify.py index 51c12889..52d2c2dc 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -1,3 +1,4 @@ +import errno import os.path import re import shlex @@ -205,8 +206,14 @@ def parse_shebang_from_file(path: str) -> Tuple[str, ...]: if not os.access(path, os.X_OK): return () - with open(path, 'rb') as f: - return parse_shebang(f) + try: + with open(path, 'rb') as f: + return parse_shebang(f) + except OSError as e: + if e.errno == errno.EINVAL: + return () + else: + raise COPYRIGHT_RE = re.compile(r'^\s*(Copyright|\(C\)) .*$', re.I | re.MULTILINE) diff --git a/tests/identify_test.py b/tests/identify_test.py index 8cc58562..f881f0b2 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -1,8 +1,11 @@ +import builtins +import errno import io import os import socket import stat from tempfile import TemporaryDirectory +from unittest import mock import pytest @@ -330,6 +333,15 @@ def test_parse_shebang_from_file_simple(tmpdir): assert identify.parse_shebang_from_file(x.strpath) == ('python',) +def test_parse_shebang_open_raises_einval(tmpdir): + x = tmpdir.join('f') + x.write('#!/usr/bin/env not-expected\n') + make_executable(x) + error = OSError(errno.EINVAL, f'Invalid argument {x}') + with mock.patch.object(builtins, 'open', side_effect=error): + assert identify.parse_shebang_from_file(x.strpath) == () + + def make_executable(filename): original_mode = os.stat(filename).st_mode os.chmod( From 184970cefc74eefda4635d21c54f0d03e4b9defe Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 8 Mar 2021 18:48:09 -0800 Subject: [PATCH 086/139] v2.1.1 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 4aafd641..d398f2ae 100644 --- a/setup.cfg +++ 
b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.1.0 +version = 2.1.1 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 7e8abd988aa89559cf1648622e61bb2f8fd6cb9b Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 11 Mar 2021 21:11:47 -0800 Subject: [PATCH 087/139] Add vb to EXTENSIONS --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 778b695c..48864fc0 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -183,6 +183,7 @@ 'txsprofile': {'text', 'ini', 'txsprofile'}, 'txt': {'text', 'plain-text'}, 'v': {'text', 'verilog'}, + 'vb': {'text', 'vb'}, 'vbproj': {'text', 'xml', 'vbproj'}, 'vcxproj': {'text', 'xml', 'vcxproj'}, 'vdx': {'text', 'vdx'}, From efbc22e7a0bd7372df9aecc4f88fdd180affdb4f Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 11 Mar 2021 21:16:11 -0800 Subject: [PATCH 088/139] v2.1.2 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index d398f2ae..ae7dbb47 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.1.1 +version = 2.1.2 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From c192ed0c59c2d3e570480d2782a287618f0d9282 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Sat, 13 Mar 2021 23:05:48 +0000 Subject: [PATCH 089/139] List Gemfile.lock as type 'text' To avoid it being treated as ruby code (e.g. via pre-commit running tools like rubocop). 
--- identify/extensions.py | 1 + tests/identify_test.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 48864fc0..3eaa963a 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -261,6 +261,7 @@ 'COPYING': EXTENSIONS['txt'], 'Dockerfile': {'text', 'dockerfile'}, 'Gemfile': EXTENSIONS['rb'], + 'Gemfile.lock': {'text'}, 'Jenkinsfile': {'text', 'groovy'}, 'LICENSE': EXTENSIONS['txt'], 'MAINTAINERS': EXTENSIONS['txt'], diff --git a/tests/identify_test.py b/tests/identify_test.py index f881f0b2..1b5dde0d 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -158,6 +158,8 @@ def test_tags_from_path_plist_text(tmpdir): ('Pipfile.lock', {'text', 'json'}), ('mod/test.py', {'text', 'python'}), ('mod/Dockerfile', {'text', 'dockerfile'}), + ('Gemfile', {'text', 'ruby'}), + ('Gemfile.lock', {'text'}), # does not set binary / text ('f.plist', {'plist'}), From f3578685d50084fee76d8b797f1d2ef79a4787d1 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 14 Mar 2021 08:45:31 -0700 Subject: [PATCH 090/139] v2.1.3 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ae7dbb47..5dd77383 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.1.2 +version = 2.1.3 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 08a3b86935e74501dadbb7aec00c04c87a40ebfb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Mar 2021 17:02:31 +0000 Subject: [PATCH 091/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a1dc351b..bb59eb79 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,17 +11,17 @@ 
repos: - id: requirements-txt-fixer - id: trailing-whitespace - repo: https://github.com/asottile/setup-cfg-fmt - rev: v1.16.0 + rev: v1.17.0 hooks: - id: setup-cfg-fmt - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.9.0 hooks: - id: flake8 exclude: ^identify/vendor/licenses\.py$ additional_dependencies: [flake8-typing-imports==1.10.1] - repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.5.4 + rev: v1.5.5 hooks: - id: autopep8 - repo: https://github.com/asottile/reorder_python_imports From d04cebae920c72ba5770cee1a89724a95dd775b8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Mar 2021 17:02:47 +0000 Subject: [PATCH 092/139] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- setup.cfg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/setup.cfg b/setup.cfg index 5dd77383..446e4f3c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,11 @@ classifiers = packages = find: python_requires = >=3.6.1 +[options.packages.find] +exclude = + tests* + testing* + [options.entry_points] console_scripts = identify-cli=identify.cli:main @@ -32,11 +37,6 @@ console_scripts = license = editdistance -[options.packages.find] -exclude = - tests* - testing* - [bdist_wheel] universal = True From 17b6765031785a0be530e91b9d48652125e38642 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 18 Mar 2021 15:00:46 -0700 Subject: [PATCH 093/139] add support for .bazelrc --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 3eaa963a..d5a16270 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -226,6 +226,7 @@ '.bash_aliases': EXTENSIONS['bash'], '.bash_profile': EXTENSIONS['bash'], '.bashrc': EXTENSIONS['bash'], + '.bazelrc': {'text', 'bazelrc'}, '.bowerrc': EXTENSIONS['json'] | {'bowerrc'}, '.browserslistrc': {'text', 
'browserslistrc'}, '.clang-format': EXTENSIONS['yaml'], From 56d42a200452c3a154ff82beab5ccb5b9217196b Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 18 Mar 2021 15:16:16 -0700 Subject: [PATCH 094/139] v2.1.4 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 446e4f3c..a5393b4d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.1.3 +version = 2.1.4 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 26efd95e14d714cfc6727a5e390aae930aedb7a2 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 20 Mar 2021 01:06:12 -0700 Subject: [PATCH 095/139] use editdistance-s instead of editdistance --- identify/identify.py | 4 ++-- setup.cfg | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/identify/identify.py b/identify/identify.py index 52d2c2dc..59bc6bab 100644 --- a/identify/identify.py +++ b/identify/identify.py @@ -240,7 +240,7 @@ def license_id(filename: str) -> Optional[str]: 3. check exact text match with existing licenses 4. 
failing that use edit distance """ - import editdistance # `pip install identify[license]` + import editdistance_s # `pip install identify[license]` with open(filename, encoding='UTF-8') as f: contents = f.read() @@ -260,7 +260,7 @@ def license_id(filename: str) -> Optional[str]: if norm and abs(len(norm) - len(norm_license)) / len(norm) > .05: continue - edit_dist = editdistance.eval(norm, norm_license) + edit_dist = editdistance_s.distance(norm, norm_license) if edit_dist < min_edit_dist: min_edit_dist = edit_dist min_edit_dist_spdx = spdx diff --git a/setup.cfg b/setup.cfg index a5393b4d..d12a634c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,7 @@ console_scripts = [options.extras_require] license = - editdistance + editdistance-s [bdist_wheel] universal = True From bc105d59ecbb49730f74cc1fac00674e68aca969 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sat, 20 Mar 2021 01:13:00 -0700 Subject: [PATCH 096/139] v2.2.0 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index d12a634c..65104a54 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.1.4 +version = 2.2.0 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From d9618c1db25b70162511b2048a658e2c034a6efd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Mar 2021 17:03:42 +0000 Subject: [PATCH 097/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bb59eb79..4d952e14 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: exclude: ^identify/vendor/licenses\.py$ additional_dependencies: [flake8-typing-imports==1.10.1] - repo: https://github.com/pre-commit/mirrors-autopep8 - rev: 
v1.5.5 + rev: v1.5.6 hooks: - id: autopep8 - repo: https://github.com/asottile/reorder_python_imports @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.10.0 + rev: v2.11.0 hooks: - id: pyupgrade args: [--py36-plus] From a8e4ae7d8e0cf13fef7b7fd31fe5ee59836a43a8 Mon Sep 17 00:00:00 2001 From: jonasc Date: Wed, 24 Mar 2021 11:49:31 +0100 Subject: [PATCH 098/139] Add to zsh: .zlogin, .zlogout, .zprofile, .zshenv See http://zsh.sourceforge.net/Doc/Release/Files.html for a documentation of the files. --- identify/extensions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index d5a16270..accb3990 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -252,7 +252,11 @@ '.pypirc': EXTENSIONS['ini'] | {'pypirc'}, '.rstcheck.cfg': EXTENSIONS['ini'], '.yamllint': EXTENSIONS['yaml'] | {'yamllint'}, + '.zlogin': EXTENSIONS['zsh'], + '.zlogout': EXTENSIONS['zsh'], + '.zprofile': EXTENSIONS['zsh'], '.zshrc': EXTENSIONS['zsh'], + '.zshenv': EXTENSIONS['zsh'], 'AUTHORS': EXTENSIONS['txt'], 'BUILD': {'text', 'bazel'}, 'BUILD.bazel': {'text', 'bazel'}, From 0f258e01897a185bd0a8b9f6f7be91b10a634154 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 24 Mar 2021 17:19:01 -0700 Subject: [PATCH 099/139] v2.2.1 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 65104a54..cb381856 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.0 +version = 2.2.1 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 36c217f8ea598b39275b380f7dceb50aa2a8f707 Mon Sep 17 00:00:00 2001 From: Kert Date: Sat, 27 Mar 2021 14:41:14 -0700 Subject: [PATCH 100/139] Add .dll as a binary file --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/identify/extensions.py b/identify/extensions.py index accb3990..0fc0d8fa 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -32,6 +32,7 @@ 'cxx': {'text', 'c++'}, 'dart': {'text', 'dart'}, 'def': {'text', 'def'}, + 'dll': {'binary'}, 'dtd': {'text', 'dtd'}, 'ear': {'binary', 'zip', 'jar'}, 'edn': {'text', 'clojure', 'edn'}, From e97ff81b6a793be00b43be57bfc2ddf92dd12410 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 28 Mar 2021 10:10:53 -0700 Subject: [PATCH 101/139] v2.2.2 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index cb381856..3fdbf025 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.1 +version = 2.2.2 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 09303a6214f6200060e9e3bda46ee211da3ed339 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Apr 2021 17:07:39 +0000 Subject: [PATCH 102/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4d952e14..c053f2b9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: rev: v1.17.0 hooks: - id: setup-cfg-fmt -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/PyCQA/flake8 rev: 3.9.0 hooks: - id: flake8 From 5f707d1eec6f6694ff3d3470fde1defe3dc1a662 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 8 Apr 2021 19:22:12 -0700 Subject: [PATCH 103/139] Update azure-pipelines template repositories Committed via https://github.com/asottile/all-repos --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7bb7600a..61a5a11d 100644 --- a/azure-pipelines.yml +++ 
b/azure-pipelines.yml @@ -10,7 +10,7 @@ resources: type: github endpoint: github name: asottile/azure-pipeline-templates - ref: refs/tags/v2.0.0 + ref: refs/tags/v2.1.0 jobs: - template: job--python-tox.yml@asottile From c2725f79398198b6a58313ae9caebbf64580f370 Mon Sep 17 00:00:00 2001 From: Yiannis Nikolakopoulos Date: Fri, 9 Apr 2021 19:24:59 +0200 Subject: [PATCH 104/139] identify c++ header files with .hh suffix --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 0fc0d8fa..19eaa7cd 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -58,6 +58,7 @@ 'gypi': {'text', 'gyp', 'python'}, 'gz': {'binary', 'gzip'}, 'h': {'text', 'header', 'c', 'c++'}, + 'hh' : {'text', 'header', 'c++'}, 'hpp': {'text', 'header', 'c++'}, 'hs': {'text', 'haskell'}, 'htm': {'text', 'html'}, From c4b283d8000332751bba733d3937ef47ab391d7a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 9 Apr 2021 17:27:07 +0000 Subject: [PATCH 105/139] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- identify/extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/identify/extensions.py b/identify/extensions.py index 19eaa7cd..7b36b4eb 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -58,7 +58,7 @@ 'gypi': {'text', 'gyp', 'python'}, 'gz': {'binary', 'gzip'}, 'h': {'text', 'header', 'c', 'c++'}, - 'hh' : {'text', 'header', 'c++'}, + 'hh': {'text', 'header', 'c++'}, 'hpp': {'text', 'header', 'c++'}, 'hs': {'text', 'haskell'}, 'htm': {'text', 'html'}, From 7ed47f3ce9c44c3aa749a149b4d34e1cabb99b75 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 9 Apr 2021 13:03:11 -0700 Subject: [PATCH 106/139] v2.2.3 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 3fdbf025..fb4941cd 100644 --- 
a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.2 +version = 2.2.3 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 56d874fd87634eb1ad3acd8b635cee6155d92d4d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 12 Apr 2021 17:09:32 +0000 Subject: [PATCH 107/139] [pre-commit.ci] pre-commit autoupdate --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c053f2b9..e0355409 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.11.0 + rev: v2.12.0 hooks: - id: pyupgrade args: [--py36-plus] From 60379ec9d7d20a7210e1edb8c310b346fd381381 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Apr 2021 17:08:07 +0000 Subject: [PATCH 108/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/flake8: 3.9.0 → 3.9.1](https://github.com/PyCQA/flake8/compare/3.9.0...3.9.1) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e0355409..2b007a11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: hooks: - id: setup-cfg-fmt - repo: https://github.com/PyCQA/flake8 - rev: 3.9.0 + rev: 3.9.1 hooks: - id: flake8 exclude: ^identify/vendor/licenses\.py$ From 476d69e4bf9767dbc76699443f08fd2db25f5d4a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 19 Apr 2021 21:25:57 -0500 Subject: [PATCH 109/139] Detect .cuh as CUDA header file. 
--- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 7b36b4eb..61465349 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -29,6 +29,7 @@ 'css': {'text', 'css'}, 'csv': {'text', 'csv'}, 'cu': {'text', 'cuda'}, + 'cuh': {'text', 'header', 'cuda'}, 'cxx': {'text', 'c++'}, 'dart': {'text', 'dart'}, 'def': {'text', 'def'}, From 70e1a3ba7a001563d168256c79a4a704f2b3f388 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 19 Apr 2021 22:01:23 -0500 Subject: [PATCH 110/139] Don't treat .cuh as a header. --- identify/extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/identify/extensions.py b/identify/extensions.py index 61465349..0018da3b 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -29,7 +29,7 @@ 'css': {'text', 'css'}, 'csv': {'text', 'csv'}, 'cu': {'text', 'cuda'}, - 'cuh': {'text', 'header', 'cuda'}, + 'cuh': {'text', 'cuda'}, 'cxx': {'text', 'c++'}, 'dart': {'text', 'dart'}, 'def': {'text', 'def'}, From ea9b9a077d82dee35b64f81c4536537f6eaa1355 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 20 Apr 2021 17:10:12 -0700 Subject: [PATCH 111/139] v2.2.4 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index fb4941cd..194b3186 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.3 +version = 2.2.4 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From b5ed5a342700b6d2c1717a8126af579222d3b780 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 26 Apr 2021 17:19:38 +0000 Subject: [PATCH 112/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/reorder_python_imports: 
v2.4.0 → v2.5.0](https://github.com/asottile/reorder_python_imports/compare/v2.4.0...v2.5.0) - [github.com/asottile/pyupgrade: v2.12.0 → v2.13.0](https://github.com/asottile/pyupgrade/compare/v2.12.0...v2.13.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2b007a11..f3c73774 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: hooks: - id: autopep8 - repo: https://github.com/asottile/reorder_python_imports - rev: v2.4.0 + rev: v2.5.0 hooks: - id: reorder-python-imports args: [--py3-plus] @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.12.0 + rev: v2.13.0 hooks: - id: pyupgrade args: [--py36-plus] From 7e58b198d6520b7c94355bb8da64b5f5bef8ed0f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 May 2021 17:17:17 +0000 Subject: [PATCH 113/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-autopep8: v1.5.6 → v1.5.7](https://github.com/pre-commit/mirrors-autopep8/compare/v1.5.6...v1.5.7) - [github.com/asottile/pyupgrade: v2.13.0 → v2.14.0](https://github.com/asottile/pyupgrade/compare/v2.13.0...v2.14.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f3c73774..d40db41e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: exclude: ^identify/vendor/licenses\.py$ additional_dependencies: [flake8-typing-imports==1.10.1] - repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.5.6 + rev: v1.5.7 hooks: - id: autopep8 - repo: https://github.com/asottile/reorder_python_imports @@ -35,7 +35,7 @@ repos: - id: 
add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.13.0 + rev: v2.14.0 hooks: - id: pyupgrade args: [--py36-plus] From 716642cdfaefd6499682e72d3f2d08c54b1ea5cf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 May 2021 20:19:58 +0000 Subject: [PATCH 114/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/flake8: 3.9.1 → 3.9.2](https://github.com/PyCQA/flake8/compare/3.9.1...3.9.2) - [github.com/asottile/pyupgrade: v2.14.0 → v2.15.0](https://github.com/asottile/pyupgrade/compare/v2.14.0...v2.15.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d40db41e..28c82064 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: hooks: - id: setup-cfg-fmt - repo: https://github.com/PyCQA/flake8 - rev: 3.9.1 + rev: 3.9.2 hooks: - id: flake8 exclude: ^identify/vendor/licenses\.py$ @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.14.0 + rev: v2.15.0 hooks: - id: pyupgrade args: [--py36-plus] From 67f5b14df2ced082e2e1666901551123881d02c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 May 2021 17:23:27 +0000 Subject: [PATCH 115/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v3.4.0 → v4.0.1](https://github.com/pre-commit/pre-commit-hooks/compare/v3.4.0...v4.0.1) - [github.com/asottile/pyupgrade: v2.15.0 → v2.16.0](https://github.com/asottile/pyupgrade/compare/v2.15.0...v2.16.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 28c82064..b938daed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: check-docstring-first - id: check-yaml @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.15.0 + rev: v2.16.0 hooks: - id: pyupgrade args: [--py36-plus] From a82b800029171347001fb28abefda1ca0bfa5cb1 Mon Sep 17 00:00:00 2001 From: s190850 <56158575+s190850@users.noreply.github.com> Date: Fri, 21 May 2021 16:28:45 +0200 Subject: [PATCH 116/139] Add other makefile names Both `makefile` and `GNUmakefile` are officially defined Makefile names as seen (here)[https://www.gnu.org/software/make/manual/html_node/Makefile-Names.html] --- identify/extensions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index 0018da3b..fd47a2fc 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -270,10 +270,12 @@ 'Dockerfile': {'text', 'dockerfile'}, 'Gemfile': EXTENSIONS['rb'], 'Gemfile.lock': {'text'}, + 'GNUmakefile': EXTENSIONS['mk'], 'Jenkinsfile': {'text', 'groovy'}, 'LICENSE': EXTENSIONS['txt'], 'MAINTAINERS': EXTENSIONS['txt'], 'Makefile': EXTENSIONS['mk'], + 'makefile': EXTENSIONS['mk'], 'NEWS': EXTENSIONS['txt'], 'NOTICE': EXTENSIONS['txt'], 'PATENTS': EXTENSIONS['txt'], From 761779c8829839eeef3c68220c1633c00c8492c2 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 21 May 2021 14:20:30 -0700 Subject: [PATCH 117/139] v2.2.5 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 194b3186..b7767f86 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.4 +version = 2.2.5 description = File identification library for Python long_description = 
file: README.md long_description_content_type = text/markdown From 7e0952801b58bb859b8532eeb6d2ba68b51eafdb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 May 2021 17:19:43 +0000 Subject: [PATCH 118/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v2.16.0 → v2.18.2](https://github.com/asottile/pyupgrade/compare/v2.16.0...v2.18.2) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b938daed..dcac0b51 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.16.0 + rev: v2.18.2 hooks: - id: pyupgrade args: [--py36-plus] From 2fa72f68819737302d88c9abd41ee2f891ed8b89 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 May 2021 17:19:56 +0000 Subject: [PATCH 119/139] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- bin/vendor-licenses | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/vendor-licenses b/bin/vendor-licenses index 2dbde362..1303558a 100755 --- a/bin/vendor-licenses +++ b/bin/vendor-licenses @@ -33,11 +33,11 @@ def main() -> int: _, data, license_text = contents.split('---\n', 2) - spdx, = [ + spdx, = ( line[len('spdx-id:'):].strip() for line in data.splitlines() if line.startswith('spdx-id:') - ] + ) licenses.append((spdx, license_text)) From 1916ff9b5033f6a047f49783de199aa158003f44 Mon Sep 17 00:00:00 2001 From: Dr Freon Date: Fri, 14 May 2021 09:38:15 -0400 Subject: [PATCH 120/139] Add .jenkins extension type --- identify/extensions.py | 5 +++-- tests/identify_test.py | 3 +++ 2 files 
changed, 6 insertions(+), 2 deletions(-) diff --git a/identify/extensions.py b/identify/extensions.py index fd47a2fc..5ad041fc 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -78,7 +78,8 @@ 'jade': {'text', 'jade'}, 'jar': {'binary', 'zip', 'jar'}, 'java': {'text', 'java'}, - 'jenkinsfile': {'text', 'groovy'}, + 'jenkins': {'text', 'groovy', 'jenkins'}, + 'jenkinsfile': {'text', 'groovy', 'jenkins'}, 'jinja': {'text', 'jinja'}, 'jinja2': {'text', 'jinja'}, 'jpeg': {'binary', 'image', 'jpeg'}, @@ -271,7 +272,7 @@ 'Gemfile': EXTENSIONS['rb'], 'Gemfile.lock': {'text'}, 'GNUmakefile': EXTENSIONS['mk'], - 'Jenkinsfile': {'text', 'groovy'}, + 'Jenkinsfile': EXTENSIONS['jenkins'], 'LICENSE': EXTENSIONS['txt'], 'MAINTAINERS': EXTENSIONS['txt'], 'Makefile': EXTENSIONS['mk'], diff --git a/tests/identify_test.py b/tests/identify_test.py index 1b5dde0d..ec40cdfa 100644 --- a/tests/identify_test.py +++ b/tests/identify_test.py @@ -160,6 +160,9 @@ def test_tags_from_path_plist_text(tmpdir): ('mod/Dockerfile', {'text', 'dockerfile'}), ('Gemfile', {'text', 'ruby'}), ('Gemfile.lock', {'text'}), + ('Jenkinsfile', {'text', 'groovy', 'jenkins'}), + ('build.jenkins', {'text', 'groovy', 'jenkins'}), + ('build.jenkinsfile', {'text', 'groovy', 'jenkins'}), # does not set binary / text ('f.plist', {'plist'}), From bfc1fe4826b6258853bec57ee4c6459e83a810e3 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 24 May 2021 19:27:16 -0700 Subject: [PATCH 121/139] v2.2.6 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index b7767f86..4c207920 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.5 +version = 2.2.6 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From cd3da0c76f8dc7f6eef9e1ca1a8054ed5b4499cd Mon Sep 17 00:00:00 2001 From: Stephan Wolski Date: Sun, 30 May 2021 19:58:01 -0400 
Subject: [PATCH 122/139] detect .bzl files as `bazel` --- identify/extensions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/identify/extensions.py b/identify/extensions.py index 5ad041fc..22406231 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -9,6 +9,7 @@ 'bib': {'text', 'bib'}, 'bmp': {'binary', 'image', 'bitmap'}, 'bz2': {'binary', 'bzip2'}, + 'bzl': {'text', 'bazel'}, 'c': {'text', 'c'}, 'cc': {'text', 'c++'}, 'cfg': {'text'}, @@ -262,8 +263,8 @@ '.zshrc': EXTENSIONS['zsh'], '.zshenv': EXTENSIONS['zsh'], 'AUTHORS': EXTENSIONS['txt'], - 'BUILD': {'text', 'bazel'}, - 'BUILD.bazel': {'text', 'bazel'}, + 'BUILD': EXTENSIONS['bzl'], + 'BUILD.bazel': EXTENSIONS['bzl'], 'CMakeLists.txt': EXTENSIONS['cmake'], 'CHANGELOG': EXTENSIONS['txt'], 'CONTRIBUTING': EXTENSIONS['txt'], From bec268e549a1448ef4df139530f1347250b0b0c1 Mon Sep 17 00:00:00 2001 From: Stephan Wolski Date: Sun, 30 May 2021 19:59:06 -0400 Subject: [PATCH 123/139] detect WORKSPACE files as `bazel` --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 22406231..efc73cae 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -288,4 +288,5 @@ 'README': EXTENSIONS['txt'], 'Rakefile': EXTENSIONS['rb'], 'setup.cfg': EXTENSIONS['ini'], + 'WORKSPACE': EXTENSIONS['bzl'], } From e6e05dbce111c33f82cdae744da846c3de204543 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 30 May 2021 17:14:16 -0700 Subject: [PATCH 124/139] v2.2.7 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 4c207920..16e866f8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.6 +version = 2.2.7 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 5c37397b0a555431af57834acab337dbab96be00 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 31 May 2021 17:26:34 +0000 Subject: [PATCH 125/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v2.18.2 → v2.19.0](https://github.com/asottile/pyupgrade/compare/v2.18.2...v2.19.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dcac0b51..7def6d39 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.18.2 + rev: v2.19.0 hooks: - id: pyupgrade args: [--py36-plus] From a2f119f21eb7b2491b03998192170b298b1876d3 Mon Sep 17 00:00:00 2001 From: Levent Yalcin Date: Tue, 1 Jun 2021 16:43:16 +0100 Subject: [PATCH 126/139] bats is a TAP-compliant testing framework [1]. However, .bats file are bash and shellcheck compatible. 
Recognising this files as bash will help them to test with shellcheck https://github.com/bats-core/ --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index efc73cae..836e6a19 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -6,6 +6,7 @@ 'avif': {'binary', 'image', 'avif'}, 'bash': {'text', 'shell', 'bash'}, 'bat': {'text', 'batch'}, + 'bats': {'text', 'shell', 'bash', 'bats'}, 'bib': {'text', 'bib'}, 'bmp': {'binary', 'image', 'bitmap'}, 'bz2': {'binary', 'bzip2'}, From fc5dd82bbae0fd64211202027c4352c536d13dcb Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 3 Jun 2021 08:30:32 -0700 Subject: [PATCH 127/139] v2.2.8 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 16e866f8..a77828c1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.7 +version = 2.2.8 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 1a01df4a80f120055b4942cfc5327d413caa7bfb Mon Sep 17 00:00:00 2001 From: Godefroid Chapelle Date: Wed, 2 Jun 2021 19:03:00 +0200 Subject: [PATCH 128/139] Added Zope pt, zpt and zcml --- identify/extensions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/identify/extensions.py b/identify/extensions.py index efc73cae..5474563e 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -218,8 +218,10 @@ 'yang': {'text', 'yang'}, 'yin': {'text', 'xml', 'yin'}, 'yml': {'text', 'yaml'}, + 'zcml': {'text', 'xml', 'zcml'}, 'zig': {'text', 'zig'}, 'zip': {'binary', 'zip'}, + 'zpt': {'text', 'zpt'}, 'zsh': {'text', 'shell', 'zsh'}, } EXTENSIONS_NEED_BINARY_CHECK = { From 32807ea4697e1784a3f97259787c1efcd7262d91 Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Thu, 3 Jun 2021 19:15:26 -0400 Subject: [PATCH 129/139] Add URDF --- identify/extensions.py | 1 + 1 file changed, 
1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 836e6a19..5838bb2c 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -188,6 +188,7 @@ 'twig': {'text', 'twig'}, 'txsprofile': {'text', 'ini', 'txsprofile'}, 'txt': {'text', 'plain-text'}, + 'urdf': {'text', 'xml', 'urdf'}, 'v': {'text', 'verilog'}, 'vb': {'text', 'vb'}, 'vbproj': {'text', 'xml', 'vbproj'}, From b6551417e3716f71cc6cbbb6e6c9a06563498cf9 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 4 Jun 2021 18:18:03 -0700 Subject: [PATCH 130/139] v2.2.9 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a77828c1..540c7025 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.8 +version = 2.2.9 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown From 8376fd4081e9cea7111f82b0039cced4d1138abc Mon Sep 17 00:00:00 2001 From: Cameron Davison Date: Mon, 7 Jun 2021 12:11:16 -0500 Subject: [PATCH 131/139] add tab separated values (tsv) files --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index bb3a96c2..1ad70a94 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -183,6 +183,7 @@ 'tiff': {'binary', 'image', 'tiff'}, 'toml': {'text', 'toml'}, 'ts': {'text', 'ts'}, + 'tsv': {'text', 'tsv'}, 'tsx': {'text', 'tsx'}, 'ttf': {'binary', 'ttf'}, 'twig': {'text', 'twig'}, From ee08e725a23a0708f057724d776dc55b0ae4a200 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 7 Jun 2021 10:18:24 -0700 Subject: [PATCH 132/139] v2.2.10 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 540c7025..c88e6a72 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.9 +version = 2.2.10 description = File 
identification library for Python long_description = file: README.md long_description_content_type = text/markdown From b936988a272aa117f9ac700c749f5f6ff6a0543b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Jun 2021 01:46:32 +0000 Subject: [PATCH 133/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v2.19.0 → v2.19.1](https://github.com/asottile/pyupgrade/compare/v2.19.0...v2.19.1) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7def6d39..10ecf2c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.19.0 + rev: v2.19.1 hooks: - id: pyupgrade args: [--py36-plus] From 0d2a17da5a1be37805eed080398e624c678d178f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 14 Jun 2021 17:33:54 +0000 Subject: [PATCH 134/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v2.19.1 → v2.19.4](https://github.com/asottile/pyupgrade/compare/v2.19.1...v2.19.4) - [github.com/pre-commit/mirrors-mypy: v0.812 → v0.902](https://github.com/pre-commit/mirrors-mypy/compare/v0.812...v0.902) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 10ecf2c7..95f86d41 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,11 +35,11 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.19.1 + rev: v2.19.4 hooks: 
- id: pyupgrade args: [--py36-plus] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.812 + rev: v0.902 hooks: - id: mypy From 4bae2f6df531620ea9371a2a93a838da618b9b78 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 21 Jun 2021 19:15:31 -0700 Subject: [PATCH 135/139] stricter mypy settings Committed via https://github.com/asottile/all-repos --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index c88e6a72..dd5d196c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,8 @@ disallow_any_generics = true disallow_incomplete_defs = true disallow_untyped_defs = true no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = true [mypy-testing.*] disallow_untyped_defs = false From 87e4cb19756841a8247631a79d70e5c0f872d07b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 28 Jun 2021 17:38:03 +0000 Subject: [PATCH 136/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v0.902 → v0.910](https://github.com/pre-commit/mirrors-mypy/compare/v0.902...v0.910) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 95f86d41..5ce735d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,6 +40,6 @@ repos: - id: pyupgrade args: [--py36-plus] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.902 + rev: v0.910 hooks: - id: mypy From 4ec3626173584f404ff635097026c083978c3a7a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Jul 2021 22:01:54 +0000 Subject: [PATCH 137/139] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - 
[github.com/asottile/pyupgrade: v2.19.4 → v2.20.0](https://github.com/asottile/pyupgrade/compare/v2.19.4...v2.20.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ce735d8..696fc44e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: add-trailing-comma args: [--py36-plus] - repo: https://github.com/asottile/pyupgrade - rev: v2.19.4 + rev: v2.20.0 hooks: - id: pyupgrade args: [--py36-plus] From 7044d643d9e92cda974a854bb385c5ceb5c43889 Mon Sep 17 00:00:00 2001 From: Michael Vincent Date: Fri, 9 Jul 2021 18:23:04 -0500 Subject: [PATCH 138/139] Detect waf wscript files as Python Detect the waf build system's wscript files as Python. https://waf.io/ --- identify/extensions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/identify/extensions.py b/identify/extensions.py index 1ad70a94..a6b2039f 100644 --- a/identify/extensions.py +++ b/identify/extensions.py @@ -294,4 +294,5 @@ 'Rakefile': EXTENSIONS['rb'], 'setup.cfg': EXTENSIONS['ini'], 'WORKSPACE': EXTENSIONS['bzl'], + 'wscript': EXTENSIONS['py'], } From 109aca227e1c491fddb605510c10a01af8ac3d31 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Fri, 9 Jul 2021 19:40:00 -0400 Subject: [PATCH 139/139] v2.2.11 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index dd5d196c..b8f49d95 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = identify -version = 2.2.10 +version = 2.2.11 description = File identification library for Python long_description = file: README.md long_description_content_type = text/markdown