Skip to content

Commit ad0dcd4

Browse files
author
birkenfeld
committed
Patch [ 784089 ] A program to scan Python files and list those that require a coding declaration
git-svn-id: http://svn.python.org/projects/python/trunk@39375 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 8785c8f commit ad0dcd4

File tree

3 files changed

+241
-0
lines changed

3 files changed

+241
-0
lines changed

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,11 @@ New platforms
502502
Tools/Demos
503503
-----------
504504

505+
- Added two new files to Tools/scripts: pysource.py, which recursively
506+
finds Python source files, and findnocoding.py, which finds Python
507+
source files that need an encoding declaration.
508+
Patch #784089, credits to Oleg Broytmann.
509+
505510
- Bug #1072853: pindent.py used an uninitialized variable.
506511

507512
- Patch #1177597: Correct Complex.__init__.

Tools/scripts/findnocoding.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/usr/bin/env python
2+
3+
"""List all those Python files that require a coding directive
4+
5+
Usage: findnocoding.py [-cd] paths...
6+
"""
7+
8+
__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
9+
10+
import sys, os, re, getopt
11+
12+
# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # emulate the module with a simple os.walk
    class pysource:
        """Minimal stand-in for the pysource module.

        Only walk_python_files() does real work.  The three predicate
        attributes exist so that attribute lookups such as
        ``pysource.looks_like_python`` succeed; they are all None and are
        ignored by the fallback walker.
        """

        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every '*.py' file found under *paths*.

            paths: an iterable of file and/or directory names.  A file is
                   yielded if its name ends in '.py'; a directory is walked
                   recursively.  Anything else is silently skipped.
            Extra positional/keyword arguments are accepted and ignored so
            that the call signature matches the real module's function.
            """
            for path in paths:
                if os.path.isfile(path):
                    # yield the path itself (not the result of the
                    # extension test), and only for '.py' files
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)
    pysource = pysource()

    sys.stderr.write("The pysource module is not available; "
                     "no sophisticated Python source file search will be done.\n")
33+
34+
35+
# Captures the encoding name that follows "coding:" or "coding=" on a line.
decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")

def get_declaration(line):
    """Return the encoding name declared on *line*.

    The empty string is returned when the line carries no
    "coding:"/"coding=" marker.
    """
    found = decl_re.search(line)
    if found is None:
        return ''
    return found.group(1)
42+
43+
def has_correct_encoding(text, codec):
    """Tell whether the byte string *text* decodes cleanly with *codec*.

    Returns True if decoding succeeds and False if it raises
    UnicodeDecodeError.  Any other exception propagates to the caller.
    """
    try:
        text.decode(codec)
    except UnicodeDecodeError:
        return False
    return True
50+
51+
def needs_declaration(fullpath):
    """Decide whether the file *fullpath* lacks a needed coding declaration.

    Returns:
        None  - the file could not be opened (IOError); it is ignored.
        False - one of the first two lines carries a coding declaration,
                or the whole file decodes as ASCII.
        True  - there is no declaration and the file contains non-ASCII
                data.
    """
    try:
        infile = open(fullpath, 'rU')
    except IOError: # Oops, the file was removed - ignore it
        return None

    # close the file on every exit path, including a failing read
    try:
        line1 = infile.readline()
        line2 = infile.readline()

        if get_declaration(line1) or get_declaration(line2):
            # the file does have an encoding declaration, so trust it
            return False

        # check the whole file for non-ASCII characters
        rest = infile.read()
    finally:
        infile.close()

    return not has_correct_encoding(line1 + line2 + rest, "ascii")
73+
74+
75+
# Help text written to stderr when the command line is unusable.
usage = """Usage: %s [-cd] paths...
-c: recognize Python source files trying to compile them
-d: debug output""" % sys.argv[0]

try:
    opts, args = getopt.getopt(sys.argv[1:], 'cd')
except getopt.error:
    # report the parser's complaint followed by the help text, then give up
    sys.stderr.write("%s\n" % (sys.exc_info()[1],))
    sys.stderr.write("%s\n" % usage)
    sys.exit(1)

# getopt only lets -c and -d through, so looking at the flag names suffices
flags = [flag for flag, value in opts]

# -c switches the file test from the default heuristic to compilation
is_python = pysource.looks_like_python
if '-c' in flags:
    is_python = pysource.can_be_compiled

# -d turns on progress output
debug = '-d' in flags

if not args:
    # at least one path is required
    sys.stderr.write("%s\n" % usage)
    sys.exit(1)

# print every Python file that needs a coding declaration
for fullpath in pysource.walk_python_files(args, is_python):
    if debug:
        print("Testing for coding: %s" % fullpath)
    if needs_declaration(fullpath):
        print(fullpath)
106+

Tools/scripts/pysource.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#!/usr/bin/env python
2+
3+
"""\
4+
List python source files.
5+
6+
There are three functions to check whether a file is a Python source, listed
7+
here with increasing complexity:
8+
9+
- has_python_ext() checks whether a file name ends in '.py[w]'.
10+
- looks_like_python() checks whether the file is not binary and either has
11+
the '.py[w]' extension or the first line contains the word 'python'.
12+
- can_be_compiled() checks whether the file can be compiled by compile().
13+
14+
The file also must be of appropriate size - not bigger than a megabyte.
15+
16+
walk_python_files() recursively lists all Python files under the given directories.
17+
"""
18+
__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
19+
20+
__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
21+
22+
23+
import sys, os, re
24+
25+
# Matches control characters other than \x09-\x0D (tab, LF, VT, FF, CR);
# looks_like_python() treats a first line containing one as binary data.
binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')

# Module-wide switch: set to True to make print_debug() emit its messages.
debug = False

def print_debug(msg):
    """Print *msg*, but only while the module-level debug flag is set."""
    if not debug:
        return
    print(msg)
31+
32+
33+
def _open(fullpath):
    """Open *fullpath* for reading, or return None if it should be skipped.

    None is returned (after a debug message) when the file cannot be
    stat'ed, when it is larger than one megabyte, or when opening it fails.
    Otherwise the file object, opened in 'rU' mode, is returned; the caller
    is responsible for closing it.
    """
    try:
        size = os.stat(fullpath).st_size
    except OSError: # Permission denied - ignore the file
        print_debug("%s: permission denied: %s"
                    % (fullpath, sys.exc_info()[1]))
        return None

    if size > 1024*1024: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        handle = open(fullpath, 'rU')
    except IOError: # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, sys.exc_info()[1]))
        handle = None
    return handle
49+
50+
def has_python_ext(fullpath):
    """Return True if the name *fullpath* ends in '.py' or '.pyw'."""
    for extension in (".py", ".pyw"):
        if fullpath.endswith(extension):
            return True
    return False
52+
53+
def looks_like_python(fullpath):
    """Guess from the name and first line whether *fullpath* is Python source.

    Returns False if the file cannot be opened by _open() or if its first
    line contains a character matched by binary_re.  Otherwise returns True
    when the name has a Python extension or the first line contains the
    word 'python', and False in all remaining cases.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    # only the first line is needed; close the file even if the read fails
    try:
        line = infile.readline()
    finally:
        infile.close()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # reuse the shared extension test so the rule lives in one place
    if has_python_ext(fullpath):
        return True

    # disguised Python script (e.g. CGI)
    return "python" in line
73+
74+
def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* compile as Python code.

    Returns False when _open() refuses the file or when compile() raises
    any Exception; the failure is reported through print_debug().
    """
    source_file = _open(fullpath)
    if source_file is None:
        return False

    source = source_file.read()
    source_file.close()

    try:
        compile(source, fullpath, "exec")
    except Exception:
        print_debug("%s: cannot compile: %s" % (fullpath, sys.exc_info()[1]))
        return False
    else:
        return True
89+
90+
91+
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """Generate the Python source files found at or below *paths*.

    paths:        a list of file and/or directory names to examine.
    is_python:    a callable taking a file name and returning whether the
                  file is a Python source file.
    exclude_dirs: a list of directory base names that are not descended
                  into; None means nothing is excluded.
    """
    if exclude_dirs is None:
        exclude_dirs = []

    for path in paths:
        print_debug("testing: %s" % path)

        if os.path.isfile(path):
            if is_python(path):
                yield path
            continue

        if not os.path.isdir(path):
            print_debug(" unknown type")
            continue

        print_debug(" it is a directory")
        for dirpath, dirnames, filenames in os.walk(path):
            # prune dirnames in place so os.walk skips the excluded ones
            for unwanted in exclude_dirs:
                if unwanted in dirnames:
                    dirnames.remove(unwanted)
            for filename in filenames:
                candidate = os.path.join(dirpath, filename)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate
122+
123+
124+
if __name__ == "__main__":
    # Two simple examples/tests: list the files below the current directory
    # using the default test first, then using the compile test.
    for found in walk_python_files(['.']):
        print(found)
    print("----------")
    for found in walk_python_files(['.'], is_python=can_be_compiled):
        print(found)

0 commit comments

Comments
 (0)