Skip to content

Commit 71f8bf9

Browse files
Finishing basic repo stats script
1 parent e412460 commit 71f8bf9

2 files changed

Lines changed: 72 additions & 14 deletions

File tree

housekeeping/repo_health_checkup.py

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,37 @@
66
import sys, token, tokenize
77

88
def main():
    """
    Walk the parent of the current working directory and print a health
    report for every Python / Markdown file that fails at least one check.

    Checks applied per file type:
      * ``.md`` files         -> presence of a code output example
      * ``*-answer.py`` files -> tags and unittests
      * any other ``.py``     -> 'TODO' markers and amount of commenting

    Files that cannot be decoded as text are skipped silently.
    """
    search_dir = os.path.dirname(os.getcwd()) # we want the parent directory
    for folder, _subfolders, files in os.walk(search_dir, topdown=False):
        for filename in files:
            fullpath = os.path.join(folder, filename)
            # only Python and Markdown files are health checked
            if not fullpath.endswith(('.py', '.md')): continue
            print(fullpath) # just to check where we're walking
            # a distinct name for the handle so the loop variable is not shadowed
            with open(fullpath, 'r') as handle:
                try:
                    lines = handle.readlines()
                except UnicodeDecodeError: continue
                file_health = fullpath + '\n'
                # HEALTH CHECK FLOW
                #
                # if: README for a problem check for -> formatting, spelling, and a code output example
                if fullpath.endswith('.md'):
                    file_health += "" if check_for_code_output_example(lines) else "missing output ex.\n"
                    # TODO: Add the rest of the checks
                # if: problem solution source code file then check for -> tags, unit tests, spelling
                elif fullpath.endswith('-answer.py'):
                    file_health += "" if check_for_tags(lines) else "missing tags!\n"
                    file_health += "" if check_for_unittests(lines) else "missing unittests!\n"
                    # TODO: Add spelling check
                # and... any other file check for -> spelling, "TODO" markers, amount of commenting
                else:
                    has_todos, _ = check_for_todos(lines)
                    if has_todos: file_health += "has TODOs to complete.\n"
                    # readlines() above left the handle at EOF; rewind so the
                    # commenting check reads the actual source of the file
                    handle.seek(0)
                    try:
                        comment_lines, total_lines = check_amount_of_commenting( handle.readline )
                    except (tokenize.TokenError, SyntaxError):
                        # source that cannot be tokenized should not abort the whole walk
                        file_health += "unable to measure commenting (tokenize failed).\n"
                    else:
                        # guard the division: no tokenized lines means no ratio
                        if total_lines > 0:
                            comment_ratio = comment_lines / total_lines
                            if comment_ratio < 0.15: # arbitrary limit
                                file_health += "low commenting! " + str(comment_ratio) + "\n"
            # only report files that failed at least one check
            if file_health != fullpath + '\n':
                print(file_health)
2140

2241
def check_markdown( lines ):
2342
"""
@@ -26,11 +45,25 @@ def check_markdown( lines ):
2645
"""
2746
return True # TODO
2847

48+
def check_for_todos( lines, marker='TODO' ): # TODO: It is inefficient to re-read through lines to check for different things
    """
    Check for presence of 'TODO' markers in files.

    lines  -- iterable of strings, one per line of the file
    marker -- text to search for; defaults to 'TODO'

    Returns a ``(found, todos)`` tuple. When at least one line contains the
    marker, ``found`` is True and ``todos`` lists, for each such line, the
    text from the first occurrence of the marker to the end of the line
    (any trailing newline is kept). Otherwise returns ``(False, None)``.
    """
    todos = []
    for line in lines:
        # one scan per line: find() locates the marker and reports absence as -1
        start = line.find(marker)
        if start != -1:
            todos.append(line[start:])
    if not todos:
        return False, None
    return True, todos
58+
59+
2960
def check_amount_of_commenting( readable ):
3061
"""
3162
Check lines of file to see whether it has a lot of commenting in it,
3263
which we take to mean the presence of code explanation.
3364
"""
65+
# TODO: I have hacked this code together and it could be silly and buggy.
66+
# It does seem to get things roughly correct, which is good enough for the moment
3467
prev_toktype = token.INDENT
3568
first_line = None
3669
last_lineno = -1
@@ -46,21 +79,20 @@ def check_amount_of_commenting( readable ):
4679
))
4780
if slineno > last_lineno:
4881
last_col = 0
49-
if scol > last_col:
50-
comment_lines += scol - last_col
5182
if toktype == token.STRING and prev_toktype == token.INDENT:
5283
# Docstring
53-
mod.write("#--")
84+
comment_lines += 1
5485
elif toktype == tokenize.COMMENT:
5586
# Comment
5687
comment_lines += 1
5788
prev_toktype = toktype
5889
last_col = ecol
5990
last_lineno = elineno
60-
# TODO // fix this function
91+
return comment_lines, last_lineno # roughly num of comment lines divided by num of lines
6192

6293
def check_for_tags( lines ):
    """
    Report whether a problem's answer file contains a tag list.

    The lines are concatenated and searched for the ``##$$##`` marker
    followed by at least two whitespace-led tags, where every tag except
    the last is followed by a comma. Returns True on a match, else False.
    """
    tag_pattern = r'##\$\$##(\s\S+,)+(\s\S+)'
    source_text = ''.join(lines)
    if re.search(tag_pattern, source_text):
        return True
    return False
6698

@@ -69,9 +101,14 @@ def check_for_code_output_example( lines ):
69101
Check a Markdown problem description file for the presence of a code input to
70102
output example. ie. file should have something like: double_this(4) # > 8
71103
"""
104+
lines = ''.join(lines)
72105
match = re.search(r'`{3}.+\s`{3}', lines, re.DOTALL)
73106
return True if match else False
74107

108+
def check_for_unittests( lines ):
    """
    Check python source code for unittests.

    Placeholder: the lines are not inspected yet, so every file is
    reported as having unittests (always returns True).
    """
    # TODO: Implement this
    has_unittests = True
    return has_unittests
111+
75112
def check_spelling( lines ):
76113
""" Check the spelling of all words in the lines provided. """
77114
lines_to_check = []

housekeeping/repo_stats.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,28 @@
66
from os.path import isfile, isdir, join, splitext
77

88
def main():
    """
    Run through all relevant files in the project and print to console some stats.

    The project root is taken to be the parent of the current working
    directory. Three figures are printed:
      * total number of lines across all ``.py`` files
      * number of ``.py`` files whose path contains '-answer'
      * number of ``.ipynb`` files

    A ``.py`` file that cannot be decoded as text is left out of every count.
    """
    nloc_in_project, num_problems, num_questions = 0, 0, 0
    search_dir = os.path.dirname(os.getcwd()) # we want the parent directory
    for folder, _subfolders, files in os.walk(search_dir, topdown=False):
        for filename in files:
            fullpath = os.path.join(folder, filename)
            # notebooks are only counted, so there is no need to read them
            if fullpath.endswith('.ipynb'):
                num_questions += 1
                continue
            # every other file type contributes nothing to the stats
            if not fullpath.endswith('.py'): continue
            try:
                # a distinct name for the handle so the loop variable is not shadowed
                with open(fullpath, 'r') as handle:
                    line_count = sum(1 for _ in handle)
            except UnicodeDecodeError: continue
            nloc_in_project += line_count
            if '-answer' in fullpath: num_problems += 1

    print("Lines of code in project: ", nloc_in_project)
    print("Number of Code Exercises: ", num_problems)
    print("Number of Worded Questions: ", num_questions )
29+
30+
1031

1132
def line_count_file(file_path, flags=None):
1233
""" Counts lines for given file in file_name """

0 commit comments

Comments
 (0)