Skip to content

Commit 3d5d4c0

Browse files
committed
Add script to compute the number of bytes per file type
1 parent 895e852 commit 3d5d4c0

File tree

1 file changed

+216
-0
lines changed

1 file changed

+216
-0
lines changed
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
#!/usr/bin/env bash
#
# Prints an approximate number of bytes per file type.
#
# Each output row has the following format (rows are sorted in reverse
# numeric order, i.e., largest proportion first):
#
#     <proportion> <total_bytes> <number_of_files> <average> <file_type>
#
# Notes:
#
# * `README.md` files are included in Markdown statistics (they are counted
#   under both `README` and `Markdown`).
# * The `total` row aggregates every listed file.
# * Must be run from inside a Git working tree; only indexed files which are
#   at or below the current directory are considered.
# * Symbolic links are classified according to the name of the link.

#######################################
# Aggregates `ls -l` output into per-file-type statistics.
#
# Inputs:  long-format listing lines on stdin (size in field 5, path from
#          field 9 onward). Lines with fewer than nine fields are ignored.
# Outputs: one unsorted row per file type on stdout:
#          <proportion> <total_bytes> <number_of_files> <average> <file_type>
#          Nothing is printed when no file entries are read.
#######################################
compute_stats() {
  awk '
  # Adds the size of the current entry to the running totals for `type`.
  function tally(type) {
    bytes[type] += $5
    N[type] += 1
  }

  # Returns the path portion of a long-format listing line. The first eight
  # fields are stripped one at a time (rather than reading field 9) so that
  # paths containing spaces are preserved in full.
  function entry_path(line, mode,    i) {
    for (i = 1; i <= 8; i++) {
      sub(/^ *[^ ]+ +/, "", line)
    }
    # Symbolic links are listed as `<link> -> <target>`; keep the link name.
    if (mode ~ /^l/) {
      sub(/ -> .*$/, "", line)
    }
    return line
  }

  # Skip anything which is not a file entry (e.g., blank lines, directory
  # headers, and `total <blocks>` summaries).
  NF < 9 {
    next
  }

  {
    path = entry_path($0, $1)
    tally("total")
  }

  # Special cases...
  path ~ /(LICENSE|NOTICE)$/ { tally("LICENSE"); next }
  path ~ /datapackage\.json$/ { tally("datapackage.json"); next }
  path ~ /package\.json$/ { tally("package.json"); next }

  # Known file extensions (keep in alphabetical order)...
  path ~ /\.awk$/ { tally("AWK"); next }
  path ~ /\.bib$/ { tally("BibTeX"); next }
  path ~ /\.c$/ { tally("C"); next }
  path ~ /\.cpp$/ { tally("C++"); next }
  path ~ /\.csl$/ { tally("CSL"); next }
  path ~ /\.css$/ { tally("CSS"); next }
  path ~ /\.csv$/ { tally("CSV"); next }
  path ~ /\.eot$/ { tally("fonts"); next }
  path ~ /\.gif$/ { tally("gif"); next }
  path ~ /\.go$/ { tally("Go"); next }
  path ~ /\.h$/ { tally("C"); next }
  path ~ /\.hpp$/ { tally("C++"); next }
  path ~ /\.html$/ { tally("HTML"); next }
  path ~ /\.jl$/ { tally("Julia"); next }
  path ~ /\.jpg$/ { tally("JPG"); next }
  path ~ /\.js$/ { tally("JavaScript"); next }
  path ~ /\.json$/ { tally("JSON"); next }
  path ~ /Makefile$/ { tally("make"); next }
  path ~ /\.md$/ {
    # README files are reported separately AND as part of Markdown.
    if (path ~ /README\.md$/) {
      tally("README")
    }
    tally("Markdown")
    next
  }
  path ~ /\.mk$/ { tally("make"); next }
  path ~ /\.png$/ { tally("PNG"); next }
  path ~ /\.py$/ { tally("Python"); next }
  path ~ /\.R$/ { tally("R"); next }
  path ~ /\.sh$/ { tally("bash"); next }
  path ~ /\.svg$/ { tally("SVG"); next }
  path ~ /\.txt$/ { tally("plaintext"); next }
  path ~ /\.woff$/ { tally("fonts"); next }
  path ~ /\.yml$/ { tally("YAML"); next }

  # Special cases...
  path ~ /^\.([A-Za-z])+$|\/\.([A-Za-z])+$/ { tally("dotfiles"); next }
  path ~ /\/([A-Za-z0-9_-])+$/ { tally("executables"); next }

  # Everything else...
  {
    tally("other")
  }

  END {
    # Without any file entries there is nothing to report (and computing
    # proportions would divide by zero).
    if (!("total" in N)) {
      exit
    }
    total = bytes["total"]
    for (k in bytes) {
      b = bytes[k]
      n = N[k]

      # Truncate to four decimal places. When every file is empty, report a
      # proportion of zero rather than dividing by zero.
      pct = (total > 0) ? int(b / total * 10000) / 10000 : 0
      mu = int(b / n * 10000) / 10000

      print pct OFS b OFS n OFS mu OFS k
    }
  }
  '
}

#######################################
# Lists indexed files and prints sorted per-file-type statistics.
#
# Outputs: statistics rows on stdout; diagnostics on stderr.
# Returns: 0 on success, 1 when not run from inside a Git working tree.
#######################################
main() {
  local -a files=()
  local file

  if [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" != "true" ]]; then
    printf 'error: must be run from inside a Git working tree\n' >&2
    return 1
  fi

  # Read NUL-delimited paths so that names containing whitespace, quotes, or
  # non-ASCII characters are passed through unmodified.
  while IFS= read -r -d '' file; do
    files+=("$file")
  done < <(git ls-files -z)

  # Without any paths, `ls -l` would list the current directory instead.
  if (( ${#files[@]} == 0 )); then
    return 0
  fi

  # * `ls -l`
  #   - Print file info, which includes number of bytes. The C locale pins
  #     the field layout (size in field 5, path starting at field 9).
  # * `compute_stats`
  #   - Compute statistics.
  # * `sort -nr`
  #   - Sort in reverse numeric order.
  printf '%s\0' "${files[@]}" \
    | LC_ALL=C xargs -0 ls -l -- \
    | compute_stats \
    | LC_ALL=C sort -nr
}

main "$@"

0 commit comments

Comments
 (0)