-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPythonWalkDirectory.py
More file actions
41 lines (31 loc) · 939 Bytes
/
PythonWalkDirectory.py
File metadata and controls
41 lines (31 loc) · 939 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
# Alternative input location used previously: "/tmp2/cdiscount/raw"
directory = "/tmp2/cdiscount/input/train.unpacked"

# Number of sub-directories listed in the final report.
TOP_N = 15


def leaf_name(path):
    """Return the text after the last "/" in *path*.

    If *path* contains no "/", it is returned unchanged.
    """
    return path.rsplit("/", 1)[-1]


def collect_dir_entries(root, limit=100):
    """Map each directory found below *root* to the files directly inside it.

    Args:
        root: Directory handed to ``os.walk``.
        limit: The walk stops, after printing a progress line, once this
            many walk entries (the root included) have been visited, so at
            most ``limit - 2`` directories are recorded. Pass ``None`` to
            scan everything.

    Returns:
        A dict keyed by directory leaf name (see ``leaf_name``) whose values
        are the file-name lists reported by ``os.walk``.

    Raises:
        AssertionError: If two visited directories share the same leaf name.
    """
    # NOTE(review): the default limit reproduces the original script's early
    # exit at the 100th walk entry; it looks like a debugging cap - confirm
    # whether a full scan is wanted.
    entries = {}
    for count, (dir_path, _subdirs, files) in enumerate(os.walk(root), start=1):
        if count == 1:
            # The first entry yielded by os.walk is the root itself.
            continue
        if limit is not None and count >= limit:
            print("Now processed: {}".format(count))
            break
        name = leaf_name(dir_path)
        if name in entries:
            # Raised explicitly so the check is not stripped under ``python -O``.
            raise AssertionError("Directory already existed")
        entries[name] = files
    return entries


def rank_by_file_count(entries):
    """Return ``(name, files)`` pairs sorted by descending number of files."""
    return sorted(entries.items(), key=lambda item: -len(item[1]))


print("Starting")
# Leftover scratch check: str.find returns -1 when the substring is absent.
a = "hello"
print(a.find("/"))
dir_entries = collect_dir_entries(directory)
print("Done counting files, now sorting")
result = rank_by_file_count(dir_entries)
# Slice instead of indexing so fewer than TOP_N directories is not an error.
for name, files in result[:TOP_N]:
    print("[{}]: {} files".format(name, len(files)))
print("Finished")