-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path14.3.py
More file actions
133 lines (110 loc) · 3.59 KB
/
14.3.py
File metadata and controls
133 lines (110 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#coding:utf-8
from random import *
import random
import time
import bisect
from bisect import bisect_left
import os
import shelve
import matplotlib.pyplot as pyplot
from string import *
# Show where a relative "words.txt" would resolve from the current directory.
print(os.path.abspath("words.txt"))

# Exercise statement (translated from the original note):
#   Write a module that imports anagram_sets and provides two functions:
#   store_anagrams stores the anagram dictionary in a "shelf";
#   read_anagrams looks up a word and returns a list of its anagrams.
# NOTE(review): neither function is defined in the visible part of this
# file; the code below is a duplicate-file finder instead.

# Raw string literals keep the Windows backslashes literal.  Without the
# r-prefix, "\U" starts a Unicode escape on Python 3 and the file fails
# to compile.
# NOTE(review): both paths are machine-specific, and the handles are left
# open and are not used by the code visible here -- confirm they are needed.
fin = open(r'C:\Users\LzyRapx\PycharmProjects\untitled\words.txt')
fin2 = open(r"C:\Users\LzyRapx\PycharmProjects\untitled\kamasutra.txt")
def walk(dirname):
    """
    Finds the names of all files in dirname and its subdirectories.

    Any directory whose path contains '__pycache__' is skipped entirely.
    Entries that are neither regular files nor directories (dangling
    symlinks, FIFOs, sockets, ...) are ignored instead of being recursed
    into, which would make os.listdir raise.

    dirname: string name of directory

    Returns: list of path strings, each built with os.path.join from
    dirname, in the order os.listdir reports them.
    """
    names = []
    if '__pycache__' in dirname:
        return names

    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)
        if os.path.isfile(path):
            names.append(path)
        elif os.path.isdir(path):
            # Only real directories can be listed; recurse into those.
            names.extend(walk(path))
    return names
def compute_checksum(filename):
    """
    Computes the MD5 checksum of the contents of a file.

    The digest is computed in-process with hashlib rather than by building
    an 'md5sum <filename>' shell command, so filenames containing spaces
    or shell metacharacters are handled safely and no external md5sum
    program is required.

    filename: string

    Returns (res, stat): res is a string laid out like md5sum output,
    '<hex digest>  <filename>\\n', and stat is always None.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import list.
    import hashlib

    digest = hashlib.md5()
    with open(filename, 'rb') as fp:
        # Read in fixed-size chunks so large files are never loaded whole.
        for chunk in iter(lambda: fp.read(65536), b''):
            digest.update(chunk)
    return '%s  %s\n' % (digest.hexdigest(), filename), None
def check_diff(name1, name2):
    """
    Computes the difference between the contents of two files.

    Runs the external 'diff' program through pipe().  Both filenames are
    shell-quoted so that spaces or shell metacharacters in a name cannot
    split the argument or inject extra commands, and '--' stops diff from
    treating a name that starts with '-' as an option.

    name1, name2: string filenames

    Returns whatever pipe() returns for the diff command.
    """
    try:
        from shlex import quote
    except ImportError:
        # Python 2 keeps the same helper in the pipes module.
        from pipes import quote

    cmd = 'diff -- %s %s' % (quote(name1), quote(name2))
    return pipe(cmd)
def pipe(cmd):
    """
    Runs a command in a subprocess.

    cmd: string Unix command

    Returns (res, stat), the output of the subprocess and the exit status.
    stat is the value returned by closing the os.popen pipe: None when the
    command exited successfully, otherwise a non-None status.  A failing
    command is reported through stat rather than by raising, so callers
    can run commands such as diff whose non-zero exit is a normal result.
    """
    # Note: os.popen was marked deprecated in favor of the subprocess
    # module, but for a simple "run and capture stdout" case like this
    # one it is kept for brevity.
    fp = os.popen(cmd)
    try:
        res = fp.read()
    finally:
        # Always close the pipe, even if reading fails, so the child
        # process is reaped; close() yields the exit status.
        stat = fp.close()
    return res, stat
def compute_checksums(dirname, suffix):
    """
    Computes checksums for all files with the given suffix.

    dirname: string name of directory to search
    suffix: string suffix to match

    Returns: map from checksum to list of files with that checksum

    The checksum is taken as the first whitespace-delimited field of the
    text returned by compute_checksum, so a filename that itself contains
    whitespace no longer breaks the parsing.  Files for which no output
    was produced are left out of the map.
    """
    d = {}
    for name in walk(dirname):
        if not name.endswith(suffix):
            continue

        res, _stat = compute_checksum(name)
        # Split only once: everything after the first field is the
        # filename, which may contain spaces.
        fields = res.split(None, 1)
        if not fields:
            # Nothing to key on; skip rather than crash on empty output.
            continue

        d.setdefault(fields[0], []).append(name)
    return d
def check_pairs(names):
    """
    Reports whether every file in a list matches all the others.

    Each unordered pair is compared once (only when the first name sorts
    strictly before the second), and the scan stops at the first pair for
    which check_diff produces any output.

    names: list of string filenames

    Returns: False as soon as one pair differs, True otherwise.
    """
    found_difference = any(
        check_diff(first, second)[0]
        for first in names
        for second in names
        if first < second
    )
    return not found_difference
def print_duplicates(d):
    """
    Prints every group of files that share a checksum.

    For each group with more than one file, the filenames are listed and,
    when check_pairs confirms the contents really match, a line saying so
    is added.

    d: map from checksum to list of files with that checksum
    """
    for group in d.values():
        if len(group) <= 1:
            # A checksum seen only once cannot be a duplicate.
            continue

        print('The following files have the same checksum:')
        for filename in group:
            print(filename)

        if check_pairs(group):
            print('And they are identical.')
if __name__ == '__main__':
    # When run as a script: checksum every '.py' file under the current
    # directory and report the groups that collide.
    d = compute_checksums(suffix='.py', dirname='.')
    print_duplicates(d)