Skip to content

Commit e6444c1

Browse files
author
rhettinger
committed
Added functions for creating context diffs and unified diffs.
Documentation update and NEWS item are forthcoming. git-svn-id: http://svn.python.org/projects/python/trunk@33037 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent a05cbf7 commit e6444c1

1 file changed

Lines changed: 200 additions & 1 deletion

File tree

Lib/difflib.py

Lines changed: 200 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,18 @@
66
Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
77
Use SequenceMatcher to return list of the best "good enough" matches.
88
9+
Function context_diff(a, b):
10+
For two lists of strings, return a delta in context diff format.
11+
912
Function ndiff(a, b):
1013
Return a delta: the difference between `a` and `b` (lists of strings).
1114
1215
Function restore(delta, which):
1316
Return one of the two sequences that generated an ndiff delta.
1417
18+
Function unified_diff(a, b):
19+
For two lists of strings, return a delta in unified diff format.
20+
1521
Class SequenceMatcher:
1622
A flexible class for comparing pairs of sequences of any type.
1723
@@ -20,7 +26,8 @@
2026
"""
2127

2228
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
23-
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK']
29+
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
30+
'unified_diff']
2431

2532
class SequenceMatcher:
2633

@@ -532,6 +539,54 @@ def get_opcodes(self):
532539
answer.append( ('equal', ai, i, bj, j) )
533540
return answer
534541

542+
def get_grouped_opcodes(self, n=3):
    """ Isolate change clusters by eliminating ranges with no changes.

    Return a generator of groups with up to n lines of context.
    Each group is in the same format as returned by get_opcodes().

    If get_opcodes() returns no opcodes at all, no groups are generated.
    The list returned by get_opcodes() is left unmodified.

    >>> from pprint import pprint
    >>> a = map(str, range(1,40))
    >>> b = a[:]
    >>> b[8:8] = ['i']     # Make an insertion
    >>> b[20] += 'x'       # Make a replacement
    >>> b[23:28] = []      # Make a deletion
    >>> b[30] += 'y'       # Make another replacement
    >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
    [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
     [('equal', 16, 19, 17, 20),
      ('replace', 19, 20, 20, 21),
      ('equal', 20, 22, 21, 23),
      ('delete', 22, 27, 23, 23),
      ('equal', 27, 30, 23, 26)],
     [('equal', 31, 34, 27, 30),
      ('replace', 34, 35, 30, 31),
      ('equal', 35, 38, 31, 34)]]
    """

    # Work on a copy: the fixups below assign into the list, and the
    # list handed back by get_opcodes() must stay intact for other users.
    codes = list(self.get_opcodes())
    if not codes:
        # No opcodes means there is nothing to group (and codes[0]
        # below would raise IndexError).
        return

    # Fixup leading and trailing groups if they show no changes:
    # keep at most n lines of context next to the first/last change.
    if codes[0][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[0]
        codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
    if codes[-1][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[-1]
        codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)

    nn = n + n
    group = []
    for tag, i1, i2, j1, j2 in codes:
        # End the current group and start a new one whenever
        # there is a large range with no changes.
        if tag == 'equal' and i2-i1 > nn:
            # Trailing context for the group being closed ...
            group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
            yield group
            group = []
            # ... and leading context for the group being opened.
            i1, j1 = max(i1, i2-n), max(j1, j2-n)
        group.append((tag, i1, i2, j1, j2))
    # Emit the final group unless it consists of context only.
    if group and not (len(group)==1 and group[0][0] == 'equal'):
        yield group
589+
535590
def ratio(self):
536591
"""Return a measure of the sequences' similarity (float in [0,1]).
537592
@@ -1042,6 +1097,150 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"):
10421097

10431098
del re
10441099

1100+
1101+
def _format_range_unified(start, stop):
    """Render the half-open range [start, stop) as a unified hunk range."""
    # Lines are numbered from 1.  A one-line range omits its length, and
    # an empty range is reported at the line just before it (so an empty
    # file yields "0,0"), which is what patch tools expect.
    beginning = start + 1
    length = stop - start
    if length == 1:
        return '%d' % beginning
    if not length:
        beginning -= 1
    return '%d,%d' % (beginning, length)


def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    In each "@@" hunk header a one-line range is written as just its line
    number, and an empty range is written as "N,0" where N is the line
    preceding it (0 for the start of the file).

    Nothing is generated when the two sequences show no differences.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print line
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """

    started = False
    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, and only if there is at
            # least one hunk to show.
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        # A hunk spans from the start of its first opcode to the end
        # of its last opcode, on each side.
        first, last = group[0], group[-1]
        range_a = _format_range_unified(first[1], last[2])
        range_b = _format_range_unified(first[3], last[4])
        yield '@@ -%s +%s @@%s' % (range_a, range_b, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # A replacement shows the removed lines followed by the
            # added lines.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line
1161+
1162+
def _context_group_has(group, tags):
    """Return True if any opcode in group carries one of the given tags."""
    for opcode in group:
        if opcode[0] in tags:
            return True
    return False


# See http://www.unix.org/single_unix_specification/
def context_diff(a, b, fromfile='', tofile='',
                 fromfiledate='', tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a context diff.

    Context diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with *** or ---) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The context diff format normally has a header for filenames and
    modification times.  Any or all of these may be specified using
    strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
    The modification times are normally expressed in the format returned
    by time.ctime().  If not specified, the strings default to blanks.

    Every content line starts with a two-character marker: '! ' for a
    changed line, '- ' for a deleted line, '+ ' for an inserted line and
    two blanks for an unchanged context line.

    Nothing is generated when the two sequences show no differences.

    Example:

    >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
    ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
    ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
    *** Original Sat Jan 26 23:30:50 1991
    --- Current Fri Jun 06 10:22:46 2003
    ***************
    *** 1,4 ****
      one
    ! two
    ! three
      four
    --- 1,4 ----
    + zero
      one
    ! tree
      four
    """

    started = False
    # All markers are two characters wide so that changed and unchanged
    # lines stay aligned in the output.
    prefixmap = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, and only if there is at
            # least one hunk to show.
            yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
            started = True

        yield '***************%s' % (lineterm,)

        # "From" side of the hunk.  Ranges of fewer than two lines are
        # written as a single line number.
        a_start, a_stop = group[0][1], group[-1][2]
        if a_stop - a_start >= 2:
            yield '*** %d,%d ****%s' % (a_start+1, a_stop, lineterm)
        else:
            yield '*** %d ****%s' % (a_stop, lineterm)
        # The "from" lines are listed only when something was changed or
        # removed; a hunk of pure insertions shows just the range line.
        if _context_group_has(group, ('replace', 'delete')):
            for tag, i1, i2, j1, j2 in group:
                if tag != 'insert':
                    for line in a[i1:i2]:
                        yield prefixmap[tag] + line

        # "To" side of the hunk, handled symmetrically.
        b_start, b_stop = group[0][3], group[-1][4]
        if b_stop - b_start >= 2:
            yield '--- %d,%d ----%s' % (b_start+1, b_stop, lineterm)
        else:
            yield '--- %d ----%s' % (b_stop, lineterm)
        if _context_group_has(group, ('replace', 'insert')):
            for tag, i1, i2, j1, j2 in group:
                if tag != 'delete':
                    for line in b[j1:j2]:
                        yield prefixmap[tag] + line
1243+
10451244
def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
10461245
r"""
10471246
Compare `a` and `b` (lists of strings); return a `Differ`-style delta.

0 commit comments

Comments
 (0)