Skip to content

Commit 6e019b0

Browse files
H. Peter Anvin authored and sravnborg committed
cleanpatch: a script to clean up stealth whitespace added by a patch
This script is a companion to the "cleanfile" script. This cleans up a patch in unified diff format *before* it is applied. Note that the detection of empty lines at end of file *requires* that the diff was taken with at least one line of context around each hunk, or bad things will happen. This script cleans up various classes of stealth whitespace. In particular, it cleans up: - Whitespace (spaces or tabs) before newline; - DOS line endings (CR before LF); - Space before tab (spaces are deleted or converted to tabs); - Empty lines at end of file. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
1 parent 12b3156 commit 6e019b0

1 file changed

Lines changed: 206 additions & 0 deletions

File tree

scripts/cleanpatch

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
#!/usr/bin/perl -w
2+
#
3+
# Clean a patch file -- or directory of patch files -- of stealth whitespace.
4+
# WARNING: this can be a highly destructive operation. Use with caution.
5+
#
6+
7+
use bytes;
8+
use File::Basename;
9+
10+
#
11+
# Clean up space-tab sequences, either by removing spaces or
12+
# replacing them with tabs.
13+
sub clean_space_tabs($)
{
    no bytes;           # Column arithmetic counts characters, not bytes

    # Takes one string and returns a copy in which every run of spaces
    # that is immediately followed by a tab has been folded into tabs
    # (8-column tab stops).  Spaces not followed by a tab are kept as-is.
    my ($input) = @_;

    my $output  = '';
    my $column  = 0;    # Column reached by what has been emitted so far
    my $pending = 0;    # Spaces seen but not yet emitted

    foreach my $ch (split(//, $input)) {
        # Hold spaces back until we know whether a tab follows them.
        if ($ch eq " ") {
            $pending++;
            next;
        }

        # A tab swallows the pending spaces: emit just enough tabs to
        # reach the tab stop the original spaces-plus-tab would reach.
        if ($ch eq "\t") {
            my $stop = ($column + $pending + 8) & ~7;
            $output .= "\t" x (($stop >> 3) - ($column >> 3));
            $column  = $stop;
            $pending = 0;
            next;
        }

        # Anything else: the pending spaces were genuine, so flush them
        # ahead of the character.  CR and LF restart the column count.
        $output .= (" " x $pending) . $ch;
        if ($ch eq "\n" || $ch eq "\r") {
            $column = 0;
        } else {
            $column += $pending + 1;
        }
        $pending = 0;
    }

    # Spaces at the very end of the string are preserved.
    return $output . (" " x $pending);
}
50+
51+
my $name = basename($0);

# Unified-diff hunk header: "@@ -start[,count] +start[,count] @@tail".
# The ",count" part is optional; when it is omitted the count is 1.
# Captures: old start, old count, new start, new count, tail (the text
# after the closing "@@", without the final newline).
my $hunk_header_re =
    qr/^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@(.*)$/;

# Process every file named on the command line.  Each file is rewritten
# in place, and only if cleaning actually changed its contents.
foreach my $f ( @ARGV ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    seek($fh, 0, 0);

    my @lines = ();             # Cleaned output, one element per line
    my $changed = 0;            # Set as soon as any output differs from input
    my $in_hunk = 0;
    my $err = 0;

    my ($minus_lines, $plus_lines);     # Lines still expected in this hunk
    my @hunk_header = ();               # Parsed fields of the hunk header
    my @hunk_lines = ();                # Header followed by the hunk body

    while ( defined(my $line = <$fh>) ) {
        if (!$in_hunk) {
            if ($line =~ $hunk_header_re) {
                # "+ 0" so that a count such as "00" is treated as zero
                $minus_lines = defined($2) ? $2 + 0 : 1;
                $plus_lines  = defined($4) ? $4 + 0 : 1;
                if ($minus_lines || $plus_lines) {
                    $in_hunk = 1;
                    @hunk_header = ($1, $minus_lines, $3, $plus_lines, $5);
                    @hunk_lines = ($line);
                }
            }

            # Anything that does not open a hunk is copied through
            # untouched, including a header describing an empty hunk.
            if (!$in_hunk) {
                push(@lines, $line);
            }
        } else {
            # We're in a hunk

            if ($line =~ /^\+/) {
                $plus_lines--;

                my $text = substr($line, 1);
                $text =~ s/[ \t\r]*$//;         # Remove trailing spaces
                $text = clean_space_tabs($text);

                my $cleaned = '+'.$text;
                # Compare contents rather than lengths: turning spaces
                # into tabs can leave the byte count unchanged.
                $changed = 1 if ($cleaned ne $line);
                push(@hunk_lines, $cleaned);
            } elsif ($line =~ /^\-/) {
                $minus_lines--;
                push(@hunk_lines, $line);
            } elsif ($line =~ /^ /) {
                $plus_lines--;
                $minus_lines--;
                push(@hunk_lines, $line);
            } elsif ($line =~ /^\\/) {
                # "\ No newline at end of file" marker: belongs to the
                # preceding line and counts towards neither side.
                push(@hunk_lines, $line);
            } else {
                print STDERR "$name: $f: malformed patch\n";
                $err = 1;
                last;
            }

            if ($plus_lines < 0 || $minus_lines < 0) {
                print STDERR "$name: $f: malformed patch\n";
                $err = 1;
                last;
            } elsif ($plus_lines == 0 && $minus_lines == 0) {
                # End of a hunk.  Process this hunk.
                my @h = ();
                my $adj = 0;            # Number of added lines dropped
                my $done = 0;

                # Walk backwards over the hunk body, dropping empty
                # added lines until the first context or non-empty
                # added line is reached; removed lines are kept and do
                # not stop the scan.
                for (my $i = scalar(@hunk_lines)-1; $i > 0; $i--) {
                    my $hl = $hunk_lines[$i];
                    if (!$done && $hl eq "+\n") {
                        $adj++;         # Skip this line
                    } else {
                        $done = 1 if ($hl =~ /^[ +]/);
                        unshift(@h, $hl);
                    }
                }

                my $header = $hunk_lines[0];    # Hunk header
                @hunk_lines = ();               # Free memory

                if ($adj) {
                    # Lines were dropped, so the new-file line count in
                    # the header has to shrink by the same amount.
                    my ($mstart, $mlin, $pstart, $plin, $tail) = @hunk_header;

                    $header = sprintf('@@ -%d,%d +%d,%d @@%s' . "\n",
                                      $mstart, $mlin, $pstart, $plin-$adj,
                                      $tail);
                    $changed = 1;
                }

                # Transfer to the output array
                push(@lines, $header, @h);

                $in_hunk = 0;
            }
        }
    }

    if ($in_hunk) {
        print STDERR "$name: $f: malformed patch\n";
        $err = 1;
    }

    if (!$err && $changed) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        print {$fh} @lines;

        my $where = tell($fh);
        if ( !defined($where) || !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    close($fh);
}

0 commit comments

Comments
 (0)