aboutsummaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@zytor.com> 2007-03-16 17:45:06 -0400
committer Sam Ravnborg <sam@ravnborg.org> 2007-05-02 14:58:08 -0400
commit 6e019b001d27c4289f8e48ebc458e92410446259 (patch)
tree e49759afd7e7a84b3b38fba521db1bdca5b2dc7d /scripts
parent 12b315603a1eb95b4e4ea3389ba44bd7ded0aa35 (diff)
cleanpatch: a script to clean up stealth whitespace added by a patch
This script is a companion to the "cleanfile" script. This cleans up a patch in unified diff format *before* it is applied.

Note that the detection of empty lines at the end of a file *requires* that the diff was taken with at least one line of context around each hunk, or bad things will happen.

This script cleans up various classes of stealth whitespace. In particular, it cleans up:

- Whitespace (spaces or tabs) before newline;
- DOS line endings (CR before LF);
- Space before tab (spaces are deleted or converted to tabs);
- Empty lines at end of file.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/cleanpatch 206
1 files changed, 206 insertions, 0 deletions
diff --git a/scripts/cleanpatch b/scripts/cleanpatch
new file mode 100755
index 00000000000..a53f987708f
--- /dev/null
+++ b/scripts/cleanpatch
@@ -0,0 +1,206 @@
#!/usr/bin/perl -w
#
# Clean a patch file -- or directory of patch files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
6
7use bytes;
8use File::Basename;
9
#
# clean_space_tabs($text)
#
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Runs of spaces are held back until the next character is seen:
#   - if it is a tab, the held spaces are discarded and as many tabs are
#     emitted as are needed to land on the same 8-column tab stop that
#     the original "spaces + tab" sequence reached;
#   - otherwise the held spaces are emitted unchanged before it.
# "\n" and "\r" reset the column counter to zero.  Spaces left pending at
# the end of the input are emitted as-is (stripping trailing whitespace
# is not done here).
#
# Takes one string and returns the rewritten string.
#
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $lo  = '';               # Output built so far
    my $pos = 0;                # Column reached by the text in $lo
    my $nsp = 0;                # Spaces seen but not yet emitted

    foreach my $c (split(//, $li)) {
        if ($c eq ' ') {
            # Defer: whether these survive depends on what follows.
            $nsp++;
            next;
        }

        if ($c eq "\t") {
            # Next tab stop at or beyond the column the pending spaces
            # would have reached, then count the stops crossed from $pos.
            my $npos = ($pos + $nsp + 8) & ~7;
            $lo .= "\t" x (($npos >> 3) - ($pos >> 3));
            $pos = $npos;
            $nsp = 0;
            next;
        }

        # Any other character: the pending spaces are real, keep them.
        $lo .= (' ' x $nsp) . $c;
        if ($c eq "\n" || $c eq "\r") {
            $pos = 0;           # Line terminators restart the column count
        } else {
            $pos += $nsp + 1;
        }
        $nsp = 0;
    }

    return $lo . (' ' x $nsp);
}
50
#
# Main program.
#
# Every command-line argument is treated as a unified-diff patch file and
# is cleaned in place:
#   - arguments that are not plain files, cannot be opened read/write, or
#     contain a NUL byte (taken to mean "binary") are reported and skipped;
#   - lines outside hunks are copied through untouched;
#   - inside a hunk, every added ("+") line has trailing spaces, tabs and
#     CRs removed and is passed through clean_space_tabs();
#   - empty added lines at the very end of a hunk are dropped and the "+"
#     line count in the hunk header is reduced accordingly;
#   - the file is rewritten only if no error was found and something
#     actually changed.
#
use strict;

my $name = basename($0);

# Unified-diff hunk header: "@@ -start[,count] +start[,count] @@tail".
# A count may be omitted, in which case it is 1.  The tail capture does
# not include the final newline.
my $hunk_header_re =
    qr/^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@(.*)$/;

foreach my $f (@ARGV) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536) > 0) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);             # Don't leave the handle open when skipping
        next;
    }

    seek($fh, 0, 0);

    my @lines      = ();        # Output lines for the whole file
    my @hunk_lines = ();        # Header + body of the hunk being read
    my $in_hunk    = 0;
    my $err        = 0;
    my $changed    = 0;         # Set as soon as any output differs from input

    # Lines still expected on each side of the current hunk.
    my ($minus_lines, $plus_lines) = (0, 0);

    # Parsed fields of the current hunk header, kept so that the header
    # can be regenerated if trailing empty lines are dropped.
    my ($mstart, $mlin, $pstart, $plin, $tail);

    while (defined(my $line = <$fh>)) {
        if (!$in_hunk) {
            my @hdr = ($line =~ $hunk_header_re);

            if (@hdr) {
                ($mstart, $mlin, $pstart, $plin, $tail) = @hdr;
                $mlin = defined($mlin) ? $mlin + 0 : 1;
                $plin = defined($plin) ? $plin + 0 : 1;
            }

            if (@hdr && ($mlin || $plin)) {
                $minus_lines = $mlin;
                $plus_lines  = $plin;
                $in_hunk     = 1;
                @hunk_lines  = ($line);
            } else {
                # Not a hunk header, or a header announcing an empty
                # hunk: either way the line is copied through as-is.
                push(@lines, $line);
            }
        } else {
            # We're in a hunk

            if ($line =~ /^\+/) {
                $plus_lines--;

                my $text = substr($line, 1);
                $text =~ s/[ \t\r]*$//; # Remove trailing spaces
                $text = clean_space_tabs($text);

                my $cleaned = '+' . $text;
                # Compare content, not length: "space tab" can turn into
                # "tab tab", which changes the line but not its size.
                $changed = 1 if $cleaned ne $line;
                push(@hunk_lines, $cleaned);
            } elsif ($line =~ /^\-/) {
                $minus_lines--;
                push(@hunk_lines, $line);
            } elsif ($line =~ /^ /) {
                $plus_lines--;
                $minus_lines--;
                push(@hunk_lines, $line);
            } elsif ($line =~ /^\\/) {
                # "\ No newline at end of file" marker: belongs to the
                # preceding line and counts on neither side of the hunk.
                push(@hunk_lines, $line);
            } else {
                print STDERR "$name: $f: malformed patch\n";
                $err = 1;
                last;
            }

            if ($plus_lines < 0 || $minus_lines < 0) {
                print STDERR "$name: $f: malformed patch\n";
                $err = 1;
                last;
            } elsif ($plus_lines == 0 && $minus_lines == 0) {
                # End of a hunk.  Process this hunk.
                my @h    = ();
                my $adj  = 0;   # Number of trailing empty "+" lines dropped
                my $done = 0;   # Set once a context or non-empty "+" line is seen

                # Walk the body backwards (index 0 is the header).
                for (my $i = scalar(@hunk_lines) - 1; $i > 0; $i--) {
                    my $hl = $hunk_lines[$i];
                    if (!$done && $hl eq "+\n") {
                        $adj++; # Skip this line
                    } else {
                        $done = 1 if $hl =~ /^[ +]/;
                        unshift(@h, $hl);
                    }
                }

                my $header = $hunk_lines[0];
                @hunk_lines = ();       # Free memory

                if ($adj) {
                    # Lines were removed from the "+" side, so the header
                    # must advertise the reduced count.
                    $header = sprintf('@@ -%d,%d +%d,%d @@%s',
                                      $mstart, $mlin, $pstart, $plin - $adj,
                                      $tail) . "\n";
                    $changed = 1;
                }

                # Transfer to the output array
                push(@lines, $header, @h);

                $in_hunk = 0;
            }
        }
    }

    if ($in_hunk) {
        print STDERR "$name: $f: malformed patch\n";
        $err = 1;
    }

    if (!$err && $changed) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        my $where = tell($fh);
        if (!defined($where) || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors only surface here.
    close($fh)
        or print STDERR "$name: Failed to close file: $f: $!\n";
}