cleanpatch: a script to clean up stealth whitespace added by a patch

This script is a companion to the "cleanfile" script. It cleans up a patch in unified diff format *before* it is applied. Note that the detection of empty lines at end of file *requires* that the diff was taken with at least one line of context around each hunk, or bad things will happen. This script cleans up various classes of stealth whitespace. In particular, it cleans up: - Whitespace (spaces or tabs) before newline; - DOS line endings (CR before LF); - Space before tab (spaces are deleted or converted to tabs); - Empty lines at end of file. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
author: H. Peter Anvin <hpa@zytor.com> 2007-03-16 17:45:06 -0400
committer: Sam Ravnborg <sam@ravnborg.org> 2007-05-02 14:58:08 -0400
commit: 6e019b001d27c4289f8e48ebc458e92410446259 (patch)
tree: e49759afd7e7a84b3b38fba521db1bdca5b2dc7d
parent: 12b315603a1eb95b4e4ea3389ba44bd7ded0aa35 (diff)
1 files changed, 206 insertions, 0 deletions
diff --git a/scripts/cleanpatch b/scripts/cleanpatch
new file mode 100755
index 00000000000..a53f987708f
--- /dev/null
+++ b/scripts/cleanpatch
@@ -0,0 +1,206 @@
+#!/usr/bin/perl -w
+#
+# Clean a patch file -- or directory of patch files -- of stealth whitespace.
+# WARNING: this can be a highly destructive operation.  Use with caution.
+#
+use bytes;
+use File::Basename;
+#
+# clean_space_tabs($line)
+#
+# Clean up space-tab sequences in $line, either by removing the spaces or
+# by replacing them with tabs, so that the text following each tab stays
+# in the column it occupied before (tab stops are every 8 columns).
+# Spaces that are not followed by a tab are left alone.
+#
+# Columns are counted in characters: input that is well-formed UTF-8 is
+# decoded first and re-encoded before returning, so a multi-byte character
+# takes up one column.  Any other input is counted as one column per byte.
+#
+# Returns the cleaned line as a byte string; the argument is not modified.
+sub clean_space_tabs($)
+{
+    no bytes;                   # Tab alignment depends on characters
+    my($li) = @_;               # Private copy, safe to decode in place
+    my $lo = '';                # Output built so far
+    my $pos = 0;                # Column reached by the text in $lo
+    my $nsp = 0;                # Spaces seen but not yet written to $lo
+    # The caller hands us raw bytes.  Without decoding, every UTF-8
+    # continuation byte would count as a column of its own and the
+    # number of tabs computed below could be wrong.
+    my $is_utf8 = utf8::decode($li);
+    foreach my $c (split(//, $li)) {
+        if ($c eq "\t") {
+            # The tab ends at the first tab stop beyond the pending
+            # spaces; get there from $pos using tabs only.
+            my $npos = ($pos + $nsp + 8) & ~7;
+            $lo .= "\t" x (($npos >> 3) - ($pos >> 3));
+            $pos = $npos;
+            $nsp = 0;
+        } elsif ($c eq " ") {
+            # Hold the space back until we know whether a tab follows.
+            $nsp++;
+        } else {
+            # Spaces before anything but a tab are kept as they are.
+            $lo .= (" " x $nsp) . $c;
+            # CR and LF both return to column zero.
+            $pos = ($c eq "\n" || $c eq "\r") ? 0 : $pos + $nsp + 1;
+            $nsp = 0;
+        }
+    }
+    $lo .= " " x $nsp;          # Spaces at the very end of the input
+    utf8::encode($lo) if $is_utf8;
+    return $lo;
+}
+#
+# Main program: clean each patch file named on the command line, in place.
+#
+# Lines outside hunks are copied through untouched.  Inside a hunk only
+# added ("+") lines are cleaned: trailing spaces, tabs and CRs are removed
+# and space-before-tab sequences are fixed by clean_space_tabs().  Empty
+# added lines at the very end of a hunk are dropped and the hunk header's
+# new-file line count is reduced to match; this is only safe if the diff
+# was taken with at least one line of context around each hunk.
+#
+# A file is skipped (with a message on STDERR) if it is not a regular
+# file, cannot be opened, contains a zero byte, or is not a well-formed
+# patch.  The file is rewritten only if cleaning actually changed it.
+my $name = basename($0);
+# Hunk header: "@@ -start[,count] +start[,count] @@tail".  A count that is
+# left out means 1.  Captures old start, old count, new start, new count
+# and the text after the closing "@@" (without the final newline).
+my $hunk_re =
+    qr/^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@(.*)$/;
+foreach my $f ( @ARGV ) {
+    print STDERR "$name: $f\n";
+    if (! -f $f) {
+        print STDERR "$f: not a file\n";
+        next;
+    }
+    my $fh;
+    if (!open($fh, '+<', $f)) {
+        print STDERR "$name: Cannot open file: $f: $!\n";
+        next;
+    }
+    binmode $fh;
+    # First, verify that it is not a binary file; consider any file
+    # with a zero byte to be a binary file.  Is there any better, or
+    # additional, heuristic that should be applied?
+    my $is_binary = 0;
+    my $data;
+    while (read($fh, $data, 65536) > 0) {
+        if ($data =~ /\0/) {
+            $is_binary = 1;
+            last;
+        }
+    }
+    if ($is_binary) {
+        print STDERR "$name: $f: binary file\n";
+        close($fh);
+        next;
+    }
+    seek($fh, 0, 0);
+    my @lines = ();             # Cleaned file contents, line by line
+    my @hunk_lines = ();        # Header and body of the hunk being read
+    my ($mstart, $mlin, $pstart, $plin, $tail);   # Parsed hunk header
+    my $minus_lines = 0;        # Old-file lines still expected in hunk
+    my $plus_lines = 0;         # New-file lines still expected in hunk
+    my $in_hunk = 0;
+    my $changed = 0;            # Set as soon as any output line differs
+    my $err = 0;
+    while ( defined(my $line = <$fh>) ) {
+        if (!$in_hunk) {
+            if ($line =~ $hunk_re) {
+                ($mstart, $pstart, $tail) = ($1, $3, $5);
+                $mlin = defined($2) ? $2 + 0 : 1;
+                $plin = defined($4) ? $4 + 0 : 1;
+                $minus_lines = $mlin;
+                $plus_lines = $plin;
+                if ($minus_lines || $plus_lines) {
+                    $in_hunk = 1;
+                    @hunk_lines = ($line);
+                } else {
+                    # A hunk with no lines at all: keep its header.
+                    push(@lines, $line);
+                }
+            } else {
+                push(@lines, $line);
+            }
+            next;
+        }
+        # We're in a hunk
+        if ($line =~ /^\+/) {
+            $plus_lines--;
+            my $text = substr($line, 1);
+            $text =~ s/[ \t\r]*$//;             # Remove trailing spaces
+            $text = clean_space_tabs($text);
+            # Compare contents, not lengths: a space-tab fix can leave
+            # the line exactly as long as it was.
+            $changed = 1 if ('+'.$text ne $line);
+            push(@hunk_lines, '+'.$text);
+        } elsif ($line =~ /^\-/) {
+            $minus_lines--;
+            push(@hunk_lines, $line);
+        } elsif ($line =~ /^ /) {
+            $plus_lines--;
+            $minus_lines--;
+            push(@hunk_lines, $line);
+        } elsif ($line =~ /^\\/) {
+            # "\ No newline at end of file" marker: it annotates the
+            # previous line and does not count towards either total.
+            push(@hunk_lines, $line);
+        } else {
+            print STDERR "$name: $f: malformed patch\n";
+            $err = 1;
+            last;
+        }
+        if ($plus_lines < 0 || $minus_lines < 0) {
+            print STDERR "$name: $f: malformed patch\n";
+            $err = 1;
+            last;
+        }
+        next if ($plus_lines || $minus_lines);
+        # End of a hunk.  Process this hunk.
+        my $header = shift(@hunk_lines);
+        my @kept = ();          # Hunk body in reverse order
+        my $adj = 0;            # Number of empty added lines dropped
+        my $done = 0;           # Set once a context or real added line is seen
+        foreach my $hl (reverse(@hunk_lines)) {
+            if (!$done && $hl eq "+\n") {
+                $adj++;         # Skip this line
+                next;
+            }
+            $done = 1 if ($hl =~ /^[ +]/);
+            push(@kept, $hl);
+        }
+        @hunk_lines = ();       # Free memory
+        if ($adj) {
+            # Lines were dropped, so the new-file count must shrink.
+            $header = sprintf('@@ -%d,%d +%d,%d @@%s' . "\n",
+                              $mstart, $mlin, $pstart, $plin-$adj,
+                              $tail);
+            $changed = 1;
+        }
+        # Transfer to the output array
+        push(@lines, $header, reverse(@kept));
+        $in_hunk = 0;
+    }
+    if (!$err && $in_hunk) {
+        # The file ended before the hunk had all its lines.
+        print STDERR "$name: $f: malformed patch\n";
+        $err = 1;
+    }
+    if (!$err && $changed) {
+        # Only write to the file if changed
+        seek($fh, 0, 0);
+        if (!print $fh @lines) {
+            die "$name: Failed to write modified file: $f: $!\n";
+        }
+        # tell() reports failure as -1, not as undef.
+        my $where = tell($fh);
+        if ( !defined($where) || $where < 0 ||
+             !truncate($fh, $where) ) {
+            die "$name: Failed to truncate modified file: $f: $!\n";
+        }
+    }
+    if (!close($fh)) {
+        print STDERR "$name: $f: close failed: $!\n";
+    }
+}
author	H. Peter Anvin <hpa@zytor.com>	2007-03-16 17:45:06 -0400
committer	Sam Ravnborg <sam@ravnborg.org>	2007-05-02 14:58:08 -0400
commit	6e019b001d27c4289f8e48ebc458e92410446259 (patch)
tree	e49759afd7e7a84b3b38fba521db1bdca5b2dc7d
parent	12b315603a1eb95b4e4ea3389ba44bd7ded0aa35 (diff)

diff --git a/scripts/cleanpatch b/scripts/cleanpatch new file mode 100755 index 00000000000..a53f987708f --- /dev/null +++ b/scripts/cleanpatch
@@ -0,0 +1,206 @@
	1	#!/usr/bin/perl -w
	2	#
	3	# Clean a patch file -- or directory of patch files -- of stealth whitespace.
	4	# WARNING: this can be a highly destructive operation. Use with caution.
	5	#
	6
	7	use bytes;
	8	use File::Basename;
	9
	10	#
	11	# Clean up space-tab sequences, either by removing spaces or
	12	# replacing them with tabs.
	13	sub clean_space_tabs($)
	14	{
	15	no bytes; # Tab alignment depends on characters
	16
	17	my($li) = @_;
	18	my($lo) = '';
	19	my $pos = 0;
	20	my $nsp = 0;
	21	my($i, $c);
	22
	23	for ($i = 0; $i < length($li); $i++) {
	24	$c = substr($li, $i, 1);
	25	if ($c eq "\t") {
	26	my $npos = ($pos+$nsp+8) & ~7;
	27	my $ntab = ($npos >> 3) - ($pos >> 3);
	28	$lo .= "\t" x $ntab;
	29	$pos = $npos;
	30	$nsp = 0;
	31	} elsif ($c eq "\n" || $c eq "\r") {
	32	$lo .= " " x $nsp;
	33	$pos += $nsp;
	34	$nsp = 0;
	35	$lo .= $c;
	36	$pos = 0;
	37	} elsif ($c eq " ") {
	38	$nsp++;
	39	} else {
	40	$lo .= " " x $nsp;
	41	$pos += $nsp;
	42	$nsp = 0;
	43	$lo .= $c;
	44	$pos++;
	45	}
	46	}
	47	$lo .= " " x $nsp;
	48	return $lo;
	49	}
	50
	51	$name = basename($0);
	52
	53	foreach $f ( @ARGV ) {
	54	print STDERR "$name: $f\n";
	55
	56	if (! -f $f) {
	57	print STDERR "$f: not a file\n";
	58	next;
	59	}
	60
	61	if (!open(FILE, '+<', $f)) {
	62	print STDERR "$name: Cannot open file: $f: $!\n";
	63	next;
	64	}
	65
	66	binmode FILE;
	67
	68	# First, verify that it is not a binary file; consider any file
	69	# with a zero byte to be a binary file. Is there any better, or
	70	# additional, heuristic that should be applied?
	71	$is_binary = 0;
	72
	73	while (read(FILE, $data, 65536) > 0) {
	74	if ($data =~ /\0/) {
	75	$is_binary = 1;
	76	last;
	77	}
	78	}
	79
	80	if ($is_binary) {
	81	print STDERR "$name: $f: binary file\n";
	82	next;
	83	}
	84
	85	seek(FILE, 0, 0);
	86
	87	$in_bytes = 0;
	88	$out_bytes = 0;
	89
	90	@lines = ();
	91
	92	$in_hunk = 0;
	93	$err = 0;
	94
	95	while ( defined($line = <FILE>) ) {
	96	$in_bytes += length($line);
	97
	98	if (!$in_hunk) {
	99	if ($line =~ /^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@/) {
	100	$minus_lines = $2;
	101	$plus_lines = $4;
	102	if ($minus_lines || $plus_lines) {
	103	$in_hunk = 1;
	104	@hunk_lines = ($line);
	105	}
	106	} else {
	107	push(@lines, $line);
	108	$out_bytes += length($line);
	109	}
	110	} else {
	111	# We're in a hunk
	112
	113	if ($line =~ /^\+/) {
	114	$plus_lines--;
	115
	116	$text = substr($line, 1);
	117	$text =~ s/[ \t\r]*$//; # Remove trailing spaces
	118	$text = clean_space_tabs($text);
	119
	120	push(@hunk_lines, '+'.$text);
	121	} elsif ($line =~ /^\-/) {
	122	$minus_lines--;
	123	push(@hunk_lines, $line);
	124	} elsif ($line =~ /^ /) {
	125	$plus_lines--;
	126	$minus_lines--;
	127	push(@hunk_lines, $line);
	128	} else {
	129	print STDERR "$name: $f: malformed patch\n";
	130	$err = 1;
	131	last;
	132	}
	133
	134	if ($plus_lines < 0 || $minus_lines < 0) {
	135	print STDERR "$name: $f: malformed patch\n";
	136	$err = 1;
	137	last;
	138	} elsif ($plus_lines == 0 && $minus_lines == 0) {
	139	# End of a hunk. Process this hunk.
	140	my $i;
	141	my $l;
	142	my @h = ();
	143	my $adj = 0;
	144	my $done = 0;
	145
	146	for ($i = scalar(@hunk_lines)-1; $i > 0; $i--) {
	147	$l = $hunk_lines[$i];
	148	if (!$done && $l eq "+\n") {
	149	$adj++; # Skip this line
	150	} elsif ($l =~ /^[ +]/) {
	151	$done = 1;
	152	unshift(@h, $l);
	153	} else {
	154	unshift(@h, $l);
	155	}
	156	}
	157
	158	$l = $hunk_lines[0]; # Hunk header
	159	undef @hunk_lines; # Free memory
	160
	161	if ($adj) {
	162	die unless
	163	($l =~ /^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@(.*)$/);
	164	my $mstart = $1;
	165	my $mlin = $2;
	166	my $pstart = $3;
	167	my $plin = $4;
	168	my $tail = $5; # doesn't include the final newline
	169
	170	$l = sprintf("@@ -%d,%d +%d,%d @@%s\n",
	171	$mstart, $mlin, $pstart, $plin-$adj,
	172	$tail);
	173	}
	174	unshift(@h, $l);
	175
	176	# Transfer to the output array
	177	foreach $l (@h) {
	178	$out_bytes += length($l);
	179	push(@lines, $l);
	180	}
	181
	182	$in_hunk = 0;
	183	}
	184	}
	185	}
	186
	187	if ($in_hunk) {
	188	print STDERR "$name: $f: malformed patch\n";
	189	$err = 1;
	190	}
	191
	192	if (!$err) {
	193	if ($in_bytes != $out_bytes) {
	194	# Only write to the file if changed
	195	seek(FILE, 0, 0);
	196	print FILE @lines;
	197
	198	if ( !defined($where = tell(FILE)) ||
	199	!truncate(FILE, $where) ) {
	200	die "$name: Failed to truncate modified file: $f: $!\n";
	201	}
	202	}
	203	}
	204
	205	close(FILE);
	206	}