aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/cleanfile
diff options
context:
space:
mode:
author: H. Peter Anvin <hpa@zytor.com> 2007-03-12 15:16:30 -0400
committer: Sam Ravnborg <sam@ravnborg.org> 2007-05-02 14:58:08 -0400
commit: 12b315603a1eb95b4e4ea3389ba44bd7ded0aa35 (patch)
tree: fd2b88b8b947124e9fe47f480b4dff5f8eb65fbb /scripts/cleanfile
parent: dc24f0e708c8a6a27b5b967a2599c04973054398 (diff)
cleanfile: a script to clean up stealth whitespace
This script cleans up various classes of stealth whitespace. In particular, it cleans up: - Whitespace (spaces or tabs) before newline; - DOS line endings (CR before LF); - Space before tab (spaces are deleted or converted to tabs); - Empty lines at end of file. Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Diffstat (limited to 'scripts/cleanfile')
-rwxr-xr-xscripts/cleanfile126
1 files changed, 126 insertions, 0 deletions
diff --git a/scripts/cleanfile b/scripts/cleanfile
new file mode 100755
index 000000000000..f1ba8aa58a40
--- /dev/null
+++ b/scripts/cleanfile
@@ -0,0 +1,126 @@
1#!/usr/bin/perl -w
2#
3# Clean a text file -- or directory of text files -- of stealth whitespace.
4# WARNING: this can be a highly destructive operation. Use with caution.
5#
6
7use bytes;
8use File::Basename;
9
#
# Remove every "space(s) followed by tab" sequence from a string.
#
# A run of spaces that is immediately followed by a tab is replaced by
# however many tabs are needed to reach the same tab stop the original
# sequence reached (tab stops are every 8 columns).  Spaces that are
# followed by anything else -- or by the end of the string -- are kept.
# The column counter restarts at zero after each "\n" or "\r".
#
# Takes the text to clean and returns the cleaned copy.
#
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($text)  = @_;
    my $cleaned = '';
    my $column  = 0;            # Column reached by what is in $cleaned
    my $held    = 0;            # Spaces seen but not yet emitted

    foreach my $ch (split(//, $text)) {
        if ($ch eq " ") {
            # Hold spaces back until we know what follows them.
            $held++;
            next;
        }

        if ($ch eq "\t") {
            # Find the tab stop the held spaces plus this tab reach,
            # then emit as many tabs as stops were crossed to get there.
            my $stop = ($column + $held + 8) & ~7;
            $cleaned .= "\t" x (($stop >> 3) - ($column >> 3));
            $column = $stop;
            $held   = 0;
            next;
        }

        # Any other character: the held spaces are legitimate, flush them.
        $cleaned .= (" " x $held) . $ch;
        if ($ch eq "\n" || $ch eq "\r") {
            $column = 0;
        } else {
            $column += $held + 1;
        }
        $held = 0;
    }

    # Spaces at the very end of the string are passed through unchanged.
    return $cleaned . (" " x $held);
}
50
$name = basename($0);

#
# Clean each file named on the command line, in place.
#
# For every argument: skip it if it is not a regular file, cannot be
# opened read/write, or contains a zero byte (treated as binary).
# Otherwise strip trailing whitespace and CRs from every line, run each
# line through clean_space_tabs(), drop blank lines at the end of the
# file, and rewrite the file only if something actually changed.
#
foreach my $f ( @ARGV ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my ($data, $got);

    while ($got = read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    # read() returns undef on error (0 at end of file).  Do not rewrite
    # a file we could not read completely.
    if (!defined($got)) {
        print STDERR "$name: Cannot read file: $f: $!\n";
        close($fh);
        next;
    }

    seek($fh, 0, 0);

    my $changed = 0;            # True once any cleanup altered the contents
    my @blanks  = ();           # Blank lines seen since the last non-blank line
    my @lines   = ();           # Cleaned lines to be written back

    while ( defined(my $line = <$fh>) ) {
        my $orig = $line;

        $line =~ s/[ \t\r]*$//; # Remove trailing spaces
        $line = clean_space_tabs($line);

        # Compare contents, not lengths: replacing "space, tab" with
        # "tab, tab" changes the line without changing its byte count.
        $changed = 1 if $line ne $orig;

        if ( $line eq "\n" ) {
            # Hold blank lines back; they are only kept if a non-blank
            # line follows them.
            push(@blanks, $line);
        } else {
            push(@lines, @blanks, $line);
            @blanks = ();
        }
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if @blanks;

    if ($changed) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        print $fh @lines;

        # The cleaned contents are never longer than the original, so
        # cut off whatever is left over from the old contents.
        my $where = tell($fh);
        if ( !defined($where) || !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors surface at close time.
    if (!close($fh)) {
        print STDERR "$name: Error closing file: $f: $!\n";
    }
}