Commit | Line | Data |
---|---|---|
12b31560 PA |
1 | #!/usr/bin/perl -w |
2 | # | |
3 | # Clean a text file -- or directory of text files -- of stealth whitespace. | |
4 | # WARNING: this can be a highly destructive operation. Use with caution. | |
5 | # | |
6 | ||
7 | use bytes; | |
8 | use File::Basename; | |
9 | ||
10 | # | |
11 | # Clean up space-tab sequences, either by removing spaces or | |
12 | # replacing them with tabs. | |
sub clean_space_tabs($)
{
    # Takes one string and returns a copy in which every run of spaces
    # that is immediately followed by a tab has been folded into the
    # minimum number of tabs reaching the same 8-column tab stop.
    # Spaces that are not followed by a tab are passed through untouched.

    no bytes;			# Tab alignment depends on characters

    my ($text)  = @_;
    my $result  = '';
    my $column  = 0;	# column reached by what has been emitted so far
    my $pending = 0;	# spaces seen but not yet emitted

    foreach my $ch (split //, $text) {
        if ($ch eq ' ') {
            # Hold spaces back until we know whether a tab follows.
            $pending++;
            next;
        }

        if ($ch eq "\t") {
            # The tab lands on the next multiple of 8 past the held
            # spaces; emit however many tabs are needed to get there
            # from the current column and forget the spaces.
            my $target = ($column + $pending + 8) & ~7;
            $result .= "\t" x (($target >> 3) - ($column >> 3));
            $column  = $target;
            $pending = 0;
            next;
        }

        # Any other character: the held spaces are real, emit them first.
        $result .= ' ' x $pending;
        $result .= $ch;

        if ($ch eq "\n" || $ch eq "\r") {
            $column = 0;		# line terminators reset the column
        } else {
            $column += $pending + 1;
        }
        $pending = 0;
    }

    # Spaces left over at the end of the string are kept as they are.
    return $result . (' ' x $pending);
}
50 | ||
# Main program: clean every file named on the command line, in place.
#
# For each argument:
#   - skip it unless it is a plain file that can be opened read/write
#     and contains no zero byte (a zero byte marks it as binary);
#   - strip trailing spaces, tabs and carriage returns from every line;
#   - pass every line through clean_space_tabs();
#   - drop blank lines at the end of the file;
#   - rewrite and truncate the file only if its contents changed.
#
# Failures before the file is touched are reported and the file is
# skipped; failures while rewriting are fatal, since the file may have
# been left partially written.
my $name = basename($0);

foreach my $f ( @ARGV ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my ($data, $got);

    while (($got = read($fh, $data, 65536))) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    # read() returns undef on error, 0 at end of file.
    if (!defined($got)) {
        print STDERR "$name: Cannot read file: $f: $!\n";
        close($fh);
        next;
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        next;
    }

    my $changed = 0;	# set as soon as the output differs from the input
    my @blanks  = ();	# blank lines held back until a non-blank follows
    my @lines   = ();	# cleaned lines to be written out

    while ( defined(my $raw = <$fh>) ) {
        my $line = $raw;
        $line =~ s/[ \t\r]*$//;	# Remove trailing spaces
        $line = clean_space_tabs($line);

        # Compare contents, not sizes: folding " \t" into "\t\t" changes
        # the line without changing its length.
        $changed = 1 if $line ne $raw;

        # An empty string can only be a final, unterminated line that
        # consisted purely of whitespace; treat it as blank as well.
        if ( $line eq "\n" || $line eq '' ) {
            push(@blanks, $line);
        } else {
            push(@lines, @blanks, $line);
            @blanks = ();
        }
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if @blanks;

    if ($changed) {
        # Only write to the file if changed
        seek($fh, 0, 0)
            or die "$name: Failed to rewrite modified file: $f: $!\n";
        print {$fh} @lines
            or die "$name: Failed to rewrite modified file: $f: $!\n";

        # tell() reports failure as -1, not undef.
        my $where = tell($fh);
        if ( !defined($where) || $where < 0 ||
             !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    close($fh)
        or print STDERR "$name: Error closing file: $f: $!\n";
}