aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Joe Perches <joe@perches.com> 2011-10-31 20:13:12 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-10-31 20:30:57 -0400
commit: 15662b3e8644905032c2e26808401a487d4e90c1 (patch)
tree: 0eae1a68779bd314459a2bff55c43e266329015d
parent: 67d0a0754455f89ef3946946159d8ec9e45ce33a (diff)
checkpatch: add a --strict check for utf-8 in commit logs
Some find using utf-8 in commit logs inappropriate.

Some patch commit logs contain unintended utf-8 characters when doing things like copy/pasting compilation output.

Look for the start of any commit log by skipping initial lines that look like email headers and "From: " lines.

Stop looking for utf-8 at the first signature line.

Signed-off-by: Joe Perches <joe@perches.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rwxr-xr-x scripts/checkpatch.pl 30
1 files changed, 26 insertions, 4 deletions
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 5ba679c8cde6..5e93342d22f9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -240,9 +240,8 @@ our $NonptrType;
240our $Type; 240our $Type;
241our $Declare; 241our $Declare;
242 242
243our $UTF8 = qr { 243our $NON_ASCII_UTF8 = qr{
244 [\x09\x0A\x0D\x20-\x7E] # ASCII 244 [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
245 | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
246 | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs 245 | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
247 | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte 246 | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
248 | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates 247 | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
@@ -251,6 +250,11 @@ our $UTF8 = qr {
251 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 250 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
252}x; 251}x;
253 252
253our $UTF8 = qr{
254 [\x09\x0A\x0D\x20-\x7E] # ASCII
255 | $NON_ASCII_UTF8
256}x;
257
254our $typeTypedefs = qr{(?x: 258our $typeTypedefs = qr{(?x:
255 (?:__)?(?:u|s|be|le)(?:8|16|32|64)| 259 (?:__)?(?:u|s|be|le)(?:8|16|32|64)|
256 atomic_t 260 atomic_t
@@ -1330,6 +1334,9 @@ sub process {
1330 my $signoff = 0; 1334 my $signoff = 0;
1331 my $is_patch = 0; 1335 my $is_patch = 0;
1332 1336
1337 my $in_header_lines = 1;
1338 my $in_commit_log = 0; #Scanning lines before patch
1339
1333 our @report = (); 1340 our @report = ();
1334 our $cnt_lines = 0; 1341 our $cnt_lines = 0;
1335 our $cnt_error = 0; 1342 our $cnt_error = 0;
@@ -1497,7 +1504,6 @@ sub process {
1497 if ($line =~ /^diff --git.*?(\S+)$/) { 1504 if ($line =~ /^diff --git.*?(\S+)$/) {
1498 $realfile = $1; 1505 $realfile = $1;
1499 $realfile =~ s@^([^/]*)/@@; 1506 $realfile =~ s@^([^/]*)/@@;
1500
1501 } elsif ($line =~ /^\+\+\+\s+(\S+)/) { 1507 } elsif ($line =~ /^\+\+\+\s+(\S+)/) {
1502 $realfile = $1; 1508 $realfile = $1;
1503 $realfile =~ s@^([^/]*)/@@; 1509 $realfile =~ s@^([^/]*)/@@;
@@ -1536,6 +1542,7 @@ sub process {
1536# Check the patch for a signoff: 1542# Check the patch for a signoff:
1537 if ($line =~ /^\s*signed-off-by:/i) { 1543 if ($line =~ /^\s*signed-off-by:/i) {
1538 $signoff++; 1544 $signoff++;
1545 $in_commit_log = 0;
1539 } 1546 }
1540 1547
1541# Check signature styles 1548# Check signature styles
@@ -1613,6 +1620,21 @@ sub process {
1613 "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); 1620 "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr);
1614 } 1621 }
1615 1622
1623# Check if it's the start of a commit log
1624# (not a header line and we haven't seen the patch filename)
1625 if ($in_header_lines && $realfile =~ /^$/ &&
1626 $rawline !~ /^(commit\b|from\b|\w+:).+$/i) {
1627 $in_header_lines = 0;
1628 $in_commit_log = 1;
1629 }
1630
1631# Still not yet in a patch, check for any UTF-8
1632 if ($in_commit_log && $realfile =~ /^$/ &&
1633 $rawline =~ /$NON_ASCII_UTF8/) {
1634 CHK("UTF8_BEFORE_PATCH",
1635 "8-bit UTF-8 used in possible commit log\n" . $herecurr);
1636 }
1637
1616# ignore non-hunk lines and lines being removed 1638# ignore non-hunk lines and lines being removed
1617 next if (!$hunk_line || $line =~ /^-/); 1639 next if (!$hunk_line || $line =~ /^-/);
1618 1640