diff options
| author | Joe Perches <joe@perches.com> | 2010-03-05 16:43:00 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-06 14:26:33 -0500 |
| commit | 03372dbbe618bfcd02f9c8cdbfe78e97e3aad43b (patch) | |
| tree | 8b848249ec5d45154fb05c43aa0c7bd34f13d09e /scripts | |
| parent | cea83886dde49fd7524e9f4a246dd5dff4ad236a (diff) | |
scripts/get_maintainer.pl: add --file-emails, find embedded email addresses
Add an imperfect option to search a source file for email addresses.
New option: --file-emails or --fe
Email addresses in files are freeform text and are nearly impossible to
parse. Still, might as well try to do a somewhat acceptable job of
finding them. This code should find all addresses that are in the form
addr@domain.tld
The code assumes that up to 3 alphabetic words along with dashes, commas,
and periods that precede the email address are a name.
If 3 words are found for the name, and one of the first two words is a
single letter and period, or just a single letter, then the 3 words are used
as the name; otherwise the last 2 words are used.
Some variants that are shown correctly:
John Smith <jksmith@domain.org>
Random J. Developer <rjd@tld.com>
Random J. Developer (rjd@tld.com)
J. Random Developer rjd@tld.com
Variants that are shown nominally correctly:
Written by First Last (funny-addr@somecompany.com)
is shown as:
First Last <funny-addr@somecompany.com>
Variants that are shown incorrectly:
Some Really Long Name <srln@foo.bar>
MontaVista Software, Inc. <source@mvista.com>
are returned as:
Long Name <srln@foo.bar>
"Software, Inc" <source@mvista.com>
--roles and --rolestats show "(in file)" for matches.
For instance:
Without -file-emails:
$ ./scripts/get_maintainer.pl -f -nogit -roles net/core/netpoll.c
David S. Miller <davem@davemloft.net> (maintainer:NETWORKING [GENERAL])
linux-kernel@vger.kernel.org (open list)
With -fe:
$ ./scripts/get_maintainer.pl -f -fe -nogit -roles net/core/netpoll.c
David S. Miller <davem@davemloft.net> (maintainer:NETWORKING [GENERAL])
Matt Mackall <mpm@selenic.com> (in file)
Ingo Molnar <mingo@redhat.com> (in file)
linux-kernel@vger.kernel.org (open list)
netdev@vger.kernel.org (open list:NETWORKING [GENERAL])
The number of email addresses in the file is not limited. Neither is the
number of returned email addresses.
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/get_maintainer.pl | 83 |
1 files changed, 76 insertions, 7 deletions
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 2f3230db7ffb..bff2390652c2 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl | |||
| @@ -41,6 +41,7 @@ my $web = 0; | |||
| 41 | my $subsystem = 0; | 41 | my $subsystem = 0; |
| 42 | my $status = 0; | 42 | my $status = 0; |
| 43 | my $keywords = 1; | 43 | my $keywords = 1; |
| 44 | my $file_emails = 0; | ||
| 44 | my $from_filename = 0; | 45 | my $from_filename = 0; |
| 45 | my $pattern_depth = 0; | 46 | my $pattern_depth = 0; |
| 46 | my $version = 0; | 47 | my $version = 0; |
| @@ -120,6 +121,7 @@ if (!GetOptions( | |||
| 120 | 'web!' => \$web, | 121 | 'web!' => \$web, |
| 121 | 'pattern-depth=i' => \$pattern_depth, | 122 | 'pattern-depth=i' => \$pattern_depth, |
| 122 | 'k|keywords!' => \$keywords, | 123 | 'k|keywords!' => \$keywords, |
| 124 | 'fe|file-emails!' => \$file_emails, | ||
| 123 | 'f|file' => \$from_filename, | 125 | 'f|file' => \$from_filename, |
| 124 | 'v|version' => \$version, | 126 | 'v|version' => \$version, |
| 125 | 'h|help' => \$help, | 127 | 'h|help' => \$help, |
| @@ -232,6 +234,7 @@ if ($email_remove_duplicates) { | |||
| 232 | my @files = (); | 234 | my @files = (); |
| 233 | my @range = (); | 235 | my @range = (); |
| 234 | my @keyword_tvi = (); | 236 | my @keyword_tvi = (); |
| 237 | my @file_emails = (); | ||
| 235 | 238 | ||
| 236 | foreach my $file (@ARGV) { | 239 | foreach my $file (@ARGV) { |
| 237 | ##if $file is a directory and it lacks a trailing slash, add one | 240 | ##if $file is a directory and it lacks a trailing slash, add one |
| @@ -242,15 +245,21 @@ foreach my $file (@ARGV) { | |||
| 242 | } | 245 | } |
| 243 | if ($from_filename) { | 246 | if ($from_filename) { |
| 244 | push(@files, $file); | 247 | push(@files, $file); |
| 245 | if (-f $file && $keywords) { | 248 | if (-f $file && ($keywords || $file_emails)) { |
| 246 | open(FILE, "<$file") or die "$P: Can't open ${file}\n"; | 249 | open(FILE, "<$file") or die "$P: Can't open ${file}\n"; |
| 247 | my $text = do { local($/) ; <FILE> }; | 250 | my $text = do { local($/) ; <FILE> }; |
| 248 | foreach my $line (keys %keyword_hash) { | 251 | close(FILE); |
| 249 | if ($text =~ m/$keyword_hash{$line}/x) { | 252 | if ($keywords) { |
| 250 | push(@keyword_tvi, $line); | 253 | foreach my $line (keys %keyword_hash) { |
| 254 | if ($text =~ m/$keyword_hash{$line}/x) { | ||
| 255 | push(@keyword_tvi, $line); | ||
| 256 | } | ||
| 251 | } | 257 | } |
| 252 | } | 258 | } |
| 253 | close(FILE); | 259 | if ($file_emails) { |
| 260 | my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g; | ||
| 261 | push(@file_emails, clean_file_emails(@poss_addr)); | ||
| 262 | } | ||
| 254 | } | 263 | } |
| 255 | } else { | 264 | } else { |
| 256 | my $file_cnt = @files; | 265 | my $file_cnt = @files; |
| @@ -285,6 +294,8 @@ foreach my $file (@ARGV) { | |||
| 285 | } | 294 | } |
| 286 | } | 295 | } |
| 287 | 296 | ||
| 297 | @file_emails = uniq(@file_emails); | ||
| 298 | |||
| 288 | my @email_to = (); | 299 | my @email_to = (); |
| 289 | my @list_to = (); | 300 | my @list_to = (); |
| 290 | my @scm = (); | 301 | my @scm = (); |
| @@ -377,6 +388,14 @@ if ($email) { | |||
| 377 | } | 388 | } |
| 378 | } | 389 | } |
| 379 | } | 390 | } |
| 391 | |||
| 392 | foreach my $email (@file_emails) { | ||
| 393 | my ($name, $address) = parse_email($email); | ||
| 394 | |||
| 395 | my $tmp_email = format_email($name, $address, $email_usename); | ||
| 396 | push_email_address($tmp_email, ''); | ||
| 397 | add_role($tmp_email, 'in file'); | ||
| 398 | } | ||
| 380 | } | 399 | } |
| 381 | 400 | ||
| 382 | if ($email || $email_list) { | 401 | if ($email || $email_list) { |
| @@ -453,6 +472,7 @@ MAINTAINER field selection options: | |||
| 453 | --remove-duplicates => minimize duplicate email names/addresses | 472 | --remove-duplicates => minimize duplicate email names/addresses |
| 454 | --roles => show roles (status:subsystem, git-signer, list, etc...) | 473 | --roles => show roles (status:subsystem, git-signer, list, etc...) |
| 455 | --rolestats => show roles and statistics (commits/total_commits, %) | 474 | --rolestats => show roles and statistics (commits/total_commits, %) |
| 475 | --file-emails => add email addresses found in -f file (default: 0 (off)) | ||
| 456 | --scm => print SCM tree(s) if any | 476 | --scm => print SCM tree(s) if any |
| 457 | --status => print status if any | 477 | --status => print status if any |
| 458 | --subsystem => print subsystem name if any | 478 | --subsystem => print subsystem name if any |
| @@ -811,7 +831,9 @@ sub add_role { | |||
| 811 | foreach my $entry (@email_to) { | 831 | foreach my $entry (@email_to) { |
| 812 | if ($email_remove_duplicates) { | 832 | if ($email_remove_duplicates) { |
| 813 | my ($entry_name, $entry_address) = parse_email($entry->[0]); | 833 | my ($entry_name, $entry_address) = parse_email($entry->[0]); |
| 814 | if ($name eq $entry_name || $address eq $entry_address) { | 834 | if (($name eq $entry_name || $address eq $entry_address) |
| 835 | && ($role eq "" || !($entry->[1] =~ m/$role/)) | ||
| 836 | ) { | ||
| 815 | if ($entry->[1] eq "") { | 837 | if ($entry->[1] eq "") { |
| 816 | $entry->[1] = "$role"; | 838 | $entry->[1] = "$role"; |
| 817 | } else { | 839 | } else { |
| @@ -819,7 +841,9 @@ sub add_role { | |||
| 819 | } | 841 | } |
| 820 | } | 842 | } |
| 821 | } else { | 843 | } else { |
| 822 | if ($email eq $entry->[0]) { | 844 | if ($email eq $entry->[0] |
| 845 | && ($role eq "" || !($entry->[1] =~ m/$role/)) | ||
| 846 | ) { | ||
| 823 | if ($entry->[1] eq "") { | 847 | if ($entry->[1] eq "") { |
| 824 | $entry->[1] = "$role"; | 848 | $entry->[1] = "$role"; |
| 825 | } else { | 849 | } else { |
| @@ -1099,6 +1123,51 @@ sub sort_and_uniq { | |||
| 1099 | return @parms; | 1123 | return @parms; |
| 1100 | } | 1124 | } |
| 1101 | 1125 | ||
| 1126 | sub clean_file_emails { | ||
| 1127 | my (@file_emails) = @_; | ||
| 1128 | my @fmt_emails = (); | ||
| 1129 | |||
| 1130 | foreach my $email (@file_emails) { | ||
| 1131 | $email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g; | ||
| 1132 | my ($name, $address) = parse_email($email); | ||
| 1133 | if ($name eq '"[,\.]"') { | ||
| 1134 | $name = ""; | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name); | ||
| 1138 | if (@nw > 2) { | ||
| 1139 | my $first = $nw[@nw - 3]; | ||
| 1140 | my $middle = $nw[@nw - 2]; | ||
| 1141 | my $last = $nw[@nw - 1]; | ||
| 1142 | |||
| 1143 | if (((length($first) == 1 && $first =~ m/[A-Za-z]/) || | ||
| 1144 | (length($first) == 2 && substr($first, -1) eq ".")) || | ||
| 1145 | (length($middle) == 1 || | ||
| 1146 | (length($middle) == 2 && substr($middle, -1) eq "."))) { | ||
| 1147 | $name = "$first $middle $last"; | ||
| 1148 | } else { | ||
| 1149 | $name = "$middle $last"; | ||
| 1150 | } | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | if (substr($name, -1) =~ /[,\.]/) { | ||
| 1154 | $name = substr($name, 0, length($name) - 1); | ||
| 1155 | } elsif (substr($name, -2) =~ /[,\.]"/) { | ||
| 1156 | $name = substr($name, 0, length($name) - 2) . '"'; | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | if (substr($name, 0, 1) =~ /[,\.]/) { | ||
| 1160 | $name = substr($name, 1, length($name) - 1); | ||
| 1161 | } elsif (substr($name, 0, 2) =~ /"[,\.]/) { | ||
| 1162 | $name = '"' . substr($name, 2, length($name) - 2); | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | my $fmt_email = format_email($name, $address, $email_usename); | ||
| 1166 | push(@fmt_emails, $fmt_email); | ||
| 1167 | } | ||
| 1168 | return @fmt_emails; | ||
| 1169 | } | ||
| 1170 | |||
| 1102 | sub merge_email { | 1171 | sub merge_email { |
| 1103 | my @lines; | 1172 | my @lines; |
| 1104 | my %saw; | 1173 | my %saw; |
