aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoe Perches <joe@perches.com>2010-03-05 16:43:00 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-03-06 14:26:33 -0500
commit03372dbbe618bfcd02f9c8cdbfe78e97e3aad43b (patch)
tree8b848249ec5d45154fb05c43aa0c7bd34f13d09e
parentcea83886dde49fd7524e9f4a246dd5dff4ad236a (diff)
scripts/get_maintainer.pl: add --file-emails, find embedded email addresses
Add an imperfect option to search a source file for email addresses. New option: --file-emails or --fe. Email addresses in files are freeform text and are nearly impossible to parse. Still, might as well try to do a somewhat acceptable job of finding them. This code should find all addresses that are in the form addr@domain.tld. The code assumes that up to 3 alphabetic words, along with dashes, commas, and periods, that precede the email address are a name. If 3 words are found for the name, and one of the first two words is a single letter and period, or just a single letter, then the 3 words are used as the name; otherwise the last 2 words are used. Some variants that are shown correctly: John Smith <jksmith@domain.org> Random J. Developer <rjd@tld.com> Random J. Developer (rjd@tld.com) J. Random Developer rjd@tld.com Variants that are shown nominally correctly: Written by First Last (funny-addr@somecompany.com) is shown as: First Last <funny-addr@somecompany.com> Variants that are shown incorrectly: Some Really Long Name <srln@foo.bar> MontaVista Software, Inc. <source@mvista.com> are returned as: Long Name <srln@foo.bar> "Software, Inc" <source@mvista.com> --roles and --rolestats show "(in file)" for matches. For instance: Without -file-emails: $ ./scripts/get_maintainer.pl -f -nogit -roles net/core/netpoll.c David S. Miller <davem@davemloft.net> (maintainer:NETWORKING [GENERAL]) linux-kernel@vger.kernel.org (open list) With -fe: $ ./scripts/get_maintainer.pl -f -fe -nogit -roles net/core/netpoll.c David S. Miller <davem@davemloft.net> (maintainer:NETWORKING [GENERAL]) Matt Mackall <mpm@selenic.com> (in file) Ingo Molnar <mingo@redhat.com> (in file) linux-kernel@vger.kernel.org (open list) netdev@vger.kernel.org (open list:NETWORKING [GENERAL]) The number of email addresses in the file is not limited. Neither is the number of returned email addresses.
Signed-off-by: Joe Perches <joe@perches.com> Cc: Matt Mackall <mpm@selenic.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rwxr-xr-xscripts/get_maintainer.pl83
1 files changed, 76 insertions, 7 deletions
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 2f3230db7ffb..bff2390652c2 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -41,6 +41,7 @@ my $web = 0;
41my $subsystem = 0; 41my $subsystem = 0;
42my $status = 0; 42my $status = 0;
43my $keywords = 1; 43my $keywords = 1;
44my $file_emails = 0;
44my $from_filename = 0; 45my $from_filename = 0;
45my $pattern_depth = 0; 46my $pattern_depth = 0;
46my $version = 0; 47my $version = 0;
@@ -120,6 +121,7 @@ if (!GetOptions(
120 'web!' => \$web, 121 'web!' => \$web,
121 'pattern-depth=i' => \$pattern_depth, 122 'pattern-depth=i' => \$pattern_depth,
122 'k|keywords!' => \$keywords, 123 'k|keywords!' => \$keywords,
124 'fe|file-emails!' => \$file_emails,
123 'f|file' => \$from_filename, 125 'f|file' => \$from_filename,
124 'v|version' => \$version, 126 'v|version' => \$version,
125 'h|help' => \$help, 127 'h|help' => \$help,
@@ -232,6 +234,7 @@ if ($email_remove_duplicates) {
232my @files = (); 234my @files = ();
233my @range = (); 235my @range = ();
234my @keyword_tvi = (); 236my @keyword_tvi = ();
237my @file_emails = ();
235 238
236foreach my $file (@ARGV) { 239foreach my $file (@ARGV) {
237 ##if $file is a directory and it lacks a trailing slash, add one 240 ##if $file is a directory and it lacks a trailing slash, add one
@@ -242,15 +245,21 @@ foreach my $file (@ARGV) {
242 } 245 }
243 if ($from_filename) { 246 if ($from_filename) {
244 push(@files, $file); 247 push(@files, $file);
245 if (-f $file && $keywords) { 248 if (-f $file && ($keywords || $file_emails)) {
246 open(FILE, "<$file") or die "$P: Can't open ${file}\n"; 249 open(FILE, "<$file") or die "$P: Can't open ${file}\n";
247 my $text = do { local($/) ; <FILE> }; 250 my $text = do { local($/) ; <FILE> };
248 foreach my $line (keys %keyword_hash) { 251 close(FILE);
249 if ($text =~ m/$keyword_hash{$line}/x) { 252 if ($keywords) {
250 push(@keyword_tvi, $line); 253 foreach my $line (keys %keyword_hash) {
254 if ($text =~ m/$keyword_hash{$line}/x) {
255 push(@keyword_tvi, $line);
256 }
251 } 257 }
252 } 258 }
253 close(FILE); 259 if ($file_emails) {
260 my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
261 push(@file_emails, clean_file_emails(@poss_addr));
262 }
254 } 263 }
255 } else { 264 } else {
256 my $file_cnt = @files; 265 my $file_cnt = @files;
@@ -285,6 +294,8 @@ foreach my $file (@ARGV) {
285 } 294 }
286} 295}
287 296
297@file_emails = uniq(@file_emails);
298
288my @email_to = (); 299my @email_to = ();
289my @list_to = (); 300my @list_to = ();
290my @scm = (); 301my @scm = ();
@@ -377,6 +388,14 @@ if ($email) {
377 } 388 }
378 } 389 }
379 } 390 }
391
392 foreach my $email (@file_emails) {
393 my ($name, $address) = parse_email($email);
394
395 my $tmp_email = format_email($name, $address, $email_usename);
396 push_email_address($tmp_email, '');
397 add_role($tmp_email, 'in file');
398 }
380} 399}
381 400
382if ($email || $email_list) { 401if ($email || $email_list) {
@@ -453,6 +472,7 @@ MAINTAINER field selection options:
453 --remove-duplicates => minimize duplicate email names/addresses 472 --remove-duplicates => minimize duplicate email names/addresses
454 --roles => show roles (status:subsystem, git-signer, list, etc...) 473 --roles => show roles (status:subsystem, git-signer, list, etc...)
455 --rolestats => show roles and statistics (commits/total_commits, %) 474 --rolestats => show roles and statistics (commits/total_commits, %)
475 --file-emails => add email addresses found in -f file (default: 0 (off))
456 --scm => print SCM tree(s) if any 476 --scm => print SCM tree(s) if any
457 --status => print status if any 477 --status => print status if any
458 --subsystem => print subsystem name if any 478 --subsystem => print subsystem name if any
@@ -811,7 +831,9 @@ sub add_role {
811 foreach my $entry (@email_to) { 831 foreach my $entry (@email_to) {
812 if ($email_remove_duplicates) { 832 if ($email_remove_duplicates) {
813 my ($entry_name, $entry_address) = parse_email($entry->[0]); 833 my ($entry_name, $entry_address) = parse_email($entry->[0]);
814 if ($name eq $entry_name || $address eq $entry_address) { 834 if (($name eq $entry_name || $address eq $entry_address)
835 && ($role eq "" || !($entry->[1] =~ m/$role/))
836 ) {
815 if ($entry->[1] eq "") { 837 if ($entry->[1] eq "") {
816 $entry->[1] = "$role"; 838 $entry->[1] = "$role";
817 } else { 839 } else {
@@ -819,7 +841,9 @@ sub add_role {
819 } 841 }
820 } 842 }
821 } else { 843 } else {
822 if ($email eq $entry->[0]) { 844 if ($email eq $entry->[0]
845 && ($role eq "" || !($entry->[1] =~ m/$role/))
846 ) {
823 if ($entry->[1] eq "") { 847 if ($entry->[1] eq "") {
824 $entry->[1] = "$role"; 848 $entry->[1] = "$role";
825 } else { 849 } else {
@@ -1099,6 +1123,51 @@ sub sort_and_uniq {
1099 return @parms; 1123 return @parms;
1100} 1124}
1101 1125
# clean_file_emails(@file_emails)
#
# Turn raw "possible name + address" fragments scraped from a source file
# into formatted email strings.
#
# For each fragment:
#   1. the address, optionally wrapped in (), <> or {}, is rewritten as
#      <address>;
#   2. the fragment is split into name and address by parse_email();
#   3. the name is reduced to its last two words, or its last three words
#      when the first or middle of those three looks like an initial
#      ("J" or "J.");
#   4. one leading and one trailing comma/period is stripped from the name,
#      also when it sits just inside a surrounding double quote;
#   5. the result is built with format_email() using $email_usename.
#
# Returns the list of formatted emails, one per input fragment, in input
# order.  No de-duplication is done here.
sub clean_file_emails {
    # @_ is copied, so the in-place substitution below cannot modify the
    # caller's list through the foreach alias.
    my (@file_emails) = @_;
    my @fmt_emails = ();

    foreach my $email (@file_emails) {
        $email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
        my ($name, $address) = parse_email($email);

        # A "name" that is nothing but a quoted comma or period is noise
        # left over from the surrounding text.  This used to be a string
        # comparison against the literal '"[,\.]"', which could never match
        # such a name, so it has to be a pattern match.
        # NOTE(review): presumably parse_email() is what adds the double
        # quotes around names containing punctuation - confirm there.
        if ($name =~ m/\A"[,\.]"\z/) {
            $name = "";
        }

        # Split on every character that cannot be part of a name word;
        # double quotes and whitespace are separators here.
        my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
        if (@nw > 2) {
            my $first = $nw[@nw - 3];
            my $middle = $nw[@nw - 2];
            my $last = $nw[@nw - 1];

            # Keep three words only when one of the first two is an
            # initial; otherwise assume the third-from-last word is not
            # part of the name.
            if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
                 (length($first) == 2 && substr($first, -1) eq ".")) ||
                (length($middle) == 1 ||
                 (length($middle) == 2 && substr($middle, -1) eq "."))) {
                $name = "$first $middle $last";
            } else {
                $name = "$middle $last";
            }
        }

        # Drop one trailing comma/period, looking inside a closing quote.
        if (substr($name, -1) =~ /[,\.]/) {
            $name = substr($name, 0, length($name) - 1);
        } elsif (substr($name, -2) =~ /[,\.]"/) {
            $name = substr($name, 0, length($name) - 2) . '"';
        }

        # Drop one leading comma/period, looking inside an opening quote.
        if (substr($name, 0, 1) =~ /[,\.]/) {
            $name = substr($name, 1, length($name) - 1);
        } elsif (substr($name, 0, 2) =~ /"[,\.]/) {
            $name = '"' . substr($name, 2, length($name) - 2);
        }

        my $fmt_email = format_email($name, $address, $email_usename);
        push(@fmt_emails, $fmt_email);
    }
    return @fmt_emails;
}
1170
1102sub merge_email { 1171sub merge_email {
1103 my @lines; 1172 my @lines;
1104 my %saw; 1173 my %saw;