aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc/base.c
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2006-06-26 03:25:55 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 12:58:25 -0400
commit99f895518368252ba862cc15ce4eb98ebbe1bec6 (patch)
treea9dcc01963221d1fd6a7e357b95d361ebfe91c6d /fs/proc/base.c
parent8578cea7509cbdec25b31d08b48a92fcc3b1a9e3 (diff)
[PATCH] proc: don't lock task_structs indefinitely
Every inode in /proc holds a reference to a struct task_struct. If a directory or file is opened and remains open after the task exits, this pinning continues. With 8K stacks on a 32bit machine the amount pinned per file descriptor is about 10K. Normally I would figure a reasonable per user process limit is about 100 processes. With 80 processes, with 1000 file descriptors each, I can trigger the OOM killer on a 32bit kernel, because I have pinned about 800MB of useless data. This patch replaces the struct task_struct pointer with a pointer to a struct task_ref which has a struct task_struct pointer, so the pinning of dead tasks does not happen. The code now has to contend with the fact that the task may now exit at any time, which is a little, but not much, more complicated. With this change it takes about 1000 processes each opening up 1000 file descriptors before I can trigger the OOM killer. Much better. [mlp@google.com: task_mmu small fixes] Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: Paul Jackson <pj@sgi.com> Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Albert Cahalan <acahalan@gmail.com> Signed-off-by: Prasanna Meda <mlp@google.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r--fs/proc/base.c355
1 files changed, 253 insertions, 102 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20746e12440..489810abc72 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -307,12 +307,15 @@ static struct pid_entry tid_attr_stuff[] = {
307 307
308static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 308static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
309{ 309{
310 struct task_struct *task = proc_task(inode); 310 struct task_struct *task = get_proc_task(inode);
311 struct files_struct *files; 311 struct files_struct *files = NULL;
312 struct file *file; 312 struct file *file;
313 int fd = proc_fd(inode); 313 int fd = proc_fd(inode);
314 314
315 files = get_files_struct(task); 315 if (task) {
316 files = get_files_struct(task);
317 put_task_struct(task);
318 }
316 if (files) { 319 if (files) {
317 /* 320 /*
318 * We are not taking a ref to the file structure, so we must 321 * We are not taking a ref to the file structure, so we must
@@ -344,10 +347,29 @@ static struct fs_struct *get_fs_struct(struct task_struct *task)
344 return fs; 347 return fs;
345} 348}
346 349
350static int get_nr_threads(struct task_struct *tsk)
351{
352 /* Must be called with the rcu_read_lock held */
353 unsigned long flags;
354 int count = 0;
355
356 if (lock_task_sighand(tsk, &flags)) {
357 count = atomic_read(&tsk->signal->count);
358 unlock_task_sighand(tsk, &flags);
359 }
360 return count;
361}
362
347static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 363static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
348{ 364{
349 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 365 struct task_struct *task = get_proc_task(inode);
366 struct fs_struct *fs = NULL;
350 int result = -ENOENT; 367 int result = -ENOENT;
368
369 if (task) {
370 fs = get_fs_struct(task);
371 put_task_struct(task);
372 }
351 if (fs) { 373 if (fs) {
352 read_lock(&fs->lock); 374 read_lock(&fs->lock);
353 *mnt = mntget(fs->pwdmnt); 375 *mnt = mntget(fs->pwdmnt);
@@ -361,8 +383,14 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs
361 383
362static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 384static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
363{ 385{
364 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 386 struct task_struct *task = get_proc_task(inode);
387 struct fs_struct *fs = NULL;
365 int result = -ENOENT; 388 int result = -ENOENT;
389
390 if (task) {
391 fs = get_fs_struct(task);
392 put_task_struct(task);
393 }
366 if (fs) { 394 if (fs) {
367 read_lock(&fs->lock); 395 read_lock(&fs->lock);
368 *mnt = mntget(fs->rootmnt); 396 *mnt = mntget(fs->rootmnt);
@@ -550,16 +578,19 @@ struct proc_mounts {
550 578
551static int mounts_open(struct inode *inode, struct file *file) 579static int mounts_open(struct inode *inode, struct file *file)
552{ 580{
553 struct task_struct *task = proc_task(inode); 581 struct task_struct *task = get_proc_task(inode);
554 struct namespace *namespace; 582 struct namespace *namespace = NULL;
555 struct proc_mounts *p; 583 struct proc_mounts *p;
556 int ret = -EINVAL; 584 int ret = -EINVAL;
557 585
558 task_lock(task); 586 if (task) {
559 namespace = task->namespace; 587 task_lock(task);
560 if (namespace) 588 namespace = task->namespace;
561 get_namespace(namespace); 589 if (namespace)
562 task_unlock(task); 590 get_namespace(namespace);
591 task_unlock(task);
592 put_task_struct(task);
593 }
563 594
564 if (namespace) { 595 if (namespace) {
565 ret = -ENOMEM; 596 ret = -ENOMEM;
@@ -616,17 +647,21 @@ static struct file_operations proc_mounts_operations = {
616extern struct seq_operations mountstats_op; 647extern struct seq_operations mountstats_op;
617static int mountstats_open(struct inode *inode, struct file *file) 648static int mountstats_open(struct inode *inode, struct file *file)
618{ 649{
619 struct task_struct *task = proc_task(inode);
620 int ret = seq_open(file, &mountstats_op); 650 int ret = seq_open(file, &mountstats_op);
621 651
622 if (!ret) { 652 if (!ret) {
623 struct seq_file *m = file->private_data; 653 struct seq_file *m = file->private_data;
624 struct namespace *namespace; 654 struct namespace *namespace = NULL;
625 task_lock(task); 655 struct task_struct *task = get_proc_task(inode);
626 namespace = task->namespace; 656
627 if (namespace) 657 if (task) {
628 get_namespace(namespace); 658 task_lock(task);
629 task_unlock(task); 659 namespace = task->namespace;
660 if (namespace)
661 get_namespace(namespace);
662 task_unlock(task);
663 put_task_struct(task);
664 }
630 665
631 if (namespace) 666 if (namespace)
632 m->private = namespace; 667 m->private = namespace;
@@ -653,18 +688,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
653 struct inode * inode = file->f_dentry->d_inode; 688 struct inode * inode = file->f_dentry->d_inode;
654 unsigned long page; 689 unsigned long page;
655 ssize_t length; 690 ssize_t length;
656 struct task_struct *task = proc_task(inode); 691 struct task_struct *task = get_proc_task(inode);
692
693 length = -ESRCH;
694 if (!task)
695 goto out_no_task;
657 696
658 if (count > PROC_BLOCK_SIZE) 697 if (count > PROC_BLOCK_SIZE)
659 count = PROC_BLOCK_SIZE; 698 count = PROC_BLOCK_SIZE;
699
700 length = -ENOMEM;
660 if (!(page = __get_free_page(GFP_KERNEL))) 701 if (!(page = __get_free_page(GFP_KERNEL)))
661 return -ENOMEM; 702 goto out;
662 703
663 length = PROC_I(inode)->op.proc_read(task, (char*)page); 704 length = PROC_I(inode)->op.proc_read(task, (char*)page);
664 705
665 if (length >= 0) 706 if (length >= 0)
666 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 707 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
667 free_page(page); 708 free_page(page);
709out:
710 put_task_struct(task);
711out_no_task:
668 return length; 712 return length;
669} 713}
670 714
@@ -681,12 +725,15 @@ static int mem_open(struct inode* inode, struct file* file)
681static ssize_t mem_read(struct file * file, char __user * buf, 725static ssize_t mem_read(struct file * file, char __user * buf,
682 size_t count, loff_t *ppos) 726 size_t count, loff_t *ppos)
683{ 727{
684 struct task_struct *task = proc_task(file->f_dentry->d_inode); 728 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
685 char *page; 729 char *page;
686 unsigned long src = *ppos; 730 unsigned long src = *ppos;
687 int ret = -ESRCH; 731 int ret = -ESRCH;
688 struct mm_struct *mm; 732 struct mm_struct *mm;
689 733
734 if (!task)
735 goto out_no_task;
736
690 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 737 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
691 goto out; 738 goto out;
692 739
@@ -736,6 +783,8 @@ out_put:
736out_free: 783out_free:
737 free_page((unsigned long) page); 784 free_page((unsigned long) page);
738out: 785out:
786 put_task_struct(task);
787out_no_task:
739 return ret; 788 return ret;
740} 789}
741 790
@@ -748,15 +797,20 @@ static ssize_t mem_write(struct file * file, const char * buf,
748{ 797{
749 int copied = 0; 798 int copied = 0;
750 char *page; 799 char *page;
751 struct task_struct *task = proc_task(file->f_dentry->d_inode); 800 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
752 unsigned long dst = *ppos; 801 unsigned long dst = *ppos;
753 802
803 copied = -ESRCH;
804 if (!task)
805 goto out_no_task;
806
754 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 807 if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
755 return -ESRCH; 808 goto out;
756 809
810 copied = -ENOMEM;
757 page = (char *)__get_free_page(GFP_USER); 811 page = (char *)__get_free_page(GFP_USER);
758 if (!page) 812 if (!page)
759 return -ENOMEM; 813 goto out;
760 814
761 while (count > 0) { 815 while (count > 0) {
762 int this_len, retval; 816 int this_len, retval;
@@ -779,6 +833,9 @@ static ssize_t mem_write(struct file * file, const char * buf,
779 } 833 }
780 *ppos = dst; 834 *ppos = dst;
781 free_page((unsigned long) page); 835 free_page((unsigned long) page);
836out:
837 put_task_struct(task);
838out_no_task:
782 return copied; 839 return copied;
783} 840}
784#endif 841#endif
@@ -809,12 +866,17 @@ static struct file_operations proc_mem_operations = {
809static ssize_t oom_adjust_read(struct file *file, char __user *buf, 866static ssize_t oom_adjust_read(struct file *file, char __user *buf,
810 size_t count, loff_t *ppos) 867 size_t count, loff_t *ppos)
811{ 868{
812 struct task_struct *task = proc_task(file->f_dentry->d_inode); 869 struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
813 char buffer[PROC_NUMBUF]; 870 char buffer[PROC_NUMBUF];
814 size_t len; 871 size_t len;
815 int oom_adjust = task->oomkilladj; 872 int oom_adjust;
816 loff_t __ppos = *ppos; 873 loff_t __ppos = *ppos;
817 874
875 if (!task)
876 return -ESRCH;
877 oom_adjust = task->oomkilladj;
878 put_task_struct(task);
879
818 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 880 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
819 if (__ppos >= len) 881 if (__ppos >= len)
820 return 0; 882 return 0;
@@ -829,7 +891,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
829static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 891static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
830 size_t count, loff_t *ppos) 892 size_t count, loff_t *ppos)
831{ 893{
832 struct task_struct *task = proc_task(file->f_dentry->d_inode); 894 struct task_struct *task;
833 char buffer[PROC_NUMBUF], *end; 895 char buffer[PROC_NUMBUF], *end;
834 int oom_adjust; 896 int oom_adjust;
835 897
@@ -845,7 +907,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
845 return -EINVAL; 907 return -EINVAL;
846 if (*end == '\n') 908 if (*end == '\n')
847 end++; 909 end++;
910 task = get_proc_task(file->f_dentry->d_inode);
911 if (!task)
912 return -ESRCH;
848 task->oomkilladj = oom_adjust; 913 task->oomkilladj = oom_adjust;
914 put_task_struct(task);
849 if (end - buffer == 0) 915 if (end - buffer == 0)
850 return -EIO; 916 return -EIO;
851 return end - buffer; 917 return end - buffer;
@@ -862,12 +928,15 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
862 size_t count, loff_t *ppos) 928 size_t count, loff_t *ppos)
863{ 929{
864 struct inode * inode = file->f_dentry->d_inode; 930 struct inode * inode = file->f_dentry->d_inode;
865 struct task_struct *task = proc_task(inode); 931 struct task_struct *task = get_proc_task(inode);
866 ssize_t length; 932 ssize_t length;
867 char tmpbuf[TMPBUFLEN]; 933 char tmpbuf[TMPBUFLEN];
868 934
935 if (!task)
936 return -ESRCH;
869 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 937 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
870 audit_get_loginuid(task->audit_context)); 938 audit_get_loginuid(task->audit_context));
939 put_task_struct(task);
871 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 940 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
872} 941}
873 942
@@ -877,13 +946,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
877 struct inode * inode = file->f_dentry->d_inode; 946 struct inode * inode = file->f_dentry->d_inode;
878 char *page, *tmp; 947 char *page, *tmp;
879 ssize_t length; 948 ssize_t length;
880 struct task_struct *task = proc_task(inode);
881 uid_t loginuid; 949 uid_t loginuid;
882 950
883 if (!capable(CAP_AUDIT_CONTROL)) 951 if (!capable(CAP_AUDIT_CONTROL))
884 return -EPERM; 952 return -EPERM;
885 953
886 if (current != task) 954 if (current != proc_tref(inode)->task)
887 return -EPERM; 955 return -EPERM;
888 956
889 if (count >= PAGE_SIZE) 957 if (count >= PAGE_SIZE)
@@ -907,7 +975,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
907 goto out_free_page; 975 goto out_free_page;
908 976
909 } 977 }
910 length = audit_set_loginuid(task, loginuid); 978 length = audit_set_loginuid(current, loginuid);
911 if (likely(length == 0)) 979 if (likely(length == 0))
912 length = count; 980 length = count;
913 981
@@ -926,13 +994,16 @@ static struct file_operations proc_loginuid_operations = {
926static ssize_t seccomp_read(struct file *file, char __user *buf, 994static ssize_t seccomp_read(struct file *file, char __user *buf,
927 size_t count, loff_t *ppos) 995 size_t count, loff_t *ppos)
928{ 996{
929 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 997 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
930 char __buf[20]; 998 char __buf[20];
931 loff_t __ppos = *ppos; 999 loff_t __ppos = *ppos;
932 size_t len; 1000 size_t len;
933 1001
1002 if (!tsk)
1003 return -ESRCH;
934 /* no need to print the trailing zero, so use only len */ 1004 /* no need to print the trailing zero, so use only len */
935 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 1005 len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
1006 put_task_struct(tsk);
936 if (__ppos >= len) 1007 if (__ppos >= len)
937 return 0; 1008 return 0;
938 if (count > len - __ppos) 1009 if (count > len - __ppos)
@@ -946,29 +1017,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf,
946static ssize_t seccomp_write(struct file *file, const char __user *buf, 1017static ssize_t seccomp_write(struct file *file, const char __user *buf,
947 size_t count, loff_t *ppos) 1018 size_t count, loff_t *ppos)
948{ 1019{
949 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 1020 struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
950 char __buf[20], *end; 1021 char __buf[20], *end;
951 unsigned int seccomp_mode; 1022 unsigned int seccomp_mode;
1023 ssize_t result;
1024
1025 result = -ESRCH;
1026 if (!tsk)
1027 goto out_no_task;
952 1028
953 /* can set it only once to be even more secure */ 1029 /* can set it only once to be even more secure */
1030 result = -EPERM;
954 if (unlikely(tsk->seccomp.mode)) 1031 if (unlikely(tsk->seccomp.mode))
955 return -EPERM; 1032 goto out;
956 1033
1034 result = -EFAULT;
957 memset(__buf, 0, sizeof(__buf)); 1035 memset(__buf, 0, sizeof(__buf));
958 count = min(count, sizeof(__buf) - 1); 1036 count = min(count, sizeof(__buf) - 1);
959 if (copy_from_user(__buf, buf, count)) 1037 if (copy_from_user(__buf, buf, count))
960 return -EFAULT; 1038 goto out;
1039
961 seccomp_mode = simple_strtoul(__buf, &end, 0); 1040 seccomp_mode = simple_strtoul(__buf, &end, 0);
962 if (*end == '\n') 1041 if (*end == '\n')
963 end++; 1042 end++;
1043 result = -EINVAL;
964 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1044 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
965 tsk->seccomp.mode = seccomp_mode; 1045 tsk->seccomp.mode = seccomp_mode;
966 set_tsk_thread_flag(tsk, TIF_SECCOMP); 1046 set_tsk_thread_flag(tsk, TIF_SECCOMP);
967 } else 1047 } else
968 return -EINVAL; 1048 goto out;
1049 result = -EIO;
969 if (unlikely(!(end - __buf))) 1050 if (unlikely(!(end - __buf)))
970 return -EIO; 1051 goto out;
971 return end - __buf; 1052 result = end - __buf;
1053out:
1054 put_task_struct(tsk);
1055out_no_task:
1056 return result;
972} 1057}
973 1058
974static struct file_operations proc_seccomp_operations = { 1059static struct file_operations proc_seccomp_operations = {
@@ -995,7 +1080,7 @@ static int proc_check_dentry_visible(struct inode *inode,
995 /* See if the two tasks share a common set of 1080 /* See if the two tasks share a common set of
996 * file descriptors. If so everything is visible. 1081 * file descriptors. If so everything is visible.
997 */ 1082 */
998 task = proc_task(inode); 1083 task = get_proc_task(inode);
999 if (!task) 1084 if (!task)
1000 goto out; 1085 goto out;
1001 files = get_files_struct(current); 1086 files = get_files_struct(current);
@@ -1006,6 +1091,7 @@ static int proc_check_dentry_visible(struct inode *inode,
1006 put_files_struct(task_files); 1091 put_files_struct(task_files);
1007 if (files) 1092 if (files)
1008 put_files_struct(files); 1093 put_files_struct(files);
1094 put_task_struct(task);
1009 if (!error) 1095 if (!error)
1010 goto out; 1096 goto out;
1011 1097
@@ -1106,7 +1192,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1106{ 1192{
1107 struct dentry *dentry = filp->f_dentry; 1193 struct dentry *dentry = filp->f_dentry;
1108 struct inode *inode = dentry->d_inode; 1194 struct inode *inode = dentry->d_inode;
1109 struct task_struct *p = proc_task(inode); 1195 struct task_struct *p = get_proc_task(inode);
1110 unsigned int fd, tid, ino; 1196 unsigned int fd, tid, ino;
1111 int retval; 1197 int retval;
1112 char buf[PROC_NUMBUF]; 1198 char buf[PROC_NUMBUF];
@@ -1114,8 +1200,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1114 struct fdtable *fdt; 1200 struct fdtable *fdt;
1115 1201
1116 retval = -ENOENT; 1202 retval = -ENOENT;
1117 if (!pid_alive(p)) 1203 if (!p)
1118 goto out; 1204 goto out_no_task;
1119 retval = 0; 1205 retval = 0;
1120 tid = p->pid; 1206 tid = p->pid;
1121 1207
@@ -1164,6 +1250,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1164 put_files_struct(files); 1250 put_files_struct(files);
1165 } 1251 }
1166out: 1252out:
1253 put_task_struct(p);
1254out_no_task:
1167 return retval; 1255 return retval;
1168} 1256}
1169 1257
@@ -1175,16 +1263,18 @@ static int proc_pident_readdir(struct file *filp,
1175 int pid; 1263 int pid;
1176 struct dentry *dentry = filp->f_dentry; 1264 struct dentry *dentry = filp->f_dentry;
1177 struct inode *inode = dentry->d_inode; 1265 struct inode *inode = dentry->d_inode;
1266 struct task_struct *task = get_proc_task(inode);
1178 struct pid_entry *p; 1267 struct pid_entry *p;
1179 ino_t ino; 1268 ino_t ino;
1180 int ret; 1269 int ret;
1181 1270
1182 ret = -ENOENT; 1271 ret = -ENOENT;
1183 if (!pid_alive(proc_task(inode))) 1272 if (!task)
1184 goto out; 1273 goto out;
1185 1274
1186 ret = 0; 1275 ret = 0;
1187 pid = proc_task(inode)->pid; 1276 pid = task->pid;
1277 put_task_struct(task);
1188 i = filp->f_pos; 1278 i = filp->f_pos;
1189 switch (i) { 1279 switch (i) {
1190 case 0: 1280 case 0:
@@ -1270,14 +1360,13 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1270 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1360 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1271 inode->i_ino = fake_ino(task->pid, ino); 1361 inode->i_ino = fake_ino(task->pid, ino);
1272 1362
1273 if (!pid_alive(task))
1274 goto out_unlock;
1275
1276 /* 1363 /*
1277 * grab the reference to task. 1364 * grab the reference to task.
1278 */ 1365 */
1279 get_task_struct(task); 1366 ei->tref = tref_get_by_task(task);
1280 ei->task = task; 1367 if (!tref_task(ei->tref))
1368 goto out_unlock;
1369
1281 inode->i_uid = 0; 1370 inode->i_uid = 0;
1282 inode->i_gid = 0; 1371 inode->i_gid = 0;
1283 if (task_dumpable(task)) { 1372 if (task_dumpable(task)) {
@@ -1303,13 +1392,21 @@ out_unlock:
1303 * 1392 *
1304 * Rewrite the inode's ownerships here because the owning task may have 1393 * Rewrite the inode's ownerships here because the owning task may have
1305 * performed a setuid(), etc. 1394 * performed a setuid(), etc.
1395 *
1396 * Before the /proc/pid/status file was created the only way to read
1397 * the effective uid of a process was to stat /proc/pid. Reading
1398 * /proc/pid/status is slow enough that procps and other packages
1399 * kept stating /proc/pid. To keep the rules in /proc simple I have
1400 * made this apply to all per process world readable and executable
1401 * directories.
1306 */ 1402 */
1307static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1403static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1308{ 1404{
1309 struct inode *inode = dentry->d_inode; 1405 struct inode *inode = dentry->d_inode;
1310 struct task_struct *task = proc_task(inode); 1406 struct task_struct *task = get_proc_task(inode);
1311 if (pid_alive(task)) { 1407 if (task) {
1312 if (task_dumpable(task)) { 1408 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1409 task_dumpable(task)) {
1313 inode->i_uid = task->euid; 1410 inode->i_uid = task->euid;
1314 inode->i_gid = task->egid; 1411 inode->i_gid = task->egid;
1315 } else { 1412 } else {
@@ -1317,37 +1414,63 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1317 inode->i_gid = 0; 1414 inode->i_gid = 0;
1318 } 1415 }
1319 security_task_to_inode(task, inode); 1416 security_task_to_inode(task, inode);
1417 put_task_struct(task);
1320 return 1; 1418 return 1;
1321 } 1419 }
1322 d_drop(dentry); 1420 d_drop(dentry);
1323 return 0; 1421 return 0;
1324} 1422}
1325 1423
1424static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1425{
1426 struct inode *inode = dentry->d_inode;
1427 struct task_struct *task;
1428 generic_fillattr(inode, stat);
1429
1430 rcu_read_lock();
1431 stat->uid = 0;
1432 stat->gid = 0;
1433 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1434 if (task) {
1435 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1436 task_dumpable(task)) {
1437 stat->uid = task->euid;
1438 stat->gid = task->egid;
1439 }
1440 }
1441 rcu_read_unlock();
1442 return 0;
1443}
1444
1326static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1445static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1327{ 1446{
1328 struct inode *inode = dentry->d_inode; 1447 struct inode *inode = dentry->d_inode;
1329 struct task_struct *task = proc_task(inode); 1448 struct task_struct *task = get_proc_task(inode);
1330 int fd = proc_fd(inode); 1449 int fd = proc_fd(inode);
1331 struct files_struct *files; 1450 struct files_struct *files;
1332 1451
1333 files = get_files_struct(task); 1452 if (task) {
1334 if (files) { 1453 files = get_files_struct(task);
1335 rcu_read_lock(); 1454 if (files) {
1336 if (fcheck_files(files, fd)) { 1455 rcu_read_lock();
1456 if (fcheck_files(files, fd)) {
1457 rcu_read_unlock();
1458 put_files_struct(files);
1459 if (task_dumpable(task)) {
1460 inode->i_uid = task->euid;
1461 inode->i_gid = task->egid;
1462 } else {
1463 inode->i_uid = 0;
1464 inode->i_gid = 0;
1465 }
1466 security_task_to_inode(task, inode);
1467 put_task_struct(task);
1468 return 1;
1469 }
1337 rcu_read_unlock(); 1470 rcu_read_unlock();
1338 put_files_struct(files); 1471 put_files_struct(files);
1339 if (task_dumpable(task)) {
1340 inode->i_uid = task->euid;
1341 inode->i_gid = task->egid;
1342 } else {
1343 inode->i_uid = 0;
1344 inode->i_gid = 0;
1345 }
1346 security_task_to_inode(task, inode);
1347 return 1;
1348 } 1472 }
1349 rcu_read_unlock(); 1473 put_task_struct(task);
1350 put_files_struct(files);
1351 } 1474 }
1352 d_drop(dentry); 1475 d_drop(dentry);
1353 return 0; 1476 return 0;
@@ -1359,7 +1482,7 @@ static int pid_delete_dentry(struct dentry * dentry)
1359 * If so, then don't put the dentry on the lru list, 1482 * If so, then don't put the dentry on the lru list,
1360 * kill it immediately. 1483 * kill it immediately.
1361 */ 1484 */
1362 return !pid_alive(proc_task(dentry->d_inode)); 1485 return !proc_tref(dentry->d_inode)->task;
1363} 1486}
1364 1487
1365static struct dentry_operations tid_fd_dentry_operations = 1488static struct dentry_operations tid_fd_dentry_operations =
@@ -1401,7 +1524,7 @@ out:
1401/* SMP-safe */ 1524/* SMP-safe */
1402static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1525static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1403{ 1526{
1404 struct task_struct *task = proc_task(dir); 1527 struct task_struct *task = get_proc_task(dir);
1405 unsigned fd = name_to_int(dentry); 1528 unsigned fd = name_to_int(dentry);
1406 struct dentry *result = ERR_PTR(-ENOENT); 1529 struct dentry *result = ERR_PTR(-ENOENT);
1407 struct file * file; 1530 struct file * file;
@@ -1409,10 +1532,10 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1409 struct inode *inode; 1532 struct inode *inode;
1410 struct proc_inode *ei; 1533 struct proc_inode *ei;
1411 1534
1535 if (!task)
1536 goto out_no_task;
1412 if (fd == ~0U) 1537 if (fd == ~0U)
1413 goto out; 1538 goto out;
1414 if (!pid_alive(task))
1415 goto out;
1416 1539
1417 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1540 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1418 if (!inode) 1541 if (!inode)
@@ -1447,6 +1570,8 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1447 if (tid_fd_revalidate(dentry, NULL)) 1570 if (tid_fd_revalidate(dentry, NULL))
1448 result = NULL; 1571 result = NULL;
1449out: 1572out:
1573 put_task_struct(task);
1574out_no_task:
1450 return result; 1575 return result;
1451 1576
1452out_unlock2: 1577out_unlock2:
@@ -1490,12 +1615,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1490 struct inode * inode = file->f_dentry->d_inode; 1615 struct inode * inode = file->f_dentry->d_inode;
1491 unsigned long page; 1616 unsigned long page;
1492 ssize_t length; 1617 ssize_t length;
1493 struct task_struct *task = proc_task(inode); 1618 struct task_struct *task = get_proc_task(inode);
1619
1620 length = -ESRCH;
1621 if (!task)
1622 goto out_no_task;
1494 1623
1495 if (count > PAGE_SIZE) 1624 if (count > PAGE_SIZE)
1496 count = PAGE_SIZE; 1625 count = PAGE_SIZE;
1626 length = -ENOMEM;
1497 if (!(page = __get_free_page(GFP_KERNEL))) 1627 if (!(page = __get_free_page(GFP_KERNEL)))
1498 return -ENOMEM; 1628 goto out;
1499 1629
1500 length = security_getprocattr(task, 1630 length = security_getprocattr(task,
1501 (char*)file->f_dentry->d_name.name, 1631 (char*)file->f_dentry->d_name.name,
@@ -1503,6 +1633,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1503 if (length >= 0) 1633 if (length >= 0)
1504 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1634 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1505 free_page(page); 1635 free_page(page);
1636out:
1637 put_task_struct(task);
1638out_no_task:
1506 return length; 1639 return length;
1507} 1640}
1508 1641
@@ -1512,26 +1645,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1512 struct inode * inode = file->f_dentry->d_inode; 1645 struct inode * inode = file->f_dentry->d_inode;
1513 char *page; 1646 char *page;
1514 ssize_t length; 1647 ssize_t length;
1515 struct task_struct *task = proc_task(inode); 1648 struct task_struct *task = get_proc_task(inode);
1516 1649
1650 length = -ESRCH;
1651 if (!task)
1652 goto out_no_task;
1517 if (count > PAGE_SIZE) 1653 if (count > PAGE_SIZE)
1518 count = PAGE_SIZE; 1654 count = PAGE_SIZE;
1519 if (*ppos != 0) { 1655
1520 /* No partial writes. */ 1656 /* No partial writes. */
1521 return -EINVAL; 1657 length = -EINVAL;
1522 } 1658 if (*ppos != 0)
1659 goto out;
1660
1661 length = -ENOMEM;
1523 page = (char*)__get_free_page(GFP_USER); 1662 page = (char*)__get_free_page(GFP_USER);
1524 if (!page) 1663 if (!page)
1525 return -ENOMEM; 1664 goto out;
1665
1526 length = -EFAULT; 1666 length = -EFAULT;
1527 if (copy_from_user(page, buf, count)) 1667 if (copy_from_user(page, buf, count))
1528 goto out; 1668 goto out_free;
1529 1669
1530 length = security_setprocattr(task, 1670 length = security_setprocattr(task,
1531 (char*)file->f_dentry->d_name.name, 1671 (char*)file->f_dentry->d_name.name,
1532 (void*)page, count); 1672 (void*)page, count);
1533out: 1673out_free:
1534 free_page((unsigned long) page); 1674 free_page((unsigned long) page);
1675out:
1676 put_task_struct(task);
1677out_no_task:
1535 return length; 1678 return length;
1536} 1679}
1537 1680
@@ -1553,15 +1696,15 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1553{ 1696{
1554 struct inode *inode; 1697 struct inode *inode;
1555 struct dentry *error; 1698 struct dentry *error;
1556 struct task_struct *task = proc_task(dir); 1699 struct task_struct *task = get_proc_task(dir);
1557 struct pid_entry *p; 1700 struct pid_entry *p;
1558 struct proc_inode *ei; 1701 struct proc_inode *ei;
1559 1702
1560 error = ERR_PTR(-ENOENT); 1703 error = ERR_PTR(-ENOENT);
1561 inode = NULL; 1704 inode = NULL;
1562 1705
1563 if (!pid_alive(task)) 1706 if (!task)
1564 goto out; 1707 goto out_no_task;
1565 1708
1566 for (p = ents; p->name; p++) { 1709 for (p = ents; p->name; p++) {
1567 if (p->len != dentry->d_name.len) 1710 if (p->len != dentry->d_name.len)
@@ -1748,6 +1891,8 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1748 if (pid_revalidate(dentry, NULL)) 1891 if (pid_revalidate(dentry, NULL))
1749 error = NULL; 1892 error = NULL;
1750out: 1893out:
1894 put_task_struct(task);
1895out_no_task:
1751 return error; 1896 return error;
1752} 1897}
1753 1898
@@ -1771,10 +1916,12 @@ static struct file_operations proc_tid_base_operations = {
1771 1916
1772static struct inode_operations proc_tgid_base_inode_operations = { 1917static struct inode_operations proc_tgid_base_inode_operations = {
1773 .lookup = proc_tgid_base_lookup, 1918 .lookup = proc_tgid_base_lookup,
1919 .getattr = pid_getattr,
1774}; 1920};
1775 1921
1776static struct inode_operations proc_tid_base_inode_operations = { 1922static struct inode_operations proc_tid_base_inode_operations = {
1777 .lookup = proc_tid_base_lookup, 1923 .lookup = proc_tid_base_lookup,
1924 .getattr = pid_getattr,
1778}; 1925};
1779 1926
1780#ifdef CONFIG_SECURITY 1927#ifdef CONFIG_SECURITY
@@ -1816,10 +1963,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1816 1963
1817static struct inode_operations proc_tgid_attr_inode_operations = { 1964static struct inode_operations proc_tgid_attr_inode_operations = {
1818 .lookup = proc_tgid_attr_lookup, 1965 .lookup = proc_tgid_attr_lookup,
1966 .getattr = pid_getattr,
1819}; 1967};
1820 1968
1821static struct inode_operations proc_tid_attr_inode_operations = { 1969static struct inode_operations proc_tid_attr_inode_operations = {
1822 .lookup = proc_tid_attr_lookup, 1970 .lookup = proc_tid_attr_lookup,
1971 .getattr = pid_getattr,
1823}; 1972};
1824#endif 1973#endif
1825 1974
@@ -1981,10 +2130,13 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
1981{ 2130{
1982 struct dentry *result = ERR_PTR(-ENOENT); 2131 struct dentry *result = ERR_PTR(-ENOENT);
1983 struct task_struct *task; 2132 struct task_struct *task;
1984 struct task_struct *leader = proc_task(dir); 2133 struct task_struct *leader = get_proc_task(dir);
1985 struct inode *inode; 2134 struct inode *inode;
1986 unsigned tid; 2135 unsigned tid;
1987 2136
2137 if (!leader)
2138 goto out_no_task;
2139
1988 tid = name_to_int(dentry); 2140 tid = name_to_int(dentry);
1989 if (tid == ~0U) 2141 if (tid == ~0U)
1990 goto out; 2142 goto out;
@@ -2024,6 +2176,8 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2024out_drop_task: 2176out_drop_task:
2025 put_task_struct(task); 2177 put_task_struct(task);
2026out: 2178out:
2179 put_task_struct(leader);
2180out_no_task:
2027 return result; 2181 return result;
2028} 2182}
2029 2183
@@ -2163,12 +2317,7 @@ static struct task_struct *first_tid(struct task_struct *leader, int tid, int nr
2163 2317
2164 /* If nr exceeds the number of threads there is nothing to do */ 2318 /* If nr exceeds the number of threads there is nothing to do */
2165 if (nr) { 2319 if (nr) {
2166 int threads = 0; 2320 if (nr >= get_nr_threads(leader))
2167 task_lock(leader);
2168 if (leader->signal)
2169 threads = atomic_read(&leader->signal->count);
2170 task_unlock(leader);
2171 if (nr >= threads)
2172 goto done; 2321 goto done;
2173 } 2322 }
2174 2323
@@ -2218,15 +2367,15 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2218 char buf[PROC_NUMBUF]; 2367 char buf[PROC_NUMBUF];
2219 struct dentry *dentry = filp->f_dentry; 2368 struct dentry *dentry = filp->f_dentry;
2220 struct inode *inode = dentry->d_inode; 2369 struct inode *inode = dentry->d_inode;
2221 struct task_struct *leader = proc_task(inode); 2370 struct task_struct *leader = get_proc_task(inode);
2222 struct task_struct *task; 2371 struct task_struct *task;
2223 int retval = -ENOENT; 2372 int retval = -ENOENT;
2224 ino_t ino; 2373 ino_t ino;
2225 int tid; 2374 int tid;
2226 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2375 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
2227 2376
2228 if (!pid_alive(leader)) 2377 if (!leader)
2229 goto out; 2378 goto out_no_task;
2230 retval = 0; 2379 retval = 0;
2231 2380
2232 switch (pos) { 2381 switch (pos) {
@@ -2266,20 +2415,22 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2266 } 2415 }
2267out: 2416out:
2268 filp->f_pos = pos; 2417 filp->f_pos = pos;
2418 put_task_struct(leader);
2419out_no_task:
2269 return retval; 2420 return retval;
2270} 2421}
2271 2422
2272static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 2423static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
2273{ 2424{
2274 struct inode *inode = dentry->d_inode; 2425 struct inode *inode = dentry->d_inode;
2275 struct task_struct *p = proc_task(inode); 2426 struct task_struct *p = get_proc_task(inode);
2276 generic_fillattr(inode, stat); 2427 generic_fillattr(inode, stat);
2277 2428
2278 if (pid_alive(p)) { 2429 if (p) {
2279 task_lock(p); 2430 rcu_read_lock();
2280 if (p->signal) 2431 stat->nlink += get_nr_threads(p);
2281 stat->nlink += atomic_read(&p->signal->count); 2432 rcu_read_unlock();
2282 task_unlock(p); 2433 put_task_struct(p);
2283 } 2434 }
2284 2435
2285 return 0; 2436 return 0;