diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2006-06-26 03:25:55 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-26 12:58:25 -0400 |
commit | 99f895518368252ba862cc15ce4eb98ebbe1bec6 (patch) | |
tree | a9dcc01963221d1fd6a7e357b95d361ebfe91c6d | |
parent | 8578cea7509cbdec25b31d08b48a92fcc3b1a9e3 (diff) |
[PATCH] proc: don't lock task_structs indefinitely
Every inode in /proc holds a reference to a struct task_struct. If a
directory or file is opened and remains open after the task exits, this
pinning continues. With 8K stacks on a 32bit machine the amount pinned per
file descriptor is about 10K.
Normally I would figure a reasonable per-user process limit is about 100
processes. With 80 processes, each with 1000 file descriptors, I can trigger
the OOM killer on a 32bit kernel, because I have pinned about 800MB of useless
data.
This patch replaces the struct task_struct pointer with a pointer to a struct
task_ref which has a struct task_struct pointer. That way the pinning of dead
tasks does not happen.
The code now has to contend with the fact that the task may now exit at any
time, which is a little, but not much, more complicated.
With this change it takes about 1000 processes each opening up 1000 file
descriptors before I can trigger the OOM killer. Much better.
[mlp@google.com: task_mmu small fixes]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Paul Jackson <pj@sgi.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Albert Cahalan <acahalan@gmail.com>
Signed-off-by: Prasanna Meda <mlp@google.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/proc/base.c | 355 | ||||
-rw-r--r-- | fs/proc/inode.c | 9 | ||||
-rw-r--r-- | fs/proc/internal.h | 15 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 72 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 8 | ||||
-rw-r--r-- | kernel/cpuset.c | 27 | ||||
-rw-r--r-- | mm/mempolicy.c | 6 |
7 files changed, 349 insertions, 143 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 20746e124409..489810abc72d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -307,12 +307,15 @@ static struct pid_entry tid_attr_stuff[] = { | |||
307 | 307 | ||
308 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 308 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
309 | { | 309 | { |
310 | struct task_struct *task = proc_task(inode); | 310 | struct task_struct *task = get_proc_task(inode); |
311 | struct files_struct *files; | 311 | struct files_struct *files = NULL; |
312 | struct file *file; | 312 | struct file *file; |
313 | int fd = proc_fd(inode); | 313 | int fd = proc_fd(inode); |
314 | 314 | ||
315 | files = get_files_struct(task); | 315 | if (task) { |
316 | files = get_files_struct(task); | ||
317 | put_task_struct(task); | ||
318 | } | ||
316 | if (files) { | 319 | if (files) { |
317 | /* | 320 | /* |
318 | * We are not taking a ref to the file structure, so we must | 321 | * We are not taking a ref to the file structure, so we must |
@@ -344,10 +347,29 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) | |||
344 | return fs; | 347 | return fs; |
345 | } | 348 | } |
346 | 349 | ||
350 | static int get_nr_threads(struct task_struct *tsk) | ||
351 | { | ||
352 | /* Must be called with the rcu_read_lock held */ | ||
353 | unsigned long flags; | ||
354 | int count = 0; | ||
355 | |||
356 | if (lock_task_sighand(tsk, &flags)) { | ||
357 | count = atomic_read(&tsk->signal->count); | ||
358 | unlock_task_sighand(tsk, &flags); | ||
359 | } | ||
360 | return count; | ||
361 | } | ||
362 | |||
347 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 363 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
348 | { | 364 | { |
349 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 365 | struct task_struct *task = get_proc_task(inode); |
366 | struct fs_struct *fs = NULL; | ||
350 | int result = -ENOENT; | 367 | int result = -ENOENT; |
368 | |||
369 | if (task) { | ||
370 | fs = get_fs_struct(task); | ||
371 | put_task_struct(task); | ||
372 | } | ||
351 | if (fs) { | 373 | if (fs) { |
352 | read_lock(&fs->lock); | 374 | read_lock(&fs->lock); |
353 | *mnt = mntget(fs->pwdmnt); | 375 | *mnt = mntget(fs->pwdmnt); |
@@ -361,8 +383,14 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs | |||
361 | 383 | ||
362 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 384 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
363 | { | 385 | { |
364 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 386 | struct task_struct *task = get_proc_task(inode); |
387 | struct fs_struct *fs = NULL; | ||
365 | int result = -ENOENT; | 388 | int result = -ENOENT; |
389 | |||
390 | if (task) { | ||
391 | fs = get_fs_struct(task); | ||
392 | put_task_struct(task); | ||
393 | } | ||
366 | if (fs) { | 394 | if (fs) { |
367 | read_lock(&fs->lock); | 395 | read_lock(&fs->lock); |
368 | *mnt = mntget(fs->rootmnt); | 396 | *mnt = mntget(fs->rootmnt); |
@@ -550,16 +578,19 @@ struct proc_mounts { | |||
550 | 578 | ||
551 | static int mounts_open(struct inode *inode, struct file *file) | 579 | static int mounts_open(struct inode *inode, struct file *file) |
552 | { | 580 | { |
553 | struct task_struct *task = proc_task(inode); | 581 | struct task_struct *task = get_proc_task(inode); |
554 | struct namespace *namespace; | 582 | struct namespace *namespace = NULL; |
555 | struct proc_mounts *p; | 583 | struct proc_mounts *p; |
556 | int ret = -EINVAL; | 584 | int ret = -EINVAL; |
557 | 585 | ||
558 | task_lock(task); | 586 | if (task) { |
559 | namespace = task->namespace; | 587 | task_lock(task); |
560 | if (namespace) | 588 | namespace = task->namespace; |
561 | get_namespace(namespace); | 589 | if (namespace) |
562 | task_unlock(task); | 590 | get_namespace(namespace); |
591 | task_unlock(task); | ||
592 | put_task_struct(task); | ||
593 | } | ||
563 | 594 | ||
564 | if (namespace) { | 595 | if (namespace) { |
565 | ret = -ENOMEM; | 596 | ret = -ENOMEM; |
@@ -616,17 +647,21 @@ static struct file_operations proc_mounts_operations = { | |||
616 | extern struct seq_operations mountstats_op; | 647 | extern struct seq_operations mountstats_op; |
617 | static int mountstats_open(struct inode *inode, struct file *file) | 648 | static int mountstats_open(struct inode *inode, struct file *file) |
618 | { | 649 | { |
619 | struct task_struct *task = proc_task(inode); | ||
620 | int ret = seq_open(file, &mountstats_op); | 650 | int ret = seq_open(file, &mountstats_op); |
621 | 651 | ||
622 | if (!ret) { | 652 | if (!ret) { |
623 | struct seq_file *m = file->private_data; | 653 | struct seq_file *m = file->private_data; |
624 | struct namespace *namespace; | 654 | struct namespace *namespace = NULL; |
625 | task_lock(task); | 655 | struct task_struct *task = get_proc_task(inode); |
626 | namespace = task->namespace; | 656 | |
627 | if (namespace) | 657 | if (task) { |
628 | get_namespace(namespace); | 658 | task_lock(task); |
629 | task_unlock(task); | 659 | namespace = task->namespace; |
660 | if (namespace) | ||
661 | get_namespace(namespace); | ||
662 | task_unlock(task); | ||
663 | put_task_struct(task); | ||
664 | } | ||
630 | 665 | ||
631 | if (namespace) | 666 | if (namespace) |
632 | m->private = namespace; | 667 | m->private = namespace; |
@@ -653,18 +688,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, | |||
653 | struct inode * inode = file->f_dentry->d_inode; | 688 | struct inode * inode = file->f_dentry->d_inode; |
654 | unsigned long page; | 689 | unsigned long page; |
655 | ssize_t length; | 690 | ssize_t length; |
656 | struct task_struct *task = proc_task(inode); | 691 | struct task_struct *task = get_proc_task(inode); |
692 | |||
693 | length = -ESRCH; | ||
694 | if (!task) | ||
695 | goto out_no_task; | ||
657 | 696 | ||
658 | if (count > PROC_BLOCK_SIZE) | 697 | if (count > PROC_BLOCK_SIZE) |
659 | count = PROC_BLOCK_SIZE; | 698 | count = PROC_BLOCK_SIZE; |
699 | |||
700 | length = -ENOMEM; | ||
660 | if (!(page = __get_free_page(GFP_KERNEL))) | 701 | if (!(page = __get_free_page(GFP_KERNEL))) |
661 | return -ENOMEM; | 702 | goto out; |
662 | 703 | ||
663 | length = PROC_I(inode)->op.proc_read(task, (char*)page); | 704 | length = PROC_I(inode)->op.proc_read(task, (char*)page); |
664 | 705 | ||
665 | if (length >= 0) | 706 | if (length >= 0) |
666 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 707 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
667 | free_page(page); | 708 | free_page(page); |
709 | out: | ||
710 | put_task_struct(task); | ||
711 | out_no_task: | ||
668 | return length; | 712 | return length; |
669 | } | 713 | } |
670 | 714 | ||
@@ -681,12 +725,15 @@ static int mem_open(struct inode* inode, struct file* file) | |||
681 | static ssize_t mem_read(struct file * file, char __user * buf, | 725 | static ssize_t mem_read(struct file * file, char __user * buf, |
682 | size_t count, loff_t *ppos) | 726 | size_t count, loff_t *ppos) |
683 | { | 727 | { |
684 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 728 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
685 | char *page; | 729 | char *page; |
686 | unsigned long src = *ppos; | 730 | unsigned long src = *ppos; |
687 | int ret = -ESRCH; | 731 | int ret = -ESRCH; |
688 | struct mm_struct *mm; | 732 | struct mm_struct *mm; |
689 | 733 | ||
734 | if (!task) | ||
735 | goto out_no_task; | ||
736 | |||
690 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 737 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
691 | goto out; | 738 | goto out; |
692 | 739 | ||
@@ -736,6 +783,8 @@ out_put: | |||
736 | out_free: | 783 | out_free: |
737 | free_page((unsigned long) page); | 784 | free_page((unsigned long) page); |
738 | out: | 785 | out: |
786 | put_task_struct(task); | ||
787 | out_no_task: | ||
739 | return ret; | 788 | return ret; |
740 | } | 789 | } |
741 | 790 | ||
@@ -748,15 +797,20 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
748 | { | 797 | { |
749 | int copied = 0; | 798 | int copied = 0; |
750 | char *page; | 799 | char *page; |
751 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 800 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
752 | unsigned long dst = *ppos; | 801 | unsigned long dst = *ppos; |
753 | 802 | ||
803 | copied = -ESRCH; | ||
804 | if (!task) | ||
805 | goto out_no_task; | ||
806 | |||
754 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 807 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
755 | return -ESRCH; | 808 | goto out; |
756 | 809 | ||
810 | copied = -ENOMEM; | ||
757 | page = (char *)__get_free_page(GFP_USER); | 811 | page = (char *)__get_free_page(GFP_USER); |
758 | if (!page) | 812 | if (!page) |
759 | return -ENOMEM; | 813 | goto out; |
760 | 814 | ||
761 | while (count > 0) { | 815 | while (count > 0) { |
762 | int this_len, retval; | 816 | int this_len, retval; |
@@ -779,6 +833,9 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
779 | } | 833 | } |
780 | *ppos = dst; | 834 | *ppos = dst; |
781 | free_page((unsigned long) page); | 835 | free_page((unsigned long) page); |
836 | out: | ||
837 | put_task_struct(task); | ||
838 | out_no_task: | ||
782 | return copied; | 839 | return copied; |
783 | } | 840 | } |
784 | #endif | 841 | #endif |
@@ -809,12 +866,17 @@ static struct file_operations proc_mem_operations = { | |||
809 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, | 866 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, |
810 | size_t count, loff_t *ppos) | 867 | size_t count, loff_t *ppos) |
811 | { | 868 | { |
812 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 869 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
813 | char buffer[PROC_NUMBUF]; | 870 | char buffer[PROC_NUMBUF]; |
814 | size_t len; | 871 | size_t len; |
815 | int oom_adjust = task->oomkilladj; | 872 | int oom_adjust; |
816 | loff_t __ppos = *ppos; | 873 | loff_t __ppos = *ppos; |
817 | 874 | ||
875 | if (!task) | ||
876 | return -ESRCH; | ||
877 | oom_adjust = task->oomkilladj; | ||
878 | put_task_struct(task); | ||
879 | |||
818 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 880 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
819 | if (__ppos >= len) | 881 | if (__ppos >= len) |
820 | return 0; | 882 | return 0; |
@@ -829,7 +891,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
829 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | 891 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, |
830 | size_t count, loff_t *ppos) | 892 | size_t count, loff_t *ppos) |
831 | { | 893 | { |
832 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 894 | struct task_struct *task; |
833 | char buffer[PROC_NUMBUF], *end; | 895 | char buffer[PROC_NUMBUF], *end; |
834 | int oom_adjust; | 896 | int oom_adjust; |
835 | 897 | ||
@@ -845,7 +907,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
845 | return -EINVAL; | 907 | return -EINVAL; |
846 | if (*end == '\n') | 908 | if (*end == '\n') |
847 | end++; | 909 | end++; |
910 | task = get_proc_task(file->f_dentry->d_inode); | ||
911 | if (!task) | ||
912 | return -ESRCH; | ||
848 | task->oomkilladj = oom_adjust; | 913 | task->oomkilladj = oom_adjust; |
914 | put_task_struct(task); | ||
849 | if (end - buffer == 0) | 915 | if (end - buffer == 0) |
850 | return -EIO; | 916 | return -EIO; |
851 | return end - buffer; | 917 | return end - buffer; |
@@ -862,12 +928,15 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | |||
862 | size_t count, loff_t *ppos) | 928 | size_t count, loff_t *ppos) |
863 | { | 929 | { |
864 | struct inode * inode = file->f_dentry->d_inode; | 930 | struct inode * inode = file->f_dentry->d_inode; |
865 | struct task_struct *task = proc_task(inode); | 931 | struct task_struct *task = get_proc_task(inode); |
866 | ssize_t length; | 932 | ssize_t length; |
867 | char tmpbuf[TMPBUFLEN]; | 933 | char tmpbuf[TMPBUFLEN]; |
868 | 934 | ||
935 | if (!task) | ||
936 | return -ESRCH; | ||
869 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 937 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
870 | audit_get_loginuid(task->audit_context)); | 938 | audit_get_loginuid(task->audit_context)); |
939 | put_task_struct(task); | ||
871 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 940 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
872 | } | 941 | } |
873 | 942 | ||
@@ -877,13 +946,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
877 | struct inode * inode = file->f_dentry->d_inode; | 946 | struct inode * inode = file->f_dentry->d_inode; |
878 | char *page, *tmp; | 947 | char *page, *tmp; |
879 | ssize_t length; | 948 | ssize_t length; |
880 | struct task_struct *task = proc_task(inode); | ||
881 | uid_t loginuid; | 949 | uid_t loginuid; |
882 | 950 | ||
883 | if (!capable(CAP_AUDIT_CONTROL)) | 951 | if (!capable(CAP_AUDIT_CONTROL)) |
884 | return -EPERM; | 952 | return -EPERM; |
885 | 953 | ||
886 | if (current != task) | 954 | if (current != proc_tref(inode)->task) |
887 | return -EPERM; | 955 | return -EPERM; |
888 | 956 | ||
889 | if (count >= PAGE_SIZE) | 957 | if (count >= PAGE_SIZE) |
@@ -907,7 +975,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
907 | goto out_free_page; | 975 | goto out_free_page; |
908 | 976 | ||
909 | } | 977 | } |
910 | length = audit_set_loginuid(task, loginuid); | 978 | length = audit_set_loginuid(current, loginuid); |
911 | if (likely(length == 0)) | 979 | if (likely(length == 0)) |
912 | length = count; | 980 | length = count; |
913 | 981 | ||
@@ -926,13 +994,16 @@ static struct file_operations proc_loginuid_operations = { | |||
926 | static ssize_t seccomp_read(struct file *file, char __user *buf, | 994 | static ssize_t seccomp_read(struct file *file, char __user *buf, |
927 | size_t count, loff_t *ppos) | 995 | size_t count, loff_t *ppos) |
928 | { | 996 | { |
929 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 997 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
930 | char __buf[20]; | 998 | char __buf[20]; |
931 | loff_t __ppos = *ppos; | 999 | loff_t __ppos = *ppos; |
932 | size_t len; | 1000 | size_t len; |
933 | 1001 | ||
1002 | if (!tsk) | ||
1003 | return -ESRCH; | ||
934 | /* no need to print the trailing zero, so use only len */ | 1004 | /* no need to print the trailing zero, so use only len */ |
935 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); | 1005 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); |
1006 | put_task_struct(tsk); | ||
936 | if (__ppos >= len) | 1007 | if (__ppos >= len) |
937 | return 0; | 1008 | return 0; |
938 | if (count > len - __ppos) | 1009 | if (count > len - __ppos) |
@@ -946,29 +1017,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf, | |||
946 | static ssize_t seccomp_write(struct file *file, const char __user *buf, | 1017 | static ssize_t seccomp_write(struct file *file, const char __user *buf, |
947 | size_t count, loff_t *ppos) | 1018 | size_t count, loff_t *ppos) |
948 | { | 1019 | { |
949 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 1020 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
950 | char __buf[20], *end; | 1021 | char __buf[20], *end; |
951 | unsigned int seccomp_mode; | 1022 | unsigned int seccomp_mode; |
1023 | ssize_t result; | ||
1024 | |||
1025 | result = -ESRCH; | ||
1026 | if (!tsk) | ||
1027 | goto out_no_task; | ||
952 | 1028 | ||
953 | /* can set it only once to be even more secure */ | 1029 | /* can set it only once to be even more secure */ |
1030 | result = -EPERM; | ||
954 | if (unlikely(tsk->seccomp.mode)) | 1031 | if (unlikely(tsk->seccomp.mode)) |
955 | return -EPERM; | 1032 | goto out; |
956 | 1033 | ||
1034 | result = -EFAULT; | ||
957 | memset(__buf, 0, sizeof(__buf)); | 1035 | memset(__buf, 0, sizeof(__buf)); |
958 | count = min(count, sizeof(__buf) - 1); | 1036 | count = min(count, sizeof(__buf) - 1); |
959 | if (copy_from_user(__buf, buf, count)) | 1037 | if (copy_from_user(__buf, buf, count)) |
960 | return -EFAULT; | 1038 | goto out; |
1039 | |||
961 | seccomp_mode = simple_strtoul(__buf, &end, 0); | 1040 | seccomp_mode = simple_strtoul(__buf, &end, 0); |
962 | if (*end == '\n') | 1041 | if (*end == '\n') |
963 | end++; | 1042 | end++; |
1043 | result = -EINVAL; | ||
964 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | 1044 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { |
965 | tsk->seccomp.mode = seccomp_mode; | 1045 | tsk->seccomp.mode = seccomp_mode; |
966 | set_tsk_thread_flag(tsk, TIF_SECCOMP); | 1046 | set_tsk_thread_flag(tsk, TIF_SECCOMP); |
967 | } else | 1047 | } else |
968 | return -EINVAL; | 1048 | goto out; |
1049 | result = -EIO; | ||
969 | if (unlikely(!(end - __buf))) | 1050 | if (unlikely(!(end - __buf))) |
970 | return -EIO; | 1051 | goto out; |
971 | return end - __buf; | 1052 | result = end - __buf; |
1053 | out: | ||
1054 | put_task_struct(tsk); | ||
1055 | out_no_task: | ||
1056 | return result; | ||
972 | } | 1057 | } |
973 | 1058 | ||
974 | static struct file_operations proc_seccomp_operations = { | 1059 | static struct file_operations proc_seccomp_operations = { |
@@ -995,7 +1080,7 @@ static int proc_check_dentry_visible(struct inode *inode, | |||
995 | /* See if the the two tasks share a commone set of | 1080 | /* See if the the two tasks share a commone set of |
996 | * file descriptors. If so everything is visible. | 1081 | * file descriptors. If so everything is visible. |
997 | */ | 1082 | */ |
998 | task = proc_task(inode); | 1083 | task = get_proc_task(inode); |
999 | if (!task) | 1084 | if (!task) |
1000 | goto out; | 1085 | goto out; |
1001 | files = get_files_struct(current); | 1086 | files = get_files_struct(current); |
@@ -1006,6 +1091,7 @@ static int proc_check_dentry_visible(struct inode *inode, | |||
1006 | put_files_struct(task_files); | 1091 | put_files_struct(task_files); |
1007 | if (files) | 1092 | if (files) |
1008 | put_files_struct(files); | 1093 | put_files_struct(files); |
1094 | put_task_struct(task); | ||
1009 | if (!error) | 1095 | if (!error) |
1010 | goto out; | 1096 | goto out; |
1011 | 1097 | ||
@@ -1106,7 +1192,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1106 | { | 1192 | { |
1107 | struct dentry *dentry = filp->f_dentry; | 1193 | struct dentry *dentry = filp->f_dentry; |
1108 | struct inode *inode = dentry->d_inode; | 1194 | struct inode *inode = dentry->d_inode; |
1109 | struct task_struct *p = proc_task(inode); | 1195 | struct task_struct *p = get_proc_task(inode); |
1110 | unsigned int fd, tid, ino; | 1196 | unsigned int fd, tid, ino; |
1111 | int retval; | 1197 | int retval; |
1112 | char buf[PROC_NUMBUF]; | 1198 | char buf[PROC_NUMBUF]; |
@@ -1114,8 +1200,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1114 | struct fdtable *fdt; | 1200 | struct fdtable *fdt; |
1115 | 1201 | ||
1116 | retval = -ENOENT; | 1202 | retval = -ENOENT; |
1117 | if (!pid_alive(p)) | 1203 | if (!p) |
1118 | goto out; | 1204 | goto out_no_task; |
1119 | retval = 0; | 1205 | retval = 0; |
1120 | tid = p->pid; | 1206 | tid = p->pid; |
1121 | 1207 | ||
@@ -1164,6 +1250,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1164 | put_files_struct(files); | 1250 | put_files_struct(files); |
1165 | } | 1251 | } |
1166 | out: | 1252 | out: |
1253 | put_task_struct(p); | ||
1254 | out_no_task: | ||
1167 | return retval; | 1255 | return retval; |
1168 | } | 1256 | } |
1169 | 1257 | ||
@@ -1175,16 +1263,18 @@ static int proc_pident_readdir(struct file *filp, | |||
1175 | int pid; | 1263 | int pid; |
1176 | struct dentry *dentry = filp->f_dentry; | 1264 | struct dentry *dentry = filp->f_dentry; |
1177 | struct inode *inode = dentry->d_inode; | 1265 | struct inode *inode = dentry->d_inode; |
1266 | struct task_struct *task = get_proc_task(inode); | ||
1178 | struct pid_entry *p; | 1267 | struct pid_entry *p; |
1179 | ino_t ino; | 1268 | ino_t ino; |
1180 | int ret; | 1269 | int ret; |
1181 | 1270 | ||
1182 | ret = -ENOENT; | 1271 | ret = -ENOENT; |
1183 | if (!pid_alive(proc_task(inode))) | 1272 | if (!task) |
1184 | goto out; | 1273 | goto out; |
1185 | 1274 | ||
1186 | ret = 0; | 1275 | ret = 0; |
1187 | pid = proc_task(inode)->pid; | 1276 | pid = task->pid; |
1277 | put_task_struct(task); | ||
1188 | i = filp->f_pos; | 1278 | i = filp->f_pos; |
1189 | switch (i) { | 1279 | switch (i) { |
1190 | case 0: | 1280 | case 0: |
@@ -1270,14 +1360,13 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st | |||
1270 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1360 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
1271 | inode->i_ino = fake_ino(task->pid, ino); | 1361 | inode->i_ino = fake_ino(task->pid, ino); |
1272 | 1362 | ||
1273 | if (!pid_alive(task)) | ||
1274 | goto out_unlock; | ||
1275 | |||
1276 | /* | 1363 | /* |
1277 | * grab the reference to task. | 1364 | * grab the reference to task. |
1278 | */ | 1365 | */ |
1279 | get_task_struct(task); | 1366 | ei->tref = tref_get_by_task(task); |
1280 | ei->task = task; | 1367 | if (!tref_task(ei->tref)) |
1368 | goto out_unlock; | ||
1369 | |||
1281 | inode->i_uid = 0; | 1370 | inode->i_uid = 0; |
1282 | inode->i_gid = 0; | 1371 | inode->i_gid = 0; |
1283 | if (task_dumpable(task)) { | 1372 | if (task_dumpable(task)) { |
@@ -1303,13 +1392,21 @@ out_unlock: | |||
1303 | * | 1392 | * |
1304 | * Rewrite the inode's ownerships here because the owning task may have | 1393 | * Rewrite the inode's ownerships here because the owning task may have |
1305 | * performed a setuid(), etc. | 1394 | * performed a setuid(), etc. |
1395 | * | ||
1396 | * Before the /proc/pid/status file was created the only way to read | ||
1397 | * the effective uid of a /process was to stat /proc/pid. Reading | ||
1398 | * /proc/pid/status is slow enough that procps and other packages | ||
1399 | * kept stating /proc/pid. To keep the rules in /proc simple I have | ||
1400 | * made this apply to all per process world readable and executable | ||
1401 | * directories. | ||
1306 | */ | 1402 | */ |
1307 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | 1403 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
1308 | { | 1404 | { |
1309 | struct inode *inode = dentry->d_inode; | 1405 | struct inode *inode = dentry->d_inode; |
1310 | struct task_struct *task = proc_task(inode); | 1406 | struct task_struct *task = get_proc_task(inode); |
1311 | if (pid_alive(task)) { | 1407 | if (task) { |
1312 | if (task_dumpable(task)) { | 1408 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1409 | task_dumpable(task)) { | ||
1313 | inode->i_uid = task->euid; | 1410 | inode->i_uid = task->euid; |
1314 | inode->i_gid = task->egid; | 1411 | inode->i_gid = task->egid; |
1315 | } else { | 1412 | } else { |
@@ -1317,37 +1414,63 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1317 | inode->i_gid = 0; | 1414 | inode->i_gid = 0; |
1318 | } | 1415 | } |
1319 | security_task_to_inode(task, inode); | 1416 | security_task_to_inode(task, inode); |
1417 | put_task_struct(task); | ||
1320 | return 1; | 1418 | return 1; |
1321 | } | 1419 | } |
1322 | d_drop(dentry); | 1420 | d_drop(dentry); |
1323 | return 0; | 1421 | return 0; |
1324 | } | 1422 | } |
1325 | 1423 | ||
1424 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
1425 | { | ||
1426 | struct inode *inode = dentry->d_inode; | ||
1427 | struct task_struct *task; | ||
1428 | generic_fillattr(inode, stat); | ||
1429 | |||
1430 | rcu_read_lock(); | ||
1431 | stat->uid = 0; | ||
1432 | stat->gid = 0; | ||
1433 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
1434 | if (task) { | ||
1435 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | ||
1436 | task_dumpable(task)) { | ||
1437 | stat->uid = task->euid; | ||
1438 | stat->gid = task->egid; | ||
1439 | } | ||
1440 | } | ||
1441 | rcu_read_unlock(); | ||
1442 | return 0; | ||
1443 | } | ||
1444 | |||
1326 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1445 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
1327 | { | 1446 | { |
1328 | struct inode *inode = dentry->d_inode; | 1447 | struct inode *inode = dentry->d_inode; |
1329 | struct task_struct *task = proc_task(inode); | 1448 | struct task_struct *task = get_proc_task(inode); |
1330 | int fd = proc_fd(inode); | 1449 | int fd = proc_fd(inode); |
1331 | struct files_struct *files; | 1450 | struct files_struct *files; |
1332 | 1451 | ||
1333 | files = get_files_struct(task); | 1452 | if (task) { |
1334 | if (files) { | 1453 | files = get_files_struct(task); |
1335 | rcu_read_lock(); | 1454 | if (files) { |
1336 | if (fcheck_files(files, fd)) { | 1455 | rcu_read_lock(); |
1456 | if (fcheck_files(files, fd)) { | ||
1457 | rcu_read_unlock(); | ||
1458 | put_files_struct(files); | ||
1459 | if (task_dumpable(task)) { | ||
1460 | inode->i_uid = task->euid; | ||
1461 | inode->i_gid = task->egid; | ||
1462 | } else { | ||
1463 | inode->i_uid = 0; | ||
1464 | inode->i_gid = 0; | ||
1465 | } | ||
1466 | security_task_to_inode(task, inode); | ||
1467 | put_task_struct(task); | ||
1468 | return 1; | ||
1469 | } | ||
1337 | rcu_read_unlock(); | 1470 | rcu_read_unlock(); |
1338 | put_files_struct(files); | 1471 | put_files_struct(files); |
1339 | if (task_dumpable(task)) { | ||
1340 | inode->i_uid = task->euid; | ||
1341 | inode->i_gid = task->egid; | ||
1342 | } else { | ||
1343 | inode->i_uid = 0; | ||
1344 | inode->i_gid = 0; | ||
1345 | } | ||
1346 | security_task_to_inode(task, inode); | ||
1347 | return 1; | ||
1348 | } | 1472 | } |
1349 | rcu_read_unlock(); | 1473 | put_task_struct(task); |
1350 | put_files_struct(files); | ||
1351 | } | 1474 | } |
1352 | d_drop(dentry); | 1475 | d_drop(dentry); |
1353 | return 0; | 1476 | return 0; |
@@ -1359,7 +1482,7 @@ static int pid_delete_dentry(struct dentry * dentry) | |||
1359 | * If so, then don't put the dentry on the lru list, | 1482 | * If so, then don't put the dentry on the lru list, |
1360 | * kill it immediately. | 1483 | * kill it immediately. |
1361 | */ | 1484 | */ |
1362 | return !pid_alive(proc_task(dentry->d_inode)); | 1485 | return !proc_tref(dentry->d_inode)->task; |
1363 | } | 1486 | } |
1364 | 1487 | ||
1365 | static struct dentry_operations tid_fd_dentry_operations = | 1488 | static struct dentry_operations tid_fd_dentry_operations = |
@@ -1401,7 +1524,7 @@ out: | |||
1401 | /* SMP-safe */ | 1524 | /* SMP-safe */ |
1402 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | 1525 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) |
1403 | { | 1526 | { |
1404 | struct task_struct *task = proc_task(dir); | 1527 | struct task_struct *task = get_proc_task(dir); |
1405 | unsigned fd = name_to_int(dentry); | 1528 | unsigned fd = name_to_int(dentry); |
1406 | struct dentry *result = ERR_PTR(-ENOENT); | 1529 | struct dentry *result = ERR_PTR(-ENOENT); |
1407 | struct file * file; | 1530 | struct file * file; |
@@ -1409,10 +1532,10 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1409 | struct inode *inode; | 1532 | struct inode *inode; |
1410 | struct proc_inode *ei; | 1533 | struct proc_inode *ei; |
1411 | 1534 | ||
1535 | if (!task) | ||
1536 | goto out_no_task; | ||
1412 | if (fd == ~0U) | 1537 | if (fd == ~0U) |
1413 | goto out; | 1538 | goto out; |
1414 | if (!pid_alive(task)) | ||
1415 | goto out; | ||
1416 | 1539 | ||
1417 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); | 1540 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); |
1418 | if (!inode) | 1541 | if (!inode) |
@@ -1447,6 +1570,8 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1447 | if (tid_fd_revalidate(dentry, NULL)) | 1570 | if (tid_fd_revalidate(dentry, NULL)) |
1448 | result = NULL; | 1571 | result = NULL; |
1449 | out: | 1572 | out: |
1573 | put_task_struct(task); | ||
1574 | out_no_task: | ||
1450 | return result; | 1575 | return result; |
1451 | 1576 | ||
1452 | out_unlock2: | 1577 | out_unlock2: |
@@ -1490,12 +1615,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
1490 | struct inode * inode = file->f_dentry->d_inode; | 1615 | struct inode * inode = file->f_dentry->d_inode; |
1491 | unsigned long page; | 1616 | unsigned long page; |
1492 | ssize_t length; | 1617 | ssize_t length; |
1493 | struct task_struct *task = proc_task(inode); | 1618 | struct task_struct *task = get_proc_task(inode); |
1619 | |||
1620 | length = -ESRCH; | ||
1621 | if (!task) | ||
1622 | goto out_no_task; | ||
1494 | 1623 | ||
1495 | if (count > PAGE_SIZE) | 1624 | if (count > PAGE_SIZE) |
1496 | count = PAGE_SIZE; | 1625 | count = PAGE_SIZE; |
1626 | length = -ENOMEM; | ||
1497 | if (!(page = __get_free_page(GFP_KERNEL))) | 1627 | if (!(page = __get_free_page(GFP_KERNEL))) |
1498 | return -ENOMEM; | 1628 | goto out; |
1499 | 1629 | ||
1500 | length = security_getprocattr(task, | 1630 | length = security_getprocattr(task, |
1501 | (char*)file->f_dentry->d_name.name, | 1631 | (char*)file->f_dentry->d_name.name, |
@@ -1503,6 +1633,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
1503 | if (length >= 0) | 1633 | if (length >= 0) |
1504 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 1634 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
1505 | free_page(page); | 1635 | free_page(page); |
1636 | out: | ||
1637 | put_task_struct(task); | ||
1638 | out_no_task: | ||
1506 | return length; | 1639 | return length; |
1507 | } | 1640 | } |
1508 | 1641 | ||
@@ -1512,26 +1645,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | |||
1512 | struct inode * inode = file->f_dentry->d_inode; | 1645 | struct inode * inode = file->f_dentry->d_inode; |
1513 | char *page; | 1646 | char *page; |
1514 | ssize_t length; | 1647 | ssize_t length; |
1515 | struct task_struct *task = proc_task(inode); | 1648 | struct task_struct *task = get_proc_task(inode); |
1516 | 1649 | ||
1650 | length = -ESRCH; | ||
1651 | if (!task) | ||
1652 | goto out_no_task; | ||
1517 | if (count > PAGE_SIZE) | 1653 | if (count > PAGE_SIZE) |
1518 | count = PAGE_SIZE; | 1654 | count = PAGE_SIZE; |
1519 | if (*ppos != 0) { | 1655 | |
1520 | /* No partial writes. */ | 1656 | /* No partial writes. */ |
1521 | return -EINVAL; | 1657 | length = -EINVAL; |
1522 | } | 1658 | if (*ppos != 0) |
1659 | goto out; | ||
1660 | |||
1661 | length = -ENOMEM; | ||
1523 | page = (char*)__get_free_page(GFP_USER); | 1662 | page = (char*)__get_free_page(GFP_USER); |
1524 | if (!page) | 1663 | if (!page) |
1525 | return -ENOMEM; | 1664 | goto out; |
1665 | |||
1526 | length = -EFAULT; | 1666 | length = -EFAULT; |
1527 | if (copy_from_user(page, buf, count)) | 1667 | if (copy_from_user(page, buf, count)) |
1528 | goto out; | 1668 | goto out_free; |
1529 | 1669 | ||
1530 | length = security_setprocattr(task, | 1670 | length = security_setprocattr(task, |
1531 | (char*)file->f_dentry->d_name.name, | 1671 | (char*)file->f_dentry->d_name.name, |
1532 | (void*)page, count); | 1672 | (void*)page, count); |
1533 | out: | 1673 | out_free: |
1534 | free_page((unsigned long) page); | 1674 | free_page((unsigned long) page); |
1675 | out: | ||
1676 | put_task_struct(task); | ||
1677 | out_no_task: | ||
1535 | return length; | 1678 | return length; |
1536 | } | 1679 | } |
1537 | 1680 | ||
@@ -1553,15 +1696,15 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1553 | { | 1696 | { |
1554 | struct inode *inode; | 1697 | struct inode *inode; |
1555 | struct dentry *error; | 1698 | struct dentry *error; |
1556 | struct task_struct *task = proc_task(dir); | 1699 | struct task_struct *task = get_proc_task(dir); |
1557 | struct pid_entry *p; | 1700 | struct pid_entry *p; |
1558 | struct proc_inode *ei; | 1701 | struct proc_inode *ei; |
1559 | 1702 | ||
1560 | error = ERR_PTR(-ENOENT); | 1703 | error = ERR_PTR(-ENOENT); |
1561 | inode = NULL; | 1704 | inode = NULL; |
1562 | 1705 | ||
1563 | if (!pid_alive(task)) | 1706 | if (!task) |
1564 | goto out; | 1707 | goto out_no_task; |
1565 | 1708 | ||
1566 | for (p = ents; p->name; p++) { | 1709 | for (p = ents; p->name; p++) { |
1567 | if (p->len != dentry->d_name.len) | 1710 | if (p->len != dentry->d_name.len) |
@@ -1748,6 +1891,8 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1748 | if (pid_revalidate(dentry, NULL)) | 1891 | if (pid_revalidate(dentry, NULL)) |
1749 | error = NULL; | 1892 | error = NULL; |
1750 | out: | 1893 | out: |
1894 | put_task_struct(task); | ||
1895 | out_no_task: | ||
1751 | return error; | 1896 | return error; |
1752 | } | 1897 | } |
1753 | 1898 | ||
@@ -1771,10 +1916,12 @@ static struct file_operations proc_tid_base_operations = { | |||
1771 | 1916 | ||
1772 | static struct inode_operations proc_tgid_base_inode_operations = { | 1917 | static struct inode_operations proc_tgid_base_inode_operations = { |
1773 | .lookup = proc_tgid_base_lookup, | 1918 | .lookup = proc_tgid_base_lookup, |
1919 | .getattr = pid_getattr, | ||
1774 | }; | 1920 | }; |
1775 | 1921 | ||
1776 | static struct inode_operations proc_tid_base_inode_operations = { | 1922 | static struct inode_operations proc_tid_base_inode_operations = { |
1777 | .lookup = proc_tid_base_lookup, | 1923 | .lookup = proc_tid_base_lookup, |
1924 | .getattr = pid_getattr, | ||
1778 | }; | 1925 | }; |
1779 | 1926 | ||
1780 | #ifdef CONFIG_SECURITY | 1927 | #ifdef CONFIG_SECURITY |
@@ -1816,10 +1963,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir, | |||
1816 | 1963 | ||
1817 | static struct inode_operations proc_tgid_attr_inode_operations = { | 1964 | static struct inode_operations proc_tgid_attr_inode_operations = { |
1818 | .lookup = proc_tgid_attr_lookup, | 1965 | .lookup = proc_tgid_attr_lookup, |
1966 | .getattr = pid_getattr, | ||
1819 | }; | 1967 | }; |
1820 | 1968 | ||
1821 | static struct inode_operations proc_tid_attr_inode_operations = { | 1969 | static struct inode_operations proc_tid_attr_inode_operations = { |
1822 | .lookup = proc_tid_attr_lookup, | 1970 | .lookup = proc_tid_attr_lookup, |
1971 | .getattr = pid_getattr, | ||
1823 | }; | 1972 | }; |
1824 | #endif | 1973 | #endif |
1825 | 1974 | ||
@@ -1981,10 +2130,13 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry | |||
1981 | { | 2130 | { |
1982 | struct dentry *result = ERR_PTR(-ENOENT); | 2131 | struct dentry *result = ERR_PTR(-ENOENT); |
1983 | struct task_struct *task; | 2132 | struct task_struct *task; |
1984 | struct task_struct *leader = proc_task(dir); | 2133 | struct task_struct *leader = get_proc_task(dir); |
1985 | struct inode *inode; | 2134 | struct inode *inode; |
1986 | unsigned tid; | 2135 | unsigned tid; |
1987 | 2136 | ||
2137 | if (!leader) | ||
2138 | goto out_no_task; | ||
2139 | |||
1988 | tid = name_to_int(dentry); | 2140 | tid = name_to_int(dentry); |
1989 | if (tid == ~0U) | 2141 | if (tid == ~0U) |
1990 | goto out; | 2142 | goto out; |
@@ -2024,6 +2176,8 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry | |||
2024 | out_drop_task: | 2176 | out_drop_task: |
2025 | put_task_struct(task); | 2177 | put_task_struct(task); |
2026 | out: | 2178 | out: |
2179 | put_task_struct(leader); | ||
2180 | out_no_task: | ||
2027 | return result; | 2181 | return result; |
2028 | } | 2182 | } |
2029 | 2183 | ||
@@ -2163,12 +2317,7 @@ static struct task_struct *first_tid(struct task_struct *leader, int tid, int nr | |||
2163 | 2317 | ||
2164 | /* If nr exceeds the number of threads there is nothing todo */ | 2318 | /* If nr exceeds the number of threads there is nothing todo */ |
2165 | if (nr) { | 2319 | if (nr) { |
2166 | int threads = 0; | 2320 | if (nr >= get_nr_threads(leader)) |
2167 | task_lock(leader); | ||
2168 | if (leader->signal) | ||
2169 | threads = atomic_read(&leader->signal->count); | ||
2170 | task_unlock(leader); | ||
2171 | if (nr >= threads) | ||
2172 | goto done; | 2321 | goto done; |
2173 | } | 2322 | } |
2174 | 2323 | ||
@@ -2218,15 +2367,15 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
2218 | char buf[PROC_NUMBUF]; | 2367 | char buf[PROC_NUMBUF]; |
2219 | struct dentry *dentry = filp->f_dentry; | 2368 | struct dentry *dentry = filp->f_dentry; |
2220 | struct inode *inode = dentry->d_inode; | 2369 | struct inode *inode = dentry->d_inode; |
2221 | struct task_struct *leader = proc_task(inode); | 2370 | struct task_struct *leader = get_proc_task(inode); |
2222 | struct task_struct *task; | 2371 | struct task_struct *task; |
2223 | int retval = -ENOENT; | 2372 | int retval = -ENOENT; |
2224 | ino_t ino; | 2373 | ino_t ino; |
2225 | int tid; | 2374 | int tid; |
2226 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ | 2375 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ |
2227 | 2376 | ||
2228 | if (!pid_alive(leader)) | 2377 | if (!leader) |
2229 | goto out; | 2378 | goto out_no_task; |
2230 | retval = 0; | 2379 | retval = 0; |
2231 | 2380 | ||
2232 | switch (pos) { | 2381 | switch (pos) { |
@@ -2266,20 +2415,22 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
2266 | } | 2415 | } |
2267 | out: | 2416 | out: |
2268 | filp->f_pos = pos; | 2417 | filp->f_pos = pos; |
2418 | put_task_struct(leader); | ||
2419 | out_no_task: | ||
2269 | return retval; | 2420 | return retval; |
2270 | } | 2421 | } |
2271 | 2422 | ||
2272 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 2423 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
2273 | { | 2424 | { |
2274 | struct inode *inode = dentry->d_inode; | 2425 | struct inode *inode = dentry->d_inode; |
2275 | struct task_struct *p = proc_task(inode); | 2426 | struct task_struct *p = get_proc_task(inode); |
2276 | generic_fillattr(inode, stat); | 2427 | generic_fillattr(inode, stat); |
2277 | 2428 | ||
2278 | if (pid_alive(p)) { | 2429 | if (p) { |
2279 | task_lock(p); | 2430 | rcu_read_lock(); |
2280 | if (p->signal) | 2431 | stat->nlink += get_nr_threads(p); |
2281 | stat->nlink += atomic_read(&p->signal->count); | 2432 | rcu_read_unlock(); |
2282 | task_unlock(p); | 2433 | put_task_struct(p); |
2283 | } | 2434 | } |
2284 | 2435 | ||
2285 | return 0; | 2436 | return 0; |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index fbc94df138a7..31e0475c6cb9 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -58,14 +58,11 @@ static void de_put(struct proc_dir_entry *de) | |||
58 | static void proc_delete_inode(struct inode *inode) | 58 | static void proc_delete_inode(struct inode *inode) |
59 | { | 59 | { |
60 | struct proc_dir_entry *de; | 60 | struct proc_dir_entry *de; |
61 | struct task_struct *tsk; | ||
62 | 61 | ||
63 | truncate_inode_pages(&inode->i_data, 0); | 62 | truncate_inode_pages(&inode->i_data, 0); |
64 | 63 | ||
65 | /* Let go of any associated process */ | 64 | /* Stop tracking associated processes */ |
66 | tsk = PROC_I(inode)->task; | 65 | tref_put(PROC_I(inode)->tref); |
67 | if (tsk) | ||
68 | put_task_struct(tsk); | ||
69 | 66 | ||
70 | /* Let go of any associated proc directory entry */ | 67 | /* Let go of any associated proc directory entry */ |
71 | de = PROC_I(inode)->pde; | 68 | de = PROC_I(inode)->pde; |
@@ -94,7 +91,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) | |||
94 | ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); | 91 | ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); |
95 | if (!ei) | 92 | if (!ei) |
96 | return NULL; | 93 | return NULL; |
97 | ei->task = NULL; | 94 | ei->tref = NULL; |
98 | ei->fd = 0; | 95 | ei->fd = 0; |
99 | ei->op.proc_get_link = NULL; | 96 | ei->op.proc_get_link = NULL; |
100 | ei->pde = NULL; | 97 | ei->pde = NULL; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 548e7447ea47..37f1648adc23 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -10,6 +10,7 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | #include <linux/task_ref.h> | ||
13 | 14 | ||
14 | struct vmalloc_info { | 15 | struct vmalloc_info { |
15 | unsigned long used; | 16 | unsigned long used; |
@@ -41,13 +42,23 @@ extern struct file_operations proc_maps_operations; | |||
41 | extern struct file_operations proc_numa_maps_operations; | 42 | extern struct file_operations proc_numa_maps_operations; |
42 | extern struct file_operations proc_smaps_operations; | 43 | extern struct file_operations proc_smaps_operations; |
43 | 44 | ||
45 | extern struct file_operations proc_maps_operations; | ||
46 | extern struct file_operations proc_numa_maps_operations; | ||
47 | extern struct file_operations proc_smaps_operations; | ||
48 | |||
49 | |||
44 | void free_proc_entry(struct proc_dir_entry *de); | 50 | void free_proc_entry(struct proc_dir_entry *de); |
45 | 51 | ||
46 | int proc_init_inodecache(void); | 52 | int proc_init_inodecache(void); |
47 | 53 | ||
48 | static inline struct task_struct *proc_task(struct inode *inode) | 54 | static inline struct task_ref *proc_tref(struct inode *inode) |
55 | { | ||
56 | return PROC_I(inode)->tref; | ||
57 | } | ||
58 | |||
59 | static inline struct task_struct *get_proc_task(struct inode *inode) | ||
49 | { | 60 | { |
50 | return PROC_I(inode)->task; | 61 | return get_tref_task(proc_tref(inode)); |
51 | } | 62 | } |
52 | 63 | ||
53 | static inline int proc_fd(struct inode *inode) | 64 | static inline int proc_fd(struct inode *inode) |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4187b4e9cdb3..abf3208c3f60 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -75,9 +75,13 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * | |||
75 | { | 75 | { |
76 | struct vm_area_struct * vma; | 76 | struct vm_area_struct * vma; |
77 | int result = -ENOENT; | 77 | int result = -ENOENT; |
78 | struct task_struct *task = proc_task(inode); | 78 | struct task_struct *task = get_proc_task(inode); |
79 | struct mm_struct * mm = get_task_mm(task); | 79 | struct mm_struct * mm = NULL; |
80 | 80 | ||
81 | if (task) { | ||
82 | mm = get_task_mm(task); | ||
83 | put_task_struct(task); | ||
84 | } | ||
81 | if (!mm) | 85 | if (!mm) |
82 | goto out; | 86 | goto out; |
83 | down_read(&mm->mmap_sem); | 87 | down_read(&mm->mmap_sem); |
@@ -120,7 +124,8 @@ struct mem_size_stats | |||
120 | 124 | ||
121 | static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) | 125 | static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) |
122 | { | 126 | { |
123 | struct task_struct *task = m->private; | 127 | struct proc_maps_private *priv = m->private; |
128 | struct task_struct *task = priv->task; | ||
124 | struct vm_area_struct *vma = v; | 129 | struct vm_area_struct *vma = v; |
125 | struct mm_struct *mm = vma->vm_mm; | 130 | struct mm_struct *mm = vma->vm_mm; |
126 | struct file *file = vma->vm_file; | 131 | struct file *file = vma->vm_file; |
@@ -295,12 +300,16 @@ static int show_smap(struct seq_file *m, void *v) | |||
295 | 300 | ||
296 | static void *m_start(struct seq_file *m, loff_t *pos) | 301 | static void *m_start(struct seq_file *m, loff_t *pos) |
297 | { | 302 | { |
298 | struct task_struct *task = m->private; | 303 | struct proc_maps_private *priv = m->private; |
299 | unsigned long last_addr = m->version; | 304 | unsigned long last_addr = m->version; |
300 | struct mm_struct *mm; | 305 | struct mm_struct *mm; |
301 | struct vm_area_struct *vma, *tail_vma; | 306 | struct vm_area_struct *vma, *tail_vma = NULL; |
302 | loff_t l = *pos; | 307 | loff_t l = *pos; |
303 | 308 | ||
309 | /* Clear the per syscall fields in priv */ | ||
310 | priv->task = NULL; | ||
311 | priv->tail_vma = NULL; | ||
312 | |||
304 | /* | 313 | /* |
305 | * We remember last_addr rather than next_addr to hit with | 314 | * We remember last_addr rather than next_addr to hit with |
306 | * mmap_cache most of the time. We have zero last_addr at | 315 | * mmap_cache most of the time. We have zero last_addr at |
@@ -311,11 +320,15 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
311 | if (last_addr == -1UL) | 320 | if (last_addr == -1UL) |
312 | return NULL; | 321 | return NULL; |
313 | 322 | ||
314 | mm = get_task_mm(task); | 323 | priv->task = get_tref_task(priv->tref); |
324 | if (!priv->task) | ||
325 | return NULL; | ||
326 | |||
327 | mm = get_task_mm(priv->task); | ||
315 | if (!mm) | 328 | if (!mm) |
316 | return NULL; | 329 | return NULL; |
317 | 330 | ||
318 | tail_vma = get_gate_vma(task); | 331 | priv->tail_vma = tail_vma = get_gate_vma(priv->task); |
319 | down_read(&mm->mmap_sem); | 332 | down_read(&mm->mmap_sem); |
320 | 333 | ||
321 | /* Start with last addr hint */ | 334 | /* Start with last addr hint */ |
@@ -350,11 +363,9 @@ out: | |||
350 | return tail_vma; | 363 | return tail_vma; |
351 | } | 364 | } |
352 | 365 | ||
353 | static void m_stop(struct seq_file *m, void *v) | 366 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) |
354 | { | 367 | { |
355 | struct task_struct *task = m->private; | 368 | if (vma && vma != priv->tail_vma) { |
356 | struct vm_area_struct *vma = v; | ||
357 | if (vma && vma != get_gate_vma(task)) { | ||
358 | struct mm_struct *mm = vma->vm_mm; | 369 | struct mm_struct *mm = vma->vm_mm; |
359 | up_read(&mm->mmap_sem); | 370 | up_read(&mm->mmap_sem); |
360 | mmput(mm); | 371 | mmput(mm); |
@@ -363,17 +374,27 @@ static void m_stop(struct seq_file *m, void *v) | |||
363 | 374 | ||
364 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | 375 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
365 | { | 376 | { |
366 | struct task_struct *task = m->private; | 377 | struct proc_maps_private *priv = m->private; |
367 | struct vm_area_struct *vma = v; | 378 | struct vm_area_struct *vma = v; |
368 | struct vm_area_struct *tail_vma = get_gate_vma(task); | 379 | struct vm_area_struct *tail_vma = priv->tail_vma; |
369 | 380 | ||
370 | (*pos)++; | 381 | (*pos)++; |
371 | if (vma && (vma != tail_vma) && vma->vm_next) | 382 | if (vma && (vma != tail_vma) && vma->vm_next) |
372 | return vma->vm_next; | 383 | return vma->vm_next; |
373 | m_stop(m, v); | 384 | vma_stop(priv, vma); |
374 | return (vma != tail_vma)? tail_vma: NULL; | 385 | return (vma != tail_vma)? tail_vma: NULL; |
375 | } | 386 | } |
376 | 387 | ||
388 | static void m_stop(struct seq_file *m, void *v) | ||
389 | { | ||
390 | struct proc_maps_private *priv = m->private; | ||
391 | struct vm_area_struct *vma = v; | ||
392 | |||
393 | vma_stop(priv, vma); | ||
394 | if (priv->task) | ||
395 | put_task_struct(priv->task); | ||
396 | } | ||
397 | |||
377 | static struct seq_operations proc_pid_maps_op = { | 398 | static struct seq_operations proc_pid_maps_op = { |
378 | .start = m_start, | 399 | .start = m_start, |
379 | .next = m_next, | 400 | .next = m_next, |
@@ -391,11 +412,18 @@ static struct seq_operations proc_pid_smaps_op = { | |||
391 | static int do_maps_open(struct inode *inode, struct file *file, | 412 | static int do_maps_open(struct inode *inode, struct file *file, |
392 | struct seq_operations *ops) | 413 | struct seq_operations *ops) |
393 | { | 414 | { |
394 | struct task_struct *task = proc_task(inode); | 415 | struct proc_maps_private *priv; |
395 | int ret = seq_open(file, ops); | 416 | int ret = -ENOMEM; |
396 | if (!ret) { | 417 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
397 | struct seq_file *m = file->private_data; | 418 | if (priv) { |
398 | m->private = task; | 419 | priv->tref = proc_tref(inode); |
420 | ret = seq_open(file, ops); | ||
421 | if (!ret) { | ||
422 | struct seq_file *m = file->private_data; | ||
423 | m->private = priv; | ||
424 | } else { | ||
425 | kfree(priv); | ||
426 | } | ||
399 | } | 427 | } |
400 | return ret; | 428 | return ret; |
401 | } | 429 | } |
@@ -409,7 +437,7 @@ struct file_operations proc_maps_operations = { | |||
409 | .open = maps_open, | 437 | .open = maps_open, |
410 | .read = seq_read, | 438 | .read = seq_read, |
411 | .llseek = seq_lseek, | 439 | .llseek = seq_lseek, |
412 | .release = seq_release, | 440 | .release = seq_release_private, |
413 | }; | 441 | }; |
414 | 442 | ||
415 | #ifdef CONFIG_NUMA | 443 | #ifdef CONFIG_NUMA |
@@ -431,7 +459,7 @@ struct file_operations proc_numa_maps_operations = { | |||
431 | .open = numa_maps_open, | 459 | .open = numa_maps_open, |
432 | .read = seq_read, | 460 | .read = seq_read, |
433 | .llseek = seq_lseek, | 461 | .llseek = seq_lseek, |
434 | .release = seq_release, | 462 | .release = seq_release_private, |
435 | }; | 463 | }; |
436 | #endif | 464 | #endif |
437 | 465 | ||
@@ -444,5 +472,5 @@ struct file_operations proc_smaps_operations = { | |||
444 | .open = smaps_open, | 472 | .open = smaps_open, |
445 | .read = seq_read, | 473 | .read = seq_read, |
446 | .llseek = seq_lseek, | 474 | .llseek = seq_lseek, |
447 | .release = seq_release, | 475 | .release = seq_release_private, |
448 | }; | 476 | }; |
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d4d2081dbaf7..4c7271f04697 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h | |||
@@ -246,7 +246,7 @@ extern void kclist_add(struct kcore_list *, void *, size_t); | |||
246 | #endif | 246 | #endif |
247 | 247 | ||
248 | struct proc_inode { | 248 | struct proc_inode { |
249 | struct task_struct *task; | 249 | struct task_ref *tref; |
250 | int fd; | 250 | int fd; |
251 | union { | 251 | union { |
252 | int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); | 252 | int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); |
@@ -266,4 +266,10 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode) | |||
266 | return PROC_I(inode)->pde; | 266 | return PROC_I(inode)->pde; |
267 | } | 267 | } |
268 | 268 | ||
269 | struct proc_maps_private { | ||
270 | struct task_ref *tref; | ||
271 | struct task_struct *task; | ||
272 | struct vm_area_struct *tail_vma; | ||
273 | }; | ||
274 | |||
269 | #endif /* _LINUX_PROC_FS_H */ | 275 | #endif /* _LINUX_PROC_FS_H */ |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b602f73fb38d..3e991c0c02e2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/time.h> | 50 | #include <linux/time.h> |
51 | #include <linux/backing-dev.h> | 51 | #include <linux/backing-dev.h> |
52 | #include <linux/sort.h> | 52 | #include <linux/sort.h> |
53 | #include <linux/task_ref.h> | ||
53 | 54 | ||
54 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
55 | #include <asm/atomic.h> | 56 | #include <asm/atomic.h> |
@@ -2442,31 +2443,43 @@ void __cpuset_memory_pressure_bump(void) | |||
2442 | */ | 2443 | */ |
2443 | static int proc_cpuset_show(struct seq_file *m, void *v) | 2444 | static int proc_cpuset_show(struct seq_file *m, void *v) |
2444 | { | 2445 | { |
2446 | struct task_ref *tref; | ||
2445 | struct task_struct *tsk; | 2447 | struct task_struct *tsk; |
2446 | char *buf; | 2448 | char *buf; |
2447 | int retval = 0; | 2449 | int retval; |
2448 | 2450 | ||
2451 | retval = -ENOMEM; | ||
2449 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 2452 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
2450 | if (!buf) | 2453 | if (!buf) |
2451 | return -ENOMEM; | 2454 | goto out; |
2455 | |||
2456 | retval = -ESRCH; | ||
2457 | tref = m->private; | ||
2458 | tsk = get_tref_task(tref); | ||
2459 | if (!tsk) | ||
2460 | goto out_free; | ||
2452 | 2461 | ||
2453 | tsk = m->private; | 2462 | retval = -EINVAL; |
2454 | mutex_lock(&manage_mutex); | 2463 | mutex_lock(&manage_mutex); |
2464 | |||
2455 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); | 2465 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); |
2456 | if (retval < 0) | 2466 | if (retval < 0) |
2457 | goto out; | 2467 | goto out_unlock; |
2458 | seq_puts(m, buf); | 2468 | seq_puts(m, buf); |
2459 | seq_putc(m, '\n'); | 2469 | seq_putc(m, '\n'); |
2460 | out: | 2470 | out_unlock: |
2461 | mutex_unlock(&manage_mutex); | 2471 | mutex_unlock(&manage_mutex); |
2472 | put_task_struct(tsk); | ||
2473 | out_free: | ||
2462 | kfree(buf); | 2474 | kfree(buf); |
2475 | out: | ||
2463 | return retval; | 2476 | return retval; |
2464 | } | 2477 | } |
2465 | 2478 | ||
2466 | static int cpuset_open(struct inode *inode, struct file *file) | 2479 | static int cpuset_open(struct inode *inode, struct file *file) |
2467 | { | 2480 | { |
2468 | struct task_struct *tsk = PROC_I(inode)->task; | 2481 | struct task_ref *tref = PROC_I(inode)->tref; |
2469 | return single_open(file, proc_cpuset_show, tsk); | 2482 | return single_open(file, proc_cpuset_show, tref); |
2470 | } | 2483 | } |
2471 | 2484 | ||
2472 | struct file_operations proc_cpuset_operations = { | 2485 | struct file_operations proc_cpuset_operations = { |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 73e0f23b7f51..6b9740bbf4c0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1821,7 +1821,7 @@ static inline void check_huge_range(struct vm_area_struct *vma, | |||
1821 | 1821 | ||
1822 | int show_numa_map(struct seq_file *m, void *v) | 1822 | int show_numa_map(struct seq_file *m, void *v) |
1823 | { | 1823 | { |
1824 | struct task_struct *task = m->private; | 1824 | struct proc_maps_private *priv = m->private; |
1825 | struct vm_area_struct *vma = v; | 1825 | struct vm_area_struct *vma = v; |
1826 | struct numa_maps *md; | 1826 | struct numa_maps *md; |
1827 | struct file *file = vma->vm_file; | 1827 | struct file *file = vma->vm_file; |
@@ -1837,7 +1837,7 @@ int show_numa_map(struct seq_file *m, void *v) | |||
1837 | return 0; | 1837 | return 0; |
1838 | 1838 | ||
1839 | mpol_to_str(buffer, sizeof(buffer), | 1839 | mpol_to_str(buffer, sizeof(buffer), |
1840 | get_vma_policy(task, vma, vma->vm_start)); | 1840 | get_vma_policy(priv->task, vma, vma->vm_start)); |
1841 | 1841 | ||
1842 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1842 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1843 | 1843 | ||
@@ -1891,7 +1891,7 @@ out: | |||
1891 | kfree(md); | 1891 | kfree(md); |
1892 | 1892 | ||
1893 | if (m->count < m->size) | 1893 | if (m->count < m->size) |
1894 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; | 1894 | m->version = (vma != priv->tail_vma) ? vma->vm_start : 0; |
1895 | return 0; | 1895 | return 0; |
1896 | } | 1896 | } |
1897 | 1897 | ||