diff options
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r-- | fs/proc/base.c | 1091 |
1 files changed, 571 insertions, 520 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 6cc77dc3f3ff..6ba7785319de 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -74,6 +74,16 @@ | |||
74 | #include <linux/poll.h> | 74 | #include <linux/poll.h> |
75 | #include "internal.h" | 75 | #include "internal.h" |
76 | 76 | ||
77 | /* NOTE: | ||
78 | * Implementing inode permission operations in /proc is almost | ||
79 | * certainly an error. Permission checks need to happen during | ||
80 | * each system call, not at open time. The reason is that most of | ||
81 | * what we wish to check for permissions in /proc varies at runtime. | ||
82 | * | ||
83 | * The classic example of a problem is opening file descriptors | ||
84 | * in /proc for a task before it execs a suid executable. | ||
85 | */ | ||
86 | |||
77 | /* | 87 | /* |
78 | * For hysterical raisins we keep the same inumbers as in the old procfs. | 88 | * For hysterical raisins we keep the same inumbers as in the old procfs. |
79 | * Feel free to change the macro below - just keep the range distinct from | 89 | * Feel free to change the macro below - just keep the range distinct from |
@@ -121,6 +131,8 @@ enum pid_directory_inos { | |||
121 | PROC_TGID_ATTR_PREV, | 131 | PROC_TGID_ATTR_PREV, |
122 | PROC_TGID_ATTR_EXEC, | 132 | PROC_TGID_ATTR_EXEC, |
123 | PROC_TGID_ATTR_FSCREATE, | 133 | PROC_TGID_ATTR_FSCREATE, |
134 | PROC_TGID_ATTR_KEYCREATE, | ||
135 | PROC_TGID_ATTR_SOCKCREATE, | ||
124 | #endif | 136 | #endif |
125 | #ifdef CONFIG_AUDITSYSCALL | 137 | #ifdef CONFIG_AUDITSYSCALL |
126 | PROC_TGID_LOGINUID, | 138 | PROC_TGID_LOGINUID, |
@@ -162,6 +174,8 @@ enum pid_directory_inos { | |||
162 | PROC_TID_ATTR_PREV, | 174 | PROC_TID_ATTR_PREV, |
163 | PROC_TID_ATTR_EXEC, | 175 | PROC_TID_ATTR_EXEC, |
164 | PROC_TID_ATTR_FSCREATE, | 176 | PROC_TID_ATTR_FSCREATE, |
177 | PROC_TID_ATTR_KEYCREATE, | ||
178 | PROC_TID_ATTR_SOCKCREATE, | ||
165 | #endif | 179 | #endif |
166 | #ifdef CONFIG_AUDITSYSCALL | 180 | #ifdef CONFIG_AUDITSYSCALL |
167 | PROC_TID_LOGINUID, | 181 | PROC_TID_LOGINUID, |
@@ -173,6 +187,9 @@ enum pid_directory_inos { | |||
173 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ | 187 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ |
174 | }; | 188 | }; |
175 | 189 | ||
190 | /* Worst case buffer size needed for holding an integer. */ | ||
191 | #define PROC_NUMBUF 10 | ||
192 | |||
176 | struct pid_entry { | 193 | struct pid_entry { |
177 | int type; | 194 | int type; |
178 | int len; | 195 | int len; |
@@ -275,6 +292,8 @@ static struct pid_entry tgid_attr_stuff[] = { | |||
275 | E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | 292 | E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), |
276 | E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | 293 | E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), |
277 | E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | 294 | E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), |
295 | E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
296 | E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
278 | {0,0,NULL,0} | 297 | {0,0,NULL,0} |
279 | }; | 298 | }; |
280 | static struct pid_entry tid_attr_stuff[] = { | 299 | static struct pid_entry tid_attr_stuff[] = { |
@@ -282,6 +301,8 @@ static struct pid_entry tid_attr_stuff[] = { | |||
282 | E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | 301 | E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), |
283 | E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | 302 | E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), |
284 | E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | 303 | E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), |
304 | E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
305 | E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
285 | {0,0,NULL,0} | 306 | {0,0,NULL,0} |
286 | }; | 307 | }; |
287 | #endif | 308 | #endif |
@@ -290,12 +311,15 @@ static struct pid_entry tid_attr_stuff[] = { | |||
290 | 311 | ||
291 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 312 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
292 | { | 313 | { |
293 | struct task_struct *task = proc_task(inode); | 314 | struct task_struct *task = get_proc_task(inode); |
294 | struct files_struct *files; | 315 | struct files_struct *files = NULL; |
295 | struct file *file; | 316 | struct file *file; |
296 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | 317 | int fd = proc_fd(inode); |
297 | 318 | ||
298 | files = get_files_struct(task); | 319 | if (task) { |
320 | files = get_files_struct(task); | ||
321 | put_task_struct(task); | ||
322 | } | ||
299 | if (files) { | 323 | if (files) { |
300 | /* | 324 | /* |
301 | * We are not taking a ref to the file structure, so we must | 325 | * We are not taking a ref to the file structure, so we must |
@@ -327,29 +351,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) | |||
327 | return fs; | 351 | return fs; |
328 | } | 352 | } |
329 | 353 | ||
330 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 354 | static int get_nr_threads(struct task_struct *tsk) |
331 | { | 355 | { |
332 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 356 | /* Must be called with the rcu_read_lock held */ |
333 | int result = -ENOENT; | 357 | unsigned long flags; |
334 | if (fs) { | 358 | int count = 0; |
335 | read_lock(&fs->lock); | 359 | |
336 | *mnt = mntget(fs->pwdmnt); | 360 | if (lock_task_sighand(tsk, &flags)) { |
337 | *dentry = dget(fs->pwd); | 361 | count = atomic_read(&tsk->signal->count); |
338 | read_unlock(&fs->lock); | 362 | unlock_task_sighand(tsk, &flags); |
339 | result = 0; | ||
340 | put_fs_struct(fs); | ||
341 | } | 363 | } |
342 | return result; | 364 | return count; |
343 | } | 365 | } |
344 | 366 | ||
345 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 367 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
346 | { | 368 | { |
347 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 369 | struct task_struct *task = get_proc_task(inode); |
370 | struct fs_struct *fs = NULL; | ||
348 | int result = -ENOENT; | 371 | int result = -ENOENT; |
372 | |||
373 | if (task) { | ||
374 | fs = get_fs_struct(task); | ||
375 | put_task_struct(task); | ||
376 | } | ||
349 | if (fs) { | 377 | if (fs) { |
350 | read_lock(&fs->lock); | 378 | read_lock(&fs->lock); |
351 | *mnt = mntget(fs->rootmnt); | 379 | *mnt = mntget(fs->pwdmnt); |
352 | *dentry = dget(fs->root); | 380 | *dentry = dget(fs->pwd); |
353 | read_unlock(&fs->lock); | 381 | read_unlock(&fs->lock); |
354 | result = 0; | 382 | result = 0; |
355 | put_fs_struct(fs); | 383 | put_fs_struct(fs); |
@@ -357,42 +385,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf | |||
357 | return result; | 385 | return result; |
358 | } | 386 | } |
359 | 387 | ||
360 | 388 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | |
361 | /* Same as proc_root_link, but this additionally tries to get fs from other | ||
362 | * threads in the group */ | ||
363 | static int proc_task_root_link(struct inode *inode, struct dentry **dentry, | ||
364 | struct vfsmount **mnt) | ||
365 | { | 389 | { |
366 | struct fs_struct *fs; | 390 | struct task_struct *task = get_proc_task(inode); |
391 | struct fs_struct *fs = NULL; | ||
367 | int result = -ENOENT; | 392 | int result = -ENOENT; |
368 | struct task_struct *leader = proc_task(inode); | ||
369 | 393 | ||
370 | task_lock(leader); | 394 | if (task) { |
371 | fs = leader->fs; | 395 | fs = get_fs_struct(task); |
372 | if (fs) { | 396 | put_task_struct(task); |
373 | atomic_inc(&fs->count); | ||
374 | task_unlock(leader); | ||
375 | } else { | ||
376 | /* Try to get fs from other threads */ | ||
377 | task_unlock(leader); | ||
378 | read_lock(&tasklist_lock); | ||
379 | if (pid_alive(leader)) { | ||
380 | struct task_struct *task = leader; | ||
381 | |||
382 | while ((task = next_thread(task)) != leader) { | ||
383 | task_lock(task); | ||
384 | fs = task->fs; | ||
385 | if (fs) { | ||
386 | atomic_inc(&fs->count); | ||
387 | task_unlock(task); | ||
388 | break; | ||
389 | } | ||
390 | task_unlock(task); | ||
391 | } | ||
392 | } | ||
393 | read_unlock(&tasklist_lock); | ||
394 | } | 397 | } |
395 | |||
396 | if (fs) { | 398 | if (fs) { |
397 | read_lock(&fs->lock); | 399 | read_lock(&fs->lock); |
398 | *mnt = mntget(fs->rootmnt); | 400 | *mnt = mntget(fs->rootmnt); |
@@ -404,7 +406,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry, | |||
404 | return result; | 406 | return result; |
405 | } | 407 | } |
406 | 408 | ||
407 | |||
408 | #define MAY_PTRACE(task) \ | 409 | #define MAY_PTRACE(task) \ |
409 | (task == current || \ | 410 | (task == current || \ |
410 | (task->parent == current && \ | 411 | (task->parent == current && \ |
@@ -535,142 +536,22 @@ static int proc_oom_score(struct task_struct *task, char *buffer) | |||
535 | /************************************************************************/ | 536 | /************************************************************************/ |
536 | 537 | ||
537 | /* permission checks */ | 538 | /* permission checks */ |
538 | 539 | static int proc_fd_access_allowed(struct inode *inode) | |
539 | /* If the process being read is separated by chroot from the reading process, | ||
540 | * don't let the reader access the threads. | ||
541 | * | ||
542 | * note: this does dput(root) and mntput(vfsmnt) on exit. | ||
543 | */ | ||
544 | static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) | ||
545 | { | 540 | { |
546 | struct dentry *de, *base; | 541 | struct task_struct *task; |
547 | struct vfsmount *our_vfsmnt, *mnt; | 542 | int allowed = 0; |
548 | int res = 0; | 543 | /* Allow access to a task's file descriptors if it is us or we |
549 | 544 | * may use ptrace attach to the process and find out that | |
550 | read_lock(&current->fs->lock); | 545 | * information. |
551 | our_vfsmnt = mntget(current->fs->rootmnt); | 546 | */ |
552 | base = dget(current->fs->root); | 547 | task = get_proc_task(inode); |
553 | read_unlock(&current->fs->lock); | 548 | if (task) { |
554 | 549 | allowed = ptrace_may_attach(task); | |
555 | spin_lock(&vfsmount_lock); | 550 | put_task_struct(task); |
556 | de = root; | ||
557 | mnt = vfsmnt; | ||
558 | |||
559 | while (mnt != our_vfsmnt) { | ||
560 | if (mnt == mnt->mnt_parent) | ||
561 | goto out; | ||
562 | de = mnt->mnt_mountpoint; | ||
563 | mnt = mnt->mnt_parent; | ||
564 | } | ||
565 | |||
566 | if (!is_subdir(de, base)) | ||
567 | goto out; | ||
568 | spin_unlock(&vfsmount_lock); | ||
569 | |||
570 | exit: | ||
571 | dput(base); | ||
572 | mntput(our_vfsmnt); | ||
573 | dput(root); | ||
574 | mntput(vfsmnt); | ||
575 | return res; | ||
576 | out: | ||
577 | spin_unlock(&vfsmount_lock); | ||
578 | res = -EACCES; | ||
579 | goto exit; | ||
580 | } | ||
581 | |||
582 | static int proc_check_root(struct inode *inode) | ||
583 | { | ||
584 | struct dentry *root; | ||
585 | struct vfsmount *vfsmnt; | ||
586 | |||
587 | if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ | ||
588 | return -ENOENT; | ||
589 | return proc_check_chroot(root, vfsmnt); | ||
590 | } | ||
591 | |||
592 | static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
593 | { | ||
594 | if (generic_permission(inode, mask, NULL) != 0) | ||
595 | return -EACCES; | ||
596 | return proc_check_root(inode); | ||
597 | } | ||
598 | |||
599 | static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
600 | { | ||
601 | struct dentry *root; | ||
602 | struct vfsmount *vfsmnt; | ||
603 | |||
604 | if (generic_permission(inode, mask, NULL) != 0) | ||
605 | return -EACCES; | ||
606 | |||
607 | if (proc_task_root_link(inode, &root, &vfsmnt)) | ||
608 | return -ENOENT; | ||
609 | |||
610 | return proc_check_chroot(root, vfsmnt); | ||
611 | } | ||
612 | |||
613 | extern struct seq_operations proc_pid_maps_op; | ||
614 | static int maps_open(struct inode *inode, struct file *file) | ||
615 | { | ||
616 | struct task_struct *task = proc_task(inode); | ||
617 | int ret = seq_open(file, &proc_pid_maps_op); | ||
618 | if (!ret) { | ||
619 | struct seq_file *m = file->private_data; | ||
620 | m->private = task; | ||
621 | } | ||
622 | return ret; | ||
623 | } | ||
624 | |||
625 | static struct file_operations proc_maps_operations = { | ||
626 | .open = maps_open, | ||
627 | .read = seq_read, | ||
628 | .llseek = seq_lseek, | ||
629 | .release = seq_release, | ||
630 | }; | ||
631 | |||
632 | #ifdef CONFIG_NUMA | ||
633 | extern struct seq_operations proc_pid_numa_maps_op; | ||
634 | static int numa_maps_open(struct inode *inode, struct file *file) | ||
635 | { | ||
636 | struct task_struct *task = proc_task(inode); | ||
637 | int ret = seq_open(file, &proc_pid_numa_maps_op); | ||
638 | if (!ret) { | ||
639 | struct seq_file *m = file->private_data; | ||
640 | m->private = task; | ||
641 | } | ||
642 | return ret; | ||
643 | } | ||
644 | |||
645 | static struct file_operations proc_numa_maps_operations = { | ||
646 | .open = numa_maps_open, | ||
647 | .read = seq_read, | ||
648 | .llseek = seq_lseek, | ||
649 | .release = seq_release, | ||
650 | }; | ||
651 | #endif | ||
652 | |||
653 | #ifdef CONFIG_MMU | ||
654 | extern struct seq_operations proc_pid_smaps_op; | ||
655 | static int smaps_open(struct inode *inode, struct file *file) | ||
656 | { | ||
657 | struct task_struct *task = proc_task(inode); | ||
658 | int ret = seq_open(file, &proc_pid_smaps_op); | ||
659 | if (!ret) { | ||
660 | struct seq_file *m = file->private_data; | ||
661 | m->private = task; | ||
662 | } | 551 | } |
663 | return ret; | 552 | return allowed; |
664 | } | 553 | } |
665 | 554 | ||
666 | static struct file_operations proc_smaps_operations = { | ||
667 | .open = smaps_open, | ||
668 | .read = seq_read, | ||
669 | .llseek = seq_lseek, | ||
670 | .release = seq_release, | ||
671 | }; | ||
672 | #endif | ||
673 | |||
674 | extern struct seq_operations mounts_op; | 555 | extern struct seq_operations mounts_op; |
675 | struct proc_mounts { | 556 | struct proc_mounts { |
676 | struct seq_file m; | 557 | struct seq_file m; |
@@ -679,16 +560,19 @@ struct proc_mounts { | |||
679 | 560 | ||
680 | static int mounts_open(struct inode *inode, struct file *file) | 561 | static int mounts_open(struct inode *inode, struct file *file) |
681 | { | 562 | { |
682 | struct task_struct *task = proc_task(inode); | 563 | struct task_struct *task = get_proc_task(inode); |
683 | struct namespace *namespace; | 564 | struct namespace *namespace = NULL; |
684 | struct proc_mounts *p; | 565 | struct proc_mounts *p; |
685 | int ret = -EINVAL; | 566 | int ret = -EINVAL; |
686 | 567 | ||
687 | task_lock(task); | 568 | if (task) { |
688 | namespace = task->namespace; | 569 | task_lock(task); |
689 | if (namespace) | 570 | namespace = task->namespace; |
690 | get_namespace(namespace); | 571 | if (namespace) |
691 | task_unlock(task); | 572 | get_namespace(namespace); |
573 | task_unlock(task); | ||
574 | put_task_struct(task); | ||
575 | } | ||
692 | 576 | ||
693 | if (namespace) { | 577 | if (namespace) { |
694 | ret = -ENOMEM; | 578 | ret = -ENOMEM; |
@@ -745,17 +629,21 @@ static struct file_operations proc_mounts_operations = { | |||
745 | extern struct seq_operations mountstats_op; | 629 | extern struct seq_operations mountstats_op; |
746 | static int mountstats_open(struct inode *inode, struct file *file) | 630 | static int mountstats_open(struct inode *inode, struct file *file) |
747 | { | 631 | { |
748 | struct task_struct *task = proc_task(inode); | ||
749 | int ret = seq_open(file, &mountstats_op); | 632 | int ret = seq_open(file, &mountstats_op); |
750 | 633 | ||
751 | if (!ret) { | 634 | if (!ret) { |
752 | struct seq_file *m = file->private_data; | 635 | struct seq_file *m = file->private_data; |
753 | struct namespace *namespace; | 636 | struct namespace *namespace = NULL; |
754 | task_lock(task); | 637 | struct task_struct *task = get_proc_task(inode); |
755 | namespace = task->namespace; | 638 | |
756 | if (namespace) | 639 | if (task) { |
757 | get_namespace(namespace); | 640 | task_lock(task); |
758 | task_unlock(task); | 641 | namespace = task->namespace; |
642 | if (namespace) | ||
643 | get_namespace(namespace); | ||
644 | task_unlock(task); | ||
645 | put_task_struct(task); | ||
646 | } | ||
759 | 647 | ||
760 | if (namespace) | 648 | if (namespace) |
761 | m->private = namespace; | 649 | m->private = namespace; |
@@ -782,18 +670,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, | |||
782 | struct inode * inode = file->f_dentry->d_inode; | 670 | struct inode * inode = file->f_dentry->d_inode; |
783 | unsigned long page; | 671 | unsigned long page; |
784 | ssize_t length; | 672 | ssize_t length; |
785 | struct task_struct *task = proc_task(inode); | 673 | struct task_struct *task = get_proc_task(inode); |
674 | |||
675 | length = -ESRCH; | ||
676 | if (!task) | ||
677 | goto out_no_task; | ||
786 | 678 | ||
787 | if (count > PROC_BLOCK_SIZE) | 679 | if (count > PROC_BLOCK_SIZE) |
788 | count = PROC_BLOCK_SIZE; | 680 | count = PROC_BLOCK_SIZE; |
681 | |||
682 | length = -ENOMEM; | ||
789 | if (!(page = __get_free_page(GFP_KERNEL))) | 683 | if (!(page = __get_free_page(GFP_KERNEL))) |
790 | return -ENOMEM; | 684 | goto out; |
791 | 685 | ||
792 | length = PROC_I(inode)->op.proc_read(task, (char*)page); | 686 | length = PROC_I(inode)->op.proc_read(task, (char*)page); |
793 | 687 | ||
794 | if (length >= 0) | 688 | if (length >= 0) |
795 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 689 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
796 | free_page(page); | 690 | free_page(page); |
691 | out: | ||
692 | put_task_struct(task); | ||
693 | out_no_task: | ||
797 | return length; | 694 | return length; |
798 | } | 695 | } |
799 | 696 | ||
@@ -810,12 +707,15 @@ static int mem_open(struct inode* inode, struct file* file) | |||
810 | static ssize_t mem_read(struct file * file, char __user * buf, | 707 | static ssize_t mem_read(struct file * file, char __user * buf, |
811 | size_t count, loff_t *ppos) | 708 | size_t count, loff_t *ppos) |
812 | { | 709 | { |
813 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 710 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
814 | char *page; | 711 | char *page; |
815 | unsigned long src = *ppos; | 712 | unsigned long src = *ppos; |
816 | int ret = -ESRCH; | 713 | int ret = -ESRCH; |
817 | struct mm_struct *mm; | 714 | struct mm_struct *mm; |
818 | 715 | ||
716 | if (!task) | ||
717 | goto out_no_task; | ||
718 | |||
819 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 719 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
820 | goto out; | 720 | goto out; |
821 | 721 | ||
@@ -865,6 +765,8 @@ out_put: | |||
865 | out_free: | 765 | out_free: |
866 | free_page((unsigned long) page); | 766 | free_page((unsigned long) page); |
867 | out: | 767 | out: |
768 | put_task_struct(task); | ||
769 | out_no_task: | ||
868 | return ret; | 770 | return ret; |
869 | } | 771 | } |
870 | 772 | ||
@@ -877,15 +779,20 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
877 | { | 779 | { |
878 | int copied = 0; | 780 | int copied = 0; |
879 | char *page; | 781 | char *page; |
880 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 782 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
881 | unsigned long dst = *ppos; | 783 | unsigned long dst = *ppos; |
882 | 784 | ||
785 | copied = -ESRCH; | ||
786 | if (!task) | ||
787 | goto out_no_task; | ||
788 | |||
883 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 789 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
884 | return -ESRCH; | 790 | goto out; |
885 | 791 | ||
792 | copied = -ENOMEM; | ||
886 | page = (char *)__get_free_page(GFP_USER); | 793 | page = (char *)__get_free_page(GFP_USER); |
887 | if (!page) | 794 | if (!page) |
888 | return -ENOMEM; | 795 | goto out; |
889 | 796 | ||
890 | while (count > 0) { | 797 | while (count > 0) { |
891 | int this_len, retval; | 798 | int this_len, retval; |
@@ -908,6 +815,9 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
908 | } | 815 | } |
909 | *ppos = dst; | 816 | *ppos = dst; |
910 | free_page((unsigned long) page); | 817 | free_page((unsigned long) page); |
818 | out: | ||
819 | put_task_struct(task); | ||
820 | out_no_task: | ||
911 | return copied; | 821 | return copied; |
912 | } | 822 | } |
913 | #endif | 823 | #endif |
@@ -938,13 +848,18 @@ static struct file_operations proc_mem_operations = { | |||
938 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, | 848 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, |
939 | size_t count, loff_t *ppos) | 849 | size_t count, loff_t *ppos) |
940 | { | 850 | { |
941 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 851 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
942 | char buffer[8]; | 852 | char buffer[PROC_NUMBUF]; |
943 | size_t len; | 853 | size_t len; |
944 | int oom_adjust = task->oomkilladj; | 854 | int oom_adjust; |
945 | loff_t __ppos = *ppos; | 855 | loff_t __ppos = *ppos; |
946 | 856 | ||
947 | len = sprintf(buffer, "%i\n", oom_adjust); | 857 | if (!task) |
858 | return -ESRCH; | ||
859 | oom_adjust = task->oomkilladj; | ||
860 | put_task_struct(task); | ||
861 | |||
862 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | ||
948 | if (__ppos >= len) | 863 | if (__ppos >= len) |
949 | return 0; | 864 | return 0; |
950 | if (count > len-__ppos) | 865 | if (count > len-__ppos) |
@@ -958,15 +873,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
958 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | 873 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, |
959 | size_t count, loff_t *ppos) | 874 | size_t count, loff_t *ppos) |
960 | { | 875 | { |
961 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 876 | struct task_struct *task; |
962 | char buffer[8], *end; | 877 | char buffer[PROC_NUMBUF], *end; |
963 | int oom_adjust; | 878 | int oom_adjust; |
964 | 879 | ||
965 | if (!capable(CAP_SYS_RESOURCE)) | 880 | if (!capable(CAP_SYS_RESOURCE)) |
966 | return -EPERM; | 881 | return -EPERM; |
967 | memset(buffer, 0, 8); | 882 | memset(buffer, 0, sizeof(buffer)); |
968 | if (count > 6) | 883 | if (count > sizeof(buffer) - 1) |
969 | count = 6; | 884 | count = sizeof(buffer) - 1; |
970 | if (copy_from_user(buffer, buf, count)) | 885 | if (copy_from_user(buffer, buf, count)) |
971 | return -EFAULT; | 886 | return -EFAULT; |
972 | oom_adjust = simple_strtol(buffer, &end, 0); | 887 | oom_adjust = simple_strtol(buffer, &end, 0); |
@@ -974,7 +889,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
974 | return -EINVAL; | 889 | return -EINVAL; |
975 | if (*end == '\n') | 890 | if (*end == '\n') |
976 | end++; | 891 | end++; |
892 | task = get_proc_task(file->f_dentry->d_inode); | ||
893 | if (!task) | ||
894 | return -ESRCH; | ||
977 | task->oomkilladj = oom_adjust; | 895 | task->oomkilladj = oom_adjust; |
896 | put_task_struct(task); | ||
978 | if (end - buffer == 0) | 897 | if (end - buffer == 0) |
979 | return -EIO; | 898 | return -EIO; |
980 | return end - buffer; | 899 | return end - buffer; |
@@ -985,22 +904,21 @@ static struct file_operations proc_oom_adjust_operations = { | |||
985 | .write = oom_adjust_write, | 904 | .write = oom_adjust_write, |
986 | }; | 905 | }; |
987 | 906 | ||
988 | static struct inode_operations proc_mem_inode_operations = { | ||
989 | .permission = proc_permission, | ||
990 | }; | ||
991 | |||
992 | #ifdef CONFIG_AUDITSYSCALL | 907 | #ifdef CONFIG_AUDITSYSCALL |
993 | #define TMPBUFLEN 21 | 908 | #define TMPBUFLEN 21 |
994 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | 909 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, |
995 | size_t count, loff_t *ppos) | 910 | size_t count, loff_t *ppos) |
996 | { | 911 | { |
997 | struct inode * inode = file->f_dentry->d_inode; | 912 | struct inode * inode = file->f_dentry->d_inode; |
998 | struct task_struct *task = proc_task(inode); | 913 | struct task_struct *task = get_proc_task(inode); |
999 | ssize_t length; | 914 | ssize_t length; |
1000 | char tmpbuf[TMPBUFLEN]; | 915 | char tmpbuf[TMPBUFLEN]; |
1001 | 916 | ||
917 | if (!task) | ||
918 | return -ESRCH; | ||
1002 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 919 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
1003 | audit_get_loginuid(task->audit_context)); | 920 | audit_get_loginuid(task->audit_context)); |
921 | put_task_struct(task); | ||
1004 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 922 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
1005 | } | 923 | } |
1006 | 924 | ||
@@ -1010,17 +928,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1010 | struct inode * inode = file->f_dentry->d_inode; | 928 | struct inode * inode = file->f_dentry->d_inode; |
1011 | char *page, *tmp; | 929 | char *page, *tmp; |
1012 | ssize_t length; | 930 | ssize_t length; |
1013 | struct task_struct *task = proc_task(inode); | ||
1014 | uid_t loginuid; | 931 | uid_t loginuid; |
1015 | 932 | ||
1016 | if (!capable(CAP_AUDIT_CONTROL)) | 933 | if (!capable(CAP_AUDIT_CONTROL)) |
1017 | return -EPERM; | 934 | return -EPERM; |
1018 | 935 | ||
1019 | if (current != task) | 936 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) |
1020 | return -EPERM; | 937 | return -EPERM; |
1021 | 938 | ||
1022 | if (count > PAGE_SIZE) | 939 | if (count >= PAGE_SIZE) |
1023 | count = PAGE_SIZE; | 940 | count = PAGE_SIZE - 1; |
1024 | 941 | ||
1025 | if (*ppos != 0) { | 942 | if (*ppos != 0) { |
1026 | /* No partial writes. */ | 943 | /* No partial writes. */ |
@@ -1033,13 +950,14 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1033 | if (copy_from_user(page, buf, count)) | 950 | if (copy_from_user(page, buf, count)) |
1034 | goto out_free_page; | 951 | goto out_free_page; |
1035 | 952 | ||
953 | page[count] = '\0'; | ||
1036 | loginuid = simple_strtoul(page, &tmp, 10); | 954 | loginuid = simple_strtoul(page, &tmp, 10); |
1037 | if (tmp == page) { | 955 | if (tmp == page) { |
1038 | length = -EINVAL; | 956 | length = -EINVAL; |
1039 | goto out_free_page; | 957 | goto out_free_page; |
1040 | 958 | ||
1041 | } | 959 | } |
1042 | length = audit_set_loginuid(task, loginuid); | 960 | length = audit_set_loginuid(current, loginuid); |
1043 | if (likely(length == 0)) | 961 | if (likely(length == 0)) |
1044 | length = count; | 962 | length = count; |
1045 | 963 | ||
@@ -1058,13 +976,16 @@ static struct file_operations proc_loginuid_operations = { | |||
1058 | static ssize_t seccomp_read(struct file *file, char __user *buf, | 976 | static ssize_t seccomp_read(struct file *file, char __user *buf, |
1059 | size_t count, loff_t *ppos) | 977 | size_t count, loff_t *ppos) |
1060 | { | 978 | { |
1061 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 979 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
1062 | char __buf[20]; | 980 | char __buf[20]; |
1063 | loff_t __ppos = *ppos; | 981 | loff_t __ppos = *ppos; |
1064 | size_t len; | 982 | size_t len; |
1065 | 983 | ||
984 | if (!tsk) | ||
985 | return -ESRCH; | ||
1066 | /* no need to print the trailing zero, so use only len */ | 986 | /* no need to print the trailing zero, so use only len */ |
1067 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); | 987 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); |
988 | put_task_struct(tsk); | ||
1068 | if (__ppos >= len) | 989 | if (__ppos >= len) |
1069 | return 0; | 990 | return 0; |
1070 | if (count > len - __ppos) | 991 | if (count > len - __ppos) |
@@ -1078,29 +999,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf, | |||
1078 | static ssize_t seccomp_write(struct file *file, const char __user *buf, | 999 | static ssize_t seccomp_write(struct file *file, const char __user *buf, |
1079 | size_t count, loff_t *ppos) | 1000 | size_t count, loff_t *ppos) |
1080 | { | 1001 | { |
1081 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 1002 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
1082 | char __buf[20], *end; | 1003 | char __buf[20], *end; |
1083 | unsigned int seccomp_mode; | 1004 | unsigned int seccomp_mode; |
1005 | ssize_t result; | ||
1006 | |||
1007 | result = -ESRCH; | ||
1008 | if (!tsk) | ||
1009 | goto out_no_task; | ||
1084 | 1010 | ||
1085 | /* can set it only once to be even more secure */ | 1011 | /* can set it only once to be even more secure */ |
1012 | result = -EPERM; | ||
1086 | if (unlikely(tsk->seccomp.mode)) | 1013 | if (unlikely(tsk->seccomp.mode)) |
1087 | return -EPERM; | 1014 | goto out; |
1088 | 1015 | ||
1016 | result = -EFAULT; | ||
1089 | memset(__buf, 0, sizeof(__buf)); | 1017 | memset(__buf, 0, sizeof(__buf)); |
1090 | count = min(count, sizeof(__buf) - 1); | 1018 | count = min(count, sizeof(__buf) - 1); |
1091 | if (copy_from_user(__buf, buf, count)) | 1019 | if (copy_from_user(__buf, buf, count)) |
1092 | return -EFAULT; | 1020 | goto out; |
1021 | |||
1093 | seccomp_mode = simple_strtoul(__buf, &end, 0); | 1022 | seccomp_mode = simple_strtoul(__buf, &end, 0); |
1094 | if (*end == '\n') | 1023 | if (*end == '\n') |
1095 | end++; | 1024 | end++; |
1025 | result = -EINVAL; | ||
1096 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | 1026 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { |
1097 | tsk->seccomp.mode = seccomp_mode; | 1027 | tsk->seccomp.mode = seccomp_mode; |
1098 | set_tsk_thread_flag(tsk, TIF_SECCOMP); | 1028 | set_tsk_thread_flag(tsk, TIF_SECCOMP); |
1099 | } else | 1029 | } else |
1100 | return -EINVAL; | 1030 | goto out; |
1031 | result = -EIO; | ||
1101 | if (unlikely(!(end - __buf))) | 1032 | if (unlikely(!(end - __buf))) |
1102 | return -EIO; | 1033 | goto out; |
1103 | return end - __buf; | 1034 | result = end - __buf; |
1035 | out: | ||
1036 | put_task_struct(tsk); | ||
1037 | out_no_task: | ||
1038 | return result; | ||
1104 | } | 1039 | } |
1105 | 1040 | ||
1106 | static struct file_operations proc_seccomp_operations = { | 1041 | static struct file_operations proc_seccomp_operations = { |
@@ -1117,10 +1052,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1117 | /* We don't need a base pointer in the /proc filesystem */ | 1052 | /* We don't need a base pointer in the /proc filesystem */ |
1118 | path_release(nd); | 1053 | path_release(nd); |
1119 | 1054 | ||
1120 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | 1055 | /* Are we allowed to snoop on the task's file descriptors? */ |
1121 | goto out; | 1056 | if (!proc_fd_access_allowed(inode)) |
1122 | error = proc_check_root(inode); | ||
1123 | if (error) | ||
1124 | goto out; | 1057 | goto out; |
1125 | 1058 | ||
1126 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); | 1059 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); |
@@ -1162,12 +1095,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
1162 | struct dentry *de; | 1095 | struct dentry *de; |
1163 | struct vfsmount *mnt = NULL; | 1096 | struct vfsmount *mnt = NULL; |
1164 | 1097 | ||
1165 | lock_kernel(); | 1098 | /* Are we allowed to snoop on the tasks file descriptors? */ |
1166 | 1099 | if (!proc_fd_access_allowed(inode)) | |
1167 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | ||
1168 | goto out; | ||
1169 | error = proc_check_root(inode); | ||
1170 | if (error) | ||
1171 | goto out; | 1100 | goto out; |
1172 | 1101 | ||
1173 | error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); | 1102 | error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); |
@@ -1178,7 +1107,6 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
1178 | dput(de); | 1107 | dput(de); |
1179 | mntput(mnt); | 1108 | mntput(mnt); |
1180 | out: | 1109 | out: |
1181 | unlock_kernel(); | ||
1182 | return error; | 1110 | return error; |
1183 | } | 1111 | } |
1184 | 1112 | ||
@@ -1187,21 +1115,20 @@ static struct inode_operations proc_pid_link_inode_operations = { | |||
1187 | .follow_link = proc_pid_follow_link | 1115 | .follow_link = proc_pid_follow_link |
1188 | }; | 1116 | }; |
1189 | 1117 | ||
1190 | #define NUMBUF 10 | ||
1191 | |||
1192 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | 1118 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) |
1193 | { | 1119 | { |
1194 | struct inode *inode = filp->f_dentry->d_inode; | 1120 | struct dentry *dentry = filp->f_dentry; |
1195 | struct task_struct *p = proc_task(inode); | 1121 | struct inode *inode = dentry->d_inode; |
1122 | struct task_struct *p = get_proc_task(inode); | ||
1196 | unsigned int fd, tid, ino; | 1123 | unsigned int fd, tid, ino; |
1197 | int retval; | 1124 | int retval; |
1198 | char buf[NUMBUF]; | 1125 | char buf[PROC_NUMBUF]; |
1199 | struct files_struct * files; | 1126 | struct files_struct * files; |
1200 | struct fdtable *fdt; | 1127 | struct fdtable *fdt; |
1201 | 1128 | ||
1202 | retval = -ENOENT; | 1129 | retval = -ENOENT; |
1203 | if (!pid_alive(p)) | 1130 | if (!p) |
1204 | goto out; | 1131 | goto out_no_task; |
1205 | retval = 0; | 1132 | retval = 0; |
1206 | tid = p->pid; | 1133 | tid = p->pid; |
1207 | 1134 | ||
@@ -1212,7 +1139,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1212 | goto out; | 1139 | goto out; |
1213 | filp->f_pos++; | 1140 | filp->f_pos++; |
1214 | case 1: | 1141 | case 1: |
1215 | ino = fake_ino(tid, PROC_TID_INO); | 1142 | ino = parent_ino(dentry); |
1216 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | 1143 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) |
1217 | goto out; | 1144 | goto out; |
1218 | filp->f_pos++; | 1145 | filp->f_pos++; |
@@ -1231,7 +1158,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1231 | continue; | 1158 | continue; |
1232 | rcu_read_unlock(); | 1159 | rcu_read_unlock(); |
1233 | 1160 | ||
1234 | j = NUMBUF; | 1161 | j = PROC_NUMBUF; |
1235 | i = fd; | 1162 | i = fd; |
1236 | do { | 1163 | do { |
1237 | j--; | 1164 | j--; |
@@ -1240,7 +1167,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1240 | } while (i); | 1167 | } while (i); |
1241 | 1168 | ||
1242 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); | 1169 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); |
1243 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { | 1170 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) { |
1244 | rcu_read_lock(); | 1171 | rcu_read_lock(); |
1245 | break; | 1172 | break; |
1246 | } | 1173 | } |
@@ -1250,6 +1177,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1250 | put_files_struct(files); | 1177 | put_files_struct(files); |
1251 | } | 1178 | } |
1252 | out: | 1179 | out: |
1180 | put_task_struct(p); | ||
1181 | out_no_task: | ||
1253 | return retval; | 1182 | return retval; |
1254 | } | 1183 | } |
1255 | 1184 | ||
@@ -1261,16 +1190,18 @@ static int proc_pident_readdir(struct file *filp, | |||
1261 | int pid; | 1190 | int pid; |
1262 | struct dentry *dentry = filp->f_dentry; | 1191 | struct dentry *dentry = filp->f_dentry; |
1263 | struct inode *inode = dentry->d_inode; | 1192 | struct inode *inode = dentry->d_inode; |
1193 | struct task_struct *task = get_proc_task(inode); | ||
1264 | struct pid_entry *p; | 1194 | struct pid_entry *p; |
1265 | ino_t ino; | 1195 | ino_t ino; |
1266 | int ret; | 1196 | int ret; |
1267 | 1197 | ||
1268 | ret = -ENOENT; | 1198 | ret = -ENOENT; |
1269 | if (!pid_alive(proc_task(inode))) | 1199 | if (!task) |
1270 | goto out; | 1200 | goto out; |
1271 | 1201 | ||
1272 | ret = 0; | 1202 | ret = 0; |
1273 | pid = proc_task(inode)->pid; | 1203 | pid = task->pid; |
1204 | put_task_struct(task); | ||
1274 | i = filp->f_pos; | 1205 | i = filp->f_pos; |
1275 | switch (i) { | 1206 | switch (i) { |
1276 | case 0: | 1207 | case 0: |
@@ -1353,22 +1284,19 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st | |||
1353 | 1284 | ||
1354 | /* Common stuff */ | 1285 | /* Common stuff */ |
1355 | ei = PROC_I(inode); | 1286 | ei = PROC_I(inode); |
1356 | ei->task = NULL; | ||
1357 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1287 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
1358 | inode->i_ino = fake_ino(task->pid, ino); | 1288 | inode->i_ino = fake_ino(task->pid, ino); |
1359 | 1289 | ||
1360 | if (!pid_alive(task)) | ||
1361 | goto out_unlock; | ||
1362 | |||
1363 | /* | 1290 | /* |
1364 | * grab the reference to task. | 1291 | * grab the reference to task. |
1365 | */ | 1292 | */ |
1366 | get_task_struct(task); | 1293 | ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); |
1367 | ei->task = task; | 1294 | if (!ei->pid) |
1368 | ei->type = ino; | 1295 | goto out_unlock; |
1296 | |||
1369 | inode->i_uid = 0; | 1297 | inode->i_uid = 0; |
1370 | inode->i_gid = 0; | 1298 | inode->i_gid = 0; |
1371 | if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { | 1299 | if (task_dumpable(task)) { |
1372 | inode->i_uid = task->euid; | 1300 | inode->i_uid = task->euid; |
1373 | inode->i_gid = task->egid; | 1301 | inode->i_gid = task->egid; |
1374 | } | 1302 | } |
@@ -1378,7 +1306,6 @@ out: | |||
1378 | return inode; | 1306 | return inode; |
1379 | 1307 | ||
1380 | out_unlock: | 1308 | out_unlock: |
1381 | ei->pde = NULL; | ||
1382 | iput(inode); | 1309 | iput(inode); |
1383 | return NULL; | 1310 | return NULL; |
1384 | } | 1311 | } |
@@ -1392,13 +1319,21 @@ out_unlock: | |||
1392 | * | 1319 | * |
1393 | * Rewrite the inode's ownerships here because the owning task may have | 1320 | * Rewrite the inode's ownerships here because the owning task may have |
1394 | * performed a setuid(), etc. | 1321 | * performed a setuid(), etc. |
1322 | * | ||
1323 | * Before the /proc/pid/status file was created the only way to read | ||
1324 | * the effective uid of a /process was to stat /proc/pid. Reading | ||
1325 | * /proc/pid/status is slow enough that procps and other packages | ||
1326 | * kept stating /proc/pid. To keep the rules in /proc simple I have | ||
1327 | * made this apply to all per process world readable and executable | ||
1328 | * directories. | ||
1395 | */ | 1329 | */ |
1396 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | 1330 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
1397 | { | 1331 | { |
1398 | struct inode *inode = dentry->d_inode; | 1332 | struct inode *inode = dentry->d_inode; |
1399 | struct task_struct *task = proc_task(inode); | 1333 | struct task_struct *task = get_proc_task(inode); |
1400 | if (pid_alive(task)) { | 1334 | if (task) { |
1401 | if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { | 1335 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1336 | task_dumpable(task)) { | ||
1402 | inode->i_uid = task->euid; | 1337 | inode->i_uid = task->euid; |
1403 | inode->i_gid = task->egid; | 1338 | inode->i_gid = task->egid; |
1404 | } else { | 1339 | } else { |
@@ -1406,59 +1341,75 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1406 | inode->i_gid = 0; | 1341 | inode->i_gid = 0; |
1407 | } | 1342 | } |
1408 | security_task_to_inode(task, inode); | 1343 | security_task_to_inode(task, inode); |
1344 | put_task_struct(task); | ||
1409 | return 1; | 1345 | return 1; |
1410 | } | 1346 | } |
1411 | d_drop(dentry); | 1347 | d_drop(dentry); |
1412 | return 0; | 1348 | return 0; |
1413 | } | 1349 | } |
1414 | 1350 | ||
1351 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
1352 | { | ||
1353 | struct inode *inode = dentry->d_inode; | ||
1354 | struct task_struct *task; | ||
1355 | generic_fillattr(inode, stat); | ||
1356 | |||
1357 | rcu_read_lock(); | ||
1358 | stat->uid = 0; | ||
1359 | stat->gid = 0; | ||
1360 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
1361 | if (task) { | ||
1362 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | ||
1363 | task_dumpable(task)) { | ||
1364 | stat->uid = task->euid; | ||
1365 | stat->gid = task->egid; | ||
1366 | } | ||
1367 | } | ||
1368 | rcu_read_unlock(); | ||
1369 | return 0; | ||
1370 | } | ||
1371 | |||
1415 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1372 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
1416 | { | 1373 | { |
1417 | struct inode *inode = dentry->d_inode; | 1374 | struct inode *inode = dentry->d_inode; |
1418 | struct task_struct *task = proc_task(inode); | 1375 | struct task_struct *task = get_proc_task(inode); |
1419 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | 1376 | int fd = proc_fd(inode); |
1420 | struct files_struct *files; | 1377 | struct files_struct *files; |
1421 | 1378 | ||
1422 | files = get_files_struct(task); | 1379 | if (task) { |
1423 | if (files) { | 1380 | files = get_files_struct(task); |
1424 | rcu_read_lock(); | 1381 | if (files) { |
1425 | if (fcheck_files(files, fd)) { | 1382 | rcu_read_lock(); |
1383 | if (fcheck_files(files, fd)) { | ||
1384 | rcu_read_unlock(); | ||
1385 | put_files_struct(files); | ||
1386 | if (task_dumpable(task)) { | ||
1387 | inode->i_uid = task->euid; | ||
1388 | inode->i_gid = task->egid; | ||
1389 | } else { | ||
1390 | inode->i_uid = 0; | ||
1391 | inode->i_gid = 0; | ||
1392 | } | ||
1393 | security_task_to_inode(task, inode); | ||
1394 | put_task_struct(task); | ||
1395 | return 1; | ||
1396 | } | ||
1426 | rcu_read_unlock(); | 1397 | rcu_read_unlock(); |
1427 | put_files_struct(files); | 1398 | put_files_struct(files); |
1428 | if (task_dumpable(task)) { | ||
1429 | inode->i_uid = task->euid; | ||
1430 | inode->i_gid = task->egid; | ||
1431 | } else { | ||
1432 | inode->i_uid = 0; | ||
1433 | inode->i_gid = 0; | ||
1434 | } | ||
1435 | security_task_to_inode(task, inode); | ||
1436 | return 1; | ||
1437 | } | 1399 | } |
1438 | rcu_read_unlock(); | 1400 | put_task_struct(task); |
1439 | put_files_struct(files); | ||
1440 | } | 1401 | } |
1441 | d_drop(dentry); | 1402 | d_drop(dentry); |
1442 | return 0; | 1403 | return 0; |
1443 | } | 1404 | } |
1444 | 1405 | ||
1445 | static void pid_base_iput(struct dentry *dentry, struct inode *inode) | ||
1446 | { | ||
1447 | struct task_struct *task = proc_task(inode); | ||
1448 | spin_lock(&task->proc_lock); | ||
1449 | if (task->proc_dentry == dentry) | ||
1450 | task->proc_dentry = NULL; | ||
1451 | spin_unlock(&task->proc_lock); | ||
1452 | iput(inode); | ||
1453 | } | ||
1454 | |||
1455 | static int pid_delete_dentry(struct dentry * dentry) | 1406 | static int pid_delete_dentry(struct dentry * dentry) |
1456 | { | 1407 | { |
1457 | /* Is the task we represent dead? | 1408 | /* Is the task we represent dead? |
1458 | * If so, then don't put the dentry on the lru list, | 1409 | * If so, then don't put the dentry on the lru list, |
1459 | * kill it immediately. | 1410 | * kill it immediately. |
1460 | */ | 1411 | */ |
1461 | return !pid_alive(proc_task(dentry->d_inode)); | 1412 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; |
1462 | } | 1413 | } |
1463 | 1414 | ||
1464 | static struct dentry_operations tid_fd_dentry_operations = | 1415 | static struct dentry_operations tid_fd_dentry_operations = |
@@ -1473,13 +1424,6 @@ static struct dentry_operations pid_dentry_operations = | |||
1473 | .d_delete = pid_delete_dentry, | 1424 | .d_delete = pid_delete_dentry, |
1474 | }; | 1425 | }; |
1475 | 1426 | ||
1476 | static struct dentry_operations pid_base_dentry_operations = | ||
1477 | { | ||
1478 | .d_revalidate = pid_revalidate, | ||
1479 | .d_iput = pid_base_iput, | ||
1480 | .d_delete = pid_delete_dentry, | ||
1481 | }; | ||
1482 | |||
1483 | /* Lookups */ | 1427 | /* Lookups */ |
1484 | 1428 | ||
1485 | static unsigned name_to_int(struct dentry *dentry) | 1429 | static unsigned name_to_int(struct dentry *dentry) |
@@ -1507,22 +1451,24 @@ out: | |||
1507 | /* SMP-safe */ | 1451 | /* SMP-safe */ |
1508 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | 1452 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) |
1509 | { | 1453 | { |
1510 | struct task_struct *task = proc_task(dir); | 1454 | struct task_struct *task = get_proc_task(dir); |
1511 | unsigned fd = name_to_int(dentry); | 1455 | unsigned fd = name_to_int(dentry); |
1456 | struct dentry *result = ERR_PTR(-ENOENT); | ||
1512 | struct file * file; | 1457 | struct file * file; |
1513 | struct files_struct * files; | 1458 | struct files_struct * files; |
1514 | struct inode *inode; | 1459 | struct inode *inode; |
1515 | struct proc_inode *ei; | 1460 | struct proc_inode *ei; |
1516 | 1461 | ||
1462 | if (!task) | ||
1463 | goto out_no_task; | ||
1517 | if (fd == ~0U) | 1464 | if (fd == ~0U) |
1518 | goto out; | 1465 | goto out; |
1519 | if (!pid_alive(task)) | ||
1520 | goto out; | ||
1521 | 1466 | ||
1522 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); | 1467 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); |
1523 | if (!inode) | 1468 | if (!inode) |
1524 | goto out; | 1469 | goto out; |
1525 | ei = PROC_I(inode); | 1470 | ei = PROC_I(inode); |
1471 | ei->fd = fd; | ||
1526 | files = get_files_struct(task); | 1472 | files = get_files_struct(task); |
1527 | if (!files) | 1473 | if (!files) |
1528 | goto out_unlock; | 1474 | goto out_unlock; |
@@ -1547,19 +1493,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1547 | ei->op.proc_get_link = proc_fd_link; | 1493 | ei->op.proc_get_link = proc_fd_link; |
1548 | dentry->d_op = &tid_fd_dentry_operations; | 1494 | dentry->d_op = &tid_fd_dentry_operations; |
1549 | d_add(dentry, inode); | 1495 | d_add(dentry, inode); |
1550 | return NULL; | 1496 | /* Close the race of the process dying before we return the dentry */ |
1497 | if (tid_fd_revalidate(dentry, NULL)) | ||
1498 | result = NULL; | ||
1499 | out: | ||
1500 | put_task_struct(task); | ||
1501 | out_no_task: | ||
1502 | return result; | ||
1551 | 1503 | ||
1552 | out_unlock2: | 1504 | out_unlock2: |
1553 | spin_unlock(&files->file_lock); | 1505 | spin_unlock(&files->file_lock); |
1554 | put_files_struct(files); | 1506 | put_files_struct(files); |
1555 | out_unlock: | 1507 | out_unlock: |
1556 | iput(inode); | 1508 | iput(inode); |
1557 | out: | 1509 | goto out; |
1558 | return ERR_PTR(-ENOENT); | ||
1559 | } | 1510 | } |
1560 | 1511 | ||
1561 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); | 1512 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); |
1562 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); | 1513 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); |
1514 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | ||
1563 | 1515 | ||
1564 | static struct file_operations proc_fd_operations = { | 1516 | static struct file_operations proc_fd_operations = { |
1565 | .read = generic_read_dir, | 1517 | .read = generic_read_dir, |
@@ -1576,12 +1528,11 @@ static struct file_operations proc_task_operations = { | |||
1576 | */ | 1528 | */ |
1577 | static struct inode_operations proc_fd_inode_operations = { | 1529 | static struct inode_operations proc_fd_inode_operations = { |
1578 | .lookup = proc_lookupfd, | 1530 | .lookup = proc_lookupfd, |
1579 | .permission = proc_permission, | ||
1580 | }; | 1531 | }; |
1581 | 1532 | ||
1582 | static struct inode_operations proc_task_inode_operations = { | 1533 | static struct inode_operations proc_task_inode_operations = { |
1583 | .lookup = proc_task_lookup, | 1534 | .lookup = proc_task_lookup, |
1584 | .permission = proc_task_permission, | 1535 | .getattr = proc_task_getattr, |
1585 | }; | 1536 | }; |
1586 | 1537 | ||
1587 | #ifdef CONFIG_SECURITY | 1538 | #ifdef CONFIG_SECURITY |
@@ -1591,12 +1542,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
1591 | struct inode * inode = file->f_dentry->d_inode; | 1542 | struct inode * inode = file->f_dentry->d_inode; |
1592 | unsigned long page; | 1543 | unsigned long page; |
1593 | ssize_t length; | 1544 | ssize_t length; |
1594 | struct task_struct *task = proc_task(inode); | 1545 | struct task_struct *task = get_proc_task(inode); |
1546 | |||
1547 | length = -ESRCH; | ||
1548 | if (!task) | ||
1549 | goto out_no_task; | ||
1595 | 1550 | ||
1596 | if (count > PAGE_SIZE) | 1551 | if (count > PAGE_SIZE) |
1597 | count = PAGE_SIZE; | 1552 | count = PAGE_SIZE; |
1553 | length = -ENOMEM; | ||
1598 | if (!(page = __get_free_page(GFP_KERNEL))) | 1554 | if (!(page = __get_free_page(GFP_KERNEL))) |
1599 | return -ENOMEM; | 1555 | goto out; |
1600 | 1556 | ||
1601 | length = security_getprocattr(task, | 1557 | length = security_getprocattr(task, |
1602 | (char*)file->f_dentry->d_name.name, | 1558 | (char*)file->f_dentry->d_name.name, |
@@ -1604,6 +1560,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
1604 | if (length >= 0) | 1560 | if (length >= 0) |
1605 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 1561 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
1606 | free_page(page); | 1562 | free_page(page); |
1563 | out: | ||
1564 | put_task_struct(task); | ||
1565 | out_no_task: | ||
1607 | return length; | 1566 | return length; |
1608 | } | 1567 | } |
1609 | 1568 | ||
@@ -1613,26 +1572,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | |||
1613 | struct inode * inode = file->f_dentry->d_inode; | 1572 | struct inode * inode = file->f_dentry->d_inode; |
1614 | char *page; | 1573 | char *page; |
1615 | ssize_t length; | 1574 | ssize_t length; |
1616 | struct task_struct *task = proc_task(inode); | 1575 | struct task_struct *task = get_proc_task(inode); |
1617 | 1576 | ||
1577 | length = -ESRCH; | ||
1578 | if (!task) | ||
1579 | goto out_no_task; | ||
1618 | if (count > PAGE_SIZE) | 1580 | if (count > PAGE_SIZE) |
1619 | count = PAGE_SIZE; | 1581 | count = PAGE_SIZE; |
1620 | if (*ppos != 0) { | 1582 | |
1621 | /* No partial writes. */ | 1583 | /* No partial writes. */ |
1622 | return -EINVAL; | 1584 | length = -EINVAL; |
1623 | } | 1585 | if (*ppos != 0) |
1586 | goto out; | ||
1587 | |||
1588 | length = -ENOMEM; | ||
1624 | page = (char*)__get_free_page(GFP_USER); | 1589 | page = (char*)__get_free_page(GFP_USER); |
1625 | if (!page) | 1590 | if (!page) |
1626 | return -ENOMEM; | 1591 | goto out; |
1592 | |||
1627 | length = -EFAULT; | 1593 | length = -EFAULT; |
1628 | if (copy_from_user(page, buf, count)) | 1594 | if (copy_from_user(page, buf, count)) |
1629 | goto out; | 1595 | goto out_free; |
1630 | 1596 | ||
1631 | length = security_setprocattr(task, | 1597 | length = security_setprocattr(task, |
1632 | (char*)file->f_dentry->d_name.name, | 1598 | (char*)file->f_dentry->d_name.name, |
1633 | (void*)page, count); | 1599 | (void*)page, count); |
1634 | out: | 1600 | out_free: |
1635 | free_page((unsigned long) page); | 1601 | free_page((unsigned long) page); |
1602 | out: | ||
1603 | put_task_struct(task); | ||
1604 | out_no_task: | ||
1636 | return length; | 1605 | return length; |
1637 | } | 1606 | } |
1638 | 1607 | ||
@@ -1647,24 +1616,22 @@ static struct file_operations proc_tgid_attr_operations; | |||
1647 | static struct inode_operations proc_tgid_attr_inode_operations; | 1616 | static struct inode_operations proc_tgid_attr_inode_operations; |
1648 | #endif | 1617 | #endif |
1649 | 1618 | ||
1650 | static int get_tid_list(int index, unsigned int *tids, struct inode *dir); | ||
1651 | |||
1652 | /* SMP-safe */ | 1619 | /* SMP-safe */ |
1653 | static struct dentry *proc_pident_lookup(struct inode *dir, | 1620 | static struct dentry *proc_pident_lookup(struct inode *dir, |
1654 | struct dentry *dentry, | 1621 | struct dentry *dentry, |
1655 | struct pid_entry *ents) | 1622 | struct pid_entry *ents) |
1656 | { | 1623 | { |
1657 | struct inode *inode; | 1624 | struct inode *inode; |
1658 | int error; | 1625 | struct dentry *error; |
1659 | struct task_struct *task = proc_task(dir); | 1626 | struct task_struct *task = get_proc_task(dir); |
1660 | struct pid_entry *p; | 1627 | struct pid_entry *p; |
1661 | struct proc_inode *ei; | 1628 | struct proc_inode *ei; |
1662 | 1629 | ||
1663 | error = -ENOENT; | 1630 | error = ERR_PTR(-ENOENT); |
1664 | inode = NULL; | 1631 | inode = NULL; |
1665 | 1632 | ||
1666 | if (!pid_alive(task)) | 1633 | if (!task) |
1667 | goto out; | 1634 | goto out_no_task; |
1668 | 1635 | ||
1669 | for (p = ents; p->name; p++) { | 1636 | for (p = ents; p->name; p++) { |
1670 | if (p->len != dentry->d_name.len) | 1637 | if (p->len != dentry->d_name.len) |
@@ -1675,7 +1642,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1675 | if (!p->name) | 1642 | if (!p->name) |
1676 | goto out; | 1643 | goto out; |
1677 | 1644 | ||
1678 | error = -EINVAL; | 1645 | error = ERR_PTR(-EINVAL); |
1679 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); | 1646 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); |
1680 | if (!inode) | 1647 | if (!inode) |
1681 | goto out; | 1648 | goto out; |
@@ -1688,7 +1655,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1688 | */ | 1655 | */ |
1689 | switch(p->type) { | 1656 | switch(p->type) { |
1690 | case PROC_TGID_TASK: | 1657 | case PROC_TGID_TASK: |
1691 | inode->i_nlink = 2 + get_tid_list(2, NULL, dir); | 1658 | inode->i_nlink = 2; |
1692 | inode->i_op = &proc_task_inode_operations; | 1659 | inode->i_op = &proc_task_inode_operations; |
1693 | inode->i_fop = &proc_task_operations; | 1660 | inode->i_fop = &proc_task_operations; |
1694 | break; | 1661 | break; |
@@ -1758,7 +1725,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1758 | #endif | 1725 | #endif |
1759 | case PROC_TID_MEM: | 1726 | case PROC_TID_MEM: |
1760 | case PROC_TGID_MEM: | 1727 | case PROC_TGID_MEM: |
1761 | inode->i_op = &proc_mem_inode_operations; | ||
1762 | inode->i_fop = &proc_mem_operations; | 1728 | inode->i_fop = &proc_mem_operations; |
1763 | break; | 1729 | break; |
1764 | #ifdef CONFIG_SECCOMP | 1730 | #ifdef CONFIG_SECCOMP |
@@ -1800,6 +1766,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1800 | case PROC_TGID_ATTR_EXEC: | 1766 | case PROC_TGID_ATTR_EXEC: |
1801 | case PROC_TID_ATTR_FSCREATE: | 1767 | case PROC_TID_ATTR_FSCREATE: |
1802 | case PROC_TGID_ATTR_FSCREATE: | 1768 | case PROC_TGID_ATTR_FSCREATE: |
1769 | case PROC_TID_ATTR_KEYCREATE: | ||
1770 | case PROC_TGID_ATTR_KEYCREATE: | ||
1771 | case PROC_TID_ATTR_SOCKCREATE: | ||
1772 | case PROC_TGID_ATTR_SOCKCREATE: | ||
1803 | inode->i_fop = &proc_pid_attr_operations; | 1773 | inode->i_fop = &proc_pid_attr_operations; |
1804 | break; | 1774 | break; |
1805 | #endif | 1775 | #endif |
@@ -1841,14 +1811,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1841 | default: | 1811 | default: |
1842 | printk("procfs: impossible type (%d)",p->type); | 1812 | printk("procfs: impossible type (%d)",p->type); |
1843 | iput(inode); | 1813 | iput(inode); |
1844 | return ERR_PTR(-EINVAL); | 1814 | error = ERR_PTR(-EINVAL); |
1815 | goto out; | ||
1845 | } | 1816 | } |
1846 | dentry->d_op = &pid_dentry_operations; | 1817 | dentry->d_op = &pid_dentry_operations; |
1847 | d_add(dentry, inode); | 1818 | d_add(dentry, inode); |
1848 | return NULL; | 1819 | /* Close the race of the process dying before we return the dentry */ |
1849 | 1820 | if (pid_revalidate(dentry, NULL)) | |
1821 | error = NULL; | ||
1850 | out: | 1822 | out: |
1851 | return ERR_PTR(error); | 1823 | put_task_struct(task); |
1824 | out_no_task: | ||
1825 | return error; | ||
1852 | } | 1826 | } |
1853 | 1827 | ||
1854 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | 1828 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
@@ -1871,10 +1845,12 @@ static struct file_operations proc_tid_base_operations = { | |||
1871 | 1845 | ||
1872 | static struct inode_operations proc_tgid_base_inode_operations = { | 1846 | static struct inode_operations proc_tgid_base_inode_operations = { |
1873 | .lookup = proc_tgid_base_lookup, | 1847 | .lookup = proc_tgid_base_lookup, |
1848 | .getattr = pid_getattr, | ||
1874 | }; | 1849 | }; |
1875 | 1850 | ||
1876 | static struct inode_operations proc_tid_base_inode_operations = { | 1851 | static struct inode_operations proc_tid_base_inode_operations = { |
1877 | .lookup = proc_tid_base_lookup, | 1852 | .lookup = proc_tid_base_lookup, |
1853 | .getattr = pid_getattr, | ||
1878 | }; | 1854 | }; |
1879 | 1855 | ||
1880 | #ifdef CONFIG_SECURITY | 1856 | #ifdef CONFIG_SECURITY |
@@ -1916,10 +1892,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir, | |||
1916 | 1892 | ||
1917 | static struct inode_operations proc_tgid_attr_inode_operations = { | 1893 | static struct inode_operations proc_tgid_attr_inode_operations = { |
1918 | .lookup = proc_tgid_attr_lookup, | 1894 | .lookup = proc_tgid_attr_lookup, |
1895 | .getattr = pid_getattr, | ||
1919 | }; | 1896 | }; |
1920 | 1897 | ||
1921 | static struct inode_operations proc_tid_attr_inode_operations = { | 1898 | static struct inode_operations proc_tid_attr_inode_operations = { |
1922 | .lookup = proc_tid_attr_lookup, | 1899 | .lookup = proc_tid_attr_lookup, |
1900 | .getattr = pid_getattr, | ||
1923 | }; | 1901 | }; |
1924 | #endif | 1902 | #endif |
1925 | 1903 | ||
@@ -1929,14 +1907,14 @@ static struct inode_operations proc_tid_attr_inode_operations = { | |||
1929 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | 1907 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, |
1930 | int buflen) | 1908 | int buflen) |
1931 | { | 1909 | { |
1932 | char tmp[30]; | 1910 | char tmp[PROC_NUMBUF]; |
1933 | sprintf(tmp, "%d", current->tgid); | 1911 | sprintf(tmp, "%d", current->tgid); |
1934 | return vfs_readlink(dentry,buffer,buflen,tmp); | 1912 | return vfs_readlink(dentry,buffer,buflen,tmp); |
1935 | } | 1913 | } |
1936 | 1914 | ||
1937 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | 1915 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) |
1938 | { | 1916 | { |
1939 | char tmp[30]; | 1917 | char tmp[PROC_NUMBUF]; |
1940 | sprintf(tmp, "%d", current->tgid); | 1918 | sprintf(tmp, "%d", current->tgid); |
1941 | return ERR_PTR(vfs_follow_link(nd,tmp)); | 1919 | return ERR_PTR(vfs_follow_link(nd,tmp)); |
1942 | } | 1920 | } |
@@ -1947,67 +1925,80 @@ static struct inode_operations proc_self_inode_operations = { | |||
1947 | }; | 1925 | }; |
1948 | 1926 | ||
1949 | /** | 1927 | /** |
1950 | * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. | 1928 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. |
1951 | * @p: task that should be flushed. | 1929 | * |
1930 | * @task: task that should be flushed. | ||
1931 | * | ||
1932 | * Looks in the dcache for | ||
1933 | * /proc/@pid | ||
1934 | * /proc/@tgid/task/@pid | ||
1935 | * if either directory is present flushes it and all of it'ts children | ||
1936 | * from the dcache. | ||
1952 | * | 1937 | * |
1953 | * Drops the /proc/@pid dcache entry from the hash chains. | 1938 | * It is safe and reasonable to cache /proc entries for a task until |
1939 | * that task exits. After that they just clog up the dcache with | ||
1940 | * useless entries, possibly causing useful dcache entries to be | ||
1941 | * flushed instead. This routine is proved to flush those useless | ||
1942 | * dcache entries at process exit time. | ||
1954 | * | 1943 | * |
1955 | * Dropping /proc/@pid entries and detach_pid must be synchroneous, | 1944 | * NOTE: This routine is just an optimization so it does not guarantee |
1956 | * otherwise e.g. /proc/@pid/exe might point to the wrong executable, | 1945 | * that no dcache entries will exist at process exit time it |
1957 | * if the pid value is immediately reused. This is enforced by | 1946 | * just makes it very unlikely that any will persist. |
1958 | * - caller must acquire spin_lock(p->proc_lock) | ||
1959 | * - must be called before detach_pid() | ||
1960 | * - proc_pid_lookup acquires proc_lock, and checks that | ||
1961 | * the target is not dead by looking at the attach count | ||
1962 | * of PIDTYPE_PID. | ||
1963 | */ | 1947 | */ |
1964 | 1948 | void proc_flush_task(struct task_struct *task) | |
1965 | struct dentry *proc_pid_unhash(struct task_struct *p) | ||
1966 | { | 1949 | { |
1967 | struct dentry *proc_dentry; | 1950 | struct dentry *dentry, *leader, *dir; |
1951 | char buf[PROC_NUMBUF]; | ||
1952 | struct qstr name; | ||
1953 | |||
1954 | name.name = buf; | ||
1955 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); | ||
1956 | dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); | ||
1957 | if (dentry) { | ||
1958 | shrink_dcache_parent(dentry); | ||
1959 | d_drop(dentry); | ||
1960 | dput(dentry); | ||
1961 | } | ||
1968 | 1962 | ||
1969 | proc_dentry = p->proc_dentry; | 1963 | if (thread_group_leader(task)) |
1970 | if (proc_dentry != NULL) { | 1964 | goto out; |
1971 | 1965 | ||
1972 | spin_lock(&dcache_lock); | 1966 | name.name = buf; |
1973 | spin_lock(&proc_dentry->d_lock); | 1967 | name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); |
1974 | if (!d_unhashed(proc_dentry)) { | 1968 | leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); |
1975 | dget_locked(proc_dentry); | 1969 | if (!leader) |
1976 | __d_drop(proc_dentry); | 1970 | goto out; |
1977 | spin_unlock(&proc_dentry->d_lock); | ||
1978 | } else { | ||
1979 | spin_unlock(&proc_dentry->d_lock); | ||
1980 | proc_dentry = NULL; | ||
1981 | } | ||
1982 | spin_unlock(&dcache_lock); | ||
1983 | } | ||
1984 | return proc_dentry; | ||
1985 | } | ||
1986 | 1971 | ||
1987 | /** | 1972 | name.name = "task"; |
1988 | * proc_pid_flush - recover memory used by stale /proc/@pid/x entries | 1973 | name.len = strlen(name.name); |
1989 | * @proc_dentry: directoy to prune. | 1974 | dir = d_hash_and_lookup(leader, &name); |
1990 | * | 1975 | if (!dir) |
1991 | * Shrink the /proc directory that was used by the just killed thread. | 1976 | goto out_put_leader; |
1992 | */ | 1977 | |
1993 | 1978 | name.name = buf; | |
1994 | void proc_pid_flush(struct dentry *proc_dentry) | 1979 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); |
1995 | { | 1980 | dentry = d_hash_and_lookup(dir, &name); |
1996 | might_sleep(); | 1981 | if (dentry) { |
1997 | if(proc_dentry != NULL) { | 1982 | shrink_dcache_parent(dentry); |
1998 | shrink_dcache_parent(proc_dentry); | 1983 | d_drop(dentry); |
1999 | dput(proc_dentry); | 1984 | dput(dentry); |
2000 | } | 1985 | } |
1986 | |||
1987 | dput(dir); | ||
1988 | out_put_leader: | ||
1989 | dput(leader); | ||
1990 | out: | ||
1991 | return; | ||
2001 | } | 1992 | } |
2002 | 1993 | ||
2003 | /* SMP-safe */ | 1994 | /* SMP-safe */ |
2004 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 1995 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2005 | { | 1996 | { |
1997 | struct dentry *result = ERR_PTR(-ENOENT); | ||
2006 | struct task_struct *task; | 1998 | struct task_struct *task; |
2007 | struct inode *inode; | 1999 | struct inode *inode; |
2008 | struct proc_inode *ei; | 2000 | struct proc_inode *ei; |
2009 | unsigned tgid; | 2001 | unsigned tgid; |
2010 | int died; | ||
2011 | 2002 | ||
2012 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | 2003 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { |
2013 | inode = new_inode(dir->i_sb); | 2004 | inode = new_inode(dir->i_sb); |
@@ -2028,21 +2019,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
2028 | if (tgid == ~0U) | 2019 | if (tgid == ~0U) |
2029 | goto out; | 2020 | goto out; |
2030 | 2021 | ||
2031 | read_lock(&tasklist_lock); | 2022 | rcu_read_lock(); |
2032 | task = find_task_by_pid(tgid); | 2023 | task = find_task_by_pid(tgid); |
2033 | if (task) | 2024 | if (task) |
2034 | get_task_struct(task); | 2025 | get_task_struct(task); |
2035 | read_unlock(&tasklist_lock); | 2026 | rcu_read_unlock(); |
2036 | if (!task) | 2027 | if (!task) |
2037 | goto out; | 2028 | goto out; |
2038 | 2029 | ||
2039 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); | 2030 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); |
2031 | if (!inode) | ||
2032 | goto out_put_task; | ||
2040 | 2033 | ||
2041 | |||
2042 | if (!inode) { | ||
2043 | put_task_struct(task); | ||
2044 | goto out; | ||
2045 | } | ||
2046 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | 2034 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; |
2047 | inode->i_op = &proc_tgid_base_inode_operations; | 2035 | inode->i_op = &proc_tgid_base_inode_operations; |
2048 | inode->i_fop = &proc_tgid_base_operations; | 2036 | inode->i_fop = &proc_tgid_base_operations; |
@@ -2053,45 +2041,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
2053 | inode->i_nlink = 4; | 2041 | inode->i_nlink = 4; |
2054 | #endif | 2042 | #endif |
2055 | 2043 | ||
2056 | dentry->d_op = &pid_base_dentry_operations; | 2044 | dentry->d_op = &pid_dentry_operations; |
2057 | 2045 | ||
2058 | died = 0; | ||
2059 | d_add(dentry, inode); | 2046 | d_add(dentry, inode); |
2060 | spin_lock(&task->proc_lock); | 2047 | /* Close the race of the process dying before we return the dentry */ |
2061 | task->proc_dentry = dentry; | 2048 | if (pid_revalidate(dentry, NULL)) |
2062 | if (!pid_alive(task)) { | 2049 | result = NULL; |
2063 | dentry = proc_pid_unhash(task); | ||
2064 | died = 1; | ||
2065 | } | ||
2066 | spin_unlock(&task->proc_lock); | ||
2067 | 2050 | ||
2051 | out_put_task: | ||
2068 | put_task_struct(task); | 2052 | put_task_struct(task); |
2069 | if (died) { | ||
2070 | proc_pid_flush(dentry); | ||
2071 | goto out; | ||
2072 | } | ||
2073 | return NULL; | ||
2074 | out: | 2053 | out: |
2075 | return ERR_PTR(-ENOENT); | 2054 | return result; |
2076 | } | 2055 | } |
2077 | 2056 | ||
2078 | /* SMP-safe */ | 2057 | /* SMP-safe */ |
2079 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 2058 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2080 | { | 2059 | { |
2060 | struct dentry *result = ERR_PTR(-ENOENT); | ||
2081 | struct task_struct *task; | 2061 | struct task_struct *task; |
2082 | struct task_struct *leader = proc_task(dir); | 2062 | struct task_struct *leader = get_proc_task(dir); |
2083 | struct inode *inode; | 2063 | struct inode *inode; |
2084 | unsigned tid; | 2064 | unsigned tid; |
2085 | 2065 | ||
2066 | if (!leader) | ||
2067 | goto out_no_task; | ||
2068 | |||
2086 | tid = name_to_int(dentry); | 2069 | tid = name_to_int(dentry); |
2087 | if (tid == ~0U) | 2070 | if (tid == ~0U) |
2088 | goto out; | 2071 | goto out; |
2089 | 2072 | ||
2090 | read_lock(&tasklist_lock); | 2073 | rcu_read_lock(); |
2091 | task = find_task_by_pid(tid); | 2074 | task = find_task_by_pid(tid); |
2092 | if (task) | 2075 | if (task) |
2093 | get_task_struct(task); | 2076 | get_task_struct(task); |
2094 | read_unlock(&tasklist_lock); | 2077 | rcu_read_unlock(); |
2095 | if (!task) | 2078 | if (!task) |
2096 | goto out; | 2079 | goto out; |
2097 | if (leader->tgid != task->tgid) | 2080 | if (leader->tgid != task->tgid) |
@@ -2112,101 +2095,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry | |||
2112 | inode->i_nlink = 3; | 2095 | inode->i_nlink = 3; |
2113 | #endif | 2096 | #endif |
2114 | 2097 | ||
2115 | dentry->d_op = &pid_base_dentry_operations; | 2098 | dentry->d_op = &pid_dentry_operations; |
2116 | 2099 | ||
2117 | d_add(dentry, inode); | 2100 | d_add(dentry, inode); |
2101 | /* Close the race of the process dying before we return the dentry */ | ||
2102 | if (pid_revalidate(dentry, NULL)) | ||
2103 | result = NULL; | ||
2118 | 2104 | ||
2119 | put_task_struct(task); | ||
2120 | return NULL; | ||
2121 | out_drop_task: | 2105 | out_drop_task: |
2122 | put_task_struct(task); | 2106 | put_task_struct(task); |
2123 | out: | 2107 | out: |
2124 | return ERR_PTR(-ENOENT); | 2108 | put_task_struct(leader); |
2109 | out_no_task: | ||
2110 | return result; | ||
2125 | } | 2111 | } |
2126 | 2112 | ||
2127 | #define PROC_NUMBUF 10 | ||
2128 | #define PROC_MAXPIDS 20 | ||
2129 | |||
2130 | /* | 2113 | /* |
2131 | * Get a few tgid's to return for filldir - we need to hold the | 2114 | * Find the first tgid to return to user space. |
2132 | * tasklist lock while doing this, and we must release it before | 2115 | * |
2133 | * we actually do the filldir itself, so we use a temp buffer.. | 2116 | * Usually this is just whatever follows &init_task, but if the users |
2117 | * buffer was too small to hold the full list or there was a seek into | ||
2118 | * the middle of the directory we have more work to do. | ||
2119 | * | ||
2120 | * In the case of a short read we start with find_task_by_pid. | ||
2121 | * | ||
2122 | * In the case of a seek we start with &init_task and walk nr | ||
2123 | * threads past it. | ||
2134 | */ | 2124 | */ |
2135 | static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) | 2125 | static struct task_struct *first_tgid(int tgid, unsigned int nr) |
2136 | { | 2126 | { |
2137 | struct task_struct *p; | 2127 | struct task_struct *pos; |
2138 | int nr_tgids = 0; | 2128 | rcu_read_lock(); |
2139 | 2129 | if (tgid && nr) { | |
2140 | index--; | 2130 | pos = find_task_by_pid(tgid); |
2141 | read_lock(&tasklist_lock); | 2131 | if (pos && thread_group_leader(pos)) |
2142 | p = NULL; | 2132 | goto found; |
2143 | if (version) { | ||
2144 | p = find_task_by_pid(version); | ||
2145 | if (p && !thread_group_leader(p)) | ||
2146 | p = NULL; | ||
2147 | } | 2133 | } |
2134 | /* If nr exceeds the number of processes get out quickly */ | ||
2135 | pos = NULL; | ||
2136 | if (nr && nr >= nr_processes()) | ||
2137 | goto done; | ||
2148 | 2138 | ||
2149 | if (p) | 2139 | /* If we haven't found our starting place yet start with |
2150 | index = 0; | 2140 | * the init_task and walk nr tasks forward. |
2151 | else | 2141 | */ |
2152 | p = next_task(&init_task); | 2142 | for (pos = next_task(&init_task); nr > 0; --nr) { |
2153 | 2143 | pos = next_task(pos); | |
2154 | for ( ; p != &init_task; p = next_task(p)) { | 2144 | if (pos == &init_task) { |
2155 | int tgid = p->pid; | 2145 | pos = NULL; |
2156 | if (!pid_alive(p)) | 2146 | goto done; |
2157 | continue; | 2147 | } |
2158 | if (--index >= 0) | ||
2159 | continue; | ||
2160 | tgids[nr_tgids] = tgid; | ||
2161 | nr_tgids++; | ||
2162 | if (nr_tgids >= PROC_MAXPIDS) | ||
2163 | break; | ||
2164 | } | 2148 | } |
2165 | read_unlock(&tasklist_lock); | 2149 | found: |
2166 | return nr_tgids; | 2150 | get_task_struct(pos); |
2151 | done: | ||
2152 | rcu_read_unlock(); | ||
2153 | return pos; | ||
2167 | } | 2154 | } |
2168 | 2155 | ||
2169 | /* | 2156 | /* |
2170 | * Get a few tid's to return for filldir - we need to hold the | 2157 | * Find the next task in the task list. |
2171 | * tasklist lock while doing this, and we must release it before | 2158 | * Return NULL if we loop or there is any error. |
2172 | * we actually do the filldir itself, so we use a temp buffer.. | 2159 | * |
2160 | * The reference to the input task_struct is released. | ||
2173 | */ | 2161 | */ |
2174 | static int get_tid_list(int index, unsigned int *tids, struct inode *dir) | 2162 | static struct task_struct *next_tgid(struct task_struct *start) |
2175 | { | 2163 | { |
2176 | struct task_struct *leader_task = proc_task(dir); | 2164 | struct task_struct *pos; |
2177 | struct task_struct *task = leader_task; | 2165 | rcu_read_lock(); |
2178 | int nr_tids = 0; | 2166 | pos = start; |
2179 | 2167 | if (pid_alive(start)) | |
2180 | index -= 2; | 2168 | pos = next_task(start); |
2181 | read_lock(&tasklist_lock); | 2169 | if (pid_alive(pos) && (pos != &init_task)) { |
2182 | /* | 2170 | get_task_struct(pos); |
2183 | * The starting point task (leader_task) might be an already | 2171 | goto done; |
2184 | * unlinked task, which cannot be used to access the task-list | 2172 | } |
2185 | * via next_thread(). | 2173 | pos = NULL; |
2186 | */ | 2174 | done: |
2187 | if (pid_alive(task)) do { | 2175 | rcu_read_unlock(); |
2188 | int tid = task->pid; | 2176 | put_task_struct(start); |
2189 | 2177 | return pos; | |
2190 | if (--index >= 0) | ||
2191 | continue; | ||
2192 | if (tids != NULL) | ||
2193 | tids[nr_tids] = tid; | ||
2194 | nr_tids++; | ||
2195 | if (nr_tids >= PROC_MAXPIDS) | ||
2196 | break; | ||
2197 | } while ((task = next_thread(task)) != leader_task); | ||
2198 | read_unlock(&tasklist_lock); | ||
2199 | return nr_tids; | ||
2200 | } | 2178 | } |
2201 | 2179 | ||
2202 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 2180 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2203 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2181 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2204 | { | 2182 | { |
2205 | unsigned int tgid_array[PROC_MAXPIDS]; | ||
2206 | char buf[PROC_NUMBUF]; | 2183 | char buf[PROC_NUMBUF]; |
2207 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; | 2184 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; |
2208 | unsigned int nr_tgids, i; | 2185 | struct task_struct *task; |
2209 | int next_tgid; | 2186 | int tgid; |
2210 | 2187 | ||
2211 | if (!nr) { | 2188 | if (!nr) { |
2212 | ino_t ino = fake_ino(0,PROC_TGID_INO); | 2189 | ino_t ino = fake_ino(0,PROC_TGID_INO); |
@@ -2215,63 +2192,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2215 | filp->f_pos++; | 2192 | filp->f_pos++; |
2216 | nr++; | 2193 | nr++; |
2217 | } | 2194 | } |
2195 | nr -= 1; | ||
2218 | 2196 | ||
2219 | /* f_version caches the tgid value that the last readdir call couldn't | 2197 | /* f_version caches the tgid value that the last readdir call couldn't |
2220 | * return. lseek aka telldir automagically resets f_version to 0. | 2198 | * return. lseek aka telldir automagically resets f_version to 0. |
2221 | */ | 2199 | */ |
2222 | next_tgid = filp->f_version; | 2200 | tgid = filp->f_version; |
2223 | filp->f_version = 0; | 2201 | filp->f_version = 0; |
2224 | for (;;) { | 2202 | for (task = first_tgid(tgid, nr); |
2225 | nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); | 2203 | task; |
2226 | if (!nr_tgids) { | 2204 | task = next_tgid(task), filp->f_pos++) { |
2227 | /* no more entries ! */ | 2205 | int len; |
2206 | ino_t ino; | ||
2207 | tgid = task->pid; | ||
2208 | len = snprintf(buf, sizeof(buf), "%d", tgid); | ||
2209 | ino = fake_ino(tgid, PROC_TGID_INO); | ||
2210 | if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) { | ||
2211 | /* returning this tgid failed, save it as the first | ||
2212 | * pid for the next readir call */ | ||
2213 | filp->f_version = tgid; | ||
2214 | put_task_struct(task); | ||
2228 | break; | 2215 | break; |
2229 | } | 2216 | } |
2230 | next_tgid = 0; | 2217 | } |
2218 | return 0; | ||
2219 | } | ||
2231 | 2220 | ||
2232 | /* do not use the last found pid, reserve it for next_tgid */ | 2221 | /* |
2233 | if (nr_tgids == PROC_MAXPIDS) { | 2222 | * Find the first tid of a thread group to return to user space. |
2234 | nr_tgids--; | 2223 | * |
2235 | next_tgid = tgid_array[nr_tgids]; | 2224 | * Usually this is just the thread group leader, but if the users |
2236 | } | 2225 | * buffer was too small or there was a seek into the middle of the |
2226 | * directory we have more work todo. | ||
2227 | * | ||
2228 | * In the case of a short read we start with find_task_by_pid. | ||
2229 | * | ||
2230 | * In the case of a seek we start with the leader and walk nr | ||
2231 | * threads past it. | ||
2232 | */ | ||
2233 | static struct task_struct *first_tid(struct task_struct *leader, | ||
2234 | int tid, int nr) | ||
2235 | { | ||
2236 | struct task_struct *pos; | ||
2237 | 2237 | ||
2238 | for (i=0;i<nr_tgids;i++) { | 2238 | rcu_read_lock(); |
2239 | int tgid = tgid_array[i]; | 2239 | /* Attempt to start with the pid of a thread */ |
2240 | ino_t ino = fake_ino(tgid,PROC_TGID_INO); | 2240 | if (tid && (nr > 0)) { |
2241 | unsigned long j = PROC_NUMBUF; | 2241 | pos = find_task_by_pid(tid); |
2242 | if (pos && (pos->group_leader == leader)) | ||
2243 | goto found; | ||
2244 | } | ||
2242 | 2245 | ||
2243 | do | 2246 | /* If nr exceeds the number of threads there is nothing todo */ |
2244 | buf[--j] = '0' + (tgid % 10); | 2247 | pos = NULL; |
2245 | while ((tgid /= 10) != 0); | 2248 | if (nr && nr >= get_nr_threads(leader)) |
2249 | goto out; | ||
2246 | 2250 | ||
2247 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { | 2251 | /* If we haven't found our starting place yet start |
2248 | /* returning this tgid failed, save it as the first | 2252 | * with the leader and walk nr threads forward. |
2249 | * pid for the next readir call */ | 2253 | */ |
2250 | filp->f_version = tgid_array[i]; | 2254 | for (pos = leader; nr > 0; --nr) { |
2251 | goto out; | 2255 | pos = next_thread(pos); |
2252 | } | 2256 | if (pos == leader) { |
2253 | filp->f_pos++; | 2257 | pos = NULL; |
2254 | nr++; | 2258 | goto out; |
2255 | } | 2259 | } |
2256 | } | 2260 | } |
2261 | found: | ||
2262 | get_task_struct(pos); | ||
2257 | out: | 2263 | out: |
2258 | return 0; | 2264 | rcu_read_unlock(); |
2265 | return pos; | ||
2266 | } | ||
2267 | |||
2268 | /* | ||
2269 | * Find the next thread in the thread list. | ||
2270 | * Return NULL if there is an error or no next thread. | ||
2271 | * | ||
2272 | * The reference to the input task_struct is released. | ||
2273 | */ | ||
2274 | static struct task_struct *next_tid(struct task_struct *start) | ||
2275 | { | ||
2276 | struct task_struct *pos = NULL; | ||
2277 | rcu_read_lock(); | ||
2278 | if (pid_alive(start)) { | ||
2279 | pos = next_thread(start); | ||
2280 | if (thread_group_leader(pos)) | ||
2281 | pos = NULL; | ||
2282 | else | ||
2283 | get_task_struct(pos); | ||
2284 | } | ||
2285 | rcu_read_unlock(); | ||
2286 | put_task_struct(start); | ||
2287 | return pos; | ||
2259 | } | 2288 | } |
2260 | 2289 | ||
2261 | /* for the /proc/TGID/task/ directories */ | 2290 | /* for the /proc/TGID/task/ directories */ |
2262 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2291 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2263 | { | 2292 | { |
2264 | unsigned int tid_array[PROC_MAXPIDS]; | ||
2265 | char buf[PROC_NUMBUF]; | 2293 | char buf[PROC_NUMBUF]; |
2266 | unsigned int nr_tids, i; | ||
2267 | struct dentry *dentry = filp->f_dentry; | 2294 | struct dentry *dentry = filp->f_dentry; |
2268 | struct inode *inode = dentry->d_inode; | 2295 | struct inode *inode = dentry->d_inode; |
2296 | struct task_struct *leader = get_proc_task(inode); | ||
2297 | struct task_struct *task; | ||
2269 | int retval = -ENOENT; | 2298 | int retval = -ENOENT; |
2270 | ino_t ino; | 2299 | ino_t ino; |
2300 | int tid; | ||
2271 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ | 2301 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ |
2272 | 2302 | ||
2273 | if (!pid_alive(proc_task(inode))) | 2303 | if (!leader) |
2274 | goto out; | 2304 | goto out_no_task; |
2275 | retval = 0; | 2305 | retval = 0; |
2276 | 2306 | ||
2277 | switch (pos) { | 2307 | switch (pos) { |
@@ -2289,24 +2319,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
2289 | /* fall through */ | 2319 | /* fall through */ |
2290 | } | 2320 | } |
2291 | 2321 | ||
2292 | nr_tids = get_tid_list(pos, tid_array, inode); | 2322 | /* f_version caches the tgid value that the last readdir call couldn't |
2293 | inode->i_nlink = pos + nr_tids; | 2323 | * return. lseek aka telldir automagically resets f_version to 0. |
2294 | 2324 | */ | |
2295 | for (i = 0; i < nr_tids; i++) { | 2325 | tid = filp->f_version; |
2296 | unsigned long j = PROC_NUMBUF; | 2326 | filp->f_version = 0; |
2297 | int tid = tid_array[i]; | 2327 | for (task = first_tid(leader, tid, pos - 2); |
2298 | 2328 | task; | |
2299 | ino = fake_ino(tid,PROC_TID_INO); | 2329 | task = next_tid(task), pos++) { |
2300 | 2330 | int len; | |
2301 | do | 2331 | tid = task->pid; |
2302 | buf[--j] = '0' + (tid % 10); | 2332 | len = snprintf(buf, sizeof(buf), "%d", tid); |
2303 | while ((tid /= 10) != 0); | 2333 | ino = fake_ino(tid, PROC_TID_INO); |
2304 | 2334 | if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) { | |
2305 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) | 2335 | /* returning this tgid failed, save it as the first |
2336 | * pid for the next readir call */ | ||
2337 | filp->f_version = tid; | ||
2338 | put_task_struct(task); | ||
2306 | break; | 2339 | break; |
2307 | pos++; | 2340 | } |
2308 | } | 2341 | } |
2309 | out: | 2342 | out: |
2310 | filp->f_pos = pos; | 2343 | filp->f_pos = pos; |
2344 | put_task_struct(leader); | ||
2345 | out_no_task: | ||
2311 | return retval; | 2346 | return retval; |
2312 | } | 2347 | } |
2348 | |||
2349 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
2350 | { | ||
2351 | struct inode *inode = dentry->d_inode; | ||
2352 | struct task_struct *p = get_proc_task(inode); | ||
2353 | generic_fillattr(inode, stat); | ||
2354 | |||
2355 | if (p) { | ||
2356 | rcu_read_lock(); | ||
2357 | stat->nlink += get_nr_threads(p); | ||
2358 | rcu_read_unlock(); | ||
2359 | put_task_struct(p); | ||
2360 | } | ||
2361 | |||
2362 | return 0; | ||
2363 | } | ||