diff options
author | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-06-28 14:06:39 -0400 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-06-28 14:06:39 -0400 |
commit | f28e71617ddaf2483e3e5c5237103484a303743f (patch) | |
tree | 67627d2d8ddbf6a4449371e9261d796c013b1fa1 /fs/proc/base.c | |
parent | dc6a78f1af10d28fb8c395034ae1e099b85c05b0 (diff) | |
parent | a39727f212426b9d5f9267b3318a2afaf9922d3b (diff) |
Merge ../linux-2.6/
Conflicts:
drivers/scsi/aacraid/comminit.c
Fixed up by removing the now renamed CONFIG_IOMMU option from
aacraid
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r-- | fs/proc/base.c | 1086 |
1 files changed, 568 insertions, 518 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 6afff725a8c9..6ba7785319de 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -74,6 +74,16 @@ | |||
74 | #include <linux/poll.h> | 74 | #include <linux/poll.h> |
75 | #include "internal.h" | 75 | #include "internal.h" |
76 | 76 | ||
77 | /* NOTE: | ||
78 | * Implementing inode permission operations in /proc is almost | ||
79 | * certainly an error. Permission checks need to happen during | ||
80 | * each system call not at open time. The reason is that most of | ||
81 | * what we wish to check for permissions in /proc varies at runtime. | ||
82 | * | ||
83 | * The classic example of a problem is opening file descriptors | ||
84 | * in /proc for a task before it execs a suid executable. | ||
85 | */ | ||
86 | |||
77 | /* | 87 | /* |
78 | * For hysterical raisins we keep the same inumbers as in the old procfs. | 88 | * For hysterical raisins we keep the same inumbers as in the old procfs. |
79 | * Feel free to change the macro below - just keep the range distinct from | 89 | * Feel free to change the macro below - just keep the range distinct from |
@@ -121,6 +131,8 @@ enum pid_directory_inos { | |||
121 | PROC_TGID_ATTR_PREV, | 131 | PROC_TGID_ATTR_PREV, |
122 | PROC_TGID_ATTR_EXEC, | 132 | PROC_TGID_ATTR_EXEC, |
123 | PROC_TGID_ATTR_FSCREATE, | 133 | PROC_TGID_ATTR_FSCREATE, |
134 | PROC_TGID_ATTR_KEYCREATE, | ||
135 | PROC_TGID_ATTR_SOCKCREATE, | ||
124 | #endif | 136 | #endif |
125 | #ifdef CONFIG_AUDITSYSCALL | 137 | #ifdef CONFIG_AUDITSYSCALL |
126 | PROC_TGID_LOGINUID, | 138 | PROC_TGID_LOGINUID, |
@@ -162,6 +174,8 @@ enum pid_directory_inos { | |||
162 | PROC_TID_ATTR_PREV, | 174 | PROC_TID_ATTR_PREV, |
163 | PROC_TID_ATTR_EXEC, | 175 | PROC_TID_ATTR_EXEC, |
164 | PROC_TID_ATTR_FSCREATE, | 176 | PROC_TID_ATTR_FSCREATE, |
177 | PROC_TID_ATTR_KEYCREATE, | ||
178 | PROC_TID_ATTR_SOCKCREATE, | ||
165 | #endif | 179 | #endif |
166 | #ifdef CONFIG_AUDITSYSCALL | 180 | #ifdef CONFIG_AUDITSYSCALL |
167 | PROC_TID_LOGINUID, | 181 | PROC_TID_LOGINUID, |
@@ -173,6 +187,9 @@ enum pid_directory_inos { | |||
173 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ | 187 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ |
174 | }; | 188 | }; |
175 | 189 | ||
190 | /* Worst case buffer size needed for holding an integer. */ | ||
191 | #define PROC_NUMBUF 10 | ||
192 | |||
176 | struct pid_entry { | 193 | struct pid_entry { |
177 | int type; | 194 | int type; |
178 | int len; | 195 | int len; |
@@ -275,6 +292,8 @@ static struct pid_entry tgid_attr_stuff[] = { | |||
275 | E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | 292 | E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), |
276 | E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | 293 | E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), |
277 | E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | 294 | E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), |
295 | E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
296 | E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
278 | {0,0,NULL,0} | 297 | {0,0,NULL,0} |
279 | }; | 298 | }; |
280 | static struct pid_entry tid_attr_stuff[] = { | 299 | static struct pid_entry tid_attr_stuff[] = { |
@@ -282,6 +301,8 @@ static struct pid_entry tid_attr_stuff[] = { | |||
282 | E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | 301 | E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), |
283 | E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | 302 | E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), |
284 | E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | 303 | E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), |
304 | E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
305 | E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
285 | {0,0,NULL,0} | 306 | {0,0,NULL,0} |
286 | }; | 307 | }; |
287 | #endif | 308 | #endif |
@@ -290,12 +311,15 @@ static struct pid_entry tid_attr_stuff[] = { | |||
290 | 311 | ||
291 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 312 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
292 | { | 313 | { |
293 | struct task_struct *task = proc_task(inode); | 314 | struct task_struct *task = get_proc_task(inode); |
294 | struct files_struct *files; | 315 | struct files_struct *files = NULL; |
295 | struct file *file; | 316 | struct file *file; |
296 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | 317 | int fd = proc_fd(inode); |
297 | 318 | ||
298 | files = get_files_struct(task); | 319 | if (task) { |
320 | files = get_files_struct(task); | ||
321 | put_task_struct(task); | ||
322 | } | ||
299 | if (files) { | 323 | if (files) { |
300 | /* | 324 | /* |
301 | * We are not taking a ref to the file structure, so we must | 325 | * We are not taking a ref to the file structure, so we must |
@@ -327,29 +351,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) | |||
327 | return fs; | 351 | return fs; |
328 | } | 352 | } |
329 | 353 | ||
330 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 354 | static int get_nr_threads(struct task_struct *tsk) |
331 | { | 355 | { |
332 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 356 | /* Must be called with the rcu_read_lock held */ |
333 | int result = -ENOENT; | 357 | unsigned long flags; |
334 | if (fs) { | 358 | int count = 0; |
335 | read_lock(&fs->lock); | 359 | |
336 | *mnt = mntget(fs->pwdmnt); | 360 | if (lock_task_sighand(tsk, &flags)) { |
337 | *dentry = dget(fs->pwd); | 361 | count = atomic_read(&tsk->signal->count); |
338 | read_unlock(&fs->lock); | 362 | unlock_task_sighand(tsk, &flags); |
339 | result = 0; | ||
340 | put_fs_struct(fs); | ||
341 | } | 363 | } |
342 | return result; | 364 | return count; |
343 | } | 365 | } |
344 | 366 | ||
345 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 367 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
346 | { | 368 | { |
347 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 369 | struct task_struct *task = get_proc_task(inode); |
370 | struct fs_struct *fs = NULL; | ||
348 | int result = -ENOENT; | 371 | int result = -ENOENT; |
372 | |||
373 | if (task) { | ||
374 | fs = get_fs_struct(task); | ||
375 | put_task_struct(task); | ||
376 | } | ||
349 | if (fs) { | 377 | if (fs) { |
350 | read_lock(&fs->lock); | 378 | read_lock(&fs->lock); |
351 | *mnt = mntget(fs->rootmnt); | 379 | *mnt = mntget(fs->pwdmnt); |
352 | *dentry = dget(fs->root); | 380 | *dentry = dget(fs->pwd); |
353 | read_unlock(&fs->lock); | 381 | read_unlock(&fs->lock); |
354 | result = 0; | 382 | result = 0; |
355 | put_fs_struct(fs); | 383 | put_fs_struct(fs); |
@@ -357,42 +385,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf | |||
357 | return result; | 385 | return result; |
358 | } | 386 | } |
359 | 387 | ||
360 | 388 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | |
361 | /* Same as proc_root_link, but this addionally tries to get fs from other | ||
362 | * threads in the group */ | ||
363 | static int proc_task_root_link(struct inode *inode, struct dentry **dentry, | ||
364 | struct vfsmount **mnt) | ||
365 | { | 389 | { |
366 | struct fs_struct *fs; | 390 | struct task_struct *task = get_proc_task(inode); |
391 | struct fs_struct *fs = NULL; | ||
367 | int result = -ENOENT; | 392 | int result = -ENOENT; |
368 | struct task_struct *leader = proc_task(inode); | ||
369 | 393 | ||
370 | task_lock(leader); | 394 | if (task) { |
371 | fs = leader->fs; | 395 | fs = get_fs_struct(task); |
372 | if (fs) { | 396 | put_task_struct(task); |
373 | atomic_inc(&fs->count); | ||
374 | task_unlock(leader); | ||
375 | } else { | ||
376 | /* Try to get fs from other threads */ | ||
377 | task_unlock(leader); | ||
378 | read_lock(&tasklist_lock); | ||
379 | if (pid_alive(leader)) { | ||
380 | struct task_struct *task = leader; | ||
381 | |||
382 | while ((task = next_thread(task)) != leader) { | ||
383 | task_lock(task); | ||
384 | fs = task->fs; | ||
385 | if (fs) { | ||
386 | atomic_inc(&fs->count); | ||
387 | task_unlock(task); | ||
388 | break; | ||
389 | } | ||
390 | task_unlock(task); | ||
391 | } | ||
392 | } | ||
393 | read_unlock(&tasklist_lock); | ||
394 | } | 397 | } |
395 | |||
396 | if (fs) { | 398 | if (fs) { |
397 | read_lock(&fs->lock); | 399 | read_lock(&fs->lock); |
398 | *mnt = mntget(fs->rootmnt); | 400 | *mnt = mntget(fs->rootmnt); |
@@ -404,7 +406,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry, | |||
404 | return result; | 406 | return result; |
405 | } | 407 | } |
406 | 408 | ||
407 | |||
408 | #define MAY_PTRACE(task) \ | 409 | #define MAY_PTRACE(task) \ |
409 | (task == current || \ | 410 | (task == current || \ |
410 | (task->parent == current && \ | 411 | (task->parent == current && \ |
@@ -535,142 +536,22 @@ static int proc_oom_score(struct task_struct *task, char *buffer) | |||
535 | /************************************************************************/ | 536 | /************************************************************************/ |
536 | 537 | ||
537 | /* permission checks */ | 538 | /* permission checks */ |
538 | 539 | static int proc_fd_access_allowed(struct inode *inode) | |
539 | /* If the process being read is separated by chroot from the reading process, | ||
540 | * don't let the reader access the threads. | ||
541 | * | ||
542 | * note: this does dput(root) and mntput(vfsmnt) on exit. | ||
543 | */ | ||
544 | static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) | ||
545 | { | ||
546 | struct dentry *de, *base; | ||
547 | struct vfsmount *our_vfsmnt, *mnt; | ||
548 | int res = 0; | ||
549 | |||
550 | read_lock(&current->fs->lock); | ||
551 | our_vfsmnt = mntget(current->fs->rootmnt); | ||
552 | base = dget(current->fs->root); | ||
553 | read_unlock(&current->fs->lock); | ||
554 | |||
555 | spin_lock(&vfsmount_lock); | ||
556 | de = root; | ||
557 | mnt = vfsmnt; | ||
558 | |||
559 | while (mnt != our_vfsmnt) { | ||
560 | if (mnt == mnt->mnt_parent) | ||
561 | goto out; | ||
562 | de = mnt->mnt_mountpoint; | ||
563 | mnt = mnt->mnt_parent; | ||
564 | } | ||
565 | |||
566 | if (!is_subdir(de, base)) | ||
567 | goto out; | ||
568 | spin_unlock(&vfsmount_lock); | ||
569 | |||
570 | exit: | ||
571 | dput(base); | ||
572 | mntput(our_vfsmnt); | ||
573 | dput(root); | ||
574 | mntput(vfsmnt); | ||
575 | return res; | ||
576 | out: | ||
577 | spin_unlock(&vfsmount_lock); | ||
578 | res = -EACCES; | ||
579 | goto exit; | ||
580 | } | ||
581 | |||
582 | static int proc_check_root(struct inode *inode) | ||
583 | { | ||
584 | struct dentry *root; | ||
585 | struct vfsmount *vfsmnt; | ||
586 | |||
587 | if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ | ||
588 | return -ENOENT; | ||
589 | return proc_check_chroot(root, vfsmnt); | ||
590 | } | ||
591 | |||
592 | static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
593 | { | ||
594 | if (generic_permission(inode, mask, NULL) != 0) | ||
595 | return -EACCES; | ||
596 | return proc_check_root(inode); | ||
597 | } | ||
598 | |||
599 | static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
600 | { | ||
601 | struct dentry *root; | ||
602 | struct vfsmount *vfsmnt; | ||
603 | |||
604 | if (generic_permission(inode, mask, NULL) != 0) | ||
605 | return -EACCES; | ||
606 | |||
607 | if (proc_task_root_link(inode, &root, &vfsmnt)) | ||
608 | return -ENOENT; | ||
609 | |||
610 | return proc_check_chroot(root, vfsmnt); | ||
611 | } | ||
612 | |||
613 | extern struct seq_operations proc_pid_maps_op; | ||
614 | static int maps_open(struct inode *inode, struct file *file) | ||
615 | { | ||
616 | struct task_struct *task = proc_task(inode); | ||
617 | int ret = seq_open(file, &proc_pid_maps_op); | ||
618 | if (!ret) { | ||
619 | struct seq_file *m = file->private_data; | ||
620 | m->private = task; | ||
621 | } | ||
622 | return ret; | ||
623 | } | ||
624 | |||
625 | static struct file_operations proc_maps_operations = { | ||
626 | .open = maps_open, | ||
627 | .read = seq_read, | ||
628 | .llseek = seq_lseek, | ||
629 | .release = seq_release, | ||
630 | }; | ||
631 | |||
632 | #ifdef CONFIG_NUMA | ||
633 | extern struct seq_operations proc_pid_numa_maps_op; | ||
634 | static int numa_maps_open(struct inode *inode, struct file *file) | ||
635 | { | ||
636 | struct task_struct *task = proc_task(inode); | ||
637 | int ret = seq_open(file, &proc_pid_numa_maps_op); | ||
638 | if (!ret) { | ||
639 | struct seq_file *m = file->private_data; | ||
640 | m->private = task; | ||
641 | } | ||
642 | return ret; | ||
643 | } | ||
644 | |||
645 | static struct file_operations proc_numa_maps_operations = { | ||
646 | .open = numa_maps_open, | ||
647 | .read = seq_read, | ||
648 | .llseek = seq_lseek, | ||
649 | .release = seq_release, | ||
650 | }; | ||
651 | #endif | ||
652 | |||
653 | #ifdef CONFIG_MMU | ||
654 | extern struct seq_operations proc_pid_smaps_op; | ||
655 | static int smaps_open(struct inode *inode, struct file *file) | ||
656 | { | 540 | { |
657 | struct task_struct *task = proc_task(inode); | 541 | struct task_struct *task; |
658 | int ret = seq_open(file, &proc_pid_smaps_op); | 542 | int allowed = 0; |
659 | if (!ret) { | 543 | /* Allow access to a task's file descriptors if it is us or we |
660 | struct seq_file *m = file->private_data; | 544 | * may use ptrace attach to the process and find out that |
661 | m->private = task; | 545 | * information. |
546 | */ | ||
547 | task = get_proc_task(inode); | ||
548 | if (task) { | ||
549 | allowed = ptrace_may_attach(task); | ||
550 | put_task_struct(task); | ||
662 | } | 551 | } |
663 | return ret; | 552 | return allowed; |
664 | } | 553 | } |
665 | 554 | ||
666 | static struct file_operations proc_smaps_operations = { | ||
667 | .open = smaps_open, | ||
668 | .read = seq_read, | ||
669 | .llseek = seq_lseek, | ||
670 | .release = seq_release, | ||
671 | }; | ||
672 | #endif | ||
673 | |||
674 | extern struct seq_operations mounts_op; | 555 | extern struct seq_operations mounts_op; |
675 | struct proc_mounts { | 556 | struct proc_mounts { |
676 | struct seq_file m; | 557 | struct seq_file m; |
@@ -679,16 +560,19 @@ struct proc_mounts { | |||
679 | 560 | ||
680 | static int mounts_open(struct inode *inode, struct file *file) | 561 | static int mounts_open(struct inode *inode, struct file *file) |
681 | { | 562 | { |
682 | struct task_struct *task = proc_task(inode); | 563 | struct task_struct *task = get_proc_task(inode); |
683 | struct namespace *namespace; | 564 | struct namespace *namespace = NULL; |
684 | struct proc_mounts *p; | 565 | struct proc_mounts *p; |
685 | int ret = -EINVAL; | 566 | int ret = -EINVAL; |
686 | 567 | ||
687 | task_lock(task); | 568 | if (task) { |
688 | namespace = task->namespace; | 569 | task_lock(task); |
689 | if (namespace) | 570 | namespace = task->namespace; |
690 | get_namespace(namespace); | 571 | if (namespace) |
691 | task_unlock(task); | 572 | get_namespace(namespace); |
573 | task_unlock(task); | ||
574 | put_task_struct(task); | ||
575 | } | ||
692 | 576 | ||
693 | if (namespace) { | 577 | if (namespace) { |
694 | ret = -ENOMEM; | 578 | ret = -ENOMEM; |
@@ -745,17 +629,21 @@ static struct file_operations proc_mounts_operations = { | |||
745 | extern struct seq_operations mountstats_op; | 629 | extern struct seq_operations mountstats_op; |
746 | static int mountstats_open(struct inode *inode, struct file *file) | 630 | static int mountstats_open(struct inode *inode, struct file *file) |
747 | { | 631 | { |
748 | struct task_struct *task = proc_task(inode); | ||
749 | int ret = seq_open(file, &mountstats_op); | 632 | int ret = seq_open(file, &mountstats_op); |
750 | 633 | ||
751 | if (!ret) { | 634 | if (!ret) { |
752 | struct seq_file *m = file->private_data; | 635 | struct seq_file *m = file->private_data; |
753 | struct namespace *namespace; | 636 | struct namespace *namespace = NULL; |
754 | task_lock(task); | 637 | struct task_struct *task = get_proc_task(inode); |
755 | namespace = task->namespace; | 638 | |
756 | if (namespace) | 639 | if (task) { |
757 | get_namespace(namespace); | 640 | task_lock(task); |
758 | task_unlock(task); | 641 | namespace = task->namespace; |
642 | if (namespace) | ||
643 | get_namespace(namespace); | ||
644 | task_unlock(task); | ||
645 | put_task_struct(task); | ||
646 | } | ||
759 | 647 | ||
760 | if (namespace) | 648 | if (namespace) |
761 | m->private = namespace; | 649 | m->private = namespace; |
@@ -782,18 +670,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, | |||
782 | struct inode * inode = file->f_dentry->d_inode; | 670 | struct inode * inode = file->f_dentry->d_inode; |
783 | unsigned long page; | 671 | unsigned long page; |
784 | ssize_t length; | 672 | ssize_t length; |
785 | struct task_struct *task = proc_task(inode); | 673 | struct task_struct *task = get_proc_task(inode); |
674 | |||
675 | length = -ESRCH; | ||
676 | if (!task) | ||
677 | goto out_no_task; | ||
786 | 678 | ||
787 | if (count > PROC_BLOCK_SIZE) | 679 | if (count > PROC_BLOCK_SIZE) |
788 | count = PROC_BLOCK_SIZE; | 680 | count = PROC_BLOCK_SIZE; |
681 | |||
682 | length = -ENOMEM; | ||
789 | if (!(page = __get_free_page(GFP_KERNEL))) | 683 | if (!(page = __get_free_page(GFP_KERNEL))) |
790 | return -ENOMEM; | 684 | goto out; |
791 | 685 | ||
792 | length = PROC_I(inode)->op.proc_read(task, (char*)page); | 686 | length = PROC_I(inode)->op.proc_read(task, (char*)page); |
793 | 687 | ||
794 | if (length >= 0) | 688 | if (length >= 0) |
795 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 689 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
796 | free_page(page); | 690 | free_page(page); |
691 | out: | ||
692 | put_task_struct(task); | ||
693 | out_no_task: | ||
797 | return length; | 694 | return length; |
798 | } | 695 | } |
799 | 696 | ||
@@ -810,12 +707,15 @@ static int mem_open(struct inode* inode, struct file* file) | |||
810 | static ssize_t mem_read(struct file * file, char __user * buf, | 707 | static ssize_t mem_read(struct file * file, char __user * buf, |
811 | size_t count, loff_t *ppos) | 708 | size_t count, loff_t *ppos) |
812 | { | 709 | { |
813 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 710 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
814 | char *page; | 711 | char *page; |
815 | unsigned long src = *ppos; | 712 | unsigned long src = *ppos; |
816 | int ret = -ESRCH; | 713 | int ret = -ESRCH; |
817 | struct mm_struct *mm; | 714 | struct mm_struct *mm; |
818 | 715 | ||
716 | if (!task) | ||
717 | goto out_no_task; | ||
718 | |||
819 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 719 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
820 | goto out; | 720 | goto out; |
821 | 721 | ||
@@ -865,6 +765,8 @@ out_put: | |||
865 | out_free: | 765 | out_free: |
866 | free_page((unsigned long) page); | 766 | free_page((unsigned long) page); |
867 | out: | 767 | out: |
768 | put_task_struct(task); | ||
769 | out_no_task: | ||
868 | return ret; | 770 | return ret; |
869 | } | 771 | } |
870 | 772 | ||
@@ -877,15 +779,20 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
877 | { | 779 | { |
878 | int copied = 0; | 780 | int copied = 0; |
879 | char *page; | 781 | char *page; |
880 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 782 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
881 | unsigned long dst = *ppos; | 783 | unsigned long dst = *ppos; |
882 | 784 | ||
785 | copied = -ESRCH; | ||
786 | if (!task) | ||
787 | goto out_no_task; | ||
788 | |||
883 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 789 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
884 | return -ESRCH; | 790 | goto out; |
885 | 791 | ||
792 | copied = -ENOMEM; | ||
886 | page = (char *)__get_free_page(GFP_USER); | 793 | page = (char *)__get_free_page(GFP_USER); |
887 | if (!page) | 794 | if (!page) |
888 | return -ENOMEM; | 795 | goto out; |
889 | 796 | ||
890 | while (count > 0) { | 797 | while (count > 0) { |
891 | int this_len, retval; | 798 | int this_len, retval; |
@@ -908,6 +815,9 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
908 | } | 815 | } |
909 | *ppos = dst; | 816 | *ppos = dst; |
910 | free_page((unsigned long) page); | 817 | free_page((unsigned long) page); |
818 | out: | ||
819 | put_task_struct(task); | ||
820 | out_no_task: | ||
911 | return copied; | 821 | return copied; |
912 | } | 822 | } |
913 | #endif | 823 | #endif |
@@ -938,13 +848,18 @@ static struct file_operations proc_mem_operations = { | |||
938 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, | 848 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, |
939 | size_t count, loff_t *ppos) | 849 | size_t count, loff_t *ppos) |
940 | { | 850 | { |
941 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 851 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
942 | char buffer[8]; | 852 | char buffer[PROC_NUMBUF]; |
943 | size_t len; | 853 | size_t len; |
944 | int oom_adjust = task->oomkilladj; | 854 | int oom_adjust; |
945 | loff_t __ppos = *ppos; | 855 | loff_t __ppos = *ppos; |
946 | 856 | ||
947 | len = sprintf(buffer, "%i\n", oom_adjust); | 857 | if (!task) |
858 | return -ESRCH; | ||
859 | oom_adjust = task->oomkilladj; | ||
860 | put_task_struct(task); | ||
861 | |||
862 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | ||
948 | if (__ppos >= len) | 863 | if (__ppos >= len) |
949 | return 0; | 864 | return 0; |
950 | if (count > len-__ppos) | 865 | if (count > len-__ppos) |
@@ -958,15 +873,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
958 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | 873 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, |
959 | size_t count, loff_t *ppos) | 874 | size_t count, loff_t *ppos) |
960 | { | 875 | { |
961 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 876 | struct task_struct *task; |
962 | char buffer[8], *end; | 877 | char buffer[PROC_NUMBUF], *end; |
963 | int oom_adjust; | 878 | int oom_adjust; |
964 | 879 | ||
965 | if (!capable(CAP_SYS_RESOURCE)) | 880 | if (!capable(CAP_SYS_RESOURCE)) |
966 | return -EPERM; | 881 | return -EPERM; |
967 | memset(buffer, 0, 8); | 882 | memset(buffer, 0, sizeof(buffer)); |
968 | if (count > 6) | 883 | if (count > sizeof(buffer) - 1) |
969 | count = 6; | 884 | count = sizeof(buffer) - 1; |
970 | if (copy_from_user(buffer, buf, count)) | 885 | if (copy_from_user(buffer, buf, count)) |
971 | return -EFAULT; | 886 | return -EFAULT; |
972 | oom_adjust = simple_strtol(buffer, &end, 0); | 887 | oom_adjust = simple_strtol(buffer, &end, 0); |
@@ -974,7 +889,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
974 | return -EINVAL; | 889 | return -EINVAL; |
975 | if (*end == '\n') | 890 | if (*end == '\n') |
976 | end++; | 891 | end++; |
892 | task = get_proc_task(file->f_dentry->d_inode); | ||
893 | if (!task) | ||
894 | return -ESRCH; | ||
977 | task->oomkilladj = oom_adjust; | 895 | task->oomkilladj = oom_adjust; |
896 | put_task_struct(task); | ||
978 | if (end - buffer == 0) | 897 | if (end - buffer == 0) |
979 | return -EIO; | 898 | return -EIO; |
980 | return end - buffer; | 899 | return end - buffer; |
@@ -985,22 +904,21 @@ static struct file_operations proc_oom_adjust_operations = { | |||
985 | .write = oom_adjust_write, | 904 | .write = oom_adjust_write, |
986 | }; | 905 | }; |
987 | 906 | ||
988 | static struct inode_operations proc_mem_inode_operations = { | ||
989 | .permission = proc_permission, | ||
990 | }; | ||
991 | |||
992 | #ifdef CONFIG_AUDITSYSCALL | 907 | #ifdef CONFIG_AUDITSYSCALL |
993 | #define TMPBUFLEN 21 | 908 | #define TMPBUFLEN 21 |
994 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | 909 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, |
995 | size_t count, loff_t *ppos) | 910 | size_t count, loff_t *ppos) |
996 | { | 911 | { |
997 | struct inode * inode = file->f_dentry->d_inode; | 912 | struct inode * inode = file->f_dentry->d_inode; |
998 | struct task_struct *task = proc_task(inode); | 913 | struct task_struct *task = get_proc_task(inode); |
999 | ssize_t length; | 914 | ssize_t length; |
1000 | char tmpbuf[TMPBUFLEN]; | 915 | char tmpbuf[TMPBUFLEN]; |
1001 | 916 | ||
917 | if (!task) | ||
918 | return -ESRCH; | ||
1002 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 919 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
1003 | audit_get_loginuid(task->audit_context)); | 920 | audit_get_loginuid(task->audit_context)); |
921 | put_task_struct(task); | ||
1004 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 922 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
1005 | } | 923 | } |
1006 | 924 | ||
@@ -1010,13 +928,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1010 | struct inode * inode = file->f_dentry->d_inode; | 928 | struct inode * inode = file->f_dentry->d_inode; |
1011 | char *page, *tmp; | 929 | char *page, *tmp; |
1012 | ssize_t length; | 930 | ssize_t length; |
1013 | struct task_struct *task = proc_task(inode); | ||
1014 | uid_t loginuid; | 931 | uid_t loginuid; |
1015 | 932 | ||
1016 | if (!capable(CAP_AUDIT_CONTROL)) | 933 | if (!capable(CAP_AUDIT_CONTROL)) |
1017 | return -EPERM; | 934 | return -EPERM; |
1018 | 935 | ||
1019 | if (current != task) | 936 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) |
1020 | return -EPERM; | 937 | return -EPERM; |
1021 | 938 | ||
1022 | if (count >= PAGE_SIZE) | 939 | if (count >= PAGE_SIZE) |
@@ -1040,7 +957,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1040 | goto out_free_page; | 957 | goto out_free_page; |
1041 | 958 | ||
1042 | } | 959 | } |
1043 | length = audit_set_loginuid(task, loginuid); | 960 | length = audit_set_loginuid(current, loginuid); |
1044 | if (likely(length == 0)) | 961 | if (likely(length == 0)) |
1045 | length = count; | 962 | length = count; |
1046 | 963 | ||
@@ -1059,13 +976,16 @@ static struct file_operations proc_loginuid_operations = { | |||
1059 | static ssize_t seccomp_read(struct file *file, char __user *buf, | 976 | static ssize_t seccomp_read(struct file *file, char __user *buf, |
1060 | size_t count, loff_t *ppos) | 977 | size_t count, loff_t *ppos) |
1061 | { | 978 | { |
1062 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 979 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
1063 | char __buf[20]; | 980 | char __buf[20]; |
1064 | loff_t __ppos = *ppos; | 981 | loff_t __ppos = *ppos; |
1065 | size_t len; | 982 | size_t len; |
1066 | 983 | ||
984 | if (!tsk) | ||
985 | return -ESRCH; | ||
1067 | /* no need to print the trailing zero, so use only len */ | 986 | /* no need to print the trailing zero, so use only len */ |
1068 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); | 987 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); |
988 | put_task_struct(tsk); | ||
1069 | if (__ppos >= len) | 989 | if (__ppos >= len) |
1070 | return 0; | 990 | return 0; |
1071 | if (count > len - __ppos) | 991 | if (count > len - __ppos) |
@@ -1079,29 +999,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf, | |||
1079 | static ssize_t seccomp_write(struct file *file, const char __user *buf, | 999 | static ssize_t seccomp_write(struct file *file, const char __user *buf, |
1080 | size_t count, loff_t *ppos) | 1000 | size_t count, loff_t *ppos) |
1081 | { | 1001 | { |
1082 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 1002 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
1083 | char __buf[20], *end; | 1003 | char __buf[20], *end; |
1084 | unsigned int seccomp_mode; | 1004 | unsigned int seccomp_mode; |
1005 | ssize_t result; | ||
1006 | |||
1007 | result = -ESRCH; | ||
1008 | if (!tsk) | ||
1009 | goto out_no_task; | ||
1085 | 1010 | ||
1086 | /* can set it only once to be even more secure */ | 1011 | /* can set it only once to be even more secure */ |
1012 | result = -EPERM; | ||
1087 | if (unlikely(tsk->seccomp.mode)) | 1013 | if (unlikely(tsk->seccomp.mode)) |
1088 | return -EPERM; | 1014 | goto out; |
1089 | 1015 | ||
1016 | result = -EFAULT; | ||
1090 | memset(__buf, 0, sizeof(__buf)); | 1017 | memset(__buf, 0, sizeof(__buf)); |
1091 | count = min(count, sizeof(__buf) - 1); | 1018 | count = min(count, sizeof(__buf) - 1); |
1092 | if (copy_from_user(__buf, buf, count)) | 1019 | if (copy_from_user(__buf, buf, count)) |
1093 | return -EFAULT; | 1020 | goto out; |
1021 | |||
1094 | seccomp_mode = simple_strtoul(__buf, &end, 0); | 1022 | seccomp_mode = simple_strtoul(__buf, &end, 0); |
1095 | if (*end == '\n') | 1023 | if (*end == '\n') |
1096 | end++; | 1024 | end++; |
1025 | result = -EINVAL; | ||
1097 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | 1026 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { |
1098 | tsk->seccomp.mode = seccomp_mode; | 1027 | tsk->seccomp.mode = seccomp_mode; |
1099 | set_tsk_thread_flag(tsk, TIF_SECCOMP); | 1028 | set_tsk_thread_flag(tsk, TIF_SECCOMP); |
1100 | } else | 1029 | } else |
1101 | return -EINVAL; | 1030 | goto out; |
1031 | result = -EIO; | ||
1102 | if (unlikely(!(end - __buf))) | 1032 | if (unlikely(!(end - __buf))) |
1103 | return -EIO; | 1033 | goto out; |
1104 | return end - __buf; | 1034 | result = end - __buf; |
1035 | out: | ||
1036 | put_task_struct(tsk); | ||
1037 | out_no_task: | ||
1038 | return result; | ||
1105 | } | 1039 | } |
1106 | 1040 | ||
1107 | static struct file_operations proc_seccomp_operations = { | 1041 | static struct file_operations proc_seccomp_operations = { |
@@ -1118,10 +1052,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1118 | /* We don't need a base pointer in the /proc filesystem */ | 1052 | /* We don't need a base pointer in the /proc filesystem */ |
1119 | path_release(nd); | 1053 | path_release(nd); |
1120 | 1054 | ||
1121 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | 1055 | /* Are we allowed to snoop on the tasks file descriptors? */ |
1122 | goto out; | 1056 | if (!proc_fd_access_allowed(inode)) |
1123 | error = proc_check_root(inode); | ||
1124 | if (error) | ||
1125 | goto out; | 1057 | goto out; |
1126 | 1058 | ||
1127 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); | 1059 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); |
@@ -1163,12 +1095,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
1163 | struct dentry *de; | 1095 | struct dentry *de; |
1164 | struct vfsmount *mnt = NULL; | 1096 | struct vfsmount *mnt = NULL; |
1165 | 1097 | ||
1166 | lock_kernel(); | 1098 | /* Are we allowed to snoop on the tasks file descriptors? */ |
1167 | 1099 | if (!proc_fd_access_allowed(inode)) | |
1168 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | ||
1169 | goto out; | ||
1170 | error = proc_check_root(inode); | ||
1171 | if (error) | ||
1172 | goto out; | 1100 | goto out; |
1173 | 1101 | ||
1174 | error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); | 1102 | error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); |
@@ -1179,7 +1107,6 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
1179 | dput(de); | 1107 | dput(de); |
1180 | mntput(mnt); | 1108 | mntput(mnt); |
1181 | out: | 1109 | out: |
1182 | unlock_kernel(); | ||
1183 | return error; | 1110 | return error; |
1184 | } | 1111 | } |
1185 | 1112 | ||
@@ -1188,21 +1115,20 @@ static struct inode_operations proc_pid_link_inode_operations = { | |||
1188 | .follow_link = proc_pid_follow_link | 1115 | .follow_link = proc_pid_follow_link |
1189 | }; | 1116 | }; |
1190 | 1117 | ||
1191 | #define NUMBUF 10 | ||
1192 | |||
1193 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | 1118 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) |
1194 | { | 1119 | { |
1195 | struct inode *inode = filp->f_dentry->d_inode; | 1120 | struct dentry *dentry = filp->f_dentry; |
1196 | struct task_struct *p = proc_task(inode); | 1121 | struct inode *inode = dentry->d_inode; |
1122 | struct task_struct *p = get_proc_task(inode); | ||
1197 | unsigned int fd, tid, ino; | 1123 | unsigned int fd, tid, ino; |
1198 | int retval; | 1124 | int retval; |
1199 | char buf[NUMBUF]; | 1125 | char buf[PROC_NUMBUF]; |
1200 | struct files_struct * files; | 1126 | struct files_struct * files; |
1201 | struct fdtable *fdt; | 1127 | struct fdtable *fdt; |
1202 | 1128 | ||
1203 | retval = -ENOENT; | 1129 | retval = -ENOENT; |
1204 | if (!pid_alive(p)) | 1130 | if (!p) |
1205 | goto out; | 1131 | goto out_no_task; |
1206 | retval = 0; | 1132 | retval = 0; |
1207 | tid = p->pid; | 1133 | tid = p->pid; |
1208 | 1134 | ||
@@ -1213,7 +1139,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1213 | goto out; | 1139 | goto out; |
1214 | filp->f_pos++; | 1140 | filp->f_pos++; |
1215 | case 1: | 1141 | case 1: |
1216 | ino = fake_ino(tid, PROC_TID_INO); | 1142 | ino = parent_ino(dentry); |
1217 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | 1143 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) |
1218 | goto out; | 1144 | goto out; |
1219 | filp->f_pos++; | 1145 | filp->f_pos++; |
@@ -1232,7 +1158,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1232 | continue; | 1158 | continue; |
1233 | rcu_read_unlock(); | 1159 | rcu_read_unlock(); |
1234 | 1160 | ||
1235 | j = NUMBUF; | 1161 | j = PROC_NUMBUF; |
1236 | i = fd; | 1162 | i = fd; |
1237 | do { | 1163 | do { |
1238 | j--; | 1164 | j--; |
@@ -1241,7 +1167,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1241 | } while (i); | 1167 | } while (i); |
1242 | 1168 | ||
1243 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); | 1169 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); |
1244 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { | 1170 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) { |
1245 | rcu_read_lock(); | 1171 | rcu_read_lock(); |
1246 | break; | 1172 | break; |
1247 | } | 1173 | } |
@@ -1251,6 +1177,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
1251 | put_files_struct(files); | 1177 | put_files_struct(files); |
1252 | } | 1178 | } |
1253 | out: | 1179 | out: |
1180 | put_task_struct(p); | ||
1181 | out_no_task: | ||
1254 | return retval; | 1182 | return retval; |
1255 | } | 1183 | } |
1256 | 1184 | ||
@@ -1262,16 +1190,18 @@ static int proc_pident_readdir(struct file *filp, | |||
1262 | int pid; | 1190 | int pid; |
1263 | struct dentry *dentry = filp->f_dentry; | 1191 | struct dentry *dentry = filp->f_dentry; |
1264 | struct inode *inode = dentry->d_inode; | 1192 | struct inode *inode = dentry->d_inode; |
1193 | struct task_struct *task = get_proc_task(inode); | ||
1265 | struct pid_entry *p; | 1194 | struct pid_entry *p; |
1266 | ino_t ino; | 1195 | ino_t ino; |
1267 | int ret; | 1196 | int ret; |
1268 | 1197 | ||
1269 | ret = -ENOENT; | 1198 | ret = -ENOENT; |
1270 | if (!pid_alive(proc_task(inode))) | 1199 | if (!task) |
1271 | goto out; | 1200 | goto out; |
1272 | 1201 | ||
1273 | ret = 0; | 1202 | ret = 0; |
1274 | pid = proc_task(inode)->pid; | 1203 | pid = task->pid; |
1204 | put_task_struct(task); | ||
1275 | i = filp->f_pos; | 1205 | i = filp->f_pos; |
1276 | switch (i) { | 1206 | switch (i) { |
1277 | case 0: | 1207 | case 0: |
@@ -1354,22 +1284,19 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st | |||
1354 | 1284 | ||
1355 | /* Common stuff */ | 1285 | /* Common stuff */ |
1356 | ei = PROC_I(inode); | 1286 | ei = PROC_I(inode); |
1357 | ei->task = NULL; | ||
1358 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1287 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
1359 | inode->i_ino = fake_ino(task->pid, ino); | 1288 | inode->i_ino = fake_ino(task->pid, ino); |
1360 | 1289 | ||
1361 | if (!pid_alive(task)) | ||
1362 | goto out_unlock; | ||
1363 | |||
1364 | /* | 1290 | /* |
1365 | * grab the reference to task. | 1291 | * grab the reference to task. |
1366 | */ | 1292 | */ |
1367 | get_task_struct(task); | 1293 | ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); |
1368 | ei->task = task; | 1294 | if (!ei->pid) |
1369 | ei->type = ino; | 1295 | goto out_unlock; |
1296 | |||
1370 | inode->i_uid = 0; | 1297 | inode->i_uid = 0; |
1371 | inode->i_gid = 0; | 1298 | inode->i_gid = 0; |
1372 | if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { | 1299 | if (task_dumpable(task)) { |
1373 | inode->i_uid = task->euid; | 1300 | inode->i_uid = task->euid; |
1374 | inode->i_gid = task->egid; | 1301 | inode->i_gid = task->egid; |
1375 | } | 1302 | } |
@@ -1379,7 +1306,6 @@ out: | |||
1379 | return inode; | 1306 | return inode; |
1380 | 1307 | ||
1381 | out_unlock: | 1308 | out_unlock: |
1382 | ei->pde = NULL; | ||
1383 | iput(inode); | 1309 | iput(inode); |
1384 | return NULL; | 1310 | return NULL; |
1385 | } | 1311 | } |
@@ -1393,13 +1319,21 @@ out_unlock: | |||
1393 | * | 1319 | * |
1394 | * Rewrite the inode's ownerships here because the owning task may have | 1320 | * Rewrite the inode's ownerships here because the owning task may have |
1395 | * performed a setuid(), etc. | 1321 | * performed a setuid(), etc. |
1322 | * | ||
1323 | * Before the /proc/pid/status file was created the only way to read | ||
1324 | * the effective uid of a process was to stat /proc/pid. Reading | ||
1325 | * /proc/pid/status is slow enough that procps and other packages | ||
1326 | * kept stating /proc/pid. To keep the rules in /proc simple I have | ||
1327 | * made this apply to all per process world readable and executable | ||
1328 | * directories. | ||
1396 | */ | 1329 | */ |
1397 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | 1330 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
1398 | { | 1331 | { |
1399 | struct inode *inode = dentry->d_inode; | 1332 | struct inode *inode = dentry->d_inode; |
1400 | struct task_struct *task = proc_task(inode); | 1333 | struct task_struct *task = get_proc_task(inode); |
1401 | if (pid_alive(task)) { | 1334 | if (task) { |
1402 | if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { | 1335 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1336 | task_dumpable(task)) { | ||
1403 | inode->i_uid = task->euid; | 1337 | inode->i_uid = task->euid; |
1404 | inode->i_gid = task->egid; | 1338 | inode->i_gid = task->egid; |
1405 | } else { | 1339 | } else { |
@@ -1407,59 +1341,75 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1407 | inode->i_gid = 0; | 1341 | inode->i_gid = 0; |
1408 | } | 1342 | } |
1409 | security_task_to_inode(task, inode); | 1343 | security_task_to_inode(task, inode); |
1344 | put_task_struct(task); | ||
1410 | return 1; | 1345 | return 1; |
1411 | } | 1346 | } |
1412 | d_drop(dentry); | 1347 | d_drop(dentry); |
1413 | return 0; | 1348 | return 0; |
1414 | } | 1349 | } |
1415 | 1350 | ||
1351 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
1352 | { | ||
1353 | struct inode *inode = dentry->d_inode; | ||
1354 | struct task_struct *task; | ||
1355 | generic_fillattr(inode, stat); | ||
1356 | |||
1357 | rcu_read_lock(); | ||
1358 | stat->uid = 0; | ||
1359 | stat->gid = 0; | ||
1360 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
1361 | if (task) { | ||
1362 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | ||
1363 | task_dumpable(task)) { | ||
1364 | stat->uid = task->euid; | ||
1365 | stat->gid = task->egid; | ||
1366 | } | ||
1367 | } | ||
1368 | rcu_read_unlock(); | ||
1369 | return 0; | ||
1370 | } | ||
1371 | |||
1416 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1372 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
1417 | { | 1373 | { |
1418 | struct inode *inode = dentry->d_inode; | 1374 | struct inode *inode = dentry->d_inode; |
1419 | struct task_struct *task = proc_task(inode); | 1375 | struct task_struct *task = get_proc_task(inode); |
1420 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | 1376 | int fd = proc_fd(inode); |
1421 | struct files_struct *files; | 1377 | struct files_struct *files; |
1422 | 1378 | ||
1423 | files = get_files_struct(task); | 1379 | if (task) { |
1424 | if (files) { | 1380 | files = get_files_struct(task); |
1425 | rcu_read_lock(); | 1381 | if (files) { |
1426 | if (fcheck_files(files, fd)) { | 1382 | rcu_read_lock(); |
1383 | if (fcheck_files(files, fd)) { | ||
1384 | rcu_read_unlock(); | ||
1385 | put_files_struct(files); | ||
1386 | if (task_dumpable(task)) { | ||
1387 | inode->i_uid = task->euid; | ||
1388 | inode->i_gid = task->egid; | ||
1389 | } else { | ||
1390 | inode->i_uid = 0; | ||
1391 | inode->i_gid = 0; | ||
1392 | } | ||
1393 | security_task_to_inode(task, inode); | ||
1394 | put_task_struct(task); | ||
1395 | return 1; | ||
1396 | } | ||
1427 | rcu_read_unlock(); | 1397 | rcu_read_unlock(); |
1428 | put_files_struct(files); | 1398 | put_files_struct(files); |
1429 | if (task_dumpable(task)) { | ||
1430 | inode->i_uid = task->euid; | ||
1431 | inode->i_gid = task->egid; | ||
1432 | } else { | ||
1433 | inode->i_uid = 0; | ||
1434 | inode->i_gid = 0; | ||
1435 | } | ||
1436 | security_task_to_inode(task, inode); | ||
1437 | return 1; | ||
1438 | } | 1399 | } |
1439 | rcu_read_unlock(); | 1400 | put_task_struct(task); |
1440 | put_files_struct(files); | ||
1441 | } | 1401 | } |
1442 | d_drop(dentry); | 1402 | d_drop(dentry); |
1443 | return 0; | 1403 | return 0; |
1444 | } | 1404 | } |
1445 | 1405 | ||
1446 | static void pid_base_iput(struct dentry *dentry, struct inode *inode) | ||
1447 | { | ||
1448 | struct task_struct *task = proc_task(inode); | ||
1449 | spin_lock(&task->proc_lock); | ||
1450 | if (task->proc_dentry == dentry) | ||
1451 | task->proc_dentry = NULL; | ||
1452 | spin_unlock(&task->proc_lock); | ||
1453 | iput(inode); | ||
1454 | } | ||
1455 | |||
1456 | static int pid_delete_dentry(struct dentry * dentry) | 1406 | static int pid_delete_dentry(struct dentry * dentry) |
1457 | { | 1407 | { |
1458 | /* Is the task we represent dead? | 1408 | /* Is the task we represent dead? |
1459 | * If so, then don't put the dentry on the lru list, | 1409 | * If so, then don't put the dentry on the lru list, |
1460 | * kill it immediately. | 1410 | * kill it immediately. |
1461 | */ | 1411 | */ |
1462 | return !pid_alive(proc_task(dentry->d_inode)); | 1412 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; |
1463 | } | 1413 | } |
1464 | 1414 | ||
1465 | static struct dentry_operations tid_fd_dentry_operations = | 1415 | static struct dentry_operations tid_fd_dentry_operations = |
@@ -1474,13 +1424,6 @@ static struct dentry_operations pid_dentry_operations = | |||
1474 | .d_delete = pid_delete_dentry, | 1424 | .d_delete = pid_delete_dentry, |
1475 | }; | 1425 | }; |
1476 | 1426 | ||
1477 | static struct dentry_operations pid_base_dentry_operations = | ||
1478 | { | ||
1479 | .d_revalidate = pid_revalidate, | ||
1480 | .d_iput = pid_base_iput, | ||
1481 | .d_delete = pid_delete_dentry, | ||
1482 | }; | ||
1483 | |||
1484 | /* Lookups */ | 1427 | /* Lookups */ |
1485 | 1428 | ||
1486 | static unsigned name_to_int(struct dentry *dentry) | 1429 | static unsigned name_to_int(struct dentry *dentry) |
@@ -1508,22 +1451,24 @@ out: | |||
1508 | /* SMP-safe */ | 1451 | /* SMP-safe */ |
1509 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | 1452 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) |
1510 | { | 1453 | { |
1511 | struct task_struct *task = proc_task(dir); | 1454 | struct task_struct *task = get_proc_task(dir); |
1512 | unsigned fd = name_to_int(dentry); | 1455 | unsigned fd = name_to_int(dentry); |
1456 | struct dentry *result = ERR_PTR(-ENOENT); | ||
1513 | struct file * file; | 1457 | struct file * file; |
1514 | struct files_struct * files; | 1458 | struct files_struct * files; |
1515 | struct inode *inode; | 1459 | struct inode *inode; |
1516 | struct proc_inode *ei; | 1460 | struct proc_inode *ei; |
1517 | 1461 | ||
1462 | if (!task) | ||
1463 | goto out_no_task; | ||
1518 | if (fd == ~0U) | 1464 | if (fd == ~0U) |
1519 | goto out; | 1465 | goto out; |
1520 | if (!pid_alive(task)) | ||
1521 | goto out; | ||
1522 | 1466 | ||
1523 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); | 1467 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); |
1524 | if (!inode) | 1468 | if (!inode) |
1525 | goto out; | 1469 | goto out; |
1526 | ei = PROC_I(inode); | 1470 | ei = PROC_I(inode); |
1471 | ei->fd = fd; | ||
1527 | files = get_files_struct(task); | 1472 | files = get_files_struct(task); |
1528 | if (!files) | 1473 | if (!files) |
1529 | goto out_unlock; | 1474 | goto out_unlock; |
@@ -1548,19 +1493,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
1548 | ei->op.proc_get_link = proc_fd_link; | 1493 | ei->op.proc_get_link = proc_fd_link; |
1549 | dentry->d_op = &tid_fd_dentry_operations; | 1494 | dentry->d_op = &tid_fd_dentry_operations; |
1550 | d_add(dentry, inode); | 1495 | d_add(dentry, inode); |
1551 | return NULL; | 1496 | /* Close the race of the process dying before we return the dentry */ |
1497 | if (tid_fd_revalidate(dentry, NULL)) | ||
1498 | result = NULL; | ||
1499 | out: | ||
1500 | put_task_struct(task); | ||
1501 | out_no_task: | ||
1502 | return result; | ||
1552 | 1503 | ||
1553 | out_unlock2: | 1504 | out_unlock2: |
1554 | spin_unlock(&files->file_lock); | 1505 | spin_unlock(&files->file_lock); |
1555 | put_files_struct(files); | 1506 | put_files_struct(files); |
1556 | out_unlock: | 1507 | out_unlock: |
1557 | iput(inode); | 1508 | iput(inode); |
1558 | out: | 1509 | goto out; |
1559 | return ERR_PTR(-ENOENT); | ||
1560 | } | 1510 | } |
1561 | 1511 | ||
1562 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); | 1512 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); |
1563 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); | 1513 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); |
1514 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | ||
1564 | 1515 | ||
1565 | static struct file_operations proc_fd_operations = { | 1516 | static struct file_operations proc_fd_operations = { |
1566 | .read = generic_read_dir, | 1517 | .read = generic_read_dir, |
@@ -1577,12 +1528,11 @@ static struct file_operations proc_task_operations = { | |||
1577 | */ | 1528 | */ |
1578 | static struct inode_operations proc_fd_inode_operations = { | 1529 | static struct inode_operations proc_fd_inode_operations = { |
1579 | .lookup = proc_lookupfd, | 1530 | .lookup = proc_lookupfd, |
1580 | .permission = proc_permission, | ||
1581 | }; | 1531 | }; |
1582 | 1532 | ||
1583 | static struct inode_operations proc_task_inode_operations = { | 1533 | static struct inode_operations proc_task_inode_operations = { |
1584 | .lookup = proc_task_lookup, | 1534 | .lookup = proc_task_lookup, |
1585 | .permission = proc_task_permission, | 1535 | .getattr = proc_task_getattr, |
1586 | }; | 1536 | }; |
1587 | 1537 | ||
1588 | #ifdef CONFIG_SECURITY | 1538 | #ifdef CONFIG_SECURITY |
@@ -1592,12 +1542,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
1592 | struct inode * inode = file->f_dentry->d_inode; | 1542 | struct inode * inode = file->f_dentry->d_inode; |
1593 | unsigned long page; | 1543 | unsigned long page; |
1594 | ssize_t length; | 1544 | ssize_t length; |
1595 | struct task_struct *task = proc_task(inode); | 1545 | struct task_struct *task = get_proc_task(inode); |
1546 | |||
1547 | length = -ESRCH; | ||
1548 | if (!task) | ||
1549 | goto out_no_task; | ||
1596 | 1550 | ||
1597 | if (count > PAGE_SIZE) | 1551 | if (count > PAGE_SIZE) |
1598 | count = PAGE_SIZE; | 1552 | count = PAGE_SIZE; |
1553 | length = -ENOMEM; | ||
1599 | if (!(page = __get_free_page(GFP_KERNEL))) | 1554 | if (!(page = __get_free_page(GFP_KERNEL))) |
1600 | return -ENOMEM; | 1555 | goto out; |
1601 | 1556 | ||
1602 | length = security_getprocattr(task, | 1557 | length = security_getprocattr(task, |
1603 | (char*)file->f_dentry->d_name.name, | 1558 | (char*)file->f_dentry->d_name.name, |
@@ -1605,6 +1560,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
1605 | if (length >= 0) | 1560 | if (length >= 0) |
1606 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 1561 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
1607 | free_page(page); | 1562 | free_page(page); |
1563 | out: | ||
1564 | put_task_struct(task); | ||
1565 | out_no_task: | ||
1608 | return length; | 1566 | return length; |
1609 | } | 1567 | } |
1610 | 1568 | ||
@@ -1614,26 +1572,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | |||
1614 | struct inode * inode = file->f_dentry->d_inode; | 1572 | struct inode * inode = file->f_dentry->d_inode; |
1615 | char *page; | 1573 | char *page; |
1616 | ssize_t length; | 1574 | ssize_t length; |
1617 | struct task_struct *task = proc_task(inode); | 1575 | struct task_struct *task = get_proc_task(inode); |
1618 | 1576 | ||
1577 | length = -ESRCH; | ||
1578 | if (!task) | ||
1579 | goto out_no_task; | ||
1619 | if (count > PAGE_SIZE) | 1580 | if (count > PAGE_SIZE) |
1620 | count = PAGE_SIZE; | 1581 | count = PAGE_SIZE; |
1621 | if (*ppos != 0) { | 1582 | |
1622 | /* No partial writes. */ | 1583 | /* No partial writes. */ |
1623 | return -EINVAL; | 1584 | length = -EINVAL; |
1624 | } | 1585 | if (*ppos != 0) |
1586 | goto out; | ||
1587 | |||
1588 | length = -ENOMEM; | ||
1625 | page = (char*)__get_free_page(GFP_USER); | 1589 | page = (char*)__get_free_page(GFP_USER); |
1626 | if (!page) | 1590 | if (!page) |
1627 | return -ENOMEM; | 1591 | goto out; |
1592 | |||
1628 | length = -EFAULT; | 1593 | length = -EFAULT; |
1629 | if (copy_from_user(page, buf, count)) | 1594 | if (copy_from_user(page, buf, count)) |
1630 | goto out; | 1595 | goto out_free; |
1631 | 1596 | ||
1632 | length = security_setprocattr(task, | 1597 | length = security_setprocattr(task, |
1633 | (char*)file->f_dentry->d_name.name, | 1598 | (char*)file->f_dentry->d_name.name, |
1634 | (void*)page, count); | 1599 | (void*)page, count); |
1635 | out: | 1600 | out_free: |
1636 | free_page((unsigned long) page); | 1601 | free_page((unsigned long) page); |
1602 | out: | ||
1603 | put_task_struct(task); | ||
1604 | out_no_task: | ||
1637 | return length; | 1605 | return length; |
1638 | } | 1606 | } |
1639 | 1607 | ||
@@ -1648,24 +1616,22 @@ static struct file_operations proc_tgid_attr_operations; | |||
1648 | static struct inode_operations proc_tgid_attr_inode_operations; | 1616 | static struct inode_operations proc_tgid_attr_inode_operations; |
1649 | #endif | 1617 | #endif |
1650 | 1618 | ||
1651 | static int get_tid_list(int index, unsigned int *tids, struct inode *dir); | ||
1652 | |||
1653 | /* SMP-safe */ | 1619 | /* SMP-safe */ |
1654 | static struct dentry *proc_pident_lookup(struct inode *dir, | 1620 | static struct dentry *proc_pident_lookup(struct inode *dir, |
1655 | struct dentry *dentry, | 1621 | struct dentry *dentry, |
1656 | struct pid_entry *ents) | 1622 | struct pid_entry *ents) |
1657 | { | 1623 | { |
1658 | struct inode *inode; | 1624 | struct inode *inode; |
1659 | int error; | 1625 | struct dentry *error; |
1660 | struct task_struct *task = proc_task(dir); | 1626 | struct task_struct *task = get_proc_task(dir); |
1661 | struct pid_entry *p; | 1627 | struct pid_entry *p; |
1662 | struct proc_inode *ei; | 1628 | struct proc_inode *ei; |
1663 | 1629 | ||
1664 | error = -ENOENT; | 1630 | error = ERR_PTR(-ENOENT); |
1665 | inode = NULL; | 1631 | inode = NULL; |
1666 | 1632 | ||
1667 | if (!pid_alive(task)) | 1633 | if (!task) |
1668 | goto out; | 1634 | goto out_no_task; |
1669 | 1635 | ||
1670 | for (p = ents; p->name; p++) { | 1636 | for (p = ents; p->name; p++) { |
1671 | if (p->len != dentry->d_name.len) | 1637 | if (p->len != dentry->d_name.len) |
@@ -1676,7 +1642,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1676 | if (!p->name) | 1642 | if (!p->name) |
1677 | goto out; | 1643 | goto out; |
1678 | 1644 | ||
1679 | error = -EINVAL; | 1645 | error = ERR_PTR(-EINVAL); |
1680 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); | 1646 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); |
1681 | if (!inode) | 1647 | if (!inode) |
1682 | goto out; | 1648 | goto out; |
@@ -1689,7 +1655,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1689 | */ | 1655 | */ |
1690 | switch(p->type) { | 1656 | switch(p->type) { |
1691 | case PROC_TGID_TASK: | 1657 | case PROC_TGID_TASK: |
1692 | inode->i_nlink = 2 + get_tid_list(2, NULL, dir); | 1658 | inode->i_nlink = 2; |
1693 | inode->i_op = &proc_task_inode_operations; | 1659 | inode->i_op = &proc_task_inode_operations; |
1694 | inode->i_fop = &proc_task_operations; | 1660 | inode->i_fop = &proc_task_operations; |
1695 | break; | 1661 | break; |
@@ -1759,7 +1725,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1759 | #endif | 1725 | #endif |
1760 | case PROC_TID_MEM: | 1726 | case PROC_TID_MEM: |
1761 | case PROC_TGID_MEM: | 1727 | case PROC_TGID_MEM: |
1762 | inode->i_op = &proc_mem_inode_operations; | ||
1763 | inode->i_fop = &proc_mem_operations; | 1728 | inode->i_fop = &proc_mem_operations; |
1764 | break; | 1729 | break; |
1765 | #ifdef CONFIG_SECCOMP | 1730 | #ifdef CONFIG_SECCOMP |
@@ -1801,6 +1766,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1801 | case PROC_TGID_ATTR_EXEC: | 1766 | case PROC_TGID_ATTR_EXEC: |
1802 | case PROC_TID_ATTR_FSCREATE: | 1767 | case PROC_TID_ATTR_FSCREATE: |
1803 | case PROC_TGID_ATTR_FSCREATE: | 1768 | case PROC_TGID_ATTR_FSCREATE: |
1769 | case PROC_TID_ATTR_KEYCREATE: | ||
1770 | case PROC_TGID_ATTR_KEYCREATE: | ||
1771 | case PROC_TID_ATTR_SOCKCREATE: | ||
1772 | case PROC_TGID_ATTR_SOCKCREATE: | ||
1804 | inode->i_fop = &proc_pid_attr_operations; | 1773 | inode->i_fop = &proc_pid_attr_operations; |
1805 | break; | 1774 | break; |
1806 | #endif | 1775 | #endif |
@@ -1842,14 +1811,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
1842 | default: | 1811 | default: |
1843 | printk("procfs: impossible type (%d)",p->type); | 1812 | printk("procfs: impossible type (%d)",p->type); |
1844 | iput(inode); | 1813 | iput(inode); |
1845 | return ERR_PTR(-EINVAL); | 1814 | error = ERR_PTR(-EINVAL); |
1815 | goto out; | ||
1846 | } | 1816 | } |
1847 | dentry->d_op = &pid_dentry_operations; | 1817 | dentry->d_op = &pid_dentry_operations; |
1848 | d_add(dentry, inode); | 1818 | d_add(dentry, inode); |
1849 | return NULL; | 1819 | /* Close the race of the process dying before we return the dentry */ |
1850 | 1820 | if (pid_revalidate(dentry, NULL)) | |
1821 | error = NULL; | ||
1851 | out: | 1822 | out: |
1852 | return ERR_PTR(error); | 1823 | put_task_struct(task); |
1824 | out_no_task: | ||
1825 | return error; | ||
1853 | } | 1826 | } |
1854 | 1827 | ||
1855 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | 1828 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
@@ -1872,10 +1845,12 @@ static struct file_operations proc_tid_base_operations = { | |||
1872 | 1845 | ||
1873 | static struct inode_operations proc_tgid_base_inode_operations = { | 1846 | static struct inode_operations proc_tgid_base_inode_operations = { |
1874 | .lookup = proc_tgid_base_lookup, | 1847 | .lookup = proc_tgid_base_lookup, |
1848 | .getattr = pid_getattr, | ||
1875 | }; | 1849 | }; |
1876 | 1850 | ||
1877 | static struct inode_operations proc_tid_base_inode_operations = { | 1851 | static struct inode_operations proc_tid_base_inode_operations = { |
1878 | .lookup = proc_tid_base_lookup, | 1852 | .lookup = proc_tid_base_lookup, |
1853 | .getattr = pid_getattr, | ||
1879 | }; | 1854 | }; |
1880 | 1855 | ||
1881 | #ifdef CONFIG_SECURITY | 1856 | #ifdef CONFIG_SECURITY |
@@ -1917,10 +1892,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir, | |||
1917 | 1892 | ||
1918 | static struct inode_operations proc_tgid_attr_inode_operations = { | 1893 | static struct inode_operations proc_tgid_attr_inode_operations = { |
1919 | .lookup = proc_tgid_attr_lookup, | 1894 | .lookup = proc_tgid_attr_lookup, |
1895 | .getattr = pid_getattr, | ||
1920 | }; | 1896 | }; |
1921 | 1897 | ||
1922 | static struct inode_operations proc_tid_attr_inode_operations = { | 1898 | static struct inode_operations proc_tid_attr_inode_operations = { |
1923 | .lookup = proc_tid_attr_lookup, | 1899 | .lookup = proc_tid_attr_lookup, |
1900 | .getattr = pid_getattr, | ||
1924 | }; | 1901 | }; |
1925 | #endif | 1902 | #endif |
1926 | 1903 | ||
@@ -1930,14 +1907,14 @@ static struct inode_operations proc_tid_attr_inode_operations = { | |||
1930 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | 1907 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, |
1931 | int buflen) | 1908 | int buflen) |
1932 | { | 1909 | { |
1933 | char tmp[30]; | 1910 | char tmp[PROC_NUMBUF]; |
1934 | sprintf(tmp, "%d", current->tgid); | 1911 | sprintf(tmp, "%d", current->tgid); |
1935 | return vfs_readlink(dentry,buffer,buflen,tmp); | 1912 | return vfs_readlink(dentry,buffer,buflen,tmp); |
1936 | } | 1913 | } |
1937 | 1914 | ||
1938 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | 1915 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) |
1939 | { | 1916 | { |
1940 | char tmp[30]; | 1917 | char tmp[PROC_NUMBUF]; |
1941 | sprintf(tmp, "%d", current->tgid); | 1918 | sprintf(tmp, "%d", current->tgid); |
1942 | return ERR_PTR(vfs_follow_link(nd,tmp)); | 1919 | return ERR_PTR(vfs_follow_link(nd,tmp)); |
1943 | } | 1920 | } |
@@ -1948,67 +1925,80 @@ static struct inode_operations proc_self_inode_operations = { | |||
1948 | }; | 1925 | }; |
1949 | 1926 | ||
1950 | /** | 1927 | /** |
1951 | * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. | 1928 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. |
1952 | * @p: task that should be flushed. | 1929 | * |
1930 | * @task: task that should be flushed. | ||
1931 | * | ||
1932 | * Looks in the dcache for | ||
1933 | * /proc/@pid | ||
1934 | * /proc/@tgid/task/@pid | ||
1935 | * if either directory is present flushes it and all of its children | ||
1936 | * from the dcache. | ||
1953 | * | 1937 | * |
1954 | * Drops the /proc/@pid dcache entry from the hash chains. | 1938 | * It is safe and reasonable to cache /proc entries for a task until |
1939 | * that task exits. After that they just clog up the dcache with | ||
1940 | * useless entries, possibly causing useful dcache entries to be | ||
1941 | * flushed instead. This routine is provided to flush those useless | ||
1942 | * dcache entries at process exit time. | ||
1955 | * | 1943 | * |
1956 | * Dropping /proc/@pid entries and detach_pid must be synchroneous, | 1944 | * NOTE: This routine is just an optimization so it does not guarantee |
1957 | * otherwise e.g. /proc/@pid/exe might point to the wrong executable, | 1945 | * that no dcache entries will exist at process exit time it |
1958 | * if the pid value is immediately reused. This is enforced by | 1946 | * just makes it very unlikely that any will persist. |
1959 | * - caller must acquire spin_lock(p->proc_lock) | ||
1960 | * - must be called before detach_pid() | ||
1961 | * - proc_pid_lookup acquires proc_lock, and checks that | ||
1962 | * the target is not dead by looking at the attach count | ||
1963 | * of PIDTYPE_PID. | ||
1964 | */ | 1947 | */ |
1965 | 1948 | void proc_flush_task(struct task_struct *task) | |
1966 | struct dentry *proc_pid_unhash(struct task_struct *p) | ||
1967 | { | 1949 | { |
1968 | struct dentry *proc_dentry; | 1950 | struct dentry *dentry, *leader, *dir; |
1951 | char buf[PROC_NUMBUF]; | ||
1952 | struct qstr name; | ||
1953 | |||
1954 | name.name = buf; | ||
1955 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); | ||
1956 | dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); | ||
1957 | if (dentry) { | ||
1958 | shrink_dcache_parent(dentry); | ||
1959 | d_drop(dentry); | ||
1960 | dput(dentry); | ||
1961 | } | ||
1969 | 1962 | ||
1970 | proc_dentry = p->proc_dentry; | 1963 | if (thread_group_leader(task)) |
1971 | if (proc_dentry != NULL) { | 1964 | goto out; |
1972 | 1965 | ||
1973 | spin_lock(&dcache_lock); | 1966 | name.name = buf; |
1974 | spin_lock(&proc_dentry->d_lock); | 1967 | name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); |
1975 | if (!d_unhashed(proc_dentry)) { | 1968 | leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); |
1976 | dget_locked(proc_dentry); | 1969 | if (!leader) |
1977 | __d_drop(proc_dentry); | 1970 | goto out; |
1978 | spin_unlock(&proc_dentry->d_lock); | ||
1979 | } else { | ||
1980 | spin_unlock(&proc_dentry->d_lock); | ||
1981 | proc_dentry = NULL; | ||
1982 | } | ||
1983 | spin_unlock(&dcache_lock); | ||
1984 | } | ||
1985 | return proc_dentry; | ||
1986 | } | ||
1987 | 1971 | ||
1988 | /** | 1972 | name.name = "task"; |
1989 | * proc_pid_flush - recover memory used by stale /proc/@pid/x entries | 1973 | name.len = strlen(name.name); |
1990 | * @proc_dentry: directoy to prune. | 1974 | dir = d_hash_and_lookup(leader, &name); |
1991 | * | 1975 | if (!dir) |
1992 | * Shrink the /proc directory that was used by the just killed thread. | 1976 | goto out_put_leader; |
1993 | */ | 1977 | |
1994 | 1978 | name.name = buf; | |
1995 | void proc_pid_flush(struct dentry *proc_dentry) | 1979 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); |
1996 | { | 1980 | dentry = d_hash_and_lookup(dir, &name); |
1997 | might_sleep(); | 1981 | if (dentry) { |
1998 | if(proc_dentry != NULL) { | 1982 | shrink_dcache_parent(dentry); |
1999 | shrink_dcache_parent(proc_dentry); | 1983 | d_drop(dentry); |
2000 | dput(proc_dentry); | 1984 | dput(dentry); |
2001 | } | 1985 | } |
1986 | |||
1987 | dput(dir); | ||
1988 | out_put_leader: | ||
1989 | dput(leader); | ||
1990 | out: | ||
1991 | return; | ||
2002 | } | 1992 | } |
2003 | 1993 | ||
2004 | /* SMP-safe */ | 1994 | /* SMP-safe */ |
2005 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 1995 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2006 | { | 1996 | { |
1997 | struct dentry *result = ERR_PTR(-ENOENT); | ||
2007 | struct task_struct *task; | 1998 | struct task_struct *task; |
2008 | struct inode *inode; | 1999 | struct inode *inode; |
2009 | struct proc_inode *ei; | 2000 | struct proc_inode *ei; |
2010 | unsigned tgid; | 2001 | unsigned tgid; |
2011 | int died; | ||
2012 | 2002 | ||
2013 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | 2003 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { |
2014 | inode = new_inode(dir->i_sb); | 2004 | inode = new_inode(dir->i_sb); |
@@ -2029,21 +2019,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
2029 | if (tgid == ~0U) | 2019 | if (tgid == ~0U) |
2030 | goto out; | 2020 | goto out; |
2031 | 2021 | ||
2032 | read_lock(&tasklist_lock); | 2022 | rcu_read_lock(); |
2033 | task = find_task_by_pid(tgid); | 2023 | task = find_task_by_pid(tgid); |
2034 | if (task) | 2024 | if (task) |
2035 | get_task_struct(task); | 2025 | get_task_struct(task); |
2036 | read_unlock(&tasklist_lock); | 2026 | rcu_read_unlock(); |
2037 | if (!task) | 2027 | if (!task) |
2038 | goto out; | 2028 | goto out; |
2039 | 2029 | ||
2040 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); | 2030 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); |
2031 | if (!inode) | ||
2032 | goto out_put_task; | ||
2041 | 2033 | ||
2042 | |||
2043 | if (!inode) { | ||
2044 | put_task_struct(task); | ||
2045 | goto out; | ||
2046 | } | ||
2047 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | 2034 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; |
2048 | inode->i_op = &proc_tgid_base_inode_operations; | 2035 | inode->i_op = &proc_tgid_base_inode_operations; |
2049 | inode->i_fop = &proc_tgid_base_operations; | 2036 | inode->i_fop = &proc_tgid_base_operations; |
@@ -2054,45 +2041,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
2054 | inode->i_nlink = 4; | 2041 | inode->i_nlink = 4; |
2055 | #endif | 2042 | #endif |
2056 | 2043 | ||
2057 | dentry->d_op = &pid_base_dentry_operations; | 2044 | dentry->d_op = &pid_dentry_operations; |
2058 | 2045 | ||
2059 | died = 0; | ||
2060 | d_add(dentry, inode); | 2046 | d_add(dentry, inode); |
2061 | spin_lock(&task->proc_lock); | 2047 | /* Close the race of the process dying before we return the dentry */ |
2062 | task->proc_dentry = dentry; | 2048 | if (pid_revalidate(dentry, NULL)) |
2063 | if (!pid_alive(task)) { | 2049 | result = NULL; |
2064 | dentry = proc_pid_unhash(task); | ||
2065 | died = 1; | ||
2066 | } | ||
2067 | spin_unlock(&task->proc_lock); | ||
2068 | 2050 | ||
2051 | out_put_task: | ||
2069 | put_task_struct(task); | 2052 | put_task_struct(task); |
2070 | if (died) { | ||
2071 | proc_pid_flush(dentry); | ||
2072 | goto out; | ||
2073 | } | ||
2074 | return NULL; | ||
2075 | out: | 2053 | out: |
2076 | return ERR_PTR(-ENOENT); | 2054 | return result; |
2077 | } | 2055 | } |
2078 | 2056 | ||
2079 | /* SMP-safe */ | 2057 | /* SMP-safe */ |
2080 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 2058 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2081 | { | 2059 | { |
2060 | struct dentry *result = ERR_PTR(-ENOENT); | ||
2082 | struct task_struct *task; | 2061 | struct task_struct *task; |
2083 | struct task_struct *leader = proc_task(dir); | 2062 | struct task_struct *leader = get_proc_task(dir); |
2084 | struct inode *inode; | 2063 | struct inode *inode; |
2085 | unsigned tid; | 2064 | unsigned tid; |
2086 | 2065 | ||
2066 | if (!leader) | ||
2067 | goto out_no_task; | ||
2068 | |||
2087 | tid = name_to_int(dentry); | 2069 | tid = name_to_int(dentry); |
2088 | if (tid == ~0U) | 2070 | if (tid == ~0U) |
2089 | goto out; | 2071 | goto out; |
2090 | 2072 | ||
2091 | read_lock(&tasklist_lock); | 2073 | rcu_read_lock(); |
2092 | task = find_task_by_pid(tid); | 2074 | task = find_task_by_pid(tid); |
2093 | if (task) | 2075 | if (task) |
2094 | get_task_struct(task); | 2076 | get_task_struct(task); |
2095 | read_unlock(&tasklist_lock); | 2077 | rcu_read_unlock(); |
2096 | if (!task) | 2078 | if (!task) |
2097 | goto out; | 2079 | goto out; |
2098 | if (leader->tgid != task->tgid) | 2080 | if (leader->tgid != task->tgid) |
@@ -2113,101 +2095,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry | |||
2113 | inode->i_nlink = 3; | 2095 | inode->i_nlink = 3; |
2114 | #endif | 2096 | #endif |
2115 | 2097 | ||
2116 | dentry->d_op = &pid_base_dentry_operations; | 2098 | dentry->d_op = &pid_dentry_operations; |
2117 | 2099 | ||
2118 | d_add(dentry, inode); | 2100 | d_add(dentry, inode); |
2101 | /* Close the race of the process dying before we return the dentry */ | ||
2102 | if (pid_revalidate(dentry, NULL)) | ||
2103 | result = NULL; | ||
2119 | 2104 | ||
2120 | put_task_struct(task); | ||
2121 | return NULL; | ||
2122 | out_drop_task: | 2105 | out_drop_task: |
2123 | put_task_struct(task); | 2106 | put_task_struct(task); |
2124 | out: | 2107 | out: |
2125 | return ERR_PTR(-ENOENT); | 2108 | put_task_struct(leader); |
2109 | out_no_task: | ||
2110 | return result; | ||
2126 | } | 2111 | } |
2127 | 2112 | ||
2128 | #define PROC_NUMBUF 10 | ||
2129 | #define PROC_MAXPIDS 20 | ||
2130 | |||
2131 | /* | 2113 | /* |
2132 | * Get a few tgid's to return for filldir - we need to hold the | 2114 | * Find the first tgid to return to user space. |
2133 | * tasklist lock while doing this, and we must release it before | 2115 | * |
2134 | * we actually do the filldir itself, so we use a temp buffer.. | 2116 | * Usually this is just whatever follows &init_task, but if the user's |
2117 | * buffer was too small to hold the full list or there was a seek into | ||
2118 | * the middle of the directory we have more work to do. | ||
2119 | * | ||
2120 | * In the case of a short read we start with find_task_by_pid. | ||
2121 | * | ||
2122 | * In the case of a seek we start with &init_task and walk nr | ||
2123 | * threads past it. | ||
2135 | */ | 2124 | */ |
2136 | static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) | 2125 | static struct task_struct *first_tgid(int tgid, unsigned int nr) |
2137 | { | 2126 | { |
2138 | struct task_struct *p; | 2127 | struct task_struct *pos; |
2139 | int nr_tgids = 0; | 2128 | rcu_read_lock(); |
2140 | 2129 | if (tgid && nr) { | |
2141 | index--; | 2130 | pos = find_task_by_pid(tgid); |
2142 | read_lock(&tasklist_lock); | 2131 | if (pos && thread_group_leader(pos)) |
2143 | p = NULL; | 2132 | goto found; |
2144 | if (version) { | ||
2145 | p = find_task_by_pid(version); | ||
2146 | if (p && !thread_group_leader(p)) | ||
2147 | p = NULL; | ||
2148 | } | 2133 | } |
2134 | /* If nr exceeds the number of processes get out quickly */ | ||
2135 | pos = NULL; | ||
2136 | if (nr && nr >= nr_processes()) | ||
2137 | goto done; | ||
2149 | 2138 | ||
2150 | if (p) | 2139 | /* If we haven't found our starting place yet start with |
2151 | index = 0; | 2140 | * the init_task and walk nr tasks forward. |
2152 | else | 2141 | */ |
2153 | p = next_task(&init_task); | 2142 | for (pos = next_task(&init_task); nr > 0; --nr) { |
2154 | 2143 | pos = next_task(pos); | |
2155 | for ( ; p != &init_task; p = next_task(p)) { | 2144 | if (pos == &init_task) { |
2156 | int tgid = p->pid; | 2145 | pos = NULL; |
2157 | if (!pid_alive(p)) | 2146 | goto done; |
2158 | continue; | 2147 | } |
2159 | if (--index >= 0) | ||
2160 | continue; | ||
2161 | tgids[nr_tgids] = tgid; | ||
2162 | nr_tgids++; | ||
2163 | if (nr_tgids >= PROC_MAXPIDS) | ||
2164 | break; | ||
2165 | } | 2148 | } |
2166 | read_unlock(&tasklist_lock); | 2149 | found: |
2167 | return nr_tgids; | 2150 | get_task_struct(pos); |
2151 | done: | ||
2152 | rcu_read_unlock(); | ||
2153 | return pos; | ||
2168 | } | 2154 | } |
2169 | 2155 | ||
2170 | /* | 2156 | /* |
2171 | * Get a few tid's to return for filldir - we need to hold the | 2157 | * Find the next task in the task list. |
2172 | * tasklist lock while doing this, and we must release it before | 2158 | * Return NULL if we loop or there is any error. |
2173 | * we actually do the filldir itself, so we use a temp buffer.. | 2159 | * |
2160 | * The reference to the input task_struct is released. | ||
2174 | */ | 2161 | */ |
2175 | static int get_tid_list(int index, unsigned int *tids, struct inode *dir) | 2162 | static struct task_struct *next_tgid(struct task_struct *start) |
2176 | { | 2163 | { |
2177 | struct task_struct *leader_task = proc_task(dir); | 2164 | struct task_struct *pos; |
2178 | struct task_struct *task = leader_task; | 2165 | rcu_read_lock(); |
2179 | int nr_tids = 0; | 2166 | pos = start; |
2180 | 2167 | if (pid_alive(start)) | |
2181 | index -= 2; | 2168 | pos = next_task(start); |
2182 | read_lock(&tasklist_lock); | 2169 | if (pid_alive(pos) && (pos != &init_task)) { |
2183 | /* | 2170 | get_task_struct(pos); |
2184 | * The starting point task (leader_task) might be an already | 2171 | goto done; |
2185 | * unlinked task, which cannot be used to access the task-list | 2172 | } |
2186 | * via next_thread(). | 2173 | pos = NULL; |
2187 | */ | 2174 | done: |
2188 | if (pid_alive(task)) do { | 2175 | rcu_read_unlock(); |
2189 | int tid = task->pid; | 2176 | put_task_struct(start); |
2190 | 2177 | return pos; | |
2191 | if (--index >= 0) | ||
2192 | continue; | ||
2193 | if (tids != NULL) | ||
2194 | tids[nr_tids] = tid; | ||
2195 | nr_tids++; | ||
2196 | if (nr_tids >= PROC_MAXPIDS) | ||
2197 | break; | ||
2198 | } while ((task = next_thread(task)) != leader_task); | ||
2199 | read_unlock(&tasklist_lock); | ||
2200 | return nr_tids; | ||
2201 | } | 2178 | } |
2202 | 2179 | ||
2203 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 2180 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2204 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2181 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2205 | { | 2182 | { |
2206 | unsigned int tgid_array[PROC_MAXPIDS]; | ||
2207 | char buf[PROC_NUMBUF]; | 2183 | char buf[PROC_NUMBUF]; |
2208 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; | 2184 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; |
2209 | unsigned int nr_tgids, i; | 2185 | struct task_struct *task; |
2210 | int next_tgid; | 2186 | int tgid; |
2211 | 2187 | ||
2212 | if (!nr) { | 2188 | if (!nr) { |
2213 | ino_t ino = fake_ino(0,PROC_TGID_INO); | 2189 | ino_t ino = fake_ino(0,PROC_TGID_INO); |
@@ -2216,63 +2192,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2216 | filp->f_pos++; | 2192 | filp->f_pos++; |
2217 | nr++; | 2193 | nr++; |
2218 | } | 2194 | } |
2195 | nr -= 1; | ||
2219 | 2196 | ||
2220 | /* f_version caches the tgid value that the last readdir call couldn't | 2197 | /* f_version caches the tgid value that the last readdir call couldn't |
2221 | * return. lseek aka telldir automagically resets f_version to 0. | 2198 | * return. lseek aka telldir automagically resets f_version to 0. |
2222 | */ | 2199 | */ |
2223 | next_tgid = filp->f_version; | 2200 | tgid = filp->f_version; |
2224 | filp->f_version = 0; | 2201 | filp->f_version = 0; |
2225 | for (;;) { | 2202 | for (task = first_tgid(tgid, nr); |
2226 | nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); | 2203 | task; |
2227 | if (!nr_tgids) { | 2204 | task = next_tgid(task), filp->f_pos++) { |
2228 | /* no more entries ! */ | 2205 | int len; |
2206 | ino_t ino; | ||
2207 | tgid = task->pid; | ||
2208 | len = snprintf(buf, sizeof(buf), "%d", tgid); | ||
2209 | ino = fake_ino(tgid, PROC_TGID_INO); | ||
2210 | if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) { | ||
2211 | /* returning this tgid failed, save it as the first | ||
2212 | * pid for the next readdir call */ | ||
2213 | filp->f_version = tgid; | ||
2214 | put_task_struct(task); | ||
2229 | break; | 2215 | break; |
2230 | } | 2216 | } |
2231 | next_tgid = 0; | 2217 | } |
2218 | return 0; | ||
2219 | } | ||
2232 | 2220 | ||
2233 | /* do not use the last found pid, reserve it for next_tgid */ | 2221 | /* |
2234 | if (nr_tgids == PROC_MAXPIDS) { | 2222 | * Find the first tid of a thread group to return to user space. |
2235 | nr_tgids--; | 2223 | * |
2236 | next_tgid = tgid_array[nr_tgids]; | 2224 | * Usually this is just the thread group leader, but if the user's |
2237 | } | 2225 | * buffer was too small or there was a seek into the middle of the |
2226 | * directory we have more work to do. | ||
2227 | * | ||
2228 | * In the case of a short read we start with find_task_by_pid. | ||
2229 | * | ||
2230 | * In the case of a seek we start with the leader and walk nr | ||
2231 | * threads past it. | ||
2232 | */ | ||
2233 | static struct task_struct *first_tid(struct task_struct *leader, | ||
2234 | int tid, int nr) | ||
2235 | { | ||
2236 | struct task_struct *pos; | ||
2238 | 2237 | ||
2239 | for (i=0;i<nr_tgids;i++) { | 2238 | rcu_read_lock(); |
2240 | int tgid = tgid_array[i]; | 2239 | /* Attempt to start with the pid of a thread */ |
2241 | ino_t ino = fake_ino(tgid,PROC_TGID_INO); | 2240 | if (tid && (nr > 0)) { |
2242 | unsigned long j = PROC_NUMBUF; | 2241 | pos = find_task_by_pid(tid); |
2242 | if (pos && (pos->group_leader == leader)) | ||
2243 | goto found; | ||
2244 | } | ||
2243 | 2245 | ||
2244 | do | 2246 | /* If nr exceeds the number of threads there is nothing to do */ |
2245 | buf[--j] = '0' + (tgid % 10); | 2247 | pos = NULL; |
2246 | while ((tgid /= 10) != 0); | 2248 | if (nr && nr >= get_nr_threads(leader)) |
2249 | goto out; | ||
2247 | 2250 | ||
2248 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { | 2251 | /* If we haven't found our starting place yet start |
2249 | /* returning this tgid failed, save it as the first | 2252 | * with the leader and walk nr threads forward. |
2250 | * pid for the next readir call */ | 2253 | */ |
2251 | filp->f_version = tgid_array[i]; | 2254 | for (pos = leader; nr > 0; --nr) { |
2252 | goto out; | 2255 | pos = next_thread(pos); |
2253 | } | 2256 | if (pos == leader) { |
2254 | filp->f_pos++; | 2257 | pos = NULL; |
2255 | nr++; | 2258 | goto out; |
2256 | } | 2259 | } |
2257 | } | 2260 | } |
2261 | found: | ||
2262 | get_task_struct(pos); | ||
2258 | out: | 2263 | out: |
2259 | return 0; | 2264 | rcu_read_unlock(); |
2265 | return pos; | ||
2266 | } | ||
2267 | |||
2268 | /* | ||
2269 | * Find the next thread in the thread list. | ||
2270 | * Return NULL if there is an error or no next thread. | ||
2271 | * | ||
2272 | * The reference to the input task_struct is released. | ||
2273 | */ | ||
2274 | static struct task_struct *next_tid(struct task_struct *start) | ||
2275 | { | ||
2276 | struct task_struct *pos = NULL; | ||
2277 | rcu_read_lock(); | ||
2278 | if (pid_alive(start)) { | ||
2279 | pos = next_thread(start); | ||
2280 | if (thread_group_leader(pos)) | ||
2281 | pos = NULL; | ||
2282 | else | ||
2283 | get_task_struct(pos); | ||
2284 | } | ||
2285 | rcu_read_unlock(); | ||
2286 | put_task_struct(start); | ||
2287 | return pos; | ||
2260 | } | 2288 | } |
2261 | 2289 | ||
2262 | /* for the /proc/TGID/task/ directories */ | 2290 | /* for the /proc/TGID/task/ directories */ |
2263 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2291 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2264 | { | 2292 | { |
2265 | unsigned int tid_array[PROC_MAXPIDS]; | ||
2266 | char buf[PROC_NUMBUF]; | 2293 | char buf[PROC_NUMBUF]; |
2267 | unsigned int nr_tids, i; | ||
2268 | struct dentry *dentry = filp->f_dentry; | 2294 | struct dentry *dentry = filp->f_dentry; |
2269 | struct inode *inode = dentry->d_inode; | 2295 | struct inode *inode = dentry->d_inode; |
2296 | struct task_struct *leader = get_proc_task(inode); | ||
2297 | struct task_struct *task; | ||
2270 | int retval = -ENOENT; | 2298 | int retval = -ENOENT; |
2271 | ino_t ino; | 2299 | ino_t ino; |
2300 | int tid; | ||
2272 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ | 2301 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ |
2273 | 2302 | ||
2274 | if (!pid_alive(proc_task(inode))) | 2303 | if (!leader) |
2275 | goto out; | 2304 | goto out_no_task; |
2276 | retval = 0; | 2305 | retval = 0; |
2277 | 2306 | ||
2278 | switch (pos) { | 2307 | switch (pos) { |
@@ -2290,24 +2319,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
2290 | /* fall through */ | 2319 | /* fall through */ |
2291 | } | 2320 | } |
2292 | 2321 | ||
2293 | nr_tids = get_tid_list(pos, tid_array, inode); | 2322 | /* f_version caches the tid value that the last readdir call couldn't |
2294 | inode->i_nlink = pos + nr_tids; | 2323 | * return. lseek aka telldir automagically resets f_version to 0. |
2295 | 2324 | */ | |
2296 | for (i = 0; i < nr_tids; i++) { | 2325 | tid = filp->f_version; |
2297 | unsigned long j = PROC_NUMBUF; | 2326 | filp->f_version = 0; |
2298 | int tid = tid_array[i]; | 2327 | for (task = first_tid(leader, tid, pos - 2); |
2299 | 2328 | task; | |
2300 | ino = fake_ino(tid,PROC_TID_INO); | 2329 | task = next_tid(task), pos++) { |
2301 | 2330 | int len; | |
2302 | do | 2331 | tid = task->pid; |
2303 | buf[--j] = '0' + (tid % 10); | 2332 | len = snprintf(buf, sizeof(buf), "%d", tid); |
2304 | while ((tid /= 10) != 0); | 2333 | ino = fake_ino(tid, PROC_TID_INO); |
2305 | 2334 | if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) { | |
2306 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) | 2335 | /* returning this tid failed, save it as the first |
2336 | * pid for the next readdir call */ | ||
2337 | filp->f_version = tid; | ||
2338 | put_task_struct(task); | ||
2307 | break; | 2339 | break; |
2308 | pos++; | 2340 | } |
2309 | } | 2341 | } |
2310 | out: | 2342 | out: |
2311 | filp->f_pos = pos; | 2343 | filp->f_pos = pos; |
2344 | put_task_struct(leader); | ||
2345 | out_no_task: | ||
2312 | return retval; | 2346 | return retval; |
2313 | } | 2347 | } |
2348 | |||
2349 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
2350 | { | ||
2351 | struct inode *inode = dentry->d_inode; | ||
2352 | struct task_struct *p = get_proc_task(inode); | ||
2353 | generic_fillattr(inode, stat); | ||
2354 | |||
2355 | if (p) { | ||
2356 | rcu_read_lock(); | ||
2357 | stat->nlink += get_nr_threads(p); | ||
2358 | rcu_read_unlock(); | ||
2359 | put_task_struct(p); | ||
2360 | } | ||
2361 | |||
2362 | return 0; | ||
2363 | } | ||