diff options
Diffstat (limited to 'fs/proc/base.c')
| -rw-r--r-- | fs/proc/base.c | 1107 |
1 files changed, 594 insertions, 513 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 6afff725a8c9..89c20d9d50bf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -49,7 +49,6 @@ | |||
| 49 | 49 | ||
| 50 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
| 51 | 51 | ||
| 52 | #include <linux/config.h> | ||
| 53 | #include <linux/errno.h> | 52 | #include <linux/errno.h> |
| 54 | #include <linux/time.h> | 53 | #include <linux/time.h> |
| 55 | #include <linux/proc_fs.h> | 54 | #include <linux/proc_fs.h> |
| @@ -74,6 +73,16 @@ | |||
| 74 | #include <linux/poll.h> | 73 | #include <linux/poll.h> |
| 75 | #include "internal.h" | 74 | #include "internal.h" |
| 76 | 75 | ||
| 76 | /* NOTE: | ||
| 77 | * Implementing inode permission operations in /proc is almost | ||
| 78 | * certainly an error. Permission checks need to happen during | ||
| 79 | * each system call not at open time. The reason is that most of | ||
| 80 | * what we wish to check for permissions in /proc varies at runtime. | ||
| 81 | * | ||
| 82 | * The classic example of a problem is opening file descriptors | ||
| 83 | * in /proc for a task before it execs a suid executable. | ||
| 84 | */ | ||
| 85 | |||
| 77 | /* | 86 | /* |
| 78 | * For hysterical raisins we keep the same inumbers as in the old procfs. | 87 | * For hysterical raisins we keep the same inumbers as in the old procfs. |
| 79 | * Feel free to change the macro below - just keep the range distinct from | 88 | * Feel free to change the macro below - just keep the range distinct from |
| @@ -121,6 +130,8 @@ enum pid_directory_inos { | |||
| 121 | PROC_TGID_ATTR_PREV, | 130 | PROC_TGID_ATTR_PREV, |
| 122 | PROC_TGID_ATTR_EXEC, | 131 | PROC_TGID_ATTR_EXEC, |
| 123 | PROC_TGID_ATTR_FSCREATE, | 132 | PROC_TGID_ATTR_FSCREATE, |
| 133 | PROC_TGID_ATTR_KEYCREATE, | ||
| 134 | PROC_TGID_ATTR_SOCKCREATE, | ||
| 124 | #endif | 135 | #endif |
| 125 | #ifdef CONFIG_AUDITSYSCALL | 136 | #ifdef CONFIG_AUDITSYSCALL |
| 126 | PROC_TGID_LOGINUID, | 137 | PROC_TGID_LOGINUID, |
| @@ -162,6 +173,8 @@ enum pid_directory_inos { | |||
| 162 | PROC_TID_ATTR_PREV, | 173 | PROC_TID_ATTR_PREV, |
| 163 | PROC_TID_ATTR_EXEC, | 174 | PROC_TID_ATTR_EXEC, |
| 164 | PROC_TID_ATTR_FSCREATE, | 175 | PROC_TID_ATTR_FSCREATE, |
| 176 | PROC_TID_ATTR_KEYCREATE, | ||
| 177 | PROC_TID_ATTR_SOCKCREATE, | ||
| 165 | #endif | 178 | #endif |
| 166 | #ifdef CONFIG_AUDITSYSCALL | 179 | #ifdef CONFIG_AUDITSYSCALL |
| 167 | PROC_TID_LOGINUID, | 180 | PROC_TID_LOGINUID, |
| @@ -173,6 +186,9 @@ enum pid_directory_inos { | |||
| 173 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ | 186 | PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ |
| 174 | }; | 187 | }; |
| 175 | 188 | ||
| 189 | /* Worst case buffer size needed for holding an integer. */ | ||
| 190 | #define PROC_NUMBUF 10 | ||
| 191 | |||
| 176 | struct pid_entry { | 192 | struct pid_entry { |
| 177 | int type; | 193 | int type; |
| 178 | int len; | 194 | int len; |
| @@ -275,6 +291,8 @@ static struct pid_entry tgid_attr_stuff[] = { | |||
| 275 | E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | 291 | E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), |
| 276 | E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | 292 | E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), |
| 277 | E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | 293 | E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), |
| 294 | E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
| 295 | E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
| 278 | {0,0,NULL,0} | 296 | {0,0,NULL,0} |
| 279 | }; | 297 | }; |
| 280 | static struct pid_entry tid_attr_stuff[] = { | 298 | static struct pid_entry tid_attr_stuff[] = { |
| @@ -282,6 +300,8 @@ static struct pid_entry tid_attr_stuff[] = { | |||
| 282 | E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), | 300 | E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), |
| 283 | E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), | 301 | E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), |
| 284 | E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), | 302 | E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), |
| 303 | E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
| 304 | E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO), | ||
| 285 | {0,0,NULL,0} | 305 | {0,0,NULL,0} |
| 286 | }; | 306 | }; |
| 287 | #endif | 307 | #endif |
| @@ -290,12 +310,15 @@ static struct pid_entry tid_attr_stuff[] = { | |||
| 290 | 310 | ||
| 291 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 311 | static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
| 292 | { | 312 | { |
| 293 | struct task_struct *task = proc_task(inode); | 313 | struct task_struct *task = get_proc_task(inode); |
| 294 | struct files_struct *files; | 314 | struct files_struct *files = NULL; |
| 295 | struct file *file; | 315 | struct file *file; |
| 296 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | 316 | int fd = proc_fd(inode); |
| 297 | 317 | ||
| 298 | files = get_files_struct(task); | 318 | if (task) { |
| 319 | files = get_files_struct(task); | ||
| 320 | put_task_struct(task); | ||
| 321 | } | ||
| 299 | if (files) { | 322 | if (files) { |
| 300 | /* | 323 | /* |
| 301 | * We are not taking a ref to the file structure, so we must | 324 | * We are not taking a ref to the file structure, so we must |
| @@ -327,29 +350,33 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) | |||
| 327 | return fs; | 350 | return fs; |
| 328 | } | 351 | } |
| 329 | 352 | ||
| 330 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 353 | static int get_nr_threads(struct task_struct *tsk) |
| 331 | { | 354 | { |
| 332 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 355 | /* Must be called with the rcu_read_lock held */ |
| 333 | int result = -ENOENT; | 356 | unsigned long flags; |
| 334 | if (fs) { | 357 | int count = 0; |
| 335 | read_lock(&fs->lock); | 358 | |
| 336 | *mnt = mntget(fs->pwdmnt); | 359 | if (lock_task_sighand(tsk, &flags)) { |
| 337 | *dentry = dget(fs->pwd); | 360 | count = atomic_read(&tsk->signal->count); |
| 338 | read_unlock(&fs->lock); | 361 | unlock_task_sighand(tsk, &flags); |
| 339 | result = 0; | ||
| 340 | put_fs_struct(fs); | ||
| 341 | } | 362 | } |
| 342 | return result; | 363 | return count; |
| 343 | } | 364 | } |
| 344 | 365 | ||
| 345 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | 366 | static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) |
| 346 | { | 367 | { |
| 347 | struct fs_struct *fs = get_fs_struct(proc_task(inode)); | 368 | struct task_struct *task = get_proc_task(inode); |
| 369 | struct fs_struct *fs = NULL; | ||
| 348 | int result = -ENOENT; | 370 | int result = -ENOENT; |
| 371 | |||
| 372 | if (task) { | ||
| 373 | fs = get_fs_struct(task); | ||
| 374 | put_task_struct(task); | ||
| 375 | } | ||
| 349 | if (fs) { | 376 | if (fs) { |
| 350 | read_lock(&fs->lock); | 377 | read_lock(&fs->lock); |
| 351 | *mnt = mntget(fs->rootmnt); | 378 | *mnt = mntget(fs->pwdmnt); |
| 352 | *dentry = dget(fs->root); | 379 | *dentry = dget(fs->pwd); |
| 353 | read_unlock(&fs->lock); | 380 | read_unlock(&fs->lock); |
| 354 | result = 0; | 381 | result = 0; |
| 355 | put_fs_struct(fs); | 382 | put_fs_struct(fs); |
| @@ -357,42 +384,16 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf | |||
| 357 | return result; | 384 | return result; |
| 358 | } | 385 | } |
| 359 | 386 | ||
| 360 | 387 | static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) | |
| 361 | /* Same as proc_root_link, but this addionally tries to get fs from other | ||
| 362 | * threads in the group */ | ||
| 363 | static int proc_task_root_link(struct inode *inode, struct dentry **dentry, | ||
| 364 | struct vfsmount **mnt) | ||
| 365 | { | 388 | { |
| 366 | struct fs_struct *fs; | 389 | struct task_struct *task = get_proc_task(inode); |
| 390 | struct fs_struct *fs = NULL; | ||
| 367 | int result = -ENOENT; | 391 | int result = -ENOENT; |
| 368 | struct task_struct *leader = proc_task(inode); | ||
| 369 | 392 | ||
| 370 | task_lock(leader); | 393 | if (task) { |
| 371 | fs = leader->fs; | 394 | fs = get_fs_struct(task); |
| 372 | if (fs) { | 395 | put_task_struct(task); |
| 373 | atomic_inc(&fs->count); | ||
| 374 | task_unlock(leader); | ||
| 375 | } else { | ||
| 376 | /* Try to get fs from other threads */ | ||
| 377 | task_unlock(leader); | ||
| 378 | read_lock(&tasklist_lock); | ||
| 379 | if (pid_alive(leader)) { | ||
| 380 | struct task_struct *task = leader; | ||
| 381 | |||
| 382 | while ((task = next_thread(task)) != leader) { | ||
| 383 | task_lock(task); | ||
| 384 | fs = task->fs; | ||
| 385 | if (fs) { | ||
| 386 | atomic_inc(&fs->count); | ||
| 387 | task_unlock(task); | ||
| 388 | break; | ||
| 389 | } | ||
| 390 | task_unlock(task); | ||
| 391 | } | ||
| 392 | } | ||
| 393 | read_unlock(&tasklist_lock); | ||
| 394 | } | 396 | } |
| 395 | |||
| 396 | if (fs) { | 397 | if (fs) { |
| 397 | read_lock(&fs->lock); | 398 | read_lock(&fs->lock); |
| 398 | *mnt = mntget(fs->rootmnt); | 399 | *mnt = mntget(fs->rootmnt); |
| @@ -404,7 +405,6 @@ static int proc_task_root_link(struct inode *inode, struct dentry **dentry, | |||
| 404 | return result; | 405 | return result; |
| 405 | } | 406 | } |
| 406 | 407 | ||
| 407 | |||
| 408 | #define MAY_PTRACE(task) \ | 408 | #define MAY_PTRACE(task) \ |
| 409 | (task == current || \ | 409 | (task == current || \ |
| 410 | (task->parent == current && \ | 410 | (task->parent == current && \ |
| @@ -535,141 +535,42 @@ static int proc_oom_score(struct task_struct *task, char *buffer) | |||
| 535 | /************************************************************************/ | 535 | /************************************************************************/ |
| 536 | 536 | ||
| 537 | /* permission checks */ | 537 | /* permission checks */ |
| 538 | 538 | static int proc_fd_access_allowed(struct inode *inode) | |
| 539 | /* If the process being read is separated by chroot from the reading process, | ||
| 540 | * don't let the reader access the threads. | ||
| 541 | * | ||
| 542 | * note: this does dput(root) and mntput(vfsmnt) on exit. | ||
| 543 | */ | ||
| 544 | static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) | ||
| 545 | { | 539 | { |
| 546 | struct dentry *de, *base; | 540 | struct task_struct *task; |
| 547 | struct vfsmount *our_vfsmnt, *mnt; | 541 | int allowed = 0; |
| 548 | int res = 0; | 542 | /* Allow access to a task's file descriptors if it is us or we |
| 549 | 543 | * may use ptrace attach to the process and find out that | |
| 550 | read_lock(&current->fs->lock); | 544 | * information. |
| 551 | our_vfsmnt = mntget(current->fs->rootmnt); | 545 | */ |
| 552 | base = dget(current->fs->root); | 546 | task = get_proc_task(inode); |
| 553 | read_unlock(&current->fs->lock); | 547 | if (task) { |
| 554 | 548 | allowed = ptrace_may_attach(task); | |
| 555 | spin_lock(&vfsmount_lock); | 549 | put_task_struct(task); |
| 556 | de = root; | ||
| 557 | mnt = vfsmnt; | ||
| 558 | |||
| 559 | while (mnt != our_vfsmnt) { | ||
| 560 | if (mnt == mnt->mnt_parent) | ||
| 561 | goto out; | ||
| 562 | de = mnt->mnt_mountpoint; | ||
| 563 | mnt = mnt->mnt_parent; | ||
| 564 | } | ||
| 565 | |||
| 566 | if (!is_subdir(de, base)) | ||
| 567 | goto out; | ||
| 568 | spin_unlock(&vfsmount_lock); | ||
| 569 | |||
| 570 | exit: | ||
| 571 | dput(base); | ||
| 572 | mntput(our_vfsmnt); | ||
| 573 | dput(root); | ||
| 574 | mntput(vfsmnt); | ||
| 575 | return res; | ||
| 576 | out: | ||
| 577 | spin_unlock(&vfsmount_lock); | ||
| 578 | res = -EACCES; | ||
| 579 | goto exit; | ||
| 580 | } | ||
| 581 | |||
| 582 | static int proc_check_root(struct inode *inode) | ||
| 583 | { | ||
| 584 | struct dentry *root; | ||
| 585 | struct vfsmount *vfsmnt; | ||
| 586 | |||
| 587 | if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ | ||
| 588 | return -ENOENT; | ||
| 589 | return proc_check_chroot(root, vfsmnt); | ||
| 590 | } | ||
| 591 | |||
| 592 | static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
| 593 | { | ||
| 594 | if (generic_permission(inode, mask, NULL) != 0) | ||
| 595 | return -EACCES; | ||
| 596 | return proc_check_root(inode); | ||
| 597 | } | ||
| 598 | |||
| 599 | static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) | ||
| 600 | { | ||
| 601 | struct dentry *root; | ||
| 602 | struct vfsmount *vfsmnt; | ||
| 603 | |||
| 604 | if (generic_permission(inode, mask, NULL) != 0) | ||
| 605 | return -EACCES; | ||
| 606 | |||
| 607 | if (proc_task_root_link(inode, &root, &vfsmnt)) | ||
| 608 | return -ENOENT; | ||
| 609 | |||
| 610 | return proc_check_chroot(root, vfsmnt); | ||
| 611 | } | ||
| 612 | |||
| 613 | extern struct seq_operations proc_pid_maps_op; | ||
| 614 | static int maps_open(struct inode *inode, struct file *file) | ||
| 615 | { | ||
| 616 | struct task_struct *task = proc_task(inode); | ||
| 617 | int ret = seq_open(file, &proc_pid_maps_op); | ||
| 618 | if (!ret) { | ||
| 619 | struct seq_file *m = file->private_data; | ||
| 620 | m->private = task; | ||
| 621 | } | 550 | } |
| 622 | return ret; | 551 | return allowed; |
| 623 | } | 552 | } |
| 624 | 553 | ||
| 625 | static struct file_operations proc_maps_operations = { | 554 | static int proc_setattr(struct dentry *dentry, struct iattr *attr) |
| 626 | .open = maps_open, | ||
| 627 | .read = seq_read, | ||
| 628 | .llseek = seq_lseek, | ||
| 629 | .release = seq_release, | ||
| 630 | }; | ||
| 631 | |||
| 632 | #ifdef CONFIG_NUMA | ||
| 633 | extern struct seq_operations proc_pid_numa_maps_op; | ||
| 634 | static int numa_maps_open(struct inode *inode, struct file *file) | ||
| 635 | { | 555 | { |
| 636 | struct task_struct *task = proc_task(inode); | 556 | int error; |
| 637 | int ret = seq_open(file, &proc_pid_numa_maps_op); | 557 | struct inode *inode = dentry->d_inode; |
| 638 | if (!ret) { | ||
| 639 | struct seq_file *m = file->private_data; | ||
| 640 | m->private = task; | ||
| 641 | } | ||
| 642 | return ret; | ||
| 643 | } | ||
| 644 | 558 | ||
| 645 | static struct file_operations proc_numa_maps_operations = { | 559 | if (attr->ia_valid & ATTR_MODE) |
| 646 | .open = numa_maps_open, | 560 | return -EPERM; |
| 647 | .read = seq_read, | ||
| 648 | .llseek = seq_lseek, | ||
| 649 | .release = seq_release, | ||
| 650 | }; | ||
| 651 | #endif | ||
| 652 | 561 | ||
| 653 | #ifdef CONFIG_MMU | 562 | error = inode_change_ok(inode, attr); |
| 654 | extern struct seq_operations proc_pid_smaps_op; | 563 | if (!error) { |
| 655 | static int smaps_open(struct inode *inode, struct file *file) | 564 | error = security_inode_setattr(dentry, attr); |
| 656 | { | 565 | if (!error) |
| 657 | struct task_struct *task = proc_task(inode); | 566 | error = inode_setattr(inode, attr); |
| 658 | int ret = seq_open(file, &proc_pid_smaps_op); | ||
| 659 | if (!ret) { | ||
| 660 | struct seq_file *m = file->private_data; | ||
| 661 | m->private = task; | ||
| 662 | } | 567 | } |
| 663 | return ret; | 568 | return error; |
| 664 | } | 569 | } |
| 665 | 570 | ||
| 666 | static struct file_operations proc_smaps_operations = { | 571 | static struct inode_operations proc_def_inode_operations = { |
| 667 | .open = smaps_open, | 572 | .setattr = proc_setattr, |
| 668 | .read = seq_read, | ||
| 669 | .llseek = seq_lseek, | ||
| 670 | .release = seq_release, | ||
| 671 | }; | 573 | }; |
| 672 | #endif | ||
| 673 | 574 | ||
| 674 | extern struct seq_operations mounts_op; | 575 | extern struct seq_operations mounts_op; |
| 675 | struct proc_mounts { | 576 | struct proc_mounts { |
| @@ -679,16 +580,19 @@ struct proc_mounts { | |||
| 679 | 580 | ||
| 680 | static int mounts_open(struct inode *inode, struct file *file) | 581 | static int mounts_open(struct inode *inode, struct file *file) |
| 681 | { | 582 | { |
| 682 | struct task_struct *task = proc_task(inode); | 583 | struct task_struct *task = get_proc_task(inode); |
| 683 | struct namespace *namespace; | 584 | struct namespace *namespace = NULL; |
| 684 | struct proc_mounts *p; | 585 | struct proc_mounts *p; |
| 685 | int ret = -EINVAL; | 586 | int ret = -EINVAL; |
| 686 | 587 | ||
| 687 | task_lock(task); | 588 | if (task) { |
| 688 | namespace = task->namespace; | 589 | task_lock(task); |
| 689 | if (namespace) | 590 | namespace = task->namespace; |
| 690 | get_namespace(namespace); | 591 | if (namespace) |
| 691 | task_unlock(task); | 592 | get_namespace(namespace); |
| 593 | task_unlock(task); | ||
| 594 | put_task_struct(task); | ||
| 595 | } | ||
| 692 | 596 | ||
| 693 | if (namespace) { | 597 | if (namespace) { |
| 694 | ret = -ENOMEM; | 598 | ret = -ENOMEM; |
| @@ -745,17 +649,21 @@ static struct file_operations proc_mounts_operations = { | |||
| 745 | extern struct seq_operations mountstats_op; | 649 | extern struct seq_operations mountstats_op; |
| 746 | static int mountstats_open(struct inode *inode, struct file *file) | 650 | static int mountstats_open(struct inode *inode, struct file *file) |
| 747 | { | 651 | { |
| 748 | struct task_struct *task = proc_task(inode); | ||
| 749 | int ret = seq_open(file, &mountstats_op); | 652 | int ret = seq_open(file, &mountstats_op); |
| 750 | 653 | ||
| 751 | if (!ret) { | 654 | if (!ret) { |
| 752 | struct seq_file *m = file->private_data; | 655 | struct seq_file *m = file->private_data; |
| 753 | struct namespace *namespace; | 656 | struct namespace *namespace = NULL; |
| 754 | task_lock(task); | 657 | struct task_struct *task = get_proc_task(inode); |
| 755 | namespace = task->namespace; | 658 | |
| 756 | if (namespace) | 659 | if (task) { |
| 757 | get_namespace(namespace); | 660 | task_lock(task); |
| 758 | task_unlock(task); | 661 | namespace = task->namespace; |
| 662 | if (namespace) | ||
| 663 | get_namespace(namespace); | ||
| 664 | task_unlock(task); | ||
| 665 | put_task_struct(task); | ||
| 666 | } | ||
| 759 | 667 | ||
| 760 | if (namespace) | 668 | if (namespace) |
| 761 | m->private = namespace; | 669 | m->private = namespace; |
| @@ -782,18 +690,27 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, | |||
| 782 | struct inode * inode = file->f_dentry->d_inode; | 690 | struct inode * inode = file->f_dentry->d_inode; |
| 783 | unsigned long page; | 691 | unsigned long page; |
| 784 | ssize_t length; | 692 | ssize_t length; |
| 785 | struct task_struct *task = proc_task(inode); | 693 | struct task_struct *task = get_proc_task(inode); |
| 694 | |||
| 695 | length = -ESRCH; | ||
| 696 | if (!task) | ||
| 697 | goto out_no_task; | ||
| 786 | 698 | ||
| 787 | if (count > PROC_BLOCK_SIZE) | 699 | if (count > PROC_BLOCK_SIZE) |
| 788 | count = PROC_BLOCK_SIZE; | 700 | count = PROC_BLOCK_SIZE; |
| 701 | |||
| 702 | length = -ENOMEM; | ||
| 789 | if (!(page = __get_free_page(GFP_KERNEL))) | 703 | if (!(page = __get_free_page(GFP_KERNEL))) |
| 790 | return -ENOMEM; | 704 | goto out; |
| 791 | 705 | ||
| 792 | length = PROC_I(inode)->op.proc_read(task, (char*)page); | 706 | length = PROC_I(inode)->op.proc_read(task, (char*)page); |
| 793 | 707 | ||
| 794 | if (length >= 0) | 708 | if (length >= 0) |
| 795 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 709 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
| 796 | free_page(page); | 710 | free_page(page); |
| 711 | out: | ||
| 712 | put_task_struct(task); | ||
| 713 | out_no_task: | ||
| 797 | return length; | 714 | return length; |
| 798 | } | 715 | } |
| 799 | 716 | ||
| @@ -810,12 +727,15 @@ static int mem_open(struct inode* inode, struct file* file) | |||
| 810 | static ssize_t mem_read(struct file * file, char __user * buf, | 727 | static ssize_t mem_read(struct file * file, char __user * buf, |
| 811 | size_t count, loff_t *ppos) | 728 | size_t count, loff_t *ppos) |
| 812 | { | 729 | { |
| 813 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 730 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
| 814 | char *page; | 731 | char *page; |
| 815 | unsigned long src = *ppos; | 732 | unsigned long src = *ppos; |
| 816 | int ret = -ESRCH; | 733 | int ret = -ESRCH; |
| 817 | struct mm_struct *mm; | 734 | struct mm_struct *mm; |
| 818 | 735 | ||
| 736 | if (!task) | ||
| 737 | goto out_no_task; | ||
| 738 | |||
| 819 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 739 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
| 820 | goto out; | 740 | goto out; |
| 821 | 741 | ||
| @@ -865,6 +785,8 @@ out_put: | |||
| 865 | out_free: | 785 | out_free: |
| 866 | free_page((unsigned long) page); | 786 | free_page((unsigned long) page); |
| 867 | out: | 787 | out: |
| 788 | put_task_struct(task); | ||
| 789 | out_no_task: | ||
| 868 | return ret; | 790 | return ret; |
| 869 | } | 791 | } |
| 870 | 792 | ||
| @@ -875,18 +797,24 @@ out: | |||
| 875 | static ssize_t mem_write(struct file * file, const char * buf, | 797 | static ssize_t mem_write(struct file * file, const char * buf, |
| 876 | size_t count, loff_t *ppos) | 798 | size_t count, loff_t *ppos) |
| 877 | { | 799 | { |
| 878 | int copied = 0; | 800 | int copied; |
| 879 | char *page; | 801 | char *page; |
| 880 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 802 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
| 881 | unsigned long dst = *ppos; | 803 | unsigned long dst = *ppos; |
| 882 | 804 | ||
| 805 | copied = -ESRCH; | ||
| 806 | if (!task) | ||
| 807 | goto out_no_task; | ||
| 808 | |||
| 883 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) | 809 | if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) |
| 884 | return -ESRCH; | 810 | goto out; |
| 885 | 811 | ||
| 812 | copied = -ENOMEM; | ||
| 886 | page = (char *)__get_free_page(GFP_USER); | 813 | page = (char *)__get_free_page(GFP_USER); |
| 887 | if (!page) | 814 | if (!page) |
| 888 | return -ENOMEM; | 815 | goto out; |
| 889 | 816 | ||
| 817 | copied = 0; | ||
| 890 | while (count > 0) { | 818 | while (count > 0) { |
| 891 | int this_len, retval; | 819 | int this_len, retval; |
| 892 | 820 | ||
| @@ -908,6 +836,9 @@ static ssize_t mem_write(struct file * file, const char * buf, | |||
| 908 | } | 836 | } |
| 909 | *ppos = dst; | 837 | *ppos = dst; |
| 910 | free_page((unsigned long) page); | 838 | free_page((unsigned long) page); |
| 839 | out: | ||
| 840 | put_task_struct(task); | ||
| 841 | out_no_task: | ||
| 911 | return copied; | 842 | return copied; |
| 912 | } | 843 | } |
| 913 | #endif | 844 | #endif |
| @@ -938,13 +869,18 @@ static struct file_operations proc_mem_operations = { | |||
| 938 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, | 869 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, |
| 939 | size_t count, loff_t *ppos) | 870 | size_t count, loff_t *ppos) |
| 940 | { | 871 | { |
| 941 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 872 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); |
| 942 | char buffer[8]; | 873 | char buffer[PROC_NUMBUF]; |
| 943 | size_t len; | 874 | size_t len; |
| 944 | int oom_adjust = task->oomkilladj; | 875 | int oom_adjust; |
| 945 | loff_t __ppos = *ppos; | 876 | loff_t __ppos = *ppos; |
| 946 | 877 | ||
| 947 | len = sprintf(buffer, "%i\n", oom_adjust); | 878 | if (!task) |
| 879 | return -ESRCH; | ||
| 880 | oom_adjust = task->oomkilladj; | ||
| 881 | put_task_struct(task); | ||
| 882 | |||
| 883 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | ||
| 948 | if (__ppos >= len) | 884 | if (__ppos >= len) |
| 949 | return 0; | 885 | return 0; |
| 950 | if (count > len-__ppos) | 886 | if (count > len-__ppos) |
| @@ -958,15 +894,15 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
| 958 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | 894 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, |
| 959 | size_t count, loff_t *ppos) | 895 | size_t count, loff_t *ppos) |
| 960 | { | 896 | { |
| 961 | struct task_struct *task = proc_task(file->f_dentry->d_inode); | 897 | struct task_struct *task; |
| 962 | char buffer[8], *end; | 898 | char buffer[PROC_NUMBUF], *end; |
| 963 | int oom_adjust; | 899 | int oom_adjust; |
| 964 | 900 | ||
| 965 | if (!capable(CAP_SYS_RESOURCE)) | 901 | if (!capable(CAP_SYS_RESOURCE)) |
| 966 | return -EPERM; | 902 | return -EPERM; |
| 967 | memset(buffer, 0, 8); | 903 | memset(buffer, 0, sizeof(buffer)); |
| 968 | if (count > 6) | 904 | if (count > sizeof(buffer) - 1) |
| 969 | count = 6; | 905 | count = sizeof(buffer) - 1; |
| 970 | if (copy_from_user(buffer, buf, count)) | 906 | if (copy_from_user(buffer, buf, count)) |
| 971 | return -EFAULT; | 907 | return -EFAULT; |
| 972 | oom_adjust = simple_strtol(buffer, &end, 0); | 908 | oom_adjust = simple_strtol(buffer, &end, 0); |
| @@ -974,7 +910,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
| 974 | return -EINVAL; | 910 | return -EINVAL; |
| 975 | if (*end == '\n') | 911 | if (*end == '\n') |
| 976 | end++; | 912 | end++; |
| 913 | task = get_proc_task(file->f_dentry->d_inode); | ||
| 914 | if (!task) | ||
| 915 | return -ESRCH; | ||
| 977 | task->oomkilladj = oom_adjust; | 916 | task->oomkilladj = oom_adjust; |
| 917 | put_task_struct(task); | ||
| 978 | if (end - buffer == 0) | 918 | if (end - buffer == 0) |
| 979 | return -EIO; | 919 | return -EIO; |
| 980 | return end - buffer; | 920 | return end - buffer; |
| @@ -985,22 +925,21 @@ static struct file_operations proc_oom_adjust_operations = { | |||
| 985 | .write = oom_adjust_write, | 925 | .write = oom_adjust_write, |
| 986 | }; | 926 | }; |
| 987 | 927 | ||
| 988 | static struct inode_operations proc_mem_inode_operations = { | ||
| 989 | .permission = proc_permission, | ||
| 990 | }; | ||
| 991 | |||
| 992 | #ifdef CONFIG_AUDITSYSCALL | 928 | #ifdef CONFIG_AUDITSYSCALL |
| 993 | #define TMPBUFLEN 21 | 929 | #define TMPBUFLEN 21 |
| 994 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | 930 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, |
| 995 | size_t count, loff_t *ppos) | 931 | size_t count, loff_t *ppos) |
| 996 | { | 932 | { |
| 997 | struct inode * inode = file->f_dentry->d_inode; | 933 | struct inode * inode = file->f_dentry->d_inode; |
| 998 | struct task_struct *task = proc_task(inode); | 934 | struct task_struct *task = get_proc_task(inode); |
| 999 | ssize_t length; | 935 | ssize_t length; |
| 1000 | char tmpbuf[TMPBUFLEN]; | 936 | char tmpbuf[TMPBUFLEN]; |
| 1001 | 937 | ||
| 938 | if (!task) | ||
| 939 | return -ESRCH; | ||
| 1002 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 940 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
| 1003 | audit_get_loginuid(task->audit_context)); | 941 | audit_get_loginuid(task->audit_context)); |
| 942 | put_task_struct(task); | ||
| 1004 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 943 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
| 1005 | } | 944 | } |
| 1006 | 945 | ||
| @@ -1010,13 +949,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
| 1010 | struct inode * inode = file->f_dentry->d_inode; | 949 | struct inode * inode = file->f_dentry->d_inode; |
| 1011 | char *page, *tmp; | 950 | char *page, *tmp; |
| 1012 | ssize_t length; | 951 | ssize_t length; |
| 1013 | struct task_struct *task = proc_task(inode); | ||
| 1014 | uid_t loginuid; | 952 | uid_t loginuid; |
| 1015 | 953 | ||
| 1016 | if (!capable(CAP_AUDIT_CONTROL)) | 954 | if (!capable(CAP_AUDIT_CONTROL)) |
| 1017 | return -EPERM; | 955 | return -EPERM; |
| 1018 | 956 | ||
| 1019 | if (current != task) | 957 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) |
| 1020 | return -EPERM; | 958 | return -EPERM; |
| 1021 | 959 | ||
| 1022 | if (count >= PAGE_SIZE) | 960 | if (count >= PAGE_SIZE) |
| @@ -1040,7 +978,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
| 1040 | goto out_free_page; | 978 | goto out_free_page; |
| 1041 | 979 | ||
| 1042 | } | 980 | } |
| 1043 | length = audit_set_loginuid(task, loginuid); | 981 | length = audit_set_loginuid(current, loginuid); |
| 1044 | if (likely(length == 0)) | 982 | if (likely(length == 0)) |
| 1045 | length = count; | 983 | length = count; |
| 1046 | 984 | ||
| @@ -1059,13 +997,16 @@ static struct file_operations proc_loginuid_operations = { | |||
| 1059 | static ssize_t seccomp_read(struct file *file, char __user *buf, | 997 | static ssize_t seccomp_read(struct file *file, char __user *buf, |
| 1060 | size_t count, loff_t *ppos) | 998 | size_t count, loff_t *ppos) |
| 1061 | { | 999 | { |
| 1062 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 1000 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
| 1063 | char __buf[20]; | 1001 | char __buf[20]; |
| 1064 | loff_t __ppos = *ppos; | 1002 | loff_t __ppos = *ppos; |
| 1065 | size_t len; | 1003 | size_t len; |
| 1066 | 1004 | ||
| 1005 | if (!tsk) | ||
| 1006 | return -ESRCH; | ||
| 1067 | /* no need to print the trailing zero, so use only len */ | 1007 | /* no need to print the trailing zero, so use only len */ |
| 1068 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); | 1008 | len = sprintf(__buf, "%u\n", tsk->seccomp.mode); |
| 1009 | put_task_struct(tsk); | ||
| 1069 | if (__ppos >= len) | 1010 | if (__ppos >= len) |
| 1070 | return 0; | 1011 | return 0; |
| 1071 | if (count > len - __ppos) | 1012 | if (count > len - __ppos) |
| @@ -1079,29 +1020,43 @@ static ssize_t seccomp_read(struct file *file, char __user *buf, | |||
| 1079 | static ssize_t seccomp_write(struct file *file, const char __user *buf, | 1020 | static ssize_t seccomp_write(struct file *file, const char __user *buf, |
| 1080 | size_t count, loff_t *ppos) | 1021 | size_t count, loff_t *ppos) |
| 1081 | { | 1022 | { |
| 1082 | struct task_struct *tsk = proc_task(file->f_dentry->d_inode); | 1023 | struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); |
| 1083 | char __buf[20], *end; | 1024 | char __buf[20], *end; |
| 1084 | unsigned int seccomp_mode; | 1025 | unsigned int seccomp_mode; |
| 1026 | ssize_t result; | ||
| 1027 | |||
| 1028 | result = -ESRCH; | ||
| 1029 | if (!tsk) | ||
| 1030 | goto out_no_task; | ||
| 1085 | 1031 | ||
| 1086 | /* can set it only once to be even more secure */ | 1032 | /* can set it only once to be even more secure */ |
| 1033 | result = -EPERM; | ||
| 1087 | if (unlikely(tsk->seccomp.mode)) | 1034 | if (unlikely(tsk->seccomp.mode)) |
| 1088 | return -EPERM; | 1035 | goto out; |
| 1089 | 1036 | ||
| 1037 | result = -EFAULT; | ||
| 1090 | memset(__buf, 0, sizeof(__buf)); | 1038 | memset(__buf, 0, sizeof(__buf)); |
| 1091 | count = min(count, sizeof(__buf) - 1); | 1039 | count = min(count, sizeof(__buf) - 1); |
| 1092 | if (copy_from_user(__buf, buf, count)) | 1040 | if (copy_from_user(__buf, buf, count)) |
| 1093 | return -EFAULT; | 1041 | goto out; |
| 1042 | |||
| 1094 | seccomp_mode = simple_strtoul(__buf, &end, 0); | 1043 | seccomp_mode = simple_strtoul(__buf, &end, 0); |
| 1095 | if (*end == '\n') | 1044 | if (*end == '\n') |
| 1096 | end++; | 1045 | end++; |
| 1046 | result = -EINVAL; | ||
| 1097 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | 1047 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { |
| 1098 | tsk->seccomp.mode = seccomp_mode; | 1048 | tsk->seccomp.mode = seccomp_mode; |
| 1099 | set_tsk_thread_flag(tsk, TIF_SECCOMP); | 1049 | set_tsk_thread_flag(tsk, TIF_SECCOMP); |
| 1100 | } else | 1050 | } else |
| 1101 | return -EINVAL; | 1051 | goto out; |
| 1052 | result = -EIO; | ||
| 1102 | if (unlikely(!(end - __buf))) | 1053 | if (unlikely(!(end - __buf))) |
| 1103 | return -EIO; | 1054 | goto out; |
| 1104 | return end - __buf; | 1055 | result = end - __buf; |
| 1056 | out: | ||
| 1057 | put_task_struct(tsk); | ||
| 1058 | out_no_task: | ||
| 1059 | return result; | ||
| 1105 | } | 1060 | } |
| 1106 | 1061 | ||
| 1107 | static struct file_operations proc_seccomp_operations = { | 1062 | static struct file_operations proc_seccomp_operations = { |
| @@ -1118,10 +1073,8 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 1118 | /* We don't need a base pointer in the /proc filesystem */ | 1073 | /* We don't need a base pointer in the /proc filesystem */ |
| 1119 | path_release(nd); | 1074 | path_release(nd); |
| 1120 | 1075 | ||
| 1121 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | 1076 | /* Are we allowed to snoop on the task's file descriptors? */ |
| 1122 | goto out; | 1077 | if (!proc_fd_access_allowed(inode)) |
| 1123 | error = proc_check_root(inode); | ||
| 1124 | if (error) | ||
| 1125 | goto out; | 1078 | goto out; |
| 1126 | 1079 | ||
| 1127 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); | 1080 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); |
| @@ -1163,12 +1116,8 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
| 1163 | struct dentry *de; | 1116 | struct dentry *de; |
| 1164 | struct vfsmount *mnt = NULL; | 1117 | struct vfsmount *mnt = NULL; |
| 1165 | 1118 | ||
| 1166 | lock_kernel(); | 1119 | /* Are we allowed to snoop on the task's file descriptors? */ |
| 1167 | 1120 | if (!proc_fd_access_allowed(inode)) | |
| 1168 | if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) | ||
| 1169 | goto out; | ||
| 1170 | error = proc_check_root(inode); | ||
| 1171 | if (error) | ||
| 1172 | goto out; | 1121 | goto out; |
| 1173 | 1122 | ||
| 1174 | error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); | 1123 | error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); |
| @@ -1179,30 +1128,29 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
| 1179 | dput(de); | 1128 | dput(de); |
| 1180 | mntput(mnt); | 1129 | mntput(mnt); |
| 1181 | out: | 1130 | out: |
| 1182 | unlock_kernel(); | ||
| 1183 | return error; | 1131 | return error; |
| 1184 | } | 1132 | } |
| 1185 | 1133 | ||
| 1186 | static struct inode_operations proc_pid_link_inode_operations = { | 1134 | static struct inode_operations proc_pid_link_inode_operations = { |
| 1187 | .readlink = proc_pid_readlink, | 1135 | .readlink = proc_pid_readlink, |
| 1188 | .follow_link = proc_pid_follow_link | 1136 | .follow_link = proc_pid_follow_link, |
| 1137 | .setattr = proc_setattr, | ||
| 1189 | }; | 1138 | }; |
| 1190 | 1139 | ||
| 1191 | #define NUMBUF 10 | ||
| 1192 | |||
| 1193 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | 1140 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) |
| 1194 | { | 1141 | { |
| 1195 | struct inode *inode = filp->f_dentry->d_inode; | 1142 | struct dentry *dentry = filp->f_dentry; |
| 1196 | struct task_struct *p = proc_task(inode); | 1143 | struct inode *inode = dentry->d_inode; |
| 1144 | struct task_struct *p = get_proc_task(inode); | ||
| 1197 | unsigned int fd, tid, ino; | 1145 | unsigned int fd, tid, ino; |
| 1198 | int retval; | 1146 | int retval; |
| 1199 | char buf[NUMBUF]; | 1147 | char buf[PROC_NUMBUF]; |
| 1200 | struct files_struct * files; | 1148 | struct files_struct * files; |
| 1201 | struct fdtable *fdt; | 1149 | struct fdtable *fdt; |
| 1202 | 1150 | ||
| 1203 | retval = -ENOENT; | 1151 | retval = -ENOENT; |
| 1204 | if (!pid_alive(p)) | 1152 | if (!p) |
| 1205 | goto out; | 1153 | goto out_no_task; |
| 1206 | retval = 0; | 1154 | retval = 0; |
| 1207 | tid = p->pid; | 1155 | tid = p->pid; |
| 1208 | 1156 | ||
| @@ -1213,7 +1161,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
| 1213 | goto out; | 1161 | goto out; |
| 1214 | filp->f_pos++; | 1162 | filp->f_pos++; |
| 1215 | case 1: | 1163 | case 1: |
| 1216 | ino = fake_ino(tid, PROC_TID_INO); | 1164 | ino = parent_ino(dentry); |
| 1217 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | 1165 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) |
| 1218 | goto out; | 1166 | goto out; |
| 1219 | filp->f_pos++; | 1167 | filp->f_pos++; |
| @@ -1232,7 +1180,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
| 1232 | continue; | 1180 | continue; |
| 1233 | rcu_read_unlock(); | 1181 | rcu_read_unlock(); |
| 1234 | 1182 | ||
| 1235 | j = NUMBUF; | 1183 | j = PROC_NUMBUF; |
| 1236 | i = fd; | 1184 | i = fd; |
| 1237 | do { | 1185 | do { |
| 1238 | j--; | 1186 | j--; |
| @@ -1241,7 +1189,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
| 1241 | } while (i); | 1189 | } while (i); |
| 1242 | 1190 | ||
| 1243 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); | 1191 | ino = fake_ino(tid, PROC_TID_FD_DIR + fd); |
| 1244 | if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { | 1192 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) { |
| 1245 | rcu_read_lock(); | 1193 | rcu_read_lock(); |
| 1246 | break; | 1194 | break; |
| 1247 | } | 1195 | } |
| @@ -1251,6 +1199,8 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | |||
| 1251 | put_files_struct(files); | 1199 | put_files_struct(files); |
| 1252 | } | 1200 | } |
| 1253 | out: | 1201 | out: |
| 1202 | put_task_struct(p); | ||
| 1203 | out_no_task: | ||
| 1254 | return retval; | 1204 | return retval; |
| 1255 | } | 1205 | } |
| 1256 | 1206 | ||
| @@ -1262,16 +1212,18 @@ static int proc_pident_readdir(struct file *filp, | |||
| 1262 | int pid; | 1212 | int pid; |
| 1263 | struct dentry *dentry = filp->f_dentry; | 1213 | struct dentry *dentry = filp->f_dentry; |
| 1264 | struct inode *inode = dentry->d_inode; | 1214 | struct inode *inode = dentry->d_inode; |
| 1215 | struct task_struct *task = get_proc_task(inode); | ||
| 1265 | struct pid_entry *p; | 1216 | struct pid_entry *p; |
| 1266 | ino_t ino; | 1217 | ino_t ino; |
| 1267 | int ret; | 1218 | int ret; |
| 1268 | 1219 | ||
| 1269 | ret = -ENOENT; | 1220 | ret = -ENOENT; |
| 1270 | if (!pid_alive(proc_task(inode))) | 1221 | if (!task) |
| 1271 | goto out; | 1222 | goto out; |
| 1272 | 1223 | ||
| 1273 | ret = 0; | 1224 | ret = 0; |
| 1274 | pid = proc_task(inode)->pid; | 1225 | pid = task->pid; |
| 1226 | put_task_struct(task); | ||
| 1275 | i = filp->f_pos; | 1227 | i = filp->f_pos; |
| 1276 | switch (i) { | 1228 | switch (i) { |
| 1277 | case 0: | 1229 | case 0: |
| @@ -1354,22 +1306,20 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st | |||
| 1354 | 1306 | ||
| 1355 | /* Common stuff */ | 1307 | /* Common stuff */ |
| 1356 | ei = PROC_I(inode); | 1308 | ei = PROC_I(inode); |
| 1357 | ei->task = NULL; | ||
| 1358 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1309 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
| 1359 | inode->i_ino = fake_ino(task->pid, ino); | 1310 | inode->i_ino = fake_ino(task->pid, ino); |
| 1360 | 1311 | inode->i_op = &proc_def_inode_operations; | |
| 1361 | if (!pid_alive(task)) | ||
| 1362 | goto out_unlock; | ||
| 1363 | 1312 | ||
| 1364 | /* | 1313 | /* |
| 1365 | * grab the reference to task. | 1314 | * grab the reference to task. |
| 1366 | */ | 1315 | */ |
| 1367 | get_task_struct(task); | 1316 | ei->pid = get_pid(task->pids[PIDTYPE_PID].pid); |
| 1368 | ei->task = task; | 1317 | if (!ei->pid) |
| 1369 | ei->type = ino; | 1318 | goto out_unlock; |
| 1319 | |||
| 1370 | inode->i_uid = 0; | 1320 | inode->i_uid = 0; |
| 1371 | inode->i_gid = 0; | 1321 | inode->i_gid = 0; |
| 1372 | if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { | 1322 | if (task_dumpable(task)) { |
| 1373 | inode->i_uid = task->euid; | 1323 | inode->i_uid = task->euid; |
| 1374 | inode->i_gid = task->egid; | 1324 | inode->i_gid = task->egid; |
| 1375 | } | 1325 | } |
| @@ -1379,7 +1329,6 @@ out: | |||
| 1379 | return inode; | 1329 | return inode; |
| 1380 | 1330 | ||
| 1381 | out_unlock: | 1331 | out_unlock: |
| 1382 | ei->pde = NULL; | ||
| 1383 | iput(inode); | 1332 | iput(inode); |
| 1384 | return NULL; | 1333 | return NULL; |
| 1385 | } | 1334 | } |
| @@ -1393,73 +1342,99 @@ out_unlock: | |||
| 1393 | * | 1342 | * |
| 1394 | * Rewrite the inode's ownerships here because the owning task may have | 1343 | * Rewrite the inode's ownerships here because the owning task may have |
| 1395 | * performed a setuid(), etc. | 1344 | * performed a setuid(), etc. |
| 1345 | * | ||
| 1346 | * Before the /proc/pid/status file was created the only way to read | ||
| 1347 | * the effective uid of a process was to stat /proc/pid. Reading | ||
| 1348 | * /proc/pid/status is slow enough that procps and other packages | ||
| 1349 | * kept calling stat() on /proc/pid. To keep the rules in /proc simple I have | ||
| 1350 | * made this apply to all per process world readable and executable | ||
| 1351 | * directories. | ||
| 1396 | */ | 1352 | */ |
| 1397 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | 1353 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
| 1398 | { | 1354 | { |
| 1399 | struct inode *inode = dentry->d_inode; | 1355 | struct inode *inode = dentry->d_inode; |
| 1400 | struct task_struct *task = proc_task(inode); | 1356 | struct task_struct *task = get_proc_task(inode); |
| 1401 | if (pid_alive(task)) { | 1357 | if (task) { |
| 1402 | if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { | 1358 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
| 1359 | task_dumpable(task)) { | ||
| 1403 | inode->i_uid = task->euid; | 1360 | inode->i_uid = task->euid; |
| 1404 | inode->i_gid = task->egid; | 1361 | inode->i_gid = task->egid; |
| 1405 | } else { | 1362 | } else { |
| 1406 | inode->i_uid = 0; | 1363 | inode->i_uid = 0; |
| 1407 | inode->i_gid = 0; | 1364 | inode->i_gid = 0; |
| 1408 | } | 1365 | } |
| 1366 | inode->i_mode &= ~(S_ISUID | S_ISGID); | ||
| 1409 | security_task_to_inode(task, inode); | 1367 | security_task_to_inode(task, inode); |
| 1368 | put_task_struct(task); | ||
| 1410 | return 1; | 1369 | return 1; |
| 1411 | } | 1370 | } |
| 1412 | d_drop(dentry); | 1371 | d_drop(dentry); |
| 1413 | return 0; | 1372 | return 0; |
| 1414 | } | 1373 | } |
| 1415 | 1374 | ||
| 1375 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
| 1376 | { | ||
| 1377 | struct inode *inode = dentry->d_inode; | ||
| 1378 | struct task_struct *task; | ||
| 1379 | generic_fillattr(inode, stat); | ||
| 1380 | |||
| 1381 | rcu_read_lock(); | ||
| 1382 | stat->uid = 0; | ||
| 1383 | stat->gid = 0; | ||
| 1384 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
| 1385 | if (task) { | ||
| 1386 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | ||
| 1387 | task_dumpable(task)) { | ||
| 1388 | stat->uid = task->euid; | ||
| 1389 | stat->gid = task->egid; | ||
| 1390 | } | ||
| 1391 | } | ||
| 1392 | rcu_read_unlock(); | ||
| 1393 | return 0; | ||
| 1394 | } | ||
| 1395 | |||
| 1416 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1396 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
| 1417 | { | 1397 | { |
| 1418 | struct inode *inode = dentry->d_inode; | 1398 | struct inode *inode = dentry->d_inode; |
| 1419 | struct task_struct *task = proc_task(inode); | 1399 | struct task_struct *task = get_proc_task(inode); |
| 1420 | int fd = proc_type(inode) - PROC_TID_FD_DIR; | 1400 | int fd = proc_fd(inode); |
| 1421 | struct files_struct *files; | 1401 | struct files_struct *files; |
| 1422 | 1402 | ||
| 1423 | files = get_files_struct(task); | 1403 | if (task) { |
| 1424 | if (files) { | 1404 | files = get_files_struct(task); |
| 1425 | rcu_read_lock(); | 1405 | if (files) { |
| 1426 | if (fcheck_files(files, fd)) { | 1406 | rcu_read_lock(); |
| 1407 | if (fcheck_files(files, fd)) { | ||
| 1408 | rcu_read_unlock(); | ||
| 1409 | put_files_struct(files); | ||
| 1410 | if (task_dumpable(task)) { | ||
| 1411 | inode->i_uid = task->euid; | ||
| 1412 | inode->i_gid = task->egid; | ||
| 1413 | } else { | ||
| 1414 | inode->i_uid = 0; | ||
| 1415 | inode->i_gid = 0; | ||
| 1416 | } | ||
| 1417 | inode->i_mode &= ~(S_ISUID | S_ISGID); | ||
| 1418 | security_task_to_inode(task, inode); | ||
| 1419 | put_task_struct(task); | ||
| 1420 | return 1; | ||
| 1421 | } | ||
| 1427 | rcu_read_unlock(); | 1422 | rcu_read_unlock(); |
| 1428 | put_files_struct(files); | 1423 | put_files_struct(files); |
| 1429 | if (task_dumpable(task)) { | ||
| 1430 | inode->i_uid = task->euid; | ||
| 1431 | inode->i_gid = task->egid; | ||
| 1432 | } else { | ||
| 1433 | inode->i_uid = 0; | ||
| 1434 | inode->i_gid = 0; | ||
| 1435 | } | ||
| 1436 | security_task_to_inode(task, inode); | ||
| 1437 | return 1; | ||
| 1438 | } | 1424 | } |
| 1439 | rcu_read_unlock(); | 1425 | put_task_struct(task); |
| 1440 | put_files_struct(files); | ||
| 1441 | } | 1426 | } |
| 1442 | d_drop(dentry); | 1427 | d_drop(dentry); |
| 1443 | return 0; | 1428 | return 0; |
| 1444 | } | 1429 | } |
| 1445 | 1430 | ||
| 1446 | static void pid_base_iput(struct dentry *dentry, struct inode *inode) | ||
| 1447 | { | ||
| 1448 | struct task_struct *task = proc_task(inode); | ||
| 1449 | spin_lock(&task->proc_lock); | ||
| 1450 | if (task->proc_dentry == dentry) | ||
| 1451 | task->proc_dentry = NULL; | ||
| 1452 | spin_unlock(&task->proc_lock); | ||
| 1453 | iput(inode); | ||
| 1454 | } | ||
| 1455 | |||
| 1456 | static int pid_delete_dentry(struct dentry * dentry) | 1431 | static int pid_delete_dentry(struct dentry * dentry) |
| 1457 | { | 1432 | { |
| 1458 | /* Is the task we represent dead? | 1433 | /* Is the task we represent dead? |
| 1459 | * If so, then don't put the dentry on the lru list, | 1434 | * If so, then don't put the dentry on the lru list, |
| 1460 | * kill it immediately. | 1435 | * kill it immediately. |
| 1461 | */ | 1436 | */ |
| 1462 | return !pid_alive(proc_task(dentry->d_inode)); | 1437 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; |
| 1463 | } | 1438 | } |
| 1464 | 1439 | ||
| 1465 | static struct dentry_operations tid_fd_dentry_operations = | 1440 | static struct dentry_operations tid_fd_dentry_operations = |
| @@ -1474,13 +1449,6 @@ static struct dentry_operations pid_dentry_operations = | |||
| 1474 | .d_delete = pid_delete_dentry, | 1449 | .d_delete = pid_delete_dentry, |
| 1475 | }; | 1450 | }; |
| 1476 | 1451 | ||
| 1477 | static struct dentry_operations pid_base_dentry_operations = | ||
| 1478 | { | ||
| 1479 | .d_revalidate = pid_revalidate, | ||
| 1480 | .d_iput = pid_base_iput, | ||
| 1481 | .d_delete = pid_delete_dentry, | ||
| 1482 | }; | ||
| 1483 | |||
| 1484 | /* Lookups */ | 1452 | /* Lookups */ |
| 1485 | 1453 | ||
| 1486 | static unsigned name_to_int(struct dentry *dentry) | 1454 | static unsigned name_to_int(struct dentry *dentry) |
| @@ -1508,22 +1476,24 @@ out: | |||
| 1508 | /* SMP-safe */ | 1476 | /* SMP-safe */ |
| 1509 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | 1477 | static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) |
| 1510 | { | 1478 | { |
| 1511 | struct task_struct *task = proc_task(dir); | 1479 | struct task_struct *task = get_proc_task(dir); |
| 1512 | unsigned fd = name_to_int(dentry); | 1480 | unsigned fd = name_to_int(dentry); |
| 1481 | struct dentry *result = ERR_PTR(-ENOENT); | ||
| 1513 | struct file * file; | 1482 | struct file * file; |
| 1514 | struct files_struct * files; | 1483 | struct files_struct * files; |
| 1515 | struct inode *inode; | 1484 | struct inode *inode; |
| 1516 | struct proc_inode *ei; | 1485 | struct proc_inode *ei; |
| 1517 | 1486 | ||
| 1487 | if (!task) | ||
| 1488 | goto out_no_task; | ||
| 1518 | if (fd == ~0U) | 1489 | if (fd == ~0U) |
| 1519 | goto out; | 1490 | goto out; |
| 1520 | if (!pid_alive(task)) | ||
| 1521 | goto out; | ||
| 1522 | 1491 | ||
| 1523 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); | 1492 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); |
| 1524 | if (!inode) | 1493 | if (!inode) |
| 1525 | goto out; | 1494 | goto out; |
| 1526 | ei = PROC_I(inode); | 1495 | ei = PROC_I(inode); |
| 1496 | ei->fd = fd; | ||
| 1527 | files = get_files_struct(task); | 1497 | files = get_files_struct(task); |
| 1528 | if (!files) | 1498 | if (!files) |
| 1529 | goto out_unlock; | 1499 | goto out_unlock; |
| @@ -1548,19 +1518,25 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, | |||
| 1548 | ei->op.proc_get_link = proc_fd_link; | 1518 | ei->op.proc_get_link = proc_fd_link; |
| 1549 | dentry->d_op = &tid_fd_dentry_operations; | 1519 | dentry->d_op = &tid_fd_dentry_operations; |
| 1550 | d_add(dentry, inode); | 1520 | d_add(dentry, inode); |
| 1551 | return NULL; | 1521 | /* Close the race of the process dying before we return the dentry */ |
| 1522 | if (tid_fd_revalidate(dentry, NULL)) | ||
| 1523 | result = NULL; | ||
| 1524 | out: | ||
| 1525 | put_task_struct(task); | ||
| 1526 | out_no_task: | ||
| 1527 | return result; | ||
| 1552 | 1528 | ||
| 1553 | out_unlock2: | 1529 | out_unlock2: |
| 1554 | spin_unlock(&files->file_lock); | 1530 | spin_unlock(&files->file_lock); |
| 1555 | put_files_struct(files); | 1531 | put_files_struct(files); |
| 1556 | out_unlock: | 1532 | out_unlock: |
| 1557 | iput(inode); | 1533 | iput(inode); |
| 1558 | out: | 1534 | goto out; |
| 1559 | return ERR_PTR(-ENOENT); | ||
| 1560 | } | 1535 | } |
| 1561 | 1536 | ||
| 1562 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); | 1537 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); |
| 1563 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); | 1538 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); |
| 1539 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | ||
| 1564 | 1540 | ||
| 1565 | static struct file_operations proc_fd_operations = { | 1541 | static struct file_operations proc_fd_operations = { |
| 1566 | .read = generic_read_dir, | 1542 | .read = generic_read_dir, |
| @@ -1577,12 +1553,13 @@ static struct file_operations proc_task_operations = { | |||
| 1577 | */ | 1553 | */ |
| 1578 | static struct inode_operations proc_fd_inode_operations = { | 1554 | static struct inode_operations proc_fd_inode_operations = { |
| 1579 | .lookup = proc_lookupfd, | 1555 | .lookup = proc_lookupfd, |
| 1580 | .permission = proc_permission, | 1556 | .setattr = proc_setattr, |
| 1581 | }; | 1557 | }; |
| 1582 | 1558 | ||
| 1583 | static struct inode_operations proc_task_inode_operations = { | 1559 | static struct inode_operations proc_task_inode_operations = { |
| 1584 | .lookup = proc_task_lookup, | 1560 | .lookup = proc_task_lookup, |
| 1585 | .permission = proc_task_permission, | 1561 | .getattr = proc_task_getattr, |
| 1562 | .setattr = proc_setattr, | ||
| 1586 | }; | 1563 | }; |
| 1587 | 1564 | ||
| 1588 | #ifdef CONFIG_SECURITY | 1565 | #ifdef CONFIG_SECURITY |
| @@ -1592,12 +1569,17 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
| 1592 | struct inode * inode = file->f_dentry->d_inode; | 1569 | struct inode * inode = file->f_dentry->d_inode; |
| 1593 | unsigned long page; | 1570 | unsigned long page; |
| 1594 | ssize_t length; | 1571 | ssize_t length; |
| 1595 | struct task_struct *task = proc_task(inode); | 1572 | struct task_struct *task = get_proc_task(inode); |
| 1573 | |||
| 1574 | length = -ESRCH; | ||
| 1575 | if (!task) | ||
| 1576 | goto out_no_task; | ||
| 1596 | 1577 | ||
| 1597 | if (count > PAGE_SIZE) | 1578 | if (count > PAGE_SIZE) |
| 1598 | count = PAGE_SIZE; | 1579 | count = PAGE_SIZE; |
| 1580 | length = -ENOMEM; | ||
| 1599 | if (!(page = __get_free_page(GFP_KERNEL))) | 1581 | if (!(page = __get_free_page(GFP_KERNEL))) |
| 1600 | return -ENOMEM; | 1582 | goto out; |
| 1601 | 1583 | ||
| 1602 | length = security_getprocattr(task, | 1584 | length = security_getprocattr(task, |
| 1603 | (char*)file->f_dentry->d_name.name, | 1585 | (char*)file->f_dentry->d_name.name, |
| @@ -1605,6 +1587,9 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, | |||
| 1605 | if (length >= 0) | 1587 | if (length >= 0) |
| 1606 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); | 1588 | length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); |
| 1607 | free_page(page); | 1589 | free_page(page); |
| 1590 | out: | ||
| 1591 | put_task_struct(task); | ||
| 1592 | out_no_task: | ||
| 1608 | return length; | 1593 | return length; |
| 1609 | } | 1594 | } |
| 1610 | 1595 | ||
| @@ -1614,26 +1599,36 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | |||
| 1614 | struct inode * inode = file->f_dentry->d_inode; | 1599 | struct inode * inode = file->f_dentry->d_inode; |
| 1615 | char *page; | 1600 | char *page; |
| 1616 | ssize_t length; | 1601 | ssize_t length; |
| 1617 | struct task_struct *task = proc_task(inode); | 1602 | struct task_struct *task = get_proc_task(inode); |
| 1618 | 1603 | ||
| 1604 | length = -ESRCH; | ||
| 1605 | if (!task) | ||
| 1606 | goto out_no_task; | ||
| 1619 | if (count > PAGE_SIZE) | 1607 | if (count > PAGE_SIZE) |
| 1620 | count = PAGE_SIZE; | 1608 | count = PAGE_SIZE; |
| 1621 | if (*ppos != 0) { | 1609 | |
| 1622 | /* No partial writes. */ | 1610 | /* No partial writes. */ |
| 1623 | return -EINVAL; | 1611 | length = -EINVAL; |
| 1624 | } | 1612 | if (*ppos != 0) |
| 1613 | goto out; | ||
| 1614 | |||
| 1615 | length = -ENOMEM; | ||
| 1625 | page = (char*)__get_free_page(GFP_USER); | 1616 | page = (char*)__get_free_page(GFP_USER); |
| 1626 | if (!page) | 1617 | if (!page) |
| 1627 | return -ENOMEM; | 1618 | goto out; |
| 1619 | |||
| 1628 | length = -EFAULT; | 1620 | length = -EFAULT; |
| 1629 | if (copy_from_user(page, buf, count)) | 1621 | if (copy_from_user(page, buf, count)) |
| 1630 | goto out; | 1622 | goto out_free; |
| 1631 | 1623 | ||
| 1632 | length = security_setprocattr(task, | 1624 | length = security_setprocattr(task, |
| 1633 | (char*)file->f_dentry->d_name.name, | 1625 | (char*)file->f_dentry->d_name.name, |
| 1634 | (void*)page, count); | 1626 | (void*)page, count); |
| 1635 | out: | 1627 | out_free: |
| 1636 | free_page((unsigned long) page); | 1628 | free_page((unsigned long) page); |
| 1629 | out: | ||
| 1630 | put_task_struct(task); | ||
| 1631 | out_no_task: | ||
| 1637 | return length; | 1632 | return length; |
| 1638 | } | 1633 | } |
| 1639 | 1634 | ||
| @@ -1648,24 +1643,22 @@ static struct file_operations proc_tgid_attr_operations; | |||
| 1648 | static struct inode_operations proc_tgid_attr_inode_operations; | 1643 | static struct inode_operations proc_tgid_attr_inode_operations; |
| 1649 | #endif | 1644 | #endif |
| 1650 | 1645 | ||
| 1651 | static int get_tid_list(int index, unsigned int *tids, struct inode *dir); | ||
| 1652 | |||
| 1653 | /* SMP-safe */ | 1646 | /* SMP-safe */ |
| 1654 | static struct dentry *proc_pident_lookup(struct inode *dir, | 1647 | static struct dentry *proc_pident_lookup(struct inode *dir, |
| 1655 | struct dentry *dentry, | 1648 | struct dentry *dentry, |
| 1656 | struct pid_entry *ents) | 1649 | struct pid_entry *ents) |
| 1657 | { | 1650 | { |
| 1658 | struct inode *inode; | 1651 | struct inode *inode; |
| 1659 | int error; | 1652 | struct dentry *error; |
| 1660 | struct task_struct *task = proc_task(dir); | 1653 | struct task_struct *task = get_proc_task(dir); |
| 1661 | struct pid_entry *p; | 1654 | struct pid_entry *p; |
| 1662 | struct proc_inode *ei; | 1655 | struct proc_inode *ei; |
| 1663 | 1656 | ||
| 1664 | error = -ENOENT; | 1657 | error = ERR_PTR(-ENOENT); |
| 1665 | inode = NULL; | 1658 | inode = NULL; |
| 1666 | 1659 | ||
| 1667 | if (!pid_alive(task)) | 1660 | if (!task) |
| 1668 | goto out; | 1661 | goto out_no_task; |
| 1669 | 1662 | ||
| 1670 | for (p = ents; p->name; p++) { | 1663 | for (p = ents; p->name; p++) { |
| 1671 | if (p->len != dentry->d_name.len) | 1664 | if (p->len != dentry->d_name.len) |
| @@ -1676,7 +1669,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
| 1676 | if (!p->name) | 1669 | if (!p->name) |
| 1677 | goto out; | 1670 | goto out; |
| 1678 | 1671 | ||
| 1679 | error = -EINVAL; | 1672 | error = ERR_PTR(-EINVAL); |
| 1680 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); | 1673 | inode = proc_pid_make_inode(dir->i_sb, task, p->type); |
| 1681 | if (!inode) | 1674 | if (!inode) |
| 1682 | goto out; | 1675 | goto out; |
| @@ -1689,7 +1682,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
| 1689 | */ | 1682 | */ |
| 1690 | switch(p->type) { | 1683 | switch(p->type) { |
| 1691 | case PROC_TGID_TASK: | 1684 | case PROC_TGID_TASK: |
| 1692 | inode->i_nlink = 2 + get_tid_list(2, NULL, dir); | 1685 | inode->i_nlink = 2; |
| 1693 | inode->i_op = &proc_task_inode_operations; | 1686 | inode->i_op = &proc_task_inode_operations; |
| 1694 | inode->i_fop = &proc_task_operations; | 1687 | inode->i_fop = &proc_task_operations; |
| 1695 | break; | 1688 | break; |
| @@ -1759,7 +1752,6 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
| 1759 | #endif | 1752 | #endif |
| 1760 | case PROC_TID_MEM: | 1753 | case PROC_TID_MEM: |
| 1761 | case PROC_TGID_MEM: | 1754 | case PROC_TGID_MEM: |
| 1762 | inode->i_op = &proc_mem_inode_operations; | ||
| 1763 | inode->i_fop = &proc_mem_operations; | 1755 | inode->i_fop = &proc_mem_operations; |
| 1764 | break; | 1756 | break; |
| 1765 | #ifdef CONFIG_SECCOMP | 1757 | #ifdef CONFIG_SECCOMP |
| @@ -1801,6 +1793,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
| 1801 | case PROC_TGID_ATTR_EXEC: | 1793 | case PROC_TGID_ATTR_EXEC: |
| 1802 | case PROC_TID_ATTR_FSCREATE: | 1794 | case PROC_TID_ATTR_FSCREATE: |
| 1803 | case PROC_TGID_ATTR_FSCREATE: | 1795 | case PROC_TGID_ATTR_FSCREATE: |
| 1796 | case PROC_TID_ATTR_KEYCREATE: | ||
| 1797 | case PROC_TGID_ATTR_KEYCREATE: | ||
| 1798 | case PROC_TID_ATTR_SOCKCREATE: | ||
| 1799 | case PROC_TGID_ATTR_SOCKCREATE: | ||
| 1804 | inode->i_fop = &proc_pid_attr_operations; | 1800 | inode->i_fop = &proc_pid_attr_operations; |
| 1805 | break; | 1801 | break; |
| 1806 | #endif | 1802 | #endif |
| @@ -1842,14 +1838,18 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
| 1842 | default: | 1838 | default: |
| 1843 | printk("procfs: impossible type (%d)",p->type); | 1839 | printk("procfs: impossible type (%d)",p->type); |
| 1844 | iput(inode); | 1840 | iput(inode); |
| 1845 | return ERR_PTR(-EINVAL); | 1841 | error = ERR_PTR(-EINVAL); |
| 1842 | goto out; | ||
| 1846 | } | 1843 | } |
| 1847 | dentry->d_op = &pid_dentry_operations; | 1844 | dentry->d_op = &pid_dentry_operations; |
| 1848 | d_add(dentry, inode); | 1845 | d_add(dentry, inode); |
| 1849 | return NULL; | 1846 | /* Close the race of the process dying before we return the dentry */ |
| 1850 | 1847 | if (pid_revalidate(dentry, NULL)) | |
| 1848 | error = NULL; | ||
| 1851 | out: | 1849 | out: |
| 1852 | return ERR_PTR(error); | 1850 | put_task_struct(task); |
| 1851 | out_no_task: | ||
| 1852 | return error; | ||
| 1853 | } | 1853 | } |
| 1854 | 1854 | ||
| 1855 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | 1855 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
| @@ -1872,10 +1872,14 @@ static struct file_operations proc_tid_base_operations = { | |||
| 1872 | 1872 | ||
| 1873 | static struct inode_operations proc_tgid_base_inode_operations = { | 1873 | static struct inode_operations proc_tgid_base_inode_operations = { |
| 1874 | .lookup = proc_tgid_base_lookup, | 1874 | .lookup = proc_tgid_base_lookup, |
| 1875 | .getattr = pid_getattr, | ||
| 1876 | .setattr = proc_setattr, | ||
| 1875 | }; | 1877 | }; |
| 1876 | 1878 | ||
| 1877 | static struct inode_operations proc_tid_base_inode_operations = { | 1879 | static struct inode_operations proc_tid_base_inode_operations = { |
| 1878 | .lookup = proc_tid_base_lookup, | 1880 | .lookup = proc_tid_base_lookup, |
| 1881 | .getattr = pid_getattr, | ||
| 1882 | .setattr = proc_setattr, | ||
| 1879 | }; | 1883 | }; |
| 1880 | 1884 | ||
| 1881 | #ifdef CONFIG_SECURITY | 1885 | #ifdef CONFIG_SECURITY |
| @@ -1917,10 +1921,14 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir, | |||
| 1917 | 1921 | ||
| 1918 | static struct inode_operations proc_tgid_attr_inode_operations = { | 1922 | static struct inode_operations proc_tgid_attr_inode_operations = { |
| 1919 | .lookup = proc_tgid_attr_lookup, | 1923 | .lookup = proc_tgid_attr_lookup, |
| 1924 | .getattr = pid_getattr, | ||
| 1925 | .setattr = proc_setattr, | ||
| 1920 | }; | 1926 | }; |
| 1921 | 1927 | ||
| 1922 | static struct inode_operations proc_tid_attr_inode_operations = { | 1928 | static struct inode_operations proc_tid_attr_inode_operations = { |
| 1923 | .lookup = proc_tid_attr_lookup, | 1929 | .lookup = proc_tid_attr_lookup, |
| 1930 | .getattr = pid_getattr, | ||
| 1931 | .setattr = proc_setattr, | ||
| 1924 | }; | 1932 | }; |
| 1925 | #endif | 1933 | #endif |
| 1926 | 1934 | ||
| @@ -1930,14 +1938,14 @@ static struct inode_operations proc_tid_attr_inode_operations = { | |||
| 1930 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | 1938 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, |
| 1931 | int buflen) | 1939 | int buflen) |
| 1932 | { | 1940 | { |
| 1933 | char tmp[30]; | 1941 | char tmp[PROC_NUMBUF]; |
| 1934 | sprintf(tmp, "%d", current->tgid); | 1942 | sprintf(tmp, "%d", current->tgid); |
| 1935 | return vfs_readlink(dentry,buffer,buflen,tmp); | 1943 | return vfs_readlink(dentry,buffer,buflen,tmp); |
| 1936 | } | 1944 | } |
| 1937 | 1945 | ||
| 1938 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | 1946 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) |
| 1939 | { | 1947 | { |
| 1940 | char tmp[30]; | 1948 | char tmp[PROC_NUMBUF]; |
| 1941 | sprintf(tmp, "%d", current->tgid); | 1949 | sprintf(tmp, "%d", current->tgid); |
| 1942 | return ERR_PTR(vfs_follow_link(nd,tmp)); | 1950 | return ERR_PTR(vfs_follow_link(nd,tmp)); |
| 1943 | } | 1951 | } |
| @@ -1948,67 +1956,80 @@ static struct inode_operations proc_self_inode_operations = { | |||
| 1948 | }; | 1956 | }; |
| 1949 | 1957 | ||
| 1950 | /** | 1958 | /** |
| 1951 | * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. | 1959 | * proc_flush_task - Remove dcache entries for @task from the /proc dcache. |
| 1952 | * @p: task that should be flushed. | 1960 | * |
| 1961 | * @task: task that should be flushed. | ||
| 1953 | * | 1962 | * |
| 1954 | * Drops the /proc/@pid dcache entry from the hash chains. | 1963 | * Looks in the dcache for |
| 1964 | * /proc/@pid | ||
| 1965 | * /proc/@tgid/task/@pid | ||
| 1966 | * if either directory is present, flushes it and all of its children | ||
| 1967 | * from the dcache. | ||
| 1955 | * | 1968 | * |
| 1956 | * Dropping /proc/@pid entries and detach_pid must be synchroneous, | 1969 | * It is safe and reasonable to cache /proc entries for a task until |
| 1957 | * otherwise e.g. /proc/@pid/exe might point to the wrong executable, | 1970 | * that task exits. After that they just clog up the dcache with |
| 1958 | * if the pid value is immediately reused. This is enforced by | 1971 | * useless entries, possibly causing useful dcache entries to be |
| 1959 | * - caller must acquire spin_lock(p->proc_lock) | 1972 | * flushed instead. This routine is provided to flush those useless |
| 1960 | * - must be called before detach_pid() | 1973 | * dcache entries at process exit time. |
| 1961 | * - proc_pid_lookup acquires proc_lock, and checks that | 1974 | * |
| 1962 | * the target is not dead by looking at the attach count | 1975 | * NOTE: This routine is just an optimization so it does not guarantee |
| 1963 | * of PIDTYPE_PID. | 1976 | * that no dcache entries will exist at process exit time; it |
| 1977 | * just makes it very unlikely that any will persist. | ||
| 1964 | */ | 1978 | */ |
| 1965 | 1979 | void proc_flush_task(struct task_struct *task) | |
| 1966 | struct dentry *proc_pid_unhash(struct task_struct *p) | ||
| 1967 | { | 1980 | { |
| 1968 | struct dentry *proc_dentry; | 1981 | struct dentry *dentry, *leader, *dir; |
| 1982 | char buf[PROC_NUMBUF]; | ||
| 1983 | struct qstr name; | ||
| 1984 | |||
| 1985 | name.name = buf; | ||
| 1986 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); | ||
| 1987 | dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); | ||
| 1988 | if (dentry) { | ||
| 1989 | shrink_dcache_parent(dentry); | ||
| 1990 | d_drop(dentry); | ||
| 1991 | dput(dentry); | ||
| 1992 | } | ||
| 1969 | 1993 | ||
| 1970 | proc_dentry = p->proc_dentry; | 1994 | if (thread_group_leader(task)) |
| 1971 | if (proc_dentry != NULL) { | 1995 | goto out; |
| 1972 | 1996 | ||
| 1973 | spin_lock(&dcache_lock); | 1997 | name.name = buf; |
| 1974 | spin_lock(&proc_dentry->d_lock); | 1998 | name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); |
| 1975 | if (!d_unhashed(proc_dentry)) { | 1999 | leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); |
| 1976 | dget_locked(proc_dentry); | 2000 | if (!leader) |
| 1977 | __d_drop(proc_dentry); | 2001 | goto out; |
| 1978 | spin_unlock(&proc_dentry->d_lock); | ||
| 1979 | } else { | ||
| 1980 | spin_unlock(&proc_dentry->d_lock); | ||
| 1981 | proc_dentry = NULL; | ||
| 1982 | } | ||
| 1983 | spin_unlock(&dcache_lock); | ||
| 1984 | } | ||
| 1985 | return proc_dentry; | ||
| 1986 | } | ||
| 1987 | 2002 | ||
| 1988 | /** | 2003 | name.name = "task"; |
| 1989 | * proc_pid_flush - recover memory used by stale /proc/@pid/x entries | 2004 | name.len = strlen(name.name); |
| 1990 | * @proc_dentry: directoy to prune. | 2005 | dir = d_hash_and_lookup(leader, &name); |
| 1991 | * | 2006 | if (!dir) |
| 1992 | * Shrink the /proc directory that was used by the just killed thread. | 2007 | goto out_put_leader; |
| 1993 | */ | 2008 | |
| 1994 | 2009 | name.name = buf; | |
| 1995 | void proc_pid_flush(struct dentry *proc_dentry) | 2010 | name.len = snprintf(buf, sizeof(buf), "%d", task->pid); |
| 1996 | { | 2011 | dentry = d_hash_and_lookup(dir, &name); |
| 1997 | might_sleep(); | 2012 | if (dentry) { |
| 1998 | if(proc_dentry != NULL) { | 2013 | shrink_dcache_parent(dentry); |
| 1999 | shrink_dcache_parent(proc_dentry); | 2014 | d_drop(dentry); |
| 2000 | dput(proc_dentry); | 2015 | dput(dentry); |
| 2001 | } | 2016 | } |
| 2017 | |||
| 2018 | dput(dir); | ||
| 2019 | out_put_leader: | ||
| 2020 | dput(leader); | ||
| 2021 | out: | ||
| 2022 | return; | ||
| 2002 | } | 2023 | } |
| 2003 | 2024 | ||
| 2004 | /* SMP-safe */ | 2025 | /* SMP-safe */ |
| 2005 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 2026 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
| 2006 | { | 2027 | { |
| 2028 | struct dentry *result = ERR_PTR(-ENOENT); | ||
| 2007 | struct task_struct *task; | 2029 | struct task_struct *task; |
| 2008 | struct inode *inode; | 2030 | struct inode *inode; |
| 2009 | struct proc_inode *ei; | 2031 | struct proc_inode *ei; |
| 2010 | unsigned tgid; | 2032 | unsigned tgid; |
| 2011 | int died; | ||
| 2012 | 2033 | ||
| 2013 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | 2034 | if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { |
| 2014 | inode = new_inode(dir->i_sb); | 2035 | inode = new_inode(dir->i_sb); |
| @@ -2029,21 +2050,18 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
| 2029 | if (tgid == ~0U) | 2050 | if (tgid == ~0U) |
| 2030 | goto out; | 2051 | goto out; |
| 2031 | 2052 | ||
| 2032 | read_lock(&tasklist_lock); | 2053 | rcu_read_lock(); |
| 2033 | task = find_task_by_pid(tgid); | 2054 | task = find_task_by_pid(tgid); |
| 2034 | if (task) | 2055 | if (task) |
| 2035 | get_task_struct(task); | 2056 | get_task_struct(task); |
| 2036 | read_unlock(&tasklist_lock); | 2057 | rcu_read_unlock(); |
| 2037 | if (!task) | 2058 | if (!task) |
| 2038 | goto out; | 2059 | goto out; |
| 2039 | 2060 | ||
| 2040 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); | 2061 | inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); |
| 2062 | if (!inode) | ||
| 2063 | goto out_put_task; | ||
| 2041 | 2064 | ||
| 2042 | |||
| 2043 | if (!inode) { | ||
| 2044 | put_task_struct(task); | ||
| 2045 | goto out; | ||
| 2046 | } | ||
| 2047 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; | 2065 | inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; |
| 2048 | inode->i_op = &proc_tgid_base_inode_operations; | 2066 | inode->i_op = &proc_tgid_base_inode_operations; |
| 2049 | inode->i_fop = &proc_tgid_base_operations; | 2067 | inode->i_fop = &proc_tgid_base_operations; |
| @@ -2054,45 +2072,40 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||
| 2054 | inode->i_nlink = 4; | 2072 | inode->i_nlink = 4; |
| 2055 | #endif | 2073 | #endif |
| 2056 | 2074 | ||
| 2057 | dentry->d_op = &pid_base_dentry_operations; | 2075 | dentry->d_op = &pid_dentry_operations; |
| 2058 | 2076 | ||
| 2059 | died = 0; | ||
| 2060 | d_add(dentry, inode); | 2077 | d_add(dentry, inode); |
| 2061 | spin_lock(&task->proc_lock); | 2078 | /* Close the race of the process dying before we return the dentry */ |
| 2062 | task->proc_dentry = dentry; | 2079 | if (pid_revalidate(dentry, NULL)) |
| 2063 | if (!pid_alive(task)) { | 2080 | result = NULL; |
| 2064 | dentry = proc_pid_unhash(task); | ||
| 2065 | died = 1; | ||
| 2066 | } | ||
| 2067 | spin_unlock(&task->proc_lock); | ||
| 2068 | 2081 | ||
| 2082 | out_put_task: | ||
| 2069 | put_task_struct(task); | 2083 | put_task_struct(task); |
| 2070 | if (died) { | ||
| 2071 | proc_pid_flush(dentry); | ||
| 2072 | goto out; | ||
| 2073 | } | ||
| 2074 | return NULL; | ||
| 2075 | out: | 2084 | out: |
| 2076 | return ERR_PTR(-ENOENT); | 2085 | return result; |
| 2077 | } | 2086 | } |
| 2078 | 2087 | ||
| 2079 | /* SMP-safe */ | 2088 | /* SMP-safe */ |
| 2080 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 2089 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
| 2081 | { | 2090 | { |
| 2091 | struct dentry *result = ERR_PTR(-ENOENT); | ||
| 2082 | struct task_struct *task; | 2092 | struct task_struct *task; |
| 2083 | struct task_struct *leader = proc_task(dir); | 2093 | struct task_struct *leader = get_proc_task(dir); |
| 2084 | struct inode *inode; | 2094 | struct inode *inode; |
| 2085 | unsigned tid; | 2095 | unsigned tid; |
| 2086 | 2096 | ||
| 2097 | if (!leader) | ||
| 2098 | goto out_no_task; | ||
| 2099 | |||
| 2087 | tid = name_to_int(dentry); | 2100 | tid = name_to_int(dentry); |
| 2088 | if (tid == ~0U) | 2101 | if (tid == ~0U) |
| 2089 | goto out; | 2102 | goto out; |
| 2090 | 2103 | ||
| 2091 | read_lock(&tasklist_lock); | 2104 | rcu_read_lock(); |
| 2092 | task = find_task_by_pid(tid); | 2105 | task = find_task_by_pid(tid); |
| 2093 | if (task) | 2106 | if (task) |
| 2094 | get_task_struct(task); | 2107 | get_task_struct(task); |
| 2095 | read_unlock(&tasklist_lock); | 2108 | rcu_read_unlock(); |
| 2096 | if (!task) | 2109 | if (!task) |
| 2097 | goto out; | 2110 | goto out; |
| 2098 | if (leader->tgid != task->tgid) | 2111 | if (leader->tgid != task->tgid) |
| @@ -2113,101 +2126,95 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry | |||
| 2113 | inode->i_nlink = 3; | 2126 | inode->i_nlink = 3; |
| 2114 | #endif | 2127 | #endif |
| 2115 | 2128 | ||
| 2116 | dentry->d_op = &pid_base_dentry_operations; | 2129 | dentry->d_op = &pid_dentry_operations; |
| 2117 | 2130 | ||
| 2118 | d_add(dentry, inode); | 2131 | d_add(dentry, inode); |
| 2132 | /* Close the race of the process dying before we return the dentry */ | ||
| 2133 | if (pid_revalidate(dentry, NULL)) | ||
| 2134 | result = NULL; | ||
| 2119 | 2135 | ||
| 2120 | put_task_struct(task); | ||
| 2121 | return NULL; | ||
| 2122 | out_drop_task: | 2136 | out_drop_task: |
| 2123 | put_task_struct(task); | 2137 | put_task_struct(task); |
| 2124 | out: | 2138 | out: |
| 2125 | return ERR_PTR(-ENOENT); | 2139 | put_task_struct(leader); |
| 2140 | out_no_task: | ||
| 2141 | return result; | ||
| 2126 | } | 2142 | } |
| 2127 | 2143 | ||
| 2128 | #define PROC_NUMBUF 10 | ||
| 2129 | #define PROC_MAXPIDS 20 | ||
| 2130 | |||
| 2131 | /* | 2144 | /* |
| 2132 | * Get a few tgid's to return for filldir - we need to hold the | 2145 | * Find the first tgid to return to user space. |
| 2133 | * tasklist lock while doing this, and we must release it before | 2146 | * |
| 2134 | * we actually do the filldir itself, so we use a temp buffer.. | 2147 | * Usually this is just whatever follows &init_task, but if the users |
| 2148 | * buffer was too small to hold the full list or there was a seek into | ||
| 2149 | * the middle of the directory we have more work to do. | ||
| 2150 | * | ||
| 2151 | * In the case of a short read we start with find_task_by_pid. | ||
| 2152 | * | ||
| 2153 | * In the case of a seek we start with &init_task and walk nr | ||
| 2154 | * threads past it. | ||
| 2135 | */ | 2155 | */ |
| 2136 | static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) | 2156 | static struct task_struct *first_tgid(int tgid, unsigned int nr) |
| 2137 | { | 2157 | { |
| 2138 | struct task_struct *p; | 2158 | struct task_struct *pos; |
| 2139 | int nr_tgids = 0; | 2159 | rcu_read_lock(); |
| 2140 | 2160 | if (tgid && nr) { | |
| 2141 | index--; | 2161 | pos = find_task_by_pid(tgid); |
| 2142 | read_lock(&tasklist_lock); | 2162 | if (pos && thread_group_leader(pos)) |
| 2143 | p = NULL; | 2163 | goto found; |
| 2144 | if (version) { | ||
| 2145 | p = find_task_by_pid(version); | ||
| 2146 | if (p && !thread_group_leader(p)) | ||
| 2147 | p = NULL; | ||
| 2148 | } | 2164 | } |
| 2165 | /* If nr exceeds the number of processes get out quickly */ | ||
| 2166 | pos = NULL; | ||
| 2167 | if (nr && nr >= nr_processes()) | ||
| 2168 | goto done; | ||
| 2149 | 2169 | ||
| 2150 | if (p) | 2170 | /* If we haven't found our starting place yet start with |
| 2151 | index = 0; | 2171 | * the init_task and walk nr tasks forward. |
| 2152 | else | 2172 | */ |
| 2153 | p = next_task(&init_task); | 2173 | for (pos = next_task(&init_task); nr > 0; --nr) { |
| 2154 | 2174 | pos = next_task(pos); | |
| 2155 | for ( ; p != &init_task; p = next_task(p)) { | 2175 | if (pos == &init_task) { |
| 2156 | int tgid = p->pid; | 2176 | pos = NULL; |
| 2157 | if (!pid_alive(p)) | 2177 | goto done; |
| 2158 | continue; | 2178 | } |
| 2159 | if (--index >= 0) | ||
| 2160 | continue; | ||
| 2161 | tgids[nr_tgids] = tgid; | ||
| 2162 | nr_tgids++; | ||
| 2163 | if (nr_tgids >= PROC_MAXPIDS) | ||
| 2164 | break; | ||
| 2165 | } | 2179 | } |
| 2166 | read_unlock(&tasklist_lock); | 2180 | found: |
| 2167 | return nr_tgids; | 2181 | get_task_struct(pos); |
| 2182 | done: | ||
| 2183 | rcu_read_unlock(); | ||
| 2184 | return pos; | ||
| 2168 | } | 2185 | } |
| 2169 | 2186 | ||
| 2170 | /* | 2187 | /* |
| 2171 | * Get a few tid's to return for filldir - we need to hold the | 2188 | * Find the next task in the task list. |
| 2172 | * tasklist lock while doing this, and we must release it before | 2189 | * Return NULL if we loop or there is any error. |
| 2173 | * we actually do the filldir itself, so we use a temp buffer.. | 2190 | * |
| 2191 | * The reference to the input task_struct is released. | ||
| 2174 | */ | 2192 | */ |
| 2175 | static int get_tid_list(int index, unsigned int *tids, struct inode *dir) | 2193 | static struct task_struct *next_tgid(struct task_struct *start) |
| 2176 | { | 2194 | { |
| 2177 | struct task_struct *leader_task = proc_task(dir); | 2195 | struct task_struct *pos; |
| 2178 | struct task_struct *task = leader_task; | 2196 | rcu_read_lock(); |
| 2179 | int nr_tids = 0; | 2197 | pos = start; |
| 2180 | 2198 | if (pid_alive(start)) | |
| 2181 | index -= 2; | 2199 | pos = next_task(start); |
| 2182 | read_lock(&tasklist_lock); | 2200 | if (pid_alive(pos) && (pos != &init_task)) { |
| 2183 | /* | 2201 | get_task_struct(pos); |
| 2184 | * The starting point task (leader_task) might be an already | 2202 | goto done; |
| 2185 | * unlinked task, which cannot be used to access the task-list | 2203 | } |
| 2186 | * via next_thread(). | 2204 | pos = NULL; |
| 2187 | */ | 2205 | done: |
| 2188 | if (pid_alive(task)) do { | 2206 | rcu_read_unlock(); |
| 2189 | int tid = task->pid; | 2207 | put_task_struct(start); |
| 2190 | 2208 | return pos; | |
| 2191 | if (--index >= 0) | ||
| 2192 | continue; | ||
| 2193 | if (tids != NULL) | ||
| 2194 | tids[nr_tids] = tid; | ||
| 2195 | nr_tids++; | ||
| 2196 | if (nr_tids >= PROC_MAXPIDS) | ||
| 2197 | break; | ||
| 2198 | } while ((task = next_thread(task)) != leader_task); | ||
| 2199 | read_unlock(&tasklist_lock); | ||
| 2200 | return nr_tids; | ||
| 2201 | } | 2209 | } |
| 2202 | 2210 | ||
| 2203 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 2211 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
| 2204 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2212 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
| 2205 | { | 2213 | { |
| 2206 | unsigned int tgid_array[PROC_MAXPIDS]; | ||
| 2207 | char buf[PROC_NUMBUF]; | 2214 | char buf[PROC_NUMBUF]; |
| 2208 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; | 2215 | unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; |
| 2209 | unsigned int nr_tgids, i; | 2216 | struct task_struct *task; |
| 2210 | int next_tgid; | 2217 | int tgid; |
| 2211 | 2218 | ||
| 2212 | if (!nr) { | 2219 | if (!nr) { |
| 2213 | ino_t ino = fake_ino(0,PROC_TGID_INO); | 2220 | ino_t ino = fake_ino(0,PROC_TGID_INO); |
| @@ -2216,63 +2223,116 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 2216 | filp->f_pos++; | 2223 | filp->f_pos++; |
| 2217 | nr++; | 2224 | nr++; |
| 2218 | } | 2225 | } |
| 2226 | nr -= 1; | ||
| 2219 | 2227 | ||
| 2220 | /* f_version caches the tgid value that the last readdir call couldn't | 2228 | /* f_version caches the tgid value that the last readdir call couldn't |
| 2221 | * return. lseek aka telldir automagically resets f_version to 0. | 2229 | * return. lseek aka telldir automagically resets f_version to 0. |
| 2222 | */ | 2230 | */ |
| 2223 | next_tgid = filp->f_version; | 2231 | tgid = filp->f_version; |
| 2224 | filp->f_version = 0; | 2232 | filp->f_version = 0; |
| 2225 | for (;;) { | 2233 | for (task = first_tgid(tgid, nr); |
| 2226 | nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); | 2234 | task; |
| 2227 | if (!nr_tgids) { | 2235 | task = next_tgid(task), filp->f_pos++) { |
| 2228 | /* no more entries ! */ | 2236 | int len; |
| 2237 | ino_t ino; | ||
| 2238 | tgid = task->pid; | ||
| 2239 | len = snprintf(buf, sizeof(buf), "%d", tgid); | ||
| 2240 | ino = fake_ino(tgid, PROC_TGID_INO); | ||
| 2241 | if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) { | ||
| 2242 | /* returning this tgid failed, save it as the first | ||
| 2243 | * pid for the next readir call */ | ||
| 2244 | filp->f_version = tgid; | ||
| 2245 | put_task_struct(task); | ||
| 2229 | break; | 2246 | break; |
| 2230 | } | 2247 | } |
| 2231 | next_tgid = 0; | 2248 | } |
| 2249 | return 0; | ||
| 2250 | } | ||
| 2232 | 2251 | ||
| 2233 | /* do not use the last found pid, reserve it for next_tgid */ | 2252 | /* |
| 2234 | if (nr_tgids == PROC_MAXPIDS) { | 2253 | * Find the first tid of a thread group to return to user space. |
| 2235 | nr_tgids--; | 2254 | * |
| 2236 | next_tgid = tgid_array[nr_tgids]; | 2255 | * Usually this is just the thread group leader, but if the users |
| 2237 | } | 2256 | * buffer was too small or there was a seek into the middle of the |
| 2258 | * directory we have more work to do. | ||
| 2258 | * | ||
| 2259 | * In the case of a short read we start with find_task_by_pid. | ||
| 2260 | * | ||
| 2261 | * In the case of a seek we start with the leader and walk nr | ||
| 2262 | * threads past it. | ||
| 2263 | */ | ||
| 2264 | static struct task_struct *first_tid(struct task_struct *leader, | ||
| 2265 | int tid, int nr) | ||
| 2266 | { | ||
| 2267 | struct task_struct *pos; | ||
| 2238 | 2268 | ||
| 2239 | for (i=0;i<nr_tgids;i++) { | 2269 | rcu_read_lock(); |
| 2240 | int tgid = tgid_array[i]; | 2270 | /* Attempt to start with the pid of a thread */ |
| 2241 | ino_t ino = fake_ino(tgid,PROC_TGID_INO); | 2271 | if (tid && (nr > 0)) { |
| 2242 | unsigned long j = PROC_NUMBUF; | 2272 | pos = find_task_by_pid(tid); |
| 2273 | if (pos && (pos->group_leader == leader)) | ||
| 2274 | goto found; | ||
| 2275 | } | ||
| 2243 | 2276 | ||
| 2244 | do | 2277 | /* If nr exceeds the number of threads there is nothing to do */ |
| 2245 | buf[--j] = '0' + (tgid % 10); | 2278 | pos = NULL; |
| 2246 | while ((tgid /= 10) != 0); | 2279 | if (nr && nr >= get_nr_threads(leader)) |
| 2280 | goto out; | ||
| 2247 | 2281 | ||
| 2248 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { | 2282 | /* If we haven't found our starting place yet start |
| 2249 | /* returning this tgid failed, save it as the first | 2283 | * with the leader and walk nr threads forward. |
| 2250 | * pid for the next readir call */ | 2284 | */ |
| 2251 | filp->f_version = tgid_array[i]; | 2285 | for (pos = leader; nr > 0; --nr) { |
| 2252 | goto out; | 2286 | pos = next_thread(pos); |
| 2253 | } | 2287 | if (pos == leader) { |
| 2254 | filp->f_pos++; | 2288 | pos = NULL; |
| 2255 | nr++; | 2289 | goto out; |
| 2256 | } | 2290 | } |
| 2257 | } | 2291 | } |
| 2292 | found: | ||
| 2293 | get_task_struct(pos); | ||
| 2258 | out: | 2294 | out: |
| 2259 | return 0; | 2295 | rcu_read_unlock(); |
| 2296 | return pos; | ||
| 2297 | } | ||
| 2298 | |||
| 2299 | /* | ||
| 2300 | * Find the next thread in the thread list. | ||
| 2301 | * Return NULL if there is an error or no next thread. | ||
| 2302 | * | ||
| 2303 | * The reference to the input task_struct is released. | ||
| 2304 | */ | ||
| 2305 | static struct task_struct *next_tid(struct task_struct *start) | ||
| 2306 | { | ||
| 2307 | struct task_struct *pos = NULL; | ||
| 2308 | rcu_read_lock(); | ||
| 2309 | if (pid_alive(start)) { | ||
| 2310 | pos = next_thread(start); | ||
| 2311 | if (thread_group_leader(pos)) | ||
| 2312 | pos = NULL; | ||
| 2313 | else | ||
| 2314 | get_task_struct(pos); | ||
| 2315 | } | ||
| 2316 | rcu_read_unlock(); | ||
| 2317 | put_task_struct(start); | ||
| 2318 | return pos; | ||
| 2260 | } | 2319 | } |
| 2261 | 2320 | ||
| 2262 | /* for the /proc/TGID/task/ directories */ | 2321 | /* for the /proc/TGID/task/ directories */ |
| 2263 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2322 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) |
| 2264 | { | 2323 | { |
| 2265 | unsigned int tid_array[PROC_MAXPIDS]; | ||
| 2266 | char buf[PROC_NUMBUF]; | 2324 | char buf[PROC_NUMBUF]; |
| 2267 | unsigned int nr_tids, i; | ||
| 2268 | struct dentry *dentry = filp->f_dentry; | 2325 | struct dentry *dentry = filp->f_dentry; |
| 2269 | struct inode *inode = dentry->d_inode; | 2326 | struct inode *inode = dentry->d_inode; |
| 2327 | struct task_struct *leader = get_proc_task(inode); | ||
| 2328 | struct task_struct *task; | ||
| 2270 | int retval = -ENOENT; | 2329 | int retval = -ENOENT; |
| 2271 | ino_t ino; | 2330 | ino_t ino; |
| 2331 | int tid; | ||
| 2272 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ | 2332 | unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ |
| 2273 | 2333 | ||
| 2274 | if (!pid_alive(proc_task(inode))) | 2334 | if (!leader) |
| 2275 | goto out; | 2335 | goto out_no_task; |
| 2276 | retval = 0; | 2336 | retval = 0; |
| 2277 | 2337 | ||
| 2278 | switch (pos) { | 2338 | switch (pos) { |
| @@ -2290,24 +2350,45 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
| 2290 | /* fall through */ | 2350 | /* fall through */ |
| 2291 | } | 2351 | } |
| 2292 | 2352 | ||
| 2293 | nr_tids = get_tid_list(pos, tid_array, inode); | 2353 | /* f_version caches the tid value that the last readdir call couldn't |
| 2294 | inode->i_nlink = pos + nr_tids; | 2354 | * return. lseek aka telldir automagically resets f_version to 0. |
| 2295 | 2355 | */ | |
| 2296 | for (i = 0; i < nr_tids; i++) { | 2356 | tid = filp->f_version; |
| 2297 | unsigned long j = PROC_NUMBUF; | 2357 | filp->f_version = 0; |
| 2298 | int tid = tid_array[i]; | 2358 | for (task = first_tid(leader, tid, pos - 2); |
| 2299 | 2359 | task; | |
| 2300 | ino = fake_ino(tid,PROC_TID_INO); | 2360 | task = next_tid(task), pos++) { |
| 2301 | 2361 | int len; | |
| 2302 | do | 2362 | tid = task->pid; |
| 2303 | buf[--j] = '0' + (tid % 10); | 2363 | len = snprintf(buf, sizeof(buf), "%d", tid); |
| 2304 | while ((tid /= 10) != 0); | 2364 | ino = fake_ino(tid, PROC_TID_INO); |
| 2305 | 2365 | if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) { | |
| 2306 | if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) | 2366 | /* returning this tid failed, save it as the first |
| 2368 | * pid for the next readdir call */ | ||
| 2368 | filp->f_version = tid; | ||
| 2369 | put_task_struct(task); | ||
| 2307 | break; | 2370 | break; |
| 2308 | pos++; | 2371 | } |
| 2309 | } | 2372 | } |
| 2310 | out: | 2373 | out: |
| 2311 | filp->f_pos = pos; | 2374 | filp->f_pos = pos; |
| 2375 | put_task_struct(leader); | ||
| 2376 | out_no_task: | ||
| 2312 | return retval; | 2377 | return retval; |
| 2313 | } | 2378 | } |
| 2379 | |||
| 2380 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | ||
| 2381 | { | ||
| 2382 | struct inode *inode = dentry->d_inode; | ||
| 2383 | struct task_struct *p = get_proc_task(inode); | ||
| 2384 | generic_fillattr(inode, stat); | ||
| 2385 | |||
| 2386 | if (p) { | ||
| 2387 | rcu_read_lock(); | ||
| 2388 | stat->nlink += get_nr_threads(p); | ||
| 2389 | rcu_read_unlock(); | ||
| 2390 | put_task_struct(p); | ||
| 2391 | } | ||
| 2392 | |||
| 2393 | return 0; | ||
| 2394 | } | ||
