diff options
author | Cyrill Gorcunov <gorcunov@openvz.org> | 2012-05-31 19:26:43 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-31 20:49:32 -0400 |
commit | 818411616baf46ceba0cff6f05af3a9b294734f7 (patch) | |
tree | b6e338e20d4a45b6a05ffa37e6af33e0cc1b50c0 /fs/proc | |
parent | 98ed57eef9f67dfe541be0bca34660ffc88365b2 (diff) |
fs, proc: introduce /proc/<pid>/task/<tid>/children entry
When we checkpoint a task we need to know the list of children the
task has, but there is no easy and fast way to generate the reverse
parent->children chain from an arbitrary <pid> (while the parent pid is
provided in the "PPid" field of /proc/<pid>/status).
So instead of walking over all pids in the system (creating one big
process tree in memory, just to figure out which children a task has) --
we add an explicit /proc/<pid>/task/<tid>/children entry, because the kernel
already has this kind of information but it is not yet exported.
The entry lists first-level children only, not the whole process tree.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 123 | ||||
-rw-r--r-- | fs/proc/base.c | 3 | ||||
-rw-r--r-- | fs/proc/internal.h | 1 |
3 files changed, 127 insertions, 0 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index 3a26d23420cc..62887e39a2de 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -565,3 +565,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | |||
565 | 565 | ||
566 | return 0; | 566 | return 0; |
567 | } | 567 | } |
568 | |||
569 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
570 | static struct pid * | ||
571 | get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) | ||
572 | { | ||
573 | struct task_struct *start, *task; | ||
574 | struct pid *pid = NULL; | ||
575 | |||
576 | read_lock(&tasklist_lock); | ||
577 | |||
578 | start = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
579 | if (!start) | ||
580 | goto out; | ||
581 | |||
582 | /* | ||
583 | * Lets try to continue searching first, this gives | ||
584 | * us significant speedup on children-rich processes. | ||
585 | */ | ||
586 | if (pid_prev) { | ||
587 | task = pid_task(pid_prev, PIDTYPE_PID); | ||
588 | if (task && task->real_parent == start && | ||
589 | !(list_empty(&task->sibling))) { | ||
590 | if (list_is_last(&task->sibling, &start->children)) | ||
591 | goto out; | ||
592 | task = list_first_entry(&task->sibling, | ||
593 | struct task_struct, sibling); | ||
594 | pid = get_pid(task_pid(task)); | ||
595 | goto out; | ||
596 | } | ||
597 | } | ||
598 | |||
599 | /* | ||
600 | * Slow search case. | ||
601 | * | ||
602 | * We might miss some children here if children | ||
603 | * are exited while we were not holding the lock, | ||
604 | * but it was never promised to be accurate that | ||
605 | * much. | ||
606 | * | ||
607 | * "Just suppose that the parent sleeps, but N children | ||
608 | * exit after we printed their tids. Now the slow paths | ||
609 | * skips N extra children, we miss N tasks." (c) | ||
610 | * | ||
611 | * So one need to stop or freeze the leader and all | ||
612 | * its children to get a precise result. | ||
613 | */ | ||
614 | list_for_each_entry(task, &start->children, sibling) { | ||
615 | if (pos-- == 0) { | ||
616 | pid = get_pid(task_pid(task)); | ||
617 | break; | ||
618 | } | ||
619 | } | ||
620 | |||
621 | out: | ||
622 | read_unlock(&tasklist_lock); | ||
623 | return pid; | ||
624 | } | ||
625 | |||
626 | static int children_seq_show(struct seq_file *seq, void *v) | ||
627 | { | ||
628 | struct inode *inode = seq->private; | ||
629 | pid_t pid; | ||
630 | |||
631 | pid = pid_nr_ns(v, inode->i_sb->s_fs_info); | ||
632 | return seq_printf(seq, "%d ", pid); | ||
633 | } | ||
634 | |||
635 | static void *children_seq_start(struct seq_file *seq, loff_t *pos) | ||
636 | { | ||
637 | return get_children_pid(seq->private, NULL, *pos); | ||
638 | } | ||
639 | |||
640 | static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
641 | { | ||
642 | struct pid *pid; | ||
643 | |||
644 | pid = get_children_pid(seq->private, v, *pos + 1); | ||
645 | put_pid(v); | ||
646 | |||
647 | ++*pos; | ||
648 | return pid; | ||
649 | } | ||
650 | |||
651 | static void children_seq_stop(struct seq_file *seq, void *v) | ||
652 | { | ||
653 | put_pid(v); | ||
654 | } | ||
655 | |||
656 | static const struct seq_operations children_seq_ops = { | ||
657 | .start = children_seq_start, | ||
658 | .next = children_seq_next, | ||
659 | .stop = children_seq_stop, | ||
660 | .show = children_seq_show, | ||
661 | }; | ||
662 | |||
663 | static int children_seq_open(struct inode *inode, struct file *file) | ||
664 | { | ||
665 | struct seq_file *m; | ||
666 | int ret; | ||
667 | |||
668 | ret = seq_open(file, &children_seq_ops); | ||
669 | if (ret) | ||
670 | return ret; | ||
671 | |||
672 | m = file->private_data; | ||
673 | m->private = inode; | ||
674 | |||
675 | return ret; | ||
676 | } | ||
677 | |||
678 | int children_seq_release(struct inode *inode, struct file *file) | ||
679 | { | ||
680 | seq_release(inode, file); | ||
681 | return 0; | ||
682 | } | ||
683 | |||
684 | const struct file_operations proc_tid_children_operations = { | ||
685 | .open = children_seq_open, | ||
686 | .read = seq_read, | ||
687 | .llseek = seq_lseek, | ||
688 | .release = children_seq_release, | ||
689 | }; | ||
690 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index bd8b4ca6a610..616f41a7cde6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -3400,6 +3400,9 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3400 | ONE("stat", S_IRUGO, proc_tid_stat), | 3400 | ONE("stat", S_IRUGO, proc_tid_stat), |
3401 | ONE("statm", S_IRUGO, proc_pid_statm), | 3401 | ONE("statm", S_IRUGO, proc_pid_statm), |
3402 | REG("maps", S_IRUGO, proc_tid_maps_operations), | 3402 | REG("maps", S_IRUGO, proc_tid_maps_operations), |
3403 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
3404 | REG("children", S_IRUGO, proc_tid_children_operations), | ||
3405 | #endif | ||
3403 | #ifdef CONFIG_NUMA | 3406 | #ifdef CONFIG_NUMA |
3404 | REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), | 3407 | REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), |
3405 | #endif | 3408 | #endif |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a30643784db5..eca4aca5b6e2 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -54,6 +54,7 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | |||
54 | struct pid *pid, struct task_struct *task); | 54 | struct pid *pid, struct task_struct *task); |
55 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | 55 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); |
56 | 56 | ||
57 | extern const struct file_operations proc_tid_children_operations; | ||
57 | extern const struct file_operations proc_pid_maps_operations; | 58 | extern const struct file_operations proc_pid_maps_operations; |
58 | extern const struct file_operations proc_tid_maps_operations; | 59 | extern const struct file_operations proc_tid_maps_operations; |
59 | extern const struct file_operations proc_pid_numa_maps_operations; | 60 | extern const struct file_operations proc_pid_numa_maps_operations; |