aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCyrill Gorcunov <gorcunov@openvz.org>2012-05-31 19:26:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-31 20:49:32 -0400
commit818411616baf46ceba0cff6f05af3a9b294734f7 (patch)
treeb6e338e20d4a45b6a05ffa37e6af33e0cc1b50c0
parent98ed57eef9f67dfe541be0bca34660ffc88365b2 (diff)
fs, proc: introduce /proc/<pid>/task/<tid>/children entry
When we checkpoint a task we need to know the list of children the task has, but there is no easy and fast way to generate the reverse parent->children chain from an arbitrary <pid> (while a parent pid is provided in the "PPid" field of /proc/<pid>/status). So instead of walking over all pids in the system (creating one big process tree in memory, just to figure out which children a task has) -- we add an explicit /proc/<pid>/task/<tid>/children entry, because the kernel already has this kind of information but it is not yet exported. Only first-level children are listed, not the whole process tree. Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Kees Cook <keescook@chromium.org> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Serge Hallyn <serge.hallyn@canonical.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/filesystems/proc.txt18
-rw-r--r--fs/proc/array.c123
-rw-r--r--fs/proc/base.c3
-rw-r--r--fs/proc/internal.h1
4 files changed, 145 insertions, 0 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 912af6ce562..d8d3f9a8e5a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -40,6 +40,7 @@ Table of Contents
40 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings 40 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
41 3.5 /proc/<pid>/mountinfo - Information about mounts 41 3.5 /proc/<pid>/mountinfo - Information about mounts
42 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm 42 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm
43 3.7 /proc/<pid>/task/<tid>/children - Information about task children
43 44
44 4 Configuring procfs 45 4 Configuring procfs
45 4.1 Mount options 46 4.1 Mount options
@@ -1578,6 +1579,23 @@ then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated
1578comm value. 1579comm value.
1579 1580
1580 1581
15823.7 /proc/<pid>/task/<tid>/children - Information about task children
1583-------------------------------------------------------------------------
1584This file provides a fast way to retrieve the first-level children pids
1585of the task identified by the <pid>/<tid> pair. The format is a space-separated
1586stream of pids.
1587
1588Note the "first level" here -- if a child has its own children, they will
1589not be listed here; one needs to read /proc/<children-pid>/task/<tid>/children
1590to obtain the descendants.
1591
1592Since this interface is intended to be fast and cheap, it does not
1593guarantee precise results and some children might be
1594skipped, especially if they've exited right after we printed their
1595pids, so one needs to either stop or freeze the processes being inspected
1596if precise results are needed.
1597
1598
1581------------------------------------------------------------------------------ 1599------------------------------------------------------------------------------
1582Configuring procfs 1600Configuring procfs
1583------------------------------------------------------------------------------ 1601------------------------------------------------------------------------------
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a26d23420c..62887e39a2d 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -565,3 +565,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
565 565
566 return 0; 566 return 0;
567} 567}
568
569#ifdef CONFIG_CHECKPOINT_RESTORE
570static struct pid *
571get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
572{
573 struct task_struct *start, *task;
574 struct pid *pid = NULL;
575
576 read_lock(&tasklist_lock);
577
578 start = pid_task(proc_pid(inode), PIDTYPE_PID);
579 if (!start)
580 goto out;
581
582 /*
583 * Lets try to continue searching first, this gives
584 * us significant speedup on children-rich processes.
585 */
586 if (pid_prev) {
587 task = pid_task(pid_prev, PIDTYPE_PID);
588 if (task && task->real_parent == start &&
589 !(list_empty(&task->sibling))) {
590 if (list_is_last(&task->sibling, &start->children))
591 goto out;
592 task = list_first_entry(&task->sibling,
593 struct task_struct, sibling);
594 pid = get_pid(task_pid(task));
595 goto out;
596 }
597 }
598
599 /*
600 * Slow search case.
601 *
602 * We might miss some children here if children
603 * are exited while we were not holding the lock,
604 * but it was never promised to be accurate that
605 * much.
606 *
607 * "Just suppose that the parent sleeps, but N children
608 * exit after we printed their tids. Now the slow paths
609 * skips N extra children, we miss N tasks." (c)
610 *
611 * So one need to stop or freeze the leader and all
612 * its children to get a precise result.
613 */
614 list_for_each_entry(task, &start->children, sibling) {
615 if (pos-- == 0) {
616 pid = get_pid(task_pid(task));
617 break;
618 }
619 }
620
621out:
622 read_unlock(&tasklist_lock);
623 return pid;
624}
625
626static int children_seq_show(struct seq_file *seq, void *v)
627{
628 struct inode *inode = seq->private;
629 pid_t pid;
630
631 pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
632 return seq_printf(seq, "%d ", pid);
633}
634
635static void *children_seq_start(struct seq_file *seq, loff_t *pos)
636{
637 return get_children_pid(seq->private, NULL, *pos);
638}
639
640static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
641{
642 struct pid *pid;
643
644 pid = get_children_pid(seq->private, v, *pos + 1);
645 put_pid(v);
646
647 ++*pos;
648 return pid;
649}
650
/*
 * seq_file ->stop() callback: release the pid the iteration stopped on.
 */
static void children_seq_stop(struct seq_file *seq, void *v)
{
	struct pid *last = v;

	put_pid(last);
}
655
656static const struct seq_operations children_seq_ops = {
657 .start = children_seq_start,
658 .next = children_seq_next,
659 .stop = children_seq_stop,
660 .show = children_seq_show,
661};
662
663static int children_seq_open(struct inode *inode, struct file *file)
664{
665 struct seq_file *m;
666 int ret;
667
668 ret = seq_open(file, &children_seq_ops);
669 if (ret)
670 return ret;
671
672 m = file->private_data;
673 m->private = inode;
674
675 return ret;
676}
677
/*
 * ->release() handler for the "children" file.
 *
 * Hands the work to seq_release() and returns its result instead of
 * discarding it.  The function is only referenced through
 * proc_tid_children_operations in this file, so it is given internal
 * linkage.
 */
static int children_seq_release(struct inode *inode, struct file *file)
{
	return seq_release(inode, file);
}
683
684const struct file_operations proc_tid_children_operations = {
685 .open = children_seq_open,
686 .read = seq_read,
687 .llseek = seq_lseek,
688 .release = children_seq_release,
689};
690#endif /* CONFIG_CHECKPOINT_RESTORE */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index bd8b4ca6a61..616f41a7cde 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3400,6 +3400,9 @@ static const struct pid_entry tid_base_stuff[] = {
3400 ONE("stat", S_IRUGO, proc_tid_stat), 3400 ONE("stat", S_IRUGO, proc_tid_stat),
3401 ONE("statm", S_IRUGO, proc_pid_statm), 3401 ONE("statm", S_IRUGO, proc_pid_statm),
3402 REG("maps", S_IRUGO, proc_tid_maps_operations), 3402 REG("maps", S_IRUGO, proc_tid_maps_operations),
3403#ifdef CONFIG_CHECKPOINT_RESTORE
3404 REG("children", S_IRUGO, proc_tid_children_operations),
3405#endif
3403#ifdef CONFIG_NUMA 3406#ifdef CONFIG_NUMA
3404 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), 3407 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
3405#endif 3408#endif
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index a30643784db..eca4aca5b6e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -54,6 +54,7 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
54 struct pid *pid, struct task_struct *task); 54 struct pid *pid, struct task_struct *task);
55extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); 55extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
56 56
57extern const struct file_operations proc_tid_children_operations;
57extern const struct file_operations proc_pid_maps_operations; 58extern const struct file_operations proc_pid_maps_operations;
58extern const struct file_operations proc_tid_maps_operations; 59extern const struct file_operations proc_tid_maps_operations;
59extern const struct file_operations proc_pid_numa_maps_operations; 60extern const struct file_operations proc_pid_numa_maps_operations;