diff options
author | Cliff Wickman <cpw@sgi.com> | 2008-02-07 03:14:44 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 11:42:22 -0500 |
commit | 58f4790b73639d1fa808439fac7f761a4c46e11f (patch) | |
tree | 2c7a9cbee80de78beed07b0915df649d8e843365 | |
parent | 956db3ca0606e78456786ef19fd4dc7a5151a6e1 (diff) |
cpusets: update_cpumask revision
Use the new function cgroup_scan_tasks() to step through all tasks in a
cpuset.
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | kernel/cpuset.c | 107 |
1 files changed, 46 insertions, 61 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d94a8f7c4c29..20cb3916c66c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/mount.h> | 38 | #include <linux/mount.h> |
39 | #include <linux/namei.h> | 39 | #include <linux/namei.h> |
40 | #include <linux/pagemap.h> | 40 | #include <linux/pagemap.h> |
41 | #include <linux/prio_heap.h> | ||
42 | #include <linux/proc_fs.h> | 41 | #include <linux/proc_fs.h> |
43 | #include <linux/rcupdate.h> | 42 | #include <linux/rcupdate.h> |
44 | #include <linux/sched.h> | 43 | #include <linux/sched.h> |
@@ -740,22 +739,50 @@ static inline int started_after(void *p1, void *p2) | |||
740 | return started_after_time(t1, &t2->start_time, t2); | 739 | return started_after_time(t1, &t2->start_time, t2); |
741 | } | 740 | } |
742 | 741 | ||
743 | /* | 742 | /** |
743 | * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's | ||
744 | * @tsk: task to test | ||
745 | * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner | ||
746 | * | ||
744 | * Call with manage_mutex held. May take callback_mutex during call. | 747 | * Call with manage_mutex held. May take callback_mutex during call. |
748 | * Called for each task in a cgroup by cgroup_scan_tasks(). | ||
749 | * Return nonzero if this task's cpus_allowed mask should be changed (in other | ||
750 | * words, if its mask is not equal to its cpuset's mask). | ||
745 | */ | 751 | */ |
752 | int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | ||
753 | { | ||
754 | return !cpus_equal(tsk->cpus_allowed, | ||
755 | (cgroup_cs(scan->cg))->cpus_allowed); | ||
756 | } | ||
746 | 757 | ||
758 | /** | ||
759 | * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's | ||
760 | * @tsk: task to change | ||
761 | * @scan: struct cgroup_scanner containing the cgroup of the task | ||
762 | * | ||
763 | * Called by cgroup_scan_tasks() for each task in a cgroup whose | ||
764 | * cpus_allowed mask needs to be changed. | ||
765 | * | ||
766 | * We don't need to re-check for the cgroup/cpuset membership, since we're | ||
767 | * holding cgroup_lock() at this point. | ||
768 | */ | ||
769 | void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | ||
770 | { | ||
771 | set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed); | ||
772 | } | ||
773 | |||
774 | /** | ||
775 | * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it | ||
776 | * @cs: the cpuset to consider | ||
777 | * @buf: buffer of cpu numbers written to this cpuset | ||
778 | */ | ||
747 | static int update_cpumask(struct cpuset *cs, char *buf) | 779 | static int update_cpumask(struct cpuset *cs, char *buf) |
748 | { | 780 | { |
749 | struct cpuset trialcs; | 781 | struct cpuset trialcs; |
750 | int retval, i; | 782 | struct cgroup_scanner scan; |
751 | int is_load_balanced; | ||
752 | struct cgroup_iter it; | ||
753 | struct cgroup *cgrp = cs->css.cgroup; | ||
754 | struct task_struct *p, *dropped; | ||
755 | /* Never dereference latest_task, since it's not refcounted */ | ||
756 | struct task_struct *latest_task = NULL; | ||
757 | struct ptr_heap heap; | 783 | struct ptr_heap heap; |
758 | struct timespec latest_time = { 0, 0 }; | 784 | int retval; |
785 | int is_load_balanced; | ||
759 | 786 | ||
760 | /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ | 787 | /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ |
761 | if (cs == &top_cpuset) | 788 | if (cs == &top_cpuset) |
@@ -764,7 +791,7 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
764 | trialcs = *cs; | 791 | trialcs = *cs; |
765 | 792 | ||
766 | /* | 793 | /* |
767 | * An empty cpus_allowed is ok iff there are no tasks in the cpuset. | 794 | * An empty cpus_allowed is ok if there are no tasks in the cpuset. |
768 | * Since cpulist_parse() fails on an empty mask, we special case | 795 | * Since cpulist_parse() fails on an empty mask, we special case |
769 | * that parsing. The validate_change() call ensures that cpusets | 796 | * that parsing. The validate_change() call ensures that cpusets |
770 | * with tasks have cpus. | 797 | * with tasks have cpus. |
@@ -785,6 +812,7 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
785 | /* Nothing to do if the cpus didn't change */ | 812 | /* Nothing to do if the cpus didn't change */ |
786 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) | 813 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) |
787 | return 0; | 814 | return 0; |
815 | |||
788 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); | 816 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); |
789 | if (retval) | 817 | if (retval) |
790 | return retval; | 818 | return retval; |
@@ -795,62 +823,19 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
795 | cs->cpus_allowed = trialcs.cpus_allowed; | 823 | cs->cpus_allowed = trialcs.cpus_allowed; |
796 | mutex_unlock(&callback_mutex); | 824 | mutex_unlock(&callback_mutex); |
797 | 825 | ||
798 | again: | ||
799 | /* | 826 | /* |
800 | * Scan tasks in the cpuset, and update the cpumasks of any | 827 | * Scan tasks in the cpuset, and update the cpumasks of any |
801 | * that need an update. Since we can't call set_cpus_allowed() | 828 | * that need an update. |
802 | * while holding tasklist_lock, gather tasks to be processed | ||
803 | * in a heap structure. If the statically-sized heap fills up, | ||
804 | * overflow tasks that started later, and in future iterations | ||
805 | * only consider tasks that started after the latest task in | ||
806 | * the previous pass. This guarantees forward progress and | ||
807 | * that we don't miss any tasks | ||
808 | */ | 829 | */ |
809 | heap.size = 0; | 830 | scan.cg = cs->css.cgroup; |
810 | cgroup_iter_start(cgrp, &it); | 831 | scan.test_task = cpuset_test_cpumask; |
811 | while ((p = cgroup_iter_next(cgrp, &it))) { | 832 | scan.process_task = cpuset_change_cpumask; |
812 | /* Only affect tasks that don't have the right cpus_allowed */ | 833 | scan.heap = &heap; |
813 | if (cpus_equal(p->cpus_allowed, cs->cpus_allowed)) | 834 | cgroup_scan_tasks(&scan); |
814 | continue; | ||
815 | /* | ||
816 | * Only process tasks that started after the last task | ||
817 | * we processed | ||
818 | */ | ||
819 | if (!started_after_time(p, &latest_time, latest_task)) | ||
820 | continue; | ||
821 | dropped = heap_insert(&heap, p); | ||
822 | if (dropped == NULL) { | ||
823 | get_task_struct(p); | ||
824 | } else if (dropped != p) { | ||
825 | get_task_struct(p); | ||
826 | put_task_struct(dropped); | ||
827 | } | ||
828 | } | ||
829 | cgroup_iter_end(cgrp, &it); | ||
830 | if (heap.size) { | ||
831 | for (i = 0; i < heap.size; i++) { | ||
832 | struct task_struct *p = heap.ptrs[i]; | ||
833 | if (i == 0) { | ||
834 | latest_time = p->start_time; | ||
835 | latest_task = p; | ||
836 | } | ||
837 | set_cpus_allowed(p, cs->cpus_allowed); | ||
838 | put_task_struct(p); | ||
839 | } | ||
840 | /* | ||
841 | * If we had to process any tasks at all, scan again | ||
842 | * in case some of them were in the middle of forking | ||
843 | * children that didn't notice the new cpumask | ||
844 | * restriction. Not the most efficient way to do it, | ||
845 | * but it avoids having to take callback_mutex in the | ||
846 | * fork path | ||
847 | */ | ||
848 | goto again; | ||
849 | } | ||
850 | heap_free(&heap); | 835 | heap_free(&heap); |
836 | |||
851 | if (is_load_balanced) | 837 | if (is_load_balanced) |
852 | rebuild_sched_domains(); | 838 | rebuild_sched_domains(); |
853 | |||
854 | return 0; | 839 | return 0; |
855 | } | 840 | } |
856 | 841 | ||