about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- kernel/cpuset.c 107
1 files changed, 46 insertions, 61 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d94a8f7c4c29..20cb3916c66c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -38,7 +38,6 @@
38#include <linux/mount.h> 38#include <linux/mount.h>
39#include <linux/namei.h> 39#include <linux/namei.h>
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/prio_heap.h>
42#include <linux/proc_fs.h> 41#include <linux/proc_fs.h>
43#include <linux/rcupdate.h> 42#include <linux/rcupdate.h>
44#include <linux/sched.h> 43#include <linux/sched.h>
@@ -740,22 +739,50 @@ static inline int started_after(void *p1, void *p2)
740 return started_after_time(t1, &t2->start_time, t2); 739 return started_after_time(t1, &t2->start_time, t2);
741} 740}
742 741
743/* 742/**
743 * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
744 * @tsk: task to test
745 * @scan: struct cgroup_scanner containing the cgroup of the task
746 *
744 * Call with manage_mutex held. May take callback_mutex during call. 747 * Call with manage_mutex held. May take callback_mutex during call.
748 * Called for each task in a cgroup by cgroup_scan_tasks().
749 * Return nonzero if this task's cpus_allowed mask should be changed (in other
750 * words, if its mask is not equal to its cpuset's mask).
745 */ 751 */
752int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
753{
754 return !cpus_equal(tsk->cpus_allowed,
755 (cgroup_cs(scan->cg))->cpus_allowed);
756}
746 757
758/**
759 * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's
760 * @tsk: task whose cpus_allowed mask is to be changed
761 * @scan: struct cgroup_scanner containing the cgroup of the task
762 *
763 * Called by cgroup_scan_tasks() for each task in a cgroup whose
764 * cpus_allowed mask needs to be changed.
765 *
766 * We don't need to re-check for the cgroup/cpuset membership, since we're
767 * holding cgroup_lock() at this point.
768 */
769void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
770{
771 set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed);
772}
773
774/**
775 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
776 * @cs: the cpuset to consider
777 * @buf: buffer of cpu numbers written to this cpuset
778 */
747static int update_cpumask(struct cpuset *cs, char *buf) 779static int update_cpumask(struct cpuset *cs, char *buf)
748{ 780{
749 struct cpuset trialcs; 781 struct cpuset trialcs;
750 int retval, i; 782 struct cgroup_scanner scan;
751 int is_load_balanced;
752 struct cgroup_iter it;
753 struct cgroup *cgrp = cs->css.cgroup;
754 struct task_struct *p, *dropped;
755 /* Never dereference latest_task, since it's not refcounted */
756 struct task_struct *latest_task = NULL;
757 struct ptr_heap heap; 783 struct ptr_heap heap;
758 struct timespec latest_time = { 0, 0 }; 784 int retval;
785 int is_load_balanced;
759 786
760 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ 787 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
761 if (cs == &top_cpuset) 788 if (cs == &top_cpuset)
@@ -764,7 +791,7 @@ static int update_cpumask(struct cpuset *cs, char *buf)
764 trialcs = *cs; 791 trialcs = *cs;
765 792
766 /* 793 /*
767 * An empty cpus_allowed is ok iff there are no tasks in the cpuset. 794 * An empty cpus_allowed is ok if there are no tasks in the cpuset.
768 * Since cpulist_parse() fails on an empty mask, we special case 795 * Since cpulist_parse() fails on an empty mask, we special case
769 * that parsing. The validate_change() call ensures that cpusets 796 * that parsing. The validate_change() call ensures that cpusets
770 * with tasks have cpus. 797 * with tasks have cpus.
@@ -785,6 +812,7 @@ static int update_cpumask(struct cpuset *cs, char *buf)
785 /* Nothing to do if the cpus didn't change */ 812 /* Nothing to do if the cpus didn't change */
786 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) 813 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
787 return 0; 814 return 0;
815
788 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); 816 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
789 if (retval) 817 if (retval)
790 return retval; 818 return retval;
@@ -795,62 +823,19 @@ static int update_cpumask(struct cpuset *cs, char *buf)
795 cs->cpus_allowed = trialcs.cpus_allowed; 823 cs->cpus_allowed = trialcs.cpus_allowed;
796 mutex_unlock(&callback_mutex); 824 mutex_unlock(&callback_mutex);
797 825
798 again:
799 /* 826 /*
800 * Scan tasks in the cpuset, and update the cpumasks of any 827 * Scan tasks in the cpuset, and update the cpumasks of any
801 * that need an update. Since we can't call set_cpus_allowed() 828 * that need an update.
802 * while holding tasklist_lock, gather tasks to be processed
803 * in a heap structure. If the statically-sized heap fills up,
804 * overflow tasks that started later, and in future iterations
805 * only consider tasks that started after the latest task in
806 * the previous pass. This guarantees forward progress and
807 * that we don't miss any tasks
808 */ 829 */
809 heap.size = 0; 830 scan.cg = cs->css.cgroup;
810 cgroup_iter_start(cgrp, &it); 831 scan.test_task = cpuset_test_cpumask;
811 while ((p = cgroup_iter_next(cgrp, &it))) { 832 scan.process_task = cpuset_change_cpumask;
812 /* Only affect tasks that don't have the right cpus_allowed */ 833 scan.heap = &heap;
813 if (cpus_equal(p->cpus_allowed, cs->cpus_allowed)) 834 cgroup_scan_tasks(&scan);
814 continue;
815 /*
816 * Only process tasks that started after the last task
817 * we processed
818 */
819 if (!started_after_time(p, &latest_time, latest_task))
820 continue;
821 dropped = heap_insert(&heap, p);
822 if (dropped == NULL) {
823 get_task_struct(p);
824 } else if (dropped != p) {
825 get_task_struct(p);
826 put_task_struct(dropped);
827 }
828 }
829 cgroup_iter_end(cgrp, &it);
830 if (heap.size) {
831 for (i = 0; i < heap.size; i++) {
832 struct task_struct *p = heap.ptrs[i];
833 if (i == 0) {
834 latest_time = p->start_time;
835 latest_task = p;
836 }
837 set_cpus_allowed(p, cs->cpus_allowed);
838 put_task_struct(p);
839 }
840 /*
841 * If we had to process any tasks at all, scan again
842 * in case some of them were in the middle of forking
843 * children that didn't notice the new cpumask
844 * restriction. Not the most efficient way to do it,
845 * but it avoids having to take callback_mutex in the
846 * fork path
847 */
848 goto again;
849 }
850 heap_free(&heap); 835 heap_free(&heap);
836
851 if (is_load_balanced) 837 if (is_load_balanced)
852 rebuild_sched_domains(); 838 rebuild_sched_domains();
853
854 return 0; 839 return 0;
855} 840}
856 841