Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--	kernel/cpuset.c	363
1 file changed, 178 insertions(+), 185 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..91cf85b36dd5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -227,10 +227,6 @@ static struct cpuset top_cpuset = {
  * The task_struct fields mems_allowed and mems_generation may only
  * be accessed in the context of that task, so require no locks.
  *
- * The cpuset_common_file_write handler for operations that modify
- * the cpuset hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cpuset modifications across the system.
- *
  * The cpuset_common_file_read() handlers only hold callback_mutex across
  * small pieces of code, such as when reading out possibly multi-word
  * cpumasks and nodemasks.
@@ -369,7 +365,7 @@ void cpuset_update_task_memory_state(void)
 		my_cpusets_mem_gen = top_cpuset.mems_generation;
 	} else {
 		rcu_read_lock();
-		my_cpusets_mem_gen = task_cs(current)->mems_generation;
+		my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
 		rcu_read_unlock();
 	}
 
@@ -500,11 +496,16 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
 /*
  * rebuild_sched_domains()
  *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
+ * This routine will be called to rebuild the scheduler's dynamic
+ * sched domains:
+ * - if the flag 'sched_load_balance' of any cpuset with non-empty
+ *   'cpus' changes,
+ * - or if the 'cpus' allowed changes in any cpuset which has that
+ *   flag enabled,
+ * - or if the 'sched_relax_domain_level' of any cpuset which has
+ *   that flag enabled and with non-empty 'cpus' changes,
+ * - or if any cpuset with non-empty 'cpus' is removed,
+ * - or if a cpu gets offlined.
  *
  * This routine builds a partial partition of the systems CPUs
  * (the set of non-overlappping cpumask_t's in the array 'part'
@@ -564,7 +565,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  * partition_sched_domains().
  */
 
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
 {
 	struct kfifo *q;	/* queue of cpusets to be scanned */
 	struct cpuset *cp;	/* scans q */
@@ -609,8 +610,13 @@ static void rebuild_sched_domains(void)
 	while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
 		struct cgroup *cont;
 		struct cpuset *child;   /* scans child cpusets of cp */
+
+		if (cpus_empty(cp->cpus_allowed))
+			continue;
+
 		if (is_sched_load_balance(cp))
 			csa[csn++] = cp;
+
 		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
 			child = cgroup_cs(cont);
 			__kfifo_put(q, (void *)&child, sizeof(cp));
@@ -679,7 +685,9 @@ restart:
 			if (apn == b->pn) {
 				cpus_or(*dp, *dp, b->cpus_allowed);
 				b->pn = -1;
-				update_domain_attr(dattr, b);
+				if (dattr)
+					update_domain_attr(dattr
+							   + nslot, b);
 			}
 		}
 		nslot++;
@@ -701,36 +709,6 @@ done:
 	/* Don't kfree(dattr) -- partition_sched_domains() does that. */
 }
 
-static inline int started_after_time(struct task_struct *t1,
-				     struct timespec *time,
-				     struct task_struct *t2)
-{
-	int start_diff = timespec_compare(&t1->start_time, time);
-	if (start_diff > 0) {
-		return 1;
-	} else if (start_diff < 0) {
-		return 0;
-	} else {
-		/*
-		 * Arbitrarily, if two processes started at the same
-		 * time, we'll say that the lower pointer value
-		 * started first. Note that t2 may have exited by now
-		 * so this may not be a valid pointer any longer, but
-		 * that's fine - it still serves to distinguish
-		 * between two tasks started (effectively)
-		 * simultaneously.
-		 */
-		return t1 > t2;
-	}
-}
-
-static inline int started_after(void *p1, void *p2)
-{
-	struct task_struct *t1 = p1;
-	struct task_struct *t2 = p2;
-	return started_after_time(t1, &t2->start_time, t2);
-}
-
 /**
  * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
  * @tsk: task to test
@@ -766,15 +744,49 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
 }
 
 /**
+ * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Called with cgroup_mutex held
+ *
+ * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * calling callback functions for each.
+ *
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_cpumask(struct cpuset *cs)
+{
+	struct cgroup_scanner scan;
+	struct ptr_heap heap;
+	int retval;
+
+	/*
+	 * cgroup_scan_tasks() will initialize heap->gt for us.
+	 * heap_init() is still needed here for we should not change
+	 * cs->cpus_allowed when heap_init() fails.
+	 */
+	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
+	if (retval)
+		return retval;
+
+	scan.cg = cs->css.cgroup;
+	scan.test_task = cpuset_test_cpumask;
+	scan.process_task = cpuset_change_cpumask;
+	scan.heap = &heap;
+	retval = cgroup_scan_tasks(&scan);
+
+	heap_free(&heap);
+	return retval;
+}
+
+/**
  * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
  * @cs: the cpuset to consider
  * @buf: buffer of cpu numbers written to this cpuset
  */
-static int update_cpumask(struct cpuset *cs, char *buf)
+static int update_cpumask(struct cpuset *cs, const char *buf)
 {
 	struct cpuset trialcs;
-	struct cgroup_scanner scan;
-	struct ptr_heap heap;
 	int retval;
 	int is_load_balanced;
 
@@ -790,7 +802,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	 * that parsing. The validate_change() call ensures that cpusets
 	 * with tasks have cpus.
 	 */
-	buf = strstrip(buf);
 	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
@@ -809,10 +820,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
 		return 0;
 
-	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
-	if (retval)
-		return retval;
-
 	is_load_balanced = is_sched_load_balance(&trialcs);
 
 	mutex_lock(&callback_mutex);
@@ -823,12 +830,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	 * Scan tasks in the cpuset, and update the cpumasks of any
 	 * that need an update.
 	 */
-	scan.cg = cs->css.cgroup;
-	scan.test_task = cpuset_test_cpumask;
-	scan.process_task = cpuset_change_cpumask;
-	scan.heap = &heap;
-	cgroup_scan_tasks(&scan);
-	heap_free(&heap);
+	retval = update_tasks_cpumask(cs);
+	if (retval < 0)
+		return retval;
 
 	if (is_load_balanced)
 		rebuild_sched_domains();
@@ -884,74 +888,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	mutex_unlock(&callback_mutex);
 }
 
-/*
- * Handle user request to change the 'mems' memory placement
- * of a cpuset. Needs to validate the request, update the
- * cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies and if
- * the cpuset is marked 'memory_migrate', migrate the tasks
- * pages to the new memory.
- *
- * Call with cgroup_mutex held. May take callback_mutex during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such tasks mm->mmap_sem, scan its vma's and rebind
- * their mempolicies to the cpusets new mems_allowed.
- */
-
 static void *cpuset_being_rebound;
 
-static int update_nodemask(struct cpuset *cs, char *buf)
+/**
+ * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ * @oldmem: old mems_allowed of cpuset cs
+ *
+ * Called with cgroup_mutex held
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
 {
-	struct cpuset trialcs;
-	nodemask_t oldmem;
 	struct task_struct *p;
 	struct mm_struct **mmarray;
 	int i, n, ntasks;
 	int migrate;
 	int fudge;
-	int retval;
 	struct cgroup_iter it;
-
-	/*
-	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
-	 * it's read-only
-	 */
-	if (cs == &top_cpuset)
-		return -EACCES;
-
-	trialcs = *cs;
-
-	/*
-	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
-	 * Since nodelist_parse() fails on an empty mask, we special case
-	 * that parsing. The validate_change() call ensures that cpusets
-	 * with tasks have memory.
-	 */
-	buf = strstrip(buf);
-	if (!*buf) {
-		nodes_clear(trialcs.mems_allowed);
-	} else {
-		retval = nodelist_parse(buf, trialcs.mems_allowed);
-		if (retval < 0)
-			goto done;
-
-		if (!nodes_subset(trialcs.mems_allowed,
-				node_states[N_HIGH_MEMORY]))
-			return -EINVAL;
-	}
-	oldmem = cs->mems_allowed;
-	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
-		retval = 0;		/* Too easy - nothing to do */
-		goto done;
-	}
-	retval = validate_change(cs, &trialcs);
-	if (retval < 0)
-		goto done;
-
-	mutex_lock(&callback_mutex);
-	cs->mems_allowed = trialcs.mems_allowed;
-	cs->mems_generation = cpuset_mems_generation++;
-	mutex_unlock(&callback_mutex);
+	int retval;
 
 	cpuset_being_rebound = cs;	/* causes mpol_dup() rebind */
 
@@ -1018,7 +973,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 
 		mpol_rebind_mm(mm, &cs->mems_allowed);
 		if (migrate)
-			cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
+			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
 		mmput(mm);
 	}
 
@@ -1030,6 +985,70 @@ done:
 	return retval;
 }
 
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset. Needs to validate the request, update the
+ * cpusets mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the tasks
+ * pages to the new memory.
+ *
+ * Call with cgroup_mutex held. May take callback_mutex during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
+ */
+static int update_nodemask(struct cpuset *cs, const char *buf)
+{
+	struct cpuset trialcs;
+	nodemask_t oldmem;
+	int retval;
+
+	/*
+	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
+	 * it's read-only
+	 */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
+	trialcs = *cs;
+
+	/*
+	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+	 * Since nodelist_parse() fails on an empty mask, we special case
+	 * that parsing. The validate_change() call ensures that cpusets
+	 * with tasks have memory.
+	 */
+	if (!*buf) {
+		nodes_clear(trialcs.mems_allowed);
+	} else {
+		retval = nodelist_parse(buf, trialcs.mems_allowed);
+		if (retval < 0)
+			goto done;
+
+		if (!nodes_subset(trialcs.mems_allowed,
+				node_states[N_HIGH_MEMORY]))
+			return -EINVAL;
+	}
+	oldmem = cs->mems_allowed;
+	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+		retval = 0;		/* Too easy - nothing to do */
+		goto done;
+	}
+	retval = validate_change(cs, &trialcs);
+	if (retval < 0)
+		goto done;
+
+	mutex_lock(&callback_mutex);
+	cs->mems_allowed = trialcs.mems_allowed;
+	cs->mems_generation = cpuset_mems_generation++;
+	mutex_unlock(&callback_mutex);
+
+	retval = update_tasks_nodemask(cs, &oldmem);
+done:
+	return retval;
+}
+
 int current_cpuset_is_being_rebound(void)
 {
 	return task_cs(current) == cpuset_being_rebound;
@@ -1042,7 +1061,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 
 	if (val != cs->relax_domain_level) {
 		cs->relax_domain_level = val;
-		rebuild_sched_domains();
+		if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+			rebuild_sched_domains();
 	}
 
 	return 0;
@@ -1254,72 +1274,14 @@ typedef enum {
 	FILE_SPREAD_SLAB,
 } cpuset_filetype_t;
 
-static ssize_t cpuset_common_file_write(struct cgroup *cont,
-					struct cftype *cft,
-					struct file *file,
-					const char __user *userbuf,
-					size_t nbytes, loff_t *unused_ppos)
-{
-	struct cpuset *cs = cgroup_cs(cont);
-	cpuset_filetype_t type = cft->private;
-	char *buffer;
-	int retval = 0;
-
-	/* Crude upper limit on largest legitimate cpulist user might write. */
-	if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
-		return -E2BIG;
-
-	/* +1 for nul-terminator */
-	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out1;
-	}
-	buffer[nbytes] = 0;	/* nul-terminate */
-
-	cgroup_lock();
-
-	if (cgroup_is_removed(cont)) {
-		retval = -ENODEV;
-		goto out2;
-	}
-
-	switch (type) {
-	case FILE_CPULIST:
-		retval = update_cpumask(cs, buffer);
-		break;
-	case FILE_MEMLIST:
-		retval = update_nodemask(cs, buffer);
-		break;
-	default:
-		retval = -EINVAL;
-		goto out2;
-	}
-
-	if (retval == 0)
-		retval = nbytes;
-out2:
-	cgroup_unlock();
-out1:
-	kfree(buffer);
-	return retval;
-}
-
 static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
 {
 	int retval = 0;
 	struct cpuset *cs = cgroup_cs(cgrp);
 	cpuset_filetype_t type = cft->private;
 
-	cgroup_lock();
-
-	if (cgroup_is_removed(cgrp)) {
-		cgroup_unlock();
-		return -ENODEV;
-	}
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
 
 	switch (type) {
 	case FILE_CPU_EXCLUSIVE:
@@ -1365,12 +1327,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
 	struct cpuset *cs = cgroup_cs(cgrp);
 	cpuset_filetype_t type = cft->private;
 
-	cgroup_lock();
-
-	if (cgroup_is_removed(cgrp)) {
-		cgroup_unlock();
-		return -ENODEV;
-	}
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
 	switch (type) {
 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
 		retval = update_relax_domain_level(cs, val);
@@ -1384,6 +1343,32 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
 }
 
 /*
+ * Common handling for a write to a "cpus" or "mems" file.
+ */
+static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
+				const char *buf)
+{
+	int retval = 0;
+
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
+	switch (cft->private) {
+	case FILE_CPULIST:
+		retval = update_cpumask(cgroup_cs(cgrp), buf);
+		break;
+	case FILE_MEMLIST:
+		retval = update_nodemask(cgroup_cs(cgrp), buf);
+		break;
+	default:
+		retval = -EINVAL;
+		break;
+	}
+	cgroup_unlock();
+	return retval;
+}
+
+/*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map. If read in smaller
  * chunks, there is no guarantee of atomicity. Since the display format
@@ -1502,14 +1487,16 @@ static struct cftype files[] = {
 	{
 		.name = "cpus",
 		.read = cpuset_common_file_read,
-		.write = cpuset_common_file_write,
+		.write_string = cpuset_write_resmask,
+		.max_write_len = (100U + 6 * NR_CPUS),
 		.private = FILE_CPULIST,
 	},
 
 	{
 		.name = "mems",
 		.read = cpuset_common_file_read,
-		.write = cpuset_common_file_write,
+		.write_string = cpuset_write_resmask,
+		.max_write_len = (100U + 6 * MAX_NUMNODES),
 		.private = FILE_MEMLIST,
 	},
 
@@ -1790,7 +1777,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
 	scan.scan.heap = NULL;
 	scan.to = to->css.cgroup;
 
-	if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+	if (cgroup_scan_tasks(&scan.scan))
 		printk(KERN_ERR "move_member_tasks_to_cpuset: "
 				"cgroup_scan_tasks failed\n");
 }
@@ -1850,6 +1837,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 	struct cpuset *child;	/* scans child cpusets of cp */
 	struct list_head queue;
 	struct cgroup *cont;
+	nodemask_t oldmems;
 
 	INIT_LIST_HEAD(&queue);
 
@@ -1869,6 +1857,8 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
+		oldmems = cp->mems_allowed;
+
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
@@ -1880,6 +1870,10 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 		if (cpus_empty(cp->cpus_allowed) ||
 		    nodes_empty(cp->mems_allowed))
 			remove_tasks_in_empty_cpuset(cp);
+		else {
+			update_tasks_cpumask(cp);
+			update_tasks_nodemask(cp, &oldmems);
+		}
 	}
 }
 
@@ -1972,7 +1966,6 @@ void __init cpuset_init_smp(void)
 }
 
 /**
-
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
  * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.