aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-01-10 21:41:39 -0500
committerIngo Molnar <mingo@elte.hu>2009-01-10 21:41:39 -0500
commitabede81c4fb2e3b85d8760f25e3da39d2c69a134 (patch)
tree26c893ec108d837eb9171d678c55a1cea7b22af4 /kernel/cpuset.c
parentc9d557c19f94df42db78d4a5de4d25feee694bad (diff)
parentc59765042f53a79a7a65585042ff463b69cb248c (diff)
Merge commit 'v2.6.29-rc1' into core/urgent
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c251
1 files changed, 151 insertions, 100 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 345ace5117de..647c77a88fcb 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -84,7 +84,7 @@ struct cpuset {
84 struct cgroup_subsys_state css; 84 struct cgroup_subsys_state css;
85 85
86 unsigned long flags; /* "unsigned long" so bitops work */ 86 unsigned long flags; /* "unsigned long" so bitops work */
87 cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ 87 cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
88 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ 88 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
89 89
90 struct cpuset *parent; /* my parent */ 90 struct cpuset *parent; /* my parent */
@@ -195,8 +195,6 @@ static int cpuset_mems_generation;
195 195
196static struct cpuset top_cpuset = { 196static struct cpuset top_cpuset = {
197 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), 197 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
198 .cpus_allowed = CPU_MASK_ALL,
199 .mems_allowed = NODE_MASK_ALL,
200}; 198};
201 199
202/* 200/*
@@ -278,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
278}; 276};
279 277
280/* 278/*
281 * Return in *pmask the portion of a cpusets's cpus_allowed that 279 * Return in pmask the portion of a cpusets's cpus_allowed that
282 * are online. If none are online, walk up the cpuset hierarchy 280 * are online. If none are online, walk up the cpuset hierarchy
283 * until we find one that does have some online cpus. If we get 281 * until we find one that does have some online cpus. If we get
284 * all the way to the top and still haven't found any online cpus, 282 * all the way to the top and still haven't found any online cpus,
@@ -291,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
291 * Call with callback_mutex held. 289 * Call with callback_mutex held.
292 */ 290 */
293 291
294static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) 292static void guarantee_online_cpus(const struct cpuset *cs,
293 struct cpumask *pmask)
295{ 294{
296 while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map)) 295 while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
297 cs = cs->parent; 296 cs = cs->parent;
298 if (cs) 297 if (cs)
299 cpus_and(*pmask, cs->cpus_allowed, cpu_online_map); 298 cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
300 else 299 else
301 *pmask = cpu_online_map; 300 cpumask_copy(pmask, cpu_online_mask);
302 BUG_ON(!cpus_intersects(*pmask, cpu_online_map)); 301 BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
303} 302}
304 303
305/* 304/*
@@ -375,14 +374,9 @@ void cpuset_update_task_memory_state(void)
375 struct task_struct *tsk = current; 374 struct task_struct *tsk = current;
376 struct cpuset *cs; 375 struct cpuset *cs;
377 376
378 if (task_cs(tsk) == &top_cpuset) { 377 rcu_read_lock();
379 /* Don't need rcu for top_cpuset. It's never freed. */ 378 my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
380 my_cpusets_mem_gen = top_cpuset.mems_generation; 379 rcu_read_unlock();
381 } else {
382 rcu_read_lock();
383 my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
384 rcu_read_unlock();
385 }
386 380
387 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { 381 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
388 mutex_lock(&callback_mutex); 382 mutex_lock(&callback_mutex);
@@ -414,12 +408,43 @@ void cpuset_update_task_memory_state(void)
414 408
415static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) 409static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
416{ 410{
417 return cpus_subset(p->cpus_allowed, q->cpus_allowed) && 411 return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
418 nodes_subset(p->mems_allowed, q->mems_allowed) && 412 nodes_subset(p->mems_allowed, q->mems_allowed) &&
419 is_cpu_exclusive(p) <= is_cpu_exclusive(q) && 413 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
420 is_mem_exclusive(p) <= is_mem_exclusive(q); 414 is_mem_exclusive(p) <= is_mem_exclusive(q);
421} 415}
422 416
417/**
418 * alloc_trial_cpuset - allocate a trial cpuset
419 * @cs: the cpuset that the trial cpuset duplicates
420 */
421static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
422{
423 struct cpuset *trial;
424
425 trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
426 if (!trial)
427 return NULL;
428
429 if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
430 kfree(trial);
431 return NULL;
432 }
433 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
434
435 return trial;
436}
437
438/**
439 * free_trial_cpuset - free the trial cpuset
440 * @trial: the trial cpuset to be freed
441 */
442static void free_trial_cpuset(struct cpuset *trial)
443{
444 free_cpumask_var(trial->cpus_allowed);
445 kfree(trial);
446}
447
423/* 448/*
424 * validate_change() - Used to validate that any proposed cpuset change 449 * validate_change() - Used to validate that any proposed cpuset change
425 * follows the structural rules for cpusets. 450 * follows the structural rules for cpusets.
@@ -469,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
469 c = cgroup_cs(cont); 494 c = cgroup_cs(cont);
470 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && 495 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
471 c != cur && 496 c != cur &&
472 cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) 497 cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
473 return -EINVAL; 498 return -EINVAL;
474 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && 499 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
475 c != cur && 500 c != cur &&
@@ -479,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
479 504
480 /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ 505 /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
481 if (cgroup_task_count(cur->css.cgroup)) { 506 if (cgroup_task_count(cur->css.cgroup)) {
482 if (cpus_empty(trial->cpus_allowed) || 507 if (cpumask_empty(trial->cpus_allowed) ||
483 nodes_empty(trial->mems_allowed)) { 508 nodes_empty(trial->mems_allowed)) {
484 return -ENOSPC; 509 return -ENOSPC;
485 } 510 }
@@ -494,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
494 */ 519 */
495static int cpusets_overlap(struct cpuset *a, struct cpuset *b) 520static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
496{ 521{
497 return cpus_intersects(a->cpus_allowed, b->cpus_allowed); 522 return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
498} 523}
499 524
500static void 525static void
@@ -519,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
519 cp = list_first_entry(&q, struct cpuset, stack_list); 544 cp = list_first_entry(&q, struct cpuset, stack_list);
520 list_del(q.next); 545 list_del(q.next);
521 546
522 if (cpus_empty(cp->cpus_allowed)) 547 if (cpumask_empty(cp->cpus_allowed))
523 continue; 548 continue;
524 549
525 if (is_sched_load_balance(cp)) 550 if (is_sched_load_balance(cp))
@@ -586,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
586 * element of the partition (one sched domain) to be passed to 611 * element of the partition (one sched domain) to be passed to
587 * partition_sched_domains(). 612 * partition_sched_domains().
588 */ 613 */
589static int generate_sched_domains(cpumask_t **domains, 614/* FIXME: see the FIXME in partition_sched_domains() */
615static int generate_sched_domains(struct cpumask **domains,
590 struct sched_domain_attr **attributes) 616 struct sched_domain_attr **attributes)
591{ 617{
592 LIST_HEAD(q); /* queue of cpusets to be scanned */ 618 LIST_HEAD(q); /* queue of cpusets to be scanned */
@@ -594,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains,
594 struct cpuset **csa; /* array of all cpuset ptrs */ 620 struct cpuset **csa; /* array of all cpuset ptrs */
595 int csn; /* how many cpuset ptrs in csa so far */ 621 int csn; /* how many cpuset ptrs in csa so far */
596 int i, j, k; /* indices for partition finding loops */ 622 int i, j, k; /* indices for partition finding loops */
597 cpumask_t *doms; /* resulting partition; i.e. sched domains */ 623 struct cpumask *doms; /* resulting partition; i.e. sched domains */
598 struct sched_domain_attr *dattr; /* attributes for custom domains */ 624 struct sched_domain_attr *dattr; /* attributes for custom domains */
599 int ndoms = 0; /* number of sched domains in result */ 625 int ndoms = 0; /* number of sched domains in result */
600 int nslot; /* next empty doms[] cpumask_t slot */ 626 int nslot; /* next empty doms[] struct cpumask slot */
601 627
602 doms = NULL; 628 doms = NULL;
603 dattr = NULL; 629 dattr = NULL;
@@ -605,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains,
605 631
606 /* Special case for the 99% of systems with one, full, sched domain */ 632 /* Special case for the 99% of systems with one, full, sched domain */
607 if (is_sched_load_balance(&top_cpuset)) { 633 if (is_sched_load_balance(&top_cpuset)) {
608 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 634 doms = kmalloc(cpumask_size(), GFP_KERNEL);
609 if (!doms) 635 if (!doms)
610 goto done; 636 goto done;
611 637
@@ -614,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
614 *dattr = SD_ATTR_INIT; 640 *dattr = SD_ATTR_INIT;
615 update_domain_attr_tree(dattr, &top_cpuset); 641 update_domain_attr_tree(dattr, &top_cpuset);
616 } 642 }
617 *doms = top_cpuset.cpus_allowed; 643 cpumask_copy(doms, top_cpuset.cpus_allowed);
618 644
619 ndoms = 1; 645 ndoms = 1;
620 goto done; 646 goto done;
@@ -633,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
633 cp = list_first_entry(&q, struct cpuset, stack_list); 659 cp = list_first_entry(&q, struct cpuset, stack_list);
634 list_del(q.next); 660 list_del(q.next);
635 661
636 if (cpus_empty(cp->cpus_allowed)) 662 if (cpumask_empty(cp->cpus_allowed))
637 continue; 663 continue;
638 664
639 /* 665 /*
@@ -684,7 +710,7 @@ restart:
684 * Now we know how many domains to create. 710 * Now we know how many domains to create.
685 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 711 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
686 */ 712 */
687 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); 713 doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
688 if (!doms) 714 if (!doms)
689 goto done; 715 goto done;
690 716
@@ -696,7 +722,7 @@ restart:
696 722
697 for (nslot = 0, i = 0; i < csn; i++) { 723 for (nslot = 0, i = 0; i < csn; i++) {
698 struct cpuset *a = csa[i]; 724 struct cpuset *a = csa[i];
699 cpumask_t *dp; 725 struct cpumask *dp;
700 int apn = a->pn; 726 int apn = a->pn;
701 727
702 if (apn < 0) { 728 if (apn < 0) {
@@ -719,14 +745,14 @@ restart:
719 continue; 745 continue;
720 } 746 }
721 747
722 cpus_clear(*dp); 748 cpumask_clear(dp);
723 if (dattr) 749 if (dattr)
724 *(dattr + nslot) = SD_ATTR_INIT; 750 *(dattr + nslot) = SD_ATTR_INIT;
725 for (j = i; j < csn; j++) { 751 for (j = i; j < csn; j++) {
726 struct cpuset *b = csa[j]; 752 struct cpuset *b = csa[j];
727 753
728 if (apn == b->pn) { 754 if (apn == b->pn) {
729 cpus_or(*dp, *dp, b->cpus_allowed); 755 cpumask_or(dp, dp, b->cpus_allowed);
730 if (dattr) 756 if (dattr)
731 update_domain_attr_tree(dattr + nslot, b); 757 update_domain_attr_tree(dattr + nslot, b);
732 758
@@ -766,7 +792,7 @@ done:
766static void do_rebuild_sched_domains(struct work_struct *unused) 792static void do_rebuild_sched_domains(struct work_struct *unused)
767{ 793{
768 struct sched_domain_attr *attr; 794 struct sched_domain_attr *attr;
769 cpumask_t *doms; 795 struct cpumask *doms;
770 int ndoms; 796 int ndoms;
771 797
772 get_online_cpus(); 798 get_online_cpus();
@@ -835,7 +861,7 @@ void rebuild_sched_domains(void)
835static int cpuset_test_cpumask(struct task_struct *tsk, 861static int cpuset_test_cpumask(struct task_struct *tsk,
836 struct cgroup_scanner *scan) 862 struct cgroup_scanner *scan)
837{ 863{
838 return !cpus_equal(tsk->cpus_allowed, 864 return !cpumask_equal(&tsk->cpus_allowed,
839 (cgroup_cs(scan->cg))->cpus_allowed); 865 (cgroup_cs(scan->cg))->cpus_allowed);
840} 866}
841 867
@@ -853,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
853static void cpuset_change_cpumask(struct task_struct *tsk, 879static void cpuset_change_cpumask(struct task_struct *tsk,
854 struct cgroup_scanner *scan) 880 struct cgroup_scanner *scan)
855{ 881{
856 set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); 882 set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
857} 883}
858 884
859/** 885/**
@@ -885,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
885 * @cs: the cpuset to consider 911 * @cs: the cpuset to consider
886 * @buf: buffer of cpu numbers written to this cpuset 912 * @buf: buffer of cpu numbers written to this cpuset
887 */ 913 */
888static int update_cpumask(struct cpuset *cs, const char *buf) 914static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
915 const char *buf)
889{ 916{
890 struct ptr_heap heap; 917 struct ptr_heap heap;
891 struct cpuset trialcs;
892 int retval; 918 int retval;
893 int is_load_balanced; 919 int is_load_balanced;
894 920
@@ -896,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
896 if (cs == &top_cpuset) 922 if (cs == &top_cpuset)
897 return -EACCES; 923 return -EACCES;
898 924
899 trialcs = *cs;
900
901 /* 925 /*
902 * An empty cpus_allowed is ok only if the cpuset has no tasks. 926 * An empty cpus_allowed is ok only if the cpuset has no tasks.
903 * Since cpulist_parse() fails on an empty mask, we special case 927 * Since cpulist_parse() fails on an empty mask, we special case
@@ -905,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
905 * with tasks have cpus. 929 * with tasks have cpus.
906 */ 930 */
907 if (!*buf) { 931 if (!*buf) {
908 cpus_clear(trialcs.cpus_allowed); 932 cpumask_clear(trialcs->cpus_allowed);
909 } else { 933 } else {
910 retval = cpulist_parse(buf, &trialcs.cpus_allowed); 934 retval = cpulist_parse(buf, trialcs->cpus_allowed);
911 if (retval < 0) 935 if (retval < 0)
912 return retval; 936 return retval;
913 937
914 if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) 938 if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
915 return -EINVAL; 939 return -EINVAL;
916 } 940 }
917 retval = validate_change(cs, &trialcs); 941 retval = validate_change(cs, trialcs);
918 if (retval < 0) 942 if (retval < 0)
919 return retval; 943 return retval;
920 944
921 /* Nothing to do if the cpus didn't change */ 945 /* Nothing to do if the cpus didn't change */
922 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) 946 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
923 return 0; 947 return 0;
924 948
925 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); 949 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
926 if (retval) 950 if (retval)
927 return retval; 951 return retval;
928 952
929 is_load_balanced = is_sched_load_balance(&trialcs); 953 is_load_balanced = is_sched_load_balance(trialcs);
930 954
931 mutex_lock(&callback_mutex); 955 mutex_lock(&callback_mutex);
932 cs->cpus_allowed = trialcs.cpus_allowed; 956 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
933 mutex_unlock(&callback_mutex); 957 mutex_unlock(&callback_mutex);
934 958
935 /* 959 /*
@@ -1017,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
1017 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ 1041 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
1018 1042
1019 fudge = 10; /* spare mmarray[] slots */ 1043 fudge = 10; /* spare mmarray[] slots */
1020 fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ 1044 fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
1021 retval = -ENOMEM; 1045 retval = -ENOMEM;
1022 1046
1023 /* 1047 /*
@@ -1104,9 +1128,9 @@ done:
1104 * lock each such tasks mm->mmap_sem, scan its vma's and rebind 1128 * lock each such tasks mm->mmap_sem, scan its vma's and rebind
1105 * their mempolicies to the cpusets new mems_allowed. 1129 * their mempolicies to the cpusets new mems_allowed.
1106 */ 1130 */
1107static int update_nodemask(struct cpuset *cs, const char *buf) 1131static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1132 const char *buf)
1108{ 1133{
1109 struct cpuset trialcs;
1110 nodemask_t oldmem; 1134 nodemask_t oldmem;
1111 int retval; 1135 int retval;
1112 1136
@@ -1117,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
1117 if (cs == &top_cpuset) 1141 if (cs == &top_cpuset)
1118 return -EACCES; 1142 return -EACCES;
1119 1143
1120 trialcs = *cs;
1121
1122 /* 1144 /*
1123 * An empty mems_allowed is ok iff there are no tasks in the cpuset. 1145 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
1124 * Since nodelist_parse() fails on an empty mask, we special case 1146 * Since nodelist_parse() fails on an empty mask, we special case
@@ -1126,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
1126 * with tasks have memory. 1148 * with tasks have memory.
1127 */ 1149 */
1128 if (!*buf) { 1150 if (!*buf) {
1129 nodes_clear(trialcs.mems_allowed); 1151 nodes_clear(trialcs->mems_allowed);
1130 } else { 1152 } else {
1131 retval = nodelist_parse(buf, trialcs.mems_allowed); 1153 retval = nodelist_parse(buf, trialcs->mems_allowed);
1132 if (retval < 0) 1154 if (retval < 0)
1133 goto done; 1155 goto done;
1134 1156
1135 if (!nodes_subset(trialcs.mems_allowed, 1157 if (!nodes_subset(trialcs->mems_allowed,
1136 node_states[N_HIGH_MEMORY])) 1158 node_states[N_HIGH_MEMORY]))
1137 return -EINVAL; 1159 return -EINVAL;
1138 } 1160 }
1139 oldmem = cs->mems_allowed; 1161 oldmem = cs->mems_allowed;
1140 if (nodes_equal(oldmem, trialcs.mems_allowed)) { 1162 if (nodes_equal(oldmem, trialcs->mems_allowed)) {
1141 retval = 0; /* Too easy - nothing to do */ 1163 retval = 0; /* Too easy - nothing to do */
1142 goto done; 1164 goto done;
1143 } 1165 }
1144 retval = validate_change(cs, &trialcs); 1166 retval = validate_change(cs, trialcs);
1145 if (retval < 0) 1167 if (retval < 0)
1146 goto done; 1168 goto done;
1147 1169
1148 mutex_lock(&callback_mutex); 1170 mutex_lock(&callback_mutex);
1149 cs->mems_allowed = trialcs.mems_allowed; 1171 cs->mems_allowed = trialcs->mems_allowed;
1150 cs->mems_generation = cpuset_mems_generation++; 1172 cs->mems_generation = cpuset_mems_generation++;
1151 mutex_unlock(&callback_mutex); 1173 mutex_unlock(&callback_mutex);
1152 1174
@@ -1167,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
1167 1189
1168 if (val != cs->relax_domain_level) { 1190 if (val != cs->relax_domain_level) {
1169 cs->relax_domain_level = val; 1191 cs->relax_domain_level = val;
1170 if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) 1192 if (!cpumask_empty(cs->cpus_allowed) &&
1193 is_sched_load_balance(cs))
1171 async_rebuild_sched_domains(); 1194 async_rebuild_sched_domains();
1172 } 1195 }
1173 1196
@@ -1186,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
1186static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, 1209static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1187 int turning_on) 1210 int turning_on)
1188{ 1211{
1189 struct cpuset trialcs; 1212 struct cpuset *trialcs;
1190 int err; 1213 int err;
1191 int balance_flag_changed; 1214 int balance_flag_changed;
1192 1215
1193 trialcs = *cs; 1216 trialcs = alloc_trial_cpuset(cs);
1217 if (!trialcs)
1218 return -ENOMEM;
1219
1194 if (turning_on) 1220 if (turning_on)
1195 set_bit(bit, &trialcs.flags); 1221 set_bit(bit, &trialcs->flags);
1196 else 1222 else
1197 clear_bit(bit, &trialcs.flags); 1223 clear_bit(bit, &trialcs->flags);
1198 1224
1199 err = validate_change(cs, &trialcs); 1225 err = validate_change(cs, trialcs);
1200 if (err < 0) 1226 if (err < 0)
1201 return err; 1227 goto out;
1202 1228
1203 balance_flag_changed = (is_sched_load_balance(cs) != 1229 balance_flag_changed = (is_sched_load_balance(cs) !=
1204 is_sched_load_balance(&trialcs)); 1230 is_sched_load_balance(trialcs));
1205 1231
1206 mutex_lock(&callback_mutex); 1232 mutex_lock(&callback_mutex);
1207 cs->flags = trialcs.flags; 1233 cs->flags = trialcs->flags;
1208 mutex_unlock(&callback_mutex); 1234 mutex_unlock(&callback_mutex);
1209 1235
1210 if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed) 1236 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
1211 async_rebuild_sched_domains(); 1237 async_rebuild_sched_domains();
1212 1238
1213 return 0; 1239out:
1240 free_trial_cpuset(trialcs);
1241 return err;
1214} 1242}
1215 1243
1216/* 1244/*
@@ -1311,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
1311 return val; 1339 return val;
1312} 1340}
1313 1341
1342/* Protected by cgroup_lock */
1343static cpumask_var_t cpus_attach;
1344
1314/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ 1345/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1315static int cpuset_can_attach(struct cgroup_subsys *ss, 1346static int cpuset_can_attach(struct cgroup_subsys *ss,
1316 struct cgroup *cont, struct task_struct *tsk) 1347 struct cgroup *cont, struct task_struct *tsk)
1317{ 1348{
1318 struct cpuset *cs = cgroup_cs(cont); 1349 struct cpuset *cs = cgroup_cs(cont);
1350 int ret = 0;
1319 1351
1320 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1352 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1321 return -ENOSPC; 1353 return -ENOSPC;
1322 if (tsk->flags & PF_THREAD_BOUND) {
1323 cpumask_t mask;
1324 1354
1355 if (tsk->flags & PF_THREAD_BOUND) {
1325 mutex_lock(&callback_mutex); 1356 mutex_lock(&callback_mutex);
1326 mask = cs->cpus_allowed; 1357 if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
1358 ret = -EINVAL;
1327 mutex_unlock(&callback_mutex); 1359 mutex_unlock(&callback_mutex);
1328 if (!cpus_equal(tsk->cpus_allowed, mask))
1329 return -EINVAL;
1330 } 1360 }
1331 1361
1332 return security_task_setscheduler(tsk, 0, NULL); 1362 return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
1333} 1363}
1334 1364
1335static void cpuset_attach(struct cgroup_subsys *ss, 1365static void cpuset_attach(struct cgroup_subsys *ss,
1336 struct cgroup *cont, struct cgroup *oldcont, 1366 struct cgroup *cont, struct cgroup *oldcont,
1337 struct task_struct *tsk) 1367 struct task_struct *tsk)
1338{ 1368{
1339 cpumask_t cpus;
1340 nodemask_t from, to; 1369 nodemask_t from, to;
1341 struct mm_struct *mm; 1370 struct mm_struct *mm;
1342 struct cpuset *cs = cgroup_cs(cont); 1371 struct cpuset *cs = cgroup_cs(cont);
1343 struct cpuset *oldcs = cgroup_cs(oldcont); 1372 struct cpuset *oldcs = cgroup_cs(oldcont);
1344 int err; 1373 int err;
1345 1374
1346 mutex_lock(&callback_mutex); 1375 if (cs == &top_cpuset) {
1347 guarantee_online_cpus(cs, &cpus); 1376 cpumask_copy(cpus_attach, cpu_possible_mask);
1348 err = set_cpus_allowed_ptr(tsk, &cpus); 1377 } else {
1349 mutex_unlock(&callback_mutex); 1378 mutex_lock(&callback_mutex);
1379 guarantee_online_cpus(cs, cpus_attach);
1380 mutex_unlock(&callback_mutex);
1381 }
1382 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1350 if (err) 1383 if (err)
1351 return; 1384 return;
1352 1385
@@ -1359,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
1359 cpuset_migrate_mm(mm, &from, &to); 1392 cpuset_migrate_mm(mm, &from, &to);
1360 mmput(mm); 1393 mmput(mm);
1361 } 1394 }
1362
1363} 1395}
1364 1396
1365/* The various types of files and directories in a cpuset file system */ 1397/* The various types of files and directories in a cpuset file system */
@@ -1454,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1454 const char *buf) 1486 const char *buf)
1455{ 1487{
1456 int retval = 0; 1488 int retval = 0;
1489 struct cpuset *cs = cgroup_cs(cgrp);
1490 struct cpuset *trialcs;
1457 1491
1458 if (!cgroup_lock_live_group(cgrp)) 1492 if (!cgroup_lock_live_group(cgrp))
1459 return -ENODEV; 1493 return -ENODEV;
1460 1494
1495 trialcs = alloc_trial_cpuset(cs);
1496 if (!trialcs)
1497 return -ENOMEM;
1498
1461 switch (cft->private) { 1499 switch (cft->private) {
1462 case FILE_CPULIST: 1500 case FILE_CPULIST:
1463 retval = update_cpumask(cgroup_cs(cgrp), buf); 1501 retval = update_cpumask(cs, trialcs, buf);
1464 break; 1502 break;
1465 case FILE_MEMLIST: 1503 case FILE_MEMLIST:
1466 retval = update_nodemask(cgroup_cs(cgrp), buf); 1504 retval = update_nodemask(cs, trialcs, buf);
1467 break; 1505 break;
1468 default: 1506 default:
1469 retval = -EINVAL; 1507 retval = -EINVAL;
1470 break; 1508 break;
1471 } 1509 }
1510
1511 free_trial_cpuset(trialcs);
1472 cgroup_unlock(); 1512 cgroup_unlock();
1473 return retval; 1513 return retval;
1474} 1514}
@@ -1487,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1487 1527
1488static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) 1528static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1489{ 1529{
1490 cpumask_t mask; 1530 int ret;
1491 1531
1492 mutex_lock(&callback_mutex); 1532 mutex_lock(&callback_mutex);
1493 mask = cs->cpus_allowed; 1533 ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
1494 mutex_unlock(&callback_mutex); 1534 mutex_unlock(&callback_mutex);
1495 1535
1496 return cpulist_scnprintf(page, PAGE_SIZE, &mask); 1536 return ret;
1497} 1537}
1498 1538
1499static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) 1539static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1729,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
1729 parent_cs = cgroup_cs(parent); 1769 parent_cs = cgroup_cs(parent);
1730 1770
1731 cs->mems_allowed = parent_cs->mems_allowed; 1771 cs->mems_allowed = parent_cs->mems_allowed;
1732 cs->cpus_allowed = parent_cs->cpus_allowed; 1772 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
1733 return; 1773 return;
1734} 1774}
1735 1775
@@ -1755,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
1755 cs = kmalloc(sizeof(*cs), GFP_KERNEL); 1795 cs = kmalloc(sizeof(*cs), GFP_KERNEL);
1756 if (!cs) 1796 if (!cs)
1757 return ERR_PTR(-ENOMEM); 1797 return ERR_PTR(-ENOMEM);
1798 if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
1799 kfree(cs);
1800 return ERR_PTR(-ENOMEM);
1801 }
1758 1802
1759 cpuset_update_task_memory_state(); 1803 cpuset_update_task_memory_state();
1760 cs->flags = 0; 1804 cs->flags = 0;
@@ -1763,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
1763 if (is_spread_slab(parent)) 1807 if (is_spread_slab(parent))
1764 set_bit(CS_SPREAD_SLAB, &cs->flags); 1808 set_bit(CS_SPREAD_SLAB, &cs->flags);
1765 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); 1809 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
1766 cpus_clear(cs->cpus_allowed); 1810 cpumask_clear(cs->cpus_allowed);
1767 nodes_clear(cs->mems_allowed); 1811 nodes_clear(cs->mems_allowed);
1768 cs->mems_generation = cpuset_mems_generation++; 1812 cs->mems_generation = cpuset_mems_generation++;
1769 fmeter_init(&cs->fmeter); 1813 fmeter_init(&cs->fmeter);
@@ -1790,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
1790 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); 1834 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
1791 1835
1792 number_of_cpusets--; 1836 number_of_cpusets--;
1837 free_cpumask_var(cs->cpus_allowed);
1793 kfree(cs); 1838 kfree(cs);
1794} 1839}
1795 1840
@@ -1813,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
1813 1858
1814int __init cpuset_init_early(void) 1859int __init cpuset_init_early(void)
1815{ 1860{
1861 alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
1862
1816 top_cpuset.mems_generation = cpuset_mems_generation++; 1863 top_cpuset.mems_generation = cpuset_mems_generation++;
1817 return 0; 1864 return 0;
1818} 1865}
@@ -1828,7 +1875,7 @@ int __init cpuset_init(void)
1828{ 1875{
1829 int err = 0; 1876 int err = 0;
1830 1877
1831 cpus_setall(top_cpuset.cpus_allowed); 1878 cpumask_setall(top_cpuset.cpus_allowed);
1832 nodes_setall(top_cpuset.mems_allowed); 1879 nodes_setall(top_cpuset.mems_allowed);
1833 1880
1834 fmeter_init(&top_cpuset.fmeter); 1881 fmeter_init(&top_cpuset.fmeter);
@@ -1840,6 +1887,9 @@ int __init cpuset_init(void)
1840 if (err < 0) 1887 if (err < 0)
1841 return err; 1888 return err;
1842 1889
1890 if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
1891 BUG();
1892
1843 number_of_cpusets = 1; 1893 number_of_cpusets = 1;
1844 return 0; 1894 return 0;
1845} 1895}
@@ -1914,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
1914 * has online cpus, so can't be empty). 1964 * has online cpus, so can't be empty).
1915 */ 1965 */
1916 parent = cs->parent; 1966 parent = cs->parent;
1917 while (cpus_empty(parent->cpus_allowed) || 1967 while (cpumask_empty(parent->cpus_allowed) ||
1918 nodes_empty(parent->mems_allowed)) 1968 nodes_empty(parent->mems_allowed))
1919 parent = parent->parent; 1969 parent = parent->parent;
1920 1970
@@ -1955,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
1955 } 2005 }
1956 2006
1957 /* Continue past cpusets with all cpus, mems online */ 2007 /* Continue past cpusets with all cpus, mems online */
1958 if (cpus_subset(cp->cpus_allowed, cpu_online_map) && 2008 if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
1959 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2009 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
1960 continue; 2010 continue;
1961 2011
@@ -1963,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root)
1963 2013
1964 /* Remove offline cpus and mems from this cpuset. */ 2014 /* Remove offline cpus and mems from this cpuset. */
1965 mutex_lock(&callback_mutex); 2015 mutex_lock(&callback_mutex);
1966 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); 2016 cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
2017 cpu_online_mask);
1967 nodes_and(cp->mems_allowed, cp->mems_allowed, 2018 nodes_and(cp->mems_allowed, cp->mems_allowed,
1968 node_states[N_HIGH_MEMORY]); 2019 node_states[N_HIGH_MEMORY]);
1969 mutex_unlock(&callback_mutex); 2020 mutex_unlock(&callback_mutex);
1970 2021
1971 /* Move tasks from the empty cpuset to a parent */ 2022 /* Move tasks from the empty cpuset to a parent */
1972 if (cpus_empty(cp->cpus_allowed) || 2023 if (cpumask_empty(cp->cpus_allowed) ||
1973 nodes_empty(cp->mems_allowed)) 2024 nodes_empty(cp->mems_allowed))
1974 remove_tasks_in_empty_cpuset(cp); 2025 remove_tasks_in_empty_cpuset(cp);
1975 else { 2026 else {
@@ -1995,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
1995 unsigned long phase, void *unused_cpu) 2046 unsigned long phase, void *unused_cpu)
1996{ 2047{
1997 struct sched_domain_attr *attr; 2048 struct sched_domain_attr *attr;
1998 cpumask_t *doms; 2049 struct cpumask *doms;
1999 int ndoms; 2050 int ndoms;
2000 2051
2001 switch (phase) { 2052 switch (phase) {
@@ -2010,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2010 } 2061 }
2011 2062
2012 cgroup_lock(); 2063 cgroup_lock();
2013 top_cpuset.cpus_allowed = cpu_online_map; 2064 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
2014 scan_for_empty_cpusets(&top_cpuset); 2065 scan_for_empty_cpusets(&top_cpuset);
2015 ndoms = generate_sched_domains(&doms, &attr); 2066 ndoms = generate_sched_domains(&doms, &attr);
2016 cgroup_unlock(); 2067 cgroup_unlock();
@@ -2055,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
2055 2106
2056void __init cpuset_init_smp(void) 2107void __init cpuset_init_smp(void)
2057{ 2108{
2058 top_cpuset.cpus_allowed = cpu_online_map; 2109 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
2059 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2110 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2060 2111
2061 hotcpu_notifier(cpuset_track_online_cpus, 0); 2112 hotcpu_notifier(cpuset_track_online_cpus, 0);
@@ -2065,15 +2116,15 @@ void __init cpuset_init_smp(void)
2065/** 2116/**
2066 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. 2117 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
2067 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. 2118 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
2068 * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. 2119 * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
2069 * 2120 *
2070 * Description: Returns the cpumask_t cpus_allowed of the cpuset 2121 * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
2071 * attached to the specified @tsk. Guaranteed to return some non-empty 2122 * attached to the specified @tsk. Guaranteed to return some non-empty
2072 * subset of cpu_online_map, even if this means going outside the 2123 * subset of cpu_online_map, even if this means going outside the
2073 * tasks cpuset. 2124 * tasks cpuset.
2074 **/ 2125 **/
2075 2126
2076void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) 2127void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
2077{ 2128{
2078 mutex_lock(&callback_mutex); 2129 mutex_lock(&callback_mutex);
2079 cpuset_cpus_allowed_locked(tsk, pmask); 2130 cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2084,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
2084 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. 2135 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
2085 * Must be called with callback_mutex held. 2136 * Must be called with callback_mutex held.
2086 **/ 2137 **/
2087void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask) 2138void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
2088{ 2139{
2089 task_lock(tsk); 2140 task_lock(tsk);
2090 guarantee_online_cpus(task_cs(tsk), pmask); 2141 guarantee_online_cpus(task_cs(tsk), pmask);