aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c285
1 files changed, 185 insertions, 100 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 96c0ba13b8cd..647c77a88fcb 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -84,7 +84,7 @@ struct cpuset {
84 struct cgroup_subsys_state css; 84 struct cgroup_subsys_state css;
85 85
86 unsigned long flags; /* "unsigned long" so bitops work */ 86 unsigned long flags; /* "unsigned long" so bitops work */
87 cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ 87 cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
88 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ 88 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
89 89
90 struct cpuset *parent; /* my parent */ 90 struct cpuset *parent; /* my parent */
@@ -195,8 +195,6 @@ static int cpuset_mems_generation;
195 195
196static struct cpuset top_cpuset = { 196static struct cpuset top_cpuset = {
197 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), 197 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
198 .cpus_allowed = CPU_MASK_ALL,
199 .mems_allowed = NODE_MASK_ALL,
200}; 198};
201 199
202/* 200/*
@@ -240,6 +238,17 @@ static struct cpuset top_cpuset = {
240static DEFINE_MUTEX(callback_mutex); 238static DEFINE_MUTEX(callback_mutex);
241 239
242/* 240/*
241 * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
242 * buffers. They are statically allocated to prevent using excess stack
243 * when calling cpuset_print_task_mems_allowed().
244 */
245#define CPUSET_NAME_LEN (128)
246#define CPUSET_NODELIST_LEN (256)
247static char cpuset_name[CPUSET_NAME_LEN];
248static char cpuset_nodelist[CPUSET_NODELIST_LEN];
249static DEFINE_SPINLOCK(cpuset_buffer_lock);
250
251/*
243 * This is ugly, but preserves the userspace API for existing cpuset 252 * This is ugly, but preserves the userspace API for existing cpuset
244 * users. If someone tries to mount the "cpuset" filesystem, we 253 * users. If someone tries to mount the "cpuset" filesystem, we
245 * silently switch it to mount "cgroup" instead 254 * silently switch it to mount "cgroup" instead
@@ -267,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
267}; 276};
268 277
269/* 278/*
270 * Return in *pmask the portion of a cpusets's cpus_allowed that 279 * Return in pmask the portion of a cpusets's cpus_allowed that
271 * are online. If none are online, walk up the cpuset hierarchy 280 * are online. If none are online, walk up the cpuset hierarchy
272 * until we find one that does have some online cpus. If we get 281 * until we find one that does have some online cpus. If we get
273 * all the way to the top and still haven't found any online cpus, 282 * all the way to the top and still haven't found any online cpus,
@@ -280,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
280 * Call with callback_mutex held. 289 * Call with callback_mutex held.
281 */ 290 */
282 291
283static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) 292static void guarantee_online_cpus(const struct cpuset *cs,
293 struct cpumask *pmask)
284{ 294{
285 while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map)) 295 while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
286 cs = cs->parent; 296 cs = cs->parent;
287 if (cs) 297 if (cs)
288 cpus_and(*pmask, cs->cpus_allowed, cpu_online_map); 298 cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
289 else 299 else
290 *pmask = cpu_online_map; 300 cpumask_copy(pmask, cpu_online_mask);
291 BUG_ON(!cpus_intersects(*pmask, cpu_online_map)); 301 BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
292} 302}
293 303
294/* 304/*
@@ -364,14 +374,9 @@ void cpuset_update_task_memory_state(void)
364 struct task_struct *tsk = current; 374 struct task_struct *tsk = current;
365 struct cpuset *cs; 375 struct cpuset *cs;
366 376
367 if (task_cs(tsk) == &top_cpuset) { 377 rcu_read_lock();
368 /* Don't need rcu for top_cpuset. It's never freed. */ 378 my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
369 my_cpusets_mem_gen = top_cpuset.mems_generation; 379 rcu_read_unlock();
370 } else {
371 rcu_read_lock();
372 my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
373 rcu_read_unlock();
374 }
375 380
376 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { 381 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
377 mutex_lock(&callback_mutex); 382 mutex_lock(&callback_mutex);
@@ -403,12 +408,43 @@ void cpuset_update_task_memory_state(void)
403 408
404static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) 409static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
405{ 410{
406 return cpus_subset(p->cpus_allowed, q->cpus_allowed) && 411 return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
407 nodes_subset(p->mems_allowed, q->mems_allowed) && 412 nodes_subset(p->mems_allowed, q->mems_allowed) &&
408 is_cpu_exclusive(p) <= is_cpu_exclusive(q) && 413 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
409 is_mem_exclusive(p) <= is_mem_exclusive(q); 414 is_mem_exclusive(p) <= is_mem_exclusive(q);
410} 415}
411 416
417/**
418 * alloc_trial_cpuset - allocate a trial cpuset
419 * @cs: the cpuset that the trial cpuset duplicates
420 */
421static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
422{
423 struct cpuset *trial;
424
425 trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
426 if (!trial)
427 return NULL;
428
429 if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
430 kfree(trial);
431 return NULL;
432 }
433 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
434
435 return trial;
436}
437
438/**
439 * free_trial_cpuset - free the trial cpuset
440 * @trial: the trial cpuset to be freed
441 */
442static void free_trial_cpuset(struct cpuset *trial)
443{
444 free_cpumask_var(trial->cpus_allowed);
445 kfree(trial);
446}
447
412/* 448/*
413 * validate_change() - Used to validate that any proposed cpuset change 449 * validate_change() - Used to validate that any proposed cpuset change
414 * follows the structural rules for cpusets. 450 * follows the structural rules for cpusets.
@@ -458,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
458 c = cgroup_cs(cont); 494 c = cgroup_cs(cont);
459 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && 495 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
460 c != cur && 496 c != cur &&
461 cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) 497 cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
462 return -EINVAL; 498 return -EINVAL;
463 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && 499 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
464 c != cur && 500 c != cur &&
@@ -468,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
468 504
469 /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ 505 /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
470 if (cgroup_task_count(cur->css.cgroup)) { 506 if (cgroup_task_count(cur->css.cgroup)) {
471 if (cpus_empty(trial->cpus_allowed) || 507 if (cpumask_empty(trial->cpus_allowed) ||
472 nodes_empty(trial->mems_allowed)) { 508 nodes_empty(trial->mems_allowed)) {
473 return -ENOSPC; 509 return -ENOSPC;
474 } 510 }
@@ -483,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
483 */ 519 */
484static int cpusets_overlap(struct cpuset *a, struct cpuset *b) 520static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
485{ 521{
486 return cpus_intersects(a->cpus_allowed, b->cpus_allowed); 522 return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
487} 523}
488 524
489static void 525static void
@@ -508,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
508 cp = list_first_entry(&q, struct cpuset, stack_list); 544 cp = list_first_entry(&q, struct cpuset, stack_list);
509 list_del(q.next); 545 list_del(q.next);
510 546
511 if (cpus_empty(cp->cpus_allowed)) 547 if (cpumask_empty(cp->cpus_allowed))
512 continue; 548 continue;
513 549
514 if (is_sched_load_balance(cp)) 550 if (is_sched_load_balance(cp))
@@ -575,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
575 * element of the partition (one sched domain) to be passed to 611 * element of the partition (one sched domain) to be passed to
576 * partition_sched_domains(). 612 * partition_sched_domains().
577 */ 613 */
578static int generate_sched_domains(cpumask_t **domains, 614/* FIXME: see the FIXME in partition_sched_domains() */
615static int generate_sched_domains(struct cpumask **domains,
579 struct sched_domain_attr **attributes) 616 struct sched_domain_attr **attributes)
580{ 617{
581 LIST_HEAD(q); /* queue of cpusets to be scanned */ 618 LIST_HEAD(q); /* queue of cpusets to be scanned */
@@ -583,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains,
583 struct cpuset **csa; /* array of all cpuset ptrs */ 620 struct cpuset **csa; /* array of all cpuset ptrs */
584 int csn; /* how many cpuset ptrs in csa so far */ 621 int csn; /* how many cpuset ptrs in csa so far */
585 int i, j, k; /* indices for partition finding loops */ 622 int i, j, k; /* indices for partition finding loops */
586 cpumask_t *doms; /* resulting partition; i.e. sched domains */ 623 struct cpumask *doms; /* resulting partition; i.e. sched domains */
587 struct sched_domain_attr *dattr; /* attributes for custom domains */ 624 struct sched_domain_attr *dattr; /* attributes for custom domains */
588 int ndoms = 0; /* number of sched domains in result */ 625 int ndoms = 0; /* number of sched domains in result */
589 int nslot; /* next empty doms[] cpumask_t slot */ 626 int nslot; /* next empty doms[] struct cpumask slot */
590 627
591 doms = NULL; 628 doms = NULL;
592 dattr = NULL; 629 dattr = NULL;
@@ -594,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains,
594 631
595 /* Special case for the 99% of systems with one, full, sched domain */ 632 /* Special case for the 99% of systems with one, full, sched domain */
596 if (is_sched_load_balance(&top_cpuset)) { 633 if (is_sched_load_balance(&top_cpuset)) {
597 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 634 doms = kmalloc(cpumask_size(), GFP_KERNEL);
598 if (!doms) 635 if (!doms)
599 goto done; 636 goto done;
600 637
@@ -603,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
603 *dattr = SD_ATTR_INIT; 640 *dattr = SD_ATTR_INIT;
604 update_domain_attr_tree(dattr, &top_cpuset); 641 update_domain_attr_tree(dattr, &top_cpuset);
605 } 642 }
606 *doms = top_cpuset.cpus_allowed; 643 cpumask_copy(doms, top_cpuset.cpus_allowed);
607 644
608 ndoms = 1; 645 ndoms = 1;
609 goto done; 646 goto done;
@@ -622,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
622 cp = list_first_entry(&q, struct cpuset, stack_list); 659 cp = list_first_entry(&q, struct cpuset, stack_list);
623 list_del(q.next); 660 list_del(q.next);
624 661
625 if (cpus_empty(cp->cpus_allowed)) 662 if (cpumask_empty(cp->cpus_allowed))
626 continue; 663 continue;
627 664
628 /* 665 /*
@@ -673,7 +710,7 @@ restart:
673 * Now we know how many domains to create. 710 * Now we know how many domains to create.
674 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 711 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
675 */ 712 */
676 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); 713 doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
677 if (!doms) 714 if (!doms)
678 goto done; 715 goto done;
679 716
@@ -685,7 +722,7 @@ restart:
685 722
686 for (nslot = 0, i = 0; i < csn; i++) { 723 for (nslot = 0, i = 0; i < csn; i++) {
687 struct cpuset *a = csa[i]; 724 struct cpuset *a = csa[i];
688 cpumask_t *dp; 725 struct cpumask *dp;
689 int apn = a->pn; 726 int apn = a->pn;
690 727
691 if (apn < 0) { 728 if (apn < 0) {
@@ -708,14 +745,14 @@ restart:
708 continue; 745 continue;
709 } 746 }
710 747
711 cpus_clear(*dp); 748 cpumask_clear(dp);
712 if (dattr) 749 if (dattr)
713 *(dattr + nslot) = SD_ATTR_INIT; 750 *(dattr + nslot) = SD_ATTR_INIT;
714 for (j = i; j < csn; j++) { 751 for (j = i; j < csn; j++) {
715 struct cpuset *b = csa[j]; 752 struct cpuset *b = csa[j];
716 753
717 if (apn == b->pn) { 754 if (apn == b->pn) {
718 cpus_or(*dp, *dp, b->cpus_allowed); 755 cpumask_or(dp, dp, b->cpus_allowed);
719 if (dattr) 756 if (dattr)
720 update_domain_attr_tree(dattr + nslot, b); 757 update_domain_attr_tree(dattr + nslot, b);
721 758
@@ -755,7 +792,7 @@ done:
755static void do_rebuild_sched_domains(struct work_struct *unused) 792static void do_rebuild_sched_domains(struct work_struct *unused)
756{ 793{
757 struct sched_domain_attr *attr; 794 struct sched_domain_attr *attr;
758 cpumask_t *doms; 795 struct cpumask *doms;
759 int ndoms; 796 int ndoms;
760 797
761 get_online_cpus(); 798 get_online_cpus();
@@ -824,7 +861,7 @@ void rebuild_sched_domains(void)
824static int cpuset_test_cpumask(struct task_struct *tsk, 861static int cpuset_test_cpumask(struct task_struct *tsk,
825 struct cgroup_scanner *scan) 862 struct cgroup_scanner *scan)
826{ 863{
827 return !cpus_equal(tsk->cpus_allowed, 864 return !cpumask_equal(&tsk->cpus_allowed,
828 (cgroup_cs(scan->cg))->cpus_allowed); 865 (cgroup_cs(scan->cg))->cpus_allowed);
829} 866}
830 867
@@ -842,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
842static void cpuset_change_cpumask(struct task_struct *tsk, 879static void cpuset_change_cpumask(struct task_struct *tsk,
843 struct cgroup_scanner *scan) 880 struct cgroup_scanner *scan)
844{ 881{
845 set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); 882 set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
846} 883}
847 884
848/** 885/**
@@ -874,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
874 * @cs: the cpuset to consider 911 * @cs: the cpuset to consider
875 * @buf: buffer of cpu numbers written to this cpuset 912 * @buf: buffer of cpu numbers written to this cpuset
876 */ 913 */
877static int update_cpumask(struct cpuset *cs, const char *buf) 914static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
915 const char *buf)
878{ 916{
879 struct ptr_heap heap; 917 struct ptr_heap heap;
880 struct cpuset trialcs;
881 int retval; 918 int retval;
882 int is_load_balanced; 919 int is_load_balanced;
883 920
@@ -885,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
885 if (cs == &top_cpuset) 922 if (cs == &top_cpuset)
886 return -EACCES; 923 return -EACCES;
887 924
888 trialcs = *cs;
889
890 /* 925 /*
891 * An empty cpus_allowed is ok only if the cpuset has no tasks. 926 * An empty cpus_allowed is ok only if the cpuset has no tasks.
892 * Since cpulist_parse() fails on an empty mask, we special case 927 * Since cpulist_parse() fails on an empty mask, we special case
@@ -894,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
894 * with tasks have cpus. 929 * with tasks have cpus.
895 */ 930 */
896 if (!*buf) { 931 if (!*buf) {
897 cpus_clear(trialcs.cpus_allowed); 932 cpumask_clear(trialcs->cpus_allowed);
898 } else { 933 } else {
899 retval = cpulist_parse(buf, trialcs.cpus_allowed); 934 retval = cpulist_parse(buf, trialcs->cpus_allowed);
900 if (retval < 0) 935 if (retval < 0)
901 return retval; 936 return retval;
902 937
903 if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) 938 if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
904 return -EINVAL; 939 return -EINVAL;
905 } 940 }
906 retval = validate_change(cs, &trialcs); 941 retval = validate_change(cs, trialcs);
907 if (retval < 0) 942 if (retval < 0)
908 return retval; 943 return retval;
909 944
910 /* Nothing to do if the cpus didn't change */ 945 /* Nothing to do if the cpus didn't change */
911 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) 946 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
912 return 0; 947 return 0;
913 948
914 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); 949 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
915 if (retval) 950 if (retval)
916 return retval; 951 return retval;
917 952
918 is_load_balanced = is_sched_load_balance(&trialcs); 953 is_load_balanced = is_sched_load_balance(trialcs);
919 954
920 mutex_lock(&callback_mutex); 955 mutex_lock(&callback_mutex);
921 cs->cpus_allowed = trialcs.cpus_allowed; 956 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
922 mutex_unlock(&callback_mutex); 957 mutex_unlock(&callback_mutex);
923 958
924 /* 959 /*
@@ -1006,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
1006 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ 1041 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
1007 1042
1008 fudge = 10; /* spare mmarray[] slots */ 1043 fudge = 10; /* spare mmarray[] slots */
1009 fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ 1044 fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
1010 retval = -ENOMEM; 1045 retval = -ENOMEM;
1011 1046
1012 /* 1047 /*
@@ -1093,9 +1128,9 @@ done:
1093 * lock each such tasks mm->mmap_sem, scan its vma's and rebind 1128 * lock each such tasks mm->mmap_sem, scan its vma's and rebind
1094 * their mempolicies to the cpusets new mems_allowed. 1129 * their mempolicies to the cpusets new mems_allowed.
1095 */ 1130 */
1096static int update_nodemask(struct cpuset *cs, const char *buf) 1131static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1132 const char *buf)
1097{ 1133{
1098 struct cpuset trialcs;
1099 nodemask_t oldmem; 1134 nodemask_t oldmem;
1100 int retval; 1135 int retval;
1101 1136
@@ -1106,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
1106 if (cs == &top_cpuset) 1141 if (cs == &top_cpuset)
1107 return -EACCES; 1142 return -EACCES;
1108 1143
1109 trialcs = *cs;
1110
1111 /* 1144 /*
1112 * An empty mems_allowed is ok iff there are no tasks in the cpuset. 1145 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
1113 * Since nodelist_parse() fails on an empty mask, we special case 1146 * Since nodelist_parse() fails on an empty mask, we special case
@@ -1115,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
1115 * with tasks have memory. 1148 * with tasks have memory.
1116 */ 1149 */
1117 if (!*buf) { 1150 if (!*buf) {
1118 nodes_clear(trialcs.mems_allowed); 1151 nodes_clear(trialcs->mems_allowed);
1119 } else { 1152 } else {
1120 retval = nodelist_parse(buf, trialcs.mems_allowed); 1153 retval = nodelist_parse(buf, trialcs->mems_allowed);
1121 if (retval < 0) 1154 if (retval < 0)
1122 goto done; 1155 goto done;
1123 1156
1124 if (!nodes_subset(trialcs.mems_allowed, 1157 if (!nodes_subset(trialcs->mems_allowed,
1125 node_states[N_HIGH_MEMORY])) 1158 node_states[N_HIGH_MEMORY]))
1126 return -EINVAL; 1159 return -EINVAL;
1127 } 1160 }
1128 oldmem = cs->mems_allowed; 1161 oldmem = cs->mems_allowed;
1129 if (nodes_equal(oldmem, trialcs.mems_allowed)) { 1162 if (nodes_equal(oldmem, trialcs->mems_allowed)) {
1130 retval = 0; /* Too easy - nothing to do */ 1163 retval = 0; /* Too easy - nothing to do */
1131 goto done; 1164 goto done;
1132 } 1165 }
1133 retval = validate_change(cs, &trialcs); 1166 retval = validate_change(cs, trialcs);
1134 if (retval < 0) 1167 if (retval < 0)
1135 goto done; 1168 goto done;
1136 1169
1137 mutex_lock(&callback_mutex); 1170 mutex_lock(&callback_mutex);
1138 cs->mems_allowed = trialcs.mems_allowed; 1171 cs->mems_allowed = trialcs->mems_allowed;
1139 cs->mems_generation = cpuset_mems_generation++; 1172 cs->mems_generation = cpuset_mems_generation++;
1140 mutex_unlock(&callback_mutex); 1173 mutex_unlock(&callback_mutex);
1141 1174
@@ -1156,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
1156 1189
1157 if (val != cs->relax_domain_level) { 1190 if (val != cs->relax_domain_level) {
1158 cs->relax_domain_level = val; 1191 cs->relax_domain_level = val;
1159 if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) 1192 if (!cpumask_empty(cs->cpus_allowed) &&
1193 is_sched_load_balance(cs))
1160 async_rebuild_sched_domains(); 1194 async_rebuild_sched_domains();
1161 } 1195 }
1162 1196
@@ -1175,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
1175static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, 1209static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
1176 int turning_on) 1210 int turning_on)
1177{ 1211{
1178 struct cpuset trialcs; 1212 struct cpuset *trialcs;
1179 int err; 1213 int err;
1180 int balance_flag_changed; 1214 int balance_flag_changed;
1181 1215
1182 trialcs = *cs; 1216 trialcs = alloc_trial_cpuset(cs);
1217 if (!trialcs)
1218 return -ENOMEM;
1219
1183 if (turning_on) 1220 if (turning_on)
1184 set_bit(bit, &trialcs.flags); 1221 set_bit(bit, &trialcs->flags);
1185 else 1222 else
1186 clear_bit(bit, &trialcs.flags); 1223 clear_bit(bit, &trialcs->flags);
1187 1224
1188 err = validate_change(cs, &trialcs); 1225 err = validate_change(cs, trialcs);
1189 if (err < 0) 1226 if (err < 0)
1190 return err; 1227 goto out;
1191 1228
1192 balance_flag_changed = (is_sched_load_balance(cs) != 1229 balance_flag_changed = (is_sched_load_balance(cs) !=
1193 is_sched_load_balance(&trialcs)); 1230 is_sched_load_balance(trialcs));
1194 1231
1195 mutex_lock(&callback_mutex); 1232 mutex_lock(&callback_mutex);
1196 cs->flags = trialcs.flags; 1233 cs->flags = trialcs->flags;
1197 mutex_unlock(&callback_mutex); 1234 mutex_unlock(&callback_mutex);
1198 1235
1199 if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed) 1236 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
1200 async_rebuild_sched_domains(); 1237 async_rebuild_sched_domains();
1201 1238
1202 return 0; 1239out:
1240 free_trial_cpuset(trialcs);
1241 return err;
1203} 1242}
1204 1243
1205/* 1244/*
@@ -1300,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
1300 return val; 1339 return val;
1301} 1340}
1302 1341
1342/* Protected by cgroup_lock */
1343static cpumask_var_t cpus_attach;
1344
1303/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ 1345/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1304static int cpuset_can_attach(struct cgroup_subsys *ss, 1346static int cpuset_can_attach(struct cgroup_subsys *ss,
1305 struct cgroup *cont, struct task_struct *tsk) 1347 struct cgroup *cont, struct task_struct *tsk)
1306{ 1348{
1307 struct cpuset *cs = cgroup_cs(cont); 1349 struct cpuset *cs = cgroup_cs(cont);
1350 int ret = 0;
1308 1351
1309 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1352 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1310 return -ENOSPC; 1353 return -ENOSPC;
1311 if (tsk->flags & PF_THREAD_BOUND) {
1312 cpumask_t mask;
1313 1354
1355 if (tsk->flags & PF_THREAD_BOUND) {
1314 mutex_lock(&callback_mutex); 1356 mutex_lock(&callback_mutex);
1315 mask = cs->cpus_allowed; 1357 if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
1358 ret = -EINVAL;
1316 mutex_unlock(&callback_mutex); 1359 mutex_unlock(&callback_mutex);
1317 if (!cpus_equal(tsk->cpus_allowed, mask))
1318 return -EINVAL;
1319 } 1360 }
1320 1361
1321 return security_task_setscheduler(tsk, 0, NULL); 1362 return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
1322} 1363}
1323 1364
1324static void cpuset_attach(struct cgroup_subsys *ss, 1365static void cpuset_attach(struct cgroup_subsys *ss,
1325 struct cgroup *cont, struct cgroup *oldcont, 1366 struct cgroup *cont, struct cgroup *oldcont,
1326 struct task_struct *tsk) 1367 struct task_struct *tsk)
1327{ 1368{
1328 cpumask_t cpus;
1329 nodemask_t from, to; 1369 nodemask_t from, to;
1330 struct mm_struct *mm; 1370 struct mm_struct *mm;
1331 struct cpuset *cs = cgroup_cs(cont); 1371 struct cpuset *cs = cgroup_cs(cont);
1332 struct cpuset *oldcs = cgroup_cs(oldcont); 1372 struct cpuset *oldcs = cgroup_cs(oldcont);
1333 int err; 1373 int err;
1334 1374
1335 mutex_lock(&callback_mutex); 1375 if (cs == &top_cpuset) {
1336 guarantee_online_cpus(cs, &cpus); 1376 cpumask_copy(cpus_attach, cpu_possible_mask);
1337 err = set_cpus_allowed_ptr(tsk, &cpus); 1377 } else {
1338 mutex_unlock(&callback_mutex); 1378 mutex_lock(&callback_mutex);
1379 guarantee_online_cpus(cs, cpus_attach);
1380 mutex_unlock(&callback_mutex);
1381 }
1382 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1339 if (err) 1383 if (err)
1340 return; 1384 return;
1341 1385
@@ -1348,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
1348 cpuset_migrate_mm(mm, &from, &to); 1392 cpuset_migrate_mm(mm, &from, &to);
1349 mmput(mm); 1393 mmput(mm);
1350 } 1394 }
1351
1352} 1395}
1353 1396
1354/* The various types of files and directories in a cpuset file system */ 1397/* The various types of files and directories in a cpuset file system */
@@ -1443,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1443 const char *buf) 1486 const char *buf)
1444{ 1487{
1445 int retval = 0; 1488 int retval = 0;
1489 struct cpuset *cs = cgroup_cs(cgrp);
1490 struct cpuset *trialcs;
1446 1491
1447 if (!cgroup_lock_live_group(cgrp)) 1492 if (!cgroup_lock_live_group(cgrp))
1448 return -ENODEV; 1493 return -ENODEV;
1449 1494
1495 trialcs = alloc_trial_cpuset(cs);
1496 if (!trialcs)
1497 return -ENOMEM;
1498
1450 switch (cft->private) { 1499 switch (cft->private) {
1451 case FILE_CPULIST: 1500 case FILE_CPULIST:
1452 retval = update_cpumask(cgroup_cs(cgrp), buf); 1501 retval = update_cpumask(cs, trialcs, buf);
1453 break; 1502 break;
1454 case FILE_MEMLIST: 1503 case FILE_MEMLIST:
1455 retval = update_nodemask(cgroup_cs(cgrp), buf); 1504 retval = update_nodemask(cs, trialcs, buf);
1456 break; 1505 break;
1457 default: 1506 default:
1458 retval = -EINVAL; 1507 retval = -EINVAL;
1459 break; 1508 break;
1460 } 1509 }
1510
1511 free_trial_cpuset(trialcs);
1461 cgroup_unlock(); 1512 cgroup_unlock();
1462 return retval; 1513 return retval;
1463} 1514}
@@ -1476,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1476 1527
1477static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) 1528static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1478{ 1529{
1479 cpumask_t mask; 1530 int ret;
1480 1531
1481 mutex_lock(&callback_mutex); 1532 mutex_lock(&callback_mutex);
1482 mask = cs->cpus_allowed; 1533 ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
1483 mutex_unlock(&callback_mutex); 1534 mutex_unlock(&callback_mutex);
1484 1535
1485 return cpulist_scnprintf(page, PAGE_SIZE, mask); 1536 return ret;
1486} 1537}
1487 1538
1488static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) 1539static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1718,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
1718 parent_cs = cgroup_cs(parent); 1769 parent_cs = cgroup_cs(parent);
1719 1770
1720 cs->mems_allowed = parent_cs->mems_allowed; 1771 cs->mems_allowed = parent_cs->mems_allowed;
1721 cs->cpus_allowed = parent_cs->cpus_allowed; 1772 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
1722 return; 1773 return;
1723} 1774}
1724 1775
@@ -1744,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
1744 cs = kmalloc(sizeof(*cs), GFP_KERNEL); 1795 cs = kmalloc(sizeof(*cs), GFP_KERNEL);
1745 if (!cs) 1796 if (!cs)
1746 return ERR_PTR(-ENOMEM); 1797 return ERR_PTR(-ENOMEM);
1798 if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
1799 kfree(cs);
1800 return ERR_PTR(-ENOMEM);
1801 }
1747 1802
1748 cpuset_update_task_memory_state(); 1803 cpuset_update_task_memory_state();
1749 cs->flags = 0; 1804 cs->flags = 0;
@@ -1752,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
1752 if (is_spread_slab(parent)) 1807 if (is_spread_slab(parent))
1753 set_bit(CS_SPREAD_SLAB, &cs->flags); 1808 set_bit(CS_SPREAD_SLAB, &cs->flags);
1754 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); 1809 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
1755 cpus_clear(cs->cpus_allowed); 1810 cpumask_clear(cs->cpus_allowed);
1756 nodes_clear(cs->mems_allowed); 1811 nodes_clear(cs->mems_allowed);
1757 cs->mems_generation = cpuset_mems_generation++; 1812 cs->mems_generation = cpuset_mems_generation++;
1758 fmeter_init(&cs->fmeter); 1813 fmeter_init(&cs->fmeter);
@@ -1779,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
1779 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); 1834 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
1780 1835
1781 number_of_cpusets--; 1836 number_of_cpusets--;
1837 free_cpumask_var(cs->cpus_allowed);
1782 kfree(cs); 1838 kfree(cs);
1783} 1839}
1784 1840
@@ -1802,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
1802 1858
1803int __init cpuset_init_early(void) 1859int __init cpuset_init_early(void)
1804{ 1860{
1861 alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
1862
1805 top_cpuset.mems_generation = cpuset_mems_generation++; 1863 top_cpuset.mems_generation = cpuset_mems_generation++;
1806 return 0; 1864 return 0;
1807} 1865}
@@ -1817,7 +1875,7 @@ int __init cpuset_init(void)
1817{ 1875{
1818 int err = 0; 1876 int err = 0;
1819 1877
1820 cpus_setall(top_cpuset.cpus_allowed); 1878 cpumask_setall(top_cpuset.cpus_allowed);
1821 nodes_setall(top_cpuset.mems_allowed); 1879 nodes_setall(top_cpuset.mems_allowed);
1822 1880
1823 fmeter_init(&top_cpuset.fmeter); 1881 fmeter_init(&top_cpuset.fmeter);
@@ -1829,6 +1887,9 @@ int __init cpuset_init(void)
1829 if (err < 0) 1887 if (err < 0)
1830 return err; 1888 return err;
1831 1889
1890 if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
1891 BUG();
1892
1832 number_of_cpusets = 1; 1893 number_of_cpusets = 1;
1833 return 0; 1894 return 0;
1834} 1895}
@@ -1903,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
1903 * has online cpus, so can't be empty). 1964 * has online cpus, so can't be empty).
1904 */ 1965 */
1905 parent = cs->parent; 1966 parent = cs->parent;
1906 while (cpus_empty(parent->cpus_allowed) || 1967 while (cpumask_empty(parent->cpus_allowed) ||
1907 nodes_empty(parent->mems_allowed)) 1968 nodes_empty(parent->mems_allowed))
1908 parent = parent->parent; 1969 parent = parent->parent;
1909 1970
@@ -1944,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
1944 } 2005 }
1945 2006
1946 /* Continue past cpusets with all cpus, mems online */ 2007 /* Continue past cpusets with all cpus, mems online */
1947 if (cpus_subset(cp->cpus_allowed, cpu_online_map) && 2008 if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
1948 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2009 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
1949 continue; 2010 continue;
1950 2011
@@ -1952,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root)
1952 2013
1953 /* Remove offline cpus and mems from this cpuset. */ 2014 /* Remove offline cpus and mems from this cpuset. */
1954 mutex_lock(&callback_mutex); 2015 mutex_lock(&callback_mutex);
1955 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); 2016 cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
2017 cpu_online_mask);
1956 nodes_and(cp->mems_allowed, cp->mems_allowed, 2018 nodes_and(cp->mems_allowed, cp->mems_allowed,
1957 node_states[N_HIGH_MEMORY]); 2019 node_states[N_HIGH_MEMORY]);
1958 mutex_unlock(&callback_mutex); 2020 mutex_unlock(&callback_mutex);
1959 2021
1960 /* Move tasks from the empty cpuset to a parent */ 2022 /* Move tasks from the empty cpuset to a parent */
1961 if (cpus_empty(cp->cpus_allowed) || 2023 if (cpumask_empty(cp->cpus_allowed) ||
1962 nodes_empty(cp->mems_allowed)) 2024 nodes_empty(cp->mems_allowed))
1963 remove_tasks_in_empty_cpuset(cp); 2025 remove_tasks_in_empty_cpuset(cp);
1964 else { 2026 else {
@@ -1984,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
1984 unsigned long phase, void *unused_cpu) 2046 unsigned long phase, void *unused_cpu)
1985{ 2047{
1986 struct sched_domain_attr *attr; 2048 struct sched_domain_attr *attr;
1987 cpumask_t *doms; 2049 struct cpumask *doms;
1988 int ndoms; 2050 int ndoms;
1989 2051
1990 switch (phase) { 2052 switch (phase) {
@@ -1999,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
1999 } 2061 }
2000 2062
2001 cgroup_lock(); 2063 cgroup_lock();
2002 top_cpuset.cpus_allowed = cpu_online_map; 2064 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
2003 scan_for_empty_cpusets(&top_cpuset); 2065 scan_for_empty_cpusets(&top_cpuset);
2004 ndoms = generate_sched_domains(&doms, &attr); 2066 ndoms = generate_sched_domains(&doms, &attr);
2005 cgroup_unlock(); 2067 cgroup_unlock();
@@ -2044,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
2044 2106
2045void __init cpuset_init_smp(void) 2107void __init cpuset_init_smp(void)
2046{ 2108{
2047 top_cpuset.cpus_allowed = cpu_online_map; 2109 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
2048 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2110 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2049 2111
2050 hotcpu_notifier(cpuset_track_online_cpus, 0); 2112 hotcpu_notifier(cpuset_track_online_cpus, 0);
@@ -2054,15 +2116,15 @@ void __init cpuset_init_smp(void)
2054/** 2116/**
2055 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. 2117 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
2056 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. 2118 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
2057 * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. 2119 * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
2058 * 2120 *
2059 * Description: Returns the cpumask_t cpus_allowed of the cpuset 2121 * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
2060 * attached to the specified @tsk. Guaranteed to return some non-empty 2122 * attached to the specified @tsk. Guaranteed to return some non-empty
2061 * subset of cpu_online_map, even if this means going outside the 2123 * subset of cpu_online_map, even if this means going outside the
2062 * tasks cpuset. 2124 * tasks cpuset.
2063 **/ 2125 **/
2064 2126
2065void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) 2127void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
2066{ 2128{
2067 mutex_lock(&callback_mutex); 2129 mutex_lock(&callback_mutex);
2068 cpuset_cpus_allowed_locked(tsk, pmask); 2130 cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2073,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
2073 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. 2135 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
2074 * Must be called with callback_mutex held. 2136 * Must be called with callback_mutex held.
2075 **/ 2137 **/
2076void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask) 2138void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
2077{ 2139{
2078 task_lock(tsk); 2140 task_lock(tsk);
2079 guarantee_online_cpus(task_cs(tsk), pmask); 2141 guarantee_online_cpus(task_cs(tsk), pmask);
@@ -2356,6 +2418,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2356 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); 2418 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
2357} 2419}
2358 2420
2421/**
2422 * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
2423 * @task: pointer to task_struct of some task.
2424 *
2425 * Description: Prints @task's name, cpuset name, and cached copy of its
2426 * mems_allowed to the kernel log. Must hold task_lock(task) to allow
2427 * dereferencing task_cs(task).
2428 */
2429void cpuset_print_task_mems_allowed(struct task_struct *tsk)
2430{
2431 struct dentry *dentry;
2432
2433 dentry = task_cs(tsk)->css.cgroup->dentry;
2434 spin_lock(&cpuset_buffer_lock);
2435 snprintf(cpuset_name, CPUSET_NAME_LEN,
2436 dentry ? (const char *)dentry->d_name.name : "/");
2437 nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
2438 tsk->mems_allowed);
2439 printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
2440 tsk->comm, cpuset_name, cpuset_nodelist);
2441 spin_unlock(&cpuset_buffer_lock);
2442}
2443
2359/* 2444/*
2360 * Collection of memory_pressure is suppressed unless 2445 * Collection of memory_pressure is suppressed unless
2361 * this flag is enabled by writing "1" to the special 2446 * this flag is enabled by writing "1" to the special