author		Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-30 19:16:45 -0400
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-30 19:16:45 -0400
commit		ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree		644b88f8a71896307d71438e9b3af49126ffb22b /kernel/cpuset.c
parent		43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent		3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)

Merge branch 'wip-2.6.34' into old-private-master	[archived-private-master]
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--	kernel/cpuset.c	151
1 files changed, 94 insertions, 57 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b5cb469d2545..d10946748ec2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
  * element of the partition (one sched domain) to be passed to
  * partition_sched_domains().
  */
-/* FIXME: see the FIXME in partition_sched_domains() */
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	LIST_HEAD(q);		/* queue of cpusets to be scanned */
@@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains,
 	struct cpuset **csa;	/* array of all cpuset ptrs */
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
-	struct cpumask *doms;	/* resulting partition; i.e. sched domains */
+	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
@@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains,
 
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
-		doms = kmalloc(cpumask_size(), GFP_KERNEL);
+		ndoms = 1;
+		doms = alloc_sched_domains(ndoms);
 		if (!doms)
 			goto done;
 
@@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains,
 			*dattr = SD_ATTR_INIT;
 			update_domain_attr_tree(dattr, &top_cpuset);
 		}
-		cpumask_copy(doms, top_cpuset.cpus_allowed);
+		cpumask_copy(doms[0], top_cpuset.cpus_allowed);
 
-		ndoms = 1;
 		goto done;
 	}
 
@@ -636,7 +635,7 @@ restart:
	 * Now we know how many domains to create.
	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
	 */
-	doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
+	doms = alloc_sched_domains(ndoms);
 	if (!doms)
 		goto done;
 
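Note: both kmalloc() calls above become alloc_sched_domains(), and the partition is now an array of cpumask_var_t indexed as doms[i] rather than a flat buffer addressed with pointer arithmetic. Below is a minimal sketch of how a caller is expected to use that interface after this change; the helper name is hypothetical and the alloc_sched_domains()/partition_sched_domains() signatures are assumed from this kernel generation rather than shown in the diff.

/* Sketch only: build a one-domain partition and hand it to the scheduler.
 * Assumes cpumask_var_t *alloc_sched_domains(unsigned int ndoms) and a
 * partition_sched_domains() that takes a cpumask_var_t array, as used above. */
static int rebuild_single_domain_sketch(void)
{
	cpumask_var_t *doms;
	int ndoms = 1;

	doms = alloc_sched_domains(ndoms);	/* array of ndoms masks */
	if (!doms)
		return -ENOMEM;

	cpumask_copy(doms[0], cpu_active_mask);	/* index, don't pointer-add */
	/* the scheduler keeps doms as its current partition; caller must not free it */
	partition_sched_domains(ndoms, doms, NULL);
	return 0;
}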
@@ -656,7 +655,7 @@ restart:
 			continue;
 		}
 
-		dp = doms + nslot;
+		dp = doms[nslot];
 
 		if (nslot == ndoms) {
 			static int warnings = 10;
@@ -718,7 +717,7 @@ done:
 static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 
 	get_online_cpus();
@@ -738,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 }
 
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	*domains = NULL;
@@ -873,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
+		if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
 			return -EINVAL;
 	}
 	retval = validate_change(cs, trialcs);
@@ -921,9 +920,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
	 * call to guarantee_online_mems(), as we know no one is changing
	 * our task's cpuset.
	 *
-	 * Hold callback_mutex around the two modifications of our tasks
-	 * mems_allowed to synchronize with cpuset_mems_allowed().
-	 *
	 * While the mm_struct we are migrating is typically from some
	 * other task, the task_struct mems_allowed that we are hacking
	 * is for our current task, which must allocate new pages for that
@@ -974,15 +970,20 @@ static void cpuset_change_nodemask(struct task_struct *p,
 	struct cpuset *cs;
 	int migrate;
 	const nodemask_t *oldmem = scan->data;
-	nodemask_t newmems;
+	NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
+
+	if (!newmems)
+		return;
 
 	cs = cgroup_cs(scan->cg);
-	guarantee_online_mems(cs, &newmems);
+	guarantee_online_mems(cs, newmems);
 
 	task_lock(p);
-	cpuset_change_task_nodemask(p, &newmems);
+	cpuset_change_task_nodemask(p, newmems);
 	task_unlock(p);
 
+	NODEMASK_FREE(newmems);
+
 	mm = get_task_mm(p);
 	if (!mm)
 		return;
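Note: this hunk, and several below, replace on-stack nodemask_t variables with NODEMASK_ALLOC()/NODEMASK_FREE(). As I read the macros in <linux/nodemask.h> of this era, they expand to a plain stack variable when nodemask_t is small and to a kmalloc() when MAX_NUMNODES makes it large; that is why the "allocation" can now fail and why every early return has to be turned into a cleanup path that frees the mask. A hedged sketch of the usage pattern (the helper itself is hypothetical):

/* Hypothetical helper showing the NODEMASK_ALLOC() pattern adopted above.
 * The declared name acts like a nodemask_t pointer whether the mask lives
 * on the stack (small MAX_NUMNODES) or was kmalloc'ed (large MAX_NUMNODES). */
static int copy_high_memory_nodes(nodemask_t *dst)
{
	NODEMASK_ALLOC(nodemask_t, mems, GFP_KERNEL);

	if (mems == NULL)
		return -ENOMEM;			/* only the kmalloc variant can fail */

	*mems = node_states[N_HIGH_MEMORY];	/* always dereference the name */
	*dst = *mems;

	NODEMASK_FREE(mems);			/* kfree() or a no-op, config dependent */
	return 0;
}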
@@ -1052,16 +1053,21 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
 static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 			   const char *buf)
 {
-	nodemask_t oldmem;
+	NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
 	int retval;
 	struct ptr_heap heap;
 
+	if (!oldmem)
+		return -ENOMEM;
+
	/*
	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
	 * it's read-only
	 */
-	if (cs == &top_cpuset)
-		return -EACCES;
+	if (cs == &top_cpuset) {
+		retval = -EACCES;
+		goto done;
+	}
 
	/*
	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
@@ -1077,11 +1083,13 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 			goto done;
 
 		if (!nodes_subset(trialcs->mems_allowed,
-				node_states[N_HIGH_MEMORY]))
-			return -EINVAL;
+				node_states[N_HIGH_MEMORY])) {
+			retval = -EINVAL;
+			goto done;
+		}
 	}
-	oldmem = cs->mems_allowed;
-	if (nodes_equal(oldmem, trialcs->mems_allowed)) {
+	*oldmem = cs->mems_allowed;
+	if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
 		retval = 0;		/* Too easy - nothing to do */
 		goto done;
 	}
@@ -1097,10 +1105,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	cs->mems_allowed = trialcs->mems_allowed;
 	mutex_unlock(&callback_mutex);
 
-	update_tasks_nodemask(cs, &oldmem, &heap);
+	update_tasks_nodemask(cs, oldmem, &heap);
 
 	heap_free(&heap);
 done:
+	NODEMASK_FREE(oldmem);
 	return retval;
 }
 
@@ -1385,40 +1394,47 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
 			  struct cgroup *oldcont, struct task_struct *tsk,
 			  bool threadgroup)
 {
-	nodemask_t from, to;
 	struct mm_struct *mm;
 	struct cpuset *cs = cgroup_cs(cont);
 	struct cpuset *oldcs = cgroup_cs(oldcont);
+	NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
+	NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
+
+	if (from == NULL || to == NULL)
+		goto alloc_fail;
 
 	if (cs == &top_cpuset) {
 		cpumask_copy(cpus_attach, cpu_possible_mask);
-		to = node_possible_map;
 	} else {
 		guarantee_online_cpus(cs, cpus_attach);
-		guarantee_online_mems(cs, &to);
 	}
+	guarantee_online_mems(cs, to);
 
 	/* do per-task migration stuff possibly for each in the threadgroup */
-	cpuset_attach_task(tsk, &to, cs);
+	cpuset_attach_task(tsk, to, cs);
 	if (threadgroup) {
 		struct task_struct *c;
 		rcu_read_lock();
 		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-			cpuset_attach_task(c, &to, cs);
+			cpuset_attach_task(c, to, cs);
 		}
 		rcu_read_unlock();
 	}
 
 	/* change mm; only needs to be done once even if threadgroup */
-	from = oldcs->mems_allowed;
-	to = cs->mems_allowed;
+	*from = oldcs->mems_allowed;
+	*to = cs->mems_allowed;
 	mm = get_task_mm(tsk);
 	if (mm) {
-		mpol_rebind_mm(mm, &to);
+		mpol_rebind_mm(mm, to);
 		if (is_memory_migrate(cs))
-			cpuset_migrate_mm(mm, &from, &to);
+			cpuset_migrate_mm(mm, from, to);
 		mmput(mm);
 	}
+
+alloc_fail:
+	NODEMASK_FREE(from);
+	NODEMASK_FREE(to);
 }
 
 /* The various types of files and directories in a cpuset file system */
@@ -1563,13 +1579,21 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 
 static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
-	nodemask_t mask;
+	NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
+	int retval;
+
+	if (mask == NULL)
+		return -ENOMEM;
 
 	mutex_lock(&callback_mutex);
-	mask = cs->mems_allowed;
+	*mask = cs->mems_allowed;
 	mutex_unlock(&callback_mutex);
 
-	return nodelist_scnprintf(page, PAGE_SIZE, mask);
+	retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
+
+	NODEMASK_FREE(mask);
+
+	return retval;
 }
 
 static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1998,7 +2022,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 	struct cpuset *cp;	/* scans cpusets being updated */
 	struct cpuset *child;	/* scans child cpusets of cp */
 	struct cgroup *cont;
-	nodemask_t oldmems;
+	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
+
+	if (oldmems == NULL)
+		return;
 
 	list_add_tail((struct list_head *)&root->stack_list, &queue);
 
@@ -2011,16 +2038,16 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		}
 
 		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
+		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
-		oldmems = cp->mems_allowed;
+		*oldmems = cp->mems_allowed;
 
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_online_mask);
+			    cpu_active_mask);
 		nodes_and(cp->mems_allowed, cp->mems_allowed,
 			  node_states[N_HIGH_MEMORY]);
 		mutex_unlock(&callback_mutex);
@@ -2031,9 +2058,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 			remove_tasks_in_empty_cpuset(cp);
 		else {
 			update_tasks_cpumask(cp, NULL);
-			update_tasks_nodemask(cp, &oldmems, NULL);
+			update_tasks_nodemask(cp, oldmems, NULL);
 		}
 	}
+	NODEMASK_FREE(oldmems);
 }
 
 /*
@@ -2052,14 +2080,16 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 				unsigned long phase, void *unused_cpu)
 {
 	struct sched_domain_attr *attr;
-	struct cpumask *doms;
+	cpumask_var_t *doms;
 	int ndoms;
 
 	switch (phase) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		break;
 
 	default:
@@ -2068,7 +2098,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
 	scan_for_empty_cpusets(&top_cpuset);
 	ndoms = generate_sched_domains(&doms, &attr);
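Note: the hunks above switch the cpuset code from cpu_online_mask to cpu_active_mask and make the hotplug notifier rebuild sched domains on CPU_DOWN_PREPARE/CPU_DOWN_FAILED rather than waiting for CPU_DEAD. The intent, as read from the diff rather than stated in it, is that a CPU leaving the active set stops receiving tasks and sched domains before it actually goes offline. A small illustrative helper follows; the function is hypothetical, while cpu_active()/cpu_online() are the generic accessors:

/* Hypothetical helper: may this CPU still be handed work?
 * cpu_online(cpu) - the CPU is up.
 * cpu_active(cpu) - the CPU is up and not being torn down; the active bit
 *                   is cleared early in hot-unplug (around CPU_DOWN_PREPARE)
 *                   and restored if the teardown fails (CPU_DOWN_FAILED).
 * After this merge, cpuset masks and sched domains follow the active set. */
static inline bool cpu_usable_for_placement(unsigned int cpu)
{
	return cpu_active(cpu);		/* implies cpu_online(cpu) */
}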
@@ -2089,20 +2119,33 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				unsigned long action, void *arg)
 {
+	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
+
+	if (oldmems == NULL)
+		return NOTIFY_DONE;
+
 	cgroup_lock();
 	switch (action) {
 	case MEM_ONLINE:
-	case MEM_OFFLINE:
+		*oldmems = top_cpuset.mems_allowed;
 		mutex_lock(&callback_mutex);
 		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 		mutex_unlock(&callback_mutex);
-		if (action == MEM_OFFLINE)
-			scan_for_empty_cpusets(&top_cpuset);
+		update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+		break;
+	case MEM_OFFLINE:
+		/*
+		 * needn't update top_cpuset.mems_allowed explicitly because
+		 * scan_for_empty_cpusets() will update it.
+		 */
+		scan_for_empty_cpusets(&top_cpuset);
 		break;
 	default:
 		break;
 	}
 	cgroup_unlock();
+
+	NODEMASK_FREE(oldmems);
 	return NOTIFY_OK;
 }
 #endif
@@ -2115,7 +2158,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 
 void __init cpuset_init_smp(void)
 {
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
@@ -2537,15 +2580,9 @@ const struct file_operations proc_cpuset_operations = {
 };
 #endif /* CONFIG_PROC_PID_CPUSET */
 
-/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
+/* Display task mems_allowed in /proc/<pid>/status file. */
 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 {
-	seq_printf(m, "Cpus_allowed:\t");
-	seq_cpumask(m, &task->cpus_allowed);
-	seq_printf(m, "\n");
-	seq_printf(m, "Cpus_allowed_list:\t");
-	seq_cpumask_list(m, &task->cpus_allowed);
-	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed:\t");
 	seq_nodemask(m, &task->mems_allowed);
 	seq_printf(m, "\n");