Diffstat (limited to 'kernel/cpuset.c')
 kernel/cpuset.c | 178 +++++++++++++++---------------------
 1 file changed, 74 insertions(+), 104 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4349935c2ad8..1ceeb049c827 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
 	struct cpuset *cs;
 	int migrate;
 	const nodemask_t *oldmem = scan->data;
-	NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
-
-	if (!newmems)
-		return;
+	static nodemask_t newmems;	/* protected by cgroup_mutex */
 
 	cs = cgroup_cs(scan->cg);
-	guarantee_online_mems(cs, newmems);
+	guarantee_online_mems(cs, &newmems);
 
-	cpuset_change_task_nodemask(p, newmems);
-
-	NODEMASK_FREE(newmems);
+	cpuset_change_task_nodemask(p, &newmems);
 
 	mm = get_task_mm(p);
 	if (!mm)
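Note: the hunk above trades a NODEMASK_ALLOC()/NODEMASK_FREE() pair (nodemask_t can be too large for the stack on high-NODES_SHIFT configs, hence the heap allocation) for one static mask, which is safe only because every caller already holds cgroup_mutex. It also removes a silent failure mode: the old code simply returned without updating the task when the allocation failed. A minimal userspace sketch of the same pattern, all names hypothetical:

#include <pthread.h>
#include <string.h>

/* Stand-in for nodemask_t: too large to want on the stack. */
typedef struct { unsigned long bits[64]; } mask_t;

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER; /* plays cgroup_mutex */

/* One static scratch mask instead of a heap allocation per call;
 * correct only while every caller serializes on big_lock. */
static void update_mask(const mask_t *src)
{
	static mask_t scratch;	/* protected by big_lock */

	pthread_mutex_lock(&big_lock);
	memcpy(&scratch, src, sizeof(scratch));
	/* ... operate on &scratch; no allocation, so no failure path ... */
	pthread_mutex_unlock(&big_lock);
}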
@@ -1164,7 +1159,7 @@ int current_cpuset_is_being_rebound(void)
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
 #ifdef CONFIG_SMP
-	if (val < -1 || val >= SD_LV_MAX)
+	if (val < -1 || val >= sched_domain_level_max)
 		return -EINVAL;
 #endif
 
@@ -1372,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp)
 	return val;
 }
 
-/* Protected by cgroup_lock */
-static cpumask_var_t cpus_attach;
-
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-			     struct task_struct *tsk, bool threadgroup)
+			     struct task_struct *tsk)
 {
-	int ret;
 	struct cpuset *cs = cgroup_cs(cont);
 
 	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1396,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
 	if (tsk->flags & PF_THREAD_BOUND)
 		return -EINVAL;
 
-	ret = security_task_setscheduler(tsk);
-	if (ret)
-		return ret;
-	if (threadgroup) {
-		struct task_struct *c;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-			ret = security_task_setscheduler(c);
-			if (ret) {
-				rcu_read_unlock();
-				return ret;
-			}
-		}
-		rcu_read_unlock();
-	}
 	return 0;
 }
 
-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
-			       struct cpuset *cs)
+static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
+{
+	return security_task_setscheduler(task);
+}
+
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in pre_attach, and they must
+ * persist among pre_attach, attach_task, and attach.
+ */
+static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
+
+/* Set-up work for before attaching each task. */
+static void cpuset_pre_attach(struct cgroup *cont)
+{
+	struct cpuset *cs = cgroup_cs(cont);
+
+	if (cs == &top_cpuset)
+		cpumask_copy(cpus_attach, cpu_possible_mask);
+	else
+		guarantee_online_cpus(cs, cpus_attach);
+
+	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+}
+
+/* Per-thread attachment work. */
+static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
 {
 	int err;
+	struct cpuset *cs = cgroup_cs(cont);
+
 	/*
 	 * can_attach beforehand should guarantee that this doesn't fail.
 	 * TODO: have a better way to handle failure here
@@ -1426,56 +1430,31 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
 	err = set_cpus_allowed_ptr(tsk, cpus_attach);
 	WARN_ON_ONCE(err);
 
-	cpuset_change_task_nodemask(tsk, to);
+	cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
 	cpuset_update_task_spread_flag(cs, tsk);
-
 }
 
 static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-			  struct cgroup *oldcont, struct task_struct *tsk,
-			  bool threadgroup)
+			  struct cgroup *oldcont, struct task_struct *tsk)
 {
 	struct mm_struct *mm;
 	struct cpuset *cs = cgroup_cs(cont);
 	struct cpuset *oldcs = cgroup_cs(oldcont);
-	NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
-	NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
-
-	if (from == NULL || to == NULL)
-		goto alloc_fail;
 
-	if (cs == &top_cpuset) {
-		cpumask_copy(cpus_attach, cpu_possible_mask);
-	} else {
-		guarantee_online_cpus(cs, cpus_attach);
-	}
-	guarantee_online_mems(cs, to);
-
-	/* do per-task migration stuff possibly for each in the threadgroup */
-	cpuset_attach_task(tsk, to, cs);
-	if (threadgroup) {
-		struct task_struct *c;
-		rcu_read_lock();
-		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-			cpuset_attach_task(c, to, cs);
-		}
-		rcu_read_unlock();
-	}
-
-	/* change mm; only needs to be done once even if threadgroup */
-	*from = oldcs->mems_allowed;
-	*to = cs->mems_allowed;
+	/*
+	 * Change mm, possibly for multiple threads in a threadgroup. This is
+	 * expensive and may sleep.
+	 */
+	cpuset_attach_nodemask_from = oldcs->mems_allowed;
+	cpuset_attach_nodemask_to = cs->mems_allowed;
 	mm = get_task_mm(tsk);
 	if (mm) {
-		mpol_rebind_mm(mm, to);
+		mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
 		if (is_memory_migrate(cs))
-			cpuset_migrate_mm(mm, from, to);
+			cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+					  &cpuset_attach_nodemask_to);
 		mmput(mm);
 	}
-
-alloc_fail:
-	NODEMASK_FREE(from);
-	NODEMASK_FREE(to);
 }
 
 /* The various types of files and directories in a cpuset file system */
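Note: the three hunks above split the old monolithic attach path across the new per-thread cgroup callbacks: can_attach() keeps the whole-group checks, can_attach_task() carries the per-task security check, pre_attach() computes cpus_attach and the destination nodemask once, attach_task() applies them to each thread, and attach() rebinds the mm a single time. A compilable mock of the call order this series assumes the cgroup core to follow; the types and the driver loop are stand-ins, only the callback names come from the patch:

#include <stdio.h>

struct task { const char *comm; struct task *next_thread; };
struct cgroup { const char *path; };

/* Stubbed versions of the callbacks wired up in cpuset_subsys below: */
static int can_attach(struct cgroup *cg, struct task *t) { return 0; }
static int can_attach_task(struct cgroup *cg, struct task *t) { return 0; }
static void pre_attach(struct cgroup *cg) { puts("pre_attach: masks computed once"); }
static void attach_task(struct cgroup *cg, struct task *t) { printf("attach_task: %s\n", t->comm); }
static void attach(struct cgroup *cg, struct cgroup *old, struct task *t) { puts("attach: mm rebound once"); }

int main(void)
{
	struct task worker = { "worker", NULL }, leader = { "leader", &worker };
	struct cgroup cg = { "/dev/cpuset/a" }, old = { "/dev/cpuset" };
	struct task *t;

	if (can_attach(&cg, &leader))		/* whole-group veto */
		return 1;
	for (t = &leader; t; t = t->next_thread)
		if (can_attach_task(&cg, t))	/* per-thread veto */
			return 1;
	pre_attach(&cg);			/* one-time setup */
	for (t = &leader; t; t = t->next_thread)
		attach_task(&cg, t);		/* per-thread work */
	attach(&cg, &old, &leader);		/* one-time finish */
	return 0;
}

This is also why the nodemasks became globals: pre_attach cannot allocate, and the masks must survive from pre_attach through attach_task to attach.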
@@ -1575,8 +1554,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 		return -ENODEV;
 
 	trialcs = alloc_trial_cpuset(cs);
-	if (!trialcs)
-		return -ENOMEM;
+	if (!trialcs) {
+		retval = -ENOMEM;
+		goto out;
+	}
 
 	switch (cft->private) {
 	case FILE_CPULIST:
@@ -1591,6 +1572,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 	}
 
 	free_trial_cpuset(trialcs);
+out:
 	cgroup_unlock();
 	return retval;
 }
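Note: the -ENOMEM hunk exists because cpuset_write_resmask() takes cgroup_lock() before allocating; returning early would leave the lock held, so the failure now exits through the new "out" label. The idiom in a self-contained sketch, with a pthread mutex and malloc() standing in for cgroup_lock() and alloc_trial_cpuset():

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Once the lock is taken, every failure must leave through "out"
 * so that the unlock is never skipped. */
static int write_resmask(size_t len)
{
	int retval = 0;
	char *trial;

	pthread_mutex_lock(&lock);	/* stands in for cgroup_lock() */

	trial = malloc(len);		/* stands in for alloc_trial_cpuset() */
	if (!trial) {
		retval = -ENOMEM;
		goto out;		/* a bare return here would keep the lock */
	}

	/* ... update the mask using trial ... */

	free(trial);
out:
	pthread_mutex_unlock(&lock);
	return retval;
}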
@@ -1607,34 +1589,26 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
  * across a page fault.
  */
 
-static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
-	int ret;
+	size_t count;
 
 	mutex_lock(&callback_mutex);
-	ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+	count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
 	mutex_unlock(&callback_mutex);
 
-	return ret;
+	return count;
 }
 
-static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
-	NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
-	int retval;
-
-	if (mask == NULL)
-		return -ENOMEM;
+	size_t count;
 
 	mutex_lock(&callback_mutex);
-	*mask = cs->mems_allowed;
+	count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
 	mutex_unlock(&callback_mutex);
 
-	retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
-
-	NODEMASK_FREE(mask);
-
-	return retval;
+	return count;
 }
 
 static ssize_t cpuset_common_file_read(struct cgroup *cont,
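Note: the int-to-size_t change matches what the scnprintf() family returns: the number of characters actually stored in the buffer, excluding the trailing '\0' and never more than the size passed in, so the value cannot be negative. The memlist helper also prints straight from cs->mems_allowed under callback_mutex instead of snapshotting the mask first, which removes the last NODEMASK_ALLOC() in this path. A userspace approximation of the return contract; scnprintf() itself is kernel-only, and snprintf() reports the untruncated length, hence the clamp:

#include <stdarg.h>
#include <stdio.h>

static size_t scnprintf_like(char *buf, size_t size, const char *fmt, ...)
{
	va_list ap;
	int n;

	if (size == 0)
		return 0;

	va_start(ap, fmt);
	n = vsnprintf(buf, size, fmt, ap);
	va_end(ap);

	if (n < 0)
		return 0;
	/* Report what was stored, not what would have been needed;
	 * that is the count a read() path can hand back safely. */
	return (size_t)n < size ? (size_t)n : size - 1;
}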
@@ -1828,10 +1802,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 }
 
 /*
- * post_clone() is called at the end of cgroup_clone().
- * 'cgroup' was just created automatically as a result of
- * a cgroup_clone(), and the current task is about to
- * be moved into 'cgroup'.
+ * post_clone() is called during cgroup_create() when the
+ * clone_children mount argument was specified. The cgroup
+ * can not yet have any tasks.
  *
  * Currently we refuse to set up the cgroup - thereby
  * refusing the task to be entered, and as a result refusing
@@ -1859,8 +1832,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
 	cs = cgroup_cs(cgroup);
 	parent_cs = cgroup_cs(parent);
 
+	mutex_lock(&callback_mutex);
 	cs->mems_allowed = parent_cs->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
+	mutex_unlock(&callback_mutex);
 	return;
 }
 
@@ -1928,6 +1903,9 @@ struct cgroup_subsys cpuset_subsys = {
 	.create = cpuset_create,
 	.destroy = cpuset_destroy,
 	.can_attach = cpuset_can_attach,
+	.can_attach_task = cpuset_can_attach_task,
+	.pre_attach = cpuset_pre_attach,
+	.attach_task = cpuset_attach_task,
 	.attach = cpuset_attach,
 	.populate = cpuset_populate,
 	.post_clone = cpuset_post_clone,
@@ -2063,10 +2041,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 	struct cpuset *cp;	/* scans cpusets being updated */
 	struct cpuset *child;	/* scans child cpusets of cp */
 	struct cgroup *cont;
-	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-	if (oldmems == NULL)
-		return;
+	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	list_add_tail((struct list_head *)&root->stack_list, &queue);
 
@@ -2083,7 +2058,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
-		*oldmems = cp->mems_allowed;
+		oldmems = cp->mems_allowed;
 
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
@@ -2099,10 +2074,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 			remove_tasks_in_empty_cpuset(cp);
 		else {
 			update_tasks_cpumask(cp, NULL);
-			update_tasks_nodemask(cp, oldmems, NULL);
+			update_tasks_nodemask(cp, &oldmems, NULL);
 		}
 	}
-	NODEMASK_FREE(oldmems);
 }
 
 /*
@@ -2144,19 +2118,16 @@ void cpuset_update_active_cpus(void)
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				     unsigned long action, void *arg)
 {
-	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-	if (oldmems == NULL)
-		return NOTIFY_DONE;
+	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	cgroup_lock();
 	switch (action) {
 	case MEM_ONLINE:
-		*oldmems = top_cpuset.mems_allowed;
+		oldmems = top_cpuset.mems_allowed;
 		mutex_lock(&callback_mutex);
 		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 		mutex_unlock(&callback_mutex);
-		update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
 		break;
 	case MEM_OFFLINE:
 		/*
@@ -2170,7 +2141,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 	}
 	cgroup_unlock();
 
-	NODEMASK_FREE(oldmems);
 	return NOTIFY_OK;
 }
 #endif
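Note: cpuset_track_online_nodes() is a memory-hotplug notifier; with the static oldmems a failed allocation can no longer make it bail out with NOTIFY_DONE and silently skip updating top_cpuset. For context, trees of this vintage register the handler from cpuset_init_smp() roughly as below; this is a sketch from memory and the priority value in particular is illustrative:

#include <linux/memory.h>

void __init cpuset_init_smp(void)
{
	/* ... cpumask/nodemask initialisation elided ... */

	/* Called back on MEM_ONLINE/MEM_OFFLINE transitions. */
	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
}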