diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 178 |
1 files changed, 74 insertions, 104 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4349935c2ad8..1ceeb049c827 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
1015 | struct cpuset *cs; | 1015 | struct cpuset *cs; |
1016 | int migrate; | 1016 | int migrate; |
1017 | const nodemask_t *oldmem = scan->data; | 1017 | const nodemask_t *oldmem = scan->data; |
1018 | NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL); | 1018 | static nodemask_t newmems; /* protected by cgroup_mutex */ |
1019 | |||
1020 | if (!newmems) | ||
1021 | return; | ||
1022 | 1019 | ||
1023 | cs = cgroup_cs(scan->cg); | 1020 | cs = cgroup_cs(scan->cg); |
1024 | guarantee_online_mems(cs, newmems); | 1021 | guarantee_online_mems(cs, &newmems); |
1025 | 1022 | ||
1026 | cpuset_change_task_nodemask(p, newmems); | 1023 | cpuset_change_task_nodemask(p, &newmems); |
1027 | |||
1028 | NODEMASK_FREE(newmems); | ||
1029 | 1024 | ||
1030 | mm = get_task_mm(p); | 1025 | mm = get_task_mm(p); |
1031 | if (!mm) | 1026 | if (!mm) |
@@ -1164,7 +1159,7 @@ int current_cpuset_is_being_rebound(void) | |||
1164 | static int update_relax_domain_level(struct cpuset *cs, s64 val) | 1159 | static int update_relax_domain_level(struct cpuset *cs, s64 val) |
1165 | { | 1160 | { |
1166 | #ifdef CONFIG_SMP | 1161 | #ifdef CONFIG_SMP |
1167 | if (val < -1 || val >= SD_LV_MAX) | 1162 | if (val < -1 || val >= sched_domain_level_max) |
1168 | return -EINVAL; | 1163 | return -EINVAL; |
1169 | #endif | 1164 | #endif |
1170 | 1165 | ||
@@ -1372,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1372 | return val; | 1367 | return val; |
1373 | } | 1368 | } |
1374 | 1369 | ||
1375 | /* Protected by cgroup_lock */ | ||
1376 | static cpumask_var_t cpus_attach; | ||
1377 | |||
1378 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1370 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1379 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1371 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1380 | struct task_struct *tsk, bool threadgroup) | 1372 | struct task_struct *tsk) |
1381 | { | 1373 | { |
1382 | int ret; | ||
1383 | struct cpuset *cs = cgroup_cs(cont); | 1374 | struct cpuset *cs = cgroup_cs(cont); |
1384 | 1375 | ||
1385 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1376 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
@@ -1396,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1396 | if (tsk->flags & PF_THREAD_BOUND) | 1387 | if (tsk->flags & PF_THREAD_BOUND) |
1397 | return -EINVAL; | 1388 | return -EINVAL; |
1398 | 1389 | ||
1399 | ret = security_task_setscheduler(tsk); | ||
1400 | if (ret) | ||
1401 | return ret; | ||
1402 | if (threadgroup) { | ||
1403 | struct task_struct *c; | ||
1404 | |||
1405 | rcu_read_lock(); | ||
1406 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1407 | ret = security_task_setscheduler(c); | ||
1408 | if (ret) { | ||
1409 | rcu_read_unlock(); | ||
1410 | return ret; | ||
1411 | } | ||
1412 | } | ||
1413 | rcu_read_unlock(); | ||
1414 | } | ||
1415 | return 0; | 1390 | return 0; |
1416 | } | 1391 | } |
1417 | 1392 | ||
1418 | static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | 1393 | static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task) |
1419 | struct cpuset *cs) | 1394 | { |
1395 | return security_task_setscheduler(task); | ||
1396 | } | ||
1397 | |||
1398 | /* | ||
1399 | * Protected by cgroup_lock. The nodemasks must be stored globally because | ||
1400 | * dynamically allocating them is not allowed in pre_attach, and they must | ||
1401 | * persist among pre_attach, attach_task, and attach. | ||
1402 | */ | ||
1403 | static cpumask_var_t cpus_attach; | ||
1404 | static nodemask_t cpuset_attach_nodemask_from; | ||
1405 | static nodemask_t cpuset_attach_nodemask_to; | ||
1406 | |||
1407 | /* Set-up work for before attaching each task. */ | ||
1408 | static void cpuset_pre_attach(struct cgroup *cont) | ||
1409 | { | ||
1410 | struct cpuset *cs = cgroup_cs(cont); | ||
1411 | |||
1412 | if (cs == &top_cpuset) | ||
1413 | cpumask_copy(cpus_attach, cpu_possible_mask); | ||
1414 | else | ||
1415 | guarantee_online_cpus(cs, cpus_attach); | ||
1416 | |||
1417 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | ||
1418 | } | ||
1419 | |||
1420 | /* Per-thread attachment work. */ | ||
1421 | static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) | ||
1420 | { | 1422 | { |
1421 | int err; | 1423 | int err; |
1424 | struct cpuset *cs = cgroup_cs(cont); | ||
1425 | |||
1422 | /* | 1426 | /* |
1423 | * can_attach beforehand should guarantee that this doesn't fail. | 1427 | * can_attach beforehand should guarantee that this doesn't fail. |
1424 | * TODO: have a better way to handle failure here | 1428 | * TODO: have a better way to handle failure here |
@@ -1426,56 +1430,31 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | |||
1426 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | 1430 | err = set_cpus_allowed_ptr(tsk, cpus_attach); |
1427 | WARN_ON_ONCE(err); | 1431 | WARN_ON_ONCE(err); |
1428 | 1432 | ||
1429 | cpuset_change_task_nodemask(tsk, to); | 1433 | cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to); |
1430 | cpuset_update_task_spread_flag(cs, tsk); | 1434 | cpuset_update_task_spread_flag(cs, tsk); |
1431 | |||
1432 | } | 1435 | } |
1433 | 1436 | ||
1434 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1437 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1435 | struct cgroup *oldcont, struct task_struct *tsk, | 1438 | struct cgroup *oldcont, struct task_struct *tsk) |
1436 | bool threadgroup) | ||
1437 | { | 1439 | { |
1438 | struct mm_struct *mm; | 1440 | struct mm_struct *mm; |
1439 | struct cpuset *cs = cgroup_cs(cont); | 1441 | struct cpuset *cs = cgroup_cs(cont); |
1440 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1442 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1441 | NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL); | ||
1442 | NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL); | ||
1443 | |||
1444 | if (from == NULL || to == NULL) | ||
1445 | goto alloc_fail; | ||
1446 | 1443 | ||
1447 | if (cs == &top_cpuset) { | 1444 | /* |
1448 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1445 | * Change mm, possibly for multiple threads in a threadgroup. This is |
1449 | } else { | 1446 | * expensive and may sleep. |
1450 | guarantee_online_cpus(cs, cpus_attach); | 1447 | */ |
1451 | } | 1448 | cpuset_attach_nodemask_from = oldcs->mems_allowed; |
1452 | guarantee_online_mems(cs, to); | 1449 | cpuset_attach_nodemask_to = cs->mems_allowed; |
1453 | |||
1454 | /* do per-task migration stuff possibly for each in the threadgroup */ | ||
1455 | cpuset_attach_task(tsk, to, cs); | ||
1456 | if (threadgroup) { | ||
1457 | struct task_struct *c; | ||
1458 | rcu_read_lock(); | ||
1459 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1460 | cpuset_attach_task(c, to, cs); | ||
1461 | } | ||
1462 | rcu_read_unlock(); | ||
1463 | } | ||
1464 | |||
1465 | /* change mm; only needs to be done once even if threadgroup */ | ||
1466 | *from = oldcs->mems_allowed; | ||
1467 | *to = cs->mems_allowed; | ||
1468 | mm = get_task_mm(tsk); | 1450 | mm = get_task_mm(tsk); |
1469 | if (mm) { | 1451 | if (mm) { |
1470 | mpol_rebind_mm(mm, to); | 1452 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); |
1471 | if (is_memory_migrate(cs)) | 1453 | if (is_memory_migrate(cs)) |
1472 | cpuset_migrate_mm(mm, from, to); | 1454 | cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from, |
1455 | &cpuset_attach_nodemask_to); | ||
1473 | mmput(mm); | 1456 | mmput(mm); |
1474 | } | 1457 | } |
1475 | |||
1476 | alloc_fail: | ||
1477 | NODEMASK_FREE(from); | ||
1478 | NODEMASK_FREE(to); | ||
1479 | } | 1458 | } |
1480 | 1459 | ||
1481 | /* The various types of files and directories in a cpuset file system */ | 1460 | /* The various types of files and directories in a cpuset file system */ |
@@ -1575,8 +1554,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1575 | return -ENODEV; | 1554 | return -ENODEV; |
1576 | 1555 | ||
1577 | trialcs = alloc_trial_cpuset(cs); | 1556 | trialcs = alloc_trial_cpuset(cs); |
1578 | if (!trialcs) | 1557 | if (!trialcs) { |
1579 | return -ENOMEM; | 1558 | retval = -ENOMEM; |
1559 | goto out; | ||
1560 | } | ||
1580 | 1561 | ||
1581 | switch (cft->private) { | 1562 | switch (cft->private) { |
1582 | case FILE_CPULIST: | 1563 | case FILE_CPULIST: |
@@ -1591,6 +1572,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1591 | } | 1572 | } |
1592 | 1573 | ||
1593 | free_trial_cpuset(trialcs); | 1574 | free_trial_cpuset(trialcs); |
1575 | out: | ||
1594 | cgroup_unlock(); | 1576 | cgroup_unlock(); |
1595 | return retval; | 1577 | return retval; |
1596 | } | 1578 | } |
@@ -1607,34 +1589,26 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1607 | * across a page fault. | 1589 | * across a page fault. |
1608 | */ | 1590 | */ |
1609 | 1591 | ||
1610 | static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | 1592 | static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs) |
1611 | { | 1593 | { |
1612 | int ret; | 1594 | size_t count; |
1613 | 1595 | ||
1614 | mutex_lock(&callback_mutex); | 1596 | mutex_lock(&callback_mutex); |
1615 | ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); | 1597 | count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); |
1616 | mutex_unlock(&callback_mutex); | 1598 | mutex_unlock(&callback_mutex); |
1617 | 1599 | ||
1618 | return ret; | 1600 | return count; |
1619 | } | 1601 | } |
1620 | 1602 | ||
1621 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) | 1603 | static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs) |
1622 | { | 1604 | { |
1623 | NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL); | 1605 | size_t count; |
1624 | int retval; | ||
1625 | |||
1626 | if (mask == NULL) | ||
1627 | return -ENOMEM; | ||
1628 | 1606 | ||
1629 | mutex_lock(&callback_mutex); | 1607 | mutex_lock(&callback_mutex); |
1630 | *mask = cs->mems_allowed; | 1608 | count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed); |
1631 | mutex_unlock(&callback_mutex); | 1609 | mutex_unlock(&callback_mutex); |
1632 | 1610 | ||
1633 | retval = nodelist_scnprintf(page, PAGE_SIZE, *mask); | 1611 | return count; |
1634 | |||
1635 | NODEMASK_FREE(mask); | ||
1636 | |||
1637 | return retval; | ||
1638 | } | 1612 | } |
1639 | 1613 | ||
1640 | static ssize_t cpuset_common_file_read(struct cgroup *cont, | 1614 | static ssize_t cpuset_common_file_read(struct cgroup *cont, |
@@ -1828,10 +1802,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1828 | } | 1802 | } |
1829 | 1803 | ||
1830 | /* | 1804 | /* |
1831 | * post_clone() is called at the end of cgroup_clone(). | 1805 | * post_clone() is called during cgroup_create() when the |
1832 | * 'cgroup' was just created automatically as a result of | 1806 | * clone_children mount argument was specified. The cgroup |
1833 | * a cgroup_clone(), and the current task is about to | 1807 | * can not yet have any tasks. |
1834 | * be moved into 'cgroup'. | ||
1835 | * | 1808 | * |
1836 | * Currently we refuse to set up the cgroup - thereby | 1809 | * Currently we refuse to set up the cgroup - thereby |
1837 | * refusing the task to be entered, and as a result refusing | 1810 | * refusing the task to be entered, and as a result refusing |
@@ -1859,8 +1832,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, | |||
1859 | cs = cgroup_cs(cgroup); | 1832 | cs = cgroup_cs(cgroup); |
1860 | parent_cs = cgroup_cs(parent); | 1833 | parent_cs = cgroup_cs(parent); |
1861 | 1834 | ||
1835 | mutex_lock(&callback_mutex); | ||
1862 | cs->mems_allowed = parent_cs->mems_allowed; | 1836 | cs->mems_allowed = parent_cs->mems_allowed; |
1863 | cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); | 1837 | cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); |
1838 | mutex_unlock(&callback_mutex); | ||
1864 | return; | 1839 | return; |
1865 | } | 1840 | } |
1866 | 1841 | ||
@@ -1928,6 +1903,9 @@ struct cgroup_subsys cpuset_subsys = { | |||
1928 | .create = cpuset_create, | 1903 | .create = cpuset_create, |
1929 | .destroy = cpuset_destroy, | 1904 | .destroy = cpuset_destroy, |
1930 | .can_attach = cpuset_can_attach, | 1905 | .can_attach = cpuset_can_attach, |
1906 | .can_attach_task = cpuset_can_attach_task, | ||
1907 | .pre_attach = cpuset_pre_attach, | ||
1908 | .attach_task = cpuset_attach_task, | ||
1931 | .attach = cpuset_attach, | 1909 | .attach = cpuset_attach, |
1932 | .populate = cpuset_populate, | 1910 | .populate = cpuset_populate, |
1933 | .post_clone = cpuset_post_clone, | 1911 | .post_clone = cpuset_post_clone, |
@@ -2063,10 +2041,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2063 | struct cpuset *cp; /* scans cpusets being updated */ | 2041 | struct cpuset *cp; /* scans cpusets being updated */ |
2064 | struct cpuset *child; /* scans child cpusets of cp */ | 2042 | struct cpuset *child; /* scans child cpusets of cp */ |
2065 | struct cgroup *cont; | 2043 | struct cgroup *cont; |
2066 | NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); | 2044 | static nodemask_t oldmems; /* protected by cgroup_mutex */ |
2067 | |||
2068 | if (oldmems == NULL) | ||
2069 | return; | ||
2070 | 2045 | ||
2071 | list_add_tail((struct list_head *)&root->stack_list, &queue); | 2046 | list_add_tail((struct list_head *)&root->stack_list, &queue); |
2072 | 2047 | ||
@@ -2083,7 +2058,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2083 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2058 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
2084 | continue; | 2059 | continue; |
2085 | 2060 | ||
2086 | *oldmems = cp->mems_allowed; | 2061 | oldmems = cp->mems_allowed; |
2087 | 2062 | ||
2088 | /* Remove offline cpus and mems from this cpuset. */ | 2063 | /* Remove offline cpus and mems from this cpuset. */ |
2089 | mutex_lock(&callback_mutex); | 2064 | mutex_lock(&callback_mutex); |
@@ -2099,10 +2074,9 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2099 | remove_tasks_in_empty_cpuset(cp); | 2074 | remove_tasks_in_empty_cpuset(cp); |
2100 | else { | 2075 | else { |
2101 | update_tasks_cpumask(cp, NULL); | 2076 | update_tasks_cpumask(cp, NULL); |
2102 | update_tasks_nodemask(cp, oldmems, NULL); | 2077 | update_tasks_nodemask(cp, &oldmems, NULL); |
2103 | } | 2078 | } |
2104 | } | 2079 | } |
2105 | NODEMASK_FREE(oldmems); | ||
2106 | } | 2080 | } |
2107 | 2081 | ||
2108 | /* | 2082 | /* |
@@ -2144,19 +2118,16 @@ void cpuset_update_active_cpus(void) | |||
2144 | static int cpuset_track_online_nodes(struct notifier_block *self, | 2118 | static int cpuset_track_online_nodes(struct notifier_block *self, |
2145 | unsigned long action, void *arg) | 2119 | unsigned long action, void *arg) |
2146 | { | 2120 | { |
2147 | NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); | 2121 | static nodemask_t oldmems; /* protected by cgroup_mutex */ |
2148 | |||
2149 | if (oldmems == NULL) | ||
2150 | return NOTIFY_DONE; | ||
2151 | 2122 | ||
2152 | cgroup_lock(); | 2123 | cgroup_lock(); |
2153 | switch (action) { | 2124 | switch (action) { |
2154 | case MEM_ONLINE: | 2125 | case MEM_ONLINE: |
2155 | *oldmems = top_cpuset.mems_allowed; | 2126 | oldmems = top_cpuset.mems_allowed; |
2156 | mutex_lock(&callback_mutex); | 2127 | mutex_lock(&callback_mutex); |
2157 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2128 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2158 | mutex_unlock(&callback_mutex); | 2129 | mutex_unlock(&callback_mutex); |
2159 | update_tasks_nodemask(&top_cpuset, oldmems, NULL); | 2130 | update_tasks_nodemask(&top_cpuset, &oldmems, NULL); |
2160 | break; | 2131 | break; |
2161 | case MEM_OFFLINE: | 2132 | case MEM_OFFLINE: |
2162 | /* | 2133 | /* |
@@ -2170,7 +2141,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2170 | } | 2141 | } |
2171 | cgroup_unlock(); | 2142 | cgroup_unlock(); |
2172 | 2143 | ||
2173 | NODEMASK_FREE(oldmems); | ||
2174 | return NOTIFY_OK; | 2144 | return NOTIFY_OK; |
2175 | } | 2145 | } |
2176 | #endif | 2146 | #endif |