diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 106 |
1 files changed, 74 insertions, 32 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ba401fab459f..d10946748ec2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -920,9 +920,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
920 | * call to guarantee_online_mems(), as we know no one is changing | 920 | * call to guarantee_online_mems(), as we know no one is changing |
921 | * our task's cpuset. | 921 | * our task's cpuset. |
922 | * | 922 | * |
923 | * Hold callback_mutex around the two modifications of our tasks | ||
924 | * mems_allowed to synchronize with cpuset_mems_allowed(). | ||
925 | * | ||
926 | * While the mm_struct we are migrating is typically from some | 923 | * While the mm_struct we are migrating is typically from some |
927 | * other task, the task_struct mems_allowed that we are hacking | 924 | * other task, the task_struct mems_allowed that we are hacking |
928 | * is for our current task, which must allocate new pages for that | 925 | * is for our current task, which must allocate new pages for that |
@@ -973,15 +970,20 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
973 | struct cpuset *cs; | 970 | struct cpuset *cs; |
974 | int migrate; | 971 | int migrate; |
975 | const nodemask_t *oldmem = scan->data; | 972 | const nodemask_t *oldmem = scan->data; |
976 | nodemask_t newmems; | 973 | NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL); |
974 | |||
975 | if (!newmems) | ||
976 | return; | ||
977 | 977 | ||
978 | cs = cgroup_cs(scan->cg); | 978 | cs = cgroup_cs(scan->cg); |
979 | guarantee_online_mems(cs, &newmems); | 979 | guarantee_online_mems(cs, newmems); |
980 | 980 | ||
981 | task_lock(p); | 981 | task_lock(p); |
982 | cpuset_change_task_nodemask(p, &newmems); | 982 | cpuset_change_task_nodemask(p, newmems); |
983 | task_unlock(p); | 983 | task_unlock(p); |
984 | 984 | ||
985 | NODEMASK_FREE(newmems); | ||
986 | |||
985 | mm = get_task_mm(p); | 987 | mm = get_task_mm(p); |
986 | if (!mm) | 988 | if (!mm) |
987 | return; | 989 | return; |
@@ -1051,16 +1053,21 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, | |||
1051 | static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | 1053 | static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, |
1052 | const char *buf) | 1054 | const char *buf) |
1053 | { | 1055 | { |
1054 | nodemask_t oldmem; | 1056 | NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL); |
1055 | int retval; | 1057 | int retval; |
1056 | struct ptr_heap heap; | 1058 | struct ptr_heap heap; |
1057 | 1059 | ||
1060 | if (!oldmem) | ||
1061 | return -ENOMEM; | ||
1062 | |||
1058 | /* | 1063 | /* |
1059 | * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY]; | 1064 | * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY]; |
1060 | * it's read-only | 1065 | * it's read-only |
1061 | */ | 1066 | */ |
1062 | if (cs == &top_cpuset) | 1067 | if (cs == &top_cpuset) { |
1063 | return -EACCES; | 1068 | retval = -EACCES; |
1069 | goto done; | ||
1070 | } | ||
1064 | 1071 | ||
1065 | /* | 1072 | /* |
1066 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. | 1073 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. |
@@ -1076,11 +1083,13 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1076 | goto done; | 1083 | goto done; |
1077 | 1084 | ||
1078 | if (!nodes_subset(trialcs->mems_allowed, | 1085 | if (!nodes_subset(trialcs->mems_allowed, |
1079 | node_states[N_HIGH_MEMORY])) | 1086 | node_states[N_HIGH_MEMORY])) { |
1080 | return -EINVAL; | 1087 | retval = -EINVAL; |
1088 | goto done; | ||
1089 | } | ||
1081 | } | 1090 | } |
1082 | oldmem = cs->mems_allowed; | 1091 | *oldmem = cs->mems_allowed; |
1083 | if (nodes_equal(oldmem, trialcs->mems_allowed)) { | 1092 | if (nodes_equal(*oldmem, trialcs->mems_allowed)) { |
1084 | retval = 0; /* Too easy - nothing to do */ | 1093 | retval = 0; /* Too easy - nothing to do */ |
1085 | goto done; | 1094 | goto done; |
1086 | } | 1095 | } |
@@ -1096,10 +1105,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1096 | cs->mems_allowed = trialcs->mems_allowed; | 1105 | cs->mems_allowed = trialcs->mems_allowed; |
1097 | mutex_unlock(&callback_mutex); | 1106 | mutex_unlock(&callback_mutex); |
1098 | 1107 | ||
1099 | update_tasks_nodemask(cs, &oldmem, &heap); | 1108 | update_tasks_nodemask(cs, oldmem, &heap); |
1100 | 1109 | ||
1101 | heap_free(&heap); | 1110 | heap_free(&heap); |
1102 | done: | 1111 | done: |
1112 | NODEMASK_FREE(oldmem); | ||
1103 | return retval; | 1113 | return retval; |
1104 | } | 1114 | } |
1105 | 1115 | ||
@@ -1384,40 +1394,47 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1384 | struct cgroup *oldcont, struct task_struct *tsk, | 1394 | struct cgroup *oldcont, struct task_struct *tsk, |
1385 | bool threadgroup) | 1395 | bool threadgroup) |
1386 | { | 1396 | { |
1387 | nodemask_t from, to; | ||
1388 | struct mm_struct *mm; | 1397 | struct mm_struct *mm; |
1389 | struct cpuset *cs = cgroup_cs(cont); | 1398 | struct cpuset *cs = cgroup_cs(cont); |
1390 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1399 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1400 | NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL); | ||
1401 | NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL); | ||
1402 | |||
1403 | if (from == NULL || to == NULL) | ||
1404 | goto alloc_fail; | ||
1391 | 1405 | ||
1392 | if (cs == &top_cpuset) { | 1406 | if (cs == &top_cpuset) { |
1393 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1407 | cpumask_copy(cpus_attach, cpu_possible_mask); |
1394 | to = node_possible_map; | ||
1395 | } else { | 1408 | } else { |
1396 | guarantee_online_cpus(cs, cpus_attach); | 1409 | guarantee_online_cpus(cs, cpus_attach); |
1397 | guarantee_online_mems(cs, &to); | ||
1398 | } | 1410 | } |
1411 | guarantee_online_mems(cs, to); | ||
1399 | 1412 | ||
1400 | /* do per-task migration stuff possibly for each in the threadgroup */ | 1413 | /* do per-task migration stuff possibly for each in the threadgroup */ |
1401 | cpuset_attach_task(tsk, &to, cs); | 1414 | cpuset_attach_task(tsk, to, cs); |
1402 | if (threadgroup) { | 1415 | if (threadgroup) { |
1403 | struct task_struct *c; | 1416 | struct task_struct *c; |
1404 | rcu_read_lock(); | 1417 | rcu_read_lock(); |
1405 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | 1418 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { |
1406 | cpuset_attach_task(c, &to, cs); | 1419 | cpuset_attach_task(c, to, cs); |
1407 | } | 1420 | } |
1408 | rcu_read_unlock(); | 1421 | rcu_read_unlock(); |
1409 | } | 1422 | } |
1410 | 1423 | ||
1411 | /* change mm; only needs to be done once even if threadgroup */ | 1424 | /* change mm; only needs to be done once even if threadgroup */ |
1412 | from = oldcs->mems_allowed; | 1425 | *from = oldcs->mems_allowed; |
1413 | to = cs->mems_allowed; | 1426 | *to = cs->mems_allowed; |
1414 | mm = get_task_mm(tsk); | 1427 | mm = get_task_mm(tsk); |
1415 | if (mm) { | 1428 | if (mm) { |
1416 | mpol_rebind_mm(mm, &to); | 1429 | mpol_rebind_mm(mm, to); |
1417 | if (is_memory_migrate(cs)) | 1430 | if (is_memory_migrate(cs)) |
1418 | cpuset_migrate_mm(mm, &from, &to); | 1431 | cpuset_migrate_mm(mm, from, to); |
1419 | mmput(mm); | 1432 | mmput(mm); |
1420 | } | 1433 | } |
1434 | |||
1435 | alloc_fail: | ||
1436 | NODEMASK_FREE(from); | ||
1437 | NODEMASK_FREE(to); | ||
1421 | } | 1438 | } |
1422 | 1439 | ||
1423 | /* The various types of files and directories in a cpuset file system */ | 1440 | /* The various types of files and directories in a cpuset file system */ |
@@ -1562,13 +1579,21 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | |||
1562 | 1579 | ||
1563 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) | 1580 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) |
1564 | { | 1581 | { |
1565 | nodemask_t mask; | 1582 | NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL); |
1583 | int retval; | ||
1584 | |||
1585 | if (mask == NULL) | ||
1586 | return -ENOMEM; | ||
1566 | 1587 | ||
1567 | mutex_lock(&callback_mutex); | 1588 | mutex_lock(&callback_mutex); |
1568 | mask = cs->mems_allowed; | 1589 | *mask = cs->mems_allowed; |
1569 | mutex_unlock(&callback_mutex); | 1590 | mutex_unlock(&callback_mutex); |
1570 | 1591 | ||
1571 | return nodelist_scnprintf(page, PAGE_SIZE, mask); | 1592 | retval = nodelist_scnprintf(page, PAGE_SIZE, *mask); |
1593 | |||
1594 | NODEMASK_FREE(mask); | ||
1595 | |||
1596 | return retval; | ||
1572 | } | 1597 | } |
1573 | 1598 | ||
1574 | static ssize_t cpuset_common_file_read(struct cgroup *cont, | 1599 | static ssize_t cpuset_common_file_read(struct cgroup *cont, |
@@ -1997,7 +2022,10 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
1997 | struct cpuset *cp; /* scans cpusets being updated */ | 2022 | struct cpuset *cp; /* scans cpusets being updated */ |
1998 | struct cpuset *child; /* scans child cpusets of cp */ | 2023 | struct cpuset *child; /* scans child cpusets of cp */ |
1999 | struct cgroup *cont; | 2024 | struct cgroup *cont; |
2000 | nodemask_t oldmems; | 2025 | NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); |
2026 | |||
2027 | if (oldmems == NULL) | ||
2028 | return; | ||
2001 | 2029 | ||
2002 | list_add_tail((struct list_head *)&root->stack_list, &queue); | 2030 | list_add_tail((struct list_head *)&root->stack_list, &queue); |
2003 | 2031 | ||
@@ -2014,7 +2042,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2014 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2042 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
2015 | continue; | 2043 | continue; |
2016 | 2044 | ||
2017 | oldmems = cp->mems_allowed; | 2045 | *oldmems = cp->mems_allowed; |
2018 | 2046 | ||
2019 | /* Remove offline cpus and mems from this cpuset. */ | 2047 | /* Remove offline cpus and mems from this cpuset. */ |
2020 | mutex_lock(&callback_mutex); | 2048 | mutex_lock(&callback_mutex); |
@@ -2030,9 +2058,10 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2030 | remove_tasks_in_empty_cpuset(cp); | 2058 | remove_tasks_in_empty_cpuset(cp); |
2031 | else { | 2059 | else { |
2032 | update_tasks_cpumask(cp, NULL); | 2060 | update_tasks_cpumask(cp, NULL); |
2033 | update_tasks_nodemask(cp, &oldmems, NULL); | 2061 | update_tasks_nodemask(cp, oldmems, NULL); |
2034 | } | 2062 | } |
2035 | } | 2063 | } |
2064 | NODEMASK_FREE(oldmems); | ||
2036 | } | 2065 | } |
2037 | 2066 | ||
2038 | /* | 2067 | /* |
@@ -2090,20 +2119,33 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2090 | static int cpuset_track_online_nodes(struct notifier_block *self, | 2119 | static int cpuset_track_online_nodes(struct notifier_block *self, |
2091 | unsigned long action, void *arg) | 2120 | unsigned long action, void *arg) |
2092 | { | 2121 | { |
2122 | NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); | ||
2123 | |||
2124 | if (oldmems == NULL) | ||
2125 | return NOTIFY_DONE; | ||
2126 | |||
2093 | cgroup_lock(); | 2127 | cgroup_lock(); |
2094 | switch (action) { | 2128 | switch (action) { |
2095 | case MEM_ONLINE: | 2129 | case MEM_ONLINE: |
2096 | case MEM_OFFLINE: | 2130 | *oldmems = top_cpuset.mems_allowed; |
2097 | mutex_lock(&callback_mutex); | 2131 | mutex_lock(&callback_mutex); |
2098 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2132 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2099 | mutex_unlock(&callback_mutex); | 2133 | mutex_unlock(&callback_mutex); |
2100 | if (action == MEM_OFFLINE) | 2134 | update_tasks_nodemask(&top_cpuset, oldmems, NULL); |
2101 | scan_for_empty_cpusets(&top_cpuset); | 2135 | break; |
2136 | case MEM_OFFLINE: | ||
2137 | /* | ||
2138 | * needn't update top_cpuset.mems_allowed explicitly because | ||
2139 | * scan_for_empty_cpusets() will update it. | ||
2140 | */ | ||
2141 | scan_for_empty_cpusets(&top_cpuset); | ||
2102 | break; | 2142 | break; |
2103 | default: | 2143 | default: |
2104 | break; | 2144 | break; |
2105 | } | 2145 | } |
2106 | cgroup_unlock(); | 2146 | cgroup_unlock(); |
2147 | |||
2148 | NODEMASK_FREE(oldmems); | ||
2107 | return NOTIFY_OK; | 2149 | return NOTIFY_OK; |
2108 | } | 2150 | } |
2109 | #endif | 2151 | #endif |