diff options
author | Li Zefan <lizefan@huawei.com> | 2013-06-09 05:15:08 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2013-06-13 13:48:32 -0400 |
commit | 33ad801dfb5c8b1127c72fdb745ce8c630150f3f (patch) | |
tree | 3bb097f4353c0950194a23338bdd0cbd3fd28134 /kernel/cpuset.c | |
parent | 388afd8549dc8be0920e00ae9404341593b6bd7c (diff) |
cpuset: record old_mems_allowed in struct cpuset
When we update a cpuset's mems_allowed and thus update tasks'
mems_allowed, it's required to pass the old mems_allowed and new
mems_allowed to cpuset_migrate_mm().
Currently we save old mems_allowed in a temp local variable before
changing cpuset->mems_allowed. This patch changes it by saving
old mems_allowed in cpuset->old_mems_allowed.
This currently won't change any behavior, but it will later allow
us to keep tasks in empty cpusets.
v3: restored "cpuset_attach_nodemask_to = cs->mems_allowed"
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 61 |
1 file changed, 36 insertions(+), 25 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 608fe1308b22..2b4554588a04 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -88,6 +88,18 @@ struct cpuset { | |||
88 | cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ | 88 | cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ |
89 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ | 89 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ |
90 | 90 | ||
91 | /* | ||
92 | * This is old Memory Nodes tasks took on. | ||
93 | * | ||
94 | * - top_cpuset.old_mems_allowed is initialized to mems_allowed. | ||
95 | * - A new cpuset's old_mems_allowed is initialized when some | ||
96 | * task is moved into it. | ||
97 | * - old_mems_allowed is used in cpuset_migrate_mm() when we change | ||
98 | * cpuset.mems_allowed and have tasks' nodemask updated, and | ||
99 | * then old_mems_allowed is updated to mems_allowed. | ||
100 | */ | ||
101 | nodemask_t old_mems_allowed; | ||
102 | |||
91 | struct fmeter fmeter; /* memory_pressure filter */ | 103 | struct fmeter fmeter; /* memory_pressure filter */ |
92 | 104 | ||
93 | /* | 105 | /* |
@@ -972,16 +984,12 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, | |||
972 | static void cpuset_change_nodemask(struct task_struct *p, | 984 | static void cpuset_change_nodemask(struct task_struct *p, |
973 | struct cgroup_scanner *scan) | 985 | struct cgroup_scanner *scan) |
974 | { | 986 | { |
987 | struct cpuset *cs = cgroup_cs(scan->cg); | ||
975 | struct mm_struct *mm; | 988 | struct mm_struct *mm; |
976 | struct cpuset *cs; | ||
977 | int migrate; | 989 | int migrate; |
978 | const nodemask_t *oldmem = scan->data; | 990 | nodemask_t *newmems = scan->data; |
979 | static nodemask_t newmems; /* protected by cpuset_mutex */ | ||
980 | |||
981 | cs = cgroup_cs(scan->cg); | ||
982 | guarantee_online_mems(cs, &newmems); | ||
983 | 991 | ||
984 | cpuset_change_task_nodemask(p, &newmems); | 992 | cpuset_change_task_nodemask(p, newmems); |
985 | 993 | ||
986 | mm = get_task_mm(p); | 994 | mm = get_task_mm(p); |
987 | if (!mm) | 995 | if (!mm) |
@@ -991,7 +999,7 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
991 | 999 | ||
992 | mpol_rebind_mm(mm, &cs->mems_allowed); | 1000 | mpol_rebind_mm(mm, &cs->mems_allowed); |
993 | if (migrate) | 1001 | if (migrate) |
994 | cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); | 1002 | cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems); |
995 | mmput(mm); | 1003 | mmput(mm); |
996 | } | 1004 | } |
997 | 1005 | ||
@@ -1000,25 +1008,26 @@ static void *cpuset_being_rebound; | |||
1000 | /** | 1008 | /** |
1001 | * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. | 1009 | * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. |
1002 | * @cs: the cpuset in which each task's mems_allowed mask needs to be changed | 1010 | * @cs: the cpuset in which each task's mems_allowed mask needs to be changed |
1003 | * @oldmem: old mems_allowed of cpuset cs | ||
1004 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() | 1011 | * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() |
1005 | * | 1012 | * |
1006 | * Called with cpuset_mutex held | 1013 | * Called with cpuset_mutex held |
1007 | * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 | 1014 | * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 |
1008 | * if @heap != NULL. | 1015 | * if @heap != NULL. |
1009 | */ | 1016 | */ |
1010 | static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, | 1017 | static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) |
1011 | struct ptr_heap *heap) | ||
1012 | { | 1018 | { |
1019 | static nodemask_t newmems; /* protected by cpuset_mutex */ | ||
1013 | struct cgroup_scanner scan; | 1020 | struct cgroup_scanner scan; |
1014 | 1021 | ||
1015 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ | 1022 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
1016 | 1023 | ||
1024 | guarantee_online_mems(cs, &newmems); | ||
1025 | |||
1017 | scan.cg = cs->css.cgroup; | 1026 | scan.cg = cs->css.cgroup; |
1018 | scan.test_task = NULL; | 1027 | scan.test_task = NULL; |
1019 | scan.process_task = cpuset_change_nodemask; | 1028 | scan.process_task = cpuset_change_nodemask; |
1020 | scan.heap = heap; | 1029 | scan.heap = heap; |
1021 | scan.data = (nodemask_t *)oldmem; | 1030 | scan.data = &newmems; |
1022 | 1031 | ||
1023 | /* | 1032 | /* |
1024 | * The mpol_rebind_mm() call takes mmap_sem, which we couldn't | 1033 | * The mpol_rebind_mm() call takes mmap_sem, which we couldn't |
@@ -1032,6 +1041,12 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, | |||
1032 | */ | 1041 | */ |
1033 | cgroup_scan_tasks(&scan); | 1042 | cgroup_scan_tasks(&scan); |
1034 | 1043 | ||
1044 | /* | ||
1045 | * All the tasks' nodemasks have been updated, update | ||
1046 | * cs->old_mems_allowed. | ||
1047 | */ | ||
1048 | cs->old_mems_allowed = newmems; | ||
1049 | |||
1035 | /* We're done rebinding vmas to this cpuset's new mems_allowed. */ | 1050 | /* We're done rebinding vmas to this cpuset's new mems_allowed. */ |
1036 | cpuset_being_rebound = NULL; | 1051 | cpuset_being_rebound = NULL; |
1037 | } | 1052 | } |
@@ -1052,13 +1067,9 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, | |||
1052 | static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | 1067 | static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, |
1053 | const char *buf) | 1068 | const char *buf) |
1054 | { | 1069 | { |
1055 | NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL); | ||
1056 | int retval; | 1070 | int retval; |
1057 | struct ptr_heap heap; | 1071 | struct ptr_heap heap; |
1058 | 1072 | ||
1059 | if (!oldmem) | ||
1060 | return -ENOMEM; | ||
1061 | |||
1062 | /* | 1073 | /* |
1063 | * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; | 1074 | * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; |
1064 | * it's read-only | 1075 | * it's read-only |
@@ -1087,8 +1098,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1087 | goto done; | 1098 | goto done; |
1088 | } | 1099 | } |
1089 | } | 1100 | } |
1090 | *oldmem = cs->mems_allowed; | 1101 | |
1091 | if (nodes_equal(*oldmem, trialcs->mems_allowed)) { | 1102 | if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { |
1092 | retval = 0; /* Too easy - nothing to do */ | 1103 | retval = 0; /* Too easy - nothing to do */ |
1093 | goto done; | 1104 | goto done; |
1094 | } | 1105 | } |
@@ -1104,11 +1115,10 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |||
1104 | cs->mems_allowed = trialcs->mems_allowed; | 1115 | cs->mems_allowed = trialcs->mems_allowed; |
1105 | mutex_unlock(&callback_mutex); | 1116 | mutex_unlock(&callback_mutex); |
1106 | 1117 | ||
1107 | update_tasks_nodemask(cs, oldmem, &heap); | 1118 | update_tasks_nodemask(cs, &heap); |
1108 | 1119 | ||
1109 | heap_free(&heap); | 1120 | heap_free(&heap); |
1110 | done: | 1121 | done: |
1111 | NODEMASK_FREE(oldmem); | ||
1112 | return retval; | 1122 | return retval; |
1113 | } | 1123 | } |
1114 | 1124 | ||
@@ -1431,6 +1441,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1431 | mmput(mm); | 1441 | mmput(mm); |
1432 | } | 1442 | } |
1433 | 1443 | ||
1444 | cs->old_mems_allowed = cpuset_attach_nodemask_to; | ||
1445 | |||
1434 | cs->attach_in_progress--; | 1446 | cs->attach_in_progress--; |
1435 | if (!cs->attach_in_progress) | 1447 | if (!cs->attach_in_progress) |
1436 | wake_up(&cpuset_attach_wq); | 1448 | wake_up(&cpuset_attach_wq); |
@@ -1985,7 +1997,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
1985 | static void cpuset_hotplug_update_tasks(struct cpuset *cs) | 1997 | static void cpuset_hotplug_update_tasks(struct cpuset *cs) |
1986 | { | 1998 | { |
1987 | static cpumask_t off_cpus; | 1999 | static cpumask_t off_cpus; |
1988 | static nodemask_t off_mems, tmp_mems; | 2000 | static nodemask_t off_mems; |
1989 | bool is_empty; | 2001 | bool is_empty; |
1990 | 2002 | ||
1991 | retry: | 2003 | retry: |
@@ -2015,11 +2027,10 @@ retry: | |||
2015 | 2027 | ||
2016 | /* remove offline mems from @cs */ | 2028 | /* remove offline mems from @cs */ |
2017 | if (!nodes_empty(off_mems)) { | 2029 | if (!nodes_empty(off_mems)) { |
2018 | tmp_mems = cs->mems_allowed; | ||
2019 | mutex_lock(&callback_mutex); | 2030 | mutex_lock(&callback_mutex); |
2020 | nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); | 2031 | nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); |
2021 | mutex_unlock(&callback_mutex); | 2032 | mutex_unlock(&callback_mutex); |
2022 | update_tasks_nodemask(cs, &tmp_mems, NULL); | 2033 | update_tasks_nodemask(cs, NULL); |
2023 | } | 2034 | } |
2024 | 2035 | ||
2025 | is_empty = cpumask_empty(cs->cpus_allowed) || | 2036 | is_empty = cpumask_empty(cs->cpus_allowed) || |
@@ -2083,11 +2094,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
2083 | 2094 | ||
2084 | /* synchronize mems_allowed to N_MEMORY */ | 2095 | /* synchronize mems_allowed to N_MEMORY */ |
2085 | if (mems_updated) { | 2096 | if (mems_updated) { |
2086 | tmp_mems = top_cpuset.mems_allowed; | ||
2087 | mutex_lock(&callback_mutex); | 2097 | mutex_lock(&callback_mutex); |
2088 | top_cpuset.mems_allowed = new_mems; | 2098 | top_cpuset.mems_allowed = new_mems; |
2089 | mutex_unlock(&callback_mutex); | 2099 | mutex_unlock(&callback_mutex); |
2090 | update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL); | 2100 | update_tasks_nodemask(&top_cpuset, NULL); |
2091 | } | 2101 | } |
2092 | 2102 | ||
2093 | mutex_unlock(&cpuset_mutex); | 2103 | mutex_unlock(&cpuset_mutex); |
@@ -2158,6 +2168,7 @@ void __init cpuset_init_smp(void) | |||
2158 | { | 2168 | { |
2159 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); | 2169 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2160 | top_cpuset.mems_allowed = node_states[N_MEMORY]; | 2170 | top_cpuset.mems_allowed = node_states[N_MEMORY]; |
2171 | top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; | ||
2161 | 2172 | ||
2162 | register_hotmemory_notifier(&cpuset_track_online_nodes_nb); | 2173 | register_hotmemory_notifier(&cpuset_track_online_nodes_nb); |
2163 | } | 2174 | } |