aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorLi Zefan <lizefan@huawei.com>2013-06-09 05:15:08 -0400
committerTejun Heo <tj@kernel.org>2013-06-13 13:48:32 -0400
commit33ad801dfb5c8b1127c72fdb745ce8c630150f3f (patch)
tree3bb097f4353c0950194a23338bdd0cbd3fd28134 /kernel/cpuset.c
parent388afd8549dc8be0920e00ae9404341593b6bd7c (diff)
cpuset: record old_mems_allowed in struct cpuset
When we update a cpuset's mems_allowed and thus update tasks' mems_allowed, it's required to pass the old mems_allowed and new mems_allowed to cpuset_migrate_mm(). Currently we save old mems_allowed in a temp local variable before changing cpuset->mems_allowed. This patch changes it by saving old mems_allowed in cpuset->old_mems_allowed. This currently won't change any behavior, but it will later allow us to keep tasks in empty cpusets. v3: restored "cpuset_attach_nodemask_to = cs->mems_allowed" Signed-off-by: Li Zefan <lizefan@huawei.com> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c61
1 file changed, 36 insertions, 25 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 608fe1308b22..2b4554588a04 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -88,6 +88,18 @@ struct cpuset {
88 cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ 88 cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
89 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ 89 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
90 90
91 /*
92 * This is old Memory Nodes tasks took on.
93 *
94 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
95 * - A new cpuset's old_mems_allowed is initialized when some
96 * task is moved into it.
97 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
98 * cpuset.mems_allowed and have tasks' nodemask updated, and
99 * then old_mems_allowed is updated to mems_allowed.
100 */
101 nodemask_t old_mems_allowed;
102
91 struct fmeter fmeter; /* memory_pressure filter */ 103 struct fmeter fmeter; /* memory_pressure filter */
92 104
93 /* 105 /*
@@ -972,16 +984,12 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
972static void cpuset_change_nodemask(struct task_struct *p, 984static void cpuset_change_nodemask(struct task_struct *p,
973 struct cgroup_scanner *scan) 985 struct cgroup_scanner *scan)
974{ 986{
987 struct cpuset *cs = cgroup_cs(scan->cg);
975 struct mm_struct *mm; 988 struct mm_struct *mm;
976 struct cpuset *cs;
977 int migrate; 989 int migrate;
978 const nodemask_t *oldmem = scan->data; 990 nodemask_t *newmems = scan->data;
979 static nodemask_t newmems; /* protected by cpuset_mutex */
980
981 cs = cgroup_cs(scan->cg);
982 guarantee_online_mems(cs, &newmems);
983 991
984 cpuset_change_task_nodemask(p, &newmems); 992 cpuset_change_task_nodemask(p, newmems);
985 993
986 mm = get_task_mm(p); 994 mm = get_task_mm(p);
987 if (!mm) 995 if (!mm)
@@ -991,7 +999,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
991 999
992 mpol_rebind_mm(mm, &cs->mems_allowed); 1000 mpol_rebind_mm(mm, &cs->mems_allowed);
993 if (migrate) 1001 if (migrate)
994 cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); 1002 cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems);
995 mmput(mm); 1003 mmput(mm);
996} 1004}
997 1005
@@ -1000,25 +1008,26 @@ static void *cpuset_being_rebound;
1000/** 1008/**
1001 * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. 1009 * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
1002 * @cs: the cpuset in which each task's mems_allowed mask needs to be changed 1010 * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
1003 * @oldmem: old mems_allowed of cpuset cs
1004 * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks() 1011 * @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
1005 * 1012 *
1006 * Called with cpuset_mutex held 1013 * Called with cpuset_mutex held
1007 * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0 1014 * No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
1008 * if @heap != NULL. 1015 * if @heap != NULL.
1009 */ 1016 */
1010static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem, 1017static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
1011 struct ptr_heap *heap)
1012{ 1018{
1019 static nodemask_t newmems; /* protected by cpuset_mutex */
1013 struct cgroup_scanner scan; 1020 struct cgroup_scanner scan;
1014 1021
1015 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ 1022 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
1016 1023
1024 guarantee_online_mems(cs, &newmems);
1025
1017 scan.cg = cs->css.cgroup; 1026 scan.cg = cs->css.cgroup;
1018 scan.test_task = NULL; 1027 scan.test_task = NULL;
1019 scan.process_task = cpuset_change_nodemask; 1028 scan.process_task = cpuset_change_nodemask;
1020 scan.heap = heap; 1029 scan.heap = heap;
1021 scan.data = (nodemask_t *)oldmem; 1030 scan.data = &newmems;
1022 1031
1023 /* 1032 /*
1024 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't 1033 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
@@ -1032,6 +1041,12 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
1032 */ 1041 */
1033 cgroup_scan_tasks(&scan); 1042 cgroup_scan_tasks(&scan);
1034 1043
1044 /*
1045 * All the tasks' nodemasks have been updated, update
1046 * cs->old_mems_allowed.
1047 */
1048 cs->old_mems_allowed = newmems;
1049
1035 /* We're done rebinding vmas to this cpuset's new mems_allowed. */ 1050 /* We're done rebinding vmas to this cpuset's new mems_allowed. */
1036 cpuset_being_rebound = NULL; 1051 cpuset_being_rebound = NULL;
1037} 1052}
@@ -1052,13 +1067,9 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
1052static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, 1067static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1053 const char *buf) 1068 const char *buf)
1054{ 1069{
1055 NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
1056 int retval; 1070 int retval;
1057 struct ptr_heap heap; 1071 struct ptr_heap heap;
1058 1072
1059 if (!oldmem)
1060 return -ENOMEM;
1061
1062 /* 1073 /*
1063 * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; 1074 * top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
1064 * it's read-only 1075 * it's read-only
@@ -1087,8 +1098,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1087 goto done; 1098 goto done;
1088 } 1099 }
1089 } 1100 }
1090 *oldmem = cs->mems_allowed; 1101
1091 if (nodes_equal(*oldmem, trialcs->mems_allowed)) { 1102 if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
1092 retval = 0; /* Too easy - nothing to do */ 1103 retval = 0; /* Too easy - nothing to do */
1093 goto done; 1104 goto done;
1094 } 1105 }
@@ -1104,11 +1115,10 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1104 cs->mems_allowed = trialcs->mems_allowed; 1115 cs->mems_allowed = trialcs->mems_allowed;
1105 mutex_unlock(&callback_mutex); 1116 mutex_unlock(&callback_mutex);
1106 1117
1107 update_tasks_nodemask(cs, oldmem, &heap); 1118 update_tasks_nodemask(cs, &heap);
1108 1119
1109 heap_free(&heap); 1120 heap_free(&heap);
1110done: 1121done:
1111 NODEMASK_FREE(oldmem);
1112 return retval; 1122 return retval;
1113} 1123}
1114 1124
@@ -1431,6 +1441,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
1431 mmput(mm); 1441 mmput(mm);
1432 } 1442 }
1433 1443
1444 cs->old_mems_allowed = cpuset_attach_nodemask_to;
1445
1434 cs->attach_in_progress--; 1446 cs->attach_in_progress--;
1435 if (!cs->attach_in_progress) 1447 if (!cs->attach_in_progress)
1436 wake_up(&cpuset_attach_wq); 1448 wake_up(&cpuset_attach_wq);
@@ -1985,7 +1997,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
1985static void cpuset_hotplug_update_tasks(struct cpuset *cs) 1997static void cpuset_hotplug_update_tasks(struct cpuset *cs)
1986{ 1998{
1987 static cpumask_t off_cpus; 1999 static cpumask_t off_cpus;
1988 static nodemask_t off_mems, tmp_mems; 2000 static nodemask_t off_mems;
1989 bool is_empty; 2001 bool is_empty;
1990 2002
1991retry: 2003retry:
@@ -2015,11 +2027,10 @@ retry:
2015 2027
2016 /* remove offline mems from @cs */ 2028 /* remove offline mems from @cs */
2017 if (!nodes_empty(off_mems)) { 2029 if (!nodes_empty(off_mems)) {
2018 tmp_mems = cs->mems_allowed;
2019 mutex_lock(&callback_mutex); 2030 mutex_lock(&callback_mutex);
2020 nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); 2031 nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems);
2021 mutex_unlock(&callback_mutex); 2032 mutex_unlock(&callback_mutex);
2022 update_tasks_nodemask(cs, &tmp_mems, NULL); 2033 update_tasks_nodemask(cs, NULL);
2023 } 2034 }
2024 2035
2025 is_empty = cpumask_empty(cs->cpus_allowed) || 2036 is_empty = cpumask_empty(cs->cpus_allowed) ||
@@ -2083,11 +2094,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
2083 2094
2084 /* synchronize mems_allowed to N_MEMORY */ 2095 /* synchronize mems_allowed to N_MEMORY */
2085 if (mems_updated) { 2096 if (mems_updated) {
2086 tmp_mems = top_cpuset.mems_allowed;
2087 mutex_lock(&callback_mutex); 2097 mutex_lock(&callback_mutex);
2088 top_cpuset.mems_allowed = new_mems; 2098 top_cpuset.mems_allowed = new_mems;
2089 mutex_unlock(&callback_mutex); 2099 mutex_unlock(&callback_mutex);
2090 update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL); 2100 update_tasks_nodemask(&top_cpuset, NULL);
2091 } 2101 }
2092 2102
2093 mutex_unlock(&cpuset_mutex); 2103 mutex_unlock(&cpuset_mutex);
@@ -2158,6 +2168,7 @@ void __init cpuset_init_smp(void)
2158{ 2168{
2159 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); 2169 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
2160 top_cpuset.mems_allowed = node_states[N_MEMORY]; 2170 top_cpuset.mems_allowed = node_states[N_MEMORY];
2171 top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
2161 2172
2162 register_hotmemory_notifier(&cpuset_track_online_nodes_nb); 2173 register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
2163} 2174}