aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c71
1 files changed, 49 insertions, 22 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e945fcd8179..41989ab4db57 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -287,6 +287,8 @@ static struct cpuset top_cpuset = {
287static DEFINE_MUTEX(cpuset_mutex); 287static DEFINE_MUTEX(cpuset_mutex);
288static DEFINE_SPINLOCK(callback_lock); 288static DEFINE_SPINLOCK(callback_lock);
289 289
290static struct workqueue_struct *cpuset_migrate_mm_wq;
291
290/* 292/*
291 * CPU / memory hotplug is handled asynchronously. 293 * CPU / memory hotplug is handled asynchronously.
292 */ 294 */
@@ -972,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
972} 974}
973 975
974/* 976/*
975 * cpuset_migrate_mm 977 * Migrate memory region from one set of nodes to another. This is
976 * 978 * performed asynchronously as it can be called from process migration path
977 * Migrate memory region from one set of nodes to another. 979 * holding locks involved in process management. All mm migrations are
978 * 980 * performed in the queued order and can be waited for by flushing
979 * Temporarilly set tasks mems_allowed to target nodes of migration, 981 * cpuset_migrate_mm_wq.
980 * so that the migration code can allocate pages on these nodes.
981 *
982 * While the mm_struct we are migrating is typically from some
983 * other task, the task_struct mems_allowed that we are hacking
984 * is for our current task, which must allocate new pages for that
985 * migrating memory region.
986 */ 982 */
987 983
984struct cpuset_migrate_mm_work {
985 struct work_struct work;
986 struct mm_struct *mm;
987 nodemask_t from;
988 nodemask_t to;
989};
990
991static void cpuset_migrate_mm_workfn(struct work_struct *work)
992{
993 struct cpuset_migrate_mm_work *mwork =
994 container_of(work, struct cpuset_migrate_mm_work, work);
995
996 /* on a wq worker, no need to worry about %current's mems_allowed */
997 do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
998 mmput(mwork->mm);
999 kfree(mwork);
1000}
1001
988static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, 1002static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
989 const nodemask_t *to) 1003 const nodemask_t *to)
990{ 1004{
991 struct task_struct *tsk = current; 1005 struct cpuset_migrate_mm_work *mwork;
992
993 tsk->mems_allowed = *to;
994 1006
995 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); 1007 mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
1008 if (mwork) {
1009 mwork->mm = mm;
1010 mwork->from = *from;
1011 mwork->to = *to;
1012 INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
1013 queue_work(cpuset_migrate_mm_wq, &mwork->work);
1014 } else {
1015 mmput(mm);
1016 }
1017}
996 1018
997 rcu_read_lock(); 1019void cpuset_post_attach_flush(void)
998 guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); 1020{
999 rcu_read_unlock(); 1021 flush_workqueue(cpuset_migrate_mm_wq);
1000} 1022}
1001 1023
1002/* 1024/*
@@ -1097,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs)
1097 mpol_rebind_mm(mm, &cs->mems_allowed); 1119 mpol_rebind_mm(mm, &cs->mems_allowed);
1098 if (migrate) 1120 if (migrate)
1099 cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); 1121 cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
1100 mmput(mm); 1122 else
1123 mmput(mm);
1101 } 1124 }
1102 css_task_iter_end(&it); 1125 css_task_iter_end(&it);
1103 1126
@@ -1545,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset)
1545 * @old_mems_allowed is the right nodesets that we 1568 * @old_mems_allowed is the right nodesets that we
1546 * migrate mm from. 1569 * migrate mm from.
1547 */ 1570 */
1548 if (is_memory_migrate(cs)) { 1571 if (is_memory_migrate(cs))
1549 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, 1572 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
1550 &cpuset_attach_nodemask_to); 1573 &cpuset_attach_nodemask_to);
1551 } 1574 else
1552 mmput(mm); 1575 mmput(mm);
1553 } 1576 }
1554 } 1577 }
1555 1578
@@ -1714,6 +1737,7 @@ out_unlock:
1714 mutex_unlock(&cpuset_mutex); 1737 mutex_unlock(&cpuset_mutex);
1715 kernfs_unbreak_active_protection(of->kn); 1738 kernfs_unbreak_active_protection(of->kn);
1716 css_put(&cs->css); 1739 css_put(&cs->css);
1740 flush_workqueue(cpuset_migrate_mm_wq);
1717 return retval ?: nbytes; 1741 return retval ?: nbytes;
1718} 1742}
1719 1743
@@ -2359,6 +2383,9 @@ void __init cpuset_init_smp(void)
2359 top_cpuset.effective_mems = node_states[N_MEMORY]; 2383 top_cpuset.effective_mems = node_states[N_MEMORY];
2360 2384
2361 register_hotmemory_notifier(&cpuset_track_online_nodes_nb); 2385 register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
2386
2387 cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
2388 BUG_ON(!cpuset_migrate_mm_wq);
2362} 2389}
2363 2390
2364/** 2391/**