diff options
Diffstat (limited to 'kernel/cpuset.c')
| -rw-r--r-- | kernel/cpuset.c | 71 |
1 files changed, 49 insertions, 22 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e945fcd8179..41989ab4db57 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
| @@ -287,6 +287,8 @@ static struct cpuset top_cpuset = { | |||
| 287 | static DEFINE_MUTEX(cpuset_mutex); | 287 | static DEFINE_MUTEX(cpuset_mutex); |
| 288 | static DEFINE_SPINLOCK(callback_lock); | 288 | static DEFINE_SPINLOCK(callback_lock); |
| 289 | 289 | ||
| 290 | static struct workqueue_struct *cpuset_migrate_mm_wq; | ||
| 291 | |||
| 290 | /* | 292 | /* |
| 291 | * CPU / memory hotplug is handled asynchronously. | 293 | * CPU / memory hotplug is handled asynchronously. |
| 292 | */ | 294 | */ |
| @@ -972,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
| 972 | } | 974 | } |
| 973 | 975 | ||
| 974 | /* | 976 | /* |
| 975 | * cpuset_migrate_mm | 977 | * Migrate memory region from one set of nodes to another. This is |
| 976 | * | 978 | * performed asynchronously as it can be called from process migration path |
| 977 | * Migrate memory region from one set of nodes to another. | 979 | * holding locks involved in process management. All mm migrations are |
| 978 | * | 980 | * performed in the queued order and can be waited for by flushing |
| 979 | * Temporarilly set tasks mems_allowed to target nodes of migration, | 981 | * cpuset_migrate_mm_wq. |
| 980 | * so that the migration code can allocate pages on these nodes. | ||
| 981 | * | ||
| 982 | * While the mm_struct we are migrating is typically from some | ||
| 983 | * other task, the task_struct mems_allowed that we are hacking | ||
| 984 | * is for our current task, which must allocate new pages for that | ||
| 985 | * migrating memory region. | ||
| 986 | */ | 982 | */ |
| 987 | 983 | ||
| 984 | struct cpuset_migrate_mm_work { | ||
| 985 | struct work_struct work; | ||
| 986 | struct mm_struct *mm; | ||
| 987 | nodemask_t from; | ||
| 988 | nodemask_t to; | ||
| 989 | }; | ||
| 990 | |||
| 991 | static void cpuset_migrate_mm_workfn(struct work_struct *work) | ||
| 992 | { | ||
| 993 | struct cpuset_migrate_mm_work *mwork = | ||
| 994 | container_of(work, struct cpuset_migrate_mm_work, work); | ||
| 995 | |||
| 996 | /* on a wq worker, no need to worry about %current's mems_allowed */ | ||
| 997 | do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); | ||
| 998 | mmput(mwork->mm); | ||
| 999 | kfree(mwork); | ||
| 1000 | } | ||
| 1001 | |||
| 988 | static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | 1002 | static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, |
| 989 | const nodemask_t *to) | 1003 | const nodemask_t *to) |
| 990 | { | 1004 | { |
| 991 | struct task_struct *tsk = current; | 1005 | struct cpuset_migrate_mm_work *mwork; |
| 992 | |||
| 993 | tsk->mems_allowed = *to; | ||
| 994 | 1006 | ||
| 995 | do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); | 1007 | mwork = kzalloc(sizeof(*mwork), GFP_KERNEL); |
| 1008 | if (mwork) { | ||
| 1009 | mwork->mm = mm; | ||
| 1010 | mwork->from = *from; | ||
| 1011 | mwork->to = *to; | ||
| 1012 | INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); | ||
| 1013 | queue_work(cpuset_migrate_mm_wq, &mwork->work); | ||
| 1014 | } else { | ||
| 1015 | mmput(mm); | ||
| 1016 | } | ||
| 1017 | } | ||
| 996 | 1018 | ||
| 997 | rcu_read_lock(); | 1019 | void cpuset_post_attach_flush(void) |
| 998 | guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); | 1020 | { |
| 999 | rcu_read_unlock(); | 1021 | flush_workqueue(cpuset_migrate_mm_wq); |
| 1000 | } | 1022 | } |
| 1001 | 1023 | ||
| 1002 | /* | 1024 | /* |
| @@ -1097,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs) | |||
| 1097 | mpol_rebind_mm(mm, &cs->mems_allowed); | 1119 | mpol_rebind_mm(mm, &cs->mems_allowed); |
| 1098 | if (migrate) | 1120 | if (migrate) |
| 1099 | cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); | 1121 | cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); |
| 1100 | mmput(mm); | 1122 | else |
| 1123 | mmput(mm); | ||
| 1101 | } | 1124 | } |
| 1102 | css_task_iter_end(&it); | 1125 | css_task_iter_end(&it); |
| 1103 | 1126 | ||
| @@ -1545,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset) | |||
| 1545 | * @old_mems_allowed is the right nodesets that we | 1568 | * @old_mems_allowed is the right nodesets that we |
| 1546 | * migrate mm from. | 1569 | * migrate mm from. |
| 1547 | */ | 1570 | */ |
| 1548 | if (is_memory_migrate(cs)) { | 1571 | if (is_memory_migrate(cs)) |
| 1549 | cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, | 1572 | cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, |
| 1550 | &cpuset_attach_nodemask_to); | 1573 | &cpuset_attach_nodemask_to); |
| 1551 | } | 1574 | else |
| 1552 | mmput(mm); | 1575 | mmput(mm); |
| 1553 | } | 1576 | } |
| 1554 | } | 1577 | } |
| 1555 | 1578 | ||
| @@ -1714,6 +1737,7 @@ out_unlock: | |||
| 1714 | mutex_unlock(&cpuset_mutex); | 1737 | mutex_unlock(&cpuset_mutex); |
| 1715 | kernfs_unbreak_active_protection(of->kn); | 1738 | kernfs_unbreak_active_protection(of->kn); |
| 1716 | css_put(&cs->css); | 1739 | css_put(&cs->css); |
| 1740 | flush_workqueue(cpuset_migrate_mm_wq); | ||
| 1717 | return retval ?: nbytes; | 1741 | return retval ?: nbytes; |
| 1718 | } | 1742 | } |
| 1719 | 1743 | ||
| @@ -2359,6 +2383,9 @@ void __init cpuset_init_smp(void) | |||
| 2359 | top_cpuset.effective_mems = node_states[N_MEMORY]; | 2383 | top_cpuset.effective_mems = node_states[N_MEMORY]; |
| 2360 | 2384 | ||
| 2361 | register_hotmemory_notifier(&cpuset_track_online_nodes_nb); | 2385 | register_hotmemory_notifier(&cpuset_track_online_nodes_nb); |
| 2386 | |||
| 2387 | cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); | ||
| 2388 | BUG_ON(!cpuset_migrate_mm_wq); | ||
| 2362 | } | 2389 | } |
| 2363 | 2390 | ||
| 2364 | /** | 2391 | /** |
