aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- kernel/cpuset.c 69
1 files changed, 57 insertions, 12 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 18aea1bd1284..72248d1b9e3f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -616,12 +616,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
616 * current->cpuset if a task has its memory placement changed. 616 * current->cpuset if a task has its memory placement changed.
617 * Do not call this routine if in_interrupt(). 617 * Do not call this routine if in_interrupt().
618 * 618 *
619 * Call without callback_mutex or task_lock() held. May be called 619 * Call without callback_mutex or task_lock() held. May be
620 * with or without manage_mutex held. Doesn't need task_lock to guard 620 * called with or without manage_mutex held. Thanks in part to
621 * against another task changing a non-NULL cpuset pointer to NULL, 621 * 'the_top_cpuset_hack', the tasks cpuset pointer will never
622 * as that is only done by a task on itself, and if the current task 622 * be NULL. This routine also might acquire callback_mutex and
623 * is here, it is not simultaneously in the exit code NULL'ing its
624 * cpuset pointer. This routine also might acquire callback_mutex and
625 * current->mm->mmap_sem during call. 623 * current->mm->mmap_sem during call.
626 * 624 *
627 * Reading current->cpuset->mems_generation doesn't need task_lock 625 * Reading current->cpuset->mems_generation doesn't need task_lock
@@ -836,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf)
836} 834}
837 835
838/* 836/*
837 * cpuset_migrate_mm
838 *
839 * Migrate memory region from one set of nodes to another.
840 *
841 * Temporarily set task's mems_allowed to target nodes of migration,
842 * so that the migration code can allocate pages on these nodes.
843 *
844 * Call holding manage_mutex, so our current->cpuset won't change
845 * during this call, as manage_mutex holds off any attach_task()
846 * calls. Therefore we don't need to take task_lock around the
847 * call to guarantee_online_mems(), as we know no one is changing
848 * our task's cpuset.
849 *
850 * Hold callback_mutex around the two modifications of our task's
851 * mems_allowed to synchronize with cpuset_mems_allowed().
852 *
853 * While the mm_struct we are migrating is typically from some
854 * other task, the task_struct mems_allowed that we are hacking
855 * is for our current task, which must allocate new pages for that
856 * migrating memory region.
857 *
858 * We call cpuset_update_task_memory_state() before hacking
859 * our task's mems_allowed, so that we are assured of being in
860 * sync with our task's cpuset, and in particular, callbacks to
861 * cpuset_update_task_memory_state() from nested page allocations
862 * won't see any mismatch of our cpuset and task mems_generation
863 * values, so won't overwrite our hacked task's mems_allowed
864 * nodemask.
865 */
866
867static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
868 const nodemask_t *to)
869{
870 struct task_struct *tsk = current; /* the mask we override belongs to the calling task, not to mm's owner */
871
872 cpuset_update_task_memory_state(); /* sync with our cpuset first, so nested callbacks see no mems_generation mismatch */
873
874 mutex_lock(&callback_mutex); /* callback_mutex synchronizes with cpuset_mems_allowed() */
875 tsk->mems_allowed = *to; /* temporarily allow allocation on the migration target nodes */
876 mutex_unlock(&callback_mutex);
877
878 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
879
880 mutex_lock(&callback_mutex);
881 guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); /* second modification: refill mems_allowed from our own cpuset */
882 mutex_unlock(&callback_mutex);
883}
884
885/*
839 * Handle user request to change the 'mems' memory placement 886 * Handle user request to change the 'mems' memory placement
840 * of a cpuset. Needs to validate the request, update the 887 * of a cpuset. Needs to validate the request, update the
841 * cpusets mems_allowed and mems_generation, and for each 888 * cpusets mems_allowed and mems_generation, and for each
@@ -947,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
947 struct mm_struct *mm = mmarray[i]; 994 struct mm_struct *mm = mmarray[i];
948 995
949 mpol_rebind_mm(mm, &cs->mems_allowed); 996 mpol_rebind_mm(mm, &cs->mems_allowed);
950 if (migrate) { 997 if (migrate)
951 do_migrate_pages(mm, &oldmem, &cs->mems_allowed, 998 cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
952 MPOL_MF_MOVE_ALL);
953 }
954 mmput(mm); 999 mmput(mm);
955 } 1000 }
956 1001
@@ -1185,11 +1230,11 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1185 mm = get_task_mm(tsk); 1230 mm = get_task_mm(tsk);
1186 if (mm) { 1231 if (mm) {
1187 mpol_rebind_mm(mm, &to); 1232 mpol_rebind_mm(mm, &to);
1233 if (is_memory_migrate(cs))
1234 cpuset_migrate_mm(mm, &from, &to);
1188 mmput(mm); 1235 mmput(mm);
1189 } 1236 }
1190 1237
1191 if (is_memory_migrate(cs))
1192 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
1193 put_task_struct(tsk); 1238 put_task_struct(tsk);
1194 synchronize_rcu(); 1239 synchronize_rcu();
1195 if (atomic_dec_and_test(&oldcs->count)) 1240 if (atomic_dec_and_test(&oldcs->count))