Diffstat (limited to 'kernel/cpuset.c')

 -rw-r--r--  kernel/cpuset.c | 69
 1 file changed, 57 insertions(+), 12 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 18aea1bd1284..72248d1b9e3f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -616,12 +616,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * current->cpuset if a task has its memory placement changed.
  * Do not call this routine if in_interrupt().
  *
- * Call without callback_mutex or task_lock() held.  May be called
- * with or without manage_mutex held.  Doesn't need task_lock to guard
- * against another task changing a non-NULL cpuset pointer to NULL,
- * as that is only done by a task on itself, and if the current task
- * is here, it is not simultaneously in the exit code NULL'ing its
- * cpuset pointer.  This routine also might acquire callback_mutex and
+ * Call without callback_mutex or task_lock() held.  May be
+ * called with or without manage_mutex held.  Thanks in part to
+ * 'the_top_cpuset_hack', the task's cpuset pointer will never
+ * be NULL.  This routine also might acquire callback_mutex and
  * current->mm->mmap_sem during call.
  *
  * Reading current->cpuset->mems_generation doesn't need task_lock
@@ -836,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 }
 
 /*
+ * cpuset_migrate_mm
+ *
+ *    Migrate memory region from one set of nodes to another.
+ *
+ *    Temporarily set the task's mems_allowed to the target nodes of
+ *    migration, so that the migration code can allocate pages on these nodes.
+ *
+ *    Call holding manage_mutex, so our current->cpuset won't change
+ *    during this call, as manage_mutex holds off any attach_task()
+ *    calls.  Therefore we don't need to take task_lock around the
+ *    call to guarantee_online_mems(), as we know no one is changing
+ *    our task's cpuset.
+ *
+ *    Hold callback_mutex around the two modifications of our task's
+ *    mems_allowed to synchronize with cpuset_mems_allowed().
+ *
+ *    While the mm_struct we are migrating is typically from some
+ *    other task, the task_struct mems_allowed that we are hacking
+ *    is for our current task, which must allocate new pages for that
+ *    migrating memory region.
+ *
+ *    We call cpuset_update_task_memory_state() before hacking
+ *    our task's mems_allowed, so that we are assured of being in
+ *    sync with our task's cpuset, and in particular, callbacks to
+ *    cpuset_update_task_memory_state() from nested page allocations
+ *    won't see any mismatch of our cpuset and task mems_generation
+ *    values, so won't overwrite our hacked task's mems_allowed
+ *    nodemask.
+ */
+
+static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
+                                                        const nodemask_t *to)
+{
+        struct task_struct *tsk = current;
+
+        cpuset_update_task_memory_state();
+
+        mutex_lock(&callback_mutex);
+        tsk->mems_allowed = *to;
+        mutex_unlock(&callback_mutex);
+
+        do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+
+        mutex_lock(&callback_mutex);
+        guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
+        mutex_unlock(&callback_mutex);
+}
+
+/*
  * Handle user request to change the 'mems' memory placement
  * of a cpuset.  Needs to validate the request, update the
  * cpusets mems_allowed and mems_generation, and for each
@@ -947,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 		struct mm_struct *mm = mmarray[i];
 
 		mpol_rebind_mm(mm, &cs->mems_allowed);
-		if (migrate) {
-			do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
-							MPOL_MF_MOVE_ALL);
-		}
+		if (migrate)
+			cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
 		mmput(mm);
 	}
 
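The update_nodemask() path above is driven entirely from the cpuset
filesystem: with the cpuset's memory_migrate flag set, rewriting its
'mems' file now ends up in cpuset_migrate_mm() for each mm attached to
the cpuset. A minimal userspace sketch of exercising that path follows;
the /dev/cpuset mount point and the cpuset name "demo" are assumptions,
so adjust both for your system.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write a short string to a cpuset control file. */
static int write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0)
		perror(path);
	close(fd);
	return 0;
}

int main(void)
{
	/* Enable page migration for this cpuset (assumed to exist). */
	write_file("/dev/cpuset/demo/memory_migrate", "1");

	/* Retarget memory placement: pages on the old nodes are moved
	 * to node 1, via cpuset_migrate_mm() in the kernel. */
	write_file("/dev/cpuset/demo/mems", "1");
	return 0;
}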
@@ -1185,11 +1230,11 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	mm = get_task_mm(tsk);
 	if (mm) {
 		mpol_rebind_mm(mm, &to);
+		if (is_memory_migrate(cs))
+			cpuset_migrate_mm(mm, &from, &to);
 		mmput(mm);
 	}
 
-	if (is_memory_migrate(cs))
-		do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
 	put_task_struct(tsk);
 	synchronize_rcu();
 	if (atomic_dec_and_test(&oldcs->count))
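The attach_task() hunk moves the migration inside the if (mm) block, so
cpuset_migrate_mm() runs while the reference taken by get_task_mm() is
still held, instead of dereferencing tsk->mm after that reference has
been dropped. A sketch of driving this path from userspace, again
assuming a /dev/cpuset mount and an existing "demo" cpuset with
memory_migrate already enabled (the PID here is hypothetical):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/dev/cpuset/demo/tasks", "w");

	if (!f) {
		perror("tasks");
		return 1;
	}
	/* Attaching PID 1234 (hypothetical) to the cpuset migrates its
	 * pages to the cpuset's mems when memory_migrate is set. */
	fprintf(f, "%d\n", 1234);
	fclose(f);
	return 0;
}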