aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2006-01-08 04:02:00 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-08 23:13:44 -0500
commit04c19fa6f16047abff2288ddbc1f0798ede5a849 (patch)
tree4c4f109d919042b300ac907a8fde64b822faa7aa
parent4225399a66b315d4d1fb1cb61b75dda201c832e3 (diff)
[PATCH] cpuset: migrate all tasks in cpuset at once
Given the mechanism in the previous patch to handle rebinding the per-vma mempolicies of all tasks in a cpuset that changes its memory placement, it is now easier to handle the page migration requirements of such tasks at the same time. The previous code didn't actually attempt to migrate the pages of the tasks in a cpuset whose memory placement changed until the next time each such task tried to allocate memory. This was undesirable, as users invoking memory page migration expected it to happen when the placement changed, not some unspecified time later when the task needed more memory. It is now trivial to handle the page migration at the same time as the per-vma rebinding is done. The routine cpuset.c:update_nodemask(), which handles changing a cpuset's memory placement ('mems'), now checks for the special case of being asked to write a placement that is the same as before. It was harmless enough before to just recompute everything again, even though nothing had changed. But page migration is a heavyweight operation - moving pages about. So now it is worth avoiding that if asked to move a cpuset to its current location. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--kernel/cpuset.c29
1 files changed, 16 insertions, 13 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 19f87565be17..cf8203a5fa71 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -639,25 +639,14 @@ void cpuset_update_task_memory_state()
639 task_unlock(tsk); 639 task_unlock(tsk);
640 640
641 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { 641 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
642 nodemask_t oldmem = tsk->mems_allowed;
643 int migrate;
644
645 down(&callback_sem); 642 down(&callback_sem);
646 task_lock(tsk); 643 task_lock(tsk);
647 cs = tsk->cpuset; /* Maybe changed when task not locked */ 644 cs = tsk->cpuset; /* Maybe changed when task not locked */
648 migrate = is_memory_migrate(cs);
649 guarantee_online_mems(cs, &tsk->mems_allowed); 645 guarantee_online_mems(cs, &tsk->mems_allowed);
650 tsk->cpuset_mems_generation = cs->mems_generation; 646 tsk->cpuset_mems_generation = cs->mems_generation;
651 task_unlock(tsk); 647 task_unlock(tsk);
652 up(&callback_sem); 648 up(&callback_sem);
653 mpol_rebind_task(tsk, &tsk->mems_allowed); 649 mpol_rebind_task(tsk, &tsk->mems_allowed);
654 if (!nodes_equal(oldmem, tsk->mems_allowed)) {
655 if (migrate) {
656 do_migrate_pages(tsk->mm, &oldmem,
657 &tsk->mems_allowed,
658 MPOL_MF_MOVE_ALL);
659 }
660 }
661 } 650 }
662} 651}
663 652
@@ -815,7 +804,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
815 * Handle user request to change the 'mems' memory placement 804 * Handle user request to change the 'mems' memory placement
816 * of a cpuset. Needs to validate the request, update the 805 * of a cpuset. Needs to validate the request, update the
817 * cpusets mems_allowed and mems_generation, and for each 806 * cpusets mems_allowed and mems_generation, and for each
818 * task in the cpuset, rebind any vma mempolicies. 807 * task in the cpuset, rebind any vma mempolicies and if
808 * the cpuset is marked 'memory_migrate', migrate the tasks
809 * pages to the new memory.
819 * 810 *
820 * Call with manage_sem held. May take callback_sem during call. 811 * Call with manage_sem held. May take callback_sem during call.
821 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, 812 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
@@ -826,9 +817,11 @@ static int update_cpumask(struct cpuset *cs, char *buf)
826static int update_nodemask(struct cpuset *cs, char *buf) 817static int update_nodemask(struct cpuset *cs, char *buf)
827{ 818{
828 struct cpuset trialcs; 819 struct cpuset trialcs;
820 nodemask_t oldmem;
829 struct task_struct *g, *p; 821 struct task_struct *g, *p;
830 struct mm_struct **mmarray; 822 struct mm_struct **mmarray;
831 int i, n, ntasks; 823 int i, n, ntasks;
824 int migrate;
832 int fudge; 825 int fudge;
833 int retval; 826 int retval;
834 827
@@ -837,6 +830,11 @@ static int update_nodemask(struct cpuset *cs, char *buf)
837 if (retval < 0) 830 if (retval < 0)
838 goto done; 831 goto done;
839 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); 832 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
833 oldmem = cs->mems_allowed;
834 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
835 retval = 0; /* Too easy - nothing to do */
836 goto done;
837 }
840 if (nodes_empty(trialcs.mems_allowed)) { 838 if (nodes_empty(trialcs.mems_allowed)) {
841 retval = -ENOSPC; 839 retval = -ENOSPC;
842 goto done; 840 goto done;
@@ -908,12 +906,17 @@ static int update_nodemask(struct cpuset *cs, char *buf)
908 * cpuset manage_sem, we know that no other rebind effort will 906 * cpuset manage_sem, we know that no other rebind effort will
909 * be contending for the global variable cpuset_being_rebound. 907 * be contending for the global variable cpuset_being_rebound.
910 * It's ok if we rebind the same mm twice; mpol_rebind_mm() 908 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
911 * is idempotent. 909 * is idempotent. Also migrate pages in each mm to new nodes.
912 */ 910 */
911 migrate = is_memory_migrate(cs);
913 for (i = 0; i < n; i++) { 912 for (i = 0; i < n; i++) {
914 struct mm_struct *mm = mmarray[i]; 913 struct mm_struct *mm = mmarray[i];
915 914
916 mpol_rebind_mm(mm, &cs->mems_allowed); 915 mpol_rebind_mm(mm, &cs->mems_allowed);
916 if (migrate) {
917 do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
918 MPOL_MF_MOVE_ALL);
919 }
917 mmput(mm); 920 mmput(mm);
918 } 921 }
919 922