author:    Li Zefan <lizf@cn.fujitsu.com>  2009-04-02 19:57:51 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>  2009-04-02 22:04:57 -0400
commit:    3b6766fe668b83c8a03c6ed01bcc2ac77cbae848
tree:      8b109576301d849406f080c61f4ce1809556ad0b /kernel/cpuset.c
parent:    bd1a8ab73edd449fecda633449cc277b856ad4f5
cpuset: rewrite update_tasks_nodemask()
This patch uses cgroup_scan_tasks() to rebind tasks' vmas to the cpuset's new
mems_allowed.  This not only simplifies the code considerably, but also avoids
allocating an array to hold the mm pointers of all the tasks in the cpuset.
That array can be large (size > PAGE_SIZE) if the cpuset has many tasks, so
the allocation has a chance of failing under memory pressure.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
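For context, the change hinges on the cgroup_scan_tasks() helper: the caller fills in a
struct cgroup_scanner (the cgroup to walk, an optional filter, a per-task callback, and
opaque data) and the helper invokes the callback for every task attached to the cgroup,
so no per-task mm array has to be allocated up front.  Below is a minimal sketch of that
calling pattern using only the fields and functions that appear in the diff; the
example_* names and the callback body are illustrative placeholders, not the patch's
actual logic.

/*
 * Illustrative sketch only (assumes the in-kernel cgroup API of this era);
 * the example_* names are hypothetical, everything else appears in the patch.
 */
#include <linux/cgroup.h>
#include <linux/nodemask.h>
#include <linux/sched.h>

static void example_process_task(struct task_struct *p,
				 struct cgroup_scanner *scan)
{
	/*
	 * Per-task work goes here.  scan->cg identifies the cgroup being
	 * walked and scan->data carries the caller-supplied context
	 * (the old nodemask in the patch below).
	 */
}

static int example_scan(struct cgroup *cgrp, const nodemask_t *oldmem)
{
	struct cgroup_scanner scan;

	scan.cg = cgrp;				/* cgroup whose tasks are visited */
	scan.test_task = NULL;			/* no filter: visit every task */
	scan.process_task = example_process_task; /* callback run for each task */
	scan.heap = NULL;			/* no caller-supplied heap */
	scan.data = (nodemask_t *)oldmem;	/* opaque context for the callback */

	/* Walks all tasks in the cgroup, calling process_task on each. */
	return cgroup_scan_tasks(&scan);
}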
Diffstat (limited to 'kernel/cpuset.c')
 -rw-r--r--  kernel/cpuset.c | 109
 1 file changed, 39 insertions(+), 70 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 31737957cb62..dca455e0482e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1026,6 +1026,31 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	mutex_unlock(&callback_mutex);
 }
 
+/*
+ * Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
+ * nodes if memory_migrate flag is set. Called with cgroup_mutex held.
+ */
+static void cpuset_change_nodemask(struct task_struct *p,
+				   struct cgroup_scanner *scan)
+{
+	struct mm_struct *mm;
+	struct cpuset *cs;
+	int migrate;
+	const nodemask_t *oldmem = scan->data;
+
+	mm = get_task_mm(p);
+	if (!mm)
+		return;
+
+	cs = cgroup_cs(scan->cg);
+	migrate = is_memory_migrate(cs);
+
+	mpol_rebind_mm(mm, &cs->mems_allowed);
+	if (migrate)
+		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
+	mmput(mm);
+}
+
 static void *cpuset_being_rebound;
 
 /**
@@ -1038,88 +1063,32 @@ static void *cpuset_being_rebound;
  */
 static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
 {
-	struct task_struct *p;
-	struct mm_struct **mmarray;
-	int i, n, ntasks;
-	int migrate;
-	int fudge;
-	struct cgroup_iter it;
 	int retval;
+	struct cgroup_scanner scan;
 
 	cpuset_being_rebound = cs;	/* causes mpol_dup() rebind */
 
-	fudge = 10;			/* spare mmarray[] slots */
-	fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
-	retval = -ENOMEM;
-
-	/*
-	 * Allocate mmarray[] to hold mm reference for each task
-	 * in cpuset cs. Can't kmalloc GFP_KERNEL while holding
-	 * tasklist_lock. We could use GFP_ATOMIC, but with a
-	 * few more lines of code, we can retry until we get a big
-	 * enough mmarray[] w/o using GFP_ATOMIC.
-	 */
-	while (1) {
-		ntasks = cgroup_task_count(cs->css.cgroup);	/* guess */
-		ntasks += fudge;
-		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
-		if (!mmarray)
-			goto done;
-		read_lock(&tasklist_lock);		/* block fork */
-		if (cgroup_task_count(cs->css.cgroup) <= ntasks)
-			break;				/* got enough */
-		read_unlock(&tasklist_lock);		/* try again */
-		kfree(mmarray);
-	}
-
-	n = 0;
-
-	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	cgroup_iter_start(cs->css.cgroup, &it);
-	while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
-		struct mm_struct *mm;
-
-		if (n >= ntasks) {
-			printk(KERN_WARNING
-				"Cpuset mempolicy rebind incomplete.\n");
-			break;
-		}
-		mm = get_task_mm(p);
-		if (!mm)
-			continue;
-		mmarray[n++] = mm;
-	}
-	cgroup_iter_end(cs->css.cgroup, &it);
-	read_unlock(&tasklist_lock);
+	scan.cg = cs->css.cgroup;
+	scan.test_task = NULL;
+	scan.process_task = cpuset_change_nodemask;
+	scan.heap = NULL;
+	scan.data = (nodemask_t *)oldmem;
 
 	/*
-	 * Now that we've dropped the tasklist spinlock, we can
-	 * rebind the vma mempolicies of each mm in mmarray[] to their
-	 * new cpuset, and release that mm. The mpol_rebind_mm()
-	 * call takes mmap_sem, which we couldn't take while holding
-	 * tasklist_lock. Forks can happen again now - the mpol_dup()
-	 * cpuset_being_rebound check will catch such forks, and rebind
-	 * their vma mempolicies too. Because we still hold the global
-	 * cgroup_mutex, we know that no other rebind effort will
-	 * be contending for the global variable cpuset_being_rebound.
+	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
+	 * take while holding tasklist_lock. Forks can happen - the
+	 * mpol_dup() cpuset_being_rebound check will catch such forks,
+	 * and rebind their vma mempolicies too. Because we still hold
+	 * the global cgroup_mutex, we know that no other rebind effort
+	 * will be contending for the global variable cpuset_being_rebound.
 	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
 	 * is idempotent. Also migrate pages in each mm to new nodes.
 	 */
-	migrate = is_memory_migrate(cs);
-	for (i = 0; i < n; i++) {
-		struct mm_struct *mm = mmarray[i];
-
-		mpol_rebind_mm(mm, &cs->mems_allowed);
-		if (migrate)
-			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
-		mmput(mm);
-	}
+	retval = cgroup_scan_tasks(&scan);
 
 	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
-	kfree(mmarray);
 	cpuset_being_rebound = NULL;
-	retval = 0;
-done:
+
 	return retval;
 }
 