diff options
author:    Mel Gorman <mgorman@suse.de>    2013-10-07 06:29:29 -0400
committer: Ingo Molnar <mingo@kernel.org>  2013-10-09 08:48:02 -0400
commit:    7dbd13ed06513b047216a7ffc718bad9df0660f1 (patch)
tree:      6f752254b51954b129fa3a35f44968342060afdb
parent:    82727018b0d33d188e9916bcf76f18387484cb04 (diff)
sched/numa: Prevent parallel updates to group stats during placement
Having multiple tasks in a group go through task_numa_placement
simultaneously can lead to a task picking a wrong node to run on, because
the group stats may be in the middle of an update. This patch avoids
parallel updates by holding the numa_group lock during placement
decisions.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-52-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 kernel/sched/fair.c | 35 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c4df2de6ca4a..147349987bfe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1233,6 +1233,7 @@ static void task_numa_placement(struct task_struct *p)
 {
 	int seq, nid, max_nid = -1, max_group_nid = -1;
 	unsigned long max_faults = 0, max_group_faults = 0;
+	spinlock_t *group_lock = NULL;
 
 	seq = ACCESS_ONCE(p->mm->numa_scan_seq);
 	if (p->numa_scan_seq == seq)
@@ -1241,6 +1242,12 @@ static void task_numa_placement(struct task_struct *p)
 	p->numa_migrate_seq++;
 	p->numa_scan_period_max = task_scan_max(p);
 
+	/* If the task is part of a group prevent parallel updates to group stats */
+	if (p->numa_group) {
+		group_lock = &p->numa_group->lock;
+		spin_lock(group_lock);
+	}
+
 	/* Find the node with the highest number of faults */
 	for_each_online_node(nid) {
 		unsigned long faults = 0, group_faults = 0;
@@ -1279,20 +1286,24 @@ static void task_numa_placement(struct task_struct *p)
 		}
 	}
 
-	/*
-	 * If the preferred task and group nids are different,
-	 * iterate over the nodes again to find the best place.
-	 */
-	if (p->numa_group && max_nid != max_group_nid) {
-		unsigned long weight, max_weight = 0;
-
-		for_each_online_node(nid) {
-			weight = task_weight(p, nid) + group_weight(p, nid);
-			if (weight > max_weight) {
-				max_weight = weight;
-				max_nid = nid;
+	if (p->numa_group) {
+		/*
+		 * If the preferred task and group nids are different,
+		 * iterate over the nodes again to find the best place.
+		 */
+		if (max_nid != max_group_nid) {
+			unsigned long weight, max_weight = 0;
+
+			for_each_online_node(nid) {
+				weight = task_weight(p, nid) + group_weight(p, nid);
+				if (weight > max_weight) {
+					max_weight = weight;
+					max_nid = nid;
+				}
 			}
 		}
+
+		spin_unlock(group_lock);
 	}
 
 	/* Preferred node as the node with the most faults */