diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2009-03-19 00:52:20 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-03-19 03:15:15 -0400 |
commit | df7c8e845e8e2030e8ae947e0ace56d184d0e9a0 (patch) | |
tree | 15f68e17f2efaf853ffc8f3ba03bca37d847a32b /kernel/sched.c | |
parent | c38da5692e3a4d5d303c04cbf7e526f1eb761076 (diff) |
cpumask: remove cpumask allocation from idle_balance
Impact: fix circular locking
Steven reports a circular locking dependency caused by alloc_cpumask_var()
doing a wakeup. We get rid of this with the tried-and-true technique
of keeping a per-cpu cpumask_var_t rather than doing an allocation
every time.
Simpler and more robust than a rare, implicit allocation within
an atomic codepath.
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <alpine.DEB.2.00.0903181729360.31583@gandalf.stny.rr.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 35 |
1 files changed, 18 insertions, 17 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 5dabd80c3c1..48862d418be 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3448,19 +3448,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, | |||
3448 | */ | 3448 | */ |
3449 | #define MAX_PINNED_INTERVAL 512 | 3449 | #define MAX_PINNED_INTERVAL 512 |
3450 | 3450 | ||
3451 | /* Working cpumask for load_balance and load_balance_newidle. */ | ||
3452 | static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); | ||
3453 | |||
3451 | /* | 3454 | /* |
3452 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 3455 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
3453 | * tasks if there is an imbalance. | 3456 | * tasks if there is an imbalance. |
3454 | */ | 3457 | */ |
3455 | static int load_balance(int this_cpu, struct rq *this_rq, | 3458 | static int load_balance(int this_cpu, struct rq *this_rq, |
3456 | struct sched_domain *sd, enum cpu_idle_type idle, | 3459 | struct sched_domain *sd, enum cpu_idle_type idle, |
3457 | int *balance, struct cpumask *cpus) | 3460 | int *balance) |
3458 | { | 3461 | { |
3459 | int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; | 3462 | int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; |
3460 | struct sched_group *group; | 3463 | struct sched_group *group; |
3461 | unsigned long imbalance; | 3464 | unsigned long imbalance; |
3462 | struct rq *busiest; | 3465 | struct rq *busiest; |
3463 | unsigned long flags; | 3466 | unsigned long flags; |
3467 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | ||
3464 | 3468 | ||
3465 | cpumask_setall(cpus); | 3469 | cpumask_setall(cpus); |
3466 | 3470 | ||
@@ -3615,8 +3619,7 @@ out: | |||
3615 | * this_rq is locked. | 3619 | * this_rq is locked. |
3616 | */ | 3620 | */ |
3617 | static int | 3621 | static int |
3618 | load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, | 3622 | load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) |
3619 | struct cpumask *cpus) | ||
3620 | { | 3623 | { |
3621 | struct sched_group *group; | 3624 | struct sched_group *group; |
3622 | struct rq *busiest = NULL; | 3625 | struct rq *busiest = NULL; |
@@ -3624,6 +3627,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, | |||
3624 | int ld_moved = 0; | 3627 | int ld_moved = 0; |
3625 | int sd_idle = 0; | 3628 | int sd_idle = 0; |
3626 | int all_pinned = 0; | 3629 | int all_pinned = 0; |
3630 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | ||
3627 | 3631 | ||
3628 | cpumask_setall(cpus); | 3632 | cpumask_setall(cpus); |
3629 | 3633 | ||
@@ -3764,10 +3768,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3764 | struct sched_domain *sd; | 3768 | struct sched_domain *sd; |
3765 | int pulled_task = 0; | 3769 | int pulled_task = 0; |
3766 | unsigned long next_balance = jiffies + HZ; | 3770 | unsigned long next_balance = jiffies + HZ; |
3767 | cpumask_var_t tmpmask; | ||
3768 | |||
3769 | if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) | ||
3770 | return; | ||
3771 | 3771 | ||
3772 | for_each_domain(this_cpu, sd) { | 3772 | for_each_domain(this_cpu, sd) { |
3773 | unsigned long interval; | 3773 | unsigned long interval; |
@@ -3778,7 +3778,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3778 | if (sd->flags & SD_BALANCE_NEWIDLE) | 3778 | if (sd->flags & SD_BALANCE_NEWIDLE) |
3779 | /* If we've pulled tasks over stop searching: */ | 3779 | /* If we've pulled tasks over stop searching: */ |
3780 | pulled_task = load_balance_newidle(this_cpu, this_rq, | 3780 | pulled_task = load_balance_newidle(this_cpu, this_rq, |
3781 | sd, tmpmask); | 3781 | sd); |
3782 | 3782 | ||
3783 | interval = msecs_to_jiffies(sd->balance_interval); | 3783 | interval = msecs_to_jiffies(sd->balance_interval); |
3784 | if (time_after(next_balance, sd->last_balance + interval)) | 3784 | if (time_after(next_balance, sd->last_balance + interval)) |
@@ -3793,7 +3793,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3793 | */ | 3793 | */ |
3794 | this_rq->next_balance = next_balance; | 3794 | this_rq->next_balance = next_balance; |
3795 | } | 3795 | } |
3796 | free_cpumask_var(tmpmask); | ||
3797 | } | 3796 | } |
3798 | 3797 | ||
3799 | /* | 3798 | /* |
@@ -3943,11 +3942,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3943 | unsigned long next_balance = jiffies + 60*HZ; | 3942 | unsigned long next_balance = jiffies + 60*HZ; |
3944 | int update_next_balance = 0; | 3943 | int update_next_balance = 0; |
3945 | int need_serialize; | 3944 | int need_serialize; |
3946 | cpumask_var_t tmp; | ||
3947 | |||
3948 | /* Fails alloc? Rebalancing probably not a priority right now. */ | ||
3949 | if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) | ||
3950 | return; | ||
3951 | 3945 | ||
3952 | for_each_domain(cpu, sd) { | 3946 | for_each_domain(cpu, sd) { |
3953 | if (!(sd->flags & SD_LOAD_BALANCE)) | 3947 | if (!(sd->flags & SD_LOAD_BALANCE)) |
@@ -3972,7 +3966,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3972 | } | 3966 | } |
3973 | 3967 | ||
3974 | if (time_after_eq(jiffies, sd->last_balance + interval)) { | 3968 | if (time_after_eq(jiffies, sd->last_balance + interval)) { |
3975 | if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { | 3969 | if (load_balance(cpu, rq, sd, idle, &balance)) { |
3976 | /* | 3970 | /* |
3977 | * We've pulled tasks over so either we're no | 3971 | * We've pulled tasks over so either we're no |
3978 | * longer idle, or one of our SMT siblings is | 3972 | * longer idle, or one of our SMT siblings is |
@@ -4006,8 +4000,6 @@ out: | |||
4006 | */ | 4000 | */ |
4007 | if (likely(update_next_balance)) | 4001 | if (likely(update_next_balance)) |
4008 | rq->next_balance = next_balance; | 4002 | rq->next_balance = next_balance; |
4009 | |||
4010 | free_cpumask_var(tmp); | ||
4011 | } | 4003 | } |
4012 | 4004 | ||
4013 | /* | 4005 | /* |
@@ -8304,6 +8296,9 @@ void __init sched_init(void) | |||
8304 | #ifdef CONFIG_USER_SCHED | 8296 | #ifdef CONFIG_USER_SCHED |
8305 | alloc_size *= 2; | 8297 | alloc_size *= 2; |
8306 | #endif | 8298 | #endif |
8299 | #ifdef CONFIG_CPUMASK_OFFSTACK | ||
8300 | alloc_size += num_possible_cpus() * cpumask_size(); | ||
8301 | #endif | ||
8307 | /* | 8302 | /* |
8308 | * As sched_init() is called before page_alloc is setup, | 8303 | * As sched_init() is called before page_alloc is setup, |
8309 | * we use alloc_bootmem(). | 8304 | * we use alloc_bootmem(). |
@@ -8341,6 +8336,12 @@ void __init sched_init(void) | |||
8341 | ptr += nr_cpu_ids * sizeof(void **); | 8336 | ptr += nr_cpu_ids * sizeof(void **); |
8342 | #endif /* CONFIG_USER_SCHED */ | 8337 | #endif /* CONFIG_USER_SCHED */ |
8343 | #endif /* CONFIG_RT_GROUP_SCHED */ | 8338 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8339 | #ifdef CONFIG_CPUMASK_OFFSTACK | ||
8340 | for_each_possible_cpu(i) { | ||
8341 | per_cpu(load_balance_tmpmask, i) = (void *)ptr; | ||
8342 | ptr += cpumask_size(); | ||
8343 | } | ||
8344 | #endif /* CONFIG_CPUMASK_OFFSTACK */ | ||
8344 | } | 8345 | } |
8345 | 8346 | ||
8346 | #ifdef CONFIG_SMP | 8347 | #ifdef CONFIG_SMP |