about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author Rusty Russell <rusty@rustcorp.com.au> 2009-03-19 00:52:20 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-03-19 03:15:15 -0400
commit df7c8e845e8e2030e8ae947e0ace56d184d0e9a0 (patch)
tree 15f68e17f2efaf853ffc8f3ba03bca37d847a32b /kernel
parent c38da5692e3a4d5d303c04cbf7e526f1eb761076 (diff)
cpumask: remove cpumask allocation from idle_balance
Impact: fix circular locking

Steven reports a circular locking from alloc_cpumask_var doing a wakeup. We get rid of this using the tried-and-true technique of using a per-cpu cpumask_var_t rather than doing an alloc every time. Simpler and more robust than a rare, implicit allocation within an atomic codepath.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <alpine.DEB.2.00.0903181729360.31583@gandalf.stny.rr.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched.c 35
1 files changed, 18 insertions, 17 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 5dabd80c3c1..48862d418be 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3448,19 +3448,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3448 */ 3448 */
3449#define MAX_PINNED_INTERVAL 512 3449#define MAX_PINNED_INTERVAL 512
3450 3450
3451/* Working cpumask for load_balance and load_balance_newidle. */
3452static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
3453
3451/* 3454/*
3452 * Check this_cpu to ensure it is balanced within domain. Attempt to move 3455 * Check this_cpu to ensure it is balanced within domain. Attempt to move
3453 * tasks if there is an imbalance. 3456 * tasks if there is an imbalance.
3454 */ 3457 */
3455static int load_balance(int this_cpu, struct rq *this_rq, 3458static int load_balance(int this_cpu, struct rq *this_rq,
3456 struct sched_domain *sd, enum cpu_idle_type idle, 3459 struct sched_domain *sd, enum cpu_idle_type idle,
3457 int *balance, struct cpumask *cpus) 3460 int *balance)
3458{ 3461{
3459 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 3462 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
3460 struct sched_group *group; 3463 struct sched_group *group;
3461 unsigned long imbalance; 3464 unsigned long imbalance;
3462 struct rq *busiest; 3465 struct rq *busiest;
3463 unsigned long flags; 3466 unsigned long flags;
3467 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
3464 3468
3465 cpumask_setall(cpus); 3469 cpumask_setall(cpus);
3466 3470
@@ -3615,8 +3619,7 @@ out:
3615 * this_rq is locked. 3619 * this_rq is locked.
3616 */ 3620 */
3617static int 3621static int
3618load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, 3622load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
3619 struct cpumask *cpus)
3620{ 3623{
3621 struct sched_group *group; 3624 struct sched_group *group;
3622 struct rq *busiest = NULL; 3625 struct rq *busiest = NULL;
@@ -3624,6 +3627,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3624 int ld_moved = 0; 3627 int ld_moved = 0;
3625 int sd_idle = 0; 3628 int sd_idle = 0;
3626 int all_pinned = 0; 3629 int all_pinned = 0;
3630 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
3627 3631
3628 cpumask_setall(cpus); 3632 cpumask_setall(cpus);
3629 3633
@@ -3764,10 +3768,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3764 struct sched_domain *sd; 3768 struct sched_domain *sd;
3765 int pulled_task = 0; 3769 int pulled_task = 0;
3766 unsigned long next_balance = jiffies + HZ; 3770 unsigned long next_balance = jiffies + HZ;
3767 cpumask_var_t tmpmask;
3768
3769 if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
3770 return;
3771 3771
3772 for_each_domain(this_cpu, sd) { 3772 for_each_domain(this_cpu, sd) {
3773 unsigned long interval; 3773 unsigned long interval;
@@ -3778,7 +3778,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3778 if (sd->flags & SD_BALANCE_NEWIDLE) 3778 if (sd->flags & SD_BALANCE_NEWIDLE)
3779 /* If we've pulled tasks over stop searching: */ 3779 /* If we've pulled tasks over stop searching: */
3780 pulled_task = load_balance_newidle(this_cpu, this_rq, 3780 pulled_task = load_balance_newidle(this_cpu, this_rq,
3781 sd, tmpmask); 3781 sd);
3782 3782
3783 interval = msecs_to_jiffies(sd->balance_interval); 3783 interval = msecs_to_jiffies(sd->balance_interval);
3784 if (time_after(next_balance, sd->last_balance + interval)) 3784 if (time_after(next_balance, sd->last_balance + interval))
@@ -3793,7 +3793,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3793 */ 3793 */
3794 this_rq->next_balance = next_balance; 3794 this_rq->next_balance = next_balance;
3795 } 3795 }
3796 free_cpumask_var(tmpmask);
3797} 3796}
3798 3797
3799/* 3798/*
@@ -3943,11 +3942,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3943 unsigned long next_balance = jiffies + 60*HZ; 3942 unsigned long next_balance = jiffies + 60*HZ;
3944 int update_next_balance = 0; 3943 int update_next_balance = 0;
3945 int need_serialize; 3944 int need_serialize;
3946 cpumask_var_t tmp;
3947
3948 /* Fails alloc? Rebalancing probably not a priority right now. */
3949 if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
3950 return;
3951 3945
3952 for_each_domain(cpu, sd) { 3946 for_each_domain(cpu, sd) {
3953 if (!(sd->flags & SD_LOAD_BALANCE)) 3947 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3972,7 +3966,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3972 } 3966 }
3973 3967
3974 if (time_after_eq(jiffies, sd->last_balance + interval)) { 3968 if (time_after_eq(jiffies, sd->last_balance + interval)) {
3975 if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { 3969 if (load_balance(cpu, rq, sd, idle, &balance)) {
3976 /* 3970 /*
3977 * We've pulled tasks over so either we're no 3971 * We've pulled tasks over so either we're no
3978 * longer idle, or one of our SMT siblings is 3972 * longer idle, or one of our SMT siblings is
@@ -4006,8 +4000,6 @@ out:
4006 */ 4000 */
4007 if (likely(update_next_balance)) 4001 if (likely(update_next_balance))
4008 rq->next_balance = next_balance; 4002 rq->next_balance = next_balance;
4009
4010 free_cpumask_var(tmp);
4011} 4003}
4012 4004
4013/* 4005/*
@@ -8304,6 +8296,9 @@ void __init sched_init(void)
8304#ifdef CONFIG_USER_SCHED 8296#ifdef CONFIG_USER_SCHED
8305 alloc_size *= 2; 8297 alloc_size *= 2;
8306#endif 8298#endif
8299#ifdef CONFIG_CPUMASK_OFFSTACK
8300 alloc_size *= num_possible_cpus() * cpumask_size();
8301#endif
8307 /* 8302 /*
8308 * As sched_init() is called before page_alloc is setup, 8303 * As sched_init() is called before page_alloc is setup,
8309 * we use alloc_bootmem(). 8304 * we use alloc_bootmem().
@@ -8341,6 +8336,12 @@ void __init sched_init(void)
8341 ptr += nr_cpu_ids * sizeof(void **); 8336 ptr += nr_cpu_ids * sizeof(void **);
8342#endif /* CONFIG_USER_SCHED */ 8337#endif /* CONFIG_USER_SCHED */
8343#endif /* CONFIG_RT_GROUP_SCHED */ 8338#endif /* CONFIG_RT_GROUP_SCHED */
8339#ifdef CONFIG_CPUMASK_OFFSTACK
8340 for_each_possible_cpu(i) {
8341 per_cpu(load_balance_tmpmask, i) = (void *)ptr;
8342 ptr += cpumask_size();
8343 }
8344#endif /* CONFIG_CPUMASK_OFFSTACK */
8344 } 8345 }
8345 8346
8346#ifdef CONFIG_SMP 8347#ifdef CONFIG_SMP