Diffstat (limited to 'kernel')

 -rw-r--r--  kernel/rcutree.c        |  26
 -rw-r--r--  kernel/rcutree_plugin.h |  68
 -rw-r--r--  kernel/sched.c          | 236
 -rw-r--r--  kernel/sched_fair.c     |  46
 -rw-r--r--  kernel/sched_features.h |   2
 -rw-r--r--  kernel/signal.c         |  19
 -rw-r--r--  kernel/softirq.c        |  12

 7 files changed, 319 insertions(+), 90 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7e59ffb3d0b..ba06207b1dd 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -84,9 +84,32 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
 
+/*
+ * The rcu_scheduler_active variable transitions from zero to one just
+ * before the first task is spawned. So when this variable is zero, RCU
+ * can assume that there is but one task, allowing RCU to (for example)
+ * optimized synchronize_sched() to a simple barrier(). When this variable
+ * is one, RCU must actually do all the hard work required to detect real
+ * grace periods. This variable is also used to suppress boot-time false
+ * positives from lockdep-RCU error checking.
+ */
 int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
+/*
+ * The rcu_scheduler_fully_active variable transitions from zero to one
+ * during the early_initcall() processing, which is after the scheduler
+ * is capable of creating new tasks. So RCU processing (for example,
+ * creating tasks for RCU priority boosting) must be delayed until after
+ * rcu_scheduler_fully_active transitions from zero to one. We also
+ * currently delay invocation of any RCU callbacks until after this point.
+ *
+ * It might later prove better for people registering RCU callbacks during
+ * early boot to take responsibility for these callbacks, but one step at
+ * a time.
+ */
+static int rcu_scheduler_fully_active __read_mostly;
+
 #ifdef CONFIG_RCU_BOOST
 
 /*
@@ -98,7 +121,6 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
-static char rcu_kthreads_spawnable;
 
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
@@ -1467,6 +1489,8 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  */
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 {
+        if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
+                return;
 	if (likely(!rsp->boost)) {
 		rcu_do_batch(rsp, rdp);
 		return;
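
The new check in invoke_rcu_callbacks() means no RCU callback runs until rcu_scheduler_fully_active has been set by the early initcall added further down. A standalone userspace sketch of that boot-gate idiom, using C11 atomics and illustrative names rather than the kernel's types:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int scheduler_fully_active;      /* 0 until "early init" runs */

    static void invoke_callbacks(void)
    {
        /* One racy-but-adequate read, like ACCESS_ONCE() in the patch. */
        if (!atomic_load_explicit(&scheduler_fully_active, memory_order_relaxed))
            return;                                /* too early: do nothing */
        puts("running callbacks");
    }

    int main(void)
    {
        invoke_callbacks();                        /* silently skipped */
        atomic_store(&scheduler_fully_active, 1);  /* "early_initcall()" time */
        invoke_callbacks();                        /* now runs */
        return 0;
    }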
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 14dc7dd0090..8aafbb80b8b 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,6 +68,7 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
+static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
 /*
@@ -147,7 +148,7 @@ static void rcu_preempt_note_context_switch(int cpu)
         struct rcu_data *rdp;
         struct rcu_node *rnp;
 
-        if (t->rcu_read_lock_nesting &&
+        if (t->rcu_read_lock_nesting > 0 &&
             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
                 /* Possibly blocking in an RCU read-side critical section. */
@@ -190,6 +191,14 @@ static void rcu_preempt_note_context_switch(int cpu)
                         rnp->gp_tasks = &t->rcu_node_entry;
                 }
                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
+        } else if (t->rcu_read_lock_nesting < 0 &&
+                   t->rcu_read_unlock_special) {
+
+                /*
+                 * Complete exit from RCU read-side critical section on
+                 * behalf of preempted instance of __rcu_read_unlock().
+                 */
+                rcu_read_unlock_special(t);
         }
 
         /*
@@ -284,7 +293,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+static noinline void rcu_read_unlock_special(struct task_struct *t)
 {
         int empty;
         int empty_exp;
@@ -309,7 +318,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
         }
 
         /* Hardware IRQ handlers cannot block. */
-        if (in_irq()) {
+        if (in_irq() || in_serving_softirq()) {
                 local_irq_restore(flags);
                 return;
         }
@@ -342,6 +351,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
                 if (&t->rcu_node_entry == rnp->boost_tasks)
                         rnp->boost_tasks = np;
+                /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
+                if (t->rcu_boosted) {
+                        special |= RCU_READ_UNLOCK_BOOSTED;
+                        t->rcu_boosted = 0;
+                }
 #endif /* #ifdef CONFIG_RCU_BOOST */
                 t->rcu_blocked_node = NULL;
 
@@ -358,7 +372,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
                 /* Unboost if we were boosted. */
                 if (special & RCU_READ_UNLOCK_BOOSTED) {
-                        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
                         rt_mutex_unlock(t->rcu_boost_mutex);
                         t->rcu_boost_mutex = NULL;
                 }
@@ -387,13 +400,22 @@ void __rcu_read_unlock(void)
         struct task_struct *t = current;
 
         barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
-        --t->rcu_read_lock_nesting;
-        barrier();  /* decrement before load of ->rcu_read_unlock_special */
-        if (t->rcu_read_lock_nesting == 0 &&
-            unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-                rcu_read_unlock_special(t);
+        if (t->rcu_read_lock_nesting != 1)
+                --t->rcu_read_lock_nesting;
+        else {
+                t->rcu_read_lock_nesting = INT_MIN;
+                barrier();  /* assign before ->rcu_read_unlock_special load */
+                if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+                        rcu_read_unlock_special(t);
+                barrier();  /* ->rcu_read_unlock_special load before assign */
+                t->rcu_read_lock_nesting = 0;
+        }
 #ifdef CONFIG_PROVE_LOCKING
-        WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+        {
+                int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+                WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+        }
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
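
The rewritten __rcu_read_unlock() parks ->rcu_read_lock_nesting at INT_MIN while the outermost unlock runs its slow path; that is exactly the state the new rcu_read_lock_nesting < 0 branch in rcu_preempt_note_context_switch() looks for. A single-threaded userspace sketch of that little state machine (hypothetical names, no real preemption or concurrency):

    #include <limits.h>
    #include <stdio.h>

    static int nesting;      /* models t->rcu_read_lock_nesting */
    static int special;      /* models t->rcu_read_unlock_special */

    static void read_unlock_special(void)
    {
        special = 0;
        puts("slow path handled deferred work");
    }

    static void read_lock(void)  { nesting++; }

    static void read_unlock(void)
    {
        if (nesting != 1) {
            --nesting;                 /* nested unlock: fast path */
        } else {
            nesting = INT_MIN;         /* flag "outermost unlock in progress" */
            if (special)
                read_unlock_special();
            nesting = 0;               /* critical section fully exited */
        }
    }

    int main(void)
    {
        read_lock();
        special = 1;                   /* pretend we blocked while inside */
        read_unlock();
        printf("nesting=%d special=%d\n", nesting, special);
        return 0;
    }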
@@ -589,7 +611,8 @@ static void rcu_preempt_check_callbacks(int cpu)
                 rcu_preempt_qs(cpu);
                 return;
         }
-        if (per_cpu(rcu_preempt_data, cpu).qs_pending)
+        if (t->rcu_read_lock_nesting > 0 &&
+            per_cpu(rcu_preempt_data, cpu).qs_pending)
                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
@@ -695,9 +718,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 
         raw_spin_lock_irqsave(&rnp->lock, flags);
         for (;;) {
-                if (!sync_rcu_preempt_exp_done(rnp))
+                if (!sync_rcu_preempt_exp_done(rnp)) {
+                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                         break;
+                }
                 if (rnp->parent == NULL) {
+                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                         wake_up(&sync_rcu_preempt_exp_wq);
                         break;
                 }
@@ -707,7 +733,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
                 raw_spin_lock(&rnp->lock); /* irqs already disabled */
                 rnp->expmask &= ~mask;
         }
-        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
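
Both rcu_report_exp_rnp() hunks above move the raw_spin_unlock_irqrestore() calls so the rcu_node lock is dropped on every exit path before wake_up() runs, instead of once after the loop with the wakeup still under the lock. A generic pthread sketch of that unlock-before-waking shape (hypothetical names, not the kernel's locking primitives):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t wait_lock  = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  wait_cv    = PTHREAD_COND_INITIALIZER;
    static int done;

    static void report_done(void)
    {
        pthread_mutex_lock(&state_lock);
        done = 1;
        pthread_mutex_unlock(&state_lock);   /* drop the state lock first... */

        pthread_mutex_lock(&wait_lock);      /* ...then do the wakeup, which */
        pthread_cond_broadcast(&wait_cv);    /* takes its own locks          */
        pthread_mutex_unlock(&wait_lock);
    }

    int main(void)
    {
        report_done();
        printf("done=%d\n", done);
        return 0;
    }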
@@ -1174,7 +1199,7 @@ static int rcu_boost(struct rcu_node *rnp)
         t = container_of(tb, struct task_struct, rcu_node_entry);
         rt_mutex_init_proxy_locked(&mtx, t);
         t->rcu_boost_mutex = &mtx;
-        t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+        t->rcu_boosted = 1;
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
         rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
         rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
@@ -1532,7 +1557,7 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
         struct sched_param sp;
         struct task_struct *t;
 
-        if (!rcu_kthreads_spawnable ||
+        if (!rcu_scheduler_fully_active ||
             per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
                 return 0;
         t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
@@ -1639,7 +1664,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
         struct sched_param sp;
         struct task_struct *t;
 
-        if (!rcu_kthreads_spawnable ||
+        if (!rcu_scheduler_fully_active ||
             rnp->qsmaskinit == 0)
                 return 0;
         if (rnp->node_kthread_task == NULL) {
@@ -1665,7 +1690,7 @@ static int __init rcu_spawn_kthreads(void)
         int cpu;
         struct rcu_node *rnp;
 
-        rcu_kthreads_spawnable = 1;
+        rcu_scheduler_fully_active = 1;
         for_each_possible_cpu(cpu) {
                 per_cpu(rcu_cpu_has_work, cpu) = 0;
                 if (cpu_online(cpu))
@@ -1687,7 +1712,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
         struct rcu_node *rnp = rdp->mynode;
 
         /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
-        if (rcu_kthreads_spawnable) {
+        if (rcu_scheduler_fully_active) {
                 (void)rcu_spawn_one_cpu_kthread(cpu);
                 if (rnp->node_kthread_task == NULL)
                         (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
@@ -1726,6 +1751,13 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
 {
 }
 
+static int __init rcu_scheduler_really_started(void)
+{
+        rcu_scheduler_fully_active = 1;
+        return 0;
+}
+early_initcall(rcu_scheduler_really_started);
+
 static void __cpuinit rcu_prepare_kthreads(int cpu)
 {
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 9769c756ad6..fde6ff90352 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2544,13 +2544,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 }
 
 #ifdef CONFIG_SMP
-static void sched_ttwu_pending(void)
+static void sched_ttwu_do_pending(struct task_struct *list)
 {
         struct rq *rq = this_rq();
-        struct task_struct *list = xchg(&rq->wake_list, NULL);
-
-        if (!list)
-                return;
 
         raw_spin_lock(&rq->lock);
 
@@ -2563,9 +2559,45 @@ static void sched_ttwu_pending(void)
         raw_spin_unlock(&rq->lock);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void sched_ttwu_pending(void)
+{
+        struct rq *rq = this_rq();
+        struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+        if (!list)
+                return;
+
+        sched_ttwu_do_pending(list);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
 void scheduler_ipi(void)
 {
-        sched_ttwu_pending();
+        struct rq *rq = this_rq();
+        struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+        if (!list)
+                return;
+
+        /*
+         * Not all reschedule IPI handlers call irq_enter/irq_exit, since
+         * traditionally all their work was done from the interrupt return
+         * path. Now that we actually do some work, we need to make sure
+         * we do call them.
+         *
+         * Some archs already do call them, luckily irq_enter/exit nest
+         * properly.
+         *
+         * Arguably we should visit all archs and update all handlers,
+         * however a fair share of IPIs are still resched only so this would
+         * somewhat pessimize the simple resched case.
+         */
+        irq_enter();
+        sched_ttwu_do_pending(list);
+        irq_exit();
 }
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu)
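
scheduler_ipi() now detaches the whole rq->wake_list with a single xchg() and only pays for irq_enter()/irq_exit() when there is real work on the list. A userspace sketch of that detach-the-list-with-one-exchange idiom (C11 atomics, made-up types; the real handler additionally takes rq->lock inside sched_ttwu_do_pending()):

    #include <stdatomic.h>
    #include <stdio.h>

    struct task { int pid; struct task *wake_entry; };

    static _Atomic(struct task *) wake_list;       /* models rq->wake_list */

    static void queue_wakeup(struct task *t)       /* producer side */
    {
        struct task *old = atomic_load(&wake_list);
        do {
            t->wake_entry = old;
        } while (!atomic_compare_exchange_weak(&wake_list, &old, t));
    }

    static void handle_ipi(void)                   /* consumer side */
    {
        struct task *list = atomic_exchange(&wake_list, NULL);

        if (!list)
            return;                                /* empty: cheap early out */

        while (list) {                             /* walk the detached list */
            struct task *t = list;
            list = t->wake_entry;
            printf("waking pid %d\n", t->pid);
        }
    }

    int main(void)
    {
        static struct task a = { .pid = 1 }, b = { .pid = 2 };
        queue_wakeup(&a);
        queue_wakeup(&b);
        handle_ipi();
        return 0;
    }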
@@ -6557,7 +6589,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                         break;
                 }
 
-                if (!group->cpu_power) {
+                if (!group->sgp->power) {
                         printk(KERN_CONT "\n");
                         printk(KERN_ERR "ERROR: domain->cpu_power not "
                                         "set\n");
@@ -6581,9 +6613,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
                 printk(KERN_CONT " %s", str);
-                if (group->cpu_power != SCHED_POWER_SCALE) {
+                if (group->sgp->power != SCHED_POWER_SCALE) {
                         printk(KERN_CONT " (cpu_power = %d)",
-                                group->cpu_power);
+                                group->sgp->power);
                 }
 
                 group = group->next;
@@ -6774,11 +6806,39 @@ static struct root_domain *alloc_rootdomain(void)
         return rd;
 }
 
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{
+        struct sched_group *tmp, *first;
+
+        if (!sg)
+                return;
+
+        first = sg;
+        do {
+                tmp = sg->next;
+
+                if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+                        kfree(sg->sgp);
+
+                kfree(sg);
+                sg = tmp;
+        } while (sg != first);
+}
+
 static void free_sched_domain(struct rcu_head *rcu)
 {
         struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-        if (atomic_dec_and_test(&sd->groups->ref))
+
+        /*
+         * If its an overlapping domain it has private groups, iterate and
+         * nuke them all.
+         */
+        if (sd->flags & SD_OVERLAP) {
+                free_sched_groups(sd->groups, 1);
+        } else if (atomic_dec_and_test(&sd->groups->ref)) {
+                kfree(sd->groups->sgp);
                 kfree(sd->groups);
+        }
         kfree(sd);
 }
 
@@ -6945,6 +7005,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 struct sd_data {
         struct sched_domain **__percpu sd;
         struct sched_group **__percpu sg;
+        struct sched_group_power **__percpu sgp;
 };
 
 struct s_data {
@@ -6964,15 +7025,73 @@ struct sched_domain_topology_level;
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 
+#define SDTL_OVERLAP 0x01
+
 struct sched_domain_topology_level {
         sched_domain_init_f init;
         sched_domain_mask_f mask;
+        int                 flags;
         struct sd_data      data;
 };
 
-/*
- * Assumes the sched_domain tree is fully constructed
- */
+static int
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
+{
+        struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+        const struct cpumask *span = sched_domain_span(sd);
+        struct cpumask *covered = sched_domains_tmpmask;
+        struct sd_data *sdd = sd->private;
+        struct sched_domain *child;
+        int i;
+
+        cpumask_clear(covered);
+
+        for_each_cpu(i, span) {
+                struct cpumask *sg_span;
+
+                if (cpumask_test_cpu(i, covered))
+                        continue;
+
+                sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+                                GFP_KERNEL, cpu_to_node(i));
+
+                if (!sg)
+                        goto fail;
+
+                sg_span = sched_group_cpus(sg);
+
+                child = *per_cpu_ptr(sdd->sd, i);
+                if (child->child) {
+                        child = child->child;
+                        cpumask_copy(sg_span, sched_domain_span(child));
+                } else
+                        cpumask_set_cpu(i, sg_span);
+
+                cpumask_or(covered, covered, sg_span);
+
+                sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+                atomic_inc(&sg->sgp->ref);
+
+                if (cpumask_test_cpu(cpu, sg_span))
+                        groups = sg;
+
+                if (!first)
+                        first = sg;
+                if (last)
+                        last->next = sg;
+                last = sg;
+                last->next = first;
+        }
+        sd->groups = groups;
+
+        return 0;
+
+fail:
+        free_sched_groups(first, 0);
+
+        return -ENOMEM;
+}
+
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
         struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
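
build_overlap_sched_groups() above strings its groups into a circular singly linked list: each new group is appended and last->next is pointed back at the first on every iteration, so the list is closed at all times and can be walked with a do/while loop. A minimal userspace model of that construction plus a free_sched_groups()-style teardown (stand-in types, no cpumasks):

    #include <stdio.h>
    #include <stdlib.h>

    struct group { int id; struct group *next; };

    int main(void)
    {
        struct group *first = NULL, *last = NULL;

        for (int id = 0; id < 3; id++) {
            struct group *g = calloc(1, sizeof(*g));
            g->id = id;
            if (!first)
                first = g;
            if (last)
                last->next = g;
            last = g;
            last->next = first;    /* keep the ring closed after every append */
        }

        /* Walk it the way the load balancer does: do { ... } while (g != first) */
        struct group *g = first;
        do {
            printf("group %d\n", g->id);
            g = g->next;
        } while (g != first);

        /* Teardown: break the ring, then free linearly. */
        last->next = NULL;
        while (first) {
            struct group *next = first->next;
            free(first);
            first = next;
        }
        return 0;
    }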
@@ -6981,24 +7100,24 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
         if (child)
                 cpu = cpumask_first(sched_domain_span(child));
 
-        if (sg)
+        if (sg) {
                 *sg = *per_cpu_ptr(sdd->sg, cpu);
+                (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+                atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+        }
 
         return cpu;
 }
 
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
  * build_sched_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
  */
-static void
-build_sched_groups(struct sched_domain *sd)
+static int
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
         struct sched_group *first = NULL, *last = NULL;
         struct sd_data *sdd = sd->private;
@@ -7006,6 +7125,12 @@ build_sched_groups(struct sched_domain *sd)
         struct cpumask *covered;
         int i;
 
+        get_group(cpu, sdd, &sd->groups);
+        atomic_inc(&sd->groups->ref);
+
+        if (cpu != cpumask_first(sched_domain_span(sd)))
+                return 0;
+
         lockdep_assert_held(&sched_domains_mutex);
         covered = sched_domains_tmpmask;
 
@@ -7020,7 +7145,7 @@ build_sched_groups(struct sched_domain *sd)
                         continue;
 
                 cpumask_clear(sched_group_cpus(sg));
-                sg->cpu_power = 0;
+                sg->sgp->power = 0;
 
                 for_each_cpu(j, span) {
                         if (get_group(j, sdd, NULL) != group)
@@ -7037,6 +7162,8 @@ build_sched_groups(struct sched_domain *sd)
                 last = sg;
         }
         last->next = first;
+
+        return 0;
 }
 
 /*
@@ -7051,12 +7178,17 @@ build_sched_groups(struct sched_domain *sd)
  */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-        WARN_ON(!sd || !sd->groups);
+        struct sched_group *sg = sd->groups;
 
-        if (cpu != group_first_cpu(sd->groups))
-                return;
+        WARN_ON(!sd || !sg);
+
+        do {
+                sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+                sg = sg->next;
+        } while (sg != sd->groups);
 
-        sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+        if (cpu != group_first_cpu(sg))
+                return;
 
         update_group_power(sd, cpu);
 }
@@ -7177,15 +7309,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
         struct sd_data *sdd = sd->private;
-        struct sched_group *sg = sd->groups;
 
         WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
         *per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-        if (cpu == cpumask_first(sched_group_cpus(sg))) {
-                WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+        if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
                 *per_cpu_ptr(sdd->sg, cpu) = NULL;
-        }
+
+        if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
+                *per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -7210,7 +7342,7 @@ static struct sched_domain_topology_level default_topology[] = {
 #endif
         { sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-        { sd_init_NODE, cpu_node_mask, },
+        { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
         { sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
         { NULL, },
@@ -7234,9 +7366,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                 if (!sdd->sg)
                         return -ENOMEM;
 
+                sdd->sgp = alloc_percpu(struct sched_group_power *);
+                if (!sdd->sgp)
+                        return -ENOMEM;
+
                 for_each_cpu(j, cpu_map) {
                         struct sched_domain *sd;
                         struct sched_group *sg;
+                        struct sched_group_power *sgp;
 
                         sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
                                         GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7388,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                                 return -ENOMEM;
 
                         *per_cpu_ptr(sdd->sg, j) = sg;
+
+                        sgp = kzalloc_node(sizeof(struct sched_group_power),
+                                        GFP_KERNEL, cpu_to_node(j));
+                        if (!sgp)
+                                return -ENOMEM;
+
+                        *per_cpu_ptr(sdd->sgp, j) = sgp;
                 }
         }
 
@@ -7266,11 +7410,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
                 struct sd_data *sdd = &tl->data;
 
                 for_each_cpu(j, cpu_map) {
-                        kfree(*per_cpu_ptr(sdd->sd, j));
+                        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+                        if (sd && (sd->flags & SD_OVERLAP))
+                                free_sched_groups(sd->groups, 0);
                         kfree(*per_cpu_ptr(sdd->sg, j));
+                        kfree(*per_cpu_ptr(sdd->sgp, j));
                 }
                 free_percpu(sdd->sd);
                 free_percpu(sdd->sg);
+                free_percpu(sdd->sgp);
         }
 }
 
@@ -7316,8 +7464,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
                 struct sched_domain_topology_level *tl;
 
                 sd = NULL;
-                for (tl = sched_domain_topology; tl->init; tl++)
+                for (tl = sched_domain_topology; tl->init; tl++) {
                         sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+                        if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+                                sd->flags |= SD_OVERLAP;
+                        if (cpumask_equal(cpu_map, sched_domain_span(sd)))
+                                break;
+                }
 
                 while (sd->child)
                         sd = sd->child;
@@ -7329,13 +7482,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
         for_each_cpu(i, cpu_map) {
                 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
                         sd->span_weight = cpumask_weight(sched_domain_span(sd));
-                        get_group(i, sd->private, &sd->groups);
-                        atomic_inc(&sd->groups->ref);
-
-                        if (i != cpumask_first(sched_domain_span(sd)))
-                                continue;
-
-                        build_sched_groups(sd);
+                        if (sd->flags & SD_OVERLAP) {
+                                if (build_overlap_sched_groups(sd, i))
+                                        goto error;
+                        } else {
+                                if (build_sched_groups(sd, i))
+                                        goto error;
+                        }
                 }
         }
 
@@ -7757,6 +7910,9 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 #endif
 #endif
         cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+#ifndef CONFIG_64BIT
+        cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
 }
 
 static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
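
The init_cfs_rq() hunk only seeds min_vruntime_copy on 32-bit builds; the point of such a shadow copy is to let a reader detect a torn 64-bit load and retry, roughly as sketched below. This is a hedged userspace approximation using C11 fences; the kernel pairs the two fields with smp_wmb()/smp_rmb() instead, and the field names here are only borrowed for illustration.

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t min_vruntime;         /* may be read in two halves on 32-bit */
    static uint64_t min_vruntime_copy;    /* shadow used to detect torn reads */

    static void write_min_vruntime(uint64_t v)
    {
        min_vruntime = v;
        atomic_thread_fence(memory_order_release);     /* value before copy */
        min_vruntime_copy = v;
    }

    static uint64_t read_min_vruntime(void)
    {
        uint64_t copy, val;

        do {
            copy = min_vruntime_copy;
            atomic_thread_fence(memory_order_acquire); /* copy before value */
            val = min_vruntime;
        } while (val != copy);            /* mismatch means a torn pair: retry */

        return val;
    }

    int main(void)
    {
        write_min_vruntime(1ULL << 33);
        printf("%llu\n", (unsigned long long)read_min_vruntime());
        return 0;
    }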
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 433491c2dc8..c768588e180 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1585,7 +1585,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
         }
 
         /* Adjust by relative CPU power of the group */
-        avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
+        avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
 
         if (local_group) {
                 this_load = avg_load;
@@ -2631,7 +2631,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
                 power >>= SCHED_POWER_SHIFT;
         }
 
-        sdg->cpu_power_orig = power;
+        sdg->sgp->power_orig = power;
 
         if (sched_feat(ARCH_POWER))
                 power *= arch_scale_freq_power(sd, cpu);
@@ -2647,7 +2647,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
                 power = 1;
 
         cpu_rq(cpu)->cpu_power = power;
-        sdg->cpu_power = power;
+        sdg->sgp->power = power;
 }
 
 static void update_group_power(struct sched_domain *sd, int cpu)
@@ -2665,11 +2665,11 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
         group = child->groups;
         do {
-                power += group->cpu_power;
+                power += group->sgp->power;
                 group = group->next;
         } while (group != child->groups);
 
-        sdg->cpu_power = power;
+        sdg->sgp->power = power;
 }
 
 /*
@@ -2691,7 +2691,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
         /*
          * If ~90% of the cpu_power is still there, we're good.
          */
-        if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+        if (group->sgp->power * 32 > group->sgp->power_orig * 29)
                 return 1;
 
         return 0;
@@ -2771,7 +2771,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
         }
 
         /* Adjust by relative CPU power of the group */
-        sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
+        sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
 
         /*
          * Consider the group unbalanced when the imbalance is larger
@@ -2788,7 +2788,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
         if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
                 sgs->group_imb = 1;
 
-        sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
+        sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
                                                 SCHED_POWER_SCALE);
         if (!sgs->group_capacity)
                 sgs->group_capacity = fix_small_capacity(sd, group);
@@ -2877,7 +2877,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                         return;
 
                 sds->total_load += sgs.group_load;
-                sds->total_pwr += sg->cpu_power;
+                sds->total_pwr += sg->sgp->power;
 
                 /*
                  * In case the child domain prefers tasks go to siblings
@@ -2962,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd,
         if (this_cpu > busiest_cpu)
                 return 0;
 
-        *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+        *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
                                        SCHED_POWER_SCALE);
         return 1;
 }
@@ -2993,7 +2993,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 
         scaled_busy_load_per_task = sds->busiest_load_per_task
                                          * SCHED_POWER_SCALE;
-        scaled_busy_load_per_task /= sds->busiest->cpu_power;
+        scaled_busy_load_per_task /= sds->busiest->sgp->power;
 
         if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
                         (scaled_busy_load_per_task * imbn)) {
@@ -3007,28 +3007,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
          * moving them.
          */
 
-        pwr_now += sds->busiest->cpu_power *
+        pwr_now += sds->busiest->sgp->power *
                         min(sds->busiest_load_per_task, sds->max_load);
-        pwr_now += sds->this->cpu_power *
+        pwr_now += sds->this->sgp->power *
                         min(sds->this_load_per_task, sds->this_load);
         pwr_now /= SCHED_POWER_SCALE;
 
         /* Amount of load we'd subtract */
         tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-                sds->busiest->cpu_power;
+                sds->busiest->sgp->power;
         if (sds->max_load > tmp)
-                pwr_move += sds->busiest->cpu_power *
+                pwr_move += sds->busiest->sgp->power *
                         min(sds->busiest_load_per_task, sds->max_load - tmp);
 
         /* Amount of load we'd add */
-        if (sds->max_load * sds->busiest->cpu_power <
+        if (sds->max_load * sds->busiest->sgp->power <
                 sds->busiest_load_per_task * SCHED_POWER_SCALE)
-                tmp = (sds->max_load * sds->busiest->cpu_power) /
-                        sds->this->cpu_power;
+                tmp = (sds->max_load * sds->busiest->sgp->power) /
+                        sds->this->sgp->power;
         else
                 tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-                        sds->this->cpu_power;
-        pwr_move += sds->this->cpu_power *
+                        sds->this->sgp->power;
+        pwr_move += sds->this->sgp->power *
                 min(sds->this_load_per_task, sds->this_load + tmp);
         pwr_move /= SCHED_POWER_SCALE;
 
@@ -3074,7 +3074,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 
                 load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
 
-                load_above_capacity /= sds->busiest->cpu_power;
+                load_above_capacity /= sds->busiest->sgp->power;
         }
 
         /*
@@ -3090,8 +3090,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
         max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
 
         /* How much load to actually move to equalise the imbalance */
-        *imbalance = min(max_pull * sds->busiest->cpu_power,
-                (sds->avg_load - sds->this_load) * sds->this->cpu_power)
+        *imbalance = min(max_pull * sds->busiest->sgp->power,
+                (sds->avg_load - sds->this_load) * sds->this->sgp->power)
                         / SCHED_POWER_SCALE;
 
         /*
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index be40f7371ee..1e7066d76c2 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -70,3 +70,5 @@ SCHED_FEAT(NONIRQ_POWER, 1)
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, 1)
+
+SCHED_FEAT(FORCE_SD_OVERLAP, 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index ff767860332..415d85d6f6c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1178,18 +1178,25 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 {
         struct sighand_struct *sighand;
 
-        rcu_read_lock();
         for (;;) {
+                local_irq_save(*flags);
+                rcu_read_lock();
                 sighand = rcu_dereference(tsk->sighand);
-                if (unlikely(sighand == NULL))
+                if (unlikely(sighand == NULL)) {
+                        rcu_read_unlock();
+                        local_irq_restore(*flags);
                         break;
+                }
 
-                spin_lock_irqsave(&sighand->siglock, *flags);
-                if (likely(sighand == tsk->sighand))
+                spin_lock(&sighand->siglock);
+                if (likely(sighand == tsk->sighand)) {
+                        rcu_read_unlock();
                         break;
-                spin_unlock_irqrestore(&sighand->siglock, *flags);
+                }
+                spin_unlock(&sighand->siglock);
+                rcu_read_unlock();
+                local_irq_restore(*flags);
         }
-        rcu_read_unlock();
 
         return sighand;
 }
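
The reworked __lock_task_sighand() is the classic snapshot, lock, revalidate, retry loop: disable interrupts, enter an RCU read-side critical section, dereference ->sighand, take its lock, and keep it only if the pointer still matches. The sketch below keeps just that loop shape in userspace; it drops the IRQ disabling and relies on the objects never being freed, since there is no RCU to pin them, and all type names are stand-ins.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct sighand { pthread_mutex_t lock; };

    struct task {
        struct sighand *_Atomic sighand;     /* may be switched concurrently */
    };

    static struct sighand *lock_task_sighand(struct task *tsk)
    {
        struct sighand *sh;

        for (;;) {
            sh = atomic_load(&tsk->sighand);
            if (!sh)
                return NULL;                 /* nothing left to lock */

            pthread_mutex_lock(&sh->lock);
            if (sh == atomic_load(&tsk->sighand))
                return sh;                   /* still current: keep the lock */

            pthread_mutex_unlock(&sh->lock); /* changed underneath us: retry */
        }
    }

    int main(void)
    {
        static struct sighand sh = { .lock = PTHREAD_MUTEX_INITIALIZER };
        struct task t;

        atomic_init(&t.sighand, &sh);
        struct sighand *locked = lock_task_sighand(&t);
        printf("locked: %s\n", locked ? "yes" : "no");
        if (locked)
            pthread_mutex_unlock(&locked->lock);
        return 0;
    }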
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 40cf63ddd4b..fca82c32042 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -315,16 +315,24 @@ static inline void invoke_softirq(void)
 {
         if (!force_irqthreads)
                 __do_softirq();
-        else
+        else {
+                __local_bh_disable((unsigned long)__builtin_return_address(0),
+                                SOFTIRQ_OFFSET);
                 wakeup_softirqd();
+                __local_bh_enable(SOFTIRQ_OFFSET);
+        }
 }
 #else
 static inline void invoke_softirq(void)
 {
         if (!force_irqthreads)
                 do_softirq();
-        else
+        else {
+                __local_bh_disable((unsigned long)__builtin_return_address(0),
+                                SOFTIRQ_OFFSET);
                 wakeup_softirqd();
+                __local_bh_enable(SOFTIRQ_OFFSET);
+        }
 }
 #endif
 