diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-11-05 13:56:47 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-11-05 13:56:47 -0500 |
commit | 608221fdf9a2170962295dcfbea53dc5c50d1a74 (patch) | |
tree | 26712f2f8a8a332369df2b31174c8c5b46c42164 | |
parent | 72cc129e8dae988d2a132467cfd0ecd7623c35fb (diff) | |
parent | b84ff7d6f1b7f8a43414e74d972ec4c8f3361db4 (diff) |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched: Fix kthread_bind() by moving the body of kthread_bind() to sched.c
sched: Disable SD_PREFER_LOCAL at node level
sched: Fix boot crash by zalloc()ing most of the cpu masks
sched: Strengthen buddies and mitigate buddy induced latencies
-rw-r--r-- | arch/x86/include/asm/topology.h | 2 | ||||
-rw-r--r-- | kernel/kthread.c | 23 | ||||
-rw-r--r-- | kernel/sched.c | 40 | ||||
-rw-r--r-- | kernel/sched_fair.c | 73 |
4 files changed, 84 insertions, 54 deletions
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d823c245f63b..40e37b10c6c0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -143,7 +143,7 @@ extern unsigned long node_remap_size[]; | |||
143 | | 1*SD_BALANCE_FORK \ | 143 | | 1*SD_BALANCE_FORK \ |
144 | | 0*SD_BALANCE_WAKE \ | 144 | | 0*SD_BALANCE_WAKE \ |
145 | | 1*SD_WAKE_AFFINE \ | 145 | | 1*SD_WAKE_AFFINE \ |
146 | | 1*SD_PREFER_LOCAL \ | 146 | | 0*SD_PREFER_LOCAL \ |
147 | | 0*SD_SHARE_CPUPOWER \ | 147 | | 0*SD_SHARE_CPUPOWER \ |
148 | | 0*SD_POWERSAVINGS_BALANCE \ | 148 | | 0*SD_POWERSAVINGS_BALANCE \ |
149 | | 0*SD_SHARE_PKG_RESOURCES \ | 149 | | 0*SD_SHARE_PKG_RESOURCES \ |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 5fe709982caa..ab7ae57773e1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -150,29 +150,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
150 | EXPORT_SYMBOL(kthread_create); | 150 | EXPORT_SYMBOL(kthread_create); |
151 | 151 | ||
152 | /** | 152 | /** |
153 | * kthread_bind - bind a just-created kthread to a cpu. | ||
154 | * @k: thread created by kthread_create(). | ||
155 | * @cpu: cpu (might not be online, must be possible) for @k to run on. | ||
156 | * | ||
157 | * Description: This function is equivalent to set_cpus_allowed(), | ||
158 | * except that @cpu doesn't need to be online, and the thread must be | ||
159 | * stopped (i.e., just returned from kthread_create()). | ||
160 | */ | ||
161 | void kthread_bind(struct task_struct *k, unsigned int cpu) | ||
162 | { | ||
163 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
164 | if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) { | ||
165 | WARN_ON(1); | ||
166 | return; | ||
167 | } | ||
168 | set_task_cpu(k, cpu); | ||
169 | k->cpus_allowed = cpumask_of_cpu(cpu); | ||
170 | k->rt.nr_cpus_allowed = 1; | ||
171 | k->flags |= PF_THREAD_BOUND; | ||
172 | } | ||
173 | EXPORT_SYMBOL(kthread_bind); | ||
174 | |||
175 | /** | ||
176 | * kthread_stop - stop a thread created by kthread_create(). | 153 | * kthread_stop - stop a thread created by kthread_create(). |
177 | * @k: thread created by kthread_create(). | 154 | * @k: thread created by kthread_create(). |
178 | * | 155 | * |
diff --git a/kernel/sched.c b/kernel/sched.c index a455dca884a6..28dd4f490bfc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1992,6 +1992,38 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1992 | p->sched_class->prio_changed(rq, p, oldprio, running); | 1992 | p->sched_class->prio_changed(rq, p, oldprio, running); |
1993 | } | 1993 | } |
1994 | 1994 | ||
1995 | /** | ||
1996 | * kthread_bind - bind a just-created kthread to a cpu. | ||
1997 | * @k: thread created by kthread_create(). | ||
1998 | * @cpu: cpu (might not be online, must be possible) for @k to run on. | ||
1999 | * | ||
2000 | * Description: This function is equivalent to set_cpus_allowed(), | ||
2001 | * except that @cpu doesn't need to be online, and the thread must be | ||
2002 | * stopped (i.e., just returned from kthread_create()). | ||
2003 | * | ||
2004 | * Function lives here instead of kthread.c because it messes with | ||
2005 | * scheduler internals which require locking. | ||
2006 | */ | ||
2007 | void kthread_bind(struct task_struct *p, unsigned int cpu) | ||
2008 | { | ||
2009 | struct rq *rq = cpu_rq(cpu); | ||
2010 | unsigned long flags; | ||
2011 | |||
2012 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
2013 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { | ||
2014 | WARN_ON(1); | ||
2015 | return; | ||
2016 | } | ||
2017 | |||
2018 | spin_lock_irqsave(&rq->lock, flags); | ||
2019 | set_task_cpu(p, cpu); | ||
2020 | p->cpus_allowed = cpumask_of_cpu(cpu); | ||
2021 | p->rt.nr_cpus_allowed = 1; | ||
2022 | p->flags |= PF_THREAD_BOUND; | ||
2023 | spin_unlock_irqrestore(&rq->lock, flags); | ||
2024 | } | ||
2025 | EXPORT_SYMBOL(kthread_bind); | ||
2026 | |||
1995 | #ifdef CONFIG_SMP | 2027 | #ifdef CONFIG_SMP |
1996 | /* | 2028 | /* |
1997 | * Is this task likely cache-hot: | 2029 | * Is this task likely cache-hot: |
@@ -2004,7 +2036,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2004 | /* | 2036 | /* |
2005 | * Buddy candidates are cache hot: | 2037 | * Buddy candidates are cache hot: |
2006 | */ | 2038 | */ |
2007 | if (sched_feat(CACHE_HOT_BUDDY) && | 2039 | if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && |
2008 | (&p->se == cfs_rq_of(&p->se)->next || | 2040 | (&p->se == cfs_rq_of(&p->se)->next || |
2009 | &p->se == cfs_rq_of(&p->se)->last)) | 2041 | &p->se == cfs_rq_of(&p->se)->last)) |
2010 | return 1; | 2042 | return 1; |
@@ -9532,13 +9564,13 @@ void __init sched_init(void) | |||
9532 | current->sched_class = &fair_sched_class; | 9564 | current->sched_class = &fair_sched_class; |
9533 | 9565 | ||
9534 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | 9566 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ |
9535 | alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | 9567 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); |
9536 | #ifdef CONFIG_SMP | 9568 | #ifdef CONFIG_SMP |
9537 | #ifdef CONFIG_NO_HZ | 9569 | #ifdef CONFIG_NO_HZ |
9538 | alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); | 9570 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); |
9539 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); | 9571 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); |
9540 | #endif | 9572 | #endif |
9541 | alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 9573 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); |
9542 | #endif /* SMP */ | 9574 | #endif /* SMP */ |
9543 | 9575 | ||
9544 | perf_event_init(); | 9576 | perf_event_init(); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c32c3e643daa..37087a7fac22 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -822,6 +822,26 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
822 | * re-elected due to buddy favours. | 822 | * re-elected due to buddy favours. |
823 | */ | 823 | */ |
824 | clear_buddies(cfs_rq, curr); | 824 | clear_buddies(cfs_rq, curr); |
825 | return; | ||
826 | } | ||
827 | |||
828 | /* | ||
829 | * Ensure that a task that missed wakeup preemption by a | ||
830 | * narrow margin doesn't have to wait for a full slice. | ||
831 | * This also mitigates buddy induced latencies under load. | ||
832 | */ | ||
833 | if (!sched_feat(WAKEUP_PREEMPT)) | ||
834 | return; | ||
835 | |||
836 | if (delta_exec < sysctl_sched_min_granularity) | ||
837 | return; | ||
838 | |||
839 | if (cfs_rq->nr_running > 1) { | ||
840 | struct sched_entity *se = __pick_next_entity(cfs_rq); | ||
841 | s64 delta = curr->vruntime - se->vruntime; | ||
842 | |||
843 | if (delta > ideal_runtime) | ||
844 | resched_task(rq_of(cfs_rq)->curr); | ||
825 | } | 845 | } |
826 | } | 846 | } |
827 | 847 | ||
@@ -861,21 +881,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); | |||
861 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | 881 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) |
862 | { | 882 | { |
863 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 883 | struct sched_entity *se = __pick_next_entity(cfs_rq); |
864 | struct sched_entity *buddy; | 884 | struct sched_entity *left = se; |
865 | 885 | ||
866 | if (cfs_rq->next) { | 886 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) |
867 | buddy = cfs_rq->next; | 887 | se = cfs_rq->next; |
868 | cfs_rq->next = NULL; | ||
869 | if (wakeup_preempt_entity(buddy, se) < 1) | ||
870 | return buddy; | ||
871 | } | ||
872 | 888 | ||
873 | if (cfs_rq->last) { | 889 | /* |
874 | buddy = cfs_rq->last; | 890 | * Prefer last buddy, try to return the CPU to a preempted task. |
875 | cfs_rq->last = NULL; | 891 | */ |
876 | if (wakeup_preempt_entity(buddy, se) < 1) | 892 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) |
877 | return buddy; | 893 | se = cfs_rq->last; |
878 | } | 894 | |
895 | clear_buddies(cfs_rq, se); | ||
879 | 896 | ||
880 | return se; | 897 | return se; |
881 | } | 898 | } |
@@ -1577,6 +1594,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1577 | struct sched_entity *se = &curr->se, *pse = &p->se; | 1594 | struct sched_entity *se = &curr->se, *pse = &p->se; |
1578 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1595 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1579 | int sync = wake_flags & WF_SYNC; | 1596 | int sync = wake_flags & WF_SYNC; |
1597 | int scale = cfs_rq->nr_running >= sched_nr_latency; | ||
1580 | 1598 | ||
1581 | update_curr(cfs_rq); | 1599 | update_curr(cfs_rq); |
1582 | 1600 | ||
@@ -1591,18 +1609,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1591 | if (unlikely(se == pse)) | 1609 | if (unlikely(se == pse)) |
1592 | return; | 1610 | return; |
1593 | 1611 | ||
1594 | /* | 1612 | if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) |
1595 | * Only set the backward buddy when the current task is still on the | ||
1596 | * rq. This can happen when a wakeup gets interleaved with schedule on | ||
1597 | * the ->pre_schedule() or idle_balance() point, either of which can | ||
1598 | * drop the rq lock. | ||
1599 | * | ||
1600 | * Also, during early boot the idle thread is in the fair class, for | ||
1601 | * obvious reasons its a bad idea to schedule back to the idle thread. | ||
1602 | */ | ||
1603 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) | ||
1604 | set_last_buddy(se); | ||
1605 | if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) | ||
1606 | set_next_buddy(pse); | 1613 | set_next_buddy(pse); |
1607 | 1614 | ||
1608 | /* | 1615 | /* |
@@ -1648,8 +1655,22 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1648 | 1655 | ||
1649 | BUG_ON(!pse); | 1656 | BUG_ON(!pse); |
1650 | 1657 | ||
1651 | if (wakeup_preempt_entity(se, pse) == 1) | 1658 | if (wakeup_preempt_entity(se, pse) == 1) { |
1652 | resched_task(curr); | 1659 | resched_task(curr); |
1660 | /* | ||
1661 | * Only set the backward buddy when the current task is still | ||
1662 | * on the rq. This can happen when a wakeup gets interleaved | ||
1663 | * with schedule on the ->pre_schedule() or idle_balance() | ||
1664 | * point, either of which can drop the rq lock. | ||
1665 | * | ||
1666 | * Also, during early boot the idle thread is in the fair class, | ||
1667 | * for obvious reasons its a bad idea to schedule back to it. | ||
1668 | */ | ||
1669 | if (unlikely(!se->on_rq || curr == rq->idle)) | ||
1670 | return; | ||
1671 | if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) | ||
1672 | set_last_buddy(se); | ||
1673 | } | ||
1653 | } | 1674 | } |
1654 | 1675 | ||
1655 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 1676 | static struct task_struct *pick_next_task_fair(struct rq *rq) |