 -rw-r--r--  include/linux/wait.h        |  26
 -rw-r--r--  kernel/locking/mutex.c      |   2
 -rw-r--r--  kernel/sched/core.c         | 107
 -rw-r--r--  kernel/sched/cpudeadline.c  |  27
 -rw-r--r--  kernel/sched/cpudeadline.h  |   2
 -rw-r--r--  kernel/sched/deadline.c     |  51
 -rw-r--r--  kernel/sched/debug.c        |   1
 -rw-r--r--  kernel/sched/fair.c         |   7
 -rw-r--r--  kernel/sched/idle.c         |   3
 -rw-r--r--  kernel/sched/rt.c           |  26
 -rw-r--r--  kernel/sched/sched.h        |  22
 11 files changed, 197 insertions, 77 deletions
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 37423e0e1379..537d58eea8a0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -990,6 +990,32 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
 }
 
 /**
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ * @timeout: timeout, in jiffies
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared. This is similar to wait_on_bit(), except also takes a
+ * timeout parameter.
+ *
+ * Returned value will be zero if the bit was cleared before the
+ * @timeout elapsed, or non-zero if the @timeout elapsed or process
+ * received a signal and the mode permitted wakeup on that signal.
+ */
+static inline int
+wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
+{
+        might_sleep();
+        if (!test_bit(bit, word))
+                return 0;
+        return out_of_line_wait_on_bit_timeout(word, bit,
+                                               bit_wait_timeout,
+                                               mode, timeout);
+}
+
+/**
  * wait_on_bit_action - wait for a bit to be cleared
  * @word: the word being waited on, a kernel virtual address
  * @bit: the bit of the word being waited on
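Illustrative caller of the new helper (not part of the patch; struct my_dev, its flags word, and MY_DEV_BIT_BUSY are hypothetical names used only for the sketch):

/* Needs <linux/wait.h>, <linux/jiffies.h>, <linux/sched.h>, <linux/errno.h>. */
#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/errno.h>

#define MY_DEV_BIT_BUSY 0               /* hypothetical flag bit */

struct my_dev {                         /* hypothetical device state */
        unsigned long flags;
};

static int my_dev_wait_idle(struct my_dev *dev)
{
        int err;

        /* Sleep until the BUSY bit clears, for at most 100 ms. */
        err = wait_on_bit_timeout(&dev->flags, MY_DEV_BIT_BUSY,
                                  TASK_UNINTERRUPTIBLE,
                                  msecs_to_jiffies(100));

        /* 0 means the bit cleared in time; non-zero means the timeout
         * elapsed (signals cannot wake TASK_UNINTERRUPTIBLE sleeps). */
        return err ? -ETIMEDOUT : 0;
}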
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 57407062e209..94674e5919cb 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -81,7 +81,7 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
  * The mutex must later on be released by the same task that
  * acquired it. Recursive locking is not allowed. The task
  * may not exit without first unlocking the mutex. Also, kernel
- * memory where the mutex resides mutex must not be freed with
+ * memory where the mutex resides must not be freed with
  * the mutex still locked. The mutex must first be initialized
  * (or statically defined) before it can be locked. memset()-ing
  * the mutex to 0 is not allowed.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1612578a5b7a..1f37fe7f77a4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq)
 {
         s64 delta;
 
-        if (rq->skip_clock_update > 0)
+        lockdep_assert_held(&rq->lock);
+
+        if (rq->clock_skip_update & RQCF_ACT_SKIP)
                 return;
 
         delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@ -490,6 +492,11 @@ static __init void init_hrtick(void)
  */
 void hrtick_start(struct rq *rq, u64 delay)
 {
+        /*
+         * Don't schedule slices shorter than 10000ns, that just
+         * doesn't make sense. Rely on vruntime for fairness.
+         */
+        delay = max_t(u64, delay, 10000LL);
         __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
                         HRTIMER_MODE_REL_PINNED, 0);
 }
@@ -1046,7 +1053,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
          * this case, we can save a useless back to back clock update.
          */
         if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
-                rq->skip_clock_update = 1;
+                rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -1836,6 +1843,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
         p->se.prev_sum_exec_runtime = 0;
         p->se.nr_migrations = 0;
         p->se.vruntime = 0;
+#ifdef CONFIG_SMP
+        p->se.avg.decay_count = 0;
+#endif
         INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
@@ -2755,6 +2765,10 @@ again:
  *  - explicit schedule() call
  *  - return from syscall or exception to user-space
  *  - return from interrupt-handler to user-space
+ *
+ * WARNING: all callers must re-check need_resched() afterward and reschedule
+ * accordingly in case an event triggered the need for rescheduling (such as
+ * an interrupt waking up a task) while preemption was disabled in __schedule().
  */
 static void __sched __schedule(void)
 {
@@ -2763,7 +2777,6 @@ static void __sched __schedule(void)
         struct rq *rq;
         int cpu;
 
-need_resched:
         preempt_disable();
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
@@ -2783,6 +2796,8 @@ need_resched:
         smp_mb__before_spinlock();
         raw_spin_lock_irq(&rq->lock);
 
+        rq->clock_skip_update <<= 1; /* promote REQ to ACT */
+
         switch_count = &prev->nivcsw;
         if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
                 if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2807,13 +2822,13 @@ need_resched:
                 switch_count = &prev->nvcsw;
         }
 
-        if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
+        if (task_on_rq_queued(prev))
                 update_rq_clock(rq);
 
         next = pick_next_task(rq, prev);
         clear_tsk_need_resched(prev);
         clear_preempt_need_resched();
-        rq->skip_clock_update = 0;
+        rq->clock_skip_update = 0;
 
         if (likely(prev != next)) {
                 rq->nr_switches++;
@@ -2828,8 +2843,6 @@ need_resched:
         post_schedule(rq);
 
         sched_preempt_enable_no_resched();
-        if (need_resched())
-                goto need_resched;
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -2849,7 +2862,9 @@ asmlinkage __visible void __sched schedule(void)
         struct task_struct *tsk = current;
 
         sched_submit_work(tsk);
-        __schedule();
+        do {
+                __schedule();
+        } while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
 
@@ -2884,6 +2899,21 @@ void __sched schedule_preempt_disabled(void)
         preempt_disable();
 }
 
+static void preempt_schedule_common(void)
+{
+        do {
+                __preempt_count_add(PREEMPT_ACTIVE);
+                __schedule();
+                __preempt_count_sub(PREEMPT_ACTIVE);
+
+                /*
+                 * Check again in case we missed a preemption opportunity
+                 * between schedule and now.
+                 */
+                barrier();
+        } while (need_resched());
+}
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -2899,17 +2929,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
         if (likely(!preemptible()))
                 return;
 
-        do {
-                __preempt_count_add(PREEMPT_ACTIVE);
-                __schedule();
-                __preempt_count_sub(PREEMPT_ACTIVE);
-
-                /*
-                 * Check again in case we missed a preemption opportunity
-                 * between schedule and now.
-                 */
-                barrier();
-        } while (need_resched());
+        preempt_schedule_common();
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
@@ -3405,6 +3425,20 @@ static bool check_same_owner(struct task_struct *p)
         return match;
 }
 
+static bool dl_param_changed(struct task_struct *p,
+                const struct sched_attr *attr)
+{
+        struct sched_dl_entity *dl_se = &p->dl;
+
+        if (dl_se->dl_runtime != attr->sched_runtime ||
+            dl_se->dl_deadline != attr->sched_deadline ||
+            dl_se->dl_period != attr->sched_period ||
+            dl_se->flags != attr->sched_flags)
+                return true;
+
+        return false;
+}
+
 static int __sched_setscheduler(struct task_struct *p,
                                 const struct sched_attr *attr,
                                 bool user)
@@ -3533,7 +3567,7 @@ recheck:
                         goto change;
                 if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
                         goto change;
-                if (dl_policy(policy))
+                if (dl_policy(policy) && dl_param_changed(p, attr))
                         goto change;
 
                 p->sched_reset_on_fork = reset_on_fork;
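Illustrative user-space sketch of when dl_param_changed() takes effect (not part of the patch; it assumes a struct sched_attr definition matching include/uapi/linux/sched.h and a libc exposing SYS_sched_setattr, since glibc has no wrapper):

/* Re-issuing sched_setattr() with unchanged deadline parameters now
 * takes the early-exit path in __sched_setscheduler(). */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

struct sched_attr {                     /* mirrors the uapi layout */
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;
        uint64_t sched_deadline;
        uint64_t sched_period;
};

int main(void)
{
        struct sched_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size           = sizeof(attr);
        attr.sched_policy   = 6;                 /* SCHED_DEADLINE */
        attr.sched_runtime  = 10 * 1000 * 1000;  /* 10 ms */
        attr.sched_deadline = 30 * 1000 * 1000;  /* 30 ms */
        attr.sched_period   = 30 * 1000 * 1000;  /* 30 ms */

        /* First call switches the task to SCHED_DEADLINE. */
        syscall(SYS_sched_setattr, 0, &attr, 0);

        /* Second call with identical parameters: dl_param_changed() is
         * false, so the kernel skips the dequeue/requeue change path. */
        return syscall(SYS_sched_setattr, 0, &attr, 0);
}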
@@ -4225,17 +4259,10 @@ SYSCALL_DEFINE0(sched_yield)
         return 0;
 }
 
-static void __cond_resched(void)
-{
-        __preempt_count_add(PREEMPT_ACTIVE);
-        __schedule();
-        __preempt_count_sub(PREEMPT_ACTIVE);
-}
-
 int __sched _cond_resched(void)
 {
         if (should_resched()) {
-                __cond_resched();
+                preempt_schedule_common();
                 return 1;
         }
         return 0;
@@ -4260,7 +4287,7 @@ int __cond_resched_lock(spinlock_t *lock)
         if (spin_needbreak(lock) || resched) {
                 spin_unlock(lock);
                 if (resched)
-                        __cond_resched();
+                        preempt_schedule_common();
                 else
                         cpu_relax();
                 ret = 1;
@@ -4276,7 +4303,7 @@ int __sched __cond_resched_softirq(void)
 
         if (should_resched()) {
                 local_bh_enable();
-                __cond_resched();
+                preempt_schedule_common();
                 local_bh_disable();
                 return 1;
         }
@@ -4531,9 +4558,10 @@ void sched_show_task(struct task_struct *p)
 {
         unsigned long free = 0;
         int ppid;
-        unsigned state;
+        unsigned long state = p->state;
 
-        state = p->state ? __ffs(p->state) + 1 : 0;
+        if (state)
+                state = __ffs(state) + 1;
         printk(KERN_INFO "%-15.15s %c", p->comm,
                 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if BITS_PER_LONG == 32
@@ -4766,7 +4794,7 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
 
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
-        if (p->sched_class && p->sched_class->set_cpus_allowed)
+        if (p->sched_class->set_cpus_allowed)
                 p->sched_class->set_cpus_allowed(p, new_mask);
 
         cpumask_copy(&p->cpus_allowed, new_mask);
@@ -7276,6 +7304,11 @@ void __init sched_init(void)
         enter_lazy_tlb(&init_mm, current);
 
         /*
+         * During early bootup we pretend to be a normal task:
+         */
+        current->sched_class = &fair_sched_class;
+
+        /*
          * Make us the idle thread. Technically, schedule() should not be
          * called from this thread, however somewhere below it might be,
         * but because we are the idle thread, we just pick up running again
@@ -7285,11 +7318,6 @@ void __init sched_init(void)
 
         calc_load_update = jiffies + LOAD_FREQ;
 
-        /*
-         * During early bootup we pretend to be a normal task:
-         */
-        current->sched_class = &fair_sched_class;
-
 #ifdef CONFIG_SMP
         zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
         /* May be allocated at isolcpus cmdline parse time */
@@ -7350,6 +7378,9 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
                         in_atomic(), irqs_disabled(),
                         current->pid, current->comm);
 
+        if (task_stack_end_corrupted(current))
+                printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+
         debug_show_held_locks(current);
         if (irqs_disabled())
                 print_irqtrace_events(current);
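Background sketch for the new ___might_sleep() check (not part of the patch): task_stack_end_corrupted() is, conceptually, a canary test on the word at the far end of the task's kernel stack, which is seeded at fork time. A simplified model, with the canary value and helper names treated as assumptions:

/* Simplified model only -- not the kernel's actual definition. The real
 * helpers (end_of_stack(), set_task_stack_end_magic(),
 * task_stack_end_corrupted()) operate on struct task_struct; the canary
 * value below is the conventional one, but treat it as an assumption. */
#define ASSUMED_STACK_END_MAGIC 0x57AC6E9DUL

/* Returns non-zero if the word at the lowest address of the stack no
 * longer holds the canary, i.e. something grew past the stack end. */
static inline int model_stack_end_corrupted(const unsigned long *stack_end)
{
        return *stack_end != ASSUMED_STACK_END_MAGIC;
}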
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 539ca3ce071b..c6acb07466bb 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -107,7 +107,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
         int best_cpu = -1;
         const struct sched_dl_entity *dl_se = &p->dl;
 
-        if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
+        if (later_mask &&
+            cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
                 best_cpu = cpumask_any(later_mask);
                 goto out;
         } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
@@ -186,6 +187,26 @@ out:
 }
 
 /*
+ * cpudl_set_freecpu - Set the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_set_freecpu(struct cpudl *cp, int cpu)
+{
+        cpumask_set_cpu(cpu, cp->free_cpus);
+}
+
+/*
+ * cpudl_clear_freecpu - Clear the cpudl.free_cpus
+ * @cp: the cpudl max-heap context
+ * @cpu: rd attached cpu
+ */
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
+{
+        cpumask_clear_cpu(cpu, cp->free_cpus);
+}
+
+/*
  * cpudl_init - initialize the cpudl structure
  * @cp: the cpudl max-heap context
  */
@@ -203,7 +224,7 @@ int cpudl_init(struct cpudl *cp)
         if (!cp->elements)
                 return -ENOMEM;
 
-        if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+        if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
                 kfree(cp->elements);
                 return -ENOMEM;
         }
@@ -211,8 +232,6 @@ int cpudl_init(struct cpudl *cp)
         for_each_possible_cpu(i)
                 cp->elements[i].idx = IDX_INVALID;
 
-        cpumask_setall(cp->free_cpus);
-
         return 0;
 }
 
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index 020039bd1326..1a0a6ef2fbe1 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
                struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
 int cpudl_init(struct cpudl *cp);
+void cpudl_set_freecpu(struct cpudl *cp, int cpu);
+void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
 void cpudl_cleanup(struct cpudl *cp);
 #endif /* CONFIG_SMP */
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 726470d47f87..a027799ae130 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -350,6 +350,11 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
                 dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
                 dl_se->runtime = pi_se->dl_runtime;
         }
+
+        if (dl_se->dl_yielded)
+                dl_se->dl_yielded = 0;
+        if (dl_se->dl_throttled)
+                dl_se->dl_throttled = 0;
 }
 
 /*
@@ -536,23 +541,19 @@ again:
 
         sched_clock_tick();
         update_rq_clock(rq);
-        dl_se->dl_throttled = 0;
-        dl_se->dl_yielded = 0;
-        if (task_on_rq_queued(p)) {
-                enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
-                if (dl_task(rq->curr))
-                        check_preempt_curr_dl(rq, p, 0);
-                else
-                        resched_curr(rq);
+        enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
+        if (dl_task(rq->curr))
+                check_preempt_curr_dl(rq, p, 0);
+        else
+                resched_curr(rq);
 #ifdef CONFIG_SMP
-                /*
-                 * Queueing this task back might have overloaded rq,
-                 * check if we need to kick someone away.
-                 */
-                if (has_pushable_dl_tasks(rq))
-                        push_dl_task(rq);
+        /*
+         * Queueing this task back might have overloaded rq,
+         * check if we need to kick someone away.
+         */
+        if (has_pushable_dl_tasks(rq))
+                push_dl_task(rq);
 #endif
-        }
 unlock:
         raw_spin_unlock(&rq->lock);
 
@@ -613,10 +614,9 @@ static void update_curr_dl(struct rq *rq)
 
         dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
         if (dl_runtime_exceeded(rq, dl_se)) {
+                dl_se->dl_throttled = 1;
                 __dequeue_task_dl(rq, curr, 0);
-                if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
-                        dl_se->dl_throttled = 1;
-                else
+                if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
                         enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
                 if (!is_leftmost(curr, &rq->dl))
@@ -853,7 +853,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
          * its rq, the bandwidth timer callback (which clearly has not
          * run yet) will take care of this.
          */
-        if (p->dl.dl_throttled)
+        if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
                 return;
 
         enqueue_dl_entity(&p->dl, pi_se, flags);
@@ -1073,7 +1073,13 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
 {
         update_curr_dl(rq);
 
-        if (hrtick_enabled(rq) && queued && p->dl.runtime > 0)
+        /*
+         * Even when we have runtime, update_curr_dl() might have resulted in us
+         * not being the leftmost task anymore. In that case NEED_RESCHED will
+         * be set and schedule() will start a new hrtick for the next task.
+         */
+        if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
+            is_leftmost(p, &rq->dl))
                 start_hrtick_dl(rq, p);
 }
 
@@ -1166,9 +1172,6 @@ static int find_later_rq(struct task_struct *task)
          * We have to consider system topology and task affinity
          * first, then we can look for a suitable cpu.
          */
-        cpumask_copy(later_mask, task_rq(task)->rd->span);
-        cpumask_and(later_mask, later_mask, cpu_active_mask);
-        cpumask_and(later_mask, later_mask, &task->cpus_allowed);
         best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
                         task, later_mask);
         if (best_cpu == -1)
@@ -1563,6 +1566,7 @@ static void rq_online_dl(struct rq *rq)
         if (rq->dl.overloaded)
                 dl_set_overload(rq);
 
+        cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
         if (rq->dl.dl_nr_running > 0)
                 cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
@@ -1574,6 +1578,7 @@ static void rq_offline_dl(struct rq *rq)
                 dl_clear_overload(rq);
 
         cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+        cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
 void init_sched_dl_class(void)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 92cc52001e74..8baaf858d25c 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -305,6 +305,7 @@ do { \
         PN(next_balance);
         SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
         PN(clock);
+        PN(clock_task);
         P(cpu_load[0]);
         P(cpu_load[1]);
         P(cpu_load[2]);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fe331fc391f5..7ce18f3c097a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -676,7 +676,6 @@ void init_task_runnable_average(struct task_struct *p)
 {
         u32 slice;
 
-        p->se.avg.decay_count = 0;
         slice = sched_slice(task_cfs_rq(p), &p->se) >> 10;
         p->se.avg.runnable_avg_sum = slice;
         p->se.avg.runnable_avg_period = slice;
@@ -2574,11 +2573,11 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se)
         u64 decays = atomic64_read(&cfs_rq->decay_counter);
 
         decays -= se->avg.decay_count;
+        se->avg.decay_count = 0;
         if (!decays)
                 return 0;
 
         se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
-        se->avg.decay_count = 0;
 
         return decays;
 }
@@ -5157,7 +5156,7 @@ static void yield_task_fair(struct rq *rq)
                  * so we don't do microscopic update in schedule()
                  * and double the fastpath cost.
                  */
-                rq->skip_clock_update = 1;
+                rq_clock_skip_update(rq, true);
         }
 
         set_skip_buddy(se);
@@ -5949,8 +5948,8 @@ static unsigned long scale_rt_capacity(int cpu)
          */
         age_stamp = ACCESS_ONCE(rq->age_stamp);
         avg = ACCESS_ONCE(rq->rt_avg);
+        delta = __rq_clock_broken(rq) - age_stamp;
 
-        delta = rq_clock(rq) - age_stamp;
         if (unlikely(delta < 0))
                 delta = 0;
 
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c47fce75e666..aaf1c1d5cf5d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -47,7 +47,8 @@ static inline int cpu_idle_poll(void)
         rcu_idle_enter();
         trace_cpu_idle_rcuidle(0, smp_processor_id());
         local_irq_enable();
-        while (!tif_need_resched())
+        while (!tif_need_resched() &&
+                (cpu_idle_force_poll || tick_check_broadcast_expired()))
                 cpu_relax();
         trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
         rcu_idle_exit();
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ee15f5a0d1c1..f4d4b077eba0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -831,11 +831,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                         enqueue = 1;
 
                         /*
-                         * Force a clock update if the CPU was idle,
-                         * lest wakeup -> unthrottle time accumulate.
+                         * When we're idle and a woken (rt) task is
+                         * throttled check_preempt_curr() will set
+                         * skip_update and the time between the wakeup
+                         * and this unthrottle will get accounted as
+                         * 'runtime'.
                          */
                         if (rt_rq->rt_nr_running && rq->curr == rq->idle)
-                                rq->skip_clock_update = -1;
+                                rq_clock_skip_update(rq, false);
                 }
                 if (rt_rq->rt_time || rt_rq->rt_nr_running)
                         idle = 0;
@@ -1337,7 +1340,12 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
             curr->prio <= p->prio)) {
                 int target = find_lowest_rq(p);
 
-                if (target != -1)
+                /*
+                 * Don't bother moving it if the destination CPU is
+                 * not running a lower priority task.
+                 */
+                if (target != -1 &&
+                    p->prio < cpu_rq(target)->rt.highest_prio.curr)
                         cpu = target;
         }
         rcu_read_unlock();
@@ -1614,6 +1622,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
                 lowest_rq = cpu_rq(cpu);
 
+                if (lowest_rq->rt.highest_prio.curr <= task->prio) {
+                        /*
+                         * Target rq has tasks of equal or higher priority,
+                         * retrying does not release any lock and is unlikely
+                         * to yield a different result.
+                         */
+                        lowest_rq = NULL;
+                        break;
+                }
+
                 /* if the prio of this runqueue changed, try again */
                 if (double_lock_balance(rq, lowest_rq)) {
                         /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9a2a45c970e7..0870db23d79c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -558,8 +558,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ_FULL
         unsigned long last_sched_tick;
 #endif
-        int skip_clock_update;
-
         /* capture load from *all* tasks on this cpu: */
         struct load_weight load;
         unsigned long nr_load_updates;
@@ -588,6 +586,7 @@ struct rq {
         unsigned long next_balance;
         struct mm_struct *prev_mm;
 
+        unsigned int clock_skip_update;
         u64 clock;
         u64 clock_task;
 
@@ -687,16 +686,35 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 #define cpu_curr(cpu)   (cpu_rq(cpu)->curr)
 #define raw_rq()        raw_cpu_ptr(&runqueues)
 
+static inline u64 __rq_clock_broken(struct rq *rq)
+{
+        return ACCESS_ONCE(rq->clock);
+}
+
 static inline u64 rq_clock(struct rq *rq)
 {
+        lockdep_assert_held(&rq->lock);
         return rq->clock;
 }
 
 static inline u64 rq_clock_task(struct rq *rq)
 {
+        lockdep_assert_held(&rq->lock);
         return rq->clock_task;
 }
 
+#define RQCF_REQ_SKIP   0x01
+#define RQCF_ACT_SKIP   0x02
+
+static inline void rq_clock_skip_update(struct rq *rq, bool skip)
+{
+        lockdep_assert_held(&rq->lock);
+        if (skip)
+                rq->clock_skip_update |= RQCF_REQ_SKIP;
+        else
+                rq->clock_skip_update &= ~RQCF_REQ_SKIP;
+}
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
         NUMA_DIRECT,
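Putting the pieces together (not part of the patch): a policy path requests a skip with RQCF_REQ_SKIP via rq_clock_skip_update(), __schedule() promotes the request to RQCF_ACT_SKIP with the <<= 1 shift, update_rq_clock() honours only the ACT bit, and __schedule() clears the field before switching. A stand-alone user-space model of just that bit flow, with none of the rq locking:

/* Minimal model of the REQ->ACT skip handshake; flag values mirror the
 * patch, everything else is simplified for illustration. */
#include <stdio.h>

#define RQCF_REQ_SKIP 0x01
#define RQCF_ACT_SKIP 0x02

static unsigned int clock_skip_update;

static void request_skip(int skip)       /* models rq_clock_skip_update() */
{
        if (skip)
                clock_skip_update |= RQCF_REQ_SKIP;
        else
                clock_skip_update &= ~RQCF_REQ_SKIP;
}

int main(void)
{
        request_skip(1);                  /* e.g. check_preempt_curr() */
        clock_skip_update <<= 1;          /* __schedule(): promote REQ to ACT */
        printf("update_rq_clock() would %s\n",
               (clock_skip_update & RQCF_ACT_SKIP) ? "skip" : "run");
        clock_skip_update = 0;            /* cleared at the end of __schedule() */
        return 0;
}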
