-rw-r--r--  include/linux/kthread.h          |  1
-rw-r--r--  include/linux/sched.h            |  2
-rw-r--r--  kernel/kthread.c                 | 30
-rw-r--r--  kernel/sched/core.c              | 67
-rw-r--r--  kernel/sched/cpufreq_schedutil.c |  2
-rw-r--r--  kernel/sched/fair.c              | 45
-rw-r--r--  kernel/sched/rt.c                | 16
-rw-r--r--  kernel/sched/sched.h             | 11
8 files changed, 99 insertions, 75 deletions
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2803264c512f..c1961761311d 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9256118bd40c..43731fe51c97 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,7 +118,7 @@ struct task_group;
  * the comment with set_special_state().
  */
 #define is_special_task_state(state)                                  \
-        ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+        ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
 
 #define __set_current_state(state_value)                              \
         do {                                                          \
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 481951bf091d..750cb8082694 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task)
 static void __kthread_parkme(struct kthread *self)
 {
         for (;;) {
-                set_current_state(TASK_PARKED);
+                /*
+                 * TASK_PARKED is a special state; we must serialize against
+                 * possible pending wakeups to avoid store-store collisions on
+                 * task->state.
+                 *
+                 * Such a collision might possibly result in the task state
+                 * changing from TASK_PARKED and us failing the
+                 * wait_task_inactive() in kthread_park().
+                 */
+                set_special_state(TASK_PARKED);
                 if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
                         break;
+
+                complete_all(&self->parked);
                 schedule();
         }
         __set_current_state(TASK_RUNNING);
@@ -191,11 +202,6 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-void kthread_park_complete(struct task_struct *k)
-{
-        complete_all(&to_kthread(k)->parked);
-}
-
 static int kthread(void *_create)
 {
         /* Copy data: it's on kthread's stack */
@@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k)
 
         reinit_completion(&kthread->parked);
         clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+        /*
+         * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
+         */
         wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k)
         set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
         if (k != current) {
                 wake_up_process(k);
+                /*
+                 * Wait for __kthread_parkme() to complete(), this means we
+                 * _will_ have TASK_PARKED and are about to call schedule().
+                 */
                 wait_for_completion(&kthread->parked);
+                /*
+                 * Now wait for that schedule() to complete and the task to
+                 * get scheduled out.
+                 */
+                WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
         }
 
         return 0;
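
For readers following the handshake the comments above describe, here is a minimal userspace sketch of the same park/unpark protocol, using POSIX threads. It is an illustration, not kernel code: all names are made up, a mutex/condvar pair stands in for the completion and wakeup machinery, and the final wait_task_inactive() step (waiting until the parked thread is actually off the CPU) has no direct userspace equivalent, so it is omitted.

/* Illustrative userspace analogue of kthread_park()/__kthread_parkme(). */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct parker {
        int should_park;                /* ~ KTHREAD_SHOULD_PARK */
        int parked;                     /* ~ the 'parked' completion */
        pthread_mutex_t lock;
        pthread_cond_t cond;
};

static struct parker p = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .cond = PTHREAD_COND_INITIALIZER,
};

static void parkme(void)                /* ~ __kthread_parkme() */
{
        pthread_mutex_lock(&p.lock);
        while (p.should_park) {
                p.parked = 1;           /* ~ complete_all(&self->parked) */
                pthread_cond_broadcast(&p.cond);
                pthread_cond_wait(&p.cond, &p.lock);    /* ~ schedule() */
        }
        pthread_mutex_unlock(&p.lock);
}

static void park(void)                  /* ~ kthread_park() */
{
        pthread_mutex_lock(&p.lock);
        p.should_park = 1;
        pthread_cond_broadcast(&p.cond);        /* ~ wake_up_process() */
        while (!p.parked)                       /* ~ wait_for_completion() */
                pthread_cond_wait(&p.cond, &p.lock);
        pthread_mutex_unlock(&p.lock);
}

static void unpark(void)                /* ~ kthread_unpark() */
{
        pthread_mutex_lock(&p.lock);
        p.parked = 0;                           /* ~ reinit_completion() */
        p.should_park = 0;
        pthread_cond_broadcast(&p.cond);        /* ~ wake_up_state(TASK_PARKED) */
        pthread_mutex_unlock(&p.lock);
}

static void *worker(void *arg)
{
        for (int i = 0; i < 3; i++) {
                parkme();
                printf("worker: iteration %d\n", i);
                usleep(100 * 1000);
        }
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        usleep(50 * 1000);
        park();
        printf("parker: worker is parked\n");
        unpark();
        pthread_join(t, NULL);
        return 0;
}

The point mirrored here is the ordering the new comments spell out: the parker only proceeds once the parkee has both published its parked state and committed to blocking.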
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78d8facba456..fe365c9a08e9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,7 +7,6 @@
  */
 #include "sched.h"
 
-#include <linux/kthread.h>
 #include <linux/nospec.h>
 
 #include <linux/kcov.h>
@@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                 membarrier_mm_sync_core_before_usermode(mm);
                 mmdrop(mm);
         }
-        if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
-                switch (prev_state) {
-                case TASK_DEAD:
-                        if (prev->sched_class->task_dead)
-                                prev->sched_class->task_dead(prev);
-
-                        /*
-                         * Remove function-return probe instances associated with this
-                         * task and put them back on the free list.
-                         */
-                        kprobe_flush_task(prev);
-
-                        /* Task is done with its stack. */
-                        put_task_stack(prev);
-
-                        put_task_struct(prev);
-                        break;
-
-                case TASK_PARKED:
-                        kthread_park_complete(prev);
-                        break;
-                }
+        if (unlikely(prev_state == TASK_DEAD)) {
+                if (prev->sched_class->task_dead)
+                        prev->sched_class->task_dead(prev);
+
+                /*
+                 * Remove function-return probe instances associated with this
+                 * task and put them back on the free list.
+                 */
+                kprobe_flush_task(prev);
+
+                /* Task is done with its stack. */
+                put_task_stack(prev);
+
+                put_task_struct(prev);
         }
 
         tick_nohz_task_switch();
@@ -3113,7 +3104,9 @@ static void sched_tick_remote(struct work_struct *work)
         struct tick_work *twork = container_of(dwork, struct tick_work, work);
         int cpu = twork->cpu;
         struct rq *rq = cpu_rq(cpu);
+        struct task_struct *curr;
         struct rq_flags rf;
+        u64 delta;
 
         /*
          * Handle the tick only if it appears the remote CPU is running in full
@@ -3122,24 +3115,28 @@ static void sched_tick_remote(struct work_struct *work)
          * statistics and checks timeslices in a time-independent way, regardless
          * of when exactly it is running.
          */
-        if (!idle_cpu(cpu) && tick_nohz_tick_stopped_cpu(cpu)) {
-                struct task_struct *curr;
-                u64 delta;
+        if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+                goto out_requeue;
 
-                rq_lock_irq(rq, &rf);
-                update_rq_clock(rq);
-                curr = rq->curr;
-                delta = rq_clock_task(rq) - curr->se.exec_start;
+        rq_lock_irq(rq, &rf);
+        curr = rq->curr;
+        if (is_idle_task(curr))
+                goto out_unlock;
 
-                /*
-                 * Make sure the next tick runs within a reasonable
-                 * amount of time.
-                 */
-                WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
-                curr->sched_class->task_tick(rq, curr, 0);
-                rq_unlock_irq(rq, &rf);
-        }
+        update_rq_clock(rq);
+        delta = rq_clock_task(rq) - curr->se.exec_start;
+
+        /*
+         * Make sure the next tick runs within a reasonable
+         * amount of time.
+         */
+        WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+        curr->sched_class->task_tick(rq, curr, 0);
+
+out_unlock:
+        rq_unlock_irq(rq, &rf);
 
+out_requeue:
         /*
          * Run the remote tick once per second (1Hz). This arbitrary
          * frequency is large enough to avoid overload but short enough
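
The sched_tick_remote() change above is largely a control-flow rework: the body moves out of one big nested if into straight-line code that bails out early via goto out_unlock / goto out_requeue, and a new is_idle_task() check is taken under the runqueue lock. Below is a standalone sketch of that shape only, with a pthread mutex in place of the runqueue lock and placeholder predicates; every name in it is illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static bool worth_running(void)  { return true; }   /* ~ !idle_cpu() && tick stopped */
static bool target_is_idle(void) { return false; }  /* ~ is_idle_task(curr) */

static void remote_tick_once(void)
{
        if (!worth_running())
                goto out_requeue;       /* nothing to do, just re-arm */

        pthread_mutex_lock(&lock);
        if (target_is_idle())
                goto out_unlock;        /* raced with the target going idle */

        printf("periodic work done under the lock\n");

out_unlock:
        pthread_mutex_unlock(&lock);

out_requeue:
        printf("re-arming the periodic work\n");
}

int main(void)
{
        remote_tick_once();
        return 0;
}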
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 3cde46483f0a..c907fde01eaa 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -192,7 +192,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 {
         struct rq *rq = cpu_rq(sg_cpu->cpu);
 
-        if (rq->rt.rt_nr_running)
+        if (rt_rq_is_runnable(&rq->rt))
                 return sg_cpu->max;
 
         /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1866e64792a7..2f0a0be4d344 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3982,18 +3982,10 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
         if (!sched_feat(UTIL_EST))
                 return;
 
-        /*
-         * Update root cfs_rq's estimated utilization
-         *
-         * If *p is the last task then the root cfs_rq's estimated utilization
-         * of a CPU is 0 by definition.
-         */
-        ue.enqueued = 0;
-        if (cfs_rq->nr_running) {
-                ue.enqueued = cfs_rq->avg.util_est.enqueued;
-                ue.enqueued -= min_t(unsigned int, ue.enqueued,
-                                     (_task_util_est(p) | UTIL_AVG_UNCHANGED));
-        }
+        /* Update root cfs_rq's estimated utilization */
+        ue.enqueued = cfs_rq->avg.util_est.enqueued;
+        ue.enqueued -= min_t(unsigned int, ue.enqueued,
+                             (_task_util_est(p) | UTIL_AVG_UNCHANGED));
         WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
 
         /*
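
The replacement above drops the "last task means zero" special case and simply subtracts the task's estimate from the runqueue total, with min_t() clamping so the unsigned value cannot wrap below zero. A tiny standalone illustration of that saturating subtract (the names are made up):

#include <stdio.h>

/* Underflow-safe decrement, the same shape as the min_t() clamp above. */
static unsigned int sub_clamped(unsigned int total, unsigned int part)
{
        return total - (part < total ? part : total);
}

int main(void)
{
        printf("%u\n", sub_clamped(300, 120));  /* 180 */
        printf("%u\n", sub_clamped(100, 120));  /* 0, not a huge wrapped value */
        return 0;
}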
@@ -4590,6 +4582,7 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
         now = sched_clock_cpu(smp_processor_id());
         cfs_b->runtime = cfs_b->quota;
         cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
+        cfs_b->expires_seq++;
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4612,6 +4605,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
         struct task_group *tg = cfs_rq->tg;
         struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
         u64 amount = 0, min_amount, expires;
+        int expires_seq;
 
         /* note: this is a positive sum as runtime_remaining <= 0 */
         min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -4628,6 +4622,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
                         cfs_b->idle = 0;
                 }
         }
+        expires_seq = cfs_b->expires_seq;
         expires = cfs_b->runtime_expires;
         raw_spin_unlock(&cfs_b->lock);
 
@@ -4637,8 +4632,10 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
          * spread between our sched_clock and the one on which runtime was
          * issued.
          */
-        if ((s64)(expires - cfs_rq->runtime_expires) > 0)
+        if (cfs_rq->expires_seq != expires_seq) {
+                cfs_rq->expires_seq = expires_seq;
                 cfs_rq->runtime_expires = expires;
+        }
 
         return cfs_rq->runtime_remaining > 0;
 }
@@ -4664,12 +4661,9 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
          * has not truly expired.
          *
          * Fortunately we can check determine whether this the case by checking
-         * whether the global deadline has advanced. It is valid to compare
-         * cfs_b->runtime_expires without any locks since we only care about
-         * exact equality, so a partial write will still work.
+         * whether the global deadline(cfs_b->expires_seq) has advanced.
          */
-
-        if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
+        if (cfs_rq->expires_seq == cfs_b->expires_seq) {
                 /* extend local deadline, drift is bounded above by 2 ticks */
                 cfs_rq->runtime_expires += TICK_NSEC;
         } else {
@@ -5202,13 +5196,18 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
+        u64 overrun;
+
         lockdep_assert_held(&cfs_b->lock);
 
-        if (!cfs_b->period_active) {
-                cfs_b->period_active = 1;
-                hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
-                hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
-        }
+        if (cfs_b->period_active)
+                return;
+
+        cfs_b->period_active = 1;
+        overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+        cfs_b->runtime_expires += (overrun + 1) * ktime_to_ns(cfs_b->period);
+        cfs_b->expires_seq++;
+        hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
 }
 
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
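
The fair.c changes above replace a timestamp comparison with a sequence count: the global bandwidth pool bumps expires_seq whenever a new period deadline is issued, and each local runqueue remembers the sequence under which its runtime was handed out, so "has the global deadline advanced?" no longer depends on comparing u64 clock values taken on different CPUs. A small standalone sketch of the idea follows; the struct and function names are illustrative, not the kernel's.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct global_pool {
        int      expires_seq;           /* bumped once per refill/period */
        uint64_t runtime_expires;       /* absolute deadline of the current period */
};

struct local_cache {
        int      expires_seq;           /* sequence this cache was issued under */
        uint64_t runtime_expires;
        int64_t  runtime_remaining;
};

/* hand out runtime and sync the local deadline by sequence, not by time */
static void assign_runtime(struct local_cache *lc, const struct global_pool *gp,
                           int64_t amount)
{
        lc->runtime_remaining += amount;
        if (lc->expires_seq != gp->expires_seq) {
                lc->expires_seq = gp->expires_seq;
                lc->runtime_expires = gp->runtime_expires;
        }
}

/* a locally observed expiry only counts if the global period also moved on */
static bool expiry_is_real(const struct local_cache *lc, const struct global_pool *gp)
{
        return lc->expires_seq != gp->expires_seq;
}

int main(void)
{
        struct global_pool gp = { .expires_seq = 1, .runtime_expires = 1000 };
        struct local_cache lc = { 0 };

        assign_runtime(&lc, &gp, 100);
        printf("same period, expiry is real? %s\n", expiry_is_real(&lc, &gp) ? "yes" : "no");

        gp.expires_seq++;               /* global refill started a new period */
        gp.runtime_expires += 1000;
        printf("after refill, expiry is real? %s\n", expiry_is_real(&lc, &gp) ? "yes" : "no");
        return 0;
}

The expire_cfs_rq_runtime() hunk runs the same test in the other direction: equal sequence numbers mean a locally observed expiry is just clock drift, so the local deadline is nudged forward by a tick instead of being treated as a real expiry.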
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 47556b0c9a95..572567078b60 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 
         rt_se = rt_rq->tg->rt_se[cpu];
 
-        if (!rt_se)
+        if (!rt_se) {
                 dequeue_top_rt_rq(rt_rq);
+                /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+                cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
+        }
         else if (on_rt_rq(rt_se))
                 dequeue_rt_entity(rt_se, 0);
 }
@@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
         sub_nr_running(rq, rt_rq->rt_nr_running);
         rt_rq->rt_queued = 0;
 
-        /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-        cpufreq_update_util(rq, 0);
 }
 
 static void
@@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
 
         if (rt_rq->rt_queued)
                 return;
-        if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
+
+        if (rt_rq_throttled(rt_rq))
                 return;
 
-        add_nr_running(rq, rt_rq->rt_nr_running);
-        rt_rq->rt_queued = 1;
+        if (rt_rq->rt_nr_running) {
+                add_nr_running(rq, rt_rq->rt_nr_running);
+                rt_rq->rt_queued = 1;
+        }
 
         /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
         cpufreq_update_util(rq, 0);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6601baf2361c..c7742dcc136c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -334,9 +334,10 @@ struct cfs_bandwidth {
         u64                     runtime;
         s64                     hierarchical_quota;
         u64                     runtime_expires;
+        int                     expires_seq;
 
-        int                     idle;
-        int                     period_active;
+        short                   idle;
+        short                   period_active;
         struct hrtimer          period_timer;
         struct hrtimer          slack_timer;
         struct list_head        throttled_cfs_rq;
@@ -551,6 +552,7 @@ struct cfs_rq {
 
 #ifdef CONFIG_CFS_BANDWIDTH
         int                     runtime_enabled;
+        int                     expires_seq;
         u64                     runtime_expires;
         s64                     runtime_remaining;
 
@@ -609,6 +611,11 @@ struct rt_rq {
 #endif
 };
 
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
+{
+        return rt_rq->rt_queued && rt_rq->rt_nr_running;
+}
+
 /* Deadline class' related fields in a runqueue */
 struct dl_rq {
         /* runqueue is an rbtree, ordered by deadline */
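
The new rt_rq_is_runnable() helper ties together the cpufreq_schedutil.c and rt.c hunks above: schedutil now asks whether the RT runqueue is both queued at the top level and has tasks, rather than looking at rt_nr_running alone, so a throttled or dequeued RT runqueue no longer pins the frequency request at max. A standalone sketch of how the predicate is consumed; everything outside rt_rq_is_runnable() is illustrative, not the kernel's code.

#include <stdbool.h>
#include <stdio.h>

struct rt_rq_sketch {
        int rt_queued;                  /* enqueued at the root runqueue */
        unsigned int rt_nr_running;
};

static bool rt_rq_is_runnable(const struct rt_rq_sketch *rt_rq)
{
        return rt_rq->rt_queued && rt_rq->rt_nr_running;
}

static unsigned long aggregate_util(const struct rt_rq_sketch *rt,
                                    unsigned long max, unsigned long cfs_util)
{
        /* RT pins the request at max only while it is genuinely runnable */
        if (rt_rq_is_runnable(rt))
                return max;
        return cfs_util;
}

int main(void)
{
        struct rt_rq_sketch rt = { .rt_queued = 0, .rt_nr_running = 1 };

        /* throttled/dequeued RT no longer forces the max frequency */
        printf("util = %lu\n", aggregate_util(&rt, 1024, 300));

        rt.rt_queued = 1;
        printf("util = %lu\n", aggregate_util(&rt, 1024, 300));
        return 0;
}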