Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/core.c	 10
-rw-r--r--	kernel/sched/fair.c	178
-rw-r--r--	kernel/sched/rt.c	 14
3 files changed, 115 insertions, 87 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1808606ee5f..a88f4a485c5e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2660,6 +2660,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
 	} while (need_resched());
 }
 EXPORT_SYMBOL(preempt_schedule);
+#endif /* CONFIG_PREEMPT */
 
 /*
  * this is the entry point to schedule() from kernel preemption
@@ -2693,8 +2694,6 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	exception_exit(prev_state);
 }
 
-#endif /* CONFIG_PREEMPT */
-
 int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
@@ -4762,7 +4761,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 		cpumask_clear_cpu(rq->cpu, old_rd->span);
 
 		/*
-		 * If we dont want to free the old_rt yet then
+		 * If we dont want to free the old_rd yet then
 		 * set old_rd to NULL to skip the freeing later
 		 * in this function:
 		 */
@@ -4903,6 +4902,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_asym);
 static void update_top_cache_domain(int cpu)
 {
 	struct sched_domain *sd;
+	struct sched_domain *busy_sd = NULL;
 	int id = cpu;
 	int size = 1;
 
@@ -4910,8 +4910,9 @@ static void update_top_cache_domain(int cpu)
 	if (sd) {
 		id = cpumask_first(sched_domain_span(sd));
 		size = cpumask_weight(sched_domain_span(sd));
-		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
+		busy_sd = sd->parent; /* sd_busy */
 	}
+	rcu_assign_pointer(per_cpu(sd_busy, cpu), busy_sd);
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_size, cpu) = size;
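
Note: the hunk above fixes a stale-pointer case. Before the change, sd_busy was only written inside the if (sd) branch, so tearing down a domain could leave the old per-cpu pointer in place. A minimal standalone model of the always-publish pattern (ordinary C, not kernel code; rcu_assign_pointer() is stubbed as a plain store):

#include <stdio.h>

struct sched_domain { struct sched_domain *parent; };

static struct sched_domain *sd_busy;	/* stands in for per_cpu(sd_busy, cpu) */

static void update_top_cache_domain_sketch(struct sched_domain *sd)
{
	struct sched_domain *busy_sd = NULL;

	if (sd)
		busy_sd = sd->parent;
	sd_busy = busy_sd;	/* the kernel uses rcu_assign_pointer() here */
}

int main(void)
{
	struct sched_domain parent = { NULL };
	struct sched_domain child = { &parent };

	update_top_cache_domain_sketch(&child);
	printf("domain present: sd_busy = %p\n", (void *)sd_busy);	/* &parent */

	update_top_cache_domain_sketch(NULL);
	printf("domain gone:    sd_busy = %p\n", (void *)sd_busy);	/* NULL, not stale */
	return 0;
}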
@@ -5112,6 +5113,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		 * die on a /0 trap.
 		 */
 		sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+		sg->sgp->power_orig = sg->sgp->power;
 
 		/*
 		 * Make sure the first group of this domain contains the
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e8b652ebe027..c7395d97e4cb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -178,59 +178,61 @@ void sched_init_granularity(void)
 	update_sysctl();
 }
 
-#if BITS_PER_LONG == 32
-# define WMULT_CONST	(~0UL)
-#else
-# define WMULT_CONST	(1UL << 32)
-#endif
-
+#define WMULT_CONST	(~0U)
 #define WMULT_SHIFT	32
 
-/*
- * Shift right and round:
- */
-#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+static void __update_inv_weight(struct load_weight *lw)
+{
+	unsigned long w;
+
+	if (likely(lw->inv_weight))
+		return;
+
+	w = scale_load_down(lw->weight);
+
+	if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
+		lw->inv_weight = 1;
+	else if (unlikely(!w))
+		lw->inv_weight = WMULT_CONST;
+	else
+		lw->inv_weight = WMULT_CONST / w;
+}
 
 /*
- * delta *= weight / lw
+ * delta_exec * weight / lw.weight
+ *   OR
+ * (delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT
+ *
+ * Either weight := NICE_0_LOAD and lw \e prio_to_wmult[], in which case
+ * we're guaranteed shift stays positive because inv_weight is guaranteed to
+ * fit 32 bits, and NICE_0_LOAD gives another 10 bits; therefore shift >= 22.
+ *
+ * Or, weight =< lw.weight (because lw.weight is the runqueue weight), thus
+ * weight/lw.weight <= 1, and therefore our shift will also be positive.
  */
-static unsigned long
-calc_delta_mine(unsigned long delta_exec, unsigned long weight,
-		struct load_weight *lw)
+static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight *lw)
 {
-	u64 tmp;
+	u64 fact = scale_load_down(weight);
+	int shift = WMULT_SHIFT;
 
-	/*
-	 * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
-	 * entities since MIN_SHARES = 2. Treat weight as 1 if less than
-	 * 2^SCHED_LOAD_RESOLUTION.
-	 */
-	if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
-		tmp = (u64)delta_exec * scale_load_down(weight);
-	else
-		tmp = (u64)delta_exec;
+	__update_inv_weight(lw);
 
-	if (!lw->inv_weight) {
-		unsigned long w = scale_load_down(lw->weight);
-
-		if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
-			lw->inv_weight = 1;
-		else if (unlikely(!w))
-			lw->inv_weight = WMULT_CONST;
-		else
-			lw->inv_weight = WMULT_CONST / w;
-	}
+	if (unlikely(fact >> 32)) {
+		while (fact >> 32) {
+			fact >>= 1;
+			shift--;
+		}
+	}
 
-	/*
-	 * Check whether we'd overflow the 64-bit multiplication:
-	 */
-	if (unlikely(tmp > WMULT_CONST))
-		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
-			WMULT_SHIFT/2);
-	else
-		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
+	/* hint to use a 32x32->64 mul */
+	fact = (u64)(u32)fact * lw->inv_weight;
 
-	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
+	while (fact >> 32) {
+		fact >>= 1;
+		shift--;
+	}
+
+	return mul_u64_u32_shr(delta_exec, fact, shift);
 }
 
 
@@ -443,7 +445,7 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 static __always_inline
-void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);
 
 /**************************************************************
  * Scheduling class tree data structure manipulation methods:
@@ -612,11 +614,10 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 /*
  * delta /= w
  */
-static inline unsigned long
-calc_delta_fair(unsigned long delta, struct sched_entity *se)
+static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
 {
 	if (unlikely(se->load.weight != NICE_0_LOAD))
-		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
+		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
 
 	return delta;
 }
@@ -665,7 +666,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			update_load_add(&lw, se->load.weight);
 			load = &lw;
 		}
-		slice = calc_delta_mine(slice, se->load.weight, load);
+		slice = __calc_delta(slice, se->load.weight, load);
 	}
 	return slice;
 }
@@ -703,47 +704,32 @@ void init_task_runnable_average(struct task_struct *p)
 #endif
 
 /*
- * Update the current task's runtime statistics. Skip current tasks that
- * are not in our scheduling class.
+ * Update the current task's runtime statistics.
  */
-static inline void
-__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
-	      unsigned long delta_exec)
-{
-	unsigned long delta_exec_weighted;
-
-	schedstat_set(curr->statistics.exec_max,
-			max((u64)delta_exec, curr->statistics.exec_max));
-
-	curr->sum_exec_runtime += delta_exec;
-	schedstat_add(cfs_rq, exec_clock, delta_exec);
-	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
-
-	curr->vruntime += delta_exec_weighted;
-	update_min_vruntime(cfs_rq);
-}
-
 static void update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	u64 now = rq_clock_task(rq_of(cfs_rq));
-	unsigned long delta_exec;
+	u64 delta_exec;
 
 	if (unlikely(!curr))
 		return;
 
-	/*
-	 * Get the amount of time the current task was running
-	 * since the last time we changed load (this cannot
-	 * overflow on 32 bits):
-	 */
-	delta_exec = (unsigned long)(now - curr->exec_start);
-	if (!delta_exec)
+	delta_exec = now - curr->exec_start;
+	if (unlikely((s64)delta_exec <= 0))
 		return;
 
-	__update_curr(cfs_rq, curr, delta_exec);
 	curr->exec_start = now;
 
+	schedstat_set(curr->statistics.exec_max,
+		      max(delta_exec, curr->statistics.exec_max));
+
+	curr->sum_exec_runtime += delta_exec;
+	schedstat_add(cfs_rq, exec_clock, delta_exec);
+
+	curr->vruntime += calc_delta_fair(delta_exec, curr);
+	update_min_vruntime(cfs_rq);
+
 	if (entity_is_task(curr)) {
 		struct task_struct *curtask = task_of(curr);
 
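Note: update_curr() now feeds the raw u64 delta straight into calc_delta_fair(), which scales it by NICE_0_LOAD/weight before adding it to vruntime, so heavier (lower-nice) tasks accumulate virtual time more slowly and get correspondingly longer slices. A standalone sketch of that scaling (ordinary C; 3121, 1024 and 335 are the prio_to_weight[] entries for nice -5, 0 and +5):

#include <stdint.h>
#include <stdio.h>

#define NICE_0_LOAD	1024ULL

/* vruntime advance for delta_exec ns of wall-clock runtime */
static uint64_t vruntime_delta(uint64_t delta_exec, uint64_t weight)
{
	if (weight == NICE_0_LOAD)	/* fast path, as in calc_delta_fair() */
		return delta_exec;
	return delta_exec * NICE_0_LOAD / weight;
}

int main(void)
{
	uint64_t delta = 1000000;	/* 1 ms of runtime, in ns */

	printf("nice -5: vruntime += %llu ns\n",
	       (unsigned long long)vruntime_delta(delta, 3121));	/* ~0.33 ms */
	printf("nice  0: vruntime += %llu ns\n",
	       (unsigned long long)vruntime_delta(delta, 1024));	/* exactly 1 ms */
	printf("nice +5: vruntime += %llu ns\n",
	       (unsigned long long)vruntime_delta(delta, 335));	/* ~2.99 ms */
	return 0;
}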
@@ -1752,6 +1738,13 @@ void task_numa_work(struct callback_head *work)
 		    (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
 			continue;
 
+		/*
+		 * Skip inaccessible VMAs to avoid any confusion between
+		 * PROT_NONE and NUMA hinting ptes
+		 */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+			continue;
+
 		do {
 			start = max(start, vma->vm_start);
 			end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
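Note: the new check skips VMAs with none of the access bits set, because NUMA balancing marks pages PROT_NONE to generate hinting faults and must not mistake genuinely inaccessible mappings for its own markers. A standalone model of the test (ordinary C; the flag values mirror the kernel's VM_READ/VM_WRITE/VM_EXEC bits but are defined locally here):

#include <stdio.h>

#define VM_READ		0x00000001UL
#define VM_WRITE	0x00000002UL
#define VM_EXEC		0x00000004UL

/* a PROT_NONE mapping has none of the access bits set */
static int vma_is_accessible(unsigned long vm_flags)
{
	return (vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) != 0;
}

int main(void)
{
	printf("PROT_NONE vma: %s\n", vma_is_accessible(0) ? "scan" : "skip");
	printf("readable vma:  %s\n", vma_is_accessible(VM_READ) ? "scan" : "skip");
	printf("exec-only vma: %s\n", vma_is_accessible(VM_EXEC) ? "scan" : "skip");
	return 0;
}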
@@ -3015,8 +3008,7 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	}
 }
 
-static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-				     unsigned long delta_exec)
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 {
 	/* dock delta_exec before expiring quota (as it could span periods) */
 	cfs_rq->runtime_remaining -= delta_exec;
@@ -3034,7 +3026,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 }
 
 static __always_inline
-void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 {
 	if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
 		return;
@@ -3574,8 +3566,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
 	return rq_clock_task(rq_of(cfs_rq));
 }
 
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-				   unsigned long delta_exec) {}
+static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
 static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
@@ -5379,10 +5370,31 @@ void update_group_power(struct sched_domain *sd, int cpu)
 		 */
 
 		for_each_cpu(cpu, sched_group_cpus(sdg)) {
-			struct sched_group *sg = cpu_rq(cpu)->sd->groups;
+			struct sched_group_power *sgp;
+			struct rq *rq = cpu_rq(cpu);
+
+			/*
+			 * build_sched_domains() -> init_sched_groups_power()
+			 * gets here before we've attached the domains to the
+			 * runqueues.
+			 *
+			 * Use power_of(), which is set irrespective of domains
+			 * in update_cpu_power().
+			 *
+			 * This avoids power/power_orig from being 0 and
+			 * causing divide-by-zero issues on boot.
+			 *
+			 * Runtime updates will correct power_orig.
+			 */
+			if (unlikely(!rq->sd)) {
+				power_orig += power_of(cpu);
+				power += power_of(cpu);
+				continue;
+			}
 
-			power_orig += sg->sgp->power_orig;
-			power += sg->sgp->power;
+			sgp = rq->sd->groups->sgp;
+			power_orig += sgp->power_orig;
+			power += sgp->power;
 		}
 	} else {
 		/*
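
Note: the guard above matters because group power later ends up as a divisor in the load balancer's capacity math, so a zero contribution from a not-yet-attached domain would trap at boot. A standalone model of the fallback (ordinary C, not kernel structures; cpu_power stands in for the per-rq value that update_cpu_power() maintains independently of the domain tree):

#include <stdio.h>

#define SCHED_POWER_SCALE	1024UL

struct rq_model {
	int sd_attached;		/* stands in for rq->sd != NULL */
	unsigned long group_power;	/* rq->sd->groups->sgp->power */
	unsigned long cpu_power;	/* what power_of(cpu) would return */
};

static unsigned long power_contribution(const struct rq_model *rq)
{
	if (!rq->sd_attached)		/* boot: domains not attached yet */
		return rq->cpu_power;	/* never 0, so no /0 later */
	return rq->group_power;
}

int main(void)
{
	struct rq_model booting = { 0, 0, SCHED_POWER_SCALE };
	struct rq_model running = { 1, 2 * SCHED_POWER_SCALE, SCHED_POWER_SCALE };

	printf("boot:    +%lu\n", power_contribution(&booting));	/* 1024 */
	printf("runtime: +%lu\n", power_contribution(&running));	/* 2048 */
	return 0;
}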
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7d57275fc396..1c4065575fa2 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -901,6 +901,13 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * Change rq's cpupri only if rt_rq is the top queue.
+	 */
+	if (&rq->rt != rt_rq)
+		return;
+#endif
 	if (rq->online && prio < prev_prio)
 		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
@@ -910,6 +917,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * Change rq's cpupri only if rt_rq is the top queue.
+	 */
+	if (&rq->rt != rt_rq)
+		return;
+#endif
 	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
 		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
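
Note: with CONFIG_RT_GROUP_SCHED each task group has its own per-CPU rt_rq, but cpupri is a per-CPU property and must track only the root queue embedded in the rq. The guard in both hunks is a plain address comparison; a standalone model (ordinary C, not the kernel's structures):

#include <stdio.h>

/* Each rq embeds its root rt_rq; group rt_rqs are separate objects. */
struct rt_rq { int highest_prio; };
struct rq   { struct rt_rq rt; };

static int is_top_rt_rq(struct rq *rq, struct rt_rq *rt_rq)
{
	return rt_rq == &rq->rt;	/* address identity picks out the root */
}

int main(void)
{
	struct rq rq = { { 99 } };
	struct rt_rq group_rt_rq = { 99 };	/* a task group's per-CPU queue */

	printf("root queue:  %s cpupri\n",
	       is_top_rt_rq(&rq, &rq.rt) ? "update" : "skip");
	printf("group queue: %s cpupri\n",
	       is_top_rt_rq(&rq, &group_rt_rq) ? "update" : "skip");
	return 0;
}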