author      Linus Torvalds <torvalds@linux-foundation.org>    2018-01-30 14:55:56 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>    2018-01-30 14:55:56 -0500
commit      af8c5e2d6071c71d228788d1ebb0b9676829001a (patch)
tree        c898379e89ed05fdc5c6b7ebddbf4a8d50f11657
parent      a1c75e17e7d1306d35d51d3c330a13f42eba1d2d (diff)
parent      07881166a892fa4908ac4924660a7793f75d6544 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes in this cycle were:
- Implement frequency/CPU invariance and OPP selection for
SCHED_DEADLINE (Juri Lelli)
- Tweak the task migration logic for better multi-tasking
workload scalability (Mel Gorman)
- Misc cleanups, fixes and improvements"
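
Concretely, the frequency/CPU-invariance work changes how SCHED_DEADLINE charges consumed runtime: in update_curr_dl() (see the kernel/sched/deadline.c hunks below) the raw delta_exec is scaled by the current frequency and by the CPU's maximum capacity before it is subtracted from the reservation, roughly

    scaled_delta_exec = delta_exec * (scale_freq / 1024) * (scale_cpu / 1024)

with scale_freq = arch_scale_freq_capacity(cpu) and scale_cpu = arch_scale_cpu_capacity(NULL, cpu), both expressed against SCHED_CAPACITY_SCALE (1024). Tasks that use GRUB reclaiming are charged through grub_reclaim() instead (GRUB-PA). This formula is a summary of the hunks below, not wording from the commit itself.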
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/deadline: Make bandwidth enforcement scale-invariant
sched/cpufreq: Move arch_scale_{freq,cpu}_capacity() outside of #ifdef CONFIG_SMP
sched/cpufreq: Remove arch_scale_freq_capacity()'s 'sd' parameter
sched/cpufreq: Always consider all CPUs when deciding next freq
sched/cpufreq: Split utilization signals
sched/cpufreq: Change the worker kthread to SCHED_DEADLINE
sched/deadline: Move CPU frequency selection triggering points
sched/cpufreq: Use the DEADLINE utilization signal
sched/deadline: Implement "runtime overrun signal" support
sched/fair: Only immediately migrate tasks due to interrupts if prev and target CPUs share cache
sched/fair: Correct obsolete comment about cpufreq_update_util()
sched/fair: Remove impossible condition from find_idlest_group_cpu()
sched/cpufreq: Don't pass flags to sugov_set_iowait_boost()
sched/cpufreq: Initialize sg_cpu->flags to 0
sched/fair: Consider RT/IRQ pressure in capacity_spare_wake()
sched/fair: Use 'unsigned long' for utilization, consistently
sched/core: Rework and clarify prepare_lock_switch()
sched/fair: Remove unused 'curr' parameter from wakeup_gran
sched/headers: Constify object_is_on_stack()
-rw-r--r--    include/linux/arch_topology.h       2
-rw-r--r--    include/linux/sched.h               5
-rw-r--r--    include/linux/sched/cpufreq.h       2
-rw-r--r--    include/linux/sched/task_stack.h    2
-rw-r--r--    include/linux/sched/topology.h      12
-rw-r--r--    include/uapi/linux/sched.h          5
-rw-r--r--    kernel/sched/core.c                 67
-rw-r--r--    kernel/sched/cpufreq_schedutil.c    93
-rw-r--r--    kernel/sched/deadline.c             143
-rw-r--r--    kernel/sched/fair.c                 39
-rw-r--r--    kernel/sched/sched.h                112
-rw-r--r--    kernel/time/posix-cpu-timers.c      18
12 files changed, 339 insertions, 161 deletions
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 304511267c82..2b709416de05 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -27,7 +27,7 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
 DECLARE_PER_CPU(unsigned long, freq_scale);
 
 static inline
-unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu)
+unsigned long topology_get_freq_scale(int cpu)
 {
     return per_cpu(freq_scale, cpu);
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68a504f6e474..166144c04ef6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -472,11 +472,15 @@ struct sched_dl_entity {
      * has not been executed yet. This flag is useful to avoid race
      * conditions between the inactive timer handler and the wakeup
      * code.
+     *
+     * @dl_overrun tells if the task asked to be informed about runtime
+     * overruns.
      */
     unsigned int dl_throttled : 1;
     unsigned int dl_boosted : 1;
     unsigned int dl_yielded : 1;
     unsigned int dl_non_contending : 1;
+    unsigned int dl_overrun : 1;
 
     /*
      * Bandwidth enforcement timer. Each -deadline task has its
@@ -1427,6 +1431,7 @@ extern int idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
 extern int sched_setattr(struct task_struct *, const struct sched_attr *);
+extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
 extern struct task_struct *idle_task(int cpu);
 
 /**
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index d1ad3d825561..0b55834efd46 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -12,8 +12,6 @@
 #define SCHED_CPUFREQ_DL (1U << 1)
 #define SCHED_CPUFREQ_IOWAIT (1U << 2)
 
-#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
-
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
     void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index cb4828aaa34f..6a841929073f 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -78,7 +78,7 @@ static inline void put_task_stack(struct task_struct *tsk) {}
 #define task_stack_end_corrupted(task) \
     (*(end_of_stack(task)) != STACK_END_MAGIC)
 
-static inline int object_is_on_stack(void *obj)
+static inline int object_is_on_stack(const void *obj)
 {
     void *stack = task_stack_page(current);
 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index cf257c2e728d..26347741ba50 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -7,6 +7,12 @@
 #include <linux/sched/idle.h>
 
 /*
+ * Increase resolution of cpu_capacity calculations
+ */
+#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
+#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
+
+/*
  * sched-domains (multiprocessor balancing) declarations:
  */
 #ifdef CONFIG_SMP
@@ -27,12 +33,6 @@
 #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
 #define SD_NUMA 0x4000 /* cross-node balancing */
 
-/*
- * Increase resolution of cpu_capacity calculations
- */
-#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
-#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
-
 #ifdef CONFIG_SCHED_SMT
 static inline int cpu_smt_flags(void)
 {
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 30a9e51bbb1e..22627f80063e 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -49,5 +49,10 @@
  */
 #define SCHED_FLAG_RESET_ON_FORK 0x01
 #define SCHED_FLAG_RECLAIM 0x02
+#define SCHED_FLAG_DL_OVERRUN 0x04
+
+#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
+                        SCHED_FLAG_RECLAIM | \
+                        SCHED_FLAG_DL_OVERRUN)
 
 #endif /* _UAPI_LINUX_SCHED_H */
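
To show how the new UAPI is meant to be used, here is a minimal userspace sketch (not part of this commit): it opts a SCHED_DEADLINE task in to runtime-overrun notification via SCHED_FLAG_DL_OVERRUN. The hand-copied struct sched_attr, the raw syscall(SYS_sched_setattr, ...) wrapper and the 2 ms/10 ms parameters are illustrative assumptions; the overrun itself is reported as SIGXCPU by the posix-cpu-timers change listed in the diffstat but not shown here.

/* Sketch only: opt a SCHED_DEADLINE task in to runtime-overrun signals. */
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/types.h>

struct sched_attr {                     /* hand-copied UAPI layout */
    __u32 size;
    __u32 sched_policy;
    __u64 sched_flags;
    __s32 sched_nice;
    __u32 sched_priority;
    __u64 sched_runtime;
    __u64 sched_deadline;
    __u64 sched_period;
};

#define SCHED_DEADLINE          6
#define SCHED_FLAG_DL_OVERRUN   0x04    /* new in this merge */

static void on_overrun(int sig)
{
    /* async-signal-safe accounting or abort logic only */
}

int main(void)
{
    struct sched_attr attr = {
        .size           = sizeof(attr),
        .sched_policy   = SCHED_DEADLINE,
        .sched_flags    = SCHED_FLAG_DL_OVERRUN,
        .sched_runtime  =  2 * 1000 * 1000,     /*  2 ms, illustrative */
        .sched_deadline = 10 * 1000 * 1000,     /* 10 ms, illustrative */
        .sched_period   = 10 * 1000 * 1000,
    };

    signal(SIGXCPU, on_overrun);        /* overruns arrive as SIGXCPU */

    if (syscall(SYS_sched_setattr, 0, &attr, 0))
        perror("sched_setattr");

    /* ... periodic real-time work ... */
    return 0;
}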
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5a31a85bbd84..3da7a2444a91 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2046,7 +2046,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
      * If the owning (remote) CPU is still in the middle of schedule() with
      * this task as prev, wait until its done referencing the task.
      *
-     * Pairs with the smp_store_release() in finish_lock_switch().
+     * Pairs with the smp_store_release() in finish_task().
      *
      * This ensures that tasks getting woken will be fully ordered against
      * their previous state and preserve Program Order.
@@ -2572,6 +2572,50 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
 
 #endif /* CONFIG_PREEMPT_NOTIFIERS */
 
+static inline void prepare_task(struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+    /*
+     * Claim the task as running, we do this before switching to it
+     * such that any running task will have this set.
+     */
+    next->on_cpu = 1;
+#endif
+}
+
+static inline void finish_task(struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+    /*
+     * After ->on_cpu is cleared, the task can be moved to a different CPU.
+     * We must ensure this doesn't happen until the switch is completely
+     * finished.
+     *
+     * In particular, the load of prev->state in finish_task_switch() must
+     * happen before this.
+     *
+     * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
+     */
+    smp_store_release(&prev->on_cpu, 0);
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+    /* this is a valid case when another task releases the spinlock */
+    rq->lock.owner = current;
+#endif
+    /*
+     * If we are tracking spinlock dependencies then we have to
+     * fix up the runqueue lock - which gets 'carried over' from
+     * prev into current:
+     */
+    spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
+    raw_spin_unlock_irq(&rq->lock);
+}
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -2592,7 +2636,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
     sched_info_switch(rq, prev, next);
     perf_event_task_sched_out(prev, next);
     fire_sched_out_preempt_notifiers(prev, next);
-    prepare_lock_switch(rq, next);
+    prepare_task(next);
     prepare_arch_switch(next);
 }
 
@@ -2647,7 +2691,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
      * the scheduled task must drop that reference.
      *
      * We must observe prev->state before clearing prev->on_cpu (in
-     * finish_lock_switch), otherwise a concurrent wakeup can get prev
+     * finish_task), otherwise a concurrent wakeup can get prev
      * running on another CPU and we could rave with its RUNNING -> DEAD
      * transition, resulting in a double drop.
      */
@@ -2664,7 +2708,8 @@ static struct rq *finish_task_switch(struct task_struct *prev)
      * to use.
      */
     smp_mb__after_unlock_lock();
-    finish_lock_switch(rq, prev);
+    finish_task(prev);
+    finish_lock_switch(rq);
     finish_arch_post_lock_switch();
 
     fire_sched_in_preempt_notifiers(current);
@@ -4041,8 +4086,7 @@ recheck:
            return -EINVAL;
     }
 
-    if (attr->sched_flags &
-        ~(SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_RECLAIM))
+    if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV))
         return -EINVAL;
 
     /*
@@ -4109,6 +4153,9 @@ recheck:
     }
 
     if (user) {
+        if (attr->sched_flags & SCHED_FLAG_SUGOV)
+            return -EINVAL;
+
         retval = security_task_setscheduler(p);
         if (retval)
             return retval;
@@ -4164,7 +4211,8 @@ change:
         }
 #endif
 #ifdef CONFIG_SMP
-        if (dl_bandwidth_enabled() && dl_policy(policy)) {
+        if (dl_bandwidth_enabled() && dl_policy(policy) &&
+            !(attr->sched_flags & SCHED_FLAG_SUGOV)) {
             cpumask_t *span = rq->rd->span;
 
             /*
@@ -4294,6 +4342,11 @@ int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 }
 EXPORT_SYMBOL_GPL(sched_setattr);
 
+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
+{
+    return __sched_setscheduler(p, attr, false, true);
+}
+
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
  * @p: the task in question.
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d6717a3331a1..dd062a1c8cf0 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -60,7 +60,8 @@ struct sugov_cpu {
     u64 last_update;
 
     /* The fields below are only needed when sharing a policy. */
-    unsigned long util;
+    unsigned long util_cfs;
+    unsigned long util_dl;
     unsigned long max;
     unsigned int flags;
 
@@ -176,21 +177,28 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
     return cpufreq_driver_resolve_freq(policy, freq);
 }
 
-static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
+static void sugov_get_util(struct sugov_cpu *sg_cpu)
 {
-    struct rq *rq = cpu_rq(cpu);
-    unsigned long cfs_max;
+    struct rq *rq = cpu_rq(sg_cpu->cpu);
 
-    cfs_max = arch_scale_cpu_capacity(NULL, cpu);
+    sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
+    sg_cpu->util_cfs = cpu_util_cfs(rq);
+    sg_cpu->util_dl = cpu_util_dl(rq);
+}
 
-    *util = min(rq->cfs.avg.util_avg, cfs_max);
-    *max = cfs_max;
+static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
+{
+    /*
+     * Ideally we would like to set util_dl as min/guaranteed freq and
+     * util_cfs + util_dl as requested freq. However, cpufreq is not yet
+     * ready for such an interface. So, we only do the latter for now.
+     */
+    return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
 }
 
-static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
-                                   unsigned int flags)
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
 {
-    if (flags & SCHED_CPUFREQ_IOWAIT) {
+    if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
         if (sg_cpu->iowait_boost_pending)
             return;
 
@@ -264,7 +272,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
     unsigned int next_f;
     bool busy;
 
-    sugov_set_iowait_boost(sg_cpu, time, flags);
+    sugov_set_iowait_boost(sg_cpu, time);
     sg_cpu->last_update = time;
 
     if (!sugov_should_update_freq(sg_policy, time))
@@ -272,10 +280,12 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 
     busy = sugov_cpu_is_busy(sg_cpu);
 
-    if (flags & SCHED_CPUFREQ_RT_DL) {
+    if (flags & SCHED_CPUFREQ_RT) {
         next_f = policy->cpuinfo.max_freq;
     } else {
-        sugov_get_util(&util, &max, sg_cpu->cpu);
+        sugov_get_util(sg_cpu);
+        max = sg_cpu->max;
+        util = sugov_aggregate_util(sg_cpu);
         sugov_iowait_boost(sg_cpu, &util, &max);
         next_f = get_next_freq(sg_policy, util, max);
         /*
@@ -305,23 +315,27 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
         s64 delta_ns;
 
         /*
-         * If the CPU utilization was last updated before the previous
-         * frequency update and the time elapsed between the last update
-         * of the CPU utilization and the last frequency update is long
-         * enough, don't take the CPU into account as it probably is
-         * idle now (and clear iowait_boost for it).
+         * If the CFS CPU utilization was last updated before the
+         * previous frequency update and the time elapsed between the
+         * last update of the CPU utilization and the last frequency
+         * update is long enough, reset iowait_boost and util_cfs, as
+         * they are now probably stale. However, still consider the
+         * CPU contribution if it has some DEADLINE utilization
+         * (util_dl).
         */
        delta_ns = time - j_sg_cpu->last_update;
        if (delta_ns > TICK_NSEC) {
            j_sg_cpu->iowait_boost = 0;
            j_sg_cpu->iowait_boost_pending = false;
-           continue;
+           j_sg_cpu->util_cfs = 0;
+           if (j_sg_cpu->util_dl == 0)
+               continue;
        }
-       if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
+       if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
            return policy->cpuinfo.max_freq;
 
-       j_util = j_sg_cpu->util;
        j_max = j_sg_cpu->max;
+       j_util = sugov_aggregate_util(j_sg_cpu);
        if (j_util * max > j_max * util) {
            util = j_util;
            max = j_max;
@@ -338,22 +352,18 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 {
     struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
     struct sugov_policy *sg_policy = sg_cpu->sg_policy;
-    unsigned long util, max;
     unsigned int next_f;
 
-    sugov_get_util(&util, &max, sg_cpu->cpu);
-
     raw_spin_lock(&sg_policy->update_lock);
 
-    sg_cpu->util = util;
-    sg_cpu->max = max;
+    sugov_get_util(sg_cpu);
     sg_cpu->flags = flags;
 
-    sugov_set_iowait_boost(sg_cpu, time, flags);
+    sugov_set_iowait_boost(sg_cpu, time);
     sg_cpu->last_update = time;
 
     if (sugov_should_update_freq(sg_policy, time)) {
-        if (flags & SCHED_CPUFREQ_RT_DL)
+        if (flags & SCHED_CPUFREQ_RT)
             next_f = sg_policy->policy->cpuinfo.max_freq;
         else
             next_f = sugov_next_freq_shared(sg_cpu, time);
@@ -383,9 +393,9 @@ static void sugov_irq_work(struct irq_work *irq_work)
     sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 
     /*
-     * For RT and deadline tasks, the schedutil governor shoots the
-     * frequency to maximum. Special care must be taken to ensure that this
-     * kthread doesn't result in the same behavior.
+     * For RT tasks, the schedutil governor shoots the frequency to maximum.
+     * Special care must be taken to ensure that this kthread doesn't result
+     * in the same behavior.
      *
      * This is (mostly) guaranteed by the work_in_progress flag. The flag is
     * updated only at the end of the sugov_work() function and before that
@@ -470,7 +480,20 @@ static void sugov_policy_free(struct sugov_policy *sg_policy)
 static int sugov_kthread_create(struct sugov_policy *sg_policy)
 {
     struct task_struct *thread;
-    struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+    struct sched_attr attr = {
+        .size = sizeof(struct sched_attr),
+        .sched_policy = SCHED_DEADLINE,
+        .sched_flags = SCHED_FLAG_SUGOV,
+        .sched_nice = 0,
+        .sched_priority = 0,
+        /*
+         * Fake (unused) bandwidth; workaround to "fix"
+         * priority inheritance.
+         */
+        .sched_runtime = 1000000,
+        .sched_deadline = 10000000,
+        .sched_period = 10000000,
+    };
     struct cpufreq_policy *policy = sg_policy->policy;
     int ret;
 
@@ -488,10 +511,10 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
         return PTR_ERR(thread);
     }
 
-    ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
+    ret = sched_setattr_nocheck(thread, &attr);
     if (ret) {
         kthread_stop(thread);
-        pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
+        pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
         return ret;
     }
 
@@ -655,7 +678,7 @@ static int sugov_start(struct cpufreq_policy *policy)
         memset(sg_cpu, 0, sizeof(*sg_cpu));
         sg_cpu->cpu = cpu;
         sg_cpu->sg_policy = sg_policy;
-        sg_cpu->flags = SCHED_CPUFREQ_RT;
+        sg_cpu->flags = 0;
         sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
     }
 
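
For intuition, a standalone sketch of how schedutil sizes its frequency request once the utilization signals are split. This is illustrative user-space code, not the kernel implementation; the 1.25 headroom factor mirrors schedutil's get_next_freq(), which is not part of the hunks above, so treat it as an assumption.

/* Sketch: CFS + DEADLINE utilization -> frequency request. */
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
    return a < b ? a : b;
}

/* Mirrors sugov_aggregate_util(): sum the two signals, clamp to capacity. */
static unsigned long aggregate_util(unsigned long util_cfs,
                                    unsigned long util_dl, unsigned long max)
{
    return min_ul(util_cfs + util_dl, max);
}

int main(void)
{
    unsigned long max = 1024;                   /* CPU capacity scale */
    unsigned long util_cfs = 300, util_dl = 200;    /* illustrative values */
    unsigned long max_freq = 2000000;           /* kHz, illustrative */

    unsigned long util = aggregate_util(util_cfs, util_dl, max);
    /* get_next_freq()-style mapping: ~1.25 * max_freq * util / max */
    unsigned long next_freq = (max_freq + (max_freq >> 2)) * util / max;

    printf("util %lu/%lu -> request ~%lu kHz\n", util, max, next_freq);
    return 0;   /* RT tasks still pin the policy to max_freq */
}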
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2473736c7616..9bb0e0c412ec 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -78,7 +78,7 @@ static inline int dl_bw_cpus(int i)
 #endif
 
 static inline
-void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
     u64 old = dl_rq->running_bw;
 
@@ -86,10 +86,12 @@ void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
     dl_rq->running_bw += dl_bw;
     SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
     SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+    /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+    cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
 }
 
 static inline
-void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
     u64 old = dl_rq->running_bw;
 
@@ -98,10 +100,12 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
     SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
     if (dl_rq->running_bw > old)
         dl_rq->running_bw = 0;
+    /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+    cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
 }
 
 static inline
-void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
     u64 old = dl_rq->this_bw;
 
@@ -111,7 +115,7 @@ void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 }
 
 static inline
-void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
     u64 old = dl_rq->this_bw;
 
@@ -123,16 +127,46 @@ void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
     SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
 }
 
+static inline
+void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+    if (!dl_entity_is_special(dl_se))
+        __add_rq_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+    if (!dl_entity_is_special(dl_se))
+        __sub_rq_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+    if (!dl_entity_is_special(dl_se))
+        __add_running_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+    if (!dl_entity_is_special(dl_se))
+        __sub_running_bw(dl_se->dl_bw, dl_rq);
+}
+
 void dl_change_utilization(struct task_struct *p, u64 new_bw)
 {
     struct rq *rq;
 
+    BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
+
     if (task_on_rq_queued(p))
         return;
 
     rq = task_rq(p);
     if (p->dl.dl_non_contending) {
-        sub_running_bw(p->dl.dl_bw, &rq->dl);
+        sub_running_bw(&p->dl, &rq->dl);
         p->dl.dl_non_contending = 0;
         /*
          * If the timer handler is currently running and the
@@ -144,8 +178,8 @@ void dl_change_utilization(struct task_struct *p, u64 new_bw)
         if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
             put_task_struct(p);
     }
-    sub_rq_bw(p->dl.dl_bw, &rq->dl);
-    add_rq_bw(new_bw, &rq->dl);
+    __sub_rq_bw(p->dl.dl_bw, &rq->dl);
+    __add_rq_bw(new_bw, &rq->dl);
 }
 
 /*
@@ -217,6 +251,9 @@ static void task_non_contending(struct task_struct *p)
     if (dl_se->dl_runtime == 0)
         return;
 
+    if (dl_entity_is_special(dl_se))
+        return;
+
     WARN_ON(hrtimer_active(&dl_se->inactive_timer));
     WARN_ON(dl_se->dl_non_contending);
 
@@ -236,12 +273,12 @@ static void task_non_contending(struct task_struct *p)
      */
     if (zerolag_time < 0) {
         if (dl_task(p))
-            sub_running_bw(dl_se->dl_bw, dl_rq);
+            sub_running_bw(dl_se, dl_rq);
         if (!dl_task(p) || p->state == TASK_DEAD) {
             struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
             if (p->state == TASK_DEAD)
-                sub_rq_bw(p->dl.dl_bw, &rq->dl);
+                sub_rq_bw(&p->dl, &rq->dl);
             raw_spin_lock(&dl_b->lock);
             __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
             __dl_clear_params(p);
@@ -268,7 +305,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
         return;
 
     if (flags & ENQUEUE_MIGRATED)
-        add_rq_bw(dl_se->dl_bw, dl_rq);
+        add_rq_bw(dl_se, dl_rq);
 
     if (dl_se->dl_non_contending) {
         dl_se->dl_non_contending = 0;
@@ -289,7 +326,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
          * when the "inactive timer" fired).
          * So, add it back.
          */
-        add_running_bw(dl_se->dl_bw, dl_rq);
+        add_running_bw(dl_se, dl_rq);
     }
 }
 
@@ -1114,7 +1151,8 @@ static void update_curr_dl(struct rq *rq)
 {
     struct task_struct *curr = rq->curr;
     struct sched_dl_entity *dl_se = &curr->dl;
-    u64 delta_exec;
+    u64 delta_exec, scaled_delta_exec;
+    int cpu = cpu_of(rq);
 
     if (!dl_task(curr) || !on_dl_rq(dl_se))
         return;
@@ -1134,9 +1172,6 @@ static void update_curr_dl(struct rq *rq)
         return;
     }
 
-    /* kick cpufreq (see the comment in kernel/sched/sched.h). */
-    cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
-
     schedstat_set(curr->se.statistics.exec_max,
                   max(curr->se.statistics.exec_max, delta_exec));
 
@@ -1148,13 +1183,39 @@ static void update_curr_dl(struct rq *rq)
 
     sched_rt_avg_update(rq, delta_exec);
 
-    if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM))
-        delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
-    dl_se->runtime -= delta_exec;
+    if (dl_entity_is_special(dl_se))
+        return;
+
+    /*
+     * For tasks that participate in GRUB, we implement GRUB-PA: the
+     * spare reclaimed bandwidth is used to clock down frequency.
+     *
+     * For the others, we still need to scale reservation parameters
+     * according to current frequency and CPU maximum capacity.
+     */
+    if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
+        scaled_delta_exec = grub_reclaim(delta_exec,
+                                         rq,
+                                         &curr->dl);
+    } else {
+        unsigned long scale_freq = arch_scale_freq_capacity(cpu);
+        unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+
+        scaled_delta_exec = cap_scale(delta_exec, scale_freq);
+        scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
+    }
+
+    dl_se->runtime -= scaled_delta_exec;
 
 throttle:
     if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
         dl_se->dl_throttled = 1;
+
+        /* If requested, inform the user about runtime overruns. */
+        if (dl_runtime_exceeded(dl_se) &&
+            (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
+            dl_se->dl_overrun = 1;
+
         __dequeue_task_dl(rq, curr, 0);
         if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
             enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
@@ -1204,8 +1265,8 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
         struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
         if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
-            sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
-            sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+            sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
+            sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
             dl_se->dl_non_contending = 0;
         }
 
@@ -1222,7 +1283,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
     sched_clock_tick();
     update_rq_clock(rq);
 
-    sub_running_bw(dl_se->dl_bw, &rq->dl);
+    sub_running_bw(dl_se, &rq->dl);
     dl_se->dl_non_contending = 0;
 unlock:
     task_rq_unlock(rq, p, &rf);
@@ -1416,8 +1477,8 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
         dl_check_constrained_dl(&p->dl);
 
     if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
-        add_rq_bw(p->dl.dl_bw, &rq->dl);
-        add_running_bw(p->dl.dl_bw, &rq->dl);
+        add_rq_bw(&p->dl, &rq->dl);
+        add_running_bw(&p->dl, &rq->dl);
     }
 
     /*
@@ -1457,8 +1518,8 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
     __dequeue_task_dl(rq, p, flags);
 
     if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
-        sub_running_bw(p->dl.dl_bw, &rq->dl);
-        sub_rq_bw(p->dl.dl_bw, &rq->dl);
+        sub_running_bw(&p->dl, &rq->dl);
+        sub_rq_bw(&p->dl, &rq->dl);
     }
 
     /*
@@ -1564,7 +1625,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
      */
     raw_spin_lock(&rq->lock);
     if (p->dl.dl_non_contending) {
-        sub_running_bw(p->dl.dl_bw, &rq->dl);
+        sub_running_bw(&p->dl, &rq->dl);
         p->dl.dl_non_contending = 0;
         /*
          * If the timer handler is currently running and the
@@ -1576,7 +1637,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
         if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
             put_task_struct(p);
     }
-    sub_rq_bw(p->dl.dl_bw, &rq->dl);
+    sub_rq_bw(&p->dl, &rq->dl);
     raw_spin_unlock(&rq->lock);
 }
 
@@ -2019,11 +2080,11 @@ retry:
     }
 
     deactivate_task(rq, next_task, 0);
-    sub_running_bw(next_task->dl.dl_bw, &rq->dl);
-    sub_rq_bw(next_task->dl.dl_bw, &rq->dl);
+    sub_running_bw(&next_task->dl, &rq->dl);
+    sub_rq_bw(&next_task->dl, &rq->dl);
     set_task_cpu(next_task, later_rq->cpu);
-    add_rq_bw(next_task->dl.dl_bw, &later_rq->dl);
-    add_running_bw(next_task->dl.dl_bw, &later_rq->dl);
+    add_rq_bw(&next_task->dl, &later_rq->dl);
+    add_running_bw(&next_task->dl, &later_rq->dl);
     activate_task(later_rq, next_task, 0);
     ret = 1;
 
@@ -2111,11 +2172,11 @@ static void pull_dl_task(struct rq *this_rq)
             resched = true;
 
             deactivate_task(src_rq, p, 0);
-            sub_running_bw(p->dl.dl_bw, &src_rq->dl);
-            sub_rq_bw(p->dl.dl_bw, &src_rq->dl);
+            sub_running_bw(&p->dl, &src_rq->dl);
+            sub_rq_bw(&p->dl, &src_rq->dl);
             set_task_cpu(p, this_cpu);
-            add_rq_bw(p->dl.dl_bw, &this_rq->dl);
-            add_running_bw(p->dl.dl_bw, &this_rq->dl);
+            add_rq_bw(&p->dl, &this_rq->dl);
+            add_running_bw(&p->dl, &this_rq->dl);
             activate_task(this_rq, p, 0);
             dmin = p->dl.deadline;
 
@@ -2224,7 +2285,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
     task_non_contending(p);
 
     if (!task_on_rq_queued(p))
-        sub_rq_bw(p->dl.dl_bw, &rq->dl);
+        sub_rq_bw(&p->dl, &rq->dl);
 
     /*
      * We cannot use inactive_task_timer() to invoke sub_running_bw()
@@ -2256,7 +2317,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 
     /* If p is not queued we will update its parameters at next wakeup. */
     if (!task_on_rq_queued(p)) {
-        add_rq_bw(p->dl.dl_bw, &rq->dl);
+        add_rq_bw(&p->dl, &rq->dl);
 
         return;
     }
@@ -2435,6 +2496,9 @@ int sched_dl_overflow(struct task_struct *p, int policy,
     u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
     int cpus, err = -1;
 
+    if (attr->sched_flags & SCHED_FLAG_SUGOV)
+        return 0;
+
     /* !deadline task may carry old deadline bandwidth */
     if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
         return 0;
@@ -2521,6 +2585,10 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  */
 bool __checkparam_dl(const struct sched_attr *attr)
 {
+    /* special dl tasks don't actually use any parameter */
+    if (attr->sched_flags & SCHED_FLAG_SUGOV)
+        return true;
+
     /* deadline != 0 */
     if (attr->sched_deadline == 0)
         return false;
@@ -2566,6 +2634,7 @@ void __dl_clear_params(struct task_struct *p)
     dl_se->dl_throttled = 0;
     dl_se->dl_yielded = 0;
     dl_se->dl_non_contending = 0;
+    dl_se->dl_overrun = 0;
 }
 
 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
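
A worked example of the scale-invariant charging added to update_curr_dl() above. This is a standalone sketch: cap_scale() is re-implemented outside the kernel, and the two scale values are made up (512/1024 models a CPU running at half frequency, 768/1024 a CPU with 75% of the biggest CPU's capacity).

/* Sketch: how much runtime 1 ms of execution costs on a scaled-down CPU. */
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1UL << SCHED_CAPACITY_SHIFT)
#define cap_scale(v, s)         ((v) * (s) >> SCHED_CAPACITY_SHIFT)

int main(void)
{
    unsigned long long delta_exec = 1000000;    /* 1 ms, in ns */
    unsigned long scale_freq = 512;     /* arch_scale_freq_capacity(): 50% */
    unsigned long scale_cpu = 768;      /* arch_scale_cpu_capacity():  75% */

    unsigned long long scaled = cap_scale(delta_exec, scale_freq);

    scaled = cap_scale(scaled, scale_cpu);

    /* 1 ms * 0.5 * 0.75 = 0.375 ms charged against dl_se->runtime */
    printf("charged %llu ns instead of %llu ns\n", scaled, delta_exec);
    return 0;
}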
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26a71ebcd3c2..7b6535987500 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3020,9 +3020,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
     /*
      * There are a few boundary cases this might miss but it should
      * get called often enough that that should (hopefully) not be
-     * a real problem -- added to that it only calls on the local
-     * CPU, so if we enqueue remotely we'll miss an update, but
-     * the next tick/schedule should update.
+     * a real problem.
      *
      * It will not get called when we go idle, because the idle
      * thread is a different class (!fair), nor will the utilization
@@ -3091,8 +3089,6 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
     return c1 + c2 + c3;
 }
 
-#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
-
 /*
  * Accumulate the three separate parts of the sum; d1 the remainder
  * of the last (incomplete) period, d2 the span of full periods and d3
@@ -3122,7 +3118,7 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
     u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
     u64 periods;
 
-    scale_freq = arch_scale_freq_capacity(NULL, cpu);
+    scale_freq = arch_scale_freq_capacity(cpu);
     scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
 
     delta += sa->period_contrib;
@@ -5689,8 +5685,8 @@ static int wake_wide(struct task_struct *p)
  * soonest. For the purpose of speed we only consider the waking and previous
  * CPU.
  *
- * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or
- *                      will be) idle.
+ * wake_affine_idle() - only considers 'now', it check if the waking CPU is
+ *                      cache-affine and is (or will be) idle.
  *
  * wake_affine_weight() - considers the weight to reflect the average
  *                        scheduling latency of the CPUs. This seems to work
@@ -5701,7 +5697,13 @@ static bool
 wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
                  int this_cpu, int prev_cpu, int sync)
 {
-    if (idle_cpu(this_cpu))
+    /*
+     * If this_cpu is idle, it implies the wakeup is from interrupt
+     * context. Only allow the move if cache is shared. Otherwise an
+     * interrupt intensive workload could force all tasks onto one
+     * node depending on the IO topology or IRQ affinity settings.
+     */
+    if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
         return true;
 
     if (sync && cpu_rq(this_cpu)->nr_running == 1)
@@ -5765,12 +5767,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
     return affine;
 }
 
-static inline int task_util(struct task_struct *p);
-static int cpu_util_wake(int cpu, struct task_struct *p);
+static inline unsigned long task_util(struct task_struct *p);
+static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
 
 static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
 {
-    return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+    return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
 }
 
 /*
@@ -5950,7 +5952,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
             }
         } else if (shallowest_idle_cpu == -1) {
             load = weighted_cpuload(cpu_rq(i));
-            if (load < min_load || (load == min_load && i == this_cpu)) {
+            if (load < min_load) {
                 min_load = load;
                 least_loaded_cpu = i;
             }
@@ -6247,7 +6249,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
  * capacity_orig) as it useful for predicting the capacity required after task
  * migrations (scheduler-driven DVFS).
  */
-static int cpu_util(int cpu)
+static unsigned long cpu_util(int cpu)
 {
     unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
     unsigned long capacity = capacity_orig_of(cpu);
@@ -6255,7 +6257,7 @@ static int cpu_util(int cpu)
     return (util >= capacity) ? capacity : util;
 }
 
-static inline int task_util(struct task_struct *p)
+static inline unsigned long task_util(struct task_struct *p)
 {
     return p->se.avg.util_avg;
 }
@@ -6264,7 +6266,7 @@ static inline int task_util(struct task_struct *p)
  * cpu_util_wake: Compute cpu utilization with any contributions from
  * the waking task p removed.
  */
-static int cpu_util_wake(int cpu, struct task_struct *p)
+static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 {
     unsigned long util, capacity;
 
@@ -6449,8 +6451,7 @@ static void task_dead_fair(struct task_struct *p)
 }
 #endif /* CONFIG_SMP */
 
-static unsigned long
-wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
+static unsigned long wakeup_gran(struct sched_entity *se)
 {
     unsigned long gran = sysctl_sched_wakeup_granularity;
 
@@ -6492,7 +6493,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
     if (vdiff <= 0)
         return -1;
 
-    gran = wakeup_gran(curr, se);
+    gran = wakeup_gran(se);
     if (vdiff > gran)
         return 1;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b19552a212de..2e95505e23c6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -156,13 +156,39 @@ static inline int task_has_dl_policy(struct task_struct *p) | |||
156 | return dl_policy(p->policy); | 156 | return dl_policy(p->policy); |
157 | } | 157 | } |
158 | 158 | ||
159 | #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) | ||
160 | |||
161 | /* | ||
162 | * !! For sched_setattr_nocheck() (kernel) only !! | ||
163 | * | ||
164 | * This is actually gross. :( | ||
165 | * | ||
166 | * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE | ||
167 | * tasks, but still be able to sleep. We need this on platforms that cannot | ||
168 | * atomically change clock frequency. Remove once fast switching will be | ||
169 | * available on such platforms. | ||
170 | * | ||
171 | * SUGOV stands for SchedUtil GOVernor. | ||
172 | */ | ||
173 | #define SCHED_FLAG_SUGOV 0x10000000 | ||
174 | |||
175 | static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se) | ||
176 | { | ||
177 | #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL | ||
178 | return unlikely(dl_se->flags & SCHED_FLAG_SUGOV); | ||
179 | #else | ||
180 | return false; | ||
181 | #endif | ||
182 | } | ||
183 | |||
159 | /* | 184 | /* |
160 | * Tells if entity @a should preempt entity @b. | 185 | * Tells if entity @a should preempt entity @b. |
161 | */ | 186 | */ |
162 | static inline bool | 187 | static inline bool |
163 | dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) | 188 | dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) |
164 | { | 189 | { |
165 | return dl_time_before(a->deadline, b->deadline); | 190 | return dl_entity_is_special(a) || |
191 | dl_time_before(a->deadline, b->deadline); | ||
166 | } | 192 | } |
167 | 193 | ||
168 | /* | 194 | /* |
@@ -1328,47 +1354,6 @@ static inline int task_on_rq_migrating(struct task_struct *p) | |||
1328 | # define finish_arch_post_lock_switch() do { } while (0) | 1354 | # define finish_arch_post_lock_switch() do { } while (0) |
1329 | #endif | 1355 | #endif |
1330 | 1356 | ||
1331 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | ||
1332 | { | ||
1333 | #ifdef CONFIG_SMP | ||
1334 | /* | ||
1335 | * We can optimise this out completely for !SMP, because the | ||
1336 | * SMP rebalancing from interrupt is the only thing that cares | ||
1337 | * here. | ||
1338 | */ | ||
1339 | next->on_cpu = 1; | ||
1340 | #endif | ||
1341 | } | ||
1342 | |||
1343 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | ||
1344 | { | ||
1345 | #ifdef CONFIG_SMP | ||
1346 | /* | ||
1347 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
1348 | * We must ensure this doesn't happen until the switch is completely | ||
1349 | * finished. | ||
1350 | * | ||
1351 | * In particular, the load of prev->state in finish_task_switch() must | ||
1352 | * happen before this. | ||
1353 | * | ||
1354 | * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). | ||
1355 | */ | ||
1356 | smp_store_release(&prev->on_cpu, 0); | ||
1357 | #endif | ||
1358 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
1359 | /* this is a valid case when another task releases the spinlock */ | ||
1360 | rq->lock.owner = current; | ||
1361 | #endif | ||
1362 | /* | ||
1363 | * If we are tracking spinlock dependencies then we have to | ||
1364 | * fix up the runqueue lock - which gets 'carried over' from | ||
1365 | * prev into current: | ||
1366 | */ | ||
1367 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); | ||
1368 | |||
1369 | raw_spin_unlock_irq(&rq->lock); | ||
1370 | } | ||
1371 | |||
1372 | /* | 1357 | /* |
1373 | * wake flags | 1358 | * wake flags |
1374 | */ | 1359 | */ |
@@ -1687,17 +1672,17 @@ static inline int hrtick_enabled(struct rq *rq) | |||
1687 | 1672 | ||
1688 | #endif /* CONFIG_SCHED_HRTICK */ | 1673 | #endif /* CONFIG_SCHED_HRTICK */ |
1689 | 1674 | ||
1690 | #ifdef CONFIG_SMP | ||
1691 | extern void sched_avg_update(struct rq *rq); | ||
1692 | |||
1693 | #ifndef arch_scale_freq_capacity | 1675 | #ifndef arch_scale_freq_capacity |
1694 | static __always_inline | 1676 | static __always_inline |
1695 | unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) | 1677 | unsigned long arch_scale_freq_capacity(int cpu) |
1696 | { | 1678 | { |
1697 | return SCHED_CAPACITY_SCALE; | 1679 | return SCHED_CAPACITY_SCALE; |
1698 | } | 1680 | } |
1699 | #endif | 1681 | #endif |
1700 | 1682 | ||
1683 | #ifdef CONFIG_SMP | ||
1684 | extern void sched_avg_update(struct rq *rq); | ||
1685 | |||
1701 | #ifndef arch_scale_cpu_capacity | 1686 | #ifndef arch_scale_cpu_capacity |
1702 | static __always_inline | 1687 | static __always_inline |
1703 | unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) | 1688 | unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) |
@@ -1711,10 +1696,17 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) | |||
1711 | 1696 | ||
1712 | static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) | 1697 | static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) |
1713 | { | 1698 | { |
1714 | rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); | 1699 | rq->rt_avg += rt_delta * arch_scale_freq_capacity(cpu_of(rq)); |
1715 | sched_avg_update(rq); | 1700 | sched_avg_update(rq); |
1716 | } | 1701 | } |
1717 | #else | 1702 | #else |
1703 | #ifndef arch_scale_cpu_capacity | ||
1704 | static __always_inline | ||
1705 | unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) | ||
1706 | { | ||
1707 | return SCHED_CAPACITY_SCALE; | ||
1708 | } | ||
1709 | #endif | ||
1718 | static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } | 1710 | static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } |
1719 | static inline void sched_avg_update(struct rq *rq) { } | 1711 | static inline void sched_avg_update(struct rq *rq) { } |
1720 | #endif | 1712 | #endif |
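Dropping the unused sched_domain argument is mechanical, but the arithmetic it feeds is the point of frequency invariance: arch_scale_freq_capacity() returns a value in [0, SCHED_CAPACITY_SCALE], so RT runtime accrued at reduced frequency is weighted down proportionally. A hypothetical worked example (numbers assumed, with SCHED_CAPACITY_SCALE == 1024):

/* Hypothetical numbers to show the scaling; not taken from the patch itself. */
static inline u64 example_rt_avg_contribution(void)
{
        u64 rt_delta = 1000000ULL;      /* 1 ms of RT execution, in ns             */
        unsigned long scale = 512;      /* CPU currently at ~50% of its max freq   */

        /* What sched_rt_avg_update() adds to rq->rt_avg for this delta:           */
        return rt_delta * scale;        /* 512000000, vs. 1024000000 at full speed */
}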
@@ -2096,14 +2088,14 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); | |||
2096 | * The way cpufreq is currently arranged requires it to evaluate the CPU | 2088 | * The way cpufreq is currently arranged requires it to evaluate the CPU |
2097 | * performance state (frequency/voltage) on a regular basis to prevent it from | 2089 | * performance state (frequency/voltage) on a regular basis to prevent it from |
2098 | * being stuck in a completely inadequate performance level for too long. | 2090 | * being stuck in a completely inadequate performance level for too long. |
2099 | * That is not guaranteed to happen if the updates are only triggered from CFS, | 2091 | * That is not guaranteed to happen if the updates are only triggered from CFS |
2100 | * though, because they may not be coming in if RT or deadline tasks are active | 2092 | * and DL, though, because they may not be coming in if only RT tasks are |
2101 | * all the time (or there are RT and DL tasks only). | 2093 | * active all the time (or there are RT tasks only). |
2102 | * | 2094 | * |
2103 | * As a workaround for that issue, this function is called by the RT and DL | 2095 | * As a workaround for that issue, this function is called periodically by the |
2104 | * sched classes to trigger extra cpufreq updates to prevent it from stalling, | 2096 | * RT sched class to trigger extra cpufreq updates to prevent it from stalling, |
2105 | * but that really is a band-aid. Going forward it should be replaced with | 2097 | * but that really is a band-aid. Going forward it should be replaced with |
2106 | * solutions targeted more specifically at RT and DL tasks. | 2098 | * solutions targeted more specifically at RT tasks. |
2107 | */ | 2099 | */ |
2108 | static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) | 2100 | static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) |
2109 | { | 2101 | { |
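A minimal, hedged sketch of the kind of caller the reworded comment describes; the function name here is illustrative, not the actual hook in kernel/sched/rt.c. The idea is simply that the RT class periodically nudges the governor so a CPU dominated by RT work is not left stuck at a low frequency:

/* Illustrative only: a periodic hook the RT class could use to poke cpufreq. */
static void example_rt_freq_kick(struct rq *rq)
{
        /* SCHED_CPUFREQ_RT asks schedutil to treat this CPU as "run fast". */
        cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
}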
@@ -2125,3 +2117,17 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} | |||
2125 | #else /* arch_scale_freq_capacity */ | 2117 | #else /* arch_scale_freq_capacity */ |
2126 | #define arch_scale_freq_invariant() (false) | 2118 | #define arch_scale_freq_invariant() (false) |
2127 | #endif | 2119 | #endif |
2120 | |||
2121 | #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL | ||
2122 | |||
2123 | static inline unsigned long cpu_util_dl(struct rq *rq) | ||
2124 | { | ||
2125 | return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; | ||
2126 | } | ||
2127 | |||
2128 | static inline unsigned long cpu_util_cfs(struct rq *rq) | ||
2129 | { | ||
2130 | return rq->cfs.avg.util_avg; | ||
2131 | } | ||
2132 | |||
2133 | #endif | ||
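The cpu_util_dl() conversion works because dl.running_bw is a fixed-point bandwidth in units of 2^BW_SHIFT: multiplying by SCHED_CAPACITY_SCALE and shifting right by BW_SHIFT re-expresses it on the same 0..1024 capacity scale as the CFS signal (a reserved bandwidth of 10% maps to roughly 102). A hedged sketch of how a consumer such as cpufreq_schedutil.c might aggregate the two split signals (structure assumed and simplified, not the verbatim governor code):

/* Sketch: combine the split utilization signals for frequency selection. */
static unsigned long example_aggregate_util(struct rq *rq, int cpu)
{
        unsigned long max  = arch_scale_cpu_capacity(NULL, cpu);
        unsigned long util = cpu_util_cfs(rq) + cpu_util_dl(rq);

        /* Never ask for more than the CPU can actually deliver. */
        return min(util, max);
}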
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index ec9f5da6f163..2541bd89f20e 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/tick.h> | 14 | #include <linux/tick.h> |
15 | #include <linux/workqueue.h> | 15 | #include <linux/workqueue.h> |
16 | #include <linux/compat.h> | 16 | #include <linux/compat.h> |
17 | #include <linux/sched/deadline.h> | ||
17 | 18 | ||
18 | #include "posix-timers.h" | 19 | #include "posix-timers.h" |
19 | 20 | ||
@@ -791,6 +792,14 @@ check_timers_list(struct list_head *timers, | |||
791 | return 0; | 792 | return 0; |
792 | } | 793 | } |
793 | 794 | ||
795 | static inline void check_dl_overrun(struct task_struct *tsk) | ||
796 | { | ||
797 | if (tsk->dl.dl_overrun) { | ||
798 | tsk->dl.dl_overrun = 0; | ||
799 | __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); | ||
800 | } | ||
801 | } | ||
802 | |||
794 | /* | 803 | /* |
795 | * Check for any per-thread CPU timers that have fired and move them off | 804 | * Check for any per-thread CPU timers that have fired and move them off |
796 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the | 805 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the |
@@ -804,6 +813,9 @@ static void check_thread_timers(struct task_struct *tsk, | |||
804 | u64 expires; | 813 | u64 expires; |
805 | unsigned long soft; | 814 | unsigned long soft; |
806 | 815 | ||
816 | if (dl_task(tsk)) | ||
817 | check_dl_overrun(tsk); | ||
818 | |||
807 | /* | 819 | /* |
808 | * If cputime_expires is zero, then there are no active | 820 | * If cputime_expires is zero, then there are no active |
809 | * per thread CPU timers. | 821 | * per thread CPU timers. |
@@ -906,6 +918,9 @@ static void check_process_timers(struct task_struct *tsk, | |||
906 | struct task_cputime cputime; | 918 | struct task_cputime cputime; |
907 | unsigned long soft; | 919 | unsigned long soft; |
908 | 920 | ||
921 | if (dl_task(tsk)) | ||
922 | check_dl_overrun(tsk); | ||
923 | |||
909 | /* | 924 | /* |
910 | * If cputimer is not running, then there are no active | 925 | * If cputimer is not running, then there are no active |
911 | * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU). | 926 | * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU). |
@@ -1111,6 +1126,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
1111 | return 1; | 1126 | return 1; |
1112 | } | 1127 | } |
1113 | 1128 | ||
1129 | if (dl_task(tsk) && tsk->dl.dl_overrun) | ||
1130 | return 1; | ||
1131 | |||
1114 | return 0; | 1132 | return 0; |
1115 | } | 1133 | } |
1116 | 1134 | ||
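Taken together with the SCHED_FLAG_DL_OVERRUN uapi flag added elsewhere in this merge, the posix-cpu-timers hooks above let a SCHED_DEADLINE task ask to be signalled when it blows through its runtime budget. A hypothetical userspace sketch of that flow (the struct layout mirrors include/uapi/linux/sched.h; the flag value 0x04 and the DL parameters are assumptions for illustration):

/* Hypothetical userspace sketch: request SIGXCPU on SCHED_DEADLINE overruns. */
#define _GNU_SOURCE
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE          6       /* from include/uapi/linux/sched.h */
#endif
#define SCHED_FLAG_DL_OVERRUN   0x04    /* added by this series (assumed value) */

struct sched_attr {                     /* mirrors the uapi layout */
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;
        uint64_t sched_deadline;
        uint64_t sched_period;
};

static void on_overrun(int sig)
{
        /* Async-signal-safe notification that the runtime budget was exceeded. */
        static const char msg[] = "deadline runtime overrun\n";
        (void)sig;
        (void)write(STDERR_FILENO, msg, sizeof(msg) - 1);
}

int main(void)
{
        struct sched_attr attr;

        signal(SIGXCPU, on_overrun);

        memset(&attr, 0, sizeof(attr));
        attr.size           = sizeof(attr);
        attr.sched_policy   = SCHED_DEADLINE;
        attr.sched_flags    = SCHED_FLAG_DL_OVERRUN;
        attr.sched_runtime  = 10 * 1000 * 1000;         /* 10 ms  */
        attr.sched_deadline = 100 * 1000 * 1000;        /* 100 ms */
        attr.sched_period   = 100 * 1000 * 1000;        /* 100 ms */

        if (syscall(SYS_sched_setattr, 0, &attr, 0))
                return 1;       /* needs CAP_SYS_NICE / root */

        for (;;)                /* CPU-bound work; budget overruns raise SIGXCPU */
                ;
}

Whenever the scheduler detects that the task consumed more than its granted runtime, dl_overrun is set and the timer paths shown above (check_thread_timers(), check_process_timers(), fastpath_timer_check()) turn it into a SIGXCPU delivery.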