-rw-r--r--	kernel/sched_fair.c	113
1 file changed, 9 insertions(+), 104 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 95487e3c8b06..3179d1129a80 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -217,77 +217,14 @@ static u64 __sched_period(unsigned long nr_running)
 	return period;
 }
 
-/*
- * Calculate the preemption granularity needed to schedule every
- * runnable task once per sysctl_sched_latency amount of time.
- * (down to a sensible low limit on granularity)
- *
- * For example, if there are 2 tasks running and latency is 10 msecs,
- * we switch tasks every 5 msecs. If we have 3 tasks running, we have
- * to switch tasks every 3.33 msecs to get a 10 msecs observed latency
- * for each task. We do finer and finer scheduling until we reach the
- * minimum granularity value.
- *
- * To achieve this we use the following dynamic-granularity rule:
- *
- *	gran = lat/nr - lat/nr/nr
- *
- * This comes out of the following equations:
- *
- *	kA1 + gran = kB1
- *	kB2 + gran = kA2
- *	kA2 = kA1
- *	kB2 = kB1 - d + d/nr
- *	lat = d * nr
- *
- * Where 'k' is key, 'A' is task A (waiting), 'B' is task B (running),
- * '1' is start of time, '2' is end of time, 'd' is delay between
- * 1 and 2 (during which task B was running), 'nr' is number of tasks
- * running, 'lat' is the period of each task. ('lat' is the
- * sched_latency that we aim for.)
- */
-static long
-sched_granularity(struct cfs_rq *cfs_rq)
+static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned int gran = sysctl_sched_latency;
-	unsigned int nr = cfs_rq->nr_running;
-
-	if (nr > 1) {
-		gran = gran/nr - gran/nr/nr;
-		gran = max(gran, sysctl_sched_min_granularity);
-	}
+	u64 period = __sched_period(cfs_rq->nr_running);
 
-	return gran;
-}
+	period *= se->load.weight;
+	do_div(period, cfs_rq->load.weight);
 
-/*
- * We rescale the rescheduling granularity of tasks according to their
- * nice level, but only linearly, not exponentially:
- */
-static long
-niced_granularity(struct sched_entity *curr, unsigned long granularity)
-{
-	u64 tmp;
-
-	if (likely(curr->load.weight == NICE_0_LOAD))
-		return granularity;
-	/*
-	 * Positive nice levels get the same granularity as nice-0:
-	 */
-	if (likely(curr->load.weight < NICE_0_LOAD)) {
-		tmp = curr->load.weight * (u64)granularity;
-		return (long) (tmp >> NICE_0_SHIFT);
-	}
-	/*
-	 * Negative nice level tasks get linearly finer
-	 * granularity:
-	 */
-	tmp = curr->load.inv_weight * (u64)granularity;
-
-	/*
-	 * It will always fit into 'long':
-	 */
-	return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT));
+	return period;
 }
 
 static inline void
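
The comment removed above derives a single preemption granularity, gran = lat/nr - lat/nr/nr, shared by every task on the runqueue, whereas the new sched_slice() gives each entity a share of __sched_period() proportional to its load weight. The following stand-alone sketch contrasts the two calculations; it is plain user-space C, the macros, helper names and example weights are illustrative only (not kernel code), and the period stretching __sched_period() does for large nr_running is ignored:

#include <stdio.h>
#include <stdint.h>

/* Illustrative tunables, in nanoseconds. */
#define SCHED_LATENCY		20000000ULL	/* 20 ms */
#define SCHED_MIN_GRANULARITY	 2000000ULL	/*  2 ms */

/* Old scheme: one granularity for everybody, gran = lat/nr - lat/nr/nr. */
static uint64_t old_granularity(unsigned int nr_running)
{
	uint64_t gran = SCHED_LATENCY;

	if (nr_running > 1) {
		gran = gran / nr_running - gran / nr_running / nr_running;
		if (gran < SCHED_MIN_GRANULARITY)
			gran = SCHED_MIN_GRANULARITY;
	}
	return gran;
}

/* New scheme: each entity gets period * weight / total_weight. */
static uint64_t new_slice(uint64_t period, unsigned long weight,
			  unsigned long total_weight)
{
	return period * weight / total_weight;
}

int main(void)
{
	/* Two runnable tasks: nice 0 (weight 1024) and nice -5 (weight 3121). */
	unsigned long w_a = 1024, w_b = 3121;

	printf("old gran:  %llu ns for every task\n",
	       (unsigned long long)old_granularity(2));
	printf("new slice: %llu ns (nice 0), %llu ns (nice -5)\n",
	       (unsigned long long)new_slice(SCHED_LATENCY, w_a, w_a + w_b),
	       (unsigned long long)new_slice(SCHED_LATENCY, w_b, w_a + w_b));
	return 0;
}

With those example weights the old rule computes a single 5 ms granularity for the runqueue, while the weighted slice splits the 20 ms period into roughly 5 ms for the nice-0 task and 15 ms for the nice -5 task.
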
@@ -646,36 +583,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
  */
 static void
 __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			  struct sched_entity *curr, unsigned long granularity)
+			  struct sched_entity *curr)
 {
-	s64 __delta = curr->fair_key - se->fair_key;
 	unsigned long ideal_runtime, delta_exec;
 
-	/*
-	 * ideal_runtime is compared against sum_exec_runtime, which is
-	 * walltime, hence do not scale.
-	 */
-	ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running,
-			(unsigned long)sysctl_sched_min_granularity);
-
-	/*
-	 * If we executed more than what the latency constraint suggests,
-	 * reduce the rescheduling granularity. This way the total latency
-	 * of how much a task is not scheduled converges to
-	 * sysctl_sched_latency:
-	 */
+	ideal_runtime = sched_slice(cfs_rq, curr);
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
 	if (delta_exec > ideal_runtime)
-		granularity = 0;
-
-	/*
-	 * Take scheduling granularity into account - do not
-	 * preempt the current task unless the best task has
-	 * a larger than sched_granularity fairness advantage:
-	 *
-	 * scale granularity as key space is in fair_clock.
-	 */
-	if (__delta > niced_granularity(curr, granularity))
 		resched_task(rq_of(cfs_rq)->curr);
 }
 
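
With the granularity parameter and the fair_key delta gone, the check above reduces to a pure wall-clock test: reschedule once the running entity has used more CPU time than its weighted slice since it was last picked. A minimal user-space illustration of that decision (struct toy_entity and should_resched() are invented names for this sketch, not kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for the two runtime fields the kernel check reads. */
struct toy_entity {
	uint64_t sum_exec_runtime;	/* total CPU time consumed, ns */
	uint64_t prev_sum_exec_runtime;	/* snapshot taken when last picked */
};

/*
 * Mirrors the simplified check: reschedule once the CPU time used since
 * the entity was picked exceeds its ideal (weight-scaled) slice.
 */
static bool should_resched(const struct toy_entity *curr, uint64_t ideal_runtime)
{
	uint64_t delta_exec = curr->sum_exec_runtime -
			      curr->prev_sum_exec_runtime;

	return delta_exec > ideal_runtime;
}

int main(void)
{
	/* Picked with 100 ms of lifetime CPU, has since accumulated 6 ms more. */
	struct toy_entity curr = {
		.sum_exec_runtime	= 106000000,
		.prev_sum_exec_runtime	= 100000000,
	};

	/* Against a 5 ms slice, 6 ms of use triggers a reschedule. */
	printf("resched: %d\n", should_resched(&curr, 5000000));
	return 0;
}
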
@@ -749,8 +663,7 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	if (next == curr)
 		return;
 
-	__check_preempt_curr_fair(cfs_rq, next, curr,
-			sched_granularity(cfs_rq));
+	__check_preempt_curr_fair(cfs_rq, next, curr);
 }
 
 /**************************************************
@@ -944,7 +857,6 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 {
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
-	unsigned long gran;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -953,15 +865,8 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 		return;
 	}
 
-	gran = sysctl_sched_wakeup_granularity;
-	/*
-	 * Batch tasks prefer throughput over latency:
-	 */
-	if (unlikely(p->policy == SCHED_BATCH))
-		gran = sysctl_sched_batch_wakeup_granularity;
-
 	if (is_same_group(curr, p))
-		__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran);
+		__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)