diff options
| -rw-r--r-- | kernel/sched/deadline.c | 63 | ||||
| -rw-r--r-- | kernel/sched/loadavg.c | 20 |
2 files changed, 69 insertions, 14 deletions
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 99b2c33a9fbc..a2ce59015642 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
| @@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, | |||
| 445 | * | 445 | * |
| 446 | * This function returns true if: | 446 | * This function returns true if: |
| 447 | * | 447 | * |
| 448 | * runtime / (deadline - t) > dl_runtime / dl_period , | 448 | * runtime / (deadline - t) > dl_runtime / dl_deadline , |
| 449 | * | 449 | * |
| 450 | * IOW we can't recycle current parameters. | 450 | * IOW we can't recycle current parameters. |
| 451 | * | 451 | * |
| 452 | * Notice that the bandwidth check is done against the period. For | 452 | * Notice that the bandwidth check is done against the deadline. For |
| 453 | * task with deadline equal to period this is the same as using | 453 | * task with deadline equal to period this is the same as using |
| 454 | * dl_deadline instead of dl_period in the equation above. | 454 | * dl_period instead of dl_deadline in the equation above. |
| 455 | */ | 455 | */ |
| 456 | static bool dl_entity_overflow(struct sched_dl_entity *dl_se, | 456 | static bool dl_entity_overflow(struct sched_dl_entity *dl_se, |
| 457 | struct sched_dl_entity *pi_se, u64 t) | 457 | struct sched_dl_entity *pi_se, u64 t) |
| @@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, | |||
| 476 | * of anything below microseconds resolution is actually fiction | 476 | * of anything below microseconds resolution is actually fiction |
| 477 | * (but still we want to give the user that illusion >;). | 477 | * (but still we want to give the user that illusion >;). |
| 478 | */ | 478 | */ |
| 479 | left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); | 479 | left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); |
| 480 | right = ((dl_se->deadline - t) >> DL_SCALE) * | 480 | right = ((dl_se->deadline - t) >> DL_SCALE) * |
| 481 | (pi_se->dl_runtime >> DL_SCALE); | 481 | (pi_se->dl_runtime >> DL_SCALE); |
| 482 | 482 | ||
| @@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se, | |||
| 505 | } | 505 | } |
| 506 | } | 506 | } |
| 507 | 507 | ||
| 508 | static inline u64 dl_next_period(struct sched_dl_entity *dl_se) | ||
| 509 | { | ||
| 510 | return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period; | ||
| 511 | } | ||
| 512 | |||
| 508 | /* | 513 | /* |
| 509 | * If the entity depleted all its runtime, and if we want it to sleep | 514 | * If the entity depleted all its runtime, and if we want it to sleep |
| 510 | * while waiting for some new execution time to become available, we | 515 | * while waiting for some new execution time to become available, we |
| 511 | * set the bandwidth enforcement timer to the replenishment instant | 516 | * set the bandwidth replenishment timer to the replenishment instant |
| 512 | * and try to activate it. | 517 | * and try to activate it. |
| 513 | * | 518 | * |
| 514 | * Notice that it is important for the caller to know if the timer | 519 | * Notice that it is important for the caller to know if the timer |
| @@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p) | |||
| 530 | * that it is actually coming from rq->clock and not from | 535 | * that it is actually coming from rq->clock and not from |
| 531 | * hrtimer's time base reading. | 536 | * hrtimer's time base reading. |
| 532 | */ | 537 | */ |
| 533 | act = ns_to_ktime(dl_se->deadline); | 538 | act = ns_to_ktime(dl_next_period(dl_se)); |
| 534 | now = hrtimer_cb_get_time(timer); | 539 | now = hrtimer_cb_get_time(timer); |
| 535 | delta = ktime_to_ns(now) - rq_clock(rq); | 540 | delta = ktime_to_ns(now) - rq_clock(rq); |
| 536 | act = ktime_add_ns(act, delta); | 541 | act = ktime_add_ns(act, delta); |
| @@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |||
| 638 | lockdep_unpin_lock(&rq->lock, rf.cookie); | 643 | lockdep_unpin_lock(&rq->lock, rf.cookie); |
| 639 | rq = dl_task_offline_migration(rq, p); | 644 | rq = dl_task_offline_migration(rq, p); |
| 640 | rf.cookie = lockdep_pin_lock(&rq->lock); | 645 | rf.cookie = lockdep_pin_lock(&rq->lock); |
| 646 | update_rq_clock(rq); | ||
| 641 | 647 | ||
| 642 | /* | 648 | /* |
| 643 | * Now that the task has been migrated to the new RQ and we | 649 | * Now that the task has been migrated to the new RQ and we |
| @@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) | |||
| 689 | timer->function = dl_task_timer; | 695 | timer->function = dl_task_timer; |
| 690 | } | 696 | } |
| 691 | 697 | ||
| 698 | /* | ||
| 699 | * During the activation, CBS checks if it can reuse the current task's | ||
| 700 | * runtime and period. If the deadline of the task is in the past, CBS | ||
| 701 | * cannot use the runtime, and so it replenishes the task. This rule | ||
| 702 | * works fine for implicit deadline tasks (deadline == period), and the | ||
| 703 | * CBS was designed for implicit deadline tasks. However, a task with | ||
| 704 | * CBS was designed for implicit deadline tasks. However, a task with | ||
| 705 | * constrained deadline (deadline < period) might be awakened after the | ||
| 705 | * deadline, but before the next period. In this case, replenishing the | ||
| 706 | * task would allow it to run for runtime / deadline. As in this case | ||
| 707 | * deadline < period, CBS enables a task to run for more than the | ||
| 708 | * runtime / period. In a very loaded system, this can cause a domino | ||
| 709 | * effect, making other tasks miss their deadlines. | ||
| 710 | * | ||
| 711 | * To avoid this problem, in the activation of a constrained deadline | ||
| 712 | * task after the deadline but before the next period, throttle the | ||
| 713 | * task and set the replenishing timer to the beginning of the next period, | ||
| 714 | * unless it is boosted. | ||
| 715 | */ | ||
| 716 | static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) | ||
| 717 | { | ||
| 718 | struct task_struct *p = dl_task_of(dl_se); | ||
| 719 | struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se)); | ||
| 720 | |||
| 721 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) && | ||
| 722 | dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { | ||
| 723 | if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) | ||
| 724 | return; | ||
| 725 | dl_se->dl_throttled = 1; | ||
| 726 | } | ||
| 727 | } | ||
| 728 | |||
| 692 | static | 729 | static |
| 693 | int dl_runtime_exceeded(struct sched_dl_entity *dl_se) | 730 | int dl_runtime_exceeded(struct sched_dl_entity *dl_se) |
| 694 | { | 731 | { |
| @@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se) | |||
| 922 | __dequeue_dl_entity(dl_se); | 959 | __dequeue_dl_entity(dl_se); |
| 923 | } | 960 | } |
| 924 | 961 | ||
| 962 | static inline bool dl_is_constrained(struct sched_dl_entity *dl_se) | ||
| 963 | { | ||
| 964 | return dl_se->dl_deadline < dl_se->dl_period; | ||
| 965 | } | ||
| 966 | |||
| 925 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | 967 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) |
| 926 | { | 968 | { |
| 927 | struct task_struct *pi_task = rt_mutex_get_top_task(p); | 969 | struct task_struct *pi_task = rt_mutex_get_top_task(p); |
| @@ -948,6 +990,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |||
| 948 | } | 990 | } |
| 949 | 991 | ||
| 950 | /* | 992 | /* |
| 993 | * Check if a constrained deadline task was activated | ||
| 994 | * after the deadline but before the next period. | ||
| 995 | * If that is the case, the task will be throttled and | ||
| 996 | * the replenishment timer will be set to the next period. | ||
| 997 | */ | ||
| 998 | if (!p->dl.dl_throttled && dl_is_constrained(&p->dl)) | ||
| 999 | dl_check_constrained_dl(&p->dl); | ||
| 1000 | |||
| 1001 | /* | ||
| 951 | * If p is throttled, we do nothing. In fact, if it exhausted | 1002 | * If p is throttled, we do nothing. In fact, if it exhausted |
| 952 | * its budget it needs a replenishment and, since it now is on | 1003 | * its budget it needs a replenishment and, since it now is on |
| 953 | * its rq, the bandwidth timer callback (which clearly has not | 1004 | * its rq, the bandwidth timer callback (which clearly has not |
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 7296b7308eca..f15fb2bdbc0d 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c | |||
| @@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void) | |||
| 169 | * If the folding window started, make sure we start writing in the | 169 | * If the folding window started, make sure we start writing in the |
| 170 | * next idle-delta. | 170 | * next idle-delta. |
| 171 | */ | 171 | */ |
| 172 | if (!time_before(jiffies, calc_load_update)) | 172 | if (!time_before(jiffies, READ_ONCE(calc_load_update))) |
| 173 | idx++; | 173 | idx++; |
| 174 | 174 | ||
| 175 | return idx & 1; | 175 | return idx & 1; |
| @@ -202,8 +202,9 @@ void calc_load_exit_idle(void) | |||
| 202 | struct rq *this_rq = this_rq(); | 202 | struct rq *this_rq = this_rq(); |
| 203 | 203 | ||
| 204 | /* | 204 | /* |
| 205 | * If we're still before the sample window, we're done. | 205 | * If we're still before the pending sample window, we're done. |
| 206 | */ | 206 | */ |
| 207 | this_rq->calc_load_update = READ_ONCE(calc_load_update); | ||
| 207 | if (time_before(jiffies, this_rq->calc_load_update)) | 208 | if (time_before(jiffies, this_rq->calc_load_update)) |
| 208 | return; | 209 | return; |
| 209 | 210 | ||
| @@ -212,7 +213,6 @@ void calc_load_exit_idle(void) | |||
| 212 | * accounted through the nohz accounting, so skip the entire deal and | 213 | * accounted through the nohz accounting, so skip the entire deal and |
| 213 | * sync up for the next window. | 214 | * sync up for the next window. |
| 214 | */ | 215 | */ |
| 215 | this_rq->calc_load_update = calc_load_update; | ||
| 216 | if (time_before(jiffies, this_rq->calc_load_update + 10)) | 216 | if (time_before(jiffies, this_rq->calc_load_update + 10)) |
| 217 | this_rq->calc_load_update += LOAD_FREQ; | 217 | this_rq->calc_load_update += LOAD_FREQ; |
| 218 | } | 218 | } |
| @@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp, | |||
| 308 | */ | 308 | */ |
| 309 | static void calc_global_nohz(void) | 309 | static void calc_global_nohz(void) |
| 310 | { | 310 | { |
| 311 | unsigned long sample_window; | ||
| 311 | long delta, active, n; | 312 | long delta, active, n; |
| 312 | 313 | ||
| 313 | if (!time_before(jiffies, calc_load_update + 10)) { | 314 | sample_window = READ_ONCE(calc_load_update); |
| 315 | if (!time_before(jiffies, sample_window + 10)) { | ||
| 314 | /* | 316 | /* |
| 315 | * Catch-up, fold however many we are behind still | 317 | * Catch-up, fold however many we are behind still |
| 316 | */ | 318 | */ |
| 317 | delta = jiffies - calc_load_update - 10; | 319 | delta = jiffies - sample_window - 10; |
| 318 | n = 1 + (delta / LOAD_FREQ); | 320 | n = 1 + (delta / LOAD_FREQ); |
| 319 | 321 | ||
| 320 | active = atomic_long_read(&calc_load_tasks); | 322 | active = atomic_long_read(&calc_load_tasks); |
| @@ -324,7 +326,7 @@ static void calc_global_nohz(void) | |||
| 324 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | 326 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); |
| 325 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | 327 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); |
| 326 | 328 | ||
| 327 | calc_load_update += n * LOAD_FREQ; | 329 | WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ); |
| 328 | } | 330 | } |
| 329 | 331 | ||
| 330 | /* | 332 | /* |
| @@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { } | |||
| 352 | */ | 354 | */ |
| 353 | void calc_global_load(unsigned long ticks) | 355 | void calc_global_load(unsigned long ticks) |
| 354 | { | 356 | { |
| 357 | unsigned long sample_window; | ||
| 355 | long active, delta; | 358 | long active, delta; |
| 356 | 359 | ||
| 357 | if (time_before(jiffies, calc_load_update + 10)) | 360 | sample_window = READ_ONCE(calc_load_update); |
| 361 | if (time_before(jiffies, sample_window + 10)) | ||
| 358 | return; | 362 | return; |
| 359 | 363 | ||
| 360 | /* | 364 | /* |
| @@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks) | |||
| 371 | avenrun[1] = calc_load(avenrun[1], EXP_5, active); | 375 | avenrun[1] = calc_load(avenrun[1], EXP_5, active); |
| 372 | avenrun[2] = calc_load(avenrun[2], EXP_15, active); | 376 | avenrun[2] = calc_load(avenrun[2], EXP_15, active); |
| 373 | 377 | ||
| 374 | calc_load_update += LOAD_FREQ; | 378 | WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ); |
| 375 | 379 | ||
| 376 | /* | 380 | /* |
| 377 | * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. | 381 | * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. |
