 kernel/sched.c      |  15
 kernel/sched_fair.c | 108
 2 files changed, 122 insertions(+), 1 deletion(-)
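The patch below wires a second hrtimer (a "slack" timer) into struct cfs_bandwidth and, when a cfs_rq dequeues its last task, returns any locally cached runtime above a 1ms floor to the global pool; the timer then redistributes that slack to throttled cfs_rqs unless a quota refresh is imminent. As a rough model of the thresholds involved, here is a minimal user-space C sketch; it is not kernel code, and the helper names and sample values are assumptions for illustration only.

/*
 * Minimal user-space model of the slack-return decision made below in
 * __return_cfs_rq_runtime()/start_cfs_slack_bandwidth().  Not kernel code;
 * the helper names and sample values are assumptions for illustration.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define NSEC_PER_MSEC 1000000ULL

static const uint64_t min_cfs_rq_runtime = 1 * NSEC_PER_MSEC;
static const uint64_t min_bandwidth_expiration = 2 * NSEC_PER_MSEC;
static const uint64_t cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;

/* runtime a cfs_rq would hand back to the global pool on its last dequeue */
static int64_t slack_to_return(int64_t runtime_remaining)
{
	int64_t slack = runtime_remaining - (int64_t)min_cfs_rq_runtime;

	return slack > 0 ? slack : 0;
}

/* mirrors runtime_refresh_within(): don't arm the slack timer near a refresh */
static int should_arm_slack_timer(uint64_t period_remaining_ns)
{
	return period_remaining_ns >=
	       cfs_bandwidth_slack_period + min_bandwidth_expiration;
}

int main(void)
{
	int64_t remaining = 4 * NSEC_PER_MSEC;		/* local runtime left */
	uint64_t period_left = 20 * NSEC_PER_MSEC;	/* time to next refresh */

	printf("returned to pool: %" PRId64 " ns\n", slack_to_return(remaining));
	printf("arm slack timer:  %s\n",
	       should_arm_slack_timer(period_left) ? "yes" : "no");
	return 0;
}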
diff --git a/kernel/sched.c b/kernel/sched.c
index 35c91859f8a6..6baade0d7649 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -259,7 +259,7 @@ struct cfs_bandwidth {
 	u64 runtime_expires;
 
 	int idle, timer_active;
-	struct hrtimer period_timer;
+	struct hrtimer period_timer, slack_timer;
 	struct list_head throttled_cfs_rq;
 
 	/* statistics */
@@ -421,6 +421,16 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 
 static inline u64 default_cfs_period(void);
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
+
+static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
+{
+	struct cfs_bandwidth *cfs_b =
+		container_of(timer, struct cfs_bandwidth, slack_timer);
+	do_sched_cfs_slack_timer(cfs_b);
+
+	return HRTIMER_NORESTART;
+}
 
 static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 {
@@ -453,6 +463,8 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->period_timer.function = sched_cfs_period_timer;
+	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cfs_b->slack_timer.function = sched_cfs_slack_timer;
 }
 
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -488,6 +500,7 @@ static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
 	hrtimer_cancel(&cfs_b->period_timer);
+	hrtimer_cancel(&cfs_b->slack_timer);
 }
 #else
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index d201f28c1de7..1ca2cd44d64a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1052,6 +1052,8 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		__clear_buddies_skip(se);
 }
 
+static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -1090,6 +1092,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if (!(flags & DEQUEUE_SLEEP))
 		se->vruntime -= cfs_rq->min_vruntime;
 
+	/* return excess runtime on last dequeue */
+	return_cfs_rq_runtime(cfs_rq);
+
 	update_min_vruntime(cfs_rq);
 	update_cfs_shares(cfs_rq);
 }
@@ -1674,6 +1679,108 @@ out_unlock:
 	return idle;
 }
 
+/* a cfs_rq won't donate quota below this amount */
+static const u64 min_cfs_rq_runtime = 1 * NSEC_PER_MSEC;
+/* minimum remaining period time to redistribute slack quota */
+static const u64 min_bandwidth_expiration = 2 * NSEC_PER_MSEC;
+/* how long we wait to gather additional slack before distributing */
+static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;
+
+/* are we near the end of the current quota period? */
+static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire)
+{
+	struct hrtimer *refresh_timer = &cfs_b->period_timer;
+	u64 remaining;
+
+	/* if the call-back is running a quota refresh is already occurring */
+	if (hrtimer_callback_running(refresh_timer))
+		return 1;
+
+	/* is a quota refresh about to occur? */
+	remaining = ktime_to_ns(hrtimer_expires_remaining(refresh_timer));
+	if (remaining < min_expire)
+		return 1;
+
+	return 0;
+}
+
+static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	u64 min_left = cfs_bandwidth_slack_period + min_bandwidth_expiration;
+
+	/* if there's a quota refresh soon don't bother with slack */
+	if (runtime_refresh_within(cfs_b, min_left))
+		return;
+
+	start_bandwidth_timer(&cfs_b->slack_timer,
+			ns_to_ktime(cfs_bandwidth_slack_period));
+}
+
+/* we know any runtime found here is valid as update_curr() precedes return */
+static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	s64 slack_runtime = cfs_rq->runtime_remaining - min_cfs_rq_runtime;
+
+	if (slack_runtime <= 0)
+		return;
+
+	raw_spin_lock(&cfs_b->lock);
+	if (cfs_b->quota != RUNTIME_INF &&
+	    cfs_rq->runtime_expires == cfs_b->runtime_expires) {
+		cfs_b->runtime += slack_runtime;
+
+		/* we are under rq->lock, defer unthrottling using a timer */
+		if (cfs_b->runtime > sched_cfs_bandwidth_slice() &&
+		    !list_empty(&cfs_b->throttled_cfs_rq))
+			start_cfs_slack_bandwidth(cfs_b);
+	}
+	raw_spin_unlock(&cfs_b->lock);
+
+	/* even if it's not valid for return we don't want to try again */
+	cfs_rq->runtime_remaining -= slack_runtime;
+}
+
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+	if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+		return;
+
+	__return_cfs_rq_runtime(cfs_rq);
+}
+
+/*
+ * This is done with a timer (instead of inline with bandwidth return) since
+ * it's necessary to juggle rq->locks to unthrottle their respective cfs_rqs.
+ */
+static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
+{
+	u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
+	u64 expires;
+
+	/* confirm we're still not at a refresh boundary */
+	if (runtime_refresh_within(cfs_b, min_bandwidth_expiration))
+		return;
+
+	raw_spin_lock(&cfs_b->lock);
+	if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) {
+		runtime = cfs_b->runtime;
+		cfs_b->runtime = 0;
+	}
+	expires = cfs_b->runtime_expires;
+	raw_spin_unlock(&cfs_b->lock);
+
+	if (!runtime)
+		return;
+
+	runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
+
+	raw_spin_lock(&cfs_b->lock);
+	if (expires == cfs_b->runtime_expires)
+		cfs_b->runtime = runtime;
+	raw_spin_unlock(&cfs_b->lock);
+}
+
 /*
  * When a group wakes up we want to make sure that its quota is not already
  * expired/exceeded, otherwise it may be allowed to steal additional ticks of
@@ -1715,6 +1822,7 @@ static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 				unsigned long delta_exec) {}
 static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
+static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 
 static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {
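For completeness, none of the slack-return machinery above triggers unless a group actually has a bandwidth limit configured. A minimal way to exercise it is to give a cgroup a quota smaller than its period through the cpu controller; the sketch below is illustrative only, and the cgroup-v1 mount point and group name are assumptions about the local setup.

/* Hypothetical helper: write one value into a cgroup-v1 cpu controller file. */
#include <stdio.h>

static int write_cgroup_val(const char *path, long long val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%lld\n", val);
	return fclose(f);
}

int main(void)
{
	/* 100ms period, 50ms quota: the group may use at most half a CPU. */
	write_cgroup_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_period_us", 100000);
	write_cgroup_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_quota_us", 50000);
	return 0;
}

When tasks in such a group block before consuming their locally cached slice, the dequeue path added by this patch hands anything above min_cfs_rq_runtime (1ms) back to the global pool, so sibling cfs_rqs are less likely to sit throttled until the next period refresh.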