aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
authorPaul Turner <pjt@google.com>2012-10-04 07:18:31 -0400
committerIngo Molnar <mingo@kernel.org>2012-10-24 04:27:27 -0400
commitf1b17280efbd21873d1db8631117bdbccbcb39a2 (patch)
tree6c052571b44109b94490eef89765751816ab9c88 /kernel/sched
parentbb17f65571e97a7ec0297571fb1154fbd107ad00 (diff)
sched: Maintain runnable averages across throttled periods
With bandwidth control, tracked entities may cease execution according to user-specified bandwidth limits. Charging this time as either throttled or blocked, however, is incorrect and would falsely skew in either direction. What we actually want is for any throttled periods to be "invisible" to load-tracking, as they are removed from the system for that interval and contribute normally otherwise. Do this by moderating the progression of time to omit any periods in which the entity belonged to a throttled hierarchy. Signed-off-by: Paul Turner <pjt@google.com> Reviewed-by: Ben Segall <bsegall@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20120823141506.998912151@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/fair.c50
-rw-r--r--kernel/sched/sched.h3
2 files changed, 42 insertions, 11 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9e49722da032..873c9f5c5796 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1222,15 +1222,26 @@ static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
1222 cfs_rq->blocked_load_avg = 0; 1222 cfs_rq->blocked_load_avg = 0;
1223} 1223}
1224 1224
1225static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
1226
1225/* Update a sched_entity's runnable average */ 1227/* Update a sched_entity's runnable average */
1226static inline void update_entity_load_avg(struct sched_entity *se, 1228static inline void update_entity_load_avg(struct sched_entity *se,
1227 int update_cfs_rq) 1229 int update_cfs_rq)
1228{ 1230{
1229 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1231 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1230 long contrib_delta; 1232 long contrib_delta;
1233 u64 now;
1231 1234
1232 if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg, 1235 /*
1233 se->on_rq)) 1236 * For a group entity we need to use their owned cfs_rq_clock_task() in
1237 * case they are the parent of a throttled hierarchy.
1238 */
1239 if (entity_is_task(se))
1240 now = cfs_rq_clock_task(cfs_rq);
1241 else
1242 now = cfs_rq_clock_task(group_cfs_rq(se));
1243
1244 if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
1234 return; 1245 return;
1235 1246
1236 contrib_delta = __update_entity_load_avg_contrib(se); 1247 contrib_delta = __update_entity_load_avg_contrib(se);
@@ -1250,7 +1261,7 @@ static inline void update_entity_load_avg(struct sched_entity *se,
1250 */ 1261 */
1251static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) 1262static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
1252{ 1263{
1253 u64 now = rq_of(cfs_rq)->clock_task >> 20; 1264 u64 now = cfs_rq_clock_task(cfs_rq) >> 20;
1254 u64 decays; 1265 u64 decays;
1255 1266
1256 decays = now - cfs_rq->last_decay; 1267 decays = now - cfs_rq->last_decay;
@@ -1841,6 +1852,15 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
1841 return &tg->cfs_bandwidth; 1852 return &tg->cfs_bandwidth;
1842} 1853}
1843 1854
1855/* rq->clock_task normalized against any time this cfs_rq has spent throttled */
1856static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
1857{
1858 if (unlikely(cfs_rq->throttle_count))
1859 return cfs_rq->throttled_clock_task;
1860
1861 return rq_of(cfs_rq)->clock_task - cfs_rq->throttled_clock_task_time;
1862}
1863
1844/* returns 0 on failure to allocate runtime */ 1864/* returns 0 on failure to allocate runtime */
1845static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) 1865static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
1846{ 1866{
@@ -1991,6 +2011,10 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
1991 cfs_rq->load_stamp += delta; 2011 cfs_rq->load_stamp += delta;
1992 cfs_rq->load_last += delta; 2012 cfs_rq->load_last += delta;
1993 2013
2014 /* adjust cfs_rq_clock_task() */
2015 cfs_rq->throttled_clock_task_time += rq->clock_task -
2016 cfs_rq->throttled_clock_task;
2017
1994 /* update entity weight now that we are on_rq again */ 2018 /* update entity weight now that we are on_rq again */
1995 update_cfs_shares(cfs_rq); 2019 update_cfs_shares(cfs_rq);
1996 } 2020 }
@@ -2005,8 +2029,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
2005 struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; 2029 struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
2006 2030
2007 /* group is entering throttled state, record last load */ 2031 /* group is entering throttled state, record last load */
2008 if (!cfs_rq->throttle_count) 2032 if (!cfs_rq->throttle_count) {
2009 update_cfs_load(cfs_rq, 0); 2033 update_cfs_load(cfs_rq, 0);
2034 cfs_rq->throttled_clock_task = rq->clock_task;
2035 }
2010 cfs_rq->throttle_count++; 2036 cfs_rq->throttle_count++;
2011 2037
2012 return 0; 2038 return 0;
@@ -2021,7 +2047,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
2021 2047
2022 se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; 2048 se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
2023 2049
2024 /* account load preceding throttle */ 2050 /* freeze hierarchy runnable averages while throttled */
2025 rcu_read_lock(); 2051 rcu_read_lock();
2026 walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq); 2052 walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
2027 rcu_read_unlock(); 2053 rcu_read_unlock();
@@ -2045,7 +2071,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
2045 rq->nr_running -= task_delta; 2071 rq->nr_running -= task_delta;
2046 2072
2047 cfs_rq->throttled = 1; 2073 cfs_rq->throttled = 1;
2048 cfs_rq->throttled_timestamp = rq->clock; 2074 cfs_rq->throttled_clock = rq->clock;
2049 raw_spin_lock(&cfs_b->lock); 2075 raw_spin_lock(&cfs_b->lock);
2050 list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); 2076 list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
2051 raw_spin_unlock(&cfs_b->lock); 2077 raw_spin_unlock(&cfs_b->lock);
@@ -2063,10 +2089,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
2063 2089
2064 cfs_rq->throttled = 0; 2090 cfs_rq->throttled = 0;
2065 raw_spin_lock(&cfs_b->lock); 2091 raw_spin_lock(&cfs_b->lock);
2066 cfs_b->throttled_time += rq->clock - cfs_rq->throttled_timestamp; 2092 cfs_b->throttled_time += rq->clock - cfs_rq->throttled_clock;
2067 list_del_rcu(&cfs_rq->throttled_list); 2093 list_del_rcu(&cfs_rq->throttled_list);
2068 raw_spin_unlock(&cfs_b->lock); 2094 raw_spin_unlock(&cfs_b->lock);
2069 cfs_rq->throttled_timestamp = 0;
2070 2095
2071 update_rq_clock(rq); 2096 update_rq_clock(rq);
2072 /* update hierarchical throttle state */ 2097 /* update hierarchical throttle state */
@@ -2466,8 +2491,13 @@ static void unthrottle_offline_cfs_rqs(struct rq *rq)
2466} 2491}
2467 2492
2468#else /* CONFIG_CFS_BANDWIDTH */ 2493#else /* CONFIG_CFS_BANDWIDTH */
2469static __always_inline 2494static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
2470void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} 2495{
2496 return rq_of(cfs_rq)->clock_task;
2497}
2498
2499static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
2500 unsigned long delta_exec) {}
2471static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} 2501static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
2472static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} 2502static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
2473static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} 2503static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 134928dc6f05..d13bce7a44ef 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -281,7 +281,8 @@ struct cfs_rq {
281 u64 runtime_expires; 281 u64 runtime_expires;
282 s64 runtime_remaining; 282 s64 runtime_remaining;
283 283
284 u64 throttled_timestamp; 284 u64 throttled_clock, throttled_clock_task;
285 u64 throttled_clock_task_time;
285 int throttled, throttle_count; 286 int throttled, throttle_count;
286 struct list_head throttled_list; 287 struct list_head throttled_list;
287#endif /* CONFIG_CFS_BANDWIDTH */ 288#endif /* CONFIG_CFS_BANDWIDTH */