Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--   kernel/sched/fair.c   129
1 file changed, 129 insertions, 0 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a14b990..16d67f9b6955 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -971,6 +971,126 @@ static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
 }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
+#ifdef CONFIG_SMP
+/*
+ * Approximate:
+ *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
+ */
+static __always_inline u64 decay_load(u64 val, u64 n)
+{
+        for (; n && val; n--) {
+                val *= 4008;
+                val >>= 12;
+        }
+
+        return val;
+}
+
+/*
+ * We can represent the historical contribution to runnable average as the
+ * coefficients of a geometric series.  To do this we sub-divide our runnable
+ * history into segments of approximately 1ms (1024us); label the segment that
+ * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
+ *
+ *   [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
+ *         p0            p1           p2
+ *        (now)       (~1ms ago)  (~2ms ago)
+ *
+ * Let u_i denote the fraction of p_i that the entity was runnable.
+ *
+ * We then designate the fractions u_i as our coefficients, yielding the
+ * following representation of historical load:
+ *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
+ *
+ * We choose y based on the width of a reasonable scheduling period, fixing:
+ *   y^32 = 0.5
+ *
+ * This means that the contribution to load ~32ms ago (u_32) will be weighted
+ * approximately half as much as the contribution to load within the last ms
+ * (u_0).
+ *
+ * When a period "rolls over" and we have new u_0`, multiplying the previous
+ * sum again by y is sufficient to update:
+ *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
+ *            = u_0 + u_1*y + u_2*y^2 + ...   [re-labeling u_i --> u_{i+1}]
+ */
+static __always_inline int __update_entity_runnable_avg(u64 now,
+                                                        struct sched_avg *sa,
+                                                        int runnable)
+{
+        u64 delta;
+        int delta_w, decayed = 0;
+
+        delta = now - sa->last_runnable_update;
+        /*
+         * This should only happen when time goes backwards, which it
+         * unfortunately does during sched clock init when we swap over to TSC.
+         */
+        if ((s64)delta < 0) {
+                sa->last_runnable_update = now;
+                return 0;
+        }
+
+        /*
+         * Use 1024ns as the unit of measurement since it's a reasonable
+         * approximation of 1us and fast to compute.
+         */
+        delta >>= 10;
+        if (!delta)
+                return 0;
+        sa->last_runnable_update = now;
+
+        /* delta_w is the amount already accumulated against our next period */
+        delta_w = sa->runnable_avg_period % 1024;
+        if (delta + delta_w >= 1024) {
+                /* period roll-over */
+                decayed = 1;
+
+                /*
+                 * Now that we know we're crossing a period boundary, figure
+                 * out how much from delta we need to complete the current
+                 * period and accrue it.
+                 */
+                delta_w = 1024 - delta_w;
+                BUG_ON(delta_w > delta);
+                do {
+                        if (runnable)
+                                sa->runnable_avg_sum += delta_w;
+                        sa->runnable_avg_period += delta_w;
+
+                        /*
+                         * Remainder of delta initiates a new period, roll over
+                         * the previous.
+                         */
+                        sa->runnable_avg_sum =
+                                decay_load(sa->runnable_avg_sum, 1);
+                        sa->runnable_avg_period =
+                                decay_load(sa->runnable_avg_period, 1);
+
+                        delta -= delta_w;
+                        /* New period is empty */
+                        delta_w = 1024;
+                } while (delta >= 1024);
+        }
+
+        /* Remainder of delta accrued against u_0` */
+        if (runnable)
+                sa->runnable_avg_sum += delta;
+        sa->runnable_avg_period += delta;
+
+        return decayed;
+}
+
+/* Update a sched_entity's runnable average */
+static inline void update_entity_load_avg(struct sched_entity *se)
+{
+        __update_entity_runnable_avg(rq_of(cfs_rq_of(se))->clock_task, &se->avg,
+                                     se->on_rq);
+}
+#else
+static inline void update_entity_load_avg(struct sched_entity *se) {}
+#endif
+
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
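The constant in decay_load() above is y in fixed point: 4008/4096 ~= 0.9785 ~= 0.5^(1/32), so one loop iteration decays val by one 1024us period and 32 iterations roughly halve it. Below is a minimal standalone userspace sketch, not part of the patch (the u64 typedef and the main() harness exist only for this illustration), that copies the helper and checks the half-life.

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t u64;        /* stand-in for the kernel type */

    /* same fixed-point decay as the patch: one step multiplies by ~4008/4096 */
    static u64 decay_load(u64 val, u64 n)
    {
            for (; n && val; n--) {
                    val *= 4008;
                    val >>= 12;
            }
            return val;
    }

    int main(void)
    {
            /* one full 1024us period decayed across one half-life (32 periods, ~32ms) */
            printf("n=32: %llu\n", (unsigned long long)decay_load(1024, 32));
            /* prints a value just under 512; the >>12 truncation loses a little each step */
            printf("n=64: %llu\n", (unsigned long long)decay_load(1024, 64));
            /* two half-lives: roughly a quarter of the original value */
            return 0;
    }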
@@ -1097,6 +1217,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         */
        update_curr(cfs_rq);
        update_cfs_load(cfs_rq, 0);
+       update_entity_load_avg(se);
        account_entity_enqueue(cfs_rq, se);
        update_cfs_shares(cfs_rq);
 
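Back to the math in the first hunk: the update rule quoted there, load_avg = u_0` + y*load_avg, works because folding one extra factor of y into the old sum relabels every u_i as u_{i+1}. The following small floating-point sketch is purely illustrative (the 200-period toy workload, the on/off runnable pattern, and the rounded constant 0.978572 ~= 0.5^(1/32) are assumptions of this example, not values from the patch); it compares the rolling update against the explicit geometric sum.

    #include <stdio.h>

    #define PERIODS 200
    #define Y       0.978572        /* 0.5^(1/32), rounded */

    int main(void)
    {
            /* u[i]: fraction of period p_i the entity was runnable, newest first */
            double u[PERIODS];
            double direct = 0.0, rolling = 0.0, weight = 1.0;
            int i;

            for (i = 0; i < PERIODS; i++)
                    u[i] = (i % 2) ? 0.0 : 1.0;        /* runnable every other period */

            /* explicit series: u_0 + u_1*y + u_2*y^2 + ... */
            for (i = 0; i < PERIODS; i++) {
                    direct += u[i] * weight;
                    weight *= Y;
            }

            /* rolling update, oldest period first: sum = u_0` + y*sum on each roll-over */
            for (i = PERIODS - 1; i >= 0; i--)
                    rolling = u[i] + Y * rolling;

            /* the two agree to floating-point precision */
            printf("direct=%f rolling=%f\n", direct, rolling);
            return 0;
    }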
@@ -1171,6 +1292,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         * Update run-time statistics of the 'current'.
         */
        update_curr(cfs_rq);
+       update_entity_load_avg(se);
 
        update_stats_dequeue(cfs_rq, se);
        if (flags & DEQUEUE_SLEEP) {
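One way to read __update_entity_runnable_avg() in the first hunk: runnable time and total elapsed time are accumulated and decayed identically, so runnable_avg_sum / runnable_avg_period approximates the entity's recently runnable fraction. The toy userspace model below is again not kernel code (driving whole 1024us periods, the even/odd runnable pattern, and the checkpoint printing are assumptions made for illustration); it shows the ratio settling near 1/2 for an entity that is runnable half the time.

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t u64;

    /* same helper as in the earlier sketch */
    static u64 decay_load(u64 val, u64 n)
    {
            for (; n && val; n--) {
                    val *= 4008;
                    val >>= 12;
            }
            return val;
    }

    int main(void)
    {
            u64 sum = 0, period = 0;
            int i;

            /* feed whole 1024us periods: runnable on even periods, idle on odd ones */
            for (i = 0; i < 200; i++) {
                    if (!(i % 2))
                            sum += 1024;
                    period += 1024;
                    /* the same per-period roll-over the kernel's do/while loop performs */
                    sum = decay_load(sum, 1);
                    period = decay_load(period, 1);

                    if (i % 50 == 49)
                            printf("after %3d periods: sum=%llu period=%llu ratio=%.3f\n",
                                   i + 1, (unsigned long long)sum,
                                   (unsigned long long)period,
                                   (double)sum / (double)period);
            }
            return 0;
    }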
@@ -1340,6 +1462,8 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
                update_stats_wait_start(cfs_rq, prev);
                /* Put 'current' back into the tree. */
                __enqueue_entity(cfs_rq, prev);
+               /* in !on_rq case, update occurred at dequeue */
+               update_entity_load_avg(prev);
        }
        cfs_rq->curr = NULL;
 }
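Worth noting about the accumulators above (an observation that follows from the math, not text from the commit): because every elapsed 1024us period is scaled by y = 0.5^(1/32) ~= 0.97857, both sums are self-limiting. For an entity that is runnable in every period, runnable_avg_sum can never exceed the geometric series 1024 * (1 + y + y^2 + ...) = 1024 / (1 - y) ~= 1024 / 0.02143 ~= 47,800, and runnable_avg_period is bounded the same way; since the sum only accrues while runnable and both fields decay identically, the ratio of the two always stays in [0, 1] no matter how long the entity has been running.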
@@ -1353,6 +1477,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
        update_curr(cfs_rq);
 
        /*
+        * Ensure that runnable average is periodically updated.
+        */
+       update_entity_load_avg(curr);
+
+       /*
         * Update share accounting for long-running entities.
         */
        update_entity_shares_tick(cfs_rq);