diff options
author | Paul Turner <pjt@google.com> | 2012-10-04 07:18:32 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-10-24 04:27:30 -0400 |
commit | 5b51f2f80b3b906ce59bd4dce6eca3c7f34cb1b9 (patch) | |
tree | 72e7c6003b377646d4ba4defa2ddf43756e81474 /kernel/sched | |
parent | f269ae0469fc882332bdfb5db15d3c1315fe2a10 (diff) |
sched: Make __update_entity_runnable_avg() fast
__update_entity_runnable_avg forms the core of maintaining an entity's runnable
load average. In this function we charge the accumulated run-time since last
update and handle appropriate decay. In some cases, e.g. a waking task, this
time interval may be much larger than our period unit.
Fortunately we can exploit some properties of our series to perform decay for a
blocked update in constant time and account the contribution for a running
update in essentially-constant* time.
[*]: For any running entity they should be performing updates at the tick which
gives us a soft limit of 1 jiffy between updates, and we can compute up to a
32 jiffy update in a single pass.
C program to generate the magic constants in the arrays:
#include <math.h>
#include <stdio.h>
#define N 32
#define WMULT_SHIFT 32
const long WMULT_CONST = ((1UL << N) - 1);
double y;
long runnable_avg_yN_inv[N];
void calc_mult_inv() {
int i;
double yn = 0;
printf("inverses\n");
for (i = 0; i < N; i++) {
yn = (double)WMULT_CONST * pow(y, i);
runnable_avg_yN_inv[i] = yn;
printf("%2d: 0x%8lx\n", i, runnable_avg_yN_inv[i]);
}
printf("\n");
}
long mult_inv(long c, int n) {
return (c * runnable_avg_yN_inv[n]) >> WMULT_SHIFT;
}
void calc_yn_sum(int n)
{
int i;
double sum = 0, sum_fl = 0, diff = 0;
/*
* We take the floored sum to ensure the sum of partial sums is never
* larger than the actual sum.
*/
printf("sum y^n\n");
printf(" %8s %8s %8s\n", "exact", "floor", "error");
for (i = 1; i <= n; i++) {
sum = (y * sum + y * 1024);
sum_fl = floor(y * sum_fl+ y * 1024);
printf("%2d: %8.0f %8.0f %8.0f\n", i, sum, sum_fl,
sum_fl - sum);
}
printf("\n");
}
void calc_conv(long n) {
long old_n;
int i = -1;
printf("convergence (LOAD_AVG_MAX, LOAD_AVG_MAX_N)\n");
do {
old_n = n;
n = mult_inv(n, 1) + 1024;
i++;
} while (n != old_n);
printf("%d> %ld\n", i - 1, n);
printf("\n");
}
void main() {
y = pow(0.5, 1/(double)N);
calc_mult_inv();
calc_conv(1024);
calc_yn_sum(N);
}
[ Compile with -lm ]
Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141507.277808946@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/fair.c | 125 |
1 files changed, 101 insertions, 24 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 002a7697f437..6ecf455fd95b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -884,17 +884,92 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq) | |||
884 | 884 | ||
885 | #ifdef CONFIG_SMP | 885 | #ifdef CONFIG_SMP |
886 | /* | 886 | /* |
887 | * We choose a half-life close to 1 scheduling period. | ||
888 | * Note: The tables below are dependent on this value. | ||
889 | */ | ||
890 | #define LOAD_AVG_PERIOD 32 | ||
891 | #define LOAD_AVG_MAX 47742 /* maximum possible load avg */ | ||
892 | #define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_MAX_AVG */ | ||
893 | |||
894 | /* Precomputed fixed inverse multiplies for multiplication by y^n */ | ||
895 | static const u32 runnable_avg_yN_inv[] = { | ||
896 | 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, | ||
897 | 0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85, | ||
898 | 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581, | ||
899 | 0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9, | ||
900 | 0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80, | ||
901 | 0x85aac367, 0x82cd8698, | ||
902 | }; | ||
903 | |||
904 | /* | ||
905 | * Precomputed \Sum y^k { 1<=k<=n }. These are floor(true_value) to prevent | ||
906 | * over-estimates when re-combining. | ||
907 | */ | ||
908 | static const u32 runnable_avg_yN_sum[] = { | ||
909 | 0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103, | ||
910 | 9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082, | ||
911 | 17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371, | ||
912 | }; | ||
913 | |||
914 | /* | ||
887 | * Approximate: | 915 | * Approximate: |
888 | * val * y^n, where y^32 ~= 0.5 (~1 scheduling period) | 916 | * val * y^n, where y^32 ~= 0.5 (~1 scheduling period) |
889 | */ | 917 | */ |
890 | static __always_inline u64 decay_load(u64 val, u64 n) | 918 | static __always_inline u64 decay_load(u64 val, u64 n) |
891 | { | 919 | { |
892 | for (; n && val; n--) { | 920 | unsigned int local_n; |
893 | val *= 4008; | 921 | |
894 | val >>= 12; | 922 | if (!n) |
923 | return val; | ||
924 | else if (unlikely(n > LOAD_AVG_PERIOD * 63)) | ||
925 | return 0; | ||
926 | |||
927 | /* after bounds checking we can collapse to 32-bit */ | ||
928 | local_n = n; | ||
929 | |||
930 | /* | ||
931 | * As y^PERIOD = 1/2, we can combine | ||
932 | * y^n = 1/2^(n/PERIOD) * k^(n%PERIOD) | ||
933 | * With a look-up table which covers k^n (n<PERIOD) | ||
934 | * | ||
935 | * To achieve constant time decay_load. | ||
936 | */ | ||
937 | if (unlikely(local_n >= LOAD_AVG_PERIOD)) { | ||
938 | val >>= local_n / LOAD_AVG_PERIOD; | ||
939 | local_n %= LOAD_AVG_PERIOD; | ||
895 | } | 940 | } |
896 | 941 | ||
897 | return val; | 942 | val *= runnable_avg_yN_inv[local_n]; |
943 | /* We don't use SRR here since we always want to round down. */ | ||
944 | return val >> 32; | ||
945 | } | ||
946 | |||
947 | /* | ||
948 | * For updates fully spanning n periods, the contribution to runnable | ||
949 | * average will be: \Sum 1024*y^n | ||
950 | * | ||
951 | * We can compute this reasonably efficiently by combining: | ||
952 | * y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for n <PERIOD} | ||
953 | */ | ||
954 | static u32 __compute_runnable_contrib(u64 n) | ||
955 | { | ||
956 | u32 contrib = 0; | ||
957 | |||
958 | if (likely(n <= LOAD_AVG_PERIOD)) | ||
959 | return runnable_avg_yN_sum[n]; | ||
960 | else if (unlikely(n >= LOAD_AVG_MAX_N)) | ||
961 | return LOAD_AVG_MAX; | ||
962 | |||
963 | /* Compute \Sum k^n combining precomputed values for k^i, \Sum k^j */ | ||
964 | do { | ||
965 | contrib /= 2; /* y^LOAD_AVG_PERIOD = 1/2 */ | ||
966 | contrib += runnable_avg_yN_sum[LOAD_AVG_PERIOD]; | ||
967 | |||
968 | n -= LOAD_AVG_PERIOD; | ||
969 | } while (n > LOAD_AVG_PERIOD); | ||
970 | |||
971 | contrib = decay_load(contrib, n); | ||
972 | return contrib + runnable_avg_yN_sum[n]; | ||
898 | } | 973 | } |
899 | 974 | ||
900 | /* | 975 | /* |
@@ -929,7 +1004,8 @@ static __always_inline int __update_entity_runnable_avg(u64 now, | |||
929 | struct sched_avg *sa, | 1004 | struct sched_avg *sa, |
930 | int runnable) | 1005 | int runnable) |
931 | { | 1006 | { |
932 | u64 delta; | 1007 | u64 delta, periods; |
1008 | u32 runnable_contrib; | ||
933 | int delta_w, decayed = 0; | 1009 | int delta_w, decayed = 0; |
934 | 1010 | ||
935 | delta = now - sa->last_runnable_update; | 1011 | delta = now - sa->last_runnable_update; |
@@ -963,25 +1039,26 @@ static __always_inline int __update_entity_runnable_avg(u64 now, | |||
963 | * period and accrue it. | 1039 | * period and accrue it. |
964 | */ | 1040 | */ |
965 | delta_w = 1024 - delta_w; | 1041 | delta_w = 1024 - delta_w; |
966 | BUG_ON(delta_w > delta); | 1042 | if (runnable) |
967 | do { | 1043 | sa->runnable_avg_sum += delta_w; |
968 | if (runnable) | 1044 | sa->runnable_avg_period += delta_w; |
969 | sa->runnable_avg_sum += delta_w; | 1045 | |
970 | sa->runnable_avg_period += delta_w; | 1046 | delta -= delta_w; |
971 | 1047 | ||
972 | /* | 1048 | /* Figure out how many additional periods this update spans */ |
973 | * Remainder of delta initiates a new period, roll over | 1049 | periods = delta / 1024; |
974 | * the previous. | 1050 | delta %= 1024; |
975 | */ | 1051 | |
976 | sa->runnable_avg_sum = | 1052 | sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum, |
977 | decay_load(sa->runnable_avg_sum, 1); | 1053 | periods + 1); |
978 | sa->runnable_avg_period = | 1054 | sa->runnable_avg_period = decay_load(sa->runnable_avg_period, |
979 | decay_load(sa->runnable_avg_period, 1); | 1055 | periods + 1); |
980 | 1056 | ||
981 | delta -= delta_w; | 1057 | /* Efficiently calculate \sum (1..n_period) 1024*y^i */ |
982 | /* New period is empty */ | 1058 | runnable_contrib = __compute_runnable_contrib(periods); |
983 | delta_w = 1024; | 1059 | if (runnable) |
984 | } while (delta >= 1024); | 1060 | sa->runnable_avg_sum += runnable_contrib; |
1061 | sa->runnable_avg_period += runnable_contrib; | ||
985 | } | 1062 | } |
986 | 1063 | ||
987 | /* Remainder of delta accrued against u_0` */ | 1064 | /* Remainder of delta accrued against u_0` */ |