diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-09-09 13:42:13 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-09-09 13:42:13 -0400 |
commit | 193a19c94a32f2e2a0e973f0a98cf4a098cefa15 (patch) | |
tree | 19c501d521fd7a265a54e7006aa331612aa3246e | |
parent | 00c173dc87b14b8422cea2aa129a2fc99689a05d (diff) |
simple average tracking
-rw-r--r-- | include/litmus/gpu_affinity.h | 15 | ||||
-rw-r--r-- | include/litmus/rt_param.h | 15 | ||||
-rw-r--r-- | litmus/gpu_affinity.c | 66 | ||||
-rw-r--r-- | litmus/litmus.c | 3 |
4 files changed, 83 insertions, 16 deletions
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index 6b3fb8b28745..d64a15cbf2a5 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h | |||
@@ -31,17 +31,18 @@ static inline lt_t get_gpu_time(struct task_struct* t) | |||
31 | 31 | ||
32 | static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) | 32 | static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) |
33 | { | 33 | { |
34 | int i; | 34 | // int i; |
35 | fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); | 35 | // fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); |
36 | lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... | 36 | // lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... |
37 | lt_t val = t->rt_param.gpu_migration_est[dist].avg; | ||
37 | 38 | ||
38 | WARN_ON(temp < 0); | 39 | // WARN_ON(temp < 0); |
39 | 40 | ||
40 | // lower-bound a distant migration to be at least equal to the level | 41 | // lower-bound a distant migration to be at least equal to the level |
41 | // below it. | 42 | // below it. |
42 | for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { | 43 | // for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { |
43 | val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); | 44 | // val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); |
44 | } | 45 | // } |
45 | 46 | ||
46 | return ((val > 0) ? val : dist+1); | 47 | return ((val > 0) ? val : dist+1); |
47 | } | 48 | } |
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 0198884eab86..a441badd30cc 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h | |||
@@ -144,6 +144,17 @@ typedef struct feedback_est{ | |||
144 | fp_t accum_err; | 144 | fp_t accum_err; |
145 | } feedback_est_t; | 145 | } feedback_est_t; |
146 | 146 | ||
147 | |||
148 | #define AVG_EST_WINDOW_SIZE 20 | ||
149 | |||
150 | typedef struct avg_est{ | ||
151 | lt_t history[AVG_EST_WINDOW_SIZE]; | ||
152 | uint16_t count; | ||
153 | uint16_t idx; | ||
154 | lt_t sum; | ||
155 | lt_t avg; | ||
156 | } avg_est_t; | ||
157 | |||
147 | /* RT task parameters for scheduling extensions | 158 | /* RT task parameters for scheduling extensions |
148 | * These parameters are inherited during clone and therefore must | 159 | * These parameters are inherited during clone and therefore must |
149 | * be explicitly set up before the task set is launched. | 160 | * be explicitly set up before the task set is launched. |
@@ -190,12 +201,10 @@ struct rt_param { | |||
190 | long unsigned int held_gpus; // bitmap of held GPUs. | 201 | long unsigned int held_gpus; // bitmap of held GPUs. |
191 | 202 | ||
192 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 203 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
193 | fp_t gpu_fb_param_a[MIG_LAST+1]; | 204 | avg_est_t gpu_migration_est[MIG_LAST+1]; |
194 | fp_t gpu_fb_param_b[MIG_LAST+1]; | ||
195 | 205 | ||
196 | gpu_migration_dist_t gpu_migration; | 206 | gpu_migration_dist_t gpu_migration; |
197 | int last_gpu; | 207 | int last_gpu; |
198 | feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far | ||
199 | 208 | ||
200 | lt_t accum_gpu_time; | 209 | lt_t accum_gpu_time; |
201 | lt_t gpu_time_stamp; | 210 | lt_t gpu_time_stamp; |
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 55bb5e1128ec..2cdf18bc7dd6 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c | |||
@@ -7,7 +7,14 @@ | |||
7 | 7 | ||
8 | #include <litmus/sched_trace.h> | 8 | #include <litmus/sched_trace.h> |
9 | 9 | ||
10 | #define OBSERVATION_CAP 2*1e9 | 10 | #define OBSERVATION_CAP ((lt_t)(2e9)) |
11 | |||
12 | // reason for skew: high outliers are less | ||
13 | // frequent and way out of bounds | ||
14 | #define HI_THRESHOLD 2 | ||
15 | #define LO_THRESHOLD 4 | ||
16 | |||
17 | #define MIN(a, b) ((a < b) ? a : b) | ||
11 | 18 | ||
12 | static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) | 19 | static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) |
13 | { | 20 | { |
@@ -28,10 +35,59 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) | |||
28 | 35 | ||
29 | void update_gpu_estimate(struct task_struct *t, lt_t observed) | 36 | void update_gpu_estimate(struct task_struct *t, lt_t observed) |
30 | { | 37 | { |
31 | feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | 38 | //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); |
39 | avg_est_t *est; | ||
40 | struct migration_info mig_info; | ||
32 | 41 | ||
33 | BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); | 42 | BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); |
34 | 43 | ||
44 | est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | ||
45 | |||
46 | if (unlikely(observed > OBSERVATION_CAP)) { | ||
47 | TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n", | ||
48 | observed, | ||
49 | OBSERVATION_CAP); | ||
50 | return; | ||
51 | } | ||
52 | |||
53 | #if 0 | ||
54 | // filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out | ||
55 | // of range of the average, but only filter if enough samples | ||
56 | // have been taken. | ||
57 | if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { | ||
58 | if (unlikely(observed < est->avg/LO_THRESHOLD)) { | ||
59 | TRACE_TASK(t, "Observation is too small: %llu\n", | ||
60 | observed); | ||
61 | return; | ||
62 | } | ||
63 | else if (unlikely(observed > est->avg*HI_THRESHOLD)) { | ||
64 | TRACE_TASK(t, "Observation is too large: %llu\n", | ||
65 | observed); | ||
66 | return; | ||
67 | } | ||
68 | } | ||
69 | #endif | ||
70 | |||
71 | if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) { | ||
72 | ++est->count; | ||
73 | } | ||
74 | else { | ||
75 | est->sum -= est->history[est->idx]; | ||
76 | } | ||
77 | |||
78 | mig_info.observed = observed; | ||
79 | mig_info.estimated = est->avg; | ||
80 | mig_info.distance = tsk_rt(t)->gpu_migration; | ||
81 | sched_trace_migration(t, &mig_info); | ||
82 | |||
83 | |||
84 | est->history[est->idx] = observed; | ||
85 | est->sum += observed; | ||
86 | est->avg = est->sum/est->count; | ||
87 | est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE; | ||
88 | |||
89 | |||
90 | #if 0 | ||
35 | if(unlikely(fb->est.val == 0)) { | 91 | if(unlikely(fb->est.val == 0)) { |
36 | // kludge-- cap observed values to prevent whacky estimations. | 92 | // kludge-- cap observed values to prevent whacky estimations. |
37 | // whacky stuff happens during the first few jobs. | 93 | // whacky stuff happens during the first few jobs. |
@@ -71,12 +127,12 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) | |||
71 | sched_trace_migration(t, &mig_info); | 127 | sched_trace_migration(t, &mig_info); |
72 | } | 128 | } |
73 | } | 129 | } |
130 | #endif | ||
74 | 131 | ||
75 | TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", | 132 | TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n", |
76 | tsk_rt(t)->gpu_migration, | 133 | tsk_rt(t)->gpu_migration, |
77 | observed, | 134 | observed, |
78 | _fp_to_integer(fb->est), | 135 | est->avg); |
79 | _point(fb->est)); | ||
80 | } | 136 | } |
81 | 137 | ||
82 | gpu_migration_dist_t gpu_migration_distance(int a, int b) | 138 | gpu_migration_dist_t gpu_migration_distance(int a, int b) |
diff --git a/litmus/litmus.c b/litmus/litmus.c index d1f836c8af6e..91ec65894379 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c | |||
@@ -321,6 +321,7 @@ void init_gpu_affinity_state(struct task_struct* p) | |||
321 | //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); | 321 | //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); |
322 | //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); | 322 | //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); |
323 | 323 | ||
324 | #if 0 | ||
324 | // emperical; | 325 | // emperical; |
325 | p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); | 326 | p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); |
326 | p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); | 327 | p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); |
@@ -333,7 +334,7 @@ void init_gpu_affinity_state(struct task_struct* p) | |||
333 | 334 | ||
334 | p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); | 335 | p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); |
335 | p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); | 336 | p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); |
336 | 337 | #endif | |
337 | p->rt_param.gpu_migration = MIG_NONE; | 338 | p->rt_param.gpu_migration = MIG_NONE; |
338 | p->rt_param.last_gpu = -1; | 339 | p->rt_param.last_gpu = -1; |
339 | } | 340 | } |