about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Glenn Elliott <gelliott@cs.unc.edu> 2012-09-09 13:42:13 -0400
committer Glenn Elliott <gelliott@cs.unc.edu> 2012-09-09 13:42:13 -0400
commit 193a19c94a32f2e2a0e973f0a98cf4a098cefa15 (patch)
tree 19c501d521fd7a265a54e7006aa331612aa3246e
parent 00c173dc87b14b8422cea2aa129a2fc99689a05d (diff)
simple average tracking
-rw-r--r-- include/litmus/gpu_affinity.h 15
-rw-r--r-- include/litmus/rt_param.h 15
-rw-r--r-- litmus/gpu_affinity.c 66
-rw-r--r-- litmus/litmus.c 3
4 files changed, 83 insertions, 16 deletions
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index 6b3fb8b28745..d64a15cbf2a5 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -31,17 +31,18 @@ static inline lt_t get_gpu_time(struct task_struct* t)
31 31
32static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) 32static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
33{ 33{
34 int i; 34// int i;
35 fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); 35// fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
36 lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... 36// lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
37 lt_t val = t->rt_param.gpu_migration_est[dist].avg;
37 38
38 WARN_ON(temp < 0); 39// WARN_ON(temp < 0);
39 40
40 // lower-bound a distant migration to be at least equal to the level 41 // lower-bound a distant migration to be at least equal to the level
41 // below it. 42 // below it.
42 for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { 43// for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
43 val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); 44// val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
44 } 45// }
45 46
46 return ((val > 0) ? val : dist+1); 47 return ((val > 0) ? val : dist+1);
47} 48}
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 0198884eab86..a441badd30cc 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -144,6 +144,17 @@ typedef struct feedback_est{
144 fp_t accum_err; 144 fp_t accum_err;
145} feedback_est_t; 145} feedback_est_t;
146 146
147
148#define AVG_EST_WINDOW_SIZE 20
149
150typedef struct avg_est{
151 lt_t history[AVG_EST_WINDOW_SIZE];
152 uint16_t count;
153 uint16_t idx;
154 lt_t sum;
155 lt_t avg;
156} avg_est_t;
157
147/* RT task parameters for scheduling extensions 158/* RT task parameters for scheduling extensions
148 * These parameters are inherited during clone and therefore must 159 * These parameters are inherited during clone and therefore must
149 * be explicitly set up before the task set is launched. 160 * be explicitly set up before the task set is launched.
@@ -190,12 +201,10 @@ struct rt_param {
190 long unsigned int held_gpus; // bitmap of held GPUs. 201 long unsigned int held_gpus; // bitmap of held GPUs.
191 202
192#ifdef CONFIG_LITMUS_AFFINITY_LOCKING 203#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
193 fp_t gpu_fb_param_a[MIG_LAST+1]; 204 avg_est_t gpu_migration_est[MIG_LAST+1];
194 fp_t gpu_fb_param_b[MIG_LAST+1];
195 205
196 gpu_migration_dist_t gpu_migration; 206 gpu_migration_dist_t gpu_migration;
197 int last_gpu; 207 int last_gpu;
198 feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far
199 208
200 lt_t accum_gpu_time; 209 lt_t accum_gpu_time;
201 lt_t gpu_time_stamp; 210 lt_t gpu_time_stamp;
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 55bb5e1128ec..2cdf18bc7dd6 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -7,7 +7,14 @@
7 7
8#include <litmus/sched_trace.h> 8#include <litmus/sched_trace.h>
9 9
10#define OBSERVATION_CAP 2*1e9 10#define OBSERVATION_CAP ((lt_t)(2e9))
11
12// reason for skew: high outliers are less
13// frequent and way out of bounds
14#define HI_THRESHOLD 2
15#define LO_THRESHOLD 4
16
17#define MIN(a, b) ((a < b) ? a : b)
11 18
12static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) 19static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
13{ 20{
@@ -28,10 +35,59 @@ static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
28 35
29void update_gpu_estimate(struct task_struct *t, lt_t observed) 36void update_gpu_estimate(struct task_struct *t, lt_t observed)
30{ 37{
31 feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); 38 //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
39 avg_est_t *est;
40 struct migration_info mig_info;
32 41
33 BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); 42 BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
34 43
44 est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
45
46 if (unlikely(observed > OBSERVATION_CAP)) {
47 TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
48 observed,
49 OBSERVATION_CAP);
50 return;
51 }
52
53#if 0
54 // filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
55 // of range of the average, but only filter if enough samples
56 // have been taken.
57 if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
58 if (unlikely(observed < est->avg/LO_THRESHOLD)) {
59 TRACE_TASK(t, "Observation is too small: %llu\n",
60 observed);
61 return;
62 }
63 else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
64 TRACE_TASK(t, "Observation is too large: %llu\n",
65 observed);
66 return;
67 }
68 }
69#endif
70
71 if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
72 ++est->count;
73 }
74 else {
75 est->sum -= est->history[est->idx];
76 }
77
78 mig_info.observed = observed;
79 mig_info.estimated = est->avg;
80 mig_info.distance = tsk_rt(t)->gpu_migration;
81 sched_trace_migration(t, &mig_info);
82
83
84 est->history[est->idx] = observed;
85 est->sum += observed;
86 est->avg = est->sum/est->count;
87 est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
88
89
90#if 0
35 if(unlikely(fb->est.val == 0)) { 91 if(unlikely(fb->est.val == 0)) {
36 // kludge-- cap observed values to prevent whacky estimations. 92 // kludge-- cap observed values to prevent whacky estimations.
37 // whacky stuff happens during the first few jobs. 93 // whacky stuff happens during the first few jobs.
@@ -71,12 +127,12 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
71 sched_trace_migration(t, &mig_info); 127 sched_trace_migration(t, &mig_info);
72 } 128 }
73 } 129 }
130#endif
74 131
75 TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", 132 TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
76 tsk_rt(t)->gpu_migration, 133 tsk_rt(t)->gpu_migration,
77 observed, 134 observed,
78 _fp_to_integer(fb->est), 135 est->avg);
79 _point(fb->est));
80} 136}
81 137
82gpu_migration_dist_t gpu_migration_distance(int a, int b) 138gpu_migration_dist_t gpu_migration_distance(int a, int b)
diff --git a/litmus/litmus.c b/litmus/litmus.c
index d1f836c8af6e..91ec65894379 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -321,6 +321,7 @@ void init_gpu_affinity_state(struct task_struct* p)
321 //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); 321 //p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
322 //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); 322 //p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
323 323
324#if 0
324 // emperical; 325 // emperical;
325 p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); 326 p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
326 p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); 327 p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
@@ -333,7 +334,7 @@ void init_gpu_affinity_state(struct task_struct* p)
333 334
334 p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); 335 p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
335 p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); 336 p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
336 337#endif
337 p->rt_param.gpu_migration = MIG_NONE; 338 p->rt_param.gpu_migration = MIG_NONE;
338 p->rt_param.last_gpu = -1; 339 p->rt_param.last_gpu = -1;
339} 340}