From 6ab36ca992441f7353840c70fc91d99a500a940e Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Wed, 18 Apr 2012 16:24:56 -0400 Subject: Fixed and tested aff-aware KFMLP. (finally!) --- litmus/gpu_affinity.c | 87 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 32 deletions(-) (limited to 'litmus/gpu_affinity.c') diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 43171390bed7..87349fe10a9b 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c @@ -5,15 +5,16 @@ #include #include -static void update_estimate(feedback_est_t* fb, fp_t* a, fp_t* b, lt_t observed) +#define OBSERVATION_CAP 2*1e9 + +static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) { fp_t err, new; - fp_t actual = _frac(observed, 1); // observed is in ns, so beware of overflow! + fp_t actual = _integer_to_fp(observed); err = _sub(actual, fb->est); - new = _add(_mul(*a, err), - _mul(*b, fb->accum_err)); - + new = _add(_mul(a, err), _mul(b, fb->accum_err)); + fb->est = new; fb->accum_err = _add(fb->accum_err, err); } @@ -22,47 +23,69 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) { feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); - TRACE_TASK(t, "GPU est update before (dist = %d): %d.%d\n", - tsk_rt(t)->gpu_migration, - _fp_to_integer(fb->est), - _point(fb->est)); - - update_estimate(fb, - &tsk_rt(t)->gpu_fb_param_a, - &tsk_rt(t)->gpu_fb_param_b, - observed); + WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST); - TRACE_TASK(t, "GPU est update after (dist = %d): %d.%d\n", + if(unlikely(fb->est.val == 0)) { + // kludge-- cap observed values to prevent whacky estimations. + // whacky stuff happens during the first few jobs. + if(unlikely(observed > OBSERVATION_CAP)) { + TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n", + observed, OBSERVATION_CAP); + observed = OBSERVATION_CAP; + } + + // take the first observation as our estimate + // (initial value of 0 was bogus anyhow) + fb->est = _integer_to_fp(observed); + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. + } + else { + update_estimate(fb, + tsk_rt(t)->gpu_fb_param_a, + tsk_rt(t)->gpu_fb_param_b, + observed); + + if(_fp_to_integer(fb->est) <= 0) { + // TODO: talk to Jonathan about how well this works. + // Maybe we should average the observed and est instead? + TRACE_TASK(t, "Invalid estimate. Patching.\n"); + fb->est = _integer_to_fp(observed); + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. + } + } + + TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", tsk_rt(t)->gpu_migration, + observed, _fp_to_integer(fb->est), _point(fb->est)); } gpu_migration_dist_t gpu_migration_distance(int a, int b) { - // GPUs organized in a binary hierarchy, no more than 2^MIG_LAST GPUs + // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs int i; - int level; - int max_level; + int dist; - if(unlikely(a < 0 || b < 0)) { - return MIG_LAST; + if(likely(a >= 0 && b >= 0)) { + for(i = 0; i <= MIG_FAR; ++i) { + if(a>>i == b>>i) { + dist = i; + goto out; + } + } + dist = MIG_NONE; // hopefully never reached. + TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b); } - - if(a == b) { - return MIG_LOCAL; + else { + dist = MIG_NONE; } - for(i = 1, level = 2, max_level = 1< %d is %d\n", + a, b, dist); - WARN_ON(1); - return MIG_LAST; + return dist; } -- cgit v1.2.2