From 150c1d5ed8541f3a2bfcde3d5f3174b9af4ab4fc Mon Sep 17 00:00:00 2001
From: Glenn Elliott
Date: Thu, 20 Sep 2012 18:57:20 -0400
Subject: Generalized GPU cost predictors + EWMA. (untested)

---
 litmus/gpu_affinity.c | 238 +++++++++++++++++++++++++-------------------------
 litmus/litmus.c       | 194 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 293 insertions(+), 139 deletions(-)

(limited to 'litmus')

diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 7d73105b4181..7a2b5005be45 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -9,33 +9,10 @@
 #define OBSERVATION_CAP ((lt_t)(2e9))
 
-// reason for skew: high outliers are less
-// frequent and way out of bounds
-//#define HI_THRESHOLD 2
-//#define LO_THRESHOLD 4
-
-#define NUM_STDEV_NUM 1
-#define NUM_STDEV_DENOM 2
+#define MIN_OBSERVATIONS 10
 
 #define MIN(a, b) ((a < b) ? a : b)
 
-static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
-{
-	fp_t relative_err;
-	fp_t err, new;
-	fp_t actual = _integer_to_fp(observed);
-
-	err = _sub(actual, fb->est);
-	new = _add(_mul(a, err), _mul(b, fb->accum_err));
-
-	relative_err = _div(err, actual);
-
-	fb->est = new;
-	fb->accum_err = _add(fb->accum_err, err);
-
-	return relative_err;
-}
-
 lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count)
 {
 	/* brute force: takes about as much time as incremental running methods when
@@ -74,127 +51,154 @@ lt_t isqrt(lt_t n)
 	return res;
 }
 
-void update_gpu_estimate(struct task_struct *t, lt_t observed)
+static lt_t avg_method(struct task_struct *t, lt_t observed)
 {
-	//feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
-	avg_est_t *est;
-	struct migration_info mig_info;
-
-	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+	avg_est_t *est = &(tsk_rt(t)->gpu_avg_est[tsk_rt(t)->gpu_migration]);
+
+	/* cut-off */
+	if (tsk_rt(t)->prediction_mode != SIMPLE_AVG &&
+	    est->history.count >= MIN_OBSERVATIONS) {
+		lt_t upper = est->center_line + est->std * est->sigmas;
+		lt_t lower = est->center_line - est->std * est->sigmas;
+		if (unlikely(observed < lower || observed > upper)) {
+			TRACE_TASK(t, "%llu out of range [%llu, %llu]\n",
+				observed, lower, upper);
+			return 0;
+		}
+	}
 
-	est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
+	if (unlikely(est->history.count < est->history.window_size)) {
+		++est->history.count;
+	}
+	else {
+		est->sum -= est->history.observations[est->history.idx];
 
-	if (unlikely(observed > OBSERVATION_CAP)) {
-		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
-			observed,
-			OBSERVATION_CAP);
-		return;
+		if (tsk_rt(t)->prediction_mode == CC_MR_AVG) {
+			est->rsum -= est->history.range[est->history.ridx];
+		}
 	}
-#if 0
-	// filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
-	// of range of the average, but only filter if enough samples
-	// have been taken.
-	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
-		if (unlikely(observed < est->avg/LO_THRESHOLD)) {
-			TRACE_TASK(t, "Observation is too small: %llu\n",
-				observed);
-			return;
+	est->history.observations[est->history.idx] = observed;
+	est->sum += observed;
+	est->center_line = est->sum/est->history.count;
+
+	if (tsk_rt(t)->prediction_mode == CC_BRUTE_AVG) {
+		if (likely(est->history.count > 1)) {
+			est->std = isqrt(varience(est->history.observations, est->center_line, est->history.count));
 		}
-		else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
-			TRACE_TASK(t, "Observation is too large: %llu\n",
-				observed);
-			return;
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_AVG) {
+		if (likely(est->history.count > 1)) {
+			lt_t mr;
+			est->history.range[est->history.ridx] = (observed > est->history.last_observed) ?
+				observed - est->history.last_observed :
+				est->history.last_observed - observed;
+			est->rsum += est->history.range[est->history.ridx];
+			mr = est->rsum / (est->history.count - 1);
+			est->std = est->rsum + (128*est->rsum)/1000; // std = mr/d_2, w/ d_2 = 1.128
+			est->history.ridx = (est->history.ridx + 1) % (est->history.window_size - 1);
 		}
-#endif
-	// filter values outside NUM_STDEVx the standard deviation,
-	// but only filter if enough samples have been taken.
-	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
-		lt_t lower, upper;
+		est->history.last_observed = observed;
 	}
 
-		lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM;
-		lower = est->avg - MIN(range, est->avg); // no underflow.
+	est->history.idx = (est->history.idx + 1) % est->history.window_size;
 
-		if (unlikely(observed < lower)) {
-			TRACE_TASK(t, "Observation is too small: %llu\n", observed);
-			return;
-		}
+	return est->center_line;
+}
 
-		upper = est->avg + range;
-		if (unlikely(observed > upper)) {
-			TRACE_TASK(t, "Observation is too large: %llu\n", observed);
-			return;
+static lt_t ewma_method(struct task_struct *t, lt_t observed)
+{
+	ewma_est_t *est = &(tsk_rt(t)->gpu_ewma_est[tsk_rt(t)->gpu_migration]);
+
+	/* cut-off */
+	if (tsk_rt(t)->prediction_mode != SIMPLE_EWMA &&
+	    est->history.count >= MIN_OBSERVATIONS) {
+		lt_t upper = est->center_line + est->std * est->sigmas;
+		lt_t lower = est->center_line - est->std * est->sigmas;
+		if (unlikely(observed < lower || observed > upper)) {
+			TRACE_TASK(t, "%llu out of range [%llu, %llu]\n",
+				observed, lower, upper);
+			return 0;
 		}
 	}
+	if (unlikely(est->history.count < est->history.window_size)) {
+		++est->history.count;
+	}
+	else if (tsk_rt(t)->prediction_mode == CC_MR_EWMA) {
+		est->rsum -= est->history.range[est->history.ridx];
+	}
 
-
-	if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
-		++est->count;
+	if (unlikely(est->history.count == 1)) {
+		est->center_line = observed;
 	}
 	else {
-		est->sum -= est->history[est->idx];
+		est->center_line =
+			((est->alpha_num * est->center_line)/est->alpha_denom) +
+			(((est->alpha_denom - est->alpha_num) * observed)/est->alpha_denom);
 	}
 
-	mig_info.observed = observed;
-	mig_info.estimated = est->avg;
-	mig_info.distance = tsk_rt(t)->gpu_migration;
-	sched_trace_migration(t, &mig_info);
-
-
-	est->history[est->idx] = observed;
-	est->sum += observed;
-	est->avg = est->sum/est->count;
-	est->std = isqrt(varience(est->history, est->avg, est->count));
-	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
-
-
-#if 0
-	if(unlikely(fb->est.val == 0)) {
-		// kludge-- cap observed values to prevent whacky estimations.
-		// whacky stuff happens during the first few jobs.
-		if(unlikely(observed > OBSERVATION_CAP)) {
-			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
-				observed, OBSERVATION_CAP);
-			observed = OBSERVATION_CAP;
-		}
-
-		// take the first observation as our estimate
-		// (initial value of 0 was bogus anyhow)
-		fb->est = _integer_to_fp(observed);
-		fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
+	if (tsk_rt(t)->prediction_mode == CC_BRUTE_EWMA) {
+		est->history.observations[est->history.idx] = observed;
+		est->std = isqrt(varience(est->history.observations, est->center_line, est->history.count));
+		est->history.idx = (est->history.idx + 1) % est->history.window_size;
 	}
-	else {
-		fp_t rel_err = update_estimate(fb,
-			tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
-			tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
-			observed);
-
-		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
-			TRACE_TASK(t, "Invalid estimate. Patching.\n");
-			fb->est = _integer_to_fp(observed);
-			fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
+	else if (tsk_rt(t)->prediction_mode == CC_MR_EWMA) {
+		if (likely(est->history.count > 1)) {
+			lt_t mr;
+			est->history.range[est->history.ridx] = (observed > est->history.last_observed) ?
+				observed - est->history.last_observed :
+				est->history.last_observed - observed;
+			est->rsum += est->history.range[est->history.ridx];
+			mr = est->rsum / (est->history.count - 1);
+			est->std = est->rsum + (128*est->rsum)/1000; // std = mr/d_2, w/ d_2 = 1.128
+			est->history.ridx = (est->history.ridx + 1) % (est->history.window_size - 1);
 		}
-		else {
-			struct migration_info mig_info;
+		est->history.last_observed = observed;
+	}
 
-			sched_trace_prediction_err(t,
-				&(tsk_rt(t)->gpu_migration),
-				&rel_err);
+	return est->center_line;
+}
 
-			mig_info.observed = observed;
-			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
-			mig_info.distance = tsk_rt(t)->gpu_migration;
+void update_gpu_estimate(struct task_struct *t, lt_t observed)
+{
+	lt_t new_est;
+	struct migration_info mig_info;
 
-			sched_trace_migration(t, &mig_info);
-		}
+	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+
+	if (unlikely(observed > OBSERVATION_CAP)) {
+		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
+			observed,
+			OBSERVATION_CAP);
+		return;
 	}
-#endif
+
+	switch (tsk_rt(t)->prediction_mode) {
+	case SIMPLE_AVG:
+	case CC_BRUTE_AVG:
+	case CC_MR_AVG:
+		new_est = avg_method(t, observed);
+		break;
+	case SIMPLE_EWMA:
+	case CC_BRUTE_EWMA:
+	case CC_MR_EWMA:
+		new_est = ewma_method(t, observed);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	mig_info.observed = observed;
+	mig_info.estimated = new_est;
+	mig_info.distance = tsk_rt(t)->gpu_migration;
+	sched_trace_migration(t, &mig_info);
 
 	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
 		tsk_rt(t)->gpu_migration,
 		observed,
-		est->avg);
+		new_est);
 }
 
 gpu_migration_dist_t gpu_migration_distance(int a, int b)
diff --git a/litmus/litmus.c b/litmus/litmus.c
index d368202ab8c3..8380c0f6a393 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -332,28 +332,183 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
 }
 
 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-void init_gpu_affinity_state(struct task_struct* p)
+static void create_gpu_cc_mr_avg(obs_history_t* obs, uint16_t window_size)
 {
-	// under-damped
-	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
-	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = kmalloc(sizeof(lt_t)*(window_size-1), GFP_KERNEL);
+}
 
-#if 0
-	// emperical;
-	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
-	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
+static void create_gpu_cc_brute_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = NULL;
+}
 
-	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
-	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
+static void create_gpu_simple_avg(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = 0;
+	obs->observations = NULL;
+	obs->range = NULL;
+}
 
-	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
-	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
+static void destroy_obs_history(obs_history_t* obs)
+{
+	if (obs->observations)
+		kfree(obs->observations);
+	if (obs->range)
+		kfree(obs->range);
+}
+
+static void create_gpu_cc_mr_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = NULL;
+	obs->range = kmalloc(sizeof(lt_t)*(window_size-1), GFP_KERNEL);
+}
+
+static void create_gpu_cc_brute_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = window_size;
+	obs->observations = kmalloc(sizeof(lt_t)*window_size, GFP_KERNEL);
+	obs->range = NULL;
+}
+
+static void create_gpu_simple_ewma(obs_history_t* obs, uint16_t window_size)
+{
+	obs->window_size = 0;
+	obs->observations = NULL;
+	obs->range = NULL;
+}
+
+
+static void create_gpu_affinity_state(struct task_struct* p, uint8_t sigmas, uint16_t window_size)
+{
+	int i;
 
-	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
-	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
-#endif
 	p->rt_param.gpu_migration = MIG_NONE;
 	p->rt_param.last_gpu = -1;
+
+	switch (p->rt_param.prediction_mode) {
+	case SIMPLE_AVG:
+	case CC_BRUTE_AVG:
+	case CC_MR_AVG:
+		memset(&p->rt_param.gpu_avg_est, 0, sizeof(p->rt_param.gpu_avg_est));
+		break;
+	case SIMPLE_EWMA:
+	case CC_BRUTE_EWMA:
+	case CC_MR_EWMA:
+		memset(&p->rt_param.gpu_ewma_est, 0, sizeof(p->rt_param.gpu_ewma_est));
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	for (i = 0; i <= MIG_LAST; ++i) {
+		switch (p->rt_param.prediction_mode) {
+		case SIMPLE_AVG:
+			create_gpu_simple_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_BRUTE_AVG:
+			p->rt_param.gpu_avg_est[i].sigmas = sigmas;
+			create_gpu_cc_brute_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_MR_AVG:
+			p->rt_param.gpu_avg_est[i].sigmas = sigmas;
+			create_gpu_cc_mr_avg(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case SIMPLE_EWMA:
+			create_gpu_simple_ewma(&p->rt_param.gpu_avg_est[i].history, window_size);
+			break;
+		case CC_BRUTE_EWMA:
+			p->rt_param.gpu_ewma_est[i].sigmas = sigmas;
+			create_gpu_cc_brute_ewma(&p->rt_param.gpu_ewma_est[i].history, window_size);
+			break;
+		case CC_MR_EWMA:
+			p->rt_param.gpu_ewma_est[i].sigmas = sigmas;
+			create_gpu_cc_mr_ewma(&p->rt_param.gpu_ewma_est[i].history, window_size);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+}
+
+static void destroy_gpu_affinity_state(struct task_struct* p)
+{
+	int i;
+
+	if (p->rt_param.prediction_mode == CC_BRUTE_AVG ||
+	    p->rt_param.prediction_mode == CC_MR_AVG) {
+		for (i = 0; i <= MIG_LAST; ++i) {
+			destroy_obs_history(&p->rt_param.gpu_avg_est[i].history);
+		}
+	}
+	else if (p->rt_param.prediction_mode == CC_BRUTE_EWMA ||
+	         p->rt_param.prediction_mode == CC_MR_EWMA) {
+		for (i = 0; i <= MIG_LAST; ++i) {
+			destroy_obs_history(&p->rt_param.gpu_ewma_est[i].history);
+		}
+	}
+}
+
+asmlinkage long sys_config_gpu_affinity_predictor(void* __user arg)
+{
+	struct task_struct *t = current;
+	predictor_t params;
+
+	if (!access_ok(VERIFY_READ, arg, sizeof(params))) {
+		return -EINVAL;
+	}
+	if (__copy_from_user(&params, arg, sizeof(params))) {
+		return -EINVAL;
+	}
+
+	if (params.type != SIMPLE_AVG &&
+	    params.type != SIMPLE_EWMA &&
+	    params.type != CC_BRUTE_AVG &&
+	    params.type != CC_BRUTE_EWMA &&
+	    params.type != CC_MR_AVG &&
+	    params.type != CC_MR_EWMA) {
+		return -EINVAL;
+	}
+
+	if (params.type != SIMPLE_AVG && params.type != SIMPLE_EWMA) {
+		if (params.window_size <= 10) {
+			return -EINVAL;
+		}
+	}
+
+	if (params.type == SIMPLE_EWMA ||
+	    params.type == CC_BRUTE_EWMA ||
+	    params.type == CC_MR_EWMA) {
+		if (params.alpha_num > params.alpha_denom) {
+			return -EINVAL;
+		}
+		if (params.alpha_num == 0) {
+			return -EINVAL;
+		}
+		if (params.alpha_denom == 0) {
+			params.alpha_denom = 1;
+		}
+	}
+
+	if (t->rt_param.prediction_mode != 0) {
+		destroy_gpu_affinity_state(t);
+	}
+
+	t->rt_param.prediction_mode = params.type;
+	create_gpu_affinity_state(t, params.sigmas, params.window_size);
+
+	return 0;
+}
+#else
+asmlinkage long sys_config_gpu_affinity_predictor(void* __user arg)
+{
+	return -EINVAL;
 }
 #endif
@@ -426,10 +581,6 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
 		p->rt_param.ctrl_page = ctrl_page;
 	}
 
-#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-	init_gpu_affinity_state(p);
-#endif
-
 #ifdef CONFIG_LITMUS_NESTED_LOCKING
 	INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
 	raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
@@ -467,9 +618,6 @@ long __litmus_admit_task(struct task_struct* tsk)
 #ifdef CONFIG_LITMUS_NVIDIA
 	atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
 #endif
-#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
-	init_gpu_affinity_state(tsk);
-#endif
 #ifdef CONFIG_LITMUS_NESTED_LOCKING
 	tsk_rt(tsk)->blocked_lock = NULL;
 	raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
@@ -539,6 +687,8 @@ void litmus_exit_task(struct task_struct* tsk)
 	bheap_node_free(tsk_rt(tsk)->heap_node);
 	release_heap_free(tsk_rt(tsk)->rel_heap);
 
+	destroy_gpu_affinity_state(tsk);
+
 	atomic_dec(&rt_task_count);
 	reinit_litmus_state(tsk, 1);
 }
--
cgit v1.2.2
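Note on the CC_MR_* predictors added above: they follow an individuals/moving-range control chart. The windowed mean is the center line, the standard deviation is estimated from the mean moving range of consecutive observations (sigma ~= MRbar / d_2, with d_2 = 1.128 for ranges of pairs, which is what the "std = mr/d_2" comment in the patch refers to), and samples outside center +/- sigmas*std are rejected. The following is a minimal userspace sketch of that scheme for illustration only; the type and function names (mr_est_t, mr_update) are invented here, the caller is assumed to allocate the obs/range buffers, and the integer handling of the d_2 constant follows the textbook formula rather than being copied from the kernel code.

#include <stdint.h>

typedef struct {
	uint64_t *obs;      /* circular window of observations */
	uint64_t *range;    /* moving ranges |x_i - x_{i-1}|, window-1 entries */
	uint16_t window;    /* window size (>= 2) */
	uint16_t count;
	uint16_t idx;
	uint16_t ridx;
	uint64_t sum;       /* running sum of windowed observations */
	uint64_t rsum;      /* running sum of windowed moving ranges */
	uint64_t last;      /* previous observation */
	uint64_t center;    /* center line (windowed mean) */
	uint64_t std;       /* estimated standard deviation */
	uint8_t sigmas;     /* half-width of the acceptance band */
} mr_est_t;

/* Feed one observation; returns the updated center line, or 0 if the
 * sample fell outside the control limits and was rejected. */
static uint64_t mr_update(mr_est_t *e, uint64_t x)
{
	if (e->count >= 10) {  /* only filter once the chart is primed */
		uint64_t half = e->std * e->sigmas;
		uint64_t lo = (e->center > half) ? e->center - half : 0;
		uint64_t hi = e->center + half;
		if (x < lo || x > hi)
			return 0;
	}

	if (e->count < e->window) {
		++e->count;
	} else {
		/* window full: retire the oldest observation and range */
		e->sum -= e->obs[e->idx];
		e->rsum -= e->range[e->ridx];
	}

	e->obs[e->idx] = x;
	e->sum += x;
	e->center = e->sum / e->count;

	if (e->count > 1) {
		uint64_t mrbar;
		e->range[e->ridx] = (x > e->last) ? x - e->last : e->last - x;
		e->rsum += e->range[e->ridx];
		mrbar = e->rsum / (e->count - 1);   /* mean moving range */
		e->std = (mrbar * 1000) / 1128;     /* sigma ~= MRbar / d_2, d_2 = 1.128 */
		e->ridx = (e->ridx + 1) % (e->window - 1);
	}

	e->last = x;
	e->idx = (e->idx + 1) % e->window;
	return e->center;
}

The CC_BRUTE_* variants in the patch obtain std directly from the brute-force varience()/isqrt() pair over the whole window instead of the moving range; the cutoff logic is the same in both cases.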
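The *_EWMA predictors, by contrast, keep no windowed history for the center line: each accepted observation is folded into an exponentially weighted moving average, with the smoothing weight carried as the integer fraction alpha_num/alpha_denom so the kernel avoids floating point, and the very first observation seeding the estimate. A small illustrative sketch of that update follows (names invented here, overflow handling omitted), assuming the same integer-fraction convention as the patch:

#include <stdint.h>

typedef struct {
	uint64_t center;       /* current EWMA estimate */
	uint32_t alpha_num;    /* weight kept on the old estimate ... */
	uint32_t alpha_denom;  /* ... expressed as alpha_num/alpha_denom */
	int primed;            /* nonzero once the first sample has been seen */
} ewma_sketch_t;

static uint64_t ewma_update(ewma_sketch_t *e, uint64_t x)
{
	if (!e->primed) {
		/* seed the estimate with the first observation */
		e->center = x;
		e->primed = 1;
	} else {
		/* center = a*center + (1 - a)*x, with a = alpha_num/alpha_denom,
		 * evaluated entirely in integer arithmetic */
		e->center = (e->alpha_num * e->center) / e->alpha_denom +
			((e->alpha_denom - e->alpha_num) * x) / e->alpha_denom;
	}
	return e->center;
}

For example, with alpha_num = 3 and alpha_denom = 4, a new observation of 100 against a current center of 200 yields (3*200)/4 + (1*100)/4 = 175, i.e. three quarters of the weight stays on the old estimate. This is also why sys_config_gpu_affinity_predictor rejects alpha_num of 0 or alpha_num > alpha_denom: the fraction must stay in (0, 1].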