aboutsummaryrefslogtreecommitdiffstats
path: root/litmus/gpu_affinity.c
blob: 70a86bdd9aecaa53fcaa479ea010dc1778de3a99 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

#ifdef CONFIG_LITMUS_NVIDIA

#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/gpu_affinity.h>

/*
 * Upper bound applied to the very first observation fed into an estimator
 * (units follow lt_t; presumably nanoseconds, i.e. 2 seconds -- TODO confirm).
 *
 * Deliberately an unsigned 64-bit integer constant: the previous spelling
 * (2*1e9) was a double, which forced a floating-point comparison against
 * lt_t values and did not match the %llu specifier it is printed with.
 * The parentheses keep the expansion safe next to other operators.
 */
#define OBSERVATION_CAP (2ULL * 1000000000ULL)

/*
 * Fold one new observation into the feedback estimator `fb`.
 *
 *   err        = observed - fb->est
 *   fb->est    = a * err + b * fb->accum_err   (accum_err as it was on entry)
 *   accum_err += err
 *
 * @fb:       estimator state, updated in place
 * @a:        weight applied to the current error
 * @b:        weight applied to the previously accumulated error
 * @observed: newly measured value
 */
static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
{
	fp_t sample = _integer_to_fp(observed);
	fp_t delta = _sub(sample, fb->est);
	fp_t err_term = _mul(a, delta);
	fp_t accum_term = _mul(b, fb->accum_err);

	// the new estimate uses the accumulated error *before* this sample
	fb->est = _add(err_term, accum_term);
	fb->accum_err = _add(fb->accum_err, delta);
}

void update_gpu_estimate(struct task_struct *t, lt_t observed)
{
	feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);

	WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST);

	if(unlikely(fb->est.val == 0)) {
		// kludge-- cap observed values to prevent whacky estimations.
		// whacky stuff happens during the first few jobs.
		if(unlikely(observed > OBSERVATION_CAP)) {
			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
					   observed, OBSERVATION_CAP);
			observed = OBSERVATION_CAP;
		}

		// take the first observation as our estimate
		// (initial value of 0 was bogus anyhow)
		fb->est = _integer_to_fp(observed);
		fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
	}
	else {
		update_estimate(fb,
						tsk_rt(t)->gpu_fb_param_a,
						tsk_rt(t)->gpu_fb_param_b,
						observed);

		if(_fp_to_integer(fb->est) <= 0) {
			// TODO: talk to Jonathan about how well this works.
			// Maybe we should average the observed and est instead?
			TRACE_TASK(t, "Invalid estimate. Patching.\n");
			fb->est = _integer_to_fp(observed);
			fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
		}
	}

	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
			   tsk_rt(t)->gpu_migration,
			   observed,
			   _fp_to_integer(fb->est),
			   _point(fb->est));
}

/*
 * Compute the migration distance between GPUs `a` and `b`.
 *
 * GPUs are organized in a binary hierarchy (no more than 2^MIG_FAR GPUs):
 * the distance is the smallest shift `i` in [0, MIG_FAR] for which
 * (a >> i) == (b >> i).  If either index is negative, or no such shift
 * exists, MIG_NONE is returned.  The result is always traced.
 */
gpu_migration_dist_t gpu_migration_distance(int a, int b)
{
	int dist = MIG_NONE;

	if(likely(a >= 0 && b >= 0)) {
		int level = 0;

		// climb the hierarchy until both GPUs fall under the same node
		while(level <= MIG_FAR && (a >> level) != (b >> level)) {
			++level;
		}

		if(level <= MIG_FAR) {
			dist = level;
		}
		else {
			// hopefully never reached; dist stays MIG_NONE.
			TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
		}
	}

	TRACE_CUR("Distance %d -> %d is %d\n",
			  a, b, dist);

	return dist;
}




#endif