litmus/gpu_affinity.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169


#ifdef CONFIG_LITMUS_NVIDIA

#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/gpu_affinity.h>

#include <litmus/sched_trace.h>

// Observations strictly greater than this value are dropped by
// update_gpu_estimate() instead of being folded into the estimate.
// NOTE(review): lt_t is presumably in nanoseconds (i.e. a 2 s cap) -- confirm.
#define OBSERVATION_CAP ((lt_t)(2e9))

// Outlier-filter factors (only referenced by the currently disabled
// filter in update_gpu_estimate()): a sample is rejected when it is
// larger than HI_THRESHOLD times the average or smaller than
// 1/LO_THRESHOLD of the average.
// reason for skew: high outliers are less
// frequent and way out of bounds
#define HI_THRESHOLD 2
#define LO_THRESHOLD 4

// Smaller of two values. Every argument is parenthesized so that operands
// containing lower-precedence operators (e.g. MIN(x | 1, y)) expand
// correctly. The selected argument is still evaluated twice, so do not
// pass expressions with side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/*
 * Feedback-style estimator step.
 *
 * @fb:       estimator state; both fb->est and fb->accum_err are rewritten.
 * @a:        weight applied to the error of this observation.
 * @b:        weight applied to the error accumulated before this call.
 * @observed: the newly measured value.
 *
 * The new estimate is a * (observed - old estimate) + b * (old accumulated
 * error); the accumulated error then grows by this observation's error.
 *
 * Returns the error of this observation divided by the observation itself.
 * NOTE(review): within this file the only caller sits in a disabled
 * (#if 0) region of update_gpu_estimate().
 */
static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
{
	const fp_t sample = _integer_to_fp(observed);
	// error of the previous estimate w.r.t. the fresh sample
	const fp_t delta = _sub(sample, fb->est);
	// both terms are computed from the state as it was on entry
	const fp_t weighted_delta = _mul(a, delta);
	const fp_t weighted_accum = _mul(b, fb->accum_err);
	const fp_t rel = _div(delta, sample);

	fb->est = _add(weighted_delta, weighted_accum);
	fb->accum_err = _add(fb->accum_err, delta);

	return rel;
}

/*
 * Fold a new observation into the moving-average estimate that task @t
 * keeps for its current migration distance (tsk_rt(t)->gpu_migration).
 *
 * @t:        task whose estimator is updated.
 * @observed: the measured value to record.
 *
 * Observations above OBSERVATION_CAP are traced and discarded. Otherwise
 * the sample replaces the oldest entry of a ring buffer holding at most
 * AVG_EST_WINDOW_SIZE samples, and the running sum and average are
 * refreshed. A migration trace record carrying the observation and the
 * average as it was *before* this update is emitted along the way.
 */
void update_gpu_estimate(struct task_struct *t, lt_t observed)
{
	// kept for the disabled feedback-estimator code further down:
	//feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
	struct migration_info mig_info;
	avg_est_t *est;

	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);

	// one estimator per migration distance; select the current one
	est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);

	// guard: refuse implausibly large samples outright
	if (unlikely(observed > OBSERVATION_CAP)) {
		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
			   observed, OBSERVATION_CAP);
		return;
	}

	// Disabled outlier filter (compiled out).
#if 0
	// filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
	// of range of the average, but only filter if enough samples
	// have been taken.
	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
		if (unlikely(observed < est->avg/LO_THRESHOLD)) {
			TRACE_TASK(t, "Observation is too small: %llu\n",
							observed);
			return;
		}
		else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
			TRACE_TASK(t, "Observation is too large: %llu\n",
							observed);
			return;
		}
	}
#endif

	// snapshot for the trace record: the estimate prior to this sample
	mig_info.distance = tsk_rt(t)->gpu_migration;
	mig_info.estimated = est->avg;
	mig_info.observed = observed;

	// While the window is still filling, just grow the sample count;
	// once it is full, retire the entry that is about to be overwritten.
	if (unlikely(est->count < AVG_EST_WINDOW_SIZE))
		est->count++;
	else
		est->sum -= est->history[est->idx];

	sched_trace_migration(t, &mig_info);

	// store the sample in the ring buffer and advance the write slot
	est->history[est->idx] = observed;
	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;

	// refresh the running sum and the average derived from it
	est->sum += observed;
	est->avg = est->sum / est->count;

	// Disabled feedback-based estimator (compiled out).
#if 0
	if(unlikely(fb->est.val == 0)) {
		// kludge-- cap observed values to prevent whacky estimations.
		// whacky stuff happens during the first few jobs.
		if(unlikely(observed > OBSERVATION_CAP)) {
			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
					   observed, OBSERVATION_CAP);
			observed = OBSERVATION_CAP;
		}

		// take the first observation as our estimate
		// (initial value of 0 was bogus anyhow)
		fb->est = _integer_to_fp(observed);
		fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
	}
	else {
		fp_t rel_err = update_estimate(fb,
									   tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
									   tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
									   observed);

		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
			TRACE_TASK(t, "Invalid estimate. Patching.\n");
			fb->est = _integer_to_fp(observed);
			fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
		}
		else {
			struct migration_info mig_info;

			sched_trace_prediction_err(t,
									   &(tsk_rt(t)->gpu_migration),
									   &rel_err);

			mig_info.observed = observed;
			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
			mig_info.distance = tsk_rt(t)->gpu_migration;

			sched_trace_migration(t, &mig_info);
		}
	}
#endif

	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
		   tsk_rt(t)->gpu_migration, observed, est->avg);
}

/*
 * Migration distance between GPU ids @a and @b.
 *
 * GPUs are organized in a binary hierarchy (no more than 2^MIG_FAR GPUs):
 * the distance is the smallest shift i in [0, MIG_FAR] for which
 * (a >> i) == (b >> i), so identical ids yield 0.
 *
 * Returns MIG_NONE when either id is negative, or when no shift up to
 * MIG_FAR makes the ids agree (a warning is traced in the latter case).
 * The result is always traced before returning.
 */
gpu_migration_dist_t gpu_migration_distance(int a, int b)
{
	int level;
	int dist = MIG_NONE;

	if (likely(a >= 0 && b >= 0)) {
		// climb the hierarchy until both ids fall under the same node
		for (level = 0; level <= MIG_FAR; ++level) {
			if ((a >> level) == (b >> level)) {
				dist = level;
				break;
			}
		}

		// loop ran to completion without a match; hopefully never reached.
		if (level > MIG_FAR) {
			TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
		}
	}

	TRACE_CUR("Distance %d -> %d is %d\n", a, b, dist);

	return dist;
}


#endif