litmus/gpu_affinity.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231


#ifdef CONFIG_LITMUS_NVIDIA

#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/gpu_affinity.h>

#include <litmus/sched_trace.h>

/* Observations larger than this are dropped by update_gpu_estimate(). */
#define OBSERVATION_CAP ((lt_t)(2e9))

// reason for skew: high outliers are less
// frequent and way out of bounds
//#define HI_THRESHOLD 2
//#define LO_THRESHOLD 4

/* The outlier filter accepts samples within
 * avg +/- std * NUM_STDEV_NUM / NUM_STDEV_DENOM. */
#define NUM_STDEV_NUM	1
#define NUM_STDEV_DENOM	2

/* Smaller of a and b. Every use of an argument is parenthesized so that
 * operands containing lower-precedence operators (e.g. MIN(x | y, z))
 * expand correctly. Arguments are still evaluated more than once, so do
 * not pass expressions with side effects. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/*
 * Advance a feedback estimator by one observation.
 *
 * The error is the observation minus the current estimate. The new estimate
 * is a * error + b * (error accumulated before this call); the error is then
 * added to the accumulator.
 *
 * Returns the error divided by the observation (both in fixed point).
 *
 * NOTE(review): observed == 0 makes the divisor zero; what happens then
 * depends on _div() -- confirm against fpmath.
 */
static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
{
	fp_t sample = _integer_to_fp(observed);
	fp_t delta = _sub(sample, fb->est);
	fp_t next_est = _add(_mul(a, delta), _mul(b, fb->accum_err));
	fp_t rel = _div(delta, sample);

	/* Commit only after every value derived from the old state is computed. */
	fb->est = next_est;
	fb->accum_err = _add(fb->accum_err, delta);

	return rel;
}

/*
 * Population variance of the first `count` entries of `nums` around `avg`:
 * the sum of squared deviations divided by `count` (integer division).
 *
 * @nums:  samples; only nums[0 .. count-1] are read.
 * @avg:   mean to measure deviations from (supplied by the caller).
 * @count: number of samples; 0 yields 0 rather than dividing by zero.
 */
lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count)
{
	/* brute force: takes about as much time as incremental running methods when
	 * count < 50 (on Bonham). Brute force also less prone to overflow.
	 */
	lt_t sqdeviations = 0;
	uint16_t i;

	/* An empty sample set has no spread; also guards the division below. */
	if (count == 0)
		return 0;

	for (i = 0; i < count; ++i) {
		/* The signed difference is stored unsigned on purpose: a negative
		 * value wraps, but its unsigned square is congruent to the true
		 * square, so the product is exact whenever the true square fits in
		 * lt_t -- and unsigned multiplication cannot invoke undefined
		 * behavior on overflow. */
		lt_t temp = (int64_t)nums[i] - (int64_t)avg;
		sqdeviations += temp * temp;
	}
	return sqdeviations/count;
}

/*
 * Integer square root: returns floor(sqrt(n)).
 *
 * Bit-by-bit (base-2 digit-by-digit) method: `place` walks down through the
 * powers of four, and at each step the candidate bit is kept only if the
 * remainder left in `n` can still absorb it.
 */
lt_t isqrt(lt_t n)
{
	lt_t root = 0;
	lt_t place;

	/* Start at the highest power of four representable in lt_t (8-bit bytes
	 * assumed) and drop to the largest one that does not exceed n. */
	for (place = ((lt_t)1) << (sizeof(n)*8-2); place > n; place >>= 2)
		;

	for (; place != 0; place >>= 2) {
		lt_t trial = root + place;

		root >>= 1;
		if (n >= trial) {
			n -= trial;
			root += place;
		}
	}
	return root;
}

/*
 * Fold a new observation into the running-average estimator that task `t`
 * keeps for its current GPU migration distance.
 *
 * @t:        task whose estimator is updated; the estimator slot is selected
 *            by tsk_rt(t)->gpu_migration.
 * @observed: the new sample.
 *
 * The sample is dropped (estimator left untouched) when
 *   - it exceeds OBSERVATION_CAP, or
 *   - more than MIN(10, AVG_EST_WINDOW_SIZE/2) samples are already recorded
 *     and it lies outside avg +/- std * NUM_STDEV_NUM / NUM_STDEV_DENOM.
 *
 * An accepted sample is reported through sched_trace_migration() together
 * with the estimate that was in effect before the update, and is then written
 * into the circular history, after which sum, avg and std are recomputed.
 */
void update_gpu_estimate(struct task_struct *t, lt_t observed)
{
	avg_est_t *est;
	struct migration_info mig_info;

	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);

	est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);

	if (unlikely(observed > OBSERVATION_CAP)) {
		TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
			observed,
			OBSERVATION_CAP);
		return;
	}

	// filter values outside NUM_STDEVx the standard deviation,
	// but only filter if enough samples have been taken.
	if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
		lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM;
		lt_t lower = est->avg - MIN(range, est->avg); // clamp at 0: no underflow.
		lt_t upper = est->avg + range;

		if (unlikely(observed < lower)) {
			TRACE_TASK(t, "Observation is too small: %llu\n", observed);
			return;
		}

		if (unlikely(observed > upper)) {
			TRACE_TASK(t, "Observation is too large: %llu\n", observed);
			return;
		}
	}

	if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
		/* Window still filling up: one more sample contributes to sum. */
		++est->count;
	}
	else {
		/* Window full: retire the sample that is about to be overwritten. */
		est->sum -= est->history[est->idx];
	}

	/* Trace before updating so that `estimated` is the prediction that was
	 * in effect when this observation was made. */
	mig_info.observed = observed;
	mig_info.estimated = est->avg;
	mig_info.distance = tsk_rt(t)->gpu_migration;
	sched_trace_migration(t, &mig_info);

	/* Store the sample and refresh the statistics. std is recomputed from
	 * the first est->count history entries using the new average. */
	est->history[est->idx] = observed;
	est->sum += observed;
	est->avg = est->sum/est->count;
	est->std = isqrt(varience(est->history, est->avg, est->count));
	est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;

	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
			   tsk_rt(t)->gpu_migration,
			   observed,
			   est->avg);
}

/*
 * Migration distance between GPU indices `a` and `b`.
 *
 * Returns the smallest level i in [0, MIG_FAR] for which a >> i equals
 * b >> i. Returns MIG_NONE when either index is negative, or when no such
 * level exists (a warning is traced in that case). The result is always
 * traced.
 */
gpu_migration_dist_t gpu_migration_distance(int a, int b)
{
	// GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs
	int level;
	int dist = MIG_NONE;

	if (likely(a >= 0 && b >= 0)) {
		/* Climb the hierarchy until both indices share the same prefix. */
		level = 0;
		while (level <= MIG_FAR && (a >> level) != (b >> level))
			++level;

		if (level <= MIG_FAR) {
			dist = level;
		}
		else {
			// hopefully never reached; dist stays MIG_NONE.
			TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
		}
	}

	TRACE_CUR("Distance %d -> %d is %d\n",
			  a, b, dist);

	return dist;
}


#endif