1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
#ifdef CONFIG_LITMUS_NVIDIA
#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/gpu_affinity.h>
/*
 * Upper bound applied to the very first observation fed to an estimator.
 * Kept as a parenthesized unsigned integer constant (not 2*1e9, which is a
 * double) so that comparisons stay integer-only, the value matches a %llu
 * format argument, and the macro binds correctly inside larger expressions.
 * NOTE(review): units follow lt_t; presumably nanoseconds (i.e. 2 s) - confirm.
 */
#define OBSERVATION_CAP (2000000000ULL)
/*
 * Advance a feedback estimator by one observation.
 *
 * fb       - estimator state; fb->est and fb->accum_err are both rewritten.
 * a, b     - fixed-point gains applied to the current error and to the
 *            accumulated error, respectively.
 * observed - the newly measured value (converted to fixed point here).
 *
 * The new estimate is  a * (observed - est) + b * accum_err,  computed with
 * the accumulated error as it was BEFORE this observation; only afterwards is
 * the current error added into the accumulator.
 */
static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
{
	/* error of the previous estimate relative to what was just observed */
	const fp_t delta = _sub(_integer_to_fp(observed), fb->est);

	/* the estimate must be written before the accumulator is advanced */
	fb->est = _add(_mul(a, delta), _mul(b, fb->accum_err));
	fb->accum_err = _add(fb->accum_err, delta);
}
void update_gpu_estimate(struct task_struct *t, lt_t observed)
{
feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
if(unlikely(fb->est.val == 0)) {
// kludge-- cap observed values to prevent whacky estimations.
// whacky stuff happens during the first few jobs.
if(unlikely(observed > OBSERVATION_CAP)) {
TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
observed, OBSERVATION_CAP);
observed = OBSERVATION_CAP;
}
// take the first observation as our estimate
// (initial value of 0 was bogus anyhow)
fb->est = _integer_to_fp(observed);
fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
}
else {
update_estimate(fb,
tsk_rt(t)->gpu_fb_param_a,
tsk_rt(t)->gpu_fb_param_b,
observed);
if(_fp_to_integer(fb->est) <= 0) {
// TODO: talk to Jonathan about how well this works.
// Maybe we should average the observed and est instead?
TRACE_TASK(t, "Invalid estimate. Patching.\n");
fb->est = _integer_to_fp(observed);
fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
}
}
TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
tsk_rt(t)->gpu_migration,
observed,
_fp_to_integer(fb->est),
_point(fb->est));
}
/*
 * Compute the migration distance between GPU indices a and b.
 *
 * GPUs are organized in a binary hierarchy with no more than 2^MIG_FAR GPUs.
 * The distance is the smallest shift count s in [0, MIG_FAR] for which
 * (a >> s) == (b >> s).
 *
 * Returns MIG_NONE when either index is negative, or when no shift count up
 * to MIG_FAR makes the two indices equal (a warning is traced in that case).
 * The resulting distance is always traced before returning.
 */
gpu_migration_dist_t gpu_migration_distance(int a, int b)
{
	int distance = MIG_NONE;
	int level;

	if (likely(a >= 0 && b >= 0)) {
		/* climb the hierarchy until both indices fall under the same node */
		for (level = 0; level <= MIG_FAR; level++) {
			if ((a >> level) == (b >> level))
				break;
		}

		if (level <= MIG_FAR) {
			distance = level;
		} else {
			// hopefully never reached.
			TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
		}
	}

	TRACE_CUR("Distance %d -> %d is %d\n",
		  a, b, distance);

	return distance;
}
#endif
|