diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-31 14:34:59 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2014-03-03 10:12:28 -0500 |
commit | 35a855f55eeeda1946294c3fc27e0927b6055da2 (patch) | |
tree | d6eb04bc0f28d93926bd5eec914f137971c47c16 | |
parent | 63a87f4eca47a185eb8e73cb6f34071864c02297 (diff) |
Add GPU affinity tracking.
Adds GPU affinity tracking/prediction routines. Tracking
uses a process-chart method whereby observations more than two
standard deviations below or above the average are discarded.
-rw-r--r-- | include/litmus/gpu_affinity.h | 58 | ||||
-rw-r--r-- | include/litmus/rt_param.h | 48 | ||||
-rw-r--r-- | litmus/Makefile | 3 | ||||
-rw-r--r-- | litmus/gpu_affinity.c | 163 |
4 files changed, 268 insertions, 4 deletions
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h new file mode 100644 index 000000000000..de5d1a5ab9e9 --- /dev/null +++ b/include/litmus/gpu_affinity.h | |||
@@ -0,0 +1,58 @@ | |||
1 | #ifndef LITMUS_GPU_AFFINITY_H | ||
2 | #define LITMUS_GPU_AFFINITY_H | ||
3 | |||
4 | #include <litmus/rt_param.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/litmus.h> | ||
7 | |||
8 | void update_gpu_estimate(struct task_struct* t, lt_t observed); | ||
9 | gpu_migration_dist_t gpu_migration_distance(int a, int b); | ||
10 | |||
11 | static inline void reset_gpu_tracker(struct task_struct* t) | ||
12 | { | ||
13 | t->rt_param.accum_gpu_time = 0; | ||
14 | t->rt_param.gpu_time_stamp = 0; | ||
15 | } | ||
16 | |||
17 | static inline void start_gpu_tracker(struct task_struct* t) | ||
18 | { | ||
19 | lt_t now = litmus_clock(); | ||
20 | if (likely(!t->rt_param.gpu_time_stamp)) | ||
21 | t->rt_param.gpu_time_stamp = now; | ||
22 | } | ||
23 | |||
24 | static inline void stop_gpu_tracker(struct task_struct* t) | ||
25 | { | ||
26 | lt_t now = litmus_clock(); | ||
27 | if (likely(t->rt_param.gpu_time_stamp)) { | ||
28 | t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp); | ||
29 | t->rt_param.gpu_time_stamp = 0; | ||
30 | } | ||
31 | } | ||
32 | |||
33 | static inline lt_t get_gpu_time(struct task_struct* t) | ||
34 | { | ||
35 | lt_t accum = t->rt_param.accum_gpu_time; | ||
36 | if (t->rt_param.gpu_time_stamp != 0) | ||
37 | accum += (litmus_clock() - t->rt_param.gpu_time_stamp); | ||
38 | return accum; | ||
39 | } | ||
40 | |||
41 | static inline lt_t get_gpu_estimate(struct task_struct* t, | ||
42 | gpu_migration_dist_t dist) | ||
43 | { | ||
44 | int i; | ||
45 | lt_t val; | ||
46 | |||
47 | if(dist == MIG_NONE) | ||
48 | dist = MIG_LOCAL; | ||
49 | |||
50 | val = t->rt_param.gpu_migration_est[dist].avg; | ||
51 | for(i = dist-1; i >= 0; --i) | ||
52 | if(t->rt_param.gpu_migration_est[i].avg > val) | ||
53 | val = t->rt_param.gpu_migration_est[i].avg; | ||
54 | |||
55 | return ((val > 0) ? val : dist+1); | ||
56 | } | ||
57 | |||
58 | #endif | ||
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 3e4a32af1ba7..b5e58b79dfda 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h | |||
@@ -98,6 +98,14 @@ struct affinity_observer_args | |||
98 | int lock_od; | 98 | int lock_od; |
99 | }; | 99 | }; |
100 | 100 | ||
101 | struct gpu_affinity_observer_args | ||
102 | { | ||
103 | struct affinity_observer_args obs; | ||
104 | unsigned int replica_to_gpu_offset; | ||
105 | unsigned int rho; | ||
106 | int relaxed_rules; | ||
107 | }; | ||
108 | |||
101 | #define IKGLP_M_IN_FIFOS (0u) | 109 | #define IKGLP_M_IN_FIFOS (0u) |
102 | #define IKGLP_UNLIMITED_IN_FIFOS (~0u) | 110 | #define IKGLP_UNLIMITED_IN_FIFOS (~0u) |
103 | #define IKGLP_OPTIMAL_FIFO_LEN (0u) | 111 | #define IKGLP_OPTIMAL_FIFO_LEN (0u) |
@@ -154,6 +162,40 @@ struct control_page { | |||
154 | #include <linux/semaphore.h> | 162 | #include <linux/semaphore.h> |
155 | #include <litmus/binheap.h> | 163 | #include <litmus/binheap.h> |
156 | 164 | ||
165 | /*** GPU affinity tracking structures ***/ | ||
166 | |||
/*
 * Migration distance between two GPUs. Values MIG_LOCAL..MIG_FAR count up
 * from 0; MIG_NONE follows MIG_FAR, and MIG_LAST aliases MIG_NONE as the
 * largest valid value.
 */
typedef enum gpu_migration_dist {
	MIG_LOCAL = 0,
	MIG_NEAR,	/* 1 */
	MIG_MED,	/* 2 */
	MIG_FAR,	/* 3; assumes 8 GPU binary tree hierarchy */
	MIG_NONE,	/* 4 */

	MIG_LAST = MIG_NONE
} gpu_migration_dist_t;
177 | |||
#if 0
/*
 * Disabled: state for the (also disabled) feedback estimator
 * update_estimate() in litmus/gpu_affinity.c.
 */
typedef struct feedback_est {
	fp_t est;	/* current estimate */
	fp_t accum_err;	/* running sum of observed-minus-estimate errors */
} feedback_est_t;	/* terminating ';' was missing in the original */
#endif
185 | |||
186 | #define AVG_EST_WINDOW_SIZE 20 | ||
187 | typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk); | ||
188 | |||
189 | typedef struct avg_est { | ||
190 | lt_t history[AVG_EST_WINDOW_SIZE]; | ||
191 | uint16_t count; | ||
192 | uint16_t idx; | ||
193 | lt_t sum; | ||
194 | lt_t avg; | ||
195 | lt_t std; | ||
196 | } avg_est_t; | ||
197 | |||
198 | |||
157 | struct _rt_domain; | 199 | struct _rt_domain; |
158 | struct bheap_node; | 200 | struct bheap_node; |
159 | struct release_heap; | 201 | struct release_heap; |
@@ -217,16 +259,14 @@ struct rt_param { | |||
217 | gpu_migration_dist_t gpu_migration; | 259 | gpu_migration_dist_t gpu_migration; |
218 | int last_gpu; | 260 | int last_gpu; |
219 | lt_t accum_gpu_time; | 261 | lt_t accum_gpu_time; |
220 | lt_t gpu_time_stamp | 262 | lt_t gpu_time_stamp; |
221 | unsigned int suspend_gpu_tracker_on_block:1; | 263 | unsigned int suspend_gpu_tracker_on_block:1; |
222 | #endif /* end LITMUS_AFFINITY_LOCKING */ | 264 | #endif /* end LITMUS_AFFINITY_LOCKING */ |
223 | #endif /* end LITMUS_NVIDIA */ | 265 | #endif /* end LITMUS_NVIDIA */ |
224 | 266 | ||
225 | #if 0 /* PORT RECHECK */ | ||
226 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 267 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
227 | notify_rsrc_exit_t rsrc_exit_cb; | 268 | notify_rsrc_exit_t rsrc_exit_cb; |
228 | void* rsrc_exit_cb_args | 269 | void* rsrc_exit_cb_args; |
229 | #endif | ||
230 | #endif | 270 | #endif |
231 | 271 | ||
232 | #ifdef CONFIG_LITMUS_LOCKING | 272 | #ifdef CONFIG_LITMUS_LOCKING |
diff --git a/litmus/Makefile b/litmus/Makefile index c0b0821667be..0bda543ca57c 100644 --- a/litmus/Makefile +++ b/litmus/Makefile | |||
@@ -31,6 +31,9 @@ obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o | |||
31 | obj-$(CONFIG_LITMUS_NESTED_LOCKING) += fifo_lock.o prioq_lock.o ikglp_lock.o | 31 | obj-$(CONFIG_LITMUS_NESTED_LOCKING) += fifo_lock.o prioq_lock.o ikglp_lock.o |
32 | obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o | 32 | obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o |
33 | obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o | 33 | obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o |
34 | ifeq ($(CONFIG_LITMUS_AFFINITY_LOCKING),y) | ||
35 | obj-$(CONFIG_LITMUS_NVIDIA) += gpu_affinity.o | ||
36 | endif | ||
34 | 37 | ||
35 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o | 38 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o |
36 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o | 39 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o |
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c new file mode 100644 index 000000000000..85fa0ffb7328 --- /dev/null +++ b/litmus/gpu_affinity.c | |||
@@ -0,0 +1,163 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <litmus/litmus.h> | ||
3 | #include <litmus/gpu_affinity.h> | ||
4 | |||
5 | #include <litmus/sched_trace.h> | ||
6 | #include <litmus/trace.h> | ||
7 | |||
/*
 * Observations larger than this are dropped outright. The value is
 * 2,000,000,000, described as a two-second cap.
 * NOTE(review): that implies nanosecond units -- confirm against
 * litmus_clock().
 */
#define OBSERVATION_CAP ((lt_t)2000000000ULL)

/* Width of the outlier filter in standard deviations, as NUM/DENOM. */
#define NUM_STDEV_NUM 2
#define NUM_STDEV_DENOM 1
13 | |||
/*
 * Smaller of two values. Every use of an argument is parenthesized so that
 * an operator inside an argument that binds more loosely than '<' or '?:'
 * (e.g. '|', '&', '?:') cannot regroup with the macro's own operators.
 * Each argument may still be evaluated twice: do not pass expressions with
 * side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
15 | |||
#if 0
/*
 * Disabled feedback estimator (labelled a PID feedback controller in the
 * original comment; only an error term and an accumulated-error term appear
 * in the code).
 *
 * Sets fb->est to a*err + b*accum_err, where err = observed - previous
 * estimate and accum_err is the error sum from before this sample, then adds
 * err to fb->accum_err. Returns err / observed.
 */
static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
{
	fp_t actual = _integer_to_fp(observed);
	fp_t err = _sub(actual, fb->est);
	fp_t relative_err = _div(err, actual);

	/* uses the error sum accumulated before this sample */
	fb->est = _add(_mul(a, err), _mul(b, fb->accum_err));
	fb->accum_err = _add(fb->accum_err, err);

	return relative_err;
}
#endif
35 | |||
36 | static lt_t varience(lt_t nums[], const lt_t avg, const uint32_t count) | ||
37 | { | ||
38 | /* brute force: takes about as much time as incremental running methods | ||
39 | * when count < 50 (on Bonham). Brute force also less prone to overflow. | ||
40 | */ | ||
41 | lt_t sqdeviations = 0; | ||
42 | uint32_t i; | ||
43 | for(i = 0; i < count; ++i) { | ||
44 | lt_t temp = (int64_t)nums[i] - (int64_t)avg; | ||
45 | sqdeviations += temp * temp; | ||
46 | } | ||
47 | return sqdeviations/count; | ||
48 | } | ||
49 | |||
50 | static lt_t isqrt(lt_t n) | ||
51 | { | ||
52 | /* integer square root using babylonian method | ||
53 | * (algo taken from wikipedia */ | ||
54 | lt_t res = 0; | ||
55 | lt_t bit = ((lt_t)1) << (sizeof(n)*8-2); | ||
56 | while (bit > n) | ||
57 | bit >>= 2; | ||
58 | |||
59 | while (bit != 0) { | ||
60 | if (n >= res + bit) { | ||
61 | n -= res + bit; | ||
62 | res = (res >> 1) + bit; | ||
63 | } | ||
64 | else { | ||
65 | res >>= 1; | ||
66 | } | ||
67 | bit >>= 2; | ||
68 | } | ||
69 | return res; | ||
70 | } | ||
71 | |||
72 | void update_gpu_estimate(struct task_struct *t, lt_t observed) | ||
73 | { | ||
74 | avg_est_t *est; | ||
75 | struct migration_info mig_info; | ||
76 | |||
77 | BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); | ||
78 | |||
79 | est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | ||
80 | |||
81 | /* log the migration event */ | ||
82 | mig_info.observed = observed; | ||
83 | mig_info.estimated = est->avg; | ||
84 | mig_info.distance = tsk_rt(t)->gpu_migration; | ||
85 | sched_trace_migration(t, &mig_info); | ||
86 | |||
87 | if (unlikely(observed > OBSERVATION_CAP)) { | ||
88 | TRACE_TASK(t, | ||
89 | "Crazy observation greater than was dropped: %llu > %llu\n", | ||
90 | observed, | ||
91 | OBSERVATION_CAP); | ||
92 | return; | ||
93 | } | ||
94 | |||
95 | /* filter values outside NUM_STDEVx the standard deviation, | ||
96 | but only filter if enough samples have been taken. */ | ||
97 | if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { | ||
98 | lt_t lower, upper; | ||
99 | |||
100 | lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM; | ||
101 | lower = est->avg - MIN(range, est->avg); // no underflow. | ||
102 | |||
103 | if (unlikely(observed < lower)) { | ||
104 | TRACE_TASK(t, | ||
105 | "Observation is too small: %llu < %llu (avg: %llu)\n", | ||
106 | observed, lower, est->avg); | ||
107 | return; | ||
108 | } | ||
109 | |||
110 | upper = est->avg + range; | ||
111 | if (unlikely(observed > upper)) { | ||
112 | TRACE_TASK(t, | ||
113 | "Observation is too large: %llu > %llu (avg: %llu)\n", | ||
114 | observed, upper, est->avg); | ||
115 | return; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) | ||
120 | ++est->count; | ||
121 | else | ||
122 | est->sum -= est->history[est->idx]; | ||
123 | |||
124 | TS_UPDATE_GPU_EST_START; | ||
125 | est->history[est->idx] = observed; | ||
126 | est->sum += observed; | ||
127 | est->avg = est->sum/est->count; | ||
128 | est->std = isqrt(varience(est->history, est->avg, est->count)); | ||
129 | est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE; | ||
130 | TS_UPDATE_GPU_EST_END; | ||
131 | |||
132 | TRACE_TASK(t, | ||
133 | "GPU est update after (dist = %d, obs = %llu): %llu\n", | ||
134 | tsk_rt(t)->gpu_migration, | ||
135 | observed, | ||
136 | est->avg); | ||
137 | } | ||
138 | |||
139 | gpu_migration_dist_t gpu_migration_distance(int a, int b) | ||
140 | { | ||
141 | /* GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs */ | ||
142 | int i; | ||
143 | int dist; | ||
144 | |||
145 | if(likely(a >= 0 && b >= 0)) { | ||
146 | for(i = 0; i <= MIG_FAR; ++i) { | ||
147 | if(a>>i == b>>i) { | ||
148 | dist = i; | ||
149 | goto out; | ||
150 | } | ||
151 | } | ||
152 | dist = MIG_NONE; /* hopefully never reached. */ | ||
153 | TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b); | ||
154 | } | ||
155 | else { | ||
156 | dist = MIG_NONE; | ||
157 | } | ||
158 | |||
159 | out: | ||
160 | TRACE_CUR("Distance %d -> %d is %d\n", a, b, dist); | ||
161 | |||
162 | return dist; | ||
163 | } | ||