author     Glenn Elliott <gelliott@cs.unc.edu>   2012-04-18 21:30:36 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-04-18 21:30:36 -0400
commit     f916cdb8e6a9ee2c917fddb7351e6bb39f6c953e (patch)
tree       b7904b93f4da153a40815b89378e7b3ca2f70591
parent     6ab36ca992441f7353840c70fc91d99a500a940e (diff)
Added support for simult-users in kfmlp
-rw-r--r--   include/litmus/fdso.h           9
-rw-r--r--   include/litmus/ikglp_lock.h    61
-rw-r--r--   include/litmus/kfmlp_lock.h    10
-rw-r--r--   include/litmus/nvidia_info.h    1
-rw-r--r--   include/litmus/rt_param.h       3
-rw-r--r--   litmus/Kconfig                 13
-rw-r--r--   litmus/fdso.c                   3
-rw-r--r--   litmus/kfmlp_lock.c           337
-rw-r--r--   litmus/nvidia_info.c           11
9 files changed, 297 insertions, 151 deletions
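
The user-visible knob introduced by this patch is the nr_simult_users field of struct gpu_affinity_observer_args (see the include/litmus/rt_param.h hunk below). The following is a minimal sketch of how the argument block that kfmlp_aff_obs_new() copies in via __copy_from_user() might be filled in; the od value, the include path, and the call that actually passes the block to the kernel are assumptions, not part of this diff.

#include <litmus/rt_param.h>

/* Sketch only: build the argument block for a GPU affinity observer.
 * kfmlp_od is assumed to be the object descriptor of an already-opened
 * KFMLP_SEM whose replicas represent GPU access tokens. */
static struct gpu_affinity_observer_args make_gpu_aff_args(int kfmlp_od)
{
        struct gpu_affinity_observer_args args = {
                .obs = { .lock_od = kfmlp_od },
                .replica_to_gpu_offset = 0, /* this lock starts at GPU 0 */
                .nr_simult_users = 2,       /* e.g., copy engine + execution engine */
        };
        return args;
}
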
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index b92c1a3f004f..552a1e731672 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -24,11 +24,12 @@ typedef enum { | |||
24 | IKGLP_SEM = 3, | 24 | IKGLP_SEM = 3, |
25 | KFMLP_SEM = 4, | 25 | KFMLP_SEM = 4, |
26 | 26 | ||
27 | IKGLP_GPU_AFF_OBS = 5, | 27 | IKGLP_SIMPLE_GPU_AFF_OBS = 5, |
28 | KFMLP_SIMPLE_GPU_AFF_OBS = 6, | 28 | IKGLP_GPU_AFF_OBS = 6, |
29 | KFMLP_GPU_AFF_OBS = 7, | 29 | KFMLP_SIMPLE_GPU_AFF_OBS = 7, |
30 | KFMLP_GPU_AFF_OBS = 8, | ||
30 | 31 | ||
31 | MAX_OBJ_TYPE = 7 | 32 | MAX_OBJ_TYPE = 8 |
32 | } obj_type_t; | 33 | } obj_type_t; |
33 | 34 | ||
34 | struct inode_obj_id { | 35 | struct inode_obj_id { |
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index c0cc04db1bc6..2a75a1719815 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -5,6 +5,12 @@ | |||
5 | #include <litmus/binheap.h> | 5 | #include <litmus/binheap.h> |
6 | #include <litmus/locking.h> | 6 | #include <litmus/locking.h> |
7 | 7 | ||
8 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
9 | #include <litmus/kexclu_affinity.h> | ||
10 | |||
11 | struct ikglp_affinity; | ||
12 | #endif | ||
13 | |||
8 | typedef struct ikglp_heap_node | 14 | typedef struct ikglp_heap_node |
9 | { | 15 | { |
10 | struct task_struct *task; | 16 | struct task_struct *task; |
@@ -81,6 +87,10 @@ struct ikglp_semaphore | |||
81 | struct fifo_queue *fifo_queues; // array nr_replicas in length | 87 | struct fifo_queue *fifo_queues; // array nr_replicas in length |
82 | struct binheap_handle priority_queue; // max-heap, base prio | 88 | struct binheap_handle priority_queue; // max-heap, base prio |
83 | struct binheap_handle donors; // max-heap, base prio | 89 | struct binheap_handle donors; // max-heap, base prio |
90 | |||
91 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
92 | struct ikglp_affinity *aff_obs; | ||
93 | #endif | ||
84 | }; | 94 | }; |
85 | 95 | ||
86 | static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock) | 96 | static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock) |
@@ -94,4 +104,55 @@ int ikglp_close(struct litmus_lock* l); | |||
94 | void ikglp_free(struct litmus_lock* l); | 104 | void ikglp_free(struct litmus_lock* l); |
95 | struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); | 105 | struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); |
96 | 106 | ||
107 | |||
108 | |||
109 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
110 | |||
111 | struct ikglp_queue_info | ||
112 | { | ||
113 | struct fifo_queue* q; | ||
114 | lt_t estimated_len; | ||
115 | int *nr_cur_users; | ||
116 | }; | ||
117 | |||
118 | struct ikglp_affinity_ops | ||
119 | { | ||
120 | struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO | ||
121 | struct task_struct* (*advise_steal)(struct ikglp_affinity* aff, wait_queue_t** to_steal, struct fifo_queue** to_steal_from); // select steal from FIFO | ||
122 | struct task_struct* (*advise_donee_selection)(struct ikglp_affinity* aff, wait_queue_t** donee, struct fifo_queue** donee_queue); // select a donee | ||
123 | struct task_struct* (*advise_doner_to_fq)(struct ikglp_affinity* aff, ikglp_wait_state_t** donor); // select a donor to move to an FQ | ||
124 | |||
125 | void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue | ||
126 | void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue | ||
127 | void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired | ||
128 | void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed | ||
129 | int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding) | ||
130 | }; | ||
131 | |||
132 | struct ikglp_affinity | ||
133 | { | ||
134 | struct affinity_observer obs; | ||
135 | struct ikglp_affinity_ops *ops; | ||
136 | struct ikglp_queue_info *q_info; | ||
137 | int *nr_cur_users_on_rsrc; | ||
138 | int offset; | ||
139 | int nr_simult; | ||
140 | int nr_rsrc; | ||
141 | }; | ||
142 | |||
143 | static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) | ||
144 | { | ||
145 | return container_of(aff_obs, struct ikglp_affinity, obs); | ||
146 | } | ||
147 | |||
148 | int ikglp_aff_obs_close(struct affinity_observer*); | ||
149 | void ikglp_aff_obs_free(struct affinity_observer*); | ||
150 | struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
151 | void* __user arg); | ||
152 | struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
153 | void* __user arg); | ||
154 | #endif | ||
155 | |||
156 | |||
157 | |||
97 | #endif | 158 | #endif |
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
index 614cccad5307..6d7e24b2a3ad 100644
--- a/include/litmus/kfmlp_lock.h
+++ b/include/litmus/kfmlp_lock.h
@@ -6,6 +6,8 @@ | |||
6 | 6 | ||
7 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 7 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
8 | #include <litmus/kexclu_affinity.h> | 8 | #include <litmus/kexclu_affinity.h> |
9 | |||
10 | struct kfmlp_affinity; | ||
9 | #endif | 11 | #endif |
10 | 12 | ||
11 | /* struct for semaphore with priority inheritance */ | 13 | /* struct for semaphore with priority inheritance */ |
@@ -50,10 +52,9 @@ struct kfmlp_queue_info | |||
50 | { | 52 | { |
51 | struct kfmlp_queue* q; | 53 | struct kfmlp_queue* q; |
52 | lt_t estimated_len; | 54 | lt_t estimated_len; |
55 | int *nr_cur_users; | ||
53 | }; | 56 | }; |
54 | 57 | ||
55 | struct kfmlp_affinity; | ||
56 | |||
57 | struct kfmlp_affinity_ops | 58 | struct kfmlp_affinity_ops |
58 | { | 59 | { |
59 | struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); | 60 | struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); |
@@ -62,6 +63,7 @@ struct kfmlp_affinity_ops | |||
62 | void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | 63 | void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); |
63 | void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | 64 | void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); |
64 | void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | 65 | void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); |
66 | int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq); | ||
65 | }; | 67 | }; |
66 | 68 | ||
67 | struct kfmlp_affinity | 69 | struct kfmlp_affinity |
@@ -69,8 +71,10 @@ struct kfmlp_affinity | |||
69 | struct affinity_observer obs; | 71 | struct affinity_observer obs; |
70 | struct kfmlp_affinity_ops *ops; | 72 | struct kfmlp_affinity_ops *ops; |
71 | struct kfmlp_queue_info *q_info; | 73 | struct kfmlp_queue_info *q_info; |
72 | struct kfmlp_queue_info *shortest_queue; | 74 | int *nr_cur_users_on_rsrc; |
73 | int offset; | 75 | int offset; |
76 | int nr_simult; | ||
77 | int nr_rsrc; | ||
74 | }; | 78 | }; |
75 | 79 | ||
76 | static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) | 80 | static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) |
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
index 856c575374d3..580728051d4e 100644
--- a/include/litmus/nvidia_info.h
+++ b/include/litmus/nvidia_info.h
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | //#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD | 10 | //#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD |
11 | #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM | 11 | #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM |
12 | #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS | ||
12 | 13 | ||
13 | int init_nvidia_info(void); | 14 | int init_nvidia_info(void); |
14 | 15 | ||
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 11f081527545..e832ffcba17c 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -59,10 +59,11 @@ struct affinity_observer_args | |||
59 | int lock_od; | 59 | int lock_od; |
60 | }; | 60 | }; |
61 | 61 | ||
62 | struct kfmlp_gpu_affinity_observer_args | 62 | struct gpu_affinity_observer_args |
63 | { | 63 | { |
64 | struct affinity_observer_args obs; | 64 | struct affinity_observer_args obs; |
65 | int replica_to_gpu_offset; | 65 | int replica_to_gpu_offset; |
66 | int nr_simult_users; | ||
66 | }; | 67 | }; |
67 | 68 | ||
68 | /* The definition of the data that is shared between the kernel and real-time | 69 | /* The definition of the data that is shared between the kernel and real-time |
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 34ce6fb3a22e..a34440f3d8bc 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -325,6 +325,19 @@ config NV_DEVICE_NUM | |||
325 | Should be (<= to the number of CPUs) and | 325 | Should be (<= to the number of CPUs) and |
326 | (<= to the number of GPUs) in your system. | 326 | (<= to the number of GPUs) in your system. |
327 | 327 | ||
328 | config NV_MAX_SIMULT_USERS | ||
329 | int "Maximum number of threads sharing a GPU simultanously" | ||
330 | depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD | ||
331 | range 1 3 | ||
332 | default "2" | ||
333 | help | ||
334 | Should equal the number of copy engines plus execution engines |||
335 | (#copy_engines + #execution_engines) of the GPUs in your system. |||
336 | |||
337 | Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?) | ||
338 | Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx) | ||
339 | Older = 1 (ex. GTX-2xx) | ||
340 | |||
328 | choice | 341 | choice |
329 | prompt "CUDA/Driver Version Support" | 342 | prompt "CUDA/Driver Version Support" |
330 | default CUDA_4_0 | 343 | default CUDA_4_0 |
diff --git a/litmus/fdso.c b/litmus/fdso.c
index 5a4f45c3251b..fb328db77dec 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -28,7 +28,8 @@ static const struct fdso_ops* fdso_ops[] = { | |||
28 | &generic_lock_ops, /* RSM_MUTEX */ | 28 | &generic_lock_ops, /* RSM_MUTEX */ |
29 | &generic_lock_ops, /* IKGLP_SEM */ | 29 | &generic_lock_ops, /* IKGLP_SEM */ |
30 | &generic_lock_ops, /* KFMLP_SEM */ | 30 | &generic_lock_ops, /* KFMLP_SEM */ |
31 | &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */ | 31 | &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */ |
32 | &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */ | ||
32 | &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ | 33 | &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ |
33 | &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ | 34 | &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ |
34 | }; | 35 | }; |
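
Since fdso_ops[] appears to be indexed by obj_type_t, the table above has to be extended in lock-step with the enum in include/litmus/fdso.h (first hunk of this commit). Purely as an illustration, and not part of the patch, a build-time check along these lines inside fdso.c would catch a missed update; BUILD_BUG_ON and ARRAY_SIZE are the kernel's standard macros.

static inline void fdso_table_sanity_check(void)
{
        /* one fdso_ops[] entry per obj_type_t value, 0 .. MAX_OBJ_TYPE */
        BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
}
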
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 7cdca1b7b50a..9bbe31a05b97 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -73,10 +73,9 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* se | |||
73 | } | 73 | } |
74 | 74 | ||
75 | 75 | ||
76 | // TODO: BREAK THIS UP INTO TWO STEPS: | 76 | static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, |
77 | // 1) task to steal (and from what queue) | 77 | wait_queue_t** to_steal, |
78 | // 2) update queues | 78 | struct kfmlp_queue** to_steal_from) |
79 | static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) | ||
80 | { | 79 | { |
81 | /* must hold sem->lock */ | 80 | /* must hold sem->lock */ |
82 | 81 | ||
@@ -189,76 +188,7 @@ static void kfmlp_steal_node(struct kfmlp_semaphore *sem, | |||
189 | } | 188 | } |
190 | #endif | 189 | #endif |
191 | } | 190 | } |
192 | //// TODO: BREAK THIS UP INTO TWO STEPS: | 191 | |
193 | //// 1) task to steal (and from what queue) | ||
194 | //// 2) update queues | ||
195 | //static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | ||
196 | //{ | ||
197 | // /* must hold sem->lock */ | ||
198 | // | ||
199 | // struct kfmlp_queue *my_queue = NULL; | ||
200 | // struct task_struct *max_hp = NULL; | ||
201 | // | ||
202 | // struct list_head *pos; | ||
203 | // struct task_struct *queued; | ||
204 | // int i; | ||
205 | // | ||
206 | // for(i = 0; i < sem->num_resources; ++i) | ||
207 | // { | ||
208 | // if( (sem->queues[i].count > 1) && | ||
209 | // ((my_queue == NULL) || | ||
210 | // //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
211 | // (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
212 | // { | ||
213 | // my_queue = &sem->queues[i]; | ||
214 | // } | ||
215 | // } | ||
216 | // | ||
217 | // if(my_queue) | ||
218 | // { | ||
219 | // max_hp = my_queue->hp_waiter; | ||
220 | // | ||
221 | // BUG_ON(!max_hp); | ||
222 | // | ||
223 | // TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", | ||
224 | // kfmlp_get_idx(sem, my_queue), | ||
225 | // max_hp->comm, max_hp->pid, | ||
226 | // kfmlp_get_idx(sem, my_queue)); | ||
227 | // | ||
228 | // my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); | ||
229 | // | ||
230 | // if(tsk_rt(my_queue->owner)->inh_task == max_hp) | ||
231 | // { | ||
232 | // litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); | ||
233 | // } | ||
234 | // | ||
235 | // list_for_each(pos, &my_queue->wait.task_list) | ||
236 | // { | ||
237 | // queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
238 | // task_list)->private; | ||
239 | // /* Compare task prios, find high prio task. */ | ||
240 | // if (queued == max_hp) | ||
241 | // { | ||
242 | // /* | ||
243 | // TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", | ||
244 | // kfmlp_get_idx(sem, my_queue)); | ||
245 | // */ | ||
246 | // __remove_wait_queue(&my_queue->wait, | ||
247 | // list_entry(pos, wait_queue_t, task_list)); | ||
248 | // break; | ||
249 | // } | ||
250 | // } | ||
251 | // --(my_queue->count); | ||
252 | // | ||
253 | //#ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
254 | // if(sem->aff_obs) { | ||
255 | // sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, max_hp); | ||
256 | // } | ||
257 | //#endif | ||
258 | // } | ||
259 | // | ||
260 | // return(max_hp); | ||
261 | //} | ||
262 | 192 | ||
263 | int kfmlp_lock(struct litmus_lock* l) | 193 | int kfmlp_lock(struct litmus_lock* l) |
264 | { | 194 | { |
@@ -378,6 +308,12 @@ int kfmlp_lock(struct litmus_lock* l) | |||
378 | spin_unlock_irqrestore(&sem->lock, flags); | 308 | spin_unlock_irqrestore(&sem->lock, flags); |
379 | } | 309 | } |
380 | 310 | ||
311 | |||
312 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
313 | if(sem->aff_obs) { | ||
314 | return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue); | ||
315 | } | ||
316 | #endif | ||
381 | return kfmlp_get_idx(sem, my_queue); | 317 | return kfmlp_get_idx(sem, my_queue); |
382 | } | 318 | } |
383 | 319 | ||
@@ -390,14 +326,14 @@ int kfmlp_unlock(struct litmus_lock* l) | |||
390 | unsigned long flags; | 326 | unsigned long flags; |
391 | int err = 0; | 327 | int err = 0; |
392 | 328 | ||
393 | spin_lock_irqsave(&sem->lock, flags); | ||
394 | |||
395 | my_queue = kfmlp_get_queue(sem, t); | 329 | my_queue = kfmlp_get_queue(sem, t); |
396 | 330 | ||
397 | if (!my_queue || my_queue->owner != t) { | 331 | if (!my_queue) { |
398 | err = -EINVAL; | 332 | err = -EINVAL; |
399 | goto out; | 333 | goto out; |
400 | } | 334 | } |
335 | |||
336 | spin_lock_irqsave(&sem->lock, flags); | ||
401 | 337 | ||
402 | TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue)); | 338 | TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue)); |
403 | 339 | ||
@@ -489,9 +425,9 @@ RETRY: | |||
489 | } | 425 | } |
490 | } | 426 | } |
491 | 427 | ||
492 | out: | ||
493 | spin_unlock_irqrestore(&sem->lock, flags); | 428 | spin_unlock_irqrestore(&sem->lock, flags); |
494 | 429 | ||
430 | out: | ||
495 | return err; | 431 | return err; |
496 | } | 432 | } |
497 | 433 | ||
@@ -580,6 +516,25 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | |||
580 | 516 | ||
581 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 517 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) |
582 | 518 | ||
519 | static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica) | ||
520 | { | ||
521 | int gpu = replica % aff->nr_rsrc; | ||
522 | return gpu; | ||
523 | } | ||
524 | |||
525 | static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica) | ||
526 | { | ||
527 | int gpu = __replica_to_gpu(aff, replica) + aff->offset; | ||
528 | return gpu; | ||
529 | } | ||
530 | |||
531 | static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu) | ||
532 | { | ||
533 | int replica = gpu - aff->offset; | ||
534 | return replica; | ||
535 | } | ||
536 | |||
537 | |||
583 | int kfmlp_aff_obs_close(struct affinity_observer* obs) | 538 | int kfmlp_aff_obs_close(struct affinity_observer* obs) |
584 | { | 539 | { |
585 | return 0; | 540 | return 0; |
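
To make the folding concrete: with six KFMLP replicas, nr_simult = 2, and offset = 0 (illustrative values, not taken from the patch), nr_rsrc = 3 and the helpers added above map replicas {0..5} onto GPUs {0, 1, 2, 0, 1, 2}, so each physical GPU backs two replica holders at a time. The same arithmetic as a standalone user-space program:

#include <stdio.h>

int main(void)
{
        int num_resources = 6;                   /* KFMLP replicas (example) */
        int nr_simult = 2;                       /* users allowed per GPU (example) */
        int nr_rsrc = num_resources / nr_simult; /* physical GPUs = 3 */
        int offset = 0;                          /* replica_to_gpu_offset (example) */
        int replica;

        for (replica = 0; replica < num_resources; ++replica) {
                /* mirrors __replica_to_gpu() plus the offset, i.e. replica_to_gpu() */
                int gpu = (replica % nr_rsrc) + offset;
                printf("replica %d -> GPU %d\n", replica, gpu);
        }
        return 0;
}
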
@@ -588,6 +543,7 @@ int kfmlp_aff_obs_close(struct affinity_observer* obs) | |||
588 | void kfmlp_aff_obs_free(struct affinity_observer* obs) | 543 | void kfmlp_aff_obs_free(struct affinity_observer* obs) |
589 | { | 544 | { |
590 | struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); | 545 | struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); |
546 | kfree(kfmlp_aff->nr_cur_users_on_rsrc); | ||
591 | kfree(kfmlp_aff->q_info); | 547 | kfree(kfmlp_aff->q_info); |
592 | kfree(kfmlp_aff); | 548 | kfree(kfmlp_aff); |
593 | } | 549 | } |
@@ -597,37 +553,56 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
597 | void* __user args) | 553 | void* __user args) |
598 | { | 554 | { |
599 | struct kfmlp_affinity* kfmlp_aff; | 555 | struct kfmlp_affinity* kfmlp_aff; |
600 | struct kfmlp_gpu_affinity_observer_args aff_args; | 556 | struct gpu_affinity_observer_args aff_args; |
601 | struct kfmlp_semaphore* sem; | 557 | struct kfmlp_semaphore* sem; |
602 | int i; | 558 | int i; |
603 | unsigned long flags; | 559 | unsigned long flags; |
604 | 560 | ||
605 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) | 561 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { |
606 | { | ||
607 | return(NULL); | 562 | return(NULL); |
608 | } | 563 | } |
609 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) | 564 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { |
610 | { | ||
611 | return(NULL); | 565 | return(NULL); |
612 | } | 566 | } |
613 | 567 | ||
614 | sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); | 568 | sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); |
615 | 569 | ||
616 | if(sem->litmus_lock.type != KFMLP_SEM) | 570 | if(sem->litmus_lock.type != KFMLP_SEM) { |
617 | { | ||
618 | TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); | 571 | TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); |
619 | return(NULL); | 572 | return(NULL); |
620 | } | 573 | } |
621 | 574 | ||
575 | if((aff_args.nr_simult_users <= 0) || | ||
576 | (sem->num_resources%aff_args.nr_simult_users != 0)) { | ||
577 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | ||
578 | "(%d) per replica. #replicas should be evenly divisible " | ||
579 | "by #simult_users.\n", | ||
580 | sem->litmus_lock.ident, | ||
581 | sem->num_resources, | ||
582 | aff_args.nr_simult_users); | ||
583 | return(NULL); | ||
584 | } | ||
585 | |||
586 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
587 | TRACE_CUR("System does not support #simult_users >%d. %d requested.\n", | ||
588 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
589 | return(NULL); | ||
590 | } | ||
591 | |||
622 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); | 592 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); |
623 | if(!kfmlp_aff) | 593 | if(!kfmlp_aff) { |
624 | { | ||
625 | return(NULL); | 594 | return(NULL); |
626 | } | 595 | } |
627 | 596 | ||
628 | kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); | 597 | kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); |
629 | if(!kfmlp_aff->q_info) | 598 | if(!kfmlp_aff->q_info) { |
630 | { | 599 | kfree(kfmlp_aff); |
600 | return(NULL); | ||
601 | } | ||
602 | |||
603 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); | ||
604 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { | ||
605 | kfree(kfmlp_aff->q_info); | ||
631 | kfree(kfmlp_aff); | 606 | kfree(kfmlp_aff); |
632 | return(NULL); | 607 | return(NULL); |
633 | } | 608 | } |
@@ -636,16 +611,24 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
636 | 611 | ||
637 | kfmlp_aff->ops = kfmlp_ops; | 612 | kfmlp_aff->ops = kfmlp_ops; |
638 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; | 613 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; |
614 | kfmlp_aff->nr_simult = aff_args.nr_simult_users; | ||
615 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; | ||
616 | |||
617 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_simult)); ||
639 | 618 | ||
640 | for(i = 0; i < sem->num_resources; ++i) | 619 | for(i = 0; i < sem->num_resources; ++i) { |
641 | { | ||
642 | kfmlp_aff->q_info[i].q = &sem->queues[i]; | 620 | kfmlp_aff->q_info[i].q = &sem->queues[i]; |
643 | kfmlp_aff->q_info[i].estimated_len = 0; | 621 | kfmlp_aff->q_info[i].estimated_len = 0; |
622 | |||
623 | // multiple q_info entries will point to the same resource (aka GPU) if ||
624 | // aff_args.nr_simult_users > 1 | ||
625 | kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)]; | ||
644 | } | 626 | } |
645 | 627 | ||
628 | // attach observer to the lock | ||
646 | spin_lock_irqsave(&sem->lock, flags); | 629 | spin_lock_irqsave(&sem->lock, flags); |
647 | sem->aff_obs = kfmlp_aff; | 630 | sem->aff_obs = kfmlp_aff; |
648 | kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; | 631 | //kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; |
649 | spin_unlock_irqrestore(&sem->lock, flags); | 632 | spin_unlock_irqrestore(&sem->lock, flags); |
650 | 633 | ||
651 | return &kfmlp_aff->obs; | 634 | return &kfmlp_aff->obs; |
@@ -654,6 +637,13 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
654 | 637 | ||
655 | 638 | ||
656 | 639 | ||
640 | static int gpu_replica_to_resource(struct kfmlp_affinity* aff, | ||
641 | struct kfmlp_queue* fq) { | ||
642 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
643 | return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq))); | ||
644 | } | ||
645 | |||
646 | |||
657 | // Smart KFMLP Affinity | 647 | // Smart KFMLP Affinity |
658 | 648 | ||
659 | static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) | 649 | static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) |
@@ -675,55 +665,66 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct | |||
675 | { | 665 | { |
676 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 666 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
677 | lt_t min_len; | 667 | lt_t min_len; |
668 | int min_nr_users; | ||
678 | struct kfmlp_queue_info *shortest; | 669 | struct kfmlp_queue_info *shortest; |
679 | struct kfmlp_queue *to_enqueue; | 670 | struct kfmlp_queue *to_enqueue; |
680 | int i; | 671 | int i; |
672 | int affinity_gpu; | ||
681 | 673 | ||
682 | // simply pick the shortest queue if we have no affinity, or if we have | 674 | // simply pick the shortest queue if we have no affinity, or if we have |
683 | // affinity with the shortest | 675 | // affinity with the shortest |
684 | if(unlikely(tsk_rt(t)->last_gpu < 0)) { | 676 | if(unlikely(tsk_rt(t)->last_gpu < 0)) { |
685 | // we have affinity with the shorest queue. pick it. | 677 | affinity_gpu = aff->offset; // first gpu |
686 | shortest = aff->shortest_queue; | 678 | TRACE_CUR("no affinity\n"); |
687 | TRACE_CUR("special case: no affinity\n"); | 679 | } |
688 | goto out; | 680 | else { |
681 | affinity_gpu = tsk_rt(t)->last_gpu; | ||
689 | } | 682 | } |
690 | 683 | ||
691 | // all things being equal, let's start with the queue with which we have | 684 | // all things being equal, let's start with the queue with which we have |
692 | // affinity. this helps us maintain affinity even when we don't have | 685 | // affinity. this helps us maintain affinity even when we don't have |
693 | // an estimate for local-affinity execution time (i.e., 2nd time on GPU) | 686 | // an estimate for local-affinity execution time (i.e., 2nd time on GPU) |
694 | shortest = &aff->q_info[tsk_rt(t)->last_gpu - aff->offset]; | 687 | shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; |
695 | 688 | ||
696 | if(shortest == aff->shortest_queue) { | 689 | // if(shortest == aff->shortest_queue) { |
697 | TRACE_CUR("special case: have affinity with shortest queue\n"); | 690 | // TRACE_CUR("special case: have affinity with shortest queue\n"); |
698 | goto out; | 691 | // goto out; |
699 | } | 692 | // } |
700 | 693 | ||
701 | min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); | 694 | min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); |
695 | min_nr_users = *(shortest->nr_cur_users); | ||
702 | 696 | ||
703 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | 697 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", |
704 | get_gpu_estimate(t, MIG_LOCAL), | 698 | get_gpu_estimate(t, MIG_LOCAL), |
705 | kfmlp_get_idx(sem, shortest->q), | 699 | kfmlp_get_idx(sem, shortest->q), |
706 | min_len); | 700 | min_len); |
707 | 701 | ||
708 | for(i = 0; i < sem->num_resources; ++i) { | 702 | for(i = 0; i < sem->num_resources; ++i) { |
709 | if(&aff->q_info[i] != shortest) { | 703 | if(&aff->q_info[i] != shortest) { |
710 | 704 | ||
711 | lt_t est_len = | 705 | lt_t est_len = |
712 | aff->q_info[i].estimated_len + | 706 | aff->q_info[i].estimated_len + |
713 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)); | 707 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); |
714 | if(est_len < min_len) { | 708 | |
709 | // this queue is shorter, or the lengths are equal and this queue's ||
710 | // GPU currently has fewer holders. ||
711 | // ||
712 | // tie-break on the smallest number of simultaneous users; this only ||
713 | // matters when more than one queue is empty. ||
714 | if((est_len < min_len) || | ||
715 | ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
715 | shortest = &aff->q_info[i]; | 716 | shortest = &aff->q_info[i]; |
716 | min_len = est_len; | 717 | min_len = est_len; |
718 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
717 | } | 719 | } |
718 | 720 | ||
719 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | 721 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", |
720 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)), | 722 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))), |
721 | kfmlp_get_idx(sem, aff->q_info[i].q), | 723 | kfmlp_get_idx(sem, aff->q_info[i].q), |
722 | est_len); | 724 | est_len); |
723 | } | 725 | } |
724 | } | 726 | } |
725 | 727 | ||
726 | out: | ||
727 | to_enqueue = shortest->q; | 728 | to_enqueue = shortest->q; |
728 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | 729 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", |
729 | kfmlp_get_idx(sem, to_enqueue), | 730 | kfmlp_get_idx(sem, to_enqueue), |
@@ -736,7 +737,7 @@ struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queu | |||
736 | { | 737 | { |
737 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 738 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
738 | 739 | ||
739 | // For now, just steal from the shortest (by number) queue. | 740 | // For now, just steal the highest-priority waiter |
740 | // TODO: Implement affinity-aware stealing. | 741 | // TODO: Implement affinity-aware stealing. |
741 | 742 | ||
742 | return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); | 743 | return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); |
@@ -747,7 +748,7 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq | |||
747 | { | 748 | { |
748 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 749 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
749 | int replica = kfmlp_get_idx(sem, fq); | 750 | int replica = kfmlp_get_idx(sem, fq); |
750 | int gpu = aff->offset + replica; | 751 | int gpu = replica_to_gpu(aff, replica); |
751 | struct kfmlp_queue_info *info = &aff->q_info[replica]; | 752 | struct kfmlp_queue_info *info = &aff->q_info[replica]; |
752 | lt_t est_time; | 753 | lt_t est_time; |
753 | lt_t est_len_before; | 754 | lt_t est_len_before; |
@@ -765,22 +766,22 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq | |||
765 | est_len_before, est_time, | 766 | est_len_before, est_time, |
766 | info->estimated_len); | 767 | info->estimated_len); |
767 | 768 | ||
768 | if(aff->shortest_queue == info) { | 769 | // if(aff->shortest_queue == info) { |
769 | // we may no longer be the shortest | 770 | // // we may no longer be the shortest |
770 | aff->shortest_queue = kfmlp_aff_find_shortest(aff); | 771 | // aff->shortest_queue = kfmlp_aff_find_shortest(aff); |
771 | 772 | // | |
772 | TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | 773 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", |
773 | kfmlp_get_idx(sem, aff->shortest_queue->q), | 774 | // kfmlp_get_idx(sem, aff->shortest_queue->q), |
774 | aff->shortest_queue->q->count, | 775 | // aff->shortest_queue->q->count, |
775 | aff->shortest_queue->estimated_len); | 776 | // aff->shortest_queue->estimated_len); |
776 | } | 777 | // } |
777 | } | 778 | } |
778 | 779 | ||
779 | void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | 780 | void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) |
780 | { | 781 | { |
781 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 782 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
782 | int replica = kfmlp_get_idx(sem, fq); | 783 | int replica = kfmlp_get_idx(sem, fq); |
783 | int gpu = aff->offset + replica; | 784 | int gpu = replica_to_gpu(aff, replica); |
784 | struct kfmlp_queue_info *info = &aff->q_info[replica]; | 785 | struct kfmlp_queue_info *info = &aff->q_info[replica]; |
785 | lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | 786 | lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); |
786 | 787 | ||
@@ -797,28 +798,32 @@ void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq | |||
797 | info->estimated_len); | 798 | info->estimated_len); |
798 | 799 | ||
799 | // check to see if we're the shortest queue now. | 800 | // check to see if we're the shortest queue now. |
800 | if((aff->shortest_queue != info) && | 801 | // if((aff->shortest_queue != info) && |
801 | (aff->shortest_queue->estimated_len > info->estimated_len)) { | 802 | // (aff->shortest_queue->estimated_len > info->estimated_len)) { |
802 | 803 | // | |
803 | aff->shortest_queue = info; | 804 | // aff->shortest_queue = info; |
804 | 805 | // | |
805 | TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | 806 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", |
806 | kfmlp_get_idx(sem, info->q), | 807 | // kfmlp_get_idx(sem, info->q), |
807 | info->q->count, | 808 | // info->q->count, |
808 | info->estimated_len); | 809 | // info->estimated_len); |
809 | } | 810 | // } |
810 | } | 811 | } |
811 | 812 | ||
812 | void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | 813 | void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) |
813 | { | 814 | { |
814 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 815 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
815 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | 816 | int replica = kfmlp_get_idx(sem, fq); |
817 | int gpu = replica_to_gpu(aff, replica); | ||
816 | 818 | ||
817 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration | 819 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration |
818 | 820 | ||
819 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", | 821 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", |
820 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); | 822 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); |
821 | 823 | ||
824 | // count the number of resource holders ||
825 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
826 | |||
822 | reg_nv_device(gpu, 1, t); // register | 827 | reg_nv_device(gpu, 1, t); // register |
823 | 828 | ||
824 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; | 829 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; |
@@ -829,7 +834,8 @@ void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* f | |||
829 | void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | 834 | void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) |
830 | { | 835 | { |
831 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 836 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
832 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | 837 | int replica = kfmlp_get_idx(sem, fq); |
838 | int gpu = replica_to_gpu(aff, replica); | ||
833 | lt_t est_time; | 839 | lt_t est_time; |
834 | 840 | ||
835 | stop_gpu_tracker(t); // stop the tracker before we do anything else. | 841 | stop_gpu_tracker(t); // stop the tracker before we do anything else. |
@@ -837,6 +843,10 @@ void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, | |||
837 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | 843 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); |
838 | 844 | ||
839 | tsk_rt(t)->last_gpu = gpu; | 845 | tsk_rt(t)->last_gpu = gpu; |
846 | |||
847 | // count the number of resource holders ||
848 | --(*(aff->q_info[replica].nr_cur_users)); | ||
849 | |||
840 | reg_nv_device(gpu, 0, t); // unregister | 850 | reg_nv_device(gpu, 0, t); // unregister |
841 | 851 | ||
842 | // update estimates | 852 | // update estimates |
@@ -856,7 +866,8 @@ struct kfmlp_affinity_ops gpu_kfmlp_affinity = | |||
856 | .notify_enqueue = gpu_kfmlp_notify_enqueue, | 866 | .notify_enqueue = gpu_kfmlp_notify_enqueue, |
857 | .notify_dequeue = gpu_kfmlp_notify_dequeue, | 867 | .notify_dequeue = gpu_kfmlp_notify_dequeue, |
858 | .notify_acquired = gpu_kfmlp_notify_acquired, | 868 | .notify_acquired = gpu_kfmlp_notify_acquired, |
859 | .notify_freed = gpu_kfmlp_notify_freed | 869 | .notify_freed = gpu_kfmlp_notify_freed, |
870 | .replica_to_resource = gpu_replica_to_resource, | ||
860 | }; | 871 | }; |
861 | 872 | ||
862 | struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, | 873 | struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, |
@@ -877,8 +888,50 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op | |||
877 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | 888 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) |
878 | { | 889 | { |
879 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 890 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
891 | int min_count; | ||
892 | int min_nr_users; | ||
893 | struct kfmlp_queue_info *shortest; | ||
894 | struct kfmlp_queue *to_enqueue; | ||
895 | int i; | ||
896 | |||
880 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); | 897 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); |
881 | return sem->shortest_queue; | 898 | |
899 | shortest = &aff->q_info[0]; | ||
900 | min_count = shortest->q->count; | ||
901 | min_nr_users = *(shortest->nr_cur_users); | ||
902 | |||
903 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
904 | kfmlp_get_idx(sem, shortest->q), | ||
905 | shortest->q->count, | ||
906 | min_nr_users); | ||
907 | |||
908 | for(i = 1; i < sem->num_resources; ++i) { | ||
909 | int len = aff->q_info[i].q->count; | ||
910 | |||
911 | // this queue is shorter, or the lengths are equal and this queue's ||
912 | // GPU currently has fewer holders. ||
913 | // ||
914 | // tie-break on the smallest number of simultaneous users; this only ||
915 | // matters when more than one queue is empty. ||
916 | if((len < min_count) || | ||
917 | ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
918 | shortest = &aff->q_info[i]; | ||
919 | min_count = shortest->q->count; | ||
920 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
921 | } | ||
922 | |||
923 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
924 | kfmlp_get_idx(sem, aff->q_info[i].q), | ||
925 | aff->q_info[i].q->count, | ||
926 | *(aff->q_info[i].nr_cur_users)); | ||
927 | } | ||
928 | |||
929 | to_enqueue = shortest->q; | ||
930 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
931 | kfmlp_get_idx(sem, to_enqueue), | ||
932 | kfmlp_get_idx(sem, sem->shortest_queue)); | ||
933 | |||
934 | return to_enqueue; | ||
882 | } | 935 | } |
883 | 936 | ||
884 | struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) | 937 | struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) |
@@ -901,19 +954,26 @@ void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_qu | |||
901 | void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | 954 | void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) |
902 | { | 955 | { |
903 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 956 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
904 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | 957 | int replica = kfmlp_get_idx(sem, fq); |
958 | int gpu = replica_to_gpu(aff, replica); | ||
905 | 959 | ||
906 | // TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n"); | 960 | // TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n"); |
907 | 961 | ||
962 | // count the number of resource holders ||
963 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
964 | |||
908 | reg_nv_device(gpu, 1, t); // register | 965 | reg_nv_device(gpu, 1, t); // register |
909 | } | 966 | } |
910 | 967 | ||
911 | void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | 968 | void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) |
912 | { | 969 | { |
913 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 970 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
914 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | 971 | int replica = kfmlp_get_idx(sem, fq); |
972 | int gpu = replica_to_gpu(aff, replica); | ||
915 | 973 | ||
916 | // TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n"); | 974 | // TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n"); |
975 | // count the number or resource holders | ||
976 | --(*(aff->q_info[replica].nr_cur_users)); | ||
917 | 977 | ||
918 | reg_nv_device(gpu, 0, t); // unregister | 978 | reg_nv_device(gpu, 0, t); // unregister |
919 | } | 979 | } |
@@ -925,7 +985,8 @@ struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = | |||
925 | .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, | 985 | .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, |
926 | .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, | 986 | .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, |
927 | .notify_acquired = simple_gpu_kfmlp_notify_acquired, | 987 | .notify_acquired = simple_gpu_kfmlp_notify_acquired, |
928 | .notify_freed = simple_gpu_kfmlp_notify_freed | 988 | .notify_freed = simple_gpu_kfmlp_notify_freed, |
989 | .replica_to_resource = gpu_replica_to_resource, | ||
929 | }; | 990 | }; |
930 | 991 | ||
931 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, | 992 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, |
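
Both advise_enqueue variants in this file now share one selection rule: pick the queue with the smallest primary metric (estimated completion length for the affinity-aware observer, raw waiter count for the simple one) and break ties in favor of the queue whose backing GPU currently has the fewest holders. A minimal standalone sketch of that rule, using simplified types rather than the kernel's:

#include <stddef.h>

struct q_stat {
        unsigned long long metric; /* estimated length or waiter count */
        int nr_cur_users;          /* holders on the backing GPU */
};

/* return the index of the queue a new request should join */
static size_t pick_queue(const struct q_stat *q, size_t n)
{
        size_t best = 0, i;

        for (i = 1; i < n; ++i) {
                if (q[i].metric < q[best].metric ||
                    (q[i].metric == q[best].metric &&
                     q[i].nr_cur_users < q[best].nr_cur_users))
                        best = i;
        }
        return best;
}
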
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 287e4a0662d9..fd6398121fbf 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -368,7 +368,7 @@ static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_ | |||
368 | int i; | 368 | int i; |
369 | struct task_struct *found = NULL; | 369 | struct task_struct *found = NULL; |
370 | for(i = 0; i < reg->nr_owners; ++i) { | 370 | for(i = 0; i < reg->nr_owners; ++i) { |
371 | if(reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { | 371 | if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) { |
372 | found = reg->owners[i]; | 372 | found = reg->owners[i]; |
373 | } | 373 | } |
374 | } | 374 | } |
@@ -433,8 +433,9 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) | |||
433 | 433 | ||
434 | raw_spin_lock_irqsave(®->lock, flags); | 434 | raw_spin_lock_irqsave(®->lock, flags); |
435 | 435 | ||
436 | if(reg->nr_owners < MAX_NR_OWNERS) { | 436 | if(reg->nr_owners < NV_MAX_SIMULT_USERS) { |
437 | for(i = 0; i < MAX_NR_OWNERS; ++i) { | 437 | TRACE_TASK(t, "registers GPU %d\n", reg_device_id); |
438 | for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { | ||
438 | if(reg->owners[i] == NULL) { | 439 | if(reg->owners[i] == NULL) { |
439 | reg->owners[i] = t; | 440 | reg->owners[i] = t; |
440 | 441 | ||
@@ -485,7 +486,9 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) | |||
485 | 486 | ||
486 | raw_spin_lock_irqsave(®->lock, flags); | 487 | raw_spin_lock_irqsave(®->lock, flags); |
487 | 488 | ||
488 | for(i = 0; i < reg->nr_owners; ++i) { | 489 | TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); |
490 | |||
491 | for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { | ||
489 | if(reg->owners[i] == t) { | 492 | if(reg->owners[i] == t) { |
490 | #ifdef CONFIG_LITMUS_SOFTIRQD | 493 | #ifdef CONFIG_LITMUS_SOFTIRQD |
491 | flush_pending(klitirqd_th, t); | 494 | flush_pending(klitirqd_th, t); |
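
The nvidia_info.c hunks bound the per-GPU owner bookkeeping by NV_MAX_SIMULT_USERS and skip empty slots when scanning for the highest-priority owner. Below is a simplified standalone sketch of the registration pattern used in __reg_nv_device() above; the struct is a stand-in, not the kernel's nv_device_registry_t.

#include <stddef.h>

#define NV_MAX_SIMULT_USERS 2 /* stands in for CONFIG_NV_MAX_SIMULT_USERS */

struct owner_table {
        void *owners[NV_MAX_SIMULT_USERS]; /* task pointers in the kernel */
        int nr_owners;
};

/* returns 0 on success, -1 if the GPU already has NV_MAX_SIMULT_USERS holders */
static int register_owner(struct owner_table *reg, void *task)
{
        size_t i;

        if (reg->nr_owners >= NV_MAX_SIMULT_USERS)
                return -1;
        for (i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
                if (reg->owners[i] == NULL) { /* first free slot */
                        reg->owners[i] = task;
                        ++reg->nr_owners;
                        return 0;
                }
        }
        return -1;
}
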