author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-30 18:30:58 -0400
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-30 18:30:58 -0400
commit | de9427b949a95a510a4a609740613246d753d5fa (patch)
tree | 2986370d4c146ca3eb325ace38b4de3287aefdd6 /litmus/ikglp_lock.c
parent | 2641252759e58084d47f42c67a947a7dd1ad70c4 (diff)
Add ikglp.
Adds an implementation of the IKGLP, a k-exclusion locking protocol.
Supports both affinity-aware replica selection and the nesting of
other locks within the IKGLP (the IKGLP itself must be an
outermost lock).
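For orientation, here is a minimal, hypothetical userspace sketch of how a real-time task might use this lock through liblitmus. litmus_lock()/litmus_unlock() follow the usual liblitmus pattern; open_ikglp_sem() and its arguments are assumptions and may not match the actual interface.

/* Hypothetical usage sketch (not part of this patch). Assumes a
 * liblitmus-style helper open_ikglp_sem(); litmus_lock() and
 * litmus_unlock() are the standard liblitmus entry points. */
#include <litmus.h>

int use_one_replica(int fd)
{
	int od, replica;

	od = open_ikglp_sem(fd, 0 /* object id */);	/* assumed helper */
	if (od < 0)
		return od;

	replica = litmus_lock(od);	/* ikglp_lock() returns a replica/resource index */
	if (replica < 0)
		return replica;

	/* ... use replica 'replica' of the shared resource ... */

	return litmus_unlock(od);	/* ikglp_unlock() hands the replica off */
}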
Diffstat (limited to 'litmus/ikglp_lock.c')
-rw-r--r-- | litmus/ikglp_lock.c | 3376 |
1 files changed, 3376 insertions, 0 deletions
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
new file mode 100644
index 000000000000..bb7771a0f3a2
--- /dev/null
+++ b/litmus/ikglp_lock.c
@@ -0,0 +1,3376 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/uaccess.h> | ||
3 | |||
4 | #include <litmus/trace.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/fdso.h> | ||
7 | |||
8 | #include <litmus/litmus_proc.h> | ||
9 | |||
10 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
11 | #include <litmus/gpu_affinity.h> | ||
12 | #include <litmus/nvidia_info.h> | ||
13 | #endif | ||
14 | |||
15 | #include <litmus/ikglp_lock.h> | ||
16 | |||
17 | #define IKGLP_INVAL_DISTANCE 0x7FFFFFFF | ||
18 | |||
19 | int ikglp_max_heap_base_priority_order(struct binheap_node *a, | ||
20 | struct binheap_node *b) | ||
21 | { | ||
22 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); | ||
23 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); | ||
24 | |||
25 | BUG_ON(!d_a); | ||
26 | BUG_ON(!d_b); | ||
27 | |||
28 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | ||
29 | } | ||
30 | |||
31 | int ikglp_min_heap_base_priority_order(struct binheap_node *a, | ||
32 | struct binheap_node *b) | ||
33 | { | ||
34 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); | ||
35 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); | ||
36 | |||
37 | return litmus->__compare(d_b->task, BASE, d_a->task, BASE); | ||
38 | } | ||
39 | |||
40 | int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a, | ||
41 | struct binheap_node *b) | ||
42 | { | ||
43 | ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); | ||
44 | ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); | ||
45 | |||
46 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | ||
47 | } | ||
48 | |||
49 | |||
50 | int ikglp_min_heap_donee_order(struct binheap_node *a, | ||
51 | struct binheap_node *b) | ||
52 | { | ||
53 | struct task_struct *prio_a, *prio_b; | ||
54 | |||
55 | ikglp_donee_heap_node_t *d_a = | ||
56 | binheap_entry(a, ikglp_donee_heap_node_t, node); | ||
57 | ikglp_donee_heap_node_t *d_b = | ||
58 | binheap_entry(b, ikglp_donee_heap_node_t, node); | ||
59 | |||
60 | if(!d_a->donor_info) { | ||
61 | prio_a = d_a->task; | ||
62 | } | ||
63 | else { | ||
64 | prio_a = d_a->donor_info->task; | ||
65 | BUG_ON(d_a->task != d_a->donor_info->donee_info->task); | ||
66 | } | ||
67 | |||
68 | if(!d_b->donor_info) { | ||
69 | prio_b = d_b->task; | ||
70 | } | ||
71 | else { | ||
72 | prio_b = d_b->donor_info->task; | ||
73 | BUG_ON(d_b->task != d_b->donor_info->donee_info->task); | ||
74 | } | ||
75 | |||
76 | /* note reversed order of "b < a" and not "a < b" */ | ||
77 | return litmus->__compare(prio_b, BASE, prio_a, BASE); | ||
78 | } | ||
79 | |||
80 | |||
81 | static inline unsigned int nominal_fq_len(struct fifo_queue *fq) | ||
82 | { | ||
83 | return (fq->count - fq->is_vunlocked); | ||
84 | } | ||
85 | |||
86 | static inline int ikglp_get_idx(struct ikglp_semaphore *sem, | ||
87 | struct fifo_queue *queue) | ||
88 | { | ||
89 | return (queue - &sem->fifo_queues[0]); | ||
90 | } | ||
91 | |||
92 | static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem, | ||
93 | struct task_struct *holder) | ||
94 | { | ||
95 | struct fifo_queue *fq = NULL; | ||
96 | int i; | ||
97 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
98 | if(sem->fifo_queues[i].owner == holder) { | ||
99 | fq = &sem->fifo_queues[i]; | ||
100 | break; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | return(fq); | ||
105 | } | ||
106 | |||
107 | static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, | ||
108 | struct task_struct *skip) | ||
109 | { | ||
110 | struct list_head *pos; | ||
111 | struct task_struct *queued, *found = NULL; | ||
112 | |||
113 | list_for_each(pos, &kqueue->wait.task_list) { | ||
114 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private; | ||
115 | |||
116 | /* Compare task prios, find high prio task. */ | ||
117 | if(queued != skip && litmus->compare(queued, found)) | ||
118 | found = queued; | ||
119 | } | ||
120 | return found; | ||
121 | } | ||
122 | |||
123 | static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem, | ||
124 | struct fifo_queue *search_start) | ||
125 | { | ||
126 | /* we start our search at search_start instead of at the beginning of the | ||
127 | queue list to load-balance across all resources. */ | ||
128 | struct fifo_queue* step = search_start; | ||
129 | struct fifo_queue* shortest = sem->shortest_fifo_queue; | ||
130 | |||
131 | do { | ||
132 | step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ? | ||
133 | step+1 : &sem->fifo_queues[0]; | ||
134 | |||
135 | /* consider actual lengths, not nominal lengths */ | ||
136 | if(step->count < shortest->count) { | ||
137 | shortest = step; | ||
138 | if(step->count == 0) | ||
139 | break; /* can't get any shorter */ | ||
140 | } | ||
141 | }while(step != search_start); | ||
142 | |||
143 | return(shortest); | ||
144 | } | ||
145 | |||
146 | static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem) | ||
147 | { | ||
148 | return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task; | ||
149 | } | ||
150 | |||
151 | static void ikglp_add_global_list(struct ikglp_semaphore *sem, | ||
152 | struct task_struct *t, | ||
153 | ikglp_heap_node_t *node) | ||
154 | { | ||
155 | node->task = t; | ||
156 | INIT_BINHEAP_NODE(&node->node); | ||
157 | |||
158 | if(sem->top_m_size < sem->max_in_fifos) { | ||
159 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", | ||
160 | t->comm, t->pid); | ||
161 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); | ||
162 | ++(sem->top_m_size); | ||
163 | } | ||
164 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { | ||
165 | ikglp_heap_node_t *evicted = | ||
166 | binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); | ||
167 | |||
168 | TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n", | ||
169 | t->comm, t->pid, | ||
170 | evicted->task->comm, evicted->task->pid); | ||
171 | |||
172 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); | ||
173 | INIT_BINHEAP_NODE(&evicted->node); | ||
174 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); | ||
175 | |||
176 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); | ||
177 | } | ||
178 | else { | ||
179 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", | ||
180 | t->comm, t->pid); | ||
181 | |||
182 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); | ||
183 | } | ||
184 | } | ||
185 | |||
186 | |||
187 | static void ikglp_del_global_list(struct ikglp_semaphore *sem, | ||
188 | struct task_struct *t, | ||
189 | ikglp_heap_node_t *node) | ||
190 | { | ||
191 | BUG_ON(!binheap_is_in_heap(&node->node)); | ||
192 | |||
193 | TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid); | ||
194 | |||
195 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { | ||
196 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); | ||
197 | |||
198 | binheap_delete(&node->node, &sem->top_m); | ||
199 | |||
200 | if(!binheap_empty(&sem->not_top_m)) { | ||
201 | ikglp_heap_node_t *promoted = | ||
202 | binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node); | ||
203 | |||
204 | TRACE_CUR("Promoting %s/%d to top-m\n", | ||
205 | promoted->task->comm, promoted->task->pid); | ||
206 | |||
207 | binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node); | ||
208 | INIT_BINHEAP_NODE(&promoted->node); | ||
209 | |||
210 | binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node); | ||
211 | } | ||
212 | else { | ||
213 | TRACE_CUR("No one to promote to top-m.\n"); | ||
214 | --(sem->top_m_size); | ||
215 | } | ||
216 | } | ||
217 | else { | ||
218 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); | ||
219 | |||
220 | binheap_delete(&node->node, &sem->not_top_m); | ||
221 | } | ||
222 | } | ||
223 | |||
224 | |||
225 | static void ikglp_add_donees(struct ikglp_semaphore *sem, | ||
226 | struct fifo_queue *fq, | ||
227 | struct task_struct *t, | ||
228 | ikglp_donee_heap_node_t* node) | ||
229 | { | ||
230 | node->task = t; | ||
231 | node->donor_info = NULL; | ||
232 | node->fq = fq; | ||
233 | INIT_BINHEAP_NODE(&node->node); | ||
234 | |||
235 | binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node); | ||
236 | } | ||
237 | |||
238 | |||
239 | static void ikglp_refresh_owners_prio_increase(struct task_struct *t, | ||
240 | struct fifo_queue *fq, | ||
241 | struct ikglp_semaphore *sem, | ||
242 | unsigned long flags) | ||
243 | { | ||
244 | /* priority of 't' has increased (note: 't' might already be hp_waiter). */ | ||
245 | if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { | ||
246 | struct task_struct *old_max_eff_prio; | ||
247 | struct task_struct *new_max_eff_prio; | ||
248 | struct task_struct *new_prio = NULL; | ||
249 | struct task_struct *owner = fq->owner; | ||
250 | |||
251 | if(fq->hp_waiter) | ||
252 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", | ||
253 | fq->hp_waiter->comm, fq->hp_waiter->pid); | ||
254 | else | ||
255 | TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); | ||
256 | |||
257 | if(owner) | ||
258 | { | ||
259 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
260 | |||
261 | if (unlikely(binheap_empty(&tsk_rt(owner)->hp_blocked_tasks))) { | ||
262 | TRACE_TASK(owner, "not drawing inheritance from fq %d.\n", | ||
263 | ikglp_get_idx(sem, fq)); | ||
264 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
265 | WARN_ON(1); | ||
266 | return; | ||
267 | } | ||
268 | |||
269 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
270 | |||
271 | fq->hp_waiter = t; | ||
272 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); | ||
273 | |||
274 | binheap_decrease(&fq->nest.hp_binheap_node, | ||
275 | &tsk_rt(owner)->hp_blocked_tasks); | ||
276 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
277 | |||
278 | if(new_max_eff_prio != old_max_eff_prio) { | ||
279 | TRACE_TASK(t, "is new hp_waiter.\n"); | ||
280 | |||
281 | if ((effective_priority(owner) == old_max_eff_prio) || | ||
282 | (litmus->__compare(new_max_eff_prio, BASE, | ||
283 | owner, EFFECTIVE))){ | ||
284 | new_prio = new_max_eff_prio; | ||
285 | } | ||
286 | } | ||
287 | else { | ||
288 | TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); | ||
289 | } | ||
290 | |||
291 | if(new_prio) { | ||
292 | /* set new inheritance and propagate */ | ||
293 | TRACE_TASK(t, "Effective priority changed for owner " | ||
294 | "%s/%d to %s/%d\n", | ||
295 | owner->comm, owner->pid, | ||
296 | new_prio->comm, new_prio->pid); | ||
297 | litmus->nested_increase_prio(owner, new_prio, &sem->lock, | ||
298 | flags); /* unlocks lock. */ | ||
299 | } | ||
300 | else { | ||
301 | TRACE_TASK(t, "No change in effective priority (is %s/%d). " | ||
302 | "Propagation halted.\n", | ||
303 | new_max_eff_prio->comm, new_max_eff_prio->pid); | ||
304 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
305 | unlock_fine_irqrestore(&sem->lock, flags); | ||
306 | } | ||
307 | } | ||
308 | else { | ||
309 | fq->hp_waiter = t; | ||
310 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); | ||
311 | |||
312 | TRACE_TASK(t, "no owner.\n"); | ||
313 | unlock_fine_irqrestore(&sem->lock, flags); | ||
314 | } | ||
315 | } | ||
316 | else { | ||
317 | TRACE_TASK(t, "hp_waiter is unaffected.\n"); | ||
318 | unlock_fine_irqrestore(&sem->lock, flags); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | /* hp_waiter has decreased */ | ||
323 | static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq, | ||
324 | struct ikglp_semaphore *sem, | ||
325 | unsigned long flags, | ||
326 | int budget_triggered) | ||
327 | { | ||
328 | struct task_struct *owner = fq->owner; | ||
329 | |||
330 | struct task_struct *old_max_eff_prio; | ||
331 | struct task_struct *new_max_eff_prio; | ||
332 | |||
333 | if(!owner) { | ||
334 | TRACE_CUR("No owner. Returning.\n"); | ||
335 | unlock_fine_irqrestore(&sem->lock, flags); | ||
336 | return; | ||
337 | } | ||
338 | |||
339 | TRACE_CUR("ikglp_refresh_owners_prio_decrease\n"); | ||
340 | |||
341 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
342 | |||
343 | if (unlikely(binheap_empty(&tsk_rt(owner)->hp_blocked_tasks))) { | ||
344 | TRACE_TASK(owner, "not drawing inheritance from fq %d.\n", | ||
345 | ikglp_get_idx(sem, fq)); | ||
346 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
347 | unlock_fine_irqrestore(&sem->lock, flags); | ||
348 | WARN_ON(1); | ||
349 | return; | ||
350 | } | ||
351 | |||
352 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
353 | |||
354 | binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); | ||
355 | fq->nest.hp_waiter_eff_prio = | ||
356 | (fq->hp_waiter) ? effective_priority(fq->hp_waiter) : NULL; | ||
357 | binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks, | ||
358 | struct nested_info, hp_binheap_node); | ||
359 | |||
360 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
361 | |||
362 | if((old_max_eff_prio != new_max_eff_prio) && | ||
363 | (effective_priority(owner) == old_max_eff_prio)) | ||
364 | { | ||
365 | /* Need to set new effective_priority for owner */ | ||
366 | struct task_struct *decreased_prio; | ||
367 | |||
368 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | ||
369 | ikglp_get_idx(sem, fq)); | ||
370 | |||
371 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | ||
372 | TRACE_CUR("%s/%d has greater base priority than base priority " | ||
373 | "of owner (%s/%d) of fq %d.\n", | ||
374 | (new_max_eff_prio) ? new_max_eff_prio->comm : "null", | ||
375 | (new_max_eff_prio) ? new_max_eff_prio->pid : 0, | ||
376 | owner->comm, | ||
377 | owner->pid, | ||
378 | ikglp_get_idx(sem, fq)); | ||
379 | |||
380 | decreased_prio = new_max_eff_prio; | ||
381 | } | ||
382 | else { | ||
383 | TRACE_CUR("%s/%d has lesser base priority than base priority " | ||
384 | "of owner (%s/%d) of fq %d.\n", | ||
385 | (new_max_eff_prio) ? new_max_eff_prio->comm : "null", | ||
386 | (new_max_eff_prio) ? new_max_eff_prio->pid : 0, | ||
387 | owner->comm, | ||
388 | owner->pid, | ||
389 | ikglp_get_idx(sem, fq)); | ||
390 | |||
391 | decreased_prio = NULL; | ||
392 | } | ||
393 | |||
394 | /* beware: recursion */ | ||
395 | /* also, call will unlock sem->lock */ | ||
396 | litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, | ||
397 | flags, budget_triggered); | ||
398 | } | ||
399 | else { | ||
400 | TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); | ||
401 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
402 | unlock_fine_irqrestore(&sem->lock, flags); | ||
403 | } | ||
404 | } | ||
405 | |||
406 | |||
407 | static void ikglp_remove_donation_from_owner(struct binheap_node *n, | ||
408 | struct fifo_queue *fq, | ||
409 | struct ikglp_semaphore *sem, | ||
410 | unsigned long flags) | ||
411 | { | ||
412 | struct task_struct *owner = fq->owner; | ||
413 | |||
414 | struct task_struct *old_max_eff_prio; | ||
415 | struct task_struct *new_max_eff_prio; | ||
416 | |||
417 | BUG_ON(!owner); | ||
418 | |||
419 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
420 | |||
421 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
422 | |||
423 | binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks); | ||
424 | |||
425 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
426 | |||
427 | if((old_max_eff_prio != new_max_eff_prio) && | ||
428 | (effective_priority(owner) == old_max_eff_prio)) | ||
429 | { | ||
430 | /* Need to set new effective_priority for owner */ | ||
431 | struct task_struct *decreased_prio; | ||
432 | |||
433 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | ||
434 | ikglp_get_idx(sem, fq)); | ||
435 | |||
436 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | ||
437 | TRACE_CUR("has greater base priority than base priority of owner " | ||
438 | "of fq %d.\n", | ||
439 | ikglp_get_idx(sem, fq)); | ||
440 | decreased_prio = new_max_eff_prio; | ||
441 | } | ||
442 | else { | ||
443 | TRACE_CUR("has lesser base priority than base priority of owner of " | ||
444 | "fq %d.\n", | ||
445 | ikglp_get_idx(sem, fq)); | ||
446 | decreased_prio = NULL; | ||
447 | } | ||
448 | |||
449 | /* beware: recursion */ | ||
450 | /* also, call will unlock sem->lock */ | ||
451 | litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, | ||
452 | flags, 0); | ||
453 | } | ||
454 | else { | ||
455 | TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); | ||
456 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
457 | unlock_fine_irqrestore(&sem->lock, flags); | ||
458 | } | ||
459 | } | ||
460 | |||
461 | static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t, | ||
462 | struct binheap_node *n) | ||
463 | { | ||
464 | struct task_struct *old_max_eff_prio; | ||
465 | struct task_struct *new_max_eff_prio; | ||
466 | |||
467 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
468 | |||
469 | TRACE_CUR("Removing donation from fq waiter %s/%d\n", t->comm, t->pid); | ||
470 | |||
471 | old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); | ||
472 | |||
473 | binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks); | ||
474 | |||
475 | new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); | ||
476 | |||
477 | if((old_max_eff_prio != new_max_eff_prio) && | ||
478 | (effective_priority(t) == old_max_eff_prio)) | ||
479 | { | ||
480 | /* Need to set new effective_priority for owner */ | ||
481 | struct task_struct *decreased_prio; | ||
482 | |||
483 | if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { | ||
484 | decreased_prio = new_max_eff_prio; | ||
485 | } | ||
486 | else { | ||
487 | decreased_prio = NULL; | ||
488 | } | ||
489 | |||
490 | /* no need to propagate decreased inheritance to AUX | ||
491 | or klmirqd tasks since they cannot (should not) inherit | ||
492 | a priority directly from us while we suspend on a litmus | ||
493 | lock. */ | ||
494 | tsk_rt(t)->inh_task = decreased_prio; | ||
495 | } | ||
496 | |||
497 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
498 | } | ||
499 | |||
500 | static void ikglp_get_immediate(struct task_struct* t, | ||
501 | struct fifo_queue *fq, | ||
502 | struct ikglp_semaphore *sem, | ||
503 | unsigned long flags) | ||
504 | { | ||
505 | /* resource available now */ | ||
506 | TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq)); | ||
507 | |||
508 | fq->owner = t; | ||
509 | |||
510 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
511 | binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, | ||
512 | struct nested_info, hp_binheap_node); | ||
513 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
514 | |||
515 | ++(fq->count); | ||
516 | |||
517 | /* even though we got the replica, we're still considered in the fifo */ | ||
518 | ++(sem->nr_in_fifos); | ||
519 | |||
520 | ikglp_add_global_list(sem, t, &fq->global_heap_node); | ||
521 | ikglp_add_donees(sem, fq, t, &fq->donee_heap_node); | ||
522 | |||
523 | sem->shortest_fifo_queue = | ||
524 | ikglp_find_shortest(sem, sem->shortest_fifo_queue); | ||
525 | |||
526 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
527 | if(sem->aff_obs) { | ||
528 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
529 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t); | ||
530 | } | ||
531 | #endif | ||
532 | |||
533 | unlock_fine_irqrestore(&sem->lock, flags); | ||
534 | } | ||
535 | |||
536 | |||
537 | static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, | ||
538 | struct fifo_queue *fq, | ||
539 | ikglp_wait_state_t *wait, | ||
540 | ikglp_heap_node_t *global_heap_node, | ||
541 | ikglp_donee_heap_node_t *donee_heap_node) | ||
542 | { | ||
543 | struct task_struct *t = wait->task; | ||
544 | |||
545 | /* resource is not free => must suspend and wait */ | ||
546 | TRACE_TASK(t, "Enqueuing on fq %d.\n", | ||
547 | ikglp_get_idx(sem, fq)); | ||
548 | |||
549 | init_waitqueue_entry(&wait->fq_node, t); | ||
550 | |||
551 | __add_wait_queue_tail_exclusive(&fq->wait, &wait->fq_node); | ||
552 | |||
553 | ++(fq->count); | ||
554 | ++(sem->nr_in_fifos); | ||
555 | |||
556 | /* update global list. */ | ||
557 | if(likely(global_heap_node)) { | ||
558 | if(binheap_is_in_heap(&global_heap_node->node)) { | ||
559 | WARN_ON(1); | ||
560 | ikglp_del_global_list(sem, t, global_heap_node); | ||
561 | } | ||
562 | ikglp_add_global_list(sem, t, global_heap_node); | ||
563 | } | ||
564 | /* update donor eligibility list. */ | ||
565 | if(likely(donee_heap_node)) | ||
566 | ikglp_add_donees(sem, fq, t, donee_heap_node); | ||
567 | |||
568 | if(sem->shortest_fifo_queue == fq) | ||
569 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); | ||
570 | |||
571 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
572 | if(sem->aff_obs) | ||
573 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
574 | #endif | ||
575 | |||
576 | wait->cur_q = IKGLP_FQ; | ||
577 | wait->fq = fq; | ||
578 | mb(); | ||
579 | |||
580 | TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); | ||
581 | } | ||
582 | |||
583 | |||
584 | static void ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, | ||
585 | struct fifo_queue *fq, | ||
586 | ikglp_wait_state_t *wait, | ||
587 | unsigned long flags) | ||
588 | { | ||
589 | /* resource is not free => must suspend and wait */ | ||
590 | TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend " | ||
591 | "and wait.\n", | ||
592 | ikglp_get_idx(sem, fq)); | ||
593 | |||
594 | INIT_BINHEAP_NODE(&wait->global_heap_node.node); | ||
595 | INIT_BINHEAP_NODE(&wait->donee_heap_node.node); | ||
596 | |||
597 | __ikglp_enqueue_on_fq(sem, fq, wait, | ||
598 | &wait->global_heap_node, &wait->donee_heap_node); | ||
599 | |||
600 | /* call unlocks sem->lock */ | ||
601 | ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); | ||
602 | } | ||
603 | |||
604 | |||
605 | static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, | ||
606 | ikglp_wait_state_t *wait) | ||
607 | { | ||
608 | TRACE_TASK(wait->task, "goes to PQ.\n"); | ||
609 | |||
610 | wait->pq_node.task = wait->task; /* copy over task (little redundant...) */ | ||
611 | |||
612 | binheap_add(&wait->pq_node.node, &sem->priority_queue, | ||
613 | ikglp_heap_node_t, node); | ||
614 | |||
615 | wait->cur_q = IKGLP_PQ; | ||
616 | } | ||
617 | |||
618 | static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, | ||
619 | ikglp_wait_state_t *wait) | ||
620 | { | ||
621 | INIT_BINHEAP_NODE(&wait->global_heap_node.node); | ||
622 | INIT_BINHEAP_NODE(&wait->donee_heap_node.node); | ||
623 | INIT_BINHEAP_NODE(&wait->pq_node.node); | ||
624 | |||
625 | __ikglp_enqueue_on_pq(sem, wait); | ||
626 | } | ||
627 | |||
628 | static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | ||
629 | ikglp_wait_state_t* wait, | ||
630 | unsigned long flags) | ||
631 | { | ||
632 | struct task_struct *t = wait->task; | ||
633 | ikglp_donee_heap_node_t *donee_node = NULL; | ||
634 | struct task_struct *donee; | ||
635 | |||
636 | struct task_struct *old_max_eff_prio; | ||
637 | struct task_struct *new_max_eff_prio; | ||
638 | struct task_struct *new_prio = NULL; | ||
639 | |||
640 | INIT_BINHEAP_NODE(&wait->global_heap_node.node); | ||
641 | INIT_BINHEAP_NODE(&wait->donee_heap_node.node); | ||
642 | INIT_BINHEAP_NODE(&wait->pq_node.node); | ||
643 | INIT_BINHEAP_NODE(&wait->node); | ||
644 | |||
645 | /* Add donor to the global list. */ | ||
646 | ikglp_add_global_list(sem, t, &wait->global_heap_node); | ||
647 | |||
648 | /* Select a donee */ | ||
649 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
650 | donee_node = (sem->aff_obs) ? | ||
651 | sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) : | ||
652 | binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
653 | #else | ||
654 | donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
655 | #endif | ||
656 | |||
657 | donee = donee_node->task; | ||
658 | |||
659 | TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); | ||
660 | |||
661 | TRACE_CUR("Temporarily removing %s/%d to donee list.\n", | ||
662 | donee->comm, donee->pid); | ||
663 | |||
664 | /* Remove from donee list */ | ||
665 | binheap_delete(&donee_node->node, &sem->donees); | ||
666 | |||
667 | wait->donee_info = donee_node; | ||
668 | |||
669 | /* Add t to donor heap. */ | ||
670 | binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node); | ||
671 | |||
672 | /* Now adjust the donee's priority. */ | ||
673 | |||
674 | /* Lock the donee's inheritance heap. */ | ||
675 | raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock); | ||
676 | |||
677 | old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); | ||
678 | |||
679 | if(donee_node->donor_info) { | ||
680 | /* Steal donation relation. Evict old donor to PQ. */ | ||
681 | |||
682 | /* Remove old donor from donor heap */ | ||
683 | ikglp_wait_state_t *old_wait = donee_node->donor_info; | ||
684 | struct task_struct *old_donor = old_wait->task; | ||
685 | |||
686 | TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. " | ||
687 | "Moving old donor to PQ.\n", | ||
688 | donee->comm, donee->pid, old_donor->comm, old_donor->pid); | ||
689 | |||
690 | binheap_delete(&old_wait->node, &sem->donors); | ||
691 | |||
692 | /* Remove donation from donee's inheritance heap. */ | ||
693 | binheap_delete(&old_wait->prio_donation.hp_binheap_node, | ||
694 | &tsk_rt(donee)->hp_blocked_tasks); | ||
695 | /* WARNING: have not updated inh_prio! */ | ||
696 | |||
697 | /* Add old donor to PQ. */ | ||
698 | __ikglp_enqueue_on_pq(sem, old_wait); | ||
699 | |||
700 | /* Remove old donor from the global heap. */ | ||
701 | ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node); | ||
702 | } | ||
703 | |||
704 | /* Add back donee's node to the donees heap with increased prio */ | ||
705 | TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid); | ||
706 | |||
707 | donee_node->donor_info = wait; | ||
708 | INIT_BINHEAP_NODE(&donee_node->node); | ||
709 | binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node); | ||
710 | |||
711 | /* Add an inheritance/donation to the donee's inheritance heap. */ | ||
712 | wait->prio_donation.lock = (struct litmus_lock*)sem; | ||
713 | wait->prio_donation.hp_waiter_eff_prio = t; | ||
714 | wait->prio_donation.hp_waiter_ptr = NULL; | ||
715 | INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node); | ||
716 | |||
717 | binheap_add(&wait->prio_donation.hp_binheap_node, | ||
718 | &tsk_rt(donee)->hp_blocked_tasks, | ||
719 | struct nested_info, hp_binheap_node); | ||
720 | |||
721 | new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); | ||
722 | |||
723 | if(new_max_eff_prio != old_max_eff_prio) { | ||
724 | if ((effective_priority(donee) == old_max_eff_prio) || | ||
725 | (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ | ||
726 | TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", | ||
727 | donee->comm, donee->pid); | ||
728 | new_prio = new_max_eff_prio; | ||
729 | } | ||
730 | } | ||
731 | |||
732 | if(new_prio) { | ||
733 | struct fifo_queue *donee_fq = donee_node->fq; | ||
734 | |||
735 | if(donee != donee_fq->owner) { | ||
736 | TRACE_TASK(t, "%s/%d is not the owner. " | ||
737 | "Propagating priority to owner %s/%d.\n", | ||
738 | donee->comm, donee->pid, | ||
739 | donee_fq->owner->comm, donee_fq->owner->pid); | ||
740 | |||
741 | raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); | ||
742 | |||
743 | /* call unlocks sem->lock */ | ||
744 | ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); | ||
745 | } | ||
746 | else { | ||
747 | TRACE_TASK(t, "%s/%d is the owner. " | ||
748 | "Propagating priority immediatly.\n", | ||
749 | donee->comm, donee->pid); | ||
750 | |||
751 | /* call unlocks sem->lock and donee's heap lock */ | ||
752 | litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); | ||
753 | } | ||
754 | } | ||
755 | else { | ||
756 | TRACE_TASK(t, "No change in effective priority (it is %s/%d).\n", | ||
757 | (new_max_eff_prio) ? new_max_eff_prio->comm : "null", | ||
758 | (new_max_eff_prio) ? new_max_eff_prio->pid : 0); | ||
759 | raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); | ||
760 | unlock_fine_irqrestore(&sem->lock, flags); | ||
761 | } | ||
762 | |||
763 | wait->cur_q = IKGLP_DONOR; | ||
764 | } | ||
765 | |||
766 | |||
767 | int ikglp_lock(struct litmus_lock* l) | ||
768 | { | ||
769 | struct task_struct* t = current; | ||
770 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
771 | unsigned long flags = 0, more_flags; | ||
772 | struct fifo_queue *fq = NULL; | ||
773 | int replica = -EINVAL; | ||
774 | |||
775 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
776 | raw_spinlock_t *dgl_lock; | ||
777 | #endif | ||
778 | |||
779 | ikglp_wait_state_t wait; | ||
780 | |||
781 | if (!is_realtime(t)) | ||
782 | return -EPERM; | ||
783 | |||
784 | memset(&wait, 0, sizeof(wait)); | ||
785 | |||
786 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
787 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
788 | #endif | ||
789 | |||
790 | lock_global_irqsave(dgl_lock, flags); | ||
791 | raw_spin_lock_irqsave(&sem->real_lock, more_flags); | ||
792 | lock_fine_irqsave(&sem->lock, flags); | ||
793 | |||
794 | TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); | ||
795 | |||
796 | if(sem->nr_in_fifos < sem->max_in_fifos) { | ||
797 | /* enqueue somewhere */ | ||
798 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
799 | fq = (sem->aff_obs) ? | ||
800 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : | ||
801 | sem->shortest_fifo_queue; | ||
802 | #else | ||
803 | fq = sem->shortest_fifo_queue; | ||
804 | #endif | ||
805 | if(fq->count == 0) { | ||
806 | /* take available resource */ | ||
807 | replica = ikglp_get_idx(sem, fq); | ||
808 | |||
809 | TRACE_CUR("Getting replica %d\n", replica); | ||
810 | |||
811 | ikglp_get_immediate(t, fq, sem, flags); /* unlocks sem->lock */ | ||
812 | |||
813 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
814 | unlock_global_irqrestore(dgl_lock, flags); | ||
815 | goto acquired; | ||
816 | } | ||
817 | else { | ||
818 | TRACE_CUR("Will go on FQ somewhere.\n"); | ||
819 | |||
820 | wait.task = t; /* THIS IS CRITICALLY IMPORTANT!!! */ | ||
821 | |||
822 | /* record where we are blocked */ | ||
823 | tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; | ||
824 | mb(); | ||
825 | |||
826 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
827 | |||
828 | ikglp_enqueue_on_fq(sem, fq, &wait, flags); /* unlocks sem->lock */ | ||
829 | } | ||
830 | } | ||
831 | else { | ||
832 | TRACE_CUR("Going on a heap.\n"); | ||
833 | |||
834 | wait.task = t; /* THIS IS CRITICALLY IMPORTANT!!! */ | ||
835 | |||
836 | /* record where we are blocked */ | ||
837 | tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; | ||
838 | mb(); | ||
839 | |||
840 | /* FIXME: interruptible would be nice some day */ | ||
841 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
842 | |||
843 | if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { | ||
844 | TRACE_CUR("Going on PQ heap.\n"); | ||
845 | /* enqueue on PQ */ | ||
846 | ikglp_enqueue_on_pq(sem, &wait); | ||
847 | unlock_fine_irqrestore(&sem->lock, flags); | ||
848 | } | ||
849 | else { | ||
850 | /* enqueue as donor */ | ||
851 | TRACE_CUR("Going on donor heap.\n"); | ||
852 | ikglp_enqueue_on_donor(sem, &wait, flags); /* unlocks sem->lock */ | ||
853 | } | ||
854 | } | ||
855 | |||
856 | tsk_rt(t)->blocked_lock_data = (unsigned long)&wait; | ||
857 | |||
858 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
859 | unlock_global_irqrestore(dgl_lock, flags); | ||
860 | |||
861 | TRACE_CUR("Suspending for replica.\n"); | ||
862 | |||
863 | TS_LOCK_SUSPEND; | ||
864 | |||
865 | suspend_for_lock(); | ||
866 | |||
867 | TS_LOCK_RESUME; | ||
868 | |||
869 | fq = wait.fq; | ||
870 | |||
871 | tsk_rt(t)->blocked_lock_data = 0; | ||
872 | |||
873 | replica = ikglp_get_idx(sem, fq); | ||
874 | |||
875 | acquired: | ||
876 | TRACE_CUR("Acquired lock %d, queue %d\n", l->ident, replica); | ||
877 | |||
878 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
879 | if(sem->aff_obs) | ||
880 | return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); | ||
881 | #endif | ||
882 | |||
883 | return replica; | ||
884 | } | ||
885 | |||
886 | static void __drop_from_donor(struct ikglp_semaphore *sem, | ||
887 | ikglp_wait_state_t *wait) | ||
888 | { | ||
889 | BUG_ON(wait->cur_q != IKGLP_DONOR); | ||
890 | |||
891 | TRACE_TASK(wait->task, "is being dropped from donor heap.\n"); | ||
892 | |||
893 | binheap_delete(&wait->node, &sem->donors); | ||
894 | wait->cur_q = IKGLP_INVL; | ||
895 | } | ||
896 | |||
897 | static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, | ||
898 | struct fifo_queue *fq, | ||
899 | ikglp_wait_state_t *donor_info) | ||
900 | { | ||
901 | struct task_struct *t = donor_info->task; | ||
902 | |||
903 | TRACE_CUR("Donor %s/%d being moved to fq %d\n", | ||
904 | t->comm, | ||
905 | t->pid, | ||
906 | ikglp_get_idx(sem, fq)); | ||
907 | |||
908 | __drop_from_donor(sem, donor_info); | ||
909 | |||
910 | /* Already in global_list, so pass null to prevent adding 2nd time. */ | ||
911 | __ikglp_enqueue_on_fq(sem, fq, donor_info, | ||
912 | NULL, /* pass NULL */ | ||
913 | &donor_info->donee_heap_node); | ||
914 | |||
915 | /* Note: ikglp_refresh_owners_prio_increase() still needs to be called. */ | ||
916 | } | ||
917 | |||
918 | static void __drop_from_pq(struct ikglp_semaphore *sem, | ||
919 | ikglp_wait_state_t *wait) | ||
920 | { | ||
921 | BUG_ON(wait->cur_q != IKGLP_PQ); | ||
922 | |||
923 | TRACE_TASK(wait->task, "is being dropped from the PQ.\n"); | ||
924 | |||
925 | binheap_delete(&wait->pq_node.node, &sem->priority_queue); | ||
926 | wait->cur_q = IKGLP_INVL; | ||
927 | } | ||
928 | |||
929 | static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem, | ||
930 | struct fifo_queue *fq, | ||
931 | ikglp_wait_state_t *wait) | ||
932 | { | ||
933 | struct task_struct *t = wait->task; | ||
934 | |||
935 | TRACE_CUR("PQ request %s/%d being moved to fq %d\n", | ||
936 | t->comm, | ||
937 | t->pid, | ||
938 | ikglp_get_idx(sem, fq)); | ||
939 | |||
940 | __drop_from_pq(sem, wait); | ||
941 | __ikglp_enqueue_on_fq(sem, fq, wait, | ||
942 | &wait->global_heap_node, | ||
943 | &wait->donee_heap_node); | ||
944 | |||
945 | /* Note: ikglp_refresh_owners_prio_increase() still needs to be called. */ | ||
946 | } | ||
947 | |||
948 | static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal( | ||
949 | struct ikglp_semaphore* sem, | ||
950 | struct fifo_queue* skip) | ||
951 | { | ||
952 | /* must hold sem->lock */ | ||
953 | |||
954 | struct fifo_queue *fq = NULL; | ||
955 | struct list_head *pos; | ||
956 | struct task_struct *queued; | ||
957 | int i; | ||
958 | |||
959 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
960 | if( (sem->fifo_queues[i].count > 1) && (&sem->fifo_queues[i] != skip) && | ||
961 | (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { | ||
962 | |||
963 | TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than " | ||
964 | "hp_waiter on fq %d (%s/%d)\n", | ||
965 | ikglp_get_idx(sem, &sem->fifo_queues[i]), | ||
966 | sem->fifo_queues[i].hp_waiter->comm, | ||
967 | sem->fifo_queues[i].hp_waiter->pid, | ||
968 | (fq) ? ikglp_get_idx(sem, fq) : 0, | ||
969 | (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "null") : "nullXX", | ||
970 | (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : 0) : -2); | ||
971 | |||
972 | fq = &sem->fifo_queues[i]; | ||
973 | |||
974 | WARN_ON(!(fq->hp_waiter)); | ||
975 | } | ||
976 | } | ||
977 | |||
978 | if(fq) { | ||
979 | struct task_struct *max_hp = fq->hp_waiter; | ||
980 | ikglp_wait_state_t* ret = NULL; | ||
981 | |||
982 | TRACE_CUR("Searching for %s/%d on fq %d\n", | ||
983 | max_hp->comm, | ||
984 | max_hp->pid, | ||
985 | ikglp_get_idx(sem, fq)); | ||
986 | |||
987 | BUG_ON(!max_hp); | ||
988 | |||
989 | list_for_each(pos, &fq->wait.task_list) { | ||
990 | wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list); | ||
991 | |||
992 | queued = (struct task_struct*) wait->private; | ||
993 | |||
994 | TRACE_CUR("fq %d entry: %s/%d\n", | ||
995 | ikglp_get_idx(sem, fq), | ||
996 | queued->comm, | ||
997 | queued->pid); | ||
998 | |||
999 | /* Compare task prios, find high prio task. */ | ||
1000 | if (queued == max_hp) { | ||
1001 | TRACE_CUR("Found it!\n"); | ||
1002 | ret = container_of(wait, ikglp_wait_state_t, fq_node); | ||
1003 | } | ||
1004 | } | ||
1005 | |||
1006 | WARN_ON(!ret); | ||
1007 | return ret; | ||
1008 | } | ||
1009 | |||
1010 | return(NULL); | ||
1011 | } | ||
1012 | |||
1013 | static void __drop_from_fq(struct ikglp_semaphore *sem, | ||
1014 | ikglp_wait_state_t *wait) | ||
1015 | { | ||
1016 | struct task_struct *t = wait->task; | ||
1017 | struct fifo_queue *fq = wait->fq; | ||
1018 | |||
1019 | BUG_ON(wait->cur_q != IKGLP_FQ); | ||
1020 | BUG_ON(!fq); | ||
1021 | |||
1022 | TRACE_TASK(t, "is being dropped from fq.\n"); | ||
1023 | |||
1024 | __remove_wait_queue(&fq->wait, &wait->fq_node); | ||
1025 | --(fq->count); | ||
1026 | |||
1027 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1028 | if(sem->aff_obs) | ||
1029 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); | ||
1030 | #endif | ||
1031 | |||
1032 | if(t == fq->hp_waiter) { | ||
1033 | fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL); | ||
1034 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | ||
1035 | ikglp_get_idx(sem, fq), | ||
1036 | (fq->hp_waiter) ? fq->hp_waiter->comm : "null", | ||
1037 | (fq->hp_waiter) ? fq->hp_waiter->pid : 0); | ||
1038 | } | ||
1039 | |||
1040 | /* Update shortest. */ | ||
1041 | if(fq->count < sem->shortest_fifo_queue->count) | ||
1042 | sem->shortest_fifo_queue = fq; | ||
1043 | --(sem->nr_in_fifos); | ||
1044 | |||
1045 | wait->cur_q = IKGLP_INVL; | ||
1046 | } | ||
1047 | |||
1048 | static void ikglp_steal_to_fq(struct ikglp_semaphore *sem, | ||
1049 | struct fifo_queue *fq, | ||
1050 | ikglp_wait_state_t *fq_wait) | ||
1051 | { | ||
1052 | WARN_ON(fq_wait->fq != fq_wait->donee_heap_node.fq); | ||
1053 | __drop_from_fq(sem, fq_wait); | ||
1054 | |||
1055 | fq_wait->donee_heap_node.fq = fq; // just to be safe | ||
1056 | __ikglp_enqueue_on_fq(sem, fq, fq_wait, NULL, NULL); | ||
1057 | |||
1058 | /* Note: We have not checked the priority inheritance of fq's owner yet. */ | ||
1059 | } | ||
1060 | |||
1061 | |||
1062 | static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem, | ||
1063 | struct fifo_queue *fq, | ||
1064 | ikglp_wait_state_t *old_wait) | ||
1065 | { | ||
1066 | struct task_struct *t = old_wait->task; | ||
1067 | |||
1068 | BUG_ON(old_wait->donee_heap_node.fq != fq); | ||
1069 | |||
1070 | TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n", | ||
1071 | ikglp_get_idx(sem, fq)); | ||
1072 | |||
1073 | /* Need to migrate global_heap_node and donee_heap_node off of the stack | ||
1074 | to the nodes allocated for the owner of this fq. */ | ||
1075 | |||
1076 | /* TODO: Enhance binheap() to perform this operation in place. */ | ||
1077 | |||
1078 | ikglp_del_global_list(sem, t, &old_wait->global_heap_node); /* remove */ | ||
1079 | fq->global_heap_node = old_wait->global_heap_node; /* copy */ | ||
1080 | ikglp_add_global_list(sem, t, &fq->global_heap_node); /* re-add */ | ||
1081 | |||
1082 | binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); /* remove */ | ||
1083 | fq->donee_heap_node = old_wait->donee_heap_node; /* copy */ | ||
1084 | |||
1085 | if(fq->donee_heap_node.donor_info) { | ||
1086 | /* let donor know that our location has changed */ | ||
1087 | |||
1088 | /* validate cross-link */ | ||
1089 | BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); | ||
1090 | |||
1091 | fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node; | ||
1092 | } | ||
1093 | INIT_BINHEAP_NODE(&fq->donee_heap_node.node); | ||
1094 | binheap_add(&fq->donee_heap_node.node, &sem->donees, | ||
1095 | ikglp_donee_heap_node_t, node); /* re-add */ | ||
1096 | } | ||
1097 | |||
1098 | |||
1099 | |||
1100 | void ikglp_grant_replica_to_next(struct ikglp_semaphore *sem, | ||
1101 | struct fifo_queue *fq) | ||
1102 | { | ||
1103 | wait_queue_t *wait; | ||
1104 | ikglp_wait_state_t *fq_wait; | ||
1105 | struct task_struct *next; | ||
1106 | |||
1107 | BUG_ON(!waitqueue_active(&fq->wait)); | ||
1108 | |||
1109 | wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); | ||
1110 | fq_wait = container_of(wait, ikglp_wait_state_t, fq_node); | ||
1111 | next = (struct task_struct*) wait->private; | ||
1112 | |||
1113 | __remove_wait_queue(&fq->wait, wait); | ||
1114 | |||
1115 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", | ||
1116 | ikglp_get_idx(sem, fq), | ||
1117 | next->comm, next->pid); | ||
1118 | |||
1119 | /* migrate wait-state to fifo-memory. */ | ||
1120 | ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait); | ||
1121 | |||
1122 | /* next becomes the resource holder */ | ||
1123 | fq->owner = next; | ||
1124 | tsk_rt(next)->blocked_lock = NULL; | ||
1125 | |||
1126 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1127 | if(sem->aff_obs) | ||
1128 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next); | ||
1129 | #endif | ||
1130 | |||
1131 | /* determine new hp_waiter if necessary */ | ||
1132 | if (next == fq->hp_waiter) { | ||
1133 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
1134 | /* next has the highest priority --- it doesn't need to | ||
1135 | * inherit. However, we need to make sure that the | ||
1136 | * next-highest priority in the queue is reflected in | ||
1137 | * hp_waiter. */ | ||
1138 | fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL); | ||
1139 | TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n", | ||
1140 | ikglp_get_idx(sem, fq), | ||
1141 | (fq->hp_waiter) ? fq->hp_waiter->comm : "null", | ||
1142 | (fq->hp_waiter) ? fq->hp_waiter->pid : 0); | ||
1143 | |||
1144 | fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? | ||
1145 | effective_priority(fq->hp_waiter) : NULL; | ||
1146 | |||
1147 | if (fq->hp_waiter) | ||
1148 | TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n"); | ||
1149 | else | ||
1150 | TRACE("no further waiters\n"); | ||
1151 | |||
1152 | raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1153 | binheap_add(&fq->nest.hp_binheap_node, | ||
1154 | &tsk_rt(next)->hp_blocked_tasks, | ||
1155 | struct nested_info, | ||
1156 | hp_binheap_node); | ||
1157 | raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1158 | } | ||
1159 | else { | ||
1160 | /* Well, if 'next' is not the highest-priority waiter, | ||
1161 | * then it (probably) ought to inherit the highest-priority | ||
1162 | * waiter's priority. */ | ||
1163 | TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n", | ||
1164 | ikglp_get_idx(sem, fq), | ||
1165 | (fq->hp_waiter) ? fq->hp_waiter->comm : "null", | ||
1166 | (fq->hp_waiter) ? fq->hp_waiter->pid : 0); | ||
1167 | |||
1168 | raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1169 | |||
1170 | binheap_add(&fq->nest.hp_binheap_node, | ||
1171 | &tsk_rt(next)->hp_blocked_tasks, | ||
1172 | struct nested_info, | ||
1173 | hp_binheap_node); | ||
1174 | |||
1175 | /* It is possible that 'next' *should* be the hp_waiter, but isn't | ||
1176 | * because that update hasn't yet executed (update operation is | ||
1177 | * probably blocked on sem->lock). So only inherit if the top of | ||
1178 | * 'next's inheritance heap is indeed the effective prio. of hp_waiter. | ||
1179 | * (We use fq->hp_waiter_eff_prio instead of | ||
1180 | * effective_priority(hp_waiter) since the effective priority of | ||
1181 | * hp_waiter can change (and the update has not made it to this lock).) | ||
1182 | */ | ||
1183 | if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == | ||
1184 | fq->nest.hp_waiter_eff_prio)) | ||
1185 | { | ||
1186 | if(fq->nest.hp_waiter_eff_prio) | ||
1187 | litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio); | ||
1188 | else | ||
1189 | WARN_ON(1); | ||
1190 | } | ||
1191 | |||
1192 | raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1193 | } | ||
1194 | |||
1195 | /* wake up the new resource holder! */ | ||
1196 | wake_up_for_lock(next); | ||
1197 | } | ||
1198 | |||
1199 | |||
1200 | /* some compile-time configuration options for testing */ | ||
1201 | #define ALLOW_STEALING 1 | ||
1202 | #define ALWAYS_TERMINATE_DONATION 1 | ||
1203 | |||
1204 | void ikglp_move_next_to_fq(struct ikglp_semaphore *sem, | ||
1205 | struct fifo_queue *fq, | ||
1206 | struct task_struct *t, | ||
1207 | ikglp_donee_heap_node_t *donee_node, | ||
1208 | unsigned long *flags, | ||
1209 | int allow_stealing, | ||
1210 | int always_terminate_donation) | ||
1211 | { | ||
1212 | struct task_struct *donee = NULL; | ||
1213 | struct task_struct *new_on_fq = NULL; | ||
1214 | struct fifo_queue *fq_of_new_on_fq = NULL; | ||
1215 | |||
1216 | ikglp_wait_state_t *other_donor_info = NULL; | ||
1217 | struct fifo_queue *to_steal = NULL; | ||
1218 | int need_steal_prio_reeval = 0; | ||
1219 | |||
1220 | if (donee_node->donor_info) { | ||
1221 | ikglp_wait_state_t *donor_info = donee_node->donor_info; | ||
1222 | |||
1223 | new_on_fq = donor_info->task; | ||
1224 | |||
1225 | /* donor moved to FQ */ | ||
1226 | donee = t; | ||
1227 | |||
1228 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1229 | if(sem->aff_obs) { | ||
1230 | fq_of_new_on_fq = | ||
1231 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | ||
1232 | if((nominal_fq_len(fq_of_new_on_fq) >= sem->max_fifo_len) && | ||
1233 | !sem->aff_obs->relax_max_fifo_len) { | ||
1234 | WARN_ON(1); | ||
1235 | fq_of_new_on_fq = fq; | ||
1236 | } | ||
1237 | } | ||
1238 | else { | ||
1239 | fq_of_new_on_fq = fq; | ||
1240 | } | ||
1241 | #else | ||
1242 | fq_of_new_on_fq = fq; | ||
1243 | #endif | ||
1244 | |||
1245 | TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d " | ||
1246 | "(non-aff wanted fq %d).\n", | ||
1247 | new_on_fq->comm, new_on_fq->pid, | ||
1248 | ikglp_get_idx(sem, fq_of_new_on_fq), | ||
1249 | ikglp_get_idx(sem, fq)); | ||
1250 | |||
1251 | ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info); | ||
1252 | |||
1253 | /* treat donor as if it had donated to a task other than 't'. | ||
1254 | * this triggers the termination of the donation relationship. */ | ||
1255 | if (always_terminate_donation) | ||
1256 | other_donor_info = donor_info; | ||
1257 | } | ||
1258 | else if(!binheap_empty(&sem->donors)) { /* 't' had no donor; move another donor to FQ */ | ||
1259 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1260 | other_donor_info = (sem->aff_obs) ? | ||
1261 | sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) : | ||
1262 | binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1263 | #else | ||
1264 | other_donor_info = | ||
1265 | binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1266 | #endif | ||
1267 | |||
1268 | new_on_fq = other_donor_info->task; | ||
1269 | donee = other_donor_info->donee_info->task; | ||
1270 | |||
1271 | /* update the donee's heap position. */ | ||
1272 | other_donor_info->donee_info->donor_info = NULL; /* clear cross-link */ | ||
1273 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); | ||
1274 | |||
1275 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1276 | if(sem->aff_obs) { | ||
1277 | fq_of_new_on_fq = | ||
1278 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | ||
1279 | if((nominal_fq_len(fq_of_new_on_fq) >= sem->max_fifo_len) && | ||
1280 | !sem->aff_obs->relax_max_fifo_len) { | ||
1281 | WARN_ON(1); | ||
1282 | fq_of_new_on_fq = fq; | ||
1283 | } | ||
1284 | } | ||
1285 | else { | ||
1286 | fq_of_new_on_fq = fq; | ||
1287 | } | ||
1288 | #else | ||
1289 | fq_of_new_on_fq = fq; | ||
1290 | #endif | ||
1291 | |||
1292 | TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d " | ||
1293 | "(non-aff wanted fq %d).\n", | ||
1294 | new_on_fq->comm, new_on_fq->pid, | ||
1295 | ikglp_get_idx(sem, fq_of_new_on_fq), | ||
1296 | ikglp_get_idx(sem, fq)); | ||
1297 | |||
1298 | ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info); | ||
1299 | } | ||
1300 | else if(!binheap_empty(&sem->priority_queue)) { /* No donors, so move PQ */ | ||
1301 | ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue, | ||
1302 | ikglp_heap_node_t, node); | ||
1303 | ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t, | ||
1304 | pq_node); | ||
1305 | |||
1306 | new_on_fq = pq_wait->task; | ||
1307 | |||
1308 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1309 | if(sem->aff_obs) { | ||
1310 | fq_of_new_on_fq = | ||
1311 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | ||
1312 | if((nominal_fq_len(fq_of_new_on_fq) >= sem->max_fifo_len) && | ||
1313 | !sem->aff_obs->relax_max_fifo_len) { | ||
1314 | WARN_ON(1); | ||
1315 | fq_of_new_on_fq = fq; | ||
1316 | } | ||
1317 | } | ||
1318 | else { | ||
1319 | fq_of_new_on_fq = fq; | ||
1320 | } | ||
1321 | #else | ||
1322 | fq_of_new_on_fq = fq; | ||
1323 | #endif | ||
1324 | |||
1325 | TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d " | ||
1326 | "(non-aff wanted fq %d).\n", | ||
1327 | new_on_fq->comm, new_on_fq->pid, | ||
1328 | ikglp_get_idx(sem, fq_of_new_on_fq), | ||
1329 | ikglp_get_idx(sem, fq)); | ||
1330 | |||
1331 | ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait); | ||
1332 | } | ||
1333 | else if(allow_stealing && fq->count == 0) { | ||
1334 | /* No PQ and this queue is empty, so steal. */ | ||
1335 | |||
1336 | ikglp_wait_state_t *fq_wait; | ||
1337 | |||
1338 | TRACE_TASK(t, "Looking to steal a request for fq %d...\n", | ||
1339 | ikglp_get_idx(sem, fq)); | ||
1340 | |||
1341 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1342 | fq_wait = (sem->aff_obs) ? | ||
1343 | sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) : | ||
1344 | ikglp_find_hp_waiter_to_steal(sem, fq); | ||
1345 | #else | ||
1346 | fq_wait = ikglp_find_hp_waiter_to_steal(sem, fq); | ||
1347 | #endif | ||
1348 | |||
1349 | if(fq_wait) { | ||
1350 | to_steal = fq_wait->donee_heap_node.fq; | ||
1351 | |||
1352 | new_on_fq = fq_wait->task; | ||
1353 | fq_of_new_on_fq = fq; | ||
1354 | need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter); | ||
1355 | |||
1356 | TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n", | ||
1357 | new_on_fq->comm, new_on_fq->pid, | ||
1358 | ikglp_get_idx(sem, to_steal), | ||
1359 | ikglp_get_idx(sem, fq)); | ||
1360 | |||
1361 | ikglp_steal_to_fq(sem, fq, fq_wait); | ||
1362 | } | ||
1363 | else { | ||
1364 | TRACE_TASK(t, "Found nothing to steal for fq %d.\n", | ||
1365 | ikglp_get_idx(sem, fq)); | ||
1366 | } | ||
1367 | } | ||
1368 | else { | ||
1369 | /* move no one */ | ||
1370 | } | ||
1371 | |||
1372 | |||
1373 | /* Now patch up other priorities. | ||
1374 | |||
1375 | At most one of the following: | ||
1376 | if(donee && donee != t), decrease prio, propagate to owner, or onward | ||
1377 | if(to_steal), update owner's prio (hp_waiter has already been set) */ | ||
1378 | |||
1379 | BUG_ON(other_donor_info && to_steal); | ||
1380 | |||
1381 | if(other_donor_info) { | ||
1382 | struct fifo_queue *other_fq = other_donor_info->donee_info->fq; | ||
1383 | |||
1384 | BUG_ON(!donee); | ||
1385 | BUG_ON(!always_terminate_donation && donee == t); | ||
1386 | |||
1387 | TRACE_TASK(t, "Terminating donation relation of " | ||
1388 | "donor %s/%d to donee %s/%d!\n", | ||
1389 | other_donor_info->task->comm, other_donor_info->task->pid, | ||
1390 | donee->comm, donee->pid); | ||
1391 | |||
1392 | /* need to terminate donation relation. */ | ||
1393 | if(donee == other_fq->owner) { | ||
1394 | TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n", | ||
1395 | donee->comm, donee->pid, | ||
1396 | ikglp_get_idx(sem, other_fq)); | ||
1397 | |||
1398 | ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, | ||
1399 | other_fq, sem, *flags); | ||
1400 | |||
1401 | /* there should be no contention!!!! */ | ||
1402 | lock_fine_irqsave(&sem->lock, *flags); | ||
1403 | } | ||
1404 | else { | ||
1405 | TRACE_TASK(t, "Donee %s/%d is blocked in of fq %d.\n", | ||
1406 | donee->comm, donee->pid, | ||
1407 | ikglp_get_idx(sem, other_fq)); | ||
1408 | |||
1409 | ikglp_remove_donation_from_fq_waiter(donee, | ||
1410 | &other_donor_info->prio_donation.hp_binheap_node); | ||
1411 | if(donee == other_fq->hp_waiter) { | ||
1412 | TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. " | ||
1413 | "Rechecking hp_waiter.\n", | ||
1414 | donee->comm, donee->pid, | ||
1415 | ikglp_get_idx(sem, other_fq)); | ||
1416 | |||
1417 | other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL); | ||
1418 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | ||
1419 | ikglp_get_idx(sem, other_fq), | ||
1420 | (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "null", | ||
1421 | (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : 0); | ||
1422 | |||
1423 | /* unlocks sem->lock. reacquire it. */ | ||
1424 | ikglp_refresh_owners_prio_decrease(other_fq, sem, *flags, 0); | ||
1425 | /* there should be no contention!!!! */ | ||
1426 | lock_fine_irqsave(&sem->lock, *flags); | ||
1427 | } | ||
1428 | } | ||
1429 | } | ||
1430 | else if(to_steal) { | ||
1431 | TRACE_TASK(t, "Rechecking priority inheritance of fq %d, " | ||
1432 | "triggered by stealing.\n", | ||
1433 | ikglp_get_idx(sem, to_steal)); | ||
1434 | |||
1435 | if(need_steal_prio_reeval) { | ||
1436 | /* unlocks sem->lock. reacquire it. */ | ||
1437 | ikglp_refresh_owners_prio_decrease(to_steal, sem, *flags, 0); | ||
1438 | /* there should be no contention!!!! */ | ||
1439 | lock_fine_irqsave(&sem->lock, *flags); | ||
1440 | } | ||
1441 | } | ||
1442 | |||
1443 | /* check for new HP waiter. */ | ||
1444 | if(new_on_fq) { | ||
1445 | /* unlocks sem->lock. reacquire it. */ | ||
1446 | ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, | ||
1447 | sem, *flags); | ||
1448 | /* there should be no contention!!!! */ | ||
1449 | lock_fine_irqsave(&sem->lock, *flags); | ||
1450 | } | ||
1451 | |||
1452 | /* we moved a request to an empty FQ. wake it up */ | ||
1453 | if(unlikely(fq_of_new_on_fq && | ||
1454 | fq_of_new_on_fq != fq && | ||
1455 | fq_of_new_on_fq->count == 1)) { | ||
1456 | ikglp_grant_replica_to_next(sem, fq_of_new_on_fq); | ||
1457 | } | ||
1458 | } | ||
1459 | |||
1460 | int ikglp_unlock(struct litmus_lock* l) | ||
1461 | { | ||
1462 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1463 | struct task_struct *t = current; | ||
1464 | struct fifo_queue *fq; | ||
1465 | |||
1466 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1467 | raw_spinlock_t *dgl_lock; | ||
1468 | #endif | ||
1469 | |||
1470 | unsigned long flags = 0, more_flags; | ||
1471 | |||
1472 | int err = 0; | ||
1473 | |||
1474 | fq = ikglp_get_queue(sem, t); /* returns NULL if 't' is not owner. */ | ||
1475 | |||
1476 | if (!fq) { | ||
1477 | TRACE_TASK(t, "does not hold a replica of lock %d\n", l->ident); | ||
1478 | err = -EINVAL; | ||
1479 | goto out; | ||
1480 | } | ||
1481 | |||
1482 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1483 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
1484 | #endif | ||
1485 | lock_global_irqsave(dgl_lock, flags); | ||
1486 | raw_spin_lock_irqsave(&sem->real_lock, more_flags); | ||
1487 | lock_fine_irqsave(&sem->lock, flags); | ||
1488 | |||
1489 | TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); | ||
1490 | |||
1491 | /* Remove 't' from the heaps, but data in nodes will still be good. */ | ||
1492 | ikglp_del_global_list(sem, t, &fq->global_heap_node); | ||
1493 | binheap_delete(&fq->donee_heap_node.node, &sem->donees); | ||
1494 | |||
1495 | fq->owner = NULL; /* no longer owned!! */ | ||
1496 | --(fq->count); | ||
1497 | if(fq->count < sem->shortest_fifo_queue->count) { | ||
1498 | sem->shortest_fifo_queue = fq; | ||
1499 | } | ||
1500 | |||
1501 | if (likely(!fq->is_vunlocked)) | ||
1502 | --(sem->nr_in_fifos); | ||
1503 | else | ||
1504 | TRACE_TASK(t, "virtually unlocked. handing off replica only.\n"); | ||
1505 | |||
1506 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1507 | if(sem->aff_obs) { | ||
1508 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); | ||
1509 | sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t); | ||
1510 | } | ||
1511 | #endif | ||
1512 | |||
1513 | /* 't' must drop all priority and clean up data structures before hand-off. | ||
1514 | |||
1515 | DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST | ||
1516 | This kills any inheritance from a donor. | ||
1517 | */ | ||
1518 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
1519 | { | ||
1520 | int count = 0; | ||
1521 | |||
1522 | TRACE_TASK(t, "discarding inheritance because IKGLP is outermost\n"); | ||
1523 | |||
1524 | while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) { | ||
1525 | binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks, | ||
1526 | struct nested_info, hp_binheap_node); | ||
1527 | ++count; | ||
1528 | } | ||
1529 | |||
1530 | if (count) | ||
1531 | litmus->decrease_prio(t, NULL, 0); | ||
1532 | } | ||
1533 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
1534 | |||
1535 | if (likely(!fq->is_vunlocked)) { | ||
1536 | /* Move the next request into the FQ and update heaps as needed. | ||
1537 | Skip this step if we already did this during the virtual unlock. */ | ||
1538 | ikglp_move_next_to_fq(sem, fq, t, &fq->donee_heap_node, &flags, | ||
1539 | ALLOW_STEALING, !ALWAYS_TERMINATE_DONATION); | ||
1540 | } | ||
1541 | else { | ||
1542 | /* reset vunlock flag */ | ||
1543 | fq->is_vunlocked = 0; | ||
1544 | } | ||
1545 | |||
1546 | if (waitqueue_active(&fq->wait)) | ||
1547 | ikglp_grant_replica_to_next(sem, fq); | ||
1548 | |||
1549 | unlock_fine_irqrestore(&sem->lock, flags); | ||
1550 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
1551 | unlock_global_irqrestore(dgl_lock, flags); | ||
1552 | |||
1553 | TRACE_CUR("done with freeing replica.\n"); | ||
1554 | |||
1555 | out: | ||
1556 | return err; | ||
1557 | } | ||
1558 | |||
1559 | |||
1560 | |||
1561 | void ikglp_abort_request(struct ikglp_semaphore *sem, struct task_struct *t, | ||
1562 | unsigned long flags) | ||
1563 | { | ||
1564 | ikglp_wait_state_t *wait = | ||
1565 | (ikglp_wait_state_t*)tsk_rt(t)->blocked_lock_data; | ||
1566 | ikglp_donee_heap_node_t *donee_info; | ||
1567 | struct task_struct *donee; | ||
1568 | struct fifo_queue *donee_fq; | ||
1569 | struct fifo_queue *fq; | ||
1570 | |||
1571 | BUG_ON(!wait); | ||
1572 | fq = wait->fq; | ||
1573 | /* drop the request from the proper IKGLP data structure and re-eval | ||
1574 | * priority relations */ | ||
1575 | switch(wait->cur_q) | ||
1576 | { | ||
1577 | case IKGLP_PQ: | ||
1578 | /* No one inherits from waiters in PQ. Just drop the request. */ | ||
1579 | __drop_from_pq(sem, wait); | ||
1580 | break; | ||
1581 | |||
1582 | |||
1583 | case IKGLP_FQ: | ||
1584 | ikglp_del_global_list(sem, t, &wait->global_heap_node); | ||
1585 | binheap_delete(&wait->donee_heap_node.node, &sem->donees); | ||
1586 | |||
1587 | /* remove the task from the FQ */ | ||
1588 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1589 | if(sem->aff_obs) | ||
1590 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); | ||
1591 | #endif | ||
1592 | __drop_from_fq(sem, wait); | ||
1593 | |||
1594 | /* Drop any and all inheritance t receives. */ | ||
1595 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
1596 | { | ||
1597 | int count = 0; | ||
1598 | TRACE_TASK(t, "discarding inheritance because IKGLP " | ||
1599 | "is outermost\n"); | ||
1600 | |||
1601 | while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) { | ||
1602 | binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks, | ||
1603 | struct nested_info, hp_binheap_node); | ||
1604 | ++count; | ||
1605 | } | ||
1606 | if (count) | ||
1607 | litmus->decrease_prio(t, NULL, 0); | ||
1608 | } | ||
1609 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
1610 | |||
1611 | /* unlocks sem->lock. reacquire it. */ | ||
1612 | ikglp_refresh_owners_prio_decrease(wait->donee_heap_node.fq, | ||
1613 | sem, flags, 1); | ||
1614 | /* there should be no contention!!!! */ | ||
1615 | lock_fine_irqsave(&sem->lock, flags); | ||
1616 | ikglp_move_next_to_fq(sem, fq, t, &wait->donee_heap_node, &flags, | ||
1617 | ALLOW_STEALING, !ALWAYS_TERMINATE_DONATION); | ||
1618 | break; | ||
1619 | |||
1620 | |||
1621 | case IKGLP_DONOR: | ||
1622 | ikglp_del_global_list(sem, t, &wait->global_heap_node); | ||
1623 | __drop_from_donor(sem, wait); | ||
1624 | |||
1625 | /* update donee */ | ||
1626 | donee_info = wait->donee_info; | ||
1627 | donee_info->donor_info = NULL; // clear the cross-link | ||
1628 | binheap_decrease(&donee_info->node, &sem->donees); | ||
1629 | |||
1630 | donee = donee_info->task; | ||
1631 | donee_fq = donee_info->fq; | ||
1632 | if (donee == donee_fq->owner) { | ||
1633 | TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n", | ||
1634 | donee->comm, donee->pid, | ||
1635 | ikglp_get_idx(sem, donee_fq)); | ||
1636 | /* unlocks sem->lock. reacquire it. */ | ||
1637 | ikglp_remove_donation_from_owner(&wait->prio_donation.hp_binheap_node, | ||
1638 | donee_fq, sem, flags); | ||
1639 | /* there should be no contention!!!! */ | ||
1640 | lock_fine_irqsave(&sem->lock, flags); | ||
1641 | } | ||
1642 | else { | ||
1643 | TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n", | ||
1644 | donee->comm, donee->pid, | ||
1645 | ikglp_get_idx(sem, donee_fq)); | ||
1646 | |||
1647 | ikglp_remove_donation_from_fq_waiter(donee, | ||
1648 | &wait->prio_donation.hp_binheap_node); | ||
1649 | if(donee == donee_fq->hp_waiter) { | ||
1650 | TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. " | ||
1651 | "Rechecking hp_waiter.\n", | ||
1652 | donee->comm, donee->pid, | ||
1653 | ikglp_get_idx(sem, donee_fq)); | ||
1654 | |||
1655 | donee_fq->hp_waiter = ikglp_find_hp_waiter(donee_fq, NULL); | ||
1656 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | ||
1657 | ikglp_get_idx(sem, donee_fq), | ||
1658 | (donee_fq->hp_waiter) ? donee_fq->hp_waiter->comm : "null", | ||
1659 | (donee_fq->hp_waiter) ? donee_fq->hp_waiter->pid : 0); | ||
1660 | |||
1661 | /* unlocks sem->lock. reacquire it. */ | ||
1662 | ikglp_refresh_owners_prio_decrease(donee_fq, sem, flags, 1); | ||
1663 | /* there should be no contention!!!! */ | ||
1664 | lock_fine_irqsave(&sem->lock, flags); | ||
1665 | } | ||
1666 | } | ||
1667 | |||
1668 | break; | ||
1669 | default: | ||
1670 | BUG(); | ||
1671 | } | ||
1672 | |||
1673 | BUG_ON(wait->cur_q != IKGLP_INVL); /* state should now be invalid */ | ||
1674 | } | ||
1675 | |||
1676 | void ikglp_budget_exhausted(struct litmus_lock* l, struct task_struct* t) | ||
1677 | { | ||
1678 | /* | ||
1679 | * PRE: (1) Our deadline has already been postponed. | ||
1680 | * (2) DGL lock is already held if DGLs are supported. | ||
1681 | * | ||
1682 | * Exhaustion Response: Remove request from locks and re-issue it. | ||
1683 | * | ||
1684 | * step 1: first check that we are actually blocked. | ||
1685 | * step 2: remove our request from ANY data structure: | ||
1686 | * step 3: reissue the request | ||
1687 | */ | ||
1688 | |||
1689 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1690 | struct litmus_lock* blocked_lock; | ||
1691 | unsigned long flags = 0, more_flags; | ||
1692 | |||
1693 | raw_spin_lock_irqsave(&sem->real_lock, more_flags); | ||
1694 | lock_fine_irqsave(&sem->lock, flags); | ||
1695 | |||
1696 | blocked_lock = tsk_rt(t)->blocked_lock; | ||
1697 | if (blocked_lock == l) { | ||
1698 | ikglp_wait_state_t *wait; | ||
1699 | ikglp_abort_request(sem, t, flags); | ||
1700 | |||
1701 | /* now re-issue the request */ | ||
1702 | |||
1703 | TRACE_TASK(t, "Reissuing a request for replica from lock %d.\n", | ||
1704 | l->ident); | ||
1705 | |||
1706 | wait = (ikglp_wait_state_t*)tsk_rt(t)->blocked_lock_data; | ||
1707 | if(sem->nr_in_fifos < sem->max_in_fifos) { | ||
1708 | |||
1709 | struct fifo_queue *fq; | ||
1710 | |||
1711 | /* enqueue somewhere */ | ||
1712 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1713 | fq = (sem->aff_obs) ? | ||
1714 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : | ||
1715 | sem->shortest_fifo_queue; | ||
1716 | #else | ||
1717 | fq = sem->shortest_fifo_queue; | ||
1718 | #endif | ||
1719 | TRACE_TASK(t, "is going to an FQ.\n"); | ||
1720 | /* if this were true, then we should have been blocked */ | ||
1721 | BUG_ON(fq->count == 0); | ||
1722 | ikglp_enqueue_on_fq(sem, fq, wait, flags); /* unlocks sem->lock */ | ||
1723 | } | ||
1724 | else if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { | ||
1725 | TRACE_TASK(t, "is going to PQ.\n"); | ||
1726 | /* enqueue on PQ */ | ||
1727 | ikglp_enqueue_on_pq(sem, wait); | ||
1728 | unlock_fine_irqrestore(&sem->lock, flags); | ||
1729 | } | ||
1730 | else { | ||
1731 | /* enqueue as donor */ | ||
1732 | TRACE_TASK(t, "is going to donor heap.\n"); | ||
1733 | ikglp_enqueue_on_donor(sem, wait, flags); /* unlocks sem->lock */ | ||
1734 | } | ||
1735 | |||
1736 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
1737 | } | ||
1738 | else if (blocked_lock) { | ||
1739 | unlock_fine_irqrestore(&sem->lock, flags); | ||
1740 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
1741 | |||
1742 | TRACE_TASK(t, "is blocked, but not on IKGLP. Redirecting...\n"); | ||
1743 | if(blocked_lock->ops->supports_budget_exhaustion) { | ||
1744 | TRACE_TASK(t, "Lock %d supports budget exhaustion.\n", | ||
1745 | blocked_lock->ident); | ||
1746 | blocked_lock->ops->budget_exhausted(blocked_lock, t); | ||
1747 | } | ||
1748 | } | ||
1749 | else { | ||
1750 | TRACE_TASK(t, "appears to be no longer blocked.\n"); | ||
1751 | unlock_fine_irqrestore(&sem->lock, flags); | ||
1752 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
1753 | } | ||
1754 | |||
1755 | return; | ||
1756 | } | ||
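Aside, not part of the patch: the re-issue path above places the aborted request in one of three structures, depending on FIFO occupancy and on how the task's base priority compares to the m-th highest waiter. A minimal, self-contained sketch of that three-way decision (all names and types are illustrative):

    /* Sketch of the placement decision used when re-issuing a request.
     * Only the decision order mirrors ikglp_budget_exhausted(); the
     * names are hypothetical. */
    enum sketch_placement { SKETCH_FIFO, SKETCH_PQ, SKETCH_DONOR };

    static enum sketch_placement
    sketch_place_request(unsigned int nr_in_fifos, unsigned int max_in_fifos,
                         int mth_highest_outranks_t)
    {
        if (nr_in_fifos < max_in_fifos)
            return SKETCH_FIFO;  /* room somewhere among the FIFO queues */
        if (mth_highest_outranks_t)
            return SKETCH_PQ;    /* t is not among the m highest-priority requests */
        return SKETCH_DONOR;     /* t becomes a donor to some donee */
    }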
1757 | |||
1758 | void ikglp_virtual_unlock(struct litmus_lock* l, struct task_struct* t) | ||
1759 | { | ||
1760 | /* PRE: DGL lock already held if DGLs are supported */ | ||
1761 | |||
1762 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1763 | struct fifo_queue *fq = ikglp_get_queue(sem, t); | ||
1764 | unsigned long flags = 0, more_flags; | ||
1765 | |||
1766 | TRACE_TASK(t, "virtual unlock!\n"); | ||
1767 | |||
1768 | if (!fq) | ||
1769 | return; | ||
1770 | |||
1771 | if (fq->is_vunlocked) { | ||
1772 | TRACE_TASK(t, "Lock %d already virtually unlocked.\n", l->ident); | ||
1773 | return; | ||
1774 | } | ||
1775 | |||
1776 | raw_spin_lock_irqsave(&sem->real_lock, more_flags); | ||
1777 | lock_fine_irqsave(&sem->lock, flags); | ||
1778 | |||
1779 | if (unlikely(fq->owner != t)) { | ||
1780 | TRACE_TASK(t, "no longer holds lock %d.\n", l->ident); | ||
1781 | goto out; | ||
1782 | } | ||
1783 | |||
1784 | /* Move a request from donor heap or PQ to fq. don't steal from | ||
1785 | * other FQs. Also, terminate donation relationship if we move | ||
1786 | * a donor of 't' to the FQ (we'll pick inheritance back up via | ||
1787 | * the FQ, if needed). */ | ||
1788 | ikglp_move_next_to_fq(sem, fq, t, &fq->donee_heap_node, &flags, | ||
1789 | !ALLOW_STEALING, ALWAYS_TERMINATE_DONATION); | ||
1790 | |||
1791 | /* decrement fifo count to simulate unlock. individual fifo | ||
1792 | * length counts remain unchanged. */ | ||
1793 | --(sem->nr_in_fifos); | ||
1794 | fq->is_vunlocked = 1; | ||
1795 | |||
1796 | out: | ||
1797 | unlock_fine_irqrestore(&sem->lock, flags); | ||
1798 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
1799 | } | ||
1800 | |||
1801 | |||
1802 | |||
1803 | int ikglp_close(struct litmus_lock* l) | ||
1804 | { | ||
1805 | struct task_struct *t = current; | ||
1806 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1807 | unsigned long flags; | ||
1808 | |||
1809 | int owner = 0; | ||
1810 | int i; | ||
1811 | |||
1812 | raw_spin_lock_irqsave(&sem->real_lock, flags); | ||
1813 | |||
1814 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
1815 | if(sem->fifo_queues[i].owner == t) { | ||
1816 | owner = 1; | ||
1817 | break; | ||
1818 | } | ||
1819 | } | ||
1820 | |||
1821 | raw_spin_unlock_irqrestore(&sem->real_lock, flags); | ||
1822 | |||
1823 | if (owner) | ||
1824 | ikglp_unlock(l); | ||
1825 | |||
1826 | return 0; | ||
1827 | } | ||
1828 | |||
1829 | void ikglp_free(struct litmus_lock* l) | ||
1830 | { | ||
1831 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1832 | |||
1833 | kfree(sem->fifo_queues); | ||
1834 | kfree(sem); | ||
1835 | } | ||
1836 | |||
1837 | struct litmus_lock* ikglp_new(unsigned int m, | ||
1838 | struct litmus_lock_ops* ops, | ||
1839 | void* __user uarg) | ||
1840 | { | ||
1841 | struct ikglp_semaphore* sem; | ||
1842 | struct ikglp_args args; | ||
1843 | unsigned int i; | ||
1844 | |||
1845 | BUG_ON(m == 0); | ||
1846 | |||
1847 | if(!access_ok(VERIFY_READ, uarg, sizeof(args))) | ||
1848 | return(NULL); | ||
1849 | if(__copy_from_user(&args, uarg, sizeof(args))) | ||
1850 | return(NULL); | ||
1851 | |||
1852 | /* validation */ | ||
1853 | |||
1854 | /* there must be at least one resource */ | ||
1855 | if (args.nr_replicas < 1) { | ||
1856 | printk("Invalid number of replicas.\n"); | ||
1857 | return(NULL); | ||
1858 | } | ||
1859 | /* IKGLP_OPTIMAL_FIFO_LEN can only be determined if max_in_fifos | ||
1860 | is IKGLP_M_IN_FIFOS (i.e., m, the number of CPUs) */ | ||
1861 | if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN && | ||
1862 | args.max_in_fifos != IKGLP_M_IN_FIFOS) { | ||
1863 | printk("Cannot compute optimal FIFO length if " | ||
1864 | "max_in_fifos != IKGLP_M_IN_FIFOS\n"); | ||
1865 | return(NULL); | ||
1866 | } | ||
1867 | if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) && | ||
1868 | (args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) && | ||
1869 | (args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) { | ||
1870 | printk("Not enough total FIFO space for specified max requests " | ||
1871 | "in FIFOs.\n"); | ||
1872 | return(NULL); | ||
1873 | } | ||
1874 | |||
1875 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1876 | if(!sem) | ||
1877 | return NULL; | ||
1878 | memset(sem, 0, sizeof(*sem)); | ||
1879 | |||
1880 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, | ||
1881 | GFP_KERNEL); | ||
1882 | if(!sem->fifo_queues) | ||
1883 | { | ||
1884 | kfree(sem); | ||
1885 | return NULL; | ||
1886 | } | ||
1887 | |||
1888 | sem->litmus_lock.ops = ops; | ||
1889 | // sem->litmus_lock.proc = &ikglp_proc_ops; | ||
1890 | |||
1891 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
1892 | { | ||
1893 | __raw_spin_lock_init(&sem->lock, | ||
1894 | ((struct litmus_lock*)sem)->cheat_lockdep, | ||
1895 | &((struct litmus_lock*)sem)->key); | ||
1896 | } | ||
1897 | #else | ||
1898 | raw_spin_lock_init(&sem->lock); | ||
1899 | #endif | ||
1900 | |||
1901 | raw_spin_lock_init(&sem->real_lock); | ||
1902 | |||
1903 | sem->nr_replicas = args.nr_replicas; | ||
1904 | sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ? | ||
1905 | m : | ||
1906 | args.max_in_fifos; | ||
1907 | sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ? | ||
1908 | (sem->max_in_fifos/args.nr_replicas) + | ||
1909 | ((sem->max_in_fifos%args.nr_replicas) != 0) : | ||
1910 | args.max_fifo_len; | ||
1911 | sem->nr_in_fifos = 0; | ||
1912 | |||
1913 | TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n", | ||
1914 | sem->max_in_fifos, | ||
1915 | sem->nr_replicas, | ||
1916 | sem->max_fifo_len); | ||
1917 | |||
1918 | for(i = 0; i < args.nr_replicas; ++i) { | ||
1919 | struct fifo_queue* q = &(sem->fifo_queues[i]); | ||
1920 | |||
1921 | q->owner = NULL; | ||
1922 | q->hp_waiter = NULL; | ||
1923 | init_waitqueue_head(&q->wait); | ||
1924 | q->count = 0; | ||
1925 | q->is_vunlocked = 0; | ||
1926 | |||
1927 | q->global_heap_node.task = NULL; | ||
1928 | INIT_BINHEAP_NODE(&q->global_heap_node.node); | ||
1929 | |||
1930 | q->donee_heap_node.task = NULL; | ||
1931 | q->donee_heap_node.donor_info = NULL; | ||
1932 | q->donee_heap_node.fq = NULL; | ||
1933 | INIT_BINHEAP_NODE(&q->donee_heap_node.node); | ||
1934 | |||
1935 | q->nest.lock = (struct litmus_lock*)sem; | ||
1936 | q->nest.hp_waiter_eff_prio = NULL; | ||
1937 | q->nest.hp_waiter_ptr = &q->hp_waiter; | ||
1938 | INIT_BINHEAP_NODE(&q->nest.hp_binheap_node); | ||
1939 | } | ||
1940 | |||
1941 | sem->shortest_fifo_queue = &sem->fifo_queues[0]; | ||
1942 | |||
1943 | sem->top_m_size = 0; | ||
1944 | |||
1945 | // init heaps | ||
1946 | INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order); | ||
1947 | INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order); | ||
1948 | INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order); | ||
1949 | INIT_BINHEAP_HANDLE(&sem->priority_queue, | ||
1950 | ikglp_max_heap_base_priority_order); | ||
1951 | INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); | ||
1952 | |||
1953 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1954 | sem->aff_obs = NULL; | ||
1955 | #endif | ||
1956 | |||
1957 | return &sem->litmus_lock; | ||
1958 | } | ||
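Aside, not part of the patch: when IKGLP_OPTIMAL_FIFO_LEN is requested, the per-queue cap computed above is simply the ceiling of max_in_fifos over nr_replicas. A small standalone illustration of the arithmetic (names are illustrative):

    #include <stdio.h>

    /* Mirrors the max_fifo_len computation in ikglp_new(): ceil(m/k). */
    static unsigned int optimal_fifo_len(unsigned int max_in_fifos,
                                         unsigned int nr_replicas)
    {
        return (max_in_fifos / nr_replicas) +
               ((max_in_fifos % nr_replicas) != 0);
    }

    int main(void)
    {
        /* e.g., m = 8 requests allowed in FIFOs over k = 3 replicas
         * gives a per-queue cap of ceil(8/3) = 3 */
        printf("%u\n", optimal_fifo_len(8, 3));
        return 0;
    }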
1959 | |||
1960 | |||
1961 | |||
1962 | |||
1963 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1964 | |||
1965 | /****************************************************************************/ | ||
1966 | /* AFFINITY HEURISTICS */ | ||
1967 | /****************************************************************************/ | ||
1968 | |||
1969 | |||
1970 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1971 | { | ||
1972 | int gpu = replica % aff->nr_rsrc; | ||
1973 | return gpu; | ||
1974 | } | ||
1975 | |||
1976 | static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1977 | { | ||
1978 | int gpu = __replica_to_gpu(aff, replica) + aff->offset; | ||
1979 | return gpu; | ||
1980 | } | ||
1981 | |||
1982 | static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) | ||
1983 | { | ||
1984 | int replica = gpu - aff->offset; | ||
1985 | return replica; | ||
1986 | } | ||
1987 | |||
1988 | static inline int same_gpu(struct ikglp_affinity* aff, | ||
1989 | int replica_a, int replica_b) | ||
1990 | { | ||
1991 | return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b)); | ||
1992 | } | ||
1993 | |||
1994 | static inline int has_affinity(struct ikglp_affinity* aff, | ||
1995 | struct task_struct* t, int replica) | ||
1996 | { | ||
1997 | if(tsk_rt(t)->last_gpu >= 0) | ||
1998 | return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica)); | ||
1999 | return 0; | ||
2000 | } | ||
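Aside, not part of the patch: the helpers above assume the lock's replicas are striped across GPUs, with nr_rsrc GPUs, nr_simult replicas per GPU, and offset giving the first GPU id managed by the observer. A standalone sketch of that mapping (struct and names are illustrative):

    /* Replica <-> GPU striping as used by the helpers above: replica r
     * serves GPU (r % nr_rsrc) + offset, so replicas 0..nr_rsrc-1 cover
     * each GPU once, nr_rsrc..2*nr_rsrc-1 cover them a second time, etc. */
    struct sketch_aff_layout {
        int nr_rsrc;    /* number of GPUs managed by the observer */
        int nr_simult;  /* replicas (simultaneous users) per GPU  */
        int offset;     /* id of the first managed GPU            */
    };

    static int sketch_replica_to_gpu(const struct sketch_aff_layout *a, int r)
    {
        return (r % a->nr_rsrc) + a->offset;
    }

    static int sketch_gpu_to_base_replica(const struct sketch_aff_layout *a, int gpu)
    {
        return gpu - a->offset;
    }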
2001 | |||
2002 | int ikglp_aff_obs_close(struct affinity_observer* obs) | ||
2003 | { | ||
2004 | return 0; | ||
2005 | } | ||
2006 | |||
2007 | void ikglp_aff_obs_free(struct affinity_observer* obs) | ||
2008 | { | ||
2009 | struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); | ||
2010 | |||
2011 | /* make sure the thread destroying this semaphore will not | ||
2012 | call the exit callback on a destroyed lock. */ | ||
2013 | struct task_struct *t = current; | ||
2014 | if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff) | ||
2015 | { | ||
2016 | tsk_rt(t)->rsrc_exit_cb = NULL; | ||
2017 | tsk_rt(t)->rsrc_exit_cb_args = NULL; | ||
2018 | } | ||
2019 | |||
2020 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | ||
2021 | kfree(ikglp_aff->nr_aff_on_rsrc); | ||
2022 | kfree(ikglp_aff->q_info); | ||
2023 | kfree(ikglp_aff); | ||
2024 | } | ||
2025 | |||
2026 | static struct affinity_observer* ikglp_aff_obs_new( | ||
2027 | struct affinity_observer_ops* ops, | ||
2028 | struct ikglp_affinity_ops* ikglp_ops, | ||
2029 | void* __user args) | ||
2030 | { | ||
2031 | struct ikglp_affinity* ikglp_aff; | ||
2032 | struct gpu_affinity_observer_args aff_args; | ||
2033 | struct ikglp_semaphore* sem; | ||
2034 | unsigned int i; | ||
2035 | unsigned long flags; | ||
2036 | |||
2037 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | ||
2038 | return(NULL); | ||
2039 | } | ||
2040 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { | ||
2041 | return(NULL); | ||
2042 | } | ||
2043 | |||
2044 | sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); | ||
2045 | |||
2046 | if(sem->litmus_lock.type != IKGLP_SEM) { | ||
2047 | TRACE_CUR("Lock type not supported. Type = %d\n", | ||
2048 | sem->litmus_lock.type); | ||
2049 | return(NULL); | ||
2050 | } | ||
2051 | |||
2052 | if((aff_args.rho <= 0) || | ||
2053 | (sem->nr_replicas%aff_args.rho != 0)) { | ||
2054 | TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users " | ||
2055 | "(%u) per replica. #replicas should be evenly divisible " | ||
2056 | "by #simult_users.\n", | ||
2057 | sem->litmus_lock.ident, | ||
2058 | sem->nr_replicas, | ||
2059 | aff_args.rho); | ||
2060 | return(NULL); | ||
2061 | } | ||
2062 | |||
2063 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | ||
2064 | if(!ikglp_aff) | ||
2065 | return(NULL); | ||
2066 | |||
2067 | ikglp_aff->q_info = kmalloc( | ||
2068 | sizeof(struct ikglp_queue_info)*sem->nr_replicas, | ||
2069 | GFP_KERNEL); | ||
2070 | if(!ikglp_aff->q_info) { | ||
2071 | kfree(ikglp_aff); | ||
2072 | return(NULL); | ||
2073 | } | ||
2074 | |||
2075 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc( | ||
2076 | sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), | ||
2077 | GFP_KERNEL); | ||
2078 | if(!ikglp_aff->nr_cur_users_on_rsrc) { | ||
2079 | kfree(ikglp_aff->q_info); | ||
2080 | kfree(ikglp_aff); | ||
2081 | return(NULL); | ||
2082 | } | ||
2083 | |||
2084 | ikglp_aff->nr_aff_on_rsrc = kmalloc( | ||
2085 | sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), | ||
2086 | GFP_KERNEL); | ||
2087 | if(!ikglp_aff->nr_aff_on_rsrc) { | ||
2088 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | ||
2089 | kfree(ikglp_aff->q_info); | ||
2090 | kfree(ikglp_aff); | ||
2091 | return(NULL); | ||
2092 | } | ||
2093 | |||
2094 | affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); | ||
2095 | |||
2096 | ikglp_aff->ops = ikglp_ops; | ||
2097 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; | ||
2098 | ikglp_aff->nr_simult = aff_args.rho; | ||
2099 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; | ||
2100 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; | ||
2101 | |||
2102 | TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, " | ||
2103 | "nr_rsrc = %d, relaxed_fifo_len = %d\n", | ||
2104 | ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc, | ||
2105 | ikglp_aff->relax_max_fifo_len); | ||
2106 | |||
2107 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, | ||
2108 | sizeof(unsigned int)*(ikglp_aff->nr_rsrc)); | ||
2109 | memset(ikglp_aff->nr_aff_on_rsrc, 0, | ||
2110 | sizeof(unsigned int)*(ikglp_aff->nr_rsrc)); | ||
2111 | |||
2112 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
2113 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; | ||
2114 | ikglp_aff->q_info[i].estimated_len = 0; | ||
2115 | |||
2116 | /* multiple q_info's will point to the same resource (aka GPU) if | ||
2117 | aff_args.rho > 1 */ | ||
2118 | ikglp_aff->q_info[i].nr_cur_users = | ||
2119 | &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; | ||
2120 | ikglp_aff->q_info[i].nr_aff_users = | ||
2121 | &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; | ||
2122 | } | ||
2123 | |||
2124 | /* attach observer to the lock */ | ||
2125 | raw_spin_lock_irqsave(&sem->real_lock, flags); | ||
2126 | sem->aff_obs = ikglp_aff; | ||
2127 | raw_spin_unlock_irqrestore(&sem->real_lock, flags); | ||
2128 | |||
2129 | return &ikglp_aff->obs; | ||
2130 | } | ||
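Aside, not part of the patch: ikglp_aff_obs_new() above rejects configurations where the replica count is not a multiple of rho (simultaneous users per GPU); the number of GPUs then falls out as nr_replicas / rho. A tiny standalone illustration (names are illustrative):

    /* Mirrors the rho-divisibility check above. Returns the number of
     * GPUs (nr_rsrc) or -1 for an invalid configuration. For example,
     * 6 replicas with rho = 2 gives 3 GPUs; 6 replicas with rho = 4 is
     * rejected. */
    static int sketch_nr_rsrc(unsigned int nr_replicas, unsigned int rho)
    {
        if (rho == 0 || (nr_replicas % rho) != 0)
            return -1;
        return (int)(nr_replicas / rho);
    }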
2131 | |||
2132 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, | ||
2133 | struct fifo_queue* fq) | ||
2134 | { | ||
2135 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2136 | return(replica_to_gpu(aff, ikglp_get_idx(sem, fq))); | ||
2137 | } | ||
2138 | |||
2139 | |||
2140 | |||
2141 | /*--------------------------------------------------------------------------*/ | ||
2142 | /* ADVANCED AFFINITY HEURISTICS */ | ||
2143 | /* */ | ||
2144 | /* These heuristics estimate FIFO length wait times and try to enqueue */ | ||
2145 | /* tasks into the shortest queues. When two queues are equivalent, the GPU */ | ||
2146 | /* that maintains affinity is selected. When a task has no affinity, the */ | ||
2147 | /* heuristic tries to get the GPU with the fewest number of other tasks */ | ||
2148 | /* with affinity on that GPU. */ | ||
2149 | /* */ | ||
2150 | /* Heuristics to explore in the future: */ | ||
2151 | /* - Utilization */ | ||
2152 | /* - Longest non-preemptive section */ | ||
2153 | /* - Criticality */ | ||
2154 | /* - Task period */ | ||
2155 | /*--------------------------------------------------------------------------*/ | ||
2156 | |||
2157 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, | ||
2158 | struct task_struct* t) | ||
2159 | { | ||
2160 | // advise_enqueue must be smart so as not to break IKGLP rules: | ||
2161 | // * No queue can be greater than ceil(m/k) in length, unless | ||
2162 | // 'relax_max_fifo_len' is asserted | ||
2163 | // * Cannot let a queue idle if there exist waiting PQ/donors | ||
2164 | // -- needed to guarantee parallel progress of waiters. | ||
2165 | // | ||
2166 | // We may be able to relax some of these constraints, but this will have to | ||
2167 | // be carefully evaluated. | ||
2168 | // | ||
2169 | // Heuristic strategy: Find the shortest queue that is not full. | ||
2170 | |||
2171 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2172 | lt_t min_len; | ||
2173 | unsigned int min_nr_users, min_nr_aff_users; | ||
2174 | struct ikglp_queue_info *shortest, *aff_queue; | ||
2175 | struct fifo_queue *to_enqueue; | ||
2176 | unsigned int i; | ||
2177 | int affinity_gpu; | ||
2178 | |||
2179 | unsigned int max_fifo_len = (aff->relax_max_fifo_len) ? | ||
2180 | sem->max_in_fifos : /* allow poss. of all requests on same queue */ | ||
2181 | sem->max_fifo_len; /* constrained FIFO len */ | ||
2182 | |||
2183 | /* if we have no affinity, find the GPU with the least number of users | ||
2184 | with active affinity */ | ||
2185 | if(unlikely(tsk_rt(t)->last_gpu < 0)) { | ||
2186 | int temp_min = aff->nr_aff_on_rsrc[0]; | ||
2187 | affinity_gpu = aff->offset; | ||
2188 | |||
2189 | for(i = 1; i < aff->nr_rsrc; ++i) { | ||
2190 | if(aff->nr_aff_on_rsrc[i] < temp_min) { | ||
2191 | affinity_gpu = aff->offset + i; temp_min = aff->nr_aff_on_rsrc[i]; /* track the running min */ | ||
2192 | } | ||
2193 | } | ||
2194 | |||
2195 | TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n", | ||
2196 | affinity_gpu, temp_min); | ||
2197 | } | ||
2198 | else { | ||
2199 | affinity_gpu = tsk_rt(t)->last_gpu; | ||
2200 | } | ||
2201 | |||
2202 | // all things being equal, let's start with the queue with which we have | ||
2203 | // affinity. this helps us maintain affinity even when we don't have | ||
2204 | // an estimate for local-affinity execution time (i.e., 2nd time on GPU) | ||
2205 | aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; | ||
2206 | shortest = aff_queue; | ||
2207 | |||
2208 | min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); | ||
2209 | min_nr_users = *(shortest->nr_cur_users); | ||
2210 | min_nr_aff_users = *(shortest->nr_aff_users); | ||
2211 | |||
2212 | |||
2213 | TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n", | ||
2214 | get_gpu_estimate(t, MIG_LOCAL), | ||
2215 | ikglp_get_idx(sem, shortest->q), | ||
2216 | shortest->q->count, | ||
2217 | min_len); | ||
2218 | |||
2219 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
2220 | if(&aff->q_info[i] != shortest) { | ||
2221 | /* is there room on this q? */ | ||
2222 | if(nominal_fq_len(aff->q_info[i].q) < max_fifo_len) { | ||
2223 | int want = 0; | ||
2224 | |||
2225 | lt_t migration = | ||
2226 | get_gpu_estimate(t, | ||
2227 | gpu_migration_distance(tsk_rt(t)->last_gpu, | ||
2228 | replica_to_gpu(aff, i))); | ||
2229 | lt_t est_len = aff->q_info[i].estimated_len + migration; | ||
2230 | |||
2231 | // queue is smaller, or they're equal and the other has a | ||
2232 | // smaller number of total users. | ||
2233 | // | ||
2235 | // tie-break on the smallest number of simultaneous users. this | ||
2236 | // only kicks in when there is more than one empty queue. | ||
2236 | |||
2237 | // TODO: Make "est_len < min_len" a fuzzy function that allows | ||
2238 | // queues "close enough" in length to be considered equal. | ||
2239 | |||
2240 | /* NOTE: 'shortest' starts out with affinity GPU */ | ||
2241 | if(unlikely(nominal_fq_len(shortest->q) >= max_fifo_len)) { | ||
2242 | /* 'shortest' is full and i-th queue is not */ | ||
2243 | want = 1; | ||
2244 | } | ||
2245 | else if(est_len < min_len) { | ||
2246 | /* i-th queue has shortest length */ | ||
2247 | want = 1; | ||
2248 | } | ||
2249 | else if(unlikely(est_len == min_len)) { | ||
2250 | /* equal lengths */ | ||
2251 | if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { | ||
2252 | /* don't sacrifice affinity on tie */ | ||
2253 | if(has_affinity(aff, t, i)) { | ||
2254 | /* switch to maintain affinity */ | ||
2255 | want = 1; | ||
2256 | } | ||
2257 | else if(*(aff->q_info[i].nr_aff_users) < | ||
2258 | min_nr_aff_users) { | ||
2259 | /* favor one with less affinity load */ | ||
2260 | want = 1; | ||
2261 | } | ||
2262 | else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */ | ||
2263 | (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */ | ||
2264 | want = 1; | ||
2265 | } | ||
2266 | } | ||
2267 | } | ||
2268 | |||
2269 | if(want) { | ||
2270 | shortest = &aff->q_info[i]; | ||
2271 | min_len = est_len; | ||
2272 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
2273 | min_nr_aff_users = *(aff->q_info[i].nr_aff_users); | ||
2274 | } | ||
2275 | |||
2276 | TRACE_CUR("cs is %llu on queue %d (count = %u): " | ||
2277 | "est len = %llu\n", | ||
2278 | get_gpu_estimate(t, | ||
2279 | gpu_migration_distance(tsk_rt(t)->last_gpu, | ||
2280 | replica_to_gpu(aff, i))), | ||
2281 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
2282 | aff->q_info[i].q->count, | ||
2283 | est_len); | ||
2284 | } | ||
2285 | else { | ||
2286 | TRACE_CUR("queue %d is too long. ineligible for enqueue.\n", | ||
2287 | ikglp_get_idx(sem, aff->q_info[i].q)); | ||
2288 | } | ||
2289 | } | ||
2290 | } | ||
2291 | |||
2292 | if(nominal_fq_len(shortest->q) >= max_fifo_len) { | ||
2293 | TRACE_CUR("selected fq %d is too long, but returning it anyway.\n", | ||
2294 | ikglp_get_idx(sem, shortest->q)); | ||
2295 | } | ||
2296 | |||
2297 | to_enqueue = shortest->q; | ||
2298 | TRACE_CUR("enqueue on fq %d (count = %u) (non-aff wanted fq %d)\n", | ||
2299 | ikglp_get_idx(sem, to_enqueue), | ||
2300 | to_enqueue->count, | ||
2301 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | ||
2302 | |||
2303 | return to_enqueue; | ||
2304 | } | ||
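Aside, not part of the patch: the selection loop above prefers, in order, a non-full queue over a full one, a strictly shorter estimated wait, and then, on a tie, existing affinity, fewer tasks with affinity on the candidate GPU, and finally fewer current users. A condensed sketch of that preference order over precomputed inputs (names are illustrative):

    /* Returns nonzero if a candidate queue should replace the current
     * choice. Distilled from the want-logic above. */
    static int sketch_prefer_candidate(int cur_full, int cand_full,
                                       unsigned long long cand_len,
                                       unsigned long long min_len,
                                       int cur_has_affinity, int cand_has_affinity,
                                       unsigned int cand_aff_users,
                                       unsigned int min_aff_users,
                                       unsigned int cand_cur_users,
                                       unsigned int min_cur_users)
    {
        if (cand_full)
            return 0;                 /* never pick a full queue             */
        if (cur_full)
            return 1;                 /* any non-full queue beats a full one */
        if (cand_len < min_len)
            return 1;                 /* strictly shorter estimated wait     */
        if (cand_len > min_len || cur_has_affinity)
            return 0;                 /* never sacrifice affinity on a tie   */
        if (cand_has_affinity)
            return 1;                 /* regain affinity on a tie            */
        if (cand_aff_users < min_aff_users)
            return 1;                 /* prefer the less affinity-loaded GPU */
        return (cand_aff_users == min_aff_users &&
                cand_cur_users < min_cur_users);
    }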
2305 | |||
2306 | |||
2307 | static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff, | ||
2308 | int dest_gpu, | ||
2309 | struct fifo_queue* fq) | ||
2310 | { | ||
2311 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2312 | ikglp_wait_state_t *wait = NULL; | ||
2313 | int max_improvement = -(MIG_NONE+1); | ||
2314 | int replica = ikglp_get_idx(sem, fq); | ||
2315 | |||
2316 | if(waitqueue_active(&fq->wait)) { | ||
2317 | int this_gpu = replica_to_gpu(aff, replica); | ||
2318 | struct list_head *pos; | ||
2319 | |||
2320 | list_for_each(pos, &fq->wait.task_list) { | ||
2321 | wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); | ||
2322 | ikglp_wait_state_t *tmp_wait = | ||
2323 | container_of(fq_wait, ikglp_wait_state_t, fq_node); | ||
2324 | |||
2325 | int tmp_improvement = | ||
2326 | gpu_migration_distance(this_gpu, | ||
2327 | tsk_rt(tmp_wait->task)->last_gpu) - | ||
2328 | gpu_migration_distance(dest_gpu, | ||
2329 | tsk_rt(tmp_wait->task)->last_gpu); | ||
2330 | |||
2331 | if(tmp_improvement > max_improvement) { | ||
2332 | wait = tmp_wait; | ||
2333 | max_improvement = tmp_improvement; | ||
2334 | |||
2335 | if(max_improvement >= (MIG_NONE-1)) { | ||
2336 | goto out; | ||
2337 | } | ||
2338 | } | ||
2339 | } | ||
2340 | |||
2341 | BUG_ON(!wait); | ||
2342 | } | ||
2343 | else { | ||
2344 | TRACE_CUR("fq %d is empty!\n", replica); | ||
2345 | } | ||
2346 | |||
2347 | out: | ||
2348 | |||
2349 | TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n", | ||
2350 | replica, | ||
2351 | (wait) ? wait->task->comm : "null", | ||
2352 | (wait) ? wait->task->pid : 0, | ||
2353 | max_improvement); | ||
2354 | |||
2355 | return wait; | ||
2356 | } | ||
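Aside, not part of the patch: the "improvement" computed above is the reduction in migration distance a waiter would see if it were stolen from its current queue's GPU for the destination GPU, measured relative to the GPU it last used. A minimal standalone sketch (names are illustrative):

    /* Affinity improvement as in pick_steal(): positive means the stolen
     * waiter ends up closer (in migration distance) to the GPU it last
     * used. E.g., two hops away now versus one hop at the destination
     * gives an improvement of 1. */
    static int sketch_affinity_improvement(int dist_cur_to_last,
                                           int dist_dest_to_last)
    {
        return dist_cur_to_last - dist_dest_to_last;
    }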
2357 | |||
2358 | |||
2359 | ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff, | ||
2360 | struct fifo_queue* dst) | ||
2361 | { | ||
2362 | /* Heuristic strategy: Find task with greatest improvement in affinity. */ | ||
2363 | |||
2364 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2365 | ikglp_wait_state_t *to_steal_state = NULL; | ||
2366 | int max_improvement = -(MIG_NONE+1); | ||
2367 | int replica, i; | ||
2368 | int dest_gpu; | ||
2369 | |||
2370 | replica = ikglp_get_idx(sem, dst); | ||
2371 | dest_gpu = replica_to_gpu(aff, replica); | ||
2372 | |||
2373 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
2374 | ikglp_wait_state_t *tmp_to_steal_state = | ||
2375 | pick_steal(aff, dest_gpu, &sem->fifo_queues[i]); | ||
2376 | |||
2377 | if(tmp_to_steal_state) { | ||
2378 | int tmp_improvement = | ||
2379 | gpu_migration_distance(replica_to_gpu(aff, i), | ||
2380 | tsk_rt(tmp_to_steal_state->task)->last_gpu) - | ||
2381 | gpu_migration_distance(dest_gpu, | ||
2382 | tsk_rt(tmp_to_steal_state->task)->last_gpu); | ||
2383 | |||
2384 | if(tmp_improvement > max_improvement) { | ||
2385 | to_steal_state = tmp_to_steal_state; | ||
2386 | max_improvement = tmp_improvement; | ||
2387 | |||
2388 | if(max_improvement >= (MIG_NONE-1)) { | ||
2389 | goto out; | ||
2390 | } | ||
2391 | } | ||
2392 | } | ||
2393 | } | ||
2394 | |||
2395 | out: | ||
2396 | if(!to_steal_state) { | ||
2397 | TRACE_CUR("Could not find anyone to steal.\n"); | ||
2398 | } | ||
2399 | else { | ||
2400 | TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d " | ||
2401 | "(GPU %d): improvement = %d\n", | ||
2402 | to_steal_state->task->comm, to_steal_state->task->pid, | ||
2403 | ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq), | ||
2404 | replica_to_gpu(aff, | ||
2405 | ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)), | ||
2406 | ikglp_get_idx(sem, dst), | ||
2407 | dest_gpu, | ||
2408 | max_improvement); | ||
2409 | } | ||
2410 | |||
2411 | return(to_steal_state); | ||
2412 | } | ||
2413 | |||
2414 | |||
2415 | static inline int has_donor(wait_queue_t* fq_wait) | ||
2416 | { | ||
2417 | ikglp_wait_state_t *wait = | ||
2418 | container_of(fq_wait, ikglp_wait_state_t, fq_node); | ||
2419 | return(wait->donee_heap_node.donor_info != NULL); | ||
2420 | } | ||
2421 | |||
2422 | static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, | ||
2423 | struct fifo_queue* fq, | ||
2424 | int* dist_from_head) | ||
2425 | { | ||
2426 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2427 | struct task_struct *donee; | ||
2428 | ikglp_donee_heap_node_t *donee_node; | ||
2429 | struct task_struct *mth_highest = ikglp_mth_highest(sem); | ||
2430 | |||
2431 | if(fq->owner && | ||
2432 | fq->donee_heap_node.donor_info == NULL && | ||
2433 | mth_highest != fq->owner && | ||
2434 | litmus->__compare(mth_highest, BASE, fq->owner, BASE)) { | ||
2435 | donee = fq->owner; | ||
2436 | donee_node = &(fq->donee_heap_node); | ||
2437 | *dist_from_head = 0; | ||
2438 | |||
2439 | BUG_ON(donee != donee_node->task); | ||
2440 | |||
2441 | TRACE_CUR("picked owner of fq %d as donee\n", | ||
2442 | ikglp_get_idx(sem, fq)); | ||
2443 | |||
2444 | goto out; | ||
2445 | } | ||
2446 | else if(waitqueue_active(&fq->wait)) { | ||
2447 | struct list_head *pos; | ||
2448 | |||
2449 | TRACE_CUR("searching fq %d for donee\n", ikglp_get_idx(sem, fq)); | ||
2450 | |||
2451 | *dist_from_head = 1; | ||
2452 | |||
2453 | /* iterating from the start of the queue is nice since this means | ||
2454 | the donee will be closer to obtaining a resource. */ | ||
2455 | list_for_each(pos, &fq->wait.task_list) { | ||
2456 | wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); | ||
2457 | ikglp_wait_state_t *wait = | ||
2458 | container_of(fq_wait, ikglp_wait_state_t, fq_node); | ||
2459 | |||
2460 | if(!has_donor(fq_wait) && | ||
2461 | mth_highest != wait->task && | ||
2462 | litmus->__compare(mth_highest, BASE, wait->task, BASE)) { | ||
2463 | donee = (struct task_struct*) fq_wait->private; | ||
2464 | donee_node = &wait->donee_heap_node; | ||
2465 | |||
2466 | BUG_ON(donee != donee_node->task); | ||
2467 | |||
2468 | TRACE_CUR("picked waiter in fq %d as donee\n", | ||
2469 | ikglp_get_idx(sem, fq)); | ||
2470 | |||
2471 | goto out; | ||
2472 | } | ||
2473 | ++(*dist_from_head); | ||
2474 | } | ||
2475 | } | ||
2476 | |||
2477 | donee = NULL; | ||
2478 | donee_node = NULL; | ||
2479 | *dist_from_head = IKGLP_INVAL_DISTANCE; | ||
2480 | |||
2481 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); | ||
2482 | |||
2483 | out: | ||
2484 | |||
2485 | TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n", | ||
2486 | ikglp_get_idx(sem, fq), | ||
2487 | (donee) ? (donee)->comm : "null", | ||
2488 | (donee) ? (donee)->pid : 0, | ||
2489 | *dist_from_head); | ||
2490 | |||
2491 | return donee_node; | ||
2492 | } | ||
2493 | |||
2494 | ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection( | ||
2495 | struct ikglp_affinity* aff, | ||
2496 | struct task_struct* donor) | ||
2497 | { | ||
2498 | // Heuristic strategy: Find the highest-priority donee that is waiting on | ||
2499 | // a queue closest to our affinity. (1) The donee CANNOT already have a | ||
2500 | // donor (exception: donee is the lowest-prio task in the donee heap). | ||
2501 | // (2) Requests in 'top_m' heap are ineligible. | ||
2502 | // | ||
2503 | // Further strategy: amongst eligible donees waiting for the same GPU, pick | ||
2504 | // the one closest to the head of the FIFO queue (including owners). | ||
2505 | |||
2506 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2507 | ikglp_donee_heap_node_t *donee_node; | ||
2508 | gpu_migration_dist_t distance; | ||
2509 | int start, i, j; | ||
2510 | |||
2511 | ikglp_donee_heap_node_t *default_donee; | ||
2512 | ikglp_wait_state_t *default_donee_donor_info; | ||
2513 | |||
2514 | if(tsk_rt(donor)->last_gpu < 0) { | ||
2515 | /* no affinity. just return the min prio, like standard IKGLP */ | ||
2516 | /* TODO: Find something closer to the head of the queue?? */ | ||
2517 | donee_node = binheap_top_entry(&sem->donees, | ||
2518 | ikglp_donee_heap_node_t, | ||
2519 | node); | ||
2520 | goto out; | ||
2521 | } | ||
2522 | |||
2523 | |||
2524 | // Temporarily break any donation relation of the default donee (the lowest | ||
2525 | // prio task in the FIFO queues) to make it eligible for selection below. | ||
2526 | // | ||
2527 | // NOTE: The original donor relation *must* be restored, even if we select | ||
2528 | // the default donee through affinity-aware selection, before returning | ||
2529 | // from this function so we don't screw up our heap ordering. | ||
2530 | // The standard IKGLP algorithm will steal the donor relationship if needed. | ||
2531 | default_donee = | ||
2532 | binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
2533 | |||
2534 | default_donee_donor_info = default_donee->donor_info; // back-up donor relation | ||
2535 | default_donee->donor_info = NULL; // temporarily break any donor relation. | ||
2536 | |||
2537 | // initialize our search | ||
2538 | donee_node = NULL; | ||
2539 | distance = MIG_NONE; | ||
2540 | |||
2541 | // TODO: The below search logic may work well for locating nodes to steal | ||
2542 | // when an FQ goes idle. Validate this code and apply it to stealing. | ||
2543 | |||
2544 | // begin search with affinity GPU. | ||
2545 | start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu); | ||
2546 | i = start; | ||
2547 | do { // "for each gpu" / "for each aff->nr_rsrc" | ||
2548 | gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i); | ||
2549 | |||
2550 | // only interested in queues that will improve our distance | ||
2551 | if(temp_distance < distance || donee_node == NULL) { | ||
2552 | int dist_from_head = IKGLP_INVAL_DISTANCE; | ||
2553 | |||
2554 | TRACE_CUR("searching for donor on GPU %d\n", i); | ||
2555 | |||
2556 | // visit each queue and pick a donee. bail as soon as we find | ||
2557 | // one for this class. | ||
2558 | |||
2559 | for(j = 0; j < aff->nr_simult; ++j) { | ||
2560 | int temp_dist_from_head; | ||
2561 | ikglp_donee_heap_node_t *temp_donee_node; | ||
2562 | struct fifo_queue *fq; | ||
2563 | |||
2564 | fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]); | ||
2565 | temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head); | ||
2566 | |||
2567 | if(temp_dist_from_head < dist_from_head) | ||
2568 | { | ||
2569 | // we check all the FQs for this GPU to spread priorities | ||
2570 | // out across the queues. does this decrease jitter? | ||
2571 | donee_node = temp_donee_node; | ||
2572 | dist_from_head = temp_dist_from_head; | ||
2573 | } | ||
2574 | } | ||
2575 | |||
2576 | if(dist_from_head != IKGLP_INVAL_DISTANCE) { | ||
2577 | TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n", | ||
2578 | donee_node->task->comm, donee_node->task->pid, | ||
2579 | dist_from_head); | ||
2580 | } | ||
2581 | else { | ||
2582 | TRACE_CUR("found no eligible donors from GPU %d\n", i); | ||
2583 | } | ||
2584 | } | ||
2585 | else { | ||
2586 | TRACE_CUR("skipping GPU %d (distance = %d, best donor " | ||
2587 | "distance = %d)\n", i, temp_distance, distance); | ||
2588 | } | ||
2589 | |||
2590 | i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around | ||
2591 | } while (i != start); | ||
2592 | |||
2593 | |||
2594 | /* restore old donor info state. */ | ||
2595 | default_donee->donor_info = default_donee_donor_info; | ||
2596 | |||
2597 | if(!donee_node) { | ||
2598 | donee_node = default_donee; | ||
2599 | |||
2600 | TRACE_CUR("Could not find a donee. We have to steal one.\n"); | ||
2601 | // TODO: vv Is the below a bug when raised? | ||
2602 | //WARN_ON(default_donee->donor_info == NULL); | ||
2603 | } | ||
2604 | |||
2605 | out: | ||
2606 | |||
2607 | TRACE_CUR("Selected donee %s/%d on fq %d " | ||
2608 | "(GPU %d) for %s/%d with affinity for GPU %d\n", | ||
2609 | donee_node->task->comm, donee_node->task->pid, | ||
2610 | ikglp_get_idx(sem, donee_node->fq), | ||
2611 | replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)), | ||
2612 | donor->comm, donor->pid, tsk_rt(donor)->last_gpu); | ||
2613 | |||
2614 | return(donee_node); | ||
2615 | } | ||
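Aside, not part of the patch: the do/while loop above visits every GPU exactly once, starting from the donor's last-used GPU and wrapping around at nr_rsrc. A standalone sketch of that visit order (names are illustrative):

    /* Wrap-around scan order used by the donee search above: with
     * nr_rsrc = 4 and start = 2 the GPUs are visited as 2, 3, 0, 1.
     * order_out must have room for nr_rsrc entries. */
    static void sketch_scan_order(int start, int nr_rsrc, int *order_out)
    {
        int i = start, n = 0;
        do {
            order_out[n++] = i;
            i = (i + 1 < nr_rsrc) ? i + 1 : 0;
        } while (i != start);
    }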
2616 | |||
2617 | |||
2618 | |||
2619 | static void __find_closest_donor(int target_gpu, | ||
2620 | struct binheap_node* donor_node, | ||
2621 | ikglp_wait_state_t** cur_closest, | ||
2622 | int* cur_dist) | ||
2623 | { | ||
2624 | ikglp_wait_state_t *this_donor = | ||
2625 | binheap_entry(donor_node, ikglp_wait_state_t, node); | ||
2626 | |||
2627 | int this_dist = | ||
2628 | gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu); | ||
2629 | |||
2630 | if(this_dist < *cur_dist) { | ||
2631 | // take this donor | ||
2632 | *cur_dist = this_dist; | ||
2633 | *cur_closest = this_donor; | ||
2634 | } | ||
2635 | else if(this_dist == *cur_dist) { | ||
2636 | // priority tie-break. Even though this is a pre-order traversal, | ||
2637 | // this is a heap, not a binary search tree, so we still need to do a | ||
2638 | // priority comparison. | ||
2639 | if(!(*cur_closest) || | ||
2640 | litmus->compare(this_donor->task, (*cur_closest)->task)) { | ||
2641 | *cur_dist = this_dist; | ||
2642 | *cur_closest = this_donor; | ||
2643 | } | ||
2644 | } | ||
2645 | |||
2646 | if(donor_node->left) | ||
2647 | __find_closest_donor(target_gpu, donor_node->left, | ||
2648 | cur_closest, cur_dist); | ||
2649 | |||
2650 | if(donor_node->right) | ||
2651 | __find_closest_donor(target_gpu, donor_node->right, | ||
2652 | cur_closest, cur_dist); | ||
2653 | } | ||
2654 | |||
2655 | ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, | ||
2656 | struct fifo_queue* fq) | ||
2657 | { | ||
2658 | // Heuristic strategy: Find the donor with the closest affinity to fq. | ||
2659 | // Tie-break on priority. | ||
2660 | |||
2661 | // We need to iterate over all the donors to do this. Unfortunately, | ||
2662 | // our donors are organized in a heap. We'll visit each node with a | ||
2663 | // recursive call. This is relatively safe since there are at most m | ||
2664 | // donors, so we won't recurse deeply enough to have to worry about | ||
2665 | // our stack. (Even with 128 CPUs, the recursion depth is at most 7.) | ||
2666 | |||
2667 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2668 | ikglp_wait_state_t *donor = NULL; | ||
2669 | int distance = MIG_NONE; | ||
2670 | int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq)); | ||
2671 | |||
2672 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
2673 | ikglp_wait_state_t* default_donor = | ||
2674 | binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
2675 | #endif | ||
2676 | |||
2677 | __find_closest_donor(gpu, sem->donors.root, &donor, &distance); | ||
2678 | |||
2679 | TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d " | ||
2680 | "(non-aff wanted %s/%d). differs = %d\n", | ||
2681 | donor->task->comm, donor->task->pid, | ||
2682 | distance, | ||
2683 | ikglp_get_idx(sem, fq), | ||
2684 | default_donor->task->comm, default_donor->task->pid, | ||
2685 | (donor->task != default_donor->task) | ||
2686 | ); | ||
2687 | |||
2688 | return(donor); | ||
2689 | } | ||
2690 | |||
2691 | |||
2692 | |||
2693 | void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, | ||
2694 | struct fifo_queue* fq, struct task_struct* t) | ||
2695 | { | ||
2696 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2697 | int replica = ikglp_get_idx(sem, fq); | ||
2698 | int gpu = replica_to_gpu(aff, replica); | ||
2699 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
2700 | lt_t est_time; | ||
2701 | lt_t est_len_before; | ||
2702 | |||
2703 | if(current == t) | ||
2704 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
2705 | |||
2706 | est_len_before = info->estimated_len; | ||
2707 | est_time = get_gpu_estimate(t, | ||
2708 | gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2709 | info->estimated_len += est_time; | ||
2710 | |||
2711 | TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", | ||
2712 | ikglp_get_idx(sem, info->q), | ||
2713 | est_len_before, est_time, | ||
2714 | info->estimated_len); | ||
2715 | } | ||
2716 | |||
2717 | void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, | ||
2718 | struct task_struct* t) | ||
2719 | { | ||
2720 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2721 | int replica = ikglp_get_idx(sem, fq); | ||
2722 | int gpu = replica_to_gpu(aff, replica); | ||
2723 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
2724 | lt_t est_time = get_gpu_estimate(t, | ||
2725 | gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2726 | |||
2727 | if(est_time > info->estimated_len) { | ||
2728 | WARN_ON(1); | ||
2729 | info->estimated_len = 0; | ||
2730 | } | ||
2731 | else { | ||
2732 | info->estimated_len -= est_time; | ||
2733 | } | ||
2734 | |||
2735 | TRACE_CUR("fq %d est len is now %llu\n", | ||
2736 | ikglp_get_idx(sem, info->q), | ||
2737 | info->estimated_len); | ||
2738 | } | ||
2739 | |||
2740 | int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) | ||
2741 | { | ||
2742 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2743 | unsigned long flags = 0, more_flags; | ||
2744 | int aff_rsrc; | ||
2745 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2746 | raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); | ||
2747 | #endif | ||
2748 | |||
2749 | if (tsk_rt(t)->last_gpu < 0) | ||
2750 | return 0; | ||
2751 | |||
2752 | lock_global_irqsave(dgl_lock, flags); | ||
2753 | raw_spin_lock_irqsave(&sem->real_lock, more_flags); | ||
2754 | lock_fine_irqsave(&sem->lock, flags); | ||
2755 | |||
2756 | /* decrement affinity count on old GPU */ | ||
2757 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; | ||
2758 | --(aff->nr_aff_on_rsrc[aff_rsrc]); | ||
2759 | |||
2760 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { | ||
2761 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); | ||
2762 | aff->nr_aff_on_rsrc[aff_rsrc] = 0; | ||
2763 | } | ||
2764 | |||
2765 | unlock_fine_irqrestore(&sem->lock, flags); | ||
2766 | raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); | ||
2767 | unlock_global_irqrestore(dgl_lock, flags); | ||
2768 | |||
2769 | return 0; | ||
2770 | } | ||
2771 | |||
2772 | int gpu_ikglp_notify_exit_trampoline(struct task_struct* t) | ||
2773 | { | ||
2774 | struct ikglp_affinity* aff = | ||
2775 | (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args; | ||
2776 | if(likely(aff)) | ||
2777 | return gpu_ikglp_notify_exit(aff, t); | ||
2778 | else | ||
2779 | return -1; | ||
2780 | } | ||
2781 | |||
2782 | void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | ||
2783 | struct fifo_queue* fq, | ||
2784 | struct task_struct* t) | ||
2785 | { | ||
2786 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2787 | int replica = ikglp_get_idx(sem, fq); | ||
2788 | int gpu = replica_to_gpu(aff, replica); | ||
2789 | int last_gpu = tsk_rt(t)->last_gpu; | ||
2790 | |||
2791 | /* record the type of migration */ | ||
2792 | tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); | ||
2793 | |||
2794 | TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", | ||
2795 | t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration); | ||
2796 | |||
2797 | /* count the number of resource holders */ | ||
2798 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2799 | |||
2800 | if(gpu != last_gpu) { | ||
2801 | if(last_gpu >= 0) { | ||
2802 | int old_rsrc = last_gpu - aff->offset; | ||
2803 | --(aff->nr_aff_on_rsrc[old_rsrc]); | ||
2804 | } | ||
2805 | |||
2806 | /* increment affinity count on new GPU */ | ||
2807 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); | ||
2808 | tsk_rt(t)->rsrc_exit_cb_args = aff; | ||
2809 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; | ||
2810 | } | ||
2811 | |||
2812 | reg_nv_device(gpu, 1, t); /* register */ | ||
2813 | |||
2814 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; | ||
2815 | reset_gpu_tracker(t); | ||
2816 | start_gpu_tracker(t); | ||
2817 | } | ||
2818 | |||
2819 | void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, | ||
2820 | struct fifo_queue* fq, | ||
2821 | struct task_struct* t) | ||
2822 | { | ||
2823 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2824 | int replica = ikglp_get_idx(sem, fq); | ||
2825 | int gpu = replica_to_gpu(aff, replica); | ||
2826 | lt_t est_time; | ||
2827 | |||
2828 | stop_gpu_tracker(t); /* stop the tracker before we do anything else. */ | ||
2829 | |||
2830 | est_time = get_gpu_estimate(t, | ||
2831 | gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2832 | |||
2833 | /* count the number of resource holders */ | ||
2834 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2835 | |||
2836 | reg_nv_device(gpu, 0, t); /* unregister */ | ||
2837 | |||
2838 | /* update estimates */ | ||
2839 | update_gpu_estimate(t, get_gpu_time(t)); | ||
2840 | |||
2841 | TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. " | ||
2842 | "actual time was %llu. " | ||
2843 | "estimated was %llu. " | ||
2844 | "diff is %d\n", | ||
2845 | t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, | ||
2846 | tsk_rt(t)->gpu_migration, | ||
2847 | get_gpu_time(t), | ||
2848 | est_time, | ||
2849 | (long long)get_gpu_time(t) - (long long)est_time); | ||
2850 | |||
2851 | tsk_rt(t)->last_gpu = gpu; | ||
2852 | } | ||
2853 | |||
2854 | struct ikglp_affinity_ops gpu_ikglp_affinity = | ||
2855 | { | ||
2856 | .advise_enqueue = gpu_ikglp_advise_enqueue, | ||
2857 | .advise_steal = gpu_ikglp_advise_steal, | ||
2858 | .advise_donee_selection = gpu_ikglp_advise_donee_selection, | ||
2859 | .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq, | ||
2860 | |||
2861 | .notify_enqueue = gpu_ikglp_notify_enqueue, | ||
2862 | .notify_dequeue = gpu_ikglp_notify_dequeue, | ||
2863 | .notify_acquired = gpu_ikglp_notify_acquired, | ||
2864 | .notify_freed = gpu_ikglp_notify_freed, | ||
2865 | |||
2866 | .notify_exit = gpu_ikglp_notify_exit, | ||
2867 | |||
2868 | .replica_to_resource = gpu_replica_to_resource, | ||
2869 | }; | ||
2870 | |||
2871 | struct affinity_observer* ikglp_gpu_aff_obs_new( | ||
2872 | struct affinity_observer_ops* ops, | ||
2873 | void* __user args) | ||
2874 | { | ||
2875 | return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args); | ||
2876 | } | ||
2877 | |||
2878 | |||
2879 | |||
2880 | |||
2881 | /*--------------------------------------------------------------------------*/ | ||
2882 | /* SIMPLE LOAD-BALANCING AFFINITY HEURISTIC */ | ||
2883 | /*--------------------------------------------------------------------------*/ | ||
2884 | |||
2885 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, | ||
2886 | struct task_struct* t) | ||
2887 | { | ||
2888 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2889 | unsigned int min_count; | ||
2890 | unsigned int min_nr_users; | ||
2891 | struct ikglp_queue_info *shortest; | ||
2892 | struct fifo_queue *to_enqueue; | ||
2893 | unsigned int i; | ||
2894 | |||
2895 | shortest = &aff->q_info[0]; | ||
2896 | min_count = shortest->q->count; | ||
2897 | min_nr_users = *(shortest->nr_cur_users); | ||
2898 | |||
2899 | TRACE_CUR("queue %d: waiters = %u, total holders = %u\n", | ||
2900 | ikglp_get_idx(sem, shortest->q), | ||
2901 | shortest->q->count, | ||
2902 | min_nr_users); | ||
2903 | |||
2904 | for(i = 1; i < sem->nr_replicas; ++i) { | ||
2905 | unsigned int len = aff->q_info[i].q->count; | ||
2906 | |||
2907 | // queue is smaller, or they're equal and the other has a smaller number | ||
2908 | // of total users. | ||
2909 | // | ||
2910 | // tie-break on the smallest number of simultaneous users. this only | ||
2911 | // kicks in when there is more than one empty queue. | ||
2912 | if((len < min_count) || | ||
2913 | ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
2914 | shortest = &aff->q_info[i]; | ||
2915 | min_count = shortest->q->count; | ||
2916 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
2917 | } | ||
2918 | |||
2919 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
2920 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
2921 | aff->q_info[i].q->count, | ||
2922 | *(aff->q_info[i].nr_cur_users)); | ||
2923 | } | ||
2924 | |||
2925 | to_enqueue = shortest->q; | ||
2926 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
2927 | ikglp_get_idx(sem, to_enqueue), | ||
2928 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | ||
2929 | |||
2930 | return to_enqueue; | ||
2931 | } | ||
2932 | |||
2933 | ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff, | ||
2934 | struct fifo_queue* dst) | ||
2935 | { | ||
2936 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2937 | return ikglp_find_hp_waiter_to_steal(sem, NULL); | ||
2938 | } | ||
2939 | |||
2940 | ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection( | ||
2941 | struct ikglp_affinity* aff, struct task_struct* donor) | ||
2942 | { | ||
2943 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2944 | ikglp_donee_heap_node_t *donee = | ||
2945 | binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
2946 | return(donee); | ||
2947 | } | ||
2948 | |||
2949 | ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq( | ||
2950 | struct ikglp_affinity* aff, struct fifo_queue* fq) | ||
2951 | { | ||
2952 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2953 | ikglp_wait_state_t* donor = | ||
2954 | binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
2955 | return(donor); | ||
2956 | } | ||
2957 | |||
2958 | void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, | ||
2959 | struct fifo_queue* fq, struct task_struct* t) | ||
2960 | { | ||
2961 | } | ||
2962 | |||
2963 | void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, | ||
2964 | struct fifo_queue* fq, struct task_struct* t) | ||
2965 | { | ||
2966 | } | ||
2967 | |||
2968 | void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | ||
2969 | struct fifo_queue* fq, struct task_struct* t) | ||
2970 | { | ||
2971 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2972 | int replica = ikglp_get_idx(sem, fq); | ||
2973 | int gpu = replica_to_gpu(aff, replica); | ||
2974 | |||
2975 | /* count the number of resource holders */ | ||
2976 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2977 | |||
2978 | reg_nv_device(gpu, 1, t); /* register */ | ||
2979 | } | ||
2980 | |||
2981 | void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, | ||
2982 | struct fifo_queue* fq, struct task_struct* t) | ||
2983 | { | ||
2984 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2985 | int replica = ikglp_get_idx(sem, fq); | ||
2986 | int gpu = replica_to_gpu(aff, replica); | ||
2987 | |||
2988 | /* count the number of resource holders */ | ||
2989 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2990 | |||
2991 | reg_nv_device(gpu, 0, t); /* unregister */ | ||
2992 | } | ||
2993 | |||
2994 | struct ikglp_affinity_ops simple_gpu_ikglp_affinity = | ||
2995 | { | ||
2996 | .advise_enqueue = simple_gpu_ikglp_advise_enqueue, | ||
2997 | .advise_steal = simple_gpu_ikglp_advise_steal, | ||
2998 | .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection, | ||
2999 | .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq, | ||
3000 | |||
3001 | .notify_enqueue = simple_gpu_ikglp_notify_enqueue, | ||
3002 | .notify_dequeue = simple_gpu_ikglp_notify_dequeue, | ||
3003 | .notify_acquired = simple_gpu_ikglp_notify_acquired, | ||
3004 | .notify_freed = simple_gpu_ikglp_notify_freed, | ||
3005 | |||
3006 | .notify_exit = NULL, | ||
3007 | |||
3008 | .replica_to_resource = gpu_replica_to_resource, | ||
3009 | }; | ||
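As with the affinity-aware variant earlier in the file, the heuristic is exposed only through this ops table, and the IKGLP core is expected to dispatch through whichever table the attached observer supplies. A hedged sketch of such a dispatch (field and helper names are assumed for illustration, not taken from the surrounding code):

	/* Illustrative only: consult the attached affinity observer, if any,
	 * when choosing a FIFO queue for task t; otherwise fall back to the
	 * semaphore's own notion of the shortest queue. */
	static struct fifo_queue* choose_fifo_queue(struct ikglp_semaphore *sem,
						    struct ikglp_affinity *aff,
						    struct task_struct *t)
	{
		if (aff && aff->ops && aff->ops->advise_enqueue)
			return aff->ops->advise_enqueue(aff, t);
		return sem->shortest_fifo_queue;
	}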
3010 | |||
3011 | struct affinity_observer* ikglp_simple_gpu_aff_obs_new( | ||
3012 | struct affinity_observer_ops* ops, | ||
3013 | void* __user args) | ||
3014 | { | ||
3015 | return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args); | ||
3016 | } | ||
3017 | #endif /* end LITMUS_AFFINITY_LOCKING && LITMUS_NVIDIA */ | ||
3018 | |||
3019 | #if 0 | ||
3020 | /* debugging routines */ | ||
3021 | |||
3022 | static void __ikglp_dump_pq(struct binheap_node *n, int depth) | ||
3023 | { | ||
3024 | ikglp_heap_node_t *request; | ||
3025 | char padding[81] = " "; | ||
3026 | |||
3027 | if(n == NULL) { | ||
3028 | TRACE("+-> %p\n", NULL); | ||
3029 | return; | ||
3030 | } | ||
3031 | |||
3032 | request = binheap_entry(n, ikglp_heap_node_t, node); | ||
3033 | |||
3034 | if(depth*2 <= 80) | ||
3035 | padding[depth*2] = '\0'; | ||
3036 | |||
3037 | |||
3038 | TRACE("%s+-> %s/%d\n", | ||
3039 | padding, | ||
3040 | request->task->comm, | ||
3041 | request->task->pid); | ||
3042 | |||
3043 | if(n->left) __ikglp_dump_pq(n->left, depth+1); | ||
3044 | if(n->right) __ikglp_dump_pq(n->right, depth+1); | ||
3045 | } | ||
3046 | |||
3047 | static void __ikglp_dump_donors(struct binheap_node *n, int depth) | ||
3048 | { | ||
3049 | ikglp_wait_state_t *donor_node; | ||
3050 | char padding[81] = " "; | ||
3051 | |||
3052 | if(n == NULL) { | ||
3053 | TRACE("+-> %p\n", NULL); | ||
3054 | return; | ||
3055 | } | ||
3056 | |||
3057 | donor_node = binheap_entry(n, ikglp_wait_state_t, node); | ||
3058 | |||
3059 | if(depth*2 <= 80) | ||
3060 | padding[depth*2] = '\0'; | ||
3061 | |||
3062 | |||
3063 | TRACE("%s+-> %s/%d (donee: %s/%d)\n", | ||
3064 | padding, | ||
3065 | donor_node->task->comm, | ||
3066 | donor_node->task->pid, | ||
3067 | donor_node->donee_info->task->comm, | ||
3068 | donor_node->donee_info->task->pid); | ||
3069 | |||
3070 | if(n->left) __ikglp_dump_donors(n->left, depth+1); | ||
3071 | if(n->right) __ikglp_dump_donors(n->right, depth+1); | ||
3072 | } | ||
3073 | |||
3074 | static void __ikglp_dump_fifoq(int i, struct fifo_queue* fq) | ||
3075 | { | ||
3076 | TRACE(" FIFO %d: Owner = %s/%d (Virtually Unlocked = %u), HP Waiter = %s/%d, Length = %u\n", | ||
3077 | i, | ||
3078 | (fq->owner) ? fq->owner->comm : "null", | ||
3079 | (fq->owner) ? fq->owner->pid : 0, | ||
3080 | fq->is_vunlocked, | ||
3081 | (fq->hp_waiter) ? fq->hp_waiter->comm : "null", | ||
3082 | (fq->hp_waiter) ? fq->hp_waiter->pid : 0, | ||
3083 | fq->count); | ||
3084 | if (waitqueue_active(&fq->wait)) { | ||
3085 | struct list_head *pos; | ||
3086 | list_for_each(pos, &fq->wait.task_list) { | ||
3087 | wait_queue_t *q = list_entry(pos, wait_queue_t, task_list); | ||
3088 | struct task_struct *t = (struct task_struct*) q->private; | ||
3089 | TRACE(" %s/%d (effective priority: %s/%d)\n", | ||
3090 | t->comm, t->pid, | ||
3091 | (tsk_rt(t)->inh_task) ? tsk_rt(t)->inh_task->comm : "null", | ||
3092 | (tsk_rt(t)->inh_task) ? tsk_rt(t)->inh_task->pid : 0); | ||
3093 | } | ||
3094 | } | ||
3095 | } | ||
3096 | |||
3097 | __attribute__ ((unused)) | ||
3098 | static void __ikglp_dump_state(struct ikglp_semaphore *sem) | ||
3099 | { | ||
3100 | int i; | ||
3101 | TRACE("IKGLP Lock %d\n", sem->litmus_lock.ident); | ||
3102 | TRACE("# Replicas: %u Max FIFO Len: %u Max in FIFOs: %u Cur # in FIFOs: %u\n", | ||
3103 | sem->nr_replicas, sem->max_fifo_len, sem->max_in_fifos, sem->nr_in_fifos); | ||
3104 | TRACE("# requests in top-m: %u\n", sem->top_m_size); | ||
3105 | |||
3106 | for (i = 0; i < sem->nr_replicas; ++i) | ||
3107 | __ikglp_dump_fifoq(i, &sem->fifo_queues[i]); | ||
3108 | |||
3109 | TRACE(" PQ:\n"); | ||
3110 | __ikglp_dump_pq(sem->priority_queue.root, 1); | ||
3111 | |||
3112 | TRACE(" Donors:\n"); | ||
3113 | __ikglp_dump_donors(sem->donors.root, 1); | ||
3114 | } | ||
3115 | |||
3116 | static void print_global_list(struct binheap_node* n, int depth) | ||
3117 | { | ||
3118 | ikglp_heap_node_t *global_heap_node; | ||
3119 | char padding[81] = " "; | ||
3120 | |||
3121 | if(n == NULL) { | ||
3122 | TRACE_CUR("+-> %p\n", NULL); | ||
3123 | return; | ||
3124 | } | ||
3125 | |||
3126 | global_heap_node = binheap_entry(n, ikglp_heap_node_t, node); | ||
3127 | |||
3128 | if(depth*2 <= 80) | ||
3129 | padding[depth*2] = '\0'; | ||
3130 | |||
3131 | TRACE_CUR("%s+-> %s/%d\n", | ||
3132 | padding, | ||
3133 | global_heap_node->task->comm, | ||
3134 | global_heap_node->task->pid); | ||
3135 | |||
3136 | if(n->left) print_global_list(n->left, depth+1); | ||
3137 | if(n->right) print_global_list(n->right, depth+1); | ||
3138 | } | ||
3139 | |||
3140 | static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth) | ||
3141 | { | ||
3142 | ikglp_donee_heap_node_t *donee_node; | ||
3143 | char padding[81] = " "; | ||
3144 | struct task_struct* donor = NULL; | ||
3145 | |||
3146 | if(n == NULL) { | ||
3147 | TRACE_CUR("+-> %p\n", NULL); | ||
3148 | return; | ||
3149 | } | ||
3150 | |||
3151 | donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node); | ||
3152 | |||
3153 | if(depth*2 <= 80) | ||
3154 | padding[depth*2] = '\0'; | ||
3155 | |||
3156 | if(donee_node->donor_info) { | ||
3157 | donor = donee_node->donor_info->task; | ||
3158 | } | ||
3159 | |||
3160 | TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n", | ||
3161 | padding, | ||
3162 | donee_node->task->comm, | ||
3163 | donee_node->task->pid, | ||
3164 | (donor) ? donor->comm : "null", | ||
3165 | (donor) ? donor->pid : 0, | ||
3166 | ikglp_get_idx(sem, donee_node->fq)); | ||
3167 | |||
3168 | if(n->left) print_donees(sem, n->left, depth+1); | ||
3169 | if(n->right) print_donees(sem, n->right, depth+1); | ||
3170 | } | ||
3171 | |||
3172 | static void print_donors(struct binheap_node *n, int depth) | ||
3173 | { | ||
3174 | ikglp_wait_state_t *donor_node; | ||
3175 | char padding[81] = " "; | ||
3176 | |||
3177 | if(n == NULL) { | ||
3178 | TRACE_CUR("+-> %p\n", NULL); | ||
3179 | return; | ||
3180 | } | ||
3181 | |||
3182 | donor_node = binheap_entry(n, ikglp_wait_state_t, node); | ||
3183 | |||
3184 | if(depth*2 <= 80) | ||
3185 | padding[depth*2] = '\0'; | ||
3186 | |||
3187 | |||
3188 | TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n", | ||
3189 | padding, | ||
3190 | donor_node->task->comm, | ||
3191 | donor_node->task->pid, | ||
3192 | donor_node->donee_info->task->comm, | ||
3193 | donor_node->donee_info->task->pid); | ||
3194 | |||
3195 | if(n->left) print_donors(n->left, depth+1); | ||
3196 | if(n->right) print_donors(n->right, depth+1); | ||
3197 | } | ||
3198 | #endif | ||
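All of the dump helpers above share the same indentation scheme: two spaces of padding per tree level, capped at 80 characters. The pattern in isolation, with a generic node type assumed purely for illustration:

	#include <stdio.h>
	#include <string.h>

	struct node {
		const char *name;
		struct node *left, *right;
	};

	/* Recursively print a binary heap/tree, indenting two spaces per
	 * level and capping the padding at 80 characters, as the TRACE-based
	 * dump helpers above do. */
	static void dump_tree(const struct node *n, int depth)
	{
		char padding[81];
		int pad = (depth * 2 <= 80) ? depth * 2 : 80;

		memset(padding, ' ', pad);
		padding[pad] = '\0';

		if (!n) {
			printf("%s+-> (null)\n", padding);
			return;
		}
		printf("%s+-> %s\n", padding, n->name);
		if (n->left)  dump_tree(n->left,  depth + 1);
		if (n->right) dump_tree(n->right, depth + 1);
	}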
3199 | |||
3200 | #if 0 | ||
3201 | struct ikglp_proc_print_heap_args | ||
3202 | { | ||
3203 | struct ikglp_semaphore *sem; | ||
3204 | int *size; | ||
3205 | char **next; | ||
3206 | }; | ||
3207 | |||
3208 | static void __ikglp_pq_to_proc(struct binheap_node *n, void *args) | ||
3209 | { | ||
3210 | struct ikglp_proc_print_heap_args *hargs; | ||
3211 | ikglp_heap_node_t *request; | ||
3212 | int w; | ||
3213 | |||
3214 | if (!n) | ||
3215 | return; | ||
3216 | |||
3217 | hargs = (struct ikglp_proc_print_heap_args*) args; | ||
3218 | request = binheap_entry(n, ikglp_heap_node_t, node); | ||
3219 | |||
3220 | w = scnprintf(*(hargs->next), *(hargs->size), "\t%s/%d\n", | ||
3221 | request->task->comm, request->task->pid); | ||
3222 | *(hargs->size) -= w; | ||
3223 | *(hargs->next) += w; | ||
3224 | } | ||
3225 | |||
3226 | static void __ikglp_donor_to_proc(struct binheap_node *n, void *args) | ||
3227 | { | ||
3228 | struct ikglp_proc_print_heap_args *hargs; | ||
3229 | ikglp_wait_state_t *donor_node; | ||
3230 | int w; | ||
3231 | |||
3232 | if (!n) | ||
3233 | return; | ||
3234 | |||
3235 | hargs = (struct ikglp_proc_print_heap_args*) args; | ||
3236 | donor_node = binheap_entry(n, ikglp_wait_state_t, node); | ||
3237 | |||
3238 | w = scnprintf(*(hargs->next), *(hargs->size), "\t%s/%d (donee: %s/%d)\n", | ||
3239 | donor_node->task->comm, | ||
3240 | donor_node->task->pid, | ||
3241 | donor_node->donee_info->task->comm, | ||
3242 | donor_node->donee_info->task->pid); | ||
3243 | *(hargs->size) -= w; | ||
3244 | *(hargs->next) += w; | ||
3245 | } | ||
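Both /proc helpers lean on the usual scnprintf() cursor idiom: write into the remaining buffer, then shrink the budget and advance the cursor by the number of characters actually emitted. A minimal sketch of just that idiom, using a hypothetical emit() helper:

	/* Hypothetical helper: scnprintf() returns the number of characters
	 * written (excluding the terminating NUL) and never writes more than
	 * size bytes, so the cursor and the remaining budget stay consistent. */
	static void emit(char **next, int *size, const char *msg)
	{
		int w = scnprintf(*next, *size, "%s", msg);
		*size -= w;
		*next += w;
	}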
3246 | |||
3247 | |||
3248 | static int ikglp_proc_print(char *page, char **start, off_t off, int count, int *eof, void *data) | ||
3249 | { | ||
3250 | struct ikglp_semaphore *sem = ikglp_from_lock((struct litmus_lock*)data); | ||
3251 | |||
3252 | int attempts = 0; | ||
3253 | const int max_attempts = 10; | ||
3254 | int locked = 0; | ||
3255 | unsigned long flags; | ||
3256 | |||
3257 | int size = count; | ||
3258 | char *next = page; | ||
3259 | |||
3260 | struct ikglp_proc_print_heap_args heap_args = {sem, &size, &next}; | ||
3261 | |||
3262 | int w; | ||
3263 | int i; | ||
3264 | |||
3265 | while(attempts < max_attempts) | ||
3266 | { | ||
3267 | locked = raw_spin_trylock_irqsave(&sem->real_lock, flags); | ||
3268 | |||
3269 | if (unlikely(!locked)) { | ||
3270 | ++attempts; | ||
3271 | cpu_relax(); | ||
3272 | } | ||
3273 | else { | ||
3274 | break; | ||
3275 | } | ||
3276 | } | ||
3277 | |||
3278 | if (!locked) { | ||
3279 | w = scnprintf(next, size, "%s is busy.\n", sem->litmus_lock.name); | ||
3280 | size -= w; | ||
3281 | next += w; | ||
3282 | return count - size; | ||
3283 | } | ||
3284 | |||
3285 | w = scnprintf(next, size, "nr_replicas: %u\n", sem->nr_replicas); size -= w; next += w; | ||
3286 | w = scnprintf(next, size, "max_fifo_len: %u\n", sem->max_fifo_len); size -= w; next += w; | ||
3287 | w = scnprintf(next, size, "max_in_fifos: %u\n", sem->max_in_fifos); size -= w; next += w; | ||
3288 | w = scnprintf(next, size, "current nr_in_fifos: %u\n", sem->nr_in_fifos); size -= w; next += w; | ||
3289 | w = scnprintf(next, size, "nr in top-m: %u\n\n", sem->top_m_size); size -= w; next += w; | ||
3290 | |||
3291 | for (i = 0; i < sem->nr_replicas; ++i) | ||
3292 | { | ||
3293 | struct fifo_queue *fq = &sem->fifo_queues[i]; | ||
3294 | w = scnprintf(next, size, "replica %d: owner = %s/%d (Virtually Unlocked = %u), hp waiter = %s/%d, length = %u\n", | ||
3295 | i, | ||
3296 | (fq->owner) ? fq->owner->comm : "null", | ||
3297 | (fq->owner) ? fq->owner->pid : 0, | ||
3298 | fq->is_vunlocked, | ||
3299 | (fq->hp_waiter) ? fq->hp_waiter->comm : "null", | ||
3300 | (fq->hp_waiter) ? fq->hp_waiter->pid : 0, | ||
3301 | fq->count); | ||
3302 | size -= w; next += w; | ||
3303 | |||
3304 | |||
3305 | if (waitqueue_active(&fq->wait)) { | ||
3306 | struct list_head *pos; | ||
3307 | list_for_each(pos, &fq->wait.task_list) { | ||
3308 | wait_queue_t *q = list_entry(pos, wait_queue_t, task_list); | ||
3309 | struct task_struct *blocked_task = (struct task_struct*) q->private; | ||
3310 | w = scnprintf(next, size, | ||
3311 | "\t%s/%d (inh: %s/%d)\n", | ||
3312 | blocked_task->comm, blocked_task->pid, | ||
3313 | (tsk_rt(blocked_task)->inh_task) ? | ||
3314 | tsk_rt(blocked_task)->inh_task->comm : "null", | ||
3315 | (tsk_rt(blocked_task)->inh_task) ? | ||
3316 | tsk_rt(blocked_task)->inh_task->pid : 0); | ||
3317 | size -= w; | ||
3318 | next += w; | ||
3319 | } | ||
3320 | } | ||
3321 | else { | ||
3322 | w = scnprintf(next, size, "\t<NONE>\n"); | ||
3323 | size -= w; | ||
3324 | next += w; | ||
3325 | } | ||
3326 | } | ||
3327 | |||
3328 | if (binheap_empty(&sem->priority_queue)) { | ||
3329 | w = scnprintf(next, size, "pq:\n\t<NONE>\n"); | ||
3330 | size -= w; | ||
3331 | next += w; | ||
3332 | } | ||
3333 | else { | ||
3334 | w = scnprintf(next, size, "pq:\n"); size -= w; next += w; | ||
3335 | binheap_for_each(&sem->priority_queue, __ikglp_pq_to_proc, &heap_args); | ||
3336 | } | ||
3337 | |||
3338 | if (binheap_empty(&sem->donors)) { | ||
3339 | w = scnprintf(next, size, "donors:\n\t<NONE>\n"); | ||
3340 | size -= w; | ||
3341 | next += w; | ||
3342 | } | ||
3343 | else { | ||
3344 | w = scnprintf(next, size, "donors:\n"); size -= w; next += w; | ||
3345 | binheap_for_each(&sem->donors, __ikglp_donor_to_proc, &heap_args); | ||
3346 | } | ||
3347 | |||
3348 | raw_spin_unlock_irqrestore(&sem->real_lock, flags); | ||
3349 | |||
3350 | return count - size; | ||
3351 | } | ||
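The bounded trylock at the top of ikglp_proc_print() keeps a /proc read from blocking behind a heavily contended lock; after max_attempts failures it simply reports the lock as busy. The same retry pattern in isolation (an assumed helper, not part of this file):

	/* Try to acquire the lock a bounded number of times, relaxing the
	 * CPU between attempts; return nonzero on success, zero if the lock
	 * stayed contended. */
	static int bounded_trylock(raw_spinlock_t *lock, unsigned long *flags,
				   int max_attempts)
	{
		int attempts;

		for (attempts = 0; attempts < max_attempts; ++attempts) {
			if (raw_spin_trylock_irqsave(lock, *flags))
				return 1;
			cpu_relax();
		}
		return 0;
	}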
3352 | |||
3353 | static void ikglp_proc_add(struct litmus_lock *l) | ||
3354 | { | ||
3355 | if (!l->name) | ||
3356 | l->name = kmalloc(LOCK_NAME_LEN*sizeof(char), GFP_KERNEL); | ||
3357 | snprintf(l->name, LOCK_NAME_LEN, "ikglp-%d", l->ident); | ||
3358 | litmus_add_proc_lock(l, ikglp_proc_print); | ||
3359 | } | ||
3360 | |||
3361 | static void ikglp_proc_remove(struct litmus_lock *l) | ||
3362 | { | ||
3363 | if (l->name) { | ||
3364 | litmus_remove_proc_lock(l); | ||
3365 | |||
3366 | kfree(l->name); | ||
3367 | l->name = NULL; | ||
3368 | } | ||
3369 | } | ||
3370 | |||
3371 | static struct litmus_lock_proc_ops ikglp_proc_ops = | ||
3372 | { | ||
3373 | .add = ikglp_proc_add, | ||
3374 | .remove = ikglp_proc_remove | ||
3375 | }; | ||
3376 | #endif | ||