author	Glenn Elliott <gelliott@cs.unc.edu>	2012-04-18 21:30:36 -0400
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-04-18 21:30:36 -0400
commit	f916cdb8e6a9ee2c917fddb7351e6bb39f6c953e (patch)
tree	b7904b93f4da153a40815b89378e7b3ca2f70591
parent	6ab36ca992441f7353840c70fc91d99a500a940e (diff)
Added support for simultaneous users (simult-users) in KFMLP
-rw-r--r--	include/litmus/fdso.h		9
-rw-r--r--	include/litmus/ikglp_lock.h	61
-rw-r--r--	include/litmus/kfmlp_lock.h	10
-rw-r--r--	include/litmus/nvidia_info.h	1
-rw-r--r--	include/litmus/rt_param.h	3
-rw-r--r--	litmus/Kconfig			13
-rw-r--r--	litmus/fdso.c			3
-rw-r--r--	litmus/kfmlp_lock.c		337
-rw-r--r--	litmus/nvidia_info.c		11
9 files changed, 297 insertions(+), 151 deletions(-)
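The crux of the change: a KFMLP semaphore with nr_replicas queues can now be folded onto nr_rsrc = nr_replicas / nr_simult physical GPUs, so that up to nr_simult tasks (e.g., one per copy/execution engine) hold "replicas" of the same GPU at once. A minimal standalone sketch of the folding arithmetic; the helper mirrors the __replica_to_gpu()/replica_to_gpu() inlines added to litmus/kfmlp_lock.c below, while main() and its values are purely illustrative:

#include <stdio.h>

/* Mirrors the fields this patch adds to struct kfmlp_affinity. */
struct aff {
	int offset;     /* GPU ID backing replica 0 (replica_to_gpu_offset) */
	int nr_simult;  /* simultaneous users allowed per GPU */
	int nr_rsrc;    /* physical GPUs = nr_replicas / nr_simult */
};

/* replica -> GPU folding, as in replica_to_gpu() */
static int replica_to_gpu(const struct aff *a, int replica)
{
	return (replica % a->nr_rsrc) + a->offset;
}

int main(void)
{
	/* hypothetical setup: 4 replicas folded onto 2 GPUs, 2 users each */
	struct aff a = { .offset = 0, .nr_simult = 2, .nr_rsrc = 2 };
	int r;
	for (r = 0; r < a.nr_rsrc * a.nr_simult; ++r)
		printf("replica %d -> GPU %d\n", r, replica_to_gpu(&a, r));
	return 0;  /* prints 0->0, 1->1, 2->0, 3->1 */
}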
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index b92c1a3f004f..552a1e731672 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -24,11 +24,12 @@ typedef enum {
 	IKGLP_SEM = 3,
 	KFMLP_SEM = 4,
 
-	IKGLP_GPU_AFF_OBS = 5,
-	KFMLP_SIMPLE_GPU_AFF_OBS = 6,
-	KFMLP_GPU_AFF_OBS = 7,
+	IKGLP_SIMPLE_GPU_AFF_OBS = 5,
+	IKGLP_GPU_AFF_OBS = 6,
+	KFMLP_SIMPLE_GPU_AFF_OBS = 7,
+	KFMLP_GPU_AFF_OBS = 8,
 
-	MAX_OBJ_TYPE = 7
+	MAX_OBJ_TYPE = 8
 } obj_type_t;
 
 struct inode_obj_id {
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index c0cc04db1bc6..2a75a1719815 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -5,6 +5,12 @@
 #include <litmus/binheap.h>
 #include <litmus/locking.h>
 
+#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
+#include <litmus/kexclu_affinity.h>
+
+struct ikglp_affinity;
+#endif
+
 typedef struct ikglp_heap_node
 {
 	struct task_struct *task;
@@ -81,6 +87,10 @@ struct ikglp_semaphore
 	struct fifo_queue *fifo_queues;	// array nr_replicas in length
 	struct binheap_handle priority_queue;	// max-heap, base prio
 	struct binheap_handle donors;	// max-heap, base prio
+
+#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
+	struct ikglp_affinity *aff_obs;
+#endif
 };
 
 static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock)
@@ -94,4 +104,55 @@ int ikglp_close(struct litmus_lock* l);
 void ikglp_free(struct litmus_lock* l);
 struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg);
 
+
+
+#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
+
+struct ikglp_queue_info
+{
+	struct fifo_queue* q;
+	lt_t estimated_len;
+	int *nr_cur_users;
+};
+
+struct ikglp_affinity_ops
+{
+	struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t);	// select FIFO
+	struct task_struct* (*advise_steal)(struct ikglp_affinity* aff, wait_queue_t** to_steal, struct fifo_queue** to_steal_from);	// select a waiter (and FIFO) to steal from
+	struct task_struct* (*advise_donee_selection)(struct ikglp_affinity* aff, wait_queue_t** donee, struct fifo_queue** donee_queue);	// select a donee
+	struct task_struct* (*advise_donor_to_fq)(struct ikglp_affinity* aff, ikglp_wait_state_t** donor);	// select a donor to move to a FIFO queue
+
+	void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// fifo enqueue
+	void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// fifo dequeue
+	void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// replica acquired
+	void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// replica freed
+	int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq);	// convert a replica # to a GPU (includes offsets and simult-user folding)
+};
+
+struct ikglp_affinity
+{
+	struct affinity_observer obs;
+	struct ikglp_affinity_ops *ops;
+	struct ikglp_queue_info *q_info;
+	int *nr_cur_users_on_rsrc;
+	int offset;
+	int nr_simult;
+	int nr_rsrc;
+};
+
+static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
+{
+	return container_of(aff_obs, struct ikglp_affinity, obs);
+}
+
+int ikglp_aff_obs_close(struct affinity_observer*);
+void ikglp_aff_obs_free(struct affinity_observer*);
+struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*,
+						void* __user arg);
+struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
+						void* __user arg);
+#endif
+
+
+
 #endif
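The new ikglp_affinity_ops mirrors kfmlp_affinity_ops below: the lock code calls through a table of function pointers so the "smart" and "simple" GPU advisors can be swapped without touching the queue logic. A toy, compilable model of that dispatch pattern; all types and names here are illustrative stand-ins, not the kernel's:

#include <stdio.h>

struct affinity;  /* forward declaration, as in the headers above */

struct affinity_ops {
	int (*advise_enqueue)(struct affinity *aff);             /* select a queue */
	int (*replica_to_resource)(struct affinity *aff, int r); /* queue -> GPU */
};

struct affinity {
	struct affinity_ops *ops;
	int offset;
	int nr_rsrc;
};

static int simple_advise(struct affinity *aff) { return 0; /* always queue 0 */ }
static int to_resource(struct affinity *aff, int r)
{
	return (r % aff->nr_rsrc) + aff->offset;  /* same folding as the patch */
}

static struct affinity_ops simple_ops = {
	.advise_enqueue = simple_advise,
	.replica_to_resource = to_resource,
};

int main(void)
{
	struct affinity aff = { .ops = &simple_ops, .offset = 0, .nr_rsrc = 2 };
	int q = aff.ops->advise_enqueue(&aff);
	printf("enqueue on queue %d -> GPU %d\n", q,
	       aff.ops->replica_to_resource(&aff, q));
	return 0;
}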
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
index 614cccad5307..6d7e24b2a3ad 100644
--- a/include/litmus/kfmlp_lock.h
+++ b/include/litmus/kfmlp_lock.h
@@ -6,6 +6,8 @@
 
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 #include <litmus/kexclu_affinity.h>
+
+struct kfmlp_affinity;
 #endif
 
 /* struct for semaphore with priority inheritance */
@@ -50,10 +52,9 @@ struct kfmlp_queue_info
 {
 	struct kfmlp_queue* q;
 	lt_t estimated_len;
+	int *nr_cur_users;
 };
 
-struct kfmlp_affinity;
-
 struct kfmlp_affinity_ops
 {
 	struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t);
@@ -62,6 +63,7 @@ struct kfmlp_affinity_ops
 	void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
 	void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
 	void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
+	int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq);
 };
 
 struct kfmlp_affinity
@@ -69,8 +71,10 @@ struct kfmlp_affinity
 	struct affinity_observer obs;
 	struct kfmlp_affinity_ops *ops;
 	struct kfmlp_queue_info *q_info;
-	struct kfmlp_queue_info *shortest_queue;
+	int *nr_cur_users_on_rsrc;
 	int offset;
+	int nr_simult;
+	int nr_rsrc;
 };
 
 static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
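Note the division of labor: kfmlp_queue_info gains a per-queue int *nr_cur_users, while kfmlp_affinity owns one counter per physical GPU in nr_cur_users_on_rsrc. With nr_simult > 1, several queues' nr_cur_users pointers alias the same per-GPU counter. A small user-space sketch of that aliasing (sizes and values are hypothetical):

#include <stdio.h>

int main(void)
{
	/* 4 replica queues folded onto 2 GPUs (nr_simult = 2) */
	int nr_cur_users_on_rsrc[2] = { 0, 0 };  /* one holder count per GPU */
	int *q_nr_cur_users[4];                  /* one pointer per queue */
	int i;

	/* same wiring as kfmlp_aff_obs_new(): queue i -> GPU (i % nr_rsrc) */
	for (i = 0; i < 4; ++i)
		q_nr_cur_users[i] = &nr_cur_users_on_rsrc[i % 2];

	++(*q_nr_cur_users[0]);  /* replica 0 acquired: GPU 0 gains a holder */
	++(*q_nr_cur_users[2]);  /* replica 2 acquired: also GPU 0 */

	printf("GPU 0 holders: %d\n", nr_cur_users_on_rsrc[0]);  /* 2 */
	printf("GPU 1 holders: %d\n", nr_cur_users_on_rsrc[1]);  /* 0 */
	return 0;
}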
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
index 856c575374d3..580728051d4e 100644
--- a/include/litmus/nvidia_info.h
+++ b/include/litmus/nvidia_info.h
@@ -9,6 +9,7 @@
 
 //#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
 #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
+#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
 
 int init_nvidia_info(void);
 
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 11f081527545..e832ffcba17c 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -59,10 +59,11 @@ struct affinity_observer_args
 	int lock_od;
 };
 
-struct kfmlp_gpu_affinity_observer_args
+struct gpu_affinity_observer_args
 {
 	struct affinity_observer_args obs;
 	int replica_to_gpu_offset;
+	int nr_simult_users;
 };
 
 /* The definition of the data that is shared between the kernel and real-time
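The renamed gpu_affinity_observer_args now carries nr_simult_users up from user space. A hypothetical sketch of how a user-space tool might fill it in; the structs are re-declared locally (and abridged), and a real lock_od would come from liblitmus's od-open path rather than the constant used here:

#include <stdio.h>

/* Abridged local re-declarations of the structs in rt_param.h above. */
struct affinity_observer_args { int lock_od; };
struct gpu_affinity_observer_args {
	struct affinity_observer_args obs;
	int replica_to_gpu_offset;
	int nr_simult_users;
};

int main(void)
{
	struct gpu_affinity_observer_args args = {
		.obs = { .lock_od = 3 },     /* placeholder od of the KFMLP lock */
		.replica_to_gpu_offset = 0,  /* GPU backing replica 0 */
		.nr_simult_users = 2,        /* e.g., copy engine + exec engine */
	};
	printf("observe lock od %d: offset %d, %d simult users\n",
	       args.obs.lock_od, args.replica_to_gpu_offset,
	       args.nr_simult_users);
	return 0;
}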
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 34ce6fb3a22e..a34440f3d8bc 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -325,6 +325,19 @@ config NV_DEVICE_NUM
 	  Should be (<= to the number of CPUs) and
 	  (<= to the number of GPUs) in your system.
 
+config NV_MAX_SIMULT_USERS
+	int "Maximum number of threads sharing a GPU simultaneously"
+	depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
+	range 1 3
+	default "2"
+	help
+	  Should be equal to the #copy_engines + #execution_engines
+	  of the GPUs in your system.
+
+	  Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?)
+	  Consumer Fermi/Kepler GPUs = 2 (GTX-4xx through GTX-6xx)
+	  Older = 1 (ex. GTX-2xx)
+
 choice
 	prompt "CUDA/Driver Version Support"
 	default CUDA_4_0
diff --git a/litmus/fdso.c b/litmus/fdso.c
index 5a4f45c3251b..fb328db77dec 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -28,7 +28,8 @@ static const struct fdso_ops* fdso_ops[] = {
 	&generic_lock_ops, /* RSM_MUTEX */
 	&generic_lock_ops, /* IKGLP_SEM */
 	&generic_lock_ops, /* KFMLP_SEM */
-	&generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
+	&generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
+	&generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
 	&generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
 	&generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
 };
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 7cdca1b7b50a..9bbe31a05b97 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -73,10 +73,9 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* se
 }
 
 
-// TODO: BREAK THIS UP INTO TWO STEPS:
-//	1) task to steal (and from what queue)
-//	2) update queues
-static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
+static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
+						 wait_queue_t** to_steal,
+						 struct kfmlp_queue** to_steal_from)
 {
 	/* must hold sem->lock */
 
@@ -189,76 +188,7 @@ static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
 	}
 #endif
 }
-//// TODO: BREAK THIS UP INTO TWO STEPS:
-////	1) task to steal (and from what queue)
-////	2) update queues
-//static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
-//{
-//	/* must hold sem->lock */
-//
-//	struct kfmlp_queue *my_queue = NULL;
-//	struct task_struct *max_hp = NULL;
-//
-//	struct list_head *pos;
-//	struct task_struct *queued;
-//	int i;
-//
-//	for(i = 0; i < sem->num_resources; ++i)
-//	{
-//		if( (sem->queues[i].count > 1) &&
-//			((my_queue == NULL) ||
-//			 //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
-//			 (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
-//		{
-//			my_queue = &sem->queues[i];
-//		}
-//	}
-//
-//	if(my_queue)
-//	{
-//		max_hp = my_queue->hp_waiter;
-//
-//		BUG_ON(!max_hp);
-//
-//		TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
-//				  kfmlp_get_idx(sem, my_queue),
-//				  max_hp->comm, max_hp->pid,
-//				  kfmlp_get_idx(sem, my_queue));
-//
-//		my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
-//
-//		if(tsk_rt(my_queue->owner)->inh_task == max_hp)
-//		{
-//			litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter);
-//		}
-//
-//		list_for_each(pos, &my_queue->wait.task_list)
-//		{
-//			queued = (struct task_struct*) list_entry(pos, wait_queue_t,
-//													  task_list)->private;
-//			/* Compare task prios, find high prio task. */
-//			if (queued == max_hp)
-//			{
-//				/*
-//				 TRACE_CUR("queue %d: found entry in wait queue.  REMOVING!\n",
-//				 kfmlp_get_idx(sem, my_queue));
-//				 */
-//				__remove_wait_queue(&my_queue->wait,
-//									list_entry(pos, wait_queue_t, task_list));
-//				break;
-//			}
-//		}
-//		--(my_queue->count);
-//
-//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-//		if(sem->aff_obs) {
-//			sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, max_hp);
-//		}
-//#endif
-//	}
-//
-//	return(max_hp);
-//}
+
 
 int kfmlp_lock(struct litmus_lock* l)
 {
@@ -378,6 +308,12 @@ int kfmlp_lock(struct litmus_lock* l)
 		spin_unlock_irqrestore(&sem->lock, flags);
 	}
 
+
+#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
+	if(sem->aff_obs) {
+		return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
+	}
+#endif
 	return kfmlp_get_idx(sem, my_queue);
 }
 
@@ -390,14 +326,14 @@ int kfmlp_unlock(struct litmus_lock* l)
 	unsigned long flags;
 	int err = 0;
 
-	spin_lock_irqsave(&sem->lock, flags);
-
 	my_queue = kfmlp_get_queue(sem, t);
 
-	if (!my_queue || my_queue->owner != t) {
+	if (!my_queue) {
 		err = -EINVAL;
 		goto out;
 	}
+
+	spin_lock_irqsave(&sem->lock, flags);
 
 	TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
 
@@ -489,9 +425,9 @@ RETRY:
 		}
 	}
 
-out:
 	spin_unlock_irqrestore(&sem->lock, flags);
 
+out:
 	return err;
 }
 
@@ -580,6 +516,25 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
 
 #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
 
+static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
+{
+	int gpu = replica % aff->nr_rsrc;
+	return gpu;
+}
+
+static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
+{
+	int gpu = __replica_to_gpu(aff, replica) + aff->offset;
+	return gpu;
+}
+
+static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
+{
+	int replica = gpu - aff->offset;
+	return replica;
+}
+
+
 int kfmlp_aff_obs_close(struct affinity_observer* obs)
 {
 	return 0;
@@ -588,6 +543,7 @@ int kfmlp_aff_obs_close(struct affinity_observer* obs)
 void kfmlp_aff_obs_free(struct affinity_observer* obs)
 {
 	struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
+	kfree(kfmlp_aff->nr_cur_users_on_rsrc);
 	kfree(kfmlp_aff->q_info);
 	kfree(kfmlp_aff);
 }
@@ -597,37 +553,56 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 					void* __user args)
 {
 	struct kfmlp_affinity* kfmlp_aff;
-	struct kfmlp_gpu_affinity_observer_args aff_args;
+	struct gpu_affinity_observer_args aff_args;
 	struct kfmlp_semaphore* sem;
 	int i;
 	unsigned long flags;
 
-	if(!access_ok(VERIFY_READ, args, sizeof(aff_args)))
-	{
+	if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
 		return(NULL);
 	}
-	if(__copy_from_user(&aff_args, args, sizeof(aff_args)))
-	{
+	if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
 		return(NULL);
 	}
 
 	sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
 
-	if(sem->litmus_lock.type != KFMLP_SEM)
-	{
+	if(sem->litmus_lock.type != KFMLP_SEM) {
 		TRACE_CUR("Lock type not supported.  Type = %d\n", sem->litmus_lock.type);
 		return(NULL);
 	}
 
+	if((aff_args.nr_simult_users <= 0) ||
+	   (sem->num_resources%aff_args.nr_simult_users != 0)) {
+		TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
+				  "(%d) per replica.  #replicas should be evenly divisible "
+				  "by #simult_users.\n",
+				  sem->litmus_lock.ident,
+				  sem->num_resources,
+				  aff_args.nr_simult_users);
+		return(NULL);
+	}
+
+	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
+		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
+				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
+		return(NULL);
+	}
+
 	kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
-	if(!kfmlp_aff)
-	{
+	if(!kfmlp_aff) {
 		return(NULL);
 	}
 
 	kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
-	if(!kfmlp_aff->q_info)
-	{
+	if(!kfmlp_aff->q_info) {
+		kfree(kfmlp_aff);
+		return(NULL);
+	}
+
+	kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
+	if(!kfmlp_aff->nr_cur_users_on_rsrc) {
+		kfree(kfmlp_aff->q_info);
 		kfree(kfmlp_aff);
 		return(NULL);
 	}
@@ -636,16 +611,24 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 
 	kfmlp_aff->ops = kfmlp_ops;
 	kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
+	kfmlp_aff->nr_simult = aff_args.nr_simult_users;
+	kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
+
+	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc));
 
-	for(i = 0; i < sem->num_resources; ++i)
-	{
+	for(i = 0; i < sem->num_resources; ++i) {
 		kfmlp_aff->q_info[i].q = &sem->queues[i];
 		kfmlp_aff->q_info[i].estimated_len = 0;
+
+		// multiple q_info's will point to the same resource (aka GPU) if
+		// aff_args.nr_simult_users > 1
+		kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff, i)];
 	}
 
+	// attach observer to the lock
 	spin_lock_irqsave(&sem->lock, flags);
 	sem->aff_obs = kfmlp_aff;
-	kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)];
+	//kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)];
 	spin_unlock_irqrestore(&sem->lock, flags);
 
 	return &kfmlp_aff->obs;
@@ -654,6 +637,13 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 
 
 
+static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
+				   struct kfmlp_queue* fq) {
+	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
+	return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
+}
+
+
 // Smart KFMLP Affinity
 
 static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
@@ -675,55 +665,66 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 	lt_t min_len;
+	int min_nr_users;
 	struct kfmlp_queue_info *shortest;
 	struct kfmlp_queue *to_enqueue;
 	int i;
+	int affinity_gpu;
 
 	// simply pick the shortest queue if we have no affinity, or we have
 	// affinity with the shortest
 	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
-		// we have affinity with the shorest queue. pick it.
-		shortest = aff->shortest_queue;
-		TRACE_CUR("special case: no affinity\n");
-		goto out;
+		affinity_gpu = aff->offset;  // first gpu
+		TRACE_CUR("no affinity\n");
+	}
+	else {
+		affinity_gpu = tsk_rt(t)->last_gpu;
 	}
 
 	// all things being equal, let's start with the queue with which we have
 	// affinity.  this helps us maintain affinity even when we don't have
 	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
-	shortest = &aff->q_info[tsk_rt(t)->last_gpu - aff->offset];
+	shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
 
-	if(shortest == aff->shortest_queue) {
-		TRACE_CUR("special case: have affinity with shortest queue\n");
-		goto out;
-	}
+//	if(shortest == aff->shortest_queue) {
+//		TRACE_CUR("special case: have affinity with shortest queue\n");
+//		goto out;
+//	}
 
 	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
+	min_nr_users = *(shortest->nr_cur_users);
 
 	TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
 			  get_gpu_estimate(t, MIG_LOCAL),
 			  kfmlp_get_idx(sem, shortest->q),
 			  min_len);
 
 	for(i = 0; i < sem->num_resources; ++i) {
 		if(&aff->q_info[i] != shortest) {
 
 			lt_t est_len =
 				aff->q_info[i].estimated_len +
-				get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset));
-			if(est_len < min_len) {
+				get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
+
+			// queue is smaller, or they're equal and the other has a smaller
+			// number of total users.
+			//
+			// tie-break on the smallest number of simult users.  this only kicks
+			// in when there is more than one empty queue.
+			if((est_len < min_len) ||
+			   ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
 				shortest = &aff->q_info[i];
 				min_len = est_len;
+				min_nr_users = *(aff->q_info[i].nr_cur_users);
 			}
 
 			TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
-					  get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)),
+					  get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
 					  kfmlp_get_idx(sem, aff->q_info[i].q),
 					  est_len);
 		}
 	}
 
-out:
 	to_enqueue = shortest->q;
 	TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
 			  kfmlp_get_idx(sem, to_enqueue),
@@ -736,7 +737,7 @@ struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queu
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 
-	// For now, just steal from the shortest (by number) queue.
+	// For now, just steal the highest-priority waiter.
 	// TODO: Implement affinity-aware stealing.
 
 	return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
@@ -747,7 +748,7 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 	int replica = kfmlp_get_idx(sem, fq);
-	int gpu = aff->offset + replica;
+	int gpu = replica_to_gpu(aff, replica);
 	struct kfmlp_queue_info *info = &aff->q_info[replica];
 	lt_t est_time;
 	lt_t est_len_before;
@@ -765,22 +766,22 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq
 			  est_len_before, est_time,
 			  info->estimated_len);
 
-	if(aff->shortest_queue == info) {
-		// we may no longer be the shortest
-		aff->shortest_queue = kfmlp_aff_find_shortest(aff);
-
-		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
-				  kfmlp_get_idx(sem, aff->shortest_queue->q),
-				  aff->shortest_queue->q->count,
-				  aff->shortest_queue->estimated_len);
-	}
+//	if(aff->shortest_queue == info) {
+//		// we may no longer be the shortest
+//		aff->shortest_queue = kfmlp_aff_find_shortest(aff);
+//
+//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
+//				  kfmlp_get_idx(sem, aff->shortest_queue->q),
+//				  aff->shortest_queue->q->count,
+//				  aff->shortest_queue->estimated_len);
+//	}
 }
 
 void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 	int replica = kfmlp_get_idx(sem, fq);
-	int gpu = aff->offset + replica;
+	int gpu = replica_to_gpu(aff, replica);
 	struct kfmlp_queue_info *info = &aff->q_info[replica];
 	lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
 
@@ -797,28 +798,32 @@ void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq
 			  info->estimated_len);
 
 	// check to see if we're the shortest queue now.
-	if((aff->shortest_queue != info) &&
-	   (aff->shortest_queue->estimated_len > info->estimated_len)) {
-
-		aff->shortest_queue = info;
-
-		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
-				  kfmlp_get_idx(sem, info->q),
-				  info->q->count,
-				  info->estimated_len);
-	}
+//	if((aff->shortest_queue != info) &&
+//	   (aff->shortest_queue->estimated_len > info->estimated_len)) {
+//
+//		aff->shortest_queue = info;
+//
+//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
+//				  kfmlp_get_idx(sem, info->q),
+//				  info->q->count,
+//				  info->estimated_len);
+//	}
 }
 
 void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 
 	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu);	// record the type of migration
 
 	TRACE_CUR("%s/%d acquired gpu %d.  migration type = %d\n",
 			  t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
 
+	// count the number of resource holders
+	++(*(aff->q_info[replica].nr_cur_users));
+
 	reg_nv_device(gpu, 1, t);  // register
 
 	tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
@@ -829,7 +834,8 @@ void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* f
 void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 	lt_t est_time;
 
 	stop_gpu_tracker(t);  // stop the tracker before we do anything else.
@@ -837,6 +843,10 @@ void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq,
 	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
 
 	tsk_rt(t)->last_gpu = gpu;
+
+	// count the number of resource holders
+	--(*(aff->q_info[replica].nr_cur_users));
+
 	reg_nv_device(gpu, 0, t);  // unregister
 
 	// update estimates
@@ -856,7 +866,8 @@ struct kfmlp_affinity_ops gpu_kfmlp_affinity =
 	.notify_enqueue = gpu_kfmlp_notify_enqueue,
 	.notify_dequeue = gpu_kfmlp_notify_dequeue,
 	.notify_acquired = gpu_kfmlp_notify_acquired,
-	.notify_freed = gpu_kfmlp_notify_freed
+	.notify_freed = gpu_kfmlp_notify_freed,
+	.replica_to_resource = gpu_replica_to_resource,
 };
 
 struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
@@ -877,8 +888,50 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op
 struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
+	int min_count;
+	int min_nr_users;
+	struct kfmlp_queue_info *shortest;
+	struct kfmlp_queue *to_enqueue;
+	int i;
+
 //	TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
-	return sem->shortest_queue;
+
+	shortest = &aff->q_info[0];
+	min_count = shortest->q->count;
+	min_nr_users = *(shortest->nr_cur_users);
+
+	TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
+			  kfmlp_get_idx(sem, shortest->q),
+			  shortest->q->count,
+			  min_nr_users);
+
+	for(i = 1; i < sem->num_resources; ++i) {
+		int len = aff->q_info[i].q->count;
+
+		// queue is smaller, or they're equal and the other has a smaller
+		// number of total users.
+		//
+		// tie-break on the smallest number of simult users.  this only kicks
+		// in when there is more than one empty queue.
+		if((len < min_count) ||
+		   ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
+			shortest = &aff->q_info[i];
+			min_count = shortest->q->count;
+			min_nr_users = *(aff->q_info[i].nr_cur_users);
+		}
+
+		TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
+				  kfmlp_get_idx(sem, aff->q_info[i].q),
+				  aff->q_info[i].q->count,
+				  *(aff->q_info[i].nr_cur_users));
+	}
+
+	to_enqueue = shortest->q;
+	TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
+			  kfmlp_get_idx(sem, to_enqueue),
+			  kfmlp_get_idx(sem, sem->shortest_queue));
+
+	return to_enqueue;
 }
 
 struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
@@ -901,19 +954,26 @@ void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_qu
 void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 
 //	TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
 
+	// count the number of resource holders
+	++(*(aff->q_info[replica].nr_cur_users));
+
 	reg_nv_device(gpu, 1, t);  // register
 }
 
 void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 
 //	TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
+	// count the number of resource holders
+	--(*(aff->q_info[replica].nr_cur_users));
 
 	reg_nv_device(gpu, 0, t);  // unregister
 }
@@ -925,7 +985,8 @@ struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
 	.notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
 	.notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
 	.notify_acquired = simple_gpu_kfmlp_notify_acquired,
-	.notify_freed = simple_gpu_kfmlp_notify_freed
+	.notify_freed = simple_gpu_kfmlp_notify_freed,
+	.replica_to_resource = gpu_replica_to_resource,
 };
 
 struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
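The simple advisor's policy is now: pick the replica queue with the fewest waiters, and break ties in favor of the queue whose backing GPU currently has the fewest holders, which spreads work across GPUs when several queues are empty. A standalone model of that comparison (made-up data; same predicate as simple_gpu_kfmlp_advise_enqueue() above):

#include <stdio.h>

int main(void)
{
	int count[4]   = { 0, 0, 1, 2 };  /* waiters per replica queue */
	int holders[4] = { 1, 0, 1, 0 };  /* *nr_cur_users of each queue's GPU */
	int best = 0, i;

	for (i = 1; i < 4; ++i) {
		if ((count[i] < count[best]) ||
		    ((count[i] == count[best]) && (holders[i] < holders[best])))
			best = i;
	}

	/* queues 0 and 1 are both empty; queue 1's GPU has fewer holders */
	printf("enqueue on replica %d\n", best);  /* prints 1 */
	return 0;
}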
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 287e4a0662d9..fd6398121fbf 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -368,7 +368,7 @@ static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_
 	int i;
 	struct task_struct *found = NULL;
 	for(i = 0; i < reg->nr_owners; ++i) {
-		if(reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
+		if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
 			found = reg->owners[i];
 		}
 	}
@@ -433,8 +433,9 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 
 	raw_spin_lock_irqsave(&reg->lock, flags);
 
-	if(reg->nr_owners < MAX_NR_OWNERS) {
-		for(i = 0; i < MAX_NR_OWNERS; ++i) {
+	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
+		TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
+		for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
 			if(reg->owners[i] == NULL) {
 				reg->owners[i] = t;
 
@@ -485,7 +486,9 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
 
 	raw_spin_lock_irqsave(&reg->lock, flags);
 
-	for(i = 0; i < reg->nr_owners; ++i) {
+	TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
+
+	for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
 		if(reg->owners[i] == t) {
 #ifdef CONFIG_LITMUS_SOFTIRQD
 			flush_pending(klitirqd_th, t);