author    Glenn Elliott <gelliott@cs.unc.edu>    2013-02-14 15:35:52 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>    2013-02-14 15:35:52 -0500
commit    e9b88341eb6b9fbe16139796f2f78e1f65793e5a (patch)
tree      92e4a45391094f904c98ea8d4d6765420311029f
parent    c063e088be8e1bcbb6a76b8cd087f8dc8b6923b2 (diff)
Generalize IKGLP implementation
Generalized the IKGLP implementation to support non-optimal configurations. Parameters allow the IKGLP to be configured as pure FIFO queues (aka KFMLP), as a single priority queue, or as a hybrid of the two (the optimal IKGLP). The maximum number of users within the FIFO queues is also parameterized, allowing more than 'm' tasks to hold replicas concurrently (though this breaks optimality). Also fixed a bug in locking.c where DGL priority inheritance is determined.
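The three configurations map onto the new struct ikglp_args and the sentinel values added to rt_param.h below. A rough sketch (the replica count k = 4 is arbitrary, and the single-priority-queue encoding is an assumption inferred from the validation logic in ikglp_new(), not something this patch names explicitly):

	/* Hybrid (optimal IKGLP): at most m requests sit in FIFOs of
	 * length ceil(m/k); everyone else waits in the PQ or as a donor. */
	struct ikglp_args optimal = {
		.nr_replicas  = 4,                       /* k */
		.max_in_fifos = IKGLP_M_IN_FIFOS,        /* m = number of CPUs */
		.max_fifo_len = IKGLP_OPTIMAL_FIFO_LEN,  /* computed as ceil(m/k) */
	};

	/* Pure FIFO queues (aka KFMLP): every request enqueues on a FIFO;
	 * the priority queue and donors are never used. */
	struct ikglp_args kfmlp_like = {
		.nr_replicas  = 4,
		.max_in_fifos = IKGLP_UNLIMITED_IN_FIFOS,
		.max_fifo_len = IKGLP_UNLIMITED_FIFO_LEN,
	};

	/* Single priority queue (assumed encoding): each FIFO holds only
	 * its replica holder, so waiters queue by priority, not FIFO order. */
	struct ikglp_args pq_only = {
		.nr_replicas  = 4,
		.max_in_fifos = 4,	/* == nr_replicas */
		.max_fifo_len = 1,
	};

With IKGLP_OPTIMAL_FIFO_LEN, ikglp_new() computes max_fifo_len = (m/k) + ((m%k) != 0), i.e. ceil(m/k); e.g., m = 8 CPUs and k = 3 replicas yields FIFO queues of length 3.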
-rw-r--r--  include/litmus/ikglp_lock.h   31
-rw-r--r--  include/litmus/kfmlp_lock.h   21
-rw-r--r--  include/litmus/locking.h       3
-rw-r--r--  include/litmus/rt_param.h     17
-rw-r--r--  litmus/ikglp_lock.c          245
-rw-r--r--  litmus/kfmlp_lock.c           36
-rw-r--r--  litmus/locking.c             177
7 files changed, 237 insertions, 293 deletions
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index f9178d3dfba9..91a905a5d101 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -58,7 +58,7 @@ struct fifo_queue
58 ikglp_donee_heap_node_t donee_heap_node; 58 ikglp_donee_heap_node_t donee_heap_node;
59 59
60 struct task_struct* hp_waiter; 60 struct task_struct* hp_waiter;
61 int count; /* number of waiters + holder */ 61 unsigned int count; /* number of waiters + holder */
62 62
63 struct nested_info nest; 63 struct nested_info nest;
64}; 64};
@@ -70,14 +70,14 @@ struct ikglp_semaphore
70 raw_spinlock_t lock; 70 raw_spinlock_t lock;
71 raw_spinlock_t real_lock; 71 raw_spinlock_t real_lock;
72 72
73 int nr_replicas; // AKA k 73 unsigned int nr_replicas; // AKA k
74 int m; 74 unsigned int max_fifo_len; // max len of a fifo queue
75 75
76 int max_fifo_len; // max len of a fifo queue 76 unsigned int max_in_fifos; // AKA m
77 int nr_in_fifos; 77 unsigned int nr_in_fifos;
78 78
79 struct binheap top_m; // min heap, base prio 79 struct binheap top_m; // min heap, base prio
80 int top_m_size; // number of nodes in top_m 80 unsigned int top_m_size; // number of nodes in top_m
81 81
82 struct binheap not_top_m; // max heap, base prio 82 struct binheap not_top_m; // max heap, base prio
83 83
@@ -103,7 +103,7 @@ int ikglp_lock(struct litmus_lock* l);
103int ikglp_unlock(struct litmus_lock* l); 103int ikglp_unlock(struct litmus_lock* l);
104int ikglp_close(struct litmus_lock* l); 104int ikglp_close(struct litmus_lock* l);
105void ikglp_free(struct litmus_lock* l); 105void ikglp_free(struct litmus_lock* l);
106struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); 106struct litmus_lock* ikglp_new(unsigned int m, struct litmus_lock_ops*, void* __user arg);
107 107
108 108
109 109
@@ -113,8 +113,8 @@ struct ikglp_queue_info
113{ 113{
114 struct fifo_queue* q; 114 struct fifo_queue* q;
115 lt_t estimated_len; 115 lt_t estimated_len;
116 int *nr_cur_users; 116 unsigned int *nr_cur_users;
117 int64_t *nr_aff_users; 117 unsigned int *nr_aff_users;
118}; 118};
119 119
120struct ikglp_affinity_ops 120struct ikglp_affinity_ops
@@ -138,12 +138,13 @@ struct ikglp_affinity
138 struct affinity_observer obs; 138 struct affinity_observer obs;
139 struct ikglp_affinity_ops *ops; 139 struct ikglp_affinity_ops *ops;
140 struct ikglp_queue_info *q_info; 140 struct ikglp_queue_info *q_info;
141 int *nr_cur_users_on_rsrc; 141 unsigned int *nr_cur_users_on_rsrc;
142 int64_t *nr_aff_on_rsrc; 142 unsigned int *nr_aff_on_rsrc;
143 int offset; 143 unsigned int offset;
144 int nr_simult; 144 unsigned int nr_simult;
145 int nr_rsrc; 145 unsigned int nr_rsrc;
146 int relax_max_fifo_len; 146
147 int relax_max_fifo_len:1;
147}; 148};
148 149
149static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) 150static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
index 3609a3a20424..2af953852e02 100644
--- a/include/litmus/kfmlp_lock.h
+++ b/include/litmus/kfmlp_lock.h
@@ -16,7 +16,7 @@ struct kfmlp_queue
16 wait_queue_head_t wait; 16 wait_queue_head_t wait;
17 struct task_struct* owner; 17 struct task_struct* owner;
18 struct task_struct* hp_waiter; 18 struct task_struct* hp_waiter;
19 int count; /* number of waiters + holder */ 19 unsigned int count; /* number of waiters + holder */
20}; 20};
21 21
22struct kfmlp_semaphore 22struct kfmlp_semaphore
@@ -25,7 +25,7 @@ struct kfmlp_semaphore
25 25
26 spinlock_t lock; 26 spinlock_t lock;
27 27
28 int num_resources; /* aka k */ 28 unsigned int num_resources; /* aka k */
29 29
30 struct kfmlp_queue *queues; /* array */ 30 struct kfmlp_queue *queues; /* array */
31 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ 31 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
@@ -52,7 +52,7 @@ struct kfmlp_queue_info
52{ 52{
53 struct kfmlp_queue* q; 53 struct kfmlp_queue* q;
54 lt_t estimated_len; 54 lt_t estimated_len;
55 int *nr_cur_users; 55 unsigned int *nr_cur_users;
56}; 56};
57 57
58struct kfmlp_affinity_ops 58struct kfmlp_affinity_ops
@@ -71,10 +71,10 @@ struct kfmlp_affinity
71 struct affinity_observer obs; 71 struct affinity_observer obs;
72 struct kfmlp_affinity_ops *ops; 72 struct kfmlp_affinity_ops *ops;
73 struct kfmlp_queue_info *q_info; 73 struct kfmlp_queue_info *q_info;
74 int *nr_cur_users_on_rsrc; 74 unsigned int *nr_cur_users_on_rsrc;
75 int offset; 75 unsigned int offset;
76 int nr_simult; 76 unsigned int nr_simult;
77 int nr_rsrc; 77 unsigned int nr_rsrc;
78}; 78};
79 79
80static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) 80static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
@@ -86,10 +86,9 @@ int kfmlp_aff_obs_close(struct affinity_observer*);
86void kfmlp_aff_obs_free(struct affinity_observer*); 86void kfmlp_aff_obs_free(struct affinity_observer*);
87 87
88#ifdef CONFIG_LITMUS_NVIDIA 88#ifdef CONFIG_LITMUS_NVIDIA
89struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, 89struct affinity_observer* kfmlp_gpu_aff_obs_new(
90 void* __user arg); 90 struct affinity_observer_ops*, void* __user arg);
91struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, 91struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, void* __user arg);
92 void* __user arg);
93#endif 92#endif
94 93
95#endif /* end affinity */ 94#endif /* end affinity */
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
index fc437811d2b6..b9c6a2b1d01e 100644
--- a/include/litmus/locking.h
+++ b/include/litmus/locking.h
@@ -66,8 +66,9 @@ typedef struct dgl_wait_state {
66} dgl_wait_state_t; 66} dgl_wait_state_t;
67 67
68void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait); 68void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait);
69void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/); 69struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/);
70 70
71void init_dgl_wait_state(dgl_wait_state_t* dgl_wait);
71void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait); 72void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait);
72int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key); 73int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key);
73struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait); 74struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait);
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 716fc034c5f4..c4cba8551c47 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -119,11 +119,24 @@ struct affinity_observer_args
119struct gpu_affinity_observer_args 119struct gpu_affinity_observer_args
120{ 120{
121 struct affinity_observer_args obs; 121 struct affinity_observer_args obs;
122 int replica_to_gpu_offset; 122 unsigned int replica_to_gpu_offset;
123 int nr_simult_users; 123 unsigned int rho;
124 int relaxed_rules; 124 int relaxed_rules;
125}; 125};
126 126
127
128#define IKGLP_M_IN_FIFOS (0u)
129#define IKGLP_UNLIMITED_IN_FIFOS (~0u)
130#define IKGLP_OPTIMAL_FIFO_LEN (0u)
131#define IKGLP_UNLIMITED_FIFO_LEN (~0u)
132
133struct ikglp_args
134{
135 unsigned int nr_replicas;
136 unsigned int max_in_fifos;
137 unsigned int max_fifo_len;
138};
139
127/* The definition of the data that is shared between the kernel and real-time 140/* The definition of the data that is shared between the kernel and real-time
128 * tasks via a shared page (see litmus/ctrldev.c). 141 * tasks via a shared page (see litmus/ctrldev.c).
129 * 142 *
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 3fd760799a75..cab0d7f938f9 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -103,8 +103,7 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
103 struct task_struct *queued, *found = NULL; 103 struct task_struct *queued, *found = NULL;
104 104
105 list_for_each(pos, &kqueue->wait.task_list) { 105 list_for_each(pos, &kqueue->wait.task_list) {
106 queued = (struct task_struct*) list_entry(pos, 106 queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private;
107 wait_queue_t, task_list)->private;
108 107
109 /* Compare task prios, find high prio task. */ 108 /* Compare task prios, find high prio task. */
110 if(queued != skip && litmus->compare(queued, found)) 109 if(queued != skip && litmus->compare(queued, found))
@@ -232,22 +231,14 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem,
232 struct task_struct *t, 231 struct task_struct *t,
233 ikglp_heap_node_t *node) 232 ikglp_heap_node_t *node)
234{ 233{
235
236
237 node->task = t; 234 node->task = t;
238 INIT_BINHEAP_NODE(&node->node); 235 INIT_BINHEAP_NODE(&node->node);
239 236
240 if(sem->top_m_size < sem->m) { 237 if(sem->top_m_size < sem->max_in_fifos) {
241 TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", 238 TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
242 t->comm, t->pid); 239 t->comm, t->pid);
243// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
244// print_global_list(sem->top_m.root, 1);
245
246 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); 240 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
247 ++(sem->top_m_size); 241 ++(sem->top_m_size);
248
249// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
250// print_global_list(sem->top_m.root, 1);
251 } 242 }
252 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { 243 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
253 ikglp_heap_node_t *evicted = 244 ikglp_heap_node_t *evicted =
@@ -257,12 +248,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem,
257 t->comm, t->pid, 248 t->comm, t->pid,
258 evicted->task->comm, evicted->task->pid); 249 evicted->task->comm, evicted->task->pid);
259 250
260// TRACE_CUR("Not-Top-M Before:\n");
261// print_global_list(sem->not_top_m.root, 1);
262// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
263// print_global_list(sem->top_m.root, 1);
264
265
266 binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); 251 binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
267 INIT_BINHEAP_NODE(&evicted->node); 252 INIT_BINHEAP_NODE(&evicted->node);
268 binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); 253 binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
@@ -279,8 +264,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem,
279 else { 264 else {
280 TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", 265 TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
281 t->comm, t->pid); 266 t->comm, t->pid);
282// TRACE_CUR("Not-Top-M Before:\n");
283// print_global_list(sem->not_top_m.root, 1);
284 267
285 binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); 268 binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
286 269
@@ -303,12 +286,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem,
303 if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { 286 if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
304 TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); 287 TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
305 288
306// TRACE_CUR("Not-Top-M Before:\n");
307// print_global_list(sem->not_top_m.root, 1);
308// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
309// print_global_list(sem->top_m.root, 1);
310
311
312 binheap_delete(&node->node, &sem->top_m); 289 binheap_delete(&node->node, &sem->top_m);
313 290
314 if(!binheap_empty(&sem->not_top_m)) { 291 if(!binheap_empty(&sem->not_top_m)) {
@@ -337,8 +314,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem,
337 } 314 }
338 else { 315 else {
339 TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); 316 TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
340// TRACE_CUR("Not-Top-M Before:\n");
341// print_global_list(sem->not_top_m.root, 1);
342 317
343 binheap_delete(&node->node, &sem->not_top_m); 318 binheap_delete(&node->node, &sem->not_top_m);
344 319
@@ -355,10 +330,6 @@ static void ikglp_add_donees(struct ikglp_semaphore *sem,
355 struct task_struct *t, 330 struct task_struct *t,
356 ikglp_donee_heap_node_t* node) 331 ikglp_donee_heap_node_t* node)
357{ 332{
358// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
359// TRACE_CUR("donees Before:\n");
360// print_donees(sem, sem->donees.root, 1);
361
362 node->task = t; 333 node->task = t;
363 node->donor_info = NULL; 334 node->donor_info = NULL;
364 node->fq = fq; 335 node->fq = fq;
@@ -928,7 +899,7 @@ int ikglp_lock(struct litmus_lock* l)
928 899
929 TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); 900 TRACE_CUR("Requesting a replica from lock %d.\n", l->ident);
930 901
931 if(sem->nr_in_fifos < sem->m) { 902 if(sem->nr_in_fifos < sem->max_in_fifos) {
932 // enqueue somewhere 903 // enqueue somewhere
933#ifdef CONFIG_LITMUS_AFFINITY_LOCKING 904#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
934 fq = (sem->aff_obs) ? 905 fq = (sem->aff_obs) ?
@@ -1272,10 +1243,13 @@ int ikglp_unlock(struct litmus_lock* l)
1272 donee = t; 1243 donee = t;
1273 1244
1274#ifdef CONFIG_LITMUS_AFFINITY_LOCKING 1245#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1275 if(sem->aff_obs) 1246 if(sem->aff_obs) {
1276 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); 1247 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1277 if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) 1248 if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) {
1278 fq_of_new_on_fq = fq; /* discard recommendation */ 1249 WARN_ON(1);
1250 fq_of_new_on_fq = fq;
1251 }
1252 }
1279 else 1253 else
1280 fq_of_new_on_fq = fq; 1254 fq_of_new_on_fq = fq;
1281#else 1255#else
@@ -1308,10 +1282,13 @@ int ikglp_unlock(struct litmus_lock* l)
1308 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); 1282 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
1309 1283
1310#ifdef CONFIG_LITMUS_AFFINITY_LOCKING 1284#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1311 if(sem->aff_obs) 1285 if(sem->aff_obs) {
1312 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); 1286 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1313 if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) 1287 if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) {
1314 fq_of_new_on_fq = fq; /* discard recommendation */ 1288 WARN_ON(1);
1289 fq_of_new_on_fq = fq;
1290 }
1291 }
1315 else 1292 else
1316 fq_of_new_on_fq = fq; 1293 fq_of_new_on_fq = fq;
1317#else 1294#else
@@ -1335,10 +1312,13 @@ int ikglp_unlock(struct litmus_lock* l)
1335 new_on_fq = pq_wait->task; 1312 new_on_fq = pq_wait->task;
1336 1313
1337#ifdef CONFIG_LITMUS_AFFINITY_LOCKING 1314#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1338 if(sem->aff_obs) 1315 if(sem->aff_obs) {
1339 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); 1316 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1340 if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) 1317 if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) {
1341 fq_of_new_on_fq = fq; /* discard recommendation */ 1318 WARN_ON(1);
1319 fq_of_new_on_fq = fq;
1320 }
1321 }
1342 else 1322 else
1343 fq_of_new_on_fq = fq; 1323 fq_of_new_on_fq = fq;
1344#else 1324#else
@@ -1663,26 +1643,44 @@ void ikglp_free(struct litmus_lock* l)
1663 1643
1664 1644
1665 1645
1666struct litmus_lock* ikglp_new(int m, 1646struct litmus_lock* ikglp_new(unsigned int m,
1667 struct litmus_lock_ops* ops, 1647 struct litmus_lock_ops* ops,
1668 void* __user arg) 1648 void* __user uarg)
1669{ 1649{
1650 /* TODO: Support trivial token lock, s.t. args.nr_replicas equals some
1651 * sentinel value, and implement special-case algorithms. There is currently
1652 * a lot of overhead for a trivial token lock since we allocate O(n)-worth
1653 * of data; this could be avoided with special-case algorithms. */
1654
1670 struct ikglp_semaphore* sem; 1655 struct ikglp_semaphore* sem;
1671 int nr_replicas = 0; 1656 struct ikglp_args args;
1672 int i; 1657 unsigned int i;
1673 1658
1674 BUG_ON(m <= 0); 1659 BUG_ON(m <= 0);
1675 1660
1676 if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) 1661 if(!access_ok(VERIFY_READ, uarg, sizeof(args)))
1677 { 1662 return(NULL);
1663 if(__copy_from_user(&args, uarg, sizeof(args)))
1664 return(NULL);
1665
1666 /* validation */
1667
1668 /* there must be at least one resource */
1669 if (args.nr_replicas < 1) {
1670 printk("Invalid number of replicas.\n");
1678 return(NULL); 1671 return(NULL);
1679 } 1672 }
1680 if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) 1673 /* IKGLP_OPTIMAL_FIFO_LEN can only be determined if max_in_fifos
1681 { 1674 * is IKGLP_M_IN_FIFOS (number of CPUs) */
1675 if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN &&
1676 args.max_in_fifos != IKGLP_M_IN_FIFOS) {
1677 printk("Cannot compute optimal FIFO length if max_in_fifos != IKGLP_M_IN_FIFOS\n");
1682 return(NULL); 1678 return(NULL);
1683 } 1679 }
1684 if(nr_replicas < 1) 1680 if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) &&
1685 { 1681 (args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) &&
1682 (args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) {
1683 printk("Not enough total FIFO space for specified max requests in FIFOs.\n");
1686 return(NULL); 1684 return(NULL);
1687 } 1685 }
1688 1686
@@ -1693,7 +1691,7 @@ struct litmus_lock* ikglp_new(int m,
1693 } 1691 }
1694 memset(sem, 0, sizeof(*sem)); 1692 memset(sem, 0, sizeof(*sem));
1695 1693
1696 sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); 1694 sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, GFP_KERNEL);
1697 if(!sem->fifo_queues) 1695 if(!sem->fifo_queues)
1698 { 1696 {
1699 kfree(sem); 1697 kfree(sem);
@@ -1712,17 +1710,21 @@ struct litmus_lock* ikglp_new(int m,
1712 1710
1713 raw_spin_lock_init(&sem->real_lock); 1711 raw_spin_lock_init(&sem->real_lock);
1714 1712
1715 sem->nr_replicas = nr_replicas; 1713 sem->nr_replicas = args.nr_replicas;
1716 sem->m = m; 1714 sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ?
1717 sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); 1715 m :
1716 args.max_in_fifos;
1717 sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ?
1718 (sem->max_in_fifos/args.nr_replicas) + ((sem->max_in_fifos%args.nr_replicas) != 0) :
1719 args.max_fifo_len;
1718 sem->nr_in_fifos = 0; 1720 sem->nr_in_fifos = 0;
1719 1721
1720 TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", 1722 TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n",
1721 sem->m, 1723 sem->max_in_fifos,
1722 sem->nr_replicas, 1724 sem->nr_replicas,
1723 sem->max_fifo_len); 1725 sem->max_fifo_len);
1724 1726
1725 for(i = 0; i < nr_replicas; ++i) 1727 for(i = 0; i < args.nr_replicas; ++i)
1726 { 1728 {
1727 struct fifo_queue* q = &(sem->fifo_queues[i]); 1729 struct fifo_queue* q = &(sem->fifo_queues[i]);
1728 1730
@@ -1766,33 +1768,13 @@ struct litmus_lock* ikglp_new(int m,
1766 1768
1767 1769
1768 1770
1771#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1769 1772
1773/****************************************************************************/
1774/* AFFINITY HEURISTICS */
1775/****************************************************************************/
1770 1776
1771 1777
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1795
1796static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) 1778static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
1797{ 1779{
1798 int gpu = replica % aff->nr_rsrc; 1780 int gpu = replica % aff->nr_rsrc;
@@ -1856,7 +1838,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1856 struct ikglp_affinity* ikglp_aff; 1838 struct ikglp_affinity* ikglp_aff;
1857 struct gpu_affinity_observer_args aff_args; 1839 struct gpu_affinity_observer_args aff_args;
1858 struct ikglp_semaphore* sem; 1840 struct ikglp_semaphore* sem;
1859 int i; 1841 unsigned int i;
1860 unsigned long flags; 1842 unsigned long flags;
1861 1843
1862 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { 1844 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
@@ -1873,23 +1855,17 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1873 return(NULL); 1855 return(NULL);
1874 } 1856 }
1875 1857
1876 if((aff_args.nr_simult_users <= 0) || 1858 if((aff_args.rho <= 0) ||
1877 (sem->nr_replicas%aff_args.nr_simult_users != 0)) { 1859 (sem->nr_replicas%aff_args.rho != 0)) {
1878 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " 1860 TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users "
1879 "(%d) per replica. #replicas should be evenly divisible " 1861 "(%u) per replica. #replicas should be evenly divisible "
1880 "by #simult_users.\n", 1862 "by #simult_users.\n",
1881 sem->litmus_lock.ident, 1863 sem->litmus_lock.ident,
1882 sem->nr_replicas, 1864 sem->nr_replicas,
1883 aff_args.nr_simult_users); 1865 aff_args.rho);
1884 return(NULL); 1866 return(NULL);
1885 } 1867 }
1886 1868
1887// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1888// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1889// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1890//// return(NULL);
1891// }
1892
1893 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); 1869 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1894 if(!ikglp_aff) { 1870 if(!ikglp_aff) {
1895 return(NULL); 1871 return(NULL);
@@ -1901,14 +1877,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1901 return(NULL); 1877 return(NULL);
1902 } 1878 }
1903 1879
1904 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); 1880 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL);
1905 if(!ikglp_aff->nr_cur_users_on_rsrc) { 1881 if(!ikglp_aff->nr_cur_users_on_rsrc) {
1906 kfree(ikglp_aff->q_info); 1882 kfree(ikglp_aff->q_info);
1907 kfree(ikglp_aff); 1883 kfree(ikglp_aff);
1908 return(NULL); 1884 return(NULL);
1909 } 1885 }
1910 1886
1911 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); 1887 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL);
1912 if(!ikglp_aff->nr_aff_on_rsrc) { 1888 if(!ikglp_aff->nr_aff_on_rsrc) {
1913 kfree(ikglp_aff->nr_cur_users_on_rsrc); 1889 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1914 kfree(ikglp_aff->q_info); 1890 kfree(ikglp_aff->q_info);
@@ -1920,7 +1896,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1920 1896
1921 ikglp_aff->ops = ikglp_ops; 1897 ikglp_aff->ops = ikglp_ops;
1922 ikglp_aff->offset = aff_args.replica_to_gpu_offset; 1898 ikglp_aff->offset = aff_args.replica_to_gpu_offset;
1923 ikglp_aff->nr_simult = aff_args.nr_simult_users; 1899 ikglp_aff->nr_simult = aff_args.rho;
1924 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; 1900 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
1925 ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; 1901 ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
1926 1902
@@ -1930,7 +1906,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1930 ikglp_aff->relax_max_fifo_len); 1906 ikglp_aff->relax_max_fifo_len);
1931 1907
1932 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); 1908 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
1933 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); 1909 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(unsigned int)*(ikglp_aff->nr_rsrc));
1934 1910
1935 for(i = 0; i < sem->nr_replicas; ++i) { 1911 for(i = 0; i < sem->nr_replicas; ++i) {
1936 ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; 1912 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -1950,9 +1926,6 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1950 return &ikglp_aff->obs; 1926 return &ikglp_aff->obs;
1951} 1927}
1952 1928
1953
1954
1955
1956static int gpu_replica_to_resource(struct ikglp_affinity* aff, 1929static int gpu_replica_to_resource(struct ikglp_affinity* aff,
1957 struct fifo_queue* fq) { 1930 struct fifo_queue* fq) {
1958 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); 1931 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
@@ -1960,29 +1933,28 @@ static int gpu_replica_to_resource(struct ikglp_affinity* aff,
1960} 1933}
1961 1934
1962 1935
1963// Smart IKGLP Affinity
1964 1936
1965//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) 1937/*--------------------------------------------------------------------------*/
1966//{ 1938/* ADVANCED AFFINITY HEURISITICS */
1967// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); 1939/* */
1968// struct ikglp_queue_info *shortest = &aff->q_info[0]; 1940/* These heuristics estimate FIFO length wait times and try to enqueue */
1969 // int i; 1941 /* tasks into the shortest queues. When two queues are equivalent, the GPU */
1970// 1942/* that maintains affinity is selected. When a task has no affinity, the */
1971// for(i = 1; i < sem->nr_replicas; ++i) { 1943/* heuristic tries to get the GPU with the fewest number of other tasks */
1972// if(aff->q_info[i].estimated_len < shortest->estimated_len) { 1944/* with affinity on that GPU. */
1973// shortest = &aff->q_info[i]; 1945/* */
1974// } 1946/* Heuristics to explore in the future: */
1975// } 1947/* - Utilization */
1976// 1948/* - Longest non-preemptive section */
1977// return(shortest); 1949/* - Criticality */
1978//} 1950/* - Task period */
1951/*--------------------------------------------------------------------------*/
1979 1952
1980struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) 1953struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
1981{ 1954{
1982 // advise_enqueue must be smart so as to not break IKGLP rules: 1955 // advise_enqueue must be smart so as to not break IKGLP rules:
1983 // * No queue can be greater than ceil(m/k) in length. We may return 1956 // * No queue can be greater than ceil(m/k) in length, unless
1984 // such a queue, but IKGLP will be smart enough as to send requests 1957 // 'relax_max_fifo_len' is asserted
1985 // to donors or PQ.
1986 // * Cannot let a queue idle if there exist waiting PQ/donors 1958 // * Cannot let a queue idle if there exist waiting PQ/donors
1987 // -- needed to guarantee parallel progress of waiters. 1959 // -- needed to guarantee parallel progress of waiters.
1988 // 1960 //
@@ -1993,14 +1965,15 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
1993 1965
1994 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); 1966 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
1995 lt_t min_len; 1967 lt_t min_len;
1996 int min_nr_users, min_nr_aff_users; 1968 unsigned int min_nr_users, min_nr_aff_users;
1997 struct ikglp_queue_info *shortest, *aff_queue; 1969 struct ikglp_queue_info *shortest, *aff_queue;
1998 struct fifo_queue *to_enqueue; 1970 struct fifo_queue *to_enqueue;
1999 int i; 1971 unsigned int i;
2000 int affinity_gpu; 1972 int affinity_gpu;
2001 1973
2002 int max_fifo_len = (aff->relax_max_fifo_len) ? 1974 unsigned int max_fifo_len = (aff->relax_max_fifo_len) ?
2003 sem->m : sem->max_fifo_len; 1975 sem->max_in_fifos : /* allow possibility of all requests on same queue */
1976 sem->max_fifo_len; /* constrain FIFO len */
2004 1977
2005 // if we have no affinity, find the GPU with the least number of users 1978 // if we have no affinity, find the GPU with the least number of users
2006 // with active affinity 1979 // with active affinity
@@ -2037,7 +2010,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2037 min_nr_aff_users = *(shortest->nr_aff_users); 2010 min_nr_aff_users = *(shortest->nr_aff_users);
2038 2011
2039 2012
2040 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", 2013 TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n",
2041 get_gpu_estimate(t, MIG_LOCAL), 2014 get_gpu_estimate(t, MIG_LOCAL),
2042 ikglp_get_idx(sem, shortest->q), 2015 ikglp_get_idx(sem, shortest->q),
2043 shortest->q->count, 2016 shortest->q->count,
@@ -2119,8 +2092,6 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2119 ikglp_get_idx(sem, sem->shortest_fifo_queue)); 2092 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2120 2093
2121 return to_enqueue; 2094 return to_enqueue;
2122
2123 //return(sem->shortest_fifo_queue);
2124} 2095}
2125 2096
2126 2097
@@ -2334,7 +2305,6 @@ static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
2334 2305
2335 donee = NULL; 2306 donee = NULL;
2336 donee_node = NULL; 2307 donee_node = NULL;
2337 //*dist_from_head = sem->max_fifo_len + 1;
2338 *dist_from_head = IKGLP_INVAL_DISTANCE; 2308 *dist_from_head = IKGLP_INVAL_DISTANCE;
2339 2309
2340 TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); 2310 TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
@@ -2630,7 +2600,6 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
2630 // decrement affinity count on old GPU 2600 // decrement affinity count on old GPU
2631 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; 2601 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
2632 --(aff->nr_aff_on_rsrc[aff_rsrc]); 2602 --(aff->nr_aff_on_rsrc[aff_rsrc]);
2633// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);
2634 2603
2635 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { 2604 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
2636 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); 2605 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
@@ -2676,12 +2645,10 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2676 if(last_gpu >= 0) { 2645 if(last_gpu >= 0) {
2677 int old_rsrc = last_gpu - aff->offset; 2646 int old_rsrc = last_gpu - aff->offset;
2678 --(aff->nr_aff_on_rsrc[old_rsrc]); 2647 --(aff->nr_aff_on_rsrc[old_rsrc]);
2679// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
2680 } 2648 }
2681 2649
2682 // increment affinity count on new GPU 2650 // increment affinity count on new GPU
2683 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); 2651 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
2684// aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
2685 tsk_rt(t)->rsrc_exit_cb_args = aff; 2652 tsk_rt(t)->rsrc_exit_cb_args = aff;
2686 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; 2653 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
2687 } 2654 }
@@ -2751,20 +2718,18 @@ struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* op
2751 2718
2752 2719
2753 2720
2754 2721/*--------------------------------------------------------------------------*/
2755 2722/* SIMPLE LOAD-BALANCING AFFINITY HEURISTIC */
2756 2723/*--------------------------------------------------------------------------*/
2757
2758// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
2759 2724
2760struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) 2725struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2761{ 2726{
2762 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); 2727 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2763 int min_count; 2728 unsigned int min_count;
2764 int min_nr_users; 2729 unsigned int min_nr_users;
2765 struct ikglp_queue_info *shortest; 2730 struct ikglp_queue_info *shortest;
2766 struct fifo_queue *to_enqueue; 2731 struct fifo_queue *to_enqueue;
2767 int i; 2732 unsigned int i;
2768 2733
2769 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); 2734 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
2770 2735
@@ -2772,13 +2737,13 @@ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, s
2772 min_count = shortest->q->count; 2737 min_count = shortest->q->count;
2773 min_nr_users = *(shortest->nr_cur_users); 2738 min_nr_users = *(shortest->nr_cur_users);
2774 2739
2775 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", 2740 TRACE_CUR("queue %d: waiters = %u, total holders = %u\n",
2776 ikglp_get_idx(sem, shortest->q), 2741 ikglp_get_idx(sem, shortest->q),
2777 shortest->q->count, 2742 shortest->q->count,
2778 min_nr_users); 2743 min_nr_users);
2779 2744
2780 for(i = 1; i < sem->nr_replicas; ++i) { 2745 for(i = 1; i < sem->nr_replicas; ++i) {
2781 int len = aff->q_info[i].q->count; 2746 unsigned int len = aff->q_info[i].q->count;
2782 2747
2783 // queue is smaller, or they're equal and the other has a smaller number 2748 // queue is smaller, or they're equal and the other has a smaller number
2784 // of total users. 2749 // of total users.
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 041561839976..7dd866185623 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -21,7 +21,7 @@ static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
21static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, 21static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
22 struct task_struct* holder) 22 struct task_struct* holder)
23{ 23{
24 int i; 24 unsigned int i;
25 for(i = 0; i < sem->num_resources; ++i) 25 for(i = 0; i < sem->num_resources; ++i)
26 if(sem->queues[i].owner == holder) 26 if(sem->queues[i].owner == holder)
27 return(&sem->queues[i]); 27 return(&sem->queues[i]);
@@ -79,7 +79,7 @@ static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
79{ 79{
80 /* must hold sem->lock */ 80 /* must hold sem->lock */
81 81
82 int i; 82 unsigned int i;
83 83
84 *to_steal = NULL; 84 *to_steal = NULL;
85 *to_steal_from = NULL; 85 *to_steal_from = NULL;
@@ -438,7 +438,7 @@ int kfmlp_close(struct litmus_lock* l)
438 struct kfmlp_queue *my_queue; 438 struct kfmlp_queue *my_queue;
439 unsigned long flags; 439 unsigned long flags;
440 440
441 int owner; 441 unsigned int owner;
442 442
443 spin_lock_irqsave(&sem->lock, flags); 443 spin_lock_irqsave(&sem->lock, flags);
444 444
@@ -465,8 +465,8 @@ void kfmlp_free(struct litmus_lock* l)
465struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) 465struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
466{ 466{
467 struct kfmlp_semaphore* sem; 467 struct kfmlp_semaphore* sem;
468 int num_resources = 0; 468 unsigned int num_resources = 0;
469 int i; 469 unsigned int i;
470 470
471 if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) 471 if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
472 { 472 {
@@ -560,7 +560,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
560 struct kfmlp_affinity* kfmlp_aff; 560 struct kfmlp_affinity* kfmlp_aff;
561 struct gpu_affinity_observer_args aff_args; 561 struct gpu_affinity_observer_args aff_args;
562 struct kfmlp_semaphore* sem; 562 struct kfmlp_semaphore* sem;
563 int i; 563 unsigned int i;
564 unsigned long flags; 564 unsigned long flags;
565 565
566 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { 566 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
@@ -577,14 +577,14 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
577 return(NULL); 577 return(NULL);
578 } 578 }
579 579
580 if((aff_args.nr_simult_users <= 0) || 580 if((aff_args.rho <= 0) ||
581 (sem->num_resources%aff_args.nr_simult_users != 0)) { 581 (sem->num_resources%aff_args.rho != 0)) {
582 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " 582 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
583 "(%d) per replica. #replicas should be evenly divisible " 583 "(%d) per replica. #replicas should be evenly divisible "
584 "by #simult_users.\n", 584 "by #simult_users.\n",
585 sem->litmus_lock.ident, 585 sem->litmus_lock.ident,
586 sem->num_resources, 586 sem->num_resources,
587 aff_args.nr_simult_users); 587 aff_args.rho);
588 return(NULL); 588 return(NULL);
589 } 589 }
590 590
@@ -605,7 +605,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
605 return(NULL); 605 return(NULL);
606 } 606 }
607 607
608 kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); 608 kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->num_resources / aff_args.rho), GFP_KERNEL);
609 if(!kfmlp_aff->nr_cur_users_on_rsrc) { 609 if(!kfmlp_aff->nr_cur_users_on_rsrc) {
610 kfree(kfmlp_aff->q_info); 610 kfree(kfmlp_aff->q_info);
611 kfree(kfmlp_aff); 611 kfree(kfmlp_aff);
@@ -616,10 +616,10 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
616 616
617 kfmlp_aff->ops = kfmlp_ops; 617 kfmlp_aff->ops = kfmlp_ops;
618 kfmlp_aff->offset = aff_args.replica_to_gpu_offset; 618 kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
619 kfmlp_aff->nr_simult = aff_args.nr_simult_users; 619 kfmlp_aff->nr_simult = aff_args.rho;
620 kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; 620 kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
621 621
622 memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); 622 memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(unsigned int)*(sem->num_resources / kfmlp_aff->nr_rsrc));
623 623
624 for(i = 0; i < sem->num_resources; ++i) { 624 for(i = 0; i < sem->num_resources; ++i) {
625 kfmlp_aff->q_info[i].q = &sem->queues[i]; 625 kfmlp_aff->q_info[i].q = &sem->queues[i];
@@ -669,10 +669,10 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct
669{ 669{
670 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); 670 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
671 lt_t min_len; 671 lt_t min_len;
672 int min_nr_users; 672 unsigned int min_nr_users;
673 struct kfmlp_queue_info *shortest; 673 struct kfmlp_queue_info *shortest;
674 struct kfmlp_queue *to_enqueue; 674 struct kfmlp_queue *to_enqueue;
675 int i; 675 unsigned int i;
676 int affinity_gpu; 676 int affinity_gpu;
677 677
678 // simply pick the shortest queue if, we have no affinity, or we have 678 // simply pick the shortest queue if, we have no affinity, or we have
@@ -893,11 +893,11 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op
893struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) 893struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
894{ 894{
895 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); 895 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
896 int min_count; 896 unsigned int min_count;
897 int min_nr_users; 897 unsigned int min_nr_users;
898 struct kfmlp_queue_info *shortest; 898 struct kfmlp_queue_info *shortest;
899 struct kfmlp_queue *to_enqueue; 899 struct kfmlp_queue *to_enqueue;
900 int i; 900 unsigned int i;
901 901
902// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); 902// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
903 903
@@ -911,7 +911,7 @@ struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff,
911 min_nr_users); 911 min_nr_users);
912 912
913 for(i = 1; i < sem->num_resources; ++i) { 913 for(i = 1; i < sem->num_resources; ++i) {
914 int len = aff->q_info[i].q->count; 914 unsigned int len = aff->q_info[i].q->count;
915 915
916 // queue is smaller, or they're equal and the other has a smaller number 916 // queue is smaller, or they're equal and the other has a smaller number
917 // of total users. 917 // of total users.
diff --git a/litmus/locking.c b/litmus/locking.c
index eddc67a4d36a..8ba46f85f5c6 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -234,12 +234,12 @@ void print_hp_waiters(struct binheap_node* n, int depth)
234 234
235#ifdef CONFIG_LITMUS_DGL_SUPPORT 235#ifdef CONFIG_LITMUS_DGL_SUPPORT
236 236
237struct prioq_mutex; 237struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
238
239void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
240{ 238{
241 int start = dgl_wait->last_primary; 239 int num_locks = dgl_wait->size;
242 extern void __dump_prioq_lock_info(struct prioq_mutex *mutex); 240 int last = dgl_wait->last_primary;
241 int start;
242 int idx;
243 243
244 /* 244 /*
245 We pick the next lock in reverse order. This causes inheritance propagation 245 We pick the next lock in reverse order. This causes inheritance propagation
@@ -250,55 +250,42 @@ void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lo
250 BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); 250 BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);
251 251
252 // note reverse order 252 // note reverse order
253 for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; 253 // Try to enable priority on a lock that has an owner.
254 dgl_wait->last_primary != start; 254 idx = start = (last != 0) ? last - 1 : num_locks - 1;
255 dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) 255 do {
256 { 256 struct litmus_lock *l = dgl_wait->locks[idx];
257 257
258 struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; 258 if(!l->ops->is_owner(l, dgl_wait->task) && l->ops->get_owner(l)) {
259 259 dgl_wait->last_primary = idx;
260 if(!l->ops->is_owner(l, dgl_wait->task) && 260 tsk_rt(dgl_wait->task)->blocked_lock = l;
261 l->ops->get_owner(l)) {
262
263 tsk_rt(dgl_wait->task)->blocked_lock =
264 dgl_wait->locks[dgl_wait->last_primary];
265 mb(); 261 mb();
266
267 TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); 262 TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident);
268
269 l->ops->enable_priority(l, dgl_wait); 263 l->ops->enable_priority(l, dgl_wait);
270 264 return(l);
271 return;
272 } 265 }
273 } 266 idx = (idx != 0) ? idx - 1 : num_locks - 1;
267 } while(idx != start);
274 268
275 // There was no one to push on. This can happen if the blocked task is 269 // There was no one to push on. This can happen if the blocked task is
276 // behind a task that is idling a prioq-mutex. 270 // behind a task that is idling a prioq-mutex.
277 271
278 // note reverse order 272 // note reverse order
279 dgl_wait->last_primary = start; 273 idx = (last != 0) ? last - 1 : num_locks - 1;
280 for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; 274 do {
281 dgl_wait->last_primary != start; 275 struct litmus_lock *l = dgl_wait->locks[idx];
282 dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1)
283 {
284
285 struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary];
286 276
287 if(!l->ops->is_owner(l, dgl_wait->task)) { 277 if(!l->ops->is_owner(l, dgl_wait->task)) {
288 278 dgl_wait->last_primary = idx;
289 tsk_rt(dgl_wait->task)->blocked_lock = 279 tsk_rt(dgl_wait->task)->blocked_lock = l;
290 dgl_wait->locks[dgl_wait->last_primary];
291 mb(); 280 mb();
292
293 TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); 281 TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident);
294
295 l->ops->enable_priority(l, dgl_wait); 282 l->ops->enable_priority(l, dgl_wait);
296 283 return(l);
297 return;
298 } 284 }
299 } 285 idx = (idx != 0) ? idx - 1 : num_locks - 1;
286 } while(idx != start);
300 287
301 BUG(); 288 return(NULL);
302} 289}
303 290
304int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) 291int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
@@ -333,7 +320,12 @@ struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
333 return task; 320 return task;
334} 321}
335 322
336void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) 323void init_dgl_wait_state(dgl_wait_state_t *dgl_wait)
324{
325 memset(dgl_wait, 0, sizeof(dgl_wait_state_t));
326}
327
328void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t *dgl_wait)
337{ 329{
338 init_waitqueue_entry(wq_node, dgl_wait->task); 330 init_waitqueue_entry(wq_node, dgl_wait->task);
339 wq_node->private = dgl_wait; 331 wq_node->private = dgl_wait;
@@ -403,83 +395,62 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
403 TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); 395 TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr);
404#endif 396#endif
405 397
406 dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
407
408 BUG_ON(dgl_wait->task != current); 398 BUG_ON(dgl_wait->task != current);
409 399
410 raw_spin_lock_irqsave(dgl_lock, irqflags);
411
412 dgl_wait->nr_remaining = dgl_wait->size; 400 dgl_wait->nr_remaining = dgl_wait->size;
413 401
402 dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
403 raw_spin_lock_irqsave(dgl_lock, irqflags);
404
414 // try to acquire each lock. enqueue (non-blocking) if it is unavailable. 405 // try to acquire each lock. enqueue (non-blocking) if it is unavailable.
415 for(i = 0; i < dgl_wait->size; ++i) { 406 for(i = 0; i < dgl_wait->size; ++i) {
416 struct litmus_lock *l = dgl_wait->locks[i]; 407 struct litmus_lock *tmp = dgl_wait->locks[i];
417 408
418 // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. 409 // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.
419 410
420 if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { 411 if(tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i])) {
421 --(dgl_wait->nr_remaining); 412 --(dgl_wait->nr_remaining);
422 TRACE_CUR("Acquired lock %d immediately.\n", l->ident); 413 TRACE_CUR("Acquired lock %d immediately.\n", tmp->ident);
423 } 414 }
424 } 415 }
425 416
426 if(dgl_wait->nr_remaining == 0) { 417 if(dgl_wait->nr_remaining == 0) {
427 // acquired entire group immediately 418 // acquired entire group immediately
428 TRACE_CUR("Acquired all locks in DGL immediately!\n"); 419 TRACE_CUR("Acquired all locks in DGL immediately!\n");
420 raw_spin_unlock_irqrestore(dgl_lock, irqflags);
429 } 421 }
430 else { 422 else {
423 struct litmus_lock *first_primary;
431 424
432 TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", 425 TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
433 dgl_wait->nr_remaining); 426 dgl_wait->nr_remaining);
434 427
435#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) 428 first_primary = select_next_lock(dgl_wait);
436 // KLUDGE: don't count this suspension as time in the critical gpu
437 // critical section
438 if(tsk_rt(dgl_wait->task)->held_gpus) {
439 tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
440 }
441#endif
442
443 // note reverse order. see comments in select_next_lock for reason.
444 for(i = dgl_wait->size - 1; i >= 0; --i) {
445 struct litmus_lock *l = dgl_wait->locks[i];
446 if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe
447
448 TRACE_CUR("Activating priority inheritance on lock %d\n",
449 l->ident);
450
451 TS_DGL_LOCK_SUSPEND;
452
453 l->ops->enable_priority(l, dgl_wait);
454 dgl_wait->last_primary = i;
455 429
456 TRACE_CUR("Suspending for lock %d\n", l->ident); 430 if (!first_primary) {
457 431 BUG();
458 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending 432// TRACE_CUR("We hold all the locks?\n");
433// raw_spin_unlock_irqrestore(dgl_lock, irqflags);
434// goto all_acquired;
435 }
459 436
460 suspend_for_lock(); // suspend!!! 437 TRACE_CUR("Suspending for lock %d\n", first_primary->ident);
461 438
462 TS_DGL_LOCK_RESUME; 439 TS_DGL_LOCK_SUSPEND;
463 440
464 TRACE_CUR("Woken up from DGL suspension.\n"); 441 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
442 suspend_for_lock();
465 443
466 goto all_acquired; // we should hold all locks when we wake up. 444 TS_DGL_LOCK_RESUME;
467 }
468 }
469 445
470 TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n"); 446 TRACE_CUR("Woken up from DGL suspension.\n");
471 //BUG();
472 } 447 }
473 448
474 raw_spin_unlock_irqrestore(dgl_lock, irqflags);
475
476all_acquired:
477
478 // FOR SANITY CHECK FOR TESTING 449 // FOR SANITY CHECK FOR TESTING
479// for(i = 0; i < dgl_wait->size; ++i) { 450 for(i = 0; i < dgl_wait->size; ++i) {
480// struct litmus_lock *l = dgl_wait->locks[i]; 451 struct litmus_lock *tmp = dgl_wait->locks[i];
481// BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); 452 BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task));
482// } 453 }
483 454
484 TRACE_CUR("Acquired entire DGL\n"); 455 TRACE_CUR("Acquired entire DGL\n");
485 456
@@ -493,7 +464,6 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait)
493 int i; 464 int i;
494 unsigned long irqflags; //, dummyflags; 465 unsigned long irqflags; //, dummyflags;
495 raw_spinlock_t *dgl_lock; 466 raw_spinlock_t *dgl_lock;
496 struct litmus_lock *l;
497 struct task_struct *t = current; 467 struct task_struct *t = current;
498 468
499#ifdef CONFIG_SCHED_DEBUG_TRACE 469#ifdef CONFIG_SCHED_DEBUG_TRACE
@@ -511,13 +481,19 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait)
511 481
512 dgl_wait->nr_remaining = dgl_wait->size; 482 dgl_wait->nr_remaining = dgl_wait->size;
513 483
484 /* enqueue for all locks */
514 for(i = 0; i < dgl_wait->size; ++i) { 485 for(i = 0; i < dgl_wait->size; ++i) {
515 struct litmus_lock *l = dgl_wait->locks[i]; 486 /* dgl_lock must only enqueue. cannot set TASK_UNINTERRUPTIBLE!!
516 // this should be a forced enqueue if atomic DGLs are needed. 487 * Note the difference in requirements with do_litmus_dgl_lock().
517 l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i]); 488 */
489 struct litmus_lock *tmp = dgl_wait->locks[i];
490 tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i]);
518 } 491 }
519 492
493 /* now try to take all locks */
520 if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { 494 if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) {
495 struct litmus_lock *l;
496
521 /* Failed to acquire all locks at once. 497 /* Failed to acquire all locks at once.
522 * Pick a lock to push on and suspend. */ 498 * Pick a lock to push on and suspend. */
523 TRACE_CUR("Could not atomically acquire all locks.\n"); 499 TRACE_CUR("Could not atomically acquire all locks.\n");
@@ -526,26 +502,13 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait)
526 * __attempt_atomic_dgl_acquire() may actually succeed. */ 502 * __attempt_atomic_dgl_acquire() may actually succeed. */
527 set_task_state(t, TASK_UNINTERRUPTIBLE); 503 set_task_state(t, TASK_UNINTERRUPTIBLE);
528 504
529#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) 505 l = select_next_lock(dgl_wait);
530 // KLUDGE: don't count this suspension as time in the critical gpu
531 // critical section
532 if(tsk_rt(t)->held_gpus) {
533 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
534 }
535#endif
536 506
537 // select a lock to push priority on 507 TRACE_CUR("Suspending for lock %d\n", l->ident);
538 dgl_wait->last_primary = 0; // default
539 select_next_lock(dgl_wait); // may change value of last_primary
540
541 l = dgl_wait->locks[dgl_wait->last_primary];
542 508
543 TS_DGL_LOCK_SUSPEND; 509 TS_DGL_LOCK_SUSPEND;
544 510
545 TRACE_CUR("Suspending for lock %d\n", l->ident);
546
547 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending 511 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
548
549 suspend_for_lock(); // suspend!!! 512 suspend_for_lock(); // suspend!!!
550 513
551 TS_DGL_LOCK_RESUME; 514 TS_DGL_LOCK_RESUME;
@@ -562,8 +525,8 @@ all_acquired:
562 525
563 // SANITY CHECK FOR TESTING 526 // SANITY CHECK FOR TESTING
564 for(i = 0; i < dgl_wait->size; ++i) { 527 for(i = 0; i < dgl_wait->size; ++i) {
565 struct litmus_lock *l = dgl_wait->locks[i]; 528 struct litmus_lock *tmp = dgl_wait->locks[i];
566 BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); 529 BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task));
567 } 530 }
568 531
569 TRACE_CUR("Acquired entire DGL\n"); 532 TRACE_CUR("Acquired entire DGL\n");
@@ -603,6 +566,8 @@ asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
603 err = sys_litmus_lock(dgl_ods[0]); 566 err = sys_litmus_lock(dgl_ods[0]);
604 } 567 }
605 else { 568 else {
569 init_dgl_wait_state(&dgl_wait_state);
570
606 for(i = 0; i < dgl_size; ++i) { 571 for(i = 0; i < dgl_size; ++i) {
607 struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); 572 struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
608 if(entry && is_lock(entry)) { 573 if(entry && is_lock(entry)) {