author     Glenn Elliott <gelliott@cs.unc.edu>   2013-02-14 15:35:52 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>   2013-02-14 15:35:52 -0500
commit     e9b88341eb6b9fbe16139796f2f78e1f65793e5a (patch)
tree       92e4a45391094f904c98ea8d4d6765420311029f
parent     c063e088be8e1bcbb6a76b8cd087f8dc8b6923b2 (diff)
Generalize IKGLP implementation
Generalized the IKGLP implementation to support non-optimal
configurations. Parameters allow the IKGLP to be configured as a set
of FIFO queues (aka KFMLP), as a single priority queue, or as a
hybrid of the two (the optimal IKGLP). The maximum number of requests
within the FIFO queues is also parameterized, allowing more than 'm'
requests to be enqueued in the FIFOs concurrently (though this breaks
optimality).

Also fixed a bug in locking.c in the code that determines DGL
priority inheritance.
-rw-r--r--   include/litmus/ikglp_lock.h |  31
-rw-r--r--   include/litmus/kfmlp_lock.h |  21
-rw-r--r--   include/litmus/locking.h    |   3
-rw-r--r--   include/litmus/rt_param.h   |  17
-rw-r--r--   litmus/ikglp_lock.c         | 245
-rw-r--r--   litmus/kfmlp_lock.c         |  36
-rw-r--r--   litmus/locking.c            | 177
7 files changed, 237 insertions, 293 deletions
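For orientation before the per-file diffs: the new user-visible knobs are gathered in `struct ikglp_args` (see the include/litmus/rt_param.h hunk below), and ikglp_new() interprets the IKGLP_* sentinels when it copies that struct in from userspace. The sketch below shows how the three configurations named in the commit message might be expressed. `ikglp_config()` and its `kind` argument are hypothetical helpers for illustration only, and the single-priority-queue settings are my reading of the patch rather than something it spells out.

```c
#include <litmus/rt_param.h>	/* struct ikglp_args, IKGLP_* sentinels */

/* Hypothetical helper: build the argument block that ikglp_new() copies
 * in from userspace.  'k' is the number of replicas being managed. */
static struct ikglp_args ikglp_config(unsigned int k, int kind)
{
	struct ikglp_args args = { .nr_replicas = k };

	switch (kind) {
	case 0:	/* optimal IKGLP: at most m (= #CPUs) requests in the FIFOs,
		 * each FIFO bounded at ceil(m/k) */
		args.max_in_fifos = IKGLP_M_IN_FIFOS;		/* 0u: "use m" */
		args.max_fifo_len = IKGLP_OPTIMAL_FIFO_LEN;	/* 0u: "use ceil(m/k)" */
		break;
	case 1:	/* KFMLP-like: FIFO queues only; the PQ and donor logic are
		 * never exercised (breaks optimality) */
		args.max_in_fifos = IKGLP_UNLIMITED_IN_FIFOS;	/* ~0u */
		args.max_fifo_len = IKGLP_UNLIMITED_FIFO_LEN;	/* ~0u */
		break;
	default: /* single priority queue (my reading): each FIFO holds only
		  * its current replica holder; all other waiters sit in the PQ */
		args.max_in_fifos = k;
		args.max_fifo_len = 1;
		break;
	}
	return args;
}
```

Whatever the configuration, ikglp_new() still rejects argument blocks whose total FIFO space cannot hold max_in_fifos requests, as the validation code in the litmus/ikglp_lock.c hunks below shows.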
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index f9178d3dfba9..91a905a5d101 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -58,7 +58,7 @@ struct fifo_queue | |||
58 | ikglp_donee_heap_node_t donee_heap_node; | 58 | ikglp_donee_heap_node_t donee_heap_node; |
59 | 59 | ||
60 | struct task_struct* hp_waiter; | 60 | struct task_struct* hp_waiter; |
61 | int count; /* number of waiters + holder */ | 61 | unsigned int count; /* number of waiters + holder */ |
62 | 62 | ||
63 | struct nested_info nest; | 63 | struct nested_info nest; |
64 | }; | 64 | }; |
@@ -70,14 +70,14 @@ struct ikglp_semaphore | |||
70 | raw_spinlock_t lock; | 70 | raw_spinlock_t lock; |
71 | raw_spinlock_t real_lock; | 71 | raw_spinlock_t real_lock; |
72 | 72 | ||
73 | int nr_replicas; // AKA k | 73 | unsigned int nr_replicas; // AKA k |
74 | int m; | 74 | unsigned int max_fifo_len; // max len of a fifo queue |
75 | 75 | ||
76 | int max_fifo_len; // max len of a fifo queue | 76 | unsigned int max_in_fifos; // AKA m |
77 | int nr_in_fifos; | 77 | unsigned int nr_in_fifos; |
78 | 78 | ||
79 | struct binheap top_m; // min heap, base prio | 79 | struct binheap top_m; // min heap, base prio |
80 | int top_m_size; // number of nodes in top_m | 80 | unsigned int top_m_size; // number of nodes in top_m |
81 | 81 | ||
82 | struct binheap not_top_m; // max heap, base prio | 82 | struct binheap not_top_m; // max heap, base prio |
83 | 83 | ||
@@ -103,7 +103,7 @@ int ikglp_lock(struct litmus_lock* l); | |||
103 | int ikglp_unlock(struct litmus_lock* l); | 103 | int ikglp_unlock(struct litmus_lock* l); |
104 | int ikglp_close(struct litmus_lock* l); | 104 | int ikglp_close(struct litmus_lock* l); |
105 | void ikglp_free(struct litmus_lock* l); | 105 | void ikglp_free(struct litmus_lock* l); |
106 | struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); | 106 | struct litmus_lock* ikglp_new(unsigned int m, struct litmus_lock_ops*, void* __user arg); |
107 | 107 | ||
108 | 108 | ||
109 | 109 | ||
@@ -113,8 +113,8 @@ struct ikglp_queue_info | |||
113 | { | 113 | { |
114 | struct fifo_queue* q; | 114 | struct fifo_queue* q; |
115 | lt_t estimated_len; | 115 | lt_t estimated_len; |
116 | int *nr_cur_users; | 116 | unsigned int *nr_cur_users; |
117 | int64_t *nr_aff_users; | 117 | unsigned int *nr_aff_users; |
118 | }; | 118 | }; |
119 | 119 | ||
120 | struct ikglp_affinity_ops | 120 | struct ikglp_affinity_ops |
@@ -138,12 +138,13 @@ struct ikglp_affinity | |||
138 | struct affinity_observer obs; | 138 | struct affinity_observer obs; |
139 | struct ikglp_affinity_ops *ops; | 139 | struct ikglp_affinity_ops *ops; |
140 | struct ikglp_queue_info *q_info; | 140 | struct ikglp_queue_info *q_info; |
141 | int *nr_cur_users_on_rsrc; | 141 | unsigned int *nr_cur_users_on_rsrc; |
142 | int64_t *nr_aff_on_rsrc; | 142 | unsigned int *nr_aff_on_rsrc; |
143 | int offset; | 143 | unsigned int offset; |
144 | int nr_simult; | 144 | unsigned int nr_simult; |
145 | int nr_rsrc; | 145 | unsigned int nr_rsrc; |
146 | int relax_max_fifo_len; | 146 | |
147 | int relax_max_fifo_len:1; | ||
147 | }; | 148 | }; |
148 | 149 | ||
149 | static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) | 150 | static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) |
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
index 3609a3a20424..2af953852e02 100644
--- a/include/litmus/kfmlp_lock.h
+++ b/include/litmus/kfmlp_lock.h
@@ -16,7 +16,7 @@ struct kfmlp_queue | |||
16 | wait_queue_head_t wait; | 16 | wait_queue_head_t wait; |
17 | struct task_struct* owner; | 17 | struct task_struct* owner; |
18 | struct task_struct* hp_waiter; | 18 | struct task_struct* hp_waiter; |
19 | int count; /* number of waiters + holder */ | 19 | unsigned int count; /* number of waiters + holder */ |
20 | }; | 20 | }; |
21 | 21 | ||
22 | struct kfmlp_semaphore | 22 | struct kfmlp_semaphore |
@@ -25,7 +25,7 @@ struct kfmlp_semaphore | |||
25 | 25 | ||
26 | spinlock_t lock; | 26 | spinlock_t lock; |
27 | 27 | ||
28 | int num_resources; /* aka k */ | 28 | unsigned int num_resources; /* aka k */ |
29 | 29 | ||
30 | struct kfmlp_queue *queues; /* array */ | 30 | struct kfmlp_queue *queues; /* array */ |
31 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ | 31 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ |
@@ -52,7 +52,7 @@ struct kfmlp_queue_info | |||
52 | { | 52 | { |
53 | struct kfmlp_queue* q; | 53 | struct kfmlp_queue* q; |
54 | lt_t estimated_len; | 54 | lt_t estimated_len; |
55 | int *nr_cur_users; | 55 | unsigned int *nr_cur_users; |
56 | }; | 56 | }; |
57 | 57 | ||
58 | struct kfmlp_affinity_ops | 58 | struct kfmlp_affinity_ops |
@@ -71,10 +71,10 @@ struct kfmlp_affinity | |||
71 | struct affinity_observer obs; | 71 | struct affinity_observer obs; |
72 | struct kfmlp_affinity_ops *ops; | 72 | struct kfmlp_affinity_ops *ops; |
73 | struct kfmlp_queue_info *q_info; | 73 | struct kfmlp_queue_info *q_info; |
74 | int *nr_cur_users_on_rsrc; | 74 | unsigned int *nr_cur_users_on_rsrc; |
75 | int offset; | 75 | unsigned int offset; |
76 | int nr_simult; | 76 | unsigned int nr_simult; |
77 | int nr_rsrc; | 77 | unsigned int nr_rsrc; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) | 80 | static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) |
@@ -86,10 +86,9 @@ int kfmlp_aff_obs_close(struct affinity_observer*); | |||
86 | void kfmlp_aff_obs_free(struct affinity_observer*); | 86 | void kfmlp_aff_obs_free(struct affinity_observer*); |
87 | 87 | ||
88 | #ifdef CONFIG_LITMUS_NVIDIA | 88 | #ifdef CONFIG_LITMUS_NVIDIA |
89 | struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, | 89 | struct affinity_observer* kfmlp_gpu_aff_obs_new( |
90 | void* __user arg); | 90 | struct affinity_observer_ops*, void* __user arg); |
91 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, | 91 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, void* __user arg); |
92 | void* __user arg); | ||
93 | #endif | 92 | #endif |
94 | 93 | ||
95 | #endif /* end affinity */ | 94 | #endif /* end affinity */ |
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
index fc437811d2b6..b9c6a2b1d01e 100644
--- a/include/litmus/locking.h
+++ b/include/litmus/locking.h
@@ -66,8 +66,9 @@ typedef struct dgl_wait_state { | |||
66 | } dgl_wait_state_t; | 66 | } dgl_wait_state_t; |
67 | 67 | ||
68 | void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait); | 68 | void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait); |
69 | void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/); | 69 | struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/); |
70 | 70 | ||
71 | void init_dgl_wait_state(dgl_wait_state_t* dgl_wait); | ||
71 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait); | 72 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait); |
72 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key); | 73 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key); |
73 | struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait); | 74 | struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait); |
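The prototype changes above (select_next_lock() now returns the lock it pushed priority onto, and init_dgl_wait_state() is new) change how callers in litmus/locking.c drive a group-lock request. A rough sketch of the new calling pattern, condensed from the do_litmus_dgl_lock() changes later in this patch (spinlock handling and timestamps omitted; this is illustrative, not verbatim):

```c
dgl_wait_state_t dgl_wait;
struct litmus_lock *primary;

init_dgl_wait_state(&dgl_wait);	/* new helper: zero the request state */
/* ...caller fills in dgl_wait.task, dgl_wait.locks[] and dgl_wait.size,
 * then tries each lock non-blockingly; suppose some remain contended... */

/* select_next_lock() enables priority inheritance on the contended lock
 * it picks and now also returns that lock (NULL if there is none). */
primary = select_next_lock(&dgl_wait);
if (primary) {
	TRACE_CUR("Suspending for lock %d\n", primary->ident);
	suspend_for_lock();	/* wake up holding the whole group */
}
```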
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 716fc034c5f4..c4cba8551c47 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -119,11 +119,24 @@ struct affinity_observer_args | |||
119 | struct gpu_affinity_observer_args | 119 | struct gpu_affinity_observer_args |
120 | { | 120 | { |
121 | struct affinity_observer_args obs; | 121 | struct affinity_observer_args obs; |
122 | int replica_to_gpu_offset; | 122 | unsigned int replica_to_gpu_offset; |
123 | int nr_simult_users; | 123 | unsigned int rho; |
124 | int relaxed_rules; | 124 | int relaxed_rules; |
125 | }; | 125 | }; |
126 | 126 | ||
127 | |||
128 | #define IKGLP_M_IN_FIFOS (0u) | ||
129 | #define IKGLP_UNLIMITED_IN_FIFOS (~0u) | ||
130 | #define IKGLP_OPTIMAL_FIFO_LEN (0u) | ||
131 | #define IKGLP_UNLIMITED_FIFO_LEN (~0u) | ||
132 | |||
133 | struct ikglp_args | ||
134 | { | ||
135 | unsigned int nr_replicas; | ||
136 | unsigned int max_in_fifos; | ||
137 | unsigned int max_fifo_len; | ||
138 | }; | ||
139 | |||
127 | /* The definition of the data that is shared between the kernel and real-time | 140 | /* The definition of the data that is shared between the kernel and real-time |
128 | * tasks via a shared page (see litmus/ctrldev.c). | 141 | * tasks via a shared page (see litmus/ctrldev.c). |
129 | * | 142 | * |
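One detail worth calling out about the sentinels above: IKGLP_OPTIMAL_FIFO_LEN is only accepted together with IKGLP_M_IN_FIFOS, in which case ikglp_new() below derives the per-queue bound with integer arithmetic equivalent to ceil(m/k). The helper below is illustrative (the patch computes this inline); m and k stand for the values the lock is created with.

```c
/* Optimal per-FIFO bound, as computed in the ikglp_new() hunk below:
 * integer ceil(m/k).  E.g. m = 8 CPUs, k = 3 replicas -> 8/3 + 1 = 3. */
static unsigned int optimal_fifo_len(unsigned int m, unsigned int k)
{
	return (m / k) + ((m % k) != 0);
}
```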
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 3fd760799a75..cab0d7f938f9 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -103,8 +103,7 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, | |||
103 | struct task_struct *queued, *found = NULL; | 103 | struct task_struct *queued, *found = NULL; |
104 | 104 | ||
105 | list_for_each(pos, &kqueue->wait.task_list) { | 105 | list_for_each(pos, &kqueue->wait.task_list) { |
106 | queued = (struct task_struct*) list_entry(pos, | 106 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private; |
107 | wait_queue_t, task_list)->private; | ||
108 | 107 | ||
109 | /* Compare task prios, find high prio task. */ | 108 | /* Compare task prios, find high prio task. */ |
110 | if(queued != skip && litmus->compare(queued, found)) | 109 | if(queued != skip && litmus->compare(queued, found)) |
@@ -232,22 +231,14 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
232 | struct task_struct *t, | 231 | struct task_struct *t, |
233 | ikglp_heap_node_t *node) | 232 | ikglp_heap_node_t *node) |
234 | { | 233 | { |
235 | |||
236 | |||
237 | node->task = t; | 234 | node->task = t; |
238 | INIT_BINHEAP_NODE(&node->node); | 235 | INIT_BINHEAP_NODE(&node->node); |
239 | 236 | ||
240 | if(sem->top_m_size < sem->m) { | 237 | if(sem->top_m_size < sem->max_in_fifos) { |
241 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", | 238 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", |
242 | t->comm, t->pid); | 239 | t->comm, t->pid); |
243 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
244 | // print_global_list(sem->top_m.root, 1); | ||
245 | |||
246 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); | 240 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); |
247 | ++(sem->top_m_size); | 241 | ++(sem->top_m_size); |
248 | |||
249 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | ||
250 | // print_global_list(sem->top_m.root, 1); | ||
251 | } | 242 | } |
252 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { | 243 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { |
253 | ikglp_heap_node_t *evicted = | 244 | ikglp_heap_node_t *evicted = |
@@ -257,12 +248,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
257 | t->comm, t->pid, | 248 | t->comm, t->pid, |
258 | evicted->task->comm, evicted->task->pid); | 249 | evicted->task->comm, evicted->task->pid); |
259 | 250 | ||
260 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
261 | // print_global_list(sem->not_top_m.root, 1); | ||
262 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
263 | // print_global_list(sem->top_m.root, 1); | ||
264 | |||
265 | |||
266 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); | 251 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); |
267 | INIT_BINHEAP_NODE(&evicted->node); | 252 | INIT_BINHEAP_NODE(&evicted->node); |
268 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); | 253 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); |
@@ -279,8 +264,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
279 | else { | 264 | else { |
280 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", | 265 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", |
281 | t->comm, t->pid); | 266 | t->comm, t->pid); |
282 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
283 | // print_global_list(sem->not_top_m.root, 1); | ||
284 | 267 | ||
285 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); | 268 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); |
286 | 269 | ||
@@ -303,12 +286,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem, | |||
303 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { | 286 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { |
304 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); | 287 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); |
305 | 288 | ||
306 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
307 | // print_global_list(sem->not_top_m.root, 1); | ||
308 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
309 | // print_global_list(sem->top_m.root, 1); | ||
310 | |||
311 | |||
312 | binheap_delete(&node->node, &sem->top_m); | 289 | binheap_delete(&node->node, &sem->top_m); |
313 | 290 | ||
314 | if(!binheap_empty(&sem->not_top_m)) { | 291 | if(!binheap_empty(&sem->not_top_m)) { |
@@ -337,8 +314,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem, | |||
337 | } | 314 | } |
338 | else { | 315 | else { |
339 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); | 316 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); |
340 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
341 | // print_global_list(sem->not_top_m.root, 1); | ||
342 | 317 | ||
343 | binheap_delete(&node->node, &sem->not_top_m); | 318 | binheap_delete(&node->node, &sem->not_top_m); |
344 | 319 | ||
@@ -355,10 +330,6 @@ static void ikglp_add_donees(struct ikglp_semaphore *sem, | |||
355 | struct task_struct *t, | 330 | struct task_struct *t, |
356 | ikglp_donee_heap_node_t* node) | 331 | ikglp_donee_heap_node_t* node) |
357 | { | 332 | { |
358 | // TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid); | ||
359 | // TRACE_CUR("donees Before:\n"); | ||
360 | // print_donees(sem, sem->donees.root, 1); | ||
361 | |||
362 | node->task = t; | 333 | node->task = t; |
363 | node->donor_info = NULL; | 334 | node->donor_info = NULL; |
364 | node->fq = fq; | 335 | node->fq = fq; |
@@ -928,7 +899,7 @@ int ikglp_lock(struct litmus_lock* l) | |||
928 | 899 | ||
929 | TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); | 900 | TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); |
930 | 901 | ||
931 | if(sem->nr_in_fifos < sem->m) { | 902 | if(sem->nr_in_fifos < sem->max_in_fifos) { |
932 | // enqueue somwhere | 903 | // enqueue somwhere |
933 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 904 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
934 | fq = (sem->aff_obs) ? | 905 | fq = (sem->aff_obs) ? |
@@ -1272,10 +1243,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1272 | donee = t; | 1243 | donee = t; |
1273 | 1244 | ||
1274 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1245 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1275 | if(sem->aff_obs) | 1246 | if(sem->aff_obs) { |
1276 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1247 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1277 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1248 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1278 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1249 | WARN_ON(1); |
1250 | fq_of_new_on_fq = fq; | ||
1251 | } | ||
1252 | } | ||
1279 | else | 1253 | else |
1280 | fq_of_new_on_fq = fq; | 1254 | fq_of_new_on_fq = fq; |
1281 | #else | 1255 | #else |
@@ -1308,10 +1282,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1308 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); | 1282 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); |
1309 | 1283 | ||
1310 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1284 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1311 | if(sem->aff_obs) | 1285 | if(sem->aff_obs) { |
1312 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1286 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1313 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1287 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1314 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1288 | WARN_ON(1); |
1289 | fq_of_new_on_fq = fq; | ||
1290 | } | ||
1291 | } | ||
1315 | else | 1292 | else |
1316 | fq_of_new_on_fq = fq; | 1293 | fq_of_new_on_fq = fq; |
1317 | #else | 1294 | #else |
@@ -1335,10 +1312,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1335 | new_on_fq = pq_wait->task; | 1312 | new_on_fq = pq_wait->task; |
1336 | 1313 | ||
1337 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1314 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1338 | if(sem->aff_obs) | 1315 | if(sem->aff_obs) { |
1339 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1316 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1340 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1317 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1341 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1318 | WARN_ON(1); |
1319 | fq_of_new_on_fq = fq; | ||
1320 | } | ||
1321 | } | ||
1342 | else | 1322 | else |
1343 | fq_of_new_on_fq = fq; | 1323 | fq_of_new_on_fq = fq; |
1344 | #else | 1324 | #else |
@@ -1663,26 +1643,44 @@ void ikglp_free(struct litmus_lock* l) | |||
1663 | 1643 | ||
1664 | 1644 | ||
1665 | 1645 | ||
1666 | struct litmus_lock* ikglp_new(int m, | 1646 | struct litmus_lock* ikglp_new(unsigned int m, |
1667 | struct litmus_lock_ops* ops, | 1647 | struct litmus_lock_ops* ops, |
1668 | void* __user arg) | 1648 | void* __user uarg) |
1669 | { | 1649 | { |
1650 | /* TODO: Support trivial token lock, s.t. args.nr_replicas equals some | ||
1651 | * sentinel value, and implement special-case algorithms. There is currently | ||
1652 | * a lot of overhead for a trivial token lock since we allocate O(n)-worth | ||
1653 | * of data; this could be avoided with special-case algorithms. */ | ||
1654 | |||
1670 | struct ikglp_semaphore* sem; | 1655 | struct ikglp_semaphore* sem; |
1671 | int nr_replicas = 0; | 1656 | struct ikglp_args args; |
1672 | int i; | 1657 | unsigned int i; |
1673 | 1658 | ||
1674 | BUG_ON(m <= 0); | 1659 | BUG_ON(m <= 0); |
1675 | 1660 | ||
1676 | if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) | 1661 | if(!access_ok(VERIFY_READ, uarg, sizeof(args))) |
1677 | { | 1662 | return(NULL); |
1663 | if(__copy_from_user(&args, uarg, sizeof(args))) | ||
1664 | return(NULL); | ||
1665 | |||
1666 | /* validation */ | ||
1667 | |||
1668 | /* there must be at least one resource */ | ||
1669 | if (args.nr_replicas < 1) { | ||
1670 | printk("Invalid number of replicas.\n"); | ||
1678 | return(NULL); | 1671 | return(NULL); |
1679 | } | 1672 | } |
1680 | if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) | 1673 | /* IKGLP_OPTIMAL_FIFO_LEN can only be determined if nr_max_holders |
1681 | { | 1674 | * is IKGLP_M_HOLDERS (number of CPUs) */ |
1675 | if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN && | ||
1676 | args.max_in_fifos != IKGLP_M_IN_FIFOS) { | ||
1677 | printk("Cannot compute optimal FIFO length if max_in_fifos != IKGLP_M_IN_FIFOS\n"); | ||
1682 | return(NULL); | 1678 | return(NULL); |
1683 | } | 1679 | } |
1684 | if(nr_replicas < 1) | 1680 | if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) && |
1685 | { | 1681 | (args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) && |
1682 | (args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) { | ||
1683 | printk("Not enough total FIFO space for specified max requests in FIFOs.\n"); | ||
1686 | return(NULL); | 1684 | return(NULL); |
1687 | } | 1685 | } |
1688 | 1686 | ||
@@ -1693,7 +1691,7 @@ struct litmus_lock* ikglp_new(int m, | |||
1693 | } | 1691 | } |
1694 | memset(sem, 0, sizeof(*sem)); | 1692 | memset(sem, 0, sizeof(*sem)); |
1695 | 1693 | ||
1696 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); | 1694 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, GFP_KERNEL); |
1697 | if(!sem->fifo_queues) | 1695 | if(!sem->fifo_queues) |
1698 | { | 1696 | { |
1699 | kfree(sem); | 1697 | kfree(sem); |
@@ -1712,17 +1710,21 @@ struct litmus_lock* ikglp_new(int m, | |||
1712 | 1710 | ||
1713 | raw_spin_lock_init(&sem->real_lock); | 1711 | raw_spin_lock_init(&sem->real_lock); |
1714 | 1712 | ||
1715 | sem->nr_replicas = nr_replicas; | 1713 | sem->nr_replicas = args.nr_replicas; |
1716 | sem->m = m; | 1714 | sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ? |
1717 | sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); | 1715 | m : |
1716 | args.max_in_fifos; | ||
1717 | sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ? | ||
1718 | (sem->max_in_fifos/args.nr_replicas) + ((sem->max_in_fifos%args.nr_replicas) != 0) : | ||
1719 | args.max_fifo_len; | ||
1718 | sem->nr_in_fifos = 0; | 1720 | sem->nr_in_fifos = 0; |
1719 | 1721 | ||
1720 | TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", | 1722 | TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n", |
1721 | sem->m, | 1723 | sem->max_in_fifos, |
1722 | sem->nr_replicas, | 1724 | sem->nr_replicas, |
1723 | sem->max_fifo_len); | 1725 | sem->max_fifo_len); |
1724 | 1726 | ||
1725 | for(i = 0; i < nr_replicas; ++i) | 1727 | for(i = 0; i < args.nr_replicas; ++i) |
1726 | { | 1728 | { |
1727 | struct fifo_queue* q = &(sem->fifo_queues[i]); | 1729 | struct fifo_queue* q = &(sem->fifo_queues[i]); |
1728 | 1730 | ||
@@ -1766,33 +1768,13 @@ struct litmus_lock* ikglp_new(int m, | |||
1766 | 1768 | ||
1767 | 1769 | ||
1768 | 1770 | ||
1771 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1769 | 1772 | ||
1773 | /****************************************************************************/ | ||
1774 | /* AFFINITY HEURISTICS */ | ||
1775 | /****************************************************************************/ | ||
1770 | 1776 | ||
1771 | 1777 | ||
1772 | |||
1773 | |||
1774 | |||
1775 | |||
1776 | |||
1777 | |||
1778 | |||
1779 | |||
1780 | |||
1781 | |||
1782 | |||
1783 | |||
1784 | |||
1785 | |||
1786 | |||
1787 | |||
1788 | |||
1789 | |||
1790 | |||
1791 | |||
1792 | |||
1793 | |||
1794 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1795 | |||
1796 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) | 1778 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) |
1797 | { | 1779 | { |
1798 | int gpu = replica % aff->nr_rsrc; | 1780 | int gpu = replica % aff->nr_rsrc; |
@@ -1856,7 +1838,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1856 | struct ikglp_affinity* ikglp_aff; | 1838 | struct ikglp_affinity* ikglp_aff; |
1857 | struct gpu_affinity_observer_args aff_args; | 1839 | struct gpu_affinity_observer_args aff_args; |
1858 | struct ikglp_semaphore* sem; | 1840 | struct ikglp_semaphore* sem; |
1859 | int i; | 1841 | unsigned int i; |
1860 | unsigned long flags; | 1842 | unsigned long flags; |
1861 | 1843 | ||
1862 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | 1844 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { |
@@ -1873,23 +1855,17 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1873 | return(NULL); | 1855 | return(NULL); |
1874 | } | 1856 | } |
1875 | 1857 | ||
1876 | if((aff_args.nr_simult_users <= 0) || | 1858 | if((aff_args.rho <= 0) || |
1877 | (sem->nr_replicas%aff_args.nr_simult_users != 0)) { | 1859 | (sem->nr_replicas%aff_args.rho != 0)) { |
1878 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | 1860 | TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users " |
1879 | "(%d) per replica. #replicas should be evenly divisible " | 1861 | "(%u) per replica. #replicas should be evenly divisible " |
1880 | "by #simult_users.\n", | 1862 | "by #simult_users.\n", |
1881 | sem->litmus_lock.ident, | 1863 | sem->litmus_lock.ident, |
1882 | sem->nr_replicas, | 1864 | sem->nr_replicas, |
1883 | aff_args.nr_simult_users); | 1865 | aff_args.rho); |
1884 | return(NULL); | 1866 | return(NULL); |
1885 | } | 1867 | } |
1886 | 1868 | ||
1887 | // if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
1888 | // TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | ||
1889 | // NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
1890 | //// return(NULL); | ||
1891 | // } | ||
1892 | |||
1893 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | 1869 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); |
1894 | if(!ikglp_aff) { | 1870 | if(!ikglp_aff) { |
1895 | return(NULL); | 1871 | return(NULL); |
@@ -1901,14 +1877,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1901 | return(NULL); | 1877 | return(NULL); |
1902 | } | 1878 | } |
1903 | 1879 | ||
1904 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | 1880 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); |
1905 | if(!ikglp_aff->nr_cur_users_on_rsrc) { | 1881 | if(!ikglp_aff->nr_cur_users_on_rsrc) { |
1906 | kfree(ikglp_aff->q_info); | 1882 | kfree(ikglp_aff->q_info); |
1907 | kfree(ikglp_aff); | 1883 | kfree(ikglp_aff); |
1908 | return(NULL); | 1884 | return(NULL); |
1909 | } | 1885 | } |
1910 | 1886 | ||
1911 | ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | 1887 | ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); |
1912 | if(!ikglp_aff->nr_aff_on_rsrc) { | 1888 | if(!ikglp_aff->nr_aff_on_rsrc) { |
1913 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | 1889 | kfree(ikglp_aff->nr_cur_users_on_rsrc); |
1914 | kfree(ikglp_aff->q_info); | 1890 | kfree(ikglp_aff->q_info); |
@@ -1920,7 +1896,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1920 | 1896 | ||
1921 | ikglp_aff->ops = ikglp_ops; | 1897 | ikglp_aff->ops = ikglp_ops; |
1922 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; | 1898 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; |
1923 | ikglp_aff->nr_simult = aff_args.nr_simult_users; | 1899 | ikglp_aff->nr_simult = aff_args.rho; |
1924 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; | 1900 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; |
1925 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; | 1901 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; |
1926 | 1902 | ||
@@ -1930,7 +1906,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1930 | ikglp_aff->relax_max_fifo_len); | 1906 | ikglp_aff->relax_max_fifo_len); |
1931 | 1907 | ||
1932 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); | 1908 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); |
1933 | memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); | 1909 | memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(unsigned int)*(ikglp_aff->nr_rsrc)); |
1934 | 1910 | ||
1935 | for(i = 0; i < sem->nr_replicas; ++i) { | 1911 | for(i = 0; i < sem->nr_replicas; ++i) { |
1936 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; | 1912 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; |
@@ -1950,9 +1926,6 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1950 | return &ikglp_aff->obs; | 1926 | return &ikglp_aff->obs; |
1951 | } | 1927 | } |
1952 | 1928 | ||
1953 | |||
1954 | |||
1955 | |||
1956 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, | 1929 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, |
1957 | struct fifo_queue* fq) { | 1930 | struct fifo_queue* fq) { |
1958 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1931 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
@@ -1960,29 +1933,28 @@ static int gpu_replica_to_resource(struct ikglp_affinity* aff, | |||
1960 | } | 1933 | } |
1961 | 1934 | ||
1962 | 1935 | ||
1963 | // Smart IKGLP Affinity | ||
1964 | 1936 | ||
1965 | //static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) | 1937 | /*--------------------------------------------------------------------------*/ |
1966 | //{ | 1938 | /* ADVANCED AFFINITY HEURISITICS */ |
1967 | // struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1939 | /* */ |
1968 | // struct ikglp_queue_info *shortest = &aff->q_info[0]; | 1940 | /* These heuristics estimate FIFO length wait times and try to enqueue */ |
1969 | // int i; | 1941 | /* tasks into the shortest queues. When two queues are equivlenet, the GPU */ |
1970 | // | 1942 | /* that maintains affinity is selected. When a task has no affinity, the */ |
1971 | // for(i = 1; i < sem->nr_replicas; ++i) { | 1943 | /* heuristic tries to get the GPU with the fewest number of other tasks */ |
1972 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { | 1944 | /* with affinity on that GPU. */ |
1973 | // shortest = &aff->q_info[i]; | 1945 | /* */ |
1974 | // } | 1946 | /* Heuristics to explore in the future: */ |
1975 | // } | 1947 | /* - Utilization */ |
1976 | // | 1948 | /* - Longest non-preemptive section */ |
1977 | // return(shortest); | 1949 | /* - Criticality */ |
1978 | //} | 1950 | /* - Task period */ |
1951 | /*--------------------------------------------------------------------------*/ | ||
1979 | 1952 | ||
1980 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | 1953 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) |
1981 | { | 1954 | { |
1982 | // advise_enqueue must be smart as not not break IKGLP rules: | 1955 | // advise_enqueue must be smart as not not break IKGLP rules: |
1983 | // * No queue can be greater than ceil(m/k) in length. We may return | 1956 | // * No queue can be greater than ceil(m/k) in length, unless |
1984 | // such a queue, but IKGLP will be smart enough as to send requests | 1957 | // 'relax_max_fifo_len' is asserted |
1985 | // to donors or PQ. | ||
1986 | // * Cannot let a queue idle if there exist waiting PQ/donors | 1958 | // * Cannot let a queue idle if there exist waiting PQ/donors |
1987 | // -- needed to guarantee parallel progress of waiters. | 1959 | // -- needed to guarantee parallel progress of waiters. |
1988 | // | 1960 | // |
@@ -1993,14 +1965,15 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
1993 | 1965 | ||
1994 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1966 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
1995 | lt_t min_len; | 1967 | lt_t min_len; |
1996 | int min_nr_users, min_nr_aff_users; | 1968 | unsigned int min_nr_users, min_nr_aff_users; |
1997 | struct ikglp_queue_info *shortest, *aff_queue; | 1969 | struct ikglp_queue_info *shortest, *aff_queue; |
1998 | struct fifo_queue *to_enqueue; | 1970 | struct fifo_queue *to_enqueue; |
1999 | int i; | 1971 | unsigned int i; |
2000 | int affinity_gpu; | 1972 | int affinity_gpu; |
2001 | 1973 | ||
2002 | int max_fifo_len = (aff->relax_max_fifo_len) ? | 1974 | unsigned int max_fifo_len = (aff->relax_max_fifo_len) ? |
2003 | sem->m : sem->max_fifo_len; | 1975 | sem->max_in_fifos : /* allow possibility of all requests on same queue */ |
1976 | sem->max_fifo_len; /* constraint FIFO len */ | ||
2004 | 1977 | ||
2005 | // if we have no affinity, find the GPU with the least number of users | 1978 | // if we have no affinity, find the GPU with the least number of users |
2006 | // with active affinity | 1979 | // with active affinity |
@@ -2037,7 +2010,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
2037 | min_nr_aff_users = *(shortest->nr_aff_users); | 2010 | min_nr_aff_users = *(shortest->nr_aff_users); |
2038 | 2011 | ||
2039 | 2012 | ||
2040 | TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", | 2013 | TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n", |
2041 | get_gpu_estimate(t, MIG_LOCAL), | 2014 | get_gpu_estimate(t, MIG_LOCAL), |
2042 | ikglp_get_idx(sem, shortest->q), | 2015 | ikglp_get_idx(sem, shortest->q), |
2043 | shortest->q->count, | 2016 | shortest->q->count, |
@@ -2119,8 +2092,6 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
2119 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | 2092 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); |
2120 | 2093 | ||
2121 | return to_enqueue; | 2094 | return to_enqueue; |
2122 | |||
2123 | //return(sem->shortest_fifo_queue); | ||
2124 | } | 2095 | } |
2125 | 2096 | ||
2126 | 2097 | ||
@@ -2334,7 +2305,6 @@ static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, | |||
2334 | 2305 | ||
2335 | donee = NULL; | 2306 | donee = NULL; |
2336 | donee_node = NULL; | 2307 | donee_node = NULL; |
2337 | //*dist_from_head = sem->max_fifo_len + 1; | ||
2338 | *dist_from_head = IKGLP_INVAL_DISTANCE; | 2308 | *dist_from_head = IKGLP_INVAL_DISTANCE; |
2339 | 2309 | ||
2340 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); | 2310 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); |
@@ -2630,7 +2600,6 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) | |||
2630 | // decrement affinity count on old GPU | 2600 | // decrement affinity count on old GPU |
2631 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; | 2601 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; |
2632 | --(aff->nr_aff_on_rsrc[aff_rsrc]); | 2602 | --(aff->nr_aff_on_rsrc[aff_rsrc]); |
2633 | // aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t); | ||
2634 | 2603 | ||
2635 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { | 2604 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { |
2636 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); | 2605 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); |
@@ -2676,12 +2645,10 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | |||
2676 | if(last_gpu >= 0) { | 2645 | if(last_gpu >= 0) { |
2677 | int old_rsrc = last_gpu - aff->offset; | 2646 | int old_rsrc = last_gpu - aff->offset; |
2678 | --(aff->nr_aff_on_rsrc[old_rsrc]); | 2647 | --(aff->nr_aff_on_rsrc[old_rsrc]); |
2679 | // aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t)); | ||
2680 | } | 2648 | } |
2681 | 2649 | ||
2682 | // increment affinity count on new GPU | 2650 | // increment affinity count on new GPU |
2683 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); | 2651 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); |
2684 | // aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t)); | ||
2685 | tsk_rt(t)->rsrc_exit_cb_args = aff; | 2652 | tsk_rt(t)->rsrc_exit_cb_args = aff; |
2686 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; | 2653 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; |
2687 | } | 2654 | } |
@@ -2751,20 +2718,18 @@ struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* op | |||
2751 | 2718 | ||
2752 | 2719 | ||
2753 | 2720 | ||
2754 | 2721 | /*--------------------------------------------------------------------------*/ | |
2755 | 2722 | /* SIMPLE LOAD-BALANCING AFFINITY HEURISTIC */ | |
2756 | 2723 | /*--------------------------------------------------------------------------*/ | |
2757 | |||
2758 | // Simple ikglp Affinity (standard ikglp with auto-gpu registration) | ||
2759 | 2724 | ||
2760 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | 2725 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) |
2761 | { | 2726 | { |
2762 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 2727 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
2763 | int min_count; | 2728 | unsigned int min_count; |
2764 | int min_nr_users; | 2729 | unsigned int min_nr_users; |
2765 | struct ikglp_queue_info *shortest; | 2730 | struct ikglp_queue_info *shortest; |
2766 | struct fifo_queue *to_enqueue; | 2731 | struct fifo_queue *to_enqueue; |
2767 | int i; | 2732 | unsigned int i; |
2768 | 2733 | ||
2769 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); | 2734 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); |
2770 | 2735 | ||
@@ -2772,13 +2737,13 @@ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, s | |||
2772 | min_count = shortest->q->count; | 2737 | min_count = shortest->q->count; |
2773 | min_nr_users = *(shortest->nr_cur_users); | 2738 | min_nr_users = *(shortest->nr_cur_users); |
2774 | 2739 | ||
2775 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | 2740 | TRACE_CUR("queue %d: waiters = %u, total holders = %u\n", |
2776 | ikglp_get_idx(sem, shortest->q), | 2741 | ikglp_get_idx(sem, shortest->q), |
2777 | shortest->q->count, | 2742 | shortest->q->count, |
2778 | min_nr_users); | 2743 | min_nr_users); |
2779 | 2744 | ||
2780 | for(i = 1; i < sem->nr_replicas; ++i) { | 2745 | for(i = 1; i < sem->nr_replicas; ++i) { |
2781 | int len = aff->q_info[i].q->count; | 2746 | unsigned int len = aff->q_info[i].q->count; |
2782 | 2747 | ||
2783 | // queue is smaller, or they're equal and the other has a smaller number | 2748 | // queue is smaller, or they're equal and the other has a smaller number |
2784 | // of total users. | 2749 | // of total users. |
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 041561839976..7dd866185623 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -21,7 +21,7 @@ static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | |||
21 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, | 21 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, |
22 | struct task_struct* holder) | 22 | struct task_struct* holder) |
23 | { | 23 | { |
24 | int i; | 24 | unsigned int i; |
25 | for(i = 0; i < sem->num_resources; ++i) | 25 | for(i = 0; i < sem->num_resources; ++i) |
26 | if(sem->queues[i].owner == holder) | 26 | if(sem->queues[i].owner == holder) |
27 | return(&sem->queues[i]); | 27 | return(&sem->queues[i]); |
@@ -79,7 +79,7 @@ static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, | |||
79 | { | 79 | { |
80 | /* must hold sem->lock */ | 80 | /* must hold sem->lock */ |
81 | 81 | ||
82 | int i; | 82 | unsigned int i; |
83 | 83 | ||
84 | *to_steal = NULL; | 84 | *to_steal = NULL; |
85 | *to_steal_from = NULL; | 85 | *to_steal_from = NULL; |
@@ -438,7 +438,7 @@ int kfmlp_close(struct litmus_lock* l) | |||
438 | struct kfmlp_queue *my_queue; | 438 | struct kfmlp_queue *my_queue; |
439 | unsigned long flags; | 439 | unsigned long flags; |
440 | 440 | ||
441 | int owner; | 441 | unsigned int owner; |
442 | 442 | ||
443 | spin_lock_irqsave(&sem->lock, flags); | 443 | spin_lock_irqsave(&sem->lock, flags); |
444 | 444 | ||
@@ -465,8 +465,8 @@ void kfmlp_free(struct litmus_lock* l) | |||
465 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | 465 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) |
466 | { | 466 | { |
467 | struct kfmlp_semaphore* sem; | 467 | struct kfmlp_semaphore* sem; |
468 | int num_resources = 0; | 468 | unsigned int num_resources = 0; |
469 | int i; | 469 | unsigned int i; |
470 | 470 | ||
471 | if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) | 471 | if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) |
472 | { | 472 | { |
@@ -560,7 +560,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
560 | struct kfmlp_affinity* kfmlp_aff; | 560 | struct kfmlp_affinity* kfmlp_aff; |
561 | struct gpu_affinity_observer_args aff_args; | 561 | struct gpu_affinity_observer_args aff_args; |
562 | struct kfmlp_semaphore* sem; | 562 | struct kfmlp_semaphore* sem; |
563 | int i; | 563 | unsigned int i; |
564 | unsigned long flags; | 564 | unsigned long flags; |
565 | 565 | ||
566 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | 566 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { |
@@ -577,14 +577,14 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
577 | return(NULL); | 577 | return(NULL); |
578 | } | 578 | } |
579 | 579 | ||
580 | if((aff_args.nr_simult_users <= 0) || | 580 | if((aff_args.rho <= 0) || |
581 | (sem->num_resources%aff_args.nr_simult_users != 0)) { | 581 | (sem->num_resources%aff_args.rho != 0)) { |
582 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | 582 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " |
583 | "(%d) per replica. #replicas should be evenly divisible " | 583 | "(%d) per replica. #replicas should be evenly divisible " |
584 | "by #simult_users.\n", | 584 | "by #simult_users.\n", |
585 | sem->litmus_lock.ident, | 585 | sem->litmus_lock.ident, |
586 | sem->num_resources, | 586 | sem->num_resources, |
587 | aff_args.nr_simult_users); | 587 | aff_args.rho); |
588 | return(NULL); | 588 | return(NULL); |
589 | } | 589 | } |
590 | 590 | ||
@@ -605,7 +605,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
605 | return(NULL); | 605 | return(NULL); |
606 | } | 606 | } |
607 | 607 | ||
608 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); | 608 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->num_resources / aff_args.rho), GFP_KERNEL); |
609 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { | 609 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { |
610 | kfree(kfmlp_aff->q_info); | 610 | kfree(kfmlp_aff->q_info); |
611 | kfree(kfmlp_aff); | 611 | kfree(kfmlp_aff); |
@@ -616,10 +616,10 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
616 | 616 | ||
617 | kfmlp_aff->ops = kfmlp_ops; | 617 | kfmlp_aff->ops = kfmlp_ops; |
618 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; | 618 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; |
619 | kfmlp_aff->nr_simult = aff_args.nr_simult_users; | 619 | kfmlp_aff->nr_simult = aff_args.rho; |
620 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; | 620 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; |
621 | 621 | ||
622 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); | 622 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(unsigned int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); |
623 | 623 | ||
624 | for(i = 0; i < sem->num_resources; ++i) { | 624 | for(i = 0; i < sem->num_resources; ++i) { |
625 | kfmlp_aff->q_info[i].q = &sem->queues[i]; | 625 | kfmlp_aff->q_info[i].q = &sem->queues[i]; |
@@ -669,10 +669,10 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct | |||
669 | { | 669 | { |
670 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 670 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
671 | lt_t min_len; | 671 | lt_t min_len; |
672 | int min_nr_users; | 672 | unsigned int min_nr_users; |
673 | struct kfmlp_queue_info *shortest; | 673 | struct kfmlp_queue_info *shortest; |
674 | struct kfmlp_queue *to_enqueue; | 674 | struct kfmlp_queue *to_enqueue; |
675 | int i; | 675 | unsigned int i; |
676 | int affinity_gpu; | 676 | int affinity_gpu; |
677 | 677 | ||
678 | // simply pick the shortest queue if, we have no affinity, or we have | 678 | // simply pick the shortest queue if, we have no affinity, or we have |
@@ -893,11 +893,11 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op | |||
893 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | 893 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) |
894 | { | 894 | { |
895 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 895 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
896 | int min_count; | 896 | unsigned int min_count; |
897 | int min_nr_users; | 897 | unsigned int min_nr_users; |
898 | struct kfmlp_queue_info *shortest; | 898 | struct kfmlp_queue_info *shortest; |
899 | struct kfmlp_queue *to_enqueue; | 899 | struct kfmlp_queue *to_enqueue; |
900 | int i; | 900 | unsigned int i; |
901 | 901 | ||
902 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); | 902 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); |
903 | 903 | ||
@@ -911,7 +911,7 @@ struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, | |||
911 | min_nr_users); | 911 | min_nr_users); |
912 | 912 | ||
913 | for(i = 1; i < sem->num_resources; ++i) { | 913 | for(i = 1; i < sem->num_resources; ++i) { |
914 | int len = aff->q_info[i].q->count; | 914 | unsigned int len = aff->q_info[i].q->count; |
915 | 915 | ||
916 | // queue is smaller, or they're equal and the other has a smaller number | 916 | // queue is smaller, or they're equal and the other has a smaller number |
917 | // of total users. | 917 | // of total users. |
diff --git a/litmus/locking.c b/litmus/locking.c
index eddc67a4d36a..8ba46f85f5c6 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -234,12 +234,12 @@ void print_hp_waiters(struct binheap_node* n, int depth) | |||
234 | 234 | ||
235 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | 235 | #ifdef CONFIG_LITMUS_DGL_SUPPORT |
236 | 236 | ||
237 | struct prioq_mutex; | 237 | struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) |
238 | |||
239 | void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) | ||
240 | { | 238 | { |
241 | int start = dgl_wait->last_primary; | 239 | int num_locks = dgl_wait->size; |
242 | extern void __dump_prioq_lock_info(struct prioq_mutex *mutex); | 240 | int last = dgl_wait->last_primary; |
241 | int start; | ||
242 | int idx; | ||
243 | 243 | ||
244 | /* | 244 | /* |
245 | We pick the next lock in reverse order. This causes inheritance propagation | 245 | We pick the next lock in reverse order. This causes inheritance propagation |
@@ -250,55 +250,42 @@ void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lo | |||
250 | BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); | 250 | BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); |
251 | 251 | ||
252 | // note reverse order | 252 | // note reverse order |
253 | for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; | 253 | // Try to enable priority on a lock that has an owner. |
254 | dgl_wait->last_primary != start; | 254 | idx = start = (last != 0) ? last - 1 : num_locks - 1; |
255 | dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) | 255 | do { |
256 | { | 256 | struct litmus_lock *l = dgl_wait->locks[idx]; |
257 | 257 | ||
258 | struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; | 258 | if(!l->ops->is_owner(l, dgl_wait->task) && l->ops->get_owner(l)) { |
259 | 259 | dgl_wait->last_primary = idx; | |
260 | if(!l->ops->is_owner(l, dgl_wait->task) && | 260 | tsk_rt(dgl_wait->task)->blocked_lock = l; |
261 | l->ops->get_owner(l)) { | ||
262 | |||
263 | tsk_rt(dgl_wait->task)->blocked_lock = | ||
264 | dgl_wait->locks[dgl_wait->last_primary]; | ||
265 | mb(); | 261 | mb(); |
266 | |||
267 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); | 262 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); |
268 | |||
269 | l->ops->enable_priority(l, dgl_wait); | 263 | l->ops->enable_priority(l, dgl_wait); |
270 | 264 | return(l); | |
271 | return; | ||
272 | } | 265 | } |
273 | } | 266 | idx = (idx != 0) ? idx - 1 : num_locks - 1; |
267 | } while(idx != start); | ||
274 | 268 | ||
275 | // There was no one to push on. This can happen if the blocked task is | 269 | // There was no one to push on. This can happen if the blocked task is |
276 | // behind a task that is idling a prioq-mutex. | 270 | // behind a task that is idling a prioq-mutex. |
277 | 271 | ||
278 | // note reverse order | 272 | // note reverse order |
279 | dgl_wait->last_primary = start; | 273 | idx = (last != 0) ? last - 1 : num_locks - 1; |
280 | for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; | 274 | do { |
281 | dgl_wait->last_primary != start; | 275 | struct litmus_lock *l = dgl_wait->locks[idx]; |
282 | dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) | ||
283 | { | ||
284 | |||
285 | struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; | ||
286 | 276 | ||
287 | if(!l->ops->is_owner(l, dgl_wait->task)) { | 277 | if(!l->ops->is_owner(l, dgl_wait->task)) { |
288 | 278 | dgl_wait->last_primary = idx; | |
289 | tsk_rt(dgl_wait->task)->blocked_lock = | 279 | tsk_rt(dgl_wait->task)->blocked_lock = l; |
290 | dgl_wait->locks[dgl_wait->last_primary]; | ||
291 | mb(); | 280 | mb(); |
292 | |||
293 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); | 281 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); |
294 | |||
295 | l->ops->enable_priority(l, dgl_wait); | 282 | l->ops->enable_priority(l, dgl_wait); |
296 | 283 | return(l); | |
297 | return; | ||
298 | } | 284 | } |
299 | } | 285 | idx = (idx != 0) ? idx - 1 : num_locks - 1; |
286 | } while(idx != start); | ||
300 | 287 | ||
301 | BUG(); | 288 | return(NULL); |
302 | } | 289 | } |
303 | 290 | ||
304 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) | 291 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) |
@@ -333,7 +320,12 @@ struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, | |||
333 | return task; | 320 | return task; |
334 | } | 321 | } |
335 | 322 | ||
336 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) | 323 | void init_dgl_wait_state(dgl_wait_state_t *dgl_wait) |
324 | { | ||
325 | memset(dgl_wait, 0, sizeof(dgl_wait_state_t)); | ||
326 | } | ||
327 | |||
328 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t *dgl_wait) | ||
337 | { | 329 | { |
338 | init_waitqueue_entry(wq_node, dgl_wait->task); | 330 | init_waitqueue_entry(wq_node, dgl_wait->task); |
339 | wq_node->private = dgl_wait; | 331 | wq_node->private = dgl_wait; |
@@ -403,83 +395,62 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) | |||
403 | TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); | 395 | TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); |
404 | #endif | 396 | #endif |
405 | 397 | ||
406 | dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); | ||
407 | |||
408 | BUG_ON(dgl_wait->task != current); | 398 | BUG_ON(dgl_wait->task != current); |
409 | 399 | ||
410 | raw_spin_lock_irqsave(dgl_lock, irqflags); | ||
411 | |||
412 | dgl_wait->nr_remaining = dgl_wait->size; | 400 | dgl_wait->nr_remaining = dgl_wait->size; |
413 | 401 | ||
402 | dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); | ||
403 | raw_spin_lock_irqsave(dgl_lock, irqflags); | ||
404 | |||
414 | // try to acquire each lock. enqueue (non-blocking) if it is unavailable. | 405 | // try to acquire each lock. enqueue (non-blocking) if it is unavailable. |
415 | for(i = 0; i < dgl_wait->size; ++i) { | 406 | for(i = 0; i < dgl_wait->size; ++i) { |
416 | struct litmus_lock *l = dgl_wait->locks[i]; | 407 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
417 | 408 | ||
418 | // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. | 409 | // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. |
419 | 410 | ||
420 | if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { | 411 | if(tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i])) { |
421 | --(dgl_wait->nr_remaining); | 412 | --(dgl_wait->nr_remaining); |
422 | TRACE_CUR("Acquired lock %d immediatly.\n", l->ident); | 413 | TRACE_CUR("Acquired lock %d immediatly.\n", tmp->ident); |
423 | } | 414 | } |
424 | } | 415 | } |
425 | 416 | ||
426 | if(dgl_wait->nr_remaining == 0) { | 417 | if(dgl_wait->nr_remaining == 0) { |
427 | // acquired entire group immediatly | 418 | // acquired entire group immediatly |
428 | TRACE_CUR("Acquired all locks in DGL immediatly!\n"); | 419 | TRACE_CUR("Acquired all locks in DGL immediatly!\n"); |
420 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
429 | } | 421 | } |
430 | else { | 422 | else { |
423 | struct litmus_lock *first_primary; | ||
431 | 424 | ||
432 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", | 425 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", |
433 | dgl_wait->nr_remaining); | 426 | dgl_wait->nr_remaining); |
434 | 427 | ||
435 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 428 | first_primary = select_next_lock(dgl_wait); |
436 | // KLUDGE: don't count this suspension as time in the critical gpu | ||
437 | // critical section | ||
438 | if(tsk_rt(dgl_wait->task)->held_gpus) { | ||
439 | tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; | ||
440 | } | ||
441 | #endif | ||
442 | |||
443 | // note reverse order. see comments in select_next_lock for reason. | ||
444 | for(i = dgl_wait->size - 1; i >= 0; --i) { | ||
445 | struct litmus_lock *l = dgl_wait->locks[i]; | ||
446 | if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe | ||
447 | |||
448 | TRACE_CUR("Activating priority inheritance on lock %d\n", | ||
449 | l->ident); | ||
450 | |||
451 | TS_DGL_LOCK_SUSPEND; | ||
452 | |||
453 | l->ops->enable_priority(l, dgl_wait); | ||
454 | dgl_wait->last_primary = i; | ||
455 | 429 | ||
456 | TRACE_CUR("Suspending for lock %d\n", l->ident); | 430 | if (!first_primary) { |
457 | 431 | BUG(); | |
458 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending | 432 | // TRACE_CUR("We hold all the locks?\n"); |
433 | // raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
434 | // goto all_acquired; | ||
435 | } | ||
459 | 436 | ||
460 | suspend_for_lock(); // suspend!!! | 437 | TRACE_CUR("Suspending for lock %d\n", first_primary->ident); |
461 | 438 | ||
462 | TS_DGL_LOCK_RESUME; | 439 | TS_DGL_LOCK_SUSPEND; |
463 | 440 | ||
464 | TRACE_CUR("Woken up from DGL suspension.\n"); | 441 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending |
442 | suspend_for_lock(); | ||
465 | 443 | ||
466 | goto all_acquired; // we should hold all locks when we wake up. | 444 | TS_DGL_LOCK_RESUME; |
467 | } | ||
468 | } | ||
469 | 445 | ||
470 | TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n"); | 446 | TRACE_CUR("Woken up from DGL suspension.\n"); |
471 | //BUG(); | ||
472 | } | 447 | } |
473 | 448 | ||
474 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
475 | |||
476 | all_acquired: | ||
477 | |||
478 | // FOR SANITY CHECK FOR TESTING | 449 | // FOR SANITY CHECK FOR TESTING |
479 | // for(i = 0; i < dgl_wait->size; ++i) { | 450 | for(i = 0; i < dgl_wait->size; ++i) { |
480 | // struct litmus_lock *l = dgl_wait->locks[i]; | 451 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
481 | // BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 452 | BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); |
482 | // } | 453 | } |
483 | 454 | ||
484 | TRACE_CUR("Acquired entire DGL\n"); | 455 | TRACE_CUR("Acquired entire DGL\n"); |
485 | 456 | ||
@@ -493,7 +464,6 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
493 | int i; | 464 | int i; |
494 | unsigned long irqflags; //, dummyflags; | 465 | unsigned long irqflags; //, dummyflags; |
495 | raw_spinlock_t *dgl_lock; | 466 | raw_spinlock_t *dgl_lock; |
496 | struct litmus_lock *l; | ||
497 | struct task_struct *t = current; | 467 | struct task_struct *t = current; |
498 | 468 | ||
499 | #ifdef CONFIG_SCHED_DEBUG_TRACE | 469 | #ifdef CONFIG_SCHED_DEBUG_TRACE |
@@ -511,13 +481,19 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
511 | 481 | ||
512 | dgl_wait->nr_remaining = dgl_wait->size; | 482 | dgl_wait->nr_remaining = dgl_wait->size; |
513 | 483 | ||
484 | /* enqueue for all locks */ | ||
514 | for(i = 0; i < dgl_wait->size; ++i) { | 485 | for(i = 0; i < dgl_wait->size; ++i) { |
515 | struct litmus_lock *l = dgl_wait->locks[i]; | 486 | /* dgl_lock must only enqueue. cannot set TASK_UNINTERRUPTIBLE!! |
516 | // this should be a forced enqueue if atomic DGLs are needed. | 487 | * Note the difference in requirements with do_litmus_dgl_lock(). |
517 | l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i]); | 488 | */ |
489 | struct litmus_lock *tmp = dgl_wait->locks[i]; | ||
490 | tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i]); | ||
518 | } | 491 | } |
519 | 492 | ||
493 | /* now try to take all locks */ | ||
520 | if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { | 494 | if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { |
495 | struct litmus_lock *l; | ||
496 | |||
521 | /* Failed to acquire all locks at once. | 497 | /* Failed to acquire all locks at once. |
522 | * Pick a lock to push on and suspend. */ | 498 | * Pick a lock to push on and suspend. */ |
523 | TRACE_CUR("Could not atomically acquire all locks.\n"); | 499 | TRACE_CUR("Could not atomically acquire all locks.\n"); |
@@ -526,26 +502,13 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
526 | * __attempt_atomic_dgl_acquire() may actually succeed. */ | 502 | * __attempt_atomic_dgl_acquire() may actually succeed. */ |
527 | set_task_state(t, TASK_UNINTERRUPTIBLE); | 503 | set_task_state(t, TASK_UNINTERRUPTIBLE); |
528 | 504 | ||
529 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 505 | l = select_next_lock(dgl_wait); |
530 | // KLUDGE: don't count this suspension as time in the critical gpu | ||
531 | // critical section | ||
532 | if(tsk_rt(t)->held_gpus) { | ||
533 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
534 | } | ||
535 | #endif | ||
536 | 506 | ||
537 | // select a lock to push priority on | 507 | TRACE_CUR("Suspending for lock %d\n", l->ident); |
538 | dgl_wait->last_primary = 0; // default | ||
539 | select_next_lock(dgl_wait); // may change value of last_primary | ||
540 | |||
541 | l = dgl_wait->locks[dgl_wait->last_primary]; | ||
542 | 508 | ||
543 | TS_DGL_LOCK_SUSPEND; | 509 | TS_DGL_LOCK_SUSPEND; |
544 | 510 | ||
545 | TRACE_CUR("Suspending for lock %d\n", l->ident); | ||
546 | |||
547 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending | 511 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending |
548 | |||
549 | suspend_for_lock(); // suspend!!! | 512 | suspend_for_lock(); // suspend!!! |
550 | 513 | ||
551 | TS_DGL_LOCK_RESUME; | 514 | TS_DGL_LOCK_RESUME; |
@@ -562,8 +525,8 @@ all_acquired: | |||
562 | 525 | ||
563 | // SANITY CHECK FOR TESTING | 526 | // SANITY CHECK FOR TESTING |
564 | for(i = 0; i < dgl_wait->size; ++i) { | 527 | for(i = 0; i < dgl_wait->size; ++i) { |
565 | struct litmus_lock *l = dgl_wait->locks[i]; | 528 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
566 | BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 529 | BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); |
567 | } | 530 | } |
568 | 531 | ||
569 | TRACE_CUR("Acquired entire DGL\n"); | 532 | TRACE_CUR("Acquired entire DGL\n"); |
@@ -603,6 +566,8 @@ asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) | |||
603 | err = sys_litmus_lock(dgl_ods[0]); | 566 | err = sys_litmus_lock(dgl_ods[0]); |
604 | } | 567 | } |
605 | else { | 568 | else { |
569 | init_dgl_wait_state(&dgl_wait_state); | ||
570 | |||
606 | for(i = 0; i < dgl_size; ++i) { | 571 | for(i = 0; i < dgl_size; ++i) { |
607 | struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); | 572 | struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); |
608 | if(entry && is_lock(entry)) { | 573 | if(entry && is_lock(entry)) { |