Diffstat (limited to 'litmus')
-rw-r--r--   litmus/ikglp_lock.c | 245
-rw-r--r--   litmus/kfmlp_lock.c |  36
-rw-r--r--   litmus/locking.c    | 177
3 files changed, 194 insertions, 264 deletions
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 3fd760799a75..cab0d7f938f9 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -103,8 +103,7 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, | |||
103 | struct task_struct *queued, *found = NULL; | 103 | struct task_struct *queued, *found = NULL; |
104 | 104 | ||
105 | list_for_each(pos, &kqueue->wait.task_list) { | 105 | list_for_each(pos, &kqueue->wait.task_list) { |
106 | queued = (struct task_struct*) list_entry(pos, | 106 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private; |
107 | wait_queue_t, task_list)->private; | ||
108 | 107 | ||
109 | /* Compare task prios, find high prio task. */ | 108 | /* Compare task prios, find high prio task. */ |
110 | if(queued != skip && litmus->compare(queued, found)) | 109 | if(queued != skip && litmus->compare(queued, found)) |
@@ -232,22 +231,14 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
232 | struct task_struct *t, | 231 | struct task_struct *t, |
233 | ikglp_heap_node_t *node) | 232 | ikglp_heap_node_t *node) |
234 | { | 233 | { |
235 | |||
236 | |||
237 | node->task = t; | 234 | node->task = t; |
238 | INIT_BINHEAP_NODE(&node->node); | 235 | INIT_BINHEAP_NODE(&node->node); |
239 | 236 | ||
240 | if(sem->top_m_size < sem->m) { | 237 | if(sem->top_m_size < sem->max_in_fifos) { |
241 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", | 238 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", |
242 | t->comm, t->pid); | 239 | t->comm, t->pid); |
243 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
244 | // print_global_list(sem->top_m.root, 1); | ||
245 | |||
246 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); | 240 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); |
247 | ++(sem->top_m_size); | 241 | ++(sem->top_m_size); |
248 | |||
249 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | ||
250 | // print_global_list(sem->top_m.root, 1); | ||
251 | } | 242 | } |
252 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { | 243 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { |
253 | ikglp_heap_node_t *evicted = | 244 | ikglp_heap_node_t *evicted = |
@@ -257,12 +248,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
257 | t->comm, t->pid, | 248 | t->comm, t->pid, |
258 | evicted->task->comm, evicted->task->pid); | 249 | evicted->task->comm, evicted->task->pid); |
259 | 250 | ||
260 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
261 | // print_global_list(sem->not_top_m.root, 1); | ||
262 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
263 | // print_global_list(sem->top_m.root, 1); | ||
264 | |||
265 | |||
266 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); | 251 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); |
267 | INIT_BINHEAP_NODE(&evicted->node); | 252 | INIT_BINHEAP_NODE(&evicted->node); |
268 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); | 253 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); |
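The two hunks above are the IKGLP's "top-m" bookkeeping: the semaphore keeps the max_in_fifos (formerly sem->m) highest-priority outstanding requests in one heap whose root is the m-th highest, and demotes an evicted request to the not-top-m heap. A stand-alone sketch of that decision flow follows; it is an illustration only, with an array and linear scan standing in for the kernel binheaps and a larger int standing in for a higher priority (none of these names are LITMUS^RT APIs).

    #include <stdio.h>

    #define M 3                      /* plays the role of sem->max_in_fifos */

    static int top_m[M];             /* priorities of the current top-m requests */
    static int top_m_size;

    static int worst_idx(void)       /* index of the m-th highest (heap root) */
    {
        int i, w = 0;
        for (i = 1; i < top_m_size; i++)
            if (top_m[i] < top_m[w])
                w = i;
        return w;
    }

    /* Mirrors the three cases of ikglp_add_global_list(). */
    static void add_global(int prio)
    {
        if (top_m_size < M) {
            top_m[top_m_size++] = prio;          /* trivially in top-m */
        } else {
            int w = worst_idx();
            if (prio > top_m[w]) {
                printf("demoting %d to not-top-m\n", top_m[w]);
                top_m[w] = prio;                 /* evict old m-th highest */
            } else {
                printf("%d goes straight to not-top-m\n", prio);
            }
        }
    }

    int main(void)
    {
        int prios[] = { 5, 9, 1, 7, 3 }, i;
        for (i = 0; i < 5; i++)
            add_global(prios[i]);
        return 0;
    }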
@@ -279,8 +264,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
279 | else { | 264 | else { |
280 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", | 265 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", |
281 | t->comm, t->pid); | 266 | t->comm, t->pid); |
282 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
283 | // print_global_list(sem->not_top_m.root, 1); | ||
284 | 267 | ||
285 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); | 268 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); |
286 | 269 | ||
@@ -303,12 +286,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem, | |||
303 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { | 286 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { |
304 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); | 287 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); |
305 | 288 | ||
306 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
307 | // print_global_list(sem->not_top_m.root, 1); | ||
308 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
309 | // print_global_list(sem->top_m.root, 1); | ||
310 | |||
311 | |||
312 | binheap_delete(&node->node, &sem->top_m); | 289 | binheap_delete(&node->node, &sem->top_m); |
313 | 290 | ||
314 | if(!binheap_empty(&sem->not_top_m)) { | 291 | if(!binheap_empty(&sem->not_top_m)) { |
@@ -337,8 +314,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem, | |||
337 | } | 314 | } |
338 | else { | 315 | else { |
339 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); | 316 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); |
340 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
341 | // print_global_list(sem->not_top_m.root, 1); | ||
342 | 317 | ||
343 | binheap_delete(&node->node, &sem->not_top_m); | 318 | binheap_delete(&node->node, &sem->not_top_m); |
344 | 319 | ||
@@ -355,10 +330,6 @@ static void ikglp_add_donees(struct ikglp_semaphore *sem, | |||
355 | struct task_struct *t, | 330 | struct task_struct *t, |
356 | ikglp_donee_heap_node_t* node) | 331 | ikglp_donee_heap_node_t* node) |
357 | { | 332 | { |
358 | // TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid); | ||
359 | // TRACE_CUR("donees Before:\n"); | ||
360 | // print_donees(sem, sem->donees.root, 1); | ||
361 | |||
362 | node->task = t; | 333 | node->task = t; |
363 | node->donor_info = NULL; | 334 | node->donor_info = NULL; |
364 | node->fq = fq; | 335 | node->fq = fq; |
@@ -928,7 +899,7 @@ int ikglp_lock(struct litmus_lock* l) | |||
928 | 899 | ||
929 | TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); | 900 | TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); |
930 | 901 | ||
931 | if(sem->nr_in_fifos < sem->m) { | 902 | if(sem->nr_in_fifos < sem->max_in_fifos) { |
932 | // enqueue somewhere | 903 | // enqueue somewhere |
933 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 904 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
934 | fq = (sem->aff_obs) ? | 905 | fq = (sem->aff_obs) ? |
@@ -1272,10 +1243,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1272 | donee = t; | 1243 | donee = t; |
1273 | 1244 | ||
1274 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1245 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1275 | if(sem->aff_obs) | 1246 | if(sem->aff_obs) { |
1276 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1247 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1277 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1248 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1278 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1249 | WARN_ON(1); |
1250 | fq_of_new_on_fq = fq; | ||
1251 | } | ||
1252 | } | ||
1279 | else | 1253 | else |
1280 | fq_of_new_on_fq = fq; | 1254 | fq_of_new_on_fq = fq; |
1281 | #else | 1255 | #else |
@@ -1308,10 +1282,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1308 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); | 1282 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); |
1309 | 1283 | ||
1310 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1284 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1311 | if(sem->aff_obs) | 1285 | if(sem->aff_obs) { |
1312 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1286 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1313 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1287 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1314 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1288 | WARN_ON(1); |
1289 | fq_of_new_on_fq = fq; | ||
1290 | } | ||
1291 | } | ||
1315 | else | 1292 | else |
1316 | fq_of_new_on_fq = fq; | 1293 | fq_of_new_on_fq = fq; |
1317 | #else | 1294 | #else |
@@ -1335,10 +1312,13 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1335 | new_on_fq = pq_wait->task; | 1312 | new_on_fq = pq_wait->task; |
1336 | 1313 | ||
1337 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1314 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1338 | if(sem->aff_obs) | 1315 | if(sem->aff_obs) { |
1339 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1316 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1340 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) | 1317 | if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { |
1341 | fq_of_new_on_fq = fq; /* discard recommendation */ | 1318 | WARN_ON(1); |
1319 | fq_of_new_on_fq = fq; | ||
1320 | } | ||
1321 | } | ||
1342 | else | 1322 | else |
1343 | fq_of_new_on_fq = fq; | 1323 | fq_of_new_on_fq = fq; |
1344 | #else | 1324 | #else |
@@ -1663,26 +1643,44 @@ void ikglp_free(struct litmus_lock* l) | |||
1663 | 1643 | ||
1664 | 1644 | ||
1665 | 1645 | ||
1666 | struct litmus_lock* ikglp_new(int m, | 1646 | struct litmus_lock* ikglp_new(unsigned int m, |
1667 | struct litmus_lock_ops* ops, | 1647 | struct litmus_lock_ops* ops, |
1668 | void* __user arg) | 1648 | void* __user uarg) |
1669 | { | 1649 | { |
1650 | /* TODO: Support trivial token lock, s.t. args.nr_replicas equals some | ||
1651 | * sentinel value, and implement special-case algorithms. There is currently | ||
1652 | * a lot of overhead for a trivial token lock since we allocate O(n)-worth | ||
1653 | * of data; this could be avoided with special-case algorithms. */ | ||
1654 | |||
1670 | struct ikglp_semaphore* sem; | 1655 | struct ikglp_semaphore* sem; |
1671 | int nr_replicas = 0; | 1656 | struct ikglp_args args; |
1672 | int i; | 1657 | unsigned int i; |
1673 | 1658 | ||
1674 | BUG_ON(m <= 0); | 1659 | BUG_ON(m <= 0); |
1675 | 1660 | ||
1676 | if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) | 1661 | if(!access_ok(VERIFY_READ, uarg, sizeof(args))) |
1677 | { | 1662 | return(NULL); |
1663 | if(__copy_from_user(&args, uarg, sizeof(args))) | ||
1664 | return(NULL); | ||
1665 | |||
1666 | /* validation */ | ||
1667 | |||
1668 | /* there must be at least one resource */ | ||
1669 | if (args.nr_replicas < 1) { | ||
1670 | printk("Invalid number of replicas.\n"); | ||
1678 | return(NULL); | 1671 | return(NULL); |
1679 | } | 1672 | } |
1680 | if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) | 1673 | /* IKGLP_OPTIMAL_FIFO_LEN can only be determined if nr_max_holders |
1681 | { | 1674 | * is IKGLP_M_HOLDERS (number of CPUs) */ |
1675 | if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN && | ||
1676 | args.max_in_fifos != IKGLP_M_IN_FIFOS) { | ||
1677 | printk("Cannot compute optimal FIFO length if max_in_fifos != IKGLP_M_IN_FIFOS\n"); | ||
1682 | return(NULL); | 1678 | return(NULL); |
1683 | } | 1679 | } |
1684 | if(nr_replicas < 1) | 1680 | if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) && |
1685 | { | 1681 | (args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) && |
1682 | (args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) { | ||
1683 | printk("Not enough total FIFO space for specified max requests in FIFOs.\n"); | ||
1686 | return(NULL); | 1684 | return(NULL); |
1687 | } | 1685 | } |
1688 | 1686 | ||
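The validation above reads a richer argument block from user space than the old single nr_replicas integer. The exact layout of struct ikglp_args lives in an unshown uapi header, so the declaration below is only an assumption reconstructed from the fields and sentinel constants referenced in this hunk:

    /* Assumed shape of the user-supplied arguments (field order is a guess). */
    struct ikglp_args {
        unsigned int nr_replicas;   /* k: number of replicas; must be >= 1 */
        unsigned int max_in_fifos;  /* cap on requests held in FIFOs, or
                                       IKGLP_M_IN_FIFOS / IKGLP_UNLIMITED_IN_FIFOS */
        unsigned int max_fifo_len;  /* per-FIFO cap, or IKGLP_OPTIMAL_FIFO_LEN /
                                       IKGLP_UNLIMITED_FIFO_LEN */
    };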
@@ -1693,7 +1691,7 @@ struct litmus_lock* ikglp_new(int m, | |||
1693 | } | 1691 | } |
1694 | memset(sem, 0, sizeof(*sem)); | 1692 | memset(sem, 0, sizeof(*sem)); |
1695 | 1693 | ||
1696 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); | 1694 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, GFP_KERNEL); |
1697 | if(!sem->fifo_queues) | 1695 | if(!sem->fifo_queues) |
1698 | { | 1696 | { |
1699 | kfree(sem); | 1697 | kfree(sem); |
@@ -1712,17 +1710,21 @@ struct litmus_lock* ikglp_new(int m, | |||
1712 | 1710 | ||
1713 | raw_spin_lock_init(&sem->real_lock); | 1711 | raw_spin_lock_init(&sem->real_lock); |
1714 | 1712 | ||
1715 | sem->nr_replicas = nr_replicas; | 1713 | sem->nr_replicas = args.nr_replicas; |
1716 | sem->m = m; | 1714 | sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ? |
1717 | sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); | 1715 | m : |
1716 | args.max_in_fifos; | ||
1717 | sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ? | ||
1718 | (sem->max_in_fifos/args.nr_replicas) + ((sem->max_in_fifos%args.nr_replicas) != 0) : | ||
1719 | args.max_fifo_len; | ||
1718 | sem->nr_in_fifos = 0; | 1720 | sem->nr_in_fifos = 0; |
1719 | 1721 | ||
1720 | TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", | 1722 | TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n", |
1721 | sem->m, | 1723 | sem->max_in_fifos, |
1722 | sem->nr_replicas, | 1724 | sem->nr_replicas, |
1723 | sem->max_fifo_len); | 1725 | sem->max_fifo_len); |
1724 | 1726 | ||
1725 | for(i = 0; i < nr_replicas; ++i) | 1727 | for(i = 0; i < args.nr_replicas; ++i) |
1726 | { | 1728 | { |
1727 | struct fifo_queue* q = &(sem->fifo_queues[i]); | 1729 | struct fifo_queue* q = &(sem->fifo_queues[i]); |
1728 | 1730 | ||
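When the caller asks for IKGLP_OPTIMAL_FIFO_LEN, the hunk above derives the per-queue bound as the ceiling of max_in_fifos over nr_replicas using pure integer arithmetic. A self-contained restatement of that expression (the helper name is illustrative, not from the tree):

    /* ceil(m/k) without floating point, exactly as computed above:
     * e.g. m = 9 pending requests over k = 4 replicas -> 9/4 + (9%4 != 0) = 3. */
    static inline unsigned int optimal_fifo_len(unsigned int m, unsigned int k)
    {
        return (m / k) + ((m % k) != 0);
    }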
@@ -1766,33 +1768,13 @@ struct litmus_lock* ikglp_new(int m, | |||
1766 | 1768 | ||
1767 | 1769 | ||
1768 | 1770 | ||
1771 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1769 | 1772 | ||
1773 | /****************************************************************************/ | ||
1774 | /* AFFINITY HEURISTICS */ | ||
1775 | /****************************************************************************/ | ||
1770 | 1776 | ||
1771 | 1777 | ||
1772 | |||
1773 | |||
1774 | |||
1775 | |||
1776 | |||
1777 | |||
1778 | |||
1779 | |||
1780 | |||
1781 | |||
1782 | |||
1783 | |||
1784 | |||
1785 | |||
1786 | |||
1787 | |||
1788 | |||
1789 | |||
1790 | |||
1791 | |||
1792 | |||
1793 | |||
1794 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1795 | |||
1796 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) | 1778 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) |
1797 | { | 1779 | { |
1798 | int gpu = replica % aff->nr_rsrc; | 1780 | int gpu = replica % aff->nr_rsrc; |
@@ -1856,7 +1838,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1856 | struct ikglp_affinity* ikglp_aff; | 1838 | struct ikglp_affinity* ikglp_aff; |
1857 | struct gpu_affinity_observer_args aff_args; | 1839 | struct gpu_affinity_observer_args aff_args; |
1858 | struct ikglp_semaphore* sem; | 1840 | struct ikglp_semaphore* sem; |
1859 | int i; | 1841 | unsigned int i; |
1860 | unsigned long flags; | 1842 | unsigned long flags; |
1861 | 1843 | ||
1862 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | 1844 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { |
@@ -1873,23 +1855,17 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1873 | return(NULL); | 1855 | return(NULL); |
1874 | } | 1856 | } |
1875 | 1857 | ||
1876 | if((aff_args.nr_simult_users <= 0) || | 1858 | if((aff_args.rho <= 0) || |
1877 | (sem->nr_replicas%aff_args.nr_simult_users != 0)) { | 1859 | (sem->nr_replicas%aff_args.rho != 0)) { |
1878 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | 1860 | TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users " |
1879 | "(%d) per replica. #replicas should be evenly divisible " | 1861 | "(%u) per replica. #replicas should be evenly divisible " |
1880 | "by #simult_users.\n", | 1862 | "by #simult_users.\n", |
1881 | sem->litmus_lock.ident, | 1863 | sem->litmus_lock.ident, |
1882 | sem->nr_replicas, | 1864 | sem->nr_replicas, |
1883 | aff_args.nr_simult_users); | 1865 | aff_args.rho); |
1884 | return(NULL); | 1866 | return(NULL); |
1885 | } | 1867 | } |
1886 | 1868 | ||
1887 | // if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
1888 | // TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | ||
1889 | // NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
1890 | //// return(NULL); | ||
1891 | // } | ||
1892 | |||
1893 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | 1869 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); |
1894 | if(!ikglp_aff) { | 1870 | if(!ikglp_aff) { |
1895 | return(NULL); | 1871 | return(NULL); |
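The renamed rho field is the number of tasks allowed to use one GPU simultaneously, and the check above requires the replica count to be a whole multiple of it, so that replicas map cleanly back onto physical GPUs. A small worked example of that rule (values invented, helper name illustrative):

    /* Worked example of the divisibility rule enforced above. */
    static int aff_args_ok(unsigned int nr_replicas, unsigned int rho)
    {
        return rho > 0 && (nr_replicas % rho) == 0;
    }
    /* aff_args_ok(6, 2) -> 1: 3 GPUs x rho = 2 users each, nr_rsrc = 6/2 = 3.
     * aff_args_ok(7, 2) -> 0: replicas cannot be split evenly across GPUs.   */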
@@ -1901,14 +1877,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1901 | return(NULL); | 1877 | return(NULL); |
1902 | } | 1878 | } |
1903 | 1879 | ||
1904 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | 1880 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); |
1905 | if(!ikglp_aff->nr_cur_users_on_rsrc) { | 1881 | if(!ikglp_aff->nr_cur_users_on_rsrc) { |
1906 | kfree(ikglp_aff->q_info); | 1882 | kfree(ikglp_aff->q_info); |
1907 | kfree(ikglp_aff); | 1883 | kfree(ikglp_aff); |
1908 | return(NULL); | 1884 | return(NULL); |
1909 | } | 1885 | } |
1910 | 1886 | ||
1911 | ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | 1887 | ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); |
1912 | if(!ikglp_aff->nr_aff_on_rsrc) { | 1888 | if(!ikglp_aff->nr_aff_on_rsrc) { |
1913 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | 1889 | kfree(ikglp_aff->nr_cur_users_on_rsrc); |
1914 | kfree(ikglp_aff->q_info); | 1890 | kfree(ikglp_aff->q_info); |
@@ -1920,7 +1896,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1920 | 1896 | ||
1921 | ikglp_aff->ops = ikglp_ops; | 1897 | ikglp_aff->ops = ikglp_ops; |
1922 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; | 1898 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; |
1923 | ikglp_aff->nr_simult = aff_args.nr_simult_users; | 1899 | ikglp_aff->nr_simult = aff_args.rho; |
1924 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; | 1900 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; |
1925 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; | 1901 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; |
1926 | 1902 | ||
@@ -1930,7 +1906,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1930 | ikglp_aff->relax_max_fifo_len); | 1906 | ikglp_aff->relax_max_fifo_len); |
1931 | 1907 | ||
1932 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); | 1908 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); |
1933 | memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); | 1909 | memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(unsigned int)*(ikglp_aff->nr_rsrc)); |
1934 | 1910 | ||
1935 | for(i = 0; i < sem->nr_replicas; ++i) { | 1911 | for(i = 0; i < sem->nr_replicas; ++i) { |
1936 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; | 1912 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; |
@@ -1950,9 +1926,6 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1950 | return &ikglp_aff->obs; | 1926 | return &ikglp_aff->obs; |
1951 | } | 1927 | } |
1952 | 1928 | ||
1953 | |||
1954 | |||
1955 | |||
1956 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, | 1929 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, |
1957 | struct fifo_queue* fq) { | 1930 | struct fifo_queue* fq) { |
1958 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1931 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
@@ -1960,29 +1933,28 @@ static int gpu_replica_to_resource(struct ikglp_affinity* aff, | |||
1960 | } | 1933 | } |
1961 | 1934 | ||
1962 | 1935 | ||
1963 | // Smart IKGLP Affinity | ||
1964 | 1936 | ||
1965 | //static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) | 1937 | /*--------------------------------------------------------------------------*/ |
1966 | //{ | 1938 | /* ADVANCED AFFINITY HEURISTICS */ |
1967 | // struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1939 | /* */ |
1968 | // struct ikglp_queue_info *shortest = &aff->q_info[0]; | 1940 | /* These heuristics estimate FIFO length wait times and try to enqueue */ |
1969 | // int i; | 1941 | /* tasks into the shortest queues. When two queues are equivalent, the GPU */ |
1970 | // | 1942 | /* that maintains affinity is selected. When a task has no affinity, the */ |
1971 | // for(i = 1; i < sem->nr_replicas; ++i) { | 1943 | /* heuristic tries to get the GPU with the fewest number of other tasks */ |
1972 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { | 1944 | /* with affinity on that GPU. */ |
1973 | // shortest = &aff->q_info[i]; | 1945 | /* */ |
1974 | // } | 1946 | /* Heuristics to explore in the future: */ |
1975 | // } | 1947 | /* - Utilization */ |
1976 | // | 1948 | /* - Longest non-preemptive section */ |
1977 | // return(shortest); | 1949 | /* - Criticality */ |
1978 | //} | 1950 | /* - Task period */ |
1951 | /*--------------------------------------------------------------------------*/ | ||
1979 | 1952 | ||
1980 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | 1953 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) |
1981 | { | 1954 | { |
1982 | // advise_enqueue must be smart as not not break IKGLP rules: | 1955 | // advise_enqueue must be smart as not not break IKGLP rules: |
1983 | // * No queue can be greater than ceil(m/k) in length. We may return | 1956 | // * No queue can be greater than ceil(m/k) in length, unless |
1984 | // such a queue, but IKGLP will be smart enough as to send requests | 1957 | // 'relax_max_fifo_len' is asserted |
1985 | // to donors or PQ. | ||
1986 | // * Cannot let a queue idle if there exist waiting PQ/donors | 1958 | // * Cannot let a queue idle if there exist waiting PQ/donors |
1987 | // -- needed to guarantee parallel progress of waiters. | 1959 | // -- needed to guarantee parallel progress of waiters. |
1988 | // | 1960 | // |
@@ -1993,14 +1965,15 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
1993 | 1965 | ||
1994 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 1966 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
1995 | lt_t min_len; | 1967 | lt_t min_len; |
1996 | int min_nr_users, min_nr_aff_users; | 1968 | unsigned int min_nr_users, min_nr_aff_users; |
1997 | struct ikglp_queue_info *shortest, *aff_queue; | 1969 | struct ikglp_queue_info *shortest, *aff_queue; |
1998 | struct fifo_queue *to_enqueue; | 1970 | struct fifo_queue *to_enqueue; |
1999 | int i; | 1971 | unsigned int i; |
2000 | int affinity_gpu; | 1972 | int affinity_gpu; |
2001 | 1973 | ||
2002 | int max_fifo_len = (aff->relax_max_fifo_len) ? | 1974 | unsigned int max_fifo_len = (aff->relax_max_fifo_len) ? |
2003 | sem->m : sem->max_fifo_len; | 1975 | sem->max_in_fifos : /* allow possibility of all requests on same queue */ |
1976 | sem->max_fifo_len; /* constraint FIFO len */ | ||
2004 | 1977 | ||
2005 | // if we have no affinity, find the GPU with the least number of users | 1978 | // if we have no affinity, find the GPU with the least number of users |
2006 | // with active affinity | 1979 | // with active affinity |
@@ -2037,7 +2010,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
2037 | min_nr_aff_users = *(shortest->nr_aff_users); | 2010 | min_nr_aff_users = *(shortest->nr_aff_users); |
2038 | 2011 | ||
2039 | 2012 | ||
2040 | TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", | 2013 | TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n", |
2041 | get_gpu_estimate(t, MIG_LOCAL), | 2014 | get_gpu_estimate(t, MIG_LOCAL), |
2042 | ikglp_get_idx(sem, shortest->q), | 2015 | ikglp_get_idx(sem, shortest->q), |
2043 | shortest->q->count, | 2016 | shortest->q->count, |
@@ -2119,8 +2092,6 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t | |||
2119 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | 2092 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); |
2120 | 2093 | ||
2121 | return to_enqueue; | 2094 | return to_enqueue; |
2122 | |||
2123 | //return(sem->shortest_fifo_queue); | ||
2124 | } | 2095 | } |
2125 | 2096 | ||
2126 | 2097 | ||
@@ -2334,7 +2305,6 @@ static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, | |||
2334 | 2305 | ||
2335 | donee = NULL; | 2306 | donee = NULL; |
2336 | donee_node = NULL; | 2307 | donee_node = NULL; |
2337 | //*dist_from_head = sem->max_fifo_len + 1; | ||
2338 | *dist_from_head = IKGLP_INVAL_DISTANCE; | 2308 | *dist_from_head = IKGLP_INVAL_DISTANCE; |
2339 | 2309 | ||
2340 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); | 2310 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); |
@@ -2630,7 +2600,6 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) | |||
2630 | // decrement affinity count on old GPU | 2600 | // decrement affinity count on old GPU |
2631 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; | 2601 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; |
2632 | --(aff->nr_aff_on_rsrc[aff_rsrc]); | 2602 | --(aff->nr_aff_on_rsrc[aff_rsrc]); |
2633 | // aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t); | ||
2634 | 2603 | ||
2635 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { | 2604 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { |
2636 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); | 2605 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); |
@@ -2676,12 +2645,10 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | |||
2676 | if(last_gpu >= 0) { | 2645 | if(last_gpu >= 0) { |
2677 | int old_rsrc = last_gpu - aff->offset; | 2646 | int old_rsrc = last_gpu - aff->offset; |
2678 | --(aff->nr_aff_on_rsrc[old_rsrc]); | 2647 | --(aff->nr_aff_on_rsrc[old_rsrc]); |
2679 | // aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t)); | ||
2680 | } | 2648 | } |
2681 | 2649 | ||
2682 | // increment affinity count on new GPU | 2650 | // increment affinity count on new GPU |
2683 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); | 2651 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); |
2684 | // aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t)); | ||
2685 | tsk_rt(t)->rsrc_exit_cb_args = aff; | 2652 | tsk_rt(t)->rsrc_exit_cb_args = aff; |
2686 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; | 2653 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; |
2687 | } | 2654 | } |
@@ -2751,20 +2718,18 @@ struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* op | |||
2751 | 2718 | ||
2752 | 2719 | ||
2753 | 2720 | ||
2754 | 2721 | /*--------------------------------------------------------------------------*/ | |
2755 | 2722 | /* SIMPLE LOAD-BALANCING AFFINITY HEURISTIC */ | |
2756 | 2723 | /*--------------------------------------------------------------------------*/ | |
2757 | |||
2758 | // Simple ikglp Affinity (standard ikglp with auto-gpu registration) | ||
2759 | 2724 | ||
2760 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | 2725 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) |
2761 | { | 2726 | { |
2762 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | 2727 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); |
2763 | int min_count; | 2728 | unsigned int min_count; |
2764 | int min_nr_users; | 2729 | unsigned int min_nr_users; |
2765 | struct ikglp_queue_info *shortest; | 2730 | struct ikglp_queue_info *shortest; |
2766 | struct fifo_queue *to_enqueue; | 2731 | struct fifo_queue *to_enqueue; |
2767 | int i; | 2732 | unsigned int i; |
2768 | 2733 | ||
2769 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); | 2734 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); |
2770 | 2735 | ||
@@ -2772,13 +2737,13 @@ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, s | |||
2772 | min_count = shortest->q->count; | 2737 | min_count = shortest->q->count; |
2773 | min_nr_users = *(shortest->nr_cur_users); | 2738 | min_nr_users = *(shortest->nr_cur_users); |
2774 | 2739 | ||
2775 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | 2740 | TRACE_CUR("queue %d: waiters = %u, total holders = %u\n", |
2776 | ikglp_get_idx(sem, shortest->q), | 2741 | ikglp_get_idx(sem, shortest->q), |
2777 | shortest->q->count, | 2742 | shortest->q->count, |
2778 | min_nr_users); | 2743 | min_nr_users); |
2779 | 2744 | ||
2780 | for(i = 1; i < sem->nr_replicas; ++i) { | 2745 | for(i = 1; i < sem->nr_replicas; ++i) { |
2781 | int len = aff->q_info[i].q->count; | 2746 | unsigned int len = aff->q_info[i].q->count; |
2782 | 2747 | ||
2783 | // queue is smaller, or they're equal and the other has a smaller number | 2748 | // queue is smaller, or they're equal and the other has a smaller number |
2784 | // of total users. | 2749 | // of total users. |
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 041561839976..7dd866185623 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -21,7 +21,7 @@ static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | |||
21 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, | 21 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, |
22 | struct task_struct* holder) | 22 | struct task_struct* holder) |
23 | { | 23 | { |
24 | int i; | 24 | unsigned int i; |
25 | for(i = 0; i < sem->num_resources; ++i) | 25 | for(i = 0; i < sem->num_resources; ++i) |
26 | if(sem->queues[i].owner == holder) | 26 | if(sem->queues[i].owner == holder) |
27 | return(&sem->queues[i]); | 27 | return(&sem->queues[i]); |
@@ -79,7 +79,7 @@ static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, | |||
79 | { | 79 | { |
80 | /* must hold sem->lock */ | 80 | /* must hold sem->lock */ |
81 | 81 | ||
82 | int i; | 82 | unsigned int i; |
83 | 83 | ||
84 | *to_steal = NULL; | 84 | *to_steal = NULL; |
85 | *to_steal_from = NULL; | 85 | *to_steal_from = NULL; |
@@ -438,7 +438,7 @@ int kfmlp_close(struct litmus_lock* l) | |||
438 | struct kfmlp_queue *my_queue; | 438 | struct kfmlp_queue *my_queue; |
439 | unsigned long flags; | 439 | unsigned long flags; |
440 | 440 | ||
441 | int owner; | 441 | unsigned int owner; |
442 | 442 | ||
443 | spin_lock_irqsave(&sem->lock, flags); | 443 | spin_lock_irqsave(&sem->lock, flags); |
444 | 444 | ||
@@ -465,8 +465,8 @@ void kfmlp_free(struct litmus_lock* l) | |||
465 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | 465 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) |
466 | { | 466 | { |
467 | struct kfmlp_semaphore* sem; | 467 | struct kfmlp_semaphore* sem; |
468 | int num_resources = 0; | 468 | unsigned int num_resources = 0; |
469 | int i; | 469 | unsigned int i; |
470 | 470 | ||
471 | if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) | 471 | if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) |
472 | { | 472 | { |
@@ -560,7 +560,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
560 | struct kfmlp_affinity* kfmlp_aff; | 560 | struct kfmlp_affinity* kfmlp_aff; |
561 | struct gpu_affinity_observer_args aff_args; | 561 | struct gpu_affinity_observer_args aff_args; |
562 | struct kfmlp_semaphore* sem; | 562 | struct kfmlp_semaphore* sem; |
563 | int i; | 563 | unsigned int i; |
564 | unsigned long flags; | 564 | unsigned long flags; |
565 | 565 | ||
566 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | 566 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { |
@@ -577,14 +577,14 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
577 | return(NULL); | 577 | return(NULL); |
578 | } | 578 | } |
579 | 579 | ||
580 | if((aff_args.nr_simult_users <= 0) || | 580 | if((aff_args.rho <= 0) || |
581 | (sem->num_resources%aff_args.nr_simult_users != 0)) { | 581 | (sem->num_resources%aff_args.rho != 0)) { |
582 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | 582 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " |
583 | "(%d) per replica. #replicas should be evenly divisible " | 583 | "(%d) per replica. #replicas should be evenly divisible " |
584 | "by #simult_users.\n", | 584 | "by #simult_users.\n", |
585 | sem->litmus_lock.ident, | 585 | sem->litmus_lock.ident, |
586 | sem->num_resources, | 586 | sem->num_resources, |
587 | aff_args.nr_simult_users); | 587 | aff_args.rho); |
588 | return(NULL); | 588 | return(NULL); |
589 | } | 589 | } |
590 | 590 | ||
@@ -605,7 +605,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
605 | return(NULL); | 605 | return(NULL); |
606 | } | 606 | } |
607 | 607 | ||
608 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); | 608 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->num_resources / aff_args.rho), GFP_KERNEL); |
609 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { | 609 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { |
610 | kfree(kfmlp_aff->q_info); | 610 | kfree(kfmlp_aff->q_info); |
611 | kfree(kfmlp_aff); | 611 | kfree(kfmlp_aff); |
@@ -616,10 +616,10 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
616 | 616 | ||
617 | kfmlp_aff->ops = kfmlp_ops; | 617 | kfmlp_aff->ops = kfmlp_ops; |
618 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; | 618 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; |
619 | kfmlp_aff->nr_simult = aff_args.nr_simult_users; | 619 | kfmlp_aff->nr_simult = aff_args.rho; |
620 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; | 620 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; |
621 | 621 | ||
622 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); | 622 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(unsigned int)*(sem->num_resources / kfmlp_aff->nr_rsrc)); |
623 | 623 | ||
624 | for(i = 0; i < sem->num_resources; ++i) { | 624 | for(i = 0; i < sem->num_resources; ++i) { |
625 | kfmlp_aff->q_info[i].q = &sem->queues[i]; | 625 | kfmlp_aff->q_info[i].q = &sem->queues[i]; |
@@ -669,10 +669,10 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct | |||
669 | { | 669 | { |
670 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 670 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
671 | lt_t min_len; | 671 | lt_t min_len; |
672 | int min_nr_users; | 672 | unsigned int min_nr_users; |
673 | struct kfmlp_queue_info *shortest; | 673 | struct kfmlp_queue_info *shortest; |
674 | struct kfmlp_queue *to_enqueue; | 674 | struct kfmlp_queue *to_enqueue; |
675 | int i; | 675 | unsigned int i; |
676 | int affinity_gpu; | 676 | int affinity_gpu; |
677 | 677 | ||
678 | // simply pick the shortest queue if, we have no affinity, or we have | 678 | // simply pick the shortest queue if, we have no affinity, or we have |
@@ -893,11 +893,11 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op | |||
893 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | 893 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) |
894 | { | 894 | { |
895 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 895 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
896 | int min_count; | 896 | unsigned int min_count; |
897 | int min_nr_users; | 897 | unsigned int min_nr_users; |
898 | struct kfmlp_queue_info *shortest; | 898 | struct kfmlp_queue_info *shortest; |
899 | struct kfmlp_queue *to_enqueue; | 899 | struct kfmlp_queue *to_enqueue; |
900 | int i; | 900 | unsigned int i; |
901 | 901 | ||
902 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); | 902 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); |
903 | 903 | ||
@@ -911,7 +911,7 @@ struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, | |||
911 | min_nr_users); | 911 | min_nr_users); |
912 | 912 | ||
913 | for(i = 1; i < sem->num_resources; ++i) { | 913 | for(i = 1; i < sem->num_resources; ++i) { |
914 | int len = aff->q_info[i].q->count; | 914 | unsigned int len = aff->q_info[i].q->count; |
915 | 915 | ||
916 | // queue is smaller, or they're equal and the other has a smaller number | 916 | // queue is smaller, or they're equal and the other has a smaller number |
917 | // of total users. | 917 | // of total users. |
diff --git a/litmus/locking.c b/litmus/locking.c
index eddc67a4d36a..8ba46f85f5c6 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -234,12 +234,12 @@ void print_hp_waiters(struct binheap_node* n, int depth) | |||
234 | 234 | ||
235 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | 235 | #ifdef CONFIG_LITMUS_DGL_SUPPORT |
236 | 236 | ||
237 | struct prioq_mutex; | 237 | struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) |
238 | |||
239 | void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) | ||
240 | { | 238 | { |
241 | int start = dgl_wait->last_primary; | 239 | int num_locks = dgl_wait->size; |
242 | extern void __dump_prioq_lock_info(struct prioq_mutex *mutex); | 240 | int last = dgl_wait->last_primary; |
241 | int start; | ||
242 | int idx; | ||
243 | 243 | ||
244 | /* | 244 | /* |
245 | We pick the next lock in reverse order. This causes inheritance propagation | 245 | We pick the next lock in reverse order. This causes inheritance propagation |
@@ -250,55 +250,42 @@ void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lo | |||
250 | BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); | 250 | BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); |
251 | 251 | ||
252 | // note reverse order | 252 | // note reverse order |
253 | for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; | 253 | // Try to enable priority on a lock that has an owner. |
254 | dgl_wait->last_primary != start; | 254 | idx = start = (last != 0) ? last - 1 : num_locks - 1; |
255 | dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) | 255 | do { |
256 | { | 256 | struct litmus_lock *l = dgl_wait->locks[idx]; |
257 | 257 | ||
258 | struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; | 258 | if(!l->ops->is_owner(l, dgl_wait->task) && l->ops->get_owner(l)) { |
259 | 259 | dgl_wait->last_primary = idx; | |
260 | if(!l->ops->is_owner(l, dgl_wait->task) && | 260 | tsk_rt(dgl_wait->task)->blocked_lock = l; |
261 | l->ops->get_owner(l)) { | ||
262 | |||
263 | tsk_rt(dgl_wait->task)->blocked_lock = | ||
264 | dgl_wait->locks[dgl_wait->last_primary]; | ||
265 | mb(); | 261 | mb(); |
266 | |||
267 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); | 262 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); |
268 | |||
269 | l->ops->enable_priority(l, dgl_wait); | 263 | l->ops->enable_priority(l, dgl_wait); |
270 | 264 | return(l); | |
271 | return; | ||
272 | } | 265 | } |
273 | } | 266 | idx = (idx != 0) ? idx - 1 : num_locks - 1; |
267 | } while(idx != start); | ||
274 | 268 | ||
275 | // There was no one to push on. This can happen if the blocked task is | 269 | // There was no one to push on. This can happen if the blocked task is |
276 | // behind a task that is idling a prioq-mutex. | 270 | // behind a task that is idling a prioq-mutex. |
277 | 271 | ||
278 | // note reverse order | 272 | // note reverse order |
279 | dgl_wait->last_primary = start; | 273 | idx = (last != 0) ? last - 1 : num_locks - 1; |
280 | for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1; | 274 | do { |
281 | dgl_wait->last_primary != start; | 275 | struct litmus_lock *l = dgl_wait->locks[idx]; |
282 | dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1) | ||
283 | { | ||
284 | |||
285 | struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary]; | ||
286 | 276 | ||
287 | if(!l->ops->is_owner(l, dgl_wait->task)) { | 277 | if(!l->ops->is_owner(l, dgl_wait->task)) { |
288 | 278 | dgl_wait->last_primary = idx; | |
289 | tsk_rt(dgl_wait->task)->blocked_lock = | 279 | tsk_rt(dgl_wait->task)->blocked_lock = l; |
290 | dgl_wait->locks[dgl_wait->last_primary]; | ||
291 | mb(); | 280 | mb(); |
292 | |||
293 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); | 281 | TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident); |
294 | |||
295 | l->ops->enable_priority(l, dgl_wait); | 282 | l->ops->enable_priority(l, dgl_wait); |
296 | 283 | return(l); | |
297 | return; | ||
298 | } | 284 | } |
299 | } | 285 | idx = (idx != 0) ? idx - 1 : num_locks - 1; |
286 | } while(idx != start); | ||
300 | 287 | ||
301 | BUG(); | 288 | return(NULL); |
302 | } | 289 | } |
303 | 290 | ||
304 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) | 291 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) |
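The rewrite above changes select_next_lock() from a void function that only updated dgl_wait->last_primary into one that returns the lock the task will block on, or NULL when there is nothing left to wait for (a case the callers in this patch still treat as a bug). A hedged caller-side sketch of the new contract, with the behavior in comments:

    /* Sketch of how the DGL paths below consume the return value. */
    struct litmus_lock *next = select_next_lock(dgl_wait);
    if (next) {
        /* priority inheritance is already enabled on 'next'; release the
         * DGL spinlock and suspend until the whole group is granted. */
    } else {
        /* no lock to push on -- do_litmus_dgl_lock() BUG()s in this case. */
    }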
@@ -333,7 +320,12 @@ struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq, | |||
333 | return task; | 320 | return task; |
334 | } | 321 | } |
335 | 322 | ||
336 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) | 323 | void init_dgl_wait_state(dgl_wait_state_t *dgl_wait) |
324 | { | ||
325 | memset(dgl_wait, 0, sizeof(dgl_wait_state_t)); | ||
326 | } | ||
327 | |||
328 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t *dgl_wait) | ||
337 | { | 329 | { |
338 | init_waitqueue_entry(wq_node, dgl_wait->task); | 330 | init_waitqueue_entry(wq_node, dgl_wait->task); |
339 | wq_node->private = dgl_wait; | 331 | wq_node->private = dgl_wait; |
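init_dgl_wait_state() is new in this patch and simply zeroes the request descriptor before it is populated; sys_litmus_dgl_lock() below is its caller in this hunk set. A minimal usage sketch, assuming the obvious field names (the exact fill-in code sits outside the shown hunks):

    /* Sketch: clear the descriptor, then describe the group lock request. */
    dgl_wait_state_t dgl_wait_state;

    init_dgl_wait_state(&dgl_wait_state);   /* memset(.., 0, sizeof(..)) */
    dgl_wait_state.task = current;
    dgl_wait_state.size = dgl_size;         /* number of locks requested */
    /* ... dgl_wait_state.locks[i] filled from the od-table entries ... */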
@@ -403,83 +395,62 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) | |||
403 | TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); | 395 | TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr); |
404 | #endif | 396 | #endif |
405 | 397 | ||
406 | dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); | ||
407 | |||
408 | BUG_ON(dgl_wait->task != current); | 398 | BUG_ON(dgl_wait->task != current); |
409 | 399 | ||
410 | raw_spin_lock_irqsave(dgl_lock, irqflags); | ||
411 | |||
412 | dgl_wait->nr_remaining = dgl_wait->size; | 400 | dgl_wait->nr_remaining = dgl_wait->size; |
413 | 401 | ||
402 | dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); | ||
403 | raw_spin_lock_irqsave(dgl_lock, irqflags); | ||
404 | |||
414 | // try to acquire each lock. enqueue (non-blocking) if it is unavailable. | 405 | // try to acquire each lock. enqueue (non-blocking) if it is unavailable. |
415 | for(i = 0; i < dgl_wait->size; ++i) { | 406 | for(i = 0; i < dgl_wait->size; ++i) { |
416 | struct litmus_lock *l = dgl_wait->locks[i]; | 407 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
417 | 408 | ||
418 | // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. | 409 | // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. |
419 | 410 | ||
420 | if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { | 411 | if(tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i])) { |
421 | --(dgl_wait->nr_remaining); | 412 | --(dgl_wait->nr_remaining); |
422 | TRACE_CUR("Acquired lock %d immediatly.\n", l->ident); | 413 | TRACE_CUR("Acquired lock %d immediatly.\n", tmp->ident); |
423 | } | 414 | } |
424 | } | 415 | } |
425 | 416 | ||
426 | if(dgl_wait->nr_remaining == 0) { | 417 | if(dgl_wait->nr_remaining == 0) { |
427 | // acquired entire group immediatly | 418 | // acquired entire group immediatly |
428 | TRACE_CUR("Acquired all locks in DGL immediatly!\n"); | 419 | TRACE_CUR("Acquired all locks in DGL immediatly!\n"); |
420 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
429 | } | 421 | } |
430 | else { | 422 | else { |
423 | struct litmus_lock *first_primary; | ||
431 | 424 | ||
432 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", | 425 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", |
433 | dgl_wait->nr_remaining); | 426 | dgl_wait->nr_remaining); |
434 | 427 | ||
435 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 428 | first_primary = select_next_lock(dgl_wait); |
436 | // KLUDGE: don't count this suspension as time in the critical gpu | ||
437 | // critical section | ||
438 | if(tsk_rt(dgl_wait->task)->held_gpus) { | ||
439 | tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; | ||
440 | } | ||
441 | #endif | ||
442 | |||
443 | // note reverse order. see comments in select_next_lock for reason. | ||
444 | for(i = dgl_wait->size - 1; i >= 0; --i) { | ||
445 | struct litmus_lock *l = dgl_wait->locks[i]; | ||
446 | if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe | ||
447 | |||
448 | TRACE_CUR("Activating priority inheritance on lock %d\n", | ||
449 | l->ident); | ||
450 | |||
451 | TS_DGL_LOCK_SUSPEND; | ||
452 | |||
453 | l->ops->enable_priority(l, dgl_wait); | ||
454 | dgl_wait->last_primary = i; | ||
455 | 429 | ||
456 | TRACE_CUR("Suspending for lock %d\n", l->ident); | 430 | if (!first_primary) { |
457 | 431 | BUG(); | |
458 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending | 432 | // TRACE_CUR("We hold all the locks?\n"); |
433 | // raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
434 | // goto all_acquired; | ||
435 | } | ||
459 | 436 | ||
460 | suspend_for_lock(); // suspend!!! | 437 | TRACE_CUR("Suspending for lock %d\n", first_primary->ident); |
461 | 438 | ||
462 | TS_DGL_LOCK_RESUME; | 439 | TS_DGL_LOCK_SUSPEND; |
463 | 440 | ||
464 | TRACE_CUR("Woken up from DGL suspension.\n"); | 441 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending |
442 | suspend_for_lock(); | ||
465 | 443 | ||
466 | goto all_acquired; // we should hold all locks when we wake up. | 444 | TS_DGL_LOCK_RESUME; |
467 | } | ||
468 | } | ||
469 | 445 | ||
470 | TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n"); | 446 | TRACE_CUR("Woken up from DGL suspension.\n"); |
471 | //BUG(); | ||
472 | } | 447 | } |
473 | 448 | ||
474 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
475 | |||
476 | all_acquired: | ||
477 | |||
478 | // FOR SANITY CHECK FOR TESTING | 449 | // FOR SANITY CHECK FOR TESTING |
479 | // for(i = 0; i < dgl_wait->size; ++i) { | 450 | for(i = 0; i < dgl_wait->size; ++i) { |
480 | // struct litmus_lock *l = dgl_wait->locks[i]; | 451 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
481 | // BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 452 | BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); |
482 | // } | 453 | } |
483 | 454 | ||
484 | TRACE_CUR("Acquired entire DGL\n"); | 455 | TRACE_CUR("Acquired entire DGL\n"); |
485 | 456 | ||
@@ -493,7 +464,6 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
493 | int i; | 464 | int i; |
494 | unsigned long irqflags; //, dummyflags; | 465 | unsigned long irqflags; //, dummyflags; |
495 | raw_spinlock_t *dgl_lock; | 466 | raw_spinlock_t *dgl_lock; |
496 | struct litmus_lock *l; | ||
497 | struct task_struct *t = current; | 467 | struct task_struct *t = current; |
498 | 468 | ||
499 | #ifdef CONFIG_SCHED_DEBUG_TRACE | 469 | #ifdef CONFIG_SCHED_DEBUG_TRACE |
@@ -511,13 +481,19 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
511 | 481 | ||
512 | dgl_wait->nr_remaining = dgl_wait->size; | 482 | dgl_wait->nr_remaining = dgl_wait->size; |
513 | 483 | ||
484 | /* enqueue for all locks */ | ||
514 | for(i = 0; i < dgl_wait->size; ++i) { | 485 | for(i = 0; i < dgl_wait->size; ++i) { |
515 | struct litmus_lock *l = dgl_wait->locks[i]; | 486 | /* dgl_lock must only enqueue. cannot set TASK_UNINTERRUPTIBLE!! |
516 | // this should be a forced enqueue if atomic DGLs are needed. | 487 | * Note the difference in requirements with do_litmus_dgl_lock(). |
517 | l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i]); | 488 | */ |
489 | struct litmus_lock *tmp = dgl_wait->locks[i]; | ||
490 | tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i]); | ||
518 | } | 491 | } |
519 | 492 | ||
493 | /* now try to take all locks */ | ||
520 | if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { | 494 | if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) { |
495 | struct litmus_lock *l; | ||
496 | |||
521 | /* Failed to acquire all locks at once. | 497 | /* Failed to acquire all locks at once. |
522 | * Pick a lock to push on and suspend. */ | 498 | * Pick a lock to push on and suspend. */ |
523 | TRACE_CUR("Could not atomically acquire all locks.\n"); | 499 | TRACE_CUR("Could not atomically acquire all locks.\n"); |
@@ -526,26 +502,13 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait) | |||
526 | * __attempt_atomic_dgl_acquire() may actually succeed. */ | 502 | * __attempt_atomic_dgl_acquire() may actually succeed. */ |
527 | set_task_state(t, TASK_UNINTERRUPTIBLE); | 503 | set_task_state(t, TASK_UNINTERRUPTIBLE); |
528 | 504 | ||
529 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 505 | l = select_next_lock(dgl_wait); |
530 | // KLUDGE: don't count this suspension as time in the critical gpu | ||
531 | // critical section | ||
532 | if(tsk_rt(t)->held_gpus) { | ||
533 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
534 | } | ||
535 | #endif | ||
536 | 506 | ||
537 | // select a lock to push priority on | 507 | TRACE_CUR("Suspending for lock %d\n", l->ident); |
538 | dgl_wait->last_primary = 0; // default | ||
539 | select_next_lock(dgl_wait); // may change value of last_primary | ||
540 | |||
541 | l = dgl_wait->locks[dgl_wait->last_primary]; | ||
542 | 508 | ||
543 | TS_DGL_LOCK_SUSPEND; | 509 | TS_DGL_LOCK_SUSPEND; |
544 | 510 | ||
545 | TRACE_CUR("Suspending for lock %d\n", l->ident); | ||
546 | |||
547 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending | 511 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending |
548 | |||
549 | suspend_for_lock(); // suspend!!! | 512 | suspend_for_lock(); // suspend!!! |
550 | 513 | ||
551 | TS_DGL_LOCK_RESUME; | 514 | TS_DGL_LOCK_RESUME; |
@@ -562,8 +525,8 @@ all_acquired: | |||
562 | 525 | ||
563 | // SANITY CHECK FOR TESTING | 526 | // SANITY CHECK FOR TESTING |
564 | for(i = 0; i < dgl_wait->size; ++i) { | 527 | for(i = 0; i < dgl_wait->size; ++i) { |
565 | struct litmus_lock *l = dgl_wait->locks[i]; | 528 | struct litmus_lock *tmp = dgl_wait->locks[i]; |
566 | BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 529 | BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task)); |
567 | } | 530 | } |
568 | 531 | ||
569 | TRACE_CUR("Acquired entire DGL\n"); | 532 | TRACE_CUR("Acquired entire DGL\n"); |
@@ -603,6 +566,8 @@ asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) | |||
603 | err = sys_litmus_lock(dgl_ods[0]); | 566 | err = sys_litmus_lock(dgl_ods[0]); |
604 | } | 567 | } |
605 | else { | 568 | else { |
569 | init_dgl_wait_state(&dgl_wait_state); | ||
570 | |||
606 | for(i = 0; i < dgl_size; ++i) { | 571 | for(i = 0; i < dgl_size; ++i) { |
607 | struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); | 572 | struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); |
608 | if(entry && is_lock(entry)) { | 573 | if(entry && is_lock(entry)) { |