Diffstat (limited to 'litmus')
-rw-r--r--	litmus/ikglp_lock.c	245
-rw-r--r--	litmus/kfmlp_lock.c	 36
-rw-r--r--	litmus/locking.c	177
3 files changed, 194 insertions(+), 264 deletions(-)
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 3fd760799a75..cab0d7f938f9 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -103,8 +103,7 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
 	struct task_struct *queued, *found = NULL;

 	list_for_each(pos, &kqueue->wait.task_list) {
-		queued = (struct task_struct*) list_entry(pos,
-					wait_queue_t, task_list)->private;
+		queued = (struct task_struct*) list_entry(pos, wait_queue_t, task_list)->private;

 		/* Compare task prios, find high prio task. */
 		if(queued != skip && litmus->compare(queued, found))
@@ -232,22 +231,14 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem,
 								  struct task_struct *t,
 								  ikglp_heap_node_t *node)
 {
-
-
 	node->task = t;
 	INIT_BINHEAP_NODE(&node->node);

-	if(sem->top_m_size < sem->m) {
+	if(sem->top_m_size < sem->max_in_fifos) {
 		TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
 				  t->comm, t->pid);
-//		TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
-//		print_global_list(sem->top_m.root, 1);
-
 		binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
 		++(sem->top_m_size);
-
-//		TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
-//		print_global_list(sem->top_m.root, 1);
 	}
 	else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
 		ikglp_heap_node_t *evicted =
@@ -257,12 +248,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem,
 				  t->comm, t->pid,
 				  evicted->task->comm, evicted->task->pid);

-//		TRACE_CUR("Not-Top-M Before:\n");
-//		print_global_list(sem->not_top_m.root, 1);
-//		TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
-//		print_global_list(sem->top_m.root, 1);
-
-
 		binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
 		INIT_BINHEAP_NODE(&evicted->node);
 		binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
@@ -279,8 +264,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem,
 	else {
 		TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
 				  t->comm, t->pid);
-//		TRACE_CUR("Not-Top-M Before:\n");
-//		print_global_list(sem->not_top_m.root, 1);

 		binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);

@@ -303,12 +286,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem,
 	if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
 		TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);

-//		TRACE_CUR("Not-Top-M Before:\n");
-//		print_global_list(sem->not_top_m.root, 1);
-//		TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
-//		print_global_list(sem->top_m.root, 1);
-
-
 		binheap_delete(&node->node, &sem->top_m);

 		if(!binheap_empty(&sem->not_top_m)) {
@@ -337,8 +314,6 @@ static void ikglp_del_global_list(struct ikglp_semaphore *sem,
 	}
 	else {
 		TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
-//		TRACE_CUR("Not-Top-M Before:\n");
-//		print_global_list(sem->not_top_m.root, 1);

 		binheap_delete(&node->node, &sem->not_top_m);

@@ -355,10 +330,6 @@ static void ikglp_add_donees(struct ikglp_semaphore *sem,
 							 struct task_struct *t,
 							 ikglp_donee_heap_node_t* node)
 {
-//	TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
-//	TRACE_CUR("donees Before:\n");
-//	print_donees(sem, sem->donees.root, 1);
-
 	node->task = t;
 	node->donor_info = NULL;
 	node->fq = fq;
@@ -928,7 +899,7 @@ int ikglp_lock(struct litmus_lock* l)

 	TRACE_CUR("Requesting a replica from lock %d.\n", l->ident);

-	if(sem->nr_in_fifos < sem->m) {
+	if(sem->nr_in_fifos < sem->max_in_fifos) {
 		// enqueue somwhere
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 		fq = (sem->aff_obs) ?
@@ -1272,10 +1243,13 @@ int ikglp_unlock(struct litmus_lock* l)
 			donee = t;

 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-			if(sem->aff_obs)
+			if(sem->aff_obs) {
 				fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
-			if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len)
-				fq_of_new_on_fq = fq;  /* discard recommendation */
+				if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) {
+					WARN_ON(1);
+					fq_of_new_on_fq = fq;
+				}
+			}
 			else
 				fq_of_new_on_fq = fq;
 #else
@@ -1308,10 +1282,13 @@ int ikglp_unlock(struct litmus_lock* l)
 		binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);

 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-		if(sem->aff_obs)
+		if(sem->aff_obs) {
 			fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
-		if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len)
-			fq_of_new_on_fq = fq;  /* discard recommendation */
+			if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) {
+				WARN_ON(1);
+				fq_of_new_on_fq = fq;
+			}
+		}
 		else
 			fq_of_new_on_fq = fq;
 #else
@@ -1335,10 +1312,13 @@ int ikglp_unlock(struct litmus_lock* l)
 		new_on_fq = pq_wait->task;

 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-		if(sem->aff_obs)
+		if(sem->aff_obs) {
 			fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
-		if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len)
-			fq_of_new_on_fq = fq;  /* discard recommendation */
+			if((fq_of_new_on_fq->count >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) {
+				WARN_ON(1);
+				fq_of_new_on_fq = fq;
+			}
+		}
 		else
 			fq_of_new_on_fq = fq;
 #else
@@ -1663,26 +1643,44 @@ void ikglp_free(struct litmus_lock* l)



-struct litmus_lock* ikglp_new(int m,
+struct litmus_lock* ikglp_new(unsigned int m,
 							  struct litmus_lock_ops* ops,
-							  void* __user arg)
+							  void* __user uarg)
 {
+	/* TODO: Support trivial token lock, s.t. args.nr_replicas equals some
+	 * sentinel value, and implement special-case algorithms. There is currently
+	 * a lot of overhead for a trivial token lock since we allocate O(n)-worth
+	 * of data; this could be avoided with special-case algorithms. */
+
 	struct ikglp_semaphore* sem;
-	int nr_replicas = 0;
-	int i;
+	struct ikglp_args args;
+	unsigned int i;

 	BUG_ON(m <= 0);

-	if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
-	{
+	if(!access_ok(VERIFY_READ, uarg, sizeof(args)))
+		return(NULL);
+	if(__copy_from_user(&args, uarg, sizeof(args)))
+		return(NULL);
+
+	/* validation */
+
+	/* there must be at least one resource */
+	if (args.nr_replicas < 1) {
+		printk("Invalid number of replicas.\n");
 		return(NULL);
 	}
-	if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
-	{
+	/* IKGLP_OPTIMAL_FIFO_LEN can only be determined if nr_max_holders
+	 * is IKGLP_M_HOLDERS (number of CPUs) */
+	if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN &&
+		args.max_in_fifos != IKGLP_M_IN_FIFOS) {
+		printk("Cannot compute optimal FIFO length if max_in_fifos != IKGLP_M_IN_FIFOS\n");
 		return(NULL);
 	}
-	if(nr_replicas < 1)
-	{
+	if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) &&
+		(args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) &&
+		(args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) {
+		printk("Not enough total FIFO space for specified max requests in FIFOs.\n");
 		return(NULL);
 	}

@@ -1693,7 +1691,7 @@ struct litmus_lock* ikglp_new(int m,
 	}
 	memset(sem, 0, sizeof(*sem));

-	sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
+	sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, GFP_KERNEL);
 	if(!sem->fifo_queues)
 	{
 		kfree(sem);
@@ -1712,17 +1710,21 @@ struct litmus_lock* ikglp_new(int m,

 	raw_spin_lock_init(&sem->real_lock);

-	sem->nr_replicas = nr_replicas;
-	sem->m = m;
-	sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
+	sem->nr_replicas = args.nr_replicas;
+	sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ?
+		m :
+		args.max_in_fifos;
+	sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ?
+		(sem->max_in_fifos/args.nr_replicas) + ((sem->max_in_fifos%args.nr_replicas) != 0) :
+		args.max_fifo_len;
 	sem->nr_in_fifos = 0;

-	TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
-		  sem->m,
+	TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n",
+		  sem->max_in_fifos,
 		  sem->nr_replicas,
 		  sem->max_fifo_len);

-	for(i = 0; i < nr_replicas; ++i)
+	for(i = 0; i < args.nr_replicas; ++i)
 	{
 		struct fifo_queue* q = &(sem->fifo_queues[i]);

@@ -1766,33 +1768,13 @@ struct litmus_lock* ikglp_new(int m,



+#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)

+/****************************************************************************/
+/*                           AFFINITY HEURISTICS                            */
+/****************************************************************************/


-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
-
 static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
 {
 	int gpu = replica % aff->nr_rsrc;
@@ -1856,7 +1838,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 	struct ikglp_affinity* ikglp_aff;
 	struct gpu_affinity_observer_args aff_args;
 	struct ikglp_semaphore* sem;
-	int i;
+	unsigned int i;
 	unsigned long flags;

 	if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
@@ -1873,23 +1855,17 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}

-	if((aff_args.nr_simult_users <= 0) ||
-	   (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
-		TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
-				  "(%d) per replica. #replicas should be evenly divisible "
+	if((aff_args.rho <= 0) ||
+	   (sem->nr_replicas%aff_args.rho != 0)) {
+		TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users "
+				  "(%u) per replica. #replicas should be evenly divisible "
 				  "by #simult_users.\n",
 				  sem->litmus_lock.ident,
 				  sem->nr_replicas,
-				  aff_args.nr_simult_users);
+				  aff_args.rho);
 		return(NULL);
 	}

-//	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
-//		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
-//				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-////		return(NULL);
-//	}
-
 	ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
 	if(!ikglp_aff) {
 		return(NULL);
@@ -1901,14 +1877,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}

-	ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL);
 	if(!ikglp_aff->nr_cur_users_on_rsrc) {
 		kfree(ikglp_aff->q_info);
 		kfree(ikglp_aff);
 		return(NULL);
 	}

-	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL);
 	if(!ikglp_aff->nr_aff_on_rsrc) {
 		kfree(ikglp_aff->nr_cur_users_on_rsrc);
 		kfree(ikglp_aff->q_info);
@@ -1920,7 +1896,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*

 	ikglp_aff->ops = ikglp_ops;
 	ikglp_aff->offset = aff_args.replica_to_gpu_offset;
-	ikglp_aff->nr_simult = aff_args.nr_simult_users;
+	ikglp_aff->nr_simult = aff_args.rho;
 	ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
 	ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;

@@ -1930,7 +1906,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 			  ikglp_aff->relax_max_fifo_len);

 	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
-	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));
+	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(unsigned int)*(ikglp_aff->nr_rsrc));

 	for(i = 0; i < sem->nr_replicas; ++i) {
 		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -1950,9 +1926,6 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 	return &ikglp_aff->obs;
 }

-
-
-
 static int gpu_replica_to_resource(struct ikglp_affinity* aff,
 								   struct fifo_queue* fq) {
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
@@ -1960,29 +1933,28 @@ static int gpu_replica_to_resource(struct ikglp_affinity* aff,
 }


-// Smart IKGLP Affinity

-//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
-//{
-//	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
-//	struct ikglp_queue_info *shortest = &aff->q_info[0];
-//	int i;
-//
-//	for(i = 1; i < sem->nr_replicas; ++i) {
-//		if(aff->q_info[i].estimated_len < shortest->estimated_len) {
-//			shortest = &aff->q_info[i];
-//		}
-//	}
-//
-//	return(shortest);
-//}
+/*--------------------------------------------------------------------------*/
+/*                      ADVANCED AFFINITY HEURISITICS                        */
+/*                                                                          */
+/* These heuristics estimate FIFO length wait times and try to enqueue      */
+/* tasks into the shortest queues. When two queues are equivlenet, the GPU  */
+/* that maintains affinity is selected. When a task has no affinity, the    */
+/* heuristic tries to get the GPU with the fewest number of other tasks     */
+/* with affinity on that GPU.                                               */
+/*                                                                          */
+/* Heuristics to explore in the future:                                     */
+/*    - Utilization                                                         */
+/*    - Longest non-preemptive section                                      */
+/*    - Criticality                                                         */
+/*    - Task period                                                         */
+/*--------------------------------------------------------------------------*/

 struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
 {
 	// advise_enqueue must be smart as not not break IKGLP rules:
-	//  * No queue can be greater than ceil(m/k) in length. We may return
-	//    such a queue, but IKGLP will be smart enough as to send requests
-	//    to donors or PQ.
+	//  * No queue can be greater than ceil(m/k) in length, unless
+	//    'relax_max_fifo_len' is asserted
 	//  * Cannot let a queue idle if there exist waiting PQ/donors
 	//    -- needed to guarantee parallel progress of waiters.
 	//
@@ -1993,14 +1965,15 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t

 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	lt_t min_len;
-	int min_nr_users, min_nr_aff_users;
+	unsigned int min_nr_users, min_nr_aff_users;
 	struct ikglp_queue_info *shortest, *aff_queue;
 	struct fifo_queue *to_enqueue;
-	int i;
+	unsigned int i;
 	int affinity_gpu;

-	int max_fifo_len = (aff->relax_max_fifo_len) ?
-		sem->m : sem->max_fifo_len;
+	unsigned int max_fifo_len = (aff->relax_max_fifo_len) ?
+		sem->max_in_fifos : /* allow possibility of all requests on same queue */
+		sem->max_fifo_len;  /* constraint FIFO len */

 	// if we have no affinity, find the GPU with the least number of users
 	// with active affinity
@@ -2037,7 +2010,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	min_nr_aff_users = *(shortest->nr_aff_users);


-	TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
+	TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n",
 			  get_gpu_estimate(t, MIG_LOCAL),
 			  ikglp_get_idx(sem, shortest->q),
 			  shortest->q->count,
@@ -2119,8 +2092,6 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 			  ikglp_get_idx(sem, sem->shortest_fifo_queue));

 	return to_enqueue;
-
-	//return(sem->shortest_fifo_queue);
 }


@@ -2334,7 +2305,6 @@ static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,

 	donee = NULL;
 	donee_node = NULL;
-	//*dist_from_head = sem->max_fifo_len + 1;
 	*dist_from_head = IKGLP_INVAL_DISTANCE;

 	TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
@@ -2630,7 +2600,6 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
 	// decrement affinity count on old GPU
 	aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
 	--(aff->nr_aff_on_rsrc[aff_rsrc]);
-//	aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);

 	if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
 		WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
@@ -2676,12 +2645,10 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 		if(last_gpu >= 0) {
 			int old_rsrc = last_gpu - aff->offset;
 			--(aff->nr_aff_on_rsrc[old_rsrc]);
-//			aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
 		}

 		// increment affinity count on new GPU
 		++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
-//		aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
 		tsk_rt(t)->rsrc_exit_cb_args = aff;
 		tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
 	}
@@ -2751,20 +2718,18 @@ struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* op



-
-
-
-
-// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
+/*--------------------------------------------------------------------------*/
+/*                 SIMPLE LOAD-BALANCING AFFINITY HEURISTIC                 */
+/*--------------------------------------------------------------------------*/

 struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
 {
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
-	int min_count;
-	int min_nr_users;
+	unsigned int min_count;
+	unsigned int min_nr_users;
 	struct ikglp_queue_info *shortest;
 	struct fifo_queue *to_enqueue;
-	int i;
+	unsigned int i;

 //	TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");

@@ -2772,13 +2737,13 @@ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, s
 	min_count = shortest->q->count;
 	min_nr_users = *(shortest->nr_cur_users);

-	TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
+	TRACE_CUR("queue %d: waiters = %u, total holders = %u\n",
 			  ikglp_get_idx(sem, shortest->q),
 			  shortest->q->count,
 			  min_nr_users);

 	for(i = 1; i < sem->nr_replicas; ++i) {
-		int len = aff->q_info[i].q->count;
+		unsigned int len = aff->q_info[i].q->count;

 		// queue is smaller, or they're equal and the other has a smaller number
 		// of total users.
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 041561839976..7dd866185623 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -21,7 +21,7 @@ static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
 static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
 												  struct task_struct* holder)
 {
-	int i;
+	unsigned int i;
 	for(i = 0; i < sem->num_resources; ++i)
 		if(sem->queues[i].owner == holder)
 			return(&sem->queues[i]);
@@ -79,7 +79,7 @@ static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
 {
 	/* must hold sem->lock */

-	int i;
+	unsigned int i;

 	*to_steal = NULL;
 	*to_steal_from = NULL;
@@ -438,7 +438,7 @@ int kfmlp_close(struct litmus_lock* l)
 	struct kfmlp_queue *my_queue;
 	unsigned long flags;

-	int owner;
+	unsigned int owner;

 	spin_lock_irqsave(&sem->lock, flags);

@@ -465,8 +465,8 @@ void kfmlp_free(struct litmus_lock* l)
 struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
 {
 	struct kfmlp_semaphore* sem;
-	int num_resources = 0;
-	int i;
+	unsigned int num_resources = 0;
+	unsigned int i;

 	if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
 	{
@@ -560,7 +560,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 	struct kfmlp_affinity* kfmlp_aff;
 	struct gpu_affinity_observer_args aff_args;
 	struct kfmlp_semaphore* sem;
-	int i;
+	unsigned int i;
 	unsigned long flags;

 	if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
@@ -577,14 +577,14 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}

-	if((aff_args.nr_simult_users <= 0) ||
-	   (sem->num_resources%aff_args.nr_simult_users != 0)) {
+	if((aff_args.rho <= 0) ||
+	   (sem->num_resources%aff_args.rho != 0)) {
 		TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
 				  "(%d) per replica. #replicas should be evenly divisible "
 				  "by #simult_users.\n",
 				  sem->litmus_lock.ident,
 				  sem->num_resources,
-				  aff_args.nr_simult_users);
+				  aff_args.rho);
 		return(NULL);
 	}

@@ -605,7 +605,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}

-	kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
+	kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->num_resources / aff_args.rho), GFP_KERNEL);
 	if(!kfmlp_aff->nr_cur_users_on_rsrc) {
 		kfree(kfmlp_aff->q_info);
 		kfree(kfmlp_aff);
@@ -616,10 +616,10 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*

 	kfmlp_aff->ops = kfmlp_ops;
 	kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
-	kfmlp_aff->nr_simult = aff_args.nr_simult_users;
+	kfmlp_aff->nr_simult = aff_args.rho;
 	kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;

-	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc));
+	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(unsigned int)*(sem->num_resources / kfmlp_aff->nr_rsrc));

 	for(i = 0; i < sem->num_resources; ++i) {
 		kfmlp_aff->q_info[i].q = &sem->queues[i];
@@ -669,10 +669,10 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 	lt_t min_len;
-	int min_nr_users;
+	unsigned int min_nr_users;
 	struct kfmlp_queue_info *shortest;
 	struct kfmlp_queue *to_enqueue;
-	int i;
+	unsigned int i;
 	int affinity_gpu;

 	// simply pick the shortest queue if, we have no affinity, or we have
@@ -893,11 +893,11 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op
 struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int min_count;
-	int min_nr_users;
+	unsigned int min_count;
+	unsigned int min_nr_users;
 	struct kfmlp_queue_info *shortest;
 	struct kfmlp_queue *to_enqueue;
-	int i;
+	unsigned int i;

 //	TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");

@@ -911,7 +911,7 @@ struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff,
 			  min_nr_users);

 	for(i = 1; i < sem->num_resources; ++i) {
-		int len = aff->q_info[i].q->count;
+		unsigned int len = aff->q_info[i].q->count;

 		// queue is smaller, or they're equal and the other has a smaller number
 		// of total users.
diff --git a/litmus/locking.c b/litmus/locking.c
index eddc67a4d36a..8ba46f85f5c6 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -234,12 +234,12 @@ void print_hp_waiters(struct binheap_node* n, int depth)

 #ifdef CONFIG_LITMUS_DGL_SUPPORT

-struct prioq_mutex;
-
-void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
+struct litmus_lock* select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
 {
-	int start = dgl_wait->last_primary;
-	extern void __dump_prioq_lock_info(struct prioq_mutex *mutex);
+	int num_locks = dgl_wait->size;
+	int last = dgl_wait->last_primary;
+	int start;
+	int idx;

 	/*
 	 We pick the next lock in reverse order. This causes inheritance propagation
@@ -250,55 +250,42 @@ void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lo
 	BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);

 	// note reverse order
-	for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1;
-		dgl_wait->last_primary != start;
-		dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1)
-	{
+	// Try to enable priority on a lock that has an owner.
+	idx = start = (last != 0) ? last - 1 : num_locks - 1;
+	do {
+		struct litmus_lock *l = dgl_wait->locks[idx];

-		struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary];
-
-		if(!l->ops->is_owner(l, dgl_wait->task) &&
-		   l->ops->get_owner(l)) {
-
-			tsk_rt(dgl_wait->task)->blocked_lock =
-				dgl_wait->locks[dgl_wait->last_primary];
+		if(!l->ops->is_owner(l, dgl_wait->task) && l->ops->get_owner(l)) {
+			dgl_wait->last_primary = idx;
+			tsk_rt(dgl_wait->task)->blocked_lock = l;
 			mb();
-
 			TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident);
-
 			l->ops->enable_priority(l, dgl_wait);
-
-			return;
+			return(l);
 		}
-	}
+		idx = (idx != 0) ? idx - 1 : num_locks - 1;
+	} while(idx != start);

 	// There was no one to push on. This can happen if the blocked task is
 	// behind a task that is idling a prioq-mutex.

 	// note reverse order
-	dgl_wait->last_primary = start;
-	for(dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1;
-		dgl_wait->last_primary != start;
-		dgl_wait->last_primary = (dgl_wait->last_primary != 0) ? dgl_wait->last_primary - 1 : dgl_wait->size-1)
-	{
-
-		struct litmus_lock *l = dgl_wait->locks[dgl_wait->last_primary];
+	idx = (last != 0) ? last - 1 : num_locks - 1;
+	do {
+		struct litmus_lock *l = dgl_wait->locks[idx];

 		if(!l->ops->is_owner(l, dgl_wait->task)) {
-
-			tsk_rt(dgl_wait->task)->blocked_lock =
-				dgl_wait->locks[dgl_wait->last_primary];
+			dgl_wait->last_primary = idx;
+			tsk_rt(dgl_wait->task)->blocked_lock = l;
 			mb();
-
 			TRACE_TASK(dgl_wait->task, "New blocked lock is %d\n", l->ident);
-
 			l->ops->enable_priority(l, dgl_wait);
-
-			return;
+			return(l);
 		}
-	}
+		idx = (idx != 0) ? idx - 1 : num_locks - 1;
+	} while(idx != start);

-	BUG();
+	return(NULL);
 }

 int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
@@ -333,7 +320,12 @@ struct task_struct* __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
 	return task;
 }

-void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait)
+void init_dgl_wait_state(dgl_wait_state_t *dgl_wait)
+{
+	memset(dgl_wait, 0, sizeof(dgl_wait_state_t));
+}
+
+void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t *dgl_wait)
 {
 	init_waitqueue_entry(wq_node, dgl_wait->task);
 	wq_node->private = dgl_wait;
@@ -403,83 +395,62 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
 	TRACE_CUR("Locking DGL with size %d: %s\n", dgl_wait->size, dglstr);
 #endif

-	dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
-
 	BUG_ON(dgl_wait->task != current);

-	raw_spin_lock_irqsave(dgl_lock, irqflags);
-
 	dgl_wait->nr_remaining = dgl_wait->size;

+	dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
+	raw_spin_lock_irqsave(dgl_lock, irqflags);
+
 	// try to acquire each lock. enqueue (non-blocking) if it is unavailable.
 	for(i = 0; i < dgl_wait->size; ++i) {
-		struct litmus_lock *l = dgl_wait->locks[i];
+		struct litmus_lock *tmp = dgl_wait->locks[i];

 		// dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.

-		if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) {
+		if(tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i])) {
 			--(dgl_wait->nr_remaining);
-			TRACE_CUR("Acquired lock %d immediatly.\n", l->ident);
+			TRACE_CUR("Acquired lock %d immediatly.\n", tmp->ident);
 		}
 	}

 	if(dgl_wait->nr_remaining == 0) {
 		// acquired entire group immediatly
 		TRACE_CUR("Acquired all locks in DGL immediatly!\n");
+		raw_spin_unlock_irqrestore(dgl_lock, irqflags);
 	}
 	else {
+		struct litmus_lock *first_primary;

 		TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
 				  dgl_wait->nr_remaining);

-#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
-		// KLUDGE: don't count this suspension as time in the critical gpu
-		// critical section
-		if(tsk_rt(dgl_wait->task)->held_gpus) {
-			tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
-		}
-#endif
-
-		// note reverse order. see comments in select_next_lock for reason.
-		for(i = dgl_wait->size - 1; i >= 0; --i) {
-			struct litmus_lock *l = dgl_wait->locks[i];
-			if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe
-
-				TRACE_CUR("Activating priority inheritance on lock %d\n",
-						  l->ident);
-
-				TS_DGL_LOCK_SUSPEND;
-
-				l->ops->enable_priority(l, dgl_wait);
-				dgl_wait->last_primary = i;
+		first_primary = select_next_lock(dgl_wait);

-				TRACE_CUR("Suspending for lock %d\n", l->ident);
-
-				raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
+		if (!first_primary) {
+			BUG();
+//			TRACE_CUR("We hold all the locks?\n");
+//			raw_spin_unlock_irqrestore(dgl_lock, irqflags);
+//			goto all_acquired;
+		}

-				suspend_for_lock(); // suspend!!!
+		TRACE_CUR("Suspending for lock %d\n", first_primary->ident);

-				TS_DGL_LOCK_RESUME;
+		TS_DGL_LOCK_SUSPEND;

-				TRACE_CUR("Woken up from DGL suspension.\n");
+		raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
+		suspend_for_lock();

-				goto all_acquired; // we should hold all locks when we wake up.
-			}
-		}
+		TS_DGL_LOCK_RESUME;

-		TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n");
-		//BUG();
+		TRACE_CUR("Woken up from DGL suspension.\n");
 	}

-	raw_spin_unlock_irqrestore(dgl_lock, irqflags);
-
-all_acquired:
-
 	// FOR SANITY CHECK FOR TESTING
-//	for(i = 0; i < dgl_wait->size; ++i) {
-//		struct litmus_lock *l = dgl_wait->locks[i];
-//		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
-//	}
+	for(i = 0; i < dgl_wait->size; ++i) {
+		struct litmus_lock *tmp = dgl_wait->locks[i];
+		BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task));
+	}

 	TRACE_CUR("Acquired entire DGL\n");

@@ -493,7 +464,6 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait)
 	int i;
 	unsigned long irqflags; //, dummyflags;
 	raw_spinlock_t *dgl_lock;
-	struct litmus_lock *l;
 	struct task_struct *t = current;

 #ifdef CONFIG_SCHED_DEBUG_TRACE
@@ -511,13 +481,19 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait)

 	dgl_wait->nr_remaining = dgl_wait->size;

+	/* enqueue for all locks */
 	for(i = 0; i < dgl_wait->size; ++i) {
-		struct litmus_lock *l = dgl_wait->locks[i];
-		// this should be a forced enqueue if atomic DGLs are needed.
-		l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i]);
+		/* dgl_lock must only enqueue. cannot set TASK_UNINTERRUPTIBLE!!
+		 * Note the difference in requirements with do_litmus_dgl_lock().
+		 */
+		struct litmus_lock *tmp = dgl_wait->locks[i];
+		tmp->ops->dgl_lock(tmp, dgl_wait, &dgl_wait->wq_nodes[i]);
 	}

+	/* now try to take all locks */
 	if(__attempt_atomic_dgl_acquire(NULL, dgl_wait)) {
+		struct litmus_lock *l;
+
 		/* Failed to acquire all locks at once.
 		 * Pick a lock to push on and suspend. */
 		TRACE_CUR("Could not atomically acquire all locks.\n");
@@ -526,26 +502,13 @@ static long do_litmus_dgl_atomic_lock(dgl_wait_state_t *dgl_wait)
 		 * __attempt_atomic_dgl_acquire() may actually succeed. */
 		set_task_state(t, TASK_UNINTERRUPTIBLE);

-#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
-		// KLUDGE: don't count this suspension as time in the critical gpu
-		// critical section
-		if(tsk_rt(t)->held_gpus) {
-			tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
-		}
-#endif
-
-		// select a lock to push priority on
-		dgl_wait->last_primary = 0; // default
-		select_next_lock(dgl_wait); // may change value of last_primary
-
-		l = dgl_wait->locks[dgl_wait->last_primary];
+		l = select_next_lock(dgl_wait);
+
+		TRACE_CUR("Suspending for lock %d\n", l->ident);

 		TS_DGL_LOCK_SUSPEND;

-		TRACE_CUR("Suspending for lock %d\n", l->ident);
-
 		raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
-
 		suspend_for_lock(); // suspend!!!

 		TS_DGL_LOCK_RESUME;
@@ -562,8 +525,8 @@ all_acquired:

 	// SANITY CHECK FOR TESTING
 	for(i = 0; i < dgl_wait->size; ++i) {
-		struct litmus_lock *l = dgl_wait->locks[i];
-		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
+		struct litmus_lock *tmp = dgl_wait->locks[i];
+		BUG_ON(!tmp->ops->is_owner(tmp, dgl_wait->task));
 	}

 	TRACE_CUR("Acquired entire DGL\n");
@@ -603,6 +566,8 @@ asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
 		err = sys_litmus_lock(dgl_ods[0]);
 	}
 	else {
+		init_dgl_wait_state(&dgl_wait_state);
+
 		for(i = 0; i < dgl_size; ++i) {
 			struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
 			if(entry && is_lock(entry)) {