author     Glenn Elliott <gelliott@cs.unc.edu>   2012-04-18 23:18:32 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-04-18 23:18:32 -0400
commit     c6d04216a123f8e0b50eb78bbb1eaf646a1ca4e0 (patch)
tree       c6db90c6fd95a308db4849abebcb09a0dafaedd8
parent     149ef3b424a49e6b928c5e23fea83380ed95ea38 (diff)
Added hooks for IKGLP affinity and a little logic.
Simple IKGLP is already done. It does:
1) Auto GPU de/registration.
2) Distribution amongst simultaneous users across queues.
3) Calls default IKGLP routines when appropriate.
Remaining work:
1) FQ advisement.
2) Donor stealing advisement.
3) Donee selection advisement.
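For orientation before the diff itself, the pattern the patch repeats at each decision point is small: the semaphore gains an optional aff_obs pointer; when an affinity observer is attached, the lock/unlock paths call its advise_*/notify_* callbacks, and when none is attached they fall back to the stock IKGLP routines. Below is a minimal, self-contained userspace sketch of that dispatch pattern only; the types, fields, and the toy advisor are simplified stand-ins for illustration, not the actual LITMUS^RT definitions.

/*
 * Sketch of the advise/fallback dispatch this patch introduces.
 * Simplified types; not the kernel code.
 */
#include <stdio.h>

struct fifo_queue { int count; };

struct ikglp_affinity;

struct ikglp_affinity_ops {
	/* advice hook: may override the default queue choice */
	struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity *aff, int last_gpu);
};

struct ikglp_affinity {
	struct ikglp_affinity_ops *ops;
	struct fifo_queue *queues;
	int nr_queues;
};

struct ikglp_semaphore {
	struct fifo_queue queues[4];
	struct fifo_queue *shortest_fifo_queue;
	struct ikglp_affinity *aff_obs;   /* NULL until an observer attaches */
};

/* default IKGLP policy: take the shortest FIFO queue */
static struct fifo_queue *default_pick(struct ikglp_semaphore *sem)
{
	return sem->shortest_fifo_queue;
}

/* the pattern used throughout the patch: ask the observer if one is attached */
static struct fifo_queue *pick_fq(struct ikglp_semaphore *sem, int last_gpu)
{
	if (sem->aff_obs && sem->aff_obs->ops->advise_enqueue)
		return sem->aff_obs->ops->advise_enqueue(sem->aff_obs, last_gpu);
	return default_pick(sem);
}

/* toy advisor: prefer the queue matching last_gpu when it is no longer
 * than the shortest queue; otherwise fall back to the shortest queue */
static struct fifo_queue *simple_advise(struct ikglp_affinity *aff, int last_gpu)
{
	struct fifo_queue *shortest = &aff->queues[0];
	int i;
	for (i = 1; i < aff->nr_queues; ++i)
		if (aff->queues[i].count < shortest->count)
			shortest = &aff->queues[i];
	if (last_gpu >= 0 && last_gpu < aff->nr_queues &&
	    aff->queues[last_gpu].count <= shortest->count)
		return &aff->queues[last_gpu];
	return shortest;
}

int main(void)
{
	struct ikglp_semaphore sem = { .queues = { {2}, {1}, {3}, {1} } };
	struct ikglp_affinity_ops ops = { .advise_enqueue = simple_advise };
	struct ikglp_affinity aff = { .ops = &ops, .queues = sem.queues, .nr_queues = 4 };

	sem.shortest_fifo_queue = &sem.queues[1];
	sem.aff_obs = NULL;
	printf("default pick: queue %ld\n", (long)(pick_fq(&sem, 3) - sem.queues));

	sem.aff_obs = &aff;   /* attaching the observer switches to advised picks */
	printf("advised pick: queue %ld\n", (long)(pick_fq(&sem, 3) - sem.queues));
	return 0;
}

The "simple" variant added in this commit behaves roughly like the fallback above plus GPU registration bookkeeping (shortest queue with a tie-break on current holders); the FQ, stealing, and donee advisement listed as remaining work would replace those trivial choices with affinity-aware ones.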
-rw-r--r--  include/litmus/ikglp_lock.h |   8
-rw-r--r--  litmus/ikglp_lock.c         | 730
-rw-r--r--  litmus/kfmlp_lock.c         |  39
3 files changed, 705 insertions, 72 deletions
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 08e73332c3d4..3fa23251b539 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -118,9 +118,9 @@ struct ikglp_queue_info | |||
118 | struct ikglp_affinity_ops | 118 | struct ikglp_affinity_ops |
119 | { | 119 | { |
120 | struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO | 120 | struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO |
121 | struct task_struct* (*advise_steal)(struct ikglp_affinity* aff, wait_queue_t** to_steal, struct fifo_queue** to_steal_from); // select steal from FIFO | 121 | ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff); // select steal from FIFO |
122 | struct task_struct* (*advise_donee_selection)(struct ikglp_affinity* aff, wait_queue_t** donee, struct fifo_queue** donee_queue); // select a donee | 122 | ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff); // select a donee |
123 | struct task_struct* (*advise_doner_to_fq)(struct ikglp_affinity* aff, ikglp_wait_state_t** donor); // select a donor to move to PQ | 123 | ikglp_wait_state_t* (*advise_doner_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ |
124 | 124 | ||
125 | void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue | 125 | void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue |
126 | void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue | 126 | void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue |
@@ -133,7 +133,7 @@ struct ikglp_affinity | |||
133 | { | 133 | { |
134 | struct affinity_observer obs; | 134 | struct affinity_observer obs; |
135 | struct ikglp_affinity_ops *ops; | 135 | struct ikglp_affinity_ops *ops; |
136 | struct fifo_queue *q_info; | 136 | struct ikglp_queue_info *q_info; |
137 | int *nr_cur_users_on_rsrc; | 137 | int *nr_cur_users_on_rsrc; |
138 | int offset; | 138 | int offset; |
139 | int nr_simult; | 139 | int nr_simult; |
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 94c954464a96..0e07841b86ba 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -3,9 +3,14 @@ | |||
3 | 3 | ||
4 | #include <litmus/trace.h> | 4 | #include <litmus/trace.h> |
5 | #include <litmus/sched_plugin.h> | 5 | #include <litmus/sched_plugin.h> |
6 | #include <litmus/ikglp_lock.h> | 6 | #include <litmus/fdso.h> |
7 | 7 | ||
8 | //#include <litmus/edf_common.h> | 8 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) |
9 | #include <litmus/gpu_affinity.h> | ||
10 | #include <litmus/nvidia_info.h> | ||
11 | #endif | ||
12 | |||
13 | #include <litmus/ikglp_lock.h> | ||
9 | 14 | ||
10 | int ikglp_max_heap_base_priority_order(struct binheap_node *a, | 15 | int ikglp_max_heap_base_priority_order(struct binheap_node *a, |
11 | struct binheap_node *b) | 16 | struct binheap_node *b) |
@@ -16,7 +21,6 @@ int ikglp_max_heap_base_priority_order(struct binheap_node *a, | |||
16 | BUG_ON(!d_a); | 21 | BUG_ON(!d_a); |
17 | BUG_ON(!d_b); | 22 | BUG_ON(!d_b); |
18 | 23 | ||
19 | //return __edf_higher_prio(d_a->task, BASE, d_b->task, BASE); | ||
20 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | 24 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); |
21 | } | 25 | } |
22 | 26 | ||
@@ -26,7 +30,6 @@ int ikglp_min_heap_base_priority_order(struct binheap_node *a, | |||
26 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); | 30 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); |
27 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); | 31 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); |
28 | 32 | ||
29 | //return __edf_higher_prio(d_b->task, BASE, d_a->task, BASE); | ||
30 | return litmus->__compare(d_b->task, BASE, d_a->task, BASE); | 33 | return litmus->__compare(d_b->task, BASE, d_a->task, BASE); |
31 | } | 34 | } |
32 | 35 | ||
@@ -36,7 +39,6 @@ int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a, | |||
36 | ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); | 39 | ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); |
37 | ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); | 40 | ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); |
38 | 41 | ||
39 | //return __edf_higher_prio(d_a->task, BASE, d_b->task, BASE); | ||
40 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | 42 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); |
41 | } | 43 | } |
42 | 44 | ||
@@ -68,7 +70,6 @@ int ikglp_min_heap_donee_order(struct binheap_node *a, | |||
68 | } | 70 | } |
69 | 71 | ||
70 | // note reversed order | 72 | // note reversed order |
71 | //return __edf_higher_prio(prio_b, BASE, prio_a, BASE); | ||
72 | return litmus->__compare(prio_b, BASE, prio_a, BASE); | 73 | return litmus->__compare(prio_b, BASE, prio_a, BASE); |
73 | } | 74 | } |
74 | 75 | ||
@@ -103,7 +104,6 @@ static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, | |||
103 | wait_queue_t, task_list)->private; | 104 | wait_queue_t, task_list)->private; |
104 | 105 | ||
105 | /* Compare task prios, find high prio task. */ | 106 | /* Compare task prios, find high prio task. */ |
106 | //if (queued != skip && edf_higher_prio(queued, found)) | ||
107 | if(queued != skip && litmus->compare(queued, found)) | 107 | if(queued != skip && litmus->compare(queued, found)) |
108 | found = queued; | 108 | found = queued; |
109 | } | 109 | } |
@@ -246,7 +246,6 @@ static void ikglp_add_global_list(struct ikglp_semaphore *sem, | |||
246 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | 246 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); |
247 | // print_global_list(sem->top_m.root, 1); | 247 | // print_global_list(sem->top_m.root, 1); |
248 | } | 248 | } |
249 | //else if(__edf_higher_prio(t, BASE, ikglp_mth_highest(sem), BASE)) { | ||
250 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { | 249 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { |
251 | ikglp_heap_node_t *evicted = | 250 | ikglp_heap_node_t *evicted = |
252 | binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); | 251 | binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); |
@@ -367,7 +366,6 @@ static void ikglp_refresh_owners_prio_increase(struct task_struct *t, | |||
367 | unsigned long flags) | 366 | unsigned long flags) |
368 | { | 367 | { |
369 | // priority of 't' has increased (note: 't' might already be hp_waiter). | 368 | // priority of 't' has increased (note: 't' might already be hp_waiter). |
370 | // if ((t == fq->hp_waiter) || edf_higher_prio(t, fq->hp_waiter)) { | ||
371 | if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { | 369 | if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { |
372 | struct task_struct *old_max_eff_prio; | 370 | struct task_struct *old_max_eff_prio; |
373 | struct task_struct *new_max_eff_prio; | 371 | struct task_struct *new_max_eff_prio; |
@@ -478,7 +476,6 @@ static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq, | |||
478 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | 476 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", |
479 | ikglp_get_idx(sem, fq)); | 477 | ikglp_get_idx(sem, fq)); |
480 | 478 | ||
481 | //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) { | ||
482 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | 479 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { |
483 | TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", | 480 | TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", |
484 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", | 481 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", |
@@ -540,7 +537,6 @@ static void ikglp_remove_donation_from_owner(struct binheap_node *n, | |||
540 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | 537 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", |
541 | ikglp_get_idx(sem, fq)); | 538 | ikglp_get_idx(sem, fq)); |
542 | 539 | ||
543 | //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) { | ||
544 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | 540 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { |
545 | TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", | 541 | TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", |
546 | ikglp_get_idx(sem, fq)); | 542 | ikglp_get_idx(sem, fq)); |
@@ -582,7 +578,6 @@ static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t, | |||
582 | // Need to set new effective_priority for owner | 578 | // Need to set new effective_priority for owner |
583 | struct task_struct *decreased_prio; | 579 | struct task_struct *decreased_prio; |
584 | 580 | ||
585 | //if(__edf_higher_prio(new_max_eff_prio, BASE, t, BASE)) { | ||
586 | if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { | 581 | if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { |
587 | decreased_prio = new_max_eff_prio; | 582 | decreased_prio = new_max_eff_prio; |
588 | } | 583 | } |
@@ -618,6 +613,13 @@ static void ikglp_get_immediate(struct task_struct* t, | |||
618 | 613 | ||
619 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); | 614 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); |
620 | 615 | ||
616 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
617 | if(sem->aff_obs) { | ||
618 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
619 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t); | ||
620 | } | ||
621 | #endif | ||
622 | |||
621 | unlock_fine_irqrestore(&sem->lock, flags); | 623 | unlock_fine_irqrestore(&sem->lock, flags); |
622 | } | 624 | } |
623 | 625 | ||
@@ -662,6 +664,12 @@ static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, | |||
662 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); | 664 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); |
663 | } | 665 | } |
664 | 666 | ||
667 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
668 | if(sem->aff_obs) { | ||
669 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
670 | } | ||
671 | #endif | ||
672 | |||
665 | TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); | 673 | TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); |
666 | } | 674 | } |
667 | 675 | ||
@@ -732,8 +740,14 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | |||
732 | ikglp_add_global_list(sem, t, &wait->global_heap_node); | 740 | ikglp_add_global_list(sem, t, &wait->global_heap_node); |
733 | 741 | ||
734 | // Select a donee | 742 | // Select a donee |
735 | donee_node = binheap_top_entry(&sem->donees, | 743 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
736 | ikglp_donee_heap_node_t, node); | 744 | donee_node = (sem->aff_obs) ? |
745 | sem->aff_obs->ops->advise_donee_selection(sem->aff_obs) : | ||
746 | binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
747 | #else | ||
748 | donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
749 | #endif | ||
750 | |||
737 | donee = donee_node->task; | 751 | donee = donee_node->task; |
738 | 752 | ||
739 | TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); | 753 | TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); |
@@ -743,7 +757,8 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | |||
743 | // TRACE_CUR("donees Before:\n"); | 757 | // TRACE_CUR("donees Before:\n"); |
744 | // print_donees(sem, sem->donees.root, 1); | 758 | // print_donees(sem, sem->donees.root, 1); |
745 | 759 | ||
746 | binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly | 760 | //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly |
761 | binheap_delete(&donee_node->node, &sem->donees); | ||
747 | 762 | ||
748 | // TRACE_CUR("donees After:\n"); | 763 | // TRACE_CUR("donees After:\n"); |
749 | // print_donees(sem, sem->donees.root, 1); | 764 | // print_donees(sem, sem->donees.root, 1); |
@@ -813,7 +828,6 @@ static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | |||
813 | 828 | ||
814 | if(new_max_eff_prio != old_max_eff_prio) { | 829 | if(new_max_eff_prio != old_max_eff_prio) { |
815 | if ((effective_priority(donee) == old_max_eff_prio) || | 830 | if ((effective_priority(donee) == old_max_eff_prio) || |
816 | //(__edf_higher_prio(new_max_eff_prio, BASE, donee, EFFECTIVE))){ | ||
817 | (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ | 831 | (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ |
818 | TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", | 832 | TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", |
819 | donee->comm, donee->pid); | 833 | donee->comm, donee->pid); |
@@ -887,11 +901,20 @@ int ikglp_lock(struct litmus_lock* l) | |||
887 | lock_global_irqsave(dgl_lock, flags); | 901 | lock_global_irqsave(dgl_lock, flags); |
888 | lock_fine_irqsave(&sem->lock, flags); | 902 | lock_fine_irqsave(&sem->lock, flags); |
889 | 903 | ||
890 | if(sem->shortest_fifo_queue->count == 0) { | 904 | |
905 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
906 | fq = (sem->aff_obs) ? | ||
907 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : | ||
908 | sem->shortest_fifo_queue; | ||
909 | #else | ||
910 | fq = sem->shortest_fifo_queue; | ||
911 | #endif | ||
912 | |||
913 | if(fq->count == 0) { | ||
891 | // take available resource | 914 | // take available resource |
892 | replica = ikglp_get_idx(sem, sem->shortest_fifo_queue); | 915 | //replica = ikglp_get_idx(sem, fq); |
893 | 916 | ||
894 | ikglp_get_immediate(t, sem->shortest_fifo_queue, sem, flags); // unlocks sem->lock | 917 | ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock |
895 | 918 | ||
896 | unlock_global_irqrestore(dgl_lock, flags); | 919 | unlock_global_irqrestore(dgl_lock, flags); |
897 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | 920 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); |
@@ -908,17 +931,16 @@ int ikglp_lock(struct litmus_lock* l) | |||
908 | /* FIXME: interruptible would be nice some day */ | 931 | /* FIXME: interruptible would be nice some day */ |
909 | set_task_state(t, TASK_UNINTERRUPTIBLE); | 932 | set_task_state(t, TASK_UNINTERRUPTIBLE); |
910 | 933 | ||
911 | if(sem->shortest_fifo_queue->count < sem->max_fifo_len) { | 934 | if(fq->count < sem->max_fifo_len) { |
912 | // enqueue on fq | 935 | // enqueue on fq |
913 | ikglp_enqueue_on_fq(sem, sem->shortest_fifo_queue, &wait, flags); // unlocks sem->lock | 936 | ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock |
914 | } | 937 | } |
915 | else { | 938 | else { |
916 | 939 | ||
917 | TRACE_CUR("IKGLP fifo queues are full.\n"); | 940 | TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n"); |
918 | 941 | ||
919 | // no room in fifos. Go to PQ or donors. | 942 | // no room in fifos. Go to PQ or donors. |
920 | 943 | ||
921 | //if(__edf_higher_prio(ikglp_mth_highest(sem), BASE, t, BASE)) { | ||
922 | if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { | 944 | if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { |
923 | // enqueue on PQ | 945 | // enqueue on PQ |
924 | ikglp_enqueue_on_pq(sem, &wait); | 946 | ikglp_enqueue_on_pq(sem, &wait); |
@@ -942,13 +964,19 @@ int ikglp_lock(struct litmus_lock* l) | |||
942 | fq = ikglp_get_queue(sem, t); | 964 | fq = ikglp_get_queue(sem, t); |
943 | BUG_ON(!fq); | 965 | BUG_ON(!fq); |
944 | 966 | ||
945 | replica = ikglp_get_idx(sem, fq); | 967 | //replica = ikglp_get_idx(sem, fq); |
946 | } | 968 | } |
947 | 969 | ||
948 | TRACE_CUR("Acquired lock %d, queue %d\n", | 970 | TRACE_CUR("Acquired lock %d, queue %d\n", |
949 | l->ident, replica); | 971 | l->ident, replica); |
950 | 972 | ||
951 | return replica; | 973 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
974 | if(sem->aff_obs) { | ||
975 | return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); | ||
976 | } | ||
977 | #endif | ||
978 | |||
979 | return ikglp_get_idx(sem, fq); | ||
952 | } | 980 | } |
953 | 981 | ||
954 | static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, | 982 | static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, |
@@ -1006,7 +1034,6 @@ static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal( | |||
1006 | 1034 | ||
1007 | for(i = 0; i < sem->nr_replicas; ++i) { | 1035 | for(i = 0; i < sem->nr_replicas; ++i) { |
1008 | if( (sem->fifo_queues[i].count > 1) && | 1036 | if( (sem->fifo_queues[i].count > 1) && |
1009 | //(!fq || edf_higher_prio(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { | ||
1010 | (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { | 1037 | (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { |
1011 | 1038 | ||
1012 | TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", | 1039 | TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", |
@@ -1078,6 +1105,12 @@ static void ikglp_steal_to_fq(struct ikglp_semaphore *sem, | |||
1078 | __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); | 1105 | __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); |
1079 | --(fq_steal->count); | 1106 | --(fq_steal->count); |
1080 | 1107 | ||
1108 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1109 | if(sem->aff_obs) { | ||
1110 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t); | ||
1111 | } | ||
1112 | #endif | ||
1113 | |||
1081 | fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); | 1114 | fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); |
1082 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | 1115 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", |
1083 | ikglp_get_idx(sem, fq_steal), | 1116 | ikglp_get_idx(sem, fq_steal), |
@@ -1152,14 +1185,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1152 | 1185 | ||
1153 | int err = 0; | 1186 | int err = 0; |
1154 | 1187 | ||
1155 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1156 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
1157 | #endif | ||
1158 | |||
1159 | raw_spin_lock_irqsave(&sem->real_lock, real_flags); | ||
1160 | |||
1161 | lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper | ||
1162 | lock_fine_irqsave(&sem->lock, flags); | ||
1163 | 1188 | ||
1164 | fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. | 1189 | fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. |
1165 | 1190 | ||
@@ -1168,6 +1193,14 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1168 | goto out; | 1193 | goto out; |
1169 | } | 1194 | } |
1170 | 1195 | ||
1196 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1197 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
1198 | #endif | ||
1199 | raw_spin_lock_irqsave(&sem->real_lock, real_flags); | ||
1200 | |||
1201 | lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper | ||
1202 | lock_fine_irqsave(&sem->lock, flags); | ||
1203 | |||
1171 | TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); | 1204 | TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); |
1172 | 1205 | ||
1173 | 1206 | ||
@@ -1175,6 +1208,19 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1175 | ikglp_del_global_list(sem, t, &fq->global_heap_node); | 1208 | ikglp_del_global_list(sem, t, &fq->global_heap_node); |
1176 | binheap_delete(&fq->donee_heap_node.node, &sem->donees); | 1209 | binheap_delete(&fq->donee_heap_node.node, &sem->donees); |
1177 | 1210 | ||
1211 | fq->owner = NULL; // no longer owned!! | ||
1212 | --(fq->count); | ||
1213 | if(fq->count < sem->shortest_fifo_queue->count) { | ||
1214 | sem->shortest_fifo_queue = fq; | ||
1215 | } | ||
1216 | |||
1217 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1218 | if(sem->aff_obs) { | ||
1219 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); | ||
1220 | sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t); | ||
1221 | } | ||
1222 | #endif | ||
1223 | |||
1178 | // Move the next request into the FQ and update heaps as needed. | 1224 | // Move the next request into the FQ and update heaps as needed. |
1179 | // We defer re-evaluation of priorities to later in the function. | 1225 | // We defer re-evaluation of priorities to later in the function. |
1180 | if(fq->donee_heap_node.donor_info) { // move my doner to FQ | 1226 | if(fq->donee_heap_node.donor_info) { // move my doner to FQ |
@@ -1191,8 +1237,14 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1191 | } | 1237 | } |
1192 | else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ | 1238 | else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ |
1193 | // move other donor to FQ | 1239 | // move other donor to FQ |
1194 | other_donor_info = binheap_top_entry(&sem->donors, | 1240 | // Select a donor |
1195 | ikglp_wait_state_t, node); | 1241 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1242 | other_donor_info = (sem->aff_obs) ? | ||
1243 | sem->aff_obs->ops->advise_doner_to_fq(sem->aff_obs, fq) : | ||
1244 | binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1245 | #else | ||
1246 | other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1247 | #endif | ||
1196 | 1248 | ||
1197 | new_on_fq = other_donor_info->task; | 1249 | new_on_fq = other_donor_info->task; |
1198 | donee = other_donor_info->donee_info->task; | 1250 | donee = other_donor_info->donee_info->task; |
@@ -1201,7 +1253,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1201 | other_donor_info->donee_info->donor_info = NULL; // clear the cross-link | 1253 | other_donor_info->donee_info->donor_info = NULL; // clear the cross-link |
1202 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); | 1254 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); |
1203 | 1255 | ||
1204 | |||
1205 | TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d.\n", | 1256 | TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d.\n", |
1206 | new_on_fq->comm, new_on_fq->pid, | 1257 | new_on_fq->comm, new_on_fq->pid, |
1207 | ikglp_get_idx(sem, fq)); | 1258 | ikglp_get_idx(sem, fq)); |
@@ -1222,14 +1273,20 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1222 | 1273 | ||
1223 | ikglp_move_pq_to_fq(sem, fq, pq_wait); | 1274 | ikglp_move_pq_to_fq(sem, fq, pq_wait); |
1224 | } | 1275 | } |
1225 | else if(fq->count == 1) { // No PQ and this queue is empty, so steal | 1276 | else if(fq->count == 0) { // No PQ and this queue is empty, so steal. |
1226 | // steal. | ||
1227 | ikglp_wait_state_t *fq_wait; | 1277 | ikglp_wait_state_t *fq_wait; |
1228 | 1278 | ||
1229 | TRACE_TASK(t, "Looking to steal a request for fq %d...\n", | 1279 | TRACE_TASK(t, "Looking to steal a request for fq %d...\n", |
1230 | ikglp_get_idx(sem, fq)); | 1280 | ikglp_get_idx(sem, fq)); |
1231 | 1281 | ||
1282 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1283 | fq_wait = (sem->aff_obs) ? | ||
1284 | sem->aff_obs->ops->advise_steal(sem->aff_obs) : | ||
1285 | ikglp_find_hp_waiter_to_steal(sem); | ||
1286 | #else | ||
1232 | fq_wait = ikglp_find_hp_waiter_to_steal(sem); | 1287 | fq_wait = ikglp_find_hp_waiter_to_steal(sem); |
1288 | #endif | ||
1289 | |||
1233 | if(fq_wait) { | 1290 | if(fq_wait) { |
1234 | to_steal = fq_wait->donee_heap_node.fq; | 1291 | to_steal = fq_wait->donee_heap_node.fq; |
1235 | 1292 | ||
@@ -1267,15 +1324,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1267 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | 1324 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); |
1268 | 1325 | ||
1269 | 1326 | ||
1270 | // Updating the owner and updating sem->shortest_fifo_queue | ||
1271 | // could have been done sooner, but it is deffered, hoping | ||
1272 | // that it will reduce thrashing of sem->shortest_fifo_queue | ||
1273 | // assignment. | ||
1274 | fq->owner = NULL; // no longer owned!! | ||
1275 | --(fq->count); | ||
1276 | if(fq->count < sem->shortest_fifo_queue->count) { | ||
1277 | sem->shortest_fifo_queue = fq; | ||
1278 | } | ||
1279 | 1327 | ||
1280 | // Now patch up other priorities. | 1328 | // Now patch up other priorities. |
1281 | // | 1329 | // |
@@ -1344,7 +1392,6 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1344 | fq->hp_waiter->comm, fq->hp_waiter->pid); | 1392 | fq->hp_waiter->comm, fq->hp_waiter->pid); |
1345 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... | 1393 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... |
1346 | } | 1394 | } |
1347 | //else if(edf_higher_prio(new_on_fq, fq->hp_waiter)) { | ||
1348 | else if(litmus->compare(new_on_fq, fq->hp_waiter)) { | 1395 | else if(litmus->compare(new_on_fq, fq->hp_waiter)) { |
1349 | if(fq->hp_waiter) | 1396 | if(fq->hp_waiter) |
1350 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", | 1397 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", |
@@ -1382,6 +1429,11 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1382 | fq->owner = next; | 1429 | fq->owner = next; |
1383 | tsk_rt(next)->blocked_lock = NULL; | 1430 | tsk_rt(next)->blocked_lock = NULL; |
1384 | 1431 | ||
1432 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1433 | if(sem->aff_obs) { | ||
1434 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next); | ||
1435 | } | ||
1436 | #endif | ||
1385 | 1437 | ||
1386 | /* determine new hp_waiter if necessary */ | 1438 | /* determine new hp_waiter if necessary */ |
1387 | if (next == fq->hp_waiter) { | 1439 | if (next == fq->hp_waiter) { |
@@ -1461,12 +1513,12 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1461 | wake_up_process(next); | 1513 | wake_up_process(next); |
1462 | } | 1514 | } |
1463 | 1515 | ||
1464 | out: | ||
1465 | unlock_fine_irqrestore(&sem->lock, flags); | 1516 | unlock_fine_irqrestore(&sem->lock, flags); |
1466 | unlock_global_irqrestore(dgl_lock, flags); | 1517 | unlock_global_irqrestore(dgl_lock, flags); |
1467 | 1518 | ||
1468 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | 1519 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); |
1469 | 1520 | ||
1521 | out: | ||
1470 | return err; | 1522 | return err; |
1471 | } | 1523 | } |
1472 | 1524 | ||
@@ -1597,5 +1649,583 @@ struct litmus_lock* ikglp_new(int m, | |||
1597 | INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); | 1649 | INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); |
1598 | INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); | 1650 | INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); |
1599 | 1651 | ||
1652 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1653 | sem->aff_obs = NULL; | ||
1654 | #endif | ||
1655 | |||
1600 | return &sem->litmus_lock; | 1656 | return &sem->litmus_lock; |
1601 | } | 1657 | } |
1658 | |||
1659 | |||
1660 | |||
1661 | |||
1662 | |||
1663 | |||
1664 | |||
1665 | |||
1666 | |||
1667 | |||
1668 | |||
1669 | |||
1670 | |||
1671 | |||
1672 | |||
1673 | |||
1674 | |||
1675 | |||
1676 | |||
1677 | |||
1678 | |||
1679 | |||
1680 | |||
1681 | |||
1682 | |||
1683 | |||
1684 | |||
1685 | |||
1686 | |||
1687 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1688 | |||
1689 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1690 | { | ||
1691 | int gpu = replica % aff->nr_rsrc; | ||
1692 | return gpu; | ||
1693 | } | ||
1694 | |||
1695 | static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1696 | { | ||
1697 | int gpu = __replica_to_gpu(aff, replica) + aff->offset; | ||
1698 | return gpu; | ||
1699 | } | ||
1700 | |||
1701 | static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) | ||
1702 | { | ||
1703 | int replica = gpu - aff->offset; | ||
1704 | return replica; | ||
1705 | } | ||
1706 | |||
1707 | |||
1708 | int ikglp_aff_obs_close(struct affinity_observer* obs) | ||
1709 | { | ||
1710 | return 0; | ||
1711 | } | ||
1712 | |||
1713 | void ikglp_aff_obs_free(struct affinity_observer* obs) | ||
1714 | { | ||
1715 | struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); | ||
1716 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | ||
1717 | kfree(ikglp_aff->q_info); | ||
1718 | kfree(ikglp_aff); | ||
1719 | } | ||
1720 | |||
1721 | static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops, | ||
1722 | struct ikglp_affinity_ops* ikglp_ops, | ||
1723 | void* __user args) | ||
1724 | { | ||
1725 | struct ikglp_affinity* ikglp_aff; | ||
1726 | struct gpu_affinity_observer_args aff_args; | ||
1727 | struct ikglp_semaphore* sem; | ||
1728 | int i; | ||
1729 | unsigned long flags; | ||
1730 | |||
1731 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | ||
1732 | return(NULL); | ||
1733 | } | ||
1734 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { | ||
1735 | return(NULL); | ||
1736 | } | ||
1737 | |||
1738 | sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); | ||
1739 | |||
1740 | if(sem->litmus_lock.type != IKGLP_SEM) { | ||
1741 | TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); | ||
1742 | return(NULL); | ||
1743 | } | ||
1744 | |||
1745 | if((aff_args.nr_simult_users <= 0) || | ||
1746 | (sem->nr_replicas%aff_args.nr_simult_users != 0)) { | ||
1747 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | ||
1748 | "(%d) per replica. #replicas should be evenly divisible " | ||
1749 | "by #simult_users.\n", | ||
1750 | sem->litmus_lock.ident, | ||
1751 | sem->nr_replicas, | ||
1752 | aff_args.nr_simult_users); | ||
1753 | return(NULL); | ||
1754 | } | ||
1755 | |||
1756 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
1757 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | ||
1758 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
1759 | return(NULL); | ||
1760 | } | ||
1761 | |||
1762 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | ||
1763 | if(!ikglp_aff) { | ||
1764 | return(NULL); | ||
1765 | } | ||
1766 | |||
1767 | ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL); | ||
1768 | if(!ikglp_aff->q_info) { | ||
1769 | kfree(ikglp_aff); | ||
1770 | return(NULL); | ||
1771 | } | ||
1772 | |||
1773 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | ||
1774 | if(!ikglp_aff->nr_cur_users_on_rsrc) { | ||
1775 | kfree(ikglp_aff->q_info); | ||
1776 | kfree(ikglp_aff); | ||
1777 | return(NULL); | ||
1778 | } | ||
1779 | |||
1780 | affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); | ||
1781 | |||
1782 | ikglp_aff->ops = ikglp_ops; | ||
1783 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; | ||
1784 | ikglp_aff->nr_simult = aff_args.nr_simult_users; | ||
1785 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; | ||
1786 | |||
1787 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->nr_replicas / ikglp_aff->nr_rsrc)); | ||
1788 | |||
1789 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
1790 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; | ||
1791 | ikglp_aff->q_info[i].estimated_len = 0; | ||
1792 | |||
1793 | // multiple q_info's will point to the same resource (aka GPU) if | ||
1794 | // aff_args.nr_simult_users > 1 | ||
1795 | ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; | ||
1796 | } | ||
1797 | |||
1798 | // attach observer to the lock | ||
1799 | raw_spin_lock_irqsave(&sem->real_lock, flags); | ||
1800 | sem->aff_obs = ikglp_aff; | ||
1801 | raw_spin_unlock_irqrestore(&sem->real_lock, flags); | ||
1802 | |||
1803 | return &ikglp_aff->obs; | ||
1804 | } | ||
1805 | |||
1806 | |||
1807 | |||
1808 | |||
1809 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, | ||
1810 | struct fifo_queue* fq) { | ||
1811 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1812 | return(replica_to_gpu(aff, ikglp_get_idx(sem, fq))); | ||
1813 | } | ||
1814 | |||
1815 | |||
1816 | // Smart IKGLP Affinity | ||
1817 | |||
1818 | //static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) | ||
1819 | //{ | ||
1820 | // struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1821 | // struct ikglp_queue_info *shortest = &aff->q_info[0]; | ||
1822 | // int i; | ||
1823 | // | ||
1824 | // for(i = 1; i < sem->nr_replicas; ++i) { | ||
1825 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { | ||
1826 | // shortest = &aff->q_info[i]; | ||
1827 | // } | ||
1828 | // } | ||
1829 | // | ||
1830 | // return(shortest); | ||
1831 | //} | ||
1832 | |||
1833 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | ||
1834 | { | ||
1835 | // advise_enqueue must be smart so as not to break IKGLP rules: | ||
1836 | // * Total number of waiters cannot exceed ceil(m/k)*k. | ||
1837 | // * Cannot let a queue idle if there exist waiting PQ/donors | ||
1838 | // -- needed to guarantee parallel progress of waiters. | ||
1839 | // | ||
1840 | // Locking protocol is smart enough to notice that a queue we return is | ||
1841 | // full and send new requests to Donors/PQ. | ||
1842 | // | ||
1843 | // We may be able to relax some of these constraints, but this will have to | ||
1844 | // be carefully evaluated. | ||
1845 | |||
1846 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1847 | |||
1848 | /* | ||
1849 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1850 | lt_t min_len; | ||
1851 | int min_nr_users; | ||
1852 | struct ikglp_queue_info *shortest; | ||
1853 | struct ikglp_queue *to_enqueue; | ||
1854 | int i; | ||
1855 | int affinity_gpu; | ||
1856 | |||
1857 | // simply pick the shortest queue if, we have no affinity, or we have | ||
1858 | // affinity with the shortest | ||
1859 | if(unlikely(tsk_rt(t)->last_gpu < 0)) { | ||
1860 | affinity_gpu = aff->offset; // first gpu | ||
1861 | TRACE_CUR("no affinity\n"); | ||
1862 | } | ||
1863 | else { | ||
1864 | affinity_gpu = tsk_rt(t)->last_gpu; | ||
1865 | } | ||
1866 | |||
1867 | // all things being equal, let's start with the queue with which we have | ||
1868 | // affinity. this helps us maintain affinity even when we don't have | ||
1869 | // an estimate for local-affinity execution time (i.e., 2nd time on GPU) | ||
1870 | shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; | ||
1871 | |||
1872 | // if(shortest == aff->shortest_queue) { | ||
1873 | // TRACE_CUR("special case: have affinity with shortest queue\n"); | ||
1874 | // goto out; | ||
1875 | // } | ||
1876 | |||
1877 | min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); | ||
1878 | min_nr_users = *(shortest->nr_cur_users); | ||
1879 | |||
1880 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | ||
1881 | get_gpu_estimate(t, MIG_LOCAL), | ||
1882 | ikglp_get_idx(sem, shortest->q), | ||
1883 | min_len); | ||
1884 | |||
1885 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
1886 | if(&aff->q_info[i] != shortest) { | ||
1887 | |||
1888 | lt_t est_len = | ||
1889 | aff->q_info[i].estimated_len + | ||
1890 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); | ||
1891 | |||
1892 | // queue is smaller, or they're equal and the other has a smaller number | ||
1893 | // of total users. | ||
1894 | // | ||
1895 | // tie-break on the shortest number of simult users. this only kicks in | ||
1896 | // when there are more than 1 empty queues. | ||
1897 | if((est_len < min_len) || | ||
1898 | ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
1899 | shortest = &aff->q_info[i]; | ||
1900 | min_len = est_len; | ||
1901 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
1902 | } | ||
1903 | |||
1904 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | ||
1905 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))), | ||
1906 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
1907 | est_len); | ||
1908 | } | ||
1909 | } | ||
1910 | |||
1911 | to_enqueue = shortest->q; | ||
1912 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
1913 | ikglp_get_idx(sem, to_enqueue), | ||
1914 | ikglp_get_idx(sem, sem->shortest_queue)); | ||
1915 | |||
1916 | return to_enqueue; | ||
1917 | */ | ||
1918 | return(sem->shortest_fifo_queue); | ||
1919 | } | ||
1920 | |||
1921 | ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff) | ||
1922 | { | ||
1923 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1924 | |||
1925 | // For now, just steal highest priority waiter | ||
1926 | // TODO: Implement affinity-aware stealing. | ||
1927 | |||
1928 | return ikglp_find_hp_waiter_to_steal(sem); | ||
1929 | } | ||
1930 | |||
1931 | ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff) | ||
1932 | { | ||
1933 | // TODO: MAKE THIS SMARTER | ||
1934 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1935 | ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
1936 | return(donee); | ||
1937 | } | ||
1938 | |||
1939 | ikglp_wait_state_t* gpu_ikglp_advise_doner_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) | ||
1940 | { | ||
1941 | // TODO: MAKE THIS SMARTER | ||
1942 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1943 | ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1944 | return(donor); | ||
1945 | } | ||
1946 | |||
1947 | |||
1948 | |||
1949 | void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
1950 | { | ||
1951 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1952 | int replica = ikglp_get_idx(sem, fq); | ||
1953 | int gpu = replica_to_gpu(aff, replica); | ||
1954 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
1955 | lt_t est_time; | ||
1956 | lt_t est_len_before; | ||
1957 | |||
1958 | if(current == t) { | ||
1959 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
1960 | } | ||
1961 | |||
1962 | est_len_before = info->estimated_len; | ||
1963 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
1964 | info->estimated_len += est_time; | ||
1965 | |||
1966 | TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", | ||
1967 | ikglp_get_idx(sem, info->q), | ||
1968 | est_len_before, est_time, | ||
1969 | info->estimated_len); | ||
1970 | |||
1971 | // if(aff->shortest_queue == info) { | ||
1972 | // // we may no longer be the shortest | ||
1973 | // aff->shortest_queue = ikglp_aff_find_shortest(aff); | ||
1974 | // | ||
1975 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
1976 | // ikglp_get_idx(sem, aff->shortest_queue->q), | ||
1977 | // aff->shortest_queue->q->count, | ||
1978 | // aff->shortest_queue->estimated_len); | ||
1979 | // } | ||
1980 | } | ||
1981 | |||
1982 | void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
1983 | { | ||
1984 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
1985 | int replica = ikglp_get_idx(sem, fq); | ||
1986 | int gpu = replica_to_gpu(aff, replica); | ||
1987 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
1988 | lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
1989 | |||
1990 | if(est_time > info->estimated_len) { | ||
1991 | WARN_ON(1); | ||
1992 | info->estimated_len = 0; | ||
1993 | } | ||
1994 | else { | ||
1995 | info->estimated_len -= est_time; | ||
1996 | } | ||
1997 | |||
1998 | TRACE_CUR("fq %d est len is now %llu\n", | ||
1999 | ikglp_get_idx(sem, info->q), | ||
2000 | info->estimated_len); | ||
2001 | |||
2002 | // check to see if we're the shortest queue now. | ||
2003 | // if((aff->shortest_queue != info) && | ||
2004 | // (aff->shortest_queue->estimated_len > info->estimated_len)) { | ||
2005 | // | ||
2006 | // aff->shortest_queue = info; | ||
2007 | // | ||
2008 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
2009 | // ikglp_get_idx(sem, info->q), | ||
2010 | // info->q->count, | ||
2011 | // info->estimated_len); | ||
2012 | // } | ||
2013 | } | ||
2014 | |||
2015 | void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2016 | { | ||
2017 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2018 | int replica = ikglp_get_idx(sem, fq); | ||
2019 | int gpu = replica_to_gpu(aff, replica); | ||
2020 | |||
2021 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration | ||
2022 | |||
2023 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", | ||
2024 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); | ||
2025 | |||
2026 | // count the number of resource holders | ||
2027 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2028 | |||
2029 | reg_nv_device(gpu, 1, t); // register | ||
2030 | |||
2031 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; | ||
2032 | reset_gpu_tracker(t); | ||
2033 | start_gpu_tracker(t); | ||
2034 | } | ||
2035 | |||
2036 | void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2037 | { | ||
2038 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2039 | int replica = ikglp_get_idx(sem, fq); | ||
2040 | int gpu = replica_to_gpu(aff, replica); | ||
2041 | lt_t est_time; | ||
2042 | |||
2043 | stop_gpu_tracker(t); // stop the tracker before we do anything else. | ||
2044 | |||
2045 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2046 | |||
2047 | tsk_rt(t)->last_gpu = gpu; | ||
2048 | |||
2049 | // count the number of resource holders | ||
2050 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2051 | |||
2052 | reg_nv_device(gpu, 0, t); // unregister | ||
2053 | |||
2054 | // update estimates | ||
2055 | update_gpu_estimate(t, get_gpu_time(t)); | ||
2056 | |||
2057 | TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n", | ||
2058 | t->comm, t->pid, gpu, | ||
2059 | get_gpu_time(t), | ||
2060 | est_time, | ||
2061 | (long long)get_gpu_time(t) - (long long)est_time); | ||
2062 | } | ||
2063 | |||
2064 | struct ikglp_affinity_ops gpu_ikglp_affinity = | ||
2065 | { | ||
2066 | .advise_enqueue = gpu_ikglp_advise_enqueue, | ||
2067 | .advise_steal = gpu_ikglp_advise_steal, | ||
2068 | .advise_donee_selection = gpu_ikglp_advise_donee_selection, | ||
2069 | .advise_doner_to_fq = gpu_ikglp_advise_doner_to_fq, | ||
2070 | |||
2071 | .notify_enqueue = gpu_ikglp_notify_enqueue, | ||
2072 | .notify_dequeue = gpu_ikglp_notify_dequeue, | ||
2073 | .notify_acquired = gpu_ikglp_notify_acquired, | ||
2074 | .notify_freed = gpu_ikglp_notify_freed, | ||
2075 | |||
2076 | .replica_to_resource = gpu_replica_to_resource, | ||
2077 | }; | ||
2078 | |||
2079 | struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
2080 | void* __user args) | ||
2081 | { | ||
2082 | return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args); | ||
2083 | } | ||
2084 | |||
2085 | |||
2086 | |||
2087 | |||
2088 | |||
2089 | |||
2090 | |||
2091 | |||
2092 | // Simple ikglp Affinity (standard ikglp with auto-gpu registration) | ||
2093 | |||
2094 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | ||
2095 | { | ||
2096 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2097 | int min_count; | ||
2098 | int min_nr_users; | ||
2099 | struct ikglp_queue_info *shortest; | ||
2100 | struct fifo_queue *to_enqueue; | ||
2101 | int i; | ||
2102 | |||
2103 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); | ||
2104 | |||
2105 | shortest = &aff->q_info[0]; | ||
2106 | min_count = shortest->q->count; | ||
2107 | min_nr_users = *(shortest->nr_cur_users); | ||
2108 | |||
2109 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
2110 | ikglp_get_idx(sem, shortest->q), | ||
2111 | shortest->q->count, | ||
2112 | min_nr_users); | ||
2113 | |||
2114 | for(i = 1; i < sem->nr_replicas; ++i) { | ||
2115 | int len = aff->q_info[i].q->count; | ||
2116 | |||
2117 | // queue is smaller, or they're equal and the other has a smaller number | ||
2118 | // of total users. | ||
2119 | // | ||
2120 | // tie-break on the shortest number of simult users. this only kicks in | ||
2121 | // when there are more than 1 empty queues. | ||
2122 | if((len < min_count) || | ||
2123 | ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
2124 | shortest = &aff->q_info[i]; | ||
2125 | min_count = shortest->q->count; | ||
2126 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
2127 | } | ||
2128 | |||
2129 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
2130 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
2131 | aff->q_info[i].q->count, | ||
2132 | *(aff->q_info[i].nr_cur_users)); | ||
2133 | } | ||
2134 | |||
2135 | to_enqueue = shortest->q; | ||
2136 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
2137 | ikglp_get_idx(sem, to_enqueue), | ||
2138 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | ||
2139 | |||
2140 | return to_enqueue; | ||
2141 | } | ||
2142 | |||
2143 | ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff) | ||
2144 | { | ||
2145 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2146 | // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n"); | ||
2147 | return ikglp_find_hp_waiter_to_steal(sem); | ||
2148 | } | ||
2149 | |||
2150 | ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff) | ||
2151 | { | ||
2152 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2153 | ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
2154 | return(donee); | ||
2155 | } | ||
2156 | |||
2157 | ikglp_wait_state_t* simple_gpu_ikglp_advise_doner_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) | ||
2158 | { | ||
2159 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2160 | ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
2161 | return(donor); | ||
2162 | } | ||
2163 | |||
2164 | void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2165 | { | ||
2166 | // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n"); | ||
2167 | } | ||
2168 | |||
2169 | void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2170 | { | ||
2171 | // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n"); | ||
2172 | } | ||
2173 | |||
2174 | void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2175 | { | ||
2176 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2177 | int replica = ikglp_get_idx(sem, fq); | ||
2178 | int gpu = replica_to_gpu(aff, replica); | ||
2179 | |||
2180 | // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n"); | ||
2181 | |||
2182 | // count the number of resource holders | ||
2183 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2184 | |||
2185 | reg_nv_device(gpu, 1, t); // register | ||
2186 | } | ||
2187 | |||
2188 | void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2189 | { | ||
2190 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2191 | int replica = ikglp_get_idx(sem, fq); | ||
2192 | int gpu = replica_to_gpu(aff, replica); | ||
2193 | |||
2194 | // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n"); | ||
2196 | // count the number of resource holders | ||
2196 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2197 | |||
2198 | reg_nv_device(gpu, 0, t); // unregister | ||
2199 | } | ||
2200 | |||
2201 | struct ikglp_affinity_ops simple_gpu_ikglp_affinity = | ||
2202 | { | ||
2203 | .advise_enqueue = simple_gpu_ikglp_advise_enqueue, | ||
2204 | .advise_steal = simple_gpu_ikglp_advise_steal, | ||
2205 | .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection, | ||
2206 | .advise_doner_to_fq = simple_gpu_ikglp_advise_doner_to_fq, | ||
2207 | |||
2208 | .notify_enqueue = simple_gpu_ikglp_notify_enqueue, | ||
2209 | .notify_dequeue = simple_gpu_ikglp_notify_dequeue, | ||
2210 | .notify_acquired = simple_gpu_ikglp_notify_acquired, | ||
2211 | .notify_freed = simple_gpu_ikglp_notify_freed, | ||
2212 | |||
2213 | .replica_to_resource = gpu_replica_to_resource, | ||
2214 | }; | ||
2215 | |||
2216 | struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
2217 | void* __user args) | ||
2218 | { | ||
2219 | return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args); | ||
2220 | } | ||
2221 | |||
2222 | #endif | ||
2223 | |||
2224 | |||
2225 | |||
2226 | |||
2227 | |||
2228 | |||
2229 | |||
2230 | |||
2231 | |||
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index d0a6bd364c43..0b64977789a6 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -5,13 +5,13 @@ | |||
5 | #include <litmus/sched_plugin.h> | 5 | #include <litmus/sched_plugin.h> |
6 | #include <litmus/fdso.h> | 6 | #include <litmus/fdso.h> |
7 | 7 | ||
8 | #include <litmus/kfmlp_lock.h> | ||
9 | |||
10 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | 8 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) |
11 | #include <litmus/gpu_affinity.h> | 9 | #include <litmus/gpu_affinity.h> |
12 | #include <litmus/nvidia_info.h> | 10 | #include <litmus/nvidia_info.h> |
13 | #endif | 11 | #endif |
14 | 12 | ||
13 | #include <litmus/kfmlp_lock.h> | ||
14 | |||
15 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | 15 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, |
16 | struct kfmlp_queue* queue) | 16 | struct kfmlp_queue* queue) |
17 | { | 17 | { |
@@ -508,6 +508,10 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | |||
508 | 508 | ||
509 | sem->shortest_queue = &sem->queues[0]; | 509 | sem->shortest_queue = &sem->queues[0]; |
510 | 510 | ||
511 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
512 | sem->aff_obs = NULL; | ||
513 | #endif | ||
514 | |||
511 | return &sem->litmus_lock; | 515 | return &sem->litmus_lock; |
512 | } | 516 | } |
513 | 517 | ||
@@ -584,7 +588,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
584 | } | 588 | } |
585 | 589 | ||
586 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | 590 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { |
587 | TRACE_CUR("System does not support #simult_users >%d. %d requested.\n", | 591 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", |
588 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | 592 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); |
589 | return(NULL); | 593 | return(NULL); |
590 | } | 594 | } |
@@ -628,7 +632,6 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
628 | // attach observer to the lock | 632 | // attach observer to the lock |
629 | spin_lock_irqsave(&sem->lock, flags); | 633 | spin_lock_irqsave(&sem->lock, flags); |
630 | sem->aff_obs = kfmlp_aff; | 634 | sem->aff_obs = kfmlp_aff; |
631 | //kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; | ||
632 | spin_unlock_irqrestore(&sem->lock, flags); | 635 | spin_unlock_irqrestore(&sem->lock, flags); |
633 | 636 | ||
634 | return &kfmlp_aff->obs; | 637 | return &kfmlp_aff->obs; |
@@ -646,20 +649,20 @@ static int gpu_replica_to_resource(struct kfmlp_affinity* aff, | |||
646 | 649 | ||
647 | // Smart KFMLP Affinity | 650 | // Smart KFMLP Affinity |
648 | 651 | ||
649 | static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) | 652 | //static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) |
650 | { | 653 | //{ |
651 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | 654 | // struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); |
652 | struct kfmlp_queue_info *shortest = &aff->q_info[0]; | 655 | // struct kfmlp_queue_info *shortest = &aff->q_info[0]; |
653 | int i; | 656 | // int i; |
654 | 657 | // | |
655 | for(i = 1; i < sem->num_resources; ++i) { | 658 | // for(i = 1; i < sem->num_resources; ++i) { |
656 | if(aff->q_info[i].estimated_len < shortest->estimated_len) { | 659 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { |
657 | shortest = &aff->q_info[i]; | 660 | // shortest = &aff->q_info[i]; |
658 | } | 661 | // } |
659 | } | 662 | // } |
660 | 663 | // | |
661 | return(shortest); | 664 | // return(shortest); |
662 | } | 665 | //} |
663 | 666 | ||
664 | struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | 667 | struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) |
665 | { | 668 | { |