aboutsummaryrefslogtreecommitdiffstats
path: root/litmus/ikglp_lock.c
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-11-30 13:36:03 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-11-30 13:36:03 -0500
commit3ee5f13b8213270ba30e4b3625dff46b1cc8326f (patch)
treeb349e8f80559fdd608d057781fd0f2dcc1e498fe /litmus/ikglp_lock.c
parent7ebec2a6abe03d5c42742a6dce74787880394897 (diff)
More improvements on affinity heuristics
Diffstat (limited to 'litmus/ikglp_lock.c')
-rw-r--r--litmus/ikglp_lock.c68
1 files changed, 51 insertions, 17 deletions
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 9c57bc24e8bd..16ae621bbf75 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1887,6 +1887,19 @@ static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1887 return replica; 1887 return replica;
1888} 1888}
1889 1889
/* Returns nonzero iff the two replica indices map to the same physical GPU. */
static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b)
{
	int gpu_a = replica_to_gpu(aff, replica_a);
	int gpu_b = replica_to_gpu(aff, replica_b);
	return (gpu_a == gpu_b);
}
1894
1895static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica)
1896{
1897 if(tsk_rt(t)->last_gpu >= 0)
1898 {
1899 return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica));
1900 }
1901 return 0;
1902}
1890 1903
1891int ikglp_aff_obs_close(struct affinity_observer* obs) 1904int ikglp_aff_obs_close(struct affinity_observer* obs)
1892{ 1905{
@@ -1971,7 +1984,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1971 return(NULL); 1984 return(NULL);
1972 } 1985 }
1973 1986
1974 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); 1987 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1975 if(!ikglp_aff->nr_aff_on_rsrc) { 1988 if(!ikglp_aff->nr_aff_on_rsrc) {
1976 kfree(ikglp_aff->nr_cur_users_on_rsrc); 1989 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1977 kfree(ikglp_aff->q_info); 1990 kfree(ikglp_aff->q_info);
@@ -1993,7 +2006,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1993 ikglp_aff->relax_max_fifo_len); 2006 ikglp_aff->relax_max_fifo_len);
1994 2007
1995 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); 2008 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
1996 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); 2009 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));
1997 2010
1998 for(i = 0; i < sem->nr_replicas; ++i) { 2011 for(i = 0; i < sem->nr_replicas; ++i) {
1999 ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; 2012 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -2057,7 +2070,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2057 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); 2070 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2058 lt_t min_len; 2071 lt_t min_len;
2059 int min_nr_users, min_nr_aff_users; 2072 int min_nr_users, min_nr_aff_users;
2060 struct ikglp_queue_info *shortest; 2073 struct ikglp_queue_info *shortest, *aff_queue;
2061 struct fifo_queue *to_enqueue; 2074 struct fifo_queue *to_enqueue;
2062 int i; 2075 int i;
2063 int affinity_gpu; 2076 int affinity_gpu;
@@ -2087,7 +2100,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2087 // all things being equal, let's start with the queue with which we have 2100 // all things being equal, let's start with the queue with which we have
2088 // affinity. this helps us maintain affinity even when we don't have 2101 // affinity. this helps us maintain affinity even when we don't have
2089 // an estimate for local-affinity execution time (i.e., 2nd time on GPU) 2102 // an estimate for local-affinity execution time (i.e., 2nd time on GPU)
2090 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; 2103 aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
2104 shortest = aff_queue;
2091 2105
2092 // if(shortest == aff->shortest_queue) { 2106 // if(shortest == aff->shortest_queue) {
2093 // TRACE_CUR("special case: have affinity with shortest queue\n"); 2107 // TRACE_CUR("special case: have affinity with shortest queue\n");
@@ -2108,29 +2122,46 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2108 for(i = 0; i < sem->nr_replicas; ++i) { 2122 for(i = 0; i < sem->nr_replicas; ++i) {
2109 if(&aff->q_info[i] != shortest) { 2123 if(&aff->q_info[i] != shortest) {
2110 if(aff->q_info[i].q->count < max_fifo_len) { 2124 if(aff->q_info[i].q->count < max_fifo_len) {
2125 int want = 0;
2111 2126
2112 lt_t est_len = 2127 lt_t migration =
2113 aff->q_info[i].estimated_len +
2114 get_gpu_estimate(t, 2128 get_gpu_estimate(t,
2115 gpu_migration_distance(tsk_rt(t)->last_gpu, 2129 gpu_migration_distance(tsk_rt(t)->last_gpu,
2116 replica_to_gpu(aff, i))); 2130 replica_to_gpu(aff, i)));
2131 lt_t est_len = aff->q_info[i].estimated_len + migration;
2117 2132
2118 // queue is smaller, or they're equal and the other has a smaller number 2133 // queue is smaller, or they're equal and the other has a smaller number
2119 // of total users. 2134 // of total users.
2120 // 2135 //
2121 // tie-break on the shortest number of simult users. this only kicks in 2136 // tie-break on the shortest number of simult users. this only kicks in
2122 // when there are more than 1 empty queues. 2137 // when there are more than 1 empty queues.
2123 2138
2124 // TODO: Make "est_len < min_len" a fuzzy function that allows 2139 // TODO: Make "est_len < min_len" a fuzzy function that allows
2125 // queues "close enough" in length to be considered equal. 2140 // queues "close enough" in length to be considered equal.
2126 2141
2127 if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ 2142 /* NOTE: 'shortest' starts out with affinity GPU */
2128 (est_len < min_len) || /* i-th queue has shortest length */ 2143 if(unlikely(shortest->q->count >= max_fifo_len)) { /* 'shortest' is full and i-th queue is not */
2129 ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ 2144 want = 1;
2130 ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) || 2145 }
2131 ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && 2146 else if(est_len < min_len) {
2132 (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) { 2147 want = 1; /* i-th queue has shortest length */
2148 }
2149 else if(unlikely(est_len == min_len)) { /* equal lengths */
2150 if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { /* don't sacrifice affinity on tie */
2151 if(has_affinity(aff, t, i)) {
2152 want = 1; /* switch to maintain affinity */
2153 }
2154 else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) { /* favor one with less affinity load */
2155 want = 1;
2156 }
2157 else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */
2158 (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */
2159 want = 1;
2160 }
2161 }
2162 }
2133 2163
2164 if(want) {
2134 shortest = &aff->q_info[i]; 2165 shortest = &aff->q_info[i];
2135 min_len = est_len; 2166 min_len = est_len;
2136 min_nr_users = *(aff->q_info[i].nr_cur_users); 2167 min_nr_users = *(aff->q_info[i].nr_cur_users);
@@ -2672,6 +2703,7 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
2672 // decrement affinity count on old GPU 2703 // decrement affinity count on old GPU
2673 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; 2704 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
2674 --(aff->nr_aff_on_rsrc[aff_rsrc]); 2705 --(aff->nr_aff_on_rsrc[aff_rsrc]);
2706// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);
2675 2707
2676 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { 2708 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
2677 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); 2709 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
@@ -2717,10 +2749,12 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2717 if(last_gpu >= 0) { 2749 if(last_gpu >= 0) {
2718 int old_rsrc = last_gpu - aff->offset; 2750 int old_rsrc = last_gpu - aff->offset;
2719 --(aff->nr_aff_on_rsrc[old_rsrc]); 2751 --(aff->nr_aff_on_rsrc[old_rsrc]);
2752// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
2720 } 2753 }
2721 2754
2722 // increment affinity count on new GPU 2755 // increment affinity count on new GPU
2723 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); 2756 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
2757// aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
2724 tsk_rt(t)->rsrc_exit_cb_args = aff; 2758 tsk_rt(t)->rsrc_exit_cb_args = aff;
2725 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; 2759 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
2726 } 2760 }