author     Glenn Elliott <gelliott@cs.unc.edu>  2012-11-30 13:36:03 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>  2012-11-30 13:36:03 -0500
commit     3ee5f13b8213270ba30e4b3625dff46b1cc8326f (patch)
tree       b349e8f80559fdd608d057781fd0f2dcc1e498fe
parent     7ebec2a6abe03d5c42742a6dce74787880394897 (diff)
More improvements on affinity heuristics

Make get_gpu_estimate() return estimates that are monotone in migration
distance, widen the per-GPU affinity counters to int64_t, and rework the
IKGLP FIFO-queue tie-break so a task does not give up GPU affinity when
estimated queue lengths are equal. Also silence a WARN_ON for equal-PID
comparisons in edf_higher_prio().
-rw-r--r--  include/litmus/gpu_affinity.h |  16
-rw-r--r--  include/litmus/ikglp_lock.h   |   2
-rw-r--r--  litmus/edf_common.c           |   2
-rw-r--r--  litmus/ikglp_lock.c           |  68
4 files changed, 69 insertions(+), 19 deletions(-)
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index d64a15cbf2a5..47da725717b0 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -31,6 +31,21 @@ static inline lt_t get_gpu_time(struct task_struct* t)

 static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
 {
+	int i;
+	lt_t val;
+
+	if(dist == MIG_NONE) {
+		dist = MIG_LOCAL;
+	}
+
+	val = t->rt_param.gpu_migration_est[dist].avg;
+	for(i = dist-1; i >= 0; --i) {
+		if(t->rt_param.gpu_migration_est[i].avg > val) {
+			val = t->rt_param.gpu_migration_est[i].avg;
+		}
+	}
+
+#if 0
 //	int i;
 //	fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
 //	lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
@@ -43,6 +58,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t
 //	for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
 //		val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
 //	}
+#endif

 	return ((val > 0) ? val : dist+1);
 }
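The new code path above replaces the commented-out fixed-point logic and makes the estimate monotone in migration distance: after mapping MIG_NONE to MIG_LOCAL, it takes the maximum running average over the requested distance and every closer one, so a farther migration is never predicted to be cheaper than a nearer one. A minimal userspace sketch of that logic follows; the enum values, array layout, and sample numbers are illustrative stand-ins, not LITMUS^RT definitions.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;
enum { MIG_LOCAL = 0, MIG_NEAR, MIG_MED, MIG_FAR, NUM_DIST, MIG_NONE };

/* Sketch: max of the per-distance averages up to 'dist'. */
static lt_t estimate(const lt_t avg[NUM_DIST], int dist)
{
	int i;
	lt_t val;

	if (dist == MIG_NONE)
		dist = MIG_LOCAL;	/* treat "no migration" as local */

	/* never let a longer migration look cheaper than a shorter one */
	val = avg[dist];
	for (i = dist - 1; i >= 0; --i)
		if (avg[i] > val)
			val = avg[i];

	/* no samples yet: fall back to a tiny distance-ordered value */
	return (val > 0) ? val : (lt_t)(dist + 1);
}

int main(void)
{
	lt_t avg[NUM_DIST] = { 900, 700, 1200, 0 };
	/* MIG_FAR has no samples, but MIG_MED dominates: prints 1200 */
	printf("%llu\n", (unsigned long long)estimate(avg, MIG_FAR));
	return 0;
}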
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 9d0cd3d1904e..89d9c37c7631 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -139,7 +139,7 @@ struct ikglp_affinity
 	struct ikglp_affinity_ops *ops;
 	struct ikglp_queue_info *q_info;
 	int *nr_cur_users_on_rsrc;
-	int *nr_aff_on_rsrc;
+	int64_t *nr_aff_on_rsrc;
 	int offset;
 	int nr_simult;
 	int nr_rsrc;
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 916b1b4309b7..a9bf0c08e125 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -272,7 +272,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
 			return 1;
 		}
 		else if (first->pid == second->pid) {
-			WARN_ON(1);
+			//WARN_ON(1);
 		}
 	}
 	else {
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 9c57bc24e8bd..16ae621bbf75 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1887,6 +1887,19 @@ static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
 	return replica;
 }

+static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b)
+{
+	return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b));
+}
+
+static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica)
+{
+	if(tsk_rt(t)->last_gpu >= 0)
+	{
+		return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica));
+	}
+	return 0;
+}

 int ikglp_aff_obs_close(struct affinity_observer* obs)
 {
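The two inline helpers added above centralize the affinity test that the reworked tie-break below relies on: a task has affinity with a replica when the GPU backing that replica is the one the task last ran on, and a negative last_gpu means the task has no GPU history yet. A standalone sketch follows, assuming replica_to_gpu() maps nr_simult consecutive replicas to one GPU starting at offset; that mapping is an assumption consistent with the offset arithmetic seen later in this patch, not a copy of the real helper.

#include <stdbool.h>

struct aff { int offset; int nr_simult; };

/* Assumed mapping: nr_simult replicas share each GPU. */
static int replica_to_gpu(const struct aff *a, int replica)
{
	return replica / a->nr_simult + a->offset;
}

static bool has_affinity(const struct aff *a, int last_gpu, int replica)
{
	/* last_gpu < 0: task has not used a GPU yet, so no affinity */
	return last_gpu >= 0 && last_gpu == replica_to_gpu(a, replica);
}

static bool same_gpu(const struct aff *a, int replica_a, int replica_b)
{
	return replica_to_gpu(a, replica_a) == replica_to_gpu(a, replica_b);
}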
@@ -1971,7 +1984,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}

-	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
 	if(!ikglp_aff->nr_aff_on_rsrc) {
 		kfree(ikglp_aff->nr_cur_users_on_rsrc);
 		kfree(ikglp_aff->q_info);
@@ -1993,7 +2006,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		ikglp_aff->relax_max_fifo_len);

 	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
-	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
+	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));

 	for(i = 0; i < sem->nr_replicas; ++i) {
 		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -2057,7 +2070,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	lt_t min_len;
 	int min_nr_users, min_nr_aff_users;
-	struct ikglp_queue_info *shortest;
+	struct ikglp_queue_info *shortest, *aff_queue;
 	struct fifo_queue *to_enqueue;
 	int i;
 	int affinity_gpu;
@@ -2087,7 +2100,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	// all things being equal, let's start with the queue with which we have
 	// affinity. this helps us maintain affinity even when we don't have
 	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
-	shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
+	aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
+	shortest = aff_queue;

 //	if(shortest == aff->shortest_queue) {
 //		TRACE_CUR("special case: have affinity with shortest queue\n");
@@ -2108,29 +2122,46 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		if(&aff->q_info[i] != shortest) {
 			if(aff->q_info[i].q->count < max_fifo_len) {
+				int want = 0;

-				lt_t est_len =
-					aff->q_info[i].estimated_len +
+				lt_t migration =
 					get_gpu_estimate(t,
 						gpu_migration_distance(tsk_rt(t)->last_gpu,
 							replica_to_gpu(aff, i)));
+				lt_t est_len = aff->q_info[i].estimated_len + migration;

 				// queue is smaller, or they're equal and the other has a smaller number
 				// of total users.
 				//
 				// tie-break on the smallest number of simult users. this only kicks in
 				// when there is more than one empty queue.

 				// TODO: Make "est_len < min_len" a fuzzy function that allows
 				// queues "close enough" in length to be considered equal.

-				if((shortest->q->count >= max_fifo_len) ||  /* 'shortest' is full and i-th queue is not */
-				   (est_len < min_len) ||  /* i-th queue has shortest length */
-				   ((est_len == min_len) &&  /* equal lengths, but one has fewer over-all users */
-				    ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) ||
-				     ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&
-				      (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) {
+				/* NOTE: 'shortest' starts out with affinity GPU */
+				if(unlikely(shortest->q->count >= max_fifo_len)) {  /* 'shortest' is full and i-th queue is not */
+					want = 1;
+				}
+				else if(est_len < min_len) {
+					want = 1;  /* i-th queue has shortest length */
+				}
+				else if(unlikely(est_len == min_len)) {  /* equal lengths */
+					if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) {  /* don't sacrifice affinity on tie */
+						if(has_affinity(aff, t, i)) {
+							want = 1;  /* switch to maintain affinity */
+						}
+						else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) {  /* favor one with less affinity load */
+							want = 1;
+						}
+						else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&  /* equal number of affinity users */
+							(*(aff->q_info[i].nr_cur_users) < min_nr_users)) {  /* favor one with fewer current users */
+							want = 1;
+						}
+					}
+				}

+				if(want) {
 					shortest = &aff->q_info[i];
 					min_len = est_len;
 					min_nr_users = *(aff->q_info[i].nr_cur_users);
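The rewrite above unrolls the old compound conditional into an explicit decision ladder on a want flag and adds one new rule: on a tie in estimated length, a task no longer abandons a queue it already has affinity with, and only when it has no stake in the current pick does it consider gaining affinity, then a lighter affinity load, then fewer current users. A condensed sketch of just that ladder, with simplified parameter names standing in for the fields used above:

#include <stdint.h>

typedef uint64_t lt_t;

/* Sketch of the tie-break order: non-full, then shorter, then keep
 * own affinity, then gain affinity, then fewer affine tasks, then
 * fewer current users. Returns 1 if queue i should become 'shortest'. */
static int prefer_queue_i(int shortest_full, int shortest_has_aff,
			  int i_has_aff, lt_t est_len, lt_t min_len,
			  int i_aff_users, int min_aff_users,
			  int i_cur_users, int min_cur_users)
{
	if (shortest_full)
		return 1;		/* current pick is full, i is not */
	if (est_len < min_len)
		return 1;		/* i is strictly shorter */
	if (est_len == min_len && !shortest_has_aff) {
		if (i_has_aff)
			return 1;	/* switch to maintain affinity */
		if (i_aff_users < min_aff_users)
			return 1;	/* lighter affinity load */
		if (i_aff_users == min_aff_users &&
		    i_cur_users < min_cur_users)
			return 1;	/* fewer current users */
	}
	return 0;
}

For example, with est_len == min_len, no affinity on the current pick, and affinity with queue i, the ladder returns 1 and the caller switches queues; if the current pick already has the task's affinity, every tie is resolved in its favor.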
@@ -2672,6 +2703,7 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
 	// decrement affinity count on old GPU
 	aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
 	--(aff->nr_aff_on_rsrc[aff_rsrc]);
+//	aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);

 	if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
 		WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
@@ -2717,10 +2749,12 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 	if(last_gpu >= 0) {
 		int old_rsrc = last_gpu - aff->offset;
 		--(aff->nr_aff_on_rsrc[old_rsrc]);
+//		aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
 	}

 	// increment affinity count on new GPU
 	++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
+//	aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
 	tsk_rt(t)->rsrc_exit_cb_args = aff;
 	tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
 }
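The commented-out increments in the last two hunks hint at a possible next step for the heuristic: weighting each task's contribution to nr_aff_on_rsrc by 1e9 divided by its period, i.e. its job-release rate per second, instead of a flat count of one. That would also explain widening nr_aff_on_rsrc to int64_t earlier in this patch, since weighted counts grow far beyond the number of tasks. A sketch of the weighting, assuming get_rt_period() returns the period in nanoseconds (consistent with the 1e9 numerator):

#include <stdint.h>

/* Weighted affinity contribution: jobs per second rather than 1. */
static int64_t aff_weight(uint64_t period_ns)
{
	return (int64_t)(UINT64_C(1000000000) / period_ns);
}

/* e.g. a 1 ms task adds 1000 while a 100 ms task adds 10, so a GPU
 * whose affine tasks release jobs frequently shows a larger load. */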