author     Glenn Elliott <gelliott@cs.unc.edu>   2012-11-30 13:36:03 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-11-30 13:36:03 -0500
commit     3ee5f13b8213270ba30e4b3625dff46b1cc8326f
tree       b349e8f80559fdd608d057781fd0f2dcc1e498fe
parent     7ebec2a6abe03d5c42742a6dce74787880394897
More improvements on affinity heuristics
-rw-r--r--  include/litmus/gpu_affinity.h | 16
-rw-r--r--  include/litmus/ikglp_lock.h   |  2
-rw-r--r--  litmus/edf_common.c           |  2
-rw-r--r--  litmus/ikglp_lock.c           | 68
4 files changed, 69 insertions(+), 19 deletions(-)
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index d64a15cbf2a5..47da725717b0 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -31,6 +31,21 @@ static inline lt_t get_gpu_time(struct task_struct* t)
 
 static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
 {
+	int i;
+	lt_t val;
+
+	if(dist == MIG_NONE) {
+		dist = MIG_LOCAL;
+	}
+
+	val = t->rt_param.gpu_migration_est[dist].avg;
+	for(i = dist-1; i >= 0; --i) {
+		if(t->rt_param.gpu_migration_est[i].avg > val) {
+			val = t->rt_param.gpu_migration_est[i].avg;
+		}
+	}
+
+#if 0
 	// int i;
 	// fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
 	// lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
@@ -43,6 +58,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t
 	// for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
 	//	val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
 	// }
+#endif
 
 	return ((val > 0) ? val : dist+1);
 }
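The new get_gpu_estimate() body treats MIG_NONE as MIG_LOCAL and then enforces a monotonicity heuristic: the returned estimate can never be smaller than the observed average for any shorter migration distance, and a task with no samples yet falls back to dist+1 so that farther migrations still rank as more expensive. A minimal standalone sketch of that max-scan (the enum values, lt_t typedef, and sample averages below are illustrative stand-ins, not code from this commit):

#include <stdio.h>

/* hypothetical stand-ins for gpu_migration_dist_t and the per-task history */
enum { MIG_LOCAL, MIG_NEAR, MIG_MED, MIG_FAR, NR_DIST };
typedef unsigned long long lt_t;

static lt_t estimate(const lt_t avg[NR_DIST], int dist)
{
	lt_t val = avg[dist];
	int i;

	/* never report an estimate below any shorter-distance average */
	for (i = dist - 1; i >= 0; --i)
		if (avg[i] > val)
			val = avg[i];

	/* no data at all: rank by dist+1 so farther still costs more */
	return (val > 0) ? val : (lt_t)(dist + 1);
}

int main(void)
{
	lt_t seen[NR_DIST]  = { 700, 500, 0, 0 };	/* sample averages */
	lt_t fresh[NR_DIST] = { 0, 0, 0, 0 };		/* task with no history */

	/* MIG_NEAR is clamped up to the larger MIG_LOCAL average: 700 */
	printf("%llu\n", estimate(seen, MIG_NEAR));
	/* no samples anywhere: fall back to dist+1, so FAR(4) > LOCAL(1) */
	printf("%llu %llu\n", estimate(fresh, MIG_FAR), estimate(fresh, MIG_LOCAL));
	return 0;
}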
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 9d0cd3d1904e..89d9c37c7631 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -139,7 +139,7 @@ struct ikglp_affinity
 	struct ikglp_affinity_ops *ops;
 	struct ikglp_queue_info *q_info;
 	int *nr_cur_users_on_rsrc;
-	int *nr_aff_on_rsrc;
+	int64_t *nr_aff_on_rsrc;
 	int offset;
 	int nr_simult;
 	int nr_rsrc;
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 916b1b4309b7..a9bf0c08e125 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -272,7 +272,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
 			return 1;
 		}
 		else if (first->pid == second->pid) {
-			WARN_ON(1);
+			//WARN_ON(1);
 		}
 	}
 	else {
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 9c57bc24e8bd..16ae621bbf75 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1887,6 +1887,19 @@ static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
 	return replica;
 }
 
+static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b)
+{
+	return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b));
+}
+
+static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica)
+{
+	if(tsk_rt(t)->last_gpu >= 0)
+	{
+		return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica));
+	}
+	return 0;
+}
 
 int ikglp_aff_obs_close(struct affinity_observer* obs)
 {
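The two new helpers compare GPUs, not replica indices: because each GPU exposes nr_simult replicas, distinct replicas can still denote the same physical GPU, and has_affinity() only reports affinity for a task that has run on a GPU before (last_gpu >= 0). A small sketch of those semantics, with the replica-to-GPU mapping replaced by an assumed lookup table since the real replica_to_gpu() arithmetic is not shown in this patch:

#include <assert.h>

/* stand-in for replica_to_gpu(): here just a table; the real mapping
   is derived from the observer's offset/nr_simult bookkeeping */
static const int gpu_of[4] = { 0, 0, 1, 1 };	/* 2 replicas per GPU (assumed) */

static int same_gpu(int replica_a, int replica_b)
{
	return gpu_of[replica_a] == gpu_of[replica_b];
}

static int has_affinity(int last_gpu, int replica)
{
	/* last_gpu < 0 means the task has no GPU history yet */
	return (last_gpu >= 0) && (last_gpu == gpu_of[replica]);
}

int main(void)
{
	assert(same_gpu(0, 1));		/* replicas 0 and 1 share GPU 0 */
	assert(!same_gpu(1, 2));	/* replica 2 lives on GPU 1 */
	assert(has_affinity(1, 3));	/* last ran on GPU 1; replica 3 is there */
	assert(!has_affinity(-1, 0));	/* fresh task: no affinity anywhere */
	return 0;
}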
@@ -1971,7 +1984,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}
 
-	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
 	if(!ikglp_aff->nr_aff_on_rsrc) {
 		kfree(ikglp_aff->nr_cur_users_on_rsrc);
 		kfree(ikglp_aff->q_info);
@@ -1993,7 +2006,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		ikglp_aff->relax_max_fifo_len);
 
 	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
-	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
+	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));
 
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -2057,7 +2070,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	lt_t min_len;
 	int min_nr_users, min_nr_aff_users;
-	struct ikglp_queue_info *shortest;
+	struct ikglp_queue_info *shortest, *aff_queue;
 	struct fifo_queue *to_enqueue;
 	int i;
 	int affinity_gpu;
@@ -2087,7 +2100,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	// all things being equal, let's start with the queue with which we have
 	// affinity. this helps us maintain affinity even when we don't have
 	// an estiamte for local-affinity execution time (i.e., 2nd time on GPU)
-	shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
+	aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
+	shortest = aff_queue;
 
 //	if(shortest == aff->shortest_queue) {
 //		TRACE_CUR("special case: have affinity with shortest queue\n");
@@ -2108,29 +2122,46 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		if(&aff->q_info[i] != shortest) {
 			if(aff->q_info[i].q->count < max_fifo_len) {
+				int want = 0;
 
-				lt_t est_len =
-					aff->q_info[i].estimated_len +
+				lt_t migration =
 					get_gpu_estimate(t,
 						gpu_migration_distance(tsk_rt(t)->last_gpu,
 							replica_to_gpu(aff, i)));
+				lt_t est_len = aff->q_info[i].estimated_len + migration;
 
 				// queue is smaller, or they're equal and the other has a smaller number
 				// of total users.
 				//
 				// tie-break on the shortest number of simult users. this only kicks in
 				// when there are more than 1 empty queues.
 
 				// TODO: Make "est_len < min_len" a fuzzy function that allows
 				// queues "close enough" in length to be considered equal.
 
-				if((shortest->q->count >= max_fifo_len) ||	/* 'shortest' is full and i-th queue is not */
-				   (est_len < min_len) ||	/* i-th queue has shortest length */
-				   ((est_len == min_len) &&	/* equal lengths, but one has fewer over-all users */
-					((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) ||
-					 ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&
-					  (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) {
+				/* NOTE: 'shortest' starts out with affinity GPU */
+				if(unlikely(shortest->q->count >= max_fifo_len)) {	/* 'shortest' is full and i-th queue is not */
+					want = 1;
+				}
+				else if(est_len < min_len) {
+					want = 1;	/* i-th queue has shortest length */
+				}
+				else if(unlikely(est_len == min_len)) {	/* equal lengths */
+					if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) {	/* don't sacrifice affinity on tie */
+						if(has_affinity(aff, t, i)) {
+							want = 1;	/* switch to maintain affinity */
+						}
+						else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) {	/* favor one with less affinity load */
+							want = 1;
+						}
+						else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&	/* equal number of affinity */
+							(*(aff->q_info[i].nr_cur_users) < min_nr_users)) {	/* favor one with current fewer users */
+							want = 1;
+						}
+					}
+				}
 
+				if(want) {
 					shortest = &aff->q_info[i];
 					min_len = est_len;
 					min_nr_users = *(aff->q_info[i].nr_cur_users);
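Unrolling the old compound condition into a want flag makes the selection cascade explicit: take the i-th queue when the current pick is full; otherwise when its estimated completion time is strictly shorter; and on an exact tie, never abandon a queue the task has affinity with, preferring (in order) a queue that restores affinity, one with fewer affine tasks, then one with fewer current users. A condensed, compilable model of just that decision (the struct and want_switch() are simplified stand-ins for the patch's ikglp_queue_info bookkeeping):

#include <assert.h>

/* simplified stand-in for ikglp_queue_info plus its affinity counters */
struct q {
	int count;		/* q->count: tasks enqueued */
	int nr_aff_users;	/* *(q_info->nr_aff_users) */
	int nr_cur_users;	/* *(q_info->nr_cur_users) */
	int gpu;		/* replica_to_gpu() of this queue's replica */
};

static int want_switch(const struct q *best, const struct q *cand,
		       unsigned long long est_len, unsigned long long min_len,
		       int last_gpu, int max_fifo_len)
{
	int best_aff = (last_gpu >= 0) && (last_gpu == best->gpu);
	int cand_aff = (last_gpu >= 0) && (last_gpu == cand->gpu);

	if (best->count >= max_fifo_len)
		return 1;			/* current pick is full */
	if (est_len < min_len)
		return 1;			/* strictly shorter estimated wait */
	if (est_len == min_len && !best_aff) {	/* tie: never give up affinity */
		if (cand_aff)
			return 1;		/* switch to regain affinity */
		if (cand->nr_aff_users < best->nr_aff_users)
			return 1;		/* lighter affinity load */
		if (cand->nr_aff_users == best->nr_aff_users &&
		    cand->nr_cur_users < best->nr_cur_users)
			return 1;		/* fewer current users */
	}
	return 0;
}

int main(void)
{
	struct q best = { .count = 1, .nr_aff_users = 2, .nr_cur_users = 2, .gpu = 0 };
	struct q cand = { .count = 1, .nr_aff_users = 2, .nr_cur_users = 2, .gpu = 1 };

	/* tie on length, no affinity with 'best', affinity with candidate: switch */
	assert(want_switch(&best, &cand, 100, 100, /*last_gpu=*/1, /*max=*/4));
	/* but if we already sit on our affinity GPU, a tie never moves us */
	assert(!want_switch(&best, &cand, 100, 100, /*last_gpu=*/0, /*max=*/4));
	return 0;
}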
@@ -2672,6 +2703,7 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
 	// decrement affinity count on old GPU
 	aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
 	--(aff->nr_aff_on_rsrc[aff_rsrc]);
+//	aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);
 
 	if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
 		WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
@@ -2717,10 +2749,12 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 	if(last_gpu >= 0) {
 		int old_rsrc = last_gpu - aff->offset;
 		--(aff->nr_aff_on_rsrc[old_rsrc]);
+//		aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
 	}
 
 	// increment affinity count on new GPU
 	++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
+//	aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
 	tsk_rt(t)->rsrc_exit_cb_args = aff;
 	tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
 }
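Widening nr_aff_on_rsrc from int to int64_t fits the commented-out experiment visible in these hunks: rather than counting affine tasks, each task would contribute a weight of 1e9/get_rt_period(t) (roughly its job-arrival rate, given periods in nanoseconds), for which an int is a poor fit. A hypothetical sketch of that weighting under those assumptions (track() and the demo periods are illustrative, not from this commit):

#include <stdint.h>
#include <stdio.h>

/* hypothetical demo of the commented-out weighting: a task contributes
   1e9 / period(ns) -- roughly its job-arrival rate -- instead of +1 */
static void track(int64_t *nr_aff, uint64_t period_ns, int acquire)
{
	int64_t w = (int64_t)(1000000000ULL / period_ns);
	*nr_aff += acquire ? w : -w;
}

int main(void)
{
	int64_t nr_aff_on_rsrc = 0;

	track(&nr_aff_on_rsrc, 10000000ULL, 1);		/* 10ms task arrives: +100 */
	track(&nr_aff_on_rsrc, 100000000ULL, 1);	/* 100ms task arrives: +10 */
	track(&nr_aff_on_rsrc, 10000000ULL, 0);		/* 10ms task departs: -100 */

	printf("%lld\n", (long long)nr_aff_on_rsrc);	/* prints 10 */
	return 0;
}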