aboutsummaryrefslogtreecommitdiffstats
path: root/litmus/ikglp_lock.c
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-11-30 13:36:03 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-11-30 13:36:03 -0500
commit3ee5f13b8213270ba30e4b3625dff46b1cc8326f (patch)
treeb349e8f80559fdd608d057781fd0f2dcc1e498fe /litmus/ikglp_lock.c
parent7ebec2a6abe03d5c42742a6dce74787880394897 (diff)
More improvements on affinity heuristics
Diffstat (limited to 'litmus/ikglp_lock.c')
-rw-r--r--litmus/ikglp_lock.c68
1 files changed, 51 insertions, 17 deletions
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 9c57bc24e8bd..16ae621bbf75 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1887,6 +1887,19 @@ static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1887 return replica; 1887 return replica;
1888} 1888}
1889 1889
/* Returns nonzero iff the two replica indices map to the same physical GPU. */
static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b)
{
	int gpu_a = replica_to_gpu(aff, replica_a);
	int gpu_b = replica_to_gpu(aff, replica_b);
	return (gpu_a == gpu_b);
}
1894
1895static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica)
1896{
1897 if(tsk_rt(t)->last_gpu >= 0)
1898 {
1899 return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica));
1900 }
1901 return 0;
1902}
1890 1903
1891int ikglp_aff_obs_close(struct affinity_observer* obs) 1904int ikglp_aff_obs_close(struct affinity_observer* obs)
1892{ 1905{
@@ -1971,7 +1984,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1971 return(NULL); 1984 return(NULL);
1972 } 1985 }
1973 1986
1974 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); 1987 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1975 if(!ikglp_aff->nr_aff_on_rsrc) { 1988 if(!ikglp_aff->nr_aff_on_rsrc) {
1976 kfree(ikglp_aff->nr_cur_users_on_rsrc); 1989 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1977 kfree(ikglp_aff->q_info); 1990 kfree(ikglp_aff->q_info);
@@ -1993,7 +2006,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
1993 ikglp_aff->relax_max_fifo_len); 2006 ikglp_aff->relax_max_fifo_len);
1994 2007
1995 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); 2008 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
1996 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); 2009 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));
1997 2010
1998 for(i = 0; i < sem->nr_replicas; ++i) { 2011 for(i = 0; i < sem->nr_replicas; ++i) {
1999 ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; 2012 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -2057,7 +2070,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2057 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); 2070 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2058 lt_t min_len; 2071 lt_t min_len;
2059 int min_nr_users, min_nr_aff_users; 2072 int min_nr_users, min_nr_aff_users;
2060 struct ikglp_queue_info *shortest; 2073 struct ikglp_queue_info *shortest, *aff_queue;
2061 struct fifo_queue *to_enqueue; 2074 struct fifo_queue *to_enqueue;
2062 int i; 2075 int i;
2063 int affinity_gpu; 2076 int affinity_gpu;
@@ -2087,7 +2100,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2087 // all things being equal, let's start with the queue with which we have 2100 // all things being equal, let's start with the queue with which we have
2088 // affinity. this helps us maintain affinity even when we don't have 2101 // affinity. this helps us maintain affinity even when we don't have
2089 // an estimate for local-affinity execution time (i.e., 2nd time on GPU) 2102 // an estimate for local-affinity execution time (i.e., 2nd time on GPU)
2090 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; 2103 aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
2104 shortest = aff_queue;
2091 2105
2092 // if(shortest == aff->shortest_queue) { 2106 // if(shortest == aff->shortest_queue) {
2093 // TRACE_CUR("special case: have affinity with shortest queue\n"); 2107 // TRACE_CUR("special case: have affinity with shortest queue\n");
@@ -2108,29 +2122,46 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
2108 for(i = 0; i < sem->nr_replicas; ++i) { 2122 for(i = 0; i < sem->nr_replicas; ++i) {
2109 if(&aff->q_info[i] != shortest) { 2123 if(&aff->q_info[i] != shortest) {
2110 if(aff->q_info[i].q->count < max_fifo_len) { 2124 if(aff->q_info[i].q->count < max_fifo_len) {
2125 int want = 0;
2111 2126
2112 lt_t est_len = 2127 lt_t migration =
2113 aff->q_info[i].estimated_len +
2114 get_gpu_estimate(t, 2128 get_gpu_estimate(t,
2115 gpu_migration_distance(tsk_rt(t)->last_gpu, 2129 gpu_migration_distance(tsk_rt(t)->last_gpu,
2116 replica_to_gpu(aff, i))); 2130 replica_to_gpu(aff, i)));
2131 lt_t est_len = aff->q_info[i].estimated_len + migration;
2117 2132
2118 // queue is smaller, or they're equal and the other has a smaller number 2133 // queue is smaller, or they're equal and the other has a smaller number
2119 // of total users. 2134 // of total users.
2120 // 2135 //
2121 // tie-break on the shortest number of simult users. this only kicks in 2136 // tie-break on the shortest number of simult users. this only kicks in
2122 // when there are more than 1 empty queues. 2137 // when there are more than 1 empty queues.
2123 2138
2124 // TODO: Make "est_len < min_len" a fuzzy function that allows 2139 // TODO: Make "est_len < min_len" a fuzzy function that allows
2125 // queues "close enough" in length to be considered equal. 2140 // queues "close enough" in length to be considered equal.
2126 2141
2127 if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */ 2142 /* NOTE: 'shortest' starts out with affinity GPU */
2128 (est_len < min_len) || /* i-th queue has shortest length */ 2143 if(unlikely(shortest->q->count >= max_fifo_len)) { /* 'shortest' is full and i-th queue is not */
2129 ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */ 2144 want = 1;
2130 ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) || 2145 }
2131 ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && 2146 else if(est_len < min_len) {
2132 (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) { 2147 want = 1; /* i-th queue has shortest length */
2148 }
2149 else if(unlikely(est_len == min_len)) { /* equal lengths */
2150 if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { /* don't sacrifice affinity on tie */
2151 if(has_affinity(aff, t, i)) {
2152 want = 1; /* switch to maintain affinity */
2153 }
2154 else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) { /* favor one with less affinity load */
2155 want = 1;
2156 }
2157 else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */
2158 (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */
2159 want = 1;
2160 }
2161 }
2162 }
2133 2163
2164 if(want) {
2134 shortest = &aff->q_info[i]; 2165 shortest = &aff->q_info[i];
2135 min_len = est_len; 2166 min_len = est_len;
2136 min_nr_users = *(aff->q_info[i].nr_cur_users); 2167 min_nr_users = *(aff->q_info[i].nr_cur_users);
@@ -2672,6 +2703,7 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
2672 // decrement affinity count on old GPU 2703 // decrement affinity count on old GPU
2673 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; 2704 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
2674 --(aff->nr_aff_on_rsrc[aff_rsrc]); 2705 --(aff->nr_aff_on_rsrc[aff_rsrc]);
2706// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);
2675 2707
2676 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { 2708 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
2677 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); 2709 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
@@ -2717,10 +2749,12 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2717 if(last_gpu >= 0) { 2749 if(last_gpu >= 0) {
2718 int old_rsrc = last_gpu - aff->offset; 2750 int old_rsrc = last_gpu - aff->offset;
2719 --(aff->nr_aff_on_rsrc[old_rsrc]); 2751 --(aff->nr_aff_on_rsrc[old_rsrc]);
2752// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
2720 } 2753 }
2721 2754
2722 // increment affinity count on new GPU 2755 // increment affinity count on new GPU
2723 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); 2756 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
2757// aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
2724 tsk_rt(t)->rsrc_exit_cb_args = aff; 2758 tsk_rt(t)->rsrc_exit_cb_args = aff;
2725 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; 2759 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
2726 } 2760 }