-rw-r--r--  include/litmus/gpu_affinity.h | 16
-rw-r--r--  include/litmus/ikglp_lock.h   |  2
-rw-r--r--  litmus/edf_common.c           |  2
-rw-r--r--  litmus/ikglp_lock.c           | 68
4 files changed, 69 insertions(+), 19 deletions(-)
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index d64a15cbf2a5..47da725717b0 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -31,6 +31,21 @@ static inline lt_t get_gpu_time(struct task_struct* t)
 
 static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
 {
+	int i;
+	lt_t val;
+
+	if(dist == MIG_NONE) {
+		dist = MIG_LOCAL;
+	}
+
+	val = t->rt_param.gpu_migration_est[dist].avg;
+	for(i = dist-1; i >= 0; --i) {
+		if(t->rt_param.gpu_migration_est[i].avg > val) {
+			val = t->rt_param.gpu_migration_est[i].avg;
+		}
+	}
+
+#if 0
 //	int i;
 //	fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
 //	lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
@@ -43,6 +58,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t
 //	for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
 //		val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
 //	}
+#endif
 
 	return ((val > 0) ? val : dist+1);
 }
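
The new get_gpu_estimate() body above makes the estimate monotone in migration distance: the cost of a farther migration is never reported as smaller than that of a closer one, and when no samples exist yet it falls back to the tiny ordered value dist+1. A minimal userspace sketch of the same scan, with stand-in values for the LITMUS^RT types and the gpu_migration_dist_t enum (the real definitions live in the kernel headers):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;	/* stand-in for the LITMUS^RT time type (ns) */
typedef enum { MIG_LOCAL = 0, MIG_NEAR, MIG_MED, MIG_FAR, MIG_NONE } gpu_migration_dist_t;

/* stand-in for t->rt_param.gpu_migration_est[dist].avg; MIG_MED has no samples */
static lt_t est_avg[MIG_NONE] = { 150, 400, 0, 900 };

static lt_t get_gpu_estimate(gpu_migration_dist_t dist)
{
	int i;
	lt_t val;

	if (dist == MIG_NONE)	/* "no migration" is costed like a local access */
		dist = MIG_LOCAL;

	/* running max over this distance and all closer ones */
	val = est_avg[dist];
	for (i = dist - 1; i >= 0; --i)
		if (est_avg[i] > val)
			val = est_avg[i];

	/* no data yet: fall back to a small nonzero value ordered by distance */
	return (val > 0) ? val : (lt_t)(dist + 1);
}

int main(void)
{
	/* MIG_MED has avg 0, but MIG_NEAR's 400 dominates: prints 400 */
	printf("%llu\n", (unsigned long long)get_gpu_estimate(MIG_MED));
	return 0;
}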
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 9d0cd3d1904e..89d9c37c7631 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -139,7 +139,7 @@ struct ikglp_affinity
 	struct ikglp_affinity_ops *ops;
 	struct ikglp_queue_info *q_info;
 	int *nr_cur_users_on_rsrc;
-	int *nr_aff_on_rsrc;
+	int64_t *nr_aff_on_rsrc;
 	int offset;
 	int nr_simult;
 	int nr_rsrc;
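
Widening nr_aff_on_rsrc from int to int64_t keeps the counter signed, so the existing went-negative sanity check in gpu_ikglp_notify_exit() still works, while leaving headroom in case the counter is switched from plain task counts to the per-period weights left disabled at the end of this patch. A minimal sketch of the invariant, with nr_aff standing in for one slot of the array:

#include <stdint.h>
#include <assert.h>

static int64_t nr_aff = 0;	/* one slot of aff->nr_aff_on_rsrc */

static void task_gains_affinity(void) { ++nr_aff; }

static void task_loses_affinity(void)
{
	--nr_aff;
	/* mirrors the WARN_ON() in gpu_ikglp_notify_exit(): going negative
	 * means more decrements than increments, i.e. broken accounting */
	assert(nr_aff >= 0);
}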
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 916b1b4309b7..a9bf0c08e125 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -272,7 +272,7 @@ int edf_higher_prio(struct task_struct* first, struct task_struct* second)
 			return 1;
 		}
 		else if (first->pid == second->pid) {
-			WARN_ON(1);
+			//WARN_ON(1);
 		}
 	}
 	else {
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 9c57bc24e8bd..16ae621bbf75 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1887,6 +1887,19 @@ static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
 	return replica;
 }
 
+static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b)
+{
+	return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b));
+}
+
+static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica)
+{
+	if(tsk_rt(t)->last_gpu >= 0)
+	{
+		return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica));
+	}
+	return 0;
+}
 
 int ikglp_aff_obs_close(struct affinity_observer* obs)
 {
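
The two helpers added above compare replicas by the physical GPU backing them: same_gpu() asks whether two replica indices map to the same device, and has_affinity() asks whether task t last ran on the GPU behind a given replica, reporting no affinity when the task has no GPU history (last_gpu < 0). A self-contained sketch of the intended semantics, assuming a stand-in mapping in which consecutive replicas share a GPU (the kernel's replica_to_gpu() encapsulates the real mapping):

#include <stdio.h>

#define NR_SIMULT 2	/* assumption: replicas r and r+1 share GPU r/2 */

static int replica_to_gpu(int replica) { return replica / NR_SIMULT; }

static int same_gpu(int replica_a, int replica_b)
{
	return replica_to_gpu(replica_a) == replica_to_gpu(replica_b);
}

static int has_affinity(int last_gpu, int replica)
{
	if (last_gpu >= 0)
		return last_gpu == replica_to_gpu(replica);
	return 0;	/* no GPU history yet: no affinity anywhere */
}

int main(void)
{
	printf("%d %d\n", same_gpu(0, 1), same_gpu(1, 2));		/* 1 0 */
	printf("%d %d\n", has_affinity(1, 2), has_affinity(-1, 2));	/* 1 0 */
	return 0;
}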
@@ -1971,7 +1984,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}
 
-	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
 	if(!ikglp_aff->nr_aff_on_rsrc) {
 		kfree(ikglp_aff->nr_cur_users_on_rsrc);
 		kfree(ikglp_aff->q_info);
@@ -1993,7 +2006,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 							ikglp_aff->relax_max_fifo_len);
 
 	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
-	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
+	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));
 
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
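
With the type change, the element size must now agree between the kmalloc() in the previous hunk and the memset() here; this patch had to touch both, and the two can silently drift apart again. A hedged alternative sketch that states the size exactly once by allocating zeroed memory in one call with kcalloc(), assuming ikglp_aff->nr_rsrc equals the count used at allocation time, as the memset() suggests:

	/* allocate and zero in one step; the element size is taken from the
	 * pointer itself, so it cannot disagree with the declared type */
	ikglp_aff->nr_aff_on_rsrc = kcalloc(ikglp_aff->nr_rsrc,
				sizeof(*ikglp_aff->nr_aff_on_rsrc), GFP_KERNEL);
	if(!ikglp_aff->nr_aff_on_rsrc) {
		kfree(ikglp_aff->nr_cur_users_on_rsrc);	/* error path abbreviated */
		kfree(ikglp_aff->q_info);
		return(NULL);
	}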
@@ -2057,7 +2070,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	lt_t min_len;
 	int min_nr_users, min_nr_aff_users;
-	struct ikglp_queue_info *shortest;
+	struct ikglp_queue_info *shortest, *aff_queue;
 	struct fifo_queue *to_enqueue;
 	int i;
 	int affinity_gpu;
@@ -2087,7 +2100,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	// all things being equal, let's start with the queue with which we have
 	// affinity. this helps us maintain affinity even when we don't have
 	// an estiamte for local-affinity execution time (i.e., 2nd time on GPU)
-	shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
+	aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
+	shortest = aff_queue;
 
 //	if(shortest == aff->shortest_queue) {
 //		TRACE_CUR("special case: have affinity with shortest queue\n");
@@ -2108,29 +2122,46 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		if(&aff->q_info[i] != shortest) {
 			if(aff->q_info[i].q->count < max_fifo_len) {
+				int want = 0;
 
-				lt_t est_len =
-					aff->q_info[i].estimated_len +
+				lt_t migration =
 					get_gpu_estimate(t,
 						gpu_migration_distance(tsk_rt(t)->last_gpu,
 							replica_to_gpu(aff, i)));
+				lt_t est_len = aff->q_info[i].estimated_len + migration;
 
 				// queue is smaller, or they're equal and the other has a smaller number
 				// of total users.
 				//
 				// tie-break on the shortest number of simult users. this only kicks in
 				// when there are more than 1 empty queues.
 
 				// TODO: Make "est_len < min_len" a fuzzy function that allows
 				// queues "close enough" in length to be considered equal.
 
-				if((shortest->q->count >= max_fifo_len) ||	/* 'shortest' is full and i-th queue is not */
-				   (est_len < min_len) ||	/* i-th queue has shortest length */
-				   ((est_len == min_len) &&	/* equal lengths, but one has fewer over-all users */
-					((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) ||
-					 ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&
-					  (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) {
+				/* NOTE: 'shortest' starts out with affinity GPU */
+				if(unlikely(shortest->q->count >= max_fifo_len)) {	/* 'shortest' is full and i-th queue is not */
+					want = 1;
+				}
+				else if(est_len < min_len) {
+					want = 1;	/* i-th queue has shortest length */
+				}
+				else if(unlikely(est_len == min_len)) {	/* equal lengths */
+					if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) {	/* don't sacrifice affinity on tie */
+						if(has_affinity(aff, t, i)) {
+							want = 1;	/* switch to maintain affinity */
+						}
+						else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) {	/* favor one with less affinity load */
+							want = 1;
+						}
+						else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&	/* equal number of affinity */
+								(*(aff->q_info[i].nr_cur_users) < min_nr_users)) {	/* favor one with current fewer users */
+							want = 1;
+						}
+					}
+				}
 
+				if(want) {
 					shortest = &aff->q_info[i];
 					min_len = est_len;
 					min_nr_users = *(aff->q_info[i].nr_cur_users);
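
Distilled, the rewritten cascade above ranks a candidate queue against the current pick in strict order: a full current pick loses outright; otherwise a strictly shorter estimated wait wins; on an exact tie the code refuses to leave a queue the task has affinity with, and otherwise prefers gaining affinity, then fewer affinity-bound tasks, then fewer current users. A compact sketch of just that predicate, with the queue state abstracted into a struct whose field names are stand-ins rather than the kernel's:

#include <stdint.h>

struct cand {
	unsigned count;		/* jobs in the FIFO (q->count) */
	uint64_t est_len;	/* estimated wait incl. migration cost */
	int64_t nr_aff;		/* tasks with affinity for this GPU */
	int nr_cur;		/* current users of this replica */
	int task_has_aff;	/* does the enqueuing task have affinity here? */
};

/* nonzero iff candidate c should replace the current pick s; c is
 * already known to have room (c->count < max_fifo_len) */
static int prefer(const struct cand *c, const struct cand *s,
		  unsigned max_fifo_len)
{
	if (s->count >= max_fifo_len)	/* current pick is full, c is not */
		return 1;
	if (c->est_len < s->est_len)	/* strictly shorter estimated wait */
		return 1;
	if (c->est_len == s->est_len && !s->task_has_aff) {
		/* tie: never abandon an affinity queue, but... */
		if (c->task_has_aff)	/* ...switch to gain affinity */
			return 1;
		if (c->nr_aff < s->nr_aff)	/* fewer tasks bound here */
			return 1;
		if (c->nr_aff == s->nr_aff && c->nr_cur < s->nr_cur)
			return 1;	/* last resort: fewer current users */
	}
	return 0;
}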
@@ -2672,6 +2703,7 @@ int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
 	// decrement affinity count on old GPU
 	aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
 	--(aff->nr_aff_on_rsrc[aff_rsrc]);
+//	aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);
 
 	if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
 		WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
@@ -2717,10 +2749,12 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 	if(last_gpu >= 0) {
 		int old_rsrc = last_gpu - aff->offset;
 		--(aff->nr_aff_on_rsrc[old_rsrc]);
+//		aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
 	}
 
 	// increment affinity count on new GPU
 	++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
+//	aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
 	tsk_rt(t)->rsrc_exit_cb_args = aff;
 	tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
 }
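
The commented-out updates in the last two hunks appear to be a disabled experiment: instead of counting tasks, each task would contribute a weight of roughly 1e9 / period (period in nanoseconds, i.e. its job-release rate per second) to nr_aff_on_rsrc, which is presumably why the array was widened to int64_t. A hedged sketch of the arithmetic those lines would perform:

#include <stdint.h>
#include <stdio.h>

/* weight a task by its per-second release rate, mirroring the disabled
 * updates in gpu_ikglp_notify_acquired()/_exit() */
static int64_t aff_weight(uint64_t period_ns)
{
	return (int64_t)(UINT64_C(1000000000) / period_ns);
}

int main(void)
{
	int64_t nr_aff = 0;			/* one slot of nr_aff_on_rsrc */

	nr_aff += aff_weight(1000000);		/* 1 ms-period task:  +1000 */
	nr_aff += aff_weight(100000000);	/* 100 ms-period task: +10 */
	nr_aff -= aff_weight(1000000);		/* the 1 ms task departs */

	printf("%lld\n", (long long)nr_aff);	/* prints 10 */
	return 0;
}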