author	Glenn Elliott <gelliott@cs.unc.edu>	2012-04-18 21:30:36 -0400
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-04-18 21:30:36 -0400
commit	f916cdb8e6a9ee2c917fddb7351e6bb39f6c953e (patch)
tree	b7904b93f4da153a40815b89378e7b3ca2f70591
parent	6ab36ca992441f7353840c70fc91d99a500a940e (diff)
Added support for simultaneous users (simult-users) in KFMLP
-rw-r--r--	include/litmus/fdso.h		9
-rw-r--r--	include/litmus/ikglp_lock.h	61
-rw-r--r--	include/litmus/kfmlp_lock.h	10
-rw-r--r--	include/litmus/nvidia_info.h	1
-rw-r--r--	include/litmus/rt_param.h	3
-rw-r--r--	litmus/Kconfig			13
-rw-r--r--	litmus/fdso.c			3
-rw-r--r--	litmus/kfmlp_lock.c		337
-rw-r--r--	litmus/nvidia_info.c		11
9 files changed, 297 insertions(+), 151 deletions(-)
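The crux of the change: a KFMLP semaphore with nr_replicas queues can now be folded onto nr_rsrc = nr_replicas / nr_simult physical GPUs, so that up to nr_simult tasks (e.g., one per copy/execution engine) hold "replicas" of the same GPU at once. A minimal standalone sketch of the folding arithmetic; the helper mirrors the __replica_to_gpu()/replica_to_gpu() inlines added to litmus/kfmlp_lock.c below, while main() and its values are purely illustrative:

#include <stdio.h>

/* Mirrors the fields this patch adds to struct kfmlp_affinity. */
struct aff {
	int offset;     /* GPU ID backing replica 0 (replica_to_gpu_offset) */
	int nr_simult;  /* simultaneous users allowed per GPU */
	int nr_rsrc;    /* physical GPUs = nr_replicas / nr_simult */
};

/* replica -> GPU folding, as in replica_to_gpu() */
static int replica_to_gpu(const struct aff *a, int replica)
{
	return (replica % a->nr_rsrc) + a->offset;
}

int main(void)
{
	/* hypothetical setup: 4 replicas folded onto 2 GPUs, 2 users each */
	struct aff a = { .offset = 0, .nr_simult = 2, .nr_rsrc = 2 };
	int r;
	for (r = 0; r < a.nr_rsrc * a.nr_simult; ++r)
		printf("replica %d -> GPU %d\n", r, replica_to_gpu(&a, r));
	return 0;  /* prints 0->0, 1->1, 2->0, 3->1 */
}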
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index b92c1a3f004f..552a1e731672 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -24,11 +24,12 @@ typedef enum {
 	IKGLP_SEM = 3,
 	KFMLP_SEM = 4,
 
-	IKGLP_GPU_AFF_OBS = 5,
-	KFMLP_SIMPLE_GPU_AFF_OBS = 6,
-	KFMLP_GPU_AFF_OBS = 7,
+	IKGLP_SIMPLE_GPU_AFF_OBS = 5,
+	IKGLP_GPU_AFF_OBS = 6,
+	KFMLP_SIMPLE_GPU_AFF_OBS = 7,
+	KFMLP_GPU_AFF_OBS = 8,
 
-	MAX_OBJ_TYPE = 7
+	MAX_OBJ_TYPE = 8
 } obj_type_t;
 
 struct inode_obj_id {
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index c0cc04db1bc6..2a75a1719815 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -5,6 +5,12 @@
 #include <litmus/binheap.h>
 #include <litmus/locking.h>
 
+#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
+#include <litmus/kexclu_affinity.h>
+
+struct ikglp_affinity;
+#endif
+
 typedef struct ikglp_heap_node
 {
 	struct task_struct *task;
@@ -81,6 +87,10 @@ struct ikglp_semaphore
 	struct fifo_queue *fifo_queues;	// array nr_replicas in length
 	struct binheap_handle priority_queue;	// max-heap, base prio
 	struct binheap_handle donors;	// max-heap, base prio
+
+#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
+	struct ikglp_affinity *aff_obs;
+#endif
 };
 
 static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock)
@@ -94,4 +104,55 @@ int ikglp_close(struct litmus_lock* l);
 void ikglp_free(struct litmus_lock* l);
 struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg);
 
+
+
+#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
+
+struct ikglp_queue_info
+{
+	struct fifo_queue* q;
+	lt_t estimated_len;
+	int *nr_cur_users;
+};
+
+struct ikglp_affinity_ops
+{
+	struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t);	// select FIFO
+	struct task_struct* (*advise_steal)(struct ikglp_affinity* aff, wait_queue_t** to_steal, struct fifo_queue** to_steal_from);	// select a waiter (and FIFO) to steal from
+	struct task_struct* (*advise_donee_selection)(struct ikglp_affinity* aff, wait_queue_t** donee, struct fifo_queue** donee_queue);	// select a donee
+	struct task_struct* (*advise_donor_to_fq)(struct ikglp_affinity* aff, ikglp_wait_state_t** donor);	// select a donor to move to a FIFO queue
+
+	void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// fifo enqueue
+	void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// fifo dequeue
+	void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// replica acquired
+	void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// replica freed
+	int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq);	// convert a replica # to a GPU (includes offsets and simult-user folding)
+};
+
+struct ikglp_affinity
+{
+	struct affinity_observer obs;
+	struct ikglp_affinity_ops *ops;
+	struct ikglp_queue_info *q_info;
+	int *nr_cur_users_on_rsrc;
+	int offset;
+	int nr_simult;
+	int nr_rsrc;
+};
+
+static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
+{
+	return container_of(aff_obs, struct ikglp_affinity, obs);
+}
+
+int ikglp_aff_obs_close(struct affinity_observer*);
+void ikglp_aff_obs_free(struct affinity_observer*);
+struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*,
+						void* __user arg);
+struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
+						void* __user arg);
+#endif
+
+
+
 #endif
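The new ikglp_affinity_ops mirrors kfmlp_affinity_ops below: the lock code calls through a table of function pointers so the "smart" and "simple" GPU advisors can be swapped without touching the queue logic. A toy, compilable model of that dispatch pattern; all types and names here are illustrative stand-ins, not the kernel's:

#include <stdio.h>

struct affinity;  /* forward declaration, as in the headers above */

struct affinity_ops {
	int (*advise_enqueue)(struct affinity *aff);             /* select a queue */
	int (*replica_to_resource)(struct affinity *aff, int r); /* queue -> GPU */
};

struct affinity {
	struct affinity_ops *ops;
	int offset;
	int nr_rsrc;
};

static int simple_advise(struct affinity *aff) { return 0; /* always queue 0 */ }
static int to_resource(struct affinity *aff, int r)
{
	return (r % aff->nr_rsrc) + aff->offset;  /* same folding as the patch */
}

static struct affinity_ops simple_ops = {
	.advise_enqueue = simple_advise,
	.replica_to_resource = to_resource,
};

int main(void)
{
	struct affinity aff = { .ops = &simple_ops, .offset = 0, .nr_rsrc = 2 };
	int q = aff.ops->advise_enqueue(&aff);
	printf("enqueue on queue %d -> GPU %d\n", q,
	       aff.ops->replica_to_resource(&aff, q));
	return 0;
}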
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
index 614cccad5307..6d7e24b2a3ad 100644
--- a/include/litmus/kfmlp_lock.h
+++ b/include/litmus/kfmlp_lock.h
@@ -6,6 +6,8 @@
 
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 #include <litmus/kexclu_affinity.h>
+
+struct kfmlp_affinity;
 #endif
 
 /* struct for semaphore with priority inheritance */
@@ -50,10 +52,9 @@ struct kfmlp_queue_info
 {
 	struct kfmlp_queue* q;
 	lt_t estimated_len;
+	int *nr_cur_users;
 };
 
-struct kfmlp_affinity;
-
 struct kfmlp_affinity_ops
 {
 	struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t);
@@ -62,6 +63,7 @@ struct kfmlp_affinity_ops
 	void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
 	void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
 	void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
+	int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq);
 };
 
 struct kfmlp_affinity
@@ -69,8 +71,10 @@ struct kfmlp_affinity
 	struct affinity_observer obs;
 	struct kfmlp_affinity_ops *ops;
 	struct kfmlp_queue_info *q_info;
-	struct kfmlp_queue_info *shortest_queue;
+	int *nr_cur_users_on_rsrc;
 	int offset;
+	int nr_simult;
+	int nr_rsrc;
 };
 
 static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
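Note the division of labor: kfmlp_queue_info gains a per-queue int *nr_cur_users, while kfmlp_affinity owns one counter per physical GPU in nr_cur_users_on_rsrc. With nr_simult > 1, several queues' nr_cur_users pointers alias the same per-GPU counter. A small user-space sketch of that aliasing (sizes and values are hypothetical):

#include <stdio.h>

int main(void)
{
	/* 4 replica queues folded onto 2 GPUs (nr_simult = 2) */
	int nr_cur_users_on_rsrc[2] = { 0, 0 };  /* one holder count per GPU */
	int *q_nr_cur_users[4];                  /* one pointer per queue */
	int i;

	/* same wiring as kfmlp_aff_obs_new(): queue i -> GPU (i % nr_rsrc) */
	for (i = 0; i < 4; ++i)
		q_nr_cur_users[i] = &nr_cur_users_on_rsrc[i % 2];

	++(*q_nr_cur_users[0]);  /* replica 0 acquired: GPU 0 gains a holder */
	++(*q_nr_cur_users[2]);  /* replica 2 acquired: also GPU 0 */

	printf("GPU 0 holders: %d\n", nr_cur_users_on_rsrc[0]);  /* 2 */
	printf("GPU 1 holders: %d\n", nr_cur_users_on_rsrc[1]);  /* 0 */
	return 0;
}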
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
index 856c575374d3..580728051d4e 100644
--- a/include/litmus/nvidia_info.h
+++ b/include/litmus/nvidia_info.h
@@ -9,6 +9,7 @@
 
 //#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
 #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
+#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
 
 int init_nvidia_info(void);
 
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 11f081527545..e832ffcba17c 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -59,10 +59,11 @@ struct affinity_observer_args
 	int lock_od;
 };
 
-struct kfmlp_gpu_affinity_observer_args
+struct gpu_affinity_observer_args
 {
 	struct affinity_observer_args obs;
 	int replica_to_gpu_offset;
+	int nr_simult_users;
 };
 
 /* The definition of the data that is shared between the kernel and real-time
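The renamed gpu_affinity_observer_args now carries nr_simult_users up from user space. A hypothetical sketch of how a user-space tool might fill it in; the structs are re-declared locally (and abridged), and a real lock_od would come from liblitmus's od-open path rather than the constant used here:

#include <stdio.h>

/* Abridged local re-declarations of the structs in rt_param.h above. */
struct affinity_observer_args { int lock_od; };
struct gpu_affinity_observer_args {
	struct affinity_observer_args obs;
	int replica_to_gpu_offset;
	int nr_simult_users;
};

int main(void)
{
	struct gpu_affinity_observer_args args = {
		.obs = { .lock_od = 3 },     /* placeholder od of the KFMLP lock */
		.replica_to_gpu_offset = 0,  /* GPU backing replica 0 */
		.nr_simult_users = 2,        /* e.g., copy engine + exec engine */
	};
	printf("observe lock od %d: offset %d, %d simult users\n",
	       args.obs.lock_od, args.replica_to_gpu_offset,
	       args.nr_simult_users);
	return 0;
}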
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 34ce6fb3a22e..a34440f3d8bc 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -325,6 +325,19 @@ config NV_DEVICE_NUM
 	  Should be (<= to the number of CPUs) and
 	  (<= to the number of GPUs) in your system.
 
+config NV_MAX_SIMULT_USERS
+	int "Maximum number of threads sharing a GPU simultaneously"
+	depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
+	range 1 3
+	default "2"
+	help
+	  Should be equal to the #copy_engines + #execution_engines
+	  of the GPUs in your system.
+
+	  Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?)
+	  Consumer Fermi/Kepler GPUs = 2 (GTX-4xx through GTX-6xx)
+	  Older = 1 (ex. GTX-2xx)
+
 choice
 	prompt "CUDA/Driver Version Support"
 	default CUDA_4_0
diff --git a/litmus/fdso.c b/litmus/fdso.c
index 5a4f45c3251b..fb328db77dec 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -28,7 +28,8 @@ static const struct fdso_ops* fdso_ops[] = {
 	&generic_lock_ops, /* RSM_MUTEX */
 	&generic_lock_ops, /* IKGLP_SEM */
 	&generic_lock_ops, /* KFMLP_SEM */
-	&generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
+	&generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
+	&generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
 	&generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
 	&generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
 };
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 7cdca1b7b50a..9bbe31a05b97 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -73,10 +73,9 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* se
 }
 
 
-// TODO: BREAK THIS UP INTO TWO STEPS:
-//	1) task to steal (and from what queue)
-//	2) update queues
-static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
+static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
+						 wait_queue_t** to_steal,
+						 struct kfmlp_queue** to_steal_from)
 {
 	/* must hold sem->lock */
 
@@ -189,76 +188,7 @@ static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
 	}
 #endif
 }
-//// TODO: BREAK THIS UP INTO TWO STEPS:
-////	1) task to steal (and from what queue)
-////	2) update queues
-//static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
-//{
-//	/* must hold sem->lock */
-//
-//	struct kfmlp_queue *my_queue = NULL;
-//	struct task_struct *max_hp = NULL;
-//
-//	struct list_head *pos;
-//	struct task_struct *queued;
-//	int i;
-//
-//	for(i = 0; i < sem->num_resources; ++i)
-//	{
-//		if( (sem->queues[i].count > 1) &&
-//			((my_queue == NULL) ||
-//			 //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
-//			 (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
-//		{
-//			my_queue = &sem->queues[i];
-//		}
-//	}
-//
-//	if(my_queue)
-//	{
-//		max_hp = my_queue->hp_waiter;
-//
-//		BUG_ON(!max_hp);
-//
-//		TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
-//				  kfmlp_get_idx(sem, my_queue),
-//				  max_hp->comm, max_hp->pid,
-//				  kfmlp_get_idx(sem, my_queue));
-//
-//		my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
-//
-//		if(tsk_rt(my_queue->owner)->inh_task == max_hp)
-//		{
-//			litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter);
-//		}
-//
-//		list_for_each(pos, &my_queue->wait.task_list)
-//		{
-//			queued = (struct task_struct*) list_entry(pos, wait_queue_t,
-//													  task_list)->private;
-//			/* Compare task prios, find high prio task. */
-//			if (queued == max_hp)
-//			{
-//				/*
-//				 TRACE_CUR("queue %d: found entry in wait queue.  REMOVING!\n",
-//				 kfmlp_get_idx(sem, my_queue));
-//				 */
-//				__remove_wait_queue(&my_queue->wait,
-//									list_entry(pos, wait_queue_t, task_list));
-//				break;
-//			}
-//		}
-//		--(my_queue->count);
-//
-//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-//		if(sem->aff_obs) {
-//			sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, max_hp);
-//		}
-//#endif
-//	}
-//
-//	return(max_hp);
-//}
+
 
 int kfmlp_lock(struct litmus_lock* l)
 {
@@ -378,6 +308,12 @@ int kfmlp_lock(struct litmus_lock* l)
 		spin_unlock_irqrestore(&sem->lock, flags);
 	}
 
+
+#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
+	if(sem->aff_obs) {
+		return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
+	}
+#endif
 	return kfmlp_get_idx(sem, my_queue);
 }
 
@@ -390,14 +326,14 @@ int kfmlp_unlock(struct litmus_lock* l)
 	unsigned long flags;
 	int err = 0;
 
-	spin_lock_irqsave(&sem->lock, flags);
-
 	my_queue = kfmlp_get_queue(sem, t);
 
-	if (!my_queue || my_queue->owner != t) {
+	if (!my_queue) {
 		err = -EINVAL;
 		goto out;
 	}
+
+	spin_lock_irqsave(&sem->lock, flags);
 
 	TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
 
@@ -489,9 +425,9 @@ RETRY:
 		}
 	}
 
-out:
 	spin_unlock_irqrestore(&sem->lock, flags);
 
+out:
 	return err;
 }
 
@@ -580,6 +516,25 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
 
 #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
 
+static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
+{
+	int gpu = replica % aff->nr_rsrc;
+	return gpu;
+}
+
+static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
+{
+	int gpu = __replica_to_gpu(aff, replica) + aff->offset;
+	return gpu;
+}
+
+static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
+{
+	int replica = gpu - aff->offset;
+	return replica;
+}
+
+
 int kfmlp_aff_obs_close(struct affinity_observer* obs)
 {
 	return 0;
@@ -588,6 +543,7 @@ int kfmlp_aff_obs_close(struct affinity_observer* obs)
 void kfmlp_aff_obs_free(struct affinity_observer* obs)
 {
 	struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
+	kfree(kfmlp_aff->nr_cur_users_on_rsrc);
 	kfree(kfmlp_aff->q_info);
 	kfree(kfmlp_aff);
 }
@@ -597,37 +553,56 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 					void* __user args)
 {
 	struct kfmlp_affinity* kfmlp_aff;
-	struct kfmlp_gpu_affinity_observer_args aff_args;
+	struct gpu_affinity_observer_args aff_args;
 	struct kfmlp_semaphore* sem;
 	int i;
 	unsigned long flags;
 
-	if(!access_ok(VERIFY_READ, args, sizeof(aff_args)))
-	{
+	if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
 		return(NULL);
 	}
-	if(__copy_from_user(&aff_args, args, sizeof(aff_args)))
-	{
+	if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
 		return(NULL);
 	}
 
 	sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
 
-	if(sem->litmus_lock.type != KFMLP_SEM)
-	{
+	if(sem->litmus_lock.type != KFMLP_SEM) {
 		TRACE_CUR("Lock type not supported.  Type = %d\n", sem->litmus_lock.type);
 		return(NULL);
 	}
 
+	if((aff_args.nr_simult_users <= 0) ||
+	   (sem->num_resources%aff_args.nr_simult_users != 0)) {
+		TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
+				  "(%d) per replica.  #replicas should be evenly divisible "
+				  "by #simult_users.\n",
+				  sem->litmus_lock.ident,
+				  sem->num_resources,
+				  aff_args.nr_simult_users);
+		return(NULL);
+	}
+
+	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
+		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
+				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
+		return(NULL);
+	}
+
 	kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
-	if(!kfmlp_aff)
-	{
+	if(!kfmlp_aff) {
 		return(NULL);
 	}
 
 	kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
-	if(!kfmlp_aff->q_info)
-	{
+	if(!kfmlp_aff->q_info) {
+		kfree(kfmlp_aff);
+		return(NULL);
+	}
+
+	kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
+	if(!kfmlp_aff->nr_cur_users_on_rsrc) {
+		kfree(kfmlp_aff->q_info);
 		kfree(kfmlp_aff);
 		return(NULL);
 	}
@@ -636,16 +611,24 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 
 	kfmlp_aff->ops = kfmlp_ops;
 	kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
+	kfmlp_aff->nr_simult = aff_args.nr_simult_users;
+	kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
+
+	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc));
 
-	for(i = 0; i < sem->num_resources; ++i)
-	{
+	for(i = 0; i < sem->num_resources; ++i) {
 		kfmlp_aff->q_info[i].q = &sem->queues[i];
 		kfmlp_aff->q_info[i].estimated_len = 0;
+
+		// multiple q_info's will point to the same resource (aka GPU) if
+		// aff_args.nr_simult_users > 1
+		kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff, i)];
 	}
 
+	// attach observer to the lock
 	spin_lock_irqsave(&sem->lock, flags);
 	sem->aff_obs = kfmlp_aff;
-	kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)];
+	//kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)];
 	spin_unlock_irqrestore(&sem->lock, flags);
 
 	return &kfmlp_aff->obs;
@@ -654,6 +637,13 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 
 
 
+static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
+				   struct kfmlp_queue* fq) {
+	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
+	return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
+}
+
+
 // Smart KFMLP Affinity
 
 static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
@@ -675,55 +665,66 @@ struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 	lt_t min_len;
+	int min_nr_users;
 	struct kfmlp_queue_info *shortest;
 	struct kfmlp_queue *to_enqueue;
 	int i;
+	int affinity_gpu;
 
 	// simply pick the shortest queue if we have no affinity, or we have
 	// affinity with the shortest
 	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
-		// we have affinity with the shorest queue. pick it.
-		shortest = aff->shortest_queue;
-		TRACE_CUR("special case: no affinity\n");
-		goto out;
+		affinity_gpu = aff->offset;  // first gpu
+		TRACE_CUR("no affinity\n");
+	}
+	else {
+		affinity_gpu = tsk_rt(t)->last_gpu;
 	}
 
 	// all things being equal, let's start with the queue with which we have
 	// affinity.  this helps us maintain affinity even when we don't have
 	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
-	shortest = &aff->q_info[tsk_rt(t)->last_gpu - aff->offset];
+	shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
 
-	if(shortest == aff->shortest_queue) {
-		TRACE_CUR("special case: have affinity with shortest queue\n");
-		goto out;
-	}
+//	if(shortest == aff->shortest_queue) {
+//		TRACE_CUR("special case: have affinity with shortest queue\n");
+//		goto out;
+//	}
 
 	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
+	min_nr_users = *(shortest->nr_cur_users);
 
 	TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
 			  get_gpu_estimate(t, MIG_LOCAL),
 			  kfmlp_get_idx(sem, shortest->q),
 			  min_len);
 
 	for(i = 0; i < sem->num_resources; ++i) {
 		if(&aff->q_info[i] != shortest) {
 
 			lt_t est_len =
 				aff->q_info[i].estimated_len +
-				get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset));
-			if(est_len < min_len) {
+				get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
+
+			// queue is smaller, or they're equal and the other has a smaller
+			// number of total users.
+			//
+			// tie-break on the smallest number of simult users.  this only kicks
+			// in when there is more than one empty queue.
+			if((est_len < min_len) ||
+			   ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
 				shortest = &aff->q_info[i];
 				min_len = est_len;
+				min_nr_users = *(aff->q_info[i].nr_cur_users);
 			}
 
 			TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
-					  get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)),
+					  get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
 					  kfmlp_get_idx(sem, aff->q_info[i].q),
 					  est_len);
 		}
 	}
 
-out:
 	to_enqueue = shortest->q;
 	TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
 			  kfmlp_get_idx(sem, to_enqueue),
@@ -736,7 +737,7 @@ struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queu
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 
-	// For now, just steal from the shortest (by number) queue.
+	// For now, just steal the highest-priority waiter.
 	// TODO: Implement affinity-aware stealing.
 
 	return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
@@ -747,7 +748,7 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 	int replica = kfmlp_get_idx(sem, fq);
-	int gpu = aff->offset + replica;
+	int gpu = replica_to_gpu(aff, replica);
 	struct kfmlp_queue_info *info = &aff->q_info[replica];
 	lt_t est_time;
 	lt_t est_len_before;
@@ -765,22 +766,22 @@ void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq
 			  est_len_before, est_time,
 			  info->estimated_len);
 
-	if(aff->shortest_queue == info) {
-		// we may no longer be the shortest
-		aff->shortest_queue = kfmlp_aff_find_shortest(aff);
-
-		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
-				  kfmlp_get_idx(sem, aff->shortest_queue->q),
-				  aff->shortest_queue->q->count,
-				  aff->shortest_queue->estimated_len);
-	}
+//	if(aff->shortest_queue == info) {
+//		// we may no longer be the shortest
+//		aff->shortest_queue = kfmlp_aff_find_shortest(aff);
+//
+//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
+//				  kfmlp_get_idx(sem, aff->shortest_queue->q),
+//				  aff->shortest_queue->q->count,
+//				  aff->shortest_queue->estimated_len);
+//	}
 }
 
 void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
 	int replica = kfmlp_get_idx(sem, fq);
-	int gpu = aff->offset + replica;
+	int gpu = replica_to_gpu(aff, replica);
 	struct kfmlp_queue_info *info = &aff->q_info[replica];
 	lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
 
@@ -797,28 +798,32 @@ void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq
 			  info->estimated_len);
 
 	// check to see if we're the shortest queue now.
-	if((aff->shortest_queue != info) &&
-	   (aff->shortest_queue->estimated_len > info->estimated_len)) {
-
-		aff->shortest_queue = info;
-
-		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
-				  kfmlp_get_idx(sem, info->q),
-				  info->q->count,
-				  info->estimated_len);
-	}
+//	if((aff->shortest_queue != info) &&
+//	   (aff->shortest_queue->estimated_len > info->estimated_len)) {
+//
+//		aff->shortest_queue = info;
+//
+//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
+//				  kfmlp_get_idx(sem, info->q),
+//				  info->q->count,
+//				  info->estimated_len);
+//	}
 }
 
 void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 
 	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu);	// record the type of migration
 
 	TRACE_CUR("%s/%d acquired gpu %d.  migration type = %d\n",
 			  t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
 
+	// count the number of resource holders
+	++(*(aff->q_info[replica].nr_cur_users));
+
 	reg_nv_device(gpu, 1, t);  // register
 
 	tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
@@ -829,7 +834,8 @@ void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* f
 void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 	lt_t est_time;
 
 	stop_gpu_tracker(t);  // stop the tracker before we do anything else.
@@ -837,6 +843,10 @@ void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq,
 	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
 
 	tsk_rt(t)->last_gpu = gpu;
+
+	// count the number of resource holders
+	--(*(aff->q_info[replica].nr_cur_users));
+
 	reg_nv_device(gpu, 0, t);  // unregister
 
 	// update estimates
@@ -856,7 +866,8 @@ struct kfmlp_affinity_ops gpu_kfmlp_affinity =
 	.notify_enqueue = gpu_kfmlp_notify_enqueue,
 	.notify_dequeue = gpu_kfmlp_notify_dequeue,
 	.notify_acquired = gpu_kfmlp_notify_acquired,
-	.notify_freed = gpu_kfmlp_notify_freed
+	.notify_freed = gpu_kfmlp_notify_freed,
+	.replica_to_resource = gpu_replica_to_resource,
 };
 
 struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
@@ -877,8 +888,50 @@ struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* op
 struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
+	int min_count;
+	int min_nr_users;
+	struct kfmlp_queue_info *shortest;
+	struct kfmlp_queue *to_enqueue;
+	int i;
+
 //	TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
-	return sem->shortest_queue;
+
+	shortest = &aff->q_info[0];
+	min_count = shortest->q->count;
+	min_nr_users = *(shortest->nr_cur_users);
+
+	TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
+			  kfmlp_get_idx(sem, shortest->q),
+			  shortest->q->count,
+			  min_nr_users);
+
+	for(i = 1; i < sem->num_resources; ++i) {
+		int len = aff->q_info[i].q->count;
+
+		// queue is smaller, or they're equal and the other has a smaller
+		// number of total users.
+		//
+		// tie-break on the smallest number of simult users.  this only kicks
+		// in when there is more than one empty queue.
+		if((len < min_count) ||
+		   ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
+			shortest = &aff->q_info[i];
+			min_count = shortest->q->count;
+			min_nr_users = *(aff->q_info[i].nr_cur_users);
+		}
+
+		TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
+				  kfmlp_get_idx(sem, aff->q_info[i].q),
+				  aff->q_info[i].q->count,
+				  *(aff->q_info[i].nr_cur_users));
+	}
+
+	to_enqueue = shortest->q;
+	TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
+			  kfmlp_get_idx(sem, to_enqueue),
+			  kfmlp_get_idx(sem, sem->shortest_queue));
+
+	return to_enqueue;
 }
 
 struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
@@ -901,19 +954,26 @@ void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_qu
 void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 
 //	TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
 
+	// count the number of resource holders
+	++(*(aff->q_info[replica].nr_cur_users));
+
 	reg_nv_device(gpu, 1, t);  // register
 }
 
 void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
 {
 	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
-	int gpu = kfmlp_get_idx(sem, fq) + aff->offset;
+	int replica = kfmlp_get_idx(sem, fq);
+	int gpu = replica_to_gpu(aff, replica);
 
 //	TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
+	// count the number of resource holders
+	--(*(aff->q_info[replica].nr_cur_users));
 
 	reg_nv_device(gpu, 0, t);  // unregister
 }
@@ -925,7 +985,8 @@ struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
 	.notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
 	.notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
 	.notify_acquired = simple_gpu_kfmlp_notify_acquired,
-	.notify_freed = simple_gpu_kfmlp_notify_freed
+	.notify_freed = simple_gpu_kfmlp_notify_freed,
+	.replica_to_resource = gpu_replica_to_resource,
 };
 
 struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
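The simple advisor's policy is now: pick the replica queue with the fewest waiters, and break ties in favor of the queue whose backing GPU currently has the fewest holders, which spreads work across GPUs when several queues are empty. A standalone model of that comparison (made-up data; same predicate as simple_gpu_kfmlp_advise_enqueue() above):

#include <stdio.h>

int main(void)
{
	int count[4]   = { 0, 0, 1, 2 };  /* waiters per replica queue */
	int holders[4] = { 1, 0, 1, 0 };  /* *nr_cur_users of each queue's GPU */
	int best = 0, i;

	for (i = 1; i < 4; ++i) {
		if ((count[i] < count[best]) ||
		    ((count[i] == count[best]) && (holders[i] < holders[best])))
			best = i;
	}

	/* queues 0 and 1 are both empty; queue 1's GPU has fewer holders */
	printf("enqueue on replica %d\n", best);  /* prints 1 */
	return 0;
}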
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 287e4a0662d9..fd6398121fbf 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -368,7 +368,7 @@ static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_
 	int i;
 	struct task_struct *found = NULL;
 	for(i = 0; i < reg->nr_owners; ++i) {
-		if(reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
+		if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
 			found = reg->owners[i];
 		}
 	}
@@ -433,8 +433,9 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 
 	raw_spin_lock_irqsave(&reg->lock, flags);
 
-	if(reg->nr_owners < MAX_NR_OWNERS) {
-		for(i = 0; i < MAX_NR_OWNERS; ++i) {
+	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
+		TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
+		for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
 			if(reg->owners[i] == NULL) {
 				reg->owners[i] = t;
 
@@ -485,7 +486,9 @@ static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
 
 	raw_spin_lock_irqsave(&reg->lock, flags);
 
-	for(i = 0; i < reg->nr_owners; ++i) {
+	TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
+
+	for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
 		if(reg->owners[i] == t) {
 #ifdef CONFIG_LITMUS_SOFTIRQD
 			flush_pending(klitirqd_th, t);