author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-04-16 20:09:15 -0400
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-04-16 20:09:15 -0400
commit | 8675824ed85d6e83a24e77dabaf3a5c02c91ef6f (patch)
tree | 29e19fb32cacb062abca434fc921636d600ce77b
parent | 0b865246946a97dc03a81ccf55bf84acce923c4b (diff)
Implement GPU-affinity-aware kfmlp (untested)
-rw-r--r-- | include/litmus/fdso.h | 7
-rw-r--r-- | include/litmus/fpmath.h | 134
-rw-r--r-- | include/litmus/gpu_affinity.h | 40
-rw-r--r-- | include/litmus/kfmlp_lock.h | 56
-rw-r--r-- | include/litmus/rt_param.h | 37
-rw-r--r-- | litmus/Makefile | 2
-rw-r--r-- | litmus/gpu_affinity.c | 72
-rw-r--r-- | litmus/kfmlp_lock.c | 660
-rw-r--r-- | litmus/litmus.c | 7
-rw-r--r-- | litmus/locking.c | 13
-rw-r--r-- | litmus/rsm_lock.c | 14
-rw-r--r-- | litmus/sched_gsn_edf.c | 69
12 files changed, 1006 insertions(+), 105 deletions(-)
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index baf28c47e95d..b92c1a3f004f 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -24,10 +24,11 @@ typedef enum { | |||
24 | IKGLP_SEM = 3, | 24 | IKGLP_SEM = 3, |
25 | KFMLP_SEM = 4, | 25 | KFMLP_SEM = 4, |
26 | 26 | ||
27 | IKGLP_AFF_OBS = 5, | 27 | IKGLP_GPU_AFF_OBS = 5, |
28 | KFMLP_AFF_OBS = 6, | 28 | KFMLP_SIMPLE_GPU_AFF_OBS = 6, |
29 | KFMLP_GPU_AFF_OBS = 7, | ||
29 | 30 | ||
30 | MAX_OBJ_TYPE = 6 | 31 | MAX_OBJ_TYPE = 7 |
31 | } obj_type_t; | 32 | } obj_type_t; |
32 | 33 | ||
33 | struct inode_obj_id { | 34 | struct inode_obj_id { |
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..35f81683d6ab
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,134 @@ | |||
1 | #ifndef __FP_MATH_H__ | ||
2 | #define __FP_MATH_H__ | ||
3 | |||
4 | // Use 64-bit because we want to track things at the nanosecond scale. | ||
5 | // This can lead to very large numbers. | ||
6 | typedef int64_t fpbuf_t; | ||
7 | typedef struct | ||
8 | { | ||
9 | fpbuf_t val; | ||
10 | } fp_t; | ||
11 | |||
12 | #define FP_SHIFT 10 | ||
13 | #define ROUND_BIT (FP_SHIFT - 1) | ||
14 | #define ONE FP(1) | ||
15 | |||
16 | #define _fp(x) ((fp_t) {x}) | ||
17 | |||
18 | static const fp_t LITMUS_FP_ZERO = {.val = 0}; | ||
19 | static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; | ||
20 | |||
21 | static inline fp_t FP(fpbuf_t x) | ||
22 | { | ||
23 | return _fp(((fpbuf_t) x) << FP_SHIFT); | ||
24 | } | ||
25 | |||
26 | /* divide two integers to obtain a fixed point value */ | ||
27 | static inline fp_t _frac(fpbuf_t a, fpbuf_t b) | ||
28 | { | ||
29 | return _fp(FP(a).val / (b)); | ||
30 | } | ||
31 | |||
32 | #ifdef __KERNEL__ | ||
33 | |||
34 | static inline fpbuf_t _point(fp_t x) | ||
35 | { | ||
36 | return (x.val % (1 << FP_SHIFT)); | ||
37 | |||
38 | } | ||
39 | |||
40 | #define fp2str(x) x.val | ||
41 | /*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ | ||
42 | #define _FP_ "%ld/1024" | ||
43 | |||
44 | static inline fpbuf_t _floor(fp_t x) | ||
45 | { | ||
46 | return x.val >> FP_SHIFT; | ||
47 | } | ||
48 | |||
49 | /* FIXME: negative rounding */ | ||
50 | static inline fpbuf_t _round(fp_t x) | ||
51 | { | ||
52 | return _floor(x) + ((x.val >> ROUND_BIT) & 1); | ||
53 | } | ||
54 | |||
55 | /* multiply two fixed point values */ | ||
56 | static inline fp_t _mul(fp_t a, fp_t b) | ||
57 | { | ||
58 | return _fp((a.val * b.val) >> FP_SHIFT); | ||
59 | } | ||
60 | |||
61 | static inline fp_t _div(fp_t a, fp_t b) | ||
62 | { | ||
63 | /* try not to overflow */ | ||
64 | if (unlikely( a.val > (2l << (BITS_PER_LONG - FP_SHIFT)) )) | ||
65 | return _fp((a.val / b.val) << FP_SHIFT); | ||
66 | else | ||
67 | return _fp((a.val << FP_SHIFT) / b.val); | ||
68 | } | ||
69 | |||
70 | static inline fp_t _add(fp_t a, fp_t b) | ||
71 | { | ||
72 | return _fp(a.val + b.val); | ||
73 | } | ||
74 | |||
75 | static inline fp_t _sub(fp_t a, fp_t b) | ||
76 | { | ||
77 | return _fp(a.val - b.val); | ||
78 | } | ||
79 | |||
80 | static inline fp_t _neg(fp_t x) | ||
81 | { | ||
82 | return _fp(-x.val); | ||
83 | } | ||
84 | |||
85 | static inline fp_t _abs(fp_t x) | ||
86 | { | ||
87 | return _fp(abs(x.val)); | ||
88 | } | ||
89 | |||
90 | /* works the same as casting float/double to integer */ | ||
91 | static inline fpbuf_t _fp_to_integer(fp_t x) | ||
92 | { | ||
93 | return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1); | ||
94 | } | ||
95 | |||
96 | static inline fp_t _integer_to_fp(fpbuf_t x) | ||
97 | { | ||
98 | return _frac(x,1); | ||
99 | } | ||
100 | |||
101 | static inline int _leq(fp_t a, fp_t b) | ||
102 | { | ||
103 | return a.val <= b.val; | ||
104 | } | ||
105 | |||
106 | static inline int _geq(fp_t a, fp_t b) | ||
107 | { | ||
108 | return a.val >= b.val; | ||
109 | } | ||
110 | |||
111 | static inline int _lt(fp_t a, fp_t b) | ||
112 | { | ||
113 | return a.val < b.val; | ||
114 | } | ||
115 | |||
116 | static inline int _gt(fp_t a, fp_t b) | ||
117 | { | ||
118 | return a.val > b.val; | ||
119 | } | ||
120 | |||
121 | static inline int _eq(fp_t a, fp_t b) | ||
122 | { | ||
123 | return a.val == b.val; | ||
124 | } | ||
125 | |||
126 | static inline fp_t _max(fp_t a, fp_t b) | ||
127 | { | ||
128 | if (a.val < b.val) | ||
129 | return b; | ||
130 | else | ||
131 | return a; | ||
132 | } | ||
133 | #endif | ||
134 | #endif | ||
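The new header defines a signed fixed-point type: values live in an int64_t scaled by 2^FP_SHIFT = 1024, which is what the "%ld/1024" format string alludes to. Below is a small stand-alone sketch (illustrative only, not part of the patch; it mirrors the header's definitions in user space) of how _frac(), _mul(), and _floor() compose:

#include <stdio.h>
#include <stdint.h>

#define FP_SHIFT 10                        /* same scale as fpmath.h: 1 unit = 1/1024 */
typedef int64_t fpbuf_t;
typedef struct { fpbuf_t val; } fp_t;

static fp_t    _frac(fpbuf_t a, fpbuf_t b) { return (fp_t){ (a << FP_SHIFT) / b }; }
static fp_t    _mul(fp_t a, fp_t b)        { return (fp_t){ (a.val * b.val) >> FP_SHIFT }; }
static fpbuf_t _floor(fp_t x)              { return x.val >> FP_SHIFT; }

int main(void)
{
	fp_t half = _frac(1, 2);                 /* raw 512, i.e. 0.5      */
	fp_t est  = _mul(half, _frac(3000, 1));  /* 0.5 * 3000.0 = 1500.0  */
	printf("half.raw=%lld floor(est)=%lld\n",
	       (long long)half.val, (long long)_floor(est));  /* 512, 1500 */
	return 0;
}

Note that _mul() can overflow for large operands; the kernel-side _div() already hedges against that case, and with only 10 fractional bits the 64-bit buffer still leaves roughly 53 bits of integer headroom for nanosecond-scale values.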
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
new file mode 100644
index 000000000000..c29ff3de997c
--- /dev/null
+++ b/include/litmus/gpu_affinity.h
@@ -0,0 +1,40 @@ | |||
1 | #ifndef LITMUS_GPU_AFFINITY_H | ||
2 | #define LITMUS_GPU_AFFINITY_H | ||
3 | |||
4 | #include <litmus/rt_param.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/litmus.h> | ||
7 | |||
8 | void update_gpu_estimate(struct task_struct* t, lt_t observed); | ||
9 | gpu_migration_dist_t gpu_migration_distance(int a, int b); | ||
10 | |||
11 | static inline void reset_gpu_tracker(struct task_struct* t) | ||
12 | { | ||
13 | t->rt_param.accum_gpu_time = 0; | ||
14 | } | ||
15 | |||
16 | static inline void start_gpu_tracker(struct task_struct* t) | ||
17 | { | ||
18 | t->rt_param.gpu_time_stamp = litmus_clock(); | ||
19 | } | ||
20 | |||
21 | static inline void stop_gpu_tracker(struct task_struct* t) | ||
22 | { | ||
23 | lt_t now = litmus_clock(); | ||
24 | t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp); | ||
25 | } | ||
26 | |||
27 | static inline lt_t get_gpu_time(struct task_struct* t) | ||
28 | { | ||
29 | return t->rt_param.accum_gpu_time; | ||
30 | } | ||
31 | |||
32 | static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) | ||
33 | { | ||
34 | lt_t val = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); | ||
35 | |||
36 | // minimum value is 1. | ||
37 | return ((val > 0) ? val : 1); | ||
38 | } | ||
39 | |||
40 | #endif \ No newline at end of file | ||
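These helpers time how long a task actually holds a GPU so the observation can be fed back into the per-distance estimates (note that get_gpu_estimate() clamps the returned value to at least 1). A sketch of the intended call order, pieced together from how this patch uses the helpers in litmus/kfmlp_lock.c and litmus/sched_gsn_edf.c; the exact sequencing is an assumption, since the commit message marks the feature untested:

/* Hypothetical summary of the tracker lifecycle around one GPU critical section. */
static void gpu_tracker_lifecycle_sketch(struct task_struct *t)
{
	/* on replica acquisition (gpu_kfmlp_notify_acquired) */
	reset_gpu_tracker(t);     /* accum_gpu_time = 0                   */
	start_gpu_tracker(t);     /* stamp the start with litmus_clock()  */

	/* while holding the GPU: if the task blocks and
	 * suspend_gpu_tracker_on_block is set, gsnedf_schedule() calls
	 * stop_gpu_tracker() so suspension time is not charged */

	/* on replica release (gpu_kfmlp_notify_freed) */
	stop_gpu_tracker(t);                       /* accumulate the final interval */
	update_gpu_estimate(t, get_gpu_time(t));   /* feed the observation back     */
}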
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
index 49156a9ba4ea..614cccad5307 100644
--- a/include/litmus/kfmlp_lock.h
+++ b/include/litmus/kfmlp_lock.h
@@ -4,6 +4,10 @@ | |||
4 | #include <litmus/litmus.h> | 4 | #include <litmus/litmus.h> |
5 | #include <litmus/locking.h> | 5 | #include <litmus/locking.h> |
6 | 6 | ||
7 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
8 | #include <litmus/kexclu_affinity.h> | ||
9 | #endif | ||
10 | |||
7 | /* struct for semaphore with priority inheritance */ | 11 | /* struct for semaphore with priority inheritance */ |
8 | struct kfmlp_queue | 12 | struct kfmlp_queue |
9 | { | 13 | { |
@@ -23,6 +27,10 @@ struct kfmlp_semaphore | |||
23 | 27 | ||
24 | struct kfmlp_queue *queues; /* array */ | 28 | struct kfmlp_queue *queues; /* array */ |
25 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ | 29 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ |
30 | |||
31 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
32 | struct kfmlp_affinity *aff_obs; | ||
33 | #endif | ||
26 | }; | 34 | }; |
27 | 35 | ||
28 | static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) | 36 | static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) |
@@ -36,4 +44,50 @@ int kfmlp_close(struct litmus_lock* l); | |||
36 | void kfmlp_free(struct litmus_lock* l); | 44 | void kfmlp_free(struct litmus_lock* l); |
37 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg); | 45 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg); |
38 | 46 | ||
39 | #endif \ No newline at end of file | 47 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) |
48 | |||
49 | struct kfmlp_queue_info | ||
50 | { | ||
51 | struct kfmlp_queue* q; | ||
52 | lt_t estimated_len; | ||
53 | }; | ||
54 | |||
55 | struct kfmlp_affinity; | ||
56 | |||
57 | struct kfmlp_affinity_ops | ||
58 | { | ||
59 | struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); | ||
60 | struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from); | ||
61 | void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
62 | void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
63 | void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
64 | void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
65 | }; | ||
66 | |||
67 | struct kfmlp_affinity | ||
68 | { | ||
69 | struct affinity_observer obs; | ||
70 | struct kfmlp_affinity_ops *ops; | ||
71 | struct kfmlp_queue_info *q_info; | ||
72 | struct kfmlp_queue_info *shortest_queue; | ||
73 | int offset; | ||
74 | }; | ||
75 | |||
76 | static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) | ||
77 | { | ||
78 | return container_of(aff_obs, struct kfmlp_affinity, obs); | ||
79 | } | ||
80 | |||
81 | int kfmlp_aff_obs_close(struct affinity_observer*); | ||
82 | void kfmlp_aff_obs_free(struct affinity_observer*); | ||
83 | struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
84 | void* __user arg); | ||
85 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
86 | void* __user arg); | ||
87 | |||
88 | |||
89 | #endif | ||
90 | |||
91 | #endif | ||
92 | |||
93 | |||
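The kfmlp_affinity_ops table is the policy hook point: the advise_* callbacks choose a request queue and a steal victim, while the notify_* callbacks let the policy track queue-state changes. A minimal pass-through policy sketch (hypothetical; the patch itself only adds the GPU-aware and "simple" policies in litmus/kfmlp_lock.c):

/* Returning NULL from advise_enqueue makes kfmlp_lock() fall back to
 * sem->shortest_queue; returning NULL from advise_steal makes
 * kfmlp_unlock() simply leave the replica idle. */
static struct kfmlp_queue* noop_advise_enqueue(struct kfmlp_affinity* aff,
					       struct task_struct* t)
{
	return NULL;
}

static struct task_struct* noop_advise_steal(struct kfmlp_affinity* aff,
					     wait_queue_t** to_steal,
					     struct kfmlp_queue** to_steal_from)
{
	*to_steal = NULL;
	*to_steal_from = NULL;
	return NULL;
}

static void noop_notify(struct kfmlp_affinity* aff, struct kfmlp_queue* fq,
			struct task_struct* t)
{
	/* no per-queue state to maintain */
}

static struct kfmlp_affinity_ops noop_kfmlp_affinity = {
	.advise_enqueue  = noop_advise_enqueue,
	.advise_steal    = noop_advise_steal,
	.notify_enqueue  = noop_notify,
	.notify_dequeue  = noop_notify,
	.notify_acquired = noop_notify,
	.notify_freed    = noop_notify,
};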
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index cc638e9c55d1..ad46ab4c64cc 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -5,6 +5,8 @@ | |||
5 | #ifndef _LINUX_RT_PARAM_H_ | 5 | #ifndef _LINUX_RT_PARAM_H_ |
6 | #define _LINUX_RT_PARAM_H_ | 6 | #define _LINUX_RT_PARAM_H_ |
7 | 7 | ||
8 | #include <litmus/fpmath.h> | ||
9 | |||
8 | /* Litmus time type. */ | 10 | /* Litmus time type. */ |
9 | typedef unsigned long long lt_t; | 11 | typedef unsigned long long lt_t; |
10 | 12 | ||
@@ -57,6 +59,12 @@ struct affinity_observer_args | |||
57 | int lock_od; | 59 | int lock_od; |
58 | }; | 60 | }; |
59 | 61 | ||
62 | struct kfmlp_gpu_affinity_observer_args | ||
63 | { | ||
64 | struct affinity_observer_args obs; | ||
65 | int replica_to_gpu_offset; | ||
66 | }; | ||
67 | |||
60 | /* The definition of the data that is shared between the kernel and real-time | 68 | /* The definition of the data that is shared between the kernel and real-time |
61 | * tasks via a shared page (see litmus/ctrldev.c). | 69 | * tasks via a shared page (see litmus/ctrldev.c). |
62 | * | 70 | * |
@@ -116,6 +124,21 @@ enum klitirqd_sem_status | |||
116 | HELD | 124 | HELD |
117 | }; | 125 | }; |
118 | 126 | ||
127 | typedef enum gpu_migration_dist | ||
128 | { | ||
129 | MIG_LOCAL = 0, | ||
130 | MIG_NEAR = 1, | ||
131 | MIG_MED = 2, | ||
132 | MIG_FAR = 3, | ||
133 | |||
134 | MIG_LAST = MIG_FAR | ||
135 | } gpu_migration_dist_t; | ||
136 | |||
137 | typedef struct feedback_est{ | ||
138 | fp_t est; | ||
139 | fp_t accum_err; | ||
140 | } feedback_est_t; | ||
141 | |||
119 | /* RT task parameters for scheduling extensions | 142 | /* RT task parameters for scheduling extensions |
120 | * These parameters are inherited during clone and therefore must | 143 | * These parameters are inherited during clone and therefore must |
121 | * be explicitly set up before the task set is launched. | 144 | * be explicitly set up before the task set is launched. |
@@ -160,6 +183,20 @@ struct rt_param { | |||
160 | /* number of top-half interrupts handled on behalf of current job */ | 183 | /* number of top-half interrupts handled on behalf of current job */ |
161 | atomic_t nv_int_count; | 184 | atomic_t nv_int_count; |
162 | long unsigned int held_gpus; // bitmap of held GPUs. | 185 | long unsigned int held_gpus; // bitmap of held GPUs. |
186 | |||
187 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
188 | fp_t gpu_fb_param_a; | ||
189 | fp_t gpu_fb_param_b; | ||
190 | |||
191 | gpu_migration_dist_t gpu_migration; | ||
192 | int last_gpu; | ||
193 | feedback_est_t gpu_migration_est[MIG_LAST]; // local, near, med, far | ||
194 | |||
195 | lt_t accum_gpu_time; | ||
196 | lt_t gpu_time_stamp; | ||
197 | |||
198 | unsigned int suspend_gpu_tracker_on_block:1; | ||
199 | #endif | ||
163 | #endif | 200 | #endif |
164 | 201 | ||
165 | #ifdef CONFIG_LITMUS_LOCKING | 202 | #ifdef CONFIG_LITMUS_LOCKING |
diff --git a/litmus/Makefile b/litmus/Makefile
index 1698afb75ec4..080cbf694a41 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -35,4 +35,4 @@ obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o | |||
35 | obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o | 35 | obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o |
36 | obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o | 36 | obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o |
37 | 37 | ||
38 | obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o | 38 | obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o |
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
new file mode 100644
index 000000000000..43171390bed7
--- /dev/null
+++ b/litmus/gpu_affinity.c
@@ -0,0 +1,72 @@ | |||
1 | |||
2 | #ifdef CONFIG_LITMUS_NVIDIA | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | #include <litmus/litmus.h> | ||
6 | #include <litmus/gpu_affinity.h> | ||
7 | |||
8 | static void update_estimate(feedback_est_t* fb, fp_t* a, fp_t* b, lt_t observed) | ||
9 | { | ||
10 | fp_t err, new; | ||
11 | fp_t actual = _frac(observed, 1); // observed is in ns, so beware of overflow! | ||
12 | |||
13 | err = _sub(actual, fb->est); | ||
14 | new = _add(_mul(*a, err), | ||
15 | _mul(*b, fb->accum_err)); | ||
16 | |||
17 | fb->est = new; | ||
18 | fb->accum_err = _add(fb->accum_err, err); | ||
19 | } | ||
20 | |||
21 | void update_gpu_estimate(struct task_struct *t, lt_t observed) | ||
22 | { | ||
23 | feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | ||
24 | |||
25 | TRACE_TASK(t, "GPU est update before (dist = %d): %d.%d\n", | ||
26 | tsk_rt(t)->gpu_migration, | ||
27 | _fp_to_integer(fb->est), | ||
28 | _point(fb->est)); | ||
29 | |||
30 | update_estimate(fb, | ||
31 | &tsk_rt(t)->gpu_fb_param_a, | ||
32 | &tsk_rt(t)->gpu_fb_param_b, | ||
33 | observed); | ||
34 | |||
35 | TRACE_TASK(t, "GPU est update after (dist = %d): %d.%d\n", | ||
36 | tsk_rt(t)->gpu_migration, | ||
37 | _fp_to_integer(fb->est), | ||
38 | _point(fb->est)); | ||
39 | } | ||
40 | |||
41 | gpu_migration_dist_t gpu_migration_distance(int a, int b) | ||
42 | { | ||
43 | // GPUs are organized in a binary hierarchy; no more than 2^MIG_LAST GPUs are supported. | ||
44 | int i; | ||
45 | int level; | ||
46 | int max_level; | ||
47 | |||
48 | if(unlikely(a < 0 || b < 0)) { | ||
49 | return MIG_LAST; | ||
50 | } | ||
51 | |||
52 | if(a == b) { | ||
53 | return MIG_LOCAL; | ||
54 | } | ||
55 | |||
56 | for(i = 1, level = 2, max_level = 1<<MIG_LAST; | ||
57 | level <= max_level; | ||
58 | ++i, level <<= 1) { | ||
59 | if(a/level == b/level) { | ||
60 | return (gpu_migration_dist_t)(i); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | WARN_ON(1); | ||
65 | return MIG_LAST; | ||
66 | } | ||
67 | |||
68 | |||
69 | |||
70 | |||
71 | #endif | ||
72 | |||
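gpu_migration_distance() assumes GPUs are numbered along a binary hierarchy: {0,1} share the closest level, {0..3} the next, and {0..7} the farthest, for at most 2^MIG_LAST = 8 GPUs. A user-space sketch (illustrative only) of the same rule, with a few resulting distances:

#include <stdio.h>

enum { MIG_LOCAL, MIG_NEAR, MIG_MED, MIG_FAR, MIG_LAST = MIG_FAR };

static int dist(int a, int b)   /* mirrors gpu_migration_distance() above */
{
	int i, level, max_level = 1 << MIG_LAST;
	if (a < 0 || b < 0) return MIG_LAST;
	if (a == b)         return MIG_LOCAL;
	for (i = 1, level = 2; level <= max_level; ++i, level <<= 1)
		if (a / level == b / level)
			return i;
	return MIG_LAST;
}

int main(void)
{
	/* prints "0 1 2 3": same GPU, sibling pair, same group of four, other half */
	printf("%d %d %d %d\n", dist(2, 2), dist(0, 1), dist(1, 3), dist(3, 4));
	return 0;
}

The estimator above it is a simple feedback filter: each completed request yields err = observed - est, and the next estimate is a*err + b*accum_err (using the previously accumulated error), with per-task coefficients gpu_fb_param_a/b initialized in litmus/litmus.c.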
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index f7bb17103383..b30e5b589882 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -3,9 +3,14 @@ | |||
3 | 3 | ||
4 | #include <litmus/trace.h> | 4 | #include <litmus/trace.h> |
5 | #include <litmus/sched_plugin.h> | 5 | #include <litmus/sched_plugin.h> |
6 | #include <litmus/fdso.h> | ||
7 | |||
6 | #include <litmus/kfmlp_lock.h> | 8 | #include <litmus/kfmlp_lock.h> |
7 | 9 | ||
8 | //#include <litmus/edf_common.h> | 10 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) |
11 | #include <litmus/gpu_affinity.h> | ||
12 | #include <litmus/nvidia_info.h> | ||
13 | #endif | ||
9 | 14 | ||
10 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | 15 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, |
11 | struct kfmlp_queue* queue) | 16 | struct kfmlp_queue* queue) |
@@ -67,74 +72,177 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* se | |||
67 | return(shortest); | 72 | return(shortest); |
68 | } | 73 | } |
69 | 74 | ||
70 | static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | 75 | |
76 | // TODO: BREAK THIS UP INTO TWO STEPS: | ||
77 | // 1) task to steal (and from what queue) | ||
78 | // 2) update queues | ||
79 | static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) | ||
71 | { | 80 | { |
72 | /* must hold sem->lock */ | 81 | /* must hold sem->lock */ |
73 | |||
74 | struct kfmlp_queue *my_queue = NULL; | ||
75 | struct task_struct *max_hp = NULL; | ||
76 | |||
77 | 82 | ||
78 | struct list_head *pos; | ||
79 | struct task_struct *queued; | ||
80 | int i; | 83 | int i; |
81 | 84 | ||
85 | *to_steal = NULL; | ||
86 | *to_steal_from = NULL; | ||
87 | |||
82 | for(i = 0; i < sem->num_resources; ++i) | 88 | for(i = 0; i < sem->num_resources; ++i) |
83 | { | 89 | { |
84 | if( (sem->queues[i].count > 1) && | 90 | if( (sem->queues[i].count > 1) && |
85 | ((my_queue == NULL) || | 91 | ((*to_steal_from == NULL) || |
86 | //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | 92 | //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) |
87 | (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | 93 | (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) ) |
88 | { | 94 | { |
89 | my_queue = &sem->queues[i]; | 95 | *to_steal_from = &sem->queues[i]; |
90 | } | 96 | } |
91 | } | 97 | } |
92 | 98 | ||
93 | if(my_queue) | 99 | if(*to_steal_from) |
94 | { | 100 | { |
95 | max_hp = my_queue->hp_waiter; | 101 | struct list_head *pos; |
96 | 102 | list_for_each(pos, &(*to_steal_from)->wait.task_list) | |
97 | BUG_ON(!max_hp); | ||
98 | |||
99 | TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", | ||
100 | kfmlp_get_idx(sem, my_queue), | ||
101 | max_hp->comm, max_hp->pid, | ||
102 | kfmlp_get_idx(sem, my_queue)); | ||
103 | |||
104 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); | ||
105 | |||
106 | if(tsk_rt(my_queue->owner)->inh_task == max_hp) | ||
107 | { | ||
108 | litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); | ||
109 | } | ||
110 | |||
111 | list_for_each(pos, &my_queue->wait.task_list) | ||
112 | { | 103 | { |
113 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | 104 | wait_queue_t *node = list_entry(pos, wait_queue_t, task_list); |
114 | task_list)->private; | 105 | struct task_struct *queued = (struct task_struct*) node->private; |
115 | /* Compare task prios, find high prio task. */ | 106 | /* Compare task prios, find high prio task. */ |
116 | if (queued == max_hp) | 107 | if (queued == (*to_steal_from)->hp_waiter) |
117 | { | 108 | { |
118 | /* | 109 | *to_steal = node; |
119 | TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", | 110 | |
120 | kfmlp_get_idx(sem, my_queue)); | 111 | TRACE_CUR("steal: selected %s/%d from queue %d\n", |
121 | */ | 112 | queued->comm, queued->pid, |
122 | __remove_wait_queue(&my_queue->wait, | 113 | kfmlp_get_idx(sem, *to_steal_from)); |
123 | list_entry(pos, wait_queue_t, task_list)); | 114 | |
124 | break; | 115 | return queued; |
125 | } | 116 | } |
126 | } | 117 | } |
127 | --(my_queue->count); | ||
128 | } | 118 | } |
129 | 119 | ||
130 | return(max_hp); | 120 | return NULL; |
121 | } | ||
122 | |||
123 | static void kfmlp_steal_node(struct kfmlp_semaphore *sem, | ||
124 | struct kfmlp_queue *dst, | ||
125 | wait_queue_t *wait, | ||
126 | struct kfmlp_queue *src) | ||
127 | { | ||
128 | struct task_struct* t = (struct task_struct*) wait->private; | ||
129 | |||
130 | __remove_wait_queue(&src->wait, wait); | ||
131 | --(src->count); | ||
132 | |||
133 | if(t == src->hp_waiter) { | ||
134 | src->hp_waiter = kfmlp_find_hp_waiter(src, NULL); | ||
135 | |||
136 | if(src->owner && tsk_rt(src->owner)->inh_task == t) { | ||
137 | litmus->decrease_prio(src->owner, src->hp_waiter); | ||
138 | } | ||
139 | } | ||
140 | |||
141 | if(sem->shortest_queue->count > src->count) { | ||
142 | sem->shortest_queue = src; | ||
143 | } | ||
144 | |||
145 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
146 | if(sem->aff_obs) { | ||
147 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t); | ||
148 | } | ||
149 | #endif | ||
150 | |||
151 | init_waitqueue_entry(wait, t); | ||
152 | __add_wait_queue_tail_exclusive(&dst->wait, wait); | ||
153 | ++(dst->count); | ||
154 | |||
155 | if(litmus->compare(t, dst->hp_waiter)) { | ||
156 | dst->hp_waiter = t; | ||
157 | |||
158 | if(dst->owner && litmus->compare(t, dst->owner)) | ||
159 | { | ||
160 | litmus->increase_prio(dst->owner, t); | ||
161 | } | ||
162 | } | ||
163 | |||
164 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
165 | if(sem->aff_obs) { | ||
166 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t); | ||
167 | } | ||
168 | #endif | ||
131 | } | 169 | } |
170 | //// TODO: BREAK THIS UP INTO TWO STEPS: | ||
171 | //// 1) task to steal (and from what queue) | ||
172 | //// 2) update queues | ||
173 | //static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | ||
174 | //{ | ||
175 | // /* must hold sem->lock */ | ||
176 | // | ||
177 | // struct kfmlp_queue *my_queue = NULL; | ||
178 | // struct task_struct *max_hp = NULL; | ||
179 | // | ||
180 | // struct list_head *pos; | ||
181 | // struct task_struct *queued; | ||
182 | // int i; | ||
183 | // | ||
184 | // for(i = 0; i < sem->num_resources; ++i) | ||
185 | // { | ||
186 | // if( (sem->queues[i].count > 1) && | ||
187 | // ((my_queue == NULL) || | ||
188 | // //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
189 | // (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
190 | // { | ||
191 | // my_queue = &sem->queues[i]; | ||
192 | // } | ||
193 | // } | ||
194 | // | ||
195 | // if(my_queue) | ||
196 | // { | ||
197 | // max_hp = my_queue->hp_waiter; | ||
198 | // | ||
199 | // BUG_ON(!max_hp); | ||
200 | // | ||
201 | // TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", | ||
202 | // kfmlp_get_idx(sem, my_queue), | ||
203 | // max_hp->comm, max_hp->pid, | ||
204 | // kfmlp_get_idx(sem, my_queue)); | ||
205 | // | ||
206 | // my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); | ||
207 | // | ||
208 | // if(tsk_rt(my_queue->owner)->inh_task == max_hp) | ||
209 | // { | ||
210 | // litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); | ||
211 | // } | ||
212 | // | ||
213 | // list_for_each(pos, &my_queue->wait.task_list) | ||
214 | // { | ||
215 | // queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
216 | // task_list)->private; | ||
217 | // /* Compare task prios, find high prio task. */ | ||
218 | // if (queued == max_hp) | ||
219 | // { | ||
220 | // /* | ||
221 | // TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", | ||
222 | // kfmlp_get_idx(sem, my_queue)); | ||
223 | // */ | ||
224 | // __remove_wait_queue(&my_queue->wait, | ||
225 | // list_entry(pos, wait_queue_t, task_list)); | ||
226 | // break; | ||
227 | // } | ||
228 | // } | ||
229 | // --(my_queue->count); | ||
230 | // | ||
231 | //#ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
232 | // if(sem->aff_obs) { | ||
233 | // sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, max_hp); | ||
234 | // } | ||
235 | //#endif | ||
236 | // } | ||
237 | // | ||
238 | // return(max_hp); | ||
239 | //} | ||
132 | 240 | ||
133 | int kfmlp_lock(struct litmus_lock* l) | 241 | int kfmlp_lock(struct litmus_lock* l) |
134 | { | 242 | { |
135 | struct task_struct* t = current; | 243 | struct task_struct* t = current; |
136 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | 244 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); |
137 | struct kfmlp_queue* my_queue; | 245 | struct kfmlp_queue* my_queue = NULL; |
138 | wait_queue_t wait; | 246 | wait_queue_t wait; |
139 | unsigned long flags; | 247 | unsigned long flags; |
140 | 248 | ||
@@ -143,7 +251,16 @@ int kfmlp_lock(struct litmus_lock* l) | |||
143 | 251 | ||
144 | spin_lock_irqsave(&sem->lock, flags); | 252 | spin_lock_irqsave(&sem->lock, flags); |
145 | 253 | ||
254 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
255 | if(sem->aff_obs) { | ||
256 | my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t); | ||
257 | } | ||
258 | if(!my_queue) { | ||
259 | my_queue = sem->shortest_queue; | ||
260 | } | ||
261 | #else | ||
146 | my_queue = sem->shortest_queue; | 262 | my_queue = sem->shortest_queue; |
263 | #endif | ||
147 | 264 | ||
148 | if (my_queue->owner) { | 265 | if (my_queue->owner) { |
149 | /* resource is not free => must suspend and wait */ | 266 | /* resource is not free => must suspend and wait */ |
@@ -170,7 +287,17 @@ int kfmlp_lock(struct litmus_lock* l) | |||
170 | } | 287 | } |
171 | 288 | ||
172 | ++(my_queue->count); | 289 | ++(my_queue->count); |
290 | |||
291 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
292 | if(my_queue == sem->shortest_queue) { | ||
293 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
294 | } | ||
295 | if(sem->aff_obs) { | ||
296 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); | ||
297 | } | ||
298 | #else | ||
173 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | 299 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); |
300 | #endif | ||
174 | 301 | ||
175 | /* release lock before sleeping */ | 302 | /* release lock before sleeping */ |
176 | spin_unlock_irqrestore(&sem->lock, flags); | 303 | spin_unlock_irqrestore(&sem->lock, flags); |
@@ -206,7 +333,18 @@ int kfmlp_lock(struct litmus_lock* l) | |||
206 | my_queue->owner = t; | 333 | my_queue->owner = t; |
207 | 334 | ||
208 | ++(my_queue->count); | 335 | ++(my_queue->count); |
209 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | 336 | |
337 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
338 | if(my_queue == sem->shortest_queue) { | ||
339 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
340 | } | ||
341 | if(sem->aff_obs) { | ||
342 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); | ||
343 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t); | ||
344 | } | ||
345 | #else | ||
346 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
347 | #endif | ||
210 | 348 | ||
211 | spin_unlock_irqrestore(&sem->lock, flags); | 349 | spin_unlock_irqrestore(&sem->lock, flags); |
212 | } | 350 | } |
@@ -219,7 +357,7 @@ int kfmlp_unlock(struct litmus_lock* l) | |||
219 | { | 357 | { |
220 | struct task_struct *t = current, *next; | 358 | struct task_struct *t = current, *next; |
221 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | 359 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); |
222 | struct kfmlp_queue *my_queue; | 360 | struct kfmlp_queue *my_queue, *to_steal_from; |
223 | unsigned long flags; | 361 | unsigned long flags; |
224 | int err = 0; | 362 | int err = 0; |
225 | 363 | ||
@@ -227,29 +365,43 @@ int kfmlp_unlock(struct litmus_lock* l) | |||
227 | 365 | ||
228 | my_queue = kfmlp_get_queue(sem, t); | 366 | my_queue = kfmlp_get_queue(sem, t); |
229 | 367 | ||
230 | if (!my_queue) { | 368 | if (!my_queue || my_queue->owner != t) { |
231 | err = -EINVAL; | 369 | err = -EINVAL; |
232 | goto out; | 370 | goto out; |
233 | } | 371 | } |
234 | 372 | ||
373 | my_queue->owner = NULL; // clear ownership | ||
374 | --(my_queue->count); | ||
375 | |||
376 | if(my_queue->count < sem->shortest_queue->count) | ||
377 | { | ||
378 | sem->shortest_queue = my_queue; | ||
379 | } | ||
380 | |||
381 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
382 | if(sem->aff_obs) { | ||
383 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t); | ||
384 | sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t); | ||
385 | } | ||
386 | #endif | ||
387 | |||
388 | /* we lose the benefit of priority inheritance (if any) */ | ||
389 | if (tsk_rt(t)->inh_task) | ||
390 | litmus->decrease_prio(t, NULL); | ||
391 | |||
392 | |||
235 | /* check if there are jobs waiting for this resource */ | 393 | /* check if there are jobs waiting for this resource */ |
394 | RETRY: | ||
236 | next = __waitqueue_remove_first(&my_queue->wait); | 395 | next = __waitqueue_remove_first(&my_queue->wait); |
237 | if (next) { | 396 | if (next) { |
238 | /* | ||
239 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", | ||
240 | kfmlp_get_idx(sem, my_queue), | ||
241 | next->comm, next->pid); | ||
242 | */ | ||
243 | /* next becomes the resource holder */ | 397 |
244 | my_queue->owner = next; | 398 | my_queue->owner = next; |
245 | 399 | ||
246 | --(my_queue->count); | 400 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
247 | // the '=' of '<=' is a dumb method to attempt to build | 401 | if(sem->aff_obs) { |
248 | // affinity until tasks can tell us where they ran last... | 402 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next); |
249 | if(my_queue->count <= sem->shortest_queue->count) | 403 | } |
250 | { | 404 | #endif |
251 | sem->shortest_queue = my_queue; | ||
252 | } | ||
253 | 405 | ||
254 | TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", | 406 | TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", |
255 | kfmlp_get_idx(sem, my_queue), next->comm, next->pid); | 407 | kfmlp_get_idx(sem, my_queue), next->comm, next->pid); |
@@ -257,10 +409,6 @@ int kfmlp_unlock(struct litmus_lock* l) | |||
257 | /* determine new hp_waiter if necessary */ | 409 | /* determine new hp_waiter if necessary */ |
258 | if (next == my_queue->hp_waiter) { | 410 | if (next == my_queue->hp_waiter) { |
259 | TRACE_TASK(next, "was highest-prio waiter\n"); | 411 | TRACE_TASK(next, "was highest-prio waiter\n"); |
260 | /* next has the highest priority --- it doesn't need to | ||
261 | * inherit. However, we need to make sure that the | ||
262 | * next-highest priority in the queue is reflected in | ||
263 | * hp_waiter. */ | ||
264 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); | 412 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); |
265 | if (my_queue->hp_waiter) | 413 | if (my_queue->hp_waiter) |
266 | TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); | 414 | TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); |
@@ -278,46 +426,34 @@ int kfmlp_unlock(struct litmus_lock* l) | |||
278 | } | 426 | } |
279 | else | 427 | else |
280 | { | 428 | { |
281 | TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); | 429 | // TODO: put this stealing logic before we attempt to release |
282 | 430 | // our resource. (simplifies code and gets rid of ugly goto RETRY.) |
283 | next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ | 431 | wait_queue_t *wait; |
284 | 432 | ||
285 | /* | 433 | TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); |
286 | if(next) | 434 | |
287 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", | 435 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
288 | kfmlp_get_idx(sem, my_queue), | 436 | next = (sem->aff_obs) ? |
289 | next->comm, next->pid); | 437 | sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) : |
290 | */ | 438 | kfmlp_select_hp_steal(sem, &wait, &to_steal_from); |
291 | 439 | #else | |
292 | my_queue->owner = next; | 440 | next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from); |
441 | #endif | ||
293 | 442 | ||
294 | if(next) | 443 | if(next) { |
295 | { | 444 | kfmlp_steal_node(sem, my_queue, wait, to_steal_from); |
296 | TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", | ||
297 | kfmlp_get_idx(sem, my_queue), | ||
298 | next->comm, next->pid); | ||
299 | 445 | ||
300 | /* wake up next */ | 446 | TRACE_CUR("queued %d: stole %s/%d from queue %d\n", |
301 | wake_up_process(next); | 447 | next->comm, next->pid, |
448 | kfmlp_get_idx(sem, to_steal_from)); | ||
449 | |||
450 | goto RETRY; // will succeed this time. | ||
302 | } | 451 | } |
303 | else | 452 | else { |
304 | { | ||
305 | TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); | 453 | TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); |
306 | |||
307 | --(my_queue->count); | ||
308 | // the '=' of '<=' is a dumb method to attempt to build | ||
309 | // affinity until tasks can tell us where they ran last... | ||
310 | if(my_queue->count <= sem->shortest_queue->count) | ||
311 | { | ||
312 | sem->shortest_queue = my_queue; | ||
313 | } | ||
314 | } | 454 | } |
315 | } | 455 | } |
316 | 456 | ||
317 | /* we lose the benefit of priority inheritance (if any) */ | ||
318 | if (tsk_rt(t)->inh_task) | ||
319 | litmus->decrease_prio(t, NULL); | ||
320 | |||
321 | out: | 457 | out: |
322 | spin_unlock_irqrestore(&sem->lock, flags); | 458 | spin_unlock_irqrestore(&sem->lock, flags); |
323 | 459 | ||
@@ -403,3 +539,337 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | |||
403 | 539 | ||
404 | return &sem->litmus_lock; | 540 | return &sem->litmus_lock; |
405 | } | 541 | } |
542 | |||
543 | |||
544 | |||
545 | |||
546 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
547 | |||
548 | int kfmlp_aff_obs_close(struct affinity_observer* obs) | ||
549 | { | ||
550 | return 0; | ||
551 | } | ||
552 | |||
553 | void kfmlp_aff_obs_free(struct affinity_observer* obs) | ||
554 | { | ||
555 | struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); | ||
556 | kfree(kfmlp_aff->q_info); | ||
557 | kfree(kfmlp_aff); | ||
558 | } | ||
559 | |||
560 | static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops, | ||
561 | struct kfmlp_affinity_ops* kfmlp_ops, | ||
562 | void* __user args) | ||
563 | { | ||
564 | struct kfmlp_affinity* kfmlp_aff; | ||
565 | struct kfmlp_gpu_affinity_observer_args aff_args; | ||
566 | struct kfmlp_semaphore* sem; | ||
567 | int i; | ||
568 | unsigned long flags; | ||
569 | |||
570 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) | ||
571 | { | ||
572 | return(NULL); | ||
573 | } | ||
574 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) | ||
575 | { | ||
576 | return(NULL); | ||
577 | } | ||
578 | |||
579 | sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); | ||
580 | |||
581 | if(sem->litmus_lock.type != KFMLP_SEM) | ||
582 | { | ||
583 | TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); | ||
584 | return(NULL); | ||
585 | } | ||
586 | |||
587 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); | ||
588 | if(!kfmlp_aff) | ||
589 | { | ||
590 | return(NULL); | ||
591 | } | ||
592 | |||
593 | kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); | ||
594 | if(!kfmlp_aff->q_info) | ||
595 | { | ||
596 | kfree(kfmlp_aff); | ||
597 | return(NULL); | ||
598 | } | ||
599 | |||
600 | kfmlp_aff->obs.ops = ops; | ||
601 | kfmlp_aff->ops = kfmlp_ops; | ||
602 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; | ||
603 | |||
604 | for(i = 0; i < sem->num_resources; ++i) | ||
605 | { | ||
606 | kfmlp_aff->q_info[i].q = &sem->queues[i]; | ||
607 | kfmlp_aff->q_info[i].estimated_len = 0; | ||
608 | } | ||
609 | |||
610 | spin_lock_irqsave(&sem->lock, flags); | ||
611 | sem->aff_obs = kfmlp_aff; | ||
612 | kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; | ||
613 | spin_unlock_irqrestore(&sem->lock, flags); | ||
614 | |||
615 | return &kfmlp_aff->obs; | ||
616 | } | ||
617 | |||
618 | |||
619 | |||
620 | |||
621 | // Smart KFMLP Affinity | ||
622 | |||
623 | static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) | ||
624 | { | ||
625 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
626 | struct kfmlp_queue_info *shortest = &aff->q_info[0]; | ||
627 | int i; | ||
628 | |||
629 | for(i = 1; i < sem->num_resources; ++i) { | ||
630 | if(aff->q_info[i].estimated_len < shortest->estimated_len) { | ||
631 | shortest = &aff->q_info[i]; | ||
632 | } | ||
633 | } | ||
634 | |||
635 | return(shortest); | ||
636 | } | ||
637 | |||
638 | struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | ||
639 | { | ||
640 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
641 | lt_t min_len; | ||
642 | struct kfmlp_queue_info *shortest; | ||
643 | struct kfmlp_queue *to_enqueue; | ||
644 | int i; | ||
645 | |||
646 | // simply pick the shortest queue if we have no affinity, or we have | ||
647 | // affinity with the shortest | ||
648 | if((tsk_rt(t)->last_gpu < 0) || | ||
649 | ((kfmlp_get_idx(sem, aff->shortest_queue->q) + aff->offset) == tsk_rt(t)->last_gpu)) { | ||
650 | // we have affinity with the shortest queue. pick it. | ||
651 | to_enqueue = aff->shortest_queue->q; | ||
652 | |||
653 | TRACE_CUR("special case: no affinity or have affinity with shortest\n"); | ||
654 | |||
655 | goto out; | ||
656 | } | ||
657 | |||
658 | // enqueue where we will have the shortest time to completion | ||
659 | |||
660 | shortest = &aff->q_info[0]; | ||
661 | min_len = shortest->estimated_len + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, 0 + aff->offset)); | ||
662 | |||
663 | for(i = 1; i < sem->num_resources; ++i) { | ||
664 | lt_t est_len = | ||
665 | aff->q_info[i].estimated_len + | ||
666 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)); | ||
667 | |||
668 | if(est_len < min_len) { | ||
669 | shortest = &aff->q_info[i]; | ||
670 | min_len = est_len; | ||
671 | } | ||
672 | } | ||
673 | to_enqueue = shortest->q; | ||
674 | |||
675 | out: | ||
676 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
677 | kfmlp_get_idx(sem, to_enqueue), | ||
678 | kfmlp_get_idx(sem, sem->shortest_queue)); | ||
679 | |||
680 | return to_enqueue; | ||
681 | } | ||
682 | |||
683 | struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) | ||
684 | { | ||
685 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
686 | |||
687 | // For now, just steal from the shortest (by number) queue. | ||
688 | // TODO: Implement affinity-aware stealing. | ||
689 | |||
690 | return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); | ||
691 | } | ||
692 | |||
693 | |||
694 | void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
695 | { | ||
696 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
697 | int replica = kfmlp_get_idx(sem, fq); | ||
698 | int gpu = aff->offset + replica; | ||
699 | struct kfmlp_queue_info *info = &aff->q_info[replica]; | ||
700 | lt_t est_time; | ||
701 | |||
702 | if(current == t) { | ||
703 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
704 | } | ||
705 | |||
706 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
707 | info->estimated_len += est_time; | ||
708 | |||
709 | TRACE_CUR("fq %d est len is now %llu\n", | ||
710 | kfmlp_get_idx(sem, aff->shortest_queue->q), | ||
711 | aff->shortest_queue->estimated_len); | ||
712 | |||
713 | if(aff->shortest_queue == info) { | ||
714 | // we may no longer be the shortest | ||
715 | aff->shortest_queue = kfmlp_aff_find_shortest(aff); | ||
716 | |||
717 | TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
718 | kfmlp_get_idx(sem, aff->shortest_queue->q), | ||
719 | aff->shortest_queue->q->count, | ||
720 | aff->shortest_queue->estimated_len); | ||
721 | } | ||
722 | } | ||
723 | |||
724 | void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
725 | { | ||
726 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
727 | int replica = kfmlp_get_idx(sem, fq); | ||
728 | int gpu = aff->offset + replica; | ||
729 | struct kfmlp_queue_info *info = &aff->q_info[replica]; | ||
730 | lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
731 | |||
732 | if(est_time > info->estimated_len) { | ||
733 | WARN_ON(1); | ||
734 | info->estimated_len = 0; | ||
735 | } | ||
736 | else { | ||
737 | info->estimated_len -= est_time; | ||
738 | } | ||
739 | |||
740 | TRACE_CUR("fq %d est len is now %llu\n", | ||
741 | kfmlp_get_idx(sem, info->q), | ||
742 | info->estimated_len); | ||
743 | |||
744 | // check to see if we're the shortest queue now. | ||
745 | if((aff->shortest_queue != info) && | ||
746 | (aff->shortest_queue->estimated_len > info->estimated_len)) { | ||
747 | |||
748 | aff->shortest_queue = info; | ||
749 | |||
750 | TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
751 | kfmlp_get_idx(sem, info->q), | ||
752 | info->q->count, | ||
753 | info->estimated_len); | ||
754 | } | ||
755 | } | ||
756 | |||
757 | void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
758 | { | ||
759 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
760 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | ||
761 | |||
762 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration | ||
763 | |||
764 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", | ||
765 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); | ||
766 | |||
767 | reg_nv_device(gpu, 1); // register | ||
768 | |||
769 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; | ||
770 | reset_gpu_tracker(t); | ||
771 | start_gpu_tracker(t); | ||
772 | } | ||
773 | |||
774 | void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
775 | { | ||
776 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
777 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | ||
778 | lt_t est_time; | ||
779 | |||
780 | stop_gpu_tracker(t); // stop the tracker before we do anything else. | ||
781 | |||
782 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
783 | |||
784 | tsk_rt(t)->last_gpu = gpu; | ||
785 | reg_nv_device(gpu, 0); // unregister | ||
786 | |||
787 | // update estimates | ||
788 | update_gpu_estimate(t, get_gpu_time(t)); | ||
789 | |||
790 | TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n", | ||
791 | t->comm, t->pid, gpu, | ||
792 | get_gpu_time(t), | ||
793 | est_time, | ||
794 | (long long)get_gpu_time(t) - (long long)est_time); | ||
795 | } | ||
796 | |||
797 | struct kfmlp_affinity_ops gpu_kfmlp_affinity = | ||
798 | { | ||
799 | .advise_enqueue = gpu_kfmlp_advise_enqueue, | ||
800 | .advise_steal = gpu_kfmlp_advise_steal, | ||
801 | .notify_enqueue = gpu_kfmlp_notify_enqueue, | ||
802 | .notify_dequeue = gpu_kfmlp_notify_dequeue, | ||
803 | .notify_acquired = gpu_kfmlp_notify_acquired, | ||
804 | .notify_freed = gpu_kfmlp_notify_freed | ||
805 | }; | ||
806 | |||
807 | struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
808 | void* __user args) | ||
809 | { | ||
810 | return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args); | ||
811 | } | ||
812 | |||
813 | |||
814 | |||
815 | |||
816 | |||
817 | |||
818 | |||
819 | |||
820 | // Simple KFMLP Affinity (standard KFMLP with auto-gpu registration) | ||
821 | |||
822 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | ||
823 | { | ||
824 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
825 | return sem->shortest_queue; | ||
826 | } | ||
827 | |||
828 | struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) | ||
829 | { | ||
830 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
831 | return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); | ||
832 | } | ||
833 | |||
834 | void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
835 | { | ||
836 | } | ||
837 | |||
838 | void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
839 | { | ||
840 | } | ||
841 | |||
842 | void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
843 | { | ||
844 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
845 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | ||
846 | |||
847 | reg_nv_device(gpu, 1); // register | ||
848 | } | ||
849 | |||
850 | void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
851 | { | ||
852 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
853 | int gpu = kfmlp_get_idx(sem, fq) + aff->offset; | ||
854 | |||
855 | reg_nv_device(gpu, 0); // unregister | ||
856 | } | ||
857 | |||
858 | struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = | ||
859 | { | ||
860 | .advise_enqueue = simple_gpu_kfmlp_advise_enqueue, | ||
861 | .advise_steal = simple_gpu_kfmlp_advise_steal, | ||
862 | .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, | ||
863 | .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, | ||
864 | .notify_acquired = simple_gpu_kfmlp_notify_acquired, | ||
865 | .notify_freed = simple_gpu_kfmlp_notify_freed | ||
866 | }; | ||
867 | |||
868 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
869 | void* __user args) | ||
870 | { | ||
871 | return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args); | ||
872 | } | ||
873 | |||
874 | #endif | ||
875 | |||
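The GPU-aware advise_enqueue() does not simply pick the replica with the fewest waiters: it minimizes (estimated queued work on the replica) + (this task's expected request length at the migration distance to that replica's GPU). A self-contained sketch of that cost model follows (all numbers, and the zero replica-to-GPU offset, are hypothetical):

#include <stdio.h>

#define NUM_REPLICAS 4

/* hypothetical per-distance request-length estimates for one task (ns) */
static const unsigned long long est_by_dist[4] = { 1000, 1200, 1500, 2000 };

static int dist(int a, int b)   /* same binary-hierarchy rule as gpu_migration_distance() */
{
	int i, level;
	if (a < 0 || b < 0) return 3;
	if (a == b)         return 0;
	for (i = 1, level = 2; level <= 8; ++i, level <<= 1)
		if (a / level == b / level)
			return i;
	return 3;
}

int main(void)
{
	/* hypothetical estimated backlog per replica (ns) */
	unsigned long long queue_len[NUM_REPLICAS] = { 900, 1000, 5000, 5000 };
	unsigned long long best_cost = ~0ULL;
	int last_gpu = 1, best = 0, i;

	for (i = 0; i < NUM_REPLICAS; ++i) {
		unsigned long long cost = queue_len[i] + est_by_dist[dist(last_gpu, i)];
		if (cost < best_cost) { best_cost = cost; best = i; }
	}
	/* replica 0 has less backlog (900 vs 1000), but replica 1 wins
	 * (1000 + 1000 = 2000 < 900 + 1200 = 2100) because the task last
	 * ran on GPU 1 and a local re-acquisition is estimated to be cheaper. */
	printf("enqueue on replica %d (cost %llu)\n", best, best_cost);
	return 0;
}

The notify_enqueue()/notify_dequeue() hooks keep each replica's estimated_len consistent with these per-task estimates, and notify_acquired()/notify_freed() bracket the actual GPU use so the estimates improve over time.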
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 2f9079421ec7..dd8b72e1af08 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -387,6 +387,13 @@ static void reinit_litmus_state(struct task_struct* p, int restore) | |||
387 | p->rt_param.ctrl_page = ctrl_page; | 387 | p->rt_param.ctrl_page = ctrl_page; |
388 | } | 388 | } |
389 | 389 | ||
390 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
391 | p->rt_param.gpu_fb_param_a = _frac(14008, 1000); | ||
392 | p->rt_param.gpu_fb_param_b = _frac(16024, 1000); | ||
393 | p->rt_param.gpu_migration = MIG_LAST; | ||
394 | p->rt_param.last_gpu = -1; | ||
395 | #endif | ||
396 | |||
390 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | 397 | #ifdef CONFIG_LITMUS_NESTED_LOCKING |
391 | INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order); | 398 | INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order); |
392 | raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock); | 399 | raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock); |
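For reference, the coefficients set above are plain fixed-point encodings: _frac(14008, 1000) stores (14008 << 10) / 1000 = 14344, i.e. roughly 14.008 in fpmath.h's 1/1024-scaled format, and _frac(16024, 1000) stores 16408, roughly 16.023. Initializing gpu_migration to MIG_LAST and last_gpu to -1 marks the task as having no prior GPU affinity, which the GPU-aware enqueue advice treats as "pick the shortest queue".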
diff --git a/litmus/locking.c b/litmus/locking.c
index 6d28efe97c91..ef13062913ce 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -10,6 +10,10 @@ | |||
10 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
11 | #endif | 11 | #endif |
12 | 12 | ||
13 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
14 | #include <litmus/gpu_affinity.h> | ||
15 | #endif | ||
16 | |||
13 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); | 17 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); |
14 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg); | 18 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg); |
15 | static int close_generic_lock(struct od_table_entry* entry); | 19 | static int close_generic_lock(struct od_table_entry* entry); |
@@ -50,6 +54,7 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar | |||
50 | INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node); | 54 | INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node); |
51 | WARN_ON(!(lock->nest.hp_waiter_ptr)); | 55 | WARN_ON(!(lock->nest.hp_waiter_ptr)); |
52 | #endif | 56 | #endif |
57 | lock->type = type; | ||
53 | lock->ident = atomic_inc_return(&lock_id_gen); | 58 | lock->ident = atomic_inc_return(&lock_id_gen); |
54 | *obj_ref = lock; | 59 | *obj_ref = lock; |
55 | } | 60 | } |
@@ -292,6 +297,14 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) | |||
292 | 297 | ||
293 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", | 298 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", |
294 | dgl_wait->nr_remaining); | 299 | dgl_wait->nr_remaining); |
300 | |||
301 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
302 | // KLUDGE: don't count this suspension as time in the gpu | ||
303 | // critical section | ||
304 | if(tsk_rt(dgl_wait->task)->held_gpus) { | ||
305 | tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; | ||
306 | } | ||
307 | #endif | ||
295 | 308 | ||
296 | // note reverse order. see comments in select_next_lock for reason. | 309 | // note reverse order. see comments in select_next_lock for reason. |
297 | for(i = dgl_wait->size - 1; i >= 0; --i) { | 310 | for(i = dgl_wait->size - 1; i >= 0; --i) { |
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
index aaca93c1e5d1..0a851cd430a7 100644
--- a/litmus/rsm_lock.c
+++ b/litmus/rsm_lock.c
@@ -7,6 +7,10 @@ | |||
7 | 7 | ||
8 | //#include <litmus/edf_common.h> | 8 | //#include <litmus/edf_common.h> |
9 | 9 | ||
10 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
11 | #include <litmus/gpu_affinity.h> | ||
12 | #endif | ||
13 | |||
10 | 14 | ||
11 | /* caller is responsible for locking */ | 15 | /* caller is responsible for locking */ |
12 | static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex, | 16 | static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex, |
@@ -202,7 +206,15 @@ int rsm_mutex_lock(struct litmus_lock* l) | |||
202 | 206 | ||
203 | if (mutex->owner) { | 207 | if (mutex->owner) { |
204 | TRACE_TASK(t, "Blocking on lock %d.\n", l->ident); | 208 | TRACE_TASK(t, "Blocking on lock %d.\n", l->ident); |
205 | 209 | ||
210 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
211 | // KLUDGE: don't count this suspension as time in the gpu | ||
212 | // critical section | ||
213 | if(tsk_rt(t)->held_gpus) { | ||
214 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
215 | } | ||
216 | #endif | ||
217 | |||
206 | /* resource is not free => must suspend and wait */ | 218 | /* resource is not free => must suspend and wait */ |
207 | 219 | ||
208 | owner = mutex->owner; | 220 | owner = mutex->owner; |
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index 1440372227c6..b4ab2361e37a 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -61,6 +61,9 @@ | |||
61 | #include <litmus/nvidia_info.h> | 61 | #include <litmus/nvidia_info.h> |
62 | #endif | 62 | #endif |
63 | 63 | ||
64 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
65 | #include <litmus/gpu_affinity.h> | ||
66 | #endif | ||
64 | 67 | ||
65 | /* Overview of GSN-EDF operations. | 68 | /* Overview of GSN-EDF operations. |
66 | * | 69 | * |
@@ -813,6 +816,14 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
813 | */ | 816 | */ |
814 | if (blocks) | 817 | if (blocks) |
815 | unlink(entry->scheduled); | 818 | unlink(entry->scheduled); |
819 | |||
820 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
821 | if(tsk_rt(entry->scheduled)->held_gpus) { | ||
822 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
823 | stop_gpu_tracker(entry->scheduled); | ||
824 | } | ||
825 | } | ||
826 | #endif | ||
816 | 827 | ||
817 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | 828 | /* Request a sys_exit_np() call if we would like to preempt but cannot. |
818 | * We need to make sure to update the link structure anyway in case | 829 | * We need to make sure to update the link structure anyway in case |
@@ -862,7 +873,7 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
862 | if (exists) | 873 | if (exists) |
863 | next = prev; | 874 | next = prev; |
864 | } | 875 | } |
865 | 876 | ||
866 | sched_state_task_picked(); | 877 | sched_state_task_picked(); |
867 | 878 | ||
868 | raw_spin_unlock(&gsnedf_lock); | 879 | raw_spin_unlock(&gsnedf_lock); |
@@ -1429,9 +1440,6 @@ static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg) | |||
1429 | return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg); | 1440 | return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg); |
1430 | } | 1441 | } |
1431 | 1442 | ||
1432 | |||
1433 | |||
1434 | |||
1435 | /* ******************** FMLP support ********************** */ | 1443 | /* ******************** FMLP support ********************** */ |
1436 | 1444 | ||
1437 | /* struct for semaphore with priority inheritance */ | 1445 | /* struct for semaphore with priority inheritance */ |
@@ -1676,7 +1684,57 @@ UNSUPPORTED_LOCK: | |||
1676 | return err; | 1684 | return err; |
1677 | } | 1685 | } |
1678 | 1686 | ||
1687 | #endif // CONFIG_LITMUS_LOCKING | ||
1688 | |||
1689 | |||
1690 | |||
1691 | |||
1692 | |||
1693 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1694 | static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = { | ||
1695 | .close = kfmlp_aff_obs_close, | ||
1696 | .deallocate = kfmlp_aff_obs_free, | ||
1697 | }; | ||
1698 | |||
1699 | static long gsnedf_allocate_affinity_observer( | ||
1700 | struct affinity_observer **aff_obs, | ||
1701 | int type, | ||
1702 | void* __user args) | ||
1703 | { | ||
1704 | int err; | ||
1705 | |||
1706 | /* GSN-EDF currently only supports affinity observers for the KFMLP. */ | ||
1707 | switch (type) { | ||
1708 | |||
1709 | case KFMLP_SIMPLE_GPU_AFF_OBS: | ||
1710 | *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); | ||
1711 | break; | ||
1712 | case KFMLP_GPU_AFF_OBS: | ||
1713 | *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); | ||
1714 | break; | ||
1715 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1716 | // case IKGLP_GPU_AFF_OBS: | ||
1717 | // *aff_obs = gsnedf_new_ikglp_aff(arg); | ||
1718 | // break; | ||
1679 | #endif | 1719 | #endif |
1720 | default: | ||
1721 | err = -ENXIO; | ||
1722 | goto UNSUPPORTED_AFF_OBS; | ||
1723 | }; | ||
1724 | |||
1725 | if (*aff_obs) | ||
1726 | err = 0; | ||
1727 | else | ||
1728 | err = -ENOMEM; | ||
1729 | |||
1730 | UNSUPPORTED_AFF_OBS: | ||
1731 | return err; | ||
1732 | } | ||
1733 | #endif | ||
1734 | |||
1735 | |||
1736 | |||
1737 | |||
1680 | 1738 | ||
1681 | static long gsnedf_activate_plugin(void) | 1739 | static long gsnedf_activate_plugin(void) |
1682 | { | 1740 | { |
@@ -1746,6 +1804,9 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { | |||
1746 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | 1804 | #ifdef CONFIG_LITMUS_DGL_SUPPORT |
1747 | .get_dgl_spinlock = gsnedf_get_dgl_spinlock, | 1805 | .get_dgl_spinlock = gsnedf_get_dgl_spinlock, |
1748 | #endif | 1806 | #endif |
1807 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1808 | .allocate_aff_obs = gsnedf_allocate_affinity_observer, | ||
1809 | #endif | ||
1749 | #ifdef CONFIG_LITMUS_SOFTIRQD | 1810 | #ifdef CONFIG_LITMUS_SOFTIRQD |
1750 | .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, | 1811 | .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, |
1751 | .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, | 1812 | .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, |