From 8675824ed85d6e83a24e77dabaf3a5c02c91ef6f Mon Sep 17 00:00:00 2001 From: Glenn Elliott Date: Mon, 16 Apr 2012 20:09:15 -0400 Subject: Implement GPU-affinity-aware kfmlp (untested) --- include/litmus/fdso.h | 7 +- include/litmus/fpmath.h | 134 +++++++++ include/litmus/gpu_affinity.h | 40 +++ include/litmus/kfmlp_lock.h | 56 +++- include/litmus/rt_param.h | 37 +++ litmus/Makefile | 2 +- litmus/gpu_affinity.c | 72 +++++ litmus/kfmlp_lock.c | 660 ++++++++++++++++++++++++++++++++++++------ litmus/litmus.c | 7 + litmus/locking.c | 13 + litmus/rsm_lock.c | 14 +- litmus/sched_gsn_edf.c | 69 ++++- 12 files changed, 1006 insertions(+), 105 deletions(-) create mode 100644 include/litmus/fpmath.h create mode 100644 include/litmus/gpu_affinity.h create mode 100644 litmus/gpu_affinity.c diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index baf28c47e95d..b92c1a3f004f 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h @@ -24,10 +24,11 @@ typedef enum { IKGLP_SEM = 3, KFMLP_SEM = 4, - IKGLP_AFF_OBS = 5, - KFMLP_AFF_OBS = 6, + IKGLP_GPU_AFF_OBS = 5, + KFMLP_SIMPLE_GPU_AFF_OBS = 6, + KFMLP_GPU_AFF_OBS = 7, - MAX_OBJ_TYPE = 6 + MAX_OBJ_TYPE = 7 } obj_type_t; struct inode_obj_id { diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h new file mode 100644 index 000000000000..35f81683d6ab --- /dev/null +++ b/include/litmus/fpmath.h @@ -0,0 +1,134 @@ +#ifndef __FP_MATH_H__ +#define __FP_MATH_H__ + +// Use 64-bit because we want to track things at the nanosecond scale. +// This can lead to very large numbers. +typedef int64_t fpbuf_t; +typedef struct +{ + fpbuf_t val; +} fp_t; + +#define FP_SHIFT 10 +#define ROUND_BIT (FP_SHIFT - 1) +#define ONE FP(1) + +#define _fp(x) ((fp_t) {x}) + +static const fp_t LITMUS_FP_ZERO = {.val = 0}; +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)}; + +static inline fp_t FP(fpbuf_t x) +{ + return _fp(((fpbuf_t) x) << FP_SHIFT); +} + +/* divide two integers to obtain a fixed point value */ +static inline fp_t _frac(fpbuf_t a, fpbuf_t b) +{ + return _fp(FP(a).val / (b)); +} + +#ifdef __KERNEL__ + +static inline fpbuf_t _point(fp_t x) +{ + return (x.val % (1 << FP_SHIFT)); + +} + +#define fp2str(x) x.val +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */ +#define _FP_ "%ld/1024" + +static inline fpbuf_t _floor(fp_t x) +{ + return x.val >> FP_SHIFT; +} + +/* FIXME: negative rounding */ +static inline fpbuf_t _round(fp_t x) +{ + return _floor(x) + ((x.val >> ROUND_BIT) & 1); +} + +/* multiply two fixed point values */ +static inline fp_t _mul(fp_t a, fp_t b) +{ + return _fp((a.val * b.val) >> FP_SHIFT); +} + +static inline fp_t _div(fp_t a, fp_t b) +{ + /* try not to overflow */ + if (unlikely( a.val > (2l << (BITS_PER_LONG - FP_SHIFT)) )) + return _fp((a.val / b.val) << FP_SHIFT); + else + return _fp((a.val << FP_SHIFT) / b.val); +} + +static inline fp_t _add(fp_t a, fp_t b) +{ + return _fp(a.val + b.val); +} + +static inline fp_t _sub(fp_t a, fp_t b) +{ + return _fp(a.val - b.val); +} + +static inline fp_t _neg(fp_t x) +{ + return _fp(-x.val); +} + +static inline fp_t _abs(fp_t x) +{ + return _fp(abs(x.val)); +} + +/* works the same as casting float/double to integer */ +static inline fpbuf_t _fp_to_integer(fp_t x) +{ + return _floor(_abs(x)) * ((x.val > 0) ? 
1 : -1); +} + +static inline fp_t _integer_to_fp(fpbuf_t x) +{ + return _frac(x,1); +} + +static inline int _leq(fp_t a, fp_t b) +{ + return a.val <= b.val; +} + +static inline int _geq(fp_t a, fp_t b) +{ + return a.val >= b.val; +} + +static inline int _lt(fp_t a, fp_t b) +{ + return a.val < b.val; +} + +static inline int _gt(fp_t a, fp_t b) +{ + return a.val > b.val; +} + +static inline int _eq(fp_t a, fp_t b) +{ + return a.val == b.val; +} + +static inline fp_t _max(fp_t a, fp_t b) +{ + if (a.val < b.val) + return b; + else + return a; +} +#endif +#endif diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h new file mode 100644 index 000000000000..c29ff3de997c --- /dev/null +++ b/include/litmus/gpu_affinity.h @@ -0,0 +1,40 @@ +#ifndef LITMUS_GPU_AFFINITY_H +#define LITMUS_GPU_AFFINITY_H + +#include +#include +#include + +void update_gpu_estimate(struct task_struct* t, lt_t observed); +gpu_migration_dist_t gpu_migration_distance(int a, int b); + +static inline void reset_gpu_tracker(struct task_struct* t) +{ + t->rt_param.accum_gpu_time = 0; +} + +static inline void start_gpu_tracker(struct task_struct* t) +{ + t->rt_param.gpu_time_stamp = litmus_clock(); +} + +static inline void stop_gpu_tracker(struct task_struct* t) +{ + lt_t now = litmus_clock(); + t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp); +} + +static inline lt_t get_gpu_time(struct task_struct* t) +{ + return t->rt_param.accum_gpu_time; +} + +static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) +{ + lt_t val = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); + + // minimum value is 1. + return ((val > 0) ? val : 1); +} + +#endif \ No newline at end of file diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h index 49156a9ba4ea..614cccad5307 100644 --- a/include/litmus/kfmlp_lock.h +++ b/include/litmus/kfmlp_lock.h @@ -4,6 +4,10 @@ #include #include +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +#include +#endif + /* struct for semaphore with priority inheritance */ struct kfmlp_queue { @@ -23,6 +27,10 @@ struct kfmlp_semaphore struct kfmlp_queue *queues; /* array */ struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + struct kfmlp_affinity *aff_obs; +#endif }; static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) @@ -36,4 +44,50 @@ int kfmlp_close(struct litmus_lock* l); void kfmlp_free(struct litmus_lock* l); struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg); -#endif \ No newline at end of file +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +struct kfmlp_queue_info +{ + struct kfmlp_queue* q; + lt_t estimated_len; +}; + +struct kfmlp_affinity; + +struct kfmlp_affinity_ops +{ + struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); + struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from); + void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); + void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); +}; + +struct kfmlp_affinity +{ + struct affinity_observer obs; + struct kfmlp_affinity_ops *ops; + struct 
kfmlp_queue_info *q_info; + struct kfmlp_queue_info *shortest_queue; + int offset; +}; + +static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) +{ + return container_of(aff_obs, struct kfmlp_affinity, obs); +} + +int kfmlp_aff_obs_close(struct affinity_observer*); +void kfmlp_aff_obs_free(struct affinity_observer*); +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, + void* __user arg); + + +#endif + +#endif + + diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index cc638e9c55d1..ad46ab4c64cc 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -5,6 +5,8 @@ #ifndef _LINUX_RT_PARAM_H_ #define _LINUX_RT_PARAM_H_ +#include + /* Litmus time type. */ typedef unsigned long long lt_t; @@ -57,6 +59,12 @@ struct affinity_observer_args int lock_od; }; +struct kfmlp_gpu_affinity_observer_args +{ + struct affinity_observer_args obs; + int replica_to_gpu_offset; +}; + /* The definition of the data that is shared between the kernel and real-time * tasks via a shared page (see litmus/ctrldev.c). * @@ -116,6 +124,21 @@ enum klitirqd_sem_status HELD }; +typedef enum gpu_migration_dist +{ + MIG_LOCAL = 0, + MIG_NEAR = 1, + MIG_MED = 2, + MIG_FAR = 3, + + MIG_LAST = MIG_FAR +} gpu_migration_dist_t; + +typedef struct feedback_est{ + fp_t est; + fp_t accum_err; +} feedback_est_t; + /* RT task parameters for scheduling extensions * These parameters are inherited during clone and therefore must * be explicitly set up before the task set is launched. @@ -160,6 +183,20 @@ struct rt_param { /* number of top-half interrupts handled on behalf of current job */ atomic_t nv_int_count; long unsigned int held_gpus; // bitmap of held GPUs. + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + fp_t gpu_fb_param_a; + fp_t gpu_fb_param_b; + + gpu_migration_dist_t gpu_migration; + int last_gpu; + feedback_est_t gpu_migration_est[MIG_LAST]; // local, near, med, far + + lt_t accum_gpu_time; + lt_t gpu_time_stamp; + + unsigned int suspend_gpu_tracker_on_block:1; +#endif #endif #ifdef CONFIG_LITMUS_LOCKING diff --git a/litmus/Makefile b/litmus/Makefile index 1698afb75ec4..080cbf694a41 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -35,4 +35,4 @@ obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o -obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o +obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c new file mode 100644 index 000000000000..43171390bed7 --- /dev/null +++ b/litmus/gpu_affinity.c @@ -0,0 +1,72 @@ + +#ifdef CONFIG_LITMUS_NVIDIA + +#include +#include +#include + +static void update_estimate(feedback_est_t* fb, fp_t* a, fp_t* b, lt_t observed) +{ + fp_t err, new; + fp_t actual = _frac(observed, 1); // observed is in ns, so beware of overflow! 
+ + err = _sub(actual, fb->est); + new = _add(_mul(*a, err), + _mul(*b, fb->accum_err)); + + fb->est = new; + fb->accum_err = _add(fb->accum_err, err); +} + +void update_gpu_estimate(struct task_struct *t, lt_t observed) +{ + feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); + + TRACE_TASK(t, "GPU est update before (dist = %d): %d.%d\n", + tsk_rt(t)->gpu_migration, + _fp_to_integer(fb->est), + _point(fb->est)); + + update_estimate(fb, + &tsk_rt(t)->gpu_fb_param_a, + &tsk_rt(t)->gpu_fb_param_b, + observed); + + TRACE_TASK(t, "GPU est update after (dist = %d): %d.%d\n", + tsk_rt(t)->gpu_migration, + _fp_to_integer(fb->est), + _point(fb->est)); +} + +gpu_migration_dist_t gpu_migration_distance(int a, int b) +{ + // GPUs organized in a binary hierarchy, no more than 2^MIG_LAST GPUs + int i; + int level; + int max_level; + + if(unlikely(a < 0 || b < 0)) { + return MIG_LAST; + } + + if(a == b) { + return MIG_LOCAL; + } + + for(i = 1, level = 2, max_level = 1< #include +#include + #include -//#include +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#include +#endif static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, struct kfmlp_queue* queue) @@ -67,74 +72,177 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* se return(shortest); } -static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) + +// TODO: BREAK THIS UP INTO TWO STEPS: +// 1) task to steal (and from what queue) +// 2) update queues +static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) { - /* must hold sem->lock */ - - struct kfmlp_queue *my_queue = NULL; - struct task_struct *max_hp = NULL; - + /* must hold sem->lock */ - struct list_head *pos; - struct task_struct *queued; int i; + *to_steal = NULL; + *to_steal_from = NULL; + for(i = 0; i < sem->num_resources; ++i) { if( (sem->queues[i].count > 1) && - ((my_queue == NULL) || + ((*to_steal_from == NULL) || //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) - (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) + (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) ) { - my_queue = &sem->queues[i]; + *to_steal_from = &sem->queues[i]; } } - if(my_queue) - { - max_hp = my_queue->hp_waiter; - - BUG_ON(!max_hp); - - TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", - kfmlp_get_idx(sem, my_queue), - max_hp->comm, max_hp->pid, - kfmlp_get_idx(sem, my_queue)); - - my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); - - if(tsk_rt(my_queue->owner)->inh_task == max_hp) - { - litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); - } - - list_for_each(pos, &my_queue->wait.task_list) + if(*to_steal_from) + { + struct list_head *pos; + list_for_each(pos, &(*to_steal_from)->wait.task_list) { - queued = (struct task_struct*) list_entry(pos, wait_queue_t, - task_list)->private; + wait_queue_t *node = list_entry(pos, wait_queue_t, task_list); + struct task_struct *queued = (struct task_struct*) node->private; /* Compare task prios, find high prio task. */ - if (queued == max_hp) + if (queued == (*to_steal_from)->hp_waiter) { - /* - TRACE_CUR("queue %d: found entry in wait queue. 
REMOVING!\n", - kfmlp_get_idx(sem, my_queue)); - */ - __remove_wait_queue(&my_queue->wait, - list_entry(pos, wait_queue_t, task_list)); - break; + *to_steal = node; + + TRACE_CUR("steal: selected %s/%d from queue %d\n", + queued->comm, queued->pid, + kfmlp_get_idx(sem, *to_steal_from)); + + return queued; } } - --(my_queue->count); } - return(max_hp); + return NULL; +} + +static void kfmlp_steal_node(struct kfmlp_semaphore *sem, + struct kfmlp_queue *dst, + wait_queue_t *wait, + struct kfmlp_queue *src) +{ + struct task_struct* t = (struct task_struct*) wait->private; + + __remove_wait_queue(&src->wait, wait); + --(src->count); + + if(t == src->hp_waiter) { + src->hp_waiter = kfmlp_find_hp_waiter(src, NULL); + + if(src->owner && tsk_rt(src->owner)->inh_task == t) { + litmus->decrease_prio(src->owner, src->hp_waiter); + } + } + + if(sem->shortest_queue->count > src->count) { + sem->shortest_queue = src; + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t); + } +#endif + + init_waitqueue_entry(wait, t); + __add_wait_queue_tail_exclusive(&dst->wait, wait); + ++(dst->count); + + if(litmus->compare(t, dst->hp_waiter)) { + dst->hp_waiter = t; + + if(dst->owner && litmus->compare(t, dst->owner)) + { + litmus->increase_prio(dst->owner, t); + } + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t); + } +#endif } +//// TODO: BREAK THIS UP INTO TWO STEPS: +//// 1) task to steal (and from what queue) +//// 2) update queues +//static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) +//{ +// /* must hold sem->lock */ +// +// struct kfmlp_queue *my_queue = NULL; +// struct task_struct *max_hp = NULL; +// +// struct list_head *pos; +// struct task_struct *queued; +// int i; +// +// for(i = 0; i < sem->num_resources; ++i) +// { +// if( (sem->queues[i].count > 1) && +// ((my_queue == NULL) || +// //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) +// (litmus->compare(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) +// { +// my_queue = &sem->queues[i]; +// } +// } +// +// if(my_queue) +// { +// max_hp = my_queue->hp_waiter; +// +// BUG_ON(!max_hp); +// +// TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", +// kfmlp_get_idx(sem, my_queue), +// max_hp->comm, max_hp->pid, +// kfmlp_get_idx(sem, my_queue)); +// +// my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); +// +// if(tsk_rt(my_queue->owner)->inh_task == max_hp) +// { +// litmus->decrease_prio(my_queue->owner, my_queue->hp_waiter); +// } +// +// list_for_each(pos, &my_queue->wait.task_list) +// { +// queued = (struct task_struct*) list_entry(pos, wait_queue_t, +// task_list)->private; +// /* Compare task prios, find high prio task. */ +// if (queued == max_hp) +// { +// /* +// TRACE_CUR("queue %d: found entry in wait queue. 
REMOVING!\n", +// kfmlp_get_idx(sem, my_queue)); +// */ +// __remove_wait_queue(&my_queue->wait, +// list_entry(pos, wait_queue_t, task_list)); +// break; +// } +// } +// --(my_queue->count); +// +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +// if(sem->aff_obs) { +// sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, max_hp); +// } +//#endif +// } +// +// return(max_hp); +//} int kfmlp_lock(struct litmus_lock* l) { struct task_struct* t = current; struct kfmlp_semaphore *sem = kfmlp_from_lock(l); - struct kfmlp_queue* my_queue; + struct kfmlp_queue* my_queue = NULL; wait_queue_t wait; unsigned long flags; @@ -143,7 +251,16 @@ int kfmlp_lock(struct litmus_lock* l) spin_lock_irqsave(&sem->lock, flags); +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t); + } + if(!my_queue) { + my_queue = sem->shortest_queue; + } +#else my_queue = sem->shortest_queue; +#endif if (my_queue->owner) { /* resource is not free => must suspend and wait */ @@ -170,7 +287,17 @@ int kfmlp_lock(struct litmus_lock* l) } ++(my_queue->count); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + } + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + } +#else sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); +#endif /* release lock before sleeping */ spin_unlock_irqrestore(&sem->lock, flags); @@ -206,7 +333,18 @@ int kfmlp_lock(struct litmus_lock* l) my_queue->owner = t; ++(my_queue->count); - sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(my_queue == sem->shortest_queue) { + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); + } + if(sem->aff_obs) { + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t); + } +#else + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); +#endif spin_unlock_irqrestore(&sem->lock, flags); } @@ -219,7 +357,7 @@ int kfmlp_unlock(struct litmus_lock* l) { struct task_struct *t = current, *next; struct kfmlp_semaphore *sem = kfmlp_from_lock(l); - struct kfmlp_queue *my_queue; + struct kfmlp_queue *my_queue, *to_steal_from; unsigned long flags; int err = 0; @@ -227,29 +365,43 @@ int kfmlp_unlock(struct litmus_lock* l) my_queue = kfmlp_get_queue(sem, t); - if (!my_queue) { + if (!my_queue || my_queue->owner != t) { err = -EINVAL; goto out; } + my_queue->owner = NULL; // clear ownership + --(my_queue->count); + + if(my_queue->count < sem->shortest_queue->count) + { + sem->shortest_queue = my_queue; + } + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t); + sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t); + } +#endif + + /* we lose the benefit of priority inheritance (if any) */ + if (tsk_rt(t)->inh_task) + litmus->decrease_prio(t, NULL); + + /* check if there are jobs waiting for this resource */ +RETRY: next = __waitqueue_remove_first(&my_queue->wait); if (next) { - /* - TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", - kfmlp_get_idx(sem, my_queue), - next->comm, next->pid); - */ /* next becomes the resouce holder */ my_queue->owner = next; - --(my_queue->count); - // the '=' of '<=' is a dumb method to attempt to build - // affinity until tasks can tell us where they ran last... 
- if(my_queue->count <= sem->shortest_queue->count) - { - sem->shortest_queue = my_queue; - } +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + if(sem->aff_obs) { + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next); + } +#endif TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", kfmlp_get_idx(sem, my_queue), next->comm, next->pid); @@ -257,10 +409,6 @@ int kfmlp_unlock(struct litmus_lock* l) /* determine new hp_waiter if necessary */ if (next == my_queue->hp_waiter) { TRACE_TASK(next, "was highest-prio waiter\n"); - /* next has the highest priority --- it doesn't need to - * inherit. However, we need to make sure that the - * next-highest priority in the queue is reflected in - * hp_waiter. */ my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); if (my_queue->hp_waiter) TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); @@ -278,46 +426,34 @@ int kfmlp_unlock(struct litmus_lock* l) } else { - TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); - - next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ + // TODO: put this stealing logic before we attempt to release + // our resource. (simplifies code and gets rid of ugly goto RETRY. + wait_queue_t *wait; - /* - if(next) - TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", - kfmlp_get_idx(sem, my_queue), - next->comm, next->pid); - */ - - my_queue->owner = next; + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + next = (sem->aff_obs) ? + sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) : + kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#else + next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from); +#endif - if(next) - { - TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", - kfmlp_get_idx(sem, my_queue), - next->comm, next->pid); + if(next) { + kfmlp_steal_node(sem, my_queue, wait, to_steal_from); - /* wake up next */ - wake_up_process(next); + TRACE_CUR("queued %d: stole %s/%d from queue %d\n", + next->comm, next->pid, + kfmlp_get_idx(sem, to_steal_from)); + + goto RETRY; // will succeed this time. } - else - { + else { TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); - - --(my_queue->count); - // the '=' of '<=' is a dumb method to attempt to build - // affinity until tasks can tell us where they ran last... 
- if(my_queue->count <= sem->shortest_queue->count) - { - sem->shortest_queue = my_queue; - } } } - /* we lose the benefit of priority inheritance (if any) */ - if (tsk_rt(t)->inh_task) - litmus->decrease_prio(t, NULL); - out: spin_unlock_irqrestore(&sem->lock, flags); @@ -403,3 +539,337 @@ struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) return &sem->litmus_lock; } + + + + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + +int kfmlp_aff_obs_close(struct affinity_observer* obs) +{ + return 0; +} + +void kfmlp_aff_obs_free(struct affinity_observer* obs) +{ + struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); + kfree(kfmlp_aff->q_info); + kfree(kfmlp_aff); +} + +static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops, + struct kfmlp_affinity_ops* kfmlp_ops, + void* __user args) +{ + struct kfmlp_affinity* kfmlp_aff; + struct kfmlp_gpu_affinity_observer_args aff_args; + struct kfmlp_semaphore* sem; + int i; + unsigned long flags; + + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) + { + return(NULL); + } + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) + { + return(NULL); + } + + sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); + + if(sem->litmus_lock.type != KFMLP_SEM) + { + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); + return(NULL); + } + + kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); + if(!kfmlp_aff) + { + return(NULL); + } + + kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); + if(!kfmlp_aff->q_info) + { + kfree(kfmlp_aff); + return(NULL); + } + + kfmlp_aff->obs.ops = ops; + kfmlp_aff->ops = kfmlp_ops; + kfmlp_aff->offset = aff_args.replica_to_gpu_offset; + + for(i = 0; i < sem->num_resources; ++i) + { + kfmlp_aff->q_info[i].q = &sem->queues[i]; + kfmlp_aff->q_info[i].estimated_len = 0; + } + + spin_lock_irqsave(&sem->lock, flags); + sem->aff_obs = kfmlp_aff; + kfmlp_aff->shortest_queue = &kfmlp_aff->q_info[kfmlp_get_idx(sem, sem->shortest_queue)]; + spin_unlock_irqrestore(&sem->lock, flags); + + return &kfmlp_aff->obs; +} + + + + +// Smart KFMLP Affinity + +static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + struct kfmlp_queue_info *shortest = &aff->q_info[0]; + int i; + + for(i = 1; i < sem->num_resources; ++i) { + if(aff->q_info[i].estimated_len < shortest->estimated_len) { + shortest = &aff->q_info[i]; + } + } + + return(shortest); +} + +struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + lt_t min_len; + struct kfmlp_queue_info *shortest; + struct kfmlp_queue *to_enqueue; + int i; + + // simply pick the shortest queue if, we have no affinity, or we have + // affinity with the shortest + if((tsk_rt(t)->last_gpu < 0) || + ((kfmlp_get_idx(sem, aff->shortest_queue->q) + aff->offset) == tsk_rt(t)->last_gpu)) { + // we have affinity with the shorest queue. pick it. 
+ to_enqueue = aff->shortest_queue->q; + + TRACE_CUR("special case: no affinity or have affinity with shortest\n"); + + goto out; + } + + // enqueue where we will have the shortest time to completion + + shortest = &aff->q_info[0]; + min_len = shortest->estimated_len + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, 0 + aff->offset)); + + for(i = 1; i < sem->num_resources; ++i) { + lt_t est_len = + aff->q_info[i].estimated_len + + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, i + aff->offset)); + + if(est_len < min_len) { + shortest = &aff->q_info[i]; + min_len = est_len; + } + } + to_enqueue = shortest->q; + +out: + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", + kfmlp_get_idx(sem, to_enqueue), + kfmlp_get_idx(sem, sem->shortest_queue)); + + return to_enqueue; +} + +struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + + // For now, just steal from the shortest (by number) queue. + // TODO: Implement affinity-aware stealing. + + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + + +void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = aff->offset + replica; + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time; + + if(current == t) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } + + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + info->estimated_len += est_time; + + TRACE_CUR("fq %d est len is now %llu\n", + kfmlp_get_idx(sem, aff->shortest_queue->q), + aff->shortest_queue->estimated_len); + + if(aff->shortest_queue == info) { + // we may no longer be the shortest + aff->shortest_queue = kfmlp_aff_find_shortest(aff); + + TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + kfmlp_get_idx(sem, aff->shortest_queue->q), + aff->shortest_queue->q->count, + aff->shortest_queue->estimated_len); + } +} + +void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int replica = kfmlp_get_idx(sem, fq); + int gpu = aff->offset + replica; + struct kfmlp_queue_info *info = &aff->q_info[replica]; + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + if(est_time > info->estimated_len) { + WARN_ON(1); + info->estimated_len = 0; + } + else { + info->estimated_len -= est_time; + } + + TRACE_CUR("fq %d est len is now %llu\n", + kfmlp_get_idx(sem, info->q), + info->estimated_len); + + // check to see if we're the shortest queue now. + if((aff->shortest_queue != info) && + (aff->shortest_queue->estimated_len > info->estimated_len)) { + + aff->shortest_queue = info; + + TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", + kfmlp_get_idx(sem, info->q), + info->q->count, + info->estimated_len); + } +} + +void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration + + TRACE_CUR("%s/%d acquired gpu %d. 
migration type = %d\n", + t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); + + reg_nv_device(gpu, 1); // register + + tsk_rt(t)->suspend_gpu_tracker_on_block = 0; + reset_gpu_tracker(t); + start_gpu_tracker(t); +} + +void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + lt_t est_time; + + stop_gpu_tracker(t); // stop the tracker before we do anything else. + + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); + + tsk_rt(t)->last_gpu = gpu; + reg_nv_device(gpu, 0); // unregister + + // update estimates + update_gpu_estimate(t, get_gpu_time(t)); + + TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n", + t->comm, t->pid, gpu, + get_gpu_time(t), + est_time, + (long long)get_gpu_time(t) - (long long)est_time); +} + +struct kfmlp_affinity_ops gpu_kfmlp_affinity = +{ + .advise_enqueue = gpu_kfmlp_advise_enqueue, + .advise_steal = gpu_kfmlp_advise_steal, + .notify_enqueue = gpu_kfmlp_notify_enqueue, + .notify_dequeue = gpu_kfmlp_notify_dequeue, + .notify_acquired = gpu_kfmlp_notify_acquired, + .notify_freed = gpu_kfmlp_notify_freed +}; + +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args); +} + + + + + + + + +// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration) + +struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + return sem->shortest_queue; +} + +struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); +} + +void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +} + +void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ +} + +void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + + reg_nv_device(gpu, 1); // register +} + +void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) +{ + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); + int gpu = kfmlp_get_idx(sem, fq) + aff->offset; + + reg_nv_device(gpu, 0); // unregister +} + +struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = +{ + .advise_enqueue = simple_gpu_kfmlp_advise_enqueue, + .advise_steal = simple_gpu_kfmlp_advise_steal, + .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, + .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, + .notify_acquired = simple_gpu_kfmlp_notify_acquired, + .notify_freed = simple_gpu_kfmlp_notify_freed +}; + +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, + void* __user args) +{ + return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args); +} + +#endif + diff --git a/litmus/litmus.c b/litmus/litmus.c index 2f9079421ec7..dd8b72e1af08 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -387,6 +387,13 @@ static void 
reinit_litmus_state(struct task_struct* p, int restore) p->rt_param.ctrl_page = ctrl_page; } +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + p->rt_param.gpu_fb_param_a = _frac(14008, 1000); + p->rt_param.gpu_fb_param_b = _frac(16024, 1000); + p->rt_param.gpu_migration = MIG_LAST; + p->rt_param.last_gpu = -1; +#endif + #ifdef CONFIG_LITMUS_NESTED_LOCKING INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order); raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock); diff --git a/litmus/locking.c b/litmus/locking.c index 6d28efe97c91..ef13062913ce 100644 --- a/litmus/locking.c +++ b/litmus/locking.c @@ -10,6 +10,10 @@ #include #endif +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif + static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); static int open_generic_lock(struct od_table_entry* entry, void* __user arg); static int close_generic_lock(struct od_table_entry* entry); @@ -50,6 +54,7 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node); WARN_ON(!(lock->nest.hp_waiter_ptr)); #endif + lock->type = type; lock->ident = atomic_inc_return(&lock_id_gen); *obj_ref = lock; } @@ -292,6 +297,14 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", dgl_wait->nr_remaining); + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(dgl_wait->task)->held_gpus) { + tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; + } +#endif // note reverse order. see comments in select_next_lock for reason. for(i = dgl_wait->size - 1; i >= 0; --i) { diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c index aaca93c1e5d1..0a851cd430a7 100644 --- a/litmus/rsm_lock.c +++ b/litmus/rsm_lock.c @@ -7,6 +7,10 @@ //#include +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif + /* caller is responsible for locking */ static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex, @@ -202,7 +206,15 @@ int rsm_mutex_lock(struct litmus_lock* l) if (mutex->owner) { TRACE_TASK(t, "Blocking on lock %d.\n", l->ident); - + +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) + // KLUDGE: don't count this suspension as time in the critical gpu + // critical section + if(tsk_rt(t)->held_gpus) { + tsk_rt(t)->suspend_gpu_tracker_on_block = 1; + } +#endif + /* resource is not free => must suspend and wait */ owner = mutex->owner; diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index 1440372227c6..b4ab2361e37a 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c @@ -61,6 +61,9 @@ #include #endif +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) +#include +#endif /* Overview of GSN-EDF operations. * @@ -813,6 +816,14 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) */ if (blocks) unlink(entry->scheduled); + +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) + if(tsk_rt(entry->scheduled)->held_gpus) { + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { + stop_gpu_tracker(entry->scheduled); + } + } +#endif /* Request a sys_exit_np() call if we would like to preempt but cannot. 
* We need to make sure to update the link structure anyway in case @@ -862,7 +873,7 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) if (exists) next = prev; } - + sched_state_task_picked(); raw_spin_unlock(&gsnedf_lock); @@ -1429,9 +1440,6 @@ static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg) return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg); } - - - /* ******************** FMLP support ********************** */ /* struct for semaphore with priority inheritance */ @@ -1676,7 +1684,57 @@ UNSUPPORTED_LOCK: return err; } +#endif // CONFIG_LITMUS_LOCKING + + + + + +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING +static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = { + .close = kfmlp_aff_obs_close, + .deallocate = kfmlp_aff_obs_free, +}; + +static long gsnedf_allocate_affinity_observer( + struct affinity_observer **aff_obs, + int type, + void* __user args) +{ + int err; + + /* GSN-EDF currently only supports the FMLP for global resources. */ + switch (type) { + + case KFMLP_SIMPLE_GPU_AFF_OBS: + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; + case KFMLP_GPU_AFF_OBS: + *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); + break; +#ifdef CONFIG_LITMUS_NESTED_LOCKING +// case IKGLP_GPU_AFF_OBS: +// *aff_obs = gsnedf_new_ikglp_aff(arg); +// break; #endif + default: + err = -ENXIO; + goto UNSUPPORTED_AFF_OBS; + }; + + if (*aff_obs) + err = 0; + else + err = -ENOMEM; + +UNSUPPORTED_AFF_OBS: + return err; +} +#endif + + + + static long gsnedf_activate_plugin(void) { @@ -1746,6 +1804,9 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { #ifdef CONFIG_LITMUS_DGL_SUPPORT .get_dgl_spinlock = gsnedf_get_dgl_spinlock, #endif +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING + .allocate_aff_obs = gsnedf_allocate_affinity_observer, +#endif #ifdef CONFIG_LITMUS_SOFTIRQD .increase_prio_klitirqd = increase_priority_inheritance_klitirqd, .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd, -- cgit v1.2.2