#include <linux/slab.h>
#include <linux/uaccess.h>
#include <litmus/trace.h>
#include <litmus/sched_plugin.h>
#include <litmus/fdso.h>
#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
#include <litmus/gpu_affinity.h>
#include <litmus/nvidia_info.h>
#endif
#include <litmus/kfmlp_lock.h>
static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
struct kfmlp_queue* queue)
{
return (queue - &sem->queues[0]);
}
static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
struct task_struct* holder)
{
int i;
for(i = 0; i < sem->num_resources; ++i)
if(sem->queues[i].owner == holder)
return(&sem->queues[i]);
return(NULL);
}
/* caller is responsible for locking */
static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
struct task_struct *skip)
{
struct list_head *pos;
struct task_struct *queued, *found = NULL;
list_for_each(pos, &kqueue->wait.task_list) {
queued = (struct task_struct*) list_entry(pos, wait_queue_t,
task_list)->private;
/* Compare task prios, find high prio task. */
//if (queued != skip && edf_higher_prio(queued, found))
if (queued != skip && litmus->compare(queued, found))
found = queued;
}
return found;
}
static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
struct kfmlp_queue* search_start)
{
// we start our search at search_start instead of at the beginning of the
// queue list to load-balance across all resources.
struct kfmlp_queue* step = search_start;
struct kfmlp_queue* shortest = sem->shortest_queue;
do
{
step = (step+1 != &sem->queues[sem->num_resources]) ?
step+1 : &sem->queues[0];
if(step->count < shortest->count)
{
shortest = step;
if(step->count == 0)
break; /* can't get any shorter */
}
}while(step != search_start);
return(shortest);
}
static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
wait_queue_t** to_steal,
struct kfmlp_queue** to_steal_from)
{
/* must hold sem->lock */
int i;
*to_steal = NULL;
*to_steal_from = NULL;
for(i = 0; i < sem->num_resources; ++i)
{
if( (sem->queues[i].count > 1) &&
((*to_steal_from == NULL) ||
//(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
(litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
{
*to_steal_from = &sem->queues[i];
}
}
if(*to_steal_from)
{
struct list_head *pos;
struct task_struct *target = (*to_steal_from)->hp_waiter;
TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
target->comm,
target->pid,
kfmlp_get_idx(sem, *to_steal_from));
list_for_each(pos, &(*to_steal_from)->wait.task_list)
{
wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
struct task_struct *queued = (struct task_struct*) node->private;
/* Compare task prios, find high prio task. */
if (queued == target)
{
*to_steal = node;
TRACE_CUR("steal: selected %s/%d from queue %d\n",
queued->comm, queued->pid,
kfmlp_get_idx(sem, *to_steal_from));
return queued;
}
}
TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
target->comm,
target->pid,
kfmlp_get_idx(sem, *to_steal_from));
}
return NULL;
}
static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
struct kfmlp_queue *dst,
wait_queue_t *wait,
struct kfmlp_queue *src)
{
struct task_struct* t = (struct task_struct*) wait->private;
__remove_wait_queue(&src->wait, wait);
--(src->count);
if(t == src->hp_waiter) {
src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
kfmlp_get_idx(sem, src),
(src->hp_waiter) ? src->hp_waiter->comm : "nil",
(src->hp_waiter) ? src->hp_waiter->pid : -1);
if(src->owner && tsk_rt(src->owner)->inh_task == t) {
litmus->decrease_prio(src->owner, src->hp_waiter);
}
}
if(sem->shortest_queue->count > src->count) {
sem->shortest_queue = src;
TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
}
#endif
init_waitqueue_entry(wait, t);
__add_wait_queue_tail_exclusive(&dst->wait, wait);
++(dst->count);
if(litmus->compare(t, dst->hp_waiter)) {
dst->hp_waiter = t;
TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
kfmlp_get_idx(sem, dst),
t->comm, t->pid);
if(dst->owner && litmus->compare(t, dst->owner))
{
litmus->increase_prio(dst->owner, t);
}
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
}
#endif
}
int kfmlp_lock(struct litmus_lock* l)
{
struct task_struct* t = current;
struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
struct kfmlp_queue* my_queue = NULL;
wait_queue_t wait;
unsigned long flags;
if (!is_realtime(t))
return -EPERM;
spin_lock_irqsave(&sem->lock, flags);
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
}
if(!my_queue) {
my_queue = sem->shortest_queue;
}
#else
my_queue = sem->shortest_queue;
#endif
if (my_queue->owner) {
/* resource is not free => must suspend and wait */
TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
kfmlp_get_idx(sem, my_queue),
my_queue->count);
init_waitqueue_entry(&wait, t);
/* FIXME: interruptible would be nice some day */
set_task_state(t, TASK_UNINTERRUPTIBLE);
__add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
kfmlp_get_idx(sem, my_queue),
(my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
(my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
/* check if we need to activate priority inheritance */
//if (edf_higher_prio(t, my_queue->hp_waiter))
if (litmus->compare(t, my_queue->hp_waiter)) {
my_queue->hp_waiter = t;
TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
kfmlp_get_idx(sem, my_queue),
t->comm, t->pid);
//if (edf_higher_prio(t, my_queue->owner))
if (litmus->compare(t, my_queue->owner)) {
litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
}
}
++(my_queue->count);
if(my_queue == sem->shortest_queue) {
sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
TRACE_CUR("queue %d is the shortest\n",
kfmlp_get_idx(sem, sem->shortest_queue));
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
}
#endif
/* release lock before sleeping */
spin_unlock_irqrestore(&sem->lock, flags);
/* We depend on the FIFO order. Thus, we don't need to recheck
* when we wake up; we are guaranteed to have the lock since
* there is only one wake up per release (or steal).
*/
schedule();
if(my_queue->owner == t) {
TRACE_CUR("queue %d: acquired through waiting\n",
kfmlp_get_idx(sem, my_queue));
}
else {
/* this case may happen if our wait entry was stolen
between queues. record where we went. */
my_queue = kfmlp_get_queue(sem, t);
BUG_ON(!my_queue);
TRACE_CUR("queue %d: acquired through stealing\n",
kfmlp_get_idx(sem, my_queue));
}
}
else {
TRACE_CUR("queue %d: acquired immediately\n",
kfmlp_get_idx(sem, my_queue));
my_queue->owner = t;
++(my_queue->count);
if(my_queue == sem->shortest_queue) {
sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
TRACE_CUR("queue %d is the shortest\n",
kfmlp_get_idx(sem, sem->shortest_queue));
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
}
#endif
spin_unlock_irqrestore(&sem->lock, flags);
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
}
#endif
return kfmlp_get_idx(sem, my_queue);
}
int kfmlp_unlock(struct litmus_lock* l)
{
struct task_struct *t = current, *next;
struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
struct kfmlp_queue *my_queue, *to_steal_from;
unsigned long flags;
int err = 0;
my_queue = kfmlp_get_queue(sem, t);
if (!my_queue) {
err = -EINVAL;
goto out;
}
spin_lock_irqsave(&sem->lock, flags);
TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
my_queue->owner = NULL; // clear ownership
--(my_queue->count);
if(my_queue->count < sem->shortest_queue->count)
{
sem->shortest_queue = my_queue;
TRACE_CUR("queue %d is the shortest\n",
kfmlp_get_idx(sem, sem->shortest_queue));
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
}
#endif
/* we lose the benefit of priority inheritance (if any) */
if (tsk_rt(t)->inh_task)
litmus->decrease_prio(t, NULL);
/* check if there are jobs waiting for this resource */
RETRY:
next = __waitqueue_remove_first(&my_queue->wait);
if (next) {
/* next becomes the resouce holder */
my_queue->owner = next;
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
}
#endif
TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
/* determine new hp_waiter if necessary */
if (next == my_queue->hp_waiter) {
TRACE_TASK(next, "was highest-prio waiter\n");
my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
if (my_queue->hp_waiter)
TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
else
TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
} else {
/* Well, if next is not the highest-priority waiter,
* then it ought to inherit the highest-priority
* waiter's priority. */
litmus->increase_prio(next, my_queue->hp_waiter);
}
/* wake up next */
wake_up_process(next);
}
else {
// TODO: put this stealing logic before we attempt to release
// our resource. (simplifies code and gets rid of ugly goto RETRY.
wait_queue_t *wait;
TRACE_CUR("queue %d: looking to steal someone...\n",
kfmlp_get_idx(sem, my_queue));
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
next = (sem->aff_obs) ?
sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
#else
next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
#endif
if(next) {
TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
kfmlp_get_idx(sem, my_queue),
next->comm, next->pid,
kfmlp_get_idx(sem, to_steal_from));
kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
goto RETRY; // will succeed this time.
}
else {
TRACE_CUR("queue %d: no one to steal.\n",
kfmlp_get_idx(sem, my_queue));
}
}
spin_unlock_irqrestore(&sem->lock, flags);
out:
return err;
}
int kfmlp_close(struct litmus_lock* l)
{
struct task_struct *t = current;
struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
struct kfmlp_queue *my_queue;
unsigned long flags;
int owner;
spin_lock_irqsave(&sem->lock, flags);
my_queue = kfmlp_get_queue(sem, t);
owner = (my_queue) ? (my_queue->owner == t) : 0;
spin_unlock_irqrestore(&sem->lock, flags);
if (owner)
kfmlp_unlock(l);
return 0;
}
void kfmlp_free(struct litmus_lock* l)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
kfree(sem->queues);
kfree(sem);
}
struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
{
struct kfmlp_semaphore* sem;
int num_resources = 0;
int i;
if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
{
return(NULL);
}
if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
{
return(NULL);
}
if(num_resources < 1)
{
return(NULL);
}
sem = kmalloc(sizeof(*sem), GFP_KERNEL);
if(!sem)
{
return(NULL);
}
sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
if(!sem->queues)
{
kfree(sem);
return(NULL);
}
sem->litmus_lock.ops = ops;
spin_lock_init(&sem->lock);
sem->num_resources = num_resources;
for(i = 0; i < num_resources; ++i)
{
sem->queues[i].owner = NULL;
sem->queues[i].hp_waiter = NULL;
init_waitqueue_head(&sem->queues[i].wait);
sem->queues[i].count = 0;
}
sem->shortest_queue = &sem->queues[0];
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
sem->aff_obs = NULL;
#endif
return &sem->litmus_lock;
}
#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
{
int gpu = replica % aff->nr_rsrc;
return gpu;
}
static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
{
int gpu = __replica_to_gpu(aff, replica) + aff->offset;
return gpu;
}
static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
{
int replica = gpu - aff->offset;
return replica;
}
int kfmlp_aff_obs_close(struct affinity_observer* obs)
{
return 0;
}
void kfmlp_aff_obs_free(struct affinity_observer* obs)
{
struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
kfree(kfmlp_aff->nr_cur_users_on_rsrc);
kfree(kfmlp_aff->q_info);
kfree(kfmlp_aff);
}
static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
struct kfmlp_affinity_ops* kfmlp_ops,
void* __user args)
{
struct kfmlp_affinity* kfmlp_aff;
struct gpu_affinity_observer_args aff_args;
struct kfmlp_semaphore* sem;
int i;
unsigned long flags;
if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
return(NULL);
}
if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
return(NULL);
}
sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
if(sem->litmus_lock.type != KFMLP_SEM) {
TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
return(NULL);
}
if((aff_args.nr_simult_users <= 0) ||
(sem->num_resources%aff_args.nr_simult_users != 0)) {
TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
"(%d) per replica. #replicas should be evenly divisible "
"by #simult_users.\n",
sem->litmus_lock.ident,
sem->num_resources,
aff_args.nr_simult_users);
return(NULL);
}
if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
return(NULL);
}
kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
if(!kfmlp_aff) {
return(NULL);
}
kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
if(!kfmlp_aff->q_info) {
kfree(kfmlp_aff);
return(NULL);
}
kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
if(!kfmlp_aff->nr_cur_users_on_rsrc) {
kfree(kfmlp_aff->q_info);
kfree(kfmlp_aff);
return(NULL);
}
affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
kfmlp_aff->ops = kfmlp_ops;
kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
kfmlp_aff->nr_simult = aff_args.nr_simult_users;
kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc));
for(i = 0; i < sem->num_resources; ++i) {
kfmlp_aff->q_info[i].q = &sem->queues[i];
kfmlp_aff->q_info[i].estimated_len = 0;
// multiple q_info's will point to the same resource (aka GPU) if
// aff_args.nr_simult_users > 1
kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
}
// attach observer to the lock
spin_lock_irqsave(&sem->lock, flags);
sem->aff_obs = kfmlp_aff;
spin_unlock_irqrestore(&sem->lock, flags);
return &kfmlp_aff->obs;
}
static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
struct kfmlp_queue* fq) {
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
}
// Smart KFMLP Affinity
//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
//{
// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
// struct kfmlp_queue_info *shortest = &aff->q_info[0];
// int i;
//
// for(i = 1; i < sem->num_resources; ++i) {
// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
// shortest = &aff->q_info[i];
// }
// }
//
// return(shortest);
//}
struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
lt_t min_len;
int min_nr_users;
struct kfmlp_queue_info *shortest;
struct kfmlp_queue *to_enqueue;
int i;
int affinity_gpu;
// simply pick the shortest queue if, we have no affinity, or we have
// affinity with the shortest
if(unlikely(tsk_rt(t)->last_gpu < 0)) {
affinity_gpu = aff->offset; // first gpu
TRACE_CUR("no affinity\n");
}
else {
affinity_gpu = tsk_rt(t)->last_gpu;
}
// all things being equal, let's start with the queue with which we have
// affinity. this helps us maintain affinity even when we don't have
// an estiamte for local-affinity execution time (i.e., 2nd time on GPU)
shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
// if(shortest == aff->shortest_queue) {
// TRACE_CUR("special case: have affinity with shortest queue\n");
// goto out;
// }
min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
min_nr_users = *(shortest->nr_cur_users);
TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
get_gpu_estimate(t, MIG_LOCAL),
kfmlp_get_idx(sem, shortest->q),
min_len);
for(i = 0; i < sem->num_resources; ++i) {
if(&aff->q_info[i] != shortest) {
lt_t est_len =
aff->q_info[i].estimated_len +
get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
// queue is smaller, or they're equal and the other has a smaller number
// of total users.
//
// tie-break on the shortest number of simult users. this only kicks in
// when there are more than 1 empty queues.
if((est_len < min_len) ||
((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
shortest = &aff->q_info[i];
min_len = est_len;
min_nr_users = *(aff->q_info[i].nr_cur_users);
}
TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
kfmlp_get_idx(sem, aff->q_info[i].q),
est_len);
}
}
to_enqueue = shortest->q;
TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
kfmlp_get_idx(sem, to_enqueue),
kfmlp_get_idx(sem, sem->shortest_queue));
return to_enqueue;
}
struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
// For now, just steal highest priority waiter
// TODO: Implement affinity-aware stealing.
return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
}
void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
int replica = kfmlp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
struct kfmlp_queue_info *info = &aff->q_info[replica];
lt_t est_time;
lt_t est_len_before;
if(current == t) {
tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
}
est_len_before = info->estimated_len;
est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
info->estimated_len += est_time;
TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
kfmlp_get_idx(sem, info->q),
est_len_before, est_time,
info->estimated_len);
// if(aff->shortest_queue == info) {
// // we may no longer be the shortest
// aff->shortest_queue = kfmlp_aff_find_shortest(aff);
//
// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
// kfmlp_get_idx(sem, aff->shortest_queue->q),
// aff->shortest_queue->q->count,
// aff->shortest_queue->estimated_len);
// }
}
void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
int replica = kfmlp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
struct kfmlp_queue_info *info = &aff->q_info[replica];
lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
if(est_time > info->estimated_len) {
WARN_ON(1);
info->estimated_len = 0;
}
else {
info->estimated_len -= est_time;
}
TRACE_CUR("fq %d est len is now %llu\n",
kfmlp_get_idx(sem, info->q),
info->estimated_len);
// check to see if we're the shortest queue now.
// if((aff->shortest_queue != info) &&
// (aff->shortest_queue->estimated_len > info->estimated_len)) {
//
// aff->shortest_queue = info;
//
// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
// kfmlp_get_idx(sem, info->q),
// info->q->count,
// info->estimated_len);
// }
}
void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
int replica = kfmlp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
// count the number or resource holders
++(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 1, t); // register
tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
reset_gpu_tracker(t);
start_gpu_tracker(t);
}
void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
int replica = kfmlp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
lt_t est_time;
stop_gpu_tracker(t); // stop the tracker before we do anything else.
est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
tsk_rt(t)->last_gpu = gpu;
// count the number or resource holders
--(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 0, t); // unregister
// update estimates
update_gpu_estimate(t, get_gpu_time(t));
TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n",
t->comm, t->pid, gpu,
get_gpu_time(t),
est_time,
(long long)get_gpu_time(t) - (long long)est_time);
}
struct kfmlp_affinity_ops gpu_kfmlp_affinity =
{
.advise_enqueue = gpu_kfmlp_advise_enqueue,
.advise_steal = gpu_kfmlp_advise_steal,
.notify_enqueue = gpu_kfmlp_notify_enqueue,
.notify_dequeue = gpu_kfmlp_notify_dequeue,
.notify_acquired = gpu_kfmlp_notify_acquired,
.notify_freed = gpu_kfmlp_notify_freed,
.replica_to_resource = gpu_replica_to_resource,
};
struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
void* __user args)
{
return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
}
// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
int min_count;
int min_nr_users;
struct kfmlp_queue_info *shortest;
struct kfmlp_queue *to_enqueue;
int i;
// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
shortest = &aff->q_info[0];
min_count = shortest->q->count;
min_nr_users = *(shortest->nr_cur_users);
TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
kfmlp_get_idx(sem, shortest->q),
shortest->q->count,
min_nr_users);
for(i = 1; i < sem->num_resources; ++i) {
int len = aff->q_info[i].q->count;
// queue is smaller, or they're equal and the other has a smaller number
// of total users.
//
// tie-break on the shortest number of simult users. this only kicks in
// when there are more than 1 empty queues.
if((len < min_count) ||
((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
shortest = &aff->q_info[i];
min_count = shortest->q->count;
min_nr_users = *(aff->q_info[i].nr_cur_users);
}
TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
kfmlp_get_idx(sem, aff->q_info[i].q),
aff->q_info[i].q->count,
*(aff->q_info[i].nr_cur_users));
}
to_enqueue = shortest->q;
TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
kfmlp_get_idx(sem, to_enqueue),
kfmlp_get_idx(sem, sem->shortest_queue));
return to_enqueue;
}
struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
}
void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
}
void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
}
void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
int replica = kfmlp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
// count the number or resource holders
++(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 1, t); // register
}
void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
{
struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
int replica = kfmlp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
// count the number or resource holders
--(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 0, t); // unregister
}
struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
{
.advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
.advise_steal = simple_gpu_kfmlp_advise_steal,
.notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
.notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
.notify_acquired = simple_gpu_kfmlp_notify_acquired,
.notify_freed = simple_gpu_kfmlp_notify_freed,
.replica_to_resource = gpu_replica_to_resource,
};
struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
void* __user args)
{
return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
}
#endif