#include <linux/slab.h>
#include <linux/uaccess.h>
#include <litmus/trace.h>
#include <litmus/sched_plugin.h>
#include <litmus/fdso.h>
#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
#include <litmus/gpu_affinity.h>
#include <litmus/nvidia_info.h>
#endif
#include <litmus/ikglp_lock.h>
int ikglp_max_heap_base_priority_order(struct binheap_node *a,
struct binheap_node *b)
{
ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
BUG_ON(!d_a);
BUG_ON(!d_b);
return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
}
int ikglp_min_heap_base_priority_order(struct binheap_node *a,
struct binheap_node *b)
{
ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
}
int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
struct binheap_node *b)
{
ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
}
int ikglp_min_heap_donee_order(struct binheap_node *a,
struct binheap_node *b)
{
struct task_struct *prio_a, *prio_b;
ikglp_donee_heap_node_t *d_a =
binheap_entry(a, ikglp_donee_heap_node_t, node);
ikglp_donee_heap_node_t *d_b =
binheap_entry(b, ikglp_donee_heap_node_t, node);
if(!d_a->donor_info) {
prio_a = d_a->task;
}
else {
prio_a = d_a->donor_info->task;
BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
}
if(!d_b->donor_info) {
prio_b = d_b->task;
}
else {
prio_b = d_b->donor_info->task;
BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
}
// note reversed order
return litmus->__compare(prio_b, BASE, prio_a, BASE);
}
static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
struct fifo_queue *queue)
{
return (queue - &sem->fifo_queues[0]);
}
static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
struct task_struct *holder)
{
int i;
for(i = 0; i < sem->nr_replicas; ++i)
if(sem->fifo_queues[i].owner == holder)
return(&sem->fifo_queues[i]);
return(NULL);
}
static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
struct task_struct *skip)
{
struct list_head *pos;
struct task_struct *queued, *found = NULL;
list_for_each(pos, &kqueue->wait.task_list) {
queued = (struct task_struct*) list_entry(pos,
wait_queue_t, task_list)->private;
/* Compare task prios, find high prio task. */
if(queued != skip && litmus->compare(queued, found))
found = queued;
}
return found;
}
static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
struct fifo_queue *search_start)
{
// we start our search at search_start instead of at the beginning of the
// queue list to load-balance across all resources.
struct fifo_queue* step = search_start;
struct fifo_queue* shortest = sem->shortest_fifo_queue;
do {
step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
step+1 : &sem->fifo_queues[0];
if(step->count < shortest->count) {
shortest = step;
if(step->count == 0)
break; /* can't get any shorter */
}
}while(step != search_start);
return(shortest);
}
static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
{
return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
}
#if 0
static void print_global_list(struct binheap_node* n, int depth)
{
ikglp_heap_node_t *global_heap_node;
char padding[81] = " ";
if(n == NULL) {
TRACE_CUR("+-> %p\n", NULL);
return;
}
global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
if(depth*2 <= 80)
padding[depth*2] = '\0';
TRACE_CUR("%s+-> %s/%d\n",
padding,
global_heap_node->task->comm,
global_heap_node->task->pid);
if(n->left) print_global_list(n->left, depth+1);
if(n->right) print_global_list(n->right, depth+1);
}
static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
{
ikglp_donee_heap_node_t *donee_node;
char padding[81] = " ";
struct task_struct* donor = NULL;
if(n == NULL) {
TRACE_CUR("+-> %p\n", NULL);
return;
}
donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
if(depth*2 <= 80)
padding[depth*2] = '\0';
if(donee_node->donor_info) {
donor = donee_node->donor_info->task;
}
TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
padding,
donee_node->task->comm,
donee_node->task->pid,
(donor) ? donor->comm : "nil",
(donor) ? donor->pid : -1,
ikglp_get_idx(sem, donee_node->fq));
if(n->left) print_donees(sem, n->left, depth+1);
if(n->right) print_donees(sem, n->right, depth+1);
}
static void print_donors(struct binheap_node *n, int depth)
{
ikglp_wait_state_t *donor_node;
char padding[81] = " ";
if(n == NULL) {
TRACE_CUR("+-> %p\n", NULL);
return;
}
donor_node = binheap_entry(n, ikglp_wait_state_t, node);
if(depth*2 <= 80)
padding[depth*2] = '\0';
TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
padding,
donor_node->task->comm,
donor_node->task->pid,
donor_node->donee_info->task->comm,
donor_node->donee_info->task->pid);
if(n->left) print_donors(n->left, depth+1);
if(n->right) print_donors(n->right, depth+1);
}
#endif
static void ikglp_add_global_list(struct ikglp_semaphore *sem,
struct task_struct *t,
ikglp_heap_node_t *node)
{
node->task = t;
INIT_BINHEAP_NODE(&node->node);
if(sem->top_m_size < sem->m) {
TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
t->comm, t->pid);
// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
// print_global_list(sem->top_m.root, 1);
binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
++(sem->top_m_size);
// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
// print_global_list(sem->top_m.root, 1);
}
else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
ikglp_heap_node_t *evicted =
binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
t->comm, t->pid,
evicted->task->comm, evicted->task->pid);
// TRACE_CUR("Not-Top-M Before:\n");
// print_global_list(sem->not_top_m.root, 1);
// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
// print_global_list(sem->top_m.root, 1);
binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
INIT_BINHEAP_NODE(&evicted->node);
binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
// print_global_list(sem->top_m.root, 1);
// TRACE_CUR("Not-Top-M After:\n");
// print_global_list(sem->not_top_m.root, 1);
}
else {
TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
t->comm, t->pid);
// TRACE_CUR("Not-Top-M Before:\n");
// print_global_list(sem->not_top_m.root, 1);
binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
// TRACE_CUR("Not-Top-M After:\n");
// print_global_list(sem->not_top_m.root, 1);
}
}
static void ikglp_del_global_list(struct ikglp_semaphore *sem,
struct task_struct *t,
ikglp_heap_node_t *node)
{
BUG_ON(!binheap_is_in_heap(&node->node));
TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
// TRACE_CUR("Not-Top-M Before:\n");
// print_global_list(sem->not_top_m.root, 1);
// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
// print_global_list(sem->top_m.root, 1);
binheap_delete(&node->node, &sem->top_m);
if(!binheap_empty(&sem->not_top_m)) {
ikglp_heap_node_t *promoted =
binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
TRACE_CUR("Promoting %s/%d to top-m\n",
promoted->task->comm, promoted->task->pid);
binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
INIT_BINHEAP_NODE(&promoted->node);
binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
}
else {
TRACE_CUR("No one to promote to top-m.\n");
--(sem->top_m_size);
}
// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
// print_global_list(sem->top_m.root, 1);
// TRACE_CUR("Not-Top-M After:\n");
// print_global_list(sem->not_top_m.root, 1);
}
else {
// TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
// TRACE_CUR("Not-Top-M Before:\n");
// print_global_list(sem->not_top_m.root, 1);
binheap_delete(&node->node, &sem->not_top_m);
// TRACE_CUR("Not-Top-M After:\n");
// print_global_list(sem->not_top_m.root, 1);
}
}
static void ikglp_add_donees(struct ikglp_semaphore *sem,
struct fifo_queue *fq,
struct task_struct *t,
ikglp_donee_heap_node_t* node)
{
// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
// TRACE_CUR("donees Before:\n");
// print_donees(sem, sem->donees.root, 1);
node->task = t;
node->donor_info = NULL;
node->fq = fq;
INIT_BINHEAP_NODE(&node->node);
binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
// TRACE_CUR("donees After:\n");
// print_donees(sem, sem->donees.root, 1);
}
static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
struct fifo_queue *fq,
struct ikglp_semaphore *sem,
unsigned long flags)
{
// priority of 't' has increased (note: 't' might already be hp_waiter).
if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
struct task_struct *old_max_eff_prio;
struct task_struct *new_max_eff_prio;
struct task_struct *new_prio = NULL;
struct task_struct *owner = fq->owner;
if(fq->hp_waiter)
TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
fq->hp_waiter->comm, fq->hp_waiter->pid);
else
TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
if(owner)
{
raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
// TRACE_TASK(owner, "Heap Before:\n");
// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
fq->hp_waiter = t;
fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
binheap_decrease(&fq->nest.hp_binheap_node,
&tsk_rt(owner)->hp_blocked_tasks);
// TRACE_TASK(owner, "Heap After:\n");
// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
if(new_max_eff_prio != old_max_eff_prio) {
TRACE_TASK(t, "is new hp_waiter.\n");
if ((effective_priority(owner) == old_max_eff_prio) ||
(litmus->__compare(new_max_eff_prio, BASE,
owner, EFFECTIVE))){
new_prio = new_max_eff_prio;
}
}
else {
TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
}
if(new_prio) {
// set new inheritance and propagate
TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
owner->comm, owner->pid,
new_prio->comm, new_prio->pid);
litmus->nested_increase_prio(owner, new_prio, &sem->lock,
flags); // unlocks lock.
}
else {
TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n",
new_max_eff_prio->comm, new_max_eff_prio->pid);
raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
unlock_fine_irqrestore(&sem->lock, flags);
}
}
else {
fq->hp_waiter = t;
fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
TRACE_TASK(t, "no owner??\n");
unlock_fine_irqrestore(&sem->lock, flags);
}
}
else {
TRACE_TASK(t, "hp_waiter is unaffected.\n");
unlock_fine_irqrestore(&sem->lock, flags);
}
}
// hp_waiter has decreased
static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
struct ikglp_semaphore *sem,
unsigned long flags)
{
struct task_struct *owner = fq->owner;
struct task_struct *old_max_eff_prio;
struct task_struct *new_max_eff_prio;
if(!owner) {
TRACE_CUR("No owner. Returning.\n");
unlock_fine_irqrestore(&sem->lock, flags);
return;
}
raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
fq->nest.hp_waiter_eff_prio = fq->hp_waiter;
binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
struct nested_info, hp_binheap_node);
new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
if((old_max_eff_prio != new_max_eff_prio) &&
(effective_priority(owner) == old_max_eff_prio))
{
// Need to set new effective_priority for owner
struct task_struct *decreased_prio;
TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
ikglp_get_idx(sem, fq));
if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
(new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
(new_max_eff_prio) ? new_max_eff_prio->pid : -1,
owner->comm,
owner->pid,
ikglp_get_idx(sem, fq));
decreased_prio = new_max_eff_prio;
}
else {
TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
(new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
(new_max_eff_prio) ? new_max_eff_prio->pid : -1,
owner->comm,
owner->pid,
ikglp_get_idx(sem, fq));
decreased_prio = NULL;
}
// beware: recursion
litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
}
else {
TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
unlock_fine_irqrestore(&sem->lock, flags);
}
}
static void ikglp_remove_donation_from_owner(struct binheap_node *n,
struct fifo_queue *fq,
struct ikglp_semaphore *sem,
unsigned long flags)
{
struct task_struct *owner = fq->owner;
struct task_struct *old_max_eff_prio;
struct task_struct *new_max_eff_prio;
BUG_ON(!owner);
raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
if((old_max_eff_prio != new_max_eff_prio) &&
(effective_priority(owner) == old_max_eff_prio))
{
// Need to set new effective_priority for owner
struct task_struct *decreased_prio;
TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
ikglp_get_idx(sem, fq));
if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
ikglp_get_idx(sem, fq));
decreased_prio = new_max_eff_prio;
}
else {
TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
ikglp_get_idx(sem, fq));
decreased_prio = NULL;
}
// beware: recursion
litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
}
else {
TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
unlock_fine_irqrestore(&sem->lock, flags);
}
}
static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
struct binheap_node *n)
{
struct task_struct *old_max_eff_prio;
struct task_struct *new_max_eff_prio;
raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
if((old_max_eff_prio != new_max_eff_prio) &&
(effective_priority(t) == old_max_eff_prio))
{
// Need to set new effective_priority for owner
struct task_struct *decreased_prio;
if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
decreased_prio = new_max_eff_prio;
}
else {
decreased_prio = NULL;
}
tsk_rt(t)->inh_task = decreased_prio;
}
raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
}
static void ikglp_get_immediate(struct task_struct* t,
struct fifo_queue *fq,
struct ikglp_semaphore *sem,
unsigned long flags)
{
// resource available now
TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
fq->owner = t;
raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
struct nested_info, hp_binheap_node);
raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
++(fq->count);
ikglp_add_global_list(sem, t, &fq->global_heap_node);
ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
}
#endif
unlock_fine_irqrestore(&sem->lock, flags);
}
static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
struct fifo_queue* fq,
struct task_struct* t,
wait_queue_t *wait,
ikglp_heap_node_t *global_heap_node,
ikglp_donee_heap_node_t *donee_heap_node)
{
/* resource is not free => must suspend and wait */
TRACE_TASK(t, "Enqueuing on fq %d.\n",
ikglp_get_idx(sem, fq));
init_waitqueue_entry(wait, t);
__add_wait_queue_tail_exclusive(&fq->wait, wait);
++(fq->count);
// update global list.
if(likely(global_heap_node)) {
if(binheap_is_in_heap(&global_heap_node->node)) {
WARN_ON(1);
ikglp_del_global_list(sem, t, global_heap_node);
}
ikglp_add_global_list(sem, t, global_heap_node);
}
// update donor eligiblity list.
if(likely(donee_heap_node)) {
// if(binheap_is_in_heap(&donee_heap_node->node)) {
// WARN_ON(1);
// }
ikglp_add_donees(sem, fq, t, donee_heap_node);
}
if(sem->shortest_fifo_queue == fq) {
sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
}
#endif
TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
}
static void ikglp_enqueue_on_fq(
struct ikglp_semaphore *sem,
struct fifo_queue *fq,
ikglp_wait_state_t *wait,
unsigned long flags)
{
/* resource is not free => must suspend and wait */
TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
ikglp_get_idx(sem, fq));
INIT_BINHEAP_NODE(&wait->global_heap_node.node);
INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
__ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
&wait->global_heap_node, &wait->donee_heap_node);
ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock
}
static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
ikglp_wait_state_t *wait)
{
TRACE_TASK(wait->task, "goes to PQ.\n");
wait->pq_node.task = wait->task; // copy over task (little redundant...)
binheap_add(&wait->pq_node.node, &sem->priority_queue,
ikglp_heap_node_t, node);
}
static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
ikglp_wait_state_t *wait)
{
INIT_BINHEAP_NODE(&wait->global_heap_node.node);
INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
INIT_BINHEAP_NODE(&wait->pq_node.node);
__ikglp_enqueue_on_pq(sem, wait);
}
static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
ikglp_wait_state_t* wait,
unsigned long flags)
{
struct task_struct *t = wait->task;
ikglp_donee_heap_node_t *donee_node = NULL;
struct task_struct *donee;
struct task_struct *old_max_eff_prio;
struct task_struct *new_max_eff_prio;
struct task_struct *new_prio = NULL;
INIT_BINHEAP_NODE(&wait->global_heap_node.node);
INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
INIT_BINHEAP_NODE(&wait->pq_node.node);
INIT_BINHEAP_NODE(&wait->node);
// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
// TRACE_CUR("donors Before:\n");
// print_donors(sem->donors.root, 1);
// Add donor to the global list.
ikglp_add_global_list(sem, t, &wait->global_heap_node);
// Select a donee
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
donee_node = (sem->aff_obs) ?
sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
#else
donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
#endif
donee = donee_node->task;
TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
TRACE_CUR("Temporarily removing %s/%d to donee list.\n",
donee->comm, donee->pid);
// TRACE_CUR("donees Before:\n");
// print_donees(sem, sem->donees.root, 1);
//binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
binheap_delete(&donee_node->node, &sem->donees);
// TRACE_CUR("donees After:\n");
// print_donees(sem, sem->donees.root, 1);
wait->donee_info = donee_node;
// Add t to donor heap.
binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
// Now adjust the donee's priority.
// Lock the donee's inheritance heap.
raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
if(donee_node->donor_info) {
// Steal donation relation. Evict old donor to PQ.
// Remove old donor from donor heap
ikglp_wait_state_t *old_wait = donee_node->donor_info;
struct task_struct *old_donor = old_wait->task;
TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n",
donee->comm, donee->pid, old_donor->comm, old_donor->pid);
binheap_delete(&old_wait->node, &sem->donors);
// Remove donation from donee's inheritance heap.
binheap_delete(&old_wait->prio_donation.hp_binheap_node,
&tsk_rt(donee)->hp_blocked_tasks);
// WARNING: have not updated inh_prio!
// Add old donor to PQ.
__ikglp_enqueue_on_pq(sem, old_wait);
// Remove old donor from the global heap.
ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
}
// Add back donee's node to the donees heap with increased prio
donee_node->donor_info = wait;
INIT_BINHEAP_NODE(&donee_node->node);
TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
// TRACE_CUR("donees Before:\n");
// print_donees(sem, sem->donees.root, 1);
binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
// TRACE_CUR("donees After:\n");
// print_donees(sem, sem->donees.root, 1);
// Add an inheritance/donation to the donee's inheritance heap.
wait->prio_donation.lock = (struct litmus_lock*)sem;
wait->prio_donation.hp_waiter_eff_prio = t;
wait->prio_donation.hp_waiter_ptr = NULL;
INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
binheap_add(&wait->prio_donation.hp_binheap_node,
&tsk_rt(donee)->hp_blocked_tasks,
struct nested_info, hp_binheap_node);
new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
if(new_max_eff_prio != old_max_eff_prio) {
if ((effective_priority(donee) == old_max_eff_prio) ||
(litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
donee->comm, donee->pid);
new_prio = new_max_eff_prio;
}
// else {
// // should be bug. donor would not be in top-m.
// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
// WARN_ON(1);
// }
// }
// else {
// // should be bug. donor would not be in top-m.
// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
// WARN_ON(1);
}
if(new_prio) {
struct fifo_queue *donee_fq = donee_node->fq;
if(donee != donee_fq->owner) {
TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
donee->comm, donee->pid,
donee_fq->owner->comm, donee_fq->owner->pid);
raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock
}
else {
TRACE_TASK(t, "%s/%d is the owner. Progatating priority immediatly.\n",
donee->comm, donee->pid);
litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock
}
}
else {
TRACE_TASK(t, "No change in effective priority (it is %d/%s). BUG?\n",
new_max_eff_prio->comm, new_max_eff_prio->pid);
raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
unlock_fine_irqrestore(&sem->lock, flags);
}
// TRACE_CUR("donors After:\n");
// print_donors(sem->donors.root, 1);
}
int ikglp_lock(struct litmus_lock* l)
{
struct task_struct* t = current;
struct ikglp_semaphore *sem = ikglp_from_lock(l);
unsigned long flags = 0, real_flags;
struct fifo_queue *fq = NULL;
int replica = -EINVAL;
#ifdef CONFIG_LITMUS_DGL_SUPPORT
raw_spinlock_t *dgl_lock;
#endif
ikglp_wait_state_t wait;
if (!is_realtime(t))
return -EPERM;
#ifdef CONFIG_LITMUS_DGL_SUPPORT
dgl_lock = litmus->get_dgl_spinlock(t);
#endif
raw_spin_lock_irqsave(&sem->real_lock, real_flags);
lock_global_irqsave(dgl_lock, flags);
lock_fine_irqsave(&sem->lock, flags);
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
fq = (sem->aff_obs) ?
sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
sem->shortest_fifo_queue;
#else
fq = sem->shortest_fifo_queue;
#endif
if(fq->count == 0) {
// take available resource
replica = ikglp_get_idx(sem, fq);
ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
unlock_global_irqrestore(dgl_lock, flags);
raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
}
else
{
// we have to suspend.
wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
mb();
/* FIXME: interruptible would be nice some day */
set_task_state(t, TASK_UNINTERRUPTIBLE);
if(fq->count < sem->max_fifo_len) {
// enqueue on fq
ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
}
else {
TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
// no room in fifos. Go to PQ or donors.
if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
// enqueue on PQ
ikglp_enqueue_on_pq(sem, &wait);
unlock_fine_irqrestore(&sem->lock, flags);
}
else {
// enqueue as donor
ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
}
}
unlock_global_irqrestore(dgl_lock, flags);
raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
TS_LOCK_SUSPEND;
schedule();
TS_LOCK_RESUME;
fq = ikglp_get_queue(sem, t);
BUG_ON(!fq);
replica = ikglp_get_idx(sem, fq);
}
TRACE_CUR("Acquired lock %d, queue %d\n",
l->ident, replica);
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
}
#endif
return replica;
}
static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
struct fifo_queue *fq,
ikglp_wait_state_t *donor_info)
{
struct task_struct *t = donor_info->task;
TRACE_CUR("Donor %s/%d being moved to fq %d\n",
t->comm,
t->pid,
ikglp_get_idx(sem, fq));
binheap_delete(&donor_info->node, &sem->donors);
__ikglp_enqueue_on_fq(sem, fq, t,
&donor_info->fq_node,
NULL, // already in global_list, so pass null to prevent adding 2nd time.
&donor_info->donee_heap_node);
// warning:
// ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
}
static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
struct fifo_queue *fq,
ikglp_wait_state_t *wait)
{
struct task_struct *t = wait->task;
TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
t->comm,
t->pid,
ikglp_get_idx(sem, fq));
binheap_delete(&wait->pq_node.node, &sem->priority_queue);
__ikglp_enqueue_on_fq(sem, fq, t,
&wait->fq_node,
&wait->global_heap_node,
&wait->donee_heap_node);
// warning:
// ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
}
static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
struct ikglp_semaphore* sem)
{
/* must hold sem->lock */
struct fifo_queue *fq = NULL;
struct list_head *pos;
struct task_struct *queued;
int i;
for(i = 0; i < sem->nr_replicas; ++i) {
if( (sem->fifo_queues[i].count > 1) &&
(!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
ikglp_get_idx(sem, &sem->fifo_queues[i]),
sem->fifo_queues[i].hp_waiter->comm,
sem->fifo_queues[i].hp_waiter->pid,
(fq) ? ikglp_get_idx(sem, fq) : -1,
(fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
(fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
fq = &sem->fifo_queues[i];
WARN_ON(!(fq->hp_waiter));
}
}
if(fq) {
struct task_struct *max_hp = fq->hp_waiter;
ikglp_wait_state_t* ret = NULL;
TRACE_CUR("Searching for %s/%d on fq %d\n",
max_hp->comm,
max_hp->pid,
ikglp_get_idx(sem, fq));
BUG_ON(!max_hp);
list_for_each(pos, &fq->wait.task_list) {
wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
queued = (struct task_struct*) wait->private;
TRACE_CUR("fq %d entry: %s/%d\n",
ikglp_get_idx(sem, fq),
queued->comm,
queued->pid);
/* Compare task prios, find high prio task. */
if (queued == max_hp) {
TRACE_CUR("Found it!\n");
ret = container_of(wait, ikglp_wait_state_t, fq_node);
}
}
WARN_ON(!ret);
return ret;
}
return(NULL);
}
static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
struct fifo_queue *fq,
ikglp_wait_state_t *fq_wait)
{
struct task_struct *t = fq_wait->task;
struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
WARN_ON(t != fq_steal->hp_waiter);
TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
t->comm,
t->pid,
ikglp_get_idx(sem, fq));
fq_wait->donee_heap_node.fq = fq; // just to be safe
__remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
--(fq_steal->count);
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
}
#endif
fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
ikglp_get_idx(sem, fq_steal),
(fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
(fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
// Update shortest.
if(fq_steal->count < sem->shortest_fifo_queue->count) {
sem->shortest_fifo_queue = fq_steal;
}
__ikglp_enqueue_on_fq(sem, fq, t,
&fq_wait->fq_node,
NULL,
NULL);
// warning: We have not checked the priority inheritance of fq's owner yet.
}
static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
struct fifo_queue *fq,
ikglp_wait_state_t *old_wait)
{
struct task_struct *t = old_wait->task;
BUG_ON(old_wait->donee_heap_node.fq != fq);
TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
ikglp_get_idx(sem, fq));
// need to migrate global_heap_node and donee_heap_node off of the stack
// to the nodes allocated for the owner of this fq.
// TODO: Enhance binheap() to perform this operation in place.
ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
fq->global_heap_node = old_wait->global_heap_node; // copy
ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add
binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove
fq->donee_heap_node = old_wait->donee_heap_node; // copy
if(fq->donee_heap_node.donor_info) {
// let donor know that our location has changed
BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link
fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
}
INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
binheap_add(&fq->donee_heap_node.node, &sem->donees,
ikglp_donee_heap_node_t, node); // re-add
}
int ikglp_unlock(struct litmus_lock* l)
{
struct ikglp_semaphore *sem = ikglp_from_lock(l);
struct task_struct *t = current;
struct task_struct *donee = NULL;
struct task_struct *next = NULL;
struct task_struct *new_on_fq = NULL;
ikglp_wait_state_t *other_donor_info = NULL;
struct fifo_queue *to_steal = NULL;
struct fifo_queue *fq;
#ifdef CONFIG_LITMUS_DGL_SUPPORT
raw_spinlock_t *dgl_lock;
#endif
unsigned long flags = 0, real_flags;
int err = 0;
#ifdef CONFIG_LITMUS_DGL_SUPPORT
dgl_lock = litmus->get_dgl_spinlock(t);
#endif
raw_spin_lock_irqsave(&sem->real_lock, real_flags);
lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
lock_fine_irqsave(&sem->lock, flags);
fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
if (!fq) {
err = -EINVAL;
goto out;
}
TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
// Remove 't' from the heaps, but data in nodes will still be good.
ikglp_del_global_list(sem, t, &fq->global_heap_node);
binheap_delete(&fq->donee_heap_node.node, &sem->donees);
fq->owner = NULL; // no longer owned!!
--(fq->count);
if(fq->count < sem->shortest_fifo_queue->count) {
sem->shortest_fifo_queue = fq;
}
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
}
#endif
// Move the next request into the FQ and update heaps as needed.
// We defer re-evaluation of priorities to later in the function.
if(fq->donee_heap_node.donor_info) { // move my donor to FQ
ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
new_on_fq = donor_info->task;
TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d.\n",
new_on_fq->comm, new_on_fq->pid,
ikglp_get_idx(sem, fq));
// donor moved to FQ
donee = t;
ikglp_move_donor_to_fq(sem, fq, donor_info);
}
else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ
// move other donor to FQ
// Select a donor
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
other_donor_info = (sem->aff_obs) ?
sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
#else
other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
#endif
new_on_fq = other_donor_info->task;
donee = other_donor_info->donee_info->task;
// update the donee's heap position.
other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d.\n",
new_on_fq->comm, new_on_fq->pid,
ikglp_get_idx(sem, fq));
ikglp_move_donor_to_fq(sem, fq, other_donor_info);
}
else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ
ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
ikglp_heap_node_t, node);
ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
pq_node);
new_on_fq = pq_wait->task;
TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d.\n",
new_on_fq->comm, new_on_fq->pid,
ikglp_get_idx(sem, fq));
ikglp_move_pq_to_fq(sem, fq, pq_wait);
}
else if(fq->count == 0) { // No PQ and this queue is empty, so steal.
ikglp_wait_state_t *fq_wait;
TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
ikglp_get_idx(sem, fq));
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
fq_wait = (sem->aff_obs) ?
sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
ikglp_find_hp_waiter_to_steal(sem);
#else
fq_wait = ikglp_find_hp_waiter_to_steal(sem);
#endif
if(fq_wait) {
to_steal = fq_wait->donee_heap_node.fq;
new_on_fq = fq_wait->task;
TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
new_on_fq->comm, new_on_fq->pid,
ikglp_get_idx(sem, to_steal),
ikglp_get_idx(sem, fq));
ikglp_steal_to_fq(sem, fq, fq_wait);
}
else {
TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
ikglp_get_idx(sem, fq));
}
}
else { // move no one
}
// 't' must drop all priority and clean up data structures before hand-off.
// DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST
raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
{
int count = 0;
while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
struct nested_info, hp_binheap_node);
++count;
}
litmus->decrease_prio(t, NULL);
WARN_ON(count > 2); // should not be greater than 2. only local fq inh and donation can be possible.
}
raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
// Now patch up other priorities.
//
// At most one of the following:
// if(donee && donee != t), decrease prio, propagate to owner, or onward
// if(to_steal), update owner's prio (hp_waiter has already been set)
//
BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
if(other_donor_info) {
struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
BUG_ON(!donee);
BUG_ON(donee == t);
TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
other_donor_info->task->comm, other_donor_info->task->pid,
donee->comm, donee->pid);
// need to terminate donation relation.
if(donee == other_fq->owner) {
TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n",
donee->comm, donee->pid,
ikglp_get_idx(sem, other_fq));
ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
}
else {
TRACE_TASK(t, "Donee %s/%d is an blocked in of fq %d.\n",
donee->comm, donee->pid,
ikglp_get_idx(sem, other_fq));
ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
if(donee == other_fq->hp_waiter) {
TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
donee->comm, donee->pid,
ikglp_get_idx(sem, other_fq));
other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
ikglp_get_idx(sem, other_fq),
(other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
(other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it.
lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
}
}
}
else if(to_steal) {
TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
ikglp_get_idx(sem, to_steal));
ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it.
lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
}
// check for new HP waiter.
if(new_on_fq) {
// fq->owner is null, so just update the hp_waiter without locking.
if(new_on_fq == fq->hp_waiter) {
TRACE_TASK(t, "new_on_fq is already hp_waiter.\n",
fq->hp_waiter->comm, fq->hp_waiter->pid);
fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
}
else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
if(fq->hp_waiter)
TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
fq->hp_waiter->comm, fq->hp_waiter->pid);
else
TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
fq->hp_waiter = new_on_fq;
fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
ikglp_get_idx(sem, fq),
(fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
(fq->hp_waiter) ? fq->hp_waiter->pid : -1);
}
}
if(waitqueue_active(&fq->wait))
{
wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
next = (struct task_struct*) wait->private;
__remove_wait_queue(&fq->wait, wait);
TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
ikglp_get_idx(sem, fq),
next->comm, next->pid);
// migrate wait-state to fifo-memory.
ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
/* next becomes the resouce holder */
fq->owner = next;
tsk_rt(next)->blocked_lock = NULL;
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
if(sem->aff_obs) {
sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
}
#endif
/* determine new hp_waiter if necessary */
if (next == fq->hp_waiter) {
TRACE_TASK(next, "was highest-prio waiter\n");
/* next has the highest priority --- it doesn't need to
* inherit. However, we need to make sure that the
* next-highest priority in the queue is reflected in
* hp_waiter. */
fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
ikglp_get_idx(sem, fq),
(fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
(fq->hp_waiter) ? fq->hp_waiter->pid : -1);
fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
effective_priority(fq->hp_waiter) : NULL;
if (fq->hp_waiter)
TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
else
TRACE("no further waiters\n");
raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
// TRACE_TASK(next, "Heap Before:\n");
// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
binheap_add(&fq->nest.hp_binheap_node,
&tsk_rt(next)->hp_blocked_tasks,
struct nested_info,
hp_binheap_node);
// TRACE_TASK(next, "Heap After:\n");
// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
}
else {
/* Well, if 'next' is not the highest-priority waiter,
* then it (probably) ought to inherit the highest-priority
* waiter's priority. */
TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
ikglp_get_idx(sem, fq),
(fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
(fq->hp_waiter) ? fq->hp_waiter->pid : -1);
raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
binheap_add(&fq->nest.hp_binheap_node,
&tsk_rt(next)->hp_blocked_tasks,
struct nested_info,
hp_binheap_node);
/* It is possible that 'next' *should* be the hp_waiter, but isn't
* because that update hasn't yet executed (update operation is
* probably blocked on mutex->lock). So only inherit if the top of
* 'next's top heap node is indeed the effective prio. of hp_waiter.
* (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
* since the effective priority of hp_waiter can change (and the
* update has not made it to this lock).)
*/
if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
fq->nest.hp_waiter_eff_prio))
{
if(fq->nest.hp_waiter_eff_prio)
litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
else
WARN_ON(1);
}
raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
}
// wake up the new resource holder!
wake_up_process(next);
}
unlock_fine_irqrestore(&sem->lock, flags);
unlock_global_irqrestore(dgl_lock, flags);
raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
out:
return err;
}
int ikglp_close(struct litmus_lock* l)
{
struct task_struct *t = current;
struct ikglp_semaphore *sem = ikglp_from_lock(l);
unsigned long flags;
int owner = 0;
int i;
raw_spin_lock_irqsave(&sem->real_lock, flags);
for(i = 0; i < sem->nr_replicas; ++i) {
if(sem->fifo_queues[i].owner == t) {
owner = 1;
break;
}
}
raw_spin_unlock_irqrestore(&sem->real_lock, flags);
if (owner)
ikglp_unlock(l);
return 0;
}
void ikglp_free(struct litmus_lock* l)
{
struct ikglp_semaphore *sem = ikglp_from_lock(l);
kfree(sem->fifo_queues);
kfree(sem);
}
struct litmus_lock* ikglp_new(int m,
struct litmus_lock_ops* ops,
void* __user arg)
{
struct ikglp_semaphore* sem;
int nr_replicas = 0;
int i;
if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
{
return(NULL);
}
if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
{
return(NULL);
}
if(nr_replicas < 1)
{
return(NULL);
}
sem = kmalloc(sizeof(*sem), GFP_KERNEL);
if(!sem)
{
return NULL;
}
sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
if(!sem->fifo_queues)
{
kfree(sem);
return NULL;
}
sem->litmus_lock.ops = ops;
#ifdef CONFIG_DEBUG_SPINLOCK
{
__raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
}
#else
raw_spin_lock_init(&sem->lock);
#endif
raw_spin_lock_init(&sem->real_lock);
sem->nr_replicas = nr_replicas;
sem->m = m;
sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
sem->m,
sem->nr_replicas,
sem->max_fifo_len);
for(i = 0; i < nr_replicas; ++i)
{
struct fifo_queue* q = &(sem->fifo_queues[i]);
q->owner = NULL;
q->hp_waiter = NULL;
init_waitqueue_head(&q->wait);
q->count = 0;
q->global_heap_node.task = NULL;
INIT_BINHEAP_NODE(&q->global_heap_node.node);
q->donee_heap_node.task = NULL;
q->donee_heap_node.donor_info = NULL;
q->donee_heap_node.fq = NULL;
INIT_BINHEAP_NODE(&q->donee_heap_node.node);
q->nest.lock = (struct litmus_lock*)sem;
q->nest.hp_waiter_eff_prio = NULL;
q->nest.hp_waiter_ptr = &q->hp_waiter;
INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
}
sem->shortest_fifo_queue = &sem->fifo_queues[0];
sem->top_m_size = 0;
// init heaps
INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
sem->aff_obs = NULL;
#endif
return &sem->litmus_lock;
}
#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
{
int gpu = replica % aff->nr_rsrc;
return gpu;
}
static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
{
int gpu = __replica_to_gpu(aff, replica) + aff->offset;
return gpu;
}
static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
{
int replica = gpu - aff->offset;
return replica;
}
int ikglp_aff_obs_close(struct affinity_observer* obs)
{
return 0;
}
void ikglp_aff_obs_free(struct affinity_observer* obs)
{
struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
kfree(ikglp_aff->nr_cur_users_on_rsrc);
kfree(ikglp_aff->q_info);
kfree(ikglp_aff);
}
static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
struct ikglp_affinity_ops* ikglp_ops,
void* __user args)
{
struct ikglp_affinity* ikglp_aff;
struct gpu_affinity_observer_args aff_args;
struct ikglp_semaphore* sem;
int i;
unsigned long flags;
if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
return(NULL);
}
if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
return(NULL);
}
sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
if(sem->litmus_lock.type != IKGLP_SEM) {
TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
return(NULL);
}
if((aff_args.nr_simult_users <= 0) ||
(sem->nr_replicas%aff_args.nr_simult_users != 0)) {
TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
"(%d) per replica. #replicas should be evenly divisible "
"by #simult_users.\n",
sem->litmus_lock.ident,
sem->nr_replicas,
aff_args.nr_simult_users);
return(NULL);
}
if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
return(NULL);
}
ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
if(!ikglp_aff) {
return(NULL);
}
ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
if(!ikglp_aff->q_info) {
kfree(ikglp_aff);
return(NULL);
}
ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
if(!ikglp_aff->nr_cur_users_on_rsrc) {
kfree(ikglp_aff->q_info);
kfree(ikglp_aff);
return(NULL);
}
affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
ikglp_aff->ops = ikglp_ops;
ikglp_aff->offset = aff_args.replica_to_gpu_offset;
ikglp_aff->nr_simult = aff_args.nr_simult_users;
ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
for(i = 0; i < sem->nr_replicas; ++i) {
ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
ikglp_aff->q_info[i].estimated_len = 0;
// multiple q_info's will point to the same resource (aka GPU) if
// aff_args.nr_simult_users > 1
ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
}
// attach observer to the lock
raw_spin_lock_irqsave(&sem->real_lock, flags);
sem->aff_obs = ikglp_aff;
raw_spin_unlock_irqrestore(&sem->real_lock, flags);
return &ikglp_aff->obs;
}
static int gpu_replica_to_resource(struct ikglp_affinity* aff,
struct fifo_queue* fq) {
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
}
// Smart IKGLP Affinity
//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
//{
// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
// struct ikglp_queue_info *shortest = &aff->q_info[0];
// int i;
//
// for(i = 1; i < sem->nr_replicas; ++i) {
// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
// shortest = &aff->q_info[i];
// }
// }
//
// return(shortest);
//}
struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
{
// advise_enqueue must be smart as not not break IKGLP rules:
// * No queue can be greater than ceil(m/k) in length. We may return
// such a queue, but IKGLP will be smart enough as to send requests
// to donors or PQ.
// * Cannot let a queue idle if there exist waiting PQ/donors
// -- needed to guarantee parallel progress of waiters.
//
// We may be able to relax some of these constraints, but this will have to
// be carefully evaluated.
//
// Huristic strategy: Find the shortest queue that is not full.
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
lt_t min_len;
int min_nr_users;
struct ikglp_queue_info *shortest;
struct fifo_queue *to_enqueue;
int i;
int affinity_gpu;
// simply pick the shortest queue if, we have no affinity, or we have
// affinity with the shortest
if(unlikely(tsk_rt(t)->last_gpu < 0)) {
affinity_gpu = aff->offset; // first gpu
TRACE_CUR("no affinity\n");
}
else {
affinity_gpu = tsk_rt(t)->last_gpu;
}
// all things being equal, let's start with the queue with which we have
// affinity. this helps us maintain affinity even when we don't have
// an estiamte for local-affinity execution time (i.e., 2nd time on GPU)
shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
// if(shortest == aff->shortest_queue) {
// TRACE_CUR("special case: have affinity with shortest queue\n");
// goto out;
// }
min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
min_nr_users = *(shortest->nr_cur_users);
TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
get_gpu_estimate(t, MIG_LOCAL),
ikglp_get_idx(sem, shortest->q),
shortest->q->count,
min_len);
for(i = 0; i < sem->nr_replicas; ++i) {
if(&aff->q_info[i] != shortest) {
if(aff->q_info[i].q->count < sem->max_fifo_len) {
lt_t est_len =
aff->q_info[i].estimated_len +
get_gpu_estimate(t,
gpu_migration_distance(tsk_rt(t)->last_gpu,
replica_to_gpu(aff, i)));
// queue is smaller, or they're equal and the other has a smaller number
// of total users.
//
// tie-break on the shortest number of simult users. this only kicks in
// when there are more than 1 empty queues.
if((shortest->q->count >= sem->max_fifo_len) || /* 'shortest' is full and i-th queue is not */
(est_len < min_len) || /* i-th queue has shortest length */
((est_len == min_len) && /* equal lengths, but one has fewer over-all users */
(*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
shortest = &aff->q_info[i];
min_len = est_len;
min_nr_users = *(aff->q_info[i].nr_cur_users);
}
TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
get_gpu_estimate(t,
gpu_migration_distance(tsk_rt(t)->last_gpu,
replica_to_gpu(aff, i))),
ikglp_get_idx(sem, aff->q_info[i].q),
aff->q_info[i].q->count,
est_len);
}
else {
TRACE_CUR("queue %d is too long. ineligible for enqueue.\n",
ikglp_get_idx(sem, aff->q_info[i].q));
}
}
}
if(shortest->q->count >= sem->max_fifo_len) {
TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
ikglp_get_idx(sem, shortest->q));
}
to_enqueue = shortest->q;
TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
ikglp_get_idx(sem, to_enqueue),
to_enqueue->count,
ikglp_get_idx(sem, sem->shortest_fifo_queue));
return to_enqueue;
//return(sem->shortest_fifo_queue);
}
ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
struct fifo_queue* dst)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
// For now, just steal highest priority waiter
// TODO: Implement affinity-aware stealing.
return ikglp_find_hp_waiter_to_steal(sem);
}
static inline int has_donor(wait_queue_t* fq_wait)
{
ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
return(wait->donee_heap_node.donor_info != NULL);
}
static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
struct fifo_queue* fq,
int* dist_from_head)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
struct task_struct *donee;
ikglp_donee_heap_node_t *donee_node;
struct task_struct *mth_highest = ikglp_mth_highest(sem);
lt_t now = litmus_clock();
// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
// ikglp_get_idx(sem, fq),
// mth_highest->comm, mth_highest->pid,
// (int)get_deadline(mth_highest) - now);
if(fq->owner &&
fq->donee_heap_node.donor_info == NULL &&
mth_highest != fq->owner &&
litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
donee = fq->owner;
donee_node = &(fq->donee_heap_node);
*dist_from_head = 0;
BUG_ON(donee != donee_node->task);
TRACE_CUR("picked owner of fq %d as donee\n",
ikglp_get_idx(sem, fq));
goto out;
}
else if(waitqueue_active(&fq->wait)) {
struct list_head *pos;
// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
// "(mth_highest != fq->owner) = %d "
// "(mth_highest > fq->owner) = %d\n",
// ikglp_get_idx(sem, fq),
// (fq->owner) ? fq->owner->comm : "nil",
// (fq->owner) ? fq->owner->pid : -1,
// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
// (mth_highest != fq->owner),
// (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
*dist_from_head = 1;
// iterating from the start of the queue is nice since this means
// the donee will be closer to obtaining a resource.
list_for_each(pos, &fq->wait.task_list) {
wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
// "(mth_highest != wait->task) = %d "
// "(mth_highest > wait->task) = %d\n",
// ikglp_get_idx(sem, fq),
// dist_from_head,
// wait->task->comm, wait->task->pid,
// (int)get_deadline(wait->task) - now,
// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
// (mth_highest != wait->task),
// (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
if(!has_donor(fq_wait) &&
mth_highest != wait->task &&
litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
donee = (struct task_struct*) fq_wait->private;
donee_node = &wait->donee_heap_node;
BUG_ON(donee != donee_node->task);
TRACE_CUR("picked waiter in fq %d as donee\n",
ikglp_get_idx(sem, fq));
goto out;
}
++(*dist_from_head);
}
}
donee = NULL;
donee_node = NULL;
*dist_from_head = sem->max_fifo_len + 1;
TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
out:
TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
ikglp_get_idx(sem, fq),
(donee) ? (donee)->comm : "nil",
(donee) ? (donee)->pid : -1,
*dist_from_head);
return donee_node;
}
ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
struct ikglp_affinity* aff,
struct task_struct* donor)
{
// Huristic strategy: Find the highest-priority donee that is waiting on
// a queue closest to our affinity. (1) The donee CANNOT already have a
// donor (exception: donee is the lowest-prio task in the donee heap).
// (2) Requests in 'top_m' heap are ineligible.
//
// Further strategy: amongst elible donees waiting for the same GPU, pick
// the one closest to the head of the FIFO queue (including owners).
//
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
ikglp_donee_heap_node_t *donee_node;
gpu_migration_dist_t distance;
int start, i, j;
ikglp_donee_heap_node_t *default_donee;
ikglp_wait_state_t *default_donee_donor_info;
if(tsk_rt(donor)->last_gpu < 0) {
// no affinity. just return the min prio, like standard IKGLP
// TODO: Find something closer to the head of the queue??
donee_node = binheap_top_entry(&sem->donees,
ikglp_donee_heap_node_t,
node);
goto out;
}
// Temporarily break any donation relation the default donee (the lowest
// prio task in the FIFO queues) to make it eligible for selection below.
//
// NOTE: The original donor relation *must* be restored, even if we select
// the default donee throug affinity-aware selection, before returning
// from this function so we don't screw up our heap ordering.
// The standard IKGLP algorithm will steal the donor relationship if needed.
default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
default_donee_donor_info = default_donee->donor_info; // back-up donor relation
default_donee->donor_info = NULL; // temporarily break any donor relation.
// initialize our search
donee_node = NULL;
distance = MIG_NONE;
// TODO: The below search logic may work well for locating nodes to steal
// when an FQ goes idle. Validate this code and apply it to stealing.
// begin search with affinity GPU.
start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
i = start;
do { // "for each gpu" / "for each aff->nr_rsrc"
gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
// only interested in queues that will improve our distance
if(temp_distance < distance || donee_node == NULL) {
int dist_from_head = sem->max_fifo_len + 1;
TRACE_CUR("searching for donor on GPU %d", i);
// visit each queue and pick a donee. bail as soon as we find
// one for this class.
for(j = 0; j < aff->nr_simult; ++j) {
int temp_dist_from_head;
ikglp_donee_heap_node_t *temp_donee_node;
struct fifo_queue *fq;
fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
if(temp_dist_from_head < dist_from_head)
{
// we check all the FQs for this GPU to spread priorities
// out across the queues. does this decrease jitter?
donee_node = temp_donee_node;
dist_from_head = temp_dist_from_head;
}
}
if(dist_from_head != sem->max_fifo_len + 1) {
TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n",
donee_node->task->comm, donee_node->task->pid,
dist_from_head);
}
else {
TRACE_CUR("found no eligible donors from GPU %d\n", i);
}
}
else {
TRACE_CUR("skipping GPU %d (distance = %d, best donor "
"distance = %d)\n", i, temp_distance, distance);
}
i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around
} while (i != start);
// restore old donor info state.
default_donee->donor_info = default_donee_donor_info;
if(!donee_node) {
donee_node = default_donee;
TRACE_CUR("Could not find a donee. We have to steal one.\n");
WARN_ON(default_donee->donor_info == NULL);
}
out:
TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
donee_node->task->comm, donee_node->task->pid,
ikglp_get_idx(sem, donee_node->fq),
replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
return(donee_node);
}
static void __find_closest_donor(int target_gpu,
struct binheap_node* donor_node,
ikglp_wait_state_t** cur_closest,
int* cur_dist)
{
ikglp_wait_state_t *this_donor =
binheap_entry(donor_node, ikglp_wait_state_t, node);
int this_dist =
gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
// TRACE_CUR("%s/%d: dist from target = %d\n",
// this_donor->task->comm,
// this_donor->task->pid,
// this_dist);
if(this_dist < *cur_dist) {
// take this donor
*cur_dist = this_dist;
*cur_closest = this_donor;
}
else if(this_dist == *cur_dist) {
// priority tie-break. Even though this is a pre-order traversal,
// this is a heap, not a binary tree, so we still need to do a priority
// comparision.
if(!(*cur_closest) ||
litmus->compare(this_donor->task, (*cur_closest)->task)) {
*cur_dist = this_dist;
*cur_closest = this_donor;
}
}
if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
}
ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
{
// Huristic strategy: Find donor with the closest affinity to fq.
// Tie-break on priority.
// We need to iterate over all the donors to do this. Unfortunatly,
// our donors are organized in a heap. We'll visit each node with a
// recurisve call. This is realitively safe since there are only sem->m
// donors, at most. We won't recurse too deeply to have to worry about
// our stack. (even with 128 CPUs, our nest depth is at most 7 deep).
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
ikglp_wait_state_t *donor = NULL;
int distance = MIG_NONE;
int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
__find_closest_donor(gpu, sem->donors.root, &donor, &distance);
TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
"(non-aff wanted %s/%d). differs = %d\n",
donor->task->comm, donor->task->pid,
distance,
ikglp_get_idx(sem, fq),
default_donor->task->comm, default_donor->task->pid,
(donor->task != default_donor->task)
);
return(donor);
}
void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
int replica = ikglp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
struct ikglp_queue_info *info = &aff->q_info[replica];
lt_t est_time;
lt_t est_len_before;
if(current == t) {
tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
}
est_len_before = info->estimated_len;
est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
info->estimated_len += est_time;
TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
ikglp_get_idx(sem, info->q),
est_len_before, est_time,
info->estimated_len);
// if(aff->shortest_queue == info) {
// // we may no longer be the shortest
// aff->shortest_queue = ikglp_aff_find_shortest(aff);
//
// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
// ikglp_get_idx(sem, aff->shortest_queue->q),
// aff->shortest_queue->q->count,
// aff->shortest_queue->estimated_len);
// }
}
void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
int replica = ikglp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
struct ikglp_queue_info *info = &aff->q_info[replica];
lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
if(est_time > info->estimated_len) {
WARN_ON(1);
info->estimated_len = 0;
}
else {
info->estimated_len -= est_time;
}
TRACE_CUR("fq %d est len is now %llu\n",
ikglp_get_idx(sem, info->q),
info->estimated_len);
// check to see if we're the shortest queue now.
// if((aff->shortest_queue != info) &&
// (aff->shortest_queue->estimated_len > info->estimated_len)) {
//
// aff->shortest_queue = info;
//
// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
// ikglp_get_idx(sem, info->q),
// info->q->count,
// info->estimated_len);
// }
}
void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
struct fifo_queue* fq,
struct task_struct* t)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
int replica = ikglp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
// count the number or resource holders
++(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 1, t); // register
tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
reset_gpu_tracker(t);
start_gpu_tracker(t);
}
void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
struct fifo_queue* fq,
struct task_struct* t)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
int replica = ikglp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
lt_t est_time;
stop_gpu_tracker(t); // stop the tracker before we do anything else.
est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
tsk_rt(t)->last_gpu = gpu;
// count the number or resource holders
--(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 0, t); // unregister
// update estimates
update_gpu_estimate(t, get_gpu_time(t));
TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. "
"estimated was %llu. diff is %d\n",
t->comm, t->pid, gpu,
get_gpu_time(t),
est_time,
(long long)get_gpu_time(t) - (long long)est_time);
}
struct ikglp_affinity_ops gpu_ikglp_affinity =
{
.advise_enqueue = gpu_ikglp_advise_enqueue,
.advise_steal = gpu_ikglp_advise_steal,
.advise_donee_selection = gpu_ikglp_advise_donee_selection,
.advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
.notify_enqueue = gpu_ikglp_notify_enqueue,
.notify_dequeue = gpu_ikglp_notify_dequeue,
.notify_acquired = gpu_ikglp_notify_acquired,
.notify_freed = gpu_ikglp_notify_freed,
.replica_to_resource = gpu_replica_to_resource,
};
struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
void* __user args)
{
return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
}
// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
int min_count;
int min_nr_users;
struct ikglp_queue_info *shortest;
struct fifo_queue *to_enqueue;
int i;
// TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
shortest = &aff->q_info[0];
min_count = shortest->q->count;
min_nr_users = *(shortest->nr_cur_users);
TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
ikglp_get_idx(sem, shortest->q),
shortest->q->count,
min_nr_users);
for(i = 1; i < sem->nr_replicas; ++i) {
int len = aff->q_info[i].q->count;
// queue is smaller, or they're equal and the other has a smaller number
// of total users.
//
// tie-break on the shortest number of simult users. this only kicks in
// when there are more than 1 empty queues.
if((len < min_count) ||
((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
shortest = &aff->q_info[i];
min_count = shortest->q->count;
min_nr_users = *(aff->q_info[i].nr_cur_users);
}
TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
ikglp_get_idx(sem, aff->q_info[i].q),
aff->q_info[i].q->count,
*(aff->q_info[i].nr_cur_users));
}
to_enqueue = shortest->q;
TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
ikglp_get_idx(sem, to_enqueue),
ikglp_get_idx(sem, sem->shortest_fifo_queue));
return to_enqueue;
}
ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
struct fifo_queue* dst)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
// TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
return ikglp_find_hp_waiter_to_steal(sem);
}
ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
return(donee);
}
ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
return(donor);
}
void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
{
// TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
}
void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
{
// TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
}
void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
int replica = ikglp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
// TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
// count the number or resource holders
++(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 1, t); // register
}
void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
{
struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
int replica = ikglp_get_idx(sem, fq);
int gpu = replica_to_gpu(aff, replica);
// TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
// count the number or resource holders
--(*(aff->q_info[replica].nr_cur_users));
reg_nv_device(gpu, 0, t); // unregister
}
struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
{
.advise_enqueue = simple_gpu_ikglp_advise_enqueue,
.advise_steal = simple_gpu_ikglp_advise_steal,
.advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
.advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
.notify_enqueue = simple_gpu_ikglp_notify_enqueue,
.notify_dequeue = simple_gpu_ikglp_notify_dequeue,
.notify_acquired = simple_gpu_ikglp_notify_acquired,
.notify_freed = simple_gpu_ikglp_notify_freed,
.replica_to_resource = gpu_replica_to_resource,
};
struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
void* __user args)
{
return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
}
#endif