#include <linux/slab.h>
#include <linux/uaccess.h>

#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>
#include <litmus/fdso.h>
#include <litmus/trace.h>

#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
#include <litmus/gpu_affinity.h>
#include <litmus/nvidia_info.h>
#endif

#include <litmus/ikglp_lock.h>

// big signed value.
#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF

int ikglp_max_heap_base_priority_order(struct binheap_node *a,
				struct binheap_node *b)
{
	ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
	ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);

	BUG_ON(!d_a);
	BUG_ON(!d_b);

	return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
}

int ikglp_min_heap_base_priority_order(struct binheap_node *a,
				struct binheap_node *b)
{
	ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
	ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);

	return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
}

int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
				struct binheap_node *b)
{
	ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
	ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);

	return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
}

int ikglp_min_heap_donee_order(struct binheap_node *a,
				struct binheap_node *b)
{
	struct task_struct *prio_a, *prio_b;

	ikglp_donee_heap_node_t *d_a =
		binheap_entry(a, ikglp_donee_heap_node_t, node);
	ikglp_donee_heap_node_t *d_b =
		binheap_entry(b, ikglp_donee_heap_node_t, node);

	if(!d_a->donor_info) {
		prio_a = d_a->task;
	}
	else {
		prio_a = d_a->donor_info->task;
		BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
	}

	if(!d_b->donor_info) {
		prio_b = d_b->task;
	}
	else {
		prio_b = d_b->donor_info->task;
		BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
	}

	// note reversed order
	return litmus->__compare(prio_b, BASE, prio_a, BASE);
}

static inline unsigned int nominal_fq_len(struct fifo_queue *fq)
{
	return (fq->count - fq->is_vunlocked);
}

static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
				struct fifo_queue *queue)
{
	return (queue - &sem->fifo_queues[0]);
}

static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
				struct task_struct *holder)
{
	struct fifo_queue *fq = NULL;
	int i;

	for(i = 0; i < sem->nr_replicas; ++i) {
		if(sem->fifo_queues[i].owner == holder) {
			fq = &sem->fifo_queues[i];
			break;
		}
	}

	return(fq);
}

static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
				struct task_struct *skip)
{
	struct list_head *pos;
	struct task_struct *queued, *found = NULL;

	list_for_each(pos, &kqueue->wait.task_list) {
		queued = (struct task_struct*) list_entry(pos,
					wait_queue_t, task_list)->private;

		/* Compare task prios, find high prio task. */
		if(queued != skip && litmus->compare(queued, found))
			found = queued;
	}
	return found;
}

static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
				struct fifo_queue *search_start)
{
	// we start our search at search_start instead of at the beginning of the
	// queue list to load-balance across all resources.
	struct fifo_queue* step = search_start;
	struct fifo_queue* shortest = sem->shortest_fifo_queue;

	do {
		step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
step+1 : &sem->fifo_queues[0]; /* consider actual lengths, not nominal lengths */ if(step->count < shortest->count) { shortest = step; if(step->count == 0) break; /* can't get any shorter */ } }while(step != search_start); return(shortest); } static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem) { return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task; } static void __ikglp_dump_pq(struct binheap_node *n, int depth) { ikglp_heap_node_t *request; char padding[81] = " "; if(n == NULL) { TRACE("+-> %p\n", NULL); return; } request = binheap_entry(n, ikglp_heap_node_t, node); if(depth*2 <= 80) padding[depth*2] = '\0'; TRACE("%s+-> %s/%d\n", padding, request->task->comm, request->task->pid); if(n->left) __ikglp_dump_pq(n->left, depth+1); if(n->right) __ikglp_dump_pq(n->right, depth+1); } static void __ikglp_dump_donors(struct binheap_node *n, int depth) { ikglp_wait_state_t *donor_node; char padding[81] = " "; if(n == NULL) { TRACE("+-> %p\n", NULL); return; } donor_node = binheap_entry(n, ikglp_wait_state_t, node); if(depth*2 <= 80) padding[depth*2] = '\0'; TRACE("%s+-> %s/%d (donee: %s/%d)\n", padding, donor_node->task->comm, donor_node->task->pid, donor_node->donee_info->task->comm, donor_node->donee_info->task->pid); if(n->left) __ikglp_dump_donors(n->left, depth+1); if(n->right) __ikglp_dump_donors(n->right, depth+1); } static void __ikglp_dump_fifoq(int i, struct fifo_queue* fq) { TRACE(" FIFO %d: Owner = %s/%d (Virtually Unlocked = %u), HP Waiter = %s/%d, Length = %u\n", i, (fq->owner) ? fq->owner->comm : "null", (fq->owner) ? fq->owner->pid : 0, fq->is_vunlocked, (fq->hp_waiter) ? fq->hp_waiter->comm : "null", (fq->hp_waiter) ? fq->hp_waiter->pid : 0, fq->count); if (waitqueue_active(&fq->wait)) { struct list_head *pos; list_for_each(pos, &fq->wait.task_list) { wait_queue_t *q = list_entry(pos, wait_queue_t, task_list); struct task_struct *t = (struct task_struct*) q->private; TRACE(" %s/%d (effective priority: %s/%d)\n", t->comm, t->pid, (tsk_rt(t)->inh_task) ? tsk_rt(t)->inh_task->comm : "null", (tsk_rt(t)->inh_task) ? 
tsk_rt(t)->inh_task->pid : 0); } } } __attribute__ ((unused)) static void __ikglp_dump_state(struct ikglp_semaphore *sem) { int i; TRACE("IKGLP Lock %d\n", sem->litmus_lock.ident); TRACE("# Replicas: %u Max FIFO Len: %u Max in FIFOs: %u Cur # in FIFOs: %u\n", sem->nr_replicas, sem->max_fifo_len, sem->max_in_fifos, sem->nr_in_fifos); TRACE("# requests in top-m: %u\n", sem->top_m_size); for (i = 0; i < sem->nr_replicas; ++i) __ikglp_dump_fifoq(i, &sem->fifo_queues[i]); TRACE(" PQ:\n"); __ikglp_dump_pq(sem->priority_queue.root, 1); TRACE(" Donors:\n"); __ikglp_dump_donors(sem->donors.root, 1); } #if 0 static void print_global_list(struct binheap_node* n, int depth) { ikglp_heap_node_t *global_heap_node; char padding[81] = " "; if(n == NULL) { TRACE_CUR("+-> %p\n", NULL); return; } global_heap_node = binheap_entry(n, ikglp_heap_node_t, node); if(depth*2 <= 80) padding[depth*2] = '\0'; TRACE_CUR("%s+-> %s/%d\n", padding, global_heap_node->task->comm, global_heap_node->task->pid); if(n->left) print_global_list(n->left, depth+1); if(n->right) print_global_list(n->right, depth+1); } static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth) { ikglp_donee_heap_node_t *donee_node; char padding[81] = " "; struct task_struct* donor = NULL; if(n == NULL) { TRACE_CUR("+-> %p\n", NULL); return; } donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node); if(depth*2 <= 80) padding[depth*2] = '\0'; if(donee_node->donor_info) { donor = donee_node->donor_info->task; } TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n", padding, donee_node->task->comm, donee_node->task->pid, (donor) ? donor->comm : "null", (donor) ? donor->pid : 0, ikglp_get_idx(sem, donee_node->fq)); if(n->left) print_donees(sem, n->left, depth+1); if(n->right) print_donees(sem, n->right, depth+1); } static void print_donors(struct binheap_node *n, int depth) { ikglp_wait_state_t *donor_node; char padding[81] = " "; if(n == NULL) { TRACE_CUR("+-> %p\n", NULL); return; } donor_node = binheap_entry(n, ikglp_wait_state_t, node); if(depth*2 <= 80) padding[depth*2] = '\0'; TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n", padding, donor_node->task->comm, donor_node->task->pid, donor_node->donee_info->task->comm, donor_node->donee_info->task->pid); if(n->left) print_donors(n->left, depth+1); if(n->right) print_donors(n->right, depth+1); } #endif struct ikglp_proc_print_heap_args { struct ikglp_semaphore *sem; int *size; char **next; }; static void __ikglp_pq_to_proc(struct binheap_node *n, void *args) { struct ikglp_proc_print_heap_args *hargs; ikglp_heap_node_t *request; int w; if (!n) return; hargs = (struct ikglp_proc_print_heap_args*) args; request = binheap_entry(n, ikglp_heap_node_t, node); w = scnprintf(*(hargs->next), *(hargs->size), "\t%s/%d\n", request->task->comm, request->task->pid); *(hargs->size) -= w; *(hargs->next) += w; } static void __ikglp_donor_to_proc(struct binheap_node *n, void *args) { struct ikglp_proc_print_heap_args *hargs; ikglp_wait_state_t *donor_node; int w; if (!n) return; hargs = (struct ikglp_proc_print_heap_args*) args; donor_node = binheap_entry(n, ikglp_wait_state_t, node); w = scnprintf(*(hargs->next), *(hargs->size), "\t%s/%d (donee: %s/%d)\n", donor_node->task->comm, donor_node->task->pid, donor_node->donee_info->task->comm, donor_node->donee_info->task->pid); *(hargs->size) -= w; *(hargs->next) += w; } static int ikglp_proc_print(char *page, char **start, off_t off, int count, int *eof, void *data) { struct ikglp_semaphore *sem = ikglp_from_lock((struct litmus_lock*)data); int attempts = 
0; const int max_attempts = 10; int locked = 0; unsigned long flags; int size = count; char *next = page; struct ikglp_proc_print_heap_args heap_args = {sem, &size, &next}; int w; int i; while(attempts < max_attempts) { locked = raw_spin_trylock_irqsave(&sem->real_lock, flags); if (unlikely(!locked)) { ++attempts; cpu_relax(); } else { break; } } if (!locked) { w = scnprintf(next, size, "%s is busy.\n", sem->litmus_lock.name); size -= w; next += w; return count - size; } w = scnprintf(next, size, "nr_replicas: %u\n", sem->nr_replicas); size -= w; next += w; w = scnprintf(next, size, "max_fifo_len: %u\n", sem->max_fifo_len); size -= w; next += w; w = scnprintf(next, size, "max_in_fifos: %u\n", sem->max_in_fifos); size -= w; next += w; w = scnprintf(next, size, "current nr_in_fifos: %u\n", sem->nr_in_fifos); size -= w; next += w; w = scnprintf(next, size, "nr in top-m: %u\n\n", sem->top_m_size); size -= w; next += w; for (i = 0; i < sem->nr_replicas; ++i) { struct fifo_queue *fq = &sem->fifo_queues[i]; w = scnprintf(next, size, "replica %d: owner = %s/%d (Virtually Unlocked = %u), hp waiter = %s/%d, length = %u\n", i, (fq->owner) ? fq->owner->comm : "null", (fq->owner) ? fq->owner->pid : 0, fq->is_vunlocked, (fq->hp_waiter) ? fq->hp_waiter->comm : "null", (fq->hp_waiter) ? fq->hp_waiter->pid : 0, fq->count); size -= w; next += w; if (waitqueue_active(&fq->wait)) { struct list_head *pos; list_for_each(pos, &fq->wait.task_list) { wait_queue_t *q = list_entry(pos, wait_queue_t, task_list); struct task_struct *blocked_task = (struct task_struct*) q->private; w = scnprintf(next, size, "\t%s/%d (inh: %s/%d)\n", blocked_task->comm, blocked_task->pid, (tsk_rt(blocked_task)->inh_task) ? tsk_rt(blocked_task)->inh_task->comm : "null", (tsk_rt(blocked_task)->inh_task) ? 
tsk_rt(blocked_task)->inh_task->pid : 0); size -= w; next += w; } } else { w = scnprintf(next, size, "\t\n"); size -= w; next += w; } } if (binheap_empty(&sem->priority_queue)) { w = scnprintf(next, size, "pq:\n\t\n"); size -= w; next += w; } else { w = scnprintf(next, size, "donors:\n"); size -= w; next += w; binheap_for_each(&sem->priority_queue, __ikglp_pq_to_proc, &heap_args); } if (binheap_empty(&sem->donors)) { w = scnprintf(next, size, "donors:\n\t\n"); size -= w; next += w; } else { w = scnprintf(next, size, "donors:\n"); size -= w; next += w; binheap_for_each(&sem->donors, __ikglp_donor_to_proc, &heap_args); } raw_spin_unlock_irqrestore(&sem->real_lock, flags); return count - size; } static void ikglp_add_global_list(struct ikglp_semaphore *sem, struct task_struct *t, ikglp_heap_node_t *node) { node->task = t; INIT_BINHEAP_NODE(&node->node); if(sem->top_m_size < sem->max_in_fifos) { TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", t->comm, t->pid); binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); ++(sem->top_m_size); } else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { ikglp_heap_node_t *evicted = binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n", t->comm, t->pid, evicted->task->comm, evicted->task->pid); binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); INIT_BINHEAP_NODE(&evicted->node); binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); #if 0 TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); print_global_list(sem->top_m.root, 1); TRACE_CUR("Not-Top-M After:\n"); print_global_list(sem->not_top_m.root, 1); #endif } else { TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", t->comm, t->pid); binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); #if 0 TRACE_CUR("Not-Top-M After:\n"); print_global_list(sem->not_top_m.root, 1); #endif } } static void ikglp_del_global_list(struct ikglp_semaphore *sem, struct task_struct *t, ikglp_heap_node_t *node) { BUG_ON(!binheap_is_in_heap(&node->node)); TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid); if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); binheap_delete(&node->node, &sem->top_m); if(!binheap_empty(&sem->not_top_m)) { ikglp_heap_node_t *promoted = binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node); TRACE_CUR("Promoting %s/%d to top-m\n", promoted->task->comm, promoted->task->pid); binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node); INIT_BINHEAP_NODE(&promoted->node); binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node); } else { TRACE_CUR("No one to promote to top-m.\n"); --(sem->top_m_size); } #if 0 TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); print_global_list(sem->top_m.root, 1); TRACE_CUR("Not-Top-M After:\n"); print_global_list(sem->not_top_m.root, 1); #endif } else { TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); binheap_delete(&node->node, &sem->not_top_m); #if 0 TRACE_CUR("Not-Top-M After:\n"); print_global_list(sem->not_top_m.root, 1); #endif } } static void ikglp_add_donees(struct ikglp_semaphore *sem, struct fifo_queue *fq, struct task_struct *t, ikglp_donee_heap_node_t* node) { node->task = t; node->donor_info = NULL; node->fq = fq; INIT_BINHEAP_NODE(&node->node); binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node); #if 0 TRACE_CUR("donees After:\n"); 
print_donees(sem, sem->donees.root, 1); #endif } static void ikglp_refresh_owners_prio_increase(struct task_struct *t, struct fifo_queue *fq, struct ikglp_semaphore *sem, unsigned long flags) { // priority of 't' has increased (note: 't' might already be hp_waiter). if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { struct task_struct *old_max_eff_prio; struct task_struct *new_max_eff_prio; struct task_struct *new_prio = NULL; struct task_struct *owner = fq->owner; if(fq->hp_waiter) TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", fq->hp_waiter->comm, fq->hp_waiter->pid); else TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); if(owner) { raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); // TRACE_TASK(owner, "Heap Before:\n"); // print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); if (unlikely(binheap_empty(&tsk_rt(owner)->hp_blocked_tasks))) { TRACE_TASK(owner, "not drawing inheritance from fq %d.\n", ikglp_get_idx(sem, fq)); raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); WARN_ON(1); return; } old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); fq->hp_waiter = t; fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); binheap_decrease(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); // TRACE_TASK(owner, "Heap After:\n"); // print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); if(new_max_eff_prio != old_max_eff_prio) { TRACE_TASK(t, "is new hp_waiter.\n"); if ((effective_priority(owner) == old_max_eff_prio) || (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){ new_prio = new_max_eff_prio; } } else { TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); } if(new_prio) { // set new inheritance and propagate TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n", owner->comm, owner->pid, new_prio->comm, new_prio->pid); litmus->nested_increase_prio(owner, new_prio, &sem->lock, flags); // unlocks lock. } else { TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n", new_max_eff_prio->comm, new_max_eff_prio->pid); raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); unlock_fine_irqrestore(&sem->lock, flags); } } else { fq->hp_waiter = t; fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); TRACE_TASK(t, "no owner.\n"); unlock_fine_irqrestore(&sem->lock, flags); } } else { TRACE_TASK(t, "hp_waiter is unaffected.\n"); unlock_fine_irqrestore(&sem->lock, flags); } } // hp_waiter has decreased static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq, struct ikglp_semaphore *sem, unsigned long flags, int budget_triggered) { struct task_struct *owner = fq->owner; struct task_struct *old_max_eff_prio; struct task_struct *new_max_eff_prio; if(!owner) { TRACE_CUR("No owner. Returning.\n"); unlock_fine_irqrestore(&sem->lock, flags); return; } TRACE_CUR("ikglp_refresh_owners_prio_decrease\n"); raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); if (unlikely(binheap_empty(&tsk_rt(owner)->hp_blocked_tasks))) { TRACE_TASK(owner, "not drawing inheritance from fq %d.\n", ikglp_get_idx(sem, fq)); raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); unlock_fine_irqrestore(&sem->lock, flags); WARN_ON(1); return; } old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? 
effective_priority(fq->hp_waiter) : NULL; binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks, struct nested_info, hp_binheap_node); new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); if((old_max_eff_prio != new_max_eff_prio) && (effective_priority(owner) == old_max_eff_prio)) { // Need to set new effective_priority for owner struct task_struct *decreased_prio; TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", ikglp_get_idx(sem, fq)); if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", (new_max_eff_prio) ? new_max_eff_prio->comm : "null", (new_max_eff_prio) ? new_max_eff_prio->pid : 0, owner->comm, owner->pid, ikglp_get_idx(sem, fq)); decreased_prio = new_max_eff_prio; } else { TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n", (new_max_eff_prio) ? new_max_eff_prio->comm : "null", (new_max_eff_prio) ? new_max_eff_prio->pid : 0, owner->comm, owner->pid, ikglp_get_idx(sem, fq)); decreased_prio = NULL; } // beware: recursion litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags, budget_triggered); // will unlock mutex->lock } else { TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); unlock_fine_irqrestore(&sem->lock, flags); } } static void ikglp_remove_donation_from_owner(struct binheap_node *n, struct fifo_queue *fq, struct ikglp_semaphore *sem, unsigned long flags) { struct task_struct *owner = fq->owner; struct task_struct *old_max_eff_prio; struct task_struct *new_max_eff_prio; BUG_ON(!owner); raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks); new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); if((old_max_eff_prio != new_max_eff_prio) && (effective_priority(owner) == old_max_eff_prio)) { // Need to set new effective_priority for owner struct task_struct *decreased_prio; TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", ikglp_get_idx(sem, fq)); if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", ikglp_get_idx(sem, fq)); decreased_prio = new_max_eff_prio; } else { TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n", ikglp_get_idx(sem, fq)); decreased_prio = NULL; } // beware: recursion litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags, 0); // will unlock mutex->lock } else { TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); unlock_fine_irqrestore(&sem->lock, flags); } } static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t, struct binheap_node *n) { struct task_struct *old_max_eff_prio; struct task_struct *new_max_eff_prio; raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); TRACE_CUR("Removing donation from fq waiter %s/%d\n", t->comm, t->pid); old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks); new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); if((old_max_eff_prio != new_max_eff_prio) && (effective_priority(t) == old_max_eff_prio)) { // Need to set new effective_priority for owner struct task_struct *decreased_prio; if(litmus->__compare(new_max_eff_prio, 
BASE, t, BASE)) { decreased_prio = new_max_eff_prio; } else { decreased_prio = NULL; } // no need to propagate decreased inheritance to AUX // or klmirqd tasks since they cannot (should not) inherit // a priority directly from us while we suspend on a litmus // lock. tsk_rt(t)->inh_task = decreased_prio; } raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); } static void ikglp_get_immediate(struct task_struct* t, struct fifo_queue *fq, struct ikglp_semaphore *sem, unsigned long flags) { // resource available now TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq)); fq->owner = t; raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, struct nested_info, hp_binheap_node); raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); ++(fq->count); // even though we got the replica, we're still considered in the fifo ++(sem->nr_in_fifos); ikglp_add_global_list(sem, t, &fq->global_heap_node); ikglp_add_donees(sem, fq, t, &fq->donee_heap_node); sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) { sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t); } #endif unlock_fine_irqrestore(&sem->lock, flags); } static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, struct fifo_queue *fq, ikglp_wait_state_t *wait, ikglp_heap_node_t *global_heap_node, ikglp_donee_heap_node_t *donee_heap_node) { struct task_struct *t = wait->task; /* resource is not free => must suspend and wait */ TRACE_TASK(t, "Enqueuing on fq %d.\n", ikglp_get_idx(sem, fq)); init_waitqueue_entry(&wait->fq_node, t); __add_wait_queue_tail_exclusive(&fq->wait, &wait->fq_node); ++(fq->count); ++(sem->nr_in_fifos); // update global list. if(likely(global_heap_node)) { if(binheap_is_in_heap(&global_heap_node->node)) { WARN_ON(1); ikglp_del_global_list(sem, t, global_heap_node); } ikglp_add_global_list(sem, t, global_heap_node); } // update donor eligiblity list. if(likely(donee_heap_node)) ikglp_add_donees(sem, fq, t, donee_heap_node); if(sem->shortest_fifo_queue == fq) sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); #endif wait->cur_q = IKGLP_FQ; wait->fq = fq; TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); } static void ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, struct fifo_queue *fq, ikglp_wait_state_t *wait, unsigned long flags) { /* resource is not free => must suspend and wait */ TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n", ikglp_get_idx(sem, fq)); INIT_BINHEAP_NODE(&wait->global_heap_node.node); INIT_BINHEAP_NODE(&wait->donee_heap_node.node); __ikglp_enqueue_on_fq(sem, fq, wait, &wait->global_heap_node, &wait->donee_heap_node); ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock } static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, ikglp_wait_state_t *wait) { TRACE_TASK(wait->task, "goes to PQ.\n"); wait->pq_node.task = wait->task; // copy over task (little redundant...) 
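	/* Note: requests parked in the PQ are those whose base priority is not
	 * among the top-m (see the check against ikglp_mth_highest() in
	 * ikglp_lock()). They occupy no FIFO slot and set up no donation;
	 * they are pulled into an FQ later by ikglp_move_next_to_fq() as
	 * slots open up. */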
binheap_add(&wait->pq_node.node, &sem->priority_queue, ikglp_heap_node_t, node); wait->cur_q = IKGLP_PQ; } static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, ikglp_wait_state_t *wait) { INIT_BINHEAP_NODE(&wait->global_heap_node.node); INIT_BINHEAP_NODE(&wait->donee_heap_node.node); INIT_BINHEAP_NODE(&wait->pq_node.node); __ikglp_enqueue_on_pq(sem, wait); } static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, ikglp_wait_state_t* wait, unsigned long flags) { struct task_struct *t = wait->task; ikglp_donee_heap_node_t *donee_node = NULL; struct task_struct *donee; struct task_struct *old_max_eff_prio; struct task_struct *new_max_eff_prio; struct task_struct *new_prio = NULL; INIT_BINHEAP_NODE(&wait->global_heap_node.node); INIT_BINHEAP_NODE(&wait->donee_heap_node.node); INIT_BINHEAP_NODE(&wait->pq_node.node); INIT_BINHEAP_NODE(&wait->node); // TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid); // TRACE_CUR("donors Before:\n"); // print_donors(sem->donors.root, 1); // Add donor to the global list. ikglp_add_global_list(sem, t, &wait->global_heap_node); // Select a donee #ifdef CONFIG_LITMUS_AFFINITY_LOCKING donee_node = (sem->aff_obs) ? sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) : binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); #else donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); #endif donee = donee_node->task; TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); TRACE_CUR("Temporarily removing %s/%d to donee list.\n", donee->comm, donee->pid); // TRACE_CUR("donees Before:\n"); // print_donees(sem, sem->donees.root, 1); //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly binheap_delete(&donee_node->node, &sem->donees); // TRACE_CUR("donees After:\n"); // print_donees(sem, sem->donees.root, 1); wait->donee_info = donee_node; // Add t to donor heap. binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node); // Now adjust the donee's priority. // Lock the donee's inheritance heap. raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock); old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); if(donee_node->donor_info) { // Steal donation relation. Evict old donor to PQ. // Remove old donor from donor heap ikglp_wait_state_t *old_wait = donee_node->donor_info; struct task_struct *old_donor = old_wait->task; TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n", donee->comm, donee->pid, old_donor->comm, old_donor->pid); binheap_delete(&old_wait->node, &sem->donors); // Remove donation from donee's inheritance heap. binheap_delete(&old_wait->prio_donation.hp_binheap_node, &tsk_rt(donee)->hp_blocked_tasks); // WARNING: have not updated inh_prio! // Add old donor to PQ. __ikglp_enqueue_on_pq(sem, old_wait); // Remove old donor from the global heap. ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node); } // Add back donee's node to the donees heap with increased prio donee_node->donor_info = wait; INIT_BINHEAP_NODE(&donee_node->node); TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid); // TRACE_CUR("donees Before:\n"); // print_donees(sem, sem->donees.root, 1); binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node); // TRACE_CUR("donees After:\n"); // print_donees(sem, sem->donees.root, 1); // Add an inheritance/donation to the donee's inheritance heap. 
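	/* The donation is expressed as a nested_info node added to the donee's
	 * hp_blocked_tasks heap, so the generic nested-inheritance code treats
	 * a donated priority just like priority inherited through a lock the
	 * donee holds. hp_waiter_eff_prio is pinned to the donor task itself
	 * and hp_waiter_ptr is left NULL, since there is no FIFO queue (and
	 * hence no changing hp_waiter) behind a donation. */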
wait->prio_donation.lock = (struct litmus_lock*)sem; wait->prio_donation.hp_waiter_eff_prio = t; wait->prio_donation.hp_waiter_ptr = NULL; INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node); binheap_add(&wait->prio_donation.hp_binheap_node, &tsk_rt(donee)->hp_blocked_tasks, struct nested_info, hp_binheap_node); new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); if(new_max_eff_prio != old_max_eff_prio) { if ((effective_priority(donee) == old_max_eff_prio) || (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", donee->comm, donee->pid); new_prio = new_max_eff_prio; } // else { // // should be bug. donor would not be in top-m. // TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid); // WARN_ON(1); // } // } // else { // // should be bug. donor would not be in top-m. // TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid); // WARN_ON(1); } if(new_prio) { struct fifo_queue *donee_fq = donee_node->fq; if(donee != donee_fq->owner) { TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n", donee->comm, donee->pid, donee_fq->owner->comm, donee_fq->owner->pid); raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock } else { TRACE_TASK(t, "%s/%d is the owner. Propagating priority immediatly.\n", donee->comm, donee->pid); litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock } } else { TRACE_TASK(t, "No change in effective priority (it is %s/%d).\n", (new_max_eff_prio) ? new_max_eff_prio->comm : "null", (new_max_eff_prio) ? new_max_eff_prio->pid : 0); raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); unlock_fine_irqrestore(&sem->lock, flags); } wait->cur_q = IKGLP_DONOR; // TRACE_CUR("donors After:\n"); // print_donors(sem->donors.root, 1); } int ikglp_lock(struct litmus_lock* l) { struct task_struct* t = current; struct ikglp_semaphore *sem = ikglp_from_lock(l); unsigned long flags = 0, more_flags; struct fifo_queue *fq = NULL; int replica = -EINVAL; #ifdef CONFIG_LITMUS_DGL_SUPPORT raw_spinlock_t *dgl_lock; #endif ikglp_wait_state_t wait; if (!is_realtime(t)) return -EPERM; memset(&wait, 0, sizeof(wait)); #ifdef CONFIG_LITMUS_DGL_SUPPORT dgl_lock = litmus->get_dgl_spinlock(t); #endif lock_global_irqsave(dgl_lock, flags); raw_spin_lock_irqsave(&sem->real_lock, more_flags); lock_fine_irqsave(&sem->lock, flags); TRACE_CUR("Requesting a replica from lock %d.\n", l->ident); if(sem->nr_in_fifos < sem->max_in_fifos) { // enqueue somwhere #ifdef CONFIG_LITMUS_AFFINITY_LOCKING fq = (sem->aff_obs) ? sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : sem->shortest_fifo_queue; #else fq = sem->shortest_fifo_queue; #endif if(fq->count == 0) { // take available resource replica = ikglp_get_idx(sem, fq); TRACE_CUR("Getting replica %d\n", replica); ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); unlock_global_irqrestore(dgl_lock, flags); goto acquired; } else { TRACE_CUR("Will go on FQ somewhere.\n"); wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! 
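		/* Record where we are about to block before enqueueing.
		 * ikglp_budget_exhausted() uses tsk_rt(t)->blocked_lock to tell
		 * whether the task is still blocked on this IKGLP, and
		 * blocked_lock_data (set below, before we suspend) to locate
		 * the wait state it must abort and reissue. */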
tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked mb(); /* FIXME: interruptible would be nice some day */ set_task_state(t, TASK_UNINTERRUPTIBLE); ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock } } else { TRACE_CUR("Going on a heap.\n"); // donor! wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked mb(); /* FIXME: interruptible would be nice some day */ set_task_state(t, TASK_UNINTERRUPTIBLE); if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { TRACE_CUR("Going on PQ heap.\n"); // enqueue on PQ ikglp_enqueue_on_pq(sem, &wait); unlock_fine_irqrestore(&sem->lock, flags); } else { // enqueue as donor TRACE_CUR("Going on donor heap.\n"); ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock } } tsk_rt(t)->blocked_lock_data = (unsigned long)&wait; raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); unlock_global_irqrestore(dgl_lock, flags); TRACE_CUR("Suspending for replica.\n"); TS_LOCK_SUSPEND; suspend_for_lock(); TS_LOCK_RESUME; fq = wait.fq; BUG_ON(fq != ikglp_get_queue(sem, t)); tsk_rt(t)->blocked_lock_data = 0; replica = ikglp_get_idx(sem, fq); acquired: TRACE_CUR("Acquired lock %d, queue %d\n", l->ident, replica); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) { return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); } #endif return replica; } static void __drop_from_donor(struct ikglp_semaphore *sem, ikglp_wait_state_t *wait) { BUG_ON(wait->cur_q != IKGLP_DONOR); TRACE_TASK(wait->task, "is being dropped from donor heap.\n"); binheap_delete(&wait->node, &sem->donors); wait->cur_q = IKGLP_INVL; } static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, struct fifo_queue *fq, ikglp_wait_state_t *donor_info) { struct task_struct *t = donor_info->task; TRACE_CUR("Donor %s/%d being moved to fq %d\n", t->comm, t->pid, ikglp_get_idx(sem, fq)); __drop_from_donor(sem, donor_info); __ikglp_enqueue_on_fq(sem, fq, donor_info, NULL, // already in global_list, so pass null to prevent adding 2nd time. &donor_info->donee_heap_node); // warning: // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. } static void __drop_from_pq(struct ikglp_semaphore *sem, ikglp_wait_state_t *wait) { BUG_ON(wait->cur_q != IKGLP_PQ); TRACE_TASK(wait->task, "is being dropped from the PQ.\n"); binheap_delete(&wait->pq_node.node, &sem->priority_queue); wait->cur_q = IKGLP_INVL; } static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem, struct fifo_queue *fq, ikglp_wait_state_t *wait) { struct task_struct *t = wait->task; TRACE_CUR("PQ request %s/%d being moved to fq %d\n", t->comm, t->pid, ikglp_get_idx(sem, fq)); __drop_from_pq(sem, wait); __ikglp_enqueue_on_fq(sem, fq, wait, &wait->global_heap_node, &wait->donee_heap_node); // warning: // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. } static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal( struct ikglp_semaphore* sem, struct fifo_queue* skip) { /* must hold sem->lock */ struct fifo_queue *fq = NULL; struct list_head *pos; struct task_struct *queued; int i; for(i = 0; i < sem->nr_replicas; ++i) { if( (sem->fifo_queues[i].count > 1) && (&sem->fifo_queues[i] != skip) && (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", ikglp_get_idx(sem, &sem->fifo_queues[i]), sem->fifo_queues[i].hp_waiter->comm, sem->fifo_queues[i].hp_waiter->pid, (fq) ? 
ikglp_get_idx(sem, fq) : 0, (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "null") : "nullXX", (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : 0) : -2); fq = &sem->fifo_queues[i]; WARN_ON(!(fq->hp_waiter)); } } if(fq) { struct task_struct *max_hp = fq->hp_waiter; ikglp_wait_state_t* ret = NULL; TRACE_CUR("Searching for %s/%d on fq %d\n", max_hp->comm, max_hp->pid, ikglp_get_idx(sem, fq)); BUG_ON(!max_hp); list_for_each(pos, &fq->wait.task_list) { wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list); queued = (struct task_struct*) wait->private; TRACE_CUR("fq %d entry: %s/%d\n", ikglp_get_idx(sem, fq), queued->comm, queued->pid); /* Compare task prios, find high prio task. */ if (queued == max_hp) { TRACE_CUR("Found it!\n"); ret = container_of(wait, ikglp_wait_state_t, fq_node); } } WARN_ON(!ret); return ret; } return(NULL); } static void __drop_from_fq(struct ikglp_semaphore *sem, ikglp_wait_state_t *wait) { struct task_struct *t = wait->task; struct fifo_queue *fq = wait->fq; BUG_ON(wait->cur_q != IKGLP_FQ); BUG_ON(!fq); TRACE_TASK(t, "is being dropped from fq.\n"); __remove_wait_queue(&fq->wait, &wait->fq_node); --(fq->count); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); #endif if(t == fq->hp_waiter) { fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL); TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", ikglp_get_idx(sem, fq), (fq->hp_waiter) ? fq->hp_waiter->comm : "null", (fq->hp_waiter) ? fq->hp_waiter->pid : 0); } // Update shortest. if(fq->count < sem->shortest_fifo_queue->count) sem->shortest_fifo_queue = fq; --(sem->nr_in_fifos); wait->cur_q = IKGLP_INVL; } static void ikglp_steal_to_fq(struct ikglp_semaphore *sem, struct fifo_queue *fq, ikglp_wait_state_t *fq_wait) { WARN_ON(fq_wait->fq != fq_wait->donee_heap_node.fq); // __drop_from_fq(sem, fq_wait->donee_heap_node.fq, fq_wait); __drop_from_fq(sem, fq_wait); fq_wait->donee_heap_node.fq = fq; // just to be safe __ikglp_enqueue_on_fq(sem, fq, fq_wait, NULL, NULL); // warning: We have not checked the priority inheritance of fq's owner yet. } static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem, struct fifo_queue *fq, ikglp_wait_state_t *old_wait) { struct task_struct *t = old_wait->task; BUG_ON(old_wait->donee_heap_node.fq != fq); TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n", ikglp_get_idx(sem, fq)); // need to migrate global_heap_node and donee_heap_node off of the stack // to the nodes allocated for the owner of this fq. // TODO: Enhance binheap() to perform this operation in place. 
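	/* The wait state (and the heap nodes embedded in it) lives on the stack
	 * of the task that is about to become the owner (see the local
	 * ikglp_wait_state_t in ikglp_lock()). Once that task is woken and
	 * returns, its stack frame goes away, so the global-list and donee
	 * nodes are copied into the nodes embedded in the fifo_queue and
	 * re-linked into the heaps below. */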
ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove fq->global_heap_node = old_wait->global_heap_node; // copy ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove fq->donee_heap_node = old_wait->donee_heap_node; // copy if(fq->donee_heap_node.donor_info) { // let donor know that our location has changed BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node; } INIT_BINHEAP_NODE(&fq->donee_heap_node.node); binheap_add(&fq->donee_heap_node.node, &sem->donees, ikglp_donee_heap_node_t, node); // re-add } void ikglp_grant_replica_to_next(struct ikglp_semaphore *sem, struct fifo_queue *fq) { wait_queue_t *wait; ikglp_wait_state_t *fq_wait; struct task_struct *next; BUG_ON(!waitqueue_active(&fq->wait)); wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); fq_wait = container_of(wait, ikglp_wait_state_t, fq_node); next = (struct task_struct*) wait->private; __remove_wait_queue(&fq->wait, wait); TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", ikglp_get_idx(sem, fq), next->comm, next->pid); // migrate wait-state to fifo-memory. ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait); /* next becomes the resouce holder */ fq->owner = next; tsk_rt(next)->blocked_lock = NULL; #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) { sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next); } #endif /* determine new hp_waiter if necessary */ if (next == fq->hp_waiter) { TRACE_TASK(next, "was highest-prio waiter\n"); /* next has the highest priority --- it doesn't need to * inherit. However, we need to make sure that the * next-highest priority in the queue is reflected in * hp_waiter. */ fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL); TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n", ikglp_get_idx(sem, fq), (fq->hp_waiter) ? fq->hp_waiter->comm : "null", (fq->hp_waiter) ? fq->hp_waiter->pid : 0); fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? effective_priority(fq->hp_waiter) : NULL; if (fq->hp_waiter) TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n"); else TRACE("no further waiters\n"); raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(next)->hp_blocked_tasks, struct nested_info, hp_binheap_node); raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); } else { /* Well, if 'next' is not the highest-priority waiter, * then it (probably) ought to inherit the highest-priority * waiter's priority. */ TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n", ikglp_get_idx(sem, fq), (fq->hp_waiter) ? fq->hp_waiter->comm : "null", (fq->hp_waiter) ? fq->hp_waiter->pid : 0); raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(next)->hp_blocked_tasks, struct nested_info, hp_binheap_node); /* It is possible that 'next' *should* be the hp_waiter, but isn't * because that update hasn't yet executed (update operation is * probably blocked on mutex->lock). So only inherit if the top of * 'next's top heap node is indeed the effective prio. of hp_waiter. * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter) * since the effective priority of hp_waiter can change (and the * update has not made it to this lock).) 
*/ if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == fq->nest.hp_waiter_eff_prio)) { if(fq->nest.hp_waiter_eff_prio) litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio); else WARN_ON(1); } raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); } // wake up the new resource holder! wake_up_for_lock(next); } #define ALLOW_STEALING 1 #define ALWAYS_TERMINATE_DONATION 1 void ikglp_move_next_to_fq(struct ikglp_semaphore *sem, struct fifo_queue *fq, struct task_struct *t, ikglp_donee_heap_node_t *donee_node, unsigned long *flags, int allow_stealing, int always_terminate_donation) { struct task_struct *donee = NULL; struct task_struct *new_on_fq = NULL; struct fifo_queue *fq_of_new_on_fq = NULL; ikglp_wait_state_t *other_donor_info = NULL; struct fifo_queue *to_steal = NULL; int need_steal_prio_reeval = 0; if (donee_node->donor_info) { ikglp_wait_state_t *donor_info = donee_node->donor_info; new_on_fq = donor_info->task; // donor moved to FQ donee = t; #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); if((nominal_fq_len(fq_of_new_on_fq) >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { WARN_ON(1); fq_of_new_on_fq = fq; } } else fq_of_new_on_fq = fq; #else fq_of_new_on_fq = fq; #endif TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", new_on_fq->comm, new_on_fq->pid, ikglp_get_idx(sem, fq_of_new_on_fq), ikglp_get_idx(sem, fq)); ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info); /* treat donor as if it had donated to a task other than 't'. * this triggers the termination of the donation relationship. */ if (always_terminate_donation) other_donor_info = donor_info; } else if(!binheap_empty(&sem->donors)) { // No donor, so move any donor to FQ // Select a donor #ifdef CONFIG_LITMUS_AFFINITY_LOCKING other_donor_info = (sem->aff_obs) ? sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) : binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); #else other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); #endif new_on_fq = other_donor_info->task; donee = other_donor_info->donee_info->task; // update the donee's heap position. 
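		/* Detaching the donor reverts the donee's key in sem->donees from
		 * its donor's priority back to its own base priority (see
		 * ikglp_min_heap_donee_order()), which should only move the node
		 * toward the root of this min-ordered heap; hence binheap_decrease()
		 * rather than a full delete/re-insert. */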
other_donor_info->donee_info->donor_info = NULL; // clear the cross-link binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); if((nominal_fq_len(fq_of_new_on_fq) >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { WARN_ON(1); fq_of_new_on_fq = fq; } } else fq_of_new_on_fq = fq; #else fq_of_new_on_fq = fq; #endif TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", new_on_fq->comm, new_on_fq->pid, ikglp_get_idx(sem, fq_of_new_on_fq), ikglp_get_idx(sem, fq)); ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info); } else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue, ikglp_heap_node_t, node); ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t, pq_node); new_on_fq = pq_wait->task; #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) { fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); if((nominal_fq_len(fq_of_new_on_fq) >= sem->max_fifo_len) && !sem->aff_obs->relax_max_fifo_len) { WARN_ON(1); fq_of_new_on_fq = fq; } } else fq_of_new_on_fq = fq; #else fq_of_new_on_fq = fq; #endif TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n", new_on_fq->comm, new_on_fq->pid, ikglp_get_idx(sem, fq_of_new_on_fq), ikglp_get_idx(sem, fq)); ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait); } else if(allow_stealing && fq->count == 0) { // No PQ and this queue is empty, so steal. ikglp_wait_state_t *fq_wait; TRACE_TASK(t, "Looking to steal a request for fq %d...\n", ikglp_get_idx(sem, fq)); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING fq_wait = (sem->aff_obs) ? sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) : ikglp_find_hp_waiter_to_steal(sem, fq); #else fq_wait = ikglp_find_hp_waiter_to_steal(sem, fq); #endif if(fq_wait) { to_steal = fq_wait->donee_heap_node.fq; new_on_fq = fq_wait->task; fq_of_new_on_fq = fq; need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter); TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n", new_on_fq->comm, new_on_fq->pid, ikglp_get_idx(sem, to_steal), ikglp_get_idx(sem, fq)); ikglp_steal_to_fq(sem, fq, fq_wait); } else { TRACE_TASK(t, "Found nothing to steal for fq %d.\n", ikglp_get_idx(sem, fq)); } } else { // move no one } // Now patch up other priorities. // // At most one of the following: // if(donee && donee != t), decrease prio, propagate to owner, or onward // if(to_steal), update owner's prio (hp_waiter has already been set) // BUG_ON(other_donor_info && to_steal); if(other_donor_info) { struct fifo_queue *other_fq = other_donor_info->donee_info->fq; BUG_ON(!donee); BUG_ON(!always_terminate_donation && donee == t); TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n", other_donor_info->task->comm, other_donor_info->task->pid, donee->comm, donee->pid); // need to terminate donation relation. if(donee == other_fq->owner) { TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n", donee->comm, donee->pid, ikglp_get_idx(sem, other_fq)); ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, *flags); lock_fine_irqsave(&sem->lock, *flags); // there should be no contention!!!! 
} else { TRACE_TASK(t, "Donee %s/%d is blocked in of fq %d.\n", donee->comm, donee->pid, ikglp_get_idx(sem, other_fq)); ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node); if(donee == other_fq->hp_waiter) { TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n", donee->comm, donee->pid, ikglp_get_idx(sem, other_fq)); other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL); TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", ikglp_get_idx(sem, other_fq), (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "null", (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : 0); ikglp_refresh_owners_prio_decrease(other_fq, sem, *flags, 0); // unlocks sem->lock. reacquire it. lock_fine_irqsave(&sem->lock, *flags); // there should be no contention!!!! } } } else if(to_steal) { TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n", ikglp_get_idx(sem, to_steal)); if(need_steal_prio_reeval) { ikglp_refresh_owners_prio_decrease(to_steal, sem, *flags, 0); // unlocks sem->lock. reacquire it. lock_fine_irqsave(&sem->lock, *flags); // there should be no contention!!!! } } // check for new HP waiter. if(new_on_fq) { ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, *flags); // unlocks sem->lock. reacquire it. lock_fine_irqsave(&sem->lock, *flags); // there should be no contention!!!! } /* we moved a request to an empty FQ. wake it up */ if(unlikely(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1)) { ikglp_grant_replica_to_next(sem, fq_of_new_on_fq); } } int ikglp_unlock(struct litmus_lock* l) { struct ikglp_semaphore *sem = ikglp_from_lock(l); struct task_struct *t = current; struct fifo_queue *fq; #ifdef CONFIG_LITMUS_DGL_SUPPORT raw_spinlock_t *dgl_lock; #endif unsigned long flags = 0, more_flags; int err = 0; fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. if (!fq) { err = -EINVAL; goto out; } #ifdef CONFIG_LITMUS_DGL_SUPPORT dgl_lock = litmus->get_dgl_spinlock(t); #endif lock_global_irqsave(dgl_lock, flags); raw_spin_lock_irqsave(&sem->real_lock, more_flags); lock_fine_irqsave(&sem->lock, flags); TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); // Remove 't' from the heaps, but data in nodes will still be good. ikglp_del_global_list(sem, t, &fq->global_heap_node); binheap_delete(&fq->donee_heap_node.node, &sem->donees); fq->owner = NULL; // no longer owned!! --(fq->count); if(fq->count < sem->shortest_fifo_queue->count) { sem->shortest_fifo_queue = fq; } if (likely(!fq->is_vunlocked)) --(sem->nr_in_fifos); else TRACE_TASK(t, "virtually unlocked. handing off replica only.\n"); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) { sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t); } #endif // 't' must drop all priority and clean up data structures before hand-off. // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST // This kills any inheritance from a donor. raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); { int count = 0; TRACE_TASK(t, "discarding _all_ inheritance because IKGLP is outermost\n"); while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) { binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks, struct nested_info, hp_binheap_node); ++count; } if (count) litmus->decrease_prio(t, NULL, 0); WARN_ON(count > 2); // should not be greater than 2. only local fq inh and donation can be possible. 
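		/* Because the IKGLP is required to be the outermost lock a task
		 * holds, the only inheritance 't' can be drawing here is from this
		 * fq's hp_waiter and from at most one donor. Hence the heap should
		 * never hold more than two entries, and it is safe to discard them
		 * all before handing the replica off. */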
} raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); if (likely(!fq->is_vunlocked)) { // Move the next request into the FQ and update heaps as needed. // Skip this step we already did this during the virtual unlock. ikglp_move_next_to_fq(sem, fq, t, &fq->donee_heap_node, &flags, ALLOW_STEALING, !ALWAYS_TERMINATE_DONATION); } else fq->is_vunlocked = 0; /* reset vunlock flag */ if (waitqueue_active(&fq->wait)) ikglp_grant_replica_to_next(sem, fq); unlock_fine_irqrestore(&sem->lock, flags); raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); unlock_global_irqrestore(dgl_lock, flags); TRACE_CUR("done with freeing replica.\n"); out: return err; } void ikglp_abort_request(struct ikglp_semaphore *sem, struct task_struct *t, unsigned long flags) { ikglp_wait_state_t *wait = (ikglp_wait_state_t*)tsk_rt(t)->blocked_lock_data; ikglp_donee_heap_node_t *donee_info; struct task_struct *donee; struct fifo_queue *donee_fq; struct fifo_queue *fq = wait->fq; BUG_ON(!wait); /* drop the request from the proper IKGLP data structure and re-eval * priority relations */ switch(wait->cur_q) { case IKGLP_PQ: // No one inherits from waiters in PQ. Just drop the request. __drop_from_pq(sem, wait); break; case IKGLP_FQ: ikglp_del_global_list(sem, t, &wait->global_heap_node); binheap_delete(&wait->donee_heap_node.node, &sem->donees); /* remove the task from the FQ */ #ifdef CONFIG_LITMUS_AFFINITY_LOCKING if(sem->aff_obs) sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); #endif __drop_from_fq(sem, wait); // Drop any and all inheritance t receives. raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); { int count = 0; TRACE_TASK(t, "discarding _all_ inheritance because IKGLP is outermost\n"); while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) { binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks, struct nested_info, hp_binheap_node); ++count; } if (count) litmus->decrease_prio(t, NULL, 0); WARN_ON(count > 2); // should not be greater than 2. only local fq inh and donation can be possible. } raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); ikglp_refresh_owners_prio_decrease(wait->donee_heap_node.fq, sem, flags, 1); // unlocks sem->lock. reacquire it. lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! ikglp_move_next_to_fq(sem, fq, t, &wait->donee_heap_node, &flags, ALLOW_STEALING, !ALWAYS_TERMINATE_DONATION); break; case IKGLP_DONOR: ikglp_del_global_list(sem, t, &wait->global_heap_node); __drop_from_donor(sem, wait); /* update donee */ donee_info = wait->donee_info; donee_info->donor_info = NULL; // clear the cross-link binheap_decrease(&donee_info->node, &sem->donees); donee = donee_info->task; donee_fq = donee_info->fq; if (donee == donee_fq->owner) { TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n", donee->comm, donee->pid, ikglp_get_idx(sem, donee_fq)); ikglp_remove_donation_from_owner(&wait->prio_donation.hp_binheap_node, donee_fq, sem, flags); // unlocks sem->lock. reacquire it. lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! } else { TRACE_TASK(t, "Donee %s/%d is blocked in of fq %d.\n", donee->comm, donee->pid, ikglp_get_idx(sem, donee_fq)); ikglp_remove_donation_from_fq_waiter(donee, &wait->prio_donation.hp_binheap_node); if(donee == donee_fq->hp_waiter) { TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. 
Rechecking hp_waiter.\n", donee->comm, donee->pid, ikglp_get_idx(sem, donee_fq)); donee_fq->hp_waiter = ikglp_find_hp_waiter(donee_fq, NULL); TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", ikglp_get_idx(sem, donee_fq), (donee_fq->hp_waiter) ? donee_fq->hp_waiter->comm : "null", (donee_fq->hp_waiter) ? donee_fq->hp_waiter->pid : 0); ikglp_refresh_owners_prio_decrease(donee_fq, sem, flags, 1); // unlocks sem->lock. reacquire it. lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! } } break; default: BUG(); } BUG_ON(wait->cur_q != IKGLP_INVL); /* state should now be invalid */ } void ikglp_budget_exhausted(struct litmus_lock* l, struct task_struct* t) { /* * PRE: (1) Our deadline has already been postponed. * (2) DLG lock is already held of DGLs are supported. * * Exhaustion Response: Remove request from locks and re-issue it. * * step 1: first check that we are actually blocked. * step 2: remove our request from ANY data structure: * step 3: reissue the request */ struct ikglp_semaphore *sem = ikglp_from_lock(l); struct litmus_lock* blocked_lock; unsigned long flags = 0, more_flags; raw_spin_lock_irqsave(&sem->real_lock, more_flags); lock_fine_irqsave(&sem->lock, flags); blocked_lock = tsk_rt(t)->blocked_lock; if (blocked_lock == l) { ikglp_wait_state_t *wait; // TRACE_TASK(t, "IKGLP before reject:\n"); // __ikglp_dump_state(sem); ikglp_abort_request(sem, t, flags); // TRACE_TASK(t, "IKGLP after reject (before reissue):\n"); // __ikglp_dump_state(sem); /* now re-issue the request */ TRACE_TASK(t, "Reissuing a request for replica from lock %d.\n", l->ident); wait = (ikglp_wait_state_t*)tsk_rt(t)->blocked_lock_data; if(sem->nr_in_fifos < sem->max_in_fifos) { struct fifo_queue *fq; // enqueue somwhere #ifdef CONFIG_LITMUS_AFFINITY_LOCKING fq = (sem->aff_obs) ? sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : sem->shortest_fifo_queue; #else fq = sem->shortest_fifo_queue; #endif TRACE_TASK(t, "is going to an FQ.\n"); /* if this were true, then we should have been blocked */ BUG_ON(fq->count == 0); ikglp_enqueue_on_fq(sem, fq, wait, flags); // unlocks sem->lock } else if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { TRACE_TASK(t, "is going to PQ.\n"); // enqueue on PQ ikglp_enqueue_on_pq(sem, wait); unlock_fine_irqrestore(&sem->lock, flags); } else { // enqueue as donor TRACE_TASK(t, "is going to donor heap.\n"); ikglp_enqueue_on_donor(sem, wait, flags); // unlocks sem->lock } // TRACE_TASK(t, "IKGLP after reissue:\n"); // __ikglp_dump_state(sem); raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); } else if (blocked_lock) { unlock_fine_irqrestore(&sem->lock, flags); raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); TRACE_TASK(t, "is blocked, but not on IKGLP. 
Redirecting...\n"); if(blocked_lock->ops->supports_budget_exhaustion) { TRACE_TASK(t, "Lock %d supports budget exhaustion.\n", blocked_lock->ident); blocked_lock->ops->budget_exhausted(blocked_lock, t); } } else { TRACE_TASK(t, "appears to be no longer blocked.\n"); unlock_fine_irqrestore(&sem->lock, flags); raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); } return; } void ikglp_virtual_unlock(struct litmus_lock* l, struct task_struct* t) { /* PRE: DGL lock already held if DGLs are supported */ struct ikglp_semaphore *sem = ikglp_from_lock(l); struct fifo_queue *fq = ikglp_get_queue(sem, t); unsigned long flags = 0, more_flags; TRACE_TASK(t, "virtual unlock!\n"); if (!fq) return; if (fq->is_vunlocked) { TRACE_TASK(t, "Lock %d already virtually unlocked.\n", l->ident); return; } raw_spin_lock_irqsave(&sem->real_lock, more_flags); lock_fine_irqsave(&sem->lock, flags); if (unlikely(fq->owner != t)) { TRACE_TASK(t, "no longer holds lock %d.\n", l->ident); goto out; } // TRACE_TASK(t, "IKGLP before virtual unlock:\n"); // __ikglp_dump_state(sem); /* Move a request from donor heap or PQ to fq. don't steal from * other FQs. Also, terminate donation relationship if we move * a donor to 't' to the FQ (we'll pick inheritance back up via * the FQ, if needed). */ ikglp_move_next_to_fq(sem, fq, t, &fq->donee_heap_node, &flags, !ALLOW_STEALING, ALWAYS_TERMINATE_DONATION); /* decrement fifo count to simulate unlock. individual fifo * length counts remain unchanged. */ --(sem->nr_in_fifos); fq->is_vunlocked = 1; // TRACE_TASK(t, "IKGLP after virtual unlock:\n"); // __ikglp_dump_state(sem); out: unlock_fine_irqrestore(&sem->lock, flags); raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); } int ikglp_close(struct litmus_lock* l) { struct task_struct *t = current; struct ikglp_semaphore *sem = ikglp_from_lock(l); unsigned long flags; int owner = 0; int i; raw_spin_lock_irqsave(&sem->real_lock, flags); for(i = 0; i < sem->nr_replicas; ++i) { if(sem->fifo_queues[i].owner == t) { owner = 1; break; } } raw_spin_unlock_irqrestore(&sem->real_lock, flags); if (owner) ikglp_unlock(l); return 0; } void ikglp_free(struct litmus_lock* l) { struct ikglp_semaphore *sem = ikglp_from_lock(l); kfree(sem->fifo_queues); kfree(sem); } static void ikglp_proc_add(struct litmus_lock *l) { if (!l->name) l->name = kmalloc(LOCK_NAME_LEN*sizeof(char), GFP_KERNEL); snprintf(l->name, LOCK_NAME_LEN, "ikglp-%d", l->ident); litmus_add_proc_lock(l, ikglp_proc_print); } static void ikglp_proc_remove(struct litmus_lock *l) { if (l->name) { litmus_remove_proc_lock(l); kfree(l->name); l->name = NULL; } } static struct litmus_lock_proc_ops ikglp_proc_ops = { .add = ikglp_proc_add, .remove = ikglp_proc_remove }; struct litmus_lock* ikglp_new(unsigned int m, struct litmus_lock_ops* ops, void* __user uarg) { /* TODO: Support trivial token lock, s.t. args.nr_replicas equals some * sentinel value, and implement special-case algorithms. There is currently * a lot of overhead for a trivial token lock since we allocate O(n)-worth * of data; this could be avoided with special-case algorithms. 
*/ struct ikglp_semaphore* sem; struct ikglp_args args; unsigned int i; BUG_ON(m <= 0); if(!access_ok(VERIFY_READ, uarg, sizeof(args))) return(NULL); if(__copy_from_user(&args, uarg, sizeof(args))) return(NULL); /* validation */ /* there must be at least one resource */ if (args.nr_replicas < 1) { printk("Invalid number of replicas.\n"); return(NULL); } /* IKGLP_OPTIMAL_FIFO_LEN can only be determined if nr_max_holders * is IKGLP_M_HOLDERS (number of CPUs) */ if (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN && args.max_in_fifos != IKGLP_M_IN_FIFOS) { printk("Cannot compute optimal FIFO length if max_in_fifos != IKGLP_M_IN_FIFOS\n"); return(NULL); } if ((args.max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) && (args.max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) && (args.max_in_fifos > args.nr_replicas*args.max_fifo_len)) { printk("Not enough total FIFO space for specified max requests in FIFOs.\n"); return(NULL); } sem = kmalloc(sizeof(*sem), GFP_KERNEL); if(!sem) { return NULL; } memset(sem, 0, sizeof(*sem)); sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*args.nr_replicas, GFP_KERNEL); if(!sem->fifo_queues) { kfree(sem); return NULL; } sem->litmus_lock.ops = ops; // sem->litmus_lock.proc = &ikglp_proc_ops; #ifdef CONFIG_DEBUG_SPINLOCK { __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key); } #else raw_spin_lock_init(&sem->lock); #endif raw_spin_lock_init(&sem->real_lock); sem->nr_replicas = args.nr_replicas; sem->max_in_fifos = (args.max_in_fifos == IKGLP_M_IN_FIFOS) ? m : args.max_in_fifos; sem->max_fifo_len = (args.max_fifo_len == IKGLP_OPTIMAL_FIFO_LEN) ? (sem->max_in_fifos/args.nr_replicas) + ((sem->max_in_fifos%args.nr_replicas) != 0) : args.max_fifo_len; sem->nr_in_fifos = 0; TRACE_CUR("New IKGLP Sem: m = %u, k = %u, max fifo_len = %u\n", sem->max_in_fifos, sem->nr_replicas, sem->max_fifo_len); for(i = 0; i < args.nr_replicas; ++i) { struct fifo_queue* q = &(sem->fifo_queues[i]); q->owner = NULL; q->hp_waiter = NULL; init_waitqueue_head(&q->wait); q->count = 0; q->is_vunlocked = 0; q->global_heap_node.task = NULL; INIT_BINHEAP_NODE(&q->global_heap_node.node); q->donee_heap_node.task = NULL; q->donee_heap_node.donor_info = NULL; q->donee_heap_node.fq = NULL; INIT_BINHEAP_NODE(&q->donee_heap_node.node); q->nest.lock = (struct litmus_lock*)sem; q->nest.hp_waiter_eff_prio = NULL; q->nest.hp_waiter_ptr = &q->hp_waiter; INIT_BINHEAP_NODE(&q->nest.hp_binheap_node); } sem->shortest_fifo_queue = &sem->fifo_queues[0]; sem->top_m_size = 0; // init heaps INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order); INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order); INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order); INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); #ifdef CONFIG_LITMUS_AFFINITY_LOCKING sem->aff_obs = NULL; #endif return &sem->litmus_lock; } #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) /****************************************************************************/ /* AFFINITY HEURISTICS */ /****************************************************************************/ static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) { int gpu = replica % aff->nr_rsrc; return gpu; } static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica) { int gpu = __replica_to_gpu(aff, replica) + aff->offset; return gpu; } static inline int 
gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) { int replica = gpu - aff->offset; return replica; } static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b) { return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b)); } static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica) { if(tsk_rt(t)->last_gpu >= 0) { return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica)); } return 0; } int ikglp_aff_obs_close(struct affinity_observer* obs) { return 0; } void ikglp_aff_obs_free(struct affinity_observer* obs) { struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); // make sure the thread destroying this semaphore will not // call the exit callback on a destroyed lock. struct task_struct *t = current; if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff) { tsk_rt(t)->rsrc_exit_cb = NULL; tsk_rt(t)->rsrc_exit_cb_args = NULL; } kfree(ikglp_aff->nr_cur_users_on_rsrc); kfree(ikglp_aff->nr_aff_on_rsrc); kfree(ikglp_aff->q_info); kfree(ikglp_aff); } static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops, struct ikglp_affinity_ops* ikglp_ops, void* __user args) { struct ikglp_affinity* ikglp_aff; struct gpu_affinity_observer_args aff_args; struct ikglp_semaphore* sem; unsigned int i; unsigned long flags; if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { return(NULL); } if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { return(NULL); } sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); if(sem->litmus_lock.type != IKGLP_SEM) { TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); return(NULL); } if((aff_args.rho <= 0) || (sem->nr_replicas%aff_args.rho != 0)) { TRACE_CUR("Lock %d does not support #replicas (%u) for #simult_users " "(%u) per replica. #replicas should be evenly divisible " "by #simult_users.\n", sem->litmus_lock.ident, sem->nr_replicas, aff_args.rho); return(NULL); } ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); if(!ikglp_aff) { return(NULL); } ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL); if(!ikglp_aff->q_info) { kfree(ikglp_aff); return(NULL); } ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); if(!ikglp_aff->nr_cur_users_on_rsrc) { kfree(ikglp_aff->q_info); kfree(ikglp_aff); return(NULL); } ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(unsigned int)*(sem->nr_replicas / aff_args.rho), GFP_KERNEL); if(!ikglp_aff->nr_aff_on_rsrc) { kfree(ikglp_aff->nr_cur_users_on_rsrc); kfree(ikglp_aff->q_info); kfree(ikglp_aff); return(NULL); } affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); ikglp_aff->ops = ikglp_ops; ikglp_aff->offset = aff_args.replica_to_gpu_offset; ikglp_aff->nr_simult = aff_args.rho; ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 
1 : 0;

	TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
		"nr_rsrc = %d, relaxed_fifo_len = %d\n",
		ikglp_aff->offset,
		ikglp_aff->nr_simult,
		ikglp_aff->nr_rsrc,
		ikglp_aff->relax_max_fifo_len);

	memset(ikglp_aff->nr_cur_users_on_rsrc, 0,
		sizeof(unsigned int)*(ikglp_aff->nr_rsrc));
	memset(ikglp_aff->nr_aff_on_rsrc, 0,
		sizeof(unsigned int)*(ikglp_aff->nr_rsrc));

	for(i = 0; i < sem->nr_replicas; ++i) {
		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
		ikglp_aff->q_info[i].estimated_len = 0;

		// multiple q_info's will point to the same resource (aka GPU) if
		// aff_args.rho > 1
		ikglp_aff->q_info[i].nr_cur_users =
			&ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
		ikglp_aff->q_info[i].nr_aff_users =
			&ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
	}

	// attach observer to the lock
	raw_spin_lock_irqsave(&sem->real_lock, flags);
	sem->aff_obs = ikglp_aff;
	raw_spin_unlock_irqrestore(&sem->real_lock, flags);

	return &ikglp_aff->obs;
}

static int gpu_replica_to_resource(struct ikglp_affinity* aff,
				struct fifo_queue* fq)
{
	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
	return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
}

/*--------------------------------------------------------------------------*/
/*                       ADVANCED AFFINITY HEURISTICS                       */
/*                                                                          */
/* These heuristics estimate FIFO length wait times and try to enqueue      */
/* tasks into the shortest queues. When two queues are equivalent, the GPU  */
/* that maintains affinity is selected. When a task has no affinity, the    */
/* heuristic tries to get the GPU with the fewest number of other tasks     */
/* with affinity on that GPU.                                               */
/*                                                                          */
/* Heuristics to explore in the future:                                     */
/*   - Utilization                                                          */
/*   - Longest non-preemptive section                                       */
/*   - Criticality                                                          */
/*   - Task period                                                          */
/*--------------------------------------------------------------------------*/
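/*
 * Worked example of the estimated-length comparison used by
 * gpu_ikglp_advise_enqueue() below (the numbers are hypothetical and only
 * illustrate the mechanism): suppose the task's affinity queue has
 * estimated_len = 80us and a competing queue has estimated_len = 60us.
 * If the task's local critical-section estimate is 10us but the
 * migration-adjusted estimate for the other GPU is 40us, the affinity
 * queue is still preferred (80 + 10 = 90 < 60 + 40 = 100); i.e.,
 * affinity-adjusted estimates can override raw queue length. Exact ties
 * on est_len fall back to affinity, then to affinity load
 * (nr_aff_users), then to the number of current resource holders.
 */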
struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff,
				struct task_struct* t)
{
	// advise_enqueue must be smart so as not to break IKGLP rules:
	//  * No queue can be greater than ceil(m/k) in length, unless
	//    'relax_max_fifo_len' is asserted
	//  * Cannot let a queue idle if there exist waiting PQ/donors
	//    -- needed to guarantee parallel progress of waiters.
	//
	// We may be able to relax some of these constraints, but this will have to
	// be carefully evaluated.
	//
	// Heuristic strategy: Find the shortest queue that is not full.

	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
	lt_t min_len;
	unsigned int min_nr_users, min_nr_aff_users;
	struct ikglp_queue_info *shortest, *aff_queue;
	struct fifo_queue *to_enqueue;
	unsigned int i;
	int affinity_gpu;

	unsigned int max_fifo_len = (aff->relax_max_fifo_len) ?
		sem->max_in_fifos : /* allow possibility of all requests on same queue */
		sem->max_fifo_len;  /* constrain FIFO length */

	// if we have no affinity, find the GPU with the least number of users
	// with active affinity
	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
		int temp_min = aff->nr_aff_on_rsrc[0];
		affinity_gpu = aff->offset;

		for(i = 1; i < aff->nr_rsrc; ++i) {
			if(aff->nr_aff_on_rsrc[i] < temp_min) {
				temp_min = aff->nr_aff_on_rsrc[i];
				affinity_gpu = aff->offset + i;
			}
		}

		TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n",
			affinity_gpu, temp_min);
	}
	else {
		affinity_gpu = tsk_rt(t)->last_gpu;
	}

	// all things being equal, let's start with the queue with which we have
	// affinity.  this helps us maintain affinity even when we don't have
	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
	aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
	shortest = aff_queue;

	// if(shortest == aff->shortest_queue) {
	//     TRACE_CUR("special case: have affinity with shortest queue\n");
	//     goto out;
	// }

	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
	min_nr_users = *(shortest->nr_cur_users);
	min_nr_aff_users = *(shortest->nr_aff_users);

	TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n",
		get_gpu_estimate(t, MIG_LOCAL),
		ikglp_get_idx(sem, shortest->q),
		shortest->q->count,
		min_len);

	for(i = 0; i < sem->nr_replicas; ++i) {
		if(&aff->q_info[i] != shortest) {
			/* is there room on this q? */
			if(nominal_fq_len(aff->q_info[i].q) < max_fifo_len) {
				int want = 0;

				lt_t migration =
					get_gpu_estimate(t,
						gpu_migration_distance(tsk_rt(t)->last_gpu,
							replica_to_gpu(aff, i)));
				lt_t est_len = aff->q_info[i].estimated_len + migration;

				// queue is smaller, or they're equal and the other has a smaller
				// number of total users.
				//
				// tie-break on the smallest number of simultaneous users. this
				// only kicks in when more than one queue is empty.

				// TODO: Make "est_len < min_len" a fuzzy function that allows
				// queues "close enough" in length to be considered equal.

				/* NOTE: 'shortest' starts out with affinity GPU */
				if(unlikely(nominal_fq_len(shortest->q) >= max_fifo_len)) {
					/* 'shortest' is full and i-th queue is not */
					want = 1;
				}
				else if(est_len < min_len) {
					want = 1;  /* i-th queue has shortest length */
				}
				else if(unlikely(est_len == min_len)) {
					/* equal lengths */
					if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) {
						/* don't sacrifice affinity on tie */
						if(has_affinity(aff, t, i)) {
							/* switch to maintain affinity */
							want = 1;
						}
						else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) {
							/* favor the one with less affinity load */
							want = 1;
						}
						else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal affinity load */
								(*(aff->q_info[i].nr_cur_users) < min_nr_users)) {
							/* favor the one with fewer current users */
							want = 1;
						}
					}
				}

				if(want) {
					shortest = &aff->q_info[i];
					min_len = est_len;
					min_nr_users = *(aff->q_info[i].nr_cur_users);
					min_nr_aff_users = *(aff->q_info[i].nr_aff_users);
				}

				TRACE_CUR("cs is %llu on queue %d (count = %u): est len = %llu\n",
					get_gpu_estimate(t,
						gpu_migration_distance(tsk_rt(t)->last_gpu,
							replica_to_gpu(aff, i))),
					ikglp_get_idx(sem, aff->q_info[i].q),
					aff->q_info[i].q->count,
					est_len);
			}
			else {
				TRACE_CUR("queue %d is too long. 
ineligible for enqueue.\n", ikglp_get_idx(sem, aff->q_info[i].q)); } } } if(nominal_fq_len(shortest->q) >= max_fifo_len) { TRACE_CUR("selected fq %d is too long, but returning it anyway.\n", ikglp_get_idx(sem, shortest->q)); } to_enqueue = shortest->q; TRACE_CUR("enqueue on fq %d (count = %u) (non-aff wanted fq %d)\n", ikglp_get_idx(sem, to_enqueue), to_enqueue->count, ikglp_get_idx(sem, sem->shortest_fifo_queue)); return to_enqueue; } static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff, int dest_gpu, struct fifo_queue* fq) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); ikglp_wait_state_t *wait = NULL; int max_improvement = -(MIG_NONE+1); int replica = ikglp_get_idx(sem, fq); if(waitqueue_active(&fq->wait)) { int this_gpu = replica_to_gpu(aff, replica); struct list_head *pos; list_for_each(pos, &fq->wait.task_list) { wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); int tmp_improvement = gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) - gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu); if(tmp_improvement > max_improvement) { wait = tmp_wait; max_improvement = tmp_improvement; if(max_improvement >= (MIG_NONE-1)) { goto out; } } } BUG_ON(!wait); } else { TRACE_CUR("fq %d is empty!\n", replica); } out: TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n", replica, (wait) ? wait->task->comm : "null", (wait) ? wait->task->pid : 0, max_improvement); return wait; } ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff, struct fifo_queue* dst) { // Huristic strategy: Find task with greatest improvement in affinity. // struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); ikglp_wait_state_t *to_steal_state = NULL; // ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem); int max_improvement = -(MIG_NONE+1); int replica, i; int dest_gpu; replica = ikglp_get_idx(sem, dst); dest_gpu = replica_to_gpu(aff, replica); for(i = 0; i < sem->nr_replicas; ++i) { ikglp_wait_state_t *tmp_to_steal_state = pick_steal(aff, dest_gpu, &sem->fifo_queues[i]); if(tmp_to_steal_state) { int tmp_improvement = gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) - gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu); if(tmp_improvement > max_improvement) { to_steal_state = tmp_to_steal_state; max_improvement = tmp_improvement; if(max_improvement >= (MIG_NONE-1)) { goto out; } } } } out: if(!to_steal_state) { TRACE_CUR("Could not find anyone to steal.\n"); } else { TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", to_steal_state->task->comm, to_steal_state->task->pid, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq), replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)), ikglp_get_idx(sem, dst), dest_gpu, max_improvement); // TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", // default_to_steal_state->task->comm, default_to_steal_state->task->pid, // ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq), // replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), // ikglp_get_idx(sem, dst), // replica_to_gpu(aff, ikglp_get_idx(sem, dst)), // // gpu_migration_distance( // replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), // 
tsk_rt(default_to_steal_state->task)->last_gpu) - // gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu)); } return(to_steal_state); } static inline int has_donor(wait_queue_t* fq_wait) { ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); return(wait->donee_heap_node.donor_info != NULL); } static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, struct fifo_queue* fq, int* dist_from_head) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); struct task_struct *donee; ikglp_donee_heap_node_t *donee_node; struct task_struct *mth_highest = ikglp_mth_highest(sem); // lt_t now = litmus_clock(); // // TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ", // ikglp_get_idx(sem, fq), // mth_highest->comm, mth_highest->pid, // (int)get_deadline(mth_highest) - now); if(fq->owner && fq->donee_heap_node.donor_info == NULL && mth_highest != fq->owner && litmus->__compare(mth_highest, BASE, fq->owner, BASE)) { donee = fq->owner; donee_node = &(fq->donee_heap_node); *dist_from_head = 0; BUG_ON(donee != donee_node->task); TRACE_CUR("picked owner of fq %d as donee\n", ikglp_get_idx(sem, fq)); goto out; } else if(waitqueue_active(&fq->wait)) { struct list_head *pos; // TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d " // "(mth_highest != fq->owner) = %d " // "(mth_highest > fq->owner) = %d\n", // ikglp_get_idx(sem, fq), // (fq->owner) ? fq->owner->comm : "null", // (fq->owner) ? fq->owner->pid : 0, // (fq->owner) ? (int)get_deadline(fq->owner) - now : -999, // (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "null", // (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : 0, // (mth_highest != fq->owner), // (litmus->__compare(mth_highest, BASE, fq->owner, BASE))); *dist_from_head = 1; // iterating from the start of the queue is nice since this means // the donee will be closer to obtaining a resource. list_for_each(pos, &fq->wait.task_list) { wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); // TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d " // "(mth_highest != wait->task) = %d " // "(mth_highest > wait->task) = %d\n", // ikglp_get_idx(sem, fq), // dist_from_head, // wait->task->comm, wait->task->pid, // (int)get_deadline(wait->task) - now, // (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "null", // (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : 0, // (mth_highest != wait->task), // (litmus->__compare(mth_highest, BASE, wait->task, BASE))); if(!has_donor(fq_wait) && mth_highest != wait->task && litmus->__compare(mth_highest, BASE, wait->task, BASE)) { donee = (struct task_struct*) fq_wait->private; donee_node = &wait->donee_heap_node; BUG_ON(donee != donee_node->task); TRACE_CUR("picked waiter in fq %d as donee\n", ikglp_get_idx(sem, fq)); goto out; } ++(*dist_from_head); } } donee = NULL; donee_node = NULL; *dist_from_head = IKGLP_INVAL_DISTANCE; TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); out: TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n", ikglp_get_idx(sem, fq), (donee) ? (donee)->comm : "null", (donee) ? 
(donee)->pid : 0, *dist_from_head);

	return donee_node;
}

ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
				struct ikglp_affinity* aff,
				struct task_struct* donor)
{
	// Heuristic strategy: Find the highest-priority donee that is waiting on
	// a queue closest to our affinity. (1) The donee CANNOT already have a
	// donor (exception: donee is the lowest-prio task in the donee heap).
	// (2) Requests in the 'top_m' heap are ineligible.
	//
	// Further strategy: amongst eligible donees waiting for the same GPU, pick
	// the one closest to the head of the FIFO queue (including owners).

	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
	ikglp_donee_heap_node_t *donee_node;
	gpu_migration_dist_t distance;
	int start, i, j;

	ikglp_donee_heap_node_t *default_donee;
	ikglp_wait_state_t *default_donee_donor_info;

	if(tsk_rt(donor)->last_gpu < 0) {
		// no affinity. just return the min prio, like standard IKGLP
		// TODO: Find something closer to the head of the queue??
		donee_node = binheap_top_entry(&sem->donees,
			ikglp_donee_heap_node_t, node);
		goto out;
	}

	// Temporarily break any donation relation of the default donee (the lowest
	// prio task in the FIFO queues) to make it eligible for selection below.
	//
	// NOTE: The original donor relation *must* be restored, even if we select
	// the default donee through affinity-aware selection, before returning
	// from this function so we don't screw up our heap ordering.
	// The standard IKGLP algorithm will steal the donor relationship if needed.
	default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
	default_donee_donor_info = default_donee->donor_info;  // back up donor relation
	default_donee->donor_info = NULL;  // temporarily break any donor relation

	// initialize our search
	donee_node = NULL;
	distance = MIG_NONE;

	// TODO: The below search logic may work well for locating nodes to steal
	// when an FQ goes idle. Validate this code and apply it to stealing.

	// begin search with affinity GPU.
	start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
	i = start;
	do {  // "for each gpu" / "for each aff->nr_rsrc"
		gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);

		// only interested in queues that will improve our distance
		if(temp_distance < distance || donee_node == NULL) {
			int dist_from_head = IKGLP_INVAL_DISTANCE;

			TRACE_CUR("searching for donor on GPU %d\n", i);

			// visit each queue and pick a donee. bail as soon as we find
			// one for this class.
			for(j = 0; j < aff->nr_simult; ++j) {
				int temp_dist_from_head;
				ikglp_donee_heap_node_t *temp_donee_node;
				struct fifo_queue *fq;

				fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
				temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);

				if(temp_dist_from_head < dist_from_head) {
					// we check all the FQs for this GPU to spread priorities
					// out across the queues. does this decrease jitter?
					donee_node = temp_donee_node;
					dist_from_head = temp_dist_from_head;
				}
			}

			if(dist_from_head != IKGLP_INVAL_DISTANCE) {
				TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n",
					donee_node->task->comm, donee_node->task->pid,
					dist_from_head);
			}
			else {
				TRACE_CUR("found no eligible donors from GPU %d\n", i);
			}
		}
		else {
			TRACE_CUR("skipping GPU %d (distance = %d, best donor "
				"distance = %d)\n", i, temp_distance, distance);
		}

		i = (i+1 < aff->nr_rsrc) ? i+1 : 0;  // increment with wrap-around
	} while (i != start);

	// restore old donor info state.
	default_donee->donor_info = default_donee_donor_info;

	if(!donee_node) {
		donee_node = default_donee;

		TRACE_CUR("Could not find a donee. We have to steal one.\n");
		WARN_ON(default_donee->donor_info == NULL);
	}

out:
	TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
		donee_node->task->comm, donee_node->task->pid,
		ikglp_get_idx(sem, donee_node->fq),
		replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
		donor->comm, donor->pid, tsk_rt(donor)->last_gpu);

	return(donee_node);
}
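/*
 * Note on the traversal below: the donor heap is ordered by priority, not by
 * affinity distance, so every node must be visited to find the donor closest
 * to the target GPU. The recursion depth is bounded by the heap depth
 * (roughly log2 of the number of donors, which is at most sem->m). Ties on
 * distance are broken by priority via litmus->compare().
 */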
static void __find_closest_donor(int target_gpu,
				struct binheap_node* donor_node,
				ikglp_wait_state_t** cur_closest,
				int* cur_dist)
{
	ikglp_wait_state_t *this_donor =
		binheap_entry(donor_node, ikglp_wait_state_t, node);

	int this_dist =
		gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);

	// TRACE_CUR("%s/%d: dist from target = %d\n",
	//     this_donor->task->comm,
	//     this_donor->task->pid,
	//     this_dist);

	if(this_dist < *cur_dist) {
		// take this donor
		*cur_dist = this_dist;
		*cur_closest = this_donor;
	}
	else if(this_dist == *cur_dist) {
		// priority tie-break. Even though this is a pre-order traversal,
		// this is a heap, not a binary tree, so we still need to do a
		// priority comparison.
		if(!(*cur_closest) ||
		   litmus->compare(this_donor->task, (*cur_closest)->task)) {
			*cur_dist = this_dist;
			*cur_closest = this_donor;
		}
	}

	if(donor_node->left)
		__find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
	if(donor_node->right)
		__find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
}

ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff,
				struct fifo_queue* fq)
{
	// Heuristic strategy: Find the donor with the closest affinity to fq.
	// Tie-break on priority.

	// We need to iterate over all the donors to do this. Unfortunately,
	// our donors are organized in a heap. We'll visit each node with a
	// recursive call. This is relatively safe since there are only sem->m
	// donors, at most. We won't recurse too deeply to have to worry about
	// our stack. (even with 128 CPUs, our nest depth is at most 7 deep).

	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
	ikglp_wait_state_t *donor = NULL;
	int distance = MIG_NONE;
	int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));

#ifdef CONFIG_SCHED_DEBUG_TRACE
	ikglp_wait_state_t* default_donor =
		binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
#endif

	__find_closest_donor(gpu, sem->donors.root, &donor, &distance);

	TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
		"(non-aff wanted %s/%d). 
differs = %d\n", donor->task->comm, donor->task->pid, distance, ikglp_get_idx(sem, fq), default_donor->task->comm, default_donor->task->pid, (donor->task != default_donor->task) ); return(donor); } void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); struct ikglp_queue_info *info = &aff->q_info[replica]; lt_t est_time; lt_t est_len_before; if(current == t) { tsk_rt(t)->suspend_gpu_tracker_on_block = 1; } est_len_before = info->estimated_len; est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); info->estimated_len += est_time; TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", ikglp_get_idx(sem, info->q), est_len_before, est_time, info->estimated_len); // if(aff->shortest_queue == info) { // // we may no longer be the shortest // aff->shortest_queue = ikglp_aff_find_shortest(aff); // // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", // ikglp_get_idx(sem, aff->shortest_queue->q), // aff->shortest_queue->q->count, // aff->shortest_queue->estimated_len); // } } void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); struct ikglp_queue_info *info = &aff->q_info[replica]; lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); if(est_time > info->estimated_len) { WARN_ON(1); info->estimated_len = 0; } else { info->estimated_len -= est_time; } TRACE_CUR("fq %d est len is now %llu\n", ikglp_get_idx(sem, info->q), info->estimated_len); // check to see if we're the shortest queue now. 
// if((aff->shortest_queue != info) && // (aff->shortest_queue->estimated_len > info->estimated_len)) { // // aff->shortest_queue = info; // // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", // ikglp_get_idx(sem, info->q), // info->q->count, // info->estimated_len); // } } int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); unsigned long flags = 0, more_flags; int aff_rsrc; #ifdef CONFIG_LITMUS_DGL_SUPPORT raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); #endif if (tsk_rt(t)->last_gpu < 0) return 0; lock_global_irqsave(dgl_lock, flags); raw_spin_lock_irqsave(&sem->real_lock, more_flags); lock_fine_irqsave(&sem->lock, flags); // decrement affinity count on old GPU aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[aff_rsrc]); if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); aff->nr_aff_on_rsrc[aff_rsrc] = 0; } unlock_fine_irqrestore(&sem->lock, flags); raw_spin_unlock_irqrestore(&sem->real_lock, more_flags); unlock_global_irqrestore(dgl_lock, flags); return 0; } int gpu_ikglp_notify_exit_trampoline(struct task_struct* t) { struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args; if(likely(aff)) { return gpu_ikglp_notify_exit(aff, t); } else { return -1; } } void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); int last_gpu = tsk_rt(t)->last_gpu; tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration); // count the number or resource holders ++(*(aff->q_info[replica].nr_cur_users)); if(gpu != last_gpu) { if(last_gpu >= 0) { int old_rsrc = last_gpu - aff->offset; --(aff->nr_aff_on_rsrc[old_rsrc]); } // increment affinity count on new GPU ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); tsk_rt(t)->rsrc_exit_cb_args = aff; tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; } reg_nv_device(gpu, 1, t); // register tsk_rt(t)->suspend_gpu_tracker_on_block = 0; reset_gpu_tracker(t); start_gpu_tracker(t); } void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); lt_t est_time; stop_gpu_tracker(t); // stop the tracker before we do anything else. est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); // count the number or resource holders --(*(aff->q_info[replica].nr_cur_users)); reg_nv_device(gpu, 0, t); // unregister // update estimates update_gpu_estimate(t, get_gpu_time(t)); TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. " "estimated was %llu. 
diff is %d\n", t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration, get_gpu_time(t), est_time, (long long)get_gpu_time(t) - (long long)est_time); tsk_rt(t)->last_gpu = gpu; } struct ikglp_affinity_ops gpu_ikglp_affinity = { .advise_enqueue = gpu_ikglp_advise_enqueue, .advise_steal = gpu_ikglp_advise_steal, .advise_donee_selection = gpu_ikglp_advise_donee_selection, .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq, .notify_enqueue = gpu_ikglp_notify_enqueue, .notify_dequeue = gpu_ikglp_notify_dequeue, .notify_acquired = gpu_ikglp_notify_acquired, .notify_freed = gpu_ikglp_notify_freed, .notify_exit = gpu_ikglp_notify_exit, .replica_to_resource = gpu_replica_to_resource, }; struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops, void* __user args) { return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args); } /*--------------------------------------------------------------------------*/ /* SIMPLE LOAD-BALANCING AFFINITY HEURISTIC */ /*--------------------------------------------------------------------------*/ struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); unsigned int min_count; unsigned int min_nr_users; struct ikglp_queue_info *shortest; struct fifo_queue *to_enqueue; unsigned int i; // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); shortest = &aff->q_info[0]; min_count = shortest->q->count; min_nr_users = *(shortest->nr_cur_users); TRACE_CUR("queue %d: waiters = %u, total holders = %u\n", ikglp_get_idx(sem, shortest->q), shortest->q->count, min_nr_users); for(i = 1; i < sem->nr_replicas; ++i) { unsigned int len = aff->q_info[i].q->count; // queue is smaller, or they're equal and the other has a smaller number // of total users. // // tie-break on the shortest number of simult users. this only kicks in // when there are more than 1 empty queues. 
if((len < min_count) || ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { shortest = &aff->q_info[i]; min_count = shortest->q->count; min_nr_users = *(aff->q_info[i].nr_cur_users); } TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", ikglp_get_idx(sem, aff->q_info[i].q), aff->q_info[i].q->count, *(aff->q_info[i].nr_cur_users)); } to_enqueue = shortest->q; TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", ikglp_get_idx(sem, to_enqueue), ikglp_get_idx(sem, sem->shortest_fifo_queue)); return to_enqueue; } ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff, struct fifo_queue* dst) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n"); return ikglp_find_hp_waiter_to_steal(sem, NULL); } ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); return(donee); } ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); return(donor); } void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n"); } void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n"); } void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n"); // count the number or resource holders ++(*(aff->q_info[replica].nr_cur_users)); reg_nv_device(gpu, 1, t); // register } void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) { struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); int replica = ikglp_get_idx(sem, fq); int gpu = replica_to_gpu(aff, replica); // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n"); // count the number or resource holders --(*(aff->q_info[replica].nr_cur_users)); reg_nv_device(gpu, 0, t); // unregister } struct ikglp_affinity_ops simple_gpu_ikglp_affinity = { .advise_enqueue = simple_gpu_ikglp_advise_enqueue, .advise_steal = simple_gpu_ikglp_advise_steal, .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection, .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq, .notify_enqueue = simple_gpu_ikglp_notify_enqueue, .notify_dequeue = simple_gpu_ikglp_notify_dequeue, .notify_acquired = simple_gpu_ikglp_notify_acquired, .notify_freed = simple_gpu_ikglp_notify_freed, .notify_exit = NULL, .replica_to_resource = gpu_replica_to_resource, }; struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, void* __user args) { return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args); } #endif
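/*
 * Configuration sketch (documentation only; the user-space setup path is
 * omitted and the initializer below is hypothetical):
 *
 *   struct ikglp_args args = {
 *       .nr_replicas  = 3,                      // k = 3 replicas
 *       .max_in_fifos = IKGLP_M_IN_FIFOS,       // bound FIFO population by m
 *       .max_fifo_len = IKGLP_OPTIMAL_FIFO_LEN, // derive per-queue bound
 *   };
 *
 * With m = 8 (e.g., eight CPUs), ikglp_new() resolves these to
 * max_in_fifos = 8 and max_fifo_len = ceil(8/3) = 8/3 + (8%3 != 0) = 3.
 * When both bounds are given explicitly, ikglp_new() rejects configurations
 * where max_in_fifos > nr_replicas * max_fifo_len, since the FIFOs could
 * not hold that many concurrent requests.
 */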