From dede6a6b8ce09f48295d8ba4635480c98ef85284 Mon Sep 17 00:00:00 2001
From: Glenn Elliott
Date: Sun, 11 Nov 2012 13:10:43 -0500
Subject: improve ikglp heuristics

Track, per GPU, the number of tasks that currently hold affinity for
that GPU, and use the count in gpu_ikglp_advise_enqueue(): a task with
no prior affinity now defaults to the GPU with the fewest
affinity-holding tasks (instead of always the first GPU), and
equal-length FIFO queues are tie-broken first on affinity users, then
on over-all users. The counts are kept consistent by a new per-task
resource-exit callback invoked from litmus_exit_task().
---
 litmus/ikglp_lock.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++----
 litmus/litmus.c     |   5 +++
 2 files changed, 115 insertions(+), 8 deletions(-)

diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index bd7bfc0f48ac..9c57bc24e8bd 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1896,7 +1896,18 @@ int ikglp_aff_obs_close(struct affinity_observer* obs)
 void ikglp_aff_obs_free(struct affinity_observer* obs)
 {
 	struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
+
+	// make sure the thread destroying this semaphore will not
+	// call the exit callback on a destroyed lock.
+	struct task_struct *t = current;
+	if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff)
+	{
+		tsk_rt(t)->rsrc_exit_cb = NULL;
+		tsk_rt(t)->rsrc_exit_cb_args = NULL;
+	}
+
 	kfree(ikglp_aff->nr_cur_users_on_rsrc);
+	kfree(ikglp_aff->nr_aff_on_rsrc);
 	kfree(ikglp_aff->q_info);
 	kfree(ikglp_aff);
 }
@@ -1960,6 +1971,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}
 
+	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	if(!ikglp_aff->nr_aff_on_rsrc) {
+		kfree(ikglp_aff->nr_cur_users_on_rsrc);
+		kfree(ikglp_aff->q_info);
+		kfree(ikglp_aff);
+		return(NULL);
+	}
+
 	affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
 
 	ikglp_aff->ops = ikglp_ops;
@@ -1974,6 +1993,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		   ikglp_aff->relax_max_fifo_len);
 
 	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
+	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
 
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -1982,6 +2002,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		// multiple q_info's will point to the same resource (aka GPU) if
 		// aff_args.nr_simult_users > 1
 		ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
+		ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
 	}
 
 	// attach observer to the lock
@@ -2035,7 +2056,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	lt_t min_len;
-	int min_nr_users;
+	int min_nr_users, min_nr_aff_users;
 	struct ikglp_queue_info *shortest;
 	struct fifo_queue *to_enqueue;
 	int i;
@@ -2044,11 +2065,21 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	int max_fifo_len = (aff->relax_max_fifo_len) ?
 		sem->m : sem->max_fifo_len;
 
-	// simply pick the shortest queue if, we have no affinity, or we have
-	// affinity with the shortest
+	// if we have no affinity, find the GPU with the least number of users
+	// with active affinity
 	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
-		affinity_gpu = aff->offset;  // first gpu
-		TRACE_CUR("no affinity\n");
+		int temp_min = aff->nr_aff_on_rsrc[0];
+		affinity_gpu = aff->offset;
+
+		for(i = 1; i < aff->nr_rsrc; ++i) {
+			if(aff->nr_aff_on_rsrc[i] < temp_min) {
+				temp_min = aff->nr_aff_on_rsrc[i];  // track the running minimum
+				affinity_gpu = aff->offset + i;
+			}
+		}
+
+		TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n",
+			  affinity_gpu, temp_min);
 	}
 	else {
 		affinity_gpu = tsk_rt(t)->last_gpu;
@@ -2066,6 +2097,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
 	min_nr_users = *(shortest->nr_cur_users);
+	min_nr_aff_users = *(shortest->nr_aff_users);
+
 	TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
 		  get_gpu_estimate(t, MIG_LOCAL),
@@ -2088,14 +2121,21 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	//
 	// tie-break on the smallest number of simult users. this only kicks in
 	// when there is more than one empty queue.
+
+	// TODO: Make "est_len < min_len" a fuzzy function that allows
+	// queues "close enough" in length to be considered equal.
+
 		if((shortest->q->count >= max_fifo_len) ||	/* 'shortest' is full and i-th queue is not */
 		   (est_len < min_len) ||	/* i-th queue has the shortest length */
 		   ((est_len == min_len) &&	/* equal lengths: fewer affinity users wins, then fewer over-all users */
-			(*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
+			((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) ||
+			 ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&
+			  (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) {
 			shortest = &aff->q_info[i];
 			min_len = est_len;
 			min_nr_users = *(aff->q_info[i].nr_cur_users);
+			min_nr_aff_users = *(aff->q_info[i].nr_aff_users);
 		}
 
 		TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
@@ -2612,6 +2652,51 @@ void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq,
 //	}
 }
 
+int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
+{
+	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
+	unsigned long flags = 0, real_flags;
+	int aff_rsrc;
+#ifdef CONFIG_LITMUS_DGL_SUPPORT
+	raw_spinlock_t *dgl_lock;
+
+	dgl_lock = litmus->get_dgl_spinlock(t);
+#endif
+
+	if (tsk_rt(t)->last_gpu < 0)
+		return 0;
+
+	raw_spin_lock_irqsave(&sem->real_lock, real_flags);
+	lock_global_irqsave(dgl_lock, flags);
+	lock_fine_irqsave(&sem->lock, flags);
+
+	// decrement affinity count on old GPU
+	aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
+	--(aff->nr_aff_on_rsrc[aff_rsrc]);
+
+	if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
+		WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
+		aff->nr_aff_on_rsrc[aff_rsrc] = 0;
+	}
+
+	unlock_fine_irqrestore(&sem->lock, flags);
+	unlock_global_irqrestore(dgl_lock, flags);
+	raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
+
+	return 0;
+}
+
+int gpu_ikglp_notify_exit_trampoline(struct task_struct* t)
+{
+	struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args;
+	if(likely(aff)) {
+		return gpu_ikglp_notify_exit(aff, t);
+	}
+	else {
+		return -1;
+	}
+}
+
 void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 			       struct fifo_queue* fq,
 			       struct task_struct* t)
@@ -2619,15 +2704,28 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	int replica = ikglp_get_idx(sem, fq);
 	int gpu = replica_to_gpu(aff, replica);
+	int last_gpu = tsk_rt(t)->last_gpu;
 
-	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu);  // record the type of migration
+	tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu);  // record the type of migration
 
 	TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
-		  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
+		  t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration);
 
 	// count the number of resource holders
 	++(*(aff->q_info[replica].nr_cur_users));
 
+	if(gpu != last_gpu) {
+		if(last_gpu >= 0) {
+			int old_rsrc = last_gpu - aff->offset;
+			--(aff->nr_aff_on_rsrc[old_rsrc]);
+		}
+
+		// increment affinity count on new GPU
+		++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
+		tsk_rt(t)->rsrc_exit_cb_args = aff;
+		tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
+	}
+
 	reg_nv_device(gpu, 1, t);  // register
 
 	tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
@@ -2679,6 +2777,8 @@ struct ikglp_affinity_ops gpu_ikglp_affinity =
 	.notify_acquired = gpu_ikglp_notify_acquired,
 	.notify_freed = gpu_ikglp_notify_freed,
 
+	.notify_exit = gpu_ikglp_notify_exit,
+
 	.replica_to_resource = gpu_replica_to_resource,
 };
@@ -2817,6 +2917,8 @@ struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
 	.notify_acquired = simple_gpu_ikglp_notify_acquired,
 	.notify_freed = simple_gpu_ikglp_notify_freed,
 
+	.notify_exit = NULL,
+
 	.replica_to_resource = gpu_replica_to_resource,
 };

diff --git a/litmus/litmus.c b/litmus/litmus.c
index d368202ab8c3..143c746c344e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -533,6 +533,11 @@ void litmus_exit_task(struct task_struct* tsk)
 	if (is_realtime(tsk)) {
 		sched_trace_task_completion(tsk, 1);
 
+		if (tsk_rt(tsk)->rsrc_exit_cb) {
+			int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk);
+			WARN_ON(ret != 0);
+		}
+
 		litmus->task_exit(tsk);
 
 		BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
-- 
cgit v1.2.2
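
For reference, the queue-selection order that gpu_ikglp_advise_enqueue() uses
after this patch can be distilled into a small, self-contained C sketch:
skip full FIFO queues, prefer the shortest estimated queue, and break length
ties first on the number of tasks holding affinity for the backing GPU, then
on the number of over-all users. The struct queue_info and pick_queue() below
are simplified stand-ins invented for illustration, not the LITMUS^RT types;
the sketch also omits the per-candidate migration-cost estimates
(get_gpu_estimate()) and the fallback taken when every queue is full.

#include <stdio.h>

typedef unsigned long long lt_t;  /* stand-in for LITMUS^RT's lt_t */

struct queue_info {
	lt_t est_len;       /* estimated wait (queue length in time) */
	int  count;         /* requests currently enqueued */
	int  nr_aff_users;  /* tasks with affinity for the backing GPU */
	int  nr_cur_users;  /* current users of the backing GPU */
};

/* Return the index of the queue a new request should join. */
static int pick_queue(const struct queue_info *q, int nr, int max_fifo_len)
{
	int best = 0, i;

	for (i = 1; i < nr; ++i) {
		if (q[i].count >= max_fifo_len)
			continue;  /* i-th queue is full: never switch to it */
		if ((q[best].count >= max_fifo_len) ||       /* current best is full */
		    (q[i].est_len < q[best].est_len) ||      /* strictly shorter */
		    ((q[i].est_len == q[best].est_len) &&    /* equal lengths: tie-break */
		     ((q[i].nr_aff_users < q[best].nr_aff_users) ||
		      ((q[i].nr_aff_users == q[best].nr_aff_users) &&
		       (q[i].nr_cur_users < q[best].nr_cur_users)))))
			best = i;
	}
	return best;
}

int main(void)
{
	/* Two equal-length queues; queue 1 backs a GPU with fewer
	 * affinity-holding tasks, so it wins the tie-break. */
	struct queue_info q[2] = {
		{ .est_len = 100, .count = 1, .nr_aff_users = 3, .nr_cur_users = 1 },
		{ .est_len = 100, .count = 1, .nr_aff_users = 1, .nr_cur_users = 2 },
	};

	printf("enqueue on queue %d\n", pick_queue(q, 2, 4));  /* prints 1 */
	return 0;
}

Note that nr_cur_users only breaks the remaining ties: a GPU crowded with
transient users still loses to one with fewer tasks whose long-lived affinity
state points at it, which is the behavior this patch is after.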