author		Glenn Elliott <gelliott@cs.unc.edu>	2012-11-11 13:10:43 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-11-11 13:10:43 -0500
commit		dede6a6b8ce09f48295d8ba4635480c98ef85284 (patch)
tree		3bc2dcbe492d1e799076cd7060bc886b7a376219
parent		33cb64c787070d6b60a02ea40064d717d3b9dc07 (diff)
improve ikglp heuristics

Track, per GPU, the number of tasks with affinity for that GPU
(nr_aff_on_rsrc, exposed per replica as nr_aff_users). Use these counts
to pick the least-contended GPU for tasks without prior affinity and to
break ties between equal-length FIFO queues ahead of the old tie-break
on current resource holders. Also add a notify_exit callback, invoked
from litmus_exit_task(), so a task's affinity count is released when
the task exits.
-rw-r--r--	include/litmus/ikglp_lock.h	  4
-rw-r--r--	include/litmus/rt_param.h	  5
-rw-r--r--	litmus/ikglp_lock.c		118
-rw-r--r--	litmus/litmus.c			  5
4 files changed, 124 insertions, 8 deletions
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 0b89c8135360..9d0cd3d1904e 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -114,6 +114,7 @@ struct ikglp_queue_info
 	struct fifo_queue* q;
 	lt_t estimated_len;
 	int *nr_cur_users;
+	int *nr_aff_users;
 };
 
 struct ikglp_affinity_ops
@@ -128,6 +129,8 @@ struct ikglp_affinity_ops
 	void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// replica acquired
 	void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// replica freed
 	int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq);	// convert a replica # to a GPU (includes offsets and simult user folding)
+
+	int (*notify_exit)(struct ikglp_affinity* aff, struct task_struct* t);
 };
 
 struct ikglp_affinity
@@ -136,6 +139,7 @@ struct ikglp_affinity
 	struct ikglp_affinity_ops *ops;
 	struct ikglp_queue_info *q_info;
 	int *nr_cur_users_on_rsrc;
+	int *nr_aff_on_rsrc;
 	int offset;
 	int nr_simult;
 	int nr_rsrc;
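
Note (annotation, not part of the patch): nr_aff_users follows the same
aliasing pattern as the existing nr_cur_users: each per-replica
ikglp_queue_info points into one per-GPU counter in nr_aff_on_rsrc, so
replicas folded onto the same GPU share a counter. A minimal user-space
sketch of that pointer aliasing; the division-based folding below is an
assumed stand-in for the kernel's __replica_to_gpu():

#include <assert.h>
#include <stdio.h>

#define NR_REPLICAS 4
#define NR_SIMULT   2	/* simultaneous users per GPU */
#define NR_RSRC     (NR_REPLICAS / NR_SIMULT)

/* assumed stand-in for __replica_to_gpu(): fold replicas onto GPUs */
static int replica_to_gpu(int replica) { return replica / NR_SIMULT; }

int main(void)
{
	int nr_aff_on_rsrc[NR_RSRC] = {0};	/* per-GPU affinity counts */
	int *nr_aff_users[NR_REPLICAS];		/* per-replica views */
	int i;

	for (i = 0; i < NR_REPLICAS; ++i)
		nr_aff_users[i] = &nr_aff_on_rsrc[replica_to_gpu(i)];

	++(*nr_aff_users[0]);	/* a task gains affinity through replica 0 */
	++(*nr_aff_users[1]);	/* another task, replica 1, same GPU */
	assert(nr_aff_on_rsrc[0] == 2);	/* both hit GPU 0's shared counter */
	printf("GPU 0 has %d tasks with affinity\n", nr_aff_on_rsrc[0]);
	return 0;
}
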
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 44f85a366574..cb7c3ac64339 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -188,6 +188,8 @@ typedef struct feedback_est{
 
 #define AVG_EST_WINDOW_SIZE 20
 
+typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk);
+
 typedef struct avg_est{
 	lt_t history[AVG_EST_WINDOW_SIZE];
 	uint16_t count;
@@ -248,6 +250,9 @@ struct rt_param {
 	gpu_migration_dist_t gpu_migration;
 	int last_gpu;
 
+	notify_rsrc_exit_t rsrc_exit_cb;
+	void* rsrc_exit_cb_args;
+
 	lt_t accum_gpu_time;
 	lt_t gpu_time_stamp;
 
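
Note (annotation): rsrc_exit_cb and rsrc_exit_cb_args give each task one
optional exit hook plus an opaque argument; a task is bound to at most
one observer at a time, and binding to a different lock simply
overwrites the pair. A rough user-space sketch of the arm/fire sequence,
with illustrative names (demo_exit_cb and dummy_observer are not from
the patch):

#include <stdio.h>

struct task_struct;	/* user-space stand-ins, not kernel types */
typedef int (*notify_rsrc_exit_t)(struct task_struct *tsk);

struct task_struct {
	notify_rsrc_exit_t rsrc_exit_cb;	/* armed when affinity is gained */
	void *rsrc_exit_cb_args;		/* observer the callback should use */
};

static int demo_exit_cb(struct task_struct *t)
{
	/* a real callback (gpu_ikglp_notify_exit_trampoline) recovers the
	 * observer from rsrc_exit_cb_args and drops the affinity count */
	printf("dropping affinity state at %p\n", t->rsrc_exit_cb_args);
	return 0;
}

int main(void)
{
	struct task_struct t = { 0 };
	int dummy_observer;

	/* arm: done by the lock when the task first gains GPU affinity */
	t.rsrc_exit_cb_args = &dummy_observer;
	t.rsrc_exit_cb = demo_exit_cb;

	/* fire: done once by litmus_exit_task() when the task dies */
	if (t.rsrc_exit_cb && t.rsrc_exit_cb(&t) != 0)
		fprintf(stderr, "exit hook failed\n");
	return 0;
}
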
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index bd7bfc0f48ac..9c57bc24e8bd 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1896,7 +1896,18 @@ int ikglp_aff_obs_close(struct affinity_observer* obs)
 void ikglp_aff_obs_free(struct affinity_observer* obs)
 {
 	struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
+
+	// make sure the thread destroying this semaphore will not
+	// call the exit callback on a destroyed lock.
+	struct task_struct *t = current;
+	if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff)
+	{
+		tsk_rt(t)->rsrc_exit_cb = NULL;
+		tsk_rt(t)->rsrc_exit_cb_args = NULL;
+	}
+
 	kfree(ikglp_aff->nr_cur_users_on_rsrc);
+	kfree(ikglp_aff->nr_aff_on_rsrc);
 	kfree(ikglp_aff->q_info);
 	kfree(ikglp_aff);
 }
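
Note (annotation): the guard added above is a compare-and-clear keyed on
the stashed args pointer, and it runs only for the task that frees the
lock; other tasks still pointing at the dying observer are not disarmed
here. Restated as a stand-alone sketch (struct task and disarm_exit_hook
are stand-ins, not kernel code):

struct task {	/* stand-in for the rt_param fields used above */
	int (*rsrc_exit_cb)(struct task *t);
	void *rsrc_exit_cb_args;
};

/* clear the exit hook iff it targets the observer being freed, so the
 * hook can never fire on freed memory for this task */
static void disarm_exit_hook(struct task *t, const void *dying_observer)
{
	if (t->rsrc_exit_cb_args == dying_observer) {
		t->rsrc_exit_cb = NULL;
		t->rsrc_exit_cb_args = NULL;
	}
}
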
@@ -1960,6 +1971,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}
 
+	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	if(!ikglp_aff->nr_aff_on_rsrc) {
+		kfree(ikglp_aff->nr_cur_users_on_rsrc);
+		kfree(ikglp_aff->q_info);
+		kfree(ikglp_aff);
+		return(NULL);
+	}
+
 	affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
 
 	ikglp_aff->ops = ikglp_ops;
@@ -1974,6 +1993,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		  ikglp_aff->relax_max_fifo_len);
 
 	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
+	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
 
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -1982,6 +2002,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		// multiple q_info's will point to the same resource (aka GPU) if
 		// aff_args.nr_simult_users > 1
 		ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
+		ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
 	}
 
 	// attach observer to the lock
@@ -2035,7 +2056,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	lt_t min_len;
-	int min_nr_users;
+	int min_nr_users, min_nr_aff_users;
 	struct ikglp_queue_info *shortest;
 	struct fifo_queue *to_enqueue;
 	int i;
@@ -2044,11 +2065,21 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	int max_fifo_len = (aff->relax_max_fifo_len) ?
 		sem->m : sem->max_fifo_len;
 
-	// simply pick the shortest queue if, we have no affinity, or we have
-	// affinity with the shortest
+	// if we have no affinity, find the GPU with the least number of users
+	// with active affinity
 	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
-		affinity_gpu = aff->offset; // first gpu
-		TRACE_CUR("no affinity\n");
+		int temp_min = aff->nr_aff_on_rsrc[0];
+		affinity_gpu = aff->offset;
+
+		for(i = 1; i < aff->nr_rsrc; ++i) {
+			if(aff->nr_aff_on_rsrc[i] < temp_min) {
+				temp_min = aff->nr_aff_on_rsrc[i];
+				affinity_gpu = aff->offset + i;
+			}
+		}
+
+		TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n",
+				  affinity_gpu, temp_min);
 	}
 	else {
 		affinity_gpu = tsk_rt(t)->last_gpu;
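
Note (annotation): the no-affinity branch above is an argmin scan over
the per-GPU affinity counts (temp_min must be updated alongside
affinity_gpu, or the loop does not track the running minimum). The same
logic as a stand-alone function; least_loaded_gpu is a made-up name, and
the kernel code adds aff->offset to the result:

#include <stdio.h>

/* pick the GPU with the fewest affine tasks; cf. the no-affinity
 * branch of gpu_ikglp_advise_enqueue() */
static int least_loaded_gpu(const int *nr_aff_on_rsrc, int nr_rsrc)
{
	int best = 0, i;
	for (i = 1; i < nr_rsrc; ++i)
		if (nr_aff_on_rsrc[i] < nr_aff_on_rsrc[best])
			best = i;	/* running argmin */
	return best;
}

int main(void)
{
	int counts[4] = { 2, 0, 3, 1 };
	printf("default GPU: %d\n", least_loaded_gpu(counts, 4));	/* 1 */
	return 0;
}
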
@@ -2066,6 +2097,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 
 	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
 	min_nr_users = *(shortest->nr_cur_users);
+	min_nr_aff_users = *(shortest->nr_aff_users);
+
 
 	TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
 		  get_gpu_estimate(t, MIG_LOCAL),
@@ -2088,14 +2121,21 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 		//
 		// tie-break on the smallest number of simult users. this only kicks in
 		// when there is more than one empty queue.
+
+		// TODO: Make "est_len < min_len" a fuzzy function that allows
+		// queues "close enough" in length to be considered equal.
+
 		if((shortest->q->count >= max_fifo_len) ||  /* 'shortest' is full and i-th queue is not */
 		   (est_len < min_len) ||  /* i-th queue has shortest length */
 		   ((est_len == min_len) &&  /* equal lengths, but one has fewer over-all users */
-		    (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
+		    ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) ||
+		     ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&
+		      (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) {
 
 			shortest = &aff->q_info[i];
 			min_len = est_len;
 			min_nr_users = *(aff->q_info[i].nr_cur_users);
+			min_nr_aff_users = *(aff->q_info[i].nr_aff_users);
 		}
 
 		TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
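
Note (annotation): the net selection order is now (1) the shorter
estimated queue wins, (2) among equal-length queues the GPU with fewer
affine tasks wins, (3) only then does the old tie-break on current
resource holders apply. A toy rendering of that three-level comparison,
ignoring the full-queue escape clause and using plain integers in place
of the migration-aware length estimates:

#include <stdio.h>

struct q {		/* toy stand-in for ikglp_queue_info */
	long est_len;	/* estimated queue length */
	int nr_aff;	/* tasks with affinity for this queue's GPU */
	int nr_cur;	/* current holders on this queue's GPU */
};

/* nonzero if 'a' should replace current best 'b', mirroring the
 * patched condition in gpu_ikglp_advise_enqueue() */
static int better(const struct q *a, const struct q *b)
{
	return (a->est_len < b->est_len) ||
	       ((a->est_len == b->est_len) &&
		((a->nr_aff < b->nr_aff) ||
		 ((a->nr_aff == b->nr_aff) && (a->nr_cur < b->nr_cur))));
}

int main(void)
{
	struct q a = { 10, 3, 1 }, b = { 10, 1, 2 };
	/* equal lengths: b wins because fewer tasks are affine to its GPU,
	 * even though it currently has more holders */
	printf("prefer %s\n", better(&b, &a) ? "b" : "a");
	return 0;
}
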
@@ -2612,6 +2652,51 @@ void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq,
 //	}
 }
 
+int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
+{
+	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
+	unsigned long flags = 0, real_flags;
+	int aff_rsrc;
+#ifdef CONFIG_LITMUS_DGL_SUPPORT
+	raw_spinlock_t *dgl_lock;
+
+	dgl_lock = litmus->get_dgl_spinlock(t);
+#endif
+
+	if (tsk_rt(t)->last_gpu < 0)
+		return 0;
+
+	raw_spin_lock_irqsave(&sem->real_lock, real_flags);
+	lock_global_irqsave(dgl_lock, flags);
+	lock_fine_irqsave(&sem->lock, flags);
+
+	// decrement affinity count on old GPU
+	aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
+	--(aff->nr_aff_on_rsrc[aff_rsrc]);
+
+	if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
+		WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
+		aff->nr_aff_on_rsrc[aff_rsrc] = 0;
+	}
+
+	unlock_fine_irqrestore(&sem->lock, flags);
+	unlock_global_irqrestore(dgl_lock, flags);
+	raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
+
+	return 0;
+}
+
+int gpu_ikglp_notify_exit_trampoline(struct task_struct* t)
+{
+	struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args;
+	if(likely(aff)) {
+		return gpu_ikglp_notify_exit(aff, t);
+	}
+	else {
+		return -1;
+	}
+}
+
 void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 			       struct fifo_queue* fq,
 			       struct task_struct* t)
@@ -2619,15 +2704,28 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	int replica = ikglp_get_idx(sem, fq);
 	int gpu = replica_to_gpu(aff, replica);
+	int last_gpu = tsk_rt(t)->last_gpu;
 
-	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
+	tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration
 
 	TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
-			  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
+			  t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration);
 
 	// count the number of resource holders
 	++(*(aff->q_info[replica].nr_cur_users));
 
+	if(gpu != last_gpu) {
+		if(last_gpu >= 0) {
+			int old_rsrc = last_gpu - aff->offset;
+			--(aff->nr_aff_on_rsrc[old_rsrc]);
+		}
+
+		// increment affinity count on new GPU
+		++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
+		tsk_rt(t)->rsrc_exit_cb_args = aff;
+		tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
+	}
+
 	reg_nv_device(gpu, 1, t);	// register
 
 	tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
@@ -2679,6 +2777,8 @@ struct ikglp_affinity_ops gpu_ikglp_affinity =
 	.notify_acquired = gpu_ikglp_notify_acquired,
 	.notify_freed = gpu_ikglp_notify_freed,
 
+	.notify_exit = gpu_ikglp_notify_exit,
+
 	.replica_to_resource = gpu_replica_to_resource,
 };
 
@@ -2817,6 +2917,8 @@ struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
 	.notify_acquired = simple_gpu_ikglp_notify_acquired,
 	.notify_freed = simple_gpu_ikglp_notify_freed,
 
+	.notify_exit = NULL,
+
 	.replica_to_resource = gpu_replica_to_resource,
 };
 
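
Note (annotation): simple_gpu_ikglp_affinity opts out of exit tracking
by leaving notify_exit NULL, so the hook has to be treated as optional;
in this patch it is only ever reached through
gpu_ikglp_notify_exit_trampoline, which gpu_ikglp_notify_acquired()
installs. A sketch of the NULL-tolerant dispatch one would use when
calling through the ops table directly (maybe_notify_exit is
hypothetical):

struct ikglp_affinity;
struct task_struct;

struct ikglp_affinity_ops {	/* trimmed to the relevant member */
	int (*notify_exit)(struct ikglp_affinity *aff, struct task_struct *t);
};

/* call the hook only if the chosen ops table provides one */
static int maybe_notify_exit(struct ikglp_affinity_ops *ops,
			     struct ikglp_affinity *aff, struct task_struct *t)
{
	if (ops->notify_exit)
		return ops->notify_exit(aff, t);
	return 0;	/* opted out, e.g. simple_gpu_ikglp_affinity */
}
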
diff --git a/litmus/litmus.c b/litmus/litmus.c
index d368202ab8c3..143c746c344e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -533,6 +533,11 @@ void litmus_exit_task(struct task_struct* tsk)
 	if (is_realtime(tsk)) {
 		sched_trace_task_completion(tsk, 1);
 
+		if (tsk_rt(tsk)->rsrc_exit_cb) {
+			int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk);
+			WARN_ON(ret != 0);
+		}
+
 		litmus->task_exit(tsk);
 
 		BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));