author	Glenn Elliott <gelliott@cs.unc.edu>	2012-11-11 13:10:43 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-11-11 13:10:43 -0500
commit	dede6a6b8ce09f48295d8ba4635480c98ef85284 (patch)
tree	3bc2dcbe492d1e799076cd7060bc886b7a376219
parent	33cb64c787070d6b60a02ea40064d717d3b9dc07 (diff)
improve ikglp heuristics
-rw-r--r--  include/litmus/ikglp_lock.h |   4
-rw-r--r--  include/litmus/rt_param.h   |   5
-rw-r--r--  litmus/ikglp_lock.c         | 117
-rw-r--r--  litmus/litmus.c             |   5
4 files changed, 123 insertions(+), 8 deletions(-)
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
index 0b89c8135360..9d0cd3d1904e 100644
--- a/include/litmus/ikglp_lock.h
+++ b/include/litmus/ikglp_lock.h
@@ -114,6 +114,7 @@ struct ikglp_queue_info
 	struct fifo_queue* q;
 	lt_t estimated_len;
 	int *nr_cur_users;
+	int *nr_aff_users;
 };
 
 struct ikglp_affinity_ops
@@ -128,6 +129,8 @@ struct ikglp_affinity_ops
 	void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired
 	void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed
 	int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding)
+
+	int (*notify_exit)(struct ikglp_affinity* aff, struct task_struct* t);
 };
 
 struct ikglp_affinity
@@ -136,6 +139,7 @@ struct ikglp_affinity
 	struct ikglp_affinity_ops *ops;
 	struct ikglp_queue_info *q_info;
 	int *nr_cur_users_on_rsrc;
+	int *nr_aff_on_rsrc;
 	int offset;
 	int nr_simult;
 	int nr_rsrc;
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 44f85a366574..cb7c3ac64339 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -188,6 +188,8 @@ typedef struct feedback_est{
 
 #define AVG_EST_WINDOW_SIZE 20
 
+typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk);
+
 typedef struct avg_est{
 	lt_t history[AVG_EST_WINDOW_SIZE];
 	uint16_t count;
@@ -248,6 +250,9 @@ struct rt_param {
 	gpu_migration_dist_t gpu_migration;
 	int last_gpu;
 
+	notify_rsrc_exit_t rsrc_exit_cb;
+	void* rsrc_exit_cb_args;
+
 	lt_t accum_gpu_time;
 	lt_t gpu_time_stamp;
 
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index bd7bfc0f48ac..9c57bc24e8bd 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1896,7 +1896,18 @@ int ikglp_aff_obs_close(struct affinity_observer* obs)
 void ikglp_aff_obs_free(struct affinity_observer* obs)
 {
 	struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
+
+	// make sure the thread destroying this semaphore will not
+	// call the exit callback on a destroyed lock.
+	struct task_struct *t = current;
+	if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff)
+	{
+		tsk_rt(t)->rsrc_exit_cb = NULL;
+		tsk_rt(t)->rsrc_exit_cb_args = NULL;
+	}
+
 	kfree(ikglp_aff->nr_cur_users_on_rsrc);
+	kfree(ikglp_aff->nr_aff_on_rsrc);
 	kfree(ikglp_aff->q_info);
 	kfree(ikglp_aff);
 }
@@ -1960,6 +1971,14 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		return(NULL);
 	}
 
+	ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
+	if(!ikglp_aff->nr_aff_on_rsrc) {
+		kfree(ikglp_aff->nr_cur_users_on_rsrc);
+		kfree(ikglp_aff->q_info);
+		kfree(ikglp_aff);
+		return(NULL);
+	}
+
 	affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
 
 	ikglp_aff->ops = ikglp_ops;
@@ -1974,6 +1993,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 			ikglp_aff->relax_max_fifo_len);
 
 	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
+	memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
 
 	for(i = 0; i < sem->nr_replicas; ++i) {
 		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
@@ -1982,6 +2002,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 		// multiple q_info's will point to the same resource (aka GPU) if
 		// aff_args.nr_simult_users > 1
 		ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
+		ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
 	}
 
 	// attach observer to the lock
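The aliasing set up in the loop above is the interesting part: when nr_simult_users > 1, several replicas back the same physical GPU, so their nr_aff_users pointers all alias one slot of nr_aff_on_rsrc[]. A tiny user-space model of that folding follows; it assumes __replica_to_gpu() folds replicas onto GPUs by integer division over the simultaneous-user count, which the "simult user folding" comment suggests, and all names in it are hypothetical stand-ins rather than the kernel's own.

#include <stdio.h>

/* Model of the replica-to-GPU folding used when wiring q_info[i].nr_aff_users:
 * with nr_simult simultaneous users per GPU, replicas 0..nr_simult-1 share
 * GPU 0, the next nr_simult share GPU 1, and so on. Hypothetical standalone
 * code; the division mirrors the assumed behavior of __replica_to_gpu(). */
static int replica_to_rsrc(int replica, int nr_simult)
{
	return replica / nr_simult;
}

int main(void)
{
	int nr_replicas = 6, nr_simult = 2;
	int nr_aff_on_rsrc[3] = { 0, 0, 0 };
	int *nr_aff_users[6];
	int i;

	/* several replica slots alias the same per-GPU counter */
	for (i = 0; i < nr_replicas; ++i)
		nr_aff_users[i] = &nr_aff_on_rsrc[replica_to_rsrc(i, nr_simult)];

	++(*nr_aff_users[2]); /* bumps the GPU 1 counter ... */
	++(*nr_aff_users[3]); /* ... and so does this */
	printf("aff users per gpu: %d %d %d\n",
	       nr_aff_on_rsrc[0], nr_aff_on_rsrc[1], nr_aff_on_rsrc[2]);
	return 0;
}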
@@ -2035,7 +2056,7 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	lt_t min_len;
-	int min_nr_users;
+	int min_nr_users, min_nr_aff_users;
 	struct ikglp_queue_info *shortest;
 	struct fifo_queue *to_enqueue;
 	int i;
@@ -2044,11 +2065,20 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 	int max_fifo_len = (aff->relax_max_fifo_len) ?
 		sem->m : sem->max_fifo_len;
 
-	// simply pick the shortest queue if, we have no affinity, or we have
-	// affinity with the shortest
+	// if we have no affinity, find the GPU with the least number of users
+	// with active affinity
 	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
-		affinity_gpu = aff->offset; // first gpu
-		TRACE_CUR("no affinity\n");
+		int temp_min = aff->nr_aff_on_rsrc[0];
+		affinity_gpu = aff->offset;
+
+		for(i = 1; i < aff->nr_rsrc; ++i) {
+			if(aff->nr_aff_on_rsrc[i] < temp_min) {
+				affinity_gpu = aff->offset + i;
+			}
+		}
+
+		TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n",
+			affinity_gpu, temp_min);
 	}
 	else {
 		affinity_gpu = tsk_rt(t)->last_gpu;
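One note on the no-affinity scan just above: as written, temp_min is initialized from resource 0 but never updated inside the loop, so the search effectively picks the first GPU with fewer affinity users than GPU 0 rather than the overall minimum, and the TRACE_CUR line then reports GPU 0's count regardless of which GPU was chosen. A minimal user-space sketch of a least-loaded scan that keeps the running minimum current is shown below; it is a standalone model with hypothetical names, not the patch's kernel code.

#include <stdio.h>

/* Standalone model of the least-loaded-GPU scan. nr_aff_on_rsrc[] mirrors
 * the per-GPU affinity counters introduced by this patch; offset plays the
 * role of aff->offset. Hypothetical helper, for illustration only. */
static int least_affinity_gpu(const int *nr_aff_on_rsrc, int nr_rsrc, int offset)
{
	int temp_min = nr_aff_on_rsrc[0];
	int gpu = offset;
	int i;

	for (i = 1; i < nr_rsrc; ++i) {
		if (nr_aff_on_rsrc[i] < temp_min) {
			temp_min = nr_aff_on_rsrc[i]; /* keep the running minimum current */
			gpu = offset + i;
		}
	}
	return gpu;
}

int main(void)
{
	int counts[4] = { 3, 1, 2, 0 };
	/* prints 3: the GPU with zero tasks holding affinity */
	printf("least-loaded gpu: %d\n", least_affinity_gpu(counts, 4, 0));
	return 0;
}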
@@ -2066,6 +2096,8 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 
 	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
 	min_nr_users = *(shortest->nr_cur_users);
+	min_nr_aff_users = *(shortest->nr_aff_users);
+
 
 	TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
 		get_gpu_estimate(t, MIG_LOCAL),
@@ -2088,14 +2120,21 @@ struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct t
 		//
 		// tie-break on the shortest number of simult users. this only kicks in
 		// when there are more than 1 empty queues.
+
+		// TODO: Make "est_len < min_len" a fuzzy function that allows
+		// queues "close enough" in length to be considered equal.
+
 		if((shortest->q->count >= max_fifo_len) ||	/* 'shortest' is full and i-th queue is not */
 		   (est_len < min_len) ||	/* i-th queue has shortest length */
 		   ((est_len == min_len) &&	/* equal lengths, but one has fewer over-all users */
-		    (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
+		    ((*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) ||
+		     ((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) &&
+		      (*(aff->q_info[i].nr_cur_users) < min_nr_users))))) {
 
 			shortest = &aff->q_info[i];
 			min_len = est_len;
 			min_nr_users = *(aff->q_info[i].nr_cur_users);
+			min_nr_aff_users = *(aff->q_info[i].nr_aff_users);
 		}
 
 		TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
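The enlarged predicate above turns the tie-break into a lexicographic order: shortest estimated queue length first, then fewest tasks with affinity for the backing GPU, then fewest current users. A compact user-space model of that comparison is sketched below; the types and field names are illustrative stand-ins, not the patch's structures, and the caller is assumed to offer only non-full candidate queues.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct ikglp_queue_info, reduced to the fields
 * that participate in the tie-break. Hypothetical model, not kernel code. */
struct queue_model {
	unsigned long long est_len; /* estimated wait including this task's request */
	int nr_aff_users;           /* tasks with affinity for the backing GPU */
	int nr_cur_users;           /* tasks currently holding one of its replicas */
	int count;                  /* requests already queued in this FIFO */
};

/* True if cand should replace best: drop best if it is full, otherwise prefer
 * shorter estimated length, then fewer affinity users, then fewer current
 * users -- the same ordering the patch's condition expresses. */
static bool prefer(const struct queue_model *cand,
                   const struct queue_model *best, int max_fifo_len)
{
	if (best->count >= max_fifo_len)
		return true;
	if (cand->est_len < best->est_len)
		return true;
	if (cand->est_len == best->est_len) {
		if (cand->nr_aff_users < best->nr_aff_users)
			return true;
		if (cand->nr_aff_users == best->nr_aff_users &&
		    cand->nr_cur_users < best->nr_cur_users)
			return true;
	}
	return false;
}

int main(void)
{
	struct queue_model a = { .est_len = 10, .nr_aff_users = 2, .nr_cur_users = 1, .count = 1 };
	struct queue_model b = { .est_len = 10, .nr_aff_users = 1, .nr_cur_users = 3, .count = 1 };
	/* equal lengths, so b wins on the new affinity-user tie-break */
	printf("prefer b over a: %d\n", prefer(&b, &a, 4));
	return 0;
}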
@@ -2612,6 +2651,51 @@ void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq,
 //	}
 }
 
+int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
+{
+	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
+	unsigned long flags = 0, real_flags;
+	int aff_rsrc;
+#ifdef CONFIG_LITMUS_DGL_SUPPORT
+	raw_spinlock_t *dgl_lock;
+
+	dgl_lock = litmus->get_dgl_spinlock(t);
+#endif
+
+	if (tsk_rt(t)->last_gpu < 0)
+		return 0;
+
+	raw_spin_lock_irqsave(&sem->real_lock, real_flags);
+	lock_global_irqsave(dgl_lock, flags);
+	lock_fine_irqsave(&sem->lock, flags);
+
+	// decrement affinity count on old GPU
+	aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
+	--(aff->nr_aff_on_rsrc[aff_rsrc]);
+
+	if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
+		WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
+		aff->nr_aff_on_rsrc[aff_rsrc] = 0;
+	}
+
+	unlock_fine_irqrestore(&sem->lock, flags);
+	unlock_global_irqrestore(dgl_lock, flags);
+	raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
+
+	return 0;
+}
+
+int gpu_ikglp_notify_exit_trampoline(struct task_struct* t)
+{
+	struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args;
+	if(likely(aff)) {
+		return gpu_ikglp_notify_exit(aff, t);
+	}
+	else {
+		return -1;
+	}
+}
+
 void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 							  struct fifo_queue* fq,
 							  struct task_struct* t)
@@ -2619,15 +2703,28 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
 	int replica = ikglp_get_idx(sem, fq);
 	int gpu = replica_to_gpu(aff, replica);
+	int last_gpu = tsk_rt(t)->last_gpu;
 
-	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
+	tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration
 
 	TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
-		t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
+		t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration);
 
 	// count the number or resource holders
 	++(*(aff->q_info[replica].nr_cur_users));
 
+	if(gpu != last_gpu) {
+		if(last_gpu >= 0) {
+			int old_rsrc = last_gpu - aff->offset;
+			--(aff->nr_aff_on_rsrc[old_rsrc]);
+		}
+
+		// increment affinity count on new GPU
+		++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
+		tsk_rt(t)->rsrc_exit_cb_args = aff;
+		tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
+	}
+
 	reg_nv_device(gpu, 1, t); // register
 
 	tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
@@ -2679,6 +2776,8 @@ struct ikglp_affinity_ops gpu_ikglp_affinity =
 	.notify_acquired = gpu_ikglp_notify_acquired,
 	.notify_freed = gpu_ikglp_notify_freed,
 
+	.notify_exit = gpu_ikglp_notify_exit,
+
 	.replica_to_resource = gpu_replica_to_resource,
 };
 
@@ -2817,6 +2916,8 @@ struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
 	.notify_acquired = simple_gpu_ikglp_notify_acquired,
 	.notify_freed = simple_gpu_ikglp_notify_freed,
 
+	.notify_exit = NULL,
+
 	.replica_to_resource = gpu_replica_to_resource,
 };
 
diff --git a/litmus/litmus.c b/litmus/litmus.c
index d368202ab8c3..143c746c344e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -533,6 +533,11 @@ void litmus_exit_task(struct task_struct* tsk)
 	if (is_realtime(tsk)) {
 		sched_trace_task_completion(tsk, 1);
 
+		if (tsk_rt(tsk)->rsrc_exit_cb) {
+			int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk);
+			WARN_ON(ret != 0);
+		}
+
 		litmus->task_exit(tsk);
 
 		BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
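Taken together, the hunks above add a per-task resource-exit hook: gpu_ikglp_notify_acquired() stashes the affinity observer in rsrc_exit_cb_args and points rsrc_exit_cb at the trampoline, and litmus_exit_task() invokes the hook so the observer can drop the exiting task's affinity count. A stripped-down user-space model of that callback protocol is sketched below; the names and toy structures are hypothetical, and the locking done by the real gpu_ikglp_notify_exit() is omitted.

#include <stdio.h>

struct task_model;

/* Mirrors the notify_rsrc_exit_t typedef added to rt_param.h, but over a
 * toy task structure instead of struct task_struct. Hypothetical model. */
typedef int (*notify_rsrc_exit_t)(struct task_model *tsk);

struct task_model {
	notify_rsrc_exit_t rsrc_exit_cb; /* set when the task gains GPU affinity */
	void *rsrc_exit_cb_args;         /* the observer that must be notified */
	int last_gpu;
};

struct observer_model {
	int nr_aff_on_rsrc[4]; /* per-GPU count of tasks with affinity, as in the patch */
	int offset;
};

/* Plays the role of gpu_ikglp_notify_exit_trampoline() plus
 * gpu_ikglp_notify_exit(): recover the observer from the stashed argument
 * and drop the affinity count the exiting task still holds on its last GPU. */
static int exit_trampoline(struct task_model *t)
{
	struct observer_model *obs = t->rsrc_exit_cb_args;
	int rsrc;

	if (!obs)
		return -1;
	if (t->last_gpu < 0)
		return 0;

	rsrc = t->last_gpu - obs->offset;
	if (obs->nr_aff_on_rsrc[rsrc] > 0)
		--obs->nr_aff_on_rsrc[rsrc];
	return 0;
}

/* Plays the role of the new litmus_exit_task() snippet: run the hook if set. */
static void task_exit(struct task_model *t)
{
	if (t->rsrc_exit_cb && t->rsrc_exit_cb(t) != 0)
		fprintf(stderr, "resource exit callback failed\n");
}

int main(void)
{
	struct observer_model obs = { .nr_aff_on_rsrc = { 0, 2, 0, 0 }, .offset = 0 };
	struct task_model t = { .rsrc_exit_cb = exit_trampoline,
	                        .rsrc_exit_cb_args = &obs, .last_gpu = 1 };

	task_exit(&t); /* affinity count on GPU 1 drops from 2 to 1 */
	printf("aff users on gpu 1: %d\n", obs.nr_aff_on_rsrc[1]);
	return 0;
}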