author     Glenn Elliott <gelliott@cs.unc.edu>    2012-05-14 16:51:05 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>    2012-05-14 16:51:05 -0400
commit     6827bb817faecede51838e2fcc8b6283e54fe872 (patch)
tree       00ff42c305926c800e18b13df8440a4de1a1a041
parent     af6eeb156c7da47ff5df03a3da04432c8ac4460c (diff)

Final GPUSync implementation. (wip-gpu-rtss12)
-rw-r--r--  include/litmus/gpu_affinity.h  |   7
-rw-r--r--  include/litmus/nvidia_info.h   |   1
-rw-r--r--  include/litmus/rt_param.h      |   5
-rw-r--r--  include/litmus/sched_trace.h   |  44
-rw-r--r--  kernel/softirq.c               |  34
-rw-r--r--  litmus/Kconfig                 |   2
-rw-r--r--  litmus/gpu_affinity.c          |  38
-rw-r--r--  litmus/ikglp_lock.c            |  39
-rw-r--r--  litmus/jobs.c                  |  17
-rw-r--r--  litmus/kfmlp_lock.c            |   2
-rw-r--r--  litmus/litmus.c                |  20
-rw-r--r--  litmus/locking.c               |   8
-rw-r--r--  litmus/nvidia_info.c           |  13
-rw-r--r--  litmus/rsm_lock.c              |   7
-rw-r--r--  litmus/sched_cedf.c            |  13
-rw-r--r--  litmus/sched_plugin.c          |   7
-rw-r--r--  litmus/sched_task_trace.c      |  50
17 files changed, 261 insertions, 46 deletions
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
index d4db2003ad86..6b3fb8b28745 100644
--- a/include/litmus/gpu_affinity.h
+++ b/include/litmus/gpu_affinity.h
@@ -43,10 +43,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t
 		val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
 	}
 
-	// minimum value is 1 (val is 0 if we haven't run with local affinity yet)
-	// TODO: pick a better default min-value.  1 is too small.  perhaps
-	// task execution time?
-	return ((val > 0) ? val : 1);
+	return ((val > 0) ? val : dist+1);
 }
 
-#endif
\ No newline at end of file
+#endif
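A note on the hunk above: when no observation has been recorded yet for a given migration distance (val == 0), the fallback estimate is now dist+1 rather than a flat 1, presumably so that untrained estimates still rank closer migrations below farther ones. A stand-alone sketch of just that fallback, with plain C integers in place of the kernel's fixed-point values:

#include <stdio.h>

/* Stand-alone sketch of the new fallback in get_gpu_estimate(): "dist" is
 * the migration-distance index and "val" the integer part of the current
 * estimate (0 means no observation yet).  Illustrative only. */
static long estimate_or_default(long val, int dist)
{
	return (val > 0) ? val : dist + 1;
}

int main(void)
{
	int dist;
	for (dist = 0; dist <= 3; dist++)
		printf("no data yet, dist %d -> default estimate %ld\n",
		       dist, estimate_or_default(0, dist));
	return 0;
}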
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
index 580728051d4e..97c9577141db 100644
--- a/include/litmus/nvidia_info.h
+++ b/include/litmus/nvidia_info.h
@@ -12,6 +12,7 @@
 #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
 
 int init_nvidia_info(void);
+void shutdown_nvidia_info(void);
 
 int is_nvidia_func(void* func_addr);
 
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 4553521146cc..0198884eab86 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -26,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b)
 typedef enum {
 	RT_CLASS_HARD,
 	RT_CLASS_SOFT,
+	RT_CLASS_SOFT_W_SLIP,
 	RT_CLASS_BEST_EFFORT
 } task_class_t;
 
@@ -189,8 +190,8 @@ struct rt_param {
 	long unsigned int held_gpus;  // bitmap of held GPUs.
 
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
-	fp_t gpu_fb_param_a;
-	fp_t gpu_fb_param_b;
+	fp_t gpu_fb_param_a[MIG_LAST+1];
+	fp_t gpu_fb_param_b[MIG_LAST+1];
 
 	gpu_migration_dist_t gpu_migration;
 	int last_gpu;
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 232c7588d103..b1b71f6c5f0c 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -10,7 +10,8 @@ struct st_trace_header {
 	u8	type;		/* Of what type is this record?  */
 	u8	cpu;		/* On which CPU was it recorded? */
 	u16	pid;		/* PID of the task.              */
-	u32	job;		/* The job sequence number.      */
+	u32	job:24;		/* The job sequence number.      */
+	u8	extra;
 } __attribute__((packed));
 
 #define ST_NAME_LEN 16
@@ -136,6 +137,22 @@ struct st_nv_interrupt_end_data {
 	u32 serialNumber;
 } __attribute__((packed));
 
+struct st_prediction_err_data {
+	u64 distance;
+	u64 rel_err;
+} __attribute__((packed));
+
+struct st_migration_data {
+	u64 observed;
+	u64 estimated;
+} __attribute__((packed));
+
+struct migration_info {
+	u64 observed;
+	u64 estimated;
+	u8 distance;
+} __attribute__((packed));
+
 #define DATA(x) struct st_ ## x ## _data x;
 
 typedef enum {
@@ -160,6 +177,9 @@ typedef enum {
 	ST_EFF_PRIO_CHANGE,
 	ST_NV_INTERRUPT_BEGIN,
 	ST_NV_INTERRUPT_END,
+
+	ST_PREDICTION_ERR,
+	ST_MIGRATION,
 } st_event_record_type_t;
 
 struct st_event_record {
@@ -187,6 +207,9 @@ struct st_event_record {
 		DATA(effective_priority_change);
 		DATA(nv_interrupt_begin);
 		DATA(nv_interrupt_end);
+
+		DATA(prediction_err);
+		DATA(migration);
 	} data;
 } __attribute__((packed));
 
@@ -259,6 +282,19 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
 feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
 	unsigned long unused);
 
+feather_callback void do_sched_trace_prediction_err(unsigned long id,
+	struct task_struct* task,
+	gpu_migration_dist_t* distance,
+	fp_t* rel_err);
+
+
+
+
+
+feather_callback void do_sched_trace_migration(unsigned long id,
+	struct task_struct* task,
+	struct migration_info* mig_info);
+
 
 /* returns true if we're tracing an interrupt on current CPU */
 /* int is_interrupt_tracing_active(void); */
@@ -331,6 +367,12 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
 #define sched_trace_nv_interrupt_end(d) \
 	SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
 
+#define sched_trace_prediction_err(t, dist, rel_err) \
+	SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err)
+
+#define sched_trace_migration(t, mig_info) \
+	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info)
+
 #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
 
 #endif /* __KERNEL__ */
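A note on the header change in this file: narrowing job to a 24-bit bitfield and adding the u8 extra keeps the packed header at its original 8 bytes, and do_sched_trace_migration() later in this patch stores the migration distance in that spare byte. A minimal user-space sketch of the layout (a stand-alone illustration under that assumption, not the kernel definition):

#include <stdint.h>
#include <stdio.h>

/* Sketch of the reworked trace header.  Field names follow the patch, but
 * this struct is only an illustration of the packing, not the kernel type. */
struct st_trace_header_sketch {
	uint8_t  type;      /* record type */
	uint8_t  cpu;       /* CPU that logged the record */
	uint16_t pid;       /* PID of the task */
	uint32_t job:24;    /* job sequence number, now 24 bits */
	uint8_t  extra;     /* freed byte, e.g. migration distance */
} __attribute__((packed));

int main(void)
{
	/* Both the old layout (u8, u8, u16, u32) and this one pack to 8 bytes,
	 * so existing trace readers keep the same record size. */
	printf("packed header size: %zu bytes\n",
	       sizeof(struct st_trace_header_sketch));
	return 0;
}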
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 1c42e08fdfaa..5ce271675662 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -412,12 +412,44 @@ struct tasklet_head
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
 
+#ifdef CONFIG_LITMUS_NVIDIA
+static int __do_nv_now(struct tasklet_struct* tasklet)
+{
+	int success = 1;
+
+	if(tasklet_trylock(tasklet)) {
+		if (!atomic_read(&tasklet->count)) {
+			if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) {
+				BUG();
+			}
+			tasklet->func(tasklet->data);
+			tasklet_unlock(tasklet);
+		}
+		else {
+			success = 0;
+		}
+
+		tasklet_unlock(tasklet);
+	}
+	else {
+		success = 0;
+	}
+
+	return success;
+}
+#endif
+
 
 void __tasklet_schedule(struct tasklet_struct *t)
 {
 #ifdef CONFIG_LITMUS_NVIDIA
 	if(is_nvidia_func(t->func))
 	{
+#if 0
+		// do nvidia tasklets right away and return
+		if(__do_nv_now(t))
+			return;
+#else
 		u32 nvidia_device = get_tasklet_nv_device_num(t);
 		// TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
 		//       __FUNCTION__, nvidia_device,litmus_clock());
@@ -461,7 +493,9 @@ void __tasklet_schedule(struct tasklet_struct *t)
 		}
 	}
 	unlock_nv_registry(nvidia_device, &flags);
+#endif
 	}
+
 #endif
 
 	___tasklet_schedule(t);
diff --git a/litmus/Kconfig b/litmus/Kconfig
index a34440f3d8bc..8c156e4da528 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -157,7 +157,7 @@ config SCHED_TASK_TRACE
 config SCHED_TASK_TRACE_SHIFT
 	int "Buffer size for sched_trace_xxx() events"
 	depends on SCHED_TASK_TRACE
-	range 8 13
+	range 8 15
 	default 9
 	help
 
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
index 70a86bdd9aec..9762be1a085e 100644
--- a/litmus/gpu_affinity.c
+++ b/litmus/gpu_affinity.c
@@ -5,25 +5,32 @@
 #include <litmus/litmus.h>
 #include <litmus/gpu_affinity.h>
 
+#include <litmus/sched_trace.h>
+
 #define OBSERVATION_CAP 2*1e9
 
-static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
+static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
 {
+	fp_t relative_err;
 	fp_t err, new;
 	fp_t actual = _integer_to_fp(observed);
 
 	err = _sub(actual, fb->est);
 	new = _add(_mul(a, err), _mul(b, fb->accum_err));
 
+	relative_err = _div(err, actual);
+
 	fb->est = new;
 	fb->accum_err = _add(fb->accum_err, err);
+
+	return relative_err;
 }
 
 void update_gpu_estimate(struct task_struct *t, lt_t observed)
 {
 	feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
 
-	WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
+	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
 
 	if(unlikely(fb->est.val == 0)) {
 		// kludge-- cap observed values to prevent whacky estimations.
@@ -40,18 +47,29 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed)
 		fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
 	}
 	else {
-		update_estimate(fb,
-			tsk_rt(t)->gpu_fb_param_a,
-			tsk_rt(t)->gpu_fb_param_b,
+		fp_t rel_err = update_estimate(fb,
+			tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
+			tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
 			observed);
 
-		if(_fp_to_integer(fb->est) <= 0) {
-			// TODO: talk to Jonathan about how well this works.
-			// Maybe we should average the observed and est instead?
+		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
 			TRACE_TASK(t, "Invalid estimate. Patching.\n");
 			fb->est = _integer_to_fp(observed);
 			fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
 		}
+		else {
+//			struct migration_info mig_info;
+
+			sched_trace_prediction_err(t,
+				&(tsk_rt(t)->gpu_migration),
+				&rel_err);
+
+//			mig_info.observed = observed;
+//			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
+//			mig_info.distance = tsk_rt(t)->gpu_migration;
+//
+//			sched_trace_migration(t, &mig_info);
+		}
 	}
 
 	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
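A note on update_estimate() above: it is a simple feedback filter over observed GPU execution times; err is the deviation of the latest observation from the current estimate, the next estimate is a*err + b*accum_err, and the relative error it now returns is what feeds sched_trace_prediction_err(). A stand-alone sketch of the same arithmetic with doubles in place of the kernel's fp_t (the parameters reuse the index-0 values installed later in this patch, and the initial accum_err = est/2 mirrors the bootstrap kludge above; the program itself is purely illustrative):

#include <stdio.h>

/* Stand-alone sketch of the feedback estimator, with doubles in place of
 * the kernel's fp_t fixed-point arithmetic. */
struct feedback_est {
	double est;        /* current execution-time estimate */
	double accum_err;  /* accumulated (integrated) error  */
};

/* Returns the relative error of the previous estimate, mirroring the value
 * the patch hands to sched_trace_prediction_err(). */
static double update_estimate(struct feedback_est *fb, double a, double b,
                              double observed)
{
	double err = observed - fb->est;
	double rel_err = err / observed;

	fb->est = a * err + b * fb->accum_err;
	fb->accum_err += err;

	return rel_err;
}

int main(void)
{
	/* Bootstrap as in update_gpu_estimate(): accum_err starts at est/2. */
	struct feedback_est fb = { .est = 1000.0, .accum_err = 500.0 };
	double samples[] = { 1100.0, 950.0, 1050.0, 1000.0 };
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		double rel = update_estimate(&fb, 0.7550, 4.5800, samples[i]);
		printf("obs=%.0f  est=%.1f  rel_err=%.3f\n",
		       samples[i], fb.est, rel);
	}
	return 0;
}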
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
index 023443014d4b..83b708ab85cb 100644
--- a/litmus/ikglp_lock.c
+++ b/litmus/ikglp_lock.c
@@ -1346,6 +1346,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1383,6 +1387,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1409,6 +1417,10 @@ int ikglp_unlock(struct litmus_lock* l)
 #ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 	if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
 		fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
+		if(fq_of_new_on_fq->count == 0) {
+			// ignore it?
+//			fq_of_new_on_fq = fq;
+		}
 	}
 	else {
 		fq_of_new_on_fq = fq;
@@ -1569,7 +1581,7 @@ int ikglp_unlock(struct litmus_lock* l)
 		}
 	}
 
-
+wake_kludge:
 	if(waitqueue_active(&fq->wait))
 	{
 		wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
@@ -1672,6 +1684,16 @@ int ikglp_unlock(struct litmus_lock* l)
 		// wake up the new resource holder!
 		wake_up_process(next);
 	}
+	if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
+		// The guy we promoted when to an empty FQ. (Why didn't stealing pick this up?)
+		// Wake up the new guy too.
+
+		BUG_ON(fq_of_new_on_fq->owner != NULL);
+
+		fq = fq_of_new_on_fq;
+		fq_of_new_on_fq = NULL;
+		goto wake_kludge;
+	}
 
 	unlock_fine_irqrestore(&sem->lock, flags);
 	unlock_global_irqrestore(dgl_lock, flags);
@@ -1917,7 +1939,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 			NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+//		return(NULL);
 	}
 
 	ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
@@ -2600,8 +2622,8 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
 
 	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
 
-	TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
-		t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
+	TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
+		t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
 
 	// count the number or resource holders
 	++(*(aff->q_info[replica].nr_cur_users));
@@ -2626,8 +2648,6 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
 
 	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
 
-	tsk_rt(t)->last_gpu = gpu;
-
 	// count the number or resource holders
 	--(*(aff->q_info[replica].nr_cur_users));
 
@@ -2636,12 +2656,15 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
 	// update estimates
 	update_gpu_estimate(t, get_gpu_time(t));
 
-	TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. "
+	TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
 		"estimated was %llu. diff is %d\n",
-		t->comm, t->pid, gpu,
+		t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
+		tsk_rt(t)->gpu_migration,
 		get_gpu_time(t),
 		est_time,
 		(long long)get_gpu_time(t) - (long long)est_time);
+
+	tsk_rt(t)->last_gpu = gpu;
 }
 
 struct ikglp_affinity_ops gpu_ikglp_affinity =
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 36e314625d86..1d97462cc128 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
 {
 	BUG_ON(!t);
 	/* prepare next release */
-	t->rt_param.job_params.release = t->rt_param.job_params.deadline;
-	t->rt_param.job_params.deadline += get_rt_period(t);
+
+	if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
+		/* allow the release point to slip if we've passed our deadline. */
+		lt_t now = litmus_clock();
+		t->rt_param.job_params.release =
+			(t->rt_param.job_params.deadline < now) ?
+				now : t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline =
+			t->rt_param.job_params.release + get_rt_period(t);
+	}
+	else {
+		t->rt_param.job_params.release = t->rt_param.job_params.deadline;
+		t->rt_param.job_params.deadline += get_rt_period(t);
+	}
+
 	t->rt_param.job_params.exec_time = 0;
 	/* update job sequence number */
 	t->rt_param.job_params.job_no++;
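A note on the change above: RT_CLASS_SOFT_W_SLIP differs from the ordinary periodic case only when a job completes after its deadline; the next release then slips forward to the current time (litmus_clock()) and the deadline follows one period later, rather than releases continuing to fall in the past. A user-space sketch of the two policies, with plain 64-bit timestamps standing in for lt_t (helper names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Sketch of the two release policies in prepare_for_next_period(); plain
 * uint64_t "nanoseconds" stand in for the kernel's lt_t. */
struct job_params {
	uint64_t release;
	uint64_t deadline;
};

/* Ordinary periodic behaviour: releases stay on the nominal grid. */
static void next_period_strict(struct job_params *j, uint64_t period)
{
	j->release = j->deadline;
	j->deadline += period;
}

/* RT_CLASS_SOFT_W_SLIP: if the deadline has already passed, the next
 * release slips to "now" and the deadline follows one period later. */
static void next_period_with_slip(struct job_params *j, uint64_t period,
                                  uint64_t now)
{
	j->release = (j->deadline < now) ? now : j->deadline;
	j->deadline = j->release + period;
}

int main(void)
{
	struct job_params a = { .release = 0, .deadline = 100 };
	struct job_params b = a;
	uint64_t now = 250;  /* the job finished 150 time units late */

	next_period_strict(&a, 100);
	next_period_with_slip(&b, 100, now);

	printf("strict: release=%llu deadline=%llu\n",
	       (unsigned long long)a.release, (unsigned long long)a.deadline);
	printf("slip:   release=%llu deadline=%llu\n",
	       (unsigned long long)b.release, (unsigned long long)b.deadline);
	return 0;
}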
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
index 0b64977789a6..bff857ed8d4e 100644
--- a/litmus/kfmlp_lock.c
+++ b/litmus/kfmlp_lock.c
@@ -590,7 +590,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops*
 	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
 		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
 			NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
-		return(NULL);
+//		return(NULL);
 	}
 
 	kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 5b301c418b96..d1f836c8af6e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -318,11 +318,21 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
 void init_gpu_affinity_state(struct task_struct* p)
 {
 	// under-damped
-	p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
-	p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
-	// critically-damped
-	// p->rt_param.gpu_fb_param_a = _frac(102, 1000);
-	// p->rt_param.gpu_fb_param_b = _frac(303, 1000);
+	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
+	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
+
+	// emperical;
+	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
+	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
+
+	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
+	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
+	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
+
+	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
+	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
 
 	p->rt_param.gpu_migration = MIG_NONE;
 	p->rt_param.last_gpu = -1;
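A note on init_gpu_affinity_state() above: _frac(n, d) encodes the fixed-point value n/d, and each index of gpu_fb_param_a/b now carries the feedback gains for one migration distance, so the table installs a = 0.7550, b = 4.5800 for index 0 through a = 0.7580, b = 3.4590 for index 3. A trivial sketch of the same table with doubles (indices 0-3 stand in for the kernel's gpu_migration_dist_t enumerators, which are not reproduced here):

#include <stdio.h>

/* Illustrative stand-in for the per-distance feedback gains set in
 * init_gpu_affinity_state(); values are the _frac() ratios from the patch
 * written out as doubles, indexed by migration distance 0..3. */
static const double fb_param_a[] = { 0.7550, 0.8600, 0.6890, 0.7580 };
static const double fb_param_b[] = { 4.5800, 4.0000, 4.0000, 3.4590 };

int main(void)
{
	int dist;
	for (dist = 0; dist < 4; dist++)
		printf("distance %d: a = %.4f, b = %.4f\n",
		       dist, fb_param_a[dist], fb_param_b[dist]);
	return 0;
}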
diff --git a/litmus/locking.c b/litmus/locking.c
index cb11c04ed0d4..718a5a3281d7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -349,10 +349,10 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
 all_acquired:
 
 	// FOR SANITY CHECK FOR TESTING
-	for(i = 0; i < dgl_wait->size; ++i) {
-		struct litmus_lock *l = dgl_wait->locks[i];
-		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
-	}
+//	for(i = 0; i < dgl_wait->size; ++i) {
+//		struct litmus_lock *l = dgl_wait->locks[i];
+//		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
+//	}
 
 	TRACE_CUR("Acquired entire DGL\n");
 
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 3d8c50882066..4b86a50d3bd1 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -265,6 +265,11 @@ int init_nvidia_info(void)
 	}
 }
 
+void shutdown_nvidia_info(void)
+{
+	nvidia_mod = NULL;
+	mb();
+}
 
 /* works with pointers to static data inside the module too. */
 int is_nvidia_func(void* func_addr)
@@ -319,14 +324,11 @@ u32 get_work_nv_device_num(const struct work_struct *t)
 }
 
 
-
-#define MAX_NR_OWNERS 3
-
 typedef struct {
 	raw_spinlock_t	lock;
 	int nr_owners;
 	struct task_struct* max_prio_owner;
-	struct task_struct* owners[MAX_NR_OWNERS];
+	struct task_struct* owners[NV_MAX_SIMULT_USERS];
 }nv_device_registry_t;
 
 static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
@@ -431,6 +433,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 		return ret;  // assume already registered.
 	}
 
+
 	raw_spin_lock_irqsave(&reg->lock, flags);
 
 	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
@@ -461,7 +464,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t)
 	else
 	{
 		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
-		ret = -EBUSY;
+		//ret = -EBUSY;
 	}
 
 	raw_spin_unlock_irqrestore(&reg->lock, flags);
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
index 965164c43537..75ed87c5ed48 100644
--- a/litmus/rsm_lock.c
+++ b/litmus/rsm_lock.c
@@ -502,6 +502,13 @@ int rsm_mutex_unlock(struct litmus_lock* l)
 		tsk_rt(next)->blocked_lock = NULL;
 		mb();
 
+#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
+		// re-enable tracking
+		if(tsk_rt(next)->held_gpus) {
+			tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
+		}
+#endif
+
 		wake_up_process(next);
 	}
 	else {
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 99f7620925ba..be14dbec6ed2 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -733,11 +733,11 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
 	unsigned long flags;
 	cedf_domain_t *cluster;
 	struct task_struct *probe;
-
+
 	// identify the cluster by the assignment of these tasks. one should
 	// be non-NULL.
 	probe = (old_prio) ? old_prio : new_prio;
-
+
 	if(probe) {
 		cluster = task_cpu_cluster(probe);
 
@@ -838,8 +838,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
 #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
 	if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
 		if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			// don't track preemptions or locking protocol suspensions.
+			TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
 			stop_gpu_tracker(entry->scheduled);
 		}
+		else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
+			TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
+		}
 	}
 #endif
 
@@ -1596,6 +1601,10 @@ static void cleanup_cedf(void)
 {
 	int i;
 
+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
+
 	if (clusters_allocated) {
 		for (i = 0; i < num_clusters; i++) {
 			kfree(cedf[i].cpus);
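A note on the cedf_schedule() hunk above: for a task holding GPUs, the tracker stops on every preemption (!blocks) and on self-suspensions only when suspend_gpu_tracker_on_block is set; rsm_mutex_unlock() earlier in this patch clears that flag again when the blocked task is woken. A stand-alone sketch of the stop decision (illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the GPU-tracker decision in cedf_schedule() for a task that
 * holds GPUs; variable names mirror the patch, the program is only an
 * illustration. */
static bool stop_tracker(bool blocks, bool suspend_gpu_tracker_on_block)
{
	/* Preemptions always stop the tracker; self-suspensions stop it only
	 * when the locking protocol asked for that via the flag. */
	return !blocks || suspend_gpu_tracker_on_block;
}

int main(void)
{
	printf("preempted (blocks=0):                 stop=%d\n",
	       stop_tracker(false, false));
	printf("blocked, flag clear (keep tracking):  stop=%d\n",
	       stop_tracker(true, false));
	printf("blocked, flag set (suspend tracking): stop=%d\n",
	       stop_tracker(true, true));
	return 0;
}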
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 2433297b7482..245e41c25a5d 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
 #include <litmus/preempt.h>
 #include <litmus/jobs.h>
 
+#ifdef CONFIG_LITMUS_NVIDIA
+#include <litmus/nvidia_info.h>
+#endif
+
 /*
  * Generic function to trigger preemption on either local or remote cpu
  * from scheduler plugins. The key feature is that this function is
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)
 
 static long litmus_dummy_activate_plugin(void)
 {
+#ifdef CONFIG_LITMUS_NVIDIA
+	shutdown_nvidia_info();
+#endif
 	return 0;
 }
 
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 2bd3a787611b..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -247,6 +247,53 @@ feather_callback void do_sched_trace_action(unsigned long id,
 }
 
 
+
+
+feather_callback void do_sched_trace_prediction_err(unsigned long id,
+	unsigned long _task,
+	unsigned long _distance,
+	unsigned long _rel_err)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
+
+	if (rec) {
+		gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
+		fp_t* rel_err = (fp_t*) _rel_err;
+
+		rec->data.prediction_err.distance = *distance;
+		rec->data.prediction_err.rel_err = rel_err->val;
+		put_record(rec);
+	}
+}
+
+
+feather_callback void do_sched_trace_migration(unsigned long id,
+	unsigned long _task,
+	unsigned long _mig_info)
+{
+	struct task_struct *t = (struct task_struct*) _task;
+	struct st_event_record *rec = get_record(ST_MIGRATION, t);
+
+	if (rec) {
+		struct migration_info* mig_info = (struct migration_info*) _mig_info;
+
+		rec->hdr.extra = mig_info->distance;
+		rec->data.migration.observed = mig_info->observed;
+		rec->data.migration.estimated = mig_info->estimated;
+
+		put_record(rec);
+	}
+}
+
+
+
+
+
+
+
+
+
+
 feather_callback void do_sched_trace_tasklet_release(unsigned long id,
 	unsigned long _owner)
 {
@@ -457,3 +504,6 @@ EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
 
 
 
+
+
+
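A note on the new feather callbacks above: the sched_trace_* macros hand every argument to the callback as an unsigned long, so pointer arguments such as &rel_err are widened at the call site and cast back inside the callback before the record is filled. A user-space sketch of that round trip (the callback name and the two stand-in types are illustrative, not the kernel's definitions):

#include <stdio.h>

/* Sketch of the argument marshalling used by the sched_trace_* macros:
 * pointers travel through the tracing layer as unsigned long and are cast
 * back inside the callback.  Types and names here are stand-ins. */
typedef enum { DIST_NONE = 0 } migration_dist_sketch_t;
typedef struct { long val; } fp_sketch_t;

static void prediction_err_callback(unsigned long _distance,
                                    unsigned long _rel_err)
{
	migration_dist_sketch_t *distance = (migration_dist_sketch_t *) _distance;
	fp_sketch_t *rel_err = (fp_sketch_t *) _rel_err;

	printf("distance=%d raw rel_err=%ld\n", (int) *distance, rel_err->val);
}

int main(void)
{
	migration_dist_sketch_t dist = DIST_NONE;
	fp_sketch_t rel_err = { .val = 42 };

	/* Mirrors SCHED_TRACE3(...): each argument is widened to unsigned long. */
	prediction_err_callback((unsigned long) &dist, (unsigned long) &rel_err);
	return 0;
}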