author    Glenn Elliott <gelliott@cs.unc.edu>  2012-09-11 22:42:51 -0400
committer Glenn Elliott <gelliott@cs.unc.edu>  2012-09-11 22:42:51 -0400
commit    c1d1979c99ca397241da4e3d7e0cb77f7ec28240 (patch)
tree      2a988aae1ae7c08891543e844171cbcb4281a5bb
parent    fd3aa01f176cf12b1625f4f46ba01f3340bb57ed (diff)
parent    55e04c94b925b0790c2ae0a79f16e939e9bb2846 (diff)
Merge branch 'wip-gpu-rtas12' into wip-slave-threads
Conflicts:
	include/litmus/unistd_32.h
	include/litmus/unistd_64.h
	litmus/litmus.c
-rw-r--r--  arch/x86/kernel/irq.c | 4
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 1
-rw-r--r--  include/linux/completion.h | 1
-rw-r--r--  include/linux/interrupt.h | 10
-rw-r--r--  include/linux/mutex.h | 10
-rw-r--r--  include/linux/semaphore.h | 9
-rw-r--r--  include/linux/workqueue.h | 18
-rw-r--r--  include/litmus/budget.h | 20
-rw-r--r--  include/litmus/edf_common.h | 12
-rw-r--r--  include/litmus/fdso.h | 16
-rw-r--r--  include/litmus/fpmath.h | 145
-rw-r--r--  include/litmus/gpu_affinity.h | 50
-rw-r--r--  include/litmus/ikglp_lock.h | 160
-rw-r--r--  include/litmus/kexclu_affinity.h | 35
-rw-r--r--  include/litmus/kfmlp_lock.h | 97
-rw-r--r--  include/litmus/litmus.h | 9
-rw-r--r--  include/litmus/litmus_softirq.h | 199
-rw-r--r--  include/litmus/locking.h | 142
-rw-r--r--  include/litmus/nvidia_info.h | 46
-rw-r--r--  include/litmus/preempt.h | 2
-rw-r--r--  include/litmus/rsm_lock.h | 54
-rw-r--r--  include/litmus/rt_param.h | 132
-rw-r--r--  include/litmus/sched_plugin.h | 76
-rw-r--r--  include/litmus/sched_trace.h | 218
-rw-r--r--  include/litmus/sched_trace_external.h | 78
-rw-r--r--  include/litmus/signal.h | 47
-rw-r--r--  include/litmus/trace.h | 34
-rw-r--r--  include/litmus/unistd_32.h | 7
-rw-r--r--  include/litmus/unistd_64.h | 10
-rw-r--r--  kernel/lockdep.c | 7
-rw-r--r--  kernel/mutex.c | 125
-rw-r--r--  kernel/sched.c | 27
-rw-r--r--  kernel/semaphore.c | 13
-rw-r--r--  kernel/softirq.c | 322
-rw-r--r--  kernel/workqueue.c | 71
-rw-r--r--  litmus/Kconfig | 194
-rw-r--r--  litmus/Makefile | 8
-rw-r--r--  litmus/affinity.c | 2
-rw-r--r--  litmus/budget.c | 16
-rw-r--r--  litmus/edf_common.c | 227
-rw-r--r--  litmus/fdso.c | 15
-rw-r--r--  litmus/gpu_affinity.c | 231
-rw-r--r--  litmus/ikglp_lock.c | 2838
-rw-r--r--  litmus/jobs.c | 10
-rw-r--r--  litmus/kexclu_affinity.c | 92
-rw-r--r--  litmus/kfmlp_lock.c | 1002
-rw-r--r--  litmus/litmus.c | 161
-rw-r--r--  litmus/litmus_pai_softirq.c | 64
-rw-r--r--  litmus/litmus_proc.c | 17
-rw-r--r--  litmus/litmus_softirq.c | 1582
-rw-r--r--  litmus/locking.c | 394
-rw-r--r--  litmus/nvidia_info.c | 597
-rw-r--r--  litmus/preempt.c | 10
-rw-r--r--  litmus/rsm_lock.c | 796
-rw-r--r--  litmus/sched_cedf.c | 1109
-rw-r--r--  litmus/sched_gsn_edf.c | 1085
-rw-r--r--  litmus/sched_litmus.c | 2
-rw-r--r--  litmus/sched_pfp.c | 40
-rw-r--r--  litmus/sched_plugin.c | 135
-rw-r--r--  litmus/sched_psn_edf.c | 41
-rw-r--r--  litmus/sched_task_trace.c | 282
-rw-r--r--  litmus/sched_trace_external.c | 64
62 files changed, 12862 insertions, 359 deletions
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6c0802eb2f7f..680a5cb4b585 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -10,6 +10,10 @@
 #include <linux/ftrace.h>
 #include <linux/delay.h>
 
+#ifdef CONFIG_LITMUS_NVIDIA
+#include <litmus/sched_trace.h>
+#endif
+
 #include <asm/apic.h>
 #include <asm/io_apic.h>
 #include <asm/irq.h>
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d0126222b394..0cb4373698e7 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -358,3 +358,4 @@ ENTRY(sys_call_table)
 	.long sys_wait_for_ts_release
 	.long sys_release_ts /* +10 */
 	.long sys_null_call
+	.long sys_register_nv_device
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 9d727271c9fe..cff405c4dd3a 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x)
 	init_waitqueue_head(&x->wait);
 }
 
+extern void __wait_for_completion_locked(struct completion *);
 extern void wait_for_completion(struct completion *);
 extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f6efed0039ed..57a7bc8807be 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -445,6 +445,7 @@ static inline void __raise_softirq_irqoff(unsigned int nr)
 
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
+extern void wakeup_softirqd(void);
 
 /* This is the worklist that queues up per-cpu softirq work.
  *
@@ -500,6 +501,10 @@ struct tasklet_struct
 	atomic_t count;
 	void (*func)(unsigned long);
 	unsigned long data;
+
+#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD)
+	struct task_struct *owner;
+#endif
 };
 
 #define DECLARE_TASKLET(name, func, data) \
@@ -537,6 +542,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
 #define tasklet_unlock(t) do { } while (0)
 #endif
 
+extern void ___tasklet_schedule(struct tasklet_struct *t);
 extern void __tasklet_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_schedule(struct tasklet_struct *t)
@@ -545,6 +551,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
 		__tasklet_schedule(t);
 }
 
+extern void ___tasklet_hi_schedule(struct tasklet_struct *t);
 extern void __tasklet_hi_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_hi_schedule(struct tasklet_struct *t)
@@ -553,6 +560,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
 		__tasklet_hi_schedule(t);
 }
 
+extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t);
 extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
 
 /*
@@ -582,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
 }
 
 static inline void tasklet_enable(struct tasklet_struct *t)
 {
 	smp_mb__before_atomic_dec();
 	atomic_dec(&t->count);
 }
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index a940fe435aca..cb47debbf24d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock)
 	return atomic_read(&lock->count) != 1;
 }
 
+/* return non-zero to abort. only pre-side-effects may abort */
+typedef int (*side_effect_t)(unsigned long);
+extern void mutex_lock_sfx(struct mutex *lock,
+		side_effect_t pre, unsigned long pre_arg,
+		side_effect_t post, unsigned long post_arg);
+extern void mutex_unlock_sfx(struct mutex *lock,
+		side_effect_t pre, unsigned long pre_arg,
+		side_effect_t post, unsigned long post_arg);
+
 /*
  * See kernel/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/mutex-design.txt.
@@ -153,6 +162,7 @@ extern void mutex_lock(struct mutex *lock);
 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
 extern int __must_check mutex_lock_killable(struct mutex *lock);
 
+
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 39fa04966aa8..c83fc2b65f01 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -43,4 +43,13 @@ extern int __must_check down_trylock(struct semaphore *sem);
 extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
 extern void up(struct semaphore *sem);
 
+extern void __down(struct semaphore *sem);
+extern void __up(struct semaphore *sem);
+
+struct semaphore_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	int up;
+};
+
 #endif /* __LINUX_SEMAPHORE_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f584aba78ca9..1ec2ec7d4e3b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -83,6 +83,9 @@ struct work_struct {
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
 #endif
+#ifdef CONFIG_LITMUS_SOFTIRQD
+	struct task_struct *owner;
+#endif
 };
 
 #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
@@ -115,11 +118,25 @@ struct execute_work {
 #define __WORK_INIT_LOCKDEP_MAP(n, k)
 #endif
 
+#ifdef CONFIG_LITMUS_SOFTIRQD
+#define __WORK_INIT_OWNER() \
+	.owner = NULL,
+
+#define PREPARE_OWNER(_work, _owner) \
+	do { \
+		(_work)->owner = (_owner); \
+	} while(0)
+#else
+#define __WORK_INIT_OWNER()
+#define PREPARE_OWNER(_work, _owner)
+#endif
+
 #define __WORK_INITIALIZER(n, f) { \
 	.data = WORK_DATA_STATIC_INIT(), \
 	.entry = { &(n).entry, &(n).entry }, \
 	.func = (f), \
 	__WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
+	__WORK_INIT_OWNER() \
 	}
 
 #define __DELAYED_WORK_INITIALIZER(n, f) { \
@@ -357,6 +374,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
+extern int __schedule_work(struct work_struct *work);
 extern int schedule_work(struct work_struct *work);
 extern int schedule_work_on(int cpu, struct work_struct *work);
 extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
index 33344ee8d5f9..763b31c0e9f6 100644
--- a/include/litmus/budget.h
+++ b/include/litmus/budget.h
@@ -5,6 +5,9 @@
  * the next task. */
 void update_enforcement_timer(struct task_struct* t);
 
+/* Send SIG_BUDGET to a real-time task. */
+void send_sigbudget(struct task_struct* t);
+
 inline static int budget_exhausted(struct task_struct* t)
 {
 	return get_exec_time(t) >= get_exec_cost(t);
@@ -19,10 +22,21 @@ inline static lt_t budget_remaining(struct task_struct* t)
 	return 0;
 }
 
-#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
+#define budget_enforced(t) (\
+	tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
+
+#define budget_precisely_tracked(t) (\
+	tsk_rt(t)->task_params.budget_policy == PRECISE_ENFORCEMENT || \
+	tsk_rt(t)->task_params.budget_signal_policy == PRECISE_SIGNALS)
+
+#define budget_signalled(t) (\
+	tsk_rt(t)->task_params.budget_signal_policy != NO_SIGNALS)
+
+#define budget_precisely_signalled(t) (\
+	tsk_rt(t)->task_params.budget_policy == PRECISE_SIGNALS)
 
-#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
-	== PRECISE_ENFORCEMENT)
+#define sigbudget_sent(t) (\
+	test_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags))
 
 static inline int requeue_preempted_job(struct task_struct* t)
 {
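The helpers added above separate budget enforcement from budget signalling. The following is a minimal, illustrative sketch (not part of the patch) of how a plugin's tick or timer path might combine them; check_budget() is a hypothetical helper, and litmus_reschedule_local() is assumed to be available from litmus/preempt.h:

#include <litmus/litmus.h>
#include <litmus/budget.h>
#include <litmus/preempt.h>

/* Hypothetical helper: react to a job that may have exhausted its budget. */
static void check_budget(struct task_struct *t)
{
	if (!budget_exhausted(t))
		return;

	/* deliver SIG_BUDGET at most once per job */
	if (budget_signalled(t) && !sigbudget_sent(t))
		send_sigbudget(t);

	/* only enforced budgets force the job off the CPU */
	if (budget_enforced(t))
		litmus_reschedule_local();
}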
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
index bbaf22ea7f12..63dff7efe8fb 100644
--- a/include/litmus/edf_common.h
+++ b/include/litmus/edf_common.h
@@ -20,6 +20,18 @@ int edf_higher_prio(struct task_struct* first,
 
 int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
 
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+/* binheap_nodes must be embedded within 'struct litmus_lock' */
+int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b);
+int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b);
+int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
+int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
+
+int __edf_higher_prio(struct task_struct* first, comparison_mode_t first_mode,
+		      struct task_struct* second, comparison_mode_t second_mode);
+
+#endif
+
 int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
 
 #endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index f2115b83f1e4..1469c0fd0460 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -24,9 +24,18 @@ typedef enum {
 	MPCP_VS_SEM = 3,
 	DPCP_SEM = 4,
 
 	PCP_SEM = 5,
 
-	MAX_OBJ_TYPE = 5
+	RSM_MUTEX = 6,
+	IKGLP_SEM = 7,
+	KFMLP_SEM = 8,
+
+	IKGLP_SIMPLE_GPU_AFF_OBS = 9,
+	IKGLP_GPU_AFF_OBS = 10,
+	KFMLP_SIMPLE_GPU_AFF_OBS = 11,
+	KFMLP_GPU_AFF_OBS = 12,
+
+	MAX_OBJ_TYPE = 12
 } obj_type_t;
 
 struct inode_obj_id {
@@ -70,8 +79,11 @@ static inline void* od_lookup(int od, obj_type_t type)
 }
 
 #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
+#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM))
 #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
 #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
 
+#define lookup_rsm_mutex(od)((struct litmus_lock*) od_lookup(od, FMLP_SEM))
+
 
 #endif
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..04d4bcaeae96
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,145 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#ifndef __KERNEL__
5#include <stdint.h>
6#define abs(x) (((x) < 0) ? -(x) : x)
7#endif
8
9// Use 64-bit because we want to track things at the nanosecond scale.
10// This can lead to very large numbers.
11typedef int64_t fpbuf_t;
12typedef struct
13{
14 fpbuf_t val;
15} fp_t;
16
17#define FP_SHIFT 10
18#define ROUND_BIT (FP_SHIFT - 1)
19
20#define _fp(x) ((fp_t) {x})
21
22#ifdef __KERNEL__
23static const fp_t LITMUS_FP_ZERO = {.val = 0};
24static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
25#endif
26
27static inline fp_t FP(fpbuf_t x)
28{
29 return _fp(((fpbuf_t) x) << FP_SHIFT);
30}
31
32/* divide two integers to obtain a fixed point value */
33static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
34{
35 return _fp(FP(a).val / (b));
36}
37
38static inline fpbuf_t _point(fp_t x)
39{
40 return (x.val % (1 << FP_SHIFT));
41
42}
43
44#define fp2str(x) x.val
45/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
46#define _FP_ "%ld/1024"
47
48static inline fpbuf_t _floor(fp_t x)
49{
50 return x.val >> FP_SHIFT;
51}
52
53/* FIXME: negative rounding */
54static inline fpbuf_t _round(fp_t x)
55{
56 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
57}
58
59/* multiply two fixed point values */
60static inline fp_t _mul(fp_t a, fp_t b)
61{
62 return _fp((a.val * b.val) >> FP_SHIFT);
63}
64
65static inline fp_t _div(fp_t a, fp_t b)
66{
67#if !defined(__KERNEL__) && !defined(unlikely)
68#define unlikely(x) (x)
69#define DO_UNDEF_UNLIKELY
70#endif
71 /* try not to overflow */
72 if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
73 return _fp((a.val / b.val) << FP_SHIFT);
74 else
75 return _fp((a.val << FP_SHIFT) / b.val);
76#ifdef DO_UNDEF_UNLIKELY
77#undef unlikely
78#undef DO_UNDEF_UNLIKELY
79#endif
80}
81
82static inline fp_t _add(fp_t a, fp_t b)
83{
84 return _fp(a.val + b.val);
85}
86
87static inline fp_t _sub(fp_t a, fp_t b)
88{
89 return _fp(a.val - b.val);
90}
91
92static inline fp_t _neg(fp_t x)
93{
94 return _fp(-x.val);
95}
96
97static inline fp_t _abs(fp_t x)
98{
99 return _fp(abs(x.val));
100}
101
102/* works the same as casting float/double to integer */
103static inline fpbuf_t _fp_to_integer(fp_t x)
104{
105 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
106}
107
108static inline fp_t _integer_to_fp(fpbuf_t x)
109{
110 return _frac(x,1);
111}
112
113static inline int _leq(fp_t a, fp_t b)
114{
115 return a.val <= b.val;
116}
117
118static inline int _geq(fp_t a, fp_t b)
119{
120 return a.val >= b.val;
121}
122
123static inline int _lt(fp_t a, fp_t b)
124{
125 return a.val < b.val;
126}
127
128static inline int _gt(fp_t a, fp_t b)
129{
130 return a.val > b.val;
131}
132
133static inline int _eq(fp_t a, fp_t b)
134{
135 return a.val == b.val;
136}
137
138static inline fp_t _max(fp_t a, fp_t b)
139{
140 if (a.val < b.val)
141 return b;
142 else
143 return a;
144}
145#endif
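fpmath.h also compiles in user space (note the #ifndef __KERNEL__ guard), so the Q10 fixed-point helpers above can be exercised directly. A small illustrative program, not part of the patch, assuming the header has been copied onto the local include path as "fpmath.h":

#include <stdio.h>
#include "fpmath.h"

int main(void)
{
	fp_t util   = _frac(3, 4);        /* 0.75, stored as 768/1024 */
	fp_t scaled = _mul(util, FP(8));  /* 0.75 * 8 = 6.0 */

	/* prints: raw=6144/1024 floor=6 round=6 int=6 */
	printf("raw=%lld/1024 floor=%lld round=%lld int=%lld\n",
	       (long long)scaled.val,
	       (long long)_floor(scaled),
	       (long long)_round(scaled),
	       (long long)_fp_to_integer(scaled));
	return 0;
}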
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
new file mode 100644
index 000000000000..d64a15cbf2a5
--- /dev/null
+++ b/include/litmus/gpu_affinity.h
@@ -0,0 +1,50 @@
1#ifndef LITMUS_GPU_AFFINITY_H
2#define LITMUS_GPU_AFFINITY_H
3
4#include <litmus/rt_param.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/litmus.h>
7
8void update_gpu_estimate(struct task_struct* t, lt_t observed);
9gpu_migration_dist_t gpu_migration_distance(int a, int b);
10
11static inline void reset_gpu_tracker(struct task_struct* t)
12{
13 t->rt_param.accum_gpu_time = 0;
14}
15
16static inline void start_gpu_tracker(struct task_struct* t)
17{
18 t->rt_param.gpu_time_stamp = litmus_clock();
19}
20
21static inline void stop_gpu_tracker(struct task_struct* t)
22{
23 lt_t now = litmus_clock();
24 t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp);
25}
26
27static inline lt_t get_gpu_time(struct task_struct* t)
28{
29 return t->rt_param.accum_gpu_time;
30}
31
32static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
33{
34// int i;
35// fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
36// lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
37 lt_t val = t->rt_param.gpu_migration_est[dist].avg;
38
39// WARN_ON(temp < 0);
40
41 // lower-bound a distant migration to be at least equal to the level
42 // below it.
43// for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
44// val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
45// }
46
47 return ((val > 0) ? val : dist+1);
48}
49
50#endif
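A rough sketch (not part of the patch) of how the tracker helpers above are meant to be combined around a GPU critical section; it assumes CONFIG_LITMUS_NVIDIA and CONFIG_LITMUS_AFFINITY_LOCKING are enabled, and the surrounding driver and locking code is omitted:

#include <litmus/gpu_affinity.h>

/* Hypothetical usage, kernel context: account the GPU time of task 't'
 * and feed the observation into its migration-distance estimate. */
static void account_gpu_section(struct task_struct *t)
{
	reset_gpu_tracker(t);   /* clear accumulated time for this section */
	start_gpu_tracker(t);   /* time-stamp with litmus_clock() */

	/* ... issue GPU work and block until it completes ... */

	stop_gpu_tracker(t);    /* accumulate the elapsed time */
	update_gpu_estimate(t, get_gpu_time(t));
}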
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
new file mode 100644
index 000000000000..0b89c8135360
--- /dev/null
+++ b/include/litmus/ikglp_lock.h
@@ -0,0 +1,160 @@
1#ifndef LITMUS_IKGLP_H
2#define LITMUS_IKGLP_H
3
4#include <litmus/litmus.h>
5#include <litmus/binheap.h>
6#include <litmus/locking.h>
7
8#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
9#include <litmus/kexclu_affinity.h>
10
11struct ikglp_affinity;
12#endif
13
14typedef struct ikglp_heap_node
15{
16 struct task_struct *task;
17 struct binheap_node node;
18} ikglp_heap_node_t;
19
20struct fifo_queue;
21struct ikglp_wait_state;
22
23typedef struct ikglp_donee_heap_node
24{
25 struct task_struct *task;
26 struct fifo_queue *fq;
27 struct ikglp_wait_state *donor_info; // cross-linked with ikglp_wait_state_t of donor
28
29 struct binheap_node node;
30} ikglp_donee_heap_node_t;
31
32// Maintains the state of a request as it goes through the IKGLP
33typedef struct ikglp_wait_state {
34 struct task_struct *task; // pointer back to the requesting task
35
36 // Data for while waiting in FIFO Queue
37 wait_queue_t fq_node;
38 ikglp_heap_node_t global_heap_node;
39 ikglp_donee_heap_node_t donee_heap_node;
40
41 // Data for while waiting in PQ
42 ikglp_heap_node_t pq_node;
43
44 // Data for while waiting as a donor
45 ikglp_donee_heap_node_t *donee_info; // cross-linked with donee's ikglp_donee_heap_node_t
46 struct nested_info prio_donation;
47 struct binheap_node node;
48} ikglp_wait_state_t;
49
50/* struct for semaphore with priority inheritance */
51struct fifo_queue
52{
53 wait_queue_head_t wait;
54 struct task_struct* owner;
55
56 // used for bookkeeping
57 ikglp_heap_node_t global_heap_node;
58 ikglp_donee_heap_node_t donee_heap_node;
59
60 struct task_struct* hp_waiter;
61 int count; /* number of waiters + holder */
62
63 struct nested_info nest;
64};
65
66struct ikglp_semaphore
67{
68 struct litmus_lock litmus_lock;
69
70 raw_spinlock_t lock;
71 raw_spinlock_t real_lock;
72
73 int nr_replicas; // AKA k
74 int m;
75
76 int max_fifo_len; // max len of a fifo queue
77 int nr_in_fifos;
78
79 struct binheap top_m; // min heap, base prio
80 int top_m_size; // number of nodes in top_m
81
82 struct binheap not_top_m; // max heap, base prio
83
84 struct binheap donees; // min-heap, base prio
85 struct fifo_queue *shortest_fifo_queue; // pointer to shortest fifo queue
86
87 /* data structures for holding requests */
88 struct fifo_queue *fifo_queues; // array nr_replicas in length
89 struct binheap priority_queue; // max-heap, base prio
90 struct binheap donors; // max-heap, base prio
91
92#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
93 struct ikglp_affinity *aff_obs;
94#endif
95};
96
97static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock)
98{
99 return container_of(lock, struct ikglp_semaphore, litmus_lock);
100}
101
102int ikglp_lock(struct litmus_lock* l);
103int ikglp_unlock(struct litmus_lock* l);
104int ikglp_close(struct litmus_lock* l);
105void ikglp_free(struct litmus_lock* l);
106struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg);
107
108
109
110#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
111
112struct ikglp_queue_info
113{
114 struct fifo_queue* q;
115 lt_t estimated_len;
116 int *nr_cur_users;
117};
118
119struct ikglp_affinity_ops
120{
121 struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO
122 ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select steal from FIFO
123 ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff, struct task_struct* t); // select a donee
124 ikglp_wait_state_t* (*advise_donor_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ
125
126 void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue
127 void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue
128 void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired
129 void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed
130 int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding)
131};
132
133struct ikglp_affinity
134{
135 struct affinity_observer obs;
136 struct ikglp_affinity_ops *ops;
137 struct ikglp_queue_info *q_info;
138 int *nr_cur_users_on_rsrc;
139 int offset;
140 int nr_simult;
141 int nr_rsrc;
142 int relax_max_fifo_len;
143};
144
145static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
146{
147 return container_of(aff_obs, struct ikglp_affinity, obs);
148}
149
150int ikglp_aff_obs_close(struct affinity_observer*);
151void ikglp_aff_obs_free(struct affinity_observer*);
152struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*,
153 void* __user arg);
154struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
155 void* __user arg);
156#endif
157
158
159
160#endif
diff --git a/include/litmus/kexclu_affinity.h b/include/litmus/kexclu_affinity.h
new file mode 100644
index 000000000000..f6355de49074
--- /dev/null
+++ b/include/litmus/kexclu_affinity.h
@@ -0,0 +1,35 @@
1#ifndef LITMUS_AFF_OBS_H
2#define LITMUS_AFF_OBS_H
3
4#include <litmus/locking.h>
5
6struct affinity_observer_ops;
7
8struct affinity_observer
9{
10 struct affinity_observer_ops* ops;
11 int type;
12 int ident;
13
14 struct litmus_lock* lock; // the lock under observation
15};
16
17typedef int (*aff_obs_open_t)(struct affinity_observer* aff_obs,
18 void* __user arg);
19typedef int (*aff_obs_close_t)(struct affinity_observer* aff_obs);
20typedef void (*aff_obs_free_t)(struct affinity_observer* aff_obs);
21
22struct affinity_observer_ops
23{
24 aff_obs_open_t open;
25 aff_obs_close_t close;
26 aff_obs_free_t deallocate;
27};
28
29struct litmus_lock* get_lock_from_od(int od);
30
31void affinity_observer_new(struct affinity_observer* aff,
32 struct affinity_observer_ops* ops,
33 struct affinity_observer_args* args);
34
35#endif
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
new file mode 100644
index 000000000000..5f0aae6e6f42
--- /dev/null
+++ b/include/litmus/kfmlp_lock.h
@@ -0,0 +1,97 @@
1#ifndef LITMUS_KFMLP_H
2#define LITMUS_KFMLP_H
3
4#include <litmus/litmus.h>
5#include <litmus/locking.h>
6
7#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
8#include <litmus/kexclu_affinity.h>
9
10struct kfmlp_affinity;
11#endif
12
13/* struct for semaphore with priority inheritance */
14struct kfmlp_queue
15{
16 wait_queue_head_t wait;
17 struct task_struct* owner;
18 struct task_struct* hp_waiter;
19 int count; /* number of waiters + holder */
20};
21
22struct kfmlp_semaphore
23{
24 struct litmus_lock litmus_lock;
25
26 spinlock_t lock;
27
28 int num_resources; /* aka k */
29
30 struct kfmlp_queue *queues; /* array */
31 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
32
33#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
34 struct kfmlp_affinity *aff_obs;
35#endif
36};
37
38static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
39{
40 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
41}
42
43int kfmlp_lock(struct litmus_lock* l);
44int kfmlp_unlock(struct litmus_lock* l);
45int kfmlp_close(struct litmus_lock* l);
46void kfmlp_free(struct litmus_lock* l);
47struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg);
48
49#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
50
51struct kfmlp_queue_info
52{
53 struct kfmlp_queue* q;
54 lt_t estimated_len;
55 int *nr_cur_users;
56};
57
58struct kfmlp_affinity_ops
59{
60 struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t);
61 struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from);
62 void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
63 void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
64 void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
65 void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
66 int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq);
67};
68
69struct kfmlp_affinity
70{
71 struct affinity_observer obs;
72 struct kfmlp_affinity_ops *ops;
73 struct kfmlp_queue_info *q_info;
74 int *nr_cur_users_on_rsrc;
75 int offset;
76 int nr_simult;
77 int nr_rsrc;
78};
79
80static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
81{
82 return container_of(aff_obs, struct kfmlp_affinity, obs);
83}
84
85int kfmlp_aff_obs_close(struct affinity_observer*);
86void kfmlp_aff_obs_free(struct affinity_observer*);
87struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*,
88 void* __user arg);
89struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
90 void* __user arg);
91
92
93#endif
94
95#endif
96
97
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 338245abd6ed..1d70ab713571 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list)
 	);
 }
 
+
 struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
 
 #define NO_CPU 0xffffffff
@@ -62,8 +63,12 @@ void litmus_exit_task(struct task_struct *tsk);
 /* job_param macros */
 #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
 #define get_deadline(t) (tsk_rt(t)->job_params.deadline)
+#define get_period(t) (tsk_rt(t)->task_params.period)
 #define get_release(t) (tsk_rt(t)->job_params.release)
+#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
 
+#define effective_priority(t) ((!(tsk_rt(t)->inh_task)) ? t : tsk_rt(t)->inh_task)
+#define base_priority(t) (t)
 
 #define is_hrt(t) \
 	(tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
@@ -101,10 +106,12 @@ static inline lt_t litmus_clock(void)
 #define earlier_deadline(a, b) (lt_before(\
 	(a)->rt_param.job_params.deadline,\
 	(b)->rt_param.job_params.deadline))
+#define shorter_period(a, b) (lt_before(\
+	(a)->rt_param.task_params.period,\
+	(b)->rt_param.task_params.period))
 #define earlier_release(a, b) (lt_before(\
 	(a)->rt_param.job_params.release,\
 	(b)->rt_param.job_params.release))
-
 void preempt_if_preemptable(struct task_struct* t, int on_cpu);
 
 #ifdef CONFIG_LITMUS_LOCKING
diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h
new file mode 100644
index 000000000000..1eb5ea1a6c4b
--- /dev/null
+++ b/include/litmus/litmus_softirq.h
@@ -0,0 +1,199 @@
1#ifndef __LITMUS_SOFTIRQ_H
2#define __LITMUS_SOFTIRQ_H
3
4#include <linux/interrupt.h>
5#include <linux/workqueue.h>
6
7/*
8 Threaded tasklet handling for Litmus. Tasklets
9 are scheduled with the priority of the tasklet's
10 owner---that is, the RT task on behalf the tasklet
11 runs.
12
13 Tasklets are currently scheduled in FIFO order with
14 NO priority inheritance for "blocked" tasklets.
15
16 klitirqd assumes the priority of the owner of the
17 tasklet when the tasklet is next to execute.
18
19 Currently, hi-tasklets are scheduled before
20 low-tasklets, regardless of the priority of the low-tasklets.
21 Likewise, low-tasklets are scheduled before work
22 queue objects. This priority inversion probably needs
23 to be fixed, though it is not an issue in our work with
24 GPUs, since GPUs (and their associated klitirqds) are
25 owned for exclusive time periods, thus no inversions can
26 occur.
27 */
28
29
30
31#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
32
33/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons.
34 Actual launch of threads is deferred to kworker's
35 workqueue, so daemons will likely not be immediately
36 running when this function returns, though the required
37 data will be initialized.
38
39 @affinity_set: an array expressing the processor affinity
40 for each of the NR_LITMUS_SOFTIRQD daemons. May be set
41 to NULL for global scheduling.
42
43 - Examples -
44 8-CPU system with two CPU clusters:
45 affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
46 NOTE: Daemons are not actually bound to the specified CPU, but rather
47 to the cluster in which the CPU resides.
48
49 8-CPU system, partitioned:
50 affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
51
52 FIXME: change array to a CPU topology or array of cpumasks
53
54 */
55void spawn_klitirqd(int* affinity);
56
57
58/* Raises a flag to tell klitirqds to terminate.
59 Termination is async, so some threads may be running
60 after function return. */
61void kill_klitirqd(void);
62
63
64/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqds are ready
65 to handle tasklets. 0, otherwise.*/
66int klitirqd_is_ready(void);
67
68/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqds are ready
69 to handle tasklets. 0, otherwise.*/
70int klitirqd_is_dead(void);
71
72/* Flushes all pending work out to the OS for regular
73 * tasklet/work processing of the specified 'owner'
74 *
75 * PRECOND: klitirqd_thread must have a clear entry
76 * in the GPU registry, otherwise this call will become
77 * a no-op as work will loop back to the klitirqd_thread.
78 *
79 * Pass NULL for owner to flush ALL pending items.
80 */
81void flush_pending(struct task_struct* klitirqd_thread,
82 struct task_struct* owner);
83
84struct task_struct* get_klitirqd(unsigned int k_id);
85
86
87extern int __litmus_tasklet_schedule(
88 struct tasklet_struct *t,
89 unsigned int k_id);
90
91/* schedule a tasklet on klitirqd #k_id */
92static inline int litmus_tasklet_schedule(
93 struct tasklet_struct *t,
94 unsigned int k_id)
95{
96 int ret = 0;
97 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
98 ret = __litmus_tasklet_schedule(t, k_id);
99 return(ret);
100}
101
102/* for use by __tasklet_schedule() */
103static inline int _litmus_tasklet_schedule(
104 struct tasklet_struct *t,
105 unsigned int k_id)
106{
107 return(__litmus_tasklet_schedule(t, k_id));
108}
109
110
111
112
113extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t,
114 unsigned int k_id);
115
116/* schedule a hi tasklet on klitirqd #k_id */
117static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t,
118 unsigned int k_id)
119{
120 int ret = 0;
121 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
122 ret = __litmus_tasklet_hi_schedule(t, k_id);
123 return(ret);
124}
125
126/* for use by __tasklet_hi_schedule() */
127static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t,
128 unsigned int k_id)
129{
130 return(__litmus_tasklet_hi_schedule(t, k_id));
131}
132
133
134
135
136
137extern int __litmus_tasklet_hi_schedule_first(
138 struct tasklet_struct *t,
139 unsigned int k_id);
140
141/* schedule a hi tasklet on klitirqd #k_id on next go-around */
142/* PRECONDITION: Interrupts must be disabled. */
143static inline int litmus_tasklet_hi_schedule_first(
144 struct tasklet_struct *t,
145 unsigned int k_id)
146{
147 int ret = 0;
148 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
149 ret = __litmus_tasklet_hi_schedule_first(t, k_id);
150 return(ret);
151}
152
153/* for use by __tasklet_hi_schedule_first() */
154static inline int _litmus_tasklet_hi_schedule_first(
155 struct tasklet_struct *t,
156 unsigned int k_id)
157{
158 return(__litmus_tasklet_hi_schedule_first(t, k_id));
159}
160
161
162
163//////////////
164
165extern int __litmus_schedule_work(
166 struct work_struct* w,
167 unsigned int k_id);
168
169static inline int litmus_schedule_work(
170 struct work_struct* w,
171 unsigned int k_id)
172{
173 return(__litmus_schedule_work(w, k_id));
174}
175
176
177
178///////////// mutex operations for client threads.
179
180void down_and_set_stat(struct task_struct* t,
181 enum klitirqd_sem_status to_set,
182 struct mutex* sem);
183
184void __down_and_reset_and_set_stat(struct task_struct* t,
185 enum klitirqd_sem_status to_reset,
186 enum klitirqd_sem_status to_set,
187 struct mutex* sem);
188
189void up_and_set_stat(struct task_struct* t,
190 enum klitirqd_sem_status to_set,
191 struct mutex* sem);
192
193
194
195void release_klitirqd_lock(struct task_struct* t);
196
197int reacquire_klitirqd_lock(struct task_struct* t);
198
199#endif
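A minimal sketch (not part of the patch) of how a driver bottom half might be deferred to klitirqd daemon 0 so that it runs with the priority of the real-time task it serves. It assumes CONFIG_LITMUS_SOFTIRQD (which adds the 'owner' field to struct tasklet_struct) and that klitirqd_is_ready() has already been checked; defer_to_klitirqd() is a hypothetical helper:

#include <linux/interrupt.h>
#include <litmus/litmus_softirq.h>

static void my_bottom_half(unsigned long data)
{
	/* deferred interrupt work, executed by the klitirqd thread */
}

static DECLARE_TASKLET(my_tasklet, my_bottom_half, 0);

/* Hypothetical helper: queue the tasklet on behalf of 'owner'. */
static void defer_to_klitirqd(struct task_struct *owner)
{
	my_tasklet.owner = owner;  /* klitirqd assumes this task's priority */
	litmus_tasklet_schedule(&my_tasklet, 0);
}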
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
index 4d7b870cb443..296bbf6f7af0 100644
--- a/include/litmus/locking.h
+++ b/include/litmus/locking.h
@@ -1,28 +1,160 @@
 #ifndef LITMUS_LOCKING_H
 #define LITMUS_LOCKING_H
 
+#include <linux/list.h>
+
 struct litmus_lock_ops;
 
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+struct nested_info
+{
+	struct litmus_lock *lock;
+	struct task_struct *hp_waiter_eff_prio;
+	struct task_struct **hp_waiter_ptr;
+	struct binheap_node hp_binheap_node;
+};
+
+static inline struct task_struct* top_priority(struct binheap* handle) {
+	if(!binheap_empty(handle)) {
+		return (struct task_struct*)(binheap_top_entry(handle, struct nested_info, hp_binheap_node)->hp_waiter_eff_prio);
+	}
+	return NULL;
+}
+
+void print_hp_waiters(struct binheap_node* n, int depth);
+#endif
+
+
 /* Generic base struct for LITMUS^RT userspace semaphores.
  * This structure should be embedded in protocol-specific semaphores.
  */
 struct litmus_lock {
 	struct litmus_lock_ops *ops;
 	int type;
+
+	int ident;
+
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+	struct nested_info nest;
+//#ifdef CONFIG_DEBUG_SPINLOCK
+	char cheat_lockdep[2];
+	struct lock_class_key key;
+//#endif
+#endif
 };
 
+#ifdef CONFIG_LITMUS_DGL_SUPPORT
+
+#define MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE
+
+typedef struct dgl_wait_state {
+	struct task_struct *task;                /* task waiting on DGL */
+	struct litmus_lock *locks[MAX_DGL_SIZE]; /* requested locks in DGL */
+	int size;                                /* size of the DGL */
+	int nr_remaining;                        /* nr locks remaining before DGL is complete */
+	int last_primary;                        /* index lock in locks[] that has active priority */
+	wait_queue_t wq_nodes[MAX_DGL_SIZE];
+} dgl_wait_state_t;
+
+void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait);
+void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/);
+
+void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait);
+int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key);
+void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait, struct task_struct **task);
+#endif
+
+typedef int (*lock_op_t)(struct litmus_lock *l);
+typedef lock_op_t lock_close_t;
+typedef lock_op_t lock_lock_t;
+typedef lock_op_t lock_unlock_t;
+
+typedef int (*lock_open_t)(struct litmus_lock *l, void* __user arg);
+typedef void (*lock_free_t)(struct litmus_lock *l);
+
 struct litmus_lock_ops {
 	/* Current task tries to obtain / drop a reference to a lock.
 	 * Optional methods, allowed by default. */
-	int (*open)(struct litmus_lock*, void* __user);
-	int (*close)(struct litmus_lock*);
+	lock_open_t open;
+	lock_close_t close;
 
 	/* Current tries to lock/unlock this lock (mandatory methods). */
-	int (*lock)(struct litmus_lock*);
-	int (*unlock)(struct litmus_lock*);
+	lock_lock_t lock;
+	lock_unlock_t unlock;
 
 	/* The lock is no longer being referenced (mandatory method). */
-	void (*deallocate)(struct litmus_lock*);
+	lock_free_t deallocate;
+
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+	void (*propagate_increase_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
+	void (*propagate_decrease_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
+#endif
+
+#ifdef CONFIG_LITMUS_DGL_SUPPORT
+	raw_spinlock_t* (*get_dgl_spin_lock)(struct litmus_lock *l);
+	int (*dgl_lock)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
+	int (*is_owner)(struct litmus_lock *l, struct task_struct *t);
+	void (*enable_priority)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
+#endif
 };
 
+
+/*
+  Nested inheritance can be achieved with fine-grain locking when there is
+  no need for DGL support, presuming locks are acquired in a partial order
+  (no cycles!). However, DGLs allow locks to be acquired in any order. This
+  makes nested inheritance very difficult to realize with fine-grain locks
+  (we don't yet know of a solution), so we use a big lock instead.
+
+  The code contains both fine-grain and coarse-grain methods together,
+  side-by-side. Lock operations are *NOT* surrounded by ifdef/endif, to keep
+  the code readable. However, this leads to the odd situation where both
+  code paths appear together in code as if they were both active together.
+
+  THIS IS NOT REALLY THE CASE! ONLY ONE CODE PATH IS ACTUALLY ACTIVE!
+
+  Example:
+	lock_global_irqsave(coarseLock, flags);
+	lock_fine_irqsave(fineLock, flags);
+
+  Reality (coarse):
+	lock_global_irqsave(coarseLock, flags);
+	//lock_fine_irqsave(fineLock, flags);
+
+  Reality (fine):
+	//lock_global_irqsave(coarseLock, flags);
+	lock_fine_irqsave(fineLock, flags);
+
+  Be careful when you read code involving nested inheritance.
+ */
+#if defined(CONFIG_LITMUS_DGL_SUPPORT)
+/* DGL requires a big lock to implement nested inheritance */
+#define lock_global_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags))
+#define lock_global(lock) raw_spin_lock((lock))
+#define unlock_global_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags))
+#define unlock_global(lock) raw_spin_unlock((lock))
+
+/* fine-grain lock operations are no-ops with DGL support */
+#define lock_fine_irqsave(lock, flags)
+#define lock_fine(lock)
+#define unlock_fine_irqrestore(lock, flags)
+#define unlock_fine(lock)
+
+#elif defined(CONFIG_LITMUS_NESTED_LOCKING)
+/* Use fine-grain locking when DGLs are disabled. */
+/* global lock operations are no-ops without DGL support */
+#define lock_global_irqsave(lock, flags)
+#define lock_global(lock)
+#define unlock_global_irqrestore(lock, flags)
+#define unlock_global(lock)
+
+#define lock_fine_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags))
+#define lock_fine(lock) raw_spin_lock((lock))
+#define unlock_fine_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags))
+#define unlock_fine(lock) raw_spin_unlock((lock))
+
 #endif
+
+
+#endif
+
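To make the comment above concrete, here is a sketch (not part of the patch) of a protocol routine written with both macro families. 'struct my_sem', 'my_sem_update', and 'dgl_lock' are hypothetical names, and only one of the two lock/unlock pairs expands to real code in any given configuration:

/* hypothetical protocol-private state */
struct my_sem {
	struct litmus_lock litmus_lock;
	raw_spinlock_t lock;            /* fine-grained lock */
};

static void my_sem_update(struct my_sem *sem, raw_spinlock_t *dgl_lock)
{
	unsigned long flags;

	lock_global_irqsave(dgl_lock, flags);   /* real only with DGL support */
	lock_fine_irqsave(&sem->lock, flags);   /* real only without DGL support */

	/* ... update wait queues / inheritance bookkeeping ... */

	unlock_fine_irqrestore(&sem->lock, flags);
	unlock_global_irqrestore(dgl_lock, flags);
}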
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
new file mode 100644
index 000000000000..97c9577141db
--- /dev/null
+++ b/include/litmus/nvidia_info.h
@@ -0,0 +1,46 @@
1#ifndef __LITMUS_NVIDIA_H
2#define __LITMUS_NVIDIA_H
3
4#include <linux/interrupt.h>
5
6
7#include <litmus/litmus_softirq.h>
8
9
10//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
11#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
12#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
13
14int init_nvidia_info(void);
15void shutdown_nvidia_info(void);
16
17int is_nvidia_func(void* func_addr);
18
19void dump_nvidia_info(const struct tasklet_struct *t);
20
21
22// Returns the Nvidia device # associated with provided tasklet and work_struct.
23u32 get_tasklet_nv_device_num(const struct tasklet_struct *t);
24u32 get_work_nv_device_num(const struct work_struct *t);
25
26
27int init_nv_device_reg(void);
28//int get_nv_device_id(struct task_struct* owner);
29
30
31int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t);
32
33struct task_struct* get_nv_max_device_owner(u32 target_device_id);
34//int is_nv_device_owner(u32 target_device_id);
35
36void lock_nv_registry(u32 reg_device_id, unsigned long* flags);
37void unlock_nv_registry(u32 reg_device_id, unsigned long* flags);
38
39#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
40void pai_check_priority_increase(struct task_struct *t, int reg_device_id);
41void pai_check_priority_decrease(struct task_struct *t, int reg_device_id);
42#endif
43
44//void increment_nv_int_count(u32 device);
45
46#endif
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
index 380b886d78ff..8f3a9ca2d4e3 100644
--- a/include/litmus/preempt.h
+++ b/include/litmus/preempt.h
@@ -26,12 +26,12 @@ const char* sched_state_name(int s);
 		(x), #x, __FUNCTION__); \
 	} while (0);
 
+//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */
 #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
 	TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
 		cpu, (x), sched_state_name(x), \
 		(y), sched_state_name(y))
 
-
 typedef enum scheduling_state {
 	TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
 				    * should be scheduled, and the processor does not
diff --git a/include/litmus/rsm_lock.h b/include/litmus/rsm_lock.h
new file mode 100644
index 000000000000..a15189683de4
--- /dev/null
+++ b/include/litmus/rsm_lock.h
@@ -0,0 +1,54 @@
1#ifndef LITMUS_RSM_H
2#define LITMUS_RSM_H
3
4#include <litmus/litmus.h>
5#include <litmus/binheap.h>
6#include <litmus/locking.h>
7
8/* struct for semaphore with priority inheritance */
9struct rsm_mutex {
10 struct litmus_lock litmus_lock;
11
12 /* current resource holder */
13 struct task_struct *owner;
14
15 /* highest-priority waiter */
16 struct task_struct *hp_waiter;
17
18 /* FIFO queue of waiting tasks -- for now. time stamp in the future. */
19 wait_queue_head_t wait;
20
21 /* we do some nesting within spinlocks, so we can't use the normal
22 sleeplocks found in wait_queue_head_t. */
23 raw_spinlock_t lock;
24};
25
26static inline struct rsm_mutex* rsm_mutex_from_lock(struct litmus_lock* lock)
27{
28 return container_of(lock, struct rsm_mutex, litmus_lock);
29}
30
31#ifdef CONFIG_LITMUS_DGL_SUPPORT
32int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t);
33int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
34void rsm_mutex_enable_priority(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
35#endif
36
37void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
38 struct task_struct* t,
39 raw_spinlock_t* to_unlock,
40 unsigned long irqflags);
41
42void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
43 struct task_struct* t,
44 raw_spinlock_t* to_unlock,
45 unsigned long irqflags);
46
47int rsm_mutex_lock(struct litmus_lock* l);
48int rsm_mutex_unlock(struct litmus_lock* l);
49int rsm_mutex_close(struct litmus_lock* l);
50void rsm_mutex_free(struct litmus_lock* l);
51struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops*);
52
53
54#endif \ No newline at end of file
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 21430623a940..02b750a9570b 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -5,6 +5,8 @@
 #ifndef _LINUX_RT_PARAM_H_
 #define _LINUX_RT_PARAM_H_
 
+#include <litmus/fpmath.h>
+
 /* Litmus time type. */
 typedef unsigned long long lt_t;
 
@@ -30,9 +32,15 @@ typedef enum {
 typedef enum {
 	NO_ENFORCEMENT,      /* job may overrun unhindered */
 	QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
-	PRECISE_ENFORCEMENT  /* budgets are enforced with hrtimers */
+	PRECISE_ENFORCEMENT, /* budgets are enforced with hrtimers */
 } budget_policy_t;
 
+typedef enum {
+	NO_SIGNALS,      /* job receives no signals when it exhausts its budget */
+	QUANTUM_SIGNALS, /* budget signals are only sent on quantum boundaries */
+	PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */
+} budget_signal_policy_t;
+
 /* We use the common priority interpretation "lower index == higher priority",
  * which is commonly used in fixed-priority schedulability analysis papers.
  * So, a numerically lower priority value implies higher scheduling priority,
@@ -62,6 +70,7 @@ struct rt_task {
 	unsigned int priority;
 	task_class_t cls;
 	budget_policy_t budget_policy; /* ignored by pfair */
+	budget_signal_policy_t budget_signal_policy; /* currently ignored by pfair */
 };
 
 union np_flag {
@@ -74,6 +83,19 @@ union np_flag {
 	} np;
 };
 
+struct affinity_observer_args
+{
+	int lock_od;
+};
+
+struct gpu_affinity_observer_args
+{
+	struct affinity_observer_args obs;
+	int replica_to_gpu_offset;
+	int nr_simult_users;
+	int relaxed_rules;
+};
+
 /* The definition of the data that is shared between the kernel and real-time
  * tasks via a shared page (see litmus/ctrldev.c).
  *
@@ -97,6 +119,9 @@ struct control_page {
 /* don't export internal data structures to user space (liblitmus) */
 #ifdef __KERNEL__
 
+#include <litmus/binheap.h>
+#include <linux/semaphore.h>
+
 struct _rt_domain;
 struct bheap_node;
 struct release_heap;
@@ -110,6 +135,12 @@ struct rt_job {
 	/* How much service has this job received so far? */
 	lt_t exec_time;
 
+	/* By how much did the prior job miss its deadline?
+	 * This differs from tardiness in that lateness may
+	 * be negative (when a job finishes before its deadline).
+	 */
+	long long lateness;
+
 	/* Which job is this. This is used to let user space
 	 * specify which job to wait for, which is important if jobs
 	 * overrun. If we just call sys_sleep_next_period() then we
@@ -118,10 +149,54 @@ struct rt_job {
 	 * Increase this sequence number when a job is released.
 	 */
 	unsigned int job_no;
+
+	/* bits:
+	 * 0th: Set if a budget exhaustion signal has already been sent for
+	 *      the current job. */
+	unsigned long flags;
 };
 
+#define RT_JOB_SIG_BUDGET_SENT 0
+
 struct pfair_param;
 
+enum klitirqd_sem_status
+{
+	NEED_TO_REACQUIRE,
+	REACQUIRING,
+	NOT_HELD,
+	HELD
+};
+
+typedef enum gpu_migration_dist
+{
+	// TODO: Make this variable against NR_NVIDIA_GPUS
+	MIG_LOCAL = 0,
+	MIG_NEAR = 1,
+	MIG_MED = 2,
+	MIG_FAR = 3,	// 8 GPUs in a binary tree hierarchy
+	MIG_NONE = 4,
+
+	MIG_LAST = MIG_NONE
+} gpu_migration_dist_t;
+
+typedef struct feedback_est{
+	fp_t est;
+	fp_t accum_err;
+} feedback_est_t;
+
+
+#define AVG_EST_WINDOW_SIZE 20
+
+typedef struct avg_est{
+	lt_t history[AVG_EST_WINDOW_SIZE];
+	uint16_t count;
+	uint16_t idx;
+	lt_t sum;
+	lt_t std;
+	lt_t avg;
+} avg_est_t;
+
 /* RT task parameters for scheduling extensions
  * These parameters are inherited during clone and therefore must
  * be explicitly set up before the task set is launched.
@@ -136,6 +211,50 @@ struct rt_param {
 	/* is the task present? (true if it can be scheduled) */
 	unsigned int present:1;
 
+#ifdef CONFIG_LITMUS_SOFTIRQD
+	/* proxy threads have minimum priority by default */
+	unsigned int is_proxy_thread:1;
+
+	/* pointer to klitirqd currently working on this
+	   task_struct's behalf. only set by the task pointed
+	   to by klitirqd.
+
+	   ptr only valid if is_proxy_thread == 0
+	 */
+	struct task_struct* cur_klitirqd;
+
+	/* Used to implement mutual exclusion between job
+	 * and klitirqd execution. A job must always hold
+	 * its klitirqd_sem to execute. A klitirqd instance
+	 * must hold the semaphore before executing on behalf
+	 * of a job.
+	 */
+	struct mutex klitirqd_sem;
+
+	/* status of held klitirqd_sem, even if the held klitirqd_sem is from
+	   another task (only proxy threads do this though).
+	 */
+	atomic_t klitirqd_sem_stat;
+#endif
+
+#ifdef CONFIG_LITMUS_NVIDIA
+	/* number of top-half interrupts handled on behalf of current job */
+	atomic_t nv_int_count;
+	long unsigned int held_gpus;  // bitmap of held GPUs.
+
+#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
+	avg_est_t gpu_migration_est[MIG_LAST+1];
+
+	gpu_migration_dist_t gpu_migration;
+	int last_gpu;
+
+	lt_t accum_gpu_time;
+	lt_t gpu_time_stamp;
+
+	unsigned int suspend_gpu_tracker_on_block:1;
+#endif
+#endif
+
 #ifdef CONFIG_LITMUS_LOCKING
 	/* Is the task being priority-boosted by a locking protocol? */
 	unsigned int priority_boosted:1;
@@ -155,11 +274,20 @@ struct rt_param {
155 * could point to self if PI does not result in 274 * could point to self if PI does not result in
156 * an increased task priority. 275 * an increased task priority.
157 */ 276 */
158 struct task_struct* inh_task; 277 struct task_struct* inh_task;
278
279#ifdef CONFIG_LITMUS_NESTED_LOCKING
280 raw_spinlock_t hp_blocked_tasks_lock;
281 struct binheap hp_blocked_tasks;
282
 283	/* pointer to the lock on which this task is currently blocked */
284 struct litmus_lock* blocked_lock;
285#endif
159 286
160 287
161 struct task_struct* hp_group; 288 struct task_struct* hp_group;
162 unsigned int is_slave:1; 289 unsigned int is_slave:1;
290 unsigned int has_slaves:1;
163 291
164 292
165#ifdef CONFIG_NP_SECTION 293#ifdef CONFIG_NP_SECTION
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 6e7cabdddae8..24a6858b4b0b 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -11,6 +11,12 @@
11#include <litmus/locking.h> 11#include <litmus/locking.h>
12#endif 12#endif
13 13
14#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
15#include <litmus/kexclu_affinity.h>
16#endif
17
18#include <linux/interrupt.h>
19
14/************************ setup/tear down ********************/ 20/************************ setup/tear down ********************/
15 21
16typedef long (*activate_plugin_t) (void); 22typedef long (*activate_plugin_t) (void);
@@ -29,7 +35,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
29 */ 35 */
30typedef void (*finish_switch_t)(struct task_struct *prev); 36typedef void (*finish_switch_t)(struct task_struct *prev);
31 37
32
33/********************* task state changes ********************/ 38/********************* task state changes ********************/
34 39
35/* Called to setup a new real-time task. 40/* Called to setup a new real-time task.
@@ -58,6 +63,47 @@ typedef void (*task_exit_t) (struct task_struct *);
58typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, 63typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
59 void* __user config); 64 void* __user config);
60 65
66struct affinity_observer;
67typedef long (*allocate_affinity_observer_t) (
68 struct affinity_observer **aff_obs, int type,
69 void* __user config);
70
71typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
72typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
73typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
74 raw_spinlock_t *to_unlock, unsigned long irqflags);
75typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
76 raw_spinlock_t *to_unlock, unsigned long irqflags);
77
78typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd,
79 struct task_struct* old_owner,
80 struct task_struct* new_owner);
81typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd,
82 struct task_struct* old_owner);
83
84
85typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet);
86typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio,
87 struct task_struct *new_prio);
88typedef void (*run_tasklets_t)(struct task_struct* next);
89
90typedef raw_spinlock_t* (*get_dgl_spinlock_t) (struct task_struct *t);
91
92
93typedef int (*higher_prio_t)(struct task_struct* a, struct task_struct* b);
94
95#ifdef CONFIG_LITMUS_NESTED_LOCKING
96
97typedef enum
98{
99 BASE,
100 EFFECTIVE
101} comparison_mode_t;
102
103typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod,
104 struct task_struct* b, comparison_mode_t b_mod);
105#endif
106
61 107
62/********************* sys call backends ********************/ 108/********************* sys call backends ********************/
63/* This function causes the caller to sleep until the next release */ 109/* This function causes the caller to sleep until the next release */
@@ -88,14 +134,40 @@ struct sched_plugin {
88 /* task state changes */ 134 /* task state changes */
89 admit_task_t admit_task; 135 admit_task_t admit_task;
90 136
91 task_new_t task_new; 137 task_new_t task_new;
92 task_wake_up_t task_wake_up; 138 task_wake_up_t task_wake_up;
93 task_block_t task_block; 139 task_block_t task_block;
94 task_exit_t task_exit; 140 task_exit_t task_exit;
95 141
142 higher_prio_t compare;
143
96#ifdef CONFIG_LITMUS_LOCKING 144#ifdef CONFIG_LITMUS_LOCKING
97 /* locking protocols */ 145 /* locking protocols */
98 allocate_lock_t allocate_lock; 146 allocate_lock_t allocate_lock;
147 increase_prio_t increase_prio;
148 decrease_prio_t decrease_prio;
149#endif
150#ifdef CONFIG_LITMUS_NESTED_LOCKING
151 nested_increase_prio_t nested_increase_prio;
152 nested_decrease_prio_t nested_decrease_prio;
153 __higher_prio_t __compare;
154#endif
155#ifdef CONFIG_LITMUS_DGL_SUPPORT
156 get_dgl_spinlock_t get_dgl_spinlock;
157#endif
158
159#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
160 allocate_affinity_observer_t allocate_aff_obs;
161#endif
162
163#ifdef CONFIG_LITMUS_SOFTIRQD
164 increase_prio_klitirq_t increase_prio_klitirqd;
165 decrease_prio_klitirqd_t decrease_prio_klitirqd;
166#endif
167#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
168 enqueue_pai_tasklet_t enqueue_pai_tasklet;
169 change_prio_pai_tasklet_t change_prio_pai_tasklet;
170 run_tasklets_t run_tasklets;
99#endif 171#endif
100} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); 172} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
101 173
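To show how a scheduler would hook into the extended interface, here is a hedged sketch of a plugin initializer. The gsnedf_* functions are placeholders, edf_higher_prio/__edf_higher_prio are assumed to match the compare/__compare typedefs, and standard fields (plugin_name, schedule, ...) are omitted:

/* Hypothetical plugin declaration; the gsnedf_* symbols are placeholders. */
static struct sched_plugin gsnedf_plugin = {
	.admit_task		= gsnedf_admit_task,
	.task_new		= gsnedf_task_new,
	.task_exit		= gsnedf_task_exit,
	.compare		= edf_higher_prio,	/* higher_prio_t */
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock		= gsnedf_allocate_lock,
	.increase_prio		= gsnedf_increase_prio,
	.decrease_prio		= gsnedf_decrease_prio,
#endif
#ifdef CONFIG_LITMUS_NESTED_LOCKING
	.nested_increase_prio	= gsnedf_nested_increase_prio,
	.nested_decrease_prio	= gsnedf_nested_decrease_prio,
	.__compare		= __edf_higher_prio,	/* BASE/EFFECTIVE aware */
#endif
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
	.enqueue_pai_tasklet	= gsnedf_enqueue_pai_tasklet,
	.change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
	.run_tasklets		= gsnedf_run_tasklets,
#endif
};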
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 82bde8241298..7af12f49c600 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -10,13 +10,14 @@ struct st_trace_header {
10 u8 type; /* Of what type is this record? */ 10 u8 type; /* Of what type is this record? */
11 u8 cpu; /* On which CPU was it recorded? */ 11 u8 cpu; /* On which CPU was it recorded? */
12 u16 pid; /* PID of the task. */ 12 u16 pid; /* PID of the task. */
13 u32 job; /* The job sequence number. */ 13 u32 job:24; /* The job sequence number. */
14}; 14 u8 extra;
15} __attribute__((packed));
15 16
16#define ST_NAME_LEN 16 17#define ST_NAME_LEN 16
17struct st_name_data { 18struct st_name_data {
18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ 19 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
19}; 20} __attribute__((packed));
20 21
21struct st_param_data { /* regular params */ 22struct st_param_data { /* regular params */
22 u32 wcet; 23 u32 wcet;
@@ -25,30 +26,29 @@ struct st_param_data { /* regular params */
25 u8 partition; 26 u8 partition;
26 u8 class; 27 u8 class;
27 u8 __unused[2]; 28 u8 __unused[2];
28}; 29} __attribute__((packed));
29 30
30struct st_release_data { /* A job is was/is going to be released. */ 31struct st_release_data { /* A job is was/is going to be released. */
31 u64 release; /* What's the release time? */ 32 u64 release; /* What's the release time? */
32 u64 deadline; /* By when must it finish? */ 33 u64 deadline; /* By when must it finish? */
33}; 34} __attribute__((packed));
34 35
35struct st_assigned_data { /* A job was asigned to a CPU. */ 36struct st_assigned_data { /* A job was asigned to a CPU. */
36 u64 when; 37 u64 when;
37 u8 target; /* Where should it execute? */ 38 u8 target; /* Where should it execute? */
38 u8 __unused[7]; 39 u8 __unused[7];
39}; 40} __attribute__((packed));
40 41
41struct st_switch_to_data { /* A process was switched to on a given CPU. */ 42struct st_switch_to_data { /* A process was switched to on a given CPU. */
42 u64 when; /* When did this occur? */ 43 u64 when; /* When did this occur? */
43 u32 exec_time; /* Time the current job has executed. */ 44 u32 exec_time; /* Time the current job has executed. */
44 u8 __unused[4]; 45 u8 __unused[4];
45 46} __attribute__((packed));
46};
47 47
48struct st_switch_away_data { /* A process was switched away from on a given CPU. */ 48struct st_switch_away_data { /* A process was switched away from on a given CPU. */
49 u64 when; 49 u64 when;
50 u64 exec_time; 50 u64 exec_time;
51}; 51} __attribute__((packed));
52 52
53struct st_completion_data { /* A job completed. */ 53struct st_completion_data { /* A job completed. */
54 u64 when; 54 u64 when;
@@ -56,35 +56,108 @@ struct st_completion_data { /* A job completed. */
56 * next task automatically; set to 0 otherwise. 56 * next task automatically; set to 0 otherwise.
57 */ 57 */
58 u8 __uflags:7; 58 u8 __uflags:7;
59 u8 __unused[7]; 59 u16 nv_int_count;
60}; 60 u8 __unused[5];
61} __attribute__((packed));
61 62
62struct st_block_data { /* A task blocks. */ 63struct st_block_data { /* A task blocks. */
63 u64 when; 64 u64 when;
64 u64 __unused; 65 u64 __unused;
65}; 66} __attribute__((packed));
66 67
67struct st_resume_data { /* A task resumes. */ 68struct st_resume_data { /* A task resumes. */
68 u64 when; 69 u64 when;
69 u64 __unused; 70 u64 __unused;
70}; 71} __attribute__((packed));
71 72
72struct st_action_data { 73struct st_action_data {
73 u64 when; 74 u64 when;
74 u8 action; 75 u8 action;
75 u8 __unused[7]; 76 u8 __unused[7];
76}; 77} __attribute__((packed));
77 78
78struct st_sys_release_data { 79struct st_sys_release_data {
79 u64 when; 80 u64 when;
80 u64 release; 81 u64 release;
81}; 82} __attribute__((packed));
83
84
85struct st_tasklet_release_data {
86 u64 when;
87 u64 __unused;
88} __attribute__((packed));
89
90struct st_tasklet_begin_data {
91 u64 when;
92 u16 exe_pid;
93 u8 __unused[6];
94} __attribute__((packed));
95
96struct st_tasklet_end_data {
97 u64 when;
98 u16 exe_pid;
99 u8 flushed;
100 u8 __unused[5];
101} __attribute__((packed));
102
103
104struct st_work_release_data {
105 u64 when;
106 u64 __unused;
107} __attribute__((packed));
108
109struct st_work_begin_data {
110 u64 when;
111 u16 exe_pid;
112 u8 __unused[6];
113} __attribute__((packed));
114
115struct st_work_end_data {
116 u64 when;
117 u16 exe_pid;
118 u8 flushed;
119 u8 __unused[5];
120} __attribute__((packed));
121
122struct st_effective_priority_change_data {
123 u64 when;
124 u16 inh_pid;
125 u8 __unused[6];
126} __attribute__((packed));
127
128struct st_nv_interrupt_begin_data {
129 u64 when;
130 u32 device;
131 u32 serialNumber;
132} __attribute__((packed));
133
134struct st_nv_interrupt_end_data {
135 u64 when;
136 u32 device;
137 u32 serialNumber;
138} __attribute__((packed));
139
140struct st_prediction_err_data {
141 u64 distance;
142 u64 rel_err;
143} __attribute__((packed));
144
145struct st_migration_data {
146 u64 observed;
147 u64 estimated;
148} __attribute__((packed));
149
150struct migration_info {
151 u64 observed;
152 u64 estimated;
153 u8 distance;
154} __attribute__((packed));
82 155
83#define DATA(x) struct st_ ## x ## _data x; 156#define DATA(x) struct st_ ## x ## _data x;
84 157
85typedef enum { 158typedef enum {
86 ST_NAME = 1, /* Start at one, so that we can spot 159 ST_NAME = 1, /* Start at one, so that we can spot
87 * uninitialized records. */ 160 * uninitialized records. */
88 ST_PARAM, 161 ST_PARAM,
89 ST_RELEASE, 162 ST_RELEASE,
90 ST_ASSIGNED, 163 ST_ASSIGNED,
@@ -94,7 +167,19 @@ typedef enum {
94 ST_BLOCK, 167 ST_BLOCK,
95 ST_RESUME, 168 ST_RESUME,
96 ST_ACTION, 169 ST_ACTION,
97 ST_SYS_RELEASE 170 ST_SYS_RELEASE,
171 ST_TASKLET_RELEASE,
172 ST_TASKLET_BEGIN,
173 ST_TASKLET_END,
174 ST_WORK_RELEASE,
175 ST_WORK_BEGIN,
176 ST_WORK_END,
177 ST_EFF_PRIO_CHANGE,
178 ST_NV_INTERRUPT_BEGIN,
179 ST_NV_INTERRUPT_END,
180
181 ST_PREDICTION_ERR,
182 ST_MIGRATION,
98} st_event_record_type_t; 183} st_event_record_type_t;
99 184
100struct st_event_record { 185struct st_event_record {
@@ -113,8 +198,20 @@ struct st_event_record {
113 DATA(resume); 198 DATA(resume);
114 DATA(action); 199 DATA(action);
115 DATA(sys_release); 200 DATA(sys_release);
201 DATA(tasklet_release);
202 DATA(tasklet_begin);
203 DATA(tasklet_end);
204 DATA(work_release);
205 DATA(work_begin);
206 DATA(work_end);
207 DATA(effective_priority_change);
208 DATA(nv_interrupt_begin);
209 DATA(nv_interrupt_end);
210
211 DATA(prediction_err);
212 DATA(migration);
116 } data; 213 } data;
117}; 214} __attribute__((packed));
118 215
119#undef DATA 216#undef DATA
120 217
@@ -129,6 +226,8 @@ struct st_event_record {
129 ft_event1(id, callback, task) 226 ft_event1(id, callback, task)
130#define SCHED_TRACE2(id, callback, task, xtra) \ 227#define SCHED_TRACE2(id, callback, task, xtra) \
131 ft_event2(id, callback, task, xtra) 228 ft_event2(id, callback, task, xtra)
229#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \
230 ft_event3(id, callback, task, xtra1, xtra2)
132 231
133/* provide prototypes; needed on sparc64 */ 232/* provide prototypes; needed on sparc64 */
134#ifndef NO_TASK_TRACE_DECLS 233#ifndef NO_TASK_TRACE_DECLS
@@ -155,12 +254,58 @@ feather_callback void do_sched_trace_action(unsigned long id,
155feather_callback void do_sched_trace_sys_release(unsigned long id, 254feather_callback void do_sched_trace_sys_release(unsigned long id,
156 lt_t* start); 255 lt_t* start);
157 256
257
258feather_callback void do_sched_trace_tasklet_release(unsigned long id,
259 struct task_struct* owner);
260feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
261 struct task_struct* owner);
262feather_callback void do_sched_trace_tasklet_end(unsigned long id,
263 struct task_struct* owner,
264 unsigned long flushed);
265
266feather_callback void do_sched_trace_work_release(unsigned long id,
267 struct task_struct* owner);
268feather_callback void do_sched_trace_work_begin(unsigned long id,
269 struct task_struct* owner,
270 struct task_struct* exe);
271feather_callback void do_sched_trace_work_end(unsigned long id,
272 struct task_struct* owner,
273 struct task_struct* exe,
274 unsigned long flushed);
275
276feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
277 struct task_struct* task,
278 struct task_struct* inh);
279
280feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
281 u32 device);
282feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
283 unsigned long unused);
284
285feather_callback void do_sched_trace_prediction_err(unsigned long id,
286 struct task_struct* task,
287 gpu_migration_dist_t* distance,
288 fp_t* rel_err);
289
290
291
292
293
294feather_callback void do_sched_trace_migration(unsigned long id,
295 struct task_struct* task,
296 struct migration_info* mig_info);
297
298
299/* returns true if we're tracing an interrupt on current CPU */
300/* int is_interrupt_tracing_active(void); */
301
158#endif 302#endif
159 303
160#else 304#else
161 305
162#define SCHED_TRACE(id, callback, task) /* no tracing */ 306#define SCHED_TRACE(id, callback, task) /* no tracing */
163#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ 307#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
308#define SCHED_TRACE3(id, callback, task, xtra1, xtra2)
164 309
165#endif 310#endif
166 311
@@ -252,6 +397,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
252 trace_litmus_sys_release(when); \ 397 trace_litmus_sys_release(when); \
253 } while (0) 398 } while (0)
254 399
400#define sched_trace_tasklet_release(t) \
401 SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t)
402
403#define sched_trace_tasklet_begin(t) \
404 SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t)
405
406#define sched_trace_tasklet_end(t, flushed) \
407 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed)
408
409
410#define sched_trace_work_release(t) \
411 SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t)
412
413#define sched_trace_work_begin(t, e) \
414 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e)
415
416#define sched_trace_work_end(t, e, flushed) \
417 SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed)
418
419
420#define sched_trace_eff_prio_change(t, inh) \
421 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh)
422
423
424#define sched_trace_nv_interrupt_begin(d) \
425 SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
426#define sched_trace_nv_interrupt_end(d) \
427 SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
428
429#define sched_trace_prediction_err(t, dist, rel_err) \
430 SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err)
431
432#define sched_trace_migration(t, mig_info) \
433 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info)
434
255#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ 435#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
256 436
257#endif /* __KERNEL__ */ 437#endif /* __KERNEL__ */
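For clarity, the new three-argument helper follows the existing one- and two-argument pattern: a sched_trace_*() macro forwards a compile-time event id and its arguments to a Feather-Trace event. For example, per the macros above, sched_trace_work_end(t, e, flushed) expands to:

/* expansion of sched_trace_work_end(t, e, flushed) */
ft_event3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed);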
diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
new file mode 100644
index 000000000000..e70e45e4cf51
--- /dev/null
+++ b/include/litmus/sched_trace_external.h
@@ -0,0 +1,78 @@
1/*
2 * sched_trace_external.h -- record scheduler events to a byte stream for offline analysis.
3 */
4#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_
5#define _LINUX_SCHED_TRACE_EXTERNAL_H_
6
7
8#ifdef CONFIG_SCHED_TASK_TRACE
9extern void __sched_trace_tasklet_begin_external(struct task_struct* t);
10static inline void sched_trace_tasklet_begin_external(struct task_struct* t)
11{
12 __sched_trace_tasklet_begin_external(t);
13}
14
15extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed);
16static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
17{
18 __sched_trace_tasklet_end_external(t, flushed);
19}
20
21extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e);
22static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
23{
24 __sched_trace_work_begin_external(t, e);
25}
26
27extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f);
28static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
29{
30 __sched_trace_work_end_external(t, e, f);
31}
32
33#ifdef CONFIG_LITMUS_NVIDIA
34extern void __sched_trace_nv_interrupt_begin_external(u32 device);
35static inline void sched_trace_nv_interrupt_begin_external(u32 device)
36{
37 __sched_trace_nv_interrupt_begin_external(device);
38}
39
40extern void __sched_trace_nv_interrupt_end_external(u32 device);
41static inline void sched_trace_nv_interrupt_end_external(u32 device)
42{
43 __sched_trace_nv_interrupt_end_external(device);
44}
45#endif
46
47#else
48
49// no tracing.
50static inline void sched_trace_tasklet_begin_external(struct task_struct* t){}
51static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){}
52static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){}
53static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){}
54
55#ifdef CONFIG_LITMUS_NVIDIA
56static inline void sched_trace_nv_interrupt_begin_external(u32 device){}
57static inline void sched_trace_nv_interrupt_end_external(u32 device){}
58#endif
59
60#endif
61
62
63#ifdef CONFIG_LITMUS_NVIDIA
64
65#define EX_TS(evt) \
66extern void __##evt(void); \
67static inline void EX_##evt(void) { __##evt(); }
68
69EX_TS(TS_NV_TOPISR_START)
70EX_TS(TS_NV_TOPISR_END)
71EX_TS(TS_NV_BOTISR_START)
72EX_TS(TS_NV_BOTISR_END)
73EX_TS(TS_NV_RELEASE_BOTISR_START)
74EX_TS(TS_NV_RELEASE_BOTISR_END)
75
76#endif
77
78#endif
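The EX_TS() macro above only generates an extern declaration plus a trivial inline wrapper per Feather-Trace timestamp, so code outside litmus/ can emit these timestamps without pulling in trace.h. For example, EX_TS(TS_NV_TOPISR_START) expands to:

extern void __TS_NV_TOPISR_START(void);
static inline void EX_TS_NV_TOPISR_START(void)
{
	__TS_NV_TOPISR_START();
}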
diff --git a/include/litmus/signal.h b/include/litmus/signal.h
new file mode 100644
index 000000000000..b3d82b294984
--- /dev/null
+++ b/include/litmus/signal.h
@@ -0,0 +1,47 @@
1#ifndef LITMUS_SIGNAL_H
2#define LITMUS_SIGNAL_H
3
4#ifdef __KERNEL__
5#include <linux/signal.h>
6#else
7#include <signal.h>
8#endif
9
10/* Signals used by Litmus to asynchronously communicate events
11 * to real-time tasks.
12 *
13 * Signal values overlap with [SIGRTMIN, SIGRTMAX], so beware of
14 * application-level conflicts when dealing with COTS user-level
15 * code.
16 */
17
18/* Sent to a Litmus task when all of the following conditions are true:
19 * (1) The task has exhausted its budget.
20 * (2) budget_signal_policy is QUANTUM_SIGNALS or PRECISE_SIGNALS.
21 *
22 * Note: If a task does not have a registered handler for SIG_BUDGET,
23 * the signal will cause the task to terminate (default action).
24 */
25
26/* Assigned values start at SIGRTMAX and decrease, hopefully reducing
27 * likelihood of user-level conflicts.
28 */
29#define SIG_BUDGET (SIGRTMAX - 0)
30
31/*
32Future signals could include:
33
34#define SIG_DEADLINE_MISS (SIGRTMAX - 1)
35#define SIG_CRIT_LEVEL_CHANGE (SIGRTMAX - 2)
36*/
37
38#define SIGLITMUSMIN SIG_BUDGET
39
40#ifdef __KERNEL__
41#if (SIGLITMUSMIN < SIGRTMIN)
42/* no compile-time check in user-space since SIGRTMIN may be a variable. */
43#error "Too many LITMUS^RT signals!"
44#endif
45#endif
46
47#endif
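A hedged user-space sketch of catching SIG_BUDGET rather than accepting the default (fatal) action; the handler body is illustrative only and liblitmus task setup is omitted:

/* User-space illustration: install a handler for SIG_BUDGET. */
#include <signal.h>
#include <string.h>

#define SIG_BUDGET (SIGRTMAX - 0)	/* mirrors litmus/signal.h */

static volatile sig_atomic_t budget_exhausted;

static void on_budget(int sig)
{
	(void)sig;
	budget_exhausted = 1;	/* real code might shed work or yield here */
}

static void install_budget_handler(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_budget;
	sigemptyset(&sa.sa_mask);
	sigaction(SIG_BUDGET, &sa, NULL);
}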
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index e809376d6487..e078aee4234d 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -103,14 +103,46 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
103#define TS_LOCK_START TIMESTAMP(170) 103#define TS_LOCK_START TIMESTAMP(170)
104#define TS_LOCK_SUSPEND TIMESTAMP(171) 104#define TS_LOCK_SUSPEND TIMESTAMP(171)
105#define TS_LOCK_RESUME TIMESTAMP(172) 105#define TS_LOCK_RESUME TIMESTAMP(172)
106#define TS_LOCK_END TIMESTAMP(173) 106#define TS_LOCK_END TIMESTAMP(173)
107
108#ifdef CONFIG_LITMUS_DGL_SUPPORT
109#define TS_DGL_LOCK_START TIMESTAMP(175)
110#define TS_DGL_LOCK_SUSPEND TIMESTAMP(176)
111#define TS_DGL_LOCK_RESUME TIMESTAMP(177)
112#define TS_DGL_LOCK_END TIMESTAMP(178)
113#endif
107 114
108#define TS_UNLOCK_START TIMESTAMP(180) 115#define TS_UNLOCK_START TIMESTAMP(180)
109#define TS_UNLOCK_END TIMESTAMP(181) 116#define TS_UNLOCK_END TIMESTAMP(181)
110 117
118#ifdef CONFIG_LITMUS_DGL_SUPPORT
119#define TS_DGL_UNLOCK_START TIMESTAMP(185)
120#define TS_DGL_UNLOCK_END TIMESTAMP(186)
121#endif
122
111#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) 123#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
112#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) 124#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
113 125
114#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) 126#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when))
115 127
128
129#ifdef CONFIG_LITMUS_NVIDIA
130
131#define TS_NV_TOPISR_START TIMESTAMP(200)
132#define TS_NV_TOPISR_END TIMESTAMP(201)
133
134#define TS_NV_BOTISR_START TIMESTAMP(202)
135#define TS_NV_BOTISR_END TIMESTAMP(203)
136
137#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204)
138#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205)
139
140#endif
141
142#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
143#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206)
144#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207)
145#endif
146
147
116#endif /* !_SYS_TRACE_H_ */ 148#endif /* !_SYS_TRACE_H_ */
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
index bcb8f1183b4f..4fd92956d13f 100644
--- a/include/litmus/unistd_32.h
+++ b/include/litmus/unistd_32.h
@@ -17,7 +17,10 @@
17#define __NR_wait_for_ts_release __LSC(9) 17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10) 18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11) 19#define __NR_null_call __LSC(11)
20#define __NR_litmus_dgl_lock __LSC(12)
21#define __NR_litmus_dgl_unlock __LSC(13)
22#define __NR_register_nv_device __LSC(14)
20 23
21#define __NR_slave_non_rt_threads _LSC(12) 24#define __NR_slave_non_rt_threads _LSC(15)
22 25
23#define NR_litmus_syscalls 13 26#define NR_litmus_syscalls 16
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index 5f56d5947343..abb45c181e8e 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -29,8 +29,14 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
29__SYSCALL(__NR_release_ts, sys_release_ts) 29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11) 30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call) 31__SYSCALL(__NR_null_call, sys_null_call)
32#define __NR_litmus_dgl_lock __LSC(12)
33__SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock)
34#define __NR_litmus_dgl_unlock __LSC(13)
35__SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock)
36#define __NR_register_nv_device __LSC(14)
37__SYSCALL(__NR_register_nv_device, sys_register_nv_device)
32 38
33#define __NR_slave_non_rt_threads __LSC(12) 39#define __NR_slave_non_rt_threads __LSC(15)
34__SYSCALL(__NR_slave_non_rt_threads, sys_slave_non_rt_threads) 40__SYSCALL(__NR_slave_non_rt_threads, sys_slave_non_rt_threads)
35 41
36#define NR_litmus_syscalls 13 42#define NR_litmus_syscalls 16
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 298c9276dfdb..2bdcdc3691e5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -542,7 +542,7 @@ static void print_lock(struct held_lock *hlock)
542 print_ip_sym(hlock->acquire_ip); 542 print_ip_sym(hlock->acquire_ip);
543} 543}
544 544
545static void lockdep_print_held_locks(struct task_struct *curr) 545void lockdep_print_held_locks(struct task_struct *curr)
546{ 546{
547 int i, depth = curr->lockdep_depth; 547 int i, depth = curr->lockdep_depth;
548 548
@@ -558,6 +558,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
558 print_lock(curr->held_locks + i); 558 print_lock(curr->held_locks + i);
559 } 559 }
560} 560}
561EXPORT_SYMBOL(lockdep_print_held_locks);
561 562
562static void print_kernel_version(void) 563static void print_kernel_version(void)
563{ 564{
@@ -583,6 +584,10 @@ static int static_obj(void *obj)
583 end = (unsigned long) &_end, 584 end = (unsigned long) &_end,
584 addr = (unsigned long) obj; 585 addr = (unsigned long) obj;
585 586
587 // GLENN
588 return 1;
589
590
586 /* 591 /*
587 * static variable? 592 * static variable?
588 */ 593 */
diff --git a/kernel/mutex.c b/kernel/mutex.c
index d607ed5dd441..2f363b9bfc1f 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -498,3 +498,128 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
498 return 1; 498 return 1;
499} 499}
500EXPORT_SYMBOL(atomic_dec_and_mutex_lock); 500EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
501
502
503
504
505void mutex_lock_sfx(struct mutex *lock,
506 side_effect_t pre, unsigned long pre_arg,
507 side_effect_t post, unsigned long post_arg)
508{
509 long state = TASK_UNINTERRUPTIBLE;
510
511 struct task_struct *task = current;
512 struct mutex_waiter waiter;
513 unsigned long flags;
514
515 preempt_disable();
 516	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
517
518 spin_lock_mutex(&lock->wait_lock, flags);
519
520 if(pre)
521 {
522 if(unlikely(pre(pre_arg)))
523 {
 524			// this will confuse lockdep's CONFIG_PROVE_LOCKING...
525 spin_unlock_mutex(&lock->wait_lock, flags);
526 preempt_enable();
527 return;
528 }
529 }
530
531 debug_mutex_lock_common(lock, &waiter);
532 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
533
534 /* add waiting tasks to the end of the waitqueue (FIFO): */
535 list_add_tail(&waiter.list, &lock->wait_list);
536 waiter.task = task;
537
538 if (atomic_xchg(&lock->count, -1) == 1)
539 goto done;
540
 541	lock_contended(&lock->dep_map, _RET_IP_);
542
543 for (;;) {
544 /*
 545		 * Let's try to take the lock again - this is needed even if
546 * we get here for the first time (shortly after failing to
547 * acquire the lock), to make sure that we get a wakeup once
548 * it's unlocked. Later on, if we sleep, this is the
549 * operation that gives us the lock. We xchg it to -1, so
550 * that when we release the lock, we properly wake up the
551 * other waiters:
552 */
553 if (atomic_xchg(&lock->count, -1) == 1)
554 break;
555
556 __set_task_state(task, state);
557
 558		/* didn't get the lock, go to sleep: */
559 spin_unlock_mutex(&lock->wait_lock, flags);
560 preempt_enable_no_resched();
561 schedule();
562 preempt_disable();
563 spin_lock_mutex(&lock->wait_lock, flags);
564 }
565
566done:
 567	lock_acquired(&lock->dep_map, _RET_IP_);
568 /* got the lock - rejoice! */
569 mutex_remove_waiter(lock, &waiter, current_thread_info());
570 mutex_set_owner(lock);
571
572 /* set it to 0 if there are no waiters left: */
573 if (likely(list_empty(&lock->wait_list)))
574 atomic_set(&lock->count, 0);
575
576 if(post)
577 post(post_arg);
578
579 spin_unlock_mutex(&lock->wait_lock, flags);
580
581 debug_mutex_free_waiter(&waiter);
582 preempt_enable();
583}
584EXPORT_SYMBOL(mutex_lock_sfx);
585
586void mutex_unlock_sfx(struct mutex *lock,
587 side_effect_t pre, unsigned long pre_arg,
588 side_effect_t post, unsigned long post_arg)
589{
590 unsigned long flags;
591
592 spin_lock_mutex(&lock->wait_lock, flags);
593
594 if(pre)
595 pre(pre_arg);
596
597 //mutex_release(&lock->dep_map, nested, _RET_IP_);
598 mutex_release(&lock->dep_map, 1, _RET_IP_);
599 debug_mutex_unlock(lock);
600
601 /*
602 * some architectures leave the lock unlocked in the fastpath failure
 603	 * case, others need to leave it locked. In the latter case we have to
604 * unlock it here
605 */
606 if (__mutex_slowpath_needs_to_unlock())
607 atomic_set(&lock->count, 1);
608
609 if (!list_empty(&lock->wait_list)) {
610 /* get the first entry from the wait-list: */
611 struct mutex_waiter *waiter =
612 list_entry(lock->wait_list.next,
613 struct mutex_waiter, list);
614
615 debug_mutex_wake_waiter(lock, waiter);
616
617 wake_up_process(waiter->task);
618 }
619
620 if(post)
621 post(post_arg);
622
623 spin_unlock_mutex(&lock->wait_lock, flags);
624}
625EXPORT_SYMBOL(mutex_unlock_sfx);
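mutex_lock_sfx()/mutex_unlock_sfx() above let a caller run side effects atomically under the mutex's wait_lock; a non-zero return from the pre callback aborts the lock attempt. A usage sketch with placeholder callbacks; the side_effect_t signature (unsigned long argument, integer-like return) is inferred from how pre()/post() are invoked above:

/* Kernel-side illustration only; note_owner()/clear_owner() are placeholders. */
#include <linux/mutex.h>
#include <linux/sched.h>

static unsigned long note_owner(unsigned long arg)
{
	struct task_struct **slot = (struct task_struct **)arg;

	*slot = current;	/* runs while lock->wait_lock is held */
	return 0;		/* 0 = proceed with the acquisition */
}

static unsigned long clear_owner(unsigned long arg)
{
	struct task_struct **slot = (struct task_struct **)arg;

	*slot = NULL;
	return 0;
}

/* e.g.:
 *	mutex_lock_sfx(&m, note_owner, (unsigned long)&owner, NULL, 0);
 *	... critical section ...
 *	mutex_unlock_sfx(&m, clear_owner, (unsigned long)&owner, NULL, 0);
 */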
diff --git a/kernel/sched.c b/kernel/sched.c
index 2229d0deec4b..9e8d8698323b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -86,6 +86,10 @@
86#include <litmus/sched_trace.h> 86#include <litmus/sched_trace.h>
87#include <litmus/trace.h> 87#include <litmus/trace.h>
88 88
89#ifdef CONFIG_LITMUS_SOFTIRQD
90#include <litmus/litmus_softirq.h>
91#endif
92
89static void litmus_tick(struct rq*, struct task_struct*); 93static void litmus_tick(struct rq*, struct task_struct*);
90 94
91/* 95/*
@@ -4305,6 +4309,7 @@ pick_next_task(struct rq *rq)
4305 BUG(); /* the idle class will always have a runnable task */ 4309 BUG(); /* the idle class will always have a runnable task */
4306} 4310}
4307 4311
4312
4308/* 4313/*
4309 * schedule() is the main scheduler function. 4314 * schedule() is the main scheduler function.
4310 */ 4315 */
@@ -4323,6 +4328,10 @@ need_resched:
4323 rcu_note_context_switch(cpu); 4328 rcu_note_context_switch(cpu);
4324 prev = rq->curr; 4329 prev = rq->curr;
4325 4330
4331#ifdef CONFIG_LITMUS_SOFTIRQD
4332 release_klitirqd_lock(prev);
4333#endif
4334
4326 /* LITMUS^RT: quickly re-evaluate the scheduling decision 4335 /* LITMUS^RT: quickly re-evaluate the scheduling decision
4327 * if the previous one is no longer valid after CTX. 4336 * if the previous one is no longer valid after CTX.
4328 */ 4337 */
@@ -4411,13 +4420,24 @@ litmus_need_resched_nonpreemptible:
4411 goto litmus_need_resched_nonpreemptible; 4420 goto litmus_need_resched_nonpreemptible;
4412 4421
4413 preempt_enable_no_resched(); 4422 preempt_enable_no_resched();
4423
4414 if (need_resched()) 4424 if (need_resched())
4415 goto need_resched; 4425 goto need_resched;
4416 4426
4427#ifdef CONFIG_LITMUS_SOFTIRQD
4428 reacquire_klitirqd_lock(prev);
4429#endif
4430
4431#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
4432 litmus->run_tasklets(prev);
4433#endif
4434
4417 srp_ceiling_block(); 4435 srp_ceiling_block();
4418} 4436}
4419EXPORT_SYMBOL(schedule); 4437EXPORT_SYMBOL(schedule);
4420 4438
4439
4440
4421#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 4441#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
4422 4442
4423static inline bool owner_running(struct mutex *lock, struct task_struct *owner) 4443static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
@@ -4561,6 +4581,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4561 } 4581 }
4562} 4582}
4563 4583
4584
4564/** 4585/**
4565 * __wake_up - wake up threads blocked on a waitqueue. 4586 * __wake_up - wake up threads blocked on a waitqueue.
4566 * @q: the waitqueue 4587 * @q: the waitqueue
@@ -4747,6 +4768,12 @@ void __sched wait_for_completion(struct completion *x)
4747} 4768}
4748EXPORT_SYMBOL(wait_for_completion); 4769EXPORT_SYMBOL(wait_for_completion);
4749 4770
4771void __sched __wait_for_completion_locked(struct completion *x)
4772{
4773 do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
4774}
4775EXPORT_SYMBOL(__wait_for_completion_locked);
4776
4750/** 4777/**
4751 * wait_for_completion_timeout: - waits for completion of a task (w/timeout) 4778 * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
4752 * @x: holds the state of this particular completion 4779 * @x: holds the state of this particular completion
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 94a62c0d4ade..c947a046a6d7 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -33,11 +33,11 @@
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/ftrace.h> 34#include <linux/ftrace.h>
35 35
36static noinline void __down(struct semaphore *sem); 36noinline void __down(struct semaphore *sem);
37static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
38static noinline int __down_killable(struct semaphore *sem); 38static noinline int __down_killable(struct semaphore *sem);
39static noinline int __down_timeout(struct semaphore *sem, long jiffies); 39static noinline int __down_timeout(struct semaphore *sem, long jiffies);
40static noinline void __up(struct semaphore *sem); 40noinline void __up(struct semaphore *sem);
41 41
42/** 42/**
43 * down - acquire the semaphore 43 * down - acquire the semaphore
@@ -190,11 +190,13 @@ EXPORT_SYMBOL(up);
190 190
191/* Functions for the contended case */ 191/* Functions for the contended case */
192 192
193/*
193struct semaphore_waiter { 194struct semaphore_waiter {
194 struct list_head list; 195 struct list_head list;
195 struct task_struct *task; 196 struct task_struct *task;
196 int up; 197 int up;
197}; 198};
199 */
198 200
199/* 201/*
200 * Because this function is inlined, the 'state' parameter will be 202 * Because this function is inlined, the 'state' parameter will be
@@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
233 return -EINTR; 235 return -EINTR;
234} 236}
235 237
236static noinline void __sched __down(struct semaphore *sem) 238noinline void __sched __down(struct semaphore *sem)
237{ 239{
238 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); 240 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
239} 241}
242EXPORT_SYMBOL(__down);
243
240 244
241static noinline int __sched __down_interruptible(struct semaphore *sem) 245static noinline int __sched __down_interruptible(struct semaphore *sem)
242{ 246{
@@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
253 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); 257 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
254} 258}
255 259
256static noinline void __sched __up(struct semaphore *sem) 260noinline void __sched __up(struct semaphore *sem)
257{ 261{
258 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, 262 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
259 struct semaphore_waiter, list); 263 struct semaphore_waiter, list);
@@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem)
261 waiter->up = 1; 265 waiter->up = 1;
262 wake_up_process(waiter->task); 266 wake_up_process(waiter->task);
263} 267}
268EXPORT_SYMBOL(__up); \ No newline at end of file
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fca82c32042b..b013046e8c36 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -29,6 +29,15 @@
29#include <trace/events/irq.h> 29#include <trace/events/irq.h>
30 30
31#include <asm/irq.h> 31#include <asm/irq.h>
32
33#include <litmus/litmus.h>
34#include <litmus/sched_trace.h>
35
36#ifdef CONFIG_LITMUS_NVIDIA
37#include <litmus/nvidia_info.h>
38#include <litmus/trace.h>
39#endif
40
32/* 41/*
33 - No shared variables, all the data are CPU local. 42 - No shared variables, all the data are CPU local.
34 - If a softirq needs serialization, let it serialize itself 43 - If a softirq needs serialization, let it serialize itself
@@ -67,7 +76,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
67 * to the pending events, so lets the scheduler to balance 76 * to the pending events, so lets the scheduler to balance
68 * the softirq load for us. 77 * the softirq load for us.
69 */ 78 */
70static void wakeup_softirqd(void) 79void wakeup_softirqd(void)
71{ 80{
72 /* Interrupts are disabled: no need to stop preemption */ 81 /* Interrupts are disabled: no need to stop preemption */
73 struct task_struct *tsk = __this_cpu_read(ksoftirqd); 82 struct task_struct *tsk = __this_cpu_read(ksoftirqd);
@@ -193,6 +202,7 @@ void local_bh_enable_ip(unsigned long ip)
193} 202}
194EXPORT_SYMBOL(local_bh_enable_ip); 203EXPORT_SYMBOL(local_bh_enable_ip);
195 204
205
196/* 206/*
197 * We restart softirq processing MAX_SOFTIRQ_RESTART times, 207 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
198 * and we fall back to softirqd after that. 208 * and we fall back to softirqd after that.
@@ -206,65 +216,65 @@ EXPORT_SYMBOL(local_bh_enable_ip);
206 216
207asmlinkage void __do_softirq(void) 217asmlinkage void __do_softirq(void)
208{ 218{
209 struct softirq_action *h; 219 struct softirq_action *h;
210 __u32 pending; 220 __u32 pending;
211 int max_restart = MAX_SOFTIRQ_RESTART; 221 int max_restart = MAX_SOFTIRQ_RESTART;
212 int cpu; 222 int cpu;
213 223
214 pending = local_softirq_pending(); 224 pending = local_softirq_pending();
215 account_system_vtime(current); 225 account_system_vtime(current);
216 226
217 __local_bh_disable((unsigned long)__builtin_return_address(0), 227 __local_bh_disable((unsigned long)__builtin_return_address(0),
218 SOFTIRQ_OFFSET); 228 SOFTIRQ_OFFSET);
219 lockdep_softirq_enter(); 229 lockdep_softirq_enter();
220 230
221 cpu = smp_processor_id(); 231 cpu = smp_processor_id();
222restart: 232restart:
223 /* Reset the pending bitmask before enabling irqs */ 233 /* Reset the pending bitmask before enabling irqs */
224 set_softirq_pending(0); 234 set_softirq_pending(0);
225 235
226 local_irq_enable(); 236 local_irq_enable();
227 237
228 h = softirq_vec; 238 h = softirq_vec;
229
230 do {
231 if (pending & 1) {
232 unsigned int vec_nr = h - softirq_vec;
233 int prev_count = preempt_count();
234
235 kstat_incr_softirqs_this_cpu(vec_nr);
236
237 trace_softirq_entry(vec_nr);
238 h->action(h);
239 trace_softirq_exit(vec_nr);
240 if (unlikely(prev_count != preempt_count())) {
241 printk(KERN_ERR "huh, entered softirq %u %s %p"
242 "with preempt_count %08x,"
243 " exited with %08x?\n", vec_nr,
244 softirq_to_name[vec_nr], h->action,
245 prev_count, preempt_count());
246 preempt_count() = prev_count;
247 }
248 239
249 rcu_bh_qs(cpu); 240 do {
250 } 241 if (pending & 1) {
251 h++; 242 unsigned int vec_nr = h - softirq_vec;
252 pending >>= 1; 243 int prev_count = preempt_count();
253 } while (pending);
254 244
255 local_irq_disable(); 245 kstat_incr_softirqs_this_cpu(vec_nr);
256 246
257 pending = local_softirq_pending(); 247 trace_softirq_entry(vec_nr);
258 if (pending && --max_restart) 248 h->action(h);
259 goto restart; 249 trace_softirq_exit(vec_nr);
250 if (unlikely(prev_count != preempt_count())) {
251 printk(KERN_ERR "huh, entered softirq %u %s %p"
252 "with preempt_count %08x,"
253 " exited with %08x?\n", vec_nr,
254 softirq_to_name[vec_nr], h->action,
255 prev_count, preempt_count());
256 preempt_count() = prev_count;
257 }
260 258
261 if (pending) 259 rcu_bh_qs(cpu);
262 wakeup_softirqd(); 260 }
261 h++;
262 pending >>= 1;
263 } while (pending);
263 264
264 lockdep_softirq_exit(); 265 local_irq_disable();
265 266
266 account_system_vtime(current); 267 pending = local_softirq_pending();
267 __local_bh_enable(SOFTIRQ_OFFSET); 268 if (pending && --max_restart)
269 goto restart;
270
271 if (pending)
272 wakeup_softirqd();
273
274 lockdep_softirq_exit();
275
276 account_system_vtime(current);
277 __local_bh_enable(SOFTIRQ_OFFSET);
268} 278}
269 279
270#ifndef __ARCH_HAS_DO_SOFTIRQ 280#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -402,8 +412,99 @@ struct tasklet_head
402static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); 412static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
403static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); 413static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
404 414
415#ifdef CONFIG_LITMUS_NVIDIA
416static int __do_nv_now(struct tasklet_struct* tasklet)
417{
418 int success = 1;
419
420 if(tasklet_trylock(tasklet)) {
421 if (!atomic_read(&tasklet->count)) {
422 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) {
423 BUG();
424 }
425 tasklet->func(tasklet->data);
 426			/* unlocked exactly once below, after both branches */
427 }
428 else {
429 success = 0;
430 }
431
432 tasklet_unlock(tasklet);
433 }
434 else {
435 success = 0;
436 }
437
438 return success;
439}
440#endif
441
442
405void __tasklet_schedule(struct tasklet_struct *t) 443void __tasklet_schedule(struct tasklet_struct *t)
406{ 444{
445#ifdef CONFIG_LITMUS_NVIDIA
446 if(is_nvidia_func(t->func))
447 {
448#if 1
449 // do nvidia tasklets right away and return
450 if(__do_nv_now(t))
451 return;
452#else
453 u32 nvidia_device = get_tasklet_nv_device_num(t);
454 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
455 // __FUNCTION__, nvidia_device,litmus_clock());
456
457 unsigned long flags;
458 struct task_struct* device_owner;
459
460 lock_nv_registry(nvidia_device, &flags);
461
462 device_owner = get_nv_max_device_owner(nvidia_device);
463
464 if(device_owner==NULL)
465 {
466 t->owner = NULL;
467 }
468 else
469 {
470 if(is_realtime(device_owner))
471 {
472 TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
473 __FUNCTION__, nvidia_device,litmus_clock());
474 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
475 __FUNCTION__,device_owner->pid,nvidia_device);
476
477 t->owner = device_owner;
478 sched_trace_tasklet_release(t->owner);
479
480 if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
481 {
482 unlock_nv_registry(nvidia_device, &flags);
483 return;
484 }
485 else
486 {
487 t->owner = NULL; /* fall through to normal scheduling */
488 }
489 }
490 else
491 {
492 t->owner = NULL;
493 }
494 }
495 unlock_nv_registry(nvidia_device, &flags);
496#endif
497 }
498
499#endif
500
501 ___tasklet_schedule(t);
502}
503EXPORT_SYMBOL(__tasklet_schedule);
504
505
506void ___tasklet_schedule(struct tasklet_struct *t)
507{
407 unsigned long flags; 508 unsigned long flags;
408 509
409 local_irq_save(flags); 510 local_irq_save(flags);
@@ -413,11 +514,65 @@ void __tasklet_schedule(struct tasklet_struct *t)
413 raise_softirq_irqoff(TASKLET_SOFTIRQ); 514 raise_softirq_irqoff(TASKLET_SOFTIRQ);
414 local_irq_restore(flags); 515 local_irq_restore(flags);
415} 516}
517EXPORT_SYMBOL(___tasklet_schedule);
416 518
417EXPORT_SYMBOL(__tasklet_schedule);
418 519
419void __tasklet_hi_schedule(struct tasklet_struct *t) 520void __tasklet_hi_schedule(struct tasklet_struct *t)
420{ 521{
522#ifdef CONFIG_LITMUS_NVIDIA
523 if(is_nvidia_func(t->func))
524 {
525 u32 nvidia_device = get_tasklet_nv_device_num(t);
526 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
527 // __FUNCTION__, nvidia_device,litmus_clock());
528
529 unsigned long flags;
530 struct task_struct* device_owner;
531
532 lock_nv_registry(nvidia_device, &flags);
533
534 device_owner = get_nv_max_device_owner(nvidia_device);
535
536 if(device_owner==NULL)
537 {
538 t->owner = NULL;
539 }
540 else
541 {
542 if( is_realtime(device_owner))
543 {
544 TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n",
545 __FUNCTION__, nvidia_device,litmus_clock());
546 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
547 __FUNCTION__,device_owner->pid,nvidia_device);
548
549 t->owner = device_owner;
550 sched_trace_tasklet_release(t->owner);
551 if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device)))
552 {
553 unlock_nv_registry(nvidia_device, &flags);
554 return;
555 }
556 else
557 {
558 t->owner = NULL; /* fall through to normal scheduling */
559 }
560 }
561 else
562 {
563 t->owner = NULL;
564 }
565 }
566 unlock_nv_registry(nvidia_device, &flags);
567 }
568#endif
569
570 ___tasklet_hi_schedule(t);
571}
572EXPORT_SYMBOL(__tasklet_hi_schedule);
573
574void ___tasklet_hi_schedule(struct tasklet_struct* t)
575{
421 unsigned long flags; 576 unsigned long flags;
422 577
423 local_irq_save(flags); 578 local_irq_save(flags);
@@ -427,19 +582,72 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
427 raise_softirq_irqoff(HI_SOFTIRQ); 582 raise_softirq_irqoff(HI_SOFTIRQ);
428 local_irq_restore(flags); 583 local_irq_restore(flags);
429} 584}
430 585EXPORT_SYMBOL(___tasklet_hi_schedule);
431EXPORT_SYMBOL(__tasklet_hi_schedule);
432 586
433void __tasklet_hi_schedule_first(struct tasklet_struct *t) 587void __tasklet_hi_schedule_first(struct tasklet_struct *t)
434{ 588{
435 BUG_ON(!irqs_disabled()); 589 BUG_ON(!irqs_disabled());
590#ifdef CONFIG_LITMUS_NVIDIA
591 if(is_nvidia_func(t->func))
592 {
593 u32 nvidia_device = get_tasklet_nv_device_num(t);
594 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
595 // __FUNCTION__, nvidia_device,litmus_clock());
596 unsigned long flags;
597 struct task_struct* device_owner;
598
599 lock_nv_registry(nvidia_device, &flags);
600
601 device_owner = get_nv_max_device_owner(nvidia_device);
602
603 if(device_owner==NULL)
604 {
605 t->owner = NULL;
606 }
607 else
608 {
609 if(is_realtime(device_owner))
610 {
611 TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
612 __FUNCTION__, nvidia_device,litmus_clock());
613
614 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
615 __FUNCTION__,device_owner->pid,nvidia_device);
616
617 t->owner = device_owner;
618 sched_trace_tasklet_release(t->owner);
619 if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device)))
620 {
621 unlock_nv_registry(nvidia_device, &flags);
622 return;
623 }
624 else
625 {
626 t->owner = NULL; /* fall through to normal scheduling */
627 }
628 }
629 else
630 {
631 t->owner = NULL;
632 }
633 }
634 unlock_nv_registry(nvidia_device, &flags);
635 }
636#endif
637
638 ___tasklet_hi_schedule_first(t);
639}
640EXPORT_SYMBOL(__tasklet_hi_schedule_first);
641
642void ___tasklet_hi_schedule_first(struct tasklet_struct* t)
643{
644 BUG_ON(!irqs_disabled());
436 645
437 t->next = __this_cpu_read(tasklet_hi_vec.head); 646 t->next = __this_cpu_read(tasklet_hi_vec.head);
438 __this_cpu_write(tasklet_hi_vec.head, t); 647 __this_cpu_write(tasklet_hi_vec.head, t);
439 __raise_softirq_irqoff(HI_SOFTIRQ); 648 __raise_softirq_irqoff(HI_SOFTIRQ);
440} 649}
441 650EXPORT_SYMBOL(___tasklet_hi_schedule_first);
442EXPORT_SYMBOL(__tasklet_hi_schedule_first);
443 651
444static void tasklet_action(struct softirq_action *a) 652static void tasklet_action(struct softirq_action *a)
445{ 653{
@@ -495,6 +703,7 @@ static void tasklet_hi_action(struct softirq_action *a)
495 if (!atomic_read(&t->count)) { 703 if (!atomic_read(&t->count)) {
496 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) 704 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
497 BUG(); 705 BUG();
706
498 t->func(t->data); 707 t->func(t->data);
499 tasklet_unlock(t); 708 tasklet_unlock(t);
500 continue; 709 continue;
@@ -518,8 +727,13 @@ void tasklet_init(struct tasklet_struct *t,
518 t->next = NULL; 727 t->next = NULL;
519 t->state = 0; 728 t->state = 0;
520 atomic_set(&t->count, 0); 729 atomic_set(&t->count, 0);
730
521 t->func = func; 731 t->func = func;
522 t->data = data; 732 t->data = data;
733
734#ifdef CONFIG_LITMUS_SOFTIRQD
735 t->owner = NULL;
736#endif
523} 737}
524 738
525EXPORT_SYMBOL(tasklet_init); 739EXPORT_SYMBOL(tasklet_init);
@@ -534,6 +748,7 @@ void tasklet_kill(struct tasklet_struct *t)
534 yield(); 748 yield();
535 } while (test_bit(TASKLET_STATE_SCHED, &t->state)); 749 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
536 } 750 }
751
537 tasklet_unlock_wait(t); 752 tasklet_unlock_wait(t);
538 clear_bit(TASKLET_STATE_SCHED, &t->state); 753 clear_bit(TASKLET_STATE_SCHED, &t->state);
539} 754}
@@ -808,6 +1023,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
808 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { 1023 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
809 if (*i == t) { 1024 if (*i == t) {
810 *i = t->next; 1025 *i = t->next;
1026
811 /* If this was the tail element, move the tail ptr */ 1027 /* If this was the tail element, move the tail ptr */
812 if (*i == NULL) 1028 if (*i == NULL)
813 per_cpu(tasklet_vec, cpu).tail = i; 1029 per_cpu(tasklet_vec, cpu).tail = i;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0400553f0d04..6b59d59ce3cf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -44,6 +44,13 @@
44 44
45#include "workqueue_sched.h" 45#include "workqueue_sched.h"
46 46
47#ifdef CONFIG_LITMUS_NVIDIA
48#include <litmus/litmus.h>
49#include <litmus/sched_trace.h>
50#include <litmus/nvidia_info.h>
51#endif
52
53
47enum { 54enum {
48 /* global_cwq flags */ 55 /* global_cwq flags */
49 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ 56 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
@@ -1047,9 +1054,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1047 work_flags |= WORK_STRUCT_DELAYED; 1054 work_flags |= WORK_STRUCT_DELAYED;
1048 worklist = &cwq->delayed_works; 1055 worklist = &cwq->delayed_works;
1049 } 1056 }
1050
1051 insert_work(cwq, work, worklist, work_flags); 1057 insert_work(cwq, work, worklist, work_flags);
1052
1053 spin_unlock_irqrestore(&gcwq->lock, flags); 1058 spin_unlock_irqrestore(&gcwq->lock, flags);
1054} 1059}
1055 1060
@@ -2687,10 +2692,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
2687 */ 2692 */
2688int schedule_work(struct work_struct *work) 2693int schedule_work(struct work_struct *work)
2689{ 2694{
2690 return queue_work(system_wq, work); 2695#if 0
2696#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2697 if(is_nvidia_func(work->func))
2698 {
2699 u32 nvidiaDevice = get_work_nv_device_num(work);
2700
2701 //1) Ask Litmus which task owns GPU <nvidiaDevice>. (API to be defined.)
2702 unsigned long flags;
2703 struct task_struct* device_owner;
2704
2705 lock_nv_registry(nvidiaDevice, &flags);
2706
2707 device_owner = get_nv_max_device_owner(nvidiaDevice);
2708
2709 //2) If there is an owner, set work->owner to the owner's task struct.
2710 if(device_owner==NULL)
2711 {
2712 work->owner = NULL;
2713 //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice);
2714 }
2715 else
2716 {
2717 if( is_realtime(device_owner))
2718 {
2719 TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n",
2720 __FUNCTION__, nvidiaDevice,litmus_clock());
2721 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2722 __FUNCTION__,
2723 device_owner->pid,
2724 nvidiaDevice);
2725
2726 //3) Call litmus_schedule_work() and return (don't execute the rest
2727		// of schedule_work()).
2728 work->owner = device_owner;
2729 sched_trace_work_release(work->owner);
2730 if(likely(litmus_schedule_work(work, nvidiaDevice)))
2731 {
2732 unlock_nv_registry(nvidiaDevice, &flags);
2733 return 1;
2734 }
2735 else
2736 {
2737 work->owner = NULL; /* fall through to normal work scheduling */
2738 }
2739 }
2740 else
2741 {
2742 work->owner = NULL;
2743 }
2744 }
2745 unlock_nv_registry(nvidiaDevice, &flags);
2746 }
2747#endif
2748#endif
2749 return(__schedule_work(work));
2691} 2750}
2692EXPORT_SYMBOL(schedule_work); 2751EXPORT_SYMBOL(schedule_work);
2693 2752
2753int __schedule_work(struct work_struct* work)
2754{
2755 return queue_work(system_wq, work);
2756}
2757EXPORT_SYMBOL(__schedule_work);
2758
2694/* 2759/*
2695 * schedule_work_on - put work task on a specific cpu 2760 * schedule_work_on - put work task on a specific cpu
2696 * @cpu: cpu to put the work task on 2761 * @cpu: cpu to put the work task on
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 68459d4dca41..95e0671e2aec 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -60,6 +60,42 @@ config LITMUS_LOCKING
60 Say Yes if you want to include locking protocols such as the FMLP and 60 Say Yes if you want to include locking protocols such as the FMLP and
61 Baker's SRP. 61 Baker's SRP.
62 62
63config LITMUS_AFFINITY_LOCKING
64 bool "Enable affinity infrastructure in k-exclusion locking protocols."
65 depends on LITMUS_LOCKING
66 default n
67 help
68 Enable affinity tracking infrastructure in k-exclusion locking protocols.
 69	  This only enables the *infrastructure*, not actual affinity algorithms.
70
71 If unsure, say No.
72
73config LITMUS_NESTED_LOCKING
74 bool "Support for nested inheritance in locking protocols"
75 depends on LITMUS_LOCKING
76 default n
77 help
78 Enable nested priority inheritance.
79
80config LITMUS_DGL_SUPPORT
81 bool "Support for dynamic group locks"
82 depends on LITMUS_NESTED_LOCKING
83 default n
84 help
85 Enable dynamic group lock support.
86
87config LITMUS_MAX_DGL_SIZE
88 int "Maximum size of a dynamic group lock."
89 depends on LITMUS_DGL_SUPPORT
90 range 1 128
91 default "10"
92 help
93 Dynamic group lock data structures are allocated on the process
 94	  stack when a group is requested. We set a maximum number of
 95	  locks per dynamic group to avoid dynamic allocation.
96
97 TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE.
98
63endmenu 99endmenu
64 100
65menu "Performance Enhancements" 101menu "Performance Enhancements"
@@ -79,6 +115,52 @@ config SCHED_CPU_AFFINITY
79 115
80 Say Yes if unsure. 116 Say Yes if unsure.
81 117
118choice
119 prompt "EDF Tie-Break Behavior"
120 default EDF_TIE_BREAK_LATENESS_NORM
121 help
122 Allows the configuration of tie-breaking behavior when the deadlines
123 of two EDF-scheduled tasks are equal.
124
125 config EDF_TIE_BREAK_LATENESS
126 bool "Lateness-based Tie Break"
127 help
128 Break ties between two jobs, A and B, based upon the lateness of their
129 prior jobs. The job with the greatest lateness has priority. Note that
130 lateness has a negative value if the prior job finished before its
131 deadline.
132
133 config EDF_TIE_BREAK_LATENESS_NORM
134 bool "Normalized Lateness-based Tie Break"
135 help
136 Break ties between two jobs, A and B, based upon the lateness, normalized
137 by relative deadline, of their prior jobs. The job with the greatest
138 normalized lateness has priority. Note that lateness has a negative value
139 if the prior job finished before its deadline.
140
 141	    Normalized lateness tie-breaks are likely preferable to non-normalized
142 tie-breaks if the execution times and/or relative deadlines of tasks in a
143 task set vary greatly.
144
145 config EDF_TIE_BREAK_HASH
146 bool "Hash-based Tie Breaks"
147 help
148 Break ties between two jobs, A and B, with equal deadlines by using a
149 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
 150	    A has a ~50% chance of winning a given tie-break.
151
152 config EDF_PID_TIE_BREAK
153 bool "PID-based Tie Breaks"
154 help
155 Break ties based upon OS-assigned thread IDs. Use this option if
 156	    required by the algorithm's real-time analysis or if per-task response-time
157 jitter must be minimized.
158
159 NOTES:
 160	    * This tie-breaking method was the default in Litmus 2012.2 and before.
161
162endchoice
163
82endmenu 164endmenu
83 165
84menu "Tracing" 166menu "Tracing"
@@ -121,7 +203,7 @@ config SCHED_TASK_TRACE
121config SCHED_TASK_TRACE_SHIFT 203config SCHED_TASK_TRACE_SHIFT
122 int "Buffer size for sched_trace_xxx() events" 204 int "Buffer size for sched_trace_xxx() events"
123 depends on SCHED_TASK_TRACE 205 depends on SCHED_TASK_TRACE
124 range 8 13 206 range 8 15
125 default 9 207 default 9
126 help 208 help
127 209
@@ -233,4 +315,114 @@ config PREEMPT_STATE_TRACE
233 315
234endmenu 316endmenu
235 317
318menu "Interrupt Handling"
319
320choice
321 prompt "Scheduling of interrupt bottom-halves in Litmus."
322 default LITMUS_SOFTIRQD_NONE
323 depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
324 help
325 Schedule tasklets with known priorities in Litmus.
326
327config LITMUS_SOFTIRQD_NONE
328 bool "No tasklet scheduling in Litmus."
329 help
330 Don't schedule tasklets in Litmus. Default.
331
332config LITMUS_SOFTIRQD
333 bool "Spawn klitirqd interrupt handling threads."
334 help
335 Create klitirqd interrupt handling threads. Work must be
336 specifically dispatched to these workers. (Softirqs for
337 Litmus tasks are not magically redirected to klitirqd.)
338
339 G-EDF/RM, C-EDF/RM ONLY for now!
340
341
342config LITMUS_PAI_SOFTIRQD
343 bool "Defer tasklets to context switch points."
344 help
345 Only execute scheduled tasklet bottom halves at
 346	  scheduling points. Reduces context switch overhead
 347	  at the cost of non-preemptive durations of bottom-half
 348	  processing.
349
350 G-EDF/RM, C-EDF/RM ONLY for now!
351
352endchoice
353
354
355config NR_LITMUS_SOFTIRQD
356 int "Number of klitirqd."
357 depends on LITMUS_SOFTIRQD
358 range 1 4096
359 default "1"
360 help
 361	  Should be <= the number of CPUs in your system.
362
363config LITMUS_NVIDIA
364 bool "Litmus handling of NVIDIA interrupts."
365 default n
366 help
367 Direct tasklets from NVIDIA devices to Litmus's klitirqd
368 or PAI interrupt handling routines.
369
370 If unsure, say No.
371
372config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT
373 bool "Enable affinity-aware heuristics to improve GPU assignment."
374 depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING
375 default n
376 help
377 Enable several heuristics to improve the assignment
378 of GPUs to real-time tasks to reduce the overheads
379 of memory migrations.
380
381 If unsure, say No.
382
383config NV_DEVICE_NUM
384 int "Number of NVIDIA GPUs."
385 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
386 range 1 4096
387 default "1"
388 help
 389	  Should be <= the number of CPUs and
 390	  <= the number of GPUs in your system.
391
392config NV_MAX_SIMULT_USERS
393 int "Maximum number of threads sharing a GPU simultanously"
394 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
395 range 1 3
396 default "2"
397 help
398 Should be equal to the #copy_engines + #execution_engines
399 of the GPUs in your system.
400
401 Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?)
402 Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx)
403 Older = 1 (ex. GTX-2xx)
404
405choice
406 prompt "CUDA/Driver Version Support"
407 default CUDA_4_0
408 depends on LITMUS_NVIDIA
409 help
410 Select the version of CUDA/driver to support.
411
412config CUDA_4_0
413 bool "CUDA 4.0"
414 depends on LITMUS_NVIDIA
415 help
416 Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
417
418config CUDA_3_2
419 bool "CUDA 3.2"
420 depends on LITMUS_NVIDIA
421 help
422 Support CUDA 3.2 (dev. driver version: x86_64-260.24)
423
424endchoice
425
426endmenu
427
236endmenu 428endmenu
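
Note on LITMUS_MAX_DGL_SIZE: the option above exists so that the bookkeeping for a dynamic group lock (DGL) request can live on the requesting task's stack instead of being allocated dynamically. The following is a minimal sketch of that idea only; the struct and helper names are hypothetical and are not part of this patch.

	/* Illustrative only -- not code from this patch. A DGL request of
	 * bounded size fits entirely on the requesting task's stack. */
	struct litmus_lock;                     /* opaque; defined in litmus/locking.h */

	struct dgl_request_sketch {             /* hypothetical name */
		int size;                       /* locks requested so far */
		struct litmus_lock *locks[CONFIG_LITMUS_MAX_DGL_SIZE];
	};

	static inline int dgl_request_add(struct dgl_request_sketch *req,
	                                  struct litmus_lock *l)
	{
		if (req->size >= CONFIG_LITMUS_MAX_DGL_SIZE)
			return -1;              /* group too large; see the TODO above */
		req->locks[req->size++] = l;
		return 0;
	}
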
diff --git a/litmus/Makefile b/litmus/Makefile
index d26ca7076b62..59c018560ee9 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -30,3 +30,11 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
33
34obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o
35obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o
36obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
37obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
38obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
39
40obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
index 3fa6dd789400..cd93249b5506 100644
--- a/litmus/affinity.c
+++ b/litmus/affinity.c
@@ -26,7 +26,7 @@ void init_topology(void) {
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); 26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 } 27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", 28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i, 29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i])); 30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 } 31 }
32 32
diff --git a/litmus/budget.c b/litmus/budget.c
index f7712be29adb..518174a37a3b 100644
--- a/litmus/budget.c
+++ b/litmus/budget.c
@@ -1,11 +1,13 @@
1#include <linux/sched.h> 1#include <linux/sched.h>
2#include <linux/percpu.h> 2#include <linux/percpu.h>
3#include <linux/hrtimer.h> 3#include <linux/hrtimer.h>
4#include <linux/signal.h>
4 5
5#include <litmus/litmus.h> 6#include <litmus/litmus.h>
6#include <litmus/preempt.h> 7#include <litmus/preempt.h>
7 8
8#include <litmus/budget.h> 9#include <litmus/budget.h>
10#include <litmus/signal.h>
9 11
10struct enforcement_timer { 12struct enforcement_timer {
11 /* The enforcement timer is used to accurately police 13 /* The enforcement timer is used to accurately police
@@ -64,7 +66,7 @@ static void arm_enforcement_timer(struct enforcement_timer* et,
64 66
65 /* Calling this when there is no budget left for the task 67 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */ 68 * makes no sense, unless the task is non-preemptive. */
67 BUG_ON(budget_exhausted(t) && (!is_np(t))); 69 BUG_ON(budget_exhausted(t) && !is_np(t));
68 70
69 /* __hrtimer_start_range_ns() cancels the timer 71 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */ 72 * anyway, so we don't have to check whether it is still armed */
@@ -86,7 +88,7 @@ void update_enforcement_timer(struct task_struct* t)
86{ 88{
87 struct enforcement_timer* et = &__get_cpu_var(budget_timer); 89 struct enforcement_timer* et = &__get_cpu_var(budget_timer);
88 90
89 if (t && budget_precisely_enforced(t)) { 91 if (t && budget_precisely_tracked(t) && !sigbudget_sent(t)) {
90 /* Make sure we call into the scheduler when this budget 92 /* Make sure we call into the scheduler when this budget
91 * expires. */ 93 * expires. */
92 arm_enforcement_timer(et, t); 94 arm_enforcement_timer(et, t);
@@ -96,6 +98,16 @@ void update_enforcement_timer(struct task_struct* t)
96 } 98 }
97} 99}
98 100
101void send_sigbudget(struct task_struct* t)
102{
103 if (!test_and_set_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) {
104 /* signal has not yet been sent and we are responsible for sending
105 * since we just set the sent-bit when it was previously 0. */
106
107 TRACE_TASK(t, "SIG_BUDGET being sent!\n");
108 send_sig(SIG_BUDGET, t, 1); /* '1' denotes signal sent from kernel */
109 }
110}
99 111
100static int __init init_budget_enforcement(void) 112static int __init init_budget_enforcement(void)
101{ 113{
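
Note on send_sigbudget(): the function added above delivers SIG_BUDGET to a task at most once per job; the test_and_set_bit() on RT_JOB_SIG_BUDGET_SENT guarantees exactly one sender wins. A user-space task that opts into signal-based budget policing would typically install a handler for that signal. The sketch below is illustrative only: SIG_BUDGET's real value comes from the litmus headers added by this patch, so SIGXCPU is used here purely as a stand-in.

	/* Illustrative user-space sketch: react to a once-per-job budget signal. */
	#include <signal.h>
	#include <stdio.h>

	#define SIG_BUDGET SIGXCPU   /* stand-in; real value comes from the litmus headers */

	static volatile sig_atomic_t budget_overrun;

	static void on_budget(int sig)
	{
		(void)sig;
		budget_overrun = 1;  /* the kernel sends this at most once per job */
	}

	int main(void)
	{
		struct sigaction sa;
		sa.sa_handler = on_budget;
		sigemptyset(&sa.sa_mask);
		sa.sa_flags = 0;
		sigaction(SIG_BUDGET, &sa, NULL);

		/* ... job body; poll budget_overrun at convenient points ... */
		printf("budget overrun seen: %d\n", (int)budget_overrun);
		return 0;
	}
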
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 9b44dc2d8d1e..39ce1816ee04 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -12,40 +12,85 @@
12#include <litmus/sched_plugin.h> 12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h> 13#include <litmus/sched_trace.h>
14 14
15#ifdef CONFIG_LITMUS_NESTED_LOCKING
16#include <litmus/locking.h>
17#endif
18
15#include <litmus/edf_common.h> 19#include <litmus/edf_common.h>
16 20
21#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
22#include <litmus/fpmath.h>
23#endif
24
25#ifdef CONFIG_EDF_TIE_BREAK_HASH
26#include <linux/hash.h>
27static inline long edf_hash(struct task_struct *t)
28{
29 /* pid is 32 bits, so normally we would shove that into the
 30	 * upper 32-bits and put the job number in the bottom
 31	 * and hash the 64-bit number with hash_64(). Sadly,
 32	 * in testing, hash_64() doesn't distribute keys where the
33 * upper bits are close together (as would be the case with
34 * pids) and job numbers are equal (as would be the case with
35 * synchronous task sets with all relative deadlines equal).
36 *
37 * A 2006 Linux patch proposed the following solution
38 * (but for some reason it wasn't accepted...).
39 *
40 * At least this workaround works for 32-bit systems as well.
41 */
42 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
43}
44#endif
45
46
17/* edf_higher_prio - returns true if first has a higher EDF priority 47/* edf_higher_prio - returns true if first has a higher EDF priority
18 * than second. Deadline ties are broken by PID. 48 * than second. Deadline ties are broken by PID.
19 * 49 *
20 * both first and second may be NULL 50 * both first and second may be NULL
21 */ 51 */
22int edf_higher_prio(struct task_struct* first, 52#ifdef CONFIG_LITMUS_NESTED_LOCKING
23 struct task_struct* second) 53int __edf_higher_prio(
54 struct task_struct* first, comparison_mode_t first_mode,
55 struct task_struct* second, comparison_mode_t second_mode)
56#else
57int edf_higher_prio(struct task_struct* first, struct task_struct* second)
58#endif
24{ 59{
25 struct task_struct *first_task = first; 60 struct task_struct *first_task = first;
26 struct task_struct *second_task = second; 61 struct task_struct *second_task = second;
27 62
28 /* There is no point in comparing a task to itself. */ 63 /* There is no point in comparing a task to itself. */
29 if (first && first == second) { 64 if (first && first == second) {
30 TRACE_TASK(first, 65 TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid);
31 "WARNING: pointless edf priority comparison.\n"); 66 WARN_ON(1);
32 return 0; 67 return 0;
33 } 68 }
34 69
35 70
36 /* check for NULL tasks */ 71 /* check for NULL tasks */
37 if (!first || !second) 72 if (!first || !second) {
38 return first && !second; 73 return first && !second;
74 }
39 75
40#ifdef CONFIG_LITMUS_LOCKING 76#ifdef CONFIG_LITMUS_LOCKING
41 77 /* Check for EFFECTIVE priorities. Change task
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case. 78 * used for comparison in such a case.
44 */ 79 */
45 if (unlikely(first->rt_param.inh_task)) 80 if (unlikely(first->rt_param.inh_task)
81#ifdef CONFIG_LITMUS_NESTED_LOCKING
82 && (first_mode == EFFECTIVE)
83#endif
84 ) {
46 first_task = first->rt_param.inh_task; 85 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task)) 86 }
87 if (unlikely(second->rt_param.inh_task)
88#ifdef CONFIG_LITMUS_NESTED_LOCKING
89 && (second_mode == EFFECTIVE)
90#endif
91 ) {
48 second_task = second->rt_param.inh_task; 92 second_task = second->rt_param.inh_task;
93 }
49 94
50 /* Check for priority boosting. Tie-break by start of boosting. 95 /* Check for priority boosting. Tie-break by start of boosting.
51 */ 96 */
@@ -53,37 +98,167 @@ int edf_higher_prio(struct task_struct* first,
53 /* first_task is boosted, how about second_task? */ 98 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) || 99 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task), 100 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task))) 101 get_boost_start(second_task))) {
57 return 1; 102 return 1;
58 else 103 }
104 else {
59 return 0; 105 return 0;
60 } else if (unlikely(is_priority_boosted(second_task))) 106 }
107 }
108 else if (unlikely(is_priority_boosted(second_task))) {
61 /* second_task is boosted, first is not*/ 109 /* second_task is boosted, first is not*/
62 return 0; 110 return 0;
111 }
63 112
64#endif 113#endif
65 114
66 115 if (!is_realtime(second_task)) {
67 return !is_realtime(second_task) || 116 return 1;
68 117 }
69 /* is the deadline of the first task earlier? 118 else if (earlier_deadline(first_task, second_task)) {
70 * Then it has higher priority. 119 return 1;
120 }
121 else if (get_deadline(first_task) == get_deadline(second_task)) {
122 /* Need to tie break. All methods must set pid_break to 0/1 if
123 * first_task does not have priority over second_task.
71 */ 124 */
72 earlier_deadline(first_task, second_task) || 125 int pid_break;
73 126
74 /* Do we have a deadline tie? 127#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
75 * Then break by PID. 128 /* Tie break by lateness. Jobs with greater lateness get
129 * priority. This should spread tardiness across all tasks,
130 * especially in task sets where all tasks have the same
131 * period and relative deadlines.
76 */ 132 */
77 (get_deadline(first_task) == get_deadline(second_task) && 133 if (get_lateness(first_task) > get_lateness(second_task)) {
78 (first_task->pid < second_task->pid || 134 return 1;
135 }
136 pid_break = (get_lateness(first_task) == get_lateness(second_task));
137
79 138
80 /* If the PIDs are the same then the task with the inherited 139#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
81 * priority wins. 140 /* Tie break by lateness, normalized by relative deadline. Jobs with
141 * greater normalized lateness get priority.
142 *
143 * Note: Considered using the algebraically equivalent
144 * lateness(first)*relative_deadline(second) >
145 lateness(second)*relative_deadline(first)
146 * to avoid fixed-point math, but values are prone to overflow if inputs
147 * are on the order of several seconds, even in 64-bit.
148 */
149 fp_t fnorm = _frac(get_lateness(first_task),
150 get_rt_relative_deadline(first_task));
151 fp_t snorm = _frac(get_lateness(second_task),
152 get_rt_relative_deadline(second_task));
153 if (_gt(fnorm, snorm)) {
154 return 1;
155 }
156 pid_break = _eq(fnorm, snorm);
157
158
159#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
 160	 /* Tie break by comparing hashes of the (pid, job#) tuple. There should be
161 * a 50% chance that first_task has a higher priority than second_task.
82 */ 162 */
83 (first_task->pid == second_task->pid && 163 long fhash = edf_hash(first_task);
84 !second->rt_param.inh_task))); 164 long shash = edf_hash(second_task);
165 if (fhash < shash) {
166 return 1;
167 }
168 pid_break = (fhash == shash);
169#else
170
171
172 /* CONFIG_EDF_PID_TIE_BREAK */
173 pid_break = 1; // fall through to tie-break by pid;
174#endif
175
176 /* Tie break by pid */
177 if(pid_break) {
178 if (first_task->pid < second_task->pid) {
179 return 1;
180 }
181 else if (first_task->pid == second_task->pid) {
182#ifdef CONFIG_LITMUS_SOFTIRQD
183 if (first_task->rt_param.is_proxy_thread <
184 second_task->rt_param.is_proxy_thread) {
185 return 1;
186 }
187#endif
188 /* Something could be wrong if you get this far. */
189 if (unlikely(first->rt_param.inh_task ==
190 second->rt_param.inh_task)) {
191 /* Both tasks have the same inherited priority.
192 * Likely in a bug-condition.
193 */
194 if (likely(first->pid < second->pid)) {
195 return 1;
196 }
197 else if (first->pid == second->pid) {
198 WARN_ON(1);
199 }
200 }
201 else {
202 /* At least one task must inherit */
203 BUG_ON(!first->rt_param.inh_task &&
204 !second->rt_param.inh_task);
205
206 /* The task with the inherited priority wins. */
207 if (!second->rt_param.inh_task) {
208 TRACE_CUR("unusual comparison: "
209 "first = %s/%d first_task = %s/%d "
210 "second = %s/%d second_task = %s/%d\n",
211 first->comm, first->pid,
212 (first->rt_param.inh_task) ? first->rt_param.inh_task->comm : "(nil)",
213 (first->rt_param.inh_task) ? first->rt_param.inh_task->pid : 0,
214 second->comm, second->pid,
215 (second->rt_param.inh_task) ? second->rt_param.inh_task->comm : "(nil)",
216 (second->rt_param.inh_task) ? second->rt_param.inh_task->pid : 0);
217 return 1;
218 }
219 }
220 }
221 }
222 }
223
224 return 0; /* fall-through. prio(second_task) > prio(first_task) */
225}
226
227
228#ifdef CONFIG_LITMUS_NESTED_LOCKING
229int edf_higher_prio(struct task_struct* first, struct task_struct* second)
230{
231 return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE);
232}
233
234int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b)
235{
236 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
237 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
238
239 return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE);
240}
241
242int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b)
243{
244 return edf_max_heap_order(b, a); // swap comparison
245}
246
247int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
248{
249 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
250 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
251
252 return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE);
85} 253}
86 254
255int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
256{
257 return edf_max_heap_base_priority_order(b, a); // swap comparison
258}
259#endif
260
261
87int edf_ready_order(struct bheap_node* a, struct bheap_node* b) 262int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
88{ 263{
89 return edf_higher_prio(bheap2task(a), bheap2task(b)); 264 return edf_higher_prio(bheap2task(a), bheap2task(b));
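
Note on the CONFIG_EDF_TIE_BREAK_LATENESS_NORM branch above: the in-code comment explains why the tie-break uses fixed-point division rather than the algebraically equivalent cross-multiplication lateness(first)*relative_deadline(second) > lateness(second)*relative_deadline(first). With nanosecond-scale inputs of a few seconds each, the products no longer fit in 64 bits. A quick illustrative check (not part of the patch; it relies on the GCC/Clang __int128 extension):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t lateness = 5ULL * 1000000000ULL;   /* 5 s of lateness, in ns       */
		uint64_t rel_dl   = 5ULL * 1000000000ULL;   /* 5 s relative deadline, in ns */
		unsigned __int128 prod = (unsigned __int128)lateness * rel_dl;

		/* 2.5e19 exceeds UINT64_MAX (~1.8e19), so even an unsigned 64-bit
		 * product would silently wrap. */
		printf("overflows 64-bit? %s\n",
		       prod > (unsigned __int128)UINT64_MAX ? "yes" : "no");
		return 0;
	}

Five seconds times five seconds in nanoseconds is 2.5e19, beyond both INT64_MAX and UINT64_MAX, which is why the patch normalizes with _frac() instead.
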
diff --git a/litmus/fdso.c b/litmus/fdso.c
index cd85b9cd9a0a..2411d16ba486 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -20,13 +20,28 @@
20 20
21extern struct fdso_ops generic_lock_ops; 21extern struct fdso_ops generic_lock_ops;
22 22
23#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
24extern struct fdso_ops generic_affinity_ops;
25#endif
26
23static const struct fdso_ops* fdso_ops[] = { 27static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */ 28 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */ 29 &generic_lock_ops, /* SRP_SEM */
30
26 &generic_lock_ops, /* MPCP_SEM */ 31 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */ 32 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */ 33 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */ 34 &generic_lock_ops, /* PCP_SEM */
35
36 &generic_lock_ops, /* RSM_MUTEX */
37 &generic_lock_ops, /* IKGLP_SEM */
38 &generic_lock_ops, /* KFMLP_SEM */
39#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
40 &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
41 &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
42 &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
43 &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
44#endif
30}; 45};
31 46
32static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) 47static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
new file mode 100644
index 000000000000..7d73105b4181
--- /dev/null
+++ b/litmus/gpu_affinity.c
@@ -0,0 +1,231 @@
1
2#ifdef CONFIG_LITMUS_NVIDIA
3
4#include <linux/sched.h>
5#include <litmus/litmus.h>
6#include <litmus/gpu_affinity.h>
7
8#include <litmus/sched_trace.h>
9
10#define OBSERVATION_CAP ((lt_t)(2e9))
11
12// reason for skew: high outliers are less
13// frequent and way out of bounds
14//#define HI_THRESHOLD 2
15//#define LO_THRESHOLD 4
16
17#define NUM_STDEV_NUM 1
18#define NUM_STDEV_DENOM 2
19
20#define MIN(a, b) ((a < b) ? a : b)
21
22static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
23{
24 fp_t relative_err;
25 fp_t err, new;
26 fp_t actual = _integer_to_fp(observed);
27
28 err = _sub(actual, fb->est);
29 new = _add(_mul(a, err), _mul(b, fb->accum_err));
30
31 relative_err = _div(err, actual);
32
33 fb->est = new;
34 fb->accum_err = _add(fb->accum_err, err);
35
36 return relative_err;
37}
38
39lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count)
40{
41 /* brute force: takes about as much time as incremental running methods when
42 * count < 50 (on Bonham). Brute force also less prone to overflow.
43 */
44 lt_t sqdeviations = 0;
45 uint16_t i;
46 for(i = 0; i < count; ++i)
47 {
48 lt_t temp = (int64_t)nums[i] - (int64_t)avg;
49 sqdeviations += temp * temp;
50 }
51 return sqdeviations/count;
52}
53
54lt_t isqrt(lt_t n)
55{
56 /* integer square root using babylonian method
 57	 * (algo taken from Wikipedia) */
58 lt_t res = 0;
59 lt_t bit = ((lt_t)1) << (sizeof(n)*8-2);
60 while (bit > n) {
61 bit >>= 2;
62 }
63
64 while (bit != 0) {
65 if (n >= res + bit) {
66 n -= res + bit;
67 res = (res >> 1) + bit;
68 }
69 else {
70 res >>= 1;
71 }
72 bit >>= 2;
73 }
74 return res;
75}
76
77void update_gpu_estimate(struct task_struct *t, lt_t observed)
78{
79 //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
80 avg_est_t *est;
81 struct migration_info mig_info;
82
83 BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
84
85 est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
86
87 if (unlikely(observed > OBSERVATION_CAP)) {
88 TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n",
89 observed,
90 OBSERVATION_CAP);
91 return;
92 }
93
94#if 0
95 // filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
96 // of range of the average, but only filter if enough samples
97 // have been taken.
98 if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
99 if (unlikely(observed < est->avg/LO_THRESHOLD)) {
100 TRACE_TASK(t, "Observation is too small: %llu\n",
101 observed);
102 return;
103 }
104 else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
105 TRACE_TASK(t, "Observation is too large: %llu\n",
106 observed);
107 return;
108 }
109#endif
110 // filter values outside NUM_STDEVx the standard deviation,
111 // but only filter if enough samples have been taken.
112 if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
113 lt_t lower, upper;
114
115 lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM;
116 lower = est->avg - MIN(range, est->avg); // no underflow.
117
118 if (unlikely(observed < lower)) {
119 TRACE_TASK(t, "Observation is too small: %llu\n", observed);
120 return;
121 }
122
123 upper = est->avg + range;
124 if (unlikely(observed > upper)) {
125 TRACE_TASK(t, "Observation is too large: %llu\n", observed);
126 return;
127 }
128 }
129
130
131
132 if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
133 ++est->count;
134 }
135 else {
136 est->sum -= est->history[est->idx];
137 }
138
139 mig_info.observed = observed;
140 mig_info.estimated = est->avg;
141 mig_info.distance = tsk_rt(t)->gpu_migration;
142 sched_trace_migration(t, &mig_info);
143
144
145 est->history[est->idx] = observed;
146 est->sum += observed;
147 est->avg = est->sum/est->count;
148 est->std = isqrt(varience(est->history, est->avg, est->count));
149 est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
150
151
152#if 0
153 if(unlikely(fb->est.val == 0)) {
154 // kludge-- cap observed values to prevent whacky estimations.
155 // whacky stuff happens during the first few jobs.
156 if(unlikely(observed > OBSERVATION_CAP)) {
157 TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
158 observed, OBSERVATION_CAP);
159 observed = OBSERVATION_CAP;
160 }
161
162 // take the first observation as our estimate
163 // (initial value of 0 was bogus anyhow)
164 fb->est = _integer_to_fp(observed);
165 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
166 }
167 else {
168 fp_t rel_err = update_estimate(fb,
169 tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
170 tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
171 observed);
172
173 if(unlikely(_fp_to_integer(fb->est) <= 0)) {
174 TRACE_TASK(t, "Invalid estimate. Patching.\n");
175 fb->est = _integer_to_fp(observed);
176 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
177 }
178 else {
179 struct migration_info mig_info;
180
181 sched_trace_prediction_err(t,
182 &(tsk_rt(t)->gpu_migration),
183 &rel_err);
184
185 mig_info.observed = observed;
186 mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
187 mig_info.distance = tsk_rt(t)->gpu_migration;
188
189 sched_trace_migration(t, &mig_info);
190 }
191 }
192#endif
193
194 TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
195 tsk_rt(t)->gpu_migration,
196 observed,
197 est->avg);
198}
199
200gpu_migration_dist_t gpu_migration_distance(int a, int b)
201{
202 // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs
203 int i;
204 int dist;
205
206 if(likely(a >= 0 && b >= 0)) {
207 for(i = 0; i <= MIG_FAR; ++i) {
208 if(a>>i == b>>i) {
209 dist = i;
210 goto out;
211 }
212 }
213 dist = MIG_NONE; // hopefully never reached.
214 TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
215 }
216 else {
217 dist = MIG_NONE;
218 }
219
220out:
221 TRACE_CUR("Distance %d -> %d is %d\n",
222 a, b, dist);
223
224 return dist;
225}
226
227
228
229
230#endif
231
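
Note on update_gpu_estimate(): the function above keeps a sliding window of AVG_EST_WINDOW_SIZE execution-time observations per migration distance, rejects samples that fall more than NUM_STDEV_NUM/NUM_STDEV_DENOM (i.e. one half) standard deviation from the running average once enough samples exist, and then refreshes the sum, average, and standard deviation. The stand-alone sketch below mirrors that filtering logic in isolation; the window size, names, and main() driver are illustrative, not taken from the patch (compile with -lm).

	#include <stdint.h>
	#include <math.h>
	#include <stdio.h>

	#define WINDOW 20                       /* stand-in for AVG_EST_WINDOW_SIZE */

	struct avg_est_sketch {
		uint64_t history[WINDOW];
		uint64_t sum, avg, std;
		unsigned count, idx;
	};

	/* Feed one execution-time observation (in ns) into the estimator. */
	static void observe(struct avg_est_sketch *e, uint64_t obs)
	{
		unsigned min_samples = (WINDOW / 2 < 10) ? WINDOW / 2 : 10;

		/* Reject outliers beyond 1/2 std. dev., once enough samples exist. */
		if (e->count > min_samples) {
			uint64_t range = e->std / 2;
			uint64_t lower = e->avg - ((range < e->avg) ? range : e->avg);
			uint64_t upper = e->avg + range;
			if (obs < lower || obs > upper)
				return;          /* drop it, as the kernel code does */
		}

		if (e->count < WINDOW)
			++e->count;
		else
			e->sum -= e->history[e->idx];   /* evict the oldest sample */

		e->history[e->idx] = obs;
		e->sum += obs;
		e->avg = e->sum / e->count;

		/* Brute-force standard deviation over the window. */
		{
			uint64_t sqdev = 0;
			unsigned i;
			for (i = 0; i < e->count; ++i) {
				int64_t d = (int64_t)e->history[i] - (int64_t)e->avg;
				sqdev += (uint64_t)(d * d);
			}
			e->std = (uint64_t)sqrt((double)(sqdev / e->count));
		}
		e->idx = (e->idx + 1) % WINDOW;
	}

	int main(void)
	{
		struct avg_est_sketch e = { {0}, 0, 0, 0, 0, 0 };
		uint64_t i;

		for (i = 0; i < 30; ++i)
			observe(&e, 1000000 + (i % 5) * 10000);  /* ~1 ms samples */
		printf("avg = %llu ns, std = %llu ns\n",
		       (unsigned long long)e.avg, (unsigned long long)e.std);
		return 0;
	}

For gpu_migration_distance(), the reported distance is simply the number of low-order index bits that must be shifted away before the two GPU indices match; for example, in an eight-GPU binary hierarchy (MIG_FAR >= 3), distance(2, 3) = 1 and distance(0, 7) = 3.
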
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
new file mode 100644
index 000000000000..83b708ab85cb
--- /dev/null
+++ b/litmus/ikglp_lock.c
@@ -0,0 +1,2838 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/ikglp_lock.h>
14
15// big signed value.
16#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF
17
18int ikglp_max_heap_base_priority_order(struct binheap_node *a,
19 struct binheap_node *b)
20{
21 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
22 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
23
24 BUG_ON(!d_a);
25 BUG_ON(!d_b);
26
27 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
28}
29
30int ikglp_min_heap_base_priority_order(struct binheap_node *a,
31 struct binheap_node *b)
32{
33 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
34 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
35
36 return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
37}
38
39int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
40 struct binheap_node *b)
41{
42 ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
43 ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
44
45 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
46}
47
48
49int ikglp_min_heap_donee_order(struct binheap_node *a,
50 struct binheap_node *b)
51{
52 struct task_struct *prio_a, *prio_b;
53
54 ikglp_donee_heap_node_t *d_a =
55 binheap_entry(a, ikglp_donee_heap_node_t, node);
56 ikglp_donee_heap_node_t *d_b =
57 binheap_entry(b, ikglp_donee_heap_node_t, node);
58
59 if(!d_a->donor_info) {
60 prio_a = d_a->task;
61 }
62 else {
63 prio_a = d_a->donor_info->task;
64 BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
65 }
66
67 if(!d_b->donor_info) {
68 prio_b = d_b->task;
69 }
70 else {
71 prio_b = d_b->donor_info->task;
72 BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
73 }
74
75 // note reversed order
76 return litmus->__compare(prio_b, BASE, prio_a, BASE);
77}
78
79
80
81static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
82 struct fifo_queue *queue)
83{
84 return (queue - &sem->fifo_queues[0]);
85}
86
87static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
88 struct task_struct *holder)
89{
90 int i;
91 for(i = 0; i < sem->nr_replicas; ++i)
92 if(sem->fifo_queues[i].owner == holder)
93 return(&sem->fifo_queues[i]);
94 return(NULL);
95}
96
97
98
99static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
100 struct task_struct *skip)
101{
102 struct list_head *pos;
103 struct task_struct *queued, *found = NULL;
104
105 list_for_each(pos, &kqueue->wait.task_list) {
106 queued = (struct task_struct*) list_entry(pos,
107 wait_queue_t, task_list)->private;
108
109 /* Compare task prios, find high prio task. */
110 if(queued != skip && litmus->compare(queued, found))
111 found = queued;
112 }
113 return found;
114}
115
116static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
117 struct fifo_queue *search_start)
118{
119 // we start our search at search_start instead of at the beginning of the
120 // queue list to load-balance across all resources.
121 struct fifo_queue* step = search_start;
122 struct fifo_queue* shortest = sem->shortest_fifo_queue;
123
124 do {
125 step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
126 step+1 : &sem->fifo_queues[0];
127
128 if(step->count < shortest->count) {
129 shortest = step;
130 if(step->count == 0)
131 break; /* can't get any shorter */
132 }
133
134 }while(step != search_start);
135
136 return(shortest);
137}
138
139static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
140{
141 return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
142}
143
144
145
146#if 0
147static void print_global_list(struct binheap_node* n, int depth)
148{
149 ikglp_heap_node_t *global_heap_node;
150 char padding[81] = " ";
151
152 if(n == NULL) {
153 TRACE_CUR("+-> %p\n", NULL);
154 return;
155 }
156
157 global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
158
159 if(depth*2 <= 80)
160 padding[depth*2] = '\0';
161
162 TRACE_CUR("%s+-> %s/%d\n",
163 padding,
164 global_heap_node->task->comm,
165 global_heap_node->task->pid);
166
167 if(n->left) print_global_list(n->left, depth+1);
168 if(n->right) print_global_list(n->right, depth+1);
169}
170
171static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
172{
173 ikglp_donee_heap_node_t *donee_node;
174 char padding[81] = " ";
175 struct task_struct* donor = NULL;
176
177 if(n == NULL) {
178 TRACE_CUR("+-> %p\n", NULL);
179 return;
180 }
181
182 donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
183
184 if(depth*2 <= 80)
185 padding[depth*2] = '\0';
186
187 if(donee_node->donor_info) {
188 donor = donee_node->donor_info->task;
189 }
190
191 TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
192 padding,
193 donee_node->task->comm,
194 donee_node->task->pid,
195 (donor) ? donor->comm : "nil",
196 (donor) ? donor->pid : -1,
197 ikglp_get_idx(sem, donee_node->fq));
198
199 if(n->left) print_donees(sem, n->left, depth+1);
200 if(n->right) print_donees(sem, n->right, depth+1);
201}
202
203static void print_donors(struct binheap_node *n, int depth)
204{
205 ikglp_wait_state_t *donor_node;
206 char padding[81] = " ";
207
208 if(n == NULL) {
209 TRACE_CUR("+-> %p\n", NULL);
210 return;
211 }
212
213 donor_node = binheap_entry(n, ikglp_wait_state_t, node);
214
215 if(depth*2 <= 80)
216 padding[depth*2] = '\0';
217
218
219 TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
220 padding,
221 donor_node->task->comm,
222 donor_node->task->pid,
223 donor_node->donee_info->task->comm,
224 donor_node->donee_info->task->pid);
225
226 if(n->left) print_donors(n->left, depth+1);
227 if(n->right) print_donors(n->right, depth+1);
228}
229#endif
230
231static void ikglp_add_global_list(struct ikglp_semaphore *sem,
232 struct task_struct *t,
233 ikglp_heap_node_t *node)
234{
235
236
237 node->task = t;
238 INIT_BINHEAP_NODE(&node->node);
239
240 if(sem->top_m_size < sem->m) {
241 TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
242 t->comm, t->pid);
243// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
244// print_global_list(sem->top_m.root, 1);
245
246 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
247 ++(sem->top_m_size);
248
249// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
250// print_global_list(sem->top_m.root, 1);
251 }
252 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
253 ikglp_heap_node_t *evicted =
254 binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
255
256 TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
257 t->comm, t->pid,
258 evicted->task->comm, evicted->task->pid);
259
260// TRACE_CUR("Not-Top-M Before:\n");
261// print_global_list(sem->not_top_m.root, 1);
262// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
263// print_global_list(sem->top_m.root, 1);
264
265
266 binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
267 INIT_BINHEAP_NODE(&evicted->node);
268 binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
269
270 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
271
272// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
273// print_global_list(sem->top_m.root, 1);
274// TRACE_CUR("Not-Top-M After:\n");
275// print_global_list(sem->not_top_m.root, 1);
276 }
277 else {
278 TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
279 t->comm, t->pid);
280// TRACE_CUR("Not-Top-M Before:\n");
281// print_global_list(sem->not_top_m.root, 1);
282
283 binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
284
285// TRACE_CUR("Not-Top-M After:\n");
286// print_global_list(sem->not_top_m.root, 1);
287 }
288}
289
290
291static void ikglp_del_global_list(struct ikglp_semaphore *sem,
292 struct task_struct *t,
293 ikglp_heap_node_t *node)
294{
295 BUG_ON(!binheap_is_in_heap(&node->node));
296
297 TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
298
299 if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
300 TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
301
302// TRACE_CUR("Not-Top-M Before:\n");
303// print_global_list(sem->not_top_m.root, 1);
304// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
305// print_global_list(sem->top_m.root, 1);
306
307
308 binheap_delete(&node->node, &sem->top_m);
309
310 if(!binheap_empty(&sem->not_top_m)) {
311 ikglp_heap_node_t *promoted =
312 binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
313
314 TRACE_CUR("Promoting %s/%d to top-m\n",
315 promoted->task->comm, promoted->task->pid);
316
317 binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
318 INIT_BINHEAP_NODE(&promoted->node);
319
320 binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
321 }
322 else {
323 TRACE_CUR("No one to promote to top-m.\n");
324 --(sem->top_m_size);
325 }
326
327// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
328// print_global_list(sem->top_m.root, 1);
329// TRACE_CUR("Not-Top-M After:\n");
330// print_global_list(sem->not_top_m.root, 1);
331 }
332 else {
333 TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
334// TRACE_CUR("Not-Top-M Before:\n");
335// print_global_list(sem->not_top_m.root, 1);
336
337 binheap_delete(&node->node, &sem->not_top_m);
338
339// TRACE_CUR("Not-Top-M After:\n");
340// print_global_list(sem->not_top_m.root, 1);
341 }
342}
343
344
345static void ikglp_add_donees(struct ikglp_semaphore *sem,
346 struct fifo_queue *fq,
347 struct task_struct *t,
348 ikglp_donee_heap_node_t* node)
349{
350// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
351// TRACE_CUR("donees Before:\n");
352// print_donees(sem, sem->donees.root, 1);
353
354 node->task = t;
355 node->donor_info = NULL;
356 node->fq = fq;
357 INIT_BINHEAP_NODE(&node->node);
358
359 binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
360
361// TRACE_CUR("donees After:\n");
362// print_donees(sem, sem->donees.root, 1);
363}
364
365
366static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
367 struct fifo_queue *fq,
368 struct ikglp_semaphore *sem,
369 unsigned long flags)
370{
371 // priority of 't' has increased (note: 't' might already be hp_waiter).
372 if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
373 struct task_struct *old_max_eff_prio;
374 struct task_struct *new_max_eff_prio;
375 struct task_struct *new_prio = NULL;
376 struct task_struct *owner = fq->owner;
377
378 if(fq->hp_waiter)
379 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
380 fq->hp_waiter->comm, fq->hp_waiter->pid);
381 else
382 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
383
384 if(owner)
385 {
386 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
387
388// TRACE_TASK(owner, "Heap Before:\n");
389// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
390
391 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
392
393 fq->hp_waiter = t;
394 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
395
396 binheap_decrease(&fq->nest.hp_binheap_node,
397 &tsk_rt(owner)->hp_blocked_tasks);
398
399// TRACE_TASK(owner, "Heap After:\n");
400// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
401
402 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
403
404 if(new_max_eff_prio != old_max_eff_prio) {
405 TRACE_TASK(t, "is new hp_waiter.\n");
406
407 if ((effective_priority(owner) == old_max_eff_prio) ||
408 (litmus->__compare(new_max_eff_prio, BASE,
409 owner, EFFECTIVE))){
410 new_prio = new_max_eff_prio;
411 }
412 }
413 else {
414 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
415 }
416
417 if(new_prio) {
418 // set new inheritance and propagate
419 TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
420 owner->comm, owner->pid,
421 new_prio->comm, new_prio->pid);
422 litmus->nested_increase_prio(owner, new_prio, &sem->lock,
423 flags); // unlocks lock.
424 }
425 else {
426 TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n",
427 new_max_eff_prio->comm, new_max_eff_prio->pid);
428 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
429 unlock_fine_irqrestore(&sem->lock, flags);
430 }
431 }
432 else {
433 fq->hp_waiter = t;
434 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
435
436 TRACE_TASK(t, "no owner.\n");
437 unlock_fine_irqrestore(&sem->lock, flags);
438 }
439 }
440 else {
441 TRACE_TASK(t, "hp_waiter is unaffected.\n");
442 unlock_fine_irqrestore(&sem->lock, flags);
443 }
444}
445
446// hp_waiter has decreased
447static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
448 struct ikglp_semaphore *sem,
449 unsigned long flags)
450{
451 struct task_struct *owner = fq->owner;
452
453 struct task_struct *old_max_eff_prio;
454 struct task_struct *new_max_eff_prio;
455
456 if(!owner) {
457 TRACE_CUR("No owner. Returning.\n");
458 unlock_fine_irqrestore(&sem->lock, flags);
459 return;
460 }
461
462 TRACE_CUR("ikglp_refresh_owners_prio_decrease\n");
463
464 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
465
466 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
467
468 binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
469 fq->nest.hp_waiter_eff_prio = fq->hp_waiter;
470 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
471 struct nested_info, hp_binheap_node);
472
473 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
474
475 if((old_max_eff_prio != new_max_eff_prio) &&
476 (effective_priority(owner) == old_max_eff_prio))
477 {
478 // Need to set new effective_priority for owner
479 struct task_struct *decreased_prio;
480
481 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
482 ikglp_get_idx(sem, fq));
483
484 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
485 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
486 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
487 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
488 owner->comm,
489 owner->pid,
490 ikglp_get_idx(sem, fq));
491
492 decreased_prio = new_max_eff_prio;
493 }
494 else {
495 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
496 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
497 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
498 owner->comm,
499 owner->pid,
500 ikglp_get_idx(sem, fq));
501
502 decreased_prio = NULL;
503 }
504
505 // beware: recursion
506 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
507 }
508 else {
509 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
510 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
511 unlock_fine_irqrestore(&sem->lock, flags);
512 }
513}
514
515
516static void ikglp_remove_donation_from_owner(struct binheap_node *n,
517 struct fifo_queue *fq,
518 struct ikglp_semaphore *sem,
519 unsigned long flags)
520{
521 struct task_struct *owner = fq->owner;
522
523 struct task_struct *old_max_eff_prio;
524 struct task_struct *new_max_eff_prio;
525
526 BUG_ON(!owner);
527
528 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
529
530 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
531
532 binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
533
534 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
535
536 if((old_max_eff_prio != new_max_eff_prio) &&
537 (effective_priority(owner) == old_max_eff_prio))
538 {
539 // Need to set new effective_priority for owner
540 struct task_struct *decreased_prio;
541
542 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
543 ikglp_get_idx(sem, fq));
544
545 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
546 TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
547 ikglp_get_idx(sem, fq));
548 decreased_prio = new_max_eff_prio;
549 }
550 else {
551 TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
552 ikglp_get_idx(sem, fq));
553 decreased_prio = NULL;
554 }
555
556 // beware: recursion
557 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
558 }
559 else {
560 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
561 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
562 unlock_fine_irqrestore(&sem->lock, flags);
563 }
564}
565
566static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
567 struct binheap_node *n)
568{
569 struct task_struct *old_max_eff_prio;
570 struct task_struct *new_max_eff_prio;
571
572 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
573
574 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
575
576 binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
577
578 new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
579
580 if((old_max_eff_prio != new_max_eff_prio) &&
581 (effective_priority(t) == old_max_eff_prio))
582 {
583 // Need to set new effective_priority for owner
584 struct task_struct *decreased_prio;
585
586 if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
587 decreased_prio = new_max_eff_prio;
588 }
589 else {
590 decreased_prio = NULL;
591 }
592
593 tsk_rt(t)->inh_task = decreased_prio;
594 }
595
596 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
597}
598
599static void ikglp_get_immediate(struct task_struct* t,
600 struct fifo_queue *fq,
601 struct ikglp_semaphore *sem,
602 unsigned long flags)
603{
604 // resource available now
605 TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
606
607 fq->owner = t;
608
609 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
610 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
611 struct nested_info, hp_binheap_node);
612 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
613
614 ++(fq->count);
615
616 ikglp_add_global_list(sem, t, &fq->global_heap_node);
617 ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
618
619 sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
620
621#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
622 if(sem->aff_obs) {
623 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
624 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
625 }
626#endif
627
628 unlock_fine_irqrestore(&sem->lock, flags);
629}
630
631
632
633
634
635static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
636 struct fifo_queue* fq,
637 struct task_struct* t,
638 wait_queue_t *wait,
639 ikglp_heap_node_t *global_heap_node,
640 ikglp_donee_heap_node_t *donee_heap_node)
641{
642 /* resource is not free => must suspend and wait */
643 TRACE_TASK(t, "Enqueuing on fq %d.\n",
644 ikglp_get_idx(sem, fq));
645
646 init_waitqueue_entry(wait, t);
647
648 __add_wait_queue_tail_exclusive(&fq->wait, wait);
649
650 ++(fq->count);
651 ++(sem->nr_in_fifos);
652
653 // update global list.
654 if(likely(global_heap_node)) {
655 if(binheap_is_in_heap(&global_heap_node->node)) {
656 WARN_ON(1);
657 ikglp_del_global_list(sem, t, global_heap_node);
658 }
659 ikglp_add_global_list(sem, t, global_heap_node);
660 }
 661	 // update donor eligibility list.
662 if(likely(donee_heap_node)) {
663// if(binheap_is_in_heap(&donee_heap_node->node)) {
664// WARN_ON(1);
665// }
666 ikglp_add_donees(sem, fq, t, donee_heap_node);
667 }
668
669 if(sem->shortest_fifo_queue == fq) {
670 sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
671 }
672
673#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
674 if(sem->aff_obs) {
675 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
676 }
677#endif
678
679 TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
680}
681
682
683static void ikglp_enqueue_on_fq(
684 struct ikglp_semaphore *sem,
685 struct fifo_queue *fq,
686 ikglp_wait_state_t *wait,
687 unsigned long flags)
688{
689 /* resource is not free => must suspend and wait */
690 TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
691 ikglp_get_idx(sem, fq));
692
693 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
694 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
695
696 __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
697 &wait->global_heap_node, &wait->donee_heap_node);
698
699 ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock
700}
701
702
703static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
704 ikglp_wait_state_t *wait)
705{
706 TRACE_TASK(wait->task, "goes to PQ.\n");
707
708 wait->pq_node.task = wait->task; // copy over task (little redundant...)
709
710 binheap_add(&wait->pq_node.node, &sem->priority_queue,
711 ikglp_heap_node_t, node);
712}
713
714static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
715 ikglp_wait_state_t *wait)
716{
717 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
718 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
719 INIT_BINHEAP_NODE(&wait->pq_node.node);
720
721 __ikglp_enqueue_on_pq(sem, wait);
722}
723
724static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
725 ikglp_wait_state_t* wait,
726 unsigned long flags)
727{
728 struct task_struct *t = wait->task;
729 ikglp_donee_heap_node_t *donee_node = NULL;
730 struct task_struct *donee;
731
732 struct task_struct *old_max_eff_prio;
733 struct task_struct *new_max_eff_prio;
734 struct task_struct *new_prio = NULL;
735
736 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
737 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
738 INIT_BINHEAP_NODE(&wait->pq_node.node);
739 INIT_BINHEAP_NODE(&wait->node);
740
741// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
742// TRACE_CUR("donors Before:\n");
743// print_donors(sem->donors.root, 1);
744
745 // Add donor to the global list.
746 ikglp_add_global_list(sem, t, &wait->global_heap_node);
747
748 // Select a donee
749#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
750 donee_node = (sem->aff_obs) ?
751 sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
752 binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
753#else
754 donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
755#endif
756
757 donee = donee_node->task;
758
759 TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
760
761 TRACE_CUR("Temporarily removing %s/%d to donee list.\n",
762 donee->comm, donee->pid);
763// TRACE_CUR("donees Before:\n");
764// print_donees(sem, sem->donees.root, 1);
765
766 //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
767 binheap_delete(&donee_node->node, &sem->donees);
768
769// TRACE_CUR("donees After:\n");
770// print_donees(sem, sem->donees.root, 1);
771
772
773 wait->donee_info = donee_node;
774
775 // Add t to donor heap.
776 binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
777
778 // Now adjust the donee's priority.
779
780 // Lock the donee's inheritance heap.
781 raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
782
783 old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
784
785 if(donee_node->donor_info) {
786 // Steal donation relation. Evict old donor to PQ.
787
788 // Remove old donor from donor heap
789 ikglp_wait_state_t *old_wait = donee_node->donor_info;
790 struct task_struct *old_donor = old_wait->task;
791
792 TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n",
793 donee->comm, donee->pid, old_donor->comm, old_donor->pid);
794
795 binheap_delete(&old_wait->node, &sem->donors);
796
797 // Remove donation from donee's inheritance heap.
798 binheap_delete(&old_wait->prio_donation.hp_binheap_node,
799 &tsk_rt(donee)->hp_blocked_tasks);
800 // WARNING: have not updated inh_prio!
801
802 // Add old donor to PQ.
803 __ikglp_enqueue_on_pq(sem, old_wait);
804
805 // Remove old donor from the global heap.
806 ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
807 }
808
809 // Add back donee's node to the donees heap with increased prio
810 donee_node->donor_info = wait;
811 INIT_BINHEAP_NODE(&donee_node->node);
812
813
814 TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
815// TRACE_CUR("donees Before:\n");
816// print_donees(sem, sem->donees.root, 1);
817
818 binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
819
820// TRACE_CUR("donees After:\n");
821// print_donees(sem, sem->donees.root, 1);
822
823 // Add an inheritance/donation to the donee's inheritance heap.
824 wait->prio_donation.lock = (struct litmus_lock*)sem;
825 wait->prio_donation.hp_waiter_eff_prio = t;
826 wait->prio_donation.hp_waiter_ptr = NULL;
827 INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
828
829 binheap_add(&wait->prio_donation.hp_binheap_node,
830 &tsk_rt(donee)->hp_blocked_tasks,
831 struct nested_info, hp_binheap_node);
832
833 new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
834
835 if(new_max_eff_prio != old_max_eff_prio) {
836 if ((effective_priority(donee) == old_max_eff_prio) ||
837 (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
838 TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
839 donee->comm, donee->pid);
840 new_prio = new_max_eff_prio;
841 }
842// else {
843// // should be bug. donor would not be in top-m.
844// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
845// WARN_ON(1);
846// }
847// }
848// else {
849// // should be bug. donor would not be in top-m.
850// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
851// WARN_ON(1);
852 }
853
854 if(new_prio) {
855 struct fifo_queue *donee_fq = donee_node->fq;
856
857 if(donee != donee_fq->owner) {
858 TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
859 donee->comm, donee->pid,
860 donee_fq->owner->comm, donee_fq->owner->pid);
861
862 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
863 ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock
864 }
865 else {
 866	 TRACE_TASK(t, "%s/%d is the owner. Propagating priority immediately.\n",
867 donee->comm, donee->pid);
868 litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock
869 }
870 }
871 else {
872 TRACE_TASK(t, "No change in effective priority (it is %d/%s). BUG?\n",
873 new_max_eff_prio->comm, new_max_eff_prio->pid);
874 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
875 unlock_fine_irqrestore(&sem->lock, flags);
876 }
877
878
879// TRACE_CUR("donors After:\n");
880// print_donors(sem->donors.root, 1);
881}
882
883int ikglp_lock(struct litmus_lock* l)
884{
885 struct task_struct* t = current;
886 struct ikglp_semaphore *sem = ikglp_from_lock(l);
887 unsigned long flags = 0, real_flags;
888 struct fifo_queue *fq = NULL;
889 int replica = -EINVAL;
890
891#ifdef CONFIG_LITMUS_DGL_SUPPORT
892 raw_spinlock_t *dgl_lock;
893#endif
894
895 ikglp_wait_state_t wait;
896
897 if (!is_realtime(t))
898 return -EPERM;
899
900#ifdef CONFIG_LITMUS_DGL_SUPPORT
901 dgl_lock = litmus->get_dgl_spinlock(t);
902#endif
903
904 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
905
906 lock_global_irqsave(dgl_lock, flags);
907 lock_fine_irqsave(&sem->lock, flags);
908
909 if(sem->nr_in_fifos < sem->m) {
 910	 // enqueue somewhere
911#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
912 fq = (sem->aff_obs) ?
913 sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
914 sem->shortest_fifo_queue;
915#else
916 fq = sem->shortest_fifo_queue;
917#endif
918 if(fq->count == 0) {
919 // take available resource
920 replica = ikglp_get_idx(sem, fq);
921
922 ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
923
924 unlock_global_irqrestore(dgl_lock, flags);
925 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
926 goto acquired;
927 }
928 else {
929 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
930
931 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
932 mb();
933
934 /* FIXME: interruptible would be nice some day */
935 set_task_state(t, TASK_UNINTERRUPTIBLE);
936
937 ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
938 }
939 }
940 else {
941 // donor!
942 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
943
944 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
945 mb();
946
947 /* FIXME: interruptible would be nice some day */
948 set_task_state(t, TASK_UNINTERRUPTIBLE);
949
950 if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
951 // enqueue on PQ
952 ikglp_enqueue_on_pq(sem, &wait);
953 unlock_fine_irqrestore(&sem->lock, flags);
954 }
955 else {
956 // enqueue as donor
957 ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
958 }
959 }
960
961 unlock_global_irqrestore(dgl_lock, flags);
962 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
963
964 TS_LOCK_SUSPEND;
965
966 schedule();
967
968 TS_LOCK_RESUME;
969
970 fq = ikglp_get_queue(sem, t);
971 BUG_ON(!fq);
972
973 replica = ikglp_get_idx(sem, fq);
974
975acquired:
976 TRACE_CUR("Acquired lock %d, queue %d\n",
977 l->ident, replica);
978
979#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
980 if(sem->aff_obs) {
981 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
982 }
983#endif
984
985 return replica;
986}
987
988//int ikglp_lock(struct litmus_lock* l)
989//{
990// struct task_struct* t = current;
991// struct ikglp_semaphore *sem = ikglp_from_lock(l);
992// unsigned long flags = 0, real_flags;
993// struct fifo_queue *fq = NULL;
994// int replica = -EINVAL;
995//
996//#ifdef CONFIG_LITMUS_DGL_SUPPORT
997// raw_spinlock_t *dgl_lock;
998//#endif
999//
1000// ikglp_wait_state_t wait;
1001//
1002// if (!is_realtime(t))
1003// return -EPERM;
1004//
1005//#ifdef CONFIG_LITMUS_DGL_SUPPORT
1006// dgl_lock = litmus->get_dgl_spinlock(t);
1007//#endif
1008//
1009// raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1010//
1011// lock_global_irqsave(dgl_lock, flags);
1012// lock_fine_irqsave(&sem->lock, flags);
1013//
1014//
1015//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1016// fq = (sem->aff_obs) ?
1017// sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
1018// sem->shortest_fifo_queue;
1019//#else
1020// fq = sem->shortest_fifo_queue;
1021//#endif
1022//
1023// if(fq->count == 0) {
1024// // take available resource
1025// replica = ikglp_get_idx(sem, fq);
1026//
1027// ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
1028//
1029// unlock_global_irqrestore(dgl_lock, flags);
1030// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1031// }
1032// else
1033// {
1034// // we have to suspend.
1035//
1036// wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
1037//
1038// tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
1039// mb();
1040//
1041// /* FIXME: interruptible would be nice some day */
1042// set_task_state(t, TASK_UNINTERRUPTIBLE);
1043//
1044// if(fq->count < sem->max_fifo_len) {
1045// // enqueue on fq
1046// ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
1047// }
1048// else {
1049//
1050// TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
1051//
1052// // no room in fifos. Go to PQ or donors.
1053//
1054// if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
1055// // enqueue on PQ
1056// ikglp_enqueue_on_pq(sem, &wait);
1057// unlock_fine_irqrestore(&sem->lock, flags);
1058// }
1059// else {
1060// // enqueue as donor
1061// ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
1062// }
1063// }
1064//
1065// unlock_global_irqrestore(dgl_lock, flags);
1066// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1067//
1068// TS_LOCK_SUSPEND;
1069//
1070// schedule();
1071//
1072// TS_LOCK_RESUME;
1073//
1074// fq = ikglp_get_queue(sem, t);
1075// BUG_ON(!fq);
1076//
1077// replica = ikglp_get_idx(sem, fq);
1078// }
1079//
1080// TRACE_CUR("Acquired lock %d, queue %d\n",
1081// l->ident, replica);
1082//
1083//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1084// if(sem->aff_obs) {
1085// return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
1086// }
1087//#endif
1088//
1089// return replica;
1090//}
1091
1092static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
1093 struct fifo_queue *fq,
1094 ikglp_wait_state_t *donor_info)
1095{
1096 struct task_struct *t = donor_info->task;
1097
1098 TRACE_CUR("Donor %s/%d being moved to fq %d\n",
1099 t->comm,
1100 t->pid,
1101 ikglp_get_idx(sem, fq));
1102
1103 binheap_delete(&donor_info->node, &sem->donors);
1104
1105 __ikglp_enqueue_on_fq(sem, fq, t,
1106 &donor_info->fq_node,
1107						  NULL,	// already in global_list, so pass null to prevent adding it a 2nd time.
1108 &donor_info->donee_heap_node);
1109
1110 // warning:
1111 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1112}
1113
1114static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
1115 struct fifo_queue *fq,
1116 ikglp_wait_state_t *wait)
1117{
1118 struct task_struct *t = wait->task;
1119
1120 TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
1121 t->comm,
1122 t->pid,
1123 ikglp_get_idx(sem, fq));
1124
1125 binheap_delete(&wait->pq_node.node, &sem->priority_queue);
1126
1127 __ikglp_enqueue_on_fq(sem, fq, t,
1128 &wait->fq_node,
1129 &wait->global_heap_node,
1130 &wait->donee_heap_node);
1131 // warning:
1132 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1133}
1134
1135static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
1136 struct ikglp_semaphore* sem)
1137{
1138 /* must hold sem->lock */
1139
1140 struct fifo_queue *fq = NULL;
1141 struct list_head *pos;
1142 struct task_struct *queued;
1143 int i;
1144
1145 for(i = 0; i < sem->nr_replicas; ++i) {
1146 if( (sem->fifo_queues[i].count > 1) &&
1147 (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
1148
1149 TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
1150 ikglp_get_idx(sem, &sem->fifo_queues[i]),
1151 sem->fifo_queues[i].hp_waiter->comm,
1152 sem->fifo_queues[i].hp_waiter->pid,
1153 (fq) ? ikglp_get_idx(sem, fq) : -1,
1154 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
1155 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
1156
1157 fq = &sem->fifo_queues[i];
1158
1159 WARN_ON(!(fq->hp_waiter));
1160 }
1161 }
1162
1163 if(fq) {
1164 struct task_struct *max_hp = fq->hp_waiter;
1165 ikglp_wait_state_t* ret = NULL;
1166
1167 TRACE_CUR("Searching for %s/%d on fq %d\n",
1168 max_hp->comm,
1169 max_hp->pid,
1170 ikglp_get_idx(sem, fq));
1171
1172 BUG_ON(!max_hp);
1173
1174 list_for_each(pos, &fq->wait.task_list) {
1175 wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
1176
1177 queued = (struct task_struct*) wait->private;
1178
1179 TRACE_CUR("fq %d entry: %s/%d\n",
1180 ikglp_get_idx(sem, fq),
1181 queued->comm,
1182 queued->pid);
1183
1184 /* Compare task prios, find high prio task. */
1185 if (queued == max_hp) {
1186 TRACE_CUR("Found it!\n");
1187 ret = container_of(wait, ikglp_wait_state_t, fq_node);
1188 }
1189 }
1190
1191 WARN_ON(!ret);
1192 return ret;
1193 }
1194
1195 return(NULL);
1196}
1197
1198static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
1199 struct fifo_queue *fq,
1200 ikglp_wait_state_t *fq_wait)
1201{
1202 struct task_struct *t = fq_wait->task;
1203 struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
1204
1205 TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
1206 t->comm,
1207 t->pid,
1208 ikglp_get_idx(sem, fq));
1209
1210 fq_wait->donee_heap_node.fq = fq; // just to be safe
1211
1212
1213 __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
1214 --(fq_steal->count);
1215
1216#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1217 if(sem->aff_obs) {
1218 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
1219 }
1220#endif
1221
1222 if(t == fq_steal->hp_waiter) {
1223 fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
1224 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1225 ikglp_get_idx(sem, fq_steal),
1226 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
1227 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
1228 }
1229
1230
1231 // Update shortest.
1232 if(fq_steal->count < sem->shortest_fifo_queue->count) {
1233 sem->shortest_fifo_queue = fq_steal;
1234 }
1235
1236 __ikglp_enqueue_on_fq(sem, fq, t,
1237 &fq_wait->fq_node,
1238 NULL,
1239 NULL);
1240
1241 // warning: We have not checked the priority inheritance of fq's owner yet.
1242}
1243
1244
1245static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
1246 struct fifo_queue *fq,
1247 ikglp_wait_state_t *old_wait)
1248{
1249 struct task_struct *t = old_wait->task;
1250
1251 BUG_ON(old_wait->donee_heap_node.fq != fq);
1252
1253 TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
1254 ikglp_get_idx(sem, fq));
1255
1256 // need to migrate global_heap_node and donee_heap_node off of the stack
1257 // to the nodes allocated for the owner of this fq.
1258
1259 // TODO: Enhance binheap() to perform this operation in place.
1260
1261 ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
1262 fq->global_heap_node = old_wait->global_heap_node; // copy
1263 ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add
1264
1265 binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove
1266 fq->donee_heap_node = old_wait->donee_heap_node; // copy
1267
1268 if(fq->donee_heap_node.donor_info) {
1269 // let donor know that our location has changed
1270 BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link
1271 fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
1272 }
1273 INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
1274 binheap_add(&fq->donee_heap_node.node, &sem->donees,
1275 ikglp_donee_heap_node_t, node); // re-add
1276}
1277
1278int ikglp_unlock(struct litmus_lock* l)
1279{
1280 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1281 struct task_struct *t = current;
1282 struct task_struct *donee = NULL;
1283 struct task_struct *next = NULL;
1284 struct task_struct *new_on_fq = NULL;
1285 struct fifo_queue *fq_of_new_on_fq = NULL;
1286
1287 ikglp_wait_state_t *other_donor_info = NULL;
1288 struct fifo_queue *to_steal = NULL;
1289 int need_steal_prio_reeval = 0;
1290 struct fifo_queue *fq;
1291
1292#ifdef CONFIG_LITMUS_DGL_SUPPORT
1293 raw_spinlock_t *dgl_lock;
1294#endif
1295
1296 unsigned long flags = 0, real_flags;
1297
1298 int err = 0;
1299
1300 fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
1301
1302 if (!fq) {
1303 err = -EINVAL;
1304 goto out;
1305 }
1306
1307#ifdef CONFIG_LITMUS_DGL_SUPPORT
1308 dgl_lock = litmus->get_dgl_spinlock(t);
1309#endif
1310 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1311
1312 lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
1313 lock_fine_irqsave(&sem->lock, flags);
1314
1315 TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
1316
1317
1318 // Remove 't' from the heaps, but data in nodes will still be good.
1319 ikglp_del_global_list(sem, t, &fq->global_heap_node);
1320 binheap_delete(&fq->donee_heap_node.node, &sem->donees);
1321
1322 fq->owner = NULL; // no longer owned!!
1323 --(fq->count);
1324 if(fq->count < sem->shortest_fifo_queue->count) {
1325 sem->shortest_fifo_queue = fq;
1326 }
1327 --(sem->nr_in_fifos);
1328
1329#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1330 if(sem->aff_obs) {
1331 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
1332 sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
1333 }
1334#endif
1335
1336 // Move the next request into the FQ and update heaps as needed.
1337 // We defer re-evaluation of priorities to later in the function.
1338 if(fq->donee_heap_node.donor_info) { // move my donor to FQ
1339 ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
1340
1341 new_on_fq = donor_info->task;
1342
1343 // donor moved to FQ
1344 donee = t;
1345
1346#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1347 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1348 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1349 if(fq_of_new_on_fq->count == 0) {
1350 // ignore it?
1351// fq_of_new_on_fq = fq;
1352 }
1353 }
1354 else {
1355 fq_of_new_on_fq = fq;
1356 }
1357#else
1358 fq_of_new_on_fq = fq;
1359#endif
1360
1361 TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1362 new_on_fq->comm, new_on_fq->pid,
1363 ikglp_get_idx(sem, fq_of_new_on_fq),
1364 ikglp_get_idx(sem, fq));
1365
1366
1367 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info);
1368 }
1369 	else if(!binheap_empty(&sem->donors)) {  // I have no donor, so move another donor to the FQ
1370 // move other donor to FQ
1371 // Select a donor
1372#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1373 other_donor_info = (sem->aff_obs) ?
1374 sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
1375 binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1376#else
1377 other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1378#endif
1379
1380 new_on_fq = other_donor_info->task;
1381 donee = other_donor_info->donee_info->task;
1382
1383 // update the donee's heap position.
1384 other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
1385 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
1386
1387#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1388 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1389 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1390 if(fq_of_new_on_fq->count == 0) {
1391 // ignore it?
1392// fq_of_new_on_fq = fq;
1393 }
1394 }
1395 else {
1396 fq_of_new_on_fq = fq;
1397 }
1398#else
1399 fq_of_new_on_fq = fq;
1400#endif
1401
1402 TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1403 new_on_fq->comm, new_on_fq->pid,
1404 ikglp_get_idx(sem, fq_of_new_on_fq),
1405 ikglp_get_idx(sem, fq));
1406
1407 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info);
1408 }
1409 else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ
1410 ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
1411 ikglp_heap_node_t, node);
1412 ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
1413 pq_node);
1414
1415 new_on_fq = pq_wait->task;
1416
1417#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1418 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1419 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1420 if(fq_of_new_on_fq->count == 0) {
1421 // ignore it?
1422// fq_of_new_on_fq = fq;
1423 }
1424 }
1425 else {
1426 fq_of_new_on_fq = fq;
1427 }
1428#else
1429 fq_of_new_on_fq = fq;
1430#endif
1431
1432 TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1433 new_on_fq->comm, new_on_fq->pid,
1434 ikglp_get_idx(sem, fq_of_new_on_fq),
1435 ikglp_get_idx(sem, fq));
1436
1437 ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait);
1438 }
1439 else if(fq->count == 0) { // No PQ and this queue is empty, so steal.
1440 ikglp_wait_state_t *fq_wait;
1441
1442 TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
1443 ikglp_get_idx(sem, fq));
1444
1445#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1446 fq_wait = (sem->aff_obs) ?
1447 sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
1448 ikglp_find_hp_waiter_to_steal(sem);
1449#else
1450 fq_wait = ikglp_find_hp_waiter_to_steal(sem);
1451#endif
1452
1453 if(fq_wait) {
1454 to_steal = fq_wait->donee_heap_node.fq;
1455
1456 new_on_fq = fq_wait->task;
1457 fq_of_new_on_fq = fq;
1458 need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter);
1459
1460 TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
1461 new_on_fq->comm, new_on_fq->pid,
1462 ikglp_get_idx(sem, to_steal),
1463 ikglp_get_idx(sem, fq));
1464
1465 ikglp_steal_to_fq(sem, fq, fq_wait);
1466 }
1467 else {
1468 TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
1469 ikglp_get_idx(sem, fq));
1470 }
1471 }
1472 else { // move no one
1473 }
1474
1475 // 't' must drop all priority and clean up data structures before hand-off.
1476
1477 // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST
1478 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
1479 {
1480 int count = 0;
1481 while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
1482 binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
1483 struct nested_info, hp_binheap_node);
1484 ++count;
1485 }
1486 litmus->decrease_prio(t, NULL);
1487 		WARN_ON(count > 2); // should not be greater than 2: only local fq inheritance and donation are possible.
1488 }
1489 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
1490
1491
1492
1493 // Now patch up other priorities.
1494 //
1495 // At most one of the following:
1496 // if(donee && donee != t), decrease prio, propagate to owner, or onward
1497 // if(to_steal), update owner's prio (hp_waiter has already been set)
1498 //
1499
1500 BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
1501
1502 if(other_donor_info) {
1503 struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
1504
1505 BUG_ON(!donee);
1506 BUG_ON(donee == t);
1507
1508 TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
1509 other_donor_info->task->comm, other_donor_info->task->pid,
1510 donee->comm, donee->pid);
1511
1512 // need to terminate donation relation.
1513 if(donee == other_fq->owner) {
1514 TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n",
1515 donee->comm, donee->pid,
1516 ikglp_get_idx(sem, other_fq));
1517
1518 ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
1519 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1520 }
1521 else {
1522 			TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n",
1523 donee->comm, donee->pid,
1524 ikglp_get_idx(sem, other_fq));
1525
1526 ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
1527 if(donee == other_fq->hp_waiter) {
1528 TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
1529 donee->comm, donee->pid,
1530 ikglp_get_idx(sem, other_fq));
1531
1532 other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
1533 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1534 ikglp_get_idx(sem, other_fq),
1535 (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
1536 (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
1537
1538 ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it.
1539 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1540 }
1541 }
1542 }
1543 else if(to_steal) {
1544 TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
1545 ikglp_get_idx(sem, to_steal));
1546
1547 if(need_steal_prio_reeval) {
1548 ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it.
1549 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1550 }
1551 }
1552
1553 // check for new HP waiter.
1554 if(new_on_fq) {
1555 if(fq == fq_of_new_on_fq) {
1556 // fq->owner is null, so just update the hp_waiter without locking.
1557 if(new_on_fq == fq->hp_waiter) {
1558 				TRACE_TASK(t, "new_on_fq (%s/%d) is already hp_waiter.\n",
1559 						   fq->hp_waiter->comm, fq->hp_waiter->pid);
1560 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
1561 }
1562 else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
1563 if(fq->hp_waiter)
1564 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
1565 fq->hp_waiter->comm, fq->hp_waiter->pid);
1566 else
1567 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
1568
1569 fq->hp_waiter = new_on_fq;
1570 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
1571
1572 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1573 ikglp_get_idx(sem, fq),
1574 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1575 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1576 }
1577 }
1578 else {
1579 ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it.
1580 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1581 }
1582 }
1583
1584wake_kludge:
1585 if(waitqueue_active(&fq->wait))
1586 {
1587 wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
1588 ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
1589 next = (struct task_struct*) wait->private;
1590
1591 __remove_wait_queue(&fq->wait, wait);
1592
1593 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1594 ikglp_get_idx(sem, fq),
1595 next->comm, next->pid);
1596
1597 // migrate wait-state to fifo-memory.
1598 ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
1599
1600 		/* next becomes the resource holder */
1601 fq->owner = next;
1602 tsk_rt(next)->blocked_lock = NULL;
1603
1604#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1605 if(sem->aff_obs) {
1606 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
1607 }
1608#endif
1609
1610 /* determine new hp_waiter if necessary */
1611 if (next == fq->hp_waiter) {
1612
1613 TRACE_TASK(next, "was highest-prio waiter\n");
1614 /* next has the highest priority --- it doesn't need to
1615 * inherit. However, we need to make sure that the
1616 * next-highest priority in the queue is reflected in
1617 * hp_waiter. */
1618 fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
1619 TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
1620 ikglp_get_idx(sem, fq),
1621 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1622 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1623
1624 fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
1625 effective_priority(fq->hp_waiter) : NULL;
1626
1627 if (fq->hp_waiter)
1628 TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
1629 else
1630 TRACE("no further waiters\n");
1631
1632 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1633
1634// TRACE_TASK(next, "Heap Before:\n");
1635// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1636
1637 binheap_add(&fq->nest.hp_binheap_node,
1638 &tsk_rt(next)->hp_blocked_tasks,
1639 struct nested_info,
1640 hp_binheap_node);
1641
1642// TRACE_TASK(next, "Heap After:\n");
1643// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1644
1645 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1646 }
1647 else {
1648 /* Well, if 'next' is not the highest-priority waiter,
1649 * then it (probably) ought to inherit the highest-priority
1650 * waiter's priority. */
1651 TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
1652 ikglp_get_idx(sem, fq),
1653 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1654 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1655
1656 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1657
1658 binheap_add(&fq->nest.hp_binheap_node,
1659 &tsk_rt(next)->hp_blocked_tasks,
1660 struct nested_info,
1661 hp_binheap_node);
1662
1663 			/* It is possible that 'next' *should* be the hp_waiter, but isn't
1664 			 * because that update hasn't yet executed (the update operation is
1665 			 * probably blocked on mutex->lock). So only inherit if the top of
1666 			 * 'next's blocked-tasks heap is indeed the effective prio. of hp_waiter.
1667 			 * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
1668 			 * since the effective priority of hp_waiter can change (and the
1669 			 * update has not made it to this lock).)
1670 			 */
1671 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
1672 fq->nest.hp_waiter_eff_prio))
1673 {
1674 if(fq->nest.hp_waiter_eff_prio)
1675 litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
1676 else
1677 WARN_ON(1);
1678 }
1679
1680 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1681 }
1682
1683
1684 // wake up the new resource holder!
1685 wake_up_process(next);
1686 }
1687 if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
1688 		// The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
1689 // Wake up the new guy too.
1690
1691 BUG_ON(fq_of_new_on_fq->owner != NULL);
1692
1693 fq = fq_of_new_on_fq;
1694 fq_of_new_on_fq = NULL;
1695 goto wake_kludge;
1696 }
1697
1698 unlock_fine_irqrestore(&sem->lock, flags);
1699 unlock_global_irqrestore(dgl_lock, flags);
1700
1701 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1702
1703out:
1704 return err;
1705}
1706
1707
1708
1709int ikglp_close(struct litmus_lock* l)
1710{
1711 struct task_struct *t = current;
1712 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1713 unsigned long flags;
1714
1715 int owner = 0;
1716 int i;
1717
1718 raw_spin_lock_irqsave(&sem->real_lock, flags);
1719
1720 for(i = 0; i < sem->nr_replicas; ++i) {
1721 if(sem->fifo_queues[i].owner == t) {
1722 owner = 1;
1723 break;
1724 }
1725 }
1726
1727 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1728
1729 if (owner)
1730 ikglp_unlock(l);
1731
1732 return 0;
1733}
1734
1735void ikglp_free(struct litmus_lock* l)
1736{
1737 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1738
1739 kfree(sem->fifo_queues);
1740 kfree(sem);
1741}
1742
1743
1744
1745struct litmus_lock* ikglp_new(int m,
1746 struct litmus_lock_ops* ops,
1747 void* __user arg)
1748{
1749 struct ikglp_semaphore* sem;
1750 int nr_replicas = 0;
1751 int i;
1752
1753 if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
1754 {
1755 return(NULL);
1756 }
1757 if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
1758 {
1759 return(NULL);
1760 }
1761 if(nr_replicas < 1)
1762 {
1763 return(NULL);
1764 }
1765
1766 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1767 if(!sem)
1768 {
1769 return NULL;
1770 }
1771
1772 sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
1773 if(!sem->fifo_queues)
1774 {
1775 kfree(sem);
1776 return NULL;
1777 }
1778
1779 sem->litmus_lock.ops = ops;
1780
1781#ifdef CONFIG_DEBUG_SPINLOCK
1782 {
1783 __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
1784 }
1785#else
1786 raw_spin_lock_init(&sem->lock);
1787#endif
1788
1789 raw_spin_lock_init(&sem->real_lock);
1790
1791 sem->nr_replicas = nr_replicas;
1792 sem->m = m;
1793 sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
1794 sem->nr_in_fifos = 0;
1795
1796 TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
1797 sem->m,
1798 sem->nr_replicas,
1799 sem->max_fifo_len);
1800
1801 for(i = 0; i < nr_replicas; ++i)
1802 {
1803 struct fifo_queue* q = &(sem->fifo_queues[i]);
1804
1805 q->owner = NULL;
1806 q->hp_waiter = NULL;
1807 init_waitqueue_head(&q->wait);
1808 q->count = 0;
1809
1810 q->global_heap_node.task = NULL;
1811 INIT_BINHEAP_NODE(&q->global_heap_node.node);
1812
1813 q->donee_heap_node.task = NULL;
1814 q->donee_heap_node.donor_info = NULL;
1815 q->donee_heap_node.fq = NULL;
1816 INIT_BINHEAP_NODE(&q->donee_heap_node.node);
1817
1818 q->nest.lock = (struct litmus_lock*)sem;
1819 q->nest.hp_waiter_eff_prio = NULL;
1820 q->nest.hp_waiter_ptr = &q->hp_waiter;
1821 INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
1822 }
1823
1824 sem->shortest_fifo_queue = &sem->fifo_queues[0];
1825
1826 sem->top_m_size = 0;
1827
1828 // init heaps
1829 INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
1830 INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
1831 INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
1832 INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
1833 INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
1834
1835#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1836 sem->aff_obs = NULL;
1837#endif
1838
1839 return &sem->litmus_lock;
1840}
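/* Illustrative sketch, not part of the patch above: ikglp_new() bounds each
 * FIFO queue at ceil(m/k) using integer arithmetic only. A minimal user-space
 * check of that ceiling-division identity (all sample values invented):
 */
#include <assert.h>
#include <stdio.h>

/* same integer ceiling-division pattern as sem->max_fifo_len above */
static int ikglp_max_fifo_len(int m, int k)
{
	return (m / k) + ((m % k) != 0);
}

int main(void)
{
	assert(ikglp_max_fifo_len(12, 5) == 3);	/* 12 requests, 5 replicas */
	assert(ikglp_max_fifo_len(8, 4) == 2);	/* evenly divisible case */
	printf("max_fifo_len = ceil(m/k) holds for the samples\n");
	return 0;
}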
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1871
1872static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
1873{
1874 int gpu = replica % aff->nr_rsrc;
1875 return gpu;
1876}
1877
1878static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
1879{
1880 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
1881 return gpu;
1882}
1883
1884static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1885{
1886 int replica = gpu - aff->offset;
1887 return replica;
1888}
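/* Illustrative sketch, not part of the patch above: how the replica<->GPU
 * helpers partition replicas when several replicas share one GPU. The struct,
 * field values, and helper names below are invented stand-ins for struct
 * ikglp_affinity's nr_rsrc/offset fields.
 */
#include <assert.h>

struct aff_example {
	int nr_rsrc;	/* number of GPUs managed by this lock */
	int offset;	/* id of the first GPU assigned to this lock */
};

static int ex_replica_to_gpu(const struct aff_example *aff, int replica)
{
	return (replica % aff->nr_rsrc) + aff->offset;
}

static int ex_gpu_to_base_replica(const struct aff_example *aff, int gpu)
{
	return gpu - aff->offset;
}

int main(void)
{
	/* 8 replicas, 2 simultaneous users per GPU -> 4 GPUs starting at id 2;
	 * replicas 0 and 4 both map to GPU 2 */
	struct aff_example aff = { .nr_rsrc = 4, .offset = 2 };

	assert(ex_replica_to_gpu(&aff, 0) == 2);
	assert(ex_replica_to_gpu(&aff, 4) == 2);
	assert(ex_replica_to_gpu(&aff, 3) == 5);
	assert(ex_gpu_to_base_replica(&aff, 5) == 3);
	return 0;
}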
1889
1890
1891int ikglp_aff_obs_close(struct affinity_observer* obs)
1892{
1893 return 0;
1894}
1895
1896void ikglp_aff_obs_free(struct affinity_observer* obs)
1897{
1898 struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
1899 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1900 kfree(ikglp_aff->q_info);
1901 kfree(ikglp_aff);
1902}
1903
1904static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
1905 struct ikglp_affinity_ops* ikglp_ops,
1906 void* __user args)
1907{
1908 struct ikglp_affinity* ikglp_aff;
1909 struct gpu_affinity_observer_args aff_args;
1910 struct ikglp_semaphore* sem;
1911 int i;
1912 unsigned long flags;
1913
1914 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
1915 return(NULL);
1916 }
1917 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
1918 return(NULL);
1919 }
1920
1921 sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
1922
1923 if(sem->litmus_lock.type != IKGLP_SEM) {
1924 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
1925 return(NULL);
1926 }
1927
1928 if((aff_args.nr_simult_users <= 0) ||
1929 (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
1930 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
1931 "(%d) per replica. #replicas should be evenly divisible "
1932 "by #simult_users.\n",
1933 sem->litmus_lock.ident,
1934 sem->nr_replicas,
1935 aff_args.nr_simult_users);
1936 return(NULL);
1937 }
1938
1939 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1940 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1941 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1942// return(NULL);
1943 }
1944
1945 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1946 if(!ikglp_aff) {
1947 return(NULL);
1948 }
1949
1950 ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
1951 if(!ikglp_aff->q_info) {
1952 kfree(ikglp_aff);
1953 return(NULL);
1954 }
1955
1956 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1957 if(!ikglp_aff->nr_cur_users_on_rsrc) {
1958 kfree(ikglp_aff->q_info);
1959 kfree(ikglp_aff);
1960 return(NULL);
1961 }
1962
1963 affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
1964
1965 ikglp_aff->ops = ikglp_ops;
1966 ikglp_aff->offset = aff_args.replica_to_gpu_offset;
1967 ikglp_aff->nr_simult = aff_args.nr_simult_users;
1968 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
1969 ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
1970
1971 TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
1972 "nr_rsrc = %d, relaxed_fifo_len = %d\n",
1973 ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc,
1974 ikglp_aff->relax_max_fifo_len);
1975
1976 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
1977
1978 for(i = 0; i < sem->nr_replicas; ++i) {
1979 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
1980 ikglp_aff->q_info[i].estimated_len = 0;
1981
1982 // multiple q_info's will point to the same resource (aka GPU) if
1983 // aff_args.nr_simult_users > 1
1984 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
1985 }
1986
1987 // attach observer to the lock
1988 raw_spin_lock_irqsave(&sem->real_lock, flags);
1989 sem->aff_obs = ikglp_aff;
1990 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1991
1992 return &ikglp_aff->obs;
1993}
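/* Illustrative sketch, not part of the patch above: the configuration check
 * performed by ikglp_aff_obs_new() -- the replica count must divide evenly
 * among the simultaneous users per GPU, and nr_rsrc is the resulting number
 * of GPUs. The function name and sample values are invented for the example.
 */
#include <assert.h>

static int nr_gpus_or_error(int nr_replicas, int nr_simult_users)
{
	if (nr_simult_users <= 0 || (nr_replicas % nr_simult_users) != 0)
		return -1;	/* configuration rejected, as above */
	return nr_replicas / nr_simult_users;
}

int main(void)
{
	assert(nr_gpus_or_error(8, 2) == 4);	/* 8 replicas, 2 per GPU -> 4 GPUs */
	assert(nr_gpus_or_error(9, 2) == -1);	/* not evenly divisible -> rejected */
	return 0;
}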
1994
1995
1996
1997
1998static int gpu_replica_to_resource(struct ikglp_affinity* aff,
1999 struct fifo_queue* fq) {
2000 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2001 return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
2002}
2003
2004
2005// Smart IKGLP Affinity
2006
2007//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
2008//{
2009// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2010// struct ikglp_queue_info *shortest = &aff->q_info[0];
2011// int i;
2012//
2013// for(i = 1; i < sem->nr_replicas; ++i) {
2014// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
2015// shortest = &aff->q_info[i];
2016// }
2017// }
2018//
2019// return(shortest);
2020//}
2021
2022struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2023{
2024 	// advise_enqueue must be smart so as not to break IKGLP rules:
2025 	//  * No queue can be greater than ceil(m/k) in length. We may return
2026 	//    such a queue, but IKGLP will be smart enough to send requests
2027 	//    to donors or PQ.
2028 	//  * Cannot let a queue idle if there exist waiting PQ/donors
2029 	//    -- needed to guarantee parallel progress of waiters.
2030 	//
2031 	// We may be able to relax some of these constraints, but this will have to
2032 	// be carefully evaluated.
2033 	//
2034 	// Heuristic strategy: Find the shortest queue that is not full.
2035
2036 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2037 lt_t min_len;
2038 int min_nr_users;
2039 struct ikglp_queue_info *shortest;
2040 struct fifo_queue *to_enqueue;
2041 int i;
2042 int affinity_gpu;
2043
2044 int max_fifo_len = (aff->relax_max_fifo_len) ?
2045 sem->m : sem->max_fifo_len;
2046
2047 	// simply pick the shortest queue if we have no affinity, or if we have
2048 	// affinity with the shortest
2049 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
2050 affinity_gpu = aff->offset; // first gpu
2051 TRACE_CUR("no affinity\n");
2052 }
2053 else {
2054 affinity_gpu = tsk_rt(t)->last_gpu;
2055 }
2056
2057 // all things being equal, let's start with the queue with which we have
2058 // affinity. this helps us maintain affinity even when we don't have
2059 	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
2060 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
2061
2062 // if(shortest == aff->shortest_queue) {
2063 // TRACE_CUR("special case: have affinity with shortest queue\n");
2064 // goto out;
2065 // }
2066
2067 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
2068 min_nr_users = *(shortest->nr_cur_users);
2069
2070 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2071 get_gpu_estimate(t, MIG_LOCAL),
2072 ikglp_get_idx(sem, shortest->q),
2073 shortest->q->count,
2074 min_len);
2075
2076 for(i = 0; i < sem->nr_replicas; ++i) {
2077 if(&aff->q_info[i] != shortest) {
2078 if(aff->q_info[i].q->count < max_fifo_len) {
2079
2080 lt_t est_len =
2081 aff->q_info[i].estimated_len +
2082 get_gpu_estimate(t,
2083 gpu_migration_distance(tsk_rt(t)->last_gpu,
2084 replica_to_gpu(aff, i)));
2085
2086 			// queue is smaller, or they're equal and the other has a smaller number
2087 			// of total users.
2088 			//
2089 			// tie-break on the smallest number of simultaneous users. this only
2090 			// kicks in when there is more than one empty queue.
2091 if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */
2092 (est_len < min_len) || /* i-th queue has shortest length */
2093 ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */
2094 (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2095
2096 shortest = &aff->q_info[i];
2097 min_len = est_len;
2098 min_nr_users = *(aff->q_info[i].nr_cur_users);
2099 }
2100
2101 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2102 get_gpu_estimate(t,
2103 gpu_migration_distance(tsk_rt(t)->last_gpu,
2104 replica_to_gpu(aff, i))),
2105 ikglp_get_idx(sem, aff->q_info[i].q),
2106 aff->q_info[i].q->count,
2107 est_len);
2108 }
2109 else {
2110 TRACE_CUR("queue %d is too long. ineligible for enqueue.\n",
2111 ikglp_get_idx(sem, aff->q_info[i].q));
2112 }
2113 }
2114 }
2115
2116 if(shortest->q->count >= max_fifo_len) {
2117 TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
2118 ikglp_get_idx(sem, shortest->q));
2119 }
2120
2121 to_enqueue = shortest->q;
2122 TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
2123 ikglp_get_idx(sem, to_enqueue),
2124 to_enqueue->count,
2125 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2126
2127 return to_enqueue;
2128
2129 //return(sem->shortest_fifo_queue);
2130}
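/* Illustrative sketch, not part of the patch above: the selection rule of
 * gpu_ikglp_advise_enqueue() in isolation -- prefer a non-full queue with the
 * smallest estimated length, tie-breaking on the smaller number of current
 * resource users. The struct and sample values are invented.
 */
#include <assert.h>

struct queue_stat {
	unsigned long long est_len;	/* estimated time to drain the queue */
	int count;			/* requests currently in the queue */
	int nr_cur_users;		/* holders on the underlying GPU */
};

static int pick_queue(const struct queue_stat *q, int nr, int max_fifo_len)
{
	int best = 0, i;

	for (i = 1; i < nr; ++i) {
		if (q[i].count >= max_fifo_len)
			continue;	/* full queues are ineligible */
		if (q[best].count >= max_fifo_len ||
		    q[i].est_len < q[best].est_len ||
		    (q[i].est_len == q[best].est_len &&
		     q[i].nr_cur_users < q[best].nr_cur_users))
			best = i;
	}
	return best;
}

int main(void)
{
	struct queue_stat q[3] = {
		{ .est_len = 40, .count = 2, .nr_cur_users = 1 },
		{ .est_len = 40, .count = 1, .nr_cur_users = 0 },	/* tie, fewer users */
		{ .est_len = 10, .count = 3, .nr_cur_users = 0 },	/* shortest, but full */
	};
	assert(pick_queue(q, 3, 3) == 1);
	return 0;
}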
2131
2132
2133
2134
2135static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff,
2136 int dest_gpu,
2137 struct fifo_queue* fq)
2138{
2139 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2140 ikglp_wait_state_t *wait = NULL;
2141 int max_improvement = -(MIG_NONE+1);
2142 int replica = ikglp_get_idx(sem, fq);
2143
2144 if(waitqueue_active(&fq->wait)) {
2145 int this_gpu = replica_to_gpu(aff, replica);
2146 struct list_head *pos;
2147
2148 list_for_each(pos, &fq->wait.task_list) {
2149 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2150 ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2151
2152 int tmp_improvement =
2153 gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) -
2154 gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu);
2155
2156 if(tmp_improvement > max_improvement) {
2157 wait = tmp_wait;
2158 max_improvement = tmp_improvement;
2159
2160 if(max_improvement >= (MIG_NONE-1)) {
2161 goto out;
2162 }
2163 }
2164 }
2165
2166 BUG_ON(!wait);
2167 }
2168 else {
2169 TRACE_CUR("fq %d is empty!\n", replica);
2170 }
2171
2172out:
2173
2174 TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n",
2175 replica,
2176 (wait) ? wait->task->comm : "nil",
2177 (wait) ? wait->task->pid : -1,
2178 max_improvement);
2179
2180 return wait;
2181}
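/* Illustrative sketch, not part of the patch above: the affinity-improvement
 * metric pick_steal() maximizes -- the distance saved by serving a waiter on
 * dest_gpu instead of the GPU it currently queues on. The distance function
 * below is a toy stand-in for the kernel's gpu_migration_distance().
 */
#include <assert.h>

enum { TOY_MIG_LOCAL = 0, TOY_MIG_FAR = 2 };	/* simplified distance levels */

static int toy_migration_distance(int a, int b)
{
	return (a == b) ? TOY_MIG_LOCAL : TOY_MIG_FAR;
}

/* positive result: stealing to dest_gpu brings the waiter closer to its
 * last-used GPU than leaving it where it is */
static int steal_improvement(int this_gpu, int dest_gpu, int last_gpu)
{
	return toy_migration_distance(this_gpu, last_gpu) -
	       toy_migration_distance(dest_gpu, last_gpu);
}

int main(void)
{
	/* waiter last ran on GPU 1: stealing it to GPU 1 is an improvement */
	assert(steal_improvement(0, 1, 1) == TOY_MIG_FAR - TOY_MIG_LOCAL);
	/* moving it between two equally remote GPUs gains nothing */
	assert(steal_improvement(0, 2, 1) == 0);
	return 0;
}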
2182
2183
2184ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2185 struct fifo_queue* dst)
2186{
2187 	// Heuristic strategy: Find the task with the greatest improvement in affinity.
2188 //
2189 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2190 ikglp_wait_state_t *to_steal_state = NULL;
2191// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem);
2192 int max_improvement = -(MIG_NONE+1);
2193 int replica, i;
2194 int dest_gpu;
2195
2196 replica = ikglp_get_idx(sem, dst);
2197 dest_gpu = replica_to_gpu(aff, replica);
2198
2199 for(i = 0; i < sem->nr_replicas; ++i) {
2200 ikglp_wait_state_t *tmp_to_steal_state =
2201 pick_steal(aff, dest_gpu, &sem->fifo_queues[i]);
2202
2203 if(tmp_to_steal_state) {
2204 int tmp_improvement =
2205 gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) -
2206 gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu);
2207
2208 if(tmp_improvement > max_improvement) {
2209 to_steal_state = tmp_to_steal_state;
2210 max_improvement = tmp_improvement;
2211
2212 if(max_improvement >= (MIG_NONE-1)) {
2213 goto out;
2214 }
2215 }
2216 }
2217 }
2218
2219out:
2220 if(!to_steal_state) {
2221 TRACE_CUR("Could not find anyone to steal.\n");
2222 }
2223 else {
2224 TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2225 to_steal_state->task->comm, to_steal_state->task->pid,
2226 ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq),
2227 replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)),
2228 ikglp_get_idx(sem, dst),
2229 dest_gpu,
2230 max_improvement);
2231
2232// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2233// default_to_steal_state->task->comm, default_to_steal_state->task->pid,
2234// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq),
2235// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2236// ikglp_get_idx(sem, dst),
2237// replica_to_gpu(aff, ikglp_get_idx(sem, dst)),
2238//
2239// gpu_migration_distance(
2240// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2241// tsk_rt(default_to_steal_state->task)->last_gpu) -
2242// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu));
2243 }
2244
2245 return(to_steal_state);
2246}
2247
2248
2249static inline int has_donor(wait_queue_t* fq_wait)
2250{
2251 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2252 return(wait->donee_heap_node.donor_info != NULL);
2253}
2254
2255static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
2256 struct fifo_queue* fq,
2257 int* dist_from_head)
2258{
2259 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2260 struct task_struct *donee;
2261 ikglp_donee_heap_node_t *donee_node;
2262 struct task_struct *mth_highest = ikglp_mth_highest(sem);
2263
2264// lt_t now = litmus_clock();
2265//
2266// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
2267// ikglp_get_idx(sem, fq),
2268// mth_highest->comm, mth_highest->pid,
2269// (int)get_deadline(mth_highest) - now);
2270
2271 if(fq->owner &&
2272 fq->donee_heap_node.donor_info == NULL &&
2273 mth_highest != fq->owner &&
2274 litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
2275 donee = fq->owner;
2276 donee_node = &(fq->donee_heap_node);
2277 *dist_from_head = 0;
2278
2279 BUG_ON(donee != donee_node->task);
2280
2281 TRACE_CUR("picked owner of fq %d as donee\n",
2282 ikglp_get_idx(sem, fq));
2283
2284 goto out;
2285 }
2286 else if(waitqueue_active(&fq->wait)) {
2287 struct list_head *pos;
2288
2289
2290// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
2291// "(mth_highest != fq->owner) = %d "
2292// "(mth_highest > fq->owner) = %d\n",
2293// ikglp_get_idx(sem, fq),
2294// (fq->owner) ? fq->owner->comm : "nil",
2295// (fq->owner) ? fq->owner->pid : -1,
2296// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
2297// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
2298// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
2299// (mth_highest != fq->owner),
2300// (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
2301
2302
2303 *dist_from_head = 1;
2304
2305 // iterating from the start of the queue is nice since this means
2306 // the donee will be closer to obtaining a resource.
2307 list_for_each(pos, &fq->wait.task_list) {
2308 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2309 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2310
2311// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
2312// "(mth_highest != wait->task) = %d "
2313// "(mth_highest > wait->task) = %d\n",
2314// ikglp_get_idx(sem, fq),
2315// dist_from_head,
2316// wait->task->comm, wait->task->pid,
2317// (int)get_deadline(wait->task) - now,
2318// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
2319// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
2320// (mth_highest != wait->task),
2321// (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
2322
2323
2324 if(!has_donor(fq_wait) &&
2325 mth_highest != wait->task &&
2326 litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
2327 donee = (struct task_struct*) fq_wait->private;
2328 donee_node = &wait->donee_heap_node;
2329
2330 BUG_ON(donee != donee_node->task);
2331
2332 TRACE_CUR("picked waiter in fq %d as donee\n",
2333 ikglp_get_idx(sem, fq));
2334
2335 goto out;
2336 }
2337 ++(*dist_from_head);
2338 }
2339 }
2340
2341 donee = NULL;
2342 donee_node = NULL;
2343 //*dist_from_head = sem->max_fifo_len + 1;
2344 *dist_from_head = IKGLP_INVAL_DISTANCE;
2345
2346 TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
2347
2348out:
2349
2350 TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
2351 ikglp_get_idx(sem, fq),
2352 (donee) ? (donee)->comm : "nil",
2353 (donee) ? (donee)->pid : -1,
2354 *dist_from_head);
2355
2356 return donee_node;
2357}
2358
2359ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
2360 struct ikglp_affinity* aff,
2361 struct task_struct* donor)
2362{
2363 	// Heuristic strategy: Find the highest-priority donee that is waiting on
2364 	// a queue closest to our affinity. (1) The donee CANNOT already have a
2365 	// donor (exception: donee is the lowest-prio task in the donee heap).
2366 	// (2) Requests in 'top_m' heap are ineligible.
2367 	//
2368 	// Further strategy: amongst eligible donees waiting for the same GPU, pick
2369 // the one closest to the head of the FIFO queue (including owners).
2370 //
2371 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2372 ikglp_donee_heap_node_t *donee_node;
2373 gpu_migration_dist_t distance;
2374 int start, i, j;
2375
2376 ikglp_donee_heap_node_t *default_donee;
2377 ikglp_wait_state_t *default_donee_donor_info;
2378
2379 if(tsk_rt(donor)->last_gpu < 0) {
2380 // no affinity. just return the min prio, like standard IKGLP
2381 // TODO: Find something closer to the head of the queue??
2382 donee_node = binheap_top_entry(&sem->donees,
2383 ikglp_donee_heap_node_t,
2384 node);
2385 goto out;
2386 }
2387
2388
2389 	// Temporarily break any donation relation of the default donee (the lowest
2390 	// prio task in the FIFO queues) to make it eligible for selection below.
2391 	//
2392 	// NOTE: The original donor relation *must* be restored, even if we select
2393 	// the default donee through affinity-aware selection, before returning
2394 // from this function so we don't screw up our heap ordering.
2395 // The standard IKGLP algorithm will steal the donor relationship if needed.
2396 default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2397 default_donee_donor_info = default_donee->donor_info; // back-up donor relation
2398 default_donee->donor_info = NULL; // temporarily break any donor relation.
2399
2400 // initialize our search
2401 donee_node = NULL;
2402 distance = MIG_NONE;
2403
2404 // TODO: The below search logic may work well for locating nodes to steal
2405 // when an FQ goes idle. Validate this code and apply it to stealing.
2406
2407 // begin search with affinity GPU.
2408 start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
2409 i = start;
2410 do { // "for each gpu" / "for each aff->nr_rsrc"
2411 gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
2412
2413 // only interested in queues that will improve our distance
2414 if(temp_distance < distance || donee_node == NULL) {
2415 int dist_from_head = IKGLP_INVAL_DISTANCE;
2416
2417 			TRACE_CUR("searching for a donee on GPU %d\n", i);
2418
2419 // visit each queue and pick a donee. bail as soon as we find
2420 // one for this class.
2421
2422 for(j = 0; j < aff->nr_simult; ++j) {
2423 int temp_dist_from_head;
2424 ikglp_donee_heap_node_t *temp_donee_node;
2425 struct fifo_queue *fq;
2426
2427 fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
2428 temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
2429
2430 if(temp_dist_from_head < dist_from_head)
2431 {
2432 // we check all the FQs for this GPU to spread priorities
2433 // out across the queues. does this decrease jitter?
2434 donee_node = temp_donee_node;
2435 dist_from_head = temp_dist_from_head;
2436 }
2437 }
2438
2439 if(dist_from_head != IKGLP_INVAL_DISTANCE) {
2440 TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n",
2441 donee_node->task->comm, donee_node->task->pid,
2442 dist_from_head);
2443 }
2444 else {
2445 TRACE_CUR("found no eligible donors from GPU %d\n", i);
2446 }
2447 }
2448 else {
2449 TRACE_CUR("skipping GPU %d (distance = %d, best donor "
2450 "distance = %d)\n", i, temp_distance, distance);
2451 }
2452
2453 i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around
2454 } while (i != start);
2455
2456
2457 // restore old donor info state.
2458 default_donee->donor_info = default_donee_donor_info;
2459
2460 if(!donee_node) {
2461 donee_node = default_donee;
2462
2463 TRACE_CUR("Could not find a donee. We have to steal one.\n");
2464 WARN_ON(default_donee->donor_info == NULL);
2465 }
2466
2467out:
2468
2469 TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
2470 donee_node->task->comm, donee_node->task->pid,
2471 ikglp_get_idx(sem, donee_node->fq),
2472 replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
2473 donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
2474
2475 return(donee_node);
2476}
2477
2478
2479
2480static void __find_closest_donor(int target_gpu,
2481 struct binheap_node* donor_node,
2482 ikglp_wait_state_t** cur_closest,
2483 int* cur_dist)
2484{
2485 ikglp_wait_state_t *this_donor =
2486 binheap_entry(donor_node, ikglp_wait_state_t, node);
2487
2488 int this_dist =
2489 gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
2490
2491// TRACE_CUR("%s/%d: dist from target = %d\n",
2492// this_donor->task->comm,
2493// this_donor->task->pid,
2494// this_dist);
2495
2496 if(this_dist < *cur_dist) {
2497 // take this donor
2498 *cur_dist = this_dist;
2499 *cur_closest = this_donor;
2500 }
2501 else if(this_dist == *cur_dist) {
2502 	// priority tie-break. Even though this is a pre-order traversal,
2503 	// this is a heap, not a binary search tree, so we still need to do a
2504 	// priority comparison.
2505 if(!(*cur_closest) ||
2506 litmus->compare(this_donor->task, (*cur_closest)->task)) {
2507 *cur_dist = this_dist;
2508 *cur_closest = this_donor;
2509 }
2510 }
2511
2512 if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
2513 if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
2514}
2515
2516ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2517{
2518 	// Heuristic strategy: Find the donor with the closest affinity to fq.
2519 	// Tie-break on priority.
2520 
2521 	// We need to iterate over all the donors to do this. Unfortunately,
2522 	// our donors are organized in a heap. We'll visit each node with a
2523 	// recursive call. This is relatively safe since there are only sem->m
2524 	// donors, at most; we won't recurse deeply enough to have to worry about
2525 	// our stack. (even with 128 CPUs, our nest depth is at most 7 deep).
2526
2527 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2528 ikglp_wait_state_t *donor = NULL;
2529 int distance = MIG_NONE;
2530 int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
2531 ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2532
2533 __find_closest_donor(gpu, sem->donors.root, &donor, &distance);
2534
2535 TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
2536 "(non-aff wanted %s/%d). differs = %d\n",
2537 donor->task->comm, donor->task->pid,
2538 distance,
2539 ikglp_get_idx(sem, fq),
2540 default_donor->task->comm, default_donor->task->pid,
2541 (donor->task != default_donor->task)
2542 );
2543
2544 return(donor);
2545}
2546
2547
2548
2549void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2550{
2551 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2552 int replica = ikglp_get_idx(sem, fq);
2553 int gpu = replica_to_gpu(aff, replica);
2554 struct ikglp_queue_info *info = &aff->q_info[replica];
2555 lt_t est_time;
2556 lt_t est_len_before;
2557
2558 if(current == t) {
2559 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
2560 }
2561
2562 est_len_before = info->estimated_len;
2563 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2564 info->estimated_len += est_time;
2565
2566 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
2567 ikglp_get_idx(sem, info->q),
2568 est_len_before, est_time,
2569 info->estimated_len);
2570
2571 // if(aff->shortest_queue == info) {
2572 // // we may no longer be the shortest
2573 // aff->shortest_queue = ikglp_aff_find_shortest(aff);
2574 //
2575 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2576 // ikglp_get_idx(sem, aff->shortest_queue->q),
2577 // aff->shortest_queue->q->count,
2578 // aff->shortest_queue->estimated_len);
2579 // }
2580}
2581
2582void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2583{
2584 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2585 int replica = ikglp_get_idx(sem, fq);
2586 int gpu = replica_to_gpu(aff, replica);
2587 struct ikglp_queue_info *info = &aff->q_info[replica];
2588 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2589
2590 if(est_time > info->estimated_len) {
2591 WARN_ON(1);
2592 info->estimated_len = 0;
2593 }
2594 else {
2595 info->estimated_len -= est_time;
2596 }
2597
2598 TRACE_CUR("fq %d est len is now %llu\n",
2599 ikglp_get_idx(sem, info->q),
2600 info->estimated_len);
2601
2602 // check to see if we're the shortest queue now.
2603 // if((aff->shortest_queue != info) &&
2604 // (aff->shortest_queue->estimated_len > info->estimated_len)) {
2605 //
2606 // aff->shortest_queue = info;
2607 //
2608 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2609 // ikglp_get_idx(sem, info->q),
2610 // info->q->count,
2611 // info->estimated_len);
2612 // }
2613}
2614
2615void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2616 struct fifo_queue* fq,
2617 struct task_struct* t)
2618{
2619 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2620 int replica = ikglp_get_idx(sem, fq);
2621 int gpu = replica_to_gpu(aff, replica);
2622
2623 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
2624
2625 TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
2626 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
2627
2628 	// count the number of resource holders
2629 ++(*(aff->q_info[replica].nr_cur_users));
2630
2631 reg_nv_device(gpu, 1, t); // register
2632
2633 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
2634 reset_gpu_tracker(t);
2635 start_gpu_tracker(t);
2636}
2637
2638void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
2639 struct fifo_queue* fq,
2640 struct task_struct* t)
2641{
2642 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2643 int replica = ikglp_get_idx(sem, fq);
2644 int gpu = replica_to_gpu(aff, replica);
2645 lt_t est_time;
2646
2647 stop_gpu_tracker(t); // stop the tracker before we do anything else.
2648
2649 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2650
2651 	// count the number of resource holders
2652 --(*(aff->q_info[replica].nr_cur_users));
2653
2654 reg_nv_device(gpu, 0, t); // unregister
2655
2656 // update estimates
2657 update_gpu_estimate(t, get_gpu_time(t));
2658
2659 TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
2660 "estimated was %llu. diff is %d\n",
2661 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
2662 tsk_rt(t)->gpu_migration,
2663 get_gpu_time(t),
2664 est_time,
2665 (long long)get_gpu_time(t) - (long long)est_time);
2666
2667 tsk_rt(t)->last_gpu = gpu;
2668}
2669
2670struct ikglp_affinity_ops gpu_ikglp_affinity =
2671{
2672 .advise_enqueue = gpu_ikglp_advise_enqueue,
2673 .advise_steal = gpu_ikglp_advise_steal,
2674 .advise_donee_selection = gpu_ikglp_advise_donee_selection,
2675 .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
2676
2677 .notify_enqueue = gpu_ikglp_notify_enqueue,
2678 .notify_dequeue = gpu_ikglp_notify_dequeue,
2679 .notify_acquired = gpu_ikglp_notify_acquired,
2680 .notify_freed = gpu_ikglp_notify_freed,
2681
2682 .replica_to_resource = gpu_replica_to_resource,
2683};
2684
2685struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2686 void* __user args)
2687{
2688 return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
2689}
2690
2691
2692
2693
2694
2695
2696
2697
2698// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
2699
2700struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2701{
2702 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2703 int min_count;
2704 int min_nr_users;
2705 struct ikglp_queue_info *shortest;
2706 struct fifo_queue *to_enqueue;
2707 int i;
2708
2709 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
2710
2711 shortest = &aff->q_info[0];
2712 min_count = shortest->q->count;
2713 min_nr_users = *(shortest->nr_cur_users);
2714
2715 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2716 ikglp_get_idx(sem, shortest->q),
2717 shortest->q->count,
2718 min_nr_users);
2719
2720 for(i = 1; i < sem->nr_replicas; ++i) {
2721 int len = aff->q_info[i].q->count;
2722
2723 		// queue is smaller, or they're equal and the other has a smaller number
2724 		// of total users.
2725 		//
2726 		// tie-break on the smallest number of simultaneous users. this only
2727 		// kicks in when there is more than one empty queue.
2728 if((len < min_count) ||
2729 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2730 shortest = &aff->q_info[i];
2731 min_count = shortest->q->count;
2732 min_nr_users = *(aff->q_info[i].nr_cur_users);
2733 }
2734
2735 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2736 ikglp_get_idx(sem, aff->q_info[i].q),
2737 aff->q_info[i].q->count,
2738 *(aff->q_info[i].nr_cur_users));
2739 }
2740
2741 to_enqueue = shortest->q;
2742 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
2743 ikglp_get_idx(sem, to_enqueue),
2744 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2745
2746 return to_enqueue;
2747}
2748
2749ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2750 struct fifo_queue* dst)
2751{
2752 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2753 // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
2754 return ikglp_find_hp_waiter_to_steal(sem);
2755}
2756
2757ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
2758{
2759 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2760 ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2761 return(donee);
2762}
2763
2764ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2765{
2766 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2767 ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2768 return(donor);
2769}
2770
2771void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2772{
2773 // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
2774}
2775
2776void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2777{
2778 // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
2779}
2780
2781void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2782{
2783 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2784 int replica = ikglp_get_idx(sem, fq);
2785 int gpu = replica_to_gpu(aff, replica);
2786
2787 // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
2788
2789 	// count the number of resource holders
2790 ++(*(aff->q_info[replica].nr_cur_users));
2791
2792 reg_nv_device(gpu, 1, t); // register
2793}
2794
2795void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2796{
2797 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2798 int replica = ikglp_get_idx(sem, fq);
2799 int gpu = replica_to_gpu(aff, replica);
2800
2801 // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
2802 	// count the number of resource holders
2803 --(*(aff->q_info[replica].nr_cur_users));
2804
2805 reg_nv_device(gpu, 0, t); // unregister
2806}
2807
2808struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
2809{
2810 .advise_enqueue = simple_gpu_ikglp_advise_enqueue,
2811 .advise_steal = simple_gpu_ikglp_advise_steal,
2812 .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
2813 .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
2814
2815 .notify_enqueue = simple_gpu_ikglp_notify_enqueue,
2816 .notify_dequeue = simple_gpu_ikglp_notify_dequeue,
2817 .notify_acquired = simple_gpu_ikglp_notify_acquired,
2818 .notify_freed = simple_gpu_ikglp_notify_freed,
2819
2820 .replica_to_resource = gpu_replica_to_resource,
2821};
2822
2823struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2824 void* __user args)
2825{
2826 return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
2827}
2828
2829#endif
2830
2831
2832
2833
2834
2835
2836
2837
2838
diff --git a/litmus/jobs.c b/litmus/jobs.c
index bc8246572e54..9fe4eb1fa168 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -13,6 +13,8 @@ static inline void setup_release(struct task_struct *t, lt_t release)
13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); 13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
14 t->rt_param.job_params.exec_time = 0; 14 t->rt_param.job_params.exec_time = 0;
15 15
16 clear_bit(RT_JOB_SIG_BUDGET_SENT, &t->rt_param.job_params.flags);
17
16 /* update job sequence number */ 18 /* update job sequence number */
17 t->rt_param.job_params.job_no++; 19 t->rt_param.job_params.job_no++;
18 20
@@ -23,6 +25,14 @@ static inline void setup_release(struct task_struct *t, lt_t release)
23void prepare_for_next_period(struct task_struct *t) 25void prepare_for_next_period(struct task_struct *t)
24{ 26{
25 BUG_ON(!t); 27 BUG_ON(!t);
28
29 /* Record lateness before we set up the next job's
30 * release and deadline. Lateness may be negative.
31 */
32 t->rt_param.job_params.lateness =
33 (long long)litmus_clock() -
34 (long long)t->rt_param.job_params.deadline;
35
26 setup_release(t, get_release(t) + get_rt_period(t)); 36 setup_release(t, get_release(t) + get_rt_period(t));
27} 37}
28 38
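The jobs.c hunk records lateness (completion time minus deadline) before the next job's release and deadline are set up, and notes that the value may be negative when a job finishes early. A tiny illustrative sketch of that bookkeeping, with tardiness derived as max(0, lateness); the type name is a signed stand-in for the kernel's time type:

#include <stdio.h>

typedef long long lt_t_signed; /* stand-in for signed time (ns) */

/* lateness as recorded above: negative when the job completed early */
static lt_t_signed lateness(lt_t_signed now, lt_t_signed deadline)
{
	return now - deadline;
}

int main(void)
{
	lt_t_signed deadline = 100, completed_at = 92;
	lt_t_signed late = lateness(completed_at, deadline);
	lt_t_signed tardy = late > 0 ? late : 0;
	printf("lateness = %lld, tardiness = %lld\n", late, tardy); /* -8, 0 */
	return 0;
}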
diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c
new file mode 100644
index 000000000000..5ef5e54d600d
--- /dev/null
+++ b/litmus/kexclu_affinity.c
@@ -0,0 +1,92 @@
1#include <litmus/fdso.h>
2#include <litmus/sched_plugin.h>
3#include <litmus/trace.h>
4#include <litmus/litmus.h>
5#include <litmus/locking.h>
6
7#include <litmus/kexclu_affinity.h>
8
9static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg);
10static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg);
11static int close_generic_aff_obs(struct od_table_entry* entry);
12static void destroy_generic_aff_obs(obj_type_t type, void* sem);
13
14struct fdso_ops generic_affinity_ops = {
15 .create = create_generic_aff_obs,
16 .open = open_generic_aff_obs,
17 .close = close_generic_aff_obs,
18 .destroy = destroy_generic_aff_obs
19};
20
21static atomic_t aff_obs_id_gen = ATOMIC_INIT(0);
22
23static inline bool is_affinity_observer(struct od_table_entry *entry)
24{
25 return (entry->class == &generic_affinity_ops);
26}
27
28static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry)
29{
30 BUG_ON(!is_affinity_observer(entry));
31 return (struct affinity_observer*) entry->obj->obj;
32}
33
34static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg)
35{
36 struct affinity_observer* aff_obs;
37 int err;
38
39 err = litmus->allocate_aff_obs(&aff_obs, type, arg);
40 if (err == 0) {
41 BUG_ON(!aff_obs->lock);
42 aff_obs->type = type;
43 *obj_ref = aff_obs;
44 }
45 return err;
46}
47
48static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg)
49{
50 struct affinity_observer* aff_obs = get_affinity_observer(entry);
51 if (aff_obs->ops->open)
52 return aff_obs->ops->open(aff_obs, arg);
53 else
54 return 0; /* default: any task can open it */
55}
56
57static int close_generic_aff_obs(struct od_table_entry* entry)
58{
59 struct affinity_observer* aff_obs = get_affinity_observer(entry);
60 if (aff_obs->ops->close)
61 return aff_obs->ops->close(aff_obs);
62 else
63 return 0; /* default: closing succeeds */
64}
65
66static void destroy_generic_aff_obs(obj_type_t type, void* obj)
67{
68 struct affinity_observer* aff_obs = (struct affinity_observer*) obj;
69 aff_obs->ops->deallocate(aff_obs);
70}
71
72
73struct litmus_lock* get_lock_from_od(int od)
74{
75 extern struct fdso_ops generic_lock_ops;
76
77 struct od_table_entry *entry = get_entry_for_od(od);
78
79 if(entry && entry->class == &generic_lock_ops) {
80 return (struct litmus_lock*) entry->obj->obj;
81 }
82 return NULL;
83}
84
85void affinity_observer_new(struct affinity_observer* aff,
86 struct affinity_observer_ops* ops,
87 struct affinity_observer_args* args)
88{
89 aff->ops = ops;
90 aff->lock = get_lock_from_od(args->lock_od);
91 aff->ident = atomic_inc_return(&aff_obs_id_gen);
92} \ No newline at end of file
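kexclu_affinity.c follows the usual fdso object-descriptor pattern: each od-table entry remembers the class (ops table) it was created with, and typed lookups such as get_lock_from_od() refuse to cast unless the class matches. A simplified standalone sketch of that guard, with stand-in types (the struct names below are illustrative, not the kernel's):

#include <stdio.h>
#include <stddef.h>

struct fdso_class { const char *name; };
struct od_entry   { const struct fdso_class *class; void *obj; };

static const struct fdso_class lock_class = { "generic_lock_ops" };
static const struct fdso_class aff_class  = { "generic_affinity_ops" };

/* return the object only if the entry was created with the expected class */
static void* get_as(struct od_entry *e, const struct fdso_class *want)
{
	return (e && e->class == want) ? e->obj : NULL;
}

int main(void)
{
	int lock_obj = 42;
	struct od_entry e = { &lock_class, &lock_obj };

	printf("as lock: %p\n", get_as(&e, &lock_class)); /* valid cast */
	printf("as aff : %p\n", get_as(&e, &aff_class));  /* NULL: wrong class */
	return 0;
}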
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
new file mode 100644
index 000000000000..bff857ed8d4e
--- /dev/null
+++ b/litmus/kfmlp_lock.c
@@ -0,0 +1,1002 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/kfmlp_lock.h>
14
15static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
16 struct kfmlp_queue* queue)
17{
18 return (queue - &sem->queues[0]);
19}
20
21static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
22 struct task_struct* holder)
23{
24 int i;
25 for(i = 0; i < sem->num_resources; ++i)
26 if(sem->queues[i].owner == holder)
27 return(&sem->queues[i]);
28 return(NULL);
29}
30
31/* caller is responsible for locking */
32static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
33 struct task_struct *skip)
34{
35 struct list_head *pos;
36 struct task_struct *queued, *found = NULL;
37
38 list_for_each(pos, &kqueue->wait.task_list) {
39 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
40 task_list)->private;
41
42 /* Compare task prios, find high prio task. */
43 //if (queued != skip && edf_higher_prio(queued, found))
44 if (queued != skip && litmus->compare(queued, found))
45 found = queued;
46 }
47 return found;
48}
49
50static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
51 struct kfmlp_queue* search_start)
52{
53 // we start our search at search_start instead of at the beginning of the
54 // queue list to load-balance across all resources.
55 struct kfmlp_queue* step = search_start;
56 struct kfmlp_queue* shortest = sem->shortest_queue;
57
58 do
59 {
60 step = (step+1 != &sem->queues[sem->num_resources]) ?
61 step+1 : &sem->queues[0];
62
63 if(step->count < shortest->count)
64 {
65 shortest = step;
66 if(step->count == 0)
67 break; /* can't get any shorter */
68 }
69
70 }while(step != search_start);
71
72 return(shortest);
73}
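kfmlp_find_shortest() above scans the replica queues circularly, starting just past search_start so that ties are not always resolved in favor of low-indexed queues, and bails out early when an empty queue is found. A standalone sketch of that scan (array and parameter names are illustrative):

#include <stdio.h>

static int find_shortest(const int *count, int n, int start, int cur_shortest)
{
	int shortest = cur_shortest;
	int step = start;

	do {
		step = (step + 1 != n) ? step + 1 : 0;   /* wrap around */
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break;                   /* can't get any shorter */
		}
	} while (step != start);

	return shortest;
}

int main(void)
{
	int count[4] = { 2, 1, 0, 3 };
	printf("shortest = %d\n", find_shortest(count, 4, /*start=*/3, /*cur=*/0)); /* 2 */
	return 0;
}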
74
75
76static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
77 wait_queue_t** to_steal,
78 struct kfmlp_queue** to_steal_from)
79{
80 /* must hold sem->lock */
81
82 int i;
83
84 *to_steal = NULL;
85 *to_steal_from = NULL;
86
87 for(i = 0; i < sem->num_resources; ++i)
88 {
89 if( (sem->queues[i].count > 1) &&
90 ((*to_steal_from == NULL) ||
91 //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
92 (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
93 {
94 *to_steal_from = &sem->queues[i];
95 }
96 }
97
98 if(*to_steal_from)
99 {
100 struct list_head *pos;
101 struct task_struct *target = (*to_steal_from)->hp_waiter;
102
103 TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
104 target->comm,
105 target->pid,
106 kfmlp_get_idx(sem, *to_steal_from));
107
108 list_for_each(pos, &(*to_steal_from)->wait.task_list)
109 {
110 wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
111 struct task_struct *queued = (struct task_struct*) node->private;
112 /* Compare task prios, find high prio task. */
113 if (queued == target)
114 {
115 *to_steal = node;
116
117 TRACE_CUR("steal: selected %s/%d from queue %d\n",
118 queued->comm, queued->pid,
119 kfmlp_get_idx(sem, *to_steal_from));
120
121 return queued;
122 }
123 }
124
125 TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
126 target->comm,
127 target->pid,
128 kfmlp_get_idx(sem, *to_steal_from));
129 }
130
131 return NULL;
132}
133
134static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
135 struct kfmlp_queue *dst,
136 wait_queue_t *wait,
137 struct kfmlp_queue *src)
138{
139 struct task_struct* t = (struct task_struct*) wait->private;
140
141 __remove_wait_queue(&src->wait, wait);
142 --(src->count);
143
144 if(t == src->hp_waiter) {
145 src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
146
147 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
148 kfmlp_get_idx(sem, src),
149 (src->hp_waiter) ? src->hp_waiter->comm : "nil",
150 (src->hp_waiter) ? src->hp_waiter->pid : -1);
151
152 if(src->owner && tsk_rt(src->owner)->inh_task == t) {
153 litmus->decrease_prio(src->owner, src->hp_waiter);
154 }
155 }
156
157 if(sem->shortest_queue->count > src->count) {
158 sem->shortest_queue = src;
159 TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
160 }
161
162#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
163 if(sem->aff_obs) {
164 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
165 }
166#endif
167
168 init_waitqueue_entry(wait, t);
169 __add_wait_queue_tail_exclusive(&dst->wait, wait);
170 ++(dst->count);
171
172 if(litmus->compare(t, dst->hp_waiter)) {
173 dst->hp_waiter = t;
174
175 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
176 kfmlp_get_idx(sem, dst),
177 t->comm, t->pid);
178
179 if(dst->owner && litmus->compare(t, dst->owner))
180 {
181 litmus->increase_prio(dst->owner, t);
182 }
183 }
184
185#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
186 if(sem->aff_obs) {
187 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
188 }
189#endif
190}
191
192
193int kfmlp_lock(struct litmus_lock* l)
194{
195 struct task_struct* t = current;
196 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
197 struct kfmlp_queue* my_queue = NULL;
198 wait_queue_t wait;
199 unsigned long flags;
200
201 if (!is_realtime(t))
202 return -EPERM;
203
204 spin_lock_irqsave(&sem->lock, flags);
205
206#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
207 if(sem->aff_obs) {
208 my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
209 }
210 if(!my_queue) {
211 my_queue = sem->shortest_queue;
212 }
213#else
214 my_queue = sem->shortest_queue;
215#endif
216
217 if (my_queue->owner) {
218 /* resource is not free => must suspend and wait */
219 TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
220 kfmlp_get_idx(sem, my_queue),
221 my_queue->count);
222
223 init_waitqueue_entry(&wait, t);
224
225 /* FIXME: interruptible would be nice some day */
226 set_task_state(t, TASK_UNINTERRUPTIBLE);
227
228 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
229
230 TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
231 kfmlp_get_idx(sem, my_queue),
232 (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
233 (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
234
235 /* check if we need to activate priority inheritance */
236 //if (edf_higher_prio(t, my_queue->hp_waiter))
237 if (litmus->compare(t, my_queue->hp_waiter)) {
238 my_queue->hp_waiter = t;
239 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
240 kfmlp_get_idx(sem, my_queue),
241 t->comm, t->pid);
242
243 //if (edf_higher_prio(t, my_queue->owner))
244 if (litmus->compare(t, my_queue->owner)) {
245 litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
246 }
247 }
248
249 ++(my_queue->count);
250
251 if(my_queue == sem->shortest_queue) {
252 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
253 TRACE_CUR("queue %d is the shortest\n",
254 kfmlp_get_idx(sem, sem->shortest_queue));
255 }
256
257#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
258 if(sem->aff_obs) {
259 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
260 }
261#endif
262
263 /* release lock before sleeping */
264 spin_unlock_irqrestore(&sem->lock, flags);
265
266 /* We depend on the FIFO order. Thus, we don't need to recheck
267 * when we wake up; we are guaranteed to have the lock since
268 * there is only one wake up per release (or steal).
269 */
270 schedule();
271
272
273 if(my_queue->owner == t) {
274 TRACE_CUR("queue %d: acquired through waiting\n",
275 kfmlp_get_idx(sem, my_queue));
276 }
277 else {
278 /* this case may happen if our wait entry was stolen
279 between queues. record where we went. */
280 my_queue = kfmlp_get_queue(sem, t);
281
282 BUG_ON(!my_queue);
283 TRACE_CUR("queue %d: acquired through stealing\n",
284 kfmlp_get_idx(sem, my_queue));
285 }
286 }
287 else {
288 TRACE_CUR("queue %d: acquired immediately\n",
289 kfmlp_get_idx(sem, my_queue));
290
291 my_queue->owner = t;
292
293 ++(my_queue->count);
294
295 if(my_queue == sem->shortest_queue) {
296 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
297 TRACE_CUR("queue %d is the shortest\n",
298 kfmlp_get_idx(sem, sem->shortest_queue));
299 }
300
301#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
302 if(sem->aff_obs) {
303 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
304 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
305 }
306#endif
307
308 spin_unlock_irqrestore(&sem->lock, flags);
309 }
310
311
312#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
313 if(sem->aff_obs) {
314 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
315 }
316#endif
317 return kfmlp_get_idx(sem, my_queue);
318}
319
320
321int kfmlp_unlock(struct litmus_lock* l)
322{
323 struct task_struct *t = current, *next;
324 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
325 struct kfmlp_queue *my_queue, *to_steal_from;
326 unsigned long flags;
327 int err = 0;
328
329 my_queue = kfmlp_get_queue(sem, t);
330
331 if (!my_queue) {
332 err = -EINVAL;
333 goto out;
334 }
335
336 spin_lock_irqsave(&sem->lock, flags);
337
338 TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
339
340 my_queue->owner = NULL; // clear ownership
341 --(my_queue->count);
342
343 if(my_queue->count < sem->shortest_queue->count)
344 {
345 sem->shortest_queue = my_queue;
346 TRACE_CUR("queue %d is the shortest\n",
347 kfmlp_get_idx(sem, sem->shortest_queue));
348 }
349
350#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
351 if(sem->aff_obs) {
352 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
353 sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
354 }
355#endif
356
357 /* we lose the benefit of priority inheritance (if any) */
358 if (tsk_rt(t)->inh_task)
359 litmus->decrease_prio(t, NULL);
360
361
362 /* check if there are jobs waiting for this resource */
363RETRY:
364 next = __waitqueue_remove_first(&my_queue->wait);
365 if (next) {
366	/* next becomes the resource holder */
367 my_queue->owner = next;
368
369#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
370 if(sem->aff_obs) {
371 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
372 }
373#endif
374
375 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
376 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
377
378 /* determine new hp_waiter if necessary */
379 if (next == my_queue->hp_waiter) {
380 TRACE_TASK(next, "was highest-prio waiter\n");
381 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
382 if (my_queue->hp_waiter)
383 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
384 else
385 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
386 } else {
387 /* Well, if next is not the highest-priority waiter,
388 * then it ought to inherit the highest-priority
389 * waiter's priority. */
390 litmus->increase_prio(next, my_queue->hp_waiter);
391 }
392
393 /* wake up next */
394 wake_up_process(next);
395 }
396 else {
397 // TODO: put this stealing logic before we attempt to release
398	// our resource. (simplifies code and gets rid of the ugly goto RETRY.)
399 wait_queue_t *wait;
400
401 TRACE_CUR("queue %d: looking to steal someone...\n",
402 kfmlp_get_idx(sem, my_queue));
403
404#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
405 next = (sem->aff_obs) ?
406 sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
407 kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
408#else
409 next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
410#endif
411
412 if(next) {
413 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
414 kfmlp_get_idx(sem, my_queue),
415 next->comm, next->pid,
416 kfmlp_get_idx(sem, to_steal_from));
417
418 kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
419
420 goto RETRY; // will succeed this time.
421 }
422 else {
423 TRACE_CUR("queue %d: no one to steal.\n",
424 kfmlp_get_idx(sem, my_queue));
425 }
426 }
427
428 spin_unlock_irqrestore(&sem->lock, flags);
429
430out:
431 return err;
432}
433
434int kfmlp_close(struct litmus_lock* l)
435{
436 struct task_struct *t = current;
437 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
438 struct kfmlp_queue *my_queue;
439 unsigned long flags;
440
441 int owner;
442
443 spin_lock_irqsave(&sem->lock, flags);
444
445 my_queue = kfmlp_get_queue(sem, t);
446 owner = (my_queue) ? (my_queue->owner == t) : 0;
447
448 spin_unlock_irqrestore(&sem->lock, flags);
449
450 if (owner)
451 kfmlp_unlock(l);
452
453 return 0;
454}
455
456void kfmlp_free(struct litmus_lock* l)
457{
458 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
459 kfree(sem->queues);
460 kfree(sem);
461}
462
463
464
465struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
466{
467 struct kfmlp_semaphore* sem;
468 int num_resources = 0;
469 int i;
470
471 if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
472 {
473 return(NULL);
474 }
475 if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
476 {
477 return(NULL);
478 }
479 if(num_resources < 1)
480 {
481 return(NULL);
482 }
483
484 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
485 if(!sem)
486 {
487 return(NULL);
488 }
489
490 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
491 if(!sem->queues)
492 {
493 kfree(sem);
494 return(NULL);
495 }
496
497 sem->litmus_lock.ops = ops;
498 spin_lock_init(&sem->lock);
499 sem->num_resources = num_resources;
500
501 for(i = 0; i < num_resources; ++i)
502 {
503 sem->queues[i].owner = NULL;
504 sem->queues[i].hp_waiter = NULL;
505 init_waitqueue_head(&sem->queues[i].wait);
506 sem->queues[i].count = 0;
507 }
508
509 sem->shortest_queue = &sem->queues[0];
510
511#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
512 sem->aff_obs = NULL;
513#endif
514
515 return &sem->litmus_lock;
516}
517
518
519
520
521#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
522
523static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
524{
525 int gpu = replica % aff->nr_rsrc;
526 return gpu;
527}
528
529static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
530{
531 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
532 return gpu;
533}
534
535static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
536{
537 int replica = gpu - aff->offset;
538 return replica;
539}
540
541
542int kfmlp_aff_obs_close(struct affinity_observer* obs)
543{
544 return 0;
545}
546
547void kfmlp_aff_obs_free(struct affinity_observer* obs)
548{
549 struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
550 kfree(kfmlp_aff->nr_cur_users_on_rsrc);
551 kfree(kfmlp_aff->q_info);
552 kfree(kfmlp_aff);
553}
554
555static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
556 struct kfmlp_affinity_ops* kfmlp_ops,
557 void* __user args)
558{
559 struct kfmlp_affinity* kfmlp_aff;
560 struct gpu_affinity_observer_args aff_args;
561 struct kfmlp_semaphore* sem;
562 int i;
563 unsigned long flags;
564
565 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
566 return(NULL);
567 }
568 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
569 return(NULL);
570 }
571
572 sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
573
574 if(sem->litmus_lock.type != KFMLP_SEM) {
575 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
576 return(NULL);
577 }
578
579 if((aff_args.nr_simult_users <= 0) ||
580 (sem->num_resources%aff_args.nr_simult_users != 0)) {
581 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
582 "(%d) per replica. #replicas should be evenly divisible "
583 "by #simult_users.\n",
584 sem->litmus_lock.ident,
585 sem->num_resources,
586 aff_args.nr_simult_users);
587 return(NULL);
588 }
589
590 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
591 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
592 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
593// return(NULL);
594 }
595
596 kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
597 if(!kfmlp_aff) {
598 return(NULL);
599 }
600
601 kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
602 if(!kfmlp_aff->q_info) {
603 kfree(kfmlp_aff);
604 return(NULL);
605 }
606
607 kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
608 if(!kfmlp_aff->nr_cur_users_on_rsrc) {
609 kfree(kfmlp_aff->q_info);
610 kfree(kfmlp_aff);
611 return(NULL);
612 }
613
614 affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
615
616 kfmlp_aff->ops = kfmlp_ops;
617 kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
618 kfmlp_aff->nr_simult = aff_args.nr_simult_users;
619 kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
620
621 memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc));
622
623 for(i = 0; i < sem->num_resources; ++i) {
624 kfmlp_aff->q_info[i].q = &sem->queues[i];
625 kfmlp_aff->q_info[i].estimated_len = 0;
626
627 // multiple q_info's will point to the same resource (aka GPU) if
628 // aff_args.nr_simult_users > 1
629 kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
630 }
631
632 // attach observer to the lock
633 spin_lock_irqsave(&sem->lock, flags);
634 sem->aff_obs = kfmlp_aff;
635 spin_unlock_irqrestore(&sem->lock, flags);
636
637 return &kfmlp_aff->obs;
638}
639
640
641
642
643static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
644 struct kfmlp_queue* fq) {
645 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
646 return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
647}
648
649
650// Smart KFMLP Affinity
651
652//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
653//{
654// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
655// struct kfmlp_queue_info *shortest = &aff->q_info[0];
656// int i;
657//
658// for(i = 1; i < sem->num_resources; ++i) {
659// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
660// shortest = &aff->q_info[i];
661// }
662// }
663//
664// return(shortest);
665//}
666
667struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
668{
669 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
670 lt_t min_len;
671 int min_nr_users;
672 struct kfmlp_queue_info *shortest;
673 struct kfmlp_queue *to_enqueue;
674 int i;
675 int affinity_gpu;
676
677	// simply pick the shortest queue if we have no affinity, or we have
678 // affinity with the shortest
679 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
680 affinity_gpu = aff->offset; // first gpu
681 TRACE_CUR("no affinity\n");
682 }
683 else {
684 affinity_gpu = tsk_rt(t)->last_gpu;
685 }
686
687 // all things being equal, let's start with the queue with which we have
688 // affinity. this helps us maintain affinity even when we don't have
689	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
690 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
691
692// if(shortest == aff->shortest_queue) {
693// TRACE_CUR("special case: have affinity with shortest queue\n");
694// goto out;
695// }
696
697 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
698 min_nr_users = *(shortest->nr_cur_users);
699
700 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
701 get_gpu_estimate(t, MIG_LOCAL),
702 kfmlp_get_idx(sem, shortest->q),
703 min_len);
704
705 for(i = 0; i < sem->num_resources; ++i) {
706 if(&aff->q_info[i] != shortest) {
707
708 lt_t est_len =
709 aff->q_info[i].estimated_len +
710 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
711
712 // queue is smaller, or they're equal and the other has a smaller number
713 // of total users.
714 //
716	// tie-break on the smallest number of simultaneous users. this only kicks in
717	// when there is more than one empty queue.
717 if((est_len < min_len) ||
718 ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
719 shortest = &aff->q_info[i];
720 min_len = est_len;
721 min_nr_users = *(aff->q_info[i].nr_cur_users);
722 }
723
724 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
725 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
726 kfmlp_get_idx(sem, aff->q_info[i].q),
727 est_len);
728 }
729 }
730
731 to_enqueue = shortest->q;
732 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
733 kfmlp_get_idx(sem, to_enqueue),
734 kfmlp_get_idx(sem, sem->shortest_queue));
735
736 return to_enqueue;
737}
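gpu_kfmlp_advise_enqueue() above costs each candidate queue as its current estimated length plus the task's expected critical-section length for the migration distance to that queue's GPU, and breaks ties on the number of current holders. A standalone sketch of that selection rule; the cost numbers are made up, and the starting candidate is simplified to index 0 rather than the affinity queue:

#include <stdio.h>

struct cand { unsigned long long est_len, mig_cost; int nr_users; };

static int pick(const struct cand *c, int n)
{
	int best = 0;
	unsigned long long best_cost = c[0].est_len + c[0].mig_cost;

	for (int i = 1; i < n; ++i) {
		unsigned long long cost = c[i].est_len + c[i].mig_cost;
		if (cost < best_cost ||
		    (cost == best_cost && c[i].nr_users < c[best].nr_users)) {
			best = i;
			best_cost = cost;
		}
	}
	return best;
}

int main(void)
{
	struct cand c[3] = {
		{ 100, 10, 2 },  /* local GPU: cheap migration, long queue   */
		{  60, 50, 1 },  /* same total cost, but fewer holders       */
		{  90, 40, 0 },
	};
	printf("enqueue on replica %d\n", pick(c, 3)); /* replica 1 wins the tie-break */
	return 0;
}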
738
739struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
740{
741 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
742
743 // For now, just steal highest priority waiter
744 // TODO: Implement affinity-aware stealing.
745
746 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
747}
748
749
750void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
751{
752 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
753 int replica = kfmlp_get_idx(sem, fq);
754 int gpu = replica_to_gpu(aff, replica);
755 struct kfmlp_queue_info *info = &aff->q_info[replica];
756 lt_t est_time;
757 lt_t est_len_before;
758
759 if(current == t) {
760 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
761 }
762
763 est_len_before = info->estimated_len;
764 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
765 info->estimated_len += est_time;
766
767 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
768 kfmlp_get_idx(sem, info->q),
769 est_len_before, est_time,
770 info->estimated_len);
771
772// if(aff->shortest_queue == info) {
773// // we may no longer be the shortest
774// aff->shortest_queue = kfmlp_aff_find_shortest(aff);
775//
776// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
777// kfmlp_get_idx(sem, aff->shortest_queue->q),
778// aff->shortest_queue->q->count,
779// aff->shortest_queue->estimated_len);
780// }
781}
782
783void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
784{
785 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
786 int replica = kfmlp_get_idx(sem, fq);
787 int gpu = replica_to_gpu(aff, replica);
788 struct kfmlp_queue_info *info = &aff->q_info[replica];
789 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
790
791 if(est_time > info->estimated_len) {
792 WARN_ON(1);
793 info->estimated_len = 0;
794 }
795 else {
796 info->estimated_len -= est_time;
797 }
798
799 TRACE_CUR("fq %d est len is now %llu\n",
800 kfmlp_get_idx(sem, info->q),
801 info->estimated_len);
802
803 // check to see if we're the shortest queue now.
804// if((aff->shortest_queue != info) &&
805// (aff->shortest_queue->estimated_len > info->estimated_len)) {
806//
807// aff->shortest_queue = info;
808//
809// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
810// kfmlp_get_idx(sem, info->q),
811// info->q->count,
812// info->estimated_len);
813// }
814}
815
816void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
817{
818 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
819 int replica = kfmlp_get_idx(sem, fq);
820 int gpu = replica_to_gpu(aff, replica);
821
822 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
823
824 TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
825 t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
826
827	// count the number of resource holders
828 ++(*(aff->q_info[replica].nr_cur_users));
829
830 reg_nv_device(gpu, 1, t); // register
831
832 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
833 reset_gpu_tracker(t);
834 start_gpu_tracker(t);
835}
836
837void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
838{
839 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
840 int replica = kfmlp_get_idx(sem, fq);
841 int gpu = replica_to_gpu(aff, replica);
842 lt_t est_time;
843
844 stop_gpu_tracker(t); // stop the tracker before we do anything else.
845
846 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
847
848 tsk_rt(t)->last_gpu = gpu;
849
850	// count the number of resource holders
851 --(*(aff->q_info[replica].nr_cur_users));
852
853 reg_nv_device(gpu, 0, t); // unregister
854
855 // update estimates
856 update_gpu_estimate(t, get_gpu_time(t));
857
858 TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n",
859 t->comm, t->pid, gpu,
860 get_gpu_time(t),
861 est_time,
862 (long long)get_gpu_time(t) - (long long)est_time);
863}
864
865struct kfmlp_affinity_ops gpu_kfmlp_affinity =
866{
867 .advise_enqueue = gpu_kfmlp_advise_enqueue,
868 .advise_steal = gpu_kfmlp_advise_steal,
869 .notify_enqueue = gpu_kfmlp_notify_enqueue,
870 .notify_dequeue = gpu_kfmlp_notify_dequeue,
871 .notify_acquired = gpu_kfmlp_notify_acquired,
872 .notify_freed = gpu_kfmlp_notify_freed,
873 .replica_to_resource = gpu_replica_to_resource,
874};
875
876struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
877 void* __user args)
878{
879 return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
880}
881
882
883
884
885
886
887
888
889// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
890
891struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
892{
893 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
894 int min_count;
895 int min_nr_users;
896 struct kfmlp_queue_info *shortest;
897 struct kfmlp_queue *to_enqueue;
898 int i;
899
900// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
901
902 shortest = &aff->q_info[0];
903 min_count = shortest->q->count;
904 min_nr_users = *(shortest->nr_cur_users);
905
906 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
907 kfmlp_get_idx(sem, shortest->q),
908 shortest->q->count,
909 min_nr_users);
910
911 for(i = 1; i < sem->num_resources; ++i) {
912 int len = aff->q_info[i].q->count;
913
914 // queue is smaller, or they're equal and the other has a smaller number
915 // of total users.
916 //
917	// tie-break on the smallest number of simultaneous users. this only kicks in
918	// when there is more than one empty queue.
919 if((len < min_count) ||
920 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
921 shortest = &aff->q_info[i];
922 min_count = shortest->q->count;
923 min_nr_users = *(aff->q_info[i].nr_cur_users);
924 }
925
926 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
927 kfmlp_get_idx(sem, aff->q_info[i].q),
928 aff->q_info[i].q->count,
929 *(aff->q_info[i].nr_cur_users));
930 }
931
932 to_enqueue = shortest->q;
933 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
934 kfmlp_get_idx(sem, to_enqueue),
935 kfmlp_get_idx(sem, sem->shortest_queue));
936
937 return to_enqueue;
938}
939
940struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
941{
942 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
943// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
944 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
945}
946
947void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
948{
949// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
950}
951
952void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
953{
954// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
955}
956
957void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
958{
959 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
960 int replica = kfmlp_get_idx(sem, fq);
961 int gpu = replica_to_gpu(aff, replica);
962
963// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
964
965	// count the number of resource holders
966 ++(*(aff->q_info[replica].nr_cur_users));
967
968 reg_nv_device(gpu, 1, t); // register
969}
970
971void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
972{
973 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
974 int replica = kfmlp_get_idx(sem, fq);
975 int gpu = replica_to_gpu(aff, replica);
976
977// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
978	// count the number of resource holders
979 --(*(aff->q_info[replica].nr_cur_users));
980
981 reg_nv_device(gpu, 0, t); // unregister
982}
983
984struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
985{
986 .advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
987 .advise_steal = simple_gpu_kfmlp_advise_steal,
988 .notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
989 .notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
990 .notify_acquired = simple_gpu_kfmlp_notify_acquired,
991 .notify_freed = simple_gpu_kfmlp_notify_freed,
992 .replica_to_resource = gpu_replica_to_resource,
993};
994
995struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
996 void* __user args)
997{
998 return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
999}
1000
1001#endif
1002
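The GPU affinity observers above map FIFO replicas onto physical GPUs with simple modular arithmetic: with nr_simult replicas ("simultaneous users") per GPU and a configurable GPU offset, replicas 0..nr_rsrc-1, nr_rsrc..2*nr_rsrc-1, ... fold onto the same set of GPUs. A small sketch of that mapping with illustrative values:

#include <stdio.h>

static int replica_to_gpu(int replica, int nr_rsrc, int offset)
{
	return replica % nr_rsrc + offset;
}

static int gpu_to_base_replica(int gpu, int offset)
{
	return gpu - offset;
}

int main(void)
{
	/* 8 replicas, 2 simultaneous users per GPU => nr_rsrc = 8/2 = 4 GPUs,
	 * mapped to physical GPUs 4..7 via offset 4. */
	int nr_rsrc = 4, offset = 4;

	for (int r = 0; r < 8; ++r)
		printf("replica %d -> gpu %d\n", r, replica_to_gpu(r, nr_rsrc, offset));
	printf("gpu 6 -> base replica %d\n", gpu_to_base_replica(6, offset));
	return 0;
}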
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 2300281b6b30..83e8ef3f42af 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -21,6 +21,10 @@
21#include <litmus/affinity.h> 21#include <litmus/affinity.h>
22#endif 22#endif
23 23
24#ifdef CONFIG_LITMUS_NVIDIA
25#include <litmus/nvidia_info.h>
26#endif
27
24/* Number of RT tasks that exist in the system */ 28/* Number of RT tasks that exist in the system */
25atomic_t rt_task_count = ATOMIC_INIT(0); 29atomic_t rt_task_count = ATOMIC_INIT(0);
26static DEFINE_RAW_SPINLOCK(task_transition_lock); 30static DEFINE_RAW_SPINLOCK(task_transition_lock);
@@ -51,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
51struct release_heap* release_heap_alloc(int gfp_flags); 55struct release_heap* release_heap_alloc(int gfp_flags);
52void release_heap_free(struct release_heap* rh); 56void release_heap_free(struct release_heap* rh);
53 57
58#ifdef CONFIG_LITMUS_NVIDIA
59/*
60 * sys_register_nv_device
61 * @nv_device_id: The Nvidia device id that the task wants to register
62 * @reg_action: set to '1' to register the specified device, zero to unregister.
63 * Syscall to register the task's designated Nvidia device in the NV_DEVICE_REG array.
64 * Returns EFAULT if nv_device_id is out of range,
65 * 0 on success.
66 */
67asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
68{
69 /* register the device to caller (aka 'current') */
70 return(reg_nv_device(nv_device_id, reg_action, current));
71}
72#else
73asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
74{
75 return(-EINVAL);
76}
77#endif
78
79
54/* 80/*
55 * sys_set_task_rt_param 81 * sys_set_task_rt_param
56 * @pid: Pid of the task which scheduling parameters must be changed 82 * @pid: Pid of the task which scheduling parameters must be changed
@@ -136,6 +162,16 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
136 pid, tp.budget_policy); 162 pid, tp.budget_policy);
137 goto out_unlock; 163 goto out_unlock;
138 } 164 }
165 if (tp.budget_signal_policy != NO_SIGNALS &&
166 tp.budget_signal_policy != QUANTUM_SIGNALS &&
167 tp.budget_signal_policy != PRECISE_SIGNALS)
168 {
169 printk(KERN_INFO "litmus: real-time task %d rejected "
170 "because unsupported budget signalling policy "
171 "specified (%d)\n",
172 pid, tp.budget_signal_policy);
173 goto out_unlock;
174 }
139 175
140 target->rt_param.task_params = tp; 176 target->rt_param.task_params = tp;
141 177
@@ -273,6 +309,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
273 return retval; 309 return retval;
274} 310}
275 311
312
276/* sys_null_call() is only used for determining raw system call 313/* sys_null_call() is only used for determining raw system call
277 * overheads (kernel entry, kernel exit). It has no useful side effects. 314 * overheads (kernel entry, kernel exit). It has no useful side effects.
278 * If ts is non-NULL, then the current Feather-Trace time is recorded. 315 * If ts is non-NULL, then the current Feather-Trace time is recorded.
@@ -291,14 +328,6 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
291} 328}
292 329
293 330
294
295
296
297
298
299
300
301
302long __litmus_admit_task(struct task_struct* tsk); 331long __litmus_admit_task(struct task_struct* tsk);
303 332
304asmlinkage long sys_slave_non_rt_threads(void) 333asmlinkage long sys_slave_non_rt_threads(void)
@@ -310,8 +339,6 @@ asmlinkage long sys_slave_non_rt_threads(void)
310 339
311 read_lock_irq(&tasklist_lock); 340 read_lock_irq(&tasklist_lock);
312 341
313 is_realtime(target)
314
315 t = leader; 342 t = leader;
316 do { 343 do {
317 TRACE_CUR("threads in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); 344 TRACE_CUR("threads in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid);
@@ -324,8 +351,10 @@ asmlinkage long sys_slave_non_rt_threads(void)
324 /* hasn't been admitted into rt. make it a slave. */ 351 /* hasn't been admitted into rt. make it a slave. */
325 tsk_rt(t)->slave = 1; 352 tsk_rt(t)->slave = 1;
326 } 353 }
327 else if (is_realtime(t)) 354 else {
328 if (litmus->compare(t, hp)) { 355 tsk_rt(t)->has_slaves = 1;
356
357 if (is_realtime(t) && litmus->compare(t, hp)) {
329 hp = t; 358 hp = t;
330 } 359 }
331 } 360 }
@@ -334,8 +363,17 @@ asmlinkage long sys_slave_non_rt_threads(void)
334 } while(t != leader); 363 } while(t != leader);
335 364
336 if (hp) { 365 if (hp) {
366 TRACE_CUR("found hp in group: %s/%d\n", hp->comm, hp->pid);
367
337 /* set up inheritance */ 368 /* set up inheritance */
338 369 leader->hp_group = hp;
370
371 t = leader;
372 do {
373 if (tsk_rt(t)->slave) {
374 litmus->increase_prio(t);
375 }
376 } while(t != leader);
339 } 377 }
340 378
341 read_unlock_irq(&tasklist_lock); 379 read_unlock_irq(&tasklist_lock);
@@ -343,6 +381,31 @@ asmlinkage long sys_slave_non_rt_threads(void)
343 return 0; 381 return 0;
344} 382}
345 383
384#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
385void init_gpu_affinity_state(struct task_struct* p)
386{
387 // under-damped
388 //p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
389 //p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
390
391#if 0
392	// empirical
393 p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
394 p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
395
396 p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
397 p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
398
399 p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
400 p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
401
402 p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
403 p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
404#endif
405 p->rt_param.gpu_migration = MIG_NONE;
406 p->rt_param.last_gpu = -1;
407}
408#endif
346 409
347/* p is a real-time task. Re-init its state as a best-effort task. */ 410/* p is a real-time task. Re-init its state as a best-effort task. */
348static void reinit_litmus_state(struct task_struct* p, int restore) 411static void reinit_litmus_state(struct task_struct* p, int restore)
@@ -350,6 +413,10 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
350 struct rt_task user_config = {}; 413 struct rt_task user_config = {};
351 void* ctrl_page = NULL; 414 void* ctrl_page = NULL;
352 415
416#ifdef CONFIG_LITMUS_NESTED_LOCKING
417 binheap_order_t prio_order = NULL;
418#endif
419
353 if (restore) { 420 if (restore) {
354 /* Safe user-space provided configuration data. 421 /* Safe user-space provided configuration data.
355 * and allocated page. */ 422 * and allocated page. */
@@ -357,11 +424,38 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
357 ctrl_page = p->rt_param.ctrl_page; 424 ctrl_page = p->rt_param.ctrl_page;
358 } 425 }
359 426
427#ifdef CONFIG_LITMUS_NESTED_LOCKING
428 prio_order = p->rt_param.hp_blocked_tasks.compare;
429#endif
430
360 /* We probably should not be inheriting any task's priority 431 /* We probably should not be inheriting any task's priority
361 * at this point in time. 432 * at this point in time.
362 */ 433 */
363 WARN_ON(p->rt_param.inh_task); 434 WARN_ON(p->rt_param.inh_task);
364 435
436#ifdef CONFIG_LITMUS_NESTED_LOCKING
437 WARN_ON(p->rt_param.blocked_lock);
438 WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks));
439#endif
440
441#ifdef CONFIG_LITMUS_SOFTIRQD
442 /* We probably should not have any tasklets executing for
443 * us at this time.
444 */
445 WARN_ON(p->rt_param.cur_klitirqd);
446 WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
447
448 if(p->rt_param.cur_klitirqd)
449 flush_pending(p->rt_param.cur_klitirqd, p);
450
451 if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
452 up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
453#endif
454
455#ifdef CONFIG_LITMUS_NVIDIA
456 WARN_ON(p->rt_param.held_gpus != 0);
457#endif
458
365 /* Cleanup everything else. */ 459 /* Cleanup everything else. */
366 memset(&p->rt_param, 0, sizeof(p->rt_param)); 460 memset(&p->rt_param, 0, sizeof(p->rt_param));
367 461
@@ -370,6 +464,15 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
370 p->rt_param.task_params = user_config; 464 p->rt_param.task_params = user_config;
371 p->rt_param.ctrl_page = ctrl_page; 465 p->rt_param.ctrl_page = ctrl_page;
372 } 466 }
467
468#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
469 init_gpu_affinity_state(p);
470#endif
471
472#ifdef CONFIG_LITMUS_NESTED_LOCKING
473 INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
474 raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
475#endif
373} 476}
374 477
375long __litmus_admit_task(struct task_struct* tsk) 478long __litmus_admit_task(struct task_struct* tsk)
@@ -398,6 +501,25 @@ long __litmus_admit_task(struct task_struct* tsk)
398 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); 501 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
399 } 502 }
400 503
504#ifdef CONFIG_LITMUS_NVIDIA
505 atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
506#endif
507#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
508 init_gpu_affinity_state(tsk);
509#endif
510#ifdef CONFIG_LITMUS_NESTED_LOCKING
511 tsk_rt(tsk)->blocked_lock = NULL;
512 raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
513 //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler
514#endif
515#ifdef CONFIG_LITMUS_SOFTIRQD
516 /* proxy thread off by default */
517	tsk_rt(tsk)->is_proxy_thread = 0;
518	tsk_rt(tsk)->cur_klitirqd = NULL;
519 mutex_init(&tsk_rt(tsk)->klitirqd_sem);
520 atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD);
521#endif
522
401 retval = litmus->admit_task(tsk); 523 retval = litmus->admit_task(tsk);
402 524
403 if (!retval) { 525 if (!retval) {
@@ -475,7 +597,7 @@ static void synch_on_plugin_switch(void* info)
475 */ 597 */
476int switch_sched_plugin(struct sched_plugin* plugin) 598int switch_sched_plugin(struct sched_plugin* plugin)
477{ 599{
478 unsigned long flags; 600 //unsigned long flags;
479 int ret = 0; 601 int ret = 0;
480 602
481 BUG_ON(!plugin); 603 BUG_ON(!plugin);
@@ -489,8 +611,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
489 while (atomic_read(&cannot_use_plugin) < num_online_cpus()) 611 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
490 cpu_relax(); 612 cpu_relax();
491 613
614#ifdef CONFIG_LITMUS_SOFTIRQD
615 if(!klitirqd_is_dead())
616 {
617 kill_klitirqd();
618 }
619#endif
620
492 /* stop task transitions */ 621 /* stop task transitions */
493 raw_spin_lock_irqsave(&task_transition_lock, flags); 622 //raw_spin_lock_irqsave(&task_transition_lock, flags);
494 623
495 /* don't switch if there are active real-time tasks */ 624 /* don't switch if there are active real-time tasks */
496 if (atomic_read(&rt_task_count) == 0) { 625 if (atomic_read(&rt_task_count) == 0) {
@@ -508,7 +637,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
508 } else 637 } else
509 ret = -EBUSY; 638 ret = -EBUSY;
510out: 639out:
511 raw_spin_unlock_irqrestore(&task_transition_lock, flags); 640 //raw_spin_unlock_irqrestore(&task_transition_lock, flags);
512 atomic_set(&cannot_use_plugin, 0); 641 atomic_set(&cannot_use_plugin, 0);
513 return ret; 642 return ret;
514} 643}
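The sys_set_rt_task_param hunk above adds an admission check: a task's budget signalling policy must be one of the recognized values or the rt_task parameters are rejected. A minimal sketch of that validation; the enum values are stand-ins that mirror the names used in the hunk:

#include <stdio.h>

enum budget_signal_policy { NO_SIGNALS, QUANTUM_SIGNALS, PRECISE_SIGNALS };

static int signal_policy_is_valid(enum budget_signal_policy p)
{
	return p == NO_SIGNALS || p == QUANTUM_SIGNALS || p == PRECISE_SIGNALS;
}

int main(void)
{
	printf("%d\n", signal_policy_is_valid(PRECISE_SIGNALS));                 /* 1 */
	printf("%d\n", signal_policy_is_valid((enum budget_signal_policy)42));   /* 0 */
	return 0;
}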
diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
new file mode 100644
index 000000000000..300571a81bbd
--- /dev/null
+++ b/litmus/litmus_pai_softirq.c
@@ -0,0 +1,64 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19
20
21int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
22{
23 int ret = 0; /* assume failure */
24 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
25 {
26 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
27 BUG();
28 }
29
30 ret = litmus->enqueue_pai_tasklet(t);
31
32 return(ret);
33}
34
35EXPORT_SYMBOL(__litmus_tasklet_schedule);
36
37
38
39// failure causes default Linux handling.
40int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
41{
42 int ret = 0; /* assume failure */
43 return(ret);
44}
45EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
46
47
48// failure causes default Linux handling.
49int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
50{
51 int ret = 0; /* assume failure */
52 return(ret);
53}
54EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
55
56
57// failure causes default Linux handling.
58int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
59{
60 int ret = 0; /* assume failure */
61 return(ret);
62}
63EXPORT_SYMBOL(__litmus_schedule_work);
64
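litmus_pai_softirq.c relies on two conventions: a tasklet handed to the PAI path must have a real-time owner (otherwise it is a caller bug), and each hook returns nonzero when the active plugin accepted the item and 0 to fall back to default Linux handling. A rough standalone sketch of both conventions with illustrative stand-in types:

#include <stdio.h>
#include <stdlib.h>

struct fake_tasklet { const char *owner; int owner_is_rt; };

static int pai_schedule(struct fake_tasklet *t, int plugin_accepts)
{
	if (!t->owner || !t->owner_is_rt) {
		fprintf(stderr, "BUG: no real-time owner for this tasklet\n");
		abort();
	}
	return plugin_accepts; /* 0 => caller falls back to the stock tasklet path */
}

int main(void)
{
	struct fake_tasklet t = { "rt_task", 1 };

	if (!pai_schedule(&t, 0))
		printf("falling back to default Linux handling\n");
	return 0;
}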
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
index 4bf725a36c9c..9ab7e015a3c1 100644
--- a/litmus/litmus_proc.c
+++ b/litmus/litmus_proc.c
@@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
20#ifdef CONFIG_RELEASE_MASTER 20#ifdef CONFIG_RELEASE_MASTER
21 *release_master_file = NULL, 21 *release_master_file = NULL,
22#endif 22#endif
23#ifdef CONFIG_LITMUS_SOFTIRQD
24 *klitirqd_file = NULL,
25#endif
23 *plugs_file = NULL; 26 *plugs_file = NULL;
24 27
25/* in litmus/sync.c */ 28/* in litmus/sync.c */
26int count_tasks_waiting_for_release(void); 29int count_tasks_waiting_for_release(void);
27 30
31extern int proc_read_klitirqd_stats(char *page, char **start,
32 off_t off, int count,
33 int *eof, void *data);
34
28static int proc_read_stats(char *page, char **start, 35static int proc_read_stats(char *page, char **start,
29 off_t off, int count, 36 off_t off, int count,
30 int *eof, void *data) 37 int *eof, void *data)
@@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
161 release_master_file->write_proc = proc_write_release_master; 168 release_master_file->write_proc = proc_write_release_master;
162#endif 169#endif
163 170
171#ifdef CONFIG_LITMUS_SOFTIRQD
172 klitirqd_file =
173 create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
174 proc_read_klitirqd_stats, NULL);
175#endif
176
164 stat_file = create_proc_read_entry("stats", 0444, litmus_dir, 177 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
165 proc_read_stats, NULL); 178 proc_read_stats, NULL);
166 179
@@ -187,6 +200,10 @@ void exit_litmus_proc(void)
187 remove_proc_entry("stats", litmus_dir); 200 remove_proc_entry("stats", litmus_dir);
188 if (curr_file) 201 if (curr_file)
189 remove_proc_entry("active_plugin", litmus_dir); 202 remove_proc_entry("active_plugin", litmus_dir);
203#ifdef CONFIG_LITMUS_SOFTIRQD
204 if (klitirqd_file)
205 remove_proc_entry("klitirqd_stats", litmus_dir);
206#endif
190#ifdef CONFIG_RELEASE_MASTER 207#ifdef CONFIG_RELEASE_MASTER
191 if (release_master_file) 208 if (release_master_file)
192 remove_proc_entry("release_master", litmus_dir); 209 remove_proc_entry("release_master", litmus_dir);
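The litmus_proc.c hunk registers a read-only klitirqd_stats entry when CONFIG_LITMUS_SOFTIRQD is enabled. A tiny userspace sketch that dumps it; the path assumes the LITMUS proc directory is mounted at /proc/litmus, which is the usual location:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/litmus/klitirqd_stats", "r");

	if (!f) {
		perror("klitirqd_stats");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}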
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
new file mode 100644
index 000000000000..9f7d9da5facb
--- /dev/null
+++ b/litmus/litmus_softirq.c
@@ -0,0 +1,1582 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19/* TODO: Remove unneeded mb() and other barriers. */
20
21
22/* counts number of daemons ready to handle litmus irqs. */
23static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
24
25enum pending_flags
26{
27 LIT_TASKLET_LOW = 0x1,
28 LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
29 LIT_WORK = LIT_TASKLET_HI<<1
30};
31
32/* only support tasklet processing for now. */
33struct tasklet_head
34{
35 struct tasklet_struct *head;
36 struct tasklet_struct **tail;
37};
38
39struct klitirqd_info
40{
41 struct task_struct* klitirqd;
42 struct task_struct* current_owner;
43 int terminating;
44
45
46 raw_spinlock_t lock;
47
48 u32 pending;
49 atomic_t num_hi_pending;
50 atomic_t num_low_pending;
51 atomic_t num_work_pending;
52
53 /* in order of priority */
54 struct tasklet_head pending_tasklets_hi;
55 struct tasklet_head pending_tasklets;
56 struct list_head worklist;
57};
58
59/* one list for each klitirqd */
60static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
61
62
63
64
65
66int proc_read_klitirqd_stats(char *page, char **start,
67 off_t off, int count,
68 int *eof, void *data)
69{
70 int len = snprintf(page, PAGE_SIZE,
71 "num ready klitirqds: %d\n\n",
72 atomic_read(&num_ready_klitirqds));
73
74 if(klitirqd_is_ready())
75 {
76 int i;
77 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
78 {
79 len +=
80 snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
81 "klitirqd_th%d: %s/%d\n"
82 "\tcurrent_owner: %s/%d\n"
83 "\tpending: %x\n"
84 "\tnum hi: %d\n"
85 "\tnum low: %d\n"
86 "\tnum work: %d\n\n",
87 i,
88 klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
89 (klitirqds[i].current_owner != NULL) ?
90 klitirqds[i].current_owner->comm : "(null)",
91 (klitirqds[i].current_owner != NULL) ?
92 klitirqds[i].current_owner->pid : 0,
93 klitirqds[i].pending,
94 atomic_read(&klitirqds[i].num_hi_pending),
95 atomic_read(&klitirqds[i].num_low_pending),
96 atomic_read(&klitirqds[i].num_work_pending));
97 }
98 }
99
100 return(len);
101}
102
103
104
105
106
107#if 0
108static atomic_t dump_id = ATOMIC_INIT(0);
109
110static void __dump_state(struct klitirqd_info* which, const char* caller)
111{
112 struct tasklet_struct* list;
113
114 int id = atomic_inc_return(&dump_id);
115
116 //if(in_interrupt())
117 {
118 if(which->current_owner)
119 {
120 TRACE("(id: %d caller: %s)\n"
121 "klitirqd: %s/%d\n"
122 "current owner: %s/%d\n"
123 "pending: %x\n",
124 id, caller,
125 which->klitirqd->comm, which->klitirqd->pid,
126 which->current_owner->comm, which->current_owner->pid,
127 which->pending);
128 }
129 else
130 {
131 TRACE("(id: %d caller: %s)\n"
132 "klitirqd: %s/%d\n"
133 "current owner: %p\n"
134 "pending: %x\n",
135 id, caller,
136 which->klitirqd->comm, which->klitirqd->pid,
137 NULL,
138 which->pending);
139 }
140
141 list = which->pending_tasklets.head;
142 while(list)
143 {
144 struct tasklet_struct *t = list;
145 list = list->next; /* advance */
146 if(t->owner)
147 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
148 else
149 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
150 }
151 }
152}
153
154static void dump_state(struct klitirqd_info* which, const char* caller)
155{
156 unsigned long flags;
157
158 raw_spin_lock_irqsave(&which->lock, flags);
159 __dump_state(which, caller);
160 raw_spin_unlock_irqrestore(&which->lock, flags);
161}
162#endif
163
164
165/* forward declarations */
166static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
167 struct klitirqd_info *which,
168 int wakeup);
169static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
170 struct klitirqd_info *which,
171 int wakeup);
172static void ___litmus_schedule_work(struct work_struct *w,
173 struct klitirqd_info *which,
174 int wakeup);
175
176
177
178inline unsigned int klitirqd_id(struct task_struct* tsk)
179{
180 int i;
181 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
182 {
183 if(klitirqds[i].klitirqd == tsk)
184 {
185 return i;
186 }
187 }
188
189 BUG();
190
191 return 0;
192}
193
194
195inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
196{
197 return (which->pending & LIT_TASKLET_HI);
198}
199
200inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
201{
202 return (which->pending & LIT_TASKLET_LOW);
203}
204
205inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
206{
207 return (which->pending & LIT_WORK);
208}
209
210inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
211{
212 return(which->pending);
213}
214
215
216inline static u32 litirq_pending(struct klitirqd_info* which)
217{
218 unsigned long flags;
219 u32 pending;
220
221 raw_spin_lock_irqsave(&which->lock, flags);
222 pending = litirq_pending_irqoff(which);
223 raw_spin_unlock_irqrestore(&which->lock, flags);
224
225 return pending;
226};
227
228inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
229{
230 unsigned long flags;
231 u32 pending;
232
233 raw_spin_lock_irqsave(&which->lock, flags);
234 pending = litirq_pending_irqoff(which);
235 if(pending)
236 {
237 if(which->current_owner != owner)
238 {
239 pending = 0; // owner switch!
240 }
241 }
242 raw_spin_unlock_irqrestore(&which->lock, flags);
243
244 return pending;
245}
246
247
248inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
249 struct mutex** sem,
250 struct task_struct** t)
251{
252 unsigned long flags;
253 u32 pending;
254
255 /* init values */
256 *sem = NULL;
257 *t = NULL;
258
259 raw_spin_lock_irqsave(&which->lock, flags);
260
261 pending = litirq_pending_irqoff(which);
262 if(pending)
263 {
264 if(which->current_owner != NULL)
265 {
266 *t = which->current_owner;
267 *sem = &tsk_rt(which->current_owner)->klitirqd_sem;
268 }
269 else
270 {
271 BUG();
272 }
273 }
274 raw_spin_unlock_irqrestore(&which->lock, flags);
275
276 if(likely(*sem))
277 {
278 return pending;
279 }
280 else
281 {
282 return 0;
283 }
284}
285
286/* returns true if the next piece of work to do is from a different owner.
287 */
288static int tasklet_ownership_change(
289 struct klitirqd_info* which,
290 enum pending_flags taskletQ)
291{
292 /* this function doesn't have to look at work objects since they have
293 priority below tasklets. */
294
295 unsigned long flags;
296 int ret = 0;
297
298 raw_spin_lock_irqsave(&which->lock, flags);
299
300 switch(taskletQ)
301 {
302 case LIT_TASKLET_HI:
303 if(litirq_pending_hi_irqoff(which))
304 {
305 ret = (which->pending_tasklets_hi.head->owner !=
306 which->current_owner);
307 }
308 break;
309 case LIT_TASKLET_LOW:
310 if(litirq_pending_low_irqoff(which))
311 {
312 ret = (which->pending_tasklets.head->owner !=
313 which->current_owner);
314 }
315 break;
316 default:
317 break;
318 }
319
320 raw_spin_unlock_irqrestore(&which->lock, flags);
321
322 TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
323
324 return ret;
325}
326
327
328static void __reeval_prio(struct klitirqd_info* which)
329{
330 struct task_struct* next_owner = NULL;
331 struct task_struct* klitirqd = which->klitirqd;
332
333 /* Check in prio-order */
334 u32 pending = litirq_pending_irqoff(which);
335
336 //__dump_state(which, "__reeval_prio: before");
337
338 if(pending)
339 {
340 if(pending & LIT_TASKLET_HI)
341 {
342 next_owner = which->pending_tasklets_hi.head->owner;
343 }
344 else if(pending & LIT_TASKLET_LOW)
345 {
346 next_owner = which->pending_tasklets.head->owner;
347 }
348 else if(pending & LIT_WORK)
349 {
350 struct work_struct* work =
351 list_first_entry(&which->worklist, struct work_struct, entry);
352 next_owner = work->owner;
353 }
354 }
355
356 if(next_owner != which->current_owner)
357 {
358 struct task_struct* old_owner = which->current_owner;
359
360 /* bind the next owner. */
361 which->current_owner = next_owner;
362 mb();
363
364 if(next_owner != NULL)
365 {
366 if(!in_interrupt())
367 {
368 TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
369 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
370 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
371 next_owner->comm, next_owner->pid);
372 }
373 else
374 {
375 TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
376 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
377 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
378 next_owner->comm, next_owner->pid);
379 }
380
381 litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner);
382 }
383 else
384 {
385 if(likely(!in_interrupt()))
386 {
387 TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
388 __FUNCTION__, klitirqd->comm, klitirqd->pid);
389 }
390 else
391 {
392 // is this a bug?
393 TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
394 __FUNCTION__, klitirqd->comm, klitirqd->pid);
395 }
396
397 BUG_ON(pending != 0);
398 litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL);
399 }
400 }
401
402 //__dump_state(which, "__reeval_prio: after");
403}
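
The selection logic in __reeval_prio() boils down to "the owner at the head of the highest non-empty class wins", in HI > LOW > WORK order. A self-contained sketch of just that decision; struct owner and the queue fields here are stand-ins for illustration, not the kernel's types:

#define LIT_TASKLET_HI  0x1u
#define LIT_TASKLET_LOW 0x2u
#define LIT_WORK        0x4u

struct owner;                        /* stand-in for struct task_struct */

struct pending_queues {
	unsigned pending;                /* LIT_* bitmask */
	struct owner *hi_head_owner;     /* owner of first HI tasklet */
	struct owner *low_head_owner;    /* owner of first LOW tasklet */
	struct owner *first_work_owner;  /* owner of first work object */
};

/* same HI > LOW > WORK precedence as __reeval_prio() */
static struct owner *pick_next_owner(const struct pending_queues *q)
{
	if (q->pending & LIT_TASKLET_HI)
		return q->hi_head_owner;
	if (q->pending & LIT_TASKLET_LOW)
		return q->low_head_owner;
	if (q->pending & LIT_WORK)
		return q->first_work_owner;
	return NULL;                     /* nothing pending: drop inheritance */
}
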
404
405static void reeval_prio(struct klitirqd_info* which)
406{
407 unsigned long flags;
408
409 raw_spin_lock_irqsave(&which->lock, flags);
410 __reeval_prio(which);
411 raw_spin_unlock_irqrestore(&which->lock, flags);
412}
413
414
415static void wakeup_litirqd_locked(struct klitirqd_info* which)
416{
417 /* Interrupts are disabled: no need to stop preemption */
418 if (which && which->klitirqd)
419 {
420 __reeval_prio(which); /* configure the proper priority */
421
422 if(which->klitirqd->state != TASK_RUNNING)
423 {
424 TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
425 which->klitirqd->comm, which->klitirqd->pid);
426
427 wake_up_process(which->klitirqd);
428 }
429 }
430}
431
432
433static void do_lit_tasklet(struct klitirqd_info* which,
434 struct tasklet_head* pending_tasklets)
435{
436 unsigned long flags;
437 struct tasklet_struct *list;
438 atomic_t* count;
439
440 raw_spin_lock_irqsave(&which->lock, flags);
441
442 //__dump_state(which, "do_lit_tasklet: before steal");
443
444 /* copy out the tasklets for our private use. */
445 list = pending_tasklets->head;
446 pending_tasklets->head = NULL;
447 pending_tasklets->tail = &pending_tasklets->head;
448
449 /* remove pending flag */
450 which->pending &= (pending_tasklets == &which->pending_tasklets) ?
451 ~LIT_TASKLET_LOW :
452 ~LIT_TASKLET_HI;
453
454 count = (pending_tasklets == &which->pending_tasklets) ?
455 &which->num_low_pending:
456 &which->num_hi_pending;
457
458 //__dump_state(which, "do_lit_tasklet: after steal");
459
460 raw_spin_unlock_irqrestore(&which->lock, flags);
461
462
463 while(list)
464 {
465 struct tasklet_struct *t = list;
466
467 /* advance, lest we forget */
468 list = list->next;
469
470 /* execute tasklet if it has my priority and is free */
471 if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
472 if (!atomic_read(&t->count)) {
473
474 sched_trace_tasklet_begin(t->owner);
475
476 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
477 {
478 BUG();
479 }
480 TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
481 t->func(t->data);
482 tasklet_unlock(t);
483
484 atomic_dec(count);
485
486 sched_trace_tasklet_end(t->owner, 0ul);
487
488 continue; /* process more tasklets */
489 }
490 tasklet_unlock(t);
491 }
492
493 TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
494
495 /* couldn't process tasklet. put it back at the end of the queue. */
496 if(pending_tasklets == &which->pending_tasklets)
497 ___litmus_tasklet_schedule(t, which, 0);
498 else
499 ___litmus_tasklet_hi_schedule(t, which, 0);
500 }
501}
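
do_lit_tasklet() uses a common "steal the whole list" pattern: the head/tail pair is detached under the lock and the private copy is walked without it. A compact sketch of just that splice, with illustrative stand-in types rather than the kernel's tasklet structures:

struct item {
	struct item *next;
};

struct tasklet_queue {
	struct item  *head;
	struct item **tail;    /* points at head while empty, else at last ->next */
};

/* detach everything in O(1); caller walks the returned list without the lock */
static struct item *steal_all(struct tasklet_queue *q)
{
	struct item *list = q->head;
	q->head = NULL;
	q->tail = &q->head;    /* back to the canonical empty state */
	return list;
}
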
502
503
504// returns 1 if priorities need to be changed to continue processing
505// pending tasklets.
506static int do_litirq(struct klitirqd_info* which)
507{
508 u32 pending;
509 int resched = 0;
510
511 if(in_interrupt())
512 {
513 TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
514 return(0);
515 }
516
517 if(which->klitirqd != current)
518 {
519 TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
520 __FUNCTION__, current->comm, current->pid,
521 which->klitirqd->comm, which->klitirqd->pid);
522 return(0);
523 }
524
525 if(!is_realtime(current))
526 {
527 TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
528 __FUNCTION__, current->policy);
529 return(0);
530 }
531
532
533 /* We only handle tasklets & work objects, no need for RCU triggers? */
534
535 pending = litirq_pending(which);
536 if(pending)
537 {
538 /* extract the work to do and do it! */
539 if(pending & LIT_TASKLET_HI)
540 {
541 TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
542 do_lit_tasklet(which, &which->pending_tasklets_hi);
543 resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
544
545 if(resched)
546 {
547 TRACE_CUR("%s: HI tasklets of another owner remain. "
548 "Skipping any LOW tasklets.\n", __FUNCTION__);
549 }
550 }
551
552 if(!resched && (pending & LIT_TASKLET_LOW))
553 {
554 TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
555 do_lit_tasklet(which, &which->pending_tasklets);
556 resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
557
558 if(resched)
559 {
560 TRACE_CUR("%s: LOW tasklets of another owner remain. "
561 "Skipping any work objects.\n", __FUNCTION__);
562 }
563 }
564 }
565
566 return(resched);
567}
568
569
570static void do_work(struct klitirqd_info* which)
571{
572 unsigned long flags;
573 work_func_t f;
574 struct work_struct* work;
575
576 // only execute one work-queue item to yield to tasklets.
577 // ...is this a good idea, or should we just batch them?
578 raw_spin_lock_irqsave(&which->lock, flags);
579
580 if(!litirq_pending_work_irqoff(which))
581 {
582 raw_spin_unlock_irqrestore(&which->lock, flags);
583 goto no_work;
584 }
585
586 work = list_first_entry(&which->worklist, struct work_struct, entry);
587 list_del_init(&work->entry);
588
589 if(list_empty(&which->worklist))
590 {
591 which->pending &= ~LIT_WORK;
592 }
593
594 raw_spin_unlock_irqrestore(&which->lock, flags);
595
596
597
598 /* safe to read current_owner outside of lock since only this thread
599 may write to the pointer. */
600 if(work->owner == which->current_owner)
601 {
602 TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
603 // do the work!
604 work_clear_pending(work);
605 f = work->func;
606 f(work); /* can't touch 'work' after this point,
607 the user may have freed it. */
608
609 atomic_dec(&which->num_work_pending);
610 }
611 else
612 {
613 TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
614 __FUNCTION__);
615 ___litmus_schedule_work(work, which, 0);
616 }
617
618no_work:
619 return;
620}
621
622
623static int set_litmus_daemon_sched(void)
624{
625 /* set up a daemon job that will never complete.
626 it should only ever run on behalf of another
627 real-time task.
628
629 TODO: Transition to a new job whenever a
630 new tasklet is handled */
631
632 int ret = 0;
633
634 struct rt_task tp = {
635 .exec_cost = 0,
636 .period = 1000000000, /* dummy 1 second period */
637 .phase = 0,
638 .cpu = task_cpu(current),
639 .budget_policy = NO_ENFORCEMENT,
640 .cls = RT_CLASS_BEST_EFFORT
641 };
642
643 struct sched_param param = { .sched_priority = 0};
644
645
646 /* set task params, mark as proxy thread, and init other data */
647 tsk_rt(current)->task_params = tp;
648 tsk_rt(current)->is_proxy_thread = 1;
649 tsk_rt(current)->cur_klitirqd = NULL;
650 mutex_init(&tsk_rt(current)->klitirqd_sem);
651 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
652
653 /* inform the OS we're SCHED_LITMUS --
654 sched_setscheduler_nocheck() calls litmus_admit_task(). */
655 sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);
656
657 return ret;
658}
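
For comparison, the last step above is the in-kernel counterpart of an ordinary sched_setscheduler() call. The stock user-space equivalent looks like the following (SCHED_FIFO is used here only as a stand-in, since SCHED_LITMUS is reachable only through the LITMUS^RT interface):

#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param param = { .sched_priority = 1 };

	/* user-space analogue of sched_setscheduler_nocheck(current, ...) */
	if (sched_setscheduler(0, SCHED_FIFO, &param) != 0) {
		perror("sched_setscheduler");   /* typically needs root */
		return 1;
	}
	printf("now running under SCHED_FIFO\n");
	return 0;
}
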
659
660static void enter_execution_phase(struct klitirqd_info* which,
661 struct mutex* sem,
662 struct task_struct* t)
663{
664 TRACE_CUR("%s: Trying to enter execution phase. "
665 "Acquiring semaphore of %s/%d\n", __FUNCTION__,
666 t->comm, t->pid);
667 down_and_set_stat(current, HELD, sem);
668 TRACE_CUR("%s: Execution phase entered! "
669 "Acquired semaphore of %s/%d\n", __FUNCTION__,
670 t->comm, t->pid);
671}
672
673static void exit_execution_phase(struct klitirqd_info* which,
674 struct mutex* sem,
675 struct task_struct* t)
676{
677 TRACE_CUR("%s: Exiting execution phase. "
678 "Releasing semaphore of %s/%d\n", __FUNCTION__,
679 t->comm, t->pid);
680 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
681 {
682 up_and_set_stat(current, NOT_HELD, sem);
683 TRACE_CUR("%s: Execution phase exited! "
684 "Released semaphore of %s/%d\n", __FUNCTION__,
685 t->comm, t->pid);
686 }
687 else
688 {
689 TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
690 }
691}
692
693/* main loop for klitsoftirqd */
694static int run_klitirqd(void* unused)
695{
696 struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
697 struct mutex* sem;
698 struct task_struct* owner;
699
700 int rt_status = set_litmus_daemon_sched();
701
702 if(rt_status != 0)
703 {
704 TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
705 goto rt_failed;
706 }
707
708 atomic_inc(&num_ready_klitirqds);
709
710 set_current_state(TASK_INTERRUPTIBLE);
711
712 while (!kthread_should_stop())
713 {
714 preempt_disable();
715 if (!litirq_pending(which))
716 {
717 /* sleep for work */
718 TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
719 __FUNCTION__);
720 preempt_enable_no_resched();
721 schedule();
722
723 if(kthread_should_stop()) /* bail out */
724 {
725 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
726 continue;
727 }
728
729 preempt_disable();
730 }
731
732 __set_current_state(TASK_RUNNING);
733
734 while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
735 {
736 int needs_resched = 0;
737
738 preempt_enable_no_resched();
739
740 BUG_ON(sem == NULL);
741
742 // wait to enter execution phase; wait for 'current_owner' to block.
743 enter_execution_phase(which, sem, owner);
744
745 if(kthread_should_stop())
746 {
747 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
748 break;
749 }
750
751 preempt_disable();
752
753 /* Double check that there's still pending work and the owner hasn't
754 * changed. Pending items may have been flushed while we were sleeping.
755 */
756 if(litirq_pending_with_owner(which, owner))
757 {
758 TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
759 __FUNCTION__);
760
761 needs_resched = do_litirq(which);
762
763 preempt_enable_no_resched();
764
765 // work objects are preemptible.
766 if(!needs_resched)
767 {
768 do_work(which);
769 }
770
771 // exit execution phase.
772 exit_execution_phase(which, sem, owner);
773
774 TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
775 reeval_prio(which); /* check if we need to change priority here */
776 }
777 else
778 {
779 TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n",
780 __FUNCTION__,
781 owner->comm, owner->pid);
782 preempt_enable_no_resched();
783
784 // exit execution phase.
785 exit_execution_phase(which, sem, owner);
786 }
787
788 cond_resched();
789 preempt_disable();
790 }
791 preempt_enable();
792 set_current_state(TASK_INTERRUPTIBLE);
793 }
794 __set_current_state(TASK_RUNNING);
795
796 atomic_dec(&num_ready_klitirqds);
797
798rt_failed:
799 litmus_exit_task(current);
800
801 return rt_status;
802}
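
The structure of run_klitirqd() is the classic "sleep until pending, drain, sleep again" daemon loop, complicated here by the execution-phase semaphore and preemption control. Stripped of those, the same skeleton in user space; this is a rough analogue only, with pthread primitives standing in for schedule()/wake_up_process():

#include <pthread.h>
#include <stdbool.h>

struct worker {
	pthread_mutex_t lock;
	pthread_cond_t  wake;
	unsigned        pending;   /* bitmask; 0 means nothing to do */
	bool            stop;
};

static void *worker_main(void *arg)
{
	struct worker *w = arg;

	pthread_mutex_lock(&w->lock);
	while (!w->stop) {
		/* "schedule()": sleep until work is queued or we are told to stop */
		while (!w->pending && !w->stop)
			pthread_cond_wait(&w->wake, &w->lock);

		/* drain everything pending, dropping the lock while processing */
		while (w->pending) {
			unsigned batch = w->pending;
			w->pending = 0;
			pthread_mutex_unlock(&w->lock);
			(void)batch;               /* ... process 'batch' here ... */
			pthread_mutex_lock(&w->lock);
		}
	}
	pthread_mutex_unlock(&w->lock);
	return NULL;
}
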
803
804
805struct klitirqd_launch_data
806{
807 int* cpu_affinity;
808 struct work_struct work;
809};
810
811/* executed by a kworker from workqueues */
812static void launch_klitirqd(struct work_struct *work)
813{
814 int i;
815
816 struct klitirqd_launch_data* launch_data =
817 container_of(work, struct klitirqd_launch_data, work);
818
819 TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
820
821 /* create the daemon threads */
822 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
823 {
824 if(launch_data->cpu_affinity)
825 {
826 klitirqds[i].klitirqd =
827 kthread_create(
828 run_klitirqd,
829 /* treat the affinity as a pointer, we'll cast it back later */
830 (void*)(long long)launch_data->cpu_affinity[i],
831 "klitirqd_th%d/%d",
832 i,
833 launch_data->cpu_affinity[i]);
834
835            /* litmus will put us in the right cluster. */
836 kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
837 }
838 else
839 {
840 klitirqds[i].klitirqd =
841 kthread_create(
842 run_klitirqd,
843 /* treat the affinity as a pointer, we'll cast it back later */
844 (void*)(long long)(-1),
845 "klitirqd_th%d",
846 i);
847 }
848 }
849
850 TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
851
852 /* unleash the daemons */
853 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
854 {
855 wake_up_process(klitirqds[i].klitirqd);
856 }
857
858 if(launch_data->cpu_affinity)
859 kfree(launch_data->cpu_affinity);
860 kfree(launch_data);
861}
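
A minimal sketch of the create/bind/wake sequence used above, as it might appear in a standalone module; the demo_* names are illustrative and error handling is reduced to the IS_ERR check:

#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int demo_thread_fn(void *data)
{
	while (!kthread_should_stop())
		msleep(1000);            /* idle placeholder */
	return 0;
}

static struct task_struct *demo_spawn_on_cpu(int cpu)
{
	struct task_struct *t = kthread_create(demo_thread_fn, NULL, "demo/%d", cpu);

	if (!IS_ERR(t)) {
		kthread_bind(t, cpu);    /* pin before the first wakeup */
		wake_up_process(t);      /* kthread_create() leaves the thread stopped */
	}
	return t;
}
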
862
863
864void spawn_klitirqd(int* affinity)
865{
866 int i;
867 struct klitirqd_launch_data* delayed_launch;
868
869 if(atomic_read(&num_ready_klitirqds) != 0)
870 {
871        TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n", __FUNCTION__);
872 return;
873 }
874
875 /* init the tasklet & work queues */
876 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
877 {
878 klitirqds[i].terminating = 0;
879 klitirqds[i].pending = 0;
880
881 klitirqds[i].num_hi_pending.counter = 0;
882 klitirqds[i].num_low_pending.counter = 0;
883 klitirqds[i].num_work_pending.counter = 0;
884
885 klitirqds[i].pending_tasklets_hi.head = NULL;
886 klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
887
888 klitirqds[i].pending_tasklets.head = NULL;
889 klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
890
891 INIT_LIST_HEAD(&klitirqds[i].worklist);
892
893 raw_spin_lock_init(&klitirqds[i].lock);
894 }
895
896 /* wait to flush the initializations to memory since other threads
897 will access it. */
898 mb();
899
900 /* tell a work queue to launch the threads. we can't make scheduling
901 calls since we're in an atomic state. */
902 TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
903 delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
904 if(affinity)
905 {
906 delayed_launch->cpu_affinity =
907 kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
908
909 memcpy(delayed_launch->cpu_affinity, affinity,
910 sizeof(int)*NR_LITMUS_SOFTIRQD);
911 }
912 else
913 {
914 delayed_launch->cpu_affinity = NULL;
915 }
916 INIT_WORK(&delayed_launch->work, launch_klitirqd);
917 schedule_work(&delayed_launch->work);
918}
919
920
921void kill_klitirqd(void)
922{
923 if(!klitirqd_is_dead())
924 {
925 int i;
926
927 TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
928
929 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
930 {
931 if(klitirqds[i].terminating != 1)
932 {
933 klitirqds[i].terminating = 1;
934 mb(); /* just to be sure? */
935 flush_pending(klitirqds[i].klitirqd, NULL);
936
937 /* signal termination */
938 kthread_stop(klitirqds[i].klitirqd);
939 }
940 }
941 }
942}
943
944
945int klitirqd_is_ready(void)
946{
947 return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
948}
949
950int klitirqd_is_dead(void)
951{
952 return(atomic_read(&num_ready_klitirqds) == 0);
953}
954
955
956struct task_struct* get_klitirqd(unsigned int k_id)
957{
958 return(klitirqds[k_id].klitirqd);
959}
960
961
962void flush_pending(struct task_struct* klitirqd_thread,
963 struct task_struct* owner)
964{
965 unsigned int k_id = klitirqd_id(klitirqd_thread);
966 struct klitirqd_info *which = &klitirqds[k_id];
967
968 unsigned long flags;
969 struct tasklet_struct *list;
970
971 u32 work_flushed = 0;
972
973 raw_spin_lock_irqsave(&which->lock, flags);
974
975 //__dump_state(which, "flush_pending: before");
976
977 // flush hi tasklets.
978 if(litirq_pending_hi_irqoff(which))
979 {
980 which->pending &= ~LIT_TASKLET_HI;
981
982 list = which->pending_tasklets_hi.head;
983 which->pending_tasklets_hi.head = NULL;
984 which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
985
986 TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
987
988 while(list)
989 {
990 struct tasklet_struct *t = list;
991 list = list->next;
992
993 if(likely((t->owner == owner) || (owner == NULL)))
994 {
995 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
996 {
997 BUG();
998 }
999
1000 work_flushed |= LIT_TASKLET_HI;
1001
1002 t->owner = NULL;
1003
1004                // re-set the SCHED bit and hand the tasklet back to Linux;
1005                // it was just cleared above, so this should never fail.
1005 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1006 {
1007 atomic_dec(&which->num_hi_pending);
1008 ___tasklet_hi_schedule(t);
1009 }
1010 else
1011 {
1012 TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
1013 BUG();
1014 }
1015 }
1016 else
1017 {
1018 TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
1019 // put back on queue.
1020 ___litmus_tasklet_hi_schedule(t, which, 0);
1021 }
1022 }
1023 }
1024
1025 // flush low tasklets.
1026 if(litirq_pending_low_irqoff(which))
1027 {
1028 which->pending &= ~LIT_TASKLET_LOW;
1029
1030 list = which->pending_tasklets.head;
1031 which->pending_tasklets.head = NULL;
1032 which->pending_tasklets.tail = &which->pending_tasklets.head;
1033
1034 TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
1035
1036 while(list)
1037 {
1038 struct tasklet_struct *t = list;
1039 list = list->next;
1040
1041 if(likely((t->owner == owner) || (owner == NULL)))
1042 {
1043 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
1044 {
1045 BUG();
1046 }
1047
1048 work_flushed |= LIT_TASKLET_LOW;
1049
1050 t->owner = NULL;
1051 sched_trace_tasklet_end(owner, 1ul);
1052
1053 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1054 {
1055 atomic_dec(&which->num_low_pending);
1056 ___tasklet_schedule(t);
1057 }
1058 else
1059 {
1060 TRACE("%s: dropped tasklet??\n", __FUNCTION__);
1061 BUG();
1062 }
1063 }
1064 else
1065 {
1066 TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
1067 // put back on queue
1068 ___litmus_tasklet_schedule(t, which, 0);
1069 }
1070 }
1071 }
1072
1073 // flush work objects
1074 if(litirq_pending_work_irqoff(which))
1075 {
1076 which->pending &= ~LIT_WORK;
1077
1078 TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
1079
1080 while(!list_empty(&which->worklist))
1081 {
1082 struct work_struct* work =
1083 list_first_entry(&which->worklist, struct work_struct, entry);
1084 list_del_init(&work->entry);
1085
1086 if(likely((work->owner == owner) || (owner == NULL)))
1087 {
1088 work_flushed |= LIT_WORK;
1089 atomic_dec(&which->num_work_pending);
1090
1091 work->owner = NULL;
1092 sched_trace_work_end(owner, current, 1ul);
1093 __schedule_work(work);
1094 }
1095 else
1096 {
1097 TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
1098 // put back on queue
1099 ___litmus_schedule_work(work, which, 0);
1100 }
1101 }
1102 }
1103
1104 //__dump_state(which, "flush_pending: after (before reeval prio)");
1105
1106
1107 mb(); /* commit changes to pending flags */
1108
1109 /* reset the scheduling priority */
1110 if(work_flushed)
1111 {
1112 __reeval_prio(which);
1113
1114 /* Try to offload flushed tasklets to Linux's ksoftirqd. */
1115 if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
1116 {
1117 wakeup_softirqd();
1118 }
1119 }
1120 else
1121 {
1122 TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
1123 }
1124
1125 raw_spin_unlock_irqrestore(&which->lock, flags);
1126}
1127
1128
1129
1130
1131static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
1132 struct klitirqd_info *which,
1133 int wakeup)
1134{
1135 unsigned long flags;
1136 u32 old_pending;
1137
1138 t->next = NULL;
1139
1140 raw_spin_lock_irqsave(&which->lock, flags);
1141
1142 //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
1143
1144 *(which->pending_tasklets.tail) = t;
1145 which->pending_tasklets.tail = &t->next;
1146
1147 old_pending = which->pending;
1148 which->pending |= LIT_TASKLET_LOW;
1149
1150 atomic_inc(&which->num_low_pending);
1151
1152 mb();
1153
1154 if(!old_pending && wakeup)
1155 {
1156 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1157 }
1158
1159 //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
1160
1161 raw_spin_unlock_irqrestore(&which->lock, flags);
1162}
1163
1164int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
1165{
1166 int ret = 0; /* assume failure */
1167 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1168 {
1169 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1170 BUG();
1171 }
1172
1173 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1174 {
1175 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1176 BUG();
1177 }
1178
1179 if(likely(!klitirqds[k_id].terminating))
1180 {
1181 /* Can't accept tasklets while we're processing a workqueue
1182 because they're handled by the same thread. This case is
1183 very RARE.
1184
1185 TODO: Use a separate thread for work objects!!!!!!
1186 */
1187 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1188 {
1189 ret = 1;
1190 ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
1191 }
1192 else
1193 {
1194 TRACE("%s: rejected tasklet because of pending work.\n",
1195 __FUNCTION__);
1196 }
1197 }
1198 return(ret);
1199}
1200
1201EXPORT_SYMBOL(__litmus_tasklet_schedule);
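
___litmus_tasklet_schedule() appends with the same head/tail-pointer representation that the steal sketch earlier drains. For completeness, the O(1) enqueue side of that idiom, again with illustrative stand-in types:

struct item {
	struct item *next;
};

struct tasklet_queue {
	struct item  *head;
	struct item **tail;      /* &head while empty, else &last->next */
};

static void enqueue_tail(struct tasklet_queue *q, struct item *it)
{
	it->next = NULL;
	*(q->tail) = it;         /* link after the current last element */
	q->tail = &it->next;     /* tail now points at the new element's next */
}
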
1202
1203
1204static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1205 struct klitirqd_info *which,
1206 int wakeup)
1207{
1208 unsigned long flags;
1209 u32 old_pending;
1210
1211 t->next = NULL;
1212
1213 raw_spin_lock_irqsave(&which->lock, flags);
1214
1215 *(which->pending_tasklets_hi.tail) = t;
1216 which->pending_tasklets_hi.tail = &t->next;
1217
1218 old_pending = which->pending;
1219 which->pending |= LIT_TASKLET_HI;
1220
1221 atomic_inc(&which->num_hi_pending);
1222
1223 mb();
1224
1225 if(!old_pending && wakeup)
1226 {
1227 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1228 }
1229
1230 raw_spin_unlock_irqrestore(&which->lock, flags);
1231}
1232
1233int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
1234{
1235 int ret = 0; /* assume failure */
1236 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1237 {
1238 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1239 BUG();
1240 }
1241
1242 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1243 {
1244 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1245 BUG();
1246 }
1247
1248 if(unlikely(!klitirqd_is_ready()))
1249 {
1250        TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
1251 BUG();
1252 }
1253
1254 if(likely(!klitirqds[k_id].terminating))
1255 {
1256 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1257 {
1258 ret = 1;
1259 ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
1260 }
1261 else
1262 {
1263 TRACE("%s: rejected tasklet because of pending work.\n",
1264 __FUNCTION__);
1265 }
1266 }
1267 return(ret);
1268}
1269
1270EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
1271
1272
1273int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
1274{
1275 int ret = 0; /* assume failure */
1276 u32 old_pending;
1277
1278 BUG_ON(!irqs_disabled());
1279
1280 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1281 {
1282 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1283 BUG();
1284 }
1285
1286 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1287 {
1288 TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1289 BUG();
1290 }
1291
1292 if(unlikely(!klitirqd_is_ready()))
1293 {
1294        TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
1295 BUG();
1296 }
1297
1298 if(likely(!klitirqds[k_id].terminating))
1299 {
1300 raw_spin_lock(&klitirqds[k_id].lock);
1301
1302 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1303 {
1304 ret = 1; // success!
1305
1306 t->next = klitirqds[k_id].pending_tasklets_hi.head;
1307 klitirqds[k_id].pending_tasklets_hi.head = t;
1308
1309 old_pending = klitirqds[k_id].pending;
1310 klitirqds[k_id].pending |= LIT_TASKLET_HI;
1311
1312 atomic_inc(&klitirqds[k_id].num_hi_pending);
1313
1314 mb();
1315
1316 if(!old_pending)
1317 wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
1318 }
1319 else
1320 {
1321 TRACE("%s: rejected tasklet because of pending work.\n",
1322 __FUNCTION__);
1323 }
1324
1325 raw_spin_unlock(&klitirqds[k_id].lock);
1326 }
1327 return(ret);
1328}
1329
1330EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
1331
1332
1333
1334static void ___litmus_schedule_work(struct work_struct *w,
1335 struct klitirqd_info *which,
1336 int wakeup)
1337{
1338 unsigned long flags;
1339 u32 old_pending;
1340
1341 raw_spin_lock_irqsave(&which->lock, flags);
1342
1343 work_pending(w);
1344 list_add_tail(&w->entry, &which->worklist);
1345
1346 old_pending = which->pending;
1347 which->pending |= LIT_WORK;
1348
1349 atomic_inc(&which->num_work_pending);
1350
1351 mb();
1352
1353 if(!old_pending && wakeup)
1354 {
1355 wakeup_litirqd_locked(which); /* wakeup the klitirqd */
1356 }
1357
1358 raw_spin_unlock_irqrestore(&which->lock, flags);
1359}
1360
1361int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
1362{
1363 int ret = 1; /* assume success */
1364 if(unlikely(w->owner == NULL) || !is_realtime(w->owner))
1365 {
1366 TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
1367 BUG();
1368 }
1369
1370 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1371 {
1372        TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1373 BUG();
1374 }
1375
1376 if(unlikely(!klitirqd_is_ready()))
1377 {
1378        TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
1379 BUG();
1380 }
1381
1382 if(likely(!klitirqds[k_id].terminating))
1383 ___litmus_schedule_work(w, &klitirqds[k_id], 1);
1384 else
1385 ret = 0;
1386 return(ret);
1387}
1388EXPORT_SYMBOL(__litmus_schedule_work);
1389
1390
1391static int set_klitirqd_sem_status(unsigned long stat)
1392{
1393 TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
1394 atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
1395 stat);
1396 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
1397 //mb();
1398
1399 return(0);
1400}
1401
1402static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
1403{
1404 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
1405 {
1406 return(set_klitirqd_sem_status(stat));
1407 }
1408 return(-1);
1409}
1410
1411
1412void __down_and_reset_and_set_stat(struct task_struct* t,
1413 enum klitirqd_sem_status to_reset,
1414 enum klitirqd_sem_status to_set,
1415 struct mutex* sem)
1416{
1417#if 0
1418 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1419 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1420
1421 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1422 __FUNCTION__, task->comm, task->pid);
1423#endif
1424
1425 mutex_lock_sfx(sem,
1426 set_klitirqd_sem_status_if_not_held, to_reset,
1427 set_klitirqd_sem_status, to_set);
1428#if 0
1429 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1430 __FUNCTION__, task->comm, task->pid);
1431#endif
1432}
1433
1434void down_and_set_stat(struct task_struct* t,
1435 enum klitirqd_sem_status to_set,
1436 struct mutex* sem)
1437{
1438#if 0
1439 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1440 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1441
1442 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1443 __FUNCTION__, task->comm, task->pid);
1444#endif
1445
1446 mutex_lock_sfx(sem,
1447 NULL, 0,
1448 set_klitirqd_sem_status, to_set);
1449
1450#if 0
1451 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1452 __FUNCTION__, task->comm, task->pid);
1453#endif
1454}
1455
1456
1457void up_and_set_stat(struct task_struct* t,
1458 enum klitirqd_sem_status to_set,
1459 struct mutex* sem)
1460{
1461#if 0
1462 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1463 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1464
1465 TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
1466 __FUNCTION__,
1467 task->comm, task->pid);
1468#endif
1469
1470 mutex_unlock_sfx(sem, NULL, 0,
1471 set_klitirqd_sem_status, to_set);
1472
1473#if 0
1474 TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
1475 __FUNCTION__,
1476 task->comm, task->pid);
1477#endif
1478}
1479
1480
1481
1482void release_klitirqd_lock(struct task_struct* t)
1483{
1484 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
1485 {
1486 struct mutex* sem;
1487 struct task_struct* owner = t;
1488
1489 if(t->state == TASK_RUNNING)
1490 {
1491 TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
1492 return;
1493 }
1494
1495 if(likely(!tsk_rt(t)->is_proxy_thread))
1496 {
1497 sem = &tsk_rt(t)->klitirqd_sem;
1498 }
1499 else
1500 {
1501 unsigned int k_id = klitirqd_id(t);
1502 owner = klitirqds[k_id].current_owner;
1503
1504 BUG_ON(t != klitirqds[k_id].klitirqd);
1505
1506 if(likely(owner))
1507 {
1508 sem = &tsk_rt(owner)->klitirqd_sem;
1509 }
1510 else
1511 {
1512 BUG();
1513
1514 // We had the rug pulled out from under us. Abort attempt
1515 // to reacquire the lock since our client no longer needs us.
1516 TRACE_CUR("HUH?! How did this happen?\n");
1517 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1518 return;
1519 }
1520 }
1521
1522 //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
1523 up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
1524 //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
1525 }
1526 /*
1527 else if(is_realtime(t))
1528 {
1529 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1530 }
1531 */
1532}
1533
1534int reacquire_klitirqd_lock(struct task_struct* t)
1535{
1536 int ret = 0;
1537
1538 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
1539 {
1540 struct mutex* sem;
1541 struct task_struct* owner = t;
1542
1543 if(likely(!tsk_rt(t)->is_proxy_thread))
1544 {
1545 sem = &tsk_rt(t)->klitirqd_sem;
1546 }
1547 else
1548 {
1549 unsigned int k_id = klitirqd_id(t);
1550 //struct task_struct* owner = klitirqds[k_id].current_owner;
1551 owner = klitirqds[k_id].current_owner;
1552
1553 BUG_ON(t != klitirqds[k_id].klitirqd);
1554
1555 if(likely(owner))
1556 {
1557 sem = &tsk_rt(owner)->klitirqd_sem;
1558 }
1559 else
1560 {
1561 // We had the rug pulled out from under us. Abort attempt
1562 // to reacquire the lock since our client no longer needs us.
1563 TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
1564 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1565 return(0);
1566 }
1567 }
1568
1569 //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
1570 __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
1571 //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
1572 }
1573 /*
1574 else if(is_realtime(t))
1575 {
1576 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1577 }
1578 */
1579
1580 return(ret);
1581}
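
The release/reacquire pair above implements a small state machine around klitirqd_sem: HELD drops to NEED_TO_REACQUIRE when the owner blocks, and the lock is taken back (via REACQUIRING) when it resumes. A user-space sketch of that protocol, reusing the same status names with a pthread mutex standing in for the kernel mutex:

#include <pthread.h>

enum sem_status { NOT_HELD, HELD, NEED_TO_REACQUIRE, REACQUIRING };

struct sem_state {
	enum sem_status stat;
	pthread_mutex_t sem;
};

/* analogue of release_klitirqd_lock(): give the semaphore up while blocked */
static void on_block(struct sem_state *s)
{
	if (s->stat == HELD) {
		s->stat = NEED_TO_REACQUIRE;
		pthread_mutex_unlock(&s->sem);
	}
}

/* analogue of reacquire_klitirqd_lock(): take it back upon wake-up */
static void on_wake(struct sem_state *s)
{
	if (s->stat == NEED_TO_REACQUIRE) {
		s->stat = REACQUIRING;
		pthread_mutex_lock(&s->sem);
		s->stat = HELD;
	}
}
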
1582
diff --git a/litmus/locking.c b/litmus/locking.c
index ca5a073a989e..12a23eb715cc 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -4,8 +4,17 @@
4 4
5#include <litmus/sched_plugin.h> 5#include <litmus/sched_plugin.h>
6#include <litmus/trace.h> 6#include <litmus/trace.h>
7#include <litmus/litmus.h>
7#include <litmus/wait.h> 8#include <litmus/wait.h>
8 9
10#ifdef CONFIG_LITMUS_DGL_SUPPORT
11#include <linux/uaccess.h>
12#endif
13
14#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
15#include <litmus/gpu_affinity.h>
16#endif
17
9static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); 18static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
10static int open_generic_lock(struct od_table_entry* entry, void* __user arg); 19static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
11static int close_generic_lock(struct od_table_entry* entry); 20static int close_generic_lock(struct od_table_entry* entry);
@@ -18,6 +27,9 @@ struct fdso_ops generic_lock_ops = {
18 .destroy = destroy_generic_lock 27 .destroy = destroy_generic_lock
19}; 28};
20 29
30static atomic_t lock_id_gen = ATOMIC_INIT(0);
31
32
21static inline bool is_lock(struct od_table_entry* entry) 33static inline bool is_lock(struct od_table_entry* entry)
22{ 34{
23 return entry->class == &generic_lock_ops; 35 return entry->class == &generic_lock_ops;
@@ -35,8 +47,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar
35 int err; 47 int err;
36 48
37 err = litmus->allocate_lock(&lock, type, arg); 49 err = litmus->allocate_lock(&lock, type, arg);
38 if (err == 0) 50 if (err == 0) {
51#ifdef CONFIG_LITMUS_NESTED_LOCKING
52 lock->nest.lock = lock;
53 lock->nest.hp_waiter_eff_prio = NULL;
54
55 INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node);
56 if(!lock->nest.hp_waiter_ptr) {
57 TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in "
58 "most uses. (exception: IKGLP donors)\n");
59 }
60#endif
61 lock->type = type;
62 lock->ident = atomic_inc_return(&lock_id_gen);
39 *obj_ref = lock; 63 *obj_ref = lock;
64 }
40 return err; 65 return err;
41} 66}
42 67
@@ -75,7 +100,8 @@ asmlinkage long sys_litmus_lock(int lock_od)
75 entry = get_entry_for_od(lock_od); 100 entry = get_entry_for_od(lock_od);
76 if (entry && is_lock(entry)) { 101 if (entry && is_lock(entry)) {
77 l = get_lock(entry); 102 l = get_lock(entry);
78 TRACE_CUR("attempts to lock 0x%p\n", l); 103 //TRACE_CUR("attempts to lock 0x%p\n", l);
104 TRACE_CUR("attempts to lock %d\n", l->ident);
79 err = l->ops->lock(l); 105 err = l->ops->lock(l);
80 } 106 }
81 107
@@ -97,7 +123,8 @@ asmlinkage long sys_litmus_unlock(int lock_od)
97 entry = get_entry_for_od(lock_od); 123 entry = get_entry_for_od(lock_od);
98 if (entry && is_lock(entry)) { 124 if (entry && is_lock(entry)) {
99 l = get_lock(entry); 125 l = get_lock(entry);
100 TRACE_CUR("attempts to unlock 0x%p\n", l); 126 //TRACE_CUR("attempts to unlock 0x%p\n", l);
127 TRACE_CUR("attempts to unlock %d\n", l->ident);
101 err = l->ops->unlock(l); 128 err = l->ops->unlock(l);
102 } 129 }
103 130
@@ -122,6 +149,365 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
122 return(t); 149 return(t);
123} 150}
124 151
152#ifdef CONFIG_LITMUS_NESTED_LOCKING
153
154void print_hp_waiters(struct binheap_node* n, int depth)
155{
156 struct litmus_lock *l;
157 struct nested_info *nest;
158 char padding[81] = " ";
159 struct task_struct *hp = NULL;
160 struct task_struct *hp_eff = NULL;
161 struct task_struct *node_prio = NULL;
162
163
164 if(n == NULL) {
165 TRACE("+-> %p\n", NULL);
166 return;
167 }
168
169 nest = binheap_entry(n, struct nested_info, hp_binheap_node);
170 l = nest->lock;
171
172 if(depth*2 <= 80)
173 padding[depth*2] = '\0';
174
175 if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) {
176 hp = *(nest->hp_waiter_ptr);
177
178 if(tsk_rt(hp)->inh_task) {
179 hp_eff = tsk_rt(hp)->inh_task;
180 }
181 }
182
183 node_prio = nest->hp_waiter_eff_prio;
184
185 TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n",
186 padding,
187 (node_prio) ? node_prio->comm : "nil",
188 (node_prio) ? node_prio->pid : -1,
189 (hp) ? hp->comm : "nil",
190 (hp) ? hp->pid : -1,
191 (hp_eff) ? hp_eff->comm : "nil",
192 (hp_eff) ? hp_eff->pid : -1,
193 l->ident);
194
195 if(n->left) print_hp_waiters(n->left, depth+1);
196 if(n->right) print_hp_waiters(n->right, depth+1);
197}
198#endif
199
200
201#ifdef CONFIG_LITMUS_DGL_SUPPORT
202
203void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
204{
205 /*
206 We pick the next lock in reverse order. This causes inheritance propagation
207 from locks received earlier to flow in the same direction as regular nested
208 locking. This might make fine-grain DGL easier in the future.
209 */
210
211 BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);
212
213 //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock);
214
215 // note reverse order
216 for(dgl_wait->last_primary = dgl_wait->last_primary - 1;
217 dgl_wait->last_primary >= 0;
218 --(dgl_wait->last_primary)){
219 if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner(
220 dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) {
221
222 tsk_rt(dgl_wait->task)->blocked_lock =
223 dgl_wait->locks[dgl_wait->last_primary];
224 mb();
225
226 TRACE_CUR("New blocked lock is %d\n",
227 dgl_wait->locks[dgl_wait->last_primary]->ident);
228
229 break;
230 }
231 }
232}
233
234int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
235{
236 // should never be called.
237 BUG();
238 return 1;
239}
240
241void __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
242 dgl_wait_state_t** dgl_wait,
243 struct task_struct **task)
244{
245 wait_queue_t *q;
246
247 *dgl_wait = NULL;
248 *task = NULL;
249
250 if (waitqueue_active(wq)) {
251 q = list_entry(wq->task_list.next,
252 wait_queue_t, task_list);
253
254 if(q->func == dgl_wake_up) {
255 *dgl_wait = (dgl_wait_state_t*) q->private;
256 }
257 else {
258 *task = (struct task_struct*) q->private;
259 }
260
261 __remove_wait_queue(wq, q);
262 }
263}
264
265void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait)
266{
267 init_waitqueue_entry(wq_node, dgl_wait->task);
268 wq_node->private = dgl_wait;
269 wq_node->func = dgl_wake_up;
270}
271
272
273static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
274{
275 int i;
276 unsigned long irqflags; //, dummyflags;
277 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
278
279 BUG_ON(dgl_wait->task != current);
280
281 raw_spin_lock_irqsave(dgl_lock, irqflags);
282
283
284 dgl_wait->nr_remaining = dgl_wait->size;
285
286 TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size);
287
288 // try to acquire each lock. enqueue (non-blocking) if it is unavailable.
289 for(i = 0; i < dgl_wait->size; ++i) {
290 struct litmus_lock *l = dgl_wait->locks[i];
291
292 // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.
293
294 if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) {
295 --(dgl_wait->nr_remaining);
296            TRACE_CUR("Acquired lock %d immediately.\n", l->ident);
297 }
298 }
299
300 if(dgl_wait->nr_remaining == 0) {
301        // acquired entire group immediately
302        TRACE_CUR("Acquired all locks in DGL immediately!\n");
303 }
304 else {
305
306 TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
307 dgl_wait->nr_remaining);
308
309#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
310        // KLUDGE: don't count this suspension as time in the gpu
311        // critical section
312 if(tsk_rt(dgl_wait->task)->held_gpus) {
313 tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
314 }
315#endif
316
317 // note reverse order. see comments in select_next_lock for reason.
318 for(i = dgl_wait->size - 1; i >= 0; --i) {
319 struct litmus_lock *l = dgl_wait->locks[i];
320 if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe
321
322 TRACE_CUR("Activating priority inheritance on lock %d\n",
323 l->ident);
324
325 TS_DGL_LOCK_SUSPEND;
326
327 l->ops->enable_priority(l, dgl_wait);
328 dgl_wait->last_primary = i;
329
330 TRACE_CUR("Suspending for lock %d\n", l->ident);
331
332 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
333
334 schedule(); // suspend!!!
335
336 TS_DGL_LOCK_RESUME;
337
338 TRACE_CUR("Woken up from DGL suspension.\n");
339
340 goto all_acquired; // we should hold all locks when we wake up.
341 }
342 }
343
344 TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n");
345 //BUG();
346 }
347
348 raw_spin_unlock_irqrestore(dgl_lock, irqflags);
349
350all_acquired:
351
352 // FOR SANITY CHECK FOR TESTING
353// for(i = 0; i < dgl_wait->size; ++i) {
354// struct litmus_lock *l = dgl_wait->locks[i];
355// BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
356// }
357
358 TRACE_CUR("Acquired entire DGL\n");
359
360 return 0;
361}
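
Ignoring priority inheritance and the global dgl_lock (which are what make the kernel version atomic), the acquire phase reduces to "try everything without blocking, then block on whatever is left, last lock first". A rough user-space sketch with pthread mutexes; it is purely illustrative and does not reproduce the deadlock freedom the dgl spinlock provides:

#include <pthread.h>

/* held[] records which entries were obtained; the caller provides both arrays */
static void dgl_lock_all(pthread_mutex_t **locks, int *held, int n)
{
	int i, remaining = n;

	/* phase 1: non-blocking attempt on every lock */
	for (i = 0; i < n; ++i) {
		held[i] = (pthread_mutex_trylock(locks[i]) == 0);
		if (held[i])
			--remaining;
	}

	/* phase 2: block on the rest, in reverse order like the code above */
	for (i = n - 1; remaining > 0 && i >= 0; --i) {
		if (!held[i]) {
			pthread_mutex_lock(locks[i]);
			held[i] = 1;
			--remaining;
		}
	}
}
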
362
363static int supports_dgl(struct litmus_lock *l)
364{
365 struct litmus_lock_ops* ops = l->ops;
366
367 return (ops->dgl_lock &&
368 ops->is_owner &&
369 ops->enable_priority);
370}
371
372asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
373{
374 struct task_struct *t = current;
375 long err = -EINVAL;
376 int dgl_ods[MAX_DGL_SIZE];
377 int i;
378
379 dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held.
380
381 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
382 goto out;
383
384 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
385 goto out;
386
387 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
388 goto out;
389
390 if (!is_realtime(t)) {
391 err = -EPERM;
392 goto out;
393 }
394
395 for(i = 0; i < dgl_size; ++i) {
396 struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
397 if(entry && is_lock(entry)) {
398 dgl_wait_state.locks[i] = get_lock(entry);
399 if(!supports_dgl(dgl_wait_state.locks[i])) {
400 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
401 dgl_wait_state.locks[i]->ident);
402 goto out;
403 }
404 }
405 else {
406 TRACE_CUR("Invalid lock identifier\n");
407 goto out;
408 }
409 }
410
411 dgl_wait_state.task = t;
412 dgl_wait_state.size = dgl_size;
413
414 TS_DGL_LOCK_START;
415 err = do_litmus_dgl_lock(&dgl_wait_state);
416
417    /* Note: task may have been suspended or preempted in between!  Take
418 * this into account when computing overheads. */
419 TS_DGL_LOCK_END;
420
421out:
422 return err;
423}
424
425static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size)
426{
427 int i;
428 long err = 0;
429
430 TRACE_CUR("Unlocking a DGL of %d size\n", dgl_size);
431
432 for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order
433
434 struct litmus_lock *l = dgl_locks[i];
435 long tmp_err;
436
437 TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident);
438
439 tmp_err = l->ops->unlock(l);
440
441 if(tmp_err) {
442 TRACE_CUR("There was an error unlocking %d: %d.\n", l->ident, tmp_err);
443 err = tmp_err;
444 }
445 }
446
447 TRACE_CUR("DGL unlocked. err = %d\n", err);
448
449 return err;
450}
451
452asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
453{
454 long err = -EINVAL;
455 int dgl_ods[MAX_DGL_SIZE];
456 struct od_table_entry* entry;
457 int i;
458
459 struct litmus_lock* dgl_locks[MAX_DGL_SIZE];
460
461 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
462 goto out;
463
464 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
465 goto out;
466
467 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
468 goto out;
469
470 for(i = 0; i < dgl_size; ++i) {
471 entry = get_entry_for_od(dgl_ods[i]);
472 if(entry && is_lock(entry)) {
473 dgl_locks[i] = get_lock(entry);
474 if(!supports_dgl(dgl_locks[i])) {
475 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
476 dgl_locks[i]->ident);
477 goto out;
478 }
479 }
480 else {
481 TRACE_CUR("Invalid lock identifier\n");
482 goto out;
483 }
484 }
485
486 TS_DGL_UNLOCK_START;
487 err = do_litmus_dgl_unlock(dgl_locks, dgl_size);
488
489    /* Note: task may have been suspended or preempted in between!  Take
490 * this into account when computing overheads. */
491 TS_DGL_UNLOCK_END;
492
493out:
494 return err;
495}
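
From user space, the two entry points above would be reached through raw syscalls on the object descriptors of the individual locks. A hypothetical pair of wrappers, assuming the patched headers export __NR_litmus_dgl_lock/__NR_litmus_dgl_unlock numbers; those macro names and the od-array convention are assumptions for illustration, not taken from this hunk:

#include <sys/syscall.h>
#include <unistd.h>

/* ods: object descriptors of the locks in the group (assumed convention) */
static long dgl_lock(int *ods, int n)
{
	return syscall(__NR_litmus_dgl_lock, ods, n);   /* assumed syscall macro */
}

static long dgl_unlock(int *ods, int n)
{
	return syscall(__NR_litmus_dgl_unlock, ods, n); /* assumed syscall macro */
}
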
496
497#else // CONFIG_LITMUS_DGL_SUPPORT
498
499asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
500{
501 return -ENOSYS;
502}
503
504asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
505{
506 return -ENOSYS;
507}
508
509#endif
510
125unsigned int __add_wait_queue_prio_exclusive( 511unsigned int __add_wait_queue_prio_exclusive(
126 wait_queue_head_t* head, 512 wait_queue_head_t* head,
127 prio_wait_queue_t *new) 513 prio_wait_queue_t *new)
@@ -154,7 +540,7 @@ out:
154 return passed; 540 return passed;
155} 541}
156 542
157#else 543#else // CONFIG_LITMUS_LOCKING
158 544
159struct fdso_ops generic_lock_ops = {}; 545struct fdso_ops generic_lock_ops = {};
160 546
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
new file mode 100644
index 000000000000..4b86a50d3bd1
--- /dev/null
+++ b/litmus/nvidia_info.c
@@ -0,0 +1,597 @@
1#include <linux/module.h>
2#include <linux/semaphore.h>
3#include <linux/pci.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/nvidia_info.h>
7#include <litmus/litmus.h>
8
9#include <litmus/sched_plugin.h>
10
11#include <litmus/binheap.h>
12
13typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
14typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
15typedef unsigned char NvU8; /* 0 to 255 */
16typedef unsigned short NvU16; /* 0 to 65535 */
17typedef signed char NvS8; /* -128 to 127 */
18typedef signed short NvS16; /* -32768 to 32767 */
19typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
20typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
21typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
22typedef unsigned int NvU32; /* 0 to 4294967295 */
23typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
24typedef union
25{
26 volatile NvV8 Reg008[1];
27 volatile NvV16 Reg016[1];
28 volatile NvV32 Reg032[1];
29} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
30
31typedef struct
32{
33 NvU64 address;
34 NvU64 size;
35 NvU32 offset;
36 NvU32 *map;
37 litmus_nv_phwreg_t map_u;
38} litmus_nv_aperture_t;
39
40typedef struct
41{
42 void *priv; /* private data */
43 void *os_state; /* os-specific device state */
44
45 int rmInitialized;
46 int flags;
47
48 /* PCI config info */
49 NvU32 domain;
50 NvU16 bus;
51 NvU16 slot;
52 NvU16 vendor_id;
53 NvU16 device_id;
54 NvU16 subsystem_id;
55 NvU32 gpu_id;
56 void *handle;
57
58 NvU32 pci_cfg_space[16];
59
60 /* physical characteristics */
61 litmus_nv_aperture_t bars[3];
62 litmus_nv_aperture_t *regs;
63 litmus_nv_aperture_t *fb, ud;
64 litmus_nv_aperture_t agp;
65
66 NvU32 interrupt_line;
67
68 NvU32 agp_config;
69 NvU32 agp_status;
70
71 NvU32 primary_vga;
72
73 NvU32 sim_env;
74
75 NvU32 rc_timer_enabled;
76
77 /* list of events allocated for this device */
78 void *event_list;
79
80 void *kern_mappings;
81
82} litmus_nv_state_t;
83
84typedef struct work_struct litmus_nv_task_t;
85
86typedef struct litmus_nv_work_s {
87 litmus_nv_task_t task;
88 void *data;
89} litmus_nv_work_t;
90
91typedef struct litmus_nv_linux_state_s {
92 litmus_nv_state_t nv_state;
93 atomic_t usage_count;
94
95 struct pci_dev *dev;
96 void *agp_bridge;
97 void *alloc_queue;
98
99 void *timer_sp;
100 void *isr_sp;
101 void *pci_cfgchk_sp;
102 void *isr_bh_sp;
103
104#ifdef CONFIG_CUDA_4_0
105 char registry_keys[512];
106#endif
107
108    /* keep track of any pending bottom halves */
109 struct tasklet_struct tasklet;
110 litmus_nv_work_t work;
111
112 /* get a timer callback every second */
113 struct timer_list rc_timer;
114
115 /* lock for linux-specific data, not used by core rm */
116 struct semaphore ldata_lock;
117
118 /* lock for linux-specific alloc queue */
119 struct semaphore at_lock;
120
121#if 0
122#if defined(NV_USER_MAP)
123 /* list of user mappings */
124 struct nv_usermap_s *usermap_list;
125
126 /* lock for VMware-specific mapping list */
127 struct semaphore mt_lock;
128#endif /* defined(NV_USER_MAP) */
129#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
130 void *apm_nv_dev;
131#endif
132#endif
133
134 NvU32 device_num;
135 struct litmus_nv_linux_state_s *next;
136} litmus_nv_linux_state_t;
137
138void dump_nvidia_info(const struct tasklet_struct *t)
139{
140 litmus_nv_state_t* nvstate = NULL;
141 litmus_nv_linux_state_t* linuxstate = NULL;
142 struct pci_dev* pci = NULL;
143
144 nvstate = (litmus_nv_state_t*)(t->data);
145
146 if(nvstate)
147 {
148 TRACE("NV State:\n"
149 "\ttasklet ptr = %p\n"
150 "\tstate ptr = %p\n"
151 "\tprivate data ptr = %p\n"
152 "\tos state ptr = %p\n"
153 "\tdomain = %u\n"
154 "\tbus = %u\n"
155 "\tslot = %u\n"
156            "\tvendor_id = %u\n"
157 "\tdevice_id = %u\n"
158 "\tsubsystem_id = %u\n"
159 "\tgpu_id = %u\n"
160 "\tinterrupt_line = %u\n",
161 t,
162 nvstate,
163 nvstate->priv,
164 nvstate->os_state,
165 nvstate->domain,
166 nvstate->bus,
167 nvstate->slot,
168 nvstate->vendor_id,
169 nvstate->device_id,
170 nvstate->subsystem_id,
171 nvstate->gpu_id,
172 nvstate->interrupt_line);
173
174 linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
175 }
176 else
177 {
178 TRACE("INVALID NVSTATE????\n");
179 }
180
181 if(linuxstate)
182 {
183 int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
184 int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
185 int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
186
187
188 TRACE("LINUX NV State:\n"
189 "\tlinux nv state ptr: %p\n"
190 "\taddress of tasklet: %p\n"
191 "\taddress of work: %p\n"
192 "\tusage_count: %d\n"
193 "\tdevice_num: %u\n"
194 "\ttasklet addr == this tasklet: %d\n"
195 "\tpci: %p\n",
196 linuxstate,
197 &(linuxstate->tasklet),
198 &(linuxstate->work),
199 atomic_read(&(linuxstate->usage_count)),
200 linuxstate->device_num,
201 (t == &(linuxstate->tasklet)),
202 linuxstate->dev);
203
204 pci = linuxstate->dev;
205
206 TRACE("Offsets:\n"
207 "\tOffset from LinuxState: %d, %x\n"
208 "\tOffset from NVState: %d, %x\n"
209 "\tOffset from parameter: %d, %x\n"
210 "\tdevice_num: %u\n",
211 ls_offset, ls_offset,
212 ns_offset_raw, ns_offset_raw,
213 ns_offset_desired, ns_offset_desired,
214 *((u32*)((void*)nvstate + ns_offset_desired)));
215 }
216 else
217 {
218 TRACE("INVALID LINUXNVSTATE?????\n");
219 }
220
221#if 0
222 if(pci)
223 {
224 TRACE("PCI DEV Info:\n"
225 "pci device ptr: %p\n"
226 "\tdevfn = %d\n"
227 "\tvendor = %d\n"
228 "\tdevice = %d\n"
229 "\tsubsystem_vendor = %d\n"
230 "\tsubsystem_device = %d\n"
231 "\tslot # = %d\n",
232 pci,
233 pci->devfn,
234 pci->vendor,
235 pci->device,
236 pci->subsystem_vendor,
237 pci->subsystem_device,
238 pci->slot->number);
239 }
240 else
241 {
242 TRACE("INVALID PCIDEV PTR?????\n");
243 }
244#endif
245}
246
247static struct module* nvidia_mod = NULL;
248int init_nvidia_info(void)
249{
250 mutex_lock(&module_mutex);
251 nvidia_mod = find_module("nvidia");
252 mutex_unlock(&module_mutex);
253 if(nvidia_mod != NULL)
254 {
255 TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
256 (void*)(nvidia_mod->module_core),
257 (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
258 init_nv_device_reg();
259 return(0);
260 }
261 else
262 {
263 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
264 return(-1);
265 }
266}
267
268void shutdown_nvidia_info(void)
269{
270 nvidia_mod = NULL;
271 mb();
272}
273
274/* works with pointers to static data inside the module too. */
275int is_nvidia_func(void* func_addr)
276{
277 int ret = 0;
278 if(nvidia_mod)
279 {
280 ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
281 /*
282 if(ret)
283 {
284 TRACE("%s : %p is in NVIDIA module: %d\n",
285 __FUNCTION__, func_addr, ret);
286 }*/
287 }
288
289 return(ret);
290}
291
292u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
293{
294 // life is too short to use hard-coded offsets. update this later.
295 litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
296 litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
297
298 BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
299
300 return(linuxstate->device_num);
301
302 //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
303
304#if 0
305    // offset determined through observed behavior of the NV driver.
306 //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
307 //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
308
309 void* state = (void*)(t->data);
310 void* device_num_ptr = state + DEVICE_NUM_OFFSET;
311
312 //dump_nvidia_info(t);
313 return(*((u32*)device_num_ptr));
314#endif
315}
316
317u32 get_work_nv_device_num(const struct work_struct *t)
318{
319    // offset determined through observed behavior of the NV driver.
320 const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
321 void* state = (void*)(t);
322 void** device_num_ptr = state + DEVICE_NUM_OFFSET;
323 return(*((u32*)(*device_num_ptr)));
324}
325
326
327typedef struct {
328 raw_spinlock_t lock;
329 int nr_owners;
330 struct task_struct* max_prio_owner;
331 struct task_struct* owners[NV_MAX_SIMULT_USERS];
332}nv_device_registry_t;
333
334static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
335
336int init_nv_device_reg(void)
337{
338 int i;
339
340 memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
341
342 for(i = 0; i < NV_DEVICE_NUM; ++i)
343 {
344 raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
345 }
346
347 return(1);
348}
349
350/* used to get the nv_device_id for a given owner.
351   (returns -1 if the associated device id cannot be found) */
352/*
353int get_nv_device_id(struct task_struct* owner)
354{
355 int i;
356 if(!owner)
357 {
358 return(-1);
359 }
360 for(i = 0; i < NV_DEVICE_NUM; ++i)
361 {
362 if(NV_DEVICE_REG[i].device_owner == owner)
363 return(i);
364 }
365 return(-1);
366}
367*/
368
369static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) {
370 int i;
371 struct task_struct *found = NULL;
372 for(i = 0; i < reg->nr_owners; ++i) {
373 if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
374 found = reg->owners[i];
375 }
376 }
377 return found;
378}
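
find_hp_owner() is a plain linear maximum scan with a pluggable "higher priority?" comparator that must accept NULL as the lowest possible value (litmus->compare() is first called with found == NULL). The same shape in generic form, as a sketch with hypothetical names:

/* returns nonzero if a should be preferred over b; must accept b == NULL */
typedef int (*higher_prio_fn)(const void *a, const void *b);

static const void *max_by(const void **items, int n, const void *skip,
                          higher_prio_fn higher)
{
	const void *best = NULL;
	int i;

	for (i = 0; i < n; ++i)
		if (items[i] && items[i] != skip && higher(items[i], best))
			best = items[i];
	return best;
}
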
379
380#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
381void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
382{
383 unsigned long flags;
384 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
385
386 if(reg->max_prio_owner != t) {
387
388 raw_spin_lock_irqsave(&reg->lock, flags);
389
390 if(reg->max_prio_owner != t) {
391 if(litmus->compare(t, reg->max_prio_owner)) {
392 litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
393 reg->max_prio_owner = t;
394 }
395 }
396
397 raw_spin_unlock_irqrestore(&reg->lock, flags);
398 }
399}
400
401
402void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
403{
404 unsigned long flags;
405 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
406
407 if(reg->max_prio_owner == t) {
408
409 raw_spin_lock_irqsave(&reg->lock, flags);
410
411 if(reg->max_prio_owner == t) {
412 reg->max_prio_owner = find_hp_owner(reg, NULL);
413 if(reg->max_prio_owner != t) {
414 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
415 }
416 }
417
418 raw_spin_unlock_irqrestore(&reg->lock, flags);
419 }
420}
421#endif
422
423static int __reg_nv_device(int reg_device_id, struct task_struct *t)
424{
425 int ret = 0;
426 int i;
427 struct task_struct *old_max = NULL;
428 unsigned long flags;
429 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
430
431 if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) {
432        // TODO: check if task is already registered.
433 return ret; // assume already registered.
434 }
435
436
437 raw_spin_lock_irqsave(&reg->lock, flags);
438
439 if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
440 TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
441 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
442 if(reg->owners[i] == NULL) {
443 reg->owners[i] = t;
444
445 //if(edf_higher_prio(t, reg->max_prio_owner)) {
446 if(litmus->compare(t, reg->max_prio_owner)) {
447 old_max = reg->max_prio_owner;
448 reg->max_prio_owner = t;
449
450#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
451 litmus->change_prio_pai_tasklet(old_max, t);
452#endif
453 }
454
455#ifdef CONFIG_LITMUS_SOFTIRQD
456 down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem);
457#endif
458 ++(reg->nr_owners);
459
460 break;
461 }
462 }
463 }
464 else
465 {
466 TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
467 //ret = -EBUSY;
468 }
469
470 raw_spin_unlock_irqrestore(&reg->lock, flags);
471
472 __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
473
474 return(ret);
475}
476
477static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
478{
479 int ret = 0;
480 int i;
481 unsigned long flags;
482 nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
483
484#ifdef CONFIG_LITMUS_SOFTIRQD
485 struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
486#endif
487
488 if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
489 return ret;
490 }
491
492 raw_spin_lock_irqsave(&reg->lock, flags);
493
494 TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
495
496 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
497 if(reg->owners[i] == t) {
498#ifdef CONFIG_LITMUS_SOFTIRQD
499 flush_pending(klitirqd_th, t);
500#endif
501 if(reg->max_prio_owner == t) {
502 reg->max_prio_owner = find_hp_owner(reg, t);
503#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
504 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
505#endif
506 }
507
508#ifdef CONFIG_LITMUS_SOFTIRQD
509 up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem);
510#endif
511
512 reg->owners[i] = NULL;
513 --(reg->nr_owners);
514
515 break;
516 }
517 }
518
519 raw_spin_unlock_irqrestore(&reg->lock, flags);
520
521 __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
522
523 return(ret);
524}
525
526
527int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
528{
529 int ret;
530
531 if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
532 {
533 if(reg_action)
534 ret = __reg_nv_device(reg_device_id, t);
535 else
536 ret = __clear_reg_nv_device(reg_device_id, t);
537 }
538 else
539 {
540 ret = -ENODEV;
541 }
542
543 return(ret);
544}
545
546/* use to get the owner of nv_device_id. */
547struct task_struct* get_nv_max_device_owner(u32 target_device_id)
548{
549 struct task_struct *owner = NULL;
550 BUG_ON(target_device_id >= NV_DEVICE_NUM);
551 owner = NV_DEVICE_REG[target_device_id].max_prio_owner;
552 return(owner);
553}
554
555void lock_nv_registry(u32 target_device_id, unsigned long* flags)
556{
557 BUG_ON(target_device_id >= NV_DEVICE_NUM);
558
559 if(in_interrupt())
560 TRACE("Locking registry for %d.\n", target_device_id);
561 else
562 TRACE_CUR("Locking registry for %d.\n", target_device_id);
563
564 raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
565}
566
567void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
568{
569 BUG_ON(target_device_id >= NV_DEVICE_NUM);
570
571 if(in_interrupt())
572 TRACE("Unlocking registry for %d.\n", target_device_id);
573 else
574 TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
575
576 raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
577}
578
579
580//void increment_nv_int_count(u32 device)
581//{
582// unsigned long flags;
583// struct task_struct* owner;
584//
585// lock_nv_registry(device, &flags);
586//
587// owner = NV_DEVICE_REG[device].device_owner;
588// if(owner)
589// {
590// atomic_inc(&tsk_rt(owner)->nv_int_count);
591// }
592//
593// unlock_nv_registry(device, &flags);
594//}
595//EXPORT_SYMBOL(increment_nv_int_count);
596
597
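For illustration, the registration logic above boils down to a per-device owner table plus a cached maximum-priority owner that is re-evaluated on every register/unregister. The following is a minimal user-space sketch of that idea only; the demo_* names, the integer priority field, and the absence of locking, klitirqd and PAI hooks are simplifying assumptions, not part of the kernel code.

#include <stdio.h>
#include <string.h>

#define DEMO_MAX_USERS 3

struct demo_task {
	const char *name;
	int prio;			/* larger value == higher priority */
};

struct demo_reg {
	struct demo_task *owners[DEMO_MAX_USERS];
	int nr_owners;
	struct demo_task *max_prio_owner;	/* cached highest-priority owner */
};

/* stand-in for litmus->compare(): NULL always loses */
static int demo_prio_higher(struct demo_task *a, struct demo_task *b)
{
	return a && (!b || a->prio > b->prio);
}

/* mirrors find_hp_owner(): scan the table, optionally skipping one task */
static struct demo_task *demo_find_hp_owner(struct demo_reg *reg,
					    struct demo_task *skip)
{
	struct demo_task *found = NULL;
	for (int i = 0; i < DEMO_MAX_USERS; i++)
		if (reg->owners[i] && reg->owners[i] != skip &&
		    demo_prio_higher(reg->owners[i], found))
			found = reg->owners[i];
	return found;
}

/* mirrors __reg_nv_device(): claim a free slot, refresh the cached maximum */
static int demo_register(struct demo_reg *reg, struct demo_task *t)
{
	if (reg->nr_owners >= DEMO_MAX_USERS)
		return -1;		/* device already fully in use */
	for (int i = 0; i < DEMO_MAX_USERS; i++) {
		if (!reg->owners[i]) {
			reg->owners[i] = t;
			reg->nr_owners++;
			if (demo_prio_higher(t, reg->max_prio_owner))
				reg->max_prio_owner = t;
			break;
		}
	}
	return 0;
}

/* mirrors __clear_reg_nv_device(): release the slot, recompute the maximum */
static void demo_unregister(struct demo_reg *reg, struct demo_task *t)
{
	for (int i = 0; i < DEMO_MAX_USERS; i++) {
		if (reg->owners[i] == t) {
			reg->owners[i] = NULL;
			reg->nr_owners--;
			if (reg->max_prio_owner == t)
				reg->max_prio_owner = demo_find_hp_owner(reg, t);
			break;
		}
	}
}

int main(void)
{
	struct demo_reg reg;
	struct demo_task a = { "A", 10 }, b = { "B", 20 };

	memset(&reg, 0, sizeof(reg));
	demo_register(&reg, &a);
	demo_register(&reg, &b);
	printf("max-prio owner: %s\n", reg.max_prio_owner->name);	/* B */
	demo_unregister(&reg, &b);
	printf("max-prio owner: %s\n", reg.max_prio_owner->name);	/* A */
	return 0;
}

The scan-and-recompute step in demo_unregister() corresponds to the find_hp_owner() call performed above while the registry spinlock is held.
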
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 5704d0bf4c0b..a2cae3648e15 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -26,10 +26,12 @@ void sched_state_will_schedule(struct task_struct* tsk)
26 set_sched_state(PICKED_WRONG_TASK); 26 set_sched_state(PICKED_WRONG_TASK);
27 else 27 else
28 set_sched_state(WILL_SCHEDULE); 28 set_sched_state(WILL_SCHEDULE);
29 } else 29 } else {
30 /* Litmus tasks should never be subject to a remote 30 /* Litmus tasks should never be subject to a remote
31 * set_tsk_need_resched(). */ 31 * set_tsk_need_resched(). */
32 BUG_ON(is_realtime(tsk)); 32 //BUG_ON(is_realtime(tsk));
33 }
34
33#ifdef CONFIG_PREEMPT_STATE_TRACE 35#ifdef CONFIG_PREEMPT_STATE_TRACE
34 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 36 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
35 __builtin_return_address(0)); 37 __builtin_return_address(0));
@@ -45,13 +47,17 @@ void sched_state_ipi(void)
45 /* Cause scheduler to be invoked. 47 /* Cause scheduler to be invoked.
46 * This will cause a transition to WILL_SCHEDULE. */ 48 * This will cause a transition to WILL_SCHEDULE. */
47 set_tsk_need_resched(current); 49 set_tsk_need_resched(current);
50 /*
48 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", 51 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
49 current->comm, current->pid); 52 current->comm, current->pid);
53 */
50 } else { 54 } else {
51 /* ignore */ 55 /* ignore */
56 /*
52 TRACE_STATE("ignoring IPI in state %x (%s)\n", 57 TRACE_STATE("ignoring IPI in state %x (%s)\n",
53 get_sched_state(), 58 get_sched_state(),
54 sched_state_name(get_sched_state())); 59 sched_state_name(get_sched_state()));
60 */
55 } 61 }
56} 62}
57 63
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
new file mode 100644
index 000000000000..75ed87c5ed48
--- /dev/null
+++ b/litmus/rsm_lock.c
@@ -0,0 +1,796 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/rsm_lock.h>
7
8//#include <litmus/edf_common.h>
9
10#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11#include <litmus/gpu_affinity.h>
12#endif
13
14
15/* caller is responsible for locking */
16static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex,
17 struct task_struct* skip)
18{
19 wait_queue_t *q;
20 struct list_head *pos;
21 struct task_struct *queued = NULL, *found = NULL;
22
23#ifdef CONFIG_LITMUS_DGL_SUPPORT
24 dgl_wait_state_t *dgl_wait = NULL;
25#endif
26
27 list_for_each(pos, &mutex->wait.task_list) {
28 q = list_entry(pos, wait_queue_t, task_list);
29
30#ifdef CONFIG_LITMUS_DGL_SUPPORT
31 if(q->func == dgl_wake_up) {
32 dgl_wait = (dgl_wait_state_t*) q->private;
33 if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) {
34 queued = dgl_wait->task;
35 }
36 else {
37 queued = NULL; // skip it.
38 }
39 }
40 else {
41 queued = (struct task_struct*) q->private;
42 }
43#else
44 queued = (struct task_struct*) q->private;
45#endif
46
47 /* Compare task prios, find high prio task. */
48 //if (queued && queued != skip && edf_higher_prio(queued, found)) {
49 if (queued && queued != skip && litmus->compare(queued, found)) {
50 found = queued;
51 }
52 }
53 return found;
54}
55
56
57#ifdef CONFIG_LITMUS_DGL_SUPPORT
58
59int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t)
60{
61 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
62 return(mutex->owner == t);
63}
64
65// return 1 if the resource was immediately acquired.
66// Assumes mutex->lock is held.
67// Must set task state to TASK_UNINTERRUPTIBLE if task blocks.
68int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait,
69 wait_queue_t* wq_node)
70{
71 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
72 struct task_struct *t = dgl_wait->task;
73
74 int acquired_immediatly = 0;
75
76 BUG_ON(t != current);
77
78 if (mutex->owner) {
79 TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident);
80
81 init_dgl_waitqueue_entry(wq_node, dgl_wait);
82
83 set_task_state(t, TASK_UNINTERRUPTIBLE);
84 __add_wait_queue_tail_exclusive(&mutex->wait, wq_node);
85 } else {
86 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
87
88 /* it's ours now */
89 mutex->owner = t;
90
91 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
92 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
93 struct nested_info, hp_binheap_node);
94 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
95
96 acquired_immediatly = 1;
97 }
98
99 return acquired_immediatly;
100}
101
102void rsm_mutex_enable_priority(struct litmus_lock *l,
103 dgl_wait_state_t* dgl_wait)
104{
105 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
106 struct task_struct *t = dgl_wait->task;
107 struct task_struct *owner = mutex->owner;
108 unsigned long flags = 0; // these are unused under DGL coarse-grain locking
109
110 BUG_ON(owner == t);
111
112 tsk_rt(t)->blocked_lock = l;
113 mb();
114
115 //if (edf_higher_prio(t, mutex->hp_waiter)) {
116 if (litmus->compare(t, mutex->hp_waiter)) {
117
118 struct task_struct *old_max_eff_prio;
119 struct task_struct *new_max_eff_prio;
120 struct task_struct *new_prio = NULL;
121
122 if(mutex->hp_waiter)
123 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
124 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
125 else
126 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
127
128 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
129
130 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
131 mutex->hp_waiter = t;
132 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
133 binheap_decrease(&l->nest.hp_binheap_node,
134 &tsk_rt(owner)->hp_blocked_tasks);
135 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
136
137 if(new_max_eff_prio != old_max_eff_prio) {
138 TRACE_TASK(t, "is new hp_waiter.\n");
139
140 if ((effective_priority(owner) == old_max_eff_prio) ||
141 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
142 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
143 new_prio = new_max_eff_prio;
144 }
145 }
146 else {
147 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
148 }
149
150 if(new_prio) {
151 litmus->nested_increase_prio(owner, new_prio,
152 &mutex->lock, flags); // unlocks lock.
153 }
154 else {
155 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
156 unlock_fine_irqrestore(&mutex->lock, flags);
157 }
158 }
159 else {
160 TRACE_TASK(t, "no change in hp_waiter.\n");
161 unlock_fine_irqrestore(&mutex->lock, flags);
162 }
163}
164
165static void select_next_lock_if_primary(struct litmus_lock *l,
166 dgl_wait_state_t *dgl_wait)
167{
168 if(tsk_rt(dgl_wait->task)->blocked_lock == l) {
169 TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n",
170 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
171 tsk_rt(dgl_wait->task)->blocked_lock = NULL;
172 mb();
173 select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on
174 }
175 else {
176 TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n",
177 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
178 }
179}
180#endif
181
182
183
184
185int rsm_mutex_lock(struct litmus_lock* l)
186{
187 struct task_struct *t = current;
188 struct task_struct *owner;
189 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
190 wait_queue_t wait;
191 unsigned long flags;
192
193#ifdef CONFIG_LITMUS_DGL_SUPPORT
194 raw_spinlock_t *dgl_lock;
195#endif
196
197 if (!is_realtime(t))
198 return -EPERM;
199
200#ifdef CONFIG_LITMUS_DGL_SUPPORT
201 dgl_lock = litmus->get_dgl_spinlock(t);
202#endif
203
204 lock_global_irqsave(dgl_lock, flags);
205 lock_fine_irqsave(&mutex->lock, flags);
206
207 if (mutex->owner) {
208 TRACE_TASK(t, "Blocking on lock %d.\n", l->ident);
209
210#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
211 // KLUDGE: don't count this suspension as time in the GPU
212 // critical section
213 if(tsk_rt(t)->held_gpus) {
214 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
215 }
216#endif
217
218 /* resource is not free => must suspend and wait */
219
220 owner = mutex->owner;
221
222 init_waitqueue_entry(&wait, t);
223
224 tsk_rt(t)->blocked_lock = l; /* record where we are blocked */
225 mb(); // needed?
226
227 /* FIXME: interruptible would be nice some day */
228 set_task_state(t, TASK_UNINTERRUPTIBLE);
229
230 __add_wait_queue_tail_exclusive(&mutex->wait, &wait);
231
232 /* check if we need to activate priority inheritance */
233 //if (edf_higher_prio(t, mutex->hp_waiter)) {
234 if (litmus->compare(t, mutex->hp_waiter)) {
235
236 struct task_struct *old_max_eff_prio;
237 struct task_struct *new_max_eff_prio;
238 struct task_struct *new_prio = NULL;
239
240 if(mutex->hp_waiter)
241 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
242 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
243 else
244 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
245
246 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
247
248 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
249 mutex->hp_waiter = t;
250 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
251 binheap_decrease(&l->nest.hp_binheap_node,
252 &tsk_rt(owner)->hp_blocked_tasks);
253 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
254
255 if(new_max_eff_prio != old_max_eff_prio) {
256 TRACE_TASK(t, "is new hp_waiter.\n");
257
258 if ((effective_priority(owner) == old_max_eff_prio) ||
259 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
260 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
261 new_prio = new_max_eff_prio;
262 }
263 }
264 else {
265 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
266 }
267
268 if(new_prio) {
269 litmus->nested_increase_prio(owner, new_prio, &mutex->lock,
270 flags); // unlocks lock.
271 }
272 else {
273 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
274 unlock_fine_irqrestore(&mutex->lock, flags);
275 }
276 }
277 else {
278 TRACE_TASK(t, "no change in hp_waiter.\n");
279
280 unlock_fine_irqrestore(&mutex->lock, flags);
281 }
282
283 unlock_global_irqrestore(dgl_lock, flags);
284
285 TS_LOCK_SUSPEND;
286
287 /* We depend on the FIFO order. Thus, we don't need to recheck
288 * when we wake up; we are guaranteed to have the lock since
289 * there is only one wake up per release.
290 */
291
292 schedule();
293
294 TS_LOCK_RESUME;
295
296 /* Since we hold the lock, no other task will change
297 * ->owner. We can thus check it without acquiring the spin
298 * lock. */
299 BUG_ON(mutex->owner != t);
300
301 TRACE_TASK(t, "Acquired lock %d.\n", l->ident);
302
303 } else {
304 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
305
306 /* it's ours now */
307 mutex->owner = t;
308
309 raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
310 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
311 struct nested_info, hp_binheap_node);
312 raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
313
314
315 unlock_fine_irqrestore(&mutex->lock, flags);
316 unlock_global_irqrestore(dgl_lock, flags);
317 }
318
319 return 0;
320}
321
322
323
324int rsm_mutex_unlock(struct litmus_lock* l)
325{
326 struct task_struct *t = current, *next = NULL;
327 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
328 unsigned long flags;
329
330 struct task_struct *old_max_eff_prio;
331
332 int wake_up_task = 1;
333
334#ifdef CONFIG_LITMUS_DGL_SUPPORT
335 dgl_wait_state_t *dgl_wait = NULL;
336 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
337#endif
338
339 int err = 0;
340
341 if (mutex->owner != t) {
342 err = -EINVAL;
343 return err;
344 }
345
346 lock_global_irqsave(dgl_lock, flags);
347 lock_fine_irqsave(&mutex->lock, flags);
348
349 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
350
351 TRACE_TASK(t, "Freeing lock %d\n", l->ident);
352
353 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
354 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks);
355
356 if(tsk_rt(t)->inh_task){
357 struct task_struct *new_max_eff_prio =
358 top_priority(&tsk_rt(t)->hp_blocked_tasks);
359
360 if((new_max_eff_prio == NULL) ||
361 /* there was a change in eff prio */
362 ( (new_max_eff_prio != old_max_eff_prio) &&
363 /* and owner had the old eff prio */
364 (effective_priority(t) == old_max_eff_prio)) )
365 {
366 // old_max_eff_prio > new_max_eff_prio
367
368 //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) {
369 if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) {
370 TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n",
371 new_max_eff_prio->comm, new_max_eff_prio->pid,
372 t->comm, t->pid, tsk_rt(t)->inh_task->comm,
373 tsk_rt(t)->inh_task->pid);
374 WARN_ON(1);
375 }
376
377 litmus->decrease_prio(t, new_max_eff_prio);
378 }
379 }
380
381 if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) &&
382 tsk_rt(t)->inh_task != NULL)
383 {
384 WARN_ON(tsk_rt(t)->inh_task != NULL);
385 TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n",
386 tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid);
387 }
388
389 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
390
391
392 /* check if there are jobs waiting for this resource */
393#ifdef CONFIG_LITMUS_DGL_SUPPORT
394 __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next);
395 if(dgl_wait) {
396 next = dgl_wait->task;
397 //select_next_lock_if_primary(l, dgl_wait);
398 }
399#else
400 next = __waitqueue_remove_first(&mutex->wait);
401#endif
402 if (next) {
403 /* next becomes the resource holder */
404 mutex->owner = next;
405 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
406
407 /* determine new hp_waiter if necessary */
408 if (next == mutex->hp_waiter) {
409
410 TRACE_TASK(next, "was highest-prio waiter\n");
411 /* next has the highest priority --- it doesn't need to
412 * inherit. However, we need to make sure that the
413 * next-highest priority in the queue is reflected in
414 * hp_waiter. */
415 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next);
416 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
417 effective_priority(mutex->hp_waiter) :
418 NULL;
419
420 if (mutex->hp_waiter)
421 TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
422 else
423 TRACE("no further waiters\n");
424
425 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
426
427 binheap_add(&l->nest.hp_binheap_node,
428 &tsk_rt(next)->hp_blocked_tasks,
429 struct nested_info, hp_binheap_node);
430
431#ifdef CONFIG_LITMUS_DGL_SUPPORT
432 if(dgl_wait) {
433 select_next_lock_if_primary(l, dgl_wait);
434 //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining);
435 --(dgl_wait->nr_remaining);
436 wake_up_task = (dgl_wait->nr_remaining == 0);
437 }
438#endif
439 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
440 }
441 else {
442 /* Well, if 'next' is not the highest-priority waiter,
443 * then it (probably) ought to inherit the highest-priority
444 * waiter's priority. */
445 TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident);
446
447 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
448
449 binheap_add(&l->nest.hp_binheap_node,
450 &tsk_rt(next)->hp_blocked_tasks,
451 struct nested_info, hp_binheap_node);
452
453#ifdef CONFIG_LITMUS_DGL_SUPPORT
454 if(dgl_wait) {
455 select_next_lock_if_primary(l, dgl_wait);
456 --(dgl_wait->nr_remaining);
457 wake_up_task = (dgl_wait->nr_remaining == 0);
458 }
459#endif
460
461 /* It is possible that 'next' *should* be the hp_waiter, but isn't
462 * because that update hasn't yet executed (update operation is
463 * probably blocked on mutex->lock). So only inherit if the top of
464 * 'next's top heap node is indeed the effective prio. of hp_waiter.
465 * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
466 * since the effective priority of hp_waiter can change (and the
467 * update has not made it to this lock).)
468 */
469#ifdef CONFIG_LITMUS_DGL_SUPPORT
470 if((l->nest.hp_waiter_eff_prio != NULL) &&
471 (top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
472 l->nest.hp_waiter_eff_prio))
473 {
474 if(dgl_wait && tsk_rt(next)->blocked_lock) {
475 BUG_ON(wake_up_task);
476 //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
477 if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
478 litmus->nested_increase_prio(next,
479 l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock.
480 goto out; // all spinlocks are released. bail out now.
481 }
482 }
483 else {
484 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
485 }
486 }
487
488 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
489#else
490 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
491 l->nest.hp_waiter_eff_prio))
492 {
493 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
494 }
495 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
496#endif
497 }
498
499 if(wake_up_task) {
500 TRACE_TASK(next, "waking up since it is no longer blocked.\n");
501
502 tsk_rt(next)->blocked_lock = NULL;
503 mb();
504
505#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
506 // re-enable tracking
507 if(tsk_rt(next)->held_gpus) {
508 tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
509 }
510#endif
511
512 wake_up_process(next);
513 }
514 else {
515 TRACE_TASK(next, "is still blocked.\n");
516 }
517 }
518 else {
519 /* becomes available */
520 mutex->owner = NULL;
521 }
522
523 unlock_fine_irqrestore(&mutex->lock, flags);
524
525#ifdef CONFIG_LITMUS_DGL_SUPPORT
526out:
527#endif
528 unlock_global_irqrestore(dgl_lock, flags);
529
530 return err;
531}
532
533
534void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
535 struct task_struct* t,
536 raw_spinlock_t* to_unlock,
537 unsigned long irqflags)
538{
539 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
540
541 // relay-style locking
542 lock_fine(&mutex->lock);
543 unlock_fine(to_unlock);
544
545 if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
546 struct task_struct *owner = mutex->owner;
547
548 struct task_struct *old_max_eff_prio;
549 struct task_struct *new_max_eff_prio;
550
551 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
552
553 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
554
555 //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) {
556 if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) {
557 TRACE_TASK(t, "is new highest-prio waiter by propagation.\n");
558 mutex->hp_waiter = t;
559 }
560 if(t == mutex->hp_waiter) {
561 // reflect the decreased priority in the heap node.
562 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
563
564 BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node));
565 BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node,
566 &tsk_rt(owner)->hp_blocked_tasks));
567
568 binheap_decrease(&l->nest.hp_binheap_node,
569 &tsk_rt(owner)->hp_blocked_tasks);
570 }
571
572 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
573
574
575 if(new_max_eff_prio != old_max_eff_prio) {
576 // new_max_eff_prio > old_max_eff_prio holds.
577 if ((effective_priority(owner) == old_max_eff_prio) ||
578 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
579 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
580 TRACE_CUR("Propagating inheritance to holder of lock %d.\n",
581 l->ident);
582
583 // beware: recursion
584 litmus->nested_increase_prio(owner, new_max_eff_prio,
585 &mutex->lock, irqflags); // unlocks mutex->lock
586 }
587 else {
588 TRACE_CUR("Lower priority than holder %s/%d. No propagation.\n",
589 owner->comm, owner->pid);
590 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
591 unlock_fine_irqrestore(&mutex->lock, irqflags);
592 }
593 }
594 else {
595 TRACE_TASK(mutex->owner, "No change in maximum effective priority.\n");
596 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
597 unlock_fine_irqrestore(&mutex->lock, irqflags);
598 }
599 }
600 else {
601 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
602
603 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
604 if(still_blocked) {
605 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
606 still_blocked->ident);
607 if(still_blocked->ops->propagate_increase_inheritance) {
608 /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B)
609 we know that task 't' has not released any locks behind us in this
610 chain. Propagation just needs to catch up with task 't'. */
611 still_blocked->ops->propagate_increase_inheritance(still_blocked,
612 t,
613 &mutex->lock,
614 irqflags);
615 }
616 else {
617 TRACE_TASK(t,
618 "Inheritor is blocked on lock (%p) that does not "
619 "support nesting!\n",
620 still_blocked);
621 unlock_fine_irqrestore(&mutex->lock, irqflags);
622 }
623 }
624 else {
625 unlock_fine_irqrestore(&mutex->lock, irqflags);
626 }
627 }
628}
629
630
631void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
632 struct task_struct* t,
633 raw_spinlock_t* to_unlock,
634 unsigned long irqflags)
635{
636 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
637
638 // relay-style locking
639 lock_fine(&mutex->lock);
640 unlock_fine(to_unlock);
641
642 if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
643 if(t == mutex->hp_waiter) {
644 struct task_struct *owner = mutex->owner;
645
646 struct task_struct *old_max_eff_prio;
647 struct task_struct *new_max_eff_prio;
648
649 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
650
651 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
652
653 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
654 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL);
655 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
656 effective_priority(mutex->hp_waiter) : NULL;
657 binheap_add(&l->nest.hp_binheap_node,
658 &tsk_rt(owner)->hp_blocked_tasks,
659 struct nested_info, hp_binheap_node);
660
661 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
662
663 if((old_max_eff_prio != new_max_eff_prio) &&
664 (effective_priority(owner) == old_max_eff_prio))
665 {
666 // Need to set new effective_priority for owner
667
668 struct task_struct *decreased_prio;
669
670 TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n",
671 l->ident);
672
673 //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
674 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
675 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n",
676 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
677 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
678 owner->comm,
679 owner->pid,
680 l->ident);
681
682 decreased_prio = new_max_eff_prio;
683 }
684 else {
685 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n",
686 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
687 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
688 owner->comm,
689 owner->pid,
690 l->ident);
691
692 decreased_prio = NULL;
693 }
694
695 // beware: recursion
696 litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock
697 }
698 else {
699 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
700 unlock_fine_irqrestore(&mutex->lock, irqflags);
701 }
702 }
703 else {
704 TRACE_TASK(t, "is not hp_waiter. No propagation.\n");
705 unlock_fine_irqrestore(&mutex->lock, irqflags);
706 }
707 }
708 else {
709 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
710
711 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
712 if(still_blocked) {
713 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
714 still_blocked->ident);
715 if(still_blocked->ops->propagate_decrease_inheritance) {
716 /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B)
717 we know that task 't' has not released any locks behind us in this
718 chain. propagation just needs to catch up with task 't' */
719 still_blocked->ops->propagate_decrease_inheritance(still_blocked,
720 t,
721 &mutex->lock,
722 irqflags);
723 }
724 else {
725 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
726 still_blocked);
727 unlock_fine_irqrestore(&mutex->lock, irqflags);
728 }
729 }
730 else {
731 unlock_fine_irqrestore(&mutex->lock, irqflags);
732 }
733 }
734}
735
736
737int rsm_mutex_close(struct litmus_lock* l)
738{
739 struct task_struct *t = current;
740 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
741 unsigned long flags;
742
743 int owner;
744
745#ifdef CONFIG_LITMUS_DGL_SUPPORT
746 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
747#endif
748
749 lock_global_irqsave(dgl_lock, flags);
750 lock_fine_irqsave(&mutex->lock, flags);
751
752 owner = (mutex->owner == t);
753
754 unlock_fine_irqrestore(&mutex->lock, flags);
755 unlock_global_irqrestore(dgl_lock, flags);
756
757 if (owner)
758 rsm_mutex_unlock(l);
759
760 return 0;
761}
762
763void rsm_mutex_free(struct litmus_lock* lock)
764{
765 kfree(rsm_mutex_from_lock(lock));
766}
767
768struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops)
769{
770 struct rsm_mutex* mutex;
771
772 mutex = kmalloc(sizeof(*mutex), GFP_KERNEL);
773 if (!mutex)
774 return NULL;
775
776 mutex->litmus_lock.ops = ops;
777 mutex->owner = NULL;
778 mutex->hp_waiter = NULL;
779 init_waitqueue_head(&mutex->wait);
780
781
782#ifdef CONFIG_DEBUG_SPINLOCK
783 {
784 __raw_spin_lock_init(&mutex->lock,
785 ((struct litmus_lock*)mutex)->cheat_lockdep,
786 &((struct litmus_lock*)mutex)->key);
787 }
788#else
789 raw_spin_lock_init(&mutex->lock);
790#endif
791
792 ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter;
793
794 return &mutex->litmus_lock;
795}
796
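The propagate_increase/decrease paths above use relay-style (hand-over-hand) fine-grained locking: lock_fine() on the next lock in the blocking chain is taken before unlock_fine() releases the previous one, so the chain cannot change behind the traversal. A minimal user-space sketch of that hand-off pattern follows; the chain_lock/propagate_prio names, the integer prio_ceiling field, and the use of pthread mutexes in place of raw spinlocks are illustrative assumptions only.

#include <pthread.h>
#include <stdio.h>

struct chain_lock {
	pthread_mutex_t lock;
	struct chain_lock *next_blocked;	/* lock the current owner is blocked on, if any */
	int prio_ceiling;			/* toy stand-in for inherited priority */
};

/* propagate a new priority down the blocking chain, hand over hand */
static void propagate_prio(struct chain_lock *start, int new_prio)
{
	pthread_mutex_lock(&start->lock);
	for (struct chain_lock *cur = start; cur; ) {
		struct chain_lock *next = cur->next_blocked;

		if (new_prio > cur->prio_ceiling)
			cur->prio_ceiling = new_prio;	/* "inheritance" step */

		if (next)
			pthread_mutex_lock(&next->lock);	/* acquire B ... */
		pthread_mutex_unlock(&cur->lock);		/* ... then release A */
		cur = next;
	}
}

int main(void)
{
	struct chain_lock b = { PTHREAD_MUTEX_INITIALIZER, NULL, 3 };
	struct chain_lock a = { PTHREAD_MUTEX_INITIALIZER, &b, 5 };

	propagate_prio(&a, 9);
	printf("a=%d b=%d\n", a.prio_ceiling, b.prio_ceiling);	/* 9 9 */
	return 0;
}

In the kernel version each hand-off additionally updates the owner's hp_blocked_tasks heap and may recurse through litmus->nested_increase_prio() while both locks of the hand-off are still pinned.
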
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index b0c16e34d2c5..d98de4579394 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -29,7 +29,7 @@
29#include <linux/percpu.h> 29#include <linux/percpu.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32 32#include <linux/uaccess.h>
33#include <linux/module.h> 33#include <linux/module.h>
34 34
35#include <litmus/litmus.h> 35#include <litmus/litmus.h>
@@ -43,6 +43,16 @@
43#include <litmus/clustered.h> 43#include <litmus/clustered.h>
44 44
45#include <litmus/bheap.h> 45#include <litmus/bheap.h>
46#include <litmus/binheap.h>
47
48#ifdef CONFIG_LITMUS_LOCKING
49#include <litmus/kfmlp_lock.h>
50#endif
51
52#ifdef CONFIG_LITMUS_NESTED_LOCKING
53#include <litmus/rsm_lock.h>
54#include <litmus/ikglp_lock.h>
55#endif
46 56
47#ifdef CONFIG_SCHED_CPU_AFFINITY 57#ifdef CONFIG_SCHED_CPU_AFFINITY
48#include <litmus/affinity.h> 58#include <litmus/affinity.h>
@@ -50,7 +60,27 @@
50 60
51/* to configure the cluster size */ 61/* to configure the cluster size */
52#include <litmus/litmus_proc.h> 62#include <litmus/litmus_proc.h>
53#include <linux/uaccess.h> 63
64#ifdef CONFIG_SCHED_CPU_AFFINITY
65#include <litmus/affinity.h>
66#endif
67
68#ifdef CONFIG_LITMUS_SOFTIRQD
69#include <litmus/litmus_softirq.h>
70#endif
71
72#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
73#include <linux/interrupt.h>
74#include <litmus/trace.h>
75#endif
76
77#ifdef CONFIG_LITMUS_NVIDIA
78#include <litmus/nvidia_info.h>
79#endif
80
81#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
82#include <litmus/gpu_affinity.h>
83#endif
54 84
55/* Reference configuration variable. Determines which cache level is used to 85/* Reference configuration variable. Determines which cache level is used to
56 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that 86 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
@@ -71,7 +101,7 @@ typedef struct {
71 struct task_struct* linked; /* only RT tasks */ 101 struct task_struct* linked; /* only RT tasks */
72 struct task_struct* scheduled; /* only RT tasks */ 102 struct task_struct* scheduled; /* only RT tasks */
73 atomic_t will_schedule; /* prevent unneeded IPIs */ 103 atomic_t will_schedule; /* prevent unneeded IPIs */
74 struct bheap_node* hn; 104 struct binheap_node hn;
75} cpu_entry_t; 105} cpu_entry_t;
76 106
77/* one cpu_entry_t per CPU */ 107/* one cpu_entry_t per CPU */
@@ -84,6 +114,14 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
84#define test_will_schedule(cpu) \ 114#define test_will_schedule(cpu) \
85 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) 115 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
86 116
117#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
118struct tasklet_head
119{
120 struct tasklet_struct *head;
121 struct tasklet_struct **tail;
122};
123#endif
124
87/* 125/*
88 * In C-EDF there is a cedf domain _per_ cluster 126 * In C-EDF there is a cedf domain _per_ cluster
89 * The number of clusters is dynamically determined accordingly to the 127 * The number of clusters is dynamically determined accordingly to the
@@ -97,10 +135,17 @@ typedef struct clusterdomain {
97 /* map of this cluster cpus */ 135 /* map of this cluster cpus */
98 cpumask_var_t cpu_map; 136 cpumask_var_t cpu_map;
99 /* the cpus queue themselves according to priority in here */ 137 /* the cpus queue themselves according to priority in here */
100 struct bheap_node *heap_node; 138 struct binheap cpu_heap;
101 struct bheap cpu_heap;
102 /* lock for this cluster */ 139 /* lock for this cluster */
103#define cluster_lock domain.ready_lock 140#define cluster_lock domain.ready_lock
141
142#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
143 struct tasklet_head pending_tasklets;
144#endif
145
146#ifdef CONFIG_LITMUS_DGL_SUPPORT
147 raw_spinlock_t dgl_lock;
148#endif
104} cedf_domain_t; 149} cedf_domain_t;
105 150
106/* a cedf_domain per cluster; allocation is done at init/activation time */ 151/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -109,6 +154,22 @@ cedf_domain_t *cedf;
109#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) 154#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
110#define task_cpu_cluster(task) remote_cluster(get_partition(task)) 155#define task_cpu_cluster(task) remote_cluster(get_partition(task))
111 156
157/* total number of cluster */
158static int num_clusters;
159/* we do not support cluster of different sizes */
160static unsigned int cluster_size;
161
162static int clusters_allocated = 0;
163
164#ifdef CONFIG_LITMUS_DGL_SUPPORT
165static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t)
166{
167 cedf_domain_t *cluster = task_cpu_cluster(t);
168 return(&cluster->dgl_lock);
169}
170#endif
171
172
112/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling 173/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
113 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose 174 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
114 * information during the initialization of the plugin (e.g., topology) 175 * information during the initialization of the plugin (e.g., topology)
@@ -116,11 +177,11 @@ cedf_domain_t *cedf;
116 */ 177 */
117#define VERBOSE_INIT 178#define VERBOSE_INIT
118 179
119static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 180static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
120{ 181{
121 cpu_entry_t *a, *b; 182 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
122 a = _a->value; 183 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
123 b = _b->value; 184
124 /* Note that a and b are inverted: we want the lowest-priority CPU at 185 /* Note that a and b are inverted: we want the lowest-priority CPU at
125 * the top of the heap. 186 * the top of the heap.
126 */ 187 */
@@ -134,20 +195,17 @@ static void update_cpu_position(cpu_entry_t *entry)
134{ 195{
135 cedf_domain_t *cluster = entry->cluster; 196 cedf_domain_t *cluster = entry->cluster;
136 197
137 if (likely(bheap_node_in_heap(entry->hn))) 198 if (likely(binheap_is_in_heap(&entry->hn))) {
138 bheap_delete(cpu_lower_prio, 199 binheap_delete(&entry->hn, &cluster->cpu_heap);
139 &cluster->cpu_heap, 200 }
140 entry->hn);
141 201
142 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); 202 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
143} 203}
144 204
145/* caller must hold cedf lock */ 205/* caller must hold cedf lock */
146static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) 206static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
147{ 207{
148 struct bheap_node* hn; 208 return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
149 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
150 return hn->value;
151} 209}
152 210
153 211
@@ -209,7 +267,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
209} 267}
210 268
211/* unlink - Make sure a task is not linked any longer to an entry 269/* unlink - Make sure a task is not linked any longer to an entry
212 * where it was linked before. Must hold cedf_lock. 270 * where it was linked before. Must hold cluster_lock.
213 */ 271 */
214static noinline void unlink(struct task_struct* t) 272static noinline void unlink(struct task_struct* t)
215{ 273{
@@ -245,7 +303,7 @@ static void preempt(cpu_entry_t *entry)
245} 303}
246 304
247/* requeue - Put an unlinked task into gsn-edf domain. 305/* requeue - Put an unlinked task into gsn-edf domain.
248 * Caller must hold cedf_lock. 306 * Caller must hold cluster_lock.
249 */ 307 */
250static noinline void requeue(struct task_struct* task) 308static noinline void requeue(struct task_struct* task)
251{ 309{
@@ -340,13 +398,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
340 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 398 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
341} 399}
342 400
343/* caller holds cedf_lock */ 401/* caller holds cluster_lock */
344static noinline void job_completion(struct task_struct *t, int forced) 402static noinline void job_completion(struct task_struct *t, int forced)
345{ 403{
346 BUG_ON(!t); 404 BUG_ON(!t);
347 405
348 sched_trace_task_completion(t, forced); 406 sched_trace_task_completion(t, forced);
349 407
408#ifdef CONFIG_LITMUS_NVIDIA
409 atomic_set(&tsk_rt(t)->nv_int_count, 0);
410#endif
411
350 TRACE_TASK(t, "job_completion().\n"); 412 TRACE_TASK(t, "job_completion().\n");
351 413
352 /* set flags */ 414 /* set flags */
@@ -371,25 +433,341 @@ static noinline void job_completion(struct task_struct *t, int forced)
371 */ 433 */
372static void cedf_tick(struct task_struct* t) 434static void cedf_tick(struct task_struct* t)
373{ 435{
374 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 436 if (is_realtime(t) && budget_exhausted(t))
375 if (!is_np(t)) { 437 {
376 /* np tasks will be preempted when they become 438 if (budget_signalled(t) && !sigbudget_sent(t)) {
377 * preemptable again 439 /* signal exhaustion */
378 */ 440 send_sigbudget(t);
379 litmus_reschedule_local(); 441 }
380 set_will_schedule(); 442
381 TRACE("cedf_scheduler_tick: " 443 if (budget_enforced(t)) {
382 "%d is preemptable " 444 if (!is_np(t)) {
383 " => FORCE_RESCHED\n", t->pid); 445 /* np tasks will be preempted when they become
384 } else if (is_user_np(t)) { 446 * preemptable again
385 TRACE("cedf_scheduler_tick: " 447 */
386 "%d is non-preemptable, " 448 litmus_reschedule_local();
387 "preemption delayed.\n", t->pid); 449 set_will_schedule();
388 request_exit_np(t); 450 TRACE("cedf_scheduler_tick: "
451 "%d is preemptable "
452 " => FORCE_RESCHED\n", t->pid);
453 } else if (is_user_np(t)) {
454 TRACE("cedf_scheduler_tick: "
455 "%d is non-preemptable, "
456 "preemption delayed.\n", t->pid);
457 request_exit_np(t);
458 }
459 }
460 }
461}
462
463
464
465
466
467
468
469
470
471
472
473
474
475#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
476
477
478static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
479{
480 if (!atomic_read(&tasklet->count)) {
481 if(tasklet->owner) {
482 sched_trace_tasklet_begin(tasklet->owner);
483 }
484
485 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
486 {
487 BUG();
488 }
489 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
490 __FUNCTION__,
491 (tasklet->owner) ? tasklet->owner->pid : -1,
492 (tasklet->owner) ? 0 : 1);
493 tasklet->func(tasklet->data);
494 tasklet_unlock(tasklet);
495
496 if(tasklet->owner) {
497 sched_trace_tasklet_end(tasklet->owner, flushed);
498 }
499 }
500 else {
501 BUG();
502 }
503}
504
505
506static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
507{
508 int work_to_do = 1;
509 struct tasklet_struct *tasklet = NULL;
510 unsigned long flags;
511
512 while(work_to_do) {
513
514 TS_NV_SCHED_BOTISR_START;
515
516 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
517
518 if(cluster->pending_tasklets.head != NULL) {
519 // remove tasklet at head.
520 struct tasklet_struct *prev = NULL;
521 tasklet = cluster->pending_tasklets.head;
522
523 // find a tasklet with prio to execute; skip ones where
524 // sched_task has a higher priority.
525 // We use the '!edf' test instead of swapping function arguments since
526 // both sched_task and owner could be NULL. In this case, we want to
527 // still execute the tasklet.
528 while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) {
529 prev = tasklet;
530 tasklet = tasklet->next;
531 }
532
533 if(tasklet) { // found something to execute
534 // remove the tasklet from the queue
535 if(prev) {
536 prev->next = tasklet->next;
537 if(prev->next == NULL) {
538 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
539 cluster->pending_tasklets.tail = &(prev->next);
540 }
541 }
542 else {
543 cluster->pending_tasklets.head = tasklet->next;
544 if(tasklet->next == NULL) {
545 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
546 cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
547 }
548 }
549 }
550 else {
551 TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__);
552 }
553 }
554 else {
555 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
556 }
557
558 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
559
560 if(tasklet) {
561 __do_lit_tasklet(tasklet, 0ul);
562 tasklet = NULL;
563 }
564 else {
565 work_to_do = 0;
566 }
567
568 TS_NV_SCHED_BOTISR_END;
569 }
570}
571
572static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
573{
574 struct tasklet_struct* step;
575
576 tasklet->next = NULL; // make sure there are no old values floating around
577
578 step = cluster->pending_tasklets.head;
579 if(step == NULL) {
580 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
581 // insert at tail.
582 *(cluster->pending_tasklets.tail) = tasklet;
583 cluster->pending_tasklets.tail = &(tasklet->next);
584 }
585 else if((*(cluster->pending_tasklets.tail) != NULL) &&
586 edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
587 // insert at tail.
588 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
589
590 *(cluster->pending_tasklets.tail) = tasklet;
591 cluster->pending_tasklets.tail = &(tasklet->next);
592 }
593 else {
594
595 // insert the tasklet somewhere in the middle.
596
597 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
598
599 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
600 step = step->next;
601 }
602
603 // insert tasklet right before step->next.
604
605 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
606 tasklet->owner->pid,
607 (step->owner) ?
608 step->owner->pid :
609 -1,
610 (step->next) ?
611 ((step->next->owner) ?
612 step->next->owner->pid :
613 -1) :
614 -1);
615
616 tasklet->next = step->next;
617 step->next = tasklet;
618
619 // patch up the head if needed.
620 if(cluster->pending_tasklets.head == step)
621 {
622 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
623 cluster->pending_tasklets.head = tasklet;
389 } 624 }
390 } 625 }
391} 626}
392 627
628static void cedf_run_tasklets(struct task_struct* sched_task)
629{
630 cedf_domain_t* cluster;
631
632 preempt_disable();
633
634 cluster = (is_realtime(sched_task)) ?
635 task_cpu_cluster(sched_task) :
636 remote_cluster(smp_processor_id());
637
638 if(cluster && cluster->pending_tasklets.head != NULL) {
639 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
640 do_lit_tasklets(cluster, sched_task);
641 }
642
643 preempt_enable_no_resched();
644}
645
646
647
648static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
649{
650#if 0
651 cedf_domain_t *cluster = NULL;
652 cpu_entry_t *targetCPU = NULL;
653 int thisCPU;
654 int runLocal = 0;
655 int runNow = 0;
656 unsigned long flags;
657
658 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
659 {
660 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
661 return 0;
662 }
663
664 cluster = task_cpu_cluster(tasklet->owner);
665
666 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
667
668 thisCPU = smp_processor_id();
669
670#ifdef CONFIG_SCHED_CPU_AFFINITY
671 {
672 cpu_entry_t* affinity = NULL;
673
674 // use this CPU if it is in our cluster and isn't running any RT work.
675 if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
676 affinity = &(__get_cpu_var(cedf_cpu_entries));
677 }
678 else {
679 // this CPU is busy or shouldn't run tasklet in this cluster.
680 // look for available nearby CPUs.
681 // NOTE: Affinity towards owner and not this CPU. Is this right?
682 affinity =
683 cedf_get_nearest_available_cpu(cluster,
684 &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
685 }
686
687 targetCPU = affinity;
688 }
689#endif
690
691 if (targetCPU == NULL) {
692 targetCPU = lowest_prio_cpu(cluster);
693 }
694
695 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
696 if (thisCPU == targetCPU->cpu) {
697 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
698 runLocal = 1;
699 runNow = 1;
700 }
701 else {
702 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
703 runLocal = 0;
704 runNow = 1;
705 }
706 }
707 else {
708 runLocal = 0;
709 runNow = 0;
710 }
711
712 if(!runLocal) {
713 // enqueue the tasklet
714 __add_pai_tasklet(tasklet, cluster);
715 }
716
717 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
718
719
720 if (runLocal /*&& runNow */) { // runNow == 1 is implied
721 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
722 __do_lit_tasklet(tasklet, 0ul);
723 }
724 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
725 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
726 preempt(targetCPU); // need to be protected by cluster_lock?
727 }
728 else {
729 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
730 }
731#else
732 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
733 __do_lit_tasklet(tasklet, 0ul);
734#endif
735 return(1); // success
736}
737
738static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
739 struct task_struct *new_prio)
740{
741 struct tasklet_struct* step;
742 unsigned long flags;
743 cedf_domain_t *cluster;
744 struct task_struct *probe;
745
746 // identify the cluster by the assignment of these tasks. one should
747 // be non-NULL.
748 probe = (old_prio) ? old_prio : new_prio;
749
750 if(probe) {
751 cluster = task_cpu_cluster(probe);
752
753 if(cluster->pending_tasklets.head != NULL) {
754 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
755 for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
756 if(step->owner == old_prio) {
757 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
758 step->owner = new_prio;
759 }
760 }
761 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
762 }
763 }
764 else {
765 TRACE("%s: Both priorities were NULL\n", __FUNCTION__);
766 }
767}
768
769#endif // PAI
770
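__add_pai_tasklet() above keeps the cluster's pending list sorted by owner priority (highest first) and maintains a tail pointer so appends stay O(1). A minimal user-space sketch of the same insertion order follows; the demo_* names, the integer owner_prio standing in for edf_higher_prio(), and the missing cluster lock are assumptions made for brevity.

#include <stdio.h>

struct demo_tasklet {
	int owner_prio;			/* stand-in for the owner's priority */
	struct demo_tasklet *next;
};

struct demo_queue {
	struct demo_tasklet *head;
	struct demo_tasklet **tail;	/* points at the final ->next (or at head) */
};

static void demo_queue_init(struct demo_queue *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

/* insert so that higher-priority owners stay closer to the head */
static void demo_enqueue(struct demo_queue *q, struct demo_tasklet *t)
{
	struct demo_tasklet **link = &q->head;

	while (*link && (*link)->owner_prio >= t->owner_prio)
		link = &(*link)->next;

	t->next = *link;
	*link = t;
	if (!t->next)			/* inserted at the end: update tail */
		q->tail = &t->next;
}

int main(void)
{
	struct demo_queue q;
	struct demo_tasklet a = { 10, NULL }, b = { 30, NULL }, c = { 20, NULL };

	demo_queue_init(&q);
	demo_enqueue(&q, &a);
	demo_enqueue(&q, &b);
	demo_enqueue(&q, &c);

	for (struct demo_tasklet *it = q.head; it; it = it->next)
		printf("%d ", it->owner_prio);	/* prints: 30 20 10 */
	printf("\n");
	return 0;
}

A single pointer-to-pointer cursor covers the head, middle and tail cases in one loop, which is the main simplification relative to the three-way branch in the kernel version.
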
393/* Getting schedule() right is a bit tricky. schedule() may not make any 771/* Getting schedule() right is a bit tricky. schedule() may not make any
394 * assumptions on the state of the current task since it may be called for a 772 * assumptions on the state of the current task since it may be called for a
395 * number of reasons. The reasons include a scheduler_tick() determined that it 773 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -415,7 +793,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
415{ 793{
416 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); 794 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
417 cedf_domain_t *cluster = entry->cluster; 795 cedf_domain_t *cluster = entry->cluster;
418 int out_of_time, sleep, preempt, np, exists, blocks; 796 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks;
419 struct task_struct* next = NULL; 797 struct task_struct* next = NULL;
420 798
421#ifdef CONFIG_RELEASE_MASTER 799#ifdef CONFIG_RELEASE_MASTER
@@ -442,6 +820,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
442 out_of_time = exists && 820 out_of_time = exists &&
443 budget_enforced(entry->scheduled) && 821 budget_enforced(entry->scheduled) &&
444 budget_exhausted(entry->scheduled); 822 budget_exhausted(entry->scheduled);
823 signal_budget = exists &&
824 budget_signalled(entry->scheduled) &&
825 budget_exhausted(entry->scheduled) &&
826 !sigbudget_sent(entry->scheduled);
445 np = exists && is_np(entry->scheduled); 827 np = exists && is_np(entry->scheduled);
446 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; 828 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
447 preempt = entry->scheduled != entry->linked; 829 preempt = entry->scheduled != entry->linked;
@@ -460,12 +842,28 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
460 TRACE_TASK(prev, "will be preempted by %s/%d\n", 842 TRACE_TASK(prev, "will be preempted by %s/%d\n",
461 entry->linked->comm, entry->linked->pid); 843 entry->linked->comm, entry->linked->pid);
462 844
845 /* Send the signal that the budget has been exhausted */
846 if (signal_budget)
847 send_sigbudget(entry->scheduled);
463 848
464 /* If a task blocks we have no choice but to reschedule. 849 /* If a task blocks we have no choice but to reschedule.
465 */ 850 */
466 if (blocks) 851 if (blocks)
467 unlink(entry->scheduled); 852 unlink(entry->scheduled);
468 853
854#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
855 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
856 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
857 // don't track preemptions or locking protocol suspensions.
858 TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
859 stop_gpu_tracker(entry->scheduled);
860 }
861 else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
862 TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
863 }
864 }
865#endif
866
469 /* Request a sys_exit_np() call if we would like to preempt but cannot. 867 /* Request a sys_exit_np() call if we would like to preempt but cannot.
470 * We need to make sure to update the link structure anyway in case 868 * We need to make sure to update the link structure anyway in case
471 * that we are still linked. Multiple calls to request_exit_np() don't 869 * that we are still linked. Multiple calls to request_exit_np() don't
@@ -515,7 +913,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
515 raw_spin_unlock(&cluster->cluster_lock); 913 raw_spin_unlock(&cluster->cluster_lock);
516 914
517#ifdef WANT_ALL_SCHED_EVENTS 915#ifdef WANT_ALL_SCHED_EVENTS
518 TRACE("cedf_lock released, next=0x%p\n", next); 916 TRACE("cluster_lock released, next=0x%p\n", next);
519 917
520 if (next) 918 if (next)
521 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 919 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
@@ -523,7 +921,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
523 TRACE("becomes idle at %llu.\n", litmus_clock()); 921 TRACE("becomes idle at %llu.\n", litmus_clock());
524#endif 922#endif
525 923
526
527 return next; 924 return next;
528} 925}
529 926
@@ -549,7 +946,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
549 cpu_entry_t* entry; 946 cpu_entry_t* entry;
550 cedf_domain_t* cluster; 947 cedf_domain_t* cluster;
551 948
552 TRACE("gsn edf: task new %d\n", t->pid); 949 TRACE("c-edf: task new %d\n", t->pid);
553 950
554 /* the cluster doesn't change even if t is running */ 951 /* the cluster doesn't change even if t is running */
555 cluster = task_cpu_cluster(t); 952 cluster = task_cpu_cluster(t);
@@ -587,7 +984,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
587static void cedf_task_wake_up(struct task_struct *task) 984static void cedf_task_wake_up(struct task_struct *task)
588{ 985{
589 unsigned long flags; 986 unsigned long flags;
590 lt_t now; 987 //lt_t now;
591 cedf_domain_t *cluster; 988 cedf_domain_t *cluster;
592 989
593 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 990 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
@@ -595,6 +992,8 @@ static void cedf_task_wake_up(struct task_struct *task)
595 cluster = task_cpu_cluster(task); 992 cluster = task_cpu_cluster(task);
596 993
597 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 994 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
995
996#if 0 // sporadic task model
598 /* We need to take suspensions because of semaphores into 997 /* We need to take suspensions because of semaphores into
599 * account! If a job resumes after being suspended due to acquiring 998 * account! If a job resumes after being suspended due to acquiring
600 * a semaphore, it should never be treated as a new job release. 999 * a semaphore, it should never be treated as a new job release.
@@ -616,7 +1015,13 @@ static void cedf_task_wake_up(struct task_struct *task)
616 } 1015 }
617 } 1016 }
618 } 1017 }
619 cedf_job_arrival(task); 1018#else
1019 set_rt_flags(task, RT_F_RUNNING); // periodic model
1020#endif
1021
1022 if(tsk_rt(task)->linked_on == NO_CPU)
1023 cedf_job_arrival(task);
1024
620 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1025 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
621} 1026}
622 1027
@@ -643,6 +1048,10 @@ static void cedf_task_exit(struct task_struct * t)
643 unsigned long flags; 1048 unsigned long flags;
644 cedf_domain_t *cluster = task_cpu_cluster(t); 1049 cedf_domain_t *cluster = task_cpu_cluster(t);
645 1050
1051#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1052 cedf_change_prio_pai_tasklet(t, NULL);
1053#endif
1054
646 /* unlink if necessary */ 1055 /* unlink if necessary */
647 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1056 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
648 unlink(t); 1057 unlink(t);
@@ -660,13 +1069,536 @@ static void cedf_task_exit(struct task_struct * t)
660 1069
661static long cedf_admit_task(struct task_struct* tsk) 1070static long cedf_admit_task(struct task_struct* tsk)
662{ 1071{
1072#ifdef CONFIG_LITMUS_NESTED_LOCKING
1073 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1074 edf_max_heap_base_priority_order);
1075#endif
1076
663 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 1077 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
664} 1078}
665 1079
666/* total number of cluster */ 1080
667static int num_clusters; 1081
668/* we do not support cluster of different sizes */ 1082#ifdef CONFIG_LITMUS_LOCKING
669static unsigned int cluster_size; 1083
1084#include <litmus/fdso.h>
1085
1086
1087
1088/* called with IRQs off */
1089static void __increase_priority_inheritance(struct task_struct* t,
1090 struct task_struct* prio_inh)
1091{
1092 int linked_on;
1093 int check_preempt = 0;
1094
1095 cedf_domain_t* cluster = task_cpu_cluster(t);
1096
1097#ifdef CONFIG_LITMUS_NESTED_LOCKING
1098 /* this sanity check allows for weaker locking in protocols */
1099 /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
1100 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1101#endif
1102 TRACE_TASK(t, "inherits priority from %s/%d\n",
1103 prio_inh->comm, prio_inh->pid);
1104 tsk_rt(t)->inh_task = prio_inh;
1105
1106 linked_on = tsk_rt(t)->linked_on;
1107
1108 /* If it is scheduled, then we need to reorder the CPU heap. */
1109 if (linked_on != NO_CPU) {
1110 TRACE_TASK(t, "%s: linked on %d\n",
1111 __FUNCTION__, linked_on);
1112 /* Holder is scheduled; need to re-order CPUs.
1113 * We can't use heap_decrease() here since
1114 * the cpu_heap is ordered in reverse direction, so
1115 * it is actually an increase. */
1116 binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn,
1117 &cluster->cpu_heap);
1118 binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn,
1119 &cluster->cpu_heap, cpu_entry_t, hn);
1120
1121 } else {
1122 /* holder may be queued: first stop queue changes */
1123 raw_spin_lock(&cluster->domain.release_lock);
1124 if (is_queued(t)) {
1125 TRACE_TASK(t, "%s: is queued\n",
1126 __FUNCTION__);
1127 /* We need to update the position of holder in some
1128 * heap. Note that this could be a release heap if we
1129 * budget enforcement is used and this job overran. */
1130 check_preempt =
1131 !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
1132 } else {
1133 /* Nothing to do: if it is not queued and not linked
1134 * then it is either sleeping or currently being moved
1135 * by other code (e.g., a timer interrupt handler) that
1136 * will use the correct priority when enqueuing the
1137 * task. */
1138 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1139 __FUNCTION__);
1140 }
1141 raw_spin_unlock(&cluster->domain.release_lock);
1142
1143 /* If holder was enqueued in a release heap, then the following
1144 * preemption check is pointless, but we can't easily detect
1145 * that case. If you want to fix this, then consider that
1146 * simply adding a state flag requires O(n) time to update when
1147 * releasing n tasks, which conflicts with the goal to have
1148 * O(log n) merges. */
1149 if (check_preempt) {
1150 /* heap_decrease() hit the top level of the heap: make
1151 * sure preemption checks get the right task, not the
1152 * potentially stale cache. */
1153 bheap_uncache_min(edf_ready_order,
1154 &cluster->domain.ready_queue);
1155 check_for_preemptions(cluster);
1156 }
1157 }
1158#ifdef CONFIG_LITMUS_NESTED_LOCKING
1159 }
1160 else {
1161 TRACE_TASK(t, "Spurious invalid priority increase. "
1162 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1163 "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1164 t->comm, t->pid,
1165 effective_priority(t)->comm, effective_priority(t)->pid,
1166 (prio_inh) ? prio_inh->comm : "nil",
1167 (prio_inh) ? prio_inh->pid : -1);
1168 WARN_ON(!prio_inh);
1169 }
1170#endif
1171}
1172
1173/* called with IRQs off */
1174static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1175{
1176 cedf_domain_t* cluster = task_cpu_cluster(t);
1177
1178 raw_spin_lock(&cluster->cluster_lock);
1179
1180 __increase_priority_inheritance(t, prio_inh);
1181
1182#ifdef CONFIG_LITMUS_SOFTIRQD
1183 if(tsk_rt(t)->cur_klitirqd != NULL)
1184 {
1185 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
1186 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1187
1188 __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1189 }
1190#endif
1191
1192 raw_spin_unlock(&cluster->cluster_lock);
1193
1194#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1195 if(tsk_rt(t)->held_gpus) {
1196 int i;
1197 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1198 i < NV_DEVICE_NUM;
1199 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1200 pai_check_priority_increase(t, i);
1201 }
1202 }
1203#endif
1204}
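/*
 * Illustrative sketch (not part of the patch): walking the set bits of a
 * held-GPU mask, as the loops above do with find_first_bit()/find_next_bit()
 * (note those kernel helpers take the mask size in *bits*).  Plain user-space
 * analogue; NV_DEVICE_NUM and check_gpu() are hypothetical stand-ins for
 * pai_check_priority_increase()/_decrease().
 */
#include <stdio.h>

#define NV_DEVICE_NUM 8                     /* assumed device count */

static void check_gpu(int gpu) { printf("re-check PAI priority for GPU %d\n", gpu); }

static void for_each_held_gpu(unsigned long held_gpus)
{
    for (int i = 0; i < NV_DEVICE_NUM; ++i)
        if (held_gpus & (1ul << i))         /* bit i set => GPU i is held */
            check_gpu(i);
}

int main(void)
{
    for_each_held_gpu(0x5ul);               /* GPUs 0 and 2 held */
    return 0;
}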
1205
1206/* called with IRQs off */
1207static void __decrease_priority_inheritance(struct task_struct* t,
1208 struct task_struct* prio_inh)
1209{
1210#ifdef CONFIG_LITMUS_NESTED_LOCKING
1211 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
1212#endif
1213 /* A job only stops inheriting a priority when it releases a
1214 * resource. Thus we can make the following assumption.*/
1215 if(prio_inh)
1216 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1217 prio_inh->comm, prio_inh->pid);
1218 else
1219 TRACE_TASK(t, "base priority restored.\n");
1220
1221 tsk_rt(t)->inh_task = prio_inh;
1222
1223 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1224 TRACE_TASK(t, "is scheduled.\n");
1225
1226 /* Check if rescheduling is necessary. We can't use heap_decrease()
1227 * since the priority was effectively lowered. */
1228 unlink(t);
1229 cedf_job_arrival(t);
1230 }
1231 else {
1232 cedf_domain_t* cluster = task_cpu_cluster(t);
1233 /* task is queued */
1234 raw_spin_lock(&cluster->domain.release_lock);
1235 if (is_queued(t)) {
1236 TRACE_TASK(t, "is queued.\n");
1237
1238 /* decrease in priority, so we have to re-add to binomial heap */
1239 unlink(t);
1240 cedf_job_arrival(t);
1241 }
1242 else {
1243 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
1244 }
1245 raw_spin_unlock(&cluster->domain.release_lock);
1246 }
1247#ifdef CONFIG_LITMUS_NESTED_LOCKING
1248 }
1249 else {
1250 TRACE_TASK(t, "Spurious invalid priority decrease. "
1251 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1252 "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1253 t->comm, t->pid,
1254 effective_priority(t)->comm, effective_priority(t)->pid,
1255 (prio_inh) ? prio_inh->comm : "nil",
1256 (prio_inh) ? prio_inh->pid : -1);
1257 }
1258#endif
1259}
1260
1261static void decrease_priority_inheritance(struct task_struct* t,
1262 struct task_struct* prio_inh)
1263{
1264 cedf_domain_t* cluster = task_cpu_cluster(t);
1265
1266 raw_spin_lock(&cluster->cluster_lock);
1267 __decrease_priority_inheritance(t, prio_inh);
1268
1269#ifdef CONFIG_LITMUS_SOFTIRQD
1270 if(tsk_rt(t)->cur_klitirqd != NULL)
1271 {
1272 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1273 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1274
1275 __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1276 }
1277#endif
1278
1279 raw_spin_unlock(&cluster->cluster_lock);
1280
1281#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1282 if(tsk_rt(t)->held_gpus) {
1283 int i;
1284 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1285 i < NV_DEVICE_NUM;
1286 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1287 pai_check_priority_decrease(t, i);
1288 }
1289 }
1290#endif
1291}
1292
1293
1294
1295
1296
1297#ifdef CONFIG_LITMUS_SOFTIRQD
1298/* called with IRQs off */
1299static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1300 struct task_struct* old_owner,
1301 struct task_struct* new_owner)
1302{
1303 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1304
1305 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1306
1307 raw_spin_lock(&cluster->cluster_lock);
1308
1309 if(old_owner != new_owner)
1310 {
1311 if(old_owner)
1312 {
1313 // unreachable?
1314 tsk_rt(old_owner)->cur_klitirqd = NULL;
1315 }
1316
1317 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
1318 new_owner->comm, new_owner->pid);
1319
1320 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
1321 }
1322
1323 __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
1324
1325 __increase_priority_inheritance(klitirqd,
1326 (tsk_rt(new_owner)->inh_task == NULL) ?
1327 new_owner :
1328 tsk_rt(new_owner)->inh_task);
1329
1330 raw_spin_unlock(&cluster->cluster_lock);
1331}
1332
1333
1334/* called with IRQs off */
1335static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1336 struct task_struct* old_owner,
1337 struct task_struct* new_owner)
1338{
1339 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1340
1341 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1342
1343 raw_spin_lock(&cluster->cluster_lock);
1344
1345 TRACE_TASK(klitirqd, "priority restored\n");
1346
1347 __decrease_priority_inheritance(klitirqd, new_owner);
1348
1349 tsk_rt(old_owner)->cur_klitirqd = NULL;
1350
1351 raw_spin_unlock(&cluster->cluster_lock);
1352}
1353#endif // CONFIG_LITMUS_SOFTIRQD
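/*
 * Illustrative sketch (not from the patch): the klitirqd handoff above first
 * clears the daemon's inherited priority, then re-inherits the new owner's
 * *effective* priority, i.e. the owner's inh_task if it has one, else the
 * owner itself.  Minimal stand-alone model with a hypothetical task type.
 */
#include <stdio.h>
#include <stddef.h>

struct task { const char *name; struct task *inh_task; };

static struct task *effective_prio(struct task *t)
{
    return t->inh_task ? t->inh_task : t;   /* mirrors the ternary used above */
}

int main(void)
{
    struct task high  = { "high",  NULL };
    struct task owner = { "owner", &high }; /* owner already inherits from high */
    printf("daemon inherits from: %s\n", effective_prio(&owner)->name);
    return 0;
}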
1354
1355
1356
1357
1358
1359
1360
1361#ifdef CONFIG_LITMUS_NESTED_LOCKING
1362
1363/* called with IRQs off */
1364/* preconditions:
1365 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1366 (2) The lock 'to_unlock' is held.
1367 */
1368static void nested_increase_priority_inheritance(struct task_struct* t,
1369 struct task_struct* prio_inh,
1370 raw_spinlock_t *to_unlock,
1371 unsigned long irqflags)
1372{
1373 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1374
1375 if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1376 increase_priority_inheritance(t, prio_inh); // increase our prio.
1377 }
1378
1379 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1380
1381
1382 if(blocked_lock) {
1383 if(blocked_lock->ops->propagate_increase_inheritance) {
1384 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1385 blocked_lock->ident);
1386
1387 // beware: recursion
1388 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1389 t, to_unlock,
1390 irqflags);
1391 }
1392 else {
1393 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1394 blocked_lock->ident);
1395 unlock_fine_irqrestore(to_unlock, irqflags);
1396 }
1397 }
1398 else {
1399 TRACE_TASK(t, "is not blocked. No propagation.\n");
1400 unlock_fine_irqrestore(to_unlock, irqflags);
1401 }
1402}
1403
1404/* called with IRQs off */
1405/* preconditions:
1406 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1407 (2) The lock 'to_unlock' is held.
1408 */
1409static void nested_decrease_priority_inheritance(struct task_struct* t,
1410 struct task_struct* prio_inh,
1411 raw_spinlock_t *to_unlock,
1412 unsigned long irqflags)
1413{
1414 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1415 decrease_priority_inheritance(t, prio_inh);
1416
1417 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1418
1419 if(blocked_lock) {
1420 if(blocked_lock->ops->propagate_decrease_inheritance) {
1421 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1422 blocked_lock->ident);
1423
1424 // beware: recursion
1425 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1426 to_unlock,
1427 irqflags);
1428 }
1429 else {
1430 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1431 blocked_lock);
1432 unlock_fine_irqrestore(to_unlock, irqflags);
1433 }
1434 }
1435 else {
1436 TRACE_TASK(t, "is not blocked. No propagation.\n");
1437 unlock_fine_irqrestore(to_unlock, irqflags);
1438 }
1439}
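/*
 * Illustrative sketch (not from the patch): how the nested_*_priority_
 * inheritance() calls above chase a blocking chain.  Each lock's propagate op
 * re-runs the same logic on that lock's owner, so inheritance follows
 * task -> lock -> owner -> lock -> ... until an unblocked task is reached.
 * All types and names here are hypothetical; the real code also performs
 * hand-over-hand unlocking of the fine-grained locks along the way.
 */
#include <stdio.h>
#include <stddef.h>

struct lock;
struct task { const char *name; int prio; struct lock *blocked_on; };
struct lock { const char *name; struct task *owner; };

static void propagate_increase(struct task *t, int new_prio)
{
    while (t) {
        if (new_prio > t->prio) {           /* larger number == higher priority here */
            t->prio = new_prio;
            printf("%s now runs at prio %d\n", t->name, new_prio);
        }
        if (!t->blocked_on)                 /* not blocked: chain ends */
            break;
        t = t->blocked_on->owner;           /* continue with the next lock's owner */
    }
}

int main(void)
{
    struct task c  = { "C", 1, NULL };
    struct lock l2 = { "L2", &c };
    struct task b  = { "B", 2, &l2 };
    struct lock l1 = { "L1", &b };
    struct task a  = { "A", 9, &l1 };       /* A blocks on L1, owned by B */
    propagate_increase(l1.owner, a.prio);
    return 0;
}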
1440
1441
1442/* ******************** RSM MUTEX ********************** */
1443
1444static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = {
1445 .lock = rsm_mutex_lock,
1446 .unlock = rsm_mutex_unlock,
1447 .close = rsm_mutex_close,
1448 .deallocate = rsm_mutex_free,
1449
1450 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1451 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1452
1453#ifdef CONFIG_LITMUS_DGL_SUPPORT
1454 .dgl_lock = rsm_mutex_dgl_lock,
1455 .is_owner = rsm_mutex_is_owner,
1456 .enable_priority = rsm_mutex_enable_priority,
1457#endif
1458};
1459
1460static struct litmus_lock* cedf_new_rsm_mutex(void)
1461{
1462 return rsm_mutex_new(&cedf_rsm_mutex_lock_ops);
1463}
1464
1465/* ******************** IKGLP ********************** */
1466
1467static struct litmus_lock_ops cedf_ikglp_lock_ops = {
1468 .lock = ikglp_lock,
1469 .unlock = ikglp_unlock,
1470 .close = ikglp_close,
1471 .deallocate = ikglp_free,
1472
1473 // ikglp can only be an outer-most lock.
1474 .propagate_increase_inheritance = NULL,
1475 .propagate_decrease_inheritance = NULL,
1476};
1477
1478static struct litmus_lock* cedf_new_ikglp(void* __user arg)
1479{
1480 // assumes clusters of uniform size.
1481 return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg);
1482}
1483
1484#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1485
1486
1487
1488
1489/* ******************** KFMLP support ********************** */
1490
1491static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
1492 .lock = kfmlp_lock,
1493 .unlock = kfmlp_unlock,
1494 .close = kfmlp_close,
1495 .deallocate = kfmlp_free,
1496
1497 // kfmlp can only be an outer-most lock.
1498 .propagate_increase_inheritance = NULL,
1499 .propagate_decrease_inheritance = NULL,
1500};
1501
1502
1503static struct litmus_lock* cedf_new_kfmlp(void* __user arg)
1504{
1505 return kfmlp_new(&cedf_kfmlp_lock_ops, arg);
1506}
1507
1508
1509/* **** lock constructor **** */
1510
1511static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1512 void* __user args)
1513{
1514 int err;
1515
1516 switch (type) {
1517#ifdef CONFIG_LITMUS_NESTED_LOCKING
1518 case RSM_MUTEX:
1519 *lock = cedf_new_rsm_mutex();
1520 break;
1521
1522 case IKGLP_SEM:
1523 *lock = cedf_new_ikglp(args);
1524 break;
1525#endif
1526 case KFMLP_SEM:
1527 *lock = cedf_new_kfmlp(args);
1528 break;
1529
1530 default:
1531 err = -ENXIO;
1532 goto UNSUPPORTED_LOCK;
1533 };
1534
1535 if (*lock)
1536 err = 0;
1537 else
1538 err = -ENOMEM;
1539
1540UNSUPPORTED_LOCK:
1541 return err;
1542}
1543
1544#endif // CONFIG_LITMUS_LOCKING
1545
1546
1547#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1548static struct affinity_observer_ops cedf_kfmlp_affinity_ops = {
1549 .close = kfmlp_aff_obs_close,
1550 .deallocate = kfmlp_aff_obs_free,
1551};
1552
1553#ifdef CONFIG_LITMUS_NESTED_LOCKING
1554static struct affinity_observer_ops cedf_ikglp_affinity_ops = {
1555 .close = ikglp_aff_obs_close,
1556 .deallocate = ikglp_aff_obs_free,
1557};
1558#endif
1559
1560static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs,
1561 int type,
1562 void* __user args)
1563{
1564 int err;
1565
1566 switch (type) {
1567
1568 case KFMLP_SIMPLE_GPU_AFF_OBS:
1569 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1570 break;
1571
1572 case KFMLP_GPU_AFF_OBS:
1573 *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1574 break;
1575
1576#ifdef CONFIG_LITMUS_NESTED_LOCKING
1577 case IKGLP_SIMPLE_GPU_AFF_OBS:
1578 *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1579 break;
1580
1581 case IKGLP_GPU_AFF_OBS:
1582 *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1583 break;
1584#endif
1585 default:
1586 err = -ENXIO;
1587 goto UNSUPPORTED_AFF_OBS;
1588 };
1589
1590 if (*aff_obs)
1591 err = 0;
1592 else
1593 err = -ENOMEM;
1594
1595UNSUPPORTED_AFF_OBS:
1596 return err;
1597}
1598#endif
1599
1600
1601
670 1602
671#ifdef VERBOSE_INIT 1603#ifdef VERBOSE_INIT
672static void print_cluster_topology(cpumask_var_t mask, int cpu) 1604static void print_cluster_topology(cpumask_var_t mask, int cpu)
@@ -681,16 +1613,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu)
681} 1613}
682#endif 1614#endif
683 1615
684static int clusters_allocated = 0;
685
686static void cleanup_cedf(void) 1616static void cleanup_cedf(void)
687{ 1617{
688 int i; 1618 int i;
689 1619
1620#ifdef CONFIG_LITMUS_NVIDIA
1621 shutdown_nvidia_info();
1622#endif
1623
690 if (clusters_allocated) { 1624 if (clusters_allocated) {
691 for (i = 0; i < num_clusters; i++) { 1625 for (i = 0; i < num_clusters; i++) {
692 kfree(cedf[i].cpus); 1626 kfree(cedf[i].cpus);
693 kfree(cedf[i].heap_node);
694 free_cpumask_var(cedf[i].cpu_map); 1627 free_cpumask_var(cedf[i].cpu_map);
695 } 1628 }
696 1629
@@ -750,12 +1683,16 @@ static long cedf_activate_plugin(void)
750 1683
751 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), 1684 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
752 GFP_ATOMIC); 1685 GFP_ATOMIC);
753 cedf[i].heap_node = kmalloc( 1686 INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
754 cluster_size * sizeof(struct bheap_node),
755 GFP_ATOMIC);
756 bheap_init(&(cedf[i].cpu_heap));
757 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); 1687 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
758 1688
1689
1690#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1691 cedf[i].pending_tasklets.head = NULL;
1692 cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
1693#endif
1694
1695
759 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 1696 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
760 return -ENOMEM; 1697 return -ENOMEM;
761#ifdef CONFIG_RELEASE_MASTER 1698#ifdef CONFIG_RELEASE_MASTER
@@ -766,6 +1703,10 @@ static long cedf_activate_plugin(void)
766 /* cycle through cluster and add cpus to them */ 1703 /* cycle through cluster and add cpus to them */
767 for (i = 0; i < num_clusters; i++) { 1704 for (i = 0; i < num_clusters; i++) {
768 1705
1706#ifdef CONFIG_LITMUS_DGL_SUPPORT
1707 raw_spin_lock_init(&cedf[i].dgl_lock);
1708#endif
1709
769 for_each_online_cpu(cpu) { 1710 for_each_online_cpu(cpu) {
770 /* check if the cpu is already in a cluster */ 1711 /* check if the cpu is already in a cluster */
771 for (j = 0; j < num_clusters; j++) 1712 for (j = 0; j < num_clusters; j++)
@@ -796,8 +1737,8 @@ static long cedf_activate_plugin(void)
796 atomic_set(&entry->will_schedule, 0); 1737 atomic_set(&entry->will_schedule, 0);
797 entry->cpu = ccpu; 1738 entry->cpu = ccpu;
798 entry->cluster = &cedf[i]; 1739 entry->cluster = &cedf[i];
799 entry->hn = &(cedf[i].heap_node[cpu_count]); 1740
800 bheap_node_init(&entry->hn, entry); 1741 INIT_BINHEAP_NODE(&entry->hn);
801 1742
802 cpu_count++; 1743 cpu_count++;
803 1744
@@ -814,6 +1755,40 @@ static long cedf_activate_plugin(void)
814 } 1755 }
815 } 1756 }
816 1757
1758#ifdef CONFIG_LITMUS_SOFTIRQD
1759 {
1760 /* distribute the daemons evenly across the clusters. */
1761 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1762 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1763 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1764
1765 int daemon = 0;
1766 for(i = 0; i < num_clusters; ++i)
1767 {
1768 int num_on_this_cluster = num_daemons_per_cluster;
1769 if(left_over)
1770 {
1771 ++num_on_this_cluster;
1772 --left_over;
1773 }
1774
1775 for(j = 0; j < num_on_this_cluster; ++j)
1776 {
1777 // first CPU of this cluster
1778 affinity[daemon++] = i*cluster_size;
1779 }
1780 }
1781
1782 spawn_klitirqd(affinity);
1783
1784 kfree(affinity);
1785 }
1786#endif
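/*
 * Illustrative sketch (not from the patch): the even distribution of klitirqd
 * daemons over clusters used above.  Each cluster gets floor(N/num_clusters)
 * daemons, the first N%num_clusters clusters get one extra, and every daemon
 * is pinned to the first CPU of its cluster.  The constants are made up.
 */
#include <stdio.h>

int main(void)
{
    const int nr_daemons = 5, num_clusters = 2, cluster_size = 4;
    int affinity[5];
    int per_cluster = nr_daemons / num_clusters;
    int left_over   = nr_daemons % num_clusters;
    int daemon = 0;

    for (int i = 0; i < num_clusters; ++i) {
        int on_this_cluster = per_cluster + (left_over ? 1 : 0);
        if (left_over)
            --left_over;
        for (int j = 0; j < on_this_cluster; ++j)
            affinity[daemon++] = i * cluster_size;  /* first CPU of cluster i */
    }

    for (int d = 0; d < nr_daemons; ++d)
        printf("daemon %d -> CPU %d\n", d, affinity[d]);
    return 0;
}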
1787
1788#ifdef CONFIG_LITMUS_NVIDIA
1789 init_nvidia_info();
1790#endif
1791
817 free_cpumask_var(mask); 1792 free_cpumask_var(mask);
818 clusters_allocated = 1; 1793 clusters_allocated = 1;
819 return 0; 1794 return 0;
@@ -832,6 +1807,32 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
832 .task_block = cedf_task_block, 1807 .task_block = cedf_task_block,
833 .admit_task = cedf_admit_task, 1808 .admit_task = cedf_admit_task,
834 .activate_plugin = cedf_activate_plugin, 1809 .activate_plugin = cedf_activate_plugin,
1810 .compare = edf_higher_prio,
1811#ifdef CONFIG_LITMUS_LOCKING
1812 .allocate_lock = cedf_allocate_lock,
1813 .increase_prio = increase_priority_inheritance,
1814 .decrease_prio = decrease_priority_inheritance,
1815#endif
1816#ifdef CONFIG_LITMUS_NESTED_LOCKING
1817 .nested_increase_prio = nested_increase_priority_inheritance,
1818 .nested_decrease_prio = nested_decrease_priority_inheritance,
1819 .__compare = __edf_higher_prio,
1820#endif
1821#ifdef CONFIG_LITMUS_DGL_SUPPORT
1822 .get_dgl_spinlock = cedf_get_dgl_spinlock,
1823#endif
1824#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1825 .allocate_aff_obs = cedf_allocate_affinity_observer,
1826#endif
1827#ifdef CONFIG_LITMUS_SOFTIRQD
1828 .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
1829 .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
1830#endif
1831#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1832 .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
1833 .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
1834 .run_tasklets = cedf_run_tasklets,
1835#endif
835}; 1836};
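/*
 * Illustrative sketch (not from the patch): many members added to the plugin
 * ops table above only exist under certain config options, so a generic
 * caller checks for NULL before dispatching.  Hypothetical miniature ops
 * struct; error value is just a placeholder.
 */
#include <stdio.h>
#include <stddef.h>

struct mini_plugin {
    const char *name;
    long (*allocate_lock)(int type);        /* optional: may be NULL */
};

static long try_allocate_lock(struct mini_plugin *p, int type)
{
    if (!p->allocate_lock)
        return -1;                          /* plugin compiled without locking */
    return p->allocate_lock(type);
}

static long demo_alloc(int type) { printf("alloc lock type %d\n", type); return 0; }

int main(void)
{
    struct mini_plugin with    = { "C-EDF", demo_alloc };
    struct mini_plugin without = { "plain", NULL };
    printf("%ld %ld\n", try_allocate_lock(&with, 1), try_allocate_lock(&without, 1));
    return 0;
}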
836 1837
837static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; 1838static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index c3344b9d288f..83b2f04b1532 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -12,24 +12,50 @@
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/uaccess.h>
16#include <linux/module.h>
15 17
16#include <litmus/litmus.h> 18#include <litmus/litmus.h>
17#include <litmus/jobs.h> 19#include <litmus/jobs.h>
18#include <litmus/sched_plugin.h> 20#include <litmus/sched_plugin.h>
19#include <litmus/edf_common.h> 21#include <litmus/edf_common.h>
20#include <litmus/sched_trace.h> 22#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22 23
23#include <litmus/preempt.h> 24#include <litmus/preempt.h>
24#include <litmus/budget.h> 25#include <litmus/budget.h>
25 26
26#include <litmus/bheap.h> 27#include <litmus/bheap.h>
28#include <litmus/binheap.h>
29
30#ifdef CONFIG_LITMUS_LOCKING
31#include <litmus/kfmlp_lock.h>
32#endif
33
34#ifdef CONFIG_LITMUS_NESTED_LOCKING
35#include <litmus/rsm_lock.h>
36#include <litmus/ikglp_lock.h>
37#endif
27 38
28#ifdef CONFIG_SCHED_CPU_AFFINITY 39#ifdef CONFIG_SCHED_CPU_AFFINITY
29#include <litmus/affinity.h> 40#include <litmus/affinity.h>
30#endif 41#endif
31 42
32#include <linux/module.h> 43#ifdef CONFIG_LITMUS_SOFTIRQD
44#include <litmus/litmus_softirq.h>
45#endif
46
47#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
48#include <linux/interrupt.h>
49#include <litmus/trace.h>
50#endif
51
52#ifdef CONFIG_LITMUS_NVIDIA
53#include <litmus/nvidia_info.h>
54#endif
55
56#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
57#include <litmus/gpu_affinity.h>
58#endif
33 59
34/* Overview of GSN-EDF operations. 60/* Overview of GSN-EDF operations.
35 * 61 *
@@ -104,52 +130,70 @@ typedef struct {
104 int cpu; 130 int cpu;
105 struct task_struct* linked; /* only RT tasks */ 131 struct task_struct* linked; /* only RT tasks */
106 struct task_struct* scheduled; /* only RT tasks */ 132 struct task_struct* scheduled; /* only RT tasks */
107 struct bheap_node* hn; 133 struct binheap_node hn;
108} cpu_entry_t; 134} cpu_entry_t;
109DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); 135DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
110 136
111cpu_entry_t* gsnedf_cpus[NR_CPUS]; 137cpu_entry_t* gsnedf_cpus[NR_CPUS];
112 138
113/* the cpus queue themselves according to priority in here */ 139/* the cpus queue themselves according to priority in here */
114static struct bheap_node gsnedf_heap_node[NR_CPUS]; 140static struct binheap gsnedf_cpu_heap;
115static struct bheap gsnedf_cpu_heap;
116 141
117static rt_domain_t gsnedf; 142static rt_domain_t gsnedf;
118#define gsnedf_lock (gsnedf.ready_lock) 143#define gsnedf_lock (gsnedf.ready_lock)
119 144
145#ifdef CONFIG_LITMUS_DGL_SUPPORT
146static raw_spinlock_t dgl_lock;
147
148static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t)
149{
150 return(&dgl_lock);
151}
152#endif
153
154#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
155struct tasklet_head
156{
157 struct tasklet_struct *head;
158 struct tasklet_struct **tail;
159};
160
161struct tasklet_head gsnedf_pending_tasklets;
162#endif
163
120 164
121/* Uncomment this if you want to see all scheduling decisions in the 165/* Uncomment this if you want to see all scheduling decisions in the
122 * TRACE() log. 166 * TRACE() log.
123#define WANT_ALL_SCHED_EVENTS 167#define WANT_ALL_SCHED_EVENTS
124 */ 168 */
125 169
126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 170static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
127{ 171{
128 cpu_entry_t *a, *b; 172 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
129 a = _a->value; 173 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
130 b = _b->value; 174
131 /* Note that a and b are inverted: we want the lowest-priority CPU at 175 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap. 176 * the top of the heap.
133 */ 177 */
134 return edf_higher_prio(b->linked, a->linked); 178 return edf_higher_prio(b->linked, a->linked);
135} 179}
136 180
181
137/* update_cpu_position - Move the cpu entry to the correct place to maintain 182/* update_cpu_position - Move the cpu entry to the correct place to maintain
138 * order in the cpu queue. Caller must hold gsnedf lock. 183 * order in the cpu queue. Caller must hold gsnedf lock.
139 */ 184 */
140static void update_cpu_position(cpu_entry_t *entry) 185static void update_cpu_position(cpu_entry_t *entry)
141{ 186{
142 if (likely(bheap_node_in_heap(entry->hn))) 187 if (likely(binheap_is_in_heap(&entry->hn))) {
143 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 188 binheap_delete(&entry->hn, &gsnedf_cpu_heap);
144 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 189 }
190 binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn);
145} 191}
146 192
147/* caller must hold gsnedf lock */ 193/* caller must hold gsnedf lock */
148static cpu_entry_t* lowest_prio_cpu(void) 194static cpu_entry_t* lowest_prio_cpu(void)
149{ 195{
150 struct bheap_node* hn; 196 return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn);
151 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
152 return hn->value;
153} 197}
154 198
155 199
@@ -338,6 +382,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
338 382
339 sched_trace_task_completion(t, forced); 383 sched_trace_task_completion(t, forced);
340 384
385#ifdef CONFIG_LITMUS_NVIDIA
386 atomic_set(&tsk_rt(t)->nv_int_count, 0);
387#endif
388
341 TRACE_TASK(t, "job_completion().\n"); 389 TRACE_TASK(t, "job_completion().\n");
342 390
343 /* set flags */ 391 /* set flags */
@@ -362,24 +410,344 @@ static noinline void job_completion(struct task_struct *t, int forced)
362 */ 410 */
363static void gsnedf_tick(struct task_struct* t) 411static void gsnedf_tick(struct task_struct* t)
364{ 412{
365 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 413 if (is_realtime(t) && budget_exhausted(t))
366 if (!is_np(t)) { 414 {
367 /* np tasks will be preempted when they become 415 if (budget_signalled(t) && !sigbudget_sent(t)) {
368 * preemptable again 416 /* signal exhaustion */
369 */ 417 send_sigbudget(t);
370 litmus_reschedule_local(); 418 }
371 TRACE("gsnedf_scheduler_tick: " 419
372 "%d is preemptable " 420 if (budget_enforced(t)) {
373 " => FORCE_RESCHED\n", t->pid); 421 if (!is_np(t)) {
374 } else if (is_user_np(t)) { 422 /* np tasks will be preempted when they become
375 TRACE("gsnedf_scheduler_tick: " 423 * preemptable again
376 "%d is non-preemptable, " 424 */
377 "preemption delayed.\n", t->pid); 425 litmus_reschedule_local();
378 request_exit_np(t); 426 TRACE("gsnedf_scheduler_tick: "
427 "%d is preemptable "
428 " => FORCE_RESCHED\n", t->pid);
429 } else if (is_user_np(t)) {
430 TRACE("gsnedf_scheduler_tick: "
431 "%d is non-preemptable, "
432 "preemption delayed.\n", t->pid);
433 request_exit_np(t);
434 }
435 }
436 }
437}
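/*
 * Illustrative sketch (not from the patch): the decision order in the new
 * gsnedf_tick() above.  On budget exhaustion a task may (a) merely be sent
 * the budget signal, (b) be force-rescheduled, or (c) only receive a
 * request_exit_np() while inside a non-preemptive section.  The flags and
 * helpers below are hypothetical stand-ins for the budget_*()/is_np() tests.
 */
#include <stdio.h>
#include <stdbool.h>

struct rt_task {
    bool exhausted, signalled, sig_sent, enforced, np;
};

static void on_tick(struct rt_task *t)
{
    if (!t->exhausted)
        return;
    if (t->signalled && !t->sig_sent) {      /* send SIG_BUDGET exactly once */
        t->sig_sent = true;
        printf("send budget signal\n");
    }
    if (t->enforced) {
        if (!t->np)
            printf("force reschedule\n");
        else
            printf("request exit from np-section, preemption delayed\n");
    }
}

int main(void)
{
    struct rt_task t = { true, true, false, true, false };
    on_tick(&t);
    return 0;
}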
438
439
440
441
442#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
443
444
445static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
446{
447 if (!atomic_read(&tasklet->count)) {
448 if(tasklet->owner) {
449 sched_trace_tasklet_begin(tasklet->owner);
450 }
451
452 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
453 {
454 BUG();
455 }
456 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
457 __FUNCTION__,
458 (tasklet->owner) ? tasklet->owner->pid : -1,
459 (tasklet->owner) ? 0 : 1);
460 tasklet->func(tasklet->data);
461 tasklet_unlock(tasklet);
462
463 if(tasklet->owner) {
464 sched_trace_tasklet_end(tasklet->owner, flushed);
465 }
466 }
467 else {
468 BUG();
469 }
470}
471
472static void do_lit_tasklets(struct task_struct* sched_task)
473{
474 int work_to_do = 1;
475 struct tasklet_struct *tasklet = NULL;
476 unsigned long flags;
477
478 while(work_to_do) {
479
480 TS_NV_SCHED_BOTISR_START;
481
482 // execute one tasklet that has higher priority
483 raw_spin_lock_irqsave(&gsnedf_lock, flags);
484
485 if(gsnedf_pending_tasklets.head != NULL) {
486 struct tasklet_struct *prev = NULL;
487 tasklet = gsnedf_pending_tasklets.head;
488
489 while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) {
490 prev = tasklet;
491 tasklet = tasklet->next;
492 }
493
494 // remove the tasklet from the queue
495 if(prev) {
496 prev->next = tasklet->next;
497 if(prev->next == NULL) {
498 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
499 gsnedf_pending_tasklets.tail = &(prev->next);
500 }
501 }
502 else {
503 gsnedf_pending_tasklets.head = tasklet->next;
504 if(tasklet->next == NULL) {
505 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
506 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
507 }
508 }
509 }
510 else {
511 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
512 }
513
514 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
515
516 if(tasklet) {
517 __do_lit_tasklet(tasklet, 0ul);
518 tasklet = NULL;
519 }
520 else {
521 work_to_do = 0;
379 } 522 }
523
524 TS_NV_SCHED_BOTISR_END;
380 } 525 }
381} 526}
382 527
528//static void do_lit_tasklets(struct task_struct* sched_task)
529//{
530// int work_to_do = 1;
531// struct tasklet_struct *tasklet = NULL;
532// //struct tasklet_struct *step;
533// unsigned long flags;
534//
535// while(work_to_do) {
536//
537// TS_NV_SCHED_BOTISR_START;
538//
539// // remove tasklet at head of list if it has higher priority.
540// raw_spin_lock_irqsave(&gsnedf_lock, flags);
541//
542// if(gsnedf_pending_tasklets.head != NULL) {
543// // remove tasklet at head.
544// tasklet = gsnedf_pending_tasklets.head;
545//
546// if(edf_higher_prio(tasklet->owner, sched_task)) {
547//
548// if(NULL == tasklet->next) {
549// // tasklet is at the head, list only has one element
550// TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
551// gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
552// }
553//
554// // remove the tasklet from the queue
555// gsnedf_pending_tasklets.head = tasklet->next;
556//
557// TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
558// }
559// else {
560// TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
561// tasklet = NULL;
562// }
563// }
564// else {
565// TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
566// }
567//
568// raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
569//
570// TS_NV_SCHED_BOTISR_END;
571//
572// if(tasklet) {
573// __do_lit_tasklet(tasklet, 0ul);
574// tasklet = NULL;
575// }
576// else {
577// work_to_do = 0;
578// }
579// }
580//
581// //TRACE("%s: exited.\n", __FUNCTION__);
582//}
583
584static void __add_pai_tasklet(struct tasklet_struct* tasklet)
585{
586 struct tasklet_struct* step;
587
588 tasklet->next = NULL; // make sure there are no old values floating around
589
590 step = gsnedf_pending_tasklets.head;
591 if(step == NULL) {
592 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
593 // insert at tail.
594 *(gsnedf_pending_tasklets.tail) = tasklet;
595 gsnedf_pending_tasklets.tail = &(tasklet->next);
596 }
597 else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
598 edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
599 // insert at tail.
600 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
601
602 *(gsnedf_pending_tasklets.tail) = tasklet;
603 gsnedf_pending_tasklets.tail = &(tasklet->next);
604 }
605 else {
606 // insert the tasklet somewhere in the middle.
607
608 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
609
610 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
611 step = step->next;
612 }
613
614 // insert tasklet right before step->next.
615
616 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
617
618 tasklet->next = step->next;
619 step->next = tasklet;
620
621 // patch up the head if needed.
622 if(gsnedf_pending_tasklets.head == step)
623 {
624 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
625 gsnedf_pending_tasklets.head = tasklet;
626 }
627 }
628}
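/*
 * Illustrative sketch (not from the patch): the head/tail representation used
 * by struct tasklet_head above.  'tail' always points at the last 'next'
 * field (initially at 'head'), which is what lets the real code append in
 * O(1) and repair the tail after a removal or middle insert.  Integer
 * priorities stand in for tasklet owners (larger == higher priority).
 */
#include <stdio.h>
#include <stdlib.h>

struct node { int prio; struct node *next; };
struct list { struct node *head; struct node **tail; };

static void insert_by_prio(struct list *l, struct node *n)   /* descending prio */
{
    struct node **pos = &l->head;
    while (*pos && (*pos)->prio >= n->prio)
        pos = &(*pos)->next;
    n->next = *pos;
    *pos = n;
    if (!n->next)                        /* inserted at the end: fix up tail */
        l->tail = &n->next;
}

int main(void)
{
    struct list l = { NULL, &l.head };
    int prios[] = { 3, 7, 5 };
    for (int i = 0; i < 3; ++i) {
        struct node *n = malloc(sizeof(*n));
        n->prio = prios[i];
        insert_by_prio(&l, n);
    }
    for (struct node *n = l.head; n; n = n->next)
        printf("%d ", n->prio);          /* prints: 7 5 3 */
    printf("\n");
    return 0;
}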
629
630static void gsnedf_run_tasklets(struct task_struct* sched_task)
631{
632 preempt_disable();
633
634 if(gsnedf_pending_tasklets.head != NULL) {
635 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
636 do_lit_tasklets(sched_task);
637 }
638
639 preempt_enable_no_resched();
640}
641
642static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
643{
644 cpu_entry_t *targetCPU = NULL;
645 int thisCPU;
646 int runLocal = 0;
647 int runNow = 0;
648 unsigned long flags;
649
650 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
651 {
652 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
653 return 0;
654 }
655
656
657 raw_spin_lock_irqsave(&gsnedf_lock, flags);
658
659 thisCPU = smp_processor_id();
660
661#ifdef CONFIG_SCHED_CPU_AFFINITY
662 {
663 cpu_entry_t* affinity = NULL;
664
665 // use this CPU if it is in our cluster and isn't running any RT work.
666 if(
667#ifdef CONFIG_RELEASE_MASTER
668 (thisCPU != gsnedf.release_master) &&
669#endif
670 (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
671 affinity = &(__get_cpu_var(gsnedf_cpu_entries));
672 }
673 else {
674 // this CPU is busy or shouldn't run tasklet in this cluster.
675 // look for available nearby CPUs.
676 // NOTE: Affinity towards owner and not this CPU. Is this right?
677 affinity =
678 gsnedf_get_nearest_available_cpu(
679 &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
680 }
681
682 targetCPU = affinity;
683 }
684#endif
685
686 if (targetCPU == NULL) {
687 targetCPU = lowest_prio_cpu();
688 }
689
690 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
691 if (thisCPU == targetCPU->cpu) {
692 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
693 runLocal = 1;
694 runNow = 1;
695 }
696 else {
697 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
698 runLocal = 0;
699 runNow = 1;
700 }
701 }
702 else {
703 runLocal = 0;
704 runNow = 0;
705 }
706
707 if(!runLocal) {
708 // enqueue the tasklet
709 __add_pai_tasklet(tasklet);
710 }
711
712 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
713
714
715 if (runLocal /*&& runNow */) { // runNow == 1 is implied
716 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
717 __do_lit_tasklet(tasklet, 0ul);
718 }
719 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
720 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
721 preempt(targetCPU); // need to be protected by gsnedf_lock?
722 }
723 else {
724 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
725 }
726
727 return(1); // success
728}
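/*
 * Illustrative sketch (not from the patch): the three outcomes computed by
 * gsnedf_enqueue_pai_tasklet() above.  If the tasklet's owner outranks the
 * chosen CPU's linked task it runs now (locally if we are already on that
 * CPU, otherwise it is queued and the target CPU is kicked); otherwise it is
 * queued and deferred.  Types and values here are hypothetical.
 */
#include <stdio.h>
#include <stdbool.h>

enum pai_action { PAI_RUN_LOCAL, PAI_PREEMPT_REMOTE, PAI_DEFER };

static enum pai_action classify(bool owner_outranks_target, bool on_target_cpu)
{
    if (!owner_outranks_target)
        return PAI_DEFER;                    /* queue it; runs when priority allows */
    return on_target_cpu ? PAI_RUN_LOCAL : PAI_PREEMPT_REMOTE;
}

int main(void)
{
    printf("%d %d %d\n",
           classify(true,  true),            /* run the tasklet here and now     */
           classify(true,  false),           /* queue it and preempt the target  */
           classify(false, false));          /* defer until priority permits     */
    return 0;
}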
729
730static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio,
731 struct task_struct *new_prio)
732{
733 struct tasklet_struct* step;
734 unsigned long flags;
735
736 if(gsnedf_pending_tasklets.head != NULL) {
737 raw_spin_lock_irqsave(&gsnedf_lock, flags);
738 for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) {
739 if(step->owner == old_prio) {
740 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
741 step->owner = new_prio;
742 }
743 }
744 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
745 }
746}
747
748#endif // end PAI
749
750
383/* Getting schedule() right is a bit tricky. schedule() may not make any 751/* Getting schedule() right is a bit tricky. schedule() may not make any
384 * assumptions on the state of the current task since it may be called for a 752 * assumptions on the state of the current task since it may be called for a
385 * number of reasons. The reasons include a scheduler_tick() determined that it 753 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -404,7 +772,7 @@ static void gsnedf_tick(struct task_struct* t)
404static struct task_struct* gsnedf_schedule(struct task_struct * prev) 772static struct task_struct* gsnedf_schedule(struct task_struct * prev)
405{ 773{
406 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); 774 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
407 int out_of_time, sleep, preempt, np, exists, blocks; 775 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks;
408 struct task_struct* next = NULL; 776 struct task_struct* next = NULL;
409 777
410#ifdef CONFIG_RELEASE_MASTER 778#ifdef CONFIG_RELEASE_MASTER
@@ -427,8 +795,13 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
427 /* (0) Determine state */ 795 /* (0) Determine state */
428 exists = entry->scheduled != NULL; 796 exists = entry->scheduled != NULL;
429 blocks = exists && !is_running(entry->scheduled); 797 blocks = exists && !is_running(entry->scheduled);
430 out_of_time = exists && budget_enforced(entry->scheduled) 798 out_of_time = exists &&
431 && budget_exhausted(entry->scheduled); 799 budget_enforced(entry->scheduled) &&
800 budget_exhausted(entry->scheduled);
801 signal_budget = exists &&
802 budget_signalled(entry->scheduled) &&
803 budget_exhausted(entry->scheduled) &&
804 !sigbudget_sent(entry->scheduled);
432 np = exists && is_np(entry->scheduled); 805 np = exists && is_np(entry->scheduled);
433 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; 806 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
434 preempt = entry->scheduled != entry->linked; 807 preempt = entry->scheduled != entry->linked;
@@ -437,21 +810,36 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
437 TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); 810 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
438#endif 811#endif
439 812
813 /*
440 if (exists) 814 if (exists)
441 TRACE_TASK(prev, 815 TRACE_TASK(prev,
442 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " 816 "blocks:%d out_of_time:%d signal_budget: %d np:%d sleep:%d preempt:%d "
443 "state:%d sig:%d\n", 817 "state:%d sig:%d\n",
444 blocks, out_of_time, np, sleep, preempt, 818 blocks, out_of_time, signal_budget, np, sleep, preempt,
445 prev->state, signal_pending(prev)); 819 prev->state, signal_pending(prev));
820 */
821
446 if (entry->linked && preempt) 822 if (entry->linked && preempt)
447 TRACE_TASK(prev, "will be preempted by %s/%d\n", 823 TRACE_TASK(prev, "will be preempted by %s/%d\n",
448 entry->linked->comm, entry->linked->pid); 824 entry->linked->comm, entry->linked->pid);
449 825
826 /* Send the signal that the budget has been exhausted */
827 if (signal_budget)
828 send_sigbudget(entry->scheduled);
450 829
451 /* If a task blocks we have no choice but to reschedule. 830 /* If a task blocks we have no choice but to reschedule.
452 */ 831 */
453 if (blocks) 832 if (blocks) {
454 unlink(entry->scheduled); 833 unlink(entry->scheduled);
834 }
835
836#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
837 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
838 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
839 stop_gpu_tracker(entry->scheduled);
840 }
841 }
842#endif
455 843
456 /* Request a sys_exit_np() call if we would like to preempt but cannot. 844 /* Request a sys_exit_np() call if we would like to preempt but cannot.
457 * We need to make sure to update the link structure anyway in case 845 * We need to make sure to update the link structure anyway in case
@@ -492,12 +880,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
492 entry->scheduled->rt_param.scheduled_on = NO_CPU; 880 entry->scheduled->rt_param.scheduled_on = NO_CPU;
493 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); 881 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
494 } 882 }
495 } else 883 }
884 else
885 {
496 /* Only override Linux scheduler if we have a real-time task 886 /* Only override Linux scheduler if we have a real-time task
497 * scheduled that needs to continue. 887 * scheduled that needs to continue.
498 */ 888 */
499 if (exists) 889 if (exists)
500 next = prev; 890 next = prev;
891 }
501 892
502 sched_state_task_picked(); 893 sched_state_task_picked();
503 894
@@ -524,6 +915,7 @@ static void gsnedf_finish_switch(struct task_struct *prev)
524 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); 915 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
525 916
526 entry->scheduled = is_realtime(current) ? current : NULL; 917 entry->scheduled = is_realtime(current) ? current : NULL;
918
527#ifdef WANT_ALL_SCHED_EVENTS 919#ifdef WANT_ALL_SCHED_EVENTS
528 TRACE_TASK(prev, "switched away from\n"); 920 TRACE_TASK(prev, "switched away from\n");
529#endif 921#endif
@@ -572,11 +964,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
572static void gsnedf_task_wake_up(struct task_struct *task) 964static void gsnedf_task_wake_up(struct task_struct *task)
573{ 965{
574 unsigned long flags; 966 unsigned long flags;
575 lt_t now; 967 //lt_t now;
576 968
577 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 969 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
578 970
579 raw_spin_lock_irqsave(&gsnedf_lock, flags); 971 raw_spin_lock_irqsave(&gsnedf_lock, flags);
972
973
974#if 0 // sporadic task model
580 /* We need to take suspensions because of semaphores into 975 /* We need to take suspensions because of semaphores into
581 * account! If a job resumes after being suspended due to acquiring 976 * account! If a job resumes after being suspended due to acquiring
582 * a semaphore, it should never be treated as a new job release. 977 * a semaphore, it should never be treated as a new job release.
@@ -598,19 +993,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
598 } 993 }
599 } 994 }
600 } 995 }
996#else // periodic task model
997 set_rt_flags(task, RT_F_RUNNING);
998#endif
999
601 gsnedf_job_arrival(task); 1000 gsnedf_job_arrival(task);
602 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 1001 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
603} 1002}
604 1003
605static void gsnedf_task_block(struct task_struct *t) 1004static void gsnedf_task_block(struct task_struct *t)
606{ 1005{
1006 // TODO: is this called on preemption??
607 unsigned long flags; 1007 unsigned long flags;
608 1008
609 TRACE_TASK(t, "block at %llu\n", litmus_clock()); 1009 TRACE_TASK(t, "block at %llu\n", litmus_clock());
610 1010
611 /* unlink if necessary */ 1011 /* unlink if necessary */
612 raw_spin_lock_irqsave(&gsnedf_lock, flags); 1012 raw_spin_lock_irqsave(&gsnedf_lock, flags);
1013
613 unlink(t); 1014 unlink(t);
1015
614 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 1016 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
615 1017
616 BUG_ON(!is_realtime(t)); 1018 BUG_ON(!is_realtime(t));
@@ -621,6 +1023,10 @@ static void gsnedf_task_exit(struct task_struct * t)
621{ 1023{
622 unsigned long flags; 1024 unsigned long flags;
623 1025
1026#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1027 gsnedf_change_prio_pai_tasklet(t, NULL);
1028#endif
1029
624 /* unlink if necessary */ 1030 /* unlink if necessary */
625 raw_spin_lock_irqsave(&gsnedf_lock, flags); 1031 raw_spin_lock_irqsave(&gsnedf_lock, flags);
626 unlink(t); 1032 unlink(t);
@@ -637,101 +1043,423 @@ static void gsnedf_task_exit(struct task_struct * t)
637 1043
638static long gsnedf_admit_task(struct task_struct* tsk) 1044static long gsnedf_admit_task(struct task_struct* tsk)
639{ 1045{
1046#ifdef CONFIG_LITMUS_NESTED_LOCKING
1047 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1048 edf_max_heap_base_priority_order);
1049#endif
1050
640 return 0; 1051 return 0;
641} 1052}
642 1053
1054
1055
1056
1057
1058
643#ifdef CONFIG_LITMUS_LOCKING 1059#ifdef CONFIG_LITMUS_LOCKING
644 1060
645#include <litmus/fdso.h> 1061#include <litmus/fdso.h>
646 1062
647/* called with IRQs off */ 1063/* called with IRQs off */
648static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) 1064static void __increase_priority_inheritance(struct task_struct* t,
1065 struct task_struct* prio_inh)
649{ 1066{
650 int linked_on; 1067 int linked_on;
651 int check_preempt = 0; 1068 int check_preempt = 0;
652 1069
1070#ifdef CONFIG_LITMUS_NESTED_LOCKING
1071 /* this sanity check allows for weaker locking in protocols */
1072 /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
1073 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1074#endif
1075 TRACE_TASK(t, "inherits priority from %s/%d\n",
1076 prio_inh->comm, prio_inh->pid);
1077 tsk_rt(t)->inh_task = prio_inh;
1078
1079 linked_on = tsk_rt(t)->linked_on;
1080
1081 /* If it is scheduled, then we need to reorder the CPU heap. */
1082 if (linked_on != NO_CPU) {
1083 TRACE_TASK(t, "%s: linked on %d\n",
1084 __FUNCTION__, linked_on);
1085 /* Holder is scheduled; need to re-order CPUs.
1086 * We can't use heap_decrease() here since
1087 * the cpu_heap is ordered in reverse direction, so
1088 * it is actually an increase. */
1089 binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap);
1090 binheap_add(&gsnedf_cpus[linked_on]->hn,
1091 &gsnedf_cpu_heap, cpu_entry_t, hn);
1092 } else {
1093 /* holder may be queued: first stop queue changes */
1094 raw_spin_lock(&gsnedf.release_lock);
1095 if (is_queued(t)) {
1096 TRACE_TASK(t, "%s: is queued\n",
1097 __FUNCTION__);
1098 /* We need to update the position of holder in some
1099 * heap. Note that this could be a release heap if
1100 * budget enforcement is used and this job overran. */
1101 check_preempt =
1102 !bheap_decrease(edf_ready_order,
1103 tsk_rt(t)->heap_node);
1104 } else {
1105 /* Nothing to do: if it is not queued and not linked
1106 * then it is either sleeping or currently being moved
1107 * by other code (e.g., a timer interrupt handler) that
1108 * will use the correct priority when enqueuing the
1109 * task. */
1110 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1111 __FUNCTION__);
1112 }
1113 raw_spin_unlock(&gsnedf.release_lock);
1114
1115 /* If holder was enqueued in a release heap, then the following
1116 * preemption check is pointless, but we can't easily detect
1117 * that case. If you want to fix this, then consider that
1118 * simply adding a state flag requires O(n) time to update when
1119 * releasing n tasks, which conflicts with the goal to have
1120 * O(log n) merges. */
1121 if (check_preempt) {
1122 /* heap_decrease() hit the top level of the heap: make
1123 * sure preemption checks get the right task, not the
1124 * potentially stale cache. */
1125 bheap_uncache_min(edf_ready_order,
1126 &gsnedf.ready_queue);
1127 check_for_preemptions();
1128 }
1129 }
1130#ifdef CONFIG_LITMUS_NESTED_LOCKING
1131 }
1132 else {
1133 TRACE_TASK(t, "Spurious invalid priority increase. "
1134 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1135 "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1136 t->comm, t->pid,
1137 effective_priority(t)->comm, effective_priority(t)->pid,
1138 (prio_inh) ? prio_inh->comm : "nil",
1139 (prio_inh) ? prio_inh->pid : -1);
1140 WARN_ON(!prio_inh);
1141 }
1142#endif
1143}
1144
1145/* called with IRQs off */
1146static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1147{
653 raw_spin_lock(&gsnedf_lock); 1148 raw_spin_lock(&gsnedf_lock);
654 1149
655 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); 1150 __increase_priority_inheritance(t, prio_inh);
656 tsk_rt(t)->inh_task = prio_inh; 1151
657 1152#ifdef CONFIG_LITMUS_SOFTIRQD
658 linked_on = tsk_rt(t)->linked_on; 1153 if(tsk_rt(t)->cur_klitirqd != NULL)
659 1154 {
660 /* If it is scheduled, then we need to reorder the CPU heap. */ 1155 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
661 if (linked_on != NO_CPU) { 1156 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
662 TRACE_TASK(t, "%s: linked on %d\n", 1157
663 __FUNCTION__, linked_on); 1158 __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
664 /* Holder is scheduled; need to re-order CPUs. 1159 }
665 * We can't use heap_decrease() here since 1160#endif
666 * the cpu_heap is ordered in reverse direction, so 1161
667 * it is actually an increase. */ 1162 raw_spin_unlock(&gsnedf_lock);
668 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, 1163
669 gsnedf_cpus[linked_on]->hn); 1164#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
670 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, 1165 if(tsk_rt(t)->held_gpus) {
671 gsnedf_cpus[linked_on]->hn); 1166 int i;
672 } else { 1167 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
673 /* holder may be queued: first stop queue changes */ 1168 i < NV_DEVICE_NUM;
674 raw_spin_lock(&gsnedf.release_lock); 1169 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
675 if (is_queued(t)) { 1170 pai_check_priority_increase(t, i);
676 TRACE_TASK(t, "%s: is queued\n", 1171 }
677 __FUNCTION__); 1172 }
678 /* We need to update the position of holder in some 1173#endif
679 * heap. Note that this could be a release heap if we 1174}
680 * budget enforcement is used and this job overran. */ 1175
681 check_preempt = 1176
682 !bheap_decrease(edf_ready_order, 1177/* called with IRQs off */
683 tsk_rt(t)->heap_node); 1178static void __decrease_priority_inheritance(struct task_struct* t,
684 } else { 1179 struct task_struct* prio_inh)
685 /* Nothing to do: if it is not queued and not linked 1180{
686 * then it is either sleeping or currently being moved 1181#ifdef CONFIG_LITMUS_NESTED_LOCKING
687 * by other code (e.g., a timer interrupt handler) that 1182 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
688 * will use the correct priority when enqueuing the 1183#endif
689 * task. */ 1184 /* A job only stops inheriting a priority when it releases a
690 TRACE_TASK(t, "%s: is NOT queued => Done.\n", 1185 * resource. Thus we can make the following assumption.*/
691 __FUNCTION__); 1186 if(prio_inh)
1187 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1188 prio_inh->comm, prio_inh->pid);
1189 else
1190 TRACE_TASK(t, "base priority restored.\n");
1191
1192 tsk_rt(t)->inh_task = prio_inh;
1193
1194 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1195 TRACE_TASK(t, "is scheduled.\n");
1196
1197 /* Check if rescheduling is necessary. We can't use heap_decrease()
1198 * since the priority was effectively lowered. */
1199 unlink(t);
1200 gsnedf_job_arrival(t);
692 } 1201 }
693 raw_spin_unlock(&gsnedf.release_lock); 1202 else {
694 1203 /* task is queued */
695 /* If holder was enqueued in a release heap, then the following 1204 raw_spin_lock(&gsnedf.release_lock);
696 * preemption check is pointless, but we can't easily detect 1205 if (is_queued(t)) {
697 * that case. If you want to fix this, then consider that 1206 TRACE_TASK(t, "is queued.\n");
698 * simply adding a state flag requires O(n) time to update when 1207
699 * releasing n tasks, which conflicts with the goal to have 1208 /* decrease in priority, so we have to re-add to binomial heap */
700 * O(log n) merges. */ 1209 unlink(t);
701 if (check_preempt) { 1210 gsnedf_job_arrival(t);
702 /* heap_decrease() hit the top level of the heap: make 1211 }
703 * sure preemption checks get the right task, not the 1212 else {
704 * potentially stale cache. */ 1213 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
705 bheap_uncache_min(edf_ready_order, 1214 }
706 &gsnedf.ready_queue); 1215 raw_spin_unlock(&gsnedf.release_lock);
707 check_for_preemptions();
708 } 1216 }
1217#ifdef CONFIG_LITMUS_NESTED_LOCKING
1218 }
1219 else {
1220 TRACE_TASK(t, "Spurious invalid priority decrease. "
1221 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1222 "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1223 t->comm, t->pid,
1224 effective_priority(t)->comm, effective_priority(t)->pid,
1225 (prio_inh) ? prio_inh->comm : "nil",
1226 (prio_inh) ? prio_inh->pid : -1);
709 } 1227 }
1228#endif
1229}
1230
1231static void decrease_priority_inheritance(struct task_struct* t,
1232 struct task_struct* prio_inh)
1233{
1234 raw_spin_lock(&gsnedf_lock);
1235 __decrease_priority_inheritance(t, prio_inh);
1236
1237#ifdef CONFIG_LITMUS_SOFTIRQD
1238 if(tsk_rt(t)->cur_klitirqd != NULL)
1239 {
1240 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1241 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1242
1243 __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1244 }
1245#endif
710 1246
711 raw_spin_unlock(&gsnedf_lock); 1247 raw_spin_unlock(&gsnedf_lock);
1248
1249#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1250 if(tsk_rt(t)->held_gpus) {
1251 int i;
1252 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1253 i < NV_DEVICE_NUM;
1254 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1255 pai_check_priority_decrease(t, i);
1256 }
1257 }
1258#endif
712} 1259}
713 1260
1261
1262#ifdef CONFIG_LITMUS_SOFTIRQD
714/* called with IRQs off */ 1263/* called with IRQs off */
715static void clear_priority_inheritance(struct task_struct* t) 1264static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1265 struct task_struct* old_owner,
1266 struct task_struct* new_owner)
716{ 1267{
1268 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1269
717 raw_spin_lock(&gsnedf_lock); 1270 raw_spin_lock(&gsnedf_lock);
718 1271
719 /* A job only stops inheriting a priority when it releases a 1272 if(old_owner != new_owner)
720 * resource. Thus we can make the following assumption.*/ 1273 {
721 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); 1274 if(old_owner)
1275 {
1276 // unreachable?
1277 tsk_rt(old_owner)->cur_klitirqd = NULL;
1278 }
722 1279
723 TRACE_TASK(t, "priority restored\n"); 1280 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
724 tsk_rt(t)->inh_task = NULL; 1281 new_owner->comm, new_owner->pid);
725 1282
726 /* Check if rescheduling is necessary. We can't use heap_decrease() 1283 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
727 * since the priority was effectively lowered. */ 1284 }
728 unlink(t); 1285
729 gsnedf_job_arrival(t); 1286 __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
1287
1288 __increase_priority_inheritance(klitirqd,
1289 (tsk_rt(new_owner)->inh_task == NULL) ?
1290 new_owner :
1291 tsk_rt(new_owner)->inh_task);
730 1292
731 raw_spin_unlock(&gsnedf_lock); 1293 raw_spin_unlock(&gsnedf_lock);
732} 1294}
733 1295
734 1296
1297/* called with IRQs off */
1298static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1299 struct task_struct* old_owner,
1300 struct task_struct* new_owner)
1301{
1302 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1303
1304 raw_spin_lock(&gsnedf_lock);
1305
1306 TRACE_TASK(klitirqd, "priority restored\n");
1307
1308 __decrease_priority_inheritance(klitirqd, new_owner);
1309
1310 tsk_rt(old_owner)->cur_klitirqd = NULL;
1311
1312 raw_spin_unlock(&gsnedf_lock);
1313}
1314#endif
1315
1316
1317
1318
1319#ifdef CONFIG_LITMUS_NESTED_LOCKING
1320
1321/* called with IRQs off */
1322/* preconditions:
1323 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1324 (2) The lock 'to_unlock' is held.
1325 */
1326static void nested_increase_priority_inheritance(struct task_struct* t,
1327 struct task_struct* prio_inh,
1328 raw_spinlock_t *to_unlock,
1329 unsigned long irqflags)
1330{
1331 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1332
1333 if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1334 increase_priority_inheritance(t, prio_inh); // increase our prio.
1335 }
1336
1337 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1338
1339
1340 if(blocked_lock) {
1341 if(blocked_lock->ops->propagate_increase_inheritance) {
1342 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1343 blocked_lock->ident);
1344
1345 // beware: recursion
1346 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1347 t, to_unlock,
1348 irqflags);
1349 }
1350 else {
1351 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1352 blocked_lock->ident);
1353 unlock_fine_irqrestore(to_unlock, irqflags);
1354 }
1355 }
1356 else {
1357 TRACE_TASK(t, "is not blocked. No propagation.\n");
1358 unlock_fine_irqrestore(to_unlock, irqflags);
1359 }
1360}
1361
1362/* called with IRQs off */
1363/* preconditions:
1364 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1365 (2) The lock 'to_unlock' is held.
1366 */
1367static void nested_decrease_priority_inheritance(struct task_struct* t,
1368 struct task_struct* prio_inh,
1369 raw_spinlock_t *to_unlock,
1370 unsigned long irqflags)
1371{
1372 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1373 decrease_priority_inheritance(t, prio_inh);
1374
1375 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1376
1377 if(blocked_lock) {
1378 if(blocked_lock->ops->propagate_decrease_inheritance) {
1379 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1380 blocked_lock->ident);
1381
1382 // beware: recursion
1383 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1384 to_unlock,
1385 irqflags);
1386 }
1387 else {
1388 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1389 blocked_lock);
1390 unlock_fine_irqrestore(to_unlock, irqflags);
1391 }
1392 }
1393 else {
1394 TRACE_TASK(t, "is not blocked. No propagation.\n");
1395 unlock_fine_irqrestore(to_unlock, irqflags);
1396 }
1397}
1398
1399
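Both nested_*_priority_inheritance() routines above follow a hand-over-hand discipline: with t's hp_blocked_tasks_lock and 'to_unlock' held, they adjust t's inheritance, drop t's heap lock, and then either recurse through the blocked lock's propagate_*() callback (which eventually releases 'to_unlock') or release 'to_unlock' themselves. A self-contained sketch of that traversal pattern over a generic wait-for chain, with pthread mutexes and made-up task/lock types standing in for the LITMUS ones:

#include <pthread.h>
#include <stddef.h>

struct fake_task;

struct fake_lock {
	struct fake_task *owner;		/* current holder of the lock */
};

struct fake_task {
	pthread_mutex_t state_lock;		/* plays the role of hp_blocked_tasks_lock */
	int effective_prio;			/* higher number = higher priority */
	struct fake_lock *blocked_on;		/* lock this task waits for, or NULL */
};

/* Walk the wait-for chain hand over hand: at any moment exactly one task's
 * state lock is held, so long chains never pile up nested locks. */
static void propagate_priority(struct fake_task *t, int new_prio)
{
	pthread_mutex_lock(&t->state_lock);
	while (t) {
		struct fake_task *next = NULL;

		if (new_prio > t->effective_prio)
			t->effective_prio = new_prio;	/* "increase_priority_inheritance" */

		if (t->blocked_on && t->blocked_on->owner) {
			next = t->blocked_on->owner;
			pthread_mutex_lock(&next->state_lock);	/* grab the next hop... */
		}
		pthread_mutex_unlock(&t->state_lock);		/* ...before letting go of this one */
		t = next;
	}
}

int main(void)
{
	struct fake_task low  = { PTHREAD_MUTEX_INITIALIZER, 1, NULL };
	struct fake_lock l    = { &low };
	struct fake_task high = { PTHREAD_MUTEX_INITIALIZER, 9, &l };

	propagate_priority(&high, 9);	/* 'low' now runs at priority 9 */
	return low.effective_prio == 9 ? 0 : 1;
}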
1400/* ******************** RSM MUTEX ********************** */
1401
1402static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = {
1403 .lock = rsm_mutex_lock,
1404 .unlock = rsm_mutex_unlock,
1405 .close = rsm_mutex_close,
1406 .deallocate = rsm_mutex_free,
1407
1408 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1409 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1410
1411#ifdef CONFIG_LITMUS_DGL_SUPPORT
1412 .dgl_lock = rsm_mutex_dgl_lock,
1413 .is_owner = rsm_mutex_is_owner,
1414 .enable_priority = rsm_mutex_enable_priority,
1415#endif
1416};
1417
1418static struct litmus_lock* gsnedf_new_rsm_mutex(void)
1419{
1420 return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops);
1421}
1422
1423/* ******************** IKGLP ********************** */
1424
1425static struct litmus_lock_ops gsnedf_ikglp_lock_ops = {
1426 .lock = ikglp_lock,
1427 .unlock = ikglp_unlock,
1428 .close = ikglp_close,
1429 .deallocate = ikglp_free,
1430
1431 // ikglp can only be an outer-most lock.
1432 .propagate_increase_inheritance = NULL,
1433 .propagate_decrease_inheritance = NULL,
1434};
1435
1436static struct litmus_lock* gsnedf_new_ikglp(void* __user arg)
1437{
1438 return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg);
1439}
1440
1441#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1442
1443
1444/* ******************** KFMLP support ********************** */
1445
1446static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
1447 .lock = kfmlp_lock,
1448 .unlock = kfmlp_unlock,
1449 .close = kfmlp_close,
1450 .deallocate = kfmlp_free,
1451
1452 // kfmlp can only be an outer-most lock.
1453 .propagate_increase_inheritance = NULL,
1454 .propagate_decrease_inheritance = NULL,
1455};
1456
1457
1458static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg)
1459{
1460 return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg);
1461}
1462
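Each protocol wired in above (RSM mutex, IKGLP, KFMLP) has the same shape: a static litmus_lock_ops table names the protocol's entry points, with the propagate_*() hooks left NULL for protocols that may only be outermost, and a one-line constructor hands that table to the protocol's *_new() routine. A generic, compilable illustration of the pattern with made-up names (not the LITMUS interfaces):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical ops table, mirroring the shape of struct litmus_lock_ops. */
struct demo_lock;
struct demo_lock_ops {
	int  (*lock)(struct demo_lock *l);
	int  (*unlock)(struct demo_lock *l);
	void (*propagate_increase)(struct demo_lock *l);	/* NULL => outermost-only */
};

struct demo_lock {
	const struct demo_lock_ops *ops;
	int held;
};

static int demo_lock_acquire(struct demo_lock *l) { l->held = 1; return 0; }
static int demo_lock_release(struct demo_lock *l) { l->held = 0; return 0; }

/* One static ops table per protocol... */
static const struct demo_lock_ops demo_outermost_ops = {
	.lock = demo_lock_acquire,
	.unlock = demo_lock_release,
	.propagate_increase = NULL,	/* like KFMLP/IKGLP: no nested propagation */
};

/* ...and a tiny constructor that binds it, like gsnedf_new_kfmlp(). */
static struct demo_lock *demo_new_outermost_lock(void)
{
	struct demo_lock *l = calloc(1, sizeof(*l));
	if (l)
		l->ops = &demo_outermost_ops;
	return l;
}

int main(void)
{
	struct demo_lock *l = demo_new_outermost_lock();
	if (!l)
		return 1;
	l->ops->lock(l);
	printf("held=%d, nestable=%d\n", l->held, l->ops->propagate_increase != NULL);
	l->ops->unlock(l);
	free(l);
	return 0;
}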
735/* ******************** FMLP support ********************** */ 1463/* ******************** FMLP support ********************** */
736 1464
737/* struct for semaphore with priority inheritance */ 1465/* struct for semaphore with priority inheritance */
@@ -797,7 +1525,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
797 if (edf_higher_prio(t, sem->hp_waiter)) { 1525 if (edf_higher_prio(t, sem->hp_waiter)) {
798 sem->hp_waiter = t; 1526 sem->hp_waiter = t;
799 if (edf_higher_prio(t, sem->owner)) 1527 if (edf_higher_prio(t, sem->owner))
800 set_priority_inheritance(sem->owner, sem->hp_waiter); 1528 increase_priority_inheritance(sem->owner, sem->hp_waiter);
801 } 1529 }
802 1530
803 TS_LOCK_SUSPEND; 1531 TS_LOCK_SUSPEND;
@@ -865,7 +1593,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
865 /* Well, if next is not the highest-priority waiter, 1593 /* Well, if next is not the highest-priority waiter,
866 * then it ought to inherit the highest-priority 1594 * then it ought to inherit the highest-priority
867 * waiter's priority. */ 1595 * waiter's priority. */
868 set_priority_inheritance(next, sem->hp_waiter); 1596 increase_priority_inheritance(next, sem->hp_waiter);
869 } 1597 }
870 1598
871 /* wake up next */ 1599 /* wake up next */
@@ -876,7 +1604,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
876 1604
877 /* we lose the benefit of priority inheritance (if any) */ 1605 /* we lose the benefit of priority inheritance (if any) */
878 if (tsk_rt(t)->inh_task) 1606 if (tsk_rt(t)->inh_task)
879 clear_priority_inheritance(t); 1607 decrease_priority_inheritance(t, NULL);
880 1608
881out: 1609out:
882 spin_unlock_irqrestore(&sem->wait.lock, flags); 1610 spin_unlock_irqrestore(&sem->wait.lock, flags);
@@ -914,6 +1642,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
914 .lock = gsnedf_fmlp_lock, 1642 .lock = gsnedf_fmlp_lock,
915 .unlock = gsnedf_fmlp_unlock, 1643 .unlock = gsnedf_fmlp_unlock,
916 .deallocate = gsnedf_fmlp_free, 1644 .deallocate = gsnedf_fmlp_free,
1645
1646#ifdef CONFIG_LITMUS_NESTED_LOCKING
1647 .propagate_increase_inheritance = NULL,
1648 .propagate_decrease_inheritance = NULL
1649#endif
917}; 1650};
918 1651
919static struct litmus_lock* gsnedf_new_fmlp(void) 1652static struct litmus_lock* gsnedf_new_fmlp(void)
@@ -932,47 +1665,121 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
932 return &sem->litmus_lock; 1665 return &sem->litmus_lock;
933} 1666}
934 1667
935/* **** lock constructor **** */
936
937 1668
938static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, 1669static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
939 void* __user unused) 1670 void* __user args)
940{ 1671{
941 int err = -ENXIO; 1672 int err;
942 1673
943 /* GSN-EDF currently only supports the FMLP for global resources. */
944 switch (type) { 1674 switch (type) {
945 1675
946 case FMLP_SEM: 1676 case FMLP_SEM:
947 /* Flexible Multiprocessor Locking Protocol */ 1677 /* Flexible Multiprocessor Locking Protocol */
948 *lock = gsnedf_new_fmlp(); 1678 *lock = gsnedf_new_fmlp();
949 if (*lock) 1679 break;
950 err = 0; 1680#ifdef CONFIG_LITMUS_NESTED_LOCKING
951 else 1681 case RSM_MUTEX:
952 err = -ENOMEM; 1682 *lock = gsnedf_new_rsm_mutex();
953 break; 1683 break;
954 1684
1685 case IKGLP_SEM:
1686 *lock = gsnedf_new_ikglp(args);
1687 break;
1688#endif
1689 case KFMLP_SEM:
1690 *lock = gsnedf_new_kfmlp(args);
1691 break;
1692 default:
1693 err = -ENXIO;
1694 goto UNSUPPORTED_LOCK;
955 }; 1695 };
956 1696
1697 if (*lock)
1698 err = 0;
1699 else
1700 err = -ENOMEM;
1701
1702UNSUPPORTED_LOCK:
957 return err; 1703 return err;
958} 1704}
959 1705
1706#endif // CONFIG_LITMUS_LOCKING
1707
1708
1709
1710
1711
1712#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1713static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = {
1714 .close = kfmlp_aff_obs_close,
1715 .deallocate = kfmlp_aff_obs_free,
1716};
1717
1718#ifdef CONFIG_LITMUS_NESTED_LOCKING
1719static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = {
1720 .close = ikglp_aff_obs_close,
1721 .deallocate = ikglp_aff_obs_free,
1722};
960#endif 1723#endif
961 1724
1725static long gsnedf_allocate_affinity_observer(
1726 struct affinity_observer **aff_obs,
1727 int type,
1728 void* __user args)
1729{
1730 int err;
1731
1732 switch (type) {
1733
1734 case KFMLP_SIMPLE_GPU_AFF_OBS:
1735 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1736 break;
1737
1738 case KFMLP_GPU_AFF_OBS:
1739 *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1740 break;
1741
1742#ifdef CONFIG_LITMUS_NESTED_LOCKING
1743 case IKGLP_SIMPLE_GPU_AFF_OBS:
1744 *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1745 break;
1746
1747 case IKGLP_GPU_AFF_OBS:
1748 *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1749 break;
1750#endif
1751 default:
1752 err = -ENXIO;
1753 goto UNSUPPORTED_AFF_OBS;
1754 };
1755
1756 if (*aff_obs)
1757 err = 0;
1758 else
1759 err = -ENOMEM;
1760
1761UNSUPPORTED_AFF_OBS:
1762 return err;
1763}
1764#endif
1765
1766
1767
1768
962 1769
963static long gsnedf_activate_plugin(void) 1770static long gsnedf_activate_plugin(void)
964{ 1771{
965 int cpu; 1772 int cpu;
966 cpu_entry_t *entry; 1773 cpu_entry_t *entry;
967 1774
968 bheap_init(&gsnedf_cpu_heap); 1775 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
969#ifdef CONFIG_RELEASE_MASTER 1776#ifdef CONFIG_RELEASE_MASTER
970 gsnedf.release_master = atomic_read(&release_master_cpu); 1777 gsnedf.release_master = atomic_read(&release_master_cpu);
971#endif 1778#endif
972 1779
973 for_each_online_cpu(cpu) { 1780 for_each_online_cpu(cpu) {
974 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1781 entry = &per_cpu(gsnedf_cpu_entries, cpu);
975 bheap_node_init(&entry->hn, entry); 1782 INIT_BINHEAP_NODE(&entry->hn);
976 entry->linked = NULL; 1783 entry->linked = NULL;
977 entry->scheduled = NULL; 1784 entry->scheduled = NULL;
978#ifdef CONFIG_RELEASE_MASTER 1785#ifdef CONFIG_RELEASE_MASTER
@@ -986,6 +1793,20 @@ static long gsnedf_activate_plugin(void)
986 } 1793 }
987#endif 1794#endif
988 } 1795 }
1796
1797#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1798 gsnedf_pending_tasklets.head = NULL;
1799 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
1800#endif
1801
1802#ifdef CONFIG_LITMUS_SOFTIRQD
1803 spawn_klitirqd(NULL);
1804#endif
1805
1806#ifdef CONFIG_LITMUS_NVIDIA
1807 init_nvidia_info();
1808#endif
1809
989 return 0; 1810 return 0;
990} 1811}
991 1812
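The PAI initialization above ('head = NULL; tail = &head') sets up the classic singly linked queue with a tail pointer-to-pointer, which lets producers append in O(1) without special-casing the empty list. A standalone illustration of the idiom with a generic node type (the real pending-tasklet list is not shown in this hunk):

#include <stdio.h>
#include <stdlib.h>

struct node {
	int value;
	struct node *next;
};

struct queue {
	struct node *head;
	struct node **tail;	/* points at 'head', or at the last node's 'next' */
};

static void queue_init(struct queue *q)
{
	q->head = NULL;
	q->tail = &q->head;	/* same shape as the gsnedf_pending_tasklets init */
}

static void queue_append(struct queue *q, struct node *n)
{
	n->next = NULL;
	*q->tail = n;		/* works whether or not the queue is empty */
	q->tail = &n->next;
}

int main(void)
{
	struct queue q;
	struct node *n;
	int i;

	queue_init(&q);
	for (i = 0; i < 3; i++) {
		n = malloc(sizeof(*n));
		n->value = i;
		queue_append(&q, n);
	}
	n = q.head;
	while (n) {
		struct node *next = n->next;
		printf("%d\n", n->value);
		free(n);
		n = next;
	}
	return 0;
}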
@@ -1002,8 +1823,31 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
1002 .task_block = gsnedf_task_block, 1823 .task_block = gsnedf_task_block,
1003 .admit_task = gsnedf_admit_task, 1824 .admit_task = gsnedf_admit_task,
1004 .activate_plugin = gsnedf_activate_plugin, 1825 .activate_plugin = gsnedf_activate_plugin,
1826 .compare = edf_higher_prio,
1005#ifdef CONFIG_LITMUS_LOCKING 1827#ifdef CONFIG_LITMUS_LOCKING
1006 .allocate_lock = gsnedf_allocate_lock, 1828 .allocate_lock = gsnedf_allocate_lock,
1829 .increase_prio = increase_priority_inheritance,
1830 .decrease_prio = decrease_priority_inheritance,
1831#endif
1832#ifdef CONFIG_LITMUS_NESTED_LOCKING
1833 .nested_increase_prio = nested_increase_priority_inheritance,
1834 .nested_decrease_prio = nested_decrease_priority_inheritance,
1835 .__compare = __edf_higher_prio,
1836#endif
1837#ifdef CONFIG_LITMUS_DGL_SUPPORT
1838 .get_dgl_spinlock = gsnedf_get_dgl_spinlock,
1839#endif
1840#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1841 .allocate_aff_obs = gsnedf_allocate_affinity_observer,
1842#endif
1843#ifdef CONFIG_LITMUS_SOFTIRQD
1844 .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
1845 .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
1846#endif
1847#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1848 .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet,
1849 .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
1850 .run_tasklets = gsnedf_run_tasklets,
1007#endif 1851#endif
1008}; 1852};
1009 1853
@@ -1013,15 +1857,20 @@ static int __init init_gsn_edf(void)
1013 int cpu; 1857 int cpu;
1014 cpu_entry_t *entry; 1858 cpu_entry_t *entry;
1015 1859
1016 bheap_init(&gsnedf_cpu_heap); 1860 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
1017 /* initialize CPU state */ 1861 /* initialize CPU state */
1018 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1862 for (cpu = 0; cpu < NR_CPUS; ++cpu) {
1019 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1863 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1020 gsnedf_cpus[cpu] = entry; 1864 gsnedf_cpus[cpu] = entry;
1021 entry->cpu = cpu; 1865 entry->cpu = cpu;
1022 entry->hn = &gsnedf_heap_node[cpu]; 1866
1023 bheap_node_init(&entry->hn, entry); 1867 INIT_BINHEAP_NODE(&entry->hn);
1024 } 1868 }
1869
1870#ifdef CONFIG_LITMUS_DGL_SUPPORT
1871 raw_spin_lock_init(&dgl_lock);
1872#endif
1873
1025 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); 1874 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1026 return register_sched_plugin(&gsn_edf_plugin); 1875 return register_sched_plugin(&gsn_edf_plugin);
1027} 1876}
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 5a15ce938984..9a6fe487718e 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
103 } 103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW 104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->oncpu) 105 if (next->oncpu)
106 {
106 TRACE_TASK(next, "waiting for !oncpu"); 107 TRACE_TASK(next, "waiting for !oncpu");
108 }
107 while (next->oncpu) { 109 while (next->oncpu) {
108 cpu_relax(); 110 cpu_relax();
109 mb(); 111 mb();
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
index 62be699629b1..6129eb94d3ea 100644
--- a/litmus/sched_pfp.c
+++ b/litmus/sched_pfp.c
@@ -135,17 +135,25 @@ static void pfp_tick(struct task_struct *t)
135 */ 135 */
136 BUG_ON(is_realtime(t) && t != pfp->scheduled); 136 BUG_ON(is_realtime(t) && t != pfp->scheduled);
137 137
138 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 138 if (is_realtime(t) && budget_exhausted(t))
139 if (!is_np(t)) { 139 {
140 litmus_reschedule_local(); 140 if (budget_signalled(t) && !sigbudget_sent(t)) {
141 TRACE("pfp_scheduler_tick: " 141 /* signal exhaustion */
142 "%d is preemptable " 142 send_sigbudget(t);
143 " => FORCE_RESCHED\n", t->pid); 143 }
144 } else if (is_user_np(t)) { 144
145 TRACE("pfp_scheduler_tick: " 145 if (budget_enforced(t)) {
146 "%d is non-preemptable, " 146 if (!is_np(t)) {
147 "preemption delayed.\n", t->pid); 147 litmus_reschedule_local();
148 request_exit_np(t); 148 TRACE("pfp_scheduler_tick: "
149 "%d is preemptable "
150 " => FORCE_RESCHED\n", t->pid);
151 } else if (is_user_np(t)) {
152 TRACE("pfp_scheduler_tick: "
153 "%d is non-preemptable, "
154 "preemption delayed.\n", t->pid);
155 request_exit_np(t);
156 }
149 } 157 }
150 } 158 }
151} 159}
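The reworked tick handler above separates two independent reactions to budget exhaustion: optionally sending a one-shot signal (budget_signalled() && !sigbudget_sent()) and optionally enforcing the budget (reschedule now, or defer via request_exit_np() inside a non-preemptive section). A compact, self-contained restatement of that decision logic with stubbed predicates, mirroring the structure of the diff rather than the real LITMUS helpers:

#include <stdbool.h>
#include <stdio.h>

/* Stubbed task state standing in for the LITMUS predicates used above. */
struct demo_task {
	bool realtime, exhausted, signalled, sig_sent, enforced, np;
};

static void send_budget_signal(struct demo_task *t) { t->sig_sent = true; puts("budget signal sent"); }
static void reschedule_now(void)                    { puts("force reschedule"); }
static void request_exit_np_section(void)           { puts("preemption delayed (np-section)"); }

/* Same shape as the pfp_tick()/psnedf_tick() hunks: signalling and
 * enforcement are decided independently once the budget is exhausted. */
static void budget_tick(struct demo_task *t)
{
	if (!(t->realtime && t->exhausted))
		return;

	if (t->signalled && !t->sig_sent)
		send_budget_signal(t);		/* one-shot notification (send_sigbudget() in the patch) */

	if (t->enforced) {
		if (!t->np)
			reschedule_now();	/* preemptable: enforce immediately */
		else
			request_exit_np_section();	/* np-section: ask the task to yield */
	}
}

int main(void)
{
	struct demo_task t = { .realtime = true, .exhausted = true,
			       .signalled = true, .enforced = true, .np = false };
	budget_tick(&t);
	return 0;
}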
@@ -155,7 +163,7 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
155 pfp_domain_t* pfp = local_pfp; 163 pfp_domain_t* pfp = local_pfp;
156 struct task_struct* next; 164 struct task_struct* next;
157 165
158 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate; 166 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched, migrate;
159 167
160 raw_spin_lock(&pfp->slock); 168 raw_spin_lock(&pfp->slock);
161 169
@@ -172,6 +180,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
172 out_of_time = exists && 180 out_of_time = exists &&
173 budget_enforced(pfp->scheduled) && 181 budget_enforced(pfp->scheduled) &&
174 budget_exhausted(pfp->scheduled); 182 budget_exhausted(pfp->scheduled);
183 signal_budget = exists &&
184 budget_signalled(pfp->scheduled) &&
185 budget_exhausted(pfp->scheduled) &&
186 !sigbudget_sent(pfp->scheduled);
175 np = exists && is_np(pfp->scheduled); 187 np = exists && is_np(pfp->scheduled);
176 sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP; 188 sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP;
177 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; 189 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
@@ -183,6 +195,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
183 */ 195 */
184 resched = preempt; 196 resched = preempt;
185 197
198 /* Send the signal that the budget has been exhausted */
199 if (signal_budget)
200 send_sigbudget(pfp->scheduled);
201
186 /* If a task blocks we have no choice but to reschedule. 202 /* If a task blocks we have no choice but to reschedule.
187 */ 203 */
188 if (blocks) 204 if (blocks)
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 00a1900d6457..245e41c25a5d 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
13#include <litmus/preempt.h> 13#include <litmus/preempt.h>
14#include <litmus/jobs.h> 14#include <litmus/jobs.h>
15 15
16#ifdef CONFIG_LITMUS_NVIDIA
17#include <litmus/nvidia_info.h>
18#endif
19
16/* 20/*
17 * Generic function to trigger preemption on either local or remote cpu 21 * Generic function to trigger preemption on either local or remote cpu
18 * from scheduler plugins. The key feature is that this function is 22 * from scheduler plugins. The key feature is that this function is
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)
102 106
103static long litmus_dummy_activate_plugin(void) 107static long litmus_dummy_activate_plugin(void)
104{ 108{
109#ifdef CONFIG_LITMUS_NVIDIA
110 shutdown_nvidia_info();
111#endif
105 return 0; 112 return 0;
106} 113}
107 114
@@ -110,14 +117,93 @@ static long litmus_dummy_deactivate_plugin(void)
110 return 0; 117 return 0;
111} 118}
112 119
113#ifdef CONFIG_LITMUS_LOCKING 120static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b)
121{
122 TRACE_CUR("WARNING: Dummy compare function called!\n");
123 return 0;
124}
114 125
126#ifdef CONFIG_LITMUS_LOCKING
115static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, 127static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
116 void* __user config) 128 void* __user config)
117{ 129{
118 return -ENXIO; 130 return -ENXIO;
119} 131}
120 132
133static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh)
134{
135}
136
137static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
138{
139}
140#endif
141
142#ifdef CONFIG_LITMUS_SOFTIRQD
143static void litmus_dummy_increase_prio_klitirqd(struct task_struct* klitirqd,
144 struct task_struct* old_owner,
145 struct task_struct* new_owner)
146{
147}
148
149static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd,
150 struct task_struct* old_owner)
151{
152}
153#endif
154
155#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
156static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
157{
158 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
159 return(0); // failure.
160}
161
162static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio,
163 struct task_struct *new_prio)
164{
165 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
166}
167
168static void litmus_dummy_run_tasklets(struct task_struct* t)
169{
170 //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
171}
172#endif
173
174#ifdef CONFIG_LITMUS_NESTED_LOCKING
175static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh,
176 raw_spinlock_t *to_unlock, unsigned long irqflags)
177{
178}
179
180static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh,
181 raw_spinlock_t *to_unlock, unsigned long irqflags)
182{
183}
184
185static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod,
186 struct task_struct* b, comparison_mode_t b_mode)
187{
188 TRACE_CUR("WARNING: Dummy compare function called!\n");
189 return 0;
190}
191#endif
192
193#ifdef CONFIG_LITMUS_DGL_SUPPORT
194static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t)
195{
196 return NULL;
197}
198#endif
199
200#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
201static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs,
202 int type,
203 void* __user config)
204{
205 return -ENXIO;
206}
121#endif 207#endif
122 208
123 209
@@ -136,9 +222,33 @@ struct sched_plugin linux_sched_plugin = {
136 .finish_switch = litmus_dummy_finish_switch, 222 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin, 223 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin, 224 .deactivate_plugin = litmus_dummy_deactivate_plugin,
225 .compare = litmus_dummy_compare,
139#ifdef CONFIG_LITMUS_LOCKING 226#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock, 227 .allocate_lock = litmus_dummy_allocate_lock,
228 .increase_prio = litmus_dummy_increase_prio,
229 .decrease_prio = litmus_dummy_decrease_prio,
230#endif
231#ifdef CONFIG_LITMUS_NESTED_LOCKING
232 .nested_increase_prio = litmus_dummy_nested_increase_prio,
233 .nested_decrease_prio = litmus_dummy_nested_decrease_prio,
234 .__compare = litmus_dummy___compare,
235#endif
236#ifdef CONFIG_LITMUS_SOFTIRQD
237 .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd,
238 .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd,
239#endif
240#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
241 .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
242 .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet,
243 .run_tasklets = litmus_dummy_run_tasklets,
244#endif
245#ifdef CONFIG_LITMUS_DGL_SUPPORT
246 .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock,
141#endif 247#endif
248#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
249 .allocate_aff_obs = litmus_dummy_allocate_aff_obs,
250#endif
251
142 .admit_task = litmus_dummy_admit_task 252 .admit_task = litmus_dummy_admit_task
143}; 253};
144 254
@@ -174,8 +284,31 @@ int register_sched_plugin(struct sched_plugin* plugin)
174 CHECK(complete_job); 284 CHECK(complete_job);
175 CHECK(activate_plugin); 285 CHECK(activate_plugin);
176 CHECK(deactivate_plugin); 286 CHECK(deactivate_plugin);
287 CHECK(compare);
177#ifdef CONFIG_LITMUS_LOCKING 288#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock); 289 CHECK(allocate_lock);
290 CHECK(increase_prio);
291 CHECK(decrease_prio);
292#endif
293#ifdef CONFIG_LITMUS_NESTED_LOCKING
294 CHECK(nested_increase_prio);
295 CHECK(nested_decrease_prio);
296 CHECK(__compare);
297#endif
298#ifdef CONFIG_LITMUS_SOFTIRQD
299 CHECK(increase_prio_klitirqd);
300 CHECK(decrease_prio_klitirqd);
301#endif
302#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
303 CHECK(enqueue_pai_tasklet);
304 CHECK(change_prio_pai_tasklet);
305 CHECK(run_tasklets);
306#endif
307#ifdef CONFIG_LITMUS_DGL_SUPPORT
308 CHECK(get_dgl_spinlock);
309#endif
310#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
311 CHECK(allocate_aff_obs);
179#endif 312#endif
180 CHECK(admit_task); 313 CHECK(admit_task);
181 314
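register_sched_plugin() runs CHECK() over every callback so that a plugin which leaves a hook NULL gets the matching litmus_dummy_* stub instead, and callers never have to test the pointers. A small sketch of that backfill pattern with a made-up plugin struct; the actual CHECK macro in sched_plugin.c may differ in detail:

#include <stdio.h>

struct demo_plugin {
	void (*activate)(void);
	void (*deactivate)(void);
};

static void demo_dummy_activate(void)   { puts("dummy activate"); }
static void demo_dummy_deactivate(void) { puts("dummy deactivate"); }

/* Backfill a missing callback with its dummy, in the style of CHECK(func). */
#define DEMO_CHECK(p, func)			\
	do {					\
		if (!(p)->func)			\
			(p)->func = demo_dummy_##func;	\
	} while (0)

static void demo_register_plugin(struct demo_plugin *p)
{
	DEMO_CHECK(p, activate);
	DEMO_CHECK(p, deactivate);
}

int main(void)
{
	struct demo_plugin p = { .activate = NULL, .deactivate = NULL };

	demo_register_plugin(&p);
	p.activate();		/* safe: always non-NULL after registration */
	p.deactivate();
	return 0;
}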
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index b0c8126bd44a..a5fda133bad9 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -169,17 +169,25 @@ static void psnedf_tick(struct task_struct *t)
169 */ 169 */
170 BUG_ON(is_realtime(t) && t != pedf->scheduled); 170 BUG_ON(is_realtime(t) && t != pedf->scheduled);
171 171
172 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 172 if (is_realtime(t) && budget_exhausted(t))
173 if (!is_np(t)) { 173 {
174 litmus_reschedule_local(); 174 if (budget_signalled(t) && !sigbudget_sent(t)) {
175 TRACE("psnedf_scheduler_tick: " 175 /* signal exhaustion */
176 "%d is preemptable " 176 send_sigbudget(t);
177 " => FORCE_RESCHED\n", t->pid); 177 }
178 } else if (is_user_np(t)) { 178
179 TRACE("psnedf_scheduler_tick: " 179 if (budget_enforced(t)) {
180 "%d is non-preemptable, " 180 if (!is_np(t)) {
181 "preemption delayed.\n", t->pid); 181 litmus_reschedule_local();
182 request_exit_np(t); 182 TRACE("psnedf_scheduler_tick: "
183 "%d is preemptable "
184 " => FORCE_RESCHED\n", t->pid);
185 } else if (is_user_np(t)) {
186 TRACE("psnedf_scheduler_tick: "
187 "%d is non-preemptable, "
188 "preemption delayed.\n", t->pid);
189 request_exit_np(t);
190 }
183 } 191 }
184 } 192 }
185} 193}
@@ -190,8 +198,7 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
190 rt_domain_t* edf = &pedf->domain; 198 rt_domain_t* edf = &pedf->domain;
191 struct task_struct* next; 199 struct task_struct* next;
192 200
193 int out_of_time, sleep, preempt, 201 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched;
194 np, exists, blocks, resched;
195 202
196 raw_spin_lock(&pedf->slock); 203 raw_spin_lock(&pedf->slock);
197 204
@@ -208,6 +215,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
208 out_of_time = exists && 215 out_of_time = exists &&
209 budget_enforced(pedf->scheduled) && 216 budget_enforced(pedf->scheduled) &&
210 budget_exhausted(pedf->scheduled); 217 budget_exhausted(pedf->scheduled);
218 signal_budget = exists &&
219 budget_signalled(pedf->scheduled) &&
220 budget_exhausted(pedf->scheduled) &&
221 !sigbudget_sent(pedf->scheduled);
211 np = exists && is_np(pedf->scheduled); 222 np = exists && is_np(pedf->scheduled);
212 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; 223 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
213 preempt = edf_preemption_needed(edf, prev); 224 preempt = edf_preemption_needed(edf, prev);
@@ -218,6 +229,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
218 */ 229 */
219 resched = preempt; 230 resched = preempt;
220 231
232 /* Send the signal that the budget has been exhausted */
233 if (signal_budget)
234 send_sigbudget(pedf->scheduled);
235
221 /* If a task blocks we have no choice but to reschedule. 236 /* If a task blocks we have no choice but to reschedule.
222 */ 237 */
223 if (blocks) 238 if (blocks)
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 5ef8d09ab41f..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/hardirq.h>
10 11
11#include <litmus/ftdev.h> 12#include <litmus/ftdev.h>
12#include <litmus/litmus.h> 13#include <litmus/litmus.h>
@@ -16,13 +17,13 @@
16#include <litmus/ftdev.h> 17#include <litmus/ftdev.h>
17 18
18 19
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 20#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
20 21
21#define now() litmus_clock() 22#define now() litmus_clock()
22 23
23struct local_buffer { 24struct local_buffer {
24 struct st_event_record record[NO_EVENTS]; 25 struct st_event_record record[NUM_EVENTS];
25 char flag[NO_EVENTS]; 26 char flag[NUM_EVENTS];
26 struct ft_buffer ftbuf; 27 struct ft_buffer ftbuf;
27}; 28};
28 29
@@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
41 int i, ok = 0, err; 42 int i, ok = 0, err;
42 printk("Allocated %u sched_trace_xxx() events per CPU " 43 printk("Allocated %u sched_trace_xxx() events per CPU "
43 "(buffer size: %d bytes)\n", 44 "(buffer size: %d bytes)\n",
44 NO_EVENTS, (int) sizeof(struct local_buffer)); 45 NUM_EVENTS, (int) sizeof(struct local_buffer));
45 46
46 err = ftdev_init(&st_dev, THIS_MODULE, 47 err = ftdev_init(&st_dev, THIS_MODULE,
47 num_online_cpus(), "sched_trace"); 48 num_online_cpus(), "sched_trace");
@@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
50 51
51 for (i = 0; i < st_dev.minor_cnt; i++) { 52 for (i = 0; i < st_dev.minor_cnt; i++) {
52 buf = &per_cpu(st_event_buffer, i); 53 buf = &per_cpu(st_event_buffer, i);
53 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, 54 ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
54 sizeof(struct st_event_record), 55 sizeof(struct st_event_record),
55 buf->flag, 56 buf->flag,
56 buf->record); 57 buf->record);
@@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
154{ 155{
155 struct task_struct *t = (struct task_struct*) _task; 156 struct task_struct *t = (struct task_struct*) _task;
156 struct st_event_record* rec; 157 struct st_event_record* rec;
157 if (is_realtime(t)) { 158 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
159 {
158 rec = get_record(ST_SWITCH_TO, t); 160 rec = get_record(ST_SWITCH_TO, t);
159 if (rec) { 161 if (rec) {
160 rec->data.switch_to.when = now(); 162 rec->data.switch_to.when = now();
@@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
169{ 171{
170 struct task_struct *t = (struct task_struct*) _task; 172 struct task_struct *t = (struct task_struct*) _task;
171 struct st_event_record* rec; 173 struct st_event_record* rec;
172 if (is_realtime(t)) { 174 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
175 {
173 rec = get_record(ST_SWITCH_AWAY, t); 176 rec = get_record(ST_SWITCH_AWAY, t);
174 if (rec) { 177 if (rec) {
175 rec->data.switch_away.when = now(); 178 rec->data.switch_away.when = now();
@@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
188 if (rec) { 191 if (rec) {
189 rec->data.completion.when = now(); 192 rec->data.completion.when = now();
190 rec->data.completion.forced = forced; 193 rec->data.completion.forced = forced;
194#ifdef CONFIG_LITMUS_NVIDIA
195 rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
196#endif
191 put_record(rec); 197 put_record(rec);
192 } 198 }
193} 199}
@@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id,
239 put_record(rec); 245 put_record(rec);
240 } 246 }
241} 247}
248
249
250
251
252feather_callback void do_sched_trace_prediction_err(unsigned long id,
253 unsigned long _task,
254 unsigned long _distance,
255 unsigned long _rel_err)
256{
257 struct task_struct *t = (struct task_struct*) _task;
258 struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
259
260 if (rec) {
261 gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
262 fp_t* rel_err = (fp_t*) _rel_err;
263
264 rec->data.prediction_err.distance = *distance;
265 rec->data.prediction_err.rel_err = rel_err->val;
266 put_record(rec);
267 }
268}
269
270
271feather_callback void do_sched_trace_migration(unsigned long id,
272 unsigned long _task,
273 unsigned long _mig_info)
274{
275 struct task_struct *t = (struct task_struct*) _task;
276 struct st_event_record *rec = get_record(ST_MIGRATION, t);
277
278 if (rec) {
279 struct migration_info* mig_info = (struct migration_info*) _mig_info;
280
281 rec->hdr.extra = mig_info->distance;
282 rec->data.migration.observed = mig_info->observed;
283 rec->data.migration.estimated = mig_info->estimated;
284
285 put_record(rec);
286 }
287}
288
289
290
291
292
293
294
295
296
297feather_callback void do_sched_trace_tasklet_release(unsigned long id,
298 unsigned long _owner)
299{
300 struct task_struct *t = (struct task_struct*) _owner;
301 struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
302
303 if (rec) {
304 rec->data.tasklet_release.when = now();
305 put_record(rec);
306 }
307}
308
309
310feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
311 unsigned long _owner)
312{
313 struct task_struct *t = (struct task_struct*) _owner;
314 struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
315
316 if (rec) {
317 rec->data.tasklet_begin.when = now();
318
319 if(!in_interrupt())
320 rec->data.tasklet_begin.exe_pid = current->pid;
321 else
322 rec->data.tasklet_begin.exe_pid = 0;
323
324 put_record(rec);
325 }
326}
327EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
328
329
330feather_callback void do_sched_trace_tasklet_end(unsigned long id,
331 unsigned long _owner,
332 unsigned long _flushed)
333{
334 struct task_struct *t = (struct task_struct*) _owner;
335 struct st_event_record *rec = get_record(ST_TASKLET_END, t);
336
337 if (rec) {
338 rec->data.tasklet_end.when = now();
339 rec->data.tasklet_end.flushed = _flushed;
340
341 if(!in_interrupt())
342 rec->data.tasklet_end.exe_pid = current->pid;
343 else
344 rec->data.tasklet_end.exe_pid = 0;
345
346 put_record(rec);
347 }
348}
349EXPORT_SYMBOL(do_sched_trace_tasklet_end);
350
351
352feather_callback void do_sched_trace_work_release(unsigned long id,
353 unsigned long _owner)
354{
355 struct task_struct *t = (struct task_struct*) _owner;
356 struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
357
358 if (rec) {
359 rec->data.work_release.when = now();
360 put_record(rec);
361 }
362}
363
364
365feather_callback void do_sched_trace_work_begin(unsigned long id,
366 unsigned long _owner,
367 unsigned long _exe)
368{
369 struct task_struct *t = (struct task_struct*) _owner;
370 struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
371
372 if (rec) {
373 struct task_struct *exe = (struct task_struct*) _exe;
374 rec->data.work_begin.exe_pid = exe->pid;
375 rec->data.work_begin.when = now();
376 put_record(rec);
377 }
378}
379EXPORT_SYMBOL(do_sched_trace_work_begin);
380
381
382feather_callback void do_sched_trace_work_end(unsigned long id,
383 unsigned long _owner,
384 unsigned long _exe,
385 unsigned long _flushed)
386{
387 struct task_struct *t = (struct task_struct*) _owner;
388 struct st_event_record *rec = get_record(ST_WORK_END, t);
389
390 if (rec) {
391 struct task_struct *exe = (struct task_struct*) _exe;
392 rec->data.work_end.exe_pid = exe->pid;
393 rec->data.work_end.flushed = _flushed;
394 rec->data.work_end.when = now();
395 put_record(rec);
396 }
397}
398EXPORT_SYMBOL(do_sched_trace_work_end);
399
400
401feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
402 unsigned long _task,
403 unsigned long _inh)
404{
405 struct task_struct *t = (struct task_struct*) _task;
406 struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
407
408 if (rec) {
409 struct task_struct *inh = (struct task_struct*) _inh;
410 rec->data.effective_priority_change.when = now();
411 rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
412 inh->pid :
413 0xffff;
414
415 put_record(rec);
416 }
417}
418
419/* pray for no nesting of nv interrupts on same CPU... */
420struct tracing_interrupt_map
421{
422 int active;
423 int count;
424 unsigned long data[128]; // assume nesting less than 128...
425 unsigned long serial[128];
426};
427DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
428
429
430DEFINE_PER_CPU(u32, intCounter);
431
432feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
433 unsigned long _device)
434{
435 struct st_event_record *rec;
436 u32 serialNum;
437
438 {
439 u32* serial;
440 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
441		if(int_map->active == 0xcafebabe)
442 {
443 int_map->count++;
444 }
445 else
446 {
447 int_map->active = 0xcafebabe;
448 int_map->count = 1;
449 }
450 //int_map->data[int_map->count-1] = _device;
451
452 serial = &per_cpu(intCounter, smp_processor_id());
453 *serial += num_online_cpus();
454 serialNum = *serial;
455 int_map->serial[int_map->count-1] = serialNum;
456 }
457
458 rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
459 if(rec) {
460 u32 device = _device;
461 rec->data.nv_interrupt_begin.when = now();
462 rec->data.nv_interrupt_begin.device = device;
463 rec->data.nv_interrupt_begin.serialNumber = serialNum;
464 put_record(rec);
465 }
466}
467EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
468
469/*
470int is_interrupt_tracing_active(void)
471{
472 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
473 if(int_map->active == 0xcafebabe)
474 return 1;
475 return 0;
476}
477*/
478
479feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
480{
481 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
482 if(int_map->active == 0xcafebabe)
483 {
484 struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
485
486 int_map->count--;
487 if(int_map->count == 0)
488 int_map->active = 0;
489
490 if(rec) {
491 u32 device = _device;
492 rec->data.nv_interrupt_end.when = now();
493 //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
494 rec->data.nv_interrupt_end.device = device;
495 rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
496 put_record(rec);
497 }
498 }
499}
500EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
501
502
503
504
505
506
507
508
509
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
new file mode 100644
index 000000000000..cf8e1d78aa77
--- /dev/null
+++ b/litmus/sched_trace_external.c
@@ -0,0 +1,64 @@
1#include <linux/module.h>
2
3#include <litmus/trace.h>
4#include <litmus/sched_trace.h>
5#include <litmus/litmus.h>
6
7void __sched_trace_tasklet_begin_external(struct task_struct* t)
8{
9 sched_trace_tasklet_begin(t);
10}
11EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
12
13void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
14{
15 sched_trace_tasklet_end(t, flushed);
16}
17EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
18
19
20
21void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
22{
23 sched_trace_work_begin(t, e);
24}
25EXPORT_SYMBOL(__sched_trace_work_begin_external);
26
27void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
28{
29 sched_trace_work_end(t, e, f);
30}
31EXPORT_SYMBOL(__sched_trace_work_end_external);
32
33
34
35void __sched_trace_nv_interrupt_begin_external(u32 device)
36{
37 //unsigned long _device = device;
38 sched_trace_nv_interrupt_begin((unsigned long)device);
39}
40EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
41
42void __sched_trace_nv_interrupt_end_external(u32 device)
43{
44 //unsigned long _device = device;
45 sched_trace_nv_interrupt_end((unsigned long)device);
46}
47EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
48
49
50#ifdef CONFIG_LITMUS_NVIDIA
51
52#define EXX_TS(evt) \
53void __##evt(void) { evt; } \
54EXPORT_SYMBOL(__##evt);
55
56EXX_TS(TS_NV_TOPISR_START)
57EXX_TS(TS_NV_TOPISR_END)
58EXX_TS(TS_NV_BOTISR_START)
59EXX_TS(TS_NV_BOTISR_END)
60EXX_TS(TS_NV_RELEASE_BOTISR_START)
61EXX_TS(TS_NV_RELEASE_BOTISR_END)
62
63#endif
64
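EXX_TS(evt) turns a feather-trace timestamp macro into an exported function so that out-of-tree code (e.g. the NVIDIA glue) can emit the same events; expanding the first invocation by hand, EXX_TS(TS_NV_TOPISR_START) becomes 'void __TS_NV_TOPISR_START(void) { TS_NV_TOPISR_START; } EXPORT_SYMBOL(__TS_NV_TOPISR_START);'. The sketch below stubs the kernel macros so that expansion can be compiled and run in user space:

#include <stdio.h>

/* User-space stand-ins for the kernel macros used by the expansion. */
#define TS_NV_TOPISR_START	printf("TS_NV_TOPISR_START timestamp\n")
#define EXPORT_SYMBOL(sym)	extern void sym(void)	/* in the kernel, exports 'sym' to modules */

/* Hand expansion of EXX_TS(TS_NV_TOPISR_START) from the file above. */
void __TS_NV_TOPISR_START(void) { TS_NV_TOPISR_START; }
EXPORT_SYMBOL(__TS_NV_TOPISR_START);

int main(void)
{
	__TS_NV_TOPISR_START();
	return 0;
}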