author     Glenn Elliott <gelliott@cs.unc.edu>  2012-01-11 14:37:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>  2012-01-11 14:37:13 -0500
commit     5d7dcfa10ea0dd283773a301e3ce610a7797d582 (patch)
tree       7f7a57ac940e7fe1f538cdd771a954d4fb28f8c0
parent     3d5537c160c1484e8d562b9828baf679cc53f67a (diff)
PAI implementation, C-RM, C-FIFO.
-rw-r--r--  include/linux/interrupt.h             |    2
-rw-r--r--  include/litmus/fifo_common.h          |   25
-rw-r--r--  include/litmus/litmus.h               |    4
-rw-r--r--  include/litmus/nvidia_info.h          |    3
-rw-r--r--  include/litmus/rm_common.h            |   25
-rw-r--r--  include/litmus/rm_srt_common.h        |   25
-rw-r--r--  include/litmus/sched_plugin.h         |   11
-rw-r--r--  include/litmus/sched_trace.h          |    8
-rw-r--r--  include/litmus/sched_trace_external.h |   22
-rw-r--r--  include/litmus/trace.h                |   14
-rw-r--r--  kernel/sched.c                        |    4
-rw-r--r--  kernel/softirq.c                      |    4
-rw-r--r--  kernel/workqueue.c                    |    2
-rw-r--r--  litmus/Kconfig                        |   52
-rw-r--r--  litmus/Makefile                       |    3
-rw-r--r--  litmus/edf_common.c                   |   39
-rw-r--r--  litmus/fifo_common.c                  |  124
-rw-r--r--  litmus/litmus_pai_softirq.c           |   64
-rw-r--r--  litmus/litmus_softirq.c               |    2
-rw-r--r--  litmus/nvidia_info.c                  |   24
-rw-r--r--  litmus/rm_common.c                    |  160
-rw-r--r--  litmus/rm_srt_common.c                |  167
-rw-r--r--  litmus/sched_cedf.c                   |  229
-rw-r--r--  litmus/sched_cfifo.c                  | 1611
-rw-r--r--  litmus/sched_crm.c                    | 1611
-rw-r--r--  litmus/sched_crm_srt.c                | 1611
-rw-r--r--  litmus/sched_gsn_edf.c                |   10
-rw-r--r--  litmus/sched_plugin.c                 |   11
-rw-r--r--  litmus/sched_task_trace.c             |   26
-rw-r--r--  litmus/sched_trace_external.c         |   23
30 files changed, 5874 insertions(+), 42 deletions(-)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5d22f5342376..a2f2880d5517 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -460,7 +460,7 @@ struct tasklet_struct
460 void (*func)(unsigned long); 460 void (*func)(unsigned long);
461 unsigned long data; 461 unsigned long data;
462 462
463#ifdef CONFIG_LITMUS_SOFTIRQD 463#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD)
464 struct task_struct *owner; 464 struct task_struct *owner;
465#endif 465#endif
466}; 466};
diff --git a/include/litmus/fifo_common.h b/include/litmus/fifo_common.h
new file mode 100644
index 000000000000..12cfbfea41ee
--- /dev/null
+++ b/include/litmus/fifo_common.h
@@ -0,0 +1,25 @@
1/*
2 * FIFO common data structures and utility functions shared by all FIFO
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_FIFO_COMMON_H__
11#define __UNC_FIFO_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int fifo_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int fifo_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 3df242bf272f..829c1c5ab91f 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -118,7 +118,9 @@ static inline lt_t litmus_clock(void)
118#define earlier_release(a, b) (lt_before(\ 118#define earlier_release(a, b) (lt_before(\
119 (a)->rt_param.job_params.release,\ 119 (a)->rt_param.job_params.release,\
120 (b)->rt_param.job_params.release)) 120 (b)->rt_param.job_params.release))
121 121#define shorter_period(a, b) (lt_before(\
122 (a)->rt_param.task_params.period,\
123 (b)->rt_param.task_params.period))
122void preempt_if_preemptable(struct task_struct* t, int on_cpu); 124void preempt_if_preemptable(struct task_struct* t, int on_cpu);
123 125
124#ifdef CONFIG_LITMUS_LOCKING 126#ifdef CONFIG_LITMUS_LOCKING
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
index 579301d77cf5..9e07a27fdee3 100644
--- a/include/litmus/nvidia_info.h
+++ b/include/litmus/nvidia_info.h
@@ -7,7 +7,8 @@
7#include <litmus/litmus_softirq.h> 7#include <litmus/litmus_softirq.h>
8 8
9 9
10#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD 10//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
11#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
11 12
12int init_nvidia_info(void); 13int init_nvidia_info(void);
13 14
diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
new file mode 100644
index 000000000000..5991b0b4e758
--- /dev/null
+++ b/include/litmus/rm_common.h
@@ -0,0 +1,25 @@
1/*
2 * RM common data structures and utility functions shared by all RM
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_RM_COMMON_H__
11#define __UNC_RM_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int rm_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int rm_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/rm_srt_common.h b/include/litmus/rm_srt_common.h
new file mode 100644
index 000000000000..78aa287327a2
--- /dev/null
+++ b/include/litmus/rm_srt_common.h
@@ -0,0 +1,25 @@
1/*
2 * RM-SRT common data structures and utility functions shared by all RM-SRT
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_RM_SRT_COMMON_H__
11#define __UNC_RM_SRT_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int rm_srt_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index df50930d14a0..12a9ab65a673 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -11,6 +11,10 @@
11#include <litmus/locking.h> 11#include <litmus/locking.h>
12#endif 12#endif
13 13
14#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
15#include <linux/interrupt.h>
16#endif
17
14/************************ setup/tear down ********************/ 18/************************ setup/tear down ********************/
15 19
16typedef long (*activate_plugin_t) (void); 20typedef long (*activate_plugin_t) (void);
@@ -69,6 +73,9 @@ typedef void (*set_prio_inh_klitirq_t)(struct task_struct* klitirqd,
69typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd, 73typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd,
70 struct task_struct* old_owner); 74 struct task_struct* old_owner);
71 75
76
77typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet);
78
72/********************* sys call backends ********************/ 79/********************* sys call backends ********************/
73/* This function causes the caller to sleep until the next release */ 80/* This function causes the caller to sleep until the next release */
74typedef long (*complete_job_t) (void); 81typedef long (*complete_job_t) (void);
@@ -115,6 +122,10 @@ struct sched_plugin {
115 set_prio_inh_klitirq_t set_prio_inh_klitirqd; 122 set_prio_inh_klitirq_t set_prio_inh_klitirqd;
116 clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd; 123 clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd;
117#endif 124#endif
125
126#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
127 enqueue_pai_tasklet_t enqueue_pai_tasklet;
128#endif
118} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); 129} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
119 130
120 131
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 1486c778aff8..232c7588d103 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -127,13 +127,13 @@ struct st_effective_priority_change_data {
127struct st_nv_interrupt_begin_data { 127struct st_nv_interrupt_begin_data {
128 u64 when; 128 u64 when;
129 u32 device; 129 u32 device;
130 u8 __unused[4]; 130 u32 serialNumber;
131} __attribute__((packed)); 131} __attribute__((packed));
132 132
133struct st_nv_interrupt_end_data { 133struct st_nv_interrupt_end_data {
134 u64 when; 134 u64 when;
135 u32 device; 135 u32 device;
136 u8 __unused[4]; 136 u32 serialNumber;
137} __attribute__((packed)); 137} __attribute__((packed));
138 138
139#define DATA(x) struct st_ ## x ## _data x; 139#define DATA(x) struct st_ ## x ## _data x;
@@ -328,8 +328,8 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
328 328
329#define sched_trace_nv_interrupt_begin(d) \ 329#define sched_trace_nv_interrupt_begin(d) \
330 SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) 330 SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
331#define sched_trace_nv_interrupt_end() \ 331#define sched_trace_nv_interrupt_end(d) \
332 SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, 0ul) 332 SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
333 333
334#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ 334#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
335 335
diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
index c2c872639880..90424d5c564c 100644
--- a/include/litmus/sched_trace_external.h
+++ b/include/litmus/sched_trace_external.h
@@ -34,9 +34,25 @@ static inline void sched_trace_nv_interrupt_begin_external(u32 device)
34 __sched_trace_nv_interrupt_begin_external(device); 34 __sched_trace_nv_interrupt_begin_external(device);
35} 35}
36 36
37extern void __sched_trace_nv_interrupt_end_external(void); 37extern void __sched_trace_nv_interrupt_end_external(u32 device);
38static inline void sched_trace_nv_interrupt_end_external(void) 38static inline void sched_trace_nv_interrupt_end_external(u32 device)
39{ 39{
40 __sched_trace_nv_interrupt_end_external(); 40 __sched_trace_nv_interrupt_end_external(device);
41} 41}
42
43#ifdef CONFIG_LITMUS_NVIDIA
44
45#define EX_TS(evt) \
46extern void __##evt(void); \
47static inline void EX_##evt(void) { __##evt(); }
48
49EX_TS(TS_NV_TOPISR_START)
50EX_TS(TS_NV_TOPISR_END)
51EX_TS(TS_NV_BOTISR_START)
52EX_TS(TS_NV_BOTISR_END)
53EX_TS(TS_NV_RELEASE_BOTISR_START)
54EX_TS(TS_NV_RELEASE_BOTISR_END)
55
56#endif
57
42#endif 58#endif
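
For reference while reading the hunk above: EX_TS() simply pairs an extern declaration with a static inline wrapper, presumably so code outside the LITMUS tree can emit these overhead-tracing timestamps without including litmus/trace.h directly. The first invocation expands roughly to the following (a sketch of the preprocessor output, not code from the patch):

/* rough expansion of EX_TS(TS_NV_TOPISR_START) */
extern void __TS_NV_TOPISR_START(void);
static inline void EX_TS_NV_TOPISR_START(void) { __TS_NV_TOPISR_START(); }
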
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index 05f487263f28..aa3ee4a6757b 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -100,4 +100,18 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu)
100#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) 100#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
101 101
102 102
103
104#ifdef CONFIG_LITMUS_NVIDIA
105
106#define TS_NV_TOPISR_START TIMESTAMP(200)
107#define TS_NV_TOPISR_END TIMESTAMP(201)
108
109#define TS_NV_BOTISR_START TIMESTAMP(202)
110#define TS_NV_BOTISR_END TIMESTAMP(203)
111
112#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204)
113#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205)
114
115#endif
116
103#endif /* !_SYS_TRACE_H_ */ 117#endif /* !_SYS_TRACE_H_ */
diff --git a/kernel/sched.c b/kernel/sched.c
index 3162605ffc91..3aa2be09122b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3897,8 +3897,10 @@ need_resched_nonpreemptible:
3897 if (need_resched()) 3897 if (need_resched())
3898 goto need_resched; 3898 goto need_resched;
3899 3899
3900#ifdef CONFIG_LITMUS_SOFTIRQD
3900 reacquire_klitirqd_lock(prev); 3901 reacquire_klitirqd_lock(prev);
3901 3902#endif
3903
3902 srp_ceiling_block(); 3904 srp_ceiling_block();
3903} 3905}
3904EXPORT_SYMBOL(schedule); 3906EXPORT_SYMBOL(schedule);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index be4b8fab3637..ae77c5c1d17e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -35,6 +35,7 @@
35 35
36#ifdef CONFIG_LITMUS_NVIDIA 36#ifdef CONFIG_LITMUS_NVIDIA
37#include <litmus/nvidia_info.h> 37#include <litmus/nvidia_info.h>
38#include <litmus/trace.h>
38#endif 39#endif
39 40
40/* 41/*
@@ -441,6 +442,9 @@ void __tasklet_schedule(struct tasklet_struct *t)
441 if(likely(_litmus_tasklet_schedule(t,nvidia_device))) 442 if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
442 { 443 {
443 unlock_nv_registry(nvidia_device, &flags); 444 unlock_nv_registry(nvidia_device, &flags);
445
446 TS_NV_RELEASE_BOTISR_END;
447
444 return; 448 return;
445 } 449 }
446 else 450 else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8139208eaee1..637cadac2627 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2531,7 +2531,7 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
2531 */ 2531 */
2532int schedule_work(struct work_struct *work) 2532int schedule_work(struct work_struct *work)
2533{ 2533{
2534#ifdef CONFIG_LITMUS_NVIDIA 2534#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2535 if(is_nvidia_func(work->func)) 2535 if(is_nvidia_func(work->func))
2536 { 2536 {
2537 u32 nvidiaDevice = get_work_nv_device_num(work); 2537 u32 nvidiaDevice = get_work_nv_device_num(work);
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 7e865d4dd703..5109cf7db7f6 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -218,18 +218,41 @@ config LITMUS_THREAD_ALL_SOFTIRQ
218 218
219 If unsure, say No. 219 If unsure, say No.
220 220
221
222choice
223 prompt "Scheduling of interrupt bottom-halves in Litmus."
224 default LITMUS_SOFTIRQD_NONE
225 depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
226 help
227 Schedule tasklets with known priorities in Litmus.
228
229config LITMUS_SOFTIRQD_NONE
230 bool "No tasklet scheduling in Litmus."
231 help
232 Don't schedule tasklets in Litmus. Default.
233
221config LITMUS_SOFTIRQD 234config LITMUS_SOFTIRQD
222 bool "Spawn klitirqd interrupt handling threads." 235 bool "Spawn klitirqd interrupt handling threads."
223 depends on LITMUS_LOCKING 236 help
224 default n 237 Create klitirqd interrupt handling threads. Work must be
225 help 238 specifically dispatched to these workers. (Softirqs for
226 Create klitirqd interrupt handling threads. Work must be 239 Litmus tasks are not magically redirected to klitirqd.)
227 specifically dispatched to these workers. (Softirqs for
228 Litmus tasks are not magically redirected to klitirqd.)
229 240
230 G-EDF ONLY for now! 241 G-EDF/RM, C-EDF/RM ONLY for now!
231 242
232 If unsure, say No. 243
244config LITMUS_PAI_SOFTIRQD
245 bool "Defer tasklets to context switch points."
246 help
247 Only execute scheduled tasklet bottom halves at
248 scheduling points. Saves context-switch overhead
249 at the cost of non-preemptive durations of bottom-half
250 processing.
251
252 G-EDF/RM, C-EDF/RM ONLY for now!
253
254endchoice
255
233 256
234config NR_LITMUS_SOFTIRQD 257config NR_LITMUS_SOFTIRQD
235 int "Number of klitirqd." 258 int "Number of klitirqd."
@@ -241,13 +264,22 @@ config NR_LITMUS_SOFTIRQD
241 264
242config LITMUS_NVIDIA 265config LITMUS_NVIDIA
243 bool "Litmus handling of NVIDIA interrupts." 266 bool "Litmus handling of NVIDIA interrupts."
244 depends on LITMUS_SOFTIRQD 267 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
245 default n 268 default n
246 help 269 help
247 Direct tasklets from NVIDIA devices to Litmus's klitirqd. 270 Direct tasklets from NVIDIA devices to Litmus's klitirqd.
248 271
249 If unsure, say No. 272 If unsure, say No.
250 273
274config NV_DEVICE_NUM
275 int "Number of NVIDIA GPUs."
276 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
277 range 1 4096
278 default "1"
279 help
280 Should be no greater than the number of CPUs and
281 no greater than the number of GPUs in your system.
282
251choice 283choice
252 prompt "CUDA/Driver Version Support" 284 prompt "CUDA/Driver Version Support"
253 default CUDA_4_0 285 default CUDA_4_0
diff --git a/litmus/Makefile b/litmus/Makefile
index 892e01c2e1b3..869939e2270c 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -19,7 +19,7 @@ obj-y = sched_plugin.o litmus.o \
19 sched_gsn_edf.o \ 19 sched_gsn_edf.o \
20 sched_psn_edf.o 20 sched_psn_edf.o
21 21
22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o sched_cfifo.o fifo_common.o sched_crm.o rm_common.o sched_crm_srt.o rm_srt_common.o
23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
24obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o 24obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
25 25
@@ -29,4 +29,5 @@ obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
29obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 29obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
30 30
31obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o 31obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
32obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
32obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o 33obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index fbd67ab5f467..0a06d7a26c00 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -63,7 +63,45 @@ int edf_higher_prio(struct task_struct* first,
63 63
64#endif 64#endif
65 65
66 if (!is_realtime(second_task))
67 return true;
68
69 if (earlier_deadline(first_task, second_task))
70 return true;
71
72 if (get_deadline(first_task) == get_deadline(second_task))
73 {
74 if (shorter_period(first_task, second_task))
75 {
76 return true;
77 }
78 if (get_rt_period(first_task) == get_rt_period(second_task))
79 {
80#ifdef CONFIG_LITMUS_SOFTIRQD
81 if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
82 {
83 return true;
84 }
85 if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
86 {
87#endif
88 if (first_task->pid < second_task->pid)
89 {
90 return true;
91 }
92 if (first_task->pid == second_task->pid)
93 {
94 return !second->rt_param.inh_task;
95 }
96#ifdef CONFIG_LITMUS_SOFTIRQD
97 }
98#endif
99 }
100 }
101
102 return false;
66 103
104#if 0
67 return !is_realtime(second_task) || 105 return !is_realtime(second_task) ||
68 106
69#ifdef CONFIG_LITMUS_SOFTIRQD 107#ifdef CONFIG_LITMUS_SOFTIRQD
@@ -88,6 +126,7 @@ int edf_higher_prio(struct task_struct* first,
88 */ 126 */
89 (first_task->pid == second_task->pid && 127 (first_task->pid == second_task->pid &&
90 !second->rt_param.inh_task))); 128 !second->rt_param.inh_task)));
129#endif
91} 130}
92 131
93int edf_ready_order(struct bheap_node* a, struct bheap_node* b) 132int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
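
Review note on the edf_common.c hunk above: the rewritten edf_higher_prio() replaces the old single boolean expression (now kept under #if 0) with an explicit decision chain: earlier deadline first, then shorter period on a deadline tie, then non-proxy over proxy threads (only with CONFIG_LITMUS_SOFTIRQD), then lower PID, then absence of an inherited priority on the second task. The following self-contained user-space sketch models that ordering with hypothetical, simplified fields; it is an illustration, not the kernel's rt_param layout:

#include <stdio.h>
#include <stdbool.h>

/* Illustration only: simplified stand-ins for the kernel's task parameters. */
struct demo_task {
	unsigned long long deadline;
	unsigned long long period;
	int pid;
	bool has_inherited_prio;   /* models rt_param.inh_task != NULL */
};

/* Mirrors the tie-break chain of the patched edf_higher_prio(), minus
 * priority boosting, inheritance redirection, and the proxy-thread term. */
static bool demo_edf_higher_prio(const struct demo_task *a, const struct demo_task *b)
{
	if (a->deadline != b->deadline)
		return a->deadline < b->deadline;   /* earlier deadline wins */
	if (a->period != b->period)
		return a->period < b->period;       /* then shorter period */
	if (a->pid != b->pid)
		return a->pid < b->pid;             /* then lower PID */
	return !b->has_inherited_prio;              /* finally, inheritance */
}

int main(void)
{
	struct demo_task t1 = { .deadline = 100, .period = 10, .pid = 42, .has_inherited_prio = false };
	struct demo_task t2 = { .deadline = 100, .period = 20, .pid = 17, .has_inherited_prio = false };

	/* Equal deadlines: the shorter period (t1) wins despite its larger PID. */
	printf("t1 higher prio than t2? %d\n", demo_edf_higher_prio(&t1, &t2));
	return 0;
}
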
diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c
new file mode 100644
index 000000000000..c94510a171d9
--- /dev/null
+++ b/litmus/fifo_common.c
@@ -0,0 +1,124 @@
1/*
2 * litmus/fifo_common.c
3 *
4 * Common functions for FIFO based schedulers.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fifo_common.h>
16
17/* fifo_higher_prio - returns true if first has a higher FIFO priority
18 * (i.e., an earlier job release) than second. Release ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fifo_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (first && first == second) {
30 TRACE_TASK(first,
31 "WARNING: pointless edf priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40#ifdef CONFIG_LITMUS_LOCKING
41
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case.
44 */
45 if (unlikely(first->rt_param.inh_task))
46 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task))
48 second_task = second->rt_param.inh_task;
49
50 /* Check for priority boosting. Tie-break by start of boosting.
51 */
52 if (unlikely(is_priority_boosted(first_task))) {
53 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task)))
57 return 1;
58 else
59 return 0;
60 } else if (unlikely(is_priority_boosted(second_task)))
61 /* second_task is boosted, first is not*/
62 return 0;
63
64#endif
65
66
67 return !is_realtime(second_task) ||
68
69#ifdef CONFIG_LITMUS_SOFTIRQD
70 /* proxy threads always lose w/o inheritance. */
71 (first_task->rt_param.is_proxy_thread <
72 second_task->rt_param.is_proxy_thread) ||
73#endif
74
75 /* is the release of the first task earlier?
76 * Then it has higher priority.
77 */
78 earlier_release(first_task, second_task) ||
79
80 /* Do we have a release-time tie?
81 * Then break by PID.
82 */
83 (get_release(first_task) == get_release(second_task) &&
84 (first_task->pid < second_task->pid ||
85
86 /* If the PIDs are the same then the task with the inherited
87 * priority wins.
88 */
89 (first_task->pid == second_task->pid &&
90 !second->rt_param.inh_task)));
91}
92
93int fifo_ready_order(struct bheap_node* a, struct bheap_node* b)
94{
95 return fifo_higher_prio(bheap2task(a), bheap2task(b));
96}
97
98void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
99 release_jobs_t release)
100{
101 rt_domain_init(rt, fifo_ready_order, resched, release);
102}
103
104/* need_to_preempt - check whether the task t needs to be preempted
105 * call only with irqs disabled and with ready_lock acquired
106 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
107 */
108int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t)
109{
110 /* we need the read lock for fifo_ready_queue */
111 /* no need to preempt if there is nothing pending */
112 if (!__jobs_pending(rt))
113 return 0;
114 /* we need to reschedule if t doesn't exist */
115 if (!t)
116 return 1;
117
118 /* NOTE: We cannot check for non-preemptibility since we
119 * don't know what address space we're currently in.
120 */
121
122 /* make sure to get non-rt stuff out of the way */
123 return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t);
124}
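
A plugin picks up this ordering by calling fifo_domain_init(), which hands fifo_ready_order() to rt_domain_init() so the ready heap is kept in release-time order. By analogy with cedf_activate_plugin() later in this patch, the C-FIFO activation path (not shown in this excerpt) presumably initializes each cluster along these lines; this is a hedged sketch, not code quoted from sched_cfifo.c:

/* hypothetical per-cluster setup, mirroring the C-EDF activate path */
bheap_init(&cfifo[i].cpu_heap);
fifo_domain_init(&cfifo[i].domain, NULL, cfifo_release_jobs);
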
diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
new file mode 100644
index 000000000000..b31eeb8a2538
--- /dev/null
+++ b/litmus/litmus_pai_softirq.c
@@ -0,0 +1,64 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19
20
21int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
22{
23 int ret = 0; /* assume failure */
24 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
25 {
26 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
27 BUG();
28 }
29
30 ret = litmus->enqueue_pai_tasklet(t);
31
32 return(ret);
33}
34
35EXPORT_SYMBOL(__litmus_tasklet_schedule);
36
37
38
39// failure causes default Linux handling.
40int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
41{
42 int ret = 0; /* assume failure */
43 return(ret);
44}
45EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
46
47
48// failure causes default Linux handling.
49int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
50{
51 int ret = 0; /* assume failure */
52 return(ret);
53}
54EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
55
56
57// failure causes default Linux handling.
58int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
59{
60 int ret = 0; /* assume failure */
61 return(ret);
62}
63EXPORT_SYMBOL(__litmus_schedule_work);
64
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
index 271e770dbaea..f5cca964b6c6 100644
--- a/litmus/litmus_softirq.c
+++ b/litmus/litmus_softirq.c
@@ -1166,7 +1166,7 @@ int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
1166 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); 1166 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1167 BUG(); 1167 BUG();
1168 } 1168 }
1169 1169
1170 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) 1170 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1171 { 1171 {
1172 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); 1172 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
index 78f035244d21..d17152138c63 100644
--- a/litmus/nvidia_info.c
+++ b/litmus/nvidia_info.c
@@ -361,6 +361,7 @@ int get_nv_device_id(struct task_struct* owner)
361 361
362static int __reg_nv_device(int reg_device_id) 362static int __reg_nv_device(int reg_device_id)
363{ 363{
364 int ret = 0;
364 struct task_struct* old = 365 struct task_struct* old =
365 cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner, 366 cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner,
366 NULL, 367 NULL,
@@ -370,16 +371,21 @@ static int __reg_nv_device(int reg_device_id)
370 371
371 if(likely(old == NULL)) 372 if(likely(old == NULL))
372 { 373 {
374#ifdef CONFIG_LITMUS_SOFTIRQD
373 down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); 375 down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
376#endif
374 TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); 377 TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
375 return(0);
376 } 378 }
377 else 379 else
378 { 380 {
379 TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); 381 TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
380 return(-EBUSY); 382 ret = -EBUSY;
381 } 383 }
382 384
385 return(ret);
386
387
388
383#if 0 389#if 0
384 //unsigned long flags; 390 //unsigned long flags;
385 //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags); 391 //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags);
@@ -411,19 +417,22 @@ static int __reg_nv_device(int reg_device_id)
411 417
412static int __clear_reg_nv_device(int de_reg_device_id) 418static int __clear_reg_nv_device(int de_reg_device_id)
413{ 419{
414 int ret; 420 int ret = 0;
415 unsigned long flags;
416 struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
417 struct task_struct* old; 421 struct task_struct* old;
418 422
423#ifdef CONFIG_LITMUS_SOFTIRQD
424 unsigned long flags;
425 struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
419 lock_nv_registry(de_reg_device_id, &flags); 426 lock_nv_registry(de_reg_device_id, &flags);
427#endif
420 428
421 old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner, 429 old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner,
422 current, 430 current,
423 NULL); 431 NULL);
424 432
425 mb(); 433 mb();
426 434
435#ifdef CONFIG_LITMUS_SOFTIRQD
427 if(likely(old == current)) 436 if(likely(old == current))
428 { 437 {
429 flush_pending(klitirqd_th, current); 438 flush_pending(klitirqd_th, current);
@@ -448,6 +457,7 @@ static int __clear_reg_nv_device(int de_reg_device_id)
448 TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n", 457 TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n",
449 __FUNCTION__, de_reg_device_id); 458 __FUNCTION__, de_reg_device_id);
450 } 459 }
460#endif
451 461
452 return(ret); 462 return(ret);
453} 463}
diff --git a/litmus/rm_common.c b/litmus/rm_common.c
new file mode 100644
index 000000000000..88f83bcbd9d8
--- /dev/null
+++ b/litmus/rm_common.c
@@ -0,0 +1,160 @@
1/*
2 * litmus/rm_common.c
3 *
4 * Common functions for RM based schedulers.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/rm_common.h>
16
17/* rm_higher_prio - returns true if first has a higher RM priority
18 * (i.e., a shorter period) than second. Period ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int rm_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (first && first == second) {
30 TRACE_TASK(first,
31 "WARNING: pointless edf priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40#ifdef CONFIG_LITMUS_LOCKING
41
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case.
44 */
45 if (unlikely(first->rt_param.inh_task))
46 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task))
48 second_task = second->rt_param.inh_task;
49
50 /* Check for priority boosting. Tie-break by start of boosting.
51 */
52 if (unlikely(is_priority_boosted(first_task))) {
53 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task)))
57 return 1;
58 else
59 return 0;
60 } else if (unlikely(is_priority_boosted(second_task)))
61 /* second_task is boosted, first is not*/
62 return 0;
63
64#endif
65
66 if (!is_realtime(second_task))
67 return true;
68
69 if (shorter_period(first_task, second_task))
70 return true;
71
72 if (get_rt_period(first_task) == get_rt_period(second_task))
73 {
74#ifdef CONFIG_LITMUS_SOFTIRQD
75 if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
76 {
77 return true;
78 }
79 if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
80 {
81#endif
82 if (first_task->pid < second_task->pid)
83 {
84 return true;
85 }
86 if (first_task->pid == second_task->pid)
87 {
88 return !second->rt_param.inh_task;
89 }
90#ifdef CONFIG_LITMUS_SOFTIRQD
91 }
92#endif
93 }
94
95 return false;
96
97#if 0
98 return !is_realtime(second_task) ||
99 shorter_period(first_task, second_task) ||
100 ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task))
101
102#ifdef CONFIG_LITMUS_SOFTIRQD
103 /* proxy threads always lose w/o inheritance. */
104 (first_task->rt_param.is_proxy_thread <
105 second_task->rt_param.is_proxy_thread) ||
106#endif
107
108 /* is the period of the first task shorter?
109 * Then it has higher priority.
110 */
111 shorter_period(first_task, second_task) ||
112
113 (earlier_deadline(first_task, second_task) ||
114
115 /* Do we have a deadline tie?
116 * Then break by PID.
117 */
118 (get_rt_period(first_task) == get_rt_period(second_task) &&
119 (first_task->pid < second_task->pid ||
120
121 /* If the PIDs are the same then the task with the inherited
122 * priority wins.
123 */
124 (first_task->pid == second_task->pid &&
125 !second->rt_param.inh_task)));
126#endif
127}
128
129int rm_ready_order(struct bheap_node* a, struct bheap_node* b)
130{
131 return rm_higher_prio(bheap2task(a), bheap2task(b));
132}
133
134void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
135 release_jobs_t release)
136{
137 rt_domain_init(rt, rm_ready_order, resched, release);
138}
139
140/* need_to_preempt - check whether the task t needs to be preempted
141 * call only with irqs disabled and with ready_lock acquired
142 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
143 */
144int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
145{
146 /* we need the read lock for rm_ready_queue */
147 /* no need to preempt if there is nothing pending */
148 if (!__jobs_pending(rt))
149 return 0;
150 /* we need to reschedule if t doesn't exist */
151 if (!t)
152 return 1;
153
154 /* NOTE: We cannot check for non-preemptibility since we
155 * don't know what address space we're currently in.
156 */
157
158 /* make sure to get non-rt stuff out of the way */
159 return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t);
160}
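
In contrast to the EDF comparison earlier in this patch, rm_higher_prio() above never consults deadlines: the task with the shorter period wins outright, and PID and inheritance only break exact period ties (boosting and the proxy-thread case aside). A compact self-contained user-space model with hypothetical, simplified fields, for illustration only:

#include <stdio.h>
#include <stdbool.h>

/* Illustration only: simplified stand-ins for the kernel's task parameters. */
struct demo_task {
	unsigned long long period;
	int pid;
	bool has_inherited_prio;   /* models rt_param.inh_task != NULL */
};

/* Mirrors rm_higher_prio(): shorter period, then lower PID, then inheritance. */
static bool demo_rm_higher_prio(const struct demo_task *a, const struct demo_task *b)
{
	if (a->period != b->period)
		return a->period < b->period;
	if (a->pid != b->pid)
		return a->pid < b->pid;
	return !b->has_inherited_prio;
}

int main(void)
{
	struct demo_task t1 = { .period = 10, .pid = 7, .has_inherited_prio = false };
	struct demo_task t2 = { .period = 25, .pid = 3, .has_inherited_prio = false };

	/* Rate-monotonic: the shorter-period task wins regardless of PID. */
	printf("%d\n", demo_rm_higher_prio(&t1, &t2));  /* prints 1 */
	return 0;
}
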
diff --git a/litmus/rm_srt_common.c b/litmus/rm_srt_common.c
new file mode 100644
index 000000000000..f58a8007678f
--- /dev/null
+++ b/litmus/rm_srt_common.c
@@ -0,0 +1,167 @@
1/*
2 * litmus/rm_srt_common.c
3 *
4 * Common functions for RM-SRT based schedulers.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/rm_common.h>
16
17/* rm_srt_higher_prio - returns true if first has a higher RM-SRT priority
18 * (i.e., a shorter period) than second. Period ties are broken by deadline, then PID.
19 *
20 * both first and second may be NULL
21 */
22int rm_srt_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (first && first == second) {
30 TRACE_TASK(first,
31 "WARNING: pointless edf priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40#ifdef CONFIG_LITMUS_LOCKING
41
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case.
44 */
45 if (unlikely(first->rt_param.inh_task))
46 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task))
48 second_task = second->rt_param.inh_task;
49
50 /* Check for priority boosting. Tie-break by start of boosting.
51 */
52 if (unlikely(is_priority_boosted(first_task))) {
53 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task)))
57 return 1;
58 else
59 return 0;
60 } else if (unlikely(is_priority_boosted(second_task)))
61 /* second_task is boosted, first is not*/
62 return 0;
63
64#endif
65
66 if (!is_realtime(second_task))
67 return true;
68
69 if (shorter_period(first_task, second_task))
70 return true;
71
72 if (get_rt_period(first_task) == get_rt_period(second_task))
73 {
74 if (earlier_deadline(first_task, second_task))
75 {
76 return true;
77 }
78 if(get_deadline(first_task) == get_deadline(second_task))
79 {
80#ifdef CONFIG_LITMUS_SOFTIRQD
81 if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
82 {
83 return true;
84 }
85 if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
86 {
87#endif
88 if (first_task->pid < second_task->pid)
89 {
90 return true;
91 }
92 if (first_task->pid == second_task->pid)
93 {
94 return !second->rt_param.inh_task;
95 }
96#ifdef CONFIG_LITMUS_SOFTIRQD
97 }
98#endif
99 }
100 }
101
102 return false;
103
104#if 0
105 return !is_realtime(second_task) ||
106 shorter_period(first_task, second_task) ||
107 ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task))
108
109#ifdef CONFIG_LITMUS_SOFTIRQD
110 /* proxy threads always lose w/o inheritance. */
111 (first_task->rt_param.is_proxy_thread <
112 second_task->rt_param.is_proxy_thread) ||
113#endif
114
115 /* is the period of the first task shorter?
116 * Then it has higher priority.
117 */
118 shorter_period(first_task, second_task) ||
119
120 (earlier_deadline(first_task, second_task) ||
121
122 /* Do we have a deadline tie?
123 * Then break by PID.
124 */
125 (get_rt_period(first_task) == get_rt_period(second_task) &&
126 (first_task->pid < second_task->pid ||
127
128 /* If the PIDs are the same then the task with the inherited
129 * priority wins.
130 */
131 (first_task->pid == second_task->pid &&
132 !second->rt_param.inh_task)));
133#endif
134}
135
136int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b)
137{
138 return rm_srt_higher_prio(bheap2task(a), bheap2task(b));
139}
140
141void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
142 release_jobs_t release)
143{
144 rt_domain_init(rt, rm_srt_ready_order, resched, release);
145}
146
147/* need_to_preempt - check whether the task t needs to be preempted
148 * call only with irqs disabled and with ready_lock acquired
149 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
150 */
151int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t)
152{
153 /* we need the read lock for rm_ready_queue */
154 /* no need to preempt if there is nothing pending */
155 if (!__jobs_pending(rt))
156 return 0;
157 /* we need to reschedule if t doesn't exist */
158 if (!t)
159 return 1;
160
161 /* NOTE: We cannot check for non-preemptibility since we
162 * don't know what address space we're currently in.
163 */
164
165 /* make sure to get non-rt stuff out of the way */
166 return !is_realtime(t) || rm_srt_higher_prio(__next_ready(rt), t);
167}
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 9b0a8d3b624d..f0356de60b2f 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -55,6 +55,10 @@
55#include <litmus/litmus_softirq.h> 55#include <litmus/litmus_softirq.h>
56#endif 56#endif
57 57
58#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
59#include <linux/interrupt.h>
60#endif
61
58#ifdef CONFIG_LITMUS_NVIDIA 62#ifdef CONFIG_LITMUS_NVIDIA
59#include <litmus/nvidia_info.h> 63#include <litmus/nvidia_info.h>
60#endif 64#endif
@@ -91,6 +95,15 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
91#define test_will_schedule(cpu) \ 95#define test_will_schedule(cpu) \
92 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) 96 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
93 97
98
99#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
100struct tasklet_head
101{
102 struct tasklet_struct *head;
103 struct tasklet_struct **tail;
104};
105#endif
106
94/* 107/*
95 * In C-EDF there is a cedf domain _per_ cluster 108 * In C-EDF there is a cedf domain _per_ cluster
96 * The number of clusters is dynamically determined accordingly to the 109 * The number of clusters is dynamically determined accordingly to the
@@ -108,6 +121,12 @@ typedef struct clusterdomain {
108 struct bheap cpu_heap; 121 struct bheap cpu_heap;
109 /* lock for this cluster */ 122 /* lock for this cluster */
110#define cedf_lock domain.ready_lock 123#define cedf_lock domain.ready_lock
124
125
126#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
127 struct tasklet_head pending_tasklets;
128#endif
129
111} cedf_domain_t; 130} cedf_domain_t;
112 131
113/* a cedf_domain per cluster; allocation is done at init/activation time */ 132/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -395,6 +414,198 @@ static void cedf_tick(struct task_struct* t)
395 } 414 }
396} 415}
397 416
417
418
419
420
421
422
423
424
425
426
427
428
429#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
430
431
432void __do_lit_tasklet(struct tasklet_struct* tasklet)
433{
434 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
435 {
436 BUG();
437 }
438 TRACE("%s: Invoking tasklet with owner pid = %d.\n", __FUNCTION__, tasklet->owner->pid);
439 tasklet->func(tasklet->data);
440 tasklet_unlock(tasklet);
441
442}
443
444void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* next)
445{
446 int work_to_do = 1;
447 struct tasklet_struct *tasklet = NULL;
448
449 TRACE("%s: entered.\n", __FUNCTION__);
450
451 while(work_to_do) {
452 // remove tasklet at head of list if it has higher priority.
453 raw_spin_lock(&cluster->cedf_lock);
454 // remove tasklet at head.
455 if(cluster->pending_tasklets.head != NULL) {
456 tasklet = cluster->pending_tasklets.head;
457
458 if(edf_higher_prio(tasklet->owner, next)) {
459 // remove the tasklet from the queue
460 cluster->pending_tasklets.head = tasklet->next;
461
462 TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
463 }
464 else {
465 TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
466 tasklet = NULL;
467 }
468 }
469 else {
470 //TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
471 }
472 raw_spin_unlock(&cluster->cedf_lock);
473
474 if(tasklet) {
475 __do_lit_tasklet(tasklet);
476 tasklet = NULL;
477 }
478 else {
479 work_to_do = 0;
480 }
481 }
482
483 TRACE("%s: exited.\n", __FUNCTION__);
484}
485
486
487void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
488{
489 struct tasklet_struct* step;
490
491 step = cluster->pending_tasklets.head;
492 TRACE("%s: (BEFORE) dumping tasklet queue...\n");
493 while(step != NULL){
494 TRACE("%s: %d\n", __FUNCTION__, step->owner);
495 step = step->next;
496 }
497 TRACE("%s: done.\n", __FUNCTION__);
498
499
500 step = cluster->pending_tasklets.head;
501 if(step == NULL) {
502 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
503 // insert at tail.
504 tasklet->next = NULL;
505 *(cluster->pending_tasklets.tail) = tasklet;
506 cluster->pending_tasklets.tail = &tasklet->next;
507 }
508 else if((*cluster->pending_tasklets.tail != NULL) &&
509 edf_higher_prio((*cluster->pending_tasklets.tail)->owner, tasklet->owner)) {
510 // insert at tail.
511 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
512
513 tasklet->next = NULL;
514 *(cluster->pending_tasklets.tail) = tasklet;
515 cluster->pending_tasklets.tail = &tasklet->next;
516 }
517 else {
518 // insert the tasklet somewhere in the middle.
519
520 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
521 step = step->next;
522 }
523
524 // insert tasklet right before step->next.
525
526 TRACE("%s: tasklet belongs at end. inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
527
528 tasklet->next = step->next;
529 step->next = tasklet;
530
531 // patch up the head if needed.
532 if(cluster->pending_tasklets.head == step)
533 {
534 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
535 cluster->pending_tasklets.head = tasklet;
536 }
537 }
538
539
540 step = cluster->pending_tasklets.head;
541 TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
542 while(step != NULL){
543 TRACE("%s: %d\n", __FUNCTION__, step->owner);
544 step = step->next;
545 }
546 TRACE("%s: done.\n", __FUNCTION__);
547
548// TODO: Maintain this list in priority order.
549// tasklet->next = NULL;
550// *(cluster->pending_tasklets.tail) = tasklet;
551// cluster->pending_tasklets.tail = &tasklet->next;
552}
553
554int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
555{
556 cedf_domain_t* cluster = task_cpu_cluster(tasklet->owner);
557 cpu_entry_t *lowest;
558 unsigned long flags;
559
560 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
561 {
562 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
563 BUG();
564 }
565
566 raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
567
568 lowest = lowest_prio_cpu(cluster);
569 if (edf_higher_prio(tasklet->owner, lowest->linked)) {
570 if (smp_processor_id() == lowest->cpu) {
571 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
572 // execute the tasklet now.
573 __do_lit_tasklet(tasklet);
574 }
575 else {
576 // preempt the lowest CPU
577 __add_pai_tasklet(tasklet, cluster);
578
579 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, lowest->cpu);
580
581 preempt(lowest);
582 }
583 }
584
585 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
586
587 return(1); // success
588}
589
590
591#endif
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
398/* Getting schedule() right is a bit tricky. schedule() may not make any 609/* Getting schedule() right is a bit tricky. schedule() may not make any
399 * assumptions on the state of the current task since it may be called for a 610 * assumptions on the state of the current task since it may be called for a
400 * number of reasons. The reasons include a scheduler_tick() determined that it 611 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -507,8 +718,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
507 next = prev; 718 next = prev;
508 719
509 sched_state_task_picked(); 720 sched_state_task_picked();
721
510 raw_spin_unlock(&cluster->cedf_lock); 722 raw_spin_unlock(&cluster->cedf_lock);
511 723
724#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
725 do_lit_tasklets(cluster, next);
726#endif
727
512#ifdef WANT_ALL_SCHED_EVENTS 728#ifdef WANT_ALL_SCHED_EVENTS
513 TRACE("cedf_lock released, next=0x%p\n", next); 729 TRACE("cedf_lock released, next=0x%p\n", next);
514 730
@@ -518,7 +734,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
518 TRACE("becomes idle at %llu.\n", litmus_clock()); 734 TRACE("becomes idle at %llu.\n", litmus_clock());
519#endif 735#endif
520 736
521
522 return next; 737 return next;
523} 738}
524 739
@@ -1467,6 +1682,13 @@ static long cedf_activate_plugin(void)
1467 bheap_init(&(cedf[i].cpu_heap)); 1682 bheap_init(&(cedf[i].cpu_heap));
1468 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); 1683 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
1469 1684
1685
1686#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1687 cedf[i].pending_tasklets.head = NULL;
1688 cedf[i].pending_tasklets.tail = &cedf[i].pending_tasklets.head;
1689#endif
1690
1691
1470 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 1692 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
1471 return -ENOMEM; 1693 return -ENOMEM;
1472 } 1694 }
@@ -1578,7 +1800,10 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
1578#ifdef CONFIG_LITMUS_SOFTIRQD 1800#ifdef CONFIG_LITMUS_SOFTIRQD
1579 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, 1801 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
1580 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, 1802 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
1581#endif 1803#endif
1804#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1805 .enqueue_pai_tasklet = enqueue_pai_tasklet,
1806#endif
1582}; 1807};
1583 1808
1584static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; 1809static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
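
Review summary of the PAI additions to C-EDF above: enqueue_pai_tasklet() either runs the tasklet immediately (when the current CPU is the cluster's lowest-priority CPU and the tasklet's owner wins the comparison) or inserts it into a per-cluster list kept in owner-priority order and preempts that CPU; do_lit_tasklets(), invoked from cedf_schedule() after the cluster lock is dropped, then drains the list but only executes tasklets whose owners out-rank the task about to run. The following self-contained user-space sketch models just the ordered insert and the conditional drain, with an integer standing in for edf_higher_prio() on the owner (smaller = higher priority); it is an illustration, not the kernel code:

#include <stdio.h>
#include <stdlib.h>

/* Illustration only: "prio" stands in for the tasklet owner's priority
 * (smaller = higher priority). */
struct demo_tasklet {
	int prio;
	struct demo_tasklet *next;
};

struct demo_queue {
	struct demo_tasklet *head;
	struct demo_tasklet **tail;   /* points at the last 'next' field */
};

static void queue_init(struct demo_queue *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

/* Insert so the list stays sorted, highest priority (smallest prio) first. */
static void queue_insert(struct demo_queue *q, struct demo_tasklet *t)
{
	struct demo_tasklet **link = &q->head;

	while (*link && (*link)->prio <= t->prio)   /* skip entries that out-rank t */
		link = &(*link)->next;

	t->next = *link;
	*link = t;
	if (t->next == NULL)          /* inserted at the end: fix up the tail */
		q->tail = &t->next;
}

/* Drain: run queued tasklets only while they out-rank next_prio,
 * analogous to do_lit_tasklets(cluster, next). */
static void queue_drain(struct demo_queue *q, int next_prio)
{
	while (q->head && q->head->prio < next_prio) {
		struct demo_tasklet *t = q->head;
		q->head = t->next;
		if (q->head == NULL)
			q->tail = &q->head;
		printf("running tasklet with prio %d\n", t->prio);
		free(t);
	}
}

int main(void)
{
	struct demo_queue q;
	int prios[] = { 30, 10, 20 };
	int i;

	queue_init(&q);
	for (i = 0; i < 3; i++) {
		struct demo_tasklet *t = malloc(sizeof(*t));
		t->prio = prios[i];
		queue_insert(&q, t);
	}
	queue_drain(&q, 25);   /* runs prio 10 and 20; prio 30 stays queued */
	return 0;
}
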
diff --git a/litmus/sched_cfifo.c b/litmus/sched_cfifo.c
new file mode 100644
index 000000000000..f515446f76ed
--- /dev/null
+++ b/litmus/sched_cfifo.c
@@ -0,0 +1,1611 @@
1/*
2 * litmus/sched_cfifo.c
3 *
4 * Implementation of the C-FIFO scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch-dependent
9 * and is working only on x86 at the moment --- and only with modern
10 * CPUs that export cpuid4 information)
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - default clustering is around L2 cache (cache index = 2)
15 * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
16 * online_cpus are placed in a single cluster).
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32#include <linux/uaccess.h>
33
34#include <linux/module.h>
35
36#include <litmus/litmus.h>
37#include <litmus/jobs.h>
38#include <litmus/preempt.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/fifo_common.h>
41#include <litmus/sched_trace.h>
42
43#include <litmus/clustered.h>
44
45#include <litmus/bheap.h>
46
47/* to configure the cluster size */
48#include <litmus/litmus_proc.h>
49
50#ifdef CONFIG_SCHED_CPU_AFFINITY
51#include <litmus/affinity.h>
52#endif
53
54#ifdef CONFIG_LITMUS_SOFTIRQD
55#include <litmus/litmus_softirq.h>
56#endif
57
58#ifdef CONFIG_LITMUS_NVIDIA
59#include <litmus/nvidia_info.h>
60#endif
61
62/* Reference configuration variable. Determines which cache level is used to
63 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
64 * all CPUs form a single cluster (just like GSN-EDF).
65 */
66static enum cache_level cluster_config = GLOBAL_CLUSTER;
67
68struct clusterdomain;
69
70/* cpu_entry_t - maintain the linked and scheduled state
71 *
72 * A cpu also contains a pointer to the cfifo_domain_t cluster
73 * that owns it (struct clusterdomain*)
74 */
75typedef struct {
76 int cpu;
77 struct clusterdomain* cluster; /* owning cluster */
78 struct task_struct* linked; /* only RT tasks */
79 struct task_struct* scheduled; /* only RT tasks */
80 atomic_t will_schedule; /* prevent unneeded IPIs */
81 struct bheap_node* hn;
82} cpu_entry_t;
83
84/* one cpu_entry_t per CPU */
85DEFINE_PER_CPU(cpu_entry_t, cfifo_cpu_entries);
86
87#define set_will_schedule() \
88 (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 1))
89#define clear_will_schedule() \
90 (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 0))
91#define test_will_schedule(cpu) \
92 (atomic_read(&per_cpu(cfifo_cpu_entries, cpu).will_schedule))
93
94/*
95 * In C-FIFO there is a cfifo domain _per_ cluster
96 * The number of clusters is dynamically determined accordingly to the
97 * total cpu number and the cluster size
98 */
99typedef struct clusterdomain {
100 /* rt_domain for this cluster */
101 rt_domain_t domain;
102 /* cpus in this cluster */
103 cpu_entry_t* *cpus;
104 /* map of this cluster cpus */
105 cpumask_var_t cpu_map;
106 /* the cpus queue themselves according to priority in here */
107 struct bheap_node *heap_node;
108 struct bheap cpu_heap;
109 /* lock for this cluster */
110#define cfifo_lock domain.ready_lock
111} cfifo_domain_t;
112
113/* a cfifo_domain per cluster; allocation is done at init/activation time */
114cfifo_domain_t *cfifo;
115
116#define remote_cluster(cpu) ((cfifo_domain_t *) per_cpu(cfifo_cpu_entries, cpu).cluster)
117#define task_cpu_cluster(task) remote_cluster(get_partition(task))
118
119/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
120 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
121 * information during the initialization of the plugin (e.g., topology)
122#define WANT_ALL_SCHED_EVENTS
123 */
124#define VERBOSE_INIT
125
126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
127{
128 cpu_entry_t *a, *b;
129 a = _a->value;
130 b = _b->value;
131 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap.
133 */
134 return fifo_higher_prio(b->linked, a->linked);
135}
136
137/* update_cpu_position - Move the cpu entry to the correct place to maintain
138 * order in the cpu queue. Caller must hold cfifo lock.
139 */
140static void update_cpu_position(cpu_entry_t *entry)
141{
142 cfifo_domain_t *cluster = entry->cluster;
143
144 if (likely(bheap_node_in_heap(entry->hn)))
145 bheap_delete(cpu_lower_prio,
146 &cluster->cpu_heap,
147 entry->hn);
148
149 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
150}
151
152/* caller must hold cfifo lock */
153static cpu_entry_t* lowest_prio_cpu(cfifo_domain_t *cluster)
154{
155 struct bheap_node* hn;
156 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
157 return hn->value;
158}
159
160
161/* link_task_to_cpu - Update the link of a CPU.
162 * Handles the case where the to-be-linked task is already
163 * scheduled on a different CPU.
164 */
165static noinline void link_task_to_cpu(struct task_struct* linked,
166 cpu_entry_t *entry)
167{
168 cpu_entry_t *sched;
169 struct task_struct* tmp;
170 int on_cpu;
171
172 BUG_ON(linked && !is_realtime(linked));
173
174 /* Currently linked task is set to be unlinked. */
175 if (entry->linked) {
176 entry->linked->rt_param.linked_on = NO_CPU;
177 }
178
179 /* Link new task to CPU. */
180 if (linked) {
181 set_rt_flags(linked, RT_F_RUNNING);
182 /* handle task is already scheduled somewhere! */
183 on_cpu = linked->rt_param.scheduled_on;
184 if (on_cpu != NO_CPU) {
185 sched = &per_cpu(cfifo_cpu_entries, on_cpu);
186 /* this should only happen if not linked already */
187 BUG_ON(sched->linked == linked);
188
189 /* If we are already scheduled on the CPU to which we
190 * wanted to link, we don't need to do the swap --
191 * we just link ourselves to the CPU and depend on
192 * the caller to get things right.
193 */
194 if (entry != sched) {
195 TRACE_TASK(linked,
196 "already scheduled on %d, updating link.\n",
197 sched->cpu);
198 tmp = sched->linked;
199 linked->rt_param.linked_on = sched->cpu;
200 sched->linked = linked;
201 update_cpu_position(sched);
202 linked = tmp;
203 }
204 }
205 if (linked) /* might be NULL due to swap */
206 linked->rt_param.linked_on = entry->cpu;
207 }
208 entry->linked = linked;
209#ifdef WANT_ALL_SCHED_EVENTS
210 if (linked)
211 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
212 else
213 TRACE("NULL linked to %d.\n", entry->cpu);
214#endif
215 update_cpu_position(entry);
216}
217
218/* unlink - Make sure a task is not linked any longer to an entry
219 * where it was linked before. Must hold cfifo_lock.
220 */
221static noinline void unlink(struct task_struct* t)
222{
223 cpu_entry_t *entry;
224
225 if (t->rt_param.linked_on != NO_CPU) {
226 /* unlink */
227 entry = &per_cpu(cfifo_cpu_entries, t->rt_param.linked_on);
228 t->rt_param.linked_on = NO_CPU;
229 link_task_to_cpu(NULL, entry);
230 } else if (is_queued(t)) {
231 /* This is an interesting situation: t is scheduled,
232 * but was just recently unlinked. It cannot be
233 * linked anywhere else (because then it would have
234 * been relinked to this CPU), thus it must be in some
235 * queue. We must remove it from the list in this
236 * case.
237 *
238 * in the C-FIFO case it should be somewhere in the queue of
239 * its domain; therefore we can get the domain using
240 * task_cpu_cluster
241 */
242 remove(&(task_cpu_cluster(t))->domain, t);
243 }
244}
245
246
247/* preempt - force a CPU to reschedule
248 */
249static void preempt(cpu_entry_t *entry)
250{
251 preempt_if_preemptable(entry->scheduled, entry->cpu);
252}
253
254/* requeue - Put an unlinked task into its cluster's C-FIFO domain.
255 * Caller must hold cfifo_lock.
256 */
257static noinline void requeue(struct task_struct* task)
258{
259 cfifo_domain_t *cluster = task_cpu_cluster(task);
260 BUG_ON(!task);
261 /* sanity check before insertion */
262 BUG_ON(is_queued(task));
263
264 if (is_released(task, litmus_clock()))
265 __add_ready(&cluster->domain, task);
266 else {
267 /* it has got to wait */
268 add_release(&cluster->domain, task);
269 }
270}
271
272#ifdef CONFIG_SCHED_CPU_AFFINITY
273static cpu_entry_t* cfifo_get_nearest_available_cpu(
274 cfifo_domain_t *cluster, cpu_entry_t* start)
275{
276 cpu_entry_t* affinity;
277
278 get_nearest_available_cpu(affinity, start, cfifo_cpu_entries, -1);
279
280 /* make sure CPU is in our cluster */
281 if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
282 return(affinity);
283 else
284 return(NULL);
285}
286#endif
287
288
289/* check for any necessary preemptions */
290static void check_for_preemptions(cfifo_domain_t *cluster)
291{
292 struct task_struct *task;
293 cpu_entry_t *last;
294
295 for(last = lowest_prio_cpu(cluster);
296 fifo_preemption_needed(&cluster->domain, last->linked);
297 last = lowest_prio_cpu(cluster)) {
298 /* preemption necessary */
299 task = __take_ready(&cluster->domain);
300#ifdef CONFIG_SCHED_CPU_AFFINITY
301 {
302 cpu_entry_t* affinity =
303 cfifo_get_nearest_available_cpu(cluster,
304 &per_cpu(cfifo_cpu_entries, task_cpu(task)));
305 if(affinity)
306 last = affinity;
307 else if(last->linked)
308 requeue(last->linked);
309 }
310#else
311 if (last->linked)
312 requeue(last->linked);
313#endif
314 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
315 task->pid, last->cpu);
316 link_task_to_cpu(task, last);
317 preempt(last);
318 }
319}
320
321/* cfifo_job_arrival: task is either resumed or released */
322static noinline void cfifo_job_arrival(struct task_struct* task)
323{
324 cfifo_domain_t *cluster = task_cpu_cluster(task);
325 BUG_ON(!task);
326
327 requeue(task);
328 check_for_preemptions(cluster);
329}
330
331static void cfifo_release_jobs(rt_domain_t* rt, struct bheap* tasks)
332{
333 cfifo_domain_t* cluster = container_of(rt, cfifo_domain_t, domain);
334 unsigned long flags;
335
336 raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
337
338 __merge_ready(&cluster->domain, tasks);
339 check_for_preemptions(cluster);
340
341 raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
342}
343
344/* caller holds cfifo_lock */
345static noinline void job_completion(struct task_struct *t, int forced)
346{
347 BUG_ON(!t);
348
349 sched_trace_task_completion(t, forced);
350
351#ifdef CONFIG_LITMUS_NVIDIA
352 atomic_set(&tsk_rt(t)->nv_int_count, 0);
353#endif
354
355 TRACE_TASK(t, "job_completion().\n");
356
357 /* set flags */
358 set_rt_flags(t, RT_F_SLEEP);
359 /* prepare for next period */
360 prepare_for_next_period(t);
361 if (is_released(t, litmus_clock()))
362 sched_trace_task_release(t);
363 /* unlink */
364 unlink(t);
365 /* requeue
366 * But don't requeue a blocking task. */
367 if (is_running(t))
368 cfifo_job_arrival(t);
369}
370
371/* cfifo_tick - this function is called for every local timer
372 * interrupt.
373 *
374 * checks whether the current task has expired and checks
375 * whether we need to preempt it if it has not expired
376 */
377static void cfifo_tick(struct task_struct* t)
378{
379 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
380 if (!is_np(t)) {
381 /* np tasks will be preempted when they become
382 * preemptable again
383 */
384 litmus_reschedule_local();
385 set_will_schedule();
386 TRACE("cfifo_scheduler_tick: "
387 "%d is preemptable "
388 " => FORCE_RESCHED\n", t->pid);
389 } else if (is_user_np(t)) {
390 TRACE("cfifo_scheduler_tick: "
391 "%d is non-preemptable, "
392 "preemption delayed.\n", t->pid);
393 request_exit_np(t);
394 }
395 }
396}
397
398/* Getting schedule() right is a bit tricky. schedule() may not make any
399 * assumptions on the state of the current task since it may be called for a
400 * number of reasons. The reasons include a scheduler_tick() determined that it
401 * was necessary, because sys_exit_np() was called, because some Linux
402 * subsystem determined so, or even (in the worst case) because there is a bug
403 * hidden somewhere. Thus, we must take extreme care to determine what the
404 * current state is.
405 *
406 * The CPU could currently be scheduling a task (or not), be linked (or not).
407 *
408 * The following assertions for the scheduled task could hold:
409 *
410 * - !is_running(scheduled) // the job blocks
411 * - scheduled->timeslice == 0 // the job completed (forcefully)
412 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
413 * - linked != scheduled // we need to reschedule (for any reason)
414 * - is_np(scheduled) // rescheduling must be delayed,
415 * sys_exit_np must be requested
416 *
417 * Any of these can occur together.
418 */
419static struct task_struct* cfifo_schedule(struct task_struct * prev)
420{
421 cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries);
422 cfifo_domain_t *cluster = entry->cluster;
423 int out_of_time, sleep, preempt, np, exists, blocks;
424 struct task_struct* next = NULL;
425
426 raw_spin_lock(&cluster->cfifo_lock);
427 clear_will_schedule();
428
429 /* sanity checking */
430 BUG_ON(entry->scheduled && entry->scheduled != prev);
431 BUG_ON(entry->scheduled && !is_realtime(prev));
432 BUG_ON(is_realtime(prev) && !entry->scheduled);
433
434 /* (0) Determine state */
435 exists = entry->scheduled != NULL;
436 blocks = exists && !is_running(entry->scheduled);
437 out_of_time = exists &&
438 budget_enforced(entry->scheduled) &&
439 budget_exhausted(entry->scheduled);
440 np = exists && is_np(entry->scheduled);
441 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
442 preempt = entry->scheduled != entry->linked;
443
444#ifdef WANT_ALL_SCHED_EVENTS
445 TRACE_TASK(prev, "invoked cfifo_schedule.\n");
446#endif
447
448 if (exists)
449 TRACE_TASK(prev,
450 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
451 "state:%d sig:%d\n",
452 blocks, out_of_time, np, sleep, preempt,
453 prev->state, signal_pending(prev));
454 if (entry->linked && preempt)
455 TRACE_TASK(prev, "will be preempted by %s/%d\n",
456 entry->linked->comm, entry->linked->pid);
457
458
459 /* If a task blocks we have no choice but to reschedule.
460 */
461 if (blocks)
462 unlink(entry->scheduled);
463
464 /* Request a sys_exit_np() call if we would like to preempt but cannot.
465 * We need to make sure to update the link structure anyway in case
466 * that we are still linked. Multiple calls to request_exit_np() don't
467 * hurt.
468 */
469 if (np && (out_of_time || preempt || sleep)) {
470 unlink(entry->scheduled);
471 request_exit_np(entry->scheduled);
472 }
473
474 /* Any task that is preemptable and either exhausts its execution
475 * budget or wants to sleep completes. We may have to reschedule after
476 * this. Don't do a job completion if we block (can't have timers running
477	 * for blocked jobs). Preemptions go first for the same reason.
478 */
479 if (!np && (out_of_time || sleep) && !blocks && !preempt)
480 job_completion(entry->scheduled, !sleep);
481
482 /* Link pending task if we became unlinked.
483 */
484 if (!entry->linked)
485 link_task_to_cpu(__take_ready(&cluster->domain), entry);
486
487 /* The final scheduling decision. Do we need to switch for some reason?
488 * If linked is different from scheduled, then select linked as next.
489 */
490 if ((!np || blocks) &&
491 entry->linked != entry->scheduled) {
492 /* Schedule a linked job? */
493 if (entry->linked) {
494 entry->linked->rt_param.scheduled_on = entry->cpu;
495 next = entry->linked;
496 }
497 if (entry->scheduled) {
498 /* not gonna be scheduled soon */
499 entry->scheduled->rt_param.scheduled_on = NO_CPU;
500 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
501 }
502 } else
503 /* Only override Linux scheduler if we have a real-time task
504 * scheduled that needs to continue.
505 */
506 if (exists)
507 next = prev;
508
509 sched_state_task_picked();
510 raw_spin_unlock(&cluster->cfifo_lock);
511
512#ifdef WANT_ALL_SCHED_EVENTS
513 TRACE("cfifo_lock released, next=0x%p\n", next);
514
515 if (next)
516 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
517 else if (exists && !next)
518 TRACE("becomes idle at %llu.\n", litmus_clock());
519#endif
520
521
522 return next;
523}
524
525
526/* _finish_switch - we just finished the switch away from prev
527 */
528static void cfifo_finish_switch(struct task_struct *prev)
529{
530 cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries);
531
532 entry->scheduled = is_realtime(current) ? current : NULL;
533#ifdef WANT_ALL_SCHED_EVENTS
534 TRACE_TASK(prev, "switched away from\n");
535#endif
536}
537
538
539/* Prepare a task for running in RT mode
540 */
541static void cfifo_task_new(struct task_struct * t, int on_rq, int running)
542{
543 unsigned long flags;
544 cpu_entry_t* entry;
545 cfifo_domain_t* cluster;
546
547	TRACE("C-FIFO: task new %d\n", t->pid);
548
549 /* the cluster doesn't change even if t is running */
550 cluster = task_cpu_cluster(t);
551
552 raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
553
554 /* setup job params */
555 release_at(t, litmus_clock());
556
557 if (running) {
558 entry = &per_cpu(cfifo_cpu_entries, task_cpu(t));
559 BUG_ON(entry->scheduled);
560
561 entry->scheduled = t;
562 tsk_rt(t)->scheduled_on = task_cpu(t);
563 } else {
564 t->rt_param.scheduled_on = NO_CPU;
565 }
566 t->rt_param.linked_on = NO_CPU;
567
568 cfifo_job_arrival(t);
569 raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
570}
571
572static void cfifo_task_wake_up(struct task_struct *task)
573{
574 unsigned long flags;
575 //lt_t now;
576 cfifo_domain_t *cluster;
577
578 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
579
580 cluster = task_cpu_cluster(task);
581
582 raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
583
584#if 0 // sporadic task model
585 /* We need to take suspensions because of semaphores into
586 * account! If a job resumes after being suspended due to acquiring
587 * a semaphore, it should never be treated as a new job release.
588 */
589 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
590 set_rt_flags(task, RT_F_RUNNING);
591 } else {
592 now = litmus_clock();
593 if (is_tardy(task, now)) {
594 /* new sporadic release */
595 release_at(task, now);
596 sched_trace_task_release(task);
597 }
598 else {
599 if (task->rt.time_slice) {
600 /* came back in time before deadline
601 */
602 set_rt_flags(task, RT_F_RUNNING);
603 }
604 }
605 }
606#endif
607
608 //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
609 set_rt_flags(task, RT_F_RUNNING); // periodic model
610
611 if(tsk_rt(task)->linked_on == NO_CPU)
612 cfifo_job_arrival(task);
613 else
614 TRACE("WTF, mate?!\n");
615
616 raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
617}
618
619static void cfifo_task_block(struct task_struct *t)
620{
621 unsigned long flags;
622 cfifo_domain_t *cluster;
623
624 TRACE_TASK(t, "block at %llu\n", litmus_clock());
625
626 cluster = task_cpu_cluster(t);
627
628 /* unlink if necessary */
629 raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
630 unlink(t);
631 raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
632
633 BUG_ON(!is_realtime(t));
634}
635
636
637static void cfifo_task_exit(struct task_struct * t)
638{
639 unsigned long flags;
640 cfifo_domain_t *cluster = task_cpu_cluster(t);
641
642 /* unlink if necessary */
643 raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
644 unlink(t);
645 if (tsk_rt(t)->scheduled_on != NO_CPU) {
646 cpu_entry_t *cpu;
647 cpu = &per_cpu(cfifo_cpu_entries, tsk_rt(t)->scheduled_on);
648 cpu->scheduled = NULL;
649 tsk_rt(t)->scheduled_on = NO_CPU;
650 }
651 raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
652
653 BUG_ON(!is_realtime(t));
654 TRACE_TASK(t, "RIP\n");
655}
656
657static long cfifo_admit_task(struct task_struct* tsk)
658{
659 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
660}
661
662
663
664
665
666
667
668
669
670
671
672
673
674#ifdef CONFIG_LITMUS_LOCKING
675
676#include <litmus/fdso.h>
677
678
679static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
680{
681 int linked_on;
682 int check_preempt = 0;
683
684 cfifo_domain_t* cluster = task_cpu_cluster(t);
685
686 if(prio_inh != NULL)
687 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
688 else
689 TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
690
691 sched_trace_eff_prio_change(t, prio_inh);
692
693 tsk_rt(t)->inh_task = prio_inh;
694
695 linked_on = tsk_rt(t)->linked_on;
696
697 /* If it is scheduled, then we need to reorder the CPU heap. */
698 if (linked_on != NO_CPU) {
699 TRACE_TASK(t, "%s: linked on %d\n",
700 __FUNCTION__, linked_on);
701 /* Holder is scheduled; need to re-order CPUs.
702 * We can't use heap_decrease() here since
703 * the cpu_heap is ordered in reverse direction, so
704 * it is actually an increase. */
705 bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
706 per_cpu(cfifo_cpu_entries, linked_on).hn);
707 bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
708 per_cpu(cfifo_cpu_entries, linked_on).hn);
709 } else {
710 /* holder may be queued: first stop queue changes */
711 raw_spin_lock(&cluster->domain.release_lock);
712 if (is_queued(t)) {
713 TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
714
715 /* We need to update the position of holder in some
716			 * heap. Note that this could be a release heap if
717			 * budget enforcement is used and this job overran. */
718 check_preempt = !bheap_decrease(fifo_ready_order, tsk_rt(t)->heap_node);
719
720 } else {
721 /* Nothing to do: if it is not queued and not linked
722 * then it is either sleeping or currently being moved
723 * by other code (e.g., a timer interrupt handler) that
724 * will use the correct priority when enqueuing the
725 * task. */
726 TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
727 }
728 raw_spin_unlock(&cluster->domain.release_lock);
729
730 /* If holder was enqueued in a release heap, then the following
731 * preemption check is pointless, but we can't easily detect
732 * that case. If you want to fix this, then consider that
733 * simply adding a state flag requires O(n) time to update when
734 * releasing n tasks, which conflicts with the goal to have
735 * O(log n) merges. */
736 if (check_preempt) {
737 /* heap_decrease() hit the top level of the heap: make
738 * sure preemption checks get the right task, not the
739 * potentially stale cache. */
740 bheap_uncache_min(fifo_ready_order, &cluster->domain.ready_queue);
741 check_for_preemptions(cluster);
742 }
743 }
744}
745
746/* called with IRQs off */
747static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
748{
749 cfifo_domain_t* cluster = task_cpu_cluster(t);
750
751 raw_spin_lock(&cluster->cfifo_lock);
752
753 __set_priority_inheritance(t, prio_inh);
754
755#ifdef CONFIG_LITMUS_SOFTIRQD
756 if(tsk_rt(t)->cur_klitirqd != NULL)
757 {
758 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
759 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
760
761 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
762 }
763#endif
764
765 raw_spin_unlock(&cluster->cfifo_lock);
766}
767
768
769/* called with IRQs off */
770static void __clear_priority_inheritance(struct task_struct* t)
771{
772 TRACE_TASK(t, "priority restored\n");
773
774 if(tsk_rt(t)->scheduled_on != NO_CPU)
775 {
776 sched_trace_eff_prio_change(t, NULL);
777
778 tsk_rt(t)->inh_task = NULL;
779
780 /* Check if rescheduling is necessary. We can't use heap_decrease()
781 * since the priority was effectively lowered. */
782 unlink(t);
783 cfifo_job_arrival(t);
784 }
785 else
786 {
787 __set_priority_inheritance(t, NULL);
788 }
789
790#ifdef CONFIG_LITMUS_SOFTIRQD
791 if(tsk_rt(t)->cur_klitirqd != NULL)
792 {
793 TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
794 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
795
796 if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
797 {
798 sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
799
800 tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
801
802 /* Check if rescheduling is necessary. We can't use heap_decrease()
803 * since the priority was effectively lowered. */
804 unlink(tsk_rt(t)->cur_klitirqd);
805 cfifo_job_arrival(tsk_rt(t)->cur_klitirqd);
806 }
807 else
808 {
809 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
810 }
811 }
812#endif
813}
814
815/* called with IRQs off */
816static void clear_priority_inheritance(struct task_struct* t)
817{
818 cfifo_domain_t* cluster = task_cpu_cluster(t);
819
820 raw_spin_lock(&cluster->cfifo_lock);
821 __clear_priority_inheritance(t);
822 raw_spin_unlock(&cluster->cfifo_lock);
823}
824
825
826
827#ifdef CONFIG_LITMUS_SOFTIRQD
828/* called with IRQs off */
829static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
830 struct task_struct* old_owner,
831 struct task_struct* new_owner)
832{
833 cfifo_domain_t* cluster = task_cpu_cluster(klitirqd);
834
835 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
836
837 raw_spin_lock(&cluster->cfifo_lock);
838
839 if(old_owner != new_owner)
840 {
841 if(old_owner)
842 {
843 // unreachable?
844 tsk_rt(old_owner)->cur_klitirqd = NULL;
845 }
846
847 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
848 new_owner->comm, new_owner->pid);
849
850 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
851 }
852
853 __set_priority_inheritance(klitirqd,
854 (tsk_rt(new_owner)->inh_task == NULL) ?
855 new_owner :
856 tsk_rt(new_owner)->inh_task);
857
858 raw_spin_unlock(&cluster->cfifo_lock);
859}
860
861/* called with IRQs off */
862static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
863 struct task_struct* old_owner)
864{
865 cfifo_domain_t* cluster = task_cpu_cluster(klitirqd);
866
867 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
868
869 raw_spin_lock(&cluster->cfifo_lock);
870
871 TRACE_TASK(klitirqd, "priority restored\n");
872
873 if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
874 {
875 tsk_rt(klitirqd)->inh_task = NULL;
876
877 /* Check if rescheduling is necessary. We can't use heap_decrease()
878 * since the priority was effectively lowered. */
879 unlink(klitirqd);
880 cfifo_job_arrival(klitirqd);
881 }
882 else
883 {
884 __set_priority_inheritance(klitirqd, NULL);
885 }
886
887 tsk_rt(old_owner)->cur_klitirqd = NULL;
888
889 raw_spin_unlock(&cluster->cfifo_lock);
890}
891#endif // CONFIG_LITMUS_SOFTIRQD
892
893
894/* ******************** KFMLP support ********************** */
895
896/* struct for semaphore with priority inheritance */
897struct kfmlp_queue
898{
899 wait_queue_head_t wait;
900 struct task_struct* owner;
901 struct task_struct* hp_waiter;
902 int count; /* number of waiters + holder */
903};
904
905struct kfmlp_semaphore
906{
907 struct litmus_lock litmus_lock;
908
909 spinlock_t lock;
910
911 int num_resources; /* aka k */
912 struct kfmlp_queue *queues; /* array */
913 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
914};
915
916static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
917{
918 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
919}
920
921static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
922 struct kfmlp_queue* queue)
923{
924 return (queue - &sem->queues[0]);
925}
926
927static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
928 struct task_struct* holder)
929{
930 int i;
931 for(i = 0; i < sem->num_resources; ++i)
932 if(sem->queues[i].owner == holder)
933 return(&sem->queues[i]);
934 return(NULL);
935}
936
937/* caller is responsible for locking */
938static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
939 struct task_struct *skip)
940{
941 struct list_head *pos;
942 struct task_struct *queued, *found = NULL;
943
944 list_for_each(pos, &kqueue->wait.task_list) {
945 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
946 task_list)->private;
947
948 /* Compare task prios, find high prio task. */
949 if (queued != skip && fifo_higher_prio(queued, found))
950 found = queued;
951 }
952 return found;
953}
954
955static inline struct kfmlp_queue* kfmlp_find_shortest(
956 struct kfmlp_semaphore* sem,
957 struct kfmlp_queue* search_start)
958{
959 // we start our search at search_start instead of at the beginning of the
960 // queue list to load-balance across all resources.
961 struct kfmlp_queue* step = search_start;
962 struct kfmlp_queue* shortest = sem->shortest_queue;
963
964 do
965 {
966 step = (step+1 != &sem->queues[sem->num_resources]) ?
967 step+1 : &sem->queues[0];
968 if(step->count < shortest->count)
969 {
970 shortest = step;
971 if(step->count == 0)
972 break; /* can't get any shorter */
973 }
974 }while(step != search_start);
975
976 return(shortest);
977}
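
kfmlp_find_shortest() scans the k queues once, starting just past search_start, so that successive requests spread across replicas rather than piling onto queue 0. A standalone sketch of the same wraparound scan over an array of counts (the running minimum starts at the caller's slot here, a simplification of the cached sem->shortest_queue pointer):

/* Wraparound scan for the shortest of k queues, mirroring
 * kfmlp_find_shortest(): the search begins just past 'start', so
 * repeated requests spread across replicas when counts tie. */
#include <stdio.h>

static int find_shortest(const int count[], int k, int start)
{
	int step = start, shortest = start;

	do {
		step = (step + 1) % k;          /* wrap around the array */
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break;          /* cannot get any shorter */
		}
	} while (step != start);

	return shortest;
}

int main(void)
{
	int count[4] = { 1, 2, 1, 1 };
	printf("from queue 0 -> queue %d\n", find_shortest(count, 4, 0));
	printf("from queue 2 -> queue %d\n", find_shortest(count, 4, 2));
	return 0;
}
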
978
979static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
980{
981 /* must hold sem->lock */
982
983 struct kfmlp_queue *my_queue = NULL;
984 struct task_struct *max_hp = NULL;
985
986
987 struct list_head *pos;
988 struct task_struct *queued;
989 int i;
990
991 for(i = 0; i < sem->num_resources; ++i)
992 {
993 if( (sem->queues[i].count > 1) &&
994 ((my_queue == NULL) ||
995 (fifo_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
996 {
997 my_queue = &sem->queues[i];
998 }
999 }
1000
1001 if(my_queue)
1002 {
1003 cfifo_domain_t* cluster;
1004
1005 max_hp = my_queue->hp_waiter;
1006 BUG_ON(!max_hp);
1007
1008 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
1009 kfmlp_get_idx(sem, my_queue),
1010 max_hp->comm, max_hp->pid,
1011 kfmlp_get_idx(sem, my_queue));
1012
1013 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
1014
1015 /*
1016 if(my_queue->hp_waiter)
1017 TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
1018 kfmlp_get_idx(sem, my_queue),
1019 my_queue->hp_waiter->comm,
1020 my_queue->hp_waiter->pid);
1021 else
1022 TRACE_CUR("queue %d: new hp_waiter is %p\n",
1023 kfmlp_get_idx(sem, my_queue), NULL);
1024 */
1025
1026 cluster = task_cpu_cluster(max_hp);
1027
1028 raw_spin_lock(&cluster->cfifo_lock);
1029
1030 /*
1031 if(my_queue->owner)
1032 TRACE_CUR("queue %d: owner is %s/%d\n",
1033 kfmlp_get_idx(sem, my_queue),
1034 my_queue->owner->comm,
1035 my_queue->owner->pid);
1036 else
1037 TRACE_CUR("queue %d: owner is %p\n",
1038 kfmlp_get_idx(sem, my_queue),
1039 NULL);
1040 */
1041
1042 if(tsk_rt(my_queue->owner)->inh_task == max_hp)
1043 {
1044 __clear_priority_inheritance(my_queue->owner);
1045 if(my_queue->hp_waiter != NULL)
1046 {
1047 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1048 }
1049 }
1050 raw_spin_unlock(&cluster->cfifo_lock);
1051
1052 list_for_each(pos, &my_queue->wait.task_list)
1053 {
1054 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1055 task_list)->private;
1056 /* Compare task prios, find high prio task. */
1057 if (queued == max_hp)
1058 {
1059 /*
1060 TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
1061 kfmlp_get_idx(sem, my_queue));
1062 */
1063 __remove_wait_queue(&my_queue->wait,
1064 list_entry(pos, wait_queue_t, task_list));
1065 break;
1066 }
1067 }
1068 --(my_queue->count);
1069 }
1070
1071 return(max_hp);
1072}
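
kfmlp_remove_hp_waiter() chooses its victim by scanning all k queues for the one that both has someone to spare (count > 1) and holds the most urgent hp_waiter. A minimal sketch of that selection with integer priorities (lower value = more urgent, purely for illustration):

/* Pick the queue to steal from: among queues with at least one waiter
 * besides the owner (count > 1), take the one whose head waiter is
 * most urgent.  Integer priorities, lower = more urgent. */
#include <stdio.h>

struct toy_queue {
	int count;      /* waiters + holder */
	int hp_waiter;  /* priority of highest-prio waiter, if any */
};

static int pick_victim(const struct toy_queue q[], int k)
{
	int i, victim = -1;
	for (i = 0; i < k; i++) {
		if (q[i].count > 1 &&
		    (victim < 0 || q[i].hp_waiter < q[victim].hp_waiter))
			victim = i;
	}
	return victim;  /* -1 means there is nothing to steal */
}

int main(void)
{
	struct toy_queue q[3] = {
		{ .count = 1, .hp_waiter = 0 },   /* only the owner */
		{ .count = 3, .hp_waiter = 12 },
		{ .count = 2, .hp_waiter = 7 },
	};
	printf("steal from queue %d\n", pick_victim(q, 3));
	return 0;
}
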
1073
1074int cfifo_kfmlp_lock(struct litmus_lock* l)
1075{
1076 struct task_struct* t = current;
1077 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1078 struct kfmlp_queue* my_queue;
1079 wait_queue_t wait;
1080 unsigned long flags;
1081
1082 if (!is_realtime(t))
1083 return -EPERM;
1084
1085 spin_lock_irqsave(&sem->lock, flags);
1086
1087 my_queue = sem->shortest_queue;
1088
1089 if (my_queue->owner) {
1090 /* resource is not free => must suspend and wait */
1091 TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
1092 kfmlp_get_idx(sem, my_queue));
1093
1094 init_waitqueue_entry(&wait, t);
1095
1096 /* FIXME: interruptible would be nice some day */
1097 set_task_state(t, TASK_UNINTERRUPTIBLE);
1098
1099 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
1100
1101 /* check if we need to activate priority inheritance */
1102 if (fifo_higher_prio(t, my_queue->hp_waiter))
1103 {
1104 my_queue->hp_waiter = t;
1105 if (fifo_higher_prio(t, my_queue->owner))
1106 {
1107 set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1108 }
1109 }
1110
1111 ++(my_queue->count);
1112 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1113
1114 /* release lock before sleeping */
1115 spin_unlock_irqrestore(&sem->lock, flags);
1116
1117 /* We depend on the FIFO order. Thus, we don't need to recheck
1118 * when we wake up; we are guaranteed to have the lock since
1119 * there is only one wake up per release (or steal).
1120 */
1121 schedule();
1122
1123
1124 if(my_queue->owner == t)
1125 {
1126 TRACE_CUR("queue %d: acquired through waiting\n",
1127 kfmlp_get_idx(sem, my_queue));
1128 }
1129 else
1130 {
1131 /* this case may happen if our wait entry was stolen
1132			   between queues. Record where we went. */
1133 my_queue = kfmlp_get_queue(sem, t);
1134 BUG_ON(!my_queue);
1135 TRACE_CUR("queue %d: acquired through stealing\n",
1136 kfmlp_get_idx(sem, my_queue));
1137 }
1138 }
1139 else
1140 {
1141 TRACE_CUR("queue %d: acquired immediately\n",
1142 kfmlp_get_idx(sem, my_queue));
1143
1144 my_queue->owner = t;
1145
1146 ++(my_queue->count);
1147 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1148
1149 spin_unlock_irqrestore(&sem->lock, flags);
1150 }
1151
1152 return kfmlp_get_idx(sem, my_queue);
1153}
1154
1155int cfifo_kfmlp_unlock(struct litmus_lock* l)
1156{
1157 struct task_struct *t = current, *next;
1158 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1159 struct kfmlp_queue *my_queue;
1160 unsigned long flags;
1161 int err = 0;
1162
1163 spin_lock_irqsave(&sem->lock, flags);
1164
1165 my_queue = kfmlp_get_queue(sem, t);
1166
1167 if (!my_queue) {
1168 err = -EINVAL;
1169 goto out;
1170 }
1171
1172 /* check if there are jobs waiting for this resource */
1173 next = __waitqueue_remove_first(&my_queue->wait);
1174 if (next) {
1175 /*
1176 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1177 kfmlp_get_idx(sem, my_queue),
1178 next->comm, next->pid);
1179 */
1180		/* next becomes the resource holder */
1181 my_queue->owner = next;
1182
1183 --(my_queue->count);
1184 if(my_queue->count < sem->shortest_queue->count)
1185 {
1186 sem->shortest_queue = my_queue;
1187 }
1188
1189 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
1190 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
1191
1192 /* determine new hp_waiter if necessary */
1193 if (next == my_queue->hp_waiter) {
1194 TRACE_TASK(next, "was highest-prio waiter\n");
1195 /* next has the highest priority --- it doesn't need to
1196 * inherit. However, we need to make sure that the
1197 * next-highest priority in the queue is reflected in
1198 * hp_waiter. */
1199 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
1200 if (my_queue->hp_waiter)
1201 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
1202 else
1203 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
1204 } else {
1205 /* Well, if next is not the highest-priority waiter,
1206 * then it ought to inherit the highest-priority
1207 * waiter's priority. */
1208 set_priority_inheritance(next, my_queue->hp_waiter);
1209 }
1210
1211 /* wake up next */
1212 wake_up_process(next);
1213 }
1214 else
1215 {
1216 TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
1217
1218 next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
1219
1220 /*
1221 if(next)
1222 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
1223 kfmlp_get_idx(sem, my_queue),
1224 next->comm, next->pid);
1225 */
1226
1227 my_queue->owner = next;
1228
1229 if(next)
1230 {
1231 TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
1232 kfmlp_get_idx(sem, my_queue),
1233 next->comm, next->pid);
1234
1235 /* wake up next */
1236 wake_up_process(next);
1237 }
1238 else
1239 {
1240 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
1241
1242 --(my_queue->count);
1243 if(my_queue->count < sem->shortest_queue->count)
1244 {
1245 sem->shortest_queue = my_queue;
1246 }
1247 }
1248 }
1249
1250 /* we lose the benefit of priority inheritance (if any) */
1251 if (tsk_rt(t)->inh_task)
1252 clear_priority_inheritance(t);
1253
1254out:
1255 spin_unlock_irqrestore(&sem->lock, flags);
1256
1257 return err;
1258}
1259
1260int cfifo_kfmlp_close(struct litmus_lock* l)
1261{
1262 struct task_struct *t = current;
1263 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1264 struct kfmlp_queue *my_queue;
1265 unsigned long flags;
1266
1267 int owner;
1268
1269 spin_lock_irqsave(&sem->lock, flags);
1270
1271 my_queue = kfmlp_get_queue(sem, t);
1272 owner = (my_queue) ? (my_queue->owner == t) : 0;
1273
1274 spin_unlock_irqrestore(&sem->lock, flags);
1275
1276 if (owner)
1277 cfifo_kfmlp_unlock(l);
1278
1279 return 0;
1280}
1281
1282void cfifo_kfmlp_free(struct litmus_lock* l)
1283{
1284 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1285 kfree(sem->queues);
1286 kfree(sem);
1287}
1288
1289static struct litmus_lock_ops cfifo_kfmlp_lock_ops = {
1290 .close = cfifo_kfmlp_close,
1291 .lock = cfifo_kfmlp_lock,
1292 .unlock = cfifo_kfmlp_unlock,
1293 .deallocate = cfifo_kfmlp_free,
1294};
1295
1296static struct litmus_lock* cfifo_new_kfmlp(void* __user arg, int* ret_code)
1297{
1298 struct kfmlp_semaphore* sem;
1299 int num_resources = 0;
1300 int i;
1301
1302 if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
1303 {
1304 *ret_code = -EINVAL;
1305 return(NULL);
1306 }
1307 if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
1308 {
1309 *ret_code = -EINVAL;
1310 return(NULL);
1311 }
1312 if(num_resources < 1)
1313 {
1314 *ret_code = -EINVAL;
1315 return(NULL);
1316 }
1317
1318 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1319 if(!sem)
1320 {
1321 *ret_code = -ENOMEM;
1322 return NULL;
1323 }
1324
1325 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
1326 if(!sem->queues)
1327 {
1328 kfree(sem);
1329 *ret_code = -ENOMEM;
1330 return NULL;
1331 }
1332
1333 sem->litmus_lock.ops = &cfifo_kfmlp_lock_ops;
1334 spin_lock_init(&sem->lock);
1335 sem->num_resources = num_resources;
1336
1337 for(i = 0; i < num_resources; ++i)
1338 {
1339 sem->queues[i].owner = NULL;
1340 sem->queues[i].hp_waiter = NULL;
1341 init_waitqueue_head(&sem->queues[i].wait);
1342 sem->queues[i].count = 0;
1343 }
1344
1345 sem->shortest_queue = &sem->queues[0];
1346
1347 *ret_code = 0;
1348 return &sem->litmus_lock;
1349}
1350
1351
1352/* **** lock constructor **** */
1353
1354static long cfifo_allocate_lock(struct litmus_lock **lock, int type,
1355 void* __user arg)
1356{
1357 int err = -ENXIO;
1358
1359 /* C-FIFO currently only supports the FMLP for global resources
1360 WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
1361 switch (type) {
1362 case KFMLP_SEM:
1363 *lock = cfifo_new_kfmlp(arg, &err);
1364 break;
1365 };
1366
1367 return err;
1368}
1369
1370#endif // CONFIG_LITMUS_LOCKING
1371
1372
1373
1374
1375
1376
1377/* total number of clusters */
1378static int num_clusters;
1379/* we do not support clusters of different sizes */
1380static unsigned int cluster_size;
1381
1382#ifdef VERBOSE_INIT
1383static void print_cluster_topology(cpumask_var_t mask, int cpu)
1384{
1385 int chk;
1386 char buf[255];
1387
1388 chk = cpulist_scnprintf(buf, 254, mask);
1389 buf[chk] = '\0';
1390 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
1391
1392}
1393#endif
1394
1395static int clusters_allocated = 0;
1396
1397static void cleanup_cfifo(void)
1398{
1399 int i;
1400
1401 if (clusters_allocated) {
1402 for (i = 0; i < num_clusters; i++) {
1403 kfree(cfifo[i].cpus);
1404 kfree(cfifo[i].heap_node);
1405 free_cpumask_var(cfifo[i].cpu_map);
1406 }
1407
1408 kfree(cfifo);
1409 }
1410}
1411
1412static long cfifo_activate_plugin(void)
1413{
1414 int i, j, cpu, ccpu, cpu_count;
1415 cpu_entry_t *entry;
1416
1417 cpumask_var_t mask;
1418 int chk = 0;
1419
1420 /* de-allocate old clusters, if any */
1421 cleanup_cfifo();
1422
1423 printk(KERN_INFO "C-FIFO: Activate Plugin, cluster configuration = %d\n",
1424 cluster_config);
1425
1426 /* need to get cluster_size first */
1427 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
1428 return -ENOMEM;
1429
1430 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
1431 cluster_size = num_online_cpus();
1432 } else {
1433 chk = get_shared_cpu_map(mask, 0, cluster_config);
1434 if (chk) {
1435 /* if chk != 0 then it is the max allowed index */
1436 printk(KERN_INFO "C-FIFO: Cluster configuration = %d "
1437 "is not supported on this hardware.\n",
1438 cluster_config);
1439 /* User should notice that the configuration failed, so
1440 * let's bail out. */
1441 return -EINVAL;
1442 }
1443
1444 cluster_size = cpumask_weight(mask);
1445 }
1446
1447 if ((num_online_cpus() % cluster_size) != 0) {
1448 /* this can't be right, some cpus are left out */
1449 printk(KERN_ERR "C-FIFO: Trying to group %d cpus in %d!\n",
1450 num_online_cpus(), cluster_size);
1451 return -1;
1452 }
1453
1454 num_clusters = num_online_cpus() / cluster_size;
1455 printk(KERN_INFO "C-FIFO: %d cluster(s) of size = %d\n",
1456 num_clusters, cluster_size);
1457
1458 /* initialize clusters */
1459 cfifo = kmalloc(num_clusters * sizeof(cfifo_domain_t), GFP_ATOMIC);
1460 for (i = 0; i < num_clusters; i++) {
1461
1462 cfifo[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
1463 GFP_ATOMIC);
1464 cfifo[i].heap_node = kmalloc(
1465 cluster_size * sizeof(struct bheap_node),
1466 GFP_ATOMIC);
1467 bheap_init(&(cfifo[i].cpu_heap));
1468 fifo_domain_init(&(cfifo[i].domain), NULL, cfifo_release_jobs);
1469
1470 if(!zalloc_cpumask_var(&cfifo[i].cpu_map, GFP_ATOMIC))
1471 return -ENOMEM;
1472 }
1473
1474	/* cycle through clusters and add cpus to them */
1475 for (i = 0; i < num_clusters; i++) {
1476
1477 for_each_online_cpu(cpu) {
1478 /* check if the cpu is already in a cluster */
1479 for (j = 0; j < num_clusters; j++)
1480 if (cpumask_test_cpu(cpu, cfifo[j].cpu_map))
1481 break;
1482 /* if it is in a cluster go to next cpu */
1483 if (j < num_clusters &&
1484 cpumask_test_cpu(cpu, cfifo[j].cpu_map))
1485 continue;
1486
1487 /* this cpu isn't in any cluster */
1488 /* get the shared cpus */
1489 if (unlikely(cluster_config == GLOBAL_CLUSTER))
1490 cpumask_copy(mask, cpu_online_mask);
1491 else
1492 get_shared_cpu_map(mask, cpu, cluster_config);
1493
1494 cpumask_copy(cfifo[i].cpu_map, mask);
1495#ifdef VERBOSE_INIT
1496 print_cluster_topology(mask, cpu);
1497#endif
1498 /* add cpus to current cluster and init cpu_entry_t */
1499 cpu_count = 0;
1500 for_each_cpu(ccpu, cfifo[i].cpu_map) {
1501
1502 entry = &per_cpu(cfifo_cpu_entries, ccpu);
1503 cfifo[i].cpus[cpu_count] = entry;
1504 atomic_set(&entry->will_schedule, 0);
1505 entry->cpu = ccpu;
1506 entry->cluster = &cfifo[i];
1507 entry->hn = &(cfifo[i].heap_node[cpu_count]);
1508 bheap_node_init(&entry->hn, entry);
1509
1510 cpu_count++;
1511
1512 entry->linked = NULL;
1513 entry->scheduled = NULL;
1514 update_cpu_position(entry);
1515 }
1516 /* done with this cluster */
1517 break;
1518 }
1519 }
1520
1521#ifdef CONFIG_LITMUS_SOFTIRQD
1522 {
1523 /* distribute the daemons evenly across the clusters. */
1524 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1525 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1526 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1527
1528 int daemon = 0;
1529 for(i = 0; i < num_clusters; ++i)
1530 {
1531 int num_on_this_cluster = num_daemons_per_cluster;
1532 if(left_over)
1533 {
1534 ++num_on_this_cluster;
1535 --left_over;
1536 }
1537
1538 for(j = 0; j < num_on_this_cluster; ++j)
1539 {
1540 // first CPU of this cluster
1541 affinity[daemon++] = i*cluster_size;
1542 }
1543 }
1544
1545 spawn_klitirqd(affinity);
1546
1547 kfree(affinity);
1548 }
1549#endif
1550
1551#ifdef CONFIG_LITMUS_NVIDIA
1552 init_nvidia_info();
1553#endif
1554
1555 free_cpumask_var(mask);
1556 clusters_allocated = 1;
1557 return 0;
1558}
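
The CONFIG_LITMUS_SOFTIRQD block in cfifo_activate_plugin() spreads NR_LITMUS_SOFTIRQD daemons as evenly as possible over the clusters, giving the first (NR_LITMUS_SOFTIRQD % num_clusters) clusters one extra daemon and pinning each daemon to the first CPU of its cluster. A standalone sketch of that arithmetic, with made-up counts:

/* Sketch of the klitirqd placement arithmetic used above: daemons are
 * split as evenly as possible across clusters, and each is pinned to
 * the first CPU of its cluster.  The counts below are made up. */
#include <stdio.h>

int main(void)
{
	int nr_daemons = 5, num_clusters = 2, cluster_size = 4;
	int per_cluster = nr_daemons / num_clusters;
	int left_over = nr_daemons % num_clusters;
	int i, j, daemon = 0;

	for (i = 0; i < num_clusters; i++) {
		int here = per_cluster;
		if (left_over) {
			here++;
			left_over--;
		}
		for (j = 0; j < here; j++)
			printf("daemon %d -> CPU %d (cluster %d)\n",
			       daemon++, i * cluster_size, i);
	}
	return 0;
}
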
1559
1560/* Plugin object */
1561static struct sched_plugin cfifo_plugin __cacheline_aligned_in_smp = {
1562 .plugin_name = "C-FIFO",
1563 .finish_switch = cfifo_finish_switch,
1564 .tick = cfifo_tick,
1565 .task_new = cfifo_task_new,
1566 .complete_job = complete_job,
1567 .task_exit = cfifo_task_exit,
1568 .schedule = cfifo_schedule,
1569 .task_wake_up = cfifo_task_wake_up,
1570 .task_block = cfifo_task_block,
1571 .admit_task = cfifo_admit_task,
1572 .activate_plugin = cfifo_activate_plugin,
1573#ifdef CONFIG_LITMUS_LOCKING
1574 .allocate_lock = cfifo_allocate_lock,
1575 .set_prio_inh = set_priority_inheritance,
1576 .clear_prio_inh = clear_priority_inheritance,
1577#endif
1578#ifdef CONFIG_LITMUS_SOFTIRQD
1579 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
1580 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
1581#endif
1582};
1583
1584static struct proc_dir_entry *cluster_file = NULL, *cfifo_dir = NULL;
1585
1586static int __init init_cfifo(void)
1587{
1588 int err, fs;
1589
1590 err = register_sched_plugin(&cfifo_plugin);
1591 if (!err) {
1592 fs = make_plugin_proc_dir(&cfifo_plugin, &cfifo_dir);
1593 if (!fs)
1594 cluster_file = create_cluster_file(cfifo_dir, &cluster_config);
1595 else
1596 printk(KERN_ERR "Could not allocate C-FIFO procfs dir.\n");
1597 }
1598 return err;
1599}
1600
1601static void clean_cfifo(void)
1602{
1603 cleanup_cfifo();
1604 if (cluster_file)
1605 remove_proc_entry("cluster", cfifo_dir);
1606 if (cfifo_dir)
1607 remove_plugin_proc_dir(&cfifo_plugin);
1608}
1609
1610module_init(init_cfifo);
1611module_exit(clean_cfifo);
diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c
new file mode 100644
index 000000000000..061b29eaff7e
--- /dev/null
+++ b/litmus/sched_crm.c
@@ -0,0 +1,1611 @@
1/*
2 * litmus/sched_crm.c
3 *
4 * Implementation of the C-RM scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch dependent
9 * and currently works only on x86 --- and only with modern
10 * cpus that export cpuid4 information)
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - default clustering is around L2 cache (cache index = 2)
15 * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
16 * online_cpus are placed in a single cluster).
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
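
C-RM differs from C-FIFO only in its priority order: jobs are ranked rate-monotonically via rm_higher_prio(). A hedged sketch of what such a comparison looks like, assuming priority is determined by the task's period with a PID tie-break; the actual helper in litmus/rm_common.c is not shown in this hunk and may differ in details (e.g., priority inheritance, non-real-time tasks):

/* Hedged sketch of a rate-monotonic priority test: the job whose task
 * has the shorter period wins; PID serves as an arbitrary, stable
 * tie-break.  The real rm_higher_prio() may differ in details. */
#include <stdio.h>

struct toy_task {
	unsigned long long period;      /* period in nanoseconds */
	int pid;
};

static int rm_higher_prio_sketch(const struct toy_task *a,
                                 const struct toy_task *b)
{
	if (a->period != b->period)
		return a->period < b->period;
	return a->pid < b->pid;         /* deterministic tie-break */
}

int main(void)
{
	struct toy_task t1 = { 10000000ULL, 101 };      /* 10 ms */
	struct toy_task t2 = { 25000000ULL, 102 };      /* 25 ms */
	printf("t1 higher prio than t2: %d\n",
	       rm_higher_prio_sketch(&t1, &t2));
	return 0;
}
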
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32#include <linux/uaccess.h>
33
34#include <linux/module.h>
35
36#include <litmus/litmus.h>
37#include <litmus/jobs.h>
38#include <litmus/preempt.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/rm_common.h>
41#include <litmus/sched_trace.h>
42
43#include <litmus/clustered.h>
44
45#include <litmus/bheap.h>
46
47/* to configure the cluster size */
48#include <litmus/litmus_proc.h>
49
50#ifdef CONFIG_SCHED_CPU_AFFINITY
51#include <litmus/affinity.h>
52#endif
53
54#ifdef CONFIG_LITMUS_SOFTIRQD
55#include <litmus/litmus_softirq.h>
56#endif
57
58#ifdef CONFIG_LITMUS_NVIDIA
59#include <litmus/nvidia_info.h>
60#endif
61
62/* Reference configuration variable. Determines which cache level is used to
63 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
64 * all CPUs form a single cluster (just like GSN-EDF).
65 */
66static enum cache_level cluster_config = GLOBAL_CLUSTER;
67
68struct clusterdomain;
69
70/* cpu_entry_t - maintain the linked and scheduled state
71 *
72 * A cpu also contains a pointer to the crm_domain_t cluster
73 * that owns it (struct clusterdomain*)
74 */
75typedef struct {
76 int cpu;
77 struct clusterdomain* cluster; /* owning cluster */
78 struct task_struct* linked; /* only RT tasks */
79 struct task_struct* scheduled; /* only RT tasks */
80 atomic_t will_schedule; /* prevent unneeded IPIs */
81 struct bheap_node* hn;
82} cpu_entry_t;
83
84/* one cpu_entry_t per CPU */
85DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries);
86
87#define set_will_schedule() \
88 (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1))
89#define clear_will_schedule() \
90 (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0))
91#define test_will_schedule(cpu) \
92 (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule))
93
94/*
95 * In C-RM there is a crm domain _per_ cluster
96 * The number of clusters is dynamically determined according to the
97 * total cpu number and the cluster size
98 */
99typedef struct clusterdomain {
100 /* rt_domain for this cluster */
101 rt_domain_t domain;
102 /* cpus in this cluster */
103 cpu_entry_t* *cpus;
104 /* map of this cluster cpus */
105 cpumask_var_t cpu_map;
106 /* the cpus queue themselves according to priority in here */
107 struct bheap_node *heap_node;
108 struct bheap cpu_heap;
109 /* lock for this cluster */
110#define crm_lock domain.ready_lock
111} crm_domain_t;
112
113/* a crm_domain per cluster; allocation is done at init/activation time */
114crm_domain_t *crm;
115
116#define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster)
117#define task_cpu_cluster(task) remote_cluster(get_partition(task))
118
119/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
120 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
121 * information during the initialization of the plugin (e.g., topology)
122#define WANT_ALL_SCHED_EVENTS
123 */
124#define VERBOSE_INIT
125
126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
127{
128 cpu_entry_t *a, *b;
129 a = _a->value;
130 b = _b->value;
131 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap.
133 */
134 return rm_higher_prio(b->linked, a->linked);
135}
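
cpu_lower_prio() deliberately swaps its operands when calling rm_higher_prio(), so the heap's top is the CPU running the least urgent job, i.e., the preemption candidate. A standalone illustration of that inversion, with integers standing in for linked jobs and a linear scan standing in for bheap_peek():

/* Standalone sketch of the argument-swap trick in cpu_lower_prio():
 * flipping the operands of the priority test makes the "greatest"
 * element under the comparator the lowest-priority CPU.  Plain ints
 * stand in for linked jobs (lower = more urgent). */
#include <stdio.h>

static int higher_prio(int a, int b)           { return a < b; }
static int cpu_lower_prio_sketch(int a, int b) { return higher_prio(b, a); }

static int peek(const int prio[], int n, int (*before)(int, int))
{
	int i, top = 0;
	for (i = 1; i < n; i++)
		if (before(prio[i], prio[top]))
			top = i;
	return top;
}

int main(void)
{
	int linked_prio[4] = { 10, 40, 20, 30 };
	printf("lowest-priority CPU: %d\n",
	       peek(linked_prio, 4, cpu_lower_prio_sketch));
	return 0;
}
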
136
137/* update_cpu_position - Move the cpu entry to the correct place to maintain
138 * order in the cpu queue. Caller must hold crm lock.
139 */
140static void update_cpu_position(cpu_entry_t *entry)
141{
142 crm_domain_t *cluster = entry->cluster;
143
144 if (likely(bheap_node_in_heap(entry->hn)))
145 bheap_delete(cpu_lower_prio,
146 &cluster->cpu_heap,
147 entry->hn);
148
149 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
150}
151
152/* caller must hold crm lock */
153static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster)
154{
155 struct bheap_node* hn;
156 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
157 return hn->value;
158}
159
160
161/* link_task_to_cpu - Update the link of a CPU.
162 * Handles the case where the to-be-linked task is already
163 * scheduled on a different CPU.
164 */
165static noinline void link_task_to_cpu(struct task_struct* linked,
166 cpu_entry_t *entry)
167{
168 cpu_entry_t *sched;
169 struct task_struct* tmp;
170 int on_cpu;
171
172 BUG_ON(linked && !is_realtime(linked));
173
174 /* Currently linked task is set to be unlinked. */
175 if (entry->linked) {
176 entry->linked->rt_param.linked_on = NO_CPU;
177 }
178
179 /* Link new task to CPU. */
180 if (linked) {
181 set_rt_flags(linked, RT_F_RUNNING);
182		/* handle the case where the task is already scheduled somewhere! */
183 on_cpu = linked->rt_param.scheduled_on;
184 if (on_cpu != NO_CPU) {
185 sched = &per_cpu(crm_cpu_entries, on_cpu);
186 /* this should only happen if not linked already */
187 BUG_ON(sched->linked == linked);
188
189 /* If we are already scheduled on the CPU to which we
190 * wanted to link, we don't need to do the swap --
191 * we just link ourselves to the CPU and depend on
192 * the caller to get things right.
193 */
194 if (entry != sched) {
195 TRACE_TASK(linked,
196 "already scheduled on %d, updating link.\n",
197 sched->cpu);
198 tmp = sched->linked;
199 linked->rt_param.linked_on = sched->cpu;
200 sched->linked = linked;
201 update_cpu_position(sched);
202 linked = tmp;
203 }
204 }
205 if (linked) /* might be NULL due to swap */
206 linked->rt_param.linked_on = entry->cpu;
207 }
208 entry->linked = linked;
209#ifdef WANT_ALL_SCHED_EVENTS
210 if (linked)
211 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
212 else
213 TRACE("NULL linked to %d.\n", entry->cpu);
214#endif
215 update_cpu_position(entry);
216}
217
218/* unlink - Make sure a task is not linked any longer to an entry
219 * where it was linked before. Must hold crm_lock.
220 */
221static noinline void unlink(struct task_struct* t)
222{
223 cpu_entry_t *entry;
224
225 if (t->rt_param.linked_on != NO_CPU) {
226 /* unlink */
227 entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on);
228 t->rt_param.linked_on = NO_CPU;
229 link_task_to_cpu(NULL, entry);
230 } else if (is_queued(t)) {
231 /* This is an interesting situation: t is scheduled,
232 * but was just recently unlinked. It cannot be
233 * linked anywhere else (because then it would have
234 * been relinked to this CPU), thus it must be in some
235 * queue. We must remove it from the list in this
236 * case.
237 *
238			 * in the C-RM case it should be somewhere in the queue for
239			 * its domain, therefore we can get the domain using
240 * task_cpu_cluster
241 */
242 remove(&(task_cpu_cluster(t))->domain, t);
243 }
244}
245
246
247/* preempt - force a CPU to reschedule
248 */
249static void preempt(cpu_entry_t *entry)
250{
251 preempt_if_preemptable(entry->scheduled, entry->cpu);
252}
253
254/* requeue - Put an unlinked task into the C-RM domain.
255 * Caller must hold crm_lock.
256 */
257static noinline void requeue(struct task_struct* task)
258{
259 crm_domain_t *cluster = task_cpu_cluster(task);
260 BUG_ON(!task);
261 /* sanity check before insertion */
262 BUG_ON(is_queued(task));
263
264 if (is_released(task, litmus_clock()))
265 __add_ready(&cluster->domain, task);
266 else {
267 /* it has got to wait */
268 add_release(&cluster->domain, task);
269 }
270}
271
272#ifdef CONFIG_SCHED_CPU_AFFINITY
273static cpu_entry_t* crm_get_nearest_available_cpu(
274 crm_domain_t *cluster, cpu_entry_t* start)
275{
276 cpu_entry_t* affinity;
277
278 get_nearest_available_cpu(affinity, start, crm_cpu_entries, -1);
279
280 /* make sure CPU is in our cluster */
281 if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
282 return(affinity);
283 else
284 return(NULL);
285}
286#endif
287
288
289/* check for any necessary preemptions */
290static void check_for_preemptions(crm_domain_t *cluster)
291{
292 struct task_struct *task;
293 cpu_entry_t *last;
294
295 for(last = lowest_prio_cpu(cluster);
296 rm_preemption_needed(&cluster->domain, last->linked);
297 last = lowest_prio_cpu(cluster)) {
298 /* preemption necessary */
299 task = __take_ready(&cluster->domain);
300#ifdef CONFIG_SCHED_CPU_AFFINITY
301 {
302 cpu_entry_t* affinity =
303 crm_get_nearest_available_cpu(cluster,
304 &per_cpu(crm_cpu_entries, task_cpu(task)));
305 if(affinity)
306 last = affinity;
307 else if(last->linked)
308 requeue(last->linked);
309 }
310#else
311 if (last->linked)
312 requeue(last->linked);
313#endif
314 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
315 task->pid, last->cpu);
316 link_task_to_cpu(task, last);
317 preempt(last);
318 }
319}
320
321/* crm_job_arrival: task is either resumed or released */
322static noinline void crm_job_arrival(struct task_struct* task)
323{
324 crm_domain_t *cluster = task_cpu_cluster(task);
325 BUG_ON(!task);
326
327 requeue(task);
328 check_for_preemptions(cluster);
329}
330
331static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks)
332{
333 crm_domain_t* cluster = container_of(rt, crm_domain_t, domain);
334 unsigned long flags;
335
336 raw_spin_lock_irqsave(&cluster->crm_lock, flags);
337
338 __merge_ready(&cluster->domain, tasks);
339 check_for_preemptions(cluster);
340
341 raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
342}
343
344/* caller holds crm_lock */
345static noinline void job_completion(struct task_struct *t, int forced)
346{
347 BUG_ON(!t);
348
349 sched_trace_task_completion(t, forced);
350
351#ifdef CONFIG_LITMUS_NVIDIA
352 atomic_set(&tsk_rt(t)->nv_int_count, 0);
353#endif
354
355 TRACE_TASK(t, "job_completion().\n");
356
357 /* set flags */
358 set_rt_flags(t, RT_F_SLEEP);
359 /* prepare for next period */
360 prepare_for_next_period(t);
361 if (is_released(t, litmus_clock()))
362 sched_trace_task_release(t);
363 /* unlink */
364 unlink(t);
365 /* requeue
366 * But don't requeue a blocking task. */
367 if (is_running(t))
368 crm_job_arrival(t);
369}
370
371/* crm_tick - this function is called for every local timer
372 * interrupt.
373 *
374 * checks whether the current task has expired and checks
375 * whether we need to preempt it if it has not expired
376 */
377static void crm_tick(struct task_struct* t)
378{
379 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
380 if (!is_np(t)) {
381 /* np tasks will be preempted when they become
382 * preemptable again
383 */
384 litmus_reschedule_local();
385 set_will_schedule();
386 TRACE("crm_scheduler_tick: "
387 "%d is preemptable "
388 " => FORCE_RESCHED\n", t->pid);
389 } else if (is_user_np(t)) {
390 TRACE("crm_scheduler_tick: "
391 "%d is non-preemptable, "
392 "preemption delayed.\n", t->pid);
393 request_exit_np(t);
394 }
395 }
396}
397
398/* Getting schedule() right is a bit tricky. schedule() may not make any
399 * assumptions on the state of the current task since it may be called for a
400 * number of reasons. The reasons include a scheduler_tick() determined that it
401 * was necessary, because sys_exit_np() was called, because some Linux
402 * subsystem determined so, or even (in the worst case) because there is a bug
403 * hidden somewhere. Thus, we must take extreme care to determine what the
404 * current state is.
405 *
406 * The CPU could currently be scheduling a task (or not), be linked (or not).
407 *
408 * The following assertions for the scheduled task could hold:
409 *
410 * - !is_running(scheduled) // the job blocks
411 * - scheduled->timeslice == 0 // the job completed (forcefully)
412 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
413 * - linked != scheduled // we need to reschedule (for any reason)
414 * - is_np(scheduled) // rescheduling must be delayed,
415 * sys_exit_np must be requested
416 *
417 * Any of these can occur together.
418 */
419static struct task_struct* crm_schedule(struct task_struct * prev)
420{
421 cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
422 crm_domain_t *cluster = entry->cluster;
423 int out_of_time, sleep, preempt, np, exists, blocks;
424 struct task_struct* next = NULL;
425
426 raw_spin_lock(&cluster->crm_lock);
427 clear_will_schedule();
428
429 /* sanity checking */
430 BUG_ON(entry->scheduled && entry->scheduled != prev);
431 BUG_ON(entry->scheduled && !is_realtime(prev));
432 BUG_ON(is_realtime(prev) && !entry->scheduled);
433
434 /* (0) Determine state */
435 exists = entry->scheduled != NULL;
436 blocks = exists && !is_running(entry->scheduled);
437 out_of_time = exists &&
438 budget_enforced(entry->scheduled) &&
439 budget_exhausted(entry->scheduled);
440 np = exists && is_np(entry->scheduled);
441 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
442 preempt = entry->scheduled != entry->linked;
443
444#ifdef WANT_ALL_SCHED_EVENTS
445 TRACE_TASK(prev, "invoked crm_schedule.\n");
446#endif
447
448 if (exists)
449 TRACE_TASK(prev,
450 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
451 "state:%d sig:%d\n",
452 blocks, out_of_time, np, sleep, preempt,
453 prev->state, signal_pending(prev));
454 if (entry->linked && preempt)
455 TRACE_TASK(prev, "will be preempted by %s/%d\n",
456 entry->linked->comm, entry->linked->pid);
457
458
459 /* If a task blocks we have no choice but to reschedule.
460 */
461 if (blocks)
462 unlink(entry->scheduled);
463
464 /* Request a sys_exit_np() call if we would like to preempt but cannot.
465 * We need to make sure to update the link structure anyway in case
466 * that we are still linked. Multiple calls to request_exit_np() don't
467 * hurt.
468 */
469 if (np && (out_of_time || preempt || sleep)) {
470 unlink(entry->scheduled);
471 request_exit_np(entry->scheduled);
472 }
473
474 /* Any task that is preemptable and either exhausts its execution
475 * budget or wants to sleep completes. We may have to reschedule after
476 * this. Don't do a job completion if we block (can't have timers running
477	 * for blocked jobs). Preemptions go first for the same reason.
478 */
479 if (!np && (out_of_time || sleep) && !blocks && !preempt)
480 job_completion(entry->scheduled, !sleep);
481
482 /* Link pending task if we became unlinked.
483 */
484 if (!entry->linked)
485 link_task_to_cpu(__take_ready(&cluster->domain), entry);
486
487 /* The final scheduling decision. Do we need to switch for some reason?
488 * If linked is different from scheduled, then select linked as next.
489 */
490 if ((!np || blocks) &&
491 entry->linked != entry->scheduled) {
492 /* Schedule a linked job? */
493 if (entry->linked) {
494 entry->linked->rt_param.scheduled_on = entry->cpu;
495 next = entry->linked;
496 }
497 if (entry->scheduled) {
498 /* not gonna be scheduled soon */
499 entry->scheduled->rt_param.scheduled_on = NO_CPU;
500 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
501 }
502 } else
503 /* Only override Linux scheduler if we have a real-time task
504 * scheduled that needs to continue.
505 */
506 if (exists)
507 next = prev;
508
509 sched_state_task_picked();
510 raw_spin_unlock(&cluster->crm_lock);
511
512#ifdef WANT_ALL_SCHED_EVENTS
513 TRACE("crm_lock released, next=0x%p\n", next);
514
515 if (next)
516 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
517 else if (exists && !next)
518 TRACE("becomes idle at %llu.\n", litmus_clock());
519#endif
520
521
522 return next;
523}
524
525
526/* _finish_switch - we just finished the switch away from prev
527 */
528static void crm_finish_switch(struct task_struct *prev)
529{
530 cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
531
532 entry->scheduled = is_realtime(current) ? current : NULL;
533#ifdef WANT_ALL_SCHED_EVENTS
534 TRACE_TASK(prev, "switched away from\n");
535#endif
536}
537
538
539/* Prepare a task for running in RT mode
540 */
541static void crm_task_new(struct task_struct * t, int on_rq, int running)
542{
543 unsigned long flags;
544 cpu_entry_t* entry;
545 crm_domain_t* cluster;
546
547	TRACE("C-RM: task new %d\n", t->pid);
548
549 /* the cluster doesn't change even if t is running */
550 cluster = task_cpu_cluster(t);
551
552 raw_spin_lock_irqsave(&cluster->crm_lock, flags);
553
554 /* setup job params */
555 release_at(t, litmus_clock());
556
557 if (running) {
558 entry = &per_cpu(crm_cpu_entries, task_cpu(t));
559 BUG_ON(entry->scheduled);
560
561 entry->scheduled = t;
562 tsk_rt(t)->scheduled_on = task_cpu(t);
563 } else {
564 t->rt_param.scheduled_on = NO_CPU;
565 }
566 t->rt_param.linked_on = NO_CPU;
567
568 crm_job_arrival(t);
569 raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
570}
571
572static void crm_task_wake_up(struct task_struct *task)
573{
574 unsigned long flags;
575 //lt_t now;
576 crm_domain_t *cluster;
577
578 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
579
580 cluster = task_cpu_cluster(task);
581
582 raw_spin_lock_irqsave(&cluster->crm_lock, flags);
583
584#if 0 // sporadic task model
585 /* We need to take suspensions because of semaphores into
586 * account! If a job resumes after being suspended due to acquiring
587 * a semaphore, it should never be treated as a new job release.
588 */
589 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
590 set_rt_flags(task, RT_F_RUNNING);
591 } else {
592 now = litmus_clock();
593 if (is_tardy(task, now)) {
594 /* new sporadic release */
595 release_at(task, now);
596 sched_trace_task_release(task);
597 }
598 else {
599 if (task->rt.time_slice) {
600 /* came back in time before deadline
601 */
602 set_rt_flags(task, RT_F_RUNNING);
603 }
604 }
605 }
606#endif
607
608 //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
609 set_rt_flags(task, RT_F_RUNNING); // periodic model
610
611 if(tsk_rt(task)->linked_on == NO_CPU)
612 crm_job_arrival(task);
613 else
614 TRACE("WTF, mate?!\n");
615
616 raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
617}
618
619static void crm_task_block(struct task_struct *t)
620{
621 unsigned long flags;
622 crm_domain_t *cluster;
623
624 TRACE_TASK(t, "block at %llu\n", litmus_clock());
625
626 cluster = task_cpu_cluster(t);
627
628 /* unlink if necessary */
629 raw_spin_lock_irqsave(&cluster->crm_lock, flags);
630 unlink(t);
631 raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
632
633 BUG_ON(!is_realtime(t));
634}
635
636
637static void crm_task_exit(struct task_struct * t)
638{
639 unsigned long flags;
640 crm_domain_t *cluster = task_cpu_cluster(t);
641
642 /* unlink if necessary */
643 raw_spin_lock_irqsave(&cluster->crm_lock, flags);
644 unlink(t);
645 if (tsk_rt(t)->scheduled_on != NO_CPU) {
646 cpu_entry_t *cpu;
647 cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on);
648 cpu->scheduled = NULL;
649 tsk_rt(t)->scheduled_on = NO_CPU;
650 }
651 raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
652
653 BUG_ON(!is_realtime(t));
654 TRACE_TASK(t, "RIP\n");
655}
656
657static long crm_admit_task(struct task_struct* tsk)
658{
659 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
660}
661
662
663
664
665
666
667
668
669
670
671
672
673
674#ifdef CONFIG_LITMUS_LOCKING
675
676#include <litmus/fdso.h>
677
678
679static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
680{
681 int linked_on;
682 int check_preempt = 0;
683
684 crm_domain_t* cluster = task_cpu_cluster(t);
685
686 if(prio_inh != NULL)
687 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
688 else
689 TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
690
691 sched_trace_eff_prio_change(t, prio_inh);
692
693 tsk_rt(t)->inh_task = prio_inh;
694
695 linked_on = tsk_rt(t)->linked_on;
696
697 /* If it is scheduled, then we need to reorder the CPU heap. */
698 if (linked_on != NO_CPU) {
699 TRACE_TASK(t, "%s: linked on %d\n",
700 __FUNCTION__, linked_on);
701 /* Holder is scheduled; need to re-order CPUs.
702 * We can't use heap_decrease() here since
703 * the cpu_heap is ordered in reverse direction, so
704 * it is actually an increase. */
705 bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
706 per_cpu(crm_cpu_entries, linked_on).hn);
707 bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
708 per_cpu(crm_cpu_entries, linked_on).hn);
709 } else {
710 /* holder may be queued: first stop queue changes */
711 raw_spin_lock(&cluster->domain.release_lock);
712 if (is_queued(t)) {
713 TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
714
715 /* We need to update the position of holder in some
716 * heap. Note that this could be a release heap if
717 * budget enforcement is used and this job overran. */
718 check_preempt = !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node);
719
720 } else {
721 /* Nothing to do: if it is not queued and not linked
722 * then it is either sleeping or currently being moved
723 * by other code (e.g., a timer interrupt handler) that
724 * will use the correct priority when enqueuing the
725 * task. */
726 TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
727 }
728 raw_spin_unlock(&cluster->domain.release_lock);
729
730 /* If holder was enqueued in a release heap, then the following
731 * preemption check is pointless, but we can't easily detect
732 * that case. If you want to fix this, then consider that
733 * simply adding a state flag requires O(n) time to update when
734 * releasing n tasks, which conflicts with the goal to have
735 * O(log n) merges. */
736 if (check_preempt) {
737 /* heap_decrease() hit the top level of the heap: make
738 * sure preemption checks get the right task, not the
739 * potentially stale cache. */
740 bheap_uncache_min(rm_ready_order, &cluster->domain.ready_queue);
741 check_for_preemptions(cluster);
742 }
743 }
744}
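The effect the function above is after can be summarized in isolation: while inheritance is active, the lock holder competes with the waiter's priority, and it falls back to its own parameters once inh_task is cleared. A minimal user-space sketch of that idea (the struct and field names below are illustrative stand-ins, not the kernel's rt_param layout):

#include <stdio.h>

struct fake_task {
	const char *name;
	long period;				/* shorter period = higher RM priority */
	const struct fake_task *inh_task;	/* task inherited from, if any */
};

/* effective priority: the inherited task's parameters win while set */
static long effective_period(const struct fake_task *t)
{
	return t->inh_task ? t->inh_task->period : t->period;
}

int main(void)
{
	struct fake_task waiter = { "waiter", 10, NULL };
	struct fake_task holder = { "holder", 100, NULL };

	holder.inh_task = &waiter;	/* roughly what set_priority_inheritance() arranges */
	printf("%s competes with period %ld\n", holder.name, effective_period(&holder));

	holder.inh_task = NULL;		/* roughly what clear_priority_inheritance() undoes */
	printf("%s back to period %ld\n", holder.name, effective_period(&holder));
	return 0;
}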
745
746/* called with IRQs off */
747static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
748{
749 crm_domain_t* cluster = task_cpu_cluster(t);
750
751 raw_spin_lock(&cluster->crm_lock);
752
753 __set_priority_inheritance(t, prio_inh);
754
755#ifdef CONFIG_LITMUS_SOFTIRQD
756 if(tsk_rt(t)->cur_klitirqd != NULL)
757 {
758 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
759 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
760
761 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
762 }
763#endif
764
765 raw_spin_unlock(&cluster->crm_lock);
766}
767
768
769/* called with IRQs off */
770static void __clear_priority_inheritance(struct task_struct* t)
771{
772 TRACE_TASK(t, "priority restored\n");
773
774 if(tsk_rt(t)->scheduled_on != NO_CPU)
775 {
776 sched_trace_eff_prio_change(t, NULL);
777
778 tsk_rt(t)->inh_task = NULL;
779
780 /* Check if rescheduling is necessary. We can't use heap_decrease()
781 * since the priority was effectively lowered. */
782 unlink(t);
783 crm_job_arrival(t);
784 }
785 else
786 {
787 __set_priority_inheritance(t, NULL);
788 }
789
790#ifdef CONFIG_LITMUS_SOFTIRQD
791 if(tsk_rt(t)->cur_klitirqd != NULL)
792 {
793 TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
794 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
795
796 if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
797 {
798 sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
799
800 tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
801
802 /* Check if rescheduling is necessary. We can't use heap_decrease()
803 * since the priority was effectively lowered. */
804 unlink(tsk_rt(t)->cur_klitirqd);
805 crm_job_arrival(tsk_rt(t)->cur_klitirqd);
806 }
807 else
808 {
809 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
810 }
811 }
812#endif
813}
814
815/* called with IRQs off */
816static void clear_priority_inheritance(struct task_struct* t)
817{
818 crm_domain_t* cluster = task_cpu_cluster(t);
819
820 raw_spin_lock(&cluster->crm_lock);
821 __clear_priority_inheritance(t);
822 raw_spin_unlock(&cluster->crm_lock);
823}
824
825
826
827#ifdef CONFIG_LITMUS_SOFTIRQD
828/* called with IRQs off */
829static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
830 struct task_struct* old_owner,
831 struct task_struct* new_owner)
832{
833 crm_domain_t* cluster = task_cpu_cluster(klitirqd);
834
835 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
836
837 raw_spin_lock(&cluster->crm_lock);
838
839 if(old_owner != new_owner)
840 {
841 if(old_owner)
842 {
843 // unreachable?
844 tsk_rt(old_owner)->cur_klitirqd = NULL;
845 }
846
847 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
848 new_owner->comm, new_owner->pid);
849
850 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
851 }
852
853 __set_priority_inheritance(klitirqd,
854 (tsk_rt(new_owner)->inh_task == NULL) ?
855 new_owner :
856 tsk_rt(new_owner)->inh_task);
857
858 raw_spin_unlock(&cluster->crm_lock);
859}
860
861/* called with IRQs off */
862static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
863 struct task_struct* old_owner)
864{
865 crm_domain_t* cluster = task_cpu_cluster(klitirqd);
866
867 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
868
869 raw_spin_lock(&cluster->crm_lock);
870
871 TRACE_TASK(klitirqd, "priority restored\n");
872
873 if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
874 {
875 tsk_rt(klitirqd)->inh_task = NULL;
876
877 /* Check if rescheduling is necessary. We can't use heap_decrease()
878 * since the priority was effectively lowered. */
879 unlink(klitirqd);
880 crm_job_arrival(klitirqd);
881 }
882 else
883 {
884 __set_priority_inheritance(klitirqd, NULL);
885 }
886
887 tsk_rt(old_owner)->cur_klitirqd = NULL;
888
889 raw_spin_unlock(&cluster->crm_lock);
890}
891#endif // CONFIG_LITMUS_SOFTIRQD
892
893
894/* ******************** KFMLP support ********************** */
895
896/* struct for semaphore with priority inheritance */
897struct kfmlp_queue
898{
899 wait_queue_head_t wait;
900 struct task_struct* owner;
901 struct task_struct* hp_waiter;
902 int count; /* number of waiters + holder */
903};
904
905struct kfmlp_semaphore
906{
907 struct litmus_lock litmus_lock;
908
909 spinlock_t lock;
910
911 int num_resources; /* aka k */
912 struct kfmlp_queue *queues; /* array */
913 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
914};
915
916static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
917{
918 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
919}
920
921static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
922 struct kfmlp_queue* queue)
923{
924 return (queue - &sem->queues[0]);
925}
926
927static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
928 struct task_struct* holder)
929{
930 int i;
931 for(i = 0; i < sem->num_resources; ++i)
932 if(sem->queues[i].owner == holder)
933 return(&sem->queues[i]);
934 return(NULL);
935}
936
937/* caller is responsible for locking */
938static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
939 struct task_struct *skip)
940{
941 struct list_head *pos;
942 struct task_struct *queued, *found = NULL;
943
944 list_for_each(pos, &kqueue->wait.task_list) {
945 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
946 task_list)->private;
947
948 /* Compare task prios, find high prio task. */
949 if (queued != skip && rm_higher_prio(queued, found))
950 found = queued;
951 }
952 return found;
953}
954
955static inline struct kfmlp_queue* kfmlp_find_shortest(
956 struct kfmlp_semaphore* sem,
957 struct kfmlp_queue* search_start)
958{
959 // we start our search at search_start instead of at the beginning of the
960 // queue list to load-balance across all resources.
961 struct kfmlp_queue* step = search_start;
962 struct kfmlp_queue* shortest = sem->shortest_queue;
963
964 do
965 {
966 step = (step+1 != &sem->queues[sem->num_resources]) ?
967 step+1 : &sem->queues[0];
968 if(step->count < shortest->count)
969 {
970 shortest = step;
971 if(step->count == 0)
972 break; /* can't get any shorter */
973 }
974 }while(step != search_start);
975
976 return(shortest);
977}
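Extracted from the kernel context, the circular scan above reduces to the following self-contained sketch over plain integer counts (the queue lengths are made up for illustration). Starting the scan just past search_start is what spreads requests across all k replicas instead of always refilling queue 0:

#include <stdio.h>

static int find_shortest(const int *count, int k, int search_start, int cur_shortest)
{
	int step = search_start;
	int shortest = cur_shortest;

	do {
		/* wrap around the array of k queues */
		step = (step + 1 != k) ? step + 1 : 0;
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break;	/* can't get any shorter */
		}
	} while (step != search_start);

	return shortest;
}

int main(void)
{
	int count[3] = { 2, 1, 0 };	/* hypothetical queue lengths */

	/* previous shortest was queue 1; the scan starting after queue 1
	 * finds the empty queue 2 and stops early. */
	printf("shortest = %d\n", find_shortest(count, 3, 1, 1));
	return 0;
}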
978
979static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
980{
981 /* must hold sem->lock */
982
983 struct kfmlp_queue *my_queue = NULL;
984 struct task_struct *max_hp = NULL;
985
986
987 struct list_head *pos;
988 struct task_struct *queued;
989 int i;
990
991 for(i = 0; i < sem->num_resources; ++i)
992 {
993 if( (sem->queues[i].count > 1) &&
994 ((my_queue == NULL) ||
995 (rm_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
996 {
997 my_queue = &sem->queues[i];
998 }
999 }
1000
1001 if(my_queue)
1002 {
1003 crm_domain_t* cluster;
1004
1005 max_hp = my_queue->hp_waiter;
1006 BUG_ON(!max_hp);
1007
1008 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
1009 kfmlp_get_idx(sem, my_queue),
1010 max_hp->comm, max_hp->pid,
1011 kfmlp_get_idx(sem, my_queue));
1012
1013 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
1014
1015 /*
1016 if(my_queue->hp_waiter)
1017 TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
1018 kfmlp_get_idx(sem, my_queue),
1019 my_queue->hp_waiter->comm,
1020 my_queue->hp_waiter->pid);
1021 else
1022 TRACE_CUR("queue %d: new hp_waiter is %p\n",
1023 kfmlp_get_idx(sem, my_queue), NULL);
1024 */
1025
1026 cluster = task_cpu_cluster(max_hp);
1027
1028 raw_spin_lock(&cluster->crm_lock);
1029
1030 /*
1031 if(my_queue->owner)
1032 TRACE_CUR("queue %d: owner is %s/%d\n",
1033 kfmlp_get_idx(sem, my_queue),
1034 my_queue->owner->comm,
1035 my_queue->owner->pid);
1036 else
1037 TRACE_CUR("queue %d: owner is %p\n",
1038 kfmlp_get_idx(sem, my_queue),
1039 NULL);
1040 */
1041
1042 if(tsk_rt(my_queue->owner)->inh_task == max_hp)
1043 {
1044 __clear_priority_inheritance(my_queue->owner);
1045 if(my_queue->hp_waiter != NULL)
1046 {
1047 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1048 }
1049 }
1050 raw_spin_unlock(&cluster->crm_lock);
1051
1052 list_for_each(pos, &my_queue->wait.task_list)
1053 {
1054 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1055 task_list)->private;
1056 /* Compare task prios, find high prio task. */
1057 if (queued == max_hp)
1058 {
1059 /*
1060 TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
1061 kfmlp_get_idx(sem, my_queue));
1062 */
1063 __remove_wait_queue(&my_queue->wait,
1064 list_entry(pos, wait_queue_t, task_list));
1065 break;
1066 }
1067 }
1068 --(my_queue->count);
1069 }
1070
1071 return(max_hp);
1072}
1073
1074int crm_kfmlp_lock(struct litmus_lock* l)
1075{
1076 struct task_struct* t = current;
1077 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1078 struct kfmlp_queue* my_queue;
1079 wait_queue_t wait;
1080 unsigned long flags;
1081
1082 if (!is_realtime(t))
1083 return -EPERM;
1084
1085 spin_lock_irqsave(&sem->lock, flags);
1086
1087 my_queue = sem->shortest_queue;
1088
1089 if (my_queue->owner) {
1090 /* resource is not free => must suspend and wait */
1091 TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
1092 kfmlp_get_idx(sem, my_queue));
1093
1094 init_waitqueue_entry(&wait, t);
1095
1096 /* FIXME: interruptible would be nice some day */
1097 set_task_state(t, TASK_UNINTERRUPTIBLE);
1098
1099 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
1100
1101 /* check if we need to activate priority inheritance */
1102 if (rm_higher_prio(t, my_queue->hp_waiter))
1103 {
1104 my_queue->hp_waiter = t;
1105 if (rm_higher_prio(t, my_queue->owner))
1106 {
1107 set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1108 }
1109 }
1110
1111 ++(my_queue->count);
1112 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1113
1114 /* release lock before sleeping */
1115 spin_unlock_irqrestore(&sem->lock, flags);
1116
1117 /* We depend on the FIFO order. Thus, we don't need to recheck
1118 * when we wake up; we are guaranteed to have the lock since
1119 * there is only one wake up per release (or steal).
1120 */
1121 schedule();
1122
1123
1124 if(my_queue->owner == t)
1125 {
1126 TRACE_CUR("queue %d: acquired through waiting\n",
1127 kfmlp_get_idx(sem, my_queue));
1128 }
1129 else
1130 {
1131 /* this case may happen if our wait entry was stolen
1132	 between queues. Record where we went. */
1133 my_queue = kfmlp_get_queue(sem, t);
1134 BUG_ON(!my_queue);
1135 TRACE_CUR("queue %d: acquired through stealing\n",
1136 kfmlp_get_idx(sem, my_queue));
1137 }
1138 }
1139 else
1140 {
1141 TRACE_CUR("queue %d: acquired immediately\n",
1142 kfmlp_get_idx(sem, my_queue));
1143
1144 my_queue->owner = t;
1145
1146 ++(my_queue->count);
1147 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1148
1149 spin_unlock_irqrestore(&sem->lock, flags);
1150 }
1151
1152 return kfmlp_get_idx(sem, my_queue);
1153}
1154
1155int crm_kfmlp_unlock(struct litmus_lock* l)
1156{
1157 struct task_struct *t = current, *next;
1158 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1159 struct kfmlp_queue *my_queue;
1160 unsigned long flags;
1161 int err = 0;
1162
1163 spin_lock_irqsave(&sem->lock, flags);
1164
1165 my_queue = kfmlp_get_queue(sem, t);
1166
1167 if (!my_queue) {
1168 err = -EINVAL;
1169 goto out;
1170 }
1171
1172 /* check if there are jobs waiting for this resource */
1173 next = __waitqueue_remove_first(&my_queue->wait);
1174 if (next) {
1175 /*
1176 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1177 kfmlp_get_idx(sem, my_queue),
1178 next->comm, next->pid);
1179 */
1180	 /* next becomes the resource holder */
1181 my_queue->owner = next;
1182
1183 --(my_queue->count);
1184 if(my_queue->count < sem->shortest_queue->count)
1185 {
1186 sem->shortest_queue = my_queue;
1187 }
1188
1189 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
1190 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
1191
1192 /* determine new hp_waiter if necessary */
1193 if (next == my_queue->hp_waiter) {
1194 TRACE_TASK(next, "was highest-prio waiter\n");
1195 /* next has the highest priority --- it doesn't need to
1196 * inherit. However, we need to make sure that the
1197 * next-highest priority in the queue is reflected in
1198 * hp_waiter. */
1199 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
1200 if (my_queue->hp_waiter)
1201 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
1202 else
1203 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
1204 } else {
1205 /* Well, if next is not the highest-priority waiter,
1206 * then it ought to inherit the highest-priority
1207 * waiter's priority. */
1208 set_priority_inheritance(next, my_queue->hp_waiter);
1209 }
1210
1211 /* wake up next */
1212 wake_up_process(next);
1213 }
1214 else
1215 {
1216 TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
1217
1218 next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
1219
1220 /*
1221 if(next)
1222 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
1223 kfmlp_get_idx(sem, my_queue),
1224 next->comm, next->pid);
1225 */
1226
1227 my_queue->owner = next;
1228
1229 if(next)
1230 {
1231 TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
1232 kfmlp_get_idx(sem, my_queue),
1233 next->comm, next->pid);
1234
1235 /* wake up next */
1236 wake_up_process(next);
1237 }
1238 else
1239 {
1240 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
1241
1242 --(my_queue->count);
1243 if(my_queue->count < sem->shortest_queue->count)
1244 {
1245 sem->shortest_queue = my_queue;
1246 }
1247 }
1248 }
1249
1250 /* we lose the benefit of priority inheritance (if any) */
1251 if (tsk_rt(t)->inh_task)
1252 clear_priority_inheritance(t);
1253
1254out:
1255 spin_unlock_irqrestore(&sem->lock, flags);
1256
1257 return err;
1258}
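When the released queue has no local waiter, the unlock path above falls back to stealing. Taken on its own, the victim-selection rule is: among queues that still hold a waiter besides the owner (count > 1), pick the one whose hp_waiter has the highest priority. A small stand-alone sketch with made-up queue states, assuming a shorter period means higher priority:

#include <stdio.h>

struct fake_queue {
	int count;		/* waiters + holder */
	long hp_waiter_period;	/* 0 when there is no extra waiter */
};

int main(void)
{
	struct fake_queue q[3] = { { 1, 0 }, { 3, 20 }, { 2, 5 } };
	int i, victim = -1;

	for (i = 0; i < 3; i++) {
		if (q[i].count > 1 &&
		    (victim < 0 || q[i].hp_waiter_period < q[victim].hp_waiter_period))
			victim = i;
	}

	/* queue 2 is chosen: queue 0 has no extra waiters, and queue 2's
	 * top waiter (period 5) outranks queue 1's (period 20). */
	printf("steal from queue %d\n", victim);
	return 0;
}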
1259
1260int crm_kfmlp_close(struct litmus_lock* l)
1261{
1262 struct task_struct *t = current;
1263 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1264 struct kfmlp_queue *my_queue;
1265 unsigned long flags;
1266
1267 int owner;
1268
1269 spin_lock_irqsave(&sem->lock, flags);
1270
1271 my_queue = kfmlp_get_queue(sem, t);
1272 owner = (my_queue) ? (my_queue->owner == t) : 0;
1273
1274 spin_unlock_irqrestore(&sem->lock, flags);
1275
1276 if (owner)
1277 crm_kfmlp_unlock(l);
1278
1279 return 0;
1280}
1281
1282void crm_kfmlp_free(struct litmus_lock* l)
1283{
1284 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1285 kfree(sem->queues);
1286 kfree(sem);
1287}
1288
1289static struct litmus_lock_ops crm_kfmlp_lock_ops = {
1290 .close = crm_kfmlp_close,
1291 .lock = crm_kfmlp_lock,
1292 .unlock = crm_kfmlp_unlock,
1293 .deallocate = crm_kfmlp_free,
1294};
1295
1296static struct litmus_lock* crm_new_kfmlp(void* __user arg, int* ret_code)
1297{
1298 struct kfmlp_semaphore* sem;
1299 int num_resources = 0;
1300 int i;
1301
1302 if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
1303 {
1304 *ret_code = -EINVAL;
1305 return(NULL);
1306 }
1307 if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
1308 {
1309 *ret_code = -EINVAL;
1310 return(NULL);
1311 }
1312 if(num_resources < 1)
1313 {
1314 *ret_code = -EINVAL;
1315 return(NULL);
1316 }
1317
1318 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1319 if(!sem)
1320 {
1321 *ret_code = -ENOMEM;
1322 return NULL;
1323 }
1324
1325 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
1326 if(!sem->queues)
1327 {
1328 kfree(sem);
1329 *ret_code = -ENOMEM;
1330 return NULL;
1331 }
1332
1333 sem->litmus_lock.ops = &crm_kfmlp_lock_ops;
1334 spin_lock_init(&sem->lock);
1335 sem->num_resources = num_resources;
1336
1337 for(i = 0; i < num_resources; ++i)
1338 {
1339 sem->queues[i].owner = NULL;
1340 sem->queues[i].hp_waiter = NULL;
1341 init_waitqueue_head(&sem->queues[i].wait);
1342 sem->queues[i].count = 0;
1343 }
1344
1345 sem->shortest_queue = &sem->queues[0];
1346
1347 *ret_code = 0;
1348 return &sem->litmus_lock;
1349}
1350
1351
1352/* **** lock constructor **** */
1353
1354static long crm_allocate_lock(struct litmus_lock **lock, int type,
1355 void* __user arg)
1356{
1357 int err = -ENXIO;
1358
1359	 /* C-RM currently only supports the KFMLP for global resources
1360 WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
1361 switch (type) {
1362 case KFMLP_SEM:
1363 *lock = crm_new_kfmlp(arg, &err);
1364 break;
1365 };
1366
1367 return err;
1368}
1369
1370#endif // CONFIG_LITMUS_LOCKING
1371
1372
1373
1374
1375
1376
1377/* total number of clusters */
1378static int num_clusters;
1379/* we do not support clusters of different sizes */
1380static unsigned int cluster_size;
1381
1382#ifdef VERBOSE_INIT
1383static void print_cluster_topology(cpumask_var_t mask, int cpu)
1384{
1385 int chk;
1386 char buf[255];
1387
1388 chk = cpulist_scnprintf(buf, 254, mask);
1389 buf[chk] = '\0';
1390 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
1391
1392}
1393#endif
1394
1395static int clusters_allocated = 0;
1396
1397static void cleanup_crm(void)
1398{
1399 int i;
1400
1401 if (clusters_allocated) {
1402 for (i = 0; i < num_clusters; i++) {
1403 kfree(crm[i].cpus);
1404 kfree(crm[i].heap_node);
1405 free_cpumask_var(crm[i].cpu_map);
1406 }
1407
1408 kfree(crm);
1409 }
1410}
1411
1412static long crm_activate_plugin(void)
1413{
1414 int i, j, cpu, ccpu, cpu_count;
1415 cpu_entry_t *entry;
1416
1417 cpumask_var_t mask;
1418 int chk = 0;
1419
1420 /* de-allocate old clusters, if any */
1421 cleanup_crm();
1422
1423 printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n",
1424 cluster_config);
1425
1426 /* need to get cluster_size first */
1427 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
1428 return -ENOMEM;
1429
1430 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
1431 cluster_size = num_online_cpus();
1432 } else {
1433 chk = get_shared_cpu_map(mask, 0, cluster_config);
1434 if (chk) {
1435 /* if chk != 0 then it is the max allowed index */
1436 printk(KERN_INFO "C-RM: Cluster configuration = %d "
1437 "is not supported on this hardware.\n",
1438 cluster_config);
1439 /* User should notice that the configuration failed, so
1440 * let's bail out. */
1441 return -EINVAL;
1442 }
1443
1444 cluster_size = cpumask_weight(mask);
1445 }
1446
1447 if ((num_online_cpus() % cluster_size) != 0) {
1448 /* this can't be right, some cpus are left out */
1449 printk(KERN_ERR "C-RM: Trying to group %d cpus in %d!\n",
1450 num_online_cpus(), cluster_size);
1451 return -1;
1452 }
1453
1454 num_clusters = num_online_cpus() / cluster_size;
1455 printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n",
1456 num_clusters, cluster_size);
1457
1458 /* initialize clusters */
1459 crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC);
1460 for (i = 0; i < num_clusters; i++) {
1461
1462 crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
1463 GFP_ATOMIC);
1464 crm[i].heap_node = kmalloc(
1465 cluster_size * sizeof(struct bheap_node),
1466 GFP_ATOMIC);
1467 bheap_init(&(crm[i].cpu_heap));
1468 rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs);
1469
1470 if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC))
1471 return -ENOMEM;
1472 }
1473
1474	 /* cycle through clusters and add cpus to them */
1475 for (i = 0; i < num_clusters; i++) {
1476
1477 for_each_online_cpu(cpu) {
1478 /* check if the cpu is already in a cluster */
1479 for (j = 0; j < num_clusters; j++)
1480 if (cpumask_test_cpu(cpu, crm[j].cpu_map))
1481 break;
1482 /* if it is in a cluster go to next cpu */
1483 if (j < num_clusters &&
1484 cpumask_test_cpu(cpu, crm[j].cpu_map))
1485 continue;
1486
1487 /* this cpu isn't in any cluster */
1488 /* get the shared cpus */
1489 if (unlikely(cluster_config == GLOBAL_CLUSTER))
1490 cpumask_copy(mask, cpu_online_mask);
1491 else
1492 get_shared_cpu_map(mask, cpu, cluster_config);
1493
1494 cpumask_copy(crm[i].cpu_map, mask);
1495#ifdef VERBOSE_INIT
1496 print_cluster_topology(mask, cpu);
1497#endif
1498 /* add cpus to current cluster and init cpu_entry_t */
1499 cpu_count = 0;
1500 for_each_cpu(ccpu, crm[i].cpu_map) {
1501
1502 entry = &per_cpu(crm_cpu_entries, ccpu);
1503 crm[i].cpus[cpu_count] = entry;
1504 atomic_set(&entry->will_schedule, 0);
1505 entry->cpu = ccpu;
1506 entry->cluster = &crm[i];
1507 entry->hn = &(crm[i].heap_node[cpu_count]);
1508 bheap_node_init(&entry->hn, entry);
1509
1510 cpu_count++;
1511
1512 entry->linked = NULL;
1513 entry->scheduled = NULL;
1514 update_cpu_position(entry);
1515 }
1516 /* done with this cluster */
1517 break;
1518 }
1519 }
1520
1521#ifdef CONFIG_LITMUS_SOFTIRQD
1522 {
1523 /* distribute the daemons evenly across the clusters. */
1524 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1525 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1526 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1527
1528 int daemon = 0;
1529 for(i = 0; i < num_clusters; ++i)
1530 {
1531 int num_on_this_cluster = num_daemons_per_cluster;
1532 if(left_over)
1533 {
1534 ++num_on_this_cluster;
1535 --left_over;
1536 }
1537
1538 for(j = 0; j < num_on_this_cluster; ++j)
1539 {
1540 // first CPU of this cluster
1541 affinity[daemon++] = i*cluster_size;
1542 }
1543 }
1544
1545 spawn_klitirqd(affinity);
1546
1547 kfree(affinity);
1548 }
1549#endif
1550
1551#ifdef CONFIG_LITMUS_NVIDIA
1552 init_nvidia_info();
1553#endif
1554
1555 free_cpumask_var(mask);
1556 clusters_allocated = 1;
1557 return 0;
1558}
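The CONFIG_LITMUS_SOFTIRQD block above distributes the klitirqd daemons as evenly as possible, handing the remainder to the lower-numbered clusters and pinning each daemon to the first CPU of its cluster. The same arithmetic as a stand-alone sketch, with made-up daemon and cluster counts (5 daemons over 2 clusters of 4 CPUs yields 3 daemons on CPU 0 and 2 on CPU 4):

#include <stdio.h>

int main(void)
{
	const int nr_daemons   = 5;	/* stand-in for NR_LITMUS_SOFTIRQD */
	const int num_clusters = 2;	/* made-up cluster count */
	const int cluster_size = 4;	/* made-up CPUs per cluster */
	int per_cluster = nr_daemons / num_clusters;
	int left_over   = nr_daemons % num_clusters;
	int daemon = 0;
	int i, j;

	for (i = 0; i < num_clusters; i++) {
		int on_this_cluster = per_cluster;
		if (left_over) {
			on_this_cluster++;
			left_over--;
		}
		for (j = 0; j < on_this_cluster; j++)
			printf("daemon %d -> CPU %d\n", daemon++, i * cluster_size);
	}
	return 0;
}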
1559
1560/* Plugin object */
1561static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = {
1562 .plugin_name = "C-RM",
1563 .finish_switch = crm_finish_switch,
1564 .tick = crm_tick,
1565 .task_new = crm_task_new,
1566 .complete_job = complete_job,
1567 .task_exit = crm_task_exit,
1568 .schedule = crm_schedule,
1569 .task_wake_up = crm_task_wake_up,
1570 .task_block = crm_task_block,
1571 .admit_task = crm_admit_task,
1572 .activate_plugin = crm_activate_plugin,
1573#ifdef CONFIG_LITMUS_LOCKING
1574 .allocate_lock = crm_allocate_lock,
1575 .set_prio_inh = set_priority_inheritance,
1576 .clear_prio_inh = clear_priority_inheritance,
1577#endif
1578#ifdef CONFIG_LITMUS_SOFTIRQD
1579 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
1580 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
1581#endif
1582};
1583
1584static struct proc_dir_entry *cluster_file = NULL, *crm_dir = NULL;
1585
1586static int __init init_crm(void)
1587{
1588 int err, fs;
1589
1590 err = register_sched_plugin(&crm_plugin);
1591 if (!err) {
1592 fs = make_plugin_proc_dir(&crm_plugin, &crm_dir);
1593 if (!fs)
1594 cluster_file = create_cluster_file(crm_dir, &cluster_config);
1595 else
1596 printk(KERN_ERR "Could not allocate C-RM procfs dir.\n");
1597 }
1598 return err;
1599}
1600
1601static void clean_crm(void)
1602{
1603 cleanup_crm();
1604 if (cluster_file)
1605 remove_proc_entry("cluster", crm_dir);
1606 if (crm_dir)
1607 remove_plugin_proc_dir(&crm_plugin);
1608}
1609
1610module_init(init_crm);
1611module_exit(clean_crm);
diff --git a/litmus/sched_crm_srt.c b/litmus/sched_crm_srt.c
new file mode 100644
index 000000000000..4473f35e64cd
--- /dev/null
+++ b/litmus/sched_crm_srt.c
@@ -0,0 +1,1611 @@
1/*
2 * litmus/sched_crm_srt.c
3 *
4 * Implementation of the C-RM-SRT scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch dependent
9 * and currently works only on x86 --- and only with modern
10 * cpus that export cpuid4 information)
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - the default clustering is GLOBAL_CLUSTER (all online cpus are placed
15 * in a single cluster); supported clusters are: L1 (private cache:
16 * pedf), L2, L3, and ALL.
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32#include <linux/uaccess.h>
33
34#include <linux/module.h>
35
36#include <litmus/litmus.h>
37#include <litmus/jobs.h>
38#include <litmus/preempt.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/rm_srt_common.h>
41#include <litmus/sched_trace.h>
42
43#include <litmus/clustered.h>
44
45#include <litmus/bheap.h>
46
47/* to configure the cluster size */
48#include <litmus/litmus_proc.h>
49
50#ifdef CONFIG_SCHED_CPU_AFFINITY
51#include <litmus/affinity.h>
52#endif
53
54#ifdef CONFIG_LITMUS_SOFTIRQD
55#include <litmus/litmus_softirq.h>
56#endif
57
58#ifdef CONFIG_LITMUS_NVIDIA
59#include <litmus/nvidia_info.h>
60#endif
61
62/* Reference configuration variable. Determines which cache level is used to
63 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
64 * all CPUs form a single cluster (just like GSN-EDF).
65 */
66static enum cache_level cluster_config = GLOBAL_CLUSTER;
67
68struct clusterdomain;
69
70/* cpu_entry_t - maintain the linked and scheduled state
71 *
72 * A cpu also contains a pointer to the crm_srt_domain_t cluster
73 * that owns it (struct clusterdomain*)
74 */
75typedef struct {
76 int cpu;
77 struct clusterdomain* cluster; /* owning cluster */
78 struct task_struct* linked; /* only RT tasks */
79 struct task_struct* scheduled; /* only RT tasks */
80 atomic_t will_schedule; /* prevent unneeded IPIs */
81 struct bheap_node* hn;
82} cpu_entry_t;
83
84/* one cpu_entry_t per CPU */
85DEFINE_PER_CPU(cpu_entry_t, crm_srt_cpu_entries);
86
87#define set_will_schedule() \
88 (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 1))
89#define clear_will_schedule() \
90 (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 0))
91#define test_will_schedule(cpu) \
92 (atomic_read(&per_cpu(crm_srt_cpu_entries, cpu).will_schedule))
93
94/*
95 * In C-RM-SRT there is a crm_srt domain _per_ cluster
96 * The number of clusters is determined dynamically according to the
97 * total cpu number and the cluster size
98 */
99typedef struct clusterdomain {
100 /* rt_domain for this cluster */
101 rt_domain_t domain;
102 /* cpus in this cluster */
103 cpu_entry_t* *cpus;
104 /* map of this cluster cpus */
105 cpumask_var_t cpu_map;
106 /* the cpus queue themselves according to priority in here */
107 struct bheap_node *heap_node;
108 struct bheap cpu_heap;
109 /* lock for this cluster */
110#define crm_srt_lock domain.ready_lock
111} crm_srt_domain_t;
112
113/* a crm_srt_domain per cluster; allocation is done at init/activation time */
114crm_srt_domain_t *crm_srt;
115
116#define remote_cluster(cpu) ((crm_srt_domain_t *) per_cpu(crm_srt_cpu_entries, cpu).cluster)
117#define task_cpu_cluster(task) remote_cluster(get_partition(task))
118
119/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
120 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
121 * information during the initialization of the plugin (e.g., topology)
122#define WANT_ALL_SCHED_EVENTS
123 */
124#define VERBOSE_INIT
125
126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
127{
128 cpu_entry_t *a, *b;
129 a = _a->value;
130 b = _b->value;
131 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap.
133 */
134 return rm_srt_higher_prio(b->linked, a->linked);
135}
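The argument swap above is easy to miss. Assuming rm_srt_higher_prio() boils down to "shorter period wins", with idle CPUs ranked lowest of all, a comparator written the same way keeps the lowest-priority (or idle) CPU at the front, which is exactly the CPU check_for_preemptions() should consider first. A minimal sketch using qsort() in place of the bheap (fake_cpu and higher_prio() are illustrative, not kernel types):

#include <stdio.h>
#include <stdlib.h>

struct fake_cpu {
	int cpu;
	long linked_period;	/* 0 = idle, otherwise period of the linked job */
};

/* assumed RM rule: a non-idle job with the shorter period has higher priority */
static int higher_prio(long a, long b)
{
	if (!a)
		return 0;	/* an idle CPU never outranks anything */
	if (!b)
		return 1;
	return a < b;
}

/* lowest-priority CPU sorts first, mirroring the swapped arguments above */
static int lower_prio_first(const void *_a, const void *_b)
{
	const struct fake_cpu *a = _a, *b = _b;
	return higher_prio(a->linked_period, b->linked_period)
	     - higher_prio(b->linked_period, a->linked_period);
}

int main(void)
{
	struct fake_cpu cpus[] = { { 0, 10 }, { 1, 0 }, { 2, 50 } };

	qsort(cpus, 3, sizeof(cpus[0]), lower_prio_first);
	/* the idle CPU comes first, then the CPU running the longest period */
	printf("preempt CPU %d first\n", cpus[0].cpu);
	return 0;
}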
136
137/* update_cpu_position - Move the cpu entry to the correct place to maintain
138 * order in the cpu queue. Caller must hold crm_srt lock.
139 */
140static void update_cpu_position(cpu_entry_t *entry)
141{
142 crm_srt_domain_t *cluster = entry->cluster;
143
144 if (likely(bheap_node_in_heap(entry->hn)))
145 bheap_delete(cpu_lower_prio,
146 &cluster->cpu_heap,
147 entry->hn);
148
149 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
150}
151
152/* caller must hold crm_srt lock */
153static cpu_entry_t* lowest_prio_cpu(crm_srt_domain_t *cluster)
154{
155 struct bheap_node* hn;
156 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
157 return hn->value;
158}
159
160
161/* link_task_to_cpu - Update the link of a CPU.
162 * Handles the case where the to-be-linked task is already
163 * scheduled on a different CPU.
164 */
165static noinline void link_task_to_cpu(struct task_struct* linked,
166 cpu_entry_t *entry)
167{
168 cpu_entry_t *sched;
169 struct task_struct* tmp;
170 int on_cpu;
171
172 BUG_ON(linked && !is_realtime(linked));
173
174 /* Currently linked task is set to be unlinked. */
175 if (entry->linked) {
176 entry->linked->rt_param.linked_on = NO_CPU;
177 }
178
179 /* Link new task to CPU. */
180 if (linked) {
181 set_rt_flags(linked, RT_F_RUNNING);
182		/* handle the case where the task is already scheduled somewhere! */
183 on_cpu = linked->rt_param.scheduled_on;
184 if (on_cpu != NO_CPU) {
185 sched = &per_cpu(crm_srt_cpu_entries, on_cpu);
186 /* this should only happen if not linked already */
187 BUG_ON(sched->linked == linked);
188
189 /* If we are already scheduled on the CPU to which we
190 * wanted to link, we don't need to do the swap --
191 * we just link ourselves to the CPU and depend on
192 * the caller to get things right.
193 */
194 if (entry != sched) {
195 TRACE_TASK(linked,
196 "already scheduled on %d, updating link.\n",
197 sched->cpu);
198 tmp = sched->linked;
199 linked->rt_param.linked_on = sched->cpu;
200 sched->linked = linked;
201 update_cpu_position(sched);
202 linked = tmp;
203 }
204 }
205 if (linked) /* might be NULL due to swap */
206 linked->rt_param.linked_on = entry->cpu;
207 }
208 entry->linked = linked;
209#ifdef WANT_ALL_SCHED_EVENTS
210 if (linked)
211 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
212 else
213 TRACE("NULL linked to %d.\n", entry->cpu);
214#endif
215 update_cpu_position(entry);
216}
217
218/* unlink - Make sure a task is not linked any longer to an entry
219 * where it was linked before. Must hold crm_srt_lock.
220 */
221static noinline void unlink(struct task_struct* t)
222{
223 cpu_entry_t *entry;
224
225 if (t->rt_param.linked_on != NO_CPU) {
226 /* unlink */
227 entry = &per_cpu(crm_srt_cpu_entries, t->rt_param.linked_on);
228 t->rt_param.linked_on = NO_CPU;
229 link_task_to_cpu(NULL, entry);
230 } else if (is_queued(t)) {
231 /* This is an interesting situation: t is scheduled,
232 * but was just recently unlinked. It cannot be
233 * linked anywhere else (because then it would have
234 * been relinked to this CPU), thus it must be in some
235 * queue. We must remove it from the list in this
236 * case.
237 *
238	 * in the C-RM-SRT case it should be somewhere in the queue for
239	 * its domain, therefore we can get the domain using
240 * task_cpu_cluster
241 */
242 remove(&(task_cpu_cluster(t))->domain, t);
243 }
244}
245
246
247/* preempt - force a CPU to reschedule
248 */
249static void preempt(cpu_entry_t *entry)
250{
251 preempt_if_preemptable(entry->scheduled, entry->cpu);
252}
253
254/* requeue - Put an unlinked task into its cluster's rt domain.
255 * Caller must hold crm_srt_lock.
256 */
257static noinline void requeue(struct task_struct* task)
258{
259 crm_srt_domain_t *cluster = task_cpu_cluster(task);
260 BUG_ON(!task);
261 /* sanity check before insertion */
262 BUG_ON(is_queued(task));
263
264 if (is_released(task, litmus_clock()))
265 __add_ready(&cluster->domain, task);
266 else {
267 /* it has got to wait */
268 add_release(&cluster->domain, task);
269 }
270}
271
272#ifdef CONFIG_SCHED_CPU_AFFINITY
273static cpu_entry_t* crm_srt_get_nearest_available_cpu(
274 crm_srt_domain_t *cluster, cpu_entry_t* start)
275{
276 cpu_entry_t* affinity;
277
278 get_nearest_available_cpu(affinity, start, crm_srt_cpu_entries, -1);
279
280 /* make sure CPU is in our cluster */
281 if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
282 return(affinity);
283 else
284 return(NULL);
285}
286#endif
287
288
289/* check for any necessary preemptions */
290static void check_for_preemptions(crm_srt_domain_t *cluster)
291{
292 struct task_struct *task;
293 cpu_entry_t *last;
294
295 for(last = lowest_prio_cpu(cluster);
296 rm_srt_preemption_needed(&cluster->domain, last->linked);
297 last = lowest_prio_cpu(cluster)) {
298 /* preemption necessary */
299 task = __take_ready(&cluster->domain);
300#ifdef CONFIG_SCHED_CPU_AFFINITY
301 {
302 cpu_entry_t* affinity =
303 crm_srt_get_nearest_available_cpu(cluster,
304 &per_cpu(crm_srt_cpu_entries, task_cpu(task)));
305 if(affinity)
306 last = affinity;
307 else if(last->linked)
308 requeue(last->linked);
309 }
310#else
311 if (last->linked)
312 requeue(last->linked);
313#endif
314 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
315 task->pid, last->cpu);
316 link_task_to_cpu(task, last);
317 preempt(last);
318 }
319}
320
321/* crm_srt_job_arrival: task is either resumed or released */
322static noinline void crm_srt_job_arrival(struct task_struct* task)
323{
324 crm_srt_domain_t *cluster = task_cpu_cluster(task);
325 BUG_ON(!task);
326
327 requeue(task);
328 check_for_preemptions(cluster);
329}
330
331static void crm_srt_release_jobs(rt_domain_t* rt, struct bheap* tasks)
332{
333 crm_srt_domain_t* cluster = container_of(rt, crm_srt_domain_t, domain);
334 unsigned long flags;
335
336 raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
337
338 __merge_ready(&cluster->domain, tasks);
339 check_for_preemptions(cluster);
340
341 raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
342}
343
344/* caller holds crm_srt_lock */
345static noinline void job_completion(struct task_struct *t, int forced)
346{
347 BUG_ON(!t);
348
349 sched_trace_task_completion(t, forced);
350
351#ifdef CONFIG_LITMUS_NVIDIA
352 atomic_set(&tsk_rt(t)->nv_int_count, 0);
353#endif
354
355 TRACE_TASK(t, "job_completion().\n");
356
357 /* set flags */
358 set_rt_flags(t, RT_F_SLEEP);
359 /* prepare for next period */
360 prepare_for_next_period(t);
361 if (is_released(t, litmus_clock()))
362 sched_trace_task_release(t);
363 /* unlink */
364 unlink(t);
365 /* requeue
366 * But don't requeue a blocking task. */
367 if (is_running(t))
368 crm_srt_job_arrival(t);
369}
370
371/* crm_srt_tick - this function is called for every local timer
372 * interrupt.
373 *
374 * checks whether the current task has expired and checks
375 * whether we need to preempt it if it has not expired
376 */
377static void crm_srt_tick(struct task_struct* t)
378{
379 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
380 if (!is_np(t)) {
381 /* np tasks will be preempted when they become
382 * preemptable again
383 */
384 litmus_reschedule_local();
385 set_will_schedule();
386 TRACE("crm_srt_scheduler_tick: "
387 "%d is preemptable "
388 " => FORCE_RESCHED\n", t->pid);
389 } else if (is_user_np(t)) {
390 TRACE("crm_srt_scheduler_tick: "
391 "%d is non-preemptable, "
392 "preemption delayed.\n", t->pid);
393 request_exit_np(t);
394 }
395 }
396}
397
398/* Getting schedule() right is a bit tricky. schedule() may not make any
399 * assumptions on the state of the current task since it may be called for a
401 * number of reasons. The reasons include that a scheduler_tick() determined
402 * it was necessary, that sys_exit_np() was called, that some Linux
403 * subsystem determined so, or even (in the worst case) that there is a bug
403 * hidden somewhere. Thus, we must take extreme care to determine what the
404 * current state is.
405 *
406 * The CPU could currently be scheduling a task (or not), be linked (or not).
407 *
408 * The following assertions for the scheduled task could hold:
409 *
410 * - !is_running(scheduled) // the job blocks
411 * - scheduled->timeslice == 0 // the job completed (forcefully)
412 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
413 * - linked != scheduled // we need to reschedule (for any reason)
414 * - is_np(scheduled) // rescheduling must be delayed,
415 * sys_exit_np must be requested
416 *
417 * Any of these can occur together.
418 */
419static struct task_struct* crm_srt_schedule(struct task_struct * prev)
420{
421 cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries);
422 crm_srt_domain_t *cluster = entry->cluster;
423 int out_of_time, sleep, preempt, np, exists, blocks;
424 struct task_struct* next = NULL;
425
426 raw_spin_lock(&cluster->crm_srt_lock);
427 clear_will_schedule();
428
429 /* sanity checking */
430 BUG_ON(entry->scheduled && entry->scheduled != prev);
431 BUG_ON(entry->scheduled && !is_realtime(prev));
432 BUG_ON(is_realtime(prev) && !entry->scheduled);
433
434 /* (0) Determine state */
435 exists = entry->scheduled != NULL;
436 blocks = exists && !is_running(entry->scheduled);
437 out_of_time = exists &&
438 budget_enforced(entry->scheduled) &&
439 budget_exhausted(entry->scheduled);
440 np = exists && is_np(entry->scheduled);
441 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
442 preempt = entry->scheduled != entry->linked;
443
444#ifdef WANT_ALL_SCHED_EVENTS
445 TRACE_TASK(prev, "invoked crm_srt_schedule.\n");
446#endif
447
448 if (exists)
449 TRACE_TASK(prev,
450 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
451 "state:%d sig:%d\n",
452 blocks, out_of_time, np, sleep, preempt,
453 prev->state, signal_pending(prev));
454 if (entry->linked && preempt)
455 TRACE_TASK(prev, "will be preempted by %s/%d\n",
456 entry->linked->comm, entry->linked->pid);
457
458
459 /* If a task blocks we have no choice but to reschedule.
460 */
461 if (blocks)
462 unlink(entry->scheduled);
463
464 /* Request a sys_exit_np() call if we would like to preempt but cannot.
465 * We need to make sure to update the link structure anyway in case
466 * that we are still linked. Multiple calls to request_exit_np() don't
467 * hurt.
468 */
469 if (np && (out_of_time || preempt || sleep)) {
470 unlink(entry->scheduled);
471 request_exit_np(entry->scheduled);
472 }
473
474 /* Any task that is preemptable and either exhausts its execution
475 * budget or wants to sleep completes. We may have to reschedule after
476 * this. Don't do a job completion if we block (can't have timers running
477	 * for blocked jobs). Preemptions go first for the same reason.
478 */
479 if (!np && (out_of_time || sleep) && !blocks && !preempt)
480 job_completion(entry->scheduled, !sleep);
481
482 /* Link pending task if we became unlinked.
483 */
484 if (!entry->linked)
485 link_task_to_cpu(__take_ready(&cluster->domain), entry);
486
487 /* The final scheduling decision. Do we need to switch for some reason?
488 * If linked is different from scheduled, then select linked as next.
489 */
490 if ((!np || blocks) &&
491 entry->linked != entry->scheduled) {
492 /* Schedule a linked job? */
493 if (entry->linked) {
494 entry->linked->rt_param.scheduled_on = entry->cpu;
495 next = entry->linked;
496 }
497 if (entry->scheduled) {
498 /* not gonna be scheduled soon */
499 entry->scheduled->rt_param.scheduled_on = NO_CPU;
500 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
501 }
502 } else
503 /* Only override Linux scheduler if we have a real-time task
504 * scheduled that needs to continue.
505 */
506 if (exists)
507 next = prev;
508
509 sched_state_task_picked();
510 raw_spin_unlock(&cluster->crm_srt_lock);
511
512#ifdef WANT_ALL_SCHED_EVENTS
513 TRACE("crm_srt_lock released, next=0x%p\n", next);
514
515 if (next)
516 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
517 else if (exists && !next)
518 TRACE("becomes idle at %llu.\n", litmus_clock());
519#endif
520
521
522 return next;
523}
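One combination of the flags above is worth spelling out: a job that blocks just as its budget runs out is unlinked, but job_completion() is deliberately skipped, since completing a blocked job would leave release timers armed for it. A tiny sketch of just that guard, with a made-up scenario for the flag values:

#include <stdio.h>

int main(void)
{
	/* a job that blocks exactly when its budget is exhausted */
	int blocks = 1, out_of_time = 1, np = 0, sleep = 0, preempt = 1;

	int unlink_it   = blocks;
	int complete_it = !np && (out_of_time || sleep) && !blocks && !preempt;

	/* prints unlink=1 complete=0: the job is unlinked but not completed */
	printf("unlink=%d complete=%d\n", unlink_it, complete_it);
	return 0;
}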
524
525
526/* _finish_switch - we just finished the switch away from prev
527 */
528static void crm_srt_finish_switch(struct task_struct *prev)
529{
530 cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries);
531
532 entry->scheduled = is_realtime(current) ? current : NULL;
533#ifdef WANT_ALL_SCHED_EVENTS
534 TRACE_TASK(prev, "switched away from\n");
535#endif
536}
537
538
539/* Prepare a task for running in RT mode
540 */
541static void crm_srt_task_new(struct task_struct * t, int on_rq, int running)
542{
543 unsigned long flags;
544 cpu_entry_t* entry;
545 crm_srt_domain_t* cluster;
546
547	TRACE("crm srt: task new %d\n", t->pid);
548
549 /* the cluster doesn't change even if t is running */
550 cluster = task_cpu_cluster(t);
551
552 raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
553
554 /* setup job params */
555 release_at(t, litmus_clock());
556
557 if (running) {
558 entry = &per_cpu(crm_srt_cpu_entries, task_cpu(t));
559 BUG_ON(entry->scheduled);
560
561 entry->scheduled = t;
562 tsk_rt(t)->scheduled_on = task_cpu(t);
563 } else {
564 t->rt_param.scheduled_on = NO_CPU;
565 }
566 t->rt_param.linked_on = NO_CPU;
567
568 crm_srt_job_arrival(t);
569 raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
570}
571
572static void crm_srt_task_wake_up(struct task_struct *task)
573{
574 unsigned long flags;
575 //lt_t now;
576 crm_srt_domain_t *cluster;
577
578 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
579
580 cluster = task_cpu_cluster(task);
581
582 raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
583
584#if 0 // sporadic task model
585 /* We need to take suspensions because of semaphores into
586 * account! If a job resumes after being suspended due to acquiring
587 * a semaphore, it should never be treated as a new job release.
588 */
589 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
590 set_rt_flags(task, RT_F_RUNNING);
591 } else {
592 now = litmus_clock();
593 if (is_tardy(task, now)) {
594 /* new sporadic release */
595 release_at(task, now);
596 sched_trace_task_release(task);
597 }
598 else {
599 if (task->rt.time_slice) {
600 /* came back in time before deadline
601 */
602 set_rt_flags(task, RT_F_RUNNING);
603 }
604 }
605 }
606#endif
607
608 //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
609 set_rt_flags(task, RT_F_RUNNING); // periodic model
610
611 if(tsk_rt(task)->linked_on == NO_CPU)
612 crm_srt_job_arrival(task);
613 else
614 TRACE("WTF, mate?!\n");
615
616 raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
617}
618
619static void crm_srt_task_block(struct task_struct *t)
620{
621 unsigned long flags;
622 crm_srt_domain_t *cluster;
623
624 TRACE_TASK(t, "block at %llu\n", litmus_clock());
625
626 cluster = task_cpu_cluster(t);
627
628 /* unlink if necessary */
629 raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
630 unlink(t);
631 raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
632
633 BUG_ON(!is_realtime(t));
634}
635
636
637static void crm_srt_task_exit(struct task_struct * t)
638{
639 unsigned long flags;
640 crm_srt_domain_t *cluster = task_cpu_cluster(t);
641
642 /* unlink if necessary */
643 raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
644 unlink(t);
645 if (tsk_rt(t)->scheduled_on != NO_CPU) {
646 cpu_entry_t *cpu;
647 cpu = &per_cpu(crm_srt_cpu_entries, tsk_rt(t)->scheduled_on);
648 cpu->scheduled = NULL;
649 tsk_rt(t)->scheduled_on = NO_CPU;
650 }
651 raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
652
653 BUG_ON(!is_realtime(t));
654 TRACE_TASK(t, "RIP\n");
655}
656
657static long crm_srt_admit_task(struct task_struct* tsk)
658{
659 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
660}
661
662
663
664
665
666
667
668
669
670
671
672
673
674#ifdef CONFIG_LITMUS_LOCKING
675
676#include <litmus/fdso.h>
677
678
679static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
680{
681 int linked_on;
682 int check_preempt = 0;
683
684 crm_srt_domain_t* cluster = task_cpu_cluster(t);
685
686 if(prio_inh != NULL)
687 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
688 else
689 TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
690
691 sched_trace_eff_prio_change(t, prio_inh);
692
693 tsk_rt(t)->inh_task = prio_inh;
694
695 linked_on = tsk_rt(t)->linked_on;
696
697 /* If it is scheduled, then we need to reorder the CPU heap. */
698 if (linked_on != NO_CPU) {
699 TRACE_TASK(t, "%s: linked on %d\n",
700 __FUNCTION__, linked_on);
701 /* Holder is scheduled; need to re-order CPUs.
702 * We can't use heap_decrease() here since
703 * the cpu_heap is ordered in reverse direction, so
704 * it is actually an increase. */
705 bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
706 per_cpu(crm_srt_cpu_entries, linked_on).hn);
707 bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
708 per_cpu(crm_srt_cpu_entries, linked_on).hn);
709 } else {
710 /* holder may be queued: first stop queue changes */
711 raw_spin_lock(&cluster->domain.release_lock);
712 if (is_queued(t)) {
713 TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
714
715 /* We need to update the position of holder in some
716 * heap. Note that this could be a release heap if
717 * budget enforcement is used and this job overran. */
718 check_preempt = !bheap_decrease(rm_srt_ready_order, tsk_rt(t)->heap_node);
719
720 } else {
721 /* Nothing to do: if it is not queued and not linked
722 * then it is either sleeping or currently being moved
723 * by other code (e.g., a timer interrupt handler) that
724 * will use the correct priority when enqueuing the
725 * task. */
726 TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
727 }
728 raw_spin_unlock(&cluster->domain.release_lock);
729
730 /* If holder was enqueued in a release heap, then the following
731 * preemption check is pointless, but we can't easily detect
732 * that case. If you want to fix this, then consider that
733 * simply adding a state flag requires O(n) time to update when
734 * releasing n tasks, which conflicts with the goal to have
735 * O(log n) merges. */
736 if (check_preempt) {
737 /* heap_decrease() hit the top level of the heap: make
738 * sure preemption checks get the right task, not the
739 * potentially stale cache. */
740 bheap_uncache_min(rm_srt_ready_order, &cluster->domain.ready_queue);
741 check_for_preemptions(cluster);
742 }
743 }
744}
745
746/* called with IRQs off */
747static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
748{
749 crm_srt_domain_t* cluster = task_cpu_cluster(t);
750
751 raw_spin_lock(&cluster->crm_srt_lock);
752
753 __set_priority_inheritance(t, prio_inh);
754
755#ifdef CONFIG_LITMUS_SOFTIRQD
756 if(tsk_rt(t)->cur_klitirqd != NULL)
757 {
758 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
759 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
760
761 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
762 }
763#endif
764
765 raw_spin_unlock(&cluster->crm_srt_lock);
766}
767
768
769/* called with IRQs off */
770static void __clear_priority_inheritance(struct task_struct* t)
771{
772 TRACE_TASK(t, "priority restored\n");
773
774 if(tsk_rt(t)->scheduled_on != NO_CPU)
775 {
776 sched_trace_eff_prio_change(t, NULL);
777
778 tsk_rt(t)->inh_task = NULL;
779
780 /* Check if rescheduling is necessary. We can't use heap_decrease()
781 * since the priority was effectively lowered. */
782 unlink(t);
783 crm_srt_job_arrival(t);
784 }
785 else
786 {
787 __set_priority_inheritance(t, NULL);
788 }
789
790#ifdef CONFIG_LITMUS_SOFTIRQD
791 if(tsk_rt(t)->cur_klitirqd != NULL)
792 {
793 TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
794 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
795
796 if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
797 {
798 sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
799
800 tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
801
802 /* Check if rescheduling is necessary. We can't use heap_decrease()
803 * since the priority was effectively lowered. */
804 unlink(tsk_rt(t)->cur_klitirqd);
805 crm_srt_job_arrival(tsk_rt(t)->cur_klitirqd);
806 }
807 else
808 {
809 __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
810 }
811 }
812#endif
813}
814
815/* called with IRQs off */
816static void clear_priority_inheritance(struct task_struct* t)
817{
818 crm_srt_domain_t* cluster = task_cpu_cluster(t);
819
820 raw_spin_lock(&cluster->crm_srt_lock);
821 __clear_priority_inheritance(t);
822 raw_spin_unlock(&cluster->crm_srt_lock);
823}
824
825
826
827#ifdef CONFIG_LITMUS_SOFTIRQD
828/* called with IRQs off */
829static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
830 struct task_struct* old_owner,
831 struct task_struct* new_owner)
832{
833 crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd);
834
835 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
836
837 raw_spin_lock(&cluster->crm_srt_lock);
838
839 if(old_owner != new_owner)
840 {
841 if(old_owner)
842 {
843 // unreachable?
844 tsk_rt(old_owner)->cur_klitirqd = NULL;
845 }
846
847 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
848 new_owner->comm, new_owner->pid);
849
850 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
851 }
852
853 __set_priority_inheritance(klitirqd,
854 (tsk_rt(new_owner)->inh_task == NULL) ?
855 new_owner :
856 tsk_rt(new_owner)->inh_task);
857
858 raw_spin_unlock(&cluster->crm_srt_lock);
859}
860
861/* called with IRQs off */
862static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
863 struct task_struct* old_owner)
864{
865 crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd);
866
867 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
868
869 raw_spin_lock(&cluster->crm_srt_lock);
870
871 TRACE_TASK(klitirqd, "priority restored\n");
872
873 if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
874 {
875 tsk_rt(klitirqd)->inh_task = NULL;
876
877 /* Check if rescheduling is necessary. We can't use heap_decrease()
878 * since the priority was effectively lowered. */
879 unlink(klitirqd);
880 crm_srt_job_arrival(klitirqd);
881 }
882 else
883 {
884 __set_priority_inheritance(klitirqd, NULL);
885 }
886
887 tsk_rt(old_owner)->cur_klitirqd = NULL;
888
889 raw_spin_unlock(&cluster->crm_srt_lock);
890}
891#endif // CONFIG_LITMUS_SOFTIRQD
892
893
894/* ******************** KFMLP support ********************** */
895
896/* struct for semaphore with priority inheritance */
897struct kfmlp_queue
898{
899 wait_queue_head_t wait;
900 struct task_struct* owner;
901 struct task_struct* hp_waiter;
902 int count; /* number of waiters + holder */
903};
904
905struct kfmlp_semaphore
906{
907 struct litmus_lock litmus_lock;
908
909 spinlock_t lock;
910
911 int num_resources; /* aka k */
912 struct kfmlp_queue *queues; /* array */
913 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
914};
915
916static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
917{
918 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
919}
920
921static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
922 struct kfmlp_queue* queue)
923{
924 return (queue - &sem->queues[0]);
925}
926
927static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
928 struct task_struct* holder)
929{
930 int i;
931 for(i = 0; i < sem->num_resources; ++i)
932 if(sem->queues[i].owner == holder)
933 return(&sem->queues[i]);
934 return(NULL);
935}
936
937/* caller is responsible for locking */
938static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
939 struct task_struct *skip)
940{
941 struct list_head *pos;
942 struct task_struct *queued, *found = NULL;
943
944 list_for_each(pos, &kqueue->wait.task_list) {
945 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
946 task_list)->private;
947
948 /* Compare task prios, find high prio task. */
949 if (queued != skip && rm_srt_higher_prio(queued, found))
950 found = queued;
951 }
952 return found;
953}
954
955static inline struct kfmlp_queue* kfmlp_find_shortest(
956 struct kfmlp_semaphore* sem,
957 struct kfmlp_queue* search_start)
958{
959 // we start our search at search_start instead of at the beginning of the
960 // queue list to load-balance across all resources.
961 struct kfmlp_queue* step = search_start;
962 struct kfmlp_queue* shortest = sem->shortest_queue;
963
964 do
965 {
966 step = (step+1 != &sem->queues[sem->num_resources]) ?
967 step+1 : &sem->queues[0];
968 if(step->count < shortest->count)
969 {
970 shortest = step;
971 if(step->count == 0)
972 break; /* can't get any shorter */
973 }
974 }while(step != search_start);
975
976 return(shortest);
977}
978
979static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
980{
981 /* must hold sem->lock */
982
983 struct kfmlp_queue *my_queue = NULL;
984 struct task_struct *max_hp = NULL;
985
986
987 struct list_head *pos;
988 struct task_struct *queued;
989 int i;
990
991 for(i = 0; i < sem->num_resources; ++i)
992 {
993 if( (sem->queues[i].count > 1) &&
994 ((my_queue == NULL) ||
995 (rm_srt_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
996 {
997 my_queue = &sem->queues[i];
998 }
999 }
1000
1001 if(my_queue)
1002 {
1003 crm_srt_domain_t* cluster;
1004
1005 max_hp = my_queue->hp_waiter;
1006 BUG_ON(!max_hp);
1007
1008 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
1009 kfmlp_get_idx(sem, my_queue),
1010 max_hp->comm, max_hp->pid,
1011 kfmlp_get_idx(sem, my_queue));
1012
1013 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
1014
1015 /*
1016 if(my_queue->hp_waiter)
1017 TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
1018 kfmlp_get_idx(sem, my_queue),
1019 my_queue->hp_waiter->comm,
1020 my_queue->hp_waiter->pid);
1021 else
1022 TRACE_CUR("queue %d: new hp_waiter is %p\n",
1023 kfmlp_get_idx(sem, my_queue), NULL);
1024 */
1025
1026 cluster = task_cpu_cluster(max_hp);
1027
1028 raw_spin_lock(&cluster->crm_srt_lock);
1029
1030 /*
1031 if(my_queue->owner)
1032 TRACE_CUR("queue %d: owner is %s/%d\n",
1033 kfmlp_get_idx(sem, my_queue),
1034 my_queue->owner->comm,
1035 my_queue->owner->pid);
1036 else
1037 TRACE_CUR("queue %d: owner is %p\n",
1038 kfmlp_get_idx(sem, my_queue),
1039 NULL);
1040 */
1041
1042 if(tsk_rt(my_queue->owner)->inh_task == max_hp)
1043 {
1044 __clear_priority_inheritance(my_queue->owner);
1045 if(my_queue->hp_waiter != NULL)
1046 {
1047 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1048 }
1049 }
1050 raw_spin_unlock(&cluster->crm_srt_lock);
1051
1052 list_for_each(pos, &my_queue->wait.task_list)
1053 {
1054 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1055 task_list)->private;
1056 /* Compare task prios, find high prio task. */
1057 if (queued == max_hp)
1058 {
1059 /*
1060 TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
1061 kfmlp_get_idx(sem, my_queue));
1062 */
1063 __remove_wait_queue(&my_queue->wait,
1064 list_entry(pos, wait_queue_t, task_list));
1065 break;
1066 }
1067 }
1068 --(my_queue->count);
1069 }
1070
1071 return(max_hp);
1072}
1073
1074int crm_srt_kfmlp_lock(struct litmus_lock* l)
1075{
1076 struct task_struct* t = current;
1077 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1078 struct kfmlp_queue* my_queue;
1079 wait_queue_t wait;
1080 unsigned long flags;
1081
1082 if (!is_realtime(t))
1083 return -EPERM;
1084
1085 spin_lock_irqsave(&sem->lock, flags);
1086
1087 my_queue = sem->shortest_queue;
1088
1089 if (my_queue->owner) {
1090 /* resource is not free => must suspend and wait */
1091 TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
1092 kfmlp_get_idx(sem, my_queue));
1093
1094 init_waitqueue_entry(&wait, t);
1095
1096 /* FIXME: interruptible would be nice some day */
1097 set_task_state(t, TASK_UNINTERRUPTIBLE);
1098
1099 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
1100
1101 /* check if we need to activate priority inheritance */
1102 if (rm_srt_higher_prio(t, my_queue->hp_waiter))
1103 {
1104 my_queue->hp_waiter = t;
1105 if (rm_srt_higher_prio(t, my_queue->owner))
1106 {
1107 set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
1108 }
1109 }
1110
1111 ++(my_queue->count);
1112 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1113
1114 /* release lock before sleeping */
1115 spin_unlock_irqrestore(&sem->lock, flags);
1116
1117 /* We depend on the FIFO order. Thus, we don't need to recheck
1118 * when we wake up; we are guaranteed to have the lock since
1119 * there is only one wake up per release (or steal).
1120 */
1121 schedule();
1122
1123
1124 if(my_queue->owner == t)
1125 {
1126 TRACE_CUR("queue %d: acquired through waiting\n",
1127 kfmlp_get_idx(sem, my_queue));
1128 }
1129 else
1130 {
1131 			/* this case may happen if our wait entry was stolen
1132 			   between queues. Record where we went. */
1133 my_queue = kfmlp_get_queue(sem, t);
1134 BUG_ON(!my_queue);
1135 TRACE_CUR("queue %d: acquired through stealing\n",
1136 kfmlp_get_idx(sem, my_queue));
1137 }
1138 }
1139 else
1140 {
1141 TRACE_CUR("queue %d: acquired immediately\n",
1142 kfmlp_get_idx(sem, my_queue));
1143
1144 my_queue->owner = t;
1145
1146 ++(my_queue->count);
1147 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
1148
1149 spin_unlock_irqrestore(&sem->lock, flags);
1150 }
1151
1152 return kfmlp_get_idx(sem, my_queue);
1153}
1154
1155int crm_srt_kfmlp_unlock(struct litmus_lock* l)
1156{
1157 struct task_struct *t = current, *next;
1158 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1159 struct kfmlp_queue *my_queue;
1160 unsigned long flags;
1161 int err = 0;
1162
1163 spin_lock_irqsave(&sem->lock, flags);
1164
1165 my_queue = kfmlp_get_queue(sem, t);
1166
1167 if (!my_queue) {
1168 err = -EINVAL;
1169 goto out;
1170 }
1171
1172 /* check if there are jobs waiting for this resource */
1173 next = __waitqueue_remove_first(&my_queue->wait);
1174 if (next) {
1175 /*
1176 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1177 kfmlp_get_idx(sem, my_queue),
1178 next->comm, next->pid);
1179 */
1180 		/* next becomes the resource holder */
1181 my_queue->owner = next;
1182
1183 --(my_queue->count);
1184 if(my_queue->count < sem->shortest_queue->count)
1185 {
1186 sem->shortest_queue = my_queue;
1187 }
1188
1189 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
1190 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
1191
1192 /* determine new hp_waiter if necessary */
1193 if (next == my_queue->hp_waiter) {
1194 TRACE_TASK(next, "was highest-prio waiter\n");
1195 /* next has the highest priority --- it doesn't need to
1196 * inherit. However, we need to make sure that the
1197 * next-highest priority in the queue is reflected in
1198 * hp_waiter. */
1199 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
1200 if (my_queue->hp_waiter)
1201 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
1202 else
1203 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
1204 } else {
1205 /* Well, if next is not the highest-priority waiter,
1206 * then it ought to inherit the highest-priority
1207 * waiter's priority. */
1208 set_priority_inheritance(next, my_queue->hp_waiter);
1209 }
1210
1211 /* wake up next */
1212 wake_up_process(next);
1213 }
1214 else
1215 {
1216 TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
1217
1218 next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
1219
1220 /*
1221 if(next)
1222 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
1223 kfmlp_get_idx(sem, my_queue),
1224 next->comm, next->pid);
1225 */
1226
1227 my_queue->owner = next;
1228
1229 if(next)
1230 {
1231 TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
1232 kfmlp_get_idx(sem, my_queue),
1233 next->comm, next->pid);
1234
1235 /* wake up next */
1236 wake_up_process(next);
1237 }
1238 else
1239 {
1240 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
1241
1242 --(my_queue->count);
1243 if(my_queue->count < sem->shortest_queue->count)
1244 {
1245 sem->shortest_queue = my_queue;
1246 }
1247 }
1248 }
1249
1250 /* we lose the benefit of priority inheritance (if any) */
1251 if (tsk_rt(t)->inh_task)
1252 clear_priority_inheritance(t);
1253
1254out:
1255 spin_unlock_irqrestore(&sem->lock, flags);
1256
1257 return err;
1258}
1259
1260int crm_srt_kfmlp_close(struct litmus_lock* l)
1261{
1262 struct task_struct *t = current;
1263 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1264 struct kfmlp_queue *my_queue;
1265 unsigned long flags;
1266
1267 int owner;
1268
1269 spin_lock_irqsave(&sem->lock, flags);
1270
1271 my_queue = kfmlp_get_queue(sem, t);
1272 owner = (my_queue) ? (my_queue->owner == t) : 0;
1273
1274 spin_unlock_irqrestore(&sem->lock, flags);
1275
1276 if (owner)
1277 crm_srt_kfmlp_unlock(l);
1278
1279 return 0;
1280}
1281
1282void crm_srt_kfmlp_free(struct litmus_lock* l)
1283{
1284 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
1285 kfree(sem->queues);
1286 kfree(sem);
1287}
1288
1289static struct litmus_lock_ops crm_srt_kfmlp_lock_ops = {
1290 .close = crm_srt_kfmlp_close,
1291 .lock = crm_srt_kfmlp_lock,
1292 .unlock = crm_srt_kfmlp_unlock,
1293 .deallocate = crm_srt_kfmlp_free,
1294};
1295
1296static struct litmus_lock* crm_srt_new_kfmlp(void* __user arg, int* ret_code)
1297{
1298 struct kfmlp_semaphore* sem;
1299 int num_resources = 0;
1300 int i;
1301
1302 if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
1303 {
1304 *ret_code = -EINVAL;
1305 return(NULL);
1306 }
1307 if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
1308 {
1309 *ret_code = -EINVAL;
1310 return(NULL);
1311 }
1312 if(num_resources < 1)
1313 {
1314 *ret_code = -EINVAL;
1315 return(NULL);
1316 }
1317
1318 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1319 if(!sem)
1320 {
1321 *ret_code = -ENOMEM;
1322 return NULL;
1323 }
1324
1325 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
1326 if(!sem->queues)
1327 {
1328 kfree(sem);
1329 *ret_code = -ENOMEM;
1330 return NULL;
1331 }
1332
1333 sem->litmus_lock.ops = &crm_srt_kfmlp_lock_ops;
1334 spin_lock_init(&sem->lock);
1335 sem->num_resources = num_resources;
1336
1337 for(i = 0; i < num_resources; ++i)
1338 {
1339 sem->queues[i].owner = NULL;
1340 sem->queues[i].hp_waiter = NULL;
1341 init_waitqueue_head(&sem->queues[i].wait);
1342 sem->queues[i].count = 0;
1343 }
1344
1345 sem->shortest_queue = &sem->queues[0];
1346
1347 *ret_code = 0;
1348 return &sem->litmus_lock;
1349}
1350
1351
1352/* **** lock constructor **** */
1353
1354static long crm_srt_allocate_lock(struct litmus_lock **lock, int type,
1355 void* __user arg)
1356{
1357 int err = -ENXIO;
1358
1359	/* C-RM-SRT currently only supports the KFMLP for global resources
1360	   WITHIN a given cluster.  DO NOT USE CROSS-CLUSTER! */
1361 switch (type) {
1362 case KFMLP_SEM:
1363 *lock = crm_srt_new_kfmlp(arg, &err);
1364 break;
1365 };
1366
1367 return err;
1368}
1369
1370#endif // CONFIG_LITMUS_LOCKING
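
A sketch of how a real-time task might exercise this lock from user space, assuming a liblitmus build for this branch that knows the KFMLP_SEM object type and provides the usual od_openx()/litmus_lock()/litmus_unlock()/od_close() wrappers (those names and signatures are assumptions, not taken from this patch; only the kernel-side semantics shown above are):

#include <litmus.h>   /* assumed liblitmus header */

int use_replica(int namespace_fd)
{
	int num_replicas = 2;   /* read by crm_srt_new_kfmlp() via copy_from_user() */
	int od, replica, err;

	/* create/attach the k-exclusion group in the given FD namespace */
	od = od_openx(namespace_fd, KFMLP_SEM, 0, &num_replicas);
	if (od < 0)
		return od;

	/* crm_srt_kfmlp_lock() returns the index of the replica acquired */
	replica = litmus_lock(od);
	if (replica >= 0) {
		/* ... use resource instance 'replica' ... */
		litmus_unlock(od);
	}

	err = od_close(od);
	return (replica < 0) ? replica : err;
}
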
1371
1372
1373
1374
1375
1376
1377/* total number of clusters */
1378static int num_clusters;
1379/* we do not support clusters of different sizes */
1380static unsigned int cluster_size;
1381
1382#ifdef VERBOSE_INIT
1383static void print_cluster_topology(cpumask_var_t mask, int cpu)
1384{
1385 int chk;
1386 char buf[255];
1387
1388 chk = cpulist_scnprintf(buf, 254, mask);
1389 buf[chk] = '\0';
1390 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
1391
1392}
1393#endif
1394
1395static int clusters_allocated = 0;
1396
1397static void cleanup_crm_srt(void)
1398{
1399 int i;
1400
1401 if (clusters_allocated) {
1402 for (i = 0; i < num_clusters; i++) {
1403 kfree(crm_srt[i].cpus);
1404 kfree(crm_srt[i].heap_node);
1405 free_cpumask_var(crm_srt[i].cpu_map);
1406 }
1407
1408 kfree(crm_srt);
1409 }
1410}
1411
1412static long crm_srt_activate_plugin(void)
1413{
1414 int i, j, cpu, ccpu, cpu_count;
1415 cpu_entry_t *entry;
1416
1417 cpumask_var_t mask;
1418 int chk = 0;
1419
1420 /* de-allocate old clusters, if any */
1421 cleanup_crm_srt();
1422
1423 printk(KERN_INFO "C-RM-SRT: Activate Plugin, cluster configuration = %d\n",
1424 cluster_config);
1425
1426 /* need to get cluster_size first */
1427 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
1428 return -ENOMEM;
1429
1430 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
1431 cluster_size = num_online_cpus();
1432 } else {
1433 chk = get_shared_cpu_map(mask, 0, cluster_config);
1434 if (chk) {
1435 /* if chk != 0 then it is the max allowed index */
1436 printk(KERN_INFO "C-RM-SRT: Cluster configuration = %d "
1437 "is not supported on this hardware.\n",
1438 cluster_config);
1439 /* User should notice that the configuration failed, so
1440 * let's bail out. */
1441 return -EINVAL;
1442 }
1443
1444 cluster_size = cpumask_weight(mask);
1445 }
1446
1447 if ((num_online_cpus() % cluster_size) != 0) {
1448 /* this can't be right, some cpus are left out */
1449 printk(KERN_ERR "C-RM-SRT: Trying to group %d cpus in %d!\n",
1450 num_online_cpus(), cluster_size);
1451 return -1;
1452 }
1453
1454 num_clusters = num_online_cpus() / cluster_size;
1455 printk(KERN_INFO "C-RM-SRT: %d cluster(s) of size = %d\n",
1456 num_clusters, cluster_size);
1457
1458 /* initialize clusters */
1459 crm_srt = kmalloc(num_clusters * sizeof(crm_srt_domain_t), GFP_ATOMIC);
1460 for (i = 0; i < num_clusters; i++) {
1461
1462 crm_srt[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
1463 GFP_ATOMIC);
1464 crm_srt[i].heap_node = kmalloc(
1465 cluster_size * sizeof(struct bheap_node),
1466 GFP_ATOMIC);
1467 bheap_init(&(crm_srt[i].cpu_heap));
1468 rm_srt_domain_init(&(crm_srt[i].domain), NULL, crm_srt_release_jobs);
1469
1470 if(!zalloc_cpumask_var(&crm_srt[i].cpu_map, GFP_ATOMIC))
1471 return -ENOMEM;
1472 }
1473
1474	/* cycle through clusters and add cpus to them */
1475 for (i = 0; i < num_clusters; i++) {
1476
1477 for_each_online_cpu(cpu) {
1478 /* check if the cpu is already in a cluster */
1479 for (j = 0; j < num_clusters; j++)
1480 if (cpumask_test_cpu(cpu, crm_srt[j].cpu_map))
1481 break;
1482 /* if it is in a cluster go to next cpu */
1483 if (j < num_clusters &&
1484 cpumask_test_cpu(cpu, crm_srt[j].cpu_map))
1485 continue;
1486
1487 /* this cpu isn't in any cluster */
1488 /* get the shared cpus */
1489 if (unlikely(cluster_config == GLOBAL_CLUSTER))
1490 cpumask_copy(mask, cpu_online_mask);
1491 else
1492 get_shared_cpu_map(mask, cpu, cluster_config);
1493
1494 cpumask_copy(crm_srt[i].cpu_map, mask);
1495#ifdef VERBOSE_INIT
1496 print_cluster_topology(mask, cpu);
1497#endif
1498 /* add cpus to current cluster and init cpu_entry_t */
1499 cpu_count = 0;
1500 for_each_cpu(ccpu, crm_srt[i].cpu_map) {
1501
1502 entry = &per_cpu(crm_srt_cpu_entries, ccpu);
1503 crm_srt[i].cpus[cpu_count] = entry;
1504 atomic_set(&entry->will_schedule, 0);
1505 entry->cpu = ccpu;
1506 entry->cluster = &crm_srt[i];
1507 entry->hn = &(crm_srt[i].heap_node[cpu_count]);
1508 bheap_node_init(&entry->hn, entry);
1509
1510 cpu_count++;
1511
1512 entry->linked = NULL;
1513 entry->scheduled = NULL;
1514 update_cpu_position(entry);
1515 }
1516 /* done with this cluster */
1517 break;
1518 }
1519 }
1520
1521#ifdef CONFIG_LITMUS_SOFTIRQD
1522 {
1523 /* distribute the daemons evenly across the clusters. */
1524 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1525 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1526 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1527
1528 int daemon = 0;
1529 for(i = 0; i < num_clusters; ++i)
1530 {
1531 int num_on_this_cluster = num_daemons_per_cluster;
1532 if(left_over)
1533 {
1534 ++num_on_this_cluster;
1535 --left_over;
1536 }
1537
1538 for(j = 0; j < num_on_this_cluster; ++j)
1539 {
1540 // first CPU of this cluster
1541 affinity[daemon++] = i*cluster_size;
1542 }
1543 }
1544
1545 spawn_klitirqd(affinity);
1546
1547 kfree(affinity);
1548 }
1549#endif
1550
1551#ifdef CONFIG_LITMUS_NVIDIA
1552 init_nvidia_info();
1553#endif
1554
1555 free_cpumask_var(mask);
1556 clusters_allocated = 1;
1557 return 0;
1558}
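
The klitirqd placement above splits NR_LITMUS_SOFTIRQD daemons as evenly as possible over the clusters, gives the first few clusters one extra daemon each when the division has a remainder, and pins every daemon to the first CPU of its cluster. The same arithmetic as a small user-space sketch (daemon count, cluster count and cluster size are illustrative values only):

#include <stdio.h>

static void assign_affinities(int *affinity, int num_daemons,
                              int num_clusters, int cluster_size)
{
	int per_cluster = num_daemons / num_clusters;
	int left_over   = num_daemons % num_clusters;
	int daemon = 0;
	int i, j;

	for (i = 0; i < num_clusters; ++i) {
		int on_this_cluster = per_cluster;
		if (left_over) {               /* hand out the remainder one by one */
			++on_this_cluster;
			--left_over;
		}
		for (j = 0; j < on_this_cluster; ++j)
			affinity[daemon++] = i * cluster_size; /* first CPU of cluster i */
	}
}

int main(void)
{
	int affinity[8];
	int i;

	assign_affinities(affinity, 8, 3, 4); /* e.g. 8 daemons, 3 clusters of 4 CPUs */
	for (i = 0; i < 8; ++i)
		printf("daemon %d -> cpu %d\n", i, affinity[i]);
	return 0;
}
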
1559
1560/* Plugin object */
1561static struct sched_plugin crm_srt_plugin __cacheline_aligned_in_smp = {
1562 .plugin_name = "C-RM-SRT",
1563 .finish_switch = crm_srt_finish_switch,
1564 .tick = crm_srt_tick,
1565 .task_new = crm_srt_task_new,
1566 .complete_job = complete_job,
1567 .task_exit = crm_srt_task_exit,
1568 .schedule = crm_srt_schedule,
1569 .task_wake_up = crm_srt_task_wake_up,
1570 .task_block = crm_srt_task_block,
1571 .admit_task = crm_srt_admit_task,
1572 .activate_plugin = crm_srt_activate_plugin,
1573#ifdef CONFIG_LITMUS_LOCKING
1574 .allocate_lock = crm_srt_allocate_lock,
1575 .set_prio_inh = set_priority_inheritance,
1576 .clear_prio_inh = clear_priority_inheritance,
1577#endif
1578#ifdef CONFIG_LITMUS_SOFTIRQD
1579 .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
1580 .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
1581#endif
1582};
1583
1584static struct proc_dir_entry *cluster_file = NULL, *crm_srt_dir = NULL;
1585
1586static int __init init_crm_srt(void)
1587{
1588 int err, fs;
1589
1590 err = register_sched_plugin(&crm_srt_plugin);
1591 if (!err) {
1592 fs = make_plugin_proc_dir(&crm_srt_plugin, &crm_srt_dir);
1593 if (!fs)
1594 cluster_file = create_cluster_file(crm_srt_dir, &cluster_config);
1595 else
1596 printk(KERN_ERR "Could not allocate C-RM-SRT procfs dir.\n");
1597 }
1598 return err;
1599}
1600
1601static void clean_crm_srt(void)
1602{
1603 cleanup_crm_srt();
1604 if (cluster_file)
1605 remove_proc_entry("cluster", crm_srt_dir);
1606 if (crm_srt_dir)
1607 remove_plugin_proc_dir(&crm_srt_plugin);
1608}
1609
1610module_init(init_crm_srt);
1611module_exit(clean_crm_srt);
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index d04e0703c154..ac7685fe69f0 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -1155,12 +1155,14 @@ static inline struct kfmlp_queue* kfmlp_find_shortest(
1155 { 1155 {
1156 step = (step+1 != &sem->queues[sem->num_resources]) ? 1156 step = (step+1 != &sem->queues[sem->num_resources]) ?
1157 step+1 : &sem->queues[0]; 1157 step+1 : &sem->queues[0];
1158
1158 if(step->count < shortest->count) 1159 if(step->count < shortest->count)
1159 { 1160 {
1160 shortest = step; 1161 shortest = step;
1161 if(step->count == 0) 1162 if(step->count == 0)
1162 break; /* can't get any shorter */ 1163 break; /* can't get any shorter */
1163 } 1164 }
1165
1164 }while(step != search_start); 1166 }while(step != search_start);
1165 1167
1166 return(shortest); 1168 return(shortest);
@@ -1369,7 +1371,9 @@ int gsnedf_kfmlp_unlock(struct litmus_lock* l)
1369 my_queue->owner = next; 1371 my_queue->owner = next;
1370 1372
1371 --(my_queue->count); 1373 --(my_queue->count);
1372	if(my_queue->count < sem->shortest_queue->count) 1374	// the '=' in '<=' is a crude way to build
1375 // affinity until tasks can tell us where they ran last...
1376 if(my_queue->count <= sem->shortest_queue->count)
1373 { 1377 {
1374 sem->shortest_queue = my_queue; 1378 sem->shortest_queue = my_queue;
1375 } 1379 }
@@ -1428,7 +1432,9 @@ int gsnedf_kfmlp_unlock(struct litmus_lock* l)
1428 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); 1432 TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
1429 1433
1430 --(my_queue->count); 1434 --(my_queue->count);
1431	if(my_queue->count < sem->shortest_queue->count) 1435	// the '=' in '<=' is a crude way to build
1436 // affinity until tasks can tell us where they ran last...
1437 if(my_queue->count <= sem->shortest_queue->count)
1432 { 1438 {
1433 sem->shortest_queue = my_queue; 1439 sem->shortest_queue = my_queue;
1434 } 1440 }
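
The '<' to '<=' change only matters on a tie: with '<', a queue that just released and now has the same length as the recorded shortest queue is not re-recorded, so the next request goes elsewhere; with '<=', the tie is broken in favour of the releasing queue, so the next requester tends to land on the replica that was just freed. A tiny illustration with made-up lengths:

#include <stdio.h>

int main(void)
{
	int released_len = 2;   /* queue length after --(my_queue->count)  */
	int shortest_len = 2;   /* length of the currently recorded queue  */

	/* prints 0: a tie keeps the old shortest_queue */
	printf("'<'  re-records the releasing queue: %d\n",
	       released_len <  shortest_len);
	/* prints 1: a tie prefers the queue that just released */
	printf("'<=' re-records the releasing queue: %d\n",
	       released_len <= shortest_len);
	return 0;
}
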
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 8802670a4b0b..e393d749baf5 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -152,6 +152,14 @@ static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd,
152} 152}
153#endif 153#endif
154 154
155#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
156static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
157{
158 TRACE("PAI Tasklet unsupported in this plugin!!!!!!\n");
159 return(0); // failure.
160}
161#endif
162
155 163
156/* The default scheduler plugin. It doesn't do anything and lets Linux do its 164/* The default scheduler plugin. It doesn't do anything and lets Linux do its
157 * job. 165 * job.
@@ -177,6 +185,9 @@ struct sched_plugin linux_sched_plugin = {
177 .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq, 185 .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq,
178 .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd, 186 .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd,
179#endif 187#endif
188#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
189 .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
190#endif
180 .admit_task = litmus_dummy_admit_task 191 .admit_task = litmus_dummy_admit_task
181}; 192};
182 193
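
The dummy hook returns 0 ("failure"), so a plugin that does not override enqueue_pai_tasklet simply declines the tasklet. Assuming callers treat a zero return as "not handled", a hypothetical call site (dispatching through the global litmus plugin pointer, as the other plugin hooks do) might look like this sketch:

#include <linux/interrupt.h>
#include <litmus/sched_plugin.h>

static void defer_or_schedule(struct tasklet_struct *t)
{
	/* hand the tasklet to the active plugin; if it is not accepted
	 * (dummy hook, or plugin-specific refusal), fall back to the
	 * stock softirq path */
	if (!litmus->enqueue_pai_tasklet(t))
		tasklet_schedule(t);
}
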
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 7aeb99b668d3..d079df2b292a 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -191,7 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
191 if (rec) { 191 if (rec) {
192 rec->data.completion.when = now(); 192 rec->data.completion.when = now();
193 rec->data.completion.forced = forced; 193 rec->data.completion.forced = forced;
194#ifdef LITMUS_NVIDIA
194 rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); 195 rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
196#endif
195 put_record(rec); 197 put_record(rec);
196 } 198 }
197} 199}
@@ -367,24 +369,29 @@ feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
367 } 369 }
368} 370}
369 371
370
371/* pray for no nesting of nv interrupts on same CPU... */ 372/* pray for no nesting of nv interrupts on same CPU... */
372struct tracing_interrupt_map 373struct tracing_interrupt_map
373{ 374{
374 int active; 375 int active;
375 int count; 376 int count;
376 unsigned long data[128]; // assume nesting less than 128... 377 unsigned long data[128]; // assume nesting less than 128...
378 unsigned long serial[128];
377}; 379};
378DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); 380DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
379 381
382
383DEFINE_PER_CPU(u32, intCounter);
384
380feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, 385feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
381 unsigned long _device) 386 unsigned long _device)
382{ 387{
383 struct st_event_record *rec; 388 struct st_event_record *rec;
389 u32 serialNum;
384 390
385 { 391 {
392 u32* serial;
386 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); 393 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
387 if(int_map->active == 0xcafebabe) 394 if(!int_map->active == 0xcafebabe)
388 { 395 {
389 int_map->count++; 396 int_map->count++;
390 } 397 }
@@ -393,7 +400,12 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
393 int_map->active = 0xcafebabe; 400 int_map->active = 0xcafebabe;
394 int_map->count = 1; 401 int_map->count = 1;
395 } 402 }
396 int_map->data[int_map->count-1] = _device; 403 //int_map->data[int_map->count-1] = _device;
404
405 serial = &per_cpu(intCounter, smp_processor_id());
406 *serial += num_online_cpus();
407 serialNum = *serial;
408 int_map->serial[int_map->count-1] = serialNum;
397 } 409 }
398 410
399 rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); 411 rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
@@ -401,6 +413,7 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
401 u32 device = _device; 413 u32 device = _device;
402 rec->data.nv_interrupt_begin.when = now(); 414 rec->data.nv_interrupt_begin.when = now();
403 rec->data.nv_interrupt_begin.device = device; 415 rec->data.nv_interrupt_begin.device = device;
416 rec->data.nv_interrupt_begin.serialNumber = serialNum;
404 put_record(rec); 417 put_record(rec);
405 } 418 }
406} 419}
@@ -416,7 +429,7 @@ int is_interrupt_tracing_active(void)
416} 429}
417*/ 430*/
418 431
419feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long unused) 432feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
420{ 433{
421 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); 434 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
422 if(int_map->active == 0xcafebabe) 435 if(int_map->active == 0xcafebabe)
@@ -428,8 +441,11 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned
428 int_map->active = 0; 441 int_map->active = 0;
429 442
430 if(rec) { 443 if(rec) {
444 u32 device = _device;
431 rec->data.nv_interrupt_end.when = now(); 445 rec->data.nv_interrupt_end.when = now();
432 rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; 446 //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
447 rec->data.nv_interrupt_end.device = device;
448 rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
433 put_record(rec); 449 put_record(rec);
434 } 450 }
435 } 451 }
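
Each CPU draws interrupt serial numbers from its own per-CPU counter and strides it by num_online_cpus(), so a given CPU never repeats a serial. For serials to also be distinct across CPUs, each counter needs a distinct starting offset (for example the CPU index); that seeding is an assumption added in the sketch below, on top of the zero-initialised per-CPU counters in the patch. A user-space analogue of the striding scheme:

#include <stdio.h>

#define NUM_CPUS 4

/* one counter per "CPU", seeded with the CPU index so the strided
 * sequences are disjoint across CPUs (assumed seeding, see above) */
static unsigned int counter[NUM_CPUS] = { 0, 1, 2, 3 };

static unsigned int next_serial(int cpu)
{
	counter[cpu] += NUM_CPUS;   /* stride by the number of CPUs */
	return counter[cpu];
}

int main(void)
{
	int round, cpu;

	for (round = 0; round < 3; ++round)
		for (cpu = 0; cpu < NUM_CPUS; ++cpu)
			printf("cpu %d -> serial %u\n", cpu, next_serial(cpu));
	return 0;
}
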
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
index d7d7d8bae298..5b7e6152416a 100644
--- a/litmus/sched_trace_external.c
+++ b/litmus/sched_trace_external.c
@@ -1,5 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2 2
3#include <litmus/trace.h>
3#include <litmus/sched_trace.h> 4#include <litmus/sched_trace.h>
4#include <litmus/litmus.h> 5#include <litmus/litmus.h>
5 6
@@ -38,8 +39,26 @@ void __sched_trace_nv_interrupt_begin_external(u32 device)
38} 39}
39EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); 40EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
40 41
41void __sched_trace_nv_interrupt_end_external(void) 42void __sched_trace_nv_interrupt_end_external(u32 device)
42{ 43{
43 sched_trace_nv_interrupt_end(); 44 unsigned long _device = device;
45 sched_trace_nv_interrupt_end(_device);
44} 46}
45EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); 47EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
48
49
50#ifdef CONFIG_LITMUS_NVIDIA
51
52#define EXX_TS(evt) \
53void __##evt(void) { evt; } \
54EXPORT_SYMBOL(__##evt);
55
56EXX_TS(TS_NV_TOPISR_START)
57EXX_TS(TS_NV_TOPISR_END)
58EXX_TS(TS_NV_BOTISR_START)
59EXX_TS(TS_NV_BOTISR_END)
60EXX_TS(TS_NV_RELEASE_BOTISR_START)
61EXX_TS(TS_NV_RELEASE_BOTISR_END)
62
63#endif
64
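
Hand-expanding one of the EXX_TS() instantiations shows what is actually being exported; TS_NV_TOPISR_START itself is presumed to be one of the Feather-Trace timestamp macros from litmus/trace.h, which this file now includes:

void __TS_NV_TOPISR_START(void) { TS_NV_TOPISR_START; }
EXPORT_SYMBOL(__TS_NV_TOPISR_START);
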