author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-01-11 14:37:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-01-11 14:37:13 -0500 |
commit | 5d7dcfa10ea0dd283773a301e3ce610a7797d582 (patch) | |
tree | 7f7a57ac940e7fe1f538cdd771a954d4fb28f8c0 | |
parent | 3d5537c160c1484e8d562b9828baf679cc53f67a (diff) |
PAI implementation, C-RM, C-FIFO.
30 files changed, 5874 insertions, 42 deletions
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5d22f5342376..a2f2880d5517 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -460,7 +460,7 @@ struct tasklet_struct | |||
460 | void (*func)(unsigned long); | 460 | void (*func)(unsigned long); |
461 | unsigned long data; | 461 | unsigned long data; |
462 | 462 | ||
463 | #ifdef CONFIG_LITMUS_SOFTIRQD | 463 | #if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD) |
464 | struct task_struct *owner; | 464 | struct task_struct *owner; |
465 | #endif | 465 | #endif |
466 | }; | 466 | }; |
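The hunk above makes the tasklet `owner` field available under CONFIG_LITMUS_PAI_SOFTIRQD as well as CONFIG_LITMUS_SOFTIRQD. A minimal sketch (not part of this patch; the owner lookup is hypothetical) of how a bottom-half producer would tag a tasklet with the real-time task it serves before scheduling it:

```c
/* Sketch only: assumes CONFIG_LITMUS_SOFTIRQD or CONFIG_LITMUS_PAI_SOFTIRQD,
 * so that struct tasklet_struct has the ->owner field added above. */
#include <linux/interrupt.h>
#include <linux/sched.h>

static struct tasklet_struct my_tasklet;

static void my_bottom_half(unsigned long data)
{
	/* deferred interrupt work runs here */
}

static void my_defer_bottom_half(struct task_struct *rt_owner)
{
	tasklet_init(&my_tasklet, my_bottom_half, 0);
	my_tasklet.owner = rt_owner;    /* lets LITMUS prioritize the tasklet   */
	tasklet_schedule(&my_tasklet);  /* may be intercepted (kernel/softirq.c) */
}
```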
diff --git a/include/litmus/fifo_common.h b/include/litmus/fifo_common.h new file mode 100644 index 000000000000..12cfbfea41ee --- /dev/null +++ b/include/litmus/fifo_common.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * FIFO common data structures and utility functions shared by all FIFO | ||
3 | * based scheduler plugins | ||
4 | */ | ||
5 | |||
6 | /* CLEANUP: Add comments and make it less messy. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef __UNC_FIFO_COMMON_H__ | ||
11 | #define __UNC_FIFO_COMMON_H__ | ||
12 | |||
13 | #include <litmus/rt_domain.h> | ||
14 | |||
15 | void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
16 | release_jobs_t release); | ||
17 | |||
18 | int fifo_higher_prio(struct task_struct* first, | ||
19 | struct task_struct* second); | ||
20 | |||
21 | int fifo_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
22 | |||
23 | int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
24 | |||
25 | #endif | ||
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 3df242bf272f..829c1c5ab91f 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h | |||
@@ -118,7 +118,9 @@ static inline lt_t litmus_clock(void) | |||
118 | #define earlier_release(a, b) (lt_before(\ | 118 | #define earlier_release(a, b) (lt_before(\ |
119 | (a)->rt_param.job_params.release,\ | 119 | (a)->rt_param.job_params.release,\ |
120 | (b)->rt_param.job_params.release)) | 120 | (b)->rt_param.job_params.release)) |
121 | 121 | #define shorter_period(a, b) (lt_before(\ | |
122 | (a)->rt_param.task_params.period,\ | ||
123 | (b)->rt_param.task_params.period)) | ||
122 | void preempt_if_preemptable(struct task_struct* t, int on_cpu); | 124 | void preempt_if_preemptable(struct task_struct* t, int on_cpu); |
123 | 125 | ||
124 | #ifdef CONFIG_LITMUS_LOCKING | 126 | #ifdef CONFIG_LITMUS_LOCKING |
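The new `shorter_period()` macro is the basis for the rate-monotonic comparisons introduced later in this patch (litmus/rm_common.c). A minimal illustrative use, with the existing `get_rt_period()` accessor for the same field:

```c
/* Sketch: rate-monotonic "a beats b" test using the new macro,
 * with PID as an arbitrary tie-break (mirrors litmus/rm_common.c). */
#include <linux/sched.h>
#include <litmus/litmus.h>

static int rm_prio_example(struct task_struct *a, struct task_struct *b)
{
	if (shorter_period(a, b))
		return 1;                        /* strictly shorter period wins   */
	if (get_rt_period(a) == get_rt_period(b))
		return a->pid < b->pid;          /* deterministic tie-break by PID */
	return 0;
}
```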
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 579301d77cf5..9e07a27fdee3 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h | |||
@@ -7,7 +7,8 @@ | |||
7 | #include <litmus/litmus_softirq.h> | 7 | #include <litmus/litmus_softirq.h> |
8 | 8 | ||
9 | 9 | ||
10 | #define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD | 10 | //#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD |
11 | #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM | ||
11 | 12 | ||
12 | int init_nvidia_info(void); | 13 | int init_nvidia_info(void); |
13 | 14 | ||
diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h new file mode 100644 index 000000000000..5991b0b4e758 --- /dev/null +++ b/include/litmus/rm_common.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * RM common data structures and utility functions shared by all RM | ||
3 | * based scheduler plugins | ||
4 | */ | ||
5 | |||
6 | /* CLEANUP: Add comments and make it less messy. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef __UNC_RM_COMMON_H__ | ||
11 | #define __UNC_RM_COMMON_H__ | ||
12 | |||
13 | #include <litmus/rt_domain.h> | ||
14 | |||
15 | void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
16 | release_jobs_t release); | ||
17 | |||
18 | int rm_higher_prio(struct task_struct* first, | ||
19 | struct task_struct* second); | ||
20 | |||
21 | int rm_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
22 | |||
23 | int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
24 | |||
25 | #endif | ||
diff --git a/include/litmus/rm_srt_common.h b/include/litmus/rm_srt_common.h new file mode 100644 index 000000000000..78aa287327a2 --- /dev/null +++ b/include/litmus/rm_srt_common.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * RM-SRT common data structures and utility functions shared by all RM-SRT | ||
3 | * based scheduler plugins | ||
4 | */ | ||
5 | |||
6 | /* CLEANUP: Add comments and make it less messy. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef __UNC_RM_SRT_COMMON_H__ | ||
11 | #define __UNC_RM_SRT_COMMON_H__ | ||
12 | |||
13 | #include <litmus/rt_domain.h> | ||
14 | |||
15 | void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
16 | release_jobs_t release); | ||
17 | |||
18 | int rm_srt_higher_prio(struct task_struct* first, | ||
19 | struct task_struct* second); | ||
20 | |||
21 | int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
22 | |||
23 | int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
24 | |||
25 | #endif | ||
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index df50930d14a0..12a9ab65a673 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h | |||
@@ -11,6 +11,10 @@ | |||
11 | #include <litmus/locking.h> | 11 | #include <litmus/locking.h> |
12 | #endif | 12 | #endif |
13 | 13 | ||
14 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
15 | #include <linux/interrupt.h> | ||
16 | #endif | ||
17 | |||
14 | /************************ setup/tear down ********************/ | 18 | /************************ setup/tear down ********************/ |
15 | 19 | ||
16 | typedef long (*activate_plugin_t) (void); | 20 | typedef long (*activate_plugin_t) (void); |
@@ -69,6 +73,9 @@ typedef void (*set_prio_inh_klitirq_t)(struct task_struct* klitirqd, | |||
69 | typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd, | 73 | typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd, |
70 | struct task_struct* old_owner); | 74 | struct task_struct* old_owner); |
71 | 75 | ||
76 | |||
77 | typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); | ||
78 | |||
72 | /********************* sys call backends ********************/ | 79 | /********************* sys call backends ********************/ |
73 | /* This function causes the caller to sleep until the next release */ | 80 | /* This function causes the caller to sleep until the next release */ |
74 | typedef long (*complete_job_t) (void); | 81 | typedef long (*complete_job_t) (void); |
@@ -115,6 +122,10 @@ struct sched_plugin { | |||
115 | set_prio_inh_klitirq_t set_prio_inh_klitirqd; | 122 | set_prio_inh_klitirq_t set_prio_inh_klitirqd; |
116 | clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd; | 123 | clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd; |
117 | #endif | 124 | #endif |
125 | |||
126 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
127 | enqueue_pai_tasklet_t enqueue_pai_tasklet; | ||
128 | #endif | ||
118 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); | 129 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); |
119 | 130 | ||
120 | 131 | ||
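With the new callback in place, a plugin advertises PAI support by filling in `enqueue_pai_tasklet` in its `struct sched_plugin` initializer. A hedged sketch with hypothetical plugin and function names, mirroring the C-EDF hookup later in this patch:

```c
#include <linux/interrupt.h>
#include <litmus/sched_plugin.h>

#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
static int my_enqueue_pai_tasklet(struct tasklet_struct *tasklet);
#endif

static struct sched_plugin my_plugin __cacheline_aligned_in_smp = {
	.plugin_name          = "MY-PLUGIN",
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
	.enqueue_pai_tasklet  = my_enqueue_pai_tasklet,  /* new PAI hook */
#endif
};
```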
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 1486c778aff8..232c7588d103 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h | |||
@@ -127,13 +127,13 @@ struct st_effective_priority_change_data { | |||
127 | struct st_nv_interrupt_begin_data { | 127 | struct st_nv_interrupt_begin_data { |
128 | u64 when; | 128 | u64 when; |
129 | u32 device; | 129 | u32 device; |
130 | u8 __unused[4]; | 130 | u32 serialNumber; |
131 | } __attribute__((packed)); | 131 | } __attribute__((packed)); |
132 | 132 | ||
133 | struct st_nv_interrupt_end_data { | 133 | struct st_nv_interrupt_end_data { |
134 | u64 when; | 134 | u64 when; |
135 | u32 device; | 135 | u32 device; |
136 | u8 __unused[4]; | 136 | u32 serialNumber; |
137 | } __attribute__((packed)); | 137 | } __attribute__((packed)); |
138 | 138 | ||
139 | #define DATA(x) struct st_ ## x ## _data x; | 139 | #define DATA(x) struct st_ ## x ## _data x; |
@@ -328,8 +328,8 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, | |||
328 | 328 | ||
329 | #define sched_trace_nv_interrupt_begin(d) \ | 329 | #define sched_trace_nv_interrupt_begin(d) \ |
330 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) | 330 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) |
331 | #define sched_trace_nv_interrupt_end() \ | 331 | #define sched_trace_nv_interrupt_end(d) \ |
332 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, 0ul) | 332 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) |
333 | 333 | ||
334 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ | 334 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ |
335 | 335 | ||
diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h index c2c872639880..90424d5c564c 100644 --- a/include/litmus/sched_trace_external.h +++ b/include/litmus/sched_trace_external.h | |||
@@ -34,9 +34,25 @@ static inline void sched_trace_nv_interrupt_begin_external(u32 device) | |||
34 | __sched_trace_nv_interrupt_begin_external(device); | 34 | __sched_trace_nv_interrupt_begin_external(device); |
35 | } | 35 | } |
36 | 36 | ||
37 | extern void __sched_trace_nv_interrupt_end_external(void); | 37 | extern void __sched_trace_nv_interrupt_end_external(u32 device); |
38 | static inline void sched_trace_nv_interrupt_end_external(void) | 38 | static inline void sched_trace_nv_interrupt_end_external(u32 device) |
39 | { | 39 | { |
40 | __sched_trace_nv_interrupt_end_external(); | 40 | __sched_trace_nv_interrupt_end_external(device); |
41 | } | 41 | } |
42 | |||
43 | #ifdef CONFIG_LITMUS_NVIDIA | ||
44 | |||
45 | #define EX_TS(evt) \ | ||
46 | extern void __##evt(void); \ | ||
47 | static inline void EX_##evt(void) { __##evt(); } | ||
48 | |||
49 | EX_TS(TS_NV_TOPISR_START) | ||
50 | EX_TS(TS_NV_TOPISR_END) | ||
51 | EX_TS(TS_NV_BOTISR_START) | ||
52 | EX_TS(TS_NV_BOTISR_END) | ||
53 | EX_TS(TS_NV_RELEASE_BOTISR_START) | ||
54 | EX_TS(TS_NV_RELEASE_BOTISR_END) | ||
55 | |||
56 | #endif | ||
57 | |||
42 | #endif | 58 | #endif |
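For reference, `EX_TS(TS_NV_TOPISR_START)` above expands to the following (token pasting happens before macro expansion, so the timestamp macro name itself becomes part of the identifiers):

```c
/* Expansion of EX_TS(TS_NV_TOPISR_START): */
extern void __TS_NV_TOPISR_START(void);
static inline void EX_TS_NV_TOPISR_START(void) { __TS_NV_TOPISR_START(); }
```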
diff --git a/include/litmus/trace.h b/include/litmus/trace.h index 05f487263f28..aa3ee4a6757b 100644 --- a/include/litmus/trace.h +++ b/include/litmus/trace.h | |||
@@ -100,4 +100,18 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu) | |||
100 | #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) | 100 | #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) |
101 | 101 | ||
102 | 102 | ||
103 | |||
104 | #ifdef CONFIG_LITMUS_NVIDIA | ||
105 | |||
106 | #define TS_NV_TOPISR_START TIMESTAMP(200) | ||
107 | #define TS_NV_TOPISR_END TIMESTAMP(201) | ||
108 | |||
109 | #define TS_NV_BOTISR_START TIMESTAMP(202) | ||
110 | #define TS_NV_BOTISR_END TIMESTAMP(203) | ||
111 | |||
112 | #define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204) | ||
113 | #define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205) | ||
114 | |||
115 | #endif | ||
116 | |||
103 | #endif /* !_SYS_TRACE_H_ */ | 117 | #endif /* !_SYS_TRACE_H_ */ |
diff --git a/kernel/sched.c b/kernel/sched.c index 3162605ffc91..3aa2be09122b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3897,8 +3897,10 @@ need_resched_nonpreemptible: | |||
3897 | if (need_resched()) | 3897 | if (need_resched()) |
3898 | goto need_resched; | 3898 | goto need_resched; |
3899 | 3899 | ||
3900 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
3900 | reacquire_klitirqd_lock(prev); | 3901 | reacquire_klitirqd_lock(prev); |
3901 | 3902 | #endif | |
3903 | |||
3902 | srp_ceiling_block(); | 3904 | srp_ceiling_block(); |
3903 | } | 3905 | } |
3904 | EXPORT_SYMBOL(schedule); | 3906 | EXPORT_SYMBOL(schedule); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index be4b8fab3637..ae77c5c1d17e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -35,6 +35,7 @@ | |||
35 | 35 | ||
36 | #ifdef CONFIG_LITMUS_NVIDIA | 36 | #ifdef CONFIG_LITMUS_NVIDIA |
37 | #include <litmus/nvidia_info.h> | 37 | #include <litmus/nvidia_info.h> |
38 | #include <litmus/trace.h> | ||
38 | #endif | 39 | #endif |
39 | 40 | ||
40 | /* | 41 | /* |
@@ -441,6 +442,9 @@ void __tasklet_schedule(struct tasklet_struct *t) | |||
441 | if(likely(_litmus_tasklet_schedule(t,nvidia_device))) | 442 | if(likely(_litmus_tasklet_schedule(t,nvidia_device))) |
442 | { | 443 | { |
443 | unlock_nv_registry(nvidia_device, &flags); | 444 | unlock_nv_registry(nvidia_device, &flags); |
445 | |||
446 | TS_NV_RELEASE_BOTISR_END; | ||
447 | |||
444 | return; | 448 | return; |
445 | } | 449 | } |
446 | else | 450 | else |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8139208eaee1..637cadac2627 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -2531,7 +2531,7 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); | |||
2531 | */ | 2531 | */ |
2532 | int schedule_work(struct work_struct *work) | 2532 | int schedule_work(struct work_struct *work) |
2533 | { | 2533 | { |
2534 | #ifdef CONFIG_LITMUS_NVIDIA | 2534 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) |
2535 | if(is_nvidia_func(work->func)) | 2535 | if(is_nvidia_func(work->func)) |
2536 | { | 2536 | { |
2537 | u32 nvidiaDevice = get_work_nv_device_num(work); | 2537 | u32 nvidiaDevice = get_work_nv_device_num(work); |
diff --git a/litmus/Kconfig b/litmus/Kconfig index 7e865d4dd703..5109cf7db7f6 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig | |||
@@ -218,18 +218,41 @@ config LITMUS_THREAD_ALL_SOFTIRQ | |||
218 | 218 | ||
219 | If unsure, say No. | 219 | If unsure, say No. |
220 | 220 | ||
221 | |||
222 | choice | ||
223 | prompt "Scheduling of interrupt bottom-halves in Litmus." | ||
224 | default LITMUS_SOFTIRQD_NONE | ||
225 | depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ | ||
226 | help | ||
227 | Schedule tasklets with known priorities in Litmus. | ||
228 | |||
229 | config LITMUS_SOFTIRQD_NONE | ||
230 | bool "No tasklet scheduling in Litmus." | ||
231 | help | ||
232 | Don't schedule tasklets in Litmus. Default. | ||
233 | |||
221 | config LITMUS_SOFTIRQD | 234 | config LITMUS_SOFTIRQD |
222 | bool "Spawn klitirqd interrupt handling threads." | 235 | bool "Spawn klitirqd interrupt handling threads." |
223 | depends on LITMUS_LOCKING | 236 | help |
224 | default n | 237 | Create klitirqd interrupt handling threads. Work must be |
225 | help | 238 | specifically dispatched to these workers. (Softirqs for |
226 | Create klitirqd interrupt handling threads. Work must be | 239 | Litmus tasks are not magically redirected to klitirqd.) |
227 | specifically dispatched to these workers. (Softirqs for | ||
228 | Litmus tasks are not magically redirected to klitirqd.) | ||
229 | 240 | ||
230 | G-EDF ONLY for now! | 241 | G-EDF/RM, C-EDF/RM ONLY for now! |
231 | 242 | ||
232 | If unsure, say No. | 243 | |
244 | config LITMUS_PAI_SOFTIRQD | ||
245 | bool "Defer tasklets to context switch points." | ||
246 | help | ||
247 | Only execute scheduled tasklet bottom halves at | ||
248 | scheduling points. Reduces context-switch overhead | ||
249 | at the cost of non-preemptive intervals of bottom-half | ||
250 | processing. | ||
251 | |||
252 | G-EDF/RM, C-EDF/RM ONLY for now! | ||
253 | |||
254 | endchoice | ||
255 | |||
233 | 256 | ||
234 | config NR_LITMUS_SOFTIRQD | 257 | config NR_LITMUS_SOFTIRQD |
235 | int "Number of klitirqd." | 258 | int "Number of klitirqd." |
@@ -241,13 +264,22 @@ config NR_LITMUS_SOFTIRQD | |||
241 | 264 | ||
242 | config LITMUS_NVIDIA | 265 | config LITMUS_NVIDIA |
243 | bool "Litmus handling of NVIDIA interrupts." | 266 | bool "Litmus handling of NVIDIA interrupts." |
244 | depends on LITMUS_SOFTIRQD | 267 | depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD |
245 | default n | 268 | default n |
246 | help | 269 | help |
247 | Direct tasklets from NVIDIA devices to Litmus's klitirqd. | 270 | Direct tasklets from NVIDIA devices to Litmus's klitirqd. |
248 | 271 | ||
249 | If unsure, say No. | 272 | If unsure, say No. |
250 | 273 | ||
274 | config NV_DEVICE_NUM | ||
275 | int "Number of NVIDIA GPUs." | ||
276 | depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD | ||
277 | range 1 4096 | ||
278 | default "1" | ||
279 | help | ||
280 | Should be no greater than the number of CPUs and | ||
281 | no greater than the number of GPUs in your system. | ||
282 | |||
251 | choice | 283 | choice |
252 | prompt "CUDA/Driver Version Support" | 284 | prompt "CUDA/Driver Version Support" |
253 | default CUDA_4_0 | 285 | default CUDA_4_0 |
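A small illustrative helper (not in the patch) showing how the new CONFIG_NV_DEVICE_NUM bound is typically consumed: `NV_DEVICE_NUM` now equals it (see the include/litmus/nvidia_info.h hunk above), so device indices reported by the driver should be range-checked before indexing per-GPU state such as `NV_DEVICE_REG[]`:

```c
#include <linux/types.h>
#include <litmus/nvidia_info.h>   /* NV_DEVICE_NUM == CONFIG_NV_DEVICE_NUM */

/* Sketch: reject out-of-range GPU indices before touching per-device state. */
static int nv_device_id_valid(u32 device)
{
	return device < NV_DEVICE_NUM;
}
```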
diff --git a/litmus/Makefile b/litmus/Makefile index 892e01c2e1b3..869939e2270c 100644 --- a/litmus/Makefile +++ b/litmus/Makefile | |||
@@ -19,7 +19,7 @@ obj-y = sched_plugin.o litmus.o \ | |||
19 | sched_gsn_edf.o \ | 19 | sched_gsn_edf.o \ |
20 | sched_psn_edf.o | 20 | sched_psn_edf.o |
21 | 21 | ||
22 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o | 22 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o sched_cfifo.o fifo_common.o sched_crm.o rm_common.o sched_crm_srt.o rm_srt_common.o |
23 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o | 23 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o |
24 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o | 24 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o |
25 | 25 | ||
@@ -29,4 +29,5 @@ obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o | |||
29 | obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o | 29 | obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o |
30 | 30 | ||
31 | obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o | 31 | obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o |
32 | obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o | ||
32 | obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o | 33 | obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o |
diff --git a/litmus/edf_common.c b/litmus/edf_common.c index fbd67ab5f467..0a06d7a26c00 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c | |||
@@ -63,7 +63,45 @@ int edf_higher_prio(struct task_struct* first, | |||
63 | 63 | ||
64 | #endif | 64 | #endif |
65 | 65 | ||
66 | if (!is_realtime(second_task)) | ||
67 | return true; | ||
68 | |||
69 | if (earlier_deadline(first_task, second_task)) | ||
70 | return true; | ||
71 | |||
72 | if (get_deadline(first_task) == get_deadline(second_task)) | ||
73 | { | ||
74 | if (shorter_period(first_task, second_task)) | ||
75 | { | ||
76 | return true; | ||
77 | } | ||
78 | if (get_rt_period(first_task) == get_rt_period(second_task)) | ||
79 | { | ||
80 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
81 | if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) | ||
82 | { | ||
83 | return true; | ||
84 | } | ||
85 | if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) | ||
86 | { | ||
87 | #endif | ||
88 | if (first_task->pid < second_task->pid) | ||
89 | { | ||
90 | return true; | ||
91 | } | ||
92 | if (first_task->pid == second_task->pid) | ||
93 | { | ||
94 | return !second->rt_param.inh_task; | ||
95 | } | ||
96 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
97 | } | ||
98 | #endif | ||
99 | } | ||
100 | } | ||
101 | |||
102 | return false; | ||
66 | 103 | ||
104 | #if 0 | ||
67 | return !is_realtime(second_task) || | 105 | return !is_realtime(second_task) || |
68 | 106 | ||
69 | #ifdef CONFIG_LITMUS_SOFTIRQD | 107 | #ifdef CONFIG_LITMUS_SOFTIRQD |
@@ -88,6 +126,7 @@ int edf_higher_prio(struct task_struct* first, | |||
88 | */ | 126 | */ |
89 | (first_task->pid == second_task->pid && | 127 | (first_task->pid == second_task->pid && |
90 | !second->rt_param.inh_task))); | 128 | !second->rt_param.inh_task))); |
129 | #endif | ||
91 | } | 130 | } |
92 | 131 | ||
93 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b) | 132 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b) |
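The expanded if-chain above replaces the old single boolean expression (kept under `#if 0`). For readability, a hedged sketch of the same ordering folded back into one expression, showing the CONFIG_LITMUS_SOFTIRQD branch unconditionally and using a hypothetical helper name:

```c
#include <linux/sched.h>
#include <litmus/litmus.h>

/* Sketch: deadline, then period, then proxy-thread status, then PID,
 * then "no inherited priority" breaks the final tie. */
static int edf_prio_tail_sketch(struct task_struct *first_task,
				struct task_struct *second_task,
				struct task_struct *second)
{
	return !is_realtime(second_task) ||
		earlier_deadline(first_task, second_task) ||
		(get_deadline(first_task) == get_deadline(second_task) &&
		 (shorter_period(first_task, second_task) ||
		  (get_rt_period(first_task) == get_rt_period(second_task) &&
		   (first_task->rt_param.is_proxy_thread <
		      second_task->rt_param.is_proxy_thread ||
		    (first_task->rt_param.is_proxy_thread ==
		       second_task->rt_param.is_proxy_thread &&
		     (first_task->pid < second_task->pid ||
		      (first_task->pid == second_task->pid &&
		       !second->rt_param.inh_task)))))));
}
```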
diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c new file mode 100644 index 000000000000..c94510a171d9 --- /dev/null +++ b/litmus/fifo_common.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * kernel/fifo_common.c | ||
3 | * | ||
4 | * Common functions for EDF based scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/fifo_common.h> | ||
16 | |||
17 | /* fifo_higher_prio - returns true if first has a higher FIFO priority | ||
18 | * than second (earlier job release). Release-time ties are broken by PID. | ||
19 | * | ||
20 | * both first and second may be NULL | ||
21 | */ | ||
22 | int fifo_higher_prio(struct task_struct* first, | ||
23 | struct task_struct* second) | ||
24 | { | ||
25 | struct task_struct *first_task = first; | ||
26 | struct task_struct *second_task = second; | ||
27 | |||
28 | /* There is no point in comparing a task to itself. */ | ||
29 | if (first && first == second) { | ||
30 | TRACE_TASK(first, | ||
31 | "WARNING: pointless edf priority comparison.\n"); | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | |||
36 | /* check for NULL tasks */ | ||
37 | if (!first || !second) | ||
38 | return first && !second; | ||
39 | |||
40 | #ifdef CONFIG_LITMUS_LOCKING | ||
41 | |||
42 | /* Check for inherited priorities. Change task | ||
43 | * used for comparison in such a case. | ||
44 | */ | ||
45 | if (unlikely(first->rt_param.inh_task)) | ||
46 | first_task = first->rt_param.inh_task; | ||
47 | if (unlikely(second->rt_param.inh_task)) | ||
48 | second_task = second->rt_param.inh_task; | ||
49 | |||
50 | /* Check for priority boosting. Tie-break by start of boosting. | ||
51 | */ | ||
52 | if (unlikely(is_priority_boosted(first_task))) { | ||
53 | /* first_task is boosted, how about second_task? */ | ||
54 | if (!is_priority_boosted(second_task) || | ||
55 | lt_before(get_boost_start(first_task), | ||
56 | get_boost_start(second_task))) | ||
57 | return 1; | ||
58 | else | ||
59 | return 0; | ||
60 | } else if (unlikely(is_priority_boosted(second_task))) | ||
61 | /* second_task is boosted, first is not*/ | ||
62 | return 0; | ||
63 | |||
64 | #endif | ||
65 | |||
66 | |||
67 | return !is_realtime(second_task) || | ||
68 | |||
69 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
70 | /* proxy threads always lose w/o inheritance. */ | ||
71 | (first_task->rt_param.is_proxy_thread < | ||
72 | second_task->rt_param.is_proxy_thread) || | ||
73 | #endif | ||
74 | |||
75 | /* is the job release of the first task earlier? | ||
76 | * Then it has higher priority. | ||
77 | */ | ||
78 | earlier_release(first_task, second_task) || | ||
79 | |||
80 | /* Do we have a release-time tie? | ||
81 | * Then break by PID. | ||
82 | */ | ||
83 | (get_release(first_task) == get_release(second_task) && | ||
84 | (first_task->pid < second_task->pid || | ||
85 | |||
86 | /* If the PIDs are the same then the task with the inherited | ||
87 | * priority wins. | ||
88 | */ | ||
89 | (first_task->pid == second_task->pid && | ||
90 | !second->rt_param.inh_task))); | ||
91 | } | ||
92 | |||
93 | int fifo_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
94 | { | ||
95 | return fifo_higher_prio(bheap2task(a), bheap2task(b)); | ||
96 | } | ||
97 | |||
98 | void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
99 | release_jobs_t release) | ||
100 | { | ||
101 | rt_domain_init(rt, fifo_ready_order, resched, release); | ||
102 | } | ||
103 | |||
104 | /* need_to_preempt - check whether the task t needs to be preempted | ||
105 | * call only with irqs disabled and with ready_lock acquired | ||
106 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
107 | */ | ||
108 | int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
109 | { | ||
110 | /* we need the read lock for fifo_ready_queue */ | ||
111 | /* no need to preempt if there is nothing pending */ | ||
112 | if (!__jobs_pending(rt)) | ||
113 | return 0; | ||
114 | /* we need to reschedule if t doesn't exist */ | ||
115 | if (!t) | ||
116 | return 1; | ||
117 | |||
118 | /* NOTE: We cannot check for non-preemptibility since we | ||
119 | * don't know what address space we're currently in. | ||
120 | */ | ||
121 | |||
122 | /* make sure to get non-rt stuff out of the way */ | ||
123 | return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t); | ||
124 | } | ||
diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c new file mode 100644 index 000000000000..b31eeb8a2538 --- /dev/null +++ b/litmus/litmus_pai_softirq.c | |||
@@ -0,0 +1,64 @@ | |||
1 | #include <linux/interrupt.h> | ||
2 | #include <linux/percpu.h> | ||
3 | #include <linux/cpu.h> | ||
4 | #include <linux/kthread.h> | ||
5 | #include <linux/ftrace.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/slab.h> | ||
8 | #include <linux/mutex.h> | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/cpuset.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_trace.h> | ||
15 | #include <litmus/jobs.h> | ||
16 | #include <litmus/sched_plugin.h> | ||
17 | #include <litmus/litmus_softirq.h> | ||
18 | |||
19 | |||
20 | |||
21 | int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) | ||
22 | { | ||
23 | int ret = 0; /* assume failure */ | ||
24 | if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) | ||
25 | { | ||
26 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); | ||
27 | BUG(); | ||
28 | } | ||
29 | |||
30 | ret = litmus->enqueue_pai_tasklet(t); | ||
31 | |||
32 | return(ret); | ||
33 | } | ||
34 | |||
35 | EXPORT_SYMBOL(__litmus_tasklet_schedule); | ||
36 | |||
37 | |||
38 | |||
39 | // failure causes default Linux handling. | ||
40 | int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) | ||
41 | { | ||
42 | int ret = 0; /* assume failure */ | ||
43 | return(ret); | ||
44 | } | ||
45 | EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); | ||
46 | |||
47 | |||
48 | // failure causes default Linux handling. | ||
49 | int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) | ||
50 | { | ||
51 | int ret = 0; /* assume failure */ | ||
52 | return(ret); | ||
53 | } | ||
54 | EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); | ||
55 | |||
56 | |||
57 | // failure causes default Linux handling. | ||
58 | int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) | ||
59 | { | ||
60 | int ret = 0; /* assume failure */ | ||
61 | return(ret); | ||
62 | } | ||
63 | EXPORT_SYMBOL(__litmus_schedule_work); | ||
64 | |||
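Taken together with the kernel/softirq.c hook earlier in this patch, the dispatch path is: an interrupt handler schedules a tasklet with a real-time `owner`, `__tasklet_schedule()` hands it to LITMUS, and `__litmus_tasklet_schedule()` above forwards it to the active plugin's `enqueue_pai_tasklet()`. A hedged caller-side sketch (hypothetical wrapper name); a zero return means the caller should fall back to the standard Linux tasklet path:

```c
#include <linux/interrupt.h>
#include <linux/types.h>
#include <litmus/litmus_softirq.h>   /* declares __litmus_tasklet_schedule() */

/* Sketch: route a tagged tasklet to LITMUS, falling back to Linux on failure. */
static int route_tasklet_to_litmus(struct tasklet_struct *t, u32 device)
{
	if (__litmus_tasklet_schedule(t, device))
		return 1;   /* accepted by the plugin via enqueue_pai_tasklet() */
	return 0;           /* use the normal __tasklet_schedule() path instead */
}
```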
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c index 271e770dbaea..f5cca964b6c6 100644 --- a/litmus/litmus_softirq.c +++ b/litmus/litmus_softirq.c | |||
@@ -1166,7 +1166,7 @@ int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) | |||
1166 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); | 1166 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); |
1167 | BUG(); | 1167 | BUG(); |
1168 | } | 1168 | } |
1169 | 1169 | ||
1170 | if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) | 1170 | if(unlikely(k_id >= NR_LITMUS_SOFTIRQD)) |
1171 | { | 1171 | { |
1172 | TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); | 1172 | TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id); |
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 78f035244d21..d17152138c63 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c | |||
@@ -361,6 +361,7 @@ int get_nv_device_id(struct task_struct* owner) | |||
361 | 361 | ||
362 | static int __reg_nv_device(int reg_device_id) | 362 | static int __reg_nv_device(int reg_device_id) |
363 | { | 363 | { |
364 | int ret = 0; | ||
364 | struct task_struct* old = | 365 | struct task_struct* old = |
365 | cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner, | 366 | cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner, |
366 | NULL, | 367 | NULL, |
@@ -370,16 +371,21 @@ static int __reg_nv_device(int reg_device_id) | |||
370 | 371 | ||
371 | if(likely(old == NULL)) | 372 | if(likely(old == NULL)) |
372 | { | 373 | { |
374 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
373 | down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); | 375 | down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem); |
376 | #endif | ||
374 | TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); | 377 | TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id); |
375 | return(0); | ||
376 | } | 378 | } |
377 | else | 379 | else |
378 | { | 380 | { |
379 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); | 381 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); |
380 | return(-EBUSY); | 382 | ret = -EBUSY; |
381 | } | 383 | } |
382 | 384 | ||
385 | return(ret); | ||
386 | |||
387 | |||
388 | |||
383 | #if 0 | 389 | #if 0 |
384 | //unsigned long flags; | 390 | //unsigned long flags; |
385 | //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags); | 391 | //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags); |
@@ -411,19 +417,22 @@ static int __reg_nv_device(int reg_device_id) | |||
411 | 417 | ||
412 | static int __clear_reg_nv_device(int de_reg_device_id) | 418 | static int __clear_reg_nv_device(int de_reg_device_id) |
413 | { | 419 | { |
414 | int ret; | 420 | int ret = 0; |
415 | unsigned long flags; | ||
416 | struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); | ||
417 | struct task_struct* old; | 421 | struct task_struct* old; |
418 | 422 | ||
423 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
424 | unsigned long flags; | ||
425 | struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id); | ||
419 | lock_nv_registry(de_reg_device_id, &flags); | 426 | lock_nv_registry(de_reg_device_id, &flags); |
427 | #endif | ||
420 | 428 | ||
421 | old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner, | 429 | old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner, |
422 | current, | 430 | current, |
423 | NULL); | 431 | NULL); |
424 | 432 | ||
425 | mb(); | 433 | mb(); |
426 | 434 | ||
435 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
427 | if(likely(old == current)) | 436 | if(likely(old == current)) |
428 | { | 437 | { |
429 | flush_pending(klitirqd_th, current); | 438 | flush_pending(klitirqd_th, current); |
@@ -448,6 +457,7 @@ static int __clear_reg_nv_device(int de_reg_device_id) | |||
448 | TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n", | 457 | TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n", |
449 | __FUNCTION__, de_reg_device_id); | 458 | __FUNCTION__, de_reg_device_id); |
450 | } | 459 | } |
460 | #endif | ||
451 | 461 | ||
452 | return(ret); | 462 | return(ret); |
453 | } | 463 | } |
diff --git a/litmus/rm_common.c b/litmus/rm_common.c new file mode 100644 index 000000000000..88f83bcbd9d8 --- /dev/null +++ b/litmus/rm_common.c | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * litmus/rm_common.c | ||
3 | * | ||
4 | * Common functions for RM-based schedulers. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/rm_common.h> | ||
16 | |||
17 | /* rm_higher_prio - returns true if first has a higher RM priority | ||
18 | * than second (shorter period). Period ties are broken by PID. | ||
19 | * | ||
20 | * both first and second may be NULL | ||
21 | */ | ||
22 | int rm_higher_prio(struct task_struct* first, | ||
23 | struct task_struct* second) | ||
24 | { | ||
25 | struct task_struct *first_task = first; | ||
26 | struct task_struct *second_task = second; | ||
27 | |||
28 | /* There is no point in comparing a task to itself. */ | ||
29 | if (first && first == second) { | ||
30 | TRACE_TASK(first, | ||
31 | "WARNING: pointless edf priority comparison.\n"); | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | |||
36 | /* check for NULL tasks */ | ||
37 | if (!first || !second) | ||
38 | return first && !second; | ||
39 | |||
40 | #ifdef CONFIG_LITMUS_LOCKING | ||
41 | |||
42 | /* Check for inherited priorities. Change task | ||
43 | * used for comparison in such a case. | ||
44 | */ | ||
45 | if (unlikely(first->rt_param.inh_task)) | ||
46 | first_task = first->rt_param.inh_task; | ||
47 | if (unlikely(second->rt_param.inh_task)) | ||
48 | second_task = second->rt_param.inh_task; | ||
49 | |||
50 | /* Check for priority boosting. Tie-break by start of boosting. | ||
51 | */ | ||
52 | if (unlikely(is_priority_boosted(first_task))) { | ||
53 | /* first_task is boosted, how about second_task? */ | ||
54 | if (!is_priority_boosted(second_task) || | ||
55 | lt_before(get_boost_start(first_task), | ||
56 | get_boost_start(second_task))) | ||
57 | return 1; | ||
58 | else | ||
59 | return 0; | ||
60 | } else if (unlikely(is_priority_boosted(second_task))) | ||
61 | /* second_task is boosted, first is not*/ | ||
62 | return 0; | ||
63 | |||
64 | #endif | ||
65 | |||
66 | if (!is_realtime(second_task)) | ||
67 | return true; | ||
68 | |||
69 | if (shorter_period(first_task, second_task)) | ||
70 | return true; | ||
71 | |||
72 | if (get_rt_period(first_task) == get_rt_period(second_task)) | ||
73 | { | ||
74 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
75 | if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) | ||
76 | { | ||
77 | return true; | ||
78 | } | ||
79 | if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) | ||
80 | { | ||
81 | #endif | ||
82 | if (first_task->pid < second_task->pid) | ||
83 | { | ||
84 | return true; | ||
85 | } | ||
86 | if (first_task->pid == second_task->pid) | ||
87 | { | ||
88 | return !second->rt_param.inh_task; | ||
89 | } | ||
90 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
91 | } | ||
92 | #endif | ||
93 | } | ||
94 | |||
95 | return false; | ||
96 | |||
97 | #if 0 | ||
98 | return !is_realtime(second_task) || | ||
99 | shorter_period(first_task, second_task) || | ||
100 | ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task)) | ||
101 | |||
102 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
103 | /* proxy threads always lose w/o inheritance. */ | ||
104 | (first_task->rt_param.is_proxy_thread < | ||
105 | second_task->rt_param.is_proxy_thread) || | ||
106 | #endif | ||
107 | |||
108 | /* is the period of the first task shorter? | ||
109 | * Then it has higher priority. | ||
110 | */ | ||
111 | shorter_period(first_task, second_task) || | ||
112 | |||
113 | (earlier_deadline(first_task, second_task) || | ||
114 | |||
115 | /* Do we have a deadline tie? | ||
116 | * Then break by PID. | ||
117 | */ | ||
118 | (get_rt_period(first_task) == get_rt_period(second_task) && | ||
119 | (first_task->pid < second_task->pid || | ||
120 | |||
121 | /* If the PIDs are the same then the task with the inherited | ||
122 | * priority wins. | ||
123 | */ | ||
124 | (first_task->pid == second_task->pid && | ||
125 | !second->rt_param.inh_task))); | ||
126 | #endif | ||
127 | } | ||
128 | |||
129 | int rm_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
130 | { | ||
131 | return rm_higher_prio(bheap2task(a), bheap2task(b)); | ||
132 | } | ||
133 | |||
134 | void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
135 | release_jobs_t release) | ||
136 | { | ||
137 | rt_domain_init(rt, rm_ready_order, resched, release); | ||
138 | } | ||
139 | |||
140 | /* need_to_preempt - check whether the task t needs to be preempted | ||
141 | * call only with irqs disabled and with ready_lock acquired | ||
142 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
143 | */ | ||
144 | int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
145 | { | ||
146 | /* we need the read lock for rm_ready_queue */ | ||
147 | /* no need to preempt if there is nothing pending */ | ||
148 | if (!__jobs_pending(rt)) | ||
149 | return 0; | ||
150 | /* we need to reschedule if t doesn't exist */ | ||
151 | if (!t) | ||
152 | return 1; | ||
153 | |||
154 | /* NOTE: We cannot check for non-preemptibility since we | ||
155 | * don't know what address space we're currently in. | ||
156 | */ | ||
157 | |||
158 | /* make sure to get non-rt stuff out of the way */ | ||
159 | return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t); | ||
160 | } | ||
diff --git a/litmus/rm_srt_common.c b/litmus/rm_srt_common.c new file mode 100644 index 000000000000..f58a8007678f --- /dev/null +++ b/litmus/rm_srt_common.c | |||
@@ -0,0 +1,167 @@ | |||
1 | /* | ||
2 | * kernel/rm_common.c | ||
3 | * | ||
4 | * Common functions for EDF based scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/rm_srt_common.h> | ||
16 | |||
17 | /* rm_srt_higher_prio - returns true if first has a higher RM-SRT priority | ||
18 | * than second (shorter period, then earlier deadline). Remaining ties are broken by PID. | ||
19 | * | ||
20 | * both first and second may be NULL | ||
21 | */ | ||
22 | int rm_srt_higher_prio(struct task_struct* first, | ||
23 | struct task_struct* second) | ||
24 | { | ||
25 | struct task_struct *first_task = first; | ||
26 | struct task_struct *second_task = second; | ||
27 | |||
28 | /* There is no point in comparing a task to itself. */ | ||
29 | if (first && first == second) { | ||
30 | TRACE_TASK(first, | ||
31 | "WARNING: pointless edf priority comparison.\n"); | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | |||
36 | /* check for NULL tasks */ | ||
37 | if (!first || !second) | ||
38 | return first && !second; | ||
39 | |||
40 | #ifdef CONFIG_LITMUS_LOCKING | ||
41 | |||
42 | /* Check for inherited priorities. Change task | ||
43 | * used for comparison in such a case. | ||
44 | */ | ||
45 | if (unlikely(first->rt_param.inh_task)) | ||
46 | first_task = first->rt_param.inh_task; | ||
47 | if (unlikely(second->rt_param.inh_task)) | ||
48 | second_task = second->rt_param.inh_task; | ||
49 | |||
50 | /* Check for priority boosting. Tie-break by start of boosting. | ||
51 | */ | ||
52 | if (unlikely(is_priority_boosted(first_task))) { | ||
53 | /* first_task is boosted, how about second_task? */ | ||
54 | if (!is_priority_boosted(second_task) || | ||
55 | lt_before(get_boost_start(first_task), | ||
56 | get_boost_start(second_task))) | ||
57 | return 1; | ||
58 | else | ||
59 | return 0; | ||
60 | } else if (unlikely(is_priority_boosted(second_task))) | ||
61 | /* second_task is boosted, first is not*/ | ||
62 | return 0; | ||
63 | |||
64 | #endif | ||
65 | |||
66 | if (!is_realtime(second_task)) | ||
67 | return true; | ||
68 | |||
69 | if (shorter_period(first_task, second_task)) | ||
70 | return true; | ||
71 | |||
72 | if (get_rt_period(first_task) == get_rt_period(second_task)) | ||
73 | { | ||
74 | if (earlier_deadline(first_task, second_task)) | ||
75 | { | ||
76 | return true; | ||
77 | } | ||
78 | if(get_deadline(first_task) == get_deadline(second_task)) | ||
79 | { | ||
80 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
81 | if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread) | ||
82 | { | ||
83 | return true; | ||
84 | } | ||
85 | if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) | ||
86 | { | ||
87 | #endif | ||
88 | if (first_task->pid < second_task->pid) | ||
89 | { | ||
90 | return true; | ||
91 | } | ||
92 | if (first_task->pid == second_task->pid) | ||
93 | { | ||
94 | return !second->rt_param.inh_task; | ||
95 | } | ||
96 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
97 | } | ||
98 | #endif | ||
99 | } | ||
100 | } | ||
101 | |||
102 | return false; | ||
103 | |||
104 | #if 0 | ||
105 | return !is_realtime(second_task) || | ||
106 | shorter_period(first_task, second_task) || | ||
107 | ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task)) | ||
108 | |||
109 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
110 | /* proxy threads always lose w/o inheritance. */ | ||
111 | (first_task->rt_param.is_proxy_thread < | ||
112 | second_task->rt_param.is_proxy_thread) || | ||
113 | #endif | ||
114 | |||
115 | /* is the period of the first task shorter? | ||
116 | * Then it has higher priority. | ||
117 | */ | ||
118 | shorter_period(first_task, second_task) || | ||
119 | |||
120 | (earlier_deadline(first_task, second_task) || | ||
121 | |||
122 | /* Do we have a deadline tie? | ||
123 | * Then break by PID. | ||
124 | */ | ||
125 | (get_rt_period(first_task) == get_rt_period(second_task) && | ||
126 | (first_task->pid < second_task->pid || | ||
127 | |||
128 | /* If the PIDs are the same then the task with the inherited | ||
129 | * priority wins. | ||
130 | */ | ||
131 | (first_task->pid == second_task->pid && | ||
132 | !second->rt_param.inh_task))); | ||
133 | #endif | ||
134 | } | ||
135 | |||
136 | int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
137 | { | ||
138 | return rm_srt_higher_prio(bheap2task(a), bheap2task(b)); | ||
139 | } | ||
140 | |||
141 | void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
142 | release_jobs_t release) | ||
143 | { | ||
144 | rt_domain_init(rt, rm_srt_ready_order, resched, release); | ||
145 | } | ||
146 | |||
147 | /* need_to_preempt - check whether the task t needs to be preempted | ||
148 | * call only with irqs disabled and with ready_lock acquired | ||
149 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
150 | */ | ||
151 | int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
152 | { | ||
153 | /* we need the read lock for rm_ready_queue */ | ||
154 | /* no need to preempt if there is nothing pending */ | ||
155 | if (!__jobs_pending(rt)) | ||
156 | return 0; | ||
157 | /* we need to reschedule if t doesn't exist */ | ||
158 | if (!t) | ||
159 | return 1; | ||
160 | |||
161 | /* NOTE: We cannot check for non-preemptibility since we | ||
162 | * don't know what address space we're currently in. | ||
163 | */ | ||
164 | |||
165 | /* make sure to get non-rt stuff out of the way */ | ||
166 | return !is_realtime(t) || rm_srt_higher_prio(__next_ready(rt), t); | ||
167 | } | ||
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 9b0a8d3b624d..f0356de60b2f 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c | |||
@@ -55,6 +55,10 @@ | |||
55 | #include <litmus/litmus_softirq.h> | 55 | #include <litmus/litmus_softirq.h> |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
59 | #include <linux/interrupt.h> | ||
60 | #endif | ||
61 | |||
58 | #ifdef CONFIG_LITMUS_NVIDIA | 62 | #ifdef CONFIG_LITMUS_NVIDIA |
59 | #include <litmus/nvidia_info.h> | 63 | #include <litmus/nvidia_info.h> |
60 | #endif | 64 | #endif |
@@ -91,6 +95,15 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); | |||
91 | #define test_will_schedule(cpu) \ | 95 | #define test_will_schedule(cpu) \ |
92 | (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) | 96 | (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) |
93 | 97 | ||
98 | |||
99 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
100 | struct tasklet_head | ||
101 | { | ||
102 | struct tasklet_struct *head; | ||
103 | struct tasklet_struct **tail; | ||
104 | }; | ||
105 | #endif | ||
106 | |||
94 | /* | 107 | /* |
95 | * In C-EDF there is a cedf domain _per_ cluster | 108 | * In C-EDF there is a cedf domain _per_ cluster |
96 | * The number of clusters is dynamically determined accordingly to the | 109 | * The number of clusters is dynamically determined accordingly to the |
@@ -108,6 +121,12 @@ typedef struct clusterdomain { | |||
108 | struct bheap cpu_heap; | 121 | struct bheap cpu_heap; |
109 | /* lock for this cluster */ | 122 | /* lock for this cluster */ |
110 | #define cedf_lock domain.ready_lock | 123 | #define cedf_lock domain.ready_lock |
124 | |||
125 | |||
126 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
127 | struct tasklet_head pending_tasklets; | ||
128 | #endif | ||
129 | |||
111 | } cedf_domain_t; | 130 | } cedf_domain_t; |
112 | 131 | ||
113 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | 132 | /* a cedf_domain per cluster; allocation is done at init/activation time */ |
@@ -395,6 +414,198 @@ static void cedf_tick(struct task_struct* t) | |||
395 | } | 414 | } |
396 | } | 415 | } |
397 | 416 | ||
417 | |||
418 | |||
419 | |||
420 | |||
421 | |||
422 | |||
423 | |||
424 | |||
425 | |||
426 | |||
427 | |||
428 | |||
429 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
430 | |||
431 | |||
432 | void __do_lit_tasklet(struct tasklet_struct* tasklet) | ||
433 | { | ||
434 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) | ||
435 | { | ||
436 | BUG(); | ||
437 | } | ||
438 | TRACE("%s: Invoking tasklet with owner pid = %d.\n", __FUNCTION__, tasklet->owner->pid); | ||
439 | tasklet->func(tasklet->data); | ||
440 | tasklet_unlock(tasklet); | ||
441 | |||
442 | } | ||
443 | |||
444 | void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* next) | ||
445 | { | ||
446 | int work_to_do = 1; | ||
447 | struct tasklet_struct *tasklet = NULL; | ||
448 | |||
449 | TRACE("%s: entered.\n", __FUNCTION__); | ||
450 | |||
451 | while(work_to_do) { | ||
452 | // remove tasklet at head of list if it has higher priority. | ||
453 | raw_spin_lock(&cluster->cedf_lock); | ||
454 | // remove tasklet at head. | ||
455 | if(cluster->pending_tasklets.head != NULL) { | ||
456 | tasklet = cluster->pending_tasklets.head; | ||
457 | |||
458 | if(edf_higher_prio(tasklet->owner, next)) { | ||
459 | // remove the tasklet from the queue | ||
460 | cluster->pending_tasklets.head = tasklet->next; | ||
461 | |||
462 | TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
463 | } | ||
464 | else { | ||
465 | TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); | ||
466 | tasklet = NULL; | ||
467 | } | ||
468 | } | ||
469 | else { | ||
470 | //TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); | ||
471 | } | ||
472 | raw_spin_unlock(&cluster->cedf_lock); | ||
473 | |||
474 | if(tasklet) { | ||
475 | __do_lit_tasklet(tasklet); | ||
476 | tasklet = NULL; | ||
477 | } | ||
478 | else { | ||
479 | work_to_do = 0; | ||
480 | } | ||
481 | } | ||
482 | |||
483 | TRACE("%s: exited.\n", __FUNCTION__); | ||
484 | } | ||
485 | |||
486 | |||
487 | void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) | ||
488 | { | ||
489 | struct tasklet_struct* step; | ||
490 | |||
491 | step = cluster->pending_tasklets.head; | ||
492 | TRACE("%s: (BEFORE) dumping tasklet queue...\n"); | ||
493 | while(step != NULL){ | ||
494 | TRACE("%s: %d\n", __FUNCTION__, step->owner); | ||
495 | step = step->next; | ||
496 | } | ||
497 | TRACE("%s: done.\n", __FUNCTION__); | ||
498 | |||
499 | |||
500 | step = cluster->pending_tasklets.head; | ||
501 | if(step == NULL) { | ||
502 | TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); | ||
503 | // insert at tail. | ||
504 | tasklet->next = NULL; | ||
505 | *(cluster->pending_tasklets.tail) = tasklet; | ||
506 | cluster->pending_tasklets.tail = &tasklet->next; | ||
507 | } | ||
508 | else if((*cluster->pending_tasklets.tail != NULL) && | ||
509 | edf_higher_prio((*cluster->pending_tasklets.tail)->owner, tasklet->owner)) { | ||
510 | // insert at tail. | ||
511 | TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); | ||
512 | |||
513 | tasklet->next = NULL; | ||
514 | *(cluster->pending_tasklets.tail) = tasklet; | ||
515 | cluster->pending_tasklets.tail = &tasklet->next; | ||
516 | } | ||
517 | else { | ||
518 | // insert the tasklet somewhere in the middle. | ||
519 | |||
520 | while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { | ||
521 | step = step->next; | ||
522 | } | ||
523 | |||
524 | // insert tasklet right before step->next. | ||
525 | |||
526 | TRACE("%s: tasklet belongs at end. inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); | ||
527 | |||
528 | tasklet->next = step->next; | ||
529 | step->next = tasklet; | ||
530 | |||
531 | // patch up the head if needed. | ||
532 | if(cluster->pending_tasklets.head == step) | ||
533 | { | ||
534 | TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); | ||
535 | cluster->pending_tasklets.head = tasklet; | ||
536 | } | ||
537 | } | ||
538 | |||
539 | |||
540 | step = cluster->pending_tasklets.head; | ||
541 | TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__); | ||
542 | while(step != NULL){ | ||
543 | TRACE("%s: %d\n", __FUNCTION__, step->owner); | ||
544 | step = step->next; | ||
545 | } | ||
546 | TRACE("%s: done.\n", __FUNCTION__); | ||
547 | |||
548 | // TODO: Maintain this list in priority order. | ||
549 | // tasklet->next = NULL; | ||
550 | // *(cluster->pending_tasklets.tail) = tasklet; | ||
551 | // cluster->pending_tasklets.tail = &tasklet->next; | ||
552 | } | ||
553 | |||
554 | int enqueue_pai_tasklet(struct tasklet_struct* tasklet) | ||
555 | { | ||
556 | cedf_domain_t* cluster = task_cpu_cluster(tasklet->owner); | ||
557 | cpu_entry_t *lowest; | ||
558 | unsigned long flags; | ||
559 | |||
560 | if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) | ||
561 | { | ||
562 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); | ||
563 | BUG(); | ||
564 | } | ||
565 | |||
566 | raw_spin_lock_irqsave(&cluster->cedf_lock, flags); | ||
567 | |||
568 | lowest = lowest_prio_cpu(cluster); | ||
569 | if (edf_higher_prio(tasklet->owner, lowest->linked)) { | ||
570 | if (smp_processor_id() == lowest->cpu) { | ||
571 | TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); | ||
572 | // execute the tasklet now. | ||
573 | __do_lit_tasklet(tasklet); | ||
574 | } | ||
575 | else { | ||
576 | // preempt the lowest CPU | ||
577 | __add_pai_tasklet(tasklet, cluster); | ||
578 | |||
579 | TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, lowest->cpu); | ||
580 | |||
581 | preempt(lowest); | ||
582 | } | ||
583 | } | ||
584 | |||
585 | raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); | ||
586 | |||
587 | return(1); // success | ||
588 | } | ||
589 | |||
590 | |||
591 | #endif | ||
592 | |||
593 | |||
594 | |||
595 | |||
596 | |||
597 | |||
598 | |||
599 | |||
600 | |||
601 | |||
602 | |||
603 | |||
604 | |||
605 | |||
606 | |||
607 | |||
608 | |||
398 | /* Getting schedule() right is a bit tricky. schedule() may not make any | 609 | /* Getting schedule() right is a bit tricky. schedule() may not make any |
399 | * assumptions on the state of the current task since it may be called for a | 610 | * assumptions on the state of the current task since it may be called for a |
400 | * number of reasons. The reasons include a scheduler_tick() determined that it | 611 | * number of reasons. The reasons include a scheduler_tick() determined that it |
@@ -507,8 +718,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
507 | next = prev; | 718 | next = prev; |
508 | 719 | ||
509 | sched_state_task_picked(); | 720 | sched_state_task_picked(); |
721 | |||
510 | raw_spin_unlock(&cluster->cedf_lock); | 722 | raw_spin_unlock(&cluster->cedf_lock); |
511 | 723 | ||
724 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
725 | do_lit_tasklets(cluster, next); | ||
726 | #endif | ||
727 | |||
512 | #ifdef WANT_ALL_SCHED_EVENTS | 728 | #ifdef WANT_ALL_SCHED_EVENTS |
513 | TRACE("cedf_lock released, next=0x%p\n", next); | 729 | TRACE("cedf_lock released, next=0x%p\n", next); |
514 | 730 | ||
@@ -518,7 +734,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
518 | TRACE("becomes idle at %llu.\n", litmus_clock()); | 734 | TRACE("becomes idle at %llu.\n", litmus_clock()); |
519 | #endif | 735 | #endif |
520 | 736 | ||
521 | |||
522 | return next; | 737 | return next; |
523 | } | 738 | } |
524 | 739 | ||
@@ -1467,6 +1682,13 @@ static long cedf_activate_plugin(void) | |||
1467 | bheap_init(&(cedf[i].cpu_heap)); | 1682 | bheap_init(&(cedf[i].cpu_heap)); |
1468 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); | 1683 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); |
1469 | 1684 | ||
1685 | |||
1686 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1687 | cedf[i].pending_tasklets.head = NULL; | ||
1688 | cedf[i].pending_tasklets.tail = &cedf[i].pending_tasklets.head; | ||
1689 | #endif | ||
1690 | |||
1691 | |||
1470 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | 1692 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) |
1471 | return -ENOMEM; | 1693 | return -ENOMEM; |
1472 | } | 1694 | } |
@@ -1578,7 +1800,10 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { | |||
1578 | #ifdef CONFIG_LITMUS_SOFTIRQD | 1800 | #ifdef CONFIG_LITMUS_SOFTIRQD |
1579 | .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, | 1801 | .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, |
1580 | .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, | 1802 | .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, |
1581 | #endif | 1803 | #endif |
1804 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1805 | .enqueue_pai_tasklet = enqueue_pai_tasklet, | ||
1806 | #endif | ||
1582 | }; | 1807 | }; |
1583 | 1808 | ||
1584 | static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; | 1809 | static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; |
diff --git a/litmus/sched_cfifo.c b/litmus/sched_cfifo.c new file mode 100644 index 000000000000..f515446f76ed --- /dev/null +++ b/litmus/sched_cfifo.c | |||
@@ -0,0 +1,1611 @@ | |||
1 | /* | ||
2 | * litmus/sched_cfifo.c | ||
3 | * | ||
4 | * Implementation of the C-FIFO scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Cluster topology is automatically detected (this is arch-dependent | ||
9 | * and works only on x86 at the moment --- and only with modern | ||
10 | * cpus that export cpuid4 information) | ||
11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e. | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster | ||
14 | * - default clustering is around L2 cache (cache index = 2) | ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
16 | * online_cpus are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online cpus. | ||
21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/uaccess.h> | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | |||
36 | #include <litmus/litmus.h> | ||
37 | #include <litmus/jobs.h> | ||
38 | #include <litmus/preempt.h> | ||
39 | #include <litmus/sched_plugin.h> | ||
40 | #include <litmus/fifo_common.h> | ||
41 | #include <litmus/sched_trace.h> | ||
42 | |||
43 | #include <litmus/clustered.h> | ||
44 | |||
45 | #include <litmus/bheap.h> | ||
46 | |||
47 | /* to configure the cluster size */ | ||
48 | #include <litmus/litmus_proc.h> | ||
49 | |||
50 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
51 | #include <litmus/affinity.h> | ||
52 | #endif | ||
53 | |||
54 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
55 | #include <litmus/litmus_softirq.h> | ||
56 | #endif | ||
57 | |||
58 | #ifdef CONFIG_LITMUS_NVIDIA | ||
59 | #include <litmus/nvidia_info.h> | ||
60 | #endif | ||
61 | |||
62 | /* Reference configuration variable. Determines which cache level is used to | ||
63 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
64 | * all CPUs form a single cluster (just like GSN-EDF). | ||
65 | */ | ||
66 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
67 | |||
68 | struct clusterdomain; | ||
69 | |||
70 | /* cpu_entry_t - maintain the linked and scheduled state | ||
71 | * | ||
72 | * A cpu also contains a pointer to the cfifo_domain_t cluster | ||
73 | * that owns it (struct clusterdomain*) | ||
74 | */ | ||
75 | typedef struct { | ||
76 | int cpu; | ||
77 | struct clusterdomain* cluster; /* owning cluster */ | ||
78 | struct task_struct* linked; /* only RT tasks */ | ||
79 | struct task_struct* scheduled; /* only RT tasks */ | ||
80 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
81 | struct bheap_node* hn; | ||
82 | } cpu_entry_t; | ||
83 | |||
84 | /* one cpu_entry_t per CPU */ | ||
85 | DEFINE_PER_CPU(cpu_entry_t, cfifo_cpu_entries); | ||
86 | |||
87 | #define set_will_schedule() \ | ||
88 | (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 1)) | ||
89 | #define clear_will_schedule() \ | ||
90 | (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 0)) | ||
91 | #define test_will_schedule(cpu) \ | ||
92 | (atomic_read(&per_cpu(cfifo_cpu_entries, cpu).will_schedule)) | ||
93 | |||
94 | /* | ||
95 | * In C-FIFO there is a cfifo domain _per_ cluster | ||
96 | * The number of clusters is dynamically determined according to the | ||
97 | * total CPU count and the cluster size | ||
98 | */ | ||
99 | typedef struct clusterdomain { | ||
100 | /* rt_domain for this cluster */ | ||
101 | rt_domain_t domain; | ||
102 | /* cpus in this cluster */ | ||
103 | cpu_entry_t* *cpus; | ||
104 | /* map of this cluster cpus */ | ||
105 | cpumask_var_t cpu_map; | ||
106 | /* the cpus queue themselves according to priority in here */ | ||
107 | struct bheap_node *heap_node; | ||
108 | struct bheap cpu_heap; | ||
109 | /* lock for this cluster */ | ||
110 | #define cfifo_lock domain.ready_lock | ||
111 | } cfifo_domain_t; | ||
112 | |||
113 | /* a cfifo_domain per cluster; allocation is done at init/activation time */ | ||
114 | cfifo_domain_t *cfifo; | ||
115 | |||
116 | #define remote_cluster(cpu) ((cfifo_domain_t *) per_cpu(cfifo_cpu_entries, cpu).cluster) | ||
117 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
118 | |||
119 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
120 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
121 | * information during the initialization of the plugin (e.g., topology) | ||
122 | #define WANT_ALL_SCHED_EVENTS | ||
123 | */ | ||
124 | #define VERBOSE_INIT | ||
125 | |||
126 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
127 | { | ||
128 | cpu_entry_t *a, *b; | ||
129 | a = _a->value; | ||
130 | b = _b->value; | ||
131 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
132 | * the top of the heap. | ||
133 | */ | ||
134 | return fifo_higher_prio(b->linked, a->linked); | ||
135 | } | ||
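The CPU heap above is ordered by fifo_higher_prio(), which comes from fifo_common.h and is not defined in this file. For orientation, a minimal sketch of the ordering it is expected to implement: the job with the earlier release wins, with PID as a deterministic tie-break. This is an assumption, not the shipped fifo_common.c (which presumably also honors priority inheritance); stand-in types are used so the sketch is self-contained.

/* Illustrative sketch only, not the code shipped in fifo_common.c.
 * Stand-in types are used so the ordering can be read (and compiled)
 * on its own; in the kernel the arguments are struct task_struct*. */
typedef unsigned long long lt_t;	/* litmus time type */

struct fifo_job {
	lt_t release;	/* job release time */
	int  pid;
};

/* returns nonzero iff 'first' has higher C-FIFO priority than 'second' */
static int fifo_higher_prio_sketch(const struct fifo_job *first,
                                   const struct fifo_job *second)
{
	/* a real job always beats "no job" */
	if (!first || !second)
		return first && !second;

	/* FIFO: the job released earlier has higher priority */
	if (first->release != second->release)
		return first->release < second->release;

	/* break ties by PID so the order is total */
	return first->pid < second->pid;
}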
136 | |||
137 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
138 | * order in the cpu queue. Caller must hold cfifo lock. | ||
139 | */ | ||
140 | static void update_cpu_position(cpu_entry_t *entry) | ||
141 | { | ||
142 | cfifo_domain_t *cluster = entry->cluster; | ||
143 | |||
144 | if (likely(bheap_node_in_heap(entry->hn))) | ||
145 | bheap_delete(cpu_lower_prio, | ||
146 | &cluster->cpu_heap, | ||
147 | entry->hn); | ||
148 | |||
149 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | ||
150 | } | ||
151 | |||
152 | /* caller must hold cfifo lock */ | ||
153 | static cpu_entry_t* lowest_prio_cpu(cfifo_domain_t *cluster) | ||
154 | { | ||
155 | struct bheap_node* hn; | ||
156 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
157 | return hn->value; | ||
158 | } | ||
159 | |||
160 | |||
161 | /* link_task_to_cpu - Update the link of a CPU. | ||
162 | * Handles the case where the to-be-linked task is already | ||
163 | * scheduled on a different CPU. | ||
164 | */ | ||
165 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
166 | cpu_entry_t *entry) | ||
167 | { | ||
168 | cpu_entry_t *sched; | ||
169 | struct task_struct* tmp; | ||
170 | int on_cpu; | ||
171 | |||
172 | BUG_ON(linked && !is_realtime(linked)); | ||
173 | |||
174 | /* Currently linked task is set to be unlinked. */ | ||
175 | if (entry->linked) { | ||
176 | entry->linked->rt_param.linked_on = NO_CPU; | ||
177 | } | ||
178 | |||
179 | /* Link new task to CPU. */ | ||
180 | if (linked) { | ||
181 | set_rt_flags(linked, RT_F_RUNNING); | ||
182 | /* handle task is already scheduled somewhere! */ | ||
183 | on_cpu = linked->rt_param.scheduled_on; | ||
184 | if (on_cpu != NO_CPU) { | ||
185 | sched = &per_cpu(cfifo_cpu_entries, on_cpu); | ||
186 | /* this should only happen if not linked already */ | ||
187 | BUG_ON(sched->linked == linked); | ||
188 | |||
189 | /* If we are already scheduled on the CPU to which we | ||
190 | * wanted to link, we don't need to do the swap -- | ||
191 | * we just link ourselves to the CPU and depend on | ||
192 | * the caller to get things right. | ||
193 | */ | ||
194 | if (entry != sched) { | ||
195 | TRACE_TASK(linked, | ||
196 | "already scheduled on %d, updating link.\n", | ||
197 | sched->cpu); | ||
198 | tmp = sched->linked; | ||
199 | linked->rt_param.linked_on = sched->cpu; | ||
200 | sched->linked = linked; | ||
201 | update_cpu_position(sched); | ||
202 | linked = tmp; | ||
203 | } | ||
204 | } | ||
205 | if (linked) /* might be NULL due to swap */ | ||
206 | linked->rt_param.linked_on = entry->cpu; | ||
207 | } | ||
208 | entry->linked = linked; | ||
209 | #ifdef WANT_ALL_SCHED_EVENTS | ||
210 | if (linked) | ||
211 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
212 | else | ||
213 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
214 | #endif | ||
215 | update_cpu_position(entry); | ||
216 | } | ||
217 | |||
218 | /* unlink - Make sure a task is not linked any longer to an entry | ||
219 | * where it was linked before. Must hold cfifo_lock. | ||
220 | */ | ||
221 | static noinline void unlink(struct task_struct* t) | ||
222 | { | ||
223 | cpu_entry_t *entry; | ||
224 | |||
225 | if (t->rt_param.linked_on != NO_CPU) { | ||
226 | /* unlink */ | ||
227 | entry = &per_cpu(cfifo_cpu_entries, t->rt_param.linked_on); | ||
228 | t->rt_param.linked_on = NO_CPU; | ||
229 | link_task_to_cpu(NULL, entry); | ||
230 | } else if (is_queued(t)) { | ||
231 | /* This is an interesting situation: t is scheduled, | ||
232 | * but was just recently unlinked. It cannot be | ||
233 | * linked anywhere else (because then it would have | ||
234 | * been relinked to this CPU), thus it must be in some | ||
235 | * queue. We must remove it from the list in this | ||
236 | * case. | ||
237 | * | ||
238 | * in the C-FIFO case it should be somewhere in the queue of | ||
239 | * its domain, therefore we can get the domain using | ||
240 | * task_cpu_cluster | ||
241 | */ | ||
242 | remove(&(task_cpu_cluster(t))->domain, t); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | |||
247 | /* preempt - force a CPU to reschedule | ||
248 | */ | ||
249 | static void preempt(cpu_entry_t *entry) | ||
250 | { | ||
251 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
252 | } | ||
253 | |||
254 | /* requeue - Put an unlinked task into its C-FIFO domain. | ||
255 | * Caller must hold cfifo_lock. | ||
256 | */ | ||
257 | static noinline void requeue(struct task_struct* task) | ||
258 | { | ||
259 | cfifo_domain_t *cluster = task_cpu_cluster(task); | ||
260 | BUG_ON(!task); | ||
261 | /* sanity check before insertion */ | ||
262 | BUG_ON(is_queued(task)); | ||
263 | |||
264 | if (is_released(task, litmus_clock())) | ||
265 | __add_ready(&cluster->domain, task); | ||
266 | else { | ||
267 | /* it has got to wait */ | ||
268 | add_release(&cluster->domain, task); | ||
269 | } | ||
270 | } | ||
271 | |||
272 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
273 | static cpu_entry_t* cfifo_get_nearest_available_cpu( | ||
274 | cfifo_domain_t *cluster, cpu_entry_t* start) | ||
275 | { | ||
276 | cpu_entry_t* affinity; | ||
277 | |||
278 | get_nearest_available_cpu(affinity, start, cfifo_cpu_entries, -1); | ||
279 | |||
280 | /* make sure CPU is in our cluster */ | ||
281 | if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) | ||
282 | return(affinity); | ||
283 | else | ||
284 | return(NULL); | ||
285 | } | ||
286 | #endif | ||
287 | |||
288 | |||
289 | /* check for any necessary preemptions */ | ||
290 | static void check_for_preemptions(cfifo_domain_t *cluster) | ||
291 | { | ||
292 | struct task_struct *task; | ||
293 | cpu_entry_t *last; | ||
294 | |||
295 | for(last = lowest_prio_cpu(cluster); | ||
296 | fifo_preemption_needed(&cluster->domain, last->linked); | ||
297 | last = lowest_prio_cpu(cluster)) { | ||
298 | /* preemption necessary */ | ||
299 | task = __take_ready(&cluster->domain); | ||
300 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
301 | { | ||
302 | cpu_entry_t* affinity = | ||
303 | cfifo_get_nearest_available_cpu(cluster, | ||
304 | &per_cpu(cfifo_cpu_entries, task_cpu(task))); | ||
305 | if(affinity) | ||
306 | last = affinity; | ||
307 | else if(last->linked) | ||
308 | requeue(last->linked); | ||
309 | } | ||
310 | #else | ||
311 | if (last->linked) | ||
312 | requeue(last->linked); | ||
313 | #endif | ||
314 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
315 | task->pid, last->cpu); | ||
316 | link_task_to_cpu(task, last); | ||
317 | preempt(last); | ||
318 | } | ||
319 | } | ||
320 | |||
321 | /* cfifo_job_arrival: task is either resumed or released */ | ||
322 | static noinline void cfifo_job_arrival(struct task_struct* task) | ||
323 | { | ||
324 | cfifo_domain_t *cluster = task_cpu_cluster(task); | ||
325 | BUG_ON(!task); | ||
326 | |||
327 | requeue(task); | ||
328 | check_for_preemptions(cluster); | ||
329 | } | ||
330 | |||
331 | static void cfifo_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
332 | { | ||
333 | cfifo_domain_t* cluster = container_of(rt, cfifo_domain_t, domain); | ||
334 | unsigned long flags; | ||
335 | |||
336 | raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); | ||
337 | |||
338 | __merge_ready(&cluster->domain, tasks); | ||
339 | check_for_preemptions(cluster); | ||
340 | |||
341 | raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); | ||
342 | } | ||
343 | |||
344 | /* caller holds cfifo_lock */ | ||
345 | static noinline void job_completion(struct task_struct *t, int forced) | ||
346 | { | ||
347 | BUG_ON(!t); | ||
348 | |||
349 | sched_trace_task_completion(t, forced); | ||
350 | |||
351 | #ifdef CONFIG_LITMUS_NVIDIA | ||
352 | atomic_set(&tsk_rt(t)->nv_int_count, 0); | ||
353 | #endif | ||
354 | |||
355 | TRACE_TASK(t, "job_completion().\n"); | ||
356 | |||
357 | /* set flags */ | ||
358 | set_rt_flags(t, RT_F_SLEEP); | ||
359 | /* prepare for next period */ | ||
360 | prepare_for_next_period(t); | ||
361 | if (is_released(t, litmus_clock())) | ||
362 | sched_trace_task_release(t); | ||
363 | /* unlink */ | ||
364 | unlink(t); | ||
365 | /* requeue | ||
366 | * But don't requeue a blocking task. */ | ||
367 | if (is_running(t)) | ||
368 | cfifo_job_arrival(t); | ||
369 | } | ||
370 | |||
371 | /* cfifo_tick - this function is called for every local timer | ||
372 | * interrupt. | ||
373 | * | ||
374 | * checks whether the current task has expired and checks | ||
375 | * whether we need to preempt it if it has not expired | ||
376 | */ | ||
377 | static void cfifo_tick(struct task_struct* t) | ||
378 | { | ||
379 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
380 | if (!is_np(t)) { | ||
381 | /* np tasks will be preempted when they become | ||
382 | * preemptable again | ||
383 | */ | ||
384 | litmus_reschedule_local(); | ||
385 | set_will_schedule(); | ||
386 | TRACE("cfifo_scheduler_tick: " | ||
387 | "%d is preemptable " | ||
388 | " => FORCE_RESCHED\n", t->pid); | ||
389 | } else if (is_user_np(t)) { | ||
390 | TRACE("cfifo_scheduler_tick: " | ||
391 | "%d is non-preemptable, " | ||
392 | "preemption delayed.\n", t->pid); | ||
393 | request_exit_np(t); | ||
394 | } | ||
395 | } | ||
396 | } | ||
397 | |||
398 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
399 | * assumptions on the state of the current task since it may be called for a | ||
400 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
401 | * was necessary, because sys_exit_np() was called, because some Linux | ||
402 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
403 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
404 | * current state is. | ||
405 | * | ||
406 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
407 | * | ||
408 | * The following assertions for the scheduled task could hold: | ||
409 | * | ||
410 | * - !is_running(scheduled) // the job blocks | ||
411 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
412 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
413 | * - linked != scheduled // we need to reschedule (for any reason) | ||
414 | * - is_np(scheduled) // rescheduling must be delayed, | ||
415 | * sys_exit_np must be requested | ||
416 | * | ||
417 | * Any of these can occur together. | ||
418 | */ | ||
419 | static struct task_struct* cfifo_schedule(struct task_struct * prev) | ||
420 | { | ||
421 | cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries); | ||
422 | cfifo_domain_t *cluster = entry->cluster; | ||
423 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
424 | struct task_struct* next = NULL; | ||
425 | |||
426 | raw_spin_lock(&cluster->cfifo_lock); | ||
427 | clear_will_schedule(); | ||
428 | |||
429 | /* sanity checking */ | ||
430 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
431 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
432 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
433 | |||
434 | /* (0) Determine state */ | ||
435 | exists = entry->scheduled != NULL; | ||
436 | blocks = exists && !is_running(entry->scheduled); | ||
437 | out_of_time = exists && | ||
438 | budget_enforced(entry->scheduled) && | ||
439 | budget_exhausted(entry->scheduled); | ||
440 | np = exists && is_np(entry->scheduled); | ||
441 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
442 | preempt = entry->scheduled != entry->linked; | ||
443 | |||
444 | #ifdef WANT_ALL_SCHED_EVENTS | ||
445 | TRACE_TASK(prev, "invoked cfifo_schedule.\n"); | ||
446 | #endif | ||
447 | |||
448 | if (exists) | ||
449 | TRACE_TASK(prev, | ||
450 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
451 | "state:%d sig:%d\n", | ||
452 | blocks, out_of_time, np, sleep, preempt, | ||
453 | prev->state, signal_pending(prev)); | ||
454 | if (entry->linked && preempt) | ||
455 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
456 | entry->linked->comm, entry->linked->pid); | ||
457 | |||
458 | |||
459 | /* If a task blocks we have no choice but to reschedule. | ||
460 | */ | ||
461 | if (blocks) | ||
462 | unlink(entry->scheduled); | ||
463 | |||
464 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
465 | * We need to make sure to update the link structure anyway in case | ||
466 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
467 | * hurt. | ||
468 | */ | ||
469 | if (np && (out_of_time || preempt || sleep)) { | ||
470 | unlink(entry->scheduled); | ||
471 | request_exit_np(entry->scheduled); | ||
472 | } | ||
473 | |||
474 | /* Any task that is preemptable and either exhausts its execution | ||
475 | * budget or wants to sleep completes. We may have to reschedule after | ||
476 | * this. Don't do a job completion if we block (can't have timers running | ||
477 | * for blocked jobs). Preemptions go first for the same reason. | ||
478 | */ | ||
479 | if (!np && (out_of_time || sleep) && !blocks && !preempt) | ||
480 | job_completion(entry->scheduled, !sleep); | ||
481 | |||
482 | /* Link pending task if we became unlinked. | ||
483 | */ | ||
484 | if (!entry->linked) | ||
485 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
486 | |||
487 | /* The final scheduling decision. Do we need to switch for some reason? | ||
488 | * If linked is different from scheduled, then select linked as next. | ||
489 | */ | ||
490 | if ((!np || blocks) && | ||
491 | entry->linked != entry->scheduled) { | ||
492 | /* Schedule a linked job? */ | ||
493 | if (entry->linked) { | ||
494 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
495 | next = entry->linked; | ||
496 | } | ||
497 | if (entry->scheduled) { | ||
498 | /* not gonna be scheduled soon */ | ||
499 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
500 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
501 | } | ||
502 | } else | ||
503 | /* Only override Linux scheduler if we have a real-time task | ||
504 | * scheduled that needs to continue. | ||
505 | */ | ||
506 | if (exists) | ||
507 | next = prev; | ||
508 | |||
509 | sched_state_task_picked(); | ||
510 | raw_spin_unlock(&cluster->cfifo_lock); | ||
511 | |||
512 | #ifdef WANT_ALL_SCHED_EVENTS | ||
513 | TRACE("cfifo_lock released, next=0x%p\n", next); | ||
514 | |||
515 | if (next) | ||
516 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
517 | else if (exists && !next) | ||
518 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
519 | #endif | ||
520 | |||
521 | |||
522 | return next; | ||
523 | } | ||
524 | |||
525 | |||
526 | /* _finish_switch - we just finished the switch away from prev | ||
527 | */ | ||
528 | static void cfifo_finish_switch(struct task_struct *prev) | ||
529 | { | ||
530 | cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries); | ||
531 | |||
532 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
533 | #ifdef WANT_ALL_SCHED_EVENTS | ||
534 | TRACE_TASK(prev, "switched away from\n"); | ||
535 | #endif | ||
536 | } | ||
537 | |||
538 | |||
539 | /* Prepare a task for running in RT mode | ||
540 | */ | ||
541 | static void cfifo_task_new(struct task_struct * t, int on_rq, int running) | ||
542 | { | ||
543 | unsigned long flags; | ||
544 | cpu_entry_t* entry; | ||
545 | cfifo_domain_t* cluster; | ||
546 | |||
547 | TRACE("gsn edf: task new %d\n", t->pid); | ||
548 | |||
549 | /* the cluster doesn't change even if t is running */ | ||
550 | cluster = task_cpu_cluster(t); | ||
551 | |||
552 | raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); | ||
553 | |||
554 | /* setup job params */ | ||
555 | release_at(t, litmus_clock()); | ||
556 | |||
557 | if (running) { | ||
558 | entry = &per_cpu(cfifo_cpu_entries, task_cpu(t)); | ||
559 | BUG_ON(entry->scheduled); | ||
560 | |||
561 | entry->scheduled = t; | ||
562 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
563 | } else { | ||
564 | t->rt_param.scheduled_on = NO_CPU; | ||
565 | } | ||
566 | t->rt_param.linked_on = NO_CPU; | ||
567 | |||
568 | cfifo_job_arrival(t); | ||
569 | raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); | ||
570 | } | ||
571 | |||
572 | static void cfifo_task_wake_up(struct task_struct *task) | ||
573 | { | ||
574 | unsigned long flags; | ||
575 | //lt_t now; | ||
576 | cfifo_domain_t *cluster; | ||
577 | |||
578 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
579 | |||
580 | cluster = task_cpu_cluster(task); | ||
581 | |||
582 | raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); | ||
583 | |||
584 | #if 0 // sporadic task model | ||
585 | /* We need to take suspensions because of semaphores into | ||
586 | * account! If a job resumes after being suspended due to acquiring | ||
587 | * a semaphore, it should never be treated as a new job release. | ||
588 | */ | ||
589 | if (get_rt_flags(task) == RT_F_EXIT_SEM) { | ||
590 | set_rt_flags(task, RT_F_RUNNING); | ||
591 | } else { | ||
592 | now = litmus_clock(); | ||
593 | if (is_tardy(task, now)) { | ||
594 | /* new sporadic release */ | ||
595 | release_at(task, now); | ||
596 | sched_trace_task_release(task); | ||
597 | } | ||
598 | else { | ||
599 | if (task->rt.time_slice) { | ||
600 | /* came back in time before deadline | ||
601 | */ | ||
602 | set_rt_flags(task, RT_F_RUNNING); | ||
603 | } | ||
604 | } | ||
605 | } | ||
606 | #endif | ||
607 | |||
608 | //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); | ||
609 | set_rt_flags(task, RT_F_RUNNING); // periodic model | ||
610 | |||
611 | if(tsk_rt(task)->linked_on == NO_CPU) | ||
612 | cfifo_job_arrival(task); | ||
613 | else | ||
614 | TRACE("WTF, mate?!\n"); | ||
615 | |||
616 | raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); | ||
617 | } | ||
618 | |||
619 | static void cfifo_task_block(struct task_struct *t) | ||
620 | { | ||
621 | unsigned long flags; | ||
622 | cfifo_domain_t *cluster; | ||
623 | |||
624 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
625 | |||
626 | cluster = task_cpu_cluster(t); | ||
627 | |||
628 | /* unlink if necessary */ | ||
629 | raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); | ||
630 | unlink(t); | ||
631 | raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); | ||
632 | |||
633 | BUG_ON(!is_realtime(t)); | ||
634 | } | ||
635 | |||
636 | |||
637 | static void cfifo_task_exit(struct task_struct * t) | ||
638 | { | ||
639 | unsigned long flags; | ||
640 | cfifo_domain_t *cluster = task_cpu_cluster(t); | ||
641 | |||
642 | /* unlink if necessary */ | ||
643 | raw_spin_lock_irqsave(&cluster->cfifo_lock, flags); | ||
644 | unlink(t); | ||
645 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
646 | cpu_entry_t *cpu; | ||
647 | cpu = &per_cpu(cfifo_cpu_entries, tsk_rt(t)->scheduled_on); | ||
648 | cpu->scheduled = NULL; | ||
649 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
650 | } | ||
651 | raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags); | ||
652 | |||
653 | BUG_ON(!is_realtime(t)); | ||
654 | TRACE_TASK(t, "RIP\n"); | ||
655 | } | ||
656 | |||
657 | static long cfifo_admit_task(struct task_struct* tsk) | ||
658 | { | ||
659 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | ||
660 | } | ||
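cfifo_admit_task() only admits a task that is already running on the CPU recorded in its task_params, so a task must migrate itself into the target cluster before entering real-time mode. A hedged userspace sketch of that preparatory step using plain sched_setaffinity(2); the helper name below is made up for illustration, and the usual liblitmus wrappers would achieve the same effect.

#define _GNU_SOURCE
#include <sched.h>

/* Pin the calling thread to 'cpu' so that the later transition to the
 * C-FIFO plugin passes the task_cpu(tsk) == task_params.cpu admission
 * check.  Returns 0 on success, -1 on error (see errno). */
static int migrate_to_cpu(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	return sched_setaffinity(0 /* calling thread */, sizeof(set), &set);
}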
661 | |||
662 | |||
663 | |||
664 | |||
665 | |||
666 | |||
667 | |||
668 | |||
669 | |||
670 | |||
671 | |||
672 | |||
673 | |||
674 | #ifdef CONFIG_LITMUS_LOCKING | ||
675 | |||
676 | #include <litmus/fdso.h> | ||
677 | |||
678 | |||
679 | static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
680 | { | ||
681 | int linked_on; | ||
682 | int check_preempt = 0; | ||
683 | |||
684 | cfifo_domain_t* cluster = task_cpu_cluster(t); | ||
685 | |||
686 | if(prio_inh != NULL) | ||
687 | TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); | ||
688 | else | ||
689 | TRACE_TASK(t, "inherits priority from %p\n", prio_inh); | ||
690 | |||
691 | sched_trace_eff_prio_change(t, prio_inh); | ||
692 | |||
693 | tsk_rt(t)->inh_task = prio_inh; | ||
694 | |||
695 | linked_on = tsk_rt(t)->linked_on; | ||
696 | |||
697 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
698 | if (linked_on != NO_CPU) { | ||
699 | TRACE_TASK(t, "%s: linked on %d\n", | ||
700 | __FUNCTION__, linked_on); | ||
701 | /* Holder is scheduled; need to re-order CPUs. | ||
702 | * We can't use heap_decrease() here since | ||
703 | * the cpu_heap is ordered in reverse direction, so | ||
704 | * it is actually an increase. */ | ||
705 | bheap_delete(cpu_lower_prio, &cluster->cpu_heap, | ||
706 | per_cpu(cfifo_cpu_entries, linked_on).hn); | ||
707 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, | ||
708 | per_cpu(cfifo_cpu_entries, linked_on).hn); | ||
709 | } else { | ||
710 | /* holder may be queued: first stop queue changes */ | ||
711 | raw_spin_lock(&cluster->domain.release_lock); | ||
712 | if (is_queued(t)) { | ||
713 | TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); | ||
714 | |||
715 | /* We need to update the position of holder in some | ||
716 | * heap. Note that this could be a release heap if | ||
717 | * budget enforcement is used and this job overran. */ | ||
718 | check_preempt = !bheap_decrease(fifo_ready_order, tsk_rt(t)->heap_node); | ||
719 | |||
720 | } else { | ||
721 | /* Nothing to do: if it is not queued and not linked | ||
722 | * then it is either sleeping or currently being moved | ||
723 | * by other code (e.g., a timer interrupt handler) that | ||
724 | * will use the correct priority when enqueuing the | ||
725 | * task. */ | ||
726 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); | ||
727 | } | ||
728 | raw_spin_unlock(&cluster->domain.release_lock); | ||
729 | |||
730 | /* If holder was enqueued in a release heap, then the following | ||
731 | * preemption check is pointless, but we can't easily detect | ||
732 | * that case. If you want to fix this, then consider that | ||
733 | * simply adding a state flag requires O(n) time to update when | ||
734 | * releasing n tasks, which conflicts with the goal to have | ||
735 | * O(log n) merges. */ | ||
736 | if (check_preempt) { | ||
737 | /* heap_decrease() hit the top level of the heap: make | ||
738 | * sure preemption checks get the right task, not the | ||
739 | * potentially stale cache. */ | ||
740 | bheap_uncache_min(fifo_ready_order, &cluster->domain.ready_queue); | ||
741 | check_for_preemptions(cluster); | ||
742 | } | ||
743 | } | ||
744 | } | ||
745 | |||
746 | /* called with IRQs off */ | ||
747 | static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
748 | { | ||
749 | cfifo_domain_t* cluster = task_cpu_cluster(t); | ||
750 | |||
751 | raw_spin_lock(&cluster->cfifo_lock); | ||
752 | |||
753 | __set_priority_inheritance(t, prio_inh); | ||
754 | |||
755 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
756 | if(tsk_rt(t)->cur_klitirqd != NULL) | ||
757 | { | ||
758 | TRACE_TASK(t, "%s/%d inherits a new priority!\n", | ||
759 | tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); | ||
760 | |||
761 | __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); | ||
762 | } | ||
763 | #endif | ||
764 | |||
765 | raw_spin_unlock(&cluster->cfifo_lock); | ||
766 | } | ||
767 | |||
768 | |||
769 | /* called with IRQs off */ | ||
770 | static void __clear_priority_inheritance(struct task_struct* t) | ||
771 | { | ||
772 | TRACE_TASK(t, "priority restored\n"); | ||
773 | |||
774 | if(tsk_rt(t)->scheduled_on != NO_CPU) | ||
775 | { | ||
776 | sched_trace_eff_prio_change(t, NULL); | ||
777 | |||
778 | tsk_rt(t)->inh_task = NULL; | ||
779 | |||
780 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
781 | * since the priority was effectively lowered. */ | ||
782 | unlink(t); | ||
783 | cfifo_job_arrival(t); | ||
784 | } | ||
785 | else | ||
786 | { | ||
787 | __set_priority_inheritance(t, NULL); | ||
788 | } | ||
789 | |||
790 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
791 | if(tsk_rt(t)->cur_klitirqd != NULL) | ||
792 | { | ||
793 | TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", | ||
794 | tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); | ||
795 | |||
796 | if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) | ||
797 | { | ||
798 | sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); | ||
799 | |||
800 | tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; | ||
801 | |||
802 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
803 | * since the priority was effectively lowered. */ | ||
804 | unlink(tsk_rt(t)->cur_klitirqd); | ||
805 | cfifo_job_arrival(tsk_rt(t)->cur_klitirqd); | ||
806 | } | ||
807 | else | ||
808 | { | ||
809 | __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); | ||
810 | } | ||
811 | } | ||
812 | #endif | ||
813 | } | ||
814 | |||
815 | /* called with IRQs off */ | ||
816 | static void clear_priority_inheritance(struct task_struct* t) | ||
817 | { | ||
818 | cfifo_domain_t* cluster = task_cpu_cluster(t); | ||
819 | |||
820 | raw_spin_lock(&cluster->cfifo_lock); | ||
821 | __clear_priority_inheritance(t); | ||
822 | raw_spin_unlock(&cluster->cfifo_lock); | ||
823 | } | ||
824 | |||
825 | |||
826 | |||
827 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
828 | /* called with IRQs off */ | ||
829 | static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, | ||
830 | struct task_struct* old_owner, | ||
831 | struct task_struct* new_owner) | ||
832 | { | ||
833 | cfifo_domain_t* cluster = task_cpu_cluster(klitirqd); | ||
834 | |||
835 | BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); | ||
836 | |||
837 | raw_spin_lock(&cluster->cfifo_lock); | ||
838 | |||
839 | if(old_owner != new_owner) | ||
840 | { | ||
841 | if(old_owner) | ||
842 | { | ||
843 | // unreachable? | ||
844 | tsk_rt(old_owner)->cur_klitirqd = NULL; | ||
845 | } | ||
846 | |||
847 | TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", | ||
848 | new_owner->comm, new_owner->pid); | ||
849 | |||
850 | tsk_rt(new_owner)->cur_klitirqd = klitirqd; | ||
851 | } | ||
852 | |||
853 | __set_priority_inheritance(klitirqd, | ||
854 | (tsk_rt(new_owner)->inh_task == NULL) ? | ||
855 | new_owner : | ||
856 | tsk_rt(new_owner)->inh_task); | ||
857 | |||
858 | raw_spin_unlock(&cluster->cfifo_lock); | ||
859 | } | ||
860 | |||
861 | /* called with IRQs off */ | ||
862 | static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, | ||
863 | struct task_struct* old_owner) | ||
864 | { | ||
865 | cfifo_domain_t* cluster = task_cpu_cluster(klitirqd); | ||
866 | |||
867 | BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); | ||
868 | |||
869 | raw_spin_lock(&cluster->cfifo_lock); | ||
870 | |||
871 | TRACE_TASK(klitirqd, "priority restored\n"); | ||
872 | |||
873 | if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) | ||
874 | { | ||
875 | tsk_rt(klitirqd)->inh_task = NULL; | ||
876 | |||
877 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
878 | * since the priority was effectively lowered. */ | ||
879 | unlink(klitirqd); | ||
880 | cfifo_job_arrival(klitirqd); | ||
881 | } | ||
882 | else | ||
883 | { | ||
884 | __set_priority_inheritance(klitirqd, NULL); | ||
885 | } | ||
886 | |||
887 | tsk_rt(old_owner)->cur_klitirqd = NULL; | ||
888 | |||
889 | raw_spin_unlock(&cluster->cfifo_lock); | ||
890 | } | ||
891 | #endif // CONFIG_LITMUS_SOFTIRQD | ||
892 | |||
893 | |||
894 | /* ******************** KFMLP support ********************** */ | ||
895 | |||
896 | /* struct for semaphore with priority inheritance */ | ||
897 | struct kfmlp_queue | ||
898 | { | ||
899 | wait_queue_head_t wait; | ||
900 | struct task_struct* owner; | ||
901 | struct task_struct* hp_waiter; | ||
902 | int count; /* number of waiters + holder */ | ||
903 | }; | ||
904 | |||
905 | struct kfmlp_semaphore | ||
906 | { | ||
907 | struct litmus_lock litmus_lock; | ||
908 | |||
909 | spinlock_t lock; | ||
910 | |||
911 | int num_resources; /* aka k */ | ||
912 | struct kfmlp_queue *queues; /* array */ | ||
913 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ | ||
914 | }; | ||
915 | |||
916 | static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) | ||
917 | { | ||
918 | return container_of(lock, struct kfmlp_semaphore, litmus_lock); | ||
919 | } | ||
920 | |||
921 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | ||
922 | struct kfmlp_queue* queue) | ||
923 | { | ||
924 | return (queue - &sem->queues[0]); | ||
925 | } | ||
926 | |||
927 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, | ||
928 | struct task_struct* holder) | ||
929 | { | ||
930 | int i; | ||
931 | for(i = 0; i < sem->num_resources; ++i) | ||
932 | if(sem->queues[i].owner == holder) | ||
933 | return(&sem->queues[i]); | ||
934 | return(NULL); | ||
935 | } | ||
936 | |||
937 | /* caller is responsible for locking */ | ||
938 | static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, | ||
939 | struct task_struct *skip) | ||
940 | { | ||
941 | struct list_head *pos; | ||
942 | struct task_struct *queued, *found = NULL; | ||
943 | |||
944 | list_for_each(pos, &kqueue->wait.task_list) { | ||
945 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
946 | task_list)->private; | ||
947 | |||
948 | /* Compare task prios, find high prio task. */ | ||
949 | if (queued != skip && fifo_higher_prio(queued, found)) | ||
950 | found = queued; | ||
951 | } | ||
952 | return found; | ||
953 | } | ||
954 | |||
955 | static inline struct kfmlp_queue* kfmlp_find_shortest( | ||
956 | struct kfmlp_semaphore* sem, | ||
957 | struct kfmlp_queue* search_start) | ||
958 | { | ||
959 | // we start our search at search_start instead of at the beginning of the | ||
960 | // queue list to load-balance across all resources. | ||
961 | struct kfmlp_queue* step = search_start; | ||
962 | struct kfmlp_queue* shortest = sem->shortest_queue; | ||
963 | |||
964 | do | ||
965 | { | ||
966 | step = (step+1 != &sem->queues[sem->num_resources]) ? | ||
967 | step+1 : &sem->queues[0]; | ||
968 | if(step->count < shortest->count) | ||
969 | { | ||
970 | shortest = step; | ||
971 | if(step->count == 0) | ||
972 | break; /* can't get any shorter */ | ||
973 | } | ||
974 | }while(step != search_start); | ||
975 | |||
976 | return(shortest); | ||
977 | } | ||
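To make the wrap-around scan above easy to check in isolation, here is the same search over a plain array of waiter counts, as a standalone sketch with made-up values. It mirrors the loop structure: the scan starts one slot past search_start, keeps the cached best guess as the initial shortest, and exits early when an empty queue is found.

#include <stdio.h>

/* Same shape as kfmlp_find_shortest(): 'shortest' starts from the cached
 * best guess, the scan begins one past 'search_start' and wraps around. */
static int find_shortest(const int *count, int k, int search_start, int cached)
{
	int step = search_start;
	int shortest = cached;

	do {
		step = (step + 1 == k) ? 0 : step + 1;
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break;	/* cannot get any shorter */
		}
	} while (step != search_start);

	return shortest;
}

int main(void)
{
	int count[4] = { 2, 1, 3, 1 };	/* hypothetical queue lengths */

	/* the last request went to queue 0, which is also the cached shortest */
	printf("next queue: %d\n", find_shortest(count, 4, 0, 0));	/* prints 1 */
	return 0;
}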
978 | |||
979 | static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | ||
980 | { | ||
981 | /* must hold sem->lock */ | ||
982 | |||
983 | struct kfmlp_queue *my_queue = NULL; | ||
984 | struct task_struct *max_hp = NULL; | ||
985 | |||
986 | |||
987 | struct list_head *pos; | ||
988 | struct task_struct *queued; | ||
989 | int i; | ||
990 | |||
991 | for(i = 0; i < sem->num_resources; ++i) | ||
992 | { | ||
993 | if( (sem->queues[i].count > 1) && | ||
994 | ((my_queue == NULL) || | ||
995 | (fifo_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
996 | { | ||
997 | my_queue = &sem->queues[i]; | ||
998 | } | ||
999 | } | ||
1000 | |||
1001 | if(my_queue) | ||
1002 | { | ||
1003 | cfifo_domain_t* cluster; | ||
1004 | |||
1005 | max_hp = my_queue->hp_waiter; | ||
1006 | BUG_ON(!max_hp); | ||
1007 | |||
1008 | TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", | ||
1009 | kfmlp_get_idx(sem, my_queue), | ||
1010 | max_hp->comm, max_hp->pid, | ||
1011 | kfmlp_get_idx(sem, my_queue)); | ||
1012 | |||
1013 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); | ||
1014 | |||
1015 | /* | ||
1016 | if(my_queue->hp_waiter) | ||
1017 | TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", | ||
1018 | kfmlp_get_idx(sem, my_queue), | ||
1019 | my_queue->hp_waiter->comm, | ||
1020 | my_queue->hp_waiter->pid); | ||
1021 | else | ||
1022 | TRACE_CUR("queue %d: new hp_waiter is %p\n", | ||
1023 | kfmlp_get_idx(sem, my_queue), NULL); | ||
1024 | */ | ||
1025 | |||
1026 | cluster = task_cpu_cluster(max_hp); | ||
1027 | |||
1028 | raw_spin_lock(&cluster->cfifo_lock); | ||
1029 | |||
1030 | /* | ||
1031 | if(my_queue->owner) | ||
1032 | TRACE_CUR("queue %d: owner is %s/%d\n", | ||
1033 | kfmlp_get_idx(sem, my_queue), | ||
1034 | my_queue->owner->comm, | ||
1035 | my_queue->owner->pid); | ||
1036 | else | ||
1037 | TRACE_CUR("queue %d: owner is %p\n", | ||
1038 | kfmlp_get_idx(sem, my_queue), | ||
1039 | NULL); | ||
1040 | */ | ||
1041 | |||
1042 | if(tsk_rt(my_queue->owner)->inh_task == max_hp) | ||
1043 | { | ||
1044 | __clear_priority_inheritance(my_queue->owner); | ||
1045 | if(my_queue->hp_waiter != NULL) | ||
1046 | { | ||
1047 | __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); | ||
1048 | } | ||
1049 | } | ||
1050 | raw_spin_unlock(&cluster->cfifo_lock); | ||
1051 | |||
1052 | list_for_each(pos, &my_queue->wait.task_list) | ||
1053 | { | ||
1054 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
1055 | task_list)->private; | ||
1056 | /* Compare task prios, find high prio task. */ | ||
1057 | if (queued == max_hp) | ||
1058 | { | ||
1059 | /* | ||
1060 | TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", | ||
1061 | kfmlp_get_idx(sem, my_queue)); | ||
1062 | */ | ||
1063 | __remove_wait_queue(&my_queue->wait, | ||
1064 | list_entry(pos, wait_queue_t, task_list)); | ||
1065 | break; | ||
1066 | } | ||
1067 | } | ||
1068 | --(my_queue->count); | ||
1069 | } | ||
1070 | |||
1071 | return(max_hp); | ||
1072 | } | ||
1073 | |||
1074 | int cfifo_kfmlp_lock(struct litmus_lock* l) | ||
1075 | { | ||
1076 | struct task_struct* t = current; | ||
1077 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1078 | struct kfmlp_queue* my_queue; | ||
1079 | wait_queue_t wait; | ||
1080 | unsigned long flags; | ||
1081 | |||
1082 | if (!is_realtime(t)) | ||
1083 | return -EPERM; | ||
1084 | |||
1085 | spin_lock_irqsave(&sem->lock, flags); | ||
1086 | |||
1087 | my_queue = sem->shortest_queue; | ||
1088 | |||
1089 | if (my_queue->owner) { | ||
1090 | /* resource is not free => must suspend and wait */ | ||
1091 | TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", | ||
1092 | kfmlp_get_idx(sem, my_queue)); | ||
1093 | |||
1094 | init_waitqueue_entry(&wait, t); | ||
1095 | |||
1096 | /* FIXME: interruptible would be nice some day */ | ||
1097 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
1098 | |||
1099 | __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); | ||
1100 | |||
1101 | /* check if we need to activate priority inheritance */ | ||
1102 | if (fifo_higher_prio(t, my_queue->hp_waiter)) | ||
1103 | { | ||
1104 | my_queue->hp_waiter = t; | ||
1105 | if (fifo_higher_prio(t, my_queue->owner)) | ||
1106 | { | ||
1107 | set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); | ||
1108 | } | ||
1109 | } | ||
1110 | |||
1111 | ++(my_queue->count); | ||
1112 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
1113 | |||
1114 | /* release lock before sleeping */ | ||
1115 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1116 | |||
1117 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
1118 | * when we wake up; we are guaranteed to have the lock since | ||
1119 | * there is only one wake up per release (or steal). | ||
1120 | */ | ||
1121 | schedule(); | ||
1122 | |||
1123 | |||
1124 | if(my_queue->owner == t) | ||
1125 | { | ||
1126 | TRACE_CUR("queue %d: acquired through waiting\n", | ||
1127 | kfmlp_get_idx(sem, my_queue)); | ||
1128 | } | ||
1129 | else | ||
1130 | { | ||
1131 | /* this case may happen if our wait entry was stolen | ||
1132 | between queues. record where we went.*/ | ||
1133 | my_queue = kfmlp_get_queue(sem, t); | ||
1134 | BUG_ON(!my_queue); | ||
1135 | TRACE_CUR("queue %d: acquired through stealing\n", | ||
1136 | kfmlp_get_idx(sem, my_queue)); | ||
1137 | } | ||
1138 | } | ||
1139 | else | ||
1140 | { | ||
1141 | TRACE_CUR("queue %d: acquired immediately\n", | ||
1142 | kfmlp_get_idx(sem, my_queue)); | ||
1143 | |||
1144 | my_queue->owner = t; | ||
1145 | |||
1146 | ++(my_queue->count); | ||
1147 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
1148 | |||
1149 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1150 | } | ||
1151 | |||
1152 | return kfmlp_get_idx(sem, my_queue); | ||
1153 | } | ||
1154 | |||
1155 | int cfifo_kfmlp_unlock(struct litmus_lock* l) | ||
1156 | { | ||
1157 | struct task_struct *t = current, *next; | ||
1158 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1159 | struct kfmlp_queue *my_queue; | ||
1160 | unsigned long flags; | ||
1161 | int err = 0; | ||
1162 | |||
1163 | spin_lock_irqsave(&sem->lock, flags); | ||
1164 | |||
1165 | my_queue = kfmlp_get_queue(sem, t); | ||
1166 | |||
1167 | if (!my_queue) { | ||
1168 | err = -EINVAL; | ||
1169 | goto out; | ||
1170 | } | ||
1171 | |||
1172 | /* check if there are jobs waiting for this resource */ | ||
1173 | next = __waitqueue_remove_first(&my_queue->wait); | ||
1174 | if (next) { | ||
1175 | /* | ||
1176 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", | ||
1177 | kfmlp_get_idx(sem, my_queue), | ||
1178 | next->comm, next->pid); | ||
1179 | */ | ||
1180 | /* next becomes the resource holder */ | ||
1181 | my_queue->owner = next; | ||
1182 | |||
1183 | --(my_queue->count); | ||
1184 | if(my_queue->count < sem->shortest_queue->count) | ||
1185 | { | ||
1186 | sem->shortest_queue = my_queue; | ||
1187 | } | ||
1188 | |||
1189 | TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", | ||
1190 | kfmlp_get_idx(sem, my_queue), next->comm, next->pid); | ||
1191 | |||
1192 | /* determine new hp_waiter if necessary */ | ||
1193 | if (next == my_queue->hp_waiter) { | ||
1194 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
1195 | /* next has the highest priority --- it doesn't need to | ||
1196 | * inherit. However, we need to make sure that the | ||
1197 | * next-highest priority in the queue is reflected in | ||
1198 | * hp_waiter. */ | ||
1199 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); | ||
1200 | if (my_queue->hp_waiter) | ||
1201 | TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); | ||
1202 | else | ||
1203 | TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); | ||
1204 | } else { | ||
1205 | /* Well, if next is not the highest-priority waiter, | ||
1206 | * then it ought to inherit the highest-priority | ||
1207 | * waiter's priority. */ | ||
1208 | set_priority_inheritance(next, my_queue->hp_waiter); | ||
1209 | } | ||
1210 | |||
1211 | /* wake up next */ | ||
1212 | wake_up_process(next); | ||
1213 | } | ||
1214 | else | ||
1215 | { | ||
1216 | TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); | ||
1217 | |||
1218 | next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ | ||
1219 | |||
1220 | /* | ||
1221 | if(next) | ||
1222 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", | ||
1223 | kfmlp_get_idx(sem, my_queue), | ||
1224 | next->comm, next->pid); | ||
1225 | */ | ||
1226 | |||
1227 | my_queue->owner = next; | ||
1228 | |||
1229 | if(next) | ||
1230 | { | ||
1231 | TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", | ||
1232 | kfmlp_get_idx(sem, my_queue), | ||
1233 | next->comm, next->pid); | ||
1234 | |||
1235 | /* wake up next */ | ||
1236 | wake_up_process(next); | ||
1237 | } | ||
1238 | else | ||
1239 | { | ||
1240 | TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); | ||
1241 | |||
1242 | --(my_queue->count); | ||
1243 | if(my_queue->count < sem->shortest_queue->count) | ||
1244 | { | ||
1245 | sem->shortest_queue = my_queue; | ||
1246 | } | ||
1247 | } | ||
1248 | } | ||
1249 | |||
1250 | /* we lose the benefit of priority inheritance (if any) */ | ||
1251 | if (tsk_rt(t)->inh_task) | ||
1252 | clear_priority_inheritance(t); | ||
1253 | |||
1254 | out: | ||
1255 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1256 | |||
1257 | return err; | ||
1258 | } | ||
1259 | |||
1260 | int cfifo_kfmlp_close(struct litmus_lock* l) | ||
1261 | { | ||
1262 | struct task_struct *t = current; | ||
1263 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1264 | struct kfmlp_queue *my_queue; | ||
1265 | unsigned long flags; | ||
1266 | |||
1267 | int owner; | ||
1268 | |||
1269 | spin_lock_irqsave(&sem->lock, flags); | ||
1270 | |||
1271 | my_queue = kfmlp_get_queue(sem, t); | ||
1272 | owner = (my_queue) ? (my_queue->owner == t) : 0; | ||
1273 | |||
1274 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1275 | |||
1276 | if (owner) | ||
1277 | cfifo_kfmlp_unlock(l); | ||
1278 | |||
1279 | return 0; | ||
1280 | } | ||
1281 | |||
1282 | void cfifo_kfmlp_free(struct litmus_lock* l) | ||
1283 | { | ||
1284 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1285 | kfree(sem->queues); | ||
1286 | kfree(sem); | ||
1287 | } | ||
1288 | |||
1289 | static struct litmus_lock_ops cfifo_kfmlp_lock_ops = { | ||
1290 | .close = cfifo_kfmlp_close, | ||
1291 | .lock = cfifo_kfmlp_lock, | ||
1292 | .unlock = cfifo_kfmlp_unlock, | ||
1293 | .deallocate = cfifo_kfmlp_free, | ||
1294 | }; | ||
1295 | |||
1296 | static struct litmus_lock* cfifo_new_kfmlp(void* __user arg, int* ret_code) | ||
1297 | { | ||
1298 | struct kfmlp_semaphore* sem; | ||
1299 | int num_resources = 0; | ||
1300 | int i; | ||
1301 | |||
1302 | if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) | ||
1303 | { | ||
1304 | *ret_code = -EINVAL; | ||
1305 | return(NULL); | ||
1306 | } | ||
1307 | if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) | ||
1308 | { | ||
1309 | *ret_code = -EINVAL; | ||
1310 | return(NULL); | ||
1311 | } | ||
1312 | if(num_resources < 1) | ||
1313 | { | ||
1314 | *ret_code = -EINVAL; | ||
1315 | return(NULL); | ||
1316 | } | ||
1317 | |||
1318 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1319 | if(!sem) | ||
1320 | { | ||
1321 | *ret_code = -ENOMEM; | ||
1322 | return NULL; | ||
1323 | } | ||
1324 | |||
1325 | sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); | ||
1326 | if(!sem->queues) | ||
1327 | { | ||
1328 | kfree(sem); | ||
1329 | *ret_code = -ENOMEM; | ||
1330 | return NULL; | ||
1331 | } | ||
1332 | |||
1333 | sem->litmus_lock.ops = &cfifo_kfmlp_lock_ops; | ||
1334 | spin_lock_init(&sem->lock); | ||
1335 | sem->num_resources = num_resources; | ||
1336 | |||
1337 | for(i = 0; i < num_resources; ++i) | ||
1338 | { | ||
1339 | sem->queues[i].owner = NULL; | ||
1340 | sem->queues[i].hp_waiter = NULL; | ||
1341 | init_waitqueue_head(&sem->queues[i].wait); | ||
1342 | sem->queues[i].count = 0; | ||
1343 | } | ||
1344 | |||
1345 | sem->shortest_queue = &sem->queues[0]; | ||
1346 | |||
1347 | *ret_code = 0; | ||
1348 | return &sem->litmus_lock; | ||
1349 | } | ||
1350 | |||
1351 | |||
1352 | /* **** lock constructor **** */ | ||
1353 | |||
1354 | static long cfifo_allocate_lock(struct litmus_lock **lock, int type, | ||
1355 | void* __user arg) | ||
1356 | { | ||
1357 | int err = -ENXIO; | ||
1358 | |||
1359 | /* C-FIFO currently only supports the KFMLP for global resources | ||
1360 | WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */ | ||
1361 | switch (type) { | ||
1362 | case KFMLP_SEM: | ||
1363 | *lock = cfifo_new_kfmlp(arg, &err); | ||
1364 | break; | ||
1365 | }; | ||
1366 | |||
1367 | return err; | ||
1368 | } | ||
1369 | |||
1370 | #endif // CONFIG_LITMUS_LOCKING | ||
1371 | |||
1372 | |||
1373 | |||
1374 | |||
1375 | |||
1376 | |||
1377 | /* total number of clusters */ | ||
1378 | static int num_clusters; | ||
1379 | /* we do not support clusters of different sizes */ | ||
1380 | static unsigned int cluster_size; | ||
1381 | |||
1382 | #ifdef VERBOSE_INIT | ||
1383 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
1384 | { | ||
1385 | int chk; | ||
1386 | char buf[255]; | ||
1387 | |||
1388 | chk = cpulist_scnprintf(buf, 254, mask); | ||
1389 | buf[chk] = '\0'; | ||
1390 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
1391 | |||
1392 | } | ||
1393 | #endif | ||
1394 | |||
1395 | static int clusters_allocated = 0; | ||
1396 | |||
1397 | static void cleanup_cfifo(void) | ||
1398 | { | ||
1399 | int i; | ||
1400 | |||
1401 | if (clusters_allocated) { | ||
1402 | for (i = 0; i < num_clusters; i++) { | ||
1403 | kfree(cfifo[i].cpus); | ||
1404 | kfree(cfifo[i].heap_node); | ||
1405 | free_cpumask_var(cfifo[i].cpu_map); | ||
1406 | } | ||
1407 | |||
1408 | kfree(cfifo); | ||
1409 | } | ||
1410 | } | ||
1411 | |||
1412 | static long cfifo_activate_plugin(void) | ||
1413 | { | ||
1414 | int i, j, cpu, ccpu, cpu_count; | ||
1415 | cpu_entry_t *entry; | ||
1416 | |||
1417 | cpumask_var_t mask; | ||
1418 | int chk = 0; | ||
1419 | |||
1420 | /* de-allocate old clusters, if any */ | ||
1421 | cleanup_cfifo(); | ||
1422 | |||
1423 | printk(KERN_INFO "C-FIFO: Activate Plugin, cluster configuration = %d\n", | ||
1424 | cluster_config); | ||
1425 | |||
1426 | /* need to get cluster_size first */ | ||
1427 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
1428 | return -ENOMEM; | ||
1429 | |||
1430 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
1431 | cluster_size = num_online_cpus(); | ||
1432 | } else { | ||
1433 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
1434 | if (chk) { | ||
1435 | /* if chk != 0 then it is the max allowed index */ | ||
1436 | printk(KERN_INFO "C-FIFO: Cluster configuration = %d " | ||
1437 | "is not supported on this hardware.\n", | ||
1438 | cluster_config); | ||
1439 | /* User should notice that the configuration failed, so | ||
1440 | * let's bail out. */ | ||
1441 | return -EINVAL; | ||
1442 | } | ||
1443 | |||
1444 | cluster_size = cpumask_weight(mask); | ||
1445 | } | ||
1446 | |||
1447 | if ((num_online_cpus() % cluster_size) != 0) { | ||
1448 | /* this can't be right, some cpus are left out */ | ||
1449 | printk(KERN_ERR "C-FIFO: Trying to group %d cpus in %d!\n", | ||
1450 | num_online_cpus(), cluster_size); | ||
1451 | return -1; | ||
1452 | } | ||
1453 | |||
1454 | num_clusters = num_online_cpus() / cluster_size; | ||
1455 | printk(KERN_INFO "C-FIFO: %d cluster(s) of size = %d\n", | ||
1456 | num_clusters, cluster_size); | ||
1457 | |||
1458 | /* initialize clusters */ | ||
1459 | cfifo = kmalloc(num_clusters * sizeof(cfifo_domain_t), GFP_ATOMIC); | ||
1460 | for (i = 0; i < num_clusters; i++) { | ||
1461 | |||
1462 | cfifo[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
1463 | GFP_ATOMIC); | ||
1464 | cfifo[i].heap_node = kmalloc( | ||
1465 | cluster_size * sizeof(struct bheap_node), | ||
1466 | GFP_ATOMIC); | ||
1467 | bheap_init(&(cfifo[i].cpu_heap)); | ||
1468 | fifo_domain_init(&(cfifo[i].domain), NULL, cfifo_release_jobs); | ||
1469 | |||
1470 | if(!zalloc_cpumask_var(&cfifo[i].cpu_map, GFP_ATOMIC)) | ||
1471 | return -ENOMEM; | ||
1472 | } | ||
1473 | |||
1474 | /* cycle through cluster and add cpus to them */ | ||
1475 | for (i = 0; i < num_clusters; i++) { | ||
1476 | |||
1477 | for_each_online_cpu(cpu) { | ||
1478 | /* check if the cpu is already in a cluster */ | ||
1479 | for (j = 0; j < num_clusters; j++) | ||
1480 | if (cpumask_test_cpu(cpu, cfifo[j].cpu_map)) | ||
1481 | break; | ||
1482 | /* if it is in a cluster go to next cpu */ | ||
1483 | if (j < num_clusters && | ||
1484 | cpumask_test_cpu(cpu, cfifo[j].cpu_map)) | ||
1485 | continue; | ||
1486 | |||
1487 | /* this cpu isn't in any cluster */ | ||
1488 | /* get the shared cpus */ | ||
1489 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
1490 | cpumask_copy(mask, cpu_online_mask); | ||
1491 | else | ||
1492 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
1493 | |||
1494 | cpumask_copy(cfifo[i].cpu_map, mask); | ||
1495 | #ifdef VERBOSE_INIT | ||
1496 | print_cluster_topology(mask, cpu); | ||
1497 | #endif | ||
1498 | /* add cpus to current cluster and init cpu_entry_t */ | ||
1499 | cpu_count = 0; | ||
1500 | for_each_cpu(ccpu, cfifo[i].cpu_map) { | ||
1501 | |||
1502 | entry = &per_cpu(cfifo_cpu_entries, ccpu); | ||
1503 | cfifo[i].cpus[cpu_count] = entry; | ||
1504 | atomic_set(&entry->will_schedule, 0); | ||
1505 | entry->cpu = ccpu; | ||
1506 | entry->cluster = &cfifo[i]; | ||
1507 | entry->hn = &(cfifo[i].heap_node[cpu_count]); | ||
1508 | bheap_node_init(&entry->hn, entry); | ||
1509 | |||
1510 | cpu_count++; | ||
1511 | |||
1512 | entry->linked = NULL; | ||
1513 | entry->scheduled = NULL; | ||
1514 | update_cpu_position(entry); | ||
1515 | } | ||
1516 | /* done with this cluster */ | ||
1517 | break; | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1522 | { | ||
1523 | /* distribute the daemons evenly across the clusters. */ | ||
1524 | int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); | ||
1525 | int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; | ||
1526 | int left_over = NR_LITMUS_SOFTIRQD % num_clusters; | ||
1527 | |||
1528 | int daemon = 0; | ||
1529 | for(i = 0; i < num_clusters; ++i) | ||
1530 | { | ||
1531 | int num_on_this_cluster = num_daemons_per_cluster; | ||
1532 | if(left_over) | ||
1533 | { | ||
1534 | ++num_on_this_cluster; | ||
1535 | --left_over; | ||
1536 | } | ||
1537 | |||
1538 | for(j = 0; j < num_on_this_cluster; ++j) | ||
1539 | { | ||
1540 | // first CPU of this cluster | ||
1541 | affinity[daemon++] = i*cluster_size; | ||
1542 | } | ||
1543 | } | ||
1544 | |||
1545 | spawn_klitirqd(affinity); | ||
1546 | |||
1547 | kfree(affinity); | ||
1548 | } | ||
1549 | #endif | ||
1550 | |||
1551 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1552 | init_nvidia_info(); | ||
1553 | #endif | ||
1554 | |||
1555 | free_cpumask_var(mask); | ||
1556 | clusters_allocated = 1; | ||
1557 | return 0; | ||
1558 | } | ||
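The CONFIG_LITMUS_SOFTIRQD block in cfifo_activate_plugin() spreads NR_LITMUS_SOFTIRQD interrupt-handling daemons as evenly as possible over the clusters and pins each one to the first CPU of its cluster. A standalone sketch of just that arithmetic, using made-up numbers (5 daemons, 2 clusters of 4 CPUs):

#include <stdio.h>

int main(void)
{
	const int nr_daemons = 5, num_clusters = 2, cluster_size = 4;
	int per_cluster = nr_daemons / num_clusters;	/* 2 */
	int left_over   = nr_daemons % num_clusters;	/* 1 */
	int daemon = 0, i, j;

	for (i = 0; i < num_clusters; i++) {
		/* earlier clusters absorb the remainder, as in the plugin */
		int n = per_cluster + (i < left_over ? 1 : 0);
		for (j = 0; j < n; j++)
			printf("daemon %d -> CPU %d\n",
			       daemon++, i * cluster_size);
	}
	return 0;
}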
1559 | |||
1560 | /* Plugin object */ | ||
1561 | static struct sched_plugin cfifo_plugin __cacheline_aligned_in_smp = { | ||
1562 | .plugin_name = "C-FIFO", | ||
1563 | .finish_switch = cfifo_finish_switch, | ||
1564 | .tick = cfifo_tick, | ||
1565 | .task_new = cfifo_task_new, | ||
1566 | .complete_job = complete_job, | ||
1567 | .task_exit = cfifo_task_exit, | ||
1568 | .schedule = cfifo_schedule, | ||
1569 | .task_wake_up = cfifo_task_wake_up, | ||
1570 | .task_block = cfifo_task_block, | ||
1571 | .admit_task = cfifo_admit_task, | ||
1572 | .activate_plugin = cfifo_activate_plugin, | ||
1573 | #ifdef CONFIG_LITMUS_LOCKING | ||
1574 | .allocate_lock = cfifo_allocate_lock, | ||
1575 | .set_prio_inh = set_priority_inheritance, | ||
1576 | .clear_prio_inh = clear_priority_inheritance, | ||
1577 | #endif | ||
1578 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1579 | .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, | ||
1580 | .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, | ||
1581 | #endif | ||
1582 | }; | ||
1583 | |||
1584 | static struct proc_dir_entry *cluster_file = NULL, *cfifo_dir = NULL; | ||
1585 | |||
1586 | static int __init init_cfifo(void) | ||
1587 | { | ||
1588 | int err, fs; | ||
1589 | |||
1590 | err = register_sched_plugin(&cfifo_plugin); | ||
1591 | if (!err) { | ||
1592 | fs = make_plugin_proc_dir(&cfifo_plugin, &cfifo_dir); | ||
1593 | if (!fs) | ||
1594 | cluster_file = create_cluster_file(cfifo_dir, &cluster_config); | ||
1595 | else | ||
1596 | printk(KERN_ERR "Could not allocate C-FIFO procfs dir.\n"); | ||
1597 | } | ||
1598 | return err; | ||
1599 | } | ||
1600 | |||
1601 | static void clean_cfifo(void) | ||
1602 | { | ||
1603 | cleanup_cfifo(); | ||
1604 | if (cluster_file) | ||
1605 | remove_proc_entry("cluster", cfifo_dir); | ||
1606 | if (cfifo_dir) | ||
1607 | remove_plugin_proc_dir(&cfifo_plugin); | ||
1608 | } | ||
1609 | |||
1610 | module_init(init_cfifo); | ||
1611 | module_exit(clean_cfifo); | ||
diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c new file mode 100644 index 000000000000..061b29eaff7e --- /dev/null +++ b/litmus/sched_crm.c | |||
@@ -0,0 +1,1611 @@ | |||
1 | /* | ||
2 | * litmus/sched_crm.c | ||
3 | * | ||
4 | * Implementation of the C-RM scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Clusters topology is automatically detected (this is arch dependent | ||
9 | * and is working only on x86 at the moment --- and only with modern | ||
10 | * cpus that exports cpuid4 information) | ||
11 | * - The plugins _does not_ attempt to put tasks in the right cluster i.e. | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster | ||
14 | * - default clustering is around L2 cache (cache index = 2) | ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
16 | * online_cpus are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online cpus. | ||
21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
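Apart from identifiers, the main functional difference from sched_cfifo.c is the priority order pulled in from rm_common.h: jobs are ranked rate-monotonically, that is, by task period. For orientation only, a minimal sketch of that comparison: the shorter period wins, with PID as a deterministic tie-break. This is an assumption, not the shipped rm_common.c (which presumably also honors priority inheritance); stand-in types keep the sketch self-contained.

/* Illustrative sketch only, not the code shipped in rm_common.c.
 * Stand-in types; in the kernel the arguments are struct task_struct*. */
typedef unsigned long long lt_t;	/* litmus time type */

struct rm_task {
	lt_t period;
	int  pid;
};

/* returns nonzero iff 'first' has higher rate-monotonic priority */
static int rm_higher_prio_sketch(const struct rm_task *first,
                                 const struct rm_task *second)
{
	/* a real task always beats "no task" */
	if (!first || !second)
		return first && !second;

	/* rate-monotonic: shorter period means higher priority */
	if (first->period != second->period)
		return first->period < second->period;

	/* break ties by PID so the order is total */
	return first->pid < second->pid;
}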
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/uaccess.h> | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | |||
36 | #include <litmus/litmus.h> | ||
37 | #include <litmus/jobs.h> | ||
38 | #include <litmus/preempt.h> | ||
39 | #include <litmus/sched_plugin.h> | ||
40 | #include <litmus/rm_common.h> | ||
41 | #include <litmus/sched_trace.h> | ||
42 | |||
43 | #include <litmus/clustered.h> | ||
44 | |||
45 | #include <litmus/bheap.h> | ||
46 | |||
47 | /* to configure the cluster size */ | ||
48 | #include <litmus/litmus_proc.h> | ||
49 | |||
50 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
51 | #include <litmus/affinity.h> | ||
52 | #endif | ||
53 | |||
54 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
55 | #include <litmus/litmus_softirq.h> | ||
56 | #endif | ||
57 | |||
58 | #ifdef CONFIG_LITMUS_NVIDIA | ||
59 | #include <litmus/nvidia_info.h> | ||
60 | #endif | ||
61 | |||
62 | /* Reference configuration variable. Determines which cache level is used to | ||
63 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
64 | * all CPUs form a single cluster (just like GSN-EDF). | ||
65 | */ | ||
66 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
67 | |||
68 | struct clusterdomain; | ||
69 | |||
70 | /* cpu_entry_t - maintain the linked and scheduled state | ||
71 | * | ||
72 | * A cpu also contains a pointer to the crm_domain_t cluster | ||
73 | * that owns it (struct clusterdomain*) | ||
74 | */ | ||
75 | typedef struct { | ||
76 | int cpu; | ||
77 | struct clusterdomain* cluster; /* owning cluster */ | ||
78 | struct task_struct* linked; /* only RT tasks */ | ||
79 | struct task_struct* scheduled; /* only RT tasks */ | ||
80 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
81 | struct bheap_node* hn; | ||
82 | } cpu_entry_t; | ||
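| | ||
| /* Note: "linked" is the task the scheduler has logically assigned to this | ||
| * CPU, whereas "scheduled" is the task actually running on it; the two may | ||
| * differ transiently until the CPU passes through crm_schedule(). */ | ||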
83 | |||
84 | /* one cpu_entry_t per CPU */ | ||
85 | DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries); | ||
86 | |||
87 | #define set_will_schedule() \ | ||
88 | (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1)) | ||
89 | #define clear_will_schedule() \ | ||
90 | (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0)) | ||
91 | #define test_will_schedule(cpu) \ | ||
92 | (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule)) | ||
93 | |||
94 | /* | ||
95 | * In C-RM there is a crm domain _per_ cluster | ||
96 | * The number of clusters is determined dynamically according to the | ||
97 | * total number of CPUs and the cluster size. | ||
98 | */ | ||
99 | typedef struct clusterdomain { | ||
100 | /* rt_domain for this cluster */ | ||
101 | rt_domain_t domain; | ||
102 | /* cpus in this cluster */ | ||
103 | cpu_entry_t* *cpus; | ||
104 | /* map of this cluster cpus */ | ||
105 | cpumask_var_t cpu_map; | ||
106 | /* the cpus queue themselves according to priority in here */ | ||
107 | struct bheap_node *heap_node; | ||
108 | struct bheap cpu_heap; | ||
109 | /* lock for this cluster */ | ||
110 | #define crm_lock domain.ready_lock | ||
111 | } crm_domain_t; | ||
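| | ||
| /* Note: crm_lock aliases the rt_domain's ready_lock, so all scheduling | ||
| * decisions within a cluster are serialized on this one per-cluster lock. */ | ||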
112 | |||
113 | /* a crm_domain per cluster; allocation is done at init/activation time */ | ||
114 | crm_domain_t *crm; | ||
115 | |||
116 | #define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster) | ||
117 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
118 | |||
119 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
120 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
121 | * information during the initialization of the plugin (e.g., topology) | ||
122 | #define WANT_ALL_SCHED_EVENTS | ||
123 | */ | ||
124 | #define VERBOSE_INIT | ||
125 | |||
126 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
127 | { | ||
128 | cpu_entry_t *a, *b; | ||
129 | a = _a->value; | ||
130 | b = _b->value; | ||
131 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
132 | * the top of the heap. | ||
133 | */ | ||
134 | return rm_higher_prio(b->linked, a->linked); | ||
135 | } | ||
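| | ||
| /* For reference, rm_higher_prio() (declared in rm_common.h) is assumed to | ||
| * order tasks rate-monotonically, roughly along the lines of this sketch: | ||
| * | ||
| * int rm_higher_prio(struct task_struct* a, struct task_struct* b) | ||
| * { | ||
| * if (!a) return 0; // NULL never has higher priority | ||
| * if (!b) return 1; | ||
| * if (get_rt_period(a) != get_rt_period(b)) | ||
| * return get_rt_period(a) < get_rt_period(b); | ||
| * return a->pid < b->pid; // tie-break for a total order | ||
| * } | ||
| * | ||
| * so cpu_lower_prio() keeps the CPU running the longest-period (or no) task | ||
| * at the top of the cluster's CPU heap. | ||
| */ | ||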
136 | |||
137 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
138 | * order in the cpu queue. Caller must hold crm lock. | ||
139 | */ | ||
140 | static void update_cpu_position(cpu_entry_t *entry) | ||
141 | { | ||
142 | crm_domain_t *cluster = entry->cluster; | ||
143 | |||
144 | if (likely(bheap_node_in_heap(entry->hn))) | ||
145 | bheap_delete(cpu_lower_prio, | ||
146 | &cluster->cpu_heap, | ||
147 | entry->hn); | ||
148 | |||
149 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | ||
150 | } | ||
151 | |||
152 | /* caller must hold crm lock */ | ||
153 | static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster) | ||
154 | { | ||
155 | struct bheap_node* hn; | ||
156 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
157 | return hn->value; | ||
158 | } | ||
159 | |||
160 | |||
161 | /* link_task_to_cpu - Update the link of a CPU. | ||
162 | * Handles the case where the to-be-linked task is already | ||
163 | * scheduled on a different CPU. | ||
164 | */ | ||
165 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
166 | cpu_entry_t *entry) | ||
167 | { | ||
168 | cpu_entry_t *sched; | ||
169 | struct task_struct* tmp; | ||
170 | int on_cpu; | ||
171 | |||
172 | BUG_ON(linked && !is_realtime(linked)); | ||
173 | |||
174 | /* Currently linked task is set to be unlinked. */ | ||
175 | if (entry->linked) { | ||
176 | entry->linked->rt_param.linked_on = NO_CPU; | ||
177 | } | ||
178 | |||
179 | /* Link new task to CPU. */ | ||
180 | if (linked) { | ||
181 | set_rt_flags(linked, RT_F_RUNNING); | ||
182 | /* handle the case where the task is already scheduled somewhere else */ | ||
183 | on_cpu = linked->rt_param.scheduled_on; | ||
184 | if (on_cpu != NO_CPU) { | ||
185 | sched = &per_cpu(crm_cpu_entries, on_cpu); | ||
186 | /* this should only happen if not linked already */ | ||
187 | BUG_ON(sched->linked == linked); | ||
188 | |||
189 | /* If we are already scheduled on the CPU to which we | ||
190 | * wanted to link, we don't need to do the swap -- | ||
191 | * we just link ourselves to the CPU and depend on | ||
192 | * the caller to get things right. | ||
193 | */ | ||
194 | if (entry != sched) { | ||
195 | TRACE_TASK(linked, | ||
196 | "already scheduled on %d, updating link.\n", | ||
197 | sched->cpu); | ||
198 | tmp = sched->linked; | ||
199 | linked->rt_param.linked_on = sched->cpu; | ||
200 | sched->linked = linked; | ||
201 | update_cpu_position(sched); | ||
202 | linked = tmp; | ||
203 | } | ||
204 | } | ||
205 | if (linked) /* might be NULL due to swap */ | ||
206 | linked->rt_param.linked_on = entry->cpu; | ||
207 | } | ||
208 | entry->linked = linked; | ||
209 | #ifdef WANT_ALL_SCHED_EVENTS | ||
210 | if (linked) | ||
211 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
212 | else | ||
213 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
214 | #endif | ||
215 | update_cpu_position(entry); | ||
216 | } | ||
217 | |||
218 | /* unlink - Make sure a task is not linked any longer to an entry | ||
219 | * where it was linked before. Must hold crm_lock. | ||
220 | */ | ||
221 | static noinline void unlink(struct task_struct* t) | ||
222 | { | ||
223 | cpu_entry_t *entry; | ||
224 | |||
225 | if (t->rt_param.linked_on != NO_CPU) { | ||
226 | /* unlink */ | ||
227 | entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on); | ||
228 | t->rt_param.linked_on = NO_CPU; | ||
229 | link_task_to_cpu(NULL, entry); | ||
230 | } else if (is_queued(t)) { | ||
231 | /* This is an interesting situation: t is scheduled, | ||
232 | * but was just recently unlinked. It cannot be | ||
233 | * linked anywhere else (because then it would have | ||
234 | * been relinked to this CPU), thus it must be in some | ||
235 | * queue. We must remove it from the list in this | ||
236 | * case. | ||
237 | * | ||
238 | * In the C-RM case it should be somewhere in the queue for | ||
239 | * its domain, so we can get the domain using | ||
240 | * task_cpu_cluster(). | ||
241 | */ | ||
242 | remove(&(task_cpu_cluster(t))->domain, t); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | |||
247 | /* preempt - force a CPU to reschedule | ||
248 | */ | ||
249 | static void preempt(cpu_entry_t *entry) | ||
250 | { | ||
251 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
252 | } | ||
253 | |||
254 | /* requeue - Put an unlinked task into its C-RM cluster domain. | ||
255 | * Caller must hold crm_lock. | ||
256 | */ | ||
257 | static noinline void requeue(struct task_struct* task) | ||
258 | { | ||
259 | crm_domain_t *cluster = task_cpu_cluster(task); | ||
260 | BUG_ON(!task); | ||
261 | /* sanity check before insertion */ | ||
262 | BUG_ON(is_queued(task)); | ||
263 | |||
264 | if (is_released(task, litmus_clock())) | ||
265 | __add_ready(&cluster->domain, task); | ||
266 | else { | ||
267 | /* it has got to wait */ | ||
268 | add_release(&cluster->domain, task); | ||
269 | } | ||
270 | } | ||
271 | |||
272 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
273 | static cpu_entry_t* crm_get_nearest_available_cpu( | ||
274 | crm_domain_t *cluster, cpu_entry_t* start) | ||
275 | { | ||
276 | cpu_entry_t* affinity; | ||
277 | |||
278 | get_nearest_available_cpu(affinity, start, crm_cpu_entries, -1); | ||
279 | |||
280 | /* make sure CPU is in our cluster */ | ||
281 | if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) | ||
282 | return(affinity); | ||
283 | else | ||
284 | return(NULL); | ||
285 | } | ||
286 | #endif | ||
287 | |||
288 | |||
289 | /* check for any necessary preemptions */ | ||
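| /* The loop below repeatedly links the highest-priority ready job to the | ||
| * cluster's lowest-priority CPU (preferring, under CONFIG_SCHED_CPU_AFFINITY, | ||
| * an available CPU close to where the job last ran) until no linked job | ||
| * would be displaced by a higher-priority ready job. */ | ||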
290 | static void check_for_preemptions(crm_domain_t *cluster) | ||
291 | { | ||
292 | struct task_struct *task; | ||
293 | cpu_entry_t *last; | ||
294 | |||
295 | for(last = lowest_prio_cpu(cluster); | ||
296 | rm_preemption_needed(&cluster->domain, last->linked); | ||
297 | last = lowest_prio_cpu(cluster)) { | ||
298 | /* preemption necessary */ | ||
299 | task = __take_ready(&cluster->domain); | ||
300 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
301 | { | ||
302 | cpu_entry_t* affinity = | ||
303 | crm_get_nearest_available_cpu(cluster, | ||
304 | &per_cpu(crm_cpu_entries, task_cpu(task))); | ||
305 | if(affinity) | ||
306 | last = affinity; | ||
307 | else if(last->linked) | ||
308 | requeue(last->linked); | ||
309 | } | ||
310 | #else | ||
311 | if (last->linked) | ||
312 | requeue(last->linked); | ||
313 | #endif | ||
314 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
315 | task->pid, last->cpu); | ||
316 | link_task_to_cpu(task, last); | ||
317 | preempt(last); | ||
318 | } | ||
319 | } | ||
320 | |||
321 | /* crm_job_arrival: task is either resumed or released */ | ||
322 | static noinline void crm_job_arrival(struct task_struct* task) | ||
323 | { | ||
324 | crm_domain_t *cluster = task_cpu_cluster(task); | ||
325 | BUG_ON(!task); | ||
326 | |||
327 | requeue(task); | ||
328 | check_for_preemptions(cluster); | ||
329 | } | ||
330 | |||
331 | static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
332 | { | ||
333 | crm_domain_t* cluster = container_of(rt, crm_domain_t, domain); | ||
334 | unsigned long flags; | ||
335 | |||
336 | raw_spin_lock_irqsave(&cluster->crm_lock, flags); | ||
337 | |||
338 | __merge_ready(&cluster->domain, tasks); | ||
339 | check_for_preemptions(cluster); | ||
340 | |||
341 | raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); | ||
342 | } | ||
343 | |||
344 | /* caller holds crm_lock */ | ||
345 | static noinline void job_completion(struct task_struct *t, int forced) | ||
346 | { | ||
347 | BUG_ON(!t); | ||
348 | |||
349 | sched_trace_task_completion(t, forced); | ||
350 | |||
351 | #ifdef CONFIG_LITMUS_NVIDIA | ||
352 | atomic_set(&tsk_rt(t)->nv_int_count, 0); | ||
353 | #endif | ||
354 | |||
355 | TRACE_TASK(t, "job_completion().\n"); | ||
356 | |||
357 | /* set flags */ | ||
358 | set_rt_flags(t, RT_F_SLEEP); | ||
359 | /* prepare for next period */ | ||
360 | prepare_for_next_period(t); | ||
361 | if (is_released(t, litmus_clock())) | ||
362 | sched_trace_task_release(t); | ||
363 | /* unlink */ | ||
364 | unlink(t); | ||
365 | /* requeue | ||
366 | * But don't requeue a blocking task. */ | ||
367 | if (is_running(t)) | ||
368 | crm_job_arrival(t); | ||
369 | } | ||
370 | |||
371 | /* crm_tick - this function is called for every local timer | ||
372 | * interrupt. | ||
373 | * | ||
374 | * checks whether the current task has expired and checks | ||
375 | * whether we need to preempt it if it has not expired | ||
376 | */ | ||
377 | static void crm_tick(struct task_struct* t) | ||
378 | { | ||
379 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
380 | if (!is_np(t)) { | ||
381 | /* np tasks will be preempted when they become | ||
382 | * preemptable again | ||
383 | */ | ||
384 | litmus_reschedule_local(); | ||
385 | set_will_schedule(); | ||
386 | TRACE("crm_scheduler_tick: " | ||
387 | "%d is preemptable " | ||
388 | " => FORCE_RESCHED\n", t->pid); | ||
389 | } else if (is_user_np(t)) { | ||
390 | TRACE("crm_scheduler_tick: " | ||
391 | "%d is non-preemptable, " | ||
392 | "preemption delayed.\n", t->pid); | ||
393 | request_exit_np(t); | ||
394 | } | ||
395 | } | ||
396 | } | ||
397 | |||
398 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
399 | * assumptions on the state of the current task since it may be called for a | ||
400 | * number of reasons: a scheduler_tick() may have determined that it | ||
401 | * was necessary, sys_exit_np() may have been called, some Linux | ||
402 | * subsystem may have determined so, or even (in the worst case) a bug may be | ||
403 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
404 | * current state is. | ||
405 | * | ||
406 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
407 | * | ||
408 | * The following assertions for the scheduled task could hold: | ||
409 | * | ||
410 | * - !is_running(scheduled) // the job blocks | ||
411 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
412 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
413 | * - linked != scheduled // we need to reschedule (for any reason) | ||
414 | * - is_np(scheduled) // rescheduling must be delayed, | ||
415 | * sys_exit_np must be requested | ||
416 | * | ||
417 | * Any of these can occur together. | ||
418 | */ | ||
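| | ||
| /* In short, the decision logic below reduces to: | ||
| * | ||
| * if (blocks) unlink(scheduled) | ||
| * if (np && (out_of_time || preempt || sleep)) unlink + request_exit_np | ||
| * if (!np && (out_of_time || sleep) && !blocks && !preempt) job_completion | ||
| * if (!linked) link the next ready job, if any | ||
| * next = (linked != scheduled && (!np || blocks)) ? linked | ||
| * : (exists ? prev : NULL) | ||
| */ | ||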
419 | static struct task_struct* crm_schedule(struct task_struct * prev) | ||
420 | { | ||
421 | cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); | ||
422 | crm_domain_t *cluster = entry->cluster; | ||
423 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
424 | struct task_struct* next = NULL; | ||
425 | |||
426 | raw_spin_lock(&cluster->crm_lock); | ||
427 | clear_will_schedule(); | ||
428 | |||
429 | /* sanity checking */ | ||
430 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
431 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
432 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
433 | |||
434 | /* (0) Determine state */ | ||
435 | exists = entry->scheduled != NULL; | ||
436 | blocks = exists && !is_running(entry->scheduled); | ||
437 | out_of_time = exists && | ||
438 | budget_enforced(entry->scheduled) && | ||
439 | budget_exhausted(entry->scheduled); | ||
440 | np = exists && is_np(entry->scheduled); | ||
441 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
442 | preempt = entry->scheduled != entry->linked; | ||
443 | |||
444 | #ifdef WANT_ALL_SCHED_EVENTS | ||
445 | TRACE_TASK(prev, "invoked crm_schedule.\n"); | ||
446 | #endif | ||
447 | |||
448 | if (exists) | ||
449 | TRACE_TASK(prev, | ||
450 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
451 | "state:%d sig:%d\n", | ||
452 | blocks, out_of_time, np, sleep, preempt, | ||
453 | prev->state, signal_pending(prev)); | ||
454 | if (entry->linked && preempt) | ||
455 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
456 | entry->linked->comm, entry->linked->pid); | ||
457 | |||
458 | |||
459 | /* If a task blocks we have no choice but to reschedule. | ||
460 | */ | ||
461 | if (blocks) | ||
462 | unlink(entry->scheduled); | ||
463 | |||
464 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
465 | * We need to make sure to update the link structure anyway in case | ||
466 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
467 | * hurt. | ||
468 | */ | ||
469 | if (np && (out_of_time || preempt || sleep)) { | ||
470 | unlink(entry->scheduled); | ||
471 | request_exit_np(entry->scheduled); | ||
472 | } | ||
473 | |||
474 | /* Any task that is preemptable and either exhausts its execution | ||
475 | * budget or wants to sleep completes. We may have to reschedule after | ||
476 | * this. Don't do a job completion if we block (can't have timers running | ||
477 | * for blocked jobs). Preemptions go first for the same reason. | ||
478 | */ | ||
479 | if (!np && (out_of_time || sleep) && !blocks && !preempt) | ||
480 | job_completion(entry->scheduled, !sleep); | ||
481 | |||
482 | /* Link pending task if we became unlinked. | ||
483 | */ | ||
484 | if (!entry->linked) | ||
485 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
486 | |||
487 | /* The final scheduling decision. Do we need to switch for some reason? | ||
488 | * If linked is different from scheduled, then select linked as next. | ||
489 | */ | ||
490 | if ((!np || blocks) && | ||
491 | entry->linked != entry->scheduled) { | ||
492 | /* Schedule a linked job? */ | ||
493 | if (entry->linked) { | ||
494 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
495 | next = entry->linked; | ||
496 | } | ||
497 | if (entry->scheduled) { | ||
498 | /* not gonna be scheduled soon */ | ||
499 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
500 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
501 | } | ||
502 | } else | ||
503 | /* Only override Linux scheduler if we have a real-time task | ||
504 | * scheduled that needs to continue. | ||
505 | */ | ||
506 | if (exists) | ||
507 | next = prev; | ||
508 | |||
509 | sched_state_task_picked(); | ||
510 | raw_spin_unlock(&cluster->crm_lock); | ||
511 | |||
512 | #ifdef WANT_ALL_SCHED_EVENTS | ||
513 | TRACE("crm_lock released, next=0x%p\n", next); | ||
514 | |||
515 | if (next) | ||
516 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
517 | else if (exists && !next) | ||
518 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
519 | #endif | ||
520 | |||
521 | |||
522 | return next; | ||
523 | } | ||
524 | |||
525 | |||
526 | /* _finish_switch - we just finished the switch away from prev | ||
527 | */ | ||
528 | static void crm_finish_switch(struct task_struct *prev) | ||
529 | { | ||
530 | cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); | ||
531 | |||
532 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
533 | #ifdef WANT_ALL_SCHED_EVENTS | ||
534 | TRACE_TASK(prev, "switched away from\n"); | ||
535 | #endif | ||
536 | } | ||
537 | |||
538 | |||
539 | /* Prepare a task for running in RT mode | ||
540 | */ | ||
541 | static void crm_task_new(struct task_struct * t, int on_rq, int running) | ||
542 | { | ||
543 | unsigned long flags; | ||
544 | cpu_entry_t* entry; | ||
545 | crm_domain_t* cluster; | ||
546 | |||
547 | TRACE("gsn edf: task new %d\n", t->pid); | ||
548 | |||
549 | /* the cluster doesn't change even if t is running */ | ||
550 | cluster = task_cpu_cluster(t); | ||
551 | |||
552 | raw_spin_lock_irqsave(&cluster->crm_lock, flags); | ||
553 | |||
554 | /* setup job params */ | ||
555 | release_at(t, litmus_clock()); | ||
556 | |||
557 | if (running) { | ||
558 | entry = &per_cpu(crm_cpu_entries, task_cpu(t)); | ||
559 | BUG_ON(entry->scheduled); | ||
560 | |||
561 | entry->scheduled = t; | ||
562 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
563 | } else { | ||
564 | t->rt_param.scheduled_on = NO_CPU; | ||
565 | } | ||
566 | t->rt_param.linked_on = NO_CPU; | ||
567 | |||
568 | crm_job_arrival(t); | ||
569 | raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); | ||
570 | } | ||
571 | |||
572 | static void crm_task_wake_up(struct task_struct *task) | ||
573 | { | ||
574 | unsigned long flags; | ||
575 | //lt_t now; | ||
576 | crm_domain_t *cluster; | ||
577 | |||
578 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
579 | |||
580 | cluster = task_cpu_cluster(task); | ||
581 | |||
582 | raw_spin_lock_irqsave(&cluster->crm_lock, flags); | ||
583 | |||
584 | #if 0 // sporadic task model | ||
585 | /* We need to take suspensions because of semaphores into | ||
586 | * account! If a job resumes after being suspended due to acquiring | ||
587 | * a semaphore, it should never be treated as a new job release. | ||
588 | */ | ||
589 | if (get_rt_flags(task) == RT_F_EXIT_SEM) { | ||
590 | set_rt_flags(task, RT_F_RUNNING); | ||
591 | } else { | ||
592 | now = litmus_clock(); | ||
593 | if (is_tardy(task, now)) { | ||
594 | /* new sporadic release */ | ||
595 | release_at(task, now); | ||
596 | sched_trace_task_release(task); | ||
597 | } | ||
598 | else { | ||
599 | if (task->rt.time_slice) { | ||
600 | /* came back in time before deadline | ||
601 | */ | ||
602 | set_rt_flags(task, RT_F_RUNNING); | ||
603 | } | ||
604 | } | ||
605 | } | ||
606 | #endif | ||
607 | |||
608 | //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); | ||
609 | set_rt_flags(task, RT_F_RUNNING); // periodic model | ||
610 | |||
611 | if(tsk_rt(task)->linked_on == NO_CPU) | ||
612 | crm_job_arrival(task); | ||
613 | else | ||
614 | TRACE("WTF, mate?!\n"); | ||
615 | |||
616 | raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); | ||
617 | } | ||
618 | |||
619 | static void crm_task_block(struct task_struct *t) | ||
620 | { | ||
621 | unsigned long flags; | ||
622 | crm_domain_t *cluster; | ||
623 | |||
624 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
625 | |||
626 | cluster = task_cpu_cluster(t); | ||
627 | |||
628 | /* unlink if necessary */ | ||
629 | raw_spin_lock_irqsave(&cluster->crm_lock, flags); | ||
630 | unlink(t); | ||
631 | raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); | ||
632 | |||
633 | BUG_ON(!is_realtime(t)); | ||
634 | } | ||
635 | |||
636 | |||
637 | static void crm_task_exit(struct task_struct * t) | ||
638 | { | ||
639 | unsigned long flags; | ||
640 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
641 | |||
642 | /* unlink if necessary */ | ||
643 | raw_spin_lock_irqsave(&cluster->crm_lock, flags); | ||
644 | unlink(t); | ||
645 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
646 | cpu_entry_t *cpu; | ||
647 | cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on); | ||
648 | cpu->scheduled = NULL; | ||
649 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
650 | } | ||
651 | raw_spin_unlock_irqrestore(&cluster->crm_lock, flags); | ||
652 | |||
653 | BUG_ON(!is_realtime(t)); | ||
654 | TRACE_TASK(t, "RIP\n"); | ||
655 | } | ||
656 | |||
657 | static long crm_admit_task(struct task_struct* tsk) | ||
658 | { | ||
659 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | ||
660 | } | ||
661 | |||
662 | |||
663 | |||
664 | |||
665 | |||
666 | |||
667 | |||
668 | |||
669 | |||
670 | |||
671 | |||
672 | |||
673 | |||
674 | #ifdef CONFIG_LITMUS_LOCKING | ||
675 | |||
676 | #include <litmus/fdso.h> | ||
677 | |||
678 | |||
679 | static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
680 | { | ||
681 | int linked_on; | ||
682 | int check_preempt = 0; | ||
683 | |||
684 | crm_domain_t* cluster = task_cpu_cluster(t); | ||
685 | |||
686 | if(prio_inh != NULL) | ||
687 | TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); | ||
688 | else | ||
689 | TRACE_TASK(t, "inherits priority from %p\n", prio_inh); | ||
690 | |||
691 | sched_trace_eff_prio_change(t, prio_inh); | ||
692 | |||
693 | tsk_rt(t)->inh_task = prio_inh; | ||
694 | |||
695 | linked_on = tsk_rt(t)->linked_on; | ||
696 | |||
697 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
698 | if (linked_on != NO_CPU) { | ||
699 | TRACE_TASK(t, "%s: linked on %d\n", | ||
700 | __FUNCTION__, linked_on); | ||
701 | /* Holder is scheduled; need to re-order CPUs. | ||
702 | * We can't use heap_decrease() here since | ||
703 | * the cpu_heap is ordered in reverse direction, so | ||
704 | * it is actually an increase. */ | ||
705 | bheap_delete(cpu_lower_prio, &cluster->cpu_heap, | ||
706 | per_cpu(crm_cpu_entries, linked_on).hn); | ||
707 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, | ||
708 | per_cpu(crm_cpu_entries, linked_on).hn); | ||
709 | } else { | ||
710 | /* holder may be queued: first stop queue changes */ | ||
711 | raw_spin_lock(&cluster->domain.release_lock); | ||
712 | if (is_queued(t)) { | ||
713 | TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); | ||
714 | |||
715 | /* We need to update the position of holder in some | ||
716 | * heap. Note that this could be a release heap if | ||
717 | * budget enforcement is used and this job overran. */ | ||
718 | check_preempt = !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node); | ||
719 | |||
720 | } else { | ||
721 | /* Nothing to do: if it is not queued and not linked | ||
722 | * then it is either sleeping or currently being moved | ||
723 | * by other code (e.g., a timer interrupt handler) that | ||
724 | * will use the correct priority when enqueuing the | ||
725 | * task. */ | ||
726 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); | ||
727 | } | ||
728 | raw_spin_unlock(&cluster->domain.release_lock); | ||
729 | |||
730 | /* If holder was enqueued in a release heap, then the following | ||
731 | * preemption check is pointless, but we can't easily detect | ||
732 | * that case. If you want to fix this, then consider that | ||
733 | * simply adding a state flag requires O(n) time to update when | ||
734 | * releasing n tasks, which conflicts with the goal to have | ||
735 | * O(log n) merges. */ | ||
736 | if (check_preempt) { | ||
737 | /* heap_decrease() hit the top level of the heap: make | ||
738 | * sure preemption checks get the right task, not the | ||
739 | * potentially stale cache. */ | ||
740 | bheap_uncache_min(rm_ready_order, &cluster->domain.ready_queue); | ||
741 | check_for_preemptions(cluster); | ||
742 | } | ||
743 | } | ||
744 | } | ||
745 | |||
746 | /* called with IRQs off */ | ||
747 | static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
748 | { | ||
749 | crm_domain_t* cluster = task_cpu_cluster(t); | ||
750 | |||
751 | raw_spin_lock(&cluster->crm_lock); | ||
752 | |||
753 | __set_priority_inheritance(t, prio_inh); | ||
754 | |||
755 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
756 | if(tsk_rt(t)->cur_klitirqd != NULL) | ||
757 | { | ||
758 | TRACE_TASK(t, "%s/%d inherits a new priority!\n", | ||
759 | tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); | ||
760 | |||
761 | __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); | ||
762 | } | ||
763 | #endif | ||
764 | |||
765 | raw_spin_unlock(&cluster->crm_lock); | ||
766 | } | ||
767 | |||
768 | |||
769 | /* called with IRQs off */ | ||
770 | static void __clear_priority_inheritance(struct task_struct* t) | ||
771 | { | ||
772 | TRACE_TASK(t, "priority restored\n"); | ||
773 | |||
774 | if(tsk_rt(t)->scheduled_on != NO_CPU) | ||
775 | { | ||
776 | sched_trace_eff_prio_change(t, NULL); | ||
777 | |||
778 | tsk_rt(t)->inh_task = NULL; | ||
779 | |||
780 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
781 | * since the priority was effectively lowered. */ | ||
782 | unlink(t); | ||
783 | crm_job_arrival(t); | ||
784 | } | ||
785 | else | ||
786 | { | ||
787 | __set_priority_inheritance(t, NULL); | ||
788 | } | ||
789 | |||
790 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
791 | if(tsk_rt(t)->cur_klitirqd != NULL) | ||
792 | { | ||
793 | TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", | ||
794 | tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); | ||
795 | |||
796 | if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) | ||
797 | { | ||
798 | sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); | ||
799 | |||
800 | tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; | ||
801 | |||
802 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
803 | * since the priority was effectively lowered. */ | ||
804 | unlink(tsk_rt(t)->cur_klitirqd); | ||
805 | crm_job_arrival(tsk_rt(t)->cur_klitirqd); | ||
806 | } | ||
807 | else | ||
808 | { | ||
809 | __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); | ||
810 | } | ||
811 | } | ||
812 | #endif | ||
813 | } | ||
814 | |||
815 | /* called with IRQs off */ | ||
816 | static void clear_priority_inheritance(struct task_struct* t) | ||
817 | { | ||
818 | crm_domain_t* cluster = task_cpu_cluster(t); | ||
819 | |||
820 | raw_spin_lock(&cluster->crm_lock); | ||
821 | __clear_priority_inheritance(t); | ||
822 | raw_spin_unlock(&cluster->crm_lock); | ||
823 | } | ||
824 | |||
825 | |||
826 | |||
827 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
828 | /* called with IRQs off */ | ||
829 | static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, | ||
830 | struct task_struct* old_owner, | ||
831 | struct task_struct* new_owner) | ||
832 | { | ||
833 | crm_domain_t* cluster = task_cpu_cluster(klitirqd); | ||
834 | |||
835 | BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); | ||
836 | |||
837 | raw_spin_lock(&cluster->crm_lock); | ||
838 | |||
839 | if(old_owner != new_owner) | ||
840 | { | ||
841 | if(old_owner) | ||
842 | { | ||
843 | // unreachable? | ||
844 | tsk_rt(old_owner)->cur_klitirqd = NULL; | ||
845 | } | ||
846 | |||
847 | TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", | ||
848 | new_owner->comm, new_owner->pid); | ||
849 | |||
850 | tsk_rt(new_owner)->cur_klitirqd = klitirqd; | ||
851 | } | ||
852 | |||
853 | __set_priority_inheritance(klitirqd, | ||
854 | (tsk_rt(new_owner)->inh_task == NULL) ? | ||
855 | new_owner : | ||
856 | tsk_rt(new_owner)->inh_task); | ||
857 | |||
858 | raw_spin_unlock(&cluster->crm_lock); | ||
859 | } | ||
860 | |||
861 | /* called with IRQs off */ | ||
862 | static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, | ||
863 | struct task_struct* old_owner) | ||
864 | { | ||
865 | crm_domain_t* cluster = task_cpu_cluster(klitirqd); | ||
866 | |||
867 | BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); | ||
868 | |||
869 | raw_spin_lock(&cluster->crm_lock); | ||
870 | |||
871 | TRACE_TASK(klitirqd, "priority restored\n"); | ||
872 | |||
873 | if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) | ||
874 | { | ||
875 | tsk_rt(klitirqd)->inh_task = NULL; | ||
876 | |||
877 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
878 | * since the priority was effectively lowered. */ | ||
879 | unlink(klitirqd); | ||
880 | crm_job_arrival(klitirqd); | ||
881 | } | ||
882 | else | ||
883 | { | ||
884 | __set_priority_inheritance(klitirqd, NULL); | ||
885 | } | ||
886 | |||
887 | tsk_rt(old_owner)->cur_klitirqd = NULL; | ||
888 | |||
889 | raw_spin_unlock(&cluster->crm_lock); | ||
890 | } | ||
891 | #endif // CONFIG_LITMUS_SOFTIRQD | ||
892 | |||
893 | |||
894 | /* ******************** KFMLP support ********************** */ | ||
895 | |||
896 | /* struct for semaphore with priority inheritance */ | ||
897 | struct kfmlp_queue | ||
898 | { | ||
899 | wait_queue_head_t wait; | ||
900 | struct task_struct* owner; | ||
901 | struct task_struct* hp_waiter; | ||
902 | int count; /* number of waiters + holder */ | ||
903 | }; | ||
904 | |||
905 | struct kfmlp_semaphore | ||
906 | { | ||
907 | struct litmus_lock litmus_lock; | ||
908 | |||
909 | spinlock_t lock; | ||
910 | |||
911 | int num_resources; /* aka k */ | ||
912 | struct kfmlp_queue *queues; /* array */ | ||
913 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ | ||
914 | }; | ||
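| | ||
| /* KFMLP as implemented here: the semaphore manages k replicas of one | ||
| * resource, with one FIFO queue per replica. A job that must block enqueues | ||
| * on the currently shortest queue and suspends. On unlock, ownership passes | ||
| * to the next FIFO waiter of that queue; if that queue has no waiters, the | ||
| * highest-priority waiter found on any other queue is stolen instead (if one | ||
| * exists). Each owner inherits the priority of its queue's hp_waiter when | ||
| * that waiter has higher priority. | ||
| */ | ||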
915 | |||
916 | static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) | ||
917 | { | ||
918 | return container_of(lock, struct kfmlp_semaphore, litmus_lock); | ||
919 | } | ||
920 | |||
921 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | ||
922 | struct kfmlp_queue* queue) | ||
923 | { | ||
924 | return (queue - &sem->queues[0]); | ||
925 | } | ||
926 | |||
927 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, | ||
928 | struct task_struct* holder) | ||
929 | { | ||
930 | int i; | ||
931 | for(i = 0; i < sem->num_resources; ++i) | ||
932 | if(sem->queues[i].owner == holder) | ||
933 | return(&sem->queues[i]); | ||
934 | return(NULL); | ||
935 | } | ||
936 | |||
937 | /* caller is responsible for locking */ | ||
938 | static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, | ||
939 | struct task_struct *skip) | ||
940 | { | ||
941 | struct list_head *pos; | ||
942 | struct task_struct *queued, *found = NULL; | ||
943 | |||
944 | list_for_each(pos, &kqueue->wait.task_list) { | ||
945 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
946 | task_list)->private; | ||
947 | |||
948 | /* Compare task prios, find high prio task. */ | ||
949 | if (queued != skip && rm_higher_prio(queued, found)) | ||
950 | found = queued; | ||
951 | } | ||
952 | return found; | ||
953 | } | ||
954 | |||
955 | static inline struct kfmlp_queue* kfmlp_find_shortest( | ||
956 | struct kfmlp_semaphore* sem, | ||
957 | struct kfmlp_queue* search_start) | ||
958 | { | ||
959 | // we start our search at search_start instead of at the beginning of the | ||
960 | // queue list to load-balance across all resources. | ||
961 | struct kfmlp_queue* step = search_start; | ||
962 | struct kfmlp_queue* shortest = sem->shortest_queue; | ||
963 | |||
964 | do | ||
965 | { | ||
966 | step = (step+1 != &sem->queues[sem->num_resources]) ? | ||
967 | step+1 : &sem->queues[0]; | ||
968 | if(step->count < shortest->count) | ||
969 | { | ||
970 | shortest = step; | ||
971 | if(step->count == 0) | ||
972 | break; /* can't get any shorter */ | ||
973 | } | ||
974 | } while(step != search_start); | ||
975 | |||
976 | return(shortest); | ||
977 | } | ||
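| /* Example: if the queue counts are {2, 0, 1} and the search starts at | ||
| * queue 0, the scan visits queue 1 first and stops there immediately | ||
| * (count == 0, nothing can be shorter); starting at the caller's queue | ||
| * spreads requests across the replicas instead of always favoring queue 0. | ||
| */ | ||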
978 | |||
979 | static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | ||
980 | { | ||
981 | /* must hold sem->lock */ | ||
982 | |||
983 | struct kfmlp_queue *my_queue = NULL; | ||
984 | struct task_struct *max_hp = NULL; | ||
985 | |||
986 | |||
987 | struct list_head *pos; | ||
988 | struct task_struct *queued; | ||
989 | int i; | ||
990 | |||
991 | for(i = 0; i < sem->num_resources; ++i) | ||
992 | { | ||
993 | if( (sem->queues[i].count > 1) && | ||
994 | ((my_queue == NULL) || | ||
995 | (rm_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
996 | { | ||
997 | my_queue = &sem->queues[i]; | ||
998 | } | ||
999 | } | ||
1000 | |||
1001 | if(my_queue) | ||
1002 | { | ||
1003 | crm_domain_t* cluster; | ||
1004 | |||
1005 | max_hp = my_queue->hp_waiter; | ||
1006 | BUG_ON(!max_hp); | ||
1007 | |||
1008 | TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", | ||
1009 | kfmlp_get_idx(sem, my_queue), | ||
1010 | max_hp->comm, max_hp->pid, | ||
1011 | kfmlp_get_idx(sem, my_queue)); | ||
1012 | |||
1013 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); | ||
1014 | |||
1015 | /* | ||
1016 | if(my_queue->hp_waiter) | ||
1017 | TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", | ||
1018 | kfmlp_get_idx(sem, my_queue), | ||
1019 | my_queue->hp_waiter->comm, | ||
1020 | my_queue->hp_waiter->pid); | ||
1021 | else | ||
1022 | TRACE_CUR("queue %d: new hp_waiter is %p\n", | ||
1023 | kfmlp_get_idx(sem, my_queue), NULL); | ||
1024 | */ | ||
1025 | |||
1026 | cluster = task_cpu_cluster(max_hp); | ||
1027 | |||
1028 | raw_spin_lock(&cluster->crm_lock); | ||
1029 | |||
1030 | /* | ||
1031 | if(my_queue->owner) | ||
1032 | TRACE_CUR("queue %d: owner is %s/%d\n", | ||
1033 | kfmlp_get_idx(sem, my_queue), | ||
1034 | my_queue->owner->comm, | ||
1035 | my_queue->owner->pid); | ||
1036 | else | ||
1037 | TRACE_CUR("queue %d: owner is %p\n", | ||
1038 | kfmlp_get_idx(sem, my_queue), | ||
1039 | NULL); | ||
1040 | */ | ||
1041 | |||
1042 | if(tsk_rt(my_queue->owner)->inh_task == max_hp) | ||
1043 | { | ||
1044 | __clear_priority_inheritance(my_queue->owner); | ||
1045 | if(my_queue->hp_waiter != NULL) | ||
1046 | { | ||
1047 | __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); | ||
1048 | } | ||
1049 | } | ||
1050 | raw_spin_unlock(&cluster->crm_lock); | ||
1051 | |||
1052 | list_for_each(pos, &my_queue->wait.task_list) | ||
1053 | { | ||
1054 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
1055 | task_list)->private; | ||
1056 | /* Compare task prios, find high prio task. */ | ||
1057 | if (queued == max_hp) | ||
1058 | { | ||
1059 | /* | ||
1060 | TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", | ||
1061 | kfmlp_get_idx(sem, my_queue)); | ||
1062 | */ | ||
1063 | __remove_wait_queue(&my_queue->wait, | ||
1064 | list_entry(pos, wait_queue_t, task_list)); | ||
1065 | break; | ||
1066 | } | ||
1067 | } | ||
1068 | --(my_queue->count); | ||
1069 | } | ||
1070 | |||
1071 | return(max_hp); | ||
1072 | } | ||
1073 | |||
1074 | int crm_kfmlp_lock(struct litmus_lock* l) | ||
1075 | { | ||
1076 | struct task_struct* t = current; | ||
1077 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1078 | struct kfmlp_queue* my_queue; | ||
1079 | wait_queue_t wait; | ||
1080 | unsigned long flags; | ||
1081 | |||
1082 | if (!is_realtime(t)) | ||
1083 | return -EPERM; | ||
1084 | |||
1085 | spin_lock_irqsave(&sem->lock, flags); | ||
1086 | |||
1087 | my_queue = sem->shortest_queue; | ||
1088 | |||
1089 | if (my_queue->owner) { | ||
1090 | /* resource is not free => must suspend and wait */ | ||
1091 | TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", | ||
1092 | kfmlp_get_idx(sem, my_queue)); | ||
1093 | |||
1094 | init_waitqueue_entry(&wait, t); | ||
1095 | |||
1096 | /* FIXME: interruptible would be nice some day */ | ||
1097 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
1098 | |||
1099 | __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); | ||
1100 | |||
1101 | /* check if we need to activate priority inheritance */ | ||
1102 | if (rm_higher_prio(t, my_queue->hp_waiter)) | ||
1103 | { | ||
1104 | my_queue->hp_waiter = t; | ||
1105 | if (rm_higher_prio(t, my_queue->owner)) | ||
1106 | { | ||
1107 | set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); | ||
1108 | } | ||
1109 | } | ||
1110 | |||
1111 | ++(my_queue->count); | ||
1112 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
1113 | |||
1114 | /* release lock before sleeping */ | ||
1115 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1116 | |||
1117 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
1118 | * when we wake up; we are guaranteed to have the lock since | ||
1119 | * there is only one wake up per release (or steal). | ||
1120 | */ | ||
1121 | schedule(); | ||
1122 | |||
1123 | |||
1124 | if(my_queue->owner == t) | ||
1125 | { | ||
1126 | TRACE_CUR("queue %d: acquired through waiting\n", | ||
1127 | kfmlp_get_idx(sem, my_queue)); | ||
1128 | } | ||
1129 | else | ||
1130 | { | ||
1131 | /* this case may happen if our wait entry was stolen | ||
1132 | between queues. record where we went.*/ | ||
1133 | my_queue = kfmlp_get_queue(sem, t); | ||
1134 | BUG_ON(!my_queue); | ||
1135 | TRACE_CUR("queue %d: acquired through stealing\n", | ||
1136 | kfmlp_get_idx(sem, my_queue)); | ||
1137 | } | ||
1138 | } | ||
1139 | else | ||
1140 | { | ||
1141 | TRACE_CUR("queue %d: acquired immediately\n", | ||
1142 | kfmlp_get_idx(sem, my_queue)); | ||
1143 | |||
1144 | my_queue->owner = t; | ||
1145 | |||
1146 | ++(my_queue->count); | ||
1147 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
1148 | |||
1149 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1150 | } | ||
1151 | |||
1152 | return kfmlp_get_idx(sem, my_queue); | ||
1153 | } | ||
1154 | |||
1155 | int crm_kfmlp_unlock(struct litmus_lock* l) | ||
1156 | { | ||
1157 | struct task_struct *t = current, *next; | ||
1158 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1159 | struct kfmlp_queue *my_queue; | ||
1160 | unsigned long flags; | ||
1161 | int err = 0; | ||
1162 | |||
1163 | spin_lock_irqsave(&sem->lock, flags); | ||
1164 | |||
1165 | my_queue = kfmlp_get_queue(sem, t); | ||
1166 | |||
1167 | if (!my_queue) { | ||
1168 | err = -EINVAL; | ||
1169 | goto out; | ||
1170 | } | ||
1171 | |||
1172 | /* check if there are jobs waiting for this resource */ | ||
1173 | next = __waitqueue_remove_first(&my_queue->wait); | ||
1174 | if (next) { | ||
1175 | /* | ||
1176 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", | ||
1177 | kfmlp_get_idx(sem, my_queue), | ||
1178 | next->comm, next->pid); | ||
1179 | */ | ||
1180 | /* next becomes the resource holder */ | ||
1181 | my_queue->owner = next; | ||
1182 | |||
1183 | --(my_queue->count); | ||
1184 | if(my_queue->count < sem->shortest_queue->count) | ||
1185 | { | ||
1186 | sem->shortest_queue = my_queue; | ||
1187 | } | ||
1188 | |||
1189 | TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", | ||
1190 | kfmlp_get_idx(sem, my_queue), next->comm, next->pid); | ||
1191 | |||
1192 | /* determine new hp_waiter if necessary */ | ||
1193 | if (next == my_queue->hp_waiter) { | ||
1194 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
1195 | /* next has the highest priority --- it doesn't need to | ||
1196 | * inherit. However, we need to make sure that the | ||
1197 | * next-highest priority in the queue is reflected in | ||
1198 | * hp_waiter. */ | ||
1199 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); | ||
1200 | if (my_queue->hp_waiter) | ||
1201 | TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); | ||
1202 | else | ||
1203 | TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); | ||
1204 | } else { | ||
1205 | /* Well, if next is not the highest-priority waiter, | ||
1206 | * then it ought to inherit the highest-priority | ||
1207 | * waiter's priority. */ | ||
1208 | set_priority_inheritance(next, my_queue->hp_waiter); | ||
1209 | } | ||
1210 | |||
1211 | /* wake up next */ | ||
1212 | wake_up_process(next); | ||
1213 | } | ||
1214 | else | ||
1215 | { | ||
1216 | TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); | ||
1217 | |||
1218 | next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ | ||
1219 | |||
1220 | /* | ||
1221 | if(next) | ||
1222 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", | ||
1223 | kfmlp_get_idx(sem, my_queue), | ||
1224 | next->comm, next->pid); | ||
1225 | */ | ||
1226 | |||
1227 | my_queue->owner = next; | ||
1228 | |||
1229 | if(next) | ||
1230 | { | ||
1231 | TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", | ||
1232 | kfmlp_get_idx(sem, my_queue), | ||
1233 | next->comm, next->pid); | ||
1234 | |||
1235 | /* wake up next */ | ||
1236 | wake_up_process(next); | ||
1237 | } | ||
1238 | else | ||
1239 | { | ||
1240 | TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); | ||
1241 | |||
1242 | --(my_queue->count); | ||
1243 | if(my_queue->count < sem->shortest_queue->count) | ||
1244 | { | ||
1245 | sem->shortest_queue = my_queue; | ||
1246 | } | ||
1247 | } | ||
1248 | } | ||
1249 | |||
1250 | /* we lose the benefit of priority inheritance (if any) */ | ||
1251 | if (tsk_rt(t)->inh_task) | ||
1252 | clear_priority_inheritance(t); | ||
1253 | |||
1254 | out: | ||
1255 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1256 | |||
1257 | return err; | ||
1258 | } | ||
1259 | |||
1260 | int crm_kfmlp_close(struct litmus_lock* l) | ||
1261 | { | ||
1262 | struct task_struct *t = current; | ||
1263 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1264 | struct kfmlp_queue *my_queue; | ||
1265 | unsigned long flags; | ||
1266 | |||
1267 | int owner; | ||
1268 | |||
1269 | spin_lock_irqsave(&sem->lock, flags); | ||
1270 | |||
1271 | my_queue = kfmlp_get_queue(sem, t); | ||
1272 | owner = (my_queue) ? (my_queue->owner == t) : 0; | ||
1273 | |||
1274 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1275 | |||
1276 | if (owner) | ||
1277 | crm_kfmlp_unlock(l); | ||
1278 | |||
1279 | return 0; | ||
1280 | } | ||
1281 | |||
1282 | void crm_kfmlp_free(struct litmus_lock* l) | ||
1283 | { | ||
1284 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1285 | kfree(sem->queues); | ||
1286 | kfree(sem); | ||
1287 | } | ||
1288 | |||
1289 | static struct litmus_lock_ops crm_kfmlp_lock_ops = { | ||
1290 | .close = crm_kfmlp_close, | ||
1291 | .lock = crm_kfmlp_lock, | ||
1292 | .unlock = crm_kfmlp_unlock, | ||
1293 | .deallocate = crm_kfmlp_free, | ||
1294 | }; | ||
1295 | |||
1296 | static struct litmus_lock* crm_new_kfmlp(void* __user arg, int* ret_code) | ||
1297 | { | ||
1298 | struct kfmlp_semaphore* sem; | ||
1299 | int num_resources = 0; | ||
1300 | int i; | ||
1301 | |||
1302 | if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) | ||
1303 | { | ||
1304 | *ret_code = -EINVAL; | ||
1305 | return(NULL); | ||
1306 | } | ||
1307 | if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) | ||
1308 | { | ||
1309 | *ret_code = -EINVAL; | ||
1310 | return(NULL); | ||
1311 | } | ||
1312 | if(num_resources < 1) | ||
1313 | { | ||
1314 | *ret_code = -EINVAL; | ||
1315 | return(NULL); | ||
1316 | } | ||
1317 | |||
1318 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1319 | if(!sem) | ||
1320 | { | ||
1321 | *ret_code = -ENOMEM; | ||
1322 | return NULL; | ||
1323 | } | ||
1324 | |||
1325 | sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); | ||
1326 | if(!sem->queues) | ||
1327 | { | ||
1328 | kfree(sem); | ||
1329 | *ret_code = -ENOMEM; | ||
1330 | return NULL; | ||
1331 | } | ||
1332 | |||
1333 | sem->litmus_lock.ops = &crm_kfmlp_lock_ops; | ||
1334 | spin_lock_init(&sem->lock); | ||
1335 | sem->num_resources = num_resources; | ||
1336 | |||
1337 | for(i = 0; i < num_resources; ++i) | ||
1338 | { | ||
1339 | sem->queues[i].owner = NULL; | ||
1340 | sem->queues[i].hp_waiter = NULL; | ||
1341 | init_waitqueue_head(&sem->queues[i].wait); | ||
1342 | sem->queues[i].count = 0; | ||
1343 | } | ||
1344 | |||
1345 | sem->shortest_queue = &sem->queues[0]; | ||
1346 | |||
1347 | *ret_code = 0; | ||
1348 | return &sem->litmus_lock; | ||
1349 | } | ||
1350 | |||
1351 | |||
1352 | /* **** lock constructor **** */ | ||
1353 | |||
1354 | static long crm_allocate_lock(struct litmus_lock **lock, int type, | ||
1355 | void* __user arg) | ||
1356 | { | ||
1357 | int err = -ENXIO; | ||
1358 | |||
1359 | /* C-RM currently only supports the FMLP for global resources | ||
1360 | WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */ | ||
1361 | switch (type) { | ||
1362 | case KFMLP_SEM: | ||
1363 | *lock = crm_new_kfmlp(arg, &err); | ||
1364 | break; | ||
1365 | }; | ||
1366 | |||
1367 | return err; | ||
1368 | } | ||
1369 | |||
1370 | #endif // CONFIG_LITMUS_LOCKING | ||
1371 | |||
1372 | |||
1373 | |||
1374 | |||
1375 | |||
1376 | |||
1377 | /* total number of clusters */ | ||
1378 | static int num_clusters; | ||
1379 | /* we do not support clusters of different sizes */ | ||
1380 | static unsigned int cluster_size; | ||
1381 | |||
1382 | #ifdef VERBOSE_INIT | ||
1383 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
1384 | { | ||
1385 | int chk; | ||
1386 | char buf[255]; | ||
1387 | |||
1388 | chk = cpulist_scnprintf(buf, 254, mask); | ||
1389 | buf[chk] = '\0'; | ||
1390 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
1391 | |||
1392 | } | ||
1393 | #endif | ||
1394 | |||
1395 | static int clusters_allocated = 0; | ||
1396 | |||
1397 | static void cleanup_crm(void) | ||
1398 | { | ||
1399 | int i; | ||
1400 | |||
1401 | if (clusters_allocated) { | ||
1402 | for (i = 0; i < num_clusters; i++) { | ||
1403 | kfree(crm[i].cpus); | ||
1404 | kfree(crm[i].heap_node); | ||
1405 | free_cpumask_var(crm[i].cpu_map); | ||
1406 | } | ||
1407 | |||
1408 | kfree(crm); | ||
1409 | } | ||
1410 | } | ||
1411 | |||
1412 | static long crm_activate_plugin(void) | ||
1413 | { | ||
1414 | int i, j, cpu, ccpu, cpu_count; | ||
1415 | cpu_entry_t *entry; | ||
1416 | |||
1417 | cpumask_var_t mask; | ||
1418 | int chk = 0; | ||
1419 | |||
1420 | /* de-allocate old clusters, if any */ | ||
1421 | cleanup_crm(); | ||
1422 | |||
1423 | printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n", | ||
1424 | cluster_config); | ||
1425 | |||
1426 | /* need to get cluster_size first */ | ||
1427 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
1428 | return -ENOMEM; | ||
1429 | |||
1430 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
1431 | cluster_size = num_online_cpus(); | ||
1432 | } else { | ||
1433 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
1434 | if (chk) { | ||
1435 | /* if chk != 0 then it is the max allowed index */ | ||
1436 | printk(KERN_INFO "C-RM: Cluster configuration = %d " | ||
1437 | "is not supported on this hardware.\n", | ||
1438 | cluster_config); | ||
1439 | /* User should notice that the configuration failed, so | ||
1440 | * let's bail out. */ | ||
1441 | return -EINVAL; | ||
1442 | } | ||
1443 | |||
1444 | cluster_size = cpumask_weight(mask); | ||
1445 | } | ||
1446 | |||
1447 | if ((num_online_cpus() % cluster_size) != 0) { | ||
1448 | /* this can't be right, some cpus would be left out */ | ||
1449 | printk(KERN_ERR "C-RM: cannot evenly group %d CPUs into clusters of size %d!\n", | ||
1450 | num_online_cpus(), cluster_size); | ||
1451 | return -EINVAL; | ||
1452 | } | ||
1453 | |||
1454 | num_clusters = num_online_cpus() / cluster_size; | ||
1455 | printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n", | ||
1456 | num_clusters, cluster_size); | ||
1457 | |||
1458 | /* initialize clusters */ | ||
1459 | crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC); | ||
1460 | for (i = 0; i < num_clusters; i++) { | ||
1461 | |||
1462 | crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
1463 | GFP_ATOMIC); | ||
1464 | crm[i].heap_node = kmalloc( | ||
1465 | cluster_size * sizeof(struct bheap_node), | ||
1466 | GFP_ATOMIC); | ||
1467 | bheap_init(&(crm[i].cpu_heap)); | ||
1468 | rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs); | ||
1469 | |||
1470 | if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC)) | ||
1471 | return -ENOMEM; | ||
1472 | } | ||
1473 | |||
1474 | /* cycle through cluster and add cpus to them */ | ||
1475 | for (i = 0; i < num_clusters; i++) { | ||
1476 | |||
1477 | for_each_online_cpu(cpu) { | ||
1478 | /* check if the cpu is already in a cluster */ | ||
1479 | for (j = 0; j < num_clusters; j++) | ||
1480 | if (cpumask_test_cpu(cpu, crm[j].cpu_map)) | ||
1481 | break; | ||
1482 | /* if it is in a cluster go to next cpu */ | ||
1483 | if (j < num_clusters && | ||
1484 | cpumask_test_cpu(cpu, crm[j].cpu_map)) | ||
1485 | continue; | ||
1486 | |||
1487 | /* this cpu isn't in any cluster */ | ||
1488 | /* get the shared cpus */ | ||
1489 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
1490 | cpumask_copy(mask, cpu_online_mask); | ||
1491 | else | ||
1492 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
1493 | |||
1494 | cpumask_copy(crm[i].cpu_map, mask); | ||
1495 | #ifdef VERBOSE_INIT | ||
1496 | print_cluster_topology(mask, cpu); | ||
1497 | #endif | ||
1498 | /* add cpus to current cluster and init cpu_entry_t */ | ||
1499 | cpu_count = 0; | ||
1500 | for_each_cpu(ccpu, crm[i].cpu_map) { | ||
1501 | |||
1502 | entry = &per_cpu(crm_cpu_entries, ccpu); | ||
1503 | crm[i].cpus[cpu_count] = entry; | ||
1504 | atomic_set(&entry->will_schedule, 0); | ||
1505 | entry->cpu = ccpu; | ||
1506 | entry->cluster = &crm[i]; | ||
1507 | entry->hn = &(crm[i].heap_node[cpu_count]); | ||
1508 | bheap_node_init(&entry->hn, entry); | ||
1509 | |||
1510 | cpu_count++; | ||
1511 | |||
1512 | entry->linked = NULL; | ||
1513 | entry->scheduled = NULL; | ||
1514 | update_cpu_position(entry); | ||
1515 | } | ||
1516 | /* done with this cluster */ | ||
1517 | break; | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1522 | { | ||
1523 | /* distribute the daemons evenly across the clusters. */ | ||
1524 | int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); | ||
1525 | int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; | ||
1526 | int left_over = NR_LITMUS_SOFTIRQD % num_clusters; | ||
1527 | |||
1528 | int daemon = 0; | ||
1529 | for(i = 0; i < num_clusters; ++i) | ||
1530 | { | ||
1531 | int num_on_this_cluster = num_daemons_per_cluster; | ||
1532 | if(left_over) | ||
1533 | { | ||
1534 | ++num_on_this_cluster; | ||
1535 | --left_over; | ||
1536 | } | ||
1537 | |||
1538 | for(j = 0; j < num_on_this_cluster; ++j) | ||
1539 | { | ||
1540 | // first CPU of this cluster | ||
1541 | affinity[daemon++] = i*cluster_size; | ||
1542 | } | ||
1543 | } | ||
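| /* e.g., with NR_LITMUS_SOFTIRQD == 8 and three clusters the split is | ||
| * 3/3/2, and each daemon's affinity is the first CPU of its cluster. */ | ||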
1544 | |||
1545 | spawn_klitirqd(affinity); | ||
1546 | |||
1547 | kfree(affinity); | ||
1548 | } | ||
1549 | #endif | ||
1550 | |||
1551 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1552 | init_nvidia_info(); | ||
1553 | #endif | ||
1554 | |||
1555 | free_cpumask_var(mask); | ||
1556 | clusters_allocated = 1; | ||
1557 | return 0; | ||
1558 | } | ||
1559 | |||
1560 | /* Plugin object */ | ||
1561 | static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = { | ||
1562 | .plugin_name = "C-RM", | ||
1563 | .finish_switch = crm_finish_switch, | ||
1564 | .tick = crm_tick, | ||
1565 | .task_new = crm_task_new, | ||
1566 | .complete_job = complete_job, | ||
1567 | .task_exit = crm_task_exit, | ||
1568 | .schedule = crm_schedule, | ||
1569 | .task_wake_up = crm_task_wake_up, | ||
1570 | .task_block = crm_task_block, | ||
1571 | .admit_task = crm_admit_task, | ||
1572 | .activate_plugin = crm_activate_plugin, | ||
1573 | #ifdef CONFIG_LITMUS_LOCKING | ||
1574 | .allocate_lock = crm_allocate_lock, | ||
1575 | .set_prio_inh = set_priority_inheritance, | ||
1576 | .clear_prio_inh = clear_priority_inheritance, | ||
1577 | #endif | ||
1578 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1579 | .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, | ||
1580 | .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, | ||
1581 | #endif | ||
1582 | }; | ||
1583 | |||
1584 | static struct proc_dir_entry *cluster_file = NULL, *crm_dir = NULL; | ||
1585 | |||
1586 | static int __init init_crm(void) | ||
1587 | { | ||
1588 | int err, fs; | ||
1589 | |||
1590 | err = register_sched_plugin(&crm_plugin); | ||
1591 | if (!err) { | ||
1592 | fs = make_plugin_proc_dir(&crm_plugin, &crm_dir); | ||
1593 | if (!fs) | ||
1594 | cluster_file = create_cluster_file(crm_dir, &cluster_config); | ||
1595 | else | ||
1596 | printk(KERN_ERR "Could not allocate C-RM procfs dir.\n"); | ||
1597 | } | ||
1598 | return err; | ||
1599 | } | ||
1600 | |||
1601 | static void clean_crm(void) | ||
1602 | { | ||
1603 | cleanup_crm(); | ||
1604 | if (cluster_file) | ||
1605 | remove_proc_entry("cluster", crm_dir); | ||
1606 | if (crm_dir) | ||
1607 | remove_plugin_proc_dir(&crm_plugin); | ||
1608 | } | ||
1609 | |||
1610 | module_init(init_crm); | ||
1611 | module_exit(clean_crm); | ||
diff --git a/litmus/sched_crm_srt.c b/litmus/sched_crm_srt.c new file mode 100644 index 000000000000..4473f35e64cd --- /dev/null +++ b/litmus/sched_crm_srt.c | |||
@@ -0,0 +1,1611 @@ | |||
1 | /* | ||
2 | * litmus/sched_crm_srt.c | ||
3 | * | ||
4 | * Implementation of the C-RM-SRT scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Cluster topology is automatically detected (this is arch dependent | ||
9 | * and works only on x86 at the moment --- and only with modern | ||
10 | * CPUs that export cpuid4 information) | ||
11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e., | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster | ||
14 | * - default clustering is around L2 cache (cache index = 2) | ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
16 | * online_cpus are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online cpus. | ||
21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/uaccess.h> | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | |||
36 | #include <litmus/litmus.h> | ||
37 | #include <litmus/jobs.h> | ||
38 | #include <litmus/preempt.h> | ||
39 | #include <litmus/sched_plugin.h> | ||
40 | #include <litmus/rm_srt_common.h> | ||
41 | #include <litmus/sched_trace.h> | ||
42 | |||
43 | #include <litmus/clustered.h> | ||
44 | |||
45 | #include <litmus/bheap.h> | ||
46 | |||
47 | /* to configure the cluster size */ | ||
48 | #include <litmus/litmus_proc.h> | ||
49 | |||
50 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
51 | #include <litmus/affinity.h> | ||
52 | #endif | ||
53 | |||
54 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
55 | #include <litmus/litmus_softirq.h> | ||
56 | #endif | ||
57 | |||
58 | #ifdef CONFIG_LITMUS_NVIDIA | ||
59 | #include <litmus/nvidia_info.h> | ||
60 | #endif | ||
61 | |||
62 | /* Reference configuration variable. Determines which cache level is used to | ||
63 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
64 | * all CPUs form a single cluster (just like GSN-EDF). | ||
65 | */ | ||
66 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
67 | |||
68 | struct clusterdomain; | ||
69 | |||
70 | /* cpu_entry_t - maintain the linked and scheduled state | ||
71 | * | ||
72 | * A cpu also contains a pointer to the crm_srt_domain_t cluster | ||
73 | * that owns it (struct clusterdomain*) | ||
74 | */ | ||
75 | typedef struct { | ||
76 | int cpu; | ||
77 | struct clusterdomain* cluster; /* owning cluster */ | ||
78 | struct task_struct* linked; /* only RT tasks */ | ||
79 | struct task_struct* scheduled; /* only RT tasks */ | ||
80 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
81 | struct bheap_node* hn; | ||
82 | } cpu_entry_t; | ||
83 | |||
84 | /* one cpu_entry_t per CPU */ | ||
85 | DEFINE_PER_CPU(cpu_entry_t, crm_srt_cpu_entries); | ||
86 | |||
87 | #define set_will_schedule() \ | ||
88 | (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 1)) | ||
89 | #define clear_will_schedule() \ | ||
90 | (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 0)) | ||
91 | #define test_will_schedule(cpu) \ | ||
92 | (atomic_read(&per_cpu(crm_srt_cpu_entries, cpu).will_schedule)) | ||
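The per-CPU state above distinguishes "linked" (the task the scheduler has decided should run on this CPU) from "scheduled" (the task that is actually running until the next context switch completes). A minimal user-space sketch of that two-phase hand-off follows; the toy_* names are invented for illustration and are not part of the patch.

#include <stdio.h>

/* Illustrative stand-ins for real-time tasks and per-CPU entries. */
struct toy_task { const char *name; };

struct toy_cpu {
	struct toy_task *linked;    /* what the scheduler wants to run */
	struct toy_task *scheduled; /* what is actually running right now */
};

/* Phase 1: a scheduling decision only updates the link. */
static void toy_link(struct toy_cpu *cpu, struct toy_task *t)
{
	cpu->linked = t;
}

/* Phase 2: finish_switch() later brings scheduled in line with reality. */
static void toy_finish_switch(struct toy_cpu *cpu)
{
	cpu->scheduled = cpu->linked;
}

int main(void)
{
	struct toy_task a = { "A" }, b = { "B" };
	struct toy_cpu cpu = { .linked = &a, .scheduled = &a };

	toy_link(&cpu, &b);
	/* Between the decision and the switch the two fields differ; this is
	 * exactly the "preempt" condition tested later in crm_srt_schedule(). */
	printf("linked=%s scheduled=%s\n", cpu.linked->name, cpu.scheduled->name);

	toy_finish_switch(&cpu);
	printf("linked=%s scheduled=%s\n", cpu.linked->name, cpu.scheduled->name);
	return 0;
}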
93 | |||
94 | /* | ||
95 | * In C-RM-SRT there is a crm_srt domain _per_ cluster | ||
96 | * The number of clusters is dynamically determined according to the | ||
97 | * total CPU count and the cluster size. | ||
98 | */ | ||
99 | typedef struct clusterdomain { | ||
100 | /* rt_domain for this cluster */ | ||
101 | rt_domain_t domain; | ||
102 | /* cpus in this cluster */ | ||
103 | cpu_entry_t* *cpus; | ||
104 | /* map of this cluster cpus */ | ||
105 | cpumask_var_t cpu_map; | ||
106 | /* the cpus queue themselves according to priority in here */ | ||
107 | struct bheap_node *heap_node; | ||
108 | struct bheap cpu_heap; | ||
109 | /* lock for this cluster */ | ||
110 | #define crm_srt_lock domain.ready_lock | ||
111 | } crm_srt_domain_t; | ||
112 | |||
113 | /* a crm_srt_domain per cluster; allocation is done at init/activation time */ | ||
114 | crm_srt_domain_t *crm_srt; | ||
115 | |||
116 | #define remote_cluster(cpu) ((crm_srt_domain_t *) per_cpu(crm_srt_cpu_entries, cpu).cluster) | ||
117 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
118 | |||
119 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
120 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
121 | * information during the initialization of the plugin (e.g., topology) | ||
122 | #define WANT_ALL_SCHED_EVENTS | ||
123 | */ | ||
124 | #define VERBOSE_INIT | ||
125 | |||
126 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
127 | { | ||
128 | cpu_entry_t *a, *b; | ||
129 | a = _a->value; | ||
130 | b = _b->value; | ||
131 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
132 | * the top of the heap. | ||
133 | */ | ||
134 | return rm_srt_higher_prio(b->linked, a->linked); | ||
135 | } | ||
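The inverted comparison above is what makes the cluster's CPU heap surface the CPU that should be preempted next: by asking whether b's linked task has higher priority than a's, a CPU with a low-priority (or no) linked task rises to the top of the heap. The self-contained sketch below shows the same trick with plain integers (larger value = higher priority, -1 = idle); it uses a linear scan instead of the binomial heap, and all names are invented for the example.

#include <stdio.h>

/* Toy "CPU": priority of the task linked to it, -1 meaning idle. */
struct toy_cpu { int cpu; int linked_prio; };

/* Mirrors cpu_lower_prio(): nonzero when a should sit above b, i.e. when
 * a's linked task has LOWER priority than b's. */
static int toy_cpu_lower_prio(const struct toy_cpu *a, const struct toy_cpu *b)
{
	return a->linked_prio < b->linked_prio;
}

int main(void)
{
	struct toy_cpu cpus[] = { {0, 7}, {1, -1}, {2, 3} };
	int i, worst = 0;

	for (i = 1; i < 3; i++)
		if (toy_cpu_lower_prio(&cpus[i], &cpus[worst]))
			worst = i;

	/* Prints "preemption candidate: cpu 1" -- the idle CPU wins. */
	printf("preemption candidate: cpu %d\n", cpus[worst].cpu);
	return 0;
}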
136 | |||
137 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
138 | * order in the cpu queue. Caller must hold crm_srt lock. | ||
139 | */ | ||
140 | static void update_cpu_position(cpu_entry_t *entry) | ||
141 | { | ||
142 | crm_srt_domain_t *cluster = entry->cluster; | ||
143 | |||
144 | if (likely(bheap_node_in_heap(entry->hn))) | ||
145 | bheap_delete(cpu_lower_prio, | ||
146 | &cluster->cpu_heap, | ||
147 | entry->hn); | ||
148 | |||
149 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | ||
150 | } | ||
151 | |||
152 | /* caller must hold crm_srt lock */ | ||
153 | static cpu_entry_t* lowest_prio_cpu(crm_srt_domain_t *cluster) | ||
154 | { | ||
155 | struct bheap_node* hn; | ||
156 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
157 | return hn->value; | ||
158 | } | ||
159 | |||
160 | |||
161 | /* link_task_to_cpu - Update the link of a CPU. | ||
162 | * Handles the case where the to-be-linked task is already | ||
163 | * scheduled on a different CPU. | ||
164 | */ | ||
165 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
166 | cpu_entry_t *entry) | ||
167 | { | ||
168 | cpu_entry_t *sched; | ||
169 | struct task_struct* tmp; | ||
170 | int on_cpu; | ||
171 | |||
172 | BUG_ON(linked && !is_realtime(linked)); | ||
173 | |||
174 | /* Currently linked task is set to be unlinked. */ | ||
175 | if (entry->linked) { | ||
176 | entry->linked->rt_param.linked_on = NO_CPU; | ||
177 | } | ||
178 | |||
179 | /* Link new task to CPU. */ | ||
180 | if (linked) { | ||
181 | set_rt_flags(linked, RT_F_RUNNING); | ||
182 | /* handle the case where the task is already scheduled somewhere! */ | ||
183 | on_cpu = linked->rt_param.scheduled_on; | ||
184 | if (on_cpu != NO_CPU) { | ||
185 | sched = &per_cpu(crm_srt_cpu_entries, on_cpu); | ||
186 | /* this should only happen if not linked already */ | ||
187 | BUG_ON(sched->linked == linked); | ||
188 | |||
189 | /* If we are already scheduled on the CPU to which we | ||
190 | * wanted to link, we don't need to do the swap -- | ||
191 | * we just link ourselves to the CPU and depend on | ||
192 | * the caller to get things right. | ||
193 | */ | ||
194 | if (entry != sched) { | ||
195 | TRACE_TASK(linked, | ||
196 | "already scheduled on %d, updating link.\n", | ||
197 | sched->cpu); | ||
198 | tmp = sched->linked; | ||
199 | linked->rt_param.linked_on = sched->cpu; | ||
200 | sched->linked = linked; | ||
201 | update_cpu_position(sched); | ||
202 | linked = tmp; | ||
203 | } | ||
204 | } | ||
205 | if (linked) /* might be NULL due to swap */ | ||
206 | linked->rt_param.linked_on = entry->cpu; | ||
207 | } | ||
208 | entry->linked = linked; | ||
209 | #ifdef WANT_ALL_SCHED_EVENTS | ||
210 | if (linked) | ||
211 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
212 | else | ||
213 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
214 | #endif | ||
215 | update_cpu_position(entry); | ||
216 | } | ||
217 | |||
218 | /* unlink - Make sure a task is not linked any longer to an entry | ||
219 | * where it was linked before. Must hold crm_srt_lock. | ||
220 | */ | ||
221 | static noinline void unlink(struct task_struct* t) | ||
222 | { | ||
223 | cpu_entry_t *entry; | ||
224 | |||
225 | if (t->rt_param.linked_on != NO_CPU) { | ||
226 | /* unlink */ | ||
227 | entry = &per_cpu(crm_srt_cpu_entries, t->rt_param.linked_on); | ||
228 | t->rt_param.linked_on = NO_CPU; | ||
229 | link_task_to_cpu(NULL, entry); | ||
230 | } else if (is_queued(t)) { | ||
231 | /* This is an interesting situation: t is scheduled, | ||
232 | * but was just recently unlinked. It cannot be | ||
233 | * linked anywhere else (because then it would have | ||
234 | * been relinked to this CPU), thus it must be in some | ||
235 | * queue. We must remove it from the list in this | ||
236 | * case. | ||
237 | * | ||
238 | * In the C-RM-SRT case it should be somewhere in the queue for | ||
239 | * its domain, therefore we can get the domain using | ||
240 | * task_cpu_cluster. | ||
241 | */ | ||
242 | remove(&(task_cpu_cluster(t))->domain, t); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | |||
247 | /* preempt - force a CPU to reschedule | ||
248 | */ | ||
249 | static void preempt(cpu_entry_t *entry) | ||
250 | { | ||
251 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
252 | } | ||
253 | |||
254 | /* requeue - Put an unlinked task into its cluster's crm_srt domain. | ||
255 | * Caller must hold crm_srt_lock. | ||
256 | */ | ||
257 | static noinline void requeue(struct task_struct* task) | ||
258 | { | ||
259 | crm_srt_domain_t *cluster = task_cpu_cluster(task); | ||
260 | BUG_ON(!task); | ||
261 | /* sanity check before insertion */ | ||
262 | BUG_ON(is_queued(task)); | ||
263 | |||
264 | if (is_released(task, litmus_clock())) | ||
265 | __add_ready(&cluster->domain, task); | ||
266 | else { | ||
267 | /* it has got to wait */ | ||
268 | add_release(&cluster->domain, task); | ||
269 | } | ||
270 | } | ||
271 | |||
272 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
273 | static cpu_entry_t* crm_srt_get_nearest_available_cpu( | ||
274 | crm_srt_domain_t *cluster, cpu_entry_t* start) | ||
275 | { | ||
276 | cpu_entry_t* affinity; | ||
277 | |||
278 | get_nearest_available_cpu(affinity, start, crm_srt_cpu_entries, -1); | ||
279 | |||
280 | /* make sure CPU is in our cluster */ | ||
281 | if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) | ||
282 | return(affinity); | ||
283 | else | ||
284 | return(NULL); | ||
285 | } | ||
286 | #endif | ||
287 | |||
288 | |||
289 | /* check for any necessary preemptions */ | ||
290 | static void check_for_preemptions(crm_srt_domain_t *cluster) | ||
291 | { | ||
292 | struct task_struct *task; | ||
293 | cpu_entry_t *last; | ||
294 | |||
295 | for(last = lowest_prio_cpu(cluster); | ||
296 | rm_srt_preemption_needed(&cluster->domain, last->linked); | ||
297 | last = lowest_prio_cpu(cluster)) { | ||
298 | /* preemption necessary */ | ||
299 | task = __take_ready(&cluster->domain); | ||
300 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
301 | { | ||
302 | cpu_entry_t* affinity = | ||
303 | crm_srt_get_nearest_available_cpu(cluster, | ||
304 | &per_cpu(crm_srt_cpu_entries, task_cpu(task))); | ||
305 | if(affinity) | ||
306 | last = affinity; | ||
307 | else if(last->linked) | ||
308 | requeue(last->linked); | ||
309 | } | ||
310 | #else | ||
311 | if (last->linked) | ||
312 | requeue(last->linked); | ||
313 | #endif | ||
314 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
315 | task->pid, last->cpu); | ||
316 | link_task_to_cpu(task, last); | ||
317 | preempt(last); | ||
318 | } | ||
319 | } | ||
320 | |||
321 | /* crm_srt_job_arrival: task is either resumed or released */ | ||
322 | static noinline void crm_srt_job_arrival(struct task_struct* task) | ||
323 | { | ||
324 | crm_srt_domain_t *cluster = task_cpu_cluster(task); | ||
325 | BUG_ON(!task); | ||
326 | |||
327 | requeue(task); | ||
328 | check_for_preemptions(cluster); | ||
329 | } | ||
330 | |||
331 | static void crm_srt_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
332 | { | ||
333 | crm_srt_domain_t* cluster = container_of(rt, crm_srt_domain_t, domain); | ||
334 | unsigned long flags; | ||
335 | |||
336 | raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); | ||
337 | |||
338 | __merge_ready(&cluster->domain, tasks); | ||
339 | check_for_preemptions(cluster); | ||
340 | |||
341 | raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); | ||
342 | } | ||
343 | |||
344 | /* caller holds crm_srt_lock */ | ||
345 | static noinline void job_completion(struct task_struct *t, int forced) | ||
346 | { | ||
347 | BUG_ON(!t); | ||
348 | |||
349 | sched_trace_task_completion(t, forced); | ||
350 | |||
351 | #ifdef CONFIG_LITMUS_NVIDIA | ||
352 | atomic_set(&tsk_rt(t)->nv_int_count, 0); | ||
353 | #endif | ||
354 | |||
355 | TRACE_TASK(t, "job_completion().\n"); | ||
356 | |||
357 | /* set flags */ | ||
358 | set_rt_flags(t, RT_F_SLEEP); | ||
359 | /* prepare for next period */ | ||
360 | prepare_for_next_period(t); | ||
361 | if (is_released(t, litmus_clock())) | ||
362 | sched_trace_task_release(t); | ||
363 | /* unlink */ | ||
364 | unlink(t); | ||
365 | /* requeue | ||
366 | * But don't requeue a blocking task. */ | ||
367 | if (is_running(t)) | ||
368 | crm_srt_job_arrival(t); | ||
369 | } | ||
370 | |||
371 | /* crm_srt_tick - this function is called for every local timer | ||
372 | * interrupt. | ||
373 | * | ||
374 | * checks whether the current task has expired and checks | ||
375 | * whether we need to preempt it if it has not expired | ||
376 | */ | ||
377 | static void crm_srt_tick(struct task_struct* t) | ||
378 | { | ||
379 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
380 | if (!is_np(t)) { | ||
381 | /* np tasks will be preempted when they become | ||
382 | * preemptable again | ||
383 | */ | ||
384 | litmus_reschedule_local(); | ||
385 | set_will_schedule(); | ||
386 | TRACE("crm_srt_scheduler_tick: " | ||
387 | "%d is preemptable " | ||
388 | " => FORCE_RESCHED\n", t->pid); | ||
389 | } else if (is_user_np(t)) { | ||
390 | TRACE("crm_srt_scheduler_tick: " | ||
391 | "%d is non-preemptable, " | ||
392 | "preemption delayed.\n", t->pid); | ||
393 | request_exit_np(t); | ||
394 | } | ||
395 | } | ||
396 | } | ||
397 | |||
398 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
399 | * assumptions on the state of the current task since it may be called for a | ||
400 | * number of reasons. The reasons include: a scheduler_tick() determined that | ||
401 | * a reschedule was necessary; sys_exit_np() was called; some Linux | ||
402 | * subsystem determined so; or even (in the worst case) there is a bug | ||
403 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
404 | * current state is. | ||
405 | * | ||
406 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
407 | * | ||
408 | * The following assertions for the scheduled task could hold: | ||
409 | * | ||
410 | * - !is_running(scheduled) // the job blocks | ||
411 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
412 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
413 | * - linked != scheduled // we need to reschedule (for any reason) | ||
414 | * - is_np(scheduled) // rescheduling must be delayed, | ||
415 | * sys_exit_np must be requested | ||
416 | * | ||
417 | * Any of these can occur together. | ||
418 | */ | ||
419 | static struct task_struct* crm_srt_schedule(struct task_struct * prev) | ||
420 | { | ||
421 | cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries); | ||
422 | crm_srt_domain_t *cluster = entry->cluster; | ||
423 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
424 | struct task_struct* next = NULL; | ||
425 | |||
426 | raw_spin_lock(&cluster->crm_srt_lock); | ||
427 | clear_will_schedule(); | ||
428 | |||
429 | /* sanity checking */ | ||
430 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
431 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
432 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
433 | |||
434 | /* (0) Determine state */ | ||
435 | exists = entry->scheduled != NULL; | ||
436 | blocks = exists && !is_running(entry->scheduled); | ||
437 | out_of_time = exists && | ||
438 | budget_enforced(entry->scheduled) && | ||
439 | budget_exhausted(entry->scheduled); | ||
440 | np = exists && is_np(entry->scheduled); | ||
441 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
442 | preempt = entry->scheduled != entry->linked; | ||
443 | |||
444 | #ifdef WANT_ALL_SCHED_EVENTS | ||
445 | TRACE_TASK(prev, "invoked crm_srt_schedule.\n"); | ||
446 | #endif | ||
447 | |||
448 | if (exists) | ||
449 | TRACE_TASK(prev, | ||
450 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
451 | "state:%d sig:%d\n", | ||
452 | blocks, out_of_time, np, sleep, preempt, | ||
453 | prev->state, signal_pending(prev)); | ||
454 | if (entry->linked && preempt) | ||
455 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
456 | entry->linked->comm, entry->linked->pid); | ||
457 | |||
458 | |||
459 | /* If a task blocks we have no choice but to reschedule. | ||
460 | */ | ||
461 | if (blocks) | ||
462 | unlink(entry->scheduled); | ||
463 | |||
464 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
465 | * We need to make sure to update the link structure anyway in case | ||
466 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
467 | * hurt. | ||
468 | */ | ||
469 | if (np && (out_of_time || preempt || sleep)) { | ||
470 | unlink(entry->scheduled); | ||
471 | request_exit_np(entry->scheduled); | ||
472 | } | ||
473 | |||
474 | /* Any task that is preemptable and either exhausts its execution | ||
475 | * budget or wants to sleep completes. We may have to reschedule after | ||
476 | * this. Don't do a job completion if we block (can't have timers running | ||
477 | * for blocked jobs). Preemptions go first for the same reason. | ||
478 | */ | ||
479 | if (!np && (out_of_time || sleep) && !blocks && !preempt) | ||
480 | job_completion(entry->scheduled, !sleep); | ||
481 | |||
482 | /* Link pending task if we became unlinked. | ||
483 | */ | ||
484 | if (!entry->linked) | ||
485 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
486 | |||
487 | /* The final scheduling decision. Do we need to switch for some reason? | ||
488 | * If linked is different from scheduled, then select linked as next. | ||
489 | */ | ||
490 | if ((!np || blocks) && | ||
491 | entry->linked != entry->scheduled) { | ||
492 | /* Schedule a linked job? */ | ||
493 | if (entry->linked) { | ||
494 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
495 | next = entry->linked; | ||
496 | } | ||
497 | if (entry->scheduled) { | ||
498 | /* not gonna be scheduled soon */ | ||
499 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
500 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
501 | } | ||
502 | } else | ||
503 | /* Only override Linux scheduler if we have a real-time task | ||
504 | * scheduled that needs to continue. | ||
505 | */ | ||
506 | if (exists) | ||
507 | next = prev; | ||
508 | |||
509 | sched_state_task_picked(); | ||
510 | raw_spin_unlock(&cluster->crm_srt_lock); | ||
511 | |||
512 | #ifdef WANT_ALL_SCHED_EVENTS | ||
513 | TRACE("crm_srt_lock released, next=0x%p\n", next); | ||
514 | |||
515 | if (next) | ||
516 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
517 | else if (exists && !next) | ||
518 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
519 | #endif | ||
520 | |||
521 | |||
522 | return next; | ||
523 | } | ||
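The comment block above crm_srt_schedule() enumerates the state flags that drive the function. As a condensed restatement (not part of the patch, and deliberately simplified: the real code performs several of these steps in sequence and also re-links tasks under the cluster lock), the decision can be written as a pure function of the five booleans:

/* Hypothetical summary of crm_srt_schedule()'s branching, for illustration
 * only. In the real code a COMPLETE_JOB is typically followed by a switch
 * to whatever task gets linked next. */
enum toy_action { KEEP_CURRENT, COMPLETE_JOB, REQUEST_EXIT_NP, SWITCH };

static enum toy_action toy_decide(int np, int out_of_time, int sleep,
                                  int blocks, int preempt)
{
	if (np && (out_of_time || preempt || sleep))
		return REQUEST_EXIT_NP;  /* cannot preempt a non-preemptive section */
	if (!np && (out_of_time || sleep) && !blocks && !preempt)
		return COMPLETE_JOB;     /* job ends, then a new job is linked */
	if ((!np || blocks) && preempt)
		return SWITCH;           /* linked != scheduled */
	return KEEP_CURRENT;
}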
524 | |||
525 | |||
526 | /* _finish_switch - we just finished the switch away from prev | ||
527 | */ | ||
528 | static void crm_srt_finish_switch(struct task_struct *prev) | ||
529 | { | ||
530 | cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries); | ||
531 | |||
532 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
533 | #ifdef WANT_ALL_SCHED_EVENTS | ||
534 | TRACE_TASK(prev, "switched away from\n"); | ||
535 | #endif | ||
536 | } | ||
537 | |||
538 | |||
539 | /* Prepare a task for running in RT mode | ||
540 | */ | ||
541 | static void crm_srt_task_new(struct task_struct * t, int on_rq, int running) | ||
542 | { | ||
543 | unsigned long flags; | ||
544 | cpu_entry_t* entry; | ||
545 | crm_srt_domain_t* cluster; | ||
546 | |||
547 | TRACE("gsn edf: task new %d\n", t->pid); | ||
548 | |||
549 | /* the cluster doesn't change even if t is running */ | ||
550 | cluster = task_cpu_cluster(t); | ||
551 | |||
552 | raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); | ||
553 | |||
554 | /* setup job params */ | ||
555 | release_at(t, litmus_clock()); | ||
556 | |||
557 | if (running) { | ||
558 | entry = &per_cpu(crm_srt_cpu_entries, task_cpu(t)); | ||
559 | BUG_ON(entry->scheduled); | ||
560 | |||
561 | entry->scheduled = t; | ||
562 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
563 | } else { | ||
564 | t->rt_param.scheduled_on = NO_CPU; | ||
565 | } | ||
566 | t->rt_param.linked_on = NO_CPU; | ||
567 | |||
568 | crm_srt_job_arrival(t); | ||
569 | raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); | ||
570 | } | ||
571 | |||
572 | static void crm_srt_task_wake_up(struct task_struct *task) | ||
573 | { | ||
574 | unsigned long flags; | ||
575 | //lt_t now; | ||
576 | crm_srt_domain_t *cluster; | ||
577 | |||
578 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
579 | |||
580 | cluster = task_cpu_cluster(task); | ||
581 | |||
582 | raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); | ||
583 | |||
584 | #if 0 // sporadic task model | ||
585 | /* We need to take suspensions because of semaphores into | ||
586 | * account! If a job resumes after being suspended due to acquiring | ||
587 | * a semaphore, it should never be treated as a new job release. | ||
588 | */ | ||
589 | if (get_rt_flags(task) == RT_F_EXIT_SEM) { | ||
590 | set_rt_flags(task, RT_F_RUNNING); | ||
591 | } else { | ||
592 | now = litmus_clock(); | ||
593 | if (is_tardy(task, now)) { | ||
594 | /* new sporadic release */ | ||
595 | release_at(task, now); | ||
596 | sched_trace_task_release(task); | ||
597 | } | ||
598 | else { | ||
599 | if (task->rt.time_slice) { | ||
600 | /* came back in time before deadline | ||
601 | */ | ||
602 | set_rt_flags(task, RT_F_RUNNING); | ||
603 | } | ||
604 | } | ||
605 | } | ||
606 | #endif | ||
607 | |||
608 | //BUG_ON(tsk_rt(task)->linked_on != NO_CPU); | ||
609 | set_rt_flags(task, RT_F_RUNNING); // periodic model | ||
610 | |||
611 | if(tsk_rt(task)->linked_on == NO_CPU) | ||
612 | crm_srt_job_arrival(task); | ||
613 | else | ||
614 | TRACE("WTF, mate?!\n"); | ||
615 | |||
616 | raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); | ||
617 | } | ||
618 | |||
619 | static void crm_srt_task_block(struct task_struct *t) | ||
620 | { | ||
621 | unsigned long flags; | ||
622 | crm_srt_domain_t *cluster; | ||
623 | |||
624 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
625 | |||
626 | cluster = task_cpu_cluster(t); | ||
627 | |||
628 | /* unlink if necessary */ | ||
629 | raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); | ||
630 | unlink(t); | ||
631 | raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); | ||
632 | |||
633 | BUG_ON(!is_realtime(t)); | ||
634 | } | ||
635 | |||
636 | |||
637 | static void crm_srt_task_exit(struct task_struct * t) | ||
638 | { | ||
639 | unsigned long flags; | ||
640 | crm_srt_domain_t *cluster = task_cpu_cluster(t); | ||
641 | |||
642 | /* unlink if necessary */ | ||
643 | raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags); | ||
644 | unlink(t); | ||
645 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
646 | cpu_entry_t *cpu; | ||
647 | cpu = &per_cpu(crm_srt_cpu_entries, tsk_rt(t)->scheduled_on); | ||
648 | cpu->scheduled = NULL; | ||
649 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
650 | } | ||
651 | raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags); | ||
652 | |||
653 | BUG_ON(!is_realtime(t)); | ||
654 | TRACE_TASK(t, "RIP\n"); | ||
655 | } | ||
656 | |||
657 | static long crm_srt_admit_task(struct task_struct* tsk) | ||
658 | { | ||
659 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | ||
660 | } | ||
661 | |||
662 | |||
663 | |||
664 | |||
665 | |||
666 | |||
667 | |||
668 | |||
669 | |||
670 | |||
671 | |||
672 | |||
673 | |||
674 | #ifdef CONFIG_LITMUS_LOCKING | ||
675 | |||
676 | #include <litmus/fdso.h> | ||
677 | |||
678 | |||
679 | static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
680 | { | ||
681 | int linked_on; | ||
682 | int check_preempt = 0; | ||
683 | |||
684 | crm_srt_domain_t* cluster = task_cpu_cluster(t); | ||
685 | |||
686 | if(prio_inh != NULL) | ||
687 | TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); | ||
688 | else | ||
689 | TRACE_TASK(t, "inherits priority from %p\n", prio_inh); | ||
690 | |||
691 | sched_trace_eff_prio_change(t, prio_inh); | ||
692 | |||
693 | tsk_rt(t)->inh_task = prio_inh; | ||
694 | |||
695 | linked_on = tsk_rt(t)->linked_on; | ||
696 | |||
697 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
698 | if (linked_on != NO_CPU) { | ||
699 | TRACE_TASK(t, "%s: linked on %d\n", | ||
700 | __FUNCTION__, linked_on); | ||
701 | /* Holder is scheduled; need to re-order CPUs. | ||
702 | * We can't use heap_decrease() here since | ||
703 | * the cpu_heap is ordered in reverse direction, so | ||
704 | * it is actually an increase. */ | ||
705 | bheap_delete(cpu_lower_prio, &cluster->cpu_heap, | ||
706 | per_cpu(crm_srt_cpu_entries, linked_on).hn); | ||
707 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, | ||
708 | per_cpu(crm_srt_cpu_entries, linked_on).hn); | ||
709 | } else { | ||
710 | /* holder may be queued: first stop queue changes */ | ||
711 | raw_spin_lock(&cluster->domain.release_lock); | ||
712 | if (is_queued(t)) { | ||
713 | TRACE_TASK(t, "%s: is queued\n", __FUNCTION__); | ||
714 | |||
715 | /* We need to update the position of holder in some | ||
716 | * heap. Note that this could be a release heap if | ||
717 | * budget enforcement is used and this job overran. */ | ||
718 | check_preempt = !bheap_decrease(rm_srt_ready_order, tsk_rt(t)->heap_node); | ||
719 | |||
720 | } else { | ||
721 | /* Nothing to do: if it is not queued and not linked | ||
722 | * then it is either sleeping or currently being moved | ||
723 | * by other code (e.g., a timer interrupt handler) that | ||
724 | * will use the correct priority when enqueuing the | ||
725 | * task. */ | ||
726 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__); | ||
727 | } | ||
728 | raw_spin_unlock(&cluster->domain.release_lock); | ||
729 | |||
730 | /* If holder was enqueued in a release heap, then the following | ||
731 | * preemption check is pointless, but we can't easily detect | ||
732 | * that case. If you want to fix this, then consider that | ||
733 | * simply adding a state flag requires O(n) time to update when | ||
734 | * releasing n tasks, which conflicts with the goal to have | ||
735 | * O(log n) merges. */ | ||
736 | if (check_preempt) { | ||
737 | /* heap_decrease() hit the top level of the heap: make | ||
738 | * sure preemption checks get the right task, not the | ||
739 | * potentially stale cache. */ | ||
740 | bheap_uncache_min(rm_srt_ready_order, &cluster->domain.ready_queue); | ||
741 | check_for_preemptions(cluster); | ||
742 | } | ||
743 | } | ||
744 | } | ||
745 | |||
746 | /* called with IRQs off */ | ||
747 | static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
748 | { | ||
749 | crm_srt_domain_t* cluster = task_cpu_cluster(t); | ||
750 | |||
751 | raw_spin_lock(&cluster->crm_srt_lock); | ||
752 | |||
753 | __set_priority_inheritance(t, prio_inh); | ||
754 | |||
755 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
756 | if(tsk_rt(t)->cur_klitirqd != NULL) | ||
757 | { | ||
758 | TRACE_TASK(t, "%s/%d inherits a new priority!\n", | ||
759 | tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); | ||
760 | |||
761 | __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh); | ||
762 | } | ||
763 | #endif | ||
764 | |||
765 | raw_spin_unlock(&cluster->crm_srt_lock); | ||
766 | } | ||
767 | |||
768 | |||
769 | /* called with IRQs off */ | ||
770 | static void __clear_priority_inheritance(struct task_struct* t) | ||
771 | { | ||
772 | TRACE_TASK(t, "priority restored\n"); | ||
773 | |||
774 | if(tsk_rt(t)->scheduled_on != NO_CPU) | ||
775 | { | ||
776 | sched_trace_eff_prio_change(t, NULL); | ||
777 | |||
778 | tsk_rt(t)->inh_task = NULL; | ||
779 | |||
780 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
781 | * since the priority was effectively lowered. */ | ||
782 | unlink(t); | ||
783 | crm_srt_job_arrival(t); | ||
784 | } | ||
785 | else | ||
786 | { | ||
787 | __set_priority_inheritance(t, NULL); | ||
788 | } | ||
789 | |||
790 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
791 | if(tsk_rt(t)->cur_klitirqd != NULL) | ||
792 | { | ||
793 | TRACE_TASK(t, "%s/%d inheritance set back to owner.\n", | ||
794 | tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid); | ||
795 | |||
796 | if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU) | ||
797 | { | ||
798 | sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t); | ||
799 | |||
800 | tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t; | ||
801 | |||
802 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
803 | * since the priority was effectively lowered. */ | ||
804 | unlink(tsk_rt(t)->cur_klitirqd); | ||
805 | crm_srt_job_arrival(tsk_rt(t)->cur_klitirqd); | ||
806 | } | ||
807 | else | ||
808 | { | ||
809 | __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t); | ||
810 | } | ||
811 | } | ||
812 | #endif | ||
813 | } | ||
814 | |||
815 | /* called with IRQs off */ | ||
816 | static void clear_priority_inheritance(struct task_struct* t) | ||
817 | { | ||
818 | crm_srt_domain_t* cluster = task_cpu_cluster(t); | ||
819 | |||
820 | raw_spin_lock(&cluster->crm_srt_lock); | ||
821 | __clear_priority_inheritance(t); | ||
822 | raw_spin_unlock(&cluster->crm_srt_lock); | ||
823 | } | ||
824 | |||
825 | |||
826 | |||
827 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
828 | /* called with IRQs off */ | ||
829 | static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd, | ||
830 | struct task_struct* old_owner, | ||
831 | struct task_struct* new_owner) | ||
832 | { | ||
833 | crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd); | ||
834 | |||
835 | BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); | ||
836 | |||
837 | raw_spin_lock(&cluster->crm_srt_lock); | ||
838 | |||
839 | if(old_owner != new_owner) | ||
840 | { | ||
841 | if(old_owner) | ||
842 | { | ||
843 | // unreachable? | ||
844 | tsk_rt(old_owner)->cur_klitirqd = NULL; | ||
845 | } | ||
846 | |||
847 | TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n", | ||
848 | new_owner->comm, new_owner->pid); | ||
849 | |||
850 | tsk_rt(new_owner)->cur_klitirqd = klitirqd; | ||
851 | } | ||
852 | |||
853 | __set_priority_inheritance(klitirqd, | ||
854 | (tsk_rt(new_owner)->inh_task == NULL) ? | ||
855 | new_owner : | ||
856 | tsk_rt(new_owner)->inh_task); | ||
857 | |||
858 | raw_spin_unlock(&cluster->crm_srt_lock); | ||
859 | } | ||
860 | |||
861 | /* called with IRQs off */ | ||
862 | static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd, | ||
863 | struct task_struct* old_owner) | ||
864 | { | ||
865 | crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd); | ||
866 | |||
867 | BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread)); | ||
868 | |||
869 | raw_spin_lock(&cluster->crm_srt_lock); | ||
870 | |||
871 | TRACE_TASK(klitirqd, "priority restored\n"); | ||
872 | |||
873 | if(tsk_rt(klitirqd)->scheduled_on != NO_CPU) | ||
874 | { | ||
875 | tsk_rt(klitirqd)->inh_task = NULL; | ||
876 | |||
877 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
878 | * since the priority was effectively lowered. */ | ||
879 | unlink(klitirqd); | ||
880 | crm_srt_job_arrival(klitirqd); | ||
881 | } | ||
882 | else | ||
883 | { | ||
884 | __set_priority_inheritance(klitirqd, NULL); | ||
885 | } | ||
886 | |||
887 | tsk_rt(old_owner)->cur_klitirqd = NULL; | ||
888 | |||
889 | raw_spin_unlock(&cluster->crm_srt_lock); | ||
890 | } | ||
891 | #endif // CONFIG_LITMUS_SOFTIRQD | ||
892 | |||
893 | |||
894 | /* ******************** KFMLP support ********************** */ | ||
895 | |||
896 | /* struct for semaphore with priority inheritance */ | ||
897 | struct kfmlp_queue | ||
898 | { | ||
899 | wait_queue_head_t wait; | ||
900 | struct task_struct* owner; | ||
901 | struct task_struct* hp_waiter; | ||
902 | int count; /* number of waiters + holder */ | ||
903 | }; | ||
904 | |||
905 | struct kfmlp_semaphore | ||
906 | { | ||
907 | struct litmus_lock litmus_lock; | ||
908 | |||
909 | spinlock_t lock; | ||
910 | |||
911 | int num_resources; /* aka k */ | ||
912 | struct kfmlp_queue *queues; /* array */ | ||
913 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ | ||
914 | }; | ||
915 | |||
916 | static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) | ||
917 | { | ||
918 | return container_of(lock, struct kfmlp_semaphore, litmus_lock); | ||
919 | } | ||
920 | |||
921 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | ||
922 | struct kfmlp_queue* queue) | ||
923 | { | ||
924 | return (queue - &sem->queues[0]); | ||
925 | } | ||
926 | |||
927 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, | ||
928 | struct task_struct* holder) | ||
929 | { | ||
930 | int i; | ||
931 | for(i = 0; i < sem->num_resources; ++i) | ||
932 | if(sem->queues[i].owner == holder) | ||
933 | return(&sem->queues[i]); | ||
934 | return(NULL); | ||
935 | } | ||
936 | |||
937 | /* caller is responsible for locking */ | ||
938 | static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, | ||
939 | struct task_struct *skip) | ||
940 | { | ||
941 | struct list_head *pos; | ||
942 | struct task_struct *queued, *found = NULL; | ||
943 | |||
944 | list_for_each(pos, &kqueue->wait.task_list) { | ||
945 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
946 | task_list)->private; | ||
947 | |||
948 | /* Compare task prios, find high prio task. */ | ||
949 | if (queued != skip && rm_srt_higher_prio(queued, found)) | ||
950 | found = queued; | ||
951 | } | ||
952 | return found; | ||
953 | } | ||
954 | |||
955 | static inline struct kfmlp_queue* kfmlp_find_shortest( | ||
956 | struct kfmlp_semaphore* sem, | ||
957 | struct kfmlp_queue* search_start) | ||
958 | { | ||
959 | // we start our search at search_start instead of at the beginning of the | ||
960 | // queue list to load-balance across all resources. | ||
961 | struct kfmlp_queue* step = search_start; | ||
962 | struct kfmlp_queue* shortest = sem->shortest_queue; | ||
963 | |||
964 | do | ||
965 | { | ||
966 | step = (step+1 != &sem->queues[sem->num_resources]) ? | ||
967 | step+1 : &sem->queues[0]; | ||
968 | if(step->count < shortest->count) | ||
969 | { | ||
970 | shortest = step; | ||
971 | if(step->count == 0) | ||
972 | break; /* can't get any shorter */ | ||
973 | } | ||
974 | }while(step != search_start); | ||
975 | |||
976 | return(shortest); | ||
977 | } | ||
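kfmlp_find_shortest() above walks the k replica queues in a ring starting just past search_start, so ties are broken away from the queue the caller last touched and requests spread across the replicas. A self-contained rendering of the same wrap-around search (queue lengths only; K and the toy_ name are made up for the example):

#include <stdio.h>

#define K 4

/* Returns the index of the shortest queue, searching circularly from the
 * slot after 'start' -- the same walk as in kfmlp_find_shortest(). */
static int toy_find_shortest(const int count[K], int start, int cur_shortest)
{
	int step = start, shortest = cur_shortest;

	do {
		step = (step + 1 != K) ? step + 1 : 0;
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break; /* can't get any shorter */
		}
	} while (step != start);

	return shortest;
}

int main(void)
{
	int count[K] = { 2, 1, 1, 3 };
	/* Starting after queue 0, queue 1 is found before the equally short
	 * queue 2 -- the start position is what spreads the load. */
	printf("shortest = %d\n", toy_find_shortest(count, 0, 0));
	return 0;
}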
978 | |||
979 | static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | ||
980 | { | ||
981 | /* must hold sem->lock */ | ||
982 | |||
983 | struct kfmlp_queue *my_queue = NULL; | ||
984 | struct task_struct *max_hp = NULL; | ||
985 | |||
986 | |||
987 | struct list_head *pos; | ||
988 | struct task_struct *queued; | ||
989 | int i; | ||
990 | |||
991 | for(i = 0; i < sem->num_resources; ++i) | ||
992 | { | ||
993 | if( (sem->queues[i].count > 1) && | ||
994 | ((my_queue == NULL) || | ||
995 | (rm_srt_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
996 | { | ||
997 | my_queue = &sem->queues[i]; | ||
998 | } | ||
999 | } | ||
1000 | |||
1001 | if(my_queue) | ||
1002 | { | ||
1003 | crm_srt_domain_t* cluster; | ||
1004 | |||
1005 | max_hp = my_queue->hp_waiter; | ||
1006 | BUG_ON(!max_hp); | ||
1007 | |||
1008 | TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", | ||
1009 | kfmlp_get_idx(sem, my_queue), | ||
1010 | max_hp->comm, max_hp->pid, | ||
1011 | kfmlp_get_idx(sem, my_queue)); | ||
1012 | |||
1013 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp); | ||
1014 | |||
1015 | /* | ||
1016 | if(my_queue->hp_waiter) | ||
1017 | TRACE_CUR("queue %d: new hp_waiter is %s/%d\n", | ||
1018 | kfmlp_get_idx(sem, my_queue), | ||
1019 | my_queue->hp_waiter->comm, | ||
1020 | my_queue->hp_waiter->pid); | ||
1021 | else | ||
1022 | TRACE_CUR("queue %d: new hp_waiter is %p\n", | ||
1023 | kfmlp_get_idx(sem, my_queue), NULL); | ||
1024 | */ | ||
1025 | |||
1026 | cluster = task_cpu_cluster(max_hp); | ||
1027 | |||
1028 | raw_spin_lock(&cluster->crm_srt_lock); | ||
1029 | |||
1030 | /* | ||
1031 | if(my_queue->owner) | ||
1032 | TRACE_CUR("queue %d: owner is %s/%d\n", | ||
1033 | kfmlp_get_idx(sem, my_queue), | ||
1034 | my_queue->owner->comm, | ||
1035 | my_queue->owner->pid); | ||
1036 | else | ||
1037 | TRACE_CUR("queue %d: owner is %p\n", | ||
1038 | kfmlp_get_idx(sem, my_queue), | ||
1039 | NULL); | ||
1040 | */ | ||
1041 | |||
1042 | if(tsk_rt(my_queue->owner)->inh_task == max_hp) | ||
1043 | { | ||
1044 | __clear_priority_inheritance(my_queue->owner); | ||
1045 | if(my_queue->hp_waiter != NULL) | ||
1046 | { | ||
1047 | __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); | ||
1048 | } | ||
1049 | } | ||
1050 | raw_spin_unlock(&cluster->crm_srt_lock); | ||
1051 | |||
1052 | list_for_each(pos, &my_queue->wait.task_list) | ||
1053 | { | ||
1054 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
1055 | task_list)->private; | ||
1056 | /* Compare task prios, find high prio task. */ | ||
1057 | if (queued == max_hp) | ||
1058 | { | ||
1059 | /* | ||
1060 | TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n", | ||
1061 | kfmlp_get_idx(sem, my_queue)); | ||
1062 | */ | ||
1063 | __remove_wait_queue(&my_queue->wait, | ||
1064 | list_entry(pos, wait_queue_t, task_list)); | ||
1065 | break; | ||
1066 | } | ||
1067 | } | ||
1068 | --(my_queue->count); | ||
1069 | } | ||
1070 | |||
1071 | return(max_hp); | ||
1072 | } | ||
1073 | |||
1074 | int crm_srt_kfmlp_lock(struct litmus_lock* l) | ||
1075 | { | ||
1076 | struct task_struct* t = current; | ||
1077 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1078 | struct kfmlp_queue* my_queue; | ||
1079 | wait_queue_t wait; | ||
1080 | unsigned long flags; | ||
1081 | |||
1082 | if (!is_realtime(t)) | ||
1083 | return -EPERM; | ||
1084 | |||
1085 | spin_lock_irqsave(&sem->lock, flags); | ||
1086 | |||
1087 | my_queue = sem->shortest_queue; | ||
1088 | |||
1089 | if (my_queue->owner) { | ||
1090 | /* resource is not free => must suspend and wait */ | ||
1091 | TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n", | ||
1092 | kfmlp_get_idx(sem, my_queue)); | ||
1093 | |||
1094 | init_waitqueue_entry(&wait, t); | ||
1095 | |||
1096 | /* FIXME: interruptible would be nice some day */ | ||
1097 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
1098 | |||
1099 | __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); | ||
1100 | |||
1101 | /* check if we need to activate priority inheritance */ | ||
1102 | if (rm_srt_higher_prio(t, my_queue->hp_waiter)) | ||
1103 | { | ||
1104 | my_queue->hp_waiter = t; | ||
1105 | if (rm_srt_higher_prio(t, my_queue->owner)) | ||
1106 | { | ||
1107 | set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); | ||
1108 | } | ||
1109 | } | ||
1110 | |||
1111 | ++(my_queue->count); | ||
1112 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
1113 | |||
1114 | /* release lock before sleeping */ | ||
1115 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1116 | |||
1117 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
1118 | * when we wake up; we are guaranteed to have the lock since | ||
1119 | * there is only one wake up per release (or steal). | ||
1120 | */ | ||
1121 | schedule(); | ||
1122 | |||
1123 | |||
1124 | if(my_queue->owner == t) | ||
1125 | { | ||
1126 | TRACE_CUR("queue %d: acquired through waiting\n", | ||
1127 | kfmlp_get_idx(sem, my_queue)); | ||
1128 | } | ||
1129 | else | ||
1130 | { | ||
1131 | /* this case may happen if our wait entry was stolen | ||
1132 | between queues. record where we went.*/ | ||
1133 | my_queue = kfmlp_get_queue(sem, t); | ||
1134 | BUG_ON(!my_queue); | ||
1135 | TRACE_CUR("queue %d: acquired through stealing\n", | ||
1136 | kfmlp_get_idx(sem, my_queue)); | ||
1137 | } | ||
1138 | } | ||
1139 | else | ||
1140 | { | ||
1141 | TRACE_CUR("queue %d: acquired immediately\n", | ||
1142 | kfmlp_get_idx(sem, my_queue)); | ||
1143 | |||
1144 | my_queue->owner = t; | ||
1145 | |||
1146 | ++(my_queue->count); | ||
1147 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
1148 | |||
1149 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1150 | } | ||
1151 | |||
1152 | return kfmlp_get_idx(sem, my_queue); | ||
1153 | } | ||
1154 | |||
1155 | int crm_srt_kfmlp_unlock(struct litmus_lock* l) | ||
1156 | { | ||
1157 | struct task_struct *t = current, *next; | ||
1158 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1159 | struct kfmlp_queue *my_queue; | ||
1160 | unsigned long flags; | ||
1161 | int err = 0; | ||
1162 | |||
1163 | spin_lock_irqsave(&sem->lock, flags); | ||
1164 | |||
1165 | my_queue = kfmlp_get_queue(sem, t); | ||
1166 | |||
1167 | if (!my_queue) { | ||
1168 | err = -EINVAL; | ||
1169 | goto out; | ||
1170 | } | ||
1171 | |||
1172 | /* check if there are jobs waiting for this resource */ | ||
1173 | next = __waitqueue_remove_first(&my_queue->wait); | ||
1174 | if (next) { | ||
1175 | /* | ||
1176 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", | ||
1177 | kfmlp_get_idx(sem, my_queue), | ||
1178 | next->comm, next->pid); | ||
1179 | */ | ||
1180 | /* next becomes the resource holder */ | ||
1181 | my_queue->owner = next; | ||
1182 | |||
1183 | --(my_queue->count); | ||
1184 | if(my_queue->count < sem->shortest_queue->count) | ||
1185 | { | ||
1186 | sem->shortest_queue = my_queue; | ||
1187 | } | ||
1188 | |||
1189 | TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", | ||
1190 | kfmlp_get_idx(sem, my_queue), next->comm, next->pid); | ||
1191 | |||
1192 | /* determine new hp_waiter if necessary */ | ||
1193 | if (next == my_queue->hp_waiter) { | ||
1194 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
1195 | /* next has the highest priority --- it doesn't need to | ||
1196 | * inherit. However, we need to make sure that the | ||
1197 | * next-highest priority in the queue is reflected in | ||
1198 | * hp_waiter. */ | ||
1199 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); | ||
1200 | if (my_queue->hp_waiter) | ||
1201 | TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); | ||
1202 | else | ||
1203 | TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); | ||
1204 | } else { | ||
1205 | /* Well, if next is not the highest-priority waiter, | ||
1206 | * then it ought to inherit the highest-priority | ||
1207 | * waiter's priority. */ | ||
1208 | set_priority_inheritance(next, my_queue->hp_waiter); | ||
1209 | } | ||
1210 | |||
1211 | /* wake up next */ | ||
1212 | wake_up_process(next); | ||
1213 | } | ||
1214 | else | ||
1215 | { | ||
1216 | TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue)); | ||
1217 | |||
1218 | next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */ | ||
1219 | |||
1220 | /* | ||
1221 | if(next) | ||
1222 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n", | ||
1223 | kfmlp_get_idx(sem, my_queue), | ||
1224 | next->comm, next->pid); | ||
1225 | */ | ||
1226 | |||
1227 | my_queue->owner = next; | ||
1228 | |||
1229 | if(next) | ||
1230 | { | ||
1231 | TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n", | ||
1232 | kfmlp_get_idx(sem, my_queue), | ||
1233 | next->comm, next->pid); | ||
1234 | |||
1235 | /* wake up next */ | ||
1236 | wake_up_process(next); | ||
1237 | } | ||
1238 | else | ||
1239 | { | ||
1240 | TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); | ||
1241 | |||
1242 | --(my_queue->count); | ||
1243 | if(my_queue->count < sem->shortest_queue->count) | ||
1244 | { | ||
1245 | sem->shortest_queue = my_queue; | ||
1246 | } | ||
1247 | } | ||
1248 | } | ||
1249 | |||
1250 | /* we lose the benefit of priority inheritance (if any) */ | ||
1251 | if (tsk_rt(t)->inh_task) | ||
1252 | clear_priority_inheritance(t); | ||
1253 | |||
1254 | out: | ||
1255 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1256 | |||
1257 | return err; | ||
1258 | } | ||
1259 | |||
1260 | int crm_srt_kfmlp_close(struct litmus_lock* l) | ||
1261 | { | ||
1262 | struct task_struct *t = current; | ||
1263 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1264 | struct kfmlp_queue *my_queue; | ||
1265 | unsigned long flags; | ||
1266 | |||
1267 | int owner; | ||
1268 | |||
1269 | spin_lock_irqsave(&sem->lock, flags); | ||
1270 | |||
1271 | my_queue = kfmlp_get_queue(sem, t); | ||
1272 | owner = (my_queue) ? (my_queue->owner == t) : 0; | ||
1273 | |||
1274 | spin_unlock_irqrestore(&sem->lock, flags); | ||
1275 | |||
1276 | if (owner) | ||
1277 | crm_srt_kfmlp_unlock(l); | ||
1278 | |||
1279 | return 0; | ||
1280 | } | ||
1281 | |||
1282 | void crm_srt_kfmlp_free(struct litmus_lock* l) | ||
1283 | { | ||
1284 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
1285 | kfree(sem->queues); | ||
1286 | kfree(sem); | ||
1287 | } | ||
1288 | |||
1289 | static struct litmus_lock_ops crm_srt_kfmlp_lock_ops = { | ||
1290 | .close = crm_srt_kfmlp_close, | ||
1291 | .lock = crm_srt_kfmlp_lock, | ||
1292 | .unlock = crm_srt_kfmlp_unlock, | ||
1293 | .deallocate = crm_srt_kfmlp_free, | ||
1294 | }; | ||
1295 | |||
1296 | static struct litmus_lock* crm_srt_new_kfmlp(void* __user arg, int* ret_code) | ||
1297 | { | ||
1298 | struct kfmlp_semaphore* sem; | ||
1299 | int num_resources = 0; | ||
1300 | int i; | ||
1301 | |||
1302 | if(!access_ok(VERIFY_READ, arg, sizeof(num_resources))) | ||
1303 | { | ||
1304 | *ret_code = -EINVAL; | ||
1305 | return(NULL); | ||
1306 | } | ||
1307 | if(__copy_from_user(&num_resources, arg, sizeof(num_resources))) | ||
1308 | { | ||
1309 | *ret_code = -EINVAL; | ||
1310 | return(NULL); | ||
1311 | } | ||
1312 | if(num_resources < 1) | ||
1313 | { | ||
1314 | *ret_code = -EINVAL; | ||
1315 | return(NULL); | ||
1316 | } | ||
1317 | |||
1318 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1319 | if(!sem) | ||
1320 | { | ||
1321 | *ret_code = -ENOMEM; | ||
1322 | return NULL; | ||
1323 | } | ||
1324 | |||
1325 | sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); | ||
1326 | if(!sem->queues) | ||
1327 | { | ||
1328 | kfree(sem); | ||
1329 | *ret_code = -ENOMEM; | ||
1330 | return NULL; | ||
1331 | } | ||
1332 | |||
1333 | sem->litmus_lock.ops = &crm_srt_kfmlp_lock_ops; | ||
1334 | spin_lock_init(&sem->lock); | ||
1335 | sem->num_resources = num_resources; | ||
1336 | |||
1337 | for(i = 0; i < num_resources; ++i) | ||
1338 | { | ||
1339 | sem->queues[i].owner = NULL; | ||
1340 | sem->queues[i].hp_waiter = NULL; | ||
1341 | init_waitqueue_head(&sem->queues[i].wait); | ||
1342 | sem->queues[i].count = 0; | ||
1343 | } | ||
1344 | |||
1345 | sem->shortest_queue = &sem->queues[0]; | ||
1346 | |||
1347 | *ret_code = 0; | ||
1348 | return &sem->litmus_lock; | ||
1349 | } | ||
1350 | |||
1351 | |||
1352 | /* **** lock constructor **** */ | ||
1353 | |||
1354 | static long crm_srt_allocate_lock(struct litmus_lock **lock, int type, | ||
1355 | void* __user arg) | ||
1356 | { | ||
1357 | int err = -ENXIO; | ||
1358 | |||
1359 | /* C-RM-SRT currently only supports the KFMLP for global resources | ||
1360 | WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */ | ||
1361 | switch (type) { | ||
1362 | case KFMLP_SEM: | ||
1363 | *lock = crm_srt_new_kfmlp(arg, &err); | ||
1364 | break; | ||
1365 | }; | ||
1366 | |||
1367 | return err; | ||
1368 | } | ||
1369 | |||
1370 | #endif // CONFIG_LITMUS_LOCKING | ||
1371 | |||
1372 | |||
1373 | |||
1374 | |||
1375 | |||
1376 | |||
1377 | /* total number of clusters */ | ||
1378 | static int num_clusters; | ||
1379 | /* we do not support clusters of different sizes */ | ||
1380 | static unsigned int cluster_size; | ||
1381 | |||
1382 | #ifdef VERBOSE_INIT | ||
1383 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
1384 | { | ||
1385 | int chk; | ||
1386 | char buf[255]; | ||
1387 | |||
1388 | chk = cpulist_scnprintf(buf, 254, mask); | ||
1389 | buf[chk] = '\0'; | ||
1390 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
1391 | |||
1392 | } | ||
1393 | #endif | ||
1394 | |||
1395 | static int clusters_allocated = 0; | ||
1396 | |||
1397 | static void cleanup_crm_srt(void) | ||
1398 | { | ||
1399 | int i; | ||
1400 | |||
1401 | if (clusters_allocated) { | ||
1402 | for (i = 0; i < num_clusters; i++) { | ||
1403 | kfree(crm_srt[i].cpus); | ||
1404 | kfree(crm_srt[i].heap_node); | ||
1405 | free_cpumask_var(crm_srt[i].cpu_map); | ||
1406 | } | ||
1407 | |||
1408 | kfree(crm_srt); | ||
1409 | } | ||
1410 | } | ||
1411 | |||
1412 | static long crm_srt_activate_plugin(void) | ||
1413 | { | ||
1414 | int i, j, cpu, ccpu, cpu_count; | ||
1415 | cpu_entry_t *entry; | ||
1416 | |||
1417 | cpumask_var_t mask; | ||
1418 | int chk = 0; | ||
1419 | |||
1420 | /* de-allocate old clusters, if any */ | ||
1421 | cleanup_crm_srt(); | ||
1422 | |||
1423 | printk(KERN_INFO "C-RM-SRT: Activate Plugin, cluster configuration = %d\n", | ||
1424 | cluster_config); | ||
1425 | |||
1426 | /* need to get cluster_size first */ | ||
1427 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
1428 | return -ENOMEM; | ||
1429 | |||
1430 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
1431 | cluster_size = num_online_cpus(); | ||
1432 | } else { | ||
1433 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
1434 | if (chk) { | ||
1435 | /* if chk != 0 then it is the max allowed index */ | ||
1436 | printk(KERN_INFO "C-RM-SRT: Cluster configuration = %d " | ||
1437 | "is not supported on this hardware.\n", | ||
1438 | cluster_config); | ||
1439 | /* User should notice that the configuration failed, so | ||
1440 | * let's bail out. */ | ||
1441 | return -EINVAL; | ||
1442 | } | ||
1443 | |||
1444 | cluster_size = cpumask_weight(mask); | ||
1445 | } | ||
1446 | |||
1447 | if ((num_online_cpus() % cluster_size) != 0) { | ||
1448 | /* this can't be right, some cpus are left out */ | ||
1449 | printk(KERN_ERR "C-RM-SRT: Trying to group %d cpus in %d!\n", | ||
1450 | num_online_cpus(), cluster_size); | ||
1451 | return -1; | ||
1452 | } | ||
1453 | |||
1454 | num_clusters = num_online_cpus() / cluster_size; | ||
1455 | printk(KERN_INFO "C-RM-SRT: %d cluster(s) of size = %d\n", | ||
1456 | num_clusters, cluster_size); | ||
1457 | |||
1458 | /* initialize clusters */ | ||
1459 | crm_srt = kmalloc(num_clusters * sizeof(crm_srt_domain_t), GFP_ATOMIC); | ||
1460 | for (i = 0; i < num_clusters; i++) { | ||
1461 | |||
1462 | crm_srt[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
1463 | GFP_ATOMIC); | ||
1464 | crm_srt[i].heap_node = kmalloc( | ||
1465 | cluster_size * sizeof(struct bheap_node), | ||
1466 | GFP_ATOMIC); | ||
1467 | bheap_init(&(crm_srt[i].cpu_heap)); | ||
1468 | rm_srt_domain_init(&(crm_srt[i].domain), NULL, crm_srt_release_jobs); | ||
1469 | |||
1470 | if(!zalloc_cpumask_var(&crm_srt[i].cpu_map, GFP_ATOMIC)) | ||
1471 | return -ENOMEM; | ||
1472 | } | ||
1473 | |||
1474 | /* cycle through clusters and add cpus to them */ | ||
1475 | for (i = 0; i < num_clusters; i++) { | ||
1476 | |||
1477 | for_each_online_cpu(cpu) { | ||
1478 | /* check if the cpu is already in a cluster */ | ||
1479 | for (j = 0; j < num_clusters; j++) | ||
1480 | if (cpumask_test_cpu(cpu, crm_srt[j].cpu_map)) | ||
1481 | break; | ||
1482 | /* if it is in a cluster go to next cpu */ | ||
1483 | if (j < num_clusters && | ||
1484 | cpumask_test_cpu(cpu, crm_srt[j].cpu_map)) | ||
1485 | continue; | ||
1486 | |||
1487 | /* this cpu isn't in any cluster */ | ||
1488 | /* get the shared cpus */ | ||
1489 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
1490 | cpumask_copy(mask, cpu_online_mask); | ||
1491 | else | ||
1492 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
1493 | |||
1494 | cpumask_copy(crm_srt[i].cpu_map, mask); | ||
1495 | #ifdef VERBOSE_INIT | ||
1496 | print_cluster_topology(mask, cpu); | ||
1497 | #endif | ||
1498 | /* add cpus to current cluster and init cpu_entry_t */ | ||
1499 | cpu_count = 0; | ||
1500 | for_each_cpu(ccpu, crm_srt[i].cpu_map) { | ||
1501 | |||
1502 | entry = &per_cpu(crm_srt_cpu_entries, ccpu); | ||
1503 | crm_srt[i].cpus[cpu_count] = entry; | ||
1504 | atomic_set(&entry->will_schedule, 0); | ||
1505 | entry->cpu = ccpu; | ||
1506 | entry->cluster = &crm_srt[i]; | ||
1507 | entry->hn = &(crm_srt[i].heap_node[cpu_count]); | ||
1508 | bheap_node_init(&entry->hn, entry); | ||
1509 | |||
1510 | cpu_count++; | ||
1511 | |||
1512 | entry->linked = NULL; | ||
1513 | entry->scheduled = NULL; | ||
1514 | update_cpu_position(entry); | ||
1515 | } | ||
1516 | /* done with this cluster */ | ||
1517 | break; | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1522 | { | ||
1523 | /* distribute the daemons evenly across the clusters. */ | ||
1524 | int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC); | ||
1525 | int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters; | ||
1526 | int left_over = NR_LITMUS_SOFTIRQD % num_clusters; | ||
1527 | |||
1528 | int daemon = 0; | ||
1529 | for(i = 0; i < num_clusters; ++i) | ||
1530 | { | ||
1531 | int num_on_this_cluster = num_daemons_per_cluster; | ||
1532 | if(left_over) | ||
1533 | { | ||
1534 | ++num_on_this_cluster; | ||
1535 | --left_over; | ||
1536 | } | ||
1537 | |||
1538 | for(j = 0; j < num_on_this_cluster; ++j) | ||
1539 | { | ||
1540 | // first CPU of this cluster | ||
1541 | affinity[daemon++] = i*cluster_size; | ||
1542 | } | ||
1543 | } | ||
1544 | |||
1545 | spawn_klitirqd(affinity); | ||
1546 | |||
1547 | kfree(affinity); | ||
1548 | } | ||
1549 | #endif | ||
1550 | |||
1551 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1552 | init_nvidia_info(); | ||
1553 | #endif | ||
1554 | |||
1555 | free_cpumask_var(mask); | ||
1556 | clusters_allocated = 1; | ||
1557 | return 0; | ||
1558 | } | ||
1559 | |||
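The activation path above derives cluster_size from the shared-cache map (or from all online CPUs for GLOBAL_CLUSTER), rejects configurations where the online CPU count is not a multiple of the cluster size, and, when klitirqd is enabled, spreads NR_LITMUS_SOFTIRQD daemons over the clusters with any remainder going to the first clusters. A small stand-alone check of that arithmetic (all values below are made up for the example):

#include <stdio.h>

int main(void)
{
	int online_cpus = 12, cluster_size = 4; /* e.g. one cluster per shared L3 */
	int nr_daemons = 7;                     /* stand-in for NR_LITMUS_SOFTIRQD */
	int i;

	if (online_cpus % cluster_size != 0) {
		fprintf(stderr, "cannot group %d cpus in %d\n",
		        online_cpus, cluster_size);
		return 1;
	}

	{
		int num_clusters = online_cpus / cluster_size; /* 3 clusters */
		int per_cluster = nr_daemons / num_clusters;   /* 2 daemons each */
		int left_over = nr_daemons % num_clusters;     /* 1 extra */

		for (i = 0; i < num_clusters; i++) {
			int n = per_cluster + (i < left_over ? 1 : 0);
			/* daemons of a cluster are pinned to its first CPU */
			printf("cluster %d: %d daemon(s) on cpu %d\n",
			       i, n, i * cluster_size);
		}
	}
	return 0;
}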
1560 | /* Plugin object */ | ||
1561 | static struct sched_plugin crm_srt_plugin __cacheline_aligned_in_smp = { | ||
1562 | .plugin_name = "C-RM-SRT", | ||
1563 | .finish_switch = crm_srt_finish_switch, | ||
1564 | .tick = crm_srt_tick, | ||
1565 | .task_new = crm_srt_task_new, | ||
1566 | .complete_job = complete_job, | ||
1567 | .task_exit = crm_srt_task_exit, | ||
1568 | .schedule = crm_srt_schedule, | ||
1569 | .task_wake_up = crm_srt_task_wake_up, | ||
1570 | .task_block = crm_srt_task_block, | ||
1571 | .admit_task = crm_srt_admit_task, | ||
1572 | .activate_plugin = crm_srt_activate_plugin, | ||
1573 | #ifdef CONFIG_LITMUS_LOCKING | ||
1574 | .allocate_lock = crm_srt_allocate_lock, | ||
1575 | .set_prio_inh = set_priority_inheritance, | ||
1576 | .clear_prio_inh = clear_priority_inheritance, | ||
1577 | #endif | ||
1578 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1579 | .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd, | ||
1580 | .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd, | ||
1581 | #endif | ||
1582 | }; | ||
1583 | |||
1584 | static struct proc_dir_entry *cluster_file = NULL, *crm_srt_dir = NULL; | ||
1585 | |||
1586 | static int __init init_crm_srt(void) | ||
1587 | { | ||
1588 | int err, fs; | ||
1589 | |||
1590 | err = register_sched_plugin(&crm_srt_plugin); | ||
1591 | if (!err) { | ||
1592 | fs = make_plugin_proc_dir(&crm_srt_plugin, &crm_srt_dir); | ||
1593 | if (!fs) | ||
1594 | cluster_file = create_cluster_file(crm_srt_dir, &cluster_config); | ||
1595 | else | ||
1596 | printk(KERN_ERR "Could not allocate C-RM-SRT procfs dir.\n"); | ||
1597 | } | ||
1598 | return err; | ||
1599 | } | ||
1600 | |||
1601 | static void clean_crm_srt(void) | ||
1602 | { | ||
1603 | cleanup_crm_srt(); | ||
1604 | if (cluster_file) | ||
1605 | remove_proc_entry("cluster", crm_srt_dir); | ||
1606 | if (crm_srt_dir) | ||
1607 | remove_plugin_proc_dir(&crm_srt_plugin); | ||
1608 | } | ||
1609 | |||
1610 | module_init(init_crm_srt); | ||
1611 | module_exit(clean_crm_srt); | ||
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index d04e0703c154..ac7685fe69f0 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c | |||
@@ -1155,12 +1155,14 @@ static inline struct kfmlp_queue* kfmlp_find_shortest( | |||
1155 | { | 1155 | { |
1156 | step = (step+1 != &sem->queues[sem->num_resources]) ? | 1156 | step = (step+1 != &sem->queues[sem->num_resources]) ? |
1157 | step+1 : &sem->queues[0]; | 1157 | step+1 : &sem->queues[0]; |
1158 | |||
1158 | if(step->count < shortest->count) | 1159 | if(step->count < shortest->count) |
1159 | { | 1160 | { |
1160 | shortest = step; | 1161 | shortest = step; |
1161 | if(step->count == 0) | 1162 | if(step->count == 0) |
1162 | break; /* can't get any shorter */ | 1163 | break; /* can't get any shorter */ |
1163 | } | 1164 | } |
1165 | |||
1164 | }while(step != search_start); | 1166 | }while(step != search_start); |
1165 | 1167 | ||
1166 | return(shortest); | 1168 | return(shortest); |
@@ -1369,7 +1371,9 @@ int gsnedf_kfmlp_unlock(struct litmus_lock* l) | |||
1369 | my_queue->owner = next; | 1371 | my_queue->owner = next; |
1370 | 1372 | ||
1371 | --(my_queue->count); | 1373 | --(my_queue->count); |
1372 | if(my_queue->count < sem->shortest_queue->count) | 1374 | // using '<=' rather than '<' is a crude way to build |
1375 | // affinity until tasks can tell us where they ran last... | ||
1376 | if(my_queue->count <= sem->shortest_queue->count) | ||
1373 | { | 1377 | { |
1374 | sem->shortest_queue = my_queue; | 1378 | sem->shortest_queue = my_queue; |
1375 | } | 1379 | } |
@@ -1428,7 +1432,9 @@ int gsnedf_kfmlp_unlock(struct litmus_lock* l) | |||
1428 | TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); | 1432 | TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue)); |
1429 | 1433 | ||
1430 | --(my_queue->count); | 1434 | --(my_queue->count); |
1431 | if(my_queue->count < sem->shortest_queue->count) | 1435 | // using '<=' rather than '<' is a crude way to build |
1436 | // affinity until tasks can tell us where they ran last... | ||
1437 | if(my_queue->count <= sem->shortest_queue->count) | ||
1432 | { | 1438 | { |
1433 | sem->shortest_queue = my_queue; | 1439 | sem->shortest_queue = my_queue; |
1434 | } | 1440 | } |
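The two hunks above adjust the KFMLP shortest-queue bookkeeping in the GSN-EDF plugin: the search walks the replica queues circularly from a starting point, and on unlock the '<=' comparison lets the queue a task just left reclaim the "shortest" pointer even on a tie, nudging later requests back toward the replica that was just used. A minimal user-space sketch of both pieces follows; struct kq, find_shortest() and on_unlock() are illustrative stand-ins, not the kernel's kfmlp types.

    /* Minimal user-space sketch of the KFMLP bookkeeping touched above. */
    #include <stdio.h>

    struct kq { int count; };

    /* circular scan from 'search_start', as in kfmlp_find_shortest() */
    static struct kq* find_shortest(struct kq *queues, int nr,
                                    struct kq *search_start)
    {
        struct kq *step = search_start;
        struct kq *shortest = search_start;

        do {
            step = (step + 1 != &queues[nr]) ? step + 1 : &queues[0];
            if (step->count < shortest->count) {
                shortest = step;
                if (step->count == 0)
                    break;  /* can't get any shorter */
            }
        } while (step != search_start);

        return shortest;
    }

    /* on unlock: '<=' (not '<') lets the queue we just left win ties,
     * crudely preserving affinity */
    static void on_unlock(struct kq *my_queue, struct kq **shortest_queue)
    {
        --my_queue->count;
        if (my_queue->count <= (*shortest_queue)->count)
            *shortest_queue = my_queue;
    }

    int main(void)
    {
        struct kq q[3] = { {2}, {1}, {1} };
        struct kq *shortest = find_shortest(q, 3, &q[0]);

        printf("shortest before unlock: queue %ld\n", (long)(shortest - q));
        on_unlock(&q[0], &shortest);
        printf("shortest after unlock:  queue %ld\n", (long)(shortest - q));
        return 0;
    }

In the example, queue 0 shrinks to the same length as the current shortest queue and takes over the pointer only because of the '=' in '<='.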
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 8802670a4b0b..e393d749baf5 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c | |||
@@ -152,6 +152,14 @@ static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd, | |||
152 | } | 152 | } |
153 | #endif | 153 | #endif |
154 | 154 | ||
155 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
156 | static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) | ||
157 | { | ||
158 | TRACE("PAI Tasklet unsupported in this plugin!!!!!!\n"); | ||
159 | return(0); // failure. | ||
160 | } | ||
161 | #endif | ||
162 | |||
155 | 163 | ||
156 | /* The default scheduler plugin. It doesn't do anything and lets Linux do its | 164 | /* The default scheduler plugin. It doesn't do anything and lets Linux do its |
157 | * job. | 165 | * job. |
@@ -177,6 +185,9 @@ struct sched_plugin linux_sched_plugin = { | |||
177 | .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq, | 185 | .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq, |
178 | .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd, | 186 | .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd, |
179 | #endif | 187 | #endif |
188 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
189 | .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, | ||
190 | #endif | ||
180 | .admit_task = litmus_dummy_admit_task | 191 | .admit_task = litmus_dummy_admit_task |
181 | }; | 192 | }; |
182 | 193 | ||
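sched_plugin.c gives the default (Linux) plugin a dummy enqueue_pai_tasklet that only reports failure, so callers can invoke the operation unconditionally and fall back to the normal softirq path when the active plugin has no PAI support. The sketch below shows that callback-with-safe-default pattern in isolation; the struct plugin and struct tasklet types here are simplified stand-ins, not the LITMUS^RT definitions.

    /* Sketch of the "dummy callback" pattern used by sched_plugin.c: every
     * plugin operation has a safe default, so callers never need to check
     * for NULL function pointers. */
    #include <stdio.h>

    struct tasklet { void (*func)(unsigned long); unsigned long data; };

    struct plugin {
        const char *name;
        /* returns nonzero if the tasklet was taken over, 0 on failure */
        int (*enqueue_pai_tasklet)(struct tasklet *t);
    };

    static int dummy_enqueue_pai_tasklet(struct tasklet *t)
    {
        (void)t;
        fprintf(stderr, "PAI tasklets unsupported by this plugin\n");
        return 0;  /* failure: caller falls back to the regular softirq path */
    }

    static struct plugin linux_plugin = {
        .name = "Linux",
        .enqueue_pai_tasklet = dummy_enqueue_pai_tasklet,
    };

    int main(void)
    {
        struct tasklet t = { 0 };
        if (!linux_plugin.enqueue_pai_tasklet(&t))
            printf("falling back to ordinary tasklet handling\n");
        return 0;
    }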
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 7aeb99b668d3..d079df2b292a 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c | |||
@@ -191,7 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, | |||
191 | if (rec) { | 191 | if (rec) { |
192 | rec->data.completion.when = now(); | 192 | rec->data.completion.when = now(); |
193 | rec->data.completion.forced = forced; | 193 | rec->data.completion.forced = forced; |
194 | #ifdef CONFIG_LITMUS_NVIDIA | ||
194 | rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); | 195 | rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); |
196 | #endif | ||
195 | put_record(rec); | 197 | put_record(rec); |
196 | } | 198 | } |
197 | } | 199 | } |
@@ -367,24 +369,29 @@ feather_callback void do_sched_trace_eff_prio_change(unsigned long id, | |||
367 | } | 369 | } |
368 | } | 370 | } |
369 | 371 | ||
370 | |||
371 | /* pray for no nesting of nv interrupts on same CPU... */ | 372 | /* pray for no nesting of nv interrupts on same CPU... */ |
372 | struct tracing_interrupt_map | 373 | struct tracing_interrupt_map |
373 | { | 374 | { |
374 | int active; | 375 | int active; |
375 | int count; | 376 | int count; |
376 | unsigned long data[128]; // assume nesting less than 128... | 377 | unsigned long data[128]; // assume nesting less than 128... |
378 | unsigned long serial[128]; | ||
377 | }; | 379 | }; |
378 | DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); | 380 | DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); |
379 | 381 | ||
382 | |||
383 | DEFINE_PER_CPU(u32, intCounter); | ||
384 | |||
380 | feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, | 385 | feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, |
381 | unsigned long _device) | 386 | unsigned long _device) |
382 | { | 387 | { |
383 | struct st_event_record *rec; | 388 | struct st_event_record *rec; |
389 | u32 serialNum; | ||
384 | 390 | ||
385 | { | 391 | { |
392 | u32* serial; | ||
386 | struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); | 393 | struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); |
387 | if(int_map->active == 0xcafebabe) | 394 | if(!int_map->active == 0xcafebabe) |
388 | { | 395 | { |
389 | int_map->count++; | 396 | int_map->count++; |
390 | } | 397 | } |
@@ -393,7 +400,12 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, | |||
393 | int_map->active = 0xcafebabe; | 400 | int_map->active = 0xcafebabe; |
394 | int_map->count = 1; | 401 | int_map->count = 1; |
395 | } | 402 | } |
396 | int_map->data[int_map->count-1] = _device; | 403 | //int_map->data[int_map->count-1] = _device; |
404 | |||
405 | serial = &per_cpu(intCounter, smp_processor_id()); | ||
406 | *serial += num_online_cpus(); | ||
407 | serialNum = *serial; | ||
408 | int_map->serial[int_map->count-1] = serialNum; | ||
397 | } | 409 | } |
398 | 410 | ||
399 | rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); | 411 | rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); |
@@ -401,6 +413,7 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, | |||
401 | u32 device = _device; | 413 | u32 device = _device; |
402 | rec->data.nv_interrupt_begin.when = now(); | 414 | rec->data.nv_interrupt_begin.when = now(); |
403 | rec->data.nv_interrupt_begin.device = device; | 415 | rec->data.nv_interrupt_begin.device = device; |
416 | rec->data.nv_interrupt_begin.serialNumber = serialNum; | ||
404 | put_record(rec); | 417 | put_record(rec); |
405 | } | 418 | } |
406 | } | 419 | } |
@@ -416,7 +429,7 @@ int is_interrupt_tracing_active(void) | |||
416 | } | 429 | } |
417 | */ | 430 | */ |
418 | 431 | ||
419 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long unused) | 432 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device) |
420 | { | 433 | { |
421 | struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); | 434 | struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); |
422 | if(int_map->active == 0xcafebabe) | 435 | if(int_map->active == 0xcafebabe) |
@@ -428,8 +441,11 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned | |||
428 | int_map->active = 0; | 441 | int_map->active = 0; |
429 | 442 | ||
430 | if(rec) { | 443 | if(rec) { |
444 | u32 device = _device; | ||
431 | rec->data.nv_interrupt_end.when = now(); | 445 | rec->data.nv_interrupt_end.when = now(); |
432 | rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; | 446 | //rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; |
447 | rec->data.nv_interrupt_end.device = device; | ||
448 | rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count]; | ||
433 | put_record(rec); | 449 | put_record(rec); |
434 | } | 450 | } |
435 | } | 451 | } |
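The sched_task_trace.c changes tag each NVIDIA interrupt record with a serial number drawn from a per-CPU counter that is advanced by num_online_cpus(); the apparent intent is that each CPU emits serials from its own residue class modulo the CPU count, so numbers never collide across CPUs. Note this only holds if the counters start at distinct offsets (DEFINE_PER_CPU zero-initializes them, so they would need to be seeded elsewhere, e.g. with the CPU index). The user-space sketch below, which seeds each counter with its CPU index as an assumption, illustrates the striding scheme.

    /* User-space sketch of the per-CPU serial-number scheme used by the
     * nv_interrupt_begin/end tracepoints. */
    #include <stdio.h>

    #define NR_CPUS 4

    static unsigned int int_counter[NR_CPUS];  /* stand-in for per-CPU intCounter */

    static unsigned int next_serial(int cpu, int nr_online)
    {
        int_counter[cpu] += nr_online;         /* stride by the CPU count */
        return int_counter[cpu];
    }

    int main(void)
    {
        int cpu, i;

        for (cpu = 0; cpu < NR_CPUS; ++cpu)
            int_counter[cpu] = cpu;            /* distinct starting offsets */

        for (cpu = 0; cpu < NR_CPUS; ++cpu) {
            printf("cpu %d:", cpu);
            for (i = 0; i < 4; ++i)
                printf(" %u", next_serial(cpu, NR_CPUS));
            printf("\n");
        }
        return 0;
    }

With four CPUs seeded 0..3, CPU 0 produces 4, 8, 12, ..., CPU 1 produces 5, 9, 13, ..., and so on: disjoint sequences even though every CPU uses the same stride.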
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c index d7d7d8bae298..5b7e6152416a 100644 --- a/litmus/sched_trace_external.c +++ b/litmus/sched_trace_external.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/module.h> |
2 | 2 | ||
3 | #include <litmus/trace.h> | ||
3 | #include <litmus/sched_trace.h> | 4 | #include <litmus/sched_trace.h> |
4 | #include <litmus/litmus.h> | 5 | #include <litmus/litmus.h> |
5 | 6 | ||
@@ -38,8 +39,26 @@ void __sched_trace_nv_interrupt_begin_external(u32 device) | |||
38 | } | 39 | } |
39 | EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); | 40 | EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); |
40 | 41 | ||
41 | void __sched_trace_nv_interrupt_end_external(void) | 42 | void __sched_trace_nv_interrupt_end_external(u32 device) |
42 | { | 43 | { |
43 | sched_trace_nv_interrupt_end(); | 44 | unsigned long _device = device; |
45 | sched_trace_nv_interrupt_end(_device); | ||
44 | } | 46 | } |
45 | EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); | 47 | EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); |
48 | |||
49 | |||
50 | #ifdef CONFIG_LITMUS_NVIDIA | ||
51 | |||
52 | #define EXX_TS(evt) \ | ||
53 | void __##evt(void) { evt; } \ | ||
54 | EXPORT_SYMBOL(__##evt); | ||
55 | |||
56 | EXX_TS(TS_NV_TOPISR_START) | ||
57 | EXX_TS(TS_NV_TOPISR_END) | ||
58 | EXX_TS(TS_NV_BOTISR_START) | ||
59 | EXX_TS(TS_NV_BOTISR_END) | ||
60 | EXX_TS(TS_NV_RELEASE_BOTISR_START) | ||
61 | EXX_TS(TS_NV_RELEASE_BOTISR_END) | ||
62 | |||
63 | #endif | ||
64 | |||
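The EXX_TS macro above wraps selected Feather-Trace timestamp macros in exported functions (__TS_NV_TOPISR_START() and friends) so that code outside this translation unit, such as the NVIDIA interrupt glue, can emit the timestamps without seeing the macro definitions. The sketch below shows how a wrapper macro of that shape expands; the printf-based TS_* macros are stand-ins for the real ones from litmus/trace.h, and EXPORT_SYMBOL is omitted because it has no user-space counterpart.

    /* Sketch of the EXX_TS idea: turn a statement-like timestamp macro into
     * a callable function named after it.  '##' pastes the unexpanded macro
     * name into the function name, while the plain use in the body expands
     * to the timestamp statement. */
    #include <stdio.h>

    #define TS_NV_TOPISR_START  printf("timestamp: TS_NV_TOPISR_START\n")
    #define TS_NV_TOPISR_END    printf("timestamp: TS_NV_TOPISR_END\n")

    #define EXX_TS(evt) \
        void __##evt(void) { evt; }

    EXX_TS(TS_NV_TOPISR_START)  /* defines __TS_NV_TOPISR_START() */
    EXX_TS(TS_NV_TOPISR_END)    /* defines __TS_NV_TOPISR_END()   */

    int main(void)
    {
        /* external code (e.g. a driver shim) calls the wrappers instead of
         * the macros */
        __TS_NV_TOPISR_START();
        __TS_NV_TOPISR_END();
        return 0;
    }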