author     Glenn Elliott <gelliott@cs.unc.edu>  2013-03-06 14:20:55 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>  2013-03-06 14:20:55 -0500
commit     22da1b2b4f02413e58bf01caa5b14e42e7913598 (patch)
tree       6e4022a5140e682d287c4206550848300bb7986b
parent     da954aa12e99b502356ca62bff822cb6a95cba7a (diff)
parent     c7cd5432b98df518b05bc8978d34382797fd9a05 (diff)
Merge remote-tracking branch 'github/master' into wip-mmap-uncache
-rw-r--r--  arch/x86/include/asm/feather_trace_32.h    96
-rw-r--r--  arch/x86/include/asm/feather_trace_64.h   101
-rw-r--r--  arch/x86/kernel/smp.c                      16
-rw-r--r--  include/linux/completion.h                  1
-rw-r--r--  include/litmus/binheap.h                  206
-rw-r--r--  include/litmus/budget.h                    27
-rw-r--r--  include/litmus/debug_trace.h                7
-rw-r--r--  include/litmus/fdso.h                      10
-rw-r--r--  include/litmus/fp_common.h                105
-rw-r--r--  include/litmus/fpmath.h                   147
-rw-r--r--  include/litmus/litmus.h                    77
-rw-r--r--  include/litmus/rt_param.h                  60
-rw-r--r--  include/litmus/sched_plugin.h               2
-rw-r--r--  include/litmus/sched_trace.h              105
-rw-r--r--  include/litmus/trace.h                     49
-rw-r--r--  include/litmus/trace_irq.h                  9
-rw-r--r--  include/litmus/wait.h                      57
-rw-r--r--  include/trace/events/litmus.h             231
-rw-r--r--  kernel/sched.c                             45
-rw-r--r--  kernel/sched_rt.c                          13
-rw-r--r--  kernel/softirq.c                            3
-rw-r--r--  litmus/Kconfig                             66
-rw-r--r--  litmus/Makefile                             5
-rw-r--r--  litmus/binheap.c                          388
-rw-r--r--  litmus/budget.c                             2
-rw-r--r--  litmus/ctrldev.c                           56
-rw-r--r--  litmus/edf_common.c                       108
-rw-r--r--  litmus/fdso.c                              26
-rw-r--r--  litmus/fp_common.c                        119
-rw-r--r--  litmus/ftdev.c                              9
-rw-r--r--  litmus/jobs.c                              30
-rw-r--r--  litmus/litmus.c                           129
-rw-r--r--  litmus/locking.c                           49
-rw-r--r--  litmus/preempt.c                            6
-rw-r--r--  litmus/rt_domain.c                         18
-rw-r--r--  litmus/sched_cedf.c                        47
-rw-r--r--  litmus/sched_gsn_edf.c                     54
-rw-r--r--  litmus/sched_litmus.c                       9
-rw-r--r--  litmus/sched_pfair.c                       19
-rw-r--r--  litmus/sched_pfp.c                       1711
-rw-r--r--  litmus/sched_psn_edf.c                     30
-rw-r--r--  litmus/sync.c                             106
-rw-r--r--  litmus/trace.c                            145
43 files changed, 4072 insertions, 427 deletions
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
index 70202f90f169..75e81a9f9382 100644
--- a/arch/x86/include/asm/feather_trace_32.h
+++ b/arch/x86/include/asm/feather_trace_32.h
@@ -1,12 +1,45 @@
1/* Copyright (c) 2007-2012 Björn Brandenburg, <bbb@mpi-sws.org>
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
1/* Do not directly include this file. Include feather_trace.h instead */ 24/* Do not directly include this file. Include feather_trace.h instead */
2 25
3#define feather_callback __attribute__((regparm(0))) 26#define feather_callback __attribute__((regparm(3))) __attribute__((used))
4 27
5/* 28/*
6 * make the compiler reload any register that is not saved in 29 * Make the compiler reload any register that is not saved in a cdecl function
7 * a cdecl function call 30 * call (minus the registers that we explicitly clobber as output registers).
8 */ 31 */
9#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx" 32#define __FT_CLOBBER_LIST0 "memory", "cc", "eax", "edx", "ecx"
33#define __FT_CLOBBER_LIST1 "memory", "cc", "eax", "ecx"
34#define __FT_CLOBBER_LIST2 "memory", "cc", "eax"
35#define __FT_CLOBBER_LIST3 "memory", "cc", "eax"
36
37#define __FT_TMP1(x) "=d" (x)
38#define __FT_ARG1(x) "0" ((long) (x))
39#define __FT_TMP2(x) "=c" (x)
40#define __FT_ARG2(x) "1" ((long) (x))
41
42#define __FT_ARG3(x) "r" ((long) (x))
10 43
11#define ft_event(id, callback) \ 44#define ft_event(id, callback) \
12 __asm__ __volatile__( \ 45 __asm__ __volatile__( \
@@ -16,64 +49,67 @@
16 ".long " #id ", 0, 1b, 2f \n\t" \ 49 ".long " #id ", 0, 1b, 2f \n\t" \
17 ".previous \n\t" \ 50 ".previous \n\t" \
18 "2: \n\t" \ 51 "2: \n\t" \
19 : : : CLOBBER_LIST) 52 : : : __FT_CLOBBER_LIST0)
20 53
21#define ft_event0(id, callback) \ 54#define ft_event0(id, callback) \
22 __asm__ __volatile__( \ 55 __asm__ __volatile__( \
23 "1: jmp 2f \n\t" \ 56 "1: jmp 2f \n\t" \
24 " subl $4, %%esp \n\t" \ 57 " movl $" #id ", %%eax \n\t" \
25 " movl $" #id ", (%%esp) \n\t" \
26 " call " #callback " \n\t" \ 58 " call " #callback " \n\t" \
27 " addl $4, %%esp \n\t" \
28 ".section __event_table, \"aw\" \n\t" \ 59 ".section __event_table, \"aw\" \n\t" \
29 ".long " #id ", 0, 1b, 2f \n\t" \ 60 ".long " #id ", 0, 1b, 2f \n\t" \
30 ".previous \n\t" \ 61 ".previous \n\t" \
31 "2: \n\t" \ 62 "2: \n\t" \
32 : : : CLOBBER_LIST) 63 : : : __FT_CLOBBER_LIST0)
33 64
34#define ft_event1(id, callback, param) \ 65#define ft_event1(id, callback, param) \
66 do { \
67 long __ft_tmp1; \
35 __asm__ __volatile__( \ 68 __asm__ __volatile__( \
36 "1: jmp 2f \n\t" \ 69 "1: jmp 2f \n\t" \
37 " subl $8, %%esp \n\t" \ 70 " movl $" #id ", %%eax \n\t" \
38 " movl %0, 4(%%esp) \n\t" \
39 " movl $" #id ", (%%esp) \n\t" \
40 " call " #callback " \n\t" \ 71 " call " #callback " \n\t" \
41 " addl $8, %%esp \n\t" \
42 ".section __event_table, \"aw\" \n\t" \ 72 ".section __event_table, \"aw\" \n\t" \
43 ".long " #id ", 0, 1b, 2f \n\t" \ 73 ".long " #id ", 0, 1b, 2f \n\t" \
44 ".previous \n\t" \ 74 ".previous \n\t" \
45 "2: \n\t" \ 75 "2: \n\t" \
46 : : "r" (param) : CLOBBER_LIST) 76 : __FT_TMP1(__ft_tmp1) \
77 : __FT_ARG1(param) \
78 : __FT_CLOBBER_LIST1); \
79 } while (0);
47 80
48#define ft_event2(id, callback, param, param2) \ 81#define ft_event2(id, callback, param, param2) \
82 do { \
83 long __ft_tmp1, __ft_tmp2; \
49 __asm__ __volatile__( \ 84 __asm__ __volatile__( \
50 "1: jmp 2f \n\t" \ 85 "1: jmp 2f \n\t" \
51 " subl $12, %%esp \n\t" \ 86 " movl $" #id ", %%eax \n\t" \
52 " movl %1, 8(%%esp) \n\t" \
53 " movl %0, 4(%%esp) \n\t" \
54 " movl $" #id ", (%%esp) \n\t" \
55 " call " #callback " \n\t" \ 87 " call " #callback " \n\t" \
56 " addl $12, %%esp \n\t" \
57 ".section __event_table, \"aw\" \n\t" \ 88 ".section __event_table, \"aw\" \n\t" \
58 ".long " #id ", 0, 1b, 2f \n\t" \ 89 ".long " #id ", 0, 1b, 2f \n\t" \
59 ".previous \n\t" \ 90 ".previous \n\t" \
60 "2: \n\t" \ 91 "2: \n\t" \
61 : : "r" (param), "r" (param2) : CLOBBER_LIST) 92 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
93 : __FT_ARG1(param), __FT_ARG2(param2) \
94 : __FT_CLOBBER_LIST2); \
95 } while (0);
62 96
63 97
64#define ft_event3(id, callback, p, p2, p3) \ 98#define ft_event3(id, callback, param, param2, param3) \
99 do { \
100 long __ft_tmp1, __ft_tmp2; \
65 __asm__ __volatile__( \ 101 __asm__ __volatile__( \
66 "1: jmp 2f \n\t" \ 102 "1: jmp 2f \n\t" \
67 " subl $16, %%esp \n\t" \ 103 " subl $4, %%esp \n\t" \
68 " movl %2, 12(%%esp) \n\t" \ 104 " movl $" #id ", %%eax \n\t" \
69 " movl %1, 8(%%esp) \n\t" \ 105 " movl %2, (%%esp) \n\t" \
70 " movl %0, 4(%%esp) \n\t" \
71 " movl $" #id ", (%%esp) \n\t" \
72 " call " #callback " \n\t" \ 106 " call " #callback " \n\t" \
73 " addl $16, %%esp \n\t" \ 107 " addl $4, %%esp \n\t" \
74 ".section __event_table, \"aw\" \n\t" \ 108 ".section __event_table, \"aw\" \n\t" \
75 ".long " #id ", 0, 1b, 2f \n\t" \ 109 ".long " #id ", 0, 1b, 2f \n\t" \
76 ".previous \n\t" \ 110 ".previous \n\t" \
77 "2: \n\t" \ 111 "2: \n\t" \
78 : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) 112 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
79 113 : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \
114 : __FT_CLOBBER_LIST3); \
115 } while (0);
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
index 54ac2aeb3a28..5ce49e2eebba 100644
--- a/arch/x86/include/asm/feather_trace_64.h
+++ b/arch/x86/include/asm/feather_trace_64.h
@@ -1,67 +1,124 @@
1/* Copyright (c) 2010 Andrea Bastoni, <bastoni@cs.unc.edu>
2 * Copyright (c) 2012 Björn Brandenburg, <bbb@mpi-sws.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
1/* Do not directly include this file. Include feather_trace.h instead */ 25/* Do not directly include this file. Include feather_trace.h instead */
2 26
3/* regparm is the default on x86_64 */ 27/* regparm is the default on x86_64 */
4#define feather_callback 28#define feather_callback __attribute__((used))
5 29
6# define _EVENT_TABLE(id,from,to) \ 30#define __FT_EVENT_TABLE(id,from,to) \
7 ".section __event_table, \"aw\"\n\t" \ 31 ".section __event_table, \"aw\"\n\t" \
8 ".balign 8\n\t" \ 32 ".balign 8\n\t" \
9 ".quad " #id ", 0, " #from ", " #to " \n\t" \ 33 ".quad " #id ", 0, " #from ", " #to " \n\t" \
10 ".previous \n\t" 34 ".previous \n\t"
11 35
12/* 36/*
13 * x86_64 callee only owns rbp, rbx, r12 -> r15 37 * x86_64 caller only owns rbp, rbx, r12-r15;
14 * the called can freely modify the others 38 * the callee can freely modify the others.
15 */ 39 */
16#define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \ 40#define __FT_CLOBBER_LIST0 "memory", "cc", "rdi", "rsi", "rdx", "rcx", \
41 "r8", "r9", "r10", "r11", "rax"
42
43#define __FT_CLOBBER_LIST1 "memory", "cc", "rdi", "rdx", "rcx", \
44 "r8", "r9", "r10", "r11", "rax"
45
46#define __FT_CLOBBER_LIST2 "memory", "cc", "rdi", "rcx", \
17 "r8", "r9", "r10", "r11", "rax" 47 "r8", "r9", "r10", "r11", "rax"
18 48
49#define __FT_CLOBBER_LIST3 "memory", "cc", "rdi", \
50 "r8", "r9", "r10", "r11", "rax"
51
52/* The registers RDI, RSI, RDX, RCX, R8 and R9 are used for integer and pointer
53 * arguments. */
54
55/* RSI */
56#define __FT_TMP1(x) "=S" (x)
57#define __FT_ARG1(x) "0" ((long) (x))
58
59/* RDX */
60#define __FT_TMP2(x) "=d" (x)
61#define __FT_ARG2(x) "1" ((long) (x))
62
63/* RCX */
64#define __FT_TMP3(x) "=c" (x)
65#define __FT_ARG3(x) "2" ((long) (x))
66
19#define ft_event(id, callback) \ 67#define ft_event(id, callback) \
20 __asm__ __volatile__( \ 68 __asm__ __volatile__( \
21 "1: jmp 2f \n\t" \ 69 "1: jmp 2f \n\t" \
22 " call " #callback " \n\t" \ 70 " call " #callback " \n\t" \
23 _EVENT_TABLE(id,1b,2f) \ 71 __FT_EVENT_TABLE(id,1b,2f) \
24 "2: \n\t" \ 72 "2: \n\t" \
25 : : : CLOBBER_LIST) 73 : : : __FT_CLOBBER_LIST0)
26 74
27#define ft_event0(id, callback) \ 75#define ft_event0(id, callback) \
28 __asm__ __volatile__( \ 76 __asm__ __volatile__( \
29 "1: jmp 2f \n\t" \ 77 "1: jmp 2f \n\t" \
30 " movq $" #id ", %%rdi \n\t" \ 78 " movq $" #id ", %%rdi \n\t" \
31 " call " #callback " \n\t" \ 79 " call " #callback " \n\t" \
32 _EVENT_TABLE(id,1b,2f) \ 80 __FT_EVENT_TABLE(id,1b,2f) \
33 "2: \n\t" \ 81 "2: \n\t" \
34 : : : CLOBBER_LIST) 82 : : : __FT_CLOBBER_LIST0)
35 83
36#define ft_event1(id, callback, param) \ 84#define ft_event1(id, callback, param) \
85 do { \
86 long __ft_tmp1; \
37 __asm__ __volatile__( \ 87 __asm__ __volatile__( \
38 "1: jmp 2f \n\t" \ 88 "1: jmp 2f \n\t" \
39 " movq %0, %%rsi \n\t" \
40 " movq $" #id ", %%rdi \n\t" \ 89 " movq $" #id ", %%rdi \n\t" \
41 " call " #callback " \n\t" \ 90 " call " #callback " \n\t" \
42 _EVENT_TABLE(id,1b,2f) \ 91 __FT_EVENT_TABLE(id,1b,2f) \
43 "2: \n\t" \ 92 "2: \n\t" \
44 : : "r" (param) : CLOBBER_LIST) 93 : __FT_TMP1(__ft_tmp1) \
94 : __FT_ARG1(param) \
95 : __FT_CLOBBER_LIST1); \
96 } while (0);
45 97
46#define ft_event2(id, callback, param, param2) \ 98#define ft_event2(id, callback, param, param2) \
99 do { \
100 long __ft_tmp1, __ft_tmp2; \
47 __asm__ __volatile__( \ 101 __asm__ __volatile__( \
48 "1: jmp 2f \n\t" \ 102 "1: jmp 2f \n\t" \
49 " movq %1, %%rdx \n\t" \
50 " movq %0, %%rsi \n\t" \
51 " movq $" #id ", %%rdi \n\t" \ 103 " movq $" #id ", %%rdi \n\t" \
52 " call " #callback " \n\t" \ 104 " call " #callback " \n\t" \
53 _EVENT_TABLE(id,1b,2f) \ 105 __FT_EVENT_TABLE(id,1b,2f) \
54 "2: \n\t" \ 106 "2: \n\t" \
55 : : "r" (param), "r" (param2) : CLOBBER_LIST) 107 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
108 : __FT_ARG1(param), __FT_ARG2(param2) \
109 : __FT_CLOBBER_LIST2); \
110 } while (0);
56 111
57#define ft_event3(id, callback, p, p2, p3) \ 112#define ft_event3(id, callback, param, param2, param3) \
113 do { \
114 long __ft_tmp1, __ft_tmp2, __ft_tmp3; \
58 __asm__ __volatile__( \ 115 __asm__ __volatile__( \
59 "1: jmp 2f \n\t" \ 116 "1: jmp 2f \n\t" \
60 " movq %2, %%rcx \n\t" \
61 " movq %1, %%rdx \n\t" \
62 " movq %0, %%rsi \n\t" \
63 " movq $" #id ", %%rdi \n\t" \ 117 " movq $" #id ", %%rdi \n\t" \
64 " call " #callback " \n\t" \ 118 " call " #callback " \n\t" \
65 _EVENT_TABLE(id,1b,2f) \ 119 __FT_EVENT_TABLE(id,1b,2f) \
66 "2: \n\t" \ 120 "2: \n\t" \
67 : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) 121 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2), __FT_TMP3(__ft_tmp3) \
122 : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \
123 : __FT_CLOBBER_LIST3); \
124 } while (0);
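
The rewritten ft_event*() macros above pass the event id and up to three parameters in registers rather than on the stack, and the handler is a feather_callback function whose first argument is the event id. A minimal handler/trigger sketch follows; the event id 42 and both function names are made up for illustration:

#include <litmus/feather_trace.h>

/* Handler must have external linkage: the ft_event*() asm calls it by name. */
feather_callback void demo_handler(unsigned long event, unsigned long payload)
{
	/* record (event, payload), e.g. into a Feather-Trace buffer */
}

static inline void demo_trigger(unsigned long payload)
{
	/* The leading "jmp 2f" skips the call until the event is enabled
	 * (ft_enable_event()); the payload travels in a register. */
	ft_event1(42, demo_handler, payload);
}
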
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index ed4c4f54e2ae..7539d84628f7 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -25,7 +25,6 @@
25 25
26#include <litmus/preempt.h> 26#include <litmus/preempt.h>
27#include <litmus/debug_trace.h> 27#include <litmus/debug_trace.h>
28#include <litmus/trace.h>
29 28
30#include <asm/mtrr.h> 29#include <asm/mtrr.h>
31#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
@@ -122,7 +121,6 @@ static void native_smp_send_reschedule(int cpu)
122 WARN_ON(1); 121 WARN_ON(1);
123 return; 122 return;
124 } 123 }
125 TS_SEND_RESCHED_START(cpu);
126 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); 124 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
127} 125}
128 126
@@ -214,18 +212,16 @@ static void native_stop_other_cpus(int wait)
214void smp_reschedule_interrupt(struct pt_regs *regs) 212void smp_reschedule_interrupt(struct pt_regs *regs)
215{ 213{
216 ack_APIC_irq(); 214 ack_APIC_irq();
217 /* LITMUS^RT: this IPI might need to trigger the sched state machine. */
218 sched_state_ipi();
219 inc_irq_stat(irq_resched_count); 215 inc_irq_stat(irq_resched_count);
220 /*
221 * LITMUS^RT: starting from 3.0 schedule_ipi() actually does something.
222 * This may increase IPI latencies compared with previous versions.
223 */
224 scheduler_ipi(); 216 scheduler_ipi();
225 TS_SEND_RESCHED_END;
226 /* 217 /*
227 * KVM uses this interrupt to force a cpu out of guest mode 218 * KVM uses this interrupt to force a cpu out of guest mode
228 */ 219 */
220
221 /* LITMUS^RT: this IPI might need to trigger the sched state machine.
222 * Starting from 3.0 schedule_ipi() actually does something. This may
223 * increase IPI latencies compared with previous versions. */
224 sched_state_ipi();
229} 225}
230 226
231void smp_call_function_interrupt(struct pt_regs *regs) 227void smp_call_function_interrupt(struct pt_regs *regs)
@@ -251,8 +247,10 @@ extern void hrtimer_pull(void);
251void smp_pull_timers_interrupt(struct pt_regs *regs) 247void smp_pull_timers_interrupt(struct pt_regs *regs)
252{ 248{
253 ack_APIC_irq(); 249 ack_APIC_irq();
250 irq_enter();
254 TRACE("pull timer interrupt\n"); 251 TRACE("pull timer interrupt\n");
255 hrtimer_pull(); 252 hrtimer_pull();
253 irq_exit();
256} 254}
257 255
258struct smp_ops smp_ops = { 256struct smp_ops smp_ops = {
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 9d727271c9fe..51494e6b5548 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -90,7 +90,6 @@ extern bool completion_done(struct completion *x);
90 90
91extern void complete(struct completion *); 91extern void complete(struct completion *);
92extern void complete_all(struct completion *); 92extern void complete_all(struct completion *);
93extern void complete_n(struct completion *, int n);
94 93
95/** 94/**
96 * INIT_COMPLETION - reinitialize a completion structure 95 * INIT_COMPLETION - reinitialize a completion structure
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
new file mode 100644
index 000000000000..901a30a3e296
--- /dev/null
+++ b/include/litmus/binheap.h
@@ -0,0 +1,206 @@
1#ifndef LITMUS_BINARY_HEAP_H
2#define LITMUS_BINARY_HEAP_H
3
4#include <linux/kernel.h>
5
6/**
7 * Simple binary heap with add, arbitrary delete, delete_root, and top
8 * operations.
9 *
10 * Style meant to conform with list.h.
11 *
12 * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
13 * heap may be overkill (and perhaps not general enough) for some applications.
14 *
15 * Note: In order to make node swaps fast, a node inserted with a data pointer
16 * may not always hold said data pointer. This is similar to the binomial heap
17 * implementation. This does make node deletion tricky since we have to
18 * (1) locate the node that holds the data pointer to delete, and (2) the
19 * node that was originally inserted with said data pointer. These have to be
20 * coalesced into a single node before removal (see usage of
21 * __binheap_safe_swap()). We have to track node references to accomplish this.
22 */
23
24struct binheap_node {
25 void *data;
26 struct binheap_node *parent;
27 struct binheap_node *left;
28 struct binheap_node *right;
29
30 /* pointer to binheap_node that holds *data for which this binheap_node
31 * was originally inserted. (*data "owns" this node)
32 */
33 struct binheap_node *ref;
34 struct binheap_node **ref_ptr;
35};
36
37/**
 38 * Signature of comparator function. Assumed 'less-than' (min-heap).
39 * Pass in 'greater-than' for max-heap.
40 *
41 * TODO: Consider macro-based implementation that allows comparator to be
42 * inlined (similar to Linux red/black tree) for greater efficiency.
43 */
44typedef int (*binheap_order_t)(struct binheap_node *a,
45 struct binheap_node *b);
46
47
48struct binheap {
49 struct binheap_node *root;
50
51 /* pointer to node to take next inserted child */
52 struct binheap_node *next;
53
54 /* pointer to last node in complete binary tree */
55 struct binheap_node *last;
56
57 /* comparator function pointer */
58 binheap_order_t compare;
59};
60
61
62/* Initialized heap nodes not in a heap have parent
63 * set to BINHEAP_POISON.
64 */
65#define BINHEAP_POISON ((void*)(0xdeadbeef))
66
67
68/**
69 * binheap_entry - get the struct for this heap node.
70 * Only valid when called upon heap nodes other than the root handle.
71 * @ptr: the heap node.
72 * @type: the type of struct pointed to by binheap_node::data.
73 * @member: unused.
74 */
75#define binheap_entry(ptr, type, member) \
76((type *)((ptr)->data))
77
78/**
79 * binheap_node_container - get the struct that contains this node.
80 * Only valid when called upon heap nodes other than the root handle.
81 * @ptr: the heap node.
82 * @type: the type of struct the node is embedded in.
83 * @member: the name of the binheap_struct within the (type) struct.
84 */
85#define binheap_node_container(ptr, type, member) \
86container_of((ptr), type, member)
87
88/**
89 * binheap_top_entry - get the struct for the node at the top of the heap.
90 * Only valid when called upon the heap handle node.
91 * @ptr: the special heap-handle node.
92 * @type: the type of the struct the head is embedded in.
93 * @member: the name of the binheap_struct within the (type) struct.
94 */
95#define binheap_top_entry(ptr, type, member) \
96binheap_entry((ptr)->root, type, member)
97
98/**
99 * binheap_delete_root - remove the root element from the heap.
100 * @handle: handle to the heap.
101 * @type: the type of the struct the head is embedded in.
102 * @member: the name of the binheap_struct within the (type) struct.
103 */
104#define binheap_delete_root(handle, type, member) \
105__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
106
107/**
108 * binheap_delete - remove an arbitrary element from the heap.
109 * @to_delete: pointer to node to be removed.
110 * @handle: handle to the heap.
111 */
112#define binheap_delete(to_delete, handle) \
113__binheap_delete((to_delete), (handle))
114
115/**
116 * binheap_add - insert an element to the heap
117 * new_node: node to add.
118 * @handle: handle to the heap.
119 * @type: the type of the struct the head is embedded in.
120 * @member: the name of the binheap_struct within the (type) struct.
121 */
122#define binheap_add(new_node, handle, type, member) \
123__binheap_add((new_node), (handle), container_of((new_node), type, member))
124
125/**
126 * binheap_decrease - re-eval the position of a node (based upon its
127 * original data pointer).
128 * @handle: handle to the heap.
129 * @orig_node: node that was associated with the data pointer
130 * (whose value has changed) when said pointer was
131 * added to the heap.
132 */
133#define binheap_decrease(orig_node, handle) \
134__binheap_decrease((orig_node), (handle))
135
136#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL}
137
138#define BINHEAP_NODE(name) \
139 struct binheap_node name = BINHEAP_NODE_INIT()
140
141
142static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
143{
144 n->data = NULL;
145 n->parent = BINHEAP_POISON;
146 n->left = NULL;
147 n->right = NULL;
148 n->ref = NULL;
149 n->ref_ptr = NULL;
150}
151
152static inline void INIT_BINHEAP_HANDLE(struct binheap *handle,
153 binheap_order_t compare)
154{
155 handle->root = NULL;
156 handle->next = NULL;
157 handle->last = NULL;
158 handle->compare = compare;
159}
160
161/* Returns true if binheap is empty. */
162static inline int binheap_empty(struct binheap *handle)
163{
164 return(handle->root == NULL);
165}
166
167/* Returns true if binheap node is in a heap. */
168static inline int binheap_is_in_heap(struct binheap_node *node)
169{
170 return (node->parent != BINHEAP_POISON);
171}
172
173/* Returns true if binheap node is in given heap. */
174int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap);
175
176/* Add a node to a heap */
177void __binheap_add(struct binheap_node *new_node,
178 struct binheap *handle,
179 void *data);
180
181/**
182 * Removes the root node from the heap. The node is removed after coalescing
183 * the binheap_node with its original data pointer at the root of the tree.
184 *
185 * The 'last' node in the tree is then swapped up to the root and bubbled
186 * down.
187 */
188void __binheap_delete_root(struct binheap *handle,
189 struct binheap_node *container);
190
191/**
 192 * Delete an arbitrary node. Bubbles the node to delete up to the root,
 193 * and then deletes it at the root.
194 */
195void __binheap_delete(struct binheap_node *node_to_delete,
196 struct binheap *handle);
197
198/**
199 * Bubble up a node whose pointer has decreased in value.
200 */
201void __binheap_decrease(struct binheap_node *orig_node,
202 struct binheap *handle);
203
204
205#endif
206
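
Because the new <litmus/binheap.h> interface is macro-based, a short usage sketch may help; the struct, field, and function names below are hypothetical:

#include <litmus/binheap.h>

struct my_item {
	unsigned long long deadline;
	struct binheap_node node;       /* heap node embedded in the item */
};

/* 'less-than' comparator => min-heap ordered by deadline */
static int earlier_deadline(struct binheap_node *a, struct binheap_node *b)
{
	struct my_item *ia = binheap_entry(a, struct my_item, node);
	struct my_item *ib = binheap_entry(b, struct my_item, node);
	return ia->deadline < ib->deadline;
}

static struct binheap my_heap;

static void binheap_example(struct my_item *it)
{
	INIT_BINHEAP_HANDLE(&my_heap, earlier_deadline);
	INIT_BINHEAP_NODE(&it->node);

	binheap_add(&it->node, &my_heap, struct my_item, node);

	if (!binheap_empty(&my_heap)) {
		struct my_item *top =
			binheap_top_entry(&my_heap, struct my_item, node);
		/* ... use top ... */
		binheap_delete_root(&my_heap, struct my_item, node);
	}
}
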
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
index 732530e63491..33344ee8d5f9 100644
--- a/include/litmus/budget.h
+++ b/include/litmus/budget.h
@@ -5,4 +5,31 @@
5 * the next task. */ 5 * the next task. */
6void update_enforcement_timer(struct task_struct* t); 6void update_enforcement_timer(struct task_struct* t);
7 7
8inline static int budget_exhausted(struct task_struct* t)
9{
10 return get_exec_time(t) >= get_exec_cost(t);
11}
12
13inline static lt_t budget_remaining(struct task_struct* t)
14{
15 if (!budget_exhausted(t))
16 return get_exec_cost(t) - get_exec_time(t);
17 else
18 /* avoid overflow */
19 return 0;
20}
21
22#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
23
24#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
25 == PRECISE_ENFORCEMENT)
26
27static inline int requeue_preempted_job(struct task_struct* t)
28{
29 /* Add task to ready queue only if not subject to budget enforcement or
30 * if the job has budget remaining. t may be NULL.
31 */
32 return t && (!budget_exhausted(t) || !budget_enforced(t));
33}
34
8#endif 35#endif
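
The budget helpers moved in from litmus.h, together with the new requeue_preempted_job(), are intended for a plugin's scheduling decision. A minimal sketch, where the requeue helper is hypothetical:

#include <litmus/litmus.h>
#include <litmus/budget.h>

static void my_requeue(struct task_struct *t);  /* hypothetical plugin helper */

static void put_back_if_viable(struct task_struct *prev)
{
	/* NULL-safe; skips jobs whose enforced budget is already exhausted */
	if (requeue_preempted_job(prev))
		my_requeue(prev);
}
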
diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
index 48d086d5a44c..1266ac6a760c 100644
--- a/include/litmus/debug_trace.h
+++ b/include/litmus/debug_trace.h
@@ -28,8 +28,11 @@ extern atomic_t __log_seq_no;
28 TRACE_ARGS, ## args) 28 TRACE_ARGS, ## args)
29 29
30#define TRACE_TASK(t, fmt, args...) \ 30#define TRACE_TASK(t, fmt, args...) \
31 TRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid, \ 31 TRACE("(%s/%d:%d) " fmt, \
32 (t)->rt_param.job_params.job_no, ##args) 32 t ? (t)->comm : "null", \
33 t ? (t)->pid : 0, \
34 t ? (t)->rt_param.job_params.job_no : 0, \
35 ##args)
33 36
34#define TRACE_CUR(fmt, args...) \ 37#define TRACE_CUR(fmt, args...) \
35 TRACE_TASK(current, fmt, ## args) 38 TRACE_TASK(current, fmt, ## args)
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index caf2a1e6918c..f2115b83f1e4 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -12,7 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14 14
15#define MAX_OBJECT_DESCRIPTORS 32 15#define MAX_OBJECT_DESCRIPTORS 85
16 16
17typedef enum { 17typedef enum {
18 MIN_OBJ_TYPE = 0, 18 MIN_OBJ_TYPE = 0,
@@ -20,7 +20,13 @@ typedef enum {
20 FMLP_SEM = 0, 20 FMLP_SEM = 0,
21 SRP_SEM = 1, 21 SRP_SEM = 1,
22 22
23 MAX_OBJ_TYPE = 1 23 MPCP_SEM = 2,
24 MPCP_VS_SEM = 3,
25 DPCP_SEM = 4,
26
27 PCP_SEM = 5,
28
29 MAX_OBJ_TYPE = 5
24} obj_type_t; 30} obj_type_t;
25 31
26struct inode_obj_id { 32struct inode_obj_id {
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 000000000000..19356c0fa6c1
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,105 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
 26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60 BUG_ON(index >= LITMUS_MAX_PRIORITY);
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105#endif
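
A usage sketch for the new fixed-priority ready queue; the static queue and surrounding dispatch logic are hypothetical, and get_priority() comes from the litmus.h changes later in this patch:

#include <litmus/litmus.h>
#include <litmus/fp_common.h>

static struct fp_prio_queue ready;

static void fp_example(struct task_struct *t)
{
	fp_prio_queue_init(&ready);

	/* insert at the task's fixed priority (lower index = higher priority);
	 * assumes t is an admitted real-time task, i.e. its heap_node exists */
	fp_prio_add(&ready, t, get_priority(t));

	/* pull the highest-priority pending task, if any */
	t = fp_prio_take(&ready);
	if (t) {
		/* ... dispatch t ... */
	}
}
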
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..642de98542c8
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,147 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#include <linux/math64.h>
5
6#ifndef __KERNEL__
7#include <stdint.h>
8#define abs(x) (((x) < 0) ? -(x) : x)
9#endif
10
11// Use 64-bit because we want to track things at the nanosecond scale.
12// This can lead to very large numbers.
13typedef int64_t fpbuf_t;
14typedef struct
15{
16 fpbuf_t val;
17} fp_t;
18
19#define FP_SHIFT 10
20#define ROUND_BIT (FP_SHIFT - 1)
21
22#define _fp(x) ((fp_t) {x})
23
24#ifdef __KERNEL__
25static const fp_t LITMUS_FP_ZERO = {.val = 0};
26static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
27#endif
28
29static inline fp_t FP(fpbuf_t x)
30{
31 return _fp(((fpbuf_t) x) << FP_SHIFT);
32}
33
34/* divide two integers to obtain a fixed point value */
35static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
36{
37 return _fp(div64_s64(FP(a).val, (b)));
38}
39
40static inline fpbuf_t _point(fp_t x)
41{
42 return (x.val % (1 << FP_SHIFT));
43
44}
45
46#define fp2str(x) x.val
47/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
48#define _FP_ "%ld/1024"
49
50static inline fpbuf_t _floor(fp_t x)
51{
52 return x.val >> FP_SHIFT;
53}
54
55/* FIXME: negative rounding */
56static inline fpbuf_t _round(fp_t x)
57{
58 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
59}
60
61/* multiply two fixed point values */
62static inline fp_t _mul(fp_t a, fp_t b)
63{
64 return _fp((a.val * b.val) >> FP_SHIFT);
65}
66
67static inline fp_t _div(fp_t a, fp_t b)
68{
69#if !defined(__KERNEL__) && !defined(unlikely)
70#define unlikely(x) (x)
71#define DO_UNDEF_UNLIKELY
72#endif
73 /* try not to overflow */
74 if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
75 return _fp((a.val / b.val) << FP_SHIFT);
76 else
77 return _fp((a.val << FP_SHIFT) / b.val);
78#ifdef DO_UNDEF_UNLIKELY
79#undef unlikely
80#undef DO_UNDEF_UNLIKELY
81#endif
82}
83
84static inline fp_t _add(fp_t a, fp_t b)
85{
86 return _fp(a.val + b.val);
87}
88
89static inline fp_t _sub(fp_t a, fp_t b)
90{
91 return _fp(a.val - b.val);
92}
93
94static inline fp_t _neg(fp_t x)
95{
96 return _fp(-x.val);
97}
98
99static inline fp_t _abs(fp_t x)
100{
101 return _fp(abs(x.val));
102}
103
104/* works the same as casting float/double to integer */
105static inline fpbuf_t _fp_to_integer(fp_t x)
106{
107 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
108}
109
110static inline fp_t _integer_to_fp(fpbuf_t x)
111{
112 return _frac(x,1);
113}
114
115static inline int _leq(fp_t a, fp_t b)
116{
117 return a.val <= b.val;
118}
119
120static inline int _geq(fp_t a, fp_t b)
121{
122 return a.val >= b.val;
123}
124
125static inline int _lt(fp_t a, fp_t b)
126{
127 return a.val < b.val;
128}
129
130static inline int _gt(fp_t a, fp_t b)
131{
132 return a.val > b.val;
133}
134
135static inline int _eq(fp_t a, fp_t b)
136{
137 return a.val == b.val;
138}
139
140static inline fp_t _max(fp_t a, fp_t b)
141{
142 if (a.val < b.val)
143 return b;
144 else
145 return a;
146}
147#endif
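
As an example, the fpmath helpers can express a task's utilization without floating point; a sketch (the function name is made up, and FP_SHIFT = 10 gives roughly three decimal digits of precision):

#include <litmus/litmus.h>
#include <litmus/fpmath.h>

static int utilization_above_half(struct task_struct *t)
{
	fp_t util = _frac(get_exec_cost(t), get_rt_period(t));  /* C / P */

	return _gt(_mul(util, FP(2)), LITMUS_FP_ONE);           /* 2C/P > 1 ? */
}
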
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 0b071fd359f9..875783e6a67b 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -45,38 +45,23 @@ void litmus_exit_task(struct task_struct *tsk);
45#define tsk_rt(t) (&(t)->rt_param) 45#define tsk_rt(t) (&(t)->rt_param)
46 46
47/* Realtime utility macros */ 47/* Realtime utility macros */
48#define get_rt_flags(t) (tsk_rt(t)->flags) 48#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
49#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) 49#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
50
51/* task_params macros */
50#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost) 52#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
51#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
52#define get_rt_period(t) (tsk_rt(t)->task_params.period) 53#define get_rt_period(t) (tsk_rt(t)->task_params.period)
54#define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline)
53#define get_rt_phase(t) (tsk_rt(t)->task_params.phase) 55#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
54#define get_partition(t) (tsk_rt(t)->task_params.cpu) 56#define get_partition(t) (tsk_rt(t)->task_params.cpu)
57#define get_priority(t) (tsk_rt(t)->task_params.priority)
58#define get_class(t) (tsk_rt(t)->task_params.cls)
59
60/* job_param macros */
61#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
55#define get_deadline(t) (tsk_rt(t)->job_params.deadline) 62#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
56#define get_release(t) (tsk_rt(t)->job_params.release) 63#define get_release(t) (tsk_rt(t)->job_params.release)
57#define get_class(t) (tsk_rt(t)->task_params.cls) 64#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
58
59#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
60#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
61
62inline static int budget_exhausted(struct task_struct* t)
63{
64 return get_exec_time(t) >= get_exec_cost(t);
65}
66
67inline static lt_t budget_remaining(struct task_struct* t)
68{
69 if (!budget_exhausted(t))
70 return get_exec_cost(t) - get_exec_time(t);
71 else
72 /* avoid overflow */
73 return 0;
74}
75
76#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
77
78#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
79 == PRECISE_ENFORCEMENT)
80 65
81#define is_hrt(t) \ 66#define is_hrt(t) \
82 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) 67 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
@@ -245,6 +230,11 @@ static inline int is_present(struct task_struct* t)
245 return t && tsk_rt(t)->present; 230 return t && tsk_rt(t)->present;
246} 231}
247 232
233static inline int is_completed(struct task_struct* t)
234{
235 return t && tsk_rt(t)->completed;
236}
237
248 238
249/* make the unit explicit */ 239/* make the unit explicit */
250typedef unsigned long quanta_t; 240typedef unsigned long quanta_t;
@@ -272,4 +262,39 @@ static inline quanta_t time2quanta(lt_t time, enum round round)
272/* By how much is cpu staggered behind CPU 0? */ 262/* By how much is cpu staggered behind CPU 0? */
273u64 cpu_stagger_offset(int cpu); 263u64 cpu_stagger_offset(int cpu);
274 264
265static inline struct control_page* get_control_page(struct task_struct *t)
266{
267 return tsk_rt(t)->ctrl_page;
268}
269
270static inline int has_control_page(struct task_struct* t)
271{
272 return tsk_rt(t)->ctrl_page != NULL;
273}
274
275
276#ifdef CONFIG_SCHED_OVERHEAD_TRACE
277
278#define TS_SYSCALL_IN_START \
279 if (has_control_page(current)) { \
280 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); \
281 }
282
283#define TS_SYSCALL_IN_END \
284 if (has_control_page(current)) { \
285 uint64_t irqs; \
286 local_irq_disable(); \
287 irqs = get_control_page(current)->irq_count - \
288 get_control_page(current)->irq_syscall_start; \
289 __TS_SYSCALL_IN_END(&irqs); \
290 local_irq_enable(); \
291 }
292
293#else
294
295#define TS_SYSCALL_IN_START
296#define TS_SYSCALL_IN_END
297
298#endif
299
275#endif 300#endif
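
The new TS_SYSCALL_IN_* wrappers consume the time stamp and IRQ count that userspace deposits in its control page before entering the kernel. A sketch of how a LITMUS^RT system call entry point might use them; the syscall itself is hypothetical:

#include <linux/linkage.h>
#include <litmus/litmus.h>

asmlinkage long sys_demo_litmus_call(void)
{
	TS_SYSCALL_IN_START;   /* uses control_page->ts_syscall_start */
	TS_SYSCALL_IN_END;     /* records IRQs since irq_syscall_start */

	/* ... actual system call work ... */

	return 0;
}
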
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index d6d799174160..4cd06dd32906 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -33,22 +33,44 @@ typedef enum {
33 PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */ 33 PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */
34} budget_policy_t; 34} budget_policy_t;
35 35
36/* We use the common priority interpretation "lower index == higher priority",
37 * which is commonly used in fixed-priority schedulability analysis papers.
38 * So, a numerically lower priority value implies higher scheduling priority,
39 * with priority 1 being the highest priority. Priority 0 is reserved for
40 * priority boosting. LITMUS_MAX_PRIORITY denotes the maximum priority value
41 * range.
42 */
43
44#define LITMUS_MAX_PRIORITY 512
45#define LITMUS_HIGHEST_PRIORITY 1
46#define LITMUS_LOWEST_PRIORITY (LITMUS_MAX_PRIORITY - 1)
47
48/* Provide generic comparison macros for userspace,
49 * in case that we change this later. */
50#define litmus_higher_fixed_prio(a, b) (a < b)
51#define litmus_lower_fixed_prio(a, b) (a > b)
52#define litmus_is_valid_fixed_prio(p) \
53 ((p) >= LITMUS_HIGHEST_PRIORITY && \
54 (p) <= LITMUS_LOWEST_PRIORITY)
55
36struct rt_task { 56struct rt_task {
37 lt_t exec_cost; 57 lt_t exec_cost;
38 lt_t period; 58 lt_t period;
59 lt_t relative_deadline;
39 lt_t phase; 60 lt_t phase;
40 unsigned int cpu; 61 unsigned int cpu;
62 unsigned int priority;
41 task_class_t cls; 63 task_class_t cls;
42 budget_policy_t budget_policy; /* ignored by pfair */ 64 budget_policy_t budget_policy; /* ignored by pfair */
43}; 65};
44 66
45union np_flag { 67union np_flag {
46 uint32_t raw; 68 uint64_t raw;
47 struct { 69 struct {
48 /* Is the task currently in a non-preemptive section? */ 70 /* Is the task currently in a non-preemptive section? */
49 uint32_t flag:31; 71 uint64_t flag:31;
50 /* Should the task call into the scheduler? */ 72 /* Should the task call into the scheduler? */
51 uint32_t preempt:1; 73 uint64_t preempt:1;
52 } np; 74 } np;
53}; 75};
54 76
@@ -67,11 +89,29 @@ union np_flag {
67 * determining preemption/migration overheads). 89 * determining preemption/migration overheads).
68 */ 90 */
69struct control_page { 91struct control_page {
 92 /* This flag is used by userspace to communicate non-preemptive
93 * sections. */
70 volatile union np_flag sched; 94 volatile union np_flag sched;
71 95
96 volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is
97 * handled. */
98
99 /* Locking overhead tracing: userspace records here the time stamp
100 * and IRQ counter prior to starting the system call. */
101 uint64_t ts_syscall_start; /* Feather-Trace cycles */
102 uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
103 * started. */
104
72 /* to be extended */ 105 /* to be extended */
73}; 106};
74 107
108/* Expected offsets within the control page. */
109
110#define LITMUS_CP_OFFSET_SCHED 0
111#define LITMUS_CP_OFFSET_IRQ_COUNT 8
112#define LITMUS_CP_OFFSET_TS_SC_START 16
113#define LITMUS_CP_OFFSET_IRQ_SC_START 24
114
75/* don't export internal data structures to user space (liblitmus) */ 115/* don't export internal data structures to user space (liblitmus) */
76#ifdef __KERNEL__ 116#ifdef __KERNEL__
77 117
@@ -88,6 +128,12 @@ struct rt_job {
88 /* How much service has this job received so far? */ 128 /* How much service has this job received so far? */
89 lt_t exec_time; 129 lt_t exec_time;
90 130
 131 /* By how much did the prior job miss its deadline?
132 * Value differs from tardiness in that lateness may
133 * be negative (when job finishes before its deadline).
134 */
135 long long lateness;
136
91 /* Which job is this. This is used to let user space 137 /* Which job is this. This is used to let user space
92 * specify which job to wait for, which is important if jobs 138 * specify which job to wait for, which is important if jobs
93 * overrun. If we just call sys_sleep_next_period() then we 139 * overrun. If we just call sys_sleep_next_period() then we
@@ -114,6 +160,9 @@ struct rt_param {
114 /* is the task present? (true if it can be scheduled) */ 160 /* is the task present? (true if it can be scheduled) */
115 unsigned int present:1; 161 unsigned int present:1;
116 162
163 /* has the task completed? */
164 unsigned int completed:1;
165
117#ifdef CONFIG_LITMUS_LOCKING 166#ifdef CONFIG_LITMUS_LOCKING
118 /* Is the task being priority-boosted by a locking protocol? */ 167 /* Is the task being priority-boosted by a locking protocol? */
119 unsigned int priority_boosted:1; 168 unsigned int priority_boosted:1;
@@ -199,11 +248,6 @@ struct rt_param {
199 struct control_page * ctrl_page; 248 struct control_page * ctrl_page;
200}; 249};
201 250
202/* Possible RT flags */
203#define RT_F_RUNNING 0x00000000
204#define RT_F_SLEEP 0x00000001
205#define RT_F_EXIT_SEM 0x00000008
206
207#endif 251#endif
208 252
209#endif 253#endif
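
Since userspace (liblitmus) relies on the advertised LITMUS_CP_OFFSET_* constants, a compile-time layout check is the natural companion; a sketch (placement and function name are hypothetical):

#include <linux/kernel.h>
#include <litmus/rt_param.h>

static inline void check_control_page_layout(void)
{
	BUILD_BUG_ON(offsetof(struct control_page, sched) !=
		     LITMUS_CP_OFFSET_SCHED);
	BUILD_BUG_ON(offsetof(struct control_page, irq_count) !=
		     LITMUS_CP_OFFSET_IRQ_COUNT);
	BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start) !=
		     LITMUS_CP_OFFSET_TS_SC_START);
	BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start) !=
		     LITMUS_CP_OFFSET_IRQ_SC_START);
}
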
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 6e7cabdddae8..1546ab7f1d66 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -53,10 +53,12 @@ typedef void (*task_block_t) (struct task_struct *task);
53 */ 53 */
54typedef void (*task_exit_t) (struct task_struct *); 54typedef void (*task_exit_t) (struct task_struct *);
55 55
56#ifdef CONFIG_LITMUS_LOCKING
56/* Called when the current task attempts to create a new lock of a given 57/* Called when the current task attempts to create a new lock of a given
57 * protocol type. */ 58 * protocol type. */
58typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, 59typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
59 void* __user config); 60 void* __user config);
61#endif
60 62
61 63
62/********************* sys call backends ********************/ 64/********************* sys call backends ********************/
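
Together with the object types added to fdso.h earlier in this patch, a plugin's allocate_lock() callback would dispatch roughly as follows; the constructor names are hypothetical:

#include <linux/errno.h>
#include <litmus/fdso.h>
#include <litmus/locking.h>

/* hypothetical protocol constructors */
static struct litmus_lock* demo_new_mpcp(void* __user config);
static struct litmus_lock* demo_new_dpcp(void* __user config);

static long demo_allocate_lock(struct litmus_lock **lock, int type,
			       void* __user config)
{
	switch (type) {
	case MPCP_SEM:
		*lock = demo_new_mpcp(config);
		break;
	case DPCP_SEM:
		*lock = demo_new_dpcp(config);
		break;
	default:
		return -ENXIO;          /* protocol not supported */
	}
	return *lock ? 0 : -ENOMEM;
}
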
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 7ca34cb13881..82bde8241298 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -164,34 +164,93 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
164 164
165#endif 165#endif
166 166
167#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
168
169#include <trace/events/litmus.h>
170
171#else
172
173/* Override trace macros to actually do nothing */
174#define trace_litmus_task_param(t)
175#define trace_litmus_task_release(t)
176#define trace_litmus_switch_to(t)
177#define trace_litmus_switch_away(prev)
178#define trace_litmus_task_completion(t, forced)
179#define trace_litmus_task_block(t)
180#define trace_litmus_task_resume(t)
181#define trace_litmus_sys_release(start)
182
183#endif
184
167 185
168#define SCHED_TRACE_BASE_ID 500 186#define SCHED_TRACE_BASE_ID 500
169 187
170 188
171#define sched_trace_task_name(t) \ 189#define sched_trace_task_name(t) \
172 SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t) 190 SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, \
173#define sched_trace_task_param(t) \ 191 do_sched_trace_task_name, t)
174 SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t) 192
175#define sched_trace_task_release(t) \ 193#define sched_trace_task_param(t) \
176 SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t) 194 do { \
177#define sched_trace_task_switch_to(t) \ 195 SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, \
178 SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t) 196 do_sched_trace_task_param, t); \
179#define sched_trace_task_switch_away(t) \ 197 trace_litmus_task_param(t); \
180 SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t) 198 } while (0)
181#define sched_trace_task_completion(t, forced) \ 199
182 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \ 200#define sched_trace_task_release(t) \
183 (unsigned long) forced) 201 do { \
184#define sched_trace_task_block(t) \ 202 SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, \
185 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t) 203 do_sched_trace_task_release, t); \
186#define sched_trace_task_resume(t) \ 204 trace_litmus_task_release(t); \
187 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t) 205 } while (0)
188#define sched_trace_action(t, action) \ 206
189 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9, do_sched_trace_action, t, \ 207#define sched_trace_task_switch_to(t) \
190 (unsigned long) action); 208 do { \
191/* when is a pointer, it does not need an explicit cast to unsigned long */ 209 SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, \
192#define sched_trace_sys_release(when) \ 210 do_sched_trace_task_switch_to, t); \
193 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when) 211 trace_litmus_switch_to(t); \
212 } while (0)
213
214#define sched_trace_task_switch_away(t) \
215 do { \
216 SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, \
217 do_sched_trace_task_switch_away, t); \
218 trace_litmus_switch_away(t); \
219 } while (0)
220
221#define sched_trace_task_completion(t, forced) \
222 do { \
223 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, \
224 do_sched_trace_task_completion, t, \
225 (unsigned long) forced); \
226 trace_litmus_task_completion(t, forced); \
227 } while (0)
228
229#define sched_trace_task_block(t) \
230 do { \
231 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, \
232 do_sched_trace_task_block, t); \
233 trace_litmus_task_block(t); \
234 } while (0)
235
236#define sched_trace_task_resume(t) \
237 do { \
238 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, \
239 do_sched_trace_task_resume, t); \
240 trace_litmus_task_resume(t); \
241 } while (0)
242
243#define sched_trace_action(t, action) \
244 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9, \
245 do_sched_trace_action, t, (unsigned long) action);
194 246
247/* when is a pointer, it does not need an explicit cast to unsigned long */
248#define sched_trace_sys_release(when) \
249 do { \
250 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, \
251 do_sched_trace_sys_release, when); \
252 trace_litmus_sys_release(when); \
253 } while (0)
195 254
196#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ 255#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
197 256
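
From a plugin's perspective nothing changes: a single call to the wrapper now emits both the Feather-Trace binary record and, when CONFIG_SCHED_LITMUS_TRACEPOINT is set, the corresponding kernel tracepoint. Sketch (the release helper is hypothetical):

#include <litmus/sched_trace.h>

static void demo_release_job(struct task_struct *t)
{
	/* ... set up release time and deadline of the next job ... */
	sched_trace_task_release(t);
}
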
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index e809376d6487..8ad4966c602e 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -3,6 +3,7 @@
3 3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE 4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5 5
6
6#include <litmus/feather_trace.h> 7#include <litmus/feather_trace.h>
7#include <litmus/feather_buffer.h> 8#include <litmus/feather_buffer.h>
8 9
@@ -16,7 +17,8 @@ enum task_type_marker {
16}; 17};
17 18
18struct timestamp { 19struct timestamp {
19 uint64_t timestamp; 20 uint64_t timestamp:48;
21 uint64_t pid:16;
20 uint32_t seq_no; 22 uint32_t seq_no;
21 uint8_t cpu; 23 uint8_t cpu;
22 uint8_t event; 24 uint8_t event;
@@ -31,11 +33,16 @@ feather_callback void save_timestamp_def(unsigned long event, unsigned long type
31feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr); 33feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr);
32feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu); 34feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu);
33feather_callback void save_task_latency(unsigned long event, unsigned long when_ptr); 35feather_callback void save_task_latency(unsigned long event, unsigned long when_ptr);
36feather_callback void save_timestamp_time(unsigned long event, unsigned long time_ptr);
37feather_callback void save_timestamp_irq(unsigned long event, unsigned long irq_count_ptr);
38feather_callback void save_timestamp_hide_irq(unsigned long event);
34 39
35#define TIMESTAMP(id) ft_event0(id, save_timestamp) 40#define TIMESTAMP(id) ft_event0(id, save_timestamp)
36 41
37#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def) 42#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def)
38 43
44#define TIMESTAMP_CUR(id) DTIMESTAMP(id, is_realtime(current) ? TSK_RT : TSK_BE)
45
39#define TTIMESTAMP(id, task) \ 46#define TTIMESTAMP(id, task) \
40 ft_event1(id, save_timestamp_task, (unsigned long) task) 47 ft_event1(id, save_timestamp_task, (unsigned long) task)
41 48
@@ -45,18 +52,35 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
45#define LTIMESTAMP(id, task) \ 52#define LTIMESTAMP(id, task) \
46 ft_event1(id, save_task_latency, (unsigned long) task) 53 ft_event1(id, save_task_latency, (unsigned long) task)
47 54
55#define TIMESTAMP_TIME(id, time_ptr) \
56 ft_event1(id, save_timestamp_time, (unsigned long) time_ptr)
57
58#define TIMESTAMP_IRQ(id, irq_count_ptr) \
59 ft_event1(id, save_timestamp_irq, (unsigned long) irq_count_ptr)
60
61#define TIMESTAMP_IN_IRQ(id) \
62 ft_event0(id, save_timestamp_hide_irq)
63
48#else /* !CONFIG_SCHED_OVERHEAD_TRACE */ 64#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
49 65
50#define TIMESTAMP(id) /* no tracing */ 66#define TIMESTAMP(id) /* no tracing */
51 67
52#define DTIMESTAMP(id, def) /* no tracing */ 68#define DTIMESTAMP(id, def) /* no tracing */
53 69
70#define TIMESTAMP_CUR(id) /* no tracing */
71
54#define TTIMESTAMP(id, task) /* no tracing */ 72#define TTIMESTAMP(id, task) /* no tracing */
55 73
56#define CTIMESTAMP(id, cpu) /* no tracing */ 74#define CTIMESTAMP(id, cpu) /* no tracing */
57 75
58#define LTIMESTAMP(id, when_ptr) /* no tracing */ 76#define LTIMESTAMP(id, when_ptr) /* no tracing */
59 77
78#define TIMESTAMP_TIME(id, time_ptr) /* no tracing */
79
80#define TIMESTAMP_IRQ(id, irq_count_ptr) /* no tracing */
81
82#define TIMESTAMP_IN_IRQ(id) /* no tracing */
83
60#endif 84#endif
61 85
62 86
@@ -68,7 +92,20 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
68 * always the next number after the start time event id. 92 * always the next number after the start time event id.
69 */ 93 */
70 94
95#define __TS_SYSCALL_IN_START(p) TIMESTAMP_TIME(10, p)
96#define __TS_SYSCALL_IN_END(p) TIMESTAMP_IRQ(11, p)
97
98#define TS_SYSCALL_OUT_START TIMESTAMP_CUR(20)
99#define TS_SYSCALL_OUT_END TIMESTAMP_CUR(21)
100
101#define TS_LOCK_START TIMESTAMP_CUR(30)
102#define TS_LOCK_END TIMESTAMP_CUR(31)
71 103
104#define TS_LOCK_SUSPEND TIMESTAMP_CUR(38)
105#define TS_LOCK_RESUME TIMESTAMP_CUR(39)
106
107#define TS_UNLOCK_START TIMESTAMP_CUR(40)
108#define TS_UNLOCK_END TIMESTAMP_CUR(41)
72 109
73#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only 110#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
74 * care 111 * care
@@ -100,16 +137,8 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
100#define TS_EXIT_NP_START TIMESTAMP(150) 137#define TS_EXIT_NP_START TIMESTAMP(150)
101#define TS_EXIT_NP_END TIMESTAMP(151) 138#define TS_EXIT_NP_END TIMESTAMP(151)
102 139
103#define TS_LOCK_START TIMESTAMP(170)
104#define TS_LOCK_SUSPEND TIMESTAMP(171)
105#define TS_LOCK_RESUME TIMESTAMP(172)
106#define TS_LOCK_END TIMESTAMP(173)
107
108#define TS_UNLOCK_START TIMESTAMP(180)
109#define TS_UNLOCK_END TIMESTAMP(181)
110
111#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) 140#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
112#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) 141#define TS_SEND_RESCHED_END TIMESTAMP_IN_IRQ(191)
113 142
114#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) 143#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when))
115 144
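
With the lock probes now defined via TIMESTAMP_CUR(), a locking protocol's acquire path would be instrumented roughly like this; protocol internals are omitted and the function name is hypothetical:

#include <litmus/trace.h>

static void demo_protocol_acquire(void)
{
	TS_LOCK_START;
	/* ... attempt the acquisition; if the caller must block, emit
	 * TS_LOCK_SUSPEND before suspending and TS_LOCK_RESUME after
	 * waking up ... */
	TS_LOCK_END;
}
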
diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h
index f18b127a089d..0d0c042ba9c3 100644
--- a/include/litmus/trace_irq.h
+++ b/include/litmus/trace_irq.h
@@ -3,14 +3,7 @@
3 3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE 4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5 5
6extern DEFINE_PER_CPU(atomic_t, irq_fired_count); 6void ft_irq_fired(void);
7
8static inline void ft_irq_fired(void)
9{
10 /* Only called with preemptions disabled. */
11 atomic_inc(&__get_cpu_var(irq_fired_count));
12}
13
14 7
15#else 8#else
16 9
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 000000000000..ce1347c355f8
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
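
A sketch of the intended use of the priority-ordered wait queue, using the task's deadline as the priority point; the function name is hypothetical:

#include <linux/wait.h>
#include <litmus/litmus.h>
#include <litmus/wait.h>

static void enqueue_by_deadline(wait_queue_head_t *wq)
{
	prio_wait_queue_t pwq;

	init_prio_waitqueue_entry(&pwq, current, get_deadline(current));
	add_wait_queue_prio_exclusive(wq, &pwq);
	/* the caller would now set its task state and call schedule() */
}
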
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
new file mode 100644
index 000000000000..0fffcee02be0
--- /dev/null
+++ b/include/trace/events/litmus.h
@@ -0,0 +1,231 @@
1/*
2 * LITMUS^RT kernel style scheduling tracepoints
3 */
4#undef TRACE_SYSTEM
5#define TRACE_SYSTEM litmus
6
7#if !defined(_SCHED_TASK_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ)
8#define _SCHED_TASK_TRACEPOINT_H
9
10#include <linux/tracepoint.h>
11
12#include <litmus/litmus.h>
13#include <litmus/rt_param.h>
14
15/*
16 * Tracing task admission
17 */
18TRACE_EVENT(litmus_task_param,
19
20 TP_PROTO(struct task_struct *t),
21
22 TP_ARGS(t),
23
24 TP_STRUCT__entry(
25 __field( pid_t, pid )
26 __field( unsigned int, job )
27 __field( lt_t, wcet )
28 __field( lt_t, period )
29 __field( lt_t, phase )
30 __field( int, partition )
31 ),
32
33 TP_fast_assign(
34 __entry->pid = t ? t->pid : 0;
35 __entry->job = t ? t->rt_param.job_params.job_no : 0;
36 __entry->wcet = get_exec_cost(t);
37 __entry->period = get_rt_period(t);
38 __entry->phase = get_rt_phase(t);
39 __entry->partition = get_partition(t);
40 ),
41
42 TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n",
43 __entry->pid, __entry->period,
44 __entry->pid, __entry->wcet)
45);
46
47/*
48 * Tracing jobs release
49 */
50TRACE_EVENT(litmus_task_release,
51
52 TP_PROTO(struct task_struct *t),
53
54 TP_ARGS(t),
55
56 TP_STRUCT__entry(
57 __field( pid_t, pid )
58 __field( unsigned int, job )
59 __field( lt_t, release )
60 __field( lt_t, deadline )
61 ),
62
63 TP_fast_assign(
64 __entry->pid = t ? t->pid : 0;
65 __entry->job = t ? t->rt_param.job_params.job_no : 0;
66 __entry->release = get_release(t);
67 __entry->deadline = get_deadline(t);
68 ),
69
70 TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n",
71 __entry->pid, __entry->job, __entry->release,
72 __entry->pid, __entry->job, __entry->deadline)
73);
74
75/*
76 * Tracepoint for switching to new task
77 */
78TRACE_EVENT(litmus_switch_to,
79
80 TP_PROTO(struct task_struct *t),
81
82 TP_ARGS(t),
83
84 TP_STRUCT__entry(
85 __field( pid_t, pid )
86 __field( unsigned int, job )
87 __field( lt_t, when )
88 __field( lt_t, exec_time )
89 ),
90
91 TP_fast_assign(
92 __entry->pid = is_realtime(t) ? t->pid : 0;
93 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
94 __entry->when = litmus_clock();
95 __entry->exec_time = get_exec_time(t);
96 ),
97
98 TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n",
99 __entry->pid, __entry->job,
100 __entry->when, __entry->exec_time)
101);
102
103/*
104 * Tracepoint for switching away previous task
105 */
106TRACE_EVENT(litmus_switch_away,
107
108 TP_PROTO(struct task_struct *t),
109
110 TP_ARGS(t),
111
112 TP_STRUCT__entry(
113 __field( pid_t, pid )
114 __field( unsigned int, job )
115 __field( lt_t, when )
116 __field( lt_t, exec_time )
117 ),
118
119 TP_fast_assign(
120 __entry->pid = is_realtime(t) ? t->pid : 0;
121 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
122 __entry->when = litmus_clock();
123 __entry->exec_time = get_exec_time(t);
124 ),
125
126 TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n",
127 __entry->pid, __entry->job,
128 __entry->when, __entry->exec_time)
129);
130
131/*
132 * Tracing jobs completion
133 */
134TRACE_EVENT(litmus_task_completion,
135
136 TP_PROTO(struct task_struct *t, unsigned long forced),
137
138 TP_ARGS(t, forced),
139
140 TP_STRUCT__entry(
141 __field( pid_t, pid )
142 __field( unsigned int, job )
143 __field( lt_t, when )
144 __field( unsigned long, forced )
145 ),
146
147 TP_fast_assign(
148 __entry->pid = t ? t->pid : 0;
149 __entry->job = t ? t->rt_param.job_params.job_no : 0;
150 __entry->when = litmus_clock();
151 __entry->forced = forced;
152 ),
153
154 TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n",
155 __entry->pid, __entry->job,
156 __entry->when, __entry->forced)
157);
158
159/*
160 * Trace blocking tasks.
161 */
162TRACE_EVENT(litmus_task_block,
163
164 TP_PROTO(struct task_struct *t),
165
166 TP_ARGS(t),
167
168 TP_STRUCT__entry(
169 __field( pid_t, pid )
170 __field( lt_t, when )
171 ),
172
173 TP_fast_assign(
174 __entry->pid = t ? t->pid : 0;
175 __entry->when = litmus_clock();
176 ),
177
178 TP_printk("(%u) blocks: %Lu\n", __entry->pid, __entry->when)
179);
180
181/*
182 * Tracing jobs resume
183 */
184TRACE_EVENT(litmus_task_resume,
185
186 TP_PROTO(struct task_struct *t),
187
188 TP_ARGS(t),
189
190 TP_STRUCT__entry(
191 __field( pid_t, pid )
192 __field( unsigned int, job )
193 __field( lt_t, when )
194 ),
195
196 TP_fast_assign(
197 __entry->pid = t ? t->pid : 0;
198 __entry->job = t ? t->rt_param.job_params.job_no : 0;
199 __entry->when = litmus_clock();
200 ),
201
202 TP_printk("resume(job(%u, %u)): %Lu\n",
203 __entry->pid, __entry->job, __entry->when)
204);
205
206/*
207 * Trace synchronous release
208 */
209TRACE_EVENT(litmus_sys_release,
210
211 TP_PROTO(lt_t *start),
212
213 TP_ARGS(start),
214
215 TP_STRUCT__entry(
216 __field( lt_t, rel )
217 __field( lt_t, when )
218 ),
219
220 TP_fast_assign(
221 __entry->rel = *start;
222 __entry->when = litmus_clock();
223 ),
224
225 TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when)
226);
227
228#endif /* _SCHED_TASK_TRACEPOINT_H */
229
230/* Must stay outside the protection */
231#include <trace/define_trace.h>
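A brief sketch of how these events would be emitted, assuming the usual Linux tracepoint conventions: exactly one compilation unit defines CREATE_TRACE_POINTS before including the event header, and call sites simply invoke the generated trace_<event>() stubs, which compile to no-ops while the event is disabled.

#include <trace/events/litmus.h>

static void demo_trace_release(struct task_struct *t)
{
	/* emits the litmus_task_release event defined above */
	trace_litmus_task_release(t);
}

static void demo_trace_completion(struct task_struct *t)
{
	/* second argument: 0 marks a normal (non-forced) completion */
	trace_litmus_task_completion(t, 0);
}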
diff --git a/kernel/sched.c b/kernel/sched.c
index baaca61bc3a3..c4b6bd5151ff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -80,14 +80,14 @@
80#include "workqueue_sched.h" 80#include "workqueue_sched.h"
81#include "sched_autogroup.h" 81#include "sched_autogroup.h"
82 82
83#define CREATE_TRACE_POINTS
84#include <trace/events/sched.h>
85
83#include <litmus/sched_trace.h> 86#include <litmus/sched_trace.h>
84#include <litmus/trace.h> 87#include <litmus/trace.h>
85 88
86static void litmus_tick(struct rq*, struct task_struct*); 89static void litmus_tick(struct rq*, struct task_struct*);
87 90
88#define CREATE_TRACE_POINTS
89#include <trace/events/sched.h>
90
91/* 91/*
92 * Convert user-nice values [ -20 ... 0 ... 19 ] 92 * Convert user-nice values [ -20 ... 0 ... 19 ]
93 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], 93 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -2597,8 +2597,12 @@ void scheduler_ipi(void)
2597 struct rq *rq = this_rq(); 2597 struct rq *rq = this_rq();
2598 struct task_struct *list = xchg(&rq->wake_list, NULL); 2598 struct task_struct *list = xchg(&rq->wake_list, NULL);
2599 2599
2600 if (!list) 2600 if (!list) {
2601 /* If we don't call irq_enter(), we need to trigger the IRQ
2602 * tracing manually. */
2603 ft_irq_fired();
2601 return; 2604 return;
2605 }
2602 2606
2603 /* 2607 /*
2604 * Not all reschedule IPI handlers call irq_enter/irq_exit, since 2608 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
@@ -3163,16 +3167,26 @@ static inline void post_schedule(struct rq *rq)
3163asmlinkage void schedule_tail(struct task_struct *prev) 3167asmlinkage void schedule_tail(struct task_struct *prev)
3164 __releases(rq->lock) 3168 __releases(rq->lock)
3165{ 3169{
3166 struct rq *rq = this_rq(); 3170 struct rq *rq;
3167 3171
3172 preempt_disable();
3173
3174 rq = this_rq();
3168 finish_task_switch(rq, prev); 3175 finish_task_switch(rq, prev);
3169 3176
3177 sched_trace_task_switch_to(current);
3178
3170 /* 3179 /*
3171 * FIXME: do we need to worry about rq being invalidated by the 3180 * FIXME: do we need to worry about rq being invalidated by the
3172 * task_switch? 3181 * task_switch?
3173 */ 3182 */
3174 post_schedule(rq); 3183 post_schedule(rq);
3175 3184
3185 if (sched_state_validate_switch())
3186 litmus_reschedule_local();
3187
3188 preempt_enable();
3189
3176#ifdef __ARCH_WANT_UNLOCKED_CTXSW 3190#ifdef __ARCH_WANT_UNLOCKED_CTXSW
3177 /* In this case, finish_task_switch does not reenable preemption */ 3191 /* In this case, finish_task_switch does not reenable preemption */
3178 preempt_enable(); 3192 preempt_enable();
@@ -4403,14 +4417,20 @@ litmus_need_resched_nonpreemptible:
4403 raw_spin_unlock_irq(&rq->lock); 4417 raw_spin_unlock_irq(&rq->lock);
4404 } 4418 }
4405 4419
4420 TS_SCHED2_START(prev);
4406 sched_trace_task_switch_to(current); 4421 sched_trace_task_switch_to(current);
4407 4422
4408 post_schedule(rq); 4423 post_schedule(rq);
4409 4424
4410 if (sched_state_validate_switch()) 4425 if (sched_state_validate_switch()) {
4426 TS_SCHED2_END(prev);
4411 goto litmus_need_resched_nonpreemptible; 4427 goto litmus_need_resched_nonpreemptible;
4428 }
4412 4429
4413 preempt_enable_no_resched(); 4430 preempt_enable_no_resched();
4431
4432 TS_SCHED2_END(prev);
4433
4414 if (need_resched()) 4434 if (need_resched())
4415 goto need_resched; 4435 goto need_resched;
4416 4436
@@ -4684,17 +4704,6 @@ void complete_all(struct completion *x)
4684} 4704}
4685EXPORT_SYMBOL(complete_all); 4705EXPORT_SYMBOL(complete_all);
4686 4706
4687void complete_n(struct completion *x, int n)
4688{
4689 unsigned long flags;
4690
4691 spin_lock_irqsave(&x->wait.lock, flags);
4692 x->done += n;
4693 __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL);
4694 spin_unlock_irqrestore(&x->wait.lock, flags);
4695}
4696EXPORT_SYMBOL(complete_n);
4697
4698static inline long __sched 4707static inline long __sched
4699do_wait_for_common(struct completion *x, long timeout, int state) 4708do_wait_for_common(struct completion *x, long timeout, int state)
4700{ 4709{
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 58cf5d18dfdc..db04161fe37c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -3,6 +3,8 @@
3 * policies) 3 * policies)
4 */ 4 */
5 5
6#include <litmus/litmus.h>
7
6#ifdef CONFIG_RT_GROUP_SCHED 8#ifdef CONFIG_RT_GROUP_SCHED
7 9
8#define rt_entity_is_task(rt_se) (!(rt_se)->my_q) 10#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
@@ -228,8 +230,11 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
228 if (rt_rq->rt_nr_running) { 230 if (rt_rq->rt_nr_running) {
229 if (rt_se && !on_rt_rq(rt_se)) 231 if (rt_se && !on_rt_rq(rt_se))
230 enqueue_rt_entity(rt_se, false); 232 enqueue_rt_entity(rt_se, false);
231 if (rt_rq->highest_prio.curr < curr->prio) 233 if (rt_rq->highest_prio.curr < curr->prio &&
234 /* Don't subject LITMUS tasks to remote reschedules */
235 !is_realtime(curr)) {
232 resched_task(curr); 236 resched_task(curr);
237 }
233 } 238 }
234} 239}
235 240
@@ -322,8 +327,10 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
322 327
323static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 328static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
324{ 329{
325 if (rt_rq->rt_nr_running) 330 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
326 resched_task(rq_of_rt_rq(rt_rq)->curr); 331
332 if (rt_rq->rt_nr_running && !is_realtime(curr))
333 resched_task(curr);
327} 334}
328 335
329static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 336static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fca82c32042b..2f2df08df395 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -211,6 +211,9 @@ asmlinkage void __do_softirq(void)
211 int max_restart = MAX_SOFTIRQ_RESTART; 211 int max_restart = MAX_SOFTIRQ_RESTART;
212 int cpu; 212 int cpu;
213 213
214 /* Mark Feather-Trace samples as "disturbed". */
215 ft_irq_fired();
216
214 pending = local_softirq_pending(); 217 pending = local_softirq_pending();
215 account_system_vtime(current); 218 account_system_vtime(current);
216 219
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 94b48e199577..bd6635c8de08 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -79,6 +79,52 @@ config SCHED_CPU_AFFINITY
79 79
80 Say Yes if unsure. 80 Say Yes if unsure.
81 81
82choice
83 prompt "EDF Tie-Break Behavior"
84 default EDF_TIE_BREAK_LATENESS_NORM
85 help
86 Allows the configuration of tie-breaking behavior when the deadlines
87 of two EDF-scheduled tasks are equal.
88
89 config EDF_TIE_BREAK_LATENESS
90 bool "Lateness-based Tie Break"
91 help
92 Break ties between two jobs, A and B, based upon the lateness of their
93 prior jobs. The job with the greatest lateness has priority. Note that
94 lateness has a negative value if the prior job finished before its
95 deadline.
96
97 config EDF_TIE_BREAK_LATENESS_NORM
98 bool "Normalized Lateness-based Tie Break"
99 help
100 Break ties between two jobs, A and B, based upon the lateness, normalized
101 by relative deadline, of their prior jobs. The job with the greatest
102 normalized lateness has priority. Note that lateness has a negative value
103 if the prior job finished before its deadline.
104
105	  Normalized lateness tie-breaks are likely desirable over non-normalized
106 tie-breaks if the execution times and/or relative deadlines of tasks in a
107 task set vary greatly.
108
109 config EDF_TIE_BREAK_HASH
110 bool "Hash-based Tie Breaks"
111 help
112 Break ties between two jobs, A and B, with equal deadlines by using a
113 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
114	  A has a ~50% chance of winning a given tie-break.
115
116 config EDF_PID_TIE_BREAK
117 bool "PID-based Tie Breaks"
118 help
119	  Break ties based upon OS-assigned thread IDs. Use this option if it is
120	  required by the algorithm's real-time analysis or if per-task
121	  response-time jitter must be minimized.
122
123 NOTES:
124	  * This tie-breaking method was the default in Litmus 2012.2 and before.
125
126endchoice
127
82endmenu 128endmenu
83 129
84menu "Tracing" 130menu "Tracing"
@@ -138,6 +184,24 @@ config SCHED_TASK_TRACE_SHIFT
138 10 => 1k events 184 10 => 1k events
139 8 => 512 events 185 8 => 512 events
140 186
187config SCHED_LITMUS_TRACEPOINT
188 bool "Enable Event/Tracepoint Tracing for real-time task tracing"
189 depends on TRACEPOINTS
190 default n
191 help
192 Enable kernel-style events (tracepoint) for Litmus. Litmus events
193 trace the same functions as the above sched_trace_XXX(), but can
194 be enabled independently.
195 Litmus tracepoints can be recorded and analyzed together (single
196 time reference) with all other kernel tracing events (e.g.,
197 sched:sched_switch, etc.).
198
199	  This also enables a quick way to visualize schedule traces using the
200	  trace-cmd utility and the kernelshark visualizer.
201
202 Say Yes for debugging and visualization purposes.
203 Say No for overhead tracing.
204
141config SCHED_OVERHEAD_TRACE 205config SCHED_OVERHEAD_TRACE
142 bool "Record timestamps for overhead measurements" 206 bool "Record timestamps for overhead measurements"
143 depends on FEATHER_TRACE 207 depends on FEATHER_TRACE
@@ -201,7 +265,7 @@ config SCHED_DEBUG_TRACE_CALLER
201 265
202config PREEMPT_STATE_TRACE 266config PREEMPT_STATE_TRACE
203 bool "Trace preemption state machine transitions" 267 bool "Trace preemption state machine transitions"
204 depends on SCHED_DEBUG_TRACE 268 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
205 default n 269 default n
206 help 270 help
207 With this option enabled, each CPU will log when it transitions 271 With this option enabled, each CPU will log when it transitions
diff --git a/litmus/Makefile b/litmus/Makefile
index 7338180f196f..d26ca7076b62 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -11,13 +11,16 @@ obj-y = sched_plugin.o litmus.o \
11 sync.o \ 11 sync.o \
12 rt_domain.o \ 12 rt_domain.o \
13 edf_common.o \ 13 edf_common.o \
14 fp_common.o \
14 fdso.o \ 15 fdso.o \
15 locking.o \ 16 locking.o \
16 srp.o \ 17 srp.o \
17 bheap.o \ 18 bheap.o \
19 binheap.o \
18 ctrldev.o \ 20 ctrldev.o \
19 sched_gsn_edf.o \ 21 sched_gsn_edf.o \
20 sched_psn_edf.o 22 sched_psn_edf.o \
23 sched_pfp.o
21 24
22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 25obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 26obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..40a913f4b5a7
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,388 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. The nodes themselves swap
40 * positions, not just their data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble the node to delete up to the root,
360 * and then delete the root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368	/* temporarily set data to BINHEAP_POISON to allow the node to bubble up to the root. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose key has decreased in value.
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
388
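A minimal sketch of how a client would use the new binary heap (illustrative only): each element embeds a binheap_node, the node's data pointer refers back to the element, and the handle's compare callback orders nodes through that pointer. The handle-initialization helper is assumed to live in include/litmus/binheap.h and is not shown here.

struct demo_item {
	int key;
	struct binheap_node node;
};

/* min-heap order: nonzero if 'a' should sit above 'b' */
static int demo_item_order(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_item *ia = (struct demo_item *) a->data;
	struct demo_item *ib = (struct demo_item *) b->data;
	return ia->key < ib->key;
}

static void demo_insert(struct binheap *heap, struct demo_item *item)
{
	/* links the node as the new 'last' leaf, then bubbles it up
	 * via the compare callback installed in the handle */
	__binheap_add(&item->node, heap, item);
}

static void demo_remove(struct binheap *heap, struct demo_item *item)
{
	/* works for any element: the node is first bubbled to the root
	 * with BINHEAP_POISON, then removed as the root */
	__binheap_delete(&item->node, heap);
}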
diff --git a/litmus/budget.c b/litmus/budget.c
index 310e9a3d4172..f7712be29adb 100644
--- a/litmus/budget.c
+++ b/litmus/budget.c
@@ -5,6 +5,8 @@
5#include <litmus/litmus.h> 5#include <litmus/litmus.h>
6#include <litmus/preempt.h> 6#include <litmus/preempt.h>
7 7
8#include <litmus/budget.h>
9
8struct enforcement_timer { 10struct enforcement_timer {
9 /* The enforcement timer is used to accurately police 11 /* The enforcement timer is used to accurately police
10 * slice budgets. */ 12 * slice budgets. */
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
index 6677a67cc945..41919b2714cb 100644
--- a/litmus/ctrldev.c
+++ b/litmus/ctrldev.c
@@ -30,27 +30,19 @@ static int alloc_ctrl_page(struct task_struct *t)
30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) 30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
31{ 31{
32 int err; 32 int err;
33 unsigned long pfn;
34 33
35 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); 34 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
36 35
37 /* Increase ref count. Is decreased when vma is destroyed. */
38 get_page(ctrl);
39
40 /* compute page frame number */
41 pfn = page_to_pfn(ctrl);
42
43 TRACE_CUR(CTRL_NAME 36 TRACE_CUR(CTRL_NAME
44 ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n", 37 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
45 tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start, 38 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
46 vma->vm_page_prot); 39 vma->vm_page_prot);
47 40
48 /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise 41 /* Map it into the vma. */
49 * userspace actually gets a copy-on-write page. */ 42 err = vm_insert_page(vma, vma->vm_start, ctrl);
50 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
51 43
52 if (err) 44 if (err)
53 TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err); 45 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
54 46
55 return err; 47 return err;
56} 48}
@@ -63,19 +55,19 @@ static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
63 TRACE_CUR(CTRL_NAME 55 TRACE_CUR(CTRL_NAME
64 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n", 56 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
65 (void*) vma->vm_start, (void*) vma->vm_end, vma, 57 (void*) vma->vm_start, (void*) vma->vm_end, vma,
66 vma->vm_private_data, current->comm, 58 vma->vm_private_data);
67 current->pid);
68} 59}
69 60
70static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, 61static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
71 struct vm_fault* vmf) 62 struct vm_fault* vmf)
72{ 63{
73 /* This function should never be called, since 64 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
74 * all pages should have been mapped by mmap() 65 vma->vm_flags, vmf->pgoff);
75 * already. */ 66
76 TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags); 67 /* This function should never be called, since all pages should have
68 * been mapped by mmap() already. */
69 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
77 70
78 /* nope, you only get one page */
79 return VM_FAULT_SIGBUS; 71 return VM_FAULT_SIGBUS;
80} 72}
81 73
@@ -103,9 +95,16 @@ static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
103 return -EINVAL; 95 return -EINVAL;
104 96
105 vma->vm_ops = &litmus_ctrl_vm_ops; 97 vma->vm_ops = &litmus_ctrl_vm_ops;
106 /* this mapping should not be kept across forks, 98 /* This mapping should not be kept across forks,
107 * and cannot be expanded */ 99 * cannot be expanded, and is not a "normal" page. */
108 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; 100 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO;
101
102 /* We don't want the first write access to trigger a "minor" page fault
103	 * to mark the page as dirty. This is transient, private memory; we
104 * don't care if it was touched or not. __S011 means RW access, but not
105 * execute, and avoids copy-on-write behavior.
106 * See protection_map in mmap.c. */
107 vma->vm_page_prot = __S011;
109 108
110 err = alloc_ctrl_page(current); 109 err = alloc_ctrl_page(current);
111 if (!err) 110 if (!err)
@@ -134,6 +133,17 @@ static int __init init_litmus_ctrl_dev(void)
134 133
135 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); 134 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
136 135
136 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint64_t));
137
138 BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
139 != LITMUS_CP_OFFSET_SCHED);
140 BUILD_BUG_ON(offsetof(struct control_page, irq_count)
141 != LITMUS_CP_OFFSET_IRQ_COUNT);
142 BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
143 != LITMUS_CP_OFFSET_TS_SC_START);
144 BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
145 != LITMUS_CP_OFFSET_IRQ_SC_START);
146
137 printk("Initializing LITMUS^RT control device.\n"); 147 printk("Initializing LITMUS^RT control device.\n");
138 err = misc_register(&litmus_ctrl_dev); 148 err = misc_register(&litmus_ctrl_dev);
139 if (err) 149 if (err)
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 9b44dc2d8d1e..5aca2934a7b5 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -14,6 +14,32 @@
14 14
15#include <litmus/edf_common.h> 15#include <litmus/edf_common.h>
16 16
17#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
18#include <litmus/fpmath.h>
19#endif
20
21#ifdef CONFIG_EDF_TIE_BREAK_HASH
22#include <linux/hash.h>
23static inline long edf_hash(struct task_struct *t)
24{
25 /* pid is 32 bits, so normally we would shove that into the
26	 * upper 32-bits and put the job number in the bottom
27	 * and hash the 64-bit number with hash_64(). Sadly,
28	 * in testing, hash_64() doesn't distribute keys where the
29 * upper bits are close together (as would be the case with
30 * pids) and job numbers are equal (as would be the case with
31 * synchronous task sets with all relative deadlines equal).
32 *
33 * A 2006 Linux patch proposed the following solution
34 * (but for some reason it wasn't accepted...).
35 *
36 * At least this workaround works for 32-bit systems as well.
37 */
38 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
39}
40#endif
41
42
17/* edf_higher_prio - returns true if first has a higher EDF priority 43/* edf_higher_prio - returns true if first has a higher EDF priority
18 * than second. Deadline ties are broken by PID. 44 * than second. Deadline ties are broken by PID.
19 * 45 *
@@ -63,25 +89,81 @@ int edf_higher_prio(struct task_struct* first,
63 89
64#endif 90#endif
65 91
92 if (earlier_deadline(first_task, second_task)) {
93 return 1;
94 }
95 else if (get_deadline(first_task) == get_deadline(second_task)) {
96 /* Need to tie break. All methods must set pid_break to 0/1 if
97 * first_task does not have priority over second_task.
98 */
99 int pid_break;
66 100
67 return !is_realtime(second_task) ||
68 101
69 /* is the deadline of the first task earlier? 102#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
70 * Then it has higher priority. 103 /* Tie break by lateness. Jobs with greater lateness get
104 * priority. This should spread tardiness across all tasks,
105 * especially in task sets where all tasks have the same
106 * period and relative deadlines.
71 */ 107 */
72 earlier_deadline(first_task, second_task) || 108 if (get_lateness(first_task) > get_lateness(second_task)) {
73 109 return 1;
74 /* Do we have a deadline tie? 110 }
75 * Then break by PID. 111 pid_break = (get_lateness(first_task) == get_lateness(second_task));
112
113
114#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
115 /* Tie break by lateness, normalized by relative deadline. Jobs with
116 * greater normalized lateness get priority.
117 *
118 * Note: Considered using the algebraically equivalent
119 * lateness(first)*relative_deadline(second) >
120 lateness(second)*relative_deadline(first)
121 * to avoid fixed-point math, but values are prone to overflow if inputs
122 * are on the order of several seconds, even in 64-bit.
76 */ 123 */
77 (get_deadline(first_task) == get_deadline(second_task) && 124 fp_t fnorm = _frac(get_lateness(first_task),
78 (first_task->pid < second_task->pid || 125 get_rt_relative_deadline(first_task));
126 fp_t snorm = _frac(get_lateness(second_task),
127 get_rt_relative_deadline(second_task));
128 if (_gt(fnorm, snorm)) {
129 return 1;
130 }
131 pid_break = _eq(fnorm, snorm);
79 132
80 /* If the PIDs are the same then the task with the inherited 133
81 * priority wins. 134#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
135	/* Tie break by comparing hashes of the (pid, job#) tuple. There should be
136 * a 50% chance that first_task has a higher priority than second_task.
82 */ 137 */
83 (first_task->pid == second_task->pid && 138 long fhash = edf_hash(first_task);
84 !second->rt_param.inh_task))); 139 long shash = edf_hash(second_task);
140 if (fhash < shash) {
141 return 1;
142 }
143 pid_break = (fhash == shash);
144#else
145
146
147 /* CONFIG_EDF_PID_TIE_BREAK */
148	pid_break = 1; /* fall through to tie-break by PID */
149#endif
150
151 /* Tie break by pid */
152 if(pid_break) {
153 if (first_task->pid < second_task->pid) {
154 return 1;
155 }
156 else if (first_task->pid == second_task->pid) {
157 /* If the PIDs are the same then the task with the
158 * inherited priority wins.
159 */
160 if (!second->rt_param.inh_task) {
161 return 1;
162 }
163 }
164 }
165 }
166 return 0; /* fall-through. prio(second_task) > prio(first_task) */
85} 167}
86 168
87int edf_ready_order(struct bheap_node* a, struct bheap_node* b) 169int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
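To make the normalized-lateness policy concrete, a small user-space illustration (floating point here is only for exposition; the kernel code above uses the fixed-point helpers from litmus/fpmath.h): a prior job that ran 2 ms late against a 10 ms relative deadline out-prioritizes one that ran 3 ms late against a 30 ms relative deadline, even though its absolute lateness is smaller.

#include <stdio.h>

int main(void)
{
	/* hypothetical prior-job lateness and relative deadlines, in microseconds */
	long late_a = 2000, rel_dl_a = 10000;
	long late_b = 3000, rel_dl_b = 30000;

	double norm_a = (double) late_a / rel_dl_a;	/* 0.20 */
	double norm_b = (double) late_b / rel_dl_b;	/* 0.10 */

	/* on a deadline tie, the greater normalized lateness wins */
	printf("A beats B: %d\n", norm_a > norm_b);	/* prints: A beats B: 1 */
	return 0;
}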
diff --git a/litmus/fdso.c b/litmus/fdso.c
index aa7b384264e3..c4b450be4509 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -23,10 +23,16 @@ extern struct fdso_ops generic_lock_ops;
23static const struct fdso_ops* fdso_ops[] = { 23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */ 24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */ 25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
26}; 30};
27 31
28static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) 32static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
29{ 33{
34 BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
35
30 if (fdso_ops[type]->create) 36 if (fdso_ops[type]->create)
31 return fdso_ops[type]->create(obj_ref, type, config); 37 return fdso_ops[type]->create(obj_ref, type, config);
32 else 38 else
@@ -162,6 +168,18 @@ static int put_od_entry(struct od_table_entry* od)
162 return 0; 168 return 0;
163} 169}
164 170
171static long close_od_entry(struct od_table_entry *od)
172{
173 long ret;
174
175 /* Give the class a chance to reject the close. */
176 ret = fdso_close(od);
177 if (ret == 0)
178 ret = put_od_entry(od);
179
180 return ret;
181}
182
165void exit_od_table(struct task_struct* t) 183void exit_od_table(struct task_struct* t)
166{ 184{
167 int i; 185 int i;
@@ -169,7 +187,7 @@ void exit_od_table(struct task_struct* t)
169 if (t->od_table) { 187 if (t->od_table) {
170 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) 188 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
171 if (t->od_table[i].used) 189 if (t->od_table[i].used)
172 put_od_entry(t->od_table + i); 190 close_od_entry(t->od_table + i);
173 kfree(t->od_table); 191 kfree(t->od_table);
174 t->od_table = NULL; 192 t->od_table = NULL;
175 } 193 }
@@ -283,11 +301,7 @@ asmlinkage long sys_od_close(int od)
283 return ret; 301 return ret;
284 302
285 303
286 /* give the class a chance to reject the close 304 ret = close_od_entry(t->od_table + od);
287 */
288 ret = fdso_close(t->od_table + od);
289 if (ret == 0)
290 ret = put_od_entry(t->od_table + od);
291 305
292 return ret; 306 return ret;
293} 307}
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..964a4729deff
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,119 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fp_common.h>
16
17/* fp_higher_prio - returns true if first has a higher static priority
18 * than second. Ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fp_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (unlikely(first && first == second)) {
30 TRACE_TASK(first,
31 "WARNING: pointless FP priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 if (!is_realtime(second_task))
41 return 1;
42
43#ifdef CONFIG_LITMUS_LOCKING
44
45 /* Check for inherited priorities. Change task
46 * used for comparison in such a case.
47 */
48 if (unlikely(first->rt_param.inh_task))
49 first_task = first->rt_param.inh_task;
50 if (unlikely(second->rt_param.inh_task))
51 second_task = second->rt_param.inh_task;
52
53 /* Check for priority boosting. Tie-break by start of boosting.
54 */
55 if (unlikely(is_priority_boosted(first_task))) {
56 /* first_task is boosted, how about second_task? */
57 if (is_priority_boosted(second_task))
58 /* break by priority point */
59 return lt_before(get_boost_start(first_task),
60 get_boost_start(second_task));
61 else
62 /* priority boosting wins. */
63 return 1;
64 } else if (unlikely(is_priority_boosted(second_task)))
65		/* second_task is boosted, first is not */
66 return 0;
67
68#endif
69
70 /* Comparisons to itself are not expected; priority inheritance
71 * should also not cause this to happen. */
72 BUG_ON(first_task == second_task);
73
74 if (get_priority(first_task) < get_priority(second_task))
75 return 1;
76 else if (get_priority(first_task) == get_priority(second_task))
77 /* Break by PID. */
78 return first_task->pid < second_task->pid;
79 else
80 return 0;
81}
82
83int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
84{
85 return fp_higher_prio(bheap2task(a), bheap2task(b));
86}
87
88void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
89 release_jobs_t release)
90{
91 rt_domain_init(rt, fp_ready_order, resched, release);
92}
93
94/* fp_preemption_needed - check whether the task t needs to be preempted
95 */
96int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
97{
98 struct task_struct *pending;
99
100 pending = fp_prio_peek(q);
101
102 if (!pending)
103 return 0;
104 if (!t)
105 return 1;
106
107 /* make sure to get non-rt stuff out of the way */
108 return !is_realtime(t) || fp_higher_prio(pending, t);
109}
110
111void fp_prio_queue_init(struct fp_prio_queue* q)
112{
113 int i;
114
115 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
116 q->bitmask[i] = 0;
117 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
118 bheap_init(&q->queue[i]);
119}
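A sketch of how a fixed-priority plugin would typically consult these helpers on a job release; 'ready' and 'scheduled' are hypothetical names for the plugin's priority queue and the locally running task.

static void demo_check_for_preemption(struct fp_prio_queue *ready,
				      struct task_struct *scheduled)
{
	/* fp_preemption_needed() peeks at the highest-priority pending job
	 * and compares it against 'scheduled' via fp_higher_prio() */
	if (fp_preemption_needed(ready, scheduled))
		litmus_reschedule_local();
}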
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index 06fcf4cf77dc..99bc39ffbcef 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -230,13 +230,20 @@ static ssize_t ftdev_read(struct file *filp,
230 * here with copied data because that data would get 230 * here with copied data because that data would get
231 * lost if the task is interrupted (e.g., killed). 231 * lost if the task is interrupted (e.g., killed).
232 */ 232 */
233 mutex_unlock(&ftdm->lock);
233 set_current_state(TASK_INTERRUPTIBLE); 234 set_current_state(TASK_INTERRUPTIBLE);
235
234 schedule_timeout(50); 236 schedule_timeout(50);
237
235 if (signal_pending(current)) { 238 if (signal_pending(current)) {
236 if (err == 0) 239 if (err == 0)
237 /* nothing read yet, signal problem */ 240 /* nothing read yet, signal problem */
238 err = -ERESTARTSYS; 241 err = -ERESTARTSYS;
239 break; 242 goto out;
243 }
244 if (mutex_lock_interruptible(&ftdm->lock)) {
245 err = -ERESTARTSYS;
246 goto out;
240 } 247 }
241 } else if (copied < 0) { 248 } else if (copied < 0) {
242 /* page fault */ 249 /* page fault */
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 36e314625d86..13a4ed4c9e93 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -6,13 +6,13 @@
6#include <litmus/litmus.h> 6#include <litmus/litmus.h>
7#include <litmus/jobs.h> 7#include <litmus/jobs.h>
8 8
9void prepare_for_next_period(struct task_struct *t) 9static inline void setup_release(struct task_struct *t, lt_t release)
10{ 10{
11 BUG_ON(!t);
12 /* prepare next release */ 11 /* prepare next release */
13 t->rt_param.job_params.release = t->rt_param.job_params.deadline; 12 t->rt_param.job_params.release = release;
14 t->rt_param.job_params.deadline += get_rt_period(t); 13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
15 t->rt_param.job_params.exec_time = 0; 14 t->rt_param.job_params.exec_time = 0;
15
16 /* update job sequence number */ 16 /* update job sequence number */
17 t->rt_param.job_params.job_no++; 17 t->rt_param.job_params.job_no++;
18 18
@@ -20,11 +20,25 @@ void prepare_for_next_period(struct task_struct *t)
20 t->rt.time_slice = 1; 20 t->rt.time_slice = 1;
21} 21}
22 22
23void prepare_for_next_period(struct task_struct *t)
24{
25 BUG_ON(!t);
26
27 /* Record lateness before we set up the next job's
28 * release and deadline. Lateness may be negative.
29 */
30 t->rt_param.job_params.lateness =
31 (long long)litmus_clock() -
32 (long long)t->rt_param.job_params.deadline;
33
34 setup_release(t, get_release(t) + get_rt_period(t));
35}
36
23void release_at(struct task_struct *t, lt_t start) 37void release_at(struct task_struct *t, lt_t start)
24{ 38{
25 t->rt_param.job_params.deadline = start; 39 BUG_ON(!t);
26 prepare_for_next_period(t); 40 setup_release(t, start);
27 set_rt_flags(t, RT_F_RUNNING); 41 tsk_rt(t)->completed = 0;
28} 42}
29 43
30 44
@@ -34,7 +48,7 @@ void release_at(struct task_struct *t, lt_t start)
34long complete_job(void) 48long complete_job(void)
35{ 49{
36	/* Mark that we do not execute anymore */ 50	/* Mark that we do not execute anymore */
37 set_rt_flags(current, RT_F_SLEEP); 51 tsk_rt(current)->completed = 1;
38 /* call schedule, this will return when a new job arrives 52 /* call schedule, this will return when a new job arrives
39 * it also takes care of preparing for the next release 53 * it also takes care of preparing for the next release
40 */ 54 */
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 301390148d02..dc94be71bfb6 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -9,6 +9,8 @@
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/reboot.h>
13#include <linux/stop_machine.h>
12 14
13#include <litmus/litmus.h> 15#include <litmus/litmus.h>
14#include <litmus/bheap.h> 16#include <litmus/bheap.h>
@@ -23,9 +25,6 @@
23 25
24/* Number of RT tasks that exist in the system */ 26/* Number of RT tasks that exist in the system */
25atomic_t rt_task_count = ATOMIC_INIT(0); 27atomic_t rt_task_count = ATOMIC_INIT(0);
26static DEFINE_RAW_SPINLOCK(task_transition_lock);
27/* synchronize plugin switching */
28atomic_t cannot_use_plugin = ATOMIC_INIT(0);
29 28
30/* Give log messages sequential IDs. */ 29/* Give log messages sequential IDs. */
31atomic_t __log_seq_no = ATOMIC_INIT(0); 30atomic_t __log_seq_no = ATOMIC_INIT(0);
@@ -102,21 +101,25 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
102 goto out_unlock; 101 goto out_unlock;
103 } 102 }
104 103
104 /* set relative deadline to be implicit if left unspecified */
105 if (tp.relative_deadline == 0)
106 tp.relative_deadline = tp.period;
107
105 if (tp.exec_cost <= 0) 108 if (tp.exec_cost <= 0)
106 goto out_unlock; 109 goto out_unlock;
107 if (tp.period <= 0) 110 if (tp.period <= 0)
108 goto out_unlock; 111 goto out_unlock;
109 if (!cpu_online(tp.cpu)) 112 if (!cpu_online(tp.cpu))
110 goto out_unlock; 113 goto out_unlock;
111 if (tp.period < tp.exec_cost) 114 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
112 { 115 {
113 printk(KERN_INFO "litmus: real-time task %d rejected " 116 printk(KERN_INFO "litmus: real-time task %d rejected "
114 "because wcet > period\n", pid); 117 "because task density > 1.0\n", pid);
115 goto out_unlock; 118 goto out_unlock;
116 } 119 }
117 if ( tp.cls != RT_CLASS_HARD && 120 if (tp.cls != RT_CLASS_HARD &&
118 tp.cls != RT_CLASS_SOFT && 121 tp.cls != RT_CLASS_SOFT &&
119 tp.cls != RT_CLASS_BEST_EFFORT) 122 tp.cls != RT_CLASS_BEST_EFFORT)
120 { 123 {
121 printk(KERN_INFO "litmus: real-time task %d rejected " 124 printk(KERN_INFO "litmus: real-time task %d rejected "
122 "because its class is invalid\n", pid); 125 "because its class is invalid\n", pid);
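Restated as a stand-alone predicate (a hypothetical helper, not part of the patch), the admission rule above reads: after the implicit-deadline fallback, the execution cost must fit within the smaller of the relative deadline and the period.

static int demo_density_at_most_one(unsigned long exec_cost,
				    unsigned long relative_deadline,
				    unsigned long period)
{
	/* implicit deadline if left unspecified, as above */
	unsigned long d = relative_deadline ? relative_deadline : period;
	unsigned long m = (d < period) ? d : period;

	/* e.g. (e=3, d=5, p=10) passes; (e=6, d=5, p=10) is rejected
	 * even though e <= p */
	return exec_cost <= m;
}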
@@ -317,15 +320,20 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
317long litmus_admit_task(struct task_struct* tsk) 320long litmus_admit_task(struct task_struct* tsk)
318{ 321{
319 long retval = 0; 322 long retval = 0;
320 unsigned long flags;
321 323
322 BUG_ON(is_realtime(tsk)); 324 BUG_ON(is_realtime(tsk));
323 325
324 if (get_rt_period(tsk) == 0 || 326 tsk_rt(tsk)->heap_node = NULL;
325 get_exec_cost(tsk) > get_rt_period(tsk)) { 327 tsk_rt(tsk)->rel_heap = NULL;
326 TRACE_TASK(tsk, "litmus admit: invalid task parameters " 328
327 "(%lu, %lu)\n", 329 if (get_rt_relative_deadline(tsk) == 0 ||
328 get_exec_cost(tsk), get_rt_period(tsk)); 330 get_exec_cost(tsk) >
331 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
332 TRACE_TASK(tsk,
333 "litmus admit: invalid task parameters "
334 "(e = %lu, p = %lu, d = %lu)\n",
335 get_exec_cost(tsk), get_rt_period(tsk),
336 get_rt_relative_deadline(tsk));
329 retval = -EINVAL; 337 retval = -EINVAL;
330 goto out; 338 goto out;
331 } 339 }
@@ -339,9 +347,6 @@ long litmus_admit_task(struct task_struct* tsk)
339 347
340 INIT_LIST_HEAD(&tsk_rt(tsk)->list); 348 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
341 349
342 /* avoid scheduler plugin changing underneath us */
343 raw_spin_lock_irqsave(&task_transition_lock, flags);
344
345 /* allocate heap node for this task */ 350 /* allocate heap node for this task */
346 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); 351 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
347 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); 352 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
@@ -349,15 +354,14 @@ long litmus_admit_task(struct task_struct* tsk)
349 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { 354 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
350 printk(KERN_WARNING "litmus: no more heap node memory!?\n"); 355 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
351 356
352 bheap_node_free(tsk_rt(tsk)->heap_node);
353 release_heap_free(tsk_rt(tsk)->rel_heap);
354
355 retval = -ENOMEM; 357 retval = -ENOMEM;
356 goto out_unlock; 358 goto out;
357 } else { 359 } else {
358 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); 360 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
359 } 361 }
360 362
363 preempt_disable();
364
361 retval = litmus->admit_task(tsk); 365 retval = litmus->admit_task(tsk);
362 366
363 if (!retval) { 367 if (!retval) {
@@ -366,9 +370,13 @@ long litmus_admit_task(struct task_struct* tsk)
366 atomic_inc(&rt_task_count); 370 atomic_inc(&rt_task_count);
367 } 371 }
368 372
369out_unlock: 373 preempt_enable();
370 raw_spin_unlock_irqrestore(&task_transition_lock, flags); 374
371out: 375out:
376 if (retval) {
377 bheap_node_free(tsk_rt(tsk)->heap_node);
378 release_heap_free(tsk_rt(tsk)->rel_heap);
379 }
372 return retval; 380 return retval;
373} 381}
374 382
@@ -388,37 +396,10 @@ void litmus_exit_task(struct task_struct* tsk)
388 } 396 }
389} 397}
390 398
391/* IPI callback to synchronize plugin switching */ 399static int do_plugin_switch(void *_plugin)
392static void synch_on_plugin_switch(void* info)
393{ 400{
394 atomic_inc(&cannot_use_plugin); 401 int ret;
395 while (atomic_read(&cannot_use_plugin) > 0) 402 struct sched_plugin* plugin = _plugin;
396 cpu_relax();
397}
398
399/* Switching a plugin in use is tricky.
400 * We must watch out that no real-time tasks exists
401 * (and that none is created in parallel) and that the plugin is not
402 * currently in use on any processor (in theory).
403 */
404int switch_sched_plugin(struct sched_plugin* plugin)
405{
406 unsigned long flags;
407 int ret = 0;
408
409 BUG_ON(!plugin);
410
411 /* forbid other cpus to use the plugin */
412 atomic_set(&cannot_use_plugin, 1);
413 /* send IPI to force other CPUs to synch with us */
414 smp_call_function(synch_on_plugin_switch, NULL, 0);
415
416 /* wait until all other CPUs have started synch */
417 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
418 cpu_relax();
419
420 /* stop task transitions */
421 raw_spin_lock_irqsave(&task_transition_lock, flags);
422 403
423 /* don't switch if there are active real-time tasks */ 404 /* don't switch if there are active real-time tasks */
424 if (atomic_read(&rt_task_count) == 0) { 405 if (atomic_read(&rt_task_count) == 0) {
@@ -436,11 +417,24 @@ int switch_sched_plugin(struct sched_plugin* plugin)
436 } else 417 } else
437 ret = -EBUSY; 418 ret = -EBUSY;
438out: 419out:
439 raw_spin_unlock_irqrestore(&task_transition_lock, flags);
440 atomic_set(&cannot_use_plugin, 0);
441 return ret; 420 return ret;
442} 421}
443 422
423/* Switching a plugin in use is tricky.
424 * We must watch out that no real-time tasks exist
425 * (and that none is created in parallel) and that the plugin is not
426 * currently in use on any processor (in theory).
427 */
428int switch_sched_plugin(struct sched_plugin* plugin)
429{
430 BUG_ON(!plugin);
431
432 if (atomic_read(&rt_task_count) == 0)
433 return stop_machine(do_plugin_switch, plugin, NULL);
434 else
435 return -EBUSY;
436}
437
444/* Called upon fork. 438/* Called upon fork.
445 * p is the newly forked task. 439 * p is the newly forked task.
446 */ 440 */
@@ -521,6 +515,25 @@ static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
521 515
522extern struct sched_plugin linux_sched_plugin; 516extern struct sched_plugin linux_sched_plugin;
523 517
518static int litmus_shutdown_nb(struct notifier_block *unused1,
519 unsigned long unused2, void *unused3)
520{
521 /* Attempt to switch back to regular Linux scheduling.
522 * Forces the active plugin to clean up.
523 */
524 if (litmus != &linux_sched_plugin) {
525 int ret = switch_sched_plugin(&linux_sched_plugin);
526 if (ret) {
527 printk("Auto-shutdown of active Litmus plugin failed.\n");
528 }
529 }
530 return NOTIFY_DONE;
531}
532
533static struct notifier_block shutdown_notifier = {
534 .notifier_call = litmus_shutdown_nb,
535};
536
524static int __init _init_litmus(void) 537static int __init _init_litmus(void)
525{ 538{
526 /* Common initializers, 539 /* Common initializers,
@@ -529,8 +542,6 @@ static int __init _init_litmus(void)
529 */ 542 */
530 printk("Starting LITMUS^RT kernel\n"); 543 printk("Starting LITMUS^RT kernel\n");
531 544
532 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
533
534 register_sched_plugin(&linux_sched_plugin); 545 register_sched_plugin(&linux_sched_plugin);
535 546
536 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); 547 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
@@ -550,11 +561,15 @@ static int __init _init_litmus(void)
550 init_topology(); 561 init_topology();
551#endif 562#endif
552 563
564 register_reboot_notifier(&shutdown_notifier);
565
553 return 0; 566 return 0;
554} 567}
555 568
556static void _exit_litmus(void) 569static void _exit_litmus(void)
557{ 570{
571 unregister_reboot_notifier(&shutdown_notifier);
572
558 exit_litmus_proc(); 573 exit_litmus_proc();
559 kmem_cache_destroy(bheap_node_cache); 574 kmem_cache_destroy(bheap_node_cache);
560 kmem_cache_destroy(release_heap_cache); 575 kmem_cache_destroy(release_heap_cache);
diff --git a/litmus/locking.c b/litmus/locking.c
index 0c1aa6aa40b7..43d9aece2e74 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -1,9 +1,14 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
1#include <litmus/fdso.h> 3#include <litmus/fdso.h>
2 4
3#ifdef CONFIG_LITMUS_LOCKING 5#ifdef CONFIG_LITMUS_LOCKING
4 6
7#include <linux/sched.h>
8#include <litmus/litmus.h>
5#include <litmus/sched_plugin.h> 9#include <litmus/sched_plugin.h>
6#include <litmus/trace.h> 10#include <litmus/trace.h>
11#include <litmus/wait.h>
7 12
8static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); 13static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
9static int open_generic_lock(struct od_table_entry* entry, void* __user arg); 14static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
@@ -69,6 +74,10 @@ asmlinkage long sys_litmus_lock(int lock_od)
69 struct od_table_entry* entry; 74 struct od_table_entry* entry;
70 struct litmus_lock* l; 75 struct litmus_lock* l;
71 76
77 TS_SYSCALL_IN_START;
78
79 TS_SYSCALL_IN_END;
80
72 TS_LOCK_START; 81 TS_LOCK_START;
73 82
74 entry = get_entry_for_od(lock_od); 83 entry = get_entry_for_od(lock_od);
@@ -82,6 +91,8 @@ asmlinkage long sys_litmus_lock(int lock_od)
82 * this into account when computing overheads. */ 91 * this into account when computing overheads. */
83 TS_LOCK_END; 92 TS_LOCK_END;
84 93
94 TS_SYSCALL_OUT_START;
95
85 return err; 96 return err;
86} 97}
87 98
@@ -91,6 +102,10 @@ asmlinkage long sys_litmus_unlock(int lock_od)
91 struct od_table_entry* entry; 102 struct od_table_entry* entry;
92 struct litmus_lock* l; 103 struct litmus_lock* l;
93 104
105 TS_SYSCALL_IN_START;
106
107 TS_SYSCALL_IN_END;
108
94 TS_UNLOCK_START; 109 TS_UNLOCK_START;
95 110
96 entry = get_entry_for_od(lock_od); 111 entry = get_entry_for_od(lock_od);
@@ -104,6 +119,8 @@ asmlinkage long sys_litmus_unlock(int lock_od)
104 * account when computing overheads. */ 119 * account when computing overheads. */
105 TS_UNLOCK_END; 120 TS_UNLOCK_END;
106 121
122 TS_SYSCALL_OUT_START;
123
107 return err; 124 return err;
108} 125}
109 126
@@ -121,6 +138,38 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
121 return(t); 138 return(t);
122} 139}
123 140
141unsigned int __add_wait_queue_prio_exclusive(
142 wait_queue_head_t* head,
143 prio_wait_queue_t *new)
144{
145 struct list_head *pos;
146 unsigned int passed = 0;
147
148 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
149
150 /* find a spot where the new entry is less than the next */
151 list_for_each(pos, &head->task_list) {
152 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
153 wq.task_list);
154
155 if (unlikely(lt_before(new->priority, queued->priority) ||
156 (new->priority == queued->priority &&
157 new->tie_breaker < queued->tie_breaker))) {
158 /* pos is not less than new, thus insert here */
159 __list_add(&new->wq.task_list, pos->prev, pos);
160 goto out;
161 }
162 passed++;
163 }
164
165	/* if we get to this point either the list is empty or every
166 * queued element is less than new.
167 * Let's add new to the end. */
168 list_add_tail(&new->wq.task_list, &head->task_list);
169out:
170 return passed;
171}
172
124 173
125#else 174#else
126 175
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 5704d0bf4c0b..6be2f26728b8 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -2,6 +2,7 @@
2 2
3#include <litmus/litmus.h> 3#include <litmus/litmus.h>
4#include <litmus/preempt.h> 4#include <litmus/preempt.h>
5#include <litmus/trace.h>
5 6
6/* The rescheduling state of each processor. 7/* The rescheduling state of each processor.
7 */ 8 */
@@ -47,6 +48,7 @@ void sched_state_ipi(void)
47 set_tsk_need_resched(current); 48 set_tsk_need_resched(current);
48 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", 49 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
49 current->comm, current->pid); 50 current->comm, current->pid);
51 TS_SEND_RESCHED_END;
50 } else { 52 } else {
51 /* ignore */ 53 /* ignore */
52 TRACE_STATE("ignoring IPI in state %x (%s)\n", 54 TRACE_STATE("ignoring IPI in state %x (%s)\n",
@@ -85,8 +87,10 @@ void litmus_reschedule(int cpu)
85 if (scheduled_transition_ok) { 87 if (scheduled_transition_ok) {
86 if (smp_processor_id() == cpu) 88 if (smp_processor_id() == cpu)
87 set_tsk_need_resched(current); 89 set_tsk_need_resched(current);
88 else 90 else {
91 TS_SEND_RESCHED_START(cpu);
89 smp_send_reschedule(cpu); 92 smp_send_reschedule(cpu);
93 }
90 } 94 }
91 95
92 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n", 96 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index d405854cd39c..1683d3847560 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -300,9 +300,11 @@ void rt_domain_init(rt_domain_t *rt,
300 */ 300 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new) 301void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{ 302{
303 TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n", 303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
304 new->comm, new->pid, get_exec_cost(new), get_rt_period(new), 304 "to ready queue at %llu\n",
305 get_release(new), litmus_clock()); 305 new->comm, new->pid,
306 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
307 get_release(new), litmus_clock());
306 308
307 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); 309 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
308 310
@@ -329,12 +331,7 @@ void __add_release_on(rt_domain_t* rt, struct task_struct *task,
329 list_add(&tsk_rt(task)->list, &rt->tobe_released); 331 list_add(&tsk_rt(task)->list, &rt->tobe_released);
330 task->rt_param.domain = rt; 332 task->rt_param.domain = rt;
331 333
332 /* start release timer */
333 TS_SCHED2_START(task);
334
335 arm_release_timer_on(rt, target_cpu); 334 arm_release_timer_on(rt, target_cpu);
336
337 TS_SCHED2_END(task);
338} 335}
339#endif 336#endif
340 337
@@ -347,11 +344,6 @@ void __add_release(rt_domain_t* rt, struct task_struct *task)
347 list_add(&tsk_rt(task)->list, &rt->tobe_released); 344 list_add(&tsk_rt(task)->list, &rt->tobe_released);
348 task->rt_param.domain = rt; 345 task->rt_param.domain = rt;
349 346
350 /* start release timer */
351 TS_SCHED2_START(task);
352
353 arm_release_timer(rt); 347 arm_release_timer(rt);
354
355 TS_SCHED2_END(task);
356} 348}
357 349
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 480c62bc895b..b45b46fc4fca 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -35,6 +35,7 @@
35#include <litmus/litmus.h> 35#include <litmus/litmus.h>
36#include <litmus/jobs.h> 36#include <litmus/jobs.h>
37#include <litmus/preempt.h> 37#include <litmus/preempt.h>
38#include <litmus/budget.h>
38#include <litmus/sched_plugin.h> 39#include <litmus/sched_plugin.h>
39#include <litmus/edf_common.h> 40#include <litmus/edf_common.h>
40#include <litmus/sched_trace.h> 41#include <litmus/sched_trace.h>
@@ -170,7 +171,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
170 171
171 /* Link new task to CPU. */ 172 /* Link new task to CPU. */
172 if (linked) { 173 if (linked) {
173 set_rt_flags(linked, RT_F_RUNNING); 174 tsk_rt(linked)->completed = 0;
174 /* handle task is already scheduled somewhere! */ 175 /* handle task is already scheduled somewhere! */
175 on_cpu = linked->rt_param.scheduled_on; 176 on_cpu = linked->rt_param.scheduled_on;
176 if (on_cpu != NO_CPU) { 177 if (on_cpu != NO_CPU) {
@@ -304,11 +305,11 @@ static void check_for_preemptions(cedf_domain_t *cluster)
304 &per_cpu(cedf_cpu_entries, task_cpu(task))); 305 &per_cpu(cedf_cpu_entries, task_cpu(task)));
305 if(affinity) 306 if(affinity)
306 last = affinity; 307 last = affinity;
307 else if(last->linked) 308 else if(requeue_preempted_job(last->linked))
308 requeue(last->linked); 309 requeue(last->linked);
309 } 310 }
310#else 311#else
311 if (last->linked) 312 if (requeue_preempted_job(last->linked))
312 requeue(last->linked); 313 requeue(last->linked);
313#endif 314#endif
314 link_task_to_cpu(task, last); 315 link_task_to_cpu(task, last);
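The new helper requeue_preempted_job() comes from the litmus/budget.h header added by this commit; it guards against requeuing a preempted job whose enforced budget is already exhausted. A plausible shape of the helper, shown only as a sketch since the header itself is not part of this excerpt:

/* Sketch (assumed definition in include/litmus/budget.h): requeue a
 * preempted job only if it exists and still has budget left, or if
 * budget enforcement is disabled for it. */
static inline int requeue_preempted_job(struct task_struct *t)
{
	return t && (!budget_exhausted(t) || !budget_enforced(t));
}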
@@ -349,7 +350,7 @@ static noinline void job_completion(struct task_struct *t, int forced)
349 TRACE_TASK(t, "job_completion().\n"); 350 TRACE_TASK(t, "job_completion().\n");
350 351
351 /* set flags */ 352 /* set flags */
352 set_rt_flags(t, RT_F_SLEEP); 353 tsk_rt(t)->completed = 1;
353 /* prepare for next period */ 354 /* prepare for next period */
354 prepare_for_next_period(t); 355 prepare_for_next_period(t);
355 if (is_released(t, litmus_clock())) 356 if (is_released(t, litmus_clock()))
@@ -403,7 +404,7 @@ static void cedf_tick(struct task_struct* t)
403 * 404 *
404 * - !is_running(scheduled) // the job blocks 405 * - !is_running(scheduled) // the job blocks
405 * - scheduled->timeslice == 0 // the job completed (forcefully) 406 * - scheduled->timeslice == 0 // the job completed (forcefully)
406 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) 407 * - is_completed() // the job completed (by syscall)
407 * - linked != scheduled // we need to reschedule (for any reason) 408 * - linked != scheduled // we need to reschedule (for any reason)
408 * - is_np(scheduled) // rescheduling must be delayed, 409 * - is_np(scheduled) // rescheduling must be delayed,
409 * sys_exit_np must be requested 410 * sys_exit_np must be requested
@@ -442,7 +443,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
442 budget_enforced(entry->scheduled) && 443 budget_enforced(entry->scheduled) &&
443 budget_exhausted(entry->scheduled); 444 budget_exhausted(entry->scheduled);
444 np = exists && is_np(entry->scheduled); 445 np = exists && is_np(entry->scheduled);
445 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; 446 sleep = exists && is_completed(entry->scheduled);
446 preempt = entry->scheduled != entry->linked; 447 preempt = entry->scheduled != entry->linked;
447 448
448#ifdef WANT_ALL_SCHED_EVENTS 449#ifdef WANT_ALL_SCHED_EVENTS
@@ -478,9 +479,9 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
478 /* Any task that is preemptable and either exhausts its execution 479 /* Any task that is preemptable and either exhausts its execution
479 * budget or wants to sleep completes. We may have to reschedule after 480 * budget or wants to sleep completes. We may have to reschedule after
480 * this. Don't do a job completion if we block (can't have timers running 481 * this. Don't do a job completion if we block (can't have timers running
481 * for blocked jobs). Preemption go first for the same reason. 482 * for blocked jobs).
482 */ 483 */
483 if (!np && (out_of_time || sleep) && !blocks && !preempt) 484 if (!np && (out_of_time || sleep) && !blocks)
484 job_completion(entry->scheduled, !sleep); 485 job_completion(entry->scheduled, !sleep);
485 486
486 /* Link pending task if we became unlinked. 487 /* Link pending task if we became unlinked.
@@ -594,25 +595,17 @@ static void cedf_task_wake_up(struct task_struct *task)
594 cluster = task_cpu_cluster(task); 595 cluster = task_cpu_cluster(task);
595 596
596 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 597 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
597 /* We need to take suspensions because of semaphores into 598 now = litmus_clock();
598 * account! If a job resumes after being suspended due to acquiring 599 if (is_tardy(task, now)) {
599 * a semaphore, it should never be treated as a new job release. 600 /* new sporadic release */
600 */ 601 release_at(task, now);
601 if (get_rt_flags(task) == RT_F_EXIT_SEM) { 602 sched_trace_task_release(task);
602 set_rt_flags(task, RT_F_RUNNING); 603 }
603 } else { 604 else {
604 now = litmus_clock(); 605 if (task->rt.time_slice) {
605 if (is_tardy(task, now)) { 606 /* came back in time before deadline
606 /* new sporadic release */ 607 */
607 release_at(task, now); 608 tsk_rt(task)->completed = 0;
608 sched_trace_task_release(task);
609 }
610 else {
611 if (task->rt.time_slice) {
612 /* came back in time before deadline
613 */
614 set_rt_flags(task, RT_F_RUNNING);
615 }
616 } 609 }
617 } 610 }
618 cedf_job_arrival(task); 611 cedf_job_arrival(task);
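The rewritten wake-up path treats a job that resumes past its deadline as a new sporadic release. Roughly, release_at(task, now), whose actual implementation lives in litmus/jobs.c and is not part of this excerpt, re-initializes the job parameters along these lines (sketch under that assumption):

/* Sketch of the intended effect of release_at(t, now) on a tardy wake-up:
 * the job is treated as released "now" with a fresh deadline and budget. */
tsk_rt(t)->job_params.release   = now;
tsk_rt(t)->job_params.deadline  = now + get_rt_relative_deadline(t);
tsk_rt(t)->job_params.exec_time = 0;
tsk_rt(t)->completed = 0;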
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index 6ed504f4750e..b8548b885b35 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -21,6 +21,7 @@
21#include <litmus/trace.h> 21#include <litmus/trace.h>
22 22
23#include <litmus/preempt.h> 23#include <litmus/preempt.h>
24#include <litmus/budget.h>
24 25
25#include <litmus/bheap.h> 26#include <litmus/bheap.h>
26 27
@@ -43,7 +44,7 @@
43 * (thereby removing its association with this 44 * (thereby removing its association with this
44 * CPU). However, it will not requeue the 45 * CPU). However, it will not requeue the
45 * previously linked task (if any). It will set 46 * previously linked task (if any). It will set
46 * T's state to RT_F_RUNNING and check whether 47 * T's state to not completed and check whether
47 * it is already running somewhere else. If T 48 * it is already running somewhere else. If T
48 * is scheduled somewhere else it will link 49 * is scheduled somewhere else it will link
49 * it to that CPU instead (and pull the linked 50 * it to that CPU instead (and pull the linked
@@ -172,7 +173,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
172 173
173 /* Link new task to CPU. */ 174 /* Link new task to CPU. */
174 if (linked) { 175 if (linked) {
175 set_rt_flags(linked, RT_F_RUNNING); 176 tsk_rt(linked)->completed = 0;
176 /* handle task is already scheduled somewhere! */ 177 /* handle task is already scheduled somewhere! */
177 on_cpu = linked->rt_param.scheduled_on; 178 on_cpu = linked->rt_param.scheduled_on;
178 if (on_cpu != NO_CPU) { 179 if (on_cpu != NO_CPU) {
@@ -296,11 +297,11 @@ static void check_for_preemptions(void)
296 &per_cpu(gsnedf_cpu_entries, task_cpu(task))); 297 &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
297 if (affinity) 298 if (affinity)
298 last = affinity; 299 last = affinity;
299 else if (last->linked) 300 else if (requeue_preempted_job(last->linked))
300 requeue(last->linked); 301 requeue(last->linked);
301 } 302 }
302#else 303#else
303 if (last->linked) 304 if (requeue_preempted_job(last->linked))
304 requeue(last->linked); 305 requeue(last->linked);
305#endif 306#endif
306 307
@@ -340,7 +341,7 @@ static noinline void job_completion(struct task_struct *t, int forced)
340 TRACE_TASK(t, "job_completion().\n"); 341 TRACE_TASK(t, "job_completion().\n");
341 342
342 /* set flags */ 343 /* set flags */
343 set_rt_flags(t, RT_F_SLEEP); 344 tsk_rt(t)->completed = 1;
344 /* prepare for next period */ 345 /* prepare for next period */
345 prepare_for_next_period(t); 346 prepare_for_next_period(t);
346 if (is_released(t, litmus_clock())) 347 if (is_released(t, litmus_clock()))
@@ -393,7 +394,7 @@ static void gsnedf_tick(struct task_struct* t)
393 * 394 *
394 * - !is_running(scheduled) // the job blocks 395 * - !is_running(scheduled) // the job blocks
395 * - scheduled->timeslice == 0 // the job completed (forcefully) 396 * - scheduled->timeslice == 0 // the job completed (forcefully)
396 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) 397 * - is_completed() // the job completed (by syscall)
397 * - linked != scheduled // we need to reschedule (for any reason) 398 * - linked != scheduled // we need to reschedule (for any reason)
398 * - is_np(scheduled) // rescheduling must be delayed, 399 * - is_np(scheduled) // rescheduling must be delayed,
399 * sys_exit_np must be requested 400 * sys_exit_np must be requested
@@ -426,11 +427,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
426 /* (0) Determine state */ 427 /* (0) Determine state */
427 exists = entry->scheduled != NULL; 428 exists = entry->scheduled != NULL;
428 blocks = exists && !is_running(entry->scheduled); 429 blocks = exists && !is_running(entry->scheduled);
429 out_of_time = exists && 430 out_of_time = exists && budget_enforced(entry->scheduled)
430 budget_enforced(entry->scheduled) && 431 && budget_exhausted(entry->scheduled);
431 budget_exhausted(entry->scheduled);
432 np = exists && is_np(entry->scheduled); 432 np = exists && is_np(entry->scheduled);
433 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; 433 sleep = exists && is_completed(entry->scheduled);
434 preempt = entry->scheduled != entry->linked; 434 preempt = entry->scheduled != entry->linked;
435 435
436#ifdef WANT_ALL_SCHED_EVENTS 436#ifdef WANT_ALL_SCHED_EVENTS
@@ -466,9 +466,9 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
466 /* Any task that is preemptable and either exhausts its execution 466 /* Any task that is preemptable and either exhausts its execution
467 * budget or wants to sleep completes. We may have to reschedule after 467 * budget or wants to sleep completes. We may have to reschedule after
468 * this. Don't do a job completion if we block (can't have timers running 468 * this. Don't do a job completion if we block (can't have timers running
469 * for blocked jobs). Preemption go first for the same reason. 469 * for blocked jobs).
470 */ 470 */
471 if (!np && (out_of_time || sleep) && !blocks && !preempt) 471 if (!np && (out_of_time || sleep) && !blocks)
472 job_completion(entry->scheduled, !sleep); 472 job_completion(entry->scheduled, !sleep);
473 473
474 /* Link pending task if we became unlinked. 474 /* Link pending task if we became unlinked.
@@ -577,25 +577,17 @@ static void gsnedf_task_wake_up(struct task_struct *task)
577 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 577 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
578 578
579 raw_spin_lock_irqsave(&gsnedf_lock, flags); 579 raw_spin_lock_irqsave(&gsnedf_lock, flags);
580 /* We need to take suspensions because of semaphores into 580 now = litmus_clock();
581 * account! If a job resumes after being suspended due to acquiring 581 if (is_tardy(task, now)) {
582 * a semaphore, it should never be treated as a new job release. 582 /* new sporadic release */
583 */ 583 release_at(task, now);
584 if (get_rt_flags(task) == RT_F_EXIT_SEM) { 584 sched_trace_task_release(task);
585 set_rt_flags(task, RT_F_RUNNING); 585 }
586 } else { 586 else {
587 now = litmus_clock(); 587 if (task->rt.time_slice) {
588 if (is_tardy(task, now)) { 588 /* came back in time before deadline
589 /* new sporadic release */ 589 */
590 release_at(task, now); 590 tsk_rt(task)->completed = 0;
591 sched_trace_task_release(task);
592 }
593 else {
594 if (task->rt.time_slice) {
595 /* came back in time before deadline
596 */
597 set_rt_flags(task, RT_F_RUNNING);
598 }
599 } 591 }
600 } 592 }
601 gsnedf_job_arrival(task); 593 gsnedf_job_arrival(task);
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 5a15ce938984..6b32cf09abbd 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -102,9 +102,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
102 } 102 }
103 } 103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW 104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->oncpu) 105 if (next->on_cpu)
106 TRACE_TASK(next, "waiting for !oncpu"); 106 TRACE_TASK(next, "waiting for !oncpu");
107 while (next->oncpu) { 107 while (next->on_cpu) {
108 cpu_relax(); 108 cpu_relax();
109 mb(); 109 mb();
110 } 110 }
@@ -194,6 +194,9 @@ static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
194 194
195static void yield_task_litmus(struct rq *rq) 195static void yield_task_litmus(struct rq *rq)
196{ 196{
197 TS_SYSCALL_IN_START;
198 TS_SYSCALL_IN_END;
199
197 BUG_ON(rq->curr != current); 200 BUG_ON(rq->curr != current);
198 /* sched_yield() is called to trigger delayed preemptions. 201 /* sched_yield() is called to trigger delayed preemptions.
199 * Thus, mark the current task as needing to be rescheduled. 202 * Thus, mark the current task as needing to be rescheduled.
@@ -202,6 +205,8 @@ static void yield_task_litmus(struct rq *rq)
202 */ 205 */
203 clear_exit_np(current); 206 clear_exit_np(current);
204 litmus_reschedule_local(); 207 litmus_reschedule_local();
208
209 TS_SYSCALL_OUT_START;
205} 210}
206 211
207/* Plugins are responsible for this. 212/* Plugins are responsible for this.
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
index 16f1065bbdca..6a89b003306c 100644
--- a/litmus/sched_pfair.c
+++ b/litmus/sched_pfair.c
@@ -254,7 +254,7 @@ static void check_preempt(struct task_struct* t)
254{ 254{
255 int cpu = NO_CPU; 255 int cpu = NO_CPU;
256 if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on && 256 if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
257 tsk_rt(t)->present) { 257 is_present(t)) {
258 /* the task can be scheduled and 258 /* the task can be scheduled and
259 * is not scheduled where it ought to be scheduled 259 * is not scheduled where it ought to be scheduled
260 */ 260 */
@@ -299,7 +299,7 @@ static void pfair_prepare_next_period(struct task_struct* t)
299 struct pfair_param* p = tsk_pfair(t); 299 struct pfair_param* p = tsk_pfair(t);
300 300
301 prepare_for_next_period(t); 301 prepare_for_next_period(t);
302 get_rt_flags(t) = RT_F_RUNNING; 302 tsk_rt(t)->completed = 0;
303 p->release += p->period; 303 p->release += p->period;
304} 304}
305 305
@@ -310,7 +310,7 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
310 int to_relq; 310 int to_relq;
311 p->cur = (p->cur + 1) % p->quanta; 311 p->cur = (p->cur + 1) % p->quanta;
312 if (!p->cur) { 312 if (!p->cur) {
313 if (tsk_rt(t)->present) { 313 if (is_present(t)) {
314 /* The job overran; we start a new budget allocation. */ 314 /* The job overran; we start a new budget allocation. */
315 pfair_prepare_next_period(t); 315 pfair_prepare_next_period(t);
316 } else { 316 } else {
@@ -598,7 +598,7 @@ static int safe_to_schedule(struct task_struct* t, int cpu)
598 "scheduled already on %d.\n", cpu, where); 598 "scheduled already on %d.\n", cpu, where);
599 return 0; 599 return 0;
600 } else 600 } else
601 return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING; 601 return is_present(t) && !is_completed(t);
602} 602}
603 603
604static struct task_struct* pfair_schedule(struct task_struct * prev) 604static struct task_struct* pfair_schedule(struct task_struct * prev)
@@ -621,7 +621,7 @@ static struct task_struct* pfair_schedule(struct task_struct * prev)
621 raw_spin_lock(cpu_lock(state)); 621 raw_spin_lock(cpu_lock(state));
622 622
623 blocks = is_realtime(prev) && !is_running(prev); 623 blocks = is_realtime(prev) && !is_running(prev);
624 completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP; 624 completion = is_realtime(prev) && is_completed(prev);
625 out_of_time = is_realtime(prev) && time_after(cur_release(prev), 625 out_of_time = is_realtime(prev) && time_after(cur_release(prev),
626 state->local_tick); 626 state->local_tick);
627 627
@@ -720,7 +720,7 @@ static void pfair_task_wake_up(struct task_struct *t)
720 /* only add to ready queue if the task isn't still linked somewhere */ 720 /* only add to ready queue if the task isn't still linked somewhere */
721 if (requeue) { 721 if (requeue) {
722 TRACE_TASK(t, "requeueing required\n"); 722 TRACE_TASK(t, "requeueing required\n");
723 tsk_rt(t)->flags = RT_F_RUNNING; 723 tsk_rt(t)->completed = 0;
724 __add_ready(&cluster->pfair, t); 724 __add_ready(&cluster->pfair, t);
725 } 725 }
726 726
@@ -850,6 +850,13 @@ static long pfair_admit_task(struct task_struct* t)
850 cpu_cluster(pstate[task_cpu(t)])) 850 cpu_cluster(pstate[task_cpu(t)]))
851 return -EINVAL; 851 return -EINVAL;
852 852
853 if (get_rt_period(t) != get_rt_relative_deadline(t)) {
854 printk(KERN_INFO "%s: Admission rejected. "
855 "Only implicit deadlines are currently supported.\n",
856 litmus->plugin_name);
857 return -EINVAL;
858 }
859
853 /* Pfair is a tick-based method, so the time 860 /* Pfair is a tick-based method, so the time
854 * of interest is jiffies. Calculate tick-based 861 * of interest is jiffies. Calculate tick-based
855 * times for everything. 862 * times for everything.
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 000000000000..0e875a3b5cba
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,1711 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/litmus.h>
15#include <litmus/wait.h>
16#include <litmus/jobs.h>
17#include <litmus/preempt.h>
18#include <litmus/fp_common.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22#include <litmus/budget.h>
23
24#include <linux/uaccess.h>
25
26
27typedef struct {
28 rt_domain_t domain;
29 struct fp_prio_queue ready_queue;
30 int cpu;
31 struct task_struct* scheduled; /* only RT tasks */
32/*
33 * scheduling lock slock
34 * protects the domain and serializes scheduling decisions
35 */
36#define slock domain.ready_lock
37
38} pfp_domain_t;
39
40DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
41
42pfp_domain_t* pfp_doms[NR_CPUS];
43
44#define local_pfp (&__get_cpu_var(pfp_domains))
45#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
46#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
47#define task_dom(task) remote_dom(get_partition(task))
48#define task_pfp(task) remote_pfp(get_partition(task))
49
50/* we assume the lock is being held */
51static void preempt(pfp_domain_t *pfp)
52{
53 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
54}
55
56static unsigned int priority_index(struct task_struct* t)
57{
58#ifdef CONFIG_LITMUS_LOCKING
59 if (unlikely(t->rt_param.inh_task))
60 /* use effective priority */
61 t = t->rt_param.inh_task;
62
63 if (is_priority_boosted(t)) {
64 /* zero is reserved for priority-boosted tasks */
65 return 0;
66 } else
67#endif
68 return get_priority(t);
69}
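Since lower indices mean higher priority in the fixed-priority ready queue, reserving index 0 for priority-boosted jobs lets any boosted lock holder outrank all regular priorities, and an inheriting task is indexed by its donor. A small worked example (illustrative values only):

/* Illustrative only:
 *   regular job with get_priority(t) == 5        -> index 5
 *   job inheriting from a donor with priority 2  -> index 2
 *   priority-boosted lock holder                 -> index 0 (beats both)
 */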
70
71
72static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
73{
74 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
75 unsigned long flags;
76 struct task_struct* t;
77 struct bheap_node* hn;
78
79 raw_spin_lock_irqsave(&pfp->slock, flags);
80
81 while (!bheap_empty(tasks)) {
82 hn = bheap_take(fp_ready_order, tasks);
83 t = bheap2task(hn);
84 TRACE_TASK(t, "released (part:%d prio:%d)\n",
85 get_partition(t), get_priority(t));
86 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
87 }
88
89 /* do we need to preempt? */
90 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
91 TRACE_CUR("preempted by new release\n");
92 preempt(pfp);
93 }
94
95 raw_spin_unlock_irqrestore(&pfp->slock, flags);
96}
97
98static void pfp_preempt_check(pfp_domain_t *pfp)
99{
100 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
101 preempt(pfp);
102}
103
104static void pfp_domain_init(pfp_domain_t* pfp,
105 int cpu)
106{
107 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
108 pfp->cpu = cpu;
109 pfp->scheduled = NULL;
110 fp_prio_queue_init(&pfp->ready_queue);
111}
112
113static void requeue(struct task_struct* t, pfp_domain_t *pfp)
114{
115 BUG_ON(!is_running(t));
116
117 tsk_rt(t)->completed = 0;
118 if (is_released(t, litmus_clock()))
119 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
120 else
121 add_release(&pfp->domain, t); /* it has got to wait */
122}
123
124static void job_completion(struct task_struct* t, int forced)
125{
126 sched_trace_task_completion(t,forced);
127 TRACE_TASK(t, "job_completion().\n");
128
129 tsk_rt(t)->completed = 1;
130 prepare_for_next_period(t);
131 if (is_released(t, litmus_clock()))
132 sched_trace_task_release(t);
133}
134
135static void pfp_tick(struct task_struct *t)
136{
137 pfp_domain_t *pfp = local_pfp;
138
139 /* Check for inconsistency. We don't need the lock for this since
140 * ->scheduled is only changed in schedule, which obviously is not
141 * executing in parallel on this CPU
142 */
143 BUG_ON(is_realtime(t) && t != pfp->scheduled);
144
145 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
146 if (!is_np(t)) {
147 litmus_reschedule_local();
148 TRACE("pfp_scheduler_tick: "
149 "%d is preemptable "
150 " => FORCE_RESCHED\n", t->pid);
151 } else if (is_user_np(t)) {
152 TRACE("pfp_scheduler_tick: "
153 "%d is non-preemptable, "
154 "preemption delayed.\n", t->pid);
155 request_exit_np(t);
156 }
157 }
158}
159
160static struct task_struct* pfp_schedule(struct task_struct * prev)
161{
162 pfp_domain_t* pfp = local_pfp;
163 struct task_struct* next;
164
165 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
166
167 raw_spin_lock(&pfp->slock);
168
169 /* sanity checking
170 * unlike under G-EDF, when a task exits (is dead),
171 * pfp->scheduled may be NULL and prev _is_ realtime
172 */
173 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
174 BUG_ON(pfp->scheduled && !is_realtime(prev));
175
176 /* (0) Determine state */
177 exists = pfp->scheduled != NULL;
178 blocks = exists && !is_running(pfp->scheduled);
179 out_of_time = exists &&
180 budget_enforced(pfp->scheduled) &&
181 budget_exhausted(pfp->scheduled);
182 np = exists && is_np(pfp->scheduled);
183 sleep = exists && is_completed(pfp->scheduled);
184 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
185 preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
186
187 /* If we need to preempt do so.
188 * The following checks set resched to 1 in case of special
189 * circumstances.
190 */
191 resched = preempt;
192
193 /* If a task blocks we have no choice but to reschedule.
194 */
195 if (blocks)
196 resched = 1;
197
198 /* Request a sys_exit_np() call if we would like to preempt but cannot.
199 * Multiple calls to request_exit_np() don't hurt.
200 */
201 if (np && (out_of_time || preempt || sleep))
202 request_exit_np(pfp->scheduled);
203
204 /* Any task that is preemptable and either exhausts its execution
205 * budget or wants to sleep completes. We may have to reschedule after
206 * this.
207 */
208 if (!np && (out_of_time || sleep) && !blocks && !migrate) {
209 job_completion(pfp->scheduled, !sleep);
210 resched = 1;
211 }
212
213 /* The final scheduling decision. Do we need to switch for some reason?
214 * Switch if we are in RT mode and have no task or if we need to
215 * resched.
216 */
217 next = NULL;
218 if ((!np || blocks) && (resched || !exists)) {
219 /* When preempting a task that does not block, then
220 * re-insert it into either the ready queue or the
221 * release queue (if it completed). requeue() picks
222 * the appropriate queue.
223 */
224 if (pfp->scheduled && !blocks && !migrate)
225 requeue(pfp->scheduled, pfp);
226 next = fp_prio_take(&pfp->ready_queue);
227 if (next == prev) {
228 struct task_struct *t = fp_prio_peek(&pfp->ready_queue);
229 TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d "
230 "boost=%d empty=%d prio-idx=%u prio=%u\n",
231 sleep, out_of_time, np, preempt, migrate,
232 is_priority_boosted(next),
233 t == NULL,
234 priority_index(next),
235 get_priority(next));
236 if (t)
237 TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n",
238 is_priority_boosted(t),
239 priority_index(t),
240 get_priority(t));
241 }
242 /* If preempt is set, we should not see the same task again. */
243 BUG_ON(preempt && next == prev);
244 /* Similarly, if preempt is set, then next may not be NULL,
245 * unless it's a migration. */
246 BUG_ON(preempt && !migrate && next == NULL);
247 } else
248 /* Only override Linux scheduler if we have a real-time task
249 * scheduled that needs to continue.
250 */
251 if (exists)
252 next = prev;
253
254 if (next) {
255 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
256 tsk_rt(next)->completed = 0;
257 } else {
258 TRACE("becoming idle at %llu\n", litmus_clock());
259 }
260
261 pfp->scheduled = next;
262 sched_state_task_picked();
263 raw_spin_unlock(&pfp->slock);
264
265 return next;
266}
267
268#ifdef CONFIG_LITMUS_LOCKING
269
270/* prev is no longer scheduled --- see if it needs to migrate */
271static void pfp_finish_switch(struct task_struct *prev)
272{
273 pfp_domain_t *to;
274
275 if (is_realtime(prev) &&
276 is_running(prev) &&
277 get_partition(prev) != smp_processor_id()) {
278 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
279 smp_processor_id(), get_partition(prev));
280
281 to = task_pfp(prev);
282
283 raw_spin_lock(&to->slock);
284
285 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
286 requeue(prev, to);
287 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
288 preempt(to);
289
290 raw_spin_unlock(&to->slock);
291
292 }
293}
294
295#endif
296
297/* Prepare a task for running in RT mode
298 */
299static void pfp_task_new(struct task_struct * t, int on_rq, int running)
300{
301 pfp_domain_t* pfp = task_pfp(t);
302 unsigned long flags;
303
304 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
305 t->rt_param.task_params.cpu);
306
307 /* setup job parameters */
308 release_at(t, litmus_clock());
309
310 /* The task should be running in the queue, otherwise signal
311 * code will try to wake it up with fatal consequences.
312 */
313 raw_spin_lock_irqsave(&pfp->slock, flags);
314 if (running) {
315 /* there shouldn't be anything else running at the time */
316 BUG_ON(pfp->scheduled);
317 pfp->scheduled = t;
318 } else {
319 requeue(t, pfp);
320 /* maybe we have to reschedule */
321 pfp_preempt_check(pfp);
322 }
323 raw_spin_unlock_irqrestore(&pfp->slock, flags);
324}
325
326static void pfp_task_wake_up(struct task_struct *task)
327{
328 unsigned long flags;
329 pfp_domain_t* pfp = task_pfp(task);
330 lt_t now;
331
332 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
333 raw_spin_lock_irqsave(&pfp->slock, flags);
334
335#ifdef CONFIG_LITMUS_LOCKING
336 /* Should only be queued when processing a fake-wake up due to a
337 * migration-related state change. */
338 if (unlikely(is_queued(task))) {
339 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
340 goto out_unlock;
341 }
342#else
343 BUG_ON(is_queued(task));
344#endif
345 now = litmus_clock();
346 if (is_tardy(task, now)
347#ifdef CONFIG_LITMUS_LOCKING
348 /* We need to take suspensions because of semaphores into
349 * account! If a job resumes after being suspended due to acquiring
350 * a semaphore, it should never be treated as a new job release.
351 */
352 && !is_priority_boosted(task)
353#endif
354 ) {
355 /* new sporadic release */
356 release_at(task, now);
357 sched_trace_task_release(task);
358 }
359
360 /* Only add to ready queue if it is not the currently-scheduled
361 * task. This could be the case if a task was woken up concurrently
362 * on a remote CPU before the executing CPU got around to actually
363 * de-scheduling the task, i.e., wake_up() raced with schedule()
364 * and won. Also, don't requeue if it is still queued, which can
365 * happen under the DPCP due to wake-ups racing with migrations.
366 */
367 if (pfp->scheduled != task) {
368 requeue(task, pfp);
369 pfp_preempt_check(pfp);
370 }
371
372#ifdef CONFIG_LITMUS_LOCKING
373out_unlock:
374#endif
375 raw_spin_unlock_irqrestore(&pfp->slock, flags);
376 TRACE_TASK(task, "wake up done\n");
377}
378
379static void pfp_task_block(struct task_struct *t)
380{
381 /* only running tasks can block, thus t is in no queue */
382 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
383
384 BUG_ON(!is_realtime(t));
385
386 /* If this task blocked normally, it shouldn't be queued. The exception is
387 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
388 * This should only happen if the DPCP is being used.
389 */
390#ifdef CONFIG_LITMUS_LOCKING
391 if (unlikely(is_queued(t)))
392 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
393#else
394 BUG_ON(is_queued(t));
395#endif
396}
397
398static void pfp_task_exit(struct task_struct * t)
399{
400 unsigned long flags;
401 pfp_domain_t* pfp = task_pfp(t);
402 rt_domain_t* dom;
403
404 raw_spin_lock_irqsave(&pfp->slock, flags);
405 if (is_queued(t)) {
406 BUG(); /* This currently doesn't work. */
407 /* dequeue */
408 dom = task_dom(t);
409 remove(dom, t);
410 }
411 if (pfp->scheduled == t) {
412 pfp->scheduled = NULL;
413 preempt(pfp);
414 }
415 TRACE_TASK(t, "RIP, now reschedule\n");
416
417 raw_spin_unlock_irqrestore(&pfp->slock, flags);
418}
419
420#ifdef CONFIG_LITMUS_LOCKING
421
422#include <litmus/fdso.h>
423#include <litmus/srp.h>
424
425static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
426{
427 BUG_ON(pfp->scheduled == t && is_queued(t));
428 if (is_queued(t))
429 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
430}
431
432static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
433 struct task_struct* prio_inh)
434{
435 int requeue;
436
437 if (!t || t->rt_param.inh_task == prio_inh) {
438 /* no update required */
439 if (t)
440 TRACE_TASK(t, "no prio-inh update required\n");
441 return;
442 }
443
444 requeue = is_queued(t);
445 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
446
447 if (requeue)
448 /* first remove */
449 fp_dequeue(pfp, t);
450
451 t->rt_param.inh_task = prio_inh;
452
453 if (requeue)
454 /* add again to the right queue */
455 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
456}
457
458static int effective_agent_priority(int prio)
459{
460 /* make sure agents have higher priority */
461 return prio - LITMUS_MAX_PRIORITY;
462}
463
464static lt_t prio_point(int eprio)
465{
466 /* make sure we have non-negative prio points */
467 return eprio + LITMUS_MAX_PRIORITY;
468}
469
470static int prio_from_point(lt_t prio_point)
471{
472 return ((int) prio_point) - LITMUS_MAX_PRIORITY;
473}
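Agents (lock-servicing jobs) are pushed below the regular priority range by subtracting LITMUS_MAX_PRIORITY, while wait-queue entries need non-negative keys, so prio_point()/prio_from_point() shift the value back and forth. A worked example, assuming LITMUS_MAX_PRIORITY == 512 (the actual constant is defined in litmus.h and not shown in this excerpt):

/* Assuming LITMUS_MAX_PRIORITY == 512 (assumption):
 *   get_priority(t)              ==    7
 *   effective_agent_priority(7)  ==    7 - 512 == -505  (beats any normal prio)
 *   prio_point(-505)             == -505 + 512 ==    7  (non-negative queue key)
 *   prio_from_point(7)           ==    7 - 512 == -505  (recovered on dequeue)
 */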
474
475static void boost_priority(struct task_struct* t, lt_t priority_point)
476{
477 unsigned long flags;
478 pfp_domain_t* pfp = task_pfp(t);
479
480 raw_spin_lock_irqsave(&pfp->slock, flags);
481
482
483 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
484
485 tsk_rt(t)->priority_boosted = 1;
486 /* tie-break by protocol-specific priority point */
487 tsk_rt(t)->boost_start_time = priority_point;
488
489 /* Priority boosting currently only takes effect for already-scheduled
490 * tasks. This is sufficient since priority boosting only kicks in as
491 * part of lock acquisitions. */
492 BUG_ON(pfp->scheduled != t);
493
494 raw_spin_unlock_irqrestore(&pfp->slock, flags);
495}
496
497static void unboost_priority(struct task_struct* t)
498{
499 unsigned long flags;
500 pfp_domain_t* pfp = task_pfp(t);
501 lt_t now;
502
503 raw_spin_lock_irqsave(&pfp->slock, flags);
504 now = litmus_clock();
505
506 /* assumption: this only happens when the job is scheduled */
507 BUG_ON(pfp->scheduled != t);
508
509 TRACE_TASK(t, "priority restored at %llu\n", now);
510
511 /* priority boosted jobs must be scheduled */
512 BUG_ON(pfp->scheduled != t);
513
514 tsk_rt(t)->priority_boosted = 0;
515 tsk_rt(t)->boost_start_time = 0;
516
517 /* check if this changes anything */
518 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
519 preempt(pfp);
520
521 raw_spin_unlock_irqrestore(&pfp->slock, flags);
522}
523
524/* ******************** SRP support ************************ */
525
526static unsigned int pfp_get_srp_prio(struct task_struct* t)
527{
528 return get_priority(t);
529}
530
531/* ******************** FMLP support ********************** */
532
533struct fmlp_semaphore {
534 struct litmus_lock litmus_lock;
535
536 /* current resource holder */
537 struct task_struct *owner;
538
539 /* FIFO queue of waiting tasks */
540 wait_queue_head_t wait;
541};
542
543static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
544{
545 return container_of(lock, struct fmlp_semaphore, litmus_lock);
546}
547int pfp_fmlp_lock(struct litmus_lock* l)
548{
549 struct task_struct* t = current;
550 struct fmlp_semaphore *sem = fmlp_from_lock(l);
551 wait_queue_t wait;
552 unsigned long flags;
553 lt_t time_of_request;
554
555 if (!is_realtime(t))
556 return -EPERM;
557
558 spin_lock_irqsave(&sem->wait.lock, flags);
559
560 /* tie-break by this point in time */
561 time_of_request = litmus_clock();
562
563 /* Priority-boost ourself *before* we suspend so that
564 * our priority is boosted when we resume. */
565 boost_priority(t, time_of_request);
566
567 if (sem->owner) {
568 /* resource is not free => must suspend and wait */
569
570 init_waitqueue_entry(&wait, t);
571
572 /* FIXME: interruptible would be nice some day */
573 set_task_state(t, TASK_UNINTERRUPTIBLE);
574
575 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
576
577 TS_LOCK_SUSPEND;
578
579 /* release lock before sleeping */
580 spin_unlock_irqrestore(&sem->wait.lock, flags);
581
582 /* We depend on the FIFO order. Thus, we don't need to recheck
583 * when we wake up; we are guaranteed to have the lock since
584 * there is only one wake up per release.
585 */
586
587 schedule();
588
589 TS_LOCK_RESUME;
590
591 /* Since we hold the lock, no other task will change
592 * ->owner. We can thus check it without acquiring the spin
593 * lock. */
594 BUG_ON(sem->owner != t);
595 } else {
596 /* it's ours now */
597 sem->owner = t;
598
599 spin_unlock_irqrestore(&sem->wait.lock, flags);
600 }
601
602 return 0;
603}
604
605int pfp_fmlp_unlock(struct litmus_lock* l)
606{
607 struct task_struct *t = current, *next;
608 struct fmlp_semaphore *sem = fmlp_from_lock(l);
609 unsigned long flags;
610 int err = 0;
611
612 spin_lock_irqsave(&sem->wait.lock, flags);
613
614 if (sem->owner != t) {
615 err = -EINVAL;
616 goto out;
617 }
618
619 /* we lose the benefit of priority boosting */
620
621 unboost_priority(t);
622
623 /* check if there are jobs waiting for this resource */
624 next = __waitqueue_remove_first(&sem->wait);
625 if (next) {
626 /* next becomes the resource holder */
627 sem->owner = next;
628
629 /* Wake up next. The waiting job is already priority-boosted. */
630 wake_up_process(next);
631 } else
632 /* resource becomes available */
633 sem->owner = NULL;
634
635out:
636 spin_unlock_irqrestore(&sem->wait.lock, flags);
637 return err;
638}
639
640int pfp_fmlp_close(struct litmus_lock* l)
641{
642 struct task_struct *t = current;
643 struct fmlp_semaphore *sem = fmlp_from_lock(l);
644 unsigned long flags;
645
646 int owner;
647
648 spin_lock_irqsave(&sem->wait.lock, flags);
649
650 owner = sem->owner == t;
651
652 spin_unlock_irqrestore(&sem->wait.lock, flags);
653
654 if (owner)
655 pfp_fmlp_unlock(l);
656
657 return 0;
658}
659
660void pfp_fmlp_free(struct litmus_lock* lock)
661{
662 kfree(fmlp_from_lock(lock));
663}
664
665static struct litmus_lock_ops pfp_fmlp_lock_ops = {
666 .close = pfp_fmlp_close,
667 .lock = pfp_fmlp_lock,
668 .unlock = pfp_fmlp_unlock,
669 .deallocate = pfp_fmlp_free,
670};
671
672static struct litmus_lock* pfp_new_fmlp(void)
673{
674 struct fmlp_semaphore* sem;
675
676 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
677 if (!sem)
678 return NULL;
679
680 sem->owner = NULL;
681 init_waitqueue_head(&sem->wait);
682 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
683
684 return &sem->litmus_lock;
685}
686
687/* ******************** MPCP support ********************** */
688
689struct mpcp_semaphore {
690 struct litmus_lock litmus_lock;
691
692 /* current resource holder */
693 struct task_struct *owner;
694
695 /* priority queue of waiting tasks */
696 wait_queue_head_t wait;
697
698 /* priority ceiling per cpu */
699 unsigned int prio_ceiling[NR_CPUS];
700
701 /* should jobs spin "virtually" for this resource? */
702 int vspin;
703};
704
705#define OMEGA_CEILING UINT_MAX
706
707/* Since jobs spin "virtually" while waiting to acquire a lock,
708 * they must first acquire a local per-cpu resource.
709 */
710static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
711static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
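Under MPCP-VS, "spinning" is emulated by a per-CPU token: at most one job per processor may have an outstanding global request, and later local requesters suspend on mpcpvs_vspin_wait in priority order. The resulting call order in the lock/unlock paths below is, schematically:

/* Schematic call order (see pfp_mpcp_lock()/pfp_mpcp_unlock() below):
 *
 *   preempt_disable();
 *   mpcp_vspin_enter();        // acquire the per-CPU "virtual spin" token
 *   boost_priority(t, ceiling);
 *   ... suspend until sem->owner == t ...
 *   <critical section>
 *   unboost_priority(t);
 *   mpcp_vspin_exit();         // wake the next local requester, if any
 *   preempt_enable();
 */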
712
713/* called with preemptions off <=> no local modifications */
714static void mpcp_vspin_enter(void)
715{
716 struct task_struct* t = current;
717
718 while (1) {
719 if (__get_cpu_var(mpcpvs_vspin) == NULL) {
720 /* good, we get to issue our request */
721 __get_cpu_var(mpcpvs_vspin) = t;
722 break;
723 } else {
724 /* some job is spinning => enqueue in request queue */
725 prio_wait_queue_t wait;
726 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
727 unsigned long flags;
728
729 /* ordered by regular priority */
730 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
731
732 spin_lock_irqsave(&vspin->lock, flags);
733
734 set_task_state(t, TASK_UNINTERRUPTIBLE);
735
736 __add_wait_queue_prio_exclusive(vspin, &wait);
737
738 spin_unlock_irqrestore(&vspin->lock, flags);
739
740 TS_LOCK_SUSPEND;
741
742 preempt_enable_no_resched();
743
744 schedule();
745
746 preempt_disable();
747
748 TS_LOCK_RESUME;
749 /* Recheck if we got it --- some higher-priority process might
750 * have swooped in. */
751 }
752 }
753 /* ok, now it is ours */
754}
755
756/* called with preemptions off */
757static void mpcp_vspin_exit(void)
758{
759 struct task_struct* t = current, *next;
760 unsigned long flags;
761 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
762
763 BUG_ON(__get_cpu_var(mpcpvs_vspin) != t);
764
765 /* no spinning job */
766 __get_cpu_var(mpcpvs_vspin) = NULL;
767
768 /* see if anyone is waiting for us to stop "spinning" */
769 spin_lock_irqsave(&vspin->lock, flags);
770 next = __waitqueue_remove_first(vspin);
771
772 if (next)
773 wake_up_process(next);
774
775 spin_unlock_irqrestore(&vspin->lock, flags);
776}
777
778static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
779{
780 return container_of(lock, struct mpcp_semaphore, litmus_lock);
781}
782
783int pfp_mpcp_lock(struct litmus_lock* l)
784{
785 struct task_struct* t = current;
786 struct mpcp_semaphore *sem = mpcp_from_lock(l);
787 prio_wait_queue_t wait;
788 unsigned long flags;
789
790 if (!is_realtime(t))
791 return -EPERM;
792
793 preempt_disable();
794
795 if (sem->vspin)
796 mpcp_vspin_enter();
797
798 /* Priority-boost ourself *before* we suspend so that
799 * our priority is boosted when we resume. Use the priority
800 * ceiling for the local partition. */
801 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
802
803 spin_lock_irqsave(&sem->wait.lock, flags);
804
805 preempt_enable_no_resched();
806
807 if (sem->owner) {
808 /* resource is not free => must suspend and wait */
809
810 /* ordered by regular priority */
811 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
812
813 /* FIXME: interruptible would be nice some day */
814 set_task_state(t, TASK_UNINTERRUPTIBLE);
815
816 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
817
818 TS_LOCK_SUSPEND;
819
820 /* release lock before sleeping */
821 spin_unlock_irqrestore(&sem->wait.lock, flags);
822
823 /* We depend on the FIFO order. Thus, we don't need to recheck
824 * when we wake up; we are guaranteed to have the lock since
825 * there is only one wake up per release.
826 */
827
828 schedule();
829
830 TS_LOCK_RESUME;
831
832 /* Since we hold the lock, no other task will change
833 * ->owner. We can thus check it without acquiring the spin
834 * lock. */
835 BUG_ON(sem->owner != t);
836 } else {
837 /* it's ours now */
838 sem->owner = t;
839
840 spin_unlock_irqrestore(&sem->wait.lock, flags);
841 }
842
843 return 0;
844}
845
846int pfp_mpcp_unlock(struct litmus_lock* l)
847{
848 struct task_struct *t = current, *next;
849 struct mpcp_semaphore *sem = mpcp_from_lock(l);
850 unsigned long flags;
851 int err = 0;
852
853 spin_lock_irqsave(&sem->wait.lock, flags);
854
855 if (sem->owner != t) {
856 err = -EINVAL;
857 goto out;
858 }
859
860 /* we lose the benefit of priority boosting */
861
862 unboost_priority(t);
863
864 /* check if there are jobs waiting for this resource */
865 next = __waitqueue_remove_first(&sem->wait);
866 if (next) {
867 /* next becomes the resource holder */
868 sem->owner = next;
869
870 /* Wake up next. The waiting job is already priority-boosted. */
871 wake_up_process(next);
872 } else
873 /* resource becomes available */
874 sem->owner = NULL;
875
876out:
877 spin_unlock_irqrestore(&sem->wait.lock, flags);
878
879 if (sem->vspin && err == 0) {
880 preempt_disable();
881 mpcp_vspin_exit();
882 preempt_enable();
883 }
884
885 return err;
886}
887
888int pfp_mpcp_open(struct litmus_lock* l, void* config)
889{
890 struct task_struct *t = current;
891 struct mpcp_semaphore *sem = mpcp_from_lock(l);
892 int cpu, local_cpu;
893 unsigned long flags;
894
895 if (!is_realtime(t))
896 /* we need to know the real-time priority */
897 return -EPERM;
898
899 local_cpu = get_partition(t);
900
901 spin_lock_irqsave(&sem->wait.lock, flags);
902
903 for (cpu = 0; cpu < NR_CPUS; cpu++)
904 if (cpu != local_cpu)
905 {
906 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
907 get_priority(t));
908 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
909 sem, sem->prio_ceiling[cpu], cpu);
910 }
911
912 spin_unlock_irqrestore(&sem->wait.lock, flags);
913
914 return 0;
915}
916
917int pfp_mpcp_close(struct litmus_lock* l)
918{
919 struct task_struct *t = current;
920 struct mpcp_semaphore *sem = mpcp_from_lock(l);
921 unsigned long flags;
922
923 int owner;
924
925 spin_lock_irqsave(&sem->wait.lock, flags);
926
927 owner = sem->owner == t;
928
929 spin_unlock_irqrestore(&sem->wait.lock, flags);
930
931 if (owner)
932 pfp_mpcp_unlock(l);
933
934 return 0;
935}
936
937void pfp_mpcp_free(struct litmus_lock* lock)
938{
939 kfree(mpcp_from_lock(lock));
940}
941
942static struct litmus_lock_ops pfp_mpcp_lock_ops = {
943 .close = pfp_mpcp_close,
944 .lock = pfp_mpcp_lock,
945 .open = pfp_mpcp_open,
946 .unlock = pfp_mpcp_unlock,
947 .deallocate = pfp_mpcp_free,
948};
949
950static struct litmus_lock* pfp_new_mpcp(int vspin)
951{
952 struct mpcp_semaphore* sem;
953 int cpu;
954
955 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
956 if (!sem)
957 return NULL;
958
959 sem->owner = NULL;
960 init_waitqueue_head(&sem->wait);
961 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
962
963 for (cpu = 0; cpu < NR_CPUS; cpu++)
964 sem->prio_ceiling[cpu] = OMEGA_CEILING;
965
966 /* mark as virtual spinning */
967 sem->vspin = vspin;
968
969 return &sem->litmus_lock;
970}
971
972
973/* ******************** PCP support ********************** */
974
975
976struct pcp_semaphore {
977 struct litmus_lock litmus_lock;
978
979 struct list_head ceiling;
980
981 /* current resource holder */
982 struct task_struct *owner;
983
984 /* priority ceiling --- can be negative due to DPCP support */
985 int prio_ceiling;
986
987 /* on which processor is this PCP semaphore allocated? */
988 int on_cpu;
989};
990
991static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock)
992{
993 return container_of(lock, struct pcp_semaphore, litmus_lock);
994}
995
996
997struct pcp_state {
998 struct list_head system_ceiling;
999
1000 /* highest-priority waiting task */
1001 struct task_struct* hp_waiter;
1002
1003 /* list of jobs waiting to get past the system ceiling */
1004 wait_queue_head_t ceiling_blocked;
1005};
1006
1007static void pcp_init_state(struct pcp_state* s)
1008{
1009 INIT_LIST_HEAD(&s->system_ceiling);
1010 s->hp_waiter = NULL;
1011 init_waitqueue_head(&s->ceiling_blocked);
1012}
1013
1014static DEFINE_PER_CPU(struct pcp_state, pcp_state);
1015
1016/* assumes preemptions are off */
1017static struct pcp_semaphore* pcp_get_ceiling(void)
1018{
1019 struct list_head* top = __get_cpu_var(pcp_state).system_ceiling.next;
1020
1021 if (top)
1022 return list_entry(top, struct pcp_semaphore, ceiling);
1023 else
1024 return NULL;
1025}
1026
1027/* assumes preempt off */
1028static void pcp_add_ceiling(struct pcp_semaphore* sem)
1029{
1030 struct list_head *pos;
1031 struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling;
1032 struct pcp_semaphore* held;
1033
1034 BUG_ON(sem->on_cpu != smp_processor_id());
1035 BUG_ON(in_list(&sem->ceiling));
1036
1037 list_for_each(pos, in_use) {
1038 held = list_entry(pos, struct pcp_semaphore, ceiling);
1039 if (held->prio_ceiling >= sem->prio_ceiling) {
1040 __list_add(&sem->ceiling, pos->prev, pos);
1041 return;
1042 }
1043 }
1044
1045 /* we hit the end of the list */
1046
1047 list_add_tail(&sem->ceiling, in_use);
1048}
1049
1050/* assumes preempt off */
1051static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1052 struct task_struct* task,
1053 int effective_prio)
1054{
1055 return ceiling == NULL ||
1056 ceiling->prio_ceiling > effective_prio ||
1057 ceiling->owner == task;
1058}
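In other words, a job may lock a new PCP resource only if its effective priority is strictly higher (numerically lower) than the current system ceiling, or if it itself owns the resource defining that ceiling. A small worked example (illustrative priorities only; lower number == higher priority):

/* Illustrative only:
 *   system ceiling: sem S held by job A, S->prio_ceiling == 3
 *   job B, effective prio == 2  -> 3 > 2: exceeds the ceiling, may lock
 *   job C, effective prio == 4  -> blocked; may become hp_waiter
 *   job A itself                -> always passes (ceiling->owner == task)
 */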
1059
1060/* assumes preempt off */
1061static void pcp_priority_inheritance(void)
1062{
1063 unsigned long flags;
1064 pfp_domain_t* pfp = local_pfp;
1065
1066 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1067 struct task_struct *blocker, *blocked;
1068
1069 blocker = ceiling ? ceiling->owner : NULL;
1070 blocked = __get_cpu_var(pcp_state).hp_waiter;
1071
1072 raw_spin_lock_irqsave(&pfp->slock, flags);
1073
1074 /* Current is no longer inheriting anything by default. This should be
1075 * the currently scheduled job, and hence not currently queued. */
1076 BUG_ON(current != pfp->scheduled);
1077
1078 fp_set_prio_inh(pfp, current, NULL);
1079 fp_set_prio_inh(pfp, blocked, NULL);
1080 fp_set_prio_inh(pfp, blocker, NULL);
1081
1082
1083 /* Let blocking job inherit priority of blocked job, if required. */
1084 if (blocker && blocked &&
1085 fp_higher_prio(blocked, blocker)) {
1086 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1087 blocked->comm, blocked->pid,
1088 get_priority(blocker), get_priority(blocked));
1089 fp_set_prio_inh(pfp, blocker, blocked);
1090 }
1091
1092 /* Check if anything changed. If the blocked job is current, then it is
1093 * just blocking and hence is going to call the scheduler anyway. */
1094 if (blocked != current &&
1095 fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1096 preempt(pfp);
1097
1098 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1099}
1100
1101/* called with preemptions off */
1102static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1103 int effective_prio)
1104{
1105 struct task_struct* t = current;
1106 struct pcp_semaphore* ceiling;
1107 prio_wait_queue_t wait;
1108 unsigned int waiting_higher_prio;
1109
1110 do {
1111 ceiling = pcp_get_ceiling();
1112 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1113 break;
1114
1115 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1116 sem, ceiling->owner->comm, ceiling->owner->pid);
1117
1118 /* we need to wait until the ceiling is lowered */
1119
1120 /* enqueue in priority order */
1121 init_prio_waitqueue_entry(&wait, t, prio_point(effective_prio));
1122 set_task_state(t, TASK_UNINTERRUPTIBLE);
1123 waiting_higher_prio = add_wait_queue_prio_exclusive(
1124 &__get_cpu_var(pcp_state).ceiling_blocked, &wait);
1125
1126 if (waiting_higher_prio == 0) {
1127 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1128
1129 /* we are the new highest-priority waiting job
1130 * => update inheritance */
1131 __get_cpu_var(pcp_state).hp_waiter = t;
1132 pcp_priority_inheritance();
1133 }
1134
1135 TS_LOCK_SUSPEND;
1136
1137 preempt_enable_no_resched();
1138 schedule();
1139 preempt_disable();
1140
1141 /* pcp_resume_unblocked() removed us from wait queue */
1142
1143 TS_LOCK_RESUME;
1144 } while(1);
1145
1146 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1147
1148 /* We are good to go. The semaphore should be available. */
1149 BUG_ON(sem->owner != NULL);
1150
1151 sem->owner = t;
1152
1153 pcp_add_ceiling(sem);
1154}
1155
1156static void pcp_resume_unblocked(void)
1157{
1158 wait_queue_head_t *blocked = &__get_cpu_var(pcp_state).ceiling_blocked;
1159 unsigned long flags;
1160 prio_wait_queue_t* q;
1161 struct task_struct* t = NULL;
1162
1163 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1164
1165 spin_lock_irqsave(&blocked->lock, flags);
1166
1167 while (waitqueue_active(blocked)) {
1168 /* check first == highest-priority waiting job */
1169 q = list_entry(blocked->task_list.next,
1170 prio_wait_queue_t, wq.task_list);
1171 t = (struct task_struct*) q->wq.private;
1172
1173 /* can it proceed now? => let it go */
1174 if (pcp_exceeds_ceiling(ceiling, t,
1175 prio_from_point(q->priority))) {
1176 __remove_wait_queue(blocked, &q->wq);
1177 wake_up_process(t);
1178 } else {
1179 /* We are done. Update highest-priority waiter. */
1180 __get_cpu_var(pcp_state).hp_waiter = t;
1181 goto out;
1182 }
1183 }
1184 /* If we get here, then there are no more waiting
1185 * jobs. */
1186 __get_cpu_var(pcp_state).hp_waiter = NULL;
1187out:
1188 spin_unlock_irqrestore(&blocked->lock, flags);
1189}
1190
1191/* assumes preempt off */
1192static void pcp_lower_ceiling(struct pcp_semaphore* sem)
1193{
1194 BUG_ON(!in_list(&sem->ceiling));
1195 BUG_ON(sem->owner != current);
1196 BUG_ON(sem->on_cpu != smp_processor_id());
1197
1198 /* remove from ceiling list */
1199 list_del(&sem->ceiling);
1200
1201 /* release */
1202 sem->owner = NULL;
1203
1204 TRACE_CUR("PCP released sem %p\n", sem);
1205
1206 pcp_priority_inheritance();
1207
1208 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
1209 pcp_resume_unblocked();
1210}
1211
1212static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1213 int effective_prio)
1214{
1215 /* This needs to be synchronized on something.
1216 * Might as well use waitqueue lock for the processor.
1217 * We assume this happens only before the task set starts execution,
1218 * (i.e., during initialization), but it may happen on multiple processors
1219 * at the same time.
1220 */
1221 unsigned long flags;
1222
1223 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1224
1225 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1226
1227 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1228
1229 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1230}
1231
1232static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1233{
1234 sem->owner = NULL;
1235 INIT_LIST_HEAD(&sem->ceiling);
1236 sem->prio_ceiling = INT_MAX;
1237 sem->on_cpu = cpu;
1238}
1239
1240int pfp_pcp_lock(struct litmus_lock* l)
1241{
1242 struct task_struct* t = current;
1243 struct pcp_semaphore *sem = pcp_from_lock(l);
1244
1245 int eprio = effective_agent_priority(get_priority(t));
1246 int from = get_partition(t);
1247 int to = sem->on_cpu;
1248
1249 if (!is_realtime(t) || from != to)
1250 return -EPERM;
1251
1252 preempt_disable();
1253
1254 pcp_raise_ceiling(sem, eprio);
1255
1256 preempt_enable();
1257
1258 return 0;
1259}
1260
1261int pfp_pcp_unlock(struct litmus_lock* l)
1262{
1263 struct task_struct *t = current;
1264 struct pcp_semaphore *sem = pcp_from_lock(l);
1265
1266 int err = 0;
1267
1268 preempt_disable();
1269
1270 if (sem->on_cpu != smp_processor_id() || sem->owner != t) {
1271 err = -EINVAL;
1272 goto out;
1273 }
1274
1275 /* give it back */
1276 pcp_lower_ceiling(sem);
1277
1278out:
1279 preempt_enable();
1280
1281 return err;
1282}
1283
1284int pfp_pcp_open(struct litmus_lock* l, void* __user config)
1285{
1286 struct task_struct *t = current;
1287 struct pcp_semaphore *sem = pcp_from_lock(l);
1288
1289 int cpu, eprio;
1290
1291 if (!is_realtime(t))
1292 /* we need to know the real-time priority */
1293 return -EPERM;
1294
1295 if (get_user(cpu, (int*) config))
1296 return -EFAULT;
1297
1298 /* make sure the resource location matches */
1299 if (cpu != sem->on_cpu)
1300 return -EINVAL;
1301
1302 eprio = effective_agent_priority(get_priority(t));
1303
1304 pcp_update_prio_ceiling(sem, eprio);
1305
1306 return 0;
1307}
1308
1309int pfp_pcp_close(struct litmus_lock* l)
1310{
1311 struct task_struct *t = current;
1312 struct pcp_semaphore *sem = pcp_from_lock(l);
1313
1314 int owner = 0;
1315
1316 preempt_disable();
1317
1318 if (sem->on_cpu == smp_processor_id())
1319 owner = sem->owner == t;
1320
1321 preempt_enable();
1322
1323 if (owner)
1324 pfp_pcp_unlock(l);
1325
1326 return 0;
1327}
1328
1329void pfp_pcp_free(struct litmus_lock* lock)
1330{
1331 kfree(pcp_from_lock(lock));
1332}
1333
1334
1335static struct litmus_lock_ops pfp_pcp_lock_ops = {
1336 .close = pfp_pcp_close,
1337 .lock = pfp_pcp_lock,
1338 .open = pfp_pcp_open,
1339 .unlock = pfp_pcp_unlock,
1340 .deallocate = pfp_pcp_free,
1341};
1342
1343
1344static struct litmus_lock* pfp_new_pcp(int on_cpu)
1345{
1346 struct pcp_semaphore* sem;
1347
1348 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1349 if (!sem)
1350 return NULL;
1351
1352 sem->litmus_lock.ops = &pfp_pcp_lock_ops;
1353 pcp_init_semaphore(sem, on_cpu);
1354
1355 return &sem->litmus_lock;
1356}
1357
1358/* ******************** DPCP support ********************** */
1359
1360struct dpcp_semaphore {
1361 struct litmus_lock litmus_lock;
1362 struct pcp_semaphore pcp;
1363 int owner_cpu;
1364};
1365
1366static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1367{
1368 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1369}
1370
1371/* called with preemptions disabled */
1372static void pfp_migrate_to(int target_cpu)
1373{
1374 struct task_struct* t = current;
1375 pfp_domain_t *from;
1376
1377 if (get_partition(t) == target_cpu)
1378 return;
1379
1380 /* make sure target_cpu makes sense */
1381 BUG_ON(!cpu_online(target_cpu));
1382
1383 local_irq_disable();
1384
1385 /* scheduled task should not be in any ready or release queue */
1386 BUG_ON(is_queued(t));
1387
1388 /* lock the source pfp domain */
1389 from = task_pfp(t);
1390
1391 raw_spin_lock(&from->slock);
1392
1393 /* switch partitions */
1394 tsk_rt(t)->task_params.cpu = target_cpu;
1395
1396 raw_spin_unlock(&from->slock);
1397
1398 /* Don't trace scheduler costs as part of
1399 * locking overhead. Scheduling costs are accounted for
1400 * explicitly. */
1401 TS_LOCK_SUSPEND;
1402
1403 local_irq_enable();
1404 preempt_enable_no_resched();
1405
1406 /* deschedule to be migrated */
1407 schedule();
1408
1409 /* we are now on the target processor */
1410 preempt_disable();
1411
1412 /* start recording costs again */
1413 TS_LOCK_RESUME;
1414
1415 BUG_ON(smp_processor_id() != target_cpu);
1416}
1417
1418int pfp_dpcp_lock(struct litmus_lock* l)
1419{
1420 struct task_struct* t = current;
1421 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1422 int eprio = effective_agent_priority(get_priority(t));
1423 int from = get_partition(t);
1424 int to = sem->pcp.on_cpu;
1425
1426 if (!is_realtime(t))
1427 return -EPERM;
1428
1429 preempt_disable();
1430
1431 /* Priority-boost ourself *before* we suspend so that
1432 * our priority is boosted when we resume. */
1433
1434 boost_priority(t, get_priority(t));
1435
1436 pfp_migrate_to(to);
1437
1438 pcp_raise_ceiling(&sem->pcp, eprio);
1439
1440 /* yep, we got it => execute request */
1441 sem->owner_cpu = from;
1442
1443 preempt_enable();
1444
1445 return 0;
1446}
1447
1448int pfp_dpcp_unlock(struct litmus_lock* l)
1449{
1450 struct task_struct *t = current;
1451 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1452 int err = 0;
1453 int home;
1454
1455 preempt_disable();
1456
1457 if (sem->pcp.on_cpu != smp_processor_id() || sem->pcp.owner != t) {
1458 err = -EINVAL;
1459 goto out;
1460 }
1461
1462 home = sem->owner_cpu;
1463
1464 /* give it back */
1465 pcp_lower_ceiling(&sem->pcp);
1466
1467 /* we lose the benefit of priority boosting */
1468 unboost_priority(t);
1469
1470 pfp_migrate_to(home);
1471
1472out:
1473 preempt_enable();
1474
1475 return err;
1476}
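Taken together, pfp_migrate_to(), pfp_dpcp_lock(), and pfp_dpcp_unlock() implement the DPCP's remote-execution structure: boost, migrate to the CPU that hosts the resource, raise the local priority ceiling, serve the request there, then lower the ceiling, unboost, and migrate back to the home partition recorded in owner_cpu. The following user-space analogy reproduces only the migrate, execute, return skeleton using Linux CPU affinity; it omits boosting and ceilings entirely, and migrate_self_to()/run_request_on() are invented helpers, not LITMUS^RT APIs.

#define _GNU_SOURCE
#include <assert.h>
#include <sched.h>
#include <stdio.h>

/* Pin the calling thread to a single CPU, mirroring the effect of
 * pfp_migrate_to(): after the call we are running on target_cpu. */
static void migrate_self_to(int target_cpu)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(target_cpu, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask) == 0)
		assert(sched_getcpu() == target_cpu);
}

/* DPCP-style invocation: execute the request on the resource's home
 * CPU, then return to the caller's home CPU (cf. sem->owner_cpu). */
static void run_request_on(int resource_cpu, void (*request)(void))
{
	int home = sched_getcpu();	/* remember where we came from */

	migrate_self_to(resource_cpu);
	request();			/* critical section runs remotely */
	migrate_self_to(home);
}

static void critical_section(void)
{
	printf("request served on CPU %d\n", sched_getcpu());
}

int main(void)
{
	run_request_on(0, critical_section);
	printf("back on CPU %d\n", sched_getcpu());
	return 0;
}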
1477
1478int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1479{
1480 struct task_struct *t = current;
1481 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1482 int cpu, eprio;
1483
1484 if (!is_realtime(t))
1485 /* we need to know the real-time priority */
1486 return -EPERM;
1487
1488 if (get_user(cpu, (int*) config))
1489 return -EFAULT;
1490
1491 /* make sure the resource location matches */
1492 if (cpu != sem->pcp.on_cpu)
1493 return -EINVAL;
1494
1495 eprio = effective_agent_priority(get_priority(t));
1496
1497 pcp_update_prio_ceiling(&sem->pcp, eprio);
1498
1499 return 0;
1500}
1501
1502int pfp_dpcp_close(struct litmus_lock* l)
1503{
1504 struct task_struct *t = current;
1505 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1506 int owner = 0;
1507
1508 preempt_disable();
1509
1510 if (sem->pcp.on_cpu == smp_processor_id())
1511 owner = sem->pcp.owner == t;
1512
1513 preempt_enable();
1514
1515 if (owner)
1516 pfp_dpcp_unlock(l);
1517
1518 return 0;
1519}
1520
1521void pfp_dpcp_free(struct litmus_lock* lock)
1522{
1523 kfree(dpcp_from_lock(lock));
1524}
1525
1526static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1527 .close = pfp_dpcp_close,
1528 .lock = pfp_dpcp_lock,
1529 .open = pfp_dpcp_open,
1530 .unlock = pfp_dpcp_unlock,
1531 .deallocate = pfp_dpcp_free,
1532};
1533
1534static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1535{
1536 struct dpcp_semaphore* sem;
1537
1538 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1539 if (!sem)
1540 return NULL;
1541
1542 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1543 sem->owner_cpu = NO_CPU;
1544 pcp_init_semaphore(&sem->pcp, on_cpu);
1545
1546 return &sem->litmus_lock;
1547}
1548
1549
1550/* **** lock constructor **** */
1551
1552
1553static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1554 void* __user config)
1555{
1556 int err = -ENXIO, cpu;
1557 struct srp_semaphore* srp;
1558
1559	/* P-FP supports the SRP and the PCP for local resources, and the FMLP,
1560	 * the MPCP (optionally with virtual spinning), and the DPCP for global resources. */
1561 switch (type) {
1562 case FMLP_SEM:
1563 /* FIFO Mutex Locking Protocol */
1564 *lock = pfp_new_fmlp();
1565 if (*lock)
1566 err = 0;
1567 else
1568 err = -ENOMEM;
1569 break;
1570
1571 case MPCP_SEM:
1572	/* Multiprocessor Priority Ceiling Protocol */
1573 *lock = pfp_new_mpcp(0);
1574 if (*lock)
1575 err = 0;
1576 else
1577 err = -ENOMEM;
1578 break;
1579
1580 case MPCP_VS_SEM:
1581	/* Multiprocessor Priority Ceiling Protocol with virtual spinning */
1582 *lock = pfp_new_mpcp(1);
1583 if (*lock)
1584 err = 0;
1585 else
1586 err = -ENOMEM;
1587 break;
1588
1589 case DPCP_SEM:
1590 /* Distributed Priority Ceiling Protocol */
1591 if (get_user(cpu, (int*) config))
1592 return -EFAULT;
1593
1594 if (!cpu_online(cpu))
1595 return -EINVAL;
1596
1597 *lock = pfp_new_dpcp(cpu);
1598 if (*lock)
1599 err = 0;
1600 else
1601 err = -ENOMEM;
1602 break;
1603
1604 case SRP_SEM:
1605 /* Baker's Stack Resource Policy */
1606 srp = allocate_srp_semaphore();
1607 if (srp) {
1608 *lock = &srp->litmus_lock;
1609 err = 0;
1610 } else
1611 err = -ENOMEM;
1612 break;
1613
1614 case PCP_SEM:
1615 /* Priority Ceiling Protocol */
1616 if (get_user(cpu, (int*) config))
1617 return -EFAULT;
1618
1619 if (!cpu_online(cpu))
1620 return -EINVAL;
1621
1622 *lock = pfp_new_pcp(cpu);
1623 if (*lock)
1624 err = 0;
1625 else
1626 err = -ENOMEM;
1627 break;
1628	}
1629
1630 return err;
1631}
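For the DPCP_SEM and PCP_SEM cases above, the config argument is interpreted as a pointer to a single user-space int naming the CPU that hosts the resource (read via get_user()). A minimal sketch of how a caller might supply that value; open_litmus_lock() and DPCP_SEM_DEMO are stand-ins for whatever user-space wrapper and constants are actually used, not definitions from this patch.

#include <stdio.h>

/* Hypothetical stand-in for a liblitmus-style wrapper: in reality the
 * config pointer ends up in pfp_allocate_lock(), which reads one int. */
static int open_litmus_lock(int protocol, int resource_id, void *config)
{
	int cpu = *(int *) config;

	printf("would allocate protocol %d, resource %d, hosted on CPU %d\n",
	       protocol, resource_id, cpu);
	return 0;	/* pretend object descriptor */
}

#define DPCP_SEM_DEMO 6	/* illustrative constant, not the real fdso value */

int main(void)
{
	int resource_cpu = 2;	/* CPU that hosts the shared resource */
	int od = open_litmus_lock(DPCP_SEM_DEMO, 0, &resource_cpu);

	return od < 0;
}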
1632
1633#endif
1634
1635static long pfp_admit_task(struct task_struct* tsk)
1636{
1637 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1638#ifdef CONFIG_RELEASE_MASTER
1639	/* don't allow tasks on the release master CPU */
1640 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1641#endif
1642 litmus_is_valid_fixed_prio(get_priority(tsk)))
1643 return 0;
1644 else
1645 return -EINVAL;
1646}
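pfp_admit_task() admits a task only if it is already executing on the partition named in its task parameters, is not assigned to the release-master CPU (when that option is configured), and has a valid fixed priority. A small stand-alone restatement of that predicate; the structure and helper names are simplified stand-ins, and the priority bound used here is an arbitrary placeholder.

#include <stdbool.h>
#include <stdio.h>

#define NO_CPU (-1)

struct demo_task {
	int current_cpu;	/* CPU the task currently runs on (task_cpu()) */
	int assigned_cpu;	/* task_params.cpu, its designated partition */
	unsigned int prio;	/* fixed priority */
};

/* Stand-in for litmus_is_valid_fixed_prio(); the real bound is a
 * configuration matter, 512 here is just a placeholder. */
static bool valid_fixed_prio(unsigned int prio)
{
	return prio > 0 && prio <= 512;
}

/* Mirrors the admission test: right partition, not the release
 * master (NO_CPU disables that check), and a usable priority. */
static bool admit(const struct demo_task *t, int release_master_cpu)
{
	return t->current_cpu == t->assigned_cpu &&
	       t->current_cpu != release_master_cpu &&
	       valid_fixed_prio(t->prio);
}

int main(void)
{
	struct demo_task ok    = { .current_cpu = 1, .assigned_cpu = 1, .prio = 10 };
	struct demo_task wrong = { .current_cpu = 0, .assigned_cpu = 1, .prio = 10 };

	printf("ok: %d, wrong partition: %d\n",
	       admit(&ok, NO_CPU), admit(&wrong, NO_CPU));
	return 0;
}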
1647
1648static long pfp_activate_plugin(void)
1649{
1650#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
1651 int cpu;
1652#endif
1653
1654#ifdef CONFIG_RELEASE_MASTER
1655 for_each_online_cpu(cpu) {
1656 remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
1657 }
1658#endif
1659
1660#ifdef CONFIG_LITMUS_LOCKING
1661 get_srp_prio = pfp_get_srp_prio;
1662
1663 for_each_online_cpu(cpu) {
1664 init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
1665 per_cpu(mpcpvs_vspin, cpu) = NULL;
1666
1667 pcp_init_state(&per_cpu(pcp_state, cpu));
1668 pfp_doms[cpu] = remote_pfp(cpu);
1669 }
1670
1671#endif
1672
1673 return 0;
1674}
1675
1676
1677/* Plugin object */
1678static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
1679 .plugin_name = "P-FP",
1680 .tick = pfp_tick,
1681 .task_new = pfp_task_new,
1682 .complete_job = complete_job,
1683 .task_exit = pfp_task_exit,
1684 .schedule = pfp_schedule,
1685 .task_wake_up = pfp_task_wake_up,
1686 .task_block = pfp_task_block,
1687 .admit_task = pfp_admit_task,
1688 .activate_plugin = pfp_activate_plugin,
1689#ifdef CONFIG_LITMUS_LOCKING
1690 .allocate_lock = pfp_allocate_lock,
1691 .finish_switch = pfp_finish_switch,
1692#endif
1693};
1694
1695
1696static int __init init_pfp(void)
1697{
1698 int i;
1699
1700	/* CPU hotplug is not supported. If it ever were, this
1701	 * initialization loop could not safely rely on num_online_cpus(),
1702	 * since CPUs may come and go while we iterate.
1703	 */
1704 for (i = 0; i < num_online_cpus(); i++) {
1705 pfp_domain_init(remote_pfp(i), i);
1706 }
1707 return register_sched_plugin(&pfp_plugin);
1708}
1709
1710module_init(init_pfp);
1711
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index 8e4a22dd8d6a..0e1675d2e572 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -17,6 +17,7 @@
17#include <litmus/litmus.h> 17#include <litmus/litmus.h>
18#include <litmus/jobs.h> 18#include <litmus/jobs.h>
19#include <litmus/preempt.h> 19#include <litmus/preempt.h>
20#include <litmus/budget.h>
20#include <litmus/sched_plugin.h> 21#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h> 22#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h> 23#include <litmus/sched_trace.h>
@@ -59,7 +60,7 @@ static void requeue(struct task_struct* t, rt_domain_t *edf)
59 if (t->state != TASK_RUNNING) 60 if (t->state != TASK_RUNNING)
60 TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); 61 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
61 62
62 set_rt_flags(t, RT_F_RUNNING); 63 tsk_rt(t)->completed = 0;
63 if (is_released(t, litmus_clock())) 64 if (is_released(t, litmus_clock()))
64 __add_ready(edf, t); 65 __add_ready(edf, t);
65 else 66 else
@@ -132,6 +133,15 @@ static void unboost_priority(struct task_struct* t)
132 133
133#endif 134#endif
134 135
136static int psnedf_preempt_check(psnedf_domain_t *pedf)
137{
138 if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
139 preempt(pedf);
140 return 1;
141 } else
142 return 0;
143}
144
135/* This check is trivial in partitioned systems as we only have to consider 145
136 * the CPU of the partition. 146 * the CPU of the partition.
137 */ 147 */
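psnedf_preempt_check() bundles the "is a preemption actually needed?" test with the preemption itself, so the callers added below (psnedf_task_new() and psnedf_task_wake_up()) only force a reschedule when the newly queued job really should run before the currently scheduled one. A stand-alone sketch of that decision with a simplified EDF comparison; all names and types here are illustrative.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long lt_t;

struct demo_job {
	lt_t deadline;
};

/* Simplified edf_preemption_needed(): preempt if something is ready
 * and either nothing is scheduled or the ready job's deadline is
 * earlier than the scheduled job's. */
static bool preemption_needed(const struct demo_job *ready_head,
			      const struct demo_job *scheduled)
{
	if (!ready_head)
		return false;
	if (!scheduled)
		return true;
	return ready_head->deadline < scheduled->deadline;
}

static int preempt_check(const struct demo_job *ready_head,
			 const struct demo_job *scheduled)
{
	if (preemption_needed(ready_head, scheduled)) {
		printf("preempt!\n");	/* stand-in for preempt(pedf) */
		return 1;
	}
	return 0;
}

int main(void)
{
	struct demo_job scheduled = { .deadline = 100 };
	struct demo_job earlier   = { .deadline =  50 };
	struct demo_job later     = { .deadline = 200 };

	preempt_check(&earlier, &scheduled);	/* triggers a preemption */
	preempt_check(&later, &scheduled);	/* does not */
	return 0;
}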
@@ -142,11 +152,7 @@ static int psnedf_check_resched(rt_domain_t *edf)
142 /* because this is a callback from rt_domain_t we already hold 152 /* because this is a callback from rt_domain_t we already hold
143 * the necessary lock for the ready queue 153 * the necessary lock for the ready queue
144 */ 154 */
145 if (edf_preemption_needed(edf, pedf->scheduled)) { 155 return psnedf_preempt_check(pedf);
146 preempt(pedf);
147 return 1;
148 } else
149 return 0;
150} 156}
151 157
152static void job_completion(struct task_struct* t, int forced) 158static void job_completion(struct task_struct* t, int forced)
@@ -154,7 +160,7 @@ static void job_completion(struct task_struct* t, int forced)
154 sched_trace_task_completion(t,forced); 160 sched_trace_task_completion(t,forced);
155 TRACE_TASK(t, "job_completion().\n"); 161 TRACE_TASK(t, "job_completion().\n");
156 162
157 set_rt_flags(t, RT_F_SLEEP); 163 tsk_rt(t)->completed = 1;
158 prepare_for_next_period(t); 164 prepare_for_next_period(t);
159} 165}
160 166
@@ -208,7 +214,7 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
208 budget_enforced(pedf->scheduled) && 214 budget_enforced(pedf->scheduled) &&
209 budget_exhausted(pedf->scheduled); 215 budget_exhausted(pedf->scheduled);
210 np = exists && is_np(pedf->scheduled); 216 np = exists && is_np(pedf->scheduled);
211 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; 217 sleep = exists && is_completed(pedf->scheduled);
212 preempt = edf_preemption_needed(edf, prev); 218 preempt = edf_preemption_needed(edf, prev);
213 219
214 /* If we need to preempt do so. 220 /* If we need to preempt do so.
@@ -260,7 +266,7 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
260 266
261 if (next) { 267 if (next) {
262 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 268 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
263 set_rt_flags(next, RT_F_RUNNING); 269 tsk_rt(next)->completed = 0;
264 } else { 270 } else {
265 TRACE("becoming idle at %llu\n", litmus_clock()); 271 TRACE("becoming idle at %llu\n", litmus_clock());
266 } 272 }
@@ -298,7 +304,7 @@ static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
298 } else { 304 } else {
299 requeue(t, edf); 305 requeue(t, edf);
300 /* maybe we have to reschedule */ 306 /* maybe we have to reschedule */
301 preempt(pedf); 307 psnedf_preempt_check(pedf);
302 } 308 }
303 raw_spin_unlock_irqrestore(&pedf->slock, flags); 309 raw_spin_unlock_irqrestore(&pedf->slock, flags);
304} 310}
@@ -334,8 +340,10 @@ static void psnedf_task_wake_up(struct task_struct *task)
334 * de-scheduling the task, i.e., wake_up() raced with schedule() 340 * de-scheduling the task, i.e., wake_up() raced with schedule()
335 * and won. 341 * and won.
336 */ 342 */
337 if (pedf->scheduled != task) 343 if (pedf->scheduled != task) {
338 requeue(task, edf); 344 requeue(task, edf);
345 psnedf_preempt_check(pedf);
346 }
339 347
340 raw_spin_unlock_irqrestore(&pedf->slock, flags); 348 raw_spin_unlock_irqrestore(&pedf->slock, flags);
341 TRACE_TASK(task, "wake up done\n"); 349 TRACE_TASK(task, "wake up done\n");
diff --git a/litmus/sync.c b/litmus/sync.c
index bf75fde5450b..3e79e0a12a5a 100644
--- a/litmus/sync.c
+++ b/litmus/sync.c
@@ -16,63 +16,106 @@
16 16
17#include <litmus/sched_trace.h> 17#include <litmus/sched_trace.h>
18 18
19static DECLARE_COMPLETION(ts_release); 19struct ts_release_wait {
20 struct list_head list;
21 struct completion completion;
22 lt_t ts_release_time;
23};
24
25#define DECLARE_TS_RELEASE_WAIT(symb) \
26 struct ts_release_wait symb = \
27 { \
28 LIST_HEAD_INIT(symb.list), \
29 COMPLETION_INITIALIZER_ONSTACK(symb.completion), \
30 0 \
31 }
32
33static LIST_HEAD(task_release_list);
34static DEFINE_MUTEX(task_release_lock);
20 35
21static long do_wait_for_ts_release(void) 36static long do_wait_for_ts_release(void)
22{ 37{
23 long ret = 0; 38 DECLARE_TS_RELEASE_WAIT(wait);
39
40 long ret = -ERESTARTSYS;
41
42 if (mutex_lock_interruptible(&task_release_lock))
43 goto out;
44
45 list_add(&wait.list, &task_release_list);
24 46
25 /* If the interruption races with a release, the completion object 47 mutex_unlock(&task_release_lock);
26 * may have a non-zero counter. To avoid this problem, this should
27 * be replaced by wait_for_completion().
28 *
29 * For debugging purposes, this is interruptible for now.
30 */
31 ret = wait_for_completion_interruptible(&ts_release);
32 48
49	/* We are enqueued; now we wait for someone to wake us up. */
50 ret = wait_for_completion_interruptible(&wait.completion);
51
52 if (!ret) {
53	/* Completion succeeded; set up the release. */
54 litmus->release_at(current, wait.ts_release_time
55 + current->rt_param.task_params.phase
56 - current->rt_param.task_params.period);
57 /* trigger advance to next job release at the programmed time */
58 ret = complete_job();
59 } else {
60	/* We were interrupted; we must clean up the list. */
61 mutex_lock(&task_release_lock);
62 if (!wait.completion.done)
63 list_del(&wait.list);
64 mutex_unlock(&task_release_lock);
65 }
66
67out:
33 return ret; 68 return ret;
34} 69}
35 70
36int count_tasks_waiting_for_release(void) 71int count_tasks_waiting_for_release(void)
37{ 72{
38 unsigned long flags;
39 int task_count = 0; 73 int task_count = 0;
40 struct list_head *pos; 74 struct list_head *pos;
41 75
42 spin_lock_irqsave(&ts_release.wait.lock, flags); 76 mutex_lock(&task_release_lock);
43 list_for_each(pos, &ts_release.wait.task_list) { 77
78 list_for_each(pos, &task_release_list) {
44 task_count++; 79 task_count++;
45 } 80 }
46 spin_unlock_irqrestore(&ts_release.wait.lock, flags); 81
82 mutex_unlock(&task_release_lock);
83
47 84
48 return task_count; 85 return task_count;
49} 86}
50 87
51static long do_release_ts(lt_t start) 88static long do_release_ts(lt_t start)
52{ 89{
53 int task_count = 0; 90 long task_count = 0;
54 unsigned long flags;
55 struct list_head *pos;
56 struct task_struct *t;
57 91
92 struct list_head *pos, *safe;
93 struct ts_release_wait *wait;
58 94
59 spin_lock_irqsave(&ts_release.wait.lock, flags); 95 if (mutex_lock_interruptible(&task_release_lock)) {
60 TRACE("<<<<<< synchronous task system release >>>>>>\n"); 96 task_count = -ERESTARTSYS;
97 goto out;
98 }
61 99
100 TRACE("<<<<<< synchronous task system release >>>>>>\n");
62 sched_trace_sys_release(&start); 101 sched_trace_sys_release(&start);
63 list_for_each(pos, &ts_release.wait.task_list) { 102
64 t = (struct task_struct*) list_entry(pos, 103 task_count = 0;
65 struct __wait_queue, 104 list_for_each_safe(pos, safe, &task_release_list) {
66 task_list)->private; 105 wait = (struct ts_release_wait*)
106 list_entry(pos, struct ts_release_wait, list);
107
67 task_count++; 108 task_count++;
68 litmus->release_at(t, start + t->rt_param.task_params.phase); 109 wait->ts_release_time = start;
69 sched_trace_task_release(t); 110 complete(&wait->completion);
70 } 111 }
71 112
72 spin_unlock_irqrestore(&ts_release.wait.lock, flags); 113 /* clear stale list */
114 INIT_LIST_HEAD(&task_release_list);
73 115
74 complete_n(&ts_release, task_count); 116 mutex_unlock(&task_release_lock);
75 117
118out:
76 return task_count; 119 return task_count;
77} 120}
78 121
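The offset used in do_wait_for_ts_release() above deserves a second look: release_at() is given ts_release_time + phase - period, i.e. the task is set up as if its previous job had been released one period before the intended start, so that the subsequent complete_job() advances it by exactly one period and the first real job is released at ts_release_time + phase. A quick numeric check (values are arbitrary example parameters in nanoseconds):

#include <stdio.h>

typedef unsigned long long lt_t;	/* nanoseconds, as in LITMUS^RT */

int main(void)
{
	/* Hypothetical numbers: synchronous release at t = 1 s,
	 * task phase 2 ms, period 10 ms. */
	lt_t ts_release_time = 1000000000ULL;
	lt_t phase  =   2000000ULL;
	lt_t period =  10000000ULL;

	/* do_wait_for_ts_release() programs the task as if its previous
	 * job had been released one period before the intended start: */
	lt_t programmed = ts_release_time + phase - period;

	/* complete_job() then advances by one period, so the first real
	 * job release lands at ts_release_time + phase. */
	lt_t first_release = programmed + period;

	printf("programmed=%llu first_release=%llu (expected %llu)\n",
	       programmed, first_release, ts_release_time + phase);
	return 0;
}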
@@ -88,17 +131,22 @@ asmlinkage long sys_wait_for_ts_release(void)
88 return ret; 131 return ret;
89} 132}
90 133
134#define ONE_MS 1000000
91 135
92asmlinkage long sys_release_ts(lt_t __user *__delay) 136asmlinkage long sys_release_ts(lt_t __user *__delay)
93{ 137{
94 long ret; 138 long ret;
95 lt_t delay; 139 lt_t delay;
140 lt_t start_time;
96 141
97 /* FIXME: check capabilities... */ 142 /* FIXME: check capabilities... */
98 143
99 ret = copy_from_user(&delay, __delay, sizeof(delay)); 144 ret = copy_from_user(&delay, __delay, sizeof(delay));
100 if (ret == 0) 145 if (ret == 0) {
101 ret = do_release_ts(litmus_clock() + delay); 146 /* round up to next larger integral millisecond */
147 start_time = ((litmus_clock() / ONE_MS) + 1) * ONE_MS;
148 ret = do_release_ts(start_time + delay);
149 }
102 150
103 return ret; 151 return ret;
104} 152}
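The ONE_MS rounding in sys_release_ts() snaps the base time to the next full millisecond before adding the user-supplied delay: integer division truncates litmus_clock() to whole milliseconds, and the +1 always moves past the current one, so even a time that already sits on a boundary is advanced by a full millisecond. A quick numeric check:

#include <stdio.h>

#define ONE_MS 1000000ULL	/* ns per millisecond, as in sync.c */

static unsigned long long round_up_ms(unsigned long long now_ns)
{
	return ((now_ns / ONE_MS) + 1) * ONE_MS;
}

int main(void)
{
	/* 1234567 ns -> 2000000 ns; an exact boundary 3000000 ns -> 4000000 ns */
	printf("%llu -> %llu\n", 1234567ULL, round_up_ms(1234567ULL));
	printf("%llu -> %llu\n", 3000000ULL, round_up_ms(3000000ULL));
	return 0;
}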
diff --git a/litmus/trace.c b/litmus/trace.c
index 3c35c527e805..7dbb98e4a3cd 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -18,6 +18,15 @@ static unsigned int ts_seq_no = 0;
18 18
19DEFINE_PER_CPU(atomic_t, irq_fired_count); 19DEFINE_PER_CPU(atomic_t, irq_fired_count);
20 20
21void ft_irq_fired(void)
22{
23 /* Only called with preemptions disabled. */
24 atomic_inc(&__get_cpu_var(irq_fired_count));
25
26 if (has_control_page(current))
27 get_control_page(current)->irq_count++;
28}
29
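In addition to the per-CPU Feather-Trace counter, ft_irq_fired() now also increments the irq_count field of the current task's control page, which lets user-space measurement code detect whether a timed code section was disturbed by interrupts. A sketch of how such a counter could be used; demo_ctrl_page and the dummy mapping are placeholders, since how the control page is actually mapped is outside this patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative mirror of the relevant control-page field only. */
struct demo_ctrl_page {
	uint64_t irq_count;	/* incremented by ft_irq_fired() in the kernel */
};

/* In real code this would point at the memory-mapped control page;
 * here it is just a local dummy so the sketch compiles and runs. */
static struct demo_ctrl_page dummy_page;
static volatile struct demo_ctrl_page *ctrl_page = &dummy_page;

static void measured_section(void)
{
	/* ... code whose execution time is being measured ... */
}

int main(void)
{
	uint64_t before = ctrl_page->irq_count;
	uint64_t after;

	measured_section();
	after = ctrl_page->irq_count;

	if (after != before)
		printf("sample disturbed by %llu interrupt(s), discard it\n",
		       (unsigned long long)(after - before));
	else
		printf("sample is interrupt-free\n");
	return 0;
}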
21static inline void clear_irq_fired(void) 30static inline void clear_irq_fired(void)
22{ 31{
23 atomic_set(&__raw_get_cpu_var(irq_fired_count), 0); 32 atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
@@ -34,77 +43,119 @@ static inline unsigned int get_and_clear_irq_fired(void)
34 return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0); 43 return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
35} 44}
36 45
37static inline void __save_irq_flags(struct timestamp *ts) 46static inline void save_irq_flags(struct timestamp *ts, unsigned int irq_count)
38{ 47{
39 unsigned int irq_count;
40
41 irq_count = get_and_clear_irq_fired();
42 /* Store how many interrupts occurred. */ 48 /* Store how many interrupts occurred. */
43 ts->irq_count = irq_count; 49 ts->irq_count = irq_count;
44 /* Extra flag because ts->irq_count overflows quickly. */ 50 /* Extra flag because ts->irq_count overflows quickly. */
45 ts->irq_flag = irq_count > 0; 51 ts->irq_flag = irq_count > 0;
52
46} 53}
47 54
48static inline void __save_timestamp_cpu(unsigned long event, 55static inline void write_timestamp(uint8_t event,
49 uint8_t type, uint8_t cpu) 56 uint8_t type,
57 uint8_t cpu,
58 uint16_t pid_fragment,
59 unsigned int irq_count,
60 int record_irq,
61 int hide_irq,
62 uint64_t timestamp,
63 int record_timestamp)
50{ 64{
65 unsigned long flags;
51 unsigned int seq_no; 66 unsigned int seq_no;
52 struct timestamp *ts; 67 struct timestamp *ts;
68
69 /* Avoid preemptions while recording the timestamp. This reduces the
70 * number of "out of order" timestamps in the stream and makes
71 * post-processing easier. */
72
73 local_irq_save(flags);
74
53 seq_no = fetch_and_inc((int *) &ts_seq_no); 75 seq_no = fetch_and_inc((int *) &ts_seq_no);
54 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { 76 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
55 ts->event = event; 77 ts->event = event;
56 ts->seq_no = seq_no; 78 ts->seq_no = seq_no;
57 ts->cpu = cpu; 79
58 ts->task_type = type; 80 ts->task_type = type;
59 __save_irq_flags(ts); 81 ts->pid = pid_fragment;
60 barrier(); 82
61 /* prevent re-ordering of ft_timestamp() */ 83 ts->cpu = cpu;
62 ts->timestamp = ft_timestamp(); 84
85 if (record_irq)
86 irq_count = get_and_clear_irq_fired();
87
88 save_irq_flags(ts, irq_count - hide_irq);
89
90 if (record_timestamp)
91 timestamp = ft_timestamp();
92
93 ts->timestamp = timestamp;
63 ft_buffer_finish_write(trace_ts_buf, ts); 94 ft_buffer_finish_write(trace_ts_buf, ts);
64 } 95 }
96
97 local_irq_restore(flags);
65} 98}
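write_timestamp() folds all previous helpers into one function whose flag parameters select the data sources: record_irq decides between sampling and clearing the per-CPU interrupt counter or using a caller-supplied count, hide_irq subtracts one expected interrupt, and record_timestamp decides between reading ft_timestamp() and storing a caller-supplied value. A user-space restatement of that dispatch with stubbed data sources; all demo_* names are invented.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the kernel-side sources of truth. */
static unsigned int demo_get_and_clear_irq_fired(void) { return 3; }
static uint64_t demo_ft_timestamp(void) { return 123456789ULL; }

/* Mirrors the parameter convention of write_timestamp(): callers either
 * ask the function to sample the IRQ count / cycle counter itself
 * (record_irq / record_timestamp set) or pass a pre-recorded value. */
static void demo_write_timestamp(unsigned int irq_count, int record_irq,
				 int hide_irq,
				 uint64_t timestamp, int record_timestamp)
{
	if (record_irq)
		irq_count = demo_get_and_clear_irq_fired();
	/* hide_irq removes one interrupt that the caller expected,
	 * e.g. the reschedule IPI around TS_SEND_RESCHED_END. */
	irq_count -= hide_irq;

	if (record_timestamp)
		timestamp = demo_ft_timestamp();

	printf("stored irq_count=%u irq_flag=%d timestamp=%llu\n",
	       irq_count, irq_count > 0, (unsigned long long)timestamp);
}

int main(void)
{
	/* save_timestamp()-style call: sample both in the kernel. */
	demo_write_timestamp(0, 1, 0, 0, 1);
	/* save_timestamp_time()-style call: user-supplied time value. */
	demo_write_timestamp(0, 1, 0, 42ULL, 0);
	return 0;
}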
66 99
67static void __add_timestamp_user(struct timestamp *pre_recorded) 100static void __add_timestamp_user(struct timestamp *pre_recorded)
68{ 101{
102 unsigned long flags;
69 unsigned int seq_no; 103 unsigned int seq_no;
70 struct timestamp *ts; 104 struct timestamp *ts;
105
106
107 local_irq_save(flags);
108
71 seq_no = fetch_and_inc((int *) &ts_seq_no); 109 seq_no = fetch_and_inc((int *) &ts_seq_no);
72 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { 110 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
73 *ts = *pre_recorded; 111 *ts = *pre_recorded;
74 ts->seq_no = seq_no; 112 ts->seq_no = seq_no;
75 __save_irq_flags(ts); 113 ts->cpu = raw_smp_processor_id();
114 save_irq_flags(ts, get_and_clear_irq_fired());
76 ft_buffer_finish_write(trace_ts_buf, ts); 115 ft_buffer_finish_write(trace_ts_buf, ts);
77 } 116 }
78}
79 117
80static inline void __save_timestamp(unsigned long event, 118 local_irq_restore(flags);
81 uint8_t type)
82{
83 __save_timestamp_cpu(event, type, raw_smp_processor_id());
84} 119}
85 120
86feather_callback void save_timestamp(unsigned long event) 121feather_callback void save_timestamp(unsigned long event)
87{ 122{
88 __save_timestamp(event, TSK_UNKNOWN); 123 write_timestamp(event, TSK_UNKNOWN,
124 raw_smp_processor_id(),
125 current->pid,
126 0, 1, 0,
127 0, 1);
89} 128}
90 129
91feather_callback void save_timestamp_def(unsigned long event, 130feather_callback void save_timestamp_def(unsigned long event,
92 unsigned long type) 131 unsigned long type)
93{ 132{
94 __save_timestamp(event, (uint8_t) type); 133 write_timestamp(event, type,
134 raw_smp_processor_id(),
135 current->pid,
136 0, 1, 0,
137 0, 1);
95} 138}
96 139
97feather_callback void save_timestamp_task(unsigned long event, 140feather_callback void save_timestamp_task(unsigned long event,
98 unsigned long t_ptr) 141 unsigned long t_ptr)
99{ 142{
100 int rt = is_realtime((struct task_struct *) t_ptr); 143 struct task_struct *t = (struct task_struct *) t_ptr;
101 __save_timestamp(event, rt ? TSK_RT : TSK_BE); 144 int rt = is_realtime(t);
145
146 write_timestamp(event, rt ? TSK_RT : TSK_BE,
147 raw_smp_processor_id(),
148 t->pid,
149 0, 1, 0,
150 0, 1);
102} 151}
103 152
104feather_callback void save_timestamp_cpu(unsigned long event, 153feather_callback void save_timestamp_cpu(unsigned long event,
105 unsigned long cpu) 154 unsigned long cpu)
106{ 155{
107 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu); 156 write_timestamp(event, TSK_UNKNOWN, cpu, current->pid,
157 0, 1, 0,
158 0, 1);
108} 159}
109 160
110feather_callback void save_task_latency(unsigned long event, 161feather_callback void save_task_latency(unsigned long event,
@@ -112,20 +163,44 @@ feather_callback void save_task_latency(unsigned long event,
112{ 163{
113 lt_t now = litmus_clock(); 164 lt_t now = litmus_clock();
114 lt_t *when = (lt_t*) when_ptr; 165 lt_t *when = (lt_t*) when_ptr;
115 unsigned int seq_no;
116 int cpu = raw_smp_processor_id();
117 struct timestamp *ts;
118 166
119 seq_no = fetch_and_inc((int *) &ts_seq_no); 167 write_timestamp(event, TSK_RT, raw_smp_processor_id(), 0,
120 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { 168 0, 1, 0,
121 ts->event = event; 169 now - *when, 0);
122 ts->timestamp = now - *when; 170}
123 ts->seq_no = seq_no; 171
124 ts->cpu = cpu; 172/* fake timestamp to user-reported time */
125 ts->task_type = TSK_RT; 173feather_callback void save_timestamp_time(unsigned long event,
126 __save_irq_flags(ts); 174 unsigned long ptr)
127 ft_buffer_finish_write(trace_ts_buf, ts); 175{
128 } 176 uint64_t* time = (uint64_t*) ptr;
177
178 write_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
179 raw_smp_processor_id(), current->pid,
180 0, 1, 0,
181 *time, 0);
182}
183
184/* Record user-reported IRQ count */
185feather_callback void save_timestamp_irq(unsigned long event,
186 unsigned long irq_counter_ptr)
187{
188 uint64_t* irqs = (uint64_t*) irq_counter_ptr;
189
190 write_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
191 raw_smp_processor_id(), current->pid,
192 *irqs, 0, 0,
193 0, 1);
194}
195
196/* Suppress one IRQ from the irq count. Used by TS_SEND_RESCHED_END, which is
197 * called from within an interrupt that is expected. */
198feather_callback void save_timestamp_hide_irq(unsigned long event)
199{
200 write_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
201 raw_smp_processor_id(), current->pid,
202 0, 1, 1,
203 0, 1);
129} 204}
130 205
131/******************************************************************************/ 206/******************************************************************************/