author	Glenn Elliott <gelliott@cs.unc.edu>	2010-11-26 15:49:50 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2010-11-26 15:49:50 -0500
commit	a58179081abb08862bef0d2fa9f3dc90ac89a74d (patch)
tree	430bb77433d2d7f8316bdc1fad4974a69abf0a0e
parent	1baad08397910f4dee59e071808d74ea4ff8cf11 (diff)
Implementation of Adaptive-EDZL
This patch introduces the Adaptive-EDZL (AEDZL) scheduler. AEDZL uses feedback-control to estimate job execution time. This improves the detection of zero-laxity points if WCETs are not tight.
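For reference, the estimator this patch adds (update_exe_estimate() in litmus/sched_aedzl.c, below) adjusts a per-task utilization estimate with a PI-style feedback update using the gains a_fp and b_fp. The following user-space sketch only illustrates that update rule: doubles stand in for the kernel's fixed-point fp_t, and the initial estimate and job parameters are made up.

/* Illustrative sketch of the PI-style update in update_exe_estimate();
 * doubles stand in for the kernel's fixed-point fp_t, and the initial
 * estimate below is hypothetical. */
#include <stdio.h>

static double util_est  = 0.50;   /* e.g. provisioned WCET / period */
static double accum_err = 0.0;

static void update_estimate(double actual_cost, double period)
{
	const double a = 0.102, b = 0.303;      /* cf. a_fp, b_fp in sched_aedzl.c */
	double actual_util = actual_cost / period;
	double err = actual_util - util_est;    /* observed error       */

	util_est   = a * err + b * accum_err;   /* new estimate         */
	accum_err += err;                       /* integrate the error  */
}

int main(void)
{
	int i;
	/* a task with a 10 ms period whose jobs actually run ~2 ms */
	for (i = 0; i < 8; i++) {
		update_estimate(2.0, 10.0);
		printf("job %d: util_est=%.4f (est. cost %.3f ms)\n",
		       i + 1, util_est, util_est * 10.0);
	}
	return 0;
}

In the kernel the same arithmetic is carried out with the fp_t helpers introduced in include/litmus/fpmath.h below.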
-rw-r--r--	include/litmus/edzl_common.h	5
-rw-r--r--	include/litmus/fpmath.h	124
-rw-r--r--	include/litmus/litmus.h	47
-rw-r--r--	include/litmus/rt_param.h	13
-rw-r--r--	litmus/Kconfig	12
-rw-r--r--	litmus/Makefile	1
-rw-r--r--	litmus/edzl_common.c	28
-rw-r--r--	litmus/sched_aedzl.c	902
8 files changed, 1131 insertions(+), 1 deletion(-)
diff --git a/include/litmus/edzl_common.h b/include/litmus/edzl_common.h
index d9ecfa1a70f9..8b2fbcd455ab 100644
--- a/include/litmus/edzl_common.h
+++ b/include/litmus/edzl_common.h
@@ -21,4 +21,9 @@ int edzl_higher_prio(struct task_struct* first,
 int edzl_ready_order(struct bheap_node* a, struct bheap_node* b);
 
 int edzl_preemption_needed(rt_domain_t* rt, struct task_struct *t);
+
+#ifdef CONFIG_PLUGIN_AEDZL
+int aedzl_preemption_needed(rt_domain_t* rt, struct task_struct *t);
+#endif
+
 #endif
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..0ad1927e2261
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,124 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#define FP_SHIFT 10
5#define ROUND_BIT (FP_SHIFT - 1)
6#define ONE FP(1)
7
8#define _fp(x) ((fp_t) {x})
9
10static const fp_t FP_ZERO = {.val = 0};
11static const fp_t FP_ONE = {.val = (1 << FP_SHIFT)};
12
13static inline fpbuf_t _point(fp_t x)
14{
15 return (x.val % (1 << FP_SHIFT));
16
17}
18
19#define fp2str(x) x.val
20/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
21#define _FP_ "%ld/1024"
22
23static inline fp_t FP(fpbuf_t x)
24{
25 return _fp(((fpbuf_t) x) << FP_SHIFT);
26}
27
28static inline fpbuf_t _floor(fp_t x)
29{
30 return x.val >> FP_SHIFT;
31}
32
33/* FIXME: negative rounding */
34static inline fpbuf_t _round(fp_t x)
35{
36 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
37}
38
39/* divide two integers to obtain a fixed point value */
40static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
41{
42 return _fp(FP(a).val / (b));
43}
44
45/* multiply two fixed point values */
46static inline fp_t _mul(fp_t a, fp_t b)
47{
48 return _fp((a.val * b.val) >> FP_SHIFT);
49}
50
51static inline fp_t _div(fp_t a, fp_t b)
52{
53 /* try not to overflow */
54 if (unlikely( a.val > (2l << (BITS_PER_LONG - FP_SHIFT)) ))
55 return _fp((a.val / b.val) << FP_SHIFT);
56 else
57 return _fp((a.val << FP_SHIFT) / b.val);
58}
59
60static inline fp_t _add(fp_t a, fp_t b)
61{
62 return _fp(a.val + b.val);
63}
64
65static inline fp_t _sub(fp_t a, fp_t b)
66{
67 return _fp(a.val - b.val);
68}
69
70static inline fp_t _neg(fp_t x)
71{
72 return _fp(-x.val);
73}
74
75static inline fp_t _abs(fp_t x)
76{
77 return _fp(abs(x.val));
78}
79
80/* equiv. to casting float/double to integer */
81static inline fpbuf_t _fp_to_integer(fp_t x)
82{
83 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
84}
85
86static inline fp_t _integer_to_fp(fpbuf_t x)
87{
88 return _frac(x,1);
89}
90
91static inline int _leq(fp_t a, fp_t b)
92{
93 return a.val <= b.val;
94}
95
96static inline int _geq(fp_t a, fp_t b)
97{
98 return a.val >= b.val;
99}
100
101static inline int _lt(fp_t a, fp_t b)
102{
103 return a.val < b.val;
104}
105
106static inline int _gt(fp_t a, fp_t b)
107{
108 return a.val > b.val;
109}
110
111static inline int _eq(fp_t a, fp_t b)
112{
113 return a.val == b.val;
114}
115
116static inline fp_t _max(fp_t a, fp_t b)
117{
118 if (a.val < b.val)
119 return b;
120 else
121 return a;
122}
123
124#endif
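A short standalone sketch of how these helpers compose, e.g. to turn a utilization estimate back into an execution-time estimate the way get_exec_cost_est() in litmus.h does. The fp_t/fpbuf_t typedefs actually live in include/litmus/rt_param.h; they are repeated here only so the snippet builds in user space, and the numbers are arbitrary.

/* Standalone illustration of the 1/1024 fixed-point helpers above.
 * fp_t/fpbuf_t come from include/litmus/rt_param.h in this patch;
 * they are inlined here only so this sketch compiles on its own. */
#include <stdio.h>

typedef long fpbuf_t;
typedef struct { fpbuf_t val; } fp_t;

#define FP_SHIFT 10
#define _fp(x)   ((fp_t) { (x) })

static fp_t FP(fpbuf_t x)               { return _fp(x << FP_SHIFT); }
static fp_t _frac(fpbuf_t a, fpbuf_t b) { return _fp(FP(a).val / b); }
static fp_t _mul(fp_t a, fp_t b)        { return _fp((a.val * b.val) >> FP_SHIFT); }
static fpbuf_t _floor(fp_t x)           { return x.val >> FP_SHIFT; }
static fpbuf_t _point(fp_t x)           { return x.val % (1 << FP_SHIFT); }

int main(void)
{
	fp_t util   = _frac(1, 4);         /* 0.25, stored as 256/1024 */
	fp_t period = FP(10);              /* e.g. a 10 ms period      */
	fp_t cost   = _mul(util, period);  /* estimated cost: 2.5 ms   */

	printf("util = %ld/1024\n", util.val);
	printf("cost = %ld + %ld/1024 ms\n", _floor(cost), _point(cost));
	return 0;
}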
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 26cddf5c9c84..95125d23c458 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -9,6 +9,10 @@
 #include <linux/jiffies.h>
 #include <litmus/sched_trace.h>
 
+#ifdef CONFIG_PLUGIN_AEDZL
+#include <litmus/fpmath.h>
+#endif
+
 #ifdef CONFIG_RELEASE_MASTER
 extern atomic_t release_master_cpu;
 #endif
@@ -96,6 +100,35 @@ inline static lt_t budget_remaining(struct task_struct* t)
 #ifdef CONFIG_PLUGIN_EDZL
 #define get_zerolaxity(t) (tsk_rt(t)->job_params.zero_laxity)
 #define set_zerolaxity(t,s) (tsk_rt(t)->job_params.zero_laxity=(s))
+
+#ifdef CONFIG_PLUGIN_AEDZL
+inline static lt_t get_exec_cost_est(struct task_struct* t)
+{
+	return (lt_t)( /* express cost in terms of lt_t */
+		abs( /* fp_t sometimes has both num and denom < 0,
+		        assume fraction is not negative and take abs() */
+			_fp_to_integer( /* truncate off fractional part */
+				_mul( /* exe = util * period */
+					tsk_rt(t)->task_params.util_est, _frac(get_rt_period(t), 1)
+				)
+			)
+		)
+	);
+}
+
+inline static int budget_exhausted_est(struct task_struct* t)
+{
+	return get_exec_time(t) >= get_exec_cost_est(t);
+}
+
+inline static lt_t budget_remaining_est(struct task_struct* t)
+{
+	if (!budget_exhausted_est(t))
+		return get_exec_cost_est(t) - get_exec_time(t);
+	else
+		return 0; /* avoid overflow */
+}
+#endif
 #endif
 
 #define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
@@ -157,6 +190,20 @@ inline static lt_t laxity_remaining(struct task_struct* t)
 	else
 		return 0;
 }
+
+#ifdef CONFIG_PLUGIN_AEDZL
+inline static lt_t laxity_remaining_est(struct task_struct* t)
+{
+	lt_t now = litmus_clock();
+	lt_t remaining = budget_remaining_est(t);
+	lt_t deadline = get_deadline(t);
+
+	if(now + remaining < deadline)
+		return (deadline - (now + remaining));
+	else
+		return 0;
+}
+#endif
 #endif
 
 
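A worked example of the estimate-based budget and laxity helpers added above, using plain unsigned nanosecond arithmetic; the values (and the assumed util_est of 0.25) are hypothetical, and the kernel versions operate on lt_t plus the fixed-point utilization instead.

/* Worked example of the estimate-based budget and laxity helpers;
 * all values are hypothetical and in nanoseconds. */
#include <stdio.h>

int main(void)
{
	unsigned long long period    = 10000000ULL; /* 10 ms task period       */
	unsigned long long deadline  = 30000000ULL; /* absolute deadline       */
	unsigned long long exec_time =  1500000ULL; /* 1.5 ms consumed so far  */
	unsigned long long now       = 27000000ULL; /* current litmus_clock()  */

	/* get_exec_cost_est(): estimated cost = util_est * period,
	 * with util_est assumed to be roughly 0.25 here */
	unsigned long long cost_est  = period / 4;                  /* 2.5 ms */

	/* budget_remaining_est() */
	unsigned long long remaining = (cost_est > exec_time) ?
	                                cost_est - exec_time : 0;   /* 1.0 ms */

	/* laxity_remaining_est(): slack left before the job must run
	 * continuously to finish (by the estimate) at its deadline */
	unsigned long long laxity = (now + remaining < deadline) ?
	                             deadline - (now + remaining) : 0;

	printf("estimated remaining budget: %llu ns\n", remaining);
	printf("estimated laxity:           %llu ns\n", laxity);
	return 0;
}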
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 53741727d5d0..d1a1f6ab523a 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -33,6 +33,14 @@ typedef enum {
 	PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
 } budget_policy_t;
 
+#ifdef CONFIG_PLUGIN_AEDZL
+typedef long fpbuf_t;
+typedef struct
+{
+	fpbuf_t val;
+} fp_t;
+#endif
+
 struct rt_task {
 	lt_t exec_cost;
 	lt_t period;
@@ -40,6 +48,11 @@ struct rt_task {
 	unsigned int cpu;
 	task_class_t cls;
 	budget_policy_t budget_policy; /* ignored by pfair */
+
+#ifdef CONFIG_PLUGIN_AEDZL
+	fp_t util_est;
+	fp_t accum_err;
+#endif
 };
 
 /* The definition of the data that is shared between the kernel and real-time
diff --git a/litmus/Kconfig b/litmus/Kconfig
index cb0ff9785152..55cfb5de4637 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -34,6 +34,18 @@ config PLUGIN_EDZL
 
 	  if unsure, say Yes.
 
+config PLUGIN_AEDZL
+	bool "AEDZL"
+	depends on PLUGIN_EDZL
+	default y
+	help
+	  Include the AEDZL (Adaptive Earliest Deadline, Zero Laxity) plugin in
+	  the kernel. AEDZL functions like EDZL, except that it uses feedback-
+	  control methods to estimate actual job execution time. This improves
+	  the accuracy of detected zero-laxity points.
+
+	  If unsure, say Yes.
+
 config RELEASE_MASTER
 	bool "Release-master Support"
 	depends on ARCH_HAS_SEND_PULL_TIMERS
diff --git a/litmus/Makefile b/litmus/Makefile
index 5ec42ac7cafc..6a3b5274f2c0 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -19,6 +19,7 @@ obj-y = sched_plugin.o litmus.o \
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
 obj-$(CONFIG_PLUGIN_EDZL) += sched_edzl.o edzl_common.o
+obj-$(CONFIG_PLUGIN_AEDZL) += sched_aedzl.o
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/edzl_common.c b/litmus/edzl_common.c
index f925901703ab..aa035ecdd8f9 100644
--- a/litmus/edzl_common.c
+++ b/litmus/edzl_common.c
@@ -72,7 +72,7 @@ void edzl_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
  */
 int edzl_preemption_needed(rt_domain_t* rt, struct task_struct *t)
 {
-	/* we need the read lock for edf_ready_queue */
+	/* we need the read lock for edzl_ready_queue */
 	/* no need to preempt if there is nothing pending */
 	if (!__jobs_pending(rt))
 		return 0;
@@ -96,3 +96,29 @@ int edzl_preemption_needed(rt_domain_t* rt, struct task_struct *t)
 
 	return edzl_higher_prio(__next_ready(rt), t);
 }
+
+
+#ifdef CONFIG_PLUGIN_AEDZL
+int aedzl_preemption_needed(rt_domain_t* rt, struct task_struct *t)
+{
+	/* we need the read lock for edzl_ready_queue */
+	/* no need to preempt if there is nothing pending */
+	if (!__jobs_pending(rt))
+		return 0;
+	/* we need to reschedule if t doesn't exist */
+	if (!t)
+		return 1;
+	/* make sure to get non-rt stuff out of the way */
+	if (!is_realtime(t))
+		return 1;
+
+	/* Detect zero-laxity as needed. Easier to do it here than in tick.
+	   (No timer is used to detect zero-laxity while a job is running.) */
+	if(unlikely(!get_zerolaxity(t) && laxity_remaining_est(t) == 0))
+	{
+		set_zerolaxity(t, 1);
+	}
+
+	return edzl_higher_prio(__next_ready(rt), t);
+}
+#endif
diff --git a/litmus/sched_aedzl.c b/litmus/sched_aedzl.c
new file mode 100644
index 000000000000..d521a6497677
--- /dev/null
+++ b/litmus/sched_aedzl.c
@@ -0,0 +1,902 @@
1/*
2 * litmus/sched_aedzl.c
3 *
4 * Implementation of the Adaptive-EDZL scheduling algorithm.
5 *
6 * This version uses the simple approach and serializes all scheduling
7 * decisions by the use of a queue lock. This is probably not the
8 * best way to do it, but it should suffice for now.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/hrtimer.h>
15
16#include <litmus/litmus.h>
17#include <litmus/jobs.h>
18#include <litmus/sched_plugin.h>
19#include <litmus/edzl_common.h>
20#include <litmus/sched_trace.h>
21
22#include <litmus/bheap.h>
23
24#include <linux/module.h>
25
26/* Overview of AEDZL operations.
27 *
28 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
29 * structure (NOT the actually scheduled
30 * task). If there is another linked task To
31 * already it will set To->linked_on = NO_CPU
32 * (thereby removing its association with this
33 * CPU). However, it will not requeue the
34 * previously linked task (if any). It will set
35 * T's state to RT_F_RUNNING and check whether
36 * it is already running somewhere else. If T
37 * is scheduled somewhere else it will link
38 * it to that CPU instead (and pull the linked
39 * task to cpu). T may be NULL.
40 *
41 * unlink(T) - Unlink removes T from all scheduler data
42 * structures. If it is linked to some CPU it
43 * will link NULL to that CPU. If it is
44 * currently queued in the edzl queue it will
45 * be removed from the rt_domain. It is safe to
46 * call unlink(T) if T is not linked. T may not
47 * be NULL.
48 *
49 * requeue(T) - Requeue will insert T into the appropriate
50 * queue. If the system is in real-time mode and
51 * T is released already, it will go into the
52 * ready queue. If the system is not in
53 * real-time mode, then T will go into the
54 * release queue. If T's release time is in the
55 * future, it will go into the release
56 * queue. That means that T's release time/job
57 * no/etc. has to be updated before requeue(T) is
58 * called. It is not safe to call requeue(T)
59 * when T is already queued. T may not be NULL.
60 *
61 * aedzl_job_arrival(T) - This is the catch all function when T enters
62 * the system after either a suspension or at a
63 * job release. It will queue T (which means it
64 * is not safe to call aedzl_job_arrival(T) if
65 * T is already queued) and then check whether a
66 * preemption is necessary. If a preemption is
67 * necessary it will update the linkage
68 * accordingly and cause schedule() to be called
69 * (either with an IPI or need_resched). It is
70 * safe to call aedzl_job_arrival(T) if T's
71 * next job has not been actually released yet
72 * (release time in the future). T will be put
73 * on the release queue in that case.
74 *
75 * job_completion(T) - Take care of everything that needs to be done
76 * to prepare T for its next release and place
77 * it in the right queue with
78 * aedzl_job_arrival().
79 *
80 *
81 * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
82 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
83 * the functions will automatically propagate pending tasks from the ready queue
84 * to a linked task. This is the job of the calling function (by means of
85 * __take_ready).
86 */
87
88
89/* cpu_entry_t - maintain the linked and scheduled state
90 */
91typedef struct {
92 int cpu;
93 struct task_struct* linked; /* only RT tasks */
94 struct task_struct* scheduled; /* only RT tasks */
95 atomic_t will_schedule; /* prevent unneeded IPIs */
96 struct bheap_node* hn;
97} cpu_entry_t;
98DEFINE_PER_CPU(cpu_entry_t, aedzl_cpu_entries);
99
100cpu_entry_t* aedzl_cpus[NR_CPUS];
101
102#define set_will_schedule() \
103 (atomic_set(&__get_cpu_var(aedzl_cpu_entries).will_schedule, 1))
104#define clear_will_schedule() \
105 (atomic_set(&__get_cpu_var(aedzl_cpu_entries).will_schedule, 0))
106#define test_will_schedule(cpu) \
107 (atomic_read(&per_cpu(aedzl_cpu_entries, cpu).will_schedule))
108
109
110/* the cpus queue themselves according to priority in here */
111static struct bheap_node aedzl_heap_node[NR_CPUS];
112static struct bheap aedzl_cpu_heap;
113
114/* Design choice: use one RT domain for both zero-laxity and regular jobs. */
115
116static rt_domain_t aedzl;
117#define aedzl_lock (aedzl.ready_lock)
118
119static const fp_t a_fp = {.val = 104}; /* ~0.102 (104/1024) */
120static const fp_t b_fp = {.val = 310}; /* ~0.303 (310/1024) */
121
122/* Uncomment this if you want to see all scheduling decisions in the
123 * TRACE() log.
124#define WANT_ALL_SCHED_EVENTS
125 */
126
127static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
128{
129 cpu_entry_t *a, *b;
130 a = _a->value;
131 b = _b->value;
132 /* Note that a and b are inverted: we want the lowest-priority CPU at
133 * the top of the heap.
134 */
135 return edzl_higher_prio(b->linked, a->linked);
136}
137
138/* update_cpu_position - Move the cpu entry to the correct place to maintain
139 * order in the cpu queue. Caller must hold the aedzl lock.
140 */
141static void update_cpu_position(cpu_entry_t *entry)
142{
143 if (likely(bheap_node_in_heap(entry->hn)))
144 bheap_delete(cpu_lower_prio, &aedzl_cpu_heap, entry->hn);
145 bheap_insert(cpu_lower_prio, &aedzl_cpu_heap, entry->hn);
146}
147
148/* caller must hold the aedzl lock */
149static cpu_entry_t* lowest_prio_cpu(void)
150{
151 struct bheap_node* hn;
152 hn = bheap_peek(cpu_lower_prio, &aedzl_cpu_heap);
153 return hn->value;
154}
155
156
157/* link_task_to_cpu - Update the link of a CPU.
158 * Handles the case where the to-be-linked task is already
159 * scheduled on a different CPU.
160 */
161static noinline void link_task_to_cpu(struct task_struct* linked,
162 cpu_entry_t *entry)
163{
164 cpu_entry_t *sched;
165 struct task_struct* tmp;
166 int on_cpu;
167
168 BUG_ON(linked && !is_realtime(linked));
169
170 /* Currently linked task is set to be unlinked. */
171 if (entry->linked) {
172 entry->linked->rt_param.linked_on = NO_CPU;
173 }
174
175 /* Link new task to CPU. */
176 if (linked) {
177 set_rt_flags(linked, RT_F_RUNNING);
178 /* handle task is already scheduled somewhere! */
179 on_cpu = linked->rt_param.scheduled_on;
180 if (on_cpu != NO_CPU) {
181 sched = &per_cpu(aedzl_cpu_entries, on_cpu);
182 /* this should only happen if not linked already */
183 BUG_ON(sched->linked == linked);
184
185 /* If we are already scheduled on the CPU to which we
186 * wanted to link, we don't need to do the swap --
187 * we just link ourselves to the CPU and depend on
188 * the caller to get things right.
189 */
190 if (entry != sched) {
191 TRACE_TASK(linked,
192 "already scheduled on %d, updating link.\n",
193 sched->cpu);
194 tmp = sched->linked;
195 linked->rt_param.linked_on = sched->cpu;
196 sched->linked = linked;
197 update_cpu_position(sched);
198 linked = tmp;
199 }
200 }
201 if (linked) /* might be NULL due to swap */
202 linked->rt_param.linked_on = entry->cpu;
203 }
204 entry->linked = linked;
205#ifdef WANT_ALL_SCHED_EVENTS
206 if (linked)
207 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
208 else
209 TRACE("NULL linked to %d.\n", entry->cpu);
210#endif
211 update_cpu_position(entry);
212}
213
214/* unlink - Make sure a task is not linked any longer to an entry
215 * where it was linked before. Must hold aedzl_lock.
216 */
217static noinline void unlink(struct task_struct* t)
218{
219 cpu_entry_t *entry;
220
221 if (unlikely(!t)) {
222 TRACE_BUG_ON(!t);
223 return;
224 }
225
226 if (t->rt_param.linked_on != NO_CPU) {
227 /* unlink */
228 entry = &per_cpu(aedzl_cpu_entries, t->rt_param.linked_on);
229 t->rt_param.linked_on = NO_CPU;
230 link_task_to_cpu(NULL, entry);
231 } else if (is_queued(t)) {
232 /* This is an interesting situation: t is scheduled,
233 * but was just recently unlinked. It cannot be
234 * linked anywhere else (because then it would have
235 * been relinked to this CPU), thus it must be in some
236 * queue. We must remove it from the list in this
237 * case.
238 */
239 remove(&aedzl, t);
240 }
241}
242
243static inline void update_exe_estimate(struct task_struct *t, lt_t actual_cost)
244{
245 fp_t err, new;
246 fp_t actual_util = _frac(actual_cost, get_rt_period(t));
247
248 TRACE_TASK(t, "OLD cost: %llu, est cost: %llu, est util: %d.%d, est err: %d.%d\n",
249 tsk_rt(t)->task_params.exec_cost,
250 get_exec_cost_est(t),
251 _fp_to_integer(tsk_rt(t)->task_params.util_est), _point(tsk_rt(t)->task_params.util_est),
252 _fp_to_integer(tsk_rt(t)->task_params.accum_err), _point(tsk_rt(t)->task_params.accum_err));
253
254 err = _sub(actual_util, tsk_rt(t)->task_params.util_est);
255 /*
256 TRACE_TASK(t, "delta err = actual - est\n"
257 "\t%d.%d = %d.%d - %d.%d\n",
258 _fp_to_integer(err), _point(err),
259 _fp_to_integer(actual_util), _point(actual_util),
260 _fp_to_integer(tsk_rt(t)->task_params.util_est), _point(tsk_rt(t)->task_params.util_est));
261 */
262
263 new = _add(_mul(a_fp, err),
264 _mul(b_fp, tsk_rt(t)->task_params.accum_err));
265 /*
266 TRACE_TASK(t, "util est = a*(delta error) + b*(accum error)\n"
267 "\t%d.%d = %d.%d * %d.%d + %d.%d * %d.%d\n",
268 _fp_to_integer(new), _point(new),
269 _fp_to_integer(a_fp), _point(a_fp),
270 _fp_to_integer(err), _point(err),
271 _fp_to_integer(b_fp), _point(b_fp),
272 _fp_to_integer(tsk_rt(t)->task_params.accum_err), _point(tsk_rt(t)->task_params.accum_err));
273 */
274
275 tsk_rt(t)->task_params.util_est = new;
276 tsk_rt(t)->task_params.accum_err = _add(tsk_rt(t)->task_params.accum_err, err);
277
278 TRACE_TASK(t, "cost: %llu, est cost: %llu, est util: %d.%d, est err: %d.%d, (delta cost: %d.%d, delta err: %d.%d)\n",
279 tsk_rt(t)->task_params.exec_cost,
280 get_exec_cost_est(t),
281 _fp_to_integer(tsk_rt(t)->task_params.util_est), _point(tsk_rt(t)->task_params.util_est),
282 _fp_to_integer(tsk_rt(t)->task_params.accum_err), _point(tsk_rt(t)->task_params.accum_err),
283 _fp_to_integer(new), _point(new),
284 _fp_to_integer(err), _point(err));
285}
286
287
288static void update_queue_position(struct task_struct *t);
289
290static enum hrtimer_restart on_zero_laxity(struct hrtimer *timer)
291{
292 unsigned long flags;
293 struct task_struct* t;
294
295 lt_t now = litmus_clock();
296
297 TRACE("Zero-laxity timer went off!\n");
298
299 raw_spin_lock_irqsave(&aedzl_lock, flags);
300
301 t = container_of(container_of(timer, struct rt_param, zl_timer),
302 struct task_struct,
303 rt_param);
304
305 TRACE_TASK(t, "Reached zero-laxity. (now: %llu, zl-pt: %lld, time remaining (now): %lld)\n",
306 now,
307 get_deadline(t) - budget_remaining_est(t),
308 get_deadline(t) - now);
309
310 tsk_rt(t)->zl_timer_armed = 0;
311 set_zerolaxity(t, 1);
312 update_queue_position(t);
313
314 raw_spin_unlock_irqrestore(&aedzl_lock, flags);
315
316 return HRTIMER_NORESTART;
317}
318
319/* __aedzl_take_ready - calls __take_ready, with the side-effect of canceling the zero-laxity timer. */
320static inline struct task_struct* __aedzl_take_ready(rt_domain_t* rt)
321{
322 struct task_struct* t = __take_ready(rt);
323
324 if(t)
325 {
326 if(get_zerolaxity(t) == 0)
327 {
328 if(tsk_rt(t)->zl_timer_armed)
329 {
330 int cancel_ret;
331
332 TRACE_TASK(t, "Canceling zero-laxity timer.\n");
333 cancel_ret = hrtimer_try_to_cancel(&tsk_rt(t)->zl_timer);
334 WARN_ON(cancel_ret == 0); /* should never be inactive. */
335 tsk_rt(t)->zl_timer_armed = 0;
336 }
337 }
338 else
339 {
340 TRACE_TASK(t, "Task already has zero-laxity flagged.\n");
341 }
342 }
343
344 return t;
345}
346
347/* __aedzl_add_ready - calls __add_ready, with the side-effect of arming the zero-laxity timer. */
348static inline void __aedzl_add_ready(rt_domain_t* rt, struct task_struct *new)
349{
350 __add_ready(rt, new);
351
352 if(get_zerolaxity(new) == 0)
353 {
354 lt_t when_to_fire;
355
356 when_to_fire = get_deadline(new) - budget_remaining_est(new);
357
358 TRACE_TASK(new, "Setting zero-laxity timer for %llu. (deadline: %llu, est remaining: %llu)\n",
359 when_to_fire,
360 get_deadline(new),
361 budget_remaining_est(new));
362
363 __hrtimer_start_range_ns(&tsk_rt(new)->zl_timer,
364 ns_to_ktime(when_to_fire),
365 0,
366 HRTIMER_MODE_ABS_PINNED,
367 0);
368
369 tsk_rt(new)->zl_timer_armed = 1;
370 }
371 else
372 {
373 TRACE_TASK(new, "Already has zero-laxity when added to ready queue. (deadline: %llu, est remaining: %llu))\n",
374 get_deadline(new),
375 budget_remaining_est(new));
376 }
377}
378
379static void check_for_preemptions(void);
380
381/* Glenn: Originally written for FMLP prio-inheritance, but
382 we can use it to upgrade the priority of a zero-laxity task. */
383static void update_queue_position(struct task_struct *t)
384{
385 /* Assumption: caller holds aedzl_lock */
386
387 int check_preempt = 0;
388
389 if (tsk_rt(t)->linked_on != NO_CPU) {
390 TRACE_TASK(t, "%s: linked on %d\n",
391 __FUNCTION__, tsk_rt(t)->linked_on);
392 /* Holder is scheduled; need to re-order CPUs.
393 * We can't use heap_decrease() here since
394 * the cpu_heap is ordered in reverse direction, so
395 * it is actually an increase. */
396 bheap_delete(cpu_lower_prio, &aedzl_cpu_heap,
397 aedzl_cpus[tsk_rt(t)->linked_on]->hn);
398 bheap_insert(cpu_lower_prio, &aedzl_cpu_heap,
399 aedzl_cpus[tsk_rt(t)->linked_on]->hn);
400 } else {
401 /* holder may be queued: first stop queue changes */
402 raw_spin_lock(&aedzl.release_lock);
403 if (is_queued(t)) {
404 TRACE_TASK(t, "%s: is queued\n",
405 __FUNCTION__);
406 /* We need to update the position
407 * of holder in some heap. Note that this
408 * may be a release heap. */
409 check_preempt =
410 !bheap_decrease(edzl_ready_order,
411 tsk_rt(t)->heap_node);
412 } else {
413 /* Nothing to do: if it is not queued and not linked
414 * then it is currently being moved by other code
415 * (e.g., a timer interrupt handler) that will use the
416 * correct priority when enqueuing the task. */
417 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
418 __FUNCTION__);
419 }
420 raw_spin_unlock(&aedzl.release_lock);
421
422 /* If holder was enqueued in a release heap, then the following
423 * preemption check is pointless, but we can't easily detect
424 * that case. If you want to fix this, then consider that
425 * simply adding a state flag requires O(n) time to update when
426 * releasing n tasks, which conflicts with the goal to have
427 * O(log n) merges. */
428 if (check_preempt) {
429 /* heap_decrease() hit the top level of the heap: make
430 * sure preemption checks get the right task, not the
431 * potentially stale cache. */
432 bheap_uncache_min(edzl_ready_order,
433 &aedzl.ready_queue);
434 check_for_preemptions();
435 }
436 }
437}
438
439
440
441/* preempt - force a CPU to reschedule
442 */
443static void preempt(cpu_entry_t *entry)
444{
445 preempt_if_preemptable(entry->scheduled, entry->cpu);
446}
447
448/* requeue - Put an unlinked task into the aedzl domain.
449 * Caller must hold aedzl_lock.
450 */
451static noinline void requeue(struct task_struct* task)
452{
453 BUG_ON(!task);
454 /* sanity check before insertion */
455 BUG_ON(is_queued(task));
456
457 if (is_released(task, litmus_clock())) {
458 __aedzl_add_ready(&aedzl, task);
459 }
460 else {
461 /* it has got to wait */
462 add_release(&aedzl, task);
463 }
464}
465
466/* check for any necessary preemptions */
467static void check_for_preemptions(void)
468{
469 struct task_struct *task;
470 cpu_entry_t* last;
471
472 for(last = lowest_prio_cpu();
473 aedzl_preemption_needed(&aedzl, last->linked);
474 last = lowest_prio_cpu()) {
475 /* preemption necessary */
476 task = __aedzl_take_ready(&aedzl);
477
478 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
479 task->pid, last->cpu);
480 if (last->linked)
481 requeue(last->linked);
482 link_task_to_cpu(task, last);
483 preempt(last);
484 }
485}
486
487/* aedzl_job_arrival: task is either resumed or released */
488static noinline void aedzl_job_arrival(struct task_struct* task)
489{
490 BUG_ON(!task);
491
492 /* tag zero-laxity at release time */
493 set_zerolaxity(task, laxity_remaining_est(task) == 0);
494
495 requeue(task);
496 check_for_preemptions();
497}
498
499static void aedzl_release_jobs(rt_domain_t* rt, struct bheap* tasks)
500{
501 unsigned long flags;
502
503 raw_spin_lock_irqsave(&aedzl_lock, flags);
504
505 __merge_ready(rt, tasks);
506 check_for_preemptions();
507
508 raw_spin_unlock_irqrestore(&aedzl_lock, flags);
509}
510
511/* caller holds aedzl_lock */
512static noinline void job_completion(struct task_struct *t, int forced)
513{
514 BUG_ON(!t);
515
516 sched_trace_task_completion(t, forced);
517
518 TRACE_TASK(t, "job_completion().\n");
519
520 /* set flags */
521 set_rt_flags(t, RT_F_SLEEP);
522
523 /* update exe estimate */
524 update_exe_estimate(t, get_exec_time(t));
525
526 /* prepare for next period */
527 prepare_for_next_period(t);
528
529 if (is_released(t, litmus_clock()))
530 sched_trace_task_release(t);
531 /* unlink */
532 unlink(t);
533 /* requeue
534 * But don't requeue a blocking task. */
535 if (is_running(t))
536 aedzl_job_arrival(t);
537}
538
539/* aedzl_tick - this function is called for every local timer
540 * interrupt.
541 *
542 * checks whether the current task has expired and checks
543 * whether we need to preempt it if it has not expired
544 */
545static void aedzl_tick(struct task_struct* t)
546{
547 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
548 if (!is_np(t)) {
549 /* np tasks will be preempted when they become
550 * preemptable again
551 */
552 set_tsk_need_resched(t);
553 set_will_schedule();
554 TRACE("aedzl_scheduler_tick: "
555 "%d is preemptable "
556 " => FORCE_RESCHED\n", t->pid);
557 } else if (is_user_np(t)) {
558 TRACE("aedzl_scheduler_tick: "
559 "%d is non-preemptable, "
560 "preemption delayed.\n", t->pid);
561 request_exit_np(t);
562 }
563 }
564}
565
566/* Getting schedule() right is a bit tricky. schedule() may not make any
567 * assumptions on the state of the current task since it may be called for a
568 * number of reasons. The reasons include that a scheduler_tick() determined it
569 * was necessary, that sys_exit_np() was called, that some Linux
570 * subsystem determined so, or even (in the worst case) that there is a bug
571 * hidden somewhere. Thus, we must take extreme care to determine what the
572 * current state is.
573 *
574 * The CPU could currently be scheduling a task (or not), be linked (or not).
575 *
576 * The following assertions for the scheduled task could hold:
577 *
578 * - !is_running(scheduled) // the job blocks
579 * - scheduled->timeslice == 0 // the job completed (forcefully)
580 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
581 * - linked != scheduled // we need to reschedule (for any reason)
582 * - is_np(scheduled) // rescheduling must be delayed,
583 * sys_exit_np must be requested
584 *
585 * Any of these can occur together.
586 */
587static struct task_struct* aedzl_schedule(struct task_struct * prev)
588{
589 cpu_entry_t* entry = &__get_cpu_var(aedzl_cpu_entries);
590 int out_of_time, sleep, preempt, np, exists, blocks;
591 struct task_struct* next = NULL;
592
593#ifdef CONFIG_RELEASE_MASTER
594 /* Bail out early if we are the release master.
595 * The release master never schedules any real-time tasks.
596 */
597 if (aedzl.release_master == entry->cpu)
598 return NULL;
599#endif
600
601 raw_spin_lock(&aedzl_lock);
602 clear_will_schedule();
603
604 /* sanity checking */
605 BUG_ON(entry->scheduled && entry->scheduled != prev);
606 BUG_ON(entry->scheduled && !is_realtime(prev));
607 BUG_ON(is_realtime(prev) && !entry->scheduled);
608
609 /* (0) Determine state */
610 exists = entry->scheduled != NULL;
611 blocks = exists && !is_running(entry->scheduled);
612 out_of_time = exists &&
613 budget_enforced(entry->scheduled) &&
614 budget_exhausted(entry->scheduled);
615 np = exists && is_np(entry->scheduled);
616 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
617 preempt = entry->scheduled != entry->linked;
618
619#ifdef WANT_ALL_SCHED_EVENTS
620 TRACE_TASK(prev, "invoked aedzl_schedule.\n");
621#endif
622
623 if (exists)
624 TRACE_TASK(prev,
625 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
626 "state:%d sig:%d\n",
627 blocks, out_of_time, np, sleep, preempt,
628 prev->state, signal_pending(prev));
629 if (entry->linked && preempt)
630 TRACE_TASK(prev, "will be preempted by %s/%d\n",
631 entry->linked->comm, entry->linked->pid);
632
633
634 /* If a task blocks we have no choice but to reschedule.
635 */
636 if (blocks)
637 unlink(entry->scheduled);
638
639 /* Request a sys_exit_np() call if we would like to preempt but cannot.
640 * We need to make sure to update the link structure anyway in case
641 * that we are still linked. Multiple calls to request_exit_np() don't
642 * hurt.
643 */
644 if (np && (out_of_time || preempt || sleep)) {
645 unlink(entry->scheduled);
646 request_exit_np(entry->scheduled);
647 }
648
649 /* Any task that is preemptable and either exhausts its execution
650 * budget or wants to sleep completes. We may have to reschedule after
651 * this. Don't do a job completion if we block (can't have timers running
652 * for blocked jobs). Preemptions go first for the same reason.
653 */
654 if (!np && (out_of_time || sleep) && !blocks && !preempt)
655 job_completion(entry->scheduled, !sleep);
656
657 /* Link pending task if we became unlinked.
658 */
659 if (!entry->linked) {
660 link_task_to_cpu(__aedzl_take_ready(&aedzl), entry);
661 }
662
663 /* The final scheduling decision. Do we need to switch for some reason?
664 * If linked is different from scheduled, then select linked as next.
665 */
666 if ((!np || blocks) &&
667 entry->linked != entry->scheduled) {
668 /* Schedule a linked job? */
669 if (entry->linked) {
670 entry->linked->rt_param.scheduled_on = entry->cpu;
671 next = entry->linked;
672 }
673 if (entry->scheduled) {
674 /* not gonna be scheduled soon */
675 entry->scheduled->rt_param.scheduled_on = NO_CPU;
676 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
677 }
678 } else
679 /* Only override Linux scheduler if we have a real-time task
680 * scheduled that needs to continue.
681 */
682 if (exists)
683 next = prev;
684
685 raw_spin_unlock(&aedzl_lock);
686
687#ifdef WANT_ALL_SCHED_EVENTS
688 TRACE("aedzl_lock released, next=0x%p\n", next);
689
690 if (next)
691 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
692 else if (exists && !next)
693 TRACE("becomes idle at %llu.\n", litmus_clock());
694#endif
695
696
697 return next;
698}
699
700
701/* _finish_switch - we just finished the switch away from prev
702 */
703static void aedzl_finish_switch(struct task_struct *prev)
704{
705 cpu_entry_t* entry = &__get_cpu_var(aedzl_cpu_entries);
706
707 entry->scheduled = is_realtime(current) ? current : NULL;
708#ifdef WANT_ALL_SCHED_EVENTS
709 TRACE_TASK(prev, "switched away from\n");
710#endif
711}
712
713
714/* Prepare a task for running in RT mode
715 */
716static void aedzl_task_new(struct task_struct * t, int on_rq, int running)
717{
718 unsigned long flags;
719 cpu_entry_t* entry;
720
721	TRACE("aedzl: task new %d\n", t->pid);
722
723 raw_spin_lock_irqsave(&aedzl_lock, flags);
724
725 t->rt_param.zl_timer_armed = 0;
726 hrtimer_init(&t->rt_param.zl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
727 t->rt_param.zl_timer.function = on_zero_laxity;
728
729 /* setup job params */
730 release_at(t, litmus_clock());
731
732 if (running) {
733 entry = &per_cpu(aedzl_cpu_entries, task_cpu(t));
734 BUG_ON(entry->scheduled);
735
736#ifdef CONFIG_RELEASE_MASTER
737 if (entry->cpu != aedzl.release_master) {
738#endif
739 entry->scheduled = t;
740 tsk_rt(t)->scheduled_on = task_cpu(t);
741#ifdef CONFIG_RELEASE_MASTER
742 } else {
743 /* do not schedule on release master */
744 preempt(entry); /* force resched */
745 tsk_rt(t)->scheduled_on = NO_CPU;
746 }
747#endif
748 } else {
749 t->rt_param.scheduled_on = NO_CPU;
750 }
751 t->rt_param.linked_on = NO_CPU;
752
753 aedzl_job_arrival(t);
754 raw_spin_unlock_irqrestore(&aedzl_lock, flags);
755}
756
757static void aedzl_task_wake_up(struct task_struct *task)
758{
759 unsigned long flags;
760 lt_t now;
761
762 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
763
764 raw_spin_lock_irqsave(&aedzl_lock, flags);
765 /* We need to take suspensions because of semaphores into
766 * account! If a job resumes after being suspended due to acquiring
767 * a semaphore, it should never be treated as a new job release.
768 */
769 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
770 set_rt_flags(task, RT_F_RUNNING);
771 } else {
772 now = litmus_clock();
773 if (is_tardy(task, now)) {
774 /* new sporadic release */
775 release_at(task, now);
776 sched_trace_task_release(task);
777 }
778 else {
779 if (task->rt.time_slice) {
780 /* came back in time before deadline
781 */
782 set_rt_flags(task, RT_F_RUNNING);
783 }
784 }
785 }
786 aedzl_job_arrival(task);
787 raw_spin_unlock_irqrestore(&aedzl_lock, flags);
788}
789
790static void aedzl_task_block(struct task_struct *t)
791{
792 unsigned long flags;
793
794 TRACE_TASK(t, "block at %llu\n", litmus_clock());
795
796 /* unlink if necessary */
797 raw_spin_lock_irqsave(&aedzl_lock, flags);
798 unlink(t);
799 raw_spin_unlock_irqrestore(&aedzl_lock, flags);
800
801 BUG_ON(!is_realtime(t));
802}
803
804
805static void aedzl_task_exit(struct task_struct * t)
806{
807 unsigned long flags;
808
809 /* unlink if necessary */
810 raw_spin_lock_irqsave(&aedzl_lock, flags);
811 unlink(t);
812 if (tsk_rt(t)->scheduled_on != NO_CPU) {
813 aedzl_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
814 tsk_rt(t)->scheduled_on = NO_CPU;
815 }
816
817 if(tsk_rt(t)->zl_timer_armed)
818 {
819 /* BUG if reached? */
820 TRACE_TASK(t, "Canceled armed timer while exiting.\n");
821 hrtimer_cancel(&tsk_rt(t)->zl_timer);
822 tsk_rt(t)->zl_timer_armed = 0;
823 }
824
825 raw_spin_unlock_irqrestore(&aedzl_lock, flags);
826
827 BUG_ON(!is_realtime(t));
828 TRACE_TASK(t, "RIP\n");
829}
830
831static long aedzl_admit_task(struct task_struct* tsk)
832{
833 return 0;
834}
835
836static long aedzl_activate_plugin(void)
837{
838 int cpu;
839 cpu_entry_t *entry;
840
841 bheap_init(&aedzl_cpu_heap);
842#ifdef CONFIG_RELEASE_MASTER
843 aedzl.release_master = atomic_read(&release_master_cpu);
844#endif
845
846 for_each_online_cpu(cpu) {
847 entry = &per_cpu(aedzl_cpu_entries, cpu);
848 bheap_node_init(&entry->hn, entry);
849 atomic_set(&entry->will_schedule, 0);
850 entry->linked = NULL;
851 entry->scheduled = NULL;
852#ifdef CONFIG_RELEASE_MASTER
853 if (cpu != aedzl.release_master) {
854#endif
855 TRACE("AEDZL: Initializing CPU #%d.\n", cpu);
856 update_cpu_position(entry);
857#ifdef CONFIG_RELEASE_MASTER
858 } else {
859 TRACE("AEDZL: CPU %d is release master.\n", cpu);
860 }
861#endif
862 }
863 return 0;
864}
865
866/* Plugin object */
867static struct sched_plugin aedzl_plugin __cacheline_aligned_in_smp = {
868 .plugin_name = "AEDZL",
869 .finish_switch = aedzl_finish_switch,
870 .tick = aedzl_tick,
871 .task_new = aedzl_task_new,
872 .complete_job = complete_job,
873 .task_exit = aedzl_task_exit,
874 .schedule = aedzl_schedule,
875 .task_wake_up = aedzl_task_wake_up,
876 .task_block = aedzl_task_block,
877 .admit_task = aedzl_admit_task,
878 .activate_plugin = aedzl_activate_plugin,
879};
880
881
882static int __init init_aedzl(void)
883{
884 int cpu;
885 cpu_entry_t *entry;
886
887 bheap_init(&aedzl_cpu_heap);
888 /* initialize CPU state */
889 for (cpu = 0; cpu < NR_CPUS; cpu++) {
890 entry = &per_cpu(aedzl_cpu_entries, cpu);
891 aedzl_cpus[cpu] = entry;
892 atomic_set(&entry->will_schedule, 0);
893 entry->cpu = cpu;
894 entry->hn = &aedzl_heap_node[cpu];
895 bheap_node_init(&entry->hn, entry);
896 }
897 edzl_domain_init(&aedzl, NULL, aedzl_release_jobs);
898 return register_sched_plugin(&aedzl_plugin);
899}
900
901
902module_init(init_aedzl);
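To close, a small sketch (hypothetical numbers) of why the estimate matters for the zero-laxity timer armed in __aedzl_add_ready(): a pessimistic WCET declares zero laxity earlier than necessary, while the feedback estimate pushes the detection point later, which is the improvement the commit message claims. If the estimate ever undershoots, aedzl_preemption_needed() still re-checks the running job's estimated laxity.

/* Sketch contrasting WCET-based and estimate-based zero-laxity points
 * (cf. when_to_fire in __aedzl_add_ready()); all numbers are hypothetical. */
#include <stdio.h>

int main(void)
{
	unsigned long long deadline  = 30000000ULL; /* absolute deadline (ns)    */
	unsigned long long wcet_left =  5000000ULL; /* remaining budget per WCET */
	unsigned long long est_left  =  2500000ULL; /* budget_remaining_est()    */

	/* EDZL would flag zero laxity at deadline - wcet_left; AEDZL arms its
	 * hrtimer for the later, estimate-based point instead. */
	printf("WCET-based ZL point:     %llu ns\n", deadline - wcet_left);
	printf("estimate-based ZL point: %llu ns\n", deadline - est_left);
	return 0;
}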