author | Jonathan Herman <hermanjl@cs.unc.edu> | 2011-09-07 18:03:33 -0400 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2011-09-07 18:03:33 -0400 |
commit | 0b096fbe159a60c56190f8a627d764051e1e52ea (patch) | |
tree | 89535a50e48ae92d0add444684ef28603ea0bf3f /litmus/sched_mc.c | |
parent | d5e965b0074d6ef10f5a77112fc3671613a2150f (diff) |
Refactor to allow generic domains
Diffstat (limited to 'litmus/sched_mc.c')
-rw-r--r-- | litmus/sched_mc.c | 1624 |
1 file changed, 581 insertions, 1043 deletions
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
index dcf86d60275a..bc4b46165b64 100644
--- a/litmus/sched_mc.c
+++ b/litmus/sched_mc.c
@@ -8,12 +8,6 @@ | |||
8 | * This version uses the simple approach and serializes all scheduling | 8 | * This version uses the simple approach and serializes all scheduling |
9 | * decisions by the use of a queue lock. This is probably not the | 9 | * decisions by the use of a queue lock. This is probably not the |
10 | * best way to do it, but it should suffice for now. | 10 | * best way to do it, but it should suffice for now. |
11 | * | ||
12 | * --Todo-- | ||
13 | * Timer Accounting: adjusting the clock values of the ghost timer using | ||
14 | * the _tick() method is relatively expensive. This should be changed. | ||
15 | * Locks: Accesses to domains should be serialized using a per-domain lock. | ||
16 | * CPU locks should be properly used e.g. wip-semi-part | ||
17 | */ | 11 | */ |
18 | 12 | ||
19 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
@@ -21,1106 +15,533 @@ | |||
21 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
22 | #include <linux/hrtimer.h> | 16 | #include <linux/hrtimer.h> |
23 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/module.h> | ||
24 | 19 | ||
25 | #include <litmus/litmus.h> | 20 | #include <litmus/litmus.h> |
26 | #include <litmus/jobs.h> | 21 | #include <litmus/jobs.h> |
27 | #include <litmus/sched_plugin.h> | 22 | #include <litmus/sched_plugin.h> |
28 | #include <litmus/edf_common.h> | 23 | #include <litmus/edf_common.h> |
29 | #include <litmus/sched_trace.h> | 24 | #include <litmus/sched_trace.h> |
30 | 25 | #include <litmus/domain.h> | |
31 | #include <litmus/bheap.h> | 26 | #include <litmus/bheap.h> |
32 | 27 | ||
33 | #include <linux/module.h> | ||
34 | |||
35 | #include <litmus/sched_mc.h> | 28 | #include <litmus/sched_mc.h> |
36 | 29 | ||
37 | /* Overview of MC operations. | 30 | /* Per CPU per criticality level state */ |
38 | * | 31 | typedef struct { |
39 | * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage | 32 | enum crit_level level; |
40 | * structure (NOT the actually scheduled | 33 | struct task_struct* linked; /* Logically running task */ |
41 | * task). If there is another linked task To | 34 | domain_t* domain; |
42 | * already it will set To->linked_on = NO_CPU | ||
43 | * (thereby removing its association with this | ||
44 | * CPU). However, it will not requeue the | ||
45 | * previously linked task (if any). It will set | ||
46 | * T's state to RT_F_RUNNING and check whether | ||
47 | * it is already running somewhere else. If T | ||
48 | * is scheduled somewhere else it will link | ||
49 | * it to that CPU instead (and pull the linked | ||
50 | * task to cpu). T may be NULL. | ||
51 | * | ||
52 | * unlink(T) - Unlink removes T from all scheduler data | ||
53 | * structures. If it is linked to some CPU it | ||
54 | * will link NULL to that CPU. If it is | ||
55 | * currently queued in the mc queue it will | ||
56 | * be removed from the rt_domain. It is safe to | ||
57 | * call unlink(T) if T is not linked. T may not | ||
58 | * be NULL. | ||
59 | * | ||
60 | * requeue(T) - Requeue will insert T into the appropriate | ||
61 | * queue. If the system is in real-time mode and | ||
62 | * the T is released already, it will go into the | ||
63 | * ready queue. If the system is not in | ||
64 | real-time mode, then T will go into the | ||
65 | * release queue. If T's release time is in the | ||
66 | * future, it will go into the release | ||
67 | * queue. That means that T's release time/job | ||
68 | no/etc. has to be updated before requeue(T) is | ||
69 | * called. It is not safe to call requeue(T) | ||
70 | * when T is already queued. T may not be NULL. | ||
71 | * | ||
72 | * mc_job_arrival(T) - This is the catch all function when T enters | ||
73 | * the system after either a suspension or at a | ||
74 | * job release. It will queue T (which means it | ||
75 | * is not safe to call mc_job_arrival(T) if | ||
76 | * T is already queued) and then check whether a | ||
77 | * preemption is necessary. If a preemption is | ||
78 | * necessary it will update the linkage | ||
79 | * accordingly and cause scheduled to be called | ||
80 | * (either with an IPI or need_resched). It is | ||
81 | * safe to call mc_job_arrival(T) if T's | ||
82 | * next job has not been actually released yet | ||
83 | (release time in the future). T will be put | ||
84 | * on the release queue in that case. | ||
85 | * | ||
86 | * job_completion(T) - Take care of everything that needs to be done | ||
87 | * to prepare T for its next release and place | ||
88 | * it in the right queue with | ||
89 | * mc_job_arrival(). | ||
90 | * | ||
91 | * | ||
92 | * When we know that T is linked to a CPU then link_task_to_cpu(NULL, CPU) is | ||
93 | * equivalent to unlink(T). Note that if you unlink a task from a CPU none of | ||
94 | * the functions will automatically propagate pending task from the ready queue | ||
95 | * to a linked task. This is the job of the calling function ( by means of | ||
96 | * __take_ready). | ||
97 | */ | ||
98 | |||
99 | |||
100 | /* cpu_entry_t - maintain the linked and scheduled state | ||
101 | */ | ||
102 | typedef struct { | ||
103 | int cpu; | ||
104 | struct task_struct* linked; /* only RT tasks */ | ||
105 | struct task_struct* scheduled; /* only RT tasks */ | ||
106 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
107 | struct bheap_node* hn_c; | ||
108 | struct bheap_node* hn_d; | ||
109 | struct task_struct* ghost_tasks[NUM_CRIT_LEVELS]; | ||
110 | } cpu_entry_t; | ||
111 | |||
112 | /*This code is heavily based on Bjoern's budget enforcement code. */ | ||
113 | struct watchdog_timer { | ||
114 | /* The watchdog timers determine when ghost jobs finish. */ | ||
115 | struct hrtimer timer; | ||
116 | struct task_struct* task; | ||
117 | }; | ||
118 | |||
119 | DEFINE_PER_CPU(struct watchdog_timer[NUM_CRIT_LEVELS], ghost_timers); | ||
120 | #define ghost_timer(cpu, crit) (&(per_cpu(ghost_timers, cpu)[crit])) | ||
121 | |||
122 | DEFINE_PER_CPU(cpu_entry_t, mc_cpu_entries); | ||
123 | 35 | ||
124 | cpu_entry_t* mc_cpus[NR_CPUS]; | 36 | struct hrtimer timer; /* For ghost task budget enforcement */ |
37 | struct bheap_node* node; /* For membership in global domains */ | ||
38 | } crit_cpu_entry_t; | ||
125 | 39 | ||
126 | #define set_will_schedule() \ | 40 | /* Per CPU state */ |
127 | (atomic_set(&__get_cpu_var(mc_cpu_entries).will_schedule, 1)) | 41 | typedef struct { |
128 | #define clear_will_schedule() \ | 42 | int cpu; |
129 | (atomic_set(&__get_cpu_var(mc_cpu_entries).will_schedule, 0)) | 43 | struct task_struct* scheduled; /* Task that is physically running */ |
130 | #define test_will_schedule(cpu) \ | 44 | struct task_struct* linked; /* Task that is logically running */ |
131 | (atomic_read(&per_cpu(mc_cpu_entries, cpu).will_schedule)) | ||
132 | #define remote_cpu_entry(cpu) (&per_cpu(mc_cpu_entries, cpu)) | ||
133 | 45 | ||
134 | #define tsk_mc_data(t) (tsk_rt(t)->mc_data) | 46 | crit_cpu_entry_t crit_entries[NUM_CRIT_LEVELS]; |
135 | #define tsk_mc_crit(t) (tsk_mc_data(t)->mc_task.crit) | 47 | } cpu_entry_t; |
136 | #define TRACE_TASK(t, fmt, args...) \ | ||
137 | TRACE("(%s/%d:%d:%d) " fmt, (t)->comm, (t)->pid, \ | ||
138 | (t)->rt_param.job_params.job_no, \ | ||
139 | (tsk_mc_data(t)) ? tsk_mc_crit(t) : -1, ##args) | ||
140 | |||
141 | /* need to do a short-circuit null check on mc_data before checking is_ghost */ | ||
142 | static inline int is_ghost(struct task_struct *t) | ||
143 | { | ||
144 | struct mc_data *mc_data = tsk_mc_data(t); | ||
145 | return mc_data && mc_data->mc_job.is_ghost; | ||
146 | } | ||
147 | |||
148 | /* the cpus queue themselves according to priority in here */ | ||
149 | static struct bheap_node mc_heap_node_c[NR_CPUS], mc_heap_node_d[NR_CPUS]; | ||
150 | static struct bheap mc_cpu_heap_c, mc_cpu_heap_d; | ||
151 | |||
152 | /* Create per-CPU domains for criticality A */ | ||
153 | DEFINE_PER_CPU(rt_domain_t, crit_a); | ||
154 | #define remote_a_queue(cpu) (&per_cpu(crit_a, cpu)) | ||
155 | #define local_a_queue (&__get_cpu_var(crit_a)) | ||
156 | |||
157 | /* Create per-CPU domains for criticality B */ | ||
158 | DEFINE_PER_CPU(rt_domain_t, crit_b); | ||
159 | #define remote_b_queue(cpu) (&per_cpu(crit_b, cpu)) | ||
160 | #define local_b_queue (&__get_cpu_var(crit_b)) | ||
161 | |||
162 | /* Create global domains for criticalities C and D */ | ||
163 | static rt_domain_t crit_c; | ||
164 | static rt_domain_t crit_d; | ||
165 | |||
166 | /* We use crit_c for shared globals */ | ||
167 | #define global_lock (crit_c.ready_lock) | ||
168 | #define mc_release_master (crit_c.release_master) | ||
169 | |||
170 | /* BEGIN clone of edf_common.c to allow shared C/D run queue*/ | ||
171 | |||
172 | static int mc_edf_higher_prio(struct task_struct* first, struct task_struct* | ||
173 | second) | ||
174 | { | ||
175 | /*Only differs from normal EDF when two tasks of differing criticality | ||
176 | are compared.*/ | ||
177 | if (first && second) { | ||
178 | enum crit_level first_crit = tsk_mc_crit(first); | ||
179 | enum crit_level second_crit = tsk_mc_crit(second); | ||
180 | /*Lower criticality numbers are higher priority*/ | ||
181 | if (first_crit < second_crit) { | ||
182 | return 1; | ||
183 | } | ||
184 | else if (second_crit < first_crit) { | ||
185 | return 0; | ||
186 | } | ||
187 | } | ||
188 | return edf_higher_prio(first, second); | ||
189 | } | ||
190 | 48 | ||
191 | static int mc_edf_entry_higher_prio(cpu_entry_t* first, cpu_entry_t* second, | 49 | /* Wrapper necessary until cpu linking code is moved into header file */ |
192 | enum crit_level crit) | 50 | typedef struct domain_data { |
193 | { | 51 | domain_t domain; |
194 | struct task_struct *first_active, *second_active; | 52 | struct bheap* heap; /* For global domains */ |
195 | first_active = first->linked; | 53 | crit_cpu_entry_t* crit_entry; /* For partitioned domains */ |
196 | second_active = second->linked; | 54 | } domain_data_t; |
197 | if (first->ghost_tasks[crit]) { | 55 | |
198 | first_active = first->ghost_tasks[crit]; | 56 | static cpu_entry_t* cpus[NR_CPUS]; |
199 | } | 57 | static raw_spinlock_t global_lock; |
200 | if (second->ghost_tasks[crit]) { | 58 | |
201 | second_active = second->ghost_tasks[crit]; | 59 | #define domain_data(dom) (container_of(dom, domain_data_t, domain)) |
202 | } | 60 | #define is_global(dom) (domain_data(dom)->heap) |
203 | return mc_edf_higher_prio(first_active, second_active); | 61 | #define is_global_task(t) (is_global(get_task_domain(t))) |
204 | } | 62 | #define crit_cpu(ce) \ |
63 | (container_of((void*)(ce - ce->level), cpu_entry_t, crit_entries)) | ||
64 | |||
65 | #define TS "(%s/%d:%d:%s)" | ||
66 | #define TA(t) (t) ? (is_ghost(t)) ? "ghost" : t->comm : "NULL", (t) ? t->pid : 1, \ | ||
67 | (t) ? t->rt_param.job_params.job_no : 1, \ | ||
68 | (t && get_task_domain(t)) ? get_task_domain(t)->name : "" | ||
69 | #define TRACE_ENTRY(e, fmt, args...) \ | ||
70 | TRACE("P%d, linked=" TS " " fmt "\n", \ | ||
71 | e->cpu, TA(e->linked), ##args) | ||
72 | #define TRACE_CRIT_ENTRY(ce, fmt, args...) \ | ||
73 | TRACE("%s P%d, linked=" TS " " fmt "\n", \ | ||
74 | ce->domain->name, crit_cpu(ce)->cpu, TA(ce->linked), ##args) | ||
75 | #define TRACE_TASK(t, fmt, args...) \ | ||
76 | TRACE(TS " " fmt "\n", TA(t), ##args) | ||
205 | 77 | ||
206 | /* need_to_preempt - check whether the task t needs to be preempted | 78 | /* |
207 | * call only with irqs disabled and with ready_lock acquired | 79 | * Sort CPUs within a global domain by the domain's priority function. |
208 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
209 | */ | 80 | */ |
210 | static int mc_edf_preemption_needed(rt_domain_t* rt, enum crit_level crit, | 81 | static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b) |
211 | cpu_entry_t* entry) | ||
212 | { | 82 | { |
213 | struct task_struct *active_task; | 83 | domain_t *domain; |
84 | crit_cpu_entry_t *first, *second; | ||
85 | struct task_struct *first_link, *second_link; | ||
214 | 86 | ||
215 | /* we need the read lock for edf_ready_queue */ | 87 | first = a->value; |
216 | /* no need to preempt if there is nothing pending */ | 88 | second = b->value; |
217 | if (!__jobs_pending(rt)) | 89 | first_link = first->linked; |
218 | return 0; | 90 | second_link = second->linked; |
219 | 91 | ||
220 | active_task = entry->linked; | 92 | if (!first_link || !second_link) { |
221 | /* A ghost task can only exist if we haven't scheduled something above | 93 | return second_link && !first_link; |
222 | * its level | 94 | } else { |
223 | */ | 95 | domain = get_task_domain(first_link); |
224 | if (entry->ghost_tasks[crit]) { | 96 | BUG_ON(domain != get_task_domain(second_link)); |
225 | active_task = entry->ghost_tasks[crit]; | 97 | return domain->higher_prio(second_link, first_link); |
226 | } | 98 | } |
227 | /* we need to reschedule if t doesn't exist */ | ||
228 | if (!active_task) | ||
229 | return 1; | ||
230 | |||
231 | /* NOTE: We cannot check for non-preemptibility since we | ||
232 | * don't know what address space we're currently in. | ||
233 | */ | ||
234 | |||
235 | /* make sure to get non-rt stuff out of the way */ | ||
236 | return !is_realtime(active_task) || | ||
237 | mc_edf_higher_prio(__next_ready(rt), active_task); | ||
238 | } | ||
239 | |||
240 | static int mc_edf_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
241 | { | ||
242 | return mc_edf_higher_prio(bheap2task(a), bheap2task(b)); | ||
243 | } | ||
244 | |||
245 | static void mc_edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
246 | release_jobs_t release) | ||
247 | { | ||
248 | rt_domain_init(rt, mc_edf_ready_order, resched, release); | ||
249 | } | ||
250 | |||
251 | #define WANT_ALL_SCHED_EVENTS | ||
252 | |||
253 | /* Called by update_cpu_position and lowest_prio_cpu in bheap operations | ||
254 | * Callers always have global lock | ||
255 | */ | ||
256 | static int cpu_lower_prio_c(struct bheap_node *_a, struct bheap_node *_b) | ||
257 | { | ||
258 | cpu_entry_t *a, *b; | ||
259 | a = _a->value; | ||
260 | b = _b->value; | ||
261 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
262 | * the top of the heap. | ||
263 | */ | ||
264 | return mc_edf_entry_higher_prio(b, a, CRIT_LEVEL_C); | ||
265 | } | ||
266 | |||
267 | /* Called by update_cpu_position and lowest_prio_cpu in bheap operations | ||
268 | * Callers always have global lock | ||
269 | */ | ||
270 | static int cpu_lower_prio_d(struct bheap_node *_a, struct bheap_node *_b) | ||
271 | { | ||
272 | cpu_entry_t *a, *b; | ||
273 | a = _a->value; | ||
274 | b = _b->value; | ||
275 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
276 | * the top of the heap. | ||
277 | */ | ||
278 | return mc_edf_entry_higher_prio(b, a, CRIT_LEVEL_D); | ||
279 | } | ||
280 | |||
281 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
282 | * order in the cpu queue. Caller must hold global lock. | ||
283 | * Called from link_task_to_cpu, which holds global lock | ||
284 | * link_task_to_cpu is the only way a CPU can get a new task, and hence have its | ||
285 | * priority change. | ||
286 | */ | ||
287 | static void update_cpu_position(cpu_entry_t *entry) | ||
288 | { | ||
289 | if (likely(bheap_node_in_heap(entry->hn_c))) | ||
290 | bheap_delete(cpu_lower_prio_c, &mc_cpu_heap_c, entry->hn_c); | ||
291 | if (likely(bheap_node_in_heap(entry->hn_d))) | ||
292 | bheap_delete(cpu_lower_prio_d, &mc_cpu_heap_d, entry->hn_d); | ||
293 | bheap_insert(cpu_lower_prio_c, &mc_cpu_heap_c, entry->hn_c); | ||
294 | bheap_insert(cpu_lower_prio_d, &mc_cpu_heap_d, entry->hn_d); | ||
295 | } | 99 | } |
296 | 100 | ||
297 | /* caller must hold global lock | 101 | /* |
298 | * Only called when checking for gedf preemptions by check_for_gedf_preemptions, | 102 | * Return next CPU which should be preempted or NULL if the domain has no |
299 | * which always has global lock | 103 | * preemptable CPUs. |
300 | */ | ||
301 | static cpu_entry_t* lowest_prio_cpu_c(void) | ||
302 | { | ||
303 | struct bheap_node* hn; | ||
304 | hn = bheap_peek(cpu_lower_prio_c, &mc_cpu_heap_c); | ||
305 | return hn->value; | ||
306 | } | ||
307 | |||
308 | /* caller must hold global lock | ||
309 | * Only called when checking for gedf preemptions by check_for_gedf_preemptions, | ||
310 | * which always has global lock | ||
311 | */ | 104 | */ |
312 | static cpu_entry_t* lowest_prio_cpu_d(void) | 105 | static inline crit_cpu_entry_t* lowest_prio_cpu(domain_t *dom) |
313 | { | 106 | { |
107 | struct bheap *heap = domain_data(dom)->heap; | ||
314 | struct bheap_node* hn; | 108 | struct bheap_node* hn; |
315 | hn = bheap_peek(cpu_lower_prio_d, &mc_cpu_heap_d); | 109 | hn = bheap_peek(cpu_lower_prio, heap); |
316 | return hn->value; | 110 | return (hn) ? hn->value : NULL; |
317 | } | 111 | } |
318 | 112 | ||
319 | /* Forward Declarations*/ | 113 | /* |
320 | static noinline void unlink(struct task_struct* t); | 114 | * Time accounting for ghost tasks. Called during ticks and linking. |
321 | static noinline void job_completion(struct task_struct *t, int forced); | ||
322 | |||
323 | /* update_ghost_time - Do time accounting for a ghost job. | ||
324 | * Updates ghost budget and handles expired ghost budget. | ||
325 | * Called from unlink(), mc_tick(). | ||
326 | * Caller holds global lock. | ||
327 | */ | 115 | */ |
328 | static void update_ghost_time(struct task_struct *p) | 116 | static void update_ghost_time(struct task_struct *p) |
329 | { | 117 | { |
330 | u64 delta; | 118 | u64 delta, clock; |
331 | u64 clock; | ||
332 | 119 | ||
333 | BUG_ON(!is_ghost(p)); | 120 | BUG_ON(!is_ghost(p)); |
334 | clock = litmus_clock(); | 121 | clock = litmus_clock(); |
335 | delta = clock - p->se.exec_start; | 122 | delta = clock - p->se.exec_start; |
336 | if (unlikely ((s64)delta < 0)) { | 123 | if (unlikely ((s64)delta < 0)) { |
337 | delta = 0; | 124 | delta = 0; |
338 | TRACE_TASK(p, "WARNING: negative time delta.\n"); | 125 | TRACE_TASK(p, "WARNING: negative time delta"); |
339 | } | 126 | } |
340 | if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) { | 127 | if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) { |
341 | /* Currently will just set ghost budget to zero since | 128 | TRACE_TASK(p, "Ghost job could have ended"); |
342 | * task has already been queued. Could probably do | ||
343 | * more efficiently with significant reworking. | ||
344 | */ | ||
345 | TRACE_TASK(p, "Ghost job could have ended\n"); | ||
346 | tsk_mc_data(p)->mc_job.ghost_budget = 0; | 129 | tsk_mc_data(p)->mc_job.ghost_budget = 0; |
347 | p->se.exec_start = clock; | 130 | p->se.exec_start = clock; |
348 | } else { | 131 | } else { |
349 | TRACE_TASK(p, "Ghost jub updated, but didn't finish\n"); | 132 | TRACE_TASK(p, "Ghost job updated, but didn't finish"); |
350 | tsk_mc_data(p)->mc_job.ghost_budget -= delta; | 133 | tsk_mc_data(p)->mc_job.ghost_budget -= delta; |
351 | p->se.exec_start = clock; | 134 | p->se.exec_start = clock; |
352 | } | 135 | } |
353 | } | 136 | } |
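The accounting above is a saturating subtraction from the ghost budget, with a clamp in case the clock read is stale. A stand-alone model of the same arithmetic (plain integers in place of `litmus_clock()` and the task struct; the values are made up):

```c
#include <stdint.h>
#include <stdio.h>

/* Model of update_ghost_time(): charge the time since exec_start against
 * the ghost budget, clamping negative deltas and saturating at zero. */
static void charge_ghost(uint64_t now, uint64_t *exec_start, uint64_t *budget)
{
	int64_t delta = (int64_t)(now - *exec_start);

	if (delta < 0)
		delta = 0;                  /* "negative time delta" case */
	if (*budget <= (uint64_t)delta)
		*budget = 0;                /* ghost job could have ended */
	else
		*budget -= (uint64_t)delta; /* partial charge */
	*exec_start = now;                  /* next charge starts from now */
}

int main(void)
{
	uint64_t start = 1000, budget = 250;

	charge_ghost(1100, &start, &budget); /* 250 - 100 -> 150 */
	charge_ghost(1400, &start, &budget); /* 150 <= 300  -> 0   */
	printf("budget=%llu exec_start=%llu\n",
	       (unsigned long long)budget, (unsigned long long)start);
	return 0;
}
```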
354 | 137 | ||
355 | /* | 138 | /* |
356 | * | 139 | * Logically set running task for a domain on a CPU. |
357 | */ | 140 | */ |
358 | static void cancel_watchdog_timer(struct watchdog_timer* wt) | 141 | static void link_task_to_crit(crit_cpu_entry_t *ce, |
142 | struct task_struct *task) | ||
359 | { | 143 | { |
360 | int ret; | 144 | lt_t when_to_fire; |
361 | 145 | struct bheap *heap; | |
362 | if (wt->task) { | 146 | |
363 | TRACE_TASK(wt->task, "Cancelling watchdog timer.\n"); | 147 | TRACE_TASK(task, "Linking to P%d", crit_cpu(ce)->cpu); |
364 | ret = hrtimer_try_to_cancel(&wt->timer); | 148 | BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU); |
365 | /*Should never be inactive.*/ | 149 | BUG_ON(task && is_global(ce->domain) && |
366 | BUG_ON(ret == 0); | 150 | !bheap_node_in_heap(ce->node)); |
367 | /*Running concurrently is an unusual situation - log it. */ | 151 | |
368 | /*TODO: is there a way to prevent this? This probably means | 152 | /* Unlink last task */ |
369 | * the timer task is waiting to acquire the lock while the | 153 | if (ce->linked) { |
370 | * cancellation attempt is happening. | 154 | TRACE_TASK(ce->linked, "Unlinking"); |
371 | * | 155 | ce->linked->rt_param.linked_on = NO_CPU; |
372 | * And are we even in a correct state when this happens? | 156 | if (is_ghost(ce->linked)) { |
373 | */ | 157 | hrtimer_try_to_cancel(&ce->timer); |
374 | if (ret == -1) | 158 | if (tsk_mc_data(ce->linked)->mc_job.ghost_budget > 0) { |
375 | TRACE_TASK(wt->task, "Timer cancellation " | 159 | /* Job isn't finished, so do accounting */ |
376 | "attempted while task completing\n"); | 160 | update_ghost_time(ce->linked); |
161 | } | ||
162 | } | ||
163 | } | ||
164 | |||
165 | /* Actually link task */ | ||
166 | ce->linked = task; | ||
167 | if (task) { | ||
168 | task->rt_param.linked_on = crit_cpu(ce)->cpu; | ||
169 | if (is_ghost(task)) { | ||
170 | /* Reset budget timer */ | ||
171 | task->se.exec_start = litmus_clock(); | ||
172 | when_to_fire = litmus_clock() + | ||
173 | tsk_mc_data(task)->mc_job.ghost_budget; | ||
174 | __hrtimer_start_range_ns(&ce->timer, | ||
175 | ns_to_ktime(when_to_fire), | ||
176 | 0 /* delta */, | ||
177 | HRTIMER_MODE_ABS_PINNED, | ||
178 | 0 /* no wakeup */); | ||
179 | } | ||
180 | } | ||
377 | 181 | ||
378 | wt->task = NULL; | 182 | /* Update global heap node position */ |
183 | if (is_global(ce->domain) && bheap_node_in_heap(ce->node)) { | ||
184 | heap = domain_data(ce->domain)->heap; | ||
185 | bheap_delete(cpu_lower_prio, heap, ce->node); | ||
186 | bheap_insert(cpu_lower_prio, heap, ce->node); | ||
379 | } | 187 | } |
380 | } | 188 | } |
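`link_task_to_crit()` only arms `ce->timer`; its initialization lies outside this hunk. Presumably each `crit_cpu_entry_t` timer is set up once in the plugin's init path, wired to `mc_ghost_exhausted()` (defined further down). A sketch of what that setup would look like; the helper name and its call site are assumptions, not taken from the patch:

```c
/* Assumed init-path helper (not shown in this patch): prepare the ghost
 * budget timer that link_task_to_crit() later arms with
 * __hrtimer_start_range_ns(). */
static void init_ghost_timer(crit_cpu_entry_t *ce)
{
	hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	ce->timer.function = mc_ghost_exhausted;
}
```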
381 | 189 | ||
382 | /* link_task_to_cpu - Update the link of a CPU. | 190 | static void check_for_preempt(domain_t*); |
383 | * Handles the case where the to-be-linked task is already | 191 | |
384 | * scheduled on a different CPU. | 192 | /* |
385 | * Also handles ghost jobs and preemption of ghost jobs. | 193 | * Catch all function for when a task enters the system after a suspension |
386 | * Called from unlink(), prepare_preemption(), and mc_schedule() | 194 | * or a release. Requeues the task and causes a preemption, if necessary. |
387 | * Callers hold global lock | ||
388 | */ | 195 | */ |
389 | static noinline void link_task_to_cpu(struct task_struct* linked, | 196 | static void job_arrival(struct task_struct* task) |
390 | cpu_entry_t *entry) | ||
391 | { | 197 | { |
392 | cpu_entry_t *sched; | 198 | domain_t *dom = get_task_domain(task); |
393 | struct task_struct* tmp; | ||
394 | int on_cpu; | ||
395 | int i; | ||
396 | struct watchdog_timer* timer; | ||
397 | lt_t when_to_fire; | ||
398 | 199 | ||
399 | BUG_ON(linked && !is_realtime(linked)); | 200 | TRACE_TASK(task, "Job arriving"); |
400 | BUG_ON(linked && is_realtime(linked) && | 201 | BUG_ON(!task); |
401 | (tsk_mc_crit(linked) < CRIT_LEVEL_C) && | ||
402 | (tsk_rt(linked)->task_params.cpu != entry->cpu)); | ||
403 | |||
404 | if (linked && is_ghost(linked)) { | ||
405 | TRACE_TASK(linked, "Linking ghost job to CPU %d.\n", | ||
406 | entry->cpu); | ||
407 | BUG_ON(entry->linked && | ||
408 | tsk_mc_crit(entry->linked) < tsk_mc_crit(linked)); | ||
409 | 202 | ||
410 | tmp = entry->ghost_tasks[tsk_mc_crit(linked)]; | 203 | if (!is_global(dom) || tsk_rt(task)->scheduled_on == NO_CPU) { |
411 | if (tmp) { | 204 | dom->requeue(dom, task); |
412 | unlink(tmp); | 205 | check_for_preempt(dom); |
413 | } | 206 | } else { |
414 | /* We shouldn't link a ghost job that is already somewhere | 207 | /* If a global task is scheduled on one cpu, it CANNOT |
415 | * else (or here) - the caller is responsible for unlinking] | 208 | * be requeued into a global domain. Another cpu might |
416 | * first. | 209 | * dequeue the global task before it is descheduled, |
210 | * causing the system to crash when the task is scheduled | ||
211 | * in two places simultaneously. | ||
417 | */ | 212 | */ |
418 | BUG_ON(linked->rt_param.linked_on != NO_CPU); | 213 | TRACE_TASK(task, "Delayed arrival of scheduled task"); |
419 | linked->rt_param.linked_on = entry->cpu; | ||
420 | linked->se.exec_start = litmus_clock(); | ||
421 | entry->ghost_tasks[tsk_mc_crit(linked)] = linked; | ||
422 | /* Set up the watchdog timer. */ | ||
423 | timer = ghost_timer(entry->cpu, tsk_mc_crit(linked)); | ||
424 | if (timer->task) { | ||
425 | cancel_watchdog_timer(timer); | ||
426 | } | ||
427 | when_to_fire = litmus_clock() + | ||
428 | tsk_mc_data(linked)->mc_job.ghost_budget; | ||
429 | timer->task = linked; | ||
430 | __hrtimer_start_range_ns(&timer->timer, | ||
431 | ns_to_ktime(when_to_fire), | ||
432 | 0 /* delta */, | ||
433 | HRTIMER_MODE_ABS_PINNED, | ||
434 | 0 /* no wakeup */); | ||
435 | } | 214 | } |
436 | else{ | 215 | } |
437 | /* Currently linked task is set to be unlinked. */ | 216 | |
438 | if (entry->linked) { | 217 | /* |
439 | entry->linked->rt_param.linked_on = NO_CPU; | 218 | * Logically run a task on a CPU. The task must first have been linked |
219 | * to one of the criticalities running on this CPU. | ||
220 | */ | ||
221 | static void link_task_to_cpu(cpu_entry_t *entry, struct task_struct *task) | ||
222 | { | ||
223 | int i, in_heap; | ||
224 | crit_cpu_entry_t *ce; | ||
225 | struct bheap *heap; | ||
226 | struct task_struct *tmp; | ||
227 | enum crit_level last, next; | ||
228 | |||
229 | next = (task) ? tsk_mc_crit(task) : NUM_CRIT_LEVELS - 1; | ||
230 | last = (entry->linked) ? tsk_mc_crit(entry->linked) : | ||
231 | NUM_CRIT_LEVELS - 1; | ||
232 | |||
233 | TRACE_TASK(task, "Linking to P%d", entry->cpu); | ||
234 | BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu); | ||
235 | BUG_ON(task && is_ghost(task)); | ||
236 | BUG_ON(entry->linked && task && tsk_mc_crit(entry->linked) < next); | ||
237 | |||
238 | /* Actually link task */ | ||
239 | if (task && !is_ghost(task)) { | ||
240 | set_rt_flags(task, RT_F_RUNNING); | ||
241 | entry->linked = task; | ||
242 | } else { | ||
243 | entry->linked = NULL; | ||
244 | } | ||
245 | |||
246 | /* Update CPU states */ | ||
247 | for (i = ((next < last) ? next : last); | ||
248 | i <= ((next > last) ? next : last); i++) { | ||
249 | ce = &entry->crit_entries[i]; | ||
250 | |||
251 | /* Put CPU only in heaps which can preempt the linked task */ | ||
252 | if (is_global(ce->domain)) { | ||
253 | heap = domain_data(ce->domain)->heap; | ||
254 | in_heap = bheap_node_in_heap(ce->node); | ||
255 | if (ce->level > next && in_heap) { | ||
256 | bheap_delete(cpu_lower_prio, heap, ce->node); | ||
257 | } else if ((ce->level < next || !task) && !in_heap) { | ||
258 | bheap_insert(cpu_lower_prio, heap, ce->node); | ||
259 | } | ||
440 | } | 260 | } |
441 | 261 | ||
442 | /* Link new task to CPU. */ | 262 | /* Remove and requeue lower priority tasks on this CPU */ |
443 | if (linked) { | 263 | if (ce->linked && ce->level > next) { |
444 | set_rt_flags(linked, RT_F_RUNNING); | 264 | TRACE_TASK(ce->linked, "Removed by higher priority"); |
445 | /* handle task is already scheduled somewhere! */ | 265 | tmp = ce->linked; |
446 | on_cpu = linked->rt_param.scheduled_on; | 266 | link_task_to_crit(ce, NULL); |
447 | if (on_cpu != NO_CPU) { | 267 | if (is_global(ce->domain)) { |
448 | sched = &per_cpu(mc_cpu_entries, on_cpu); | 268 | /* Need to check for a preemption. |
449 | /* this should only happen if not linked | 269 | * We know this CPU is no longer in the heap |
450 | * already | 270 | * so it cannot get re-preempted here. |
451 | */ | ||
452 | BUG_ON(sched->linked == linked); | ||
453 | |||
454 | /* If we are already scheduled on the CPU to | ||
455 | * which we wanted to link, we don't need to do | ||
456 | * the swap -- we just link ourselves to the | ||
457 | * CPU and depend on the caller to get things | ||
458 | * right. | ||
459 | * | ||
460 | * Also, we can only safely swap if neither | ||
461 | * task is partitioned. | ||
462 | */ | 271 | */ |
463 | tmp = sched->linked; | 272 | job_arrival(tmp); |
464 | if (entry != sched && tsk_mc_crit(linked) > | 273 | } else { |
465 | CRIT_LEVEL_B && | 274 | ce->domain->requeue(ce->domain, tmp); |
466 | (!tmp || tsk_mc_crit(tmp) | ||
467 | > CRIT_LEVEL_B)) { | ||
468 | TRACE_TASK(linked, | ||
469 | "already scheduled on %d, updating link.\n", | ||
470 | sched->cpu); | ||
471 | linked->rt_param.linked_on = sched->cpu; | ||
472 | sched->linked = linked; | ||
473 | for (i = tsk_mc_crit(linked); | ||
474 | i < NUM_CRIT_LEVELS; i++) { | ||
475 | if (sched->ghost_tasks[i]) { | ||
476 | unlink(sched-> | ||
477 | ghost_tasks[i]); | ||
478 | } | ||
479 | } | ||
480 | update_cpu_position(sched); | ||
481 | linked = tmp; | ||
482 | } | ||
483 | } | ||
484 | if (linked) { /* might be NULL due to swap */ | ||
485 | linked->rt_param.linked_on = entry->cpu; | ||
486 | for (i = tsk_mc_crit(linked); | ||
487 | i < NUM_CRIT_LEVELS; i++) { | ||
488 | if (entry->ghost_tasks[i]) { | ||
489 | unlink(entry->ghost_tasks[i]); | ||
490 | /* WARNING: it is up to the | ||
491 | * caller to requeue ghost jobs | ||
492 | */ | ||
493 | } | ||
494 | } | ||
495 | } | 275 | } |
496 | } | 276 | } |
497 | entry->linked = linked; | ||
498 | } | 277 | } |
499 | #ifdef WANT_ALL_SCHED_EVENTS | ||
500 | if (linked) | ||
501 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
502 | else | ||
503 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
504 | #endif | ||
505 | update_cpu_position(entry); | ||
506 | } | 278 | } |
507 | 279 | ||
508 | /* unlink - Make sure a task is not linked any longer to a cpu entry | 280 | /* |
509 | * where it was linked before. | 281 | * Preempt logically running task in a domain. If the preempting task should be |
510 | * Can handle ghost jobs. | 282 | * running on the domain's CPU, also links the task to the CPU and causes |
511 | * Called by schedule, task_block, task_exit, and job_completion | 283 | * a physical preemption. |
512 | * Caller assumed to hold global lock | ||
513 | */ | 284 | */ |
514 | static noinline void unlink(struct task_struct* t) | 285 | static void preempt(domain_t *dom, crit_cpu_entry_t *ce) |
515 | { | 286 | { |
516 | int cpu; | 287 | struct task_struct *task = dom->take_ready(dom); |
517 | cpu_entry_t *entry; | 288 | cpu_entry_t *entry = crit_cpu(ce); |
518 | struct watchdog_timer *timer; | 289 | |
290 | TRACE_CRIT_ENTRY(ce, "Preempted by " TS, TA(task)); | ||
291 | BUG_ON(!task); | ||
519 | 292 | ||
520 | if (unlikely(!t)) { | 293 | /* Per-domain preemption */ |
521 | BUG_ON(1); | 294 | if (ce->linked) { |
522 | return; | 295 | dom->requeue(dom, ce->linked); |
523 | } | 296 | } |
297 | link_task_to_crit(ce, task); | ||
524 | 298 | ||
525 | cpu = t->rt_param.linked_on; | 299 | /* Preempt actual execution if this is a running task */ |
526 | if (cpu != NO_CPU) { | 300 | if (!is_ghost(task)) { |
527 | /* unlink */ | 301 | link_task_to_cpu(entry, task); |
528 | entry = remote_cpu_entry(cpu); | 302 | preempt_if_preemptable(entry->scheduled, entry->cpu); |
529 | t->rt_param.linked_on = NO_CPU; | ||
530 | if (is_ghost(t)) { | ||
531 | /* Clear the timer if it's set. | ||
532 | * It may be unset if we are called as a result of | ||
533 | * the watchdog timer triggering. | ||
534 | */ | ||
535 | timer = ghost_timer(cpu, tsk_mc_crit(t)); | ||
536 | if (timer->task) { | ||
537 | /* Should already be watching task.*/ | ||
538 | BUG_ON(timer->task != t); | ||
539 | cancel_watchdog_timer(timer); | ||
540 | } | ||
541 | if (tsk_mc_data(t)->mc_job.ghost_budget > 0) { | ||
542 | /* Job isn't finished, so do accounting. */ | ||
543 | update_ghost_time(t); | ||
544 | /* Just remove from CPU, even in the rare case | ||
545 | * of zero time left - it will be scheduled | ||
546 | * with an immediate timer fire. | ||
547 | */ | ||
548 | entry->ghost_tasks[tsk_mc_crit(t)] = NULL; | ||
549 | /*TODO: maybe make more efficient by | ||
550 | * only updating on C/D completion? | ||
551 | */ | ||
552 | update_cpu_position(entry); | ||
553 | } | ||
554 | else{ | ||
555 | /* Job finished, so just remove */ | ||
556 | entry->ghost_tasks[tsk_mc_crit(t)] = NULL; | ||
557 | update_cpu_position(entry); | ||
558 | } | ||
559 | } | ||
560 | else { | ||
561 | link_task_to_cpu(NULL, entry); | ||
562 | } | ||
563 | } else if (is_queued(t)) { | ||
564 | /* This is an interesting situation: t is scheduled, | ||
565 | * but was just recently unlinked. It cannot be | ||
566 | * linked anywhere else (because then it would have | ||
567 | * been relinked to this CPU), thus it must be in some | ||
568 | * queue. We must remove it from the list in this | ||
569 | * case. | ||
570 | */ | ||
571 | TRACE("Weird is_queued situation happened\n"); | ||
572 | remove(tsk_rt(t)->domain, t); | ||
573 | } | 303 | } |
574 | } | 304 | } |
575 | 305 | ||
576 | 306 | /* | |
577 | /* preempt - force a CPU to reschedule | 307 | * Causes a logical preemption if the domain has a higher-priority ready task. |
578 | * Just sets a Linux scheduler flag. | ||
579 | */ | 308 | */ |
580 | static void preempt(cpu_entry_t *entry) | 309 | static void check_for_preempt(domain_t *dom) |
581 | { | 310 | { |
582 | preempt_if_preemptable(entry->scheduled, entry->cpu); | 311 | int lower_prio; |
312 | cpu_entry_t *entry; | ||
313 | crit_cpu_entry_t *ce; | ||
314 | |||
315 | if (is_global(dom)) { | ||
316 | /* If a higher priority task is running on a CPU, | ||
317 | * it will not be present in the domain heap. | ||
318 | */ | ||
319 | for (ce = lowest_prio_cpu(dom); | ||
320 | ce && dom->preempt_needed(dom, ce->linked); | ||
321 | ce = lowest_prio_cpu(dom)) { | ||
322 | |||
323 | preempt(dom, ce); | ||
324 | } | ||
325 | } else /* Partitioned */ { | ||
326 | ce = domain_data(dom)->crit_entry; | ||
327 | entry = crit_cpu(ce); | ||
328 | /* A higher priority task might be running, in which case | ||
329 | * this level cannot link any task. | ||
330 | */ | ||
331 | lower_prio = entry->linked && | ||
332 | tsk_mc_crit(entry->linked) < ce->level; | ||
333 | if (!lower_prio && dom->preempt_needed(dom, ce->linked)) { | ||
334 | preempt(dom, ce); | ||
335 | } | ||
336 | } | ||
583 | } | 337 | } |
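For a global domain, the heap ordered by `cpu_lower_prio()` keeps the most preemptable CPU (idle, or linked to the lowest-priority task) at the top, so the loop above only ever compares the domain's best ready task against one CPU at a time; each `preempt()` re-links a task, which re-sorts the heap before the next peek. A small user-space model of that idea, with integers standing in for task priorities (lower value = higher priority, 0 = idle CPU; all values are illustrative):

```c
#include <stdio.h>

#define NCPUS 4

/* Priority of each CPU's linked task; 0 means the CPU is idle. */
static int linked_prio[NCPUS] = { 3, 0, 7, 5 };

/* Model of lowest_prio_cpu(): pick the CPU easiest to preempt
 * (idle first, otherwise the one running the lowest-priority task). */
static int most_preemptable_cpu(void)
{
	int best = 0;
	for (int cpu = 1; cpu < NCPUS; cpu++) {
		int b = linked_prio[best], c = linked_prio[cpu];
		if (c == 0 ? b != 0 : (b != 0 && c > b))
			best = cpu;
	}
	return best;
}

int main(void)
{
	int ready[] = { 1, 2, 6 };  /* ready queue, best first */

	for (int i = 0; i < 3; i++) {
		int cpu = most_preemptable_cpu();
		/* preempt_needed(): idle CPU, or ready task beats linked */
		if (linked_prio[cpu] == 0 || ready[i] < linked_prio[cpu]) {
			printf("task %d preempts CPU %d (was %d)\n",
			       ready[i], cpu, linked_prio[cpu]);
			linked_prio[cpu] = ready[i]; /* re-link, "re-sort" */
		}
	}
	return 0;
}
```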
584 | 338 | ||
585 | /* requeue - Put an unlinked task into the proper domain. | 339 | /* |
586 | * Caller holds global lock. | 340 | * Remove a running task from all structures. |
587 | * Called by mc_job_arrival() and prepare_preemption(). | ||
588 | */ | 341 | */ |
589 | static noinline void requeue(struct task_struct* task) | 342 | static void remove_from_all(struct task_struct* task) |
590 | { | 343 | { |
591 | BUG_ON(!task || !is_realtime(task)); | 344 | int cpu, level; |
592 | /* sanity check before insertion */ | 345 | cpu_entry_t *entry; |
593 | BUG_ON(is_queued(task)); | 346 | crit_cpu_entry_t *ce; |
594 | 347 | ||
595 | if (is_released(task, litmus_clock())) { | 348 | TRACE_TASK(task, "Removing from everything"); |
596 | __add_ready(tsk_rt(task)->domain, task); | 349 | BUG_ON(!task); |
597 | } else { | ||
598 | /* it has got to wait */ | ||
599 | add_release(tsk_rt(task)->domain, task); | ||
600 | } | ||
601 | } | ||
602 | 350 | ||
603 | static void prepare_preemption(rt_domain_t *dom, cpu_entry_t *cpu, | 351 | cpu = task->rt_param.linked_on; |
604 | enum crit_level crit) { | 352 | level = tsk_mc_crit(task); |
605 | struct task_struct* task; | 353 | if (cpu != NO_CPU) { |
606 | int i; | 354 | /* Unlink */ |
607 | task = __take_ready(dom); | 355 | entry = cpus[cpu]; |
608 | TRACE("prepare_preemption: attempting to link task %d to %d\n", | 356 | ce = &entry->crit_entries[level]; |
609 | task->pid, cpu->cpu); | 357 | link_task_to_crit(ce, NULL); |
610 | if (is_ghost(task)) { | 358 | if (!is_ghost(task)) { |
611 | /* Changing ghost task only affects linked task at our level */ | 359 | link_task_to_cpu(entry, NULL); |
612 | if (cpu->linked && tsk_mc_crit(cpu->linked) == crit) | ||
613 | requeue(cpu->linked); | ||
614 | /* Can change ghost task at our level as well. */ | ||
615 | if (cpu->ghost_tasks[crit]) | ||
616 | requeue(cpu->ghost_tasks[crit]); | ||
617 | } | ||
618 | else { | ||
619 | /* Changing linked tasks could affect both real and ghost | ||
620 | * tasks at multiple levels | ||
621 | */ | ||
622 | if (cpu->linked) | ||
623 | requeue(cpu->linked); | ||
624 | for (i = crit; i < NUM_CRIT_LEVELS; i++) { | ||
625 | if (cpu->ghost_tasks[i]) | ||
626 | requeue(cpu->ghost_tasks[i]); | ||
627 | } | 360 | } |
361 | BUG_ON(is_queued(task)); | ||
362 | } else if (is_queued(task)) { | ||
363 | /* This is an interesting situation: t is scheduled, | ||
364 | * but was just recently unlinked. It cannot be | ||
365 | * linked anywhere else (because then it would have | ||
366 | * been relinked to this CPU), thus it must be in some | ||
367 | * queue. We must remove it from the list in this | ||
368 | * case. | ||
369 | */ | ||
370 | TRACE_TASK(task, "Weird is_queued situation happened"); | ||
371 | remove((rt_domain_t*)get_task_domain(task)->data, task); | ||
628 | } | 372 | } |
629 | link_task_to_cpu(task, cpu); | ||
630 | preempt(cpu); | ||
631 | } | 373 | } |
632 | 374 | ||
633 | /* Callers always have global lock for functions in this section*/ | 375 | /* |
634 | static noinline void check_for_c_preemptions(rt_domain_t *dom) { | 376 | * Prepares a task for its next period and causes a preemption, if necessary. |
635 | cpu_entry_t* last; | 377 | * Converts tasks which completed their execution early into ghost tasks. |
636 | TRACE("Checking for c preempt"); | 378 | */ |
637 | for (last = lowest_prio_cpu_c(); | 379 | static void job_completion(struct task_struct *task, int forced) |
638 | mc_edf_preemption_needed(dom, CRIT_LEVEL_C, | 380 | { |
639 | last); | 381 | TRACE_TASK(task, "Completed"); |
640 | last = lowest_prio_cpu_c()) { | 382 | sched_trace_task_completion(task, forced); |
641 | prepare_preemption(dom, last, CRIT_LEVEL_C); | 383 | BUG_ON(!task); |
642 | } | ||
643 | } | ||
644 | 384 | ||
645 | static noinline void check_for_d_preemptions(rt_domain_t *dom) { | 385 | /* Logically stop the task execution */ |
646 | cpu_entry_t* last; | 386 | set_rt_flags(task, RT_F_SLEEP); |
647 | TRACE("Checking for d preempt"); | 387 | remove_from_all(task); |
648 | for (last = lowest_prio_cpu_d(); | ||
649 | mc_edf_preemption_needed(dom, CRIT_LEVEL_D, | ||
650 | last); | ||
651 | last = lowest_prio_cpu_d()) { | ||
652 | prepare_preemption(dom, last, CRIT_LEVEL_D); | ||
653 | } | ||
654 | } | ||
655 | 388 | ||
656 | static noinline void check_for_a_preemption(rt_domain_t *dom, cpu_entry_t *cpu) { | 389 | /* If it's not a ghost job, do ghost job conversion */ |
657 | TRACE("Checking for a preempt"); | 390 | if (!is_ghost(task)) { |
658 | if (mc_edf_preemption_needed(dom, CRIT_LEVEL_A, cpu)) { | 391 | tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task); |
659 | prepare_preemption(dom, cpu, CRIT_LEVEL_A); | 392 | tsk_mc_data(task)->mc_job.is_ghost = 1; |
660 | } | 393 | } |
661 | } | ||
662 | 394 | ||
663 | static noinline void check_for_b_preemption(rt_domain_t *dom, cpu_entry_t *cpu) { | 395 | /* If the task is a ghost job with no budget, it either exhausted |
664 | TRACE("Checking for b preempt"); | 396 | * its ghost budget or there was no ghost budget after the job |
665 | if (mc_edf_preemption_needed(dom, CRIT_LEVEL_B, cpu)) { | 397 | * conversion. Revert back to a normal task and complete the period. |
666 | prepare_preemption(dom, cpu, CRIT_LEVEL_B); | 398 | */ |
399 | if (tsk_mc_data(task)->mc_job.ghost_budget == 0) { | ||
400 | tsk_mc_data(task)->mc_job.is_ghost = 0; | ||
401 | prepare_for_next_period(task); | ||
402 | if (is_released(task, litmus_clock())) { | ||
403 | sched_trace_task_release(task); | ||
404 | } | ||
667 | } | 405 | } |
406 | |||
407 | /* Requeue non-blocking tasks */ | ||
408 | if (is_running(task)) | ||
409 | job_arrival(task); | ||
668 | } | 410 | } |
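The conversion rule above is worth spelling out: a job that completes early keeps its unused budget as ghost budget and lives on as a ghost task until that budget drains; a job that completes with no budget left (or a ghost whose budget just reached zero) goes straight to its next period. For instance, a task that signals completion with 3 ms of budget remaining becomes a ghost task carrying 3 ms. A stand-alone model of just that branch (names and values are illustrative, not from the patch):

```c
#include <stdint.h>
#include <stdio.h>

struct job {
	uint64_t budget_remaining;  /* unused execution budget at completion */
	int      is_ghost;
	uint64_t ghost_budget;
};

/* Model of the ghost-conversion decision in job_completion(). */
static const char* complete(struct job *j)
{
	if (!j->is_ghost) {          /* normal job: donate leftover budget */
		j->ghost_budget = j->budget_remaining;
		j->is_ghost = 1;
	}
	if (j->ghost_budget == 0) {  /* nothing left: start next period */
		j->is_ghost = 0;
		return "prepare_for_next_period(), then requeue";
	}
	return "requeue as ghost with leftover budget";
}

int main(void)
{
	struct job early = { .budget_remaining = 3000000, .is_ghost = 0 };
	struct job spent = { .budget_remaining = 0,       .is_ghost = 0 };

	printf("early completion: %s\n", complete(&early));
	printf("budget exhausted: %s\n", complete(&spent));
	return 0;
}
```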
669 | 411 | ||
670 | /* mc_job_arrival: task is either resumed or released | 412 | /* |
671 | * Called from job_completion(), mc_task_new(), and mc_task_wake_up(), all | 413 | * Return true if the domain has a higher priority ready task. The curr |
672 | * of which have the global lock | 414 | * task must belong to the domain. |
673 | * Requeues task and checks for/triggers preemptions | ||
674 | */ | 415 | */ |
675 | static noinline void mc_job_arrival(struct task_struct* task) | 416 | static noinline int mc_preempt_needed(domain_t *dom, struct task_struct* curr) |
676 | { | 417 | { |
677 | enum crit_level task_crit_level; | 418 | struct task_struct *next = dom->peek_ready(dom); |
678 | BUG_ON(!task); | ||
679 | 419 | ||
680 | TRACE("mc_job_arrival triggered\n"); | 420 | if (!next || !curr) { |
681 | task_crit_level = tsk_mc_crit(task); | 421 | return next && !curr; |
682 | requeue(task); | 422 | } else { |
683 | if (task_crit_level == CRIT_LEVEL_A) { | 423 | BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr)); |
684 | check_for_a_preemption(remote_a_queue(get_partition(task)), | 424 | return get_task_domain(next)->higher_prio(next, curr); |
685 | remote_cpu_entry(get_partition(task))); | ||
686 | } else if (task_crit_level == CRIT_LEVEL_B) { | ||
687 | check_for_b_preemption(remote_b_queue(get_partition(task)), | ||
688 | remote_cpu_entry(get_partition(task))); | ||
689 | } else if (task_crit_level == CRIT_LEVEL_C) { | ||
690 | check_for_c_preemptions(&crit_c); | ||
691 | } else if (task_crit_level == CRIT_LEVEL_D) { | ||
692 | check_for_d_preemptions(&crit_d); | ||
693 | } | 425 | } |
694 | } | 426 | } |
695 | 427 | ||
696 | /* Called by the domain | 428 | /* |
697 | * Obtains global lock, merges ready tasks, checks for/triggers preemptions, | 429 | * Completes a logically (but not physically) running ghost task. |
698 | * and releases global lock | 430 | */ |
699 | */ | 431 | static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer) |
700 | static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
701 | { | 432 | { |
702 | unsigned long flags; | 433 | unsigned long flags; |
703 | int i; | 434 | crit_cpu_entry_t *ce; |
704 | 435 | ||
705 | raw_spin_lock_irqsave(&global_lock, flags); | 436 | raw_spin_lock_irqsave(&global_lock, flags); |
706 | TRACE("mc_release_jobs triggered\n"); | ||
707 | 437 | ||
708 | __merge_ready(rt, tasks); | 438 | ce = container_of(timer, crit_cpu_entry_t, timer); |
439 | TRACE_CRIT_ENTRY(ce, "Ghost exhausted firing"); | ||
709 | 440 | ||
710 | for (i = 0; i < NR_CPUS; i++) { | 441 | /* Due to race conditions, we cannot just set the linked |
711 | if (rt == remote_b_queue(i)) { | 442 | * task's budget to 0 as it may no longer be the task |
712 | check_for_b_preemption(rt, remote_cpu_entry(i)); | 443 | * for which this timer was armed. |
713 | } | 444 | */ |
714 | else if (rt == remote_a_queue(i)) { | 445 | if (ce->linked && is_ghost(ce->linked)) { |
715 | check_for_a_preemption(rt, remote_cpu_entry(i)); | 446 | update_ghost_time(ce->linked); |
447 | if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) { | ||
448 | job_completion(ce->linked, 0); | ||
449 | goto out; | ||
716 | } | 450 | } |
717 | } | 451 | } |
718 | if (rt == &crit_c) { | ||
719 | check_for_c_preemptions(rt); | ||
720 | } | ||
721 | if (rt == &crit_d) { | ||
722 | check_for_d_preemptions(rt); | ||
723 | } | ||
724 | 452 | ||
453 | TRACE_TASK(ce->linked, "Was not exhausted"); | ||
454 | out: | ||
725 | raw_spin_unlock_irqrestore(&global_lock, flags); | 455 | raw_spin_unlock_irqrestore(&global_lock, flags); |
456 | return HRTIMER_NORESTART; | ||
726 | } | 457 | } |
727 | 458 | ||
728 | /* caller holds global_lock | 459 | /* |
729 | * Called only by mc_schedule() which holds global lock | 460 | * Adds released jobs to a domain and causes a preemption, if necessary. |
730 | * Prepares task for next period, unlinks it, and calls mc_job_arrival | 461 | */ |
731 | * Converts jobs to ghost jobs as necessary, or finishes end of ghost jobs. | 462 | static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks) |
732 | */ | ||
733 | static noinline void job_completion(struct task_struct *t, int forced) | ||
734 | { | 463 | { |
735 | cpu_entry_t *cpu; | 464 | unsigned long flags; |
736 | BUG_ON(!t); | 465 | struct task_struct *first; |
737 | 466 | ||
738 | sched_trace_task_completion(t, forced); | 467 | raw_spin_lock_irqsave(&global_lock, flags); |
739 | 468 | ||
740 | TRACE_TASK(t, "job_completion().\n"); | 469 | first = bheap_peek(rt->order, tasks)->value; |
470 | TRACE_TASK(first, "Jobs released"); | ||
741 | 471 | ||
742 | /* set flags */ | 472 | __merge_ready(rt, tasks); |
743 | set_rt_flags(t, RT_F_SLEEP); | 473 | check_for_preempt(get_task_domain(first)); |
744 | /* If it's not a ghost job, do ghost job conversion and return if | ||
745 | * needed. | ||
746 | */ | ||
747 | if (!is_ghost(t)) { | ||
748 | TRACE_TASK(t, "Converting to ghost from %d.\n", t->rt_param.scheduled_on); | ||
749 | cpu = remote_cpu_entry(t->rt_param.scheduled_on); | ||
750 | /*Unlink first while it's not a ghost job.*/ | ||
751 | unlink(t); | ||
752 | tsk_mc_data(t)->mc_job.ghost_budget = budget_remaining(t); | ||
753 | tsk_mc_data(t)->mc_job.is_ghost = 1; | ||
754 | |||
755 | /* If we did just convert the job to ghost, we can safely | ||
756 | * reschedule it and then let schedule() determine a new | ||
757 | * job to run in the slack. | ||
758 | * | ||
759 | * If it actually needs to run as a ghost job, we'll do so | ||
760 | * here. | ||
761 | * | ||
762 | * If it doesn't need to, it will fall through and be handled | ||
763 | * properly as well. | ||
764 | */ | ||
765 | if (tsk_mc_data(t)->mc_job.ghost_budget > 0) { | ||
766 | link_task_to_cpu(t, cpu); | ||
767 | preempt(cpu); | ||
768 | return; | ||
769 | } | ||
770 | } | ||
771 | /* prepare for next period - we either just became ghost but with no | ||
772 | * budget left, or we were already ghost and the ghost job expired*/ | ||
773 | if (is_ghost(t)) { | ||
774 | tsk_mc_data(t)->mc_job.ghost_budget = 0; | ||
775 | /*Need to unlink here so prepare_for_next_period doesn't try | ||
776 | * to unlink us | ||
777 | */ | ||
778 | unlink(t); | ||
779 | tsk_mc_data(t)->mc_job.is_ghost = 0; | ||
780 | tsk_mc_data(t)->mc_job.ghost_budget = 0; | ||
781 | prepare_for_next_period(t); | ||
782 | } | ||
783 | if (is_released(t, litmus_clock())) | ||
784 | sched_trace_task_release(t); | ||
785 | /* requeue | ||
786 | * But don't requeue a blocking task. */ | ||
787 | if (is_running(t)) | ||
788 | mc_job_arrival(t); | ||
789 | } | ||
790 | |||
791 | /* watchdog_timeout - this function is called when a watchdog timer expires. | ||
792 | * | ||
793 | * Acquires global lock | ||
794 | */ | ||
795 | 474 | ||
796 | static enum hrtimer_restart watchdog_timeout(struct hrtimer *timer) | ||
797 | { | ||
798 | struct watchdog_timer* wt = container_of(timer, | ||
799 | struct watchdog_timer, | ||
800 | timer); | ||
801 | unsigned long flags; | ||
802 | struct task_struct* task = wt->task; | ||
803 | raw_spin_lock_irqsave(&global_lock, flags); | ||
804 | /*If we have triggered, we know the budget must have expired.*/ | ||
805 | /*This needs to run first, so it doesn't look to job_completion like | ||
806 | * we have an active timer. | ||
807 | */ | ||
808 | wt->task = NULL; | ||
809 | tsk_mc_data(task)->mc_job.ghost_budget = 0; | ||
810 | job_completion(task, 0); | ||
811 | TRACE_TASK(task, "Watchdog timeout\n"); | ||
812 | raw_spin_unlock_irqrestore(&global_lock, flags); | 475 | raw_spin_unlock_irqrestore(&global_lock, flags); |
813 | return HRTIMER_NORESTART; | ||
814 | } | 476 | } |
815 | 477 | ||
816 | 478 | /* | |
817 | /* mc_tick - this function is called for every local timer | 479 | * Ghost time accounting. |
818 | * interrupt. | 480 | * TODO: remove |
819 | * | ||
820 | * checks whether the current task has expired and checks | ||
821 | * whether we need to preempt it if it has not expired | ||
822 | * Called from LITMUS core | ||
823 | * Locks when calling update_ghost_time(t) | ||
824 | * Just sets reschedule flags on task and CPU and request_exit_np flag on task | ||
825 | */ | 481 | */ |
826 | static void mc_tick(struct task_struct* t) | 482 | static void mc_tick(struct task_struct* t) |
827 | { | 483 | { |
828 | unsigned long flags; | 484 | unsigned long flags; |
829 | if (is_ghost(t)) { | 485 | if (is_realtime(t) && is_ghost(t)) { |
830 | raw_spin_lock_irqsave(&global_lock, flags); | 486 | raw_spin_lock_irqsave(&global_lock, flags); |
831 | update_ghost_time(t); | 487 | update_ghost_time(t); |
832 | raw_spin_unlock_irqrestore(&global_lock, flags); | 488 | raw_spin_unlock_irqrestore(&global_lock, flags); |
833 | } | 489 | } |
834 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
835 | if (!is_np(t)) { | ||
836 | /* np tasks will be preempted when they become | ||
837 | * preemptable again | ||
838 | */ | ||
839 | set_tsk_need_resched(t); | ||
840 | set_will_schedule(); | ||
841 | TRACE("mc_scheduler_tick: " | ||
842 | "%d is preemptable " | ||
843 | " => FORCE_RESCHED\n", t->pid); | ||
844 | } else if (is_user_np(t)) { | ||
845 | TRACE("mc_scheduler_tick: " | ||
846 | "%d is non-preemptable, " | ||
847 | "preemption delayed.\n", t->pid); | ||
848 | request_exit_np(t); | ||
849 | } | ||
850 | } | ||
851 | } | ||
852 | |||
853 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
854 | * assumptions on the state of the current task since it may be called for a | ||
855 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
856 | * was necessary, because sys_exit_np() was called, because some Linux | ||
857 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
858 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
859 | * current state is. | ||
860 | * | ||
861 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
862 | * | ||
863 | * The following assertions for the scheduled task could hold: | ||
864 | * | ||
865 | * - !is_running(scheduled) // the job blocks | ||
866 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
867 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
868 | * - linked != scheduled // we need to reschedule (for any reason) | ||
869 | * - is_np(scheduled) // rescheduling must be delayed, | ||
870 | * sys_exit_np must be requested | ||
871 | * | ||
872 | * Any of these can occur together. | ||
873 | * | ||
874 | * | ||
875 | * Called by LITMUS core | ||
876 | * No lock required by caller | ||
877 | * Obtains global lock | ||
878 | * can call unlink(), request_exit_np(), job_completion(), __take_ready() | ||
879 | * modifies next, scheduled->scheduled_on, linked->scheduled_on | ||
880 | * Releases global lock | ||
881 | */ | ||
882 | static struct task_struct* mc_schedule(struct task_struct * prev) | ||
883 | { | ||
884 | cpu_entry_t* entry = &__get_cpu_var(mc_cpu_entries); | ||
885 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
886 | struct task_struct* next = NULL; | ||
887 | struct task_struct* ready_task = NULL; | ||
888 | enum crit_level ready_crit; | ||
889 | int i; | ||
890 | |||
891 | #ifdef CONFIG_RELEASE_MASTER | ||
892 | /* Bail out early if we are the release master. | ||
893 | * The release master never schedules any real-time tasks. | ||
894 | */ | ||
895 | if (mc_release_master == entry->cpu) { | ||
896 | sched_state_task_picked(); | ||
897 | return NULL; | ||
898 | } | ||
899 | #endif | ||
900 | |||
901 | raw_spin_lock(&global_lock); | ||
902 | clear_will_schedule(); | ||
903 | |||
904 | /* sanity checking */ | ||
905 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
906 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
907 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
908 | |||
909 | /* (0) Determine state */ | ||
910 | exists = entry->scheduled != NULL; | ||
911 | blocks = exists && !is_running(entry->scheduled); | ||
912 | out_of_time = exists && budget_enforced(entry->scheduled) && | ||
913 | budget_exhausted(entry->scheduled); | ||
914 | np = exists && is_np(entry->scheduled); | ||
915 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
916 | preempt = entry->scheduled != entry->linked; | ||
917 | |||
918 | #ifdef WANT_ALL_SCHED_EVENTS | ||
919 | TRACE_TASK(prev, "invoked mc_schedule.\n"); | ||
920 | #endif | ||
921 | |||
922 | if (exists) | ||
923 | TRACE_TASK(prev, | ||
924 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
925 | "state:%d sig:%d\n", | ||
926 | blocks, out_of_time, np, sleep, preempt, | ||
927 | prev->state, signal_pending(prev)); | ||
928 | if (entry->linked && preempt) | ||
929 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
930 | entry->linked->comm, entry->linked->pid); | ||
931 | |||
932 | |||
933 | /* If a task blocks we have no choice but to reschedule. | ||
934 | */ | ||
935 | if (blocks) | ||
936 | unlink(entry->scheduled); | ||
937 | |||
938 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
939 | * We need to make sure to update the link structure anyway in case | ||
940 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
941 | * hurt. | ||
942 | */ | ||
943 | if (np && (out_of_time || preempt || sleep)) { | ||
944 | unlink(entry->scheduled); | ||
945 | request_exit_np(entry->scheduled); | ||
946 | } | ||
947 | |||
948 | /* Any task that is preemptable and either exhausts its execution | ||
949 | * budget or wants to sleep completes. We may have to reschedule after | ||
950 | * this. Don't do a job completion if we block (can't have timers running | ||
951 | * for blocked jobs). Preemptions go first for the same reason. ||
952 | */ | ||
953 | if (!np && (out_of_time || sleep) && !blocks && !preempt) | ||
954 | job_completion(entry->scheduled, !sleep); | ||
955 | |||
956 | /* Link pending task if we became unlinked. | ||
957 | */ | ||
958 | if (!entry->linked) { | ||
959 | if (!entry->ghost_tasks[CRIT_LEVEL_A]) { | ||
960 | ready_task = __take_ready(local_a_queue); | ||
961 | ready_crit = CRIT_LEVEL_A; | ||
962 | if (ready_task && is_ghost(ready_task)) { | ||
963 | link_task_to_cpu(ready_task, entry); | ||
964 | ready_task = NULL; | ||
965 | } | ||
966 | } | ||
967 | if (!ready_task && !entry->ghost_tasks[CRIT_LEVEL_B]) { | ||
968 | ready_task = __take_ready(local_b_queue); | ||
969 | ready_crit = CRIT_LEVEL_B; | ||
970 | if (ready_task && is_ghost(ready_task)) { | ||
971 | link_task_to_cpu(ready_task, entry); | ||
972 | ready_task = NULL; | ||
973 | } | ||
974 | } | ||
975 | if (!ready_task && !entry->ghost_tasks[CRIT_LEVEL_C]) { | ||
976 | ready_task = __take_ready(&crit_c); | ||
977 | ready_crit = CRIT_LEVEL_C; | ||
978 | if (ready_task && is_ghost(ready_task)) { | ||
979 | link_task_to_cpu(ready_task, entry); | ||
980 | ready_task = NULL; | ||
981 | } | ||
982 | } | ||
983 | if (!ready_task && !entry->ghost_tasks[CRIT_LEVEL_D]) { | ||
984 | ready_task = __take_ready(&crit_d); | ||
985 | ready_crit = CRIT_LEVEL_D; | ||
986 | if (ready_task && is_ghost(ready_task)) { | ||
987 | link_task_to_cpu(ready_task, entry); | ||
988 | ready_task = NULL; | ||
989 | } | ||
990 | } | ||
991 | if (!ready_task) { | ||
992 | /* set to something invalid? */ | ||
993 | ready_crit = NUM_CRIT_LEVELS; | ||
994 | } | ||
995 | for (i = ready_crit; i < NUM_CRIT_LEVELS; i++) { | ||
996 | if (entry->ghost_tasks[i]) | ||
997 | requeue(entry->ghost_tasks[i]); | ||
998 | } | ||
999 | link_task_to_cpu(ready_task, entry); | ||
1000 | if (ready_task) | ||
1001 | TRACE_TASK(ready_task, | ||
1002 | "Linked task inside scheduler\n"); | ||
1003 | } | ||
1004 | |||
1005 | /* The final scheduling decision. Do we need to switch for some reason? | ||
1006 | * If linked is different from scheduled, then select linked as next. | ||
1007 | */ | ||
1008 | if ((!np || blocks) && | ||
1009 | entry->linked != entry->scheduled) { | ||
1010 | /* Schedule a linked job? */ | ||
1011 | if (entry->linked) { | ||
1012 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
1013 | next = entry->linked; | ||
1014 | } | ||
1015 | if (entry->scheduled) { | ||
1016 | /* not gonna be scheduled soon */ | ||
1017 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
1018 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
1019 | } | ||
1020 | } else | ||
1021 | /* Only override Linux scheduler if we have a real-time task | ||
1022 | * scheduled that needs to continue. | ||
1023 | */ | ||
1024 | if (exists) | ||
1025 | next = prev; | ||
1026 | |||
1027 | sched_state_task_picked(); | ||
1028 | |||
1029 | /* TODO: Rename this lock to reflect that it protects all queues */ | ||
1030 | raw_spin_unlock(&global_lock); | ||
1031 | |||
1032 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1033 | TRACE("global_lock released, next=0x%p\n", next); | ||
1034 | |||
1035 | if (next) | ||
1036 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
1037 | else if (exists && !next) | ||
1038 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
1039 | #endif | ||
1040 | |||
1041 | |||
1042 | return next; | ||
1043 | } | ||
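
The chain of checks above (blocking, non-preemptive sections, budget exhaustion, sleeping, pending preemption) is easier to follow outside the diff. The standalone sketch below only models the order of those decisions; struct job_state, enum action, and decide() are names invented for the illustration, and the kernel's possibly overlapping actions (e.g. a blocked task inside a non-preemptive section) are collapsed into a single dominant result.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical condensed view of the per-CPU scheduling state. */
struct job_state {
	bool exists;      /* a real-time job is currently scheduled   */
	bool blocks;      /* it just blocked (e.g. on a semaphore)    */
	bool np;          /* it is in a non-preemptive section        */
	bool out_of_time; /* its budget is exhausted                  */
	bool sleep;       /* it signalled job completion              */
	bool preempt;     /* linked task differs from scheduled task  */
};

enum action { CONTINUE, UNLINK_ONLY, REQUEST_EXIT_NP, COMPLETE_JOB };

/* Mirrors the order of checks above, collapsed to one dominant action:
 * blocking always unlinks, a non-preemptive job that should stop is asked
 * to leave its NP section, and only a preemptable job that ran out of
 * budget or slept (and is not blocked or being preempted) completes. */
static enum action decide(const struct job_state *s)
{
	if (!s->exists)
		return CONTINUE;
	if (s->blocks)
		return UNLINK_ONLY;
	if (s->np && (s->out_of_time || s->preempt || s->sleep))
		return REQUEST_EXIT_NP;
	if (!s->np && (s->out_of_time || s->sleep) && !s->preempt)
		return COMPLETE_JOB;
	return CONTINUE;
}

int main(void)
{
	struct job_state s = { .exists = true, .np = true, .out_of_time = true };
	printf("action = %d\n", decide(&s)); /* prints 2: REQUEST_EXIT_NP */
	return 0;
}
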
1044 | |||
1045 | |||
1046 | /* _finish_switch - we just finished the switch away from prev | ||
1047 | * Called by LITMUS core | ||
1048 | * No locks | ||
1049 | */ | ||
1050 | static void mc_finish_switch(struct task_struct *prev) | ||
1051 | { | ||
1052 | cpu_entry_t* entry = &__get_cpu_var(mc_cpu_entries); | ||
1053 | |||
1054 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
1055 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1056 | TRACE_TASK(prev, "switched away from\n"); | ||
1057 | #endif | ||
1058 | } | 490 | } |
1059 | 491 | ||
1060 | 492 | /* | |
1061 | /* Prepare a task for running in RT mode | 493 | * Set up a new mixed-criticality task. |
1062 | * Called by LITMUS core | ||
1063 | * No lock required by caller | ||
1064 | * Obtains lock and calls mc_job_arrival before releasing lock | ||
1065 | */ | 494 | */ |
1066 | static void mc_task_new(struct task_struct *t, int on_rq, int running) | 495 | static void mc_task_new(struct task_struct *t, int on_rq, int running) |
1067 | { | 496 | { |
1068 | unsigned long flags; | 497 | unsigned long flags; |
1069 | cpu_entry_t* entry; | 498 | cpu_entry_t* entry; |
1070 | enum crit_level lvl; | 499 | enum crit_level level; |
1071 | 500 | ||
1072 | TRACE("mixed crit: task new %d\n", t->pid); | 501 | TRACE("New mixed criticality task %d\n", t->pid); |
1073 | 502 | ||
1074 | raw_spin_lock_irqsave(&global_lock, flags); | 503 | raw_spin_lock_irqsave(&global_lock, flags); |
1075 | 504 | ||
1076 | lvl = tsk_mc_crit(t); | 505 | /* Assign domain */ |
1077 | t->rt_param.domain = | 506 | level = tsk_mc_crit(t); |
1078 | (lvl == CRIT_LEVEL_A) ? remote_a_queue(get_partition(t)) : | 507 | if (level < CRIT_LEVEL_C) { |
1079 | (lvl == CRIT_LEVEL_B) ? remote_b_queue(get_partition(t)) : | 508 | entry = cpus[get_partition(t)]; |
1080 | (lvl == CRIT_LEVEL_C) ? &crit_c : &crit_d; | 509 | } else { |
510 | entry = cpus[task_cpu(t)]; | ||
511 | } | ||
512 | level = tsk_mc_crit(t); | ||
513 | t->rt_param._domain = entry->crit_entries[level].domain; | ||
1081 | 514 | ||
1082 | /* setup job params */ | 515 | /* Setup job params */ |
1083 | release_at(t, litmus_clock()); | 516 | release_at(t, litmus_clock()); |
1084 | tsk_mc_data(t)->mc_job.ghost_budget = 0; | 517 | tsk_mc_data(t)->mc_job.ghost_budget = 0; |
1085 | tsk_mc_data(t)->mc_job.is_ghost = 0; | 518 | tsk_mc_data(t)->mc_job.is_ghost = 0; |
1086 | 519 | ||
1087 | if (running) { | 520 | if (running) { |
1088 | entry = &per_cpu(mc_cpu_entries, task_cpu(t)); | ||
1089 | BUG_ON(entry->scheduled); | 521 | BUG_ON(entry->scheduled); |
1090 | 522 | entry->scheduled = t; | |
1091 | #ifdef CONFIG_RELEASE_MASTER | 523 | tsk_rt(t)->scheduled_on = entry->cpu; |
1092 | if (entry->cpu != mc_release_master) { | ||
1093 | #endif | ||
1094 | entry->scheduled = t; | ||
1095 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
1096 | #ifdef CONFIG_RELEASE_MASTER | ||
1097 | } else { | ||
1098 | /* do not schedule on release master */ | ||
1099 | preempt(entry); /* force resched */ | ||
1100 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
1101 | } | ||
1102 | #endif | ||
1103 | } else { | 524 | } else { |
1104 | t->rt_param.scheduled_on = NO_CPU; | 525 | t->rt_param.scheduled_on = NO_CPU; |
1105 | } | 526 | } |
1106 | t->rt_param.linked_on = NO_CPU; | 527 | t->rt_param.linked_on = NO_CPU; |
528 | |||
529 | job_arrival(t); | ||
1107 | 530 | ||
1108 | mc_job_arrival(t); | ||
1109 | raw_spin_unlock_irqrestore(&global_lock, flags); | 531 | raw_spin_unlock_irqrestore(&global_lock, flags); |
1110 | } | 532 | } |
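
mc_task_new() picks a task's domain from its criticality level: levels A and B use the per-CPU domain of the task's partition, while levels C and D share the global domains. A minimal standalone model of that rule, using the domain names assigned by init_mc() below; choose_domain() is a hypothetical helper written for the example, not part of the plugin.

#include <stdio.h>

enum crit_level { CRIT_LEVEL_A, CRIT_LEVEL_B, CRIT_LEVEL_C, CRIT_LEVEL_D,
		  NUM_CRIT_LEVELS };

/* Levels A and B map to a partitioned (per-CPU) domain, C and D to a
 * domain shared by all CPUs. */
static const char *choose_domain(enum crit_level lvl, int partition,
				 char *buf, size_t len)
{
	if (lvl < CRIT_LEVEL_C)
		snprintf(buf, len, "LVL-%c on CPU %d", 'A' + lvl, partition);
	else
		snprintf(buf, len, "LVL-%c (global)", 'A' + lvl);
	return buf;
}

int main(void)
{
	char buf[32];
	printf("%s\n", choose_domain(CRIT_LEVEL_B, 2, buf, sizeof(buf)));
	printf("%s\n", choose_domain(CRIT_LEVEL_C, 2, buf, sizeof(buf)));
	return 0;
}
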
1111 | 533 | ||
1112 | /* Called by LITMUS core | 534 | /* |
1113 | * No lock required by caller | 535 | * Add task back into its domain and cause any necessary preemptions. |
1114 | * Obtains lock and calls mc_job_arrival before releasing lock | ||
1115 | */ | 536 | */ |
1116 | static void mc_task_wake_up(struct task_struct *task) | 537 | static void mc_task_wake_up(struct task_struct *task) |
1117 | { | 538 | { |
1118 | unsigned long flags; | 539 | unsigned long flags; |
1119 | lt_t now; | 540 | lt_t now; |
1120 | 541 | ||
1121 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
1122 | |||
1123 | raw_spin_lock_irqsave(&global_lock, flags); | 542 | raw_spin_lock_irqsave(&global_lock, flags); |
543 | TRACE_TASK(task, "Wakes up"); | ||
544 | |||
1124 | /* We need to take suspensions because of semaphores into | 545 | /* We need to take suspensions because of semaphores into |
1125 | * account! If a job resumes after being suspended due to acquiring | 546 | * account! If a job resumes after being suspended due to acquiring |
1126 | * a semaphore, it should never be treated as a new job release. | 547 | * a semaphore, it should never be treated as a new job release. |
@@ -1130,114 +551,167 @@ static void mc_task_wake_up(struct task_struct *task) | |||
1130 | } else { | 551 | } else { |
1131 | now = litmus_clock(); | 552 | now = litmus_clock(); |
1132 | if (is_tardy(task, now)) { | 553 | if (is_tardy(task, now)) { |
1133 | /* new sporadic release */ | 554 | /* New sporadic release */ |
1134 | release_at(task, now); | 555 | release_at(task, now); |
1135 | sched_trace_task_release(task); | 556 | sched_trace_task_release(task); |
1136 | } | 557 | } else { |
1137 | else { | ||
1138 | if (task->rt.time_slice) { | 558 | if (task->rt.time_slice) { |
1139 | /* came back in time before deadline | 559 | /* Came back in time before deadline */ |
1140 | */ | ||
1141 | set_rt_flags(task, RT_F_RUNNING); | 560 | set_rt_flags(task, RT_F_RUNNING); |
1142 | } | 561 | } |
1143 | } | 562 | } |
1144 | } | 563 | } |
1145 | /* Delay job arrival if we still have an active ghost job */ | 564 | |
1146 | if (!is_ghost(task)) | 565 | if (!is_ghost(task)) |
1147 | mc_job_arrival(task); | 566 | job_arrival(task); |
567 | |||
1148 | raw_spin_unlock_irqrestore(&global_lock, flags); | 568 | raw_spin_unlock_irqrestore(&global_lock, flags); |
1149 | } | 569 | } |
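
The wake-up path distinguishes a tardy job, which is treated as a fresh sporadic release, from a job that resumed before its deadline and simply continues. Below is a small standalone model of that decision; lt_t, struct job, and the *_model helpers are stand-ins invented for the example, not the real rt_param bookkeeping.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long lt_t; /* LITMUS-style time value */

struct job { lt_t release; lt_t deadline; lt_t period; };

/* A job is tardy if it wakes up after its current deadline has passed. */
static bool is_tardy_model(const struct job *j, lt_t now)
{
	return now >= j->deadline;
}

/* On wake-up: a tardy job gets a brand new release at 'now'; otherwise
 * the current job simply continues with its existing deadline. */
static void wake_up_model(struct job *j, lt_t now)
{
	if (is_tardy_model(j, now)) {
		j->release = now;
		j->deadline = now + j->period;
		printf("new sporadic release at %llu\n", now);
	} else {
		printf("resumed before deadline %llu\n", j->deadline);
	}
}

int main(void)
{
	struct job j = { .release = 0, .deadline = 10, .period = 10 };
	wake_up_model(&j, 5);   /* resumed before deadline */
	wake_up_model(&j, 25);  /* new sporadic release    */
	return 0;
}
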
1150 | 570 | ||
1151 | /* Called by LITMUS core | 571 | /* |
1152 | * No lock required by caller | 572 | * Remove task from global state to prevent it from being linked / run |
1153 | * Obtains and releases global lock | 573 | * on any CPU. |
1154 | */ | 574 | */ |
1155 | static void mc_task_block(struct task_struct *t) | 575 | static void mc_task_block(struct task_struct *task) |
1156 | { | 576 | { |
1157 | unsigned long flags; | 577 | unsigned long flags; |
578 | raw_spin_lock_irqsave(&global_lock, flags); | ||
579 | TRACE_TASK(task, "Block at %llu", litmus_clock()); | ||
1158 | 580 | ||
1159 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | 581 | remove_from_all(task); |
1160 | 582 | ||
1161 | /* unlink if necessary */ | ||
1162 | raw_spin_lock_irqsave(&global_lock, flags); | ||
1163 | unlink(t); | ||
1164 | raw_spin_unlock_irqrestore(&global_lock, flags); | 583 | raw_spin_unlock_irqrestore(&global_lock, flags); |
1165 | |||
1166 | BUG_ON(!is_realtime(t)); | ||
1167 | } | 584 | } |
1168 | 585 | ||
1169 | 586 | /* | |
1170 | /* Called by LITMUS core | 587 | * Remove task from the system. |
1171 | * No lock required by caller | ||
1172 | * Obtains and releases global lock | ||
1173 | */ | 588 | */ |
1174 | static void mc_task_exit(struct task_struct * t) | 589 | static void mc_task_exit(struct task_struct *task) |
1175 | { | 590 | { |
1176 | unsigned long flags; | 591 | unsigned long flags; |
1177 | 592 | ||
1178 | /* unlink if necessary */ | 593 | BUG_ON(!is_realtime(task)); |
594 | TRACE_TASK(task, "RIP"); | ||
595 | |||
1179 | raw_spin_lock_irqsave(&global_lock, flags); | 596 | raw_spin_lock_irqsave(&global_lock, flags); |
1180 | unlink(t); | 597 | remove_from_all(task); |
1181 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | 598 | if (tsk_rt(task)->scheduled_on != NO_CPU) { |
1182 | mc_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; | 599 | cpus[tsk_rt(task)->scheduled_on]->scheduled = NULL; |
1183 | tsk_rt(t)->scheduled_on = NO_CPU; | 600 | tsk_rt(task)->scheduled_on = NO_CPU; |
1184 | } | 601 | } |
1185 | raw_spin_unlock_irqrestore(&global_lock, flags); | 602 | raw_spin_unlock_irqrestore(&global_lock, flags); |
1186 | |||
1187 | BUG_ON(!is_realtime(t)); | ||
1188 | TRACE_TASK(t, "RIP\n"); | ||
1189 | } | 603 | } |
1190 | 604 | ||
1191 | static long mc_admit_task(struct task_struct* tsk) | 605 | /* |
606 | * Return 0 if the task is a valid mixed-criticality task, -EINVAL otherwise. | ||
607 | */ | ||
608 | static long mc_admit_task(struct task_struct* task) | ||
1192 | { | 609 | { |
1193 | if (!tsk_mc_data(tsk)) | 610 | if (!tsk_mc_data(task)) { |
1194 | { | 611 | printk(KERN_WARNING "Tried to admit task with no criticality " |
1195 | printk(KERN_WARNING "tried to admit task with no criticality " | ||
1196 | "level\n"); | 612 | "level\n"); |
1197 | return -EINVAL; | 613 | return -EINVAL; |
1198 | } | 614 | } |
1199 | printk(KERN_INFO "admitted task with criticality level %d\n", | 615 | if (tsk_mc_crit(task) < CRIT_LEVEL_C && get_partition(task) == NO_CPU) { |
1200 | tsk_mc_crit(tsk)); | 616 | printk(KERN_WARNING "Tried to admit partitioned task with no " |
617 | "partition\n"); | ||
618 | return -EINVAL; | ||
619 | } | ||
620 | printk(KERN_INFO "Admitted task with criticality level %d\n", | ||
621 | tsk_mc_crit(task)); | ||
1201 | return 0; | 622 | return 0; |
1202 | } | 623 | } |
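
The admission checks reduce to a simple predicate: the task must carry criticality data, and the partitioned levels (A and B) must also name a CPU. Sketched standalone below; admissible() and the NO_CPU value of -1 are assumptions made for the example, not the plugin's actual sentinel.

#include <stdbool.h>
#include <stdio.h>

enum crit_level { CRIT_LEVEL_A, CRIT_LEVEL_B, CRIT_LEVEL_C, CRIT_LEVEL_D };
#define NO_CPU (-1) /* illustrative sentinel for "no partition assigned" */

/* Illustrative admission rule: every task needs a criticality level, and
 * the partitioned levels (A and B) additionally need a CPU assignment. */
static bool admissible(bool has_mc_data, enum crit_level lvl, int partition)
{
	if (!has_mc_data)
		return false;
	if (lvl < CRIT_LEVEL_C && partition == NO_CPU)
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", admissible(true, CRIT_LEVEL_A, 0));      /* 1 */
	printf("%d\n", admissible(true, CRIT_LEVEL_B, NO_CPU)); /* 0 */
	printf("%d\n", admissible(true, CRIT_LEVEL_C, NO_CPU)); /* 1 */
	return 0;
}
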
1203 | 624 | ||
1204 | static long mc_activate_plugin(void) | 625 | /* |
626 | * Return the next task that should be scheduled. | ||
627 | */ | ||
628 | static struct task_struct* mc_schedule(struct task_struct * prev) | ||
1205 | { | 629 | { |
1206 | int cpu; | 630 | domain_t *dom; |
1207 | cpu_entry_t *entry; | 631 | crit_cpu_entry_t *ce; |
632 | cpu_entry_t* entry = cpus[smp_processor_id()]; | ||
633 | int i, out_of_time, sleep, preempt, exists, blocks, global; | ||
634 | struct task_struct *dtask = NULL, *ready_task = NULL, *next = NULL; | ||
1208 | 635 | ||
1209 | bheap_init(&mc_cpu_heap_c); | 636 | raw_spin_lock(&global_lock); |
1210 | bheap_init(&mc_cpu_heap_d); | ||
1211 | #ifdef CONFIG_RELEASE_MASTER | ||
1212 | crit_c.release_master = atomic_read(&release_master_cpu); | ||
1213 | crit_d.release_master = crit_c.release_master; | ||
1214 | #endif | ||
1215 | 637 | ||
1216 | for_each_online_cpu(cpu) { | 638 | /* Sanity checking */ |
1217 | entry = &per_cpu(mc_cpu_entries, cpu); | 639 | BUG_ON(entry->scheduled && entry->scheduled != prev); |
1218 | bheap_node_init(&entry->hn_c, entry); | 640 | BUG_ON(entry->scheduled && !is_realtime(prev)); |
1219 | bheap_node_init(&entry->hn_d, entry); | 641 | BUG_ON(is_realtime(prev) && !entry->scheduled); |
1220 | atomic_set(&entry->will_schedule, 0); | 642 | |
1221 | entry->linked = NULL; | 643 | /* Determine state */ |
1222 | entry->scheduled = NULL; | 644 | exists = entry->scheduled != NULL; |
1223 | #ifdef CONFIG_RELEASE_MASTER | 645 | blocks = exists && !is_running(entry->scheduled); |
1224 | if (cpu != mc_release_master) { | 646 | out_of_time = exists && budget_enforced(entry->scheduled) && |
1225 | #endif | 647 | budget_exhausted(entry->scheduled); |
1226 | TRACE("MC: Initializing CPU #%d.\n", cpu); | 648 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; |
1227 | update_cpu_position(entry); | 649 | global = exists && is_global_task(entry->scheduled); |
1228 | #ifdef CONFIG_RELEASE_MASTER | 650 | preempt = entry->scheduled != entry->linked; |
1229 | } else { | 651 | |
1230 | TRACE("MC: CPU %d is release master.\n", cpu); | 652 | if (exists) { |
653 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
654 | TRACE_TASK(prev, | ||
655 | "blocks:%d out_of_time:%d sleep:%d preempt:%d " | ||
656 | "state:%d sig:%d global:%d", | ||
657 | blocks, out_of_time, sleep, preempt, | ||
658 | prev->state, signal_pending(prev), global); | ||
659 | } | ||
660 | |||
661 | /* If a task blocks we have no choice but to reschedule */ | ||
662 | if (blocks) | ||
663 | remove_from_all(entry->scheduled); | ||
664 | /* Any task which exhausts its budget or sleeps waiting for its next | ||
665 | * period completes unless its execution has been forcibly stopped. | ||
666 | */ | ||
667 | if ((out_of_time || sleep) && !blocks && !preempt) | ||
668 | job_completion(entry->scheduled, !sleep); | ||
669 | /* Global scheduled tasks must wait for a deschedule before they | ||
670 | * can rejoin a global domain. See comment in job_arrival. | ||
671 | */ | ||
672 | else if (global && preempt && !blocks) | ||
673 | job_arrival(entry->scheduled); | ||
674 | |||
675 | /* Pick next task if none is linked */ | ||
676 | if (!entry->linked) { | ||
677 | for (i = 0; i < NUM_CRIT_LEVELS && !ready_task; i++) { | ||
678 | ce = &entry->crit_entries[i]; | ||
679 | dom = ce->domain; | ||
680 | dtask = dom->peek_ready(dom); | ||
681 | if (!ce->linked && dtask) { | ||
682 | dom->take_ready(dom); | ||
683 | link_task_to_crit(ce, dtask); | ||
684 | ready_task = (is_ghost(dtask)) ? NULL : dtask; | ||
685 | } | ||
1231 | } | 686 | } |
1232 | #endif | 687 | if (ready_task) |
688 | link_task_to_cpu(entry, ready_task); | ||
1233 | } | 689 | } |
1234 | return 0; | 690 | |
691 | /* Schedule next task */ | ||
692 | next = entry->linked; | ||
693 | entry->scheduled = next; | ||
694 | if (entry->scheduled) | ||
695 | entry->scheduled->rt_param.scheduled_on = entry->cpu; | ||
696 | |||
697 | sched_state_task_picked(); | ||
698 | |||
699 | raw_spin_unlock(&global_lock); | ||
700 | |||
701 | if (next) | ||
702 | TRACE_TASK(next, "Scheduled at %llu", litmus_clock()); | ||
703 | else if (exists && !next) | ||
704 | TRACE("Becomes idle at %llu\n", litmus_clock()); | ||
705 | |||
706 | return next; | ||
1235 | } | 707 | } |
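
The loop that picks the next task walks the criticality levels from highest (A) to lowest, links the first pending task it finds at a still-unlinked level, and only treats a non-ghost task as runnable on the CPU; a ghost task occupies its level but lets the search continue downwards. Below is a simplified standalone model of that loop; the *_model types and pick_next() are invented for the illustration and stand in for the domain, crit entry, and linking helpers used above.

#include <stdbool.h>
#include <stdio.h>

#define NUM_LEVELS 4

struct task_model { const char *name; bool ghost; };

/* Trivial stand-in for a domain: at most one ready task per level. */
struct domain_model { struct task_model *ready; };

struct cpu_model {
	struct task_model *crit_linked[NUM_LEVELS]; /* per-level linkage  */
	struct task_model *linked;                  /* task chosen to run */
};

/* Walk the levels from highest (0) to lowest criticality.  The first level
 * with no linked task and a pending ready task gets that task linked; only
 * a non-ghost task is also linked to the CPU and actually scheduled. */
static void pick_next(struct cpu_model *cpu, struct domain_model doms[])
{
	struct task_model *ready = NULL;
	int i;

	for (i = 0; i < NUM_LEVELS && !ready; i++) {
		struct task_model *t = doms[i].ready;
		if (!cpu->crit_linked[i] && t) {
			doms[i].ready = NULL;       /* take_ready()      */
			cpu->crit_linked[i] = t;    /* link_task_to_crit */
			ready = t->ghost ? NULL : t;
		}
	}
	if (ready)
		cpu->linked = ready;                /* link_task_to_cpu  */
}

int main(void)
{
	struct task_model ghost_a = { "A-ghost", true };
	struct task_model real_b  = { "B-task",  false };
	struct domain_model doms[NUM_LEVELS] = {
		{ &ghost_a }, { &real_b }, { NULL }, { NULL }
	};
	struct cpu_model cpu = { .linked = NULL };

	pick_next(&cpu, doms);
	printf("linked: %s\n", cpu.linked ? cpu.linked->name : "none");
	return 0;
}

Here the level-A ghost is linked to its criticality entry but does not run; the level-B task ends up linked to the CPU.
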
1236 | 708 | ||
1237 | /* Plugin object */ | 709 | /* ************************************************************************** |
710 | * Initialization | ||
711 | * ************************************************************************** */ | ||
712 | |||
1238 | static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = { | 713 | static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = { |
1239 | .plugin_name = "MC", | 714 | .plugin_name = "MC", |
1240 | .finish_switch = mc_finish_switch, | ||
1241 | .tick = mc_tick, | 715 | .tick = mc_tick, |
1242 | .task_new = mc_task_new, | 716 | .task_new = mc_task_new, |
1243 | .complete_job = complete_job, | 717 | .complete_job = complete_job, |
@@ -1246,48 +720,112 @@ static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = { | |||
1246 | .task_wake_up = mc_task_wake_up, | 720 | .task_wake_up = mc_task_wake_up, |
1247 | .task_block = mc_task_block, | 721 | .task_block = mc_task_block, |
1248 | .admit_task = mc_admit_task, | 722 | .admit_task = mc_admit_task, |
1249 | .activate_plugin = mc_activate_plugin, | ||
1250 | }; | 723 | }; |
1251 | 724 | ||
725 | /* Define state here so that it is allocated with the module | ||
726 | * and destroyed when the module is unloaded. | ||
727 | */ | ||
728 | DEFINE_PER_CPU(cpu_entry_t, _mc_cpus); | ||
729 | DEFINE_PER_CPU(domain_data_t, _mc_crit_a); | ||
730 | DEFINE_PER_CPU(domain_data_t, _mc_crit_b); | ||
731 | static domain_data_t _mc_crit_c, _mc_crit_d; | ||
732 | struct bheap _mc_heap_c, _mc_heap_d; | ||
733 | struct bheap_node _mc_nodes_c[NR_CPUS], _mc_nodes_d[NR_CPUS]; | ||
734 | |||
735 | static void init_crit_entry(crit_cpu_entry_t *ce, enum crit_level level, | ||
736 | domain_data_t *dom_data, | ||
737 | struct bheap_node *node) | ||
738 | { | ||
739 | ce->level = level; | ||
740 | ce->linked = NULL; | ||
741 | ce->node = node; | ||
742 | ce->domain = &dom_data->domain; | ||
1252 | 743 | ||
1253 | static int __init init_mc(void) | 744 | hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
745 | ce->timer.function = mc_ghost_exhausted; | ||
746 | } | ||
747 | |||
748 | static void init_local_domain(cpu_entry_t *entry, domain_data_t *dom_data, | ||
749 | enum crit_level level) | ||
750 | { | ||
751 | dom_data->heap = NULL; | ||
752 | dom_data->crit_entry = &entry->crit_entries[level]; | ||
753 | init_crit_entry(dom_data->crit_entry, level, dom_data, NULL); | ||
754 | } | ||
755 | |||
756 | static void init_global_domain(domain_data_t *dom_data, enum crit_level level, | ||
757 | struct bheap *heap, struct bheap_node *nodes) | ||
1254 | { | 758 | { |
1255 | int cpu; | 759 | int cpu; |
1256 | int i; | ||
1257 | cpu_entry_t *entry; | 760 | cpu_entry_t *entry; |
1258 | struct watchdog_timer *timer; | 761 | crit_cpu_entry_t *ce; |
1259 | 762 | struct bheap_node *node; | |
1260 | bheap_init(&mc_cpu_heap_c); | 763 | |
1261 | bheap_init(&mc_cpu_heap_d); | 764 | dom_data->crit_entry = NULL; |
1262 | /* initialize CPU state */ | 765 | dom_data->heap = heap; |
1263 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 766 | bheap_init(heap); |
1264 | entry = &per_cpu(mc_cpu_entries, cpu); | 767 | |
1265 | mc_cpus[cpu] = entry; | 768 | for_each_online_cpu(cpu) { |
1266 | atomic_set(&entry->will_schedule, 0); | 769 | entry = cpus[cpu]; |
1267 | entry->cpu = cpu; | 770 | node = &nodes[cpu]; |
1268 | entry->hn_c = &mc_heap_node_c[cpu]; | 771 | ce = &entry->crit_entries[level]; |
1269 | entry->hn_d = &mc_heap_node_d[cpu]; | 772 | |
1270 | bheap_node_init(&entry->hn_c, entry); | 773 | init_crit_entry(ce, level, dom_data, node); |
1271 | bheap_node_init(&entry->hn_d, entry); | 774 | |
1272 | for (i = CRIT_LEVEL_A; i < NUM_CRIT_LEVELS; i++) { | 775 | bheap_node_init(&ce->node, ce); |
1273 | timer = ghost_timer(cpu, i); | 776 | bheap_insert(cpu_lower_prio, heap, node); |
1274 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, | ||
1275 | HRTIMER_MODE_ABS); | ||
1276 | timer->timer.function = watchdog_timeout; | ||
1277 | } | ||
1278 | } | ||
1279 | mc_edf_domain_init(&crit_c, NULL, mc_release_jobs); | ||
1280 | mc_edf_domain_init(&crit_d, NULL, mc_release_jobs); | ||
1281 | for (i = 0; i < NR_CPUS; i++) { | ||
1282 | mc_edf_domain_init(remote_b_queue(i), NULL, | ||
1283 | mc_release_jobs); | ||
1284 | } | 777 | } |
1285 | for (i = 0; i < NR_CPUS; i++) { | 778 | } |
1286 | mc_edf_domain_init(remote_a_queue(i), NULL, | 779 | |
1287 | mc_release_jobs); | 780 | static inline void init_edf_domain(domain_t *dom) |
781 | { | ||
782 | pd_domain_init(dom, edf_ready_order, NULL, | ||
783 | mc_release_jobs, mc_preempt_needed, | ||
784 | edf_higher_prio); | ||
785 | } | ||
786 | |||
787 | static int __init init_mc(void) | ||
788 | { | ||
789 | int cpu; | ||
790 | cpu_entry_t *entry; | ||
791 | domain_data_t *dom_data; | ||
792 | |||
793 | raw_spin_lock_init(&global_lock); | ||
794 | |||
795 | for_each_online_cpu(cpu) { | ||
796 | entry = &per_cpu(_mc_cpus, cpu); | ||
797 | cpus[cpu] = entry; | ||
798 | |||
799 | entry->cpu = cpu; | ||
800 | entry->scheduled = NULL; | ||
801 | entry->linked = NULL; | ||
802 | |||
803 | /* CRIT_LEVEL_A */ | ||
804 | dom_data = &per_cpu(_mc_crit_a, cpu); | ||
805 | init_local_domain(entry, dom_data, CRIT_LEVEL_A); | ||
806 | init_edf_domain(&dom_data->domain); | ||
807 | dom_data->domain.name = "LVL-A"; | ||
808 | |||
809 | /* CRIT_LEVEL_B */ | ||
810 | dom_data = &per_cpu(_mc_crit_b, cpu); | ||
811 | init_local_domain(entry, dom_data, CRIT_LEVEL_B); | ||
812 | init_edf_domain(&dom_data->domain); | ||
813 | dom_data->domain.name = "LVL-B"; | ||
1288 | } | 814 | } |
815 | |||
816 | /* CRIT_LEVEL_C */ | ||
817 | init_global_domain(&_mc_crit_c, CRIT_LEVEL_C, | ||
818 | &_mc_heap_c, _mc_nodes_c); | ||
819 | init_edf_domain(&_mc_crit_c.domain); | ||
820 | _mc_crit_c.domain.name = "LVL-C"; | ||
821 | |||
822 | /* CRIT_LEVEL_D */ | ||
823 | init_global_domain(&_mc_crit_d, CRIT_LEVEL_D, | ||
824 | &_mc_heap_d, _mc_nodes_d); | ||
825 | init_edf_domain(&_mc_crit_d.domain); | ||
826 | _mc_crit_d.domain.name = "LVL-D"; | ||
827 | |||
1289 | return register_sched_plugin(&mc_plugin); | 828 | return register_sched_plugin(&mc_plugin); |
1290 | } | 829 | } |
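
init_global_domain() gives every CPU a bheap_node ordered by cpu_lower_prio, presumably so that a global domain can quickly locate the CPU running its lowest-priority work when a new job is released, in the spirit of GSN-EDF's CPU heap. The idea is sketched below with a plain linear scan standing in for the binomial heap; linked_deadline[] and edf_preempt_target() are invented for the example.

#include <stdio.h>
#include <limits.h>

#define NCPUS 4
#define NO_TASK ULLONG_MAX  /* idle CPUs count as "infinitely late" */

/* Per-CPU view a global domain needs: the absolute deadline of the job
 * currently linked there (EDF: earlier deadline means higher priority). */
static unsigned long long linked_deadline[NCPUS];

/* Return the CPU whose linked job has the latest deadline, i.e. the
 * lowest-priority CPU -- the one a newly released global job should
 * preempt if its own deadline is earlier.  The plugin keeps this
 * information in a bheap keyed by cpu_lower_prio instead of scanning. */
static int edf_preempt_target(void)
{
	int cpu, worst = 0;

	for (cpu = 1; cpu < NCPUS; cpu++)
		if (linked_deadline[cpu] > linked_deadline[worst])
			worst = cpu;
	return worst;
}

int main(void)
{
	linked_deadline[0] = 100;
	linked_deadline[1] = NO_TASK;  /* idle */
	linked_deadline[2] = 50;
	linked_deadline[3] = 80;
	printf("preempt CPU %d\n", edf_preempt_target()); /* CPU 1 (idle) */
	return 0;
}
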
1291 | 830 | ||
1292 | |||
1293 | module_init(init_mc); | 831 | module_init(init_mc); |