Diffstat (limited to 'litmus')
-rw-r--r--  litmus/Kconfig            33
-rw-r--r--  litmus/Makefile            1
-rw-r--r--  litmus/affinity.c         42
-rw-r--r--  litmus/clustered.c         2
-rw-r--r--  litmus/ftdev.c            73
-rw-r--r--  litmus/litmus.c           18
-rw-r--r--  litmus/locking.c           2
-rw-r--r--  litmus/preempt.c           2
-rw-r--r--  litmus/rt_domain.c         4
-rw-r--r--  litmus/sched_cedf.c      133
-rw-r--r--  litmus/sched_gsn_edf.c    52
-rw-r--r--  litmus/sched_litmus.c     15
-rw-r--r--  litmus/sched_pfair.c     225
-rw-r--r--  litmus/sched_plugin.c     23
-rw-r--r--  litmus/sched_psn_edf.c    41
-rw-r--r--  litmus/trace.c           109
16 files changed, 562 insertions, 213 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index ad8dc8308cf0..94b48e199577 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -62,6 +62,25 @@ config LITMUS_LOCKING
62 62
63endmenu 63endmenu
64 64
65menu "Performance Enhancements"
66
67config SCHED_CPU_AFFINITY
68 bool "Local Migration Affinity"
69 depends on X86
70 default y
71 help
72 Rescheduled tasks prefer CPUs near to their previously used CPU. This
73 may improve performance through possible preservation of cache affinity.
74
75 Warning: May make bugs harder to find since tasks may migrate less often.
76
77 NOTES:
78 * Feature is not utilized by PFair/PD^2.
79
80 Say Yes if unsure.
81
82endmenu
83
65menu "Tracing" 84menu "Tracing"
66 85
67config FEATHER_TRACE 86config FEATHER_TRACE
@@ -180,6 +199,20 @@ config SCHED_DEBUG_TRACE_CALLER
180 199
181 If unsure, say No. 200 If unsure, say No.
182 201
202config PREEMPT_STATE_TRACE
203 bool "Trace preemption state machine transitions"
204 depends on SCHED_DEBUG_TRACE
205 default n
206 help
207 With this option enabled, each CPU will log when it transitions
208 states in the preemption state machine. This state machine is
209 used to determine how to react to IPIs (avoid races with in-flight IPIs).
210
211 Warning: this creates a lot of information in the debug trace. Only
212 recommended when you are debugging preemption-related races.
213
214 If unsure, say No.
215
183endmenu 216endmenu
184 217
185endmenu 218endmenu
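The PREEMPT_STATE_TRACE help text above refers to a per-CPU preemption state machine that decides how to react to IPIs and avoids races with in-flight IPIs. A minimal, generic sketch of that idea (the state names and helpers below are illustrative stand-ins, not the actual definitions from litmus/preempt.h; C11 atomics stand in for the kernel's primitives):

/* Generic illustration of an IPI-race-avoiding state machine: the sender
 * only raises an IPI if the target CPU has not already been told to
 * reschedule, so redundant IPIs are suppressed.  Single-threaded demo. */
#include <stdatomic.h>
#include <stdio.h>

enum cpu_state { CPU_RUNNING, CPU_RESCHED_PENDING };

static _Atomic int cpu_state = CPU_RUNNING;

/* Caller decided the (remote) CPU must reschedule.  Returns 1 if an IPI is
 * actually needed, 0 if one is already in flight. */
static int request_resched(void)
{
	int expected = CPU_RUNNING;
	return atomic_compare_exchange_strong(&cpu_state, &expected,
					      CPU_RESCHED_PENDING);
}

/* What the target CPU would do when it finally enters schedule(). */
static void ack_resched(void)
{
	atomic_store(&cpu_state, CPU_RUNNING);
}

int main(void)
{
	printf("first request sends IPI:  %d\n", request_resched()); /* 1 */
	printf("second request sends IPI: %d\n", request_resched()); /* 0 */
	ack_resched();
	printf("after schedule(), again:  %d\n", request_resched()); /* 1 */
	return 0;
}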
diff --git a/litmus/Makefile b/litmus/Makefile
index ad9936e07b83..7338180f196f 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -21,6 +21,7 @@ obj-y = sched_plugin.o litmus.o \
21 21
22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
24obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
24 25
25obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 26obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
26obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 27obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
new file mode 100644
index 000000000000..3fa6dd789400
--- /dev/null
+++ b/litmus/affinity.c
@@ -0,0 +1,42 @@
1#include <linux/cpu.h>
2
3#include <litmus/affinity.h>
4
5struct neighborhood neigh_info[NR_CPUS];
6
7/* called by _init_litmus() */
8void init_topology(void) {
9 int cpu;
10 int i;
11 int chk;
12 int depth = num_cache_leaves;
13
14 if (depth > NUM_CACHE_LEVELS)
15 depth = NUM_CACHE_LEVELS;
16
17 for_each_online_cpu(cpu) {
18 for (i = 0; i < depth; ++i) {
19 chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
20 if (chk) {
21 /* failed */
22 neigh_info[cpu].size[i] = 0;
23 } else {
24 /* size = num bits in mask */
25 neigh_info[cpu].size[i] =
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 }
32
33 /* set data for non-existent levels */
34 for (; i < NUM_CACHE_LEVELS; ++i) {
35 neigh_info[cpu].size[i] = 0;
36
37 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
38 cpu, neigh_info[cpu].size[i], i, 0lu);
39 }
40 }
41}
42
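The neigh_info[] table built above is consumed later in this patch by the clustered and global EDF plugins (see cedf_get_nearest_available_cpu() and gsnedf_get_nearest_available_cpu() below): a rescheduled task prefers an idle CPU that shares the lowest possible cache level with the CPU it last ran on. A small user-space model of that level-by-level search, with made-up topology and helper names:

/* Standalone model (not kernel code) of the affinity lookup: scan the
 * neighbor masks from the closest cache level outward and take the first
 * idle CPU found. */
#include <stdio.h>

#define NCPUS   4
#define NLEVELS 2	/* e.g. a private L2 pair, then the shared L3 */

/* neighbors[cpu][level]: bitmask of CPUs sharing that cache level. */
static const unsigned neighbors[NCPUS][NLEVELS] = {
	{ 0x3, 0xf }, { 0x3, 0xf },	/* CPUs 0 and 1 share an L2 */
	{ 0xc, 0xf }, { 0xc, 0xf },	/* CPUs 2 and 3 share an L2 */
};

static int nearest_idle_cpu(int prev_cpu, unsigned idle_mask)
{
	int level;

	for (level = 0; level < NLEVELS; level++) {
		unsigned candidates = neighbors[prev_cpu][level] & idle_mask;
		if (candidates)
			return __builtin_ctz(candidates); /* lowest set bit */
	}
	return -1;	/* no idle CPU shares a cache with prev_cpu */
}

int main(void)
{
	/* CPUs 1 and 2 are idle; the task last ran on CPU 0. */
	printf("migrate to CPU %d\n", nearest_idle_cpu(0, 0x6)); /* prints 1 */
	return 0;
}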
diff --git a/litmus/clustered.c b/litmus/clustered.c
index 04450a8ad4fe..6fe1b512f628 100644
--- a/litmus/clustered.c
+++ b/litmus/clustered.c
@@ -102,7 +102,7 @@ int assign_cpus_to_clusters(enum cache_level level,
102 cpus[i]->cluster = cpus[low_cpu]->cluster; 102 cpus[i]->cluster = cpus[low_cpu]->cluster;
103 } 103 }
104 /* enqueue in cpus list */ 104 /* enqueue in cpus list */
105 list_add(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); 105 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
106 printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id); 106 printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id);
107 } 107 }
108out: 108out:
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index 4a4b2e3e56c2..06fcf4cf77dc 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -114,6 +114,7 @@ static int ftdev_open(struct inode *in, struct file *filp)
114 goto out; 114 goto out;
115 115
116 ftdm = ftdev->minor + buf_idx; 116 ftdm = ftdev->minor + buf_idx;
117 ftdm->ftdev = ftdev;
117 filp->private_data = ftdm; 118 filp->private_data = ftdm;
118 119
119 if (mutex_lock_interruptible(&ftdm->lock)) { 120 if (mutex_lock_interruptible(&ftdm->lock)) {
@@ -250,64 +251,61 @@ out:
250 return err; 251 return err;
251} 252}
252 253
253typedef uint32_t cmd_t; 254static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
254
255static ssize_t ftdev_write(struct file *filp, const char __user *from,
256 size_t len, loff_t *f_pos)
257{ 255{
256 long err = -ENOIOCTLCMD;
258 struct ftdev_minor* ftdm = filp->private_data; 257 struct ftdev_minor* ftdm = filp->private_data;
259 ssize_t err = -EINVAL;
260 cmd_t cmd;
261 cmd_t id;
262
263 if (len % sizeof(cmd) || len < 2 * sizeof(cmd))
264 goto out;
265
266 if (copy_from_user(&cmd, from, sizeof(cmd))) {
267 err = -EFAULT;
268 goto out;
269 }
270 len -= sizeof(cmd);
271 from += sizeof(cmd);
272
273 if (cmd != FTDEV_ENABLE_CMD && cmd != FTDEV_DISABLE_CMD)
274 goto out;
275 258
276 if (mutex_lock_interruptible(&ftdm->lock)) { 259 if (mutex_lock_interruptible(&ftdm->lock)) {
277 err = -ERESTARTSYS; 260 err = -ERESTARTSYS;
278 goto out; 261 goto out;
279 } 262 }
280 263
281 err = sizeof(cmd); 264 /* FIXME: check id against list of acceptable events */
282 while (len) { 265
283 if (copy_from_user(&id, from, sizeof(cmd))) { 266 switch (cmd) {
284 err = -EFAULT; 267 case FTDEV_ENABLE_CMD:
285 goto out_unlock; 268 if (activate(&ftdm->events, arg))
286 }
287 /* FIXME: check id against list of acceptable events */
288 len -= sizeof(cmd);
289 from += sizeof(cmd);
290 if (cmd == FTDEV_DISABLE_CMD)
291 deactivate(&ftdm->events, id);
292 else if (activate(&ftdm->events, id) != 0) {
293 err = -ENOMEM; 269 err = -ENOMEM;
294 goto out_unlock; 270 else
295 } 271 err = 0;
296 err += sizeof(cmd); 272 break;
297 } 273
274 case FTDEV_DISABLE_CMD:
275 deactivate(&ftdm->events, arg);
276 err = 0;
277 break;
278
279 default:
280 printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg);
281 };
298 282
299out_unlock:
300 mutex_unlock(&ftdm->lock); 283 mutex_unlock(&ftdm->lock);
301out: 284out:
302 return err; 285 return err;
303} 286}
304 287
288static ssize_t ftdev_write(struct file *filp, const char __user *from,
289 size_t len, loff_t *f_pos)
290{
291 struct ftdev_minor* ftdm = filp->private_data;
292 ssize_t err = -EINVAL;
293 struct ftdev* ftdev = ftdm->ftdev;
294
295 /* dispatch write to buffer-specific code, if available */
296 if (ftdev->write)
297 err = ftdev->write(ftdm->buf, len, from);
298
299 return err;
300}
301
305struct file_operations ftdev_fops = { 302struct file_operations ftdev_fops = {
306 .owner = THIS_MODULE, 303 .owner = THIS_MODULE,
307 .open = ftdev_open, 304 .open = ftdev_open,
308 .release = ftdev_release, 305 .release = ftdev_release,
309 .write = ftdev_write, 306 .write = ftdev_write,
310 .read = ftdev_read, 307 .read = ftdev_read,
308 .unlocked_ioctl = ftdev_ioctl,
311}; 309};
312 310
313int ftdev_init( struct ftdev* ftdev, struct module* owner, 311int ftdev_init( struct ftdev* ftdev, struct module* owner,
@@ -325,6 +323,7 @@ int ftdev_init( struct ftdev* ftdev, struct module* owner,
325 ftdev->alloc = NULL; 323 ftdev->alloc = NULL;
326 ftdev->free = NULL; 324 ftdev->free = NULL;
327 ftdev->can_open = NULL; 325 ftdev->can_open = NULL;
326 ftdev->write = NULL;
328 327
329 ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor), 328 ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
330 GFP_KERNEL); 329 GFP_KERNEL);
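With this change, Feather-Trace events are switched on and off through unlocked_ioctl(), one event id per call, instead of the removed packed write() protocol; write() is now dispatched to an optional per-device callback. A hypothetical user-space sketch, assuming the command constants are mirrored from the kernel's litmus/ftdev.h and using a placeholder device node and event id:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* These must match the kernel's definitions in litmus/ftdev.h; the literal
 * values below are placeholders for illustration only. */
#define FTDEV_ENABLE_CMD	0x0	/* placeholder */
#define FTDEV_DISABLE_CMD	0x1	/* placeholder */

int main(void)
{
	unsigned long event_id = 100;	/* placeholder event id */
	int fd = open("/dev/litmus/ft_trace0", O_RDWR);	/* placeholder node */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, FTDEV_ENABLE_CMD, event_id) < 0)
		perror("enable");
	/* ... read binary timestamp records from fd ... */
	if (ioctl(fd, FTDEV_DISABLE_CMD, event_id) < 0)
		perror("disable");
	close(fd);
	return 0;
}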
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 11ccaafd50de..301390148d02 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -17,6 +17,10 @@
17#include <litmus/litmus_proc.h> 17#include <litmus/litmus_proc.h>
18#include <litmus/sched_trace.h> 18#include <litmus/sched_trace.h>
19 19
20#ifdef CONFIG_SCHED_CPU_AFFINITY
21#include <litmus/affinity.h>
22#endif
23
20/* Number of RT tasks that exist in the system */ 24/* Number of RT tasks that exist in the system */
21atomic_t rt_task_count = ATOMIC_INIT(0); 25atomic_t rt_task_count = ATOMIC_INIT(0);
22static DEFINE_RAW_SPINLOCK(task_transition_lock); 26static DEFINE_RAW_SPINLOCK(task_transition_lock);
@@ -110,6 +114,14 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
110 "because wcet > period\n", pid); 114 "because wcet > period\n", pid);
111 goto out_unlock; 115 goto out_unlock;
112 } 116 }
117 if ( tp.cls != RT_CLASS_HARD &&
118 tp.cls != RT_CLASS_SOFT &&
119 tp.cls != RT_CLASS_BEST_EFFORT)
120 {
121 printk(KERN_INFO "litmus: real-time task %d rejected "
122 "because its class is invalid\n", pid);
123 goto out_unlock;
124 }
113 if (tp.budget_policy != NO_ENFORCEMENT && 125 if (tp.budget_policy != NO_ENFORCEMENT &&
114 tp.budget_policy != QUANTUM_ENFORCEMENT && 126 tp.budget_policy != QUANTUM_ENFORCEMENT &&
115 tp.budget_policy != PRECISE_ENFORCEMENT) 127 tp.budget_policy != PRECISE_ENFORCEMENT)
@@ -517,6 +529,8 @@ static int __init _init_litmus(void)
517 */ 529 */
518 printk("Starting LITMUS^RT kernel\n"); 530 printk("Starting LITMUS^RT kernel\n");
519 531
532 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
533
520 register_sched_plugin(&linux_sched_plugin); 534 register_sched_plugin(&linux_sched_plugin);
521 535
522 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); 536 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
@@ -532,6 +546,10 @@ static int __init _init_litmus(void)
532 546
533 init_litmus_proc(); 547 init_litmus_proc();
534 548
549#ifdef CONFIG_SCHED_CPU_AFFINITY
550 init_topology();
551#endif
552
535 return 0; 553 return 0;
536} 554}
537 555
diff --git a/litmus/locking.c b/litmus/locking.c
index cfce98e7480d..b3279c1930b7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -80,7 +80,7 @@ asmlinkage long sys_litmus_lock(int lock_od)
80 80
81 /* Note: task my have been suspended or preempted in between! Take 81 /* Note: task my have been suspended or preempted in between! Take
82 * this into account when computing overheads. */ 82 * this into account when computing overheads. */
83 TS_UNLOCK_END; 83 TS_LOCK_END;
84 84
85 return err; 85 return err;
86} 86}
diff --git a/litmus/preempt.c b/litmus/preempt.c
index ebe2e3461895..5704d0bf4c0b 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -30,8 +30,10 @@ void sched_state_will_schedule(struct task_struct* tsk)
30 /* Litmus tasks should never be subject to a remote 30 /* Litmus tasks should never be subject to a remote
31 * set_tsk_need_resched(). */ 31 * set_tsk_need_resched(). */
32 BUG_ON(is_realtime(tsk)); 32 BUG_ON(is_realtime(tsk));
33#ifdef CONFIG_PREEMPT_STATE_TRACE
33 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 34 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
34 __builtin_return_address(0)); 35 __builtin_return_address(0));
36#endif
35} 37}
36 38
37/* Called by the IPI handler after another CPU called smp_send_resched(). */ 39/* Called by the IPI handler after another CPU called smp_send_resched(). */
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 81a5ac16f164..d405854cd39c 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -55,12 +55,14 @@ static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
55{ 55{
56 unsigned long flags; 56 unsigned long flags;
57 struct release_heap* rh; 57 struct release_heap* rh;
58 rh = container_of(timer, struct release_heap, timer);
59
60 TS_RELEASE_LATENCY(rh->release_time);
58 61
59 VTRACE("on_release_timer(0x%p) starts.\n", timer); 62 VTRACE("on_release_timer(0x%p) starts.\n", timer);
60 63
61 TS_RELEASE_START; 64 TS_RELEASE_START;
62 65
63 rh = container_of(timer, struct release_heap, timer);
64 66
65 raw_spin_lock_irqsave(&rh->dom->release_lock, flags); 67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
66 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); 68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 5e977dd2fef0..87f8bc9bb50b 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -44,6 +44,10 @@
44 44
45#include <litmus/bheap.h> 45#include <litmus/bheap.h>
46 46
47#ifdef CONFIG_SCHED_CPU_AFFINITY
48#include <litmus/affinity.h>
49#endif
50
47/* to configure the cluster size */ 51/* to configure the cluster size */
48#include <litmus/litmus_proc.h> 52#include <litmus/litmus_proc.h>
49 53
@@ -95,7 +99,7 @@ typedef struct clusterdomain {
95 struct bheap_node *heap_node; 99 struct bheap_node *heap_node;
96 struct bheap cpu_heap; 100 struct bheap cpu_heap;
97 /* lock for this cluster */ 101 /* lock for this cluster */
98#define cedf_lock domain.ready_lock 102#define cluster_lock domain.ready_lock
99} cedf_domain_t; 103} cedf_domain_t;
100 104
101/* a cedf_domain per cluster; allocation is done at init/activation time */ 105/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -204,7 +208,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
204} 208}
205 209
206/* unlink - Make sure a task is not linked any longer to an entry 210/* unlink - Make sure a task is not linked any longer to an entry
207 * where it was linked before. Must hold cedf_lock. 211 * where it was linked before. Must hold cluster_lock.
208 */ 212 */
209static noinline void unlink(struct task_struct* t) 213static noinline void unlink(struct task_struct* t)
210{ 214{
@@ -240,7 +244,7 @@ static void preempt(cpu_entry_t *entry)
240} 244}
241 245
242/* requeue - Put an unlinked task into gsn-edf domain. 246/* requeue - Put an unlinked task into gsn-edf domain.
243 * Caller must hold cedf_lock. 247 * Caller must hold cluster_lock.
244 */ 248 */
245static noinline void requeue(struct task_struct* task) 249static noinline void requeue(struct task_struct* task)
246{ 250{
@@ -257,11 +261,34 @@ static noinline void requeue(struct task_struct* task)
257 } 261 }
258} 262}
259 263
264#ifdef CONFIG_SCHED_CPU_AFFINITY
265static cpu_entry_t* cedf_get_nearest_available_cpu(
266 cedf_domain_t *cluster, cpu_entry_t *start)
267{
268 cpu_entry_t *affinity;
269
270 get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
271#ifdef CONFIG_RELEASE_MASTER
272 cluster->domain.release_master
273#else
274 NO_CPU
275#endif
276 );
277
278 /* make sure CPU is in our cluster */
279 if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
280 return(affinity);
281 else
282 return(NULL);
283}
284#endif
285
286
260/* check for any necessary preemptions */ 287/* check for any necessary preemptions */
261static void check_for_preemptions(cedf_domain_t *cluster) 288static void check_for_preemptions(cedf_domain_t *cluster)
262{ 289{
263 struct task_struct *task; 290 struct task_struct *task;
264 cpu_entry_t* last; 291 cpu_entry_t *last;
265 292
266 for(last = lowest_prio_cpu(cluster); 293 for(last = lowest_prio_cpu(cluster);
267 edf_preemption_needed(&cluster->domain, last->linked); 294 edf_preemption_needed(&cluster->domain, last->linked);
@@ -270,8 +297,20 @@ static void check_for_preemptions(cedf_domain_t *cluster)
270 task = __take_ready(&cluster->domain); 297 task = __take_ready(&cluster->domain);
271 TRACE("check_for_preemptions: attempting to link task %d to %d\n", 298 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
272 task->pid, last->cpu); 299 task->pid, last->cpu);
300#ifdef CONFIG_SCHED_CPU_AFFINITY
301 {
302 cpu_entry_t *affinity =
303 cedf_get_nearest_available_cpu(cluster,
304 &per_cpu(cedf_cpu_entries, task_cpu(task)));
305 if(affinity)
306 last = affinity;
307 else if(last->linked)
308 requeue(last->linked);
309 }
310#else
273 if (last->linked) 311 if (last->linked)
274 requeue(last->linked); 312 requeue(last->linked);
313#endif
275 link_task_to_cpu(task, last); 314 link_task_to_cpu(task, last);
276 preempt(last); 315 preempt(last);
277 } 316 }
@@ -292,15 +331,15 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
292 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); 331 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
293 unsigned long flags; 332 unsigned long flags;
294 333
295 raw_spin_lock_irqsave(&cluster->cedf_lock, flags); 334 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
296 335
297 __merge_ready(&cluster->domain, tasks); 336 __merge_ready(&cluster->domain, tasks);
298 check_for_preemptions(cluster); 337 check_for_preemptions(cluster);
299 338
300 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); 339 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
301} 340}
302 341
303/* caller holds cedf_lock */ 342/* caller holds cluster_lock */
304static noinline void job_completion(struct task_struct *t, int forced) 343static noinline void job_completion(struct task_struct *t, int forced)
305{ 344{
306 BUG_ON(!t); 345 BUG_ON(!t);
@@ -378,7 +417,18 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
378 int out_of_time, sleep, preempt, np, exists, blocks; 417 int out_of_time, sleep, preempt, np, exists, blocks;
379 struct task_struct* next = NULL; 418 struct task_struct* next = NULL;
380 419
381 raw_spin_lock(&cluster->cedf_lock); 420#ifdef CONFIG_RELEASE_MASTER
421 /* Bail out early if we are the release master.
422 * The release master never schedules any real-time tasks.
423 */
424 if (unlikely(cluster->domain.release_master == entry->cpu)) {
425 sched_state_task_picked();
426 return NULL;
427 }
428#endif
429
430 raw_spin_lock(&cluster->cluster_lock);
431
382 clear_will_schedule(); 432 clear_will_schedule();
383 433
384 /* sanity checking */ 434 /* sanity checking */
@@ -462,10 +512,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
462 next = prev; 512 next = prev;
463 513
464 sched_state_task_picked(); 514 sched_state_task_picked();
465 raw_spin_unlock(&cluster->cedf_lock); 515 raw_spin_unlock(&cluster->cluster_lock);
466 516
467#ifdef WANT_ALL_SCHED_EVENTS 517#ifdef WANT_ALL_SCHED_EVENTS
468 TRACE("cedf_lock released, next=0x%p\n", next); 518 TRACE("cluster_lock released, next=0x%p\n", next);
469 519
470 if (next) 520 if (next)
471 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 521 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
@@ -504,7 +554,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
504 /* the cluster doesn't change even if t is running */ 554 /* the cluster doesn't change even if t is running */
505 cluster = task_cpu_cluster(t); 555 cluster = task_cpu_cluster(t);
506 556
507 raw_spin_lock_irqsave(&cluster->cedf_lock, flags); 557 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
508 558
509 /* setup job params */ 559 /* setup job params */
510 release_at(t, litmus_clock()); 560 release_at(t, litmus_clock());
@@ -513,15 +563,25 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
513 entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); 563 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
514 BUG_ON(entry->scheduled); 564 BUG_ON(entry->scheduled);
515 565
516 entry->scheduled = t; 566#ifdef CONFIG_RELEASE_MASTER
517 tsk_rt(t)->scheduled_on = task_cpu(t); 567 if (entry->cpu != cluster->domain.release_master) {
568#endif
569 entry->scheduled = t;
570 tsk_rt(t)->scheduled_on = task_cpu(t);
571#ifdef CONFIG_RELEASE_MASTER
572 } else {
573 /* do not schedule on release master */
574 preempt(entry); /* force resched */
575 tsk_rt(t)->scheduled_on = NO_CPU;
576 }
577#endif
518 } else { 578 } else {
519 t->rt_param.scheduled_on = NO_CPU; 579 t->rt_param.scheduled_on = NO_CPU;
520 } 580 }
521 t->rt_param.linked_on = NO_CPU; 581 t->rt_param.linked_on = NO_CPU;
522 582
523 cedf_job_arrival(t); 583 cedf_job_arrival(t);
524 raw_spin_unlock_irqrestore(&(cluster->cedf_lock), flags); 584 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
525} 585}
526 586
527static void cedf_task_wake_up(struct task_struct *task) 587static void cedf_task_wake_up(struct task_struct *task)
@@ -534,7 +594,8 @@ static void cedf_task_wake_up(struct task_struct *task)
534 594
535 cluster = task_cpu_cluster(task); 595 cluster = task_cpu_cluster(task);
536 596
537 raw_spin_lock_irqsave(&cluster->cedf_lock, flags); 597 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
598
538 /* We need to take suspensions because of semaphores into 599 /* We need to take suspensions because of semaphores into
539 * account! If a job resumes after being suspended due to acquiring 600 * account! If a job resumes after being suspended due to acquiring
540 * a semaphore, it should never be treated as a new job release. 601 * a semaphore, it should never be treated as a new job release.
@@ -557,7 +618,8 @@ static void cedf_task_wake_up(struct task_struct *task)
557 } 618 }
558 } 619 }
559 cedf_job_arrival(task); 620 cedf_job_arrival(task);
560 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); 621
622 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
561} 623}
562 624
563static void cedf_task_block(struct task_struct *t) 625static void cedf_task_block(struct task_struct *t)
@@ -570,9 +632,9 @@ static void cedf_task_block(struct task_struct *t)
570 cluster = task_cpu_cluster(t); 632 cluster = task_cpu_cluster(t);
571 633
572 /* unlink if necessary */ 634 /* unlink if necessary */
573 raw_spin_lock_irqsave(&cluster->cedf_lock, flags); 635 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
574 unlink(t); 636 unlink(t);
575 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); 637 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
576 638
577 BUG_ON(!is_realtime(t)); 639 BUG_ON(!is_realtime(t));
578} 640}
@@ -584,7 +646,7 @@ static void cedf_task_exit(struct task_struct * t)
584 cedf_domain_t *cluster = task_cpu_cluster(t); 646 cedf_domain_t *cluster = task_cpu_cluster(t);
585 647
586 /* unlink if necessary */ 648 /* unlink if necessary */
587 raw_spin_lock_irqsave(&cluster->cedf_lock, flags); 649 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
588 unlink(t); 650 unlink(t);
589 if (tsk_rt(t)->scheduled_on != NO_CPU) { 651 if (tsk_rt(t)->scheduled_on != NO_CPU) {
590 cpu_entry_t *cpu; 652 cpu_entry_t *cpu;
@@ -592,7 +654,7 @@ static void cedf_task_exit(struct task_struct * t)
592 cpu->scheduled = NULL; 654 cpu->scheduled = NULL;
593 tsk_rt(t)->scheduled_on = NO_CPU; 655 tsk_rt(t)->scheduled_on = NO_CPU;
594 } 656 }
595 raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); 657 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
596 658
597 BUG_ON(!is_realtime(t)); 659 BUG_ON(!is_realtime(t));
598 TRACE_TASK(t, "RIP\n"); 660 TRACE_TASK(t, "RIP\n");
@@ -605,16 +667,6 @@ static long cedf_admit_task(struct task_struct* tsk)
605 667
606 668
607 669
608
609
610
611
612
613
614
615
616
617
618#ifdef CONFIG_LITMUS_LOCKING 670#ifdef CONFIG_LITMUS_LOCKING
619 671
620#include <litmus/fdso.h> 672#include <litmus/fdso.h>
@@ -692,11 +744,11 @@ static void set_priority_inheritance(struct task_struct* t, struct task_struct*
692{ 744{
693 cedf_domain_t* cluster = task_cpu_cluster(t); 745 cedf_domain_t* cluster = task_cpu_cluster(t);
694 746
695 raw_spin_lock(&cluster->cedf_lock); 747 raw_spin_lock(&cluster->cluster_lock);
696 748
697 __set_priority_inheritance(t, prio_inh); 749 __set_priority_inheritance(t, prio_inh);
698 750
699 raw_spin_unlock(&cluster->cedf_lock); 751 raw_spin_unlock(&cluster->cluster_lock);
700} 752}
701 753
702 754
@@ -727,9 +779,9 @@ static void clear_priority_inheritance(struct task_struct* t)
727{ 779{
728 cedf_domain_t* cluster = task_cpu_cluster(t); 780 cedf_domain_t* cluster = task_cpu_cluster(t);
729 781
730 raw_spin_lock(&cluster->cedf_lock); 782 raw_spin_lock(&cluster->cluster_lock);
731 __clear_priority_inheritance(t); 783 __clear_priority_inheritance(t);
732 raw_spin_unlock(&cluster->cedf_lock); 784 raw_spin_unlock(&cluster->cluster_lock);
733} 785}
734 786
735 787
@@ -857,7 +909,7 @@ static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
857 909
858 cluster = task_cpu_cluster(max_hp); 910 cluster = task_cpu_cluster(max_hp);
859 911
860 raw_spin_lock(&cluster->cedf_lock); 912 raw_spin_lock(&cluster->cluster_lock);
861 913
862 if(tsk_rt(my_queue->owner)->inh_task == max_hp) 914 if(tsk_rt(my_queue->owner)->inh_task == max_hp)
863 { 915 {
@@ -867,7 +919,7 @@ static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
867 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); 919 __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
868 } 920 }
869 } 921 }
870 raw_spin_unlock(&cluster->cedf_lock); 922 raw_spin_unlock(&cluster->cluster_lock);
871 923
872 list_for_each(pos, &my_queue->wait.task_list) 924 list_for_each(pos, &my_queue->wait.task_list)
873 { 925 {
@@ -1270,6 +1322,9 @@ static long cedf_activate_plugin(void)
1270 1322
1271 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 1323 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
1272 return -ENOMEM; 1324 return -ENOMEM;
1325#ifdef CONFIG_RELEASE_MASTER
1326 cedf[i].domain.release_master = atomic_read(&release_master_cpu);
1327#endif
1273 } 1328 }
1274 1329
1275 /* cycle through cluster and add cpus to them */ 1330 /* cycle through cluster and add cpus to them */
@@ -1312,7 +1367,11 @@ static long cedf_activate_plugin(void)
1312 1367
1313 entry->linked = NULL; 1368 entry->linked = NULL;
1314 entry->scheduled = NULL; 1369 entry->scheduled = NULL;
1315 update_cpu_position(entry); 1370#ifdef CONFIG_RELEASE_MASTER
1371 /* only add CPUs that should schedule jobs */
1372 if (entry->cpu != entry->cluster->domain.release_master)
1373#endif
1374 update_cpu_position(entry);
1316 } 1375 }
1317 /* done with this cluster */ 1376 /* done with this cluster */
1318 break; 1377 break;
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index b87524cf1802..d5bb326ebc9b 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -20,11 +20,16 @@
20#include <litmus/sched_plugin.h> 20#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h> 21#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h> 22#include <litmus/sched_trace.h>
23#include <litmus/trace.h>
23 24
24#include <litmus/preempt.h> 25#include <litmus/preempt.h>
25 26
26#include <litmus/bheap.h> 27#include <litmus/bheap.h>
27 28
29#ifdef CONFIG_SCHED_CPU_AFFINITY
30#include <litmus/affinity.h>
31#endif
32
28#include <linux/module.h> 33#include <linux/module.h>
29 34
30/* Overview of GSN-EDF operations. 35/* Overview of GSN-EDF operations.
@@ -255,21 +260,52 @@ static noinline void requeue(struct task_struct* task)
255 } 260 }
256} 261}
257 262
263#ifdef CONFIG_SCHED_CPU_AFFINITY
264static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
265{
266 cpu_entry_t *affinity;
267
268 get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
269#ifdef CONFIG_RELEASE_MASTER
270 gsnedf.release_master
271#else
272 NO_CPU
273#endif
274 );
275
276 return(affinity);
277}
278#endif
279
258/* check for any necessary preemptions */ 280/* check for any necessary preemptions */
259static void check_for_preemptions(void) 281static void check_for_preemptions(void)
260{ 282{
261 struct task_struct *task; 283 struct task_struct *task;
262 cpu_entry_t* last; 284 cpu_entry_t *last;
263 285
264 for(last = lowest_prio_cpu(); 286 for (last = lowest_prio_cpu();
265 edf_preemption_needed(&gsnedf, last->linked); 287 edf_preemption_needed(&gsnedf, last->linked);
266 last = lowest_prio_cpu()) { 288 last = lowest_prio_cpu()) {
267 /* preemption necessary */ 289 /* preemption necessary */
268 task = __take_ready(&gsnedf); 290 task = __take_ready(&gsnedf);
269 TRACE("check_for_preemptions: attempting to link task %d to %d\n", 291 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
270 task->pid, last->cpu); 292 task->pid, last->cpu);
293
294#ifdef CONFIG_SCHED_CPU_AFFINITY
295 {
296 cpu_entry_t *affinity =
297 gsnedf_get_nearest_available_cpu(
298 &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
299 if (affinity)
300 last = affinity;
301 else if (last->linked)
302 requeue(last->linked);
303 }
304#else
271 if (last->linked) 305 if (last->linked)
272 requeue(last->linked); 306 requeue(last->linked);
307#endif
308
273 link_task_to_cpu(task, last); 309 link_task_to_cpu(task, last);
274 preempt(last); 310 preempt(last);
275 } 311 }
@@ -376,8 +412,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
376 /* Bail out early if we are the release master. 412 /* Bail out early if we are the release master.
377 * The release master never schedules any real-time tasks. 413 * The release master never schedules any real-time tasks.
378 */ 414 */
379 if (gsnedf.release_master == entry->cpu) 415 if (unlikely(gsnedf.release_master == entry->cpu)) {
416 sched_state_task_picked();
380 return NULL; 417 return NULL;
418 }
381#endif 419#endif
382 420
383 raw_spin_lock(&gsnedf_lock); 421 raw_spin_lock(&gsnedf_lock);
@@ -783,6 +821,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
783 set_priority_inheritance(sem->owner, sem->hp_waiter); 821 set_priority_inheritance(sem->owner, sem->hp_waiter);
784 } 822 }
785 823
824 TS_LOCK_SUSPEND;
825
786 /* release lock before sleeping */ 826 /* release lock before sleeping */
787 spin_unlock_irqrestore(&sem->wait.lock, flags); 827 spin_unlock_irqrestore(&sem->wait.lock, flags);
788 828
@@ -793,6 +833,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
793 833
794 schedule(); 834 schedule();
795 835
836 TS_LOCK_RESUME;
837
796 /* Since we hold the lock, no other task will change 838 /* Since we hold the lock, no other task will change
797 * ->owner. We can thus check it without acquiring the spin 839 * ->owner. We can thus check it without acquiring the spin
798 * lock. */ 840 * lock. */
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 1bca2e1a33cd..9a6fe487718e 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -254,12 +254,12 @@ static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
254 return; 254 return;
255} 255}
256 256
257static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running) 257static void switched_to_litmus(struct rq *rq, struct task_struct *p)
258{ 258{
259} 259}
260 260
261static void prio_changed_litmus(struct rq *rq, struct task_struct *p, 261static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
262 int oldprio, int running) 262 int oldprio)
263{ 263{
264} 264}
265 265
@@ -285,8 +285,8 @@ static void set_curr_task_litmus(struct rq *rq)
285 * We don't care about the scheduling domain; can gets called from 285 * We don't care about the scheduling domain; can gets called from
286 * exec, fork, wakeup. 286 * exec, fork, wakeup.
287 */ 287 */
288static int select_task_rq_litmus(struct rq *rq, struct task_struct *p, 288static int
289 int sd_flag, int flags) 289select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
290{ 290{
291 /* preemption is already disabled. 291 /* preemption is already disabled.
292 * We don't want to change cpu here 292 * We don't want to change cpu here
@@ -296,7 +296,12 @@ static int select_task_rq_litmus(struct rq *rq, struct task_struct *p,
296#endif 296#endif
297 297
298static const struct sched_class litmus_sched_class = { 298static const struct sched_class litmus_sched_class = {
299 .next = &rt_sched_class, 299 /* From 34f971f6 the stop/migrate worker threads have a class on
300 * their own, which is the highest prio class. We don't support
301 * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0
302 * CPU capacity.
303 */
304 .next = &stop_sched_class,
300 .enqueue_task = enqueue_task_litmus, 305 .enqueue_task = enqueue_task_litmus,
301 .dequeue_task = dequeue_task_litmus, 306 .dequeue_task = dequeue_task_litmus,
302 .yield_task = yield_task_litmus, 307 .yield_task = yield_task_litmus,
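The .next change above only moves litmus_sched_class within the chain of scheduling classes that the core kernel of this era walks from the highest-priority class downwards when picking the next task; before the patch it chained directly to rt_sched_class and so bypassed the stop class introduced by commit 34f971f6. A simplified stand-in model of that walk (names and types are illustrative, not the real kernel API):

#include <stdio.h>

struct sched_class_model {
	const char *name;
	const struct sched_class_model *next;	/* next lower-priority class */
};

static const struct sched_class_model fair   = { "fair",   NULL  };
static const struct sched_class_model rt     = { "rt",     &fair };
static const struct sched_class_model stop   = { "stop",   &rt   };
static const struct sched_class_model litmus = { "litmus", &stop };	/* .next = &stop_sched_class */

/* pick_next_task()-style walk, starting from the highest class in this model. */
static const char *pick_first_runnable(int (*has_runnable)(const char *name))
{
	const struct sched_class_model *c;

	for (c = &litmus; c; c = c->next)
		if (has_runnable(c->name))
			return c->name;
	return "idle";
}

static int only_fair_has_work(const char *name)
{
	return name[0] == 'f';	/* pretend only CFS has a runnable task */
}

int main(void)
{
	printf("picked class: %s\n", pick_first_runnable(only_fair_has_work));
	return 0;
}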
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
index 0a64273daa47..16f1065bbdca 100644
--- a/litmus/sched_pfair.c
+++ b/litmus/sched_pfair.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * kernel/sched_pfair.c 2 * kernel/sched_pfair.c
3 * 3 *
4 * Implementation of the (global) Pfair scheduling algorithm. 4 * Implementation of the PD^2 pfair scheduling algorithm. This
5 * implementation realizes "early releasing," i.e., it is work-conserving.
5 * 6 *
6 */ 7 */
7 8
@@ -76,36 +77,29 @@ struct pfair_state {
76 struct task_struct* local; /* the local copy of linked */ 77 struct task_struct* local; /* the local copy of linked */
77 struct task_struct* scheduled; /* what is actually scheduled */ 78 struct task_struct* scheduled; /* what is actually scheduled */
78 79
79 unsigned long missed_quanta;
80 lt_t offset; /* stagger offset */ 80 lt_t offset; /* stagger offset */
81 unsigned int missed_updates;
82 unsigned int missed_quanta;
81}; 83};
82 84
83/* Currently, we limit the maximum period of any task to 2000 quanta.
84 * The reason is that it makes the implementation easier since we do not
85 * need to reallocate the release wheel on task arrivals.
86 * In the future
87 */
88#define PFAIR_MAX_PERIOD 2000
89
90struct pfair_cluster { 85struct pfair_cluster {
91 struct scheduling_cluster topology; 86 struct scheduling_cluster topology;
92 87
93 /* The "global" time in this cluster. */ 88 /* The "global" time in this cluster. */
94 quanta_t pfair_time; /* the "official" PFAIR clock */ 89 quanta_t pfair_time; /* the "official" PFAIR clock */
95 quanta_t merge_time; /* Updated after the release queue has been
96 * merged. Used by drop_all_references().
97 */
98 90
99 /* The ready queue for this cluster. */ 91 /* The ready queue for this cluster. */
100 rt_domain_t pfair; 92 rt_domain_t pfair;
101 93
102 /* This is the release queue wheel for this cluster. It is indexed by 94 /* The set of jobs that should have their release enacted at the next
103 * pfair_time % PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR 95 * quantum boundary.
104 * priority, so that it can be merged with the ready queue.
105 */ 96 */
106 struct bheap release_queue[PFAIR_MAX_PERIOD]; 97 struct bheap release_queue;
98 raw_spinlock_t release_lock;
107}; 99};
108 100
101#define RT_F_REQUEUE 0x2
102
109static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) 103static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
110{ 104{
111 return container_of(state->topology.cluster, struct pfair_cluster, topology); 105 return container_of(state->topology.cluster, struct pfair_cluster, topology);
@@ -121,6 +115,11 @@ static inline struct pfair_state* from_cluster_list(struct list_head* pos)
121 return list_entry(pos, struct pfair_state, topology.cluster_list); 115 return list_entry(pos, struct pfair_state, topology.cluster_list);
122} 116}
123 117
118static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
119{
120 return container_of(rt, struct pfair_cluster, pfair);
121}
122
124static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) 123static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
125{ 124{
126 /* The ready_lock is used to serialize all scheduling events. */ 125 /* The ready_lock is used to serialize all scheduling events. */
@@ -161,21 +160,11 @@ static quanta_t cur_deadline(struct task_struct* t)
161 return cur_subtask(t)->deadline + tsk_pfair(t)->release; 160 return cur_subtask(t)->deadline + tsk_pfair(t)->release;
162} 161}
163 162
164
165static quanta_t cur_sub_release(struct task_struct* t)
166{
167 return cur_subtask(t)->release + tsk_pfair(t)->release;
168}
169
170static quanta_t cur_release(struct task_struct* t) 163static quanta_t cur_release(struct task_struct* t)
171{ 164{
172#ifdef EARLY_RELEASE 165 /* This is early releasing: only the release of the first subtask
173 /* only the release of the first subtask counts when we early 166 * counts. */
174 * release */
175 return tsk_pfair(t)->release; 167 return tsk_pfair(t)->release;
176#else
177 return cur_sub_release(t);
178#endif
179} 168}
180 169
181static quanta_t cur_overlap(struct task_struct* t) 170static quanta_t cur_overlap(struct task_struct* t)
@@ -235,11 +224,16 @@ int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
235 return pfair_higher_prio(bheap2task(a), bheap2task(b)); 224 return pfair_higher_prio(bheap2task(a), bheap2task(b));
236} 225}
237 226
238/* return the proper release queue for time t */ 227static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
239static struct bheap* relq(struct pfair_cluster* cluster, quanta_t t)
240{ 228{
241 struct bheap* rq = cluster->release_queue + (t % PFAIR_MAX_PERIOD); 229 struct pfair_cluster* cluster = from_domain(rt);
242 return rq; 230 unsigned long flags;
231
232 raw_spin_lock_irqsave(&cluster->release_lock, flags);
233
234 bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
235
236 raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
243} 237}
244 238
245static void prepare_release(struct task_struct* t, quanta_t at) 239static void prepare_release(struct task_struct* t, quanta_t at)
@@ -248,25 +242,12 @@ static void prepare_release(struct task_struct* t, quanta_t at)
248 tsk_pfair(t)->cur = 0; 242 tsk_pfair(t)->cur = 0;
249} 243}
250 244
251static void __pfair_add_release(struct task_struct* t, struct bheap* queue)
252{
253 bheap_insert(pfair_ready_order, queue,
254 tsk_rt(t)->heap_node);
255}
256
257static void pfair_add_release(struct pfair_cluster* cluster,
258 struct task_struct* t)
259{
260 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
261 __pfair_add_release(t, relq(cluster, cur_release(t)));
262}
263
264/* pull released tasks from the release queue */ 245/* pull released tasks from the release queue */
265static void poll_releases(struct pfair_cluster* cluster, 246static void poll_releases(struct pfair_cluster* cluster)
266 quanta_t time)
267{ 247{
268 __merge_ready(&cluster->pfair, relq(cluster, time)); 248 raw_spin_lock(&cluster->release_lock);
269 cluster->merge_time = time; 249 __merge_ready(&cluster->pfair, &cluster->release_queue);
250 raw_spin_unlock(&cluster->release_lock);
270} 251}
271 252
272static void check_preempt(struct task_struct* t) 253static void check_preempt(struct task_struct* t)
@@ -292,16 +273,12 @@ static void drop_all_references(struct task_struct *t)
292{ 273{
293 int cpu; 274 int cpu;
294 struct pfair_state* s; 275 struct pfair_state* s;
295 struct bheap* q;
296 struct pfair_cluster* cluster; 276 struct pfair_cluster* cluster;
297 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { 277 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
298 /* figure out what queue the node is in */ 278 /* It must be in the ready queue; drop references isn't called
279 * when the job is in a release queue. */
299 cluster = tsk_pfair(t)->cluster; 280 cluster = tsk_pfair(t)->cluster;
300 if (time_before_eq(cur_release(t), cluster->merge_time)) 281 bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
301 q = &cluster->pfair.ready_queue;
302 else
303 q = relq(cluster, cur_release(t));
304 bheap_delete(pfair_ready_order, q,
305 tsk_rt(t)->heap_node); 282 tsk_rt(t)->heap_node);
306 } 283 }
307 for (cpu = 0; cpu < num_online_cpus(); cpu++) { 284 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
@@ -313,6 +290,17 @@ static void drop_all_references(struct task_struct *t)
313 if (s->scheduled == t) 290 if (s->scheduled == t)
314 s->scheduled = NULL; 291 s->scheduled = NULL;
315 } 292 }
293 /* make sure we don't have a stale linked_on field */
294 tsk_rt(t)->linked_on = NO_CPU;
295}
296
297static void pfair_prepare_next_period(struct task_struct* t)
298{
299 struct pfair_param* p = tsk_pfair(t);
300
301 prepare_for_next_period(t);
302 get_rt_flags(t) = RT_F_RUNNING;
303 p->release += p->period;
316} 304}
317 305
318/* returns 1 if the task needs to go the release queue */ 306/* returns 1 if the task needs to go the release queue */
@@ -322,30 +310,26 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
322 int to_relq; 310 int to_relq;
323 p->cur = (p->cur + 1) % p->quanta; 311 p->cur = (p->cur + 1) % p->quanta;
324 if (!p->cur) { 312 if (!p->cur) {
325 sched_trace_task_completion(t, 1);
326 if (tsk_rt(t)->present) { 313 if (tsk_rt(t)->present) {
327 /* we start a new job */ 314 /* The job overran; we start a new budget allocation. */
328 prepare_for_next_period(t); 315 pfair_prepare_next_period(t);
329 sched_trace_task_release(t);
330 get_rt_flags(t) = RT_F_RUNNING;
331 p->release += p->period;
332 } else { 316 } else {
333 /* remove task from system until it wakes */ 317 /* remove task from system until it wakes */
334 drop_all_references(t); 318 drop_all_references(t);
319 tsk_rt(t)->flags = RT_F_REQUEUE;
335 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n", 320 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
336 cpu, p->cur); 321 cpu, p->cur);
337 return 0; 322 return 0;
338 } 323 }
339 } 324 }
340 to_relq = time_after(cur_release(t), time); 325 to_relq = time_after(cur_release(t), time);
341 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n", 326 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
342 cpu, p->cur, to_relq); 327 cpu, p->cur, to_relq, cur_release(t), time);
343 return to_relq; 328 return to_relq;
344} 329}
345 330
346static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) 331static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
347{ 332{
348 int missed;
349 struct task_struct* l; 333 struct task_struct* l;
350 struct pfair_param* p; 334 struct pfair_param* p;
351 struct list_head* pos; 335 struct list_head* pos;
@@ -354,14 +338,17 @@ static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
354 list_for_each(pos, &cluster->topology.cpus) { 338 list_for_each(pos, &cluster->topology.cpus) {
355 cpu = from_cluster_list(pos); 339 cpu = from_cluster_list(pos);
356 l = cpu->linked; 340 l = cpu->linked;
357 missed = cpu->linked != cpu->local; 341 cpu->missed_updates += cpu->linked != cpu->local;
358 if (l) { 342 if (l) {
359 p = tsk_pfair(l); 343 p = tsk_pfair(l);
360 p->last_quantum = time; 344 p->last_quantum = time;
361 p->last_cpu = cpu_id(cpu); 345 p->last_cpu = cpu_id(cpu);
362 if (advance_subtask(time, l, cpu_id(cpu))) { 346 if (advance_subtask(time, l, cpu_id(cpu))) {
363 cpu->linked = NULL; 347 //cpu->linked = NULL;
364 pfair_add_release(cluster, l); 348 PTRACE_TASK(l, "should go to release queue. "
349 "scheduled_on=%d present=%d\n",
350 tsk_rt(l)->scheduled_on,
351 tsk_rt(l)->present);
365 } 352 }
366 } 353 }
367 } 354 }
@@ -445,6 +432,11 @@ static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
445 list_for_each(pos, &cluster->topology.cpus) { 432 list_for_each(pos, &cluster->topology.cpus) {
446 cpu_state = from_cluster_list(pos); 433 cpu_state = from_cluster_list(pos);
447 retry = 1; 434 retry = 1;
435#ifdef CONFIG_RELEASE_MASTER
436 /* skip release master */
437 if (cluster->pfair.release_master == cpu_id(cpu_state))
438 continue;
439#endif
448 while (retry) { 440 while (retry) {
449 if (pfair_higher_prio(__peek_ready(&cluster->pfair), 441 if (pfair_higher_prio(__peek_ready(&cluster->pfair),
450 cpu_state->linked)) 442 cpu_state->linked))
@@ -471,13 +463,13 @@ static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
471 sched_trace_quantum_boundary(); 463 sched_trace_quantum_boundary();
472 464
473 advance_subtasks(cluster, time); 465 advance_subtasks(cluster, time);
474 poll_releases(cluster, time); 466 poll_releases(cluster);
475 schedule_subtasks(cluster, time); 467 schedule_subtasks(cluster, time);
476 468
477 list_for_each(pos, &cluster->topology.cpus) { 469 list_for_each(pos, &cluster->topology.cpus) {
478 cpu = from_cluster_list(pos); 470 cpu = from_cluster_list(pos);
479 if (cpu->linked) 471 if (cpu->linked)
480 PTRACE_TASK(pstate[cpu]->linked, 472 PTRACE_TASK(cpu->linked,
481 " linked on %d.\n", cpu_id(cpu)); 473 " linked on %d.\n", cpu_id(cpu));
482 else 474 else
483 PTRACE("(null) linked on %d.\n", cpu_id(cpu)); 475 PTRACE("(null) linked on %d.\n", cpu_id(cpu));
@@ -612,12 +604,42 @@ static int safe_to_schedule(struct task_struct* t, int cpu)
612static struct task_struct* pfair_schedule(struct task_struct * prev) 604static struct task_struct* pfair_schedule(struct task_struct * prev)
613{ 605{
614 struct pfair_state* state = &__get_cpu_var(pfair_state); 606 struct pfair_state* state = &__get_cpu_var(pfair_state);
615 int blocks; 607 struct pfair_cluster* cluster = cpu_cluster(state);
608 int blocks, completion, out_of_time;
616 struct task_struct* next = NULL; 609 struct task_struct* next = NULL;
617 610
611#ifdef CONFIG_RELEASE_MASTER
612 /* Bail out early if we are the release master.
613 * The release master never schedules any real-time tasks.
614 */
615 if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
616 sched_state_task_picked();
617 return NULL;
618 }
619#endif
620
618 raw_spin_lock(cpu_lock(state)); 621 raw_spin_lock(cpu_lock(state));
619 622
620 blocks = is_realtime(prev) && !is_running(prev); 623 blocks = is_realtime(prev) && !is_running(prev);
624 completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
625 out_of_time = is_realtime(prev) && time_after(cur_release(prev),
626 state->local_tick);
627
628 if (is_realtime(prev))
629 PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
630 blocks, completion, out_of_time);
631
632 if (completion) {
633 sched_trace_task_completion(prev, 0);
634 pfair_prepare_next_period(prev);
635 prepare_release(prev, cur_release(prev));
636 }
637
638 if (!blocks && (completion || out_of_time)) {
639 drop_all_references(prev);
640 sched_trace_task_release(prev);
641 add_release(&cluster->pfair, prev);
642 }
621 643
622 if (state->local && safe_to_schedule(state->local, cpu_id(state))) 644 if (state->local && safe_to_schedule(state->local, cpu_id(state)))
623 next = state->local; 645 next = state->local;
@@ -649,13 +671,19 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int running)
649 cluster = tsk_pfair(t)->cluster; 671 cluster = tsk_pfair(t)->cluster;
650 672
651 raw_spin_lock_irqsave(cluster_lock(cluster), flags); 673 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
652 if (running)
653 t->rt_param.scheduled_on = task_cpu(t);
654 else
655 t->rt_param.scheduled_on = NO_CPU;
656 674
657 prepare_release(t, cluster->pfair_time + 1); 675 prepare_release(t, cluster->pfair_time + 1);
658 pfair_add_release(cluster, t); 676
677 t->rt_param.scheduled_on = NO_CPU;
678
679 if (running) {
680#ifdef CONFIG_RELEASE_MASTER
681 if (task_cpu(t) != cluster->pfair.release_master)
682#endif
683 t->rt_param.scheduled_on = task_cpu(t);
684 __add_ready(&cluster->pfair, t);
685 }
686
659 check_preempt(t); 687 check_preempt(t);
660 688
661 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); 689 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
@@ -665,6 +693,7 @@ static void pfair_task_wake_up(struct task_struct *t)
665{ 693{
666 unsigned long flags; 694 unsigned long flags;
667 lt_t now; 695 lt_t now;
696 int requeue = 0;
668 struct pfair_cluster* cluster; 697 struct pfair_cluster* cluster;
669 698
670 cluster = tsk_pfair(t)->cluster; 699 cluster = tsk_pfair(t)->cluster;
@@ -679,13 +708,20 @@ static void pfair_task_wake_up(struct task_struct *t)
679 * (as if it never blocked at all). Otherwise, we have a 708 * (as if it never blocked at all). Otherwise, we have a
680 * new sporadic job release. 709 * new sporadic job release.
681 */ 710 */
711 requeue = tsk_rt(t)->flags == RT_F_REQUEUE;
682 now = litmus_clock(); 712 now = litmus_clock();
683 if (lt_before(get_deadline(t), now)) { 713 if (lt_before(get_deadline(t), now)) {
714 TRACE_TASK(t, "sporadic release!\n");
684 release_at(t, now); 715 release_at(t, now);
685 prepare_release(t, time2quanta(now, CEIL)); 716 prepare_release(t, time2quanta(now, CEIL));
686 sched_trace_task_release(t); 717 sched_trace_task_release(t);
687 /* FIXME: race with pfair_time advancing */ 718 }
688 pfair_add_release(cluster, t); 719
720 /* only add to ready queue if the task isn't still linked somewhere */
721 if (requeue) {
722 TRACE_TASK(t, "requeueing required\n");
723 tsk_rt(t)->flags = RT_F_RUNNING;
724 __add_ready(&cluster->pfair, t);
689 } 725 }
690 726
691 check_preempt(t); 727 check_preempt(t);
@@ -744,15 +780,11 @@ static void pfair_release_at(struct task_struct* task, lt_t start)
744 release_at(task, start); 780 release_at(task, start);
745 release = time2quanta(start, CEIL); 781 release = time2quanta(start, CEIL);
746 782
747 /* FIXME: support arbitrary offsets. */
748 if (release - cluster->pfair_time >= PFAIR_MAX_PERIOD)
749 release = cluster->pfair_time + PFAIR_MAX_PERIOD;
750
751 TRACE_TASK(task, "sys release at %lu\n", release); 783 TRACE_TASK(task, "sys release at %lu\n", release);
752 784
753 drop_all_references(task); 785 drop_all_references(task);
754 prepare_release(task, release); 786 prepare_release(task, release);
755 pfair_add_release(cluster, task); 787 add_release(&cluster->pfair, task);
756 788
757 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); 789 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
758} 790}
@@ -834,13 +866,6 @@ static long pfair_admit_task(struct task_struct* t)
834 "The period of %s/%d is not a multiple of %llu.\n", 866 "The period of %s/%d is not a multiple of %llu.\n",
835 t->comm, t->pid, (unsigned long long) quantum_length); 867 t->comm, t->pid, (unsigned long long) quantum_length);
836 868
837 if (period >= PFAIR_MAX_PERIOD) {
838 printk(KERN_WARNING
839 "PFAIR: Rejecting task %s/%d; its period is too long.\n",
840 t->comm, t->pid);
841 return -EINVAL;
842 }
843
844 if (quanta == period) { 869 if (quanta == period) {
845 /* special case: task has weight 1.0 */ 870 /* special case: task has weight 1.0 */
846 printk(KERN_INFO 871 printk(KERN_INFO
@@ -880,12 +905,9 @@ static long pfair_admit_task(struct task_struct* t)
880 905
881static void pfair_init_cluster(struct pfair_cluster* cluster) 906static void pfair_init_cluster(struct pfair_cluster* cluster)
882{ 907{
883 int i; 908 rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
884 909 bheap_init(&cluster->release_queue);
885 /* initialize release queue */ 910 raw_spin_lock_init(&cluster->release_lock);
886 for (i = 0; i < PFAIR_MAX_PERIOD; i++)
887 bheap_init(&cluster->release_queue[i]);
888 rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, NULL);
889 INIT_LIST_HEAD(&cluster->topology.cpus); 911 INIT_LIST_HEAD(&cluster->topology.cpus);
890} 912}
891 913
@@ -899,8 +921,11 @@ static void cleanup_clusters(void)
899 num_pfair_clusters = 0; 921 num_pfair_clusters = 0;
900 922
901 /* avoid stale pointers */ 923 /* avoid stale pointers */
902 for (i = 0; i < NR_CPUS; i++) 924 for (i = 0; i < num_online_cpus(); i++) {
903 pstate[i]->topology.cluster = NULL; 925 pstate[i]->topology.cluster = NULL;
926 printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
927 pstate[i]->missed_updates, pstate[i]->missed_quanta);
928 }
904} 929}
905 930
906static long pfair_activate_plugin(void) 931static long pfair_activate_plugin(void)
@@ -936,6 +961,9 @@ static long pfair_activate_plugin(void)
936 pfair_init_cluster(cluster); 961 pfair_init_cluster(cluster);
937 cluster->pfair_time = now; 962 cluster->pfair_time = now;
938 clust[i] = &cluster->topology; 963 clust[i] = &cluster->topology;
964#ifdef CONFIG_RELEASE_MASTER
965 cluster->pfair.release_master = atomic_read(&release_master_cpu);
966#endif
939 } 967 }
940 968
941 for (i = 0; i < num_online_cpus(); i++) { 969 for (i = 0; i < num_online_cpus(); i++) {
@@ -943,6 +971,7 @@ static long pfair_activate_plugin(void)
943 state->cur_tick = now; 971 state->cur_tick = now;
944 state->local_tick = now; 972 state->local_tick = now;
945 state->missed_quanta = 0; 973 state->missed_quanta = 0;
974 state->missed_updates = 0;
946 state->offset = cpu_stagger_offset(i); 975 state->offset = cpu_stagger_offset(i);
947 printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus()); 976 printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus());
948 cpus[i] = &state->topology; 977 cpus[i] = &state->topology;
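For context on the PD^2 and early-releasing comments in this file: the subtask windows that the plugin schedules follow the standard Pfair definitions, where a task needing "quanta" units of execution every "period" quanta is split into subtasks whose windows are derived from its weight quanta/period. A small standalone example (standard textbook formulas, not code from the patch) that prints the windows of a weight-3/5 task:

#include <stdio.h>

int main(void)
{
	unsigned quanta = 3, period = 5;	/* weight 3/5 */
	unsigned i;

	for (i = 0; i < quanta; i++) {
		/* release  = floor(i * period / quanta),
		 * deadline = ceil((i + 1) * period / quanta),
		 * both relative to the job release (in quanta). */
		unsigned release  = (i * period) / quanta;
		unsigned deadline = ((i + 1) * period + quanta - 1) / quanta;

		printf("subtask %u: window [%u, %u)\n", i, release, deadline);
	}
	return 0;	/* prints [0,2), [1,4), [3,5) */
}

With early releasing, the behavior the rewritten header comment describes, a subtask may be scheduled before its nominal window opens as long as its predecessor has completed, which is what makes the implementation work-conserving.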
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index d54886df1f57..00a1900d6457 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -35,29 +35,18 @@ void preempt_if_preemptable(struct task_struct* t, int cpu)
35 /* local CPU case */ 35 /* local CPU case */
36 /* check if we need to poke userspace */ 36 /* check if we need to poke userspace */
37 if (is_user_np(t)) 37 if (is_user_np(t))
38 /* yes, poke it */ 38 /* Yes, poke it. This doesn't have to be atomic since
39 * the task is definitely not executing. */
39 request_exit_np(t); 40 request_exit_np(t);
40 else if (!is_kernel_np(t)) 41 else if (!is_kernel_np(t))
41 /* only if we are allowed to preempt the 42 /* only if we are allowed to preempt the
42 * currently-executing task */ 43 * currently-executing task */
43 reschedule = 1; 44 reschedule = 1;
44 } else { 45 } else {
45 /* remote CPU case */ 46 /* Remote CPU case. Only notify if it's not a kernel
46 if (is_user_np(t)) { 47 * NP section and if we didn't set the userspace
47 /* need to notify user space of delayed 48 * flag. */
48 * preemption */ 49 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
49
50 /* to avoid a race, set the flag, then test
51 * again */
52 request_exit_np(t);
53 /* make sure it got written */
54 mb();
55 }
56 /* Only send an ipi if remote task might have raced our
57 * request, i.e., send an IPI to make sure in case it
58 * exited its critical section.
59 */
60 reschedule = !is_np(t) && !is_kernel_np(t);
61 } 50 }
62 } 51 }
63 if (likely(reschedule)) 52 if (likely(reschedule))
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index 71c02409efa2..8e4a22dd8d6a 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -20,6 +20,7 @@
20#include <litmus/sched_plugin.h> 20#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h> 21#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h> 22#include <litmus/sched_trace.h>
23#include <litmus/trace.h>
23 24
24typedef struct { 25typedef struct {
25 rt_domain_t domain; 26 rt_domain_t domain;
@@ -383,12 +384,6 @@ static unsigned int psnedf_get_srp_prio(struct task_struct* t)
383 return get_rt_period(t); 384 return get_rt_period(t);
384} 385}
385 386
386static long psnedf_activate_plugin(void)
387{
388 get_srp_prio = psnedf_get_srp_prio;
389 return 0;
390}
391
392/* ******************** FMLP support ********************** */ 387/* ******************** FMLP support ********************** */
393 388
394/* struct for semaphore with priority inheritance */ 389/* struct for semaphore with priority inheritance */
@@ -428,6 +423,8 @@ int psnedf_fmlp_lock(struct litmus_lock* l)
428 423
429 __add_wait_queue_tail_exclusive(&sem->wait, &wait); 424 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
430 425
426 TS_LOCK_SUSPEND;
427
431 /* release lock before sleeping */ 428 /* release lock before sleeping */
432 spin_unlock_irqrestore(&sem->wait.lock, flags); 429 spin_unlock_irqrestore(&sem->wait.lock, flags);
433 430
@@ -438,6 +435,8 @@ int psnedf_fmlp_lock(struct litmus_lock* l)
438 435
439 schedule(); 436 schedule();
440 437
438 TS_LOCK_RESUME;
439
441 /* Since we hold the lock, no other task will change 440 /* Since we hold the lock, no other task will change
442 * ->owner. We can thus check it without acquiring the spin 441 * ->owner. We can thus check it without acquiring the spin
443 * lock. */ 442 * lock. */
@@ -577,9 +576,35 @@ static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
577 576
578#endif 577#endif
579 578
579
580static long psnedf_activate_plugin(void)
581{
582#ifdef CONFIG_RELEASE_MASTER
583 int cpu;
584
585 for_each_online_cpu(cpu) {
586 remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
587 }
588#endif
589
590#ifdef CONFIG_LITMUS_LOCKING
591 get_srp_prio = psnedf_get_srp_prio;
592#endif
593
594 return 0;
595}
596
580static long psnedf_admit_task(struct task_struct* tsk) 597static long psnedf_admit_task(struct task_struct* tsk)
581{ 598{
582 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 599 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
600#ifdef CONFIG_RELEASE_MASTER
601 /* don't allow tasks on release master CPU */
602 && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
603#endif
604 )
605 return 0;
606 else
607 return -EINVAL;
583} 608}
584 609
585/* Plugin object */ 610/* Plugin object */
@@ -593,9 +618,9 @@ static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
593 .task_wake_up = psnedf_task_wake_up, 618 .task_wake_up = psnedf_task_wake_up,
594 .task_block = psnedf_task_block, 619 .task_block = psnedf_task_block,
595 .admit_task = psnedf_admit_task, 620 .admit_task = psnedf_admit_task,
621 .activate_plugin = psnedf_activate_plugin,
596#ifdef CONFIG_LITMUS_LOCKING 622#ifdef CONFIG_LITMUS_LOCKING
597 .allocate_lock = psnedf_allocate_lock, 623 .allocate_lock = psnedf_allocate_lock,
598 .activate_plugin = psnedf_activate_plugin,
599#endif 624#endif
600}; 625};
601 626
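
psnedf_activate_plugin() now also pushes the configured release-master CPU into every per-CPU EDF domain, and psnedf_admit_task() rejects tasks partitioned onto that CPU, since it is reserved for servicing release interrupts. A sketch of the admit-time check, with a plain per-CPU table standing in for remote_edf(); the array and field names are assumptions:

    #include <errno.h>

    #define NR_CPUS_SKETCH     64
    #define NO_RELEASE_MASTER (-1)

    /* Stand-in for the per-CPU rt_domain_t; only the field used here. */
    struct edf_domain_sketch {
        int release_master;
    };

    static struct edf_domain_sketch domain_of[NR_CPUS_SKETCH];

    static long admit_task_sketch(int current_cpu, int assigned_cpu)
    {
        /* The task must already sit on its assigned partition ... */
        if (current_cpu != assigned_cpu)
            return -EINVAL;
        /* ... and that partition must not be the dedicated release master. */
        if (domain_of[assigned_cpu].release_master == assigned_cpu)
            return -EINVAL;
        return 0;
    }
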
diff --git a/litmus/trace.c b/litmus/trace.c
index e7ea1c2ab3e4..3c35c527e805 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -1,5 +1,6 @@
1#include <linux/sched.h> 1#include <linux/sched.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/uaccess.h>
3 4
4#include <litmus/ftdev.h> 5#include <litmus/ftdev.h>
5#include <litmus/litmus.h> 6#include <litmus/litmus.h>
@@ -15,6 +16,35 @@ static struct ftdev overhead_dev;
15 16
16static unsigned int ts_seq_no = 0; 17static unsigned int ts_seq_no = 0;
17 18
19DEFINE_PER_CPU(atomic_t, irq_fired_count);
20
21static inline void clear_irq_fired(void)
22{
23 atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
24}
25
26static inline unsigned int get_and_clear_irq_fired(void)
27{
28 /* This is potentially not atomic since we might migrate if
29 * preemptions are not disabled. As a tradeoff between
30 * accuracy and tracing overheads, this seems acceptable.
31 * If it proves to be a problem, then one could add a callback
32 * from the migration code to invalidate irq_fired_count.
33 */
34 return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
35}
36
37static inline void __save_irq_flags(struct timestamp *ts)
38{
39 unsigned int irq_count;
40
41 irq_count = get_and_clear_irq_fired();
42 /* Store how many interrupts occurred. */
43 ts->irq_count = irq_count;
44 /* Extra flag because ts->irq_count overflows quickly. */
45 ts->irq_flag = irq_count > 0;
46}
47
18static inline void __save_timestamp_cpu(unsigned long event, 48static inline void __save_timestamp_cpu(unsigned long event,
19 uint8_t type, uint8_t cpu) 49 uint8_t type, uint8_t cpu)
20{ 50{
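
get_and_clear_irq_fired() only drains the per-CPU counter; the increment has to come from the interrupt entry path, which is not part of this hunk. The following is therefore only a sketch of the expected counterpart, and the name ft_irq_fired() is assumed rather than taken from this diff:

    /* Assumed hook, called on IRQ/timer/IPI entry before the handler
     * runs; a per-CPU atomic_inc needs no locking and is safe in
     * interrupt context. */
    static inline void ft_irq_fired(void)
    {
        atomic_inc(&__raw_get_cpu_var(irq_fired_count));
    }
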
@@ -23,10 +53,26 @@ static inline void __save_timestamp_cpu(unsigned long event,
23 seq_no = fetch_and_inc((int *) &ts_seq_no); 53 seq_no = fetch_and_inc((int *) &ts_seq_no);
24 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { 54 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
25 ts->event = event; 55 ts->event = event;
26 ts->timestamp = ft_timestamp();
27 ts->seq_no = seq_no; 56 ts->seq_no = seq_no;
28 ts->cpu = cpu; 57 ts->cpu = cpu;
29 ts->task_type = type; 58 ts->task_type = type;
59 __save_irq_flags(ts);
60 barrier();
61 /* prevent re-ordering of ft_timestamp() */
62 ts->timestamp = ft_timestamp();
63 ft_buffer_finish_write(trace_ts_buf, ts);
64 }
65}
66
67static void __add_timestamp_user(struct timestamp *pre_recorded)
68{
69 unsigned int seq_no;
70 struct timestamp *ts;
71 seq_no = fetch_and_inc((int *) &ts_seq_no);
72 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
73 *ts = *pre_recorded;
74 ts->seq_no = seq_no;
75 __save_irq_flags(ts);
30 ft_buffer_finish_write(trace_ts_buf, ts); 76 ft_buffer_finish_write(trace_ts_buf, ts);
31 } 77 }
32} 78}
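
Taking ft_timestamp() as the very last store, behind barrier(), keeps the bookkeeping writes (sequence number, CPU, IRQ counts) out of the measured interval and stops the compiler from hoisting the timestamp read above them; barrier() is only a compiler fence, which suffices since everything happens on one CPU. A generic illustration of the same ordering constraint, where rdtsc and the field names are illustrative rather than the Feather-Trace definitions:

    /* Compiler-only fence: no CPU serialization, just no reordering by GCC. */
    #define compiler_barrier() __asm__ __volatile__("" ::: "memory")

    struct sample {
        unsigned int seq, cpu, irq_count;
        unsigned long long cycles;
    };

    static inline unsigned long long rdtsc_sketch(void)
    {
        unsigned int lo, hi;
        __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
        return ((unsigned long long)hi << 32) | lo;
    }

    static void fill_sample(struct sample *s, unsigned int seq,
                            unsigned int cpu, unsigned int irqs)
    {
        s->seq = seq;
        s->cpu = cpu;
        s->irq_count = irqs;
        compiler_barrier();          /* bookkeeping stores stay before ...   */
        s->cycles = rdtsc_sketch();  /* ... the timestamp, which comes last. */
    }
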
@@ -61,6 +107,27 @@ feather_callback void save_timestamp_cpu(unsigned long event,
61 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu); 107 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
62} 108}
63 109
110feather_callback void save_task_latency(unsigned long event,
111 unsigned long when_ptr)
112{
113 lt_t now = litmus_clock();
114 lt_t *when = (lt_t*) when_ptr;
115 unsigned int seq_no;
116 int cpu = raw_smp_processor_id();
117 struct timestamp *ts;
118
119 seq_no = fetch_and_inc((int *) &ts_seq_no);
120 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
121 ts->event = event;
122 ts->timestamp = now - *when;
123 ts->seq_no = seq_no;
124 ts->cpu = cpu;
125 ts->task_type = TSK_RT;
126 __save_irq_flags(ts);
127 ft_buffer_finish_write(trace_ts_buf, ts);
128 }
129}
130
64/******************************************************************************/ 131/******************************************************************************/
65/* DEVICE FILE DRIVER */ 132/* DEVICE FILE DRIVER */
66/******************************************************************************/ 133/******************************************************************************/
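
Unlike the other tracepoints, save_task_latency() stores a difference: the lt_t value behind when_ptr (typically a release time recorded earlier) is subtracted from litmus_clock() at the moment the callback fires, so ts->timestamp holds the latency directly. A minimal sketch with plain nanosecond values standing in for lt_t and litmus_clock():

    #include <stdint.h>

    typedef uint64_t lt_ns;   /* stands in for lt_t */

    /* 'release' was written when the job became ready; the sample is the
     * gap until the scheduler actually reacts to it. */
    static lt_ns release_latency_sketch(lt_ns now, const lt_ns *release)
    {
        return now - *release;   /* what ends up in ts->timestamp */
    }
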
@@ -69,11 +136,15 @@ feather_callback void save_timestamp_cpu(unsigned long event,
69 * should be 8M; it is the max we can ask to buddy system allocator (MAX_ORDER) 136 * should be 8M; it is the max we can ask to buddy system allocator (MAX_ORDER)
70 * and we might not get as much 137 * and we might not get as much
71 */ 138 */
72#define NO_TIMESTAMPS (2 << 11) 139#define NO_TIMESTAMPS (2 << 16)
73 140
74static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) 141static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
75{ 142{
76 unsigned int count = NO_TIMESTAMPS; 143 unsigned int count = NO_TIMESTAMPS;
144
145 /* An overhead-tracing timestamp should be exactly 16 bytes long. */
146 BUILD_BUG_ON(sizeof(struct timestamp) != 16);
147
77 while (count && !trace_ts_buf) { 148 while (count && !trace_ts_buf) {
78 printk("time stamp buffer: trying to allocate %u time stamps.\n", count); 149 printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
79 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); 150 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
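
The buffer request grows from 2 << 11 = 4096 to 2 << 16 = 131072 timestamps; with the BUILD_BUG_ON pinning struct timestamp to 16 bytes, that is an initial request of 2 MiB per buffer, and the while loop keeps retrying as long as no buffer has been obtained and count is non-zero. The arithmetic as a compile-time check in the same spirit:

    #include <assert.h>

    #define NO_TIMESTAMPS_SKETCH (2 << 16)   /* 131072 records           */
    #define TIMESTAMP_BYTES      16          /* enforced by BUILD_BUG_ON */

    /* 131072 records * 16 bytes = 2 MiB of trace buffer per device. */
    static_assert(NO_TIMESTAMPS_SKETCH * TIMESTAMP_BYTES == 2 * 1024 * 1024,
                  "timestamp buffer is 2 MiB");
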
@@ -88,9 +159,35 @@ static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
88 ftdev->minor[idx].buf = NULL; 159 ftdev->minor[idx].buf = NULL;
89} 160}
90 161
162static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
163 const char __user *from)
164{
165 ssize_t consumed = 0;
166 struct timestamp ts;
167
168 /* don't give us partial timestamps */
169 if (len % sizeof(ts))
170 return -EINVAL;
171
172 while (len >= sizeof(ts)) {
173 if (copy_from_user(&ts, from, sizeof(ts))) {
174 consumed = -EFAULT;
175 goto out;
176 }
177 len -= sizeof(ts);
178 from += sizeof(ts);
179 consumed += sizeof(ts);
180
181 __add_timestamp_user(&ts);
182 }
183
184out:
185 return consumed;
186}
187
91static int __init init_ft_overhead_trace(void) 188static int __init init_ft_overhead_trace(void)
92{ 189{
93 int err; 190 int err, cpu;
94 191
95 printk("Initializing Feather-Trace overhead tracing device.\n"); 192 printk("Initializing Feather-Trace overhead tracing device.\n");
96 err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace"); 193 err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace");
@@ -99,11 +196,17 @@ static int __init init_ft_overhead_trace(void)
99 196
100 overhead_dev.alloc = alloc_timestamp_buffer; 197 overhead_dev.alloc = alloc_timestamp_buffer;
101 overhead_dev.free = free_timestamp_buffer; 198 overhead_dev.free = free_timestamp_buffer;
199 overhead_dev.write = write_timestamp_from_user;
102 200
103 err = register_ftdev(&overhead_dev); 201 err = register_ftdev(&overhead_dev);
104 if (err) 202 if (err)
105 goto err_dealloc; 203 goto err_dealloc;
106 204
205 /* initialize IRQ flags */
206 for (cpu = 0; cpu < NR_CPUS; cpu++) {
207 clear_irq_fired();
208 }
209
107 return 0; 210 return 0;
108 211
109err_dealloc: 212err_dealloc:
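
With overhead_dev.write wired to write_timestamp_from_user(), user-space tools can push pre-recorded timestamps into the same stream; anything that is not a whole number of struct timestamp records is rejected with -EINVAL, and each accepted record is re-sequenced and tagged with the current IRQ counts by __add_timestamp_user(). A hedged user-space sketch follows; the device path and the 16-byte struct layout below are assumptions chosen to match the BUILD_BUG_ON above, not copied from a LITMUS^RT header, so real tools must use the Feather-Trace definitions instead.

    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>

    /* Assumed 16-byte layout standing in for struct timestamp. */
    struct ft_timestamp_sketch {
        uint64_t timestamp;
        uint32_t seq_no;
        uint8_t  cpu;
        uint8_t  event;
        uint8_t  task_type;
        uint8_t  irq_info;
    };
    _Static_assert(sizeof(struct ft_timestamp_sketch) == 16,
                   "record must match the kernel's 16-byte timestamp");

    int inject_timestamp_sketch(uint64_t cycles, uint8_t event, uint8_t cpu)
    {
        struct ft_timestamp_sketch ts = {
            .timestamp = cycles, .event = event, .cpu = cpu,
        };
        int fd = open("/dev/litmus/ft_trace0", O_WRONLY);  /* path assumed */
        if (fd < 0)
            return -1;
        /* Only whole records are accepted; partial lengths yield -EINVAL. */
        ssize_t ret = write(fd, &ts, sizeof(ts));
        close(fd);
        return ret == (ssize_t)sizeof(ts) ? 0 : -1;
    }
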