Diffstat (limited to 'litmus')
-rw-r--r--   litmus/Kconfig          |  33
-rw-r--r--   litmus/Makefile         |   1
-rw-r--r--   litmus/affinity.c       |  42
-rw-r--r--   litmus/clustered.c      |   2
-rw-r--r--   litmus/ftdev.c          |  73
-rw-r--r--   litmus/litmus.c         |  18
-rw-r--r--   litmus/locking.c        |   2
-rw-r--r--   litmus/preempt.c        |   2
-rw-r--r--   litmus/rt_domain.c      |   4
-rw-r--r--   litmus/sched_cedf.c     | 133
-rw-r--r--   litmus/sched_gsn_edf.c  |  52
-rw-r--r--   litmus/sched_litmus.c   |  15
-rw-r--r--   litmus/sched_pfair.c    | 225
-rw-r--r--   litmus/sched_plugin.c   |  23
-rw-r--r--   litmus/sched_psn_edf.c  |  41
-rw-r--r--   litmus/trace.c          | 109
16 files changed, 562 insertions, 213 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index ad8dc8308cf0..94b48e199577 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -62,6 +62,25 @@ config LITMUS_LOCKING | |||
62 | 62 | ||
63 | endmenu | 63 | endmenu |
64 | 64 | ||
65 | menu "Performance Enhancements" | ||
66 | |||
67 | config SCHED_CPU_AFFINITY | ||
68 | bool "Local Migration Affinity" | ||
69 | depends on X86 | ||
70 | default y | ||
71 | help | ||
72 | Rescheduled tasks prefer CPUs near their previously used CPU. This | ||
73 | may improve performance by preserving cache affinity. | ||
74 | |||
75 | Warning: May make bugs harder to find since tasks may migrate less often. | ||
76 | |||
77 | NOTES: | ||
78 | * Feature is not utilized by PFair/PD^2. | ||
79 | |||
80 | Say Yes if unsure. | ||
81 | |||
82 | endmenu | ||
83 | |||
65 | menu "Tracing" | 84 | menu "Tracing" |
66 | 85 | ||
67 | config FEATHER_TRACE | 86 | config FEATHER_TRACE |
@@ -180,6 +199,20 @@ config SCHED_DEBUG_TRACE_CALLER | |||
180 | 199 | ||
181 | If unsure, say No. | 200 | If unsure, say No. |
182 | 201 | ||
202 | config PREEMPT_STATE_TRACE | ||
203 | bool "Trace preemption state machine transitions" | ||
204 | depends on SCHED_DEBUG_TRACE | ||
205 | default n | ||
206 | help | ||
207 | With this option enabled, each CPU will log when it transitions | ||
208 | states in the preemption state machine. This state machine is | ||
209 | used to determine how to react to IPIs (avoid races with in-flight IPIs). | ||
210 | |||
211 | Warning: this creates a lot of information in the debug trace. Only | ||
212 | recommended when you are debugging preemption-related races. | ||
213 | |||
214 | If unsure, say No. | ||
215 | |||
183 | endmenu | 216 | endmenu |
184 | 217 | ||
185 | endmenu | 218 | endmenu |
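The two options above can be toggled independently. For reference, a minimal .config fragment reflecting the defaults declared in these entries might look as follows (option names are taken verbatim from the Kconfig text; the fragment itself is illustrative and not part of the patch):

CONFIG_SCHED_CPU_AFFINITY=y
# CONFIG_PREEMPT_STATE_TRACE is not set

Note that SCHED_CPU_AFFINITY is only offered on X86 and that PREEMPT_STATE_TRACE additionally requires SCHED_DEBUG_TRACE.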
diff --git a/litmus/Makefile b/litmus/Makefile
index ad9936e07b83..7338180f196f 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -21,6 +21,7 @@ obj-y = sched_plugin.o litmus.o \ | |||
21 | 21 | ||
22 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o | 22 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o |
23 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o | 23 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o |
24 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o | ||
24 | 25 | ||
25 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o | 26 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o |
26 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o | 27 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o |
diff --git a/litmus/affinity.c b/litmus/affinity.c
new file mode 100644
index 000000000000..3fa6dd789400
--- /dev/null
+++ b/litmus/affinity.c
@@ -0,0 +1,42 @@ | |||
1 | #include <linux/cpu.h> | ||
2 | |||
3 | #include <litmus/affinity.h> | ||
4 | |||
5 | struct neighborhood neigh_info[NR_CPUS]; | ||
6 | |||
7 | /* called by _init_litmus() */ | ||
8 | void init_topology(void) { | ||
9 | int cpu; | ||
10 | int i; | ||
11 | int chk; | ||
12 | int depth = num_cache_leaves; | ||
13 | |||
14 | if (depth > NUM_CACHE_LEVELS) | ||
15 | depth = NUM_CACHE_LEVELS; | ||
16 | |||
17 | for_each_online_cpu(cpu) { | ||
18 | for (i = 0; i < depth; ++i) { | ||
19 | chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i); | ||
20 | if (chk) { | ||
21 | /* failed */ | ||
22 | neigh_info[cpu].size[i] = 0; | ||
23 | } else { | ||
24 | /* size = num bits in mask */ | ||
25 | neigh_info[cpu].size[i] = | ||
26 | cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); | ||
27 | } | ||
28 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", | ||
29 | cpu, neigh_info[cpu].size[i], i, | ||
30 | *cpumask_bits(neigh_info[cpu].neighbors[i])); | ||
31 | } | ||
32 | |||
33 | /* set data for non-existent levels */ | ||
34 | for (; i < NUM_CACHE_LEVELS; ++i) { | ||
35 | neigh_info[cpu].size[i] = 0; | ||
36 | |||
37 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", | ||
38 | cpu, neigh_info[cpu].size[i], i, 0lu); | ||
39 | } | ||
40 | } | ||
41 | } | ||
42 | |||
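The new litmus/affinity.c relies on litmus/affinity.h, which is not part of this diff. A minimal sketch of the declarations it appears to assume is given below; the field types, the value of NUM_CACHE_LEVELS, and the get_shared_cpu_map() prototype are inferred from the usage above and should be treated as assumptions, not as the actual header.

/* Sketch of the assumed litmus/affinity.h interface (not in this diff). */
#include <linux/cpumask.h>

#define NUM_CACHE_LEVELS 4                 /* assumed bound on tracked cache levels */

struct neighborhood {
        unsigned int  size[NUM_CACHE_LEVELS];      /* number of CPUs sharing the level-i cache */
        cpumask_var_t neighbors[NUM_CACHE_LEVELS]; /* mask of those CPUs */
};

extern struct neighborhood neigh_info[NR_CPUS];

/* Fills 'mask' with the CPUs that share the cache at 'index' with 'cpu';
 * returns 0 on success (prototype assumed from the call in init_topology()). */
int get_shared_cpu_map(struct cpumask *mask, unsigned int cpu, unsigned int index);

void init_topology(void); /* called by _init_litmus() */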
diff --git a/litmus/clustered.c b/litmus/clustered.c
index 04450a8ad4fe..6fe1b512f628 100644
--- a/litmus/clustered.c
+++ b/litmus/clustered.c
@@ -102,7 +102,7 @@ int assign_cpus_to_clusters(enum cache_level level, | |||
102 | cpus[i]->cluster = cpus[low_cpu]->cluster; | 102 | cpus[i]->cluster = cpus[low_cpu]->cluster; |
103 | } | 103 | } |
104 | /* enqueue in cpus list */ | 104 | /* enqueue in cpus list */ |
105 | list_add(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); | 105 | list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); |
106 | printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id); | 106 | printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id); |
107 | } | 107 | } |
108 | out: | 108 | out: |
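The list_add() to list_add_tail() change in assign_cpus_to_clusters() only affects the order in which CPUs end up on a cluster's list, as this illustration of the two calls suggests:

/* Adding CPUs 0, 1, 2, 3 to an initially empty cluster list: */
list_add(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);      /* head insert: list reads 3, 2, 1, 0 */
list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); /* tail insert: list reads 0, 1, 2, 3 */

With tail insertion, walking a cluster's CPU list visits CPUs in ascending enumeration order.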
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index 4a4b2e3e56c2..06fcf4cf77dc 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -114,6 +114,7 @@ static int ftdev_open(struct inode *in, struct file *filp) | |||
114 | goto out; | 114 | goto out; |
115 | 115 | ||
116 | ftdm = ftdev->minor + buf_idx; | 116 | ftdm = ftdev->minor + buf_idx; |
117 | ftdm->ftdev = ftdev; | ||
117 | filp->private_data = ftdm; | 118 | filp->private_data = ftdm; |
118 | 119 | ||
119 | if (mutex_lock_interruptible(&ftdm->lock)) { | 120 | if (mutex_lock_interruptible(&ftdm->lock)) { |
@@ -250,64 +251,61 @@ out: | |||
250 | return err; | 251 | return err; |
251 | } | 252 | } |
252 | 253 | ||
253 | typedef uint32_t cmd_t; | 254 | static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
254 | |||
255 | static ssize_t ftdev_write(struct file *filp, const char __user *from, | ||
256 | size_t len, loff_t *f_pos) | ||
257 | { | 255 | { |
256 | long err = -ENOIOCTLCMD; | ||
258 | struct ftdev_minor* ftdm = filp->private_data; | 257 | struct ftdev_minor* ftdm = filp->private_data; |
259 | ssize_t err = -EINVAL; | ||
260 | cmd_t cmd; | ||
261 | cmd_t id; | ||
262 | |||
263 | if (len % sizeof(cmd) || len < 2 * sizeof(cmd)) | ||
264 | goto out; | ||
265 | |||
266 | if (copy_from_user(&cmd, from, sizeof(cmd))) { | ||
267 | err = -EFAULT; | ||
268 | goto out; | ||
269 | } | ||
270 | len -= sizeof(cmd); | ||
271 | from += sizeof(cmd); | ||
272 | |||
273 | if (cmd != FTDEV_ENABLE_CMD && cmd != FTDEV_DISABLE_CMD) | ||
274 | goto out; | ||
275 | 258 | ||
276 | if (mutex_lock_interruptible(&ftdm->lock)) { | 259 | if (mutex_lock_interruptible(&ftdm->lock)) { |
277 | err = -ERESTARTSYS; | 260 | err = -ERESTARTSYS; |
278 | goto out; | 261 | goto out; |
279 | } | 262 | } |
280 | 263 | ||
281 | err = sizeof(cmd); | 264 | /* FIXME: check id against list of acceptable events */ |
282 | while (len) { | 265 | |
283 | if (copy_from_user(&id, from, sizeof(cmd))) { | 266 | switch (cmd) { |
284 | err = -EFAULT; | 267 | case FTDEV_ENABLE_CMD: |
285 | goto out_unlock; | 268 | if (activate(&ftdm->events, arg)) |
286 | } | ||
287 | /* FIXME: check id against list of acceptable events */ | ||
288 | len -= sizeof(cmd); | ||
289 | from += sizeof(cmd); | ||
290 | if (cmd == FTDEV_DISABLE_CMD) | ||
291 | deactivate(&ftdm->events, id); | ||
292 | else if (activate(&ftdm->events, id) != 0) { | ||
293 | err = -ENOMEM; | 269 | err = -ENOMEM; |
294 | goto out_unlock; | 270 | else |
295 | } | 271 | err = 0; |
296 | err += sizeof(cmd); | 272 | break; |
297 | } | 273 | |
274 | case FTDEV_DISABLE_CMD: | ||
275 | deactivate(&ftdm->events, arg); | ||
276 | err = 0; | ||
277 | break; | ||
278 | |||
279 | default: | ||
280 | printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg); | ||
281 | }; | ||
298 | 282 | ||
299 | out_unlock: | ||
300 | mutex_unlock(&ftdm->lock); | 283 | mutex_unlock(&ftdm->lock); |
301 | out: | 284 | out: |
302 | return err; | 285 | return err; |
303 | } | 286 | } |
304 | 287 | ||
288 | static ssize_t ftdev_write(struct file *filp, const char __user *from, | ||
289 | size_t len, loff_t *f_pos) | ||
290 | { | ||
291 | struct ftdev_minor* ftdm = filp->private_data; | ||
292 | ssize_t err = -EINVAL; | ||
293 | struct ftdev* ftdev = ftdm->ftdev; | ||
294 | |||
295 | /* dispatch write to buffer-specific code, if available */ | ||
296 | if (ftdev->write) | ||
297 | err = ftdev->write(ftdm->buf, len, from); | ||
298 | |||
299 | return err; | ||
300 | } | ||
301 | |||
305 | struct file_operations ftdev_fops = { | 302 | struct file_operations ftdev_fops = { |
306 | .owner = THIS_MODULE, | 303 | .owner = THIS_MODULE, |
307 | .open = ftdev_open, | 304 | .open = ftdev_open, |
308 | .release = ftdev_release, | 305 | .release = ftdev_release, |
309 | .write = ftdev_write, | 306 | .write = ftdev_write, |
310 | .read = ftdev_read, | 307 | .read = ftdev_read, |
308 | .unlocked_ioctl = ftdev_ioctl, | ||
311 | }; | 309 | }; |
312 | 310 | ||
313 | int ftdev_init( struct ftdev* ftdev, struct module* owner, | 311 | int ftdev_init( struct ftdev* ftdev, struct module* owner, |
@@ -325,6 +323,7 @@ int ftdev_init( struct ftdev* ftdev, struct module* owner, | |||
325 | ftdev->alloc = NULL; | 323 | ftdev->alloc = NULL; |
326 | ftdev->free = NULL; | 324 | ftdev->free = NULL; |
327 | ftdev->can_open = NULL; | 325 | ftdev->can_open = NULL; |
326 | ftdev->write = NULL; | ||
328 | 327 | ||
329 | ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor), | 328 | ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor), |
330 | GFP_KERNEL); | 329 | GFP_KERNEL); |
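Here the write()-based enable/disable protocol is replaced by an ioctl() interface that takes one event ID per call, while write() becomes a per-buffer hook that devices may override. A user-space sketch of the new control path follows; the device path and the assumption that FTDEV_ENABLE_CMD/FTDEV_DISABLE_CMD are visible through a user-space copy of the litmus headers are mine, not part of this patch.

/* Sketch: enable one Feather-Trace event, read records, then disable it. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <litmus/ftdev.h>   /* assumed to define FTDEV_ENABLE_CMD / FTDEV_DISABLE_CMD */

int main(void)
{
        unsigned long event_id = 100;   /* hypothetical event ID */
        char buf[4096];
        ssize_t n;
        int fd = open("/dev/litmus/ft_trace0", O_RDWR);   /* device name assumed */

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, FTDEV_ENABLE_CMD, event_id) < 0)    /* previously: write() of a cmd+id record */
                perror("ioctl(FTDEV_ENABLE_CMD)");
        n = read(fd, buf, sizeof(buf));                   /* ftdev_read() is unchanged */
        if (n > 0)
                printf("read %zd bytes of trace data\n", n);
        if (ioctl(fd, FTDEV_DISABLE_CMD, event_id) < 0)
                perror("ioctl(FTDEV_DISABLE_CMD)");
        close(fd);
        return 0;
}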
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 11ccaafd50de..301390148d02 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -17,6 +17,10 @@ | |||
17 | #include <litmus/litmus_proc.h> | 17 | #include <litmus/litmus_proc.h> |
18 | #include <litmus/sched_trace.h> | 18 | #include <litmus/sched_trace.h> |
19 | 19 | ||
20 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
21 | #include <litmus/affinity.h> | ||
22 | #endif | ||
23 | |||
20 | /* Number of RT tasks that exist in the system */ | 24 | /* Number of RT tasks that exist in the system */ |
21 | atomic_t rt_task_count = ATOMIC_INIT(0); | 25 | atomic_t rt_task_count = ATOMIC_INIT(0); |
22 | static DEFINE_RAW_SPINLOCK(task_transition_lock); | 26 | static DEFINE_RAW_SPINLOCK(task_transition_lock); |
@@ -110,6 +114,14 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) | |||
110 | "because wcet > period\n", pid); | 114 | "because wcet > period\n", pid); |
111 | goto out_unlock; | 115 | goto out_unlock; |
112 | } | 116 | } |
117 | if ( tp.cls != RT_CLASS_HARD && | ||
118 | tp.cls != RT_CLASS_SOFT && | ||
119 | tp.cls != RT_CLASS_BEST_EFFORT) | ||
120 | { | ||
121 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
122 | "because its class is invalid\n", pid); | ||
123 | goto out_unlock; | ||
124 | } | ||
113 | if (tp.budget_policy != NO_ENFORCEMENT && | 125 | if (tp.budget_policy != NO_ENFORCEMENT && |
114 | tp.budget_policy != QUANTUM_ENFORCEMENT && | 126 | tp.budget_policy != QUANTUM_ENFORCEMENT && |
115 | tp.budget_policy != PRECISE_ENFORCEMENT) | 127 | tp.budget_policy != PRECISE_ENFORCEMENT) |
@@ -517,6 +529,8 @@ static int __init _init_litmus(void) | |||
517 | */ | 529 | */ |
518 | printk("Starting LITMUS^RT kernel\n"); | 530 | printk("Starting LITMUS^RT kernel\n"); |
519 | 531 | ||
532 | BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t)); | ||
533 | |||
520 | register_sched_plugin(&linux_sched_plugin); | 534 | register_sched_plugin(&linux_sched_plugin); |
521 | 535 | ||
522 | bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); | 536 | bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); |
@@ -532,6 +546,10 @@ static int __init _init_litmus(void) | |||
532 | 546 | ||
533 | init_litmus_proc(); | 547 | init_litmus_proc(); |
534 | 548 | ||
549 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
550 | init_topology(); | ||
551 | #endif | ||
552 | |||
535 | return 0; | 553 | return 0; |
536 | } | 554 | } |
537 | 555 | ||
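With the added check, sys_set_rt_task_param() now also rejects parameter blocks whose task class is not one of the three known classes, in addition to the existing wcet/period and budget-policy checks. A parameter block that passes all of these checks might look like the sketch below; the exec_cost and period field names are assumed from the LITMUS^RT user-space API (they are not visible in this hunk) and the values are purely illustrative.

/* Sketch: rt_task parameters that satisfy the sanity checks above. */
struct rt_task tp = {
        .exec_cost     = 10000000ULL,    /* 10 ms WCET in ns; wcet <= period (field name assumed) */
        .period        = 100000000ULL,   /* 100 ms period (field name assumed) */
        .cls           = RT_CLASS_SOFT,  /* must be RT_CLASS_HARD, _SOFT, or _BEST_EFFORT */
        .budget_policy = NO_ENFORCEMENT, /* or QUANTUM_/PRECISE_ENFORCEMENT */
};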
diff --git a/litmus/locking.c b/litmus/locking.c
index cfce98e7480d..b3279c1930b7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -80,7 +80,7 @@ asmlinkage long sys_litmus_lock(int lock_od) | |||
80 | 80 | ||
81 | /* Note: task my have been suspended or preempted in between! Take | 81 | /* Note: task my have been suspended or preempted in between! Take |
82 | * this into account when computing overheads. */ | 82 | * this into account when computing overheads. */ |
83 | TS_UNLOCK_END; | 83 | TS_LOCK_END; |
84 | 84 | ||
85 | return err; | 85 | return err; |
86 | } | 86 | } |
diff --git a/litmus/preempt.c b/litmus/preempt.c
index ebe2e3461895..5704d0bf4c0b 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -30,8 +30,10 @@ void sched_state_will_schedule(struct task_struct* tsk) | |||
30 | /* Litmus tasks should never be subject to a remote | 30 | /* Litmus tasks should never be subject to a remote |
31 | * set_tsk_need_resched(). */ | 31 | * set_tsk_need_resched(). */ |
32 | BUG_ON(is_realtime(tsk)); | 32 | BUG_ON(is_realtime(tsk)); |
33 | #ifdef CONFIG_PREEMPT_STATE_TRACE | ||
33 | TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", | 34 | TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", |
34 | __builtin_return_address(0)); | 35 | __builtin_return_address(0)); |
36 | #endif | ||
35 | } | 37 | } |
36 | 38 | ||
37 | /* Called by the IPI handler after another CPU called smp_send_resched(). */ | 39 | /* Called by the IPI handler after another CPU called smp_send_resched(). */ |
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 81a5ac16f164..d405854cd39c 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -55,12 +55,14 @@ static enum hrtimer_restart on_release_timer(struct hrtimer *timer) | |||
55 | { | 55 | { |
56 | unsigned long flags; | 56 | unsigned long flags; |
57 | struct release_heap* rh; | 57 | struct release_heap* rh; |
58 | rh = container_of(timer, struct release_heap, timer); | ||
59 | |||
60 | TS_RELEASE_LATENCY(rh->release_time); | ||
58 | 61 | ||
59 | VTRACE("on_release_timer(0x%p) starts.\n", timer); | 62 | VTRACE("on_release_timer(0x%p) starts.\n", timer); |
60 | 63 | ||
61 | TS_RELEASE_START; | 64 | TS_RELEASE_START; |
62 | 65 | ||
63 | rh = container_of(timer, struct release_heap, timer); | ||
64 | 66 | ||
65 | raw_spin_lock_irqsave(&rh->dom->release_lock, flags); | 67 | raw_spin_lock_irqsave(&rh->dom->release_lock, flags); |
66 | VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); | 68 | VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); |
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 5e977dd2fef0..87f8bc9bb50b 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -44,6 +44,10 @@ | |||
44 | 44 | ||
45 | #include <litmus/bheap.h> | 45 | #include <litmus/bheap.h> |
46 | 46 | ||
47 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
48 | #include <litmus/affinity.h> | ||
49 | #endif | ||
50 | |||
47 | /* to configure the cluster size */ | 51 | /* to configure the cluster size */ |
48 | #include <litmus/litmus_proc.h> | 52 | #include <litmus/litmus_proc.h> |
49 | 53 | ||
@@ -95,7 +99,7 @@ typedef struct clusterdomain { | |||
95 | struct bheap_node *heap_node; | 99 | struct bheap_node *heap_node; |
96 | struct bheap cpu_heap; | 100 | struct bheap cpu_heap; |
97 | /* lock for this cluster */ | 101 | /* lock for this cluster */ |
98 | #define cedf_lock domain.ready_lock | 102 | #define cluster_lock domain.ready_lock |
99 | } cedf_domain_t; | 103 | } cedf_domain_t; |
100 | 104 | ||
101 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | 105 | /* a cedf_domain per cluster; allocation is done at init/activation time */ |
@@ -204,7 +208,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked, | |||
204 | } | 208 | } |
205 | 209 | ||
206 | /* unlink - Make sure a task is not linked any longer to an entry | 210 | /* unlink - Make sure a task is not linked any longer to an entry |
207 | * where it was linked before. Must hold cedf_lock. | 211 | * where it was linked before. Must hold cluster_lock. |
208 | */ | 212 | */ |
209 | static noinline void unlink(struct task_struct* t) | 213 | static noinline void unlink(struct task_struct* t) |
210 | { | 214 | { |
@@ -240,7 +244,7 @@ static void preempt(cpu_entry_t *entry) | |||
240 | } | 244 | } |
241 | 245 | ||
242 | /* requeue - Put an unlinked task into gsn-edf domain. | 246 | /* requeue - Put an unlinked task into gsn-edf domain. |
243 | * Caller must hold cedf_lock. | 247 | * Caller must hold cluster_lock. |
244 | */ | 248 | */ |
245 | static noinline void requeue(struct task_struct* task) | 249 | static noinline void requeue(struct task_struct* task) |
246 | { | 250 | { |
@@ -257,11 +261,34 @@ static noinline void requeue(struct task_struct* task) | |||
257 | } | 261 | } |
258 | } | 262 | } |
259 | 263 | ||
264 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
265 | static cpu_entry_t* cedf_get_nearest_available_cpu( | ||
266 | cedf_domain_t *cluster, cpu_entry_t *start) | ||
267 | { | ||
268 | cpu_entry_t *affinity; | ||
269 | |||
270 | get_nearest_available_cpu(affinity, start, cedf_cpu_entries, | ||
271 | #ifdef CONFIG_RELEASE_MASTER | ||
272 | cluster->domain.release_master | ||
273 | #else | ||
274 | NO_CPU | ||
275 | #endif | ||
276 | ); | ||
277 | |||
278 | /* make sure CPU is in our cluster */ | ||
279 | if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) | ||
280 | return(affinity); | ||
281 | else | ||
282 | return(NULL); | ||
283 | } | ||
284 | #endif | ||
285 | |||
286 | |||
260 | /* check for any necessary preemptions */ | 287 | /* check for any necessary preemptions */ |
261 | static void check_for_preemptions(cedf_domain_t *cluster) | 288 | static void check_for_preemptions(cedf_domain_t *cluster) |
262 | { | 289 | { |
263 | struct task_struct *task; | 290 | struct task_struct *task; |
264 | cpu_entry_t* last; | 291 | cpu_entry_t *last; |
265 | 292 | ||
266 | for(last = lowest_prio_cpu(cluster); | 293 | for(last = lowest_prio_cpu(cluster); |
267 | edf_preemption_needed(&cluster->domain, last->linked); | 294 | edf_preemption_needed(&cluster->domain, last->linked); |
@@ -270,8 +297,20 @@ static void check_for_preemptions(cedf_domain_t *cluster) | |||
270 | task = __take_ready(&cluster->domain); | 297 | task = __take_ready(&cluster->domain); |
271 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | 298 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", |
272 | task->pid, last->cpu); | 299 | task->pid, last->cpu); |
300 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
301 | { | ||
302 | cpu_entry_t *affinity = | ||
303 | cedf_get_nearest_available_cpu(cluster, | ||
304 | &per_cpu(cedf_cpu_entries, task_cpu(task))); | ||
305 | if(affinity) | ||
306 | last = affinity; | ||
307 | else if(last->linked) | ||
308 | requeue(last->linked); | ||
309 | } | ||
310 | #else | ||
273 | if (last->linked) | 311 | if (last->linked) |
274 | requeue(last->linked); | 312 | requeue(last->linked); |
313 | #endif | ||
275 | link_task_to_cpu(task, last); | 314 | link_task_to_cpu(task, last); |
276 | preempt(last); | 315 | preempt(last); |
277 | } | 316 | } |
@@ -292,15 +331,15 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | |||
292 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); | 331 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); |
293 | unsigned long flags; | 332 | unsigned long flags; |
294 | 333 | ||
295 | raw_spin_lock_irqsave(&cluster->cedf_lock, flags); | 334 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
296 | 335 | ||
297 | __merge_ready(&cluster->domain, tasks); | 336 | __merge_ready(&cluster->domain, tasks); |
298 | check_for_preemptions(cluster); | 337 | check_for_preemptions(cluster); |
299 | 338 | ||
300 | raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); | 339 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); |
301 | } | 340 | } |
302 | 341 | ||
303 | /* caller holds cedf_lock */ | 342 | /* caller holds cluster_lock */ |
304 | static noinline void job_completion(struct task_struct *t, int forced) | 343 | static noinline void job_completion(struct task_struct *t, int forced) |
305 | { | 344 | { |
306 | BUG_ON(!t); | 345 | BUG_ON(!t); |
@@ -378,7 +417,18 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
378 | int out_of_time, sleep, preempt, np, exists, blocks; | 417 | int out_of_time, sleep, preempt, np, exists, blocks; |
379 | struct task_struct* next = NULL; | 418 | struct task_struct* next = NULL; |
380 | 419 | ||
381 | raw_spin_lock(&cluster->cedf_lock); | 420 | #ifdef CONFIG_RELEASE_MASTER |
421 | /* Bail out early if we are the release master. | ||
422 | * The release master never schedules any real-time tasks. | ||
423 | */ | ||
424 | if (unlikely(cluster->domain.release_master == entry->cpu)) { | ||
425 | sched_state_task_picked(); | ||
426 | return NULL; | ||
427 | } | ||
428 | #endif | ||
429 | |||
430 | raw_spin_lock(&cluster->cluster_lock); | ||
431 | |||
382 | clear_will_schedule(); | 432 | clear_will_schedule(); |
383 | 433 | ||
384 | /* sanity checking */ | 434 | /* sanity checking */ |
@@ -462,10 +512,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
462 | next = prev; | 512 | next = prev; |
463 | 513 | ||
464 | sched_state_task_picked(); | 514 | sched_state_task_picked(); |
465 | raw_spin_unlock(&cluster->cedf_lock); | 515 | raw_spin_unlock(&cluster->cluster_lock); |
466 | 516 | ||
467 | #ifdef WANT_ALL_SCHED_EVENTS | 517 | #ifdef WANT_ALL_SCHED_EVENTS |
468 | TRACE("cedf_lock released, next=0x%p\n", next); | 518 | TRACE("cluster_lock released, next=0x%p\n", next); |
469 | 519 | ||
470 | if (next) | 520 | if (next) |
471 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | 521 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); |
@@ -504,7 +554,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) | |||
504 | /* the cluster doesn't change even if t is running */ | 554 | /* the cluster doesn't change even if t is running */ |
505 | cluster = task_cpu_cluster(t); | 555 | cluster = task_cpu_cluster(t); |
506 | 556 | ||
507 | raw_spin_lock_irqsave(&cluster->cedf_lock, flags); | 557 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
508 | 558 | ||
509 | /* setup job params */ | 559 | /* setup job params */ |
510 | release_at(t, litmus_clock()); | 560 | release_at(t, litmus_clock()); |
@@ -513,15 +563,25 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) | |||
513 | entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); | 563 | entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); |
514 | BUG_ON(entry->scheduled); | 564 | BUG_ON(entry->scheduled); |
515 | 565 | ||
516 | entry->scheduled = t; | 566 | #ifdef CONFIG_RELEASE_MASTER |
517 | tsk_rt(t)->scheduled_on = task_cpu(t); | 567 | if (entry->cpu != cluster->domain.release_master) { |
568 | #endif | ||
569 | entry->scheduled = t; | ||
570 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
571 | #ifdef CONFIG_RELEASE_MASTER | ||
572 | } else { | ||
573 | /* do not schedule on release master */ | ||
574 | preempt(entry); /* force resched */ | ||
575 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
576 | } | ||
577 | #endif | ||
518 | } else { | 578 | } else { |
519 | t->rt_param.scheduled_on = NO_CPU; | 579 | t->rt_param.scheduled_on = NO_CPU; |
520 | } | 580 | } |
521 | t->rt_param.linked_on = NO_CPU; | 581 | t->rt_param.linked_on = NO_CPU; |
522 | 582 | ||
523 | cedf_job_arrival(t); | 583 | cedf_job_arrival(t); |
524 | raw_spin_unlock_irqrestore(&(cluster->cedf_lock), flags); | 584 | raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags); |
525 | } | 585 | } |
526 | 586 | ||
527 | static void cedf_task_wake_up(struct task_struct *task) | 587 | static void cedf_task_wake_up(struct task_struct *task) |
@@ -534,7 +594,8 @@ static void cedf_task_wake_up(struct task_struct *task) | |||
534 | 594 | ||
535 | cluster = task_cpu_cluster(task); | 595 | cluster = task_cpu_cluster(task); |
536 | 596 | ||
537 | raw_spin_lock_irqsave(&cluster->cedf_lock, flags); | 597 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
598 | |||
538 | /* We need to take suspensions because of semaphores into | 599 | /* We need to take suspensions because of semaphores into |
539 | * account! If a job resumes after being suspended due to acquiring | 600 | * account! If a job resumes after being suspended due to acquiring |
540 | * a semaphore, it should never be treated as a new job release. | 601 | * a semaphore, it should never be treated as a new job release. |
@@ -557,7 +618,8 @@ static void cedf_task_wake_up(struct task_struct *task) | |||
557 | } | 618 | } |
558 | } | 619 | } |
559 | cedf_job_arrival(task); | 620 | cedf_job_arrival(task); |
560 | raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); | 621 | |
622 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
561 | } | 623 | } |
562 | 624 | ||
563 | static void cedf_task_block(struct task_struct *t) | 625 | static void cedf_task_block(struct task_struct *t) |
@@ -570,9 +632,9 @@ static void cedf_task_block(struct task_struct *t) | |||
570 | cluster = task_cpu_cluster(t); | 632 | cluster = task_cpu_cluster(t); |
571 | 633 | ||
572 | /* unlink if necessary */ | 634 | /* unlink if necessary */ |
573 | raw_spin_lock_irqsave(&cluster->cedf_lock, flags); | 635 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
574 | unlink(t); | 636 | unlink(t); |
575 | raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); | 637 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); |
576 | 638 | ||
577 | BUG_ON(!is_realtime(t)); | 639 | BUG_ON(!is_realtime(t)); |
578 | } | 640 | } |
@@ -584,7 +646,7 @@ static void cedf_task_exit(struct task_struct * t) | |||
584 | cedf_domain_t *cluster = task_cpu_cluster(t); | 646 | cedf_domain_t *cluster = task_cpu_cluster(t); |
585 | 647 | ||
586 | /* unlink if necessary */ | 648 | /* unlink if necessary */ |
587 | raw_spin_lock_irqsave(&cluster->cedf_lock, flags); | 649 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
588 | unlink(t); | 650 | unlink(t); |
589 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | 651 | if (tsk_rt(t)->scheduled_on != NO_CPU) { |
590 | cpu_entry_t *cpu; | 652 | cpu_entry_t *cpu; |
@@ -592,7 +654,7 @@ static void cedf_task_exit(struct task_struct * t) | |||
592 | cpu->scheduled = NULL; | 654 | cpu->scheduled = NULL; |
593 | tsk_rt(t)->scheduled_on = NO_CPU; | 655 | tsk_rt(t)->scheduled_on = NO_CPU; |
594 | } | 656 | } |
595 | raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags); | 657 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); |
596 | 658 | ||
597 | BUG_ON(!is_realtime(t)); | 659 | BUG_ON(!is_realtime(t)); |
598 | TRACE_TASK(t, "RIP\n"); | 660 | TRACE_TASK(t, "RIP\n"); |
@@ -605,16 +667,6 @@ static long cedf_admit_task(struct task_struct* tsk) | |||
605 | 667 | ||
606 | 668 | ||
607 | 669 | ||
608 | |||
609 | |||
610 | |||
611 | |||
612 | |||
613 | |||
614 | |||
615 | |||
616 | |||
617 | |||
618 | #ifdef CONFIG_LITMUS_LOCKING | 670 | #ifdef CONFIG_LITMUS_LOCKING |
619 | 671 | ||
620 | #include <litmus/fdso.h> | 672 | #include <litmus/fdso.h> |
@@ -692,11 +744,11 @@ static void set_priority_inheritance(struct task_struct* t, struct task_struct* | |||
692 | { | 744 | { |
693 | cedf_domain_t* cluster = task_cpu_cluster(t); | 745 | cedf_domain_t* cluster = task_cpu_cluster(t); |
694 | 746 | ||
695 | raw_spin_lock(&cluster->cedf_lock); | 747 | raw_spin_lock(&cluster->cluster_lock); |
696 | 748 | ||
697 | __set_priority_inheritance(t, prio_inh); | 749 | __set_priority_inheritance(t, prio_inh); |
698 | 750 | ||
699 | raw_spin_unlock(&cluster->cedf_lock); | 751 | raw_spin_unlock(&cluster->cluster_lock); |
700 | } | 752 | } |
701 | 753 | ||
702 | 754 | ||
@@ -727,9 +779,9 @@ static void clear_priority_inheritance(struct task_struct* t) | |||
727 | { | 779 | { |
728 | cedf_domain_t* cluster = task_cpu_cluster(t); | 780 | cedf_domain_t* cluster = task_cpu_cluster(t); |
729 | 781 | ||
730 | raw_spin_lock(&cluster->cedf_lock); | 782 | raw_spin_lock(&cluster->cluster_lock); |
731 | __clear_priority_inheritance(t); | 783 | __clear_priority_inheritance(t); |
732 | raw_spin_unlock(&cluster->cedf_lock); | 784 | raw_spin_unlock(&cluster->cluster_lock); |
733 | } | 785 | } |
734 | 786 | ||
735 | 787 | ||
@@ -857,7 +909,7 @@ static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | |||
857 | 909 | ||
858 | cluster = task_cpu_cluster(max_hp); | 910 | cluster = task_cpu_cluster(max_hp); |
859 | 911 | ||
860 | raw_spin_lock(&cluster->cedf_lock); | 912 | raw_spin_lock(&cluster->cluster_lock); |
861 | 913 | ||
862 | if(tsk_rt(my_queue->owner)->inh_task == max_hp) | 914 | if(tsk_rt(my_queue->owner)->inh_task == max_hp) |
863 | { | 915 | { |
@@ -867,7 +919,7 @@ static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem) | |||
867 | __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); | 919 | __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter); |
868 | } | 920 | } |
869 | } | 921 | } |
870 | raw_spin_unlock(&cluster->cedf_lock); | 922 | raw_spin_unlock(&cluster->cluster_lock); |
871 | 923 | ||
872 | list_for_each(pos, &my_queue->wait.task_list) | 924 | list_for_each(pos, &my_queue->wait.task_list) |
873 | { | 925 | { |
@@ -1270,6 +1322,9 @@ static long cedf_activate_plugin(void) | |||
1270 | 1322 | ||
1271 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | 1323 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) |
1272 | return -ENOMEM; | 1324 | return -ENOMEM; |
1325 | #ifdef CONFIG_RELEASE_MASTER | ||
1326 | cedf[i].domain.release_master = atomic_read(&release_master_cpu); | ||
1327 | #endif | ||
1273 | } | 1328 | } |
1274 | 1329 | ||
1275 | /* cycle through cluster and add cpus to them */ | 1330 | /* cycle through cluster and add cpus to them */ |
@@ -1312,7 +1367,11 @@ static long cedf_activate_plugin(void) | |||
1312 | 1367 | ||
1313 | entry->linked = NULL; | 1368 | entry->linked = NULL; |
1314 | entry->scheduled = NULL; | 1369 | entry->scheduled = NULL; |
1315 | update_cpu_position(entry); | 1370 | #ifdef CONFIG_RELEASE_MASTER |
1371 | /* only add CPUs that should schedule jobs */ | ||
1372 | if (entry->cpu != entry->cluster->domain.release_master) | ||
1373 | #endif | ||
1374 | update_cpu_position(entry); | ||
1316 | } | 1375 | } |
1317 | /* done with this cluster */ | 1376 | /* done with this cluster */ |
1318 | break; | 1377 | break; |
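cedf_get_nearest_available_cpu() wraps the get_nearest_available_cpu() helper, which is expected to live in litmus/affinity.h (not shown in this diff), and then restricts the result to the caller's cluster. The sketch below illustrates the kind of search that helper presumably performs over the neighborhoods recorded by init_topology(); everything beyond what is visible in the hunks above is an assumption.

/* Sketch (assumed logic, not the actual affinity.h implementation):
 * starting from the task's previous CPU, return the closest CPU whose
 * cpu_entry_t is idle (linked == NULL) and that is not the release master. */
static cpu_entry_t* nearest_idle_cpu_sketch(cpu_entry_t *start, int release_master)
{
        int level, cpu;
        cpu_entry_t *entry;

        if (!start->linked && start->cpu != release_master)
                return start;   /* the previous CPU is itself idle */

        /* search outward, level by level, through the shared-cache neighborhoods */
        for (level = 0; level < NUM_CACHE_LEVELS; level++) {
                for_each_cpu(cpu, neigh_info[start->cpu].neighbors[level]) {
                        entry = &per_cpu(cedf_cpu_entries, cpu);
                        if (!entry->linked && entry->cpu != release_master)
                                return entry;
                }
        }
        return NULL;    /* nothing idle nearby */
}

Because only idle CPUs are returned, check_for_preemptions() does not need to requeue anything when an affinity match is found; the requeue of last->linked only happens when the search fails and the globally lowest-priority CPU is preempted instead.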
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index b87524cf1802..d5bb326ebc9b 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -20,11 +20,16 @@ | |||
20 | #include <litmus/sched_plugin.h> | 20 | #include <litmus/sched_plugin.h> |
21 | #include <litmus/edf_common.h> | 21 | #include <litmus/edf_common.h> |
22 | #include <litmus/sched_trace.h> | 22 | #include <litmus/sched_trace.h> |
23 | #include <litmus/trace.h> | ||
23 | 24 | ||
24 | #include <litmus/preempt.h> | 25 | #include <litmus/preempt.h> |
25 | 26 | ||
26 | #include <litmus/bheap.h> | 27 | #include <litmus/bheap.h> |
27 | 28 | ||
29 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
30 | #include <litmus/affinity.h> | ||
31 | #endif | ||
32 | |||
28 | #include <linux/module.h> | 33 | #include <linux/module.h> |
29 | 34 | ||
30 | /* Overview of GSN-EDF operations. | 35 | /* Overview of GSN-EDF operations. |
@@ -255,21 +260,52 @@ static noinline void requeue(struct task_struct* task) | |||
255 | } | 260 | } |
256 | } | 261 | } |
257 | 262 | ||
263 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
264 | static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start) | ||
265 | { | ||
266 | cpu_entry_t *affinity; | ||
267 | |||
268 | get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries, | ||
269 | #ifdef CONFIG_RELEASE_MASTER | ||
270 | gsnedf.release_master | ||
271 | #else | ||
272 | NO_CPU | ||
273 | #endif | ||
274 | ); | ||
275 | |||
276 | return(affinity); | ||
277 | } | ||
278 | #endif | ||
279 | |||
258 | /* check for any necessary preemptions */ | 280 | /* check for any necessary preemptions */ |
259 | static void check_for_preemptions(void) | 281 | static void check_for_preemptions(void) |
260 | { | 282 | { |
261 | struct task_struct *task; | 283 | struct task_struct *task; |
262 | cpu_entry_t* last; | 284 | cpu_entry_t *last; |
263 | 285 | ||
264 | for(last = lowest_prio_cpu(); | 286 | for (last = lowest_prio_cpu(); |
265 | edf_preemption_needed(&gsnedf, last->linked); | 287 | edf_preemption_needed(&gsnedf, last->linked); |
266 | last = lowest_prio_cpu()) { | 288 | last = lowest_prio_cpu()) { |
267 | /* preemption necessary */ | 289 | /* preemption necessary */ |
268 | task = __take_ready(&gsnedf); | 290 | task = __take_ready(&gsnedf); |
269 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | 291 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", |
270 | task->pid, last->cpu); | 292 | task->pid, last->cpu); |
293 | |||
294 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
295 | { | ||
296 | cpu_entry_t *affinity = | ||
297 | gsnedf_get_nearest_available_cpu( | ||
298 | &per_cpu(gsnedf_cpu_entries, task_cpu(task))); | ||
299 | if (affinity) | ||
300 | last = affinity; | ||
301 | else if (last->linked) | ||
302 | requeue(last->linked); | ||
303 | } | ||
304 | #else | ||
271 | if (last->linked) | 305 | if (last->linked) |
272 | requeue(last->linked); | 306 | requeue(last->linked); |
307 | #endif | ||
308 | |||
273 | link_task_to_cpu(task, last); | 309 | link_task_to_cpu(task, last); |
274 | preempt(last); | 310 | preempt(last); |
275 | } | 311 | } |
@@ -376,8 +412,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
376 | /* Bail out early if we are the release master. | 412 | /* Bail out early if we are the release master. |
377 | * The release master never schedules any real-time tasks. | 413 | * The release master never schedules any real-time tasks. |
378 | */ | 414 | */ |
379 | if (gsnedf.release_master == entry->cpu) | 415 | if (unlikely(gsnedf.release_master == entry->cpu)) { |
416 | sched_state_task_picked(); | ||
380 | return NULL; | 417 | return NULL; |
418 | } | ||
381 | #endif | 419 | #endif |
382 | 420 | ||
383 | raw_spin_lock(&gsnedf_lock); | 421 | raw_spin_lock(&gsnedf_lock); |
@@ -783,6 +821,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) | |||
783 | set_priority_inheritance(sem->owner, sem->hp_waiter); | 821 | set_priority_inheritance(sem->owner, sem->hp_waiter); |
784 | } | 822 | } |
785 | 823 | ||
824 | TS_LOCK_SUSPEND; | ||
825 | |||
786 | /* release lock before sleeping */ | 826 | /* release lock before sleeping */ |
787 | spin_unlock_irqrestore(&sem->wait.lock, flags); | 827 | spin_unlock_irqrestore(&sem->wait.lock, flags); |
788 | 828 | ||
@@ -793,6 +833,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) | |||
793 | 833 | ||
794 | schedule(); | 834 | schedule(); |
795 | 835 | ||
836 | TS_LOCK_RESUME; | ||
837 | |||
796 | /* Since we hold the lock, no other task will change | 838 | /* Since we hold the lock, no other task will change |
797 | * ->owner. We can thus check it without acquiring the spin | 839 | * ->owner. We can thus check it without acquiring the spin |
798 | * lock. */ | 840 | * lock. */ |
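TS_LOCK_SUSPEND and TS_LOCK_RESUME bracket the self-suspension inside gsnedf_fmlp_lock() so that overhead post-processing can separate suspension time from the lock-acquisition overhead measured around the system call (compare the TS_LOCK_END fix in the litmus/locking.c hunk above). These macros presumably come from litmus/trace.h; a definition of the kind expected there is sketched below, with placeholder event IDs.

/* Sketch only: the real macro bodies and event IDs live in litmus/trace.h,
 * which is not part of this diff. */
#define TS_LOCK_SUSPEND  TIMESTAMP(122)  /* placeholder ID */
#define TS_LOCK_RESUME   TIMESTAMP(123)  /* placeholder ID */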
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 1bca2e1a33cd..9a6fe487718e 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -254,12 +254,12 @@ static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) | |||
254 | return; | 254 | return; |
255 | } | 255 | } |
256 | 256 | ||
257 | static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running) | 257 | static void switched_to_litmus(struct rq *rq, struct task_struct *p) |
258 | { | 258 | { |
259 | } | 259 | } |
260 | 260 | ||
261 | static void prio_changed_litmus(struct rq *rq, struct task_struct *p, | 261 | static void prio_changed_litmus(struct rq *rq, struct task_struct *p, |
262 | int oldprio, int running) | 262 | int oldprio) |
263 | { | 263 | { |
264 | } | 264 | } |
265 | 265 | ||
@@ -285,8 +285,8 @@ static void set_curr_task_litmus(struct rq *rq) | |||
285 | * We don't care about the scheduling domain; can gets called from | 285 | * We don't care about the scheduling domain; can gets called from |
286 | * exec, fork, wakeup. | 286 | * exec, fork, wakeup. |
287 | */ | 287 | */ |
288 | static int select_task_rq_litmus(struct rq *rq, struct task_struct *p, | 288 | static int |
289 | int sd_flag, int flags) | 289 | select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags) |
290 | { | 290 | { |
291 | /* preemption is already disabled. | 291 | /* preemption is already disabled. |
292 | * We don't want to change cpu here | 292 | * We don't want to change cpu here |
@@ -296,7 +296,12 @@ static int select_task_rq_litmus(struct rq *rq, struct task_struct *p, | |||
296 | #endif | 296 | #endif |
297 | 297 | ||
298 | static const struct sched_class litmus_sched_class = { | 298 | static const struct sched_class litmus_sched_class = { |
299 | .next = &rt_sched_class, | 299 | /* From 34f971f6 the stop/migrate worker threads have a class on |
300 | * their own, which is the highest prio class. We don't support | ||
301 | * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0 | ||
302 | * CPU capacity. | ||
303 | */ | ||
304 | .next = &stop_sched_class, | ||
300 | .enqueue_task = enqueue_task_litmus, | 305 | .enqueue_task = enqueue_task_litmus, |
301 | .dequeue_task = dequeue_task_litmus, | 306 | .dequeue_task = dequeue_task_litmus, |
302 | .yield_task = yield_task_litmus, | 307 | .yield_task = yield_task_litmus, |
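Re-pointing .next from rt_sched_class to stop_sched_class slots the LITMUS^RT class directly above the stop/migration class added in mainline commit 34f971f6. Assuming the mainline chain in which stop_sched_class.next is rt_sched_class, and that sched_class_highest is updated elsewhere to start at litmus_sched_class (which is outside this diff), the resulting pick order is:

/* Sketch of the assumed scheduling-class order after this change:
 *
 *   litmus_sched_class -> stop_sched_class -> rt_sched_class
 *                      -> fair_sched_class -> idle_sched_class
 *
 * LITMUS^RT tasks are therefore picked before SCHED_FIFO/SCHED_RR tasks and
 * before the stop/migration workers, which matches the comment's note that
 * CPU hotplug and throttling are not supported.
 */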
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
index 0a64273daa47..16f1065bbdca 100644
--- a/litmus/sched_pfair.c
+++ b/litmus/sched_pfair.c
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * kernel/sched_pfair.c | 2 | * kernel/sched_pfair.c |
3 | * | 3 | * |
4 | * Implementation of the (global) Pfair scheduling algorithm. | 4 | * Implementation of the PD^2 pfair scheduling algorithm. This |
5 | * implementation realizes "early releasing," i.e., it is work-conserving. | ||
5 | * | 6 | * |
6 | */ | 7 | */ |
7 | 8 | ||
@@ -76,36 +77,29 @@ struct pfair_state { | |||
76 | struct task_struct* local; /* the local copy of linked */ | 77 | struct task_struct* local; /* the local copy of linked */ |
77 | struct task_struct* scheduled; /* what is actually scheduled */ | 78 | struct task_struct* scheduled; /* what is actually scheduled */ |
78 | 79 | ||
79 | unsigned long missed_quanta; | ||
80 | lt_t offset; /* stagger offset */ | 80 | lt_t offset; /* stagger offset */ |
81 | unsigned int missed_updates; | ||
82 | unsigned int missed_quanta; | ||
81 | }; | 83 | }; |
82 | 84 | ||
83 | /* Currently, we limit the maximum period of any task to 2000 quanta. | ||
84 | * The reason is that it makes the implementation easier since we do not | ||
85 | * need to reallocate the release wheel on task arrivals. | ||
86 | * In the future | ||
87 | */ | ||
88 | #define PFAIR_MAX_PERIOD 2000 | ||
89 | |||
90 | struct pfair_cluster { | 85 | struct pfair_cluster { |
91 | struct scheduling_cluster topology; | 86 | struct scheduling_cluster topology; |
92 | 87 | ||
93 | /* The "global" time in this cluster. */ | 88 | /* The "global" time in this cluster. */ |
94 | quanta_t pfair_time; /* the "official" PFAIR clock */ | 89 | quanta_t pfair_time; /* the "official" PFAIR clock */ |
95 | quanta_t merge_time; /* Updated after the release queue has been | ||
96 | * merged. Used by drop_all_references(). | ||
97 | */ | ||
98 | 90 | ||
99 | /* The ready queue for this cluster. */ | 91 | /* The ready queue for this cluster. */ |
100 | rt_domain_t pfair; | 92 | rt_domain_t pfair; |
101 | 93 | ||
102 | /* This is the release queue wheel for this cluster. It is indexed by | 94 | /* The set of jobs that should have their release enacted at the next |
103 | * pfair_time % PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR | 95 | * quantum boundary. |
104 | * priority, so that it can be merged with the ready queue. | ||
105 | */ | 96 | */ |
106 | struct bheap release_queue[PFAIR_MAX_PERIOD]; | 97 | struct bheap release_queue; |
98 | raw_spinlock_t release_lock; | ||
107 | }; | 99 | }; |
108 | 100 | ||
101 | #define RT_F_REQUEUE 0x2 | ||
102 | |||
109 | static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) | 103 | static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) |
110 | { | 104 | { |
111 | return container_of(state->topology.cluster, struct pfair_cluster, topology); | 105 | return container_of(state->topology.cluster, struct pfair_cluster, topology); |
@@ -121,6 +115,11 @@ static inline struct pfair_state* from_cluster_list(struct list_head* pos) | |||
121 | return list_entry(pos, struct pfair_state, topology.cluster_list); | 115 | return list_entry(pos, struct pfair_state, topology.cluster_list); |
122 | } | 116 | } |
123 | 117 | ||
118 | static inline struct pfair_cluster* from_domain(rt_domain_t* rt) | ||
119 | { | ||
120 | return container_of(rt, struct pfair_cluster, pfair); | ||
121 | } | ||
122 | |||
124 | static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) | 123 | static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) |
125 | { | 124 | { |
126 | /* The ready_lock is used to serialize all scheduling events. */ | 125 | /* The ready_lock is used to serialize all scheduling events. */ |
@@ -161,21 +160,11 @@ static quanta_t cur_deadline(struct task_struct* t) | |||
161 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; | 160 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; |
162 | } | 161 | } |
163 | 162 | ||
164 | |||
165 | static quanta_t cur_sub_release(struct task_struct* t) | ||
166 | { | ||
167 | return cur_subtask(t)->release + tsk_pfair(t)->release; | ||
168 | } | ||
169 | |||
170 | static quanta_t cur_release(struct task_struct* t) | 163 | static quanta_t cur_release(struct task_struct* t) |
171 | { | 164 | { |
172 | #ifdef EARLY_RELEASE | 165 | /* This is early releasing: only the release of the first subtask |
173 | /* only the release of the first subtask counts when we early | 166 | * counts. */ |
174 | * release */ | ||
175 | return tsk_pfair(t)->release; | 167 | return tsk_pfair(t)->release; |
176 | #else | ||
177 | return cur_sub_release(t); | ||
178 | #endif | ||
179 | } | 168 | } |
180 | 169 | ||
181 | static quanta_t cur_overlap(struct task_struct* t) | 170 | static quanta_t cur_overlap(struct task_struct* t) |
@@ -235,11 +224,16 @@ int pfair_ready_order(struct bheap_node* a, struct bheap_node* b) | |||
235 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); | 224 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); |
236 | } | 225 | } |
237 | 226 | ||
238 | /* return the proper release queue for time t */ | 227 | static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks) |
239 | static struct bheap* relq(struct pfair_cluster* cluster, quanta_t t) | ||
240 | { | 228 | { |
241 | struct bheap* rq = cluster->release_queue + (t % PFAIR_MAX_PERIOD); | 229 | struct pfair_cluster* cluster = from_domain(rt); |
242 | return rq; | 230 | unsigned long flags; |
231 | |||
232 | raw_spin_lock_irqsave(&cluster->release_lock, flags); | ||
233 | |||
234 | bheap_union(pfair_ready_order, &cluster->release_queue, tasks); | ||
235 | |||
236 | raw_spin_unlock_irqrestore(&cluster->release_lock, flags); | ||
243 | } | 237 | } |
244 | 238 | ||
245 | static void prepare_release(struct task_struct* t, quanta_t at) | 239 | static void prepare_release(struct task_struct* t, quanta_t at) |
@@ -248,25 +242,12 @@ static void prepare_release(struct task_struct* t, quanta_t at) | |||
248 | tsk_pfair(t)->cur = 0; | 242 | tsk_pfair(t)->cur = 0; |
249 | } | 243 | } |
250 | 244 | ||
251 | static void __pfair_add_release(struct task_struct* t, struct bheap* queue) | ||
252 | { | ||
253 | bheap_insert(pfair_ready_order, queue, | ||
254 | tsk_rt(t)->heap_node); | ||
255 | } | ||
256 | |||
257 | static void pfair_add_release(struct pfair_cluster* cluster, | ||
258 | struct task_struct* t) | ||
259 | { | ||
260 | BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node)); | ||
261 | __pfair_add_release(t, relq(cluster, cur_release(t))); | ||
262 | } | ||
263 | |||
264 | /* pull released tasks from the release queue */ | 245 | /* pull released tasks from the release queue */ |
265 | static void poll_releases(struct pfair_cluster* cluster, | 246 | static void poll_releases(struct pfair_cluster* cluster) |
266 | quanta_t time) | ||
267 | { | 247 | { |
268 | __merge_ready(&cluster->pfair, relq(cluster, time)); | 248 | raw_spin_lock(&cluster->release_lock); |
269 | cluster->merge_time = time; | 249 | __merge_ready(&cluster->pfair, &cluster->release_queue); |
250 | raw_spin_unlock(&cluster->release_lock); | ||
270 | } | 251 | } |
271 | 252 | ||
272 | static void check_preempt(struct task_struct* t) | 253 | static void check_preempt(struct task_struct* t) |
@@ -292,16 +273,12 @@ static void drop_all_references(struct task_struct *t) | |||
292 | { | 273 | { |
293 | int cpu; | 274 | int cpu; |
294 | struct pfair_state* s; | 275 | struct pfair_state* s; |
295 | struct bheap* q; | ||
296 | struct pfair_cluster* cluster; | 276 | struct pfair_cluster* cluster; |
297 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { | 277 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { |
298 | /* figure out what queue the node is in */ | 278 | /* It must be in the ready queue; drop references isn't called |
279 | * when the job is in a release queue. */ | ||
299 | cluster = tsk_pfair(t)->cluster; | 280 | cluster = tsk_pfair(t)->cluster; |
300 | if (time_before_eq(cur_release(t), cluster->merge_time)) | 281 | bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue, |
301 | q = &cluster->pfair.ready_queue; | ||
302 | else | ||
303 | q = relq(cluster, cur_release(t)); | ||
304 | bheap_delete(pfair_ready_order, q, | ||
305 | tsk_rt(t)->heap_node); | 282 | tsk_rt(t)->heap_node); |
306 | } | 283 | } |
307 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | 284 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { |
@@ -313,6 +290,17 @@ static void drop_all_references(struct task_struct *t) | |||
313 | if (s->scheduled == t) | 290 | if (s->scheduled == t) |
314 | s->scheduled = NULL; | 291 | s->scheduled = NULL; |
315 | } | 292 | } |
293 | /* make sure we don't have a stale linked_on field */ | ||
294 | tsk_rt(t)->linked_on = NO_CPU; | ||
295 | } | ||
296 | |||
297 | static void pfair_prepare_next_period(struct task_struct* t) | ||
298 | { | ||
299 | struct pfair_param* p = tsk_pfair(t); | ||
300 | |||
301 | prepare_for_next_period(t); | ||
302 | get_rt_flags(t) = RT_F_RUNNING; | ||
303 | p->release += p->period; | ||
316 | } | 304 | } |
317 | 305 | ||
318 | /* returns 1 if the task needs to go the release queue */ | 306 | /* returns 1 if the task needs to go the release queue */ |
@@ -322,30 +310,26 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu) | |||
322 | int to_relq; | 310 | int to_relq; |
323 | p->cur = (p->cur + 1) % p->quanta; | 311 | p->cur = (p->cur + 1) % p->quanta; |
324 | if (!p->cur) { | 312 | if (!p->cur) { |
325 | sched_trace_task_completion(t, 1); | ||
326 | if (tsk_rt(t)->present) { | 313 | if (tsk_rt(t)->present) { |
327 | /* we start a new job */ | 314 | /* The job overran; we start a new budget allocation. */ |
328 | prepare_for_next_period(t); | 315 | pfair_prepare_next_period(t); |
329 | sched_trace_task_release(t); | ||
330 | get_rt_flags(t) = RT_F_RUNNING; | ||
331 | p->release += p->period; | ||
332 | } else { | 316 | } else { |
333 | /* remove task from system until it wakes */ | 317 | /* remove task from system until it wakes */ |
334 | drop_all_references(t); | 318 | drop_all_references(t); |
319 | tsk_rt(t)->flags = RT_F_REQUEUE; | ||
335 | TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n", | 320 | TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n", |
336 | cpu, p->cur); | 321 | cpu, p->cur); |
337 | return 0; | 322 | return 0; |
338 | } | 323 | } |
339 | } | 324 | } |
340 | to_relq = time_after(cur_release(t), time); | 325 | to_relq = time_after(cur_release(t), time); |
341 | TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n", | 326 | TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n", |
342 | cpu, p->cur, to_relq); | 327 | cpu, p->cur, to_relq, cur_release(t), time); |
343 | return to_relq; | 328 | return to_relq; |
344 | } | 329 | } |
345 | 330 | ||
346 | static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) | 331 | static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) |
347 | { | 332 | { |
348 | int missed; | ||
349 | struct task_struct* l; | 333 | struct task_struct* l; |
350 | struct pfair_param* p; | 334 | struct pfair_param* p; |
351 | struct list_head* pos; | 335 | struct list_head* pos; |
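Together with the rewritten cur_release() above, this hunk implements the early releasing announced in the new header comment: a subtask only has to visit the release queue if the job release lies in the future, not its own Pfair window release. A worked example with illustrative numbers:

/* Worked example (illustrative): a task with 2 quanta of execution per
 * 4-quantum period releases a job at quantum 100, so its subtasks have the
 * PD^2 windows [100, 102) and [102, 104).
 *
 * Old code (with EARLY_RELEASE not defined): after subtask 0 finished,
 * cur_release() evaluated to the subtask-1 window release, 102, so
 * to_relq = time_after(102, now) was true before quantum 102 and the task
 * sat in the release-queue wheel until then.
 *
 * New code: cur_release() evaluates to the job release, 100, so to_relq is
 * false and subtask 1 stays in the ready queue, where it may be scheduled
 * as early as quantum 101 if its priority suffices -- i.e., the scheduler
 * no longer idles while work is pending.
 */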
@@ -354,14 +338,17 @@ static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) | |||
354 | list_for_each(pos, &cluster->topology.cpus) { | 338 | list_for_each(pos, &cluster->topology.cpus) { |
355 | cpu = from_cluster_list(pos); | 339 | cpu = from_cluster_list(pos); |
356 | l = cpu->linked; | 340 | l = cpu->linked; |
357 | missed = cpu->linked != cpu->local; | 341 | cpu->missed_updates += cpu->linked != cpu->local; |
358 | if (l) { | 342 | if (l) { |
359 | p = tsk_pfair(l); | 343 | p = tsk_pfair(l); |
360 | p->last_quantum = time; | 344 | p->last_quantum = time; |
361 | p->last_cpu = cpu_id(cpu); | 345 | p->last_cpu = cpu_id(cpu); |
362 | if (advance_subtask(time, l, cpu_id(cpu))) { | 346 | if (advance_subtask(time, l, cpu_id(cpu))) { |
363 | cpu->linked = NULL; | 347 | //cpu->linked = NULL; |
364 | pfair_add_release(cluster, l); | 348 | PTRACE_TASK(l, "should go to release queue. " |
349 | "scheduled_on=%d present=%d\n", | ||
350 | tsk_rt(l)->scheduled_on, | ||
351 | tsk_rt(l)->present); | ||
365 | } | 352 | } |
366 | } | 353 | } |
367 | } | 354 | } |
@@ -445,6 +432,11 @@ static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time) | |||
445 | list_for_each(pos, &cluster->topology.cpus) { | 432 | list_for_each(pos, &cluster->topology.cpus) { |
446 | cpu_state = from_cluster_list(pos); | 433 | cpu_state = from_cluster_list(pos); |
447 | retry = 1; | 434 | retry = 1; |
435 | #ifdef CONFIG_RELEASE_MASTER | ||
436 | /* skip release master */ | ||
437 | if (cluster->pfair.release_master == cpu_id(cpu_state)) | ||
438 | continue; | ||
439 | #endif | ||
448 | while (retry) { | 440 | while (retry) { |
449 | if (pfair_higher_prio(__peek_ready(&cluster->pfair), | 441 | if (pfair_higher_prio(__peek_ready(&cluster->pfair), |
450 | cpu_state->linked)) | 442 | cpu_state->linked)) |
@@ -471,13 +463,13 @@ static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time) | |||
471 | sched_trace_quantum_boundary(); | 463 | sched_trace_quantum_boundary(); |
472 | 464 | ||
473 | advance_subtasks(cluster, time); | 465 | advance_subtasks(cluster, time); |
474 | poll_releases(cluster, time); | 466 | poll_releases(cluster); |
475 | schedule_subtasks(cluster, time); | 467 | schedule_subtasks(cluster, time); |
476 | 468 | ||
477 | list_for_each(pos, &cluster->topology.cpus) { | 469 | list_for_each(pos, &cluster->topology.cpus) { |
478 | cpu = from_cluster_list(pos); | 470 | cpu = from_cluster_list(pos); |
479 | if (cpu->linked) | 471 | if (cpu->linked) |
480 | PTRACE_TASK(pstate[cpu]->linked, | 472 | PTRACE_TASK(cpu->linked, |
481 | " linked on %d.\n", cpu_id(cpu)); | 473 | " linked on %d.\n", cpu_id(cpu)); |
482 | else | 474 | else |
483 | PTRACE("(null) linked on %d.\n", cpu_id(cpu)); | 475 | PTRACE("(null) linked on %d.\n", cpu_id(cpu)); |
@@ -612,12 +604,42 @@ static int safe_to_schedule(struct task_struct* t, int cpu) | |||
612 | static struct task_struct* pfair_schedule(struct task_struct * prev) | 604 | static struct task_struct* pfair_schedule(struct task_struct * prev) |
613 | { | 605 | { |
614 | struct pfair_state* state = &__get_cpu_var(pfair_state); | 606 | struct pfair_state* state = &__get_cpu_var(pfair_state); |
615 | int blocks; | 607 | struct pfair_cluster* cluster = cpu_cluster(state); |
608 | int blocks, completion, out_of_time; | ||
616 | struct task_struct* next = NULL; | 609 | struct task_struct* next = NULL; |
617 | 610 | ||
611 | #ifdef CONFIG_RELEASE_MASTER | ||
612 | /* Bail out early if we are the release master. | ||
613 | * The release master never schedules any real-time tasks. | ||
614 | */ | ||
615 | if (unlikely(cluster->pfair.release_master == cpu_id(state))) { | ||
616 | sched_state_task_picked(); | ||
617 | return NULL; | ||
618 | } | ||
619 | #endif | ||
620 | |||
618 | raw_spin_lock(cpu_lock(state)); | 621 | raw_spin_lock(cpu_lock(state)); |
619 | 622 | ||
620 | blocks = is_realtime(prev) && !is_running(prev); | 623 | blocks = is_realtime(prev) && !is_running(prev); |
624 | completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP; | ||
625 | out_of_time = is_realtime(prev) && time_after(cur_release(prev), | ||
626 | state->local_tick); | ||
627 | |||
628 | if (is_realtime(prev)) | ||
629 | PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n", | ||
630 | blocks, completion, out_of_time); | ||
631 | |||
632 | if (completion) { | ||
633 | sched_trace_task_completion(prev, 0); | ||
634 | pfair_prepare_next_period(prev); | ||
635 | prepare_release(prev, cur_release(prev)); | ||
636 | } | ||
637 | |||
638 | if (!blocks && (completion || out_of_time)) { | ||
639 | drop_all_references(prev); | ||
640 | sched_trace_task_release(prev); | ||
641 | add_release(&cluster->pfair, prev); | ||
642 | } | ||
621 | 643 | ||
622 | if (state->local && safe_to_schedule(state->local, cpu_id(state))) | 644 | if (state->local && safe_to_schedule(state->local, cpu_id(state))) |
623 | next = state->local; | 645 | next = state->local; |
@@ -649,13 +671,19 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int running) | |||
649 | cluster = tsk_pfair(t)->cluster; | 671 | cluster = tsk_pfair(t)->cluster; |
650 | 672 | ||
651 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); | 673 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); |
652 | if (running) | ||
653 | t->rt_param.scheduled_on = task_cpu(t); | ||
654 | else | ||
655 | t->rt_param.scheduled_on = NO_CPU; | ||
656 | 674 | ||
657 | prepare_release(t, cluster->pfair_time + 1); | 675 | prepare_release(t, cluster->pfair_time + 1); |
658 | pfair_add_release(cluster, t); | 676 | |
677 | t->rt_param.scheduled_on = NO_CPU; | ||
678 | |||
679 | if (running) { | ||
680 | #ifdef CONFIG_RELEASE_MASTER | ||
681 | if (task_cpu(t) != cluster->pfair.release_master) | ||
682 | #endif | ||
683 | t->rt_param.scheduled_on = task_cpu(t); | ||
684 | __add_ready(&cluster->pfair, t); | ||
685 | } | ||
686 | |||
659 | check_preempt(t); | 687 | check_preempt(t); |
660 | 688 | ||
661 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | 689 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); |
@@ -665,6 +693,7 @@ static void pfair_task_wake_up(struct task_struct *t) | |||
665 | { | 693 | { |
666 | unsigned long flags; | 694 | unsigned long flags; |
667 | lt_t now; | 695 | lt_t now; |
696 | int requeue = 0; | ||
668 | struct pfair_cluster* cluster; | 697 | struct pfair_cluster* cluster; |
669 | 698 | ||
670 | cluster = tsk_pfair(t)->cluster; | 699 | cluster = tsk_pfair(t)->cluster; |
@@ -679,13 +708,20 @@ static void pfair_task_wake_up(struct task_struct *t) | |||
679 | * (as if it never blocked at all). Otherwise, we have a | 708 | * (as if it never blocked at all). Otherwise, we have a |
680 | * new sporadic job release. | 709 | * new sporadic job release. |
681 | */ | 710 | */ |
711 | requeue = tsk_rt(t)->flags == RT_F_REQUEUE; | ||
682 | now = litmus_clock(); | 712 | now = litmus_clock(); |
683 | if (lt_before(get_deadline(t), now)) { | 713 | if (lt_before(get_deadline(t), now)) { |
714 | TRACE_TASK(t, "sporadic release!\n"); | ||
684 | release_at(t, now); | 715 | release_at(t, now); |
685 | prepare_release(t, time2quanta(now, CEIL)); | 716 | prepare_release(t, time2quanta(now, CEIL)); |
686 | sched_trace_task_release(t); | 717 | sched_trace_task_release(t); |
687 | /* FIXME: race with pfair_time advancing */ | 718 | } |
688 | pfair_add_release(cluster, t); | 719 | |
720 | /* only add to ready queue if the task isn't still linked somewhere */ | ||
721 | if (requeue) { | ||
722 | TRACE_TASK(t, "requeueing required\n"); | ||
723 | tsk_rt(t)->flags = RT_F_RUNNING; | ||
724 | __add_ready(&cluster->pfair, t); | ||
689 | } | 725 | } |
690 | 726 | ||
691 | check_preempt(t); | 727 | check_preempt(t); |
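
On a wake-up past the deadline, the task gets a fresh sporadic release at litmus_clock(), rounded up to the next quantum boundary via time2quanta(now, CEIL). The rounding itself is plain ceiling division; in the sketch below the 1 ms quantum length, the type names, and the exact formula are assumptions, since the real helper lives in the LITMUS^RT headers.

#include <stdio.h>

typedef unsigned long long lt_t;   /* nanoseconds, as returned by litmus_clock() */
typedef unsigned long quanta_t;

#define QUANTUM_NS 1000000ULL      /* assume 1 ms scheduling quanta */

/* Round a timestamp up to the next quantum boundary (the CEIL variant). */
static quanta_t time2quanta_ceil(lt_t time)
{
	return (quanta_t)((time + QUANTUM_NS - 1) / QUANTUM_NS);
}

int main(void)
{
	/* 3.2 ms after time zero maps to quantum 4 with CEIL rounding. */
	printf("%lu\n", time2quanta_ceil(3200000ULL));
	return 0;
}
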
@@ -744,15 +780,11 @@ static void pfair_release_at(struct task_struct* task, lt_t start) | |||
744 | release_at(task, start); | 780 | release_at(task, start); |
745 | release = time2quanta(start, CEIL); | 781 | release = time2quanta(start, CEIL); |
746 | 782 | ||
747 | /* FIXME: support arbitrary offsets. */ | ||
748 | if (release - cluster->pfair_time >= PFAIR_MAX_PERIOD) | ||
749 | release = cluster->pfair_time + PFAIR_MAX_PERIOD; | ||
750 | |||
751 | TRACE_TASK(task, "sys release at %lu\n", release); | 783 | TRACE_TASK(task, "sys release at %lu\n", release); |
752 | 784 | ||
753 | drop_all_references(task); | 785 | drop_all_references(task); |
754 | prepare_release(task, release); | 786 | prepare_release(task, release); |
755 | pfair_add_release(cluster, task); | 787 | add_release(&cluster->pfair, task); |
756 | 788 | ||
757 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | 789 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); |
758 | } | 790 | } |
@@ -834,13 +866,6 @@ static long pfair_admit_task(struct task_struct* t) | |||
834 | "The period of %s/%d is not a multiple of %llu.\n", | 866 | "The period of %s/%d is not a multiple of %llu.\n", |
835 | t->comm, t->pid, (unsigned long long) quantum_length); | 867 | t->comm, t->pid, (unsigned long long) quantum_length); |
836 | 868 | ||
837 | if (period >= PFAIR_MAX_PERIOD) { | ||
838 | printk(KERN_WARNING | ||
839 | "PFAIR: Rejecting task %s/%d; its period is too long.\n", | ||
840 | t->comm, t->pid); | ||
841 | return -EINVAL; | ||
842 | } | ||
843 | |||
844 | if (quanta == period) { | 869 | if (quanta == period) { |
845 | /* special case: task has weight 1.0 */ | 870 | /* special case: task has weight 1.0 */ |
846 | printk(KERN_INFO | 871 | printk(KERN_INFO |
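
With the fixed-size release wheel gone (releases now go through the heap-backed rt_domain), arbitrarily long periods no longer have to be rejected, so the PFAIR_MAX_PERIOD check above is dropped; what remains is the quantum-alignment warning and the weight-1.0 special case visible in the surrounding context. A worked user-space example of those two checks, assuming 1 ms quanta and simple truncating division (the real admit code may round differently):

#include <stdio.h>

#define QUANTUM_NS 1000000ULL  /* assumed 1 ms scheduling quantum */

int main(void)
{
	unsigned long long period_ns = 10000000ULL;  /* 10 ms */
	unsigned long long cost_ns   = 10000000ULL;  /* 10 ms */

	if (period_ns % QUANTUM_NS)
		printf("warning: period is not a multiple of the quantum\n");

	unsigned long long quanta = cost_ns   / QUANTUM_NS;  /* 10 */
	unsigned long long period = period_ns / QUANTUM_NS;  /* 10 */

	if (quanta == period)
		printf("weight 1.0 (quanta == period), special-cased by pfair_admit_task()\n");
	else
		printf("weight %llu/%llu\n", quanta, period);
	return 0;
}
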
@@ -880,12 +905,9 @@ static long pfair_admit_task(struct task_struct* t) | |||
880 | 905 | ||
881 | static void pfair_init_cluster(struct pfair_cluster* cluster) | 906 | static void pfair_init_cluster(struct pfair_cluster* cluster) |
882 | { | 907 | { |
883 | int i; | 908 | rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs); |
884 | 909 | bheap_init(&cluster->release_queue); | |
885 | /* initialize release queue */ | 910 | raw_spin_lock_init(&cluster->release_lock); |
886 | for (i = 0; i < PFAIR_MAX_PERIOD; i++) | ||
887 | bheap_init(&cluster->release_queue[i]); | ||
888 | rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, NULL); | ||
889 | INIT_LIST_HEAD(&cluster->topology.cpus); | 911 | INIT_LIST_HEAD(&cluster->topology.cpus); |
890 | } | 912 | } |
891 | 913 | ||
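
pfair_init_cluster() now sets up one heap-backed release queue with its own lock instead of the old PFAIR_MAX_PERIOD-sized array, and registers pfair_release_jobs with the rt_domain. Pieced together only from the members touched in this section, the cluster state looks roughly like the sketch below; the real definition in litmus/sched_pfair.c may contain further fields, and the member types come from the LITMUS^RT headers.

/* Hedged reconstruction from the fields used in this diff. */
struct pfair_cluster {
	struct scheduling_cluster topology;  /* cluster->topology.cpus */
	quanta_t pfair_time;                 /* current quantum boundary */
	rt_domain_t pfair;                   /* ready queue, release_master */
	struct bheap release_queue;          /* single merged release heap */
	raw_spinlock_t release_lock;         /* serializes release_queue access */
};
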
@@ -899,8 +921,11 @@ static void cleanup_clusters(void) | |||
899 | num_pfair_clusters = 0; | 921 | num_pfair_clusters = 0; |
900 | 922 | ||
901 | /* avoid stale pointers */ | 923 | /* avoid stale pointers */ |
902 | for (i = 0; i < NR_CPUS; i++) | 924 | for (i = 0; i < num_online_cpus(); i++) { |
903 | pstate[i]->topology.cluster = NULL; | 925 | pstate[i]->topology.cluster = NULL; |
926 | printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]), | ||
927 | pstate[i]->missed_updates, pstate[i]->missed_quanta); | ||
928 | } | ||
904 | } | 929 | } |
905 | 930 | ||
906 | static long pfair_activate_plugin(void) | 931 | static long pfair_activate_plugin(void) |
@@ -936,6 +961,9 @@ static long pfair_activate_plugin(void) | |||
936 | pfair_init_cluster(cluster); | 961 | pfair_init_cluster(cluster); |
937 | cluster->pfair_time = now; | 962 | cluster->pfair_time = now; |
938 | clust[i] = &cluster->topology; | 963 | clust[i] = &cluster->topology; |
964 | #ifdef CONFIG_RELEASE_MASTER | ||
965 | cluster->pfair.release_master = atomic_read(&release_master_cpu); | ||
966 | #endif | ||
939 | } | 967 | } |
940 | 968 | ||
941 | for (i = 0; i < num_online_cpus(); i++) { | 969 | for (i = 0; i < num_online_cpus(); i++) { |
@@ -943,6 +971,7 @@ static long pfair_activate_plugin(void) | |||
943 | state->cur_tick = now; | 971 | state->cur_tick = now; |
944 | state->local_tick = now; | 972 | state->local_tick = now; |
945 | state->missed_quanta = 0; | 973 | state->missed_quanta = 0; |
974 | state->missed_updates = 0; | ||
946 | state->offset = cpu_stagger_offset(i); | 975 | state->offset = cpu_stagger_offset(i); |
947 | printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus()); | 976 | printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus()); |
948 | cpus[i] = &state->topology; | 977 | cpus[i] = &state->topology; |
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index d54886df1f57..00a1900d6457 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c | |||
@@ -35,29 +35,18 @@ void preempt_if_preemptable(struct task_struct* t, int cpu) | |||
35 | /* local CPU case */ | 35 | /* local CPU case */ |
36 | /* check if we need to poke userspace */ | 36 | /* check if we need to poke userspace */ |
37 | if (is_user_np(t)) | 37 | if (is_user_np(t)) |
38 | /* yes, poke it */ | 38 | /* Yes, poke it. This doesn't have to be atomic since |
39 | * the task is definitely not executing. */ | ||
39 | request_exit_np(t); | 40 | request_exit_np(t); |
40 | else if (!is_kernel_np(t)) | 41 | else if (!is_kernel_np(t)) |
41 | /* only if we are allowed to preempt the | 42 | /* only if we are allowed to preempt the |
42 | * currently-executing task */ | 43 | * currently-executing task */ |
43 | reschedule = 1; | 44 | reschedule = 1; |
44 | } else { | 45 | } else { |
45 | /* remote CPU case */ | 46 | /* Remote CPU case. Only notify if it's not a kernel |
46 | if (is_user_np(t)) { | 47 | * NP section and if we didn't set the userspace |
47 | /* need to notify user space of delayed | 48 | * flag. */ |
48 | * preemption */ | 49 | reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); |
49 | |||
50 | /* to avoid a race, set the flag, then test | ||
51 | * again */ | ||
52 | request_exit_np(t); | ||
53 | /* make sure it got written */ | ||
54 | mb(); | ||
55 | } | ||
56 | /* Only send an ipi if remote task might have raced our | ||
57 | * request, i.e., send an IPI to make sure in case it | ||
58 | * exited its critical section. | ||
59 | */ | ||
60 | reschedule = !is_np(t) && !is_kernel_np(t); | ||
61 | } | 50 | } |
62 | } | 51 | } |
63 | if (likely(reschedule)) | 52 | if (likely(reschedule)) |
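
On the remote-CPU path, the old sequence of setting the userspace exit flag, issuing mb(), and then deciding whether to send an IPI is folded into one request_exit_np_atomic() call whose result says whether the request could still be delivered, so reschedule is only set when neither a kernel NP section nor a successfully delivered userspace request covers the task. A user-space analogy of that atomic handshake using C11 atomics; the flag layout and helper name below are stand-ins for the LITMUS^RT control-page implementation.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NP_FLAG  0x1u  /* task is inside a non-preemptive section */
#define EXIT_REQ 0x2u  /* scheduler asked it to leave the section  */

/* Atomically set the exit request, but only while the task is still inside
 * its non-preemptive section. Returns true if the request was delivered
 * (no IPI needed); false if the task already left the section. */
static bool request_exit_np_atomic_demo(atomic_uint *np_ctrl)
{
	unsigned int old = atomic_load(np_ctrl);

	while (old & NP_FLAG) {
		if (atomic_compare_exchange_weak(np_ctrl, &old, old | EXIT_REQ))
			return true;
		/* 'old' was refreshed by the failed CAS; retry */
	}
	return false;
}

int main(void)
{
	atomic_uint ctrl = NP_FLAG;

	printf("delivered=%d ctrl=0x%x\n",
	       request_exit_np_atomic_demo(&ctrl), atomic_load(&ctrl));
	atomic_store(&ctrl, 0);
	printf("delivered=%d ctrl=0x%x\n",
	       request_exit_np_atomic_demo(&ctrl), atomic_load(&ctrl));
	return 0;
}
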
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c index 71c02409efa2..8e4a22dd8d6a 100644 --- a/litmus/sched_psn_edf.c +++ b/litmus/sched_psn_edf.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <litmus/sched_plugin.h> | 20 | #include <litmus/sched_plugin.h> |
21 | #include <litmus/edf_common.h> | 21 | #include <litmus/edf_common.h> |
22 | #include <litmus/sched_trace.h> | 22 | #include <litmus/sched_trace.h> |
23 | #include <litmus/trace.h> | ||
23 | 24 | ||
24 | typedef struct { | 25 | typedef struct { |
25 | rt_domain_t domain; | 26 | rt_domain_t domain; |
@@ -383,12 +384,6 @@ static unsigned int psnedf_get_srp_prio(struct task_struct* t) | |||
383 | return get_rt_period(t); | 384 | return get_rt_period(t); |
384 | } | 385 | } |
385 | 386 | ||
386 | static long psnedf_activate_plugin(void) | ||
387 | { | ||
388 | get_srp_prio = psnedf_get_srp_prio; | ||
389 | return 0; | ||
390 | } | ||
391 | |||
392 | /* ******************** FMLP support ********************** */ | 387 | /* ******************** FMLP support ********************** */ |
393 | 388 | ||
394 | /* struct for semaphore with priority inheritance */ | 389 | /* struct for semaphore with priority inheritance */ |
@@ -428,6 +423,8 @@ int psnedf_fmlp_lock(struct litmus_lock* l) | |||
428 | 423 | ||
429 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); | 424 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); |
430 | 425 | ||
426 | TS_LOCK_SUSPEND; | ||
427 | |||
431 | /* release lock before sleeping */ | 428 | /* release lock before sleeping */ |
432 | spin_unlock_irqrestore(&sem->wait.lock, flags); | 429 | spin_unlock_irqrestore(&sem->wait.lock, flags); |
433 | 430 | ||
@@ -438,6 +435,8 @@ int psnedf_fmlp_lock(struct litmus_lock* l) | |||
438 | 435 | ||
439 | schedule(); | 436 | schedule(); |
440 | 437 | ||
438 | TS_LOCK_RESUME; | ||
439 | |||
441 | /* Since we hold the lock, no other task will change | 440 | /* Since we hold the lock, no other task will change |
442 | * ->owner. We can thus check it without acquiring the spin | 441 | * ->owner. We can thus check it without acquiring the spin |
443 | * lock. */ | 442 | * lock. */ |
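
The new TS_LOCK_SUSPEND / TS_LOCK_RESUME pair brackets the suspension inside the FMLP lock slow path so Feather-Trace can attribute the blocked interval to locking overhead. As a rough user-space analogy of that bracketing, with clock_gettime standing in for the Feather-Trace timestamp macros and a short sleep standing in for schedule():

#include <stdio.h>
#include <time.h>

static long long ns_now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	struct timespec block = { 0, 2000000 };  /* pretend we block for 2 ms */

	long long suspend = ns_now();  /* ~ TS_LOCK_SUSPEND */
	nanosleep(&block, NULL);       /* ~ schedule() while waiting for the lock */
	long long resume = ns_now();   /* ~ TS_LOCK_RESUME  */

	printf("suspended for %lld ns\n", resume - suspend);
	return 0;
}
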
@@ -577,9 +576,35 @@ static long psnedf_allocate_lock(struct litmus_lock **lock, int type, | |||
577 | 576 | ||
578 | #endif | 577 | #endif |
579 | 578 | ||
579 | |||
580 | static long psnedf_activate_plugin(void) | ||
581 | { | ||
582 | #ifdef CONFIG_RELEASE_MASTER | ||
583 | int cpu; | ||
584 | |||
585 | for_each_online_cpu(cpu) { | ||
586 | remote_edf(cpu)->release_master = atomic_read(&release_master_cpu); | ||
587 | } | ||
588 | #endif | ||
589 | |||
590 | #ifdef CONFIG_LITMUS_LOCKING | ||
591 | get_srp_prio = psnedf_get_srp_prio; | ||
592 | #endif | ||
593 | |||
594 | return 0; | ||
595 | } | ||
596 | |||
580 | static long psnedf_admit_task(struct task_struct* tsk) | 597 | static long psnedf_admit_task(struct task_struct* tsk) |
581 | { | 598 | { |
582 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | 599 | if (task_cpu(tsk) == tsk->rt_param.task_params.cpu |
600 | #ifdef CONFIG_RELEASE_MASTER | ||
601 | /* don't allow tasks on release master CPU */ | ||
602 | && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master | ||
603 | #endif | ||
604 | ) | ||
605 | return 0; | ||
606 | else | ||
607 | return -EINVAL; | ||
583 | } | 608 | } |
584 | 609 | ||
585 | /* Plugin object */ | 610 | /* Plugin object */ |
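
psnedf_admit_task() now also rejects a task whose partition is the release-master CPU, in addition to requiring that the task already executes on its assigned partition. Whatever liblitmus entry points you use for admission, the pinning step itself is ordinary Linux affinity; a hedged user-space sketch (the partition number and surrounding setup are illustrative only):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

/* Pin the calling thread to its PSN-EDF partition before admission;
 * pick a partition other than the CPU reserved as release master. */
static int pin_to_partition(int partition)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(partition, &set);
	if (sched_setaffinity(0, sizeof(set), &set))
		return -1;
	/* psnedf_admit_task() checks task_cpu(tsk) == task_params.cpu,
	 * so verify the migration actually happened. */
	return sched_getcpu() == partition ? 0 : -1;
}

int main(void)
{
	if (pin_to_partition(1)) {
		fprintf(stderr, "could not pin to partition 1\n");
		return 1;
	}
	printf("running on CPU %d\n", sched_getcpu());
	return 0;
}
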
@@ -593,9 +618,9 @@ static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = { | |||
593 | .task_wake_up = psnedf_task_wake_up, | 618 | .task_wake_up = psnedf_task_wake_up, |
594 | .task_block = psnedf_task_block, | 619 | .task_block = psnedf_task_block, |
595 | .admit_task = psnedf_admit_task, | 620 | .admit_task = psnedf_admit_task, |
621 | .activate_plugin = psnedf_activate_plugin, | ||
596 | #ifdef CONFIG_LITMUS_LOCKING | 622 | #ifdef CONFIG_LITMUS_LOCKING |
597 | .allocate_lock = psnedf_allocate_lock, | 623 | .allocate_lock = psnedf_allocate_lock, |
598 | .activate_plugin = psnedf_activate_plugin, | ||
599 | #endif | 624 | #endif |
600 | }; | 625 | }; |
601 | 626 | ||
diff --git a/litmus/trace.c b/litmus/trace.c index e7ea1c2ab3e4..3c35c527e805 100644 --- a/litmus/trace.c +++ b/litmus/trace.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/sched.h> | 1 | #include <linux/sched.h> |
2 | #include <linux/module.h> | 2 | #include <linux/module.h> |
3 | #include <linux/uaccess.h> | ||
3 | 4 | ||
4 | #include <litmus/ftdev.h> | 5 | #include <litmus/ftdev.h> |
5 | #include <litmus/litmus.h> | 6 | #include <litmus/litmus.h> |
@@ -15,6 +16,35 @@ static struct ftdev overhead_dev; | |||
15 | 16 | ||
16 | static unsigned int ts_seq_no = 0; | 17 | static unsigned int ts_seq_no = 0; |
17 | 18 | ||
19 | DEFINE_PER_CPU(atomic_t, irq_fired_count); | ||
20 | |||
21 | static inline void clear_irq_fired(void) | ||
22 | { | ||
23 | atomic_set(&__raw_get_cpu_var(irq_fired_count), 0); | ||
24 | } | ||
25 | |||
26 | static inline unsigned int get_and_clear_irq_fired(void) | ||
27 | { | ||
28 | /* This is potentially not atomic since we might migrate if | ||
29 | * preemptions are not disabled. As a tradeoff between | ||
30 | * accuracy and tracing overheads, this seems acceptable. | ||
31 | * If it proves to be a problem, then one could add a callback | ||
32 | * from the migration code to invalidate irq_fired_count. | ||
33 | */ | ||
34 | return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0); | ||
35 | } | ||
36 | |||
37 | static inline void __save_irq_flags(struct timestamp *ts) | ||
38 | { | ||
39 | unsigned int irq_count; | ||
40 | |||
41 | irq_count = get_and_clear_irq_fired(); | ||
42 | /* Store how many interrupts occurred. */ | ||
43 | ts->irq_count = irq_count; | ||
44 | /* Extra flag because ts->irq_count overflows quickly. */ | ||
45 | ts->irq_flag = irq_count > 0; | ||
46 | } | ||
47 | |||
18 | static inline void __save_timestamp_cpu(unsigned long event, | 48 | static inline void __save_timestamp_cpu(unsigned long event, |
19 | uint8_t type, uint8_t cpu) | 49 | uint8_t type, uint8_t cpu) |
20 | { | 50 | { |
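
irq_fired_count is a per-CPU counter that interrupt paths are expected to bump; get_and_clear_irq_fired() reads and resets it with an atomic exchange so events arriving after the exchange are credited to the next timestamp rather than lost. The same read-and-clear idiom in portable user-space C11, with a signal handler standing in for the kernel's interrupt hook:

#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_uint fired;  /* ~ per-CPU irq_fired_count */

static void on_signal(int sig)
{
	(void)sig;
	atomic_fetch_add(&fired, 1);  /* ~ interrupt-entry increment */
}

/* ~ get_and_clear_irq_fired(): return the count and reset it in one step. */
static unsigned int get_and_clear_fired(void)
{
	return atomic_exchange(&fired, 0);
}

int main(void)
{
	signal(SIGALRM, on_signal);
	alarm(1);
	pause();  /* wait for one "interrupt" to fire */

	printf("events since last sample: %u\n", get_and_clear_fired());
	printf("events since last sample: %u\n", get_and_clear_fired());
	return 0;
}
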
@@ -23,10 +53,26 @@ static inline void __save_timestamp_cpu(unsigned long event, | |||
23 | seq_no = fetch_and_inc((int *) &ts_seq_no); | 53 | seq_no = fetch_and_inc((int *) &ts_seq_no); |
24 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | 54 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { |
25 | ts->event = event; | 55 | ts->event = event; |
26 | ts->timestamp = ft_timestamp(); | ||
27 | ts->seq_no = seq_no; | 56 | ts->seq_no = seq_no; |
28 | ts->cpu = cpu; | 57 | ts->cpu = cpu; |
29 | ts->task_type = type; | 58 | ts->task_type = type; |
59 | __save_irq_flags(ts); | ||
60 | barrier(); | ||
61 | /* prevent re-ordering of ft_timestamp() */ | ||
62 | ts->timestamp = ft_timestamp(); | ||
63 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
64 | } | ||
65 | } | ||
66 | |||
67 | static void __add_timestamp_user(struct timestamp *pre_recorded) | ||
68 | { | ||
69 | unsigned int seq_no; | ||
70 | struct timestamp *ts; | ||
71 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
72 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
73 | *ts = *pre_recorded; | ||
74 | ts->seq_no = seq_no; | ||
75 | __save_irq_flags(ts); | ||
30 | ft_buffer_finish_write(trace_ts_buf, ts); | 76 | ft_buffer_finish_write(trace_ts_buf, ts); |
31 | } | 77 | } |
32 | } | 78 | } |
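
__save_timestamp_cpu() now fills in all metadata, including the IRQ counters, before reading ft_timestamp(), and the barrier() keeps the compiler from hoisting the timestamp read above that bookkeeping, so the recorded cycle count sits as late as possible in the probe. A minimal user-space rendering of that ordering; barrier() in the kernel expands to essentially the same empty asm statement with a memory clobber.

#include <stdint.h>
#include <stdio.h>

#define compiler_barrier() __asm__ __volatile__("" ::: "memory")

/* Stand-in for ft_timestamp(); any monotonic cycle/clock source works. */
static uint64_t read_tsc_stub(void) { static uint64_t t; return ++t; }

struct sample { uint32_t event, seq_no; uint64_t timestamp; };

static void record(struct sample *s, uint32_t event, uint32_t seq)
{
	s->event  = event;       /* bookkeeping first ... */
	s->seq_no = seq;
	compiler_barrier();      /* ... then force the timestamp read last */
	s->timestamp = read_tsc_stub();
}

int main(void)
{
	struct sample s;

	record(&s, 100, 1);
	printf("event=%u seq=%u ts=%llu\n", s.event, s.seq_no,
	       (unsigned long long)s.timestamp);
	return 0;
}
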
@@ -61,6 +107,27 @@ feather_callback void save_timestamp_cpu(unsigned long event, | |||
61 | __save_timestamp_cpu(event, TSK_UNKNOWN, cpu); | 107 | __save_timestamp_cpu(event, TSK_UNKNOWN, cpu); |
62 | } | 108 | } |
63 | 109 | ||
110 | feather_callback void save_task_latency(unsigned long event, | ||
111 | unsigned long when_ptr) | ||
112 | { | ||
113 | lt_t now = litmus_clock(); | ||
114 | lt_t *when = (lt_t*) when_ptr; | ||
115 | unsigned int seq_no; | ||
116 | int cpu = raw_smp_processor_id(); | ||
117 | struct timestamp *ts; | ||
118 | |||
119 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
120 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
121 | ts->event = event; | ||
122 | ts->timestamp = now - *when; | ||
123 | ts->seq_no = seq_no; | ||
124 | ts->cpu = cpu; | ||
125 | ts->task_type = TSK_RT; | ||
126 | __save_irq_flags(ts); | ||
127 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
128 | } | ||
129 | } | ||
130 | |||
64 | /******************************************************************************/ | 131 | /******************************************************************************/ |
65 | /* DEVICE FILE DRIVER */ | 132 | /* DEVICE FILE DRIVER */ |
66 | /******************************************************************************/ | 133 | /******************************************************************************/ |
@@ -69,11 +136,15 @@ feather_callback void save_timestamp_cpu(unsigned long event, | |||
69 | * should be 8M; that is the most we can request from the buddy allocator | 136 | * should be 8M; that is the most we can request from the buddy allocator |
70 | * (MAX_ORDER), and we might not even get that much | 137 | * (MAX_ORDER), and we might not even get that much |
71 | */ | 138 | */ |
72 | #define NO_TIMESTAMPS (2 << 11) | 139 | #define NO_TIMESTAMPS (2 << 16) |
73 | 140 | ||
74 | static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | 141 | static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) |
75 | { | 142 | { |
76 | unsigned int count = NO_TIMESTAMPS; | 143 | unsigned int count = NO_TIMESTAMPS; |
144 | |||
145 | /* An overhead-tracing timestamp should be exactly 16 bytes long. */ | ||
146 | BUILD_BUG_ON(sizeof(struct timestamp) != 16); | ||
147 | |||
77 | while (count && !trace_ts_buf) { | 148 | while (count && !trace_ts_buf) { |
78 | printk("time stamp buffer: trying to allocate %u time stamps.\n", count); | 149 | printk("time stamp buffer: trying to allocate %u time stamps.\n", count); |
79 | ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); | 150 | ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); |
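
For scale: 2 << 16 is 131072 timestamps, and with the 16-byte records enforced by the BUILD_BUG_ON above that makes the initial request 131072 x 16 B = 2 MiB per buffer, comfortably under the buddy-allocator ceiling that the "8M" comment refers to; the while loop above then retries, presumably with a reduced count, until an allocation succeeds or the count reaches zero.
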
@@ -88,9 +159,35 @@ static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | |||
88 | ftdev->minor[idx].buf = NULL; | 159 | ftdev->minor[idx].buf = NULL; |
89 | } | 160 | } |
90 | 161 | ||
162 | static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len, | ||
163 | const char __user *from) | ||
164 | { | ||
165 | ssize_t consumed = 0; | ||
166 | struct timestamp ts; | ||
167 | |||
168 | /* don't give us partial timestamps */ | ||
169 | if (len % sizeof(ts)) | ||
170 | return -EINVAL; | ||
171 | |||
172 | while (len >= sizeof(ts)) { | ||
173 | if (copy_from_user(&ts, from, sizeof(ts))) { | ||
174 | consumed = -EFAULT; | ||
175 | goto out; | ||
176 | } | ||
177 | len -= sizeof(ts); | ||
178 | from += sizeof(ts); | ||
179 | consumed += sizeof(ts); | ||
180 | |||
181 | __add_timestamp_user(&ts); | ||
182 | } | ||
183 | |||
184 | out: | ||
185 | return consumed; | ||
186 | } | ||
187 | |||
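
The new write handler lets user space inject pre-recorded timestamps: the write length must be a whole multiple of the 16-byte struct timestamp, and each record is copied in, re-sequenced, and IRQ-tagged by __add_timestamp_user(). Below is a hedged sketch of the writer side; the device node path and the record layout are assumptions and must be taken from the matching LITMUS^RT / Feather-Trace headers and udev setup in practice.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Placeholder 16-byte record; the real layout comes from the kernel headers.
 * Writes that are not a multiple of its size are rejected with -EINVAL. */
struct ft_timestamp_record {
	uint8_t raw[16];
};

int main(void)
{
	struct ft_timestamp_record rec;
	memset(&rec, 0, sizeof(rec));  /* fill in event/timestamp fields here */

	/* Device node name is an assumption; adjust to your ft_trace setup. */
	int fd = open("/dev/litmus/ft_trace0", O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	ssize_t n = write(fd, &rec, sizeof(rec));
	if (n < 0)
		perror("write");
	else
		printf("injected %zd bytes (%zu records)\n", n, n / sizeof(rec));

	close(fd);
	return 0;
}
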
91 | static int __init init_ft_overhead_trace(void) | 188 | static int __init init_ft_overhead_trace(void) |
92 | { | 189 | { |
93 | int err; | 190 | int err, cpu; |
94 | 191 | ||
95 | printk("Initializing Feather-Trace overhead tracing device.\n"); | 192 | printk("Initializing Feather-Trace overhead tracing device.\n"); |
96 | err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace"); | 193 | err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace"); |
@@ -99,11 +196,17 @@ static int __init init_ft_overhead_trace(void) | |||
99 | 196 | ||
100 | overhead_dev.alloc = alloc_timestamp_buffer; | 197 | overhead_dev.alloc = alloc_timestamp_buffer; |
101 | overhead_dev.free = free_timestamp_buffer; | 198 | overhead_dev.free = free_timestamp_buffer; |
199 | overhead_dev.write = write_timestamp_from_user; | ||
102 | 200 | ||
103 | err = register_ftdev(&overhead_dev); | 201 | err = register_ftdev(&overhead_dev); |
104 | if (err) | 202 | if (err) |
105 | goto err_dealloc; | 203 | goto err_dealloc; |
106 | 204 | ||
205 | /* initialize IRQ flags */ | ||
206 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
207 | clear_irq_fired(); | ||
208 | } | ||
209 | |||
107 | return 0; | 210 | return 0; |
108 | 211 | ||
109 | err_dealloc: | 212 | err_dealloc: |