-rw-r--r--  arch/x86/include/asm/processor.h       |   2
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c  |  17
-rw-r--r--  include/litmus/sched_plugin.h          |   3
-rw-r--r--  kernel/sched.c                         |   4
-rw-r--r--  litmus/litmus.c                        |  82
-rw-r--r--  litmus/rt_domain.c                     |   4
-rw-r--r--  litmus/sched_cedf.c                    | 711
-rw-r--r--  litmus/sched_plugin.c                  |   8
8 files changed, 483 insertions, 348 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c9786480f0fe..e75daac64962 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -172,6 +172,8 @@ extern void print_cpu_info(struct cpuinfo_x86 *); | |||
172 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | 172 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); |
173 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | 173 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
174 | extern unsigned short num_cache_leaves; | 174 | extern unsigned short num_cache_leaves; |
175 | extern int get_shared_cpu_map(cpumask_var_t mask, | ||
176 | unsigned int cpu, int index); | ||
175 | 177 | ||
176 | extern void detect_extended_topology(struct cpuinfo_x86 *c); | 178 | extern void detect_extended_topology(struct cpuinfo_x86 *c); |
177 | extern void detect_ht(struct cpuinfo_x86 *c); | 179 | extern void detect_ht(struct cpuinfo_x86 *c); |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 804c40e2bc3e..3167c3d72596 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -515,6 +515,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
515 | static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); | 515 | static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); |
516 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) | 516 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) |
517 | 517 | ||
518 | /* fills mask with the CPUs that share the cache at the given index with cpu */ ||
519 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | ||
520 | { | ||
521 | int ret = 0; | ||
522 | struct _cpuid4_info *this_leaf; | ||
523 | |||
524 | if (index >= num_cache_leaves) { | ||
525 | index = num_cache_leaves - 1; | ||
526 | ret = index; | ||
527 | } | ||
528 | |||
529 | this_leaf = CPUID4_INFO_IDX(cpu,index); | ||
530 | cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map)); | ||
531 | |||
532 | return ret; | ||
533 | } | ||
534 | |||
518 | #ifdef CONFIG_SMP | 535 | #ifdef CONFIG_SMP |
519 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | 536 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) |
520 | { | 537 | { |
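
The get_shared_cpu_map() helper added above fills a caller-supplied cpumask with the CPUs that share the cache at the requested cpuid4 index, and clamps an out-of-range index to the largest supported one (reported through a non-zero return value). A rough kernel-side sketch of how a caller might use it; this is my own illustration, not part of the patch, and the probe function name and GFP_KERNEL context are assumptions:

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <asm/processor.h>

/* hypothetical caller: report which CPUs share a given cache level with CPU 0 */
static int probe_cluster_of_cpu0(int cache_index)
{
	cpumask_var_t mask;
	int clamped;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	/* a non-zero return value is the highest cache index actually supported */
	clamped = get_shared_cpu_map(mask, 0, cache_index);
	if (clamped)
		pr_info("cache index %d not available, using %d instead\n",
			cache_index, clamped);

	pr_info("CPU 0 shares this cache with %d CPU(s)\n",
		cpumask_weight(mask));

	free_cpumask_var(mask);
	return 0;
}

This is the same pattern cedf_activate_plugin() follows further down when it builds the C-EDF clusters.
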
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 2d856d587041..9c1c9f28ba79 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -133,6 +133,9 @@ struct sched_plugin { | |||
133 | 133 | ||
134 | extern struct sched_plugin *litmus; | 134 | extern struct sched_plugin *litmus; |
135 | 135 | ||
136 | /* cluster size: cache_index = 2 clusters around L2, cache_index = 3 around L3 */ ||
137 | extern int cluster_cache_index; | ||
138 | |||
136 | int register_sched_plugin(struct sched_plugin* plugin); | 139 | int register_sched_plugin(struct sched_plugin* plugin); |
137 | struct sched_plugin* find_sched_plugin(const char* name); | 140 | struct sched_plugin* find_sched_plugin(const char* name); |
138 | int print_sched_plugins(char* buf, int max); | 141 | int print_sched_plugins(char* buf, int max); |
diff --git a/kernel/sched.c b/kernel/sched.c
index 1701eaebb79c..adb5e923cc61 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5520,20 +5520,16 @@ need_resched_nonpreemptible: | |||
5520 | spin_unlock_irq(&rq->lock); | 5520 | spin_unlock_irq(&rq->lock); |
5521 | } | 5521 | } |
5522 | 5522 | ||
5523 | TS_SCHED2_START(current); | ||
5524 | sched_trace_task_switch_to(current); | 5523 | sched_trace_task_switch_to(current); |
5525 | 5524 | ||
5526 | post_schedule(rq); | 5525 | post_schedule(rq); |
5527 | 5526 | ||
5528 | if (unlikely(reacquire_kernel_lock(current) < 0)) { | 5527 | if (unlikely(reacquire_kernel_lock(current) < 0)) { |
5529 | TS_SCHED2_END(current); | ||
5530 | goto need_resched_nonpreemptible; | 5528 | goto need_resched_nonpreemptible; |
5531 | } | 5529 | } |
5532 | 5530 | ||
5533 | preempt_enable_no_resched(); | 5531 | preempt_enable_no_resched(); |
5534 | 5532 | ||
5535 | TS_SCHED2_END(current); | ||
5536 | |||
5537 | if (need_resched()) | 5533 | if (need_resched()) |
5538 | goto need_resched; | 5534 | goto need_resched; |
5539 | 5535 | ||
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 3cf7cb9e8a9f..e43596a5104c 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -24,6 +24,8 @@ | |||
24 | /* Number of RT tasks that exist in the system */ | 24 | /* Number of RT tasks that exist in the system */ |
25 | atomic_t rt_task_count = ATOMIC_INIT(0); | 25 | atomic_t rt_task_count = ATOMIC_INIT(0); |
26 | static DEFINE_SPINLOCK(task_transition_lock); | 26 | static DEFINE_SPINLOCK(task_transition_lock); |
27 | /* synchronize plugin switching */ | ||
28 | atomic_t cannot_use_plugin = ATOMIC_INIT(0); | ||
27 | 29 | ||
28 | /* Give log messages sequential IDs. */ | 30 | /* Give log messages sequential IDs. */ |
29 | atomic_t __log_seq_no = ATOMIC_INIT(0); | 31 | atomic_t __log_seq_no = ATOMIC_INIT(0); |
@@ -369,13 +371,17 @@ void litmus_exit_task(struct task_struct* tsk) | |||
369 | } | 371 | } |
370 | } | 372 | } |
371 | 373 | ||
374 | /* IPI callback to synchronize plugin switching */ | ||
375 | static void synch_on_plugin_switch(void* info) | ||
376 | { | ||
377 | while (atomic_read(&cannot_use_plugin)) | ||
378 | cpu_relax(); | ||
379 | } | ||
380 | |||
372 | /* Switching a plugin in use is tricky. | 381 | /* Switching a plugin in use is tricky. |
373 | * We must watch out that no real-time tasks exists | 382 | * We must watch out that no real-time tasks exists |
374 | * (and that none is created in parallel) and that the plugin is not | 383 | * (and that none is created in parallel) and that the plugin is not |
375 | * currently in use on any processor (in theory). | 384 | * currently in use on any processor (in theory). |
376 | * | ||
377 | * For now, we don't enforce the second part since it is unlikely to cause | ||
378 | * any trouble by itself as long as we don't unload modules. | ||
379 | */ | 385 | */ |
380 | int switch_sched_plugin(struct sched_plugin* plugin) | 386 | int switch_sched_plugin(struct sched_plugin* plugin) |
381 | { | 387 | { |
@@ -384,6 +390,11 @@ int switch_sched_plugin(struct sched_plugin* plugin) | |||
384 | 390 | ||
385 | BUG_ON(!plugin); | 391 | BUG_ON(!plugin); |
386 | 392 | ||
393 | /* forbid other cpus to use the plugin */ | ||
394 | atomic_set(&cannot_use_plugin, 1); | ||
395 | /* send IPI to force other CPUs to synch with us */ | ||
396 | smp_call_function(synch_on_plugin_switch, NULL, 0); | ||
397 | |||
387 | /* stop task transitions */ | 398 | /* stop task transitions */ |
388 | spin_lock_irqsave(&task_transition_lock, flags); | 399 | spin_lock_irqsave(&task_transition_lock, flags); |
389 | 400 | ||
@@ -404,6 +415,7 @@ int switch_sched_plugin(struct sched_plugin* plugin) | |||
404 | ret = -EBUSY; | 415 | ret = -EBUSY; |
405 | out: | 416 | out: |
406 | spin_unlock_irqrestore(&task_transition_lock, flags); | 417 | spin_unlock_irqrestore(&task_transition_lock, flags); |
418 | atomic_set(&cannot_use_plugin, 0); | ||
407 | return ret; | 419 | return ret; |
408 | } | 420 | } |
409 | 421 | ||
@@ -554,6 +566,55 @@ static int proc_write_curr(struct file *file, | |||
554 | return len; | 566 | return len; |
555 | } | 567 | } |
556 | 568 | ||
569 | static int proc_read_cluster_size(char *page, char **start, | ||
570 | off_t off, int count, | ||
571 | int *eof, void *data) | ||
572 | { | ||
573 | int len; | ||
574 | if (cluster_cache_index == 2) | ||
575 | len = snprintf(page, PAGE_SIZE, "L2\n"); | ||
576 | else if (cluster_cache_index == 3) | ||
577 | len = snprintf(page, PAGE_SIZE, "L3\n"); | ||
578 | else /* (cluster_cache_index == 1) */ | ||
579 | len = snprintf(page, PAGE_SIZE, "L1\n"); | ||
580 | |||
581 | return len; | ||
582 | } | ||
583 | |||
584 | static int proc_write_cluster_size(struct file *file, | ||
585 | const char *buffer, | ||
586 | unsigned long count, | ||
587 | void *data) | ||
588 | { | ||
589 | int len; | ||
590 | /* L2, L3 */ | ||
591 | char cache_name[33]; | ||
592 | |||
593 | if(count > 32) | ||
594 | len = 32; | ||
595 | else | ||
596 | len = count; | ||
597 | |||
598 | if(copy_from_user(cache_name, buffer, len)) | ||
599 | return -EFAULT; | ||
600 | |||
601 | cache_name[len] = '\0'; | ||
602 | /* chomp name */ | ||
603 | if (len > 1 && cache_name[len - 1] == '\n') | ||
604 | cache_name[len - 1] = '\0'; | ||
605 | |||
606 | /* do a quick and dirty comparison to find the cluster size */ | ||
607 | if (!strcmp(cache_name, "L2")) | ||
608 | cluster_cache_index = 2; | ||
609 | else if (!strcmp(cache_name, "L3")) | ||
610 | cluster_cache_index = 3; | ||
611 | else if (!strcmp(cache_name, "L1")) | ||
612 | cluster_cache_index = 1; | ||
613 | else | ||
614 | printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); | ||
615 | |||
616 | return len; | ||
617 | } | ||
557 | 618 | ||
558 | static int proc_read_release_master(char *page, char **start, | 619 | static int proc_read_release_master(char *page, char **start, |
559 | off_t off, int count, | 620 | off_t off, int count, |
@@ -609,6 +670,7 @@ static struct proc_dir_entry *litmus_dir = NULL, | |||
609 | *curr_file = NULL, | 670 | *curr_file = NULL, |
610 | *stat_file = NULL, | 671 | *stat_file = NULL, |
611 | *plugs_file = NULL, | 672 | *plugs_file = NULL, |
673 | *clus_cache_idx_file = NULL, | ||
612 | *release_master_file = NULL; | 674 | *release_master_file = NULL; |
613 | 675 | ||
614 | static int __init init_litmus_proc(void) | 676 | static int __init init_litmus_proc(void) |
@@ -639,6 +701,16 @@ static int __init init_litmus_proc(void) | |||
639 | release_master_file->read_proc = proc_read_release_master; | 701 | release_master_file->read_proc = proc_read_release_master; |
640 | release_master_file->write_proc = proc_write_release_master; | 702 | release_master_file->write_proc = proc_write_release_master; |
641 | 703 | ||
704 | clus_cache_idx_file = create_proc_entry("cluster_cache", | ||
705 | 0644, litmus_dir); | ||
706 | if (!clus_cache_idx_file) { | ||
707 | printk(KERN_ERR "Could not allocate cluster_cache " | ||
708 | "procfs entry.\n"); | ||
709 | return -ENOMEM; | ||
710 | } | ||
711 | clus_cache_idx_file->read_proc = proc_read_cluster_size; | ||
712 | clus_cache_idx_file->write_proc = proc_write_cluster_size; | ||
713 | |||
642 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, | 714 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, |
643 | proc_read_stats, NULL); | 715 | proc_read_stats, NULL); |
644 | 716 | ||
@@ -656,6 +728,10 @@ static void exit_litmus_proc(void) | |||
656 | remove_proc_entry("stats", litmus_dir); | 728 | remove_proc_entry("stats", litmus_dir); |
657 | if (curr_file) | 729 | if (curr_file) |
658 | remove_proc_entry("active_plugin", litmus_dir); | 730 | remove_proc_entry("active_plugin", litmus_dir); |
731 | if (clus_cache_idx_file) | ||
732 | remove_proc_entry("cluster_cache", litmus_dir); | ||
733 | if (release_master_file) | ||
734 | remove_proc_entry("release_master", litmus_dir); | ||
659 | if (litmus_dir) | 735 | if (litmus_dir) |
660 | remove_proc_entry("litmus", NULL); | 736 | remove_proc_entry("litmus", NULL); |
661 | } | 737 | } |
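
The cluster_cache entry created above accepts the strings L1, L2 and L3, and a new value only takes effect the next time a plugin is activated, because cedf_activate_plugin() samples cluster_cache_index while building the clusters. A minimal userspace sketch, assuming the entry shows up as /proc/litmus/cluster_cache:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* select clustering around the L3 cache before switching plugins */
	int fd = open("/proc/litmus/cluster_cache", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/litmus/cluster_cache");
		return 1;
	}
	/* the write handler accepts "L1", "L2" or "L3" (trailing newline is chomped);
	 * anything else is rejected with a kernel log message */
	if (write(fd, "L3\n", 3) != 3)
		perror("write");
	close(fd);
	return 0;
}

Writing C-EDF to /proc/litmus/active_plugin afterwards triggers the topology detection implemented in litmus/sched_cedf.c below.
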
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 0ed6d5cbbfc5..609ff0f82abb 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -301,6 +301,10 @@ void __add_release(rt_domain_t* rt, struct task_struct *task) | |||
301 | task->rt_param.domain = rt; | 301 | task->rt_param.domain = rt; |
302 | 302 | ||
303 | /* start release timer */ | 303 | /* start release timer */ |
304 | TS_SCHED2_START(task); | ||
305 | |||
304 | arm_release_timer(rt); | 306 | arm_release_timer(rt); |
307 | |||
308 | TS_SCHED2_END(task); | ||
305 | } | 309 | } |
306 | 310 | ||
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index d0767ce9e178..da44b451c9ad 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -1,9 +1,20 @@ | |||
1 | /* | 1 | /* |
2 | * kernel/sched_cedf.c | 2 | * litmus/sched_cedf.c |
3 | * | 3 | * |
4 | * Implementation of the Clustered EDF (C-EDF) scheduling algorithm. | 4 | * Implementation of the C-EDF scheduling algorithm. |
5 | * Linking is included so that support for synchronization (e.g., through | 5 | * |
6 | * the implementation of a "CSN-EDF" algorithm) can be added later if desired. | 6 | * This implementation is based on G-EDF: |
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Cluster topology is automatically detected (this is arch-dependent ||
9 | * and currently works only on x86 --- and only with modern ||
10 | * cpus that export cpuid4 information) ||
11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e., ||
12 | * the programmer needs to be aware of the topology to place tasks ||
13 | * in the desired cluster ||
14 | * - default clustering is around the L2 cache (cache index = 2); ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3 ||
16 | * | ||
17 | * For details on functions, take a look at sched_gsn_edf.c | ||
7 | * | 18 | * |
8 | * This version uses the simple approach and serializes all scheduling | 19 | * This version uses the simple approach and serializes all scheduling |
9 | * decisions by the use of a queue lock. This is probably not the | 20 | * decisions by the use of a queue lock. This is probably not the |
@@ -13,92 +24,36 @@ | |||
13 | #include <linux/spinlock.h> | 24 | #include <linux/spinlock.h> |
14 | #include <linux/percpu.h> | 25 | #include <linux/percpu.h> |
15 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
16 | #include <linux/list.h> | ||
17 | 27 | ||
18 | #include <litmus/litmus.h> | 28 | #include <litmus/litmus.h> |
19 | #include <litmus/jobs.h> | 29 | #include <litmus/jobs.h> |
20 | #include <litmus/sched_plugin.h> | 30 | #include <litmus/sched_plugin.h> |
21 | #include <litmus/edf_common.h> | 31 | #include <litmus/edf_common.h> |
22 | #include <litmus/sched_trace.h> | 32 | #include <litmus/sched_trace.h> |
33 | |||
23 | #include <litmus/bheap.h> | 34 | #include <litmus/bheap.h> |
24 | 35 | ||
25 | #include <linux/module.h> | 36 | #include <linux/module.h> |
26 | 37 | ||
27 | /* Overview of C-EDF operations. | 38 | /* forward declaration... a funny thing with C ;) */ |
28 | * | 39 | struct clusterdomain; |
29 | * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage | ||
30 | * structure (NOT the actually scheduled | ||
31 | * task). If there is another linked task To | ||
32 | * already it will set To->linked_on = NO_CPU | ||
33 | * (thereby removing its association with this | ||
34 | * CPU). However, it will not requeue the | ||
35 | * previously linked task (if any). It will set | ||
36 | * T's state to RT_F_RUNNING and check whether | ||
37 | * it is already running somewhere else. If T | ||
38 | * is scheduled somewhere else it will link | ||
39 | * it to that CPU instead (and pull the linked | ||
40 | * task to cpu). T may be NULL. | ||
41 | * | ||
42 | * unlink(T) - Unlink removes T from all scheduler data | ||
43 | * structures. If it is linked to some CPU it | ||
44 | * will link NULL to that CPU. If it is | ||
45 | * currently queued in the cedf queue for | ||
46 | * a partition, it will be removed from | ||
47 | * the rt_domain. It is safe to call | ||
48 | * unlink(T) if T is not linked. T may not | ||
49 | * be NULL. | ||
50 | * | ||
51 | * requeue(T) - Requeue will insert T into the appropriate | ||
52 | * queue. If the system is in real-time mode and | ||
53 | * the T is released already, it will go into the | ||
54 | * ready queue. If the system is not in | ||
55 | * real-time mode is T, then T will go into the | ||
56 | * release queue. If T's release time is in the | ||
57 | * future, it will go into the release | ||
58 | * queue. That means that T's release time/job | ||
59 | * no/etc. has to be updated before requeue(T) is | ||
60 | * called. It is not safe to call requeue(T) | ||
61 | * when T is already queued. T may not be NULL. | ||
62 | * | ||
63 | * cedf_job_arrival(T) - This is the catch-all function when T enters | ||
64 | * the system after either a suspension or at a | ||
65 | * job release. It will queue T (which means it | ||
66 | * is not safe to call cedf_job_arrival(T) if | ||
67 | * T is already queued) and then check whether a | ||
68 | * preemption is necessary. If a preemption is | ||
69 | * necessary it will update the linkage | ||
70 | * accordingly and cause scheduled to be called | ||
71 | * (either with an IPI or need_resched). It is | ||
72 | * safe to call cedf_job_arrival(T) if T's | ||
73 | * next job has not been actually released yet | ||
74 | * (release time in the future). T will be put | ||
75 | * on the release queue in that case. | ||
76 | * | ||
77 | * job_completion(T) - Take care of everything that needs to be done | ||
78 | * to prepare T for its next release and place | ||
79 | * it in the right queue with | ||
80 | * cedf_job_arrival(). | ||
81 | * | ||
82 | * | ||
83 | * When we now that T is linked to CPU then link_task_to_cpu(NULL, CPU) is | ||
84 | * equivalent to unlink(T). Note that if you unlink a task from a CPU none of | ||
85 | * the functions will automatically propagate pending task from the ready queue | ||
86 | * to a linked task. This is the job of the calling function ( by means of | ||
87 | * __take_ready). | ||
88 | */ | ||
89 | 40 | ||
90 | /* cpu_entry_t - maintain the linked and scheduled state | 41 | /* cpu_entry_t - maintain the linked and scheduled state |
42 | * | ||
43 | * A cpu also contains a pointer to the cedf_domain_t cluster | ||
44 | * that owns it (struct clusterdomain*) | ||
91 | */ | 45 | */ |
92 | typedef struct { | 46 | typedef struct { |
93 | int cpu; | 47 | int cpu; |
48 | struct clusterdomain* cluster; /* owning cluster */ | ||
94 | struct task_struct* linked; /* only RT tasks */ | 49 | struct task_struct* linked; /* only RT tasks */ |
95 | struct task_struct* scheduled; /* only RT tasks */ | 50 | struct task_struct* scheduled; /* only RT tasks */ |
96 | struct list_head list; | ||
97 | atomic_t will_schedule; /* prevent unneeded IPIs */ | 51 | atomic_t will_schedule; /* prevent unneeded IPIs */ |
52 | struct bheap_node* hn; | ||
98 | } cpu_entry_t; | 53 | } cpu_entry_t; |
99 | DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); | ||
100 | 54 | ||
101 | cpu_entry_t* *cedf_cpu_entries_array; | 55 | /* one cpu_entry_t per CPU */ |
56 | DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); | ||
102 | 57 | ||
103 | #define set_will_schedule() \ | 58 | #define set_will_schedule() \ |
104 | (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1)) | 59 | (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1)) |
@@ -107,75 +62,73 @@ cpu_entry_t* *cedf_cpu_entries_array; | |||
107 | #define test_will_schedule(cpu) \ | 62 | #define test_will_schedule(cpu) \ |
108 | (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) | 63 | (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) |
109 | 64 | ||
110 | /* Cluster size -- currently four. This is a variable to allow for | 65 | /* |
111 | * the possibility of changing the cluster size online in the future. | 66 | * In C-EDF there is a cedf domain _per_ cluster.
67 | * The number of clusters is determined dynamically according to the ||
68 | * total number of cpus and the cluster size. ||
112 | */ | 69 | */ |
113 | int cluster_size = 4; | 70 | typedef struct clusterdomain { |
114 | 71 | /* rt_domain for this cluster */ | |
115 | int do_cleanup = 1; | 72 | rt_domain_t domain; |
116 | 73 | /* cpus in this cluster */ | |
117 | typedef struct { | 74 | cpu_entry_t* *cpus; |
118 | rt_domain_t domain; | 75 | /* map of this cluster cpus */ |
119 | int first_cpu; | 76 | cpumask_var_t cpu_map; |
120 | int last_cpu; | ||
121 | |||
122 | /* the cpus queue themselves according to priority in here */ | 77 | /* the cpus queue themselves according to priority in here */ |
123 | struct list_head cedf_cpu_queue; | 78 | struct bheap_node *heap_node; |
124 | 79 | struct bheap cpu_heap; | |
125 | /* per-partition spinlock: protects the domain and | 80 | /* lock for this cluster */ |
126 | * serializes scheduling decisions | 81 | #define lock domain.ready_lock |
127 | */ | ||
128 | #define slock domain.ready_lock | ||
129 | } cedf_domain_t; | 82 | } cedf_domain_t; |
130 | 83 | ||
131 | DEFINE_PER_CPU(cedf_domain_t*, cedf_domains) = NULL; | 84 | /* a cedf_domain per cluster; allocation is done at init/activation time */ |
132 | 85 | cedf_domain_t *cedf; | |
133 | cedf_domain_t* *cedf_domains_array; | ||
134 | 86 | ||
87 | #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) | ||
88 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
135 | 89 | ||
136 | /* These are defined similarly to partitioning, except that a | 90 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling |
137 | * tasks partition is any cpu of the cluster to which it | 91 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose |
138 | * is assigned, typically the lowest-numbered cpu. | 92 | * information during the initialization of the plugin (e.g., topology) |
93 | #define WANT_ALL_SCHED_EVENTS | ||
139 | */ | 94 | */ |
140 | #define local_edf (&__get_cpu_var(cedf_domains)->domain) | 95 | #define VERBOSE_INIT |
141 | #define local_cedf __get_cpu_var(cedf_domains) | 96 | |
142 | #define remote_edf(cpu) (&per_cpu(cedf_domains, cpu)->domain) | 97 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) |
143 | #define remote_cedf(cpu) per_cpu(cedf_domains, cpu) | 98 | { |
144 | #define task_edf(task) remote_edf(get_partition(task)) | 99 | cpu_entry_t *a, *b; |
145 | #define task_cedf(task) remote_cedf(get_partition(task)) | 100 | a = _a->value; |
101 | b = _b->value; | ||
102 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
103 | * the top of the heap. | ||
104 | */ | ||
105 | return edf_higher_prio(b->linked, a->linked); | ||
106 | } | ||
146 | 107 | ||
147 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | 108 | /* update_cpu_position - Move the cpu entry to the correct place to maintain |
148 | * order in the cpu queue. Caller must hold cedf lock. | 109 | * order in the cpu queue. Caller must hold cedf lock. |
149 | * | ||
150 | * This really should be a heap. | ||
151 | */ | 110 | */ |
152 | static void update_cpu_position(cpu_entry_t *entry) | 111 | static void update_cpu_position(cpu_entry_t *entry) |
153 | { | 112 | { |
154 | cpu_entry_t *other; | 113 | cedf_domain_t *cluster = entry->cluster; |
155 | struct list_head *cedf_cpu_queue = | ||
156 | &(remote_cedf(entry->cpu))->cedf_cpu_queue; | ||
157 | struct list_head *pos; | ||
158 | 114 | ||
159 | BUG_ON(!cedf_cpu_queue); | 115 | if (likely(bheap_node_in_heap(entry->hn))) |
116 | bheap_delete(cpu_lower_prio, | ||
117 | &cluster->cpu_heap, | ||
118 | entry->hn); | ||
160 | 119 | ||
161 | if (likely(in_list(&entry->list))) | 120 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); |
162 | list_del(&entry->list); | 121 | } |
163 | /* if we do not execute real-time jobs we just move | 122 | |
164 | * to the end of the queue | 123 | /* caller must hold cedf lock */ |
165 | */ | 124 | static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) |
166 | if (entry->linked) { | 125 | { |
167 | list_for_each(pos, cedf_cpu_queue) { | 126 | struct bheap_node* hn; |
168 | other = list_entry(pos, cpu_entry_t, list); | 127 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); |
169 | if (edf_higher_prio(entry->linked, other->linked)) { | 128 | return hn->value; |
170 | __list_add(&entry->list, pos->prev, pos); | ||
171 | return; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | /* if we get this far we have the lowest priority job */ | ||
176 | list_add_tail(&entry->list, cedf_cpu_queue); | ||
177 | } | 129 | } |
178 | 130 | ||
131 | |||
179 | /* link_task_to_cpu - Update the link of a CPU. | 132 | /* link_task_to_cpu - Update the link of a CPU. |
180 | * Handles the case where the to-be-linked task is already | 133 | * Handles the case where the to-be-linked task is already |
181 | * scheduled on a different CPU. | 134 | * scheduled on a different CPU. |
@@ -189,9 +142,6 @@ static noinline void link_task_to_cpu(struct task_struct* linked, | |||
189 | 142 | ||
190 | BUG_ON(linked && !is_realtime(linked)); | 143 | BUG_ON(linked && !is_realtime(linked)); |
191 | 144 | ||
192 | /* Cannot link task to a CPU that doesn't belong to its partition... */ | ||
193 | BUG_ON(linked && remote_cedf(entry->cpu) != task_cedf(linked)); | ||
194 | |||
195 | /* Currently linked task is set to be unlinked. */ | 145 | /* Currently linked task is set to be unlinked. */ |
196 | if (entry->linked) { | 146 | if (entry->linked) { |
197 | entry->linked->rt_param.linked_on = NO_CPU; | 147 | entry->linked->rt_param.linked_on = NO_CPU; |
@@ -213,6 +163,9 @@ static noinline void link_task_to_cpu(struct task_struct* linked, | |||
213 | * the caller to get things right. | 163 | * the caller to get things right. |
214 | */ | 164 | */ |
215 | if (entry != sched) { | 165 | if (entry != sched) { |
166 | TRACE_TASK(linked, | ||
167 | "already scheduled on %d, updating link.\n", | ||
168 | sched->cpu); | ||
216 | tmp = sched->linked; | 169 | tmp = sched->linked; |
217 | linked->rt_param.linked_on = sched->cpu; | 170 | linked->rt_param.linked_on = sched->cpu; |
218 | sched->linked = linked; | 171 | sched->linked = linked; |
@@ -224,13 +177,12 @@ static noinline void link_task_to_cpu(struct task_struct* linked, | |||
224 | linked->rt_param.linked_on = entry->cpu; | 177 | linked->rt_param.linked_on = entry->cpu; |
225 | } | 178 | } |
226 | entry->linked = linked; | 179 | entry->linked = linked; |
227 | 180 | #ifdef WANT_ALL_SCHED_EVENTS | |
228 | if (entry->linked) | 181 | if (linked) |
229 | TRACE_TASK(entry->linked, "linked to CPU %d, state:%d\n", | 182 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); |
230 | entry->cpu, entry->linked->state); | ||
231 | else | 183 | else |
232 | TRACE("NULL linked to CPU %d\n", entry->cpu); | 184 | TRACE("NULL linked to %d.\n", entry->cpu); |
233 | 185 | #endif | |
234 | update_cpu_position(entry); | 186 | update_cpu_position(entry); |
235 | } | 187 | } |
236 | 188 | ||
@@ -246,6 +198,7 @@ static noinline void unlink(struct task_struct* t) | |||
246 | return; | 198 | return; |
247 | } | 199 | } |
248 | 200 | ||
201 | |||
249 | if (t->rt_param.linked_on != NO_CPU) { | 202 | if (t->rt_param.linked_on != NO_CPU) { |
250 | /* unlink */ | 203 | /* unlink */ |
251 | entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on); | 204 | entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on); |
@@ -258,95 +211,105 @@ static noinline void unlink(struct task_struct* t) | |||
258 | * been relinked to this CPU), thus it must be in some | 211 | * been relinked to this CPU), thus it must be in some |
259 | * queue. We must remove it from the list in this | 212 | * queue. We must remove it from the list in this |
260 | * case. | 213 | * case. |
214 | * | ||
215 | * in the C-EDF case it should be somewhere in the queue of ||
216 | * its domain; therefore we can get the domain using ||
217 | * task_cpu_cluster() ||
261 | */ | 218 | */ |
262 | remove(task_edf(t), t); | 219 | remove(&(task_cpu_cluster(t))->domain, t); |
263 | } | 220 | } |
264 | } | 221 | } |
265 | 222 | ||
266 | 223 | ||
267 | /* preempt - force a CPU to reschedule | 224 | /* preempt - force a CPU to reschedule |
268 | */ | 225 | */ |
269 | static noinline void preempt(cpu_entry_t *entry) | 226 | static void preempt(cpu_entry_t *entry) |
270 | { | 227 | { |
271 | preempt_if_preemptable(entry->scheduled, entry->cpu); | 228 | preempt_if_preemptable(entry->scheduled, entry->cpu); |
272 | } | 229 | } |
273 | 230 | ||
274 | /* requeue - Put an unlinked task into c-edf domain. | 231 | /* requeue - Put an unlinked task into the c-edf domain.
275 | * Caller must hold cedf_lock. | 232 | * Caller must hold cedf_lock. |
276 | */ | 233 | */ |
277 | static noinline void requeue(struct task_struct* task) | 234 | static noinline void requeue(struct task_struct* task) |
278 | { | 235 | { |
279 | cedf_domain_t* cedf; | 236 | cedf_domain_t *cluster = task_cpu_cluster(task); |
280 | rt_domain_t* edf; | ||
281 | |||
282 | BUG_ON(!task); | 237 | BUG_ON(!task); |
283 | /* sanity check rt_list before insertion */ | 238 | /* sanity check before insertion */ |
284 | BUG_ON(is_queued(task)); | 239 | BUG_ON(is_queued(task)); |
285 | 240 | ||
286 | /* Get correct real-time domain. */ | ||
287 | cedf = task_cedf(task); | ||
288 | edf = &cedf->domain; | ||
289 | |||
290 | if (is_released(task, litmus_clock())) | 241 | if (is_released(task, litmus_clock())) |
291 | __add_ready(edf, task); | 242 | __add_ready(&cluster->domain, task); |
292 | else { | 243 | else { |
293 | /* it has got to wait */ | 244 | /* it has got to wait */ |
294 | add_release(edf, task); | 245 | add_release(&cluster->domain, task); |
295 | } | 246 | } |
296 | } | 247 | } |
297 | 248 | ||
298 | static void check_for_preemptions(cedf_domain_t* cedf) | 249 | /* check for any necessary preemptions */ |
250 | static void check_for_preemptions(cedf_domain_t *cluster) | ||
299 | { | 251 | { |
300 | cpu_entry_t *last; | ||
301 | struct task_struct *task; | 252 | struct task_struct *task; |
302 | struct list_head *cedf_cpu_queue; | 253 | cpu_entry_t* last; |
303 | cedf_cpu_queue = &cedf->cedf_cpu_queue; | ||
304 | 254 | ||
305 | for(last = list_entry(cedf_cpu_queue->prev, cpu_entry_t, list); | 255 | for(last = lowest_prio_cpu(cluster); |
306 | edf_preemption_needed(&cedf->domain, last->linked); | 256 | edf_preemption_needed(&cluster->domain, last->linked); |
307 | last = list_entry(cedf_cpu_queue->prev, cpu_entry_t, list)) { | 257 | last = lowest_prio_cpu(cluster)) { |
308 | /* preemption necessary */ | 258 | /* preemption necessary */ |
309 | task = __take_ready(&cedf->domain); | 259 | task = __take_ready(&cluster->domain); |
310 | TRACE("check_for_preemptions: task %d linked to %d, state:%d\n", | 260 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", |
311 | task->pid, last->cpu, task->state); | 261 | task->pid, last->cpu); |
312 | if (last->linked) | 262 | if (last->linked) |
313 | requeue(last->linked); | 263 | requeue(last->linked); |
314 | link_task_to_cpu(task, last); | 264 | link_task_to_cpu(task, last); |
315 | preempt(last); | 265 | preempt(last); |
316 | } | 266 | } |
317 | |||
318 | } | 267 | } |
319 | 268 | ||
320 | /* cedf_job_arrival: task is either resumed or released */ | 269 | /* cedf_job_arrival: task is either resumed or released */ |
321 | static noinline void cedf_job_arrival(struct task_struct* task) | 270 | static noinline void cedf_job_arrival(struct task_struct* task) |
322 | { | 271 | { |
323 | cedf_domain_t* cedf; | 272 | cedf_domain_t *cluster = task_cpu_cluster(task); |
324 | rt_domain_t* edf; | ||
325 | |||
326 | BUG_ON(!task); | 273 | BUG_ON(!task); |
327 | 274 | ||
328 | /* Get correct real-time domain. */ | ||
329 | cedf = task_cedf(task); | ||
330 | edf = &cedf->domain; | ||
331 | |||
332 | /* first queue arriving job */ | ||
333 | requeue(task); | 275 | requeue(task); |
334 | 276 | check_for_preemptions(cluster); | |
335 | /* then check for any necessary preemptions */ | ||
336 | check_for_preemptions(cedf); | ||
337 | } | 277 | } |
338 | 278 | ||
339 | /* check for current job releases */ | ||
340 | static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | 279 | static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) |
341 | { | 280 | { |
342 | cedf_domain_t* cedf = container_of(rt, cedf_domain_t, domain); | 281 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); |
343 | unsigned long flags; | 282 | unsigned long flags; |
283 | |||
284 | spin_lock_irqsave(&cluster->lock, flags); | ||
285 | |||
286 | __merge_ready(&cluster->domain, tasks); | ||
287 | check_for_preemptions(cluster); | ||
288 | |||
289 | spin_unlock_irqrestore(&cluster->lock, flags); | ||
290 | } | ||
291 | |||
292 | /* caller holds cedf_lock */ | ||
293 | static noinline void job_completion(struct task_struct *t, int forced) | ||
294 | { | ||
295 | BUG_ON(!t); | ||
344 | 296 | ||
345 | spin_lock_irqsave(&cedf->slock, flags); | 297 | sched_trace_task_completion(t, forced); |
298 | |||
299 | TRACE_TASK(t, "job_completion().\n"); | ||
346 | 300 | ||
347 | __merge_ready(&cedf->domain, tasks); | 301 | /* set flags */ |
348 | check_for_preemptions(cedf); | 302 | set_rt_flags(t, RT_F_SLEEP); |
349 | spin_unlock_irqrestore(&cedf->slock, flags); | 303 | /* prepare for next period */ |
304 | prepare_for_next_period(t); | ||
305 | if (is_released(t, litmus_clock())) | ||
306 | sched_trace_task_release(t); | ||
307 | /* unlink */ | ||
308 | unlink(t); | ||
309 | /* requeue | ||
310 | * But don't requeue a blocking task. */ | ||
311 | if (is_running(t)) | ||
312 | cedf_job_arrival(t); | ||
350 | } | 313 | } |
351 | 314 | ||
352 | /* cedf_tick - this function is called for every local timer | 315 | /* cedf_tick - this function is called for every local timer |
@@ -357,8 +320,6 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | |||
357 | */ | 320 | */ |
358 | static void cedf_tick(struct task_struct* t) | 321 | static void cedf_tick(struct task_struct* t) |
359 | { | 322 | { |
360 | BUG_ON(!t); | ||
361 | |||
362 | if (is_realtime(t) && budget_exhausted(t)) { | 323 | if (is_realtime(t) && budget_exhausted(t)) { |
363 | if (!is_np(t)) { | 324 | if (!is_np(t)) { |
364 | /* np tasks will be preempted when they become | 325 | /* np tasks will be preempted when they become |
@@ -367,38 +328,17 @@ static void cedf_tick(struct task_struct* t) | |||
367 | set_tsk_need_resched(t); | 328 | set_tsk_need_resched(t); |
368 | set_will_schedule(); | 329 | set_will_schedule(); |
369 | TRACE("cedf_scheduler_tick: " | 330 | TRACE("cedf_scheduler_tick: " |
370 | "%d is preemptable (state:%d) " | 331 | "%d is preemptable " |
371 | " => FORCE_RESCHED\n", t->pid, t->state); | 332 | " => FORCE_RESCHED\n", t->pid); |
372 | } else if(is_user_np(t)) { | 333 | } else if (is_user_np(t)) { |
373 | TRACE("cedf_scheduler_tick: " | 334 | TRACE("cedf_scheduler_tick: " |
374 | "%d is non-preemptable (state:%d), " | 335 | "%d is non-preemptable, " |
375 | "preemption delayed.\n", t->pid, t->state); | 336 | "preemption delayed.\n", t->pid); |
376 | request_exit_np(t); | 337 | request_exit_np(t); |
377 | } | 338 | } |
378 | } | 339 | } |
379 | } | 340 | } |
380 | 341 | ||
381 | /* caller holds cedf_lock */ | ||
382 | static noinline void job_completion(struct task_struct *t, int forced) | ||
383 | { | ||
384 | BUG_ON(!t); | ||
385 | |||
386 | sched_trace_task_completion(t, forced); | ||
387 | |||
388 | TRACE_TASK(t, "job_completion(). [state:%d]\n", t->state); | ||
389 | |||
390 | /* set flags */ | ||
391 | set_rt_flags(t, RT_F_SLEEP); | ||
392 | /* prepare for next period */ | ||
393 | prepare_for_next_period(t); | ||
394 | /* unlink */ | ||
395 | unlink(t); | ||
396 | /* requeue | ||
397 | * But don't requeue a blocking task. */ | ||
398 | if (is_running(t)) | ||
399 | cedf_job_arrival(t); | ||
400 | } | ||
401 | |||
402 | /* Getting schedule() right is a bit tricky. schedule() may not make any | 342 | /* Getting schedule() right is a bit tricky. schedule() may not make any |
403 | * assumptions on the state of the current task since it may be called for a | 343 | * assumptions on the state of the current task since it may be called for a |
404 | * number of reasons. The reasons include a scheduler_tick() determined that it | 344 | * number of reasons. The reasons include a scheduler_tick() determined that it |
@@ -422,22 +362,12 @@ static noinline void job_completion(struct task_struct *t, int forced) | |||
422 | */ | 362 | */ |
423 | static struct task_struct* cedf_schedule(struct task_struct * prev) | 363 | static struct task_struct* cedf_schedule(struct task_struct * prev) |
424 | { | 364 | { |
425 | cedf_domain_t* cedf = local_cedf; | 365 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); |
426 | rt_domain_t* edf = &cedf->domain; | 366 | cedf_domain_t *cluster = entry->cluster; |
427 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | 367 | int out_of_time, sleep, preempt, np, exists, blocks; |
428 | int out_of_time, sleep, preempt, np, | 368 | struct task_struct* next = NULL; |
429 | exists, blocks; | 369 | |
430 | struct task_struct* next = NULL; | 370 | spin_lock(&cluster->lock); |
431 | |||
432 | BUG_ON(!prev); | ||
433 | BUG_ON(!cedf); | ||
434 | BUG_ON(!edf); | ||
435 | BUG_ON(!entry); | ||
436 | BUG_ON(cedf != remote_cedf(entry->cpu)); | ||
437 | BUG_ON(is_realtime(prev) && cedf != task_cedf(prev)); | ||
438 | |||
439 | /* Will be released in finish_switch. */ | ||
440 | spin_lock(&cedf->slock); | ||
441 | clear_will_schedule(); | 371 | clear_will_schedule(); |
442 | 372 | ||
443 | /* sanity checking */ | 373 | /* sanity checking */ |
@@ -453,6 +383,21 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
453 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | 383 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; |
454 | preempt = entry->scheduled != entry->linked; | 384 | preempt = entry->scheduled != entry->linked; |
455 | 385 | ||
386 | #ifdef WANT_ALL_SCHED_EVENTS | ||
387 | TRACE_TASK(prev, "invoked cedf_schedule.\n"); | ||
388 | #endif | ||
389 | |||
390 | if (exists) | ||
391 | TRACE_TASK(prev, | ||
392 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
393 | "state:%d sig:%d\n", | ||
394 | blocks, out_of_time, np, sleep, preempt, | ||
395 | prev->state, signal_pending(prev)); | ||
396 | if (entry->linked && preempt) | ||
397 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
398 | entry->linked->comm, entry->linked->pid); | ||
399 | |||
400 | |||
456 | /* If a task blocks we have no choice but to reschedule. | 401 | /* If a task blocks we have no choice but to reschedule. |
457 | */ | 402 | */ |
458 | if (blocks) | 403 | if (blocks) |
@@ -470,8 +415,8 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
470 | 415 | ||
471 | /* Any task that is preemptable and either exhausts its execution | 416 | /* Any task that is preemptable and either exhausts its execution |
472 | * budget or wants to sleep completes. We may have to reschedule after | 417 | * budget or wants to sleep completes. We may have to reschedule after |
473 | * this. Don't do a job completion if blocks (can't have timers | 418 | * this. Don't do a job completion if we block (can't have timers running |
474 | * running for blocked jobs). Preemption go first for the same reason. | 419 | * for blocked jobs). Preemption go first for the same reason. |
475 | */ | 420 | */ |
476 | if (!np && (out_of_time || sleep) && !blocks && !preempt) | 421 | if (!np && (out_of_time || sleep) && !blocks && !preempt) |
477 | job_completion(entry->scheduled, !sleep); | 422 | job_completion(entry->scheduled, !sleep); |
@@ -479,10 +424,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
479 | /* Link pending task if we became unlinked. | 424 | /* Link pending task if we became unlinked. |
480 | */ | 425 | */ |
481 | if (!entry->linked) | 426 | if (!entry->linked) |
482 | link_task_to_cpu(__take_ready(edf), entry); | 427 | link_task_to_cpu(__take_ready(&cluster->domain), entry); |
483 | 428 | ||
484 | /* The final scheduling decision. Do we need to switch for some reason? | 429 | /* The final scheduling decision. Do we need to switch for some reason? |
485 | * If linked different from scheduled select linked as next. | 430 | * If linked is different from scheduled, then select linked as next. |
486 | */ | 431 | */ |
487 | if ((!np || blocks) && | 432 | if ((!np || blocks) && |
488 | entry->linked != entry->scheduled) { | 433 | entry->linked != entry->scheduled) { |
@@ -491,76 +436,91 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
491 | entry->linked->rt_param.scheduled_on = entry->cpu; | 436 | entry->linked->rt_param.scheduled_on = entry->cpu; |
492 | next = entry->linked; | 437 | next = entry->linked; |
493 | } | 438 | } |
494 | if (entry->scheduled) { | 439 | if (entry->scheduled) { |
495 | /* not gonna be scheduled soon */ | 440 | /* not gonna be scheduled soon */ |
496 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | 441 | entry->scheduled->rt_param.scheduled_on = NO_CPU; |
497 | TRACE_TASK(entry->scheduled, "cedf_schedule: scheduled_on = NO_CPU\n"); | 442 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); |
498 | } | 443 | } |
499 | } else | 444 | } else |
500 | /* Only override Linux scheduler if we have real-time task | 445 | /* Only override Linux scheduler if we have a real-time task |
501 | * scheduled that needs to continue. | 446 | * scheduled that needs to continue. |
502 | */ | 447 | */ |
503 | if (exists) | 448 | if (exists) |
504 | next = prev; | 449 | next = prev; |
505 | 450 | ||
506 | spin_unlock(&cedf->slock); | 451 | spin_unlock(&cluster->lock); |
452 | |||
453 | #ifdef WANT_ALL_SCHED_EVENTS | ||
454 | TRACE("cedf_lock released, next=0x%p\n", next); | ||
455 | |||
456 | if (next) | ||
457 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
458 | else if (exists && !next) | ||
459 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
460 | #endif | ||
461 | |||
507 | 462 | ||
508 | return next; | 463 | return next; |
509 | } | 464 | } |
510 | 465 | ||
466 | |||
511 | /* _finish_switch - we just finished the switch away from prev | 467 | /* _finish_switch - we just finished the switch away from prev |
512 | */ | 468 | */ |
513 | static void cedf_finish_switch(struct task_struct *prev) | 469 | static void cedf_finish_switch(struct task_struct *prev) |
514 | { | 470 | { |
515 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | 471 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); |
516 | |||
517 | BUG_ON(!prev); | ||
518 | BUG_ON(!entry); | ||
519 | 472 | ||
520 | entry->scheduled = is_realtime(current) ? current : NULL; | 473 | entry->scheduled = is_realtime(current) ? current : NULL; |
474 | #ifdef WANT_ALL_SCHED_EVENTS | ||
475 | TRACE_TASK(prev, "switched away from\n"); | ||
476 | #endif | ||
521 | } | 477 | } |
522 | 478 | ||
479 | |||
523 | /* Prepare a task for running in RT mode | 480 | /* Prepare a task for running in RT mode |
524 | */ | 481 | */ |
525 | static void cedf_task_new(struct task_struct *t, int on_rq, int running) | 482 | static void cedf_task_new(struct task_struct * t, int on_rq, int running) |
526 | { | 483 | { |
527 | unsigned long flags; | 484 | unsigned long flags; |
528 | cedf_domain_t* cedf = task_cedf(t); | ||
529 | cpu_entry_t* entry; | 485 | cpu_entry_t* entry; |
486 | cedf_domain_t* cluster; | ||
487 | |||
488 | TRACE("cedf: task new %d\n", t->pid); ||
489 | |||
490 | /* the cluster doesn't change even if t is running */ | ||
491 | cluster = task_cpu_cluster(t); | ||
530 | 492 | ||
531 | BUG_ON(!cedf); | 493 | spin_lock_irqsave(&cluster->domain.ready_lock, flags); |
494 | |||
495 | /* setup job params */ | ||
496 | release_at(t, litmus_clock()); | ||
532 | 497 | ||
533 | spin_lock_irqsave(&cedf->slock, flags); | ||
534 | if (running) { | 498 | if (running) { |
535 | entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); | 499 | entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); |
536 | BUG_ON(!entry); | ||
537 | BUG_ON(entry->scheduled); | 500 | BUG_ON(entry->scheduled); |
501 | |||
538 | entry->scheduled = t; | 502 | entry->scheduled = t; |
539 | t->rt_param.scheduled_on = task_cpu(t); | 503 | tsk_rt(t)->scheduled_on = task_cpu(t); |
540 | } else | 504 | } else { |
541 | t->rt_param.scheduled_on = NO_CPU; | 505 | t->rt_param.scheduled_on = NO_CPU; |
542 | t->rt_param.linked_on = NO_CPU; | 506 | } |
543 | 507 | t->rt_param.linked_on = NO_CPU; | |
544 | /* setup job params */ | ||
545 | release_at(t, litmus_clock()); | ||
546 | 508 | ||
547 | cedf_job_arrival(t); | 509 | cedf_job_arrival(t); |
548 | spin_unlock_irqrestore(&cedf->slock, flags); | 510 | spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags); |
549 | } | 511 | } |
550 | 512 | ||
551 | |||
552 | static void cedf_task_wake_up(struct task_struct *task) | 513 | static void cedf_task_wake_up(struct task_struct *task) |
553 | { | 514 | { |
554 | unsigned long flags; | 515 | unsigned long flags; |
555 | cedf_domain_t* cedf; | 516 | lt_t now; |
556 | lt_t now; | 517 | cedf_domain_t *cluster; |
557 | 518 | ||
558 | BUG_ON(!task); | 519 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); |
559 | 520 | ||
560 | cedf = task_cedf(task); | 521 | cluster = task_cpu_cluster(task); |
561 | BUG_ON(!cedf); | ||
562 | 522 | ||
563 | spin_lock_irqsave(&cedf->slock, flags); | 523 | spin_lock_irqsave(&cluster->lock, flags); |
564 | /* We need to take suspensions because of semaphores into | 524 | /* We need to take suspensions because of semaphores into |
565 | * account! If a job resumes after being suspended due to acquiring | 525 | * account! If a job resumes after being suspended due to acquiring |
566 | * a semaphore, it should never be treated as a new job release. | 526 | * a semaphore, it should never be treated as a new job release. |
@@ -574,48 +534,49 @@ static void cedf_task_wake_up(struct task_struct *task) | |||
574 | release_at(task, now); | 534 | release_at(task, now); |
575 | sched_trace_task_release(task); | 535 | sched_trace_task_release(task); |
576 | } | 536 | } |
577 | else if (task->rt.time_slice) | 537 | else { |
578 | /* came back in time before deadline | 538 | if (task->rt.time_slice) { |
579 | */ | 539 | /* came back in time before deadline |
580 | set_rt_flags(task, RT_F_RUNNING); | 540 | */ |
541 | set_rt_flags(task, RT_F_RUNNING); | ||
542 | } | ||
543 | } | ||
581 | } | 544 | } |
582 | cedf_job_arrival(task); | 545 | cedf_job_arrival(task); |
583 | spin_unlock_irqrestore(&cedf->slock, flags); | 546 | spin_unlock_irqrestore(&cluster->lock, flags); |
584 | } | 547 | } |
585 | 548 | ||
586 | |||
587 | static void cedf_task_block(struct task_struct *t) | 549 | static void cedf_task_block(struct task_struct *t) |
588 | { | 550 | { |
589 | unsigned long flags; | 551 | unsigned long flags; |
552 | cedf_domain_t *cluster; | ||
590 | 553 | ||
591 | BUG_ON(!t); | 554 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); |
592 | 555 | ||
593 | /* unlink if necessary */ | 556 | cluster = task_cpu_cluster(t); |
594 | spin_lock_irqsave(&task_cedf(t)->slock, flags); | ||
595 | 557 | ||
596 | t->rt_param.scheduled_on = NO_CPU; | 558 | /* unlink if necessary */ |
559 | spin_lock_irqsave(&cluster->lock, flags); | ||
597 | unlink(t); | 560 | unlink(t); |
598 | 561 | spin_unlock_irqrestore(&cluster->lock, flags); | |
599 | spin_unlock_irqrestore(&task_cedf(t)->slock, flags); | ||
600 | 562 | ||
601 | BUG_ON(!is_realtime(t)); | 563 | BUG_ON(!is_realtime(t)); |
602 | } | 564 | } |
603 | 565 | ||
566 | |||
604 | static void cedf_task_exit(struct task_struct * t) | 567 | static void cedf_task_exit(struct task_struct * t) |
605 | { | 568 | { |
606 | unsigned long flags; | 569 | unsigned long flags; |
607 | 570 | cedf_domain_t *cluster = task_cpu_cluster(t); | |
608 | BUG_ON(!t); | ||
609 | 571 | ||
610 | /* unlink if necessary */ | 572 | /* unlink if necessary */ |
611 | spin_lock_irqsave(&task_cedf(t)->slock, flags); | 573 | spin_lock_irqsave(&cluster->lock, flags); |
612 | unlink(t); | 574 | unlink(t); |
613 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | 575 | if (tsk_rt(t)->scheduled_on != NO_CPU) { |
614 | cedf_cpu_entries_array[tsk_rt(t)->scheduled_on]-> | 576 | cluster->cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; |
615 | scheduled = NULL; | ||
616 | tsk_rt(t)->scheduled_on = NO_CPU; | 577 | tsk_rt(t)->scheduled_on = NO_CPU; |
617 | } | 578 | } |
618 | spin_unlock_irqrestore(&task_cedf(t)->slock, flags); | 579 | spin_unlock_irqrestore(&cluster->lock, flags); |
619 | 580 | ||
620 | BUG_ON(!is_realtime(t)); | 581 | BUG_ON(!is_realtime(t)); |
621 | TRACE_TASK(t, "RIP\n"); | 582 | TRACE_TASK(t, "RIP\n"); |
@@ -623,10 +584,147 @@ static void cedf_task_exit(struct task_struct * t) | |||
623 | 584 | ||
624 | static long cedf_admit_task(struct task_struct* tsk) | 585 | static long cedf_admit_task(struct task_struct* tsk) |
625 | { | 586 | { |
626 | return (task_cpu(tsk) >= task_cedf(tsk)->first_cpu && | 587 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; |
627 | task_cpu(tsk) <= task_cedf(tsk)->last_cpu) ? 0 : -EINVAL; | 588 | } |
589 | |||
590 | /* total number of clusters */ ||
591 | static int num_clusters; ||
592 | /* we do not support clusters of different sizes */ ||
593 | static unsigned int cluster_size; | ||
594 | |||
595 | #ifdef VERBOSE_INIT | ||
596 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
597 | { | ||
598 | int chk; | ||
599 | char buf[255]; | ||
600 | |||
601 | chk = cpulist_scnprintf(buf, 254, mask); | ||
602 | buf[chk] = '\0'; | ||
603 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
604 | |||
628 | } | 605 | } |
606 | #endif | ||
629 | 607 | ||
608 | static int clusters_allocated = 0; | ||
609 | |||
610 | static void cleanup_cedf(void) | ||
611 | { | ||
612 | int i; | ||
613 | |||
614 | if (clusters_allocated) { | ||
615 | for (i = 0; i < num_clusters; i++) { | ||
616 | kfree(cedf[i].cpus); | ||
617 | kfree(cedf[i].heap_node); | ||
618 | free_cpumask_var(cedf[i].cpu_map); | ||
619 | } | ||
620 | |||
621 | kfree(cedf); | ||
622 | } | ||
623 | } | ||
624 | |||
625 | static long cedf_activate_plugin(void) | ||
626 | { | ||
627 | int i, j, cpu, ccpu, cpu_count; | ||
628 | cpu_entry_t *entry; | ||
629 | |||
630 | cpumask_var_t mask; | ||
631 | int chk = 0; | ||
632 | |||
633 | /* de-allocate old clusters, if any */ | ||
634 | cleanup_cedf(); | ||
635 | |||
636 | printk(KERN_INFO "C-EDF: Activate Plugin, cache index = %d\n", | ||
637 | cluster_cache_index); | ||
638 | |||
639 | /* need to get cluster_size first */ | ||
640 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
641 | return -ENOMEM; | ||
642 | |||
643 | chk = get_shared_cpu_map(mask, 0, cluster_cache_index); | ||
644 | if (chk) { | ||
645 | /* if chk != 0 then it is the max allowed index */ | ||
646 | printk(KERN_INFO "C-EDF: Cannot support cache index = %d\n", | ||
647 | cluster_cache_index); | ||
648 | printk(KERN_INFO "C-EDF: Using cache index = %d\n", | ||
649 | chk); | ||
650 | cluster_cache_index = chk; | ||
651 | } | ||
652 | |||
653 | cluster_size = cpumask_weight(mask); | ||
654 | |||
655 | if ((num_online_cpus() % cluster_size) != 0) { | ||
656 | /* this can't be right, some cpus are left out */ | ||
657 | printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n", | ||
658 | num_online_cpus(), cluster_size); | ||
659 | return -1; | ||
660 | } | ||
661 | |||
662 | num_clusters = num_online_cpus() / cluster_size; | ||
663 | printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", | ||
664 | num_clusters, cluster_size); | ||
665 | |||
666 | /* initialize clusters */ | ||
667 | cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); | ||
668 | for (i = 0; i < num_clusters; i++) { | ||
669 | |||
670 | cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
671 | GFP_ATOMIC); | ||
672 | cedf[i].heap_node = kmalloc( | ||
673 | cluster_size * sizeof(struct bheap_node), | ||
674 | GFP_ATOMIC); | ||
675 | bheap_init(&(cedf[i].cpu_heap)); | ||
676 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); | ||
677 | |||
678 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | ||
679 | return -ENOMEM; | ||
680 | } | ||
681 | |||
682 | /* cycle through clusters and add cpus to them */ ||
683 | for (i = 0; i < num_clusters; i++) { | ||
684 | |||
685 | for_each_online_cpu(cpu) { | ||
686 | /* check if the cpu is already in a cluster */ | ||
687 | for (j = 0; j < num_clusters; j++) | ||
688 | if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
689 | break; | ||
690 | /* if it is in a cluster go to next cpu */ | ||
691 | if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
692 | continue; | ||
693 | |||
694 | /* this cpu isn't in any cluster */ | ||
695 | /* get the shared cpus */ | ||
696 | get_shared_cpu_map(mask, cpu, cluster_cache_index); | ||
697 | cpumask_copy(cedf[i].cpu_map, mask); | ||
698 | #ifdef VERBOSE_INIT | ||
699 | print_cluster_topology(mask, cpu); | ||
700 | #endif | ||
701 | /* add cpus to current cluster and init cpu_entry_t */ | ||
702 | cpu_count = 0; | ||
703 | for_each_cpu(ccpu, cedf[i].cpu_map) { | ||
704 | |||
705 | entry = &per_cpu(cedf_cpu_entries, ccpu); | ||
706 | cedf[i].cpus[cpu_count] = entry; | ||
707 | atomic_set(&entry->will_schedule, 0); | ||
708 | entry->cpu = ccpu; | ||
709 | entry->cluster = &cedf[i]; | ||
710 | entry->hn = &(cedf[i].heap_node[cpu_count]); | ||
711 | bheap_node_init(&entry->hn, entry); | ||
712 | |||
713 | cpu_count++; | ||
714 | |||
715 | entry->linked = NULL; | ||
716 | entry->scheduled = NULL; | ||
717 | update_cpu_position(entry); | ||
718 | } | ||
719 | /* done with this cluster */ | ||
720 | break; | ||
721 | } | ||
722 | } | ||
723 | |||
724 | free_cpumask_var(mask); | ||
725 | clusters_allocated = 1; | ||
726 | return 0; | ||
727 | } | ||
630 | 728 | ||
631 | /* Plugin object */ | 729 | /* Plugin object */ |
632 | static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { | 730 | static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { |
@@ -639,89 +737,20 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { | |||
639 | .schedule = cedf_schedule, | 737 | .schedule = cedf_schedule, |
640 | .task_wake_up = cedf_task_wake_up, | 738 | .task_wake_up = cedf_task_wake_up, |
641 | .task_block = cedf_task_block, | 739 | .task_block = cedf_task_block, |
642 | .admit_task = cedf_admit_task | 740 | .admit_task = cedf_admit_task, |
741 | .activate_plugin = cedf_activate_plugin, | ||
643 | }; | 742 | }; |
644 | 743 | ||
645 | static void cedf_domain_init(int first_cpu, int last_cpu) | ||
646 | { | ||
647 | int cpu; | ||
648 | |||
649 | /* Create new domain for this cluster. */ | ||
650 | cedf_domain_t *new_cedf_domain = kmalloc(sizeof(*new_cedf_domain), | ||
651 | GFP_KERNEL); | ||
652 | |||
653 | /* Initialize cluster domain. */ | ||
654 | edf_domain_init(&new_cedf_domain->domain, NULL, | ||
655 | cedf_release_jobs); | ||
656 | new_cedf_domain->first_cpu = first_cpu; | ||
657 | new_cedf_domain->last_cpu = last_cpu; | ||
658 | INIT_LIST_HEAD(&new_cedf_domain->cedf_cpu_queue); | ||
659 | |||
660 | /* Assign all cpus in cluster to point to this domain. */ | ||
661 | for (cpu = first_cpu; cpu <= last_cpu; cpu++) { | ||
662 | remote_cedf(cpu) = new_cedf_domain; | ||
663 | cedf_domains_array[cpu] = new_cedf_domain; | ||
664 | } | ||
665 | } | ||
666 | 744 | ||
667 | static int __init init_cedf(void) | 745 | static int __init init_cedf(void) |
668 | { | 746 | { |
669 | int cpu; | ||
670 | cpu_entry_t *entry; | ||
671 | |||
672 | /* num_online_cpus() should have been set already | ||
673 | * if the number of available cpus is less then the cluster | ||
674 | * size (currently 4) then it is pointless trying to use | ||
675 | * CEDF, so we disable this plugin | ||
676 | */ | ||
677 | if(num_online_cpus() < cluster_size) { | ||
678 | printk(KERN_INFO "Not registering C-EDF plugin: " | ||
679 | "Num Online Cpus (%d) < Min Cluster Size (%d)\n", | ||
680 | num_online_cpus(), cluster_size); | ||
681 | do_cleanup = 0; | ||
682 | return 0; | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * initialize short_cut for per-cpu cedf state; | ||
687 | * there may be a problem here if someone removes a cpu | ||
688 | * while we are doing this initialization... and if cpus | ||
689 | * are added / removed later... is it a _real_ problem for cedf? | ||
690 | */ | ||
691 | cedf_cpu_entries_array = kmalloc( | ||
692 | sizeof(cpu_entry_t *) * num_online_cpus(), | ||
693 | GFP_KERNEL); | ||
694 | |||
695 | cedf_domains_array = kmalloc( | ||
696 | sizeof(cedf_domain_t *) * num_online_cpus(), | ||
697 | GFP_KERNEL); | ||
698 | |||
699 | /* initialize CPU state */ | ||
700 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
701 | entry = &per_cpu(cedf_cpu_entries, cpu); | ||
702 | cedf_cpu_entries_array[cpu] = entry; | ||
703 | atomic_set(&entry->will_schedule, 0); | ||
704 | entry->linked = NULL; | ||
705 | entry->scheduled = NULL; | ||
706 | entry->cpu = cpu; | ||
707 | INIT_LIST_HEAD(&entry->list); | ||
708 | } | ||
709 | |||
710 | /* initialize all cluster domains */ | ||
711 | for (cpu = 0; cpu < num_online_cpus(); cpu += cluster_size) | ||
712 | cedf_domain_init(cpu, cpu+cluster_size-1); | ||
713 | |||
714 | return register_sched_plugin(&cedf_plugin); | 747 | return register_sched_plugin(&cedf_plugin); |
715 | } | 748 | } |
716 | 749 | ||
717 | static void clean_cedf(void) | 750 | static void clean_cedf(void) |
718 | { | 751 | { |
719 | if(do_cleanup) { | 752 | cleanup_cedf(); |
720 | kfree(cedf_cpu_entries_array); | ||
721 | kfree(cedf_domains_array); | ||
722 | } | ||
723 | } | 753 | } |
724 | 754 | ||
725 | module_init(init_cedf); | 755 | module_init(init_cedf); |
726 | module_exit(clean_cedf); | 756 | module_exit(clean_cedf); |
727 | |||
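
A small aside on the heap ordering used above: cpu_lower_prio() hands its arguments to edf_higher_prio() in reverse, so the per-cluster CPU heap keeps the CPU whose linked job has the lowest priority (idle CPUs included) at the top, which is exactly the entry check_for_preemptions() peeks at. The following stand-alone illustration of that inversion uses plain userspace C with made-up deadlines; it is not the kernel bheap API:

#include <stdio.h>

struct cpu_slot { int cpu; long deadline; };	/* deadline 0 means idle */

/* EDF: earlier deadline wins; an idle CPU never has higher priority */
static int higher_prio(long a, long b)
{
	if (a == 0)
		return 0;
	if (b == 0)
		return 1;
	return a < b;
}

/* mirrors cpu_lower_prio(): "a" comes first when "b" outranks it */
static int lower_prio_first(const struct cpu_slot *a, const struct cpu_slot *b)
{
	return higher_prio(b->deadline, a->deadline);
}

int main(void)
{
	struct cpu_slot cpus[] = { { 0, 10 }, { 1, 0 }, { 2, 25 } };
	int i, top = 0;

	/* a linear scan stands in for the heap's peek operation */
	for (i = 1; i < 3; i++)
		if (lower_prio_first(&cpus[i], &cpus[top]))
			top = i;

	/* prints "preemption target: CPU 1", the idle CPU */
	printf("preemption target: CPU %d\n", cpus[top].cpu);
	return 0;
}
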
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index bc7c0e93fb18..3767b30e610a 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -171,6 +171,14 @@ struct sched_plugin linux_sched_plugin = { | |||
171 | }; | 171 | }; |
172 | 172 | ||
173 | /* | 173 | /* |
174 | * The cluster size is needed in C-EDF: it makes sense only to cluster | ||
175 | * around L2 or L3, so if cluster_cache_index = 2 (default) we cluster | ||
176 | * all the CPUs that share an L2 cache, while with cluster_cache_index = 3 ||
177 | * we cluster all CPUs that share an L3 cache. ||
178 | */ | ||
179 | int cluster_cache_index = 2; | ||
180 | |||
181 | /* | ||
174 | * The reference to current plugin that is used to schedule tasks within | 182 | * The reference to current plugin that is used to schedule tasks within |
175 | * the system. It stores references to actual function implementations | 183 | * the system. It stores references to actual function implementations |
176 | * Should be initialized by calling "init_***_plugin()" | 184 | * Should be initialized by calling "init_***_plugin()" |