 arch/x86/include/asm/processor.h      |   2
 arch/x86/kernel/cpu/intel_cacheinfo.c |  17
 include/litmus/sched_plugin.h         |   3
 kernel/sched.c                        |   4
 litmus/litmus.c                       |  82
 litmus/rt_domain.c                    |   4
 litmus/sched_cedf.c                   | 711
 litmus/sched_plugin.c                 |   8
 8 files changed, 483 insertions(+), 348 deletions(-)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c9786480f0fe..e75daac64962 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -172,6 +172,8 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
172extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); 172extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
173extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 173extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
174extern unsigned short num_cache_leaves; 174extern unsigned short num_cache_leaves;
175extern int get_shared_cpu_map(cpumask_var_t mask,
176 unsigned int cpu, int index);
175 177
176extern void detect_extended_topology(struct cpuinfo_x86 *c); 178extern void detect_extended_topology(struct cpuinfo_x86 *c);
177extern void detect_ht(struct cpuinfo_x86 *c); 179extern void detect_ht(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 804c40e2bc3e..3167c3d72596 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -515,6 +515,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
515static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); 515static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
516#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) 516#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
517 517
518/* returns CPUs that share the index cache with cpu */
519int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
520{
521 int ret = 0;
522 struct _cpuid4_info *this_leaf;
523
524 if (index >= num_cache_leaves) {
525 index = num_cache_leaves - 1;
526 ret = index;
527 }
528
529 this_leaf = CPUID4_INFO_IDX(cpu,index);
530 cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map));
531
532 return ret;
533}
534
518#ifdef CONFIG_SMP 535#ifdef CONFIG_SMP
519static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 536static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
520{ 537{
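
The helper added above is consumed later in this series by the C-EDF plugin. A minimal calling sketch (kernel context assumed; it mirrors the use in cedf_activate_plugin() further down, with error handling trimmed; the wrapper name probe_cluster_size is made up for illustration):

static int probe_cluster_size(int cache_index)
{
        cpumask_var_t mask;
        int size, clamped;

        if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
                return -ENOMEM;

        /* CPUs that share the cache at cache_index with CPU 0 */
        clamped = get_shared_cpu_map(mask, 0, cache_index);
        if (clamped)
                /* requested index too deep; clamped is the deepest valid one */
                printk(KERN_INFO "falling back to cache index %d\n", clamped);

        size = cpumask_weight(mask);
        free_cpumask_var(mask);
        return size;
}
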
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 2d856d587041..9c1c9f28ba79 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -133,6 +133,9 @@ struct sched_plugin {
133 133
134extern struct sched_plugin *litmus; 134extern struct sched_plugin *litmus;
135 135
136/* cluster size: cache_index = 2 clusters around L2, cache_index = 3 around L3 */
137extern int cluster_cache_index;
138
136int register_sched_plugin(struct sched_plugin* plugin); 139int register_sched_plugin(struct sched_plugin* plugin);
137struct sched_plugin* find_sched_plugin(const char* name); 140struct sched_plugin* find_sched_plugin(const char* name);
138int print_sched_plugins(char* buf, int max); 141int print_sched_plugins(char* buf, int max);
diff --git a/kernel/sched.c b/kernel/sched.c
index 1701eaebb79c..adb5e923cc61 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5520,20 +5520,16 @@ need_resched_nonpreemptible:
5520 spin_unlock_irq(&rq->lock); 5520 spin_unlock_irq(&rq->lock);
5521 } 5521 }
5522 5522
5523 TS_SCHED2_START(current);
5524 sched_trace_task_switch_to(current); 5523 sched_trace_task_switch_to(current);
5525 5524
5526 post_schedule(rq); 5525 post_schedule(rq);
5527 5526
5528 if (unlikely(reacquire_kernel_lock(current) < 0)) { 5527 if (unlikely(reacquire_kernel_lock(current) < 0)) {
5529 TS_SCHED2_END(current);
5530 goto need_resched_nonpreemptible; 5528 goto need_resched_nonpreemptible;
5531 } 5529 }
5532 5530
5533 preempt_enable_no_resched(); 5531 preempt_enable_no_resched();
5534 5532
5535 TS_SCHED2_END(current);
5536
5537 if (need_resched()) 5533 if (need_resched())
5538 goto need_resched; 5534 goto need_resched;
5539 5535
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 3cf7cb9e8a9f..e43596a5104c 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -24,6 +24,8 @@
24/* Number of RT tasks that exist in the system */ 24/* Number of RT tasks that exist in the system */
25atomic_t rt_task_count = ATOMIC_INIT(0); 25atomic_t rt_task_count = ATOMIC_INIT(0);
26static DEFINE_SPINLOCK(task_transition_lock); 26static DEFINE_SPINLOCK(task_transition_lock);
27/* synchronize plugin switching */
28atomic_t cannot_use_plugin = ATOMIC_INIT(0);
27 29
28/* Give log messages sequential IDs. */ 30/* Give log messages sequential IDs. */
29atomic_t __log_seq_no = ATOMIC_INIT(0); 31atomic_t __log_seq_no = ATOMIC_INIT(0);
@@ -369,13 +371,17 @@ void litmus_exit_task(struct task_struct* tsk)
369 } 371 }
370} 372}
371 373
374/* IPI callback to synchronize plugin switching */
375static void synch_on_plugin_switch(void* info)
376{
377 while (atomic_read(&cannot_use_plugin))
378 cpu_relax();
379}
380
372/* Switching a plugin in use is tricky. 381/* Switching a plugin in use is tricky.
373 * We must watch out that no real-time tasks exists 382 * We must watch out that no real-time tasks exists
374 * (and that none is created in parallel) and that the plugin is not 383 * (and that none is created in parallel) and that the plugin is not
375 * currently in use on any processor (in theory). 384 * currently in use on any processor (in theory).
376 *
377 * For now, we don't enforce the second part since it is unlikely to cause
378 * any trouble by itself as long as we don't unload modules.
379 */ 385 */
380int switch_sched_plugin(struct sched_plugin* plugin) 386int switch_sched_plugin(struct sched_plugin* plugin)
381{ 387{
@@ -384,6 +390,11 @@ int switch_sched_plugin(struct sched_plugin* plugin)
384 390
385 BUG_ON(!plugin); 391 BUG_ON(!plugin);
386 392
393 /* forbid other cpus to use the plugin */
394 atomic_set(&cannot_use_plugin, 1);
395 /* send IPI to force other CPUs to synch with us */
396 smp_call_function(synch_on_plugin_switch, NULL, 0);
397
387 /* stop task transitions */ 398 /* stop task transitions */
388 spin_lock_irqsave(&task_transition_lock, flags); 399 spin_lock_irqsave(&task_transition_lock, flags);
389 400
@@ -404,6 +415,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
404 ret = -EBUSY; 415 ret = -EBUSY;
405out: 416out:
406 spin_unlock_irqrestore(&task_transition_lock, flags); 417 spin_unlock_irqrestore(&task_transition_lock, flags);
418 atomic_set(&cannot_use_plugin, 0);
407 return ret; 419 return ret;
408} 420}
409 421
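
Because the flag, the IPI callback, and the release point are spread over three hunks, the whole handshake reads roughly as follows (a condensed sketch of the code above, not an additional change):

static atomic_t cannot_use_plugin = ATOMIC_INIT(0);

/* runs on every other CPU; parks it until the switch is done */
static void synch_on_plugin_switch(void *info)
{
        while (atomic_read(&cannot_use_plugin))
                cpu_relax();
}

int switch_sched_plugin(struct sched_plugin *plugin)
{
        unsigned long flags;
        int ret = 0;

        atomic_set(&cannot_use_plugin, 1);
        /* wait == 0: we do not block until the handlers return */
        smp_call_function(synch_on_plugin_switch, NULL, 0);

        spin_lock_irqsave(&task_transition_lock, flags);
        /* ... deactivate the old plugin, activate the new one ... */
        spin_unlock_irqrestore(&task_transition_lock, flags);

        atomic_set(&cannot_use_plugin, 0);      /* release the spinning CPUs */
        return ret;
}
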
@@ -554,6 +566,55 @@ static int proc_write_curr(struct file *file,
554 return len; 566 return len;
555} 567}
556 568
569static int proc_read_cluster_size(char *page, char **start,
570 off_t off, int count,
571 int *eof, void *data)
572{
573 int len;
574 if (cluster_cache_index == 2)
575 len = snprintf(page, PAGE_SIZE, "L2\n");
576 else if (cluster_cache_index == 3)
577 len = snprintf(page, PAGE_SIZE, "L3\n");
578 else /* (cluster_cache_index == 1) */
579 len = snprintf(page, PAGE_SIZE, "L1\n");
580
581 return len;
582}
583
584static int proc_write_cluster_size(struct file *file,
585 const char *buffer,
586 unsigned long count,
587 void *data)
588{
589 int len;
590 /* L2, L3 */
591 char cache_name[33];
592
593 if(count > 32)
594 len = 32;
595 else
596 len = count;
597
598 if(copy_from_user(cache_name, buffer, len))
599 return -EFAULT;
600
601 cache_name[len] = '\0';
602 /* chomp name */
603 if (len > 1 && cache_name[len - 1] == '\n')
604 cache_name[len - 1] = '\0';
605
606 /* do a quick and dirty comparison to find the cluster size */
607 if (!strcmp(cache_name, "L2"))
608 cluster_cache_index = 2;
609 else if (!strcmp(cache_name, "L3"))
610 cluster_cache_index = 3;
611 else if (!strcmp(cache_name, "L1"))
612 cluster_cache_index = 1;
613 else
614 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
615
616 return len;
617}
557 618
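
The new /proc/litmus/cluster_cache entry is meant to be written before C-EDF is activated, since cedf_activate_plugin() (later in this patch) reads cluster_cache_index when it builds the clusters. From a shell this is simply `echo L3 > /proc/litmus/cluster_cache`; a user-space sketch of the same round trip (accepted values are L1, L2, L3, as the write handler above shows):

#include <stdio.h>

int main(void)
{
        char buf[16] = "";
        FILE *f = fopen("/proc/litmus/cluster_cache", "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        fputs("L3\n", f);               /* cluster around the L3 cache */
        fclose(f);

        f = fopen("/proc/litmus/cluster_cache", "r");
        if (f && fgets(buf, sizeof(buf), f))
                printf("cluster_cache is now: %s", buf);
        if (f)
                fclose(f);
        return 0;
}
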
558static int proc_read_release_master(char *page, char **start, 619static int proc_read_release_master(char *page, char **start,
559 off_t off, int count, 620 off_t off, int count,
@@ -609,6 +670,7 @@ static struct proc_dir_entry *litmus_dir = NULL,
609 *curr_file = NULL, 670 *curr_file = NULL,
610 *stat_file = NULL, 671 *stat_file = NULL,
611 *plugs_file = NULL, 672 *plugs_file = NULL,
673 *clus_cache_idx_file = NULL,
612 *release_master_file = NULL; 674 *release_master_file = NULL;
613 675
614static int __init init_litmus_proc(void) 676static int __init init_litmus_proc(void)
@@ -639,6 +701,16 @@ static int __init init_litmus_proc(void)
639 release_master_file->read_proc = proc_read_release_master; 701 release_master_file->read_proc = proc_read_release_master;
640 release_master_file->write_proc = proc_write_release_master; 702 release_master_file->write_proc = proc_write_release_master;
641 703
704 clus_cache_idx_file = create_proc_entry("cluster_cache",
705 0644, litmus_dir);
706 if (!clus_cache_idx_file) {
707 printk(KERN_ERR "Could not allocate cluster_cache "
708 "procfs entry.\n");
709 return -ENOMEM;
710 }
711 clus_cache_idx_file->read_proc = proc_read_cluster_size;
712 clus_cache_idx_file->write_proc = proc_write_cluster_size;
713
642 stat_file = create_proc_read_entry("stats", 0444, litmus_dir, 714 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
643 proc_read_stats, NULL); 715 proc_read_stats, NULL);
644 716
@@ -656,6 +728,10 @@ static void exit_litmus_proc(void)
656 remove_proc_entry("stats", litmus_dir); 728 remove_proc_entry("stats", litmus_dir);
657 if (curr_file) 729 if (curr_file)
658 remove_proc_entry("active_plugin", litmus_dir); 730 remove_proc_entry("active_plugin", litmus_dir);
731 if (clus_cache_idx_file)
732 remove_proc_entry("cluster_cache", litmus_dir);
733 if (release_master_file)
734 remove_proc_entry("release_master", litmus_dir);
659 if (litmus_dir) 735 if (litmus_dir)
660 remove_proc_entry("litmus", NULL); 736 remove_proc_entry("litmus", NULL);
661} 737}
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 0ed6d5cbbfc5..609ff0f82abb 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -301,6 +301,10 @@ void __add_release(rt_domain_t* rt, struct task_struct *task)
301 task->rt_param.domain = rt; 301 task->rt_param.domain = rt;
302 302
303 /* start release timer */ 303 /* start release timer */
304 TS_SCHED2_START(task);
305
304 arm_release_timer(rt); 306 arm_release_timer(rt);
307
308 TS_SCHED2_END(task);
305} 309}
306 310
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index d0767ce9e178..da44b451c9ad 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -1,9 +1,20 @@
1/* 1/*
2 * kernel/sched_cedf.c 2 * litmus/sched_cedf.c
3 * 3 *
4 * Implementation of the Clustered EDF (C-EDF) scheduling algorithm. 4 * Implementation of the C-EDF scheduling algorithm.
5 * Linking is included so that support for synchronization (e.g., through 5 *
6 * the implementation of a "CSN-EDF" algorithm) can be added later if desired. 6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
 8 * - Cluster topology is detected automatically (this is arch-dependent
 9 * and currently works only on x86 --- and only with modern
 10 * CPUs that export cpuid4 information)
 11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - default clustering is around L2 cache (cache index = 2)
15 * supported clusters are: L1 (private cache: pedf), L2, L3
16 *
17 * For details on functions, take a look at sched_gsn_edf.c
7 * 18 *
8 * This version uses the simple approach and serializes all scheduling 19 * This version uses the simple approach and serializes all scheduling
9 * decisions by the use of a queue lock. This is probably not the 20 * decisions by the use of a queue lock. This is probably not the
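
Because the plugin does not migrate tasks into "their" cluster, placement is a user-space job: cedf_admit_task() (later in this file) rejects a task unless it is already executing on the CPU named in its task_params.cpu, and the cluster is then derived from that CPU. A minimal user-space sketch of the pinning step using only the standard affinity syscall (the LITMUS^RT library call that sets the real-time parameters is omitted; the helper pin_to_cpu and the choice of CPU 2 are arbitrary examples):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

static int pin_to_cpu(int cpu)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(cpu, &set);
        return sched_setaffinity(0, sizeof(set), &set); /* 0 = this thread */
}

int main(void)
{
        if (pin_to_cpu(2)) {
                perror("sched_setaffinity");
                return 1;
        }
        printf("now running inside the cluster that owns CPU 2\n");
        return 0;
}
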
@@ -13,92 +24,36 @@
13#include <linux/spinlock.h> 24#include <linux/spinlock.h>
14#include <linux/percpu.h> 25#include <linux/percpu.h>
15#include <linux/sched.h> 26#include <linux/sched.h>
16#include <linux/list.h>
17 27
18#include <litmus/litmus.h> 28#include <litmus/litmus.h>
19#include <litmus/jobs.h> 29#include <litmus/jobs.h>
20#include <litmus/sched_plugin.h> 30#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h> 31#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h> 32#include <litmus/sched_trace.h>
33
23#include <litmus/bheap.h> 34#include <litmus/bheap.h>
24 35
25#include <linux/module.h> 36#include <linux/module.h>
26 37
27/* Overview of C-EDF operations. 38/* forward declaration... a funny thing with C ;) */
28 * 39struct clusterdomain;
29 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
30 * structure (NOT the actually scheduled
31 * task). If there is another linked task To
32 * already it will set To->linked_on = NO_CPU
33 * (thereby removing its association with this
34 * CPU). However, it will not requeue the
35 * previously linked task (if any). It will set
36 * T's state to RT_F_RUNNING and check whether
37 * it is already running somewhere else. If T
38 * is scheduled somewhere else it will link
39 * it to that CPU instead (and pull the linked
40 * task to cpu). T may be NULL.
41 *
42 * unlink(T) - Unlink removes T from all scheduler data
43 * structures. If it is linked to some CPU it
44 * will link NULL to that CPU. If it is
45 * currently queued in the cedf queue for
46 * a partition, it will be removed from
47 * the rt_domain. It is safe to call
48 * unlink(T) if T is not linked. T may not
49 * be NULL.
50 *
51 * requeue(T) - Requeue will insert T into the appropriate
52 * queue. If the system is in real-time mode and
53 * the T is released already, it will go into the
54 * ready queue. If the system is not in
55 * real-time mode is T, then T will go into the
56 * release queue. If T's release time is in the
57 * future, it will go into the release
58 * queue. That means that T's release time/job
59 * no/etc. has to be updated before requeue(T) is
60 * called. It is not safe to call requeue(T)
61 * when T is already queued. T may not be NULL.
62 *
63 * cedf_job_arrival(T) - This is the catch-all function when T enters
64 * the system after either a suspension or at a
65 * job release. It will queue T (which means it
66 * is not safe to call cedf_job_arrival(T) if
67 * T is already queued) and then check whether a
68 * preemption is necessary. If a preemption is
69 * necessary it will update the linkage
70 * accordingly and cause scheduled to be called
71 * (either with an IPI or need_resched). It is
72 * safe to call cedf_job_arrival(T) if T's
73 * next job has not been actually released yet
74 * (release time in the future). T will be put
75 * on the release queue in that case.
76 *
77 * job_completion(T) - Take care of everything that needs to be done
78 * to prepare T for its next release and place
79 * it in the right queue with
80 * cedf_job_arrival().
81 *
82 *
83 * When we now that T is linked to CPU then link_task_to_cpu(NULL, CPU) is
84 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
85 * the functions will automatically propagate pending task from the ready queue
86 * to a linked task. This is the job of the calling function ( by means of
87 * __take_ready).
88 */
89 40
90/* cpu_entry_t - maintain the linked and scheduled state 41/* cpu_entry_t - maintain the linked and scheduled state
42 *
43 * A cpu also contains a pointer to the cedf_domain_t cluster
44 * that owns it (struct clusterdomain*)
91 */ 45 */
92typedef struct { 46typedef struct {
93 int cpu; 47 int cpu;
48 struct clusterdomain* cluster; /* owning cluster */
94 struct task_struct* linked; /* only RT tasks */ 49 struct task_struct* linked; /* only RT tasks */
95 struct task_struct* scheduled; /* only RT tasks */ 50 struct task_struct* scheduled; /* only RT tasks */
96 struct list_head list;
97 atomic_t will_schedule; /* prevent unneeded IPIs */ 51 atomic_t will_schedule; /* prevent unneeded IPIs */
52 struct bheap_node* hn;
98} cpu_entry_t; 53} cpu_entry_t;
99DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
100 54
101cpu_entry_t* *cedf_cpu_entries_array; 55/* one cpu_entry_t per CPU */
56DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
102 57
103#define set_will_schedule() \ 58#define set_will_schedule() \
104 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1)) 59 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
@@ -107,75 +62,73 @@ cpu_entry_t* *cedf_cpu_entries_array;
107#define test_will_schedule(cpu) \ 62#define test_will_schedule(cpu) \
108 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) 63 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
109 64
110/* Cluster size -- currently four. This is a variable to allow for 65/*
111 * the possibility of changing the cluster size online in the future. 66 * In C-EDF there is a cedf domain _per_ cluster
 67 * The number of clusters is determined dynamically according to the
 68 * total number of CPUs and the cluster size
112 */ 69 */
113int cluster_size = 4; 70typedef struct clusterdomain {
114 71 /* rt_domain for this cluster */
115int do_cleanup = 1; 72 rt_domain_t domain;
116 73 /* cpus in this cluster */
117typedef struct { 74 cpu_entry_t* *cpus;
118 rt_domain_t domain; 75 /* map of this cluster cpus */
119 int first_cpu; 76 cpumask_var_t cpu_map;
120 int last_cpu;
121
122 /* the cpus queue themselves according to priority in here */ 77 /* the cpus queue themselves according to priority in here */
123 struct list_head cedf_cpu_queue; 78 struct bheap_node *heap_node;
124 79 struct bheap cpu_heap;
125 /* per-partition spinlock: protects the domain and 80 /* lock for this cluster */
126 * serializes scheduling decisions 81#define lock domain.ready_lock
127 */
128#define slock domain.ready_lock
129} cedf_domain_t; 82} cedf_domain_t;
130 83
131DEFINE_PER_CPU(cedf_domain_t*, cedf_domains) = NULL; 84/* a cedf_domain per cluster; allocation is done at init/activation time */
132 85cedf_domain_t *cedf;
133cedf_domain_t* *cedf_domains_array;
134 86
87#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
88#define task_cpu_cluster(task) remote_cluster(get_partition(task))
135 89
136/* These are defined similarly to partitioning, except that a 90/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
137 * tasks partition is any cpu of the cluster to which it 91 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
138 * is assigned, typically the lowest-numbered cpu. 92 * information during the initialization of the plugin (e.g., topology)
93#define WANT_ALL_SCHED_EVENTS
139 */ 94 */
140#define local_edf (&__get_cpu_var(cedf_domains)->domain) 95#define VERBOSE_INIT
141#define local_cedf __get_cpu_var(cedf_domains) 96
142#define remote_edf(cpu) (&per_cpu(cedf_domains, cpu)->domain) 97static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
143#define remote_cedf(cpu) per_cpu(cedf_domains, cpu) 98{
144#define task_edf(task) remote_edf(get_partition(task)) 99 cpu_entry_t *a, *b;
145#define task_cedf(task) remote_cedf(get_partition(task)) 100 a = _a->value;
101 b = _b->value;
102 /* Note that a and b are inverted: we want the lowest-priority CPU at
103 * the top of the heap.
104 */
105 return edf_higher_prio(b->linked, a->linked);
106}
146 107
147/* update_cpu_position - Move the cpu entry to the correct place to maintain 108/* update_cpu_position - Move the cpu entry to the correct place to maintain
148 * order in the cpu queue. Caller must hold cedf lock. 109 * order in the cpu queue. Caller must hold cedf lock.
149 *
150 * This really should be a heap.
151 */ 110 */
152static void update_cpu_position(cpu_entry_t *entry) 111static void update_cpu_position(cpu_entry_t *entry)
153{ 112{
154 cpu_entry_t *other; 113 cedf_domain_t *cluster = entry->cluster;
155 struct list_head *cedf_cpu_queue =
156 &(remote_cedf(entry->cpu))->cedf_cpu_queue;
157 struct list_head *pos;
158 114
159 BUG_ON(!cedf_cpu_queue); 115 if (likely(bheap_node_in_heap(entry->hn)))
116 bheap_delete(cpu_lower_prio,
117 &cluster->cpu_heap,
118 entry->hn);
160 119
161 if (likely(in_list(&entry->list))) 120 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
162 list_del(&entry->list); 121}
163 /* if we do not execute real-time jobs we just move 122
164 * to the end of the queue 123/* caller must hold cedf lock */
165 */ 124static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
166 if (entry->linked) { 125{
167 list_for_each(pos, cedf_cpu_queue) { 126 struct bheap_node* hn;
168 other = list_entry(pos, cpu_entry_t, list); 127 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
169 if (edf_higher_prio(entry->linked, other->linked)) { 128 return hn->value;
170 __list_add(&entry->list, pos->prev, pos);
171 return;
172 }
173 }
174 }
175 /* if we get this far we have the lowest priority job */
176 list_add_tail(&entry->list, cedf_cpu_queue);
177} 129}
178 130
131
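
The heap above replaces the old sorted CPU list; what matters is only the ordering invariant: the CPU that is easiest to preempt (idle, or holding the latest-deadline linked job) must surface first. A stand-alone user-space illustration of that invariant with invented deadlines (a linear scan stands in for the bheap, and fake_cpu / easier_to_preempt are names made up for the sketch):

#include <stdio.h>

struct fake_cpu {
        int cpu;
        long long linked_deadline;      /* 0 == nothing linked (idle) */
};

/* mirrors the inverted comparison in cpu_lower_prio():
 * returns 1 if a is easier to preempt than b */
static int easier_to_preempt(const struct fake_cpu *a, const struct fake_cpu *b)
{
        if (!a->linked_deadline)
                return 1;               /* idle CPU always surfaces */
        if (!b->linked_deadline)
                return 0;
        return a->linked_deadline > b->linked_deadline; /* later deadline = lower prio */
}

int main(void)
{
        struct fake_cpu cluster[] = {
                { .cpu = 0, .linked_deadline = 100 },
                { .cpu = 1, .linked_deadline = 0 },     /* idle */
                { .cpu = 2, .linked_deadline = 250 },
        };
        int best = 0, i;

        for (i = 1; i < 3; i++)
                if (easier_to_preempt(&cluster[i], &cluster[best]))
                        best = i;

        printf("preempt CPU %d first\n", cluster[best].cpu);    /* -> CPU 1 */
        return 0;
}
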
179/* link_task_to_cpu - Update the link of a CPU. 132/* link_task_to_cpu - Update the link of a CPU.
180 * Handles the case where the to-be-linked task is already 133 * Handles the case where the to-be-linked task is already
181 * scheduled on a different CPU. 134 * scheduled on a different CPU.
@@ -189,9 +142,6 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
189 142
190 BUG_ON(linked && !is_realtime(linked)); 143 BUG_ON(linked && !is_realtime(linked));
191 144
192 /* Cannot link task to a CPU that doesn't belong to its partition... */
193 BUG_ON(linked && remote_cedf(entry->cpu) != task_cedf(linked));
194
195 /* Currently linked task is set to be unlinked. */ 145 /* Currently linked task is set to be unlinked. */
196 if (entry->linked) { 146 if (entry->linked) {
197 entry->linked->rt_param.linked_on = NO_CPU; 147 entry->linked->rt_param.linked_on = NO_CPU;
@@ -213,6 +163,9 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
213 * the caller to get things right. 163 * the caller to get things right.
214 */ 164 */
215 if (entry != sched) { 165 if (entry != sched) {
166 TRACE_TASK(linked,
167 "already scheduled on %d, updating link.\n",
168 sched->cpu);
216 tmp = sched->linked; 169 tmp = sched->linked;
217 linked->rt_param.linked_on = sched->cpu; 170 linked->rt_param.linked_on = sched->cpu;
218 sched->linked = linked; 171 sched->linked = linked;
@@ -224,13 +177,12 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
224 linked->rt_param.linked_on = entry->cpu; 177 linked->rt_param.linked_on = entry->cpu;
225 } 178 }
226 entry->linked = linked; 179 entry->linked = linked;
227 180#ifdef WANT_ALL_SCHED_EVENTS
228 if (entry->linked) 181 if (linked)
229 TRACE_TASK(entry->linked, "linked to CPU %d, state:%d\n", 182 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
230 entry->cpu, entry->linked->state);
231 else 183 else
232 TRACE("NULL linked to CPU %d\n", entry->cpu); 184 TRACE("NULL linked to %d.\n", entry->cpu);
233 185#endif
234 update_cpu_position(entry); 186 update_cpu_position(entry);
235} 187}
236 188
@@ -246,6 +198,7 @@ static noinline void unlink(struct task_struct* t)
246 return; 198 return;
247 } 199 }
248 200
201
249 if (t->rt_param.linked_on != NO_CPU) { 202 if (t->rt_param.linked_on != NO_CPU) {
250 /* unlink */ 203 /* unlink */
251 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on); 204 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
@@ -258,95 +211,105 @@ static noinline void unlink(struct task_struct* t)
258 * been relinked to this CPU), thus it must be in some 211 * been relinked to this CPU), thus it must be in some
259 * queue. We must remove it from the list in this 212 * queue. We must remove it from the list in this
260 * case. 213 * case.
214 *
 215 * in the C-EDF case it should be somewhere in the queue of
 216 * its cluster's domain, which we can look up with
 217 * task_cpu_cluster()
261 */ 218 */
262 remove(task_edf(t), t); 219 remove(&(task_cpu_cluster(t))->domain, t);
263 } 220 }
264} 221}
265 222
266 223
267/* preempt - force a CPU to reschedule 224/* preempt - force a CPU to reschedule
268 */ 225 */
269static noinline void preempt(cpu_entry_t *entry) 226static void preempt(cpu_entry_t *entry)
270{ 227{
271 preempt_if_preemptable(entry->scheduled, entry->cpu); 228 preempt_if_preemptable(entry->scheduled, entry->cpu);
272} 229}
273 230
274/* requeue - Put an unlinked task into c-edf domain. 231/* requeue - Put an unlinked task into the cedf domain.
275 * Caller must hold cedf_lock. 232 * Caller must hold cedf_lock.
276 */ 233 */
277static noinline void requeue(struct task_struct* task) 234static noinline void requeue(struct task_struct* task)
278{ 235{
279 cedf_domain_t* cedf; 236 cedf_domain_t *cluster = task_cpu_cluster(task);
280 rt_domain_t* edf;
281
282 BUG_ON(!task); 237 BUG_ON(!task);
283 /* sanity check rt_list before insertion */ 238 /* sanity check before insertion */
284 BUG_ON(is_queued(task)); 239 BUG_ON(is_queued(task));
285 240
286 /* Get correct real-time domain. */
287 cedf = task_cedf(task);
288 edf = &cedf->domain;
289
290 if (is_released(task, litmus_clock())) 241 if (is_released(task, litmus_clock()))
291 __add_ready(edf, task); 242 __add_ready(&cluster->domain, task);
292 else { 243 else {
293 /* it has got to wait */ 244 /* it has got to wait */
294 add_release(edf, task); 245 add_release(&cluster->domain, task);
295 } 246 }
296} 247}
297 248
298static void check_for_preemptions(cedf_domain_t* cedf) 249/* check for any necessary preemptions */
250static void check_for_preemptions(cedf_domain_t *cluster)
299{ 251{
300 cpu_entry_t *last;
301 struct task_struct *task; 252 struct task_struct *task;
302 struct list_head *cedf_cpu_queue; 253 cpu_entry_t* last;
303 cedf_cpu_queue = &cedf->cedf_cpu_queue;
304 254
305 for(last = list_entry(cedf_cpu_queue->prev, cpu_entry_t, list); 255 for(last = lowest_prio_cpu(cluster);
306 edf_preemption_needed(&cedf->domain, last->linked); 256 edf_preemption_needed(&cluster->domain, last->linked);
307 last = list_entry(cedf_cpu_queue->prev, cpu_entry_t, list)) { 257 last = lowest_prio_cpu(cluster)) {
308 /* preemption necessary */ 258 /* preemption necessary */
309 task = __take_ready(&cedf->domain); 259 task = __take_ready(&cluster->domain);
310 TRACE("check_for_preemptions: task %d linked to %d, state:%d\n", 260 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
311 task->pid, last->cpu, task->state); 261 task->pid, last->cpu);
312 if (last->linked) 262 if (last->linked)
313 requeue(last->linked); 263 requeue(last->linked);
314 link_task_to_cpu(task, last); 264 link_task_to_cpu(task, last);
315 preempt(last); 265 preempt(last);
316 } 266 }
317
318} 267}
319 268
320/* cedf_job_arrival: task is either resumed or released */ 269/* cedf_job_arrival: task is either resumed or released */
321static noinline void cedf_job_arrival(struct task_struct* task) 270static noinline void cedf_job_arrival(struct task_struct* task)
322{ 271{
323 cedf_domain_t* cedf; 272 cedf_domain_t *cluster = task_cpu_cluster(task);
324 rt_domain_t* edf;
325
326 BUG_ON(!task); 273 BUG_ON(!task);
327 274
328 /* Get correct real-time domain. */
329 cedf = task_cedf(task);
330 edf = &cedf->domain;
331
332 /* first queue arriving job */
333 requeue(task); 275 requeue(task);
334 276 check_for_preemptions(cluster);
335 /* then check for any necessary preemptions */
336 check_for_preemptions(cedf);
337} 277}
338 278
339/* check for current job releases */
340static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) 279static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
341{ 280{
342 cedf_domain_t* cedf = container_of(rt, cedf_domain_t, domain); 281 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
343 unsigned long flags; 282 unsigned long flags;
283
284 spin_lock_irqsave(&cluster->lock, flags);
285
286 __merge_ready(&cluster->domain, tasks);
287 check_for_preemptions(cluster);
288
289 spin_unlock_irqrestore(&cluster->lock, flags);
290}
291
292/* caller holds cedf_lock */
293static noinline void job_completion(struct task_struct *t, int forced)
294{
295 BUG_ON(!t);
344 296
345 spin_lock_irqsave(&cedf->slock, flags); 297 sched_trace_task_completion(t, forced);
298
299 TRACE_TASK(t, "job_completion().\n");
346 300
347 __merge_ready(&cedf->domain, tasks); 301 /* set flags */
348 check_for_preemptions(cedf); 302 set_rt_flags(t, RT_F_SLEEP);
349 spin_unlock_irqrestore(&cedf->slock, flags); 303 /* prepare for next period */
304 prepare_for_next_period(t);
305 if (is_released(t, litmus_clock()))
306 sched_trace_task_release(t);
307 /* unlink */
308 unlink(t);
309 /* requeue
310 * But don't requeue a blocking task. */
311 if (is_running(t))
312 cedf_job_arrival(t);
350} 313}
351 314
352/* cedf_tick - this function is called for every local timer 315/* cedf_tick - this function is called for every local timer
@@ -357,8 +320,6 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
357 */ 320 */
358static void cedf_tick(struct task_struct* t) 321static void cedf_tick(struct task_struct* t)
359{ 322{
360 BUG_ON(!t);
361
362 if (is_realtime(t) && budget_exhausted(t)) { 323 if (is_realtime(t) && budget_exhausted(t)) {
363 if (!is_np(t)) { 324 if (!is_np(t)) {
364 /* np tasks will be preempted when they become 325 /* np tasks will be preempted when they become
@@ -367,38 +328,17 @@ static void cedf_tick(struct task_struct* t)
367 set_tsk_need_resched(t); 328 set_tsk_need_resched(t);
368 set_will_schedule(); 329 set_will_schedule();
369 TRACE("cedf_scheduler_tick: " 330 TRACE("cedf_scheduler_tick: "
370 "%d is preemptable (state:%d) " 331 "%d is preemptable "
371 " => FORCE_RESCHED\n", t->pid, t->state); 332 " => FORCE_RESCHED\n", t->pid);
372 } else if(is_user_np(t)) { 333 } else if (is_user_np(t)) {
373 TRACE("cedf_scheduler_tick: " 334 TRACE("cedf_scheduler_tick: "
374 "%d is non-preemptable (state:%d), " 335 "%d is non-preemptable, "
375 "preemption delayed.\n", t->pid, t->state); 336 "preemption delayed.\n", t->pid);
376 request_exit_np(t); 337 request_exit_np(t);
377 } 338 }
378 } 339 }
379} 340}
380 341
381/* caller holds cedf_lock */
382static noinline void job_completion(struct task_struct *t, int forced)
383{
384 BUG_ON(!t);
385
386 sched_trace_task_completion(t, forced);
387
388 TRACE_TASK(t, "job_completion(). [state:%d]\n", t->state);
389
390 /* set flags */
391 set_rt_flags(t, RT_F_SLEEP);
392 /* prepare for next period */
393 prepare_for_next_period(t);
394 /* unlink */
395 unlink(t);
396 /* requeue
397 * But don't requeue a blocking task. */
398 if (is_running(t))
399 cedf_job_arrival(t);
400}
401
402/* Getting schedule() right is a bit tricky. schedule() may not make any 342/* Getting schedule() right is a bit tricky. schedule() may not make any
403 * assumptions on the state of the current task since it may be called for a 343 * assumptions on the state of the current task since it may be called for a
404 * number of reasons. The reasons include a scheduler_tick() determined that it 344 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -422,22 +362,12 @@ static noinline void job_completion(struct task_struct *t, int forced)
422 */ 362 */
423static struct task_struct* cedf_schedule(struct task_struct * prev) 363static struct task_struct* cedf_schedule(struct task_struct * prev)
424{ 364{
425 cedf_domain_t* cedf = local_cedf; 365 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
426 rt_domain_t* edf = &cedf->domain; 366 cedf_domain_t *cluster = entry->cluster;
427 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); 367 int out_of_time, sleep, preempt, np, exists, blocks;
428 int out_of_time, sleep, preempt, np, 368 struct task_struct* next = NULL;
429 exists, blocks; 369
430 struct task_struct* next = NULL; 370 spin_lock(&cluster->lock);
431
432 BUG_ON(!prev);
433 BUG_ON(!cedf);
434 BUG_ON(!edf);
435 BUG_ON(!entry);
436 BUG_ON(cedf != remote_cedf(entry->cpu));
437 BUG_ON(is_realtime(prev) && cedf != task_cedf(prev));
438
439 /* Will be released in finish_switch. */
440 spin_lock(&cedf->slock);
441 clear_will_schedule(); 371 clear_will_schedule();
442 372
443 /* sanity checking */ 373 /* sanity checking */
@@ -453,6 +383,21 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
453 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; 383 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
454 preempt = entry->scheduled != entry->linked; 384 preempt = entry->scheduled != entry->linked;
455 385
386#ifdef WANT_ALL_SCHED_EVENTS
387 TRACE_TASK(prev, "invoked cedf_schedule.\n");
388#endif
389
390 if (exists)
391 TRACE_TASK(prev,
392 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
393 "state:%d sig:%d\n",
394 blocks, out_of_time, np, sleep, preempt,
395 prev->state, signal_pending(prev));
396 if (entry->linked && preempt)
397 TRACE_TASK(prev, "will be preempted by %s/%d\n",
398 entry->linked->comm, entry->linked->pid);
399
400
456 /* If a task blocks we have no choice but to reschedule. 401 /* If a task blocks we have no choice but to reschedule.
457 */ 402 */
458 if (blocks) 403 if (blocks)
@@ -470,8 +415,8 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
470 415
471 /* Any task that is preemptable and either exhausts its execution 416 /* Any task that is preemptable and either exhausts its execution
472 * budget or wants to sleep completes. We may have to reschedule after 417 * budget or wants to sleep completes. We may have to reschedule after
473 * this. Don't do a job completion if blocks (can't have timers 418 * this. Don't do a job completion if we block (can't have timers running
474 * running for blocked jobs). Preemption go first for the same reason. 419 * for blocked jobs). Preemption go first for the same reason.
475 */ 420 */
476 if (!np && (out_of_time || sleep) && !blocks && !preempt) 421 if (!np && (out_of_time || sleep) && !blocks && !preempt)
477 job_completion(entry->scheduled, !sleep); 422 job_completion(entry->scheduled, !sleep);
@@ -479,10 +424,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
479 /* Link pending task if we became unlinked. 424 /* Link pending task if we became unlinked.
480 */ 425 */
481 if (!entry->linked) 426 if (!entry->linked)
482 link_task_to_cpu(__take_ready(edf), entry); 427 link_task_to_cpu(__take_ready(&cluster->domain), entry);
483 428
484 /* The final scheduling decision. Do we need to switch for some reason? 429 /* The final scheduling decision. Do we need to switch for some reason?
485 * If linked different from scheduled select linked as next. 430 * If linked is different from scheduled, then select linked as next.
486 */ 431 */
487 if ((!np || blocks) && 432 if ((!np || blocks) &&
488 entry->linked != entry->scheduled) { 433 entry->linked != entry->scheduled) {
@@ -491,76 +436,91 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
491 entry->linked->rt_param.scheduled_on = entry->cpu; 436 entry->linked->rt_param.scheduled_on = entry->cpu;
492 next = entry->linked; 437 next = entry->linked;
493 } 438 }
494 if (entry->scheduled) { 439 if (entry->scheduled) {
495 /* not gonna be scheduled soon */ 440 /* not gonna be scheduled soon */
496 entry->scheduled->rt_param.scheduled_on = NO_CPU; 441 entry->scheduled->rt_param.scheduled_on = NO_CPU;
497 TRACE_TASK(entry->scheduled, "cedf_schedule: scheduled_on = NO_CPU\n"); 442 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
498 } 443 }
499 } else 444 } else
500 /* Only override Linux scheduler if we have real-time task 445 /* Only override Linux scheduler if we have a real-time task
501 * scheduled that needs to continue. 446 * scheduled that needs to continue.
502 */ 447 */
503 if (exists) 448 if (exists)
504 next = prev; 449 next = prev;
505 450
506 spin_unlock(&cedf->slock); 451 spin_unlock(&cluster->lock);
452
453#ifdef WANT_ALL_SCHED_EVENTS
454 TRACE("cedf_lock released, next=0x%p\n", next);
455
456 if (next)
457 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
458 else if (exists && !next)
459 TRACE("becomes idle at %llu.\n", litmus_clock());
460#endif
461
507 462
508 return next; 463 return next;
509} 464}
510 465
466
511/* _finish_switch - we just finished the switch away from prev 467/* _finish_switch - we just finished the switch away from prev
512 */ 468 */
513static void cedf_finish_switch(struct task_struct *prev) 469static void cedf_finish_switch(struct task_struct *prev)
514{ 470{
515 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); 471 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
516
517 BUG_ON(!prev);
518 BUG_ON(!entry);
519 472
520 entry->scheduled = is_realtime(current) ? current : NULL; 473 entry->scheduled = is_realtime(current) ? current : NULL;
474#ifdef WANT_ALL_SCHED_EVENTS
475 TRACE_TASK(prev, "switched away from\n");
476#endif
521} 477}
522 478
479
523/* Prepare a task for running in RT mode 480/* Prepare a task for running in RT mode
524 */ 481 */
525static void cedf_task_new(struct task_struct *t, int on_rq, int running) 482static void cedf_task_new(struct task_struct * t, int on_rq, int running)
526{ 483{
527 unsigned long flags; 484 unsigned long flags;
528 cedf_domain_t* cedf = task_cedf(t);
529 cpu_entry_t* entry; 485 cpu_entry_t* entry;
486 cedf_domain_t* cluster;
487
 488 TRACE("cedf: task new %d\n", t->pid);
489
490 /* the cluster doesn't change even if t is running */
491 cluster = task_cpu_cluster(t);
530 492
531 BUG_ON(!cedf); 493 spin_lock_irqsave(&cluster->domain.ready_lock, flags);
494
495 /* setup job params */
496 release_at(t, litmus_clock());
532 497
533 spin_lock_irqsave(&cedf->slock, flags);
534 if (running) { 498 if (running) {
535 entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); 499 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
536 BUG_ON(!entry);
537 BUG_ON(entry->scheduled); 500 BUG_ON(entry->scheduled);
501
538 entry->scheduled = t; 502 entry->scheduled = t;
539 t->rt_param.scheduled_on = task_cpu(t); 503 tsk_rt(t)->scheduled_on = task_cpu(t);
540 } else 504 } else {
541 t->rt_param.scheduled_on = NO_CPU; 505 t->rt_param.scheduled_on = NO_CPU;
542 t->rt_param.linked_on = NO_CPU; 506 }
543 507 t->rt_param.linked_on = NO_CPU;
544 /* setup job params */
545 release_at(t, litmus_clock());
546 508
547 cedf_job_arrival(t); 509 cedf_job_arrival(t);
548 spin_unlock_irqrestore(&cedf->slock, flags); 510 spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags);
549} 511}
550 512
551
552static void cedf_task_wake_up(struct task_struct *task) 513static void cedf_task_wake_up(struct task_struct *task)
553{ 514{
554 unsigned long flags; 515 unsigned long flags;
555 cedf_domain_t* cedf; 516 lt_t now;
556 lt_t now; 517 cedf_domain_t *cluster;
557 518
558 BUG_ON(!task); 519 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
559 520
560 cedf = task_cedf(task); 521 cluster = task_cpu_cluster(task);
561 BUG_ON(!cedf);
562 522
563 spin_lock_irqsave(&cedf->slock, flags); 523 spin_lock_irqsave(&cluster->lock, flags);
564 /* We need to take suspensions because of semaphores into 524 /* We need to take suspensions because of semaphores into
565 * account! If a job resumes after being suspended due to acquiring 525 * account! If a job resumes after being suspended due to acquiring
566 * a semaphore, it should never be treated as a new job release. 526 * a semaphore, it should never be treated as a new job release.
@@ -574,48 +534,49 @@ static void cedf_task_wake_up(struct task_struct *task)
574 release_at(task, now); 534 release_at(task, now);
575 sched_trace_task_release(task); 535 sched_trace_task_release(task);
576 } 536 }
577 else if (task->rt.time_slice) 537 else {
578 /* came back in time before deadline 538 if (task->rt.time_slice) {
579 */ 539 /* came back in time before deadline
580 set_rt_flags(task, RT_F_RUNNING); 540 */
541 set_rt_flags(task, RT_F_RUNNING);
542 }
543 }
581 } 544 }
582 cedf_job_arrival(task); 545 cedf_job_arrival(task);
583 spin_unlock_irqrestore(&cedf->slock, flags); 546 spin_unlock_irqrestore(&cluster->lock, flags);
584} 547}
585 548
586
587static void cedf_task_block(struct task_struct *t) 549static void cedf_task_block(struct task_struct *t)
588{ 550{
589 unsigned long flags; 551 unsigned long flags;
552 cedf_domain_t *cluster;
590 553
591 BUG_ON(!t); 554 TRACE_TASK(t, "block at %llu\n", litmus_clock());
592 555
593 /* unlink if necessary */ 556 cluster = task_cpu_cluster(t);
594 spin_lock_irqsave(&task_cedf(t)->slock, flags);
595 557
596 t->rt_param.scheduled_on = NO_CPU; 558 /* unlink if necessary */
559 spin_lock_irqsave(&cluster->lock, flags);
597 unlink(t); 560 unlink(t);
598 561 spin_unlock_irqrestore(&cluster->lock, flags);
599 spin_unlock_irqrestore(&task_cedf(t)->slock, flags);
600 562
601 BUG_ON(!is_realtime(t)); 563 BUG_ON(!is_realtime(t));
602} 564}
603 565
566
604static void cedf_task_exit(struct task_struct * t) 567static void cedf_task_exit(struct task_struct * t)
605{ 568{
606 unsigned long flags; 569 unsigned long flags;
607 570 cedf_domain_t *cluster = task_cpu_cluster(t);
608 BUG_ON(!t);
609 571
610 /* unlink if necessary */ 572 /* unlink if necessary */
611 spin_lock_irqsave(&task_cedf(t)->slock, flags); 573 spin_lock_irqsave(&cluster->lock, flags);
612 unlink(t); 574 unlink(t);
613 if (tsk_rt(t)->scheduled_on != NO_CPU) { 575 if (tsk_rt(t)->scheduled_on != NO_CPU) {
614 cedf_cpu_entries_array[tsk_rt(t)->scheduled_on]-> 576 cluster->cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
615 scheduled = NULL;
616 tsk_rt(t)->scheduled_on = NO_CPU; 577 tsk_rt(t)->scheduled_on = NO_CPU;
617 } 578 }
618 spin_unlock_irqrestore(&task_cedf(t)->slock, flags); 579 spin_unlock_irqrestore(&cluster->lock, flags);
619 580
620 BUG_ON(!is_realtime(t)); 581 BUG_ON(!is_realtime(t));
621 TRACE_TASK(t, "RIP\n"); 582 TRACE_TASK(t, "RIP\n");
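
The wake-up path above distinguishes two cases for a resuming job: if it comes back late (past its deadline; the exact condition sits in context lines not shown in this hunk) it is treated as a brand-new sporadic release at the current time, otherwise it simply resumes the job it was running. A numeric sketch of that rule, assuming an implicit-deadline task (deadline = period; all values are invented):

#include <stdio.h>

int main(void)
{
        long long now = 5000, deadline = 4000, period = 1000;

        if (now > deadline)     /* tardy: start a fresh job at "now" */
                printf("new release at %lld, new deadline %lld\n",
                       now, now + period);
        else                    /* still in time: keep the current job */
                printf("resume current job, deadline stays %lld\n", deadline);
        return 0;
}
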
@@ -623,10 +584,147 @@ static void cedf_task_exit(struct task_struct * t)
623 584
624static long cedf_admit_task(struct task_struct* tsk) 585static long cedf_admit_task(struct task_struct* tsk)
625{ 586{
626 return (task_cpu(tsk) >= task_cedf(tsk)->first_cpu && 587 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
627 task_cpu(tsk) <= task_cedf(tsk)->last_cpu) ? 0 : -EINVAL; 588}
589
 590/* total number of clusters */
 591static int num_clusters;
 592/* we do not support clusters of different sizes */
593static unsigned int cluster_size;
594
595#ifdef VERBOSE_INIT
596static void print_cluster_topology(cpumask_var_t mask, int cpu)
597{
598 int chk;
599 char buf[255];
600
601 chk = cpulist_scnprintf(buf, 254, mask);
602 buf[chk] = '\0';
603 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
604
628} 605}
606#endif
629 607
608static int clusters_allocated = 0;
609
610static void cleanup_cedf(void)
611{
612 int i;
613
614 if (clusters_allocated) {
615 for (i = 0; i < num_clusters; i++) {
616 kfree(cedf[i].cpus);
617 kfree(cedf[i].heap_node);
618 free_cpumask_var(cedf[i].cpu_map);
619 }
620
621 kfree(cedf);
622 }
623}
624
625static long cedf_activate_plugin(void)
626{
627 int i, j, cpu, ccpu, cpu_count;
628 cpu_entry_t *entry;
629
630 cpumask_var_t mask;
631 int chk = 0;
632
633 /* de-allocate old clusters, if any */
634 cleanup_cedf();
635
636 printk(KERN_INFO "C-EDF: Activate Plugin, cache index = %d\n",
637 cluster_cache_index);
638
639 /* need to get cluster_size first */
640 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
641 return -ENOMEM;
642
643 chk = get_shared_cpu_map(mask, 0, cluster_cache_index);
644 if (chk) {
645 /* if chk != 0 then it is the max allowed index */
646 printk(KERN_INFO "C-EDF: Cannot support cache index = %d\n",
647 cluster_cache_index);
648 printk(KERN_INFO "C-EDF: Using cache index = %d\n",
649 chk);
650 cluster_cache_index = chk;
651 }
652
653 cluster_size = cpumask_weight(mask);
654
655 if ((num_online_cpus() % cluster_size) != 0) {
656 /* this can't be right, some cpus are left out */
657 printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
658 num_online_cpus(), cluster_size);
659 return -1;
660 }
661
662 num_clusters = num_online_cpus() / cluster_size;
663 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
664 num_clusters, cluster_size);
665
666 /* initialize clusters */
667 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
668 for (i = 0; i < num_clusters; i++) {
669
670 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
671 GFP_ATOMIC);
672 cedf[i].heap_node = kmalloc(
673 cluster_size * sizeof(struct bheap_node),
674 GFP_ATOMIC);
675 bheap_init(&(cedf[i].cpu_heap));
676 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
677
678 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
679 return -ENOMEM;
680 }
681
 682 /* cycle through clusters and add CPUs to them */
683 for (i = 0; i < num_clusters; i++) {
684
685 for_each_online_cpu(cpu) {
686 /* check if the cpu is already in a cluster */
687 for (j = 0; j < num_clusters; j++)
688 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
689 break;
690 /* if it is in a cluster go to next cpu */
691 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
692 continue;
693
694 /* this cpu isn't in any cluster */
695 /* get the shared cpus */
696 get_shared_cpu_map(mask, cpu, cluster_cache_index);
697 cpumask_copy(cedf[i].cpu_map, mask);
698#ifdef VERBOSE_INIT
699 print_cluster_topology(mask, cpu);
700#endif
701 /* add cpus to current cluster and init cpu_entry_t */
702 cpu_count = 0;
703 for_each_cpu(ccpu, cedf[i].cpu_map) {
704
705 entry = &per_cpu(cedf_cpu_entries, ccpu);
706 cedf[i].cpus[cpu_count] = entry;
707 atomic_set(&entry->will_schedule, 0);
708 entry->cpu = ccpu;
709 entry->cluster = &cedf[i];
710 entry->hn = &(cedf[i].heap_node[cpu_count]);
711 bheap_node_init(&entry->hn, entry);
712
713 cpu_count++;
714
715 entry->linked = NULL;
716 entry->scheduled = NULL;
717 update_cpu_position(entry);
718 }
719 /* done with this cluster */
720 break;
721 }
722 }
723
724 free_cpumask_var(mask);
725 clusters_allocated = 1;
726 return 0;
727}
630 728
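
The clusters built above come straight out of the shared-cpu maps that intel_cacheinfo.c already exports to sysfs, so the expected grouping can be cross-checked from user space before activating the plugin. A small sketch, assuming the usual Intel cacheinfo layout in which index2 is the L2 and index3 the L3:

#include <stdio.h>

int main(void)
{
        char buf[256];
        FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_list",
                        "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("CPU 0 would share its L2 cluster with CPUs: %s", buf);
        fclose(f);
        return 0;
}
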
631/* Plugin object */ 729/* Plugin object */
632static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { 730static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
@@ -639,89 +737,20 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
639 .schedule = cedf_schedule, 737 .schedule = cedf_schedule,
640 .task_wake_up = cedf_task_wake_up, 738 .task_wake_up = cedf_task_wake_up,
641 .task_block = cedf_task_block, 739 .task_block = cedf_task_block,
642 .admit_task = cedf_admit_task 740 .admit_task = cedf_admit_task,
741 .activate_plugin = cedf_activate_plugin,
643}; 742};
644 743
645static void cedf_domain_init(int first_cpu, int last_cpu)
646{
647 int cpu;
648
649 /* Create new domain for this cluster. */
650 cedf_domain_t *new_cedf_domain = kmalloc(sizeof(*new_cedf_domain),
651 GFP_KERNEL);
652
653 /* Initialize cluster domain. */
654 edf_domain_init(&new_cedf_domain->domain, NULL,
655 cedf_release_jobs);
656 new_cedf_domain->first_cpu = first_cpu;
657 new_cedf_domain->last_cpu = last_cpu;
658 INIT_LIST_HEAD(&new_cedf_domain->cedf_cpu_queue);
659
660 /* Assign all cpus in cluster to point to this domain. */
661 for (cpu = first_cpu; cpu <= last_cpu; cpu++) {
662 remote_cedf(cpu) = new_cedf_domain;
663 cedf_domains_array[cpu] = new_cedf_domain;
664 }
665}
666 744
667static int __init init_cedf(void) 745static int __init init_cedf(void)
668{ 746{
669 int cpu;
670 cpu_entry_t *entry;
671
672 /* num_online_cpus() should have been set already
673 * if the number of available cpus is less then the cluster
674 * size (currently 4) then it is pointless trying to use
675 * CEDF, so we disable this plugin
676 */
677 if(num_online_cpus() < cluster_size) {
678 printk(KERN_INFO "Not registering C-EDF plugin: "
679 "Num Online Cpus (%d) < Min Cluster Size (%d)\n",
680 num_online_cpus(), cluster_size);
681 do_cleanup = 0;
682 return 0;
683 }
684
685 /*
686 * initialize short_cut for per-cpu cedf state;
687 * there may be a problem here if someone removes a cpu
688 * while we are doing this initialization... and if cpus
689 * are added / removed later... is it a _real_ problem for cedf?
690 */
691 cedf_cpu_entries_array = kmalloc(
692 sizeof(cpu_entry_t *) * num_online_cpus(),
693 GFP_KERNEL);
694
695 cedf_domains_array = kmalloc(
696 sizeof(cedf_domain_t *) * num_online_cpus(),
697 GFP_KERNEL);
698
699 /* initialize CPU state */
700 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
701 entry = &per_cpu(cedf_cpu_entries, cpu);
702 cedf_cpu_entries_array[cpu] = entry;
703 atomic_set(&entry->will_schedule, 0);
704 entry->linked = NULL;
705 entry->scheduled = NULL;
706 entry->cpu = cpu;
707 INIT_LIST_HEAD(&entry->list);
708 }
709
710 /* initialize all cluster domains */
711 for (cpu = 0; cpu < num_online_cpus(); cpu += cluster_size)
712 cedf_domain_init(cpu, cpu+cluster_size-1);
713
714 return register_sched_plugin(&cedf_plugin); 747 return register_sched_plugin(&cedf_plugin);
715} 748}
716 749
717static void clean_cedf(void) 750static void clean_cedf(void)
718{ 751{
719 if(do_cleanup) { 752 cleanup_cedf();
720 kfree(cedf_cpu_entries_array);
721 kfree(cedf_domains_array);
722 }
723} 753}
724 754
725module_init(init_cedf); 755module_init(init_cedf);
726module_exit(clean_cedf); 756module_exit(clean_cedf);
727
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index bc7c0e93fb18..3767b30e610a 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -171,6 +171,14 @@ struct sched_plugin linux_sched_plugin = {
171}; 171};
172 172
173/* 173/*
174 * The cluster size is needed in C-EDF: it makes sense only to cluster
175 * around L2 or L3, so if cluster_cache_index = 2 (default) we cluster
 176 * all the CPUs that share an L2 cache, while with cluster_cache_index = 3
 177 * we cluster all CPUs that share an L3 cache
178 */
179int cluster_cache_index = 2;
180
181/*
174 * The reference to current plugin that is used to schedule tasks within 182 * The reference to current plugin that is used to schedule tasks within
175 * the system. It stores references to actual function implementations 183 * the system. It stores references to actual function implementations
176 * Should be initialized by calling "init_***_plugin()" 184 * Should be initialized by calling "init_***_plugin()"