author	Jonathan Herman <hermanjl@cs.unc.edu>	2012-09-29 13:04:40 -0400
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2012-09-29 13:04:40 -0400
commit	daf1e620bff2cb6d830ef66725369bba9c858f62 (patch)
tree	1aed8f7cb55371c70d2139b6754d90ea89a26147 /litmus
parent	451ed3b075c2a8e322e5a44f177e2470426a821d (diff)
parent	1cb90226816c7af7808be4c0de866c54da17ecc9 (diff)
Merge branch 'wip-color' into wip-mc
Conflicts:
	include/litmus/budget.h
	include/litmus/litmus.h
	include/litmus/rt_param.h
	include/litmus/sched_trace.h
	include/litmus/trace.h
	include/trace/events/litmus.h
	litmus/Makefile
	litmus/budget.c
	litmus/ftdev.c
	litmus/jobs.c
	litmus/litmus.c
	litmus/locking.c
	litmus/preempt.c
	litmus/rt_domain.c
	litmus/sched_gsn_edf.c
	litmus/trace.c
Diffstat (limited to 'litmus')
-rw-r--r--	litmus/Kconfig	47
-rw-r--r--	litmus/Makefile	14
-rw-r--r--	litmus/affinity.c	42
-rw-r--r--	litmus/budget.c	31
-rw-r--r--	litmus/clustered.c	2
-rw-r--r--	litmus/color.c	357
-rw-r--r--	litmus/color_dev.c	351
-rw-r--r--	litmus/color_proc.c	220
-rw-r--r--	litmus/dgl.c	300
-rw-r--r--	litmus/fifo_common.c	58
-rw-r--r--	litmus/ftdev.c	15
-rw-r--r--	litmus/jobs.c	12
-rw-r--r--	litmus/litmus.c	41
-rw-r--r--	litmus/locking.c	14
-rw-r--r--	litmus/preempt.c	3
-rw-r--r--	litmus/rm_common.c	91
-rw-r--r--	litmus/rt_domain.c	8
-rw-r--r--	litmus/rt_server.c	23
-rw-r--r--	litmus/sched_cedf.c	100
-rw-r--r--	litmus/sched_color.c	888
-rw-r--r--	litmus/sched_gsn_edf.c	56
-rw-r--r--	litmus/sched_litmus.c	19
-rw-r--r--	litmus/sched_pfair.c	225
-rw-r--r--	litmus/sched_plugin.c	23
-rw-r--r--	litmus/sched_psn_edf.c	47
-rw-r--r--	litmus/sched_task_trace.c	34
-rw-r--r--	litmus/trace.c	109
27 files changed, 2932 insertions, 198 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index bd6ec9f2d3e8..48d6f28c6e4a 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -12,6 +12,19 @@ config PLUGIN_CEDF
12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF 12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13 makes little sense since there aren't any shared caches. 13 makes little sense since there aren't any shared caches.
14 14
15config PLUGIN_COLOR
16 bool "Scheduling with Colors"
17 default y
18 help
19 Include the scheduling with colors scheduler.
20
21config PLUGIN_COLOR_UNCACHABLE
22 bool "Colored memory is not cachable"
23 depends on PLUGIN_COLOR && X86_PAT
24 default n
25 help
26 Any memory allocated to the color plugin is not CPU cached.
27
15config PLUGIN_PFAIR 28config PLUGIN_PFAIR
16 bool "PFAIR" 29 bool "PFAIR"
17 depends on HIGH_RES_TIMERS && !NO_HZ 30 depends on HIGH_RES_TIMERS && !NO_HZ
@@ -102,7 +115,6 @@ config NP_SECTION
102 115
103config LITMUS_LOCKING 116config LITMUS_LOCKING
104 bool "Support for real-time locking protocols" 117 bool "Support for real-time locking protocols"
105 depends on NP_SECTION
106 default n 118 default n
107 help 119 help
108 Enable LITMUS^RT's deterministic multiprocessor real-time 120 Enable LITMUS^RT's deterministic multiprocessor real-time
@@ -113,6 +125,25 @@ config LITMUS_LOCKING
113 125
114endmenu 126endmenu
115 127
128menu "Performance Enhancements"
129
130config SCHED_CPU_AFFINITY
131 bool "Local Migration Affinity"
132 depends on X86
133 default y
134 help
135 Rescheduled tasks prefer CPUs near to their previously used CPU. This
136 may improve performance through possible preservation of cache affinity.
137
138 Warning: May make bugs harder to find since tasks may migrate less often.
139
140 NOTES:
141 * Feature is not utilized by PFair/PD^2.
142
143 Say Yes if unsure.
144
145endmenu
146
116menu "Tracing" 147menu "Tracing"
117 148
118config FEATHER_TRACE 149config FEATHER_TRACE
@@ -249,6 +280,20 @@ config SCHED_DEBUG_TRACE_CALLER
249 280
250 If unsure, say No. 281 If unsure, say No.
251 282
283config PREEMPT_STATE_TRACE
284 bool "Trace preemption state machine transitions"
285 depends on SCHED_DEBUG_TRACE
286 default n
287 help
288 With this option enabled, each CPU will log when it transitions
289 states in the preemption state machine. This state machine is
290 used to determine how to react to IPIs (avoid races with in-flight IPIs).
291
292 Warning: this creates a lot of information in the debug trace. Only
293 recommended when you are debugging preemption-related races.
294
295 If unsure, say No.
296
252endmenu 297endmenu
253 298
254endmenu 299endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
index 51e979506d84..b406cf2ad9e1 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -19,14 +19,26 @@ obj-y = sched_plugin.o litmus.o \
19 domain.o \ 19 domain.o \
20 sched_psn_edf.o \ 20 sched_psn_edf.o \
21 sched_gsn_edf.o 21 sched_gsn_edf.o
22 color.o \
23 color_proc.o \
24 color_dev.o \
25 rt_server.o \
26 dgl.o \
27 fifo_common.o \
28 rm_common.o \
29 sched_psn_edf.o \
30 sched_gsn_edf.o
31
22 32
23 33
24obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 34obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
25obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 35obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
36obj-$(CONFIG_PLUGIN_COLOR) += sched_color.o
37obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
26obj-$(CONFIG_PLUGIN_MC) += sched_mc.o sched_mc_ce.o ce_domain.o 38obj-$(CONFIG_PLUGIN_MC) += sched_mc.o sched_mc_ce.o ce_domain.o
27obj-$(CONFIG_MERGE_TIMERS) += event_group.o 39obj-$(CONFIG_MERGE_TIMERS) += event_group.o
28
29obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 40obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 41obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 42obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 43obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
44
diff --git a/litmus/affinity.c b/litmus/affinity.c
new file mode 100644
index 000000000000..3fa6dd789400
--- /dev/null
+++ b/litmus/affinity.c
@@ -0,0 +1,42 @@
1#include <linux/cpu.h>
2
3#include <litmus/affinity.h>
4
5struct neighborhood neigh_info[NR_CPUS];
6
7/* called by _init_litmus() */
8void init_topology(void) {
9 int cpu;
10 int i;
11 int chk;
12 int depth = num_cache_leaves;
13
14 if (depth > NUM_CACHE_LEVELS)
15 depth = NUM_CACHE_LEVELS;
16
17 for_each_online_cpu(cpu) {
18 for (i = 0; i < depth; ++i) {
19 chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
20 if (chk) {
21 /* failed */
22 neigh_info[cpu].size[i] = 0;
23 } else {
24 /* size = num bits in mask */
25 neigh_info[cpu].size[i] =
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 }
32
33 /* set data for non-existent levels */
34 for (; i < NUM_CACHE_LEVELS; ++i) {
35 neigh_info[cpu].size[i] = 0;
36
37 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
38 cpu, neigh_info[cpu].size[i], i, 0lu);
39 }
40 }
41}
42
diff --git a/litmus/budget.c b/litmus/budget.c
index 172c12b369da..d63e484ba160 100644
--- a/litmus/budget.c
+++ b/litmus/budget.c
@@ -7,16 +7,9 @@
7#include <litmus/budget.h> 7#include <litmus/budget.h>
8#include <litmus/sched_trace.h> 8#include <litmus/sched_trace.h>
9 9
10struct enforcement_timer {
11 /* The enforcement timer is used to accurately police
12 * slice budgets. */
13 struct hrtimer timer;
14 int armed;
15};
16
17DEFINE_PER_CPU(struct enforcement_timer, budget_timer); 10DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
18 11
19static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) 12enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
20{ 13{
21 struct enforcement_timer* et = container_of(timer, 14 struct enforcement_timer* et = container_of(timer,
22 struct enforcement_timer, 15 struct enforcement_timer,
@@ -34,7 +27,7 @@ static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
34} 27}
35 28
36/* assumes called with IRQs off */ 29/* assumes called with IRQs off */
37static void cancel_enforcement_timer(struct enforcement_timer* et) 30void cancel_enforcement_timer(struct enforcement_timer* et)
38{ 31{
39 int ret; 32 int ret;
40 33
@@ -56,11 +49,10 @@ static void cancel_enforcement_timer(struct enforcement_timer* et)
56} 49}
57 50
58/* assumes called with IRQs off */ 51/* assumes called with IRQs off */
59static void arm_enforcement_timer(struct enforcement_timer* et, 52void arm_enforcement_timer(struct enforcement_timer* et,
60 struct task_struct* t) 53 struct task_struct* t)
61{ 54{
62 lt_t when_to_fire; 55 lt_t when_to_fire;
63 TRACE_TASK(t, "arming enforcement timer.\n");
64 56
65 /* Calling this when there is no budget left for the task 57 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */ 58 * makes no sense, unless the task is non-preemptive. */
@@ -69,8 +61,11 @@ static void arm_enforcement_timer(struct enforcement_timer* et,
69 /* __hrtimer_start_range_ns() cancels the timer 61 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */ 62 * anyway, so we don't have to check whether it is still armed */
71 63
72 if (likely(!is_np(t))) { 64 if (likely(!is_user_np(t))) {
73 when_to_fire = litmus_clock() + budget_remaining(t); 65 when_to_fire = litmus_clock() + budget_remaining(t);
66 TRACE_TASK(t, "arming enforcement timer for %llu.\n",
67 when_to_fire);
68
74 __hrtimer_start_range_ns(&et->timer, 69 __hrtimer_start_range_ns(&et->timer,
75 ns_to_ktime(when_to_fire), 70 ns_to_ktime(when_to_fire),
76 0 /* delta */, 71 0 /* delta */,
@@ -96,6 +91,11 @@ void update_enforcement_timer(struct task_struct* t)
96 } 91 }
97} 92}
98 93
94void init_enforcement_timer(struct enforcement_timer *et)
95{
96 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
97 et->timer.function = on_enforcement_timeout;
98}
99 99
100static int __init init_budget_enforcement(void) 100static int __init init_budget_enforcement(void)
101{ 101{
@@ -104,14 +104,14 @@ static int __init init_budget_enforcement(void)
104 104
105 for (cpu = 0; cpu < NR_CPUS; cpu++) { 105 for (cpu = 0; cpu < NR_CPUS; cpu++) {
106 et = &per_cpu(budget_timer, cpu); 106 et = &per_cpu(budget_timer, cpu);
107 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 107 init_enforcement_timer(et);
108 et->timer.function = on_enforcement_timeout;
109 } 108 }
110 return 0; 109 return 0;
111} 110}
112 111
113void task_release(struct task_struct *t) 112void task_release(struct task_struct *t)
114{ 113{
114 /* Also wrong */
115 t->rt_param.job_params.real_release = t->rt_param.job_params.real_deadline; 115 t->rt_param.job_params.real_release = t->rt_param.job_params.real_deadline;
116 t->rt_param.job_params.real_deadline += get_rt_period(t); 116 t->rt_param.job_params.real_deadline += get_rt_period(t);
117 t->rt_param.job_params.job_no++; 117 t->rt_param.job_params.job_no++;
@@ -120,6 +120,7 @@ void task_release(struct task_struct *t)
120 120
121void server_release(struct task_struct *t) 121void server_release(struct task_struct *t)
122{ 122{
123 /* TODO: so wrong with respect to time accounting */
123 lt_t now = litmus_clock(); 124 lt_t now = litmus_clock();
124 t->rt_param.job_params.exec_time = 0; 125 t->rt_param.job_params.exec_time = 0;
125 t->rt_param.job_params.release = t->rt_param.job_params.deadline; 126 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
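The budget.c hunks above stop hiding the enforcement timer inside budget.c: struct enforcement_timer moves to a header and init_enforcement_timer(), arm_enforcement_timer(), and cancel_enforcement_timer() become callable from plugins, so a plugin can police per-server budgets itself. A minimal sketch of plugin-side usage follows; the cpu_entry container and the budget_precisely_enforced() check are assumptions for illustration, and the calls are expected to run with IRQs off, as the comments in budget.c require.

/* Sketch: plugin-side use of the now-exported enforcement timer API.
 * cpu_entry and the exact budget check are illustrative assumptions;
 * arm/cancel must be called with IRQs disabled (see budget.c). */
#include <litmus/budget.h>
#include <litmus/litmus.h>

struct cpu_entry {
	struct enforcement_timer timer;
	struct task_struct *scheduled;
};

static void cpu_entry_init(struct cpu_entry *entry)
{
	init_enforcement_timer(&entry->timer);	/* hrtimer + handler setup */
}

/* Called from the plugin's schedule() path with IRQs off. */
static void police_budget(struct cpu_entry *entry, struct task_struct *next)
{
	cancel_enforcement_timer(&entry->timer);	/* stop the old task's timer */
	entry->scheduled = next;
	if (next && budget_precisely_enforced(next) && budget_remaining(next))
		arm_enforcement_timer(&entry->timer, next);	/* fires at budget exhaustion */
}
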
diff --git a/litmus/clustered.c b/litmus/clustered.c
index 04450a8ad4fe..6fe1b512f628 100644
--- a/litmus/clustered.c
+++ b/litmus/clustered.c
@@ -102,7 +102,7 @@ int assign_cpus_to_clusters(enum cache_level level,
102 cpus[i]->cluster = cpus[low_cpu]->cluster; 102 cpus[i]->cluster = cpus[low_cpu]->cluster;
103 } 103 }
104 /* enqueue in cpus list */ 104 /* enqueue in cpus list */
105 list_add(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); 105 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
106 printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id); 106 printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id);
107 } 107 }
108out: 108out:
diff --git a/litmus/color.c b/litmus/color.c
new file mode 100644
index 000000000000..ecc191137137
--- /dev/null
+++ b/litmus/color.c
@@ -0,0 +1,357 @@
1#include <linux/spinlock.h>
2
3#include <linux/module.h>
4#include <linux/mm.h>
5#include <linux/slab.h>
6#include <linux/sysctl.h>
7#include <linux/lockdep.h>
8#include <linux/sched.h> /* required by litmus.h */
9#include <asm/io.h> /* page_to_phys on SPARC */
10
11#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
12#include <asm/cacheflush.h> /* set_memory_uc */
13#endif
14
15#include <litmus/color.h>
16#include <litmus/litmus.h> /* for in_list(...) */
17
18#define PAGES_PER_COLOR 3072
19
20/*
21 * This is used only to "trick" lockdep into permitting dynamically allocated
22 * locks of different classes that are initialized on the same line.
23 */
24#define LOCKDEP_MAX_NR_COLORS 512
25static struct lock_class_key color_lock_keys[LOCKDEP_MAX_NR_COLORS];
26
27struct color_group {
28 spinlock_t lock;
29 char _lock_name[LITMUS_LOCKDEP_NAME_MAX_LEN];
30 struct list_head list;
31 atomic_t nr_pages;
32};
33
34static unsigned long color_mask;
35static struct color_group *color_groups;
36
37
38/* non-static: extern'ed in various files */
39struct color_cache_info color_cache_info;
40int color_sysctl_add_pages_data;
41
42static inline unsigned long page_color(struct page *page)
43{
44 return ((page_to_phys(page) & color_mask) >> PAGE_SHIFT);
45}
46
47/*
48 * Page's count should be one; it should not be on any LRU list.
49 */
50void add_page_to_color_list(struct page *page)
51{
52 const unsigned long color = page_color(page);
53 struct color_group *cgroup = &color_groups[color];
54 BUG_ON(in_list(&page->lru) || PageLRU(page));
55 BUG_ON(page_count(page) > 1);
56 spin_lock(&cgroup->lock);
57 list_add_tail(&page->lru, &cgroup->list);
58 atomic_inc(&cgroup->nr_pages);
59 SetPageLRU(page);
60 spin_unlock(&cgroup->lock);
61}
62
63/*
64 * Increases the page's count to two.
65 */
66struct page* get_colored_page(unsigned long color)
67{
68 struct color_group *cgroup;
69 struct page *page = NULL;
70
71 if (color >= color_cache_info.nr_colors)
72 goto out;
73
74 cgroup = &color_groups[color];
75 spin_lock(&cgroup->lock);
76 if (unlikely(!atomic_read(&cgroup->nr_pages))) {
77 TRACE_CUR("No free %lu colored pages.\n", color);
78 printk(KERN_WARNING "no free %lu colored pages.\n", color);
79 goto out_unlock;
80 }
81 page = list_first_entry(&cgroup->list, struct page, lru);
82 BUG_ON(page_count(page) > 1);
83 get_page(page);
84 list_del(&page->lru);
85 atomic_dec(&cgroup->nr_pages);
86 ClearPageLRU(page);
87out_unlock:
88 spin_unlock(&cgroup->lock);
89out:
90 return page;
91}
92
93static unsigned long smallest_nr_pages(void)
94{
95 unsigned long i, min_pages = -1;
96 struct color_group *cgroup;
97 for (i = 0; i < color_cache_info.nr_colors; ++i) {
98 cgroup = &color_groups[i];
99 if (atomic_read(&cgroup->nr_pages) < min_pages)
100 min_pages = atomic_read(&cgroup->nr_pages);
101 }
102 return min_pages;
103}
104
105static int do_add_pages(void)
106{
107 struct page *page, *page_tmp;
108 LIST_HEAD(free_later);
109 unsigned long color;
110 int ret = 0;
111
112 while (smallest_nr_pages() < PAGES_PER_COLOR) {
113#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
114 unsigned long vaddr;
115#endif
116
117#if defined(CONFIG_X86)
118 page = alloc_page(GFP_HIGHUSER | __GFP_ZERO |
119 __GFP_MOVABLE);
120#elif defined(CONFIG_SPARC) /* X86 */
121 page = alloc_page(GFP_HIGHUSER | __GFP_MOVABLE);
122#else
123#error What architecture are you using?
124#endif
125 if (unlikely(!page)) {
126 printk(KERN_WARNING "Could not allocate pages.\n");
127 ret = -ENOMEM;
128 goto out;
129 }
130 color = page_color(page);
131 if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR) {
132 SetPageReserved(page);
133#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
134 vaddr = (unsigned long) pfn_to_kaddr(page_to_pfn(page));
135 if (set_memory_uc(vaddr, 1)) {
136 printk(KERN_ALERT "Could not set_memory_uc\n");
137 BUG();
138 }
139#endif
140 add_page_to_color_list(page);
141 } else
142 list_add_tail(&page->lru, &free_later);
143 }
144 list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
145 list_del(&page->lru);
146 __free_page(page);
147 }
148out:
149 return ret;
150}
151
152static struct alloced_pages {
153 spinlock_t lock;
154 struct list_head list;
155} alloced_pages;
156
157struct alloced_page {
158 struct page *page;
159 struct vm_area_struct *vma;
160 struct list_head list;
161};
162
163static struct alloced_page * new_alloced_page(struct page *page,
164 struct vm_area_struct *vma)
165{
166 struct alloced_page *ap = kmalloc(sizeof(*ap), GFP_KERNEL);
167 INIT_LIST_HEAD(&ap->list);
168 ap->page = page;
169 ap->vma = vma;
170 return ap;
171}
172
173/*
174 * Page's count should be two or more. It should not be on any LRU list.
175 */
176void add_page_to_alloced_list(struct page *page, struct vm_area_struct *vma)
177{
178 struct alloced_page *ap;
179
180 BUG_ON(page_count(page) < 2);
181 ap = new_alloced_page(page, vma);
182 spin_lock(&alloced_pages.lock);
183 list_add_tail(&ap->list, &alloced_pages.list);
184 spin_unlock(&alloced_pages.lock);
185}
186
187/*
188 * Reclaim pages.
189 */
190void reclaim_pages(struct vm_area_struct *vma)
191{
192 struct alloced_page *ap, *ap_tmp;
193 unsigned long nr_reclaimed = 0;
194 spin_lock(&alloced_pages.lock);
195 list_for_each_entry_safe(ap, ap_tmp, &alloced_pages.list, list) {
196 if (vma == ap->vma) {
197 list_del(&ap->list);
198 put_page(ap->page);
199 add_page_to_color_list(ap->page);
200 nr_reclaimed++;
201 TRACE_CUR("reclaiming page (pa:0x%10llx, pfn:%8lu, "
202 "color:%3lu)\n", page_to_phys(ap->page),
203 page_to_pfn(ap->page), page_color(ap->page));
204 kfree(ap);
205 }
206 }
207 spin_unlock(&alloced_pages.lock);
208 TRACE_CUR("Reclaimed %lu pages.\n", nr_reclaimed);
209}
210
211/***********************************************************
212 * Proc
213***********************************************************/
214
215int color_add_pages_handler(struct ctl_table *table, int write, void __user *buffer,
216 size_t *lenp, loff_t *ppos)
217{
218 int ret = 0;
219 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
220 if (ret)
221 goto out;
222 if (write && color_sysctl_add_pages_data)
223 ret = do_add_pages();
224out:
225 return ret;
226}
227
228
229int color_nr_pages_handler(struct ctl_table *table, int write, void __user *buffer,
230 size_t *lenp, loff_t *ppos)
231{
232 struct color_group *cgroup;
233 char *buf;
234 unsigned long i;
235 int used = 0, ret = 0;
236
237 if (write) {
238 ret = -EPERM;
239 goto out;
240 }
241 for (i = 0; i < color_cache_info.nr_colors; ++i) {
242 cgroup = &color_groups[i];
243 buf = ((char*)table->data) + used;
244 used += snprintf(buf, table->maxlen - used, ONE_COLOR_FMT,
245 i, atomic_read(&cgroup->nr_pages));
246 }
247 ret = proc_dostring(table, write, buffer, lenp, ppos);
248out:
249 return ret;
250}
251
252/***********************************************************
253 * Initialization
254***********************************************************/
255
256#if defined(CONFIG_X86)
257/* slowest possible way to find a log, but we only do this once on boot */
258static unsigned int __init slow_log(unsigned int v)
259{
260 unsigned int r = 0;
261 while (v >>= 1)
262 r++;
263 return r;
264}
265
266static int __init init_mask(void)
267{
268 unsigned int line_size_log = slow_log(color_cache_info.line_size);
269 int err = 0;
270
271 BUG_ON(color_cache_info.size <= 1048576 ||
272 color_cache_info.ways < 15 ||
273 color_cache_info.line_size != 64);
274
275 printk("Cache size: %lu line-size: %lu ways: %lu sets: %lu\n",
276 color_cache_info.size, color_cache_info.line_size,
277 color_cache_info.ways, color_cache_info.sets);
278 if (!color_cache_info.size) {
279 printk(KERN_WARNING "No cache information found.\n");
280 err = -EINVAL;
281 goto out;
282 }
283
284
285 BUG_ON(color_cache_info.size / color_cache_info.line_size /
286 color_cache_info.ways != color_cache_info.sets);
287 BUG_ON(PAGE_SIZE >= (color_cache_info.sets << line_size_log));
288 color_mask = ((color_cache_info.sets << line_size_log) - 1) ^
289 (PAGE_SIZE - 1);
290 color_cache_info.nr_colors = (color_mask >> PAGE_SHIFT) + 1;
291out:
292 return err;
293}
294#elif defined(CONFIG_SPARC) /* X86 */
295static int __init init_mask(void)
296{
297 /*
298 * Static assuming we are using Flare (our Niagara machine).
299 * This machine has weirdness with cache banks, and I don't want
300 * to waste time trying to auto-detect this.
301 */
302 color_mask = 0x3e000UL; /* bits 17:13 */
303 color_cache_info.size = 3 * 1024 * 1024; /* 3 MB */
304 color_cache_info.line_size = 64;
305 color_cache_info.ways = 12;
306 color_cache_info.sets = 1024 * 4;
307 color_cache_info.nr_colors = (1 << hweight_long(color_mask));
308 return 0;
309}
310#endif /* SPARC/X86 */
311
312
313
314static int __init init_color_groups(void)
315{
316 struct color_group *cgroup;
317 unsigned long i;
318 int err = 0;
319
320 color_groups = kmalloc(color_cache_info.nr_colors *
321 sizeof(struct color_group), GFP_KERNEL);
322 if (!color_groups) {
323 printk(KERN_WARNING "Could not allocate color groups.\n");
324 err = -ENOMEM;
325 goto out;
326 }
327
328 for (i = 0; i < color_cache_info.nr_colors; ++i) {
329 cgroup = &color_groups[i];
330 atomic_set(&cgroup->nr_pages, 0);
331 INIT_LIST_HEAD(&cgroup->list);
332 spin_lock_init(&cgroup->lock);
333 LOCKDEP_DYNAMIC_ALLOC(&cgroup->lock, &color_lock_keys[i],
334 cgroup->_lock_name, "color%lu", i);
335 }
336out:
337 return err;
338}
339
340static int __init init_color(void)
341{
342 int err = 0;
343 printk("Initializing LITMUS^RT cache coloring.\n");
344
345 INIT_LIST_HEAD(&alloced_pages.list);
346 spin_lock_init(&alloced_pages.lock);
347
348 err = init_mask();
349 printk("PAGE_SIZE: %lu Color mask: 0x%lx Total colors: %lu\n",
350 PAGE_SIZE, color_mask, color_cache_info.nr_colors);
351
352 BUG_ON(LOCKDEP_MAX_NR_COLORS < color_cache_info.nr_colors);
353 err = init_color_groups();
354 return err;
355}
356
357module_init(init_color);
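For reference, the x86 init_mask() above derives the color mask from the cache geometry: the mask covers the set-index bits that lie above the page offset, and page_color() simply shifts those bits down to a color number. A self-contained worked example is below; the 2 MiB / 16-way / 64-byte-line cache and 4 KiB pages are assumed values, not the detected ones.

/* Worked example of the init_mask()/page_color() arithmetic.
 * The cache geometry here is an assumption for illustration. */
#include <stdio.h>

int main(void)
{
	unsigned long size = 2UL * 1024 * 1024;	/* 2 MiB cache */
	unsigned long ways = 16;
	unsigned long line_size_log = 6;	/* 64 B lines */
	unsigned long page_size = 4096;		/* PAGE_SHIFT = 12 */

	unsigned long sets = size / (1UL << line_size_log) / ways; /* 2048 sets */

	/* Set-index bits above the page offset select the color. */
	unsigned long color_mask =
		((sets << line_size_log) - 1) ^ (page_size - 1);  /* 0x1f000 */
	unsigned long nr_colors = (color_mask >> 12) + 1;	  /* 32 colors */

	unsigned long phys = 0x12345000UL;	/* some physical address */
	unsigned long color = (phys & color_mask) >> 12;	  /* color 5 */

	printf("mask=0x%lx colors=%lu color(0x%lx)=%lu\n",
	       color_mask, nr_colors, phys, color);
	return 0;
}
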
diff --git a/litmus/color_dev.c b/litmus/color_dev.c
new file mode 100644
index 000000000000..51760328418e
--- /dev/null
+++ b/litmus/color_dev.c
@@ -0,0 +1,351 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/spinlock.h>
6#include <linux/module.h>
7#include <linux/highmem.h>
8#include <asm/io.h> /* page_to_phys on SPARC */
9
10#include <litmus/litmus.h>
11#include <litmus/color.h>
12
13#define ALLOC_NAME "litmus/color_alloc"
14#define CTRL_NAME "litmus/color_ctrl"
15
16extern unsigned long nr_colors;
17
18/***********************************************************
19 * Control device
20***********************************************************/
21
22static void litmus_color_ctrl_vm_close(struct vm_area_struct *vma)
23{
24 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__,
25 vma->vm_flags, pgprot_val(vma->vm_page_prot));
26
27 TRACE_CUR(CTRL_NAME ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
28 (void*) vma->vm_start, (void*) vma->vm_end, vma,
29 vma->vm_private_data);
30}
31
32static int litmus_color_ctrl_vm_fault(struct vm_area_struct *vma,
33 struct vm_fault *vmf)
34{
35 /* This function should never be called, since
36 * all pages should have been mapped by mmap()
37 * already. */
38 TRACE_CUR("%s flags=0x%lx\n", __FUNCTION__, vma->vm_flags);
39 printk(KERN_WARNING "fault: %s flags=0x%lx\n", __FUNCTION__,
40 vma->vm_flags);
41
42 /* nope, you only get one page */
43 return VM_FAULT_SIGBUS;
44}
45
46static struct vm_operations_struct litmus_color_ctrl_vm_ops = {
47 .close = litmus_color_ctrl_vm_close,
48 .fault = litmus_color_ctrl_vm_fault,
49};
50
51static int mmap_common_checks(struct vm_area_struct *vma)
52{
53 /* you can only map the "first" page */
54 if (vma->vm_pgoff != 0)
55 return -EINVAL;
56
57#if 0
58 /* you can't share it with anyone */
59 /* well, maybe you can... */
60 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
61 return -EINVAL;
62#endif
63
64 return 0;
65}
66
67static int alloc_color_ctrl_page(void)
68{
69 struct task_struct *t;
70 int err = 0;
71
72 t = current;
73 /* only allocate if the task doesn't have one yet */
74 if (!tsk_rt(t)->color_ctrl_page) {
75 tsk_rt(t)->color_ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
76 if (!tsk_rt(t)->color_ctrl_page)
77 err = -ENOMEM;
78 /* will get de-allocated in task teardown */
79 TRACE_TASK(t, "%s color_ctrl_page = %p\n", __FUNCTION__,
80 tsk_rt(t)->color_ctrl_page);
81 }
82 return err;
83}
84
85static int map_color_ctrl_page(struct vm_area_struct *vma)
86{
87 int err;
88 unsigned long pfn;
89 struct task_struct *t = current;
90 struct page *color_ctrl = virt_to_page(tsk_rt(t)->color_ctrl_page);
91
92 t = current;
93 /* Increase ref count. Is decreased when vma is destroyed. */
94 get_page(color_ctrl);
95 pfn = page_to_pfn(color_ctrl);
96
97 TRACE_CUR(CTRL_NAME
98 ": mapping %p (pfn:%lx, %lx) to 0x%lx (flags:%lx prot:%lx)\n",
99 tsk_rt(t)->color_ctrl_page, pfn, page_to_pfn(color_ctrl),
100 vma->vm_start, vma->vm_flags, pgprot_val(vma->vm_page_prot));
101
102 /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise
103 * userspace actually gets a copy-on-write page. */
104 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
105
106 if (err)
107 TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err);
108
109 return err;
110}
111
112static int litmus_color_ctrl_mmap(struct file *filp, struct vm_area_struct *vma)
113{
114 int err = 0;
115
116 /* you can only get one page */
117 if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
118 TRACE_CUR(CTRL_NAME ": must allocate a multiple of PAGE_SIZE\n");
119 err = -EINVAL;
120 goto out;
121 }
122
123 err = mmap_common_checks(vma);
124 if (err) {
125 TRACE_CUR(CTRL_NAME ": failed common mmap checks.\n");
126 goto out;
127 }
128
129 vma->vm_ops = &litmus_color_ctrl_vm_ops;
130 /* this mapping should not be kept across forks,
131 * and cannot be expanded */
132 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
133
134 err = alloc_color_ctrl_page();
135 if (!err)
136 err = map_color_ctrl_page(vma);
137
138 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__, vma->vm_flags,
139 pgprot_val(vma->vm_page_prot));
140out:
141 return err;
142}
143
144
145/***********************************************************
146 * Allocation device
147***********************************************************/
148
149#define vma_nr_pages(vma) \
150 ({unsigned long v = ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); v;})
151
152static int do_map_colored_pages(struct vm_area_struct *vma)
153{
154 const unsigned long nr_pages = vma_nr_pages(vma);
155 struct color_ctrl_page *color_ctrl = tsk_rt(current)->color_ctrl_page;
156 unsigned long nr_mapped;
157 int i, err = 0;
158
159 TRACE_CUR(ALLOC_NAME ": allocating %lu pages (flags:%lx prot:%lx)\n",
160 nr_pages, vma->vm_flags, pgprot_val(vma->vm_page_prot));
161
162#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
163 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
164#endif
165
166 for (i = 0, nr_mapped = 0; nr_mapped < nr_pages; ++i) {
167 const unsigned long color_no = color_ctrl->colors[i];
168 unsigned int page_no = 0;
169
170 for (; page_no < color_ctrl->pages[i]; ++page_no, ++nr_mapped) {
171 const unsigned long addr = vma->vm_start +
172 (nr_mapped << PAGE_SHIFT);
173 struct page *page = get_colored_page(color_no);
174#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
175 const pgprot_t ins_prot = pgprot_noncached(PAGE_SHARED);
176#else
177 const pgprot_t ins_prot = PAGE_SHARED;
178#endif
179
180 if (!page) {
181 TRACE_CUR(ALLOC_NAME ": Could not get page with"
182 " color %lu.\n", color_no);
183 /* TODO unmap mapped pages */
184 err = -ENOMEM;
185 goto out;
186 }
187
188#ifdef CONFIG_SPARC
189 clear_user_highpage(page, addr);
190#endif
191
192 TRACE_CUR(ALLOC_NAME ": insert page (pa:0x%10llx, "
193 "pfn:%8lu, color:%3lu, prot:%lx) at 0x%lx "
194 "vma:(flags:%16lx prot:%16lx)\n",
195 page_to_phys(page),
196 page_to_pfn(page), color_no,
197 pgprot_val(ins_prot), addr,
198 vma->vm_flags,
199 pgprot_val(vma->vm_page_prot));
200
201 err = remap_pfn_range(vma, addr, page_to_pfn(page),
202 PAGE_SIZE, ins_prot);
203 if (err) {
204 TRACE_CUR(ALLOC_NAME ": remap_pfn_range() fail "
205 "(%d)\n", err);
206 /* TODO unmap mapped pages */
207 err = -EINVAL;
208 goto out;
209 }
210 add_page_to_alloced_list(page, vma);
211 }
212
213 if (!page_no) {
214 TRACE_CUR(ALLOC_NAME ": 0 pages given for color %lu\n",
215 color_no);
216 err = -EINVAL;
217 goto out;
218 }
219 }
220 out:
221 return err;
222}
223
224static int map_colored_pages(struct vm_area_struct *vma)
225{
226 int err = 0;
227
228 if (!tsk_rt(current)->color_ctrl_page) {
229 TRACE_CUR("Process has no color control page.\n");
230 err = -EINVAL;
231 goto out;
232 }
233
234 if (COLORS_PER_CONTROL_PAGE < vma_nr_pages(vma)) {
235 TRACE_CUR("Max page request %lu but want %lu.\n",
236 COLORS_PER_CONTROL_PAGE, vma_nr_pages(vma));
237 err = -EINVAL;
238 goto out;
239 }
240 err = do_map_colored_pages(vma);
241out:
242 return err;
243}
244
245static void litmus_color_alloc_vm_close(struct vm_area_struct *vma)
246{
247 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__,
248 vma->vm_flags, pgprot_val(vma->vm_page_prot));
249
250 TRACE_CUR(ALLOC_NAME ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
251 (void*) vma->vm_start, (void*) vma->vm_end, vma,
252 vma->vm_private_data);
253 reclaim_pages(vma);
254}
255
256static int litmus_color_alloc_vm_fault(struct vm_area_struct *vma,
257 struct vm_fault *vmf)
258{
259 /* This function should never be called, since
260 * all pages should have been mapped by mmap()
261 * already. */
262 TRACE_CUR("%s flags=0x%lx\n", __FUNCTION__, vma->vm_flags);
263 printk(KERN_WARNING "fault: %s flags=0x%lx\n", __FUNCTION__,
264 vma->vm_flags);
265
266 /* nope, you only get one page */
267 return VM_FAULT_SIGBUS;
268}
269
270static struct vm_operations_struct litmus_color_alloc_vm_ops = {
271 .close = litmus_color_alloc_vm_close,
272 .fault = litmus_color_alloc_vm_fault,
273};
274
275static int litmus_color_alloc_mmap(struct file *filp, struct vm_area_struct *vma)
276{
277 int err = 0;
278
279 /* you may only request integer multiple of PAGE_SIZE */
280 if (offset_in_page(vma->vm_end - vma->vm_start)) {
281 err = -EINVAL;
282 goto out;
283 }
284
285 err = mmap_common_checks(vma);
286 if (err)
287 goto out;
288
289 vma->vm_ops = &litmus_color_alloc_vm_ops;
290 /* this mapping should not be kept across forks,
291 * and cannot be expanded */
292 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
293
294 err = map_colored_pages(vma);
295
296 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__, vma->vm_flags,
297 pgprot_val(vma->vm_page_prot));
298out:
299 return err;
300}
301
302/***********************************************************
303 * Initialization
304***********************************************************/
305
306static struct file_operations litmus_color_ctrl_fops = {
307 .owner = THIS_MODULE,
308 .mmap = litmus_color_ctrl_mmap,
309};
310
311static struct miscdevice litmus_color_ctrl_dev = {
312 .name = CTRL_NAME,
313 .minor = MISC_DYNAMIC_MINOR,
314 .fops = &litmus_color_ctrl_fops,
315};
316
317static struct file_operations litmus_color_alloc_fops = {
318 .owner = THIS_MODULE,
319 .mmap = litmus_color_alloc_mmap,
320};
321
322static struct miscdevice litmus_color_alloc_dev = {
323 .name = ALLOC_NAME,
324 .minor = MISC_DYNAMIC_MINOR,
325 .fops = &litmus_color_alloc_fops,
326};
327
328static int __init init_dev(const char* name, struct miscdevice *dev)
329{
330 int err;
331 err = misc_register(dev);
332 if (err)
333 printk(KERN_WARNING "Could not allocate %s device (%d).\n",
334 name, err);
335 return err;
336}
337
338static int __init init_color_devices(void)
339{
340 int err;
341
342 printk("Allocating LITMUS^RT color devices.\n");
343 err = init_dev(ALLOC_NAME, &litmus_color_alloc_dev);
344 if (err)
345 goto out;
346 err = init_dev(CTRL_NAME, &litmus_color_ctrl_dev);
347out:
348 return err;
349}
350
351module_init(init_color_devices);
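Taken together, the two devices above define a small mmap()-only protocol: map litmus/color_ctrl once to obtain the task's color_ctrl_page, fill in the per-entry color numbers and page counts, then map litmus/color_alloc with a length equal to the total number of requested pages; the pages go back to their color lists when the allocation VMA is closed. The user-space sketch below illustrates that sequence; the /dev paths and the color_ctrl_page layout (colors[] and pages[] arrays) are assumptions inferred from do_map_colored_pages(), with the authoritative definition in include/litmus/color.h.

/* Sketch: user-space driver of the color_ctrl/color_alloc devices.
 * Device paths and struct layout are assumptions for illustration. */
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>

#define COLORS_PER_CONTROL_PAGE 128	/* assumed bound */

struct color_ctrl_page {		/* assumed layout */
	uint64_t colors[COLORS_PER_CONTROL_PAGE];
	uint64_t pages[COLORS_PER_CONTROL_PAGE];
};

int main(void)
{
	int ctrl_fd = open("/dev/litmus/color_ctrl", O_RDWR);
	int alloc_fd = open("/dev/litmus/color_alloc", O_RDWR);
	struct color_ctrl_page *ctrl;
	void *buf;

	if (ctrl_fd < 0 || alloc_fd < 0)
		return 1;

	/* Exactly one shared control page per task. */
	ctrl = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		    MAP_SHARED, ctrl_fd, 0);
	if (ctrl == MAP_FAILED)
		return 1;

	/* Request 4 pages of color 0 and 4 pages of color 1. */
	ctrl->colors[0] = 0;  ctrl->pages[0] = 4;
	ctrl->colors[1] = 1;  ctrl->pages[1] = 4;

	/* The allocation mapping must cover the 8 requested pages. */
	buf = mmap(NULL, 8 * 4096, PROT_READ | PROT_WRITE,
		   MAP_SHARED, alloc_fd, 0);
	if (buf == MAP_FAILED)
		return 1;

	/* buf is now backed by colored pages; reclaimed on munmap(). */
	munmap(buf, 8 * 4096);
	return 0;
}
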
diff --git a/litmus/color_proc.c b/litmus/color_proc.c
new file mode 100644
index 000000000000..d770123c5f02
--- /dev/null
+++ b/litmus/color_proc.c
@@ -0,0 +1,220 @@
1#include <linux/module.h>
2#include <linux/sysctl.h>
3#include <linux/slab.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/color.h>
7
8extern int color_sysctl_add_pages_data; /* litmus/color.c */
9
10static int zero = 0;
11static int one = 1;
12/* used as names for server proc entries */
13static char *period_str = "period";
14static char *wcet_str = "wcet";
15
16/* servers have a WCET and period */
17#define NR_SERVER_PARAMS 2
18#define CPU_NAME_LEN 3
19struct color_cpu_server {
20 char name[CPU_NAME_LEN];
21 unsigned long wcet;
22 unsigned long period;
23 /* the + 1 is for the sentinel element */
24 struct ctl_table table[NR_SERVER_PARAMS + 1];
25};
26static struct color_cpu_server color_cpu_servers[NR_CPUS];
27
28/* the + 1 is for the sentinel element */
29static struct ctl_table color_cpu_tables[NR_CPUS + 1];
30
31unsigned long color_chunk;
32
33#define INFO_BUFFER_SIZE 100
34static char info_buffer[100];
35
36#define NR_PAGES_INDEX 0 /* location of nr_pages in the table below */
37static struct ctl_table color_table[] =
38{
39 {
40 /* you MUST update NR_PAGES_INDEX if you move this entry */
41 .procname = "nr_pages",
42 .mode = 0444,
43 .proc_handler = color_nr_pages_handler,
44 .data = NULL, /* dynamically set later */
45 .maxlen = 0, /* also set later */
46 },
47 {
48 .procname = "servers",
49 .mode = 0555,
50 .child = color_cpu_tables,
51 },
52 {
53 .procname = "add_pages",
54 .data = &color_sysctl_add_pages_data,
55 .maxlen = sizeof(int),
56 .mode = 0644,
57 .proc_handler = color_add_pages_handler,
58 .extra1 = &zero,
59 .extra2 = &one,
60 },
61 {
62 .procname = "cache_info",
63 .mode = 0444,
64 .proc_handler = proc_dostring,
65 .data = info_buffer,
66 .maxlen = INFO_BUFFER_SIZE,
67 },
68 {
69 .procname = "chunk_size",
70 .mode = 0666,
71 .proc_handler = proc_doulongvec_minmax,
72 .data = &color_chunk,
73 .maxlen = sizeof(color_chunk),
74 },
75 { }
76};
77
78static struct ctl_table litmus_table[] =
79{
80 {
81 .procname = "color",
82 .mode = 0555,
83 .child = color_table,
84 },
85 { }
86};
87
88static struct ctl_table litmus_dir_table[] = {
89 {
90 .procname = "litmus",
91 .mode = 0555,
92 .child = litmus_table,
93 },
94 { }
95};
96
97int color_server_params(int cpu, unsigned long *wcet, unsigned long *period)
98{
99 struct color_cpu_server *svr;
100
101 if (cpu >= num_online_cpus()) {
102 printk(KERN_WARNING "Cannot access illegal CPU: %d\n", cpu);
103 return -EFAULT;
104 }
105
106 svr = &color_cpu_servers[cpu];
107 if (svr->wcet == 0 || svr->period == 0) {
108 printk(KERN_WARNING "Server %d is uninitialized!\n", cpu);
109 return -EPERM;
110 }
111
112 *wcet = svr->wcet;
113 *period = svr->period;
114
115 TRACE("For %d: %lu, %lu\n", cpu, svr->wcet, svr->period);
116
117 return 0;
118}
119
120/* must be called AFTER nr_colors is set */
121static int __init init_sysctl_nr_colors(void)
122{
123 int ret = 0, maxlen = ONE_COLOR_LEN * color_cache_info.nr_colors;
124 color_table[NR_PAGES_INDEX].data = kmalloc(maxlen, GFP_KERNEL);
125 if (!color_table[NR_PAGES_INDEX].data) {
126 printk(KERN_WARNING "Could not allocate nr_pages buffer.\n");
127 ret = -ENOMEM;
128 goto out;
129 }
130 color_table[NR_PAGES_INDEX].maxlen = maxlen;
131out:
132 return ret;
133}
134
135static void __init init_server_entry(struct ctl_table *entry,
136 unsigned long *parameter,
137 char *name)
138{
139 entry->procname = name;
140 entry->mode = 0666;
141 entry->proc_handler = proc_doulongvec_minmax;
142 entry->data = parameter;
143 entry->maxlen = sizeof(*parameter);
144}
145
146static int __init init_cpu_entry(struct ctl_table *cpu_table,
147 struct color_cpu_server *svr, int cpu)
148{
149 struct ctl_table *entry = svr->table;
150
151 printk(KERN_INFO "Creating cpu %d\n", cpu);
152
153 init_server_entry(entry, &svr->wcet, wcet_str);
154 entry++;
155 init_server_entry(entry, &svr->period, period_str);
156
157 /* minus one for the null byte */
158 snprintf(svr->name, CPU_NAME_LEN - 1, "%d", cpu);
159 cpu_table->procname = svr->name;
160 cpu_table->mode = 0555;
161 cpu_table->child = svr->table;
162
163 return 0;
164}
165
166static int __init init_server_entries(void)
167{
168 int cpu, err = 0;
169 struct ctl_table *cpu_table;
170 struct color_cpu_server *svr;
171
172 for_each_online_cpu(cpu) {
173 cpu_table = &color_cpu_tables[cpu];
174 svr = &color_cpu_servers[cpu];
175 err = init_cpu_entry(cpu_table, svr, cpu);
176 if (err)
177 goto out;
178 }
179out:
180 return err;
181}
182
183
184static struct ctl_table_header *litmus_sysctls;
185
186static int __init litmus_sysctl_init(void)
187{
188 int ret = 0;
189
190 printk(KERN_INFO "Registering LITMUS^RT proc sysctl.\n");
191 litmus_sysctls = register_sysctl_table(litmus_dir_table);
192 if (!litmus_sysctls) {
193 printk(KERN_WARNING "Could not register LITMUS^RT sysctl.\n");
194 ret = -EFAULT;
195 goto out;
196 }
197 ret = init_sysctl_nr_colors();
198 if (ret)
199 goto out;
200
201 ret = init_server_entries();
202 if (ret)
203 goto out;
204
205 snprintf(info_buffer, INFO_BUFFER_SIZE,
206 "Cache size\t: %lu B\n"
207 "Line size\t: %lu B\n"
208 "Page size\t: %lu B\n"
209 "Ways\t\t: %lu\n"
210 "Sets\t\t: %lu\n"
211 "Colors\t\t: %lu",
212 color_cache_info.size, color_cache_info.line_size, PAGE_SIZE,
213 color_cache_info.ways, color_cache_info.sets,
214 color_cache_info.nr_colors);
215
216out:
217 return ret;
218}
219
220module_init(litmus_sysctl_init);
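The sysctl registration above exposes /proc/sys/litmus/color/ with nr_pages, add_pages, cache_info, chunk_size, and per-CPU servers/<cpu>/wcet and servers/<cpu>/period entries. A small sketch of how a setup tool could drive them follows; the paths are derived from the ctl_table names and the server parameters are arbitrary example values (presumably nanoseconds, matching lt_t elsewhere in LITMUS^RT).

/* Sketch: configuring the color plugin through its sysctl files.
 * Paths follow the ctl_table names above; the wcet/period values
 * are arbitrary examples. */
#include <stdio.h>

static int write_ulong(const char *path, unsigned long val)
{
	FILE *f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%lu\n", val);
	return fclose(f);
}

int main(void)
{
	char line[256];
	FILE *f;

	/* Top every color group back up to PAGES_PER_COLOR free pages. */
	write_ulong("/proc/sys/litmus/color/add_pages", 1);

	/* Give CPU 0's color server a 2 ms budget every 10 ms. */
	write_ulong("/proc/sys/litmus/color/servers/0/wcet", 2000000);
	write_ulong("/proc/sys/litmus/color/servers/0/period", 10000000);

	/* Dump the per-color free-page counts. */
	f = fopen("/proc/sys/litmus/color/nr_pages", "r");
	if (f) {
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}
	return 0;
}
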
diff --git a/litmus/dgl.c b/litmus/dgl.c
new file mode 100644
index 000000000000..dd2a42cc9ca6
--- /dev/null
+++ b/litmus/dgl.c
@@ -0,0 +1,300 @@
1#include <linux/sched.h>
2#include <linux/slab.h>
3
4#include <litmus/litmus.h>
5#include <litmus/dgl.h>
6#include <litmus/sched_trace.h>
7
8#define MASK_SIZE (sizeof(unsigned long) * 8)
9
10/* Return number of MASK_SIZE fields needed to store a mask in d */
11#define WP(num, word) (num / word + (num % word != 0))
12#define MASK_WORDS(d) WP(d->num_resources, MASK_SIZE)
13
14/* Word, bit -> resource id */
15#define ri(w, b) (w * MASK_SIZE + b)
16
17 /* For loop, where @i iterates over each set bit in @bit_arr */
18#define for_each_resource(bit_arr, d, w, b, i) \
19 for(w = 0; w < MASK_WORDS(d); ++w) \
20 for(b = find_first_bit(&bit_arr[w],MASK_SIZE), i = ri(w, b); \
21 b < MASK_SIZE; \
22 b = find_next_bit(&bit_arr[w],MASK_SIZE,b+1), i = ri(w, b))
23
24/* Return resource id in dgl @d for resource @r */
25#define resource_id(d, r) ((((void*)r) - (void*)((d)->resources))/ sizeof(*r))
26
27/* Return request group of req @r for resource @i */
28#define req_group(r, i) (container_of(((void*)r) - sizeof(*r)*(i), \
29 struct dgl_group_req, requests))
30
31/* Resource id -> word, bit */
32static inline void mask_idx(int resource, int *word, int *bit)
33{
34 *word = resource / MASK_SIZE;
35 *bit = resource % MASK_SIZE;
36}
37
38
39static void print_waiting(struct dgl *dgl, struct dgl_resource *resource)
40{
41 struct dgl_req *pos;
42 struct dgl_group_req *greq;
43 unsigned long long last = 0;
44
45 TRACE("List for rid %d\n", resource_id(dgl, resource));
46 list_for_each_entry(pos, &resource->waiting, list) {
47 greq = pos->greq;
48 TRACE(" 0x%p with timestamp %llu\n", greq, greq->ts);
49 BUG_ON(greq->ts < last);
50 last = greq->ts;
51 }
52}
53
54void dgl_init(struct dgl *dgl, unsigned long num_resources,
55 unsigned long num_replicas)
56{
57 int i;
58 struct dgl_resource *resource;
59
60 dgl->num_replicas = num_replicas;
61 dgl->num_resources = num_resources;
62
63 dgl->resources = kmalloc(sizeof(*dgl->resources) * num_resources,
64 GFP_ATOMIC);
65 dgl->acquired = kmalloc(sizeof(*dgl->acquired) * num_online_cpus(),
66 GFP_ATOMIC);
67
68 for (i = 0; i < num_online_cpus(); ++i)
69 dgl->acquired[i] = NULL;
70
71 for (i = 0; i < num_resources; i++) {
72 resource = &dgl->resources[i];
73
74 INIT_LIST_HEAD(&resource->waiting);
75 resource->free_replicas = dgl->num_replicas;
76 }
77
78 dgl->requests = 0;
79 dgl->running = 0;
80 dgl->ts = 0;
81}
82
83void dgl_free(struct dgl *dgl)
84{
85 kfree(dgl->resources);
86 kfree(dgl->acquired);
87}
88
89void dgl_group_req_init(struct dgl *dgl, struct dgl_group_req *greq)
90{
91 int i;
92
93 greq->requested = kmalloc(sizeof(*greq->requested) * MASK_WORDS(dgl),
94 GFP_ATOMIC);
95 greq->waiting = kmalloc(sizeof(*greq->waiting) * MASK_WORDS(dgl),
96 GFP_ATOMIC);
97 greq->requests = kmalloc(sizeof(*greq->requests) * dgl->num_resources,
98 GFP_ATOMIC);
99
100 BUG_ON(!greq->requested);
101 BUG_ON(!greq->waiting);
102 BUG_ON(!greq->requests);
103
104 greq->cpu = NO_CPU;
105 for (i = 0; i < MASK_WORDS(dgl); ++i) {
106 greq->requested[i] = 0;
107 greq->waiting[i] = 0;
108 }
109}
110
111void dgl_group_req_free(struct dgl_group_req *greq)
112{
113 kfree(greq->requested);
114 kfree(greq->waiting);
115 kfree(greq->requests);
116}
117
118/**
119 * set_req - create request for @replicas of @resource.
120 */
121void set_req(struct dgl *dgl, struct dgl_group_req *greq,
122 int resource, int replicas)
123{
124 int word, bit;
125 struct dgl_req *req;
126
127 if (replicas > dgl->num_replicas)
128 replicas = dgl->num_replicas;
129
130 mask_idx(resource, &word, &bit);
131 __set_bit(bit, &greq->requested[word]);
132
133 TRACE("0x%p requesting %d of %d\n", greq, replicas, resource);
134
135 req = &greq->requests[resource];
136 req->greq = greq;
137 INIT_LIST_HEAD(&req->list);
138 req->replicas = replicas;
139}
140
141/*
142 * Attempt to fulfill request @req for @resource.
143 * Return 1 if successful. If the matching group request has acquired all of
144 * its needed resources, this will then set that req as dgl->acquired[cpu].
145 */
146static unsigned long try_acquire(struct dgl *dgl, struct dgl_resource *resource,
147 struct dgl_req *req)
148{
149 int word, bit, rid, head, empty, room;
150 unsigned long waiting;
151 struct dgl_group_req *greq;
152
153 rid = resource_id(dgl, resource);
154 greq = req->greq;
155
156 TRACE("0x%p greq\n", greq);
157
158 head = resource->waiting.next == &req->list;
159 empty = list_empty(&resource->waiting);
160 room = resource->free_replicas >= req->replicas;
161
162 if (! (room && (head || empty)) ) {
163 TRACE("0x%p cannot acquire %d replicas, %d free\n",
164 greq, req->replicas, resource->free_replicas,
165 room, head, empty);
166 return 0;
167 }
168
169 resource->free_replicas -= req->replicas;
170
171 TRACE("0x%p acquired %d replicas of rid %d\n",
172 greq, req->replicas, rid);
173
174 mask_idx(rid, &word, &bit);
175
176
177 TRACE("0x%p, %lu, 0x%p\n", greq->waiting, greq->waiting[word],
178 &greq->waiting[word]);
179
180 clear_bit(bit, &greq->waiting[word]);
181
182 waiting = 0;
183 for (word = 0; word < MASK_WORDS(dgl); word++) {
184 waiting |= greq->waiting[word];
185 if (waiting)
186 break;
187 }
188
189 if (!waiting) {
190 TRACE("0x%p acquired all resources\n", greq);
191 BUG_ON(dgl->acquired[greq->cpu]);
192 dgl->acquired[greq->cpu] = greq;
193 litmus_reschedule(greq->cpu);
194 dgl->running++;
195 }
196
197 return 1;
198}
199
200/**
201 * add_group_req - initiate group request.
202 */
203void add_group_req(struct dgl *dgl, struct dgl_group_req *greq, int cpu)
204{
205 int b, w, i, succ, all_succ = 1;
206 struct dgl_req *req;
207 struct dgl_resource *resource;
208
209 greq->cpu = cpu;
210 greq->ts = dgl->ts++;
211
212 TRACE("0x%p group request added for CPU %d\n", greq, cpu);
213 BUG_ON(dgl->acquired[cpu] == greq);
214
215 ++dgl->requests;
216
217 for_each_resource(greq->requested, dgl, w, b, i) {
218 __set_bit(b, &greq->waiting[w]);
219 }
220
221 for_each_resource(greq->requested, dgl, w, b, i) {
222 req = &greq->requests[i];
223 resource = &dgl->resources[i];
224
225 succ = try_acquire(dgl, resource, req);
226 all_succ &= succ;
227
228 if (!succ) {
229 TRACE("0x%p waiting on rid %d\n", greq, i);
230 list_add_tail(&req->list, &resource->waiting);
231 }
232 }
233
234 /* Grant empty requests */
235 if (all_succ && !dgl->acquired[cpu]) {
236 TRACE("0x%p empty group request acquired cpu %d\n", greq, cpu);
237 dgl->acquired[cpu] = greq;
238 ++dgl->running;
239 }
240
241 BUG_ON(dgl->requests && !dgl->running);
242}
243
244/**
245 * remove_group_req - abandon group request.
246 *
247 * This will also progress the waiting queues of resources acquired by @greq.
248 */
249void remove_group_req(struct dgl *dgl, struct dgl_group_req *greq)
250{
251 int b, w, i;
252 struct dgl_req *req, *next;
253 struct dgl_resource *resource;
254
255 TRACE("0x%p removing group request for CPU %d\n", greq, greq->cpu);
256
257 --dgl->requests;
258
259 if (dgl->acquired[greq->cpu] == greq) {
260 TRACE("0x%p no longer acquired on CPU %d\n", greq, greq->cpu);
261 dgl->acquired[greq->cpu] = NULL;
262 --dgl->running;
263 }
264
265 for_each_resource(greq->requested, dgl, w, b, i) {
266 req = &greq->requests[i];
267 resource = &dgl->resources[i];
268
269 if (!list_empty(&req->list)) {
270 /* Waiting on resource */
271 clear_bit(b, &greq->waiting[w]);
272 list_del_init(&req->list);
273 TRACE("Quitting 0x%p from rid %d\n",
274 req, i);
275 } else {
276 /* Have resource */
277 resource->free_replicas += req->replicas;
278 BUG_ON(resource->free_replicas > dgl->num_replicas);
279 TRACE("0x%p releasing %d of %d replicas, rid %d\n",
280 greq, req->replicas, resource->free_replicas, i);
281
282 if (!list_empty(&resource->waiting)) {
283 /* Give it to the next guy */
284 next = list_first_entry(&resource->waiting,
285 struct dgl_req,
286 list);
287
288 BUG_ON(next->greq->ts < greq->ts);
289
290 if (try_acquire(dgl, resource, next)) {
291 list_del_init(&next->list);
292 print_waiting(dgl, resource);
293
294 }
295 }
296 }
297 }
298
299 BUG_ON(dgl->requests && !dgl->running);
300}
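dgl.c implements a dynamic group lock: a group request names a set of resources via set_req(), is enqueued atomically with add_group_req(), and only appears in dgl->acquired[cpu] once every named resource has granted its replicas; remove_group_req() releases everything and hands freed replicas to the next waiter in timestamp order. A condensed sketch of that call sequence is below; the locking that must serialize these calls (the code allocates with GFP_ATOMIC and calls litmus_reschedule(), so it clearly expects to run under a scheduler spinlock) is assumed and omitted.

/* Sketch: hypothetical caller of the DGL API defined above.
 * Serialization of these calls (e.g. under the plugin's ready-queue
 * lock) is assumed and not shown. */
#include <litmus/dgl.h>

static struct dgl group_lock;
static struct dgl_group_req my_req;

static void dgl_example(int cpu)
{
	/* 4 resources, each with 2 replicas. */
	dgl_init(&group_lock, 4, 2);
	dgl_group_req_init(&group_lock, &my_req);

	/* This request needs 1 replica of resource 0 and 2 of resource 3. */
	set_req(&group_lock, &my_req, 0, 1);
	set_req(&group_lock, &my_req, 3, 2);

	/* Enqueue; if all replicas are free, the request is granted at once. */
	add_group_req(&group_lock, &my_req, cpu);

	if (group_lock.acquired[cpu] == &my_req) {
		/* ... critical section using resources 0 and 3 ... */
	}
	/* Otherwise the CPU is rescheduled via litmus_reschedule()
	 * once the last outstanding resource is granted. */

	/* Release (or abandon) the request and wake the next waiters. */
	remove_group_req(&group_lock, &my_req);
	dgl_group_req_free(&my_req);
	dgl_free(&group_lock);
}
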
diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c
new file mode 100644
index 000000000000..84ae98e42ae4
--- /dev/null
+++ b/litmus/fifo_common.c
@@ -0,0 +1,58 @@
1/*
2 * litmus/fifo_common.c
3 *
4 * Common functions for FIFO based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fifo_common.h>
16
17int fifo_higher_prio(struct task_struct* first,
18 struct task_struct* second)
19{
20 /* There is no point in comparing a task to itself. */
21 if (first && first == second) {
22 TRACE_TASK(first,
23 "WARNING: pointless fifo priority comparison.\n");
24 BUG_ON(1);
25 return 0;
26 }
27
28 if (!first || !second)
29 return first && !second;
30
31 /* Tiebreak by PID */
32 return (get_release(first) == get_release(second) &&
33 first->pid > second->pid) ||
34 (get_release(first) < get_release(second));
35
36
37}
38
39int fifo_ready_order(struct bheap_node* a, struct bheap_node* b)
40{
41 return fifo_higher_prio(bheap2task(a), bheap2task(b));
42}
43
44void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
45 release_jobs_t release)
46{
47 rt_domain_init(rt, fifo_ready_order, resched, release);
48}
49
50int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t)
51{
52 if (!__jobs_pending(rt))
53 return 0;
54 if (!t)
55 return 1;
56
57 return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t);
58}
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index e282f8a9c067..999290fc8302 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -114,6 +114,7 @@ static int ftdev_open(struct inode *in, struct file *filp)
114 goto out; 114 goto out;
115 115
116 ftdm = ftdev->minor + buf_idx; 116 ftdm = ftdev->minor + buf_idx;
117 ftdm->ftdev = ftdev;
117 filp->private_data = ftdm; 118 filp->private_data = ftdm;
118 119
119 if (mutex_lock_interruptible(&ftdm->lock)) { 120 if (mutex_lock_interruptible(&ftdm->lock)) {
@@ -291,6 +292,19 @@ out:
291 return err; 292 return err;
292} 293}
293 294
295static ssize_t ftdev_write(struct file *filp, const char __user *from,
296 size_t len, loff_t *f_pos)
297{
298 struct ftdev_minor* ftdm = filp->private_data;
299 ssize_t err = -EINVAL;
300 struct ftdev* ftdev = ftdm->ftdev;
301
302 /* dispatch write to buffer-specific code, if available */
303 if (ftdev->write)
304 err = ftdev->write(ftdm->buf, len, from);
305
306 return err;
307}
294 308
295struct file_operations ftdev_fops = { 309struct file_operations ftdev_fops = {
296 .owner = THIS_MODULE, 310 .owner = THIS_MODULE,
@@ -315,6 +329,7 @@ int ftdev_init( struct ftdev* ftdev, struct module* owner,
315 ftdev->alloc = NULL; 329 ftdev->alloc = NULL;
316 ftdev->free = NULL; 330 ftdev->free = NULL;
317 ftdev->can_open = NULL; 331 ftdev->can_open = NULL;
332 ftdev->write = NULL;
318 333
319 ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor), 334 ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
320 GFP_KERNEL); 335 GFP_KERNEL);
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 10a42db1165e..7263cabf8c6c 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -9,15 +9,21 @@
9void prepare_for_next_period(struct task_struct *t) 9void prepare_for_next_period(struct task_struct *t)
10{ 10{
11 BUG_ON(!t); 11 BUG_ON(!t);
12#ifdef CONFIG_PLUGIN_COLOR
13 tsk_rt(t)->tot_exec_time += tsk_rt(t)->job_params.exec_time;
14#endif
12 /* prepare next release */ 15 /* prepare next release */
13
14 t->rt_param.job_params.release = t->rt_param.job_params.deadline; 16 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
15 t->rt_param.job_params.real_release = t->rt_param.job_params.release; 17 t->rt_param.job_params.real_release = t->rt_param.job_params.release;
16 t->rt_param.job_params.deadline += get_rt_period(t); 18 t->rt_param.job_params.deadline += get_rt_period(t);
17 t->rt_param.job_params.real_deadline = t->rt_param.job_params.deadline; 19 t->rt_param.job_params.real_deadline = t->rt_param.job_params.deadline;
18 t->rt_param.job_params.exec_time = 0; 20 t->rt_param.job_params.exec_time = 0;
21 tsk_rt(t)->job_params.release = tsk_rt(t)->job_params.deadline;
22 tsk_rt(t)->job_params.deadline += get_rt_period(t);
23 tsk_rt(t)->job_params.exec_time = 0;
24
19 /* update job sequence number */ 25 /* update job sequence number */
20 t->rt_param.job_params.job_no++; 26 tsk_rt(t)->job_params.job_no++;
21 27
22 /* don't confuse Linux */ 28 /* don't confuse Linux */
23 t->rt.time_slice = 1; 29 t->rt.time_slice = 1;
@@ -25,7 +31,7 @@ void prepare_for_next_period(struct task_struct *t)
25 31
26void release_at(struct task_struct *t, lt_t start) 32void release_at(struct task_struct *t, lt_t start)
27{ 33{
28 t->rt_param.job_params.deadline = start; 34 tsk_rt(t)->job_params.deadline = start;
29 prepare_for_next_period(t); 35 prepare_for_next_period(t);
30 set_rt_flags(t, RT_F_RUNNING); 36 set_rt_flags(t, RT_F_RUNNING);
31} 37}
diff --git a/litmus/litmus.c b/litmus/litmus.c
index f4d676c17d5f..b76e1496d7f4 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -24,6 +24,8 @@
24#include <litmus/sched_mc.h> 24#include <litmus/sched_mc.h>
25#else 25#else
26struct mc_task; 26struct mc_task;
27#ifdef CONFIG_SCHED_CPU_AFFINITY
28#include <litmus/affinity.h>
27#endif 29#endif
28 30
29/* Number of RT tasks that exist in the system */ 31/* Number of RT tasks that exist in the system */
@@ -127,6 +129,14 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
127 "because wcet > period\n", pid); 129 "because wcet > period\n", pid);
128 goto out_unlock; 130 goto out_unlock;
129 } 131 }
132 if ( tp.cls != RT_CLASS_HARD &&
133 tp.cls != RT_CLASS_SOFT &&
134 tp.cls != RT_CLASS_BEST_EFFORT)
135 {
136 printk(KERN_INFO "litmus: real-time task %d rejected "
137 "because its class is invalid\n", pid);
138 goto out_unlock;
139 }
130 if (tp.budget_policy != NO_ENFORCEMENT && 140 if (tp.budget_policy != NO_ENFORCEMENT &&
131 tp.budget_policy != QUANTUM_ENFORCEMENT && 141 tp.budget_policy != QUANTUM_ENFORCEMENT &&
132 tp.budget_policy != PRECISE_ENFORCEMENT) 142 tp.budget_policy != PRECISE_ENFORCEMENT)
@@ -369,12 +379,14 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
369{ 379{
370 struct rt_task user_config = {}; 380 struct rt_task user_config = {};
371 void* ctrl_page = NULL; 381 void* ctrl_page = NULL;
382 void* color_ctrl_page = NULL;
372 383
373 if (restore) { 384 if (restore) {
374 /* Safe user-space provided configuration data. 385 /* Safe user-space provided configuration data.
375 * and allocated page. */ 386 * and allocated page. */
376 user_config = p->rt_param.task_params; 387 user_config = p->rt_param.task_params;
377 ctrl_page = p->rt_param.ctrl_page; 388 ctrl_page = p->rt_param.ctrl_page;
389 color_ctrl_page = p->rt_param.color_ctrl_page;
378 } 390 }
379 391
380 /* We probably should not be inheriting any task's priority 392 /* We probably should not be inheriting any task's priority
@@ -387,8 +399,9 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
387 399
388 /* Restore preserved fields. */ 400 /* Restore preserved fields. */
389 if (restore) { 401 if (restore) {
390 p->rt_param.task_params = user_config; 402 p->rt_param.task_params = user_config;
391 p->rt_param.ctrl_page = ctrl_page; 403 p->rt_param.ctrl_page = ctrl_page;
404 p->rt_param.color_ctrl_page = color_ctrl_page;
392 } 405 }
393} 406}
394 407
@@ -529,9 +542,11 @@ void litmus_fork(struct task_struct* p)
529 reinit_litmus_state(p, 0); 542 reinit_litmus_state(p, 0);
530 /* Don't let the child be a real-time task. */ 543 /* Don't let the child be a real-time task. */
531 p->sched_reset_on_fork = 1; 544 p->sched_reset_on_fork = 1;
532 } else 545 } else {
533 /* non-rt tasks might have ctrl_page set */ 546 /* non-rt tasks might have ctrl_page set */
534 tsk_rt(p)->ctrl_page = NULL; 547 tsk_rt(p)->ctrl_page = NULL;
548 tsk_rt(p)->color_ctrl_page = NULL;
549 }
535 550
536 /* od tables are never inherited across a fork */ 551 /* od tables are never inherited across a fork */
537 p->od_table = NULL; 552 p->od_table = NULL;
@@ -551,6 +566,10 @@ void litmus_exec(void)
551 free_page((unsigned long) tsk_rt(p)->ctrl_page); 566 free_page((unsigned long) tsk_rt(p)->ctrl_page);
552 tsk_rt(p)->ctrl_page = NULL; 567 tsk_rt(p)->ctrl_page = NULL;
553 } 568 }
569 if (tsk_rt(p)->color_ctrl_page) {
570 free_page((unsigned long) tsk_rt(p)->color_ctrl_page);
571 tsk_rt(p)->color_ctrl_page = NULL;
572 }
554 } 573 }
555} 574}
556 575
@@ -568,6 +587,12 @@ void exit_litmus(struct task_struct *dead_tsk)
568 tsk_rt(dead_tsk)->ctrl_page); 587 tsk_rt(dead_tsk)->ctrl_page);
569 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); 588 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
570 } 589 }
590 if (tsk_rt(dead_tsk)->color_ctrl_page) {
591 TRACE_TASK(dead_tsk,
592 "freeing color_ctrl_page %p\n",
593 tsk_rt(dead_tsk)->color_ctrl_page);
594 free_page((unsigned long) tsk_rt(dead_tsk)->color_ctrl_page);
595 }
571 596
572#ifdef CONFIG_PLUGIN_MC 597#ifdef CONFIG_PLUGIN_MC
573 /* The MC-setup syscall might succeed and allocate mc_data, but the 598 /* The MC-setup syscall might succeed and allocate mc_data, but the
@@ -616,6 +641,8 @@ static int __init _init_litmus(void)
616 */ 641 */
617 printk("Starting LITMUS^RT kernel\n"); 642 printk("Starting LITMUS^RT kernel\n");
618 643
644 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
645
619 register_sched_plugin(&linux_sched_plugin); 646 register_sched_plugin(&linux_sched_plugin);
620 647
621 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); 648 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
@@ -637,6 +664,10 @@ static int __init _init_litmus(void)
637 664
638 init_litmus_proc(); 665 init_litmus_proc();
639 666
667#ifdef CONFIG_SCHED_CPU_AFFINITY
668 init_topology();
669#endif
670
640 return 0; 671 return 0;
641} 672}
642 673
diff --git a/litmus/locking.c b/litmus/locking.c
index 91aa0f9724b0..e051a288aba0 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -92,8 +92,6 @@ asmlinkage long sys_litmus_lock(int lock_od)
92 * this into account when computing overheads. */ 92 * this into account when computing overheads. */
93 TS_LOCK_END; 93 TS_LOCK_END;
94 94
95 TS_SYSCALL_OUT_START;
96
97 return err; 95 return err;
98} 96}
99 97
@@ -125,16 +123,18 @@ asmlinkage long sys_litmus_unlock(int lock_od)
125 return err; 123 return err;
126} 124}
127 125
128struct task_struct* waitqueue_first(wait_queue_head_t *wq) 126struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
129{ 127{
130 wait_queue_t *q; 128 wait_queue_t* q;
129 struct task_struct* t = NULL;
131 130
132 if (waitqueue_active(wq)) { 131 if (waitqueue_active(wq)) {
133 q = list_entry(wq->task_list.next, 132 q = list_entry(wq->task_list.next,
134 wait_queue_t, task_list); 133 wait_queue_t, task_list);
135 return (struct task_struct*) q->private; 134 t = (struct task_struct*) q->private;
136 } else 135 __remove_wait_queue(wq, q);
137 return NULL; 136 }
137 return(t);
138} 138}
139 139
140 140
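
The hunk above replaces the peek-only waitqueue_first() with __waitqueue_remove_first(), which returns the head waiter and unlinks it while the caller still holds the wait-queue lock; the gsn_edf FMLP hunk further below drops its remove_wait_queue() call accordingly. The stand-alone C sketch below models that pattern with a plain linked list; the types and names are illustrative stand-ins, not the kernel wait-queue API.

/* Stand-alone model of the "remove the head waiter under the lock"
 * pattern; `waiter` and `queue` are illustrative stand-ins, not the
 * kernel wait-queue API. */
#include <stdio.h>

struct waiter {
    int pid;
    struct waiter *next;
};

struct queue {
    struct waiter *head;
};

/* Dequeue and return the first waiter, or NULL if the queue is empty.
 * The caller is assumed to hold whatever lock protects the queue. */
static struct waiter *queue_remove_first(struct queue *q)
{
    struct waiter *w = q->head;
    if (w)
        q->head = w->next;
    return w;
}

int main(void)
{
    struct waiter a = { .pid = 101 }, b = { .pid = 102 };
    struct queue q = { .head = &a };
    a.next = &b;
    b.next = NULL;

    /* The unlock path hands the resource to the first waiter and
     * removes it in one step, so the waiter never touches the queue. */
    struct waiter *next = queue_remove_first(&q);
    printf("woke pid %d, head is now pid %d\n",
           next->pid, q.head ? q.head->pid : -1);
    return 0;
}
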
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 528d7131fe12..3606cd7ffae7 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -32,8 +32,11 @@ void sched_state_will_schedule(struct task_struct* tsk)
32 /* /\* Litmus tasks should never be subject to a remote */ 32 /* /\* Litmus tasks should never be subject to a remote */
33 /* * set_tsk_need_resched(). *\/ */ 33 /* * set_tsk_need_resched(). *\/ */
34 /* BUG_ON(is_realtime(tsk)); */ 34 /* BUG_ON(is_realtime(tsk)); */
35
36#ifdef CONFIG_PREEMPT_STATE_TRACE
35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 37 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
36 __builtin_return_address(0)); 38 __builtin_return_address(0));
39#endif
37} 40}
38 41
39/* Called by the IPI handler after another CPU called smp_send_resched(). */ 42/* Called by the IPI handler after another CPU called smp_send_resched(). */
diff --git a/litmus/rm_common.c b/litmus/rm_common.c
new file mode 100644
index 000000000000..f608a084d3b8
--- /dev/null
+++ b/litmus/rm_common.c
@@ -0,0 +1,91 @@
1/*
2 * kernel/rm_common.c
3 *
4 * Common functions for RM based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/rm_common.h>
16
17/* rm_higher_prio - returns true if first has a higher RM priority
18 * than second. Period ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int rm_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (first && first == second) {
30 TRACE_TASK(first,
31 "WARNING: pointless rm priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 return !is_realtime(second_task) ||
41
42 /* is the period of the first task shorter?
43 * Then it has higher priority.
44 */
45 lt_before(get_rt_period(first_task), get_rt_period(second_task)) ||
46
47 /* Do we have a period tie?
48 * Then break by PID.
49 */
50 (get_rt_period(first_task) == get_rt_period(second_task) &&
51 (first_task->pid < second_task->pid ||
52
53 /* If the PIDs are the same then the task with the inherited
54 * priority wins.
55 */
56 (first_task->pid == second_task->pid &&
57 !second->rt_param.inh_task)));
58}
59
60int rm_ready_order(struct bheap_node* a, struct bheap_node* b)
61{
62 return rm_higher_prio(bheap2task(a), bheap2task(b));
63}
64
65void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
66 release_jobs_t release)
67{
68 rt_domain_init(rt, rm_ready_order, resched, release);
69}
70
71/* need_to_preempt - check whether the task t needs to be preempted
72 * call only with irqs disabled and with ready_lock acquired
73 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
74 */
75int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
76{
77 /* we need the read lock for rm_ready_queue */
78 /* no need to preempt if there is nothing pending */
79 if (!__jobs_pending(rt))
80 return 0;
81 /* we need to reschedule if t doesn't exist */
82 if (!t)
83 return 1;
84
85 /* NOTE: We cannot check for non-preemptibility since we
86 * don't know what address space we're currently in.
87 */
88
89 /* make sure to get non-rt stuff out of the way */
90 return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t);
91}
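
rm_higher_prio() above orders tasks rate-monotonically: the shorter period wins, ties go to the lower PID, and an inherited priority breaks a PID tie. The stand-alone sketch below models just that ordering in user space; the struct and helper are illustrative stand-ins for the fields the kernel code reads, not the LITMUS^RT API.

/* Stand-alone model of the RM ordering used by rm_higher_prio():
 * shorter period wins, ties go to the lower PID.  The struct below is
 * an illustrative stand-in for the fields the kernel code reads. */
#include <stdio.h>

struct model_task {
    unsigned long long period;  /* stands in for get_rt_period() */
    int pid;
    int has_inherited_prio;     /* stands in for inh_task != NULL */
};

static int model_rm_higher_prio(const struct model_task *a,
                                const struct model_task *b)
{
    if (a->period != b->period)
        return a->period < b->period;
    if (a->pid != b->pid)
        return a->pid < b->pid;
    return !b->has_inherited_prio;
}

int main(void)
{
    struct model_task t1 = { .period = 10000000, .pid = 200 };
    struct model_task t2 = { .period = 25000000, .pid = 100 };

    /* t1 has the shorter period, so it beats t2 despite the larger PID. */
    printf("t1 higher prio than t2? %d\n", model_rm_higher_prio(&t1, &t2));
    printf("t2 higher prio than t1? %d\n", model_rm_higher_prio(&t2, &t1));
    return 0;
}
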
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 3b3b49ed48ea..79243f92edbe 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -57,6 +57,14 @@ static void do_release(struct release_heap *rh)
57 TS_LVLB_RELEASE_START; 57 TS_LVLB_RELEASE_START;
58 else 58 else
59 TS_LVLC_RELEASE_START; 59 TS_LVLC_RELEASE_START;
60 struct release_heap* rh;
61 rh = container_of(timer, struct release_heap, timer);
62
63 TS_RELEASE_LATENCY(rh->release_time);
64
65 VTRACE("on_release_timer(0x%p) starts.\n", timer);
66
67 TS_RELEASE_START;
60 68
61 raw_spin_lock_irqsave(&rh->dom->release_lock, flags); 69 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
62 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); 70 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
diff --git a/litmus/rt_server.c b/litmus/rt_server.c
new file mode 100644
index 000000000000..74d7c7b0f81a
--- /dev/null
+++ b/litmus/rt_server.c
@@ -0,0 +1,23 @@
1#include <litmus/rt_server.h>
2
3static void default_server_update(struct rt_server *srv)
4{
5}
6
7void init_rt_server(struct rt_server *server,
8 int sid, int cpu, rt_domain_t *domain,
9 need_preempt_t need_preempt,
10 server_update_t update)
11{
12 if (!need_preempt)
13 BUG_ON(1);
14
15 server->need_preempt = need_preempt;
16 server->update = (update) ? update : default_server_update;
17
18 server->sid = sid;
19 server->cpu = cpu;
20 server->linked = NULL;
21 server->domain = domain;
22 server->running = 0;
23}
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 73fe1c442a0d..480c62bc895b 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -43,6 +43,10 @@
43 43
44#include <litmus/bheap.h> 44#include <litmus/bheap.h>
45 45
46#ifdef CONFIG_SCHED_CPU_AFFINITY
47#include <litmus/affinity.h>
48#endif
49
46/* to configure the cluster size */ 50/* to configure the cluster size */
47#include <litmus/litmus_proc.h> 51#include <litmus/litmus_proc.h>
48#include <linux/uaccess.h> 52#include <linux/uaccess.h>
@@ -95,7 +99,7 @@ typedef struct clusterdomain {
95 struct bheap_node *heap_node; 99 struct bheap_node *heap_node;
96 struct bheap cpu_heap; 100 struct bheap cpu_heap;
97 /* lock for this cluster */ 101 /* lock for this cluster */
98#define lock domain.ready_lock 102#define cluster_lock domain.ready_lock
99} cedf_domain_t; 103} cedf_domain_t;
100 104
101/* a cedf_domain per cluster; allocation is done at init/activation time */ 105/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -257,11 +261,34 @@ static noinline void requeue(struct task_struct* task)
257 } 261 }
258} 262}
259 263
264#ifdef CONFIG_SCHED_CPU_AFFINITY
265static cpu_entry_t* cedf_get_nearest_available_cpu(
266 cedf_domain_t *cluster, cpu_entry_t *start)
267{
268 cpu_entry_t *affinity;
269
270 get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
271#ifdef CONFIG_RELEASE_MASTER
272 cluster->domain.release_master
273#else
274 NO_CPU
275#endif
276 );
277
278 /* make sure CPU is in our cluster */
279 if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
280 return(affinity);
281 else
282 return(NULL);
283}
284#endif
285
286
260/* check for any necessary preemptions */ 287/* check for any necessary preemptions */
261static void check_for_preemptions(cedf_domain_t *cluster) 288static void check_for_preemptions(cedf_domain_t *cluster)
262{ 289{
263 struct task_struct *task; 290 struct task_struct *task;
264 cpu_entry_t* last; 291 cpu_entry_t *last;
265 292
266 for(last = lowest_prio_cpu(cluster); 293 for(last = lowest_prio_cpu(cluster);
267 edf_preemption_needed(&cluster->domain, last->linked); 294 edf_preemption_needed(&cluster->domain, last->linked);
@@ -270,8 +297,20 @@ static void check_for_preemptions(cedf_domain_t *cluster)
270 task = __take_ready(&cluster->domain); 297 task = __take_ready(&cluster->domain);
271 TRACE("check_for_preemptions: attempting to link task %d to %d\n", 298 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
272 task->pid, last->cpu); 299 task->pid, last->cpu);
300#ifdef CONFIG_SCHED_CPU_AFFINITY
301 {
302 cpu_entry_t *affinity =
303 cedf_get_nearest_available_cpu(cluster,
304 &per_cpu(cedf_cpu_entries, task_cpu(task)));
305 if(affinity)
306 last = affinity;
307 else if(last->linked)
308 requeue(last->linked);
309 }
310#else
273 if (last->linked) 311 if (last->linked)
274 requeue(last->linked); 312 requeue(last->linked);
313#endif
275 link_task_to_cpu(task, last); 314 link_task_to_cpu(task, last);
276 preempt(last); 315 preempt(last);
277 } 316 }
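
With CONFIG_SCHED_CPU_AFFINITY, the preemption loop above first tries a CPU that is idle, near the task's previous CPU, and inside the same cluster, and only otherwise evicts the lowest-priority CPU. Below is a rough user-space model of that selection, with cache topology reduced to numeric distance and all names illustrative rather than the kernel interface.

/* Stand-alone model of the affinity-aware CPU selection added to
 * check_for_preemptions(): prefer an available CPU near the task's
 * previous CPU if it belongs to the cluster, otherwise evict the
 * lowest-priority CPU handed in by the caller. */
#include <stdio.h>

#define NR_CPUS 4

struct model_cpu {
    int id;
    int in_cluster;     /* 1 if the CPU is part of the task's cluster */
    int linked_prio;    /* -1 if idle, otherwise priority of linked task */
};

/* "Nearest available" is modelled as the idle in-cluster CPU with the
 * smallest distance from prev_cpu; the kernel walks cache levels. */
static struct model_cpu *nearest_available(struct model_cpu cpus[], int prev_cpu)
{
    struct model_cpu *best = NULL;
    int best_dist = NR_CPUS + 1;
    for (int i = 0; i < NR_CPUS; i++) {
        int dist = i > prev_cpu ? i - prev_cpu : prev_cpu - i;
        if (cpus[i].linked_prio < 0 && cpus[i].in_cluster && dist < best_dist) {
            best = &cpus[i];
            best_dist = dist;
        }
    }
    return best;
}

static struct model_cpu *pick_target(struct model_cpu cpus[], int prev_cpu,
                                     struct model_cpu *lowest_prio_cpu)
{
    struct model_cpu *affinity = nearest_available(cpus, prev_cpu);
    /* Fall back to preempting the lowest-priority CPU in the cluster. */
    return affinity ? affinity : lowest_prio_cpu;
}

int main(void)
{
    struct model_cpu cpus[NR_CPUS] = {
        { 0, 1, 5 }, { 1, 1, -1 }, { 2, 1, 9 }, { 3, 0, -1 },
    };
    struct model_cpu *target = pick_target(cpus, 2, &cpus[2]);
    printf("schedule newly released task on CPU %d\n", target->id);
    return 0;
}
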
@@ -292,12 +331,12 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
292 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); 331 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
293 unsigned long flags; 332 unsigned long flags;
294 333
295 raw_spin_lock_irqsave(&cluster->lock, flags); 334 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
296 335
297 __merge_ready(&cluster->domain, tasks); 336 __merge_ready(&cluster->domain, tasks);
298 check_for_preemptions(cluster); 337 check_for_preemptions(cluster);
299 338
300 raw_spin_unlock_irqrestore(&cluster->lock, flags); 339 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
301} 340}
302 341
303/* caller holds cedf_lock */ 342/* caller holds cedf_lock */
@@ -378,7 +417,17 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
378 int out_of_time, sleep, preempt, np, exists, blocks; 417 int out_of_time, sleep, preempt, np, exists, blocks;
379 struct task_struct* next = NULL; 418 struct task_struct* next = NULL;
380 419
381 raw_spin_lock(&cluster->lock); 420#ifdef CONFIG_RELEASE_MASTER
421 /* Bail out early if we are the release master.
422 * The release master never schedules any real-time tasks.
423 */
424 if (unlikely(cluster->domain.release_master == entry->cpu)) {
425 sched_state_task_picked();
426 return NULL;
427 }
428#endif
429
430 raw_spin_lock(&cluster->cluster_lock);
382 clear_will_schedule(); 431 clear_will_schedule();
383 432
384 /* sanity checking */ 433 /* sanity checking */
@@ -462,7 +511,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
462 next = prev; 511 next = prev;
463 512
464 sched_state_task_picked(); 513 sched_state_task_picked();
465 raw_spin_unlock(&cluster->lock); 514 raw_spin_unlock(&cluster->cluster_lock);
466 515
467#ifdef WANT_ALL_SCHED_EVENTS 516#ifdef WANT_ALL_SCHED_EVENTS
468 TRACE("cedf_lock released, next=0x%p\n", next); 517 TRACE("cedf_lock released, next=0x%p\n", next);
@@ -504,7 +553,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
504 /* the cluster doesn't change even if t is running */ 553 /* the cluster doesn't change even if t is running */
505 cluster = task_cpu_cluster(t); 554 cluster = task_cpu_cluster(t);
506 555
507 raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags); 556 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
508 557
509 /* setup job params */ 558 /* setup job params */
510 release_at(t, litmus_clock()); 559 release_at(t, litmus_clock());
@@ -513,15 +562,25 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
513 entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); 562 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
514 BUG_ON(entry->scheduled); 563 BUG_ON(entry->scheduled);
515 564
516 entry->scheduled = t; 565#ifdef CONFIG_RELEASE_MASTER
517 tsk_rt(t)->scheduled_on = task_cpu(t); 566 if (entry->cpu != cluster->domain.release_master) {
567#endif
568 entry->scheduled = t;
569 tsk_rt(t)->scheduled_on = task_cpu(t);
570#ifdef CONFIG_RELEASE_MASTER
571 } else {
572 /* do not schedule on release master */
573 preempt(entry); /* force resched */
574 tsk_rt(t)->scheduled_on = NO_CPU;
575 }
576#endif
518 } else { 577 } else {
519 t->rt_param.scheduled_on = NO_CPU; 578 t->rt_param.scheduled_on = NO_CPU;
520 } 579 }
521 t->rt_param.linked_on = NO_CPU; 580 t->rt_param.linked_on = NO_CPU;
522 581
523 cedf_job_arrival(t); 582 cedf_job_arrival(t);
524 raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags); 583 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
525} 584}
526 585
527static void cedf_task_wake_up(struct task_struct *task) 586static void cedf_task_wake_up(struct task_struct *task)
@@ -534,7 +593,7 @@ static void cedf_task_wake_up(struct task_struct *task)
534 593
535 cluster = task_cpu_cluster(task); 594 cluster = task_cpu_cluster(task);
536 595
537 raw_spin_lock_irqsave(&cluster->lock, flags); 596 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
538 /* We need to take suspensions because of semaphores into 597 /* We need to take suspensions because of semaphores into
539 * account! If a job resumes after being suspended due to acquiring 598 * account! If a job resumes after being suspended due to acquiring
540 * a semaphore, it should never be treated as a new job release. 599 * a semaphore, it should never be treated as a new job release.
@@ -557,7 +616,7 @@ static void cedf_task_wake_up(struct task_struct *task)
557 } 616 }
558 } 617 }
559 cedf_job_arrival(task); 618 cedf_job_arrival(task);
560 raw_spin_unlock_irqrestore(&cluster->lock, flags); 619 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
561} 620}
562 621
563static void cedf_task_block(struct task_struct *t) 622static void cedf_task_block(struct task_struct *t)
@@ -570,9 +629,9 @@ static void cedf_task_block(struct task_struct *t)
570 cluster = task_cpu_cluster(t); 629 cluster = task_cpu_cluster(t);
571 630
572 /* unlink if necessary */ 631 /* unlink if necessary */
573 raw_spin_lock_irqsave(&cluster->lock, flags); 632 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
574 unlink(t); 633 unlink(t);
575 raw_spin_unlock_irqrestore(&cluster->lock, flags); 634 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
576 635
577 BUG_ON(!is_realtime(t)); 636 BUG_ON(!is_realtime(t));
578} 637}
@@ -584,7 +643,7 @@ static void cedf_task_exit(struct task_struct * t)
584 cedf_domain_t *cluster = task_cpu_cluster(t); 643 cedf_domain_t *cluster = task_cpu_cluster(t);
585 644
586 /* unlink if necessary */ 645 /* unlink if necessary */
587 raw_spin_lock_irqsave(&cluster->lock, flags); 646 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
588 unlink(t); 647 unlink(t);
589 if (tsk_rt(t)->scheduled_on != NO_CPU) { 648 if (tsk_rt(t)->scheduled_on != NO_CPU) {
590 cpu_entry_t *cpu; 649 cpu_entry_t *cpu;
@@ -592,7 +651,7 @@ static void cedf_task_exit(struct task_struct * t)
592 cpu->scheduled = NULL; 651 cpu->scheduled = NULL;
593 tsk_rt(t)->scheduled_on = NO_CPU; 652 tsk_rt(t)->scheduled_on = NO_CPU;
594 } 653 }
595 raw_spin_unlock_irqrestore(&cluster->lock, flags); 654 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
596 655
597 BUG_ON(!is_realtime(t)); 656 BUG_ON(!is_realtime(t));
598 TRACE_TASK(t, "RIP\n"); 657 TRACE_TASK(t, "RIP\n");
@@ -698,6 +757,9 @@ static long cedf_activate_plugin(void)
698 757
699 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 758 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
700 return -ENOMEM; 759 return -ENOMEM;
760#ifdef CONFIG_RELEASE_MASTER
761 cedf[i].domain.release_master = atomic_read(&release_master_cpu);
762#endif
701 } 763 }
702 764
703 /* cycle through cluster and add cpus to them */ 765 /* cycle through cluster and add cpus to them */
@@ -740,7 +802,11 @@ static long cedf_activate_plugin(void)
740 802
741 entry->linked = NULL; 803 entry->linked = NULL;
742 entry->scheduled = NULL; 804 entry->scheduled = NULL;
743 update_cpu_position(entry); 805#ifdef CONFIG_RELEASE_MASTER
806 /* only add CPUs that should schedule jobs */
807 if (entry->cpu != entry->cluster->domain.release_master)
808#endif
809 update_cpu_position(entry);
744 } 810 }
745 /* done with this cluster */ 811 /* done with this cluster */
746 break; 812 break;
diff --git a/litmus/sched_color.c b/litmus/sched_color.c
new file mode 100644
index 000000000000..44327d60aaa5
--- /dev/null
+++ b/litmus/sched_color.c
@@ -0,0 +1,888 @@
1#include <linux/percpu.h>
2#include <linux/sched.h>
3#include <linux/list.h>
4#include <linux/spinlock.h>
5#include <linux/module.h>
6#include <linux/slab.h>
7
8#include <litmus/litmus.h>
9#include <litmus/jobs.h>
10#include <litmus/preempt.h>
11#include <litmus/sched_plugin.h>
12#include <litmus/rm_common.h>
13#include <litmus/sched_trace.h>
14#include <litmus/color.h>
15#include <litmus/fifo_common.h>
16#include <litmus/budget.h>
17#include <litmus/rt_server.h>
18#include <litmus/dgl.h>
19
20/**
21 * @server Common server functionality.
22 * @task Task used to schedule server.
23 * @timer Budget enforcement for @task
24 * @start_time If set, time at which server began running.
25 */
26struct fifo_server {
27 struct rt_server server;
28 struct task_struct* task;
29 struct enforcement_timer timer;
30 lt_t start_time;
31};
32
33/**
34 * @server Common server functionality.
35 * @rm_domain PRM domain.
36 * @scheduled Task physically running on CPU.
37 * @fifo_server Server partitioned to this CPU.
38 */
39struct cpu_entry {
40 struct rt_server server;
41 rt_domain_t rm_domain;
42 struct task_struct* scheduled;
43 struct fifo_server fifo_server;
44 struct hrtimer chunk_timer;
45};
46
47DEFINE_PER_CPU(struct cpu_entry, color_cpus);
48
49static rt_domain_t fifo_domain;
50static raw_spinlock_t fifo_lock;
51
52static struct dgl group_lock;
53static raw_spinlock_t dgl_lock;
54
55#define local_entry (&__get_cpu_var(color_cpus))
56#define remote_entry(cpu) (&per_cpu(color_cpus, cpu))
57#define task_entry(task) remote_entry(get_partition(task))
58#define task_fserver(task) (&task_entry(task)->fifo_server.server)
59#define entry_lock(entry) (&(entry)->rm_domain.ready_lock)
60
61
62#define task_dom(entry, task) (is_be(task) ? &fifo_domain : &entry->rm_domain)
63#define task_lock(entry, task) (is_be(task) ? &fifo_lock : entry_lock(entry))
64#define is_fifo_server(s) ((s)->sid > num_online_cpus())
65#define lock_if(lock, cond) do { if (cond) raw_spin_lock(lock);} while(0)
66#define unlock_if(lock, cond) do { if (cond) raw_spin_unlock(lock);} while(0)
67
68#ifdef CONFIG_NP_SECTION
69#define has_resources(t, c) (tsk_rt(t)->req == group_lock.acquired[c])
70#else
71#define has_resources(t, c) (1)
72#endif
73
74/*
75 * Requeue onto domain's release or ready queue based on task state.
76 */
77static void requeue(rt_domain_t *dom, struct task_struct* t)
78{
79 if (is_server(t) && !tsk_rt(t)->present)
80 /* Remove stopped server from the system */
81 return;
82
83 TRACE_TASK(t, "Requeueing\n");
84 if (is_queued(t)) {
85 TRACE_TASK(t, "Already queued!\n");
86 return;
87 }
88
89 set_rt_flags(t, RT_F_RUNNING);
90 if (is_released(t, litmus_clock()))
91 __add_ready(dom, t);
92 else
93 add_release(dom, t);
94}
95
96enum hrtimer_restart chunk_fire(struct hrtimer *timer)
97{
98 unsigned long flags;
99 local_irq_save(flags);
100 TRACE("Chunk timer fired.\n");
101 litmus_reschedule_local();
102 local_irq_restore(flags);
103 return HRTIMER_NORESTART;
104}
105
106void chunk_arm(struct cpu_entry *entry)
107{
108 unsigned long fire;
109 if (color_chunk) {
110 fire = litmus_clock() + color_chunk;
111 TRACE("Arming chunk timer for %llu\n", fire);
112 __hrtimer_start_range_ns(&entry->chunk_timer,
113 ns_to_ktime(fire), 0,
114 HRTIMER_MODE_ABS_PINNED, 0);
115 }
116}
117
118void chunk_cancel(struct cpu_entry *entry)
119{
120 TRACE("Cancelling chunk timer\n");
121 hrtimer_try_to_cancel(&entry->chunk_timer);
122}
123
124/*
125 * Relinquish resources held by @t (or its children).
126 */
127static void release_resources(struct task_struct *t)
128{
129 struct task_struct *sched;
130#ifdef CONFIG_NP_SECTION
131
132 TRACE_TASK(t, "Releasing resources\n");
133
134 if (is_server(t)) {
135 sched = task_fserver(t)->linked;
136 if (sched)
137 release_resources(sched);
138 } else if (is_kernel_np(t))
139 remove_group_req(&group_lock, tsk_rt(t)->req);
140 take_np(t);
141#endif
142}
143
144/*
145 * Put in requests for resources needed by @t. If @t is a server, this will
146 * set @t's np flag to reflect resources held by @t's children.
147 */
148static void acquire_resources(struct task_struct *t)
149{
150 int cpu;
151 struct rt_server *server;
152 struct task_struct *sched;
153
154#ifdef CONFIG_NP_SECTION
155 /* Can't acquire resources if t is not running */
156 BUG_ON(!get_task_server(t));
157
158 if (is_kernel_np(t)) {
159 TRACE_TASK(t, "Already contending for resources\n");
160 return;
161 }
162 cpu = get_task_server(t)->cpu;
163
164 if (is_server(t)) {
165 server = task_fserver(t);
166 sched = server->linked;
167
168 /* Happens when server is booted off on completion or
169 * has just completed executing a task.
170 */
171 if (sched && !is_kernel_np(sched))
172 acquire_resources(sched);
173
174 /* Become np if there is a running task */
175 if (sched && has_resources(sched, cpu)) {
176 TRACE_TASK(t, "Running task with resource\n");
177 make_np(t);
178 } else {
179 TRACE_TASK(t, "Running no resources\n");
180 take_np(t);
181 }
182 } else {
183 TRACE_TASK(t, "Acquiring resources\n");
184 if (!has_resources(t, cpu))
185 add_group_req(&group_lock, tsk_rt(t)->req, cpu);
186 make_np(t);
187 }
188#endif
189}
190
191/*
192 * Stop logically running the currently linked task.
193 */
194static void unlink(struct rt_server *server)
195{
196 BUG_ON(!server->linked);
197
198 if (is_server(server->linked))
199 task_fserver(server->linked)->running = 0;
200
201
202 sched_trace_server_switch_away(server->sid, 0,
203 server->linked->pid,
204 get_rt_job(server->linked));
205 TRACE_TASK(server->linked, "No longer run by server %d\n", server->sid);
206
207 raw_spin_lock(&dgl_lock);
208 release_resources(server->linked);
209 raw_spin_unlock(&dgl_lock);
210
211 get_task_server(server->linked) = NULL;
212 server->linked = NULL;
213}
214
215static struct task_struct* schedule_server(struct rt_server *server);
216
217/*
218 * Logically run @task.
219 */
220static void link(struct rt_server *server, struct task_struct *task)
221{
222 struct rt_server *tserv;
223
224 BUG_ON(server->linked);
225 BUG_ON(!server->running);
226 BUG_ON(is_kernel_np(task));
227
228 TRACE_TASK(task, "Run by server %d\n", server->sid);
229
230 if (is_server(task)) {
231 tserv = task_fserver(task);
232 tserv->running = 1;
233 schedule_server(tserv);
234 }
235
236 server->linked = task;
237 get_task_server(task) = server;
238
239 sched_trace_server_switch_to(server->sid, 0,
240 task->pid, get_rt_job(task));
241}
242
243/*
244 * Trigger a preemption on the first CPU whose FIFO server is running but has nothing linked.
245 */
246static void check_for_fifo_preempt(void)
247{
248 int ret = 0, cpu;
249 struct cpu_entry *entry;
250 struct rt_server *cpu_server, *fifo_server;
251
252 TRACE("Checking for FIFO preempt\n");
253
254 for_each_online_cpu(cpu) {
255 entry = remote_entry(cpu);
256 cpu_server = &entry->server;
257 fifo_server = &entry->fifo_server.server;
258
259 raw_spin_lock(entry_lock(entry));
260 raw_spin_lock(&fifo_lock);
261
262 if (cpu_server->linked && is_server(cpu_server->linked) &&
263 !fifo_server->linked) {
264 litmus_reschedule(cpu);
265 ret = 1;
266 }
267
268 raw_spin_unlock(&fifo_lock);
269 raw_spin_unlock(entry_lock(entry));
270
271 if (ret)
272 break;
273 }
274}
275
276/*
277 * Rejoin a task into the system.
278 */
279static void job_arrival(struct task_struct *t)
280{
281 int i;
282 rt_domain_t *dom = task_dom(task_entry(t), t);
283 struct dgl_group_req *gr = tsk_rt(t)->req;
284 struct control_page *cp = tsk_rt(t)->ctrl_page;
285 struct color_ctrl_page *ccp = tsk_rt(t)->color_ctrl_page;
286
287 /* Fill request */
288 if (cp && ccp && cp->colors_updated) {
289 cp->colors_updated = 0;
290 dgl_group_req_init(&group_lock, gr);
291 for (i = 0; ccp->pages[i]; ++i)
292 set_req(&group_lock, gr, ccp->colors[i], ccp->pages[i]);
293 } else {
294 TRACE("Oh noz: %p %p %d\n", cp, ccp, ((cp) ? cp->colors_updated : -1));
295 }
296
297 lock_if(&fifo_lock, is_be(t));
298 requeue(dom, t);
299 unlock_if(&fifo_lock, is_be(t));
300}
301
302/*
303 * Complete job for task linked to @server.
304 */
305static void job_completion(struct rt_server *server)
306{
307 struct task_struct *t = server->linked;
308 lt_t et, now = litmus_clock();
309
310 TRACE_TASK(t, "Job completed\n");
311 if (is_server(t))
312 sched_trace_server_completion(t->pid, get_rt_job(t));
313 else
314 sched_trace_task_completion(t, 0);
315
316 if (1 < get_rt_job(t)) {
317 /* our releases happen at the second job */
318 et = get_exec_time(t);
319 if (et > tsk_rt(t)->max_exec_time)
320 tsk_rt(t)->max_exec_time = et;
321 }
322
323 if (is_tardy(t, now)) {
324 lt_t miss = now - get_deadline(t);
325 ++tsk_rt(t)->missed;
326 tsk_rt(t)->total_tardy += miss;
327 if (lt_before(tsk_rt(t)->max_tardy, miss)) {
328 tsk_rt(t)->max_tardy = miss;
329 }
330 }
331
332 unlink(server);
333 set_rt_flags(t, RT_F_SLEEP);
334 prepare_for_next_period(t);
335
336 if (is_server(t))
337 sched_trace_server_release(t->pid, get_rt_job(t),
338 get_release(t), get_deadline(t));
339 else
340 sched_trace_task_release(t);
341
342 if (is_running(t))
343 job_arrival(t);
344}
345
346/*
347 * Update @server state to reflect task's state.
348 */
349static void update_task(struct rt_server *server)
350{
351 int oot, sleep, block, np, chunked;
352 struct task_struct *t = server->linked;
353 lt_t last = tsk_rt(t)->last_exec_time;
354
355 block = !is_running(t);
356 oot = budget_enforced(t) && budget_exhausted(t);
357 np = is_kernel_np(t);
358 sleep = get_rt_flags(t) == RT_F_SLEEP;
359
360 chunked = color_chunk && last && (lt_after(litmus_clock() - last, color_chunk));
361
362 TRACE_TASK(t, "Updating task, block: %d, oot: %d, np: %d, sleep: %d, chunk: %d\n",
363 block, oot, np, sleep, chunked);
364
365 if (block)
366 unlink(server);
367 else if (oot || sleep)
368 job_completion(server);
369 else if (chunked) {
370 unlink(server);
371 job_arrival(t);
372 }
373}
374
375/*
376 * Link next task for @server.
377 */
378static struct task_struct* schedule_server(struct rt_server *server)
379{
380 struct task_struct *next;
381 struct rt_server *lserver;
382
383 TRACE("Scheduling server %d\n", server->sid);
384
385 if (server->linked) {
386 if (is_server(server->linked)) {
387 lserver = task_fserver(server->linked);
388 lserver->update(lserver);
389 }
390 update_task(server);
391 }
392
393 next = server->linked;
394 lock_if(&fifo_lock, is_fifo_server(server));
395 if ((!next || !is_np(next)) &&
396 server->need_preempt(server->domain, next)) {
397 if (next) {
398 TRACE_TASK(next, "Preempted\n");
399 unlink(server);
400 requeue(server->domain, next);
401 }
402 next = __take_ready(server->domain);
403 link(server, next);
404 }
405 unlock_if(&fifo_lock, is_fifo_server(server));
406
407 return next;
408}
409
410/*
411 * Update server state, including picking next running task and incrementing
412 * server execution time.
413 */
414static void fifo_update(struct rt_server *server)
415{
416 lt_t delta;
417 struct fifo_server *fserver;
418
419 fserver = container_of(server, struct fifo_server, server);
420 TRACE_TASK(fserver->task, "Updating FIFO server\n");
421
422 if (!server->linked || has_resources(server->linked, server->cpu)) {
423 /* Running here means linked to a parent server */
424 /* BUG_ON(!server->running); */
425
426 /* Stop executing */
427 if (fserver->start_time) {
428 delta = litmus_clock() - fserver->start_time;
429 tsk_rt(fserver->task)->job_params.exec_time += delta;
430 fserver->start_time = 0;
431 cancel_enforcement_timer(&fserver->timer);
432 } else {
433 /* Server is linked, but not executing */
434 /* BUG_ON(fserver->timer.armed); */
435 }
436
437 /* Calculate next task */
438 schedule_server(&fserver->server);
439
440 /* Reserve needed resources */
441 raw_spin_lock(&dgl_lock);
442 acquire_resources(fserver->task);
443 raw_spin_unlock(&dgl_lock);
444 }
445}
446
447/*
448 * Trigger a preemption if a newly released RM task should preempt the CPU's linked task.
449 */
450static void color_rm_release(rt_domain_t *rm, struct bheap *tasks)
451{
452 unsigned long flags;
453 struct cpu_entry *entry;
454
455 TRACE_TASK(bheap2task(bheap_peek(rm->order, tasks)),
456 "Released set of RM tasks\n");
457
458 entry = container_of(rm, struct cpu_entry, rm_domain);
459 raw_spin_lock_irqsave(entry_lock(entry), flags);
460
461 __merge_ready(rm, tasks);
462
463 if (rm_preemption_needed(rm, entry->server.linked) &&
464 (!entry->server.linked || !is_kernel_np(entry->server.linked))) {
465 litmus_reschedule(entry->server.cpu);
466 }
467
468 raw_spin_unlock_irqrestore(entry_lock(entry), flags);
469}
470
471static void color_fifo_release(rt_domain_t *dom, struct bheap *tasks)
472{
473 unsigned long flags;
474
475 TRACE_TASK(bheap2task(bheap_peek(dom->order, tasks)),
476 "Released set of FIFO tasks\n");
477 local_irq_save(flags);
478
479 raw_spin_lock(&fifo_lock);
480 __merge_ready(dom, tasks);
481 raw_spin_unlock(&fifo_lock);
482
483 check_for_fifo_preempt();
484
485 local_irq_restore(flags);
486}
487
488#define cpu_empty(entry, run) \
489 (!(run) || (is_server(run) && !(entry)->fifo_server.server.linked))
490
491static struct task_struct* color_schedule(struct task_struct *prev)
492{
493 unsigned long flags;
494 int server_running;
495 struct cpu_entry *entry = local_entry;
496 struct task_struct *next, *plink = entry->server.linked;
497
498 TRACE("Reschedule on %d at %llu\n", entry->server.cpu, litmus_clock());
499 BUG_ON(entry->scheduled && entry->scheduled != prev);
500 BUG_ON(entry->scheduled && !is_realtime(prev));
501
502 raw_spin_lock_irqsave(entry_lock(entry), flags);
503
504 if (entry->scheduled && cpu_empty(entry, plink) && is_running(prev)) {
505 TRACE_TASK(prev, "Snuck in on new!\n");
506 job_arrival(entry->scheduled);
507 }
508
509 /* Pick next top-level task */
510 next = schedule_server(&entry->server);
511 /* Schedule hierarchically */
512 server_running = next && is_server(next);
513 if (server_running)
514 next = task_fserver(next)->linked;
515
516 /* Selected tasks must contend for group lock */
517 if (next) {
518 raw_spin_lock(&dgl_lock);
519 acquire_resources(next);
520 if (has_resources(next, entry->server.cpu)) {
521 TRACE_TASK(next, "Has group lock\n");
522 sched_trace_task_resume(next, 1);
523 } else {
524 TRACE_TASK(next, "Does not have lock, 0x%p does\n",
525 group_lock.acquired[entry->server.cpu]);
526 if (next != prev)
527 sched_trace_task_block(next, 1);
528 next = NULL;
529 server_running = 0;
530 }
531 raw_spin_unlock(&dgl_lock);
532 }
533
534 /* Server is blocked if its running task is blocked. Note that if the
535 * server has no running task, the server will now execute NULL.
536 */
537 if (server_running) {
538 TRACE_TASK(entry->server.linked, "Server running\n");
539 arm_enforcement_timer(&entry->fifo_server.timer,
540 entry->fifo_server.task);
541 entry->fifo_server.start_time = litmus_clock();
542 }
543
544 if (prev) {
545 tsk_rt(prev)->scheduled_on = NO_CPU;
546 tsk_rt(prev)->last_exec_time = 0;
547 chunk_cancel(entry);
548 }
549 if (next) {
550 tsk_rt(next)->scheduled_on = entry->server.cpu;
551 tsk_rt(next)->last_exec_time = litmus_clock();
552 chunk_arm(entry);
553 }
554
555 entry->scheduled = next;
556 sched_state_task_picked();
557
558 raw_spin_unlock_irqrestore(entry_lock(entry), flags);
559
560 return entry->scheduled;
561}
562
563static void color_task_new(struct task_struct *t, int on_rq, int running)
564{
565 unsigned long flags;
566 struct cpu_entry *entry;
567 struct dgl_group_req *req;
568
569 TRACE_TASK(t, "New colored task\n");
570 entry = (is_be(t)) ? local_entry : task_entry(t);
571
572 raw_spin_lock_irqsave(entry_lock(entry), flags);
573
574 req = kmalloc(sizeof(*req), GFP_ATOMIC);
575 tsk_rt(t)->req = req;
576 tsk_rt(t)->tot_exec_time = 0;
577 tsk_rt(t)->max_exec_time = 0;
578 tsk_rt(t)->max_tardy = 0;
579 tsk_rt(t)->missed = 0;
580 tsk_rt(t)->total_tardy = 0;
581 tsk_rt(t)->ctrl_page->colors_updated = 1;
582 tsk_rt(t)->last_exec_time = 0;
583
584 release_at(t, litmus_clock());
585
586 if (running) {
587 /* No need to lock with irqs disabled */
588 TRACE_TASK(t, "Already scheduled on %d\n", entry->server.cpu);
589 BUG_ON(entry->scheduled);
590 entry->scheduled = t;
591 tsk_rt(t)->scheduled_on = entry->server.cpu;
592 } else {
593 job_arrival(t);
594 }
595
596 raw_spin_unlock(entry_lock(entry));
597
598 if (is_be(t))
599 check_for_fifo_preempt();
600 else
601 litmus_reschedule_local();
602
603 local_irq_restore(flags);
604}
605
606static void color_task_wake_up(struct task_struct *task)
607{
608 unsigned long flags;
609 struct cpu_entry* entry = local_entry;
610 int sched;
611 lt_t now = litmus_clock();
612
613 TRACE_TASK(task, "Wake up at %llu\n", now);
614
615 raw_spin_lock_irqsave(entry_lock(entry), flags);
616
617 /* Abuse sporadic model */
618 if (is_tardy(task, now)) {
619 release_at(task, now);
620 sched_trace_task_release(task);
621 }
622
623 sched = (entry->scheduled == task);
624
625 if (!sched)
626 job_arrival(task);
627 else
628 TRACE_TASK(task, "Is already scheduled on %d!\n",
629			entry->server.cpu);
630
631 raw_spin_unlock(entry_lock(entry));
632 if (is_be(task))
633 check_for_fifo_preempt();
634 else
635 litmus_reschedule_local();
636
637
638 local_irq_restore(flags);
639}
640
641static void color_task_block(struct task_struct *t)
642{
643 TRACE_TASK(t, "Block at %llu, state=%d\n", litmus_clock(), t->state);
644 BUG_ON(!is_realtime(t));
645 BUG_ON(is_queued(t));
646}
647
648static void color_task_exit(struct task_struct *t)
649{
650 unsigned long flags;
651 struct cpu_entry *entry = task_entry(t);
652 raw_spinlock_t *lock = task_lock(entry, t);
653
654 TRACE_TASK(t, "RIP, now reschedule\n");
655
656 local_irq_save(flags);
657
658 sched_trace_task_exit(t);
659 sched_trace_task_tardy(t);
660
661 /* Remove from scheduler consideration */
662 if (is_queued(t)) {
663 raw_spin_lock(lock);
664 remove(task_dom(entry, t), t);
665 raw_spin_unlock(lock);
666 }
667
668 /* Stop parent server */
669 if (get_task_server(t))
670 unlink(get_task_server(t));
671
672 /* Unschedule running task */
673 if (tsk_rt(t)->scheduled_on != NO_CPU) {
674 entry = remote_entry(tsk_rt(t)->scheduled_on);
675
676 raw_spin_lock(entry_lock(entry));
677
678 tsk_rt(t)->scheduled_on = NO_CPU;
679 entry->scheduled = NULL;
680 litmus_reschedule(entry->server.cpu);
681
682 raw_spin_unlock(entry_lock(entry));
683 }
684
685 /* Remove dgl request from system */
686 raw_spin_lock(&dgl_lock);
687 release_resources(t);
688 raw_spin_unlock(&dgl_lock);
689
690 dgl_group_req_free(tsk_rt(t)->req);
691 kfree(tsk_rt(t)->req);
692
693 local_irq_restore(flags);
694}
695
696/*
697 * Non-be tasks must have migrated to the right CPU.
698 */
699static long color_admit_task(struct task_struct* t)
700{
701	int ret = (is_be(t) || task_cpu(t) == get_partition(t)) ? 0 : -EINVAL;
702	if (ret) {
703 printk(KERN_WARNING "Task failed to migrate to CPU %d\n",
704 get_partition(t));
705 }
706 return ret;
707}
708
709/*
710 * Load server parameters.
711 */
712static long color_activate_plugin(void)
713{
714 int cpu, ret = 0;
715 struct rt_task tp;
716 struct task_struct *server_task;
717 struct cpu_entry *entry;
718
719 color_chunk = 0;
720
721 for_each_online_cpu(cpu) {
722 entry = remote_entry(cpu);
723 server_task = entry->fifo_server.task;
724
725 raw_spin_lock(entry_lock(entry));
726
727 ret = color_server_params(cpu, ((unsigned long*)&tp.exec_cost),
728 ((unsigned long*)&tp.period));
729 if (ret) {
730 printk(KERN_WARNING "Uninitialized server for CPU %d\n",
731 entry->server.cpu);
732 goto loop_end;
733 }
734
735 /* Fill rt parameters */
736 tp.phase = 0;
737 tp.cpu = cpu;
738 tp.cls = RT_CLASS_SOFT;
739 tp.budget_policy = PRECISE_ENFORCEMENT;
740 tsk_rt(server_task)->task_params = tp;
741 tsk_rt(server_task)->present = 1;
742
743 entry->scheduled = NULL;
744
745 TRACE_TASK(server_task, "Created server with wcet: %llu, "
746 "period: %llu\n", tp.exec_cost, tp.period);
747
748 loop_end:
749 raw_spin_unlock(entry_lock(entry));
750 }
751
752 return ret;
753}
754
755/*
756 * Mark servers as unused so that future calls to requeue skip them.
757 */
758static long color_deactivate_plugin(void)
759{
760 int cpu;
761 struct cpu_entry *entry;
762
763 for_each_online_cpu(cpu) {
764 entry = remote_entry(cpu);
765 if (entry->fifo_server.task) {
766 tsk_rt(entry->fifo_server.task)->present = 0;
767 }
768 }
769 return 0;
770}
771
772/*
773 * Dump container and server parameters for tracing.
774 */
775static void color_release_ts(lt_t time)
776{
777 int cpu, fifo_cid;
778 char fifo_name[TASK_COMM_LEN], cpu_name[TASK_COMM_LEN];
779 struct cpu_entry *entry;
780 struct task_struct *stask;
781
782 strcpy(cpu_name, "CPU");
783 strcpy(fifo_name, "BE");
784
785 fifo_cid = num_online_cpus();
786 trace_litmus_container_param(fifo_cid, fifo_name);
787
788 for_each_online_cpu(cpu) {
789 entry = remote_entry(cpu);
790 trace_litmus_container_param(cpu, cpu_name);
791 trace_litmus_server_param(entry->server.sid, cpu, 0, 0);
792 stask = entry->fifo_server.task;
793 trace_litmus_server_param(stask->pid, fifo_cid,
794 get_exec_cost(stask),
795 get_rt_period(stask));
796
797 /* Make runnable */
798 release_at(stask, time);
799 entry->fifo_server.start_time = 0;
800
801 cancel_enforcement_timer(&entry->fifo_server.timer);
802
803 if (!is_queued(stask))
804 requeue(&entry->rm_domain, stask);
805 }
806}
807
808static struct sched_plugin color_plugin __cacheline_aligned_in_smp = {
809 .plugin_name = "COLOR",
810 .task_new = color_task_new,
811 .complete_job = complete_job,
812 .task_exit = color_task_exit,
813 .schedule = color_schedule,
814 .task_wake_up = color_task_wake_up,
815 .task_block = color_task_block,
816 .admit_task = color_admit_task,
817
818 .release_ts = color_release_ts,
819
820 .activate_plugin = color_activate_plugin,
821 .deactivate_plugin = color_deactivate_plugin,
822};
823
824static int __init init_color(void)
825{
826 int cpu;
827 struct cpu_entry *entry;
828 struct task_struct *server_task;
829 struct fifo_server *fifo_server;
830 struct rt_server *cpu_server;
831
832 for_each_online_cpu(cpu) {
833 entry = remote_entry(cpu);
834 rm_domain_init(&entry->rm_domain, NULL, color_rm_release);
835
836 entry->scheduled = NULL;
837
838 /* Create FIFO server */
839 fifo_server = &entry->fifo_server;
840 init_rt_server(&fifo_server->server,
841 cpu + num_online_cpus() + 1,
842 cpu,
843 &fifo_domain,
844 fifo_preemption_needed, fifo_update);
845
846
847 /* Create task struct for FIFO server */
848 server_task = kmalloc(sizeof(struct task_struct), GFP_ATOMIC);
849 memset(server_task, 0, sizeof(*server_task));
850 server_task->policy = SCHED_LITMUS;
851 strcpy(server_task->comm, "server");
852 server_task->pid = fifo_server->server.sid;
853 fifo_server->task = server_task;
854
855 /* Create rt_params for FIFO server */
856 tsk_rt(server_task)->heap_node = bheap_node_alloc(GFP_ATOMIC);
857 tsk_rt(server_task)->rel_heap = release_heap_alloc(GFP_ATOMIC);
858 bheap_node_init(&tsk_rt(server_task)->heap_node, server_task);
859 tsk_rt(server_task)->is_server = 1;
860
861 /* Create CPU server */
862 cpu_server = &entry->server;
863 init_rt_server(cpu_server, cpu + 1, cpu,
864 &entry->rm_domain, rm_preemption_needed, NULL);
865 cpu_server->running = 1;
866
867 init_enforcement_timer(&fifo_server->timer);
868 hrtimer_init(&entry->chunk_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
869 entry->chunk_timer.function = chunk_fire;
870 }
871
872 fifo_domain_init(&fifo_domain, NULL, color_fifo_release);
873 raw_spin_lock_init(&fifo_lock);
874
875 dgl_init(&group_lock, color_cache_info.nr_colors,
876 color_cache_info.ways);
877 raw_spin_lock_init(&dgl_lock);
878
879 return register_sched_plugin(&color_plugin);
880}
881
882static void exit_color(void)
883{
884 dgl_free(&group_lock);
885}
886
887module_init(init_color);
888module_exit(exit_color);
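
In the plugin above, a task's cache-color demands arrive through its color control page, and job_arrival() converts the zero-terminated (colors[i], pages[i]) pairs into a group-lock request before the task is queued; has_resources() then gates execution on that request being the holder for the task's CPU. The stand-alone sketch below models only the conversion step; the request type and setter are illustrative stand-ins for the DGL API shown in the diff, not the kernel interface.

/* Stand-alone model of how job_arrival() in sched_color.c turns the
 * (colors[i], pages[i]) pairs from the color control page into a group
 * request.  The types and the set function are illustrative stand-ins. */
#include <stdio.h>

#define MAX_COLORS 16

struct model_group_req {
    unsigned int demand[MAX_COLORS];    /* pages requested per color */
};

static void model_set_req(struct model_group_req *req,
                          unsigned int color, unsigned int pages)
{
    if (color < MAX_COLORS)
        req->demand[color] = pages;
}

int main(void)
{
    /* As in the kernel code, the pages[] array is zero-terminated. */
    unsigned int colors[] = { 3, 7, 12, 0 };
    unsigned int pages[]  = { 4, 2, 1, 0 };
    struct model_group_req req = { { 0 } };

    for (int i = 0; pages[i]; i++)
        model_set_req(&req, colors[i], pages[i]);

    for (int c = 0; c < MAX_COLORS; c++)
        if (req.demand[c])
            printf("color %2d: %u pages\n", c, req.demand[c]);
    return 0;
}
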
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index 0afd49155999..0aa44dbddbd6 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -18,11 +18,16 @@
18#include <litmus/sched_plugin.h> 18#include <litmus/sched_plugin.h>
19#include <litmus/edf_common.h> 19#include <litmus/edf_common.h>
20#include <litmus/sched_trace.h> 20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
21 22
22#include <litmus/preempt.h> 23#include <litmus/preempt.h>
23 24
24#include <litmus/bheap.h> 25#include <litmus/bheap.h>
25 26
27#ifdef CONFIG_SCHED_CPU_AFFINITY
28#include <litmus/affinity.h>
29#endif
30
26#include <linux/module.h> 31#include <linux/module.h>
27 32
28/* Overview of GSN-EDF operations. 33/* Overview of GSN-EDF operations.
@@ -253,21 +258,52 @@ static noinline void requeue(struct task_struct* task)
253 } 258 }
254} 259}
255 260
261#ifdef CONFIG_SCHED_CPU_AFFINITY
262static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
263{
264 cpu_entry_t *affinity;
265
266 get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
267#ifdef CONFIG_RELEASE_MASTER
268 gsnedf.release_master
269#else
270 NO_CPU
271#endif
272 );
273
274 return(affinity);
275}
276#endif
277
256/* check for any necessary preemptions */ 278/* check for any necessary preemptions */
257static void check_for_preemptions(void) 279static void check_for_preemptions(void)
258{ 280{
259 struct task_struct *task; 281 struct task_struct *task;
260 cpu_entry_t* last; 282 cpu_entry_t *last;
261 283
262 for(last = lowest_prio_cpu(); 284 for (last = lowest_prio_cpu();
263 edf_preemption_needed(&gsnedf, last->linked); 285 edf_preemption_needed(&gsnedf, last->linked);
264 last = lowest_prio_cpu()) { 286 last = lowest_prio_cpu()) {
265 /* preemption necessary */ 287 /* preemption necessary */
266 task = __take_ready(&gsnedf); 288 task = __take_ready(&gsnedf);
267 TRACE("check_for_preemptions: attempting to link task %d to %d\n", 289 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
268 task->pid, last->cpu); 290 task->pid, last->cpu);
291
292#ifdef CONFIG_SCHED_CPU_AFFINITY
293 {
294 cpu_entry_t *affinity =
295 gsnedf_get_nearest_available_cpu(
296 &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
297 if (affinity)
298 last = affinity;
299 else if (last->linked)
300 requeue(last->linked);
301 }
302#else
269 if (last->linked) 303 if (last->linked)
270 requeue(last->linked); 304 requeue(last->linked);
305#endif
306
271 link_task_to_cpu(task, last); 307 link_task_to_cpu(task, last);
272 preempt(last); 308 preempt(last);
273 } 309 }
@@ -374,8 +410,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
374 /* Bail out early if we are the release master. 410 /* Bail out early if we are the release master.
375 * The release master never schedules any real-time tasks. 411 * The release master never schedules any real-time tasks.
376 */ 412 */
377 if (gsnedf.release_master == entry->cpu) 413 if (unlikely(gsnedf.release_master == entry->cpu)) {
414 sched_state_task_picked();
378 return NULL; 415 return NULL;
416 }
379#endif 417#endif
380 418
381 raw_spin_lock(&gsnedf_lock); 419 raw_spin_lock(&gsnedf_lock);
@@ -765,6 +803,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
765 } 803 }
766 } 804 }
767 805
806 TS_LOCK_SUSPEND;
807
768 /* release lock before sleeping */ 808 /* release lock before sleeping */
769 spin_unlock_irqrestore(&sem->wait.lock, flags); 809 spin_unlock_irqrestore(&sem->wait.lock, flags);
770 810
@@ -777,14 +817,12 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
777 817
778 schedule(); 818 schedule();
779 819
780 sched_trace_task_resume(t, l->id); 820 TS_LOCK_RESUME;
781 821
782 /* Since we hold the lock, no other task will change 822 /* Since we hold the lock, no other task will change
783 * ->owner. We can thus check it without acquiring the spin 823 * ->owner. We can thus check it without acquiring the spin
784 * lock. */ 824 * lock. */
785 BUG_ON(sem->owner != t); 825 BUG_ON(sem->owner != t);
786
787 remove_wait_queue(&sem->wait, &wait);
788 } else { 826 } else {
789 /* it's ours now */ 827 /* it's ours now */
790 sem->owner = t; 828 sem->owner = t;
@@ -812,7 +850,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
812 } 850 }
813 851
814 /* check if there are jobs waiting for this resource */ 852 /* check if there are jobs waiting for this resource */
815 next = waitqueue_first(&sem->wait); 853 next = __waitqueue_remove_first(&sem->wait);
816 if (next) { 854 if (next) {
817 /* next becomes the resource holder */ 855
818 sem->owner = next; 856 sem->owner = next;
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index c2f1e49692c3..39673ab6c7cd 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -102,9 +102,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
102 } 102 }
103 } 103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW 104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->oncpu) 105 if (next->on_cpu)
106 TRACE_TASK(next, "waiting for !oncpu"); 106 TRACE_TASK(next, "waiting for !oncpu");
107 while (next->oncpu) { 107 while (next->on_cpu) {
108 cpu_relax(); 108 cpu_relax();
109 mb(); 109 mb();
110 } 110 }
@@ -257,12 +257,12 @@ static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
257 return; 257 return;
258} 258}
259 259
260static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running) 260static void switched_to_litmus(struct rq *rq, struct task_struct *p)
261{ 261{
262} 262}
263 263
264static void prio_changed_litmus(struct rq *rq, struct task_struct *p, 264static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
265 int oldprio, int running) 265 int oldprio)
266{ 266{
267} 267}
268 268
@@ -288,8 +288,8 @@ static void set_curr_task_litmus(struct rq *rq)
288 * We don't care about the scheduling domain; it can get called from 288
289 * exec, fork, wakeup. 289 * exec, fork, wakeup.
290 */ 290 */
291static int select_task_rq_litmus(struct rq *rq, struct task_struct *p, 291static int
292 int sd_flag, int flags) 292select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
293{ 293{
294 /* preemption is already disabled. 294 /* preemption is already disabled.
295 * We don't want to change cpu here 295 * We don't want to change cpu here
@@ -299,7 +299,12 @@ static int select_task_rq_litmus(struct rq *rq, struct task_struct *p,
299#endif 299#endif
300 300
301static const struct sched_class litmus_sched_class = { 301static const struct sched_class litmus_sched_class = {
302 .next = &rt_sched_class, 302 /* From 34f971f6 the stop/migrate worker threads have a class on
303 * their own, which is the highest prio class. We don't support
304 * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0
305 * CPU capacity.
306 */
307 .next = &stop_sched_class,
303 .enqueue_task = enqueue_task_litmus, 308 .enqueue_task = enqueue_task_litmus,
304 .dequeue_task = dequeue_task_litmus, 309 .dequeue_task = dequeue_task_litmus,
305 .yield_task = yield_task_litmus, 310 .yield_task = yield_task_litmus,
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
index 0a64273daa47..16f1065bbdca 100644
--- a/litmus/sched_pfair.c
+++ b/litmus/sched_pfair.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * kernel/sched_pfair.c 2 * kernel/sched_pfair.c
3 * 3 *
4 * Implementation of the (global) Pfair scheduling algorithm. 4 * Implementation of the PD^2 pfair scheduling algorithm. This
5 * implementation realizes "early releasing," i.e., it is work-conserving.
5 * 6 *
6 */ 7 */
7 8
@@ -76,36 +77,29 @@ struct pfair_state {
76 struct task_struct* local; /* the local copy of linked */ 77 struct task_struct* local; /* the local copy of linked */
77 struct task_struct* scheduled; /* what is actually scheduled */ 78 struct task_struct* scheduled; /* what is actually scheduled */
78 79
79 unsigned long missed_quanta;
80 lt_t offset; /* stagger offset */ 80 lt_t offset; /* stagger offset */
81 unsigned int missed_updates;
82 unsigned int missed_quanta;
81}; 83};
82 84
83/* Currently, we limit the maximum period of any task to 2000 quanta.
84 * The reason is that it makes the implementation easier since we do not
85 * need to reallocate the release wheel on task arrivals.
86 * In the future
87 */
88#define PFAIR_MAX_PERIOD 2000
89
90struct pfair_cluster { 85struct pfair_cluster {
91 struct scheduling_cluster topology; 86 struct scheduling_cluster topology;
92 87
93 /* The "global" time in this cluster. */ 88 /* The "global" time in this cluster. */
94 quanta_t pfair_time; /* the "official" PFAIR clock */ 89 quanta_t pfair_time; /* the "official" PFAIR clock */
95 quanta_t merge_time; /* Updated after the release queue has been
96 * merged. Used by drop_all_references().
97 */
98 90
99 /* The ready queue for this cluster. */ 91 /* The ready queue for this cluster. */
100 rt_domain_t pfair; 92 rt_domain_t pfair;
101 93
102 /* This is the release queue wheel for this cluster. It is indexed by 94 /* The set of jobs that should have their release enacted at the next
103 * pfair_time % PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR 95 * quantum boundary.
104 * priority, so that it can be merged with the ready queue.
105 */ 96 */
106 struct bheap release_queue[PFAIR_MAX_PERIOD]; 97 struct bheap release_queue;
98 raw_spinlock_t release_lock;
107}; 99};
108 100
101#define RT_F_REQUEUE 0x2
102
109static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) 103static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
110{ 104{
111 return container_of(state->topology.cluster, struct pfair_cluster, topology); 105 return container_of(state->topology.cluster, struct pfair_cluster, topology);
@@ -121,6 +115,11 @@ static inline struct pfair_state* from_cluster_list(struct list_head* pos)
121 return list_entry(pos, struct pfair_state, topology.cluster_list); 115 return list_entry(pos, struct pfair_state, topology.cluster_list);
122} 116}
123 117
118static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
119{
120 return container_of(rt, struct pfair_cluster, pfair);
121}
122
124static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) 123static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
125{ 124{
126 /* The ready_lock is used to serialize all scheduling events. */ 125 /* The ready_lock is used to serialize all scheduling events. */
@@ -161,21 +160,11 @@ static quanta_t cur_deadline(struct task_struct* t)
161 return cur_subtask(t)->deadline + tsk_pfair(t)->release; 160 return cur_subtask(t)->deadline + tsk_pfair(t)->release;
162} 161}
163 162
164
165static quanta_t cur_sub_release(struct task_struct* t)
166{
167 return cur_subtask(t)->release + tsk_pfair(t)->release;
168}
169
170static quanta_t cur_release(struct task_struct* t) 163static quanta_t cur_release(struct task_struct* t)
171{ 164{
172#ifdef EARLY_RELEASE 165 /* This is early releasing: only the release of the first subtask
173 /* only the release of the first subtask counts when we early 166 * counts. */
174 * release */
175 return tsk_pfair(t)->release; 167 return tsk_pfair(t)->release;
176#else
177 return cur_sub_release(t);
178#endif
179} 168}
180 169
181static quanta_t cur_overlap(struct task_struct* t) 170static quanta_t cur_overlap(struct task_struct* t)
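
The hunk above hard-codes early releasing: cur_release() now returns the release of the job's first subtask, so later subtasks become eligible as soon as their predecessors complete instead of waiting for their Pfair windows, which is what makes the implementation work-conserving. Below is a tiny stand-alone comparison of the two policies, with illustrative quantum values rather than real task parameters.

/* Stand-alone model of the "early releasing" change to cur_release():
 * without early releasing, subtask i becomes eligible at
 * job_release + subtask_release[i]; with early releasing, every subtask
 * uses the job release itself. */
#include <stdio.h>

static unsigned long cur_release(unsigned long job_release,
                                 unsigned long subtask_release,
                                 int early_releasing)
{
    return early_releasing ? job_release
                           : job_release + subtask_release;
}

int main(void)
{
    unsigned long job_release = 100;            /* in quanta */
    unsigned long subtask_release[] = { 0, 3, 6 };

    for (int i = 0; i < 3; i++)
        printf("subtask %d eligible at %lu (early) vs %lu (windowed)\n",
               i,
               cur_release(job_release, subtask_release[i], 1),
               cur_release(job_release, subtask_release[i], 0));
    return 0;
}
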
@@ -235,11 +224,16 @@ int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
235 return pfair_higher_prio(bheap2task(a), bheap2task(b)); 224 return pfair_higher_prio(bheap2task(a), bheap2task(b));
236} 225}
237 226
238/* return the proper release queue for time t */ 227static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
239static struct bheap* relq(struct pfair_cluster* cluster, quanta_t t)
240{ 228{
241 struct bheap* rq = cluster->release_queue + (t % PFAIR_MAX_PERIOD); 229 struct pfair_cluster* cluster = from_domain(rt);
242 return rq; 230 unsigned long flags;
231
232 raw_spin_lock_irqsave(&cluster->release_lock, flags);
233
234 bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
235
236 raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
243} 237}
244 238
245static void prepare_release(struct task_struct* t, quanta_t at) 239static void prepare_release(struct task_struct* t, quanta_t at)
@@ -248,25 +242,12 @@ static void prepare_release(struct task_struct* t, quanta_t at)
248 tsk_pfair(t)->cur = 0; 242 tsk_pfair(t)->cur = 0;
249} 243}
250 244
251static void __pfair_add_release(struct task_struct* t, struct bheap* queue)
252{
253 bheap_insert(pfair_ready_order, queue,
254 tsk_rt(t)->heap_node);
255}
256
257static void pfair_add_release(struct pfair_cluster* cluster,
258 struct task_struct* t)
259{
260 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
261 __pfair_add_release(t, relq(cluster, cur_release(t)));
262}
263
264/* pull released tasks from the release queue */ 245/* pull released tasks from the release queue */
265static void poll_releases(struct pfair_cluster* cluster, 246static void poll_releases(struct pfair_cluster* cluster)
266 quanta_t time)
267{ 247{
268 __merge_ready(&cluster->pfair, relq(cluster, time)); 248 raw_spin_lock(&cluster->release_lock);
269 cluster->merge_time = time; 249 __merge_ready(&cluster->pfair, &cluster->release_queue);
250 raw_spin_unlock(&cluster->release_lock);
270} 251}
271 252
272static void check_preempt(struct task_struct* t) 253static void check_preempt(struct task_struct* t)
@@ -292,16 +273,12 @@ static void drop_all_references(struct task_struct *t)
292{ 273{
293 int cpu; 274 int cpu;
294 struct pfair_state* s; 275 struct pfair_state* s;
295 struct bheap* q;
296 struct pfair_cluster* cluster; 276 struct pfair_cluster* cluster;
297 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { 277 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
298 /* figure out what queue the node is in */ 278 /* It must be in the ready queue; drop references isn't called
279 * when the job is in a release queue. */
299 cluster = tsk_pfair(t)->cluster; 280 cluster = tsk_pfair(t)->cluster;
300 if (time_before_eq(cur_release(t), cluster->merge_time)) 281 bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
301 q = &cluster->pfair.ready_queue;
302 else
303 q = relq(cluster, cur_release(t));
304 bheap_delete(pfair_ready_order, q,
305 tsk_rt(t)->heap_node); 282 tsk_rt(t)->heap_node);
306 } 283 }
307 for (cpu = 0; cpu < num_online_cpus(); cpu++) { 284 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
@@ -313,6 +290,17 @@ static void drop_all_references(struct task_struct *t)
313 if (s->scheduled == t) 290 if (s->scheduled == t)
314 s->scheduled = NULL; 291 s->scheduled = NULL;
315 } 292 }
293 /* make sure we don't have a stale linked_on field */
294 tsk_rt(t)->linked_on = NO_CPU;
295}
296
297static void pfair_prepare_next_period(struct task_struct* t)
298{
299 struct pfair_param* p = tsk_pfair(t);
300
301 prepare_for_next_period(t);
302 get_rt_flags(t) = RT_F_RUNNING;
303 p->release += p->period;
316} 304}
317 305
318/* returns 1 if the task needs to go the release queue */ 306/* returns 1 if the task needs to go the release queue */
@@ -322,30 +310,26 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
322 int to_relq; 310 int to_relq;
323 p->cur = (p->cur + 1) % p->quanta; 311 p->cur = (p->cur + 1) % p->quanta;
324 if (!p->cur) { 312 if (!p->cur) {
325 sched_trace_task_completion(t, 1);
326 if (tsk_rt(t)->present) { 313 if (tsk_rt(t)->present) {
327 /* we start a new job */ 314 /* The job overran; we start a new budget allocation. */
328 prepare_for_next_period(t); 315 pfair_prepare_next_period(t);
329 sched_trace_task_release(t);
330 get_rt_flags(t) = RT_F_RUNNING;
331 p->release += p->period;
332 } else { 316 } else {
333 /* remove task from system until it wakes */ 317 /* remove task from system until it wakes */
334 drop_all_references(t); 318 drop_all_references(t);
319 tsk_rt(t)->flags = RT_F_REQUEUE;
335 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n", 320 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
336 cpu, p->cur); 321 cpu, p->cur);
337 return 0; 322 return 0;
338 } 323 }
339 } 324 }
340 to_relq = time_after(cur_release(t), time); 325 to_relq = time_after(cur_release(t), time);
341 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n", 326 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
342 cpu, p->cur, to_relq); 327 cpu, p->cur, to_relq, cur_release(t), time);
343 return to_relq; 328 return to_relq;
344} 329}
345 330
346static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) 331static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
347{ 332{
348 int missed;
349 struct task_struct* l; 333 struct task_struct* l;
350 struct pfair_param* p; 334 struct pfair_param* p;
351 struct list_head* pos; 335 struct list_head* pos;
@@ -354,14 +338,17 @@ static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
354 list_for_each(pos, &cluster->topology.cpus) { 338 list_for_each(pos, &cluster->topology.cpus) {
355 cpu = from_cluster_list(pos); 339 cpu = from_cluster_list(pos);
356 l = cpu->linked; 340 l = cpu->linked;
357 missed = cpu->linked != cpu->local; 341 cpu->missed_updates += cpu->linked != cpu->local;
358 if (l) { 342 if (l) {
359 p = tsk_pfair(l); 343 p = tsk_pfair(l);
360 p->last_quantum = time; 344 p->last_quantum = time;
361 p->last_cpu = cpu_id(cpu); 345 p->last_cpu = cpu_id(cpu);
362 if (advance_subtask(time, l, cpu_id(cpu))) { 346 if (advance_subtask(time, l, cpu_id(cpu))) {
363 cpu->linked = NULL; 347 //cpu->linked = NULL;
364 pfair_add_release(cluster, l); 348 PTRACE_TASK(l, "should go to release queue. "
349 "scheduled_on=%d present=%d\n",
350 tsk_rt(l)->scheduled_on,
351 tsk_rt(l)->present);
365 } 352 }
366 } 353 }
367 } 354 }
@@ -445,6 +432,11 @@ static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
445 list_for_each(pos, &cluster->topology.cpus) { 432 list_for_each(pos, &cluster->topology.cpus) {
446 cpu_state = from_cluster_list(pos); 433 cpu_state = from_cluster_list(pos);
447 retry = 1; 434 retry = 1;
435#ifdef CONFIG_RELEASE_MASTER
436 /* skip release master */
437 if (cluster->pfair.release_master == cpu_id(cpu_state))
438 continue;
439#endif
448 while (retry) { 440 while (retry) {
449 if (pfair_higher_prio(__peek_ready(&cluster->pfair), 441 if (pfair_higher_prio(__peek_ready(&cluster->pfair),
450 cpu_state->linked)) 442 cpu_state->linked))
@@ -471,13 +463,13 @@ static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
471 sched_trace_quantum_boundary(); 463 sched_trace_quantum_boundary();
472 464
473 advance_subtasks(cluster, time); 465 advance_subtasks(cluster, time);
474 poll_releases(cluster, time); 466 poll_releases(cluster);
475 schedule_subtasks(cluster, time); 467 schedule_subtasks(cluster, time);
476 468
477 list_for_each(pos, &cluster->topology.cpus) { 469 list_for_each(pos, &cluster->topology.cpus) {
478 cpu = from_cluster_list(pos); 470 cpu = from_cluster_list(pos);
479 if (cpu->linked) 471 if (cpu->linked)
480 PTRACE_TASK(pstate[cpu]->linked, 472 PTRACE_TASK(cpu->linked,
481 " linked on %d.\n", cpu_id(cpu)); 473 " linked on %d.\n", cpu_id(cpu));
482 else 474 else
483 PTRACE("(null) linked on %d.\n", cpu_id(cpu)); 475 PTRACE("(null) linked on %d.\n", cpu_id(cpu));
@@ -612,12 +604,42 @@ static int safe_to_schedule(struct task_struct* t, int cpu)
612static struct task_struct* pfair_schedule(struct task_struct * prev) 604static struct task_struct* pfair_schedule(struct task_struct * prev)
613{ 605{
614 struct pfair_state* state = &__get_cpu_var(pfair_state); 606 struct pfair_state* state = &__get_cpu_var(pfair_state);
615 int blocks; 607 struct pfair_cluster* cluster = cpu_cluster(state);
608 int blocks, completion, out_of_time;
616 struct task_struct* next = NULL; 609 struct task_struct* next = NULL;
617 610
611#ifdef CONFIG_RELEASE_MASTER
612 /* Bail out early if we are the release master.
613 * The release master never schedules any real-time tasks.
614 */
615 if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
616 sched_state_task_picked();
617 return NULL;
618 }
619#endif
620
618 raw_spin_lock(cpu_lock(state)); 621 raw_spin_lock(cpu_lock(state));
619 622
620 blocks = is_realtime(prev) && !is_running(prev); 623 blocks = is_realtime(prev) && !is_running(prev);
624 completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
625 out_of_time = is_realtime(prev) && time_after(cur_release(prev),
626 state->local_tick);
627
628 if (is_realtime(prev))
629 PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
630 blocks, completion, out_of_time);
631
632 if (completion) {
633 sched_trace_task_completion(prev, 0);
634 pfair_prepare_next_period(prev);
635 prepare_release(prev, cur_release(prev));
636 }
637
638 if (!blocks && (completion || out_of_time)) {
639 drop_all_references(prev);
640 sched_trace_task_release(prev);
641 add_release(&cluster->pfair, prev);
642 }
621 643
622 if (state->local && safe_to_schedule(state->local, cpu_id(state))) 644 if (state->local && safe_to_schedule(state->local, cpu_id(state)))
623 next = state->local; 645 next = state->local;
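pfair_schedule() now handles job completions and budget exhaustion itself rather than leaving everything to the quantum tick: blocks, completion (RT_F_SLEEP) and out_of_time are computed up front, a completing job immediately gets its next period prepared, and any non-blocked task that completed or ran out of budget is stripped of queue references and pushed onto the cluster's release heap. The sketch below restates that requeue decision; the three flags are assumed to be computed exactly as in the hunk above, and note that in the actual code the completion bookkeeping runs even when the task also blocks, only the requeue is skipped.

/* Sketch only: restates the requeue decision made in pfair_schedule(). */
enum prev_action { PREV_KEEP, PREV_WAIT_FOR_WAKEUP, PREV_REQUEUE };

static enum prev_action classify_prev(int blocks, int completion,
				      int out_of_time)
{
	if (blocks)
		return PREV_WAIT_FOR_WAKEUP;  /* pfair_task_wake_up() requeues later */
	if (completion || out_of_time)
		return PREV_REQUEUE;          /* drop references, add to release heap */
	return PREV_KEEP;                     /* keep competing as-is */
}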
@@ -649,13 +671,19 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int running)
649 cluster = tsk_pfair(t)->cluster; 671 cluster = tsk_pfair(t)->cluster;
650 672
651 raw_spin_lock_irqsave(cluster_lock(cluster), flags); 673 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
652 if (running)
653 t->rt_param.scheduled_on = task_cpu(t);
654 else
655 t->rt_param.scheduled_on = NO_CPU;
656 674
657 prepare_release(t, cluster->pfair_time + 1); 675 prepare_release(t, cluster->pfair_time + 1);
658 pfair_add_release(cluster, t); 676
677 t->rt_param.scheduled_on = NO_CPU;
678
679 if (running) {
680#ifdef CONFIG_RELEASE_MASTER
681 if (task_cpu(t) != cluster->pfair.release_master)
682#endif
683 t->rt_param.scheduled_on = task_cpu(t);
684 __add_ready(&cluster->pfair, t);
685 }
686
659 check_preempt(t); 687 check_preempt(t);
660 688
661 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); 689 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
@@ -665,6 +693,7 @@ static void pfair_task_wake_up(struct task_struct *t)
665{ 693{
666 unsigned long flags; 694 unsigned long flags;
667 lt_t now; 695 lt_t now;
696 int requeue = 0;
668 struct pfair_cluster* cluster; 697 struct pfair_cluster* cluster;
669 698
670 cluster = tsk_pfair(t)->cluster; 699 cluster = tsk_pfair(t)->cluster;
@@ -679,13 +708,20 @@ static void pfair_task_wake_up(struct task_struct *t)
679 * (as if it never blocked at all). Otherwise, we have a 708 * (as if it never blocked at all). Otherwise, we have a
680 * new sporadic job release. 709 * new sporadic job release.
681 */ 710 */
711 requeue = tsk_rt(t)->flags == RT_F_REQUEUE;
682 now = litmus_clock(); 712 now = litmus_clock();
683 if (lt_before(get_deadline(t), now)) { 713 if (lt_before(get_deadline(t), now)) {
714 TRACE_TASK(t, "sporadic release!\n");
684 release_at(t, now); 715 release_at(t, now);
685 prepare_release(t, time2quanta(now, CEIL)); 716 prepare_release(t, time2quanta(now, CEIL));
686 sched_trace_task_release(t); 717 sched_trace_task_release(t);
687 /* FIXME: race with pfair_time advancing */ 718 }
688 pfair_add_release(cluster, t); 719
720 /* only add to ready queue if the task isn't still linked somewhere */
721 if (requeue) {
722 TRACE_TASK(t, "requeueing required\n");
723 tsk_rt(t)->flags = RT_F_RUNNING;
724 __add_ready(&cluster->pfair, t);
689 } 725 }
690 726
691 check_preempt(t); 727 check_preempt(t);
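The wake-up path now relies on the RT_F_REQUEUE tag introduced in advance_subtask(): a task that was dropped from all queues because it was not present is flagged RT_F_REQUEUE, and pfair_task_wake_up() re-adds it to the ready queue only if that tag is still set, so a task that is still linked on some CPU is not enqueued twice. A small stand-alone sketch of the handshake; the enum values stand in for the RT_F_* constants and are not the kernel definitions.

/* Sketch of the flag handshake; TOY_* values stand in for RT_F_*. */
enum toy_rt_flag { TOY_RUNNING, TOY_SLEEP, TOY_REQUEUE };

struct toy_task {
	enum toy_rt_flag flags;
	int queued;
};

/* what drop_all_references() + the !present branch of advance_subtask() do */
static void toy_drop_not_present(struct toy_task *t)
{
	t->queued = 0;
	t->flags = TOY_REQUEUE;           /* remember that a requeue is owed */
}

/* what pfair_task_wake_up() now does */
static void toy_wake_up(struct toy_task *t)
{
	if (t->flags == TOY_REQUEUE) {    /* requeue only if nobody else will */
		t->flags = TOY_RUNNING;
		t->queued = 1;            /* analogue of __add_ready() */
	}
}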
@@ -744,15 +780,11 @@ static void pfair_release_at(struct task_struct* task, lt_t start)
744 release_at(task, start); 780 release_at(task, start);
745 release = time2quanta(start, CEIL); 781 release = time2quanta(start, CEIL);
746 782
747 /* FIXME: support arbitrary offsets. */
748 if (release - cluster->pfair_time >= PFAIR_MAX_PERIOD)
749 release = cluster->pfair_time + PFAIR_MAX_PERIOD;
750
751 TRACE_TASK(task, "sys release at %lu\n", release); 783 TRACE_TASK(task, "sys release at %lu\n", release);
752 784
753 drop_all_references(task); 785 drop_all_references(task);
754 prepare_release(task, release); 786 prepare_release(task, release);
755 pfair_add_release(cluster, task); 787 add_release(&cluster->pfair, task);
756 788
757 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); 789 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
758} 790}
@@ -834,13 +866,6 @@ static long pfair_admit_task(struct task_struct* t)
834 "The period of %s/%d is not a multiple of %llu.\n", 866 "The period of %s/%d is not a multiple of %llu.\n",
835 t->comm, t->pid, (unsigned long long) quantum_length); 867 t->comm, t->pid, (unsigned long long) quantum_length);
836 868
837 if (period >= PFAIR_MAX_PERIOD) {
838 printk(KERN_WARNING
839 "PFAIR: Rejecting task %s/%d; its period is too long.\n",
840 t->comm, t->pid);
841 return -EINVAL;
842 }
843
844 if (quanta == period) { 869 if (quanta == period) {
845 /* special case: task has weight 1.0 */ 870 /* special case: task has weight 1.0 */
846 printk(KERN_INFO 871 printk(KERN_INFO
@@ -880,12 +905,9 @@ static long pfair_admit_task(struct task_struct* t)
880 905
881static void pfair_init_cluster(struct pfair_cluster* cluster) 906static void pfair_init_cluster(struct pfair_cluster* cluster)
882{ 907{
883 int i; 908 rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
884 909 bheap_init(&cluster->release_queue);
885 /* initialize release queue */ 910 raw_spin_lock_init(&cluster->release_lock);
886 for (i = 0; i < PFAIR_MAX_PERIOD; i++)
887 bheap_init(&cluster->release_queue[i]);
888 rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, NULL);
889 INIT_LIST_HEAD(&cluster->topology.cpus); 911 INIT_LIST_HEAD(&cluster->topology.cpus);
890} 912}
891 913
@@ -899,8 +921,11 @@ static void cleanup_clusters(void)
899 num_pfair_clusters = 0; 921 num_pfair_clusters = 0;
900 922
901 /* avoid stale pointers */ 923 /* avoid stale pointers */
902 for (i = 0; i < NR_CPUS; i++) 924 for (i = 0; i < num_online_cpus(); i++) {
903 pstate[i]->topology.cluster = NULL; 925 pstate[i]->topology.cluster = NULL;
926 printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
927 pstate[i]->missed_updates, pstate[i]->missed_quanta);
928 }
904} 929}
905 930
906static long pfair_activate_plugin(void) 931static long pfair_activate_plugin(void)
@@ -936,6 +961,9 @@ static long pfair_activate_plugin(void)
936 pfair_init_cluster(cluster); 961 pfair_init_cluster(cluster);
937 cluster->pfair_time = now; 962 cluster->pfair_time = now;
938 clust[i] = &cluster->topology; 963 clust[i] = &cluster->topology;
964#ifdef CONFIG_RELEASE_MASTER
965 cluster->pfair.release_master = atomic_read(&release_master_cpu);
966#endif
939 } 967 }
940 968
941 for (i = 0; i < num_online_cpus(); i++) { 969 for (i = 0; i < num_online_cpus(); i++) {
@@ -943,6 +971,7 @@ static long pfair_activate_plugin(void)
943 state->cur_tick = now; 971 state->cur_tick = now;
944 state->local_tick = now; 972 state->local_tick = now;
945 state->missed_quanta = 0; 973 state->missed_quanta = 0;
974 state->missed_updates = 0;
946 state->offset = cpu_stagger_offset(i); 975 state->offset = cpu_stagger_offset(i);
947 printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus()); 976 printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus());
948 cpus[i] = &state->topology; 977 cpus[i] = &state->topology;
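Taken together, the sched_pfair.c changes retire the old per-cluster ring of release buckets: the relq() lookup in drop_all_references(), the PFAIR_MAX_PERIOD clamp in pfair_release_at(), the admission-time rejection of long periods, and the PFAIR_MAX_PERIOD-sized heap array in pfair_init_cluster() are all replaced by the rt_domain's own release handling (add_release() plus pfair_release_jobs). The removed structure is not shown in full here, but the period cap only makes sense if releases were hashed into a fixed ring of buckets by release quantum, roughly as in the sketch below; the modulo indexing is an assumption about the removed relq(), not something visible in this diff.

#include <stdio.h>

/* Toy version of a fixed ring of release buckets.  Two releases that are
 * TOY_MAX_PERIOD quanta apart map to the same bucket, which is why the
 * removed code had to reject periods >= PFAIR_MAX_PERIOD.  A single
 * time-ordered release queue (the rt_domain used now) has no such cap. */
#define TOY_MAX_PERIOD 8

static unsigned int toy_relq_slot(unsigned long release_quantum)
{
	return release_quantum % TOY_MAX_PERIOD;
}

int main(void)
{
	printf("release 5  -> bucket %u\n", toy_relq_slot(5));
	printf("release 13 -> bucket %u\n", toy_relq_slot(13)); /* collides with 5 */
	return 0;
}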
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 4eaf30659af3..123c7516fb76 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -35,29 +35,18 @@ void preempt_if_preemptable(struct task_struct* t, int cpu)
35 /* local CPU case */ 35 /* local CPU case */
36 /* check if we need to poke userspace */ 36 /* check if we need to poke userspace */
37 if (is_user_np(t)) 37 if (is_user_np(t))
38 /* yes, poke it */ 38 /* Yes, poke it. This doesn't have to be atomic since
39 * the task is definitely not executing. */
39 request_exit_np(t); 40 request_exit_np(t);
40 else if (!is_kernel_np(t)) 41 else if (!is_kernel_np(t))
41 /* only if we are allowed to preempt the 42 /* only if we are allowed to preempt the
42 * currently-executing task */ 43 * currently-executing task */
43 reschedule = 1; 44 reschedule = 1;
44 } else { 45 } else {
45 /* remote CPU case */ 46 /* Remote CPU case. Only notify if it's not a kernel
46 if (is_user_np(t)) { 47 * NP section and if we didn't set the userspace
47 /* need to notify user space of delayed 48 * flag. */
48 * preemption */ 49 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
49
50 /* to avoid a race, set the flag, then test
51 * again */
52 request_exit_np(t);
53 /* make sure it got written */
54 mb();
55 }
56 /* Only send an ipi if remote task might have raced our
57 * request, i.e., send an IPI to make sure in case it
58 * exited its critical section.
59 */
60 reschedule = !is_np(t) && !is_kernel_np(t);
61 } 50 }
62 } 51 }
63 if (likely(reschedule)) 52 if (likely(reschedule))
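The remote-CPU branch of preempt_if_preemptable() used to set the userspace exit-np flag, issue mb(), and then re-test whether an IPI was still required; that open-coded sequence is now collapsed into request_exit_np_atomic(). Below is a generic userspace illustration of the flag-then-check idea using C11 atomics. It is not the LITMUS^RT control-page layout, and the return-value semantics of request_exit_np_atomic() are inferred from its use here: true when the target will notice the request on its own, so the IPI can be skipped.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: one flag owned by the task itself (am I inside a
 * non-preemptive section?) and one set by a remote scheduler (please
 * reschedule when you leave).  This is not the real ctrl-page layout. */
struct toy_np_ctrl {
	atomic_bool np_section;
	atomic_bool exit_request;
};

/* Set the request, then check whether the task is still inside its NP
 * section.  Returns true when the task will see the request as it leaves,
 * so the caller may skip the IPI; false means a reschedule IPI is needed.
 * The sequentially consistent C11 atomics stand in for the removed mb(). */
static bool toy_request_exit_np_atomic(struct toy_np_ctrl *c)
{
	atomic_store(&c->exit_request, true);
	return atomic_load(&c->np_section);
}

int main(void)
{
	struct toy_np_ctrl c;

	atomic_init(&c.np_section, true);
	atomic_init(&c.exit_request, false);
	printf("skip IPI? %d\n", toy_request_exit_np_atomic(&c));   /* prints 1 */
	return 0;
}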
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index e66397b7e8a2..eaaec38f43da 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -20,6 +20,7 @@
20#include <litmus/sched_plugin.h> 20#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h> 21#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h> 22#include <litmus/sched_trace.h>
23#include <litmus/trace.h>
23 24
24typedef struct { 25typedef struct {
25 rt_domain_t domain; 26 rt_domain_t domain;
@@ -386,12 +387,6 @@ static unsigned int psnedf_get_srp_prio(struct task_struct* t)
386 return get_rt_period(t); 387 return get_rt_period(t);
387} 388}
388 389
389static long psnedf_activate_plugin(void)
390{
391 get_srp_prio = psnedf_get_srp_prio;
392 return 0;
393}
394
395/* ******************** FMLP support ********************** */ 390/* ******************** FMLP support ********************** */
396 391
397/* struct for semaphore with priority inheritance */ 392/* struct for semaphore with priority inheritance */
@@ -431,6 +426,8 @@ int psnedf_fmlp_lock(struct litmus_lock* l)
431 426
432 __add_wait_queue_tail_exclusive(&sem->wait, &wait); 427 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
433 428
429 TS_LOCK_SUSPEND;
430
434 /* release lock before sleeping */ 431 /* release lock before sleeping */
435 spin_unlock_irqrestore(&sem->wait.lock, flags); 432 spin_unlock_irqrestore(&sem->wait.lock, flags);
436 433
@@ -441,14 +438,12 @@ int psnedf_fmlp_lock(struct litmus_lock* l)
441 438
442 schedule(); 439 schedule();
443 440
441 TS_LOCK_RESUME;
442
444 /* Since we hold the lock, no other task will change 443 /* Since we hold the lock, no other task will change
445 * ->owner. We can thus check it without acquiring the spin 444 * ->owner. We can thus check it without acquiring the spin
446 * lock. */ 445 * lock. */
447 BUG_ON(sem->owner != t); 446 BUG_ON(sem->owner != t);
448
449 /* FIXME: could we punt the dequeuing to the previous job,
450 * which is holding the spinlock anyway? */
451 remove_wait_queue(&sem->wait, &wait);
452 } else { 447 } else {
453 /* it's ours now */ 448 /* it's ours now */
454 sem->owner = t; 449 sem->owner = t;
@@ -481,7 +476,7 @@ int psnedf_fmlp_unlock(struct litmus_lock* l)
481 unboost_priority(t); 476 unboost_priority(t);
482 477
483 /* check if there are jobs waiting for this resource */ 478 /* check if there are jobs waiting for this resource */
484 next = waitqueue_first(&sem->wait); 479 next = __waitqueue_remove_first(&sem->wait);
485 if (next) { 480 if (next) {
486 /* boost next job */ 481 /* boost next job */
487 boost_priority(next); 482 boost_priority(next);
@@ -584,9 +579,35 @@ static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
584 579
585#endif 580#endif
586 581
582
583static long psnedf_activate_plugin(void)
584{
585#ifdef CONFIG_RELEASE_MASTER
586 int cpu;
587
588 for_each_online_cpu(cpu) {
589 remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
590 }
591#endif
592
593#ifdef CONFIG_LITMUS_LOCKING
594 get_srp_prio = psnedf_get_srp_prio;
595#endif
596
597 return 0;
598}
599
587static long psnedf_admit_task(struct task_struct* tsk) 600static long psnedf_admit_task(struct task_struct* tsk)
588{ 601{
589 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 602 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
603#ifdef CONFIG_RELEASE_MASTER
604 /* don't allow tasks on release master CPU */
605 && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
606#endif
607 )
608 return 0;
609 else
610 return -EINVAL;
590} 611}
591 612
592/* Plugin object */ 613/* Plugin object */
@@ -600,9 +621,9 @@ static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
600 .task_wake_up = psnedf_task_wake_up, 621 .task_wake_up = psnedf_task_wake_up,
601 .task_block = psnedf_task_block, 622 .task_block = psnedf_task_block,
602 .admit_task = psnedf_admit_task, 623 .admit_task = psnedf_admit_task,
624 .activate_plugin = psnedf_activate_plugin,
603#ifdef CONFIG_LITMUS_LOCKING 625#ifdef CONFIG_LITMUS_LOCKING
604 .allocate_lock = psnedf_allocate_lock, 626 .allocate_lock = psnedf_allocate_lock,
605 .activate_plugin = psnedf_activate_plugin,
606#endif 627#endif
607}; 628};
608 629
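The TS_LOCK_SUSPEND / TS_LOCK_RESUME pair added around the blocking path of psnedf_fmlp_lock() brackets the suspension so that it appears as its own interval in the overhead trace; these are assumed to be the usual feather-trace timestamp macros from litmus/trace.h, which this file now includes. A userspace analogue of the pattern, using CLOCK_MONOTONIC in place of feather-trace timestamps:

#include <stdio.h>
#include <time.h>
#include <unistd.h>

static long long ns_now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	long long suspend = ns_now();   /* ~ TS_LOCK_SUSPEND, before giving up the CPU */
	usleep(1000);                   /* stand-in for schedule() while the lock is held */
	long long resume = ns_now();    /* ~ TS_LOCK_RESUME, after being woken as owner */

	printf("suspended for %lld ns\n", resume - suspend);
	return 0;
}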
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index f923280b3146..8c1ca188bce1 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -191,7 +191,7 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
191 struct task_struct *t = (struct task_struct*) _task; 191 struct task_struct *t = (struct task_struct*) _task;
192 struct st_event_record* rec = get_record(ST_COMPLETION, t); 192 struct st_event_record* rec = get_record(ST_COMPLETION, t);
193 if (rec) { 193 if (rec) {
194 rec->data.completion.when = now(); 194 rec->data.completion.when = get_exec_time(t);
195 rec->data.completion.forced = forced; 195 rec->data.completion.forced = forced;
196 put_record(rec); 196 put_record(rec);
197 } 197 }
@@ -231,6 +231,38 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
231 } 231 }
232} 232}
233 233
234feather_callback void do_sched_trace_task_exit(unsigned long id,
235 unsigned long _task)
236{
237 struct task_struct *t = (struct task_struct*) _task;
238#ifdef CONFIG_PLUGIN_COLOR
239 const lt_t max_exec_time = tsk_rt(t)->max_exec_time;
240 const lt_t avg_exec_time = tsk_rt(t)->tot_exec_time / (get_rt_job(t) - 1);
241#else
242 const lt_t max_exec_time = 0;
243 const lt_t avg_exec_time = 0;
244#endif
245 struct st_event_record *rec = get_record(ST_TASK_EXIT, t);
246 if (rec) {
247 rec->data.task_exit.avg_exec_time = avg_exec_time;
248 rec->data.task_exit.max_exec_time = max_exec_time;
249 put_record(rec);
250 }
251}
252
253feather_callback void do_sched_trace_task_tardy(unsigned long id,
254 unsigned long _task)
255{
256 struct task_struct *t = (struct task_struct*) _task;
257 struct st_event_record *rec = get_record(ST_TASK_TARDY, t);
258 if (rec) {
259 rec->data.task_tardy.max_tardy = tsk_rt(t)->max_tardy;
260 rec->data.task_tardy.total_tardy = tsk_rt(t)->total_tardy;
261 rec->data.task_tardy.missed = tsk_rt(t)->missed;
262 put_record(rec);
263 }
264}
265
234feather_callback void do_sched_trace_action(unsigned long id, 266feather_callback void do_sched_trace_action(unsigned long id,
235 unsigned long _task, 267 unsigned long _task,
236 unsigned long action) 268 unsigned long action)
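The two new trace records export per-task summaries at exit time: ST_TASK_EXIT carries the maximum and average execution time (the average divides tot_exec_time by get_rt_job(t) - 1), and ST_TASK_TARDY carries maximum tardiness, total tardiness, and the number of missed deadlines. The sketch below shows the kind of accumulation those records summarize; the field names mirror the rt_param members used above, but the update sites are assumptions, since the code that maintains them (in the color plugin's completion path) is not part of this hunk.

#include <stdio.h>

typedef unsigned long long lt_t;

struct toy_stats {
	lt_t tot_exec_time, max_exec_time;
	lt_t total_tardy, max_tardy;
	unsigned int missed;     /* deadline misses */
	unsigned int jobs;       /* analogue of get_rt_job(t) */
};

/* assumed update site: called once per completed job */
static void toy_job_complete(struct toy_stats *s, lt_t exec, lt_t tardy)
{
	s->jobs++;
	s->tot_exec_time += exec;
	if (exec > s->max_exec_time)
		s->max_exec_time = exec;
	if (tardy) {
		s->missed++;
		s->total_tardy += tardy;
		if (tardy > s->max_tardy)
			s->max_tardy = tardy;
	}
}

static void toy_task_exit(const struct toy_stats *s)
{
	/* mirrors do_sched_trace_task_exit(): average over jobs - 1,
	 * presumably to discount the job still in flight at exit time
	 * (the guard is only for this toy) */
	if (s->jobs > 1)
		printf("avg=%llu max=%llu\n",
		       s->tot_exec_time / (s->jobs - 1), s->max_exec_time);
}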
diff --git a/litmus/trace.c b/litmus/trace.c
index 209524fd345e..4722ffa443c6 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -1,5 +1,6 @@
1#include <linux/sched.h> 1#include <linux/sched.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/uaccess.h>
3 4
4#include <litmus/ftdev.h> 5#include <litmus/ftdev.h>
5#include <litmus/litmus.h> 6#include <litmus/litmus.h>
@@ -19,6 +20,35 @@ static struct ftdev overhead_dev;
19 20
20static unsigned int ts_seq_no = 0; 21static unsigned int ts_seq_no = 0;
21 22
23DEFINE_PER_CPU(atomic_t, irq_fired_count);
24
25static inline void clear_irq_fired(void)
26{
27 atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
28}
29
30static inline unsigned int get_and_clear_irq_fired(void)
31{
32 /* This is potentially not atomic since we might migrate if
33 * preemptions are not disabled. As a tradeoff between
34 * accuracy and tracing overheads, this seems acceptable.
35 * If it proves to be a problem, then one could add a callback
36 * from the migration code to invalidate irq_fired_count.
37 */
38 return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
39}
40
41static inline void __save_irq_flags(struct timestamp *ts)
42{
43 unsigned int irq_count;
44
45 irq_count = get_and_clear_irq_fired();
46 /* Store how many interrupts occurred. */
47 ts->irq_count = irq_count;
48 /* Extra flag because ts->irq_count overflows quickly. */
49 ts->irq_flag = irq_count > 0;
50}
51
22static inline void __save_timestamp_cpu(unsigned long event, 52static inline void __save_timestamp_cpu(unsigned long event,
23 uint8_t type, uint8_t cpu) 53 uint8_t type, uint8_t cpu)
24{ 54{
@@ -27,10 +57,26 @@ static inline void __save_timestamp_cpu(unsigned long event,
27 seq_no = fetch_and_inc((int *) &ts_seq_no); 57 seq_no = fetch_and_inc((int *) &ts_seq_no);
28 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { 58 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
29 ts->event = event; 59 ts->event = event;
30 ts->timestamp = ft_timestamp();
31 ts->seq_no = seq_no; 60 ts->seq_no = seq_no;
32 ts->cpu = cpu; 61 ts->cpu = cpu;
33 ts->task_type = type; 62 ts->task_type = type;
63 __save_irq_flags(ts);
64 barrier();
65 /* prevent re-ordering of ft_timestamp() */
66 ts->timestamp = ft_timestamp();
67 ft_buffer_finish_write(trace_ts_buf, ts);
68 }
69}
70
71static void __add_timestamp_user(struct timestamp *pre_recorded)
72{
73 unsigned int seq_no;
74 struct timestamp *ts;
75 seq_no = fetch_and_inc((int *) &ts_seq_no);
76 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
77 *ts = *pre_recorded;
78 ts->seq_no = seq_no;
79 __save_irq_flags(ts);
34 ft_buffer_finish_write(trace_ts_buf, ts); 80 ft_buffer_finish_write(trace_ts_buf, ts);
35 } 81 }
36} 82}
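The interrupt accounting added above works as follows: the interrupt path increments a per-CPU counter, and every recorded timestamp atomically swaps that counter back to zero, so each sample reports only the interrupts that fired since the previous timestamp; the separate one-bit irq_flag survives even when the narrow irq_count field wraps. A userspace sketch of the same scheme with C11 atomics; the per-CPU placement and the increment site (presumably an ft_irq_fired() hook) are assumptions not visible in this hunk.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint irq_fired_count;     /* per-CPU in the kernel */

static void toy_irq_handler(void)
{
	atomic_fetch_add(&irq_fired_count, 1);
}

static void toy_save_irq_flags(uint8_t *irq_count, uint8_t *irq_flag)
{
	unsigned int fired = atomic_exchange(&irq_fired_count, 0);

	*irq_count = (uint8_t)fired;    /* narrow field: may wrap quickly... */
	*irq_flag  = fired > 0;         /* ...so keep a sticky "any at all?" bit */
}

int main(void)
{
	uint8_t count, flag;

	for (int i = 0; i < 300; i++)
		toy_irq_handler();
	toy_save_irq_flags(&count, &flag);
	printf("irq_count=%u irq_flag=%u\n", (unsigned)count, (unsigned)flag);  /* 44 1 */
	return 0;
}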
@@ -108,6 +154,27 @@ feather_callback void save_timestamp_cpu(unsigned long event,
108 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu); 154 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
109} 155}
110 156
157feather_callback void save_task_latency(unsigned long event,
158 unsigned long when_ptr)
159{
160 lt_t now = litmus_clock();
161 lt_t *when = (lt_t*) when_ptr;
162 unsigned int seq_no;
163 int cpu = raw_smp_processor_id();
164 struct timestamp *ts;
165
166 seq_no = fetch_and_inc((int *) &ts_seq_no);
167 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
168 ts->event = event;
169 ts->timestamp = now - *when;
170 ts->seq_no = seq_no;
171 ts->cpu = cpu;
172 ts->task_type = TSK_RT;
173 __save_irq_flags(ts);
174 ft_buffer_finish_write(trace_ts_buf, ts);
175 }
176}
177
111/******************************************************************************/ 178/******************************************************************************/
112/* DEVICE FILE DRIVER */ 179/* DEVICE FILE DRIVER */
113/******************************************************************************/ 180/******************************************************************************/
@@ -116,11 +183,15 @@ feather_callback void save_timestamp_cpu(unsigned long event,
116 * should be 8M; it is the max we can ask to buddy system allocator (MAX_ORDER) 183 * should be 8M; it is the max we can ask to buddy system allocator (MAX_ORDER)
117 * and we might not get as much 184 * and we might not get as much
118 */ 185 */
119#define NO_TIMESTAMPS (2 << 13) 186#define NO_TIMESTAMPS (2 << 16)
120 187
121static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) 188static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
122{ 189{
123 unsigned int count = NO_TIMESTAMPS; 190 unsigned int count = NO_TIMESTAMPS;
191
192 /* An overhead-tracing timestamp should be exactly 16 bytes long. */
193 BUILD_BUG_ON(sizeof(struct timestamp) != 16);
194
124 while (count && !trace_ts_buf) { 195 while (count && !trace_ts_buf) {
125 printk("time stamp buffer: trying to allocate %u time stamps.\n", count); 196 printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
126 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); 197 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
@@ -135,9 +206,35 @@ static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
135 ftdev->minor[idx].buf = NULL; 206 ftdev->minor[idx].buf = NULL;
136} 207}
137 208
209static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
210 const char __user *from)
211{
212 ssize_t consumed = 0;
213 struct timestamp ts;
214
215 /* don't give us partial timestamps */
216 if (len % sizeof(ts))
217 return -EINVAL;
218
219 while (len >= sizeof(ts)) {
220 if (copy_from_user(&ts, from, sizeof(ts))) {
221 consumed = -EFAULT;
222 goto out;
223 }
224 len -= sizeof(ts);
225 from += sizeof(ts);
226 consumed += sizeof(ts);
227
228 __add_timestamp_user(&ts);
229 }
230
231out:
232 return consumed;
233}
234
138static int __init init_ft_overhead_trace(void) 235static int __init init_ft_overhead_trace(void)
139{ 236{
140 int err; 237 int err, cpu;
141 238
142 printk("Initializing Feather-Trace overhead tracing device.\n"); 239 printk("Initializing Feather-Trace overhead tracing device.\n");
143 err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace"); 240 err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace");
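With overhead_dev.write wired up to write_timestamp_from_user(), userspace can append pre-recorded timestamps (for example, ones captured in userspace) to the same buffer that kernel-side events go to, as long as each write is a whole multiple of the 16-byte struct timestamp. A userspace sketch of such a write; the device path is an assumption, and the record contents are left zeroed because the actual layout lives in the litmus headers, not in this diff.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define TS_SIZE 16   /* matches the BUILD_BUG_ON above */

int main(void)
{
	unsigned char recs[2 * TS_SIZE];

	memset(recs, 0, sizeof(recs));   /* real code would fill in event, cpu, ... */

	int fd = open("/dev/litmus/ft_trace0", O_WRONLY);   /* assumed path */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* must be a multiple of TS_SIZE, otherwise the kernel returns -EINVAL */
	ssize_t n = write(fd, recs, sizeof(recs));
	printf("wrote %zd bytes\n", n);
	close(fd);
	return 0;
}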
@@ -146,11 +243,17 @@ static int __init init_ft_overhead_trace(void)
146 243
147 overhead_dev.alloc = alloc_timestamp_buffer; 244 overhead_dev.alloc = alloc_timestamp_buffer;
148 overhead_dev.free = free_timestamp_buffer; 245 overhead_dev.free = free_timestamp_buffer;
246 overhead_dev.write = write_timestamp_from_user;
149 247
150 err = register_ftdev(&overhead_dev); 248 err = register_ftdev(&overhead_dev);
151 if (err) 249 if (err)
152 goto err_dealloc; 250 goto err_dealloc;
153 251
252 /* initialize IRQ flags */
253 for (cpu = 0; cpu < NR_CPUS; cpu++) {
254 clear_irq_fired();
255 }
256
154 return 0; 257 return 0;
155 258
156err_dealloc: 259err_dealloc: