/*
 * kernel/sched_part_edf.c
 *
 * Implementation of the partitioned EDF scheduler plugin.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include <linux/litmus.h>
#include <linux/sched_plugin.h>
#include <linux/edf_common.h>


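/* Per-CPU scheduler state: every processor owns one of these, i.e.,
 * its own EDF rt_domain (the partition) plus a record of the real-time
 * task currently scheduled on that CPU. The spinlock protects
 * ->scheduled, which may be inspected from other CPUs in
 * part_edf_check_resched().
 */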
typedef struct {
	rt_domain_t 		domain;
	int          		cpu;
	struct task_struct* 	scheduled; /* only RT tasks */
	spinlock_t   		lock;
} part_edf_domain_t;


#define local_edf		(&__get_cpu_var(part_edf_domains).domain)
#define local_pedf		(&__get_cpu_var(part_edf_domains))
#define remote_edf(cpu)		(&per_cpu(part_edf_domains, cpu).domain)
#define remote_pedf(cpu)	(&per_cpu(part_edf_domains, cpu))
#define task_edf(task)		remote_edf(get_partition(task))
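/* Illustrative usage sketch (not from the original source): to enqueue
 * a task in its assigned partition, one would look up the domain by
 * partition, e.g.:
 *
 *	rt_domain_t *edf = task_edf(t);
 *	add_ready(edf, t);
 *
 * The local_* forms use __get_cpu_var() and therefore assume that
 * preemption is disabled.
 */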

static void part_edf_domain_init(part_edf_domain_t* pedf, 
				 check_resched_needed_t check,
				 int cpu)
{
	edf_domain_init(&pedf->domain, check);
	pedf->cpu      		= cpu;
	pedf->lock     		= SPIN_LOCK_UNLOCKED;
	pedf->scheduled		= NULL;
}

DEFINE_PER_CPU(part_edf_domain_t, part_edf_domains);

/* This check is trivial in partitioned systems: we only have to
 * consider the CPU of the partition.
 */
static int part_edf_check_resched(rt_domain_t *edf) 
{
	part_edf_domain_t *pedf = container_of(edf, part_edf_domain_t, domain);
	int ret = 0;

	spin_lock(&pedf->lock);

	/* Because this is a callback from rt_domain_t, we already hold
	 * the necessary lock for the ready queue. pedf->lock nests
	 * inside the ready queue lock, both here and in
	 * part_edf_schedule(), so the lock ordering is consistent.
	 */
	if (edf_preemption_needed(edf, pedf->scheduled)) {
		if (pedf->cpu == smp_processor_id())
			set_tsk_need_resched(current);
		else
			smp_send_reschedule(pedf->cpu);
		ret = 1;
	}
	spin_unlock(&pedf->lock);
	return ret;
}


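/* Called from the timer tick: charge the current task's budget,
 * release pending jobs, and determine whether a reschedule is needed.
 */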
static reschedule_check_t part_edf_scheduler_tick(void)
{
	unsigned long flags;
	struct task_struct *t = current;
	reschedule_check_t want_resched = NO_RESCHED;
	rt_domain_t *edf       = local_edf;
	part_edf_domain_t *pedf = local_pedf;

	/* Check for inconsistency. We don't need the lock for this since
	 * ->scheduled is only changed in schedule(), which obviously is
	 * not executing in parallel on this CPU.
	 */
	BUG_ON(is_realtime(t) && t != pedf->scheduled);

	/* Expire tasks even if we are not in real-time mode. This makes
	 * sure that, at the end of real-time mode, no task "runs away
	 * forever".
	 */
	if (is_realtime(t) && (!--t->time_slice)) {
		/* this task has exhausted its budget in this period */
		set_rt_flags(t, RT_F_SLEEP);
		want_resched = FORCE_RESCHED;
	}
	if (get_rt_mode() == MODE_RT_RUN) {
		/* Check whether anything is waiting to be released.
		 * This could probably be moved to the global timer
		 * interrupt handler, since the state only changes
		 * once per jiffy.
		 */
		try_release_pending(edf);
		if (want_resched != FORCE_RESCHED) {
			read_lock_irqsave(&edf->ready_lock, flags);
			if (edf_preemption_needed(edf, t))
				want_resched = FORCE_RESCHED;
			read_unlock_irqrestore(&edf->ready_lock, flags);
		}
	}
	return want_resched;
}

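/* The main scheduling decision: on MODE_RT_RUN, either keep running
 * prev (if it is released, still running, and not preempted) or take
 * the highest-priority ready task of the local partition. Leaving
 * *next NULL presumably defers to the stock Linux scheduler. Since it
 * calls __activate_task()/deactivate_task() directly, this is
 * presumably invoked with the runqueue lock held.
 */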
static int part_edf_schedule(struct task_struct * prev, 
			     struct task_struct ** next, 
			     runqueue_t * rq)
{
	int 			need_deactivate = 1;
	part_edf_domain_t* 	pedf = local_pedf;
	rt_domain_t*		edf  = &pedf->domain; 


	if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP)
		edf_prepare_for_next_period(prev);	
	
	if (get_rt_mode() == MODE_RT_RUN) {
		write_lock(&edf->ready_lock);
		if (is_realtime(prev) && is_released(prev) && is_running(prev)
		    && !edf_preemption_needed(edf, prev)) {
			/* this really should only happen if the task has 
			 * 100% utilization... 
			 */
			TRACE("prev will be next, already released\n");
			*next = prev;
			need_deactivate = 0;
		} else {
			/* either not yet released, preempted, or non-rt */
			*next = __take_ready(edf);
			if (*next) {				
				/* stick the task into the runqueue */
				__activate_task(*next, rq);
				set_task_cpu(*next, smp_processor_id());
			}
		}
		spin_lock(&pedf->lock);
		pedf->scheduled = *next;
		spin_unlock(&pedf->lock);
		if (*next)
			set_rt_flags(*next, RT_F_RUNNING);
		
		write_unlock(&edf->ready_lock);
	} 

	if (is_realtime(prev) && need_deactivate && prev->array) {
		/* take it out of the run queue */
		deactivate_task(prev, rq);
	}

	return 0;
}


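/* Called after a context switch away from prev: requeue the previous
 * real-time task into the ready or release queue of the local
 * partition, unless it has blocked (is no longer running).
 */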
static void part_edf_finish_switch(struct task_struct *prev) 
{
	rt_domain_t* 	edf = local_edf;

	if (!is_realtime(prev) || !is_running(prev))
		return;

	if (get_rt_flags(prev) == RT_F_SLEEP ||
	    get_rt_mode() != MODE_RT_RUN) {
		/* This task has expired. part_edf_schedule() has already
		 * taken care of updating the release time and the
		 * deadline. We just need to check whether it has been
		 * released yet.
		 */
		if (is_released(prev) && get_rt_mode() == MODE_RT_RUN) {
			/* already released */
			add_ready(edf, prev);
			TRACE("%d goes straight to ready queue\n", prev->pid);
		} else
			/* it has got to wait */
			add_release(edf, prev);
	} else {
		/* This is a forced preemption: the task remains
		 * runnable, so we just put it back into the ready
		 * queue to make it available to others again.
		 */
		add_ready(edf, prev);
	}
}


/*	Prepare a task for running in RT mode.
 *	Enqueues the task into the master queue data structure.
 *	Returns
 *		-EPERM if the task is not TASK_STOPPED
 */
static long part_edf_prepare_task(struct task_struct * t)
{
	rt_domain_t* 	edf = task_edf(t);

	TRACE("[%d] part edf: prepare task %d on CPU %d\n", 
		smp_processor_id(), t->pid, get_partition(t));
	if (t->state == TASK_STOPPED) {
		__setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);

		if (get_rt_mode() == MODE_RT_RUN)
			/* Real-time mode is already active:
			 * prepare an immediate release.
			 */
			edf_release_now(t);
		/* The task must be in the TASK_RUNNING state while it is
		 * queued; otherwise the signal code will try to wake it
		 * up, with fatal consequences.
		 */
		t->state = TASK_RUNNING;
		add_release(edf, t);
		return 0;
	} else
		return -EPERM;
}

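/* A blocked real-time task resumes: decide whether it becomes ready
 * immediately or has to wait for its next release.
 */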
static void part_edf_wake_up_task(struct task_struct *task) 
{
	rt_domain_t* edf;

	edf = task_edf(task);

	/* We must determine whether the task should go into the release
	 * queue or into the ready queue. It may enter the ready queue
	 * if it has credit left in its time slice and has not yet reached
	 * its deadline. If it is now past its deadline, we assume this is
	 * the arrival of a new sporadic job and thus put it in the ready
	 * queue anyway. If it has zero budget and the next release is in
	 * the future, it has to go to the release queue.
	 */
	TRACE("part edf: wake up %d with budget=%d for cpu %d\n", 
	      task->pid, task->time_slice, get_partition(task));
	task->state = TASK_RUNNING;
	if (is_tardy(task)) {
		/* new sporadic release */
		edf_release_now(task);
		add_ready(edf, task);

	} else if (task->time_slice) {
		/* The task came back in time, before its deadline. This
		 * may cause deadline overruns, but since we don't handle
		 * suspensions in the analytical model, we can't guarantee
		 * anything if tasks block anyway, so we don't care.
		 */
		set_rt_flags(task, RT_F_RUNNING);
		add_ready(edf, task);

	} else {
		add_release(edf, task);
	}
}

static void part_edf_task_blocks(struct task_struct *t)
{
	BUG_ON(!is_realtime(t));
	/* There is not really anything to do, since a task can only
	 * block while it is running, and when it is not running it is
	 * not in any queue anyway.
	 */
	TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
	BUG_ON(in_list(&t->rt_list));
}


/* When _tear_down is called, the task should no longer be in any queue,
 * as it must have blocked first. We don't keep any internal state for
 * the task; it is all in the task_struct.
 */
static long part_edf_tear_down(struct task_struct * t)
{
	BUG_ON(!is_realtime(t));
	TRACE("part edf: tear down called for %d\n", t->pid);
	BUG_ON(t->array);
	BUG_ON(in_list(&t->rt_list));
	return 0;
}


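/* When switching into real-time mode, re-release all tasks in every
 * partition so that all partitions start from a consistent release
 * time.
 */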
static int part_edf_mode_change(int new_mode)
{
	int cpu;

	if (new_mode == MODE_RT_RUN)
		for_each_online_cpu(cpu)
			rerelease_all(remote_edf(cpu), edf_release_at);
	TRACE("[%d] part edf: mode changed to %d\n", 
	       smp_processor_id(), new_mode);
	return 0;
}


/*	Plugin object	*/
static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
	.ready_to_use = 0 
};


/*
 *	Plugin initialization code.
 */
#define INIT_SCHED_PLUGIN (struct sched_plugin) {\
	.plugin_name		= "Partitioned EDF",\
	.ready_to_use		= 1,\
	.scheduler_tick		= part_edf_scheduler_tick,\
	.prepare_task		= part_edf_prepare_task,\
	.sleep_next_period	= edf_sleep_next_period,\
	.tear_down		= part_edf_tear_down,\
	.schedule		= part_edf_schedule,\
	.finish_switch 		= part_edf_finish_switch,\
	.mode_change		= part_edf_mode_change,\
	.wake_up_task		= part_edf_wake_up_task,\
	.task_blocks		= part_edf_task_blocks \
}


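/* One-time initialization: the per-CPU domains are set up on the first
 * call only; subsequent calls return the already-initialized plugin
 * object.
 */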
sched_plugin_t *__init init_part_edf_plugin(void)
{
	int i;

	if (!s_plugin.ready_to_use) {
		for (i = 0; i < NR_CPUS; i++) {
			part_edf_domain_init(remote_pedf(i),
					     part_edf_check_resched, i);
			printk(KERN_INFO "CPU partition %d initialized.\n",
			       i);
		}
		s_plugin = INIT_SCHED_PLUGIN;
	}
	return &s_plugin;
}