author	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2011-01-26 17:17:27 -0500
committer	Bjoern B. Brandenburg <bbb@cs.unc.edu>	2011-01-26 17:44:47 -0500
commit	2f25da028afca99d903f60a0396a44d657b863dd (patch)
tree	8141487e48175b33161573467421fd18605d9ebf
parent	00ffad8cfa533223121c8b400ae829ccef2ddfe8 (diff)
Add NPS-F plugin
[semi-part backport]
-rw-r--r--	include/litmus/rt_param.h	18
-rw-r--r--	include/litmus/sched_plugin.h	5
-rw-r--r--	include/litmus/unistd_64.h	4
-rw-r--r--	litmus/Makefile	3
-rw-r--r--	litmus/litmus.c	64
-rw-r--r--	litmus/sched_npsf.c	1185
-rw-r--r--	litmus/sched_plugin.c	6
7 files changed, 1282 insertions, 3 deletions
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 9927b09e0a01..1290e2939e33 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -35,6 +35,17 @@ typedef enum {
 	PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
 } budget_policy_t;
 
+/* Parameters for the NPS-F semi-partitioned scheduling algorithm.
+ * Each (cpu, budget) entry defines the share ('budget' in ns, a % of
+ * the slot_length) of the notional processor on the CPU 'cpu'.
+ * This structure is used by the library/syscall interface so that the
+ * overhead of a syscall is incurred only once per server.
+ */
+struct npsf_budgets {
+	int	cpu;
+	lt_t	budget;
+};
+
 /* The parameters for the EDF-WM semi-partitioned scheduler.
  * Each task may be split across multiple cpus. Each per-cpu allocation
  * is called a 'slice'.
@@ -75,6 +86,13 @@ struct rt_task {
 
 	/* parameters used by the semi-partitioned algorithms */
 	union {
+		/* NPS-F; defined in sched_npsf.c
+		 * id for the server (notional processor) that holds
+		 * this task; the same npsf_id can be assigned to "the same"
+		 * server split across different cpus
+		 */
+		int npsf_id;
+
 		/* EDF-WM; defined in sched_edf_wm.c */
 		struct edf_wm_params wm;
 	} semi_part;
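
A minimal userspace sketch (not part of this patch) of how a task would bind itself to a notional processor via the new npsf_id field, assuming server 2 was previously created with a reserve on CPU 1 and that the final add_server() call (added later in this patch) has already been made. set_rt_task_param(), task_mode() and LITMUS_RT_TASK are assumed to be the usual liblitmus wrappers from the matching semi-partitioned userspace tree.

#define _GNU_SOURCE
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <litmus.h>	/* assumed liblitmus header */

static int become_npsf_task(void)
{
	struct rt_task param;
	cpu_set_t mask;

	memset(&param, 0, sizeof(param));
	param.exec_cost = 10000000ULL;		/* 10 ms, in ns */
	param.period    = 100000000ULL;		/* 100 ms, in ns */
	param.cpu       = 1;			/* CPU hosting a reserve of server 2 */
	param.budget_policy = NO_ENFORCEMENT;
	param.semi_part.npsf_id = 2;		/* server id passed to add_server() */

	/* npsf_admit_task() requires task_cpu() == task_params.cpu, so pin first */
	CPU_ZERO(&mask);
	CPU_SET(param.cpu, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask))
		return -1;

	if (set_rt_task_param(getpid(), &param))	/* assumed liblitmus call */
		return -1;
	return task_mode(LITMUS_RT_TASK);		/* assumed liblitmus call */
}

The admit test in npsf_admit_task() then only has to verify that the chosen CPU actually hosts a reserve with that npsf_id.
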
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 9c1c9f28ba79..7ea9176624ff 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -6,6 +6,8 @@
 #define _LINUX_SCHED_PLUGIN_H_
 
 #include <linux/sched.h>
+/* NSEC_PER... conversions */
+#include <linux/time.h>
 
 /* struct for semaphore with priority inheritance */
 struct pi_semaphore {
@@ -136,6 +138,9 @@ extern struct sched_plugin *litmus;
 /* cluster size: cache_index = 2 L2, cache_index = 3 L3 */
 extern int cluster_cache_index;
 
+/* Slot length (ns) for NPS-F semi-part. algo */
+extern lt_t npsf_slot_length;
+
 int register_sched_plugin(struct sched_plugin* plugin);
 struct sched_plugin* find_sched_plugin(const char* name);
 int print_sched_plugins(char* buf, int max);
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index f0618e75348d..4e82c52722c8 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -33,5 +33,7 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
 __SYSCALL(__NR_release_ts, sys_release_ts)
 #define __NR_null_call __LSC(13)
 __SYSCALL(__NR_null_call, sys_null_call)
+#define __NR_add_server __LSC(14)
+__SYSCALL(__NR_add_server, sys_add_server)
 
-#define NR_litmus_syscalls 14
+#define NR_litmus_syscalls 15
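
A hedged sketch (not part of this patch) of calling the new add_server syscall from userspace: it creates one NPS-F server (npsf_id 0) split across CPU 0 (3 ms) and CPU 1 (2 ms) and flags it as the last server, assuming at least two online CPUs. The raw syscall(2) invocation and the userspace header paths for struct npsf_budgets, NO_CPU and __NR_add_server are assumptions; only sys_add_server() itself is defined by this patch.

#include <unistd.h>
#include <sys/syscall.h>
#include <litmus/rt_param.h>	/* struct npsf_budgets, NO_CPU (assumed path) */
#include <litmus/unistd_64.h>	/* __NR_add_server (assumed path) */

int main(void)
{
	int ncpus = (int) sysconf(_SC_NPROCESSORS_ONLN);
	struct npsf_budgets budgets[ncpus];
	int id = 0, i;

	/* the kernel copies exactly num_online_cpus() entries; mark the
	 * unused tail with NO_CPU so the chain-building loop stops there */
	for (i = 0; i < ncpus; i++) {
		budgets[i].cpu = NO_CPU;
		budgets[i].budget = 0;
	}
	budgets[0].cpu = 0;
	budgets[0].budget = 3000000ULL;	/* 3 ms of each slot on CPU 0 */
	budgets[1].cpu = 1;
	budgets[1].budget = 2000000ULL;	/* 2 ms of each slot on CPU 1 */

	/* last = 1: after this call the plugin arms its timers */
	return syscall(__NR_add_server, &id, budgets, 1) < 0;
}

Each call before the last one passes last = 0; only the final call (last = 1) kicks off the per-CPU reserve-switching timers and enables scheduling.
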
diff --git a/litmus/Makefile b/litmus/Makefile
index 7fe37a59c425..f26736964479 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -15,7 +15,8 @@ obj-y = sched_plugin.o litmus.o \
 	   ctrldev.o \
 	   sched_gsn_edf.o \
 	   sched_psn_edf.o \
-	   sched_edf_wm.o
+	   sched_edf_wm.o \
+	   sched_npsf.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index b04a42b0da9c..2f780222d8e8 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -632,6 +632,55 @@ static int proc_write_cluster_size(struct file *file,
 	return len;
 }
 
+static int proc_read_npsf_slot_length(char *page, char **start,
+				      off_t off, int count,
+				      int *eof, void *data)
+{
+	return snprintf(page, PAGE_SIZE, "%d us\n",
+			(int) (npsf_slot_length / NSEC_PER_USEC));
+}
+
+extern void npsf_hrtimers_cleanup(void);
+/* NPS-F slot length in us.
+ *
+ * Writing 0 as npsf_slot_length will trigger the removal of the
+ * hrtimers for reserve_switch_tick() in the NPS-F plugin.
+ */
+static int proc_write_npsf_slot_length(struct file *file,
+				       const char *buffer,
+				       unsigned long count,
+				       void *data)
+{
+	int err, slot_length;
+	char msg[64];
+
+	if (count > 63)
+		return -EINVAL;
+
+	if (copy_from_user(msg, buffer, count))
+		return -EFAULT;
+
+	/* terminate */
+	msg[count] = '\0';
+	/* chomp */
+	if (count > 1 && msg[count - 1] == '\n')
+		msg[count - 1] = '\0';
+
+	err = sscanf(msg, "%d", &slot_length);
+
+	if (err == 1) {
+		if (!slot_length) {
+			npsf_hrtimers_cleanup();
+			/* reset to default */
+			slot_length = 5000;
+		}
+		npsf_slot_length = (lt_t)((lt_t) slot_length * NSEC_PER_USEC);
+		return count;
+	}
+
+	return -EINVAL;
+}
+
 #ifdef CONFIG_RELEASE_MASTER
 static int proc_read_release_master(char *page, char **start,
 				    off_t off, int count,
@@ -691,7 +740,8 @@ static struct proc_dir_entry *litmus_dir = NULL,
 #ifdef CONFIG_RELEASE_MASTER
 	*release_master_file = NULL,
 #endif
-	*clus_cache_idx_file = NULL;
+	*clus_cache_idx_file = NULL,
+	*npsf_slot_length_file = NULL;
 
 static int __init init_litmus_proc(void)
 {
@@ -733,6 +783,16 @@ static int __init init_litmus_proc(void)
 	clus_cache_idx_file->read_proc = proc_read_cluster_size;
 	clus_cache_idx_file->write_proc = proc_write_cluster_size;
 
+	npsf_slot_length_file = create_proc_entry("npsf_slot_length",
+						  0644, litmus_dir);
+	if (!npsf_slot_length_file) {
+		printk(KERN_ERR "Could not allocate npsf_slot_length "
+		       "procfs entry.\n");
+		return -ENOMEM;
+	}
+	npsf_slot_length_file->read_proc = proc_read_npsf_slot_length;
+	npsf_slot_length_file->write_proc = proc_write_npsf_slot_length;
+
 	stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
 					   proc_read_stats, NULL);
 
@@ -752,6 +812,8 @@ static void exit_litmus_proc(void)
752 remove_proc_entry("active_plugin", litmus_dir); 812 remove_proc_entry("active_plugin", litmus_dir);
753 if (clus_cache_idx_file) 813 if (clus_cache_idx_file)
754 remove_proc_entry("cluster_cache", litmus_dir); 814 remove_proc_entry("cluster_cache", litmus_dir);
815 if (npsf_slot_length_file)
816 remove_proc_entry("npsf_slot_length", litmus_dir);
755#ifdef CONFIG_RELEASE_MASTER 817#ifdef CONFIG_RELEASE_MASTER
756 if (release_master_file) 818 if (release_master_file)
757 remove_proc_entry("release_master", litmus_dir); 819 remove_proc_entry("release_master", litmus_dir);
diff --git a/litmus/sched_npsf.c b/litmus/sched_npsf.c
new file mode 100644
index 000000000000..aad99c7e447c
--- /dev/null
+++ b/litmus/sched_npsf.c
@@ -0,0 +1,1185 @@
1/*
2 * litmus/sched_npsf.c
3 *
4 * Implementation of the NPS-F scheduling algorithm.
5 *
6 * A _server_ may span multiple _reserves_ on different CPUs.
7 *
8 * * 1
9 * +--------------+ +--> +--------------+ +--> +--------------+
10 * | cpu_entry_t | | | npsf_reserve | | | npsf_server |
11 * +--------------+ | +--------------+ | +--------------+
12 * | |1 | | |1 | | |
13 * | cpu_reserve |--+ 1| server |--+ 1| |
14 * | | +---| cpu | +---| curr_reserve |
15 * +--------------+ <-+ +--------------+ <-+ +--------------+
16 * 1 *
17 */
18
19#include <asm/uaccess.h>
20#include <linux/percpu.h>
21#include <linux/sched.h>
22#include <linux/list.h>
23#include <linux/spinlock.h>
24#include <linux/slab.h>
25
26#include <linux/module.h>
27
28#include <litmus/litmus.h>
29#include <litmus/jobs.h>
30#include <litmus/sched_plugin.h>
31#include <litmus/edf_common.h>
32
33/* Be extra verbose (log spam) */
34#define NPSF_VERBOSE
35
36#ifdef NPSF_VERBOSE
37#define npsf_printk(fmt, arg...) printk(KERN_INFO fmt, ##arg)
38#else
39#define npsf_printk(fmt, arg...)
40#endif
41
42struct npsf_reserve;
43
44/* cpu_entry_t
45 *
46 * Each cpu has a list of reserves assigned on the cpu.
47 * Each reserve has a pointer to its server (Notional processor)
48 * that may be shared among multiple reserves.
49 */
50typedef struct {
51 /* lock to protect cpu_reserve and list changes */
52 raw_spinlock_t cpu_res_lock;
53 /* the reserve currently executing on this cpu */
54 struct npsf_reserve *cpu_reserve;
55 /* list of reserves on this cpu */
56 struct list_head npsf_reserves;
57 /* cpu ID */
58 int cpu;
59 /* timer to control reserve switching */
60 struct hrtimer timer;
61 /* virtual timer expiring (wrt time_origin) */
62 lt_t should_expire;
63 /* delegate timer firing to proper cpu */
64 struct hrtimer_start_on_info info;
65 /* FIXME: the ids for servers should be an increasing int >=0 */
66 int last_seen_npsf_id;
67} cpu_entry_t;
68
69/* one cpu_entry_t per CPU */
70DEFINE_PER_CPU(cpu_entry_t, npsf_cpu_entries);
71
72/* This is the "notional processor" (i.e., simple server) abstraction. */
73typedef struct npsf_server {
74 /* shared among reserves */
75 rt_domain_t dom;
76 /* the real-time task that this server *SHOULD* be scheduling */
77 struct task_struct *highest_prio;
78 /* current reserve where this dom is executing */
79 struct npsf_reserve *curr_reserve;
80 /* The "first" reserve for this server in a time slot.
81 * For non-migrating servers this will always be the same as curr_reserve. */
82 struct npsf_reserve *first_reserve;
83	 * Prevent a race between the last CPU in a reserve chain and the first. */
84 int first_cpu_wants_ipi;
85 /* rt_domain_t lock + npsf_server_t lock */
86#define lock dom.ready_lock
87} npsf_server_t;
88
89typedef struct npsf_reserve {
90 /* Pointer to the server for this reserve: a server may be shared among
91 * multiple cpus with different budget per cpu, but same npsf_id. */
92 npsf_server_t *server;
93 /* we queue here in npsf_reserves */
94 struct list_head node;
95 /* budget of this npsf_id on this cpu */
96 lt_t budget;
97 /* cpu for this (portion of) server */
98 cpu_entry_t *cpu;
99 /* id of this server, it is the same for the
100 * same server on different cpus */
101 int npsf_id;
102	/* Can be used to identify whether a reserve continues into the
103	 * next npsf in the chain; needed for proper server deletion */
104 struct npsf_reserve *next_npsf;
105 /* flag that is true if the reserve is currently scheduled */
106 int is_currently_scheduled;
107} npsf_reserve_t;
108
109/* synchronization point to start moving and switching servers only
110 * when all servers have been properly set up by the user.
111 */
112static atomic_t all_servers_added;
113static atomic_t timers_activated = ATOMIC_INIT(0);
114
115/* Virtual time starts here */
116static lt_t time_origin;
117
118/* save number of online cpus seen at init time */
119static unsigned int _online_cpus = 1;
120
121#define no_reserves(entry) (list_empty(&((entry)->npsf_reserves)))
122#define local_entry (&__get_cpu_var(npsf_cpu_entries))
123#define remote_entry(cpu) (&per_cpu(npsf_cpu_entries, (cpu)))
124
125#define server_from_dom(domain) (container_of((domain), npsf_server_t, dom))
126
127/* task_entry uses get_partition(); therefore we must take care to
128 * update task_params.cpu correctly whenever we switch tasks,
129 * otherwise we'll deadlock.
130 */
131#define task_entry(task) remote_entry(get_partition(task))
132#define domain_edf(npsf) (&((npsf)->server->dom))
133
134#define task_npsfid(task) ((task)->rt_param.task_params.semi_part.npsf_id)
135
136static inline int owns_server(npsf_reserve_t *npsf)
137{
138 return (npsf->server->curr_reserve == npsf);
139}
140
141/* utility functions to get next and prev domains; must hold entry lock */
142static inline npsf_reserve_t* local_next_reserve(npsf_reserve_t *curr,
143 cpu_entry_t *entry)
144{
145 return (list_is_last(&curr->node, &entry->npsf_reserves)) ?
146 list_entry(entry->npsf_reserves.next, npsf_reserve_t, node) :
147 list_entry(curr->node.next, npsf_reserve_t, node);
148
149}
150
151static inline npsf_reserve_t* local_prev_reserve(npsf_reserve_t *curr,
152 cpu_entry_t *entry)
153{
154 return ((curr->node.prev == &entry->npsf_reserves) ?
155 list_entry(entry->npsf_reserves.prev, npsf_reserve_t, node) :
156 list_entry(curr->node.prev, npsf_reserve_t, node));
157}
158static void requeue(struct task_struct* t, rt_domain_t *edf)
159{
160 if (t->state != TASK_RUNNING)
161 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
162
163 BUG_ON(is_queued(t));
164
165 set_rt_flags(t, RT_F_RUNNING);
166 if (is_released(t, litmus_clock()))
167 __add_ready(edf, t);
168 else
169 add_release(edf, t); /* it has got to wait */
170}
171
172/* we assume the lock is being held */
173static void preempt(npsf_reserve_t *npsf)
174{
175 /* Since we do not support non-preemptable sections,
176 * we don't need to pass in a task. If we call this,
177 * we want the remote CPU to reschedule, no matter what.
178 */
179 preempt_if_preemptable(NULL, npsf->cpu->cpu);
180}
181
182
183static void npsf_preempt_if_server_is_scheduled(npsf_server_t* srv)
184{
185 npsf_reserve_t *reserve = srv->curr_reserve;
186 if (reserve->is_currently_scheduled) {
187 preempt(reserve);
188 }
189}
190
191/* assumes lock is held by caller */
192static void npsf_reschedule_server(npsf_server_t* srv)
193{
194 struct task_struct* hp = srv->highest_prio;
195 rt_domain_t* edf = &srv->dom;
196
197 if (edf_preemption_needed(edf, hp)) {
198 srv->highest_prio = __take_ready(edf);
199 if (hp) {
200 TRACE_TASK(hp, "requeue: no longer highest prio\n");
201 requeue(hp, edf);
202 }
203 npsf_preempt_if_server_is_scheduled(srv);
204 }
205}
206
207static void npsf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
208{
209 npsf_server_t *srv = server_from_dom(rt);
210 unsigned long flags;
211
212 raw_spin_lock_irqsave(&srv->lock, flags);
213
214 __merge_ready(rt, tasks);
215 npsf_reschedule_server(srv);
216
217 raw_spin_unlock_irqrestore(&srv->lock, flags);
218}
219
220static void job_completion(struct task_struct* t, int forced)
221{
222 sched_trace_task_completion(t, forced);
223 TRACE_TASK(t, "job_completion().\n");
224
225 set_rt_flags(t, RT_F_SLEEP);
226 prepare_for_next_period(t);
227}
228
229/* When did this slot start ? */
230static inline lt_t slot_begin(lt_t now)
231{
232 return (((now - time_origin) / npsf_slot_length)
233 * npsf_slot_length + time_origin);
234}
235
236/* Compute the delta from the beginning of the current slot. */
237static inline lt_t delta_from_slot_begin(lt_t now)
238{
239 return (now - slot_begin(now));
240}
241
242/* Given an offset into a slot, return the corresponding eligible reserve.
243 * The output param reservation_end is used to return the (relative) time at which
244 * the returned reserve ends.
245 */
246static npsf_reserve_t* get_reserve_for_offset(cpu_entry_t *entry, lt_t offset,
247 lt_t *reservation_end)
248{
249 npsf_reserve_t *tmp;
250
251 *reservation_end = 0;
252
253 /* linear search through all reserves, figure out which one is the last one
254 * to become eligible before delta */
255 list_for_each_entry(tmp, &entry->npsf_reserves, node) {
256 *reservation_end += tmp->budget;
257
258 /* We are always "late". Found tmp is the right one */
259 if ((*reservation_end > offset))
260 return tmp;
261 }
262
263	/* error: we should never fall off the reserve list */
264 BUG();
265 return NULL;
266}
267
268/* Determine which reserve is eligible based on the current time.
269 */
270static npsf_reserve_t* get_current_reserve(cpu_entry_t *entry)
271{
272 lt_t reservation_end;
273 lt_t offset = delta_from_slot_begin(litmus_clock());
274 return get_reserve_for_offset(entry, offset, &reservation_end);
275}
276
277/* This is used to ensure that we are "always" late, i.e., to make
278 * sure that the timer jitter is always positive. This should
279 * only trigger in KVM (or in real machines with bad TSC drift after
280 * an IPI).
281 *
282 * ATM proper tracing for this event is done in reserve_switch_tick().
283 */
284static noinline ktime_t catchup_time(lt_t from, lt_t target)
285{
286 while(lt_before(from, target)) {
287 from = litmus_clock();
288
289 mb();
290 cpu_relax();
291 }
292
293 return ns_to_ktime(from);
294}
295
296
297/* compute the next ABSOLUTE timer value */
298static lt_t get_next_reserve_switch_time(void)
299{
300 cpu_entry_t *entry = local_entry;
301 lt_t now = litmus_clock();
302 lt_t slot_start = slot_begin(now);
303 lt_t offset = now - slot_start;
304 lt_t next_time;
305 npsf_reserve_t* reserve;
306
307 /* compute the absolute litmus time of the next reserve switch */
308 reserve = get_reserve_for_offset(entry, offset, &next_time);
309 /* get_reserve_for_offset returns a relative start time; let's make it
310 absolute */
311 next_time += slot_start;
312
313 /* Let's see if we need to skip the next timer. */
314 reserve = local_next_reserve(reserve, entry);
315 /* if the next reserve is a continuing reserve
316 * (i.e., if it belongs to a migrating server),
317 * then we skip the timer event because we will
318 * receive an IPI from the previous processor instead. */
319 if (reserve->server->first_reserve != reserve) {
320 /* it is indeed not the first reserve */
321 next_time += reserve->budget;
322 }
323
324 return next_time;
325}
326
327/* This is the callback for reserve-switching interrupts.
328 * The timer is reprogrammed to expire at the beginning of every logical
329 * reserve (i.e., a continuing reserve may be split among different CPUs
330 * but is a _single_ logical reserve). get_next_reserve_switch_time()
331 * will return the right next_expire time.
332 */
333static enum hrtimer_restart reserve_switch_tick(struct hrtimer *timer)
334{
335 unsigned long flags;
336 cpu_entry_t *entry;
337 /* we are using CLOCK_MONOTONIC */
338 ktime_t now = ktime_get();
339 ktime_t delta;
340 int late;
341
342 entry = container_of(timer, cpu_entry_t, timer);
343 raw_spin_lock_irqsave(&entry->cpu_res_lock, flags);
344
345 /* jitter wrt virtual time */
346 delta = ktime_sub(now, ns_to_ktime(entry->should_expire));
347 late = (ktime_to_ns(delta) >= 0) ? 1 : 0;
348
349#ifdef NPSF_VERBOSE
350 if (entry->cpu_reserve && atomic_read(&all_servers_added))
351 TRACE("(npsf_id: %d) tick starts at %Ld, "
352 "now - should_expire: %Ld\n",
353 entry->cpu_reserve->npsf_id,
354 ktime_to_ns(now), ktime_to_ns(delta));
355#endif
356 /* if the timer expires earlier than the should_expire time,
357	 * we delay the switch until it is synchronized with
358	 * the switch boundary. Otherwise the next reserve would execute
359	 * for too long (wrong).
360 */
361 if (!late) {
362 TRACE("+++ Timer fired early, waiting...\n");
363 now = catchup_time(ktime_to_ns(now), entry->should_expire);
364
365 delta = ktime_sub(now, ns_to_ktime(entry->should_expire));
366 TRACE("+++ done, tick restarts at %Ld, "
367 "now - should_expire: %Ld\n",
368 ktime_to_ns(now), ktime_to_ns(delta));
369 }
370
371 BUG_ON(!atomic_read(&all_servers_added));
372 BUG_ON(no_reserves(entry));
373
374 /* Compute the next time that we need to be notified. */
375 entry->should_expire = get_next_reserve_switch_time();
376
377 /* kindly ask the Penguin to let us know... */
378 hrtimer_set_expires(timer, ns_to_ktime(entry->should_expire));
379
380 /* set resched flag to reschedule local cpu */
381 set_need_resched();
382
383 raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags);
384#ifdef NPSF_VERBOSE
385 if (atomic_read(&all_servers_added))
386 TRACE("(npsf_id: %d) tick ends at %Ld, should_expire: %llu\n",
387 entry->cpu_reserve->npsf_id, ktime_to_ns(ktime_get()),
388 entry->should_expire);
389#endif
390
391 return HRTIMER_RESTART;
392}
393
394static void npsf_scheduler_tick(struct task_struct *t)
395{
396 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
397 set_tsk_need_resched(t);
398 TRACE("npsf_tick: %d is preemptable "
399 " => FORCE_RESCHED\n", t->pid);
400 }
401}
402
403/* Assumption: caller holds srv lock and prev belongs to
404 * the currently-scheduled reservation.
405 */
406static void npsf_schedule_server(struct task_struct* prev,
407 cpu_entry_t *entry)
408{
409 npsf_server_t* srv = entry->cpu_reserve->server;
410
411 int out_of_time, sleep, exists, blocks;
412
413 exists = is_realtime(prev);
414 blocks = exists && !is_running(prev);
415 out_of_time = exists &&
416 budget_enforced(prev) &&
417 budget_exhausted(prev);
418 sleep = exists && get_rt_flags(prev) == RT_F_SLEEP;
419
420 if (exists)
421 TRACE_TASK(prev, "(npsf_id %d) blocks:%d "
422 "out_of_time:%d sleep:%d state:%d sig:%d\n",
423 task_npsfid(prev),
424 blocks, out_of_time, sleep,
425 prev->state,
426 signal_pending(prev));
427
428 /* Any task that is preemptable and either exhausts its
429 * execution budget or wants to sleep completes. We may have
430 * to reschedule after this.
431 */
432 if ((out_of_time || sleep) && !blocks) {
433 job_completion(prev, !sleep);
434
435 if (srv->highest_prio != prev) {
436 BUG_ON(!is_queued(prev));
437 remove(&srv->dom, prev);
438 }
439
440 requeue(prev, &srv->dom);
441
442 if (srv->highest_prio == prev)
443 srv->highest_prio = __take_ready(&srv->dom);
444 }
445
446 BUG_ON(blocks && prev == srv->highest_prio);
447// BUG_ON(!srv->highest_prio && jobs_pending(&srv->dom));
448}
449
450static void npsf_notify_next_cpu(npsf_reserve_t *npsf_prev)
451{
452 npsf_server_t *srv;
453
454 if (unlikely(npsf_prev->next_npsf != npsf_prev)) {
455 /* This reserve is actually shared. Let's update its 'owner'
456 * and notify the next CPU. */
457 srv = npsf_prev->server;
458 raw_spin_lock(&srv->lock);
459 srv->curr_reserve = npsf_prev->next_npsf;
460 if (srv->first_reserve != srv->curr_reserve ||
461 srv->first_cpu_wants_ipi) {
462 /* send an IPI to notify next CPU in chain */
463 srv->first_cpu_wants_ipi = 0;
464 TRACE("sending IPI\n");
465 preempt(srv->curr_reserve);
466 }
467 raw_spin_unlock(&srv->lock);
468 }
469}
470
471static struct task_struct* npsf_schedule(struct task_struct * prev)
472{
473 npsf_reserve_t *npsf_prev, *npsf_next;
474 npsf_server_t *srv_prev, *srv_next;
475 cpu_entry_t *entry = local_entry;
476 struct task_struct *next;
477
478 int reserve_switch;
479
480 /* servers not ready yet, yield to linux */
481 if (!atomic_read(&all_servers_added))
482 return NULL;
483
484#ifdef NPSF_VERBOSE
485 TRACE_TASK(prev, "schedule\n");
486#endif
487 raw_spin_lock(&entry->cpu_res_lock);
488
489 BUG_ON(no_reserves(entry));
490
491 /* step 1: what are we currently serving? */
492 npsf_prev = entry->cpu_reserve;
493 srv_prev = npsf_prev->server;
494
495 /* step 2: what SHOULD we be currently serving? */
496 npsf_next = get_current_reserve(entry);
497 srv_next = npsf_next->server;
498
499 /* TODO second measuring point for IPI receiving
500 * if (!srv_next->measure_wait_IPI) --- the remote reset
501 * trace_time_end.
502 */
503 raw_spin_lock(&srv_prev->lock);
504
505
506 /* step 3: update prev server */
507 if (is_realtime(prev) && task_npsfid(prev) == entry->cpu_reserve->npsf_id)
508 npsf_schedule_server(prev, entry);
509 else if (is_realtime(prev))
510 TRACE_TASK(prev, "npsf_id %d != cpu_reserve npsf_id %d\n",
511 task_npsfid(prev), entry->cpu_reserve->npsf_id);
512
513 /* step 4: determine if we need to switch to another reserve */
514 reserve_switch = npsf_prev != npsf_next;
515
516 if (!reserve_switch) {
517 /* easy case: just enact what the server scheduler decided */
518 next = srv_prev->highest_prio;
519
520 /* Unlock AFTER observing highest_prio to avoid races with
521 * remote rescheduling activity. */
522 raw_spin_unlock(&srv_prev->lock);
523 } else {
524 /* In this case we have a reserve switch. We are done with the
525 * previous server, so release its lock. */
526 TRACE("switch reserve npsf_id %d -> npsf_id %d\n",
527 npsf_prev->npsf_id, npsf_next->npsf_id);
528 npsf_prev->is_currently_scheduled = 0;
529 raw_spin_unlock(&srv_prev->lock);
530
531 /* Move on to the next server. */
532
533 raw_spin_lock(&srv_next->lock);
534 npsf_next->is_currently_scheduled = 1;
535
536 /* make sure we are owner of a server (if it is shared) */
537 if (unlikely(srv_next->curr_reserve != npsf_next)) {
538 /* We raced with the previous owner. Let's schedule
539 * the previous reserve for now. The previous owner
540 * will send us an IPI when the server has been pushed
541 * to us.
542 */
543 TRACE("(npsf_id %d) raced with previous server owner\n",
544 npsf_next->npsf_id);
545
546 /* check if we are the first CPU, in which case we need
547 * to request a notification explicitly */
548 if (srv_next->first_reserve == npsf_next)
549 srv_next->first_cpu_wants_ipi = 1;
550
551 npsf_next->is_currently_scheduled = 0;
552 raw_spin_unlock(&srv_next->lock);
553
554 /* just keep the previous reserve one more time */
555 raw_spin_lock(&srv_prev->lock);
556
557 npsf_prev->is_currently_scheduled = 1;
558			/* Note that there is no race condition here:
559			 * curr_reserve did not yet point to this reserve,
560			 * so no processor can have observed the one in npsf_next.
561			 * A processor might have observed the flag being zero
562			 * in npsf_prev and decided not to send an IPI, which
563			 * doesn't matter since we are going to reschedule
564			 * below anyway. */
565
566 next = srv_prev->highest_prio;
567
568 raw_spin_unlock(&srv_prev->lock);
569
570 /* TODO first measuring point for '0'-switching time
571 * remote is not ready yet and will send us an IPI
572 * when it's done.
573 * local:
574 * srv_next->measure_wait_IPI = 1;
575 * remote before sending IPI:
576 * if (srv_next->measure_wait_IPI) reset;
577 */
578 } else {
579 /* invariant: srv->highest_prio is always the
580 * highest-priority job in the server, and it is always
581 * runnable. Any update to the server must maintain
582 * this invariant. */
583 next = srv_next->highest_prio;
584
585 entry->cpu_reserve = npsf_next;
586 raw_spin_unlock(&srv_next->lock);
587
588 /* send an IPI (if necessary) */
589 npsf_notify_next_cpu(npsf_prev);
590 }
591
592 }
593
594 if (next) {
595 TRACE_TASK(next, "(npsf_id %d) scheduled at %llu\n",
596 task_npsfid(next), litmus_clock());
597 set_rt_flags(next, RT_F_RUNNING);
598 /* The TASK_RUNNING flag is set by the Penguin _way_ after
599		 * activating a task. This doesn't matter much to Linux as
600 * the rq lock will prevent any changes, but it matters to
601 * us. It is possible for a remote cpu waking up this task
602 * to requeue the task before it's runnable, send an IPI here,
603 * we schedule that task (still "not-runnable"), and only
604 * before the real execution of next, the running flag is set.
605 */
606 if (!is_running(next))
607 TRACE_TASK(next, "BAD: !TASK_RUNNING\n");
608 } else {
609 /* FIXME npsf_id is wrong if reserve switch but "switching back"
610 * if we race */
611 TRACE("(npsf_id %d) becoming idle at %llu\n",
612 reserve_switch ? npsf_next->npsf_id : npsf_prev->npsf_id,
613 litmus_clock());
614 }
615
616 raw_spin_unlock(&entry->cpu_res_lock);
617
618 return next;
619}
620
621/* Prepare a task for running in RT mode
622 *
623 * We can only be sure that the cpu is the right one (the admit test
624 * rejects tasks released on a cpu that doesn't host the right npsf_id),
625 * but we _cannot_ be sure that:
626 * 1) the found npsf is the reserve currently running on this cpu.
627 * 2) the current reserve (the one in charge of scheduling) is not
628 * running on a different cpu.
629 */
630static void npsf_task_new(struct task_struct * t, int on_rq, int running)
631{
632 npsf_reserve_t *npsf;
633 npsf_server_t *srv;
634 cpu_entry_t *entry = task_entry(t);
635 rt_domain_t *edf;
636 unsigned long flags;
637
638 BUG_ON(no_reserves(entry));
639
640 /* search the proper npsf_server where to add the new task */
641 list_for_each_entry(npsf, &entry->npsf_reserves, node) {
642 if (npsf->npsf_id == task_npsfid(t))
643 break;
644 }
645
646
647 srv = npsf->server;
648
649 /* The task should be running in the queue, otherwise signal
650 * code will try to wake it up with fatal consequences.
651 */
652 raw_spin_lock_irqsave(&entry->cpu_res_lock, flags);
653 raw_spin_lock(&srv->lock);
654
655 edf = domain_edf(npsf);
656 tsk_rt(t)->domain = edf;
657
658 TRACE_TASK(t, "task_new: P%d, task_npsfid %d, "
659 "npsf->npsf_id %d, entry->cpu %d\n",
660 t->rt_param.task_params.cpu, task_npsfid(t),
661 npsf->npsf_id, entry->cpu);
662
663 /* setup job parameters */
664 release_at(t, litmus_clock());
665
666 /* There are four basic scenarios that could happen:
667 * 1) the server is on another cpu and scheduled;
668 * 2) the server is on another cpu and not scheduled;
669 * 3) the server is on this cpu and scheduled; and
670 * 4) the server is on this cpu and not scheduled.
671 *
672 * Whatever scenario we're in, it cannot change while we are
673 * holding the server lock.
674 *
675 * If the new task does not have a high priority, then
676 * we can just queue it and be done.
677 *
678 * In theory, the requeue() and reschedule_server() code
679 * take care of all that.
680 */
681
682 requeue(t, edf);
683 /* reschedule will cause a remote preemption, if required */
684 npsf_reschedule_server(srv);
685 /* always preempt to make sure we don't
686 * use the stack if it needs to migrate */
687 set_tsk_need_resched(t);
688
689 raw_spin_unlock(&srv->lock);
690 raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags);
691}
692
693static void npsf_task_wake_up(struct task_struct *t)
694{
695 rt_domain_t *edf;
696 npsf_server_t* srv;
697 unsigned long flags;
698 lt_t now;
699
700 BUG_ON(!is_realtime(t));
701
702 edf = tsk_rt(t)->domain;
703 srv = server_from_dom(edf);
704
705 raw_spin_lock_irqsave(&srv->lock, flags);
706
707 BUG_ON(is_queued(t));
708
709 now = litmus_clock();
710 /* FIXME: this should be a configurable policy... */
711 if (is_tardy(t, now)) {
712 /* new sporadic release */
713 release_at(t, now);
714 sched_trace_task_release(t);
715 }
716
717 /* Only add to ready queue if it is not the
718 * currently-scheduled task.
719 */
720 if (srv->highest_prio != t) {
721 requeue(t, edf);
722 npsf_reschedule_server(srv);
723 }
724#ifdef NPSF_VERBOSE
725 else
726 TRACE_TASK(t, "wake_up, is curr_sched, not requeued\n");
727#endif
728
729 raw_spin_unlock_irqrestore(&srv->lock, flags);
730
731 TRACE_TASK(t, "wake up done\n");
732}
733
734static void remove_from_server(struct task_struct *t, npsf_server_t* srv)
735{
736 if (srv->highest_prio == t) {
737 TRACE_TASK(t, "remove from server: is highest-prio task\n");
738 srv->highest_prio = NULL;
739 npsf_reschedule_server(srv);
740 } else if (is_queued(t)) {
741 TRACE_TASK(t, "remove from server: removed from queue\n");
742 remove(&srv->dom, t);
743 }
744#ifdef NPSF_VERBOSE
745 else
746 TRACE_TASK(t, "WARN: where is this task?\n");
747#endif
748}
749
750static void npsf_task_block(struct task_struct *t)
751{
752 rt_domain_t *edf;
753 npsf_server_t* srv;
754 unsigned long flags;
755
756 TRACE_TASK(t, "(npsf_id %d) block at %llu, state=%d\n",
757 task_npsfid(t), litmus_clock(), t->state);
758
759 BUG_ON(!is_realtime(t));
760
761 edf = tsk_rt(t)->domain;
762 srv = server_from_dom(edf);
763
764 raw_spin_lock_irqsave(&srv->lock, flags);
765
766 remove_from_server(t, srv);
767
768 raw_spin_unlock_irqrestore(&srv->lock, flags);
769}
770
771static void npsf_task_exit(struct task_struct * t)
772{
773 rt_domain_t *edf;
774 npsf_server_t* srv;
775 unsigned long flags;
776
777 BUG_ON(!is_realtime(t));
778
779 edf = tsk_rt(t)->domain;
780 srv = server_from_dom(edf);
781
782 raw_spin_lock_irqsave(&srv->lock, flags);
783
784 remove_from_server(t, srv);
785
786 raw_spin_unlock_irqrestore(&srv->lock, flags);
787
788 TRACE_TASK(t, "RIP, now reschedule\n");
789}
790
791static long npsf_admit_task(struct task_struct* tsk)
792{
793 npsf_reserve_t *npsf;
794 cpu_entry_t *entry = task_entry(tsk);
795 int id_ok = 0;
796
797 if (!atomic_read(&all_servers_added)) {
798 printk(KERN_DEBUG "not all servers added\n");
799 return -ENODEV;
800 }
801
802 /* check to be on the right cpu and on the right server */
803 if (task_cpu(tsk) != tsk->rt_param.task_params.cpu) {
804 printk(KERN_DEBUG "wrong CPU(%d, %d, %d) for npsf_id %d\n",
805 task_cpu(tsk), tsk->rt_param.task_params.cpu,
806 entry->cpu, task_npsfid(tsk));
807 return -EINVAL;
808 }
809
810 /* 1) this cpu should have the proper npsf_id in the list
811 * 2) the rt_domain for the proper npsf_id is not null
812 */
813 list_for_each_entry(npsf, &entry->npsf_reserves, node) {
814 if (npsf->npsf_id == task_npsfid(tsk)) {
815 id_ok = 1;
816 break;
817 }
818 }
819 if (!id_ok)
820 printk(KERN_DEBUG "wrong npsf_id (%d) for entry %d\n",
821 task_npsfid(tsk), entry->cpu);
822
823 return id_ok ? 0 : -EINVAL;
824}
825
826/* in litmus.c */
827extern atomic_t rt_task_count;
828
829/* initialization status control */
830static int reserves_allocated = 0;
831
832#ifdef NPSF_VERBOSE
833static void print_reserve(cpu_entry_t *cpu)
834{
835 npsf_reserve_t *tmp;
836
837 printk(KERN_INFO "NPS-F: reserves on CPU %d:\n", cpu->cpu);
838 list_for_each_entry(tmp, &cpu->npsf_reserves, node) {
839 BUG_ON(!tmp->server);
840 BUG_ON(!&(tmp->server->dom));
841 BUG_ON(tmp->server->highest_prio);
842 printk(KERN_INFO "%d: %d us\n", tmp->npsf_id,
843 (int)(tmp->budget / 1000));
844 }
845}
846#endif
847/*
848 * do_add_reserve: add a reserve(cpu, id, budget)
849 *
850 * Callback for syscall add_server(); it allows to add the reserve "id"
851 * to the CPU "cpu". "budget" is the length of the reserve for the
852 * notional processor (server) id on the cpu cpu.
853 */
854static long do_add_reserve(npsf_reserve_t **new, cpu_entry_t *cpu,
855 npsf_server_t *the_dom, int npsf_id, lt_t budget)
856{
857 unsigned long flags;
858
859 /* npsf_id for each cpu should be given in increasing order,
860	 * it doesn't make sense to add the same notional processor twice on the same cpu.
861 * The last_seen_npsf_id is reset upon plugin insertion.
862 */
863 if (cpu->last_seen_npsf_id >= npsf_id)
864 return -EINVAL;
865
866 /* don't allow server changes if there are tasks in the system */
867 if (atomic_read(&rt_task_count))
868 return -EACCES;
869
870 if ((*new = kmalloc(sizeof(npsf_reserve_t), GFP_ATOMIC)) == NULL)
871 return -ENOMEM;
872
873 (*new)->server = the_dom;
874 (*new)->npsf_id = npsf_id;
875 (*new)->budget = budget;
876 (*new)->cpu = cpu;
877
878 npsf_printk("Add npsf_id %d on P%d with budget %llu\n", (*new)->npsf_id,
879 (*new)->cpu->cpu, (*new)->budget);
880
881 raw_spin_lock_irqsave(&cpu->cpu_res_lock, flags);
882
883 list_add_tail(&(*new)->node, &cpu->npsf_reserves);
884 cpu->last_seen_npsf_id = npsf_id;
885 cpu->cpu_reserve = list_first_entry(&cpu->npsf_reserves, npsf_reserve_t, node);
886
887 raw_spin_unlock_irqrestore(&cpu->cpu_res_lock, flags);
888
889 return 0;
890}
891
892static void kickoff_timers(void)
893{
894 int cpu;
895 cpu_entry_t *entry;
896 lt_t kickoff;
897
898 kickoff = slot_begin(litmus_clock() + npsf_slot_length * 2);
899
900 for_each_online_cpu(cpu) {
901 entry = &per_cpu(npsf_cpu_entries, cpu);
902 hrtimer_start_on(cpu, &entry->info, &entry->timer,
903 ns_to_ktime(kickoff),
904 HRTIMER_MODE_ABS_PINNED);
905 entry->should_expire = kickoff;
906 }
907 atomic_set(&timers_activated, 1);
908}
909
910/* We offer the library a budgets-array interface (so we go through the
911 * syscall path only once) and internally cycle over do_add_reserve().
912 *
913 * last == 1 means that the user is adding the last server and after
914 * the insertion the plugin is properly set up. (FIXME it should be
915 * done in a better way, but I doubt this plugin will ever go
916 * to the master branch).
917 */
918asmlinkage long sys_add_server(int __user *__id,
919 struct npsf_budgets __user *__budgets, int last)
920{
921 int id, i;
922 int ret = -EFAULT;
923 struct npsf_budgets *budgets;
924 cpu_entry_t *entry;
925 npsf_server_t *npsfserver;
926 npsf_reserve_t *npsf_reserve_array[NR_CPUS];
927 npsf_reserve_t *first_reserve;
928
929 if (_online_cpus != num_online_cpus())
930 return ret;
931
932 if (copy_from_user(&id, __id, sizeof(id)))
933 return ret;
934
935 budgets = kmalloc(_online_cpus * sizeof(struct npsf_budgets),
936 GFP_ATOMIC);
937
938 for (i = 0; i < _online_cpus; i++) {
939 budgets[i].cpu = NO_CPU;
940 budgets[i].budget = 0;
941 }
942
943	if (copy_from_user(budgets, __budgets,
944			   sizeof(*budgets) * _online_cpus))
945 goto err;
946
947 /* initialize the npsf_server_t for this npsf_server series */
948 npsfserver = kmalloc(sizeof(npsf_server_t), GFP_ATOMIC);
949 if (!npsfserver) {
950 ret = -ENOMEM;
951 goto err;
952 }
953 edf_domain_init(&npsfserver->dom, NULL, npsf_release_jobs);
954 npsfserver->highest_prio = NULL;
955
956 /* initialize all npsf_reserve_t for this server */
957	for (i = 0; i < _online_cpus && budgets[i].cpu != NO_CPU; i++) {
958 entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu);
959 if ((ret = do_add_reserve(&npsf_reserve_array[i], entry,
960 npsfserver,
961 id, budgets[i].budget)) < 0)
962 goto err;
963 }
964 /* set the current reserve to the first (and possibly unique)
965 * slice for this npsf_id */
966 npsfserver->curr_reserve = npsf_reserve_array[0];
967 npsfserver->first_reserve = npsf_reserve_array[0];
968 npsfserver->first_cpu_wants_ipi = 0;
969	for (i = 0; i < _online_cpus && budgets[i].cpu != NO_CPU; i++) {
970
971 if (i == 0 && budgets[i+1].cpu == NO_CPU) {
972 /* Fixed reserve always has itself as next */
973 npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i];
974		} else if (i > 0 && ((i+1) == _online_cpus ||
975				     budgets[i+1].cpu == NO_CPU)) {
976 /* Last reserve in the chain has the first reserve as next */
977 npsf_reserve_array[i]->next_npsf = npsf_reserve_array[0];
978 } else {
979 /* Normal continuing reserve */
980 npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i+1];
981 }
982 }
983#ifdef NPSF_VERBOSE
984	for (i = 0; i < _online_cpus && budgets[i].cpu != NO_CPU; i++) {
985 entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu);
986 print_reserve(entry);
987 }
988#endif
989
990 if (last) {
991 /* force the first slot switching by setting the
992 * current_reserve to the last server for each cpu.
993 *
994 * FIXME:don't assume there exists at least one reserve per CPU
995 */
996 for_each_online_cpu(i) {
997 entry = &per_cpu(npsf_cpu_entries, i);
998 first_reserve = list_entry(entry->npsf_reserves.next,
999 npsf_reserve_t, node);
1000
1001 first_reserve->server->curr_reserve = first_reserve;
1002 entry->cpu_reserve = first_reserve;
1003 npsf_printk("npsf_id %d is the current reserve "
1004 "and server on CPU %d\n",
1005 first_reserve->npsf_id, entry->cpu);
1006
1007 }
1008
1009 kickoff_timers();
1010
1011 /* real plugin enable */
1012 atomic_set(&all_servers_added, 1);
1013 mb();
1014 }
1015
1016 /* at least one server was initialized and may need deletion */
1017 reserves_allocated = 1;
1018err:
1019 kfree(budgets);
1020 return ret;
1021}
1022
1023
1024/* Cancel the reserve_switch_tick() hrtimers. Wait for all callbacks
1025 * to complete. This function is triggered by writing 0 as npsf_slot_length.
1026 */
1027void npsf_hrtimers_cleanup(void)
1028{
1029 int cpu;
1030 cpu_entry_t *entry;
1031 int redo;
1032
1033 if (!atomic_read(&timers_activated))
1034 return;
1035
1036 atomic_set(&timers_activated, 0);
1037
1038 /* prevent the firing of the timer on this cpu */
1039 do {
1040 redo = 0;
1041 for_each_online_cpu(cpu) {
1042 entry = &per_cpu(npsf_cpu_entries, cpu);
1043
1044 /* if callback active, skip it for now and redo later */
1045 if (hrtimer_try_to_cancel(&entry->timer) == -1) {
1046 redo = 1;
1047#ifdef NPSF_VERBOSE
1048 printk(KERN_INFO "(P%d) hrtimer on P%d was "
1049 "active, try to delete again\n",
1050 get_cpu(), cpu);
1051 put_cpu();
1052#endif
1053 }
1054 }
1055 } while (redo);
1056
1057 printk(KERN_INFO "npsf hrtimers deleted\n");
1058}
1059
1060static void cleanup_npsf(void)
1061{
1062 int cpu;
1063 cpu_entry_t *entry;
1064 struct list_head *nd, *next;
1065 npsf_reserve_t *tmp, *tmp_save;
1066
1067 for_each_online_cpu(cpu) {
1068 entry = &per_cpu(npsf_cpu_entries, cpu);
1069
1070 /* FIXME probably not needed as we should be the only cpu
1071 * doing the removal */
1072 raw_spin_lock(&entry->cpu_res_lock);
1073
1074 list_for_each_safe(nd, next, &entry->npsf_reserves) {
1075 tmp = list_entry(nd, npsf_reserve_t, node);
1076 npsf_printk("Del. (id, cpu):(%d, %d)\n",
1077 tmp->npsf_id,
1078 tmp->cpu->cpu);
1079 if (tmp->server) {
1080 npsf_printk("Del. reserves for npsf_id %d\n",
1081 tmp->npsf_id);
1082 tmp_save = tmp;
1083 while (tmp_save->next_npsf &&
1084 tmp_save->next_npsf != tmp) {
1085 tmp_save = tmp_save->next_npsf;
1086 tmp_save->server = NULL;
1087 }
1088 npsf_printk("Freeing server 0x%p\n", tmp->server);
1089 kfree(tmp->server);
1090 }
1091 npsf_printk("Freeing npsf_reserve_t 0x%p\n", tmp);
1092 kfree(tmp);
1093 }
1094 list_del(&entry->npsf_reserves);
1095 raw_spin_unlock(&entry->cpu_res_lock);
1096 }
1097}
1098
1099/* prevent plugin deactivation if timers are still active */
1100static long npsf_deactivate_plugin(void)
1101{
1102 return (atomic_read(&timers_activated)) ? -1 : 0;
1103}
1104
1105static long npsf_activate_plugin(void)
1106{
1107 int cpu;
1108 cpu_entry_t *entry;
1109 ktime_t now = ktime_get();
1110
1111 /* prevent plugin switching if timers are active */
1112 if (atomic_read(&timers_activated))
1113 return -1;
1114
1115 atomic_set(&all_servers_added, 0);
1116
1117 /* de-allocate old servers (if any) */
1118 if (reserves_allocated)
1119 cleanup_npsf();
1120
1121 _online_cpus = num_online_cpus();
1122
1123 for_each_online_cpu(cpu) {
1124 entry = &per_cpu(npsf_cpu_entries, cpu);
1125
1126 raw_spin_lock_init(&entry->cpu_res_lock);
1127
1128 entry->cpu_reserve = NULL;
1129 INIT_LIST_HEAD(&entry->npsf_reserves);
1130
1131 entry->cpu = cpu;
1132 hrtimer_init(&entry->timer, CLOCK_MONOTONIC,
1133 HRTIMER_MODE_ABS_PINNED);
1134
1135 /* initialize (reinitialize) pull timers */
1136 hrtimer_start_on_info_init(&entry->info);
1137
1138 entry->timer.function = reserve_switch_tick;
1139 entry->last_seen_npsf_id = -1;
1140 }
1141
1142 printk(KERN_INFO "NPS-F activated: slot length = %lld ns\n",
1143 npsf_slot_length);
1144
1145 /* time starts now! */
1146 time_origin = (lt_t) ktime_to_ns(now);
1147 TRACE("Time_origin = %llu\n", time_origin);
1148 return 0;
1149}
1150
1151/* Plugin object */
1152static struct sched_plugin npsf_plugin __cacheline_aligned_in_smp = {
1153 .plugin_name = "NPS-F",
1154
1155 .tick = npsf_scheduler_tick,
1156 .task_new = npsf_task_new,
1157 .complete_job = complete_job,
1158 .task_exit = npsf_task_exit,
1159 .schedule = npsf_schedule,
1160 .task_wake_up = npsf_task_wake_up,
1161 .task_block = npsf_task_block,
1162 .admit_task = npsf_admit_task,
1163 .activate_plugin = npsf_activate_plugin,
1164 .deactivate_plugin = npsf_deactivate_plugin,
1165};
1166
1167static int __init init_npsf(void)
1168{
1169 return register_sched_plugin(&npsf_plugin);
1170}
1171
1172static void __exit exit_npsf(void)
1173{
1174 if (atomic_read(&timers_activated)) {
1175 atomic_set(&timers_activated, 0);
1176 return;
1177 }
1178
1179 if (reserves_allocated)
1180 cleanup_npsf();
1181}
1182
1183module_init(init_npsf);
1184module_exit(exit_npsf);
1185
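
A standalone sketch (not part of the patch) that mirrors the slot arithmetic of slot_begin(), delta_from_slot_begin() and get_reserve_for_offset() above, using made-up numbers (a 5 ms slot holding two reserves of 3 ms and 2 ms), to show how the eligible reserve is selected from the offset into the current slot.

#include <stdio.h>

typedef unsigned long long lt_t;

int main(void)
{
	const lt_t slot_length = 5000000ULL;			/* 5 ms slot */
	const lt_t budget[] = { 3000000ULL, 2000000ULL };	/* reserves A, B */
	lt_t now = 123456789ULL;				/* arbitrary clock value */
	lt_t time_origin = 0;

	/* slot_begin(): start of the slot containing 'now' */
	lt_t slot_start = ((now - time_origin) / slot_length) * slot_length
		+ time_origin;
	lt_t offset = now - slot_start;		/* delta_from_slot_begin() */

	/* get_reserve_for_offset(): the first reserve whose cumulative
	 * budget exceeds the offset is the eligible one */
	lt_t end = 0;
	int i;
	for (i = 0; i < 2; i++) {
		end += budget[i];
		if (end > offset)
			break;
	}
	/* offset = 123456789 % 5000000 = 3456789 ns -> reserve B (index 1),
	 * which ends 5000000 ns into the slot, i.e. at slot_start + 5000000 */
	printf("offset %llu ns -> reserve %d, ends at +%llu ns\n",
	       offset, i, end);
	return 0;
}

The search is linear in the number of reserves on the CPU, which is small in practice.
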
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 3543b7baff53..3036df9b12e3 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -179,6 +179,12 @@ struct sched_plugin linux_sched_plugin = {
 int cluster_cache_index = 2;
 
 /*
+ * Slot length (in ns) for the NPS-F semi-partitioned plugin.
+ * This value can be changed at runtime through the npsf_slot_length proc file.
+ */
+lt_t npsf_slot_length = 5 * NSEC_PER_MSEC;
+
+/*
  * The reference to current plugin that is used to schedule tasks within
  * the system. It stores references to actual function implementations
  * Should be initialized by calling "init_***_plugin()"