author     Bjoern B. Brandenburg <bbb@cs.unc.edu>   2011-01-26 18:39:46 -0500
committer  Bjoern B. Brandenburg <bbb@cs.unc.edu>   2011-01-26 18:39:46 -0500
commit     e0fc039330051c525245cea8c120c45d59836140 (patch)
tree       74a9ef00ecc584f1c516fb69eda7f011bdec28a2
parent     226c85f00c5ac5181c9825af04ec1bd8c9b7eaab (diff)
Provide semi-part plugins as patches.
-rw-r--r--   download/ECRTS11/liblitmus-semi-part.patch   1657
-rw-r--r--   download/ECRTS11/litmus-rt-semi-part.patch   2809
-rw-r--r--   index.html                                      23
3 files changed, 4488 insertions(+), 1 deletion(-)
diff --git a/download/ECRTS11/liblitmus-semi-part.patch b/download/ECRTS11/liblitmus-semi-part.patch
new file mode 100644
index 0000000..fd99526
--- /dev/null
+++ b/download/ECRTS11/liblitmus-semi-part.patch
@@ -0,0 +1,1657 @@
1diff --git a/SConstruct b/SConstruct
2index c41e41e..9935396 100644
3--- a/SConstruct
4+++ b/SConstruct
5@@ -208,6 +208,13 @@ rt.Program('base_task', 'bin/base_task.c')
6 mtrt.Program('base_mt_task', 'bin/base_mt_task.c')
7 rt.Program('rt_launch', ['bin/rt_launch.c', 'bin/common.c'])
8 rt.Program('rtspin', ['bin/rtspin.c', 'bin/common.c'])
9+rt.Program('rtspin_edffm', ['bin/rtspin_edffm.c', 'bin/common.c'])
10+rt.Program('rt_launch_edffm', ['bin/rt_launch_edffm.c', 'bin/common.c'])
11+rt.Program('rtspin_npsf', ['bin/rtspin_npsf.c', 'bin/common.c'])
12+rt.Program('npsf_add_server', ['bin/npsf_add_server.c', 'bin/common.c'])
13+rt.Program('rt_launch_npsf', ['bin/rt_launch_npsf.c', 'bin/common.c'])
14+rt.Program('rtspin_edfwm', ['bin/rtspin_edfwm.c', 'bin/common.c'])
15+rt.Program('rt_launch_edfwm', ['bin/rt_launch_edfwm.c', 'bin/common.c'])
16 rt.Program('release_ts', 'bin/release_ts.c')
17 rtm.Program('measure_syscall', 'bin/null_call.c')
18
19diff --git a/bin/common.c b/bin/common.c
20index 452b882..167344a 100644
21--- a/bin/common.c
22+++ b/bin/common.c
23@@ -1,6 +1,7 @@
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <errno.h>
27+#include <string.h>
28
29 #include "common.h"
30
31@@ -9,3 +10,120 @@ void bail_out(const char* msg)
32 perror(msg);
33 exit(-1 * errno);
34 }
35+
36+/* EDF-WM helper functions to parse a custom text file format to "easily"
37+ * launch tests with rtspin and rt_launch:
38+ *
39+ * Format for task:
40+ *
41+ * <task_id execution_cost period phase cpu slices_number> .
42+ *
43+ * If the task is split into multiple slices, slices_number is non-zero
44+ * and we scan a list of slice parameters up to slices_number:
45+ *
46+ * Format for slices:
47+ *
48+ * <task_id cpu deadline(from job release) budget offset> .
49+ *
50+ * The offset is the start time for the slice relative to the job release.
51+ *
52+ * Example:
53+ * 14 2.26245771754 10 0 5 2
54+ * 14 5 5.000000 1.497306 0.000000
55+ * 14 7 10.000000 0.765152 5.000000
56+ */
57+
58+#define fms_to_ns(x) (lt_t)(((x) * __NS_PER_MS))
59+/*
60+ * <task_id, cpu, deadline (from job release), budget, offset> .
61+ */
62+int parse_edfwm_slice(FILE *ts, int slices_no, int task_id,
63+ struct rt_task *rt)
64+{
65+ int i, tid;
66+ unsigned int cpu;
67+ double deadline, budget, offset;
68+
69+ lt_t total_budget = 0;
70+
71+ struct edf_wm_params* wm = (struct edf_wm_params*) &rt->semi_part;
72+
73+ for (i = 0; i < slices_no; i++) {
74+
75+ if (fscanf(ts, "%d %u %lf %lf %lf\n", &tid, &cpu,
76+ &deadline, &budget, &offset) != EOF) {
77+
78+ if (task_id != tid) {
79+ fprintf(stderr, "task_id %d != tid %d\n",
80+ task_id, tid);
81+ return -1;
82+ }
83+
84+ wm->slices[i].deadline = fms_to_ns(deadline);
85+ wm->slices[i].budget = fms_to_ns(budget);
86+ wm->slices[i].offset = fms_to_ns(offset);
87+ wm->slices[i].cpu = cpu;
88+
89+ printf("slice(tid, cpu, d, e, ph) = (%d, %u, %llu, %llu, %llu)\n",
90+ tid, cpu, wm->slices[i].deadline,
91+ wm->slices[i].budget, wm->slices[i].offset);
92+
93+ total_budget += wm->slices[i].budget;
94+ if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) {
95+
96+ fprintf(stderr, "Slice %llu is too small\n",
97+ wm->slices[i].budget);
98+ return -1;
99+ }
100+ }
101+
102+ if (ferror(ts)) {
103+ fprintf(stderr, "Cannot read file\n");
104+ return -1;
105+ }
106+ }
107+ wm->count = slices_no;
108+ rt->exec_cost = total_budget;
109+ printf("--- total %u slices ---\n", wm->count);
110+ return 0;
111+}
112+
113+/*
114+ * <task_id, execution_cost, period, phase, cpu, slices_number> .
115+ */
116+int parse_edfwm_ts_file(FILE *ts, struct rt_task *rt)
117+{
118+ int task_id, ret = 1;
119+ unsigned int cpu, sliceno;
120+ double fwcet, fperiod, fphase;
121+
122+ ret = fscanf(ts, "%d %lf %lf %lf %d %d\n",
123+ &task_id, &fwcet, &fperiod, &fphase, &cpu, &sliceno);
124+
125+ if (ferror(ts))
126+ goto err;
127+
128+ rt->exec_cost = fms_to_ns(fwcet);
129+ rt->period = fms_to_ns(fperiod);
130+ rt->phase = fms_to_ns(fphase);
131+ rt->cpu = cpu;
132+ rt->cls = RT_CLASS_HARD;
133+ rt->budget_policy = PRECISE_ENFORCEMENT;
134+
135+ printf("(tid, wcet, period, ph, cpu, slices) = "
136+ "(%d, %llu, %llu, %llu, %u, %u)\n",
137+ task_id, rt->exec_cost, rt->period, rt->phase, cpu, sliceno);
138+ if (sliceno > 0) {
139+ memset(&rt->semi_part, 0, sizeof(struct edf_wm_params));
140+ ret = parse_edfwm_slice(ts, sliceno, task_id, rt);
141+ if (ret < 0)
142+ goto err;
143+ }
144+
145+ return 0;
146+
147+err:
148+ fprintf(stderr, "Error parsing file\n");
149+ return -1;
150+}
151+
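As an illustration of the format parsed above, a task-set file for one task split into two slices might look as follows (values are hypothetical, all times in milliseconds); the first line gives <task_id execution_cost period phase cpu slices_number>, the following lines give <task_id cpu deadline budget offset> for each slice:

    7 2.5 20 0 1 2
    7 1 10.000000 1.400000 0.000000
    7 2 20.000000 1.100000 10.000000

A task that is not split simply uses 0 as slices_number and omits the slice lines; each slice budget must be at least MIN_EDF_WM_SLICE_SIZE (50 us).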
152diff --git a/bin/npsf_add_server.c b/bin/npsf_add_server.c
153new file mode 100644
154index 0000000..9f3f92c
155--- /dev/null
156+++ b/bin/npsf_add_server.c
157@@ -0,0 +1,108 @@
158+/* wrapper for sys_add_server
159+ *
160+ * Input: a file with on each line:
161+ * npsf_id cpu budget(us)
162+ */
163+#include <stdio.h>
164+#include <stdlib.h>
165+#include <unistd.h>
166+
167+#include "litmus.h"
168+#include "common.h"
169+
170+void usage(char *error) {
171+ fprintf(stderr, "Error: %s\n", error);
172+ fprintf(stderr,
173+ "Usage: npsf_add_server SERVERS-FILE MAX-SPLITS-PER-NPSFID\n");
174+ exit(1);
175+}
176+
177+int main(int argc, char** argv)
178+{
179+ int ret;
180+ FILE *file;
181+ int i,j;
182+ int npsf_id, curr_id = -1;
183+ int cpu, max_splits_server;
184+ int budget_us;
185+ struct npsf_budgets *budgets;
186+
187+ if (argc < 3)
188+ usage("Arguments missing.");
189+
190+ max_splits_server = atoi(argv[2]);
191+
192+ if ((file = fopen(argv[1], "r")) == NULL) {
193+ fprintf(stderr, "Cannot open %s\n", argv[1]);
194+ return -1;
195+ }
196+
197+ /* format: npsf_id cpu budget-us */
198+ i = 0;
199+ while (fscanf(file, "%d %d %d\n", &npsf_id, &cpu, &budget_us) != EOF) {
200+
201+ printf("Read: %d %d %d\n", npsf_id, cpu, budget_us);
202+
203+ if (curr_id == -1) {
204+ curr_id = npsf_id;
205+ budgets = malloc(max_splits_server *
206+ sizeof(struct npsf_budgets));
207+ for(j = 0; j < max_splits_server; j++) {
208+ budgets[j].cpu = -1;
209+ budgets[j].budget = 0;
210+ }
211+ }
212+
213+ if (npsf_id == curr_id) {
214+ /* same notional processor, different cpu and budget */
215+ budgets[i].cpu = cpu;
216+ budgets[i].budget = (lt_t) (budget_us * 1000);
217+ i++;
218+ } else {
219+ /* different notional processor */
220+ /* add server */
221+ printf("Adding npsf_id = %d\n", curr_id);
222+ ret = add_server(&curr_id, budgets, 0);
223+
224+ if (ret < 0) {
225+ fclose(file);
226+ free(budgets);
227+ printf("Cannot add Notional Processor %d\n",
228+ curr_id);
229+ return ret;
230+ }
231+
232+ /* reinit new */
233+ i = 0;
234+ budgets = malloc(max_splits_server *
235+ sizeof(struct npsf_budgets));
236+ for(j = 0; j < max_splits_server; j++) {
237+ budgets[j].cpu = -1;
238+ budgets[j].budget = 0;
239+ }
240+ curr_id = npsf_id;
241+ budgets[i].cpu = cpu;
242+ budgets[i].budget = (lt_t) (budget_us * 1000);
243+ i++;
244+ }
245+ }
246+
247+ if (ferror(file)) {
248+ fprintf(stderr, "Error while reading\n");
249+ fclose(file);
250+ return -1;
251+ }
252+
253+ /* save the last entry */
254+ ret = add_server(&curr_id, budgets, 1);
255+ printf("Adding npsf_id = %d\n", curr_id);
256+ if (ret < 0) {
257+ fclose(file);
258+ free(budgets);
259+ bail_out("Cannot add Notional Processor: ");
260+ }
261+
262+ fclose(file);
263+
264+ return 0;
265+}
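For example, a SERVERS-FILE describing two notional processors (hypothetical values; budgets in microseconds, entries for the same npsf_id on consecutive lines) could be:

    0 0 3000
    0 1 2000
    1 2 5000

Loaded with 'npsf_add_server servers.txt 2', this adds notional processor 0 split across CPUs 0 and 1, and notional processor 1 residing entirely on CPU 2.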
266diff --git a/bin/rt_launch_edffm.c b/bin/rt_launch_edffm.c
267new file mode 100644
268index 0000000..ddde7dd
269--- /dev/null
270+++ b/bin/rt_launch_edffm.c
271@@ -0,0 +1,133 @@
272+#include <stdio.h>
273+#include <stdlib.h>
274+#include <string.h>
275+#include <unistd.h>
276+#include <limits.h>
277+#include <signal.h>
278+
279+#include "litmus.h"
280+#include "common.h"
281+
282+typedef struct {
283+ int wait;
284+ char * exec_path;
285+ char ** argv;
286+} startup_info_t;
287+
288+
289+int launch(void *task_info_p) {
290+ startup_info_t *info = (startup_info_t*) task_info_p;
291+ int ret;
292+ if (info->wait) {
293+ ret = wait_for_ts_release();
294+ if (ret != 0)
295+ perror("wait_for_ts_release()");
296+ }
297+ ret = execvp(info->exec_path, info->argv);
298+ perror("execv failed");
299+ return ret;
300+}
301+
302+void usage(char *error) {
303+ fprintf(stderr, "%s\nUsage: rt_launch [-w][-v][-p cpu][-c hrt | srt | be] wcet period"
304+ " fracnum1 fracden1 cpu1 fracnum2 fracden2 cpu2 program [arg1 arg2 ...]\n"
305+ "\t-w\tSynchronous release\n"
306+ "\t-v\tVerbose\n"
307+ "\t-p\tcpu (or initial cpu)\n"
308+ "\t-c\tClass\n"
309+ "\twcet, period in ms\n"
310+ "\tprogram to be launched\n",
311+ error);
312+ exit(1);
313+}
314+
315+
316+#define OPTSTR "p:c:vw"
317+
318+int main(int argc, char** argv)
319+{
320+ int ret;
321+ lt_t wcet;
322+ lt_t period;
323+ /* [num,den] */
324+ lt_t frac1[2], frac2[2];
325+ int cpu1, cpu2;
326+ int migrate = 0;
327+ int cpu = 0;
328+ int opt;
329+ int verbose = 0;
330+ int wait = 0;
331+ startup_info_t info;
332+ task_class_t class = RT_CLASS_HARD;
333+
334+ while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
335+ switch (opt) {
336+ case 'w':
337+ wait = 1;
338+ break;
339+ case 'v':
340+ verbose = 1;
341+ break;
342+ case 'p':
343+ cpu = atoi(optarg);
344+ migrate = 1;
345+ break;
346+ case 'c':
347+ class = str2class(optarg);
348+ if (class == -1)
349+ usage("Unknown task class.");
350+ break;
351+
352+ case ':':
353+ usage("Argument missing.");
354+ break;
355+ case '?':
356+ default:
357+ usage("Bad argument.");
358+ break;
359+ }
360+ }
361+
362+ signal(SIGUSR1, SIG_IGN);
363+
364+ if (argc - optind < 8)
365+ usage("Arguments missing.");
366+ wcet = ms2lt(atoi(argv[optind + 0]));
367+ period = ms2lt(atoi(argv[optind + 1]));
368+ /* frac num, den = 0 means fixed task */
369+ frac1[0] = atoi(argv[optind + 2]);
370+ frac1[1] = atoi(argv[optind + 3]);
371+ cpu1 = atoi(argv[optind + 4]);
372+ frac2[0] = atoi(argv[optind + 5]);
373+ frac2[1] = atoi(argv[optind + 6]);
374+ cpu2 = atoi(argv[optind + 7]);
375+ if (wcet <= 0)
376+ usage("The worst-case execution time must be a "
377+ "positive number.");
378+ if (period <= 0)
379+ usage("The period must be a positive number.");
380+ if (wcet > period) {
381+ usage("The worst-case execution time must not "
382+ "exceed the period.");
383+ }
384+ info.exec_path = argv[optind + 8];
385+ info.argv = argv + optind + 8;
386+ info.wait = wait;
387+ if (migrate) {
388+ ret = be_migrate_to(cpu);
389+ if (ret < 0)
390+ bail_out("could not migrate to target partition");
391+ }
392+ /* create in src/task.c a new wrapper for the __launch_rt_task
393+ * which takes the fraction and the cpus */
394+ ret = __create_rt_task_edffm(launch, &info, cpu, wcet, period, frac1,
395+ frac2, cpu1, cpu2, class);
396+
397+
398+ if (ret < 0)
399+ bail_out("could not create rt child process");
400+ else if (verbose)
401+ printf("%d\n", ret);
402+
403+ return 0;
404+}
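A sketch of an invocation (program name and values are hypothetical): 'rt_launch_edffm -w 5 20 1 4 0 3 4 1 my_task' launches my_task with a 5 ms WCET and a 20 ms period, executing a fraction of 1/4 on CPU 0 and 3/4 on CPU 1; passing 0 for both denominators marks the task as fixed rather than migratory.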
405diff --git a/bin/rt_launch_edfwm.c b/bin/rt_launch_edfwm.c
406new file mode 100644
407index 0000000..9e8a322
408--- /dev/null
409+++ b/bin/rt_launch_edfwm.c
410@@ -0,0 +1,98 @@
411+#include <stdio.h>
412+#include <stdlib.h>
413+#include <string.h>
414+#include <unistd.h>
415+#include <limits.h>
416+#include <signal.h>
417+
418+#include "litmus.h"
419+#include "common.h"
420+
421+typedef struct {
422+ int wait;
423+ char * exec_path;
424+ char ** argv;
425+} startup_info_t;
426+
427+
428+int launch(void *task_info_p) {
429+ startup_info_t *info = (startup_info_t*) task_info_p;
430+ int ret;
431+ if (info->wait) {
432+ ret = wait_for_ts_release();
433+ if (ret != 0)
434+ perror("wait_for_ts_release()");
435+ }
436+ ret = execvp(info->exec_path, info->argv);
437+ perror("execv failed");
438+ return ret;
439+}
440+
441+void usage(char *error) {
442+ fprintf(stderr, "%s\nUsage: rt_launch [-w] task_parameters_file "
443+ "program [arg1 arg2 ...]\n"
444+ "\t-w\tSynchronous release\n"
445+ "\tprogram to be launched\n",
446+ error);
447+ exit(1);
448+}
449+
450+#define OPTSTR "w"
451+
452+int main(int argc, char** argv)
453+{
454+ int ret;
455+ int wait = 0;
456+ int opt;
457+ startup_info_t info;
458+
459+ struct rt_task rt;
460+ FILE *file;
461+
462+ while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
463+ switch (opt) {
464+ case 'w':
465+ wait = 1;
466+ break;
467+ case ':':
468+ usage("Argument missing.");
469+ break;
470+ case '?':
471+ default:
472+ usage("Bad argument.");
473+ break;
474+ }
475+ }
476+
477+ signal(SIGUSR1, SIG_IGN);
478+
479+ if (argc - optind < 2)
480+ usage("Arguments missing.");
481+
482+ if ((file = fopen(argv[optind + 0], "r")) == NULL) {
483+ fprintf(stderr, "Cannot open %s\n", argv[1]);
484+ return -1;
485+ }
486+
487+ memset(&rt, 0, sizeof(struct rt_task));
488+
489+ if (parse_edfwm_ts_file(file, &rt) < 0)
490+ bail_out("Could not parse file\n");
491+
492+ if (sporadic_task_ns_semi(&rt) < 0)
493+ bail_out("could not setup rt task params");
494+
495+ fclose(file);
496+
497+ info.exec_path = argv[optind + 1];
498+ info.argv = argv + optind + 1;
499+ info.wait = wait;
500+
501+ ret = create_rt_task_semi(launch, &info, &rt);
502+
503+ if (ret < 0)
504+ bail_out("could not create rt child process");
505+
506+ return 0;
507+}
508+
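For example, 'rt_launch_edfwm -w split.ts my_task' (hypothetical names) reads the task and slice parameters from split.ts in the format handled by parse_edfwm_ts_file() and then launches my_task with those parameters.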
509diff --git a/bin/rt_launch_npsf.c b/bin/rt_launch_npsf.c
510new file mode 100644
511index 0000000..97ad361
512--- /dev/null
513+++ b/bin/rt_launch_npsf.c
514@@ -0,0 +1,110 @@
515+#include <stdio.h>
516+#include <stdlib.h>
517+#include <string.h>
518+#include <unistd.h>
519+#include <limits.h>
520+#include <signal.h>
521+
522+#include "litmus.h"
523+#include "common.h"
524+
525+typedef struct {
526+ int wait;
527+ char * exec_path;
528+ char ** argv;
529+} startup_info_t;
530+
531+
532+int launch(void *task_info_p) {
533+ startup_info_t *info = (startup_info_t*) task_info_p;
534+ int ret;
535+ if (info->wait) {
536+ ret = wait_for_ts_release();
537+ if (ret != 0)
538+ perror("wait_for_ts_release()");
539+ }
540+ ret = execvp(info->exec_path, info->argv);
541+ perror("execv failed");
542+ return ret;
543+}
544+
545+void usage(char *error) {
546+ fprintf(stderr, "%s\nUsage: rt_launch [-w][-v] wcet period cpu npsf-id program [arg1 arg2 ...]\n"
547+ "\t-w\tSynchronous release\n"
548+ "\t-v\tVerbose\n"
549+ "\twcet, period in ms\n"
550+ "\tprogram to be launched\n",
551+ error);
552+ exit(1);
553+}
554+
555+
556+#define OPTSTR "vw"
557+
558+int main(int argc, char** argv)
559+{
560+ int ret;
561+ lt_t wcet;
562+ lt_t period;
563+ int migrate = 0;
564+ int cpu = 0;
565+ int npsf_id;
566+ int opt;
567+ int verbose = 0;
568+ int wait = 0;
569+ startup_info_t info;
570+
571+ while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
572+ switch (opt) {
573+ case 'w':
574+ wait = 1;
575+ break;
576+ case 'v':
577+ verbose = 1;
578+ break;
579+ case ':':
580+ usage("Argument missing.");
581+ break;
582+ case '?':
583+ default:
584+ usage("Bad argument.");
585+ break;
586+ }
587+ }
588+
589+ signal(SIGUSR1, SIG_IGN);
590+
591+ if (argc - optind < 5)
592+ usage("Arguments missing.");
593+ wcet = ms2lt(atoi(argv[optind + 0]));
594+ period = ms2lt(atoi(argv[optind + 1]));
595+ cpu = atoi(argv[optind + 2]);
596+ migrate = 1;
597+ npsf_id = atoi(argv[optind + 3]);
598+ if (wcet <= 0)
599+ usage("The worst-case execution time must be a "
600+ "positive number.");
601+ if (period <= 0)
602+ usage("The period must be a positive number.");
603+ if (wcet > period) {
604+ usage("The worst-case execution time must not "
605+ "exceed the period.");
606+ }
607+ info.exec_path = argv[optind + 4];
608+ info.argv = argv + optind + 4;
609+ info.wait = wait;
610+ if (migrate) {
611+ ret = be_migrate_to(cpu);
612+ if (ret < 0)
613+ bail_out("could not migrate to target partition");
614+ }
615+ ret = __create_rt_task_npsf(launch, &info, cpu, wcet, period, npsf_id, RT_CLASS_HARD);
616+
617+
618+ if (ret < 0)
619+ bail_out("could not create rt child process");
620+ else if (verbose)
621+ printf("%d\n", ret);
622+
623+ return 0;
624+}
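For example, 'rt_launch_npsf -w 5 50 2 0 my_task' (hypothetical values) launches my_task with a 5 ms WCET and a 50 ms period on CPU 2, assigned to the notional processor with npsf-id 0, which must previously have been created via npsf_add_server.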
625diff --git a/bin/rtspin_edffm.c b/bin/rtspin_edffm.c
626new file mode 100644
627index 0000000..5db79b8
628--- /dev/null
629+++ b/bin/rtspin_edffm.c
630@@ -0,0 +1,263 @@
631+#include <sys/time.h>
632+
633+#include <stdio.h>
634+#include <stdlib.h>
635+#include <unistd.h>
636+#include <time.h>
637+
638+
639+#include "litmus.h"
640+#include "common.h"
641+
642+
643+static double cputime()
644+{
645+ struct timespec ts;
646+ int err;
647+ err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
648+ if (err != 0)
649+ perror("clock_gettime");
650+ return (ts.tv_sec + 1E-9 * ts.tv_nsec);
651+}
652+
653+static double wctime()
654+{
655+ struct timeval tv;
656+ gettimeofday(&tv, NULL);
657+ return (tv.tv_sec + 1E-6 * tv.tv_usec);
658+}
659+
660+void usage(char *error) {
661+ fprintf(stderr, "Error: %s\n", error);
662+ fprintf(stderr,
663+ "Usage: rt_spin [-w] [-p PARTITION] [-c CLASS] WCET PERIOD DURATION fracnum1 fracden1 cpu1 fracnum2 fracden2 cpu2\n"
664+ " rt_spin -l\n");
665+ exit(1);
666+}
667+
668+#define NUMS 4096
669+static int num[NUMS];
670+static double loop_length = 1.0;
671+static char* progname;
672+
673+static int loop_once(void)
674+{
675+ int i, j = 0;
676+ for (i = 0; i < NUMS; i++)
677+ j += num[i]++;
678+ return j;
679+}
680+
681+static int loop_for(double exec_time)
682+{
683+ double t = 0;
684+ int tmp = 0;
685+/* while (t + loop_length < exec_time) {
686+ tmp += loop_once();
687+ t += loop_length;
688+ }
689+*/
690+ double start = cputime();
691+ double now = cputime();
692+ while (now + loop_length < start + exec_time) {
693+ tmp += loop_once();
694+ t += loop_length;
695+ now = cputime();
696+ }
697+
698+ return tmp;
699+}
700+
701+static void fine_tune(double interval)
702+{
703+ double start, end, delta;
704+
705+ start = wctime();
706+ loop_for(interval);
707+ end = wctime();
708+ delta = (end - start - interval) / interval;
709+ if (delta != 0)
710+ loop_length = loop_length / (1 - delta);
711+}
712+
713+static void configure_loop(void)
714+{
715+ double start;
716+
717+ /* prime cache */
718+ loop_once();
719+ loop_once();
720+ loop_once();
721+
722+ /* measure */
723+ start = wctime();
724+ loop_once(); /* hope we didn't get preempted */
725+ loop_length = wctime();
726+ loop_length -= start;
727+
728+ /* fine tune */
729+ fine_tune(0.1);
730+ fine_tune(0.1);
731+ fine_tune(0.1);
732+}
733+
734+static void show_loop_length(void)
735+{
736+ printf("%s/%d: loop_length=%f (%ldus)\n",
737+ progname, getpid(), loop_length,
738+ (long) (loop_length * 1000000));
739+}
740+
741+static void debug_delay_loop(void)
742+{
743+ double start, end, delay;
744+ show_loop_length();
745+ while (1) {
746+ for (delay = 0.5; delay > 0.01; delay -= 0.01) {
747+ start = wctime();
748+ loop_for(delay);
749+ end = wctime();
750+ printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n",
751+ delay,
752+ end - start,
753+ end - start - delay,
754+ 100 * (end - start - delay) / delay);
755+ }
756+ }
757+}
758+
759+static int job(double exec_time)
760+{
761+ loop_for(exec_time);
762+ sleep_next_period();
763+ return 0;
764+}
765+
766+#define OPTSTR "p:c:wld:v"
767+
768+int main(int argc, char** argv)
769+{
770+ int ret;
771+ lt_t wcet;
772+ lt_t period;
773+ /* [num,den] */
774+ lt_t frac1[2], frac2[2];
775+ int cpu1, cpu2;
776+ double wcet_ms, period_ms;
777+ int migrate = 0;
778+ int cpu = 0;
779+ int opt;
780+ int wait = 0;
781+ int test_loop = 0;
782+ int skip_config = 0;
783+ int verbose = 0;
784+ double duration, start;
785+ task_class_t class = RT_CLASS_HARD;
786+
787+ progname = argv[0];
788+
789+ while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
790+ switch (opt) {
791+ case 'w':
792+ wait = 1;
793+ break;
794+ case 'p':
795+ cpu = atoi(optarg);
796+ migrate = 1;
797+ break;
798+ case 'c':
799+ class = str2class(optarg);
800+ if (class == -1)
801+ usage("Unknown task class.");
802+ break;
803+ case 'l':
804+ test_loop = 1;
805+ break;
806+ case 'd':
807+ /* manually configure delay per loop iteration
808+ * unit: microseconds */
809+ loop_length = atof(optarg) / 1000000;
810+ skip_config = 1;
811+ break;
812+ case 'v':
813+ verbose = 1;
814+ break;
815+ case ':':
816+ usage("Argument missing.");
817+ break;
818+ case '?':
819+ default:
820+ usage("Bad argument.");
821+ break;
822+ }
823+ }
824+
825+
826+ if (!skip_config)
827+ configure_loop();
828+
829+ if (test_loop) {
830+ debug_delay_loop();
831+ return 0;
832+ }
833+
834+ if (argc - optind < 9)
835+ usage("Arguments missing.");
836+ wcet_ms = atof(argv[optind + 0]);
837+ period_ms = atof(argv[optind + 1]);
838+ duration = atof(argv[optind + 2]);
839+ /* frac num, den = 0 means fixed task */
840+ frac1[0] = atoi(argv[optind + 3]);
841+ frac1[1] = atoi(argv[optind + 4]);
842+ cpu1 = atoi(argv[optind + 5]);
843+ frac2[0] = atoi(argv[optind + 6]);
844+ frac2[1] = atoi(argv[optind + 7]);
845+ cpu2 = atoi(argv[optind + 8]);
846+ wcet = wcet_ms * __NS_PER_MS;
847+ period = period_ms * __NS_PER_MS;
848+ if (wcet <= 0)
849+ usage("The worst-case execution time must be a "
850+ "positive number.");
851+ if (period <= 0)
852+ usage("The period must be a positive number.");
853+ if (wcet > period) {
854+ usage("The worst-case execution time must not "
855+ "exceed the period.");
856+ }
857+
858+ if (migrate) {
859+ ret = be_migrate_to(cpu);
860+ if (ret < 0)
861+ bail_out("could not migrate to target partition");
862+ }
863+
864+ ret = sporadic_task_ns_edffm(wcet, period, 0, cpu,
865+ frac1, frac2, cpu1, cpu2,
866+ class, NO_ENFORCEMENT, migrate);
867+
868+ if (ret < 0)
869+ bail_out("could not setup rt task params");
870+
871+ if (verbose)
872+ show_loop_length();
873+
874+ init_litmus();
875+
876+ ret = task_mode(LITMUS_RT_TASK);
877+ if (ret != 0)
878+ bail_out("could not become RT task");
879+
880+ if (wait) {
881+ ret = wait_for_ts_release();
882+ if (ret != 0)
883+ bail_out("wait_for_ts_release()");
884+ }
885+
886+ start = wctime();
887+
888+ while (start + duration > wctime()) {
889+ job(wcet_ms * 0.0009); /* 90% wcet, in seconds */
890+ }
891+
892+ return 0;
893+}
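For example, 'rtspin_edffm -w 10 100 30 3 4 0 1 4 1' (hypothetical values) spins for 30 seconds as a task with a 10 ms WCET and a 100 ms period, with a share of 3/4 on CPU 0 and 1/4 on CPU 1.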
894diff --git a/bin/rtspin_edfwm.c b/bin/rtspin_edfwm.c
895new file mode 100644
896index 0000000..21a5f3b
897--- /dev/null
898+++ b/bin/rtspin_edfwm.c
899@@ -0,0 +1,233 @@
900+#include <sys/time.h>
901+
902+#include <stdio.h>
903+#include <stdlib.h>
904+#include <unistd.h>
905+#include <time.h>
906+#include <string.h>
907+
908+#include "litmus.h"
909+#include "common.h"
910+
911+
912+static double cputime()
913+{
914+ struct timespec ts;
915+ int err;
916+ err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
917+ if (err != 0)
918+ perror("clock_gettime");
919+ return (ts.tv_sec + 1E-9 * ts.tv_nsec);
920+}
921+
922+static double wctime()
923+{
924+ struct timeval tv;
925+ gettimeofday(&tv, NULL);
926+ return (tv.tv_sec + 1E-6 * tv.tv_usec);
927+}
928+
929+void usage(char *error) {
930+ fprintf(stderr, "Error: %s\n", error);
931+ fprintf(stderr,
932+ "Usage: rt_spin [-w] task_parameters_file duration\n"
933+ " rt_spin -l\n");
934+ exit(1);
935+}
936+
937+#define NUMS 4096
938+static int num[NUMS];
939+static double loop_length = 1.0;
940+static char* progname;
941+
942+static int loop_once(void)
943+{
944+ int i, j = 0;
945+ for (i = 0; i < NUMS; i++)
946+ j += num[i]++;
947+ return j;
948+}
949+
950+static int loop_for(double exec_time)
951+{
952+ double t = 0;
953+ int tmp = 0;
954+/* while (t + loop_length < exec_time) {
955+ tmp += loop_once();
956+ t += loop_length;
957+ }
958+*/
959+ double start = cputime();
960+ double now = cputime();
961+ while (now + loop_length < start + exec_time) {
962+ tmp += loop_once();
963+ t += loop_length;
964+ now = cputime();
965+ }
966+
967+ return tmp;
968+}
969+
970+static void fine_tune(double interval)
971+{
972+ double start, end, delta;
973+
974+ start = wctime();
975+ loop_for(interval);
976+ end = wctime();
977+ delta = (end - start - interval) / interval;
978+ if (delta != 0)
979+ loop_length = loop_length / (1 - delta);
980+}
981+
982+static void configure_loop(void)
983+{
984+ double start;
985+
986+ /* prime cache */
987+ loop_once();
988+ loop_once();
989+ loop_once();
990+
991+ /* measure */
992+ start = wctime();
993+ loop_once(); /* hope we didn't get preempted */
994+ loop_length = wctime();
995+ loop_length -= start;
996+
997+ /* fine tune */
998+ fine_tune(0.1);
999+ fine_tune(0.1);
1000+ fine_tune(0.1);
1001+}
1002+
1003+static void show_loop_length(void)
1004+{
1005+ printf("%s/%d: loop_length=%f (%ldus)\n",
1006+ progname, getpid(), loop_length,
1007+ (long) (loop_length * 1000000));
1008+}
1009+
1010+static void debug_delay_loop(void)
1011+{
1012+ double start, end, delay;
1013+ show_loop_length();
1014+ while (1) {
1015+ for (delay = 0.5; delay > 0.01; delay -= 0.01) {
1016+ start = wctime();
1017+ loop_for(delay);
1018+ end = wctime();
1019+ printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n",
1020+ delay,
1021+ end - start,
1022+ end - start - delay,
1023+ 100 * (end - start - delay) / delay);
1024+ }
1025+ }
1026+}
1027+
1028+static int job(double exec_time)
1029+{
1030+ loop_for(exec_time);
1031+ sleep_next_period();
1032+ return 0;
1033+}
1034+
1035+#define OPTSTR "wld:v"
1036+
1037+int main(int argc, char** argv)
1038+{
1039+ int ret;
1040+
1041+ int opt;
1042+ int wait = 0;
1043+ int test_loop = 0;
1044+ int skip_config = 0;
1045+ int verbose = 0;
1046+ double wcet_ms;
1047+ double duration, start;
1048+
1049+ struct rt_task rt;
1050+ FILE *file;
1051+
1052+ progname = argv[0];
1053+
1054+ while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
1055+ switch (opt) {
1056+ case 'w':
1057+ wait = 1;
1058+ break;
1059+ case 'l':
1060+ test_loop = 1;
1061+ break;
1062+ case 'd':
1063+ /* manually configure delay per loop iteration
1064+ * unit: microseconds */
1065+ loop_length = atof(optarg) / 1000000;
1066+ skip_config = 1;
1067+ break;
1068+ case 'v':
1069+ verbose = 1;
1070+ break;
1071+ case ':':
1072+ usage("Argument missing.");
1073+ break;
1074+ case '?':
1075+ default:
1076+ usage("Bad argument.");
1077+ break;
1078+ }
1079+ }
1080+
1081+
1082+ if (!skip_config)
1083+ configure_loop();
1084+
1085+ if (test_loop) {
1086+ debug_delay_loop();
1087+ return 0;
1088+ }
1089+
1090+ if (argc - optind < 2)
1091+ usage("Arguments missing.");
1092+
1093+ if ((file = fopen(argv[optind + 0], "r")) == NULL) {
1094+ fprintf(stderr, "Cannot open %s\n", argv[1]);
1095+ return -1;
1096+ }
1097+ duration = atof(argv[optind + 1]);
1098+
1099+ memset(&rt, 0, sizeof(struct rt_task));
1100+
1101+ if (parse_edfwm_ts_file(file, &rt) < 0)
1102+ bail_out("Could not parse file\n");
1103+
1104+ if (sporadic_task_ns_semi(&rt) < 0)
1105+ bail_out("could not setup rt task params");
1106+
1107+ fclose(file);
1108+
1109+ if (verbose)
1110+ show_loop_length();
1111+
1112+ init_litmus();
1113+
1114+ ret = task_mode(LITMUS_RT_TASK);
1115+ if (ret != 0)
1116+ bail_out("could not become RT task");
1117+
1118+ if (wait) {
1119+ ret = wait_for_ts_release();
1120+ if (ret != 0)
1121+ bail_out("wait_for_ts_release()");
1122+ }
1123+
1124+ wcet_ms = ((double) rt.exec_cost ) / __NS_PER_MS;
1125+ start = wctime();
1126+
1127+ while (start + duration > wctime()) {
1128+ job(wcet_ms * 0.0009); /* 90% wcet, in seconds */
1129+ }
1130+
1131+ return 0;
1132+}
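For example, 'rtspin_edfwm -w split.ts 30' (hypothetical file name) reads the task and slice parameters from split.ts and spins for 30 seconds of wall-clock time.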
1133diff --git a/bin/rtspin_npsf.c b/bin/rtspin_npsf.c
1134new file mode 100644
1135index 0000000..d5dff3d
1136--- /dev/null
1137+++ b/bin/rtspin_npsf.c
1138@@ -0,0 +1,252 @@
1139+#include <sys/time.h>
1140+
1141+#include <stdio.h>
1142+#include <stdlib.h>
1143+#include <unistd.h>
1144+#include <time.h>
1145+
1146+
1147+#include "litmus.h"
1148+#include "common.h"
1149+
1150+
1151+static double cputime()
1152+{
1153+ struct timespec ts;
1154+ int err;
1155+ err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
1156+ if (err != 0)
1157+ perror("clock_gettime");
1158+ return (ts.tv_sec + 1E-9 * ts.tv_nsec);
1159+}
1160+
1161+static double wctime()
1162+{
1163+ struct timeval tv;
1164+ gettimeofday(&tv, NULL);
1165+ return (tv.tv_sec + 1E-6 * tv.tv_usec);
1166+}
1167+
1168+void usage(char *error) {
1169+ fprintf(stderr, "Error: %s\n", error);
1170+ fprintf(stderr,
1171+ "Usage: rt_spin [-w] WCET PERIOD DURATION CPU NPSF-ID\n"
1172+ " rt_spin -l\n");
1173+ exit(1);
1174+}
1175+
1176+#define NUMS 4096
1177+static int num[NUMS];
1178+static double loop_length = 1.0;
1179+static char* progname;
1180+
1181+static int loop_once(void)
1182+{
1183+ int i, j = 0;
1184+ for (i = 0; i < NUMS; i++)
1185+ j += num[i]++;
1186+ return j;
1187+}
1188+
1189+static int loop_for(double exec_time)
1190+{
1191+ double t = 0;
1192+ int tmp = 0;
1193+/* while (t + loop_length < exec_time) {
1194+ tmp += loop_once();
1195+ t += loop_length;
1196+ }
1197+*/
1198+ double start = cputime();
1199+ double now = cputime();
1200+ while (now + loop_length < start + exec_time) {
1201+ tmp += loop_once();
1202+ t += loop_length;
1203+ now = cputime();
1204+ }
1205+
1206+ return tmp;
1207+}
1208+
1209+static void fine_tune(double interval)
1210+{
1211+ double start, end, delta;
1212+
1213+ start = wctime();
1214+ loop_for(interval);
1215+ end = wctime();
1216+ delta = (end - start - interval) / interval;
1217+ if (delta != 0)
1218+ loop_length = loop_length / (1 - delta);
1219+}
1220+
1221+static void configure_loop(void)
1222+{
1223+ double start;
1224+
1225+ /* prime cache */
1226+ loop_once();
1227+ loop_once();
1228+ loop_once();
1229+
1230+ /* measure */
1231+ start = wctime();
1232+ loop_once(); /* hope we didn't get preempted */
1233+ loop_length = wctime();
1234+ loop_length -= start;
1235+
1236+ /* fine tune */
1237+ fine_tune(0.1);
1238+ fine_tune(0.1);
1239+ fine_tune(0.1);
1240+}
1241+
1242+static void show_loop_length(void)
1243+{
1244+ printf("%s/%d: loop_length=%f (%ldus)\n",
1245+ progname, getpid(), loop_length,
1246+ (long) (loop_length * 1000000));
1247+}
1248+
1249+static void debug_delay_loop(void)
1250+{
1251+ double start, end, delay;
1252+ show_loop_length();
1253+ while (1) {
1254+ for (delay = 0.5; delay > 0.01; delay -= 0.01) {
1255+ start = wctime();
1256+ loop_for(delay);
1257+ end = wctime();
1258+ printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n",
1259+ delay,
1260+ end - start,
1261+ end - start - delay,
1262+ 100 * (end - start - delay) / delay);
1263+ }
1264+ }
1265+}
1266+
1267+static int job(double exec_time)
1268+{
1269+ loop_for(exec_time);
1270+ sleep_next_period();
1271+ return 0;
1272+}
1273+
1274+#define OPTSTR "c:wld:v"
1275+
1276+int main(int argc, char** argv)
1277+{
1278+ int ret;
1279+ lt_t wcet;
1280+ lt_t period;
1281+ double wcet_ms, period_ms;
1282+ int migrate = 0;
1283+ int cpu = 0;
1284+ int npsf_id = 0;
1285+ int opt;
1286+ int wait = 0;
1287+ int test_loop = 0;
1288+ int skip_config = 0;
1289+ int verbose = 0;
1290+ double duration, start;
1291+ task_class_t class = RT_CLASS_HARD;
1292+
1293+ progname = argv[0];
1294+
1295+ while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
1296+ switch (opt) {
1297+ case 'w':
1298+ wait = 1;
1299+ break;
1300+ case 'c':
1301+ class = str2class(optarg);
1302+ if (class == -1)
1303+ usage("Unknown task class.");
1304+ break;
1305+ case 'l':
1306+ test_loop = 1;
1307+ break;
1308+ case 'd':
1309+ /* manually configure delay per loop iteration
1310+ * unit: microseconds */
1311+ loop_length = atof(optarg) / 1000000;
1312+ skip_config = 1;
1313+ break;
1314+ case 'v':
1315+ verbose = 1;
1316+ break;
1317+ case ':':
1318+ usage("Argument missing.");
1319+ break;
1320+ case '?':
1321+ default:
1322+ usage("Bad argument.");
1323+ break;
1324+ }
1325+ }
1326+
1327+
1328+ if (!skip_config)
1329+ configure_loop();
1330+
1331+ if (test_loop) {
1332+ debug_delay_loop();
1333+ return 0;
1334+ }
1335+
1336+ if (argc - optind < 5)
1337+ usage("Arguments missing.");
1338+ wcet_ms = atof(argv[optind + 0]);
1339+ period_ms = atof(argv[optind + 1]);
1340+ duration = atof(argv[optind + 2]);
1341+ cpu = atoi(argv[optind + 3]);
1342+ migrate = 1;
1343+ npsf_id = atoi(argv[optind + 4]);
1344+ wcet = wcet_ms * __NS_PER_MS;
1345+ period = period_ms * __NS_PER_MS;
1346+ if (wcet <= 0)
1347+ usage("The worst-case execution time must be a "
1348+ "positive number.");
1349+ if (period <= 0)
1350+ usage("The period must be a positive number.");
1351+ if (wcet > period) {
1352+ usage("The worst-case execution time must not "
1353+ "exceed the period.");
1354+ }
1355+
1356+ if (migrate) {
1357+ ret = be_migrate_to(cpu);
1358+ if (ret < 0)
1359+ bail_out("could not migrate to target partition");
1360+ }
1361+
1362+ ret = sporadic_task_ns_npsf(wcet, period, 0, cpu, class, npsf_id,
1363+ NO_ENFORCEMENT, migrate);
1364+
1365+ if (ret < 0)
1366+ bail_out("could not setup rt task params");
1367+
1368+ if (verbose)
1369+ show_loop_length();
1370+
1371+ init_litmus();
1372+
1373+ ret = task_mode(LITMUS_RT_TASK);
1374+ if (ret != 0)
1375+ bail_out("could not become RT task");
1376+
1377+ if (wait) {
1378+ ret = wait_for_ts_release();
1379+ if (ret != 0)
1380+ bail_out("wait_for_ts_release()");
1381+ }
1382+
1383+ start = wctime();
1384+
1385+ while (start + duration > wctime()) {
1386+ job(wcet_ms * 0.0009); /* 90% wcet, in seconds */
1387+ }
1388+
1389+ return 0;
1390+}
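For example, 'rtspin_npsf -w 10 100 30 2 0' (hypothetical values) spins for 30 seconds as a task with a 10 ms WCET and a 100 ms period on CPU 2, assigned to npsf-id 0.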
1391diff --git a/include/common.h b/include/common.h
1392index d1234ba..dbcfd34 100644
1393--- a/include/common.h
1394+++ b/include/common.h
1395@@ -1,7 +1,11 @@
1396 #ifndef COMMON_H
1397 #define COMMON_H
1398
1399+#include "litmus.h"
1400
1401 void bail_out(const char* msg);
1402
1403+/* EDF-WM helper functions to parse task parameters from file */
1404+int parse_edfwm_ts_file(FILE *ts, struct rt_task *rt);
1405+
1406 #endif
1407diff --git a/include/litmus.h b/include/litmus.h
1408index b798c92..9179ca6 100644
1409--- a/include/litmus.h
1410+++ b/include/litmus.h
1411@@ -9,6 +9,7 @@ extern "C" {
1412 * This is required for the rt_param
1413 * and control_page structures.
1414 */
1415+#include <linux/threads.h>
1416 #include <litmus/rt_param.h>
1417
1418 #include <sys/types.h>
1419@@ -40,6 +41,19 @@ int sporadic_task_ns(
1420 int cpu, task_class_t cls,
1421 budget_policy_t budget_policy, int set_cpu_set);
1422
1423+int sporadic_task_ns_edffm(lt_t e, lt_t p, lt_t phase, int cpu,
1424+ lt_t *frac1, lt_t *frac2, int cpu1, int cpu2,
1425+ task_class_t cls, budget_policy_t budget_policy,
1426+ int set_cpu_set);
1427+
1428+int sporadic_task_ns_npsf(
1429+ lt_t e, lt_t p, lt_t phase,
1430+ int cpu, task_class_t cls, int npsf_id,
1431+ budget_policy_t budget_policy, int set_cpu_set);
1432+
1433+/* times are in ns, specific helper for semi-partitioned algos */
1434+int sporadic_task_ns_semi(struct rt_task *rt);
1435+
1436 /* budget enforcement off by default in these macros */
1437 #define sporadic_global(e, p) \
1438 sporadic_task(e, p, 0, 0, RT_CLASS_SOFT, NO_ENFORCEMENT, 0)
1439@@ -85,6 +99,17 @@ typedef int (*rt_fn_t)(void*);
1440 int create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period);
1441 int __create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet,
1442 int period, task_class_t cls);
1443+int __create_rt_task_edffm(rt_fn_t rt_prog, void *arg, int cpu, int wcet,
1444+ int period, lt_t *frac1, lt_t *frac2,
1445+ int cpu1, int cpu2, task_class_t class);
1446+int __create_rt_task_npsf(rt_fn_t rt_prog, void *arg, int cpu, int wcet,
1447+ int period, int npsf_id, task_class_t class);
1448+
1449+/* wrapper to mask __launch_rt_task() for semi-partitioned algorithms
1450+ * (it can be extended to cover all algorithms that directly submit
1451+ * an rt_task structure instead of a set of values).
1452+ */
1453+int create_rt_task_semi(rt_fn_t rt_prog, void *arg, struct rt_task *params);
1454
1455 /* per-task modes */
1456 enum rt_task_mode_t {
1457@@ -134,6 +159,21 @@ int null_call(cycles_t *timestamp);
1458 */
1459 struct control_page* get_ctrl_page(void);
1460
1461+/* NPS-F syscall to add a notional processor (a server) to a cpu.
1462+ * A notional processor may span multiple cpus.
1463+ *
1464+ * @npsf_id: a "unique" identifier for the notional processor.
1465+ * @budgets: array of (cpu, budget-ns) entries that describe this np,
1466+ * possibly spanning more than one cpu.
1467+ * @last: marks the end of server initialization and triggers
1468+ * the switching of servers in the plugin.
1469+ * Should be set to 1 only once at the end of the sequence
1470+ * of add_server() calls
1471+ *
1472+ * Currently implemented on x86_64 only.
1473+ */
1474+int add_server(int *npsf_id, struct npsf_budgets *budgets, int last);
1475+
1476 #ifdef __cplusplus
1477 }
1478 #endif
1479diff --git a/src/litmus.c b/src/litmus.c
1480index d3cc6bb..c0dae95 100644
1481--- a/src/litmus.c
1482+++ b/src/litmus.c
1483@@ -29,8 +29,6 @@ task_class_t str2class(const char* str)
1484 return -1;
1485 }
1486
1487-#define NS_PER_MS 1000000
1488-
1489 /* only for best-effort execution: migrate to target_cpu */
1490 int be_migrate_to(int target_cpu)
1491 {
1492@@ -45,7 +43,7 @@ int sporadic_task(lt_t e, lt_t p, lt_t phase,
1493 int cpu, task_class_t cls,
1494 budget_policy_t budget_policy, int set_cpu_set)
1495 {
1496- return sporadic_task_ns(e * NS_PER_MS, p * NS_PER_MS, phase * NS_PER_MS,
1497+ return sporadic_task_ns(e * __NS_PER_MS, p * __NS_PER_MS, phase * __NS_PER_MS,
1498 cpu, cls, budget_policy, set_cpu_set);
1499 }
1500
1501@@ -75,6 +73,72 @@ int sporadic_task_ns(lt_t e, lt_t p, lt_t phase,
1502 return set_rt_task_param(gettid(), &param);
1503 }
1504
1505+int sporadic_task_ns_edffm(lt_t e, lt_t p, lt_t phase, int cpu,
1506+ lt_t *frac1, lt_t *frac2, int cpu1, int cpu2,
1507+ task_class_t cls, budget_policy_t budget_policy,
1508+ int set_cpu_set)
1509+{
1510+ struct rt_task param;
1511+ struct edffm_params fm;
1512+ int ret;
1513+ param.exec_cost = e;
1514+ param.period = p;
1515+ param.cpu = cpu;
1516+ /* check on denominators */
1517+ if (frac1[1] != 0 && frac2[1] != 0) {
1518+ /* edf-fm migratory task */
1519+ fm.nr_cpus = 1;
1520+ fm.cpus[0] = cpu1;
1521+ fm.cpus[1] = cpu2;
1522+ fm.fraction[0][0] = frac1[0];
1523+ fm.fraction[1][0] = frac1[1];
1524+ fm.fraction[0][1] = frac2[0];
1525+ fm.fraction[1][1] = frac2[1];
1526+ }
1527+ param.semi_part.fm = fm;
1528+ param.cls = cls;
1529+ param.phase = phase;
1530+ param.budget_policy = budget_policy;
1531+
1532+ if (set_cpu_set) {
1533+ ret = be_migrate_to(cpu);
1534+ check("migrate to cpu");
1535+ }
1536+ return set_rt_task_param(gettid(), &param);
1537+}
1538+
1539+int sporadic_task_ns_npsf(lt_t e, lt_t p, lt_t phase,
1540+ int cpu, task_class_t cls, int npsf_id,
1541+ budget_policy_t budget_policy, int set_cpu_set)
1542+{
1543+ struct rt_task param;
1544+ int ret;
1545+ param.exec_cost = e;
1546+ param.period = p;
1547+ param.cpu = cpu;
1548+ param.cls = cls;
1549+ param.phase = phase;
1550+ param.budget_policy = budget_policy;
1551+ param.semi_part.npsf_id = (int) npsf_id;
1552+
1553+ if (set_cpu_set) {
1554+ ret = be_migrate_to(cpu);
1555+ check("migrate to cpu");
1556+ }
1557+ return set_rt_task_param(gettid(), &param);
1558+}
1559+
1560+/* Sporadic task helper function for Semi-Partitioned algorithms. */
1561+int sporadic_task_ns_semi(struct rt_task *param)
1562+{
1563+ int ret;
1564+
1565+ ret = be_migrate_to(param->cpu);
1566+ check("migrate to cpu");
1567+
1568+ return set_rt_task_param(gettid(), param);
1569+}
1570+
1571 int init_kernel_iface(void);
1572
1573 int init_litmus(void)
1574diff --git a/src/syscalls.c b/src/syscalls.c
1575index 77a6277..7ac488a 100644
1576--- a/src/syscalls.c
1577+++ b/src/syscalls.c
1578@@ -95,3 +95,8 @@ int null_call(cycles_t *timestamp)
1579 {
1580 return syscall(__NR_null_call, timestamp);
1581 }
1582+
1583+int add_server(int *npsf_id, struct npsf_budgets *budgets, int last)
1584+{
1585+ return syscall(__NR_add_server, npsf_id, budgets, last);
1586+}
1587diff --git a/src/task.c b/src/task.c
1588index 4d237bd..daf95ca 100644
1589--- a/src/task.c
1590+++ b/src/task.c
1591@@ -40,6 +40,54 @@ int __launch_rt_task(rt_fn_t rt_prog, void *rt_arg, rt_setup_fn_t setup,
1592 return rt_task;
1593 }
1594
1595+int __create_rt_task_edffm(rt_fn_t rt_prog, void *arg, int cpu, int wcet,
1596+ int period, lt_t *frac1, lt_t *frac2,
1597+ int cpu1, int cpu2, task_class_t class)
1598+{
1599+ struct rt_task params;
1600+ struct edffm_params fm;
1601+ params.cpu = cpu;
1602+ params.period = period;
1603+ params.exec_cost = wcet;
1604+ params.cls = class;
1605+ params.phase = 0;
1606+ /* enforce budget for tasks that might not use sleep_next_period() */
1607+ params.budget_policy = QUANTUM_ENFORCEMENT;
1608+
1609+ /* edf-fm check on denominators for migratory tasks */
1610+ if (frac1[1] != 0 && frac2[1] != 0) {
1611+ /* edf-fm migratory task */
1612+ fm.nr_cpus = 1;
1613+ fm.cpus[0] = cpu1;
1614+ fm.cpus[1] = cpu2;
1615+ fm.fraction[0][0] = frac1[0];
1616+ fm.fraction[1][0] = frac1[1];
1617+ fm.fraction[0][1] = frac2[0];
1618+ fm.fraction[1][1] = frac2[1];
1619+ }
1620+ params.semi_part.fm = fm;
1621+
1622+ return __launch_rt_task(rt_prog, arg,
1623+ (rt_setup_fn_t) set_rt_task_param, &params);
1624+}
1625+
1626+int __create_rt_task_npsf(rt_fn_t rt_prog, void *arg, int cpu, int wcet,
1627+ int period, int npsf_id, task_class_t class)
1628+{
1629+ struct rt_task params;
1630+ params.cpu = cpu;
1631+ params.period = period;
1632+ params.exec_cost = wcet;
1633+ params.cls = class;
1634+ params.phase = 0;
1635+ /* enforce budget for tasks that might not use sleep_next_period() */
1636+ params.budget_policy = QUANTUM_ENFORCEMENT;
1637+ params.semi_part.npsf_id = (int) npsf_id;
1638+
1639+ return __launch_rt_task(rt_prog, arg,
1640+ (rt_setup_fn_t) set_rt_task_param, &params);
1641+}
1642+
1643 int __create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period,
1644 task_class_t class)
1645 {
1646@@ -60,6 +108,11 @@ int create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period) {
1647 return __create_rt_task(rt_prog, arg, cpu, wcet, period, RT_CLASS_HARD);
1648 }
1649
1650+int create_rt_task_semi(rt_fn_t rt_prog, void *arg, struct rt_task *params)
1651+{
1652+ return __launch_rt_task(rt_prog, arg,
1653+ (rt_setup_fn_t) set_rt_task_param, params);
1654+}
1655
1656 #define SCHED_NORMAL 0
1657 #define SCHED_LITMUS 6
diff --git a/download/ECRTS11/litmus-rt-semi-part.patch b/download/ECRTS11/litmus-rt-semi-part.patch
new file mode 100644
index 0000000..67cb738
--- /dev/null
+++ b/download/ECRTS11/litmus-rt-semi-part.patch
@@ -0,0 +1,2809 @@
1diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
2index 5d20276..8672398 100644
3--- a/include/litmus/litmus.h
4+++ b/include/litmus/litmus.h
5@@ -88,7 +88,7 @@ inline static int budget_exhausted(struct task_struct* t)
6 inline static lt_t budget_remaining(struct task_struct* t)
7 {
8 if (!budget_exhausted(t))
9- return get_exec_time(t) - get_exec_cost(t);
10+ return get_exec_cost(t) - get_exec_time(t);
11 else
12 /* avoid overflow */
13 return 0;
14diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
15index a7a183f..6f43dea 100644
16--- a/include/litmus/rt_param.h
17+++ b/include/litmus/rt_param.h
18@@ -1,3 +1,5 @@
19+#include <linux/threads.h>
20+
21 /*
22 * Definition of the scheduler plugin interface.
23 *
24@@ -33,6 +35,72 @@ typedef enum {
25 PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
26 } budget_policy_t;
27
28+
29+/* The parameters for EDF-Fm scheduling algorithm.
30+ * Each task may be fixed or migratory. Migratory tasks may
31+ * migrate between 2 (contiguous) CPUs only. NR_CPUS_EDF_FM = 2.
32+ */
33+#define NR_CPUS_EDF_FM 2
34+
35+struct edffm_params {
36+ /* EDF-fm where can a migratory task execute? */
37+ unsigned int cpus[NR_CPUS_EDF_FM];
38+ /* how many cpus are used by this task?
39+ * fixed = 0, migratory = (NR_CPUS_EDF_FM - 1)
40+ * Efficient way to allow writing cpus[nr_cpus].
41+ */
42+ unsigned int nr_cpus;
43+ /* Fraction of this task exec_cost that each CPU should handle.
44+ * We keep the fraction divided in num/denom : a matrix of
45+ * (NR_CPUS_EDF_FM rows) x (2 columns).
46+ * The first column is the numerator of the fraction.
47+ * The second column is the denominator.
48+ * In EDF-fm this is a 2*2 matrix
49+ */
50+ lt_t fraction[2][NR_CPUS_EDF_FM];
51+};
52+
53+/* Parameters for NPS-F semi-partitioned scheduling algorithm.
54+ * Each (cpu, budget) entry defines the share ('budget' in ns, a % of
55+ * the slot_length) of the notional processor on the CPU 'cpu'.
56+ * This structure is used by the library - syscall interface in order
57+ * to go through the overhead of a syscall only once per server.
58+ */
59+struct npsf_budgets {
60+ int cpu;
61+ lt_t budget;
62+};
63+
64+/* The parameters for the EDF-WM semi-partitioned scheduler.
65+ * Each task may be split across multiple cpus. Each per-cpu allocation
66+ * is called a 'slice'.
67+ */
68+#define MAX_EDF_WM_SLICES 24
69+#define MIN_EDF_WM_SLICE_SIZE 50000 /* .05 millisecond = 50us */
70+
71+struct edf_wm_slice {
72+ /* on which CPU is this slice allocated */
73+ unsigned int cpu;
74+ /* relative deadline from job release (not from slice release!) */
75+ lt_t deadline;
76+ /* budget of this slice; must be precisely enforced */
77+ lt_t budget;
78+ /* offset of this slice relative to the job release */
79+ lt_t offset;
80+};
81+
82+/* If a job is not sliced across multiple CPUs, then
83+ * count is set to zero and none of the slices is used.
84+ * This implies that count == 1 is illegal.
85+ */
86+struct edf_wm_params {
87+ /* enumeration of all slices */
88+ struct edf_wm_slice slices[MAX_EDF_WM_SLICES];
89+
90+ /* how many slices are defined? */
91+ unsigned int count;
92+};
93+
94 struct rt_task {
95 lt_t exec_cost;
96 lt_t period;
97@@ -40,6 +108,22 @@ struct rt_task {
98 unsigned int cpu;
99 task_class_t cls;
100 budget_policy_t budget_policy; /* ignored by pfair */
101+
102+ /* parameters used by the semi-partitioned algorithms */
103+ union {
104+ /* EDF-Fm; defined in sched_edf_fm.c */
105+ struct edffm_params fm;
106+
107+ /* NPS-F; defined in sched_npsf.c
108+ * id for the server (notional processor) that holds
109+ * this task; the same npsf_id can be assigned to "the same"
110+ * server split on different cpus
111+ */
112+ int npsf_id;
113+
114+ /* EDF-WM; defined in sched_edf_wm.c */
115+ struct edf_wm_params wm;
116+ } semi_part;
117 };
118
119 /* The definition of the data that is shared between the kernel and real-time
120@@ -184,6 +268,27 @@ struct rt_param {
121
122 /* Pointer to the page shared between userspace and kernel. */
123 struct control_page * ctrl_page;
124+
125+ /* runtime info for the semi-part plugins */
126+ union {
127+ /* EDF-Fm runtime information
128+ * number of jobs handled by this cpu
129+ * (to determine next cpu for a migrating task)
130+ */
131+ unsigned int cpu_job_no[NR_CPUS_EDF_FM];
132+
133+ /* EDF-WM runtime information */
134+ struct {
135+ /* at which exec time did the current slice start? */
136+ lt_t exec_time;
137+ /* when did the job suspend? */
138+ lt_t suspend_time;
139+ /* cached job parameters */
140+ lt_t job_release, job_deadline;
141+ /* pointer to the current slice */
142+ struct edf_wm_slice* slice;
143+ } wm;
144+ } semi_part;
145 };
146
147 /* Possible RT flags */
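To make the new per-task parameters concrete, the following minimal user-space sketch (assuming liblitmus' litmus.h, which pulls in this rt_param.h; all values hypothetical, times in nanoseconds) fills in struct rt_task for an EDF-WM task split into two slices, mirroring what parse_edfwm_ts_file() does in the liblitmus patch above:

    #include <string.h>
    #include "litmus.h"

    static void setup_split_task(struct rt_task *rt)
    {
        /* same cast as used in the liblitmus patch */
        struct edf_wm_params *wm = (struct edf_wm_params *) &rt->semi_part;

        memset(rt, 0, sizeof(*rt));
        rt->exec_cost     = 2500000;       /* 2.5 ms = sum of slice budgets */
        rt->period        = 20000000;      /* 20 ms */
        rt->cpu           = 1;             /* cpu of the first slice */
        rt->cls           = RT_CLASS_HARD;
        rt->budget_policy = PRECISE_ENFORCEMENT;

        wm->count = 2;                     /* count == 1 is illegal */
        wm->slices[0].cpu      = 1;
        wm->slices[0].deadline = 10000000; /* relative to the job release */
        wm->slices[0].budget   =  1400000; /* >= MIN_EDF_WM_SLICE_SIZE */
        wm->slices[0].offset   =        0;
        wm->slices[1].cpu      = 2;
        wm->slices[1].deadline = 20000000;
        wm->slices[1].budget   =  1100000;
        wm->slices[1].offset   = 10000000;
    }
    /* the task is then admitted with sporadic_task_ns_semi(rt) */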
148diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
149index 9c1c9f2..7ea9176 100644
150--- a/include/litmus/sched_plugin.h
151+++ b/include/litmus/sched_plugin.h
152@@ -6,6 +6,8 @@
153 #define _LINUX_SCHED_PLUGIN_H_
154
155 #include <linux/sched.h>
156+/* NSEC_PER... conversions */
157+#include <linux/time.h>
158
159 /* struct for semaphore with priority inheritance */
160 struct pi_semaphore {
161@@ -136,6 +138,9 @@ extern struct sched_plugin *litmus;
162 /* cluster size: cache_index = 2 L2, cache_index = 3 L3 */
163 extern int cluster_cache_index;
164
165+/* Slot length (ns) for NPS-F semi-part. algo */
166+extern lt_t npsf_slot_length;
167+
168 int register_sched_plugin(struct sched_plugin* plugin);
169 struct sched_plugin* find_sched_plugin(const char* name);
170 int print_sched_plugins(char* buf, int max);
171diff --git a/include/litmus/trace.h b/include/litmus/trace.h
172index b32c711..6afbf96 100644
173--- a/include/litmus/trace.h
174+++ b/include/litmus/trace.h
175@@ -78,6 +78,8 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu)
176 #define TS_TICK_START(t) TTIMESTAMP(110, t)
177 #define TS_TICK_END(t) TTIMESTAMP(111, t)
178
179+#define TS_PULL_TIMER_START TIMESTAMP(112)
180+#define TS_PULL_TIMER_END TIMESTAMP(113)
181
182 #define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
183 #define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
184diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
185index f0618e7..4e82c52 100644
186--- a/include/litmus/unistd_64.h
187+++ b/include/litmus/unistd_64.h
188@@ -33,5 +33,7 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
189 __SYSCALL(__NR_release_ts, sys_release_ts)
190 #define __NR_null_call __LSC(13)
191 __SYSCALL(__NR_null_call, sys_null_call)
192+#define __NR_add_server __LSC(14)
193+__SYSCALL(__NR_add_server, sys_add_server)
194
195-#define NR_litmus_syscalls 14
196+#define NR_litmus_syscalls 15
197diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
198index fdf9596..23d3712 100644
199--- a/kernel/hrtimer.c
200+++ b/kernel/hrtimer.c
201@@ -47,6 +47,7 @@
202 #include <linux/timer.h>
203
204 #include <litmus/litmus.h>
205+#include <litmus/trace.h>
206
207 #include <asm/uaccess.h>
208
209@@ -1063,6 +1064,7 @@ void hrtimer_pull(void)
210 struct hrtimer_start_on_info *info;
211 struct list_head *pos, *safe, list;
212
213+ TS_PULL_TIMER_START;
214 raw_spin_lock(&base->lock);
215 list_replace_init(&base->to_pull, &list);
216 raw_spin_unlock(&base->lock);
217@@ -1073,6 +1075,7 @@ void hrtimer_pull(void)
218 list_del(pos);
219 hrtimer_start(info->timer, info->time, info->mode);
220 }
221+ TS_PULL_TIMER_END;
222 }
223
224 /**
225diff --git a/litmus/Makefile b/litmus/Makefile
226index f301d28..5533a58 100644
227--- a/litmus/Makefile
228+++ b/litmus/Makefile
229@@ -14,7 +14,10 @@ obj-y = sched_plugin.o litmus.o \
230 bheap.o \
231 ctrldev.o \
232 sched_gsn_edf.o \
233- sched_psn_edf.o
234+ sched_psn_edf.o \
235+ sched_edf_wm.o \
236+ sched_npsf.o \
237+ sched_edf_fm.o
238
239 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
240 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
241diff --git a/litmus/litmus.c b/litmus/litmus.c
242index b04a42b..2f78022 100644
243--- a/litmus/litmus.c
244+++ b/litmus/litmus.c
245@@ -632,6 +632,55 @@ static int proc_write_cluster_size(struct file *file,
246 return len;
247 }
248
249+static int proc_read_npsf_slot_length(char *page, char **start,
250+ off_t off, int count,
251+ int *eof, void *data)
252+{
253+ return snprintf(page, PAGE_SIZE, "%d us\n",
254+ (int) (npsf_slot_length / NSEC_PER_USEC));
255+}
256+
257+extern void npsf_hrtimers_cleanup(void);
258+/* NPS-F slot length in us.
259+ *
260+ * Writing 0 as npsf_slot_length will trigger the removal of the
261+ * hrtimers for the domain_reschedule_tick() in the NPS-F plugin.
262+ */
263+static int proc_write_npsf_slot_length(struct file *file,
264+ const char *buffer,
265+ unsigned long count,
266+ void *data)
267+{
268+ int err, slot_length;
269+ char msg[64];
270+
271+ if (count > 63)
272+ return -EINVAL;
273+
274+ if (copy_from_user(msg, buffer, count))
275+ return -EFAULT;
276+
277+ /* terminate */
278+ msg[count] = '\0';
279+ /* chomp */
280+ if (count > 1 && msg[count - 1] == '\n')
281+ msg[count - 1] = '\0';
282+
283+ err = sscanf(msg, "%d", &slot_length);
284+
285+ if (err == 1) {
286+ if (!slot_length) {
287+ npsf_hrtimers_cleanup();
288+ /* reset to default */
289+ slot_length = 5000;
290+ }
291+ npsf_slot_length = (lt_t)((lt_t) slot_length * NSEC_PER_USEC);
292+ return count;
293+ }
294+
295+ return -EINVAL;
296+}
297+
298 #ifdef CONFIG_RELEASE_MASTER
299 static int proc_read_release_master(char *page, char **start,
300 off_t off, int count,
301@@ -691,7 +740,8 @@ static struct proc_dir_entry *litmus_dir = NULL,
302 #ifdef CONFIG_RELEASE_MASTER
303 *release_master_file = NULL,
304 #endif
305- *clus_cache_idx_file = NULL;
306+ *clus_cache_idx_file = NULL,
307+ *npsf_slot_length_file = NULL;
308
309 static int __init init_litmus_proc(void)
310 {
311@@ -733,6 +783,16 @@ static int __init init_litmus_proc(void)
312 clus_cache_idx_file->read_proc = proc_read_cluster_size;
313 clus_cache_idx_file->write_proc = proc_write_cluster_size;
314
315+ npsf_slot_length_file = create_proc_entry("npsf_slot_length",
316+ 0644, litmus_dir);
317+ if (!npsf_slot_length_file) {
318+ printk(KERN_ERR "Could not allocate npsf_slot_length "
319+ "procfs entry.\n");
320+ return -ENOMEM;
321+ }
322+ npsf_slot_length_file->read_proc = proc_read_npsf_slot_length;
323+ npsf_slot_length_file->write_proc = proc_write_npsf_slot_length;
324+
325 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
326 proc_read_stats, NULL);
327
328@@ -752,6 +812,8 @@ static void exit_litmus_proc(void)
329 remove_proc_entry("active_plugin", litmus_dir);
330 if (clus_cache_idx_file)
331 remove_proc_entry("cluster_cache", litmus_dir);
332+ if (npsf_slot_length_file)
333+ remove_proc_entry("npsf_slot_length", litmus_dir);
334 #ifdef CONFIG_RELEASE_MASTER
335 if (release_master_file)
336 remove_proc_entry("release_master", litmus_dir);
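A usage sketch for the new proc handle (assuming the standard /proc/litmus directory created by init_litmus_proc()):

    cat /proc/litmus/npsf_slot_length          # report current slot length in us
    echo 8000 > /proc/litmus/npsf_slot_length  # set an 8 ms NPS-F slot
    echo 0 > /proc/litmus/npsf_slot_length     # remove the reschedule-tick hrtimers
                                               # and reset the 5000 us default

Writing 0 invokes npsf_hrtimers_cleanup(), as described in the comment above proc_write_npsf_slot_length().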
337diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c
338new file mode 100644
339index 0000000..b721072
340--- /dev/null
341+++ b/litmus/sched_edf_fm.c
342@@ -0,0 +1,565 @@
343+/*
344+ * litmus/sched_edf_fm.c
345+ *
346+ * Implementation of the EDF-fm scheduling algorithm.
347+ */
348+
349+#include <linux/percpu.h>
350+#include <linux/sched.h>
351+#include <linux/list.h>
352+#include <linux/spinlock.h>
353+
354+#include <linux/module.h>
355+
356+#include <litmus/litmus.h>
357+#include <litmus/jobs.h>
358+#include <litmus/sched_plugin.h>
359+#include <litmus/edf_common.h>
360+
361+typedef struct {
362+ rt_domain_t domain;
363+ int cpu;
364+ struct task_struct* scheduled; /* only RT tasks */
365+/* domain lock */
366+#define slock domain.ready_lock
367+} edffm_domain_t;
368+
369+DEFINE_PER_CPU(edffm_domain_t, edffm_domains);
370+
371+#define local_edffm (&__get_cpu_var(edffm_domains))
372+#define remote_edf(cpu) (&per_cpu(edffm_domains, cpu).domain)
373+#define remote_edffm(cpu) (&per_cpu(edffm_domains, cpu))
374+#define task_edf(task) remote_edf(get_partition(task))
375+#define task_edffm(task) remote_edffm(get_partition(task))
376+
377+#define edffm_params(t) (t->rt_param.task_params.semi_part.fm)
378+
379+/* Is the task a migratory task? */
380+#define is_migrat_task(task) (edffm_params(task).nr_cpus)
381+/* t is on the wrong CPU (it should be requeued properly) */
382+#define wrong_cpu(t) is_migrat_task((t)) && task_cpu((t)) != get_partition((t))
383+/* Get next CPU */
384+#define migrat_next_cpu(t) \
385+ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
386+ edffm_params(t).cpus[1] : \
387+ edffm_params(t).cpus[0])
388+/* Get current cpu */
389+#define migrat_cur_cpu(t) \
390+ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
391+ edffm_params(t).cpus[0] : \
392+ edffm_params(t).cpus[1])
393+/* Manipulate share for current cpu */
394+#define cur_cpu_fract_num(t) \
395+ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
396+ edffm_params(t).fraction[0][0] : \
397+ edffm_params(t).fraction[0][1])
398+#define cur_cpu_fract_den(t) \
399+ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
400+ edffm_params(t).fraction[1][0] : \
401+ edffm_params(t).fraction[1][1])
402+/* Get job number for current cpu */
403+#define cur_cpu_job_no(t) \
404+ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
405+ tsk_rt(t)->semi_part.cpu_job_no[0] : \
406+ tsk_rt(t)->semi_part.cpu_job_no[1])
407+/* What is the current cpu position in the array? */
408+#define edffm_cpu_pos(cpu,t) \
409+ ((cpu == edffm_params(t).cpus[0]) ? \
410+ 0 : 1)
411+
412+/*
413+ * EDF-fm: migratory tasks have higher prio than fixed, EDF in both classes.
414+ * (Both first and second may be NULL).
415+ */
416+int edffm_higher_prio(struct task_struct* first, struct task_struct* second)
417+{
418+ if ((first && edffm_params(first).nr_cpus) ||
419+ (second && edffm_params(second).nr_cpus)) {
420+ if ((first && edffm_params(first).nr_cpus) &&
421+ (second && edffm_params(second).nr_cpus))
422+ /* both are migrating */
423+ return edf_higher_prio(first, second);
424+
425+ if (first && edffm_params(first).nr_cpus)
426+ /* first is migrating */
427+ return 1;
428+ else
429+ /* second is migrating */
430+ return 0;
431+ }
432+
433+ /* both are fixed or not real time */
434+ return edf_higher_prio(first, second);
435+}
436+
437+/* edffm_preemption_needed - check whether the task t needs to be preempted
438+ * call only with irqs disabled and with ready_lock acquired
439+ */
440+int edffm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
441+{
442+ /* we need the read lock for edf_ready_queue */
443+ /* no need to preempt if there is nothing pending */
444+ if (!__jobs_pending(rt))
445+ return 0;
446+ /* we need to reschedule if t doesn't exist */
447+ if (!t)
448+ return 1;
449+
450+ /* make sure to get non-rt stuff out of the way */
451+ return !is_realtime(t) || edffm_higher_prio(__next_ready(rt), t);
452+}
453+
454+/* we assume the lock is being held */
455+static void preempt(edffm_domain_t *edffm)
456+{
457+ preempt_if_preemptable(edffm->scheduled, edffm->cpu);
458+}
459+
460+static void edffm_release_jobs(rt_domain_t* rt, struct bheap* tasks)
461+{
462+ unsigned long flags;
463+ edffm_domain_t *edffm = container_of(rt, edffm_domain_t, domain);
464+
465+ raw_spin_lock_irqsave(&edffm->slock, flags);
466+
467+ __merge_ready(rt, tasks);
468+
469+ if (edffm_preemption_needed(rt, edffm->scheduled))
470+ preempt(edffm);
471+
472+ raw_spin_unlock_irqrestore(&edffm->slock, flags);
473+}
474+
475+/* EDF-fm uses the "release_master" field to force the next release of
476+ * the task 'task' to happen on a remote CPU. The remote cpu for the task
477+ * was set up earlier, during job_completion(), taking into account
478+ * whether the task is migratory or not.
479+ */
480+static inline void
481+edffm_add_release_remote(struct task_struct *task)
482+{
483+ unsigned long flags;
484+ rt_domain_t *rt = task_edf(task);
485+
486+ raw_spin_lock_irqsave(&rt->tobe_lock, flags);
487+
488+ /* "modify" destination cpu */
489+ rt->release_master = get_partition(task);
490+
491+ TRACE_TASK(task, "Add remote release: smp_proc_id = %d, cpu = %d, remote = %d\n",
492+ smp_processor_id(), task_cpu(task), rt->release_master);
493+
494+ /* trigger future release */
495+ __add_release(rt, task);
496+
497+ /* reset proper release_master and unlock */
498+ rt->release_master = NO_CPU;
499+ raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
500+}
501+
502+/* Acquire both ready_queue locks in a fixed order (to avoid deadlock).
503+ * This is called with interrupts disabled and rq->lock held (from
504+ * schedule()).
505+ */
506+static noinline void double_domain_lock(edffm_domain_t *dom1, edffm_domain_t *dom2)
507+{
508+ if (dom1 == dom2) {
509+ /* fake */
510+ raw_spin_lock(&dom1->slock);
511+ } else {
512+ if (dom1 < dom2) {
513+ raw_spin_lock(&dom1->slock);
514+ raw_spin_lock(&dom2->slock);
515+ TRACE("acquired %d and %d\n", dom1->cpu, dom2->cpu);
516+ } else {
517+ raw_spin_lock(&dom2->slock);
518+ raw_spin_lock(&dom1->slock);
519+ TRACE("acquired %d and %d\n", dom2->cpu, dom1->cpu);
520+ }
521+ }
522+}
523+
524+/* Directly insert a task in a remote ready queue. This function
525+ * should only be called if this task is a migrating task and its
526+ * last job for this CPU just completed (a new one is released for
527+ * a remote CPU), but the new job is already tardy.
528+ */
529+static noinline void insert_task_in_remote_ready(struct task_struct *task)
530+{
531+ edffm_domain_t *this = remote_edffm(task_cpu(task));
532+ edffm_domain_t *remote = remote_edffm(get_partition(task));
533+
534+ BUG_ON(get_partition(task) != remote->cpu);
535+
536+ TRACE_TASK(task, "Migrate From P%d -> To P%d\n",
537+ this->cpu, remote->cpu);
538+ TRACE_TASK(task, "Inserting in remote ready queue\n");
539+
540+ WARN_ON(!irqs_disabled());
541+
542+ raw_spin_unlock(&this->slock);
543+ mb();
544+ TRACE_TASK(task,"edffm_lock %d released\n", this->cpu);
545+
546+ /* lock both ready queues */
547+ double_domain_lock(this, remote);
548+ mb();
549+
550+ __add_ready(&remote->domain, task);
551+
552+ /* release remote but keep ours */
553+ raw_spin_unlock(&remote->slock);
554+ TRACE_TASK(task,"edffm_lock %d released\n", remote->cpu);
555+
556+ /* ask remote cpu to reschedule, we are already rescheduling on this */
557+ preempt(remote);
558+}
559+
560+static void requeue(struct task_struct* t, rt_domain_t *edf)
561+{
562+ if (t->state != TASK_RUNNING)
563+ TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
564+
565+ set_rt_flags(t, RT_F_RUNNING);
566+ if (is_released(t, litmus_clock())) {
567+ if (wrong_cpu(t)) {
568+ /* this should only happen if t just completed, but
569+ * its next release is already tardy, so it should be
570+ * migrated and inserted in the remote ready queue
571+ */
572+ TRACE_TASK(t, "Migrating task already released, "
573+ "move from P%d to P%d\n",
574+ task_cpu(t), get_partition(t));
575+
576+ insert_task_in_remote_ready(t);
577+ } else {
578+ /* not a migrating task, or the job is on the right CPU */
579+ __add_ready(edf, t);
580+ }
581+ } else {
582+ if (wrong_cpu(t)) {
583+
584+ TRACE_TASK(t, "Migrating task, adding remote release\n");
585+ edffm_add_release_remote(t);
586+ } else {
587+ TRACE_TASK(t, "Adding local release\n");
588+ add_release(edf, t);
589+ }
590+ }
591+}
592+
593+/* Update statistics for the _current_ job.
594+ * - job_no was incremented _before_ starting this job
595+ * (release_at / prepare_for_next_period)
596+ * - cpu_job_no is incremented when the job completes
597+ */
598+static void update_job_counter(struct task_struct *t)
599+{
600+ int cpu_pos;
601+
602+ /* Which CPU counter should be incremented? */
603+ cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t);
604+ t->rt_param.semi_part.cpu_job_no[cpu_pos]++;
605+
606+ TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n",
607+ t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t),
608+ t->rt_param.task_params.cpu);
609+}
610+
611+/* What is the next cpu for this job? (Eq. 8 in the EDF-fm paper) */
612+static int next_cpu_for_job(struct task_struct *t)
613+{
614+ BUG_ON(!is_migrat_task(t));
615+
616+ TRACE_TASK(t, "%u = %u * %u / %u\n",
617+ t->rt_param.job_params.job_no, cur_cpu_job_no(t),
618+ cur_cpu_fract_den(t), cur_cpu_fract_num(t));
619+ if ((t->rt_param.job_params.job_no) ==
620+ (((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) /
621+ cur_cpu_fract_num(t)))
622+ return edffm_params(t).cpus[0];
623+
624+ return edffm_params(t).cpus[1];
625+}
626+
627+/* If needed (the share for task t on this CPU is exhausted), updates
628+ * the task_params.cpu for the _migrating_ task t
629+ */
630+static void change_migrat_cpu_if_needed(struct task_struct *t)
631+{
632+ BUG_ON(!is_migrat_task(t));
633+ /* EDF-fm: if it is a migrating task and it has already executed
634+ * the required number of jobs on this CPU, we need to move it
635+ * to its next CPU; changing the cpu here will affect the requeue
636+ * and the next release
637+ */
638+ if (unlikely(next_cpu_for_job(t) != migrat_cur_cpu(t))) {
639+
640+ tsk_rt(t)->task_params.cpu = migrat_next_cpu(t);
641+ TRACE_TASK(t, "EDF-fm: will migrate job %d -> %d\n",
642+ task_cpu(t), tsk_rt(t)->task_params.cpu);
643+ return;
644+ }
645+
646+ TRACE_TASK(t, "EDF-fm: job will stay on %d -> %d\n",
647+ task_cpu(t), tsk_rt(t)->task_params.cpu);
648+}
649+
650+static void job_completion(struct task_struct* t, int forced)
651+{
652+ sched_trace_task_completion(t,forced);
653+ TRACE_TASK(t, "job_completion().\n");
654+
655+ if (unlikely(is_migrat_task(t))) {
656+ update_job_counter(t);
657+ change_migrat_cpu_if_needed(t);
658+ }
659+
660+ set_rt_flags(t, RT_F_SLEEP);
661+ prepare_for_next_period(t);
662+}
663+
664+static void edffm_tick(struct task_struct *t)
665+{
666+ edffm_domain_t *edffm = local_edffm;
667+
668+ BUG_ON(is_realtime(t) && t != edffm->scheduled);
669+
670+ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
671+ set_tsk_need_resched(t);
672+ TRACE("edffm_scheduler_tick: "
673+ "%d is preemptable "
674+ " => FORCE_RESCHED\n", t->pid);
675+ }
676+}
677+
678+static struct task_struct* edffm_schedule(struct task_struct * prev)
679+{
680+ edffm_domain_t* edffm = local_edffm;
681+ rt_domain_t* edf = &edffm->domain;
682+ struct task_struct* next;
683+
684+ int out_of_time, sleep, preempt, exists, blocks, change_cpu, resched;
685+
686+ raw_spin_lock(&edffm->slock);
687+
688+ BUG_ON(edffm->scheduled && edffm->scheduled != prev);
689+ BUG_ON(edffm->scheduled && !is_realtime(prev));
690+
691+ /* (0) Determine state */
692+ exists = edffm->scheduled != NULL;
693+ blocks = exists && !is_running(edffm->scheduled);
694+ out_of_time = exists &&
695+ budget_enforced(edffm->scheduled) &&
696+ budget_exhausted(edffm->scheduled);
697+ sleep = exists && get_rt_flags(edffm->scheduled) == RT_F_SLEEP;
698+ change_cpu = exists && wrong_cpu(edffm->scheduled);
699+ preempt = edffm_preemption_needed(edf, prev);
700+
701+ BUG_ON(blocks && change_cpu);
702+
703+ if (exists)
704+ TRACE_TASK(prev,
705+ "blocks:%d out_of_time:%d sleep:%d preempt:%d "
706+ "wrong_cpu:%d state:%d sig:%d\n",
707+ blocks, out_of_time, sleep, preempt,
708+ change_cpu, prev->state, signal_pending(prev));
709+
710+ /* If we need to preempt do so. */
711+ resched = preempt;
712+
713+ /* If a task blocks we have no choice but to reschedule. */
714+ if (blocks)
715+ resched = 1;
716+
717+ /* If a task has just woken up while tardy and the wake-up raced
718+ * with this schedule(), a new job has already been released; the
719+ * scheduled task should be enqueued on a remote ready queue, and a
720+ * new task should be selected for the current queue.
721+ */
722+ if (change_cpu)
723+ resched = 1;
724+
725+ /* Any task that is preemptable and either exhausts its execution
726+ * budget or wants to sleep completes. We may have to reschedule after
727+ * this.
728+ */
729+ if ((out_of_time || sleep) && !blocks) {
730+ job_completion(edffm->scheduled, !sleep);
731+ resched = 1;
732+ }
733+
734+ /* The final scheduling decision. Do we need to switch for some reason?
735+ * Switch if we are in RT mode and have no task or if we need to
736+ * resched.
737+ */
738+ next = NULL;
739+ if (resched || !exists) {
740+
741+ if (edffm->scheduled && !blocks)
742+ requeue(edffm->scheduled, edf);
743+ next = __take_ready(edf);
744+ } else
745+ /* Only override Linux scheduler if we have a real-time task
746+ * scheduled that needs to continue.
747+ */
748+ if (exists)
749+ next = prev;
750+
751+ if (next) {
752+ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
753+ set_rt_flags(next, RT_F_RUNNING);
754+ } else {
755+ TRACE("becoming idle at %llu\n", litmus_clock());
756+ }
757+
758+ edffm->scheduled = next;
759+ raw_spin_unlock(&edffm->slock);
760+
761+ return next;
762+}
763+
764+/* Prepare a task for running in RT mode
765+ */
766+static void edffm_task_new(struct task_struct * t, int on_rq, int running)
767+{
768+ rt_domain_t* edf = task_edf(t);
769+ edffm_domain_t* edffm = task_edffm(t);
770+ unsigned long flags;
771+
772+ TRACE_TASK(t, "EDF-fm: task new, cpu = %d\n",
773+ t->rt_param.task_params.cpu);
774+
775+ release_at(t, litmus_clock());
776+ update_job_counter(t);
777+
778+ /* The task should be running in the queue, otherwise signal
779+ * code will try to wake it up with fatal consequences.
780+ */
781+ raw_spin_lock_irqsave(&edffm->slock, flags);
782+ if (running) {
783+ /* there shouldn't be anything else running at the time */
784+ BUG_ON(edffm->scheduled);
785+ edffm->scheduled = t;
786+ } else {
787+ requeue(t, edf);
788+ /* maybe we have to reschedule */
789+ preempt(edffm);
790+ }
791+ raw_spin_unlock_irqrestore(&edffm->slock, flags);
792+}
793+
794+static void edffm_task_wake_up(struct task_struct *task)
795+{
796+ unsigned long flags;
797+ edffm_domain_t* edffm = task_edffm(task);
798+ rt_domain_t* edf = task_edf(task);
799+ lt_t now;
800+
801+ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
802+
803+ TRACE_TASK(task, "acquire edffm %d\n", edffm->cpu);
804+ raw_spin_lock_irqsave(&edffm->slock, flags);
805+
806+ BUG_ON(edffm != task_edffm(task));
807+ BUG_ON(is_queued(task));
808+
809+ now = litmus_clock();
810+ if (is_tardy(task, now)) {
811+ if (unlikely(is_migrat_task(task))) {
812+ /* a new job will be released.
813+ * Update current job counter */
814+ update_job_counter(task);
815+ /* Switch CPU if needed */
816+ change_migrat_cpu_if_needed(task);
817+ }
818+ /* new sporadic release */
819+ TRACE_TASK(task, "release new\n");
820+ release_at(task, now);
821+ sched_trace_task_release(task);
822+ }
823+
824+ /* Only add to ready queue if it is not the currently-scheduled
825+ * task. This could be the case if a task was woken up concurrently
826+ * on a remote CPU before the executing CPU got around to actually
827+ * de-scheduling the task, i.e., wake_up() raced with schedule()
828+ * and won.
829+ */
830+ if (edffm->scheduled != task)
831+ requeue(task, edf);
832+
833+ raw_spin_unlock_irqrestore(&edffm->slock, flags);
834+ TRACE_TASK(task, "release edffm %d\n", edffm->cpu);
835+ TRACE_TASK(task, "wake up done\n");
836+}
837+
838+static void edffm_task_block(struct task_struct *t)
839+{
840+ TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
841+
842+ BUG_ON(!is_realtime(t));
843+ if (is_queued(t)) {
844+ edffm_domain_t *edffm = local_edffm;
845+ TRACE_TASK(t, "task blocked, race with wakeup, "
846+ "remove from queue %d\n", edffm->cpu);
847+ remove(&edffm->domain, t);
848+ }
849+}
850+
851+static void edffm_task_exit(struct task_struct * t)
852+{
853+ unsigned long flags;
854+ edffm_domain_t* edffm = task_edffm(t);
855+ rt_domain_t* edf;
856+
857+ raw_spin_lock_irqsave(&edffm->slock, flags);
858+ if (is_queued(t)) {
859+ /* dequeue */
860+ edf = task_edf(t);
861+ remove(edf, t);
862+ }
863+ if (edffm->scheduled == t)
864+ edffm->scheduled = NULL;
865+
866+ TRACE_TASK(t, "RIP\n");
867+
868+ preempt(edffm);
869+ raw_spin_unlock_irqrestore(&edffm->slock, flags);
870+}
871+
872+static long edffm_admit_task(struct task_struct* tsk)
873+{
874+ return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
875+}
876+
877+/* Plugin object */
878+static struct sched_plugin edffm_plugin __cacheline_aligned_in_smp = {
879+ .plugin_name = "EDF-fm",
880+ .tick = edffm_tick,
881+ .task_new = edffm_task_new,
882+ .complete_job = complete_job,
883+ .task_exit = edffm_task_exit,
884+ .schedule = edffm_schedule,
885+ .task_wake_up = edffm_task_wake_up,
886+ .task_block = edffm_task_block,
887+ .admit_task = edffm_admit_task
888+};
889+
890+static int __init init_edffm(void)
891+{
892+ int i;
893+ edffm_domain_t *edffm;
894+
895+ /* Note: breaks if num_online_cpus() changes (no CPU hotplug support) */
896+ for (i = 0; i < num_online_cpus(); i++) {
897+ edffm = remote_edffm(i);
898+ edffm->cpu = i;
899+ edffm->scheduled = NULL;
900+ edf_domain_init(&edffm->domain, NULL, edffm_release_jobs);
901+ }
902+
903+ return register_sched_plugin(&edffm_plugin);
904+}
905+
906+module_init(init_edffm);
907+
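The job-assignment rule implemented by next_cpu_for_job() above (Eq. 8 of the EDF-fm paper) is easier to see outside the kernel. The following stand-alone user-space sketch replays the same integer test for a hypothetical migratory task with a 2/3 share on CPU 1 and a 1/3 share on CPU 2; the counter handling is simplified relative to the kernel's update_job_counter() bookkeeping, so it only illustrates the arithmetic, not the exact in-kernel sequence.

#include <stdio.h>

int main(void)
{
	unsigned int cpus[2] = {1, 2};
	/* fraction[0][i] = numerator, fraction[1][i] = denominator of the
	 * share on cpus[i]: 2/3 on CPU 1, 1/3 on CPU 2 (illustrative) */
	unsigned int fraction[2][2] = { {2, 1}, {3, 3} };
	unsigned int cpu_job_no[2] = {0, 0};	/* jobs completed per CPU */
	unsigned int job_no, cur = 0;		/* first job on cpus[0] */

	for (job_no = 1; job_no <= 12; job_no++) {
		printf("job %2u -> CPU %u\n", job_no, cpus[cur]);
		/* job completes: account it to the CPU it ran on */
		cpu_job_no[cur]++;
		/* same test as next_cpu_for_job(): the next job goes to
		 * cpus[0] iff job_no == floor(cpu_job_no * den / num) */
		if (job_no == (cpu_job_no[cur] * fraction[1][cur]) /
		    fraction[0][cur])
			cur = 0;
		else
			cur = 1;
	}
	return 0;
}

For these particular fractions the sketch assigns eight of the twelve jobs to CPU 1 and four to CPU 2, i.e., the 2:1 split encoded by the numerator/denominator pairs.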
908diff --git a/litmus/sched_edf_wm.c b/litmus/sched_edf_wm.c
909new file mode 100644
910index 0000000..8b7be32
911--- /dev/null
912+++ b/litmus/sched_edf_wm.c
913@@ -0,0 +1,688 @@
914+/* EDF-WM: based on PSN-EDF.
915+ */
916+
917+#include <linux/percpu.h>
918+#include <linux/sched.h>
919+#include <linux/list.h>
920+#include <linux/spinlock.h>
921+
922+#include <linux/module.h>
923+
924+#include <litmus/litmus.h>
925+#include <litmus/jobs.h>
926+#include <litmus/sched_plugin.h>
927+#include <litmus/edf_common.h>
928+
929+typedef struct {
930+ rt_domain_t domain;
931+ int cpu;
932+ struct task_struct* scheduled; /* only RT tasks */
933+
934+/*
935+ * scheduling lock slock
936+ * protects the domain and serializes scheduling decisions
937+ */
938+#define slock domain.ready_lock
939+
940+} wm_domain_t;
941+
942+DEFINE_PER_CPU(wm_domain_t, wm_domains);
943+
944+#define TRACE_DOM(dom, fmt, args...) \
945+ TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args)
946+
947+
948+#define local_domain (&__get_cpu_var(wm_domains))
949+#define remote_domain(cpu) (&per_cpu(wm_domains, cpu))
950+#define domain_of_task(task) (remote_domain(get_partition(task)))
951+
952+static int is_sliced_task(struct task_struct* t)
953+{
954+ return tsk_rt(t)->task_params.semi_part.wm.count;
955+}
956+
957+static struct edf_wm_slice* get_last_slice(struct task_struct* t)
958+{
959+ int idx = tsk_rt(t)->task_params.semi_part.wm.count - 1;
960+ return tsk_rt(t)->task_params.semi_part.wm.slices + idx;
961+}
962+
963+static void compute_slice_params(struct task_struct* t)
964+{
965+ struct rt_param* p = tsk_rt(t);
966+ /* Here we do a little trick to make the generic EDF code
967+ * play well with job slices. We overwrite the job-level
968+ * release and deadline fields with the slice-specific values
969+ * so that we can enqueue this task in an EDF rt_domain_t
970+ * without issue. The actual values are cached in the semi_part.wm
971+ * structure. */
972+ p->job_params.deadline = p->semi_part.wm.job_release +
973+ p->semi_part.wm.slice->deadline;
974+ p->job_params.release = p->semi_part.wm.job_release +
975+ p->semi_part.wm.slice->offset;
976+
977+ /* Similarly, we play a trick on the cpu field. */
978+ p->task_params.cpu = p->semi_part.wm.slice->cpu;
979+
980+ /* update the per-slice budget reference */
981+ p->semi_part.wm.exec_time = p->job_params.exec_time;
982+}
983+
984+static void complete_sliced_job(struct task_struct* t)
985+{
986+ struct rt_param* p = tsk_rt(t);
987+
988+ /* We need to undo our trickery to the
989+ * job parameters (see above). */
990+ p->job_params.release = p->semi_part.wm.job_release;
991+ p->job_params.deadline = p->semi_part.wm.job_deadline;
992+
993+ /* Ok, now let generic code do the actual work. */
994+ prepare_for_next_period(t);
995+
996+ /* And finally cache the updated parameters. */
997+ p->semi_part.wm.job_release = p->job_params.release;
998+ p->semi_part.wm.job_deadline = p->job_params.deadline;
999+}
1000+
1001+static lt_t slice_exec_time(struct task_struct* t)
1002+{
1003+ struct rt_param* p = tsk_rt(t);
1004+
1005+ /* Compute how much execution time has been consumed
1006+ * since last slice advancement. */
1007+ return p->job_params.exec_time - p->semi_part.wm.exec_time;
1008+}
1009+
1010+static lt_t slice_budget(struct task_struct* t)
1011+{
1012+ return tsk_rt(t)->semi_part.wm.slice->budget;
1013+}
1014+
1015+static int slice_budget_exhausted(struct task_struct* t)
1016+{
1017+ return slice_exec_time(t) >= slice_budget(t);
1018+}
1019+
1020+/* assumes positive remainder; overflows otherwise */
1021+static lt_t slice_budget_remaining(struct task_struct* t)
1022+{
1023+ return slice_budget(t) - slice_exec_time(t);
1024+}
1025+
1026+static int wm_budget_exhausted(struct task_struct* t)
1027+{
1028+ if (is_sliced_task(t))
1029+ return slice_budget_exhausted(t);
1030+ else
1031+ return budget_exhausted(t);
1032+}
1033+
1034+static void advance_next_slice(struct task_struct* t, int completion_signaled)
1035+{
1036+ int idx;
1037+ struct rt_param* p = tsk_rt(t);
1038+
1039+ /* make sure this is actually a sliced job */
1040+ BUG_ON(!is_sliced_task(t));
1041+ BUG_ON(is_queued(t));
1042+
1043+ /* determine index of current slice */
1044+ idx = p->semi_part.wm.slice -
1045+ p->task_params.semi_part.wm.slices;
1046+
1047+ TRACE_TASK(t, "advancing slice %d; excess=%lluns; "
1048+ "completion_signaled=%d.\n",
1049+ idx, slice_exec_time(t) - slice_budget(t),
1050+ completion_signaled);
1051+
1052+ if (completion_signaled)
1053+ idx = 0;
1054+ else
1055+ /* increment and wrap around, if necessary */
1056+ idx = (idx + 1) % p->task_params.semi_part.wm.count;
1057+
1058+ /* point to next slice */
1059+ p->semi_part.wm.slice =
1060+ p->task_params.semi_part.wm.slices + idx;
1061+
1062+ /* Check if we need to update essential job parameters. */
1063+ if (!idx) {
1064+ /* job completion */
1065+ sched_trace_task_completion(t, !completion_signaled);
1066+ TRACE_TASK(t, "completed sliced job"
1067+ "(signaled:%d)\n", completion_signaled);
1068+ complete_sliced_job(t);
1069+ }
1070+
1071+ /* Update job parameters for new slice. */
1072+ compute_slice_params(t);
1073+}
1074+
1075+/* assumes time_passed does not advance past the last slice */
1076+static void fast_forward_slices(struct task_struct* t, lt_t time_passed)
1077+{
1078+ TRACE_TASK(t, "fast forwarding %lluns\n", time_passed);
1079+
1080+ /* this is NOT the slice version */
1081+ BUG_ON(budget_remaining(t) <= time_passed);
1082+
1083+ if (wm_budget_exhausted(t)) {
1084+ /* This can happen if a suspension raced
1085+ * with a normal slice advancement. wm_schedule()
1086+ * does not process out_of_time when a task blocks. */
1087+ TRACE_TASK(t, "block raced with out_of_time?\n");
1088+ advance_next_slice(t, 0);
1089+ }
1090+
1091+ while (time_passed &&
1092+ time_passed >= slice_budget_remaining(t)) {
1093+ /* slice completely exhausted */
1094+ time_passed -= slice_budget_remaining(t);
1095+ tsk_rt(t)->job_params.exec_time +=
1096+ slice_budget_remaining(t);
1097+
1098+ BUG_ON(!slice_budget_exhausted(t));
1099+ BUG_ON(slice_budget_remaining(t) != 0);
1100+ BUG_ON(tsk_rt(t)->semi_part.wm.slice == get_last_slice(t));
1101+
1102+ advance_next_slice(t, 0);
1103+ }
1104+ /* add remainder to exec cost */
1105+ tsk_rt(t)->job_params.exec_time += time_passed;
1106+}
1107+
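A short worked example for fast_forward_slices() above, with illustrative numbers only:

/*
 * Illustrative numbers: suppose a sliced task suspends for 3ms while
 * 1ms of budget remains in its current slice and the next slice has a
 * 2.5ms budget. fast_forward_slices(t, 3ms) then
 *   - charges the remaining 1ms to the current slice and calls
 *     advance_next_slice(t, 0), and
 *   - charges the leftover 2ms against the next slice, leaving 0.5ms
 *     of that slice's budget for actual execution.
 * The BUG_ON()s check the assumption that the sleep time never
 * advances the task past its last slice.
 */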
1108+/* we assume the lock is being held */
1109+static void preempt(wm_domain_t *dom)
1110+{
1111+ TRACE_DOM(dom, "will be preempted.\n");
1112+ /* We pass NULL as the task since non-preemptive sections are not
1113+ * supported in this plugin, so per-task checks are not needed. */
1114+ preempt_if_preemptable(NULL, dom->cpu);
1115+}
1116+
1117+static void wm_domain_init(wm_domain_t* dom,
1118+ check_resched_needed_t check,
1119+ release_jobs_t release,
1120+ int cpu)
1121+{
1122+ edf_domain_init(&dom->domain, check, release);
1123+ dom->cpu = cpu;
1124+ dom->scheduled = NULL;
1125+}
1126+
1127+static void wm_requeue_remote(struct task_struct *t)
1128+{
1129+ wm_domain_t *dom = domain_of_task(t);
1130+
1131+ set_rt_flags(t, RT_F_RUNNING);
1132+ if (is_released(t, litmus_clock()))
1133+ /* acquires necessary lock */
1134+ add_ready(&dom->domain, t);
1135+ else
1136+ /* force timer on remote CPU */
1137+ add_release_on(&dom->domain, t, get_partition(t));
1138+}
1139+
1140+static void wm_requeue_local(struct task_struct* t, rt_domain_t *edf)
1141+{
1142+ if (t->state != TASK_RUNNING)
1143+ TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
1144+
1145+ set_rt_flags(t, RT_F_RUNNING);
1146+ if (is_released(t, litmus_clock()))
1147+ __add_ready(edf, t);
1148+ else
1149+ add_release(edf, t); /* it has got to wait */
1150+}
1151+
1152+static int wm_check_resched(rt_domain_t *edf)
1153+{
1154+ wm_domain_t *dom = container_of(edf, wm_domain_t, domain);
1155+
1156+ /* because this is a callback from rt_domain_t we already hold
1157+ * the necessary lock for the ready queue
1158+ */
1159+ if (edf_preemption_needed(edf, dom->scheduled)) {
1160+ preempt(dom);
1161+ return 1;
1162+ } else
1163+ return 0;
1164+}
1165+
1166+static void regular_job_completion(struct task_struct* t, int forced)
1167+{
1168+ sched_trace_task_completion(t, forced);
1169+ TRACE_TASK(t, "job_completion().\n");
1170+
1171+ set_rt_flags(t, RT_F_SLEEP);
1172+ prepare_for_next_period(t);
1173+}
1174+
1175+static void wm_job_or_slice_completion(struct task_struct* t,
1176+ int completion_signaled)
1177+{
1178+ if (is_sliced_task(t))
1179+ advance_next_slice(t, completion_signaled);
1180+ else
1181+ regular_job_completion(t, !completion_signaled);
1182+}
1183+
1184+static void wm_tick(struct task_struct *t)
1185+{
1186+ wm_domain_t *dom = local_domain;
1187+
1188+ /* Check for inconsistency. We don't need the lock for this since
1189+ * ->scheduled is only changed in schedule, which obviously is not
1190+ * executing in parallel on this CPU
1191+ */
1192+ BUG_ON(is_realtime(t) && t != dom->scheduled);
1193+
1194+ if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) {
1195+ set_tsk_need_resched(t);
1196+ TRACE_DOM(dom, "budget of %d exhausted in tick\n",
1197+ t->pid);
1198+ }
1199+}
1200+
1201+static struct task_struct* wm_schedule(struct task_struct * prev)
1202+{
1203+ wm_domain_t *dom = local_domain;
1204+ rt_domain_t *edf = &dom->domain;
1205+ struct task_struct *next, *migrate = NULL;
1206+
1207+ int out_of_time, sleep, preempt, wrong_cpu, exists, blocks, resched;
1208+
1209+ raw_spin_lock(&dom->slock);
1210+
1211+ /* Sanity checking:
1212+ * When a task exits (dies), dom->scheduled may be NULL
1213+ * and prev _is_ realtime. */
1214+ BUG_ON(dom->scheduled && dom->scheduled != prev);
1215+ BUG_ON(dom->scheduled && !is_realtime(prev));
1216+
1217+ /* (0) Determine state */
1218+ exists = dom->scheduled != NULL;
1219+ wrong_cpu = exists && get_partition(dom->scheduled) != dom->cpu;
1220+ blocks = exists && !is_running(dom->scheduled);
1221+ out_of_time = exists
1222+ && budget_enforced(dom->scheduled)
1223+ && wm_budget_exhausted(dom->scheduled);
1224+ sleep = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP;
1225+ preempt = edf_preemption_needed(edf, prev);
1226+
1227+ /* If we need to preempt do so.
1228+ * The following checks set resched to 1 in case of special
1229+ * circumstances.
1230+ */
1231+ resched = preempt;
1232+
1233+
1234+ if (exists)
1235+ TRACE_TASK(prev,
1236+ "blocks:%d out_of_time:%d sleep:%d preempt:%d "
1237+ "wrong_cpu:%d state:%d sig:%d\n",
1238+ blocks, out_of_time, sleep, preempt, wrong_cpu,
1239+ prev->state, signal_pending(prev));
1240+
1241+ /* If a task blocks we have no choice but to reschedule.
1242+ */
1243+ if (blocks)
1244+ resched = 1;
1245+
1246+ /* This can happen if a sliced task was moved to the next slice
1247+ * by the wake_up() code path while still being scheduled.
1248+ */
1249+ if (wrong_cpu)
1250+ resched = 1;
1251+
1252+ /* Any task that is preemptable and either exhausts its execution
1253+ * budget or wants to sleep completes. We may have to reschedule after
1254+ * this.
1255+ */
1256+ if ((out_of_time || sleep) && !blocks) {
1257+ wm_job_or_slice_completion(dom->scheduled, sleep);
1258+ resched = 1;
1259+ }
1260+
1261+ /* The final scheduling decision. Do we need to switch for some reason?
1262+ * Switch if we are in RT mode and have no task or if we need to
1263+ * resched.
1264+ */
1265+ next = NULL;
1266+ if (resched || !exists) {
1267+ if (dom->scheduled && !blocks) {
1268+ if (get_partition(dom->scheduled) == dom->cpu)
1269+ /* local task */
1270+ wm_requeue_local(dom->scheduled, edf);
1271+ else
1272+ /* not local anymore; wait until we drop the
1273+ * ready queue lock */
1274+ migrate = dom->scheduled;
1275+ }
1276+ next = __take_ready(edf);
1277+ } else
1278+ /* Only override Linux scheduler if we have a real-time task
1279+ * scheduled that needs to continue. */
1280+ if (exists)
1281+ next = prev;
1282+
1283+ if (next) {
1284+ TRACE_TASK(next, "scheduled at %llu (state:%d/%d)\n", litmus_clock(),
1285+ next->state, is_running(next));
1286+ set_rt_flags(next, RT_F_RUNNING);
1287+ } else if (exists) {
1288+ TRACE("becoming idle at %llu\n", litmus_clock());
1289+ }
1290+
1291+ dom->scheduled = next;
1292+ raw_spin_unlock(&dom->slock);
1293+
1294+ /* check if we need to push the previous task onto another queue */
1295+ if (migrate) {
1296+ TRACE_TASK(migrate, "schedule-initiated migration to %d\n",
1297+ get_partition(migrate));
1298+ wm_requeue_remote(migrate);
1299+ }
1300+
1301+ return next;
1302+}
1303+
1304+
1305+/* Prepare a task for running in RT mode
1306+ */
1307+static void wm_task_new(struct task_struct * t, int on_rq, int running)
1308+{
1309+ wm_domain_t* dom = domain_of_task(t);
1310+ rt_domain_t* edf = &dom->domain;
1311+ unsigned long flags;
1312+
1313+ TRACE_TASK(t, "edf-wm: task new, cpu = %d\n",
1314+ t->rt_param.task_params.cpu);
1315+
1316+ /* setup job parameters */
1317+ release_at(t, litmus_clock());
1318+
1319+ /* The task should be running in the queue, otherwise signal
1320+ * code will try to wake it up with fatal consequences.
1321+ */
1322+ raw_spin_lock_irqsave(&dom->slock, flags);
1323+
1324+ if (is_sliced_task(t)) {
1325+ /* make sure parameters are initialized consistently */
1326+ tsk_rt(t)->semi_part.wm.exec_time = 0;
1327+ tsk_rt(t)->semi_part.wm.job_release = get_release(t);
1328+ tsk_rt(t)->semi_part.wm.job_deadline = get_deadline(t);
1329+ tsk_rt(t)->semi_part.wm.slice = tsk_rt(t)->task_params.semi_part.wm.slices;
1330+ tsk_rt(t)->job_params.exec_time = 0;
1331+ }
1332+
1333+ if (running) {
1334+ /* there shouldn't be anything else running at the time */
1335+ BUG_ON(dom->scheduled);
1336+ dom->scheduled = t;
1337+ } else {
1338+ wm_requeue_local(t, edf);
1339+ /* maybe we have to reschedule */
1340+ preempt(dom);
1341+ }
1342+ raw_spin_unlock_irqrestore(&dom->slock, flags);
1343+}
1344+
1345+static void wm_release_at(struct task_struct *t, lt_t start)
1346+{
1347+ struct rt_param* p = tsk_rt(t);
1348+
1349+ if (is_sliced_task(t)) {
1350+ /* simulate wrapping to the first slice */
1351+ p->semi_part.wm.job_deadline = start;
1352+ p->semi_part.wm.slice = get_last_slice(t);
1353+ /* FIXME: creates bogus completion event... */
1354+ advance_next_slice(t, 0);
1355+ set_rt_flags(t, RT_F_RUNNING);
1356+ } else
1357+ /* generic code handles it */
1358+ release_at(t, start);
1359+}
1360+
1361+static lt_t wm_earliest_release(struct task_struct *t, lt_t now)
1362+{
1363+ lt_t deadline;
1364+ if (is_sliced_task(t))
1365+ deadline = tsk_rt(t)->semi_part.wm.job_deadline;
1366+ else
1367+ deadline = get_deadline(t);
1368+ if (lt_before(deadline, now))
1369+ return now;
1370+ else
1371+ return deadline;
1372+}
1373+
1374+static void wm_task_wake_up(struct task_struct *t)
1375+{
1376+ unsigned long flags;
1377+ wm_domain_t* dom = domain_of_task(t);
1378+ rt_domain_t* edf = &dom->domain;
1379+ struct rt_param* p = tsk_rt(t);
1380+ lt_t now, sleep_time;
1381+ int migrate = 0;
1382+
1383+ raw_spin_lock_irqsave(&dom->slock, flags);
1384+ BUG_ON(is_queued(t));
1385+
1386+ now = litmus_clock();
1387+
1388+ sleep_time = now - p->semi_part.wm.suspend_time;
1389+
1390+ TRACE_TASK(t, "wake_up at %llu after %llu, still-scheduled:%d\n",
1391+ now, sleep_time, dom->scheduled == t);
1392+
1393+ /* account sleep time as execution time */
1394+ if (get_exec_time(t) + sleep_time >= get_exec_cost(t)) {
1395+ /* new sporadic release */
1396+ TRACE_TASK(t, "new sporadic release\n");
1397+ wm_release_at(t, wm_earliest_release(t, now));
1398+ sched_trace_task_release(t);
1399+ } else if (is_sliced_task(t)) {
1400+ /* figure out which slice we should be executing on */
1401+ fast_forward_slices(t, sleep_time);
1402+ /* can't be exhausted now */
1403+ BUG_ON(wm_budget_exhausted(t));
1404+ } else {
1405+ /* simply add to the execution time */
1406+ tsk_rt(t)->job_params.exec_time += sleep_time;
1407+ }
1408+
1409+
1410+ /* Only add to ready queue if it is not the currently-scheduled
1411+ * task. This could be the case if a task was woken up concurrently
1412+ * on a remote CPU before the executing CPU got around to actually
1413+ * de-scheduling the task, i.e., wake_up() raced with schedule()
1414+ * and won.
1415+ */
1416+ if (dom->scheduled != t) {
1417+ if (get_partition(t) == dom->cpu)
1418+ wm_requeue_local(t, edf);
1419+ else
1420+ /* post-pone migration until after unlocking */
1421+ migrate = 1;
1422+ }
1423+
1424+ raw_spin_unlock_irqrestore(&dom->slock, flags);
1425+
1426+ if (migrate) {
1427+ TRACE_TASK(t, "wake_up-initiated migration to %d\n",
1428+ get_partition(t));
1429+ wm_requeue_remote(t);
1430+ }
1431+
1432+ TRACE_TASK(t, "wake up done\n");
1433+}
1434+
1435+static void wm_task_block(struct task_struct *t)
1436+{
1437+ wm_domain_t* dom = domain_of_task(t);
1438+ unsigned long flags;
1439+ lt_t now = litmus_clock();
1440+
1441+ TRACE_TASK(t, "block at %llu, state=%d\n", now, t->state);
1442+
1443+ tsk_rt(t)->semi_part.wm.suspend_time = now;
1444+
1445+ raw_spin_lock_irqsave(&dom->slock, flags);
1446+ if (is_queued(t)) {
1447+ TRACE_TASK(t, "still queued; migration invariant failed?\n");
1448+ remove(&dom->domain, t);
1449+ }
1450+ raw_spin_unlock_irqrestore(&dom->slock, flags);
1451+
1452+ BUG_ON(!is_realtime(t));
1453+}
1454+
1455+static void wm_task_exit(struct task_struct * t)
1456+{
1457+ unsigned long flags;
1458+ wm_domain_t* dom = domain_of_task(t);
1459+ rt_domain_t* edf = &dom->domain;
1460+
1461+ raw_spin_lock_irqsave(&dom->slock, flags);
1462+ if (is_queued(t)) {
1463+ /* dequeue */
1464+ remove(edf, t);
1465+ }
1466+ if (dom->scheduled == t)
1467+ dom->scheduled = NULL;
1468+
1469+ TRACE_TASK(t, "RIP, now reschedule\n");
1470+
1471+ preempt(dom);
1472+ raw_spin_unlock_irqrestore(&dom->slock, flags);
1473+}
1474+
1475+static long wm_check_params(struct task_struct *t)
1476+{
1477+ struct rt_param* p = tsk_rt(t);
1478+ struct edf_wm_params* wm = &p->task_params.semi_part.wm;
1479+ int i;
1480+ lt_t tmp;
1481+
1482+ if (!is_sliced_task(t)) {
1483+ /* regular task; nothing to check */
1484+ TRACE_TASK(t, "accepted regular (non-sliced) task with "
1485+ "%d slices\n",
1486+ wm->count);
1487+ return 0;
1488+ }
1489+
1490+ /* (1) Either not sliced, or more than 1 slice. */
1491+ if (wm->count == 1 || wm->count > MAX_EDF_WM_SLICES) {
1492+ TRACE_TASK(t, "bad number of slices (%u) \n",
1493+ wm->count);
1494+ return -EINVAL;
1495+ }
1496+
1497+ /* (2) The partition has to agree with the first slice. */
1498+ if (get_partition(t) != wm->slices[0].cpu) {
1499+ TRACE_TASK(t, "partition and first slice CPU differ "
1500+ "(%d != %d)\n", get_partition(t), wm->slices[0].cpu);
1501+ return -EINVAL;
1502+ }
1503+
1504+ /* (3) The total budget must agree. */
1505+ for (i = 0, tmp = 0; i < wm->count; i++)
1506+ tmp += wm->slices[i].budget;
1507+ if (get_exec_cost(t) != tmp) {
1508+ TRACE_TASK(t, "total budget and sum of slice budgets differ\n");
1509+ return -EINVAL;
1510+ }
1511+
1512+ /* (4) The release of each slice must not precede the previous
1513+ * deadline. */
1514+ for (i = 0; i < wm->count - 1; i++)
1515+ if (wm->slices[i].deadline > wm->slices[i + 1].offset) {
1516+ TRACE_TASK(t, "slice %d overlaps with slice %d\n",
1517+ i, i + 1);
1518+ return -EINVAL;
1519+ }
1520+
1521+ /* (5) The budget of each slice must fit within [offset, deadline] */
1522+ for (i = 0; i < wm->count; i++)
1523+ if (lt_before(wm->slices[i].deadline, wm->slices[i].offset) ||
1524+ wm->slices[i].deadline - wm->slices[i].offset <
1525+ wm->slices[i].budget) {
1526+ TRACE_TASK(t, "slice %d is overloaded\n", i);
1527+ return -EINVAL;
1528+ }
1529+
1530+ /* (6) The budget of each slice must be at least the minimum slice size. */
1531+ for (i = 0; i < wm->count; i++)
1532+ if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) {
1533+ TRACE_TASK(t, "slice %d is too short\n", i);
1534+ return -EINVAL;
1535+ }
1536+
1537+ /* (7) The CPU of each slice must be different from the previous CPU. */
1538+ for (i = 0; i < wm->count - 1; i++)
1539+ if (wm->slices[i].cpu == wm->slices[i + 1].cpu) {
1540+ TRACE_TASK(t, "slice %d does not migrate\n", i);
1541+ return -EINVAL;
1542+ }
1543+
1544+ /* (8) The CPU of each slice must be online. */
1545+ for (i = 0; i < wm->count; i++)
1546+ if (!cpu_online(wm->slices[i].cpu)) {
1547+ TRACE_TASK(t, "slice %d is allocated on offline CPU\n",
1548+ i);
1549+ return -EINVAL;
1550+ }
1551+
1552+ /* (9) A sliced task's budget must be precisely enforced. */
1553+ if (!budget_precisely_enforced(t)) {
1554+ TRACE_TASK(t, "budget is not precisely enforced "
1555+ "(policy: %d).\n",
1556+ tsk_rt(t)->task_params.budget_policy);
1557+ return -EINVAL;
1558+ }
1559+
1560+ TRACE_TASK(t, "accepted sliced task with %d slices\n",
1561+ wm->count);
1562+
1563+ return 0;
1564+}
1565+
1566+static long wm_admit_task(struct task_struct* t)
1567+{
1568+ return task_cpu(t) == get_partition(t) ? wm_check_params(t) : -EINVAL;
1569+}
1570+
1571+/* Plugin object */
1572+static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = {
1573+ .plugin_name = "EDF-WM",
1574+ .tick = wm_tick,
1575+ .task_new = wm_task_new,
1576+ .complete_job = complete_job,
1577+ .task_exit = wm_task_exit,
1578+ .schedule = wm_schedule,
1579+ .release_at = wm_release_at,
1580+ .task_wake_up = wm_task_wake_up,
1581+ .task_block = wm_task_block,
1582+ .admit_task = wm_admit_task
1583+};
1584+
1585+
1586+static int __init init_edf_wm(void)
1587+{
1588+ int i;
1589+
1590+ /* FIXME: breaks with CPU hotplug
1591+ */
1592+ for (i = 0; i < num_online_cpus(); i++) {
1593+ wm_domain_init(remote_domain(i),
1594+ wm_check_resched,
1595+ NULL, i);
1596+ }
1597+ return register_sched_plugin(&edf_wm_plugin);
1598+}
1599+
1600+module_init(init_edf_wm);
1601+
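As a concrete illustration of the constraints enforced by wm_check_params() above, the fragment below sketches a hypothetical two-slice split. The field names mirror the accesses in the code (count, slices[i].cpu/offset/deadline/budget), but the real struct edf_wm_params/edf_wm_slice definitions live in the rt_param headers and are not reproduced here, so a local stand-in struct is used.

/* Stand-in type for illustration only; not the kernel's definition. */
struct example_slice {
	unsigned int cpu;
	unsigned long long offset, deadline, budget;	/* nanoseconds */
};

/* Hypothetical task: period 10ms, total budget 3ms, split over CPUs 0 and 1. */
static struct example_slice example_slices[2] = {
	/* slice 0: CPU 0, window [0ms, 5ms), budget 1.5ms */
	{ .cpu = 0, .offset = 0,       .deadline = 5000000,  .budget = 1500000 },
	/* slice 1: CPU 1, window [5ms, 10ms), budget 1.5ms */
	{ .cpu = 1, .offset = 5000000, .deadline = 10000000, .budget = 1500000 },
};

/* Why this would pass the checks above (numbering as in wm_check_params()):
 * (1) count == 2, i.e., more than one slice;
 * (2) the task's partition must equal slices[0].cpu == 0;
 * (3) 1.5ms + 1.5ms equals the task's total execution cost of 3ms;
 * (4) slice 1's offset (5ms) does not precede slice 0's deadline (5ms);
 * (5) each budget fits within its [offset, deadline] window;
 * (6) assuming 1.5ms is not below MIN_EDF_WM_SLICE_SIZE;
 * (7) consecutive slices are on different CPUs;
 * (8) both CPUs are assumed to be online; and
 * (9) the task must additionally use precise budget enforcement.
 */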
1602diff --git a/litmus/sched_npsf.c b/litmus/sched_npsf.c
1603new file mode 100644
1604index 0000000..aad99c7
1605--- /dev/null
1606+++ b/litmus/sched_npsf.c
1607@@ -0,0 +1,1185 @@
1608+/*
1609+ * litmus/sched_npsf.c
1610+ *
1611+ * Implementation of the NPS-F scheduling algorithm.
1612+ *
1613+ * A _server_ may span multiple _reserves_ on different CPUs.
1614+ *
1615+ * * 1
1616+ * +--------------+ +--> +--------------+ +--> +--------------+
1617+ * | cpu_entry_t | | | npsf_reserve | | | npsf_server |
1618+ * +--------------+ | +--------------+ | +--------------+
1619+ * | |1 | | |1 | | |
1620+ * | cpu_reserve |--+ 1| server |--+ 1| |
1621+ * | | +---| cpu | +---| curr_reserve |
1622+ * +--------------+ <-+ +--------------+ <-+ +--------------+
1623+ * 1 *
1624+ */
1625+
1626+#include <asm/uaccess.h>
1627+#include <linux/percpu.h>
1628+#include <linux/sched.h>
1629+#include <linux/list.h>
1630+#include <linux/spinlock.h>
1631+#include <linux/slab.h>
1632+
1633+#include <linux/module.h>
1634+
1635+#include <litmus/litmus.h>
1636+#include <litmus/jobs.h>
1637+#include <litmus/sched_plugin.h>
1638+#include <litmus/edf_common.h>
1639+
1640+/* Be extra verbose (log spam) */
1641+#define NPSF_VERBOSE
1642+
1643+#ifdef NPSF_VERBOSE
1644+#define npsf_printk(fmt, arg...) printk(KERN_INFO fmt, ##arg)
1645+#else
1646+#define npsf_printk(fmt, arg...)
1647+#endif
1648+
1649+struct npsf_reserve;
1650+
1651+/* cpu_entry_t
1652+ *
1653+ * Each cpu has a list of reserves assigned to it.
1654+ * Each reserve has a pointer to its server (Notional processor)
1655+ * that may be shared among multiple reserves.
1656+ */
1657+typedef struct {
1658+ /* lock to protect cpu_reserve and list changes */
1659+ raw_spinlock_t cpu_res_lock;
1660+ /* the reserve currently executing on this cpu */
1661+ struct npsf_reserve *cpu_reserve;
1662+ /* list of reserves on this cpu */
1663+ struct list_head npsf_reserves;
1664+ /* cpu ID */
1665+ int cpu;
1666+ /* timer to control reserve switching */
1667+ struct hrtimer timer;
1668+ /* virtual timer expiring (wrt time_origin) */
1669+ lt_t should_expire;
1670+ /* delegate timer firing to proper cpu */
1671+ struct hrtimer_start_on_info info;
1672+ /* FIXME: the ids for servers should be an increasing int >=0 */
1673+ int last_seen_npsf_id;
1674+} cpu_entry_t;
1675+
1676+/* one cpu_entry_t per CPU */
1677+DEFINE_PER_CPU(cpu_entry_t, npsf_cpu_entries);
1678+
1679+/* This is the "notional processor" (i.e., simple server) abstraction. */
1680+typedef struct npsf_server {
1681+ /* shared among reserves */
1682+ rt_domain_t dom;
1683+ /* the real-time task that this server *SHOULD* be scheduling */
1684+ struct task_struct *highest_prio;
1685+ /* current reserve where this dom is executing */
1686+ struct npsf_reserve *curr_reserve;
1687+ /* The "first" reserve for this server in a time slot.
1688+ * For non-migrating servers this will always be the same as curr_reserve. */
1689+ struct npsf_reserve *first_reserve;
1690+ * Prevent a race between the last CPU in a reserve chain and the first. */
1691+ int first_cpu_wants_ipi;
1692+ /* rt_domain_t lock + npsf_server_t lock */
1693+#define lock dom.ready_lock
1694+} npsf_server_t;
1695+
1696+typedef struct npsf_reserve {
1697+ /* Pointer to the server for this reserve: a server may be shared among
1698+ * multiple cpus with different budget per cpu, but same npsf_id. */
1699+ npsf_server_t *server;
1700+ /* we queue here in npsf_reserves */
1701+ struct list_head node;
1702+ /* budget of this npsf_id on this cpu */
1703+ lt_t budget;
1704+ /* cpu for this (portion of) server */
1705+ cpu_entry_t *cpu;
1706+ /* id of this server, it is the same for the
1707+ * same server on different cpus */
1708+ int npsf_id;
1709+ /* Can be used to identify whether a reserve continues in the
1710+ * next npsf in the chain; needed for proper server deletion */
1711+ struct npsf_reserve *next_npsf;
1712+ /* flag that is true if the reserve is currently scheduled */
1713+ int is_currently_scheduled;
1714+} npsf_reserve_t;
1715+
1716+/* synchronization point to start moving and switching servers only
1717+ * when all servers have been properly set up by the user.
1718+ */
1719+static atomic_t all_servers_added;
1720+static atomic_t timers_activated = ATOMIC_INIT(0);
1721+
1722+/* Virtual time starts here */
1723+static lt_t time_origin;
1724+
1725+/* save number of online cpus seen at init time */
1726+static unsigned int _online_cpus = 1;
1727+
1728+#define no_reserves(entry) (list_empty(&((entry)->npsf_reserves)))
1729+#define local_entry (&__get_cpu_var(npsf_cpu_entries))
1730+#define remote_entry(cpu) (&per_cpu(npsf_cpu_entries, (cpu)))
1731+
1732+#define server_from_dom(domain) (container_of((domain), npsf_server_t, dom))
1733+
1734+/* task_entry uses get_partition(), therefore we must take care to
1735+ * update task_params.cpu correctly whenever we switch tasks,
1736+ * otherwise we'll deadlock.
1737+ */
1738+#define task_entry(task) remote_entry(get_partition(task))
1739+#define domain_edf(npsf) (&((npsf)->server->dom))
1740+
1741+#define task_npsfid(task) ((task)->rt_param.task_params.semi_part.npsf_id)
1742+
1743+static inline int owns_server(npsf_reserve_t *npsf)
1744+{
1745+ return (npsf->server->curr_reserve == npsf);
1746+}
1747+
1748+/* utility functions to get the next and prev reserves; must hold entry lock */
1749+static inline npsf_reserve_t* local_next_reserve(npsf_reserve_t *curr,
1750+ cpu_entry_t *entry)
1751+{
1752+ return (list_is_last(&curr->node, &entry->npsf_reserves)) ?
1753+ list_entry(entry->npsf_reserves.next, npsf_reserve_t, node) :
1754+ list_entry(curr->node.next, npsf_reserve_t, node);
1755+
1756+}
1757+
1758+static inline npsf_reserve_t* local_prev_reserve(npsf_reserve_t *curr,
1759+ cpu_entry_t *entry)
1760+{
1761+ return ((curr->node.prev == &entry->npsf_reserves) ?
1762+ list_entry(entry->npsf_reserves.prev, npsf_reserve_t, node) :
1763+ list_entry(curr->node.prev, npsf_reserve_t, node));
1764+}
1765+static void requeue(struct task_struct* t, rt_domain_t *edf)
1766+{
1767+ if (t->state != TASK_RUNNING)
1768+ TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
1769+
1770+ BUG_ON(is_queued(t));
1771+
1772+ set_rt_flags(t, RT_F_RUNNING);
1773+ if (is_released(t, litmus_clock()))
1774+ __add_ready(edf, t);
1775+ else
1776+ add_release(edf, t); /* it has got to wait */
1777+}
1778+
1779+/* we assume the lock is being held */
1780+static void preempt(npsf_reserve_t *npsf)
1781+{
1782+ /* Since we do not support non-preemptable sections,
1783+ * we don't need to pass in a task. If we call this,
1784+ * we want the remote CPU to reschedule, no matter what.
1785+ */
1786+ preempt_if_preemptable(NULL, npsf->cpu->cpu);
1787+}
1788+
1789+
1790+static void npsf_preempt_if_server_is_scheduled(npsf_server_t* srv)
1791+{
1792+ npsf_reserve_t *reserve = srv->curr_reserve;
1793+ if (reserve->is_currently_scheduled) {
1794+ preempt(reserve);
1795+ }
1796+}
1797+
1798+/* assumes lock is held by caller */
1799+static void npsf_reschedule_server(npsf_server_t* srv)
1800+{
1801+ struct task_struct* hp = srv->highest_prio;
1802+ rt_domain_t* edf = &srv->dom;
1803+
1804+ if (edf_preemption_needed(edf, hp)) {
1805+ srv->highest_prio = __take_ready(edf);
1806+ if (hp) {
1807+ TRACE_TASK(hp, "requeue: no longer highest prio\n");
1808+ requeue(hp, edf);
1809+ }
1810+ npsf_preempt_if_server_is_scheduled(srv);
1811+ }
1812+}
1813+
1814+static void npsf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
1815+{
1816+ npsf_server_t *srv = server_from_dom(rt);
1817+ unsigned long flags;
1818+
1819+ raw_spin_lock_irqsave(&srv->lock, flags);
1820+
1821+ __merge_ready(rt, tasks);
1822+ npsf_reschedule_server(srv);
1823+
1824+ raw_spin_unlock_irqrestore(&srv->lock, flags);
1825+}
1826+
1827+static void job_completion(struct task_struct* t, int forced)
1828+{
1829+ sched_trace_task_completion(t, forced);
1830+ TRACE_TASK(t, "job_completion().\n");
1831+
1832+ set_rt_flags(t, RT_F_SLEEP);
1833+ prepare_for_next_period(t);
1834+}
1835+
1836+/* When did the current slot start? */
1837+static inline lt_t slot_begin(lt_t now)
1838+{
1839+ return (((now - time_origin) / npsf_slot_length)
1840+ * npsf_slot_length + time_origin);
1841+}
1842+
1843+/* Compute the delta from the beginning of the current slot. */
1844+static inline lt_t delta_from_slot_begin(lt_t now)
1845+{
1846+ return (now - slot_begin(now));
1847+}
1848+
1849+/* Given an offset into a slot, return the corresponding eligible reserve.
1850+ * The output param reservation_end is used to return the (relative) time at which
1851+ * the returned reserve ends.
1852+ */
1853+static npsf_reserve_t* get_reserve_for_offset(cpu_entry_t *entry, lt_t offset,
1854+ lt_t *reservation_end)
1855+{
1856+ npsf_reserve_t *tmp;
1857+
1858+ *reservation_end = 0;
1859+
1860+ /* linear search through all reserves; figure out which one is the last
1861+ * to become eligible before the given offset */
1862+ list_for_each_entry(tmp, &entry->npsf_reserves, node) {
1863+ *reservation_end += tmp->budget;
1864+
1865+ /* We are always "late", so the tmp we found is the right one */
1866+ if ((*reservation_end > offset))
1867+ return tmp;
1868+ }
1869+
1870+ /* error: we should never fall off the end of the reserve list */
1871+ BUG();
1872+ return NULL;
1873+}
1874+
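A small worked example for the slot arithmetic above; all values are illustrative, not defaults:

/*
 * Illustrative values: time_origin = 0, npsf_slot_length = 10ms, and a
 * CPU whose reserve list holds reserve A (budget 4ms) followed by
 * reserve B (budget 6ms).
 *
 *   now = 23ms  =>  slot_begin(now) = 20ms, offset = 3ms
 *   get_reserve_for_offset(entry, 3ms) -> reserve A, reservation_end = 4ms
 *   get_reserve_for_offset(entry, 7ms) -> reserve B, reservation_end = 10ms
 *
 * (reservation_end is relative to slot_begin). I.e., within every 10ms
 * slot this CPU serves reserve A for the first 4ms and reserve B for
 * the remaining 6ms.
 */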
1875+/* Determine which reserve is eligible based on the current time.
1876+ */
1877+static npsf_reserve_t* get_current_reserve(cpu_entry_t *entry)
1878+{
1879+ lt_t reservation_end;
1880+ lt_t offset = delta_from_slot_begin(litmus_clock());
1881+ return get_reserve_for_offset(entry, offset, &reservation_end);
1882+}
1883+
1884+/* This is used to ensure that we are "always" late, i.e., to make
1885+ * sure that the timer jitter is always positive. This should
1886+ * only trigger in KVM (or in real machines with bad TSC drift after
1887+ * an IPI).
1888+ *
1889+ * ATM proper tracing for this event is done in reserve_switch_tick().
1890+ */
1891+static noinline ktime_t catchup_time(lt_t from, lt_t target)
1892+{
1893+ while(lt_before(from, target)) {
1894+ from = litmus_clock();
1895+
1896+ mb();
1897+ cpu_relax();
1898+ }
1899+
1900+ return ns_to_ktime(from);
1901+}
1902+
1903+
1904+/* compute the next ABSOLUTE timer value */
1905+static lt_t get_next_reserve_switch_time(void)
1906+{
1907+ cpu_entry_t *entry = local_entry;
1908+ lt_t now = litmus_clock();
1909+ lt_t slot_start = slot_begin(now);
1910+ lt_t offset = now - slot_start;
1911+ lt_t next_time;
1912+ npsf_reserve_t* reserve;
1913+
1914+ /* compute the absolute litmus time of the next reserve switch */
1915+ reserve = get_reserve_for_offset(entry, offset, &next_time);
1916+ /* get_reserve_for_offset returns a relative start time; let's make it
1917+ absolute */
1918+ next_time += slot_start;
1919+
1920+ /* Let's see if we need to skip the next timer. */
1921+ reserve = local_next_reserve(reserve, entry);
1922+ /* if the next reserve is a continuing reserve
1923+ * (i.e., if it belongs to a migrating server),
1924+ * then we skip the timer event because we will
1925+ * receive an IPI from the previous processor instead. */
1926+ if (reserve->server->first_reserve != reserve) {
1927+ /* it is indeed not the first reserve */
1928+ next_time += reserve->budget;
1929+ }
1930+
1931+ return next_time;
1932+}
1933+
1934+/* This is the callback for reserve-switching interrupts.
1935+ * The timer is reprogrammed to expire at the beginning of every logical
1936+ * reserve (i.e., a continuing reserve may be split among different CPUs
1937+ * but is a _single_ logical reserve). get_next_reserve_switch_time()
1938+ * will return the right next_expire time.
1939+ */
1940+static enum hrtimer_restart reserve_switch_tick(struct hrtimer *timer)
1941+{
1942+ unsigned long flags;
1943+ cpu_entry_t *entry;
1944+ /* we are using CLOCK_MONOTONIC */
1945+ ktime_t now = ktime_get();
1946+ ktime_t delta;
1947+ int late;
1948+
1949+ entry = container_of(timer, cpu_entry_t, timer);
1950+ raw_spin_lock_irqsave(&entry->cpu_res_lock, flags);
1951+
1952+ /* jitter wrt virtual time */
1953+ delta = ktime_sub(now, ns_to_ktime(entry->should_expire));
1954+ late = (ktime_to_ns(delta) >= 0) ? 1 : 0;
1955+
1956+#ifdef NPSF_VERBOSE
1957+ if (entry->cpu_reserve && atomic_read(&all_servers_added))
1958+ TRACE("(npsf_id: %d) tick starts at %Ld, "
1959+ "now - should_expire: %Ld\n",
1960+ entry->cpu_reserve->npsf_id,
1961+ ktime_to_ns(now), ktime_to_ns(delta));
1962+#endif
1963+ /* If the timer expires earlier than the should_expire time,
1964+ * we delay the switch until it is synchronized with the
1965+ * switch boundary. Otherwise the next reserve would execute
1966+ * for longer than it should (wrong).
1967+ */
1968+ if (!late) {
1969+ TRACE("+++ Timer fired early, waiting...\n");
1970+ now = catchup_time(ktime_to_ns(now), entry->should_expire);
1971+
1972+ delta = ktime_sub(now, ns_to_ktime(entry->should_expire));
1973+ TRACE("+++ done, tick restarts at %Ld, "
1974+ "now - should_expire: %Ld\n",
1975+ ktime_to_ns(now), ktime_to_ns(delta));
1976+ }
1977+
1978+ BUG_ON(!atomic_read(&all_servers_added));
1979+ BUG_ON(no_reserves(entry));
1980+
1981+ /* Compute the next time that we need to be notified. */
1982+ entry->should_expire = get_next_reserve_switch_time();
1983+
1984+ /* kindly ask the Penguin to let us know... */
1985+ hrtimer_set_expires(timer, ns_to_ktime(entry->should_expire));
1986+
1987+ /* set resched flag to reschedule local cpu */
1988+ set_need_resched();
1989+
1990+ raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags);
1991+#ifdef NPSF_VERBOSE
1992+ if (atomic_read(&all_servers_added))
1993+ TRACE("(npsf_id: %d) tick ends at %Ld, should_expire: %llu\n",
1994+ entry->cpu_reserve->npsf_id, ktime_to_ns(ktime_get()),
1995+ entry->should_expire);
1996+#endif
1997+
1998+ return HRTIMER_RESTART;
1999+}
2000+
2001+static void npsf_scheduler_tick(struct task_struct *t)
2002+{
2003+ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
2004+ set_tsk_need_resched(t);
2005+ TRACE("npsf_tick: %d is preemptable "
2006+ " => FORCE_RESCHED\n", t->pid);
2007+ }
2008+}
2009+
2010+/* Assumption: caller holds srv lock and prev belongs to
2011+ * the currently-scheduled reservation.
2012+ */
2013+static void npsf_schedule_server(struct task_struct* prev,
2014+ cpu_entry_t *entry)
2015+{
2016+ npsf_server_t* srv = entry->cpu_reserve->server;
2017+
2018+ int out_of_time, sleep, exists, blocks;
2019+
2020+ exists = is_realtime(prev);
2021+ blocks = exists && !is_running(prev);
2022+ out_of_time = exists &&
2023+ budget_enforced(prev) &&
2024+ budget_exhausted(prev);
2025+ sleep = exists && get_rt_flags(prev) == RT_F_SLEEP;
2026+
2027+ if (exists)
2028+ TRACE_TASK(prev, "(npsf_id %d) blocks:%d "
2029+ "out_of_time:%d sleep:%d state:%d sig:%d\n",
2030+ task_npsfid(prev),
2031+ blocks, out_of_time, sleep,
2032+ prev->state,
2033+ signal_pending(prev));
2034+
2035+ /* Any task that is preemptable and either exhausts its
2036+ * execution budget or wants to sleep completes. We may have
2037+ * to reschedule after this.
2038+ */
2039+ if ((out_of_time || sleep) && !blocks) {
2040+ job_completion(prev, !sleep);
2041+
2042+ if (srv->highest_prio != prev) {
2043+ BUG_ON(!is_queued(prev));
2044+ remove(&srv->dom, prev);
2045+ }
2046+
2047+ requeue(prev, &srv->dom);
2048+
2049+ if (srv->highest_prio == prev)
2050+ srv->highest_prio = __take_ready(&srv->dom);
2051+ }
2052+
2053+ BUG_ON(blocks && prev == srv->highest_prio);
2054+// BUG_ON(!srv->highest_prio && jobs_pending(&srv->dom));
2055+}
2056+
2057+static void npsf_notify_next_cpu(npsf_reserve_t *npsf_prev)
2058+{
2059+ npsf_server_t *srv;
2060+
2061+ if (unlikely(npsf_prev->next_npsf != npsf_prev)) {
2062+ /* This reserve is actually shared. Let's update its 'owner'
2063+ * and notify the next CPU. */
2064+ srv = npsf_prev->server;
2065+ raw_spin_lock(&srv->lock);
2066+ srv->curr_reserve = npsf_prev->next_npsf;
2067+ if (srv->first_reserve != srv->curr_reserve ||
2068+ srv->first_cpu_wants_ipi) {
2069+ /* send an IPI to notify next CPU in chain */
2070+ srv->first_cpu_wants_ipi = 0;
2071+ TRACE("sending IPI\n");
2072+ preempt(srv->curr_reserve);
2073+ }
2074+ raw_spin_unlock(&srv->lock);
2075+ }
2076+}
2077+
2078+static struct task_struct* npsf_schedule(struct task_struct * prev)
2079+{
2080+ npsf_reserve_t *npsf_prev, *npsf_next;
2081+ npsf_server_t *srv_prev, *srv_next;
2082+ cpu_entry_t *entry = local_entry;
2083+ struct task_struct *next;
2084+
2085+ int reserve_switch;
2086+
2087+ /* servers not ready yet, yield to linux */
2088+ if (!atomic_read(&all_servers_added))
2089+ return NULL;
2090+
2091+#ifdef NPSF_VERBOSE
2092+ TRACE_TASK(prev, "schedule\n");
2093+#endif
2094+ raw_spin_lock(&entry->cpu_res_lock);
2095+
2096+ BUG_ON(no_reserves(entry));
2097+
2098+ /* step 1: what are we currently serving? */
2099+ npsf_prev = entry->cpu_reserve;
2100+ srv_prev = npsf_prev->server;
2101+
2102+ /* step 2: what SHOULD we be currently serving? */
2103+ npsf_next = get_current_reserve(entry);
2104+ srv_next = npsf_next->server;
2105+
2106+ /* TODO second measuring point for IPI receiving
2107+ * if (!srv_next->measure_wait_IPI) --- the remote reset
2108+ * trace_time_end.
2109+ */
2110+ raw_spin_lock(&srv_prev->lock);
2111+
2112+
2113+ /* step 3: update prev server */
2114+ if (is_realtime(prev) && task_npsfid(prev) == entry->cpu_reserve->npsf_id)
2115+ npsf_schedule_server(prev, entry);
2116+ else if (is_realtime(prev))
2117+ TRACE_TASK(prev, "npsf_id %d != cpu_reserve npsf_id %d\n",
2118+ task_npsfid(prev), entry->cpu_reserve->npsf_id);
2119+
2120+ /* step 4: determine if we need to switch to another reserve */
2121+ reserve_switch = npsf_prev != npsf_next;
2122+
2123+ if (!reserve_switch) {
2124+ /* easy case: just enact what the server scheduler decided */
2125+ next = srv_prev->highest_prio;
2126+
2127+ /* Unlock AFTER observing highest_prio to avoid races with
2128+ * remote rescheduling activity. */
2129+ raw_spin_unlock(&srv_prev->lock);
2130+ } else {
2131+ /* In this case we have a reserve switch. We are done with the
2132+ * previous server, so release its lock. */
2133+ TRACE("switch reserve npsf_id %d -> npsf_id %d\n",
2134+ npsf_prev->npsf_id, npsf_next->npsf_id);
2135+ npsf_prev->is_currently_scheduled = 0;
2136+ raw_spin_unlock(&srv_prev->lock);
2137+
2138+ /* Move on to the next server. */
2139+
2140+ raw_spin_lock(&srv_next->lock);
2141+ npsf_next->is_currently_scheduled = 1;
2142+
2143+ /* make sure we are owner of a server (if it is shared) */
2144+ if (unlikely(srv_next->curr_reserve != npsf_next)) {
2145+ /* We raced with the previous owner. Let's schedule
2146+ * the previous reserve for now. The previous owner
2147+ * will send us an IPI when the server has been pushed
2148+ * to us.
2149+ */
2150+ TRACE("(npsf_id %d) raced with previous server owner\n",
2151+ npsf_next->npsf_id);
2152+
2153+ /* check if we are the first CPU, in which case we need
2154+ * to request a notification explicitly */
2155+ if (srv_next->first_reserve == npsf_next)
2156+ srv_next->first_cpu_wants_ipi = 1;
2157+
2158+ npsf_next->is_currently_scheduled = 0;
2159+ raw_spin_unlock(&srv_next->lock);
2160+
2161+ /* just keep the previous reserve one more time */
2162+ raw_spin_lock(&srv_prev->lock);
2163+
2164+ npsf_prev->is_currently_scheduled = 1;
2165+ /* Note that there is no race condition here.
2166+ * Since curr_reserve did not yet point to this reserve,
2167+ * no processor can have observed the flag set in npsf_next.
2168+ * A processor might have observed the flag being zero
2169+ * in npsf_prev and decided not to send an IPI, which
2170+ * doesn't matter since we are going to reschedule
2171+ * below anyway. */
2172+
2173+ next = srv_prev->highest_prio;
2174+
2175+ raw_spin_unlock(&srv_prev->lock);
2176+
2177+ /* TODO first measuring point for '0'-switching time
2178+ * remote is not ready yet and will send us an IPI
2179+ * when it's done.
2180+ * local:
2181+ * srv_next->measure_wait_IPI = 1;
2182+ * remote before sending IPI:
2183+ * if (srv_next->measure_wait_IPI) reset;
2184+ */
2185+ } else {
2186+ /* invariant: srv->highest_prio is always the
2187+ * highest-priority job in the server, and it is always
2188+ * runnable. Any update to the server must maintain
2189+ * this invariant. */
2190+ next = srv_next->highest_prio;
2191+
2192+ entry->cpu_reserve = npsf_next;
2193+ raw_spin_unlock(&srv_next->lock);
2194+
2195+ /* send an IPI (if necessary) */
2196+ npsf_notify_next_cpu(npsf_prev);
2197+ }
2198+
2199+ }
2200+
2201+ if (next) {
2202+ TRACE_TASK(next, "(npsf_id %d) scheduled at %llu\n",
2203+ task_npsfid(next), litmus_clock());
2204+ set_rt_flags(next, RT_F_RUNNING);
2205+		/* The TASK_RUNNING flag is set by the Penguin _way_ after
2206+		 * activating a task. This doesn't matter much to Linux as
2207+		 * the rq lock will prevent any changes, but it matters to
2208+		 * us. A remote cpu waking up this task may requeue it
2209+		 * before it is runnable and send an IPI here; we then
2210+		 * schedule the task (still "not runnable"), and the running
2211+		 * flag is only set just before next actually executes.
2212+ */
2213+ if (!is_running(next))
2214+ TRACE_TASK(next, "BAD: !TASK_RUNNING\n");
2215+ } else {
2216+ /* FIXME npsf_id is wrong if reserve switch but "switching back"
2217+		/* FIXME: npsf_id is wrong if we raced on a reserve switch
2218+		 * and had to "switch back" to the previous reserve */
2219+ reserve_switch ? npsf_next->npsf_id : npsf_prev->npsf_id,
2220+ litmus_clock());
2221+ }
2222+
2223+ raw_spin_unlock(&entry->cpu_res_lock);
2224+
2225+ return next;
2226+}
2227+
2228+/* Prepare a task for running in RT mode
2229+ *
2230+ * We can only be sure that the cpu is the right one (admission
2231+ * control rejects tasks released on a cpu that doesn't host the
2232+ * right npsf_id), but we _cannot_ be sure that:
2233+ * 1) the found npsf is the reserve currently running on this cpu;
2234+ * 2) the current reserve (the one in charge of scheduling) is not
2235+ * running on a different cpu.
2236+ */
2237+static void npsf_task_new(struct task_struct * t, int on_rq, int running)
2238+{
2239+ npsf_reserve_t *npsf;
2240+ npsf_server_t *srv;
2241+ cpu_entry_t *entry = task_entry(t);
2242+ rt_domain_t *edf;
2243+ unsigned long flags;
2244+
2245+ BUG_ON(no_reserves(entry));
2246+
2247+	/* find the reserve (and thus the server) to which the new task belongs */
2248+ list_for_each_entry(npsf, &entry->npsf_reserves, node) {
2249+ if (npsf->npsf_id == task_npsfid(t))
2250+ break;
2251+ }
2252+
2253+
2254+ srv = npsf->server;
2255+
2256+ /* The task should be running in the queue, otherwise signal
2257+ * code will try to wake it up with fatal consequences.
2258+ */
2259+ raw_spin_lock_irqsave(&entry->cpu_res_lock, flags);
2260+ raw_spin_lock(&srv->lock);
2261+
2262+ edf = domain_edf(npsf);
2263+ tsk_rt(t)->domain = edf;
2264+
2265+ TRACE_TASK(t, "task_new: P%d, task_npsfid %d, "
2266+ "npsf->npsf_id %d, entry->cpu %d\n",
2267+ t->rt_param.task_params.cpu, task_npsfid(t),
2268+ npsf->npsf_id, entry->cpu);
2269+
2270+ /* setup job parameters */
2271+ release_at(t, litmus_clock());
2272+
2273+ /* There are four basic scenarios that could happen:
2274+ * 1) the server is on another cpu and scheduled;
2275+ * 2) the server is on another cpu and not scheduled;
2276+ * 3) the server is on this cpu and scheduled; and
2277+ * 4) the server is on this cpu and not scheduled.
2278+ *
2279+ * Whatever scenario we're in, it cannot change while we are
2280+ * holding the server lock.
2281+ *
2282+	 * If the new task does not have the highest priority, then
2283+	 * we can just queue it and be done.
2284+	 *
2285+	 * In theory, the requeue() and npsf_reschedule_server() code
2286+	 * takes care of all that.
2287+ */
2288+
2289+ requeue(t, edf);
2290+ /* reschedule will cause a remote preemption, if required */
2291+ npsf_reschedule_server(srv);
2292+ /* always preempt to make sure we don't
2293+ * use the stack if it needs to migrate */
2294+ set_tsk_need_resched(t);
2295+
2296+ raw_spin_unlock(&srv->lock);
2297+ raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags);
2298+}
2299+
2300+static void npsf_task_wake_up(struct task_struct *t)
2301+{
2302+ rt_domain_t *edf;
2303+ npsf_server_t* srv;
2304+ unsigned long flags;
2305+ lt_t now;
2306+
2307+ BUG_ON(!is_realtime(t));
2308+
2309+ edf = tsk_rt(t)->domain;
2310+ srv = server_from_dom(edf);
2311+
2312+ raw_spin_lock_irqsave(&srv->lock, flags);
2313+
2314+ BUG_ON(is_queued(t));
2315+
2316+ now = litmus_clock();
2317+ /* FIXME: this should be a configurable policy... */
2318+ if (is_tardy(t, now)) {
2319+ /* new sporadic release */
2320+ release_at(t, now);
2321+ sched_trace_task_release(t);
2322+ }
2323+
2324+ /* Only add to ready queue if it is not the
2325+ * currently-scheduled task.
2326+ */
2327+ if (srv->highest_prio != t) {
2328+ requeue(t, edf);
2329+ npsf_reschedule_server(srv);
2330+ }
2331+#ifdef NPSF_VERBOSE
2332+ else
2333+ TRACE_TASK(t, "wake_up, is curr_sched, not requeued\n");
2334+#endif
2335+
2336+ raw_spin_unlock_irqrestore(&srv->lock, flags);
2337+
2338+ TRACE_TASK(t, "wake up done\n");
2339+}
2340+
2341+static void remove_from_server(struct task_struct *t, npsf_server_t* srv)
2342+{
2343+ if (srv->highest_prio == t) {
2344+ TRACE_TASK(t, "remove from server: is highest-prio task\n");
2345+ srv->highest_prio = NULL;
2346+ npsf_reschedule_server(srv);
2347+ } else if (is_queued(t)) {
2348+ TRACE_TASK(t, "remove from server: removed from queue\n");
2349+ remove(&srv->dom, t);
2350+ }
2351+#ifdef NPSF_VERBOSE
2352+ else
2353+ TRACE_TASK(t, "WARN: where is this task?\n");
2354+#endif
2355+}
2356+
2357+static void npsf_task_block(struct task_struct *t)
2358+{
2359+ rt_domain_t *edf;
2360+ npsf_server_t* srv;
2361+ unsigned long flags;
2362+
2363+ TRACE_TASK(t, "(npsf_id %d) block at %llu, state=%d\n",
2364+ task_npsfid(t), litmus_clock(), t->state);
2365+
2366+ BUG_ON(!is_realtime(t));
2367+
2368+ edf = tsk_rt(t)->domain;
2369+ srv = server_from_dom(edf);
2370+
2371+ raw_spin_lock_irqsave(&srv->lock, flags);
2372+
2373+ remove_from_server(t, srv);
2374+
2375+ raw_spin_unlock_irqrestore(&srv->lock, flags);
2376+}
2377+
2378+static void npsf_task_exit(struct task_struct * t)
2379+{
2380+ rt_domain_t *edf;
2381+ npsf_server_t* srv;
2382+ unsigned long flags;
2383+
2384+ BUG_ON(!is_realtime(t));
2385+
2386+ edf = tsk_rt(t)->domain;
2387+ srv = server_from_dom(edf);
2388+
2389+ raw_spin_lock_irqsave(&srv->lock, flags);
2390+
2391+ remove_from_server(t, srv);
2392+
2393+ raw_spin_unlock_irqrestore(&srv->lock, flags);
2394+
2395+ TRACE_TASK(t, "RIP, now reschedule\n");
2396+}
2397+
2398+static long npsf_admit_task(struct task_struct* tsk)
2399+{
2400+ npsf_reserve_t *npsf;
2401+ cpu_entry_t *entry = task_entry(tsk);
2402+ int id_ok = 0;
2403+
2404+ if (!atomic_read(&all_servers_added)) {
2405+ printk(KERN_DEBUG "not all servers added\n");
2406+ return -ENODEV;
2407+ }
2408+
2409+	/* check that the task is on the right cpu and the right server */
2410+ if (task_cpu(tsk) != tsk->rt_param.task_params.cpu) {
2411+ printk(KERN_DEBUG "wrong CPU(%d, %d, %d) for npsf_id %d\n",
2412+ task_cpu(tsk), tsk->rt_param.task_params.cpu,
2413+ entry->cpu, task_npsfid(tsk));
2414+ return -EINVAL;
2415+ }
2416+
2417+ /* 1) this cpu should have the proper npsf_id in the list
2418+ * 2) the rt_domain for the proper npsf_id is not null
2419+ */
2420+ list_for_each_entry(npsf, &entry->npsf_reserves, node) {
2421+ if (npsf->npsf_id == task_npsfid(tsk)) {
2422+ id_ok = 1;
2423+ break;
2424+ }
2425+ }
2426+ if (!id_ok)
2427+ printk(KERN_DEBUG "wrong npsf_id (%d) for entry %d\n",
2428+ task_npsfid(tsk), entry->cpu);
2429+
2430+ return id_ok ? 0 : -EINVAL;
2431+}
2432+
2433+/* in litmus.c */
2434+extern atomic_t rt_task_count;
2435+
2436+/* initialization status control */
2437+static int reserves_allocated = 0;
2438+
2439+#ifdef NPSF_VERBOSE
2440+static void print_reserve(cpu_entry_t *cpu)
2441+{
2442+ npsf_reserve_t *tmp;
2443+
2444+ printk(KERN_INFO "NPS-F: reserves on CPU %d:\n", cpu->cpu);
2445+ list_for_each_entry(tmp, &cpu->npsf_reserves, node) {
2446+ BUG_ON(!tmp->server);
2447+ BUG_ON(!&(tmp->server->dom));
2448+ BUG_ON(tmp->server->highest_prio);
2449+ printk(KERN_INFO "%d: %d us\n", tmp->npsf_id,
2450+ (int)(tmp->budget / 1000));
2451+ }
2452+}
2453+#endif
2454+/*
2455+ * do_add_reserve: add a reserve(cpu, id, budget)
2456+ *
2457+ * Callback for syscall add_server(); it adds the reserve "id"
2458+ * to the CPU "cpu". "budget" is the length of the reserve for the
2459+ * notional processor (server) "id" on that cpu.
2460+ */
2461+static long do_add_reserve(npsf_reserve_t **new, cpu_entry_t *cpu,
2462+ npsf_server_t *the_dom, int npsf_id, lt_t budget)
2463+{
2464+ unsigned long flags;
2465+
2466+	/* The npsf_ids for each cpu must be given in increasing order;
2467+	 * adding the same notional processor twice on the same cpu makes no sense.
2468+	 * last_seen_npsf_id is reset upon plugin insertion.
2469+ */
2470+ if (cpu->last_seen_npsf_id >= npsf_id)
2471+ return -EINVAL;
2472+
2473+ /* don't allow server changes if there are tasks in the system */
2474+ if (atomic_read(&rt_task_count))
2475+ return -EACCES;
2476+
2477+ if ((*new = kmalloc(sizeof(npsf_reserve_t), GFP_ATOMIC)) == NULL)
2478+ return -ENOMEM;
2479+
2480+ (*new)->server = the_dom;
2481+ (*new)->npsf_id = npsf_id;
2482+ (*new)->budget = budget;
2483+ (*new)->cpu = cpu;
2484+
2485+ npsf_printk("Add npsf_id %d on P%d with budget %llu\n", (*new)->npsf_id,
2486+ (*new)->cpu->cpu, (*new)->budget);
2487+
2488+ raw_spin_lock_irqsave(&cpu->cpu_res_lock, flags);
2489+
2490+ list_add_tail(&(*new)->node, &cpu->npsf_reserves);
2491+ cpu->last_seen_npsf_id = npsf_id;
2492+ cpu->cpu_reserve = list_first_entry(&cpu->npsf_reserves, npsf_reserve_t, node);
2493+
2494+ raw_spin_unlock_irqrestore(&cpu->cpu_res_lock, flags);
2495+
2496+ return 0;
2497+}
2498+
2499+static void kickoff_timers(void)
2500+{
2501+ int cpu;
2502+ cpu_entry_t *entry;
2503+ lt_t kickoff;
2504+
2505+ kickoff = slot_begin(litmus_clock() + npsf_slot_length * 2);
2506+
2507+ for_each_online_cpu(cpu) {
2508+ entry = &per_cpu(npsf_cpu_entries, cpu);
2509+ hrtimer_start_on(cpu, &entry->info, &entry->timer,
2510+ ns_to_ktime(kickoff),
2511+ HRTIMER_MODE_ABS_PINNED);
2512+ entry->should_expire = kickoff;
2513+ }
2514+ atomic_set(&timers_activated, 1);
2515+}
2516+
2517+/* We offer the library a budgets-array interface (so we go through the
2518+ * syscall path only once) and internally cycle over do_add_reserve().
2519+ *
2520+ * last == 1 means that the user is adding the last server; after
2521+ * the insertion the plugin is fully set up. (FIXME it should be
2522+ * done in a better way, but I doubt this plugin will ever go
2523+ * to the master branch).
2524+ */
2525+asmlinkage long sys_add_server(int __user *__id,
2526+ struct npsf_budgets __user *__budgets, int last)
2527+{
2528+ int id, i;
2529+ int ret = -EFAULT;
2530+ struct npsf_budgets *budgets;
2531+ cpu_entry_t *entry;
2532+ npsf_server_t *npsfserver;
2533+ npsf_reserve_t *npsf_reserve_array[NR_CPUS];
2534+ npsf_reserve_t *first_reserve;
2535+
2536+ if (_online_cpus != num_online_cpus())
2537+ return ret;
2538+
2539+ if (copy_from_user(&id, __id, sizeof(id)))
2540+ return ret;
2541+
2542+ budgets = kmalloc(_online_cpus * sizeof(struct npsf_budgets),
2543+ GFP_ATOMIC);
2544+
2545+ for (i = 0; i < _online_cpus; i++) {
2546+ budgets[i].cpu = NO_CPU;
2547+ budgets[i].budget = 0;
2548+ }
2549+
2550+ if (copy_from_user(budgets, __budgets,
2551+			sizeof(*budgets) * _online_cpus))
2552+ goto err;
2553+
2554+ /* initialize the npsf_server_t for this npsf_server series */
2555+ npsfserver = kmalloc(sizeof(npsf_server_t), GFP_ATOMIC);
2556+ if (!npsfserver) {
2557+ ret = -ENOMEM;
2558+ goto err;
2559+ }
2560+ edf_domain_init(&npsfserver->dom, NULL, npsf_release_jobs);
2561+ npsfserver->highest_prio = NULL;
2562+
2563+ /* initialize all npsf_reserve_t for this server */
2564+ for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) {
2565+ entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu);
2566+ if ((ret = do_add_reserve(&npsf_reserve_array[i], entry,
2567+ npsfserver,
2568+ id, budgets[i].budget)) < 0)
2569+ goto err;
2570+ }
2571+	/* set the current reserve to the first (and possibly only)
2572+	 * slice for this npsf_id */
2573+ npsfserver->curr_reserve = npsf_reserve_array[0];
2574+ npsfserver->first_reserve = npsf_reserve_array[0];
2575+ npsfserver->first_cpu_wants_ipi = 0;
2576+ for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) {
2577+
2578+ if (i == 0 && budgets[i+1].cpu == NO_CPU) {
2579+ /* Fixed reserve always has itself as next */
2580+ npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i];
2581+ } else if (((i+1) < _online_cpus) &&
2582+ (i > 0 && budgets[i+1].cpu == NO_CPU)) {
2583+ /* Last reserve in the chain has the first reserve as next */
2584+ npsf_reserve_array[i]->next_npsf = npsf_reserve_array[0];
2585+ } else {
2586+ /* Normal continuing reserve */
2587+ npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i+1];
2588+ }
2589+ }
2590+#ifdef NPSF_VERBOSE
2591+ for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) {
2592+ entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu);
2593+ print_reserve(entry);
2594+ }
2595+#endif
2596+
2597+ if (last) {
2598+		/* force the first slot switch by pointing each cpu's
2599+		 * cpu_reserve (and its server's curr_reserve) at the first reserve.
2600+		 *
2601+		 * FIXME: don't assume there exists at least one reserve per CPU
2602+ */
2603+ for_each_online_cpu(i) {
2604+ entry = &per_cpu(npsf_cpu_entries, i);
2605+ first_reserve = list_entry(entry->npsf_reserves.next,
2606+ npsf_reserve_t, node);
2607+
2608+ first_reserve->server->curr_reserve = first_reserve;
2609+ entry->cpu_reserve = first_reserve;
2610+ npsf_printk("npsf_id %d is the current reserve "
2611+ "and server on CPU %d\n",
2612+ first_reserve->npsf_id, entry->cpu);
2613+
2614+ }
2615+
2616+ kickoff_timers();
2617+
2618+ /* real plugin enable */
2619+ atomic_set(&all_servers_added, 1);
2620+ mb();
2621+ }
2622+
2623+ /* at least one server was initialized and may need deletion */
2624+ reserves_allocated = 1;
2625+err:
2626+ kfree(budgets);
2627+ return ret;
2628+}
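/* Editor's sketch (not part of the patch): how the userspace side might
 * drive this interface.  It assumes a liblitmus-style wrapper
 * add_server(int *id, struct npsf_budgets *budgets, int last) with the
 * same semantics as the syscall above, and the npsf_budgets layout implied
 * by the kernel code (a cpu field plus a budget in nanoseconds); the
 * wrapper name, budget values, and CPU layout are illustrative assumptions.
 */
static int add_one_server(int online_cpus)
{
	int id = 0;	/* npsf_id of the server (notional processor) */
	int i;
	struct npsf_budgets budgets[online_cpus];

	/* one entry per online CPU; unused slots are terminated with NO_CPU */
	for (i = 0; i < online_cpus; i++) {
		budgets[i].cpu = NO_CPU;
		budgets[i].budget = 0;
	}

	/* server 0 runs 3 ms on CPU 0 and 2 ms on CPU 1 in every NPS-F slot */
	budgets[0].cpu = 0;
	budgets[0].budget = 3000000ULL;
	budgets[1].cpu = 1;
	budgets[1].budget = 2000000ULL;

	/* last = 1: this is the final server, so the plugin kicks off the
	 * per-CPU slot-switching timers and becomes fully active */
	return add_server(&id, budgets, 1);
}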
2629+
2630+
2631+/* Cancel server_reschedule_tick() hrtimers. Wait for all callbacks
2632+ * to complete. The function is triggered by writing 0 to npsf_slot_length.
2633+ */
2634+void npsf_hrtimers_cleanup(void)
2635+{
2636+ int cpu;
2637+ cpu_entry_t *entry;
2638+ int redo;
2639+
2640+ if (!atomic_read(&timers_activated))
2641+ return;
2642+
2643+ atomic_set(&timers_activated, 0);
2644+
2645+ /* prevent the firing of the timer on this cpu */
2646+ do {
2647+ redo = 0;
2648+ for_each_online_cpu(cpu) {
2649+ entry = &per_cpu(npsf_cpu_entries, cpu);
2650+
2651+ /* if callback active, skip it for now and redo later */
2652+ if (hrtimer_try_to_cancel(&entry->timer) == -1) {
2653+ redo = 1;
2654+#ifdef NPSF_VERBOSE
2655+ printk(KERN_INFO "(P%d) hrtimer on P%d was "
2656+ "active, try to delete again\n",
2657+ get_cpu(), cpu);
2658+ put_cpu();
2659+#endif
2660+ }
2661+ }
2662+ } while (redo);
2663+
2664+ printk(KERN_INFO "npsf hrtimers deleted\n");
2665+}
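/* Editor's sketch (not part of the patch): updating npsf_slot_length from
 * userspace via its proc entry.  The proc path used below is an assumption
 * (the entry is created elsewhere in this patch); writing 0 triggers
 * npsf_hrtimers_cleanup() as described above, while a non-zero value sets
 * a new slot length in nanoseconds.
 */
#include <stdio.h>

static int set_npsf_slot_length(long long ns)
{
	/* hypothetical path; adjust to the proc entry this patch creates */
	FILE *f = fopen("/proc/litmus/npsf_slot_length", "w");

	if (!f)
		return -1;
	fprintf(f, "%lld\n", ns);	/* 0 cancels the per-CPU slot timers */
	return fclose(f);
}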
2666+
2667+static void cleanup_npsf(void)
2668+{
2669+ int cpu;
2670+ cpu_entry_t *entry;
2671+ struct list_head *nd, *next;
2672+ npsf_reserve_t *tmp, *tmp_save;
2673+
2674+ for_each_online_cpu(cpu) {
2675+ entry = &per_cpu(npsf_cpu_entries, cpu);
2676+
2677+ /* FIXME probably not needed as we should be the only cpu
2678+ * doing the removal */
2679+ raw_spin_lock(&entry->cpu_res_lock);
2680+
2681+ list_for_each_safe(nd, next, &entry->npsf_reserves) {
2682+ tmp = list_entry(nd, npsf_reserve_t, node);
2683+ npsf_printk("Del. (id, cpu):(%d, %d)\n",
2684+ tmp->npsf_id,
2685+ tmp->cpu->cpu);
2686+ if (tmp->server) {
2687+ npsf_printk("Del. reserves for npsf_id %d\n",
2688+ tmp->npsf_id);
2689+ tmp_save = tmp;
2690+ while (tmp_save->next_npsf &&
2691+ tmp_save->next_npsf != tmp) {
2692+ tmp_save = tmp_save->next_npsf;
2693+ tmp_save->server = NULL;
2694+ }
2695+ npsf_printk("Freeing server 0x%p\n", tmp->server);
2696+ kfree(tmp->server);
2697+ }
2698+ npsf_printk("Freeing npsf_reserve_t 0x%p\n", tmp);
2699+ kfree(tmp);
2700+ }
2701+ list_del(&entry->npsf_reserves);
2702+ raw_spin_unlock(&entry->cpu_res_lock);
2703+ }
2704+}
2705+
2706+/* prevent plugin deactivation if timers are still active */
2707+static long npsf_deactivate_plugin(void)
2708+{
2709+ return (atomic_read(&timers_activated)) ? -1 : 0;
2710+}
2711+
2712+static long npsf_activate_plugin(void)
2713+{
2714+ int cpu;
2715+ cpu_entry_t *entry;
2716+ ktime_t now = ktime_get();
2717+
2718+ /* prevent plugin switching if timers are active */
2719+ if (atomic_read(&timers_activated))
2720+ return -1;
2721+
2722+ atomic_set(&all_servers_added, 0);
2723+
2724+ /* de-allocate old servers (if any) */
2725+ if (reserves_allocated)
2726+ cleanup_npsf();
2727+
2728+ _online_cpus = num_online_cpus();
2729+
2730+ for_each_online_cpu(cpu) {
2731+ entry = &per_cpu(npsf_cpu_entries, cpu);
2732+
2733+ raw_spin_lock_init(&entry->cpu_res_lock);
2734+
2735+ entry->cpu_reserve = NULL;
2736+ INIT_LIST_HEAD(&entry->npsf_reserves);
2737+
2738+ entry->cpu = cpu;
2739+ hrtimer_init(&entry->timer, CLOCK_MONOTONIC,
2740+ HRTIMER_MODE_ABS_PINNED);
2741+
2742+ /* initialize (reinitialize) pull timers */
2743+ hrtimer_start_on_info_init(&entry->info);
2744+
2745+ entry->timer.function = reserve_switch_tick;
2746+ entry->last_seen_npsf_id = -1;
2747+ }
2748+
2749+ printk(KERN_INFO "NPS-F activated: slot length = %lld ns\n",
2750+ npsf_slot_length);
2751+
2752+ /* time starts now! */
2753+ time_origin = (lt_t) ktime_to_ns(now);
2754+ TRACE("Time_origin = %llu\n", time_origin);
2755+ return 0;
2756+}
2757+
2758+/* Plugin object */
2759+static struct sched_plugin npsf_plugin __cacheline_aligned_in_smp = {
2760+ .plugin_name = "NPS-F",
2761+
2762+ .tick = npsf_scheduler_tick,
2763+ .task_new = npsf_task_new,
2764+ .complete_job = complete_job,
2765+ .task_exit = npsf_task_exit,
2766+ .schedule = npsf_schedule,
2767+ .task_wake_up = npsf_task_wake_up,
2768+ .task_block = npsf_task_block,
2769+ .admit_task = npsf_admit_task,
2770+ .activate_plugin = npsf_activate_plugin,
2771+ .deactivate_plugin = npsf_deactivate_plugin,
2772+};
2773+
2774+static int __init init_npsf(void)
2775+{
2776+ return register_sched_plugin(&npsf_plugin);
2777+}
2778+
2779+static void __exit exit_npsf(void)
2780+{
2781+ if (atomic_read(&timers_activated)) {
2782+ atomic_set(&timers_activated, 0);
2783+ return;
2784+ }
2785+
2786+ if (reserves_allocated)
2787+ cleanup_npsf();
2788+}
2789+
2790+module_init(init_npsf);
2791+module_exit(exit_npsf);
2792+
2793diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
2794index 3543b7b..3036df9 100644
2795--- a/litmus/sched_plugin.c
2796+++ b/litmus/sched_plugin.c
2797@@ -179,6 +179,12 @@ struct sched_plugin linux_sched_plugin = {
2798 int cluster_cache_index = 2;
2799
2800 /*
2801+ * Slot length (in ns) for NPS-F semi-partitioned plugin.
2802+ * This value can be changed at "runtime" through a proc file.
2803+ */
2804+lt_t npsf_slot_length = 5 * NSEC_PER_MSEC;
2805+
2806+/*
2807 * The reference to current plugin that is used to schedule tasks within
2808 * the system. It stores references to actual function implementations
2809 * Should be initialized by calling "init_***_plugin()"
diff --git a/index.html b/index.html
index 7d3d555..653e8f3 100644
--- a/index.html
+++ b/index.html
@@ -135,9 +135,30 @@ Have a look at our group's <a href="http://www.cs.unc.edu/%7Eanderson/papers.htm
135 <div class="box"> 135 <div class="box">
136 136
137 <ol class="nomargin"> 137 <ol class="nomargin">
138
139 <li><p>
140 A. Bastoni, B. Brandenburg and J. Anderson,
141 &ldquo;Is Semi-Partitioned Scheduling Practical?&rdquo;,
142 <cite>in submission</cite>, January 2011.
143 <a href="http://www.cs.unc.edu/~bbb/papers/ecrts11a.pdf">PDF</a>.
144 Longer version with all graphs: <a href="http://www.cs.unc.edu/~bbb/papers/ecrts11a-long.pdf">PDF</a> </p>
145 <p> For reference, all evaluated plugins as well as the required userspace tools are provided as part of the following patches (against version 2010.2).
146</p>
147 <ul>
148 <li>
149 <a href="download/ECRTS11/litmus-rt-semi-part.patch">litmus-rt-semi-part.patch</a>
150 </li>
151 <li>
152 <a href="download/ECRTS11/liblitmus-semi-part.patch">liblitmus-semi-part.patch</a>
153 </li>
154
155 </ul>
156 </li>
157
158
138 159
139 <li><p> 160 <li><p>
140 A.Bastoni, B. Brandenburg and J. Anderson, 161 A. Bastoni, B. Brandenburg and J. Anderson,
141 &ldquo;An Empirical Comparison of Global, Partitioned, and Clustered Multiprocessor Real-Time Schedulers&rdquo;, 162 &ldquo;An Empirical Comparison of Global, Partitioned, and Clustered Multiprocessor Real-Time Schedulers&rdquo;,
142 <cite>Proceedings of the 31st IEEE Real-Time Systems Symposium</cite>, pp.&nbsp;14-24, December 2010. 163 <cite>Proceedings of the 31st IEEE Real-Time Systems Symposium</cite>, pp.&nbsp;14-24, December 2010.
143 <a href="http://www.cs.unc.edu/~anderson/papers/rtss10c.pdf">PDF</a>. 164 <a href="http://www.cs.unc.edu/~anderson/papers/rtss10c.pdf">PDF</a>.