From e0fc039330051c525245cea8c120c45d59836140 Mon Sep 17 00:00:00 2001 From: "Bjoern B. Brandenburg" Date: Wed, 26 Jan 2011 18:39:46 -0500 Subject: Provide semi-part plugins as patches. --- download/ECRTS11/liblitmus-semi-part.patch | 1657 ++++++++++++++++ download/ECRTS11/litmus-rt-semi-part.patch | 2809 ++++++++++++++++++++++++++++ index.html | 23 +- 3 files changed, 4488 insertions(+), 1 deletion(-) create mode 100644 download/ECRTS11/liblitmus-semi-part.patch create mode 100644 download/ECRTS11/litmus-rt-semi-part.patch diff --git a/download/ECRTS11/liblitmus-semi-part.patch b/download/ECRTS11/liblitmus-semi-part.patch new file mode 100644 index 0000000..fd99526 --- /dev/null +++ b/download/ECRTS11/liblitmus-semi-part.patch @@ -0,0 +1,1657 @@ +diff --git a/SConstruct b/SConstruct +index c41e41e..9935396 100644 +--- a/SConstruct ++++ b/SConstruct +@@ -208,6 +208,13 @@ rt.Program('base_task', 'bin/base_task.c') + mtrt.Program('base_mt_task', 'bin/base_mt_task.c') + rt.Program('rt_launch', ['bin/rt_launch.c', 'bin/common.c']) + rt.Program('rtspin', ['bin/rtspin.c', 'bin/common.c']) ++rt.Program('rtspin_edffm', ['bin/rtspin_edffm.c', 'bin/common.c']) ++rt.Program('rt_launch_edffm', ['bin/rt_launch_edffm.c', 'bin/common.c']) ++rt.Program('rtspin_npsf', ['bin/rtspin_npsf.c', 'bin/common.c']) ++rt.Program('npsf_add_server', ['bin/npsf_add_server.c', 'bin/common.c']) ++rt.Program('rt_launch_npsf', ['bin/rt_launch_npsf.c', 'bin/common.c']) ++rt.Program('rtspin_edfwm', ['bin/rtspin_edfwm.c', 'bin/common.c']) ++rt.Program('rt_launch_edfwm', ['bin/rt_launch_edfwm.c', 'bin/common.c']) + rt.Program('release_ts', 'bin/release_ts.c') + rtm.Program('measure_syscall', 'bin/null_call.c') + +diff --git a/bin/common.c b/bin/common.c +index 452b882..167344a 100644 +--- a/bin/common.c ++++ b/bin/common.c +@@ -1,6 +1,7 @@ + #include + #include + #include ++#include + + #include "common.h" + +@@ -9,3 +10,120 @@ void bail_out(const char* msg) + perror(msg); + exit(-1 * errno); + } ++ ++/* EDF-WM helper functions to parse a custom text file format to "easily" ++ * launch tests with rtspin and rt_launch: ++ * ++ * Format for task: ++ * ++ * . ++ * ++ * If the task is split on multiple slices, slices_number is non 0 ++ * and we scan a list of slice parameters up to slices_number: ++ * ++ * Format for slices: ++ * ++ * . ++ * ++ * The offset is the start time for the slice relative to the job release. ++ * ++ * Example: ++ * 14 2.26245771754 10 0 5 2 ++ * 14 5 5.000000 1.497306 0.000000 ++ * 14 7 10.000000 0.765152 5.000000 ++ */ ++ ++#define fms_to_ns(x) (lt_t)(((x) * __NS_PER_MS)) ++/* ++ * . 
++ */ ++int parse_edfwm_slice(FILE *ts, int slices_no, int task_id, ++ struct rt_task *rt) ++{ ++ int i, tid; ++ unsigned int cpu; ++ double deadline, budget, offset; ++ ++ lt_t total_budget = 0; ++ ++ struct edf_wm_params* wm = (struct edf_wm_params*) &rt->semi_part; ++ ++ for (i = 0; i < slices_no; i++) { ++ ++ if (fscanf(ts, "%d %u %lf %lf %lf\n", &tid, &cpu, ++ &deadline, &budget, &offset) != EOF) { ++ ++ if (task_id != tid) { ++ fprintf(stderr, "task_id %d != tid %d\n", ++ task_id, tid); ++ return -1; ++ } ++ ++ wm->slices[i].deadline = fms_to_ns(deadline); ++ wm->slices[i].budget = fms_to_ns(budget); ++ wm->slices[i].offset = fms_to_ns(offset); ++ wm->slices[i].cpu = cpu; ++ ++ printf("slice(tid, cpu, d, e, ph) = (%d, %u, %llu, %llu, %llu)\n", ++ tid, cpu, wm->slices[i].deadline, ++ wm->slices[i].budget, wm->slices[i].offset); ++ ++ total_budget += wm->slices[i].budget; ++ if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) { ++ ++ fprintf(stderr, "Slice %llu is too small\n", ++ wm->slices[i].budget); ++ return -1; ++ } ++ } ++ ++ if (ferror(ts)) { ++ fprintf(stderr, "Cannot read file\n"); ++ return -1; ++ } ++ } ++ wm->count = slices_no; ++ rt->exec_cost = total_budget; ++ printf("--- total %u slices ---\n", wm->count); ++ return 0; ++} ++ ++/* ++ * . ++ */ ++int parse_edfwm_ts_file(FILE *ts, struct rt_task *rt) ++{ ++ int task_id, ret = 1; ++ unsigned int cpu, sliceno; ++ double fwcet, fperiod, fphase; ++ ++ ret = fscanf(ts, "%d %lf %lf %lf %d %d\n", ++ &task_id, &fwcet, &fperiod, &fphase, &cpu, &sliceno); ++ ++ if (ferror(ts)) ++ goto err; ++ ++ rt->exec_cost = fms_to_ns(fwcet); ++ rt->period = fms_to_ns(fperiod); ++ rt->phase = fms_to_ns(fphase); ++ rt->cpu = cpu; ++ rt->cls = RT_CLASS_HARD; ++ rt->budget_policy = PRECISE_ENFORCEMENT; ++ ++ printf("(tid, wcet, period, ph, cpu, slices) = " ++ "(%d, %llu, %llu, %llu, %u, %u)\n", ++ task_id, rt->exec_cost, rt->period, rt->phase, cpu, sliceno); ++ if (sliceno > 0) { ++ memset(&rt->semi_part, 0, sizeof(struct edf_wm_params)); ++ ret = parse_edfwm_slice(ts, sliceno, task_id, rt); ++ if (ret < 0) ++ goto err; ++ } ++ ++ return 0; ++ ++err: ++ fprintf(stderr, "Error parsing file\n"); ++ return -1; ++} ++ +diff --git a/bin/npsf_add_server.c b/bin/npsf_add_server.c +new file mode 100644 +index 0000000..9f3f92c +--- /dev/null ++++ b/bin/npsf_add_server.c +@@ -0,0 +1,108 @@ ++/* wrapper for sys_add_server ++ * ++ * Input: a file with on each line: ++ * npsf_id cpu budget(us) ++ */ ++#include ++#include ++#include ++ ++#include "litmus.h" ++#include "common.h" ++ ++void usage(char *error) { ++ fprintf(stderr, "Error: %s\n", error); ++ fprintf(stderr, ++ "Usage: npsf_add_server SERVERS-FILE MAX-SPLITS-PER-NPSFID\n"); ++ exit(1); ++} ++ ++int main(int argc, char** argv) ++{ ++ int ret; ++ FILE *file; ++ int i,j; ++ int npsf_id, curr_id = -1; ++ int cpu, max_splits_server; ++ int budget_us; ++ struct npsf_budgets *budgets; ++ ++ if (argc < 3) ++ usage("Arguments missing."); ++ ++ max_splits_server = atoi(argv[2]); ++ ++ if ((file = fopen(argv[1], "r")) == NULL) { ++ fprintf(stderr, "Cannot open %s\n", argv[1]); ++ return -1; ++ } ++ ++ /* format: npsf_id cpu budget-us */ ++ i = 0; ++ while (fscanf(file, "%d %d %d\n", &npsf_id, &cpu, &budget_us) != EOF) { ++ ++ printf("Read: %d %d %d\n", npsf_id, cpu, budget_us); ++ ++ if (curr_id == -1) { ++ curr_id = npsf_id; ++ budgets = malloc(max_splits_server * ++ sizeof(struct npsf_budgets)); ++ for(j = 0; j < max_splits_server; j++) { ++ budgets[j].cpu = -1; ++ budgets[j].budget = 0; ++ } ++ } ++ ++ if 
(npsf_id == curr_id) { ++ /* same notional processor, different cpu and budget */ ++ budgets[i].cpu = cpu; ++ budgets[i].budget = (lt_t) (budget_us * 1000); ++ i++; ++ } else { ++ /* different notional processor */ ++ /* add server */ ++ printf("Adding npsf_id = %d\n", curr_id); ++ ret = add_server(&curr_id, budgets, 0); ++ ++ if (ret < 0) { ++ fclose(file); ++ free(budgets); ++ printf("Cannot add Notional Processor %d\n", ++ curr_id); ++ return ret; ++ } ++ ++ /* reinit new */ ++ i = 0; ++ budgets = malloc(max_splits_server * ++ sizeof(struct npsf_budgets)); ++ for(j = 0; j < max_splits_server; j++) { ++ budgets[j].cpu = -1; ++ budgets[j].budget = 0; ++ } ++ curr_id = npsf_id; ++ budgets[i].cpu = cpu; ++ budgets[i].budget = (lt_t) (budget_us * 1000); ++ i++; ++ } ++ } ++ ++ if (ferror(file)) { ++ fprintf(stderr, "Error while reading\n"); ++ fclose(file); ++ return -1; ++ } ++ ++ /* save the last entry */ ++ ret = add_server(&curr_id, budgets, 1); ++ printf("Adding npsf_id = %d\n", curr_id); ++ if (ret < 0) { ++ fclose(file); ++ free(budgets); ++ bail_out("Cannot add Notional Processor: "); ++ } ++ ++ fclose(file); ++ ++ return 0; ++} +diff --git a/bin/rt_launch_edffm.c b/bin/rt_launch_edffm.c +new file mode 100644 +index 0000000..ddde7dd +--- /dev/null ++++ b/bin/rt_launch_edffm.c +@@ -0,0 +1,133 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "litmus.h" ++#include "common.h" ++ ++typedef struct { ++ int wait; ++ char * exec_path; ++ char ** argv; ++} startup_info_t; ++ ++ ++int launch(void *task_info_p) { ++ startup_info_t *info = (startup_info_t*) task_info_p; ++ int ret; ++ if (info->wait) { ++ ret = wait_for_ts_release(); ++ if (ret != 0) ++ perror("wait_for_ts_release()"); ++ } ++ ret = execvp(info->exec_path, info->argv); ++ perror("execv failed"); ++ return ret; ++} ++ ++void usage(char *error) { ++ fprintf(stderr, "%s\nUsage: rt_launch [-w][-v][-p cpu][-c hrt | srt | be] wcet period" ++ " fracnum1 fracden1 cpu1 fracnum2 fracden2 cpu2 program [arg1 arg2 ...]\n" ++ "\t-w\tSynchronous release\n" ++ "\t-v\tVerbose\n" ++ "\t-p\tcpu (or initial cpu)\n" ++ "\t-c\tClass\n" ++ "\twcet, period in ms\n" ++ "\tprogram to be launched\n", ++ error); ++ exit(1); ++} ++ ++ ++#define OPTSTR "p:c:vw" ++ ++int main(int argc, char** argv) ++{ ++ int ret; ++ lt_t wcet; ++ lt_t period; ++ /* [num,den] */ ++ lt_t frac1[2], frac2[2]; ++ int cpu1, cpu2; ++ int migrate = 0; ++ int cpu = 0; ++ int opt; ++ int verbose = 0; ++ int wait = 0; ++ startup_info_t info; ++ task_class_t class = RT_CLASS_HARD; ++ ++ while ((opt = getopt(argc, argv, OPTSTR)) != -1) { ++ switch (opt) { ++ case 'w': ++ wait = 1; ++ break; ++ case 'v': ++ verbose = 1; ++ break; ++ case 'p': ++ cpu = atoi(optarg); ++ migrate = 1; ++ break; ++ case 'c': ++ class = str2class(optarg); ++ if (class == -1) ++ usage("Unknown task class."); ++ break; ++ ++ case ':': ++ usage("Argument missing."); ++ break; ++ case '?': ++ default: ++ usage("Bad argument."); ++ break; ++ } ++ } ++ ++ signal(SIGUSR1, SIG_IGN); ++ ++ if (argc - optind < 8) ++ usage("Arguments missing."); ++ wcet = ms2lt(atoi(argv[optind + 0])); ++ period = ms2lt(atoi(argv[optind + 1])); ++ /* frac num, den = 0 means fixed task */ ++ frac1[0] = atoi(argv[optind + 2]); ++ frac1[1] = atoi(argv[optind + 3]); ++ cpu1 = atoi(argv[optind + 4]); ++ frac2[0] = atoi(argv[optind + 5]); ++ frac2[1] = atoi(argv[optind + 6]); ++ cpu2 = atoi(argv[optind + 7]); ++ if (wcet <= 0) ++ usage("The worst-case execution time must be a " ++ "positive number."); ++ if 
(period <= 0) ++ usage("The period must be a positive number."); ++ if (wcet > period) { ++ usage("The worst-case execution time must not " ++ "exceed the period."); ++ } ++ info.exec_path = argv[optind + 8]; ++ info.argv = argv + optind + 8; ++ info.wait = wait; ++ if (migrate) { ++ ret = be_migrate_to(cpu); ++ if (ret < 0) ++ bail_out("could not migrate to target partition"); ++ } ++ /* create in src/task.c a new wrapper for the __launch_rt_task ++ * which takes the fraction and the cpus */ ++ ret = __create_rt_task_edffm(launch, &info, cpu, wcet, period, frac1, ++ frac2, cpu1, cpu2, class); ++ ++ ++ if (ret < 0) ++ bail_out("could not create rt child process"); ++ else if (verbose) ++ printf("%d\n", ret); ++ ++ return 0; ++} +diff --git a/bin/rt_launch_edfwm.c b/bin/rt_launch_edfwm.c +new file mode 100644 +index 0000000..9e8a322 +--- /dev/null ++++ b/bin/rt_launch_edfwm.c +@@ -0,0 +1,98 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "litmus.h" ++#include "common.h" ++ ++typedef struct { ++ int wait; ++ char * exec_path; ++ char ** argv; ++} startup_info_t; ++ ++ ++int launch(void *task_info_p) { ++ startup_info_t *info = (startup_info_t*) task_info_p; ++ int ret; ++ if (info->wait) { ++ ret = wait_for_ts_release(); ++ if (ret != 0) ++ perror("wait_for_ts_release()"); ++ } ++ ret = execvp(info->exec_path, info->argv); ++ perror("execv failed"); ++ return ret; ++} ++ ++void usage(char *error) { ++ fprintf(stderr, "%s\nUsage: rt_launch [-w] task_parameters_file " ++ "program [arg1 arg2 ...]\n" ++ "\t-w\tSynchronous release\n" ++ "\tprogram to be launched\n", ++ error); ++ exit(1); ++} ++ ++#define OPTSTR "w" ++ ++int main(int argc, char** argv) ++{ ++ int ret; ++ int wait = 0; ++ int opt; ++ startup_info_t info; ++ ++ struct rt_task rt; ++ FILE *file; ++ ++ while ((opt = getopt(argc, argv, OPTSTR)) != -1) { ++ switch (opt) { ++ case 'w': ++ wait = 1; ++ break; ++ case ':': ++ usage("Argument missing."); ++ break; ++ case '?': ++ default: ++ usage("Bad argument."); ++ break; ++ } ++ } ++ ++ signal(SIGUSR1, SIG_IGN); ++ ++ if (argc - optind < 2) ++ usage("Arguments missing."); ++ ++ if ((file = fopen(argv[optind + 0], "r")) == NULL) { ++ fprintf(stderr, "Cannot open %s\n", argv[1]); ++ return -1; ++ } ++ ++ memset(&rt, 0, sizeof(struct rt_task)); ++ ++ if (parse_edfwm_ts_file(file, &rt) < 0) ++ bail_out("Could not parse file\n"); ++ ++ if (sporadic_task_ns_semi(&rt) < 0) ++ bail_out("could not setup rt task params"); ++ ++ fclose(file); ++ ++ info.exec_path = argv[optind + 1]; ++ info.argv = argv + optind + 1; ++ info.wait = wait; ++ ++ ret = create_rt_task_semi(launch, &info, &rt); ++ ++ if (ret < 0) ++ bail_out("could not create rt child process"); ++ ++ return 0; ++} ++ +diff --git a/bin/rt_launch_npsf.c b/bin/rt_launch_npsf.c +new file mode 100644 +index 0000000..97ad361 +--- /dev/null ++++ b/bin/rt_launch_npsf.c +@@ -0,0 +1,110 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "litmus.h" ++#include "common.h" ++ ++typedef struct { ++ int wait; ++ char * exec_path; ++ char ** argv; ++} startup_info_t; ++ ++ ++int launch(void *task_info_p) { ++ startup_info_t *info = (startup_info_t*) task_info_p; ++ int ret; ++ if (info->wait) { ++ ret = wait_for_ts_release(); ++ if (ret != 0) ++ perror("wait_for_ts_release()"); ++ } ++ ret = execvp(info->exec_path, info->argv); ++ perror("execv failed"); ++ return ret; ++} ++ ++void usage(char *error) { ++ fprintf(stderr, "%s\nUsage: rt_launch [-w][-v] wcet period cpu npsf-id program 
[arg1 arg2 ...]\n" ++ "\t-w\tSynchronous release\n" ++ "\t-v\tVerbose\n" ++ "\twcet, period in ms\n" ++ "\tprogram to be launched\n", ++ error); ++ exit(1); ++} ++ ++ ++#define OPTSTR "vw" ++ ++int main(int argc, char** argv) ++{ ++ int ret; ++ lt_t wcet; ++ lt_t period; ++ int migrate = 0; ++ int cpu = 0; ++ int npsf_id; ++ int opt; ++ int verbose = 0; ++ int wait = 0; ++ startup_info_t info; ++ ++ while ((opt = getopt(argc, argv, OPTSTR)) != -1) { ++ switch (opt) { ++ case 'w': ++ wait = 1; ++ break; ++ case 'v': ++ verbose = 1; ++ break; ++ case ':': ++ usage("Argument missing."); ++ break; ++ case '?': ++ default: ++ usage("Bad argument."); ++ break; ++ } ++ } ++ ++ signal(SIGUSR1, SIG_IGN); ++ ++ if (argc - optind < 5) ++ usage("Arguments missing."); ++ wcet = ms2lt(atoi(argv[optind + 0])); ++ period = ms2lt(atoi(argv[optind + 1])); ++ cpu = atoi(argv[optind + 2]); ++ migrate = 1; ++ npsf_id = atoi(argv[optind + 3]); ++ if (wcet <= 0) ++ usage("The worst-case execution time must be a " ++ "positive number."); ++ if (period <= 0) ++ usage("The period must be a positive number."); ++ if (wcet > period) { ++ usage("The worst-case execution time must not " ++ "exceed the period."); ++ } ++ info.exec_path = argv[optind + 4]; ++ info.argv = argv + optind + 4; ++ info.wait = wait; ++ if (migrate) { ++ ret = be_migrate_to(cpu); ++ if (ret < 0) ++ bail_out("could not migrate to target partition"); ++ } ++ ret = __create_rt_task_npsf(launch, &info, cpu, wcet, period, npsf_id, RT_CLASS_HARD); ++ ++ ++ if (ret < 0) ++ bail_out("could not create rt child process"); ++ else if (verbose) ++ printf("%d\n", ret); ++ ++ return 0; ++} +diff --git a/bin/rtspin_edffm.c b/bin/rtspin_edffm.c +new file mode 100644 +index 0000000..5db79b8 +--- /dev/null ++++ b/bin/rtspin_edffm.c +@@ -0,0 +1,263 @@ ++#include ++ ++#include ++#include ++#include ++#include ++ ++ ++#include "litmus.h" ++#include "common.h" ++ ++ ++static double cputime() ++{ ++ struct timespec ts; ++ int err; ++ err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); ++ if (err != 0) ++ perror("clock_gettime"); ++ return (ts.tv_sec + 1E-9 * ts.tv_nsec); ++} ++ ++static double wctime() ++{ ++ struct timeval tv; ++ gettimeofday(&tv, NULL); ++ return (tv.tv_sec + 1E-6 * tv.tv_usec); ++} ++ ++void usage(char *error) { ++ fprintf(stderr, "Error: %s\n", error); ++ fprintf(stderr, ++ "Usage: rt_spin [-w] [-p PARTITION] [-c CLASS] WCET PERIOD DURATION fracnum1 fracden1 cpu1 fracnum2 fracden2 cpu2\n" ++ " rt_spin -l\n"); ++ exit(1); ++} ++ ++#define NUMS 4096 ++static int num[NUMS]; ++static double loop_length = 1.0; ++static char* progname; ++ ++static int loop_once(void) ++{ ++ int i, j = 0; ++ for (i = 0; i < NUMS; i++) ++ j += num[i]++; ++ return j; ++} ++ ++static int loop_for(double exec_time) ++{ ++ double t = 0; ++ int tmp = 0; ++/* while (t + loop_length < exec_time) { ++ tmp += loop_once(); ++ t += loop_length; ++ } ++*/ ++ double start = cputime(); ++ double now = cputime(); ++ while (now + loop_length < start + exec_time) { ++ tmp += loop_once(); ++ t += loop_length; ++ now = cputime(); ++ } ++ ++ return tmp; ++} ++ ++static void fine_tune(double interval) ++{ ++ double start, end, delta; ++ ++ start = wctime(); ++ loop_for(interval); ++ end = wctime(); ++ delta = (end - start - interval) / interval; ++ if (delta != 0) ++ loop_length = loop_length / (1 - delta); ++} ++ ++static void configure_loop(void) ++{ ++ double start; ++ ++ /* prime cache */ ++ loop_once(); ++ loop_once(); ++ loop_once(); ++ ++ /* measure */ ++ start = wctime(); ++ 
loop_once(); /* hope we didn't get preempted */ ++ loop_length = wctime(); ++ loop_length -= start; ++ ++ /* fine tune */ ++ fine_tune(0.1); ++ fine_tune(0.1); ++ fine_tune(0.1); ++} ++ ++static void show_loop_length(void) ++{ ++ printf("%s/%d: loop_length=%f (%ldus)\n", ++ progname, getpid(), loop_length, ++ (long) (loop_length * 1000000)); ++} ++ ++static void debug_delay_loop(void) ++{ ++ double start, end, delay; ++ show_loop_length(); ++ while (1) { ++ for (delay = 0.5; delay > 0.01; delay -= 0.01) { ++ start = wctime(); ++ loop_for(delay); ++ end = wctime(); ++ printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n", ++ delay, ++ end - start, ++ end - start - delay, ++ 100 * (end - start - delay) / delay); ++ } ++ } ++} ++ ++static int job(double exec_time) ++{ ++ loop_for(exec_time); ++ sleep_next_period(); ++ return 0; ++} ++ ++#define OPTSTR "p:c:wld:v" ++ ++int main(int argc, char** argv) ++{ ++ int ret; ++ lt_t wcet; ++ lt_t period; ++ /* [num,den] */ ++ lt_t frac1[2], frac2[2]; ++ int cpu1, cpu2; ++ double wcet_ms, period_ms; ++ int migrate = 0; ++ int cpu = 0; ++ int opt; ++ int wait = 0; ++ int test_loop = 0; ++ int skip_config = 0; ++ int verbose = 0; ++ double duration, start; ++ task_class_t class = RT_CLASS_HARD; ++ ++ progname = argv[0]; ++ ++ while ((opt = getopt(argc, argv, OPTSTR)) != -1) { ++ switch (opt) { ++ case 'w': ++ wait = 1; ++ break; ++ case 'p': ++ cpu = atoi(optarg); ++ migrate = 1; ++ break; ++ case 'c': ++ class = str2class(optarg); ++ if (class == -1) ++ usage("Unknown task class."); ++ break; ++ case 'l': ++ test_loop = 1; ++ break; ++ case 'd': ++ /* manually configure delay per loop iteration ++ * unit: microseconds */ ++ loop_length = atof(optarg) / 1000000; ++ skip_config = 1; ++ break; ++ case 'v': ++ verbose = 1; ++ break; ++ case ':': ++ usage("Argument missing."); ++ break; ++ case '?': ++ default: ++ usage("Bad argument."); ++ break; ++ } ++ } ++ ++ ++ if (!skip_config) ++ configure_loop(); ++ ++ if (test_loop) { ++ debug_delay_loop(); ++ return 0; ++ } ++ ++ if (argc - optind < 9) ++ usage("Arguments missing."); ++ wcet_ms = atof(argv[optind + 0]); ++ period_ms = atof(argv[optind + 1]); ++ duration = atof(argv[optind + 2]); ++ /* frac num, den = 0 means fixed task */ ++ frac1[0] = atoi(argv[optind + 3]); ++ frac1[1] = atoi(argv[optind + 4]); ++ cpu1 = atoi(argv[optind + 5]); ++ frac2[0] = atoi(argv[optind + 6]); ++ frac2[1] = atoi(argv[optind + 7]); ++ cpu2 = atoi(argv[optind + 8]); ++ wcet = wcet_ms * __NS_PER_MS; ++ period = period_ms * __NS_PER_MS; ++ if (wcet <= 0) ++ usage("The worst-case execution time must be a " ++ "positive number."); ++ if (period <= 0) ++ usage("The period must be a positive number."); ++ if (wcet > period) { ++ usage("The worst-case execution time must not " ++ "exceed the period."); ++ } ++ ++ if (migrate) { ++ ret = be_migrate_to(cpu); ++ if (ret < 0) ++ bail_out("could not migrate to target partition"); ++ } ++ ++ ret = sporadic_task_ns_edffm(wcet, period, 0, cpu, ++ frac1, frac2, cpu1, cpu2, ++ class, NO_ENFORCEMENT, migrate); ++ ++ if (ret < 0) ++ bail_out("could not setup rt task params"); ++ ++ if (verbose) ++ show_loop_length(); ++ ++ init_litmus(); ++ ++ ret = task_mode(LITMUS_RT_TASK); ++ if (ret != 0) ++ bail_out("could not become RT task"); ++ ++ if (wait) { ++ ret = wait_for_ts_release(); ++ if (ret != 0) ++ bail_out("wait_for_ts_release()"); ++ } ++ ++ start = wctime(); ++ ++ while (start + duration > wctime()) { ++ job(wcet_ms * 0.0009); /* 90% wcet, in seconds */ ++ } ++ ++ return 0; 
++} +diff --git a/bin/rtspin_edfwm.c b/bin/rtspin_edfwm.c +new file mode 100644 +index 0000000..21a5f3b +--- /dev/null ++++ b/bin/rtspin_edfwm.c +@@ -0,0 +1,233 @@ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "litmus.h" ++#include "common.h" ++ ++ ++static double cputime() ++{ ++ struct timespec ts; ++ int err; ++ err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); ++ if (err != 0) ++ perror("clock_gettime"); ++ return (ts.tv_sec + 1E-9 * ts.tv_nsec); ++} ++ ++static double wctime() ++{ ++ struct timeval tv; ++ gettimeofday(&tv, NULL); ++ return (tv.tv_sec + 1E-6 * tv.tv_usec); ++} ++ ++void usage(char *error) { ++ fprintf(stderr, "Error: %s\n", error); ++ fprintf(stderr, ++ "Usage: rt_spin [-w] task_parameters_file duration\n" ++ " rt_spin -l\n"); ++ exit(1); ++} ++ ++#define NUMS 4096 ++static int num[NUMS]; ++static double loop_length = 1.0; ++static char* progname; ++ ++static int loop_once(void) ++{ ++ int i, j = 0; ++ for (i = 0; i < NUMS; i++) ++ j += num[i]++; ++ return j; ++} ++ ++static int loop_for(double exec_time) ++{ ++ double t = 0; ++ int tmp = 0; ++/* while (t + loop_length < exec_time) { ++ tmp += loop_once(); ++ t += loop_length; ++ } ++*/ ++ double start = cputime(); ++ double now = cputime(); ++ while (now + loop_length < start + exec_time) { ++ tmp += loop_once(); ++ t += loop_length; ++ now = cputime(); ++ } ++ ++ return tmp; ++} ++ ++static void fine_tune(double interval) ++{ ++ double start, end, delta; ++ ++ start = wctime(); ++ loop_for(interval); ++ end = wctime(); ++ delta = (end - start - interval) / interval; ++ if (delta != 0) ++ loop_length = loop_length / (1 - delta); ++} ++ ++static void configure_loop(void) ++{ ++ double start; ++ ++ /* prime cache */ ++ loop_once(); ++ loop_once(); ++ loop_once(); ++ ++ /* measure */ ++ start = wctime(); ++ loop_once(); /* hope we didn't get preempted */ ++ loop_length = wctime(); ++ loop_length -= start; ++ ++ /* fine tune */ ++ fine_tune(0.1); ++ fine_tune(0.1); ++ fine_tune(0.1); ++} ++ ++static void show_loop_length(void) ++{ ++ printf("%s/%d: loop_length=%f (%ldus)\n", ++ progname, getpid(), loop_length, ++ (long) (loop_length * 1000000)); ++} ++ ++static void debug_delay_loop(void) ++{ ++ double start, end, delay; ++ show_loop_length(); ++ while (1) { ++ for (delay = 0.5; delay > 0.01; delay -= 0.01) { ++ start = wctime(); ++ loop_for(delay); ++ end = wctime(); ++ printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n", ++ delay, ++ end - start, ++ end - start - delay, ++ 100 * (end - start - delay) / delay); ++ } ++ } ++} ++ ++static int job(double exec_time) ++{ ++ loop_for(exec_time); ++ sleep_next_period(); ++ return 0; ++} ++ ++#define OPTSTR "wld:v" ++ ++int main(int argc, char** argv) ++{ ++ int ret; ++ ++ int opt; ++ int wait = 0; ++ int test_loop = 0; ++ int skip_config = 0; ++ int verbose = 0; ++ double wcet_ms; ++ double duration, start; ++ ++ struct rt_task rt; ++ FILE *file; ++ ++ progname = argv[0]; ++ ++ while ((opt = getopt(argc, argv, OPTSTR)) != -1) { ++ switch (opt) { ++ case 'w': ++ wait = 1; ++ break; ++ case 'l': ++ test_loop = 1; ++ break; ++ case 'd': ++ /* manually configure delay per loop iteration ++ * unit: microseconds */ ++ loop_length = atof(optarg) / 1000000; ++ skip_config = 1; ++ break; ++ case 'v': ++ verbose = 1; ++ break; ++ case ':': ++ usage("Argument missing."); ++ break; ++ case '?': ++ default: ++ usage("Bad argument."); ++ break; ++ } ++ } ++ ++ ++ if (!skip_config) ++ configure_loop(); ++ ++ if (test_loop) { ++ 
debug_delay_loop(); ++ return 0; ++ } ++ ++ if (argc - optind < 2) ++ usage("Arguments missing."); ++ ++ if ((file = fopen(argv[optind + 0], "r")) == NULL) { ++ fprintf(stderr, "Cannot open %s\n", argv[1]); ++ return -1; ++ } ++ duration = atof(argv[optind + 1]); ++ ++ memset(&rt, 0, sizeof(struct rt_task)); ++ ++ if (parse_edfwm_ts_file(file, &rt) < 0) ++ bail_out("Could not parse file\n"); ++ ++ if (sporadic_task_ns_semi(&rt) < 0) ++ bail_out("could not setup rt task params"); ++ ++ fclose(file); ++ ++ if (verbose) ++ show_loop_length(); ++ ++ init_litmus(); ++ ++ ret = task_mode(LITMUS_RT_TASK); ++ if (ret != 0) ++ bail_out("could not become RT task"); ++ ++ if (wait) { ++ ret = wait_for_ts_release(); ++ if (ret != 0) ++ bail_out("wait_for_ts_release()"); ++ } ++ ++ wcet_ms = ((double) rt.exec_cost ) / __NS_PER_MS; ++ start = wctime(); ++ ++ while (start + duration > wctime()) { ++ job(wcet_ms * 0.0009); /* 90% wcet, in seconds */ ++ } ++ ++ return 0; ++} +diff --git a/bin/rtspin_npsf.c b/bin/rtspin_npsf.c +new file mode 100644 +index 0000000..d5dff3d +--- /dev/null ++++ b/bin/rtspin_npsf.c +@@ -0,0 +1,252 @@ ++#include ++ ++#include ++#include ++#include ++#include ++ ++ ++#include "litmus.h" ++#include "common.h" ++ ++ ++static double cputime() ++{ ++ struct timespec ts; ++ int err; ++ err = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); ++ if (err != 0) ++ perror("clock_gettime"); ++ return (ts.tv_sec + 1E-9 * ts.tv_nsec); ++} ++ ++static double wctime() ++{ ++ struct timeval tv; ++ gettimeofday(&tv, NULL); ++ return (tv.tv_sec + 1E-6 * tv.tv_usec); ++} ++ ++void usage(char *error) { ++ fprintf(stderr, "Error: %s\n", error); ++ fprintf(stderr, ++ "Usage: rt_spin [-w] WCET PERIOD DURATION CPU NPSF-ID\n" ++ " rt_spin -l\n"); ++ exit(1); ++} ++ ++#define NUMS 4096 ++static int num[NUMS]; ++static double loop_length = 1.0; ++static char* progname; ++ ++static int loop_once(void) ++{ ++ int i, j = 0; ++ for (i = 0; i < NUMS; i++) ++ j += num[i]++; ++ return j; ++} ++ ++static int loop_for(double exec_time) ++{ ++ double t = 0; ++ int tmp = 0; ++/* while (t + loop_length < exec_time) { ++ tmp += loop_once(); ++ t += loop_length; ++ } ++*/ ++ double start = cputime(); ++ double now = cputime(); ++ while (now + loop_length < start + exec_time) { ++ tmp += loop_once(); ++ t += loop_length; ++ now = cputime(); ++ } ++ ++ return tmp; ++} ++ ++static void fine_tune(double interval) ++{ ++ double start, end, delta; ++ ++ start = wctime(); ++ loop_for(interval); ++ end = wctime(); ++ delta = (end - start - interval) / interval; ++ if (delta != 0) ++ loop_length = loop_length / (1 - delta); ++} ++ ++static void configure_loop(void) ++{ ++ double start; ++ ++ /* prime cache */ ++ loop_once(); ++ loop_once(); ++ loop_once(); ++ ++ /* measure */ ++ start = wctime(); ++ loop_once(); /* hope we didn't get preempted */ ++ loop_length = wctime(); ++ loop_length -= start; ++ ++ /* fine tune */ ++ fine_tune(0.1); ++ fine_tune(0.1); ++ fine_tune(0.1); ++} ++ ++static void show_loop_length(void) ++{ ++ printf("%s/%d: loop_length=%f (%ldus)\n", ++ progname, getpid(), loop_length, ++ (long) (loop_length * 1000000)); ++} ++ ++static void debug_delay_loop(void) ++{ ++ double start, end, delay; ++ show_loop_length(); ++ while (1) { ++ for (delay = 0.5; delay > 0.01; delay -= 0.01) { ++ start = wctime(); ++ loop_for(delay); ++ end = wctime(); ++ printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n", ++ delay, ++ end - start, ++ end - start - delay, ++ 100 * (end - start - delay) / delay); ++ } ++ } ++} 
++ ++static int job(double exec_time) ++{ ++ loop_for(exec_time); ++ sleep_next_period(); ++ return 0; ++} ++ ++#define OPTSTR "c:wld:v" ++ ++int main(int argc, char** argv) ++{ ++ int ret; ++ lt_t wcet; ++ lt_t period; ++ double wcet_ms, period_ms; ++ int migrate = 0; ++ int cpu = 0; ++ int npsf_id = 0; ++ int opt; ++ int wait = 0; ++ int test_loop = 0; ++ int skip_config = 0; ++ int verbose = 0; ++ double duration, start; ++ task_class_t class = RT_CLASS_HARD; ++ ++ progname = argv[0]; ++ ++ while ((opt = getopt(argc, argv, OPTSTR)) != -1) { ++ switch (opt) { ++ case 'w': ++ wait = 1; ++ break; ++ case 'c': ++ class = str2class(optarg); ++ if (class == -1) ++ usage("Unknown task class."); ++ break; ++ case 'l': ++ test_loop = 1; ++ break; ++ case 'd': ++ /* manually configure delay per loop iteration ++ * unit: microseconds */ ++ loop_length = atof(optarg) / 1000000; ++ skip_config = 1; ++ break; ++ case 'v': ++ verbose = 1; ++ break; ++ case ':': ++ usage("Argument missing."); ++ break; ++ case '?': ++ default: ++ usage("Bad argument."); ++ break; ++ } ++ } ++ ++ ++ if (!skip_config) ++ configure_loop(); ++ ++ if (test_loop) { ++ debug_delay_loop(); ++ return 0; ++ } ++ ++ if (argc - optind < 5) ++ usage("Arguments missing."); ++ wcet_ms = atof(argv[optind + 0]); ++ period_ms = atof(argv[optind + 1]); ++ duration = atof(argv[optind + 2]); ++ cpu = atoi(argv[optind + 3]); ++ migrate = 1; ++ npsf_id = atoi(argv[optind + 4]); ++ wcet = wcet_ms * __NS_PER_MS; ++ period = period_ms * __NS_PER_MS; ++ if (wcet <= 0) ++ usage("The worst-case execution time must be a " ++ "positive number."); ++ if (period <= 0) ++ usage("The period must be a positive number."); ++ if (wcet > period) { ++ usage("The worst-case execution time must not " ++ "exceed the period."); ++ } ++ ++ if (migrate) { ++ ret = be_migrate_to(cpu); ++ if (ret < 0) ++ bail_out("could not migrate to target partition"); ++ } ++ ++ ret = sporadic_task_ns_npsf(wcet, period, 0, cpu, class, npsf_id, ++ NO_ENFORCEMENT, migrate); ++ ++ if (ret < 0) ++ bail_out("could not setup rt task params"); ++ ++ if (verbose) ++ show_loop_length(); ++ ++ init_litmus(); ++ ++ ret = task_mode(LITMUS_RT_TASK); ++ if (ret != 0) ++ bail_out("could not become RT task"); ++ ++ if (wait) { ++ ret = wait_for_ts_release(); ++ if (ret != 0) ++ bail_out("wait_for_ts_release()"); ++ } ++ ++ start = wctime(); ++ ++ while (start + duration > wctime()) { ++ job(wcet_ms * 0.0009); /* 90% wcet, in seconds */ ++ } ++ ++ return 0; ++} +diff --git a/include/common.h b/include/common.h +index d1234ba..dbcfd34 100644 +--- a/include/common.h ++++ b/include/common.h +@@ -1,7 +1,11 @@ + #ifndef COMMON_H + #define COMMON_H + ++#include "litmus.h" + + void bail_out(const char* msg); + ++/* EDF-WM helper functions to parse task parameters from file */ ++int parse_edfwm_ts_file(FILE *ts, struct rt_task *rt); ++ + #endif +diff --git a/include/litmus.h b/include/litmus.h +index b798c92..9179ca6 100644 +--- a/include/litmus.h ++++ b/include/litmus.h +@@ -9,6 +9,7 @@ extern "C" { + * This is required for the rt_param + * and control_page structures. 
+ */ ++#include + #include + + #include +@@ -40,6 +41,19 @@ int sporadic_task_ns( + int cpu, task_class_t cls, + budget_policy_t budget_policy, int set_cpu_set); + ++int sporadic_task_ns_edffm(lt_t e, lt_t p, lt_t phase, int cpu, ++ lt_t *frac1, lt_t *frac2, int cpu1, int cpu2, ++ task_class_t cls, budget_policy_t budget_policy, ++ int set_cpu_set); ++ ++int sporadic_task_ns_npsf( ++ lt_t e, lt_t p, lt_t phase, ++ int cpu, task_class_t cls, int npsf_id, ++ budget_policy_t budget_policy, int set_cpu_set); ++ ++/* times are in ns, specific helper for semi-partitioned algos */ ++int sporadic_task_ns_semi(struct rt_task *rt); ++ + /* budget enforcement off by default in these macros */ + #define sporadic_global(e, p) \ + sporadic_task(e, p, 0, 0, RT_CLASS_SOFT, NO_ENFORCEMENT, 0) +@@ -85,6 +99,17 @@ typedef int (*rt_fn_t)(void*); + int create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period); + int __create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, + int period, task_class_t cls); ++int __create_rt_task_edffm(rt_fn_t rt_prog, void *arg, int cpu, int wcet, ++ int period, lt_t *frac1, lt_t *frac2, ++ int cpu1, int cpu2, task_class_t class); ++int __create_rt_task_npsf(rt_fn_t rt_prog, void *arg, int cpu, int wcet, ++ int period, int npsf_id, task_class_t class); ++ ++/* wrapper to mask __launch_rt_task() for semi-partitioned algorithms ++ * (it can be extended to cover all algorithms that directly submit ++ * an rt_task structure instead of a set of values). ++ */ ++int create_rt_task_semi(rt_fn_t rt_prog, void *arg, struct rt_task *params); + + /* per-task modes */ + enum rt_task_mode_t { +@@ -134,6 +159,21 @@ int null_call(cycles_t *timestamp); + */ + struct control_page* get_ctrl_page(void); + ++/* NPS-F syscall to add a notional processor (a server) to a cpu. ++ * A notional processor may span across multiple cpu. ++ * ++ * @npsf_id: a "unique" identifier for the notional processor. ++ * @budgets: array of (cpu, budget-ns) that describes this np. ++ * on possibly more than one cpu. ++ * @last: marks the end of servers initialization and trigger ++ * the switching of servers in the plugin. ++ * Should be set to 1 only once at the end of the sequence ++ * of add_server() calls ++ * ++ * Currently implemented on x86_64 only. 
++ */ ++int add_server(int *npsf_id, struct npsf_budgets *budgets, int last); ++ + #ifdef __cplusplus + } + #endif +diff --git a/src/litmus.c b/src/litmus.c +index d3cc6bb..c0dae95 100644 +--- a/src/litmus.c ++++ b/src/litmus.c +@@ -29,8 +29,6 @@ task_class_t str2class(const char* str) + return -1; + } + +-#define NS_PER_MS 1000000 +- + /* only for best-effort execution: migrate to target_cpu */ + int be_migrate_to(int target_cpu) + { +@@ -45,7 +43,7 @@ int sporadic_task(lt_t e, lt_t p, lt_t phase, + int cpu, task_class_t cls, + budget_policy_t budget_policy, int set_cpu_set) + { +- return sporadic_task_ns(e * NS_PER_MS, p * NS_PER_MS, phase * NS_PER_MS, ++ return sporadic_task_ns(e * __NS_PER_MS, p * __NS_PER_MS, phase * __NS_PER_MS, + cpu, cls, budget_policy, set_cpu_set); + } + +@@ -75,6 +73,72 @@ int sporadic_task_ns(lt_t e, lt_t p, lt_t phase, + return set_rt_task_param(gettid(), ¶m); + } + ++int sporadic_task_ns_edffm(lt_t e, lt_t p, lt_t phase, int cpu, ++ lt_t *frac1, lt_t *frac2, int cpu1, int cpu2, ++ task_class_t cls, budget_policy_t budget_policy, ++ int set_cpu_set) ++{ ++ struct rt_task param; ++ struct edffm_params fm; ++ int ret; ++ param.exec_cost = e; ++ param.period = p; ++ param.cpu = cpu; ++ /* check on denominators */ ++ if (frac1[1] != 0 && frac2[1] != 0) { ++ /* edf-fm migrat task */ ++ fm.nr_cpus = 1; ++ fm.cpus[0] = cpu1; ++ fm.cpus[1] = cpu2; ++ fm.fraction[0][0] = frac1[0]; ++ fm.fraction[1][0] = frac1[1]; ++ fm.fraction[0][1] = frac2[0]; ++ fm.fraction[1][1] = frac2[1]; ++ } ++ param.semi_part.fm = fm; ++ param.cls = cls; ++ param.phase = phase; ++ param.budget_policy = budget_policy; ++ ++ if (set_cpu_set) { ++ ret = be_migrate_to(cpu); ++ check("migrate to cpu"); ++ } ++ return set_rt_task_param(gettid(), ¶m); ++} ++ ++int sporadic_task_ns_npsf(lt_t e, lt_t p, lt_t phase, ++ int cpu, task_class_t cls, int npsf_id, ++ budget_policy_t budget_policy, int set_cpu_set) ++{ ++ struct rt_task param; ++ int ret; ++ param.exec_cost = e; ++ param.period = p; ++ param.cpu = cpu; ++ param.cls = cls; ++ param.phase = phase; ++ param.budget_policy = budget_policy; ++ param.semi_part.npsf_id = (int) npsf_id; ++ ++ if (set_cpu_set) { ++ ret = be_migrate_to(cpu); ++ check("migrate to cpu"); ++ } ++ return set_rt_task_param(gettid(), ¶m); ++} ++ ++/* Sporadic task helper function for Semi-Partitioned algorithms. 
*/ ++int sporadic_task_ns_semi(struct rt_task *param) ++{ ++ int ret; ++ ++ ret = be_migrate_to(param->cpu); ++ check("migrate to cpu"); ++ ++ return set_rt_task_param(gettid(), param); ++} ++ + int init_kernel_iface(void); + + int init_litmus(void) +diff --git a/src/syscalls.c b/src/syscalls.c +index 77a6277..7ac488a 100644 +--- a/src/syscalls.c ++++ b/src/syscalls.c +@@ -95,3 +95,8 @@ int null_call(cycles_t *timestamp) + { + return syscall(__NR_null_call, timestamp); + } ++ ++int add_server(int *npsf_id, struct npsf_budgets *budgets, int last) ++{ ++ return syscall(__NR_add_server, npsf_id, budgets, last); ++} +diff --git a/src/task.c b/src/task.c +index 4d237bd..daf95ca 100644 +--- a/src/task.c ++++ b/src/task.c +@@ -40,6 +40,54 @@ int __launch_rt_task(rt_fn_t rt_prog, void *rt_arg, rt_setup_fn_t setup, + return rt_task; + } + ++int __create_rt_task_edffm(rt_fn_t rt_prog, void *arg, int cpu, int wcet, ++ int period, lt_t *frac1, lt_t *frac2, ++ int cpu1, int cpu2, task_class_t class) ++{ ++ struct rt_task params; ++ struct edffm_params fm; ++ params.cpu = cpu; ++ params.period = period; ++ params.exec_cost = wcet; ++ params.cls = class; ++ params.phase = 0; ++ /* enforce budget for tasks that might not use sleep_next_period() */ ++ params.budget_policy = QUANTUM_ENFORCEMENT; ++ ++ /* edf-fm check on denominators for migratory tasks */ ++ if (frac1[1] != 0 && frac2[1] != 0) { ++ /* edf-fm migrat task */ ++ fm.nr_cpus = 1; ++ fm.cpus[0] = cpu1; ++ fm.cpus[1] = cpu2; ++ fm.fraction[0][0] = frac1[0]; ++ fm.fraction[1][0] = frac1[1]; ++ fm.fraction[0][1] = frac2[0]; ++ fm.fraction[1][1] = frac2[1]; ++ } ++ params.semi_part.fm = fm; ++ ++ return __launch_rt_task(rt_prog, arg, ++ (rt_setup_fn_t) set_rt_task_param, ¶ms); ++} ++ ++int __create_rt_task_npsf(rt_fn_t rt_prog, void *arg, int cpu, int wcet, ++ int period, int npsf_id, task_class_t class) ++{ ++ struct rt_task params; ++ params.cpu = cpu; ++ params.period = period; ++ params.exec_cost = wcet; ++ params.cls = class; ++ params.phase = 0; ++ /* enforce budget for tasks that might not use sleep_next_period() */ ++ params.budget_policy = QUANTUM_ENFORCEMENT; ++ params.semi_part.npsf_id = (int) npsf_id; ++ ++ return __launch_rt_task(rt_prog, arg, ++ (rt_setup_fn_t) set_rt_task_param, ¶ms); ++} ++ + int __create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period, + task_class_t class) + { +@@ -60,6 +108,11 @@ int create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period) { + return __create_rt_task(rt_prog, arg, cpu, wcet, period, RT_CLASS_HARD); + } + ++int create_rt_task_semi(rt_fn_t rt_prog, void *arg, struct rt_task *params) ++{ ++ return __launch_rt_task(rt_prog, arg, ++ (rt_setup_fn_t) set_rt_task_param, params); ++} + + #define SCHED_NORMAL 0 + #define SCHED_LITMUS 6 diff --git a/download/ECRTS11/litmus-rt-semi-part.patch b/download/ECRTS11/litmus-rt-semi-part.patch new file mode 100644 index 0000000..67cb738 --- /dev/null +++ b/download/ECRTS11/litmus-rt-semi-part.patch @@ -0,0 +1,2809 @@ +diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h +index 5d20276..8672398 100644 +--- a/include/litmus/litmus.h ++++ b/include/litmus/litmus.h +@@ -88,7 +88,7 @@ inline static int budget_exhausted(struct task_struct* t) + inline static lt_t budget_remaining(struct task_struct* t) + { + if (!budget_exhausted(t)) +- return get_exec_time(t) - get_exec_cost(t); ++ return get_exec_cost(t) - get_exec_time(t); + else + /* avoid overflow */ + return 0; +diff --git a/include/litmus/rt_param.h 
b/include/litmus/rt_param.h +index a7a183f..6f43dea 100644 +--- a/include/litmus/rt_param.h ++++ b/include/litmus/rt_param.h +@@ -1,3 +1,5 @@ ++#include ++ + /* + * Definition of the scheduler plugin interface. + * +@@ -33,6 +35,72 @@ typedef enum { + PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */ + } budget_policy_t; + ++ ++/* The parameters for EDF-Fm scheduling algorithm. ++ * Each task may be fixed or migratory. Migratory tasks may ++ * migrate on 2 (contiguous) CPU only. NR_CPUS_EDF_FM = 2. ++ */ ++#define NR_CPUS_EDF_FM 2 ++ ++struct edffm_params { ++ /* EDF-fm where can a migratory task execute? */ ++ unsigned int cpus[NR_CPUS_EDF_FM]; ++ /* how many cpus are used by this task? ++ * fixed = 0, migratory = (NR_CPUS_EDF_FM - 1) ++ * Efficient way to allow writing cpus[nr_cpus]. ++ */ ++ unsigned int nr_cpus; ++ /* Fraction of this task exec_cost that each CPU should handle. ++ * We keep the fraction divided in num/denom : a matrix of ++ * (NR_CPUS_EDF_FM rows) x (2 columns). ++ * The first column is the numerator of the fraction. ++ * The second column is the denominator. ++ * In EDF-fm this is a 2*2 matrix ++ */ ++ lt_t fraction[2][NR_CPUS_EDF_FM]; ++}; ++ ++/* Parameters for NPS-F semi-partitioned scheduling algorithm. ++ * Each (cpu, budget) entry defines the share ('budget' in ns, a % of ++ * the slot_length) of the notional processor on the CPU 'cpu'. ++ * This structure is used by the library - syscall interface in order ++ * to go through the overhead of a syscall only once per server. ++ */ ++struct npsf_budgets { ++ int cpu; ++ lt_t budget; ++}; ++ ++/* The parameters for the EDF-WM semi-partitioned scheduler. ++ * Each task may be split across multiple cpus. Each per-cpu allocation ++ * is called a 'slice'. ++ */ ++#define MAX_EDF_WM_SLICES 24 ++#define MIN_EDF_WM_SLICE_SIZE 50000 /* .05 millisecond = 50us */ ++ ++struct edf_wm_slice { ++ /* on which CPU is this slice allocated */ ++ unsigned int cpu; ++ /* relative deadline from job release (not from slice release!) */ ++ lt_t deadline; ++ /* budget of this slice; must be precisely enforced */ ++ lt_t budget; ++ /* offset of this slice relative to the job release */ ++ lt_t offset; ++}; ++ ++/* If a job is not sliced across multiple CPUs, then ++ * count is set to zero and none of the slices is used. ++ * This implies that count == 1 is illegal. ++ */ ++struct edf_wm_params { ++ /* enumeration of all slices */ ++ struct edf_wm_slice slices[MAX_EDF_WM_SLICES]; ++ ++ /* how many slices are defined? */ ++ unsigned int count; ++}; ++ + struct rt_task { + lt_t exec_cost; + lt_t period; +@@ -40,6 +108,22 @@ struct rt_task { + unsigned int cpu; + task_class_t cls; + budget_policy_t budget_policy; /* ignored by pfair */ ++ ++ /* parameters used by the semi-partitioned algorithms */ ++ union { ++ /* EDF-Fm; defined in sched_edf_fm.c */ ++ struct edffm_params fm; ++ ++ /* NPS-F; defined in sched_npsf.c ++ * id for the server (notional processor) that holds ++ * this task; the same npfs_id can be assigned to "the same" ++ * server split on different cpus ++ */ ++ int npsf_id; ++ ++ /* EDF-WM; defined in sched_edf_wm.c */ ++ struct edf_wm_params wm; ++ } semi_part; + }; + + /* The definition of the data that is shared between the kernel and real-time +@@ -184,6 +268,27 @@ struct rt_param { + + /* Pointer to the page shared between userspace and kernel. 
*/ + struct control_page * ctrl_page; ++ ++ /* runtime info for the semi-part plugins */ ++ union { ++ /* EDF-Fm runtime information ++ * number of jobs handled by this cpu ++ * (to determine next cpu for a migrating task) ++ */ ++ unsigned int cpu_job_no[NR_CPUS_EDF_FM]; ++ ++ /* EDF-WM runtime information */ ++ struct { ++ /* at which exec time did the current slice start? */ ++ lt_t exec_time; ++ /* when did the job suspend? */ ++ lt_t suspend_time; ++ /* cached job parameters */ ++ lt_t job_release, job_deadline; ++ /* pointer to the current slice */ ++ struct edf_wm_slice* slice; ++ } wm; ++ } semi_part; + }; + + /* Possible RT flags */ +diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h +index 9c1c9f2..7ea9176 100644 +--- a/include/litmus/sched_plugin.h ++++ b/include/litmus/sched_plugin.h +@@ -6,6 +6,8 @@ + #define _LINUX_SCHED_PLUGIN_H_ + + #include ++/* NSEC_PER... conversions */ ++#include + + /* struct for semaphore with priority inheritance */ + struct pi_semaphore { +@@ -136,6 +138,9 @@ extern struct sched_plugin *litmus; + /* cluster size: cache_index = 2 L2, cache_index = 3 L3 */ + extern int cluster_cache_index; + ++/* Slot length (ns) for NPS-F semi-part. algo */ ++extern lt_t npsf_slot_length; ++ + int register_sched_plugin(struct sched_plugin* plugin); + struct sched_plugin* find_sched_plugin(const char* name); + int print_sched_plugins(char* buf, int max); +diff --git a/include/litmus/trace.h b/include/litmus/trace.h +index b32c711..6afbf96 100644 +--- a/include/litmus/trace.h ++++ b/include/litmus/trace.h +@@ -78,6 +78,8 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu) + #define TS_TICK_START(t) TTIMESTAMP(110, t) + #define TS_TICK_END(t) TTIMESTAMP(111, t) + ++#define TS_PULL_TIMER_START TIMESTAMP(112) ++#define TS_PULL_TIMER_END TIMESTAMP(113) + + #define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ + #define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ +diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h +index f0618e7..4e82c52 100644 +--- a/include/litmus/unistd_64.h ++++ b/include/litmus/unistd_64.h +@@ -33,5 +33,7 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) + __SYSCALL(__NR_release_ts, sys_release_ts) + #define __NR_null_call __LSC(13) + __SYSCALL(__NR_null_call, sys_null_call) ++#define __NR_add_server __LSC(14) ++__SYSCALL(__NR_add_server, sys_add_server) + +-#define NR_litmus_syscalls 14 ++#define NR_litmus_syscalls 15 +diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c +index fdf9596..23d3712 100644 +--- a/kernel/hrtimer.c ++++ b/kernel/hrtimer.c +@@ -47,6 +47,7 @@ + #include + + #include ++#include + + #include + +@@ -1063,6 +1064,7 @@ void hrtimer_pull(void) + struct hrtimer_start_on_info *info; + struct list_head *pos, *safe, list; + ++ TS_PULL_TIMER_START; + raw_spin_lock(&base->lock); + list_replace_init(&base->to_pull, &list); + raw_spin_unlock(&base->lock); +@@ -1073,6 +1075,7 @@ void hrtimer_pull(void) + list_del(pos); + hrtimer_start(info->timer, info->time, info->mode); + } ++ TS_PULL_TIMER_END; + } + + /** +diff --git a/litmus/Makefile b/litmus/Makefile +index f301d28..5533a58 100644 +--- a/litmus/Makefile ++++ b/litmus/Makefile +@@ -14,7 +14,10 @@ obj-y = sched_plugin.o litmus.o \ + bheap.o \ + ctrldev.o \ + sched_gsn_edf.o \ +- sched_psn_edf.o ++ sched_psn_edf.o \ ++ sched_edf_wm.o \ ++ sched_npsf.o \ ++ sched_edf_fm.o + + obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o + obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o +diff --git 
a/litmus/litmus.c b/litmus/litmus.c +index b04a42b..2f78022 100644 +--- a/litmus/litmus.c ++++ b/litmus/litmus.c +@@ -632,6 +632,55 @@ static int proc_write_cluster_size(struct file *file, + return len; + } + ++static int proc_read_npsf_slot_length(char *page, char **start, ++ off_t off, int count, ++ int *eof, void *data) ++{ ++ return snprintf(page, PAGE_SIZE, "%d us\n", ++ (int) (npsf_slot_length / NSEC_PER_USEC)); ++} ++ ++extern void npsf_hrtimers_cleanup(void); ++/* NPS-F slot length in us. ++ * ++ * Writing 0 as npsf_slot_length will trigger the removal of the ++ * hrtimers for the domain_reschedule_tick() in the NPS-F plugin. ++ */ ++static int proc_write_npsf_slot_length(struct file *file, ++ const char *buffer, ++ unsigned long count, ++ void *data) ++{ ++ int err, slot_length; ++ char msg[64]; ++ ++ if (count > 63) ++ return -EINVAL; ++ ++ if (copy_from_user(msg, buffer, count)) ++ return -EFAULT; ++ ++ /* terminate */ ++ msg[count] = '\0'; ++ /* chomp */ ++ if (count > 1 && msg[count - 1] == '\n') ++ msg[count - 1] = '\0'; ++ ++ err = sscanf(msg, "%d", &slot_length); ++ ++ if (err == 1) { ++ if (!slot_length) { ++ npsf_hrtimers_cleanup(); ++ /* reset to default */ ++ slot_length = 5000; ++ } ++ npsf_slot_length = (lt_t)((lt_t) slot_length * NSEC_PER_USEC); ++ return count; ++ } ++ ++ return -EINVAL; ++} ++ + #ifdef CONFIG_RELEASE_MASTER + static int proc_read_release_master(char *page, char **start, + off_t off, int count, +@@ -691,7 +740,8 @@ static struct proc_dir_entry *litmus_dir = NULL, + #ifdef CONFIG_RELEASE_MASTER + *release_master_file = NULL, + #endif +- *clus_cache_idx_file = NULL; ++ *clus_cache_idx_file = NULL, ++ *npsf_slot_length_file = NULL; + + static int __init init_litmus_proc(void) + { +@@ -733,6 +783,16 @@ static int __init init_litmus_proc(void) + clus_cache_idx_file->read_proc = proc_read_cluster_size; + clus_cache_idx_file->write_proc = proc_write_cluster_size; + ++ npsf_slot_length_file = create_proc_entry("npsf_slot_length", ++ 0644, litmus_dir); ++ if (!npsf_slot_length_file) { ++ printk(KERN_ERR "Could not allocate npsf_slot_length " ++ "procfs entry.\n"); ++ return -ENOMEM; ++ } ++ npsf_slot_length_file->read_proc = proc_read_npsf_slot_length; ++ npsf_slot_length_file->write_proc = proc_write_npsf_slot_length; ++ + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, + proc_read_stats, NULL); + +@@ -752,6 +812,8 @@ static void exit_litmus_proc(void) + remove_proc_entry("active_plugin", litmus_dir); + if (clus_cache_idx_file) + remove_proc_entry("cluster_cache", litmus_dir); ++ if (npsf_slot_length_file) ++ remove_proc_entry("npsf_slot_length", litmus_dir); + #ifdef CONFIG_RELEASE_MASTER + if (release_master_file) + remove_proc_entry("release_master", litmus_dir); +diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c +new file mode 100644 +index 0000000..b721072 +--- /dev/null ++++ b/litmus/sched_edf_fm.c +@@ -0,0 +1,565 @@ ++/* ++ * litmus/sched_edf_fm.c ++ * ++ * Implementation of the EDF-fm scheduling algorithm. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++typedef struct { ++ rt_domain_t domain; ++ int cpu; ++ struct task_struct* scheduled; /* only RT tasks */ ++/* domain lock */ ++#define slock domain.ready_lock ++} edffm_domain_t; ++ ++DEFINE_PER_CPU(edffm_domain_t, edffm_domains); ++ ++#define local_edffm (&__get_cpu_var(edffm_domains)) ++#define remote_edf(cpu) (&per_cpu(edffm_domains, cpu).domain) ++#define remote_edffm(cpu) (&per_cpu(edffm_domains, cpu)) ++#define task_edf(task) remote_edf(get_partition(task)) ++#define task_edffm(task) remote_edffm(get_partition(task)) ++ ++#define edffm_params(t) (t->rt_param.task_params.semi_part.fm) ++ ++/* Is the task a migratory task? */ ++#define is_migrat_task(task) (edffm_params(task).nr_cpus) ++/* t is on the wrong CPU (it should be requeued properly) */ ++#define wrong_cpu(t) is_migrat_task((t)) && task_cpu((t)) != get_partition((t)) ++/* Get next CPU */ ++#define migrat_next_cpu(t) \ ++ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ ++ edffm_params(t).cpus[1] : \ ++ edffm_params(t).cpus[0]) ++/* Get current cpu */ ++#define migrat_cur_cpu(t) \ ++ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ ++ edffm_params(t).cpus[0] : \ ++ edffm_params(t).cpus[1]) ++/* Manipulate share for current cpu */ ++#define cur_cpu_fract_num(t) \ ++ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ ++ edffm_params(t).fraction[0][0] : \ ++ edffm_params(t).fraction[0][1]) ++#define cur_cpu_fract_den(t) \ ++ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ ++ edffm_params(t).fraction[1][0] : \ ++ edffm_params(t).fraction[1][1]) ++/* Get job number for current cpu */ ++#define cur_cpu_job_no(t) \ ++ ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ ++ tsk_rt(t)->semi_part.cpu_job_no[0] : \ ++ tsk_rt(t)->semi_part.cpu_job_no[1]) ++/* What is the current cpu position in the array? */ ++#define edffm_cpu_pos(cpu,t) \ ++ ((cpu == edffm_params(t).cpus[0]) ? \ ++ 0 : 1) ++ ++/* ++ * EDF-fm: migratory tasks have higher prio than fixed, EDF in both classes. ++ * (Both first and second may be NULL). 
++ */ ++int edffm_higher_prio(struct task_struct* first, struct task_struct* second) ++{ ++ if ((first && edffm_params(first).nr_cpus) || ++ (second && edffm_params(second).nr_cpus)) { ++ if ((first && edffm_params(first).nr_cpus) && ++ (second && edffm_params(second).nr_cpus)) ++ /* both are migrating */ ++ return edf_higher_prio(first, second); ++ ++ if (first && edffm_params(first).nr_cpus) ++ /* first is migrating */ ++ return 1; ++ else ++ /* second is migrating */ ++ return 0; ++ } ++ ++ /* both are fixed or not real time */ ++ return edf_higher_prio(first, second); ++} ++ ++/* need_to_preempt - check whether the task t needs to be preempted ++ * call only with irqs disabled and with ready_lock acquired ++ */ ++int edffm_preemption_needed(rt_domain_t* rt, struct task_struct *t) ++{ ++ /* we need the read lock for edf_ready_queue */ ++ /* no need to preempt if there is nothing pending */ ++ if (!__jobs_pending(rt)) ++ return 0; ++ /* we need to reschedule if t doesn't exist */ ++ if (!t) ++ return 1; ++ ++ /* make sure to get non-rt stuff out of the way */ ++ return !is_realtime(t) || edffm_higher_prio(__next_ready(rt), t); ++} ++ ++/* we assume the lock is being held */ ++static void preempt(edffm_domain_t *edffm) ++{ ++ preempt_if_preemptable(edffm->scheduled, edffm->cpu); ++} ++ ++static void edffm_release_jobs(rt_domain_t* rt, struct bheap* tasks) ++{ ++ unsigned long flags; ++ edffm_domain_t *edffm = container_of(rt, edffm_domain_t, domain); ++ ++ raw_spin_lock_irqsave(&edffm->slock, flags); ++ ++ __merge_ready(rt, tasks); ++ ++ if (edffm_preemption_needed(rt, edffm->scheduled)) ++ preempt(edffm); ++ ++ raw_spin_unlock_irqrestore(&edffm->slock, flags); ++} ++ ++/* EDF-fm uses the "release_master" field to force the next release for ++ * the task 'task' to happen on a remote CPU. The remote cpu for task is ++ * previously set up during job_completion() taking into consideration ++ * whether a task is a migratory task or not. ++ */ ++static inline void ++edffm_add_release_remote(struct task_struct *task) ++{ ++ unsigned long flags; ++ rt_domain_t *rt = task_edf(task); ++ ++ raw_spin_lock_irqsave(&rt->tobe_lock, flags); ++ ++ /* "modify" destination cpu */ ++ rt->release_master = get_partition(task); ++ ++ TRACE_TASK(task, "Add remote release: smp_proc_id = %d, cpu = %d, remote = %d\n", ++ smp_processor_id(), task_cpu(task), rt->release_master); ++ ++ /* trigger future release */ ++ __add_release(rt, task); ++ ++ /* reset proper release_master and unlock */ ++ rt->release_master = NO_CPU; ++ raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); ++} ++ ++/* perform double ready_queue locking in an orderwise fashion ++ * this is called with: interrupt disabled and rq->lock held (from ++ * schedule()) ++ */ ++static noinline void double_domain_lock(edffm_domain_t *dom1, edffm_domain_t *dom2) ++{ ++ if (dom1 == dom2) { ++ /* fake */ ++ raw_spin_lock(&dom1->slock); ++ } else { ++ if (dom1 < dom2) { ++ raw_spin_lock(&dom1->slock); ++ raw_spin_lock(&dom2->slock); ++ TRACE("acquired %d and %d\n", dom1->cpu, dom2->cpu); ++ } else { ++ raw_spin_lock(&dom2->slock); ++ raw_spin_lock(&dom1->slock); ++ TRACE("acquired %d and %d\n", dom2->cpu, dom1->cpu); ++ } ++ } ++} ++ ++/* Directly insert a task in a remote ready queue. This function ++ * should only be called if this task is a migrating task and its ++ * last job for this CPU just completed (a new one is released for ++ * a remote CPU), but the new job is already tardy. 
++ */ ++static noinline void insert_task_in_remote_ready(struct task_struct *task) ++{ ++ edffm_domain_t *this = remote_edffm(task_cpu(task)); ++ edffm_domain_t *remote = remote_edffm(get_partition(task)); ++ ++ BUG_ON(get_partition(task) != remote->cpu); ++ ++ TRACE_TASK(task, "Migrate From P%d -> To P%d\n", ++ this->cpu, remote->cpu); ++ TRACE_TASK(task, "Inserting in remote ready queue\n"); ++ ++ WARN_ON(!irqs_disabled()); ++ ++ raw_spin_unlock(&this->slock); ++ mb(); ++ TRACE_TASK(task,"edffm_lock %d released\n", this->cpu); ++ ++ /* lock both ready queues */ ++ double_domain_lock(this, remote); ++ mb(); ++ ++ __add_ready(&remote->domain, task); ++ ++ /* release remote but keep ours */ ++ raw_spin_unlock(&remote->slock); ++ TRACE_TASK(task,"edffm_lock %d released\n", remote->cpu); ++ ++ /* ask remote cpu to reschedule, we are already rescheduling on this */ ++ preempt(remote); ++} ++ ++static void requeue(struct task_struct* t, rt_domain_t *edf) ++{ ++ if (t->state != TASK_RUNNING) ++ TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); ++ ++ set_rt_flags(t, RT_F_RUNNING); ++ if (is_released(t, litmus_clock())) { ++ if (wrong_cpu(t)) { ++ /* this should only happen if t just completed, but ++ * its next release is already tardy, so it should be ++ * migrated and inserted in the remote ready queue ++ */ ++ TRACE_TASK(t, "Migrating task already released, " ++ "move from P%d to P%d\n", ++ task_cpu(t), get_partition(t)); ++ ++ insert_task_in_remote_ready(t); ++ } else { ++ /* not a migrat task or the job is on the right CPU */ ++ __add_ready(edf, t); ++ } ++ } else { ++ if (wrong_cpu(t)) { ++ ++ TRACE_TASK(t, "Migrating task, adding remote release\n"); ++ edffm_add_release_remote(t); ++ } else { ++ TRACE_TASK(t, "Adding local release\n"); ++ add_release(edf, t); ++ } ++ } ++} ++ ++/* Update statistics for the _current_ job. ++ * - job_no was incremented _before_ starting this job ++ * (release_at / prepare_for_next_period) ++ * - cpu_job_no is incremented when the job completes ++ */ ++static void update_job_counter(struct task_struct *t) ++{ ++ int cpu_pos; ++ ++ /* Which CPU counter should be incremented? */ ++ cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t); ++ t->rt_param.semi_part.cpu_job_no[cpu_pos]++; ++ ++ TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n", ++ t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t), ++ t->rt_param.task_params.cpu); ++} ++ ++/* What is the next cpu for this job? (eq. 
8, in EDF-Fm paper) */ ++static int next_cpu_for_job(struct task_struct *t) ++{ ++ BUG_ON(!is_migrat_task(t)); ++ ++ TRACE_TASK(t, "%u = %u * %u / %u\n", ++ t->rt_param.job_params.job_no, cur_cpu_job_no(t), ++ cur_cpu_fract_den(t), cur_cpu_fract_num(t)); ++ if ((t->rt_param.job_params.job_no) == ++ (((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) / ++ cur_cpu_fract_num(t))) ++ return edffm_params(t).cpus[0]; ++ ++ return edffm_params(t).cpus[1]; ++} ++ ++/* If needed (the share for task t on this CPU is exhausted), updates ++ * the task_params.cpu for the _migrating_ task t ++ */ ++static void change_migrat_cpu_if_needed(struct task_struct *t) ++{ ++ BUG_ON(!is_migrat_task(t)); ++ /* EDF-fm: if it is a migrating task and it has already executed ++ * the required number of jobs on this CPU, we need to move it ++ * on its next CPU; changing the cpu here will affect the requeue ++ * and the next release ++ */ ++ if (unlikely(next_cpu_for_job(t) != migrat_cur_cpu(t))) { ++ ++ tsk_rt(t)->task_params.cpu = migrat_next_cpu(t); ++ TRACE_TASK(t, "EDF-fm: will migrate job %d -> %d\n", ++ task_cpu(t), tsk_rt(t)->task_params.cpu); ++ return; ++ } ++ ++ TRACE_TASK(t, "EDF-fm: job will stay on %d -> %d\n", ++ task_cpu(t), tsk_rt(t)->task_params.cpu); ++} ++ ++static void job_completion(struct task_struct* t, int forced) ++{ ++ sched_trace_task_completion(t,forced); ++ TRACE_TASK(t, "job_completion().\n"); ++ ++ if (unlikely(is_migrat_task(t))) { ++ update_job_counter(t); ++ change_migrat_cpu_if_needed(t); ++ } ++ ++ set_rt_flags(t, RT_F_SLEEP); ++ prepare_for_next_period(t); ++} ++ ++static void edffm_tick(struct task_struct *t) ++{ ++ edffm_domain_t *edffm = local_edffm; ++ ++ BUG_ON(is_realtime(t) && t != edffm->scheduled); ++ ++ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { ++ set_tsk_need_resched(t); ++ TRACE("edffm_scheduler_tick: " ++ "%d is preemptable " ++ " => FORCE_RESCHED\n", t->pid); ++ } ++} ++ ++static struct task_struct* edffm_schedule(struct task_struct * prev) ++{ ++ edffm_domain_t* edffm = local_edffm; ++ rt_domain_t* edf = &edffm->domain; ++ struct task_struct* next; ++ ++ int out_of_time, sleep, preempt, exists, blocks, change_cpu, resched; ++ ++ raw_spin_lock(&edffm->slock); ++ ++ BUG_ON(edffm->scheduled && edffm->scheduled != prev); ++ BUG_ON(edffm->scheduled && !is_realtime(prev)); ++ ++ /* (0) Determine state */ ++ exists = edffm->scheduled != NULL; ++ blocks = exists && !is_running(edffm->scheduled); ++ out_of_time = exists && ++ budget_enforced(edffm->scheduled) && ++ budget_exhausted(edffm->scheduled); ++ sleep = exists && get_rt_flags(edffm->scheduled) == RT_F_SLEEP; ++ change_cpu = exists && wrong_cpu(edffm->scheduled); ++ preempt = edffm_preemption_needed(edf, prev); ++ ++ BUG_ON(blocks && change_cpu); ++ ++ if (exists) ++ TRACE_TASK(prev, ++ "blocks:%d out_of_time:%d sleep:%d preempt:%d " ++ "wrong_cpu:%d state:%d sig:%d\n", ++ blocks, out_of_time, sleep, preempt, ++ change_cpu, prev->state, signal_pending(prev)); ++ ++ /* If we need to preempt do so. */ ++ resched = preempt; ++ ++ /* If a task blocks we have no choice but to reschedule. */ ++ if (blocks) ++ resched = 1; ++ ++ /* If a task has just woken up, it was tardy and the wake up ++ * raced with this schedule, a new job has already been released, ++ * but scheduled should be enqueued on a remote ready queue, and a ++ * new task should be selected for the current queue. 
++ */ ++ if (change_cpu) ++ resched = 1; ++ ++ /* Any task that is preemptable and either exhausts its execution ++ * budget or wants to sleep completes. We may have to reschedule after ++ * this. ++ */ ++ if ((out_of_time || sleep) && !blocks) { ++ job_completion(edffm->scheduled, !sleep); ++ resched = 1; ++ } ++ ++ /* The final scheduling decision. Do we need to switch for some reason? ++ * Switch if we are in RT mode and have no task or if we need to ++ * resched. ++ */ ++ next = NULL; ++ if (resched || !exists) { ++ ++ if (edffm->scheduled && !blocks) ++ requeue(edffm->scheduled, edf); ++ next = __take_ready(edf); ++ } else ++ /* Only override Linux scheduler if we have a real-time task ++ * scheduled that needs to continue. ++ */ ++ if (exists) ++ next = prev; ++ ++ if (next) { ++ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); ++ set_rt_flags(next, RT_F_RUNNING); ++ } else { ++ TRACE("becoming idle at %llu\n", litmus_clock()); ++ } ++ ++ edffm->scheduled = next; ++ raw_spin_unlock(&edffm->slock); ++ ++ return next; ++} ++ ++/* Prepare a task for running in RT mode ++ */ ++static void edffm_task_new(struct task_struct * t, int on_rq, int running) ++{ ++ rt_domain_t* edf = task_edf(t); ++ edffm_domain_t* edffm = task_edffm(t); ++ unsigned long flags; ++ ++ TRACE_TASK(t, "EDF-fm: task new, cpu = %d\n", ++ t->rt_param.task_params.cpu); ++ ++ release_at(t, litmus_clock()); ++ update_job_counter(t); ++ ++ /* The task should be running in the queue, otherwise signal ++ * code will try to wake it up with fatal consequences. ++ */ ++ raw_spin_lock_irqsave(&edffm->slock, flags); ++ if (running) { ++ /* there shouldn't be anything else running at the time */ ++ BUG_ON(edffm->scheduled); ++ edffm->scheduled = t; ++ } else { ++ requeue(t, edf); ++ /* maybe we have to reschedule */ ++ preempt(edffm); ++ } ++ raw_spin_unlock_irqrestore(&edffm->slock, flags); ++} ++ ++static void edffm_task_wake_up(struct task_struct *task) ++{ ++ unsigned long flags; ++ edffm_domain_t* edffm = task_edffm(task); ++ rt_domain_t* edf = task_edf(task); ++ lt_t now; ++ ++ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); ++ ++ TRACE_TASK(task, "acquire edffm %d\n", edffm->cpu); ++ raw_spin_lock_irqsave(&edffm->slock, flags); ++ ++ BUG_ON(edffm != task_edffm(task)); ++ BUG_ON(is_queued(task)); ++ ++ now = litmus_clock(); ++ if (is_tardy(task, now)) { ++ if (unlikely(is_migrat_task(task))) { ++ /* a new job will be released. ++ * Update current job counter */ ++ update_job_counter(task); ++ /* Switch CPU if needed */ ++ change_migrat_cpu_if_needed(task); ++ } ++ /* new sporadic release */ ++ TRACE_TASK(task, "release new\n"); ++ release_at(task, now); ++ sched_trace_task_release(task); ++ } ++ ++ /* Only add to ready queue if it is not the currently-scheduled ++ * task. This could be the case if a task was woken up concurrently ++ * on a remote CPU before the executing CPU got around to actually ++ * de-scheduling the task, i.e., wake_up() raced with schedule() ++ * and won. 
++ */ ++ if (edffm->scheduled != task) ++ requeue(task, edf); ++ ++ raw_spin_unlock_irqrestore(&edffm->slock, flags); ++ TRACE_TASK(task, "release edffm %d\n", edffm->cpu); ++ TRACE_TASK(task, "wake up done\n"); ++} ++ ++static void edffm_task_block(struct task_struct *t) ++{ ++ TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); ++ ++ BUG_ON(!is_realtime(t)); ++ if (is_queued(t)) { ++ edffm_domain_t *edffm = local_edffm; ++ TRACE_TASK(t, "task blocked, race with wakeup, " ++ "remove from queue %d\n", edffm->cpu); ++ remove(&edffm->domain, t); ++ } ++} ++ ++static void edffm_task_exit(struct task_struct * t) ++{ ++ unsigned long flags; ++ edffm_domain_t* edffm = task_edffm(t); ++ rt_domain_t* edf; ++ ++ raw_spin_lock_irqsave(&edffm->slock, flags); ++ if (is_queued(t)) { ++ /* dequeue */ ++ edf = task_edf(t); ++ remove(edf, t); ++ } ++ if (edffm->scheduled == t) ++ edffm->scheduled = NULL; ++ ++ TRACE_TASK(t, "RIP\n"); ++ ++ preempt(edffm); ++ raw_spin_unlock_irqrestore(&edffm->slock, flags); ++} ++ ++static long edffm_admit_task(struct task_struct* tsk) ++{ ++ return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; ++} ++ ++/* Plugin object */ ++static struct sched_plugin edffm_plugin __cacheline_aligned_in_smp = { ++ .plugin_name = "EDF-fm", ++ .tick = edffm_tick, ++ .task_new = edffm_task_new, ++ .complete_job = complete_job, ++ .task_exit = edffm_task_exit, ++ .schedule = edffm_schedule, ++ .task_wake_up = edffm_task_wake_up, ++ .task_block = edffm_task_block, ++ .admit_task = edffm_admit_task ++}; ++ ++static int __init init_edffm(void) ++{ ++ int i; ++ edffm_domain_t *edffm; ++ ++ /* Note, broken if num_online_cpus() may change */ ++ for (i = 0; i < num_online_cpus(); i++) { ++ edffm = remote_edffm(i); ++ edffm->cpu = i; ++ edffm->scheduled = NULL; ++ edf_domain_init(&edffm->domain, NULL, edffm_release_jobs); ++ } ++ ++ return register_sched_plugin(&edffm_plugin); ++} ++ ++module_init(init_edffm); ++ +diff --git a/litmus/sched_edf_wm.c b/litmus/sched_edf_wm.c +new file mode 100644 +index 0000000..8b7be32 +--- /dev/null ++++ b/litmus/sched_edf_wm.c +@@ -0,0 +1,688 @@ ++/* EDF-WM: based on PSN-EDF. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++typedef struct { ++ rt_domain_t domain; ++ int cpu; ++ struct task_struct* scheduled; /* only RT tasks */ ++ ++/* ++ * scheduling lock slock ++ * protects the domain and serializes scheduling decisions ++ */ ++#define slock domain.ready_lock ++ ++} wm_domain_t; ++ ++DEFINE_PER_CPU(wm_domain_t, wm_domains); ++ ++#define TRACE_DOM(dom, fmt, args...) \ ++ TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args) ++ ++ ++#define local_domain (&__get_cpu_var(wm_domains)) ++#define remote_domain(cpu) (&per_cpu(wm_domains, cpu)) ++#define domain_of_task(task) (remote_domain(get_partition(task))) ++ ++static int is_sliced_task(struct task_struct* t) ++{ ++ return tsk_rt(t)->task_params.semi_part.wm.count; ++} ++ ++static struct edf_wm_slice* get_last_slice(struct task_struct* t) ++{ ++ int idx = tsk_rt(t)->task_params.semi_part.wm.count - 1; ++ return tsk_rt(t)->task_params.semi_part.wm.slices + idx; ++} ++ ++static void compute_slice_params(struct task_struct* t) ++{ ++ struct rt_param* p = tsk_rt(t); ++ /* Here we do a little trick to make the generic EDF code ++ * play well with job slices. 
We overwrite the job-level ++ * release and deadline fields with the slice-specific values ++ * so that we can enqueue this task in an EDF rt_domain_t ++ * without issue. The actual values are cached in the semi_part.wm ++ * structure. */ ++ p->job_params.deadline = p->semi_part.wm.job_release + ++ p->semi_part.wm.slice->deadline; ++ p->job_params.release = p->semi_part.wm.job_release + ++ p->semi_part.wm.slice->offset; ++ ++ /* Similarly, we play a trick on the cpu field. */ ++ p->task_params.cpu = p->semi_part.wm.slice->cpu; ++ ++ /* update the per-slice budget reference */ ++ p->semi_part.wm.exec_time = p->job_params.exec_time; ++} ++ ++static void complete_sliced_job(struct task_struct* t) ++{ ++ struct rt_param* p = tsk_rt(t); ++ ++ /* We need to undo our trickery to the ++ * job parameters (see above). */ ++ p->job_params.release = p->semi_part.wm.job_release; ++ p->job_params.deadline = p->semi_part.wm.job_deadline; ++ ++ /* Ok, now let generic code do the actual work. */ ++ prepare_for_next_period(t); ++ ++ /* And finally cache the updated parameters. */ ++ p->semi_part.wm.job_release = p->job_params.release; ++ p->semi_part.wm.job_deadline = p->job_params.deadline; ++} ++ ++static lt_t slice_exec_time(struct task_struct* t) ++{ ++ struct rt_param* p = tsk_rt(t); ++ ++ /* Compute how much execution time has been consumed ++ * since last slice advancement. */ ++ return p->job_params.exec_time - p->semi_part.wm.exec_time; ++} ++ ++static lt_t slice_budget(struct task_struct* t) ++{ ++ return tsk_rt(t)->semi_part.wm.slice->budget; ++} ++ ++static int slice_budget_exhausted(struct task_struct* t) ++{ ++ return slice_exec_time(t) >= slice_budget(t); ++} ++ ++/* assumes positive remainder; overflows otherwise */ ++static lt_t slice_budget_remaining(struct task_struct* t) ++{ ++ return slice_budget(t) - slice_exec_time(t); ++} ++ ++static int wm_budget_exhausted(struct task_struct* t) ++{ ++ if (is_sliced_task(t)) ++ return slice_budget_exhausted(t); ++ else ++ return budget_exhausted(t); ++} ++ ++static void advance_next_slice(struct task_struct* t, int completion_signaled) ++{ ++ int idx; ++ struct rt_param* p = tsk_rt(t); ++ ++ /* make sure this is actually a sliced job */ ++ BUG_ON(!is_sliced_task(t)); ++ BUG_ON(is_queued(t)); ++ ++ /* determine index of current slice */ ++ idx = p->semi_part.wm.slice - ++ p->task_params.semi_part.wm.slices; ++ ++ TRACE_TASK(t, "advancing slice %d; excess=%lluns; " ++ "completion_signaled=%d.\n", ++ idx, slice_exec_time(t) - slice_budget(t), ++ completion_signaled); ++ ++ if (completion_signaled) ++ idx = 0; ++ else ++ /* increment and wrap around, if necessary */ ++ idx = (idx + 1) % p->task_params.semi_part.wm.count; ++ ++ /* point to next slice */ ++ p->semi_part.wm.slice = ++ p->task_params.semi_part.wm.slices + idx; ++ ++ /* Check if we need to update essential job parameters. */ ++ if (!idx) { ++ /* job completion */ ++ sched_trace_task_completion(t, !completion_signaled); ++ TRACE_TASK(t, "completed sliced job" ++ "(signaled:%d)\n", completion_signaled); ++ complete_sliced_job(t); ++ } ++ ++ /* Update job parameters for new slice. 
*/ ++ compute_slice_params(t); ++} ++ ++/* assumes time_passed does not advance past the last slice */ ++static void fast_forward_slices(struct task_struct* t, lt_t time_passed) ++{ ++ TRACE_TASK(t, "fast forwarding %lluns\n", time_passed); ++ ++ /* this is NOT the slice version */ ++ BUG_ON(budget_remaining(t) <= time_passed); ++ ++ if (wm_budget_exhausted(t)) { ++ /* This can happen if a suspension raced ++ * with a normal slice advancement. wm_schedule() ++ * does not process out_of_time when a task blocks. */ ++ TRACE_TASK(t, "block raced with out_of_time?\n"); ++ advance_next_slice(t, 0); ++ } ++ ++ while (time_passed && ++ time_passed >= slice_budget_remaining(t)) { ++ /* slice completely exhausted */ ++ time_passed -= slice_budget_remaining(t); ++ tsk_rt(t)->job_params.exec_time += ++ slice_budget_remaining(t); ++ ++ BUG_ON(!slice_budget_exhausted(t)); ++ BUG_ON(slice_budget_remaining(t) != 0); ++ BUG_ON(tsk_rt(t)->semi_part.wm.slice == get_last_slice(t)); ++ ++ advance_next_slice(t, 0); ++ } ++ /* add remainder to exec cost */ ++ tsk_rt(t)->job_params.exec_time += time_passed; ++} ++ ++/* we assume the lock is being held */ ++static void preempt(wm_domain_t *dom) ++{ ++ TRACE_DOM(dom, "will be preempted.\n"); ++ /* We pass NULL as the task since non-preemptive sections are not ++ * supported in this plugin, so per-task checks are not needed. */ ++ preempt_if_preemptable(NULL, dom->cpu); ++} ++ ++static void wm_domain_init(wm_domain_t* dom, ++ check_resched_needed_t check, ++ release_jobs_t release, ++ int cpu) ++{ ++ edf_domain_init(&dom->domain, check, release); ++ dom->cpu = cpu; ++ dom->scheduled = NULL; ++} ++ ++static void wm_requeue_remote(struct task_struct *t) ++{ ++ wm_domain_t *dom = domain_of_task(t); ++ ++ set_rt_flags(t, RT_F_RUNNING); ++ if (is_released(t, litmus_clock())) ++ /* acquires necessary lock */ ++ add_ready(&dom->domain, t); ++ else ++ /* force timer on remote CPU */ ++ add_release_on(&dom->domain, t, get_partition(t)); ++} ++ ++static void wm_requeue_local(struct task_struct* t, rt_domain_t *edf) ++{ ++ if (t->state != TASK_RUNNING) ++ TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); ++ ++ set_rt_flags(t, RT_F_RUNNING); ++ if (is_released(t, litmus_clock())) ++ __add_ready(edf, t); ++ else ++ add_release(edf, t); /* it has got to wait */ ++} ++ ++static int wm_check_resched(rt_domain_t *edf) ++{ ++ wm_domain_t *dom = container_of(edf, wm_domain_t, domain); ++ ++ /* because this is a callback from rt_domain_t we already hold ++ * the necessary lock for the ready queue ++ */ ++ if (edf_preemption_needed(edf, dom->scheduled)) { ++ preempt(dom); ++ return 1; ++ } else ++ return 0; ++} ++ ++static void regular_job_completion(struct task_struct* t, int forced) ++{ ++ sched_trace_task_completion(t, forced); ++ TRACE_TASK(t, "job_completion().\n"); ++ ++ set_rt_flags(t, RT_F_SLEEP); ++ prepare_for_next_period(t); ++} ++ ++static void wm_job_or_slice_completion(struct task_struct* t, ++ int completion_signaled) ++{ ++ if (is_sliced_task(t)) ++ advance_next_slice(t, completion_signaled); ++ else ++ regular_job_completion(t, !completion_signaled); ++} ++ ++static void wm_tick(struct task_struct *t) ++{ ++ wm_domain_t *dom = local_domain; ++ ++ /* Check for inconsistency. 
We don't need the lock for this since ++ * ->scheduled is only changed in schedule, which obviously is not ++ * executing in parallel on this CPU ++ */ ++ BUG_ON(is_realtime(t) && t != dom->scheduled); ++ ++ if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) { ++ set_tsk_need_resched(t); ++ TRACE_DOM(dom, "budget of %d exhausted in tick\n", ++ t->pid); ++ } ++} ++ ++static struct task_struct* wm_schedule(struct task_struct * prev) ++{ ++ wm_domain_t *dom = local_domain; ++ rt_domain_t *edf = &dom->domain; ++ struct task_struct *next, *migrate = NULL; ++ ++ int out_of_time, sleep, preempt, wrong_cpu, exists, blocks, resched; ++ ++ raw_spin_lock(&dom->slock); ++ ++ /* Sanity checking: ++ * When a task exits (dead) dom->schedule may be null ++ * and prev _is_ realtime. */ ++ BUG_ON(dom->scheduled && dom->scheduled != prev); ++ BUG_ON(dom->scheduled && !is_realtime(prev)); ++ ++ /* (0) Determine state */ ++ exists = dom->scheduled != NULL; ++ wrong_cpu = exists && get_partition(dom->scheduled) != dom->cpu; ++ blocks = exists && !is_running(dom->scheduled); ++ out_of_time = exists ++ && budget_enforced(dom->scheduled) ++ && wm_budget_exhausted(dom->scheduled); ++ sleep = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP; ++ preempt = edf_preemption_needed(edf, prev); ++ ++ /* If we need to preempt do so. ++ * The following checks set resched to 1 in case of special ++ * circumstances. ++ */ ++ resched = preempt; ++ ++ ++ if (exists) ++ TRACE_TASK(prev, ++ "blocks:%d out_of_time:%d sleep:%d preempt:%d " ++ "wrong_cpu:%d state:%d sig:%d\n", ++ blocks, out_of_time, sleep, preempt, wrong_cpu, ++ prev->state, signal_pending(prev)); ++ ++ /* If a task blocks we have no choice but to reschedule. ++ */ ++ if (blocks) ++ resched = 1; ++ ++ /* This can happen if sliced task was moved to the next slice ++ * by the wake_up() code path while still being scheduled. ++ */ ++ if (wrong_cpu) ++ resched = 1; ++ ++ /* Any task that is preemptable and either exhausts its execution ++ * budget or wants to sleep completes. We may have to reschedule after ++ * this. ++ */ ++ if ((out_of_time || sleep) && !blocks) { ++ wm_job_or_slice_completion(dom->scheduled, sleep); ++ resched = 1; ++ } ++ ++ /* The final scheduling decision. Do we need to switch for some reason? ++ * Switch if we are in RT mode and have no task or if we need to ++ * resched. ++ */ ++ next = NULL; ++ if (resched || !exists) { ++ if (dom->scheduled && !blocks) { ++ if (get_partition(dom->scheduled) == dom->cpu) ++ /* local task */ ++ wm_requeue_local(dom->scheduled, edf); ++ else ++ /* not local anymore; wait until we drop the ++ * ready queue lock */ ++ migrate = dom->scheduled; ++ } ++ next = __take_ready(edf); ++ } else ++ /* Only override Linux scheduler if we have a real-time task ++ * scheduled that needs to continue. 
*/ ++ if (exists) ++ next = prev; ++ ++ if (next) { ++ TRACE_TASK(next, "scheduled at %llu (state:%d/%d)\n", litmus_clock(), ++ next->state, is_running(next)); ++ set_rt_flags(next, RT_F_RUNNING); ++ } else if (exists) { ++ TRACE("becoming idle at %llu\n", litmus_clock()); ++ } ++ ++ dom->scheduled = next; ++ raw_spin_unlock(&dom->slock); ++ ++ /* check if we need to push the previous task onto another queue */ ++ if (migrate) { ++ TRACE_TASK(migrate, "schedule-initiated migration to %d\n", ++ get_partition(migrate)); ++ wm_requeue_remote(migrate); ++ } ++ ++ return next; ++} ++ ++ ++/* Prepare a task for running in RT mode ++ */ ++static void wm_task_new(struct task_struct * t, int on_rq, int running) ++{ ++ wm_domain_t* dom = domain_of_task(t); ++ rt_domain_t* edf = &dom->domain; ++ unsigned long flags; ++ ++ TRACE_TASK(t, "edf-wm: task new, cpu = %d\n", ++ t->rt_param.task_params.cpu); ++ ++ /* setup job parameters */ ++ release_at(t, litmus_clock()); ++ ++ /* The task should be running in the queue, otherwise signal ++ * code will try to wake it up with fatal consequences. ++ */ ++ raw_spin_lock_irqsave(&dom->slock, flags); ++ ++ if (is_sliced_task(t)) { ++ /* make sure parameters are initialized consistently */ ++ tsk_rt(t)->semi_part.wm.exec_time = 0; ++ tsk_rt(t)->semi_part.wm.job_release = get_release(t); ++ tsk_rt(t)->semi_part.wm.job_deadline = get_deadline(t); ++ tsk_rt(t)->semi_part.wm.slice = tsk_rt(t)->task_params.semi_part.wm.slices; ++ tsk_rt(t)->job_params.exec_time = 0; ++ } ++ ++ if (running) { ++ /* there shouldn't be anything else running at the time */ ++ BUG_ON(dom->scheduled); ++ dom->scheduled = t; ++ } else { ++ wm_requeue_local(t, edf); ++ /* maybe we have to reschedule */ ++ preempt(dom); ++ } ++ raw_spin_unlock_irqrestore(&dom->slock, flags); ++} ++ ++static void wm_release_at(struct task_struct *t, lt_t start) ++{ ++ struct rt_param* p = tsk_rt(t); ++ ++ if (is_sliced_task(t)) { ++ /* simulate wrapping to the first slice */ ++ p->semi_part.wm.job_deadline = start; ++ p->semi_part.wm.slice = get_last_slice(t); ++ /* FIXME: creates bogus completion event... 
*/ ++ advance_next_slice(t, 0); ++ set_rt_flags(t, RT_F_RUNNING); ++ } else ++ /* generic code handles it */ ++ release_at(t, start); ++} ++ ++static lt_t wm_earliest_release(struct task_struct *t, lt_t now) ++{ ++ lt_t deadline; ++ if (is_sliced_task(t)) ++ deadline = tsk_rt(t)->semi_part.wm.job_deadline; ++ else ++ deadline = get_deadline(t); ++ if (lt_before(deadline, now)) ++ return now; ++ else ++ return deadline; ++} ++ ++static void wm_task_wake_up(struct task_struct *t) ++{ ++ unsigned long flags; ++ wm_domain_t* dom = domain_of_task(t); ++ rt_domain_t* edf = &dom->domain; ++ struct rt_param* p = tsk_rt(t); ++ lt_t now, sleep_time; ++ int migrate = 0; ++ ++ raw_spin_lock_irqsave(&dom->slock, flags); ++ BUG_ON(is_queued(t)); ++ ++ now = litmus_clock(); ++ ++ sleep_time = now - p->semi_part.wm.suspend_time; ++ ++ TRACE_TASK(t, "wake_up at %llu after %llu, still-scheduled:%d\n", ++ now, sleep_time, dom->scheduled == t); ++ ++ /* account sleep time as execution time */ ++ if (get_exec_time(t) + sleep_time >= get_exec_cost(t)) { ++ /* new sporadic release */ ++ TRACE_TASK(t, "new sporadic release\n"); ++ wm_release_at(t, wm_earliest_release(t, now)); ++ sched_trace_task_release(t); ++ } else if (is_sliced_task(t)) { ++ /* figure out which slice we should be executing on */ ++ fast_forward_slices(t, sleep_time); ++ /* can't be exhausted now */ ++ BUG_ON(wm_budget_exhausted(t)); ++ } else { ++ /* simply add to the execution time */ ++ tsk_rt(t)->job_params.exec_time += sleep_time; ++ } ++ ++ ++ /* Only add to ready queue if it is not the currently-scheduled ++ * task. This could be the case if a task was woken up concurrently ++ * on a remote CPU before the executing CPU got around to actually ++ * de-scheduling the task, i.e., wake_up() raced with schedule() ++ * and won. 
++ */ ++ if (dom->scheduled != t) { ++ if (get_partition(t) == dom->cpu) ++ wm_requeue_local(t, edf); ++ else ++ /* post-pone migration until after unlocking */ ++ migrate = 1; ++ } ++ ++ raw_spin_unlock_irqrestore(&dom->slock, flags); ++ ++ if (migrate) { ++ TRACE_TASK(t, "wake_up-initiated migration to %d\n", ++ get_partition(t)); ++ wm_requeue_remote(t); ++ } ++ ++ TRACE_TASK(t, "wake up done\n"); ++} ++ ++static void wm_task_block(struct task_struct *t) ++{ ++ wm_domain_t* dom = domain_of_task(t); ++ unsigned long flags; ++ lt_t now = litmus_clock(); ++ ++ TRACE_TASK(t, "block at %llu, state=%d\n", now, t->state); ++ ++ tsk_rt(t)->semi_part.wm.suspend_time = now; ++ ++ raw_spin_lock_irqsave(&dom->slock, flags); ++ if (is_queued(t)) { ++ TRACE_TASK(t, "still queued; migration invariant failed?\n"); ++ remove(&dom->domain, t); ++ } ++ raw_spin_unlock_irqrestore(&dom->slock, flags); ++ ++ BUG_ON(!is_realtime(t)); ++} ++ ++static void wm_task_exit(struct task_struct * t) ++{ ++ unsigned long flags; ++ wm_domain_t* dom = domain_of_task(t); ++ rt_domain_t* edf = &dom->domain; ++ ++ raw_spin_lock_irqsave(&dom->slock, flags); ++ if (is_queued(t)) { ++ /* dequeue */ ++ remove(edf, t); ++ } ++ if (dom->scheduled == t) ++ dom->scheduled = NULL; ++ ++ TRACE_TASK(t, "RIP, now reschedule\n"); ++ ++ preempt(dom); ++ raw_spin_unlock_irqrestore(&dom->slock, flags); ++} ++ ++static long wm_check_params(struct task_struct *t) ++{ ++ struct rt_param* p = tsk_rt(t); ++ struct edf_wm_params* wm = &p->task_params.semi_part.wm; ++ int i; ++ lt_t tmp; ++ ++ if (!is_sliced_task(t)) { ++ /* regular task; nothing to check */ ++ TRACE_TASK(t, "accepted regular (non-sliced) task with " ++ "%d slices\n", ++ wm->count); ++ return 0; ++ } ++ ++ /* (1) Either not sliced, or more than 1 slice. */ ++ if (wm->count == 1 || wm->count > MAX_EDF_WM_SLICES) { ++ TRACE_TASK(t, "bad number of slices (%u) \n", ++ wm->count); ++ return -EINVAL; ++ } ++ ++ /* (2) The partition has to agree with the first slice. */ ++ if (get_partition(t) != wm->slices[0].cpu) { ++ TRACE_TASK(t, "partition and first slice CPU differ " ++ "(%d != %d)\n", get_partition(t), wm->slices[0].cpu); ++ return -EINVAL; ++ } ++ ++ /* (3) The total budget must agree. */ ++ for (i = 0, tmp = 0; i < wm->count; i++) ++ tmp += wm->slices[i].budget; ++ if (get_exec_cost(t) != tmp) { ++ TRACE_TASK(t, "total budget and sum of slice budgets differ\n"); ++ return -EINVAL; ++ } ++ ++ /* (4) The release of each slice must not precede the previous ++ * deadline. */ ++ for (i = 0; i < wm->count - 1; i++) ++ if (wm->slices[i].deadline > wm->slices[i + 1].offset) { ++ TRACE_TASK(t, "slice %d overlaps with slice %d\n", ++ i, i + 1); ++ return -EINVAL; ++ } ++ ++ /* (5) The budget of each slice must fit within [offset, deadline] */ ++ for (i = 0; i < wm->count; i++) ++ if (lt_before(wm->slices[i].deadline, wm->slices[i].offset) || ++ wm->slices[i].deadline - wm->slices[i].offset < ++ wm->slices[i].budget) { ++ TRACE_TASK(t, "slice %d is overloaded\n", i); ++ return -EINVAL; ++ } ++ ++ /* (6) The budget of each slice must exceed the minimum budget size. */ ++ for (i = 0; i < wm->count; i++) ++ if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) { ++ TRACE_TASK(t, "slice %d is too short\n", i); ++ return -EINVAL; ++ } ++ ++ /* (7) The CPU of each slice must be different from the previous CPU. 
*/ ++ for (i = 0; i < wm->count - 1; i++) ++ if (wm->slices[i].cpu == wm->slices[i + 1].cpu) { ++ TRACE_TASK(t, "slice %d does not migrate\n", i); ++ return -EINVAL; ++ } ++ ++ /* (8) The CPU of each slice must be online. */ ++ for (i = 0; i < wm->count; i++) ++ if (!cpu_online(wm->slices[i].cpu)) { ++ TRACE_TASK(t, "slice %d is allocated on offline CPU\n", ++ i); ++ return -EINVAL; ++ } ++ ++ /* (9) A sliced task's budget must be precisely enforced. */ ++ if (!budget_precisely_enforced(t)) { ++ TRACE_TASK(t, "budget is not precisely enforced " ++ "(policy: %d).\n", ++ tsk_rt(t)->task_params.budget_policy); ++ return -EINVAL; ++ } ++ ++ TRACE_TASK(t, "accepted sliced task with %d slices\n", ++ wm->count); ++ ++ return 0; ++} ++ ++static long wm_admit_task(struct task_struct* t) ++{ ++ return task_cpu(t) == get_partition(t) ? wm_check_params(t) : -EINVAL; ++} ++ ++/* Plugin object */ ++static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = { ++ .plugin_name = "EDF-WM", ++ .tick = wm_tick, ++ .task_new = wm_task_new, ++ .complete_job = complete_job, ++ .task_exit = wm_task_exit, ++ .schedule = wm_schedule, ++ .release_at = wm_release_at, ++ .task_wake_up = wm_task_wake_up, ++ .task_block = wm_task_block, ++ .admit_task = wm_admit_task ++}; ++ ++ ++static int __init init_edf_wm(void) ++{ ++ int i; ++ ++ /* FIXME: breaks with CPU hotplug ++ */ ++ for (i = 0; i < num_online_cpus(); i++) { ++ wm_domain_init(remote_domain(i), ++ wm_check_resched, ++ NULL, i); ++ } ++ return register_sched_plugin(&edf_wm_plugin); ++} ++ ++module_init(init_edf_wm); ++ +diff --git a/litmus/sched_npsf.c b/litmus/sched_npsf.c +new file mode 100644 +index 0000000..aad99c7 +--- /dev/null ++++ b/litmus/sched_npsf.c +@@ -0,0 +1,1185 @@ ++/* ++ * litmus/sched_npsf.c ++ * ++ * Implementation of the NPS-F scheduling algorithm. ++ * ++ * A _server_ may span on multiple _reserves_ on different CPUs. ++ * ++ * * 1 ++ * +--------------+ +--> +--------------+ +--> +--------------+ ++ * | cpu_entry_t | | | npsf_reserve | | | npsf_server | ++ * +--------------+ | +--------------+ | +--------------+ ++ * | |1 | | |1 | | | ++ * | cpu_reserve |--+ 1| server |--+ 1| | ++ * | | +---| cpu | +---| curr_reserve | ++ * +--------------+ <-+ +--------------+ <-+ +--------------+ ++ * 1 * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++/* Be extra verbose (log spam) */ ++#define NPSF_VERBOSE ++ ++#ifdef NPSF_VERBOSE ++#define npsf_printk(fmt, arg...) printk(KERN_INFO fmt, ##arg) ++#else ++#define npsf_printk(fmt, arg...) ++#endif ++ ++struct npsf_reserve; ++ ++/* cpu_entry_t ++ * ++ * Each cpu has a list of reserves assigned on the cpu. ++ * Each reserve has a pointer to its server (Notional processor) ++ * that may be shared among multiple reserves. 
++ */ ++typedef struct { ++ /* lock to protect cpu_reserve and list changes */ ++ raw_spinlock_t cpu_res_lock; ++ /* the reserve currently executing on this cpu */ ++ struct npsf_reserve *cpu_reserve; ++ /* list of reserves on this cpu */ ++ struct list_head npsf_reserves; ++ /* cpu ID */ ++ int cpu; ++ /* timer to control reserve switching */ ++ struct hrtimer timer; ++ /* virtual timer expiring (wrt time_origin) */ ++ lt_t should_expire; ++ /* delegate timer firing to proper cpu */ ++ struct hrtimer_start_on_info info; ++ /* FIXME: the ids for servers should be an increasing int >=0 */ ++ int last_seen_npsf_id; ++} cpu_entry_t; ++ ++/* one cpu_entry_t per CPU */ ++DEFINE_PER_CPU(cpu_entry_t, npsf_cpu_entries); ++ ++/* This is the "notional processor" (i.e., simple server) abstraction. */ ++typedef struct npsf_server { ++ /* shared among reserves */ ++ rt_domain_t dom; ++ /* the real-time task that this server *SHOULD* be scheduling */ ++ struct task_struct *highest_prio; ++ /* current reserve where this dom is executing */ ++ struct npsf_reserve *curr_reserve; ++ /* The "first" reserve for this server in a time slot. ++ * For non-migrating servers this will always be the same as curr_reserve. */ ++ struct npsf_reserve *first_reserve; ++ /* Prevent a race between the last CPU in a reserve chain an the first. */ ++ int first_cpu_wants_ipi; ++ /* rt_domain_t lock + npsf_server_t lock */ ++#define lock dom.ready_lock ++} npsf_server_t; ++ ++typedef struct npsf_reserve { ++ /* Pointer to the server for this reserve: a server may be shared among ++ * multiple cpus with different budget per cpu, but same npsf_id. */ ++ npsf_server_t *server; ++ /* we queue here in npsf_reserves */ ++ struct list_head node; ++ /* budget of this npsf_id on this cpu */ ++ lt_t budget; ++ /* cpu for this (portion of) server */ ++ cpu_entry_t *cpu; ++ /* id of this server, it is the same for the ++ * same server on different cpus */ ++ int npsf_id; ++ /* Can be used to identify if a reserve continues ++ * next npsf in the chain, needed for proper server deletion */ ++ struct npsf_reserve *next_npsf; ++ /* flag that is true if the reserve is currently scheduled */ ++ int is_currently_scheduled; ++} npsf_reserve_t; ++ ++/* synchronization point to start moving and switching servers only ++ * when all servers have been properly set up by the user. ++ */ ++static atomic_t all_servers_added; ++static atomic_t timers_activated = ATOMIC_INIT(0); ++ ++/* Virtual time starts here */ ++static lt_t time_origin; ++ ++/* save number of online cpus seen at init time */ ++static unsigned int _online_cpus = 1; ++ ++#define no_reserves(entry) (list_empty(&((entry)->npsf_reserves))) ++#define local_entry (&__get_cpu_var(npsf_cpu_entries)) ++#define remote_entry(cpu) (&per_cpu(npsf_cpu_entries, (cpu))) ++ ++#define server_from_dom(domain) (container_of((domain), npsf_server_t, dom)) ++ ++/* task_entry uses get_partition() therefore we must take care of ++ * updating correclty the task_params.cpu whenever we switch task, ++ * otherwise we'll deadlock. 
++ */ ++#define task_entry(task) remote_entry(get_partition(task)) ++#define domain_edf(npsf) (&((npsf)->server->dom)) ++ ++#define task_npsfid(task) ((task)->rt_param.task_params.semi_part.npsf_id) ++ ++static inline int owns_server(npsf_reserve_t *npsf) ++{ ++ return (npsf->server->curr_reserve == npsf); ++} ++ ++/* utility functions to get next and prev domains; must hold entry lock */ ++static inline npsf_reserve_t* local_next_reserve(npsf_reserve_t *curr, ++ cpu_entry_t *entry) ++{ ++ return (list_is_last(&curr->node, &entry->npsf_reserves)) ? ++ list_entry(entry->npsf_reserves.next, npsf_reserve_t, node) : ++ list_entry(curr->node.next, npsf_reserve_t, node); ++ ++} ++ ++static inline npsf_reserve_t* local_prev_reserve(npsf_reserve_t *curr, ++ cpu_entry_t *entry) ++{ ++ return ((curr->node.prev == &entry->npsf_reserves) ? ++ list_entry(entry->npsf_reserves.prev, npsf_reserve_t, node) : ++ list_entry(curr->node.prev, npsf_reserve_t, node)); ++} ++static void requeue(struct task_struct* t, rt_domain_t *edf) ++{ ++ if (t->state != TASK_RUNNING) ++ TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); ++ ++ BUG_ON(is_queued(t)); ++ ++ set_rt_flags(t, RT_F_RUNNING); ++ if (is_released(t, litmus_clock())) ++ __add_ready(edf, t); ++ else ++ add_release(edf, t); /* it has got to wait */ ++} ++ ++/* we assume the lock is being held */ ++static void preempt(npsf_reserve_t *npsf) ++{ ++ /* Since we do not support non-preemptable sections, ++ * we don't need to pass in a task. If we call this, ++ * we want the remote CPU to reschedule, no matter what. ++ */ ++ preempt_if_preemptable(NULL, npsf->cpu->cpu); ++} ++ ++ ++static void npsf_preempt_if_server_is_scheduled(npsf_server_t* srv) ++{ ++ npsf_reserve_t *reserve = srv->curr_reserve; ++ if (reserve->is_currently_scheduled) { ++ preempt(reserve); ++ } ++} ++ ++/* assumes lock is held by caller */ ++static void npsf_reschedule_server(npsf_server_t* srv) ++{ ++ struct task_struct* hp = srv->highest_prio; ++ rt_domain_t* edf = &srv->dom; ++ ++ if (edf_preemption_needed(edf, hp)) { ++ srv->highest_prio = __take_ready(edf); ++ if (hp) { ++ TRACE_TASK(hp, "requeue: no longer highest prio\n"); ++ requeue(hp, edf); ++ } ++ npsf_preempt_if_server_is_scheduled(srv); ++ } ++} ++ ++static void npsf_release_jobs(rt_domain_t* rt, struct bheap* tasks) ++{ ++ npsf_server_t *srv = server_from_dom(rt); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&srv->lock, flags); ++ ++ __merge_ready(rt, tasks); ++ npsf_reschedule_server(srv); ++ ++ raw_spin_unlock_irqrestore(&srv->lock, flags); ++} ++ ++static void job_completion(struct task_struct* t, int forced) ++{ ++ sched_trace_task_completion(t, forced); ++ TRACE_TASK(t, "job_completion().\n"); ++ ++ set_rt_flags(t, RT_F_SLEEP); ++ prepare_for_next_period(t); ++} ++ ++/* When did this slot start ? */ ++static inline lt_t slot_begin(lt_t now) ++{ ++ return (((now - time_origin) / npsf_slot_length) ++ * npsf_slot_length + time_origin); ++} ++ ++/* Compute the delta from the beginning of the current slot. */ ++static inline lt_t delta_from_slot_begin(lt_t now) ++{ ++ return (now - slot_begin(now)); ++} ++ ++/* Given an offset into a slot, return the corresponding eligible reserve. ++ * The output param reservation_end is used to return the (relative) time at which ++ * the returned reserve ends. 
++ */ ++static npsf_reserve_t* get_reserve_for_offset(cpu_entry_t *entry, lt_t offset, ++ lt_t *reservation_end) ++{ ++ npsf_reserve_t *tmp; ++ ++ *reservation_end = 0; ++ ++ /* linear search through all reserves, figure out which one is the last one ++ * to become eligible before delta */ ++ list_for_each_entry(tmp, &entry->npsf_reserves, node) { ++ *reservation_end += tmp->budget; ++ ++ /* We are always "late". Found tmp is the right one */ ++ if ((*reservation_end > offset)) ++ return tmp; ++ } ++ ++ /* error: we should never fall of the reserve list */ ++ BUG(); ++ return NULL; ++} ++ ++/* Determine which reserve is eligible based on the current time. ++ */ ++static npsf_reserve_t* get_current_reserve(cpu_entry_t *entry) ++{ ++ lt_t reservation_end; ++ lt_t offset = delta_from_slot_begin(litmus_clock()); ++ return get_reserve_for_offset(entry, offset, &reservation_end); ++} ++ ++/* This is used to ensure that we are "always" late, i.e., to make ++ * sure that the timer jitter is always positive. This should ++ * only trigger in KVM (or in real machines with bad TSC drift after ++ * an IPI). ++ * ++ * ATM proper tracing for this event is done in reserve_switch_tick(). ++ */ ++static noinline ktime_t catchup_time(lt_t from, lt_t target) ++{ ++ while(lt_before(from, target)) { ++ from = litmus_clock(); ++ ++ mb(); ++ cpu_relax(); ++ } ++ ++ return ns_to_ktime(from); ++} ++ ++ ++/* compute the next ABSOLUTE timer value */ ++static lt_t get_next_reserve_switch_time(void) ++{ ++ cpu_entry_t *entry = local_entry; ++ lt_t now = litmus_clock(); ++ lt_t slot_start = slot_begin(now); ++ lt_t offset = now - slot_start; ++ lt_t next_time; ++ npsf_reserve_t* reserve; ++ ++ /* compute the absolute litmus time of the next reserve switch */ ++ reserve = get_reserve_for_offset(entry, offset, &next_time); ++ /* get_reserve_for_offset returns a relative start time; let's make it ++ absolute */ ++ next_time += slot_start; ++ ++ /* Let's see if we need to skip the next timer. */ ++ reserve = local_next_reserve(reserve, entry); ++ /* if the next reserve is a continuing reserve ++ * (i.e., if it belongs to a migrating server), ++ * then we skip the timer event because we will ++ * receive an IPI from the previous processor instead. */ ++ if (reserve->server->first_reserve != reserve) { ++ /* it is indeed not the first reserve */ ++ next_time += reserve->budget; ++ } ++ ++ return next_time; ++} ++ ++/* This is the callback for reserve-switching interrupts. ++ * The timer is reprogrammed to expire at the beginning of every logical ++ * reserve (i.e., a continuing reserve may be split among different CPUs ++ * but is a _single_ logical reserve). get_next_reserve_switch_time() ++ * will return the right next_expire time. ++ */ ++static enum hrtimer_restart reserve_switch_tick(struct hrtimer *timer) ++{ ++ unsigned long flags; ++ cpu_entry_t *entry; ++ /* we are using CLOCK_MONOTONIC */ ++ ktime_t now = ktime_get(); ++ ktime_t delta; ++ int late; ++ ++ entry = container_of(timer, cpu_entry_t, timer); ++ raw_spin_lock_irqsave(&entry->cpu_res_lock, flags); ++ ++ /* jitter wrt virtual time */ ++ delta = ktime_sub(now, ns_to_ktime(entry->should_expire)); ++ late = (ktime_to_ns(delta) >= 0) ? 
1 : 0; ++ ++#ifdef NPSF_VERBOSE ++ if (entry->cpu_reserve && atomic_read(&all_servers_added)) ++ TRACE("(npsf_id: %d) tick starts at %Ld, " ++ "now - should_expire: %Ld\n", ++ entry->cpu_reserve->npsf_id, ++ ktime_to_ns(now), ktime_to_ns(delta)); ++#endif ++ /* if the timer expires earlier than the should_expire time, ++ * we delay the switching until time it's synchronized with ++ * the switch boundary. Otherwise next reserve will execute ++ * longer (wrong). ++ */ ++ if (!late) { ++ TRACE("+++ Timer fired early, waiting...\n"); ++ now = catchup_time(ktime_to_ns(now), entry->should_expire); ++ ++ delta = ktime_sub(now, ns_to_ktime(entry->should_expire)); ++ TRACE("+++ done, tick restarts at %Ld, " ++ "now - should_expire: %Ld\n", ++ ktime_to_ns(now), ktime_to_ns(delta)); ++ } ++ ++ BUG_ON(!atomic_read(&all_servers_added)); ++ BUG_ON(no_reserves(entry)); ++ ++ /* Compute the next time that we need to be notified. */ ++ entry->should_expire = get_next_reserve_switch_time(); ++ ++ /* kindly ask the Penguin to let us know... */ ++ hrtimer_set_expires(timer, ns_to_ktime(entry->should_expire)); ++ ++ /* set resched flag to reschedule local cpu */ ++ set_need_resched(); ++ ++ raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags); ++#ifdef NPSF_VERBOSE ++ if (atomic_read(&all_servers_added)) ++ TRACE("(npsf_id: %d) tick ends at %Ld, should_expire: %llu\n", ++ entry->cpu_reserve->npsf_id, ktime_to_ns(ktime_get()), ++ entry->should_expire); ++#endif ++ ++ return HRTIMER_RESTART; ++} ++ ++static void npsf_scheduler_tick(struct task_struct *t) ++{ ++ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { ++ set_tsk_need_resched(t); ++ TRACE("npsf_tick: %d is preemptable " ++ " => FORCE_RESCHED\n", t->pid); ++ } ++} ++ ++/* Assumption: caller holds srv lock and prev belongs to ++ * the currently-scheduled reservation. ++ */ ++static void npsf_schedule_server(struct task_struct* prev, ++ cpu_entry_t *entry) ++{ ++ npsf_server_t* srv = entry->cpu_reserve->server; ++ ++ int out_of_time, sleep, exists, blocks; ++ ++ exists = is_realtime(prev); ++ blocks = exists && !is_running(prev); ++ out_of_time = exists && ++ budget_enforced(prev) && ++ budget_exhausted(prev); ++ sleep = exists && get_rt_flags(prev) == RT_F_SLEEP; ++ ++ if (exists) ++ TRACE_TASK(prev, "(npsf_id %d) blocks:%d " ++ "out_of_time:%d sleep:%d state:%d sig:%d\n", ++ task_npsfid(prev), ++ blocks, out_of_time, sleep, ++ prev->state, ++ signal_pending(prev)); ++ ++ /* Any task that is preemptable and either exhausts its ++ * execution budget or wants to sleep completes. We may have ++ * to reschedule after this. ++ */ ++ if ((out_of_time || sleep) && !blocks) { ++ job_completion(prev, !sleep); ++ ++ if (srv->highest_prio != prev) { ++ BUG_ON(!is_queued(prev)); ++ remove(&srv->dom, prev); ++ } ++ ++ requeue(prev, &srv->dom); ++ ++ if (srv->highest_prio == prev) ++ srv->highest_prio = __take_ready(&srv->dom); ++ } ++ ++ BUG_ON(blocks && prev == srv->highest_prio); ++// BUG_ON(!srv->highest_prio && jobs_pending(&srv->dom)); ++} ++ ++static void npsf_notify_next_cpu(npsf_reserve_t *npsf_prev) ++{ ++ npsf_server_t *srv; ++ ++ if (unlikely(npsf_prev->next_npsf != npsf_prev)) { ++ /* This reserve is actually shared. Let's update its 'owner' ++ * and notify the next CPU. 
*/ ++ srv = npsf_prev->server; ++ raw_spin_lock(&srv->lock); ++ srv->curr_reserve = npsf_prev->next_npsf; ++ if (srv->first_reserve != srv->curr_reserve || ++ srv->first_cpu_wants_ipi) { ++ /* send an IPI to notify next CPU in chain */ ++ srv->first_cpu_wants_ipi = 0; ++ TRACE("sending IPI\n"); ++ preempt(srv->curr_reserve); ++ } ++ raw_spin_unlock(&srv->lock); ++ } ++} ++ ++static struct task_struct* npsf_schedule(struct task_struct * prev) ++{ ++ npsf_reserve_t *npsf_prev, *npsf_next; ++ npsf_server_t *srv_prev, *srv_next; ++ cpu_entry_t *entry = local_entry; ++ struct task_struct *next; ++ ++ int reserve_switch; ++ ++ /* servers not ready yet, yield to linux */ ++ if (!atomic_read(&all_servers_added)) ++ return NULL; ++ ++#ifdef NPSF_VERBOSE ++ TRACE_TASK(prev, "schedule\n"); ++#endif ++ raw_spin_lock(&entry->cpu_res_lock); ++ ++ BUG_ON(no_reserves(entry)); ++ ++ /* step 1: what are we currently serving? */ ++ npsf_prev = entry->cpu_reserve; ++ srv_prev = npsf_prev->server; ++ ++ /* step 2: what SHOULD we be currently serving? */ ++ npsf_next = get_current_reserve(entry); ++ srv_next = npsf_next->server; ++ ++ /* TODO second measuring point for IPI receiving ++ * if (!srv_next->measure_wait_IPI) --- the remote reset ++ * trace_time_end. ++ */ ++ raw_spin_lock(&srv_prev->lock); ++ ++ ++ /* step 3: update prev server */ ++ if (is_realtime(prev) && task_npsfid(prev) == entry->cpu_reserve->npsf_id) ++ npsf_schedule_server(prev, entry); ++ else if (is_realtime(prev)) ++ TRACE_TASK(prev, "npsf_id %d != cpu_reserve npsf_id %d\n", ++ task_npsfid(prev), entry->cpu_reserve->npsf_id); ++ ++ /* step 4: determine if we need to switch to another reserve */ ++ reserve_switch = npsf_prev != npsf_next; ++ ++ if (!reserve_switch) { ++ /* easy case: just enact what the server scheduler decided */ ++ next = srv_prev->highest_prio; ++ ++ /* Unlock AFTER observing highest_prio to avoid races with ++ * remote rescheduling activity. */ ++ raw_spin_unlock(&srv_prev->lock); ++ } else { ++ /* In this case we have a reserve switch. We are done with the ++ * previous server, so release its lock. */ ++ TRACE("switch reserve npsf_id %d -> npsf_id %d\n", ++ npsf_prev->npsf_id, npsf_next->npsf_id); ++ npsf_prev->is_currently_scheduled = 0; ++ raw_spin_unlock(&srv_prev->lock); ++ ++ /* Move on to the next server. */ ++ ++ raw_spin_lock(&srv_next->lock); ++ npsf_next->is_currently_scheduled = 1; ++ ++ /* make sure we are owner of a server (if it is shared) */ ++ if (unlikely(srv_next->curr_reserve != npsf_next)) { ++ /* We raced with the previous owner. Let's schedule ++ * the previous reserve for now. The previous owner ++ * will send us an IPI when the server has been pushed ++ * to us. ++ */ ++ TRACE("(npsf_id %d) raced with previous server owner\n", ++ npsf_next->npsf_id); ++ ++ /* check if we are the first CPU, in which case we need ++ * to request a notification explicitly */ ++ if (srv_next->first_reserve == npsf_next) ++ srv_next->first_cpu_wants_ipi = 1; ++ ++ npsf_next->is_currently_scheduled = 0; ++ raw_spin_unlock(&srv_next->lock); ++ ++ /* just keep the previous reserve one more time */ ++ raw_spin_lock(&srv_prev->lock); ++ ++ npsf_prev->is_currently_scheduled = 1; ++ /* Note that there is not a race condition here. ++ * Since curr_reserve didn't point yet to this reserve, ++ * so no processor would have observed the one in npsf_next. 
++ * A processor might have observed the flag being zero ++ * in npsf_prev and decided not to send an IPI, which ++ * doesn't matter since we are going to reschedule ++ * below anyay. */ ++ ++ next = srv_prev->highest_prio; ++ ++ raw_spin_unlock(&srv_prev->lock); ++ ++ /* TODO first measuring point for '0'-switching time ++ * remote is not ready yet and will send us an IPI ++ * when it's done. ++ * local: ++ * srv_next->measure_wait_IPI = 1; ++ * remote before sending IPI: ++ * if (srv_next->measure_wait_IPI) reset; ++ */ ++ } else { ++ /* invariant: srv->highest_prio is always the ++ * highest-priority job in the server, and it is always ++ * runnable. Any update to the server must maintain ++ * this invariant. */ ++ next = srv_next->highest_prio; ++ ++ entry->cpu_reserve = npsf_next; ++ raw_spin_unlock(&srv_next->lock); ++ ++ /* send an IPI (if necessary) */ ++ npsf_notify_next_cpu(npsf_prev); ++ } ++ ++ } ++ ++ if (next) { ++ TRACE_TASK(next, "(npsf_id %d) scheduled at %llu\n", ++ task_npsfid(next), litmus_clock()); ++ set_rt_flags(next, RT_F_RUNNING); ++ /* The TASK_RUNNING flag is set by the Penguin _way_ after ++ * activating a task. This dosn't matter much to Linux as ++ * the rq lock will prevent any changes, but it matters to ++ * us. It is possible for a remote cpu waking up this task ++ * to requeue the task before it's runnable, send an IPI here, ++ * we schedule that task (still "not-runnable"), and only ++ * before the real execution of next, the running flag is set. ++ */ ++ if (!is_running(next)) ++ TRACE_TASK(next, "BAD: !TASK_RUNNING\n"); ++ } else { ++ /* FIXME npsf_id is wrong if reserve switch but "switching back" ++ * if we race */ ++ TRACE("(npsf_id %d) becoming idle at %llu\n", ++ reserve_switch ? npsf_next->npsf_id : npsf_prev->npsf_id, ++ litmus_clock()); ++ } ++ ++ raw_spin_unlock(&entry->cpu_res_lock); ++ ++ return next; ++} ++ ++/* Prepare a task for running in RT mode ++ * ++ * We can only be sure that the cpu is a right one (admit checks ++ * against tasks released on a cpu that doesn't host the right npsf_id) ++ * but we _cannot_ be sure that: ++ * 1) the found npsf is the reserve currently running on this cpu. ++ * 2) the current reserve (the one in charge of scheduling) is not ++ * running on a different cpu. ++ */ ++static void npsf_task_new(struct task_struct * t, int on_rq, int running) ++{ ++ npsf_reserve_t *npsf; ++ npsf_server_t *srv; ++ cpu_entry_t *entry = task_entry(t); ++ rt_domain_t *edf; ++ unsigned long flags; ++ ++ BUG_ON(no_reserves(entry)); ++ ++ /* search the proper npsf_server where to add the new task */ ++ list_for_each_entry(npsf, &entry->npsf_reserves, node) { ++ if (npsf->npsf_id == task_npsfid(t)) ++ break; ++ } ++ ++ ++ srv = npsf->server; ++ ++ /* The task should be running in the queue, otherwise signal ++ * code will try to wake it up with fatal consequences. ++ */ ++ raw_spin_lock_irqsave(&entry->cpu_res_lock, flags); ++ raw_spin_lock(&srv->lock); ++ ++ edf = domain_edf(npsf); ++ tsk_rt(t)->domain = edf; ++ ++ TRACE_TASK(t, "task_new: P%d, task_npsfid %d, " ++ "npsf->npsf_id %d, entry->cpu %d\n", ++ t->rt_param.task_params.cpu, task_npsfid(t), ++ npsf->npsf_id, entry->cpu); ++ ++ /* setup job parameters */ ++ release_at(t, litmus_clock()); ++ ++ /* There are four basic scenarios that could happen: ++ * 1) the server is on another cpu and scheduled; ++ * 2) the server is on another cpu and not scheduled; ++ * 3) the server is on this cpu and scheduled; and ++ * 4) the server is on this cpu and not scheduled. 
++ * ++ * Whatever scenario we're in, it cannot change while we are ++ * holding the server lock. ++ * ++ * If the new task does not have a high priority, then ++ * we can just queue it and be done. ++ * ++ * In theory, the requeue() and reschedule_server() code ++ * take care of all that. ++ */ ++ ++ requeue(t, edf); ++ /* reschedule will cause a remote preemption, if required */ ++ npsf_reschedule_server(srv); ++ /* always preempt to make sure we don't ++ * use the stack if it needs to migrate */ ++ set_tsk_need_resched(t); ++ ++ raw_spin_unlock(&srv->lock); ++ raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags); ++} ++ ++static void npsf_task_wake_up(struct task_struct *t) ++{ ++ rt_domain_t *edf; ++ npsf_server_t* srv; ++ unsigned long flags; ++ lt_t now; ++ ++ BUG_ON(!is_realtime(t)); ++ ++ edf = tsk_rt(t)->domain; ++ srv = server_from_dom(edf); ++ ++ raw_spin_lock_irqsave(&srv->lock, flags); ++ ++ BUG_ON(is_queued(t)); ++ ++ now = litmus_clock(); ++ /* FIXME: this should be a configurable policy... */ ++ if (is_tardy(t, now)) { ++ /* new sporadic release */ ++ release_at(t, now); ++ sched_trace_task_release(t); ++ } ++ ++ /* Only add to ready queue if it is not the ++ * currently-scheduled task. ++ */ ++ if (srv->highest_prio != t) { ++ requeue(t, edf); ++ npsf_reschedule_server(srv); ++ } ++#ifdef NPSF_VERBOSE ++ else ++ TRACE_TASK(t, "wake_up, is curr_sched, not requeued\n"); ++#endif ++ ++ raw_spin_unlock_irqrestore(&srv->lock, flags); ++ ++ TRACE_TASK(t, "wake up done\n"); ++} ++ ++static void remove_from_server(struct task_struct *t, npsf_server_t* srv) ++{ ++ if (srv->highest_prio == t) { ++ TRACE_TASK(t, "remove from server: is highest-prio task\n"); ++ srv->highest_prio = NULL; ++ npsf_reschedule_server(srv); ++ } else if (is_queued(t)) { ++ TRACE_TASK(t, "remove from server: removed from queue\n"); ++ remove(&srv->dom, t); ++ } ++#ifdef NPSF_VERBOSE ++ else ++ TRACE_TASK(t, "WARN: where is this task?\n"); ++#endif ++} ++ ++static void npsf_task_block(struct task_struct *t) ++{ ++ rt_domain_t *edf; ++ npsf_server_t* srv; ++ unsigned long flags; ++ ++ TRACE_TASK(t, "(npsf_id %d) block at %llu, state=%d\n", ++ task_npsfid(t), litmus_clock(), t->state); ++ ++ BUG_ON(!is_realtime(t)); ++ ++ edf = tsk_rt(t)->domain; ++ srv = server_from_dom(edf); ++ ++ raw_spin_lock_irqsave(&srv->lock, flags); ++ ++ remove_from_server(t, srv); ++ ++ raw_spin_unlock_irqrestore(&srv->lock, flags); ++} ++ ++static void npsf_task_exit(struct task_struct * t) ++{ ++ rt_domain_t *edf; ++ npsf_server_t* srv; ++ unsigned long flags; ++ ++ BUG_ON(!is_realtime(t)); ++ ++ edf = tsk_rt(t)->domain; ++ srv = server_from_dom(edf); ++ ++ raw_spin_lock_irqsave(&srv->lock, flags); ++ ++ remove_from_server(t, srv); ++ ++ raw_spin_unlock_irqrestore(&srv->lock, flags); ++ ++ TRACE_TASK(t, "RIP, now reschedule\n"); ++} ++ ++static long npsf_admit_task(struct task_struct* tsk) ++{ ++ npsf_reserve_t *npsf; ++ cpu_entry_t *entry = task_entry(tsk); ++ int id_ok = 0; ++ ++ if (!atomic_read(&all_servers_added)) { ++ printk(KERN_DEBUG "not all servers added\n"); ++ return -ENODEV; ++ } ++ ++ /* check to be on the right cpu and on the right server */ ++ if (task_cpu(tsk) != tsk->rt_param.task_params.cpu) { ++ printk(KERN_DEBUG "wrong CPU(%d, %d, %d) for npsf_id %d\n", ++ task_cpu(tsk), tsk->rt_param.task_params.cpu, ++ entry->cpu, task_npsfid(tsk)); ++ return -EINVAL; ++ } ++ ++ /* 1) this cpu should have the proper npsf_id in the list ++ * 2) the rt_domain for the proper npsf_id is not null ++ */ ++ 
list_for_each_entry(npsf, &entry->npsf_reserves, node) { ++ if (npsf->npsf_id == task_npsfid(tsk)) { ++ id_ok = 1; ++ break; ++ } ++ } ++ if (!id_ok) ++ printk(KERN_DEBUG "wrong npsf_id (%d) for entry %d\n", ++ task_npsfid(tsk), entry->cpu); ++ ++ return id_ok ? 0 : -EINVAL; ++} ++ ++/* in litmus.c */ ++extern atomic_t rt_task_count; ++ ++/* initialization status control */ ++static int reserves_allocated = 0; ++ ++#ifdef NPSF_VERBOSE ++static void print_reserve(cpu_entry_t *cpu) ++{ ++ npsf_reserve_t *tmp; ++ ++ printk(KERN_INFO "NPS-F: reserves on CPU %d:\n", cpu->cpu); ++ list_for_each_entry(tmp, &cpu->npsf_reserves, node) { ++ BUG_ON(!tmp->server); ++ BUG_ON(!&(tmp->server->dom)); ++ BUG_ON(tmp->server->highest_prio); ++ printk(KERN_INFO "%d: %d us\n", tmp->npsf_id, ++ (int)(tmp->budget / 1000)); ++ } ++} ++#endif ++/* ++ * do_add_reserve: add a reserve(cpu, id, budget) ++ * ++ * Callback for syscall add_server(); it allows to add the reserve "id" ++ * to the CPU "cpu". "budget" is the length of the reserve for the ++ * notional processor (server) id on the cpu cpu. ++ */ ++static long do_add_reserve(npsf_reserve_t **new, cpu_entry_t *cpu, ++ npsf_server_t *the_dom, int npsf_id, lt_t budget) ++{ ++ unsigned long flags; ++ ++ /* npsf_id for each cpu should be given in increasing order, ++ * it doesn't make sense the same np on the same cpu. ++ * The last_seen_npsf_id is reset upon plugin insertion. ++ */ ++ if (cpu->last_seen_npsf_id >= npsf_id) ++ return -EINVAL; ++ ++ /* don't allow server changes if there are tasks in the system */ ++ if (atomic_read(&rt_task_count)) ++ return -EACCES; ++ ++ if ((*new = kmalloc(sizeof(npsf_reserve_t), GFP_ATOMIC)) == NULL) ++ return -ENOMEM; ++ ++ (*new)->server = the_dom; ++ (*new)->npsf_id = npsf_id; ++ (*new)->budget = budget; ++ (*new)->cpu = cpu; ++ ++ npsf_printk("Add npsf_id %d on P%d with budget %llu\n", (*new)->npsf_id, ++ (*new)->cpu->cpu, (*new)->budget); ++ ++ raw_spin_lock_irqsave(&cpu->cpu_res_lock, flags); ++ ++ list_add_tail(&(*new)->node, &cpu->npsf_reserves); ++ cpu->last_seen_npsf_id = npsf_id; ++ cpu->cpu_reserve = list_first_entry(&cpu->npsf_reserves, npsf_reserve_t, node); ++ ++ raw_spin_unlock_irqrestore(&cpu->cpu_res_lock, flags); ++ ++ return 0; ++} ++ ++static void kickoff_timers(void) ++{ ++ int cpu; ++ cpu_entry_t *entry; ++ lt_t kickoff; ++ ++ kickoff = slot_begin(litmus_clock() + npsf_slot_length * 2); ++ ++ for_each_online_cpu(cpu) { ++ entry = &per_cpu(npsf_cpu_entries, cpu); ++ hrtimer_start_on(cpu, &entry->info, &entry->timer, ++ ns_to_ktime(kickoff), ++ HRTIMER_MODE_ABS_PINNED); ++ entry->should_expire = kickoff; ++ } ++ atomic_set(&timers_activated, 1); ++} ++ ++/* We offer to library a budgets array interface (so we go through the ++ * syscall path only once) and we internally cycle on do_add_reserve. ++ * ++ * last == 1 means that the user is adding the last server and after ++ * the insertion the plugin is properly set up. (FIXME it should be ++ * done in a better way, but I doubt this plugin will ever go ++ * to the master branch). 
++ */ ++asmlinkage long sys_add_server(int __user *__id, ++ struct npsf_budgets __user *__budgets, int last) ++{ ++ int id, i; ++ int ret = -EFAULT; ++ struct npsf_budgets *budgets; ++ cpu_entry_t *entry; ++ npsf_server_t *npsfserver; ++ npsf_reserve_t *npsf_reserve_array[NR_CPUS]; ++ npsf_reserve_t *first_reserve; ++ ++ if (_online_cpus != num_online_cpus()) ++ return ret; ++ ++ if (copy_from_user(&id, __id, sizeof(id))) ++ return ret; ++ ++ budgets = kmalloc(_online_cpus * sizeof(struct npsf_budgets), ++ GFP_ATOMIC); ++ ++ for (i = 0; i < _online_cpus; i++) { ++ budgets[i].cpu = NO_CPU; ++ budgets[i].budget = 0; ++ } ++ ++ if (copy_from_user(budgets, __budgets, ++ sizeof(budgets) * _online_cpus)) ++ goto err; ++ ++ /* initialize the npsf_server_t for this npsf_server series */ ++ npsfserver = kmalloc(sizeof(npsf_server_t), GFP_ATOMIC); ++ if (!npsfserver) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ edf_domain_init(&npsfserver->dom, NULL, npsf_release_jobs); ++ npsfserver->highest_prio = NULL; ++ ++ /* initialize all npsf_reserve_t for this server */ ++ for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) { ++ entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu); ++ if ((ret = do_add_reserve(&npsf_reserve_array[i], entry, ++ npsfserver, ++ id, budgets[i].budget)) < 0) ++ goto err; ++ } ++ /* set the current reserve to the first (and possibly unique) ++ * slice for this npsf_id */ ++ npsfserver->curr_reserve = npsf_reserve_array[0]; ++ npsfserver->first_reserve = npsf_reserve_array[0]; ++ npsfserver->first_cpu_wants_ipi = 0; ++ for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) { ++ ++ if (i == 0 && budgets[i+1].cpu == NO_CPU) { ++ /* Fixed reserve always has itself as next */ ++ npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i]; ++ } else if (((i+1) < _online_cpus) && ++ (i > 0 && budgets[i+1].cpu == NO_CPU)) { ++ /* Last reserve in the chain has the first reserve as next */ ++ npsf_reserve_array[i]->next_npsf = npsf_reserve_array[0]; ++ } else { ++ /* Normal continuing reserve */ ++ npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i+1]; ++ } ++ } ++#ifdef NPSF_VERBOSE ++ for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) { ++ entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu); ++ print_reserve(entry); ++ } ++#endif ++ ++ if (last) { ++ /* force the first slot switching by setting the ++ * current_reserve to the last server for each cpu. ++ * ++ * FIXME:don't assume there exists at least one reserve per CPU ++ */ ++ for_each_online_cpu(i) { ++ entry = &per_cpu(npsf_cpu_entries, i); ++ first_reserve = list_entry(entry->npsf_reserves.next, ++ npsf_reserve_t, node); ++ ++ first_reserve->server->curr_reserve = first_reserve; ++ entry->cpu_reserve = first_reserve; ++ npsf_printk("npsf_id %d is the current reserve " ++ "and server on CPU %d\n", ++ first_reserve->npsf_id, entry->cpu); ++ ++ } ++ ++ kickoff_timers(); ++ ++ /* real plugin enable */ ++ atomic_set(&all_servers_added, 1); ++ mb(); ++ } ++ ++ /* at least one server was initialized and may need deletion */ ++ reserves_allocated = 1; ++err: ++ kfree(budgets); ++ return ret; ++} ++ ++ ++/* Cancel server_reschedule_tick() hrtimers. Wait for all callbacks ++ * to complete. The function is triggered writing 0 as npsf_slot_length. 
++ */ ++void npsf_hrtimers_cleanup(void) ++{ ++ int cpu; ++ cpu_entry_t *entry; ++ int redo; ++ ++ if (!atomic_read(&timers_activated)) ++ return; ++ ++ atomic_set(&timers_activated, 0); ++ ++ /* prevent the firing of the timer on this cpu */ ++ do { ++ redo = 0; ++ for_each_online_cpu(cpu) { ++ entry = &per_cpu(npsf_cpu_entries, cpu); ++ ++ /* if callback active, skip it for now and redo later */ ++ if (hrtimer_try_to_cancel(&entry->timer) == -1) { ++ redo = 1; ++#ifdef NPSF_VERBOSE ++ printk(KERN_INFO "(P%d) hrtimer on P%d was " ++ "active, try to delete again\n", ++ get_cpu(), cpu); ++ put_cpu(); ++#endif ++ } ++ } ++ } while (redo); ++ ++ printk(KERN_INFO "npsf hrtimers deleted\n"); ++} ++ ++static void cleanup_npsf(void) ++{ ++ int cpu; ++ cpu_entry_t *entry; ++ struct list_head *nd, *next; ++ npsf_reserve_t *tmp, *tmp_save; ++ ++ for_each_online_cpu(cpu) { ++ entry = &per_cpu(npsf_cpu_entries, cpu); ++ ++ /* FIXME probably not needed as we should be the only cpu ++ * doing the removal */ ++ raw_spin_lock(&entry->cpu_res_lock); ++ ++ list_for_each_safe(nd, next, &entry->npsf_reserves) { ++ tmp = list_entry(nd, npsf_reserve_t, node); ++ npsf_printk("Del. (id, cpu):(%d, %d)\n", ++ tmp->npsf_id, ++ tmp->cpu->cpu); ++ if (tmp->server) { ++ npsf_printk("Del. reserves for npsf_id %d\n", ++ tmp->npsf_id); ++ tmp_save = tmp; ++ while (tmp_save->next_npsf && ++ tmp_save->next_npsf != tmp) { ++ tmp_save = tmp_save->next_npsf; ++ tmp_save->server = NULL; ++ } ++ npsf_printk("Freeing server 0x%p\n", tmp->server); ++ kfree(tmp->server); ++ } ++ npsf_printk("Freeing npsf_reserve_t 0x%p\n", tmp); ++ kfree(tmp); ++ } ++ list_del(&entry->npsf_reserves); ++ raw_spin_unlock(&entry->cpu_res_lock); ++ } ++} ++ ++/* prevent plugin deactivation if timers are still active */ ++static long npsf_deactivate_plugin(void) ++{ ++ return (atomic_read(&timers_activated)) ? -1 : 0; ++} ++ ++static long npsf_activate_plugin(void) ++{ ++ int cpu; ++ cpu_entry_t *entry; ++ ktime_t now = ktime_get(); ++ ++ /* prevent plugin switching if timers are active */ ++ if (atomic_read(&timers_activated)) ++ return -1; ++ ++ atomic_set(&all_servers_added, 0); ++ ++ /* de-allocate old servers (if any) */ ++ if (reserves_allocated) ++ cleanup_npsf(); ++ ++ _online_cpus = num_online_cpus(); ++ ++ for_each_online_cpu(cpu) { ++ entry = &per_cpu(npsf_cpu_entries, cpu); ++ ++ raw_spin_lock_init(&entry->cpu_res_lock); ++ ++ entry->cpu_reserve = NULL; ++ INIT_LIST_HEAD(&entry->npsf_reserves); ++ ++ entry->cpu = cpu; ++ hrtimer_init(&entry->timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_ABS_PINNED); ++ ++ /* initialize (reinitialize) pull timers */ ++ hrtimer_start_on_info_init(&entry->info); ++ ++ entry->timer.function = reserve_switch_tick; ++ entry->last_seen_npsf_id = -1; ++ } ++ ++ printk(KERN_INFO "NPS-F activated: slot length = %lld ns\n", ++ npsf_slot_length); ++ ++ /* time starts now! 
*/ ++ time_origin = (lt_t) ktime_to_ns(now); ++ TRACE("Time_origin = %llu\n", time_origin); ++ return 0; ++} ++ ++/* Plugin object */ ++static struct sched_plugin npsf_plugin __cacheline_aligned_in_smp = { ++ .plugin_name = "NPS-F", ++ ++ .tick = npsf_scheduler_tick, ++ .task_new = npsf_task_new, ++ .complete_job = complete_job, ++ .task_exit = npsf_task_exit, ++ .schedule = npsf_schedule, ++ .task_wake_up = npsf_task_wake_up, ++ .task_block = npsf_task_block, ++ .admit_task = npsf_admit_task, ++ .activate_plugin = npsf_activate_plugin, ++ .deactivate_plugin = npsf_deactivate_plugin, ++}; ++ ++static int __init init_npsf(void) ++{ ++ return register_sched_plugin(&npsf_plugin); ++} ++ ++static void __exit exit_npsf(void) ++{ ++ if (atomic_read(&timers_activated)) { ++ atomic_set(&timers_activated, 0); ++ return; ++ } ++ ++ if (reserves_allocated) ++ cleanup_npsf(); ++} ++ ++module_init(init_npsf); ++module_exit(exit_npsf); ++ +diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c +index 3543b7b..3036df9 100644 +--- a/litmus/sched_plugin.c ++++ b/litmus/sched_plugin.c +@@ -179,6 +179,12 @@ struct sched_plugin linux_sched_plugin = { + int cluster_cache_index = 2; + + /* ++ * Slot length (in ns) for NPS-F semi-partitioned plugin. ++ * This value can be changed at "runtime" through proc file. ++ */ ++lt_t npsf_slot_length = 5 * NSEC_PER_MSEC; ++ ++/* + * The reference to current plugin that is used to schedule tasks within + * the system. It stores references to actual function implementations + * Should be initialized by calling "init_***_plugin()" diff --git a/index.html b/index.html index 7d3d555..653e8f3 100644 --- a/index.html +++ b/index.html @@ -135,9 +135,30 @@ Have a look at our group's
    +
    +  1. A. Bastoni, B. Brandenburg and J. Anderson,
    +     “Is Semi-Partitioned Scheduling Practical?”,
    +     in submission, January 2011.
    +     PDF.
    +     Longer version with all graphs: PDF
    +
    +     For reference, all evaluated plugins as well as the required
    +     userspace tools are provided as part of the following patches
    +     (against version 2010.2).
    +
  2.
  3.
    -  A.Bastoni, B. Brandenburg and J. Anderson,
    +  A. Bastoni, B. Brandenburg and J. Anderson,
       “An Empirical Comparison of Global, Partitioned, and Clustered
       Multiprocessor Real-Time Schedulers”,
       Proceedings of the 31st IEEE Real-Time Systems Symposium,
       pp. 14-24, December 2010.
       PDF.
-- 
cgit v1.2.2
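
The per-slice admission checks performed by wm_check_params() in the EDF-WM plugin above are easiest to see with concrete numbers. The following is a minimal, self-contained sketch only: the struct below is a simplified stand-in for the patch's edf_wm_slice layout (the real definitions live in the patched kernel and liblitmus headers), and the task parameters are made-up illustrative values in nanoseconds. It encodes a two-slice task and asserts the budget-sum, window, and migration constraints that the plugin enforces.

/* Illustrative sketch only; simplified stand-in types, not the patched headers. */
#include <assert.h>
#include <stdio.h>

typedef unsigned long long lt_t;	/* nanoseconds, as in the plugin */

struct wm_slice {
	int cpu;
	lt_t offset;	/* slice release, relative to the job release */
	lt_t deadline;	/* slice deadline, relative to the job release */
	lt_t budget;	/* slice execution budget */
};

int main(void)
{
	/* A task with cost 3 ms and period 10 ms, split into two slices:
	 * 2 ms on CPU 0 in [0 ms, 5 ms), then 1 ms on CPU 1 in [5 ms, 10 ms). */
	lt_t exec_cost = 3000000ULL;
	struct wm_slice s[2] = {
		{ .cpu = 0, .offset = 0,          .deadline = 5000000ULL,  .budget = 2000000ULL },
		{ .cpu = 1, .offset = 5000000ULL, .deadline = 10000000ULL, .budget = 1000000ULL },
	};

	/* slice budgets must add up to the task's execution cost (check 3) */
	assert(s[0].budget + s[1].budget == exec_cost);

	/* a slice may not be released before the previous slice's deadline (check 4) */
	assert(s[0].deadline <= s[1].offset);

	/* each slice's budget must fit into its [offset, deadline] window (check 5) */
	assert(s[0].deadline - s[0].offset >= s[0].budget);
	assert(s[1].deadline - s[1].offset >= s[1].budget);

	/* consecutive slices must be assigned to different CPUs (check 7) */
	assert(s[0].cpu != s[1].cpu);

	printf("slice layout satisfies the EDF-WM admission checks\n");
	return 0;
}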