author     Bjoern B. Brandenburg <bbb@cs.unc.edu>   2011-01-26 17:17:27 -0500
committer  Bjoern B. Brandenburg <bbb@cs.unc.edu>   2011-01-26 17:44:47 -0500
commit     2f25da028afca99d903f60a0396a44d657b863dd (patch)
tree       8141487e48175b33161573467421fd18605d9ebf
parent     00ffad8cfa533223121c8b400ae829ccef2ddfe8 (diff)
Add NPS-F plugin
[semi-part backport]
-rw-r--r--  include/litmus/rt_param.h      |   18
-rw-r--r--  include/litmus/sched_plugin.h  |    5
-rw-r--r--  include/litmus/unistd_64.h     |    4
-rw-r--r--  litmus/Makefile                |    3
-rw-r--r--  litmus/litmus.c                |   64
-rw-r--r--  litmus/sched_npsf.c            | 1185
-rw-r--r--  litmus/sched_plugin.c          |    6
7 files changed, 1282 insertions, 3 deletions
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 9927b09e0a01..1290e2939e33 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -35,6 +35,17 @@ typedef enum {
 	PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
 } budget_policy_t;
 
+/* Parameters for the NPS-F semi-partitioned scheduling algorithm.
+ * Each (cpu, budget) entry defines the share ('budget' in ns, a % of
+ * the slot_length) of the notional processor on the CPU 'cpu'.
+ * This structure is used by the library/syscall interface so that the
+ * overhead of a syscall is incurred only once per server.
+ */
+struct npsf_budgets {
+	int cpu;
+	lt_t budget;
+};
+
 /* The parameters for the EDF-WM semi-partitioned scheduler.
  * Each task may be split across multiple cpus. Each per-cpu allocation
  * is called a 'slice'.
@@ -75,6 +86,13 @@ struct rt_task {
 
 	/* parameters used by the semi-partitioned algorithms */
 	union {
+		/* NPS-F; defined in sched_npsf.c
+		 * id for the server (notional processor) that holds
+		 * this task; the same npsf_id can be assigned to "the same"
+		 * server split across different cpus
+		 */
+		int npsf_id;
+
 		/* EDF-WM; defined in sched_edf_wm.c */
 		struct edf_wm_params wm;
 	} semi_part;
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 9c1c9f28ba79..7ea9176624ff 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -6,6 +6,8 @@
 #define _LINUX_SCHED_PLUGIN_H_
 
 #include <linux/sched.h>
+/* NSEC_PER... conversions */
+#include <linux/time.h>
 
 /* struct for semaphore with priority inheritance */
 struct pi_semaphore {
@@ -136,6 +138,9 @@ extern struct sched_plugin *litmus;
 /* cluster size: cache_index = 2 L2, cache_index = 3 L3 */
 extern int cluster_cache_index;
 
+/* Slot length (ns) for the NPS-F semi-partitioned algorithm */
+extern lt_t npsf_slot_length;
+
 int register_sched_plugin(struct sched_plugin* plugin);
 struct sched_plugin* find_sched_plugin(const char* name);
 int print_sched_plugins(char* buf, int max);
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index f0618e75348d..4e82c52722c8 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -33,5 +33,7 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
 __SYSCALL(__NR_release_ts, sys_release_ts)
 #define __NR_null_call __LSC(13)
 __SYSCALL(__NR_null_call, sys_null_call)
+#define __NR_add_server __LSC(14)
+__SYSCALL(__NR_add_server, sys_add_server)
 
-#define NR_litmus_syscalls 14
+#define NR_litmus_syscalls 15
diff --git a/litmus/Makefile b/litmus/Makefile
index 7fe37a59c425..f26736964479 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -15,7 +15,8 @@ obj-y = sched_plugin.o litmus.o \
 	ctrldev.o \
 	sched_gsn_edf.o \
 	sched_psn_edf.o \
-	sched_edf_wm.o
+	sched_edf_wm.o \
+	sched_npsf.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index b04a42b0da9c..2f780222d8e8 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -632,6 +632,55 @@ static int proc_write_cluster_size(struct file *file,
 	return len;
 }
 
+static int proc_read_npsf_slot_length(char *page, char **start,
+				      off_t off, int count,
+				      int *eof, void *data)
+{
+	return snprintf(page, PAGE_SIZE, "%d us\n",
+			(int) (npsf_slot_length / NSEC_PER_USEC));
+}
+
+extern void npsf_hrtimers_cleanup(void);
+/* NPS-F slot length in us.
+ *
+ * Writing 0 as the slot length triggers the removal of the
+ * reserve-switching hrtimers (reserve_switch_tick()) in the NPS-F plugin.
+ */
+static int proc_write_npsf_slot_length(struct file *file,
+				       const char *buffer,
+				       unsigned long count,
+				       void *data)
+{
+	int err, slot_length;
+	char msg[64];
+
+	if (count > 63)
+		return -EINVAL;
+
+	if (copy_from_user(msg, buffer, count))
+		return -EFAULT;
+
+	/* terminate */
+	msg[count] = '\0';
+	/* chomp */
+	if (count > 1 && msg[count - 1] == '\n')
+		msg[count - 1] = '\0';
+
+	err = sscanf(msg, "%d", &slot_length);
+
+	if (err == 1) {
+		if (!slot_length) {
+			npsf_hrtimers_cleanup();
+			/* reset to default */
+			slot_length = 5000;
+		}
+		npsf_slot_length = (lt_t) slot_length * NSEC_PER_USEC;
+		return count;
+	}
+
+	return -EINVAL;
+}
+
 #ifdef CONFIG_RELEASE_MASTER
 static int proc_read_release_master(char *page, char **start,
 				    off_t off, int count,
@@ -691,7 +740,8 @@ static struct proc_dir_entry *litmus_dir = NULL,
 #ifdef CONFIG_RELEASE_MASTER
 	*release_master_file = NULL,
 #endif
-	*clus_cache_idx_file = NULL;
+	*clus_cache_idx_file = NULL,
+	*npsf_slot_length_file = NULL;
 
 static int __init init_litmus_proc(void)
 {
@@ -733,6 +783,16 @@ static int __init init_litmus_proc(void)
 	clus_cache_idx_file->read_proc = proc_read_cluster_size;
 	clus_cache_idx_file->write_proc = proc_write_cluster_size;
 
+	npsf_slot_length_file = create_proc_entry("npsf_slot_length",
+						  0644, litmus_dir);
+	if (!npsf_slot_length_file) {
+		printk(KERN_ERR "Could not allocate npsf_slot_length "
+		       "procfs entry.\n");
+		return -ENOMEM;
+	}
+	npsf_slot_length_file->read_proc = proc_read_npsf_slot_length;
+	npsf_slot_length_file->write_proc = proc_write_npsf_slot_length;
+
 	stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
 					   proc_read_stats, NULL);
 
@@ -752,6 +812,8 @@ static void exit_litmus_proc(void)
 	remove_proc_entry("active_plugin", litmus_dir);
 	if (clus_cache_idx_file)
 		remove_proc_entry("cluster_cache", litmus_dir);
+	if (npsf_slot_length_file)
+		remove_proc_entry("npsf_slot_length", litmus_dir);
 #ifdef CONFIG_RELEASE_MASTER
 	if (release_master_file)
 		remove_proc_entry("release_master", litmus_dir);
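The new /proc/litmus/npsf_slot_length file accepts the slot length in microseconds; per the handler above, writing 0 tears down the reserve-switching hrtimers and resets the length to the 5000 us default. A minimal user-space sketch of the intended use (the file name comes from the patch; the helper itself is illustrative):

#include <stdio.h>

/* Sketch: set the NPS-F slot length to 10000 us (10 ms). */
static int set_npsf_slot_length_us(int us)
{
	FILE *f = fopen("/proc/litmus/npsf_slot_length", "w");

	if (!f)
		return -1;
	/* parsed by proc_write_npsf_slot_length() above */
	fprintf(f, "%d\n", us);
	return fclose(f);
}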
diff --git a/litmus/sched_npsf.c b/litmus/sched_npsf.c
new file mode 100644
index 000000000000..aad99c7e447c
--- /dev/null
+++ b/litmus/sched_npsf.c
@@ -0,0 +1,1185 @@
+/*
+ * litmus/sched_npsf.c
+ *
+ * Implementation of the NPS-F scheduling algorithm.
+ *
+ * A _server_ may span multiple _reserves_ on different CPUs.
+ *
+ *                                          *  1
+ * +--------------+ +--->+--------------+ +--->+--------------+
+ * | cpu_entry_t  | |    | npsf_reserve | |    | npsf_server  |
+ * +--------------+ |    +--------------+ |    +--------------+
+ * |              |1|    |              |1|    |              |
+ * | cpu_reserve  |-+   1| server       |-+   1|              |
+ * |              |  +---| cpu          |  +---| curr_reserve |
+ * +--------------+<-+   +--------------+<-+   +--------------+
+ *                 1                       *
+ */
+
+#include <asm/uaccess.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+
+/* Be extra verbose (log spam) */
+#define NPSF_VERBOSE
+
+#ifdef NPSF_VERBOSE
+#define npsf_printk(fmt, arg...) printk(KERN_INFO fmt, ##arg)
+#else
+#define npsf_printk(fmt, arg...)
+#endif
+
+struct npsf_reserve;
+
+/* cpu_entry_t
+ *
+ * Each cpu has a list of the reserves assigned to it.
+ * Each reserve has a pointer to its server (notional processor),
+ * which may be shared among multiple reserves.
+ */
+typedef struct {
+	/* lock to protect cpu_reserve and list changes */
+	raw_spinlock_t cpu_res_lock;
+	/* the reserve currently executing on this cpu */
+	struct npsf_reserve *cpu_reserve;
+	/* list of reserves on this cpu */
+	struct list_head npsf_reserves;
+	/* cpu ID */
+	int cpu;
+	/* timer to control reserve switching */
+	struct hrtimer timer;
+	/* virtual timer expiry (wrt time_origin) */
+	lt_t should_expire;
+	/* delegate timer firing to the proper cpu */
+	struct hrtimer_start_on_info info;
+	/* FIXME: the ids for servers should be an increasing int >=0 */
+	int last_seen_npsf_id;
+} cpu_entry_t;
+
+/* one cpu_entry_t per CPU */
+DEFINE_PER_CPU(cpu_entry_t, npsf_cpu_entries);
+
+/* This is the "notional processor" (i.e., simple server) abstraction. */
+typedef struct npsf_server {
+	/* shared among reserves */
+	rt_domain_t dom;
+	/* the real-time task that this server *SHOULD* be scheduling */
+	struct task_struct *highest_prio;
+	/* current reserve where this dom is executing */
+	struct npsf_reserve *curr_reserve;
+	/* The "first" reserve for this server in a time slot.
+	 * For non-migrating servers this will always be the same as
+	 * curr_reserve. */
+	struct npsf_reserve *first_reserve;
+	/* Prevent a race between the last CPU in a reserve chain
+	 * and the first. */
+	int first_cpu_wants_ipi;
+	/* rt_domain_t lock + npsf_server_t lock */
+#define lock dom.ready_lock
+} npsf_server_t;
+
+typedef struct npsf_reserve {
+	/* Pointer to the server for this reserve: a server may be shared
+	 * among multiple cpus with a different budget per cpu, but the
+	 * same npsf_id. */
+	npsf_server_t *server;
+	/* we queue here in npsf_reserves */
+	struct list_head node;
+	/* budget of this npsf_id on this cpu */
+	lt_t budget;
+	/* cpu for this (portion of the) server */
+	cpu_entry_t *cpu;
+	/* id of this server; it is the same for the
+	 * same server on different cpus */
+	int npsf_id;
+	/* Identifies whether this reserve continues in the next npsf
+	 * of the chain; needed for proper server deletion */
+	struct npsf_reserve *next_npsf;
+	/* flag that is true if the reserve is currently scheduled */
+	int is_currently_scheduled;
+} npsf_reserve_t;
+
+/* synchronization point: start moving and switching servers only
+ * when all servers have been properly set up by the user.
+ */
+static atomic_t all_servers_added;
+static atomic_t timers_activated = ATOMIC_INIT(0);
+
+/* Virtual time starts here */
+static lt_t time_origin;
+
+/* save the number of online cpus seen at init time */
+static unsigned int _online_cpus = 1;
+
+#define no_reserves(entry) (list_empty(&((entry)->npsf_reserves)))
+#define local_entry (&__get_cpu_var(npsf_cpu_entries))
+#define remote_entry(cpu) (&per_cpu(npsf_cpu_entries, (cpu)))
+
+#define server_from_dom(domain) (container_of((domain), npsf_server_t, dom))
+
+/* task_entry() uses get_partition(); therefore we must take care to
+ * update task_params.cpu correctly whenever we switch tasks,
+ * otherwise we'll deadlock.
+ */
+#define task_entry(task) remote_entry(get_partition(task))
+#define domain_edf(npsf) (&((npsf)->server->dom))
+
+#define task_npsfid(task) ((task)->rt_param.task_params.semi_part.npsf_id)
+
+static inline int owns_server(npsf_reserve_t *npsf)
+{
+	return (npsf->server->curr_reserve == npsf);
+}
+
+/* utility functions to get next and prev domains; must hold entry lock */
+static inline npsf_reserve_t* local_next_reserve(npsf_reserve_t *curr,
+		cpu_entry_t *entry)
+{
+	return (list_is_last(&curr->node, &entry->npsf_reserves)) ?
+		list_entry(entry->npsf_reserves.next, npsf_reserve_t, node) :
+		list_entry(curr->node.next, npsf_reserve_t, node);
+}
+
+static inline npsf_reserve_t* local_prev_reserve(npsf_reserve_t *curr,
+		cpu_entry_t *entry)
+{
+	return ((curr->node.prev == &entry->npsf_reserves) ?
+		list_entry(entry->npsf_reserves.prev, npsf_reserve_t, node) :
+		list_entry(curr->node.prev, npsf_reserve_t, node));
+}
+
+static void requeue(struct task_struct* t, rt_domain_t *edf)
+{
+	if (t->state != TASK_RUNNING)
+		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+
+	BUG_ON(is_queued(t));
+
+	set_rt_flags(t, RT_F_RUNNING);
+	if (is_released(t, litmus_clock()))
+		__add_ready(edf, t);
+	else
+		add_release(edf, t); /* it has got to wait */
+}
+
+/* we assume the lock is being held */
+static void preempt(npsf_reserve_t *npsf)
+{
+	/* Since we do not support non-preemptable sections,
+	 * we don't need to pass in a task. If we call this,
+	 * we want the remote CPU to reschedule, no matter what.
+	 */
+	preempt_if_preemptable(NULL, npsf->cpu->cpu);
+}
+
+static void npsf_preempt_if_server_is_scheduled(npsf_server_t* srv)
+{
+	npsf_reserve_t *reserve = srv->curr_reserve;
+	if (reserve->is_currently_scheduled) {
+		preempt(reserve);
+	}
+}
+
+/* assumes lock is held by caller */
+static void npsf_reschedule_server(npsf_server_t* srv)
+{
+	struct task_struct* hp = srv->highest_prio;
+	rt_domain_t* edf = &srv->dom;
+
+	if (edf_preemption_needed(edf, hp)) {
+		srv->highest_prio = __take_ready(edf);
+		if (hp) {
+			TRACE_TASK(hp, "requeue: no longer highest prio\n");
+			requeue(hp, edf);
+		}
+		npsf_preempt_if_server_is_scheduled(srv);
+	}
+}
+
+static void npsf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
+{
+	npsf_server_t *srv = server_from_dom(rt);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&srv->lock, flags);
+
+	__merge_ready(rt, tasks);
+	npsf_reschedule_server(srv);
+
+	raw_spin_unlock_irqrestore(&srv->lock, flags);
+}
+
+static void job_completion(struct task_struct* t, int forced)
+{
+	sched_trace_task_completion(t, forced);
+	TRACE_TASK(t, "job_completion().\n");
+
+	set_rt_flags(t, RT_F_SLEEP);
+	prepare_for_next_period(t);
+}
+
+/* When did this slot start? */
+static inline lt_t slot_begin(lt_t now)
+{
+	return (((now - time_origin) / npsf_slot_length)
+		* npsf_slot_length + time_origin);
+}
+
+/* Compute the delta from the beginning of the current slot. */
+static inline lt_t delta_from_slot_begin(lt_t now)
+{
+	return (now - slot_begin(now));
+}
+
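A quick worked example of the slot arithmetic above, with assumed values (illustration only, not part of the patch):

/* Illustration: time_origin = 1000 ns, npsf_slot_length = 5000000 ns (5 ms).
 *
 *   now                        = 12345678 ns
 *   slot_begin(now)            = ((12345678 - 1000) / 5000000) * 5000000 + 1000
 *                              = 2 * 5000000 + 1000 = 10001000 ns
 *   delta_from_slot_begin(now) = 12345678 - 10001000 = 2344678 ns
 *
 * i.e., the call happens ~2.3 ms into the third slot after time_origin.
 */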
+/* Given an offset into a slot, return the corresponding eligible reserve.
+ * The output param reservation_end is used to return the (relative) time
+ * at which the returned reserve ends.
+ */
+static npsf_reserve_t* get_reserve_for_offset(cpu_entry_t *entry, lt_t offset,
+		lt_t *reservation_end)
+{
+	npsf_reserve_t *tmp;
+
+	*reservation_end = 0;
+
+	/* linear search through all reserves; figure out which one is the
+	 * last one to become eligible before offset */
+	list_for_each_entry(tmp, &entry->npsf_reserves, node) {
+		*reservation_end += tmp->budget;
+
+		/* We are always "late", so the tmp we found is the right one */
+		if (*reservation_end > offset)
+			return tmp;
+	}
+
+	/* error: we should never fall off the reserve list */
+	BUG();
+	return NULL;
+}
+
+/* Determine which reserve is eligible based on the current time. */
+static npsf_reserve_t* get_current_reserve(cpu_entry_t *entry)
+{
+	lt_t reservation_end;
+	lt_t offset = delta_from_slot_begin(litmus_clock());
+	return get_reserve_for_offset(entry, offset, &reservation_end);
+}
+
+/* This is used to ensure that we are "always" late, i.e., to make
+ * sure that the timer jitter is always positive. This should
+ * only trigger in KVM (or on real machines with bad TSC drift after
+ * an IPI).
+ *
+ * At the moment, proper tracing for this event is done in
+ * reserve_switch_tick().
+ */
+static noinline ktime_t catchup_time(lt_t from, lt_t target)
+{
+	while (lt_before(from, target)) {
+		from = litmus_clock();
+
+		mb();
+		cpu_relax();
+	}
+
+	return ns_to_ktime(from);
+}
+
+/* compute the next ABSOLUTE timer value */
+static lt_t get_next_reserve_switch_time(void)
+{
+	cpu_entry_t *entry = local_entry;
+	lt_t now = litmus_clock();
+	lt_t slot_start = slot_begin(now);
+	lt_t offset = now - slot_start;
+	lt_t next_time;
+	npsf_reserve_t* reserve;
+
+	/* compute the absolute litmus time of the next reserve switch */
+	reserve = get_reserve_for_offset(entry, offset, &next_time);
+	/* get_reserve_for_offset() returns a time relative to the start
+	 * of the slot; make it absolute */
+	next_time += slot_start;
+
+	/* Let's see if we need to skip the next timer. */
+	reserve = local_next_reserve(reserve, entry);
+	/* if the next reserve is a continuing reserve
+	 * (i.e., if it belongs to a migrating server),
+	 * then we skip the timer event because we will
+	 * receive an IPI from the previous processor instead. */
+	if (reserve->server->first_reserve != reserve) {
+		/* it is indeed not the first reserve */
+		next_time += reserve->budget;
+	}
+
+	return next_time;
+}
+
+/* This is the callback for reserve-switching interrupts.
+ * The timer is reprogrammed to expire at the beginning of every logical
+ * reserve (i.e., a continuing reserve may be split among different CPUs,
+ * but it is a _single_ logical reserve). get_next_reserve_switch_time()
+ * will return the right next_expire time.
+ */
+static enum hrtimer_restart reserve_switch_tick(struct hrtimer *timer)
+{
+	unsigned long flags;
+	cpu_entry_t *entry;
+	/* we are using CLOCK_MONOTONIC */
+	ktime_t now = ktime_get();
+	ktime_t delta;
+	int late;
+
+	entry = container_of(timer, cpu_entry_t, timer);
+	raw_spin_lock_irqsave(&entry->cpu_res_lock, flags);
+
+	/* jitter wrt virtual time */
+	delta = ktime_sub(now, ns_to_ktime(entry->should_expire));
+	late = (ktime_to_ns(delta) >= 0) ? 1 : 0;
+
+#ifdef NPSF_VERBOSE
+	if (entry->cpu_reserve && atomic_read(&all_servers_added))
+		TRACE("(npsf_id: %d) tick starts at %Ld, "
+		      "now - should_expire: %Ld\n",
+		      entry->cpu_reserve->npsf_id,
+		      ktime_to_ns(now), ktime_to_ns(delta));
+#endif
+	/* If the timer expires earlier than the should_expire time,
+	 * we delay the switch until it is synchronized with the
+	 * switch boundary; otherwise the next reserve would execute
+	 * longer (which would be wrong).
+	 */
+	if (!late) {
+		TRACE("+++ Timer fired early, waiting...\n");
+		now = catchup_time(ktime_to_ns(now), entry->should_expire);
+
+		delta = ktime_sub(now, ns_to_ktime(entry->should_expire));
+		TRACE("+++ done, tick restarts at %Ld, "
+		      "now - should_expire: %Ld\n",
+		      ktime_to_ns(now), ktime_to_ns(delta));
+	}
+
+	BUG_ON(!atomic_read(&all_servers_added));
+	BUG_ON(no_reserves(entry));
+
+	/* Compute the next time that we need to be notified. */
+	entry->should_expire = get_next_reserve_switch_time();
+
+	/* kindly ask the Penguin to let us know... */
+	hrtimer_set_expires(timer, ns_to_ktime(entry->should_expire));
+
+	/* set resched flag to reschedule local cpu */
+	set_need_resched();
+
+	raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags);
+#ifdef NPSF_VERBOSE
+	if (atomic_read(&all_servers_added))
+		TRACE("(npsf_id: %d) tick ends at %Ld, should_expire: %llu\n",
+		      entry->cpu_reserve->npsf_id, ktime_to_ns(ktime_get()),
+		      entry->should_expire);
+#endif
+
+	return HRTIMER_RESTART;
+}
+
+static void npsf_scheduler_tick(struct task_struct *t)
+{
+	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
+		set_tsk_need_resched(t);
+		TRACE("npsf_tick: %d is preemptable "
+		      "=> FORCE_RESCHED\n", t->pid);
+	}
+}
+
+/* Assumption: caller holds the srv lock and prev belongs to
+ * the currently-scheduled reservation.
+ */
+static void npsf_schedule_server(struct task_struct* prev,
+		cpu_entry_t *entry)
+{
+	npsf_server_t* srv = entry->cpu_reserve->server;
+	int out_of_time, sleep, exists, blocks;
+
+	exists = is_realtime(prev);
+	blocks = exists && !is_running(prev);
+	out_of_time = exists &&
+		budget_enforced(prev) &&
+		budget_exhausted(prev);
+	sleep = exists && get_rt_flags(prev) == RT_F_SLEEP;
+
+	if (exists)
+		TRACE_TASK(prev, "(npsf_id %d) blocks:%d "
+			   "out_of_time:%d sleep:%d state:%d sig:%d\n",
+			   task_npsfid(prev),
+			   blocks, out_of_time, sleep,
+			   prev->state,
+			   signal_pending(prev));
+
+	/* Any task that is preemptable and either exhausts its
+	 * execution budget or wants to sleep completes. We may have
+	 * to reschedule after this.
+	 */
+	if ((out_of_time || sleep) && !blocks) {
+		job_completion(prev, !sleep);
+
+		if (srv->highest_prio != prev) {
+			BUG_ON(!is_queued(prev));
+			remove(&srv->dom, prev);
+		}
+
+		requeue(prev, &srv->dom);
+
+		if (srv->highest_prio == prev)
+			srv->highest_prio = __take_ready(&srv->dom);
+	}
+
+	BUG_ON(blocks && prev == srv->highest_prio);
+	/* BUG_ON(!srv->highest_prio && jobs_pending(&srv->dom)); */
+}
+
+static void npsf_notify_next_cpu(npsf_reserve_t *npsf_prev)
+{
+	npsf_server_t *srv;
+
+	if (unlikely(npsf_prev->next_npsf != npsf_prev)) {
+		/* This reserve is actually shared. Let's update its 'owner'
+		 * and notify the next CPU. */
+		srv = npsf_prev->server;
+		raw_spin_lock(&srv->lock);
+		srv->curr_reserve = npsf_prev->next_npsf;
+		if (srv->first_reserve != srv->curr_reserve ||
+		    srv->first_cpu_wants_ipi) {
+			/* send an IPI to notify the next CPU in the chain */
+			srv->first_cpu_wants_ipi = 0;
+			TRACE("sending IPI\n");
+			preempt(srv->curr_reserve);
+		}
+		raw_spin_unlock(&srv->lock);
+	}
+}
+
+static struct task_struct* npsf_schedule(struct task_struct * prev)
+{
+	npsf_reserve_t *npsf_prev, *npsf_next;
+	npsf_server_t *srv_prev, *srv_next;
+	cpu_entry_t *entry = local_entry;
+	struct task_struct *next;
+	int reserve_switch;
+
+	/* servers not ready yet, yield to linux */
+	if (!atomic_read(&all_servers_added))
+		return NULL;
+
+#ifdef NPSF_VERBOSE
+	TRACE_TASK(prev, "schedule\n");
+#endif
+	raw_spin_lock(&entry->cpu_res_lock);
+
+	BUG_ON(no_reserves(entry));
+
+	/* step 1: what are we currently serving? */
+	npsf_prev = entry->cpu_reserve;
+	srv_prev = npsf_prev->server;
+
+	/* step 2: what SHOULD we be currently serving? */
+	npsf_next = get_current_reserve(entry);
+	srv_next = npsf_next->server;
+
+	/* TODO second measuring point for IPI receiving
+	 * if (!srv_next->measure_wait_IPI) --- the remote reset
+	 * trace_time_end.
+	 */
+	raw_spin_lock(&srv_prev->lock);
+
+	/* step 3: update the prev server */
+	if (is_realtime(prev) && task_npsfid(prev) == entry->cpu_reserve->npsf_id)
+		npsf_schedule_server(prev, entry);
+	else if (is_realtime(prev))
+		TRACE_TASK(prev, "npsf_id %d != cpu_reserve npsf_id %d\n",
+			   task_npsfid(prev), entry->cpu_reserve->npsf_id);
+
+	/* step 4: determine if we need to switch to another reserve */
+	reserve_switch = npsf_prev != npsf_next;
+
+	if (!reserve_switch) {
+		/* easy case: just enact what the server scheduler decided */
+		next = srv_prev->highest_prio;
+
+		/* Unlock AFTER observing highest_prio to avoid races with
+		 * remote rescheduling activity. */
+		raw_spin_unlock(&srv_prev->lock);
+	} else {
+		/* In this case we have a reserve switch. We are done with the
+		 * previous server, so release its lock. */
+		TRACE("switch reserve npsf_id %d -> npsf_id %d\n",
+		      npsf_prev->npsf_id, npsf_next->npsf_id);
+		npsf_prev->is_currently_scheduled = 0;
+		raw_spin_unlock(&srv_prev->lock);
+
+		/* Move on to the next server. */
+		raw_spin_lock(&srv_next->lock);
+		npsf_next->is_currently_scheduled = 1;
+
+		/* make sure we are the owner of the server (if it is shared) */
+		if (unlikely(srv_next->curr_reserve != npsf_next)) {
+			/* We raced with the previous owner. Let's schedule
+			 * the previous reserve for now. The previous owner
+			 * will send us an IPI when the server has been pushed
+			 * to us.
+			 */
+			TRACE("(npsf_id %d) raced with previous server owner\n",
+			      npsf_next->npsf_id);
+
+			/* check if we are the first CPU, in which case we need
+			 * to request a notification explicitly */
+			if (srv_next->first_reserve == npsf_next)
+				srv_next->first_cpu_wants_ipi = 1;
+
+			npsf_next->is_currently_scheduled = 0;
+			raw_spin_unlock(&srv_next->lock);
+
+			/* just keep the previous reserve one more time */
+			raw_spin_lock(&srv_prev->lock);
+
+			npsf_prev->is_currently_scheduled = 1;
+			/* Note that there is no race condition here: since
+			 * curr_reserve did not yet point to this reserve, no
+			 * processor can have observed the flag set in
+			 * npsf_next. A processor might have observed the flag
+			 * being zero in npsf_prev and decided not to send an
+			 * IPI, which doesn't matter since we are going to
+			 * reschedule below anyway. */
+
+			next = srv_prev->highest_prio;
+
+			raw_spin_unlock(&srv_prev->lock);
+
+			/* TODO first measuring point for '0'-switching time
+			 * remote is not ready yet and will send us an IPI
+			 * when it's done.
+			 * local:
+			 *	srv_next->measure_wait_IPI = 1;
+			 * remote before sending IPI:
+			 *	if (srv_next->measure_wait_IPI) reset;
+			 */
+		} else {
+			/* invariant: srv->highest_prio is always the
+			 * highest-priority job in the server, and it is always
+			 * runnable. Any update to the server must maintain
+			 * this invariant. */
+			next = srv_next->highest_prio;
+
+			entry->cpu_reserve = npsf_next;
+			raw_spin_unlock(&srv_next->lock);
+
+			/* send an IPI (if necessary) */
+			npsf_notify_next_cpu(npsf_prev);
+		}
+	}
+
+	if (next) {
+		TRACE_TASK(next, "(npsf_id %d) scheduled at %llu\n",
+			   task_npsfid(next), litmus_clock());
+		set_rt_flags(next, RT_F_RUNNING);
+		/* The TASK_RUNNING flag is set by the Penguin _way_ after
+		 * activating a task. This doesn't matter much to Linux as
+		 * the rq lock will prevent any changes, but it matters to
+		 * us. It is possible for a remote cpu waking up this task
+		 * to requeue the task before it's runnable and to send an
+		 * IPI here; we then schedule that task (still "not
+		 * runnable"), and only right before the real execution of
+		 * next is the running flag set.
+		 */
+		if (!is_running(next))
+			TRACE_TASK(next, "BAD: !TASK_RUNNING\n");
+	} else {
+		/* FIXME: npsf_id is wrong if we raced on a reserve switch
+		 * and are "switching back" */
+		TRACE("(npsf_id %d) becoming idle at %llu\n",
+		      reserve_switch ? npsf_next->npsf_id : npsf_prev->npsf_id,
+		      litmus_clock());
+	}
+
+	raw_spin_unlock(&entry->cpu_res_lock);
+
+	return next;
+}
+
+/* Prepare a task for running in RT mode
+ *
+ * We can only be sure that the cpu is the right one (the admission test
+ * rejects tasks released on a cpu that doesn't host the right npsf_id),
+ * but we _cannot_ be sure that:
+ * 1) the npsf we find is the reserve currently running on this cpu, or
+ * 2) the current reserve (the one in charge of scheduling) is not
+ *    running on a different cpu.
+ */
+static void npsf_task_new(struct task_struct * t, int on_rq, int running)
+{
+	npsf_reserve_t *npsf;
+	npsf_server_t *srv;
+	cpu_entry_t *entry = task_entry(t);
+	rt_domain_t *edf;
+	unsigned long flags;
+
+	BUG_ON(no_reserves(entry));
+
+	/* search for the proper npsf server to add the new task to */
+	list_for_each_entry(npsf, &entry->npsf_reserves, node) {
+		if (npsf->npsf_id == task_npsfid(t))
+			break;
+	}
+
+	srv = npsf->server;
+
+	/* The task should be runnable in the queue, otherwise the signal
+	 * code will try to wake it up with fatal consequences.
+	 */
+	raw_spin_lock_irqsave(&entry->cpu_res_lock, flags);
+	raw_spin_lock(&srv->lock);
+
+	edf = domain_edf(npsf);
+	tsk_rt(t)->domain = edf;
+
+	TRACE_TASK(t, "task_new: P%d, task_npsfid %d, "
+		   "npsf->npsf_id %d, entry->cpu %d\n",
+		   t->rt_param.task_params.cpu, task_npsfid(t),
+		   npsf->npsf_id, entry->cpu);
+
+	/* setup job parameters */
+	release_at(t, litmus_clock());
+
+	/* There are four basic scenarios that could happen:
+	 * 1) the server is on another cpu and scheduled;
+	 * 2) the server is on another cpu and not scheduled;
+	 * 3) the server is on this cpu and scheduled; and
+	 * 4) the server is on this cpu and not scheduled.
+	 *
+	 * Whatever scenario we're in, it cannot change while we are
+	 * holding the server lock.
+	 *
+	 * If the new task does not have a high priority, then
+	 * we can just queue it and be done.
+	 *
+	 * In theory, the requeue() and reschedule_server() code
+	 * take care of all that.
+	 */
+	requeue(t, edf);
+	/* reschedule will cause a remote preemption, if required */
+	npsf_reschedule_server(srv);
+	/* always preempt to make sure we don't
+	 * use the stack if it needs to migrate */
+	set_tsk_need_resched(t);
+
+	raw_spin_unlock(&srv->lock);
+	raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags);
+}
+
+static void npsf_task_wake_up(struct task_struct *t)
+{
+	rt_domain_t *edf;
+	npsf_server_t* srv;
+	unsigned long flags;
+	lt_t now;
+
+	BUG_ON(!is_realtime(t));
+
+	edf = tsk_rt(t)->domain;
+	srv = server_from_dom(edf);
+
+	raw_spin_lock_irqsave(&srv->lock, flags);
+
+	BUG_ON(is_queued(t));
+
+	now = litmus_clock();
+	/* FIXME: this should be a configurable policy... */
+	if (is_tardy(t, now)) {
+		/* new sporadic release */
+		release_at(t, now);
+		sched_trace_task_release(t);
+	}
+
+	/* Only add to ready queue if it is not the
+	 * currently-scheduled task.
+	 */
+	if (srv->highest_prio != t) {
+		requeue(t, edf);
+		npsf_reschedule_server(srv);
+	}
+#ifdef NPSF_VERBOSE
+	else
+		TRACE_TASK(t, "wake_up, is curr_sched, not requeued\n");
+#endif
+
+	raw_spin_unlock_irqrestore(&srv->lock, flags);
+
+	TRACE_TASK(t, "wake up done\n");
+}
+
+static void remove_from_server(struct task_struct *t, npsf_server_t* srv)
+{
+	if (srv->highest_prio == t) {
+		TRACE_TASK(t, "remove from server: is highest-prio task\n");
+		srv->highest_prio = NULL;
+		npsf_reschedule_server(srv);
+	} else if (is_queued(t)) {
+		TRACE_TASK(t, "remove from server: removed from queue\n");
+		remove(&srv->dom, t);
+	}
+#ifdef NPSF_VERBOSE
+	else
+		TRACE_TASK(t, "WARN: where is this task?\n");
+#endif
+}
+
+static void npsf_task_block(struct task_struct *t)
+{
+	rt_domain_t *edf;
+	npsf_server_t* srv;
+	unsigned long flags;
+
+	TRACE_TASK(t, "(npsf_id %d) block at %llu, state=%d\n",
+		   task_npsfid(t), litmus_clock(), t->state);
+
+	BUG_ON(!is_realtime(t));
+
+	edf = tsk_rt(t)->domain;
+	srv = server_from_dom(edf);
+
+	raw_spin_lock_irqsave(&srv->lock, flags);
+
+	remove_from_server(t, srv);
+
+	raw_spin_unlock_irqrestore(&srv->lock, flags);
+}
+
+static void npsf_task_exit(struct task_struct * t)
+{
+	rt_domain_t *edf;
+	npsf_server_t* srv;
+	unsigned long flags;
+
+	BUG_ON(!is_realtime(t));
+
+	edf = tsk_rt(t)->domain;
+	srv = server_from_dom(edf);
+
+	raw_spin_lock_irqsave(&srv->lock, flags);
+
+	remove_from_server(t, srv);
+
+	raw_spin_unlock_irqrestore(&srv->lock, flags);
+
+	TRACE_TASK(t, "RIP, now reschedule\n");
+}
+
+static long npsf_admit_task(struct task_struct* tsk)
+{
+	npsf_reserve_t *npsf;
+	cpu_entry_t *entry = task_entry(tsk);
+	int id_ok = 0;
+
+	if (!atomic_read(&all_servers_added)) {
+		printk(KERN_DEBUG "not all servers added\n");
+		return -ENODEV;
+	}
+
+	/* check that the task is on the right cpu and the right server */
+	if (task_cpu(tsk) != tsk->rt_param.task_params.cpu) {
+		printk(KERN_DEBUG "wrong CPU(%d, %d, %d) for npsf_id %d\n",
+		       task_cpu(tsk), tsk->rt_param.task_params.cpu,
+		       entry->cpu, task_npsfid(tsk));
+		return -EINVAL;
+	}
+
+	/* 1) this cpu should have the proper npsf_id in its list
+	 * 2) the rt_domain for the proper npsf_id is not null
+	 */
+	list_for_each_entry(npsf, &entry->npsf_reserves, node) {
+		if (npsf->npsf_id == task_npsfid(tsk)) {
+			id_ok = 1;
+			break;
+		}
+	}
+	if (!id_ok)
+		printk(KERN_DEBUG "wrong npsf_id (%d) for entry %d\n",
+		       task_npsfid(tsk), entry->cpu);
+
+	return id_ok ? 0 : -EINVAL;
+}
+
+/* in litmus.c */
+extern atomic_t rt_task_count;
+
+/* initialization status control */
+static int reserves_allocated = 0;
+
+#ifdef NPSF_VERBOSE
+static void print_reserve(cpu_entry_t *cpu)
+{
+	npsf_reserve_t *tmp;
+
+	printk(KERN_INFO "NPS-F: reserves on CPU %d:\n", cpu->cpu);
+	list_for_each_entry(tmp, &cpu->npsf_reserves, node) {
+		BUG_ON(!tmp->server);
+		BUG_ON(!&(tmp->server->dom));
+		BUG_ON(tmp->server->highest_prio);
+		printk(KERN_INFO "%d: %d us\n", tmp->npsf_id,
+		       (int)(tmp->budget / 1000));
+	}
+}
+#endif
+
+/*
+ * do_add_reserve: add a reserve (cpu, id, budget)
+ *
+ * Callback for the add_server() syscall; it adds the reserve "id"
+ * to the CPU "cpu". "budget" is the length of the reserve for the
+ * notional processor (server) "id" on that cpu.
+ */
+static long do_add_reserve(npsf_reserve_t **new, cpu_entry_t *cpu,
+		npsf_server_t *the_dom, int npsf_id, lt_t budget)
+{
+	unsigned long flags;
+
+	/* The npsf_ids for each cpu must be given in increasing order:
+	 * having the same notional processor twice on the same cpu makes
+	 * no sense. The last_seen_npsf_id is reset upon plugin insertion.
+	 */
+	if (cpu->last_seen_npsf_id >= npsf_id)
+		return -EINVAL;
+
+	/* don't allow server changes if there are tasks in the system */
+	if (atomic_read(&rt_task_count))
+		return -EACCES;
+
+	if ((*new = kmalloc(sizeof(npsf_reserve_t), GFP_ATOMIC)) == NULL)
+		return -ENOMEM;
+
+	(*new)->server = the_dom;
+	(*new)->npsf_id = npsf_id;
+	(*new)->budget = budget;
+	(*new)->cpu = cpu;
+
+	npsf_printk("Add npsf_id %d on P%d with budget %llu\n", (*new)->npsf_id,
+		    (*new)->cpu->cpu, (*new)->budget);
+
+	raw_spin_lock_irqsave(&cpu->cpu_res_lock, flags);
+
+	list_add_tail(&(*new)->node, &cpu->npsf_reserves);
+	cpu->last_seen_npsf_id = npsf_id;
+	cpu->cpu_reserve = list_first_entry(&cpu->npsf_reserves,
+					    npsf_reserve_t, node);
+
+	raw_spin_unlock_irqrestore(&cpu->cpu_res_lock, flags);
+
+	return 0;
+}
+
+static void kickoff_timers(void)
+{
+	int cpu;
+	cpu_entry_t *entry;
+	lt_t kickoff;
+
+	kickoff = slot_begin(litmus_clock() + npsf_slot_length * 2);
+
+	for_each_online_cpu(cpu) {
+		entry = &per_cpu(npsf_cpu_entries, cpu);
+		hrtimer_start_on(cpu, &entry->info, &entry->timer,
+				 ns_to_ktime(kickoff),
+				 HRTIMER_MODE_ABS_PINNED);
+		entry->should_expire = kickoff;
+	}
+	atomic_set(&timers_activated, 1);
+}
+
+/* We offer the library a budgets-array interface (so we go through the
+ * syscall path only once per server) and internally cycle over
+ * do_add_reserve().
+ *
+ * last == 1 means that the user is adding the last server and that after
+ * this insertion the plugin is properly set up. (FIXME: it should be
+ * done in a better way, but I doubt this plugin will ever go
+ * to the master branch.)
+ */
+asmlinkage long sys_add_server(int __user *__id,
+		struct npsf_budgets __user *__budgets, int last)
+{
+	int id, i;
+	int ret = -EFAULT;
+	struct npsf_budgets *budgets;
+	cpu_entry_t *entry;
+	npsf_server_t *npsfserver;
+	npsf_reserve_t *npsf_reserve_array[NR_CPUS];
+	npsf_reserve_t *first_reserve;
+
+	if (_online_cpus != num_online_cpus())
+		return ret;
+
+	if (copy_from_user(&id, __id, sizeof(id)))
+		return ret;
+
+	budgets = kmalloc(_online_cpus * sizeof(struct npsf_budgets),
+			  GFP_ATOMIC);
+	if (!budgets)
+		return -ENOMEM;
+
+	for (i = 0; i < _online_cpus; i++) {
+		budgets[i].cpu = NO_CPU;
+		budgets[i].budget = 0;
+	}
+
+	if (copy_from_user(budgets, __budgets,
+			   sizeof(struct npsf_budgets) * _online_cpus))
+		goto err;
+
+	/* initialize the npsf_server_t for this server */
+	npsfserver = kmalloc(sizeof(npsf_server_t), GFP_ATOMIC);
+	if (!npsfserver) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	edf_domain_init(&npsfserver->dom, NULL, npsf_release_jobs);
+	npsfserver->highest_prio = NULL;
+
+	/* initialize all npsf_reserve_t for this server */
+	for (i = 0; i < _online_cpus && budgets[i].cpu != NO_CPU; i++) {
+		entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu);
+		if ((ret = do_add_reserve(&npsf_reserve_array[i], entry,
+					  npsfserver,
+					  id, budgets[i].budget)) < 0)
+			goto err;
+	}
+	if (i == 0) {
+		/* no reserves given: refuse rather than dereference an
+		 * uninitialized reserve below */
+		ret = -EINVAL;
+		goto err;
+	}
+	/* set the current reserve to the first (and possibly unique)
+	 * slice for this npsf_id */
+	npsfserver->curr_reserve = npsf_reserve_array[0];
+	npsfserver->first_reserve = npsf_reserve_array[0];
+	npsfserver->first_cpu_wants_ipi = 0;
+	/* link the reserves of this server into a circular chain */
+	for (i = 0; i < _online_cpus && budgets[i].cpu != NO_CPU; i++) {
+		if (i + 1 == _online_cpus || budgets[i + 1].cpu == NO_CPU)
+			/* the last reserve in the chain points back to the
+			 * first one; a fixed (single-reserve) server thus
+			 * has itself as next */
+			npsf_reserve_array[i]->next_npsf = npsf_reserve_array[0];
+		else
+			/* normal continuing reserve */
+			npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i + 1];
+	}
+#ifdef NPSF_VERBOSE
+	for (i = 0; i < _online_cpus && budgets[i].cpu != NO_CPU; i++) {
+		entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu);
+		print_reserve(entry);
+	}
+#endif
+
+	if (last) {
+		/* force the first slot switch by setting the
+		 * current reserve to the first reserve on each cpu.
+		 *
+		 * FIXME: don't assume there exists at least one reserve per CPU
+		 */
+		for_each_online_cpu(i) {
+			entry = &per_cpu(npsf_cpu_entries, i);
+			first_reserve = list_entry(entry->npsf_reserves.next,
+						   npsf_reserve_t, node);
+
+			first_reserve->server->curr_reserve = first_reserve;
+			entry->cpu_reserve = first_reserve;
+			npsf_printk("npsf_id %d is the current reserve "
+				    "and server on CPU %d\n",
+				    first_reserve->npsf_id, entry->cpu);
+		}
+
+		kickoff_timers();
+
+		/* real plugin enable */
+		atomic_set(&all_servers_added, 1);
+		mb();
+	}
+
+	/* at least one server was initialized and may need deletion */
+	reserves_allocated = 1;
+err:
+	kfree(budgets);
+	return ret;
+}
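The net effect of the chain-linking loop above, sketched for two assumed configurations (illustration only, not part of the patch):

/* Fixed server, one reserve on CPU 0:
 *	R0->next_npsf == R0	(the ring is just the reserve itself)
 *
 * Migrating server split across CPUs 0, 1 and 2:
 *	R0->next_npsf == R1, R1->next_npsf == R2, R2->next_npsf == R0
 *
 * reserve_switch_tick() and npsf_notify_next_cpu() walk this ring to
 * hand the server from one CPU's reserve to the next within each slot.
 */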
+
+/* Cancel the reserve_switch_tick() hrtimers and wait for all callbacks
+ * to complete. Triggered by writing 0 to the npsf_slot_length proc file.
+ */
+void npsf_hrtimers_cleanup(void)
+{
+	int cpu;
+	cpu_entry_t *entry;
+	int redo;
+
+	if (!atomic_read(&timers_activated))
+		return;
+
+	atomic_set(&timers_activated, 0);
+
+	/* prevent the timers from firing on any cpu */
+	do {
+		redo = 0;
+		for_each_online_cpu(cpu) {
+			entry = &per_cpu(npsf_cpu_entries, cpu);
+
+			/* if the callback is active, skip it for now and
+			 * redo later */
+			if (hrtimer_try_to_cancel(&entry->timer) == -1) {
+				redo = 1;
+#ifdef NPSF_VERBOSE
+				printk(KERN_INFO "(P%d) hrtimer on P%d was "
+				       "active, try to delete again\n",
+				       get_cpu(), cpu);
+				put_cpu();
+#endif
+			}
+		}
+	} while (redo);
+
+	printk(KERN_INFO "npsf hrtimers deleted\n");
+}
+
+static void cleanup_npsf(void)
+{
+	int cpu;
+	cpu_entry_t *entry;
+	struct list_head *nd, *next;
+	npsf_reserve_t *tmp, *tmp_save;
+
+	for_each_online_cpu(cpu) {
+		entry = &per_cpu(npsf_cpu_entries, cpu);
+
+		/* FIXME: probably not needed, as we should be the only cpu
+		 * doing the removal */
+		raw_spin_lock(&entry->cpu_res_lock);
+
+		list_for_each_safe(nd, next, &entry->npsf_reserves) {
+			tmp = list_entry(nd, npsf_reserve_t, node);
+			npsf_printk("Del. (id, cpu):(%d, %d)\n",
+				    tmp->npsf_id,
+				    tmp->cpu->cpu);
+			if (tmp->server) {
+				npsf_printk("Del. reserves for npsf_id %d\n",
+					    tmp->npsf_id);
+				tmp_save = tmp;
+				while (tmp_save->next_npsf &&
+				       tmp_save->next_npsf != tmp) {
+					tmp_save = tmp_save->next_npsf;
+					tmp_save->server = NULL;
+				}
+				npsf_printk("Freeing server 0x%p\n", tmp->server);
+				kfree(tmp->server);
+			}
+			npsf_printk("Freeing npsf_reserve_t 0x%p\n", tmp);
+			kfree(tmp);
+		}
+		list_del(&entry->npsf_reserves);
+		raw_spin_unlock(&entry->cpu_res_lock);
+	}
+}
+
+/* prevent plugin deactivation if timers are still active */
+static long npsf_deactivate_plugin(void)
+{
+	return (atomic_read(&timers_activated)) ? -1 : 0;
+}
+
+static long npsf_activate_plugin(void)
+{
+	int cpu;
+	cpu_entry_t *entry;
+	ktime_t now = ktime_get();
+
+	/* prevent plugin switching if timers are active */
+	if (atomic_read(&timers_activated))
+		return -1;
+
+	atomic_set(&all_servers_added, 0);
+
+	/* de-allocate old servers (if any) */
+	if (reserves_allocated)
+		cleanup_npsf();
+
+	_online_cpus = num_online_cpus();
+
+	for_each_online_cpu(cpu) {
+		entry = &per_cpu(npsf_cpu_entries, cpu);
+
+		raw_spin_lock_init(&entry->cpu_res_lock);
+
+		entry->cpu_reserve = NULL;
+		INIT_LIST_HEAD(&entry->npsf_reserves);
+
+		entry->cpu = cpu;
+		hrtimer_init(&entry->timer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_ABS_PINNED);
+
+		/* initialize (reinitialize) pull timers */
+		hrtimer_start_on_info_init(&entry->info);
+
+		entry->timer.function = reserve_switch_tick;
+		entry->last_seen_npsf_id = -1;
+	}
+
+	printk(KERN_INFO "NPS-F activated: slot length = %lld ns\n",
+	       npsf_slot_length);
+
+	/* time starts now! */
+	time_origin = (lt_t) ktime_to_ns(now);
+	TRACE("Time_origin = %llu\n", time_origin);
+	return 0;
+}
+
+/* Plugin object */
+static struct sched_plugin npsf_plugin __cacheline_aligned_in_smp = {
+	.plugin_name		= "NPS-F",
+
+	.tick			= npsf_scheduler_tick,
+	.task_new		= npsf_task_new,
+	.complete_job		= complete_job,
+	.task_exit		= npsf_task_exit,
+	.schedule		= npsf_schedule,
+	.task_wake_up		= npsf_task_wake_up,
+	.task_block		= npsf_task_block,
+	.admit_task		= npsf_admit_task,
+	.activate_plugin	= npsf_activate_plugin,
+	.deactivate_plugin	= npsf_deactivate_plugin,
+};
+
+static int __init init_npsf(void)
+{
+	return register_sched_plugin(&npsf_plugin);
+}
+
+static void __exit exit_npsf(void)
+{
+	if (atomic_read(&timers_activated)) {
+		atomic_set(&timers_activated, 0);
+		return;
+	}
+
+	if (reserves_allocated)
+		cleanup_npsf();
+}
+
+module_init(init_npsf);
+module_exit(exit_npsf);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 3543b7baff53..3036df9b12e3 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -179,6 +179,12 @@ struct sched_plugin linux_sched_plugin = {
 int cluster_cache_index = 2;
 
 /*
+ * Slot length (in ns) for the NPS-F semi-partitioned plugin.
+ * This value can be changed at runtime through the proc file.
+ */
+lt_t npsf_slot_length = 5 * NSEC_PER_MSEC;
+
+/*
  * The reference to current plugin that is used to schedule tasks within
  * the system. It stores references to actual function implementations
  * Should be initialized by calling "init_***_plugin()"