authorBjoern Brandenburg <bbb@mpi-sws.org>2015-08-09 07:18:48 -0400
committerBjoern Brandenburg <bbb@mpi-sws.org>2017-05-26 17:12:28 -0400
commit3baa55c19ffb567aa48568fa69dd17ad6f70d31d (patch)
tree7e79fd398705929f2db40ba239895cc60762f61f
parentcbe61859a233702ed8e6723b3b133d1f2ae1ae2c (diff)
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:
 - library functionality (heaps, rt_domain, prioritization, etc.)
 - budget enforcement logic
 - job management
 - system call backends
 - virtual devices (control page, etc.)
 - scheduler plugin API (and dummy plugin)

This code compiles, but is not yet integrated with the rest of Linux.

Squashed changes:

LITMUS^RT Core: add get_current_budget() system call
    Allow userspace to figure out the used-up and remaining budget of a task.

Adds deadline field to control page and updates it when setting up jobs for release.

Adds control page deadline offset

ftdev: respect O_NONBLOCK flag in ftdev_read()
    Don't block if userspace wants to go on doing something else.

Export job release time and job sequence number in ctrl page

Add alternate complete_job() default implementation
    Let jobs sleep like regular Linux tasks by suspending and waking them with a
    one-shot timer. Plugins can opt into using this implementation instead of
    the classic complete_job() implementation (or custom implementations).

Fix RCU locking in sys_get_rt_task_param()
    sys_get_rt_task_param() is rarely used and apparently attracted some bitrot.

Free before setting NULL to prevent memory leak

Add hrtimer_start_on() support
    This patch replaces the previous implementation of hrtimer_start_on() by now
    using smp_call_function_single_async() to arm hrtimers on remote CPUs.

Expose LITMUS^RT system calls via control page ioctl()
    Rationale: make LITMUS^RT ops available in a way that does not create merge
    conflicts each time we rebase LITMUS^RT on top of a new kernel version. This
    also helps with portability to different architectures, as we no longer need
    to patch each architecture's syscall table.

Pick non-zero syscall ID start range
    To avoid interfering with Linux's magic reserved IOCTL numbers.

Don't preempt before time check in sleep_until_next_release()
    Avoid preempting jobs that are about to go to sleep soon anyway.

LITMUS^RT proc: fix wrong memset()

TRACE(): add TRACE_WARN_ON() helper
    Useful to replace BUG_ON() and WARN_ON() with a non-fatal TRACE()-based
    equivalent.

Add void* plugin_state pointer to task_struct

LITMUS^RT: split task admission into two functions

Plugin interface: add fork_task() callback

LITMUS^RT: Enable plugins to permit RT tasks to fork

one-shot complete_job(): set completed flag
    This could race with a SIGSTOP or some other forced suspension, but we'll
    let plugins handle this, should they actually care.

FP: add list-based ready queue

LITMUS^RT core: add should_wait_for_stack() callback
    Allow plugins to give up when waiting for a stack to become available.

LITMUS^RT core: add next_became_invalid() callback

LITMUS^RT core: add post-migration validation callback

LITMUS^RT core: be more careful when pull-migrating tasks
    Close more race windows and give plugins a chance to validate tasks after
    they have been migrated.

Add KConfig options for timer latency warnings

Add reservation creation API to plugin interface & syscalls

LITMUS^RT syscall: expose sys_reservation_create() via ioctl()

Add reservation configuration types to rt_param.h

Add basic generic reservation-based scheduling infrastructure

Switch to aligned quanta by default.
    For first-time users, aligned quanta is likely what's expected.

LITMUS^RT core: keep track of time of last suspension
    This information is needed to insert ST_COMPLETION records for sporadic
    tasks.

add fields for clock_nanosleep() support
    Need to communicate the intended wake-up time to the plugin wake-up handler.

LITMUS^RT core: add generic handler for sporadic job arrivals
    In particular, check if a job arrival is triggered from a clock_nanosleep()
    call.

add litmus->task_change_params() callback to plugin interface
    Will be used by adaptive C-EDF.

Call litmus->task_change_params() from sys_set_rt_task_param()

Move trace point definition to litmus/litmus.c
    If !CONFIG_SCHED_TASK_TRACE, but CONFIG_SCHED_LITMUS_TRACEPOINT, then we
    still need to define the tracepoint structures. This patch should be
    integrated with the earlier sched_task_trace.c patches during one of the
    next major rebasing efforts.

LITMUS^RT scheduling class: mark enqueued task as present

Remove unistd_*.h

rebase fix: update to new hrtimer API
    The new API is actually nicer and cleaner.

rebase fix: call lockdep_unpin_lock(&rq->lock, cookie)
    The LITMUS^RT scheduling class should also do the LOCKDEP dance.

LITMUS^RT core: break out non-preemptive flag defs
    Not every file including litmus.h needs to know this.

LITMUS^RT core: don't include debug_trace.h in litmus.h
    Including debug_trace.h introduces the TRACE() macro, which causes symbol
    clashes in some (rather obscure) drivers.

LITMUS^RT core: add litmus_preemption_in_progress flags
    Used to communicate that a preemption is in progress. Set by the scheduler;
    read by the plugins.

LITMUS^RT core: revise is_current_running() macro
-rw-r--r--include/litmus/affinity.h52
-rw-r--r--include/litmus/bheap.h77
-rw-r--r--include/litmus/binheap.h205
-rw-r--r--include/litmus/budget.h38
-rw-r--r--include/litmus/ceiling.h36
-rw-r--r--include/litmus/clustered.h46
-rw-r--r--include/litmus/ctrlpage.h105
-rw-r--r--include/litmus/debug_trace.h5
-rw-r--r--include/litmus/edf_common.h25
-rw-r--r--include/litmus/fdso.h78
-rw-r--r--include/litmus/fp_common.h183
-rw-r--r--include/litmus/fpmath.h147
-rw-r--r--include/litmus/jobs.h13
-rw-r--r--include/litmus/litmus.h163
-rw-r--r--include/litmus/litmus_proc.h63
-rw-r--r--include/litmus/locking.h28
-rw-r--r--include/litmus/np.h121
-rw-r--r--include/litmus/preempt.h188
-rw-r--r--include/litmus/reservations/alloc.h15
-rw-r--r--include/litmus/reservations/budget-notifier.h50
-rw-r--r--include/litmus/reservations/polling.h19
-rw-r--r--include/litmus/reservations/reservation.h224
-rw-r--r--include/litmus/reservations/table-driven.h23
-rw-r--r--include/litmus/rt_domain.h182
-rw-r--r--include/litmus/rt_param.h105
-rw-r--r--include/litmus/sched_plugin.h180
-rw-r--r--include/litmus/srp.h28
-rw-r--r--include/litmus/wait.h57
-rw-r--r--kernel/sched/litmus.c385
-rw-r--r--litmus/Kconfig213
-rw-r--r--litmus/Makefile20
-rw-r--r--litmus/bheap.c316
-rw-r--r--litmus/binheap.c387
-rw-r--r--litmus/budget.c167
-rw-r--r--litmus/clustered.c119
-rw-r--r--litmus/ctrldev.c264
-rw-r--r--litmus/edf_common.c201
-rw-r--r--litmus/fdso.c308
-rw-r--r--litmus/fp_common.c130
-rw-r--r--litmus/ftdev.c11
-rw-r--r--litmus/jobs.c163
-rw-r--r--litmus/litmus.c773
-rw-r--r--litmus/litmus_proc.c574
-rw-r--r--litmus/locking.c189
-rw-r--r--litmus/preempt.c143
-rw-r--r--litmus/reservations/Makefile3
-rw-r--r--litmus/reservations/alloc.c143
-rw-r--r--litmus/reservations/budget-notifier.c26
-rw-r--r--litmus/reservations/core.c393
-rw-r--r--litmus/reservations/polling.c256
-rw-r--r--litmus/reservations/table-driven.c269
-rw-r--r--litmus/rt_domain.c351
-rw-r--r--litmus/sched_plugin.c289
-rw-r--r--litmus/sched_task_trace.c5
-rw-r--r--litmus/srp.c310
-rw-r--r--litmus/sync.c153
-rw-r--r--litmus/trace.c17
-rw-r--r--litmus/uncachedev.c102
58 files changed, 9077 insertions, 59 deletions
diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
new file mode 100644
index 000000000000..4d7c618c8175
--- /dev/null
+++ b/include/litmus/affinity.h
@@ -0,0 +1,52 @@
1#ifndef __LITMUS_AFFINITY_H
2#define __LITMUS_AFFINITY_H
3
4#include <linux/cpumask.h>
5
6/* Works like:
7void get_nearest_available_cpu(
8 cpu_entry_t **nearest,
9 cpu_entry_t *start,
10 cpu_entry_t *entries,
11 int release_master,
12 cpumask_var_t cpus_to_test)
13
14Set release_master = NO_CPU for no Release Master.
15
16We use a macro here to exploit the fact that C-EDF and G-EDF
17have similar structures for their cpu_entry_t structs, even though
18they do not share a common base-struct. The macro allows us to
19avoid code duplication.
20
21 */
22#define get_nearest_available_cpu(nearest, start, entries, release_master, cpus_to_test) \
23{ \
24 (nearest) = NULL; \
25 if (!(start)->linked && likely((start)->cpu != (release_master))) { \
26 (nearest) = (start); \
27 } else { \
28 int __cpu; \
29 \
30 /* FIXME: get rid of the iteration with a bitmask + AND */ \
31 for_each_cpu(__cpu, cpus_to_test) { \
32 if (likely(__cpu != release_master)) { \
33 cpu_entry_t *__entry = &per_cpu((entries), __cpu); \
34 if (cpus_share_cache((start)->cpu, __entry->cpu) \
35 && !__entry->linked) { \
36 (nearest) = __entry; \
37 break; \
38 } \
39 } \
40 } \
41 } \
42 \
43 if ((nearest)) { \
44 TRACE("P%d is closest available CPU to P%d\n", \
45 (nearest)->cpu, (start)->cpu); \
46 } else { \
47 TRACE("Could not find an available CPU close to P%d\n", \
48 (start)->cpu); \
49 } \
50}
51
52#endif
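
To illustrate the intended call pattern, here is a minimal sketch of a plugin-side helper. The cpu_entry_t layout, the per-CPU variable cpu_entries, and pick_nearby_idle_cpu() are assumptions for illustration only, not part of this patch.

#include <linux/percpu.h>
#include <litmus/affinity.h>
#include <litmus/debug_trace.h>
#include <litmus/litmus.h>	/* for NO_CPU */

/* Hypothetical cpu_entry_t as used by C-EDF/G-EDF-style plugins; the
 * field names below are assumptions, not part of this patch. */
typedef struct {
	int cpu;
	struct task_struct *linked;
} cpu_entry_t;

static DEFINE_PER_CPU(cpu_entry_t, cpu_entries);

static cpu_entry_t *pick_nearby_idle_cpu(cpu_entry_t *start,
					  cpumask_var_t candidates)
{
	cpu_entry_t *target;

	/* NO_CPU: no dedicated release-master CPU is configured. */
	get_nearest_available_cpu(target, start, cpu_entries,
				  NO_CPU, candidates);
	return target;	/* NULL if no cache-local idle CPU was found */
}
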
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
new file mode 100644
index 000000000000..cf4864a498d8
--- /dev/null
+++ b/include/litmus/bheap.h
@@ -0,0 +1,77 @@
1/* bheaps.h -- Binomial Heaps
2 *
3 * (c) 2008, 2009 Bjoern Brandenburg
4 */
5
6#ifndef BHEAP_H
7#define BHEAP_H
8
9#define NOT_IN_HEAP UINT_MAX
10
11struct bheap_node {
12 struct bheap_node* parent;
13 struct bheap_node* next;
14 struct bheap_node* child;
15
16 unsigned int degree;
17 void* value;
18 struct bheap_node** ref;
19};
20
21struct bheap {
22 struct bheap_node* head;
23 /* We cache the minimum of the heap.
24 * This speeds up repeated peek operations.
25 */
26 struct bheap_node* min;
27};
28
29typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
30
31void bheap_init(struct bheap* heap);
32void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
33
34static inline int bheap_node_in_heap(struct bheap_node* h)
35{
36 return h->degree != NOT_IN_HEAP;
37}
38
39static inline int bheap_empty(struct bheap* heap)
40{
41 return heap->head == NULL && heap->min == NULL;
42}
43
44/* insert (and reinitialize) a node into the heap */
45void bheap_insert(bheap_prio_t higher_prio,
46 struct bheap* heap,
47 struct bheap_node* node);
48
49/* merge addition into target */
50void bheap_union(bheap_prio_t higher_prio,
51 struct bheap* target,
52 struct bheap* addition);
53
54struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
55 struct bheap* heap);
56
57struct bheap_node* bheap_take(bheap_prio_t higher_prio,
58 struct bheap* heap);
59
60void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
61int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
62
63void bheap_delete(bheap_prio_t higher_prio,
64 struct bheap* heap,
65 struct bheap_node* node);
66
67/* allocate from memcache */
68struct bheap_node* bheap_node_alloc(int gfp_flags);
69void bheap_node_free(struct bheap_node* hn);
70
71/* allocate a heap node for value and insert into the heap */
72int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
73 void* value, int gfp_flags);
74
75void* bheap_take_del(bheap_prio_t higher_prio,
76 struct bheap* heap);
77#endif
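
A short usage sketch of this binomial-heap API follows. struct item, item_order(), and bheap_demo() are illustrative names only; the comparator returns non-zero when its first argument has higher priority (here: the earlier deadline).

#include <litmus/bheap.h>
#include <litmus/rt_param.h>	/* for lt_t */

struct item {
	struct bheap_node *hn;	/* allocated via bheap_node_alloc() */
	lt_t deadline;
};

/* "higher priority" comparator: earlier deadline wins (EDF-style order) */
static int item_order(struct bheap_node *a, struct bheap_node *b)
{
	struct item *x = a->value;
	struct item *y = b->value;
	return x->deadline < y->deadline;
}

static void bheap_demo(struct bheap *heap, struct item *it)
{
	bheap_init(heap);

	it->hn = bheap_node_alloc(GFP_ATOMIC);
	bheap_node_init(&it->hn, it);	/* associate node with 'it' */
	bheap_insert(item_order, heap, it->hn);

	/* the minimum is cached, so repeated peeks are cheap */
	if (bheap_peek(item_order, heap) == it->hn)
		bheap_take(item_order, heap);
	/* the node would eventually be released via bheap_node_free() */
}
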
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
new file mode 100644
index 000000000000..1cf364701da8
--- /dev/null
+++ b/include/litmus/binheap.h
@@ -0,0 +1,205 @@
1#ifndef LITMUS_BINARY_HEAP_H
2#define LITMUS_BINARY_HEAP_H
3
4#include <linux/kernel.h>
5
6/**
7 * Simple binary heap with add, arbitrary delete, delete_root, and top
8 * operations.
9 *
10 * Style meant to conform with list.h.
11 *
12 * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
13 * heap may be overkill (and perhaps not general enough) for some applications.
14 *
15 * Note: In order to make node swaps fast, a node inserted with a data pointer
16 * may not always hold said data pointer. This is similar to the binomial heap
17 * implementation. This does make node deletion tricky since we have to
18 * (1) locate the node that holds the data pointer to delete, and (2) the
19 * node that was originally inserted with said data pointer. These have to be
20 * coalesced into a single node before removal (see usage of
21 * __binheap_safe_swap()). We have to track node references to accomplish this.
22 */
23
24struct binheap_node {
25 void *data;
26 struct binheap_node *parent;
27 struct binheap_node *left;
28 struct binheap_node *right;
29
30 /* pointer to binheap_node that holds *data for which this binheap_node
31 * was originally inserted. (*data "owns" this node)
32 */
33 struct binheap_node *ref;
34 struct binheap_node **ref_ptr;
35};
36
37/**
 38 * Signature of comparator function. Assumed 'less-than' (min-heap).
39 * Pass in 'greater-than' for max-heap.
40 *
41 * TODO: Consider macro-based implementation that allows comparator to be
42 * inlined (similar to Linux red/black tree) for greater efficiency.
43 */
44typedef int (*binheap_order_t)(struct binheap_node *a,
45 struct binheap_node *b);
46
47
48struct binheap {
49 struct binheap_node *root;
50
51 /* pointer to node to take next inserted child */
52 struct binheap_node *next;
53
54 /* pointer to last node in complete binary tree */
55 struct binheap_node *last;
56
57 /* comparator function pointer */
58 binheap_order_t compare;
59};
60
61
62/* Initialized heap nodes not in a heap have parent
63 * set to BINHEAP_POISON.
64 */
65#define BINHEAP_POISON ((void*)(0xdeadbeef))
66
67
68/**
69 * binheap_entry - get the struct for this heap node.
70 * Only valid when called upon heap nodes other than the root handle.
71 * @ptr: the heap node.
72 * @type: the type of struct pointed to by binheap_node::data.
73 * @member: unused.
74 */
75#define binheap_entry(ptr, type, member) \
76((type *)((ptr)->data))
77
78/**
79 * binheap_node_container - get the struct that contains this node.
80 * Only valid when called upon heap nodes other than the root handle.
81 * @ptr: the heap node.
82 * @type: the type of struct the node is embedded in.
83 * @member: the name of the binheap_struct within the (type) struct.
84 */
85#define binheap_node_container(ptr, type, member) \
86container_of((ptr), type, member)
87
88/**
89 * binheap_top_entry - get the struct for the node at the top of the heap.
90 * Only valid when called upon the heap handle node.
91 * @ptr: the special heap-handle node.
92 * @type: the type of the struct the head is embedded in.
93 * @member: the name of the binheap_struct within the (type) struct.
94 */
95#define binheap_top_entry(ptr, type, member) \
96binheap_entry((ptr)->root, type, member)
97
98/**
99 * binheap_delete_root - remove the root element from the heap.
100 * @handle: handle to the heap.
101 * @type: the type of the struct the head is embedded in.
102 * @member: the name of the binheap_struct within the (type) struct.
103 */
104#define binheap_delete_root(handle, type, member) \
105__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
106
107/**
108 * binheap_delete - remove an arbitrary element from the heap.
109 * @to_delete: pointer to node to be removed.
110 * @handle: handle to the heap.
111 */
112#define binheap_delete(to_delete, handle) \
113__binheap_delete((to_delete), (handle))
114
115/**
116 * binheap_add - insert an element to the heap
117 * new_node: node to add.
118 * @handle: handle to the heap.
119 * @type: the type of the struct the head is embedded in.
120 * @member: the name of the binheap_struct within the (type) struct.
121 */
122#define binheap_add(new_node, handle, type, member) \
123__binheap_add((new_node), (handle), container_of((new_node), type, member))
124
125/**
126 * binheap_decrease - re-eval the position of a node (based upon its
127 * original data pointer).
128 * @handle: handle to the heap.
129 * @orig_node: node that was associated with the data pointer
130 * (whose value has changed) when said pointer was
131 * added to the heap.
132 */
133#define binheap_decrease(orig_node, handle) \
134__binheap_decrease((orig_node), (handle))
135
136#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL}
137
138#define BINHEAP_NODE(name) \
139 struct binheap_node name = BINHEAP_NODE_INIT()
140
141
142static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
143{
144 n->data = NULL;
145 n->parent = BINHEAP_POISON;
146 n->left = NULL;
147 n->right = NULL;
148 n->ref = NULL;
149 n->ref_ptr = NULL;
150}
151
152static inline void INIT_BINHEAP_HANDLE(struct binheap *handle,
153 binheap_order_t compare)
154{
155 handle->root = NULL;
156 handle->next = NULL;
157 handle->last = NULL;
158 handle->compare = compare;
159}
160
161/* Returns true if binheap is empty. */
162static inline int binheap_empty(struct binheap *handle)
163{
164 return(handle->root == NULL);
165}
166
167/* Returns true if binheap node is in a heap. */
168static inline int binheap_is_in_heap(struct binheap_node *node)
169{
170 return (node->parent != BINHEAP_POISON);
171}
172
173/* Returns true if binheap node is in given heap. */
174int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap);
175
176/* Add a node to a heap */
177void __binheap_add(struct binheap_node *new_node,
178 struct binheap *handle,
179 void *data);
180
181/**
182 * Removes the root node from the heap. The node is removed after coalescing
183 * the binheap_node with its original data pointer at the root of the tree.
184 *
185 * The 'last' node in the tree is then swapped up to the root and bubbled
186 * down.
187 */
188void __binheap_delete_root(struct binheap *handle,
189 struct binheap_node *container);
190
191/**
192 * Delete an arbitrary node. Bubble node to delete up to the root,
 193 * and then delete the root.
194 */
195void __binheap_delete(struct binheap_node *node_to_delete,
196 struct binheap *handle);
197
198/**
199 * Bubble up a node whose pointer has decreased in value.
200 */
201void __binheap_decrease(struct binheap_node *orig_node,
202 struct binheap *handle);
203
204
205#endif
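
A usage sketch of the embedded-node pattern follows. struct job, job_before(), and binheap_demo() are illustrative, not part of the patch.

#include <litmus/binheap.h>
#include <litmus/debug_trace.h>
#include <litmus/rt_param.h>	/* for lt_t */

struct job {
	lt_t deadline;
	struct binheap_node node;	/* embedded heap node */
};

/* 'less-than' comparator => min-heap ordered by deadline */
static int job_before(struct binheap_node *a, struct binheap_node *b)
{
	struct job *x = binheap_entry(a, struct job, node);
	struct job *y = binheap_entry(b, struct job, node);
	return x->deadline < y->deadline;
}

static void binheap_demo(struct job *j)
{
	struct binheap handle;

	INIT_BINHEAP_HANDLE(&handle, job_before);
	INIT_BINHEAP_NODE(&j->node);

	binheap_add(&j->node, &handle, struct job, node);

	if (!binheap_empty(&handle)) {
		struct job *earliest =
			binheap_top_entry(&handle, struct job, node);
		TRACE("earliest deadline: %llu\n",
		      (unsigned long long) earliest->deadline);
		binheap_delete_root(&handle, struct job, node);
	}
}
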
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
new file mode 100644
index 000000000000..60eb814fc82b
--- /dev/null
+++ b/include/litmus/budget.h
@@ -0,0 +1,38 @@
1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_
3
 4/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
5 * the next task. */
6void update_enforcement_timer(struct task_struct* t);
7
8inline static int budget_exhausted(struct task_struct* t)
9{
10 return get_exec_time(t) >= get_exec_cost(t);
11}
12
13inline static lt_t budget_remaining(struct task_struct* t)
14{
15 if (!budget_exhausted(t))
16 return get_exec_cost(t) - get_exec_time(t);
17 else
18 /* avoid overflow */
19 return 0;
20}
21
22#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
23
24#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
25 == PRECISE_ENFORCEMENT)
26
27static inline int requeue_preempted_job(struct task_struct* t)
28{
29 /* Add task to ready queue only if not subject to budget enforcement or
30 * if the job has budget remaining. t may be NULL.
31 */
32 return t && !is_completed(t) &&
33 (!budget_exhausted(t) || !budget_enforced(t));
34}
35
36void litmus_current_budget(lt_t *used_so_far, lt_t *remaining);
37
38#endif
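
A sketch of how a plugin's reschedule path might consult these helpers; plugin_requeue() and on_reschedule() are hypothetical plugin internals, not part of this patch.

#include <linux/sched.h>
#include <litmus/budget.h>
#include <litmus/debug_trace.h>
#include <litmus/litmus.h>

/* stand-in for a plugin-specific ready-queue insertion */
static void plugin_requeue(struct task_struct *t)
{
}

static void on_reschedule(struct task_struct *prev, struct task_struct *next)
{
	/* Only put prev back if it still has work and budget left (or is
	 * not subject to budget enforcement at all); prev may be NULL. */
	if (requeue_preempted_job(prev)) {
		TRACE_TASK(prev, "requeued, %llu ns of budget remaining\n",
			   (unsigned long long) budget_remaining(prev));
		plugin_requeue(prev);
	}

	/* Arm, reprogram, or cancel the enforcement timer for 'next'. */
	update_enforcement_timer(next);
}
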
diff --git a/include/litmus/ceiling.h b/include/litmus/ceiling.h
new file mode 100644
index 000000000000..f3d3889315f7
--- /dev/null
+++ b/include/litmus/ceiling.h
@@ -0,0 +1,36 @@
1#ifndef _LITMUS_CEILING_H_
2#define _LITMUS_CEILING_H_
3
4#ifdef CONFIG_LITMUS_LOCKING
5
6void __srp_ceiling_block(struct task_struct *cur);
7
8DECLARE_PER_CPU(int, srp_objects_in_use);
9
10/* assumes preemptions off */
 11static inline void srp_ceiling_block(void)
12{
13 struct task_struct *tsk = current;
14
15 /* Only applies to real-time tasks. */
16 if (!is_realtime(tsk))
17 return;
18
19 /* Bail out early if there aren't any SRP resources around. */
20 if (likely(!raw_cpu_read(srp_objects_in_use)))
21 return;
22
23 /* Avoid recursive ceiling blocking. */
24 if (unlikely(tsk->rt_param.srp_non_recurse))
25 return;
26
27 /* must take slow path */
28 __srp_ceiling_block(tsk);
29}
30
31#else
32#define srp_ceiling_block() /* nothing */
33#endif
34
35
36#endif \ No newline at end of file
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
new file mode 100644
index 000000000000..fc7f0f87966e
--- /dev/null
+++ b/include/litmus/clustered.h
@@ -0,0 +1,46 @@
1#ifndef CLUSTERED_H
2#define CLUSTERED_H
3
4/* Which cache level should be used to group CPUs into clusters?
5 * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
6 * global scheduling).
7 */
8enum cache_level {
9 GLOBAL_CLUSTER = 0,
10 L1_CLUSTER = 1,
11 L2_CLUSTER = 2,
12 L3_CLUSTER = 3
13};
14
15int parse_cache_level(const char *str, enum cache_level *level);
16const char* cache_level_name(enum cache_level level);
17
18/* expose a cache level in a /proc dir */
19struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
20 enum cache_level* level);
21
22
23
24struct scheduling_cluster {
25 unsigned int id;
26 /* list of CPUs that are part of this cluster */
27 struct list_head cpus;
28};
29
30struct cluster_cpu {
31 unsigned int id; /* which CPU is this? */
32 struct list_head cluster_list; /* List of the CPUs in this cluster. */
33 struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
34};
35
36int get_cluster_size(enum cache_level level);
37
38int assign_cpus_to_clusters(enum cache_level level,
39 struct scheduling_cluster* clusters[],
40 unsigned int num_clusters,
41 struct cluster_cpu* cpus[],
42 unsigned int num_cpus);
43
44int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index);
45
46#endif
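
A sketch of the intended use during plugin activation; demo_cluster_size() is hypothetical and assumes parse_cache_level() returns 0 on success.

#include <linux/errno.h>
#include <litmus/clustered.h>
#include <litmus/debug_trace.h>

/* Translate a user-supplied string such as "L2" into the number of CPUs
 * per cluster. GLOBAL_CLUSTER groups all CPUs into a single cluster. */
static long demo_cluster_size(const char *what)
{
	enum cache_level level;
	int cluster_size;

	if (parse_cache_level(what, &level) != 0)
		return -EINVAL;

	cluster_size = get_cluster_size(level);
	if (cluster_size <= 0)
		return -EINVAL;

	TRACE("%s clustering: %d CPUs per cluster\n",
	      cache_level_name(level), cluster_size);
	return cluster_size;
}
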
diff --git a/include/litmus/ctrlpage.h b/include/litmus/ctrlpage.h
new file mode 100644
index 000000000000..f7b03e1aedd6
--- /dev/null
+++ b/include/litmus/ctrlpage.h
@@ -0,0 +1,105 @@
1#ifndef _LITMUS_CTRLPAGE_H_
2#define _LITMUS_CTRLPAGE_H_
3
4#include <litmus/rt_param.h>
5
6union np_flag {
7 uint32_t raw;
8 struct {
9 /* Is the task currently in a non-preemptive section? */
10 uint32_t flag:31;
11 /* Should the task call into the scheduler? */
12 uint32_t preempt:1;
13 } np;
14};
15
16/* The definition of the data that is shared between the kernel and real-time
17 * tasks via a shared page (see litmus/ctrldev.c).
18 *
19 * WARNING: User space can write to this, so don't trust
20 * the correctness of the fields!
21 *
 22 * This serves two purposes: to enable efficient signaling
23 * of non-preemptive sections (user->kernel) and
24 * delayed preemptions (kernel->user), and to export
25 * some real-time relevant statistics such as preemption and
26 * migration data to user space. We can't use a device to export
27 * statistics because we want to avoid system call overhead when
 28 * determining preemption/migration overheads.
29 */
30struct control_page {
 31 /* This flag is used by userspace to communicate non-preemptive
32 * sections. */
33 volatile __attribute__ ((aligned (8))) union np_flag sched;
34
35 /* Incremented by the kernel each time an IRQ is handled. */
36 volatile __attribute__ ((aligned (8))) uint64_t irq_count;
37
38 /* Locking overhead tracing: userspace records here the time stamp
39 * and IRQ counter prior to starting the system call. */
40 uint64_t ts_syscall_start; /* Feather-Trace cycles */
41 uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
42 * started. */
43
44 lt_t deadline; /* Deadline for the currently executing job */
45 lt_t release; /* Release time of current job */
46 uint64_t job_index; /* Job sequence number of current job */
47
48 /* to be extended */
49};
50
51/* Expected offsets within the control page. */
52
53#define LITMUS_CP_OFFSET_SCHED 0
54#define LITMUS_CP_OFFSET_IRQ_COUNT 8
55#define LITMUS_CP_OFFSET_TS_SC_START 16
56#define LITMUS_CP_OFFSET_IRQ_SC_START 24
57#define LITMUS_CP_OFFSET_DEADLINE 32
58#define LITMUS_CP_OFFSET_RELEASE 40
59#define LITMUS_CP_OFFSET_JOB_INDEX 48
60
61/* System call emulation via ioctl() */
62
63typedef enum {
64 LRT_null_call = 2006,
65 LRT_set_rt_task_param,
66 LRT_get_rt_task_param,
67 LRT_reservation_create,
68 LRT_complete_job,
69 LRT_od_open,
70 LRT_od_close,
71 LRT_litmus_lock,
72 LRT_litmus_unlock,
73 LRT_wait_for_job_release,
74 LRT_wait_for_ts_release,
75 LRT_release_ts,
76 LRT_get_current_budget,
77} litmus_syscall_id_t;
78
79union litmus_syscall_args {
80 struct {
81 pid_t pid;
82 struct rt_task __user *param;
83 } get_set_task_param;
84
85 struct {
86 uint32_t type;
87 void __user *config;
88 } reservation_create;
89
90 struct {
91 uint32_t fd;
92 uint32_t obj_type;
93 uint32_t obj_id;
94 void __user *config;
95 } od_open;
96
97 struct {
98 lt_t __user *expended;
99 lt_t __user *remaining;
100 } get_current_budget;
101};
102
103
104#endif
105
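
Since the LITMUS_CP_OFFSET_* constants pin down the user/kernel ABI, a build-time consistency check along the following lines could be used. This is a sketch, not necessarily what litmus/ctrldev.c actually does.

#include <linux/bug.h>
#include <linux/stddef.h>
#include <litmus/ctrlpage.h>

/* Verify that the advertised offsets match the actual struct layout. */
static inline void check_ctrl_page_layout(void)
{
	BUILD_BUG_ON(offsetof(struct control_page, sched)
		     != LITMUS_CP_OFFSET_SCHED);
	BUILD_BUG_ON(offsetof(struct control_page, irq_count)
		     != LITMUS_CP_OFFSET_IRQ_COUNT);
	BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
		     != LITMUS_CP_OFFSET_TS_SC_START);
	BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
		     != LITMUS_CP_OFFSET_IRQ_SC_START);
	BUILD_BUG_ON(offsetof(struct control_page, deadline)
		     != LITMUS_CP_OFFSET_DEADLINE);
	BUILD_BUG_ON(offsetof(struct control_page, release)
		     != LITMUS_CP_OFFSET_RELEASE);
	BUILD_BUG_ON(offsetof(struct control_page, job_index)
		     != LITMUS_CP_OFFSET_JOB_INDEX);
}
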
diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
index 1266ac6a760c..a760631d4fca 100644
--- a/include/litmus/debug_trace.h
+++ b/include/litmus/debug_trace.h
@@ -37,4 +37,9 @@ extern atomic_t __log_seq_no;
 37#define TRACE_CUR(fmt, args...) \
 38 TRACE_TASK(current, fmt, ## args)
 39
40#define TRACE_WARN_ON(cond) \
41 if (unlikely(cond)) \
42 TRACE("WARNING: '%s' [%s@%s:%d]\n", \
43 #cond, __FUNCTION__, __FILE__, __LINE__)
44
 45#endif
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 000000000000..bbaf22ea7f12
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,25 @@
1/*
2 * EDF common data structures and utility functions shared by all EDF
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_COMMON_H__
11#define __UNC_EDF_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 000000000000..fd9b30dbfb34
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,78 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13#include <linux/slab.h>
14
15#define MAX_OBJECT_DESCRIPTORS 85
16
17typedef enum {
18 MIN_OBJ_TYPE = 0,
19
20 FMLP_SEM = 0,
21 SRP_SEM = 1,
22
23 MPCP_SEM = 2,
24 MPCP_VS_SEM = 3,
25 DPCP_SEM = 4,
26 PCP_SEM = 5,
27
28 DFLP_SEM = 6,
29
30 MAX_OBJ_TYPE = 6
31} obj_type_t;
32
33struct inode_obj_id {
34 struct list_head list;
35 atomic_t count;
36 struct inode* inode;
37
38 obj_type_t type;
39 void* obj;
40 unsigned int id;
41};
42
43struct fdso_ops;
44
45struct od_table_entry {
46 unsigned int used;
47
48 struct inode_obj_id* obj;
49 const struct fdso_ops* class;
50};
51
52struct fdso_ops {
53 int (*create)(void** obj_ref, obj_type_t type, void* __user);
54 void (*destroy)(obj_type_t type, void*);
55 int (*open) (struct od_table_entry*, void* __user);
56 int (*close) (struct od_table_entry*);
57};
58
59/* translate a userspace supplied od into the raw table entry
60 * returns NULL if od is invalid
61 */
62struct od_table_entry* get_entry_for_od(int od);
63
64/* translate a userspace supplied od into the associated object
65 * returns NULL if od is invalid
66 */
67static inline void* od_lookup(int od, obj_type_t type)
68{
69 struct od_table_entry* e = get_entry_for_od(od);
70 return e && e->obj->type == type ? e->obj->obj : NULL;
71}
72
73#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
74#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
75#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
76
77
78#endif
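
A sketch of what an fdso_ops table for a new object type looks like; struct my_sem and its callbacks are illustrative stand-ins, not part of this patch.

#include <linux/errno.h>
#include <litmus/fdso.h>

struct my_sem {
	int owner_pid;
};

static int my_sem_create(void **obj_ref, obj_type_t type, void __user *arg)
{
	struct my_sem *sem = kzalloc(sizeof(*sem), GFP_KERNEL);

	if (!sem)
		return -ENOMEM;
	*obj_ref = sem;	/* later retrievable via od_lookup() */
	return 0;
}

static void my_sem_destroy(obj_type_t type, void *obj)
{
	kfree(obj);
}

static const struct fdso_ops my_sem_ops = {
	.create  = my_sem_create,
	.destroy = my_sem_destroy,
	/* .open and .close are optional and may stay NULL */
};
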
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 000000000000..71c0d0142fc4
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,183 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
 26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60 BUG_ON(index >= LITMUS_MAX_PRIORITY);
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105/* ******* list-based version ******** */
106
 107/* bitmask-indexed priority queue */
108struct fp_ready_list {
109 unsigned long bitmask[FP_PRIO_BIT_WORDS];
110 struct list_head queue[LITMUS_MAX_PRIORITY];
111};
112
113void fp_ready_list_init(struct fp_ready_list* q);
114
115static inline void fp_rl_set(struct fp_ready_list* q, unsigned int index)
116{
117 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
118 __set_bit(index % BITS_PER_LONG, word);
119}
120
121static inline void fp_rl_clear(struct fp_ready_list* q, unsigned int index)
122{
123 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
124 __clear_bit(index % BITS_PER_LONG, word);
125}
126
127static inline unsigned int fp_rl_find(struct fp_ready_list* q)
128{
129 int i;
130
131 /* loop optimizer should unroll this */
132 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
133 if (q->bitmask[i])
134 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
135
136 return LITMUS_MAX_PRIORITY; /* nothing found */
137}
138
139static inline void fp_ready_list_add(
140 struct fp_ready_list* q, struct list_head* lh, unsigned int index)
141{
142 BUG_ON(index >= LITMUS_MAX_PRIORITY);
143 BUG_ON(in_list(lh));
144
145 fp_rl_set(q, index);
146 list_add_tail(lh, &q->queue[index]);
147}
148
149static inline void fp_ready_list_remove(
150 struct fp_ready_list* q, struct list_head* lh, unsigned int index)
151{
152 BUG_ON(!in_list(lh));
153
154 list_del(lh);
155 if (likely(list_empty(q->queue + index)))
156 fp_rl_clear(q, index);
157}
158
159static inline struct list_head* fp_ready_list_peek(struct fp_ready_list* q)
160{
161 unsigned int idx = fp_rl_find(q);
162
163 if (idx < LITMUS_MAX_PRIORITY) {
164 return q->queue[idx].next;
165 } else
166 return NULL;
167}
168
169static inline struct list_head* fp_ready_list_take(struct fp_ready_list* q)
170{
171 unsigned int idx = fp_rl_find(q);
172 struct list_head* lh;
173
174 if (idx < LITMUS_MAX_PRIORITY) {
175 lh = q->queue[idx].next;
176 fp_ready_list_remove(q, lh, idx);
177 return lh;
178 } else
179 return NULL;
180}
181
182
183#endif
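
A sketch of the typical enqueue/dispatch cycle on the bitmask-indexed queue; fp_requeue_and_pick() is a hypothetical helper and assumes the task's heap_node is allocated and not already queued.

#include <linux/sched.h>
#include <litmus/fp_common.h>
#include <litmus/litmus.h>	/* for get_priority() */

/* Index 0 is the highest priority. */
static struct task_struct *fp_requeue_and_pick(struct fp_prio_queue *q,
					       struct task_struct *t)
{
	/* Enqueue t at its configured priority level... */
	fp_prio_add(q, t, get_priority(t));

	/* ...then dispatch: fpq_find() inspects one bitmask word per
	 * BITS_PER_LONG priority levels, so picking the highest-priority
	 * task does not depend on how many tasks are queued. */
	return fp_prio_take(q);
}
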
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..642de98542c8
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,147 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#include <linux/math64.h>
5
6#ifndef __KERNEL__
7#include <stdint.h>
 8#define abs(x) (((x) < 0) ? -(x) : (x))
9#endif
10
11// Use 64-bit because we want to track things at the nanosecond scale.
12// This can lead to very large numbers.
13typedef int64_t fpbuf_t;
14typedef struct
15{
16 fpbuf_t val;
17} fp_t;
18
19#define FP_SHIFT 10
20#define ROUND_BIT (FP_SHIFT - 1)
21
22#define _fp(x) ((fp_t) {x})
23
24#ifdef __KERNEL__
25static const fp_t LITMUS_FP_ZERO = {.val = 0};
26static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
27#endif
28
29static inline fp_t FP(fpbuf_t x)
30{
31 return _fp(((fpbuf_t) x) << FP_SHIFT);
32}
33
34/* divide two integers to obtain a fixed point value */
35static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
36{
37 return _fp(div64_s64(FP(a).val, (b)));
38}
39
40static inline fpbuf_t _point(fp_t x)
41{
42 return (x.val % (1 << FP_SHIFT));
43
44}
45
46#define fp2str(x) x.val
47/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
48#define _FP_ "%ld/1024"
49
50static inline fpbuf_t _floor(fp_t x)
51{
52 return x.val >> FP_SHIFT;
53}
54
55/* FIXME: negative rounding */
56static inline fpbuf_t _round(fp_t x)
57{
58 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
59}
60
61/* multiply two fixed point values */
62static inline fp_t _mul(fp_t a, fp_t b)
63{
64 return _fp((a.val * b.val) >> FP_SHIFT);
65}
66
67static inline fp_t _div(fp_t a, fp_t b)
68{
69#if !defined(__KERNEL__) && !defined(unlikely)
70#define unlikely(x) (x)
71#define DO_UNDEF_UNLIKELY
72#endif
73 /* try not to overflow */
74 if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
75 return _fp((a.val / b.val) << FP_SHIFT);
76 else
77 return _fp((a.val << FP_SHIFT) / b.val);
78#ifdef DO_UNDEF_UNLIKELY
79#undef unlikely
80#undef DO_UNDEF_UNLIKELY
81#endif
82}
83
84static inline fp_t _add(fp_t a, fp_t b)
85{
86 return _fp(a.val + b.val);
87}
88
89static inline fp_t _sub(fp_t a, fp_t b)
90{
91 return _fp(a.val - b.val);
92}
93
94static inline fp_t _neg(fp_t x)
95{
96 return _fp(-x.val);
97}
98
99static inline fp_t _abs(fp_t x)
100{
101 return _fp(abs(x.val));
102}
103
104/* works the same as casting float/double to integer */
105static inline fpbuf_t _fp_to_integer(fp_t x)
106{
107 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
108}
109
110static inline fp_t _integer_to_fp(fpbuf_t x)
111{
112 return _frac(x,1);
113}
114
115static inline int _leq(fp_t a, fp_t b)
116{
117 return a.val <= b.val;
118}
119
120static inline int _geq(fp_t a, fp_t b)
121{
122 return a.val >= b.val;
123}
124
125static inline int _lt(fp_t a, fp_t b)
126{
127 return a.val < b.val;
128}
129
130static inline int _gt(fp_t a, fp_t b)
131{
132 return a.val > b.val;
133}
134
135static inline int _eq(fp_t a, fp_t b)
136{
137 return a.val == b.val;
138}
139
140static inline fp_t _max(fp_t a, fp_t b)
141{
142 if (a.val < b.val)
143 return b;
144 else
145 return a;
146}
147#endif
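
A small worked example of the fixed-point arithmetic: with FP_SHIFT = 10, fp_t values are stored in units of 1/1024, and the numbers in the comments follow directly from the definitions above. fpmath_demo() is illustrative only.

#include <linux/bug.h>
#include <litmus/fpmath.h>

static void fpmath_demo(void)
{
	fp_t third = _frac(1, 3);	/*  341/1024 ~= 0.333 */
	fp_t six   = FP(6);		/* 6144/1024  = 6.0   */
	fp_t prod  = _mul(third, six);	/* 2046/1024 ~= 1.998 */

	BUG_ON(_round(prod) != 2);		/* nearest integer      */
	BUG_ON(_fp_to_integer(prod) != 1);	/* truncates toward 0,  *
						 * like a float->int cast */
}
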
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 000000000000..7033393148df
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,13 @@
1#ifndef __LITMUS_JOBS_H__
2#define __LITMUS_JOBS_H__
3
4void prepare_for_next_period(struct task_struct *t);
5void release_at(struct task_struct *t, lt_t start);
6
7void inferred_sporadic_job_release_at(struct task_struct *t, lt_t when);
8
9long default_wait_for_release_at(lt_t release_time);
10long complete_job(void);
11long complete_job_oneshot(void);
12
13#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index c87863c9b231..f550367ddd4b 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -6,7 +6,50 @@
 6#ifndef _LINUX_LITMUS_H_
 7#define _LINUX_LITMUS_H_
 8
9#include <litmus/ctrlpage.h>
10
11#ifdef CONFIG_RELEASE_MASTER
12extern atomic_t release_master_cpu;
13#endif
14
15/* in_list - is a given list_head queued on some list?
16 */
17static inline int in_list(struct list_head* list)
18{
19 return !( /* case 1: deleted */
20 (list->next == LIST_POISON1 &&
21 list->prev == LIST_POISON2)
22 ||
23 /* case 2: initialized */
24 (list->next == list &&
25 list->prev == list)
26 );
27}
28
29struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
30
31#define NO_CPU 0xffffffff
32
33void litmus_fork(struct task_struct *tsk);
34void litmus_exec(void);
35/* clean up real-time state of a task */
36void litmus_clear_state(struct task_struct *dead_tsk);
37void exit_litmus(struct task_struct *dead_tsk);
38
39/* Prevent the plugin from being switched-out from underneath a code
40 * path. Might sleep, so may be called only from non-atomic context. */
41void litmus_plugin_switch_disable(void);
42void litmus_plugin_switch_enable(void);
43
44long litmus_admit_task(struct task_struct *tsk);
45void litmus_exit_task(struct task_struct *tsk);
46void litmus_dealloc(struct task_struct *tsk);
47void litmus_do_exit(struct task_struct *tsk);
48int litmus_be_migrate_to(int cpu);
49
 50#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
51#define rt_transition_pending(t) \
52 ((t)->rt_param.transition_pending)
 53
 54#define tsk_rt(t) (&(t)->rt_param)
 55
@@ -28,6 +71,7 @@
 71#define get_partition(t) (tsk_rt(t)->task_params.cpu)
 72#define get_priority(t) (tsk_rt(t)->task_params.priority)
 73#define get_class(t) (tsk_rt(t)->task_params.cls)
74#define get_release_policy(t) (tsk_rt(t)->task_params.release_policy)
 75
 76/* job_param macros */
 77#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
@@ -35,6 +79,15 @@
 79#define get_release(t) (tsk_rt(t)->job_params.release)
 80#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
 81
82/* release policy macros */
83#define is_periodic(t) (get_release_policy(t) == TASK_PERIODIC)
84#define is_sporadic(t) (get_release_policy(t) == TASK_SPORADIC)
85#ifdef CONFIG_ALLOW_EARLY_RELEASE
86#define is_early_releasing(t) (get_release_policy(t) == TASK_EARLY)
87#else
88#define is_early_releasing(t) (0)
89#endif
90
 91#define is_hrt(t) \
 92 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
 93#define is_srt(t) \
@@ -48,6 +101,67 @@ static inline lt_t litmus_clock(void)
 101 return ktime_to_ns(ktime_get());
 102}
 103
104/* A macro to convert from nanoseconds to ktime_t. */
105#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
106
107#define is_released(t, now) \
108 (lt_before_eq(get_release(t), now))
109#define is_tardy(t, now) \
110 (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
111
112/* real-time comparison macros */
113#define earlier_deadline(a, b) (lt_before(\
114 (a)->rt_param.job_params.deadline,\
115 (b)->rt_param.job_params.deadline))
116#define earlier_release(a, b) (lt_before(\
117 (a)->rt_param.job_params.release,\
118 (b)->rt_param.job_params.release))
119
120void preempt_if_preemptable(struct task_struct* t, int on_cpu);
121
122#define bheap2task(hn) ((struct task_struct*) hn->value)
123
124static inline int is_present(struct task_struct* t)
125{
126 return t && tsk_rt(t)->present;
127}
128
129static inline int is_completed(struct task_struct* t)
130{
131 return t && tsk_rt(t)->completed;
132}
133
134
135/* Used to convert ns-specified execution costs and periods into
136 * integral quanta equivalents.
137 */
138#define LITMUS_QUANTUM_LENGTH_NS (CONFIG_LITMUS_QUANTUM_LENGTH_US * 1000ULL)
139
140/* make the unit explicit */
141typedef unsigned long quanta_t;
142
143enum round {
144 FLOOR,
145 CEIL
146};
147
148static inline quanta_t time2quanta(lt_t time, enum round round)
149{
150 s64 quantum_length = LITMUS_QUANTUM_LENGTH_NS;
151
152 if (do_div(time, quantum_length) && round == CEIL)
153 time++;
154 return (quanta_t) time;
155}
156
157static inline lt_t quanta2time(quanta_t quanta)
158{
159 return quanta * LITMUS_QUANTUM_LENGTH_NS;
160}
161
162/* By how much is cpu staggered behind CPU 0? */
163u64 cpu_stagger_offset(int cpu);
164
 165static inline struct control_page* get_control_page(struct task_struct *t)
 166{
 167 return tsk_rt(t)->ctrl_page;
@@ -58,4 +172,53 @@ static inline int has_control_page(struct task_struct* t)
 172 return tsk_rt(t)->ctrl_page != NULL;
 173}
 174
175
176#ifdef CONFIG_SCHED_OVERHEAD_TRACE
177
178#define TS_SYSCALL_IN_START \
179 if (has_control_page(current)) { \
180 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); \
181 }
182
183#define TS_SYSCALL_IN_END \
184 if (has_control_page(current)) { \
185 unsigned long flags; \
186 uint64_t irqs; \
187 local_irq_save(flags); \
188 irqs = get_control_page(current)->irq_count - \
189 get_control_page(current)->irq_syscall_start; \
190 __TS_SYSCALL_IN_END(&irqs); \
191 local_irq_restore(flags); \
192 }
193
194#else
195
196#define TS_SYSCALL_IN_START
197#define TS_SYSCALL_IN_END
198
199#endif
200
201#ifdef CONFIG_SMP
202
203/*
204 * struct hrtimer_start_on_info - timer info on remote cpu
205 * @timer: timer to be triggered on remote cpu
206 * @time: time event
207 * @mode: timer mode
208 * @csd: smp_call_function parameter to call hrtimer_pull on remote cpu
209 */
210struct hrtimer_start_on_info {
211 struct hrtimer *timer;
212 ktime_t time;
213 enum hrtimer_mode mode;
214 struct call_single_data csd;
215};
216
217void hrtimer_pull(void *csd_info);
218extern void hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
219 struct hrtimer *timer, ktime_t time,
220 const enum hrtimer_mode mode);
221
222#endif
223
 224#endif
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
new file mode 100644
index 000000000000..a5db24c03ec0
--- /dev/null
+++ b/include/litmus/litmus_proc.h
@@ -0,0 +1,63 @@
1#include <litmus/sched_plugin.h>
2#include <linux/proc_fs.h>
3
4int __init init_litmus_proc(void);
5void exit_litmus_proc(void);
6
7struct cd_mapping
8{
9 int id;
10 cpumask_var_t mask;
11 struct proc_dir_entry *proc_file;
12};
13
14struct domain_proc_info
15{
16 int num_cpus;
17 int num_domains;
18
19 struct cd_mapping *cpu_to_domains;
20 struct cd_mapping *domain_to_cpus;
21};
22
23/*
24 * On success, returns 0 and sets the pointer to the location of the new
25 * proc dir entry, otherwise returns an error code and sets pde to NULL.
26 */
27long make_plugin_proc_dir(struct sched_plugin* plugin,
28 struct proc_dir_entry** pde);
29
30/*
31 * Plugins should deallocate all child proc directory entries before
32 * calling this, to avoid memory leaks.
33 */
34void remove_plugin_proc_dir(struct sched_plugin* plugin);
35
36/*
37 * Setup the CPU <-> sched domain mappings in proc
38 */
39long activate_domain_proc(struct domain_proc_info* map);
40
41/*
42 * Remove the CPU <-> sched domain mappings from proc
43 */
44long deactivate_domain_proc(void);
45
46/*
47 * Alloc memory for the mapping
48 * Note: Does not set up proc files. Use make_sched_domain_maps for that.
49 */
50long init_domain_proc_info(struct domain_proc_info* map,
51 int num_cpus, int num_domains);
52
53/*
54 * Free memory of the mapping
55 * Note: Does not clean up proc files. Use deactivate_domain_proc for that.
56 */
57void destroy_domain_proc_info(struct domain_proc_info* map);
58
59/* Copy at most size-1 bytes from ubuf into kbuf, null-terminate buf, and
60 * remove a '\n' if present. Returns the number of bytes that were read or
61 * -EFAULT. */
62int copy_and_chomp(char *kbuf, unsigned long ksize,
63 __user const char* ubuf, unsigned long ulength);
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
new file mode 100644
index 000000000000..4d7b870cb443
--- /dev/null
+++ b/include/litmus/locking.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_LOCKING_H
2#define LITMUS_LOCKING_H
3
4struct litmus_lock_ops;
5
6/* Generic base struct for LITMUS^RT userspace semaphores.
7 * This structure should be embedded in protocol-specific semaphores.
8 */
9struct litmus_lock {
10 struct litmus_lock_ops *ops;
11 int type;
12};
13
14struct litmus_lock_ops {
15 /* Current task tries to obtain / drop a reference to a lock.
16 * Optional methods, allowed by default. */
17 int (*open)(struct litmus_lock*, void* __user);
18 int (*close)(struct litmus_lock*);
19
20 /* Current tries to lock/unlock this lock (mandatory methods). */
21 int (*lock)(struct litmus_lock*);
22 int (*unlock)(struct litmus_lock*);
23
24 /* The lock is no longer being referenced (mandatory method). */
25 void (*deallocate)(struct litmus_lock*);
26};
27
28#endif
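
A sketch of how a protocol-specific lock embeds struct litmus_lock and provides an ops table; simple_mutex and its trivial callbacks are illustrative only, not part of this patch.

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <litmus/locking.h>

struct simple_mutex {
	struct litmus_lock litmus_lock;	/* embedded base struct */
	struct task_struct *owner;
	wait_queue_head_t wait;
};

static int simple_mutex_lock(struct litmus_lock *l)
{
	/* a real protocol would implement blocking/priority rules here */
	return 0;
}

static int simple_mutex_unlock(struct litmus_lock *l)
{
	return 0;
}

static void simple_mutex_free(struct litmus_lock *l)
{
	kfree(container_of(l, struct simple_mutex, litmus_lock));
}

static struct litmus_lock_ops simple_mutex_ops = {
	/* .open/.close omitted: optional, allowed by default */
	.lock       = simple_mutex_lock,
	.unlock     = simple_mutex_unlock,
	.deallocate = simple_mutex_free,
};
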
diff --git a/include/litmus/np.h b/include/litmus/np.h
new file mode 100644
index 000000000000..dbe2b695f74a
--- /dev/null
+++ b/include/litmus/np.h
@@ -0,0 +1,121 @@
1#ifndef _LITMUS_NP_H_
2#define _LITMUS_NP_H_
3
4/* Definitions related to non-preemptive sections signaled via the control
5 * page
6 */
7
8#ifdef CONFIG_NP_SECTION
9
10static inline int is_kernel_np(struct task_struct *t)
11{
12 return tsk_rt(t)->kernel_np;
13}
14
15static inline int is_user_np(struct task_struct *t)
16{
17 return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
18}
19
20static inline void request_exit_np(struct task_struct *t)
21{
22 if (is_user_np(t)) {
23 /* Set the flag that tells user space to call
24 * into the kernel at the end of a critical section. */
25 if (likely(tsk_rt(t)->ctrl_page)) {
26 TRACE_TASK(t, "setting delayed_preemption flag\n");
27 tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
28 }
29 }
30}
31
32static inline void make_np(struct task_struct *t)
33{
34 tsk_rt(t)->kernel_np++;
35}
36
37/* Caller should check if preemption is necessary when
38 * the function return 0.
39 */
40static inline int take_np(struct task_struct *t)
41{
42 return --tsk_rt(t)->kernel_np;
43}
44
45/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
46static inline int request_exit_np_atomic(struct task_struct *t)
47{
48 union np_flag old, new;
49
50 if (tsk_rt(t)->ctrl_page) {
51 old.raw = tsk_rt(t)->ctrl_page->sched.raw;
52 if (old.np.flag == 0) {
53 /* no longer non-preemptive */
54 return 0;
55 } else if (old.np.preempt) {
56 /* already set, nothing for us to do */
57 return 1;
58 } else {
59 /* non preemptive and flag not set */
60 new.raw = old.raw;
61 new.np.preempt = 1;
62 /* if we get old back, then we atomically set the flag */
63 return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
64 /* If we raced with a concurrent change, then so be
65 * it. Deliver it by IPI. We don't want an unbounded
66 * retry loop here since tasks might exploit that to
67 * keep the kernel busy indefinitely. */
68 }
69 } else
70 return 0;
71}
72
73#else
74
75static inline int is_kernel_np(struct task_struct* t)
76{
77 return 0;
78}
79
80static inline int is_user_np(struct task_struct* t)
81{
82 return 0;
83}
84
85static inline void request_exit_np(struct task_struct *t)
86{
87 /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
88 BUG();
89}
90
91static inline int request_exit_np_atomic(struct task_struct *t)
92{
93 return 0;
94}
95
96#endif
97
98static inline void clear_exit_np(struct task_struct *t)
99{
100 if (likely(tsk_rt(t)->ctrl_page))
101 tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
102}
103
104static inline int is_np(struct task_struct *t)
105{
106#ifdef CONFIG_SCHED_DEBUG_TRACE
107 int kernel, user;
108 kernel = is_kernel_np(t);
109 user = is_user_np(t);
110 if (kernel || user)
111 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
112
113 kernel, user);
114 return kernel || user;
115#else
116 return unlikely(is_kernel_np(t) || is_user_np(t));
117#endif
118}
119
120#endif
121
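
For context, a hedged user-space sketch of the handshake that union np_flag implies. A real application would go through a user-space library rather than touching the control page directly, and the final call into the kernel is shown as sched_yield() purely for illustration.

#include <sched.h>

static void run_np_section(volatile struct control_page *cp,
			   void (*critical_section)(void))
{
	cp->sched.np.flag = 1;		/* enter non-preemptive section */
	__sync_synchronize();

	critical_section();		/* keep this short! */

	cp->sched.np.flag = 0;		/* leave non-preemptive section */
	__sync_synchronize();

	if (cp->sched.np.preempt)	/* kernel deferred a preemption? */
		sched_yield();		/* give it a chance to act now   */
}
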
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
new file mode 100644
index 000000000000..8f8bb635cb21
--- /dev/null
+++ b/include/litmus/preempt.h
@@ -0,0 +1,188 @@
1#ifndef LITMUS_PREEMPT_H
2#define LITMUS_PREEMPT_H
3
4#include <linux/types.h>
5#include <linux/cache.h>
6#include <linux/percpu.h>
7#include <asm/atomic.h>
8
9#include <litmus/debug_trace.h>
10
11DECLARE_PER_CPU(bool, litmus_preemption_in_progress);
12
 13/* is_current_running() is a legacy macro (and a hack) that is used to make
14 * the plugin logic, which still stems from the 2.6.20 era, work with current
15 * kernels.
16 *
17 * It used to honor the flag in the preempt_count variable that was
18 * set when scheduling is in progress. This doesn't exist anymore in recent
19 * Linux versions. Instead, Linux has moved to passing a 'preempt' flag to
20 * __schedule(). In particular, Linux ignores prev->state != TASK_RUNNING and
21 * does *not* process self-suspensions if an interrupt (i.e., a preemption)
22 * races with a task that is about to call schedule() anyway.
23 *
24 * The value of the 'preempt' flag in __schedule() is crucial
25 * information for some of the LITMUS^RT plugins, which must re-add
26 * soon-to-block tasks to the ready queue if the rest of the system doesn't
27 * process the preemption yet. Unfortunately, the flag is not passed to
28 * pick_next_task(). Hence, as a hack, we communicate it out of band via the
29 * global, per-core variable litmus_preemption_in_progress, which is set by
30 * the scheduler in __schedule() and read by the plugins via the
31 * is_current_running() macro.
32 */
33#define is_current_running() \
34 ((current)->state == TASK_RUNNING || \
35 this_cpu_read(litmus_preemption_in_progress))
36
37DECLARE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
38
39#ifdef CONFIG_PREEMPT_STATE_TRACE
40const char* sched_state_name(int s);
41#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
42#else
43#define TRACE_STATE(fmt, args...) /* ignore */
44#endif
45
46#define VERIFY_SCHED_STATE(x) \
47 do { int __s = get_sched_state(); \
48 if ((__s & (x)) == 0) \
49 TRACE_STATE("INVALID s=0x%x (%s) not " \
50 "in 0x%x (%s) [%s]\n", \
51 __s, sched_state_name(__s), \
52 (x), #x, __FUNCTION__); \
53 } while (0);
54
55#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
56 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
57 cpu, (x), sched_state_name(x), \
58 (y), sched_state_name(y))
59
60
61typedef enum scheduling_state {
62 TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
63 * should be scheduled, and the processor does not
64 * plan to invoke schedule(). */
65 SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the
66 * processor should reschedule, but this has not
67 * been communicated yet (IPI still pending). */
68 WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to
69 * reschedule and will do so shortly. */
70 TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(),
71 * has selected a new task to schedule, but has not
72 * yet performed the actual context switch. */
73 PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context
74 * switch, but a remote processor has already
75 * determined that a higher-priority task became
76 * eligible after the task was picked. */
77} sched_state_t;
78
79static inline sched_state_t get_sched_state_on(int cpu)
80{
81 return atomic_read(&per_cpu(resched_state, cpu));
82}
83
84static inline sched_state_t get_sched_state(void)
85{
86 return atomic_read(this_cpu_ptr(&resched_state));
87}
88
89static inline int is_in_sched_state(int possible_states)
90{
91 return get_sched_state() & possible_states;
92}
93
94static inline int cpu_is_in_sched_state(int cpu, int possible_states)
95{
96 return get_sched_state_on(cpu) & possible_states;
97}
98
99static inline void set_sched_state(sched_state_t s)
100{
101 TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
102 atomic_set(this_cpu_ptr(&resched_state), s);
103}
104
105static inline int sched_state_transition(sched_state_t from, sched_state_t to)
106{
107 sched_state_t old_state;
108
109 old_state = atomic_cmpxchg(this_cpu_ptr(&resched_state), from, to);
110 if (old_state == from) {
111 TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
112 return 1;
113 } else
114 return 0;
115}
116
117static inline int sched_state_transition_on(int cpu,
118 sched_state_t from,
119 sched_state_t to)
120{
121 sched_state_t old_state;
122
123 old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
124 if (old_state == from) {
125 TRACE_SCHED_STATE_CHANGE(from, to, cpu);
126 return 1;
127 } else
128 return 0;
129}
130
131/* Plugins must call this function after they have decided which job to
132 * schedule next. IMPORTANT: this function must be called while still holding
133 * the lock that is used to serialize scheduling decisions.
134 *
135 * (Ideally, we would like to use runqueue locks for this purpose, but that
136 * would lead to deadlocks with the migration code.)
137 */
138static inline void sched_state_task_picked(void)
139{
140 VERIFY_SCHED_STATE(WILL_SCHEDULE);
141
 142 /* WILL_SCHEDULE has only a local transition => simple store is ok */
143 set_sched_state(TASK_PICKED);
144}
145
146static inline void sched_state_entered_schedule(void)
147{
148 /* Update state for the case that we entered schedule() not due to
149 * set_tsk_need_resched() */
150 set_sched_state(WILL_SCHEDULE);
151}
152
153/* Called by schedule() to check if the scheduling decision is still valid
 154 * after a context switch. Returns 1 if the CPU needs to reschedule. */
155static inline int sched_state_validate_switch(void)
156{
157 int decision_ok = 0;
158
159 VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED | WILL_SCHEDULE);
160
161 if (is_in_sched_state(TASK_PICKED)) {
162 /* Might be good; let's try to transition out of this
163 * state. This must be done atomically since remote processors
164 * may try to change the state, too. */
165 decision_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
166 }
167
168 if (!decision_ok)
169 TRACE_STATE("validation failed (%s)\n",
170 sched_state_name(get_sched_state()));
171
172 return !decision_ok;
173}
174
175/* State transition events. See litmus/preempt.c for details. */
176void sched_state_will_schedule(struct task_struct* tsk);
177void sched_state_ipi(void);
178/* Cause a CPU (remote or local) to reschedule. */
179void litmus_reschedule(int cpu);
180void litmus_reschedule_local(void);
181
182#ifdef CONFIG_DEBUG_KERNEL
183void sched_state_plugin_check(void);
184#else
185#define sched_state_plugin_check() /* no check */
186#endif
187
188#endif
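
A sketch of where the state-machine hooks sit in a plugin's schedule() callback; demo_lock, pick_next_job(), and demo_plugin_schedule() are stand-ins for plugin-specific code, not part of this patch.

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <litmus/preempt.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

static struct task_struct *pick_next_job(void)
{
	return NULL;	/* placeholder: nothing to schedule */
}

static struct task_struct *demo_plugin_schedule(struct task_struct *prev)
{
	struct task_struct *next;

	raw_spin_lock(&demo_lock);

	next = pick_next_job();

	/* Must run while the scheduling decision is still serialized by
	 * demo_lock; moves this CPU from WILL_SCHEDULE to TASK_PICKED. */
	sched_state_task_picked();

	raw_spin_unlock(&demo_lock);
	return next;
}
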
diff --git a/include/litmus/reservations/alloc.h b/include/litmus/reservations/alloc.h
new file mode 100644
index 000000000000..b3471288c9f1
--- /dev/null
+++ b/include/litmus/reservations/alloc.h
@@ -0,0 +1,15 @@
1#ifndef LITMUS_RESERVATIONS_ALLOC_H
2#define LITMUS_RESERVATIONS_ALLOC_H
3
4#include <litmus/reservations/reservation.h>
5
6long alloc_polling_reservation(
7 int res_type,
8 struct reservation_config *config,
9 struct reservation **_res);
10
11long alloc_table_driven_reservation(
12 struct reservation_config *config,
13 struct reservation **_res);
14
15#endif \ No newline at end of file
diff --git a/include/litmus/reservations/budget-notifier.h b/include/litmus/reservations/budget-notifier.h
new file mode 100644
index 000000000000..d831fa9d5153
--- /dev/null
+++ b/include/litmus/reservations/budget-notifier.h
@@ -0,0 +1,50 @@
1#ifndef LITMUS_BUDGET_NOTIFIER_H
2#define LITMUS_BUDGET_NOTIFIER_H
3
4#include <linux/list.h>
5#include <linux/spinlock.h>
6
7struct budget_notifier;
8
9typedef void (*budget_callback_t) (
10 struct budget_notifier *bn
11);
12
13struct budget_notifier {
14 struct list_head list;
15 budget_callback_t budget_exhausted;
16 budget_callback_t budget_replenished;
17};
18
19struct budget_notifier_list {
20 struct list_head list;
21 raw_spinlock_t lock;
22};
23
24void budget_notifier_list_init(struct budget_notifier_list* bnl);
25
26static inline void budget_notifier_add(
27 struct budget_notifier_list *bnl,
28 struct budget_notifier *bn)
29{
30 unsigned long flags;
31
32 raw_spin_lock_irqsave(&bnl->lock, flags);
33 list_add(&bn->list, &bnl->list);
34 raw_spin_unlock_irqrestore(&bnl->lock, flags);
35}
36
37static inline void budget_notifier_remove(
38 struct budget_notifier_list *bnl,
39 struct budget_notifier *bn)
40{
41 unsigned long flags;
42
43 raw_spin_lock_irqsave(&bnl->lock, flags);
44 list_del(&bn->list);
45 raw_spin_unlock_irqrestore(&bnl->lock, flags);
46}
47
48void budget_notifiers_fire(struct budget_notifier_list *bnl, bool replenished);
49
50#endif
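
A minimal usage sketch for the notifier API above. The two handlers and the my_bn instance are illustrative assumptions; a real user would embed the notifier in its own per-reservation state and react to the events in a scheduler-specific way.

static void on_exhausted(struct budget_notifier *bn)
{
        /* the owning reservation ran out of budget */
}

static void on_replenished(struct budget_notifier *bn)
{
        /* the owning reservation's budget was refilled */
}

static struct budget_notifier my_bn = {
        .budget_exhausted   = on_exhausted,
        .budget_replenished = on_replenished,
};

/* given a notifier list owned by some struct reservation *res: */
/* budget_notifier_add(&res->budget_notifiers, &my_bn); */
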
diff --git a/include/litmus/reservations/polling.h b/include/litmus/reservations/polling.h
new file mode 100644
index 000000000000..230e12b1088a
--- /dev/null
+++ b/include/litmus/reservations/polling.h
@@ -0,0 +1,19 @@
1#ifndef LITMUS_POLLING_RESERVATIONS_H
2#define LITMUS_POLLING_RESERVATIONS_H
3
4#include <litmus/reservations/reservation.h>
5
6struct polling_reservation {
7 /* extend basic reservation */
8 struct reservation res;
9
10 lt_t max_budget;
11 lt_t period;
12 lt_t deadline;
13 lt_t offset;
14};
15
16void polling_reservation_init(struct polling_reservation *pres, int use_edf_prio,
17 int use_periodic_polling, lt_t budget, lt_t period, lt_t deadline, lt_t offset);
18
19#endif
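
A hedged initialization sketch for a periodic, EDF-prioritized polling reservation. The literal values are assumptions for illustration (lt_t values are interpreted as nanoseconds here): a 2 ms budget every 10 ms with an implicit deadline and no offset.

static struct polling_reservation my_pres;

static void example_polling_setup(void)
{
        polling_reservation_init(&my_pres,
                1,              /* use_edf_prio: order by deadline */
                1,              /* use_periodic_polling (0 => sporadic polling) */
                2000000ULL,     /* budget:   2 ms */
                10000000ULL,    /* period:  10 ms */
                10000000ULL,    /* deadline: implicit (== period) */
                0);             /* offset */
}
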
diff --git a/include/litmus/reservations/reservation.h b/include/litmus/reservations/reservation.h
new file mode 100644
index 000000000000..1752dac4e698
--- /dev/null
+++ b/include/litmus/reservations/reservation.h
@@ -0,0 +1,224 @@
1#ifndef LITMUS_RESERVATION_H
2#define LITMUS_RESERVATION_H
3
4#include <linux/list.h>
5#include <linux/hrtimer.h>
6
7#include <litmus/debug_trace.h>
8#include <litmus/reservations/budget-notifier.h>
9
10struct reservation_client;
11struct reservation_environment;
12struct reservation;
13
14typedef enum {
15 /* reservation has no clients, is not consuming budget */
16 RESERVATION_INACTIVE = 0,
17
18 /* reservation has clients, consumes budget when scheduled */
19 RESERVATION_ACTIVE,
20
21 /* reservation has no clients, but may be consuming budget */
22 RESERVATION_ACTIVE_IDLE,
23
24 /* Reservation has no budget and waits for
25 * replenishment. May or may not have clients. */
26 RESERVATION_DEPLETED,
27} reservation_state_t;
28
29
30/* ************************************************************************** */
31
32/* Select which task to dispatch. If NULL is returned, it means there is nothing
33 * to schedule right now and background work can be scheduled. */
34typedef struct task_struct * (*dispatch_t) (
35 struct reservation_client *client
36);
37
38/* Something that can be managed in a reservation and that can yield
39 * a process for dispatching. Contains a pointer to the reservation
40 * to which it "belongs". */
41struct reservation_client {
42 struct list_head list;
43 struct reservation* reservation;
44 dispatch_t dispatch;
45};
46
47
48/* ************************************************************************** */
49
50/* Called by reservations to request state change. */
51typedef void (*reservation_change_state_t) (
52 struct reservation_environment* env,
53 struct reservation *res,
54 reservation_state_t new_state
55);
56
57/* Called by reservations to request replenishment while not DEPLETED.
58 * Useful for soft reservations that remain ACTIVE with lower priority. */
59typedef void (*request_replenishment_t)(
60 struct reservation_environment* env,
61 struct reservation *res
62);
63
64/* The framework within which reservations operate. */
65struct reservation_environment {
66 lt_t time_zero;
67 lt_t current_time;
68
69 /* services invoked by reservations */
70 reservation_change_state_t change_state;
71 request_replenishment_t request_replenishment;
72};
73
74/* ************************************************************************** */
75
76/* A new client is added or an existing client resumes. */
77typedef void (*client_arrives_t) (
78 struct reservation *reservation,
79 struct reservation_client *client
80);
81
82/* A client suspends or terminates. */
83typedef void (*client_departs_t) (
84 struct reservation *reservation,
85 struct reservation_client *client,
86 int did_signal_job_completion
87);
88
89/* A previously requested replenishment has occurred. */
90typedef void (*on_replenishment_timer_t) (
91 struct reservation *reservation
92);
93
94/* Update the reservation's budget to reflect execution or idling. */
95typedef void (*drain_budget_t) (
96 struct reservation *reservation,
97 lt_t how_much
98);
99
100/* Select a ready task from one of the clients for scheduling. */
101typedef struct task_struct* (*dispatch_client_t) (
102 struct reservation *reservation,
103 lt_t *time_slice /* May be used to force rescheduling after
104 some amount of time. 0 => no limit */
105);
106
107/* Destructor: called before scheduler is deactivated. */
108typedef void (*shutdown_t)(struct reservation *reservation);
109
110struct reservation_ops {
111 dispatch_client_t dispatch_client;
112
113 client_arrives_t client_arrives;
114 client_departs_t client_departs;
115
116 on_replenishment_timer_t replenish;
117 drain_budget_t drain_budget;
118
119 shutdown_t shutdown;
120};
121
122#define RESERVATION_BACKGROUND_PRIORITY ULLONG_MAX
123
124struct reservation {
125 /* used to queue in environment */
126 struct list_head list;
127 struct list_head replenish_list;
128
129 reservation_state_t state;
130 unsigned int id;
131 unsigned int kind;
132
133 /* exact meaning defined by impl. */
134 lt_t priority;
135 lt_t cur_budget;
136 lt_t next_replenishment;
137
138 /* budget stats */
139 lt_t budget_consumed; /* how much budget consumed in this allocation cycle? */
140 lt_t budget_consumed_total;
141
142 /* list of registered budget callbacks */
143 struct budget_notifier_list budget_notifiers;
144
145 /* for memory reclamation purposes */
146 struct list_head all_list;
147
148 /* interaction with framework */
149 struct reservation_environment *env;
150 struct reservation_ops *ops;
151
152 struct list_head clients;
153};
154
155void reservation_init(struct reservation *res);
156
157/* Default implementations */
158
159/* simply select the first client in the list, set *for_at_most to zero */
160struct task_struct* default_dispatch_client(
161 struct reservation *res,
162 lt_t *for_at_most
163);
164
165/* drain budget at linear rate, enter DEPLETED state when budget used up */
166void common_drain_budget(struct reservation *res, lt_t how_much);
167
168/* "connector" reservation client to hook up tasks with reservations */
169struct task_client {
170 struct reservation_client client;
171 struct task_struct *task;
172};
173
174void task_client_init(struct task_client *tc, struct task_struct *task,
175 struct reservation *reservation);
176
177#define SUP_RESCHEDULE_NOW (0)
178#define SUP_NO_SCHEDULER_UPDATE (ULLONG_MAX)
179
180/* A simple uniprocessor (SUP) flat (i.e., non-hierarchical) reservation
181 * environment.
182 */
183struct sup_reservation_environment {
184 struct reservation_environment env;
185
186 /* ordered by priority */
187 struct list_head active_reservations;
188
189 /* ordered by next_replenishment */
190 struct list_head depleted_reservations;
191
192 /* unordered */
193 struct list_head inactive_reservations;
194
195 /* list of all reservations */
196 struct list_head all_reservations;
197
198 /* - SUP_RESCHEDULE_NOW means call sup_dispatch() now
199 * - SUP_NO_SCHEDULER_UPDATE means nothing to do
200 * any other value means program a timer for the given time
201 */
202 lt_t next_scheduler_update;
203 /* set to true if a call to sup_dispatch() is imminent */
204 bool will_schedule;
205};
206
207/* Contract:
208 * - before calling into sup_ code, or any reservation methods,
209 * update the time with sup_update_time(); and
210 * - after calling into sup_ code, or any reservation methods,
211 * check next_scheduler_update and program timer or trigger
212 * scheduler invocation accordingly.
213 */
214
215void sup_init(struct sup_reservation_environment* sup_env);
216void sup_add_new_reservation(struct sup_reservation_environment* sup_env,
217 struct reservation* new_res);
218void sup_update_time(struct sup_reservation_environment* sup_env, lt_t now);
219struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env);
220
221struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
222 unsigned int id);
223
224#endif
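
To make the contract above concrete, here is a hedged sketch of a plugin-side dispatch path. The function name, the hrtimer argument, and the surrounding plugin state are assumptions for illustration only; the three numbered steps mirror the contract stated in the comment.

static struct task_struct* example_sup_dispatch(
        struct sup_reservation_environment *sup_env,
        struct hrtimer *update_timer)
{
        struct task_struct *next;
        lt_t update;

        /* (1) bring the environment up to date before calling into it */
        sup_update_time(sup_env, litmus_clock());

        /* (2) let the environment pick a client task */
        next = sup_dispatch(sup_env);

        /* (3) honor the requested scheduler update */
        update = sup_env->next_scheduler_update;
        if (update == SUP_RESCHEDULE_NOW)
                litmus_reschedule_local();
        else if (update != SUP_NO_SCHEDULER_UPDATE)
                hrtimer_start(update_timer, ns_to_ktime(update),
                              HRTIMER_MODE_ABS_PINNED);

        return next;
}
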
diff --git a/include/litmus/reservations/table-driven.h b/include/litmus/reservations/table-driven.h
new file mode 100644
index 000000000000..b6302a2f200d
--- /dev/null
+++ b/include/litmus/reservations/table-driven.h
@@ -0,0 +1,23 @@
1#ifndef LITMUS_RESERVATIONS_TABLE_DRIVEN_H
2#define LITMUS_RESERVATIONS_TABLE_DRIVEN_H
3
4#include <litmus/reservations/reservation.h>
5
6struct table_driven_reservation {
7 /* extend basic reservation */
8 struct reservation res;
9
10 lt_t major_cycle;
11 unsigned int next_interval;
12 unsigned int num_intervals;
13 struct lt_interval *intervals;
14
15 /* info about current scheduling slot */
16 struct lt_interval cur_interval;
17 lt_t major_cycle_start;
18};
19
20void table_driven_reservation_init(struct table_driven_reservation *tdres,
21 lt_t major_cycle, struct lt_interval *intervals, unsigned int num_intervals);
22
23#endif
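
A hedged setup sketch for a table-driven reservation that runs in two slots, [0 ms, 2 ms) and [5 ms, 7 ms), of a 10 ms major cycle. The names and literals are illustrative assumptions; lt_t values are interpreted as nanoseconds.

static struct lt_interval my_slots[] = {
        { .start =       0, .end = 2000000 },  /* [0 ms, 2 ms) */
        { .start = 5000000, .end = 7000000 },  /* [5 ms, 7 ms) */
};

static struct table_driven_reservation my_tdres;

static void example_table_setup(void)
{
        table_driven_reservation_init(&my_tdres,
                10000000ULL,                   /* major cycle: 10 ms */
                my_slots, ARRAY_SIZE(my_slots));
}
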
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 000000000000..ac249292e866
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,182 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8#include <litmus/bheap.h>
9
10#define RELEASE_QUEUE_SLOTS 127 /* prime */
11
12struct _rt_domain;
13
14typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
15typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
16
17struct release_queue {
18 /* each slot maintains a list of release heaps sorted
19 * by release time */
20 struct list_head slot[RELEASE_QUEUE_SLOTS];
21};
22
23typedef struct _rt_domain {
24 /* runnable rt tasks are in here */
25 raw_spinlock_t ready_lock;
26 struct bheap ready_queue;
27
28 /* real-time tasks waiting for release are in here */
29 raw_spinlock_t release_lock;
30 struct release_queue release_queue;
31
32#ifdef CONFIG_RELEASE_MASTER
33 int release_master;
34#endif
35
36 /* for moving tasks to the release queue */
37 raw_spinlock_t tobe_lock;
38 struct list_head tobe_released;
39
40 /* how do we check if we need to kick another CPU? */
41 check_resched_needed_t check_resched;
42
43 /* how do we release jobs? */
44 release_jobs_t release_jobs;
45
46 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order;
48} rt_domain_t;
49
50struct release_heap {
51 /* list_head for per-time-slot list */
52 struct list_head list;
53 lt_t release_time;
54 /* all tasks to be released at release_time */
55 struct bheap heap;
56 /* used to trigger the release */
57 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */
61 struct hrtimer_start_on_info info;
62#endif
63 /* required for the timer callback */
64 rt_domain_t* dom;
65};
66
67
68static inline struct task_struct* __next_ready(rt_domain_t* rt)
69{
70 struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
71 if (hn)
72 return bheap2task(hn);
73 else
74 return NULL;
75}
76
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check,
79 release_jobs_t release);
80
81void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task);
84
85static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
88 if (hn)
89 return bheap2task(hn);
90 else
91 return NULL;
92}
93
94static inline struct task_struct* __peek_ready(rt_domain_t* rt)
95{
96 struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
97 if (hn)
98 return bheap2task(hn);
99 else
100 return NULL;
101}
102
103static inline int is_queued(struct task_struct *t)
104{
105 BUG_ON(!tsk_rt(t)->heap_node);
106 return bheap_node_in_heap(tsk_rt(t)->heap_node);
107}
108
109static inline void remove(rt_domain_t* rt, struct task_struct *t)
110{
111 bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
112}
113
114static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
115{
116 unsigned long flags;
117 /* first we need the write lock for rt_ready_queue */
118 raw_spin_lock_irqsave(&rt->ready_lock, flags);
119 __add_ready(rt, new);
120 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
121}
122
123static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
124{
125 unsigned long flags;
126 raw_spin_lock_irqsave(&rt->ready_lock, flags);
127 __merge_ready(rt, tasks);
128 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
129}
130
131static inline struct task_struct* take_ready(rt_domain_t* rt)
132{
133 unsigned long flags;
134 struct task_struct* ret;
135 /* first we need the write lock for rt_ready_queue */
136 raw_spin_lock_irqsave(&rt->ready_lock, flags);
137 ret = __take_ready(rt);
138 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
139 return ret;
140}
141
142
143static inline void add_release(rt_domain_t* rt, struct task_struct *task)
144{
145 unsigned long flags;
146 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
147 __add_release(rt, task);
148 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
149}
150
151#ifdef CONFIG_RELEASE_MASTER
152void __add_release_on(rt_domain_t* rt, struct task_struct *task,
153 int target_cpu);
154
155static inline void add_release_on(rt_domain_t* rt,
156 struct task_struct *task,
157 int target_cpu)
158{
159 unsigned long flags;
160 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
161 __add_release_on(rt, task, target_cpu);
162 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
163}
164#endif
165
166static inline int __jobs_pending(rt_domain_t* rt)
167{
168 return !bheap_empty(&rt->ready_queue);
169}
170
171static inline int jobs_pending(rt_domain_t* rt)
172{
173 unsigned long flags;
174 int ret;
175 /* first we need the write lock for rt_ready_queue */
176 raw_spin_lock_irqsave(&rt->ready_lock, flags);
177 ret = !bheap_empty(&rt->ready_queue);
178 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
179 return ret;
180}
181
182#endif
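
A hedged sketch of how a plugin might instantiate an rt_domain. The my_* callbacks are hypothetical; my_order() must be a bheap_prio_t that defines ready-queue priority, my_check_resched() presumably triggers a preemption when warranted, and the release callback merges freshly released jobs under the ready lock.

static rt_domain_t my_domain;

static int my_order(struct bheap_node *a, struct bheap_node *b); /* hypothetical */

static int my_check_resched(rt_domain_t *rt)
{
        /* compare __next_ready(rt) against the currently scheduled job and
         * call litmus_reschedule()/litmus_reschedule_local() if it should
         * preempt; return 1 if a reschedule was triggered */
        return 0;
}

static void my_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&rt->ready_lock, flags);
        __merge_ready(rt, tasks);
        my_check_resched(rt);
        raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
}

/* during plugin activation: */
/* rt_domain_init(&my_domain, my_order, my_check_resched, my_release_jobs); */
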
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 8c7869f46bfb..9b291343714f 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -62,6 +62,7 @@ typedef enum {
62#define LITMUS_MAX_PRIORITY 512 62#define LITMUS_MAX_PRIORITY 512
63#define LITMUS_HIGHEST_PRIORITY 1 63#define LITMUS_HIGHEST_PRIORITY 1
64#define LITMUS_LOWEST_PRIORITY (LITMUS_MAX_PRIORITY - 1) 64#define LITMUS_LOWEST_PRIORITY (LITMUS_MAX_PRIORITY - 1)
65#define LITMUS_NO_PRIORITY UINT_MAX
65 66
66/* Provide generic comparison macros for userspace, 67/* Provide generic comparison macros for userspace,
67 * in case that we change this later. */ 68 * in case that we change this later. */
@@ -71,6 +72,46 @@ typedef enum {
71 ((p) >= LITMUS_HIGHEST_PRIORITY && \ 72 ((p) >= LITMUS_HIGHEST_PRIORITY && \
72 (p) <= LITMUS_LOWEST_PRIORITY) 73 (p) <= LITMUS_LOWEST_PRIORITY)
73 74
75/* reservation support */
76
77typedef enum {
78 PERIODIC_POLLING = 10,
79 SPORADIC_POLLING,
80 TABLE_DRIVEN,
81} reservation_type_t;
82
83struct lt_interval {
84 lt_t start;
85 lt_t end;
86};
87
88#ifndef __KERNEL__
89#define __user
90#endif
91
92struct reservation_config {
93 unsigned int id;
94 lt_t priority;
95 int cpu;
96
97 union {
98 struct {
99 lt_t period;
100 lt_t budget;
101 lt_t relative_deadline;
102 lt_t offset;
103 } polling_params;
104
105 struct {
106 lt_t major_cycle_length;
107 unsigned int num_intervals;
108 struct lt_interval __user *intervals;
109 } table_driven_params;
110 };
111};
112
113/* regular sporadic task support */
114
74struct rt_task { 115struct rt_task {
75 lt_t exec_cost; 116 lt_t exec_cost;
76 lt_t period; 117 lt_t period;
@@ -83,54 +124,6 @@ struct rt_task {
83 release_policy_t release_policy; 124 release_policy_t release_policy;
84}; 125};
85 126
86union np_flag {
87 uint64_t raw;
88 struct {
89 /* Is the task currently in a non-preemptive section? */
90 uint64_t flag:31;
91 /* Should the task call into the scheduler? */
92 uint64_t preempt:1;
93 } np;
94};
95
96/* The definition of the data that is shared between the kernel and real-time
97 * tasks via a shared page (see litmus/ctrldev.c).
98 *
99 * WARNING: User space can write to this, so don't trust
100 * the correctness of the fields!
101 *
102 * This servees two purposes: to enable efficient signaling
103 * of non-preemptive sections (user->kernel) and
104 * delayed preemptions (kernel->user), and to export
105 * some real-time relevant statistics such as preemption and
106 * migration data to user space. We can't use a device to export
107 * statistics because we want to avoid system call overhead when
108 * determining preemption/migration overheads).
109 */
110struct control_page {
111 /* This flag is used by userspace to communicate non-preempive
112 * sections. */
113 volatile union np_flag sched;
114
115 volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is
116 * handled. */
117
118 /* Locking overhead tracing: userspace records here the time stamp
119 * and IRQ counter prior to starting the system call. */
120 uint64_t ts_syscall_start; /* Feather-Trace cycles */
121 uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
122 * started. */
123
124 /* to be extended */
125};
126
127/* Expected offsets within the control page. */
128
129#define LITMUS_CP_OFFSET_SCHED 0
130#define LITMUS_CP_OFFSET_IRQ_COUNT 8
131#define LITMUS_CP_OFFSET_TS_SC_START 16
132#define LITMUS_CP_OFFSET_IRQ_SC_START 24
133
134/* don't export internal data structures to user space (liblitmus) */ 127/* don't export internal data structures to user space (liblitmus) */
135#ifdef __KERNEL__ 128#ifdef __KERNEL__
136 129
@@ -177,9 +170,6 @@ struct pfair_param;
177 * be explicitly set up before the task set is launched. 170 * be explicitly set up before the task set is launched.
178 */ 171 */
179struct rt_param { 172struct rt_param {
180 /* Generic flags available for plugin-internal use. */
181 unsigned int flags:8;
182
183 /* do we need to check for srp blocking? */ 173 /* do we need to check for srp blocking? */
184 unsigned int srp_non_recurse:1; 174 unsigned int srp_non_recurse:1;
185 175
@@ -207,13 +197,19 @@ struct rt_param {
207 /* timing parameters */ 197 /* timing parameters */
208 struct rt_job job_params; 198 struct rt_job job_params;
209 199
200
201 /* Special handling for periodic tasks executing
202 * clock_nanosleep(CLOCK_MONOTONIC, ...).
203 */
204 lt_t nanosleep_wakeup;
205 unsigned int doing_abs_nanosleep:1;
206
210 /* Should the next job be released at some time other than 207 /* Should the next job be released at some time other than
211 * just period time units after the last release? 208 * just period time units after the last release?
212 */ 209 */
213 unsigned int sporadic_release:1; 210 unsigned int sporadic_release:1;
214 lt_t sporadic_release_time; 211 lt_t sporadic_release_time;
215 212
216
217 /* task representing the current "inherited" task 213 /* task representing the current "inherited" task
218 * priority, assigned by inherit_priority and 214 * priority, assigned by inherit_priority and
219 * return priority in the scheduler plugins. 215 * return priority in the scheduler plugins.
@@ -255,7 +251,10 @@ struct rt_param {
255 volatile int linked_on; 251 volatile int linked_on;
256 252
257 /* PFAIR/PD^2 state. Allocated on demand. */ 253 /* PFAIR/PD^2 state. Allocated on demand. */
258 struct pfair_param* pfair; 254 union {
255 void *plugin_state;
256 struct pfair_param *pfair;
257 };
259 258
260 /* Fields saved before BE->RT transition. 259 /* Fields saved before BE->RT transition.
261 */ 260 */
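
The reservation_config structure added above is shared with userspace. A hedged, userspace-style sketch of describing a PERIODIC_POLLING reservation follows; the id, CPU, and timing values are arbitrary illustrations, and how the structure is handed to the kernel (the reservation-create backend) is defined by the plugin and syscall layer, not by this header.

struct reservation_config cfg = {
        .id       = 42,                  /* chosen by the user */
        .priority = LITMUS_NO_PRIORITY,  /* interpretation is plugin-specific */
        .cpu      = 1,
        .polling_params = {
                .period            = 10000000ULL, /* 10 ms in ns */
                .budget            =  2000000ULL, /*  2 ms in ns */
                .relative_deadline = 10000000ULL, /* implicit deadline */
                .offset            = 0,
        },
};
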
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 000000000000..0923f26b745a
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,180 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10#ifdef CONFIG_LITMUS_LOCKING
11#include <litmus/locking.h>
12#endif
13
14/************************ setup/tear down ********************/
15
16typedef long (*activate_plugin_t) (void);
17typedef long (*deactivate_plugin_t) (void);
18
19struct domain_proc_info;
20typedef long (*get_domain_proc_info_t) (struct domain_proc_info **info);
21
22
23/********************* scheduler invocation ******************/
24/* The main scheduling function, called to select the next task to dispatch. */
25typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
26/* Clean up after the task switch has occurred.
27 * This function is called after every (even non-rt) task switch.
28 */
29typedef void (*finish_switch_t)(struct task_struct *prev);
30
31
32/* When waiting for the stack of the task selected by the plugin
33 * to become available, this callback is invoked to give the
34 * plugin a chance to cancel the wait. If the plugin returns false,
35 * the scheduler is invoked again. */
36typedef bool (*should_wait_for_stack_t)(struct task_struct *next);
37
38/* After a pull migration (which involves dropping scheduler locks),
39 * the plugin is given the chance to validate that the task is still
40 * the right one. If the plugin returns false, the scheduler
41 * will be invoked again. */
42typedef bool (*post_migration_validate_t)(struct task_struct *next);
43
44/* After dropping the lock to facilitate a pull migration, the task
45 * state may have changed. In this case, the core notifies the plugin
46 * with this callback and then invokes the scheduler again. */
47typedef void (*next_became_invalid_t)(struct task_struct *next);
48
49/********************* task state changes ********************/
50
51/* Called to setup a new real-time task.
52 * Release the first job, enqueue, etc.
53 * Task may already be running.
54 */
55typedef void (*task_new_t) (struct task_struct *task,
56 int on_rq,
57 int running);
58
59/* Called when userspace seeks to set new task parameters for a task
60 * that is already in real-time mode (i.e., is_realtime(task)).
61 */
62typedef long (*task_change_params_t) (struct task_struct *task,
63 struct rt_task *new_params);
64
65/* Called to re-introduce a task after blocking.
66 * Can potentially be called multiple times.
67 */
68typedef void (*task_wake_up_t) (struct task_struct *task);
69/* called to notify the plugin of a blocking real-time task
70 * it will only be called for real-time tasks and before schedule is called */
71typedef void (*task_block_t) (struct task_struct *task);
72/* Called when a real-time task exits or changes to a different scheduling
73 * class.
74 * Free any allocated resources
75 */
76typedef void (*task_exit_t) (struct task_struct *);
77
78/* task_exit() is called with interrupts disabled and runqueue locks held, and
79 * thus cannot block or spin. task_cleanup() is called sometime later
80 * without any locks being held.
81 */
82typedef void (*task_cleanup_t) (struct task_struct *);
83
84#ifdef CONFIG_LITMUS_LOCKING
85/* Called when the current task attempts to create a new lock of a given
86 * protocol type. */
87typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
88 void* __user config);
89#endif
90
91
92/********************* sys call backends ********************/
93/* This function causes the caller to sleep until the next release */
94typedef long (*complete_job_t) (void);
95
96typedef long (*admit_task_t)(struct task_struct* tsk);
97
98/* return false to indicate that the plugin does not support forking */
99typedef bool (*fork_task_t)(struct task_struct* tsk);
100
101typedef long (*wait_for_release_at_t)(lt_t release_time);
102
103/* Informs the plugin when a synchronous release takes place. */
104typedef void (*synchronous_release_at_t)(lt_t time_zero);
105
106/* How much budget has the current task consumed so far, and how much
107 * has it left? The default implementation ties into the per-task
108 * budget enforcement code. Plugins can override this to report
109 * reservation-specific values. */
110typedef void (*current_budget_t)(lt_t *used_so_far, lt_t *remaining);
111
112/* Reservation creation/removal backends. Meaning of reservation_type and
113 * reservation_id are entirely plugin-specific. */
114typedef long (*reservation_create_t)(int reservation_type, void* __user config);
115typedef long (*reservation_destroy_t)(unsigned int reservation_id, int cpu);
116
117/************************ misc routines ***********************/
118
119
120struct sched_plugin {
121 struct list_head list;
122 /* basic info */
123 char *plugin_name;
124
125 /* setup */
126 activate_plugin_t activate_plugin;
127 deactivate_plugin_t deactivate_plugin;
128 get_domain_proc_info_t get_domain_proc_info;
129
130 /* scheduler invocation */
131 schedule_t schedule;
132 finish_switch_t finish_switch;
133
134 /* control over pull migrations */
135 should_wait_for_stack_t should_wait_for_stack;
136 next_became_invalid_t next_became_invalid;
137 post_migration_validate_t post_migration_validate;
138
139 /* syscall backend */
140 complete_job_t complete_job;
141 wait_for_release_at_t wait_for_release_at;
142 synchronous_release_at_t synchronous_release_at;
143
144 /* task state changes */
145 admit_task_t admit_task;
146 fork_task_t fork_task;
147
148 task_new_t task_new;
149 task_wake_up_t task_wake_up;
150 task_block_t task_block;
151
152 /* optional: support task parameter changes at runtime */
153 task_change_params_t task_change_params;
154
155 task_exit_t task_exit;
156 task_cleanup_t task_cleanup;
157
158 current_budget_t current_budget;
159
160 /* Reservation support */
161 reservation_create_t reservation_create;
162 reservation_destroy_t reservation_destroy;
163
164#ifdef CONFIG_LITMUS_LOCKING
165 /* locking protocols */
166 allocate_lock_t allocate_lock;
167#endif
168} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
169
170
171extern struct sched_plugin *litmus;
172
173int register_sched_plugin(struct sched_plugin* plugin);
174struct sched_plugin* find_sched_plugin(const char* name);
175void print_sched_plugins(struct seq_file *m);
176
177
178extern struct sched_plugin linux_sched_plugin;
179
180#endif
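
For context, a hedged sketch of a minimal plugin declaration. The demo_* callbacks are hypothetical and their prototypes are omitted; unset callbacks are assumed to fall back to core-provided defaults when the plugin is registered.

static struct sched_plugin demo_plugin = {
        .plugin_name  = "DEMO",
        .schedule     = demo_schedule,
        .admit_task   = demo_admit_task,
        .task_new     = demo_task_new,
        .task_wake_up = demo_task_wake_up,
        .task_block   = demo_task_block,
        .task_exit    = demo_task_exit,
        .complete_job = complete_job,   /* assumed default from litmus/jobs.h */
};

static int __init init_demo(void)
{
        return register_sched_plugin(&demo_plugin);
}
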
diff --git a/include/litmus/srp.h b/include/litmus/srp.h
new file mode 100644
index 000000000000..c9a4552b2bf3
--- /dev/null
+++ b/include/litmus/srp.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_SRP_H
2#define LITMUS_SRP_H
3
4struct srp_semaphore;
5
6struct srp_priority {
7 struct list_head list;
8 unsigned int priority;
9 pid_t pid;
10};
11#define list2prio(l) list_entry(l, struct srp_priority, list)
12
13/* struct for uniprocessor SRP "semaphore" */
14struct srp_semaphore {
15 struct litmus_lock litmus_lock;
16 struct srp_priority ceiling;
17 struct task_struct* owner;
18 int cpu; /* cpu associated with this "semaphore" and resource */
19};
20
21/* map a task to its SRP preemption level priority */
22typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
23/* Must be updated by each plugin that uses SRP.*/
24extern srp_prioritization_t get_srp_prio;
25
26struct srp_semaphore* allocate_srp_semaphore(void);
27
28#endif
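
A hedged sketch of how a fixed-priority plugin might publish its preemption-level mapping to the SRP layer. The demo_srp_prio name is hypothetical, and the use of task_params.priority as the preemption level is an assumption typical of fixed-priority scheduling.

static unsigned int demo_srp_prio(struct task_struct *t)
{
        /* assumed mapping: a task's preemption level is its fixed priority */
        return tsk_rt(t)->task_params.priority;
}

/* typically done in the plugin's activate_plugin() callback: */
/* get_srp_prio = demo_srp_prio; */
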
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 000000000000..ce1347c355f8
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
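
A hedged sketch of the sleeping side of a priority-ordered wait queue, as a locking protocol might use it. The function is illustrative only: a real caller serializes against the waker with a protocol-specific lock, and the waker is assumed to dequeue via __waitqueue_remove_first() and call wake_up_process() on the returned task.

static void example_prio_wait(wait_queue_head_t *wq, lt_t prio)
{
        prio_wait_queue_t pwq;

        init_prio_waitqueue_entry(&pwq, current, prio);
        set_current_state(TASK_UNINTERRUPTIBLE);
        add_wait_queue_prio_exclusive(wq, &pwq);
        /* sleep until the wake-up side removes us and wakes the task */
        schedule();
}
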
diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
new file mode 100644
index 000000000000..80e6928d9c55
--- /dev/null
+++ b/kernel/sched/litmus.c
@@ -0,0 +1,385 @@
1/* This file is included from kernel/sched.c */
2
3#include "sched.h"
4
5#include <litmus/trace.h>
6#include <litmus/sched_trace.h>
7
8#include <litmus/litmus.h>
9#include <litmus/budget.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/preempt.h>
12#include <litmus/np.h>
13
14static void update_time_litmus(struct rq *rq, struct task_struct *p)
15{
16 u64 delta = rq->clock - p->se.exec_start;
17 if (unlikely((s64)delta < 0))
18 delta = 0;
19 /* per job counter */
20 p->rt_param.job_params.exec_time += delta;
21 /* task counter */
22 p->se.sum_exec_runtime += delta;
23 if (delta) {
24 TRACE_TASK(p, "charged %llu exec time (total:%llu, rem:%llu)\n",
25 delta, p->rt_param.job_params.exec_time, budget_remaining(p));
26 }
27 /* sched_clock() */
28 p->se.exec_start = rq->clock;
29 cpuacct_charge(p, delta);
30}
31
32static void double_rq_lock(struct rq *rq1, struct rq *rq2);
33static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
34
35static struct task_struct *
36litmus_schedule(struct rq *rq, struct task_struct *prev)
37{
38 struct task_struct *next;
39
40#ifdef CONFIG_SMP
41 struct rq* other_rq;
42 long was_running;
43 int from_where;
44 lt_t _maybe_deadlock = 0;
45#endif
46
47 /* let the plugin schedule */
48 next = litmus->schedule(prev);
49
50 sched_state_plugin_check();
51
52#ifdef CONFIG_SMP
53 /* check if a global plugin pulled a task from a different RQ */
54 if (next && task_rq(next) != rq) {
55 /* we need to migrate the task */
56 other_rq = task_rq(next);
57 from_where = other_rq->cpu;
58 TRACE_TASK(next, "migrate from %d\n", from_where);
59
60 /* while we drop the lock, the prev task could change its
61 * state
62 */
63 BUG_ON(prev != current);
64 was_running = is_current_running();
65
66 /* Don't race with a concurrent switch. This could deadlock in
67 * the case of cross or circular migrations. It's the job of
68 * the plugin to make sure that doesn't happen.
69 */
70 TRACE_TASK(next, "stack_in_use=%d\n",
71 next->rt_param.stack_in_use);
72 if (next->rt_param.stack_in_use != NO_CPU) {
73 TRACE_TASK(next, "waiting to deschedule\n");
74 _maybe_deadlock = litmus_clock();
75 }
76
77 raw_spin_unlock(&rq->lock);
78
79 while (next->rt_param.stack_in_use != NO_CPU) {
80 cpu_relax();
81 mb();
82 if (next->rt_param.stack_in_use == NO_CPU)
83 TRACE_TASK(next,"descheduled. Proceeding.\n");
84
85 if (!litmus->should_wait_for_stack(next)) {
86 /* plugin aborted the wait */
87 TRACE_TASK(next,
88 "plugin gave up waiting for stack\n");
89 next = NULL;
90 /* Make sure plugin is given a chance to
91 * reconsider. */
92 litmus_reschedule_local();
93 /* give up */
94 raw_spin_lock(&rq->lock);
95 goto out;
96 }
97
98 if (from_where != task_rq(next)->cpu) {
99 /* The plugin should not give us something
100 * that other cores are trying to pull, too */
101 TRACE_TASK(next, "next invalid: task keeps "
102 "shifting around!? "
103 "(%d->%d)\n",
104 from_where,
105 task_rq(next)->cpu);
106
107 /* bail out */
108 raw_spin_lock(&rq->lock);
109 litmus->next_became_invalid(next);
110 litmus_reschedule_local();
111 next = NULL;
112 goto out;
113 }
114
115 if (lt_before(_maybe_deadlock + 1000000000L,
116 litmus_clock())) {
117 /* We've been spinning for 1s.
118 * Something can't be right!
119 * Let's abandon the task and bail out; at least
120 * we will have debug info instead of a hard
121 * deadlock.
122 */
123#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK
124 BUG();
125#else
126 TRACE_TASK(next,"stack too long in use. "
127 "Deadlock?\n");
128 next = NULL;
129
130 /* bail out */
131 raw_spin_lock(&rq->lock);
132 goto out;
133#endif
134 }
135 }
136#ifdef __ARCH_WANT_UNLOCKED_CTXSW
137 if (next->on_cpu)
138 TRACE_TASK(next, "waiting for !oncpu");
139 while (next->on_cpu) {
140 cpu_relax();
141 mb();
142 }
143#endif
144 double_rq_lock(rq, other_rq);
145 if (other_rq == task_rq(next) &&
146 next->rt_param.stack_in_use == NO_CPU) {
147 /* ok, we can grab it */
148 set_task_cpu(next, rq->cpu);
149 /* release the other CPU's runqueue, but keep ours */
150 raw_spin_unlock(&other_rq->lock);
151 } else {
152 /* Either it moved or the stack was claimed; either case is
153 * bad and forces us to abort the migration. */
154 TRACE_TASK(next, "next invalid: no longer available\n");
155 raw_spin_unlock(&other_rq->lock);
156 litmus->next_became_invalid(next);
157 next = NULL;
158 goto out;
159 }
160
161 if (!litmus->post_migration_validate(next)) {
162 TRACE_TASK(next, "plugin deems task now invalid\n");
163 litmus_reschedule_local();
164 next = NULL;
165 }
166 }
167#endif
168
169 /* check if the task became invalid while we dropped the lock */
170 if (next && (!is_realtime(next) || !tsk_rt(next)->present)) {
171 TRACE_TASK(next,
172 "BAD: next (no longer?) valid\n");
173 litmus->next_became_invalid(next);
174 litmus_reschedule_local();
175 next = NULL;
176 }
177
178 if (next) {
179#ifdef CONFIG_SMP
180 next->rt_param.stack_in_use = rq->cpu;
181#else
182 next->rt_param.stack_in_use = 0;
183#endif
184 update_rq_clock(rq);
185 next->se.exec_start = rq->clock;
186 }
187
188out:
189 update_enforcement_timer(next);
190 return next;
191}
192
193static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
194 int flags)
195{
196 tsk_rt(p)->present = 1;
197 if (flags & ENQUEUE_WAKEUP) {
198 sched_trace_task_resume(p);
199 /* LITMUS^RT plugins need to update the state
200 * _before_ making it available in global structures.
201 * Linux gets away with being lazy about the task state
202 * update. We can't do that, hence we update the task
203 * state already here.
204 *
205 * WARNING: this needs to be re-evaluated when porting
206 * to newer kernel versions.
207 */
208 p->state = TASK_RUNNING;
209 litmus->task_wake_up(p);
210
211 rq->litmus.nr_running++;
212 } else {
213 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
214 p->se.exec_start = rq->clock;
215 }
216}
217
218static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
219 int flags)
220{
221 if (flags & DEQUEUE_SLEEP) {
222#ifdef CONFIG_SCHED_TASK_TRACE
223 tsk_rt(p)->job_params.last_suspension = litmus_clock();
224#endif
225 litmus->task_block(p);
226 tsk_rt(p)->present = 0;
227 sched_trace_task_block(p);
228
229 rq->litmus.nr_running--;
230 } else
231 TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
232}
233
234static void yield_task_litmus(struct rq *rq)
235{
236 TS_SYSCALL_IN_START;
237 TS_SYSCALL_IN_END;
238
239 BUG_ON(rq->curr != current);
240 /* sched_yield() is called to trigger delayed preemptions.
241 * Thus, mark the current task as needing to be rescheduled.
242 * This will cause the scheduler plugin to be invoked, which can
243 * then determine if a preemption is still required.
244 */
245 clear_exit_np(current);
246 litmus_reschedule_local();
247
248 TS_SYSCALL_OUT_START;
249}
250
251/* Plugins are responsible for this.
252 */
253static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
254{
255}
256
257static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
258{
259}
260
261/* pick_next_task_litmus() - wrapper around litmus_schedule()
262 *
263 * Returns the next task to be scheduled.
264 */
265static struct task_struct *pick_next_task_litmus(struct rq *rq,
266 struct task_struct *prev, struct pin_cookie cookie)
267{
268 struct task_struct *next;
269
270 if (is_realtime(prev))
271 update_time_litmus(rq, prev);
272
273 lockdep_unpin_lock(&rq->lock, cookie);
274 TS_PLUGIN_SCHED_START;
275 next = litmus_schedule(rq, prev);
276 TS_PLUGIN_SCHED_END;
277 lockdep_repin_lock(&rq->lock, cookie);
278
279 /* This is a bit backwards: the other classes call put_prev_task()
280 * _after_ they've determined that the class has some queued tasks.
281 * We can't determine this easily because each plugin manages its own
282 * ready queues, and because in the case of globally shared queues,
283 * we really don't know whether we'll have something ready even if
284 * we test here. So we do it in reverse: first ask the plugin to
285 * provide a task, and if we find one, call put_prev_task() on the
286 * previously scheduled task.
287 */
288 if (next)
289 put_prev_task(rq, prev);
290
291 return next;
292}
293
294static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
295{
296 if (is_realtime(p) && !queued) {
297 update_time_litmus(rq, p);
298 /* budget check for QUANTUM_ENFORCEMENT tasks */
299 if (budget_enforced(p) && budget_exhausted(p)) {
300 litmus_reschedule_local();
301 }
302 }
303}
304
305static void switched_to_litmus(struct rq *rq, struct task_struct *p)
306{
307}
308
309static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
310 int oldprio)
311{
312}
313
314unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
315{
316 /* return infinity */
317 return 0;
318}
319
320/* This is called when a task becomes a real-time task, either due to a SCHED_*
321 * class transition or due to PI mutex inheritance. We don't handle Linux PI
322 * mutex inheritance yet (and probably never will). Use the LITMUS^RT-provided
323 * synchronization primitives instead.
324 */
325static void set_curr_task_litmus(struct rq *rq)
326{
327 rq->curr->se.exec_start = rq->clock;
328}
329
330
331#ifdef CONFIG_SMP
332/* execve tries to rebalance the task in this scheduling domain.
333 * We don't care about the scheduling domain; this can get called from
334 * exec, fork, and wakeup.
335 */
336static int
337select_task_rq_litmus(struct task_struct *p, int cpu, int sd_flag, int flags)
338{
339 /* preemption is already disabled.
340 * We don't want to change cpu here
341 */
342 return task_cpu(p);
343}
344#endif
345
346static void update_curr_litmus(struct rq *rq)
347{
348 struct task_struct *p = rq->curr;
349
350 if (!is_realtime(p))
351 return;
352
353 update_time_litmus(rq, p);
354}
355
356const struct sched_class litmus_sched_class = {
357 /* Since commit 34f971f6, the stop/migrate worker threads have a class
358 * of their own, which is the highest-priority class. We don't support
359 * CPU hotplug or CPU throttling. This allows LITMUS^RT to use up to
360 * 1.0 CPU capacity.
361 */
362 .next = &dl_sched_class,
363 .enqueue_task = enqueue_task_litmus,
364 .dequeue_task = dequeue_task_litmus,
365 .yield_task = yield_task_litmus,
366
367 .check_preempt_curr = check_preempt_curr_litmus,
368
369 .pick_next_task = pick_next_task_litmus,
370 .put_prev_task = put_prev_task_litmus,
371
372#ifdef CONFIG_SMP
373 .select_task_rq = select_task_rq_litmus,
374#endif
375
376 .set_curr_task = set_curr_task_litmus,
377 .task_tick = task_tick_litmus,
378
379 .get_rr_interval = get_rr_interval_litmus,
380
381 .prio_changed = prio_changed_litmus,
382 .switched_to = switched_to_litmus,
383
384 .update_curr = update_curr_litmus,
385};
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 5408ef6b159b..cf011532f6a3 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -1,5 +1,184 @@
1menu "LITMUS^RT" 1menu "LITMUS^RT"
2 2
3menu "Scheduling"
4
5config RELEASE_MASTER
6 bool "Release-master Support"
7 depends on SMP
8 default n
9 help
10 Allow one processor to act as a dedicated interrupt processor
11 that services all timer interrupts, but that does not schedule
12 real-time tasks. See the RTSS'09 paper for details
13 (http://www.cs.unc.edu/~anderson/papers.html).
14
15config PREFER_LOCAL_LINKING
16 bool "Link newly arrived tasks locally if possible"
17 depends on SMP
18 default y
19 help
20 In linking-based schedulers such as GSN-EDF, if an idle CPU processes
21 a job arrival (i.e., when a job resumed or was released), it can
22 either link the task to itself and schedule it immediately (to avoid
23 unnecessary scheduling latency) or it can try to link it to the CPU
24 where it executed previously (to maximize cache affinity, at the
25 expense of increased latency due to the need to send an IPI).
26
27 In lightly loaded systems, this option can significantly reduce
28 scheduling latencies. In heavily loaded systems (where CPUs are
29 rarely idle), it will likely make little difference.
30
31 If unsure, say yes.
32
33config LITMUS_QUANTUM_LENGTH_US
34 int "quantum length (in us)"
35 default 1000
36 range 500 10000
37 help
38 Set the desired quantum length, in microseconds, which determines
39 the granularity of scheduling in quantum-driven plugins (primarily
40 PFAIR). This parameter does not affect event-driven plugins (such as
41 the EDF-based plugins and P-FP).
42 Default: 1000us = 1ms.
43
44config BUG_ON_MIGRATION_DEADLOCK
45 bool "Panic on suspected migration deadlock"
46 default y
47 help
48 This is a debugging option. The LITMUS^RT migration support code for
49 global scheduling contains a simple heuristic to detect when the
50 system deadlocks due to circular stack dependencies.
51
52 For example, such a deadlock exists if CPU 0 waits for task A's stack
53 to become available while using task B's stack, and CPU 1 waits for
54 task B's stack to become available while using task A's stack. Such
55 a situation can arise in (buggy) global scheduling plugins.
56
57 With this option enabled, such a scenario will result in a BUG().
58 You can turn off this option when debugging on real hardware (e.g.,
59 to rescue traces, etc. that would be hard to get after a panic).
60
61 Only turn this off if you really know what you are doing. If this
62 BUG() triggers, the scheduler is broken and turning off this option
63 won't fix it.
64
65
66endmenu
67
68menu "Real-Time Synchronization"
69
70config NP_SECTION
71 bool "Non-preemptive section support"
72 default y
73 help
74 Allow tasks to become non-preemptable.
75 Note that plugins still need to explicitly support non-preemptivity.
76 Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support.
77
78 This is required to support locking protocols such as the FMLP.
79 If disabled, all tasks will be considered preemptable at all times.
80
81config LITMUS_LOCKING
82 bool "Support for real-time locking protocols"
83 depends on NP_SECTION
84 default y
85 help
86 Enable LITMUS^RT's multiprocessor real-time locking protocols with
87 predictable maximum blocking times.
88
89 Say Yes if you want to include locking protocols such as the FMLP and
90 Baker's SRP.
91
92endmenu
93
94menu "Performance Enhancements"
95
96config SCHED_CPU_AFFINITY
97 bool "Local Migration Affinity"
98 depends on X86 && SYSFS
99 default y
100 help
101 Rescheduled tasks prefer CPUs close to their previously used CPU.
102 This may improve cache performance through possible preservation of
103 cache affinity, at the expense of (slightly) more involved scheduling
104 logic.
105
106 Warning: May make bugs harder to find since tasks may migrate less often.
107
108 NOTES:
109 * Feature is not utilized by PFair/PD^2.
110
111 Say Yes if unsure.
112
113config ALLOW_EARLY_RELEASE
114 bool "Allow Early Releasing"
115 default y
116 help
117 Allow tasks to release jobs early (while still maintaining job
118 precedence constraints). Only supported by EDF schedulers. Early
119 releasing must be explicitly requested by real-time tasks via
120 the task_params passed to sys_set_task_rt_param().
121
122 Early releasing can improve job response times while maintaining
123 real-time correctness. However, it can easily peg your CPUs
124 since tasks never suspend to wait for their next job. As such, early
125 releasing is really only useful in the context of implementing
126 bandwidth servers, interrupt handling threads, or short-lived
127 computations.
128
129 Beware that early releasing may affect real-time analysis
130 if using locking protocols or I/O.
131
132 Say Yes if unsure.
133
134choice
135 prompt "EDF Tie-Break Behavior"
136 default EDF_TIE_BREAK_LATENESS_NORM
137 help
138 Allows the configuration of tie-breaking behavior when the deadlines
139 of two EDF-scheduled tasks are equal.
140
141 config EDF_TIE_BREAK_LATENESS
142 bool "Lateness-based Tie Break"
143 help
144 Break ties between two jobs, A and B, based upon the lateness of their
145 prior jobs. The job with the greatest lateness has priority. Note that
146 lateness has a negative value if the prior job finished before its
147 deadline.
148
149 config EDF_TIE_BREAK_LATENESS_NORM
150 bool "Normalized Lateness-based Tie Break"
151 help
152 Break ties between two jobs, A and B, based upon the lateness, normalized
153 by relative deadline, of their prior jobs. The job with the greatest
154 normalized lateness has priority. Note that lateness has a negative value
155 if the prior job finished before its deadline.
156
157 Normalized lateness tie-breaks are likely preferable to non-normalized
158 tie-breaks if the execution times and/or relative deadlines of tasks in a
159 task set vary greatly.
160
161 config EDF_TIE_BREAK_HASH
162 bool "Hash-based Tie Breaks"
163 help
164 Break ties between two jobs, A and B, with equal deadlines by using a
165 uniform hash; i.e., hash(A.pid, A.job_num) < hash(B.pid, B.job_num).
166 Job A has a ~50% chance of winning a given tie-break.
167
168 config EDF_PID_TIE_BREAK
169 bool "PID-based Tie Breaks"
170 help
171 Break ties based upon OS-assigned thread IDs. Use this option if
172 required by the algorithm's real-time analysis or if per-task
173 response-time jitter must be minimized.
174
175 NOTES:
176 * This tie-breaking method was the default in LITMUS^RT 2012.2 and earlier.
177
178endchoice
179
180endmenu
181
3menu "Tracing" 182menu "Tracing"
4 183
5config FEATHER_TRACE 184config FEATHER_TRACE
@@ -154,6 +333,40 @@ config SCHED_DEBUG_TRACE_CALLER
154 333
155 If unsure, say No. 334 If unsure, say No.
156 335
336config PREEMPT_STATE_TRACE
337 bool "Trace preemption state machine transitions"
338 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
339 default n
340 help
341 With this option enabled, each CPU will log when it transitions
342 states in the preemption state machine. This state machine is
343 used to determine how to react to IPIs (avoid races with in-flight IPIs).
344
345 Warning: this creates a lot of information in the debug trace. Only
346 recommended when you are debugging preemption-related races.
347
348 If unsure, say No.
349
350config REPORT_TIMER_LATENCY
351 bool "Warn when hrtimers incur large latency"
352 default n
353 help
354 With this option enabled, the hrtimer code will printk()
355 a warning when a timer fires significantly later than its intended
356 expiration time. This can be useful when debugging latency issues.
357
358 If unsure, say No.
359
360config REPORT_TIMER_LATENCY_THRESHOLD
361 int "Maximum acceptable timer latency (in nanoseconds)"
362 depends on REPORT_TIMER_LATENCY
363 range 10000 100000000
364 default 1000000
365 help
366 If a timer fires more than the given threshold after its intended
367 expiration time, a warning message is printed to the kernel log.
368 By default, the threshold is one millisecond (= one million nanoseconds).
369
157endmenu 370endmenu
158 371
159endmenu 372endmenu
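
To illustrate how the new options combine, a possible configuration fragment follows. It is illustrative only (the option names are taken verbatim from the Kconfig entries above; the chosen values largely match the stated defaults and are not a recommendation):

CONFIG_RELEASE_MASTER=y
CONFIG_PREFER_LOCAL_LINKING=y
CONFIG_LITMUS_QUANTUM_LENGTH_US=1000
CONFIG_BUG_ON_MIGRATION_DEADLOCK=y
CONFIG_NP_SECTION=y
CONFIG_LITMUS_LOCKING=y
CONFIG_SCHED_CPU_AFFINITY=y
CONFIG_ALLOW_EARLY_RELEASE=y
CONFIG_EDF_TIE_BREAK_LATENESS_NORM=y
# CONFIG_PREEMPT_STATE_TRACE is not set
CONFIG_REPORT_TIMER_LATENCY=y
CONFIG_REPORT_TIMER_LATENCY_THRESHOLD=1000000
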
diff --git a/litmus/Makefile b/litmus/Makefile
index 6318f1c6fac8..56499562defb 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -2,7 +2,27 @@
2# Makefile for LITMUS^RT 2# Makefile for LITMUS^RT
3# 3#
4 4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 binheap.o \
20 ctrldev.o \
21 uncachedev.o
22
5obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 23obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
6obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 24obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
7obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 25obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
8obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 26obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
27
28obj-y += reservations/
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..2707e0122b6d
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,316 @@
1#include <linux/bug.h>
2#include <linux/kernel.h>
3#include <litmus/bheap.h>
4
5void bheap_init(struct bheap* heap)
6{
7 heap->head = NULL;
8 heap->min = NULL;
9}
10
11void bheap_node_init(struct bheap_node** _h, void* value)
12{
13 struct bheap_node* h = *_h;
14 h->parent = NULL;
15 h->next = NULL;
16 h->child = NULL;
17 h->degree = NOT_IN_HEAP;
18 h->value = value;
19 h->ref = _h;
20}
21
22
23/* make child a subtree of root */
24static void __bheap_link(struct bheap_node* root,
25 struct bheap_node* child)
26{
27 child->parent = root;
28 child->next = root->child;
29 root->child = child;
30 root->degree++;
31}
32
33/* merge root lists */
34static struct bheap_node* __bheap_merge(struct bheap_node* a,
35 struct bheap_node* b)
36{
37 struct bheap_node* head = NULL;
38 struct bheap_node** pos = &head;
39
40 while (a && b) {
41 if (a->degree < b->degree) {
42 *pos = a;
43 a = a->next;
44 } else {
45 *pos = b;
46 b = b->next;
47 }
48 pos = &(*pos)->next;
49 }
50 if (a)
51 *pos = a;
52 else
53 *pos = b;
54 return head;
55}
56
57/* reverse a linked list of nodes. also clears parent pointer */
58static struct bheap_node* __bheap_reverse(struct bheap_node* h)
59{
60 struct bheap_node* tail = NULL;
61 struct bheap_node* next;
62
63 if (!h)
64 return h;
65
66 h->parent = NULL;
67 while (h->next) {
68 next = h->next;
69 h->next = tail;
70 tail = h;
71 h = next;
72 h->parent = NULL;
73 }
74 h->next = tail;
75 return h;
76}
77
78static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
79 struct bheap_node** prev, struct bheap_node** node)
80{
81 struct bheap_node *_prev, *cur;
82 *prev = NULL;
83
84 if (!heap->head) {
85 *node = NULL;
86 return;
87 }
88
89 *node = heap->head;
90 _prev = heap->head;
91 cur = heap->head->next;
92 while (cur) {
93 if (higher_prio(cur, *node)) {
94 *node = cur;
95 *prev = _prev;
96 }
97 _prev = cur;
98 cur = cur->next;
99 }
100}
101
102static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
103 struct bheap_node* h2)
104{
105 struct bheap_node* h1;
106 struct bheap_node *prev, *x, *next;
107 if (!h2)
108 return;
109 h1 = heap->head;
110 if (!h1) {
111 heap->head = h2;
112 return;
113 }
114 h1 = __bheap_merge(h1, h2);
115 prev = NULL;
116 x = h1;
117 next = x->next;
118 while (next) {
119 if (x->degree != next->degree ||
120 (next->next && next->next->degree == x->degree)) {
121 /* nothing to do, advance */
122 prev = x;
123 x = next;
124 } else if (higher_prio(x, next)) {
125 /* x becomes the root of next */
126 x->next = next->next;
127 __bheap_link(x, next);
128 } else {
129 /* next becomes the root of x */
130 if (prev)
131 prev->next = next;
132 else
133 h1 = next;
134 __bheap_link(next, x);
135 x = next;
136 }
137 next = x->next;
138 }
139 heap->head = h1;
140}
141
142static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
143 struct bheap* heap)
144{
145 struct bheap_node *prev, *node;
146 __bheap_min(higher_prio, heap, &prev, &node);
147 if (!node)
148 return NULL;
149 if (prev)
150 prev->next = node->next;
151 else
152 heap->head = node->next;
153 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
154 return node;
155}
156
157/* insert (and reinitialize) a node into the heap */
158void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
159 struct bheap_node* node)
160{
161 struct bheap_node *min;
162 node->child = NULL;
163 node->parent = NULL;
164 node->next = NULL;
165 node->degree = 0;
166 if (heap->min && higher_prio(node, heap->min)) {
167 /* swap min cache */
168 min = heap->min;
169 min->child = NULL;
170 min->parent = NULL;
171 min->next = NULL;
172 min->degree = 0;
173 __bheap_union(higher_prio, heap, min);
174 heap->min = node;
175 } else
176 __bheap_union(higher_prio, heap, node);
177}
178
179void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
180{
181 struct bheap_node* min;
182 if (heap->min) {
183 min = heap->min;
184 heap->min = NULL;
185 bheap_insert(higher_prio, heap, min);
186 }
187}
188
189/* merge addition into target */
190void bheap_union(bheap_prio_t higher_prio,
191 struct bheap* target, struct bheap* addition)
192{
193 /* first insert any cached minima, if necessary */
194 bheap_uncache_min(higher_prio, target);
195 bheap_uncache_min(higher_prio, addition);
196 __bheap_union(higher_prio, target, addition->head);
197 /* this is a destructive merge */
198 addition->head = NULL;
199}
200
201struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
202 struct bheap* heap)
203{
204 if (!heap->min)
205 heap->min = __bheap_extract_min(higher_prio, heap);
206 return heap->min;
207}
208
209struct bheap_node* bheap_take(bheap_prio_t higher_prio,
210 struct bheap* heap)
211{
212 struct bheap_node *node;
213 if (!heap->min)
214 heap->min = __bheap_extract_min(higher_prio, heap);
215 node = heap->min;
216 heap->min = NULL;
217 if (node)
218 node->degree = NOT_IN_HEAP;
219 return node;
220}
221
222int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
223{
224 struct bheap_node *parent;
225 struct bheap_node** tmp_ref;
226 void* tmp;
227
228 /* bubble up */
229 parent = node->parent;
230 while (parent && higher_prio(node, parent)) {
231 /* swap parent and node */
232 tmp = parent->value;
233 parent->value = node->value;
234 node->value = tmp;
235 /* swap references */
236 *(parent->ref) = node;
237 *(node->ref) = parent;
238 tmp_ref = parent->ref;
239 parent->ref = node->ref;
240 node->ref = tmp_ref;
241 /* step up */
242 node = parent;
243 parent = node->parent;
244 }
245
246 return parent != NULL;
247}
248
249void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
250 struct bheap_node* node)
251{
252 struct bheap_node *parent, *prev, *pos;
253 struct bheap_node** tmp_ref;
254 void* tmp;
255
256 if (heap->min != node) {
257 /* bubble up */
258 parent = node->parent;
259 while (parent) {
260 /* swap parent and node */
261 tmp = parent->value;
262 parent->value = node->value;
263 node->value = tmp;
264 /* swap references */
265 *(parent->ref) = node;
266 *(node->ref) = parent;
267 tmp_ref = parent->ref;
268 parent->ref = node->ref;
269 node->ref = tmp_ref;
270 /* step up */
271 node = parent;
272 parent = node->parent;
273 }
274 /* now delete:
275 * first find prev */
276 prev = NULL;
277 pos = heap->head;
278 while (pos != node) {
279 BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */
280 prev = pos;
281 pos = pos->next;
282 }
283 /* we have prev, now remove node */
284 if (prev)
285 prev->next = node->next;
286 else
287 heap->head = node->next;
288 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
289 } else
290 heap->min = NULL;
291 node->degree = NOT_IN_HEAP;
292}
293
294/* allocate a heap node for value and insert into the heap */
295int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
296 void* value, int gfp_flags)
297{
298 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
299 if (likely(hn)) {
300 bheap_node_init(&hn, value);
301 bheap_insert(higher_prio, heap, hn);
302 }
303 return hn != NULL;
304}
305
306void* bheap_take_del(bheap_prio_t higher_prio,
307 struct bheap* heap)
308{
309 struct bheap_node* hn = bheap_take(higher_prio, heap);
310 void* ret = NULL;
311 if (hn) {
312 ret = hn->value;
313 bheap_node_free(hn);
314 }
315 return ret;
316}
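
For orientation, here is a minimal usage sketch of the heap API above (kernel context). The struct demo_job and its deadline ordering are made up for illustration; the function signatures are the ones defined in this file.

#include <linux/gfp.h>
#include <litmus/bheap.h>

struct demo_job {
	unsigned long long deadline;
};

/* nonzero if 'a' has higher priority (earlier deadline) than 'b' */
static int demo_earlier_deadline(struct bheap_node *a, struct bheap_node *b)
{
	struct demo_job *ja = a->value;
	struct demo_job *jb = b->value;
	return ja->deadline < jb->deadline;
}

static void demo_bheap_usage(void)
{
	struct bheap heap;
	struct demo_job j1 = { .deadline = 100 }, j2 = { .deadline = 50 };
	struct demo_job *next;

	bheap_init(&heap);

	/* bheap_add() allocates the bheap_node internally */
	bheap_add(demo_earlier_deadline, &heap, &j1, GFP_ATOMIC);
	bheap_add(demo_earlier_deadline, &heap, &j2, GFP_ATOMIC);

	/* returns j2 (earliest deadline) and frees its bheap_node */
	next = bheap_take_del(demo_earlier_deadline, &heap);
	(void) next;
}

This is the same pattern the EDF and FP ready queues below build on, with edf_ready_order()/fp_ready_order() as the comparison callback.
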
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..d3ab34b92096
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,387 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. Actual nodes swap instead of
40 * just data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble the node to delete up to the root,
360 * and then delete the root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368	/* temporarily poison the data pointer so the node bubbles up to the top. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose key has decreased in value (i.e., whose priority increased).
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
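
A minimal usage sketch of the binary-heap API above (kernel context). The struct demo_item and key ordering are invented for illustration, and the handle is initialized by hand; litmus/binheap.h presumably provides initializer and wrapper macros for this.

#include <litmus/binheap.h>

struct demo_item {
	int key;
	struct binheap_node node;
};

/* nonzero if 'a' should sit above 'b' (smaller key wins) */
static int demo_order(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_item *ia = a->data;
	struct demo_item *ib = b->data;
	return ia->key < ib->key;
}

static void demo_binheap_usage(void)
{
	struct binheap heap = {
		.root = NULL, .next = NULL, .last = NULL, .compare = demo_order
	};
	struct demo_item a = { .key = 2 }, b = { .key = 1 };
	struct demo_item *top;

	__binheap_add(&a.node, &heap, &a);
	__binheap_add(&b.node, &heap, &b);

	/* the root now carries the smallest key, i.e., item b */
	top = heap.root->data;
	(void) top;

	/* remove an arbitrary element; ->ref tracks where its data
	 * currently lives despite the data swaps during bubbling */
	__binheap_delete(&a.node, &heap);
}
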
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 000000000000..d7b250ebcc26
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,167 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4#include <linux/uaccess.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8#include <litmus/preempt.h>
9#include <litmus/sched_plugin.h>
10#include <litmus/np.h>
11
12#include <litmus/budget.h>
13
14struct enforcement_timer {
15 /* The enforcement timer is used to accurately police
16 * slice budgets. */
17 struct hrtimer timer;
18 int armed;
19};
20
21DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
22
23static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
24{
25 struct enforcement_timer* et = container_of(timer,
26 struct enforcement_timer,
27 timer);
28 unsigned long flags;
29
30 local_irq_save(flags);
31 TRACE("enforcement timer fired.\n");
32 et->armed = 0;
33 /* activate scheduler */
34 litmus_reschedule_local();
35 local_irq_restore(flags);
36
37 return HRTIMER_NORESTART;
38}
39
40/* assumes called with IRQs off */
41static void cancel_enforcement_timer(struct enforcement_timer* et)
42{
43 int ret;
44
45 TRACE("cancelling enforcement timer.\n");
46
47 /* Since interrupts are disabled and et->armed is only
48 * modified locally, we do not need any locks.
49 */
50
51 if (et->armed) {
52 ret = hrtimer_try_to_cancel(&et->timer);
53 /* Should never be inactive. */
54 BUG_ON(ret == 0);
55 /* Should never be running concurrently. */
56 BUG_ON(ret == -1);
57
58 et->armed = 0;
59 }
60}
61
62/* assumes called with IRQs off */
63static void arm_enforcement_timer(struct enforcement_timer* et,
64 struct task_struct* t)
65{
66 lt_t when_to_fire;
67 TRACE_TASK(t, "arming enforcement timer.\n");
68
69 WARN_ONCE(!hrtimer_is_hres_active(&et->timer),
70 KERN_ERR "WARNING: no high resolution timers available!?\n");
71
72 /* Calling this when there is no budget left for the task
73 * makes no sense, unless the task is non-preemptive. */
74 BUG_ON(budget_exhausted(t) && (!is_np(t)));
75
76 /* hrtimer_start_range_ns() cancels the timer
77 * anyway, so we don't have to check whether it is still armed */
78
79 if (likely(!is_np(t))) {
80 when_to_fire = litmus_clock() + budget_remaining(t);
81 hrtimer_start(&et->timer, ns_to_ktime(when_to_fire),
82 HRTIMER_MODE_ABS_PINNED);
83 et->armed = 1;
84 }
85}
86
87
88/* expects to be called with IRQs off */
89void update_enforcement_timer(struct task_struct* t)
90{
91 struct enforcement_timer* et = this_cpu_ptr(&budget_timer);
92
93 if (t && budget_precisely_enforced(t)) {
94 /* Make sure we call into the scheduler when this budget
95 * expires. */
96 arm_enforcement_timer(et, t);
97 } else if (et->armed) {
98 /* Make sure we don't cause unnecessary interrupts. */
99 cancel_enforcement_timer(et);
100 }
101}
102
103
104static int __init init_budget_enforcement(void)
105{
106 int cpu;
107 struct enforcement_timer* et;
108
109 for (cpu = 0; cpu < NR_CPUS; cpu++) {
110 et = &per_cpu(budget_timer, cpu);
111 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
112 et->timer.function = on_enforcement_timeout;
113 }
114 return 0;
115}
116
117void litmus_current_budget(lt_t *used_so_far, lt_t *remaining)
118{
119 struct task_struct *t = current;
120 unsigned long flags;
121 s64 delta;
122
123 local_irq_save(flags);
124
125 delta = sched_clock_cpu(smp_processor_id()) - t->se.exec_start;
126 if (delta < 0)
127 delta = 0;
128
129 TRACE_CUR("current_budget: sc:%llu start:%llu lt_t:%llu delta:%lld exec-time:%llu rem:%llu\n",
130 sched_clock_cpu(smp_processor_id()), t->se.exec_start,
131 litmus_clock(), delta,
132 tsk_rt(t)->job_params.exec_time,
133 budget_remaining(t));
134
135 if (used_so_far)
136 *used_so_far = tsk_rt(t)->job_params.exec_time + delta;
137
138 if (remaining) {
139 *remaining = budget_remaining(t);
140 if (*remaining > delta)
141 *remaining -= delta;
142 else
143 *remaining = 0;
144 }
145
146 local_irq_restore(flags);
147}
148
149asmlinkage long sys_get_current_budget(
150 lt_t __user * _expended,
151 lt_t __user *_remaining)
152{
153 lt_t expended = 0, remaining = 0;
154
155 if (is_realtime(current))
156 litmus->current_budget(&expended, &remaining);
157
158 if (_expended && put_user(expended, _expended))
159 return -EFAULT;
160
161 if (_remaining && put_user(remaining, _remaining))
162 return -EFAULT;
163
164 return 0;
165}
166
167module_init(init_budget_enforcement);
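
The used/remaining arithmetic in litmus_current_budget() is easy to model in isolation. The sketch below assumes, as in the code above, that budget_remaining() is the job's budget minus the execution time already charged, and that delta is the time spent running since the last charge; the numbers are made up.

#include <stdint.h>
#include <stdio.h>

/* user-space model of the kernel computation (all values in nanoseconds) */
static void model_current_budget(uint64_t exec_time, uint64_t budget,
                                 uint64_t delta,
                                 uint64_t *used, uint64_t *remaining)
{
	uint64_t left = budget > exec_time ? budget - exec_time : 0;

	*used = exec_time + delta;                    /* charged + running */
	*remaining = left > delta ? left - delta : 0; /* clamp at zero */
}

int main(void)
{
	uint64_t used, remaining;

	/* e.g. 6 ms charged, 10 ms budget, 1 ms since the last charge */
	model_current_budget(6000000, 10000000, 1000000, &used, &remaining);
	printf("used: %llu ns, remaining: %llu ns\n",
	       (unsigned long long) used, (unsigned long long) remaining);
	/* prints: used: 7000000 ns, remaining: 3000000 ns */
	return 0;
}
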
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 000000000000..de2aca2a271c
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,119 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4#include <linux/cacheinfo.h>
5
6#include <litmus/debug_trace.h>
7#include <litmus/clustered.h>
8
9int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index)
10{
11 struct cpu_cacheinfo* info = get_cpu_cacheinfo(cpu);
12 struct cacheinfo *ci;
13
14 if (!info || index >= info->num_leaves) {
15 TRACE("no shared-cache CPUs: info=%d index=%u\n",
16 info != NULL, index);
17 return 1;
18 }
19
20 if (!info->info_list) {
21		TRACE("no shared-cache CPUs: no info_list (cpu %u)\n", cpu);
22 }
23 ci = info->info_list + index;
24
25 cpumask_copy(mask, &ci->shared_cpu_map);
26
27	TRACE("get_shared: P%u@L%u -> %d siblings\n", cpu, index, cpumask_weight(mask));
28
29 return 0;
30}
31
32int get_cluster_size(enum cache_level level)
33{
34 cpumask_var_t mask;
35 int ok;
36 int num_cpus;
37
38 if (level == GLOBAL_CLUSTER)
39 return num_online_cpus();
40 else {
41 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
42 return -ENOMEM;
43 /* assumes CPU 0 is representative of all CPUs */
44 ok = get_shared_cpu_map(mask, 0, level);
45 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
46 if (ok == 0)
47 num_cpus = cpumask_weight(mask);
48 free_cpumask_var(mask);
49
50 if (ok == 0)
51 return num_cpus;
52 else
53 return -EINVAL;
54 }
55}
56
57int assign_cpus_to_clusters(enum cache_level level,
58 struct scheduling_cluster* clusters[],
59 unsigned int num_clusters,
60 struct cluster_cpu* cpus[],
61 unsigned int num_cpus)
62{
63 cpumask_var_t mask;
64 unsigned int i, free_cluster = 0, low_cpu;
65 int err = 0;
66
67 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
68 return -ENOMEM;
69
70 /* clear cluster pointers */
71 for (i = 0; i < num_cpus; i++) {
72 cpus[i]->id = i;
73 cpus[i]->cluster = NULL;
74 }
75
76 /* initialize clusters */
77 for (i = 0; i < num_clusters; i++) {
78 clusters[i]->id = i;
79 INIT_LIST_HEAD(&clusters[i]->cpus);
80 }
81
82	/* Assign each CPU. Two assumptions are made:
83 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
84 * 2) All cpus that belong to some cluster are online.
85 */
86 for_each_online_cpu(i) {
87 /* get lowest-id CPU in cluster */
88 if (level != GLOBAL_CLUSTER) {
89 err = get_shared_cpu_map(mask, cpus[i]->id, level);
90 if (err != 0) {
91 /* ugh... wrong cache level? Either caller screwed up
92 * or the CPU topology is weird. */
93 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
94 level, err);
95 err = -EINVAL;
96 goto out;
97 }
98 low_cpu = cpumask_first(mask);
99 } else
100 low_cpu = 0;
101 if (low_cpu == i) {
102 /* caller must provide an appropriate number of clusters */
103 BUG_ON(free_cluster >= num_clusters);
104
105 /* create new cluster */
106 cpus[i]->cluster = clusters[free_cluster++];
107 } else {
108 /* low_cpu points to the right cluster
109 * Assumption: low_cpu is actually online and was processed earlier. */
110 cpus[i]->cluster = cpus[low_cpu]->cluster;
111 }
112 /* enqueue in cpus list */
113 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
114		printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id);
115 }
116out:
117 free_cpumask_var(mask);
118 return err;
119}
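
A sketch of how a clustered plugin might size and build its clusters with these helpers (kernel context). Error handling and cleanup are trimmed, the demo_ function is made up, and real plugins such as C-EDF embed scheduling_cluster/cluster_cpu in their own per-cluster state rather than allocating them bare.

#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <litmus/clustered.h>

static int demo_setup_clusters(enum cache_level level)
{
	int cluster_size;
	unsigned int i, num_cpus = num_online_cpus(), num_clusters;
	struct scheduling_cluster **clusters;
	struct cluster_cpu **cpus;

	cluster_size = get_cluster_size(level);
	if (cluster_size <= 0)
		return -EINVAL;
	num_clusters = DIV_ROUND_UP(num_cpus, cluster_size);

	clusters = kmalloc_array(num_clusters, sizeof(*clusters), GFP_KERNEL);
	cpus = kmalloc_array(num_cpus, sizeof(*cpus), GFP_KERNEL);
	if (!clusters || !cpus)
		return -ENOMEM;	/* allocation cleanup omitted: sketch only */

	for (i = 0; i < num_clusters; i++)
		clusters[i] = kzalloc(sizeof(*clusters[i]), GFP_KERNEL);
	for (i = 0; i < num_cpus; i++)
		cpus[i] = kzalloc(sizeof(*cpus[i]), GFP_KERNEL);

	/* one cluster per set of CPUs sharing the given cache level */
	return assign_cpus_to_clusters(level, clusters, num_clusters,
	                               cpus, num_cpus);
}
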
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..3fc769c573a5
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,264 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6#include <linux/uaccess.h>
7
8
9#include <litmus/litmus.h>
10#include <litmus/debug_trace.h>
11
12/* only one page for now, but we might want to add an RO version at some point */
13
14#define CTRL_NAME "litmus/ctrl"
15
16/* allocate t->rt_param.ctrl_page*/
17static int alloc_ctrl_page(struct task_struct *t)
18{
19 int err = 0;
20
21 /* only allocate if the task doesn't have one yet */
22 if (!tsk_rt(t)->ctrl_page) {
23 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
24 if (!tsk_rt(t)->ctrl_page)
25 err = -ENOMEM;
26 /* will get de-allocated in task teardown */
27 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
28 tsk_rt(t)->ctrl_page);
29 }
30 return err;
31}
32
33static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
34{
35 int err;
36
37 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
38
39 TRACE_CUR(CTRL_NAME
40 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
41 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
42 vma->vm_page_prot);
43
44 /* Map it into the vma. */
45 err = vm_insert_page(vma, vma->vm_start, ctrl);
46
47 if (err)
48 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
49
50 return err;
51}
52
53static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
54{
55 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
56 vma->vm_flags, vma->vm_page_prot);
57
58 TRACE_CUR(CTRL_NAME
59 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
60 (void*) vma->vm_start, (void*) vma->vm_end, vma,
61 vma->vm_private_data);
62}
63
64static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
65 struct vm_fault* vmf)
66{
67 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
68 vma->vm_flags, vmf->pgoff);
69
70 /* This function should never be called, since all pages should have
71 * been mapped by mmap() already. */
72 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
73
74 return VM_FAULT_SIGBUS;
75}
76
77static struct vm_operations_struct litmus_ctrl_vm_ops = {
78 .close = litmus_ctrl_vm_close,
79 .fault = litmus_ctrl_vm_fault,
80};
81
82static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
83{
84 int err = 0;
85
86 /* first make sure mapper knows what he's doing */
87
88 /* you can only get one page */
89 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
90 return -EINVAL;
91
92 /* you can only map the "first" page */
93 if (vma->vm_pgoff != 0)
94 return -EINVAL;
95
96 /* you can't share it with anyone */
97 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
98 return -EINVAL;
99
100 vma->vm_ops = &litmus_ctrl_vm_ops;
101 /* This mapping should not be kept across forks,
102 * cannot be expanded, and is not a "normal" page. */
103 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ | VM_WRITE;
104
105 /* We don't want the first write access to trigger a "minor" page fault
106 * to mark the page as dirty. This is transient, private memory, we
107 * don't care if it was touched or not. PAGE_SHARED means RW access, but
108 * not execute, and avoids copy-on-write behavior.
109 * See protection_map in mmap.c. */
110 vma->vm_page_prot = PAGE_SHARED;
111
112 err = alloc_ctrl_page(current);
113 if (!err)
114 err = map_ctrl_page(current, vma);
115
116 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
117 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
118
119 return err;
120}
121
122/* LITMUS^RT system calls */
123
124asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param);
125asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param);
126asmlinkage long sys_reservation_create(int type, void __user *config);
127asmlinkage long sys_get_current_budget(lt_t __user * _expended, lt_t __user *_remaining);
128asmlinkage long sys_null_call(cycles_t __user *ts);
129asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config);
130asmlinkage long sys_od_close(int od);
131asmlinkage long sys_complete_job(void);
132asmlinkage long sys_litmus_lock(int lock_od);
133asmlinkage long sys_litmus_unlock(int lock_od);
134asmlinkage long sys_wait_for_job_release(unsigned int job);
135asmlinkage long sys_wait_for_ts_release(void);
136asmlinkage long sys_release_ts(lt_t __user *__delay);
137
138static long litmus_ctrl_ioctl(struct file *filp,
139 unsigned int cmd, unsigned long arg)
140{
141 long err = -ENOIOCTLCMD;
142
143 /* LITMUS^RT syscall emulation: we expose LITMUS^RT-specific operations
144 * via ioctl() to avoid merge conflicts with the syscall tables when
145	 * rebasing LITMUS^RT. While this is not the most elegant way to expose
146 * syscall-like functionality, it helps with reducing the effort
147 * required to maintain LITMUS^RT out of tree.
148 */
149
150 union litmus_syscall_args syscall_args;
151
152 switch (cmd) {
153 case LRT_set_rt_task_param:
154 case LRT_get_rt_task_param:
155 case LRT_reservation_create:
156 case LRT_get_current_budget:
157 case LRT_od_open:
158 /* multiple arguments => need to get args via pointer */
159 /* get syscall parameters */
160 if (copy_from_user(&syscall_args, (void*) arg,
161 sizeof(syscall_args))) {
162 return -EFAULT;
163 }
164
165 switch (cmd) {
166 case LRT_set_rt_task_param:
167 return sys_set_rt_task_param(
168 syscall_args.get_set_task_param.pid,
169 syscall_args.get_set_task_param.param);
170 case LRT_get_rt_task_param:
171 return sys_get_rt_task_param(
172 syscall_args.get_set_task_param.pid,
173 syscall_args.get_set_task_param.param);
174 case LRT_reservation_create:
175 return sys_reservation_create(
176 syscall_args.reservation_create.type,
177 syscall_args.reservation_create.config);
178 case LRT_get_current_budget:
179 return sys_get_current_budget(
180 syscall_args.get_current_budget.expended,
181 syscall_args.get_current_budget.remaining);
182 case LRT_od_open:
183 return sys_od_open(
184 syscall_args.od_open.fd,
185 syscall_args.od_open.obj_type,
186 syscall_args.od_open.obj_id,
187 syscall_args.od_open.config);
188 }
189
190
191 case LRT_null_call:
192 return sys_null_call((cycles_t __user *) arg);
193
194 case LRT_od_close:
195 return sys_od_close(arg);
196
197 case LRT_complete_job:
198 return sys_complete_job();
199
200 case LRT_litmus_lock:
201 return sys_litmus_lock(arg);
202
203 case LRT_litmus_unlock:
204 return sys_litmus_unlock(arg);
205
206 case LRT_wait_for_job_release:
207 return sys_wait_for_job_release(arg);
208
209 case LRT_wait_for_ts_release:
210 return sys_wait_for_ts_release();
211
212 case LRT_release_ts:
213 return sys_release_ts((lt_t __user *) arg);
214
215 default:
216 printk(KERN_DEBUG "ctrldev: strange ioctl (%u, %lu)\n", cmd, arg);
217	}
218
219 return err;
220}
221
222static struct file_operations litmus_ctrl_fops = {
223 .owner = THIS_MODULE,
224 .mmap = litmus_ctrl_mmap,
225 .unlocked_ioctl = litmus_ctrl_ioctl,
226};
227
228static struct miscdevice litmus_ctrl_dev = {
229 .name = CTRL_NAME,
230 .minor = MISC_DYNAMIC_MINOR,
231 .fops = &litmus_ctrl_fops,
232};
233
234static int __init init_litmus_ctrl_dev(void)
235{
236 int err;
237
238 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
239
240 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
241
242 BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
243 != LITMUS_CP_OFFSET_SCHED);
244 BUILD_BUG_ON(offsetof(struct control_page, irq_count)
245 != LITMUS_CP_OFFSET_IRQ_COUNT);
246 BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
247 != LITMUS_CP_OFFSET_TS_SC_START);
248 BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
249 != LITMUS_CP_OFFSET_IRQ_SC_START);
250
251 printk("Initializing LITMUS^RT control device.\n");
252 err = misc_register(&litmus_ctrl_dev);
253 if (err)
254 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
255 return err;
256}
257
258static void __exit exit_litmus_ctrl_dev(void)
259{
260 misc_deregister(&litmus_ctrl_dev);
261}
262
263module_init(init_litmus_ctrl_dev);
264module_exit(exit_litmus_ctrl_dev);
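
For reference, a user-space sketch of how the control device is meant to be used: map the control page and issue LITMUS^RT operations through ioctl(). The device path follows the misc-device name registered above; the LRT_* command codes and the struct control_page layout are assumed to come from the LITMUS^RT user-space headers (liblitmus wraps all of this), and the include path shown is hypothetical.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <litmus/ctrlpage.h>	/* hypothetical: provides LRT_* and struct control_page */

int main(void)
{
	int fd = open("/dev/litmus/ctrl", O_RDWR);
	struct control_page *cp;
	uint64_t ts = 0;	/* stands in for cycles_t */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* exactly one private read/write page at offset 0; anything else is rejected */
	cp = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ | PROT_WRITE,
	          MAP_PRIVATE, fd, 0);
	if (cp == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* single-argument operations pass the argument directly */
	ioctl(fd, LRT_null_call, &ts);

	/* the kernel publishes per-job data in the mapped page */
	printf("deadline of current job: %llu\n",
	       (unsigned long long) cp->deadline);

	munmap(cp, sysconf(_SC_PAGESIZE));
	close(fd);
	return 0;
}

Multi-argument operations (e.g. LRT_set_rt_task_param) instead pass a pointer to union litmus_syscall_args, as handled in litmus_ctrl_ioctl() above.
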
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..1cd5ec711d28
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,201 @@
1/*
2 * kernel/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14#include <litmus/debug_trace.h>
15
16#include <litmus/edf_common.h>
17
18#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
19#include <litmus/fpmath.h>
20#endif
21
22#ifdef CONFIG_EDF_TIE_BREAK_HASH
23#include <linux/hash.h>
24static inline long edf_hash(struct task_struct *t)
25{
26 /* pid is 32 bits, so normally we would shove that into the
27	 * upper 32 bits and put the job number in the bottom
28	 * and hash the 64-bit number with hash_64(). Sadly,
29	 * in testing, hash_64() doesn't distribute keys where the
30 * upper bits are close together (as would be the case with
31 * pids) and job numbers are equal (as would be the case with
32 * synchronous task sets with all relative deadlines equal).
33 *
34 * A 2006 Linux patch proposed the following solution
35 * (but for some reason it wasn't accepted...).
36 *
37 * At least this workaround works for 32-bit systems as well.
38 */
39 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
40}
41#endif
42
43
44/* edf_higher_prio - returns true if first has a higher EDF priority
45 * than second. Deadline ties are broken by PID.
46 *
47 * both first and second may be NULL
48 */
49int edf_higher_prio(struct task_struct* first,
50 struct task_struct* second)
51{
52 struct task_struct *first_task = first;
53 struct task_struct *second_task = second;
54
55 /* There is no point in comparing a task to itself. */
56 if (first && first == second) {
57 TRACE_TASK(first,
58 "WARNING: pointless edf priority comparison.\n");
59 return 0;
60 }
61
62
63 /* check for NULL tasks */
64 if (!first || !second)
65 return first && !second;
66
67#ifdef CONFIG_LITMUS_LOCKING
68
69 /* Check for inherited priorities. Change task
70 * used for comparison in such a case.
71 */
72 if (unlikely(first->rt_param.inh_task))
73 first_task = first->rt_param.inh_task;
74 if (unlikely(second->rt_param.inh_task))
75 second_task = second->rt_param.inh_task;
76
77 /* Check for priority boosting. Tie-break by start of boosting.
78 */
79 if (unlikely(is_priority_boosted(first_task))) {
80 /* first_task is boosted, how about second_task? */
81 if (!is_priority_boosted(second_task) ||
82 lt_before(get_boost_start(first_task),
83 get_boost_start(second_task)))
84 return 1;
85 else
86 return 0;
87 } else if (unlikely(is_priority_boosted(second_task)))
89		/* second_task is boosted, first is not */
89 return 0;
90
91#endif
92
93 if (earlier_deadline(first_task, second_task)) {
94 return 1;
95 }
96 else if (get_deadline(first_task) == get_deadline(second_task)) {
97		/* Need to tie break. Each tie-break method sets pid_break to 1
98		 * if the tie remains unresolved, so that we fall back to breaking the tie by PID.
99 */
100 int pid_break;
101
102
103#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
104 /* Tie break by lateness. Jobs with greater lateness get
105 * priority. This should spread tardiness across all tasks,
106 * especially in task sets where all tasks have the same
107 * period and relative deadlines.
108 */
109 if (get_lateness(first_task) > get_lateness(second_task)) {
110 return 1;
111 }
112 pid_break = (get_lateness(first_task) == get_lateness(second_task));
113
114
115#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
116 /* Tie break by lateness, normalized by relative deadline. Jobs with
117 * greater normalized lateness get priority.
118 *
119 * Note: Considered using the algebraically equivalent
120 * lateness(first)*relative_deadline(second) >
121		 *   lateness(second)*relative_deadline(first)
122 * to avoid fixed-point math, but values are prone to overflow if inputs
123 * are on the order of several seconds, even in 64-bit.
124 */
125 fp_t fnorm = _frac(get_lateness(first_task),
126 get_rt_relative_deadline(first_task));
127 fp_t snorm = _frac(get_lateness(second_task),
128 get_rt_relative_deadline(second_task));
129 if (_gt(fnorm, snorm)) {
130 return 1;
131 }
132 pid_break = _eq(fnorm, snorm);
133
134
135#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
136		/* Tie break by comparing hashes of the (pid, job#) tuple. There should be
137 * a 50% chance that first_task has a higher priority than second_task.
138 */
139 long fhash = edf_hash(first_task);
140 long shash = edf_hash(second_task);
141 if (fhash < shash) {
142 return 1;
143 }
144 pid_break = (fhash == shash);
145#else
146
147
148 /* CONFIG_EDF_PID_TIE_BREAK */
149		pid_break = 1; /* fall through to tie-break by PID */
150#endif
151
152 /* Tie break by pid */
153 if(pid_break) {
154 if (first_task->pid < second_task->pid) {
155 return 1;
156 }
157 else if (first_task->pid == second_task->pid) {
158 /* If the PIDs are the same then the task with the
159 * inherited priority wins.
160 */
161 if (!second->rt_param.inh_task) {
162 return 1;
163 }
164 }
165 }
166 }
167 return 0; /* fall-through. prio(second_task) > prio(first_task) */
168}
169
170int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
171{
172 return edf_higher_prio(bheap2task(a), bheap2task(b));
173}
174
175void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
176 release_jobs_t release)
177{
178 rt_domain_init(rt, edf_ready_order, resched, release);
179}
180
181/* need_to_preempt - check whether the task t needs to be preempted
182 * call only with irqs disabled and with ready_lock acquired
183 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
184 */
185int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
186{
187 /* we need the read lock for edf_ready_queue */
188 /* no need to preempt if there is nothing pending */
189 if (!__jobs_pending(rt))
190 return 0;
191 /* we need to reschedule if t doesn't exist */
192 if (!t)
193 return 1;
194
195 /* NOTE: We cannot check for non-preemptibility since we
196 * don't know what address space we're currently in.
197 */
198
199 /* make sure to get non-rt stuff out of the way */
200 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
201}
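
A kernel-side sketch of how a plugin typically wires up these helpers. The demo_* names are invented; the callback signatures follow litmus/rt_domain.h as added elsewhere in this patch, and a real plugin would of course do actual work in the callbacks.

#include <linux/sched.h>

#include <litmus/edf_common.h>
#include <litmus/rt_domain.h>

static rt_domain_t demo_domain;

/* invoked by the rt_domain when a preemption check is required */
static int demo_check_resched(rt_domain_t *rt)
{
	/* compare the head of the ready queue against the scheduled task
	 * and trigger a reschedule if necessary (plugin-specific) */
	return 0;
}

/* invoked when a batch of jobs is released */
static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
	/* a real plugin merges 'tasks' into its ready queue here */
}

static void demo_domain_setup(void)
{
	/* order the ready queue by EDF via edf_ready_order() */
	edf_domain_init(&demo_domain, demo_check_resched, demo_release_jobs);
}

/* in the scheduling path, with IRQs off and the ready lock held */
static int demo_should_preempt(struct task_struct *scheduled)
{
	return edf_preemption_needed(&demo_domain, scheduled);
}
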
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..0ff54e41839c
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,308 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - objects descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
30 &generic_lock_ops, /* DFLP_SEM */
31};
32
33static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
34{
35 BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
36
37 if (fdso_ops[type]->create)
38 return fdso_ops[type]->create(obj_ref, type, config);
39 else
40 return -EINVAL;
41}
42
43static void fdso_destroy(obj_type_t type, void* obj)
44{
45 fdso_ops[type]->destroy(type, obj);
46}
47
48static int fdso_open(struct od_table_entry* entry, void* __user config)
49{
50 if (fdso_ops[entry->obj->type]->open)
51 return fdso_ops[entry->obj->type]->open(entry, config);
52 else
53 return 0;
54}
55
56static int fdso_close(struct od_table_entry* entry)
57{
58 if (fdso_ops[entry->obj->type]->close)
59 return fdso_ops[entry->obj->type]->close(entry);
60 else
61 return 0;
62}
63
64/* inode must be locked already */
65static int alloc_inode_obj(struct inode_obj_id** obj_ref,
66 struct inode* inode,
67 obj_type_t type,
68 unsigned int id,
69 void* __user config)
70{
71 struct inode_obj_id* obj;
72 void* raw_obj;
73 int err;
74
75 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
76 if (!obj) {
77 return -ENOMEM;
78 }
79
80 err = fdso_create(&raw_obj, type, config);
81 if (err != 0) {
82 kfree(obj);
83 return err;
84 }
85
86 INIT_LIST_HEAD(&obj->list);
87 atomic_set(&obj->count, 1);
88 obj->type = type;
89 obj->id = id;
90 obj->obj = raw_obj;
91 obj->inode = inode;
92
93 list_add(&obj->list, &inode->i_obj_list);
94 atomic_inc(&inode->i_count);
95
96 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
97
98 *obj_ref = obj;
99 return 0;
100}
101
102/* inode must be locked already */
103static struct inode_obj_id* get_inode_obj(struct inode* inode,
104 obj_type_t type,
105 unsigned int id)
106{
107 struct list_head* pos;
108 struct inode_obj_id* obj = NULL;
109
110 list_for_each(pos, &inode->i_obj_list) {
111 obj = list_entry(pos, struct inode_obj_id, list);
112 if (obj->id == id && obj->type == type) {
113 atomic_inc(&obj->count);
114 return obj;
115 }
116 }
117 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
118 return NULL;
119}
120
121
122static void put_inode_obj(struct inode_obj_id* obj)
123{
124 struct inode* inode;
125 int let_go = 0;
126
127 inode = obj->inode;
128 if (atomic_dec_and_test(&obj->count)) {
129
130 mutex_lock(&inode->i_obj_mutex);
131 /* no new references can be obtained */
132 if (!atomic_read(&obj->count)) {
133 list_del(&obj->list);
134 fdso_destroy(obj->type, obj->obj);
135 kfree(obj);
136 let_go = 1;
137 }
138 mutex_unlock(&inode->i_obj_mutex);
139 if (let_go)
140 iput(inode);
141 }
142}
143
144static struct od_table_entry* get_od_entry(struct task_struct* t)
145{
146 struct od_table_entry* table;
147 int i;
148
149
150 table = t->od_table;
151 if (!table) {
152 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
153 GFP_KERNEL);
154 t->od_table = table;
155 }
156
157 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
158 if (!table[i].used) {
159 table[i].used = 1;
160 return table + i;
161 }
162 return NULL;
163}
164
165static int put_od_entry(struct od_table_entry* od)
166{
167 put_inode_obj(od->obj);
168 od->used = 0;
169 return 0;
170}
171
172static long close_od_entry(struct od_table_entry *od)
173{
174 long ret;
175
176 /* Give the class a chance to reject the close. */
177 ret = fdso_close(od);
178 if (ret == 0)
179 ret = put_od_entry(od);
180
181 return ret;
182}
183
184void exit_od_table(struct task_struct* t)
185{
186 int i;
187
188 if (t->od_table) {
189 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
190 if (t->od_table[i].used)
191 close_od_entry(t->od_table + i);
192 kfree(t->od_table);
193 t->od_table = NULL;
194 }
195}
196
197static int do_sys_od_open(struct file* file, obj_type_t type, int id,
198 void* __user config)
199{
200 int idx = 0, err = 0;
201 struct inode* inode;
202 struct inode_obj_id* obj = NULL;
203 struct od_table_entry* entry;
204
205 inode = file_inode(file);
206
207 entry = get_od_entry(current);
208 if (!entry)
209 return -ENOMEM;
210
211 mutex_lock(&inode->i_obj_mutex);
212 obj = get_inode_obj(inode, type, id);
213 if (!obj)
214 err = alloc_inode_obj(&obj, inode, type, id, config);
215 if (err != 0) {
216 obj = NULL;
217 idx = err;
218 entry->used = 0;
219 } else {
220 entry->obj = obj;
221 entry->class = fdso_ops[type];
222 idx = entry - current->od_table;
223 }
224
225 mutex_unlock(&inode->i_obj_mutex);
226
227 /* open only if creation succeeded */
228 if (!err)
229 err = fdso_open(entry, config);
230 if (err < 0) {
231 /* The class rejected the open call.
232 * We need to clean up and tell user space.
233 */
234 if (obj)
235 put_od_entry(entry);
236 idx = err;
237 }
238
239 return idx;
240}
241
242
243struct od_table_entry* get_entry_for_od(int od)
244{
245 struct task_struct *t = current;
246
247 if (!t->od_table)
248 return NULL;
249 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
250 return NULL;
251 if (!t->od_table[od].used)
252 return NULL;
253 return t->od_table + od;
254}
255
256
257asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
258{
259 int ret = 0;
260 struct file* file;
261
262 /*
263 1) get file from fd, get inode from file
264 2) lock inode
265 3) try to lookup object
266 4) if not present create and enqueue object, inc inode refcnt
267 5) increment refcnt of object
268 6) alloc od_table_entry, setup ptrs
269 7) unlock inode
270 8) return offset in od_table as OD
271 */
272
273 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
274 ret = -EINVAL;
275 goto out;
276 }
277
278 file = fget(fd);
279 if (!file) {
280 ret = -EBADF;
281 goto out;
282 }
283
284 ret = do_sys_od_open(file, type, obj_id, config);
285
286 fput(file);
287
288out:
289 return ret;
290}
291
292
293asmlinkage long sys_od_close(int od)
294{
295 int ret = -EINVAL;
296 struct task_struct *t = current;
297
298 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
299 return ret;
300
301 if (!t->od_table || !t->od_table[od].used)
302 return ret;
303
304
305 ret = close_od_entry(t->od_table + od);
306
307 return ret;
308}
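
A user-space sketch of the object-descriptor life cycle: attach a lock object to a shared inode via od_open() and lock/unlock it through the control-device ioctl() interface. The LRT_* codes, union litmus_syscall_args, and FMLP_SEM are assumed to come from the LITMUS^RT user-space headers (the include path is an assumption), and liblitmus normally hides this behind helpers such as open_fmlp_sem()/litmus_lock(). Whether object creation succeeds depends on the active plugin supporting the requested lock type.

#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <litmus/litmus.h>	/* user-space header (path is an assumption) */

int main(void)
{
	int ctrl = open("/dev/litmus/ctrl", O_RDWR);
	int fd = open("/tmp/demo_ns", O_RDWR | O_CREAT, 0666); /* shared namespace inode */
	union litmus_syscall_args args;
	int od;

	if (ctrl < 0 || fd < 0) {
		perror("open");
		return 1;
	}

	args.od_open.fd = fd;
	args.od_open.obj_type = FMLP_SEM;
	args.od_open.obj_id = 0;	/* same (inode, type, id) => same object */
	args.od_open.config = NULL;

	od = ioctl(ctrl, LRT_od_open, &args);	/* returns an object descriptor */

	ioctl(ctrl, LRT_litmus_lock, od);
	/* ... critical section ... */
	ioctl(ctrl, LRT_litmus_unlock, od);

	ioctl(ctrl, LRT_od_close, od);
	close(fd);
	close(ctrl);
	return 0;
}
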
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..242542a510d3
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,130 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14#include <litmus/debug_trace.h>
15
16#include <litmus/fp_common.h>
17
18/* fp_higher_prio - returns true if first has a higher static priority
19 * than second. Ties are broken by PID.
20 *
21 * both first and second may be NULL
22 */
23int fp_higher_prio(struct task_struct* first,
24 struct task_struct* second)
25{
26 struct task_struct *first_task = first;
27 struct task_struct *second_task = second;
28
29 /* There is no point in comparing a task to itself. */
30 if (unlikely(first && first == second)) {
31 TRACE_TASK(first,
32 "WARNING: pointless FP priority comparison.\n");
33 return 0;
34 }
35
36
37 /* check for NULL tasks */
38 if (!first || !second)
39 return first && !second;
40
41 if (!is_realtime(second_task))
42 return 1;
43
44#ifdef CONFIG_LITMUS_LOCKING
45
46 /* Check for inherited priorities. Change task
47 * used for comparison in such a case.
48 */
49 if (unlikely(first->rt_param.inh_task))
50 first_task = first->rt_param.inh_task;
51 if (unlikely(second->rt_param.inh_task))
52 second_task = second->rt_param.inh_task;
53
54 /* Check for priority boosting. Tie-break by start of boosting.
55 */
56 if (unlikely(is_priority_boosted(first_task))) {
57 /* first_task is boosted, how about second_task? */
58 if (is_priority_boosted(second_task))
59 /* break by priority point */
60 return lt_before(get_boost_start(first_task),
61 get_boost_start(second_task));
62 else
63 /* priority boosting wins. */
64 return 1;
65 } else if (unlikely(is_priority_boosted(second_task)))
66		/* second_task is boosted, first is not */
67 return 0;
68
69#endif
70
71 /* Comparisons to itself are not expected; priority inheritance
72 * should also not cause this to happen. */
73 BUG_ON(first_task == second_task);
74
75 if (get_priority(first_task) < get_priority(second_task))
76 return 1;
77 else if (get_priority(first_task) == get_priority(second_task))
78 /* Break by PID. */
79 return first_task->pid < second_task->pid;
80 else
81 return 0;
82}
83
84int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
85{
86 return fp_higher_prio(bheap2task(a), bheap2task(b));
87}
88
89void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
90 release_jobs_t release)
91{
92 rt_domain_init(rt, fp_ready_order, resched, release);
93}
94
95/* need_to_preempt - check whether the task t needs to be preempted
96 */
97int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
98{
99 struct task_struct *pending;
100
101 pending = fp_prio_peek(q);
102
103 if (!pending)
104 return 0;
105 if (!t)
106 return 1;
107
108 /* make sure to get non-rt stuff out of the way */
109 return !is_realtime(t) || fp_higher_prio(pending, t);
110}
111
112void fp_prio_queue_init(struct fp_prio_queue* q)
113{
114 int i;
115
116 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
117 q->bitmask[i] = 0;
118 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
119 bheap_init(&q->queue[i]);
120}
121
122void fp_ready_list_init(struct fp_ready_list* q)
123{
124 int i;
125
126 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
127 q->bitmask[i] = 0;
128 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
129 INIT_LIST_HEAD(q->queue + i);
130}
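
To make the ordering concrete, a small illustration with made-up tasks (lower numeric priority value means higher priority; ties are broken by PID; non-real-time tasks always lose):

	A: priority 1, PID 42    B: priority 2, PID 10    C: priority 1, PID 40

	fp_higher_prio(A, B) == 1    (priority 1 beats priority 2)
	fp_higher_prio(A, C) == 0    (equal priority; PID 40 < 42, so C wins)
	fp_higher_prio(A, non-real-time task) == 1
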
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index 9d539251f4cb..095301a23450 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -216,6 +216,17 @@ static ssize_t ftdev_read(struct file *filp,
216 * here with copied data because that data would get 216 * here with copied data because that data would get
217 * lost if the task is interrupted (e.g., killed). 217 * lost if the task is interrupted (e.g., killed).
218 */ 218 */
219
220 /* Before sleeping, check whether a non-blocking
221 * read was requested.
222 */
223 if (filp->f_flags & O_NONBLOCK)
224 {
225 /* bug out, userspace doesn't want us to sleep */
226 err = -EWOULDBLOCK;
227 break;
228 }
229
219 mutex_unlock(&ftdm->lock); 230 mutex_unlock(&ftdm->lock);
220 set_current_state(TASK_INTERRUPTIBLE); 231 set_current_state(TASK_INTERRUPTIBLE);
221 232
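
A user-space sketch of the new non-blocking behaviour: with O_NONBLOCK, ftdev_read() now returns -EWOULDBLOCK instead of sleeping when no trace records are buffered. The device path is only an example (Feather-Trace device names depend on the configured buffers), and enabling trace events beforehand is omitted here.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/litmus/sched_trace0", O_RDONLY | O_NONBLOCK);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	n = read(fd, buf, sizeof(buf));
	if (n < 0 && errno == EWOULDBLOCK)
		printf("no trace data buffered right now\n");
	else if (n >= 0)
		printf("read %zd bytes of trace data\n", n);

	close(fd);
	return 0;
}
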
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..1c700f9acd24
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,163 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/preempt.h>
7#include <litmus/litmus.h>
8#include <litmus/sched_plugin.h>
9#include <litmus/sched_trace.h>
10#include <litmus/jobs.h>
11
12static inline void setup_release(struct task_struct *t, lt_t release)
13{
14 /* prepare next release */
15 t->rt_param.job_params.release = release;
16 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
17 t->rt_param.job_params.exec_time = 0;
18
19 /* update job sequence number */
20 t->rt_param.job_params.job_no++;
21
22 /* expose to user space */
23 if (has_control_page(t)) {
24 struct control_page* cp = get_control_page(t);
25 cp->deadline = t->rt_param.job_params.deadline;
26 cp->release = get_release(t);
27 cp->job_index = t->rt_param.job_params.job_no;
28 }
29}
30
31void prepare_for_next_period(struct task_struct *t)
32{
33 BUG_ON(!t);
34
35 /* Record lateness before we set up the next job's
36 * release and deadline. Lateness may be negative.
37 */
38 t->rt_param.job_params.lateness =
39 (long long)litmus_clock() -
40 (long long)t->rt_param.job_params.deadline;
41
42 if (tsk_rt(t)->sporadic_release) {
43 TRACE_TASK(t, "sporadic release at %llu\n",
44 tsk_rt(t)->sporadic_release_time);
45 /* sporadic release */
46 setup_release(t, tsk_rt(t)->sporadic_release_time);
47 tsk_rt(t)->sporadic_release = 0;
48 } else {
49 /* periodic release => add period */
50 setup_release(t, get_release(t) + get_rt_period(t));
51 }
52}
53
54void release_at(struct task_struct *t, lt_t start)
55{
56 BUG_ON(!t);
57 setup_release(t, start);
58 tsk_rt(t)->completed = 0;
59}
60
61void inferred_sporadic_job_release_at(struct task_struct *t, lt_t when)
62{
63 /* new sporadic release */
64 sched_trace_last_suspension_as_completion(t);
65 /* check if this task is resuming from a clock_nanosleep() call */
66 if (tsk_rt(t)->doing_abs_nanosleep &&
67 lt_after_eq(tsk_rt(t)->nanosleep_wakeup,
68 get_release(t) + get_rt_period(t))) {
69 /* clock_nanosleep() is supposed to wake up the task
70 * at a time that is a valid release time. Use that time
71 * rather than guessing the intended release time from the
72 * current time. */
73 TRACE_TASK(t, "nanosleep: backdating release "
74 "to %llu instead of %llu\n",
75 tsk_rt(t)->nanosleep_wakeup, when);
76 when = tsk_rt(t)->nanosleep_wakeup;
77 }
78 release_at(t, when);
79 sched_trace_task_release(t);
80}
81
82long default_wait_for_release_at(lt_t release_time)
83{
84 struct task_struct *t = current;
85 unsigned long flags;
86
87 local_irq_save(flags);
88 tsk_rt(t)->sporadic_release_time = release_time;
89 smp_wmb();
90 tsk_rt(t)->sporadic_release = 1;
91 local_irq_restore(flags);
92
93 return litmus->complete_job();
94}
95
96
97/*
98 * Deactivate current task until the beginning of the next period.
99 */
100long complete_job(void)
101{
102 preempt_disable();
103 TRACE_CUR("job completion indicated at %llu\n", litmus_clock());
104	/* Mark that we do not execute anymore */
105 tsk_rt(current)->completed = 1;
106	/* call schedule; this will return when a new job arrives.
107	 * It also takes care of preparing for the next release.
108 */
109 litmus_reschedule_local();
110 preempt_enable();
111 return 0;
112}
113
114static long sleep_until_next_release(void);
115
116/* alternative job completion implementation that suspends the task */
117long complete_job_oneshot(void)
118{
119 struct task_struct *t = current;
120
121 preempt_disable();
122
123 TRACE_CUR("job completes at %llu (deadline: %llu)\n", litmus_clock(),
124 get_deadline(t));
125
126 sched_trace_task_completion(t, 0);
127 prepare_for_next_period(t);
128 sched_trace_task_release(t);
129
130 return sleep_until_next_release();
131}
132
133/* assumes caller has disabled preemptions;
134 * re-enables preemptions before returning */
135static long sleep_until_next_release(void)
136{
137 struct task_struct *t = current;
138 ktime_t next_release;
139 long err;
140
141 next_release = ns_to_ktime(get_release(t));
142
143 TRACE_CUR("next_release=%llu\n", get_release(t));
144
145 if (lt_after(get_release(t), litmus_clock())) {
146 set_current_state(TASK_INTERRUPTIBLE);
147 tsk_rt(t)->completed = 1;
148 preempt_enable_no_resched();
149 err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
150 /* If we get woken by a signal, we return early.
151 * This is intentional; we want to be able to kill tasks
152 * that are waiting for the next job release.
153 */
154 tsk_rt(t)->completed = 0;
155 } else {
156 err = 0;
157 TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(t), litmus_clock());
158 preempt_enable();
159 }
160
161 TRACE_CUR("return to next job at %llu\n", litmus_clock());
162 return err;
163}
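
A kernel-side sketch of how a plugin opts into the suspension-based completion path: point the complete_job callback at complete_job_oneshot() instead of the classic complete_job(). Only the fields relevant here are shown, their names are assumed to match litmus/sched_plugin.h as added by this patch, and demo_plugin is made up.

#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>

static struct sched_plugin demo_plugin = {
	.plugin_name         = "DEMO",
	/* ... scheduling callbacks ... */
	.complete_job        = complete_job_oneshot,	/* or complete_job */
	.wait_for_release_at = default_wait_for_release_at,
};

With this, a task that calls sys_complete_job() suspends in sleep_until_next_release() via a one-shot hrtimer and wakes up at its next release, rather than being parked by the plugin's own scheduling logic.
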
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..bd192180fef7
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,773 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 * the LITMUS initialization code,
4 * and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/reboot.h>
13#include <linux/stop_machine.h>
14#include <linux/sched/rt.h>
15#include <linux/rwsem.h>
16#include <linux/interrupt.h>
17
18#include <litmus/debug_trace.h>
19#include <litmus/litmus.h>
20#include <litmus/bheap.h>
21#include <litmus/trace.h>
22#include <litmus/rt_domain.h>
23#include <litmus/litmus_proc.h>
24#include <litmus/sched_trace.h>
25
26#ifdef CONFIG_SCHED_CPU_AFFINITY
27#include <litmus/affinity.h>
28#endif
29
30#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
31#define CREATE_TRACE_POINTS
32#include <trace/events/litmus.h>
33#endif
34
35/* Number of RT tasks that exist in the system */
36atomic_t rt_task_count = ATOMIC_INIT(0);
37
38#ifdef CONFIG_RELEASE_MASTER
39/* current master CPU for handling timer IRQs */
40atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
41#endif
42
43static struct kmem_cache * bheap_node_cache;
44extern struct kmem_cache * release_heap_cache;
45
46struct bheap_node* bheap_node_alloc(int gfp_flags)
47{
48 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
49}
50
51void bheap_node_free(struct bheap_node* hn)
52{
53 kmem_cache_free(bheap_node_cache, hn);
54}
55
56struct release_heap* release_heap_alloc(int gfp_flags);
57void release_heap_free(struct release_heap* rh);
58
59/**
60 * Get the quantum alignment as a cmdline option.
61 * Default is aligned quanta.
62 */
63static bool aligned_quanta = 1;
64module_param(aligned_quanta, bool, 0644);
65
66u64 cpu_stagger_offset(int cpu)
67{
68 u64 offset = 0;
69
70 if (!aligned_quanta) {
71 offset = LITMUS_QUANTUM_LENGTH_NS;
72 do_div(offset, num_possible_cpus());
73 offset *= cpu;
74 }
75 return offset;
76}
77
78/*
79 * sys_set_rt_task_param
80 * @pid: Pid of the task whose scheduling parameters must be changed
81 * @param: New real-time extension parameters such as the execution cost and
82 * period
83 * Syscall for manipulating a task's rt extension params
84 * Returns EFAULT if param is NULL.
85 * ESRCH if pid does not correspond
86 * to a valid task.
87 * EINVAL if either period or execution cost is <=0
88 * EPERM if pid is a real-time task
89 * 0 if success
90 *
91 * Only non-real-time tasks may be configured with this system call
92 * to avoid races with the scheduler. In practice, this means that a
93 * task's parameters must be set _before_ calling sys_prepare_rt_task()
94 *
95 * find_task_by_vpid() assumes that we are in the same namespace as the
96 * target.
97 */
98asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
99{
100 struct rt_task tp;
101 struct task_struct *target;
102 int retval = -EINVAL;
103
104 printk("Setting up rt task parameters for process %d.\n", pid);
105
106 if (pid < 0 || param == 0) {
107 goto out;
108 }
109 if (copy_from_user(&tp, param, sizeof(tp))) {
110 retval = -EFAULT;
111 goto out;
112 }
113
114 /* Task search and manipulation must be protected */
115 read_lock_irq(&tasklist_lock);
116 rcu_read_lock();
117 if (!(target = find_task_by_vpid(pid))) {
118 retval = -ESRCH;
119 rcu_read_unlock();
120 goto out_unlock;
121 }
122 rcu_read_unlock();
123
124 /* set relative deadline to be implicit if left unspecified */
125 if (tp.relative_deadline == 0)
126 tp.relative_deadline = tp.period;
127
128 if (tp.exec_cost <= 0)
129 goto out_unlock;
130 if (tp.period <= 0)
131 goto out_unlock;
132 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
133 {
134 printk(KERN_INFO "litmus: real-time task %d rejected "
135 "because task density > 1.0\n", pid);
136 goto out_unlock;
137 }
138 if (tp.cls != RT_CLASS_HARD &&
139 tp.cls != RT_CLASS_SOFT &&
140 tp.cls != RT_CLASS_BEST_EFFORT)
141 {
142 printk(KERN_INFO "litmus: real-time task %d rejected "
143 "because its class is invalid\n", pid);
144 goto out_unlock;
145 }
146 if (tp.budget_policy != NO_ENFORCEMENT &&
147 tp.budget_policy != QUANTUM_ENFORCEMENT &&
148 tp.budget_policy != PRECISE_ENFORCEMENT)
149 {
150 printk(KERN_INFO "litmus: real-time task %d rejected "
151 "because unsupported budget enforcement policy "
152 "specified (%d)\n",
153 pid, tp.budget_policy);
154 goto out_unlock;
155 }
156
157 if (is_realtime(target)) {
158 /* The task is already a real-time task.
159 * Let plugin decide whether it wants to support
160 * parameter changes at runtime.
161 */
162 retval = litmus->task_change_params(target, &tp);
163 } else {
164 target->rt_param.task_params = tp;
165 retval = 0;
166 }
167 out_unlock:
168 read_unlock_irq(&tasklist_lock);
169 out:
170 return retval;
171}
172
173/*
174 * Getter of task's RT params
175 * returns EINVAL if param or pid is NULL
176 * returns ESRCH if pid does not correspond to a valid task
177 * returns EFAULT if copying of parameters has failed.
178 *
179 * find_task_by_vpid() assumes that we are in the same namespace as the
180 * target.
181 */
182asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
183{
184 int retval = -EINVAL;
185 struct task_struct *source;
186 struct rt_task lp;
187
188 if (param == 0 || pid < 0)
189 goto out;
190
191 read_lock_irq(&tasklist_lock);
192 rcu_read_lock();
193 source = find_task_by_vpid(pid);
194 rcu_read_unlock();
195 if (!source) {
196 retval = -ESRCH;
197 read_unlock_irq(&tasklist_lock);
198 goto out;
199 }
200 lp = source->rt_param.task_params;
201 read_unlock_irq(&tasklist_lock);
202 /* Do copying outside the lock */
203 retval =
204 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
205 out:
206 return retval;
207
208}
209
210/*
211 * This is the crucial function for periodic task implementation.
212 * It checks if a task is periodic, checks if such kind of sleep
213 * is permitted and calls plugin-specific sleep, which puts the
214 * task into a wait array.
215 * returns 0 on successful wakeup
216 * returns EPERM if current conditions do not permit such sleep
217 * returns EINVAL if current task is not able to go to sleep
218 */
219asmlinkage long sys_complete_job(void)
220{
221 int retval = -EPERM;
222 if (!is_realtime(current)) {
223 retval = -EINVAL;
224 goto out;
225 }
226 /* Task with negative or zero period cannot sleep */
227 if (get_rt_period(current) <= 0) {
228 retval = -EINVAL;
229 goto out;
230 }
231 /* The plugin has to put the task into an
232 * appropriate queue and call schedule
233 */
234 retval = litmus->complete_job();
235 out:
236 return retval;
237}
238
239/* This is an "improved" version of sys_complete_job that
240 * addresses the problem of unintentionally missing a job after
241 * an overrun.
242 *
243 * returns 0 on successful wakeup
244 * returns EPERM if current conditions do not permit such sleep
245 * returns EINVAL if current task is not able to go to sleep
246 */
247asmlinkage long sys_wait_for_job_release(unsigned int job)
248{
249 int retval = -EPERM;
250 if (!is_realtime(current)) {
251 retval = -EINVAL;
252 goto out;
253 }
254
255 /* Task with negative or zero period cannot sleep */
256 if (get_rt_period(current) <= 0) {
257 retval = -EINVAL;
258 goto out;
259 }
260
261 retval = 0;
262
263 /* first wait until we have "reached" the desired job
264 *
265 * This implementation has at least two problems:
266 *
267 * 1) It doesn't gracefully handle the wrap around of
268 * job_no. Since LITMUS is a prototype, this is not much
269 * of a problem right now.
270 *
271 * 2) It is theoretically racy if a job release occurs
272 * between checking job_no and calling sleep_next_period().
273	 * A proper solution would require adding another callback
274 * in the plugin structure and testing the condition with
275 * interrupts disabled.
276 *
277 * FIXME: At least problem 2 should be taken care of eventually.
278 */
279 while (!retval && job > current->rt_param.job_params.job_no)
280 /* If the last job overran then job <= job_no and we
281 * don't send the task to sleep.
282 */
283 retval = litmus->complete_job();
284 out:
285 return retval;
286}
287
288/* This is a helper syscall to query the current job sequence number.
289 *
290 * returns 0 on successful query
291 * returns EPERM if task is not a real-time task.
292 * returns EFAULT if &job is not a valid pointer.
293 */
294asmlinkage long sys_query_job_no(unsigned int __user *job)
295{
296 int retval = -EPERM;
297 if (is_realtime(current))
298 retval = put_user(current->rt_param.job_params.job_no, job);
299
300 return retval;
301}
302
303/* sys_null_call() is only used for determining raw system call
304 * overheads (kernel entry, kernel exit). It has no useful side effects.
305 * If ts is non-NULL, then the current Feather-Trace time is recorded.
306 */
307asmlinkage long sys_null_call(cycles_t __user *ts)
308{
309 long ret = 0;
310 cycles_t now;
311
312 if (ts) {
313 now = get_cycles();
314 ret = put_user(now, ts);
315 }
316
317 return ret;
318}
319
320asmlinkage long sys_reservation_create(int type, void __user *config)
321{
322 return litmus->reservation_create(type, config);
323}
324
325asmlinkage long sys_reservation_destroy(unsigned int reservation_id, int cpu)
326{
327 return litmus->reservation_destroy(reservation_id, cpu);
328}
329
330/* p is a real-time task. Re-init its state as a best-effort task. */
331static void reinit_litmus_state(struct task_struct* p, int restore)
332{
333 struct rt_task user_config = {};
334 void* ctrl_page = NULL;
335
336 if (restore) {
337 /* Save user-space provided configuration data
338 * and the allocated control page. */
339 user_config = p->rt_param.task_params;
340 ctrl_page = p->rt_param.ctrl_page;
341 }
342
343 /* We probably should not be inheriting any task's priority
344 * at this point in time.
345 */
346 WARN_ON(p->rt_param.inh_task);
347
348 /* Cleanup everything else. */
349 memset(&p->rt_param, 0, sizeof(p->rt_param));
350
351 /* Restore preserved fields. */
352 if (restore) {
353 p->rt_param.task_params = user_config;
354 p->rt_param.ctrl_page = ctrl_page;
355 }
356}
357
358static long __litmus_admit_task(struct task_struct* tsk)
359{
360 long err;
361
362 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
363
364 /* allocate heap node for this task */
365 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
366 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
367
368 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
369 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
370
371 return -ENOMEM;
372 } else {
373 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
374 }
375
376 preempt_disable();
377
378 err = litmus->admit_task(tsk);
379
380 if (!err) {
381 sched_trace_task_name(tsk);
382 sched_trace_task_param(tsk);
383 atomic_inc(&rt_task_count);
384 }
385
386 preempt_enable();
387
388 return err;
389}
390
391long litmus_admit_task(struct task_struct* tsk)
392{
393 long retval = 0;
394
395 BUG_ON(is_realtime(tsk));
396
397 tsk_rt(tsk)->heap_node = NULL;
398 tsk_rt(tsk)->rel_heap = NULL;
399
400 if (get_rt_relative_deadline(tsk) == 0 ||
401 get_exec_cost(tsk) >
402 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
403 TRACE_TASK(tsk,
404 "litmus admit: invalid task parameters "
405 "(e = %lu, p = %lu, d = %lu)\n",
406 get_exec_cost(tsk), get_rt_period(tsk),
407 get_rt_relative_deadline(tsk));
408 retval = -EINVAL;
409 goto out;
410 }
411
412 retval = __litmus_admit_task(tsk);
413
414out:
415 if (retval) {
416 if (tsk_rt(tsk)->heap_node)
417 bheap_node_free(tsk_rt(tsk)->heap_node);
418 if (tsk_rt(tsk)->rel_heap)
419 release_heap_free(tsk_rt(tsk)->rel_heap);
420 }
421 return retval;
422}
423
424void litmus_clear_state(struct task_struct* tsk)
425{
426 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
427 bheap_node_free(tsk_rt(tsk)->heap_node);
428 release_heap_free(tsk_rt(tsk)->rel_heap);
429
430 atomic_dec(&rt_task_count);
431 reinit_litmus_state(tsk, 1);
432}
433
434/* called from sched_setscheduler() */
435void litmus_exit_task(struct task_struct* tsk)
436{
437 if (is_realtime(tsk)) {
438 sched_trace_task_completion(tsk, 1);
439
440 litmus->task_exit(tsk);
441 }
442}
443
444static DECLARE_RWSEM(plugin_switch_mutex);
445
446void litmus_plugin_switch_disable(void)
447{
448 down_read(&plugin_switch_mutex);
449}
450
451void litmus_plugin_switch_enable(void)
452{
453 up_read(&plugin_switch_mutex);
454}
455
456static int __do_plugin_switch(struct sched_plugin* plugin)
457{
458 int ret;
459
460
461 /* don't switch if there are active real-time tasks */
462 if (atomic_read(&rt_task_count) == 0) {
463 TRACE("deactivating plugin %s\n", litmus->plugin_name);
464 ret = litmus->deactivate_plugin();
465 if (0 != ret)
466 goto out;
467
468 TRACE("activating plugin %s\n", plugin->plugin_name);
469 ret = plugin->activate_plugin();
470 if (0 != ret) {
471 printk(KERN_INFO "Can't activate %s (%d).\n",
472 plugin->plugin_name, ret);
473 plugin = &linux_sched_plugin;
474 }
475
476 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
477 litmus = plugin;
478 } else
479 ret = -EBUSY;
480out:
481 TRACE("do_plugin_switch() => %d\n", ret);
482 return ret;
483}
484
485static atomic_t ready_to_switch;
486
487static int do_plugin_switch(void *_plugin)
488{
489 unsigned long flags;
490 int ret = 0;
491
492 local_save_flags(flags);
493 local_irq_disable();
494 hard_irq_disable();
495
496 if (atomic_dec_and_test(&ready_to_switch))
497 {
498 ret = __do_plugin_switch((struct sched_plugin*) _plugin);
499 atomic_set(&ready_to_switch, INT_MAX);
500 }
501
502 do {
503 cpu_relax();
504 } while (atomic_read(&ready_to_switch) != INT_MAX);
505
506 local_irq_restore(flags);
507 return ret;
508}
509
510/* Switching a plugin in use is tricky.
511 * We must make sure that no real-time tasks exist
512 * (and that none are created in parallel) and that the plugin is not
513 * currently in use on any processor (in theory).
514 */
515int switch_sched_plugin(struct sched_plugin* plugin)
516{
517 int err;
518 struct domain_proc_info* domain_info;
519
520 BUG_ON(!plugin);
521
522 if (atomic_read(&rt_task_count) == 0) {
523 down_write(&plugin_switch_mutex);
524
525 deactivate_domain_proc();
526
527 get_online_cpus();
528 atomic_set(&ready_to_switch, num_online_cpus());
529 err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin);
530 put_online_cpus();
531
532 if (!litmus->get_domain_proc_info(&domain_info))
533 activate_domain_proc(domain_info);
534
535 up_write(&plugin_switch_mutex);
536 return err;
537 } else
538 return -EBUSY;
539}
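/*
 * Illustrative sketch (example only, hypothetical names): the rendezvous
 * pattern used by do_plugin_switch() above, restated with C11 atomics.
 * Each CPU decrements a shared counter; the last CPU to arrive performs the
 * switch and then releases all others by storing the INT_MAX sentinel.
 */
#if 0 /* example only, never compiled */
#include <limits.h>
#include <stdatomic.h>

static atomic_int rendezvous; /* initialized to the number of participants */

void example_per_cpu_rendezvous(void (*last_arrival_work)(void))
{
	if (atomic_fetch_sub(&rendezvous, 1) == 1) {
		/* we were the last to arrive: do the switch... */
		last_arrival_work();
		/* ...and release everybody else */
		atomic_store(&rendezvous, INT_MAX);
	}
	/* all participants spin until the switch has completed */
	while (atomic_load(&rendezvous) != INT_MAX)
		;
}
#endif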
540
541/* Called upon fork.
542 * p is the newly forked task.
543 */
544void litmus_fork(struct task_struct* p)
545{
546 /* non-rt tasks might have ctrl_page set */
547 tsk_rt(p)->ctrl_page = NULL;
548
549 if (is_realtime(p)) {
550 reinit_litmus_state(p, 1);
551 if (litmus->fork_task(p)) {
552 if (__litmus_admit_task(p))
553 /* something went wrong, give up */
554 p->sched_reset_on_fork = 1;
555 } else {
556 /* clean out any litmus related state */
557 reinit_litmus_state(p, 0);
558
559 TRACE_TASK(p, "fork: real-time status denied\n");
560 /* Don't let the child be a real-time task. */
561 p->sched_reset_on_fork = 1;
562 }
563 }
564
565 /* od tables are never inherited across a fork */
566 p->od_table = NULL;
567}
568
569/* Called upon execve().
570 * current is doing the exec.
571 * Don't let address space specific stuff leak.
572 */
573void litmus_exec(void)
574{
575 struct task_struct* p = current;
576
577 if (is_realtime(p)) {
578 WARN_ON(p->rt_param.inh_task);
579 if (tsk_rt(p)->ctrl_page) {
580 free_page((unsigned long) tsk_rt(p)->ctrl_page);
581 tsk_rt(p)->ctrl_page = NULL;
582 }
583 }
584}
585
586/* Called when dead_tsk is being deallocated
587 */
588void exit_litmus(struct task_struct *dead_tsk)
589{
590 /* We also allow non-RT tasks to
591 * allocate control pages to enable
592 * measurements with non-RT tasks.
593 * So check in any case whether
594 * we need to free the page.
595 */
596 if (tsk_rt(dead_tsk)->ctrl_page) {
597 TRACE_TASK(dead_tsk,
598 "freeing ctrl_page %p\n",
599 tsk_rt(dead_tsk)->ctrl_page);
600 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
601 }
602
603 /* Tasks should not be real-time tasks any longer at this point. */
604 BUG_ON(is_realtime(dead_tsk));
605}
606
607void litmus_do_exit(struct task_struct *exiting_tsk)
608{
609 /* This task called do_exit(), but is still a real-time task. To avoid
610 * complications later, we force it to be a non-real-time task now. */
611
612 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
613
614 TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n");
615 sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, &param);
616}
617
618void litmus_dealloc(struct task_struct *tsk)
619{
620 /* tsk is no longer a real-time task */
621 TRACE_TASK(tsk, "Deallocating real-time task data\n");
622 litmus->task_cleanup(tsk);
623 litmus_clear_state(tsk);
624}
625
626/* move current non-RT task to a specific CPU */
627int litmus_be_migrate_to(int cpu)
628{
629 struct cpumask single_cpu_aff;
630
631 cpumask_clear(&single_cpu_aff);
632 cpumask_set_cpu(cpu, &single_cpu_aff);
633 return sched_setaffinity(current->pid, &single_cpu_aff);
634}
635
636#ifdef CONFIG_MAGIC_SYSRQ
637int sys_kill(int pid, int sig);
638
639static void sysrq_handle_kill_rt_tasks(int key)
640{
641 struct task_struct *t;
642 read_lock(&tasklist_lock);
643 for_each_process(t) {
644 if (is_realtime(t)) {
645 sys_kill(t->pid, SIGKILL);
646 }
647 }
648 read_unlock(&tasklist_lock);
649}
650
651static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
652 .handler = sysrq_handle_kill_rt_tasks,
653 .help_msg = "quit-rt-tasks(X)",
654 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
655};
656#endif
657
658extern struct sched_plugin linux_sched_plugin;
659
660static int litmus_shutdown_nb(struct notifier_block *unused1,
661 unsigned long unused2, void *unused3)
662{
663 /* Attempt to switch back to regular Linux scheduling.
664 * Forces the active plugin to clean up.
665 */
666 if (litmus != &linux_sched_plugin) {
667 int ret = switch_sched_plugin(&linux_sched_plugin);
668 if (ret) {
669 printk("Auto-shutdown of active Litmus plugin failed.\n");
670 }
671 }
672 return NOTIFY_DONE;
673}
674
675static struct notifier_block shutdown_notifier = {
676 .notifier_call = litmus_shutdown_nb,
677};
678
679/**
680 * Triggering hrtimers on specific cpus as required by arm_release_timer(_on)
681 */
682#ifdef CONFIG_SMP
683
684/**
685 * hrtimer_pull - smp_call_function_single_async callback on remote cpu
686 */
687void hrtimer_pull(void *csd_info)
688{
689 struct hrtimer_start_on_info *info = csd_info;
690 TRACE("pulled timer %p\n", info->timer);
691 hrtimer_start_range_ns(info->timer, info->time, 0, info->mode);
692}
693
694/**
695 * hrtimer_start_on - trigger timer arming on remote cpu
696 * @cpu: remote cpu
697 * @info: save timer information for enqueuing on remote cpu
698 * @timer: timer to be pulled
699 * @time: expire time
700 * @mode: timer mode
701 */
702void hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
703 struct hrtimer *timer, ktime_t time,
704 const enum hrtimer_mode mode)
705{
706 info->timer = timer;
707 info->time = time;
708 info->mode = mode;
709
710 /* initialize call_single_data struct */
711 info->csd.func = &hrtimer_pull;
712 info->csd.info = info;
713 info->csd.flags = 0;
714
715 /* initiate pull */
716 preempt_disable();
717 if (cpu == smp_processor_id()) {
718 /* start timer locally; we may get called
719 * with rq->lock held, do not wake up anything
720 */
721 TRACE("hrtimer_start_on: starting on local CPU\n");
722 hrtimer_start(info->timer, info->time, info->mode);
723 } else {
724 /* call hrtimer_pull() on remote cpu
725 * to start remote timer asynchronously
726 */
727 TRACE("hrtimer_start_on: pulling to remote CPU\n");
728 smp_call_function_single_async(cpu, &info->csd);
729 }
730 preempt_enable();
731}
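/*
 * Illustrative sketch (example only, hypothetical names): arming a one-shot
 * release timer on a chosen CPU with hrtimer_start_on(). The container
 * struct, helper name, and choice of timer mode are made up; the timer is
 * assumed to have been set up with hrtimer_init() by the caller.
 */
#if 0 /* example only, never compiled */
struct example_release_timer {
	struct hrtimer timer;
	struct hrtimer_start_on_info info;
};

static void example_arm_release_timer(struct example_release_timer *rt,
				      lt_t release_time_ns, int target_cpu)
{
	hrtimer_start_on(target_cpu, &rt->info, &rt->timer,
			 ns_to_ktime(release_time_ns),
			 HRTIMER_MODE_ABS_PINNED);
}
#endif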
732
733#endif /* CONFIG_SMP */
734
735static int __init _init_litmus(void)
736{
737 /* Common initialization: register the default (Linux) plugin,
738 * set up the slab caches, the sysrq handler, the procfs
739 * entries, and the reboot notifier.
740 */
741 printk("Starting LITMUS^RT kernel\n");
742
743 register_sched_plugin(&linux_sched_plugin);
744
745 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
746 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
747
748#ifdef CONFIG_MAGIC_SYSRQ
749 /* offer some debugging help */
750 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
751 printk("Registered kill rt tasks magic sysrq.\n");
752 else
753 printk("Could not register kill rt tasks magic sysrq.\n");
754#endif
755
756 init_litmus_proc();
757
758 register_reboot_notifier(&shutdown_notifier);
759
760 return 0;
761}
762
763static void _exit_litmus(void)
764{
765 unregister_reboot_notifier(&shutdown_notifier);
766
767 exit_litmus_proc();
768 kmem_cache_destroy(bheap_node_cache);
769 kmem_cache_destroy(release_heap_cache);
770}
771
772module_init(_init_litmus);
773module_exit(_exit_litmus);
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 000000000000..de5e3f37fe88
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,574 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/slab.h>
7#include <linux/uaccess.h>
8#include <linux/seq_file.h>
9
10#include <litmus/debug_trace.h>
11#include <litmus/litmus.h>
12#include <litmus/litmus_proc.h>
13
14#include <litmus/clustered.h>
15
16/* in litmus/litmus.c */
17extern atomic_t rt_task_count;
18
19static struct proc_dir_entry *litmus_dir = NULL,
20 *curr_file = NULL,
21 *stat_file = NULL,
22 *plugs_dir = NULL,
23#ifdef CONFIG_RELEASE_MASTER
24 *release_master_file = NULL,
25#endif
26 *plugs_file = NULL,
27 *domains_dir = NULL,
28 *cpus_dir = NULL;
29
30
31/* in litmus/sync.c */
32int count_tasks_waiting_for_release(void);
33
34static int litmus_stats_proc_show(struct seq_file *m, void *v)
35{
36 seq_printf(m,
37 "real-time tasks = %d\n"
38 "ready for release = %d\n",
39 atomic_read(&rt_task_count),
40 count_tasks_waiting_for_release());
41 return 0;
42}
43
44static int litmus_stats_proc_open(struct inode *inode, struct file *file)
45{
46 return single_open(file, litmus_stats_proc_show, PDE_DATA(inode));
47}
48
49static const struct file_operations litmus_stats_proc_fops = {
50 .open = litmus_stats_proc_open,
51 .read = seq_read,
52 .llseek = seq_lseek,
53 .release = single_release,
54};
55
56
57static int litmus_loaded_proc_show(struct seq_file *m, void *v)
58{
59 print_sched_plugins(m);
60 return 0;
61}
62
63static int litmus_loaded_proc_open(struct inode *inode, struct file *file)
64{
65 return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode));
66}
67
68static const struct file_operations litmus_loaded_proc_fops = {
69 .open = litmus_loaded_proc_open,
70 .read = seq_read,
71 .llseek = seq_lseek,
72 .release = single_release,
73};
74
75
76
77
78/* in litmus/litmus.c */
79int switch_sched_plugin(struct sched_plugin*);
80
81static ssize_t litmus_active_proc_write(struct file *file,
82 const char __user *buffer, size_t count,
83 loff_t *ppos)
84{
85 char name[65];
86 struct sched_plugin* found;
87 ssize_t ret = -EINVAL;
88 int err;
89
90
91 ret = copy_and_chomp(name, sizeof(name), buffer, count);
92 if (ret < 0)
93 return ret;
94
95 found = find_sched_plugin(name);
96
97 if (found) {
98 err = switch_sched_plugin(found);
99 if (err) {
100 printk(KERN_INFO "Could not switch plugin: %d\n", err);
101 ret = err;
102 }
103 } else {
104 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
105 ret = -ESRCH;
106 }
107
108 return ret;
109}
110
111static int litmus_active_proc_show(struct seq_file *m, void *v)
112{
113 seq_printf(m, "%s\n", litmus->plugin_name);
114 return 0;
115}
116
117static int litmus_active_proc_open(struct inode *inode, struct file *file)
118{
119 return single_open(file, litmus_active_proc_show, PDE_DATA(inode));
120}
121
122static const struct file_operations litmus_active_proc_fops = {
123 .open = litmus_active_proc_open,
124 .read = seq_read,
125 .llseek = seq_lseek,
126 .release = single_release,
127 .write = litmus_active_proc_write,
128};
129
130
131#ifdef CONFIG_RELEASE_MASTER
132static ssize_t litmus_release_master_proc_write(
133 struct file *file,
134 const char __user *buffer, size_t count,
135 loff_t *ppos)
136{
137 int cpu, err, online = 0;
138 char msg[64];
139 ssize_t len;
140
141 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
142
143 if (len < 0)
144 return len;
145
146 if (strcmp(msg, "NO_CPU") == 0)
147 atomic_set(&release_master_cpu, NO_CPU);
148 else {
149 err = sscanf(msg, "%d", &cpu);
150 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
151 atomic_set(&release_master_cpu, cpu);
152 } else {
153 TRACE("invalid release master: '%s' "
154 "(err:%d cpu:%d online:%d)\n",
155 msg, err, cpu, online);
156 len = -EINVAL;
157 }
158 }
159 return len;
160}
161
162static int litmus_release_master_proc_show(struct seq_file *m, void *v)
163{
164 int master;
165 master = atomic_read(&release_master_cpu);
166 if (master == NO_CPU)
167 seq_printf(m, "NO_CPU\n");
168 else
169 seq_printf(m, "%d\n", master);
170 return 0;
171}
172
173static int litmus_release_master_proc_open(struct inode *inode, struct file *file)
174{
175 return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode));
176}
177
178static const struct file_operations litmus_release_master_proc_fops = {
179 .open = litmus_release_master_proc_open,
180 .read = seq_read,
181 .llseek = seq_lseek,
182 .release = single_release,
183 .write = litmus_release_master_proc_write,
184};
185#endif
186
187int __init init_litmus_proc(void)
188{
189 litmus_dir = proc_mkdir("litmus", NULL);
190 if (!litmus_dir) {
191 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
192 return -ENOMEM;
193 }
194
195 curr_file = proc_create("active_plugin", 0644, litmus_dir,
196 &litmus_active_proc_fops);
197
198 if (!curr_file) {
199 printk(KERN_ERR "Could not allocate active_plugin "
200 "procfs entry.\n");
201 return -ENOMEM;
202 }
203
204#ifdef CONFIG_RELEASE_MASTER
205 release_master_file = proc_create("release_master", 0644, litmus_dir,
206 &litmus_release_master_proc_fops);
207 if (!release_master_file) {
208 printk(KERN_ERR "Could not allocate release_master "
209 "procfs entry.\n");
210 return -ENOMEM;
211 }
212#endif
213
214 stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops);
215
216 plugs_dir = proc_mkdir("plugins", litmus_dir);
217 if (!plugs_dir){
218 printk(KERN_ERR "Could not allocate plugins directory "
219 "procfs entry.\n");
220 return -ENOMEM;
221 }
222
223 plugs_file = proc_create("loaded", 0444, plugs_dir,
224 &litmus_loaded_proc_fops);
225
226 domains_dir = proc_mkdir("domains", litmus_dir);
227 if (!domains_dir) {
228 printk(KERN_ERR "Could not allocate domains directory "
229 "procfs entry.\n");
230 return -ENOMEM;
231 }
232
233 cpus_dir = proc_mkdir("cpus", litmus_dir);
234 if (!cpus_dir) {
235 printk(KERN_ERR "Could not allocate cpus directory "
236 "procfs entry.\n");
237 return -ENOMEM;
238 }
239
240 return 0;
241}
242
243void exit_litmus_proc(void)
244{
245 if (cpus_dir || domains_dir) {
246 deactivate_domain_proc();
247 if (cpus_dir)
248 remove_proc_entry("cpus", litmus_dir);
249 if (domains_dir)
250 remove_proc_entry("domains", litmus_dir);
251 }
252 if (plugs_file)
253 remove_proc_entry("loaded", plugs_dir);
254 if (plugs_dir)
255 remove_proc_entry("plugins", litmus_dir);
256 if (stat_file)
257 remove_proc_entry("stats", litmus_dir);
258 if (curr_file)
259 remove_proc_entry("active_plugin", litmus_dir);
260#ifdef CONFIG_RELEASE_MASTER
261 if (release_master_file)
262 remove_proc_entry("release_master", litmus_dir);
263#endif
264 if (litmus_dir)
265 remove_proc_entry("litmus", NULL);
266}
267
268long make_plugin_proc_dir(struct sched_plugin* plugin,
269 struct proc_dir_entry** pde_in)
270{
271 struct proc_dir_entry *pde_new = NULL;
272 long rv;
273
274 if (!plugin || !plugin->plugin_name){
275 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
276 __func__);
277 rv = -EINVAL;
278 goto out_no_pde;
279 }
280
281 if (!plugs_dir){
282 printk(KERN_ERR "Could not make plugin sub-directory, because "
283 "/proc/litmus/plugins does not exist.\n");
284 rv = -ENOENT;
285 goto out_no_pde;
286 }
287
288 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
289 if (!pde_new){
290 printk(KERN_ERR "Could not make plugin sub-directory: "
291 "out of memory?\n");
292 rv = -ENOMEM;
293 goto out_no_pde;
294 }
295
296 rv = 0;
297 *pde_in = pde_new;
298 goto out_ok;
299
300out_no_pde:
301 *pde_in = NULL;
302out_ok:
303 return rv;
304}
305
306void remove_plugin_proc_dir(struct sched_plugin* plugin)
307{
308 if (!plugin || !plugin->plugin_name){
309 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
310 __func__);
311 return;
312 }
313 remove_proc_entry(plugin->plugin_name, plugs_dir);
314}
315
316
317
318/* misc. I/O helper functions */
319
320int copy_and_chomp(char *kbuf, unsigned long ksize,
321 __user const char* ubuf, unsigned long ulength)
322{
323 /* caller must provide buffer space */
324 BUG_ON(!ksize);
325
326 ksize--; /* leave space for null byte */
327
328 if (ksize > ulength)
329 ksize = ulength;
330
331 if(copy_from_user(kbuf, ubuf, ksize))
332 return -EFAULT;
333
334 kbuf[ksize] = '\0';
335
336 /* chomp kbuf */
337 if (ksize > 0 && kbuf[ksize - 1] == '\n')
338 kbuf[ksize - 1] = '\0';
339
340 return ksize;
341}
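/*
 * Illustrative sketch (example only, hypothetical names): typical use of
 * copy_and_chomp() in a proc write handler. Writing "GSN-EDF\n" yields the
 * NUL-terminated kernel string "GSN-EDF"; the handler name and buffer size
 * are made up.
 */
#if 0 /* example only, never compiled */
static ssize_t example_proc_write(struct file *file, const char __user *buffer,
				  size_t count, loff_t *ppos)
{
	char kbuf[64];
	ssize_t len = copy_and_chomp(kbuf, sizeof(kbuf), buffer, count);

	if (len < 0)
		return len; /* -EFAULT */

	/* kbuf now holds the written line without its trailing newline */
	return len;
}
#endif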
342
343/* helper functions for clustered plugins */
344static const char* cache_level_names[] = {
345 "ALL",
346 "L1",
347 "L2",
348 "L3",
349};
350
351int parse_cache_level(const char *cache_name, enum cache_level *level)
352{
353 int err = -EINVAL;
354 int i;
355 /* do a quick and dirty comparison to find the cluster size */
356 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
357 if (!strcmp(cache_name, cache_level_names[i])) {
358 *level = (enum cache_level) i;
359 err = 0;
360 break;
361 }
362 return err;
363}
364
365const char* cache_level_name(enum cache_level level)
366{
367 int idx = level;
368
369 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
370 return cache_level_names[idx];
371 else
372 return "INVALID";
373}
374
375
376
377
378/* proc file interface to configure the cluster size */
379
380static ssize_t litmus_cluster_proc_write(struct file *file,
381 const char __user *buffer, size_t count,
382 loff_t *ppos)
383{
384 enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file));
385 ssize_t len;
386 char cache_name[8];
387
388 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
389
390 if (len > 0 && parse_cache_level(cache_name, level)) {
391 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
392 len = -EINVAL;
393 }
394
395 return len;
396}
397
398static int litmus_cluster_proc_show(struct seq_file *m, void *v)
399{
400 enum cache_level *level = (enum cache_level *) m->private;
401
402 seq_printf(m, "%s\n", cache_level_name(*level));
403 return 0;
404}
405
406static int litmus_cluster_proc_open(struct inode *inode, struct file *file)
407{
408 return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode));
409}
410
411static const struct file_operations litmus_cluster_proc_fops = {
412 .open = litmus_cluster_proc_open,
413 .read = seq_read,
414 .llseek = seq_lseek,
415 .release = single_release,
416 .write = litmus_cluster_proc_write,
417};
418
419struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
420 enum cache_level* level)
421{
422 struct proc_dir_entry* cluster_file;
423
424
425 cluster_file = proc_create_data("cluster", 0644, parent,
426 &litmus_cluster_proc_fops,
427 (void *) level);
428 if (!cluster_file) {
429 printk(KERN_ERR
430 "Could not create cluster procfs entry.\n");
431 }
432 return cluster_file;
433}
434
435static struct domain_proc_info* active_mapping = NULL;
436
437static int litmus_mapping_proc_show(struct seq_file *m, void *v)
438{
439 struct cd_mapping *mapping = (struct cd_mapping*) m->private;
440
441 if(!mapping)
442 return 0;
443
444 seq_printf(m, "%*pb\n", cpumask_pr_args(mapping->mask));
445 return 0;
446}
447
448static int litmus_mapping_proc_open(struct inode *inode, struct file *file)
449{
450 return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode));
451}
452
453static const struct file_operations litmus_domain_proc_fops = {
454 .open = litmus_mapping_proc_open,
455 .read = seq_read,
456 .llseek = seq_lseek,
457 .release = single_release,
458};
459
460long activate_domain_proc(struct domain_proc_info* map)
461{
462 int i;
463 char name[8];
464
465 if (!map)
466 return -EINVAL;
467 if (cpus_dir == NULL || domains_dir == NULL)
468 return -EINVAL;
469
470 if (active_mapping)
471 deactivate_domain_proc();
472
473 active_mapping = map;
474
475 for (i = 0; i < map->num_cpus; ++i) {
476 struct cd_mapping* m = &map->cpu_to_domains[i];
477 snprintf(name, sizeof(name), "%d", m->id);
478 m->proc_file = proc_create_data(name, 0444, cpus_dir,
479 &litmus_domain_proc_fops, (void*)m);
480 }
481
482 for (i = 0; i < map->num_domains; ++i) {
483 struct cd_mapping* m = &map->domain_to_cpus[i];
484 snprintf(name, sizeof(name), "%d", m->id);
485 m->proc_file = proc_create_data(name, 0444, domains_dir,
486 &litmus_domain_proc_fops, (void*)m);
487 }
488
489 return 0;
490}
491
492long deactivate_domain_proc()
493{
494 int i;
495 char name[65];
496
497 struct domain_proc_info* map = active_mapping;
498
499 if (!map)
500 return -EINVAL;
501
502 for (i = 0; i < map->num_cpus; ++i) {
503 struct cd_mapping* m = &map->cpu_to_domains[i];
504 snprintf(name, sizeof(name), "%d", m->id);
505 remove_proc_entry(name, cpus_dir);
506 m->proc_file = NULL;
507 }
508 for (i = 0; i < map->num_domains; ++i) {
509 struct cd_mapping* m = &map->domain_to_cpus[i];
510 snprintf(name, sizeof(name), "%d", m->id);
511 remove_proc_entry(name, domains_dir);
512 m->proc_file = NULL;
513 }
514
515 active_mapping = NULL;
516
517 return 0;
518}
519
520long init_domain_proc_info(struct domain_proc_info* m,
521 int num_cpus, int num_domains)
522{
523 int i;
524 int num_alloced_cpu_masks = 0;
525 int num_alloced_domain_masks = 0;
526
527 m->cpu_to_domains =
528 kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus,
529 GFP_ATOMIC);
530 if(!m->cpu_to_domains)
531 goto failure;
532
533 m->domain_to_cpus =
534 kmalloc(sizeof(*(m->domain_to_cpus))*num_domains,
535 GFP_ATOMIC);
536 if(!m->domain_to_cpus)
537 goto failure;
538
539 for(i = 0; i < num_cpus; ++i) {
540 if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC))
541 goto failure;
542 ++num_alloced_cpu_masks;
543 }
544 for(i = 0; i < num_domains; ++i) {
545 if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC))
546 goto failure;
547 ++num_alloced_domain_masks;
548 }
549
550 return 0;
551
552failure:
553 for(i = 0; i < num_alloced_cpu_masks; ++i)
554 free_cpumask_var(m->cpu_to_domains[i].mask);
555 for(i = 0; i < num_alloced_domain_masks; ++i)
556 free_cpumask_var(m->domain_to_cpus[i].mask);
557 if(m->cpu_to_domains)
558 kfree(m->cpu_to_domains);
559 if(m->domain_to_cpus)
560 kfree(m->domain_to_cpus);
561 return -ENOMEM;
562}
563
564void destroy_domain_proc_info(struct domain_proc_info* m)
565{
566 int i;
567 for(i = 0; i < m->num_cpus; ++i)
568 free_cpumask_var(m->cpu_to_domains[i].mask);
569 for(i = 0; i < m->num_domains; ++i)
570 free_cpumask_var(m->domain_to_cpus[i].mask);
571 kfree(m->cpu_to_domains);
572 kfree(m->domain_to_cpus);
573 memset(m, 0, sizeof(*m));
574}
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 000000000000..a1d0515c5613
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,189 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
3#include <litmus/fdso.h>
4#include <litmus/debug_trace.h>
5
6#ifdef CONFIG_LITMUS_LOCKING
7
8#include <linux/sched.h>
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/trace.h>
12#include <litmus/wait.h>
13
14static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
15static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
16static int close_generic_lock(struct od_table_entry* entry);
17static void destroy_generic_lock(obj_type_t type, void* sem);
18
19struct fdso_ops generic_lock_ops = {
20 .create = create_generic_lock,
21 .open = open_generic_lock,
22 .close = close_generic_lock,
23 .destroy = destroy_generic_lock
24};
25
26static inline bool is_lock(struct od_table_entry* entry)
27{
28 return entry->class == &generic_lock_ops;
29}
30
31static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
32{
33 BUG_ON(!is_lock(entry));
34 return (struct litmus_lock*) entry->obj->obj;
35}
36
37static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
38{
39 struct litmus_lock* lock;
40 int err;
41
42 err = litmus->allocate_lock(&lock, type, arg);
43 if (err == 0)
44 *obj_ref = lock;
45 return err;
46}
47
48static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
49{
50 struct litmus_lock* lock = get_lock(entry);
51 if (lock->ops->open)
52 return lock->ops->open(lock, arg);
53 else
54 return 0; /* default: any task can open it */
55}
56
57static int close_generic_lock(struct od_table_entry* entry)
58{
59 struct litmus_lock* lock = get_lock(entry);
60 if (lock->ops->close)
61 return lock->ops->close(lock);
62 else
63 return 0; /* default: closing succeeds */
64}
65
66static void destroy_generic_lock(obj_type_t type, void* obj)
67{
68 struct litmus_lock* lock = (struct litmus_lock*) obj;
69 lock->ops->deallocate(lock);
70}
71
72asmlinkage long sys_litmus_lock(int lock_od)
73{
74 long err = -EINVAL;
75 struct od_table_entry* entry;
76 struct litmus_lock* l;
77
78 TS_SYSCALL_IN_START;
79
80 TS_SYSCALL_IN_END;
81
82 TS_LOCK_START;
83
84 entry = get_entry_for_od(lock_od);
85 if (entry && is_lock(entry)) {
86 l = get_lock(entry);
87 TRACE_CUR("attempts to lock 0x%p\n", l);
88 err = l->ops->lock(l);
89 }
90
91 /* Note: task may have been suspended or preempted in between! Take
92 * this into account when computing overheads. */
93 TS_LOCK_END;
94
95 TS_SYSCALL_OUT_START;
96
97 return err;
98}
99
100asmlinkage long sys_litmus_unlock(int lock_od)
101{
102 long err = -EINVAL;
103 struct od_table_entry* entry;
104 struct litmus_lock* l;
105
106 TS_SYSCALL_IN_START;
107
108 TS_SYSCALL_IN_END;
109
110 TS_UNLOCK_START;
111
112 entry = get_entry_for_od(lock_od);
113 if (entry && is_lock(entry)) {
114 l = get_lock(entry);
115 TRACE_CUR("attempts to unlock 0x%p\n", l);
116 err = l->ops->unlock(l);
117 }
118
119 /* Note: task may have been preempted in between! Take this into
120 * account when computing overheads. */
121 TS_UNLOCK_END;
122
123 TS_SYSCALL_OUT_START;
124
125 return err;
126}
127
128struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
129{
130 wait_queue_t* q;
131 struct task_struct* t = NULL;
132
133 if (waitqueue_active(wq)) {
134 q = list_entry(wq->task_list.next,
135 wait_queue_t, task_list);
136 t = (struct task_struct*) q->private;
137 __remove_wait_queue(wq, q);
138 }
139 return(t);
140}
141
142unsigned int __add_wait_queue_prio_exclusive(
143 wait_queue_head_t* head,
144 prio_wait_queue_t *new)
145{
146 struct list_head *pos;
147 unsigned int passed = 0;
148
149 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
150
151 /* find a spot where the new entry is less than the next */
152 list_for_each(pos, &head->task_list) {
153 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
154 wq.task_list);
155
156 if (unlikely(lt_before(new->priority, queued->priority) ||
157 (new->priority == queued->priority &&
158 new->tie_breaker < queued->tie_breaker))) {
159 /* pos is not less than new, thus insert here */
160 __list_add(&new->wq.task_list, pos->prev, pos);
161 goto out;
162 }
163 passed++;
164 }
165
166 /* If we get to this point, either the list is empty or every
167 * queued element is less than new.
168 * Let's add new to the end. */
169 list_add_tail(&new->wq.task_list, &head->task_list);
170out:
171 return passed;
172}
173
174
175#else
176
177struct fdso_ops generic_lock_ops = {};
178
179asmlinkage long sys_litmus_lock(int sem_od)
180{
181 return -ENOSYS;
182}
183
184asmlinkage long sys_litmus_unlock(int sem_od)
185{
186 return -ENOSYS;
187}
188
189#endif
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 000000000000..9e2356db86df
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,143 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/preempt.h>
5#include <litmus/trace.h>
6
7DEFINE_PER_CPU(bool, litmus_preemption_in_progress);
8
9/* The rescheduling state of each processor.
10 */
11DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12
13void sched_state_will_schedule(struct task_struct* tsk)
14{
15 /* Litmus hack: we only care about processor-local invocations of
16 * set_tsk_need_resched(). We can't reliably set the flag remotely
17 * since it might race with other updates to the scheduling state. We
18 * can't rely on the runqueue lock protecting updates to the sched
19 * state since processors do not acquire the runqueue locks for all
20 * updates to the sched state (to avoid acquiring two runqueue locks at
21 * the same time). Further, if tsk is residing on a remote processor,
22 * then that processor doesn't actually know yet that it is going to
23 * reschedule; it still must receive an IPI (unless a local invocation
24 * races).
25 */
26 if (likely(task_cpu(tsk) == smp_processor_id())) {
27 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
28 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
29 set_sched_state(PICKED_WRONG_TASK);
30 else
31 set_sched_state(WILL_SCHEDULE);
32 } else
33 /* Litmus tasks should never be subject to a remote
34 * set_tsk_need_resched(). */
35 BUG_ON(is_realtime(tsk));
36#ifdef CONFIG_PREEMPT_STATE_TRACE
37 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
38 __builtin_return_address(0));
39#endif
40}
41
42/* Called by the IPI handler after another CPU called smp_send_resched(). */
43void sched_state_ipi(void)
44{
45 /* If the IPI was slow, we might be in any state right now. The IPI is
46 * only meaningful if we are in SHOULD_SCHEDULE. */
47 if (is_in_sched_state(SHOULD_SCHEDULE)) {
48 /* Cause scheduler to be invoked.
49 * This will cause a transition to WILL_SCHEDULE. */
50 set_tsk_need_resched(current);
51 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
52 current->comm, current->pid);
53 TS_SEND_RESCHED_END;
54 } else {
55 /* ignore */
56 TRACE_STATE("ignoring IPI in state %x (%s)\n",
57 get_sched_state(),
58 sched_state_name(get_sched_state()));
59 }
60}
61
62/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
63 * hold the lock that is used to serialize scheduling decisions. */
64void litmus_reschedule(int cpu)
65{
66 int picked_transition_ok = 0;
67 int scheduled_transition_ok = 0;
68
69 /* The (remote) CPU could be in any state. */
70
71 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
72 * is not aware of the need to reschedule at this point. */
73
74 /* is a context switch in progress? */
75 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
76 picked_transition_ok = sched_state_transition_on(
77 cpu, TASK_PICKED, PICKED_WRONG_TASK);
78
79 if (!picked_transition_ok &&
80 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
81 /* We either raced with the end of the context switch, or the
82 * CPU was in TASK_SCHEDULED anyway. */
83 scheduled_transition_ok = sched_state_transition_on(
84 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
85 }
86
87 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
88 * scheduler to be invoked. */
89 if (scheduled_transition_ok) {
90 if (smp_processor_id() == cpu) {
91 set_tsk_need_resched(current);
92 preempt_set_need_resched();
93 } else {
94 TS_SEND_RESCHED_START(cpu);
95 smp_send_reschedule(cpu);
96 }
97 }
98
99 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
100 __FUNCTION__,
101 picked_transition_ok,
102 scheduled_transition_ok);
103}
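/*
 * Illustrative sketch (example only, hypothetical names): a plugin
 * requesting a remote preemption while holding the lock that serializes its
 * scheduling decisions, as required by the comment above litmus_reschedule().
 * The lock and the decision logic are made up.
 */
#if 0 /* example only, never compiled */
static DEFINE_RAW_SPINLOCK(example_ready_lock);

static void example_check_for_preemption(int victim_cpu)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_ready_lock, flags);
	/* ... determine that victim_cpu is running a lower-priority task ... */
	litmus_reschedule(victim_cpu);
	raw_spin_unlock_irqrestore(&example_ready_lock, flags);
}
#endif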
104
105void litmus_reschedule_local(void)
106{
107 if (is_in_sched_state(TASK_PICKED))
108 set_sched_state(PICKED_WRONG_TASK);
109 else if (is_in_sched_state(TASK_SCHEDULED
110 | SHOULD_SCHEDULE
111 | PICKED_WRONG_TASK)) {
112 set_sched_state(WILL_SCHEDULE);
113 set_tsk_need_resched(current);
114 preempt_set_need_resched();
115 }
116}
117
118#ifdef CONFIG_DEBUG_KERNEL
119
120void sched_state_plugin_check(void)
121{
122 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
123 TRACE("!!!! plugin did not call sched_state_task_picked()! "
124 "Calling sched_state_task_picked() is mandatory---fix this.\n");
125 set_sched_state(TASK_PICKED);
126 }
127}
128
129#define NAME_CHECK(x) case x: return #x
130const char* sched_state_name(int s)
131{
132 switch (s) {
133 NAME_CHECK(TASK_SCHEDULED);
134 NAME_CHECK(SHOULD_SCHEDULE);
135 NAME_CHECK(WILL_SCHEDULE);
136 NAME_CHECK(TASK_PICKED);
137 NAME_CHECK(PICKED_WRONG_TASK);
138 default:
139 return "UNKNOWN";
140 };
141}
142
143#endif
diff --git a/litmus/reservations/Makefile b/litmus/reservations/Makefile
new file mode 100644
index 000000000000..517fc2ff8a76
--- /dev/null
+++ b/litmus/reservations/Makefile
@@ -0,0 +1,3 @@
1obj-y += core.o budget-notifier.o alloc.o
2obj-y += polling.o
3obj-y += table-driven.o
diff --git a/litmus/reservations/alloc.c b/litmus/reservations/alloc.c
new file mode 100644
index 000000000000..1f93f223f504
--- /dev/null
+++ b/litmus/reservations/alloc.c
@@ -0,0 +1,143 @@
1#include <linux/slab.h>
2#include <asm/uaccess.h>
3
4#include <litmus/rt_param.h>
5
6#include <litmus/reservations/alloc.h>
7#include <litmus/reservations/polling.h>
8#include <litmus/reservations/table-driven.h>
9
10
11long alloc_polling_reservation(
12 int res_type,
13 struct reservation_config *config,
14 struct reservation **_res)
15{
16 struct polling_reservation *pres;
17 int use_edf = config->priority == LITMUS_NO_PRIORITY;
18 int periodic = res_type == PERIODIC_POLLING;
19
20 if (config->polling_params.budget >
21 config->polling_params.period) {
22 printk(KERN_ERR "invalid polling reservation (%u): "
23 "budget > period\n", config->id);
24 return -EINVAL;
25 }
26 if (config->polling_params.budget >
27 config->polling_params.relative_deadline
28 && config->polling_params.relative_deadline) {
29 printk(KERN_ERR "invalid polling reservation (%u): "
30 "budget > deadline\n", config->id);
31 return -EINVAL;
32 }
33 if (config->polling_params.offset >
34 config->polling_params.period) {
35 printk(KERN_ERR "invalid polling reservation (%u): "
36 "offset > period\n", config->id);
37 return -EINVAL;
38 }
39
40 /* XXX: would be nice to use a core-local allocation. */
41 pres = kzalloc(sizeof(*pres), GFP_KERNEL);
42 if (!pres)
43 return -ENOMEM;
44
45 polling_reservation_init(pres, use_edf, periodic,
46 config->polling_params.budget,
47 config->polling_params.period,
48 config->polling_params.relative_deadline,
49 config->polling_params.offset);
50 pres->res.id = config->id;
51 if (!use_edf)
52 pres->res.priority = config->priority;
53
54 *_res = &pres->res;
55 return 0;
56}
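/*
 * Illustrative sketch (example only, hypothetical names and values): a
 * configuration that passes the checks above -- a polling reservation with a
 * 10 ms budget every 100 ms, an implicit deadline (relative_deadline == 0),
 * no offset, and EDF prioritization via LITMUS_NO_PRIORITY.
 */
#if 0 /* example only, never compiled */
static void example_fill_polling_config(struct reservation_config *config)
{
	config->id = 1;
	config->priority = LITMUS_NO_PRIORITY; /* use EDF priorities */
	config->polling_params.budget = 10 * NSEC_PER_MSEC;
	config->polling_params.period = 100 * NSEC_PER_MSEC;
	config->polling_params.relative_deadline = 0; /* => deadline = period */
	config->polling_params.offset = 0;
}
#endif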
57
58
59#define MAX_INTERVALS 1024
60
61long alloc_table_driven_reservation(
62 struct reservation_config *config,
63 struct reservation **_res)
64{
65 struct table_driven_reservation *td_res = NULL;
66 struct lt_interval *slots = NULL;
67 size_t slots_size;
68 unsigned int i, num_slots;
69 long err = -EINVAL;
70 void *mem;
71
72 if (!config->table_driven_params.num_intervals) {
73 printk(KERN_ERR "invalid table-driven reservation (%u): "
74 "no intervals\n", config->id);
75 return -EINVAL;
76 }
77
78 if (config->table_driven_params.num_intervals > MAX_INTERVALS) {
79 printk(KERN_ERR "invalid table-driven reservation (%u): "
80 "too many intervals (max: %d)\n", config->id, MAX_INTERVALS);
81 return -EINVAL;
82 }
83
84 num_slots = config->table_driven_params.num_intervals;
85 slots_size = sizeof(slots[0]) * num_slots;
86
87 mem = kzalloc(sizeof(*td_res) + slots_size, GFP_KERNEL);
88 if (!mem) {
89 return -ENOMEM;
90 } else {
91 slots = mem + sizeof(*td_res);
92 td_res = mem;
93 err = copy_from_user(slots,
94 config->table_driven_params.intervals, slots_size);
95 }
96
97 if (!err) {
98 /* sanity checks */
99 for (i = 0; !err && i < num_slots; i++)
100 if (slots[i].end <= slots[i].start) {
101 printk(KERN_ERR
102 "invalid table-driven reservation (%u): "
103 "invalid interval %u => [%llu, %llu]\n",
104 config->id, i,
105 slots[i].start, slots[i].end);
106 err = -EINVAL;
107 }
108
109 for (i = 0; !err && i + 1 < num_slots; i++)
110 if (slots[i + 1].start <= slots[i].end) {
111 printk(KERN_ERR
112 "invalid table-driven reservation (%u): "
113 "overlapping intervals %u, %u\n",
114 config->id, i, i + 1);
115 err = -EINVAL;
116 }
117
118 if (slots[num_slots - 1].end >
119 config->table_driven_params.major_cycle_length) {
120 printk(KERN_ERR
121 "invalid table-driven reservation (%u): last "
122 "interval ends past major cycle %llu > %llu\n",
123 config->id,
124 slots[num_slots - 1].end,
125 config->table_driven_params.major_cycle_length);
126 err = -EINVAL;
127 }
128 }
129
130 if (err) {
131 kfree(td_res);
132 } else {
133 table_driven_reservation_init(td_res,
134 config->table_driven_params.major_cycle_length,
135 slots, num_slots);
136 td_res->res.id = config->id;
137 td_res->res.priority = config->priority;
138 *_res = &td_res->res;
139 }
140
141 return err;
142}
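/*
 * Illustrative sketch (example only, hypothetical names and values): a
 * table-driven configuration that satisfies the checks above -- two
 * non-overlapping intervals within a 100 ms major cycle. When passed through
 * the syscall interface, `intervals` must refer to user-space memory, since
 * the kernel copies it with copy_from_user().
 */
#if 0 /* example only, never compiled */
static void example_fill_table_config(struct reservation_config *config,
				      struct lt_interval *intervals)
{
	intervals[0].start = 0;
	intervals[0].end   = 10 * NSEC_PER_MSEC;
	intervals[1].start = 50 * NSEC_PER_MSEC; /* starts after interval 0 ends */
	intervals[1].end   = 60 * NSEC_PER_MSEC; /* ends before the major cycle */

	config->id = 2;
	config->table_driven_params.major_cycle_length = 100 * NSEC_PER_MSEC;
	config->table_driven_params.num_intervals = 2;
	config->table_driven_params.intervals = intervals;
}
#endif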
143
diff --git a/litmus/reservations/budget-notifier.c b/litmus/reservations/budget-notifier.c
new file mode 100644
index 000000000000..0b0f42687882
--- /dev/null
+++ b/litmus/reservations/budget-notifier.c
@@ -0,0 +1,26 @@
1#include <litmus/reservations/budget-notifier.h>
2
3void budget_notifier_list_init(struct budget_notifier_list* bnl)
4{
5 INIT_LIST_HEAD(&bnl->list);
6 raw_spin_lock_init(&bnl->lock);
7}
8
9void budget_notifiers_fire(struct budget_notifier_list *bnl, bool replenished)
10{
11 struct budget_notifier *bn, *next;
12
13 unsigned long flags;
14
15 raw_spin_lock_irqsave(&bnl->lock, flags);
16
17 list_for_each_entry_safe(bn, next, &bnl->list, list) {
18 if (replenished)
19 bn->budget_replenished(bn);
20 else
21 bn->budget_exhausted(bn);
22 }
23
24 raw_spin_unlock_irqrestore(&bnl->lock, flags);
25}
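/*
 * Illustrative sketch (example only, hypothetical names): registering a
 * budget notifier so that a client is told when a reservation's budget is
 * exhausted or replenished. The callback bodies and the registration helper
 * are made up; only the firing side is defined above.
 */
#if 0 /* example only, never compiled */
static void example_on_exhausted(struct budget_notifier *bn)
{
	/* e.g., record an overrun or throttle the associated task */
}

static void example_on_replenished(struct budget_notifier *bn)
{
	/* e.g., resume the associated task */
}

static void example_register_notifier(struct budget_notifier_list *bnl,
				      struct budget_notifier *bn)
{
	unsigned long flags;

	bn->budget_exhausted = example_on_exhausted;
	bn->budget_replenished = example_on_replenished;

	raw_spin_lock_irqsave(&bnl->lock, flags);
	list_add_tail(&bn->list, &bnl->list);
	raw_spin_unlock_irqrestore(&bnl->lock, flags);
}
#endif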
26
diff --git a/litmus/reservations/core.c b/litmus/reservations/core.c
new file mode 100644
index 000000000000..5137eda0f643
--- /dev/null
+++ b/litmus/reservations/core.c
@@ -0,0 +1,393 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/debug_trace.h>
5#include <litmus/reservations/reservation.h>
6
7void reservation_init(struct reservation *res)
8{
9 memset(res, 0, sizeof(*res));
10 res->state = RESERVATION_INACTIVE;
11 INIT_LIST_HEAD(&res->clients);
12 INIT_LIST_HEAD(&res->replenish_list);
13 budget_notifier_list_init(&res->budget_notifiers);
14}
15
16struct task_struct* default_dispatch_client(
17 struct reservation *res,
18 lt_t *for_at_most)
19{
20 struct reservation_client *client, *next;
21 struct task_struct* tsk;
22
23 BUG_ON(res->state != RESERVATION_ACTIVE);
24 *for_at_most = 0;
25
26 list_for_each_entry_safe(client, next, &res->clients, list) {
27 tsk = client->dispatch(client);
28 if (likely(tsk)) {
29 /* Primitive form of round-robin scheduling:
30 * make sure we alternate between multiple clients
31 * with at least the granularity of the replenishment
32 * period. Reservations that need more fine-grained
33 * or more predictable alternation between threads
34 * within a reservation should provide a custom
35 * dispatch function. */
36 list_del(&client->list);
37 /* move to back of list */
38 list_add_tail(&client->list, &res->clients);
39 return tsk;
40 }
41 }
42 return NULL;
43}
44
45void common_drain_budget(
46 struct reservation *res,
47 lt_t how_much)
48{
49 if (how_much >= res->cur_budget)
50 res->cur_budget = 0;
51 else
52 res->cur_budget -= how_much;
53
54 res->budget_consumed += how_much;
55 res->budget_consumed_total += how_much;
56
57 switch (res->state) {
58 case RESERVATION_DEPLETED:
59 case RESERVATION_INACTIVE:
60 BUG();
61 break;
62
63 case RESERVATION_ACTIVE_IDLE:
64 case RESERVATION_ACTIVE:
65 if (!res->cur_budget) {
66 res->env->change_state(res->env, res,
67 RESERVATION_DEPLETED);
68 } /* else: stay in current state */
69 break;
70 }
71}
72
73static struct task_struct * task_client_dispatch(struct reservation_client *client)
74{
75 struct task_client *tc = container_of(client, struct task_client, client);
76 return tc->task;
77}
78
79void task_client_init(struct task_client *tc, struct task_struct *tsk,
80 struct reservation *res)
81{
82 memset(&tc->client, 0, sizeof(tc->client));
83 tc->client.dispatch = task_client_dispatch;
84 tc->client.reservation = res;
85 tc->task = tsk;
86}
87
88static void sup_scheduler_update_at(
89 struct sup_reservation_environment* sup_env,
90 lt_t when)
91{
92 if (sup_env->next_scheduler_update > when)
93 sup_env->next_scheduler_update = when;
94}
95
96static void sup_scheduler_update_after(
97 struct sup_reservation_environment* sup_env,
98 lt_t timeout)
99{
100 sup_scheduler_update_at(sup_env, sup_env->env.current_time + timeout);
101}
102
103static int _sup_queue_depleted(
104 struct sup_reservation_environment* sup_env,
105 struct reservation *res)
106{
107 struct list_head *pos;
108 struct reservation *queued;
109 int passed_earlier = 0;
110
111 BUG_ON(in_list(&res->replenish_list));
112
113 list_for_each(pos, &sup_env->depleted_reservations) {
114 queued = list_entry(pos, struct reservation, replenish_list);
115 if (queued->next_replenishment > res->next_replenishment) {
116 list_add(&res->replenish_list, pos->prev);
117 return passed_earlier;
118 } else
119 passed_earlier = 1;
120 }
121
122 list_add_tail(&res->replenish_list, &sup_env->depleted_reservations);
123
124 return passed_earlier;
125}
126
127static void sup_queue_depleted(
128 struct sup_reservation_environment* sup_env,
129 struct reservation *res)
130{
131 int passed_earlier = _sup_queue_depleted(sup_env, res);
132
133 /* check for updated replenishment time */
134 if (!passed_earlier)
135 sup_scheduler_update_at(sup_env, res->next_replenishment);
136}
137
138static int _sup_queue_active(
139 struct sup_reservation_environment* sup_env,
140 struct reservation *res)
141{
142 struct list_head *pos;
143 struct reservation *queued;
144 int passed_active = 0;
145
146 if (likely(res->priority != RESERVATION_BACKGROUND_PRIORITY)) {
147 /* enqueue in order of priority */
148 list_for_each(pos, &sup_env->active_reservations) {
149 queued = list_entry(pos, struct reservation, list);
150 if (queued->priority > res->priority) {
151 list_add(&res->list, pos->prev);
152 return passed_active;
153 } else if (queued->state == RESERVATION_ACTIVE)
154 passed_active = 1;
155 }
156 } else {
157 /* don't preempt unless the list happens to be empty */
158 passed_active = !list_empty(&sup_env->active_reservations);
159 }
160 /* Either a background reservation, or we fell off the end of the list.
161 * In both cases, just add the reservation to the end of the list of
162 * active reservations. */
163 list_add_tail(&res->list, &sup_env->active_reservations);
164 return passed_active;
165}
166
167static void sup_queue_active(
168 struct sup_reservation_environment* sup_env,
169 struct reservation *res)
170{
171 int passed_active = _sup_queue_active(sup_env, res);
172
173 /* check for possible preemption */
174 if (res->state == RESERVATION_ACTIVE && !passed_active)
175 sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
176 else if (res == list_first_entry(&sup_env->active_reservations,
177 struct reservation, list)) {
178 /* First reservation is draining budget => make sure
179 * the scheduler is called to notice when the reservation
180 * budget has been drained completely. */
181 sup_scheduler_update_after(sup_env, res->cur_budget);
182 }
183}
184
185static void sup_queue_reservation(
186 struct sup_reservation_environment* sup_env,
187 struct reservation *res)
188{
189 switch (res->state) {
190 case RESERVATION_INACTIVE:
191 list_add(&res->list, &sup_env->inactive_reservations);
192 break;
193
194 case RESERVATION_DEPLETED:
195 sup_queue_depleted(sup_env, res);
196 break;
197
198 case RESERVATION_ACTIVE_IDLE:
199 case RESERVATION_ACTIVE:
200 sup_queue_active(sup_env, res);
201 break;
202 }
203}
204
205void sup_add_new_reservation(
206 struct sup_reservation_environment* sup_env,
207 struct reservation* new_res)
208{
209 new_res->env = &sup_env->env;
210 list_add(&new_res->all_list, &sup_env->all_reservations);
211 sup_queue_reservation(sup_env, new_res);
212}
213
214struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
215 unsigned int id)
216{
217 struct reservation *res;
218
219 list_for_each_entry(res, &sup_env->all_reservations, all_list) {
220 if (res->id == id)
221 return res;
222 }
223
224 return NULL;
225}
226
227static void sup_charge_budget(
228 struct sup_reservation_environment* sup_env,
229 lt_t delta)
230{
231 struct reservation *res;
232
233 /* charge the highest-priority ACTIVE or ACTIVE_IDLE reservation */
234
235 res = list_first_entry_or_null(
236 &sup_env->active_reservations, struct reservation, list);
237
238 if (res) {
239 TRACE("R%d: charging at %llu for %llu execution, budget before: %llu\n",
240 res->id, res->env->current_time, delta, res->cur_budget);
241 res->ops->drain_budget(res, delta);
242 TRACE("R%d: budget now: %llu, priority: %llu\n",
243 res->id, res->cur_budget, res->priority);
244 }
245
246 /* check when the next budget expires */
247
248 res = list_first_entry_or_null(
249 &sup_env->active_reservations, struct reservation, list);
250
251 if (res) {
252 /* make sure scheduler is invoked when this reservation expires
253 * its remaining budget */
254 TRACE("requesting scheduler update for reservation %u "
255 "in %llu nanoseconds\n",
256 res->id, res->cur_budget);
257 sup_scheduler_update_after(sup_env, res->cur_budget);
258 }
259}
260
261static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
262{
263 struct list_head *pos, *next;
264 struct reservation *res;
265
266 list_for_each_safe(pos, next, &sup_env->depleted_reservations) {
267 res = list_entry(pos, struct reservation, replenish_list);
268 if (res->next_replenishment <= sup_env->env.current_time) {
269 TRACE("R%d: replenishing budget at %llu, "
270 "priority: %llu\n",
271 res->id, res->env->current_time, res->priority);
272 res->ops->replenish(res);
273 } else {
274 /* list is ordered by increasing replenishment times */
275 break;
276 }
277 }
278
279 /* request a scheduler update at the next replenishment instant */
280 res = list_first_entry_or_null(&sup_env->depleted_reservations,
281 struct reservation, replenish_list);
282 if (res)
283 sup_scheduler_update_at(sup_env, res->next_replenishment);
284}
285
286void sup_update_time(
287 struct sup_reservation_environment* sup_env,
288 lt_t now)
289{
290 lt_t delta;
291
292 /* If the time didn't advance, there is nothing to do.
293 * This check makes it safe to call sup_update_time() potentially
294 * multiple times (e.g., via different code paths). */
295 if (!list_empty(&sup_env->active_reservations))
296 TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now,
297 sup_env->env.current_time);
298 if (unlikely(now <= sup_env->env.current_time))
299 return;
300
301 delta = now - sup_env->env.current_time;
302 sup_env->env.current_time = now;
303
304 /* check if future updates are required */
305 if (sup_env->next_scheduler_update <= sup_env->env.current_time)
306 sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
307
308 /* deplete budgets by passage of time */
309 sup_charge_budget(sup_env, delta);
310
311 /* check if any budgets were replenished */
312 sup_replenish_budgets(sup_env);
313}
314
315struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env)
316{
317 struct reservation *res, *next;
318 struct task_struct *tsk = NULL;
319 lt_t time_slice;
320
321 list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
322 if (res->state == RESERVATION_ACTIVE) {
323 tsk = res->ops->dispatch_client(res, &time_slice);
324 if (likely(tsk)) {
325 if (time_slice)
326 sup_scheduler_update_after(sup_env, time_slice);
327 sup_scheduler_update_after(sup_env, res->cur_budget);
328 return tsk;
329 }
330 }
331 }
332
333 return NULL;
334}
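/*
 * Illustrative sketch (example only, hypothetical names): how a
 * uniprocessor reservation-based plugin might combine sup_update_time() and
 * sup_dispatch() in its scheduling path. The per-CPU state, locking, and
 * helper name are made up; a real plugin also handles blocking, timers, and
 * state transitions of the previously scheduled task.
 */
#if 0 /* example only, never compiled */
struct example_cpu_state {
	raw_spinlock_t lock;
	struct sup_reservation_environment sup_env;
};

static struct task_struct *example_pick_next(struct example_cpu_state *state)
{
	struct task_struct *next;
	unsigned long flags;

	raw_spin_lock_irqsave(&state->lock, flags);

	/* charge the running reservation and process pending replenishments */
	sup_update_time(&state->sup_env, litmus_clock());

	/* pick the highest-priority active reservation's client, if any */
	next = sup_dispatch(&state->sup_env);

	raw_spin_unlock_irqrestore(&state->lock, flags);
	return next;
}
#endif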
335
336static void sup_res_change_state(
337 struct reservation_environment* env,
338 struct reservation *res,
339 reservation_state_t new_state)
340{
341 struct sup_reservation_environment* sup_env;
342
343 sup_env = container_of(env, struct sup_reservation_environment, env);
344
345 TRACE("reservation R%d state %d->%d at %llu\n",
346 res->id, res->state, new_state, env->current_time);
347
348 if (new_state == RESERVATION_DEPLETED
349 && (res->state == RESERVATION_ACTIVE ||
350 res->state == RESERVATION_ACTIVE_IDLE)) {
351 budget_notifiers_fire(&res->budget_notifiers, false);
352 } else if (res->state == RESERVATION_DEPLETED
353 && new_state == RESERVATION_ACTIVE) {
354 budget_notifiers_fire(&res->budget_notifiers, true);
355 }
356
357 /* dequeue prior to re-queuing */
358 if (res->state == RESERVATION_DEPLETED)
359 list_del(&res->replenish_list);
360 else
361 list_del(&res->list);
362
363 /* check if we need to reschedule because we lost an active reservation */
364 if (res->state == RESERVATION_ACTIVE && !sup_env->will_schedule)
365 sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
366 res->state = new_state;
367 sup_queue_reservation(sup_env, res);
368}
369
370static void sup_request_replenishment(
371 struct reservation_environment* env,
372 struct reservation *res)
373{
374 struct sup_reservation_environment* sup_env;
375
376 sup_env = container_of(env, struct sup_reservation_environment, env);
377 sup_queue_depleted(sup_env, res);
378}
379
380void sup_init(struct sup_reservation_environment* sup_env)
381{
382 memset(sup_env, 0, sizeof(*sup_env));
383
384 INIT_LIST_HEAD(&sup_env->all_reservations);
385 INIT_LIST_HEAD(&sup_env->active_reservations);
386 INIT_LIST_HEAD(&sup_env->depleted_reservations);
387 INIT_LIST_HEAD(&sup_env->inactive_reservations);
388
389 sup_env->env.change_state = sup_res_change_state;
390 sup_env->env.request_replenishment = sup_request_replenishment;
391
392 sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
393}
diff --git a/litmus/reservations/polling.c b/litmus/reservations/polling.c
new file mode 100644
index 000000000000..63e0bed566e8
--- /dev/null
+++ b/litmus/reservations/polling.c
@@ -0,0 +1,256 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/reservations/reservation.h>
5#include <litmus/reservations/polling.h>
6
7
8static void periodic_polling_client_arrives(
9 struct reservation* res,
10 struct reservation_client *client
11)
12{
13 struct polling_reservation *pres =
14 container_of(res, struct polling_reservation, res);
15 lt_t instances, tmp;
16
17 list_add_tail(&client->list, &res->clients);
18
19 switch (res->state) {
20 case RESERVATION_INACTIVE:
21 /* Figure out next replenishment time. */
22 tmp = res->env->current_time - res->env->time_zero;
23 instances = div64_u64(tmp, pres->period);
24 res->next_replenishment =
25 (instances + 1) * pres->period + pres->offset;
26
27 TRACE("pol-res: activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
28 tmp, instances, pres->period, res->next_replenishment,
29 res->env->current_time);
30
31 res->env->change_state(res->env, res,
32 RESERVATION_DEPLETED);
33 break;
34
35 case RESERVATION_ACTIVE:
36 case RESERVATION_DEPLETED:
37 /* do nothing */
38 break;
39
40 case RESERVATION_ACTIVE_IDLE:
41 res->env->change_state(res->env, res,
42 RESERVATION_ACTIVE);
43 break;
44 }
45}
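/*
 * Worked example for the RESERVATION_INACTIVE case above (values made up):
 * with period = 100 ms, offset = 10 ms, time_zero = 0, and a client arriving
 * at current_time = 250 ms, we get tmp = 250 ms, instances = 2, and thus
 * next_replenishment = (2 + 1) * 100 ms + 10 ms = 310 ms, i.e., the start of
 * the next complete polling period after the arrival.
 */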
46
47
48static void periodic_polling_client_departs(
49 struct reservation *res,
50 struct reservation_client *client,
51 int did_signal_job_completion
52)
53{
54 list_del(&client->list);
55
56 switch (res->state) {
57 case RESERVATION_INACTIVE:
58 case RESERVATION_ACTIVE_IDLE:
59 BUG(); /* INACTIVE or IDLE <=> no client */
60 break;
61
62 case RESERVATION_ACTIVE:
63 if (list_empty(&res->clients)) {
64 res->env->change_state(res->env, res,
65 RESERVATION_ACTIVE_IDLE);
66 } /* else: nothing to do, more clients ready */
67 break;
68
69 case RESERVATION_DEPLETED:
70 /* do nothing */
71 break;
72 }
73}
74
75static void periodic_polling_on_replenishment(
76 struct reservation *res
77)
78{
79 struct polling_reservation *pres =
80 container_of(res, struct polling_reservation, res);
81
82 /* replenish budget */
83 res->cur_budget = pres->max_budget;
84 res->next_replenishment += pres->period;
85 res->budget_consumed = 0;
86
87 switch (res->state) {
88 case RESERVATION_DEPLETED:
89 case RESERVATION_INACTIVE:
90 case RESERVATION_ACTIVE_IDLE:
91 if (list_empty(&res->clients))
92 /* no clients => poll again later */
93 res->env->change_state(res->env, res,
94 RESERVATION_INACTIVE);
95 else
96 /* we have clients & budget => ACTIVE */
97 res->env->change_state(res->env, res,
98 RESERVATION_ACTIVE);
99 break;
100
101 case RESERVATION_ACTIVE:
102 /* Replenished while active => tardy? In any case,
103 * go ahead and stay active. */
104 break;
105 }
106}
107
108static void periodic_polling_on_replenishment_edf(
109 struct reservation *res
110)
111{
112 struct polling_reservation *pres =
113 container_of(res, struct polling_reservation, res);
114
115 /* update current priority */
116 res->priority = res->next_replenishment + pres->deadline;
117
118 /* do common updates */
119 periodic_polling_on_replenishment(res);
120}
121
122static struct reservation_ops periodic_polling_ops_fp = {
123 .dispatch_client = default_dispatch_client,
124 .client_arrives = periodic_polling_client_arrives,
125 .client_departs = periodic_polling_client_departs,
126 .replenish = periodic_polling_on_replenishment,
127 .drain_budget = common_drain_budget,
128};
129
130static struct reservation_ops periodic_polling_ops_edf = {
131 .dispatch_client = default_dispatch_client,
132 .client_arrives = periodic_polling_client_arrives,
133 .client_departs = periodic_polling_client_departs,
134 .replenish = periodic_polling_on_replenishment_edf,
135 .drain_budget = common_drain_budget,
136};
137
138
139
140
141static void sporadic_polling_client_arrives_fp(
142 struct reservation* res,
143 struct reservation_client *client
144)
145{
146 struct polling_reservation *pres =
147 container_of(res, struct polling_reservation, res);
148
149 list_add_tail(&client->list, &res->clients);
150
151 switch (res->state) {
152 case RESERVATION_INACTIVE:
153 /* Replenish now. */
154 res->cur_budget = pres->max_budget;
155 res->next_replenishment =
156 res->env->current_time + pres->period;
157
158 res->env->change_state(res->env, res,
159 RESERVATION_ACTIVE);
160 break;
161
162 case RESERVATION_ACTIVE:
163 case RESERVATION_DEPLETED:
164 /* do nothing */
165 break;
166
167 case RESERVATION_ACTIVE_IDLE:
168 res->env->change_state(res->env, res,
169 RESERVATION_ACTIVE);
170 break;
171 }
172}
173
174static void sporadic_polling_client_arrives_edf(
175 struct reservation* res,
176 struct reservation_client *client
177)
178{
179 struct polling_reservation *pres =
180 container_of(res, struct polling_reservation, res);
181
182 list_add_tail(&client->list, &res->clients);
183
184 switch (res->state) {
185 case RESERVATION_INACTIVE:
186 /* Replenish now. */
187 res->cur_budget = pres->max_budget;
188 res->next_replenishment =
189 res->env->current_time + pres->period;
190 res->priority =
191 res->env->current_time + pres->deadline;
192
193 res->env->change_state(res->env, res,
194 RESERVATION_ACTIVE);
195 break;
196
197 case RESERVATION_ACTIVE:
198 case RESERVATION_DEPLETED:
199 /* do nothing */
200 break;
201
202 case RESERVATION_ACTIVE_IDLE:
203 res->env->change_state(res->env, res,
204 RESERVATION_ACTIVE);
205 break;
206 }
207}
208
209static struct reservation_ops sporadic_polling_ops_fp = {
210 .dispatch_client = default_dispatch_client,
211 .client_arrives = sporadic_polling_client_arrives_fp,
212 .client_departs = periodic_polling_client_departs,
213 .replenish = periodic_polling_on_replenishment,
214 .drain_budget = common_drain_budget,
215};
216
217static struct reservation_ops sporadic_polling_ops_edf = {
218 .dispatch_client = default_dispatch_client,
219 .client_arrives = sporadic_polling_client_arrives_edf,
220 .client_departs = periodic_polling_client_departs,
221 .replenish = periodic_polling_on_replenishment_edf,
222 .drain_budget = common_drain_budget,
223};
224
225void polling_reservation_init(
226 struct polling_reservation *pres,
227 int use_edf_prio,
228 int use_periodic_polling,
229 lt_t budget, lt_t period, lt_t deadline, lt_t offset
230)
231{
232 if (!deadline)
233 deadline = period;
234 BUG_ON(budget > period);
235 BUG_ON(budget > deadline);
236 BUG_ON(offset >= period);
237
238 reservation_init(&pres->res);
239 pres->max_budget = budget;
240 pres->period = period;
241 pres->deadline = deadline;
242 pres->offset = offset;
243 if (use_periodic_polling) {
244 pres->res.kind = PERIODIC_POLLING;
245 if (use_edf_prio)
246 pres->res.ops = &periodic_polling_ops_edf;
247 else
248 pres->res.ops = &periodic_polling_ops_fp;
249 } else {
250 pres->res.kind = SPORADIC_POLLING;
251 if (use_edf_prio)
252 pres->res.ops = &sporadic_polling_ops_edf;
253 else
254 pres->res.ops = &sporadic_polling_ops_fp;
255 }
256}
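
To make the RESERVATION_INACTIVE arithmetic in periodic_polling_client_arrives() above concrete, here is a minimal userspace sketch of the same computation (lt_t modeled as uint64_t; all values are made up for illustration):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t; /* nanoseconds, as in LITMUS^RT */

/* Same computation as the RESERVATION_INACTIVE case above:
 * skip to the start of the next period instance, shifted by the offset. */
static lt_t next_replenishment(lt_t now, lt_t time_zero,
                               lt_t period, lt_t offset)
{
	lt_t elapsed   = now - time_zero;
	lt_t instances = elapsed / period;   /* div64_u64() in the kernel */
	return (instances + 1) * period + offset;
}

int main(void)
{
	/* hypothetical numbers: 10ms period, 1ms offset, 23.5ms elapsed */
	lt_t t0 = 0, now = 23500000, period = 10000000, offset = 1000000;
	printf("next replenishment at %llu ns\n",
	       (unsigned long long) next_replenishment(now, t0, period, offset));
	/* prints 31000000: the next period instance starts at 30ms,
	 * shifted by the 1ms offset */
	return 0;
}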
diff --git a/litmus/reservations/table-driven.c b/litmus/reservations/table-driven.c
new file mode 100644
index 000000000000..e4debcb5d4d2
--- /dev/null
+++ b/litmus/reservations/table-driven.c
@@ -0,0 +1,269 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/reservations/reservation.h>
5#include <litmus/reservations/table-driven.h>
6
7static lt_t td_cur_major_cycle_start(struct table_driven_reservation *tdres)
8{
9 lt_t x, tmp;
10
11 tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
12 x = div64_u64(tmp, tdres->major_cycle);
13 x *= tdres->major_cycle;
14 return x;
15}
16
17
18static lt_t td_next_major_cycle_start(struct table_driven_reservation *tdres)
19{
20 lt_t x, tmp;
21
22 tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
23 x = div64_u64(tmp, tdres->major_cycle) + 1;
24 x *= tdres->major_cycle;
25 return x;
26}
27
28static void td_client_arrives(
29 struct reservation* res,
30 struct reservation_client *client
31)
32{
33 struct table_driven_reservation *tdres =
34 container_of(res, struct table_driven_reservation, res);
35
36 list_add_tail(&client->list, &res->clients);
37
38 switch (res->state) {
39 case RESERVATION_INACTIVE:
40 /* Figure out first replenishment time. */
41 tdres->major_cycle_start = td_next_major_cycle_start(tdres);
42 res->next_replenishment = tdres->major_cycle_start;
43 res->next_replenishment += tdres->intervals[0].start;
44 tdres->next_interval = 0;
45
46 res->env->change_state(res->env, res,
47 RESERVATION_DEPLETED);
48 break;
49
50 case RESERVATION_ACTIVE:
51 case RESERVATION_DEPLETED:
52 /* do nothing */
53 break;
54
55 case RESERVATION_ACTIVE_IDLE:
56 res->env->change_state(res->env, res,
57 RESERVATION_ACTIVE);
58 break;
59 }
60}
61
62static void td_client_departs(
63 struct reservation *res,
64 struct reservation_client *client,
65 int did_signal_job_completion
66)
67{
68 list_del(&client->list);
69
70 switch (res->state) {
71 case RESERVATION_INACTIVE:
72 case RESERVATION_ACTIVE_IDLE:
73 BUG(); /* INACTIVE or IDLE <=> no client */
74 break;
75
76 case RESERVATION_ACTIVE:
77 if (list_empty(&res->clients)) {
78 res->env->change_state(res->env, res,
79 RESERVATION_ACTIVE_IDLE);
80 } /* else: nothing to do, more clients ready */
81 break;
82
83 case RESERVATION_DEPLETED:
84 /* do nothing */
85 break;
86 }
87}
88
89static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
90{
91 lt_t now = tdres->res.env->current_time;
92 lt_t end = tdres->cur_interval.end;
93 TRACE("td_remaining(%u): start=%llu now=%llu end=%llu state=%d\n",
94 tdres->res.id,
95 tdres->cur_interval.start,
96 now, end,
97 tdres->res.state);
98 if (now >= end)
99 return 0;
100 else
101 return end - now;
102}
103
104static void td_replenish(
105 struct reservation *res)
106{
107 struct table_driven_reservation *tdres =
108 container_of(res, struct table_driven_reservation, res);
109
110 TRACE("td_replenish(%u): expected_replenishment=%llu\n", res->id,
111 res->next_replenishment);
112
113 /* figure out current interval */
114 tdres->cur_interval.start = tdres->major_cycle_start +
115 tdres->intervals[tdres->next_interval].start;
116 tdres->cur_interval.end = tdres->major_cycle_start +
117 tdres->intervals[tdres->next_interval].end;
118 TRACE("major_cycle_start=%llu => [%llu, %llu]\n",
119 tdres->major_cycle_start,
120 tdres->cur_interval.start,
121 tdres->cur_interval.end);
122
123 /* reset budget */
124 res->cur_budget = td_time_remaining_until_end(tdres);
125 res->budget_consumed = 0;
126 TRACE("td_replenish(%u): %s budget=%llu\n", res->id,
127 res->cur_budget ? "" : "WARNING", res->cur_budget);
128
129 /* prepare next slot */
130 tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
131 if (!tdres->next_interval)
132 /* wrap to next major cycle */
133 tdres->major_cycle_start += tdres->major_cycle;
134
135 /* determine next time this reservation becomes eligible to execute */
136 res->next_replenishment = tdres->major_cycle_start;
137 res->next_replenishment += tdres->intervals[tdres->next_interval].start;
138 TRACE("td_replenish(%u): next_replenishment=%llu\n", res->id,
139 res->next_replenishment);
140
141
142 switch (res->state) {
143 case RESERVATION_DEPLETED:
144 case RESERVATION_ACTIVE:
145 case RESERVATION_ACTIVE_IDLE:
146 if (list_empty(&res->clients))
147 res->env->change_state(res->env, res,
148 RESERVATION_ACTIVE_IDLE);
149 else
150 /* we have clients & budget => ACTIVE */
151 res->env->change_state(res->env, res,
152 RESERVATION_ACTIVE);
153 break;
154
155 case RESERVATION_INACTIVE:
156 BUG();
157 break;
158 }
159}
160
161static void td_drain_budget(
162 struct reservation *res,
163 lt_t how_much)
164{
165 struct table_driven_reservation *tdres =
166 container_of(res, struct table_driven_reservation, res);
167
168 res->budget_consumed += how_much;
169 res->budget_consumed_total += how_much;
170
171 /* Table-driven scheduling: instead of tracking the budget, we compute
172 * how much time is left in this allocation interval. */
173
174 /* sanity check: we should never try to drain from future slots */
175 BUG_ON(tdres->cur_interval.start > res->env->current_time);
176
177 switch (res->state) {
178 case RESERVATION_DEPLETED:
179 case RESERVATION_INACTIVE:
180 BUG();
181 break;
182
183 case RESERVATION_ACTIVE_IDLE:
184 case RESERVATION_ACTIVE:
185 res->cur_budget = td_time_remaining_until_end(tdres);
186 TRACE("td_drain_budget(%u): drained to budget=%llu\n",
187 res->id, res->cur_budget);
188 if (!res->cur_budget) {
189 res->env->change_state(res->env, res,
190 RESERVATION_DEPLETED);
191 } else {
192 /* sanity check budget calculation */
193 BUG_ON(res->env->current_time >= tdres->cur_interval.end);
194 BUG_ON(res->env->current_time < tdres->cur_interval.start);
195 }
196
197 break;
198 }
199}
200
201static struct task_struct* td_dispatch_client(
202 struct reservation *res,
203 lt_t *for_at_most)
204{
205 struct task_struct *t;
206 struct table_driven_reservation *tdres =
207 container_of(res, struct table_driven_reservation, res);
208
209 /* usual logic for selecting a client */
210 t = default_dispatch_client(res, for_at_most);
211
212 TRACE_TASK(t, "td_dispatch_client(%u): selected, budget=%llu\n",
213 res->id, res->cur_budget);
214
215 /* check how much budget we have left in this time slot */
216 res->cur_budget = td_time_remaining_until_end(tdres);
217
218 TRACE_TASK(t, "td_dispatch_client(%u): updated to budget=%llu next=%d\n",
219 res->id, res->cur_budget, tdres->next_interval);
220
221 if (unlikely(!res->cur_budget)) {
222 /* Unlikely case: if we ran out of budget, the user configured
223 * a broken scheduling table (overlapping table slots).
224 * Not much we can do about this, but we can't dispatch a job
225 * now without causing overload. So let's register this reservation
226 * as depleted and wait for the next allocation. */
227 TRACE("td_dispatch_client(%u): budget unexpectedly depleted "
228 "(check scheduling table for unintended overlap)\n",
229 res->id);
230 res->env->change_state(res->env, res,
231 RESERVATION_DEPLETED);
232 return NULL;
233 } else
234 return t;
235}
236
237static struct reservation_ops td_ops = {
238 .dispatch_client = td_dispatch_client,
239 .client_arrives = td_client_arrives,
240 .client_departs = td_client_departs,
241 .replenish = td_replenish,
242 .drain_budget = td_drain_budget,
243};
244
245void table_driven_reservation_init(
246 struct table_driven_reservation *tdres,
247 lt_t major_cycle,
248 struct lt_interval *intervals,
249 unsigned int num_intervals)
250{
251 unsigned int i;
252
253 /* sanity checking */
254 BUG_ON(!num_intervals);
255 for (i = 0; i < num_intervals; i++)
256 BUG_ON(intervals[i].end <= intervals[i].start);
257 for (i = 0; i + 1 < num_intervals; i++)
258 BUG_ON(intervals[i + 1].start <= intervals[i].end);
259 BUG_ON(intervals[num_intervals - 1].end > major_cycle);
260
261 reservation_init(&tdres->res);
262 tdres->res.kind = TABLE_DRIVEN;
263 tdres->major_cycle = major_cycle;
264 tdres->intervals = intervals;
265 tdres->cur_interval.start = 0;
266 tdres->cur_interval.end = 0;
267 tdres->num_intervals = num_intervals;
268 tdres->res.ops = &td_ops;
269}
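
The table-driven code above repeatedly maps the current time to a major-cycle boundary and then walks the per-cycle interval table. A small standalone sketch of that mapping, under the same assumptions (sorted, non-overlapping intervals contained in the major cycle); the values are illustrative:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t;

struct lt_interval { lt_t start, end; };  /* offsets within the major cycle */

/* cf. td_next_major_cycle_start(); time_zero is taken to be 0 here */
static lt_t next_major_cycle_start(lt_t now, lt_t major_cycle)
{
	return (now / major_cycle + 1) * major_cycle;
}

int main(void)
{
	/* hypothetical table: two slots in a 100ms major cycle */
	struct lt_interval tbl[] = {
		{ 10000000, 20000000 },   /* [10ms, 20ms) */
		{ 60000000, 75000000 },   /* [60ms, 75ms) */
	};
	lt_t major_cycle = 100000000, now = 130000000;
	lt_t cycle = next_major_cycle_start(now, major_cycle);

	/* the reservation's next replenishment is the start of the first
	 * table slot of that cycle, as in td_client_arrives() above */
	printf("next major cycle at %llu ns, first slot [%llu, %llu) ns\n",
	       (unsigned long long) cycle,
	       (unsigned long long) (cycle + tbl[0].start),
	       (unsigned long long) (cycle + tbl[0].end));
	return 0;
}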
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..733a483e3084
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,351 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17#include <litmus/debug_trace.h>
18
19#include <litmus/rt_domain.h>
20
21#include <litmus/trace.h>
22
23#include <litmus/bheap.h>
24
25/* Uncomment when debugging timer races... */
26#if 0
27#define VTRACE_TASK TRACE_TASK
28#define VTRACE TRACE
29#else
30#define VTRACE_TASK(t, fmt, args...) /* shut up */
31#define VTRACE(fmt, args...) /* be quiet already */
32#endif
33
34static int dummy_resched(rt_domain_t *rt)
35{
36 return 0;
37}
38
39static int dummy_order(struct bheap_node* a, struct bheap_node* b)
40{
41 return 0;
42}
43
44/* default implementation: use default lock */
45static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
46{
47 merge_ready(rt, tasks);
48}
49
50static unsigned int time2slot(lt_t time)
51{
52 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
53}
54
55static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
56{
57 unsigned long flags;
58 struct release_heap* rh;
59 rh = container_of(timer, struct release_heap, timer);
60
61 TS_RELEASE_LATENCY(rh->release_time);
62
63 VTRACE("on_release_timer(0x%p) starts.\n", timer);
64
65 TS_RELEASE_START;
66
67
68 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
69 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
70 /* remove from release queue */
71 list_del(&rh->list);
72 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
73 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
74
75 /* call release callback */
76 rh->dom->release_jobs(rh->dom, &rh->heap);
77 /* WARNING: rh can be referenced from other CPUs from now on. */
78
79 TS_RELEASE_END;
80
81 VTRACE("on_release_timer(0x%p) ends.\n", timer);
82
83 return HRTIMER_NORESTART;
84}
85
86/* allocated in litmus.c */
87struct kmem_cache * release_heap_cache;
88
89struct release_heap* release_heap_alloc(int gfp_flags)
90{
91 struct release_heap* rh;
92 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
93 if (rh) {
94 /* initialize timer */
95 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
96 rh->timer.function = on_release_timer;
97 }
98 return rh;
99}
100
101void release_heap_free(struct release_heap* rh)
102{
103 /* make sure timer is no longer in use */
104 hrtimer_cancel(&rh->timer);
105 kmem_cache_free(release_heap_cache, rh);
106}
107
108/* Caller must hold release lock.
109 * Will return heap for given time. If no such heap exists prior to
110 * the invocation it will be created.
111 */
112static struct release_heap* get_release_heap(rt_domain_t *rt,
113 struct task_struct* t,
114 int use_task_heap)
115{
116 struct list_head* pos;
117 struct release_heap* heap = NULL;
118 struct release_heap* rh;
119 lt_t release_time = get_release(t);
120 unsigned int slot = time2slot(release_time);
121
122 /* initialize pos for the case that the list is empty */
123 pos = rt->release_queue.slot[slot].next;
124 list_for_each(pos, &rt->release_queue.slot[slot]) {
125 rh = list_entry(pos, struct release_heap, list);
126 if (release_time == rh->release_time) {
127 /* perfect match -- this happens on hyperperiod
128 * boundaries
129 */
130 heap = rh;
131 break;
132 } else if (lt_before(release_time, rh->release_time)) {
133 /* we need to insert a new node since rh is
134 * already in the future
135 */
136 break;
137 }
138 }
139 if (!heap && use_task_heap) {
140 /* use pre-allocated release heap */
141 rh = tsk_rt(t)->rel_heap;
142
143 rh->dom = rt;
144 rh->release_time = release_time;
145
146 /* add to release queue */
147 list_add(&rh->list, pos->prev);
148 heap = rh;
149 }
150 return heap;
151}
152
153static void reinit_release_heap(struct task_struct* t)
154{
155 struct release_heap* rh;
156
157 /* use pre-allocated release heap */
158 rh = tsk_rt(t)->rel_heap;
159
160 /* Make sure it is safe to use. The timer callback could still
161 * be executing on another CPU; hrtimer_cancel() will wait
162 * until the timer callback has completed. However, under no
163 * circumstances should the timer be active (= yet to be
164 * triggered).
165 *
166 * WARNING: If the CPU still holds the release_lock at this point,
167 * deadlock may occur!
168 */
169 BUG_ON(hrtimer_cancel(&rh->timer));
170
171 /* initialize */
172 bheap_init(&rh->heap);
173}
174/* arm_release_timer() - start local release timer or trigger
175 * remote timer (pull timer)
176 *
177 * Called by add_release() with:
178 * - tobe_lock taken
179 * - IRQ disabled
180 */
181#ifdef CONFIG_RELEASE_MASTER
182#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
183static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
184#else
185static void arm_release_timer(rt_domain_t *_rt)
186#endif
187{
188 rt_domain_t *rt = _rt;
189 struct list_head list;
190 struct list_head *pos, *safe;
191 struct task_struct* t;
192 struct release_heap* rh;
193
194 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
195 list_replace_init(&rt->tobe_released, &list);
196
197 list_for_each_safe(pos, safe, &list) {
198 /* pick task off the work list */
199 t = list_entry(pos, struct task_struct, rt_param.list);
200 sched_trace_task_release(t);
201 list_del(pos);
202
203 /* put into release heap while holding release_lock */
204 raw_spin_lock(&rt->release_lock);
205 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
206
207 rh = get_release_heap(rt, t, 0);
208 if (!rh) {
209 /* need to use our own, but drop lock first */
210 raw_spin_unlock(&rt->release_lock);
211 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
212 &rt->release_lock);
213
214 reinit_release_heap(t);
215 VTRACE_TASK(t, "release_heap ready\n");
216
217 raw_spin_lock(&rt->release_lock);
218 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
219 &rt->release_lock);
220
221 rh = get_release_heap(rt, t, 1);
222 }
223 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
224 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
225
226 raw_spin_unlock(&rt->release_lock);
227 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
228
229 /* To avoid arming the timer multiple times, we only let the
230 * owner do the arming (which is the "first" task to reference
231 * this release_heap anyway).
232 */
233 if (rh == tsk_rt(t)->rel_heap) {
234 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
235
236 if (!hrtimer_is_hres_active(&rh->timer)) {
237 TRACE_TASK(t, "WARNING: no hires timer!!!\n");
238 }
239
240 /* we cannot arm the timer using hrtimer_start()
241 * as it may deadlock on rq->lock
242 *
243 * PINNED mode is ok on both local and remote CPU
244 */
245#ifdef CONFIG_RELEASE_MASTER
246 if (rt->release_master == NO_CPU &&
247 target_cpu == NO_CPU)
248#endif
249 hrtimer_start(&rh->timer,
250 ns_to_ktime(rh->release_time),
251 HRTIMER_MODE_ABS_PINNED);
252#ifdef CONFIG_RELEASE_MASTER
253 else
254 hrtimer_start_on(
255 /* target_cpu overrides release master */
256 (target_cpu != NO_CPU ?
257 target_cpu : rt->release_master),
258 &rh->info, &rh->timer,
259 ns_to_ktime(rh->release_time),
260 HRTIMER_MODE_ABS_PINNED);
261#endif
262 } else
263 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
264 }
265}
266
267void rt_domain_init(rt_domain_t *rt,
268 bheap_prio_t order,
269 check_resched_needed_t check,
270 release_jobs_t release
271 )
272{
273 int i;
274
275 BUG_ON(!rt);
276 if (!check)
277 check = dummy_resched;
278 if (!release)
279 release = default_release_jobs;
280 if (!order)
281 order = dummy_order;
282
283#ifdef CONFIG_RELEASE_MASTER
284 rt->release_master = NO_CPU;
285#endif
286
287 bheap_init(&rt->ready_queue);
288 INIT_LIST_HEAD(&rt->tobe_released);
289 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
290 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
291
292 raw_spin_lock_init(&rt->ready_lock);
293 raw_spin_lock_init(&rt->release_lock);
294 raw_spin_lock_init(&rt->tobe_lock);
295
296 rt->check_resched = check;
297 rt->release_jobs = release;
298 rt->order = order;
299}
300
301/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
302 * @new: the newly released task
303 */
304void __add_ready(rt_domain_t* rt, struct task_struct *new)
305{
306 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
307 "to ready queue at %llu\n",
308 new->comm, new->pid,
309 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
310 get_release(new), litmus_clock());
311
312 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
313
314 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
315 rt->check_resched(rt);
316}
317
318/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
319 * @tasks - the newly released tasks
320 */
321void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
322{
323 bheap_union(rt->order, &rt->ready_queue, tasks);
324 rt->check_resched(rt);
325}
326
327
328#ifdef CONFIG_RELEASE_MASTER
329void __add_release_on(rt_domain_t* rt, struct task_struct *task,
330 int target_cpu)
331{
332 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
333 get_release(task), target_cpu);
334 list_add(&tsk_rt(task)->list, &rt->tobe_released);
335 task->rt_param.domain = rt;
336
337 arm_release_timer_on(rt, target_cpu);
338}
339#endif
340
341/* add_release - add a real-time task to the rt release queue.
342 * @task: the sleeping task
343 */
344void __add_release(rt_domain_t* rt, struct task_struct *task)
345{
346 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
347 list_add(&tsk_rt(task)->list, &rt->tobe_released);
348 task->rt_param.domain = rt;
349
350 arm_release_timer(rt);
351}
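
get_release_heap() buckets pending releases by time2slot(), so tasks with identical release times share one release_heap and one hrtimer. A userspace sketch of the bucketing; the slot count and quantum length below are placeholders, not the kernel's configured values:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t;

/* placeholder values for this sketch -- the kernel uses the configured
 * scheduling quantum and the RELEASE_QUEUE_SLOTS constant from rt_domain.h */
#define SLOTS      127
#define QUANTUM_NS 1000000ULL

/* cf. time2slot(): hash a release time into a release-queue slot */
static unsigned int time2slot(lt_t release_time)
{
	return (unsigned int)((release_time / QUANTUM_NS) % SLOTS);
}

int main(void)
{
	lt_t a = 5 * QUANTUM_NS;      /* release at 5ms */
	lt_t b = 132 * QUANTUM_NS;    /* 127 quanta later */
	lt_t c = 40 * QUANTUM_NS;

	printf("slot(a)=%u slot(b)=%u slot(c)=%u\n",
	       time2slot(a), time2slot(b), time2slot(c));
	/* a and b hash to the same slot but keep separate release_heaps,
	 * because get_release_heap() also compares the exact release time */
	return 0;
}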
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..54afe73b4771
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,289 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10#include <linux/seq_file.h>
11
12#include <litmus/litmus.h>
13#include <litmus/sched_plugin.h>
14#include <litmus/preempt.h>
15#include <litmus/jobs.h>
16#include <litmus/budget.h>
17#include <litmus/np.h>
18
19/*
20 * Generic function to trigger preemption on either local or remote cpu
21 * from scheduler plugins. The key feature is that this function is
22 * non-preemptive section aware and does not invoke the scheduler / send
23 * IPIs if the to-be-preempted task is actually non-preemptive.
24 */
25void preempt_if_preemptable(struct task_struct* t, int cpu)
26{
27 /* t is the real-time task executing on CPU 'cpu'. If t is NULL, then
28 * that CPU is currently scheduling background work.
29 */
30
31 int reschedule = 0;
32
33 if (!t)
34 /* move non-real-time task out of the way */
35 reschedule = 1;
36 else {
37 if (smp_processor_id() == cpu) {
38 /* local CPU case */
39 /* check if we need to poke userspace */
40 if (is_user_np(t))
41 /* Yes, poke it. This doesn't have to be atomic since
42 * the task is definitely not executing. */
43 request_exit_np(t);
44 else if (!is_kernel_np(t))
45 /* only if we are allowed to preempt the
46 * currently-executing task */
47 reschedule = 1;
48 } else {
49 /* Remote CPU case. Only notify if it's not a kernel
50 * NP section and if we didn't set the userspace
51 * flag. */
52 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
53 }
54 }
55 if (likely(reschedule))
56 litmus_reschedule(cpu);
57}
58
59
60/*************************************************************
61 * Dummy plugin functions *
62 *************************************************************/
63
64static void litmus_dummy_finish_switch(struct task_struct * prev)
65{
66}
67
68static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
69{
70 sched_state_task_picked();
71 return NULL;
72}
73
74static bool litmus_dummy_should_wait_for_stack(struct task_struct *next)
75{
76 return true; /* by default, wait indefinitely */
77}
78
79static void litmus_dummy_next_became_invalid(struct task_struct *next)
80{
81}
82
83static bool litmus_dummy_post_migration_validate(struct task_struct *next)
84{
85 return true; /* by default, anything is ok */
86}
87
88static long litmus_dummy_admit_task(struct task_struct* tsk)
89{
90 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
91 tsk->comm, tsk->pid);
92 return -EINVAL;
93}
94
95static bool litmus_dummy_fork_task(struct task_struct* tsk)
96{
97 /* Default behavior: return false to demote to non-real-time task */
98 return false;
99}
100
101static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
102{
103}
104
105static void litmus_dummy_task_wake_up(struct task_struct *task)
106{
107}
108
109static void litmus_dummy_task_block(struct task_struct *task)
110{
111}
112
113static void litmus_dummy_task_exit(struct task_struct *task)
114{
115}
116
117static void litmus_dummy_task_cleanup(struct task_struct *task)
118{
119}
120
121static long litmus_dummy_complete_job(void)
122{
123 return -ENOSYS;
124}
125
126static long litmus_dummy_activate_plugin(void)
127{
128 return 0;
129}
130
131static long litmus_dummy_deactivate_plugin(void)
132{
133 return 0;
134}
135
136static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d)
137{
138 *d = NULL;
139 return 0;
140}
141
142static void litmus_dummy_synchronous_release_at(lt_t time_zero)
143{
144 /* ignore */
145}
146
147static long litmus_dummy_task_change_params(
148 struct task_struct *task,
149 struct rt_task *new_params)
150{
151 /* by default, do not allow changes to task parameters */
152 return -EBUSY;
153}
154
155#ifdef CONFIG_LITMUS_LOCKING
156
157static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
158 void* __user config)
159{
160 return -ENXIO;
161}
162
163#endif
164
165static long litmus_dummy_reservation_create(
166 int reservation_type,
167 void* __user config)
168{
169 return -ENOSYS;
170}
171
172static long litmus_dummy_reservation_destroy(unsigned int reservation_id, int cpu)
173{
174 return -ENOSYS;
175}
176
177/* The default scheduler plugin. It doesn't do anything and lets Linux do its
178 * job.
179 */
180struct sched_plugin linux_sched_plugin = {
181 .plugin_name = "Linux",
182 .task_new = litmus_dummy_task_new,
183 .task_exit = litmus_dummy_task_exit,
184 .task_wake_up = litmus_dummy_task_wake_up,
185 .task_block = litmus_dummy_task_block,
186 .complete_job = litmus_dummy_complete_job,
187 .schedule = litmus_dummy_schedule,
188 .finish_switch = litmus_dummy_finish_switch,
189 .activate_plugin = litmus_dummy_activate_plugin,
190 .deactivate_plugin = litmus_dummy_deactivate_plugin,
191 .get_domain_proc_info = litmus_dummy_get_domain_proc_info,
192 .synchronous_release_at = litmus_dummy_synchronous_release_at,
193#ifdef CONFIG_LITMUS_LOCKING
194 .allocate_lock = litmus_dummy_allocate_lock,
195#endif
196 .admit_task = litmus_dummy_admit_task
197};
198
199/*
200 * The reference to the current plugin that is used to schedule tasks within
201 * the system. It stores references to the actual function implementations.
202 * Should be initialized by calling "init_***_plugin()".
203 */
204struct sched_plugin *litmus = &linux_sched_plugin;
205
206/* the list of registered scheduling plugins */
207static LIST_HEAD(sched_plugins);
208static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
209
210#define CHECK(func) {\
211 if (!plugin->func) \
212 plugin->func = litmus_dummy_ ## func;}
213
214/* FIXME: get reference to module */
215int register_sched_plugin(struct sched_plugin* plugin)
216{
217 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
218 plugin->plugin_name);
219
220 /* make sure we don't trip over null pointers later */
221 CHECK(finish_switch);
222 CHECK(schedule);
223 CHECK(should_wait_for_stack);
224 CHECK(post_migration_validate);
225 CHECK(next_became_invalid);
226 CHECK(task_wake_up);
227 CHECK(task_exit);
228 CHECK(task_cleanup);
229 CHECK(task_block);
230 CHECK(task_new);
231 CHECK(task_change_params);
232 CHECK(complete_job);
233 CHECK(activate_plugin);
234 CHECK(deactivate_plugin);
235 CHECK(get_domain_proc_info);
236#ifdef CONFIG_LITMUS_LOCKING
237 CHECK(allocate_lock);
238#endif
239 CHECK(admit_task);
240 CHECK(fork_task);
241 CHECK(synchronous_release_at);
242 CHECK(reservation_destroy);
243 CHECK(reservation_create);
244
245 if (!plugin->wait_for_release_at)
246 plugin->wait_for_release_at = default_wait_for_release_at;
247
248 if (!plugin->current_budget)
249 plugin->current_budget = litmus_current_budget;
250
251 raw_spin_lock(&sched_plugins_lock);
252 list_add(&plugin->list, &sched_plugins);
253 raw_spin_unlock(&sched_plugins_lock);
254
255 return 0;
256}
257
258
259/* FIXME: reference counting, etc. */
260struct sched_plugin* find_sched_plugin(const char* name)
261{
262 struct list_head *pos;
263 struct sched_plugin *plugin;
264
265 raw_spin_lock(&sched_plugins_lock);
266 list_for_each(pos, &sched_plugins) {
267 plugin = list_entry(pos, struct sched_plugin, list);
268 if (!strcmp(plugin->plugin_name, name))
269 goto out_unlock;
270 }
271 plugin = NULL;
272
273out_unlock:
274 raw_spin_unlock(&sched_plugins_lock);
275 return plugin;
276}
277
278void print_sched_plugins(struct seq_file *m)
279{
280 struct list_head *pos;
281 struct sched_plugin *plugin;
282
283 raw_spin_lock(&sched_plugins_lock);
284 list_for_each(pos, &sched_plugins) {
285 plugin = list_entry(pos, struct sched_plugin, list);
286 seq_printf(m, "%s\n", plugin->plugin_name);
287 }
288 raw_spin_unlock(&sched_plugins_lock);
289}
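
register_sched_plugin() back-fills every callback a plugin leaves NULL with the matching litmus_dummy_*() implementation via CHECK(), so plugins only implement what they need. The same pattern in a self-contained sketch with a made-up ops table (not the kernel API):

#include <stdio.h>

/* a made-up two-callback ops table, standing in for struct sched_plugin */
struct ops {
	const char *name;
	void (*task_new)(int pid);
	void (*task_exit)(int pid);
};

static void dummy_task_new(int pid)  { (void) pid; /* do nothing */ }
static void dummy_task_exit(int pid) { (void) pid; /* do nothing */ }

/* cf. CHECK(func): substitute a dummy for every missing callback */
#define CHECK(plugin, func) \
	do { if (!(plugin)->func) (plugin)->func = dummy_ ## func; } while (0)

static void register_ops(struct ops *p)
{
	CHECK(p, task_new);
	CHECK(p, task_exit);
}

int main(void)
{
	struct ops demo = { .name = "DEMO", .task_new = NULL, .task_exit = NULL };
	register_ops(&demo);
	demo.task_new(42);   /* safe: falls through to dummy_task_new() */
	printf("registered %s with all callbacks non-NULL\n", demo.name);
	return 0;
}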
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index f89d08405944..a6088f16bb08 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -15,11 +15,6 @@
 #include <litmus/feather_trace.h>
 #include <litmus/ftdev.h>
 
-#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
-#define CREATE_TRACE_POINTS
-#include <trace/events/litmus.h>
-#endif
-
 #define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
 
 #define now() litmus_clock()
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..7e3c057c0752
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,310 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <linux/module.h>
6#include <asm/atomic.h>
7#include <linux/sched.h>
8#include <linux/wait.h>
9
10#include <litmus/litmus.h>
11#include <litmus/sched_plugin.h>
12#include <litmus/debug_trace.h>
13#include <litmus/fdso.h>
14#include <litmus/trace.h>
15
16
17#ifdef CONFIG_LITMUS_LOCKING
18
19#include <litmus/srp.h>
20
21srp_prioritization_t get_srp_prio;
22
23struct srp {
24 struct list_head ceiling;
25 wait_queue_head_t ceiling_blocked;
26};
27#define system_ceiling(srp) list2prio(srp->ceiling.next)
28#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
29
30#define UNDEF_SEM -2
31
32DEFINE_PER_CPU(struct srp, srp);
33
34DEFINE_PER_CPU(int, srp_objects_in_use);
35
36/* Initialize SRP semaphores at boot time. */
37static int __init srp_init(void)
38{
39 int i;
40
41 printk("Initializing SRP per-CPU ceilings...");
42 for (i = 0; i < NR_CPUS; i++) {
43 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
44 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
45 per_cpu(srp_objects_in_use, i) = 0;
46 }
47 printk(" done!\n");
48
49 return 0;
50}
51module_init(srp_init);
52
53/* SRP task priority comparison function. Smaller numeric values have higher
54 * priority, tie-break is PID. Special case: priority == 0 <=> no priority
55 */
56static int srp_higher_prio(struct srp_priority* first,
57 struct srp_priority* second)
58{
59 if (!first->priority)
60 return 0;
61 else
62 return !second->priority ||
63 first->priority < second->priority || (
64 first->priority == second->priority &&
65 first->pid < second->pid);
66}
67
68
69static int srp_exceeds_ceiling(struct task_struct* first,
70 struct srp* srp)
71{
72 struct srp_priority prio;
73
74 if (list_empty(&srp->ceiling))
75 return 1;
76 else {
77 prio.pid = first->pid;
78 prio.priority = get_srp_prio(first);
79 return srp_higher_prio(&prio, system_ceiling(srp)) ||
80 ceiling2sem(system_ceiling(srp))->owner == first;
81 }
82}
83
84static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
85{
86 struct list_head *pos;
87 if (in_list(&prio->list)) {
88 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
89 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
90 return;
91 }
92 list_for_each(pos, &srp->ceiling)
93 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
94 __list_add(&prio->list, pos->prev, pos);
95 return;
96 }
97
98 list_add_tail(&prio->list, &srp->ceiling);
99}
100
101
102static int lock_srp_semaphore(struct litmus_lock* l)
103{
104 struct task_struct* t = current;
105 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
106
107 if (!is_realtime(t))
108 return -EPERM;
109
110 /* prevent acquisition of local locks in global critical sections */
111 if (tsk_rt(t)->num_locks_held)
112 return -EBUSY;
113
114 preempt_disable();
115
116 /* Update ceiling. */
117 srp_add_prio(this_cpu_ptr(&srp), &sem->ceiling);
118
119 /* SRP invariant: all resources available */
120 BUG_ON(sem->owner != NULL);
121
122 sem->owner = t;
123 TRACE_CUR("acquired srp 0x%p\n", sem);
124
125 tsk_rt(t)->num_local_locks_held++;
126
127 preempt_enable();
128
129 return 0;
130}
131
132static int unlock_srp_semaphore(struct litmus_lock* l)
133{
134 struct task_struct* t = current;
135 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
136 int err = 0;
137
138 preempt_disable();
139
140 if (sem->owner != t) {
141 err = -EINVAL;
142 } else {
143 /* The current owner should be executing on the correct CPU.
144 *
145 * If the owner transitioned out of RT mode or is exiting, then
146 * it might have already been migrated away by the best-effort
147 * scheduler and we just have to deal with it. */
148 if (unlikely(!is_realtime(t) && sem->cpu != smp_processor_id())) {
149 TRACE_TASK(t, "SRP unlock cpu=%d, sem->cpu=%d\n",
150 smp_processor_id(), sem->cpu);
151 preempt_enable();
152 err = litmus_be_migrate_to(sem->cpu);
153 preempt_disable();
154 TRACE_TASK(t, "post-migrate: cpu=%d, sem->cpu=%d err=%d\n",
155 smp_processor_id(), sem->cpu, err);
156 }
157 BUG_ON(sem->cpu != smp_processor_id());
158 err = 0;
159
160 /* Determine new system priority ceiling for this CPU. */
161 BUG_ON(!in_list(&sem->ceiling.list));
162
163 list_del(&sem->ceiling.list);
164 sem->owner = NULL;
165
166 /* Wake tasks on this CPU, if they exceed current ceiling. */
167 TRACE_CUR("released srp 0x%p\n", sem);
168 wake_up_all(&this_cpu_ptr(&srp)->ceiling_blocked);
169
170 tsk_rt(t)->num_local_locks_held--;
171 }
172
173 preempt_enable();
174 return err;
175}
176
177static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
178{
179 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
180 int err = 0;
181 struct task_struct* t = current;
182 struct srp_priority t_prio;
183
184 if (!is_realtime(t))
185 return -EPERM;
186
187 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
188
189 preempt_disable();
190
191 if (sem->owner != NULL)
192 err = -EBUSY;
193
194 if (err == 0) {
195 if (sem->cpu == UNDEF_SEM)
196 sem->cpu = get_partition(t);
197 else if (sem->cpu != get_partition(t))
198 err = -EPERM;
199 }
200
201 if (err == 0) {
202 t_prio.priority = get_srp_prio(t);
203 t_prio.pid = t->pid;
204 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
205 sem->ceiling.priority = t_prio.priority;
206 sem->ceiling.pid = t_prio.pid;
207 }
208 }
209
210 preempt_enable();
211
212 return err;
213}
214
215static int close_srp_semaphore(struct litmus_lock* l)
216{
217 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
218 int err = 0;
219
220 preempt_disable();
221
222 if (sem->owner == current)
223 unlock_srp_semaphore(l);
224
225 preempt_enable();
226
227 return err;
228}
229
230static void deallocate_srp_semaphore(struct litmus_lock* l)
231{
232 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
233 raw_cpu_dec(srp_objects_in_use);
234 kfree(sem);
235}
236
237static struct litmus_lock_ops srp_lock_ops = {
238 .open = open_srp_semaphore,
239 .close = close_srp_semaphore,
240 .lock = lock_srp_semaphore,
241 .unlock = unlock_srp_semaphore,
242 .deallocate = deallocate_srp_semaphore,
243};
244
245struct srp_semaphore* allocate_srp_semaphore(void)
246{
247 struct srp_semaphore* sem;
248
249 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
250 if (!sem)
251 return NULL;
252
253 INIT_LIST_HEAD(&sem->ceiling.list);
254 sem->ceiling.priority = 0;
255 sem->cpu = UNDEF_SEM;
256 sem->owner = NULL;
257
258 sem->litmus_lock.ops = &srp_lock_ops;
259
260 raw_cpu_inc(srp_objects_in_use);
261 return sem;
262}
263
264static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
265 void *key)
266{
267 int cpu = smp_processor_id();
268 struct task_struct *tsk = wait->private;
269 if (cpu != get_partition(tsk))
270 TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
271 get_partition(tsk));
272 else if (srp_exceeds_ceiling(tsk, this_cpu_ptr(&srp)))
273 return default_wake_function(wait, mode, sync, key);
274 return 0;
275}
276
277static void do_ceiling_block(struct task_struct *tsk)
278{
279 wait_queue_t wait = {
280 .private = tsk,
281 .func = srp_wake_up,
282 .task_list = {NULL, NULL}
283 };
284
285 tsk->state = TASK_UNINTERRUPTIBLE;
286 add_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
287 tsk->rt_param.srp_non_recurse = 1;
288 preempt_enable_no_resched();
289 schedule();
290 preempt_disable();
291 tsk->rt_param.srp_non_recurse = 0;
292 remove_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
293}
294
295/* Wait for current task priority to exceed system-wide priority ceiling.
296 */
297void __srp_ceiling_block(struct task_struct *cur)
298{
299 preempt_disable();
300 if (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) {
301 TRACE_CUR("is priority ceiling blocked.\n");
302 while (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp)))
303 do_ceiling_block(cur);
304 TRACE_CUR("finally exceeds system ceiling.\n");
305 } else
306 TRACE_CUR("is not priority ceiling blocked\n");
307 preempt_enable();
308}
309
310#endif
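
srp_higher_prio() encodes the SRP ordering used throughout this file: smaller numeric values mean higher priority, priority 0 means "no priority", and ties break toward the lower PID. A standalone restatement of the comparison with userspace types:

#include <stdio.h>

struct prio { unsigned long priority; int pid; };

/* same ordering as srp_higher_prio(): lower value = higher priority,
 * 0 = no priority, PID breaks ties */
static int higher_prio(const struct prio *first, const struct prio *second)
{
	if (!first->priority)
		return 0;
	return !second->priority ||
	       first->priority < second->priority ||
	       (first->priority == second->priority && first->pid < second->pid);
}

int main(void)
{
	struct prio a    = { .priority = 2, .pid = 100 };
	struct prio b    = { .priority = 2, .pid = 200 };
	struct prio none = { .priority = 0, .pid = 300 };

	printf("a beats b:    %d\n", higher_prio(&a, &b));     /* 1: same prio, lower PID */
	printf("none beats a: %d\n", higher_prio(&none, &a));  /* 0: no priority never wins */
	return 0;
}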
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..941545a05383
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,153 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18#include <litmus/debug_trace.h>
19
20struct ts_release_wait {
21 struct list_head list;
22 struct completion completion;
23 lt_t ts_release_time;
24};
25
26#define DECLARE_TS_RELEASE_WAIT(symb) \
27 struct ts_release_wait symb = \
28 { \
29 LIST_HEAD_INIT(symb.list), \
30 COMPLETION_INITIALIZER_ONSTACK(symb.completion), \
31 0 \
32 }
33
34static LIST_HEAD(task_release_list);
35static DEFINE_MUTEX(task_release_lock);
36
37static long do_wait_for_ts_release(void)
38{
39 DECLARE_TS_RELEASE_WAIT(wait);
40
41 long ret = -ERESTARTSYS;
42
43 if (mutex_lock_interruptible(&task_release_lock))
44 goto out;
45
46 list_add(&wait.list, &task_release_list);
47
48 mutex_unlock(&task_release_lock);
49
50 /* We are enqueued, now we wait for someone to wake us up. */
51 ret = wait_for_completion_interruptible(&wait.completion);
52
53 if (!ret) {
54 /* Completion succeeded, setup release time. */
55 ret = litmus->wait_for_release_at(
56 wait.ts_release_time + get_rt_phase(current));
57 } else {
58 /* We were interrupted, must cleanup list. */
59 mutex_lock(&task_release_lock);
60 if (!wait.completion.done)
61 list_del(&wait.list);
62 mutex_unlock(&task_release_lock);
63 }
64
65out:
66 return ret;
67}
68
69int count_tasks_waiting_for_release(void)
70{
71 int task_count = 0;
72 struct list_head *pos;
73
74 mutex_lock(&task_release_lock);
75
76 list_for_each(pos, &task_release_list) {
77 task_count++;
78 }
79
80 mutex_unlock(&task_release_lock);
81
82
83 return task_count;
84}
85
86static long do_release_ts(lt_t start)
87{
88 long task_count = 0;
89
90 struct list_head *pos, *safe;
91 struct ts_release_wait *wait;
92
93 if (mutex_lock_interruptible(&task_release_lock)) {
94 task_count = -ERESTARTSYS;
95 goto out;
96 }
97
98 TRACE("<<<<<< synchronous task system release >>>>>>\n");
99 sched_trace_sys_release(&start);
100 litmus->synchronous_release_at(start);
101
102 task_count = 0;
103 list_for_each_safe(pos, safe, &task_release_list) {
104 wait = (struct ts_release_wait*)
105 list_entry(pos, struct ts_release_wait, list);
106
107 task_count++;
108 wait->ts_release_time = start;
109 complete(&wait->completion);
110 }
111
112 /* clear stale list */
113 INIT_LIST_HEAD(&task_release_list);
114
115 mutex_unlock(&task_release_lock);
116
117out:
118 return task_count;
119}
120
121
122asmlinkage long sys_wait_for_ts_release(void)
123{
124 long ret = -EPERM;
125 struct task_struct *t = current;
126
127 if (is_realtime(t))
128 ret = do_wait_for_ts_release();
129
130 return ret;
131}
132
133#define ONE_MS 1000000
134
135asmlinkage long sys_release_ts(lt_t __user *__delay)
136{
137 long ret;
138 lt_t delay;
139 lt_t start_time;
140
141 /* FIXME: check capabilities... */
142
143 ret = copy_from_user(&delay, __delay, sizeof(delay));
144 if (ret == 0) {
145 /* round the current time down to an integral millisecond boundary */
146 start_time = litmus_clock();
147 do_div(start_time, ONE_MS);
148 start_time *= ONE_MS;
149 ret = do_release_ts(start_time + delay);
150 }
151
152 return ret;
153}
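
sys_release_ts() snaps the current time down to a whole millisecond before adding the user-supplied delay, so synchronized releases start on a millisecond boundary. The rounding in isolation (plain C; litmus_clock() replaced by a fixed sample value):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t;
#define ONE_MS 1000000ULL

int main(void)
{
	lt_t now   = 123456789;      /* pretend litmus_clock() returned this */
	lt_t delay = 4 * ONE_MS;     /* userspace asked for a 4ms delay */

	lt_t start_time = (now / ONE_MS) * ONE_MS;  /* do_div() + multiply above */
	printf("release at %llu ns (clock was %llu ns)\n",
	       (unsigned long long)(start_time + delay),
	       (unsigned long long) now);
	/* 123000000 + 4000000 = 127000000 ns */
	return 0;
}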
diff --git a/litmus/trace.c b/litmus/trace.c
index 7d698aac257d..0724e31a49ff 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -254,6 +254,19 @@ feather_callback void save_cpu_timestamp_irq(unsigned long event,
 		0, RECORD_LOCAL_TIMESTAMP);
 }
 
+feather_callback void save_cpu_task_latency(unsigned long event,
+					    unsigned long when_ptr)
+{
+	lt_t now = litmus_clock();
+	lt_t *when = (lt_t*) when_ptr;
+	lt_t delta = now - *when;
+
+	write_cpu_timestamp(event, TSK_RT,
+			    0,
+			    0, LOCAL_IRQ_COUNT, 0,
+			    delta, DO_NOT_RECORD_TIMESTAMP);
+}
+
 /* Record to remote trace buffer */
 feather_callback void msg_sent_to(unsigned long event, unsigned long to)
 {
@@ -422,11 +435,13 @@ static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
 
 static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
 {
+	struct ft_buffer* tmp = ftdev->minor[idx].buf;
+	smp_rmb();
 	ftdev->minor[idx].buf = NULL;
 	/* Make sure all cores have actually seen buf == NULL before
 	 * yanking out the mappings from underneath them. */
 	smp_wmb();
-	free_ft_buffer(ftdev->minor[idx].buf);
+	free_ft_buffer(tmp);
 }
 
 static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
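
The free_timestamp_buffer() change above takes a private copy of the buffer pointer, publishes buf == NULL, issues a barrier, and only then frees the copy. A compact userspace sketch of that publish-then-free idiom, using C11 atomics in place of the kernel barriers (reader-side synchronization elided):

#include <stdatomic.h>
#include <stdlib.h>

struct buffer { char data[64]; };

/* shared pointer; readers check for NULL before dereferencing */
static _Atomic(struct buffer *) buf;

/* take a private copy, publish NULL with release semantics, then free
 * the copy -- never the shared pointer, which others may see as NULL */
static void teardown(void)
{
	struct buffer *tmp = atomic_load(&buf);
	atomic_store_explicit(&buf, NULL, memory_order_release);
	free(tmp);
}

int main(void)
{
	atomic_store(&buf, malloc(sizeof(struct buffer)));
	teardown();
	return 0;
}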
diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c
new file mode 100644
index 000000000000..06a6a7c17983
--- /dev/null
+++ b/litmus/uncachedev.c
@@ -0,0 +1,102 @@
1#include <linux/sched.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/fs.h>
5#include <linux/errno.h>
6#include <linux/highmem.h>
7#include <asm/page.h>
8#include <linux/miscdevice.h>
9#include <linux/module.h>
10
11#include <litmus/litmus.h>
12
13/* device for allocating pages not cached by the CPU */
14
15#define UNCACHE_NAME "litmus/uncache"
16
17void litmus_uncache_vm_open(struct vm_area_struct *vma)
18{
19}
20
21void litmus_uncache_vm_close(struct vm_area_struct *vma)
22{
23}
24
25int litmus_uncache_vm_fault(struct vm_area_struct* vma,
26 struct vm_fault* vmf)
27{
28 /* modeled after SG DMA video4linux, but without DMA. */
29 /* (see drivers/media/video/videobuf-dma-sg.c) */
30 struct page *page;
31
32 page = alloc_page(GFP_USER);
33 if (!page)
34 return VM_FAULT_OOM;
35
36 clear_user_highpage(page, (unsigned long)vmf->virtual_address);
37 vmf->page = page;
38
39 return 0;
40}
41
42static struct vm_operations_struct litmus_uncache_vm_ops = {
43 .open = litmus_uncache_vm_open,
44 .close = litmus_uncache_vm_close,
45 .fault = litmus_uncache_vm_fault,
46};
47
48static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma)
49{
50 /* first make sure mapper knows what he's doing */
51
52 /* you can only map the "first" page */
53 if (vma->vm_pgoff != 0)
54 return -EINVAL;
55
56 /* you can't share it with anyone */
57 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
58 return -EINVAL;
59
60 /* cannot be expanded, and is not a "normal" page. */
61 vma->vm_flags |= VM_DONTEXPAND;
62
63 /* noncached pages are not explicitly locked in memory (for now). */
64 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
65
66 vma->vm_ops = &litmus_uncache_vm_ops;
67
68 return 0;
69}
70
71static struct file_operations litmus_uncache_fops = {
72 .owner = THIS_MODULE,
73 .mmap = litmus_uncache_mmap,
74};
75
76static struct miscdevice litmus_uncache_dev = {
77 .name = UNCACHE_NAME,
78 .minor = MISC_DYNAMIC_MINOR,
79 .fops = &litmus_uncache_fops,
80 /* pages are not locked in memory, so there is no reason
81 to restrict who may allocate uncached pages */
82 .mode = (S_IRUGO | S_IWUGO),
83};
84
85static int __init init_litmus_uncache_dev(void)
86{
87 int err;
88
89 printk("Initializing LITMUS^RT uncache device.\n");
90 err = misc_register(&litmus_uncache_dev);
91 if (err)
92 printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err);
93 return err;
94}
95
96static void __exit exit_litmus_uncache_dev(void)
97{
98 misc_deregister(&litmus_uncache_dev);
99}
100
101module_init(init_litmus_uncache_dev);
102module_exit(exit_litmus_uncache_dev);
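
litmus_uncache_mmap() above only accepts private, non-shared mappings starting at offset 0 and faults uncached pages in on demand. A userspace sketch of mapping one page; the device path /dev/litmus/uncache is assumed from the misc-device name, and error handling is minimal:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* path assumed from the misc device name "litmus/uncache" */
	int fd = open("/dev/litmus/uncache", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* must be MAP_PRIVATE: the driver rejects VM_SHARED mappings */
	size_t len = sysconf(_SC_PAGESIZE);
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	memset(p, 0, len);  /* first touch triggers litmus_uncache_vm_fault() */
	munmap(p, len);
	close(fd);
	return 0;
}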