-rw-r--r--  include/litmus/affinity.h | 52
-rw-r--r--  include/litmus/bheap.h | 77
-rw-r--r--  include/litmus/binheap.h | 205
-rw-r--r--  include/litmus/budget.h | 38
-rw-r--r--  include/litmus/ceiling.h | 36
-rw-r--r--  include/litmus/clustered.h | 46
-rw-r--r--  include/litmus/ctrlpage.h | 105
-rw-r--r--  include/litmus/debug_trace.h | 57
-rw-r--r--  include/litmus/edf_common.h | 25
-rw-r--r--  include/litmus/fdso.h | 78
-rw-r--r--  include/litmus/feather_buffer.h | 118
-rw-r--r--  include/litmus/feather_trace.h | 50
-rw-r--r--  include/litmus/fp_common.h | 183
-rw-r--r--  include/litmus/fpmath.h | 147
-rw-r--r--  include/litmus/ftdev.h | 58
-rw-r--r--  include/litmus/jobs.h | 13
-rw-r--r--  include/litmus/litmus.h | 224
-rw-r--r--  include/litmus/litmus_proc.h | 63
-rw-r--r--  include/litmus/locking.h | 28
-rw-r--r--  include/litmus/np.h | 121
-rw-r--r--  include/litmus/preempt.h | 191
-rw-r--r--  include/litmus/reservations/alloc.h | 15
-rw-r--r--  include/litmus/reservations/budget-notifier.h | 50
-rw-r--r--  include/litmus/reservations/polling.h | 19
-rw-r--r--  include/litmus/reservations/reservation.h | 224
-rw-r--r--  include/litmus/reservations/table-driven.h | 23
-rw-r--r--  include/litmus/rt_domain.h | 182
-rw-r--r--  include/litmus/rt_param.h | 290
-rw-r--r--  include/litmus/sched_plugin.h | 180
-rw-r--r--  include/litmus/sched_trace.h | 267
-rw-r--r--  include/litmus/srp.h | 28
-rw-r--r--  include/litmus/trace.h | 161
-rw-r--r--  include/litmus/trace_irq.h | 14
-rw-r--r--  include/litmus/wait.h | 57
-rw-r--r--  litmus/Kconfig | 384
-rw-r--r--  litmus/Makefile | 36
-rw-r--r--  litmus/bheap.c | 316
-rw-r--r--  litmus/binheap.c | 387
-rw-r--r--  litmus/budget.c | 168
-rw-r--r--  litmus/clustered.c | 119
-rw-r--r--  litmus/ctrldev.c | 264
-rw-r--r--  litmus/edf_common.c | 201
-rw-r--r--  litmus/fdso.c | 308
-rw-r--r--  litmus/fp_common.c | 137
-rw-r--r--  litmus/ft_event.c | 43
-rw-r--r--  litmus/ftdev.c | 457
-rw-r--r--  litmus/jobs.c | 164
-rw-r--r--  litmus/litmus.c | 773
-rw-r--r--  litmus/litmus_proc.c | 574
-rw-r--r--  litmus/locking.c | 189
-rw-r--r--  litmus/preempt.c | 144
-rw-r--r--  litmus/reservations/Makefile | 3
-rw-r--r--  litmus/reservations/alloc.c | 143
-rw-r--r--  litmus/reservations/budget-notifier.c | 26
-rw-r--r--  litmus/reservations/core.c | 393
-rw-r--r--  litmus/reservations/polling.c | 256
-rw-r--r--  litmus/reservations/table-driven.c | 269
-rw-r--r--  litmus/rt_domain.c | 351
-rw-r--r--  litmus/sched_cedf.c | 890
-rw-r--r--  litmus/sched_gsn_edf.c | 1070
-rw-r--r--  litmus/sched_pfair.c | 1231
-rw-r--r--  litmus/sched_pfp.c | 2048
-rw-r--r--  litmus/sched_plugin.c | 290
-rw-r--r--  litmus/sched_pres.c | 612
-rw-r--r--  litmus/sched_psn_edf.c | 688
-rw-r--r--  litmus/sched_task_trace.c | 258
-rw-r--r--  litmus/sched_trace.c | 251
-rw-r--r--  litmus/srp.c | 310
-rw-r--r--  litmus/sync.c | 153
-rw-r--r--  litmus/trace.c | 575
-rw-r--r--  litmus/uncachedev.c | 102
71 files changed, 18008 insertions, 0 deletions
diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
new file mode 100644
index 000000000000..4d7c618c8175
--- /dev/null
+++ b/include/litmus/affinity.h
@@ -0,0 +1,52 @@
1#ifndef __LITMUS_AFFINITY_H
2#define __LITMUS_AFFINITY_H
3
4#include <linux/cpumask.h>
5
6/* Works like:
7void get_nearest_available_cpu(
8 cpu_entry_t **nearest,
9 cpu_entry_t *start,
10 cpu_entry_t *entries,
11 int release_master,
12 cpumask_var_t cpus_to_test)
13
14Set release_master = NO_CPU for no Release Master.
15
16We use a macro here to exploit the fact that C-EDF and G-EDF
17have similar structures for their cpu_entry_t structs, even though
18they do not share a common base-struct. The macro allows us to
19avoid code duplication.
20
21 */
22#define get_nearest_available_cpu(nearest, start, entries, release_master, cpus_to_test) \
23{ \
24 (nearest) = NULL; \
25 if (!(start)->linked && likely((start)->cpu != (release_master))) { \
26 (nearest) = (start); \
27 } else { \
28 int __cpu; \
29 \
30 /* FIXME: get rid of the iteration with a bitmask + AND */ \
31 for_each_cpu(__cpu, cpus_to_test) { \
32 if (likely(__cpu != release_master)) { \
33 cpu_entry_t *__entry = &per_cpu((entries), __cpu); \
34 if (cpus_share_cache((start)->cpu, __entry->cpu) \
35 && !__entry->linked) { \
36 (nearest) = __entry; \
37 break; \
38 } \
39 } \
40 } \
41 } \
42 \
43 if ((nearest)) { \
44 TRACE("P%d is closest available CPU to P%d\n", \
45 (nearest)->cpu, (start)->cpu); \
46 } else { \
47 TRACE("Could not find an available CPU close to P%d\n", \
48 (start)->cpu); \
49 } \
50}
51
52#endif
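
For illustration (not part of the patch): a minimal sketch of how a plugin might invoke this macro. The cpu_entry_t stand-in, the per-CPU variable example_cpu_entries, and the find_nearby_idle_cpu() helper are assumptions for the example; a real plugin would use its own cpu_entry_t and per-CPU entries.

#include <linux/percpu.h>
#include <linux/sched.h>
#include <litmus/litmus.h>       /* NO_CPU */
#include <litmus/debug_trace.h>  /* TRACE(), used by the macro */
#include <litmus/affinity.h>

/* minimal stand-in with the two fields the macro accesses */
typedef struct {
	int cpu;
	struct task_struct *linked;
} cpu_entry_t;

static DEFINE_PER_CPU(cpu_entry_t, example_cpu_entries);

static cpu_entry_t *find_nearby_idle_cpu(cpu_entry_t *last)
{
	cpu_entry_t *affinity = NULL;

	get_nearest_available_cpu(affinity, last, example_cpu_entries,
				  NO_CPU, cpu_online_mask);
	/* affinity is 'last' itself if it is idle, otherwise an idle entry
	 * that shares a cache with last->cpu, or NULL if none was found. */
	return affinity;
}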
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
new file mode 100644
index 000000000000..cf4864a498d8
--- /dev/null
+++ b/include/litmus/bheap.h
@@ -0,0 +1,77 @@
1/* bheap.h -- Binomial Heaps
2 *
3 * (c) 2008, 2009 Bjoern Brandenburg
4 */
5
6#ifndef BHEAP_H
7#define BHEAP_H
8
9#define NOT_IN_HEAP UINT_MAX
10
11struct bheap_node {
12 struct bheap_node* parent;
13 struct bheap_node* next;
14 struct bheap_node* child;
15
16 unsigned int degree;
17 void* value;
18 struct bheap_node** ref;
19};
20
21struct bheap {
22 struct bheap_node* head;
23 /* We cache the minimum of the heap.
24 * This speeds up repeated peek operations.
25 */
26 struct bheap_node* min;
27};
28
29typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
30
31void bheap_init(struct bheap* heap);
32void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
33
34static inline int bheap_node_in_heap(struct bheap_node* h)
35{
36 return h->degree != NOT_IN_HEAP;
37}
38
39static inline int bheap_empty(struct bheap* heap)
40{
41 return heap->head == NULL && heap->min == NULL;
42}
43
44/* insert (and reinitialize) a node into the heap */
45void bheap_insert(bheap_prio_t higher_prio,
46 struct bheap* heap,
47 struct bheap_node* node);
48
49/* merge addition into target */
50void bheap_union(bheap_prio_t higher_prio,
51 struct bheap* target,
52 struct bheap* addition);
53
54struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
55 struct bheap* heap);
56
57struct bheap_node* bheap_take(bheap_prio_t higher_prio,
58 struct bheap* heap);
59
60void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
61int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
62
63void bheap_delete(bheap_prio_t higher_prio,
64 struct bheap* heap,
65 struct bheap_node* node);
66
67/* allocate from memcache */
68struct bheap_node* bheap_node_alloc(int gfp_flags);
69void bheap_node_free(struct bheap_node* hn);
70
71/* allocate a heap node for value and insert into the heap */
72int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
73 void* value, int gfp_flags);
74
75void* bheap_take_del(bheap_prio_t higher_prio,
76 struct bheap* heap);
77#endif
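
For illustration (not part of the patch): a minimal sketch of a min-heap ordered by deadline. The struct item, its fields, and the helper functions are assumptions for the example; in LITMUS^RT the heap values are typically task_structs.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <litmus/bheap.h>

struct item {
	unsigned long long deadline;
	struct bheap_node *hn;
};

/* "higher priority" here means "earlier deadline" (min-heap) */
static int item_earlier(struct bheap_node *a, struct bheap_node *b)
{
	struct item *x = a->value, *y = b->value;
	return x->deadline < y->deadline;
}

/* assumes bheap_init(heap) was called once beforehand */
static int add_item(struct bheap *heap, struct item *it)
{
	it->hn = bheap_node_alloc(GFP_ATOMIC);
	if (!it->hn)
		return -ENOMEM;
	bheap_node_init(&it->hn, it);
	bheap_insert(item_earlier, heap, it->hn);
	return 0;
}

static struct item *pop_earliest(struct bheap *heap)
{
	struct bheap_node *hn = bheap_take(item_earlier, heap);
	return hn ? hn->value : NULL;
}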
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
new file mode 100644
index 000000000000..1cf364701da8
--- /dev/null
+++ b/include/litmus/binheap.h
@@ -0,0 +1,205 @@
1#ifndef LITMUS_BINARY_HEAP_H
2#define LITMUS_BINARY_HEAP_H
3
4#include <linux/kernel.h>
5
6/**
7 * Simple binary heap with add, arbitrary delete, delete_root, and top
8 * operations.
9 *
10 * Style meant to conform with list.h.
11 *
12 * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
13 * heap may be overkill (and perhaps not general enough) for some applications.
14 *
15 * Note: In order to make node swaps fast, a node inserted with a data pointer
16 * may not always hold said data pointer. This is similar to the binomial heap
17 * implementation. This does make node deletion tricky since we have to
18 * (1) locate the node that holds the data pointer to delete, and (2) the
19 * node that was originally inserted with said data pointer. These have to be
20 * coalesced into a single node before removal (see usage of
21 * __binheap_safe_swap()). We have to track node references to accomplish this.
22 */
23
24struct binheap_node {
25 void *data;
26 struct binheap_node *parent;
27 struct binheap_node *left;
28 struct binheap_node *right;
29
30 /* pointer to binheap_node that holds *data for which this binheap_node
31 * was originally inserted. (*data "owns" this node)
32 */
33 struct binheap_node *ref;
34 struct binheap_node **ref_ptr;
35};
36
37/**
38 * Signature of comparator function. Assumed to be 'less-than' (min-heap).
39 * Pass in 'greater-than' for max-heap.
40 *
41 * TODO: Consider macro-based implementation that allows comparator to be
42 * inlined (similar to Linux red/black tree) for greater efficiency.
43 */
44typedef int (*binheap_order_t)(struct binheap_node *a,
45 struct binheap_node *b);
46
47
48struct binheap {
49 struct binheap_node *root;
50
51 /* pointer to node to take next inserted child */
52 struct binheap_node *next;
53
54 /* pointer to last node in complete binary tree */
55 struct binheap_node *last;
56
57 /* comparator function pointer */
58 binheap_order_t compare;
59};
60
61
62/* Initialized heap nodes not in a heap have parent
63 * set to BINHEAP_POISON.
64 */
65#define BINHEAP_POISON ((void*)(0xdeadbeef))
66
67
68/**
69 * binheap_entry - get the struct for this heap node.
70 * Only valid when called upon heap nodes other than the root handle.
71 * @ptr: the heap node.
72 * @type: the type of struct pointed to by binheap_node::data.
73 * @member: unused.
74 */
75#define binheap_entry(ptr, type, member) \
76((type *)((ptr)->data))
77
78/**
79 * binheap_node_container - get the struct that contains this node.
80 * Only valid when called upon heap nodes other than the root handle.
81 * @ptr: the heap node.
82 * @type: the type of struct the node is embedded in.
83 * @member: the name of the binheap_struct within the (type) struct.
84 */
85#define binheap_node_container(ptr, type, member) \
86container_of((ptr), type, member)
87
88/**
89 * binheap_top_entry - get the struct for the node at the top of the heap.
90 * Only valid when called upon the heap handle node.
91 * @ptr: the special heap-handle node.
92 * @type: the type of the struct the head is embedded in.
93 * @member: the name of the binheap_struct within the (type) struct.
94 */
95#define binheap_top_entry(ptr, type, member) \
96binheap_entry((ptr)->root, type, member)
97
98/**
99 * binheap_delete_root - remove the root element from the heap.
100 * @handle: handle to the heap.
101 * @type: the type of the struct the head is embedded in.
102 * @member: the name of the binheap_struct within the (type) struct.
103 */
104#define binheap_delete_root(handle, type, member) \
105__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
106
107/**
108 * binheap_delete - remove an arbitrary element from the heap.
109 * @to_delete: pointer to node to be removed.
110 * @handle: handle to the heap.
111 */
112#define binheap_delete(to_delete, handle) \
113__binheap_delete((to_delete), (handle))
114
115/**
116 * binheap_add - insert an element to the heap
117 * @new_node: node to add.
118 * @handle: handle to the heap.
119 * @type: the type of the struct the head is embedded in.
120 * @member: the name of the binheap_struct within the (type) struct.
121 */
122#define binheap_add(new_node, handle, type, member) \
123__binheap_add((new_node), (handle), container_of((new_node), type, member))
124
125/**
126 * binheap_decrease - re-eval the position of a node (based upon its
127 * original data pointer).
128 * @handle: handle to the heap.
129 * @orig_node: node that was associated with the data pointer
130 * (whose value has changed) when said pointer was
131 * added to the heap.
132 */
133#define binheap_decrease(orig_node, handle) \
134__binheap_decrease((orig_node), (handle))
135
136#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL, NULL, NULL }
137
138#define BINHEAP_NODE(name) \
139 struct binheap_node name = BINHEAP_NODE_INIT()
140
141
142static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
143{
144 n->data = NULL;
145 n->parent = BINHEAP_POISON;
146 n->left = NULL;
147 n->right = NULL;
148 n->ref = NULL;
149 n->ref_ptr = NULL;
150}
151
152static inline void INIT_BINHEAP_HANDLE(struct binheap *handle,
153 binheap_order_t compare)
154{
155 handle->root = NULL;
156 handle->next = NULL;
157 handle->last = NULL;
158 handle->compare = compare;
159}
160
161/* Returns true if binheap is empty. */
162static inline int binheap_empty(struct binheap *handle)
163{
164 return(handle->root == NULL);
165}
166
167/* Returns true if binheap node is in a heap. */
168static inline int binheap_is_in_heap(struct binheap_node *node)
169{
170 return (node->parent != BINHEAP_POISON);
171}
172
173/* Returns true if binheap node is in given heap. */
174int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap);
175
176/* Add a node to a heap */
177void __binheap_add(struct binheap_node *new_node,
178 struct binheap *handle,
179 void *data);
180
181/**
182 * Removes the root node from the heap. The node is removed after coalescing
183 * the binheap_node with its original data pointer at the root of the tree.
184 *
185 * The 'last' node in the tree is then swapped up to the root and bubbled
186 * down.
187 */
188void __binheap_delete_root(struct binheap *handle,
189 struct binheap_node *container);
190
191/**
192 * Delete an arbitrary node. Bubble node to delete up to the root,
193 * and then delete it at the root.
194 */
195void __binheap_delete(struct binheap_node *node_to_delete,
196 struct binheap *handle);
197
198/**
199 * Bubble up a node whose pointer has decreased in value.
200 */
201void __binheap_decrease(struct binheap_node *orig_node,
202 struct binheap *handle);
203
204
205#endif
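
For illustration (not part of the patch): a minimal sketch of embedding a binheap_node in a hypothetical struct job and using the handle as a min-heap ordered by deadline. All names in the sketch are assumptions.

#include <linux/kernel.h>
#include <litmus/binheap.h>

struct job {
	unsigned long long deadline;
	struct binheap_node heap_node;
};

static int job_earlier(struct binheap_node *a, struct binheap_node *b)
{
	struct job *x = binheap_entry(a, struct job, heap_node);
	struct job *y = binheap_entry(b, struct job, heap_node);
	return x->deadline < y->deadline;
}

/* assumes INIT_BINHEAP_HANDLE(heap, job_earlier) was called once beforehand */
static void add_job(struct binheap *heap, struct job *j)
{
	INIT_BINHEAP_NODE(&j->heap_node);
	binheap_add(&j->heap_node, heap, struct job, heap_node);
}

static unsigned long long pop_earliest_deadline(struct binheap *heap)
{
	struct job *top;

	if (binheap_empty(heap))
		return 0;
	top = binheap_top_entry(heap, struct job, heap_node);
	binheap_delete_root(heap, struct job, heap_node);
	return top->deadline;
}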
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
new file mode 100644
index 000000000000..60eb814fc82b
--- /dev/null
+++ b/include/litmus/budget.h
@@ -0,0 +1,38 @@
1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_
3
4/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
5 * the next task. */
6void update_enforcement_timer(struct task_struct* t);
7
8inline static int budget_exhausted(struct task_struct* t)
9{
10 return get_exec_time(t) >= get_exec_cost(t);
11}
12
13inline static lt_t budget_remaining(struct task_struct* t)
14{
15 if (!budget_exhausted(t))
16 return get_exec_cost(t) - get_exec_time(t);
17 else
18 /* avoid overflow */
19 return 0;
20}
21
22#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
23
24#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
25 == PRECISE_ENFORCEMENT)
26
27static inline int requeue_preempted_job(struct task_struct* t)
28{
29 /* Add task to ready queue only if not subject to budget enforcement or
30 * if the job has budget remaining. t may be NULL.
31 */
32 return t && !is_completed(t) &&
33 (!budget_exhausted(t) || !budget_enforced(t));
34}
35
36void litmus_current_budget(lt_t *used_so_far, lt_t *remaining);
37
38#endif
diff --git a/include/litmus/ceiling.h b/include/litmus/ceiling.h
new file mode 100644
index 000000000000..f3d3889315f7
--- /dev/null
+++ b/include/litmus/ceiling.h
@@ -0,0 +1,36 @@
1#ifndef _LITMUS_CEILING_H_
2#define _LITMUS_CEILING_H_
3
4#ifdef CONFIG_LITMUS_LOCKING
5
6void __srp_ceiling_block(struct task_struct *cur);
7
8DECLARE_PER_CPU(int, srp_objects_in_use);
9
10/* assumes preemptions off */
11static inline void srp_ceiling_block(void)
12{
13 struct task_struct *tsk = current;
14
15 /* Only applies to real-time tasks. */
16 if (!is_realtime(tsk))
17 return;
18
19 /* Bail out early if there aren't any SRP resources around. */
20 if (likely(!raw_cpu_read(srp_objects_in_use)))
21 return;
22
23 /* Avoid recursive ceiling blocking. */
24 if (unlikely(tsk->rt_param.srp_non_recurse))
25 return;
26
27 /* must take slow path */
28 __srp_ceiling_block(tsk);
29}
30
31#else
32#define srp_ceiling_block() /* nothing */
33#endif
34
35
36#endif \ No newline at end of file
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
new file mode 100644
index 000000000000..fc7f0f87966e
--- /dev/null
+++ b/include/litmus/clustered.h
@@ -0,0 +1,46 @@
1#ifndef CLUSTERED_H
2#define CLUSTERED_H
3
4/* Which cache level should be used to group CPUs into clusters?
5 * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
6 * global scheduling).
7 */
8enum cache_level {
9 GLOBAL_CLUSTER = 0,
10 L1_CLUSTER = 1,
11 L2_CLUSTER = 2,
12 L3_CLUSTER = 3
13};
14
15int parse_cache_level(const char *str, enum cache_level *level);
16const char* cache_level_name(enum cache_level level);
17
18/* expose a cache level in a /proc dir */
19struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
20 enum cache_level* level);
21
22
23
24struct scheduling_cluster {
25 unsigned int id;
26 /* list of CPUs that are part of this cluster */
27 struct list_head cpus;
28};
29
30struct cluster_cpu {
31 unsigned int id; /* which CPU is this? */
32 struct list_head cluster_list; /* List of the CPUs in this cluster. */
33 struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
34};
35
36int get_cluster_size(enum cache_level level);
37
38int assign_cpus_to_clusters(enum cache_level level,
39 struct scheduling_cluster* clusters[],
40 unsigned int num_clusters,
41 struct cluster_cpu* cpus[],
42 unsigned int num_cpus);
43
44int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index);
45
46#endif
diff --git a/include/litmus/ctrlpage.h b/include/litmus/ctrlpage.h
new file mode 100644
index 000000000000..f7b03e1aedd6
--- /dev/null
+++ b/include/litmus/ctrlpage.h
@@ -0,0 +1,105 @@
1#ifndef _LITMUS_CTRLPAGE_H_
2#define _LITMUS_CTRLPAGE_H_
3
4#include <litmus/rt_param.h>
5
6union np_flag {
7 uint32_t raw;
8 struct {
9 /* Is the task currently in a non-preemptive section? */
10 uint32_t flag:31;
11 /* Should the task call into the scheduler? */
12 uint32_t preempt:1;
13 } np;
14};
15
16/* The definition of the data that is shared between the kernel and real-time
17 * tasks via a shared page (see litmus/ctrldev.c).
18 *
19 * WARNING: User space can write to this, so don't trust
20 * the correctness of the fields!
21 *
22 * This serves two purposes: to enable efficient signaling
23 * of non-preemptive sections (user->kernel) and
24 * delayed preemptions (kernel->user), and to export
25 * some real-time relevant statistics such as preemption and
26 * migration data to user space. We can't use a device to export
27 * statistics because we want to avoid system call overhead when
28 * determining preemption/migration overheads.
29 */
30struct control_page {
31 /* This flag is used by userspace to communicate non-preemptive
32 * sections. */
33 volatile __attribute__ ((aligned (8))) union np_flag sched;
34
35 /* Incremented by the kernel each time an IRQ is handled. */
36 volatile __attribute__ ((aligned (8))) uint64_t irq_count;
37
38 /* Locking overhead tracing: userspace records here the time stamp
39 * and IRQ counter prior to starting the system call. */
40 uint64_t ts_syscall_start; /* Feather-Trace cycles */
41 uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
42 * started. */
43
44 lt_t deadline; /* Deadline for the currently executing job */
45 lt_t release; /* Release time of current job */
46 uint64_t job_index; /* Job sequence number of current job */
47
48 /* to be extended */
49};
50
51/* Expected offsets within the control page. */
52
53#define LITMUS_CP_OFFSET_SCHED 0
54#define LITMUS_CP_OFFSET_IRQ_COUNT 8
55#define LITMUS_CP_OFFSET_TS_SC_START 16
56#define LITMUS_CP_OFFSET_IRQ_SC_START 24
57#define LITMUS_CP_OFFSET_DEADLINE 32
58#define LITMUS_CP_OFFSET_RELEASE 40
59#define LITMUS_CP_OFFSET_JOB_INDEX 48
60
61/* System call emulation via ioctl() */
62
63typedef enum {
64 LRT_null_call = 2006,
65 LRT_set_rt_task_param,
66 LRT_get_rt_task_param,
67 LRT_reservation_create,
68 LRT_complete_job,
69 LRT_od_open,
70 LRT_od_close,
71 LRT_litmus_lock,
72 LRT_litmus_unlock,
73 LRT_wait_for_job_release,
74 LRT_wait_for_ts_release,
75 LRT_release_ts,
76 LRT_get_current_budget,
77} litmus_syscall_id_t;
78
79union litmus_syscall_args {
80 struct {
81 pid_t pid;
82 struct rt_task __user *param;
83 } get_set_task_param;
84
85 struct {
86 uint32_t type;
87 void __user *config;
88 } reservation_create;
89
90 struct {
91 uint32_t fd;
92 uint32_t obj_type;
93 uint32_t obj_id;
94 void __user *config;
95 } od_open;
96
97 struct {
98 lt_t __user *expended;
99 lt_t __user *remaining;
100 } get_current_budget;
101};
102
103
104#endif
105
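
For illustration (not part of the patch): the LITMUS_CP_OFFSET_* constants describe the layout that user space relies on, so they can be checked against the actual struct at compile time. The helper name below is hypothetical.

#include <linux/bug.h>
#include <linux/stddef.h>
#include <litmus/ctrlpage.h>

static inline void check_control_page_layout(void)
{
	BUILD_BUG_ON(offsetof(struct control_page, sched)
		     != LITMUS_CP_OFFSET_SCHED);
	BUILD_BUG_ON(offsetof(struct control_page, irq_count)
		     != LITMUS_CP_OFFSET_IRQ_COUNT);
	BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
		     != LITMUS_CP_OFFSET_TS_SC_START);
	BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
		     != LITMUS_CP_OFFSET_IRQ_SC_START);
	BUILD_BUG_ON(offsetof(struct control_page, deadline)
		     != LITMUS_CP_OFFSET_DEADLINE);
	BUILD_BUG_ON(offsetof(struct control_page, release)
		     != LITMUS_CP_OFFSET_RELEASE);
	BUILD_BUG_ON(offsetof(struct control_page, job_index)
		     != LITMUS_CP_OFFSET_JOB_INDEX);
}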
diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
new file mode 100644
index 000000000000..f87f25a5f40e
--- /dev/null
+++ b/include/litmus/debug_trace.h
@@ -0,0 +1,57 @@
1#ifndef LITMUS_DEBUG_TRACE_H
2#define LITMUS_DEBUG_TRACE_H
3
4#ifdef CONFIG_SCHED_DEBUG_TRACE
5void sched_trace_log_message(const char* fmt, ...);
6void dump_trace_buffer(int max);
7#else
8
9#define sched_trace_log_message(fmt, ...)
10
11#endif
12
13extern atomic_t __log_seq_no;
14
15#ifdef CONFIG_SCHED_DEBUG_TRACE_CALLER
16#define LITMUS_TRACE_PREFIX "%d P%d [%s@%s:%d]: "
17#define LITMUS_TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
18 raw_smp_processor_id(), \
19 __FUNCTION__, __FILE__, __LINE__
20#else
21#define LITMUS_TRACE_PREFIX "%d P%d: "
22#define LITMUS_TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
23 raw_smp_processor_id()
24#endif
25
26#define LITMUS_TRACE(fmt, args...) \
27 sched_trace_log_message(LITMUS_TRACE_PREFIX fmt, \
28 LITMUS_TRACE_ARGS, ## args)
29
30#define LITMUS_TRACE_TASK(t, fmt, args...) \
31 LITMUS_TRACE("(%s/%d:%d) " fmt, \
32 t ? (t)->comm : "null", \
33 t ? (t)->pid : 0, \
34 t ? (t)->rt_param.job_params.job_no : 0, \
35 ##args)
36
37#define LITMUS_TRACE_CUR(fmt, args...) \
38 LITMUS_TRACE_TASK(current, fmt, ## args)
39
40#define LITMUS_TRACE_WARN_ON(cond) \
41 if (unlikely(cond)) \
42 LITMUS_TRACE("WARNING: '%s' [%s@%s:%d]\n", \
43 #cond, __FUNCTION__, __FILE__, __LINE__)
44
45#endif
46
47#ifndef LITMUS_DEBUG_TRACE_DONT_POLLUTE_NAMESPACE
48#ifndef LITMUS_DEBUG_TRACE_H_UNQUALIFIED_NAMES
49
50#define LITMUS_DEBUG_TRACE_H_UNQUALIFIED_NAMES
51#define TRACE(fmt, args...) LITMUS_TRACE(fmt, ## args)
52#define TRACE_TASK(t, fmt, args...) LITMUS_TRACE_TASK(t, fmt, ## args)
53#define TRACE_CUR(fmt, args...) LITMUS_TRACE_CUR(fmt, ## args)
54#define TRACE_WARN_ON(cond) LITMUS_TRACE_WARN_ON(cond)
55
56#endif
57#endif
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 000000000000..bbaf22ea7f12
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,25 @@
1/*
2 * EDF common data structures and utility functions shared by all EDF
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_COMMON_H__
11#define __UNC_EDF_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 000000000000..fd9b30dbfb34
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,78 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13#include <linux/slab.h>
14
15#define MAX_OBJECT_DESCRIPTORS 85
16
17typedef enum {
18 MIN_OBJ_TYPE = 0,
19
20 FMLP_SEM = 0,
21 SRP_SEM = 1,
22
23 MPCP_SEM = 2,
24 MPCP_VS_SEM = 3,
25 DPCP_SEM = 4,
26 PCP_SEM = 5,
27
28 DFLP_SEM = 6,
29
30 MAX_OBJ_TYPE = 6
31} obj_type_t;
32
33struct inode_obj_id {
34 struct list_head list;
35 atomic_t count;
36 struct inode* inode;
37
38 obj_type_t type;
39 void* obj;
40 unsigned int id;
41};
42
43struct fdso_ops;
44
45struct od_table_entry {
46 unsigned int used;
47
48 struct inode_obj_id* obj;
49 const struct fdso_ops* class;
50};
51
52struct fdso_ops {
53 int (*create)(void** obj_ref, obj_type_t type, void* __user);
54 void (*destroy)(obj_type_t type, void*);
55 int (*open) (struct od_table_entry*, void* __user);
56 int (*close) (struct od_table_entry*);
57};
58
59/* translate a userspace supplied od into the raw table entry
60 * returns NULL if od is invalid
61 */
62struct od_table_entry* get_entry_for_od(int od);
63
64/* translate a userspace supplied od into the associated object
65 * returns NULL if od is invalid
66 */
67static inline void* od_lookup(int od, obj_type_t type)
68{
69 struct od_table_entry* e = get_entry_for_od(od);
70 return e && e->obj->type == type ? e->obj->obj : NULL;
71}
72
73#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
74#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
75#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
76
77
78#endif
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
new file mode 100644
index 000000000000..7857cd2c1938
--- /dev/null
+++ b/include/litmus/feather_buffer.h
@@ -0,0 +1,118 @@
1#ifndef _FEATHER_BUFFER_H_
2#define _FEATHER_BUFFER_H_
3
4/* requires UINT_MAX and memcpy */
5
6#define SLOT_FREE 0
7#define SLOT_BUSY 1
8#define SLOT_READY 2
9
10struct ft_buffer {
11 unsigned int slot_count;
12 unsigned int slot_size;
13
14 atomic_t free_count;
15 atomic_t write_idx;
16 unsigned int read_idx;
17
18 char* slots;
19 void* buffer_mem;
20 atomic_t failed_writes;
21};
22
23static inline int init_ft_buffer(struct ft_buffer* buf,
24 unsigned int slot_count,
25 unsigned int slot_size,
26 char* slots,
27 void* buffer_mem)
28{
29 int i = 0;
30 if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
31 /* The slot count must divide UINT_MAX + 1 so that when it
32 * wraps around the index correctly points to 0.
33 */
34 return 0;
35 } else {
36 buf->slot_count = slot_count;
37 buf->slot_size = slot_size;
38 buf->slots = slots;
39 buf->buffer_mem = buffer_mem;
40 atomic_set(&buf->free_count, slot_count);
41 atomic_set(&buf->write_idx, 0);
42 buf->read_idx = 0;
43 atomic_set(&buf->failed_writes, 0);
44 for (i = 0; i < slot_count; i++)
45 buf->slots[i] = SLOT_FREE;
46 return 1;
47 }
48}
49
50static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
51{
52 int free = atomic_fetch_dec(&buf->free_count);
53 unsigned int idx;
54 if (free <= 0) {
55 atomic_fetch_inc(&buf->free_count);
56 *ptr = 0;
57 atomic_fetch_inc(&buf->failed_writes);
58 return 0;
59 } else {
60 idx = atomic_fetch_inc(&buf->write_idx) % buf->slot_count;
61 buf->slots[idx] = SLOT_BUSY;
62 *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
63 return 1;
64 }
65}
66
67/* For single writer scenarios, with fewer atomic ops. */
68static inline int ft_buffer_start_single_write(struct ft_buffer* buf, void **ptr)
69{
70 unsigned int idx;
71
72 if (buf->free_count.counter <= 0) {
73 *ptr = 0;
74 /* single writer: no atomicity needed */
75 buf->failed_writes.counter++;
76 return 0;
77 } else {
78 /* free_count is positive, and can only increase since we are
79 * (by assumption) the only writer accessing the buffer.
80 */
81
82 idx = buf->write_idx.counter++ % buf->slot_count;
83 buf->slots[idx] = SLOT_BUSY;
84 *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
85
86 atomic_dec(&buf->free_count);
87 return 1;
88 }
89}
90
91static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
92{
93 unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
94 buf->slots[idx] = SLOT_READY;
95}
96
97
98/* exclusive reader access is assumed */
99static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
100{
101 unsigned int idx;
102 if (atomic_read(&buf->free_count) == buf->slot_count)
103 /* nothing available */
104 return 0;
105 idx = buf->read_idx % buf->slot_count;
106 if (buf->slots[idx] == SLOT_READY) {
107 memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
108 buf->slot_size);
109 buf->slots[idx] = SLOT_FREE;
110 buf->read_idx++;
111 atomic_fetch_inc(&buf->free_count);
112 return 1;
113 } else
114 return 0;
115}
116
117
118#endif
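
For illustration (not part of the patch): a minimal single-producer/single-consumer round trip. The record type, buffer sizes, and function names are assumptions for the example.

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <litmus/feather_buffer.h>

struct sample {
	unsigned long long timestamp;
	unsigned int event;
};

/* slot count must divide UINT_MAX + 1, i.e., be a power of two */
#define NSLOTS 256

static char slot_states[NSLOTS];
static struct sample slot_mem[NSLOTS];
static struct ft_buffer sample_buf;

static int setup_sample_buf(void)
{
	return init_ft_buffer(&sample_buf, NSLOTS, sizeof(struct sample),
			      slot_states, slot_mem) ? 0 : -EINVAL;
}

static void record_sample(unsigned long long ts, unsigned int ev)
{
	struct sample *s;

	if (ft_buffer_start_write(&sample_buf, (void **) &s)) {
		s->timestamp = ts;
		s->event = ev;
		ft_buffer_finish_write(&sample_buf, s);
	}
	/* else: buffer full; the drop is counted in failed_writes */
}

static int read_sample(struct sample *dest)
{
	return ft_buffer_read(&sample_buf, dest); /* 1 on success, 0 if empty */
}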
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
new file mode 100644
index 000000000000..1a7f41ea6a79
--- /dev/null
+++ b/include/litmus/feather_trace.h
@@ -0,0 +1,50 @@
1#ifndef _FEATHER_TRACE_H_
2#define _FEATHER_TRACE_H_
3
4#include <linux/atomic.h>
5
6int ft_enable_event(unsigned long id);
7int ft_disable_event(unsigned long id);
8int ft_is_event_enabled(unsigned long id);
9int ft_disable_all_events(void);
10
11/* Don't use rewriting implementation if the kernel is relocatable.
12 */
13#if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_RELOCATABLE)
14
15#include <asm/feather_trace.h>
16
17#else /* !__ARCH_HAS_FEATHER_TRACE */
18
19/* provide default implementation */
20#include <linux/timex.h> /* for get_cycles() */
21
22static inline unsigned long long ft_timestamp(void)
23{
24 return get_cycles();
25}
26
27#define feather_callback
28
29#define MAX_EVENTS 1024
30
31extern int ft_events[MAX_EVENTS];
32
33#define ft_event(id, callback) \
34 if (ft_events[id]) callback();
35
36#define ft_event0(id, callback) \
37 if (ft_events[id]) callback(id);
38
39#define ft_event1(id, callback, param) \
40 if (ft_events[id]) callback(id, param);
41
42#define ft_event2(id, callback, param, param2) \
43 if (ft_events[id]) callback(id, param, param2);
44
45#define ft_event3(id, callback, p, p2, p3) \
46 if (ft_events[id]) callback(id, p, p2, p3);
47
48#endif /* __ARCH_HAS_FEATHER_TRACE */
49
50#endif
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 000000000000..71c0d0142fc4
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,183 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60 BUG_ON(index >= LITMUS_MAX_PRIORITY);
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105/* ******* list-based version ******** */
106
107/* bitmask-indexed priority queue */
108struct fp_ready_list {
109 unsigned long bitmask[FP_PRIO_BIT_WORDS];
110 struct list_head queue[LITMUS_MAX_PRIORITY];
111};
112
113void fp_ready_list_init(struct fp_ready_list* q);
114
115static inline void fp_rl_set(struct fp_ready_list* q, unsigned int index)
116{
117 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
118 __set_bit(index % BITS_PER_LONG, word);
119}
120
121static inline void fp_rl_clear(struct fp_ready_list* q, unsigned int index)
122{
123 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
124 __clear_bit(index % BITS_PER_LONG, word);
125}
126
127static inline unsigned int fp_rl_find(struct fp_ready_list* q)
128{
129 int i;
130
131 /* loop optimizer should unroll this */
132 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
133 if (q->bitmask[i])
134 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
135
136 return LITMUS_MAX_PRIORITY; /* nothing found */
137}
138
139static inline void fp_ready_list_add(
140 struct fp_ready_list* q, struct list_head* lh, unsigned int index)
141{
142 BUG_ON(index >= LITMUS_MAX_PRIORITY);
143 BUG_ON(in_list(lh));
144
145 fp_rl_set(q, index);
146 list_add_tail(lh, &q->queue[index]);
147}
148
149static inline void fp_ready_list_remove(
150 struct fp_ready_list* q, struct list_head* lh, unsigned int index)
151{
152 BUG_ON(!in_list(lh));
153
154 list_del(lh);
155 if (likely(list_empty(q->queue + index)))
156 fp_rl_clear(q, index);
157}
158
159static inline struct list_head* fp_ready_list_peek(struct fp_ready_list* q)
160{
161 unsigned int idx = fp_rl_find(q);
162
163 if (idx < LITMUS_MAX_PRIORITY) {
164 return q->queue[idx].next;
165 } else
166 return NULL;
167}
168
169static inline struct list_head* fp_ready_list_take(struct fp_ready_list* q)
170{
171 unsigned int idx = fp_rl_find(q);
172 struct list_head* lh;
173
174 if (idx < LITMUS_MAX_PRIORITY) {
175 lh = q->queue[idx].next;
176 fp_ready_list_remove(q, lh, idx);
177 return lh;
178 } else
179 return NULL;
180}
181
182
183#endif
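
For illustration (not part of the patch): a minimal sketch using the list-based variant to queue work items at fixed priorities (lower index = higher priority, index < LITMUS_MAX_PRIORITY). The struct request and helper names are assumptions.

#include <linux/kernel.h>
#include <linux/list.h>
#include <litmus/litmus.h>	/* in_list(), used by the inline helpers */
#include <litmus/fp_common.h>

struct request {
	struct list_head link;
	unsigned int prio;	/* 0 = highest, must be < LITMUS_MAX_PRIORITY */
};

/* assumes fp_ready_list_init(q) was called once beforehand */
static void queue_request(struct fp_ready_list *q, struct request *req)
{
	INIT_LIST_HEAD(&req->link);
	fp_ready_list_add(q, &req->link, req->prio);
}

static struct request *next_request(struct fp_ready_list *q)
{
	struct list_head *lh = fp_ready_list_take(q);
	return lh ? container_of(lh, struct request, link) : NULL;
}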
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..642de98542c8
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,147 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#include <linux/math64.h>
5
6#ifndef __KERNEL__
7#include <stdint.h>
8#define abs(x) (((x) < 0) ? -(x) : (x))
9#endif
10
11// Use 64-bit because we want to track things at the nanosecond scale.
12// This can lead to very large numbers.
13typedef int64_t fpbuf_t;
14typedef struct
15{
16 fpbuf_t val;
17} fp_t;
18
19#define FP_SHIFT 10
20#define ROUND_BIT (FP_SHIFT - 1)
21
22#define _fp(x) ((fp_t) {x})
23
24#ifdef __KERNEL__
25static const fp_t LITMUS_FP_ZERO = {.val = 0};
26static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
27#endif
28
29static inline fp_t FP(fpbuf_t x)
30{
31 return _fp(((fpbuf_t) x) << FP_SHIFT);
32}
33
34/* divide two integers to obtain a fixed point value */
35static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
36{
37 return _fp(div64_s64(FP(a).val, (b)));
38}
39
40static inline fpbuf_t _point(fp_t x)
41{
42 return (x.val % (1 << FP_SHIFT));
43
44}
45
46#define fp2str(x) x.val
47/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
48#define _FP_ "%ld/1024"
49
50static inline fpbuf_t _floor(fp_t x)
51{
52 return x.val >> FP_SHIFT;
53}
54
55/* FIXME: negative rounding */
56static inline fpbuf_t _round(fp_t x)
57{
58 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
59}
60
61/* multiply two fixed point values */
62static inline fp_t _mul(fp_t a, fp_t b)
63{
64 return _fp((a.val * b.val) >> FP_SHIFT);
65}
66
67static inline fp_t _div(fp_t a, fp_t b)
68{
69#if !defined(__KERNEL__) && !defined(unlikely)
70#define unlikely(x) (x)
71#define DO_UNDEF_UNLIKELY
72#endif
73 /* try not to overflow */
74 if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
75 return _fp((a.val / b.val) << FP_SHIFT);
76 else
77 return _fp((a.val << FP_SHIFT) / b.val);
78#ifdef DO_UNDEF_UNLIKELY
79#undef unlikely
80#undef DO_UNDEF_UNLIKELY
81#endif
82}
83
84static inline fp_t _add(fp_t a, fp_t b)
85{
86 return _fp(a.val + b.val);
87}
88
89static inline fp_t _sub(fp_t a, fp_t b)
90{
91 return _fp(a.val - b.val);
92}
93
94static inline fp_t _neg(fp_t x)
95{
96 return _fp(-x.val);
97}
98
99static inline fp_t _abs(fp_t x)
100{
101 return _fp(abs(x.val));
102}
103
104/* works the same as casting float/double to integer */
105static inline fpbuf_t _fp_to_integer(fp_t x)
106{
107 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
108}
109
110static inline fp_t _integer_to_fp(fpbuf_t x)
111{
112 return _frac(x,1);
113}
114
115static inline int _leq(fp_t a, fp_t b)
116{
117 return a.val <= b.val;
118}
119
120static inline int _geq(fp_t a, fp_t b)
121{
122 return a.val >= b.val;
123}
124
125static inline int _lt(fp_t a, fp_t b)
126{
127 return a.val < b.val;
128}
129
130static inline int _gt(fp_t a, fp_t b)
131{
132 return a.val > b.val;
133}
134
135static inline int _eq(fp_t a, fp_t b)
136{
137 return a.val == b.val;
138}
139
140static inline fp_t _max(fp_t a, fp_t b)
141{
142 if (a.val < b.val)
143 return b;
144 else
145 return a;
146}
147#endif
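
For illustration (not part of the patch): with FP_SHIFT = 10, values are scaled by 1024, so _frac(3, 10) is stored as 307 (about 0.2998), _mul(_frac(3, 10), FP(4)) is 1228 (about 1.199), and its _floor() is 1. A hypothetical helper built on these primitives:

#include <litmus/fpmath.h>

/* e.g., task_utilization(300, 1000).val == 307, i.e., roughly 0.3 */
static inline fp_t task_utilization(fpbuf_t exec_cost_ns, fpbuf_t period_ns)
{
	return _frac(exec_cost_ns, period_ns);
}

/* simple utilization-bound style check: sum of utilizations <= #CPUs */
static inline int utilization_fits(fp_t total_util, unsigned int num_cpus)
{
	return _leq(total_util, FP(num_cpus));
}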
diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h
new file mode 100644
index 000000000000..a566b0b6ae05
--- /dev/null
+++ b/include/litmus/ftdev.h
@@ -0,0 +1,58 @@
1#ifndef _LITMUS_FTDEV_H_
2#define _LITMUS_FTDEV_H_
3
4#include <litmus/feather_trace.h>
5#include <litmus/feather_buffer.h>
6#include <linux/mutex.h>
7#include <linux/cdev.h>
8
9#define FTDEV_ENABLE_CMD 0
10#define FTDEV_DISABLE_CMD 1
11#define FTDEV_CALIBRATE 0x1410
12
13struct ftdev;
14
15/* return 0 if buffer can be opened, otherwise -$REASON */
16typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no);
17/* return 0 on success, otherwise -$REASON */
18typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no);
19typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no);
20typedef long (*ftdev_calibrate_t)(struct ftdev* dev, unsigned int buf_no, unsigned long user_arg);
21/* Let devices handle writes from userspace. No synchronization provided. */
22typedef ssize_t (*ftdev_write_t)(struct ft_buffer* buf, size_t len, const char __user *from);
23
24struct ftdev_event;
25
26struct ftdev_minor {
27 struct ft_buffer* buf;
28 unsigned int readers;
29 struct mutex lock;
30 /* FIXME: filter for authorized events */
31 struct ftdev_event* events;
32 struct device* device;
33 struct ftdev* ftdev;
34};
35
36struct ftdev {
37 dev_t major;
38 struct cdev cdev;
39 struct class* class;
40 const char* name;
41 struct ftdev_minor* minor;
42 unsigned int minor_cnt;
43 ftdev_alloc_t alloc;
44 ftdev_free_t free;
45 ftdev_can_open_t can_open;
46 ftdev_write_t write;
47 ftdev_calibrate_t calibrate;
48};
49
50struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size);
51void free_ft_buffer(struct ft_buffer* buf);
52
53int ftdev_init( struct ftdev* ftdev, struct module* owner,
54 const int minor_cnt, const char* name);
55void ftdev_exit(struct ftdev* ftdev);
56int register_ftdev(struct ftdev* ftdev);
57
58#endif
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 000000000000..7033393148df
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,13 @@
1#ifndef __LITMUS_JOBS_H__
2#define __LITMUS_JOBS_H__
3
4void prepare_for_next_period(struct task_struct *t);
5void release_at(struct task_struct *t, lt_t start);
6
7void inferred_sporadic_job_release_at(struct task_struct *t, lt_t when);
8
9long default_wait_for_release_at(lt_t release_time);
10long complete_job(void);
11long complete_job_oneshot(void);
12
13#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
new file mode 100644
index 000000000000..f550367ddd4b
--- /dev/null
+++ b/include/litmus/litmus.h
@@ -0,0 +1,224 @@
1/*
2 * Constant definitions related to
3 * scheduling policy.
4 */
5
6#ifndef _LINUX_LITMUS_H_
7#define _LINUX_LITMUS_H_
8
9#include <litmus/ctrlpage.h>
10
11#ifdef CONFIG_RELEASE_MASTER
12extern atomic_t release_master_cpu;
13#endif
14
15/* in_list - is a given list_head queued on some list?
16 */
17static inline int in_list(struct list_head* list)
18{
19 return !( /* case 1: deleted */
20 (list->next == LIST_POISON1 &&
21 list->prev == LIST_POISON2)
22 ||
23 /* case 2: initialized */
24 (list->next == list &&
25 list->prev == list)
26 );
27}
28
29struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
30
31#define NO_CPU 0xffffffff
32
33void litmus_fork(struct task_struct *tsk);
34void litmus_exec(void);
35/* clean up real-time state of a task */
36void litmus_clear_state(struct task_struct *dead_tsk);
37void exit_litmus(struct task_struct *dead_tsk);
38
39/* Prevent the plugin from being switched-out from underneath a code
40 * path. Might sleep, so may be called only from non-atomic context. */
41void litmus_plugin_switch_disable(void);
42void litmus_plugin_switch_enable(void);
43
44long litmus_admit_task(struct task_struct *tsk);
45void litmus_exit_task(struct task_struct *tsk);
46void litmus_dealloc(struct task_struct *tsk);
47void litmus_do_exit(struct task_struct *tsk);
48int litmus_be_migrate_to(int cpu);
49
50#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
51#define rt_transition_pending(t) \
52 ((t)->rt_param.transition_pending)
53
54#define tsk_rt(t) (&(t)->rt_param)
55
56/* Realtime utility macros */
57#ifdef CONFIG_LITMUS_LOCKING
58#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
59#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
60#else
61#define is_priority_boosted(t) 0
62#define get_boost_start(t) 0
63#endif
64
65
66/* task_params macros */
67#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
68#define get_rt_period(t) (tsk_rt(t)->task_params.period)
69#define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline)
70#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
71#define get_partition(t) (tsk_rt(t)->task_params.cpu)
72#define get_priority(t) (tsk_rt(t)->task_params.priority)
73#define get_class(t) (tsk_rt(t)->task_params.cls)
74#define get_release_policy(t) (tsk_rt(t)->task_params.release_policy)
75
76/* job_param macros */
77#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
78#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
79#define get_release(t) (tsk_rt(t)->job_params.release)
80#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
81
82/* release policy macros */
83#define is_periodic(t) (get_release_policy(t) == TASK_PERIODIC)
84#define is_sporadic(t) (get_release_policy(t) == TASK_SPORADIC)
85#ifdef CONFIG_ALLOW_EARLY_RELEASE
86#define is_early_releasing(t) (get_release_policy(t) == TASK_EARLY)
87#else
88#define is_early_releasing(t) (0)
89#endif
90
91#define is_hrt(t) \
92 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
93#define is_srt(t) \
94 (tsk_rt(t)->task_params.cls == RT_CLASS_SOFT)
95#define is_be(t) \
96 (tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT)
97
98/* Our notion of time within LITMUS: kernel monotonic time. */
99static inline lt_t litmus_clock(void)
100{
101 return ktime_to_ns(ktime_get());
102}
103
104/* A macro to convert from nanoseconds to ktime_t. */
105#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
106
107#define is_released(t, now) \
108 (lt_before_eq(get_release(t), now))
109#define is_tardy(t, now) \
110 (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
111
112/* real-time comparison macros */
113#define earlier_deadline(a, b) (lt_before(\
114 (a)->rt_param.job_params.deadline,\
115 (b)->rt_param.job_params.deadline))
116#define earlier_release(a, b) (lt_before(\
117 (a)->rt_param.job_params.release,\
118 (b)->rt_param.job_params.release))
119
120void preempt_if_preemptable(struct task_struct* t, int on_cpu);
121
122#define bheap2task(hn) ((struct task_struct*) hn->value)
123
124static inline int is_present(struct task_struct* t)
125{
126 return t && tsk_rt(t)->present;
127}
128
129static inline int is_completed(struct task_struct* t)
130{
131 return t && tsk_rt(t)->completed;
132}
133
134
135/* Used to convert ns-specified execution costs and periods into
136 * integral quanta equivalents.
137 */
138#define LITMUS_QUANTUM_LENGTH_NS (CONFIG_LITMUS_QUANTUM_LENGTH_US * 1000ULL)
139
140/* make the unit explicit */
141typedef unsigned long quanta_t;
142
143enum round {
144 FLOOR,
145 CEIL
146};
147
148static inline quanta_t time2quanta(lt_t time, enum round round)
149{
150 s64 quantum_length = LITMUS_QUANTUM_LENGTH_NS;
151
152 if (do_div(time, quantum_length) && round == CEIL)
153 time++;
154 return (quanta_t) time;
155}
156
157static inline lt_t quanta2time(quanta_t quanta)
158{
159 return quanta * LITMUS_QUANTUM_LENGTH_NS;
160}
161
162/* By how much is cpu staggered behind CPU 0? */
163u64 cpu_stagger_offset(int cpu);
164
165static inline struct control_page* get_control_page(struct task_struct *t)
166{
167 return tsk_rt(t)->ctrl_page;
168}
169
170static inline int has_control_page(struct task_struct* t)
171{
172 return tsk_rt(t)->ctrl_page != NULL;
173}
174
175
176#ifdef CONFIG_SCHED_OVERHEAD_TRACE
177
178#define TS_SYSCALL_IN_START \
179 if (has_control_page(current)) { \
180 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); \
181 }
182
183#define TS_SYSCALL_IN_END \
184 if (has_control_page(current)) { \
185 unsigned long flags; \
186 uint64_t irqs; \
187 local_irq_save(flags); \
188 irqs = get_control_page(current)->irq_count - \
189 get_control_page(current)->irq_syscall_start; \
190 __TS_SYSCALL_IN_END(&irqs); \
191 local_irq_restore(flags); \
192 }
193
194#else
195
196#define TS_SYSCALL_IN_START
197#define TS_SYSCALL_IN_END
198
199#endif
200
201#ifdef CONFIG_SMP
202
203/*
204 * struct hrtimer_start_on_info - timer info on remote cpu
205 * @timer: timer to be triggered on remote cpu
206 * @time: time event
207 * @mode: timer mode
208 * @csd: smp_call_function parameter to call hrtimer_pull on remote cpu
209 */
210struct hrtimer_start_on_info {
211 struct hrtimer *timer;
212 ktime_t time;
213 enum hrtimer_mode mode;
214 struct call_single_data csd;
215};
216
217void hrtimer_pull(void *csd_info);
218extern void hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
219 struct hrtimer *timer, ktime_t time,
220 const enum hrtimer_mode mode);
221
222#endif
223
224#endif
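
For illustration (not part of the patch): assuming CONFIG_LITMUS_QUANTUM_LENGTH_US is 1000 (a 1 ms quantum, LITMUS_QUANTUM_LENGTH_NS == 1,000,000), a 2.5 ms budget converts to 2 quanta with FLOOR and 3 quanta with CEIL. The helper name is hypothetical.

#include <litmus/litmus.h>

static inline quanta_t budget_in_quanta(lt_t budget_ns)
{
	/* e.g., budget_in_quanta(2500000) == 3 with a 1 ms quantum */
	return time2quanta(budget_ns, CEIL);
}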
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
new file mode 100644
index 000000000000..a5db24c03ec0
--- /dev/null
+++ b/include/litmus/litmus_proc.h
@@ -0,0 +1,63 @@
1#include <litmus/sched_plugin.h>
2#include <linux/proc_fs.h>
3
4int __init init_litmus_proc(void);
5void exit_litmus_proc(void);
6
7struct cd_mapping
8{
9 int id;
10 cpumask_var_t mask;
11 struct proc_dir_entry *proc_file;
12};
13
14struct domain_proc_info
15{
16 int num_cpus;
17 int num_domains;
18
19 struct cd_mapping *cpu_to_domains;
20 struct cd_mapping *domain_to_cpus;
21};
22
23/*
24 * On success, returns 0 and sets the pointer to the location of the new
25 * proc dir entry, otherwise returns an error code and sets pde to NULL.
26 */
27long make_plugin_proc_dir(struct sched_plugin* plugin,
28 struct proc_dir_entry** pde);
29
30/*
31 * Plugins should deallocate all child proc directory entries before
32 * calling this, to avoid memory leaks.
33 */
34void remove_plugin_proc_dir(struct sched_plugin* plugin);
35
36/*
37 * Setup the CPU <-> sched domain mappings in proc
38 */
39long activate_domain_proc(struct domain_proc_info* map);
40
41/*
42 * Remove the CPU <-> sched domain mappings from proc
43 */
44long deactivate_domain_proc(void);
45
46/*
47 * Alloc memory for the mapping
48 * Note: Does not set up proc files. Use make_sched_domain_maps for that.
49 */
50long init_domain_proc_info(struct domain_proc_info* map,
51 int num_cpus, int num_domains);
52
53/*
54 * Free memory of the mapping
55 * Note: Does not clean up proc files. Use deactivate_domain_proc for that.
56 */
57void destroy_domain_proc_info(struct domain_proc_info* map);
58
59/* Copy at most size-1 bytes from ubuf into kbuf, null-terminate buf, and
60 * remove a '\n' if present. Returns the number of bytes that were read or
61 * -EFAULT. */
62int copy_and_chomp(char *kbuf, unsigned long ksize,
63 __user const char* ubuf, unsigned long ulength);
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
new file mode 100644
index 000000000000..4d7b870cb443
--- /dev/null
+++ b/include/litmus/locking.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_LOCKING_H
2#define LITMUS_LOCKING_H
3
4struct litmus_lock_ops;
5
6/* Generic base struct for LITMUS^RT userspace semaphores.
7 * This structure should be embedded in protocol-specific semaphores.
8 */
9struct litmus_lock {
10 struct litmus_lock_ops *ops;
11 int type;
12};
13
14struct litmus_lock_ops {
15 /* Current task tries to obtain / drop a reference to a lock.
16 * Optional methods, allowed by default. */
17 int (*open)(struct litmus_lock*, void* __user);
18 int (*close)(struct litmus_lock*);
19
20 /* Current tries to lock/unlock this lock (mandatory methods). */
21 int (*lock)(struct litmus_lock*);
22 int (*unlock)(struct litmus_lock*);
23
24 /* The lock is no longer being referenced (mandatory method). */
25 void (*deallocate)(struct litmus_lock*);
26};
27
28#endif
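
For illustration (not part of the patch): a hypothetical protocol-specific semaphore embedding the generic base struct, as suggested above. The field names are assumptions; a real protocol would also provide a matching litmus_lock_ops instance.

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <litmus/locking.h>

struct example_sem {
	struct litmus_lock litmus_lock;	/* generic base; ops/type set at creation */
	spinlock_t wait_lock;
	struct list_head waiters;	/* tasks blocked on this semaphore */
	struct task_struct *owner;	/* current lock holder, if any */
};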
diff --git a/include/litmus/np.h b/include/litmus/np.h
new file mode 100644
index 000000000000..dbe2b695f74a
--- /dev/null
+++ b/include/litmus/np.h
@@ -0,0 +1,121 @@
1#ifndef _LITMUS_NP_H_
2#define _LITMUS_NP_H_
3
4/* Definitions related to non-preemptive sections signaled via the control
5 * page
6 */
7
8#ifdef CONFIG_NP_SECTION
9
10static inline int is_kernel_np(struct task_struct *t)
11{
12 return tsk_rt(t)->kernel_np;
13}
14
15static inline int is_user_np(struct task_struct *t)
16{
17 return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
18}
19
20static inline void request_exit_np(struct task_struct *t)
21{
22 if (is_user_np(t)) {
23 /* Set the flag that tells user space to call
24 * into the kernel at the end of a critical section. */
25 if (likely(tsk_rt(t)->ctrl_page)) {
26 TRACE_TASK(t, "setting delayed_preemption flag\n");
27 tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
28 }
29 }
30}
31
32static inline void make_np(struct task_struct *t)
33{
34 tsk_rt(t)->kernel_np++;
35}
36
37/* Caller should check if preemption is necessary when
38 * the function return 0.
39 */
40static inline int take_np(struct task_struct *t)
41{
42 return --tsk_rt(t)->kernel_np;
43}
44
45/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
46static inline int request_exit_np_atomic(struct task_struct *t)
47{
48 union np_flag old, new;
49
50 if (tsk_rt(t)->ctrl_page) {
51 old.raw = tsk_rt(t)->ctrl_page->sched.raw;
52 if (old.np.flag == 0) {
53 /* no longer non-preemptive */
54 return 0;
55 } else if (old.np.preempt) {
56 /* already set, nothing for us to do */
57 return 1;
58 } else {
59 /* non preemptive and flag not set */
60 new.raw = old.raw;
61 new.np.preempt = 1;
62 /* if we get old back, then we atomically set the flag */
63 return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
64 /* If we raced with a concurrent change, then so be
65 * it. Deliver it by IPI. We don't want an unbounded
66 * retry loop here since tasks might exploit that to
67 * keep the kernel busy indefinitely. */
68 }
69 } else
70 return 0;
71}
72
73#else
74
75static inline int is_kernel_np(struct task_struct* t)
76{
77 return 0;
78}
79
80static inline int is_user_np(struct task_struct* t)
81{
82 return 0;
83}
84
85static inline void request_exit_np(struct task_struct *t)
86{
87 /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
88 BUG();
89}
90
91static inline int request_exit_np_atomic(struct task_struct *t)
92{
93 return 0;
94}
95
96#endif
97
98static inline void clear_exit_np(struct task_struct *t)
99{
100 if (likely(tsk_rt(t)->ctrl_page))
101 tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
102}
103
104static inline int is_np(struct task_struct *t)
105{
106#ifdef CONFIG_SCHED_DEBUG_TRACE
107 int kernel, user;
108 kernel = is_kernel_np(t);
109 user = is_user_np(t);
110 if (kernel || user)
111 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
112
113 kernel, user);
114 return kernel || user;
115#else
116 return unlikely(is_kernel_np(t) || is_user_np(t));
117#endif
118}
119
120#endif
121
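
For illustration (not part of the patch): a rough sketch of the userspace side of this protocol, assuming the task has mapped the control page (struct control_page from ctrlpage.h) into its address space. The ctrl_page parameter and the litmus_yield() wrapper are assumptions for the example.

void litmus_yield(void);	/* hypothetical wrapper, e.g., around sched_yield() */

static void enter_np_section(struct control_page *ctrl_page)
{
	ctrl_page->sched.np.flag++;	/* kernel now sees the task as non-preemptive */
	__sync_synchronize();		/* publish the flag before the critical section */
}

static void exit_np_section(struct control_page *ctrl_page)
{
	__sync_synchronize();
	if (--ctrl_page->sched.np.flag == 0 && ctrl_page->sched.np.preempt) {
		/* the kernel deferred a preemption while we were non-preemptive;
		 * call into the scheduler now */
		litmus_yield();
	}
}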
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
new file mode 100644
index 000000000000..ffb602772896
--- /dev/null
+++ b/include/litmus/preempt.h
@@ -0,0 +1,191 @@
1#ifndef LITMUS_PREEMPT_H
2#define LITMUS_PREEMPT_H
3
4#include <linux/types.h>
5#include <linux/cache.h>
6#include <linux/percpu.h>
7#include <asm/atomic.h>
8
9DECLARE_PER_CPU(bool, litmus_preemption_in_progress);
10
11/* is_current_running() is a legacy macro (and a hack) that is used to make
12 * the plugin logic, which still stems from the 2.6.20 era, work with current
13 * kernels.
14 *
15 * It used to honor the flag in the preempt_count variable that was
16 * set when scheduling is in progress. This doesn't exist anymore in recent
17 * Linux versions. Instead, Linux has moved to passing a 'preempt' flag to
18 * __schedule(). In particular, Linux ignores prev->state != TASK_RUNNING and
19 * does *not* process self-suspensions if an interrupt (i.e., a preemption)
20 * races with a task that is about to call schedule() anyway.
21 *
22 * The value of the 'preempt' flag in __schedule() is crucial
23 * information for some of the LITMUS^RT plugins, which must re-add
24 * soon-to-block tasks to the ready queue if the rest of the system doesn't
25 * process the preemption yet. Unfortunately, the flag is not passed to
26 * pick_next_task(). Hence, as a hack, we communicate it out of band via the
27 * global, per-core variable litmus_preemption_in_progress, which is set by
28 * the scheduler in __schedule() and read by the plugins via the
29 * is_current_running() macro.
30 */
31#define is_current_running() \
32 ((current)->state == TASK_RUNNING || \
33 this_cpu_read(litmus_preemption_in_progress))
34
35DECLARE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
36
37#ifdef CONFIG_PREEMPT_STATE_TRACE
38/* this file is included widely --- be careful not to pollute the namespace
39 * with the TRACE() symbol */
40#define LITMUS_DEBUG_TRACE_DONT_POLLUTE_NAMESPACE
41#include <litmus/debug_trace.h>
42#undef LITMUS_DEBUG_TRACE_DONT_POLLUTE_NAMESPACE
43const char* sched_state_name(int s);
44#define TRACE_STATE(fmt, args...) LITMUS_TRACE("SCHED_STATE " fmt, args)
45#else
46#define TRACE_STATE(fmt, args...) /* ignore */
47#endif
48
49#define VERIFY_SCHED_STATE(x) \
50 do { int __s = get_sched_state(); \
51 if ((__s & (x)) == 0) \
52 TRACE_STATE("INVALID s=0x%x (%s) not " \
53 "in 0x%x (%s) [%s]\n", \
54 __s, sched_state_name(__s), \
55 (x), #x, __FUNCTION__); \
56 } while (0);
57
58#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
59 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
60 cpu, (x), sched_state_name(x), \
61 (y), sched_state_name(y))
62
63
64typedef enum scheduling_state {
65 TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
66 * should be scheduled, and the processor does not
67 * plan to invoke schedule(). */
68 SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the
69 * processor should reschedule, but this has not
70 * been communicated yet (IPI still pending). */
71 WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to
72 * reschedule and will do so shortly. */
73 TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(),
74 * has selected a new task to schedule, but has not
75 * yet performed the actual context switch. */
76 PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context
77 * switch, but a remote processor has already
78 * determined that a higher-priority task became
79 * eligible after the task was picked. */
80} sched_state_t;
81
82static inline sched_state_t get_sched_state_on(int cpu)
83{
84 return atomic_read(&per_cpu(resched_state, cpu));
85}
86
87static inline sched_state_t get_sched_state(void)
88{
89 return atomic_read(this_cpu_ptr(&resched_state));
90}
91
92static inline int is_in_sched_state(int possible_states)
93{
94 return get_sched_state() & possible_states;
95}
96
97static inline int cpu_is_in_sched_state(int cpu, int possible_states)
98{
99 return get_sched_state_on(cpu) & possible_states;
100}
101
102static inline void set_sched_state(sched_state_t s)
103{
104 TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
105 atomic_set(this_cpu_ptr(&resched_state), s);
106}
107
108static inline int sched_state_transition(sched_state_t from, sched_state_t to)
109{
110 sched_state_t old_state;
111
112 old_state = atomic_cmpxchg(this_cpu_ptr(&resched_state), from, to);
113 if (old_state == from) {
114 TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
115 return 1;
116 } else
117 return 0;
118}
119
120static inline int sched_state_transition_on(int cpu,
121 sched_state_t from,
122 sched_state_t to)
123{
124 sched_state_t old_state;
125
126 old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
127 if (old_state == from) {
128 TRACE_SCHED_STATE_CHANGE(from, to, cpu);
129 return 1;
130 } else
131 return 0;
132}
133
134/* Plugins must call this function after they have decided which job to
135 * schedule next. IMPORTANT: this function must be called while still holding
136 * the lock that is used to serialize scheduling decisions.
137 *
138 * (Ideally, we would like to use runqueue locks for this purpose, but that
139 * would lead to deadlocks with the migration code.)
140 */
141static inline void sched_state_task_picked(void)
142{
143 VERIFY_SCHED_STATE(WILL_SCHEDULE);
144
 145	/* WILL_SCHEDULE has only a local transition => simple store is ok */
146 set_sched_state(TASK_PICKED);
147}
148
149static inline void sched_state_entered_schedule(void)
150{
151 /* Update state for the case that we entered schedule() not due to
152 * set_tsk_need_resched() */
153 set_sched_state(WILL_SCHEDULE);
154}
155
156/* Called by schedule() to check if the scheduling decision is still valid
 157 * after a context switch. Returns 1 if the CPU needs to reschedule. */
158static inline int sched_state_validate_switch(void)
159{
160 int decision_ok = 0;
161
162 VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED | WILL_SCHEDULE);
163
164 if (is_in_sched_state(TASK_PICKED)) {
165 /* Might be good; let's try to transition out of this
166 * state. This must be done atomically since remote processors
167 * may try to change the state, too. */
168 decision_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
169 }
170
171 if (!decision_ok)
172 TRACE_STATE("validation failed (%s)\n",
173 sched_state_name(get_sched_state()));
174
175 return !decision_ok;
176}
177
178/* State transition events. See litmus/preempt.c for details. */
179void sched_state_will_schedule(struct task_struct* tsk);
180void sched_state_ipi(void);
181/* Cause a CPU (remote or local) to reschedule. */
182void litmus_reschedule(int cpu);
183void litmus_reschedule_local(void);
184
185#ifdef CONFIG_DEBUG_KERNEL
186void sched_state_plugin_check(void);
187#else
188#define sched_state_plugin_check() /* no check */
189#endif
190
191#endif
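
A minimal sketch (not taken from any in-tree plugin) of how a plugin's schedule() callback is expected to interact with the state machine above; demo_lock and demo_pick_next() are hypothetical placeholders for the plugin's scheduling lock and selection logic:

static DEFINE_RAW_SPINLOCK(demo_lock);            /* serializes scheduling decisions */
static struct task_struct* demo_pick_next(void);  /* plugin-specific, hypothetical */

static struct task_struct* demo_schedule(struct task_struct *prev)
{
	struct task_struct *next;

	raw_spin_lock(&demo_lock);
	next = demo_pick_next();
	/* must be invoked while still holding the scheduling lock */
	sched_state_task_picked();
	raw_spin_unlock(&demo_lock);

	return next;
}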
diff --git a/include/litmus/reservations/alloc.h b/include/litmus/reservations/alloc.h
new file mode 100644
index 000000000000..b3471288c9f1
--- /dev/null
+++ b/include/litmus/reservations/alloc.h
@@ -0,0 +1,15 @@
1#ifndef LITMUS_RESERVATIONS_ALLOC_H
2#define LITMUS_RESERVATIONS_ALLOC_H
3
4#include <litmus/reservations/reservation.h>
5
6long alloc_polling_reservation(
7 int res_type,
8 struct reservation_config *config,
9 struct reservation **_res);
10
11long alloc_table_driven_reservation(
12 struct reservation_config *config,
13 struct reservation **_res);
14
15#endif \ No newline at end of file
diff --git a/include/litmus/reservations/budget-notifier.h b/include/litmus/reservations/budget-notifier.h
new file mode 100644
index 000000000000..d831fa9d5153
--- /dev/null
+++ b/include/litmus/reservations/budget-notifier.h
@@ -0,0 +1,50 @@
1#ifndef LITMUS_BUDGET_NOTIFIER_H
2#define LITMUS_BUDGET_NOTIFIER_H
3
4#include <linux/list.h>
5#include <linux/spinlock.h>
6
7struct budget_notifier;
8
9typedef void (*budget_callback_t) (
10 struct budget_notifier *bn
11);
12
13struct budget_notifier {
14 struct list_head list;
15 budget_callback_t budget_exhausted;
16 budget_callback_t budget_replenished;
17};
18
19struct budget_notifier_list {
20 struct list_head list;
21 raw_spinlock_t lock;
22};
23
24void budget_notifier_list_init(struct budget_notifier_list* bnl);
25
26static inline void budget_notifier_add(
27 struct budget_notifier_list *bnl,
28 struct budget_notifier *bn)
29{
30 unsigned long flags;
31
32 raw_spin_lock_irqsave(&bnl->lock, flags);
33 list_add(&bn->list, &bnl->list);
34 raw_spin_unlock_irqrestore(&bnl->lock, flags);
35}
36
37static inline void budget_notifier_remove(
38 struct budget_notifier_list *bnl,
39 struct budget_notifier *bn)
40{
41 unsigned long flags;
42
43 raw_spin_lock_irqsave(&bnl->lock, flags);
44 list_del(&bn->list);
45 raw_spin_unlock_irqrestore(&bnl->lock, flags);
46}
47
48void budget_notifiers_fire(struct budget_notifier_list *bnl, bool replenished);
49
50#endif
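
A hedged usage sketch: registering depletion/replenishment callbacks on a notifier list; the my_* names are placeholders, and the budget_notifier_list is assumed to belong to a reservation owned by the caller:

static void my_exhausted(struct budget_notifier *bn)
{
	/* react to budget depletion */
}

static void my_replenished(struct budget_notifier *bn)
{
	/* react to budget replenishment */
}

static struct budget_notifier my_bn = {
	.budget_exhausted   = my_exhausted,
	.budget_replenished = my_replenished,
};

static void register_demo_notifier(struct budget_notifier_list *bnl)
{
	/* takes bnl->lock internally */
	budget_notifier_add(bnl, &my_bn);
}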
diff --git a/include/litmus/reservations/polling.h b/include/litmus/reservations/polling.h
new file mode 100644
index 000000000000..230e12b1088a
--- /dev/null
+++ b/include/litmus/reservations/polling.h
@@ -0,0 +1,19 @@
1#ifndef LITMUS_POLLING_RESERVATIONS_H
2#define LITMUS_POLLING_RESERVATIONS_H
3
4#include <litmus/reservations/reservation.h>
5
6struct polling_reservation {
7 /* extend basic reservation */
8 struct reservation res;
9
10 lt_t max_budget;
11 lt_t period;
12 lt_t deadline;
13 lt_t offset;
14};
15
16void polling_reservation_init(struct polling_reservation *pres, int use_edf_prio,
17 int use_periodic_polling, lt_t budget, lt_t period, lt_t deadline, lt_t offset);
18
19#endif
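
A sketch of initializing a periodic polling reservation with EDF priority; the 10 ms budget / 100 ms period values are made up and assume the usual interpretation of lt_t as nanoseconds:

static struct polling_reservation demo_pres;

static void demo_setup_polling(void)
{
	polling_reservation_init(&demo_pres,
		1,                   /* use_edf_prio         */
		1,                   /* use_periodic_polling */
		10 * 1000000ULL,     /* budget: 10 ms        */
		100 * 1000000ULL,    /* period: 100 ms       */
		100 * 1000000ULL,    /* deadline == period   */
		0);                  /* offset               */
}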
diff --git a/include/litmus/reservations/reservation.h b/include/litmus/reservations/reservation.h
new file mode 100644
index 000000000000..1752dac4e698
--- /dev/null
+++ b/include/litmus/reservations/reservation.h
@@ -0,0 +1,224 @@
1#ifndef LITMUS_RESERVATION_H
2#define LITMUS_RESERVATION_H
3
4#include <linux/list.h>
5#include <linux/hrtimer.h>
6
7#include <litmus/debug_trace.h>
8#include <litmus/reservations/budget-notifier.h>
9
10struct reservation_client;
11struct reservation_environment;
12struct reservation;
13
14typedef enum {
15 /* reservation has no clients, is not consuming budget */
16 RESERVATION_INACTIVE = 0,
17
18 /* reservation has clients, consumes budget when scheduled */
19 RESERVATION_ACTIVE,
20
21 /* reservation has no clients, but may be consuming budget */
22 RESERVATION_ACTIVE_IDLE,
23
24 /* Reservation has no budget and waits for
25 * replenishment. May or may not have clients. */
26 RESERVATION_DEPLETED,
27} reservation_state_t;
28
29
30/* ************************************************************************** */
31
32/* Select which task to dispatch. If NULL is returned, it means there is nothing
33 * to schedule right now and background work can be scheduled. */
34typedef struct task_struct * (*dispatch_t) (
35 struct reservation_client *client
36);
37
38/* Something that can be managed in a reservation and that can yield
39 * a process for dispatching. Contains a pointer to the reservation
40 * to which it "belongs". */
41struct reservation_client {
42 struct list_head list;
43 struct reservation* reservation;
44 dispatch_t dispatch;
45};
46
47
48/* ************************************************************************** */
49
50/* Called by reservations to request state change. */
51typedef void (*reservation_change_state_t) (
52 struct reservation_environment* env,
53 struct reservation *res,
54 reservation_state_t new_state
55);
56
57/* Called by reservations to request replenishment while not DEPLETED.
58 * Useful for soft reservations that remain ACTIVE with lower priority. */
59typedef void (*request_replenishment_t)(
60 struct reservation_environment* env,
61 struct reservation *res
62);
63
 64/* The framework within which reservations operate. */
65struct reservation_environment {
66 lt_t time_zero;
67 lt_t current_time;
68
69 /* services invoked by reservations */
70 reservation_change_state_t change_state;
71 request_replenishment_t request_replenishment;
72};
73
74/* ************************************************************************** */
75
76/* A new client is added or an existing client resumes. */
77typedef void (*client_arrives_t) (
78 struct reservation *reservation,
79 struct reservation_client *client
80);
81
82/* A client suspends or terminates. */
83typedef void (*client_departs_t) (
84 struct reservation *reservation,
85 struct reservation_client *client,
86 int did_signal_job_completion
87);
88
89/* A previously requested replenishment has occurred. */
90typedef void (*on_replenishment_timer_t) (
91 struct reservation *reservation
92);
93
94/* Update the reservation's budget to reflect execution or idling. */
95typedef void (*drain_budget_t) (
96 struct reservation *reservation,
97 lt_t how_much
98);
99
100/* Select a ready task from one of the clients for scheduling. */
101typedef struct task_struct* (*dispatch_client_t) (
102 struct reservation *reservation,
103 lt_t *time_slice /* May be used to force rescheduling after
104 some amount of time. 0 => no limit */
105);
106
107/* Destructor: called before scheduler is deactivated. */
108typedef void (*shutdown_t)(struct reservation *reservation);
109
110struct reservation_ops {
111 dispatch_client_t dispatch_client;
112
113 client_arrives_t client_arrives;
114 client_departs_t client_departs;
115
116 on_replenishment_timer_t replenish;
117 drain_budget_t drain_budget;
118
119 shutdown_t shutdown;
120};
121
122#define RESERVATION_BACKGROUND_PRIORITY ULLONG_MAX
123
124struct reservation {
125 /* used to queue in environment */
126 struct list_head list;
127 struct list_head replenish_list;
128
129 reservation_state_t state;
130 unsigned int id;
131 unsigned int kind;
132
133 /* exact meaning defined by impl. */
134 lt_t priority;
135 lt_t cur_budget;
136 lt_t next_replenishment;
137
138 /* budget stats */
139 lt_t budget_consumed; /* how much budget consumed in this allocation cycle? */
140 lt_t budget_consumed_total;
141
142 /* list of registered budget callbacks */
143 struct budget_notifier_list budget_notifiers;
144
145 /* for memory reclamation purposes */
146 struct list_head all_list;
147
148 /* interaction with framework */
149 struct reservation_environment *env;
150 struct reservation_ops *ops;
151
152 struct list_head clients;
153};
154
155void reservation_init(struct reservation *res);
156
157/* Default implementations */
158
159/* simply select the first client in the list, set *for_at_most to zero */
160struct task_struct* default_dispatch_client(
161 struct reservation *res,
162 lt_t *for_at_most
163);
164
165/* drain budget at linear rate, enter DEPLETED state when budget used up */
166void common_drain_budget(struct reservation *res, lt_t how_much);
167
168/* "connector" reservation client to hook up tasks with reservations */
169struct task_client {
170 struct reservation_client client;
171 struct task_struct *task;
172};
173
174void task_client_init(struct task_client *tc, struct task_struct *task,
175 struct reservation *reservation);
176
177#define SUP_RESCHEDULE_NOW (0)
178#define SUP_NO_SCHEDULER_UPDATE (ULLONG_MAX)
179
180/* A simple uniprocessor (SUP) flat (i.e., non-hierarchical) reservation
181 * environment.
182 */
183struct sup_reservation_environment {
184 struct reservation_environment env;
185
186 /* ordered by priority */
187 struct list_head active_reservations;
188
189 /* ordered by next_replenishment */
190 struct list_head depleted_reservations;
191
192 /* unordered */
193 struct list_head inactive_reservations;
194
195 /* list of all reservations */
196 struct list_head all_reservations;
197
198 /* - SUP_RESCHEDULE_NOW means call sup_dispatch() now
199 * - SUP_NO_SCHEDULER_UPDATE means nothing to do
200 * any other value means program a timer for the given time
201 */
202 lt_t next_scheduler_update;
203 /* set to true if a call to sup_dispatch() is imminent */
204 bool will_schedule;
205};
206
207/* Contract:
208 * - before calling into sup_ code, or any reservation methods,
209 * update the time with sup_update_time(); and
210 * - after calling into sup_ code, or any reservation methods,
211 * check next_scheduler_update and program timer or trigger
212 * scheduler invocation accordingly.
213 */
214
215void sup_init(struct sup_reservation_environment* sup_env);
216void sup_add_new_reservation(struct sup_reservation_environment* sup_env,
217 struct reservation* new_res);
218void sup_update_time(struct sup_reservation_environment* sup_env, lt_t now);
219struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env);
220
221struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
222 unsigned int id);
223
224#endif
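
To illustrate the contract above, a plugin's dispatch path might look roughly like the following sketch; demo_program_timer() is a hypothetical helper that arms an hrtimer for the given time, and litmus_reschedule_local() is assumed to be visible via <litmus/preempt.h>:

static void demo_program_timer(lt_t when);   /* hypothetical */

static struct task_struct* demo_sup_dispatch(
	struct sup_reservation_environment *sup_env, lt_t now)
{
	struct task_struct *next;

	sup_update_time(sup_env, now);   /* 1) advance time first        */
	next = sup_dispatch(sup_env);    /* 2) may be NULL => background */

	/* 3) honor next_scheduler_update */
	if (sup_env->next_scheduler_update == SUP_RESCHEDULE_NOW)
		litmus_reschedule_local();
	else if (sup_env->next_scheduler_update != SUP_NO_SCHEDULER_UPDATE)
		demo_program_timer(sup_env->next_scheduler_update);

	return next;
}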
diff --git a/include/litmus/reservations/table-driven.h b/include/litmus/reservations/table-driven.h
new file mode 100644
index 000000000000..b6302a2f200d
--- /dev/null
+++ b/include/litmus/reservations/table-driven.h
@@ -0,0 +1,23 @@
1#ifndef LITMUS_RESERVATIONS_TABLE_DRIVEN_H
2#define LITMUS_RESERVATIONS_TABLE_DRIVEN_H
3
4#include <litmus/reservations/reservation.h>
5
6struct table_driven_reservation {
7 /* extend basic reservation */
8 struct reservation res;
9
10 lt_t major_cycle;
11 unsigned int next_interval;
12 unsigned int num_intervals;
13 struct lt_interval *intervals;
14
15 /* info about current scheduling slot */
16 struct lt_interval cur_interval;
17 lt_t major_cycle_start;
18};
19
20void table_driven_reservation_init(struct table_driven_reservation *tdres,
21 lt_t major_cycle, struct lt_interval *intervals, unsigned int num_intervals);
22
23#endif
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 000000000000..ac249292e866
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,182 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8#include <litmus/bheap.h>
9
10#define RELEASE_QUEUE_SLOTS 127 /* prime */
11
12struct _rt_domain;
13
14typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
15typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
16
17struct release_queue {
18 /* each slot maintains a list of release heaps sorted
19 * by release time */
20 struct list_head slot[RELEASE_QUEUE_SLOTS];
21};
22
23typedef struct _rt_domain {
24 /* runnable rt tasks are in here */
25 raw_spinlock_t ready_lock;
26 struct bheap ready_queue;
27
28 /* real-time tasks waiting for release are in here */
29 raw_spinlock_t release_lock;
30 struct release_queue release_queue;
31
32#ifdef CONFIG_RELEASE_MASTER
33 int release_master;
34#endif
35
36 /* for moving tasks to the release queue */
37 raw_spinlock_t tobe_lock;
38 struct list_head tobe_released;
39
40 /* how do we check if we need to kick another CPU? */
41 check_resched_needed_t check_resched;
42
43 /* how do we release jobs? */
44 release_jobs_t release_jobs;
45
46 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order;
48} rt_domain_t;
49
50struct release_heap {
51 /* list_head for per-time-slot list */
52 struct list_head list;
53 lt_t release_time;
54 /* all tasks to be released at release_time */
55 struct bheap heap;
56 /* used to trigger the release */
57 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */
61 struct hrtimer_start_on_info info;
62#endif
63 /* required for the timer callback */
64 rt_domain_t* dom;
65};
66
67
68static inline struct task_struct* __next_ready(rt_domain_t* rt)
69{
70 struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
71 if (hn)
72 return bheap2task(hn);
73 else
74 return NULL;
75}
76
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check,
 79			release_jobs_t release);
80
81void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task);
84
85static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
88 if (hn)
89 return bheap2task(hn);
90 else
91 return NULL;
92}
93
94static inline struct task_struct* __peek_ready(rt_domain_t* rt)
95{
96 struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
97 if (hn)
98 return bheap2task(hn);
99 else
100 return NULL;
101}
102
103static inline int is_queued(struct task_struct *t)
104{
105 BUG_ON(!tsk_rt(t)->heap_node);
106 return bheap_node_in_heap(tsk_rt(t)->heap_node);
107}
108
109static inline void remove(rt_domain_t* rt, struct task_struct *t)
110{
111 bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
112}
113
114static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
115{
116 unsigned long flags;
117 /* first we need the write lock for rt_ready_queue */
118 raw_spin_lock_irqsave(&rt->ready_lock, flags);
119 __add_ready(rt, new);
120 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
121}
122
123static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
124{
125 unsigned long flags;
126 raw_spin_lock_irqsave(&rt->ready_lock, flags);
127 __merge_ready(rt, tasks);
128 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
129}
130
131static inline struct task_struct* take_ready(rt_domain_t* rt)
132{
133 unsigned long flags;
134 struct task_struct* ret;
135 /* first we need the write lock for rt_ready_queue */
136 raw_spin_lock_irqsave(&rt->ready_lock, flags);
137 ret = __take_ready(rt);
138 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
139 return ret;
140}
141
142
143static inline void add_release(rt_domain_t* rt, struct task_struct *task)
144{
145 unsigned long flags;
146 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
147 __add_release(rt, task);
148 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
149}
150
151#ifdef CONFIG_RELEASE_MASTER
152void __add_release_on(rt_domain_t* rt, struct task_struct *task,
153 int target_cpu);
154
155static inline void add_release_on(rt_domain_t* rt,
156 struct task_struct *task,
157 int target_cpu)
158{
159 unsigned long flags;
160 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
161 __add_release_on(rt, task, target_cpu);
162 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
163}
164#endif
165
166static inline int __jobs_pending(rt_domain_t* rt)
167{
168 return !bheap_empty(&rt->ready_queue);
169}
170
171static inline int jobs_pending(rt_domain_t* rt)
172{
173 unsigned long flags;
174 int ret;
175 /* first we need the write lock for rt_ready_queue */
176 raw_spin_lock_irqsave(&rt->ready_lock, flags);
177 ret = !bheap_empty(&rt->ready_queue);
178 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
179 return ret;
180}
181
182#endif
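
A minimal usage sketch, assuming bheap_prio_t's usual two-node comparison signature; the demo_* callbacks are placeholders (a real EDF plugin would pass its own ready-queue order, resched check, and release handler):

static int demo_order(struct bheap_node *a, struct bheap_node *b);
static int demo_check_resched(rt_domain_t *rt);
static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks);

static rt_domain_t demo_domain;

static void demo_domain_setup(void)
{
	rt_domain_init(&demo_domain, demo_order,
		       demo_check_resched, demo_release_jobs);
}

static struct task_struct* demo_next_job(void)
{
	/* takes and releases the ready lock internally */
	return take_ready(&demo_domain);
}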
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
new file mode 100644
index 000000000000..9b291343714f
--- /dev/null
+++ b/include/litmus/rt_param.h
@@ -0,0 +1,290 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_RT_PARAM_H_
6#define _LINUX_RT_PARAM_H_
7
8/* Litmus time type. */
9typedef unsigned long long lt_t;
10
11static inline int lt_after(lt_t a, lt_t b)
12{
13 return ((long long) b) - ((long long) a) < 0;
14}
15#define lt_before(a, b) lt_after(b, a)
16
17static inline int lt_after_eq(lt_t a, lt_t b)
18{
19 return ((long long) a) - ((long long) b) >= 0;
20}
21#define lt_before_eq(a, b) lt_after_eq(b, a)
22
23/* different types of clients */
24typedef enum {
25 RT_CLASS_HARD,
26 RT_CLASS_SOFT,
27 RT_CLASS_BEST_EFFORT
28} task_class_t;
29
30typedef enum {
31 NO_ENFORCEMENT, /* job may overrun unhindered */
32 QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
33 PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */
34} budget_policy_t;
35
36/* Release behaviors for jobs. PERIODIC and EARLY jobs
37 must end by calling sys_complete_job() (or equivalent)
38 to set up their next release and deadline. */
39typedef enum {
40 /* Jobs are released sporadically (provided job precedence
41 constraints are met). */
42 TASK_SPORADIC,
43
44 /* Jobs are released periodically (provided job precedence
45 constraints are met). */
46 TASK_PERIODIC,
47
48 /* Jobs are released immediately after meeting precedence
49 constraints. Beware this can peg your CPUs if used in
50 the wrong applications. Only supported by EDF schedulers. */
51 TASK_EARLY
52} release_policy_t;
53
 54/* We use the priority interpretation "lower index == higher priority",
 55 * which is common in fixed-priority schedulability analysis papers.
 56 * So, a numerically lower priority value implies higher scheduling priority,
 57 * with priority 1 being the highest priority. Priority 0 is reserved for
 58 * priority boosting. LITMUS_MAX_PRIORITY denotes the size of the priority
 59 * value range.
60 */
61
62#define LITMUS_MAX_PRIORITY 512
63#define LITMUS_HIGHEST_PRIORITY 1
64#define LITMUS_LOWEST_PRIORITY (LITMUS_MAX_PRIORITY - 1)
65#define LITMUS_NO_PRIORITY UINT_MAX
66
67/* Provide generic comparison macros for userspace,
68 * in case that we change this later. */
69#define litmus_higher_fixed_prio(a, b) (a < b)
70#define litmus_lower_fixed_prio(a, b) (a > b)
71#define litmus_is_valid_fixed_prio(p) \
72 ((p) >= LITMUS_HIGHEST_PRIORITY && \
73 (p) <= LITMUS_LOWEST_PRIORITY)
74
75/* reservation support */
76
77typedef enum {
78 PERIODIC_POLLING = 10,
79 SPORADIC_POLLING,
80 TABLE_DRIVEN,
81} reservation_type_t;
82
83struct lt_interval {
84 lt_t start;
85 lt_t end;
86};
87
88#ifndef __KERNEL__
89#define __user
90#endif
91
92struct reservation_config {
93 unsigned int id;
94 lt_t priority;
95 int cpu;
96
97 union {
98 struct {
99 lt_t period;
100 lt_t budget;
101 lt_t relative_deadline;
102 lt_t offset;
103 } polling_params;
104
105 struct {
106 lt_t major_cycle_length;
107 unsigned int num_intervals;
108 struct lt_interval __user *intervals;
109 } table_driven_params;
110 };
111};
112
113/* regular sporadic task support */
114
115struct rt_task {
116 lt_t exec_cost;
117 lt_t period;
118 lt_t relative_deadline;
119 lt_t phase;
120 unsigned int cpu;
121 unsigned int priority;
122 task_class_t cls;
123 budget_policy_t budget_policy; /* ignored by pfair */
124 release_policy_t release_policy;
125};
126
127/* don't export internal data structures to user space (liblitmus) */
128#ifdef __KERNEL__
129
130struct _rt_domain;
131struct bheap_node;
132struct release_heap;
133
134struct rt_job {
 135	/* Time instant at which the job was or will be released. */
136 lt_t release;
137
138 /* What is the current deadline? */
139 lt_t deadline;
140
141 /* How much service has this job received so far? */
142 lt_t exec_time;
143
 144	/* By how much did the prior job miss its deadline?
145 * Value differs from tardiness in that lateness may
146 * be negative (when job finishes before its deadline).
147 */
148 long long lateness;
149
150 /* Which job is this. This is used to let user space
151 * specify which job to wait for, which is important if jobs
152 * overrun. If we just call sys_sleep_next_period() then we
153 * will unintentionally miss jobs after an overrun.
154 *
155 * Increase this sequence number when a job is released.
156 */
157 unsigned int job_no;
158
159#ifdef CONFIG_SCHED_TASK_TRACE
160 /* Keep track of the last time the job suspended.
161 * -> used for tracing sporadic tasks. */
162 lt_t last_suspension;
163#endif
164};
165
166struct pfair_param;
167
168/* RT task parameters for scheduling extensions
169 * These parameters are inherited during clone and therefore must
170 * be explicitly set up before the task set is launched.
171 */
172struct rt_param {
173 /* do we need to check for srp blocking? */
174 unsigned int srp_non_recurse:1;
175
176 /* is the task present? (true if it can be scheduled) */
177 unsigned int present:1;
178
179 /* has the task completed? */
180 unsigned int completed:1;
181
182#ifdef CONFIG_LITMUS_LOCKING
183 /* Is the task being priority-boosted by a locking protocol? */
184 unsigned int priority_boosted:1;
185 /* If so, when did this start? */
186 lt_t boost_start_time;
187
188 /* How many LITMUS^RT locks does the task currently hold/wait for? */
189 unsigned int num_locks_held;
190 /* How many PCP/SRP locks does the task currently hold/wait for? */
191 unsigned int num_local_locks_held;
192#endif
193
194 /* user controlled parameters */
195 struct rt_task task_params;
196
197 /* timing parameters */
198 struct rt_job job_params;
199
200
201 /* Special handling for periodic tasks executing
202 * clock_nanosleep(CLOCK_MONOTONIC, ...).
203 */
204 lt_t nanosleep_wakeup;
205 unsigned int doing_abs_nanosleep:1;
206
207 /* Should the next job be released at some time other than
208 * just period time units after the last release?
209 */
210 unsigned int sporadic_release:1;
211 lt_t sporadic_release_time;
212
 213	/* Task representing the current "inherited" task
 214	 * priority, as assigned by inherit_priority and
 215	 * return_priority in the scheduler plugins.
 216	 * May point to self if PI does not result in
 217	 * an increased task priority.
218 */
219 struct task_struct* inh_task;
220
221#ifdef CONFIG_NP_SECTION
222 /* For the FMLP under PSN-EDF, it is required to make the task
223 * non-preemptive from kernel space. In order not to interfere with
224 * user space, this counter indicates the kernel space np setting.
225 * kernel_np > 0 => task is non-preemptive
226 */
227 unsigned int kernel_np;
228#endif
229
230 /* This field can be used by plugins to store where the task
231 * is currently scheduled. It is the responsibility of the
232 * plugin to avoid race conditions.
233 *
 234	 * This is used by GSN-EDF and PFAIR.
235 */
236 volatile int scheduled_on;
237
238 /* Is the stack of the task currently in use? This is updated by
239 * the LITMUS core.
240 *
241 * Be careful to avoid deadlocks!
242 */
243 volatile int stack_in_use;
244
245 /* This field can be used by plugins to store where the task
246 * is currently linked. It is the responsibility of the plugin
247 * to avoid race conditions.
248 *
249 * Used by GSN-EDF.
250 */
251 volatile int linked_on;
252
253 /* PFAIR/PD^2 state. Allocated on demand. */
254 union {
255 void *plugin_state;
256 struct pfair_param *pfair;
257 };
258
259 /* Fields saved before BE->RT transition.
260 */
261 int old_policy;
262 int old_prio;
263
264 /* ready queue for this task */
265 struct _rt_domain* domain;
266
267 /* heap element for this task
268 *
269 * Warning: Don't statically allocate this node. The heap
270 * implementation swaps these between tasks, thus after
271 * dequeuing from a heap you may end up with a different node
 272	 * than the one you had when enqueuing the task. For the same
273 * reason, don't obtain and store references to this node
274 * other than this pointer (which is updated by the heap
275 * implementation).
276 */
277 struct bheap_node* heap_node;
278 struct release_heap* rel_heap;
279
280 /* Used by rt_domain to queue task in release list.
281 */
282 struct list_head list;
283
284 /* Pointer to the page shared between userspace and kernel. */
285 struct control_page * ctrl_page;
286};
287
288#endif
289
290#endif
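
For reference, a sketch of how struct rt_task might be filled in for a 10 ms / 100 ms implicit-deadline soft real-time task; the values are made up and assume the usual interpretation of lt_t as nanoseconds:

static struct rt_task demo_params = {
	.exec_cost         = 10 * 1000000ULL,   /* 10 ms WCET        */
	.period            = 100 * 1000000ULL,  /* 100 ms period     */
	.relative_deadline = 100 * 1000000ULL,  /* implicit deadline */
	.phase             = 0,
	.cpu               = 0,
	.priority          = LITMUS_LOWEST_PRIORITY,
	.cls               = RT_CLASS_SOFT,
	.budget_policy     = NO_ENFORCEMENT,
	.release_policy    = TASK_SPORADIC,
};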
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 000000000000..0923f26b745a
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,180 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10#ifdef CONFIG_LITMUS_LOCKING
11#include <litmus/locking.h>
12#endif
13
14/************************ setup/tear down ********************/
15
16typedef long (*activate_plugin_t) (void);
17typedef long (*deactivate_plugin_t) (void);
18
19struct domain_proc_info;
20typedef long (*get_domain_proc_info_t) (struct domain_proc_info **info);
21
22
23/********************* scheduler invocation ******************/
24/* The main scheduling function, called to select the next task to dispatch. */
25typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
26/* Clean up after the task switch has occured.
27 * This function is called after every (even non-rt) task switch.
28 */
29typedef void (*finish_switch_t)(struct task_struct *prev);
30
31
32/* When waiting for the stack of the task selected by the plugin
33 * to become available, this callback is invoked to give the
34 * plugin a chance to cancel the wait. If the plugin returns false,
35 * the scheduler is invoked again. */
36typedef bool (*should_wait_for_stack_t)(struct task_struct *next);
37
38/* After a pull migration (which involves dropping scheduler locks),
39 * the plugin is given the chance to validate that the task is still
40 * the right one. If the plugin returns false, the scheduler
41 * will be invoked again. */
42typedef bool (*post_migration_validate_t)(struct task_struct *next);
43
44/* After dropping the lock to facilitate a pull migration, the task
45 * state may have changed. In this case, the core notifies the plugin
46 * with this callback and then invokes the scheduler again. */
47typedef void (*next_became_invalid_t)(struct task_struct *next);
48
49/********************* task state changes ********************/
50
51/* Called to setup a new real-time task.
52 * Release the first job, enqueue, etc.
53 * Task may already be running.
54 */
55typedef void (*task_new_t) (struct task_struct *task,
56 int on_rq,
57 int running);
58
59/* Called when userspace seeks to set new task parameters for a task
60 * that is already in real-time mode (i.e., is_realtime(task)).
61 */
62typedef long (*task_change_params_t) (struct task_struct *task,
63 struct rt_task *new_params);
64
65/* Called to re-introduce a task after blocking.
66 * Can potentially be called multiple times.
67 */
68typedef void (*task_wake_up_t) (struct task_struct *task);
 69/* Called to notify the plugin of a blocking real-time task;
 70 * it will only be called for real-time tasks and before schedule() is called. */
71typedef void (*task_block_t) (struct task_struct *task);
72/* Called when a real-time task exits or changes to a different scheduling
73 * class.
74 * Free any allocated resources
75 */
76typedef void (*task_exit_t) (struct task_struct *);
77
78/* task_exit() is called with interrupts disabled and runqueue locks held, and
 79 * thus cannot block or spin. task_cleanup() is called sometime later
80 * without any locks being held.
81 */
82typedef void (*task_cleanup_t) (struct task_struct *);
83
84#ifdef CONFIG_LITMUS_LOCKING
85/* Called when the current task attempts to create a new lock of a given
86 * protocol type. */
87typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
88 void* __user config);
89#endif
90
91
92/********************* sys call backends ********************/
93/* This function causes the caller to sleep until the next release */
94typedef long (*complete_job_t) (void);
95
96typedef long (*admit_task_t)(struct task_struct* tsk);
97
98/* return false to indicate that the plugin does not support forking */
99typedef bool (*fork_task_t)(struct task_struct* tsk);
100
101typedef long (*wait_for_release_at_t)(lt_t release_time);
102
103/* Informs the plugin when a synchronous release takes place. */
104typedef void (*synchronous_release_at_t)(lt_t time_zero);
105
106/* How much budget has the current task consumed so far, and how much
107 * has it left? The default implementation ties into the per-task
108 * budget enforcement code. Plugins can override this to report
109 * reservation-specific values. */
110typedef void (*current_budget_t)(lt_t *used_so_far, lt_t *remaining);
111
112/* Reservation creation/removal backends. Meaning of reservation_type and
113 * reservation_id are entirely plugin-specific. */
114typedef long (*reservation_create_t)(int reservation_type, void* __user config);
115typedef long (*reservation_destroy_t)(unsigned int reservation_id, int cpu);
116
117/************************ misc routines ***********************/
118
119
120struct sched_plugin {
121 struct list_head list;
122 /* basic info */
123 char *plugin_name;
124
125 /* setup */
126 activate_plugin_t activate_plugin;
127 deactivate_plugin_t deactivate_plugin;
128 get_domain_proc_info_t get_domain_proc_info;
129
130 /* scheduler invocation */
131 schedule_t schedule;
132 finish_switch_t finish_switch;
133
134 /* control over pull migrations */
135 should_wait_for_stack_t should_wait_for_stack;
136 next_became_invalid_t next_became_invalid;
137 post_migration_validate_t post_migration_validate;
138
139 /* syscall backend */
140 complete_job_t complete_job;
141 wait_for_release_at_t wait_for_release_at;
142 synchronous_release_at_t synchronous_release_at;
143
144 /* task state changes */
145 admit_task_t admit_task;
146 fork_task_t fork_task;
147
148 task_new_t task_new;
149 task_wake_up_t task_wake_up;
150 task_block_t task_block;
151
152 /* optional: support task parameter changes at runtime */
153 task_change_params_t task_change_params;
154
155 task_exit_t task_exit;
156 task_cleanup_t task_cleanup;
157
158 current_budget_t current_budget;
159
160 /* Reservation support */
161 reservation_create_t reservation_create;
162 reservation_destroy_t reservation_destroy;
163
164#ifdef CONFIG_LITMUS_LOCKING
165 /* locking protocols */
166 allocate_lock_t allocate_lock;
167#endif
168} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
169
170
171extern struct sched_plugin *litmus;
172
173int register_sched_plugin(struct sched_plugin* plugin);
174struct sched_plugin* find_sched_plugin(const char* name);
175void print_sched_plugins(struct seq_file *m);
176
177
178extern struct sched_plugin linux_sched_plugin;
179
180#endif
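
A skeleton sketch of plugin registration; the skel_* callbacks are placeholders whose prototypes follow the typedefs above, the usual <linux/init.h>/<linux/module.h> includes are assumed, and a real plugin would implement substantially more of the interface:

static struct task_struct* skel_schedule(struct task_struct *prev);
static long skel_admit_task(struct task_struct *tsk);
static void skel_task_new(struct task_struct *t, int on_rq, int running);
static void skel_task_wake_up(struct task_struct *t);
static void skel_task_block(struct task_struct *t);
static void skel_task_exit(struct task_struct *t);
static long skel_complete_job(void);

static struct sched_plugin skel_plugin = {
	.plugin_name  = "SKEL",
	.schedule     = skel_schedule,
	.admit_task   = skel_admit_task,
	.task_new     = skel_task_new,
	.task_wake_up = skel_task_wake_up,
	.task_block   = skel_task_block,
	.task_exit    = skel_task_exit,
	.complete_job = skel_complete_job,
};

static int __init init_skel_plugin(void)
{
	return register_sched_plugin(&skel_plugin);
}
module_init(init_skel_plugin);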
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
new file mode 100644
index 000000000000..0532424fbee6
--- /dev/null
+++ b/include/litmus/sched_trace.h
@@ -0,0 +1,267 @@
1/*
2 * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
3 */
4#ifndef _LINUX_SCHED_TRACE_H_
5#define _LINUX_SCHED_TRACE_H_
6
7/* all times in nanoseconds */
8
9struct st_trace_header {
10 u8 type; /* Of what type is this record? */
11 u8 cpu; /* On which CPU was it recorded? */
12 u16 pid; /* PID of the task. */
13 u32 job; /* The job sequence number. */
14};
15
16#define ST_NAME_LEN 16
17struct st_name_data {
18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
19};
20
21struct st_param_data { /* regular params */
22 u32 wcet;
23 u32 period;
24 u32 phase;
25 u8 partition;
26 u8 class;
27 u8 __unused[2];
28};
29
 30struct st_release_data { /* A job was/is going to be released. */
31 u64 release; /* What's the release time? */
32 u64 deadline; /* By when must it finish? */
33};
34
 35struct st_assigned_data { /* A job was assigned to a CPU. */
36 u64 when;
37 u8 target; /* Where should it execute? */
38 u8 __unused[7];
39};
40
41struct st_switch_to_data { /* A process was switched to on a given CPU. */
42 u64 when; /* When did this occur? */
43 u32 exec_time; /* Time the current job has executed. */
44 u8 __unused[4];
45
46};
47
48struct st_switch_away_data { /* A process was switched away from on a given CPU. */
49 u64 when;
50 u64 exec_time;
51};
52
53struct st_completion_data { /* A job completed. */
54 u64 when;
55 u64 forced:1; /* Set to 1 if job overran and kernel advanced to the
56 * next task automatically; set to 0 otherwise.
57 */
58 u64 exec_time:63; /* Actual execution time of job. */
59};
60
61struct st_block_data { /* A task blocks. */
62 u64 when;
63 u64 __unused;
64};
65
66struct st_resume_data { /* A task resumes. */
67 u64 when;
68 u64 __unused;
69};
70
71struct st_action_data {
72 u64 when;
73 u8 action;
74 u8 __unused[7];
75};
76
77struct st_sys_release_data {
78 u64 when;
79 u64 release;
80};
81
82#define DATA(x) struct st_ ## x ## _data x;
83
84typedef enum {
85 ST_NAME = 1, /* Start at one, so that we can spot
86 * uninitialized records. */
87 ST_PARAM,
88 ST_RELEASE,
89 ST_ASSIGNED,
90 ST_SWITCH_TO,
91 ST_SWITCH_AWAY,
92 ST_COMPLETION,
93 ST_BLOCK,
94 ST_RESUME,
95 ST_ACTION,
96 ST_SYS_RELEASE
97} st_event_record_type_t;
98
99struct st_event_record {
100 struct st_trace_header hdr;
101 union {
102 u64 raw[2];
103
104 DATA(name);
105 DATA(param);
106 DATA(release);
107 DATA(assigned);
108 DATA(switch_to);
109 DATA(switch_away);
110 DATA(completion);
111 DATA(block);
112 DATA(resume);
113 DATA(action);
114 DATA(sys_release);
115 } data;
116};
117
118#undef DATA
119
120#ifdef __KERNEL__
121
122#include <linux/sched.h>
123#include <litmus/feather_trace.h>
124
125#ifdef CONFIG_SCHED_TASK_TRACE
126
127#define SCHED_TRACE(id, callback, task) \
128 ft_event1(id, callback, task)
129#define SCHED_TRACE2(id, callback, task, xtra) \
130 ft_event2(id, callback, task, xtra)
131
132/* provide prototypes; needed on sparc64 */
133#ifndef NO_TASK_TRACE_DECLS
134feather_callback void do_sched_trace_task_name(unsigned long id,
135 struct task_struct* task);
136feather_callback void do_sched_trace_task_param(unsigned long id,
137 struct task_struct* task);
138feather_callback void do_sched_trace_task_release(unsigned long id,
139 struct task_struct* task);
140feather_callback void do_sched_trace_task_switch_to(unsigned long id,
141 struct task_struct* task);
142feather_callback void do_sched_trace_task_switch_away(unsigned long id,
143 struct task_struct* task);
144feather_callback void do_sched_trace_task_completion(unsigned long id,
145 struct task_struct* task,
146 unsigned long forced);
147feather_callback void do_sched_trace_last_suspension_as_completion(
148 unsigned long id,
149 struct task_struct* task);
150feather_callback void do_sched_trace_task_block(unsigned long id,
151 struct task_struct* task);
152feather_callback void do_sched_trace_task_resume(unsigned long id,
153 struct task_struct* task);
154feather_callback void do_sched_trace_action(unsigned long id,
155 struct task_struct* task,
156 unsigned long action);
157feather_callback void do_sched_trace_sys_release(unsigned long id,
158 lt_t* start);
159
160#endif
161
162#else
163
164#define SCHED_TRACE(id, callback, task) /* no tracing */
165#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
166
167#endif
168
169#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
170
171#include <trace/events/litmus.h>
172
173#else
174
175/* Override trace macros to actually do nothing */
176#define trace_litmus_task_param(t)
177#define trace_litmus_task_release(t)
178#define trace_litmus_switch_to(t)
179#define trace_litmus_switch_away(prev)
180#define trace_litmus_task_completion(t, forced)
181#define trace_litmus_task_block(t)
182#define trace_litmus_task_resume(t)
183#define trace_litmus_sys_release(start)
184
185#endif
186
187
188#define SCHED_TRACE_BASE_ID 500
189
190
191#define sched_trace_task_name(t) \
192 SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, \
193 do_sched_trace_task_name, t)
194
195#define sched_trace_task_param(t) \
196 do { \
197 SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, \
198 do_sched_trace_task_param, t); \
199 trace_litmus_task_param(t); \
200 } while (0)
201
202#define sched_trace_task_release(t) \
203 do { \
204 SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, \
205 do_sched_trace_task_release, t); \
206 trace_litmus_task_release(t); \
207 } while (0)
208
209#define sched_trace_task_switch_to(t) \
210 do { \
211 SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, \
212 do_sched_trace_task_switch_to, t); \
213 trace_litmus_switch_to(t); \
214 } while (0)
215
216#define sched_trace_task_switch_away(t) \
217 do { \
218 SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, \
219 do_sched_trace_task_switch_away, t); \
220 trace_litmus_switch_away(t); \
221 } while (0)
222
223#define sched_trace_task_completion(t, forced) \
224 do { \
225 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, \
226 do_sched_trace_task_completion, t, \
227 (unsigned long) forced); \
228 trace_litmus_task_completion(t, forced); \
229 } while (0)
230
231#define sched_trace_task_block(t) \
232 do { \
233 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, \
234 do_sched_trace_task_block, t); \
235 trace_litmus_task_block(t); \
236 } while (0)
237
238#define sched_trace_task_resume(t) \
239 do { \
240 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, \
241 do_sched_trace_task_resume, t); \
242 trace_litmus_task_resume(t); \
243 } while (0)
244
245#define sched_trace_action(t, action) \
246 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9, \
247 do_sched_trace_action, t, (unsigned long) action);
248
 249/* 'when' is a pointer, so it does not need an explicit cast to unsigned long */
250#define sched_trace_sys_release(when) \
251 do { \
252 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, \
253 do_sched_trace_sys_release, when); \
254 trace_litmus_sys_release(when); \
255 } while (0)
256
257#define sched_trace_last_suspension_as_completion(t) \
258 do { \
259 SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, \
260 do_sched_trace_last_suspension_as_completion, t); \
261 } while (0)
262
263#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
264
265#endif /* __KERNEL__ */
266
267#endif
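
As a small sketch, a plugin's release and completion paths would typically be instrumented as below; the macros compile to no-ops unless the corresponding tracing options are enabled, and the demo_* bodies are placeholders:

static void demo_on_release(struct task_struct *t)
{
	/* ... plugin-specific release handling ... */
	sched_trace_task_release(t);
}

static void demo_on_completion(struct task_struct *t, int forced)
{
	sched_trace_task_completion(t, forced);
	/* ... set up the next job ... */
}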
diff --git a/include/litmus/srp.h b/include/litmus/srp.h
new file mode 100644
index 000000000000..c9a4552b2bf3
--- /dev/null
+++ b/include/litmus/srp.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_SRP_H
2#define LITMUS_SRP_H
3
4struct srp_semaphore;
5
6struct srp_priority {
7 struct list_head list;
8 unsigned int priority;
9 pid_t pid;
10};
11#define list2prio(l) list_entry(l, struct srp_priority, list)
12
13/* struct for uniprocessor SRP "semaphore" */
14struct srp_semaphore {
15 struct litmus_lock litmus_lock;
16 struct srp_priority ceiling;
17 struct task_struct* owner;
18 int cpu; /* cpu associated with this "semaphore" and resource */
19};
20
21/* map a task to its SRP preemption level priority */
22typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
23/* Must be updated by each plugin that uses SRP.*/
24extern srp_prioritization_t get_srp_prio;
25
26struct srp_semaphore* allocate_srp_semaphore(void);
27
28#endif
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
new file mode 100644
index 000000000000..2646136e3881
--- /dev/null
+++ b/include/litmus/trace.h
@@ -0,0 +1,161 @@
1#ifndef _SYS_TRACE_H_
2#define _SYS_TRACE_H_
3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5
6
7#include <litmus/feather_trace.h>
8#include <litmus/feather_buffer.h>
9
10
11/*********************** TIMESTAMPS ************************/
12
13enum task_type_marker {
14 TSK_BE,
15 TSK_RT,
16 TSK_UNKNOWN
17};
18
19struct timestamp {
20 uint64_t timestamp:48;
21 uint64_t pid:16;
22 uint32_t seq_no;
23 uint8_t cpu;
24 uint8_t event;
25 uint8_t task_type:2;
26 uint8_t irq_flag:1;
27 uint8_t irq_count:5;
28};
29
30/* tracing callbacks */
31feather_callback void msg_sent_to(unsigned long event, unsigned long to);
32feather_callback void msg_received_local(unsigned long event);
33
34feather_callback void msg_sent_local(unsigned long event);
35feather_callback void msg_received_from(unsigned long event, unsigned long from);
36
37#define MSG_TIMESTAMP_SENT(id, to) \
38 ft_event1(id, msg_sent_to, (unsigned long) (to));
39
40#define MSG_TIMESTAMP_RECEIVED(id) \
41 ft_event0(id, msg_received_local);
42
43#define MSG_TIMESTAMP_SENT_LOCAL(id) \
44 ft_event0(id, msg_sent_local);
45
46#define MSG_TIMESTAMP_RECEIVED_FROM(id, from) \
47 ft_event1(id, msg_received_from, (unsigned long) (from))
48
49feather_callback void save_cpu_timestamp(unsigned long event);
50feather_callback void save_cpu_timestamp_time(unsigned long event, unsigned long time_ptr);
51feather_callback void save_cpu_timestamp_irq(unsigned long event, unsigned long irq_count_ptr);
52feather_callback void save_cpu_timestamp_task(unsigned long event, unsigned long t_ptr);
53feather_callback void save_cpu_timestamp_def(unsigned long event, unsigned long type);
54feather_callback void save_cpu_task_latency(unsigned long event, unsigned long when_ptr);
55
56#define CPU_TIMESTAMP_TIME(id, time_ptr) \
57 ft_event1(id, save_cpu_timestamp_time, (unsigned long) time_ptr)
58
59#define CPU_TIMESTAMP_IRQ(id, irq_count_ptr) \
60 ft_event1(id, save_cpu_timestamp_irq, (unsigned long) irq_count_ptr)
61
62#define CPU_TIMESTAMP(id) ft_event0(id, save_cpu_timestamp)
63
64#define CPU_DTIMESTAMP(id, def) ft_event1(id, save_cpu_timestamp_def, (unsigned long) def)
65
66#define CPU_TIMESTAMP_CUR(id) CPU_DTIMESTAMP(id, is_realtime(current) ? TSK_RT : TSK_BE)
67
68#define CPU_TTIMESTAMP(id, task) \
69 ft_event1(id, save_cpu_timestamp_task, (unsigned long) task)
70
71#define CPU_LTIMESTAMP(id, task) \
72 ft_event1(id, save_cpu_task_latency, (unsigned long) task)
73
74#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
75
76#define MSG_TIMESTAMP_SENT(id, to)
77#define MSG_TIMESTAMP_RECEIVED(id)
78
79#define CPU_TIMESTAMP_TIME(id, time_ptr)
80#define CPU_TIMESTAMP_IRQ(id, irq_count_ptr)
81#define CPU_TIMESTAMP(id)
82#define CPU_DTIMESTAMP(id, def)
83#define CPU_TIMESTAMP_CUR(id)
84#define CPU_TTIMESTAMP(id, task)
85#define CPU_LTIMESTAMP(id, task)
86
87#endif
88
89
90/* Convention for timestamps
91 * =========================
92 *
93 * In order to process the trace files with a common tool, we use the following
94 * convention to measure execution times: The end time id of a code segment is
95 * always the next number after the start time event id.
96 */
97
98#define __TS_SYSCALL_IN_START(p) CPU_TIMESTAMP_TIME(10, p)
99#define __TS_SYSCALL_IN_END(p) CPU_TIMESTAMP_IRQ(11, p)
100
101#define TS_SYSCALL_OUT_START CPU_TIMESTAMP_CUR(20)
102#define TS_SYSCALL_OUT_END CPU_TIMESTAMP_CUR(21)
103
104#define TS_LOCK_START CPU_TIMESTAMP_CUR(30)
105#define TS_LOCK_END CPU_TIMESTAMP_CUR(31)
106
107#define TS_LOCK_SUSPEND CPU_TIMESTAMP_CUR(38)
108#define TS_LOCK_RESUME CPU_TIMESTAMP_CUR(39)
109
110#define TS_UNLOCK_START CPU_TIMESTAMP_CUR(40)
111#define TS_UNLOCK_END CPU_TIMESTAMP_CUR(41)
112
113#define TS_SCHED_START CPU_DTIMESTAMP(100, TSK_UNKNOWN) /* we only
114 * care
115 * about
116 * next */
117#define TS_SCHED_END(t) CPU_TTIMESTAMP(101, t)
118#define TS_SCHED2_START(t) CPU_TTIMESTAMP(102, t)
119#define TS_SCHED2_END(t) CPU_TTIMESTAMP(103, t)
120
121#define TS_CXS_START(t) CPU_TTIMESTAMP(104, t)
122#define TS_CXS_END(t) CPU_TTIMESTAMP(105, t)
123
124#define TS_RELEASE_START CPU_DTIMESTAMP(106, TSK_RT)
125#define TS_RELEASE_END CPU_DTIMESTAMP(107, TSK_RT)
126
127#define TS_XCALL_START CPU_DTIMESTAMP(108, TSK_RT)
128#define TS_XCALL_END CPU_DTIMESTAMP(109, TSK_RT)
129
130#define TS_TICK_START(t) CPU_TTIMESTAMP(110, t)
131#define TS_TICK_END(t) CPU_TTIMESTAMP(111, t)
132
133#define TS_QUANTUM_BOUNDARY_START CPU_TIMESTAMP_CUR(112)
134#define TS_QUANTUM_BOUNDARY_END CPU_TIMESTAMP_CUR(113)
135
136#define TS_SCHED_TIMER_START CPU_TIMESTAMP_CUR(114)
137#define TS_SCHED_TIMER_END CPU_TIMESTAMP_CUR(115)
138
139
140#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
141#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
142
143#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */
144#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */
145
146#define TS_ENTER_NP_START CPU_TIMESTAMP(140)
147#define TS_ENTER_NP_END CPU_TIMESTAMP(141)
148
149#define TS_EXIT_NP_START CPU_TIMESTAMP(150)
150#define TS_EXIT_NP_END CPU_TIMESTAMP(151)
151
152#define TS_SEND_RESCHED_START(c) MSG_TIMESTAMP_SENT(190, c)
153#define TS_SEND_RESCHED_END MSG_TIMESTAMP_RECEIVED(191)
154
155#define TS_SEND_XCALL_START(c) MSG_TIMESTAMP_SENT(192, c)
156#define TS_SEND_XCALL_END MSG_TIMESTAMP_RECEIVED(193)
157
158#define TS_RELEASE_LATENCY(when) CPU_LTIMESTAMP(208, &(when))
159#define TS_TIMER_LATENCY(when) CPU_LTIMESTAMP(209, &(when))
160
161#endif /* !_SYS_TRACE_H_ */
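
To illustrate the start/end convention (end event id = start id + 1), a lock-acquisition path could be bracketed as in the following sketch; the body is a placeholder:

static int demo_lock_acquire(void)
{
	int err = 0;

	TS_LOCK_START;
	/* ... attempt to acquire the lock; if the task suspends, the
	 * protocol additionally brackets the suspension with
	 * TS_LOCK_SUSPEND / TS_LOCK_RESUME ... */
	TS_LOCK_END;

	return err;
}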
diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h
new file mode 100644
index 000000000000..0d0c042ba9c3
--- /dev/null
+++ b/include/litmus/trace_irq.h
@@ -0,0 +1,14 @@
1#ifndef _LITMUS_TRACE_IRQ_H_
2#define _LITMUS_TRACE_IRQ_H_
3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5
6void ft_irq_fired(void);
7
8#else
9
10#define ft_irq_fired() /* nothing to do */
11
12#endif
13
14#endif
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 000000000000..ce1347c355f8
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
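
A hedged sketch of enqueuing the current task in priority order, as a locking protocol might do; wq is assumed to be an initialized wait_queue_head_t owned by the caller, and dropping/re-taking the protocol's own locks is elided:

static void demo_wait_in_prio_order(wait_queue_head_t *wq, lt_t prio)
{
	prio_wait_queue_t pwq;

	init_prio_waitqueue_entry(&pwq, current, prio);
	set_current_state(TASK_UNINTERRUPTIBLE);
	add_wait_queue_prio_exclusive(wq, &pwq);

	schedule();    /* suspend until the lock holder wakes us up */

	remove_wait_queue(wq, &pwq.wq);
}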
diff --git a/litmus/Kconfig b/litmus/Kconfig
new file mode 100644
index 000000000000..be25d7c53971
--- /dev/null
+++ b/litmus/Kconfig
@@ -0,0 +1,384 @@
1menu "LITMUS^RT"
2
3menu "Scheduling"
4
5config PLUGIN_CEDF
6 bool "Clustered-EDF"
7 depends on X86 && SYSFS
8 default y
9 help
10 Include the Clustered EDF (C-EDF) plugin in the kernel.
11 This is appropriate for large platforms with shared caches.
12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13 makes little sense since there aren't any shared caches.
14
15config PLUGIN_PFAIR
16 bool "PFAIR"
17 default y
18 help
19 Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel.
20 The PFAIR plugin requires high resolution timers (for staggered
21 quanta) and also requires HZ_PERIODIC (i.e., periodic timer ticks
22 even if a processor is idle, as quanta could be missed otherwise).
23 Further, the PFAIR plugin uses the system tick and thus requires
 24	  HZ=1000 to achieve reasonable granularity.
25
26 If unsure, say Yes.
27
28config RELEASE_MASTER
29 bool "Release-master Support"
30 depends on SMP
31 default n
32 help
33 Allow one processor to act as a dedicated interrupt processor
34 that services all timer interrupts, but that does not schedule
35 real-time tasks. See RTSS'09 paper for details
36 (http://www.cs.unc.edu/~anderson/papers.html).
37
38config PREFER_LOCAL_LINKING
39 bool "Link newly arrived tasks locally if possible"
40 depends on SMP
41 default y
42 help
43 In linking-based schedulers such as GSN-EDF, if an idle CPU processes
44 a job arrival (i.e., when a job resumed or was released), it can
45 either link the task to itself and schedule it immediately (to avoid
46 unnecessary scheduling latency) or it can try to link it to the CPU
47 where it executed previously (to maximize cache affinity, at the
48 expense of increased latency due to the need to send an IPI).
49
50 In lightly loaded systems, this option can significantly reduce
51 scheduling latencies. In heavily loaded systems (where CPUs are
 52	  rarely idle), it is unlikely to make much of a difference.
53
54 If unsure, say yes.
55
56config LITMUS_QUANTUM_LENGTH_US
57 int "quantum length (in us)"
58 default 1000
59 range 500 10000
60 help
61 Determine the desired quantum length, in microseconds, which
62 is used to determine the granularity of scheduling in
63 quantum-driven plugins (primarily PFAIR). This parameter does not
64 affect event-driven plugins (such as the EDF-based plugins and P-FP).
65 Default: 1000us = 1ms.
66
67config BUG_ON_MIGRATION_DEADLOCK
68 bool "Panic on suspected migration deadlock"
69 default y
70 help
71 This is a debugging option. The LITMUS^RT migration support code for
72 global scheduling contains a simple heuristic to detect when the
73 system deadlocks due to circular stack dependencies.
74
75 For example, such a deadlock exists if CPU 0 waits for task A's stack
76 to become available while using task B's stack, and CPU 1 waits for
77 task B's stack to become available while using task A's stack. Such
78 a situation can arise in (buggy) global scheduling plugins.
79
 80	  With this option enabled, such a scenario will result in a BUG().
81 You can turn off this option when debugging on real hardware (e.g.,
82 to rescue traces, etc. that would be hard to get after a panic).
83
84 Only turn this off if you really know what you are doing. If this
85 BUG() triggers, the scheduler is broken and turning off this option
86 won't fix it.
87
88
89endmenu
90
91menu "Real-Time Synchronization"
92
93config NP_SECTION
94 bool "Non-preemptive section support"
95 default y
96 help
97 Allow tasks to become non-preemptable.
98 Note that plugins still need to explicitly support non-preemptivity.
99 Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support.
100
101 This is required to support locking protocols such as the FMLP.
102 If disabled, all tasks will be considered preemptable at all times.
103
104config LITMUS_LOCKING
105 bool "Support for real-time locking protocols"
106 depends on NP_SECTION
107 default y
108 help
109 Enable LITMUS^RT's multiprocessor real-time locking protocols with
 110	  predictable maximum blocking times.
111
112 Say Yes if you want to include locking protocols such as the FMLP and
113 Baker's SRP.
114
115endmenu
116
117menu "Performance Enhancements"
118
119config SCHED_CPU_AFFINITY
120 bool "Local Migration Affinity"
121 depends on X86 && SYSFS
122 default y
123 help
 124	  Rescheduled tasks prefer CPUs near their previously used CPU.
125 This may improve cache performance through possible preservation of
126 cache affinity, at the expense of (slightly) more involved scheduling
127 logic.
128
129 Warning: May make bugs harder to find since tasks may migrate less often.
130
131 NOTES:
132 * Feature is not utilized by PFair/PD^2.
133
134 Say Yes if unsure.
135
136config ALLOW_EARLY_RELEASE
137 bool "Allow Early Releasing"
138 default y
139 help
140 Allow tasks to release jobs early (while still maintaining job
141 precedence constraints). Only supported by EDF schedulers. Early
142 releasing must be explicitly requested by real-time tasks via
143 the task_params passed to sys_set_task_rt_param().
144
145 Early releasing can improve job response times while maintaining
146 real-time correctness. However, it can easily peg your CPUs
147 since tasks never suspend to wait for their next job. As such, early
148 releasing is really only useful in the context of implementing
149 bandwidth servers, interrupt handling threads, or short-lived
150 computations.
151
152 Beware that early releasing may affect real-time analysis
153 if using locking protocols or I/O.
154
155 Say Yes if unsure.
156
157choice
158 prompt "EDF Tie-Break Behavior"
159 default EDF_TIE_BREAK_LATENESS_NORM
160 help
161 Allows the configuration of tie-breaking behavior when the deadlines
162 of two EDF-scheduled tasks are equal.
163
164 config EDF_TIE_BREAK_LATENESS
165 bool "Lateness-based Tie Break"
166 help
167 Break ties between two jobs, A and B, based upon the lateness of their
168 prior jobs. The job with the greatest lateness has priority. Note that
169 lateness has a negative value if the prior job finished before its
170 deadline.
171
172 config EDF_TIE_BREAK_LATENESS_NORM
173 bool "Normalized Lateness-based Tie Break"
174 help
175 Break ties between two jobs, A and B, based upon the lateness, normalized
176 by relative deadline, of their prior jobs. The job with the greatest
177 normalized lateness has priority. Note that lateness has a negative value
178 if the prior job finished before its deadline.
179
 180	    Normalized lateness tie-breaks are likely desirable over non-normalized
181 tie-breaks if the execution times and/or relative deadlines of tasks in a
182 task set vary greatly.
183
184 config EDF_TIE_BREAK_HASH
185 bool "Hash-based Tie Breaks"
186 help
187 Break ties between two jobs, A and B, with equal deadlines by using a
188 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
 189	    A has a ~50% chance of winning a given tie-break.
190
191 config EDF_PID_TIE_BREAK
192 bool "PID-based Tie Breaks"
193 help
194 Break ties based upon OS-assigned thread IDs. Use this option if
 195	    required by the algorithm's real-time analysis or if per-task
 196	    response-time jitter must be minimized.
197
198 NOTES:
 199	    * This tie-breaking method was the default in Litmus 2012.2 and before.
200
201endchoice
202
203endmenu
204
205menu "Tracing"
206
207config FEATHER_TRACE
208 bool "Feather-Trace Infrastructure"
209 default y
210 help
211 Feather-Trace basic tracing infrastructure. Includes device file
212 driver and instrumentation point support.
213
214 Note that this option only enables the basic Feather-Trace infrastructure;
215 you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
216 actually enable any events.
217
218config SCHED_TASK_TRACE
219 bool "Trace real-time tasks"
220 depends on FEATHER_TRACE
221 default y
222 help
223 Include support for the sched_trace_XXX() tracing functions. This
224 allows the collection of real-time task events such as job
225 completions, job releases, early completions, etc. This results in a
226 small overhead in the scheduling code. Disable if the overhead is not
227 acceptable (e.g., benchmarking).
228
229 Say Yes for debugging.
230 Say No for overhead tracing.
231
232config SCHED_TASK_TRACE_SHIFT
233 int "Buffer size for sched_trace_xxx() events"
234 depends on SCHED_TASK_TRACE
235 range 8 13
236 default 9
237 help
238
239 Select the buffer size of sched_trace_xxx() events as a power of two.
240 These buffers are statically allocated as per-CPU data. Each event
241 requires 24 bytes storage plus one additional flag byte. Too large
242 buffers can cause issues with the per-cpu allocator (and waste
243 memory). Too small buffers can cause scheduling events to be lost. The
244 "right" size is workload dependent and depends on the number of tasks,
245 each task's period, each task's number of suspensions, and how often
246 the buffer is flushed.
247
248 Examples: 12 => 4k events
249 10 => 1k events
250 8 => 512 events
251
252config SCHED_LITMUS_TRACEPOINT
253 bool "Enable Event/Tracepoint Tracing for real-time task tracing"
254 depends on TRACEPOINTS
255 default n
256 help
257 Enable kernel-style events (tracepoint) for Litmus. Litmus events
258 trace the same functions as the above sched_trace_XXX(), but can
259 be enabled independently.
260 Litmus tracepoints can be recorded and analyzed together (single
261 time reference) with all other kernel tracing events (e.g.,
262 sched:sched_switch, etc.).
263
264 This also enables a quick way to visualize schedule traces using
265 trace-cmd utility and kernelshark visualizer.
266
267 Say Yes for debugging and visualization purposes.
268 Say No for overhead tracing.
269
270config SCHED_OVERHEAD_TRACE
271 bool "Record timestamps for overhead measurements"
272 depends on FEATHER_TRACE
273 default y
274 help
275 Export event stream for overhead tracing.
276 Say Yes for overhead tracing.
277
278config SCHED_OVERHEAD_TRACE_SHIFT
279 int "Buffer size for Feather-Trace overhead data"
280 depends on SCHED_OVERHEAD_TRACE
281 range 15 32
282 default 22
283 help
284
285 Select the buffer size for the Feather-Trace overhead tracing
286 infrastructure (/dev/litmus/ft_trace0 & ftcat) as a power of two. The
287 larger the buffer, the lower the chance of buffer overflows if
288 the ftcat process is starved by real-time activity. On machines with
289 large memories, large buffer sizes are recommended.
290
291 Examples: 16 => 2 MB
292 24 => 512 MB
293 26 => 2 GB
294
295config SCHED_DEBUG_TRACE
296 bool "TRACE() debugging"
297 default n
298 help
299 Include support for sched_trace_log_message(), which is used to
300 implement TRACE(). If disabled, no TRACE() messages will be included
301 in the kernel, and no overheads due to debugging statements will be
302 incurred by the scheduler. Disable if the overhead is not acceptable
303 (e.g. benchmarking).
304
305 Say Yes for debugging.
306 Say No for overhead tracing.
307
308config SCHED_DEBUG_TRACE_SHIFT
309 int "Buffer size for TRACE() buffer"
310 depends on SCHED_DEBUG_TRACE
311 range 14 22
312 default 18
313 help
314
315 Select the amount of memory needed for the TRACE() buffer, as a
316 power of two. The TRACE() buffer is global and statically allocated. If
317 the buffer is too small, there will be holes in the TRACE() log if the
318 buffer-flushing task is starved.
319
320 The default should be sufficient for most systems. Increase the buffer
321 size if the log contains holes. Reduce the buffer size when running on
322 a memory-constrained system.
323
324 Examples: 14 => 16KB
325 18 => 256KB
326 20 => 1MB
327
328 This buffer is exported to userspace using a misc device as
329 'litmus/log'. On a system with default udev rules, a corresponding
330 character device node should be created at /dev/litmus/log. The buffer
331 can be flushed using cat, e.g., 'cat /dev/litmus/log > my_log_file.txt'.
332
333config SCHED_DEBUG_TRACE_CALLER
334 bool "Include [function@file:line] tag in TRACE() log"
335 depends on SCHED_DEBUG_TRACE
336 default n
337 help
338 With this option enabled, TRACE() prepends
339
340 "[<function name>@<filename>:<line number>]"
341
342 to each message in the debug log. Enable this to aid in figuring out
343 what was called in which order. The downside is that it adds a lot of
344 clutter.
345
346 If unsure, say No.
347
348config PREEMPT_STATE_TRACE
349 bool "Trace preemption state machine transitions"
350 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
351 default n
352 help
353 With this option enabled, each CPU will log when it transitions
354 states in the preemption state machine. This state machine is
355 used to determine how to react to IPIs (avoid races with in-flight IPIs).
356
357 Warning: this creates a lot of information in the debug trace. Only
358 recommended when you are debugging preemption-related races.
359
360 If unsure, say No.
361
362config REPORT_TIMER_LATENCY
363 bool "Warn when hrtimers incur large latency"
364 default n
365 help
366 With this option enabled, the hrtimer code will printk()
367 a warning when a timer fires significantly later than its intended
368 time. This can be useful when debugging latency issues.
369
370 If unsure, say No.
371
372config REPORT_TIMER_LATENCY_THRESHOLD
373 int "Maximum acceptable timer latency (in nanoseconds)"
374 depends on REPORT_TIMER_LATENCY
375 range 10000 100000000
376 default 1000000
377 help
378 If a timer fires more than the given threshold after its intended
379 expiration time, a warning message is printed to the kernel log.
380 By default, the threshold is one millisecond (= one million nanoseconds).
381
382endmenu
383
384endmenu
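
The shift-based buffer options above are easy to misjudge. The following standalone sketch (illustrative only, not part of the patch; the helper names are made up) turns shift values into byte counts using the per-event sizes stated in the help texts.

#include <stdio.h>

/* 2^shift events per CPU, 24 bytes per record plus one flag byte,
 * as stated in the SCHED_TASK_TRACE_SHIFT help text. */
static unsigned long long task_trace_bytes_per_cpu(unsigned int shift)
{
        return (1ULL << shift) * (24 + 1);
}

/* The TRACE() buffer is a single global allocation of 2^shift bytes,
 * as stated in the SCHED_DEBUG_TRACE_SHIFT help text. */
static unsigned long long debug_trace_bytes(unsigned int shift)
{
        return 1ULL << shift;
}

int main(void)
{
        printf("SCHED_TASK_TRACE_SHIFT=9   -> %llu bytes per CPU\n",
               task_trace_bytes_per_cpu(9));    /* 512 events -> 12800 bytes */
        printf("SCHED_DEBUG_TRACE_SHIFT=18 -> %llu bytes total\n",
               debug_trace_bytes(18));          /* 262144 bytes = 256 KB */
        return 0;
}
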
diff --git a/litmus/Makefile b/litmus/Makefile
new file mode 100644
index 000000000000..ecaa28dc68ad
--- /dev/null
+++ b/litmus/Makefile
@@ -0,0 +1,36 @@
1#
2# Makefile for LITMUS^RT
3#
4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 binheap.o \
20 ctrldev.o \
21 uncachedev.o \
22 sched_gsn_edf.o \
23 sched_psn_edf.o \
24 sched_pfp.o
25
26obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
27obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
28
29obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
33
34obj-y += sched_pres.o
35
36obj-y += reservations/
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..2707e0122b6d
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,316 @@
1#include <linux/bug.h>
2#include <linux/kernel.h>
3#include <litmus/bheap.h>
4
5void bheap_init(struct bheap* heap)
6{
7 heap->head = NULL;
8 heap->min = NULL;
9}
10
11void bheap_node_init(struct bheap_node** _h, void* value)
12{
13 struct bheap_node* h = *_h;
14 h->parent = NULL;
15 h->next = NULL;
16 h->child = NULL;
17 h->degree = NOT_IN_HEAP;
18 h->value = value;
19 h->ref = _h;
20}
21
22
23/* make child a subtree of root */
24static void __bheap_link(struct bheap_node* root,
25 struct bheap_node* child)
26{
27 child->parent = root;
28 child->next = root->child;
29 root->child = child;
30 root->degree++;
31}
32
33/* merge root lists */
34static struct bheap_node* __bheap_merge(struct bheap_node* a,
35 struct bheap_node* b)
36{
37 struct bheap_node* head = NULL;
38 struct bheap_node** pos = &head;
39
40 while (a && b) {
41 if (a->degree < b->degree) {
42 *pos = a;
43 a = a->next;
44 } else {
45 *pos = b;
46 b = b->next;
47 }
48 pos = &(*pos)->next;
49 }
50 if (a)
51 *pos = a;
52 else
53 *pos = b;
54 return head;
55}
56
57/* reverse a linked list of nodes. also clears parent pointer */
58static struct bheap_node* __bheap_reverse(struct bheap_node* h)
59{
60 struct bheap_node* tail = NULL;
61 struct bheap_node* next;
62
63 if (!h)
64 return h;
65
66 h->parent = NULL;
67 while (h->next) {
68 next = h->next;
69 h->next = tail;
70 tail = h;
71 h = next;
72 h->parent = NULL;
73 }
74 h->next = tail;
75 return h;
76}
77
78static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
79 struct bheap_node** prev, struct bheap_node** node)
80{
81 struct bheap_node *_prev, *cur;
82 *prev = NULL;
83
84 if (!heap->head) {
85 *node = NULL;
86 return;
87 }
88
89 *node = heap->head;
90 _prev = heap->head;
91 cur = heap->head->next;
92 while (cur) {
93 if (higher_prio(cur, *node)) {
94 *node = cur;
95 *prev = _prev;
96 }
97 _prev = cur;
98 cur = cur->next;
99 }
100}
101
102static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
103 struct bheap_node* h2)
104{
105 struct bheap_node* h1;
106 struct bheap_node *prev, *x, *next;
107 if (!h2)
108 return;
109 h1 = heap->head;
110 if (!h1) {
111 heap->head = h2;
112 return;
113 }
114 h1 = __bheap_merge(h1, h2);
115 prev = NULL;
116 x = h1;
117 next = x->next;
118 while (next) {
119 if (x->degree != next->degree ||
120 (next->next && next->next->degree == x->degree)) {
121 /* nothing to do, advance */
122 prev = x;
123 x = next;
124 } else if (higher_prio(x, next)) {
125 /* x becomes the root of next */
126 x->next = next->next;
127 __bheap_link(x, next);
128 } else {
129 /* next becomes the root of x */
130 if (prev)
131 prev->next = next;
132 else
133 h1 = next;
134 __bheap_link(next, x);
135 x = next;
136 }
137 next = x->next;
138 }
139 heap->head = h1;
140}
141
142static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
143 struct bheap* heap)
144{
145 struct bheap_node *prev, *node;
146 __bheap_min(higher_prio, heap, &prev, &node);
147 if (!node)
148 return NULL;
149 if (prev)
150 prev->next = node->next;
151 else
152 heap->head = node->next;
153 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
154 return node;
155}
156
157/* insert (and reinitialize) a node into the heap */
158void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
159 struct bheap_node* node)
160{
161 struct bheap_node *min;
162 node->child = NULL;
163 node->parent = NULL;
164 node->next = NULL;
165 node->degree = 0;
166 if (heap->min && higher_prio(node, heap->min)) {
167 /* swap min cache */
168 min = heap->min;
169 min->child = NULL;
170 min->parent = NULL;
171 min->next = NULL;
172 min->degree = 0;
173 __bheap_union(higher_prio, heap, min);
174 heap->min = node;
175 } else
176 __bheap_union(higher_prio, heap, node);
177}
178
179void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
180{
181 struct bheap_node* min;
182 if (heap->min) {
183 min = heap->min;
184 heap->min = NULL;
185 bheap_insert(higher_prio, heap, min);
186 }
187}
188
189/* merge addition into target */
190void bheap_union(bheap_prio_t higher_prio,
191 struct bheap* target, struct bheap* addition)
192{
193 /* first insert any cached minima, if necessary */
194 bheap_uncache_min(higher_prio, target);
195 bheap_uncache_min(higher_prio, addition);
196 __bheap_union(higher_prio, target, addition->head);
197 /* this is a destructive merge */
198 addition->head = NULL;
199}
200
201struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
202 struct bheap* heap)
203{
204 if (!heap->min)
205 heap->min = __bheap_extract_min(higher_prio, heap);
206 return heap->min;
207}
208
209struct bheap_node* bheap_take(bheap_prio_t higher_prio,
210 struct bheap* heap)
211{
212 struct bheap_node *node;
213 if (!heap->min)
214 heap->min = __bheap_extract_min(higher_prio, heap);
215 node = heap->min;
216 heap->min = NULL;
217 if (node)
218 node->degree = NOT_IN_HEAP;
219 return node;
220}
221
222int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
223{
224 struct bheap_node *parent;
225 struct bheap_node** tmp_ref;
226 void* tmp;
227
228 /* bubble up */
229 parent = node->parent;
230 while (parent && higher_prio(node, parent)) {
231 /* swap parent and node */
232 tmp = parent->value;
233 parent->value = node->value;
234 node->value = tmp;
235 /* swap references */
236 *(parent->ref) = node;
237 *(node->ref) = parent;
238 tmp_ref = parent->ref;
239 parent->ref = node->ref;
240 node->ref = tmp_ref;
241 /* step up */
242 node = parent;
243 parent = node->parent;
244 }
245
246 return parent != NULL;
247}
248
249void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
250 struct bheap_node* node)
251{
252 struct bheap_node *parent, *prev, *pos;
253 struct bheap_node** tmp_ref;
254 void* tmp;
255
256 if (heap->min != node) {
257 /* bubble up */
258 parent = node->parent;
259 while (parent) {
260 /* swap parent and node */
261 tmp = parent->value;
262 parent->value = node->value;
263 node->value = tmp;
264 /* swap references */
265 *(parent->ref) = node;
266 *(node->ref) = parent;
267 tmp_ref = parent->ref;
268 parent->ref = node->ref;
269 node->ref = tmp_ref;
270 /* step up */
271 node = parent;
272 parent = node->parent;
273 }
274 /* now delete:
275 * first find prev */
276 prev = NULL;
277 pos = heap->head;
278 while (pos != node) {
279 BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */
280 prev = pos;
281 pos = pos->next;
282 }
283 /* we have prev, now remove node */
284 if (prev)
285 prev->next = node->next;
286 else
287 heap->head = node->next;
288 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
289 } else
290 heap->min = NULL;
291 node->degree = NOT_IN_HEAP;
292}
293
294/* allocate a heap node for value and insert into the heap */
295int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
296 void* value, int gfp_flags)
297{
298 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
299 if (likely(hn)) {
300 bheap_node_init(&hn, value);
301 bheap_insert(higher_prio, heap, hn);
302 }
303 return hn != NULL;
304}
305
306void* bheap_take_del(bheap_prio_t higher_prio,
307 struct bheap* heap)
308{
309 struct bheap_node* hn = bheap_take(higher_prio, heap);
310 void* ret = NULL;
311 if (hn) {
312 ret = hn->value;
313 bheap_node_free(hn);
314 }
315 return ret;
316}
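
For orientation, here is a minimal usage sketch of the binomial-heap API above, not taken from the patch: struct demo_job and the demo_* helpers are invented for illustration, while bheap_node_alloc()/bheap_node_free() are the allocators referenced by bheap_add() and bheap_take_del().

#include <linux/gfp.h>
#include <litmus/bheap.h>

struct demo_job {
        unsigned long long deadline;
};

/* bheap_prio_t comparator: an earlier deadline means higher priority. */
static int demo_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
        struct demo_job *ja = a->value, *jb = b->value;
        return ja->deadline < jb->deadline;
}

static void demo_insert_and_take(struct bheap *heap, struct demo_job *job)
{
        struct bheap_node *hn = bheap_node_alloc(GFP_ATOMIC);

        if (!hn)
                return;
        bheap_node_init(&hn, job);               /* node carries job via ->value */
        bheap_insert(demo_higher_prio, heap, hn);

        hn = bheap_take(demo_higher_prio, heap); /* earliest-deadline node */
        if (hn)
                bheap_node_free(hn);
}
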
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..d3ab34b92096
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,387 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. Actual nodes swap instead of
40 * just data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble node to delete up to the root,
360 * and then delete the root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368 /* temporarily poison the data pointer so the node bubbles up to the top. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose key has decreased in value (priority increased).
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
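
A brief, hypothetical sketch of how the low-level __binheap_* entry points above are driven: the embedded binheap_node carries a back-pointer to its container via ->data, and the handle's ->compare callback decides which node bubbles towards the root. struct demo_item and the demo_* helpers are invented; the handle is assumed to have been initialized empty with demo_min_first as its comparator (the convenience macros for that live in litmus/binheap.h).

#include <litmus/binheap.h>

struct demo_item {
        unsigned long long key;
        struct binheap_node node;
};

/* Min-heap order: the smaller key bubbles to the root. */
static int demo_min_first(struct binheap_node *a, struct binheap_node *b)
{
        struct demo_item *ia = a->data, *ib = b->data;
        return ia->key < ib->key;
}

static void demo_insert(struct binheap *handle, struct demo_item *item)
{
        __binheap_add(&item->node, handle, item);
}

static void demo_remove(struct binheap *handle, struct demo_item *item)
{
        if (binheap_is_in_this_heap(&item->node, handle))
                __binheap_delete(&item->node, handle);
}
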
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 000000000000..18dac24e5632
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,168 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4#include <linux/uaccess.h>
5#include <linux/module.h>
6
7#include <litmus/debug_trace.h>
8#include <litmus/litmus.h>
9#include <litmus/preempt.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/np.h>
12
13#include <litmus/budget.h>
14
15struct enforcement_timer {
16 /* The enforcement timer is used to accurately police
17 * slice budgets. */
18 struct hrtimer timer;
19 int armed;
20};
21
22DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
23
24static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
25{
26 struct enforcement_timer* et = container_of(timer,
27 struct enforcement_timer,
28 timer);
29 unsigned long flags;
30
31 local_irq_save(flags);
32 TRACE("enforcement timer fired.\n");
33 et->armed = 0;
34 /* activate scheduler */
35 litmus_reschedule_local();
36 local_irq_restore(flags);
37
38 return HRTIMER_NORESTART;
39}
40
41/* assumes called with IRQs off */
42static void cancel_enforcement_timer(struct enforcement_timer* et)
43{
44 int ret;
45
46 TRACE("cancelling enforcement timer.\n");
47
48 /* Since interrupts are disabled and et->armed is only
49 * modified locally, we do not need any locks.
50 */
51
52 if (et->armed) {
53 ret = hrtimer_try_to_cancel(&et->timer);
54 /* Should never be inactive. */
55 BUG_ON(ret == 0);
56 /* Should never be running concurrently. */
57 BUG_ON(ret == -1);
58
59 et->armed = 0;
60 }
61}
62
63/* assumes called with IRQs off */
64static void arm_enforcement_timer(struct enforcement_timer* et,
65 struct task_struct* t)
66{
67 lt_t when_to_fire;
68 TRACE_TASK(t, "arming enforcement timer.\n");
69
70 WARN_ONCE(!hrtimer_is_hres_active(&et->timer),
71 KERN_ERR "WARNING: no high resolution timers available!?\n");
72
73 /* Calling this when there is no budget left for the task
74 * makes no sense, unless the task is non-preemptive. */
75 BUG_ON(budget_exhausted(t) && (!is_np(t)));
76
77 /* hrtimer_start_range_ns() cancels the timer
78 * anyway, so we don't have to check whether it is still armed */
79
80 if (likely(!is_np(t))) {
81 when_to_fire = litmus_clock() + budget_remaining(t);
82 hrtimer_start(&et->timer, ns_to_ktime(when_to_fire),
83 HRTIMER_MODE_ABS_PINNED);
84 et->armed = 1;
85 }
86}
87
88
89/* expects to be called with IRQs off */
90void update_enforcement_timer(struct task_struct* t)
91{
92 struct enforcement_timer* et = this_cpu_ptr(&budget_timer);
93
94 if (t && budget_precisely_enforced(t)) {
95 /* Make sure we call into the scheduler when this budget
96 * expires. */
97 arm_enforcement_timer(et, t);
98 } else if (et->armed) {
99 /* Make sure we don't cause unnecessary interrupts. */
100 cancel_enforcement_timer(et);
101 }
102}
103
104
105static int __init init_budget_enforcement(void)
106{
107 int cpu;
108 struct enforcement_timer* et;
109
110 for (cpu = 0; cpu < NR_CPUS; cpu++) {
111 et = &per_cpu(budget_timer, cpu);
112 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
113 et->timer.function = on_enforcement_timeout;
114 }
115 return 0;
116}
117
118void litmus_current_budget(lt_t *used_so_far, lt_t *remaining)
119{
120 struct task_struct *t = current;
121 unsigned long flags;
122 s64 delta;
123
124 local_irq_save(flags);
125
126 delta = sched_clock_cpu(smp_processor_id()) - t->se.exec_start;
127 if (delta < 0)
128 delta = 0;
129
130 TRACE_CUR("current_budget: sc:%llu start:%llu lt_t:%llu delta:%lld exec-time:%llu rem:%llu\n",
131 sched_clock_cpu(smp_processor_id()), t->se.exec_start,
132 litmus_clock(), delta,
133 tsk_rt(t)->job_params.exec_time,
134 budget_remaining(t));
135
136 if (used_so_far)
137 *used_so_far = tsk_rt(t)->job_params.exec_time + delta;
138
139 if (remaining) {
140 *remaining = budget_remaining(t);
141 if (*remaining > delta)
142 *remaining -= delta;
143 else
144 *remaining = 0;
145 }
146
147 local_irq_restore(flags);
148}
149
150asmlinkage long sys_get_current_budget(
151 lt_t __user * _expended,
152 lt_t __user *_remaining)
153{
154 lt_t expended = 0, remaining = 0;
155
156 if (is_realtime(current))
157 litmus->current_budget(&expended, &remaining);
158
159 if (_expended && put_user(expended, _expended))
160 return -EFAULT;
161
162 if (_remaining && put_user(remaining, _remaining))
163 return -EFAULT;
164
165 return 0;
166}
167
168module_init(init_budget_enforcement);
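
The accounting done by litmus_current_budget() is easiest to follow with concrete numbers. The standalone sketch below (illustrative only) mirrors it: the time executed since exec_start (delta) is charged on top of the recorded exec_time, and the remaining budget is clamped at zero.

#include <stdio.h>

int main(void)
{
        unsigned long long budget    = 10000000ULL; /* 10 ms allocation        */
        unsigned long long exec_time =  7000000ULL; /*  7 ms already accounted */
        unsigned long long delta     =  2000000ULL; /*  2 ms since exec_start  */

        unsigned long long used      = exec_time + delta;
        unsigned long long remaining = budget - exec_time; /* budget_remaining() */

        remaining = remaining > delta ? remaining - delta : 0;

        /* prints: used=9000000 ns remaining=1000000 ns */
        printf("used=%llu ns remaining=%llu ns\n", used, remaining);
        return 0;
}
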
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 000000000000..de2aca2a271c
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,119 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4#include <linux/cacheinfo.h>
5
6#include <litmus/debug_trace.h>
7#include <litmus/clustered.h>
8
9int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index)
10{
11 struct cpu_cacheinfo* info = get_cpu_cacheinfo(cpu);
12 struct cacheinfo *ci;
13
14 if (!info || index >= info->num_leaves) {
15 TRACE("no shared-cache CPUs: info=%d index=%u\n",
16 info != NULL, index);
17 return 1;
18 }
19
20 if (!info->info_list) {
21 TRACE("no shared-cache CPUs: no info_list (cpu=%u)\n", cpu);
22 }
23 ci = info->info_list + index;
24
25 cpumask_copy(mask, &ci->shared_cpu_map);
26
27 TRACE("get_shared: P%u@L%u -> %d siblings\n", cpu, index, cpumask_weight(mask));
28
29 return 0;
30}
31
32int get_cluster_size(enum cache_level level)
33{
34 cpumask_var_t mask;
35 int ok;
36 int num_cpus;
37
38 if (level == GLOBAL_CLUSTER)
39 return num_online_cpus();
40 else {
41 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
42 return -ENOMEM;
43 /* assumes CPU 0 is representative of all CPUs */
44 ok = get_shared_cpu_map(mask, 0, level);
45 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
46 if (ok == 0)
47 num_cpus = cpumask_weight(mask);
48 free_cpumask_var(mask);
49
50 if (ok == 0)
51 return num_cpus;
52 else
53 return -EINVAL;
54 }
55}
56
57int assign_cpus_to_clusters(enum cache_level level,
58 struct scheduling_cluster* clusters[],
59 unsigned int num_clusters,
60 struct cluster_cpu* cpus[],
61 unsigned int num_cpus)
62{
63 cpumask_var_t mask;
64 unsigned int i, free_cluster = 0, low_cpu;
65 int err = 0;
66
67 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
68 return -ENOMEM;
69
70 /* clear cluster pointers */
71 for (i = 0; i < num_cpus; i++) {
72 cpus[i]->id = i;
73 cpus[i]->cluster = NULL;
74 }
75
76 /* initialize clusters */
77 for (i = 0; i < num_clusters; i++) {
78 clusters[i]->id = i;
79 INIT_LIST_HEAD(&clusters[i]->cpus);
80 }
81
82 /* Assign each CPU. Two assumptions are made:
83 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
84 * 2) All cpus that belong to some cluster are online.
85 */
86 for_each_online_cpu(i) {
87 /* get lowest-id CPU in cluster */
88 if (level != GLOBAL_CLUSTER) {
89 err = get_shared_cpu_map(mask, cpus[i]->id, level);
90 if (err != 0) {
91 /* ugh... wrong cache level? Either caller screwed up
92 * or the CPU topology is weird. */
93 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
94 level, err);
95 err = -EINVAL;
96 goto out;
97 }
98 low_cpu = cpumask_first(mask);
99 } else
100 low_cpu = 0;
101 if (low_cpu == i) {
102 /* caller must provide an appropriate number of clusters */
103 BUG_ON(free_cluster >= num_clusters);
104
105 /* create new cluster */
106 cpus[i]->cluster = clusters[free_cluster++];
107 } else {
108 /* low_cpu points to the right cluster
109 * Assumption: low_cpu is actually online and was processed earlier. */
110 cpus[i]->cluster = cpus[low_cpu]->cluster;
111 }
112 /* enqueue in cpus list */
113 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
114 printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id);
115 }
116out:
117 free_cpumask_var(mask);
118 return err;
119}
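
As a rough sketch of the intended calling convention (assumptions: the clusters[] and cpus[] arrays are already allocated by the plugin; demo_setup_clusters() is invented, though a C-EDF-style plugin does something along these lines): size the cluster array from get_cluster_size() and then hand everything to assign_cpus_to_clusters().

#include <linux/errno.h>
#include <linux/kernel.h>
#include <litmus/clustered.h>

static int demo_setup_clusters(enum cache_level level,
                               struct scheduling_cluster *clusters[],
                               struct cluster_cpu *cpus[],
                               unsigned int num_cpus)
{
        int cluster_size = get_cluster_size(level);
        unsigned int num_clusters;

        if (cluster_size <= 0)
                return -EINVAL; /* unsupported or invalid cache level */

        /* one cluster per group of CPUs sharing the chosen cache level */
        num_clusters = DIV_ROUND_UP(num_cpus, cluster_size);

        return assign_cpus_to_clusters(level, clusters, num_clusters,
                                       cpus, num_cpus);
}
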
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..cc74c5afa5c6
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,264 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6#include <linux/uaccess.h>
7
8
9#include <litmus/litmus.h>
10#include <litmus/debug_trace.h>
11
12/* only one page for now, but we might want to add a RO version at some point */
13
14#define CTRL_NAME "litmus/ctrl"
15
16/* allocate t->rt_param.ctrl_page*/
17static int alloc_ctrl_page(struct task_struct *t)
18{
19 int err = 0;
20
21 /* only allocate if the task doesn't have one yet */
22 if (!tsk_rt(t)->ctrl_page) {
23 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
24 if (!tsk_rt(t)->ctrl_page)
25 err = -ENOMEM;
26 /* will get de-allocated in task teardown */
27 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
28 tsk_rt(t)->ctrl_page);
29 }
30 return err;
31}
32
33static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
34{
35 int err;
36
37 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
38
39 TRACE_CUR(CTRL_NAME
40 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
41 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
42 vma->vm_page_prot);
43
44 /* Map it into the vma. */
45 err = vm_insert_page(vma, vma->vm_start, ctrl);
46
47 if (err)
48 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
49
50 return err;
51}
52
53static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
54{
55 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
56 vma->vm_flags, vma->vm_page_prot);
57
58 TRACE_CUR(CTRL_NAME
59 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
60 (void*) vma->vm_start, (void*) vma->vm_end, vma,
61 vma->vm_private_data);
62}
63
64static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
65 struct vm_fault* vmf)
66{
67 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
68 vma->vm_flags, vmf->pgoff);
69
70 /* This function should never be called, since all pages should have
71 * been mapped by mmap() already. */
72 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
73
74 return VM_FAULT_SIGBUS;
75}
76
77static struct vm_operations_struct litmus_ctrl_vm_ops = {
78 .close = litmus_ctrl_vm_close,
79 .fault = litmus_ctrl_vm_fault,
80};
81
82static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
83{
84 int err = 0;
85
86 /* first make sure mapper knows what he's doing */
87
88 /* you can only get one page */
89 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
90 return -EINVAL;
91
92 /* you can only map the "first" page */
93 if (vma->vm_pgoff != 0)
94 return -EINVAL;
95
96 /* you can't share it with anyone */
97 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
98 return -EINVAL;
99
100 vma->vm_ops = &litmus_ctrl_vm_ops;
101 /* This mapping should not be kept across forks,
102 * cannot be expanded, and is not a "normal" page. */
103 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ | VM_WRITE;
104
105 /* We don't want the first write access to trigger a "minor" page fault
106 * to mark the page as dirty. This is transient, private memory, we
107 * don't care if it was touched or not. PAGE_SHARED means RW access, but
108 * not execute, and avoids copy-on-write behavior.
109 * See protection_map in mmap.c. */
110 vma->vm_page_prot = PAGE_SHARED;
111
112 err = alloc_ctrl_page(current);
113 if (!err)
114 err = map_ctrl_page(current, vma);
115
116 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
117 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
118
119 return err;
120}
121
122/* LITMUS^RT system calls */
123
124asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param);
125asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param);
126asmlinkage long sys_reservation_create(int type, void __user *config);
127asmlinkage long sys_get_current_budget(lt_t __user * _expended, lt_t __user *_remaining);
128asmlinkage long sys_null_call(cycles_t __user *ts);
129asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config);
130asmlinkage long sys_od_close(int od);
131asmlinkage long sys_complete_job(void);
132asmlinkage long sys_litmus_lock(int lock_od);
133asmlinkage long sys_litmus_unlock(int lock_od);
134asmlinkage long sys_wait_for_job_release(unsigned int job);
135asmlinkage long sys_wait_for_ts_release(void);
136asmlinkage long sys_release_ts(lt_t __user *__when);
137
138static long litmus_ctrl_ioctl(struct file *filp,
139 unsigned int cmd, unsigned long arg)
140{
141 long err = -ENOIOCTLCMD;
142
143 /* LITMUS^RT syscall emulation: we expose LITMUS^RT-specific operations
144 * via ioctl() to avoid merge conflicts with the syscall tables when
145 * rebasing LITMUS^RT. While this is not the most elegant way to expose
146 * syscall-like functionality, it helps with reducing the effort
147 * required to maintain LITMUS^RT out of tree.
148 */
149
150 union litmus_syscall_args syscall_args;
151
152 switch (cmd) {
153 case LRT_set_rt_task_param:
154 case LRT_get_rt_task_param:
155 case LRT_reservation_create:
156 case LRT_get_current_budget:
157 case LRT_od_open:
158 /* multiple arguments => need to get args via pointer */
159 /* get syscall parameters */
160 if (copy_from_user(&syscall_args, (void*) arg,
161 sizeof(syscall_args))) {
162 return -EFAULT;
163 }
164
165 switch (cmd) {
166 case LRT_set_rt_task_param:
167 return sys_set_rt_task_param(
168 syscall_args.get_set_task_param.pid,
169 syscall_args.get_set_task_param.param);
170 case LRT_get_rt_task_param:
171 return sys_get_rt_task_param(
172 syscall_args.get_set_task_param.pid,
173 syscall_args.get_set_task_param.param);
174 case LRT_reservation_create:
175 return sys_reservation_create(
176 syscall_args.reservation_create.type,
177 syscall_args.reservation_create.config);
178 case LRT_get_current_budget:
179 return sys_get_current_budget(
180 syscall_args.get_current_budget.expended,
181 syscall_args.get_current_budget.remaining);
182 case LRT_od_open:
183 return sys_od_open(
184 syscall_args.od_open.fd,
185 syscall_args.od_open.obj_type,
186 syscall_args.od_open.obj_id,
187 syscall_args.od_open.config);
188 }
189
190
191 case LRT_null_call:
192 return sys_null_call((cycles_t __user *) arg);
193
194 case LRT_od_close:
195 return sys_od_close(arg);
196
197 case LRT_complete_job:
198 return sys_complete_job();
199
200 case LRT_litmus_lock:
201 return sys_litmus_lock(arg);
202
203 case LRT_litmus_unlock:
204 return sys_litmus_unlock(arg);
205
206 case LRT_wait_for_job_release:
207 return sys_wait_for_job_release(arg);
208
209 case LRT_wait_for_ts_release:
210 return sys_wait_for_ts_release();
211
212 case LRT_release_ts:
213 return sys_release_ts((lt_t __user *) arg);
214
215 default:
216 printk(KERN_DEBUG "ctrldev: strange ioctl (%u, %lu)\n", cmd, arg);
217 };
218
219 return err;
220}
221
222static struct file_operations litmus_ctrl_fops = {
223 .owner = THIS_MODULE,
224 .mmap = litmus_ctrl_mmap,
225 .unlocked_ioctl = litmus_ctrl_ioctl,
226};
227
228static struct miscdevice litmus_ctrl_dev = {
229 .name = CTRL_NAME,
230 .minor = MISC_DYNAMIC_MINOR,
231 .fops = &litmus_ctrl_fops,
232};
233
234static int __init init_litmus_ctrl_dev(void)
235{
236 int err;
237
238 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
239
240 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
241
242 BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
243 != LITMUS_CP_OFFSET_SCHED);
244 BUILD_BUG_ON(offsetof(struct control_page, irq_count)
245 != LITMUS_CP_OFFSET_IRQ_COUNT);
246 BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
247 != LITMUS_CP_OFFSET_TS_SC_START);
248 BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
249 != LITMUS_CP_OFFSET_IRQ_SC_START);
250
251 printk("Initializing LITMUS^RT control device.\n");
252 err = misc_register(&litmus_ctrl_dev);
253 if (err)
254 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
255 return err;
256}
257
258static void __exit exit_litmus_ctrl_dev(void)
259{
260 misc_deregister(&litmus_ctrl_dev);
261}
262
263module_init(init_litmus_ctrl_dev);
264module_exit(exit_litmus_ctrl_dev);
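
From userspace, the control device is used by mmap()ing exactly one private page at offset 0 and then issuing the LRT_* ioctls; liblitmus normally wraps this. The sketch below is illustrative only; it assumes a 4 KiB page size and that the LRT_* ioctl codes come from the exported LITMUS^RT headers, so the actual ioctl call is left in a comment.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        void *ctrl_page;
        int fd = open("/dev/litmus/ctrl", O_RDWR);

        if (fd < 0) {
                perror("open /dev/litmus/ctrl");
                return 1;
        }

        /* Exactly one page, offset 0, private mapping - anything else is
         * rejected by litmus_ctrl_mmap() above. */
        ctrl_page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE, fd, 0);
        if (ctrl_page == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return 1;
        }

        /* Single-argument emulated syscalls pass the argument directly, e.g.
         *
         *      ioctl(fd, LRT_complete_job, 0);
         *
         * multi-argument calls pass a pointer to union litmus_syscall_args. */

        munmap(ctrl_page, 4096);
        close(fd);
        return 0;
}
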
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..1cd5ec711d28
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,201 @@
1/*
2 * litmus/edf_common.c
3 *
4 * Common functions for EDF-based schedulers.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14#include <litmus/debug_trace.h>
15
16#include <litmus/edf_common.h>
17
18#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
19#include <litmus/fpmath.h>
20#endif
21
22#ifdef CONFIG_EDF_TIE_BREAK_HASH
23#include <linux/hash.h>
24static inline long edf_hash(struct task_struct *t)
25{
26 /* pid is 32 bits, so normally we would shove that into the
27 * upper 32-bits and put the job number in the bottom
28 * and hash the 64-bit number with hash_64(). Sadly,
29 * in testing, hash_64() doesn't distribute keys where the
30 * upper bits are close together (as would be the case with
31 * pids) and job numbers are equal (as would be the case with
32 * synchronous task sets with all relative deadlines equal).
33 *
34 * A 2006 Linux patch proposed the following solution
35 * (but for some reason it wasn't accepted...).
36 *
37 * At least this workaround works for 32-bit systems as well.
38 */
39 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
40}
41#endif
42
43
44/* edf_higher_prio - returns true if first has a higher EDF priority
45 * than second. Deadline ties are broken by PID.
46 *
47 * both first and second may be NULL
48 */
49int edf_higher_prio(struct task_struct* first,
50 struct task_struct* second)
51{
52 struct task_struct *first_task = first;
53 struct task_struct *second_task = second;
54
55 /* There is no point in comparing a task to itself. */
56 if (first && first == second) {
57 TRACE_TASK(first,
58 "WARNING: pointless edf priority comparison.\n");
59 return 0;
60 }
61
62
63 /* check for NULL tasks */
64 if (!first || !second)
65 return first && !second;
66
67#ifdef CONFIG_LITMUS_LOCKING
68
69 /* Check for inherited priorities. Change task
70 * used for comparison in such a case.
71 */
72 if (unlikely(first->rt_param.inh_task))
73 first_task = first->rt_param.inh_task;
74 if (unlikely(second->rt_param.inh_task))
75 second_task = second->rt_param.inh_task;
76
77 /* Check for priority boosting. Tie-break by start of boosting.
78 */
79 if (unlikely(is_priority_boosted(first_task))) {
80 /* first_task is boosted, how about second_task? */
81 if (!is_priority_boosted(second_task) ||
82 lt_before(get_boost_start(first_task),
83 get_boost_start(second_task)))
84 return 1;
85 else
86 return 0;
87 } else if (unlikely(is_priority_boosted(second_task)))
88 /* second_task is boosted, first is not*/
89 return 0;
90
91#endif
92
93 if (earlier_deadline(first_task, second_task)) {
94 return 1;
95 }
96 else if (get_deadline(first_task) == get_deadline(second_task)) {
97 /* Need to tie break. If a tie-break method does not give first_task
98 * priority outright, it must set pid_break: 1 to fall back to PID
99 * tie-breaking, 0 if second_task has priority. */
100 int pid_break;
101
102
103#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
104 /* Tie break by lateness. Jobs with greater lateness get
105 * priority. This should spread tardiness across all tasks,
106 * especially in task sets where all tasks have the same
107 * period and relative deadlines.
108 */
109 if (get_lateness(first_task) > get_lateness(second_task)) {
110 return 1;
111 }
112 pid_break = (get_lateness(first_task) == get_lateness(second_task));
113
114
115#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
116 /* Tie break by lateness, normalized by relative deadline. Jobs with
117 * greater normalized lateness get priority.
118 *
119 * Note: Considered using the algebraically equivalent
120 * lateness(first)*relative_deadline(second) >
121 lateness(second)*relative_deadline(first)
122 * to avoid fixed-point math, but values are prone to overflow if inputs
123 * are on the order of several seconds, even in 64-bit.
124 */
125 fp_t fnorm = _frac(get_lateness(first_task),
126 get_rt_relative_deadline(first_task));
127 fp_t snorm = _frac(get_lateness(second_task),
128 get_rt_relative_deadline(second_task));
129 if (_gt(fnorm, snorm)) {
130 return 1;
131 }
132 pid_break = _eq(fnorm, snorm);
133
134
135#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
136 /* Tie break by comparing hashes of the (pid, job#) tuple. There should be
137 * a 50% chance that first_task has a higher priority than second_task.
138 */
139 long fhash = edf_hash(first_task);
140 long shash = edf_hash(second_task);
141 if (fhash < shash) {
142 return 1;
143 }
144 pid_break = (fhash == shash);
145#else
146
147
148 /* CONFIG_EDF_PID_TIE_BREAK */
149 pid_break = 1; // fall through to tie-break by pid;
150#endif
151
152 /* Tie break by pid */
153 if(pid_break) {
154 if (first_task->pid < second_task->pid) {
155 return 1;
156 }
157 else if (first_task->pid == second_task->pid) {
158 /* If the PIDs are the same then the task with the
159 * inherited priority wins.
160 */
161 if (!second->rt_param.inh_task) {
162 return 1;
163 }
164 }
165 }
166 }
167 return 0; /* fall-through. prio(second_task) > prio(first_task) */
168}
169
170int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
171{
172 return edf_higher_prio(bheap2task(a), bheap2task(b));
173}
174
175void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
176 release_jobs_t release)
177{
178 rt_domain_init(rt, edf_ready_order, resched, release);
179}
180
181/* need_to_preempt - check whether the task t needs to be preempted
182 * call only with irqs disabled and with ready_lock acquired
183 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
184 */
185int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
186{
187 /* we need the read lock for edf_ready_queue */
188 /* no need to preempt if there is nothing pending */
189 if (!__jobs_pending(rt))
190 return 0;
191 /* we need to reschedule if t doesn't exist */
192 if (!t)
193 return 1;
194
195 /* NOTE: We cannot check for non-preemptibility since we
196 * don't know what address space we're currently in.
197 */
198
199 /* make sure to get non-rt stuff out of the way */
200 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
201}
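
A small worked example (illustrative only, using floating point instead of the kernel's fp_t) of why the normalized-lateness tie-break can reorder jobs compared to raw lateness when relative deadlines differ widely:

#include <stdio.h>

int main(void)
{
        /* Prior job of A: 2 ms late, relative deadline 10 ms.  */
        double lateness_a = 2e6, rel_dl_a = 10e6;
        /* Prior job of B: 5 ms late, relative deadline 100 ms. */
        double lateness_b = 5e6, rel_dl_b = 100e6;

        /* Raw lateness favours B (5 ms > 2 ms)... */
        printf("raw lateness [ms]:   A=%.1f  B=%.1f\n",
               lateness_a / 1e6, lateness_b / 1e6);

        /* ...but normalized lateness favours A (0.20 > 0.05): A is
         * proportionally further behind relative to its deadline. */
        printf("normalized lateness: A=%.2f  B=%.2f\n",
               lateness_a / rel_dl_a, lateness_b / rel_dl_b);
        return 0;
}
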
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..0ff54e41839c
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,308 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - objects descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
30 &generic_lock_ops, /* DFLP_SEM */
31};
32
33static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
34{
35 BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
36
37 if (fdso_ops[type]->create)
38 return fdso_ops[type]->create(obj_ref, type, config);
39 else
40 return -EINVAL;
41}
42
43static void fdso_destroy(obj_type_t type, void* obj)
44{
45 fdso_ops[type]->destroy(type, obj);
46}
47
48static int fdso_open(struct od_table_entry* entry, void* __user config)
49{
50 if (fdso_ops[entry->obj->type]->open)
51 return fdso_ops[entry->obj->type]->open(entry, config);
52 else
53 return 0;
54}
55
56static int fdso_close(struct od_table_entry* entry)
57{
58 if (fdso_ops[entry->obj->type]->close)
59 return fdso_ops[entry->obj->type]->close(entry);
60 else
61 return 0;
62}
63
64/* inode must be locked already */
65static int alloc_inode_obj(struct inode_obj_id** obj_ref,
66 struct inode* inode,
67 obj_type_t type,
68 unsigned int id,
69 void* __user config)
70{
71 struct inode_obj_id* obj;
72 void* raw_obj;
73 int err;
74
75 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
76 if (!obj) {
77 return -ENOMEM;
78 }
79
80 err = fdso_create(&raw_obj, type, config);
81 if (err != 0) {
82 kfree(obj);
83 return err;
84 }
85
86 INIT_LIST_HEAD(&obj->list);
87 atomic_set(&obj->count, 1);
88 obj->type = type;
89 obj->id = id;
90 obj->obj = raw_obj;
91 obj->inode = inode;
92
93 list_add(&obj->list, &inode->i_obj_list);
94 atomic_inc(&inode->i_count);
95
96 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
97
98 *obj_ref = obj;
99 return 0;
100}
101
102/* inode must be locked already */
103static struct inode_obj_id* get_inode_obj(struct inode* inode,
104 obj_type_t type,
105 unsigned int id)
106{
107 struct list_head* pos;
108 struct inode_obj_id* obj = NULL;
109
110 list_for_each(pos, &inode->i_obj_list) {
111 obj = list_entry(pos, struct inode_obj_id, list);
112 if (obj->id == id && obj->type == type) {
113 atomic_inc(&obj->count);
114 return obj;
115 }
116 }
117 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
118 return NULL;
119}
120
121
122static void put_inode_obj(struct inode_obj_id* obj)
123{
124 struct inode* inode;
125 int let_go = 0;
126
127 inode = obj->inode;
128 if (atomic_dec_and_test(&obj->count)) {
129
130 mutex_lock(&inode->i_obj_mutex);
131 /* no new references can be obtained */
132 if (!atomic_read(&obj->count)) {
133 list_del(&obj->list);
134 fdso_destroy(obj->type, obj->obj);
135 kfree(obj);
136 let_go = 1;
137 }
138 mutex_unlock(&inode->i_obj_mutex);
139 if (let_go)
140 iput(inode);
141 }
142}
143
144static struct od_table_entry* get_od_entry(struct task_struct* t)
145{
146 struct od_table_entry* table;
147 int i;
148
149
150 table = t->od_table;
151 if (!table) {
152 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
153 GFP_KERNEL);
154 t->od_table = table;
155 }
156
157 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
158 if (!table[i].used) {
159 table[i].used = 1;
160 return table + i;
161 }
162 return NULL;
163}
164
165static int put_od_entry(struct od_table_entry* od)
166{
167 put_inode_obj(od->obj);
168 od->used = 0;
169 return 0;
170}
171
172static long close_od_entry(struct od_table_entry *od)
173{
174 long ret;
175
176 /* Give the class a chance to reject the close. */
177 ret = fdso_close(od);
178 if (ret == 0)
179 ret = put_od_entry(od);
180
181 return ret;
182}
183
184void exit_od_table(struct task_struct* t)
185{
186 int i;
187
188 if (t->od_table) {
189 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
190 if (t->od_table[i].used)
191 close_od_entry(t->od_table + i);
192 kfree(t->od_table);
193 t->od_table = NULL;
194 }
195}
196
197static int do_sys_od_open(struct file* file, obj_type_t type, int id,
198 void* __user config)
199{
200 int idx = 0, err = 0;
201 struct inode* inode;
202 struct inode_obj_id* obj = NULL;
203 struct od_table_entry* entry;
204
205 inode = file_inode(file);
206
207 entry = get_od_entry(current);
208 if (!entry)
209 return -ENOMEM;
210
211 mutex_lock(&inode->i_obj_mutex);
212 obj = get_inode_obj(inode, type, id);
213 if (!obj)
214 err = alloc_inode_obj(&obj, inode, type, id, config);
215 if (err != 0) {
216 obj = NULL;
217 idx = err;
218 entry->used = 0;
219 } else {
220 entry->obj = obj;
221 entry->class = fdso_ops[type];
222 idx = entry - current->od_table;
223 }
224
225 mutex_unlock(&inode->i_obj_mutex);
226
227 /* open only if creation succeeded */
228 if (!err)
229 err = fdso_open(entry, config);
230 if (err < 0) {
231 /* The class rejected the open call.
232 * We need to clean up and tell user space.
233 */
234 if (obj)
235 put_od_entry(entry);
236 idx = err;
237 }
238
239 return idx;
240}
241
242
243struct od_table_entry* get_entry_for_od(int od)
244{
245 struct task_struct *t = current;
246
247 if (!t->od_table)
248 return NULL;
249 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
250 return NULL;
251 if (!t->od_table[od].used)
252 return NULL;
253 return t->od_table + od;
254}
255
256
257asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
258{
259 int ret = 0;
260 struct file* file;
261
262 /*
263 1) get file from fd, get inode from file
264 2) lock inode
265 3) try to lookup object
266 4) if not present create and enqueue object, inc inode refcnt
267 5) increment refcnt of object
268 6) alloc od_table_entry, setup ptrs
269 7) unlock inode
270 8) return offset in od_table as OD
271 */
272
273 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
274 ret = -EINVAL;
275 goto out;
276 }
277
278 file = fget(fd);
279 if (!file) {
280 ret = -EBADF;
281 goto out;
282 }
283
284 ret = do_sys_od_open(file, type, obj_id, config);
285
286 fput(file);
287
288out:
289 return ret;
290}
291
292
293asmlinkage long sys_od_close(int od)
294{
295 int ret = -EINVAL;
296 struct task_struct *t = current;
297
298 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
299 return ret;
300
301 if (!t->od_table || !t->od_table[od].used)
302 return ret;
303
304
305 ret = close_od_entry(t->od_table + od);
306
307 return ret;
308}
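
To make the object-descriptor lifecycle concrete, here is a hypothetical userspace sketch (liblitmus normally hides this) that names a lock via an inode and obtains an OD through the LRT_od_open ioctl handled in ctrldev.c. It assumes the LRT_* codes and union litmus_syscall_args (with the od_open member used by that handler) are available from the exported LITMUS^RT headers; the corresponding include is omitted because its installed path varies. The namespace file path and demo_open_lock() are made up.

#include <fcntl.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Returns an object descriptor (an index into the task's OD table),
 * or -1 with errno set on failure. */
static int demo_open_lock(int ctrl_fd, int obj_type, unsigned int obj_id)
{
        union litmus_syscall_args args;
        int ns_fd, od;

        /* Any file that all participating tasks can open serves as the
         * shared namespace; the object is identified by (inode, type, id). */
        ns_fd = open("/tmp/demo-lock-namespace", O_RDONLY | O_CREAT, 0666);
        if (ns_fd < 0)
                return -1;

        args.od_open.fd       = ns_fd;
        args.od_open.obj_type = obj_type;       /* e.g. FMLP_SEM */
        args.od_open.obj_id   = obj_id;
        args.od_open.config   = NULL;

        od = ioctl(ctrl_fd, LRT_od_open, &args);
        close(ns_fd);   /* the object keeps its own reference to the inode */
        return od;
}
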
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..595c7b8e561d
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,137 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14#include <litmus/debug_trace.h>
15
16#include <litmus/fp_common.h>
17
18/* fp_higher_prio - returns true if first has a higher static priority
19 * than second. Ties are broken by PID.
20 *
21 * both first and second may be NULL
22 */
23int fp_higher_prio(struct task_struct* first,
24 struct task_struct* second)
25{
26 struct task_struct *first_task = first;
27 struct task_struct *second_task = second;
28
29 /* There is no point in comparing a task to itself. */
30 if (unlikely(first && first == second)) {
31 TRACE_TASK(first,
32 "WARNING: pointless FP priority comparison.\n");
33 return 0;
34 }
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 if (!is_realtime(second_task))
41 return 1;
42
43#ifdef CONFIG_LITMUS_LOCKING
44
45 /* Check for inherited priorities. Change task
46 * used for comparison in such a case.
47 */
48 if (unlikely(first->rt_param.inh_task))
49 first_task = first->rt_param.inh_task;
50 if (unlikely(second->rt_param.inh_task))
51 second_task = second->rt_param.inh_task;
52
53 /* Comparisons to itself are only possible with
54 * priority inheritance when the svc_preempt interrupt fires just
55 * before scheduling (and everything that could follow in the
56 * ready queue). Always favour the original job, as that one will just
57 * suspend itself to resolve this.
58 */
59 if(first_task == second_task)
60 return first_task == first;
61
62 /* Check for priority boosting. Tie-break by start of boosting.
63 */
64 if (unlikely(is_priority_boosted(first_task))) {
65 /* first_task is boosted, how about second_task? */
66 if (is_priority_boosted(second_task))
67 /* break by priority point */
68 return lt_before(get_boost_start(first_task),
69 get_boost_start(second_task));
70 else
71 /* priority boosting wins. */
72 return 1;
73 } else if (unlikely(is_priority_boosted(second_task)))
74 /* second_task is boosted, first is not*/
75 return 0;
76
77#else
78 /* No locks, no priority inheritance, no comparisons to itself */
79 BUG_ON(first_task == second_task);
80#endif
81
82 if (get_priority(first_task) < get_priority(second_task))
83 return 1;
84 else if (get_priority(first_task) == get_priority(second_task))
85 /* Break by PID. */
86 return first_task->pid < second_task->pid;
87 else
88 return 0;
89}
90
91int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
92{
93 return fp_higher_prio(bheap2task(a), bheap2task(b));
94}
95
96void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
97 release_jobs_t release)
98{
99 rt_domain_init(rt, fp_ready_order, resched, release);
100}
101
102/* need_to_preempt - check whether the task t needs to be preempted
103 */
104int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
105{
106 struct task_struct *pending;
107
108 pending = fp_prio_peek(q);
109
110 if (!pending)
111 return 0;
112 if (!t)
113 return 1;
114
115 /* make sure to get non-rt stuff out of the way */
116 return !is_realtime(t) || fp_higher_prio(pending, t);
117}
118
119void fp_prio_queue_init(struct fp_prio_queue* q)
120{
121 int i;
122
123 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
124 q->bitmask[i] = 0;
125 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
126 bheap_init(&q->queue[i]);
127}
128
129void fp_ready_list_init(struct fp_ready_list* q)
130{
131 int i;
132
133 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
134 q->bitmask[i] = 0;
135 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
136 INIT_LIST_HEAD(q->queue + i);
137}
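
One detail worth spelling out: fp_higher_prio() treats a numerically smaller priority value as a higher priority, with PIDs breaking ties. The toy program below (illustrative only; demo_fp_higher_prio() is invented) demonstrates just that convention.

#include <stdio.h>

static int demo_fp_higher_prio(unsigned int prio_a, int pid_a,
                               unsigned int prio_b, int pid_b)
{
        if (prio_a < prio_b)            /* smaller value = higher priority */
                return 1;
        if (prio_a == prio_b)           /* equal priority: lower PID wins  */
                return pid_a < pid_b;
        return 0;
}

int main(void)
{
        /* prio 1 beats prio 5; among two prio-5 tasks the lower PID wins. */
        printf("%d %d\n",
               demo_fp_higher_prio(1, 100, 5, 50),
               demo_fp_higher_prio(5, 40, 5, 50));      /* prints: 1 1 */
        return 0;
}
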
diff --git a/litmus/ft_event.c b/litmus/ft_event.c
new file mode 100644
index 000000000000..dbf61f6c389a
--- /dev/null
+++ b/litmus/ft_event.c
@@ -0,0 +1,43 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5#if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_RELOCATABLE)
6/* provide dummy implementation */
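/* Fallback used when the architecture provides no Feather-Trace trigger
 * support (or CONFIG_RELOCATABLE is set): events are merely counted in
 * ft_events[] so that the enable/disable bookkeeping keeps working. */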
7
8int ft_events[MAX_EVENTS];
9
10int ft_enable_event(unsigned long id)
11{
12 if (id < MAX_EVENTS) {
13 ft_events[id]++;
14 return 1;
15 } else
16 return 0;
17}
18
19int ft_disable_event(unsigned long id)
20{
21 if (id < MAX_EVENTS && ft_events[id]) {
22 ft_events[id]--;
23 return 1;
24 } else
25 return 0;
26}
27
28int ft_disable_all_events(void)
29{
30 int i;
31
32 for (i = 0; i < MAX_EVENTS; i++)
33 ft_events[i] = 0;
34
35 return MAX_EVENTS;
36}
37
38int ft_is_event_enabled(unsigned long id)
39{
40 return id < MAX_EVENTS && ft_events[id];
41}
42
43#endif
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
new file mode 100644
index 000000000000..646e8c9fe230
--- /dev/null
+++ b/litmus/ftdev.c
@@ -0,0 +1,457 @@
1#include <linux/sched.h>
2#include <linux/fs.h>
3#include <linux/slab.h>
4#include <linux/cdev.h>
5#include <asm/uaccess.h>
6#include <linux/module.h>
7#include <linux/device.h>
8#include <linux/vmalloc.h>
9#include <linux/mutex.h>
10
11#include <litmus/feather_trace.h>
12#include <litmus/ftdev.h>
13
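/* Allocate a Feather-Trace buffer with 'count' slots of 'size' bytes each.
 * The slot payloads are laid out first, followed by one status-marker byte
 * per slot, hence the (size + 1) * count allocation below. */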
14struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
15{
16 struct ft_buffer* buf;
17 size_t total = (size + 1) * count;
18 char* mem;
19
20 buf = kmalloc(sizeof(*buf), GFP_KERNEL);
21 if (!buf)
22 return NULL;
23
24
25 mem = vmalloc(total);
26
27 if (!mem) {
28 kfree(buf);
29 return NULL;
30 }
31
32 if (!init_ft_buffer(buf, count, size,
33 mem + (count * size), /* markers at the end */
34 mem)) { /* buffer objects */
35 vfree(mem);
36 kfree(buf);
37 return NULL;
38 }
39 return buf;
40}
41
42void free_ft_buffer(struct ft_buffer* buf)
43{
44 if (buf) {
45 vfree(buf->buffer_mem);
46 kfree(buf);
47 }
48}
49
50struct ftdev_event {
51 int id;
52 struct ftdev_event* next;
53};
54
55static DEFINE_MUTEX(ft_event_activation_mutex);
56
57static int activate(struct ftdev_event** chain, int id)
58{
59 struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL);
60 if (ev) {
61 mutex_lock(&ft_event_activation_mutex);
62 printk(KERN_INFO
63 "Enabling feather-trace event %d.\n", (int) id);
64 ft_enable_event(id);
65 mutex_unlock(&ft_event_activation_mutex);
66 ev->id = id;
67 ev->next = *chain;
68 *chain = ev;
69 }
70 return ev ? 0 : -ENOMEM;
71}
72
73static void deactivate(struct ftdev_event** chain, int id)
74{
75 struct ftdev_event **cur = chain;
76 struct ftdev_event *nxt;
77 while (*cur) {
78 if ((*cur)->id == id) {
79 nxt = (*cur)->next;
80 kfree(*cur);
81 *cur = nxt;
82 printk(KERN_INFO
83 "Disabling feather-trace event %d.\n", (int) id);
84 mutex_lock(&ft_event_activation_mutex);
85 ft_disable_event(id);
86 mutex_unlock(&ft_event_activation_mutex);
87 break;
88 }
89 cur = &(*cur)->next;
90 }
91}
92
93static int ftdev_open(struct inode *in, struct file *filp)
94{
95 struct ftdev* ftdev;
96 struct ftdev_minor* ftdm;
97 unsigned int buf_idx = iminor(in);
98 int err = 0;
99
100 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
101
102 if (buf_idx >= ftdev->minor_cnt) {
103 err = -ENODEV;
104 goto out;
105 }
106 if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
107 goto out;
108
109 ftdm = ftdev->minor + buf_idx;
110 ftdm->ftdev = ftdev;
111 filp->private_data = ftdm;
112
113 if (mutex_lock_interruptible(&ftdm->lock)) {
114 err = -ERESTARTSYS;
115 goto out;
116 }
117
118 if (!ftdm->readers && ftdev->alloc)
119 err = ftdev->alloc(ftdev, buf_idx);
120 if (0 == err)
121 ftdm->readers++;
122
123 mutex_unlock(&ftdm->lock);
124out:
125 return err;
126}
127
128static int ftdev_release(struct inode *in, struct file *filp)
129{
130 struct ftdev* ftdev;
131 struct ftdev_minor* ftdm;
132 unsigned int buf_idx = iminor(in);
133 int err = 0;
134
135 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
136
137 if (buf_idx >= ftdev->minor_cnt) {
138 err = -ENODEV;
139 goto out;
140 }
141 ftdm = ftdev->minor + buf_idx;
142
143 if (mutex_lock_interruptible(&ftdm->lock)) {
144 err = -ERESTARTSYS;
145 goto out;
146 }
147
148 if (ftdm->readers == 1) {
149 while (ftdm->events)
150 deactivate(&ftdm->events, ftdm->events->id);
151
152 /* wait for any pending events to complete */
153 set_current_state(TASK_UNINTERRUPTIBLE);
154 schedule_timeout(HZ);
155
156 printk(KERN_ALERT "Failed trace writes: %u\n",
157 atomic_read(&ftdm->buf->failed_writes));
158
159 if (ftdev->free)
160 ftdev->free(ftdev, buf_idx);
161 }
162
163 ftdm->readers--;
164 mutex_unlock(&ftdm->lock);
165out:
166 return err;
167}
168
169/* based on ft_buffer_read
170 * @returns < 0 : page fault
171 * = 0 : no data available
172 * = 1 : one slot copied
173 */
174static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
175{
176 unsigned int idx;
177 int err = 0;
178 if (atomic_read(&buf->free_count) != buf->slot_count) {
179 /* data available */
180 idx = buf->read_idx % buf->slot_count;
181 if (buf->slots[idx] == SLOT_READY) {
182 err = copy_to_user(dest, ((char*) buf->buffer_mem) +
183 idx * buf->slot_size,
184 buf->slot_size);
185 if (err == 0) {
186 /* copy ok */
187 buf->slots[idx] = SLOT_FREE;
188 buf->read_idx++;
189 atomic_fetch_inc(&buf->free_count);
190 err = 1;
191 }
192 }
193 }
194 return err;
195}
196
197static ssize_t ftdev_read(struct file *filp,
198 char __user *to, size_t len, loff_t *f_pos)
199{
200 /* we ignore f_pos, this is strictly sequential */
201
202 ssize_t err = 0;
203 size_t chunk;
204 int copied;
205 struct ftdev_minor* ftdm = filp->private_data;
206
207 if (mutex_lock_interruptible(&ftdm->lock)) {
208 err = -ERESTARTSYS;
209 goto out;
210 }
211
212
213 chunk = ftdm->buf->slot_size;
214 while (len >= chunk) {
215 copied = ft_buffer_copy_to_user(ftdm->buf, to);
216 if (copied == 1) {
217 len -= chunk;
218 to += chunk;
219 err += chunk;
220 } else if (err == 0 && copied == 0 && ftdm->events) {
221 /* Only wait if there are any events enabled and only
222 * if we haven't copied some data yet. We cannot wait
223 * here with copied data because that data would get
224 * lost if the task is interrupted (e.g., killed).
225 */
226
227			/* Before sleeping, check whether a non-blocking
228 * read was requested.
229 */
230 if (filp->f_flags & O_NONBLOCK)
231 {
232 /* bug out, userspace doesn't want us to sleep */
233 err = -EWOULDBLOCK;
234 break;
235 }
236
237 mutex_unlock(&ftdm->lock);
238 set_current_state(TASK_INTERRUPTIBLE);
239
240 schedule_timeout(50);
241
242 if (signal_pending(current)) {
243 if (err == 0)
244 /* nothing read yet, signal problem */
245 err = -ERESTARTSYS;
246 goto out;
247 }
248 if (mutex_lock_interruptible(&ftdm->lock)) {
249 err = -ERESTARTSYS;
250 goto out;
251 }
252 } else if (copied < 0) {
253 /* page fault */
254 err = copied;
255 break;
256 } else
257 /* nothing left to get, return to user space */
258 break;
259 }
260 mutex_unlock(&ftdm->lock);
261out:
262 return err;
263}
264
265static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
266{
267 long err = -ENOIOCTLCMD;
268 struct ftdev_minor* ftdm = filp->private_data;
269
270 if (mutex_lock_interruptible(&ftdm->lock)) {
271 err = -ERESTARTSYS;
272 goto out;
273 }
274
275 /* FIXME: check id against list of acceptable events */
276
277 switch (cmd) {
278 case FTDEV_ENABLE_CMD:
279 if (activate(&ftdm->events, arg))
280 err = -ENOMEM;
281 else
282 err = 0;
283 break;
284
285 case FTDEV_DISABLE_CMD:
286 deactivate(&ftdm->events, arg);
287 err = 0;
288 break;
289
290 case FTDEV_CALIBRATE:
291 if (ftdm->ftdev->calibrate) {
292 err = ftdm->ftdev->calibrate(ftdm->ftdev, iminor(file_inode(filp)), arg);
293 }
294 break;
295
296 default:
297 printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg);
298 };
299
300 mutex_unlock(&ftdm->lock);
301out:
302 return err;
303}
304
305static ssize_t ftdev_write(struct file *filp, const char __user *from,
306 size_t len, loff_t *f_pos)
307{
308 struct ftdev_minor* ftdm = filp->private_data;
309 ssize_t err = -EINVAL;
310 struct ftdev* ftdev = ftdm->ftdev;
311
312 /* dispatch write to buffer-specific code, if available */
313 if (ftdev->write)
314 err = ftdev->write(ftdm->buf, len, from);
315
316 return err;
317}
318
319struct file_operations ftdev_fops = {
320 .owner = THIS_MODULE,
321 .open = ftdev_open,
322 .release = ftdev_release,
323 .write = ftdev_write,
324 .read = ftdev_read,
325 .unlocked_ioctl = ftdev_ioctl,
326};
327
328int ftdev_init( struct ftdev* ftdev, struct module* owner,
329 const int minor_cnt, const char* name)
330{
331 int i, err;
332
333 BUG_ON(minor_cnt < 1);
334
335 cdev_init(&ftdev->cdev, &ftdev_fops);
336 ftdev->name = name;
337 ftdev->minor_cnt = minor_cnt;
338 ftdev->cdev.owner = owner;
339 ftdev->cdev.ops = &ftdev_fops;
340 ftdev->alloc = NULL;
341 ftdev->free = NULL;
342 ftdev->can_open = NULL;
343 ftdev->write = NULL;
344 ftdev->calibrate = NULL;
345
346 ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
347 GFP_KERNEL);
348 if (!ftdev->minor) {
349 printk(KERN_WARNING "ftdev(%s): Could not allocate memory\n",
350 ftdev->name);
351 err = -ENOMEM;
352 goto err_out;
353 }
354
355 for (i = 0; i < ftdev->minor_cnt; i++) {
356 mutex_init(&ftdev->minor[i].lock);
357 ftdev->minor[i].readers = 0;
358 ftdev->minor[i].buf = NULL;
359 ftdev->minor[i].events = NULL;
360 }
361
362 ftdev->class = class_create(owner, ftdev->name);
363 if (IS_ERR(ftdev->class)) {
364 err = PTR_ERR(ftdev->class);
365 printk(KERN_WARNING "ftdev(%s): "
366 "Could not create device class.\n", ftdev->name);
367 goto err_dealloc;
368 }
369
370 return 0;
371
372err_dealloc:
373 kfree(ftdev->minor);
374err_out:
375 return err;
376}
377
378/*
379 * Destroy minor devices up to, but not including, up_to.
380 */
381static void ftdev_device_destroy(struct ftdev* ftdev, unsigned int up_to)
382{
383 dev_t minor_cntr;
384
385	if (up_to < 1 || up_to > ftdev->minor_cnt)
386		up_to = ftdev->minor_cnt; /* out-of-range (e.g., (unsigned) -1) means "all" */
387
388 for (minor_cntr = 0; minor_cntr < up_to; ++minor_cntr)
389 device_destroy(ftdev->class, MKDEV(ftdev->major, minor_cntr));
390}
391
392void ftdev_exit(struct ftdev* ftdev)
393{
394 printk("ftdev(%s): Exiting\n", ftdev->name);
395 ftdev_device_destroy(ftdev, -1);
396 cdev_del(&ftdev->cdev);
397 unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
398 class_destroy(ftdev->class);
399 kfree(ftdev->minor);
400}
401
402int register_ftdev(struct ftdev* ftdev)
403{
404 struct device **device;
405 dev_t trace_dev_tmp, minor_cntr;
406 int err;
407
408 err = alloc_chrdev_region(&trace_dev_tmp, 0, ftdev->minor_cnt,
409 ftdev->name);
410 if (err) {
411 printk(KERN_WARNING "ftdev(%s): "
412 "Could not allocate char. device region (%d minors)\n",
413 ftdev->name, ftdev->minor_cnt);
414 goto err_out;
415 }
416
417 ftdev->major = MAJOR(trace_dev_tmp);
418
419 err = cdev_add(&ftdev->cdev, trace_dev_tmp, ftdev->minor_cnt);
420 if (err) {
421 printk(KERN_WARNING "ftdev(%s): "
422 "Could not add cdev for major %u with %u minor(s).\n",
423 ftdev->name, ftdev->major, ftdev->minor_cnt);
424 goto err_unregister;
425 }
426
427 /* create the minor device(s) */
428 for (minor_cntr = 0; minor_cntr < ftdev->minor_cnt; ++minor_cntr)
429 {
430 trace_dev_tmp = MKDEV(ftdev->major, minor_cntr);
431 device = &ftdev->minor[minor_cntr].device;
432
433 *device = device_create(ftdev->class, NULL, trace_dev_tmp, NULL,
434 "litmus/%s%d", ftdev->name, minor_cntr);
435 if (IS_ERR(*device)) {
436 err = PTR_ERR(*device);
437 printk(KERN_WARNING "ftdev(%s): "
438 "Could not create device major/minor number "
439 "%u/%u\n", ftdev->name, ftdev->major,
440 minor_cntr);
441 printk(KERN_WARNING "ftdev(%s): "
442 "will attempt deletion of allocated devices.\n",
443 ftdev->name);
444 goto err_minors;
445 }
446 }
447
448 return 0;
449
450err_minors:
451 ftdev_device_destroy(ftdev, minor_cntr);
452 cdev_del(&ftdev->cdev);
453err_unregister:
454 unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
455err_out:
456 return err;
457}
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..43f1f94e0b6e
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,164 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/debug_trace.h>
7#include <litmus/preempt.h>
8#include <litmus/litmus.h>
9#include <litmus/sched_plugin.h>
10#include <litmus/sched_trace.h>
11#include <litmus/jobs.h>
12
13static inline void setup_release(struct task_struct *t, lt_t release)
14{
15 /* prepare next release */
16 t->rt_param.job_params.release = release;
17 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
18 t->rt_param.job_params.exec_time = 0;
19
20 /* update job sequence number */
21 t->rt_param.job_params.job_no++;
22
23 /* expose to user space */
24 if (has_control_page(t)) {
25 struct control_page* cp = get_control_page(t);
26 cp->deadline = t->rt_param.job_params.deadline;
27 cp->release = get_release(t);
28 cp->job_index = t->rt_param.job_params.job_no;
29 }
30}
31
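/* Advance task t to its next job: record how late the previous job finished
 * and set up the next release, either at the requested sporadic release time
 * or one period after the previous release. */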
32void prepare_for_next_period(struct task_struct *t)
33{
34 BUG_ON(!t);
35
36 /* Record lateness before we set up the next job's
37 * release and deadline. Lateness may be negative.
38 */
39 t->rt_param.job_params.lateness =
40 (long long)litmus_clock() -
41 (long long)t->rt_param.job_params.deadline;
42
43 if (tsk_rt(t)->sporadic_release) {
44 TRACE_TASK(t, "sporadic release at %llu\n",
45 tsk_rt(t)->sporadic_release_time);
46 /* sporadic release */
47 setup_release(t, tsk_rt(t)->sporadic_release_time);
48 tsk_rt(t)->sporadic_release = 0;
49 } else {
50 /* periodic release => add period */
51 setup_release(t, get_release(t) + get_rt_period(t));
52 }
53}
54
55void release_at(struct task_struct *t, lt_t start)
56{
57 BUG_ON(!t);
58 setup_release(t, start);
59 tsk_rt(t)->completed = 0;
60}
61
62void inferred_sporadic_job_release_at(struct task_struct *t, lt_t when)
63{
64 /* new sporadic release */
65 sched_trace_last_suspension_as_completion(t);
66 /* check if this task is resuming from a clock_nanosleep() call */
67 if (tsk_rt(t)->doing_abs_nanosleep &&
68 lt_after_eq(tsk_rt(t)->nanosleep_wakeup,
69 get_release(t) + get_rt_period(t))) {
70 /* clock_nanosleep() is supposed to wake up the task
71 * at a time that is a valid release time. Use that time
72 * rather than guessing the intended release time from the
73 * current time. */
74 TRACE_TASK(t, "nanosleep: backdating release "
75 "to %llu instead of %llu\n",
76 tsk_rt(t)->nanosleep_wakeup, when);
77 when = tsk_rt(t)->nanosleep_wakeup;
78 }
79 release_at(t, when);
80 sched_trace_task_release(t);
81}
82
83long default_wait_for_release_at(lt_t release_time)
84{
85 struct task_struct *t = current;
86 unsigned long flags;
87
88 local_irq_save(flags);
89 tsk_rt(t)->sporadic_release_time = release_time;
90 smp_wmb();
91 tsk_rt(t)->sporadic_release = 1;
92 local_irq_restore(flags);
93
94 return litmus->complete_job();
95}
96
97
98/*
99 * Deactivate current task until the beginning of the next period.
100 */
101long complete_job(void)
102{
103 preempt_disable();
104 TRACE_CUR("job completion indicated at %llu\n", litmus_clock());
105	/* Mark that we do not execute anymore */
106 tsk_rt(current)->completed = 1;
107 /* call schedule, this will return when a new job arrives
108 * it also takes care of preparing for the next release
109 */
110 litmus_reschedule_local();
111 preempt_enable();
112 return 0;
113}
114
115static long sleep_until_next_release(void);
116
117/* alternative job completion implementation that suspends the task */
118long complete_job_oneshot(void)
119{
120 struct task_struct *t = current;
121
122 preempt_disable();
123
124 TRACE_CUR("job completes at %llu (deadline: %llu)\n", litmus_clock(),
125 get_deadline(t));
126
127 sched_trace_task_completion(t, 0);
128 prepare_for_next_period(t);
129 sched_trace_task_release(t);
130
131 return sleep_until_next_release();
132}
133
134/* assumes caller has disabled preemptions;
135 * re-enables preemptions before returning */
136static long sleep_until_next_release(void)
137{
138 struct task_struct *t = current;
139 ktime_t next_release;
140 long err;
141
142 next_release = ns_to_ktime(get_release(t));
143
144 TRACE_CUR("next_release=%llu\n", get_release(t));
145
146 if (lt_after(get_release(t), litmus_clock())) {
147 set_current_state(TASK_INTERRUPTIBLE);
148 tsk_rt(t)->completed = 1;
149 preempt_enable_no_resched();
150 err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
151 /* If we get woken by a signal, we return early.
152 * This is intentional; we want to be able to kill tasks
153 * that are waiting for the next job release.
154 */
155 tsk_rt(t)->completed = 0;
156 } else {
157 err = 0;
158 TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(t), litmus_clock());
159 preempt_enable();
160 }
161
162 TRACE_CUR("return to next job at %llu\n", litmus_clock());
163 return err;
164}
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..bd192180fef7
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,773 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 *             the LITMUS initialization code,
4 *             and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/reboot.h>
13#include <linux/stop_machine.h>
14#include <linux/sched/rt.h>
15#include <linux/rwsem.h>
16#include <linux/interrupt.h>
17
18#include <litmus/debug_trace.h>
19#include <litmus/litmus.h>
20#include <litmus/bheap.h>
21#include <litmus/trace.h>
22#include <litmus/rt_domain.h>
23#include <litmus/litmus_proc.h>
24#include <litmus/sched_trace.h>
25
26#ifdef CONFIG_SCHED_CPU_AFFINITY
27#include <litmus/affinity.h>
28#endif
29
30#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
31#define CREATE_TRACE_POINTS
32#include <trace/events/litmus.h>
33#endif
34
35/* Number of RT tasks that exist in the system */
36atomic_t rt_task_count = ATOMIC_INIT(0);
37
38#ifdef CONFIG_RELEASE_MASTER
39/* current master CPU for handling timer IRQs */
40atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
41#endif
42
43static struct kmem_cache * bheap_node_cache;
44extern struct kmem_cache * release_heap_cache;
45
46struct bheap_node* bheap_node_alloc(int gfp_flags)
47{
48 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
49}
50
51void bheap_node_free(struct bheap_node* hn)
52{
53 kmem_cache_free(bheap_node_cache, hn);
54}
55
56struct release_heap* release_heap_alloc(int gfp_flags);
57void release_heap_free(struct release_heap* rh);
58
59/**
60 * Get the quantum alignment as a cmdline option.
61 * Default is aligned quanta.
62 */
63static bool aligned_quanta = 1;
64module_param(aligned_quanta, bool, 0644);
65
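/* Per-CPU release offset used to stagger quanta across processors when
 * aligned_quanta is disabled; e.g., with a 1 ms quantum and 4 possible CPUs,
 * CPU 2 would be offset by 500 us. */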
66u64 cpu_stagger_offset(int cpu)
67{
68 u64 offset = 0;
69
70 if (!aligned_quanta) {
71 offset = LITMUS_QUANTUM_LENGTH_NS;
72 do_div(offset, num_possible_cpus());
73 offset *= cpu;
74 }
75 return offset;
76}
77
78/*
79 * sys_set_rt_task_param
80 * @pid: PID of the task whose scheduling parameters are to be changed
81 * @param: New real-time extension parameters such as the execution cost and
82 *         period
83 * Syscall for manipulating a task's real-time extension parameters.
84 * Returns EFAULT if copying the parameters from user space fails.
85 *         ESRCH if pid does not correspond to a valid task.
86 *         EINVAL if pid or param is invalid, if period or execution
87 *         cost is <= 0, or if the parameters fail the remaining checks
88 *         (density, task class, budget enforcement policy).
89 *         0 on success.
90 *
91 * If the target is not yet a real-time task, the parameters simply take
92 * effect once it is admitted; if it already is a real-time task, the
93 * active plugin decides whether run-time parameter changes are supported.
94 *
95 * find_task_by_vpid() assumes that we are in the same namespace as the
96 * target.
97 */
98asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
99{
100 struct rt_task tp;
101 struct task_struct *target;
102 int retval = -EINVAL;
103
104 printk("Setting up rt task parameters for process %d.\n", pid);
105
106 if (pid < 0 || param == 0) {
107 goto out;
108 }
109 if (copy_from_user(&tp, param, sizeof(tp))) {
110 retval = -EFAULT;
111 goto out;
112 }
113
114 /* Task search and manipulation must be protected */
115 read_lock_irq(&tasklist_lock);
116 rcu_read_lock();
117 if (!(target = find_task_by_vpid(pid))) {
118 retval = -ESRCH;
119 rcu_read_unlock();
120 goto out_unlock;
121 }
122 rcu_read_unlock();
123
124 /* set relative deadline to be implicit if left unspecified */
125 if (tp.relative_deadline == 0)
126 tp.relative_deadline = tp.period;
127
128 if (tp.exec_cost <= 0)
129 goto out_unlock;
130 if (tp.period <= 0)
131 goto out_unlock;
132 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
133 {
134 printk(KERN_INFO "litmus: real-time task %d rejected "
135 "because task density > 1.0\n", pid);
136 goto out_unlock;
137 }
138 if (tp.cls != RT_CLASS_HARD &&
139 tp.cls != RT_CLASS_SOFT &&
140 tp.cls != RT_CLASS_BEST_EFFORT)
141 {
142 printk(KERN_INFO "litmus: real-time task %d rejected "
143 "because its class is invalid\n", pid);
144 goto out_unlock;
145 }
146 if (tp.budget_policy != NO_ENFORCEMENT &&
147 tp.budget_policy != QUANTUM_ENFORCEMENT &&
148 tp.budget_policy != PRECISE_ENFORCEMENT)
149 {
150 printk(KERN_INFO "litmus: real-time task %d rejected "
151 "because unsupported budget enforcement policy "
152 "specified (%d)\n",
153 pid, tp.budget_policy);
154 goto out_unlock;
155 }
156
157 if (is_realtime(target)) {
158 /* The task is already a real-time task.
159 * Let plugin decide whether it wants to support
160 * parameter changes at runtime.
161 */
162 retval = litmus->task_change_params(target, &tp);
163 } else {
164 target->rt_param.task_params = tp;
165 retval = 0;
166 }
167 out_unlock:
168 read_unlock_irq(&tasklist_lock);
169 out:
170 return retval;
171}
172
173/*
174 * Getter of task's RT params
175 * returns EINVAL if param is NULL or pid is negative
176 * returns ESRCH if pid does not correspond to a valid task
177 * returns EFAULT if copying of parameters has failed.
178 *
179 * find_task_by_vpid() assumes that we are in the same namespace as the
180 * target.
181 */
182asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
183{
184 int retval = -EINVAL;
185 struct task_struct *source;
186 struct rt_task lp;
187
188 if (param == 0 || pid < 0)
189 goto out;
190
191 read_lock_irq(&tasklist_lock);
192 rcu_read_lock();
193 source = find_task_by_vpid(pid);
194 rcu_read_unlock();
195 if (!source) {
196 retval = -ESRCH;
197 read_unlock_irq(&tasklist_lock);
198 goto out;
199 }
200 lp = source->rt_param.task_params;
201 read_unlock_irq(&tasklist_lock);
202 /* Do copying outside the lock */
203 retval =
204 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
205 out:
206 return retval;
207
208}
209
210/*
211 * This is the central syscall for the periodic task model.
212 * It checks that the calling task is a real-time task with a valid
213 * period and then invokes the plugin-specific completion handler,
214 * which puts the task to sleep until its next release.
215 * returns 0 on successful wakeup
216 * returns EINVAL if the current task is not a real-time task or has
217 * a non-positive period; otherwise the plugin's return value is passed on
218 */
219asmlinkage long sys_complete_job(void)
220{
221 int retval = -EPERM;
222 if (!is_realtime(current)) {
223 retval = -EINVAL;
224 goto out;
225 }
226 /* Task with negative or zero period cannot sleep */
227 if (get_rt_period(current) <= 0) {
228 retval = -EINVAL;
229 goto out;
230 }
231 /* The plugin has to put the task into an
232 * appropriate queue and call schedule
233 */
234 retval = litmus->complete_job();
235 out:
236 return retval;
237}
238
239/* This is an "improved" version of sys_complete_job that
240 * addresses the problem of unintentionally missing a job after
241 * an overrun.
242 *
243 * returns 0 on successful wakeup
244 * returns EINVAL if the current task is not a real-time task or has
245 * a non-positive period; otherwise the plugin's return value is passed on
246 */
247asmlinkage long sys_wait_for_job_release(unsigned int job)
248{
249 int retval = -EPERM;
250 if (!is_realtime(current)) {
251 retval = -EINVAL;
252 goto out;
253 }
254
255 /* Task with negative or zero period cannot sleep */
256 if (get_rt_period(current) <= 0) {
257 retval = -EINVAL;
258 goto out;
259 }
260
261 retval = 0;
262
263 /* first wait until we have "reached" the desired job
264 *
265 * This implementation has at least two problems:
266 *
267 * 1) It doesn't gracefully handle the wrap around of
268 * job_no. Since LITMUS is a prototype, this is not much
269 * of a problem right now.
270 *
271 * 2) It is theoretically racy if a job release occurs
272 * between checking job_no and calling sleep_next_period().
273 * A proper solution would require adding another callback
274 * in the plugin structure and testing the condition with
275 * interrupts disabled.
276 *
277 * FIXME: At least problem 2 should be taken care of eventually.
278 */
279 while (!retval && job > current->rt_param.job_params.job_no)
280 /* If the last job overran then job <= job_no and we
281 * don't send the task to sleep.
282 */
283 retval = litmus->complete_job();
284 out:
285 return retval;
286}
287
288/* This is a helper syscall to query the current job sequence number.
289 *
290 * returns 0 on successful query
291 * returns EPERM if task is not a real-time task.
292 * returns EFAULT if &job is not a valid pointer.
293 */
294asmlinkage long sys_query_job_no(unsigned int __user *job)
295{
296 int retval = -EPERM;
297 if (is_realtime(current))
298 retval = put_user(current->rt_param.job_params.job_no, job);
299
300 return retval;
301}
302
303/* sys_null_call() is only used for determining raw system call
304 * overheads (kernel entry, kernel exit). It has no useful side effects.
305 * If ts is non-NULL, then the current Feather-Trace time is recorded.
306 */
307asmlinkage long sys_null_call(cycles_t __user *ts)
308{
309 long ret = 0;
310 cycles_t now;
311
312 if (ts) {
313 now = get_cycles();
314 ret = put_user(now, ts);
315 }
316
317 return ret;
318}
319
320asmlinkage long sys_reservation_create(int type, void __user *config)
321{
322 return litmus->reservation_create(type, config);
323}
324
325asmlinkage long sys_reservation_destroy(unsigned int reservation_id, int cpu)
326{
327 return litmus->reservation_destroy(reservation_id, cpu);
328}
329
330/* p is a real-time task. Re-init its state as a best-effort task. */
331static void reinit_litmus_state(struct task_struct* p, int restore)
332{
333 struct rt_task user_config = {};
334 void* ctrl_page = NULL;
335
336 if (restore) {
337		/* Save the user-space provided configuration data
338		 * and the allocated control page. */
339 user_config = p->rt_param.task_params;
340 ctrl_page = p->rt_param.ctrl_page;
341 }
342
343 /* We probably should not be inheriting any task's priority
344 * at this point in time.
345 */
346 WARN_ON(p->rt_param.inh_task);
347
348 /* Cleanup everything else. */
349 memset(&p->rt_param, 0, sizeof(p->rt_param));
350
351 /* Restore preserved fields. */
352 if (restore) {
353 p->rt_param.task_params = user_config;
354 p->rt_param.ctrl_page = ctrl_page;
355 }
356}
357
358static long __litmus_admit_task(struct task_struct* tsk)
359{
360 long err;
361
362 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
363
364 /* allocate heap node for this task */
365 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
366 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
367
368 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
369 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
370
371 return -ENOMEM;
372 } else {
373 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
374 }
375
376 preempt_disable();
377
378 err = litmus->admit_task(tsk);
379
380 if (!err) {
381 sched_trace_task_name(tsk);
382 sched_trace_task_param(tsk);
383 atomic_inc(&rt_task_count);
384 }
385
386 preempt_enable();
387
388 return err;
389}
390
391long litmus_admit_task(struct task_struct* tsk)
392{
393 long retval = 0;
394
395 BUG_ON(is_realtime(tsk));
396
397 tsk_rt(tsk)->heap_node = NULL;
398 tsk_rt(tsk)->rel_heap = NULL;
399
400 if (get_rt_relative_deadline(tsk) == 0 ||
401 get_exec_cost(tsk) >
402 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
403 TRACE_TASK(tsk,
404 "litmus admit: invalid task parameters "
405 "(e = %lu, p = %lu, d = %lu)\n",
406 get_exec_cost(tsk), get_rt_period(tsk),
407 get_rt_relative_deadline(tsk));
408 retval = -EINVAL;
409 goto out;
410 }
411
412 retval = __litmus_admit_task(tsk);
413
414out:
415 if (retval) {
416 if (tsk_rt(tsk)->heap_node)
417 bheap_node_free(tsk_rt(tsk)->heap_node);
418 if (tsk_rt(tsk)->rel_heap)
419 release_heap_free(tsk_rt(tsk)->rel_heap);
420 }
421 return retval;
422}
423
424void litmus_clear_state(struct task_struct* tsk)
425{
426 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
427 bheap_node_free(tsk_rt(tsk)->heap_node);
428 release_heap_free(tsk_rt(tsk)->rel_heap);
429
430 atomic_dec(&rt_task_count);
431 reinit_litmus_state(tsk, 1);
432}
433
434/* called from sched_setscheduler() */
435void litmus_exit_task(struct task_struct* tsk)
436{
437 if (is_realtime(tsk)) {
438 sched_trace_task_completion(tsk, 1);
439
440 litmus->task_exit(tsk);
441 }
442}
443
444static DECLARE_RWSEM(plugin_switch_mutex);
445
446void litmus_plugin_switch_disable(void)
447{
448 down_read(&plugin_switch_mutex);
449}
450
451void litmus_plugin_switch_enable(void)
452{
453 up_read(&plugin_switch_mutex);
454}
455
456static int __do_plugin_switch(struct sched_plugin* plugin)
457{
458 int ret;
459
460
461 /* don't switch if there are active real-time tasks */
462 if (atomic_read(&rt_task_count) == 0) {
463 TRACE("deactivating plugin %s\n", litmus->plugin_name);
464 ret = litmus->deactivate_plugin();
465 if (0 != ret)
466 goto out;
467
468 TRACE("activating plugin %s\n", plugin->plugin_name);
469 ret = plugin->activate_plugin();
470 if (0 != ret) {
471 printk(KERN_INFO "Can't activate %s (%d).\n",
472 plugin->plugin_name, ret);
473 plugin = &linux_sched_plugin;
474 }
475
476 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
477 litmus = plugin;
478 } else
479 ret = -EBUSY;
480out:
481 TRACE("do_plugin_switch() => %d\n", ret);
482 return ret;
483}
484
485static atomic_t ready_to_switch;
486
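/* Executed on every CPU via stop_cpus(): each CPU decrements
 * ready_to_switch, the last one to arrive performs the actual switch in
 * __do_plugin_switch(), and then releases the others by setting the
 * counter to INT_MAX so that all CPUs leave the stop-machine context
 * together. */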
487static int do_plugin_switch(void *_plugin)
488{
489 unsigned long flags;
490 int ret = 0;
491
492 local_save_flags(flags);
493 local_irq_disable();
494 hard_irq_disable();
495
496 if (atomic_dec_and_test(&ready_to_switch))
497 {
498 ret = __do_plugin_switch((struct sched_plugin*) _plugin);
499 atomic_set(&ready_to_switch, INT_MAX);
500 }
501
502 do {
503 cpu_relax();
504 } while (atomic_read(&ready_to_switch) != INT_MAX);
505
506 local_irq_restore(flags);
507 return ret;
508}
509
510/* Switching a plugin in use is tricky.
511 * We must watch out that no real-time tasks exist
512 * (and that none are created in parallel) and that the plugin is not
513 * currently in use on any processor (in theory).
514 */
515int switch_sched_plugin(struct sched_plugin* plugin)
516{
517 int err;
518 struct domain_proc_info* domain_info;
519
520 BUG_ON(!plugin);
521
522 if (atomic_read(&rt_task_count) == 0) {
523 down_write(&plugin_switch_mutex);
524
525 deactivate_domain_proc();
526
527 get_online_cpus();
528 atomic_set(&ready_to_switch, num_online_cpus());
529 err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin);
530 put_online_cpus();
531
532 if (!litmus->get_domain_proc_info(&domain_info))
533 activate_domain_proc(domain_info);
534
535 up_write(&plugin_switch_mutex);
536 return err;
537 } else
538 return -EBUSY;
539}
540
541/* Called upon fork.
542 * p is the newly forked task.
543 */
544void litmus_fork(struct task_struct* p)
545{
546 /* non-rt tasks might have ctrl_page set */
547 tsk_rt(p)->ctrl_page = NULL;
548
549 if (is_realtime(p)) {
550 reinit_litmus_state(p, 1);
551 if (litmus->fork_task(p)) {
552 if (__litmus_admit_task(p))
553 /* something went wrong, give up */
554 p->sched_reset_on_fork = 1;
555 } else {
556 /* clean out any litmus related state */
557 reinit_litmus_state(p, 0);
558
559 TRACE_TASK(p, "fork: real-time status denied\n");
560 /* Don't let the child be a real-time task. */
561 p->sched_reset_on_fork = 1;
562 }
563 }
564
565 /* od tables are never inherited across a fork */
566 p->od_table = NULL;
567}
568
569/* Called upon execve().
570 * current is doing the exec.
571 * Don't let address space specific stuff leak.
572 */
573void litmus_exec(void)
574{
575 struct task_struct* p = current;
576
577 if (is_realtime(p)) {
578 WARN_ON(p->rt_param.inh_task);
579 if (tsk_rt(p)->ctrl_page) {
580 free_page((unsigned long) tsk_rt(p)->ctrl_page);
581 tsk_rt(p)->ctrl_page = NULL;
582 }
583 }
584}
585
586/* Called when dead_tsk is being deallocated
587 */
588void exit_litmus(struct task_struct *dead_tsk)
589{
590 /* We also allow non-RT tasks to
591 * allocate control pages to allow
592 * measurements with non-RT tasks.
593 * So check if we need to free the page
594 * in any case.
595 */
596 if (tsk_rt(dead_tsk)->ctrl_page) {
597 TRACE_TASK(dead_tsk,
598 "freeing ctrl_page %p\n",
599 tsk_rt(dead_tsk)->ctrl_page);
600 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
601 }
602
603 /* Tasks should not be real-time tasks any longer at this point. */
604 BUG_ON(is_realtime(dead_tsk));
605}
606
607void litmus_do_exit(struct task_struct *exiting_tsk)
608{
609 /* This task called do_exit(), but is still a real-time task. To avoid
610 * complications later, we force it to be a non-real-time task now. */
611
612 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
613
614 TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n");
615 sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, &param);
616}
617
618void litmus_dealloc(struct task_struct *tsk)
619{
620 /* tsk is no longer a real-time task */
621 TRACE_TASK(tsk, "Deallocating real-time task data\n");
622 litmus->task_cleanup(tsk);
623 litmus_clear_state(tsk);
624}
625
626/* move current non-RT task to a specific CPU */
627int litmus_be_migrate_to(int cpu)
628{
629 struct cpumask single_cpu_aff;
630
631 cpumask_clear(&single_cpu_aff);
632 cpumask_set_cpu(cpu, &single_cpu_aff);
633 return sched_setaffinity(current->pid, &single_cpu_aff);
634}
635
636#ifdef CONFIG_MAGIC_SYSRQ
637int sys_kill(int pid, int sig);
638
639static void sysrq_handle_kill_rt_tasks(int key)
640{
641 struct task_struct *t;
642 read_lock(&tasklist_lock);
643 for_each_process(t) {
644 if (is_realtime(t)) {
645 sys_kill(t->pid, SIGKILL);
646 }
647 }
648 read_unlock(&tasklist_lock);
649}
650
651static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
652 .handler = sysrq_handle_kill_rt_tasks,
653 .help_msg = "quit-rt-tasks(X)",
654 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
655};
656#endif
657
658extern struct sched_plugin linux_sched_plugin;
659
660static int litmus_shutdown_nb(struct notifier_block *unused1,
661 unsigned long unused2, void *unused3)
662{
663 /* Attempt to switch back to regular Linux scheduling.
664 * Forces the active plugin to clean up.
665 */
666 if (litmus != &linux_sched_plugin) {
667 int ret = switch_sched_plugin(&linux_sched_plugin);
668 if (ret) {
669 printk("Auto-shutdown of active Litmus plugin failed.\n");
670 }
671 }
672 return NOTIFY_DONE;
673}
674
675static struct notifier_block shutdown_notifier = {
676 .notifier_call = litmus_shutdown_nb,
677};
678
679/**
680 * Triggering hrtimers on specific cpus as required by arm_release_timer(_on)
681 */
682#ifdef CONFIG_SMP
683
684/**
685 * hrtimer_pull - smp_call_function_single_async callback on remote cpu
686 */
687void hrtimer_pull(void *csd_info)
688{
689 struct hrtimer_start_on_info *info = csd_info;
690	TRACE("pulled timer %p\n", info->timer);
691 hrtimer_start_range_ns(info->timer, info->time, 0, info->mode);
692}
693
694/**
695 * hrtimer_start_on - trigger timer arming on remote cpu
696 * @cpu: remote cpu
697 * @info: save timer information for enqueuing on remote cpu
698 * @timer: timer to be pulled
699 * @time: expire time
700 * @mode: timer mode
701 */
702void hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
703 struct hrtimer *timer, ktime_t time,
704 const enum hrtimer_mode mode)
705{
706 info->timer = timer;
707 info->time = time;
708 info->mode = mode;
709
710 /* initialize call_single_data struct */
711 info->csd.func = &hrtimer_pull;
712 info->csd.info = info;
713 info->csd.flags = 0;
714
715 /* initiate pull */
716 preempt_disable();
717 if (cpu == smp_processor_id()) {
718 /* start timer locally; we may get called
719 * with rq->lock held, do not wake up anything
720 */
721 TRACE("hrtimer_start_on: starting on local CPU\n");
722 hrtimer_start(info->timer, info->time, info->mode);
723 } else {
724 /* call hrtimer_pull() on remote cpu
725 * to start remote timer asynchronously
726 */
727 TRACE("hrtimer_start_on: pulling to remote CPU\n");
728 smp_call_function_single_async(cpu, &info->csd);
729 }
730 preempt_enable();
731}
732
733#endif /* CONFIG_SMP */
734
735static int __init _init_litmus(void)
736{
737	/* Common initialization: register the default Linux scheduling
738	 * plugin, create the slab caches, set up the /proc/litmus entries,
739	 * and register the reboot notifier.
740	 */
741 printk("Starting LITMUS^RT kernel\n");
742
743 register_sched_plugin(&linux_sched_plugin);
744
745 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
746 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
747
748#ifdef CONFIG_MAGIC_SYSRQ
749 /* offer some debugging help */
750 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
751 printk("Registered kill rt tasks magic sysrq.\n");
752 else
753 printk("Could not register kill rt tasks magic sysrq.\n");
754#endif
755
756 init_litmus_proc();
757
758 register_reboot_notifier(&shutdown_notifier);
759
760 return 0;
761}
762
763static void _exit_litmus(void)
764{
765 unregister_reboot_notifier(&shutdown_notifier);
766
767 exit_litmus_proc();
768 kmem_cache_destroy(bheap_node_cache);
769 kmem_cache_destroy(release_heap_cache);
770}
771
772module_init(_init_litmus);
773module_exit(_exit_litmus);
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 000000000000..de5e3f37fe88
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,574 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/slab.h>
7#include <linux/uaccess.h>
8#include <linux/seq_file.h>
9
10#include <litmus/debug_trace.h>
11#include <litmus/litmus.h>
12#include <litmus/litmus_proc.h>
13
14#include <litmus/clustered.h>
15
16/* in litmus/litmus.c */
17extern atomic_t rt_task_count;
18
19static struct proc_dir_entry *litmus_dir = NULL,
20 *curr_file = NULL,
21 *stat_file = NULL,
22 *plugs_dir = NULL,
23#ifdef CONFIG_RELEASE_MASTER
24 *release_master_file = NULL,
25#endif
26 *plugs_file = NULL,
27 *domains_dir = NULL,
28 *cpus_dir = NULL;
29
30
31/* in litmus/sync.c */
32int count_tasks_waiting_for_release(void);
33
34static int litmus_stats_proc_show(struct seq_file *m, void *v)
35{
36 seq_printf(m,
37 "real-time tasks = %d\n"
38 "ready for release = %d\n",
39 atomic_read(&rt_task_count),
40 count_tasks_waiting_for_release());
41 return 0;
42}
43
44static int litmus_stats_proc_open(struct inode *inode, struct file *file)
45{
46 return single_open(file, litmus_stats_proc_show, PDE_DATA(inode));
47}
48
49static const struct file_operations litmus_stats_proc_fops = {
50 .open = litmus_stats_proc_open,
51 .read = seq_read,
52 .llseek = seq_lseek,
53 .release = single_release,
54};
55
56
57static int litmus_loaded_proc_show(struct seq_file *m, void *v)
58{
59 print_sched_plugins(m);
60 return 0;
61}
62
63static int litmus_loaded_proc_open(struct inode *inode, struct file *file)
64{
65 return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode));
66}
67
68static const struct file_operations litmus_loaded_proc_fops = {
69 .open = litmus_loaded_proc_open,
70 .read = seq_read,
71 .llseek = seq_lseek,
72 .release = single_release,
73};
74
75
76
77
78/* in litmus/litmus.c */
79int switch_sched_plugin(struct sched_plugin*);
80
81static ssize_t litmus_active_proc_write(struct file *file,
82 const char __user *buffer, size_t count,
83 loff_t *ppos)
84{
85 char name[65];
86 struct sched_plugin* found;
87 ssize_t ret = -EINVAL;
88 int err;
89
90
91 ret = copy_and_chomp(name, sizeof(name), buffer, count);
92 if (ret < 0)
93 return ret;
94
95 found = find_sched_plugin(name);
96
97 if (found) {
98 err = switch_sched_plugin(found);
99 if (err) {
100 printk(KERN_INFO "Could not switch plugin: %d\n", err);
101 ret = err;
102 }
103 } else {
104 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
105 ret = -ESRCH;
106 }
107
108 return ret;
109}
110
111static int litmus_active_proc_show(struct seq_file *m, void *v)
112{
113 seq_printf(m, "%s\n", litmus->plugin_name);
114 return 0;
115}
116
117static int litmus_active_proc_open(struct inode *inode, struct file *file)
118{
119 return single_open(file, litmus_active_proc_show, PDE_DATA(inode));
120}
121
122static const struct file_operations litmus_active_proc_fops = {
123 .open = litmus_active_proc_open,
124 .read = seq_read,
125 .llseek = seq_lseek,
126 .release = single_release,
127 .write = litmus_active_proc_write,
128};
129
130
131#ifdef CONFIG_RELEASE_MASTER
132static ssize_t litmus_release_master_proc_write(
133 struct file *file,
134 const char __user *buffer, size_t count,
135 loff_t *ppos)
136{
137 int cpu, err, online = 0;
138 char msg[64];
139 ssize_t len;
140
141 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
142
143 if (len < 0)
144 return len;
145
146 if (strcmp(msg, "NO_CPU") == 0)
147 atomic_set(&release_master_cpu, NO_CPU);
148 else {
149 err = sscanf(msg, "%d", &cpu);
150 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
151 atomic_set(&release_master_cpu, cpu);
152 } else {
153 TRACE("invalid release master: '%s' "
154 "(err:%d cpu:%d online:%d)\n",
155 msg, err, cpu, online);
156 len = -EINVAL;
157 }
158 }
159 return len;
160}
161
162static int litmus_release_master_proc_show(struct seq_file *m, void *v)
163{
164 int master;
165 master = atomic_read(&release_master_cpu);
166 if (master == NO_CPU)
167 seq_printf(m, "NO_CPU\n");
168 else
169 seq_printf(m, "%d\n", master);
170 return 0;
171}
172
173static int litmus_release_master_proc_open(struct inode *inode, struct file *file)
174{
175 return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode));
176}
177
178static const struct file_operations litmus_release_master_proc_fops = {
179 .open = litmus_release_master_proc_open,
180 .read = seq_read,
181 .llseek = seq_lseek,
182 .release = single_release,
183 .write = litmus_release_master_proc_write,
184};
185#endif
186
187int __init init_litmus_proc(void)
188{
189 litmus_dir = proc_mkdir("litmus", NULL);
190 if (!litmus_dir) {
191 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
192 return -ENOMEM;
193 }
194
195 curr_file = proc_create("active_plugin", 0644, litmus_dir,
196 &litmus_active_proc_fops);
197
198 if (!curr_file) {
199 printk(KERN_ERR "Could not allocate active_plugin "
200 "procfs entry.\n");
201 return -ENOMEM;
202 }
203
204#ifdef CONFIG_RELEASE_MASTER
205 release_master_file = proc_create("release_master", 0644, litmus_dir,
206 &litmus_release_master_proc_fops);
207 if (!release_master_file) {
208 printk(KERN_ERR "Could not allocate release_master "
209 "procfs entry.\n");
210 return -ENOMEM;
211 }
212#endif
213
214 stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops);
215
216 plugs_dir = proc_mkdir("plugins", litmus_dir);
217 if (!plugs_dir){
218 printk(KERN_ERR "Could not allocate plugins directory "
219 "procfs entry.\n");
220 return -ENOMEM;
221 }
222
223 plugs_file = proc_create("loaded", 0444, plugs_dir,
224 &litmus_loaded_proc_fops);
225
226 domains_dir = proc_mkdir("domains", litmus_dir);
227 if (!domains_dir) {
228 printk(KERN_ERR "Could not allocate domains directory "
229 "procfs entry.\n");
230 return -ENOMEM;
231 }
232
233 cpus_dir = proc_mkdir("cpus", litmus_dir);
234 if (!cpus_dir) {
235 printk(KERN_ERR "Could not allocate cpus directory "
236 "procfs entry.\n");
237 return -ENOMEM;
238 }
239
240 return 0;
241}
242
243void exit_litmus_proc(void)
244{
245 if (cpus_dir || domains_dir) {
246 deactivate_domain_proc();
247 if (cpus_dir)
248 remove_proc_entry("cpus", litmus_dir);
249 if (domains_dir)
250 remove_proc_entry("domains", litmus_dir);
251 }
252 if (plugs_file)
253 remove_proc_entry("loaded", plugs_dir);
254 if (plugs_dir)
255 remove_proc_entry("plugins", litmus_dir);
256 if (stat_file)
257 remove_proc_entry("stats", litmus_dir);
258 if (curr_file)
259 remove_proc_entry("active_plugin", litmus_dir);
260#ifdef CONFIG_RELEASE_MASTER
261 if (release_master_file)
262 remove_proc_entry("release_master", litmus_dir);
263#endif
264 if (litmus_dir)
265 remove_proc_entry("litmus", NULL);
266}
267
268long make_plugin_proc_dir(struct sched_plugin* plugin,
269 struct proc_dir_entry** pde_in)
270{
271 struct proc_dir_entry *pde_new = NULL;
272 long rv;
273
274 if (!plugin || !plugin->plugin_name){
275 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
276 __func__);
277 rv = -EINVAL;
278 goto out_no_pde;
279 }
280
281 if (!plugs_dir){
282 printk(KERN_ERR "Could not make plugin sub-directory, because "
283 "/proc/litmus/plugins does not exist.\n");
284 rv = -ENOENT;
285 goto out_no_pde;
286 }
287
288 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
289 if (!pde_new){
290 printk(KERN_ERR "Could not make plugin sub-directory: "
291			"out of memory?\n");
292 rv = -ENOMEM;
293 goto out_no_pde;
294 }
295
296 rv = 0;
297 *pde_in = pde_new;
298 goto out_ok;
299
300out_no_pde:
301 *pde_in = NULL;
302out_ok:
303 return rv;
304}
305
306void remove_plugin_proc_dir(struct sched_plugin* plugin)
307{
308 if (!plugin || !plugin->plugin_name){
309 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
310 __func__);
311 return;
312 }
313 remove_proc_entry(plugin->plugin_name, plugs_dir);
314}
315
316
317
318/* misc. I/O helper functions */
319
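/* Copy at most ksize - 1 bytes from user space into kbuf, NUL-terminate the
 * result, and strip a single trailing newline (e.g., the newline appended by
 * echo(1) when writing to the /proc/litmus files). Returns the number of
 * bytes copied or -EFAULT. */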
320int copy_and_chomp(char *kbuf, unsigned long ksize,
321 __user const char* ubuf, unsigned long ulength)
322{
323 /* caller must provide buffer space */
324 BUG_ON(!ksize);
325
326 ksize--; /* leave space for null byte */
327
328 if (ksize > ulength)
329 ksize = ulength;
330
331 if(copy_from_user(kbuf, ubuf, ksize))
332 return -EFAULT;
333
334 kbuf[ksize] = '\0';
335
336 /* chomp kbuf */
337 if (ksize > 0 && kbuf[ksize - 1] == '\n')
338 kbuf[ksize - 1] = '\0';
339
340 return ksize;
341}
342
343/* helper functions for clustered plugins */
344static const char* cache_level_names[] = {
345 "ALL",
346 "L1",
347 "L2",
348 "L3",
349};
350
351int parse_cache_level(const char *cache_name, enum cache_level *level)
352{
353 int err = -EINVAL;
354 int i;
355 /* do a quick and dirty comparison to find the cluster size */
356 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
357 if (!strcmp(cache_name, cache_level_names[i])) {
358 *level = (enum cache_level) i;
359 err = 0;
360 break;
361 }
362 return err;
363}
364
365const char* cache_level_name(enum cache_level level)
366{
367 int idx = level;
368
369 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
370 return cache_level_names[idx];
371 else
372 return "INVALID";
373}
374
375
376
377
378/* proc file interface to configure the cluster size */
379
380static ssize_t litmus_cluster_proc_write(struct file *file,
381 const char __user *buffer, size_t count,
382 loff_t *ppos)
383{
384 enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file));
385 ssize_t len;
386 char cache_name[8];
387
388 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
389
390 if (len > 0 && parse_cache_level(cache_name, level)) {
391 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
392 len = -EINVAL;
393 }
394
395 return len;
396}
397
398static int litmus_cluster_proc_show(struct seq_file *m, void *v)
399{
400 enum cache_level *level = (enum cache_level *) m->private;
401
402 seq_printf(m, "%s\n", cache_level_name(*level));
403 return 0;
404}
405
406static int litmus_cluster_proc_open(struct inode *inode, struct file *file)
407{
408 return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode));
409}
410
411static const struct file_operations litmus_cluster_proc_fops = {
412 .open = litmus_cluster_proc_open,
413 .read = seq_read,
414 .llseek = seq_lseek,
415 .release = single_release,
416 .write = litmus_cluster_proc_write,
417};
418
419struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
420 enum cache_level* level)
421{
422 struct proc_dir_entry* cluster_file;
423
424
425 cluster_file = proc_create_data("cluster", 0644, parent,
426 &litmus_cluster_proc_fops,
427 (void *) level);
428 if (!cluster_file) {
429 printk(KERN_ERR
430		       "Could not create cluster procfs entry.\n");
431 }
432 return cluster_file;
433}
434
435static struct domain_proc_info* active_mapping = NULL;
436
437static int litmus_mapping_proc_show(struct seq_file *m, void *v)
438{
439 struct cd_mapping *mapping = (struct cd_mapping*) m->private;
440
441 if(!mapping)
442 return 0;
443
444 seq_printf(m, "%*pb\n", cpumask_pr_args(mapping->mask));
445 return 0;
446}
447
448static int litmus_mapping_proc_open(struct inode *inode, struct file *file)
449{
450 return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode));
451}
452
453static const struct file_operations litmus_domain_proc_fops = {
454 .open = litmus_mapping_proc_open,
455 .read = seq_read,
456 .llseek = seq_lseek,
457 .release = single_release,
458};
459
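/* Publish the CPU <-> scheduling-domain mapping provided by the active
 * plugin: one read-only file per CPU under /proc/litmus/cpus/ and one per
 * domain under /proc/litmus/domains/, each showing a cpumask. */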
460long activate_domain_proc(struct domain_proc_info* map)
461{
462 int i;
463 char name[8];
464
465 if (!map)
466 return -EINVAL;
467 if (cpus_dir == NULL || domains_dir == NULL)
468 return -EINVAL;
469
470 if (active_mapping)
471 deactivate_domain_proc();
472
473 active_mapping = map;
474
475 for (i = 0; i < map->num_cpus; ++i) {
476 struct cd_mapping* m = &map->cpu_to_domains[i];
477 snprintf(name, sizeof(name), "%d", m->id);
478 m->proc_file = proc_create_data(name, 0444, cpus_dir,
479 &litmus_domain_proc_fops, (void*)m);
480 }
481
482 for (i = 0; i < map->num_domains; ++i) {
483 struct cd_mapping* m = &map->domain_to_cpus[i];
484 snprintf(name, sizeof(name), "%d", m->id);
485 m->proc_file = proc_create_data(name, 0444, domains_dir,
486 &litmus_domain_proc_fops, (void*)m);
487 }
488
489 return 0;
490}
491
492long deactivate_domain_proc(void)
493{
494 int i;
495 char name[65];
496
497 struct domain_proc_info* map = active_mapping;
498
499 if (!map)
500 return -EINVAL;
501
502 for (i = 0; i < map->num_cpus; ++i) {
503 struct cd_mapping* m = &map->cpu_to_domains[i];
504 snprintf(name, sizeof(name), "%d", m->id);
505 remove_proc_entry(name, cpus_dir);
506 m->proc_file = NULL;
507 }
508 for (i = 0; i < map->num_domains; ++i) {
509 struct cd_mapping* m = &map->domain_to_cpus[i];
510 snprintf(name, sizeof(name), "%d", m->id);
511 remove_proc_entry(name, domains_dir);
512 m->proc_file = NULL;
513 }
514
515 active_mapping = NULL;
516
517 return 0;
518}
519
520long init_domain_proc_info(struct domain_proc_info* m,
521 int num_cpus, int num_domains)
522{
523 int i;
524 int num_alloced_cpu_masks = 0;
525 int num_alloced_domain_masks = 0;
526
527 m->cpu_to_domains =
528 kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus,
529 GFP_ATOMIC);
530 if(!m->cpu_to_domains)
531 goto failure;
532
533 m->domain_to_cpus =
534 kmalloc(sizeof(*(m->domain_to_cpus))*num_domains,
535 GFP_ATOMIC);
536 if(!m->domain_to_cpus)
537 goto failure;
538
539 for(i = 0; i < num_cpus; ++i) {
540 if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC))
541 goto failure;
542 ++num_alloced_cpu_masks;
543 }
544 for(i = 0; i < num_domains; ++i) {
545 if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC))
546 goto failure;
547 ++num_alloced_domain_masks;
548 }
549
550 return 0;
551
552failure:
553 for(i = 0; i < num_alloced_cpu_masks; ++i)
554 free_cpumask_var(m->cpu_to_domains[i].mask);
555 for(i = 0; i < num_alloced_domain_masks; ++i)
556 free_cpumask_var(m->domain_to_cpus[i].mask);
557 if(m->cpu_to_domains)
558 kfree(m->cpu_to_domains);
559 if(m->domain_to_cpus)
560 kfree(m->domain_to_cpus);
561 return -ENOMEM;
562}
563
564void destroy_domain_proc_info(struct domain_proc_info* m)
565{
566 int i;
567 for(i = 0; i < m->num_cpus; ++i)
568 free_cpumask_var(m->cpu_to_domains[i].mask);
569 for(i = 0; i < m->num_domains; ++i)
570 free_cpumask_var(m->domain_to_cpus[i].mask);
571 kfree(m->cpu_to_domains);
572 kfree(m->domain_to_cpus);
573 memset(m, 0, sizeof(*m));
574}
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 000000000000..a1d0515c5613
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,189 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
3#include <litmus/fdso.h>
4#include <litmus/debug_trace.h>
5
6#ifdef CONFIG_LITMUS_LOCKING
7
8#include <linux/sched.h>
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/trace.h>
12#include <litmus/wait.h>
13
14static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
15static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
16static int close_generic_lock(struct od_table_entry* entry);
17static void destroy_generic_lock(obj_type_t type, void* sem);
18
19struct fdso_ops generic_lock_ops = {
20 .create = create_generic_lock,
21 .open = open_generic_lock,
22 .close = close_generic_lock,
23 .destroy = destroy_generic_lock
24};
25
26static inline bool is_lock(struct od_table_entry* entry)
27{
28 return entry->class == &generic_lock_ops;
29}
30
31static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
32{
33 BUG_ON(!is_lock(entry));
34 return (struct litmus_lock*) entry->obj->obj;
35}
36
37static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
38{
39 struct litmus_lock* lock;
40 int err;
41
42 err = litmus->allocate_lock(&lock, type, arg);
43 if (err == 0)
44 *obj_ref = lock;
45 return err;
46}
47
48static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
49{
50 struct litmus_lock* lock = get_lock(entry);
51 if (lock->ops->open)
52 return lock->ops->open(lock, arg);
53 else
54 return 0; /* default: any task can open it */
55}
56
57static int close_generic_lock(struct od_table_entry* entry)
58{
59 struct litmus_lock* lock = get_lock(entry);
60 if (lock->ops->close)
61 return lock->ops->close(lock);
62 else
63 return 0; /* default: closing succeeds */
64}
65
66static void destroy_generic_lock(obj_type_t type, void* obj)
67{
68 struct litmus_lock* lock = (struct litmus_lock*) obj;
69 lock->ops->deallocate(lock);
70}
71
72asmlinkage long sys_litmus_lock(int lock_od)
73{
74 long err = -EINVAL;
75 struct od_table_entry* entry;
76 struct litmus_lock* l;
77
78 TS_SYSCALL_IN_START;
79
80 TS_SYSCALL_IN_END;
81
82 TS_LOCK_START;
83
84 entry = get_entry_for_od(lock_od);
85 if (entry && is_lock(entry)) {
86 l = get_lock(entry);
87 TRACE_CUR("attempts to lock 0x%p\n", l);
88 err = l->ops->lock(l);
89 }
90
91	/* Note: task may have been suspended or preempted in between! Take
92 * this into account when computing overheads. */
93 TS_LOCK_END;
94
95 TS_SYSCALL_OUT_START;
96
97 return err;
98}
99
100asmlinkage long sys_litmus_unlock(int lock_od)
101{
102 long err = -EINVAL;
103 struct od_table_entry* entry;
104 struct litmus_lock* l;
105
106 TS_SYSCALL_IN_START;
107
108 TS_SYSCALL_IN_END;
109
110 TS_UNLOCK_START;
111
112 entry = get_entry_for_od(lock_od);
113 if (entry && is_lock(entry)) {
114 l = get_lock(entry);
115 TRACE_CUR("attempts to unlock 0x%p\n", l);
116 err = l->ops->unlock(l);
117 }
118
119	/* Note: task may have been preempted in between! Take this into
120 * account when computing overheads. */
121 TS_UNLOCK_END;
122
123 TS_SYSCALL_OUT_START;
124
125 return err;
126}
127
128struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
129{
130 wait_queue_t* q;
131 struct task_struct* t = NULL;
132
133 if (waitqueue_active(wq)) {
134 q = list_entry(wq->task_list.next,
135 wait_queue_t, task_list);
136 t = (struct task_struct*) q->private;
137 __remove_wait_queue(wq, q);
138 }
139 return(t);
140}
141
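/* Add 'new' to the priority-ordered exclusive wait queue 'head', keeping
 * entries sorted by (priority, tie_breaker). Returns the number of queued
 * entries that remain ahead of the new entry. */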
142unsigned int __add_wait_queue_prio_exclusive(
143 wait_queue_head_t* head,
144 prio_wait_queue_t *new)
145{
146 struct list_head *pos;
147 unsigned int passed = 0;
148
149 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
150
151	/* find the first queued entry that the new entry should precede */
152 list_for_each(pos, &head->task_list) {
153 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
154 wq.task_list);
155
156 if (unlikely(lt_before(new->priority, queued->priority) ||
157 (new->priority == queued->priority &&
158 new->tie_breaker < queued->tie_breaker))) {
159			/* new precedes pos: insert it right before pos */
160 __list_add(&new->wq.task_list, pos->prev, pos);
161 goto out;
162 }
163 passed++;
164 }
165
166	/* If we get to this point, either the list is empty or every
167	 * queued element is less than new.
168 * Let's add new to the end. */
169 list_add_tail(&new->wq.task_list, &head->task_list);
170out:
171 return passed;
172}
173
174
175#else
176
177struct fdso_ops generic_lock_ops = {};
178
179asmlinkage long sys_litmus_lock(int sem_od)
180{
181 return -ENOSYS;
182}
183
184asmlinkage long sys_litmus_unlock(int sem_od)
185{
186 return -ENOSYS;
187}
188
189#endif
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 000000000000..5f678536b7fa
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,144 @@
1#include <linux/sched.h>
2
3#include <litmus/debug_trace.h>
4#include <litmus/litmus.h>
5#include <litmus/preempt.h>
6#include <litmus/trace.h>
7
8DEFINE_PER_CPU(bool, litmus_preemption_in_progress);
9
10/* The rescheduling state of each processor.
11 */
12DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
13
14void sched_state_will_schedule(struct task_struct* tsk)
15{
16 /* Litmus hack: we only care about processor-local invocations of
17 * set_tsk_need_resched(). We can't reliably set the flag remotely
18 * since it might race with other updates to the scheduling state. We
19 * can't rely on the runqueue lock protecting updates to the sched
20 * state since processors do not acquire the runqueue locks for all
21 * updates to the sched state (to avoid acquiring two runqueue locks at
22 * the same time). Further, if tsk is residing on a remote processor,
23 * then that processor doesn't actually know yet that it is going to
24 * reschedule; it still must receive an IPI (unless a local invocation
25 * races).
26 */
27 if (likely(task_cpu(tsk) == smp_processor_id())) {
28 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
29 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
30 set_sched_state(PICKED_WRONG_TASK);
31 else
32 set_sched_state(WILL_SCHEDULE);
33 } else
34 /* Litmus tasks should never be subject to a remote
35 * set_tsk_need_resched(). */
36 BUG_ON(is_realtime(tsk));
37#ifdef CONFIG_PREEMPT_STATE_TRACE
38 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
39 __builtin_return_address(0));
40#endif
41}
42
43/* Called by the IPI handler after another CPU called smp_send_reschedule(). */
44void sched_state_ipi(void)
45{
46 /* If the IPI was slow, we might be in any state right now. The IPI is
47 * only meaningful if we are in SHOULD_SCHEDULE. */
48 if (is_in_sched_state(SHOULD_SCHEDULE)) {
49 /* Cause scheduler to be invoked.
50 * This will cause a transition to WILL_SCHEDULE. */
51 set_tsk_need_resched(current);
52 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
53 current->comm, current->pid);
54 TS_SEND_RESCHED_END;
55 } else {
56 /* ignore */
57 TRACE_STATE("ignoring IPI in state %x (%s)\n",
58 get_sched_state(),
59 sched_state_name(get_sched_state()));
60 }
61}
62
63/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
64 * hold the lock that is used to serialize scheduling decisions. */
65void litmus_reschedule(int cpu)
66{
67 int picked_transition_ok = 0;
68 int scheduled_transition_ok = 0;
69
70 /* The (remote) CPU could be in any state. */
71
72 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
73 * is not aware of the need to reschedule at this point. */
74
75 /* is a context switch in progress? */
76 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
77 picked_transition_ok = sched_state_transition_on(
78 cpu, TASK_PICKED, PICKED_WRONG_TASK);
79
80 if (!picked_transition_ok &&
81 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
82 /* We either raced with the end of the context switch, or the
83 * CPU was in TASK_SCHEDULED anyway. */
84 scheduled_transition_ok = sched_state_transition_on(
85 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
86 }
87
88 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
89 * scheduler to be invoked. */
90 if (scheduled_transition_ok) {
91 if (smp_processor_id() == cpu) {
92 set_tsk_need_resched(current);
93 preempt_set_need_resched();
94 } else {
95 TS_SEND_RESCHED_START(cpu);
96 smp_send_reschedule(cpu);
97 }
98 }
99
100 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
101 __FUNCTION__,
102 picked_transition_ok,
103 scheduled_transition_ok);
104}
105
106void litmus_reschedule_local(void)
107{
108 if (is_in_sched_state(TASK_PICKED))
109 set_sched_state(PICKED_WRONG_TASK);
110 else if (is_in_sched_state(TASK_SCHEDULED
111 | SHOULD_SCHEDULE
112 | PICKED_WRONG_TASK)) {
113 set_sched_state(WILL_SCHEDULE);
114 set_tsk_need_resched(current);
115 preempt_set_need_resched();
116 }
117}
118
119#ifdef CONFIG_DEBUG_KERNEL
120
121void sched_state_plugin_check(void)
122{
123 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
124 TRACE("!!!! plugin did not call sched_state_task_picked()!"
125 "Calling sched_state_task_picked() is mandatory---fix this.\n");
126 set_sched_state(TASK_PICKED);
127 }
128}
129
130#define NAME_CHECK(x) case x: return #x
131const char* sched_state_name(int s)
132{
133 switch (s) {
134 NAME_CHECK(TASK_SCHEDULED);
135 NAME_CHECK(SHOULD_SCHEDULE);
136 NAME_CHECK(WILL_SCHEDULE);
137 NAME_CHECK(TASK_PICKED);
138 NAME_CHECK(PICKED_WRONG_TASK);
139 default:
140 return "UNKNOWN";
141 };
142}
143
144#endif
diff --git a/litmus/reservations/Makefile b/litmus/reservations/Makefile
new file mode 100644
index 000000000000..517fc2ff8a76
--- /dev/null
+++ b/litmus/reservations/Makefile
@@ -0,0 +1,3 @@
1obj-y += core.o budget-notifier.o alloc.o
2obj-y += polling.o
3obj-y += table-driven.o
diff --git a/litmus/reservations/alloc.c b/litmus/reservations/alloc.c
new file mode 100644
index 000000000000..1f93f223f504
--- /dev/null
+++ b/litmus/reservations/alloc.c
@@ -0,0 +1,143 @@
1#include <linux/slab.h>
2#include <asm/uaccess.h>
3
4#include <litmus/rt_param.h>
5
6#include <litmus/reservations/alloc.h>
7#include <litmus/reservations/polling.h>
8#include <litmus/reservations/table-driven.h>
9
10
11long alloc_polling_reservation(
12 int res_type,
13 struct reservation_config *config,
14 struct reservation **_res)
15{
16 struct polling_reservation *pres;
17 int use_edf = config->priority == LITMUS_NO_PRIORITY;
18 int periodic = res_type == PERIODIC_POLLING;
19
20 if (config->polling_params.budget >
21 config->polling_params.period) {
22 printk(KERN_ERR "invalid polling reservation (%u): "
23 "budget > period\n", config->id);
24 return -EINVAL;
25 }
26 if (config->polling_params.budget >
27 config->polling_params.relative_deadline
28 && config->polling_params.relative_deadline) {
29 printk(KERN_ERR "invalid polling reservation (%u): "
30 "budget > deadline\n", config->id);
31 return -EINVAL;
32 }
33 if (config->polling_params.offset >
34 config->polling_params.period) {
35 printk(KERN_ERR "invalid polling reservation (%u): "
36 "offset > period\n", config->id);
37 return -EINVAL;
38 }
39
40 /* XXX: would be nice to use a core-local allocation. */
41 pres = kzalloc(sizeof(*pres), GFP_KERNEL);
42 if (!pres)
43 return -ENOMEM;
44
45 polling_reservation_init(pres, use_edf, periodic,
46 config->polling_params.budget,
47 config->polling_params.period,
48 config->polling_params.relative_deadline,
49 config->polling_params.offset);
50 pres->res.id = config->id;
51 if (!use_edf)
52 pres->res.priority = config->priority;
53
54 *_res = &pres->res;
55 return 0;
56}
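/* Editorial sketch (not part of this patch): how a caller might fill in a
 * reservation_config for the checks above. The field names match those used
 * in this function; treating lt_t values as nanoseconds and the example_
 * helper itself are assumptions, not part of the LITMUS^RT API. */
static long example_alloc_periodic_polling(struct reservation **res)
{
	struct reservation_config config = {
		.id = 1,
		.priority = LITMUS_NO_PRIORITY,	/* no fixed priority => EDF */
		.polling_params = {
			.budget = 10000000ULL,		/* 10 ms, if lt_t is in ns */
			.period = 100000000ULL,		/* 100 ms */
			.relative_deadline = 0,		/* 0 => implicit deadline */
			.offset = 0,
		},
	};

	return alloc_polling_reservation(PERIODIC_POLLING, &config, res);
}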
57
58
59#define MAX_INTERVALS 1024
60
61long alloc_table_driven_reservation(
62 struct reservation_config *config,
63 struct reservation **_res)
64{
65 struct table_driven_reservation *td_res = NULL;
66 struct lt_interval *slots = NULL;
67 size_t slots_size;
68 unsigned int i, num_slots;
69 long err = -EINVAL;
70 void *mem;
71
72 if (!config->table_driven_params.num_intervals) {
73 printk(KERN_ERR "invalid table-driven reservation (%u): "
74 "no intervals\n", config->id);
75 return -EINVAL;
76 }
77
78 if (config->table_driven_params.num_intervals > MAX_INTERVALS) {
79 printk(KERN_ERR "invalid table-driven reservation (%u): "
80 "too many intervals (max: %d)\n", config->id, MAX_INTERVALS);
81 return -EINVAL;
82 }
83
84 num_slots = config->table_driven_params.num_intervals;
85 slots_size = sizeof(slots[0]) * num_slots;
86
87 mem = kzalloc(sizeof(*td_res) + slots_size, GFP_KERNEL);
88 if (!mem) {
89 return -ENOMEM;
90 } else {
91 slots = mem + sizeof(*td_res);
92 td_res = mem;
93 err = copy_from_user(slots,
94 config->table_driven_params.intervals, slots_size);
95 }
96
97 if (!err) {
98 /* sanity checks */
99 for (i = 0; !err && i < num_slots; i++)
100 if (slots[i].end <= slots[i].start) {
101 printk(KERN_ERR
102 "invalid table-driven reservation (%u): "
103 "invalid interval %u => [%llu, %llu]\n",
104 config->id, i,
105 slots[i].start, slots[i].end);
106 err = -EINVAL;
107 }
108
109 for (i = 0; !err && i + 1 < num_slots; i++)
110 if (slots[i + 1].start <= slots[i].end) {
111 printk(KERN_ERR
112 "invalid table-driven reservation (%u): "
113 "overlapping intervals %u, %u\n",
114 config->id, i, i + 1);
115 err = -EINVAL;
116 }
117
118 if (slots[num_slots - 1].end >
119 config->table_driven_params.major_cycle_length) {
120 printk(KERN_ERR
121 "invalid table-driven reservation (%u): last "
122 "interval ends past major cycle %llu > %llu\n",
123 config->id,
124 slots[num_slots - 1].end,
125 config->table_driven_params.major_cycle_length);
126 err = -EINVAL;
127 }
128 }
129
130 if (err) {
131 kfree(td_res);
132 } else {
133 table_driven_reservation_init(td_res,
134 config->table_driven_params.major_cycle_length,
135 slots, num_slots);
136 td_res->res.id = config->id;
137 td_res->res.priority = config->priority;
138 *_res = &td_res->res;
139 }
140
141 return err;
142}
143
diff --git a/litmus/reservations/budget-notifier.c b/litmus/reservations/budget-notifier.c
new file mode 100644
index 000000000000..0b0f42687882
--- /dev/null
+++ b/litmus/reservations/budget-notifier.c
@@ -0,0 +1,26 @@
1#include <litmus/reservations/budget-notifier.h>
2
3void budget_notifier_list_init(struct budget_notifier_list* bnl)
4{
5 INIT_LIST_HEAD(&bnl->list);
6 raw_spin_lock_init(&bnl->lock);
7}
8
9void budget_notifiers_fire(struct budget_notifier_list *bnl, bool replenished)
10{
11 struct budget_notifier *bn, *next;
12
13 unsigned long flags;
14
15 raw_spin_lock_irqsave(&bnl->lock, flags);
16
17 list_for_each_entry_safe(bn, next, &bnl->list, list) {
18 if (replenished)
19 bn->budget_replenished(bn);
20 else
21 bn->budget_exhausted(bn);
22 }
23
24 raw_spin_unlock_irqrestore(&bnl->lock, flags);
25}
26
diff --git a/litmus/reservations/core.c b/litmus/reservations/core.c
new file mode 100644
index 000000000000..5137eda0f643
--- /dev/null
+++ b/litmus/reservations/core.c
@@ -0,0 +1,393 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/debug_trace.h>
5#include <litmus/reservations/reservation.h>
6
7void reservation_init(struct reservation *res)
8{
9 memset(res, 0, sizeof(*res));
10 res->state = RESERVATION_INACTIVE;
11 INIT_LIST_HEAD(&res->clients);
12 INIT_LIST_HEAD(&res->replenish_list);
13 budget_notifier_list_init(&res->budget_notifiers);
14}
15
16struct task_struct* default_dispatch_client(
17 struct reservation *res,
18 lt_t *for_at_most)
19{
20 struct reservation_client *client, *next;
21 struct task_struct* tsk;
22
23 BUG_ON(res->state != RESERVATION_ACTIVE);
24 *for_at_most = 0;
25
26 list_for_each_entry_safe(client, next, &res->clients, list) {
27 tsk = client->dispatch(client);
28 if (likely(tsk)) {
29 /* Primitive form of round-robin scheduling:
30 * make sure we alternate between multiple clients
31 * with at least the granularity of the replenishment
32 * period. Reservations that need more fine-grained
33 * or more predictable alternation between threads
34 * within a reservation should provide a custom
35 * dispatch function. */
36 list_del(&client->list);
37 /* move to back of list */
38 list_add_tail(&client->list, &res->clients);
39 return tsk;
40 }
41 }
42 return NULL;
43}
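/* Editorial sketch (not part of this patch): a reservation that needs a
 * different policy than the round-robin rotation above can supply its own
 * dispatch callback. The example below asks the clients in list order but
 * never rotates the list; the example_ name is hypothetical. */
static struct task_struct* example_fifo_dispatch_client(
	struct reservation *res,
	lt_t *for_at_most)
{
	struct reservation_client *client;
	struct task_struct *tsk;

	*for_at_most = 0;

	/* query each client in queue order without moving anyone to the back */
	list_for_each_entry(client, &res->clients, list) {
		tsk = client->dispatch(client);
		if (tsk)
			return tsk;
	}
	return NULL;
}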
44
45void common_drain_budget(
46 struct reservation *res,
47 lt_t how_much)
48{
49 if (how_much >= res->cur_budget)
50 res->cur_budget = 0;
51 else
52 res->cur_budget -= how_much;
53
54 res->budget_consumed += how_much;
55 res->budget_consumed_total += how_much;
56
57 switch (res->state) {
58 case RESERVATION_DEPLETED:
59 case RESERVATION_INACTIVE:
60 BUG();
61 break;
62
63 case RESERVATION_ACTIVE_IDLE:
64 case RESERVATION_ACTIVE:
65 if (!res->cur_budget) {
66 res->env->change_state(res->env, res,
67 RESERVATION_DEPLETED);
68 } /* else: stay in current state */
69 break;
70 }
71}
72
73static struct task_struct * task_client_dispatch(struct reservation_client *client)
74{
75 struct task_client *tc = container_of(client, struct task_client, client);
76 return tc->task;
77}
78
79void task_client_init(struct task_client *tc, struct task_struct *tsk,
80 struct reservation *res)
81{
82 memset(&tc->client, 0, sizeof(tc->client));
83 tc->client.dispatch = task_client_dispatch;
84 tc->client.reservation = res;
85 tc->task = tsk;
86}
87
88static void sup_scheduler_update_at(
89 struct sup_reservation_environment* sup_env,
90 lt_t when)
91{
92 if (sup_env->next_scheduler_update > when)
93 sup_env->next_scheduler_update = when;
94}
95
96static void sup_scheduler_update_after(
97 struct sup_reservation_environment* sup_env,
98 lt_t timeout)
99{
100 sup_scheduler_update_at(sup_env, sup_env->env.current_time + timeout);
101}
102
103static int _sup_queue_depleted(
104 struct sup_reservation_environment* sup_env,
105 struct reservation *res)
106{
107 struct list_head *pos;
108 struct reservation *queued;
109 int passed_earlier = 0;
110
111 BUG_ON(in_list(&res->replenish_list));
112
113 list_for_each(pos, &sup_env->depleted_reservations) {
114 queued = list_entry(pos, struct reservation, replenish_list);
115 if (queued->next_replenishment > res->next_replenishment) {
116 list_add(&res->replenish_list, pos->prev);
117 return passed_earlier;
118 } else
119 passed_earlier = 1;
120 }
121
122 list_add_tail(&res->replenish_list, &sup_env->depleted_reservations);
123
124 return passed_earlier;
125}
126
127static void sup_queue_depleted(
128 struct sup_reservation_environment* sup_env,
129 struct reservation *res)
130{
131 int passed_earlier = _sup_queue_depleted(sup_env, res);
132
133 /* check for updated replenishment time */
134 if (!passed_earlier)
135 sup_scheduler_update_at(sup_env, res->next_replenishment);
136}
137
138static int _sup_queue_active(
139 struct sup_reservation_environment* sup_env,
140 struct reservation *res)
141{
142 struct list_head *pos;
143 struct reservation *queued;
144 int passed_active = 0;
145
146 if (likely(res->priority != RESERVATION_BACKGROUND_PRIORITY)) {
147 /* enqueue in order of priority */
148 list_for_each(pos, &sup_env->active_reservations) {
149 queued = list_entry(pos, struct reservation, list);
150 if (queued->priority > res->priority) {
151 list_add(&res->list, pos->prev);
152 return passed_active;
153 } else if (queued->state == RESERVATION_ACTIVE)
154 passed_active = 1;
155 }
156 } else {
157 /* don't preempt unless the list happens to be empty */
158 passed_active = !list_empty(&sup_env->active_reservations);
159 }
160 /* Either a background reservation, or we fell off the end of the list.
161 * In both cases, just add the reservation to the end of the list of
162 * active reservations. */
163 list_add_tail(&res->list, &sup_env->active_reservations);
164 return passed_active;
165}
166
167static void sup_queue_active(
168 struct sup_reservation_environment* sup_env,
169 struct reservation *res)
170{
171 int passed_active = _sup_queue_active(sup_env, res);
172
173 /* check for possible preemption */
174 if (res->state == RESERVATION_ACTIVE && !passed_active)
175 sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
176 else if (res == list_first_entry(&sup_env->active_reservations,
177 struct reservation, list)) {
178 /* First reservation is draining budget => make sure
179 * the scheduler is called to notice when the reservation
180 * budget has been drained completely. */
181 sup_scheduler_update_after(sup_env, res->cur_budget);
182 }
183}
184
185static void sup_queue_reservation(
186 struct sup_reservation_environment* sup_env,
187 struct reservation *res)
188{
189 switch (res->state) {
190 case RESERVATION_INACTIVE:
191 list_add(&res->list, &sup_env->inactive_reservations);
192 break;
193
194 case RESERVATION_DEPLETED:
195 sup_queue_depleted(sup_env, res);
196 break;
197
198 case RESERVATION_ACTIVE_IDLE:
199 case RESERVATION_ACTIVE:
200 sup_queue_active(sup_env, res);
201 break;
202 }
203}
204
205void sup_add_new_reservation(
206 struct sup_reservation_environment* sup_env,
207 struct reservation* new_res)
208{
209 new_res->env = &sup_env->env;
210 list_add(&new_res->all_list, &sup_env->all_reservations);
211 sup_queue_reservation(sup_env, new_res);
212}
213
214struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
215 unsigned int id)
216{
217 struct reservation *res;
218
219 list_for_each_entry(res, &sup_env->all_reservations, all_list) {
220 if (res->id == id)
221 return res;
222 }
223
224 return NULL;
225}
226
227static void sup_charge_budget(
228 struct sup_reservation_environment* sup_env,
229 lt_t delta)
230{
231 struct reservation *res;
232
233 /* charge the highest-priority ACTIVE or ACTIVE_IDLE reservation */
234
235 res = list_first_entry_or_null(
236 &sup_env->active_reservations, struct reservation, list);
237
238 if (res) {
239 TRACE("R%d: charging at %llu for %llu execution, budget before: %llu\n",
240 res->id, res->env->current_time, delta, res->cur_budget);
241 res->ops->drain_budget(res, delta);
242 TRACE("R%d: budget now: %llu, priority: %llu\n",
243 res->id, res->cur_budget, res->priority);
244 }
245
246 /* check when the next budget expires */
247
248 res = list_first_entry_or_null(
249 &sup_env->active_reservations, struct reservation, list);
250
251 if (res) {
252 /* make sure scheduler is invoked when this reservation expires
253 * its remaining budget */
254 TRACE("requesting scheduler update for reservation %u "
255 "in %llu nanoseconds\n",
256 res->id, res->cur_budget);
257 sup_scheduler_update_after(sup_env, res->cur_budget);
258 }
259}
260
261static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
262{
263 struct list_head *pos, *next;
264 struct reservation *res;
265
266 list_for_each_safe(pos, next, &sup_env->depleted_reservations) {
267 res = list_entry(pos, struct reservation, replenish_list);
268 if (res->next_replenishment <= sup_env->env.current_time) {
269 TRACE("R%d: replenishing budget at %llu, "
270 "priority: %llu\n",
271 res->id, res->env->current_time, res->priority);
272 res->ops->replenish(res);
273 } else {
274 /* list is ordered by increasing depletion times */
275 break;
276 }
277 }
278
279 /* request a scheduler update at the next replenishment instant */
280 res = list_first_entry_or_null(&sup_env->depleted_reservations,
281 struct reservation, replenish_list);
282 if (res)
283 sup_scheduler_update_at(sup_env, res->next_replenishment);
284}
285
286void sup_update_time(
287 struct sup_reservation_environment* sup_env,
288 lt_t now)
289{
290 lt_t delta;
291
292 /* If the time didn't advance, there is nothing to do.
293	 * This check makes it safe to call sup_update_time() potentially
294	 * multiple times (e.g., via different code paths). */
295 if (!list_empty(&sup_env->active_reservations))
296 TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now,
297 sup_env->env.current_time);
298 if (unlikely(now <= sup_env->env.current_time))
299 return;
300
301 delta = now - sup_env->env.current_time;
302 sup_env->env.current_time = now;
303
304 /* check if future updates are required */
305 if (sup_env->next_scheduler_update <= sup_env->env.current_time)
306 sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
307
308 /* deplete budgets by passage of time */
309 sup_charge_budget(sup_env, delta);
310
311 /* check if any budgets were replenished */
312 sup_replenish_budgets(sup_env);
313}
314
315struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env)
316{
317 struct reservation *res, *next;
318 struct task_struct *tsk = NULL;
319 lt_t time_slice;
320
321 list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
322 if (res->state == RESERVATION_ACTIVE) {
323 tsk = res->ops->dispatch_client(res, &time_slice);
324 if (likely(tsk)) {
325 if (time_slice)
326 sup_scheduler_update_after(sup_env, time_slice);
327 sup_scheduler_update_after(sup_env, res->cur_budget);
328 return tsk;
329 }
330 }
331 }
332
333 return NULL;
334}
335
336static void sup_res_change_state(
337 struct reservation_environment* env,
338 struct reservation *res,
339 reservation_state_t new_state)
340{
341 struct sup_reservation_environment* sup_env;
342
343 sup_env = container_of(env, struct sup_reservation_environment, env);
344
345 TRACE("reservation R%d state %d->%d at %llu\n",
346 res->id, res->state, new_state, env->current_time);
347
348 if (new_state == RESERVATION_DEPLETED
349 && (res->state == RESERVATION_ACTIVE ||
350 res->state == RESERVATION_ACTIVE_IDLE)) {
351 budget_notifiers_fire(&res->budget_notifiers, false);
352 } else if (res->state == RESERVATION_DEPLETED
353 && new_state == RESERVATION_ACTIVE) {
354 budget_notifiers_fire(&res->budget_notifiers, true);
355 }
356
357 /* dequeue prior to re-queuing */
358 if (res->state == RESERVATION_DEPLETED)
359 list_del(&res->replenish_list);
360 else
361 list_del(&res->list);
362
363 /* check if we need to reschedule because we lost an active reservation */
364 if (res->state == RESERVATION_ACTIVE && !sup_env->will_schedule)
365 sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
366 res->state = new_state;
367 sup_queue_reservation(sup_env, res);
368}
369
370static void sup_request_replenishment(
371 struct reservation_environment* env,
372 struct reservation *res)
373{
374 struct sup_reservation_environment* sup_env;
375
376 sup_env = container_of(env, struct sup_reservation_environment, env);
377 sup_queue_depleted(sup_env, res);
378}
379
380void sup_init(struct sup_reservation_environment* sup_env)
381{
382 memset(sup_env, 0, sizeof(*sup_env));
383
384 INIT_LIST_HEAD(&sup_env->all_reservations);
385 INIT_LIST_HEAD(&sup_env->active_reservations);
386 INIT_LIST_HEAD(&sup_env->depleted_reservations);
387 INIT_LIST_HEAD(&sup_env->inactive_reservations);
388
389 sup_env->env.change_state = sup_res_change_state;
390 sup_env->env.request_replenishment = sup_request_replenishment;
391
392 sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
393}
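/* Editorial sketch (not part of this patch): one plausible call sequence for
 * a uniprocessor reservation plugin driving this environment. Locking and
 * plugin glue are omitted; the example_ names are hypothetical. */
static struct sup_reservation_environment example_sup_env;

static void example_setup(struct reservation *res)
{
	sup_init(&example_sup_env);
	sup_add_new_reservation(&example_sup_env, res);
}

static struct task_struct* example_pick_next_task(void)
{
	/* advance time first: this charges the running reservation's budget
	 * and replenishes any depleted reservations that are due */
	sup_update_time(&example_sup_env, litmus_clock());

	/* then ask the highest-priority active reservation for a task */
	return sup_dispatch(&example_sup_env);
}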
diff --git a/litmus/reservations/polling.c b/litmus/reservations/polling.c
new file mode 100644
index 000000000000..63e0bed566e8
--- /dev/null
+++ b/litmus/reservations/polling.c
@@ -0,0 +1,256 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/reservations/reservation.h>
5#include <litmus/reservations/polling.h>
6
7
8static void periodic_polling_client_arrives(
9 struct reservation* res,
10 struct reservation_client *client
11)
12{
13 struct polling_reservation *pres =
14 container_of(res, struct polling_reservation, res);
15 lt_t instances, tmp;
16
17 list_add_tail(&client->list, &res->clients);
18
19 switch (res->state) {
20 case RESERVATION_INACTIVE:
21 /* Figure out next replenishment time. */
22 tmp = res->env->current_time - res->env->time_zero;
23 instances = div64_u64(tmp, pres->period);
24 res->next_replenishment =
25 (instances + 1) * pres->period + pres->offset;
26
27 TRACE("pol-res: activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
28 tmp, instances, pres->period, res->next_replenishment,
29 res->env->current_time);
30
31 res->env->change_state(res->env, res,
32 RESERVATION_DEPLETED);
33 break;
34
35 case RESERVATION_ACTIVE:
36 case RESERVATION_DEPLETED:
37 /* do nothing */
38 break;
39
40 case RESERVATION_ACTIVE_IDLE:
41 res->env->change_state(res->env, res,
42 RESERVATION_ACTIVE);
43 break;
44 }
45}
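/* Editorial note (not part of this patch): a worked instance of the
 * computation above. With time_zero = 0, period = 10, offset = 2, and
 * current_time = 27, we get tmp = 27 and instances = 2, hence
 * next_replenishment = (2 + 1) * 10 + 2 = 32, i.e., the reservation first
 * replenishes at the start of the next period plus the configured offset. */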
46
47
48static void periodic_polling_client_departs(
49 struct reservation *res,
50 struct reservation_client *client,
51 int did_signal_job_completion
52)
53{
54 list_del(&client->list);
55
56 switch (res->state) {
57 case RESERVATION_INACTIVE:
58 case RESERVATION_ACTIVE_IDLE:
59 BUG(); /* INACTIVE or IDLE <=> no client */
60 break;
61
62 case RESERVATION_ACTIVE:
63 if (list_empty(&res->clients)) {
64 res->env->change_state(res->env, res,
65 RESERVATION_ACTIVE_IDLE);
66 } /* else: nothing to do, more clients ready */
67 break;
68
69 case RESERVATION_DEPLETED:
70 /* do nothing */
71 break;
72 }
73}
74
75static void periodic_polling_on_replenishment(
76 struct reservation *res
77)
78{
79 struct polling_reservation *pres =
80 container_of(res, struct polling_reservation, res);
81
82 /* replenish budget */
83 res->cur_budget = pres->max_budget;
84 res->next_replenishment += pres->period;
85 res->budget_consumed = 0;
86
87 switch (res->state) {
88 case RESERVATION_DEPLETED:
89 case RESERVATION_INACTIVE:
90 case RESERVATION_ACTIVE_IDLE:
91 if (list_empty(&res->clients))
92 /* no clients => poll again later */
93 res->env->change_state(res->env, res,
94 RESERVATION_INACTIVE);
95 else
96 /* we have clients & budget => ACTIVE */
97 res->env->change_state(res->env, res,
98 RESERVATION_ACTIVE);
99 break;
100
101 case RESERVATION_ACTIVE:
102 /* Replenished while active => tardy? In any case,
103 * go ahead and stay active. */
104 break;
105 }
106}
107
108static void periodic_polling_on_replenishment_edf(
109 struct reservation *res
110)
111{
112 struct polling_reservation *pres =
113 container_of(res, struct polling_reservation, res);
114
115 /* update current priority */
116 res->priority = res->next_replenishment + pres->deadline;
117
118 /* do common updates */
119 periodic_polling_on_replenishment(res);
120}
121
122static struct reservation_ops periodic_polling_ops_fp = {
123 .dispatch_client = default_dispatch_client,
124 .client_arrives = periodic_polling_client_arrives,
125 .client_departs = periodic_polling_client_departs,
126 .replenish = periodic_polling_on_replenishment,
127 .drain_budget = common_drain_budget,
128};
129
130static struct reservation_ops periodic_polling_ops_edf = {
131 .dispatch_client = default_dispatch_client,
132 .client_arrives = periodic_polling_client_arrives,
133 .client_departs = periodic_polling_client_departs,
134 .replenish = periodic_polling_on_replenishment_edf,
135 .drain_budget = common_drain_budget,
136};
137
138
139
140
141static void sporadic_polling_client_arrives_fp(
142 struct reservation* res,
143 struct reservation_client *client
144)
145{
146 struct polling_reservation *pres =
147 container_of(res, struct polling_reservation, res);
148
149 list_add_tail(&client->list, &res->clients);
150
151 switch (res->state) {
152 case RESERVATION_INACTIVE:
153 /* Replenish now. */
154 res->cur_budget = pres->max_budget;
155 res->next_replenishment =
156 res->env->current_time + pres->period;
157
158 res->env->change_state(res->env, res,
159 RESERVATION_ACTIVE);
160 break;
161
162 case RESERVATION_ACTIVE:
163 case RESERVATION_DEPLETED:
164 /* do nothing */
165 break;
166
167 case RESERVATION_ACTIVE_IDLE:
168 res->env->change_state(res->env, res,
169 RESERVATION_ACTIVE);
170 break;
171 }
172}
173
174static void sporadic_polling_client_arrives_edf(
175 struct reservation* res,
176 struct reservation_client *client
177)
178{
179 struct polling_reservation *pres =
180 container_of(res, struct polling_reservation, res);
181
182 list_add_tail(&client->list, &res->clients);
183
184 switch (res->state) {
185 case RESERVATION_INACTIVE:
186 /* Replenish now. */
187 res->cur_budget = pres->max_budget;
188 res->next_replenishment =
189 res->env->current_time + pres->period;
190 res->priority =
191 res->env->current_time + pres->deadline;
192
193 res->env->change_state(res->env, res,
194 RESERVATION_ACTIVE);
195 break;
196
197 case RESERVATION_ACTIVE:
198 case RESERVATION_DEPLETED:
199 /* do nothing */
200 break;
201
202 case RESERVATION_ACTIVE_IDLE:
203 res->env->change_state(res->env, res,
204 RESERVATION_ACTIVE);
205 break;
206 }
207}
208
209static struct reservation_ops sporadic_polling_ops_fp = {
210 .dispatch_client = default_dispatch_client,
211 .client_arrives = sporadic_polling_client_arrives_fp,
212 .client_departs = periodic_polling_client_departs,
213 .replenish = periodic_polling_on_replenishment,
214 .drain_budget = common_drain_budget,
215};
216
217static struct reservation_ops sporadic_polling_ops_edf = {
218 .dispatch_client = default_dispatch_client,
219 .client_arrives = sporadic_polling_client_arrives_edf,
220 .client_departs = periodic_polling_client_departs,
221 .replenish = periodic_polling_on_replenishment_edf,
222 .drain_budget = common_drain_budget,
223};
224
225void polling_reservation_init(
226 struct polling_reservation *pres,
227 int use_edf_prio,
228 int use_periodic_polling,
229 lt_t budget, lt_t period, lt_t deadline, lt_t offset
230)
231{
232 if (!deadline)
233 deadline = period;
234 BUG_ON(budget > period);
235 BUG_ON(budget > deadline);
236 BUG_ON(offset >= period);
237
238 reservation_init(&pres->res);
239 pres->max_budget = budget;
240 pres->period = period;
241 pres->deadline = deadline;
242 pres->offset = offset;
243 if (use_periodic_polling) {
244 pres->res.kind = PERIODIC_POLLING;
245 if (use_edf_prio)
246 pres->res.ops = &periodic_polling_ops_edf;
247 else
248 pres->res.ops = &periodic_polling_ops_fp;
249 } else {
250 pres->res.kind = SPORADIC_POLLING;
251 if (use_edf_prio)
252 pres->res.ops = &sporadic_polling_ops_edf;
253 else
254 pres->res.ops = &sporadic_polling_ops_fp;
255 }
256}
diff --git a/litmus/reservations/table-driven.c b/litmus/reservations/table-driven.c
new file mode 100644
index 000000000000..e4debcb5d4d2
--- /dev/null
+++ b/litmus/reservations/table-driven.c
@@ -0,0 +1,269 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/reservations/reservation.h>
5#include <litmus/reservations/table-driven.h>
6
7static lt_t td_cur_major_cycle_start(struct table_driven_reservation *tdres)
8{
9 lt_t x, tmp;
10
11 tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
12 x = div64_u64(tmp, tdres->major_cycle);
13 x *= tdres->major_cycle;
14 return x;
15}
16
17
18static lt_t td_next_major_cycle_start(struct table_driven_reservation *tdres)
19{
20 lt_t x, tmp;
21
22 tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
23 x = div64_u64(tmp, tdres->major_cycle) + 1;
24 x *= tdres->major_cycle;
25 return x;
26}
27
28static void td_client_arrives(
29 struct reservation* res,
30 struct reservation_client *client
31)
32{
33 struct table_driven_reservation *tdres =
34 container_of(res, struct table_driven_reservation, res);
35
36 list_add_tail(&client->list, &res->clients);
37
38 switch (res->state) {
39 case RESERVATION_INACTIVE:
40 /* Figure out first replenishment time. */
41 tdres->major_cycle_start = td_next_major_cycle_start(tdres);
42 res->next_replenishment = tdres->major_cycle_start;
43 res->next_replenishment += tdres->intervals[0].start;
44 tdres->next_interval = 0;
45
46 res->env->change_state(res->env, res,
47 RESERVATION_DEPLETED);
48 break;
49
50 case RESERVATION_ACTIVE:
51 case RESERVATION_DEPLETED:
52 /* do nothing */
53 break;
54
55 case RESERVATION_ACTIVE_IDLE:
56 res->env->change_state(res->env, res,
57 RESERVATION_ACTIVE);
58 break;
59 }
60}
61
62static void td_client_departs(
63 struct reservation *res,
64 struct reservation_client *client,
65 int did_signal_job_completion
66)
67{
68 list_del(&client->list);
69
70 switch (res->state) {
71 case RESERVATION_INACTIVE:
72 case RESERVATION_ACTIVE_IDLE:
73 BUG(); /* INACTIVE or IDLE <=> no client */
74 break;
75
76 case RESERVATION_ACTIVE:
77 if (list_empty(&res->clients)) {
78 res->env->change_state(res->env, res,
79 RESERVATION_ACTIVE_IDLE);
80 } /* else: nothing to do, more clients ready */
81 break;
82
83 case RESERVATION_DEPLETED:
84 /* do nothing */
85 break;
86 }
87}
88
89static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
90{
91 lt_t now = tdres->res.env->current_time;
92 lt_t end = tdres->cur_interval.end;
93 TRACE("td_remaining(%u): start=%llu now=%llu end=%llu state=%d\n",
94 tdres->res.id,
95 tdres->cur_interval.start,
96 now, end,
97 tdres->res.state);
98 if (now >= end)
99 return 0;
100 else
101 return end - now;
102}
103
104static void td_replenish(
105 struct reservation *res)
106{
107 struct table_driven_reservation *tdres =
108 container_of(res, struct table_driven_reservation, res);
109
110 TRACE("td_replenish(%u): expected_replenishment=%llu\n", res->id,
111 res->next_replenishment);
112
113 /* figure out current interval */
114 tdres->cur_interval.start = tdres->major_cycle_start +
115 tdres->intervals[tdres->next_interval].start;
116 tdres->cur_interval.end = tdres->major_cycle_start +
117 tdres->intervals[tdres->next_interval].end;
118 TRACE("major_cycle_start=%llu => [%llu, %llu]\n",
119 tdres->major_cycle_start,
120 tdres->cur_interval.start,
121 tdres->cur_interval.end);
122
123 /* reset budget */
124 res->cur_budget = td_time_remaining_until_end(tdres);
125 res->budget_consumed = 0;
126 TRACE("td_replenish(%u): %s budget=%llu\n", res->id,
127 res->cur_budget ? "" : "WARNING", res->cur_budget);
128
129 /* prepare next slot */
130 tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
131 if (!tdres->next_interval)
132 /* wrap to next major cycle */
133 tdres->major_cycle_start += tdres->major_cycle;
134
135 /* determine next time this reservation becomes eligible to execute */
136 res->next_replenishment = tdres->major_cycle_start;
137 res->next_replenishment += tdres->intervals[tdres->next_interval].start;
138 TRACE("td_replenish(%u): next_replenishment=%llu\n", res->id,
139 res->next_replenishment);
140
141
142 switch (res->state) {
143 case RESERVATION_DEPLETED:
144 case RESERVATION_ACTIVE:
145 case RESERVATION_ACTIVE_IDLE:
146 if (list_empty(&res->clients))
147 res->env->change_state(res->env, res,
148 RESERVATION_ACTIVE_IDLE);
149 else
150 /* we have clients & budget => ACTIVE */
151 res->env->change_state(res->env, res,
152 RESERVATION_ACTIVE);
153 break;
154
155 case RESERVATION_INACTIVE:
156 BUG();
157 break;
158 }
159}
160
161static void td_drain_budget(
162 struct reservation *res,
163 lt_t how_much)
164{
165 struct table_driven_reservation *tdres =
166 container_of(res, struct table_driven_reservation, res);
167
168 res->budget_consumed += how_much;
169 res->budget_consumed_total += how_much;
170
171 /* Table-driven scheduling: instead of tracking the budget, we compute
172 * how much time is left in this allocation interval. */
173
174 /* sanity check: we should never try to drain from future slots */
175 BUG_ON(tdres->cur_interval.start > res->env->current_time);
176
177 switch (res->state) {
178 case RESERVATION_DEPLETED:
179 case RESERVATION_INACTIVE:
180 BUG();
181 break;
182
183 case RESERVATION_ACTIVE_IDLE:
184 case RESERVATION_ACTIVE:
185 res->cur_budget = td_time_remaining_until_end(tdres);
186 TRACE("td_drain_budget(%u): drained to budget=%llu\n",
187 res->id, res->cur_budget);
188 if (!res->cur_budget) {
189 res->env->change_state(res->env, res,
190 RESERVATION_DEPLETED);
191 } else {
192 /* sanity check budget calculation */
193 BUG_ON(res->env->current_time >= tdres->cur_interval.end);
194 BUG_ON(res->env->current_time < tdres->cur_interval.start);
195 }
196
197 break;
198 }
199}
200
201static struct task_struct* td_dispatch_client(
202 struct reservation *res,
203 lt_t *for_at_most)
204{
205 struct task_struct *t;
206 struct table_driven_reservation *tdres =
207 container_of(res, struct table_driven_reservation, res);
208
209 /* usual logic for selecting a client */
210 t = default_dispatch_client(res, for_at_most);
211
212 TRACE_TASK(t, "td_dispatch_client(%u): selected, budget=%llu\n",
213 res->id, res->cur_budget);
214
215 /* check how much budget we have left in this time slot */
216 res->cur_budget = td_time_remaining_until_end(tdres);
217
218 TRACE_TASK(t, "td_dispatch_client(%u): updated to budget=%llu next=%d\n",
219 res->id, res->cur_budget, tdres->next_interval);
220
221 if (unlikely(!res->cur_budget)) {
222 /* Unlikely case: if we ran out of budget, the user configured
223 * a broken scheduling table (overlapping table slots).
224 * Not much we can do about this, but we can't dispatch a job
225 * now without causing overload. So let's register this reservation
226 * as depleted and wait for the next allocation. */
227 TRACE("td_dispatch_client(%u): budget unexpectedly depleted "
228 "(check scheduling table for unintended overlap)\n",
229 res->id);
230 res->env->change_state(res->env, res,
231 RESERVATION_DEPLETED);
232 return NULL;
233 } else
234 return t;
235}
236
237static struct reservation_ops td_ops = {
238 .dispatch_client = td_dispatch_client,
239 .client_arrives = td_client_arrives,
240 .client_departs = td_client_departs,
241 .replenish = td_replenish,
242 .drain_budget = td_drain_budget,
243};
244
245void table_driven_reservation_init(
246 struct table_driven_reservation *tdres,
247 lt_t major_cycle,
248 struct lt_interval *intervals,
249 unsigned int num_intervals)
250{
251 unsigned int i;
252
253 /* sanity checking */
254 BUG_ON(!num_intervals);
255 for (i = 0; i < num_intervals; i++)
256 BUG_ON(intervals[i].end <= intervals[i].start);
257 for (i = 0; i + 1 < num_intervals; i++)
258 BUG_ON(intervals[i + 1].start <= intervals[i].end);
259 BUG_ON(intervals[num_intervals - 1].end > major_cycle);
260
261 reservation_init(&tdres->res);
262 tdres->res.kind = TABLE_DRIVEN;
263 tdres->major_cycle = major_cycle;
264 tdres->intervals = intervals;
265 tdres->cur_interval.start = 0;
266 tdres->cur_interval.end = 0;
267 tdres->num_intervals = num_intervals;
268 tdres->res.ops = &td_ops;
269}
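/* Editorial sketch (not part of this patch): a two-slot table covering
 * [0, 10) and [50, 60) within a major cycle of 100 time units, which
 * satisfies every BUG_ON above. Static storage and the example_ names are
 * used only to keep the sketch short; interpreting lt_t as nanoseconds is
 * an assumption. */
static struct lt_interval example_slots[] = {
	{ .start =  0, .end = 10 },
	{ .start = 50, .end = 60 },
};

static struct table_driven_reservation example_td_res;

static void example_table_setup(void)
{
	table_driven_reservation_init(&example_td_res,
		100, /* major cycle length */
		example_slots, 2);
}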
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..733a483e3084
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,351 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17#include <litmus/debug_trace.h>
18
19#include <litmus/rt_domain.h>
20
21#include <litmus/trace.h>
22
23#include <litmus/bheap.h>
24
25/* Uncomment when debugging timer races... */
26#if 0
27#define VTRACE_TASK TRACE_TASK
28#define VTRACE TRACE
29#else
30#define VTRACE_TASK(t, fmt, args...) /* shut up */
31#define VTRACE(fmt, args...) /* be quiet already */
32#endif
33
34static int dummy_resched(rt_domain_t *rt)
35{
36 return 0;
37}
38
39static int dummy_order(struct bheap_node* a, struct bheap_node* b)
40{
41 return 0;
42}
43
44/* default implementation: use default lock */
45static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
46{
47 merge_ready(rt, tasks);
48}
49
50static unsigned int time2slot(lt_t time)
51{
52 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
53}
54
55static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
56{
57 unsigned long flags;
58 struct release_heap* rh;
59 rh = container_of(timer, struct release_heap, timer);
60
61 TS_RELEASE_LATENCY(rh->release_time);
62
63 VTRACE("on_release_timer(0x%p) starts.\n", timer);
64
65 TS_RELEASE_START;
66
67
68 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
69 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
70 /* remove from release queue */
71 list_del(&rh->list);
72 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
73 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
74
75 /* call release callback */
76 rh->dom->release_jobs(rh->dom, &rh->heap);
77 /* WARNING: rh can be referenced from other CPUs from now on. */
78
79 TS_RELEASE_END;
80
81 VTRACE("on_release_timer(0x%p) ends.\n", timer);
82
83 return HRTIMER_NORESTART;
84}
85
86/* allocated in litmus.c */
87struct kmem_cache * release_heap_cache;
88
89struct release_heap* release_heap_alloc(int gfp_flags)
90{
91 struct release_heap* rh;
92 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
93 if (rh) {
94 /* initialize timer */
95 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
96 rh->timer.function = on_release_timer;
97 }
98 return rh;
99}
100
101void release_heap_free(struct release_heap* rh)
102{
103 /* make sure timer is no longer in use */
104 hrtimer_cancel(&rh->timer);
105 kmem_cache_free(release_heap_cache, rh);
106}
107
108/* Caller must hold release lock.
109 * Will return heap for given time. If no such heap exists prior to
110 * the invocation it will be created.
111 */
112static struct release_heap* get_release_heap(rt_domain_t *rt,
113 struct task_struct* t,
114 int use_task_heap)
115{
116 struct list_head* pos;
117 struct release_heap* heap = NULL;
118 struct release_heap* rh;
119 lt_t release_time = get_release(t);
120 unsigned int slot = time2slot(release_time);
121
122 /* initialize pos for the case that the list is empty */
123 pos = rt->release_queue.slot[slot].next;
124 list_for_each(pos, &rt->release_queue.slot[slot]) {
125 rh = list_entry(pos, struct release_heap, list);
126 if (release_time == rh->release_time) {
127 /* perfect match -- this happens on hyperperiod
128 * boundaries
129 */
130 heap = rh;
131 break;
132 } else if (lt_before(release_time, rh->release_time)) {
133 /* we need to insert a new node since rh is
134 * already in the future
135 */
136 break;
137 }
138 }
139 if (!heap && use_task_heap) {
140 /* use pre-allocated release heap */
141 rh = tsk_rt(t)->rel_heap;
142
143 rh->dom = rt;
144 rh->release_time = release_time;
145
146 /* add to release queue */
147 list_add(&rh->list, pos->prev);
148 heap = rh;
149 }
150 return heap;
151}
152
153static void reinit_release_heap(struct task_struct* t)
154{
155 struct release_heap* rh;
156
157 /* use pre-allocated release heap */
158 rh = tsk_rt(t)->rel_heap;
159
160 /* Make sure it is safe to use. The timer callback could still
161 * be executing on another CPU; hrtimer_cancel() will wait
162 * until the timer callback has completed. However, under no
163 * circumstances should the timer be active (= yet to be
164 * triggered).
165 *
166 * WARNING: If the CPU still holds the release_lock at this point,
167 * deadlock may occur!
168 */
169 BUG_ON(hrtimer_cancel(&rh->timer));
170
171 /* initialize */
172 bheap_init(&rh->heap);
173}
174/* arm_release_timer() - start local release timer or trigger
175 * remote timer (pull timer)
176 *
177 * Called by add_release() with:
178 * - tobe_lock taken
179 * - IRQ disabled
180 */
181#ifdef CONFIG_RELEASE_MASTER
182#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
183static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
184#else
185static void arm_release_timer(rt_domain_t *_rt)
186#endif
187{
188 rt_domain_t *rt = _rt;
189 struct list_head list;
190 struct list_head *pos, *safe;
191 struct task_struct* t;
192 struct release_heap* rh;
193
194 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
195 list_replace_init(&rt->tobe_released, &list);
196
197 list_for_each_safe(pos, safe, &list) {
198 /* pick task of work list */
199 t = list_entry(pos, struct task_struct, rt_param.list);
200 sched_trace_task_release(t);
201 list_del(pos);
202
203 /* put into release heap while holding release_lock */
204 raw_spin_lock(&rt->release_lock);
205 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
206
207 rh = get_release_heap(rt, t, 0);
208 if (!rh) {
209 /* need to use our own, but drop lock first */
210 raw_spin_unlock(&rt->release_lock);
211 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
212 &rt->release_lock);
213
214 reinit_release_heap(t);
215 VTRACE_TASK(t, "release_heap ready\n");
216
217 raw_spin_lock(&rt->release_lock);
218 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
219 &rt->release_lock);
220
221 rh = get_release_heap(rt, t, 1);
222 }
223 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
224 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
225
226 raw_spin_unlock(&rt->release_lock);
227 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
228
229 /* To avoid arming the timer multiple times, we only let the
230 * owner do the arming (which is the "first" task to reference
231 * this release_heap anyway).
232 */
233 if (rh == tsk_rt(t)->rel_heap) {
234 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
235
236 if (!hrtimer_is_hres_active(&rh->timer)) {
237 TRACE_TASK(t, "WARNING: no hires timer!!!\n");
238 }
239
240 /* we cannot arm the timer using hrtimer_start()
241 * as it may deadlock on rq->lock
242 *
243 * PINNED mode is ok on both local and remote CPU
244 */
245#ifdef CONFIG_RELEASE_MASTER
246 if (rt->release_master == NO_CPU &&
247 target_cpu == NO_CPU)
248#endif
249 hrtimer_start(&rh->timer,
250 ns_to_ktime(rh->release_time),
251 HRTIMER_MODE_ABS_PINNED);
252#ifdef CONFIG_RELEASE_MASTER
253 else
254 hrtimer_start_on(
255 /* target_cpu overrides release master */
256 (target_cpu != NO_CPU ?
257 target_cpu : rt->release_master),
258 &rh->info, &rh->timer,
259 ns_to_ktime(rh->release_time),
260 HRTIMER_MODE_ABS_PINNED);
261#endif
262 } else
263 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
264 }
265}
266
267void rt_domain_init(rt_domain_t *rt,
268 bheap_prio_t order,
269 check_resched_needed_t check,
270 release_jobs_t release
271 )
272{
273 int i;
274
275 BUG_ON(!rt);
276 if (!check)
277 check = dummy_resched;
278 if (!release)
279 release = default_release_jobs;
280 if (!order)
281 order = dummy_order;
282
283#ifdef CONFIG_RELEASE_MASTER
284 rt->release_master = NO_CPU;
285#endif
286
287 bheap_init(&rt->ready_queue);
288 INIT_LIST_HEAD(&rt->tobe_released);
289 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
290 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
291
292 raw_spin_lock_init(&rt->ready_lock);
293 raw_spin_lock_init(&rt->release_lock);
294 raw_spin_lock_init(&rt->tobe_lock);
295
296 rt->check_resched = check;
297 rt->release_jobs = release;
298 rt->order = order;
299}
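/* Editorial sketch (not part of this patch): minimal initialization of an
 * rt_domain with a custom priority order. Passing NULL for the resched and
 * release hooks selects the dummy/default implementations installed above;
 * the example_ names are hypothetical. */
static int example_order(struct bheap_node *a, struct bheap_node *b)
{
	/* a real plugin compares the tasks stored in the heap nodes here,
	 * e.g. via edf_higher_prio() in the EDF-based plugins */
	return 0;
}

static rt_domain_t example_domain;

static void example_domain_setup(void)
{
	rt_domain_init(&example_domain, example_order, NULL, NULL);
}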
300
301/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
302 * @new: the newly released task
303 */
304void __add_ready(rt_domain_t* rt, struct task_struct *new)
305{
306 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
307 "to ready queue at %llu\n",
308 new->comm, new->pid,
309 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
310 get_release(new), litmus_clock());
311
312 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
313
314 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
315 rt->check_resched(rt);
316}
317
318/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
319 * @tasks - the newly released tasks
320 */
321void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
322{
323 bheap_union(rt->order, &rt->ready_queue, tasks);
324 rt->check_resched(rt);
325}
326
327
328#ifdef CONFIG_RELEASE_MASTER
329void __add_release_on(rt_domain_t* rt, struct task_struct *task,
330 int target_cpu)
331{
332 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
333 get_release(task), target_cpu);
334 list_add(&tsk_rt(task)->list, &rt->tobe_released);
335 task->rt_param.domain = rt;
336
337 arm_release_timer_on(rt, target_cpu);
338}
339#endif
340
341/* add_release - add a real-time task to the rt release queue.
342 * @task: the sleeping task
343 */
344void __add_release(rt_domain_t* rt, struct task_struct *task)
345{
346 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
347 list_add(&tsk_rt(task)->list, &rt->tobe_released);
348 task->rt_param.domain = rt;
349
350 arm_release_timer(rt);
351}
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..d12a5958f5dc
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,890 @@
1/*
2 * litmus/sched_cedf.c
3 *
4 * Implementation of the C-EDF scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch-dependent
9 *   and currently works only on x86 --- and only with modern
10 *   CPUs that export cpuid4 information)
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 *   the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - default clustering is around L2 cache (cache index = 2)
15 * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
16 * online_cpus are placed in a single cluster).
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32
33#include <linux/module.h>
34
35#include <litmus/debug_trace.h>
36#include <litmus/litmus.h>
37#include <litmus/jobs.h>
38#include <litmus/preempt.h>
39#include <litmus/budget.h>
40#include <litmus/np.h>
41#include <litmus/sched_plugin.h>
42#include <litmus/edf_common.h>
43#include <litmus/sched_trace.h>
44
45#include <litmus/clustered.h>
46
47#include <litmus/bheap.h>
48
49#ifdef CONFIG_SCHED_CPU_AFFINITY
50#include <litmus/affinity.h>
51#endif
52
53/* to configure the cluster size */
54#include <litmus/litmus_proc.h>
55#include <linux/uaccess.h>
56
57/* Reference configuration variable. Determines which cache level is used to
58 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
59 * all CPUs form a single cluster (just like GSN-EDF).
60 */
61static enum cache_level cluster_config = GLOBAL_CLUSTER;
62
63struct clusterdomain;
64
65/* cpu_entry_t - maintain the linked and scheduled state
66 *
67 * A cpu also contains a pointer to the cedf_domain_t cluster
68 * that owns it (struct clusterdomain*)
69 */
70typedef struct {
71 int cpu;
72 struct clusterdomain* cluster; /* owning cluster */
73 struct task_struct* linked; /* only RT tasks */
74 struct task_struct* scheduled; /* only RT tasks */
75 atomic_t will_schedule; /* prevent unneeded IPIs */
76 struct bheap_node* hn;
77} cpu_entry_t;
78
79/* one cpu_entry_t per CPU */
80DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
81
82/*
83 * In C-EDF there is a cedf domain _per_ cluster
84 * The number of clusters is dynamically determined according to the
85 * total cpu number and the cluster size
86 */
87typedef struct clusterdomain {
88 /* rt_domain for this cluster */
89 rt_domain_t domain;
90 /* cpus in this cluster */
91 cpu_entry_t* *cpus;
92 /* map of this cluster cpus */
93 cpumask_var_t cpu_map;
94 /* the cpus queue themselves according to priority in here */
95 struct bheap_node *heap_node;
96 struct bheap cpu_heap;
97 /* lock for this cluster */
98#define cluster_lock domain.ready_lock
99} cedf_domain_t;
100
101/* a cedf_domain per cluster; allocation is done at init/activation time */
102cedf_domain_t *cedf;
103
104#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
105#define task_cpu_cluster(task) remote_cluster(get_partition(task))
106
107/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
108 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
109 * information during the initialization of the plugin (e.g., topology)
110#define WANT_ALL_SCHED_EVENTS
111 */
112#define VERBOSE_INIT
113
114static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
115{
116 cpu_entry_t *a, *b;
117 a = _a->value;
118 b = _b->value;
119 /* Note that a and b are inverted: we want the lowest-priority CPU at
120 * the top of the heap.
121 */
122 return edf_higher_prio(b->linked, a->linked);
123}
124
125/* update_cpu_position - Move the cpu entry to the correct place to maintain
126 * order in the cpu queue. Caller must hold cedf lock.
127 */
128static void update_cpu_position(cpu_entry_t *entry)
129{
130 cedf_domain_t *cluster = entry->cluster;
131
132 if (likely(bheap_node_in_heap(entry->hn)))
133 bheap_delete(cpu_lower_prio,
134 &cluster->cpu_heap,
135 entry->hn);
136
137 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
138}
139
140/* caller must hold cedf lock */
141static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
142{
143 struct bheap_node* hn;
144 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
145 return hn->value;
146}
147
148
149/* link_task_to_cpu - Update the link of a CPU.
150 * Handles the case where the to-be-linked task is already
151 * scheduled on a different CPU.
152 */
153static noinline void link_task_to_cpu(struct task_struct* linked,
154 cpu_entry_t *entry)
155{
156 cpu_entry_t *sched;
157 struct task_struct* tmp;
158 int on_cpu;
159
160 BUG_ON(linked && !is_realtime(linked));
161
162 /* Currently linked task is set to be unlinked. */
163 if (entry->linked) {
164 entry->linked->rt_param.linked_on = NO_CPU;
165 }
166
167 /* Link new task to CPU. */
168 if (linked) {
169		/* handle the case where the task is already scheduled somewhere! */
170 on_cpu = linked->rt_param.scheduled_on;
171 if (on_cpu != NO_CPU) {
172 sched = &per_cpu(cedf_cpu_entries, on_cpu);
173 /* this should only happen if not linked already */
174 BUG_ON(sched->linked == linked);
175
176 /* If we are already scheduled on the CPU to which we
177 * wanted to link, we don't need to do the swap --
178 * we just link ourselves to the CPU and depend on
179 * the caller to get things right.
180 */
181 if (entry != sched) {
182 TRACE_TASK(linked,
183 "already scheduled on %d, updating link.\n",
184 sched->cpu);
185 tmp = sched->linked;
186 linked->rt_param.linked_on = sched->cpu;
187 sched->linked = linked;
188 update_cpu_position(sched);
189 linked = tmp;
190 }
191 }
192 if (linked) /* might be NULL due to swap */
193 linked->rt_param.linked_on = entry->cpu;
194 }
195 entry->linked = linked;
196#ifdef WANT_ALL_SCHED_EVENTS
197 if (linked)
198 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
199 else
200 TRACE("NULL linked to %d.\n", entry->cpu);
201#endif
202 update_cpu_position(entry);
203}
204
205/* unlink - Make sure a task is not linked any longer to an entry
206 * where it was linked before. Must hold cedf_lock.
207 */
208static noinline void unlink(struct task_struct* t)
209{
210 cpu_entry_t *entry;
211
212 if (t->rt_param.linked_on != NO_CPU) {
213 /* unlink */
214 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
215 t->rt_param.linked_on = NO_CPU;
216 link_task_to_cpu(NULL, entry);
217 } else if (is_queued(t)) {
218 /* This is an interesting situation: t is scheduled,
219 * but was just recently unlinked. It cannot be
220 * linked anywhere else (because then it would have
221 * been relinked to this CPU), thus it must be in some
222 * queue. We must remove it from the list in this
223 * case.
224 *
225	 * In the C-EDF case it should be somewhere in the queue for
226	 * its domain, so we can get the domain using
227	 * task_cpu_cluster().
228 */
229 remove(&(task_cpu_cluster(t))->domain, t);
230 }
231}
232
233
234/* preempt - force a CPU to reschedule
235 */
236static void preempt(cpu_entry_t *entry)
237{
238 preempt_if_preemptable(entry->scheduled, entry->cpu);
239}
240
241/* requeue - Put an unlinked task into the cedf domain.
242 * Caller must hold cedf_lock.
243 */
244static noinline void requeue(struct task_struct* task)
245{
246 cedf_domain_t *cluster = task_cpu_cluster(task);
247 BUG_ON(!task);
248 /* sanity check before insertion */
249 BUG_ON(is_queued(task));
250
251 if (is_early_releasing(task) || is_released(task, litmus_clock()))
252 __add_ready(&cluster->domain, task);
253 else {
254 /* it has got to wait */
255 add_release(&cluster->domain, task);
256 }
257}
258
259#ifdef CONFIG_SCHED_CPU_AFFINITY
260static cpu_entry_t* cedf_get_nearest_available_cpu(
261 cedf_domain_t *cluster, cpu_entry_t *start)
262{
263 cpu_entry_t *affinity;
264
265 get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
266#ifdef CONFIG_RELEASE_MASTER
267 cluster->domain.release_master,
268#else
269 NO_CPU,
270#endif
271 cluster->cpu_map);
272
273 /* make sure CPU is in our cluster */
274 if (affinity && cpumask_test_cpu(affinity->cpu, cluster->cpu_map))
275 return(affinity);
276 else
277 return(NULL);
278}
279#endif
280
281
282/* check for any necessary preemptions */
283static void check_for_preemptions(cedf_domain_t *cluster)
284{
285 struct task_struct *task;
286 cpu_entry_t *last;
287
288#ifdef CONFIG_PREFER_LOCAL_LINKING
289 cpu_entry_t *local;
290
291 /* Before linking to other CPUs, check first whether the local CPU is
292 * idle. */
293 local = this_cpu_ptr(&cedf_cpu_entries);
294 task = __peek_ready(&cluster->domain);
295
296 if (task && !local->linked
297#ifdef CONFIG_RELEASE_MASTER
298 && likely(local->cpu != cluster->domain.release_master)
299#endif
300 ) {
301 task = __take_ready(&cluster->domain);
302 TRACE_TASK(task, "linking to local CPU %d to avoid IPI\n", local->cpu);
303 link_task_to_cpu(task, local);
304 preempt(local);
305 }
306#endif
307
308
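	/* Link the highest-priority ready job to the lowest-priority CPU of
	 * this cluster until no linked job has lower priority than a job
	 * that is still waiting in the ready queue. */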
309 for(last = lowest_prio_cpu(cluster);
310 edf_preemption_needed(&cluster->domain, last->linked);
311 last = lowest_prio_cpu(cluster)) {
312 /* preemption necessary */
313 task = __take_ready(&cluster->domain);
314 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
315 task->pid, last->cpu);
316#ifdef CONFIG_SCHED_CPU_AFFINITY
317 {
318 cpu_entry_t *affinity =
319 cedf_get_nearest_available_cpu(cluster,
320 &per_cpu(cedf_cpu_entries, task_cpu(task)));
321 if(affinity)
322 last = affinity;
323 else if(requeue_preempted_job(last->linked))
324 requeue(last->linked);
325 }
326#else
327 if (requeue_preempted_job(last->linked))
328 requeue(last->linked);
329#endif
330 link_task_to_cpu(task, last);
331 preempt(last);
332 }
333}
334
335/* cedf_job_arrival: task is either resumed or released */
336static noinline void cedf_job_arrival(struct task_struct* task)
337{
338 cedf_domain_t *cluster = task_cpu_cluster(task);
339 BUG_ON(!task);
340
341 requeue(task);
342 check_for_preemptions(cluster);
343}
344
345static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
346{
347 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
348 unsigned long flags;
349
350 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
351
352 __merge_ready(&cluster->domain, tasks);
353 check_for_preemptions(cluster);
354
355 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
356}
357
358/* caller holds cedf_lock */
359static noinline void current_job_completion(int forced)
360{
361 struct task_struct *t = current;
362
363 sched_trace_task_completion(t, forced);
364
365 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
366
367 /* set flags */
368 tsk_rt(t)->completed = 0;
369 /* prepare for next period */
370 prepare_for_next_period(t);
371 if (is_early_releasing(t) || is_released(t, litmus_clock()))
372 sched_trace_task_release(t);
373 /* unlink */
374 unlink(t);
375 /* requeue
376 * But don't requeue a blocking task. */
377 if (is_current_running())
378 cedf_job_arrival(t);
379}
380
381/* Getting schedule() right is a bit tricky. schedule() may not make any
382 * assumptions on the state of the current task since it may be called for a
383 * number of reasons. These include that a scheduler_tick() determined it
384 * was necessary, that sys_exit_np() was called, that some Linux
385 * subsystem determined so, or even (in the worst case) that there is a bug
386 * hidden somewhere. Thus, we must take extreme care to determine what the
387 * current state is.
388 *
389 * The CPU could currently be scheduling a task (or not), be linked (or not).
390 *
391 * The following assertions for the scheduled task could hold:
392 *
393 * - !is_running(scheduled) // the job blocks
394 * - scheduled->timeslice == 0 // the job completed (forcefully)
395 * - is_completed() // the job completed (by syscall)
396 * - linked != scheduled // we need to reschedule (for any reason)
397 * - is_np(scheduled) // rescheduling must be delayed,
398 * sys_exit_np must be requested
399 *
400 * Any of these can occur together.
401 */
402static struct task_struct* cedf_schedule(struct task_struct * prev)
403{
404 cpu_entry_t* entry = this_cpu_ptr(&cedf_cpu_entries);
405 cedf_domain_t *cluster = entry->cluster;
406 int out_of_time, sleep, preempt, np, exists, blocks;
407 struct task_struct* next = NULL;
408
409#ifdef CONFIG_RELEASE_MASTER
410 /* Bail out early if we are the release master.
411 * The release master never schedules any real-time tasks.
412 */
413 if (unlikely(cluster->domain.release_master == entry->cpu)) {
414 sched_state_task_picked();
415 return NULL;
416 }
417#endif
418
419 raw_spin_lock(&cluster->cluster_lock);
420
421 /* sanity checking */
422 BUG_ON(entry->scheduled && entry->scheduled != prev);
423 BUG_ON(entry->scheduled && !is_realtime(prev));
424 BUG_ON(is_realtime(prev) && !entry->scheduled);
425
426 /* (0) Determine state */
427 exists = entry->scheduled != NULL;
428 blocks = exists && !is_current_running();
429 out_of_time = exists && budget_enforced(entry->scheduled)
430 && budget_exhausted(entry->scheduled);
431 np = exists && is_np(entry->scheduled);
432 sleep = exists && is_completed(entry->scheduled);
433 preempt = entry->scheduled != entry->linked;
434
435#ifdef WANT_ALL_SCHED_EVENTS
436 TRACE_TASK(prev, "invoked cedf_schedule.\n");
437#endif
438
439 if (exists)
440 TRACE_TASK(prev,
441 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
442 "state:%d sig:%d\n",
443 blocks, out_of_time, np, sleep, preempt,
444 prev->state, signal_pending(prev));
445 if (entry->linked && preempt)
446 TRACE_TASK(prev, "will be preempted by %s/%d\n",
447 entry->linked->comm, entry->linked->pid);
448
449
450 /* If a task blocks we have no choice but to reschedule.
451 */
452 if (blocks)
453 unlink(entry->scheduled);
454
455 /* Request a sys_exit_np() call if we would like to preempt but cannot.
456 * We need to make sure to update the link structure anyway in case
457 * that we are still linked. Multiple calls to request_exit_np() don't
458 * hurt.
459 */
460 if (np && (out_of_time || preempt || sleep)) {
461 unlink(entry->scheduled);
462 request_exit_np(entry->scheduled);
463 }
464
465 /* Any task that is preemptable and either exhausts its execution
466 * budget or wants to sleep completes. We may have to reschedule after
467 * this. Don't do a job completion if we block (can't have timers running
468 * for blocked jobs).
469 */
470 if (!np && (out_of_time || sleep))
471 current_job_completion(!sleep);
472
473 /* Link pending task if we became unlinked.
474 */
475 if (!entry->linked)
476 link_task_to_cpu(__take_ready(&cluster->domain), entry);
477
478 /* The final scheduling decision. Do we need to switch for some reason?
479 * If linked is different from scheduled, then select linked as next.
480 */
481 if ((!np || blocks) &&
482 entry->linked != entry->scheduled) {
483 /* Schedule a linked job? */
484 if (entry->linked) {
485 entry->linked->rt_param.scheduled_on = entry->cpu;
486 next = entry->linked;
487 }
488 if (entry->scheduled) {
489 /* not gonna be scheduled soon */
490 entry->scheduled->rt_param.scheduled_on = NO_CPU;
491 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
492 }
493 } else
494 /* Only override Linux scheduler if we have a real-time task
495 * scheduled that needs to continue.
496 */
497 if (exists)
498 next = prev;
499
500 sched_state_task_picked();
501 raw_spin_unlock(&cluster->cluster_lock);
502
503#ifdef WANT_ALL_SCHED_EVENTS
504 TRACE("cedf_lock released, next=0x%p\n", next);
505
506 if (next)
507 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
508 else if (exists && !next)
509 TRACE("becomes idle at %llu.\n", litmus_clock());
510#endif
511
512
513 return next;
514}
515
516
517/* _finish_switch - we just finished the switch away from prev
518 */
519static void cedf_finish_switch(struct task_struct *prev)
520{
521 cpu_entry_t* entry = this_cpu_ptr(&cedf_cpu_entries);
522
523 entry->scheduled = is_realtime(current) ? current : NULL;
524#ifdef WANT_ALL_SCHED_EVENTS
525 TRACE_TASK(prev, "switched away from\n");
526#endif
527}
528
529
530/* Prepare a task for running in RT mode
531 */
532static void cedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
533{
534 unsigned long flags;
535 cpu_entry_t* entry;
536 cedf_domain_t* cluster;
537
538	TRACE("C-EDF: task new %d\n", t->pid);
539
540 /* the cluster doesn't change even if t is scheduled */
541 cluster = task_cpu_cluster(t);
542
543 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
544
545 /* setup job params */
546 release_at(t, litmus_clock());
547
548 if (is_scheduled) {
549 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
550 BUG_ON(entry->scheduled);
551
552#ifdef CONFIG_RELEASE_MASTER
553 if (entry->cpu != cluster->domain.release_master) {
554#endif
555 entry->scheduled = t;
556 tsk_rt(t)->scheduled_on = task_cpu(t);
557#ifdef CONFIG_RELEASE_MASTER
558 } else {
559 /* do not schedule on release master */
560 preempt(entry); /* force resched */
561 tsk_rt(t)->scheduled_on = NO_CPU;
562 }
563#endif
564 } else {
565 t->rt_param.scheduled_on = NO_CPU;
566 }
567 t->rt_param.linked_on = NO_CPU;
568
569 if (on_rq || is_scheduled)
570 cedf_job_arrival(t);
571 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
572}
573
574static void cedf_task_wake_up(struct task_struct *task)
575{
576 unsigned long flags;
577 lt_t now;
578 cedf_domain_t *cluster;
579
580 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
581
582 cluster = task_cpu_cluster(task);
583
584 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
585 now = litmus_clock();
586 if (is_sporadic(task) && is_tardy(task, now)) {
587 inferred_sporadic_job_release_at(task, now);
588 }
589 cedf_job_arrival(task);
590 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
591}
592
593static void cedf_task_block(struct task_struct *t)
594{
595 unsigned long flags;
596 cedf_domain_t *cluster;
597
598 TRACE_TASK(t, "block at %llu\n", litmus_clock());
599
600 cluster = task_cpu_cluster(t);
601
602 /* unlink if necessary */
603 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
604 unlink(t);
605 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
606
607 BUG_ON(!is_realtime(t));
608}
609
610
611static void cedf_task_exit(struct task_struct * t)
612{
613 unsigned long flags;
614 cedf_domain_t *cluster = task_cpu_cluster(t);
615
616 /* unlink if necessary */
617 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
618 unlink(t);
619 if (tsk_rt(t)->scheduled_on != NO_CPU) {
620 cpu_entry_t *cpu;
621 cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
622 cpu->scheduled = NULL;
623 tsk_rt(t)->scheduled_on = NO_CPU;
624 }
625 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
626
627 BUG_ON(!is_realtime(t));
628 TRACE_TASK(t, "RIP\n");
629}
630
631static long cedf_admit_task(struct task_struct* tsk)
632{
633 return (remote_cluster(task_cpu(tsk)) == task_cpu_cluster(tsk)) ?
634 0 : -EINVAL;
635}
636
637/* total number of clusters */
638static int num_clusters;
639/* we do not support clusters of different sizes */
640static unsigned int cluster_size;
641
642#ifdef VERBOSE_INIT
643static void print_cluster_topology(cpumask_var_t mask, int cpu)
644{
645 printk(KERN_INFO "CPU = %d, shared cpu(s) = %*pbl\n", cpu,
646 cpumask_pr_args(mask));
647
648}
649#endif
650
651static int clusters_allocated = 0;
652
653static void cleanup_cedf(void)
654{
655 int i;
656
657 if (clusters_allocated) {
658 for (i = 0; i < num_clusters; i++) {
659 kfree(cedf[i].cpus);
660 kfree(cedf[i].heap_node);
661 free_cpumask_var(cedf[i].cpu_map);
662 }
663
664 kfree(cedf);
665 }
666}
667
668static struct domain_proc_info cedf_domain_proc_info;
669static long cedf_get_domain_proc_info(struct domain_proc_info **ret)
670{
671 *ret = &cedf_domain_proc_info;
672 return 0;
673}
674
675static void cedf_setup_domain_proc(void)
676{
677 int i, cpu, domain;
678#ifdef CONFIG_RELEASE_MASTER
679 int release_master = atomic_read(&release_master_cpu);
680 /* skip over the domain with the release master if cluster size is 1 */
681 int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
682 release_master : NO_CPU;
683#else
684 int release_master = NO_CPU;
685 int skip_domain = NO_CPU;
686#endif
687 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
688 int num_rt_domains = num_clusters - (skip_domain != NO_CPU);
689 struct cd_mapping *map;
690
691 memset(&cedf_domain_proc_info, 0, sizeof(cedf_domain_proc_info));
692 init_domain_proc_info(&cedf_domain_proc_info, num_rt_cpus, num_rt_domains);
693 cedf_domain_proc_info.num_cpus = num_rt_cpus;
694 cedf_domain_proc_info.num_domains = num_rt_domains;
695
696 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
697 if (cpu == release_master)
698 continue;
699 map = &cedf_domain_proc_info.cpu_to_domains[i];
700 /* pointer math to figure out the domain index */
701 domain = remote_cluster(cpu) - cedf;
702 map->id = cpu;
703 cpumask_set_cpu(domain, map->mask);
704 ++i;
705 }
706
707 for (domain = 0, i = 0; domain < num_clusters; ++domain) {
708 if (domain == skip_domain)
709 continue;
710 map = &cedf_domain_proc_info.domain_to_cpus[i];
711 map->id = i;
712 cpumask_copy(map->mask, cedf[domain].cpu_map);
713 ++i;
714 }
715}
716
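/* Plugin activation: determine the cluster size from the configured cache
 * level, allocate one cedf_domain_t per cluster, assign each online CPU to
 * exactly one cluster (initializing its cpu_entry_t and heap node), and
 * finally export the resulting CPU/domain mapping via /proc. */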
717static long cedf_activate_plugin(void)
718{
719 int i, j, cpu, ccpu, cpu_count;
720 cpu_entry_t *entry;
721
722 cpumask_var_t mask;
723 int chk = 0;
724
725 /* de-allocate old clusters, if any */
726 cleanup_cedf();
727
728 printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
729 cluster_config);
730
731 /* need to get cluster_size first */
732 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
733 return -ENOMEM;
734
735 if (cluster_config == GLOBAL_CLUSTER) {
736 cluster_size = num_online_cpus();
737 } else {
738 chk = get_shared_cpu_map(mask, 0, cluster_config);
739 if (chk) {
740 /* if chk != 0 then it is the max allowed index */
741 printk(KERN_INFO "C-EDF: Cluster configuration = %d "
742 "is not supported on this hardware.\n",
743 cluster_config);
744 /* User should notice that the configuration failed, so
745 * let's bail out. */
746 return -EINVAL;
747 }
748
749 cluster_size = cpumask_weight(mask);
750 }
751
752 if ((num_online_cpus() % cluster_size) != 0) {
753 /* this can't be right, some cpus are left out */
754 printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
755 num_online_cpus(), cluster_size);
756 return -1;
757 }
758
759 num_clusters = num_online_cpus() / cluster_size;
760 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
761 num_clusters, cluster_size);
762
763 /* initialize clusters */
764 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
765 for (i = 0; i < num_clusters; i++) {
766
767 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
768 GFP_ATOMIC);
769 cedf[i].heap_node = kmalloc(
770 cluster_size * sizeof(struct bheap_node),
771 GFP_ATOMIC);
772 bheap_init(&(cedf[i].cpu_heap));
773 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
774
775 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
776 return -ENOMEM;
777#ifdef CONFIG_RELEASE_MASTER
778 cedf[i].domain.release_master = atomic_read(&release_master_cpu);
779#endif
780 }
781
782	/* cycle through clusters and add cpus to them */
783 for (i = 0; i < num_clusters; i++) {
784
785 for_each_online_cpu(cpu) {
786 /* check if the cpu is already in a cluster */
787 for (j = 0; j < num_clusters; j++)
788 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
789 break;
790 /* if it is in a cluster go to next cpu */
791 if (j < num_clusters &&
792 cpumask_test_cpu(cpu, cedf[j].cpu_map))
793 continue;
794
795 /* this cpu isn't in any cluster */
796 /* get the shared cpus */
797 if (unlikely(cluster_config == GLOBAL_CLUSTER))
798 cpumask_copy(mask, cpu_online_mask);
799 else
800 get_shared_cpu_map(mask, cpu, cluster_config);
801
802 cpumask_copy(cedf[i].cpu_map, mask);
803#ifdef VERBOSE_INIT
804 print_cluster_topology(mask, cpu);
805#endif
806 /* add cpus to current cluster and init cpu_entry_t */
807 cpu_count = 0;
808 for_each_cpu(ccpu, cedf[i].cpu_map) {
809
810 entry = &per_cpu(cedf_cpu_entries, ccpu);
811 cedf[i].cpus[cpu_count] = entry;
812 atomic_set(&entry->will_schedule, 0);
813 entry->cpu = ccpu;
814 entry->cluster = &cedf[i];
815 entry->hn = &(cedf[i].heap_node[cpu_count]);
816 bheap_node_init(&entry->hn, entry);
817
818 cpu_count++;
819
820 entry->linked = NULL;
821 entry->scheduled = NULL;
822#ifdef CONFIG_RELEASE_MASTER
823 /* only add CPUs that should schedule jobs */
824 if (entry->cpu != entry->cluster->domain.release_master)
825#endif
826 update_cpu_position(entry);
827 }
828 /* done with this cluster */
829 break;
830 }
831 }
832
833 clusters_allocated = 1;
834 free_cpumask_var(mask);
835
836 cedf_setup_domain_proc();
837
838 return 0;
839}
840
841static long cedf_deactivate_plugin(void)
842{
843 destroy_domain_proc_info(&cedf_domain_proc_info);
844 return 0;
845}
846
847/* Plugin object */
848static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
849 .plugin_name = "C-EDF",
850 .finish_switch = cedf_finish_switch,
851 .task_new = cedf_task_new,
852 .complete_job = complete_job,
853 .task_exit = cedf_task_exit,
854 .schedule = cedf_schedule,
855 .task_wake_up = cedf_task_wake_up,
856 .task_block = cedf_task_block,
857 .admit_task = cedf_admit_task,
858 .activate_plugin = cedf_activate_plugin,
859 .deactivate_plugin = cedf_deactivate_plugin,
860 .get_domain_proc_info = cedf_get_domain_proc_info,
861};
862
863static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
864
865static int __init init_cedf(void)
866{
867 int err, fs;
868
869 err = register_sched_plugin(&cedf_plugin);
870 if (!err) {
871 fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
872 if (!fs)
873 cluster_file = create_cluster_file(cedf_dir, &cluster_config);
874 else
875 printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
876 }
877 return err;
878}
879
880static void clean_cedf(void)
881{
882 cleanup_cedf();
883 if (cluster_file)
884 remove_proc_entry("cluster", cedf_dir);
885 if (cedf_dir)
886 remove_plugin_proc_dir(&cedf_plugin);
887}
888
889module_init(init_cedf);
890module_exit(clean_cedf);
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
new file mode 100644
index 000000000000..8f28dc4e5192
--- /dev/null
+++ b/litmus/sched_gsn_edf.c
@@ -0,0 +1,1070 @@
1/*
2 * litmus/sched_gsn_edf.c
3 *
4 * Implementation of the GSN-EDF scheduling algorithm.
5 *
6 * This version uses the simple approach and serializes all scheduling
7 * decisions by the use of a queue lock. This is probably not the
8 * best way to do it, but it should suffice for now.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/slab.h>
15
16#include <litmus/debug_trace.h>
17#include <litmus/litmus.h>
18#include <litmus/jobs.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/edf_common.h>
21#include <litmus/sched_trace.h>
22#include <litmus/trace.h>
23
24#include <litmus/preempt.h>
25#include <litmus/budget.h>
26#include <litmus/np.h>
27
28#include <litmus/bheap.h>
29
30#ifdef CONFIG_SCHED_CPU_AFFINITY
31#include <litmus/affinity.h>
32#endif
33
34/* to set up domain/cpu mappings */
35#include <litmus/litmus_proc.h>
36
37#include <linux/module.h>
38
39/* Overview of GSN-EDF operations.
40 *
41 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
42 * description only covers how the individual operations are implemented in
43 * LITMUS.
44 *
45 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
46 * structure (NOT the actually scheduled
47 * task). If there is another linked task To
48 * already it will set To->linked_on = NO_CPU
49 * (thereby removing its association with this
50 * CPU). However, it will not requeue the
51 * previously linked task (if any). It will set
52 * T's state to not completed and check whether
53 * it is already running somewhere else. If T
54 * is scheduled somewhere else it will link
55 * it to that CPU instead (and pull the linked
56 * task to cpu). T may be NULL.
57 *
58 * unlink(T) - Unlink removes T from all scheduler data
59 * structures. If it is linked to some CPU it
60 * will link NULL to that CPU. If it is
61 * currently queued in the gsnedf queue it will
62 * be removed from the rt_domain. It is safe to
63 * call unlink(T) if T is not linked. T may not
64 * be NULL.
65 *
66 * requeue(T) - Requeue will insert T into the appropriate
67 * queue. If the system is in real-time mode and
68 * T has already been released, it will go into the
69 * ready queue. If the system is not in
70 * real-time mode, then T will go into the
71 * release queue. If T's release time is in the
72 * future, it will go into the release
73 * queue. That means that T's release time/job
74 * no/etc. has to be updated before requeue(T) is
75 * called. It is not safe to call requeue(T)
76 * when T is already queued. T may not be NULL.
77 *
78 * gsnedf_job_arrival(T) - This is the catch all function when T enters
79 * the system after either a suspension or at a
80 * the system after either a suspension or a
81 * is not safe to call gsnedf_job_arrival(T) if
82 * T is already queued) and then check whether a
83 * preemption is necessary. If a preemption is
84 * necessary it will update the linkage
85 * accordingly and cause schedule() to be called
86 * (either with an IPI or need_resched). It is
87 * safe to call gsnedf_job_arrival(T) if T's
88 * next job has not been actually released yet
89 * (release time in the future). T will be put
90 * on the release queue in that case.
91 *
92 * curr_job_completion() - Take care of everything that needs to be done
93 * to prepare the current task for its next
94 * release and place it in the right queue with
95 * gsnedf_job_arrival().
96 *
97 *
98 * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
99 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
100 * the functions will automatically promote a pending task from the ready queue
101 * to the linked state. This is the job of the calling function (by means of
102 * __take_ready).
103 */
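/* Putting these pieces together, a job release boils down to the following
 * call chain (a sketch in terms of the helpers defined below):
 *
 *	gsnedf_release_jobs()
 *		__merge_ready(&gsnedf, tasks)
 *		check_for_preemptions()
 *			task = __take_ready(&gsnedf)
 *			last = lowest_prio_cpu()
 *			link_task_to_cpu(task, last)
 *			preempt(last)	-- triggers rescheduling, via IPI if last is remote
 */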
104
105
106/* cpu_entry_t - maintain the linked and scheduled state
107 */
108typedef struct {
109 int cpu;
110 struct task_struct* linked; /* only RT tasks */
111 struct task_struct* scheduled; /* only RT tasks */
112 struct bheap_node* hn;
113} cpu_entry_t;
114DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
115
116cpu_entry_t* gsnedf_cpus[NR_CPUS];
117
118/* the cpus queue themselves according to priority in here */
119static struct bheap_node gsnedf_heap_node[NR_CPUS];
120static struct bheap gsnedf_cpu_heap;
121
122static rt_domain_t gsnedf;
123#define gsnedf_lock (gsnedf.ready_lock)
124
125
126/* Uncomment this if you want to see all scheduling decisions in the
127 * TRACE() log.
128#define WANT_ALL_SCHED_EVENTS
129 */
130
131static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
132{
133 cpu_entry_t *a, *b;
134 a = _a->value;
135 b = _b->value;
136 /* Note that a and b are inverted: we want the lowest-priority CPU at
137 * the top of the heap.
138 */
139 return edf_higher_prio(b->linked, a->linked);
140}
141
142/* update_cpu_position - Move the cpu entry to the correct place to maintain
143 * order in the cpu queue. Caller must hold gsnedf lock.
144 */
145static void update_cpu_position(cpu_entry_t *entry)
146{
147 if (likely(bheap_node_in_heap(entry->hn)))
148 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
149 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
150}
151
152/* caller must hold gsnedf lock */
153static cpu_entry_t* lowest_prio_cpu(void)
154{
155 struct bheap_node* hn;
156 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
157 return hn->value;
158}
159
160
161/* link_task_to_cpu - Update the link of a CPU.
162 * Handles the case where the to-be-linked task is already
163 * scheduled on a different CPU.
164 */
165static noinline void link_task_to_cpu(struct task_struct* linked,
166 cpu_entry_t *entry)
167{
168 cpu_entry_t *sched;
169 struct task_struct* tmp;
170 int on_cpu;
171
172 BUG_ON(linked && !is_realtime(linked));
173
174 /* Currently linked task is set to be unlinked. */
175 if (entry->linked) {
176 entry->linked->rt_param.linked_on = NO_CPU;
177 }
178
179 /* Link new task to CPU. */
180 if (linked) {
181		/* handle the case that the task is already scheduled somewhere! */
182 on_cpu = linked->rt_param.scheduled_on;
183 if (on_cpu != NO_CPU) {
184 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
185 /* this should only happen if not linked already */
186 BUG_ON(sched->linked == linked);
187
188 /* If we are already scheduled on the CPU to which we
189 * wanted to link, we don't need to do the swap --
190 * we just link ourselves to the CPU and depend on
191 * the caller to get things right.
192 */
193 if (entry != sched) {
194 TRACE_TASK(linked,
195 "already scheduled on %d, updating link.\n",
196 sched->cpu);
197 tmp = sched->linked;
198 linked->rt_param.linked_on = sched->cpu;
199 sched->linked = linked;
200 update_cpu_position(sched);
201 linked = tmp;
202 }
203 }
204 if (linked) /* might be NULL due to swap */
205 linked->rt_param.linked_on = entry->cpu;
206 }
207 entry->linked = linked;
208#ifdef WANT_ALL_SCHED_EVENTS
209 if (linked)
210 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
211 else
212 TRACE("NULL linked to %d.\n", entry->cpu);
213#endif
214 update_cpu_position(entry);
215}
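/* Example of the swap above: suppose T1 is to be linked to CPU 0, but is
 * still scheduled on CPU 1, whose currently linked task is T2. The code
 * links T1 to CPU 1 instead (where it already runs) and then links T2 to
 * CPU 0, so that linkage always follows where tasks actually execute. */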
216
217/* unlink - Make sure a task is not linked any longer to an entry
218 * where it was linked before. Must hold gsnedf_lock.
219 */
220static noinline void unlink(struct task_struct* t)
221{
222 cpu_entry_t *entry;
223
224 if (t->rt_param.linked_on != NO_CPU) {
225 /* unlink */
226 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
227 t->rt_param.linked_on = NO_CPU;
228 link_task_to_cpu(NULL, entry);
229 } else if (is_queued(t)) {
230 /* This is an interesting situation: t is scheduled,
231 * but was just recently unlinked. It cannot be
232 * linked anywhere else (because then it would have
233 * been relinked to this CPU), thus it must be in some
234 * queue. We must remove it from the list in this
235 * case.
236 */
237 remove(&gsnedf, t);
238 }
239}
240
241
242/* preempt - force a CPU to reschedule
243 */
244static void preempt(cpu_entry_t *entry)
245{
246 preempt_if_preemptable(entry->scheduled, entry->cpu);
247}
248
249/* requeue - Put an unlinked task into gsn-edf domain.
250 * Caller must hold gsnedf_lock.
251 */
252static noinline void requeue(struct task_struct* task)
253{
254 BUG_ON(!task);
255 /* sanity check before insertion */
256 BUG_ON(is_queued(task));
257
258 if (is_early_releasing(task) || is_released(task, litmus_clock()))
259 __add_ready(&gsnedf, task);
260 else {
261 /* it has got to wait */
262 add_release(&gsnedf, task);
263 }
264}
265
266#ifdef CONFIG_SCHED_CPU_AFFINITY
267static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
268{
269 cpu_entry_t *affinity;
270
271 get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
272#ifdef CONFIG_RELEASE_MASTER
273 gsnedf.release_master,
274#else
275 NO_CPU,
276#endif
277 cpu_online_mask);
278
279 return(affinity);
280}
281#endif
282
283/* check for any necessary preemptions */
284static void check_for_preemptions(void)
285{
286 struct task_struct *task;
287 cpu_entry_t *last;
288
289
290#ifdef CONFIG_PREFER_LOCAL_LINKING
291 cpu_entry_t *local;
292
293 /* Before linking to other CPUs, check first whether the local CPU is
294 * idle. */
295 local = this_cpu_ptr(&gsnedf_cpu_entries);
296 task = __peek_ready(&gsnedf);
297
298 if (task && !local->linked
299#ifdef CONFIG_RELEASE_MASTER
300 && likely(local->cpu != gsnedf.release_master)
301#endif
302 ) {
303 task = __take_ready(&gsnedf);
304 TRACE_TASK(task, "linking to local CPU %d to avoid IPI\n", local->cpu);
305 link_task_to_cpu(task, local);
306 preempt(local);
307 }
308#endif
309
310 for (last = lowest_prio_cpu();
311 edf_preemption_needed(&gsnedf, last->linked);
312 last = lowest_prio_cpu()) {
313 /* preemption necessary */
314 task = __take_ready(&gsnedf);
315 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
316 task->pid, last->cpu);
317
318#ifdef CONFIG_SCHED_CPU_AFFINITY
319 {
320 cpu_entry_t *affinity =
321 gsnedf_get_nearest_available_cpu(
322 &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
323 if (affinity)
324 last = affinity;
325 else if (requeue_preempted_job(last->linked))
326 requeue(last->linked);
327 }
328#else
329 if (requeue_preempted_job(last->linked))
330 requeue(last->linked);
331#endif
332
333 link_task_to_cpu(task, last);
334 preempt(last);
335 }
336}
337
338/* gsnedf_job_arrival: task is either resumed or released */
339static noinline void gsnedf_job_arrival(struct task_struct* task)
340{
341 BUG_ON(!task);
342
343 requeue(task);
344 check_for_preemptions();
345}
346
347static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
348{
349 unsigned long flags;
350
351 raw_spin_lock_irqsave(&gsnedf_lock, flags);
352
353 __merge_ready(rt, tasks);
354 check_for_preemptions();
355
356 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
357}
358
359/* caller holds gsnedf_lock */
360static noinline void curr_job_completion(int forced)
361{
362 struct task_struct *t = current;
363 BUG_ON(!t);
364
365 sched_trace_task_completion(t, forced);
366
367 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
368
369 /* set flags */
370 tsk_rt(t)->completed = 0;
371 /* prepare for next period */
372 prepare_for_next_period(t);
373 if (is_early_releasing(t) || is_released(t, litmus_clock()))
374 sched_trace_task_release(t);
375 /* unlink */
376 unlink(t);
377 /* requeue
378 * But don't requeue a blocking task. */
379 if (is_current_running())
380 gsnedf_job_arrival(t);
381}
382
383/* Getting schedule() right is a bit tricky. schedule() may not make any
384 * assumptions on the state of the current task since it may be called for a
385 * number of reasons. These include that a scheduler_tick() determined it
386 * was necessary, that sys_exit_np() was called, that some Linux
387 * subsystem determined so, or even (in the worst case) that there is a bug
388 * hidden somewhere. Thus, we must take extreme care to determine what the
389 * current state is.
390 *
391 * The CPU could currently be scheduling a task (or not), be linked (or not).
392 *
393 * The following assertions for the scheduled task could hold:
394 *
395 * - !is_running(scheduled) // the job blocks
396 * - scheduled->timeslice == 0 // the job completed (forcefully)
397 * - is_completed() // the job completed (by syscall)
398 * - linked != scheduled // we need to reschedule (for any reason)
399 * - is_np(scheduled) // rescheduling must be delayed,
400 * sys_exit_np must be requested
401 *
402 * Any of these can occur together.
403 */
404static struct task_struct* gsnedf_schedule(struct task_struct * prev)
405{
406 cpu_entry_t* entry = this_cpu_ptr(&gsnedf_cpu_entries);
407 int out_of_time, sleep, preempt, np, exists, blocks;
408 struct task_struct* next = NULL;
409
410#ifdef CONFIG_RELEASE_MASTER
411 /* Bail out early if we are the release master.
412 * The release master never schedules any real-time tasks.
413 */
414 if (unlikely(gsnedf.release_master == entry->cpu)) {
415 sched_state_task_picked();
416 return NULL;
417 }
418#endif
419
420 raw_spin_lock(&gsnedf_lock);
421
422 /* sanity checking */
423 BUG_ON(entry->scheduled && entry->scheduled != prev);
424 BUG_ON(entry->scheduled && !is_realtime(prev));
425 BUG_ON(is_realtime(prev) && !entry->scheduled);
426
427 /* (0) Determine state */
428 exists = entry->scheduled != NULL;
429 blocks = exists && !is_current_running();
430 out_of_time = exists && budget_enforced(entry->scheduled)
431 && budget_exhausted(entry->scheduled);
432 np = exists && is_np(entry->scheduled);
433 sleep = exists && is_completed(entry->scheduled);
434 preempt = entry->scheduled != entry->linked;
435
436#ifdef WANT_ALL_SCHED_EVENTS
437 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
438#endif
439
440 if (exists)
441 TRACE_TASK(prev,
442 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
443 "state:%d sig:%d\n",
444 blocks, out_of_time, np, sleep, preempt,
445 prev->state, signal_pending(prev));
446 if (entry->linked && preempt)
447 TRACE_TASK(prev, "will be preempted by %s/%d\n",
448 entry->linked->comm, entry->linked->pid);
449
450
451 /* If a task blocks we have no choice but to reschedule.
452 */
453 if (blocks)
454 unlink(entry->scheduled);
455
456 /* Request a sys_exit_np() call if we would like to preempt but cannot.
457 * We need to make sure to update the link structure anyway in case
458 * that we are still linked. Multiple calls to request_exit_np() don't
459 * hurt.
460 */
461 if (np && (out_of_time || preempt || sleep)) {
462 unlink(entry->scheduled);
463 request_exit_np(entry->scheduled);
464 }
465
466 /* Any task that is preemptable and either exhausts its execution
467 * budget or wants to sleep completes. We may have to reschedule after
468 * this. Don't do a job completion if we block (can't have timers running
469 * for blocked jobs).
470 */
471 if (!np && (out_of_time || sleep))
472 curr_job_completion(!sleep);
473
474 /* Link pending task if we became unlinked.
475 */
476 if (!entry->linked)
477 link_task_to_cpu(__take_ready(&gsnedf), entry);
478
479 /* The final scheduling decision. Do we need to switch for some reason?
480 * If linked is different from scheduled, then select linked as next.
481 */
482 if ((!np || blocks) &&
483 entry->linked != entry->scheduled) {
484 /* Schedule a linked job? */
485 if (entry->linked) {
486 entry->linked->rt_param.scheduled_on = entry->cpu;
487 next = entry->linked;
488 TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
489 }
490 if (entry->scheduled) {
491 /* not gonna be scheduled soon */
492 entry->scheduled->rt_param.scheduled_on = NO_CPU;
493 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
494 }
495 } else
496 /* Only override Linux scheduler if we have a real-time task
497 * scheduled that needs to continue.
498 */
499 if (exists)
500 next = prev;
501
502 sched_state_task_picked();
503
504 raw_spin_unlock(&gsnedf_lock);
505
506#ifdef WANT_ALL_SCHED_EVENTS
507 TRACE("gsnedf_lock released, next=0x%p\n", next);
508
509 if (next)
510 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
511 else if (exists && !next)
512 TRACE("becomes idle at %llu.\n", litmus_clock());
513#endif
514
515
516 return next;
517}
518
519
520/* _finish_switch - we just finished the switch away from prev
521 */
522static void gsnedf_finish_switch(struct task_struct *prev)
523{
524 cpu_entry_t* entry = this_cpu_ptr(&gsnedf_cpu_entries);
525
526 entry->scheduled = is_realtime(current) ? current : NULL;
527#ifdef WANT_ALL_SCHED_EVENTS
528 TRACE_TASK(prev, "switched away from\n");
529#endif
530}
531
532
533/* Prepare a task for running in RT mode
534 */
535static void gsnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
536{
537 unsigned long flags;
538 cpu_entry_t* entry;
539
540 TRACE("gsn edf: task new %d\n", t->pid);
541
542 raw_spin_lock_irqsave(&gsnedf_lock, flags);
543
544 /* setup job params */
545 release_at(t, litmus_clock());
546
547 if (is_scheduled) {
548 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
549 BUG_ON(entry->scheduled);
550
551#ifdef CONFIG_RELEASE_MASTER
552 if (entry->cpu != gsnedf.release_master) {
553#endif
554 entry->scheduled = t;
555 tsk_rt(t)->scheduled_on = task_cpu(t);
556#ifdef CONFIG_RELEASE_MASTER
557 } else {
558 /* do not schedule on release master */
559 preempt(entry); /* force resched */
560 tsk_rt(t)->scheduled_on = NO_CPU;
561 }
562#endif
563 } else {
564 t->rt_param.scheduled_on = NO_CPU;
565 }
566 t->rt_param.linked_on = NO_CPU;
567
568 if (on_rq || is_scheduled)
569 gsnedf_job_arrival(t);
570 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
571}
572
573static void gsnedf_task_wake_up(struct task_struct *task)
574{
575 unsigned long flags;
576 lt_t now;
577
578 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
579
580 raw_spin_lock_irqsave(&gsnedf_lock, flags);
581 now = litmus_clock();
582 if (is_sporadic(task) && is_tardy(task, now)) {
583 inferred_sporadic_job_release_at(task, now);
584 }
585 gsnedf_job_arrival(task);
586 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
587}
588
589static void gsnedf_task_block(struct task_struct *t)
590{
591 unsigned long flags;
592
593 TRACE_TASK(t, "block at %llu\n", litmus_clock());
594
595 /* unlink if necessary */
596 raw_spin_lock_irqsave(&gsnedf_lock, flags);
597 unlink(t);
598 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
599
600 BUG_ON(!is_realtime(t));
601}
602
603
604static void gsnedf_task_exit(struct task_struct * t)
605{
606 unsigned long flags;
607
608 /* unlink if necessary */
609 raw_spin_lock_irqsave(&gsnedf_lock, flags);
610 unlink(t);
611 if (tsk_rt(t)->scheduled_on != NO_CPU) {
612 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
613 tsk_rt(t)->scheduled_on = NO_CPU;
614 }
615 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
616
617 BUG_ON(!is_realtime(t));
618 TRACE_TASK(t, "RIP\n");
619}
620
621
622static long gsnedf_admit_task(struct task_struct* tsk)
623{
624 return 0;
625}
626
627#ifdef CONFIG_LITMUS_LOCKING
628
629#include <litmus/fdso.h>
630
631/* called with IRQs off */
632static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
633{
634 int linked_on;
635 int check_preempt = 0;
636
637 raw_spin_lock(&gsnedf_lock);
638
639 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
640 tsk_rt(t)->inh_task = prio_inh;
641
642 linked_on = tsk_rt(t)->linked_on;
643
644 /* If it is scheduled, then we need to reorder the CPU heap. */
645 if (linked_on != NO_CPU) {
646 TRACE_TASK(t, "%s: linked on %d\n",
647 __FUNCTION__, linked_on);
648 /* Holder is scheduled; need to re-order CPUs.
649 * We can't use heap_decrease() here since
650 * the cpu_heap is ordered in reverse direction, so
651 * it is actually an increase. */
652 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
653 gsnedf_cpus[linked_on]->hn);
654 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
655 gsnedf_cpus[linked_on]->hn);
656 } else {
657 /* holder may be queued: first stop queue changes */
658 raw_spin_lock(&gsnedf.release_lock);
659 if (is_queued(t)) {
660 TRACE_TASK(t, "%s: is queued\n",
661 __FUNCTION__);
662 /* We need to update the position of holder in some
663			 * heap. Note that this could be a release heap if
664			 * budget enforcement is used and this job overran. */
665 check_preempt =
666 !bheap_decrease(edf_ready_order,
667 tsk_rt(t)->heap_node);
668 } else {
669 /* Nothing to do: if it is not queued and not linked
670 * then it is either sleeping or currently being moved
671 * by other code (e.g., a timer interrupt handler) that
672 * will use the correct priority when enqueuing the
673 * task. */
674 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
675 __FUNCTION__);
676 }
677 raw_spin_unlock(&gsnedf.release_lock);
678
679 /* If holder was enqueued in a release heap, then the following
680 * preemption check is pointless, but we can't easily detect
681 * that case. If you want to fix this, then consider that
682 * simply adding a state flag requires O(n) time to update when
683 * releasing n tasks, which conflicts with the goal to have
684 * O(log n) merges. */
685 if (check_preempt) {
686 /* heap_decrease() hit the top level of the heap: make
687 * sure preemption checks get the right task, not the
688 * potentially stale cache. */
689 bheap_uncache_min(edf_ready_order,
690 &gsnedf.ready_queue);
691 check_for_preemptions();
692 }
693 }
694
695 raw_spin_unlock(&gsnedf_lock);
696}
697
698/* called with IRQs off */
699static void clear_priority_inheritance(struct task_struct* t)
700{
701 raw_spin_lock(&gsnedf_lock);
702
703 /* A job only stops inheriting a priority when it releases a
704 * resource. Thus we can make the following assumption.*/
705 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
706
707 TRACE_TASK(t, "priority restored\n");
708 tsk_rt(t)->inh_task = NULL;
709
710 /* Check if rescheduling is necessary. We can't use heap_decrease()
711 * since the priority was effectively lowered. */
712 unlink(t);
713 gsnedf_job_arrival(t);
714
715 raw_spin_unlock(&gsnedf_lock);
716}
717
718
719/* ******************** FMLP support ********************** */
720
721/* struct for semaphore with priority inheritance */
722struct fmlp_semaphore {
723 struct litmus_lock litmus_lock;
724
725 /* current resource holder */
726 struct task_struct *owner;
727
728 /* highest-priority waiter */
729 struct task_struct *hp_waiter;
730
731 /* FIFO queue of waiting tasks */
732 wait_queue_head_t wait;
733};
734
735static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
736{
737 return container_of(lock, struct fmlp_semaphore, litmus_lock);
738}
739
740/* caller is responsible for locking */
741struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
742 struct task_struct* skip)
743{
744 struct list_head *pos;
745 struct task_struct *queued, *found = NULL;
746
747 list_for_each(pos, &sem->wait.task_list) {
748 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
749 task_list)->private;
750
751 /* Compare task prios, find high prio task. */
752 if (queued != skip && edf_higher_prio(queued, found))
753 found = queued;
754 }
755 return found;
756}
757
758int gsnedf_fmlp_lock(struct litmus_lock* l)
759{
760 struct task_struct* t = current;
761 struct fmlp_semaphore *sem = fmlp_from_lock(l);
762 wait_queue_t wait;
763 unsigned long flags;
764
765 if (!is_realtime(t))
766 return -EPERM;
767
768 /* prevent nested lock acquisition --- not supported by FMLP */
769 if (tsk_rt(t)->num_locks_held)
770 return -EBUSY;
771
772 spin_lock_irqsave(&sem->wait.lock, flags);
773
774 if (sem->owner) {
775 /* resource is not free => must suspend and wait */
776
777 init_waitqueue_entry(&wait, t);
778
779 /* FIXME: interruptible would be nice some day */
780 set_task_state(t, TASK_UNINTERRUPTIBLE);
781
782 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
783
784 /* check if we need to activate priority inheritance */
785 if (edf_higher_prio(t, sem->hp_waiter)) {
786 sem->hp_waiter = t;
787 if (edf_higher_prio(t, sem->owner))
788 set_priority_inheritance(sem->owner, sem->hp_waiter);
789 }
790
791 TS_LOCK_SUSPEND;
792
793 /* release lock before sleeping */
794 spin_unlock_irqrestore(&sem->wait.lock, flags);
795
796 /* We depend on the FIFO order. Thus, we don't need to recheck
797 * when we wake up; we are guaranteed to have the lock since
798 * there is only one wake up per release.
799 */
800
801 schedule();
802
803 TS_LOCK_RESUME;
804
805 /* Since we hold the lock, no other task will change
806 * ->owner. We can thus check it without acquiring the spin
807 * lock. */
808 BUG_ON(sem->owner != t);
809 } else {
810 /* it's ours now */
811 sem->owner = t;
812
813 spin_unlock_irqrestore(&sem->wait.lock, flags);
814 }
815
816 tsk_rt(t)->num_locks_held++;
817
818 return 0;
819}
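/* To summarize the locking protocol: lock() suspends the caller in FIFO
 * order if the semaphore is held and, if the caller becomes the
 * highest-priority waiter, lets the owner inherit its priority; unlock()
 * below hands ownership to the head of the FIFO queue and drops any
 * inherited priority. */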
820
821int gsnedf_fmlp_unlock(struct litmus_lock* l)
822{
823 struct task_struct *t = current, *next;
824 struct fmlp_semaphore *sem = fmlp_from_lock(l);
825 unsigned long flags;
826 int err = 0;
827
828 spin_lock_irqsave(&sem->wait.lock, flags);
829
830 if (sem->owner != t) {
831 err = -EINVAL;
832 goto out;
833 }
834
835 tsk_rt(t)->num_locks_held--;
836
837 /* check if there are jobs waiting for this resource */
838 next = __waitqueue_remove_first(&sem->wait);
839 if (next) {
840		/* next becomes the resource holder */
841 sem->owner = next;
842 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
843
844 /* determine new hp_waiter if necessary */
845 if (next == sem->hp_waiter) {
846 TRACE_TASK(next, "was highest-prio waiter\n");
847 /* next has the highest priority --- it doesn't need to
848 * inherit. However, we need to make sure that the
849 * next-highest priority in the queue is reflected in
850 * hp_waiter. */
851 sem->hp_waiter = find_hp_waiter(sem, next);
852 if (sem->hp_waiter)
853 TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
854 else
855 TRACE("no further waiters\n");
856 } else {
857 /* Well, if next is not the highest-priority waiter,
858 * then it ought to inherit the highest-priority
859 * waiter's priority. */
860 set_priority_inheritance(next, sem->hp_waiter);
861 }
862
863 /* wake up next */
864 wake_up_process(next);
865 } else
866 /* becomes available */
867 sem->owner = NULL;
868
869 /* we lose the benefit of priority inheritance (if any) */
870 if (tsk_rt(t)->inh_task)
871 clear_priority_inheritance(t);
872
873out:
874 spin_unlock_irqrestore(&sem->wait.lock, flags);
875
876 return err;
877}
878
879int gsnedf_fmlp_close(struct litmus_lock* l)
880{
881 struct task_struct *t = current;
882 struct fmlp_semaphore *sem = fmlp_from_lock(l);
883 unsigned long flags;
884
885 int owner;
886
887 spin_lock_irqsave(&sem->wait.lock, flags);
888
889 owner = sem->owner == t;
890
891 spin_unlock_irqrestore(&sem->wait.lock, flags);
892
893 if (owner)
894 gsnedf_fmlp_unlock(l);
895
896 return 0;
897}
898
899void gsnedf_fmlp_free(struct litmus_lock* lock)
900{
901 kfree(fmlp_from_lock(lock));
902}
903
904static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
905 .close = gsnedf_fmlp_close,
906 .lock = gsnedf_fmlp_lock,
907 .unlock = gsnedf_fmlp_unlock,
908 .deallocate = gsnedf_fmlp_free,
909};
910
911static struct litmus_lock* gsnedf_new_fmlp(void)
912{
913 struct fmlp_semaphore* sem;
914
915 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
916 if (!sem)
917 return NULL;
918
919 sem->owner = NULL;
920 sem->hp_waiter = NULL;
921 init_waitqueue_head(&sem->wait);
922 sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;
923
924 return &sem->litmus_lock;
925}
926
927/* **** lock constructor **** */
928
929
930static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
931 void* __user unused)
932{
933 int err = -ENXIO;
934
935 /* GSN-EDF currently only supports the FMLP for global resources. */
936 switch (type) {
937
938 case FMLP_SEM:
939 /* Flexible Multiprocessor Locking Protocol */
940 *lock = gsnedf_new_fmlp();
941 if (*lock)
942 err = 0;
943 else
944 err = -ENOMEM;
945 break;
946
947 };
948
949 return err;
950}
951
952#endif
953
954static struct domain_proc_info gsnedf_domain_proc_info;
955static long gsnedf_get_domain_proc_info(struct domain_proc_info **ret)
956{
957 *ret = &gsnedf_domain_proc_info;
958 return 0;
959}
960
961static void gsnedf_setup_domain_proc(void)
962{
963 int i, cpu;
964 int release_master =
965#ifdef CONFIG_RELEASE_MASTER
966 atomic_read(&release_master_cpu);
967#else
968 NO_CPU;
969#endif
970 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
971 struct cd_mapping *map;
972
973 memset(&gsnedf_domain_proc_info, 0, sizeof(gsnedf_domain_proc_info));
974 init_domain_proc_info(&gsnedf_domain_proc_info, num_rt_cpus, 1);
975 gsnedf_domain_proc_info.num_cpus = num_rt_cpus;
976 gsnedf_domain_proc_info.num_domains = 1;
977
978 gsnedf_domain_proc_info.domain_to_cpus[0].id = 0;
979 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
980 if (cpu == release_master)
981 continue;
982 map = &gsnedf_domain_proc_info.cpu_to_domains[i];
983 map->id = cpu;
984 cpumask_set_cpu(0, map->mask);
985 ++i;
986
987 /* add cpu to the domain */
988 cpumask_set_cpu(cpu,
989 gsnedf_domain_proc_info.domain_to_cpus[0].mask);
990 }
991}
992
993static long gsnedf_activate_plugin(void)
994{
995 int cpu;
996 cpu_entry_t *entry;
997
998 bheap_init(&gsnedf_cpu_heap);
999#ifdef CONFIG_RELEASE_MASTER
1000 gsnedf.release_master = atomic_read(&release_master_cpu);
1001#endif
1002
1003 for_each_online_cpu(cpu) {
1004 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1005 bheap_node_init(&entry->hn, entry);
1006 entry->linked = NULL;
1007 entry->scheduled = NULL;
1008#ifdef CONFIG_RELEASE_MASTER
1009 if (cpu != gsnedf.release_master) {
1010#endif
1011 TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
1012 update_cpu_position(entry);
1013#ifdef CONFIG_RELEASE_MASTER
1014 } else {
1015 TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
1016 }
1017#endif
1018 }
1019
1020 gsnedf_setup_domain_proc();
1021
1022 return 0;
1023}
1024
1025static long gsnedf_deactivate_plugin(void)
1026{
1027 destroy_domain_proc_info(&gsnedf_domain_proc_info);
1028 return 0;
1029}
1030
1031/* Plugin object */
1032static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
1033 .plugin_name = "GSN-EDF",
1034 .finish_switch = gsnedf_finish_switch,
1035 .task_new = gsnedf_task_new,
1036 .complete_job = complete_job,
1037 .task_exit = gsnedf_task_exit,
1038 .schedule = gsnedf_schedule,
1039 .task_wake_up = gsnedf_task_wake_up,
1040 .task_block = gsnedf_task_block,
1041 .admit_task = gsnedf_admit_task,
1042 .activate_plugin = gsnedf_activate_plugin,
1043 .deactivate_plugin = gsnedf_deactivate_plugin,
1044 .get_domain_proc_info = gsnedf_get_domain_proc_info,
1045#ifdef CONFIG_LITMUS_LOCKING
1046 .allocate_lock = gsnedf_allocate_lock,
1047#endif
1048};
1049
1050
1051static int __init init_gsn_edf(void)
1052{
1053 int cpu;
1054 cpu_entry_t *entry;
1055
1056 bheap_init(&gsnedf_cpu_heap);
1057 /* initialize CPU state */
1058 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1059 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1060 gsnedf_cpus[cpu] = entry;
1061 entry->cpu = cpu;
1062 entry->hn = &gsnedf_heap_node[cpu];
1063 bheap_node_init(&entry->hn, entry);
1064 }
1065 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1066 return register_sched_plugin(&gsn_edf_plugin);
1067}
1068
1069
1070module_init(init_gsn_edf);
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
new file mode 100644
index 000000000000..f66488dc6a12
--- /dev/null
+++ b/litmus/sched_pfair.c
@@ -0,0 +1,1231 @@
1/*
2 * litmus/sched_pfair.c
3 *
4 * Implementation of the PD^2 pfair scheduling algorithm. This
5 * implementation realizes "early releasing," i.e., it is work-conserving.
6 *
7 */
8
9#include <asm/div64.h>
10#include <linux/delay.h>
11#include <linux/module.h>
12#include <linux/spinlock.h>
13#include <linux/percpu.h>
14#include <linux/sched.h>
15#include <linux/list.h>
16#include <linux/slab.h>
17
18#include <litmus/debug_trace.h>
19#include <litmus/litmus.h>
20#include <litmus/jobs.h>
21#include <litmus/preempt.h>
22#include <litmus/rt_domain.h>
23#include <litmus/sched_plugin.h>
24#include <litmus/sched_trace.h>
25#include <litmus/trace.h>
26
27#include <litmus/bheap.h>
28
29/* to configure the cluster size */
30#include <litmus/litmus_proc.h>
31
32#include <litmus/clustered.h>
33
34static enum cache_level pfair_cluster_level = GLOBAL_CLUSTER;
35
36struct subtask {
37 /* measured in quanta relative to job release */
38 quanta_t release;
39 quanta_t deadline;
40 quanta_t overlap; /* called "b bit" by PD^2 */
41 quanta_t group_deadline;
42};
43
44struct pfair_param {
45 quanta_t quanta; /* number of subtasks */
46 quanta_t cur; /* index of current subtask */
47
48 quanta_t release; /* in quanta */
49 quanta_t period; /* in quanta */
50
51 quanta_t last_quantum; /* when scheduled last */
52 int last_cpu; /* where scheduled last */
53
54 unsigned int needs_requeue:1;
55
56 struct pfair_cluster* cluster; /* where this task is scheduled */
57
58 struct subtask subtasks[0]; /* allocate together with pfair_param */
59};
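/* Example (a sketch using the standard PD^2 windowing rules): a task that
 * needs 2 quanta of execution every 5 quanta (weight 2/5) is split into
 * two subtasks with windows, relative to the job release,
 *	subtask 0: release 0, deadline 3, b-bit 1
 *	subtask 1: release 2, deadline 5, b-bit 0
 * and group deadline 0, as group deadlines only matter for heavy tasks. */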
60
61#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
62
63struct pfair_state {
64 struct cluster_cpu topology;
65
66 struct hrtimer quantum_timer;
67
68 volatile quanta_t cur_tick; /* updated by the CPU that is advancing
69 * the time */
70 volatile quanta_t local_tick; /* What tick is the local CPU currently
71 * executing? Updated only by the local
72 * CPU. In QEMU, this may lag behind the
73 * current tick. In a real system, with
74 * proper timers and aligned quanta,
75 * that should only be the case for a
76 * very short time after the time
77 * advanced. With staggered quanta, it
78 * will lag for the duration of the
79 * offset.
80 */
81
82 struct task_struct* linked; /* the task that should be executing */
83 struct task_struct* local; /* the local copy of linked */
84 struct task_struct* scheduled; /* what is actually scheduled */
85
86 struct list_head out_of_budget; /* list of tasks that exhausted their allocation */
87
88 lt_t offset; /* stagger offset */
89 unsigned int missed_updates;
90 unsigned int missed_quanta;
91};
92
93struct pfair_cluster {
94 struct scheduling_cluster topology;
95
96 /* The "global" time in this cluster. */
97 quanta_t pfair_time; /* the "official" PFAIR clock */
98
99 /* The ready queue for this cluster. */
100 rt_domain_t pfair;
101
102 /* The set of jobs that should have their release enacted at the next
103 * quantum boundary.
104 */
105 struct bheap release_queue;
106 raw_spinlock_t release_lock;
107};
108
109static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
110{
111 return container_of(state->topology.cluster, struct pfair_cluster, topology);
112}
113
114static inline int cpu_id(struct pfair_state* state)
115{
116 return state->topology.id;
117}
118
119static inline struct pfair_state* from_cluster_list(struct list_head* pos)
120{
121 return list_entry(pos, struct pfair_state, topology.cluster_list);
122}
123
124static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
125{
126 return container_of(rt, struct pfair_cluster, pfair);
127}
128
129static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
130{
131 /* The ready_lock is used to serialize all scheduling events. */
132 return &cluster->pfair.ready_lock;
133}
134
135static inline raw_spinlock_t* cpu_lock(struct pfair_state* state)
136{
137 return cluster_lock(cpu_cluster(state));
138}
139
140DEFINE_PER_CPU(struct pfair_state, pfair_state);
141struct pfair_state* *pstate; /* short cut */
142
143static struct pfair_cluster* pfair_clusters;
144static int num_pfair_clusters;
145
146/* Enable for lots of trace info.
147 * #define PFAIR_DEBUG
148 */
149
150#ifdef PFAIR_DEBUG
151#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
152#define PTRACE(f, args...) TRACE(f, ## args)
153#else
154#define PTRACE_TASK(t, f, args...)
155#define PTRACE(f, args...)
156#endif
157
158/* gcc will inline all of these accessor functions... */
159static struct subtask* cur_subtask(struct task_struct* t)
160{
161 return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
162}
163
164static quanta_t cur_deadline(struct task_struct* t)
165{
166 return cur_subtask(t)->deadline + tsk_pfair(t)->release;
167}
168
169static quanta_t cur_release(struct task_struct* t)
170{
171 /* This is early releasing: only the release of the first subtask
172 * counts. */
173 return tsk_pfair(t)->release;
174}
175
176static quanta_t cur_overlap(struct task_struct* t)
177{
178 return cur_subtask(t)->overlap;
179}
180
181static quanta_t cur_group_deadline(struct task_struct* t)
182{
183 quanta_t gdl = cur_subtask(t)->group_deadline;
184 if (gdl)
185 return gdl + tsk_pfair(t)->release;
186 else
187 return gdl;
188}
189
190
191static int pfair_higher_prio(struct task_struct* first,
192 struct task_struct* second)
193{
194 return /* first task must exist */
195 first && (
196 /* Does the second task exist and is it a real-time task? If
197 * not, the first task (which is a RT task) has higher
198 * priority.
199 */
200 !second || !is_realtime(second) ||
201
202 /* Is the (subtask) deadline of the first task earlier?
203 * Then it has higher priority.
204 */
205 time_before(cur_deadline(first), cur_deadline(second)) ||
206
207 /* Do we have a deadline tie?
208 * Then break by B-bit.
209 */
210 (cur_deadline(first) == cur_deadline(second) &&
211 (cur_overlap(first) > cur_overlap(second) ||
212
213 /* Do we have a B-bit tie?
214 * Then break by group deadline.
215 */
216 (cur_overlap(first) == cur_overlap(second) &&
217 (time_after(cur_group_deadline(first),
218 cur_group_deadline(second)) ||
219
220 /* Do we have a group deadline tie?
221			 * Then break by PID, which is unique.
222 */
223 (cur_group_deadline(first) ==
224 cur_group_deadline(second) &&
225 first->pid < second->pid))))));
226}
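/* In short, PD^2 priority is decided by, in this order: earlier subtask
 * deadline, then larger b-bit, then later group deadline, and finally
 * smaller PID as the unique tie-breaker. */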
227
228int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
229{
230 return pfair_higher_prio(bheap2task(a), bheap2task(b));
231}
232
233static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
234{
235 struct pfair_cluster* cluster = from_domain(rt);
236 unsigned long flags;
237
238 raw_spin_lock_irqsave(&cluster->release_lock, flags);
239
240 bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
241
242 raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
243}
244
245static void prepare_release(struct task_struct* t, quanta_t at)
246{
247 tsk_pfair(t)->release = at;
248 tsk_pfair(t)->cur = 0;
249}
250
251/* pull released tasks from the release queue */
252static void poll_releases(struct pfair_cluster* cluster)
253{
254 raw_spin_lock(&cluster->release_lock);
255 __merge_ready(&cluster->pfair, &cluster->release_queue);
256 raw_spin_unlock(&cluster->release_lock);
257}
258
259static void check_preempt(struct task_struct* t)
260{
261 int cpu = NO_CPU;
262 if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
263 is_present(t)) {
264 /* the task can be scheduled and
265 * is not scheduled where it ought to be scheduled
266 */
267 cpu = tsk_rt(t)->linked_on != NO_CPU ?
268 tsk_rt(t)->linked_on :
269 tsk_rt(t)->scheduled_on;
270 PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
271 tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
272 /* preempt */
273 litmus_reschedule(cpu);
274 }
275}
276
277/* caller must hold pfair.ready_lock */
278static void drop_all_references(struct task_struct *t)
279{
280 int cpu;
281 struct pfair_state* s;
282 struct pfair_cluster* cluster;
283 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
284		/* It must be in the ready queue; drop_all_references() isn't
285		 * called when the job is in a release queue. */
286 cluster = tsk_pfair(t)->cluster;
287 bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
288 tsk_rt(t)->heap_node);
289 }
290 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
291 s = &per_cpu(pfair_state, cpu);
292 if (s->linked == t)
293 s->linked = NULL;
294 if (s->local == t)
295 s->local = NULL;
296 if (s->scheduled == t)
297 s->scheduled = NULL;
298 }
299 /* make sure we don't have a stale linked_on field */
300 tsk_rt(t)->linked_on = NO_CPU;
301
302 /* make sure we're not queued for re-releasing */
303 if (in_list(&tsk_rt(t)->list))
304 {
305 TRACE_TASK(t, "removing from out_of_budget queue\n");
306 list_del(&tsk_rt(t)->list);
307 }
308}
309
310static void pfair_prepare_next_period(struct task_struct* t)
311{
312 struct pfair_param* p = tsk_pfair(t);
313
314 prepare_for_next_period(t);
315 tsk_rt(t)->completed = 0;
316 p->release = time2quanta(get_release(t), CEIL);
317}
318
319/* returns 1 if the task needs to go the release queue */
320static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
321{
322 struct pfair_param* p = tsk_pfair(t);
323 int to_relq;
324 p->cur = (p->cur + 1) % p->quanta;
325 if (!p->cur) {
326 if (is_present(t)) {
327 /* The job overran; we start a new budget allocation. */
328 TRACE_TASK(t, "overran budget, preparing next period\n");
329 sched_trace_task_completion(t, 1);
330 pfair_prepare_next_period(t);
331 } else {
332 /* remove task from system until it wakes */
333 drop_all_references(t);
334 p->needs_requeue = 1;
335 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
336 cpu, p->cur);
337 return 0;
338 }
339 }
340 to_relq = time_after(cur_release(t), time);
341 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d "
342 "(cur_release:%lu time:%lu present:%d on_cpu=%d)\n",
343 cpu, p->cur, to_relq, cur_release(t), time,
344 tsk_rt(t)->present, tsk_rt(t)->scheduled_on);
345 return to_relq;
346}
347
348static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
349{
350 struct task_struct* l;
351 struct pfair_param* p;
352 struct list_head* pos;
353 struct pfair_state* cpu;
354
355 list_for_each(pos, &cluster->topology.cpus) {
356 cpu = from_cluster_list(pos);
357 l = cpu->linked;
358 cpu->missed_updates += cpu->linked != cpu->local;
359 if (l) {
360 p = tsk_pfair(l);
361 p->last_quantum = time;
362 p->last_cpu = cpu_id(cpu);
363 if (advance_subtask(time, l, cpu_id(cpu))) {
364 cpu->linked = NULL;
365 tsk_rt(l)->linked_on = NO_CPU;
366 PTRACE_TASK(l, "should go to release queue. "
367 "scheduled_on=%d present=%d\n",
368 tsk_rt(l)->scheduled_on,
369 tsk_rt(l)->present);
370 list_add(&tsk_rt(l)->list, &cpu->out_of_budget);
371 }
372 }
373 }
374}
375
376static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
377{
378 int cpu;
379 if (tsk_rt(t)->scheduled_on != NO_CPU) {
380 /* always observe scheduled_on linkage */
381 default_cpu = tsk_rt(t)->scheduled_on;
382 } else if (tsk_pfair(t)->last_quantum == time - 1) {
383 /* back2back quanta */
384 /* Only observe last_quantum if no scheduled_on is in the way.
385 * This should only kick in if a CPU missed quanta, and that
386 * *should* only happen in QEMU.
387 */
388 cpu = tsk_pfair(t)->last_cpu;
389 if (!pstate[cpu]->linked ||
390 tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
391 default_cpu = cpu;
392 }
393 }
394 return default_cpu;
395}
396
397/* returns one if linking was redirected */
398static int pfair_link(quanta_t time, int cpu,
399 struct task_struct* t)
400{
401 int target = target_cpu(time, t, cpu);
402 struct task_struct* prev = pstate[cpu]->linked;
403 struct task_struct* other;
404 struct pfair_cluster* cluster = cpu_cluster(pstate[cpu]);
405
406 if (target != cpu) {
407 BUG_ON(pstate[target]->topology.cluster != pstate[cpu]->topology.cluster);
408 other = pstate[target]->linked;
409 pstate[target]->linked = t;
410 tsk_rt(t)->linked_on = target;
411 if (!other)
412 /* linked ok, but reschedule this CPU */
413 return 1;
414 if (target < cpu) {
415 /* link other to cpu instead */
416 tsk_rt(other)->linked_on = cpu;
417 pstate[cpu]->linked = other;
418 if (prev) {
419 /* prev got pushed back into the ready queue */
420 tsk_rt(prev)->linked_on = NO_CPU;
421 __add_ready(&cluster->pfair, prev);
422 }
423 /* we are done with this cpu */
424 return 0;
425 } else {
426			/* re-add other, its original CPU was not considered yet */
427 tsk_rt(other)->linked_on = NO_CPU;
428 __add_ready(&cluster->pfair, other);
429 /* reschedule this CPU */
430 return 1;
431 }
432 } else {
433 pstate[cpu]->linked = t;
434 tsk_rt(t)->linked_on = cpu;
435 if (prev) {
436 /* prev got pushed back into the ready queue */
437 tsk_rt(prev)->linked_on = NO_CPU;
438 __add_ready(&cluster->pfair, prev);
439 }
440 /* we are done with this CPU */
441 return 0;
442 }
443}
444
445static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
446{
447 int retry;
448 struct list_head *pos;
449 struct pfair_state *cpu_state;
450
451 list_for_each(pos, &cluster->topology.cpus) {
452 cpu_state = from_cluster_list(pos);
453 retry = 1;
454#ifdef CONFIG_RELEASE_MASTER
455 /* skip release master */
456 if (cluster->pfair.release_master == cpu_id(cpu_state))
457 continue;
458#endif
459 while (retry) {
460 if (pfair_higher_prio(__peek_ready(&cluster->pfair),
461 cpu_state->linked))
462 retry = pfair_link(time, cpu_id(cpu_state),
463 __take_ready(&cluster->pfair));
464 else
465 retry = 0;
466 }
467 }
468}
469
470static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
471{
472 struct pfair_state *cpu;
473 struct list_head* pos;
474
475 /* called with interrupts disabled */
476 PTRACE("--- Q %lu at %llu PRE-SPIN\n",
477 time, litmus_clock());
478 raw_spin_lock(cluster_lock(cluster));
479 PTRACE("<<< Q %lu at %llu\n",
480 time, litmus_clock());
481
482 sched_trace_quantum_boundary();
483
484 advance_subtasks(cluster, time);
485 poll_releases(cluster);
486 schedule_subtasks(cluster, time);
487
488 list_for_each(pos, &cluster->topology.cpus) {
489 cpu = from_cluster_list(pos);
490 if (cpu->linked)
491 PTRACE_TASK(cpu->linked,
492 " linked on %d.\n", cpu_id(cpu));
493 else
494 PTRACE("(null) linked on %d.\n", cpu_id(cpu));
495 }
496 /* We are done. Advance time. */
497 mb();
498 list_for_each(pos, &cluster->topology.cpus) {
499 cpu = from_cluster_list(pos);
500 if (cpu->local_tick != cpu->cur_tick) {
501 TRACE("BAD Quantum not acked on %d "
502 "(l:%lu c:%lu p:%lu)\n",
503 cpu_id(cpu),
504 cpu->local_tick,
505 cpu->cur_tick,
506 cluster->pfair_time);
507 cpu->missed_quanta++;
508 }
509 cpu->cur_tick = time;
510 }
511 PTRACE(">>> Q %lu at %llu\n",
512 time, litmus_clock());
513 raw_spin_unlock(cluster_lock(cluster));
514}
515
516static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
517{
518 quanta_t loc;
519
520 goto first; /* skip mb() on first iteration */
521 do {
522 cpu_relax();
523 mb();
524 first: loc = state->cur_tick;
525 /* FIXME: what if loc > cur? */
526 } while (time_before(loc, q));
527 PTRACE("observed cur_tick:%lu >= q:%lu\n",
528 loc, q);
529}
530
531static quanta_t current_quantum(struct pfair_state* state)
532{
533 lt_t t = litmus_clock() - state->offset;
534 return time2quanta(t, FLOOR);
535}
536
537static void catchup_quanta(quanta_t from, quanta_t target,
538 struct pfair_state* state)
539{
540 quanta_t cur = from, time;
541 TRACE("+++< BAD catching up quanta from %lu to %lu\n",
542 from, target);
543 while (time_before(cur, target)) {
544 wait_for_quantum(cur, state);
545 cur++;
546 time = cmpxchg(&cpu_cluster(state)->pfair_time,
547 cur - 1, /* expected */
548 cur /* next */
549 );
550 if (time == cur - 1)
551 schedule_next_quantum(cpu_cluster(state), cur);
552 }
553 TRACE("+++> catching up done\n");
554}
555
556/* pfair_tick - this function is called on every quantum boundary
557 * (from the per-CPU quantum timer).
558 */
559static void pfair_tick(struct task_struct* t)
560{
561 struct pfair_state* state = this_cpu_ptr(&pfair_state);
562 quanta_t time, cur;
563 int retry = 10;
564
565 do {
566 cur = current_quantum(state);
567 PTRACE("q %lu at %llu\n", cur, litmus_clock());
568
569 /* Attempt to advance time. First CPU to get here
570 * will prepare the next quantum.
571 */
572 time = cpu_cluster(state)->pfair_time;
573 if (time == cur - 1)
574 {
575 /* looks good, see if we can advance the time */
576 time = cmpxchg(&cpu_cluster(state)->pfair_time,
577 cur - 1, /* expected */
578 cur /* next */
579 );
580 }
581
582 if (time == cur - 1) {
583 /* exchange succeeded */
584 wait_for_quantum(cur - 1, state);
585 schedule_next_quantum(cpu_cluster(state), cur);
586 retry = 0;
587 } else if (time_before(time, cur - 1)) {
588 /* the whole system missed a tick !? */
589 catchup_quanta(time, cur, state);
590 retry--;
591 } else if (time_after(time, cur)) {
592 /* our timer lagging behind!? */
593 TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
594 retry--;
595 } else {
596 /* Some other CPU already started scheduling
597 * this quantum. Let it do its job and then update.
598 */
599 retry = 0;
600 }
601 } while (retry);
602
603 /* Spin locally until time advances. */
604 wait_for_quantum(cur, state);
605
606 /* copy assignment */
607 /* FIXME: what if we race with a future update? Corrupted state? */
608 state->local = state->linked;
609 /* signal that we are done */
610 mb();
611 state->local_tick = state->cur_tick;
612
613 if (state->local != current
614 && (is_realtime(current) || is_present(state->local)))
615 litmus_reschedule_local();
616}
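/* A minimal userspace sketch of the "first CPU to get here prepares the
 * next quantum" pattern used in pfair_tick() above, assuming C11 atomics
 * in place of the kernel's cmpxchg() (which returns the old value rather
 * than a boolean, but the winner test is the same idea).
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long pfair_time_sketch = 41;

/* returns 1 iff this caller won the right to set up quantum `cur' */
static int try_advance(unsigned long cur)
{
	unsigned long expected = cur - 1;
	return atomic_compare_exchange_strong(&pfair_time_sketch,
					      &expected, cur);
}

int main(void)
{
	/* two CPUs racing to advance from quantum 41 to 42: only one wins */
	printf("first attempt wins:  %d\n", try_advance(42)); /* 1 */
	printf("second attempt wins: %d\n", try_advance(42)); /* 0 */
	return 0;
}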
617
618static void process_out_of_budget_tasks(
619 struct pfair_state* state,
620 struct task_struct* prev,
621 unsigned int blocks)
622{
623 struct task_struct *t;
624
625 while (!list_empty(&state->out_of_budget))
626 {
627
628 t = list_first_entry(&state->out_of_budget,
629 struct task_struct, rt_param.list);
630 TRACE_TASK(t, "found on out_of_budget queue is_prev=%d\n", t == prev);
631 list_del(&tsk_rt(t)->list);
632 if (t != prev || !blocks)
633 {
634 if (time_after(cur_release(t), state->local_tick)) {
635 TRACE_TASK(t, "adding to release queue (budget exhausted)\n");
636 add_release(&cpu_cluster(state)->pfair, t);
637 } else {
638 TRACE_TASK(t, "adding to ready queue (budget exhausted)\n");
639 sched_trace_task_release(t);
640 __add_ready(&cpu_cluster(state)->pfair, t);
641 }
642 } else {
643 TRACE_TASK(t, "not added to release queue (blocks=%d)\n", blocks);
644 tsk_pfair(t)->needs_requeue = 1;
645 }
646 if (unlikely(state->local == t)) {
647 TRACE_TASK(t, "still linked as ->local, cleaning up\n");
648 state->local = NULL;
649 }
650 }
651}
652
653/* Custom scheduling tick: called on each quantum boundary. */
654static enum hrtimer_restart on_quantum_boundary(struct hrtimer *timer)
655{
656 TS_QUANTUM_BOUNDARY_START;
657
658 pfair_tick(current);
659 hrtimer_add_expires_ns(timer, LITMUS_QUANTUM_LENGTH_NS);
660
661 TS_QUANTUM_BOUNDARY_END;
662 return HRTIMER_RESTART;
663}
664
665static int safe_to_schedule(struct task_struct* t, int cpu)
666{
667 int where = tsk_rt(t)->scheduled_on;
668 if (where != NO_CPU && where != cpu) {
669 TRACE_TASK(t, "BAD: can't be scheduled on %d, "
670 "scheduled already on %d.\n", cpu, where);
671 return 0;
672 } else
673 return is_present(t) && !is_completed(t);
674}
675
676static struct task_struct* pfair_schedule(struct task_struct * prev)
677{
678 struct pfair_state* state = this_cpu_ptr(&pfair_state);
679 struct pfair_cluster* cluster = cpu_cluster(state);
680 int blocks, completion, out_of_time;
681 struct task_struct* next = NULL;
682
683#ifdef CONFIG_RELEASE_MASTER
684 /* Bail out early if we are the release master.
685 * The release master never schedules any real-time tasks.
686 */
687 if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
688 goto out;
689 }
690#endif
691
692 raw_spin_lock(cpu_lock(state));
693
694 blocks = is_realtime(prev) && !is_current_running();
695 completion = is_realtime(prev) && is_completed(prev);
696 out_of_time = is_realtime(prev) && time_after(cur_release(prev),
697 state->local_tick);
698
699 if (is_realtime(prev))
700 PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
701 blocks, completion, out_of_time);
702
703 if (completion && !out_of_time) {
704 sched_trace_task_completion(prev, 0);
705 pfair_prepare_next_period(prev);
706 prepare_release(prev, cur_release(prev));
707 drop_all_references(prev);
708 list_add(&tsk_rt(prev)->list, &state->out_of_budget);
709 }
710
711 process_out_of_budget_tasks(state, prev, blocks);
712
713 if (state->local && safe_to_schedule(state->local, cpu_id(state)))
714 next = state->local;
715
716 if (prev != next) {
717 tsk_rt(prev)->scheduled_on = NO_CPU;
718 if (next)
719 tsk_rt(next)->scheduled_on = cpu_id(state);
720 }
721 sched_state_task_picked();
722 raw_spin_unlock(cpu_lock(state));
723
724 if (next)
725 TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
726 tsk_pfair(next)->release, cpu_cluster(state)->pfair_time, litmus_clock());
727 else if (is_realtime(prev))
728 TRACE("Becomes idle at %lu (%llu)\n", cpu_cluster(state)->pfair_time, litmus_clock());
729
730#ifdef CONFIG_RELEASE_MASTER
731out:
732#endif
733
734 if (unlikely(!hrtimer_active(&state->quantum_timer))) {
735 TRACE("activating quantum timer start=%llu\n",
736 hrtimer_get_expires(&state->quantum_timer));
737 hrtimer_start(&state->quantum_timer,
738 hrtimer_get_expires(&state->quantum_timer),
739 HRTIMER_MODE_ABS);
740 }
741
742 return next;
743}
744
745static void pfair_task_new(struct task_struct * t, int on_rq, int is_scheduled)
746{
747 unsigned long flags;
748 struct pfair_cluster* cluster;
749
750 TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
751
752 cluster = tsk_pfair(t)->cluster;
753
754 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
755
756 prepare_release(t, cluster->pfair_time + 1);
757 release_at(t, quanta2time(cur_release(t)));
758
759 t->rt_param.scheduled_on = NO_CPU;
760 t->rt_param.linked_on = NO_CPU;
761
762 if (is_scheduled) {
763#ifdef CONFIG_RELEASE_MASTER
764 if (task_cpu(t) != cluster->pfair.release_master)
765#endif
766 t->rt_param.scheduled_on = task_cpu(t);
767 }
768
769 if (on_rq || is_scheduled) {
770 tsk_rt(t)->present = 1;
771 __add_ready(&cluster->pfair, t);
772 } else {
773 tsk_rt(t)->present = 0;
774 tsk_pfair(t)->needs_requeue = 1;
775 }
776
777 check_preempt(t);
778
779 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
780}
781
782static void pfair_task_wake_up(struct task_struct *t)
783{
784 unsigned long flags;
785 lt_t now;
786 struct pfair_cluster* cluster;
787 struct pfair_state* state;
788 int sporadic_release = 0;
789
790 cluster = tsk_pfair(t)->cluster;
791
792 TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
793 litmus_clock(), cur_release(t), cluster->pfair_time);
794
795 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
796
797 state = this_cpu_ptr(&pfair_state);
798
799 /* If a task blocks and wakes before its next job release,
800 * then it may resume if it is currently linked somewhere
801 * (as if it never blocked at all). Otherwise, we have a
802 * new sporadic job release.
803 */
804 now = litmus_clock();
805 if (is_tardy(t, now)) {
806 TRACE_TASK(t, "sporadic release!\n");
807 sporadic_release = 1;
808 inferred_sporadic_job_release_at(t, now);
809 prepare_release(t, time2quanta(now, CEIL));
810 }
811
812 /* only add to ready queue if the task isn't still linked somewhere */
813 if (tsk_pfair(t)->needs_requeue) {
814 tsk_pfair(t)->needs_requeue = 0;
815 TRACE_TASK(t, "requeueing required (released:%d)\n",
816 !time_after(cur_release(t), state->local_tick));
817 tsk_rt(t)->completed = 0;
818 if (time_after(cur_release(t), state->local_tick)
819 && !sporadic_release)
820 add_release(&cluster->pfair, t);
821 else
822 __add_ready(&cluster->pfair, t);
823 }
824
825 check_preempt(t);
826
827 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
828 TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
829}
830
831static void pfair_task_block(struct task_struct *t)
832{
833 BUG_ON(!is_realtime(t));
834 TRACE_TASK(t, "blocks at %llu, state:%d\n",
835 litmus_clock(), t->state);
836}
837
838static void pfair_task_exit(struct task_struct * t)
839{
840 unsigned long flags;
841 struct pfair_cluster *cluster;
842
843 BUG_ON(!is_realtime(t));
844
845 cluster = tsk_pfair(t)->cluster;
846
847	/* Remove task from release or ready queue, and ensure
848	 * that it is not the scheduled task for ANY CPU. We
849	 * do this blanket check because occasionally when
850 * tasks exit while blocked, the task_cpu of the task
851 * might not be the same as the CPU that the PFAIR scheduler
852 * has chosen for it.
853 */
854 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
855
856 TRACE_TASK(t, "RIP, state:%d\n", t->state);
857 drop_all_references(t);
858
859 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
860
861 kfree(t->rt_param.pfair);
862 t->rt_param.pfair = NULL;
863}
864
865static void init_subtask(struct subtask* sub, unsigned long i,
866 lt_t quanta, lt_t period)
867{
868 /* since i is zero-based, the formulas are shifted by one */
869 lt_t tmp;
870
871 /* release */
872 tmp = period * i;
873 do_div(tmp, quanta); /* floor */
874 sub->release = (quanta_t) tmp;
875
876 /* deadline */
877 tmp = period * (i + 1);
878 if (do_div(tmp, quanta)) /* ceil */
879 tmp++;
880 sub->deadline = (quanta_t) tmp;
881
882 /* next release */
883 tmp = period * (i + 1);
884 do_div(tmp, quanta); /* floor */
885 sub->overlap = sub->deadline - (quanta_t) tmp;
886
887 /* Group deadline.
888 * Based on the formula given in Uma's thesis.
889 */
890 if (2 * quanta >= period) {
891 /* heavy */
892 tmp = (sub->deadline - (i + 1)) * period;
893 if (period > quanta &&
894 do_div(tmp, (period - quanta))) /* ceil */
895 tmp++;
896 sub->group_deadline = (quanta_t) tmp;
897 } else
898 sub->group_deadline = 0;
899}
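/* A minimal standalone sketch of the windowing formulas above, assuming
 * `quanta' (execution requirement) and `period' are both already
 * expressed in quanta and using plain integer arithmetic instead of
 * do_div(); "heavy" means a weight of at least 1/2.
 */
#include <stdio.h>

static void sketch_subtask(unsigned long i, unsigned long quanta,
			   unsigned long period)
{
	unsigned long release  = (period * i) / quanta;                    /* floor */
	unsigned long deadline = (period * (i + 1) + quanta - 1) / quanta; /* ceil  */
	unsigned long overlap  = deadline - (period * (i + 1)) / quanta;   /* b-bit */
	unsigned long gdl = 0;

	if (2 * quanta >= period && period > quanta) {                     /* heavy */
		unsigned long tmp = (deadline - (i + 1)) * period;
		gdl = (tmp + (period - quanta) - 1) / (period - quanta);   /* ceil  */
	}
	printf("subtask %lu: rel=%lu dl=%lu b=%lu gdl=%lu\n",
	       i + 1, release, deadline, overlap, gdl);
}

int main(void)
{
	/* e.g., a task needing 3 quanta every 5 quanta (weight 3/5, heavy):
	 * prints rel/dl pairs 0/2, 1/4, 3/5 with b-bits 1, 1, 0 */
	unsigned long i;
	for (i = 0; i < 3; i++)
		sketch_subtask(i, 3, 5);
	return 0;
}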
900
901static void dump_subtasks(struct task_struct* t)
902{
903 unsigned long i;
904 for (i = 0; i < t->rt_param.pfair->quanta; i++)
905 TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
906 i + 1,
907 t->rt_param.pfair->subtasks[i].release,
908 t->rt_param.pfair->subtasks[i].deadline,
909 t->rt_param.pfair->subtasks[i].overlap,
910 t->rt_param.pfair->subtasks[i].group_deadline);
911}
912
913static long pfair_admit_task(struct task_struct* t)
914{
915 lt_t quanta;
916 lt_t period;
917 s64 quantum_length = LITMUS_QUANTUM_LENGTH_NS;
918 struct pfair_param* param;
919 unsigned long i;
920
921 /* first check that the task is in the right cluster */
922 if (cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]) !=
923 cpu_cluster(pstate[task_cpu(t)]))
924 return -EINVAL;
925
926 if (get_rt_period(t) != get_rt_relative_deadline(t)) {
927 printk(KERN_INFO "%s: Admission rejected. "
928 "Only implicit deadlines are currently supported.\n",
929 litmus->plugin_name);
930 return -EINVAL;
931 }
932
933 /* Pfair is a tick-based scheduler, so the unit of time
934 * is one quantum. Calculate quantum-based parameters for everything.
935 * (Ceiling of exec cost, floor of period.)
936 */
937
938 quanta = get_exec_cost(t);
939 period = get_rt_period(t);
940
941 quanta = time2quanta(get_exec_cost(t), CEIL);
942
943 if (do_div(period, quantum_length))
944 printk(KERN_WARNING
945 "The period of %s/%d is not a multiple of %llu.\n",
946 t->comm, t->pid, (unsigned long long) quantum_length);
947
948 if (quanta == period) {
949 PTRACE_TASK(t, "Admitting weight 1.0 task. (%llu, %llu).\n", quanta, period);
950 }
951
952 param = kzalloc(sizeof(*param) +
953 quanta * sizeof(struct subtask), GFP_ATOMIC);
954
955 if (!param)
956 return -ENOMEM;
957
958 param->quanta = quanta;
959 param->period = period;
960
961 param->cluster = cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]);
962
963 for (i = 0; i < quanta; i++)
964 init_subtask(param->subtasks + i, i, quanta, period);
965
966 if (t->rt_param.pfair)
967 /* get rid of stale allocation */
968 kfree(t->rt_param.pfair);
969
970 t->rt_param.pfair = param;
971
972 /* spew out some debug info */
973 dump_subtasks(t);
974
975 /* Disable generic budget enforcement (if enabled).
976 * The plugin provides its own (non-optional) enforcement
977 * of allocations at quantum granularity. */
978 tsk_rt(t)->task_params.budget_policy = NO_ENFORCEMENT;
979
980 return 0;
981}
982
983static void pfair_init_cluster(struct pfair_cluster* cluster)
984{
985 rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
986 bheap_init(&cluster->release_queue);
987 raw_spin_lock_init(&cluster->release_lock);
988 INIT_LIST_HEAD(&cluster->topology.cpus);
989}
990
991static void cleanup_clusters(void)
992{
993 int i;
994
995 if (num_pfair_clusters)
996 kfree(pfair_clusters);
997 pfair_clusters = NULL;
998 num_pfair_clusters = 0;
999
1000 /* avoid stale pointers */
1001 for (i = 0; i < num_online_cpus(); i++) {
1002 pstate[i]->topology.cluster = NULL;
1003 printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
1004 pstate[i]->missed_updates, pstate[i]->missed_quanta);
1005 }
1006}
1007
1008static struct domain_proc_info pfair_domain_proc_info;
1009static long pfair_get_domain_proc_info(struct domain_proc_info **ret)
1010{
1011 *ret = &pfair_domain_proc_info;
1012 return 0;
1013}
1014
1015static void pfair_setup_domain_proc(void)
1016{
1017 int i, cpu, domain;
1018#ifdef CONFIG_RELEASE_MASTER
1019 int release_master = atomic_read(&release_master_cpu);
1020 /* skip over the domain with the release master if cluster size is 1 */
1021 int cluster_size = num_online_cpus() / num_pfair_clusters;
1022 int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
1023 release_master : NO_CPU;
1024#else
1025 int release_master = NO_CPU;
1026 int skip_domain = NO_CPU;
1027#endif
1028 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
1029 int num_rt_domains = num_pfair_clusters - (skip_domain != NO_CPU);
1030 struct cd_mapping *map;
1031
1032 memset(&pfair_domain_proc_info, 0, sizeof(pfair_domain_proc_info));
1033 init_domain_proc_info(&pfair_domain_proc_info, num_rt_cpus, num_pfair_clusters);
1034 pfair_domain_proc_info.num_cpus = num_rt_cpus;
1035 pfair_domain_proc_info.num_domains = num_rt_domains;
1036
1037 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
1038 if (cpu == release_master)
1039 continue;
1040 map = &pfair_domain_proc_info.cpu_to_domains[i];
1041 /* pointer math to figure out the domain index */
1042 domain = cpu_cluster(&per_cpu(pfair_state, cpu)) - pfair_clusters;
1043 map->id = cpu;
1044 cpumask_set_cpu(domain, map->mask);
1045 ++i;
1046 }
1047
1048 for (domain = 0, i = 0; domain < num_pfair_clusters; ++domain) {
1049 struct pfair_cluster *cluster;
1050 struct list_head *pos;
1051
1052 if (domain == skip_domain)
1053 continue;
1054
1055 cluster = &pfair_clusters[domain];
1056 map = &pfair_domain_proc_info.domain_to_cpus[i];
1057 map->id = i;
1058
1059 list_for_each(pos, &cluster->topology.cpus) {
1060 cpu = cpu_id(from_cluster_list(pos));
1061 if (cpu != release_master)
1062 cpumask_set_cpu(cpu, map->mask);
1063 }
1064 ++i;
1065 }
1066}
1067
1068static long pfair_activate_plugin(void)
1069{
1070 int err, i;
1071 struct pfair_state* state;
1072 struct pfair_cluster* cluster;
1073 quanta_t now, start;
1074 int cluster_size;
1075 struct cluster_cpu* cpus[NR_CPUS];
1076 struct scheduling_cluster* clust[NR_CPUS];
1077 lt_t quantum_timer_start;
1078
1079 cluster_size = get_cluster_size(pfair_cluster_level);
1080
1081 if (cluster_size <= 0 || num_online_cpus() % cluster_size != 0)
1082 return -EINVAL;
1083
1084 num_pfair_clusters = num_online_cpus() / cluster_size;
1085
1086 pfair_clusters = kzalloc(num_pfair_clusters * sizeof(struct pfair_cluster), GFP_ATOMIC);
1087 if (!pfair_clusters) {
1088 num_pfair_clusters = 0;
1089 printk(KERN_ERR "Could not allocate Pfair clusters!\n");
1090 return -ENOMEM;
1091 }
1092
1093 state = this_cpu_ptr(&pfair_state);
1094 now = current_quantum(state);
1095 start = now + 50;
1096 quantum_timer_start = quanta2time(start);
1097 TRACE("Activating PFAIR at %llu (q=%lu), first tick at %llu (q=%lu)\n",
1098 litmus_clock(),
1099 now,
1100 quantum_timer_start,
1101 time2quanta(quantum_timer_start, CEIL));
1102
1103 for (i = 0; i < num_pfair_clusters; i++) {
1104 cluster = &pfair_clusters[i];
1105 pfair_init_cluster(cluster);
1106 cluster->pfair_time = start;
1107 clust[i] = &cluster->topology;
1108#ifdef CONFIG_RELEASE_MASTER
1109 cluster->pfair.release_master = atomic_read(&release_master_cpu);
1110#endif
1111 }
1112
1113 for_each_online_cpu(i) {
1114 state = &per_cpu(pfair_state, i);
1115 state->cur_tick = start;
1116 state->local_tick = start;
1117 state->missed_quanta = 0;
1118 state->missed_updates = 0;
1119 state->offset = cpu_stagger_offset(i);
1120 hrtimer_set_expires(&state->quantum_timer,
1121 ns_to_ktime(quantum_timer_start + state->offset));
1122 cpus[i] = &state->topology;
1123 TRACE("cpus[%d] set; offset=%llu; %d\n", i, state->offset, num_online_cpus());
1124 INIT_LIST_HEAD(&state->out_of_budget);
1125 /* force rescheduling to start quantum timer */
1126 litmus_reschedule(i);
1127
1128 WARN_ONCE(!hrtimer_is_hres_active(&state->quantum_timer),
1129 KERN_ERR "WARNING: no high resolution timers available!?\n");
1130 }
1131
1132 err = assign_cpus_to_clusters(pfair_cluster_level, clust, num_pfair_clusters,
1133 cpus, num_online_cpus());
1134
1135 if (err < 0)
1136 cleanup_clusters();
1137 else
1138 pfair_setup_domain_proc();
1139
1140 return err;
1141}
1142
1143static long pfair_deactivate_plugin(void)
1144{
1145 int cpu;
1146 struct pfair_state* state;
1147
1148 for_each_online_cpu(cpu) {
1149 state = &per_cpu(pfair_state, cpu);
1150 TRACE("stopping quantum timer on CPU%d\n", cpu);
1151 hrtimer_cancel(&state->quantum_timer);
1152 }
1153 cleanup_clusters();
1154 destroy_domain_proc_info(&pfair_domain_proc_info);
1155 return 0;
1156}
1157
1158/* Plugin object */
1159static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
1160 .plugin_name = "PFAIR",
1161 .task_new = pfair_task_new,
1162 .task_exit = pfair_task_exit,
1163 .schedule = pfair_schedule,
1164 .task_wake_up = pfair_task_wake_up,
1165 .task_block = pfair_task_block,
1166 .admit_task = pfair_admit_task,
1167 .complete_job = complete_job,
1168 .activate_plugin = pfair_activate_plugin,
1169 .deactivate_plugin = pfair_deactivate_plugin,
1170 .get_domain_proc_info = pfair_get_domain_proc_info,
1171};
1172
1173
1174static struct proc_dir_entry *cluster_file = NULL, *pfair_dir = NULL;
1175
1176static int __init init_pfair(void)
1177{
1178 int cpu, err, fs;
1179 struct pfair_state *state;
1180
1181 /*
1182	 * initialize shortcut for per-CPU pfair state;
1183	 * there may be a problem here if someone removes a CPU
1184	 * while we are doing this initialization, and if CPUs
1185	 * are added/removed later, but we don't support CPU hotplug at the moment anyway.
1186 */
1187 pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL);
1188
1189 /* initialize CPU state */
1190 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
1191 state = &per_cpu(pfair_state, cpu);
1192 hrtimer_init(&state->quantum_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
1193 state->quantum_timer.function = on_quantum_boundary;
1194 state->topology.id = cpu;
1195 state->cur_tick = 0;
1196 state->local_tick = 0;
1197 state->linked = NULL;
1198 state->local = NULL;
1199 state->scheduled = NULL;
1200 state->missed_quanta = 0;
1201 state->offset = cpu_stagger_offset(cpu);
1202 pstate[cpu] = state;
1203 }
1204
1205 pfair_clusters = NULL;
1206 num_pfair_clusters = 0;
1207
1208 err = register_sched_plugin(&pfair_plugin);
1209 if (!err) {
1210 fs = make_plugin_proc_dir(&pfair_plugin, &pfair_dir);
1211 if (!fs)
1212 cluster_file = create_cluster_file(pfair_dir, &pfair_cluster_level);
1213 else
1214 printk(KERN_ERR "Could not allocate PFAIR procfs dir.\n");
1215 }
1216
1217 return err;
1218}
1219
1220static void __exit clean_pfair(void)
1221{
1222 kfree(pstate);
1223
1224 if (cluster_file)
1225 remove_proc_entry("cluster", pfair_dir);
1226 if (pfair_dir)
1227 remove_plugin_proc_dir(&pfair_plugin);
1228}
1229
1230module_init(init_pfair);
1231module_exit(clean_pfair);
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 000000000000..c7f2e60d010b
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,2048 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/debug_trace.h>
15#include <litmus/litmus.h>
16#include <litmus/wait.h>
17#include <litmus/jobs.h>
18#include <litmus/preempt.h>
19#include <litmus/fp_common.h>
20#include <litmus/sched_plugin.h>
21#include <litmus/sched_trace.h>
22#include <litmus/trace.h>
23#include <litmus/budget.h>
24#include <litmus/np.h>
25
26/* to set up domain/cpu mappings */
27#include <litmus/litmus_proc.h>
28#include <linux/uaccess.h>
29
30
31typedef struct {
32 rt_domain_t domain;
33 struct fp_prio_queue ready_queue;
34 int cpu;
35 struct task_struct* scheduled; /* only RT tasks */
36/*
37 * scheduling lock slock
38 * protects the domain and serializes scheduling decisions
39 */
40#define slock domain.ready_lock
41
42} pfp_domain_t;
43
44DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
45
46pfp_domain_t* pfp_doms[NR_CPUS];
47
48#define local_pfp (this_cpu_ptr(&pfp_domains))
49#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
50#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
51#define task_dom(task) remote_dom(get_partition(task))
52#define task_pfp(task) remote_pfp(get_partition(task))
53
54
55#ifdef CONFIG_LITMUS_LOCKING
56DEFINE_PER_CPU(uint64_t,fmlp_timestamp);
57#endif
58
59/* we assume the lock is being held */
60static void preempt(pfp_domain_t *pfp)
61{
62 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
63}
64
65static unsigned int priority_index(struct task_struct* t)
66{
67#ifdef CONFIG_LITMUS_LOCKING
68 if (unlikely(t->rt_param.inh_task))
69 /* use effective priority */
70 t = t->rt_param.inh_task;
71
72 if (is_priority_boosted(t)) {
73 /* zero is reserved for priority-boosted tasks */
74 return 0;
75 } else
76#endif
77 return get_priority(t);
78}
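/* A minimal sketch of the index mapping above, assuming the fixed-priority
 * ready queue is indexed from 0 with lower indices served first and regular
 * priorities starting at 1: boosted lock holders collapse to the reserved
 * index 0 and therefore precede every regular priority level.
 */
#include <stdio.h>

static unsigned int sketch_priority_index(unsigned int prio, int boosted)
{
	return boosted ? 0 : prio; /* index 0 is reserved for boosted tasks */
}

int main(void)
{
	printf("regular prio 3 -> index %u\n", sketch_priority_index(3, 0)); /* 3 */
	printf("boosted prio 3 -> index %u\n", sketch_priority_index(3, 1)); /* 0 */
	return 0;
}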
79
80static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
81{
82 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
83 unsigned long flags;
84 struct task_struct* t;
85 struct bheap_node* hn;
86
87 raw_spin_lock_irqsave(&pfp->slock, flags);
88
89 while (!bheap_empty(tasks)) {
90 hn = bheap_take(fp_ready_order, tasks);
91 t = bheap2task(hn);
92 TRACE_TASK(t, "released (part:%d prio:%d)\n",
93 get_partition(t), get_priority(t));
94 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
95 }
96
97 /* do we need to preempt? */
98 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
99 TRACE_CUR("preempted by new release\n");
100 preempt(pfp);
101 }
102
103 raw_spin_unlock_irqrestore(&pfp->slock, flags);
104}
105
106static void pfp_preempt_check(pfp_domain_t *pfp)
107{
108 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
109 preempt(pfp);
110}
111
112static void pfp_domain_init(pfp_domain_t* pfp,
113 int cpu)
114{
115 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
116 pfp->cpu = cpu;
117 pfp->scheduled = NULL;
118 fp_prio_queue_init(&pfp->ready_queue);
119}
120
121static void requeue(struct task_struct* t, pfp_domain_t *pfp)
122{
123 tsk_rt(t)->completed = 0;
124 if (is_released(t, litmus_clock())) {
125 TRACE_TASK(t, "add to ready\n");
126 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
127 } else
128 add_release(&pfp->domain, t); /* it has got to wait */
129}
130
131static void job_completion(struct task_struct* t, int forced)
132{
133 sched_trace_task_completion(t, forced);
134 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
135
136 tsk_rt(t)->completed = 0;
137 prepare_for_next_period(t);
138 if (is_released(t, litmus_clock()))
139 sched_trace_task_release(t);
140}
141
142static struct task_struct* pfp_schedule(struct task_struct * prev)
143{
144 pfp_domain_t* pfp = local_pfp;
145 struct task_struct* next;
146
147 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
148
149 raw_spin_lock(&pfp->slock);
150
151 /* sanity checking
152	 * unlike G-EDF, when a task exits (dead),
153	 * pfp->scheduled may be NULL and prev _is_ realtime
154 */
155 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
156 BUG_ON(pfp->scheduled && !is_realtime(prev));
157
158 /* (0) Determine state */
159 exists = pfp->scheduled != NULL;
160 blocks = exists && !is_current_running();
161 out_of_time = exists && budget_enforced(pfp->scheduled)
162 && budget_exhausted(pfp->scheduled);
163 np = exists && is_np(pfp->scheduled);
164 sleep = exists && is_completed(pfp->scheduled);
165 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
166 preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
167
168 /* If we need to preempt do so.
169 * The following checks set resched to 1 in case of special
170 * circumstances.
171 */
172 resched = preempt;
173
174 /* If a task blocks we have no choice but to reschedule.
175 */
176 if (blocks)
177 resched = 1;
178
179 /* Request a sys_exit_np() call if we would like to preempt but cannot.
180 * Multiple calls to request_exit_np() don't hurt.
181 */
182 if (np && (out_of_time || preempt || sleep))
183 request_exit_np(pfp->scheduled);
184
185 /* Any task that is preemptable and either exhausts its execution
186 * budget or wants to sleep completes. We may have to reschedule after
187 * this.
188 */
189 if (!np && (out_of_time || sleep)) {
190 job_completion(pfp->scheduled, !sleep);
191 resched = 1;
192 }
193
194 if (exists)
195 TRACE_TASK(pfp->scheduled, "state:%d blocks:%d oot:%d np:%d sleep:%d "
196 "mig:%d preempt:%d resched:%d on_rq:%d on_cpu:%d\n",
197 pfp->scheduled->state,
198 blocks, out_of_time, np, sleep, migrate, preempt, resched,
199 pfp->scheduled->on_rq, pfp->scheduled->on_cpu);
200
201 /* The final scheduling decision. Do we need to switch for some reason?
202 * Switch if we are in RT mode and have no task or if we need to
203 * resched.
204 */
205 next = NULL;
206 if ((!np || blocks) && (resched || !exists)) {
207 /* When preempting a task that does not block, then
208 * re-insert it into either the ready queue or the
209 * release queue (if it completed). requeue() picks
210 * the appropriate queue.
211 */
212 if (pfp->scheduled && !blocks && !migrate)
213 requeue(pfp->scheduled, pfp);
214 next = fp_prio_take(&pfp->ready_queue);
215 if (next == prev) {
216 struct task_struct *t = fp_prio_peek(&pfp->ready_queue);
217 TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d "
218 "boost=%d empty=%d prio-idx=%u prio=%u\n",
219 sleep, out_of_time, np, preempt, migrate,
220 is_priority_boosted(next),
221 t == NULL,
222 priority_index(next),
223 get_priority(next));
224 if (t)
225 TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n",
226 is_priority_boosted(t),
227 priority_index(t),
228 get_priority(t));
229 }
230 /* If preempt is set, we should not see the same task again. */
231 BUG_ON(preempt && next == prev);
232 /* Similarly, if preempt is set, then next may not be NULL,
233 * unless it's a migration. */
234 BUG_ON(preempt && !migrate && next == NULL);
235 } else
236 /* Only override Linux scheduler if we have a real-time task
237 * scheduled that needs to continue.
238 */
239 if (exists)
240 next = prev;
241
242 if (next) {
243 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
244 } else if (exists) {
245 TRACE("becoming idle at %llu\n", litmus_clock());
246 }
247
248 pfp->scheduled = next;
249 sched_state_task_picked();
250 raw_spin_unlock(&pfp->slock);
251
252 return next;
253}
254
255#ifdef CONFIG_LITMUS_LOCKING
256
257/* prev is no longer scheduled --- see if it needs to migrate */
258static void pfp_finish_switch(struct task_struct *prev)
259{
260 pfp_domain_t *to;
261
262 if (is_realtime(prev))
263 TRACE_TASK(prev, "state:%d on_rq:%d on_cpu:%d\n",
264 prev->state, prev->on_rq, prev->on_cpu);
265
266 if (is_realtime(prev) &&
267 prev->state == TASK_RUNNING &&
268 get_partition(prev) != smp_processor_id()) {
269 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
270 smp_processor_id(), get_partition(prev));
271
272 to = task_pfp(prev);
273
274 raw_spin_lock(&to->slock);
275
276 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
277 requeue(prev, to);
278 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
279 preempt(to);
280
281 raw_spin_unlock(&to->slock);
282
283 }
284}
285
286#endif
287
288/* Prepare a task for running in RT mode
289 */
290static void pfp_task_new(struct task_struct * t, int on_rq, int is_scheduled)
291{
292 pfp_domain_t* pfp = task_pfp(t);
293 unsigned long flags;
294
295 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
296 t->rt_param.task_params.cpu);
297
298 /* setup job parameters */
299 release_at(t, litmus_clock());
300
301 raw_spin_lock_irqsave(&pfp->slock, flags);
302 if (is_scheduled) {
303 /* there shouldn't be anything else running at the time */
304 BUG_ON(pfp->scheduled);
305 pfp->scheduled = t;
306 } else if (on_rq) {
307 requeue(t, pfp);
308 /* maybe we have to reschedule */
309 pfp_preempt_check(pfp);
310 }
311 raw_spin_unlock_irqrestore(&pfp->slock, flags);
312}
313
314static void pfp_task_wake_up(struct task_struct *task)
315{
316 unsigned long flags;
317 pfp_domain_t* pfp = task_pfp(task);
318 lt_t now;
319
320 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
321 raw_spin_lock_irqsave(&pfp->slock, flags);
322
323#ifdef CONFIG_LITMUS_LOCKING
324 /* Should only be queued when processing a fake-wake up due to a
325 * migration-related state change. */
326 if (unlikely(is_queued(task))) {
327 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
328 goto out_unlock;
329 }
330#else
331 BUG_ON(is_queued(task));
332#endif
333 now = litmus_clock();
334 if (is_sporadic(task) && is_tardy(task, now)
335#ifdef CONFIG_LITMUS_LOCKING
336 /* We need to take suspensions because of semaphores into
337 * account! If a job resumes after being suspended due to acquiring
338 * a semaphore, it should never be treated as a new job release.
339 */
340 && !is_priority_boosted(task)
341#endif
342 ) {
343 inferred_sporadic_job_release_at(task, now);
344 }
345
346 /* Only add to ready queue if it is not the currently-scheduled
347 * task. This could be the case if a task was woken up concurrently
348 * on a remote CPU before the executing CPU got around to actually
349 * de-scheduling the task, i.e., wake_up() raced with schedule()
350 * and won. Also, don't requeue if it is still queued, which can
351	 * happen under the DPCP due to wake-ups racing with migrations.
352 */
353 if (pfp->scheduled != task) {
354 requeue(task, pfp);
355 pfp_preempt_check(pfp);
356 }
357
358#ifdef CONFIG_LITMUS_LOCKING
359out_unlock:
360#endif
361 raw_spin_unlock_irqrestore(&pfp->slock, flags);
362 TRACE_TASK(task, "wake up done\n");
363}
364
365static void pfp_task_block(struct task_struct *t)
366{
367 /* only running tasks can block, thus t is in no queue */
368 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
369
370 BUG_ON(!is_realtime(t));
371
372 /* If this task blocked normally, it shouldn't be queued. The exception is
373 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
374 * This should only happen if the DPCP is being used.
375 */
376#ifdef CONFIG_LITMUS_LOCKING
377 if (unlikely(is_queued(t)))
378 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
379#else
380 BUG_ON(is_queued(t));
381#endif
382}
383
384static void pfp_task_exit(struct task_struct * t)
385{
386 unsigned long flags;
387 pfp_domain_t* pfp = task_pfp(t);
388 rt_domain_t* dom;
389
390 raw_spin_lock_irqsave(&pfp->slock, flags);
391 if (is_queued(t)) {
392 BUG(); /* This currently doesn't work. */
393 /* dequeue */
394 dom = task_dom(t);
395 remove(dom, t);
396 }
397 if (pfp->scheduled == t) {
398 pfp->scheduled = NULL;
399 preempt(pfp);
400 }
401 TRACE_TASK(t, "RIP, now reschedule\n");
402
403 raw_spin_unlock_irqrestore(&pfp->slock, flags);
404}
405
406#ifdef CONFIG_LITMUS_LOCKING
407
408#include <litmus/fdso.h>
409#include <litmus/srp.h>
410
411static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
412{
413 BUG_ON(pfp->scheduled == t && is_queued(t));
414 if (is_queued(t))
415 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
416}
417
418static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
419 struct task_struct* prio_inh)
420{
421 int requeue;
422
423 if (!t || t->rt_param.inh_task == prio_inh) {
424 /* no update required */
425 if (t)
426 TRACE_TASK(t, "no prio-inh update required\n");
427 return;
428 }
429
430 requeue = is_queued(t);
431 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
432
433 if (requeue)
434 /* first remove */
435 fp_dequeue(pfp, t);
436
437 t->rt_param.inh_task = prio_inh;
438
439 if (requeue)
440 /* add again to the right queue */
441 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
442}
443
444static int effective_agent_priority(int prio)
445{
446 /* make sure agents have higher priority */
447 return prio - LITMUS_MAX_PRIORITY;
448}
449
450static lt_t prio_point(int eprio)
451{
452 /* make sure we have non-negative prio points */
453 return eprio + LITMUS_MAX_PRIORITY;
454}
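/* A minimal sketch of the two mappings above, assuming a stand-in value of
 * 512 for LITMUS_MAX_PRIORITY: agent priorities are shifted below zero so
 * that they dominate every regular priority (lower value = higher
 * priority), and prio_point() shifts them back into a non-negative key
 * space for the priority-ordered wait queues.
 */
#include <stdio.h>

#define SKETCH_MAX_PRIORITY 512 /* stand-in for LITMUS_MAX_PRIORITY */

static int  sketch_agent_prio(int prio)  { return prio - SKETCH_MAX_PRIORITY; }
static long sketch_prio_point(int eprio) { return eprio + SKETCH_MAX_PRIORITY; }

int main(void)
{
	int regular = 7;                          /* a regular task priority */
	int agent   = sketch_agent_prio(regular); /* 7 - 512 = -505 */

	/* the agent now beats every regular priority >= 0 ... */
	printf("agent prio: %d\n", agent);
	/* ... and its wait-queue key is non-negative again */
	printf("prio point: %ld\n", sketch_prio_point(agent)); /* 7 */
	return 0;
}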
455
456static void boost_priority(struct task_struct* t, lt_t priority_point)
457{
458 unsigned long flags;
459 pfp_domain_t* pfp = task_pfp(t);
460
461 raw_spin_lock_irqsave(&pfp->slock, flags);
462
463
464 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
465
466 tsk_rt(t)->priority_boosted = 1;
467 /* tie-break by protocol-specific priority point */
468 tsk_rt(t)->boost_start_time = priority_point;
469
470 /* Priority boosting currently only takes effect for already-scheduled
471 * tasks. This is sufficient since priority boosting only kicks in as
472 * part of lock acquisitions. */
473 BUG_ON(pfp->scheduled != t);
474
475 raw_spin_unlock_irqrestore(&pfp->slock, flags);
476}
477
478static void unboost_priority(struct task_struct* t)
479{
480 unsigned long flags;
481 pfp_domain_t* pfp = task_pfp(t);
482
483 raw_spin_lock_irqsave(&pfp->slock, flags);
484
485 /* Assumption: this only happens when the job is scheduled.
486 * Exception: If t transitioned to non-real-time mode, we no longer
487	 * care about it. */
488 BUG_ON(pfp->scheduled != t && is_realtime(t));
489
490 TRACE_TASK(t, "priority restored at %llu\n", litmus_clock());
491
492 tsk_rt(t)->priority_boosted = 0;
493 tsk_rt(t)->boost_start_time = 0;
494
495 /* check if this changes anything */
496 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
497 preempt(pfp);
498
499 raw_spin_unlock_irqrestore(&pfp->slock, flags);
500}
501
502/* ******************** SRP support ************************ */
503
504static unsigned int pfp_get_srp_prio(struct task_struct* t)
505{
506 return get_priority(t);
507}
508
509/* ******************** FMLP support ********************** */
510
511struct fmlp_semaphore {
512 struct litmus_lock litmus_lock;
513
514 /* current resource holder */
515 struct task_struct *owner;
516
517 /* FIFO queue of waiting tasks */
518 wait_queue_head_t wait;
519};
520
521static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
522{
523 return container_of(lock, struct fmlp_semaphore, litmus_lock);
524}
525
526static inline lt_t
527fmlp_clock(void)
528{
529 return (lt_t) this_cpu_inc_return(fmlp_timestamp);
530}
531
532int pfp_fmlp_lock(struct litmus_lock* l)
533{
534 struct task_struct* t = current;
535 struct fmlp_semaphore *sem = fmlp_from_lock(l);
536 wait_queue_t wait;
537 unsigned long flags;
538 lt_t time_of_request;
539
540 if (!is_realtime(t))
541 return -EPERM;
542
543 /* prevent nested lock acquisition --- not supported by FMLP */
544 if (tsk_rt(t)->num_locks_held ||
545 tsk_rt(t)->num_local_locks_held)
546 return -EBUSY;
547
548 spin_lock_irqsave(&sem->wait.lock, flags);
549
550 /* tie-break by this point in time */
551 time_of_request = fmlp_clock();
552
553 /* Priority-boost ourself *before* we suspend so that
554 * our priority is boosted when we resume. */
555 boost_priority(t, time_of_request);
556
557 if (sem->owner) {
558 /* resource is not free => must suspend and wait */
559
560 init_waitqueue_entry(&wait, t);
561
562 /* FIXME: interruptible would be nice some day */
563 set_task_state(t, TASK_UNINTERRUPTIBLE);
564
565 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
566
567 TS_LOCK_SUSPEND;
568
569 /* release lock before sleeping */
570 spin_unlock_irqrestore(&sem->wait.lock, flags);
571
572 /* We depend on the FIFO order. Thus, we don't need to recheck
573 * when we wake up; we are guaranteed to have the lock since
574 * there is only one wake up per release.
575 */
576
577 schedule();
578
579 TS_LOCK_RESUME;
580
581 /* Since we hold the lock, no other task will change
582 * ->owner. We can thus check it without acquiring the spin
583 * lock. */
584 BUG_ON(sem->owner != t);
585 } else {
586 /* it's ours now */
587 sem->owner = t;
588
589 spin_unlock_irqrestore(&sem->wait.lock, flags);
590 }
591
592 tsk_rt(t)->num_locks_held++;
593
594 return 0;
595}
596
597int pfp_fmlp_unlock(struct litmus_lock* l)
598{
599 struct task_struct *t = current, *next = NULL;
600 struct fmlp_semaphore *sem = fmlp_from_lock(l);
601 unsigned long flags;
602 int err = 0;
603
604 preempt_disable();
605
606 spin_lock_irqsave(&sem->wait.lock, flags);
607
608 if (sem->owner != t) {
609 err = -EINVAL;
610 goto out;
611 }
612
613 tsk_rt(t)->num_locks_held--;
614
615 /* we lose the benefit of priority boosting */
616
617 unboost_priority(t);
618
619 /* check if there are jobs waiting for this resource */
620 next = __waitqueue_remove_first(&sem->wait);
621 sem->owner = next;
622
623out:
624 spin_unlock_irqrestore(&sem->wait.lock, flags);
625
626 /* Wake up next. The waiting job is already priority-boosted. */
627 if(next) {
628 wake_up_process(next);
629 }
630
631 preempt_enable();
632
633 return err;
634}
635
636int pfp_fmlp_close(struct litmus_lock* l)
637{
638 struct task_struct *t = current;
639 struct fmlp_semaphore *sem = fmlp_from_lock(l);
640 unsigned long flags;
641
642 int owner;
643
644 spin_lock_irqsave(&sem->wait.lock, flags);
645
646 owner = sem->owner == t;
647
648 spin_unlock_irqrestore(&sem->wait.lock, flags);
649
650 if (owner)
651 pfp_fmlp_unlock(l);
652
653 return 0;
654}
655
656void pfp_fmlp_free(struct litmus_lock* lock)
657{
658 kfree(fmlp_from_lock(lock));
659}
660
661static struct litmus_lock_ops pfp_fmlp_lock_ops = {
662 .close = pfp_fmlp_close,
663 .lock = pfp_fmlp_lock,
664 .unlock = pfp_fmlp_unlock,
665 .deallocate = pfp_fmlp_free,
666};
667
668static struct litmus_lock* pfp_new_fmlp(void)
669{
670 struct fmlp_semaphore* sem;
671
672 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
673 if (!sem)
674 return NULL;
675
676 sem->owner = NULL;
677 init_waitqueue_head(&sem->wait);
678 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
679
680 return &sem->litmus_lock;
681}
682
683/* ******************** MPCP support ********************** */
684
685struct mpcp_semaphore {
686 struct litmus_lock litmus_lock;
687
688 /* current resource holder */
689 struct task_struct *owner;
690
691 /* priority queue of waiting tasks */
692 wait_queue_head_t wait;
693
694 /* priority ceiling per cpu */
695 unsigned int prio_ceiling[NR_CPUS];
696
697 /* should jobs spin "virtually" for this resource? */
698 int vspin;
699};
700
701#define OMEGA_CEILING UINT_MAX
702
703/* Since jobs spin "virtually" while waiting to acquire a lock,
704 * they first must acquire a local per-cpu resource.
705 */
706static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
707static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
708
709/* called with preemptions off <=> no local modifications */
710static void mpcp_vspin_enter(void)
711{
712 struct task_struct* t = current;
713
714 while (1) {
715 if (this_cpu_read(mpcpvs_vspin) == NULL) {
716 /* good, we get to issue our request */
717 this_cpu_write(mpcpvs_vspin, t);
718 break;
719 } else {
720 /* some job is spinning => enqueue in request queue */
721 prio_wait_queue_t wait;
722 wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait);
723 unsigned long flags;
724
725 /* ordered by regular priority */
726 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
727
728 spin_lock_irqsave(&vspin->lock, flags);
729
730 set_task_state(t, TASK_UNINTERRUPTIBLE);
731
732 __add_wait_queue_prio_exclusive(vspin, &wait);
733
734 spin_unlock_irqrestore(&vspin->lock, flags);
735
736 TS_LOCK_SUSPEND;
737
738 preempt_enable_no_resched();
739
740 schedule();
741
742 preempt_disable();
743
744 TS_LOCK_RESUME;
745 /* Recheck if we got it --- some higher-priority process might
746 * have swooped in. */
747 }
748 }
749 /* ok, now it is ours */
750}
751
752/* called with preemptions off */
753static void mpcp_vspin_exit(void)
754{
755 struct task_struct* t = current, *next;
756 unsigned long flags;
757 wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait);
758
759 BUG_ON(this_cpu_read(mpcpvs_vspin) != t);
760
761 /* no spinning job */
762 this_cpu_write(mpcpvs_vspin, NULL);
763
764 /* see if anyone is waiting for us to stop "spinning" */
765 spin_lock_irqsave(&vspin->lock, flags);
766 next = __waitqueue_remove_first(vspin);
767
768 if (next)
769 wake_up_process(next);
770
771 spin_unlock_irqrestore(&vspin->lock, flags);
772}
773
774static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
775{
776 return container_of(lock, struct mpcp_semaphore, litmus_lock);
777}
778
779int pfp_mpcp_lock(struct litmus_lock* l)
780{
781 struct task_struct* t = current;
782 struct mpcp_semaphore *sem = mpcp_from_lock(l);
783 prio_wait_queue_t wait;
784 unsigned long flags;
785
786 if (!is_realtime(t))
787 return -EPERM;
788
789 /* prevent nested lock acquisition */
790 if (tsk_rt(t)->num_locks_held ||
791 tsk_rt(t)->num_local_locks_held)
792 return -EBUSY;
793
794 preempt_disable();
795
796 if (sem->vspin)
797 mpcp_vspin_enter();
798
799 /* Priority-boost ourself *before* we suspend so that
800 * our priority is boosted when we resume. Use the priority
801 * ceiling for the local partition. */
802 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
803
804 spin_lock_irqsave(&sem->wait.lock, flags);
805
806 preempt_enable_no_resched();
807
808 if (sem->owner) {
809 /* resource is not free => must suspend and wait */
810
811 /* ordered by regular priority */
812 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
813
814 /* FIXME: interruptible would be nice some day */
815 set_task_state(t, TASK_UNINTERRUPTIBLE);
816
817 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
818
819 TS_LOCK_SUSPEND;
820
821 /* release lock before sleeping */
822 spin_unlock_irqrestore(&sem->wait.lock, flags);
823
824		/* We depend on there being exactly one wake-up per release:
825		 * thus, we don't need to recheck when we wake up; we are
826		 * guaranteed to hold the lock once we have been woken.
827 */
828
829 schedule();
830
831 TS_LOCK_RESUME;
832
833 /* Since we hold the lock, no other task will change
834 * ->owner. We can thus check it without acquiring the spin
835 * lock. */
836 BUG_ON(sem->owner != t);
837 } else {
838 /* it's ours now */
839 sem->owner = t;
840
841 spin_unlock_irqrestore(&sem->wait.lock, flags);
842 }
843
844 tsk_rt(t)->num_locks_held++;
845
846 return 0;
847}
848
849int pfp_mpcp_unlock(struct litmus_lock* l)
850{
851 struct task_struct *t = current, *next = NULL;
852 struct mpcp_semaphore *sem = mpcp_from_lock(l);
853 unsigned long flags;
854 int err = 0;
855
856 preempt_disable();
857
858 spin_lock_irqsave(&sem->wait.lock, flags);
859
860 if (sem->owner != t) {
861 err = -EINVAL;
862 goto out;
863 }
864
865 tsk_rt(t)->num_locks_held--;
866
867 /* we lose the benefit of priority boosting */
868 unboost_priority(t);
869
870 /* check if there are jobs waiting for this resource */
871 next = __waitqueue_remove_first(&sem->wait);
872 sem->owner = next;
873
874out:
875 spin_unlock_irqrestore(&sem->wait.lock, flags);
876
877 /* Wake up next. The waiting job is already priority-boosted. */
878 if(next) {
879 wake_up_process(next);
880 }
881
882 if (sem->vspin && err == 0) {
883 mpcp_vspin_exit();
884 }
885
886 preempt_enable();
887
888 return err;
889}
890
891int pfp_mpcp_open(struct litmus_lock* l, void* config)
892{
893 struct task_struct *t = current;
894 int cpu, local_cpu;
895 struct mpcp_semaphore *sem = mpcp_from_lock(l);
896 unsigned long flags;
897
898 if (!is_realtime(t))
899 /* we need to know the real-time priority */
900 return -EPERM;
901
902 local_cpu = get_partition(t);
903
904 spin_lock_irqsave(&sem->wait.lock, flags);
905 for (cpu = 0; cpu < NR_CPUS; cpu++) {
906 if (cpu != local_cpu) {
907 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
908 get_priority(t));
909 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
910 sem, sem->prio_ceiling[cpu], cpu);
911 }
912 }
913 spin_unlock_irqrestore(&sem->wait.lock, flags);
914
915 return 0;
916}
917
918int pfp_mpcp_close(struct litmus_lock* l)
919{
920 struct task_struct *t = current;
921 struct mpcp_semaphore *sem = mpcp_from_lock(l);
922 unsigned long flags;
923
924 int owner;
925
926 spin_lock_irqsave(&sem->wait.lock, flags);
927
928 owner = sem->owner == t;
929
930 spin_unlock_irqrestore(&sem->wait.lock, flags);
931
932 if (owner)
933 pfp_mpcp_unlock(l);
934
935 return 0;
936}
937
938void pfp_mpcp_free(struct litmus_lock* lock)
939{
940 kfree(mpcp_from_lock(lock));
941}
942
943static struct litmus_lock_ops pfp_mpcp_lock_ops = {
944 .close = pfp_mpcp_close,
945 .lock = pfp_mpcp_lock,
946 .open = pfp_mpcp_open,
947 .unlock = pfp_mpcp_unlock,
948 .deallocate = pfp_mpcp_free,
949};
950
951static struct litmus_lock* pfp_new_mpcp(int vspin)
952{
953 struct mpcp_semaphore* sem;
954 int cpu;
955
956 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
957 if (!sem)
958 return NULL;
959
960 sem->owner = NULL;
961 init_waitqueue_head(&sem->wait);
962 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
963
964 for (cpu = 0; cpu < NR_CPUS; cpu++)
965 sem->prio_ceiling[cpu] = OMEGA_CEILING;
966
967 /* mark as virtual spinning */
968 sem->vspin = vspin;
969
970 return &sem->litmus_lock;
971}
972
973
974/* ******************** PCP support ********************** */
975
976
977struct pcp_semaphore {
978 struct litmus_lock litmus_lock;
979
980 struct list_head ceiling;
981
982 /* current resource holder */
983 struct task_struct *owner;
984
985 /* priority ceiling --- can be negative due to DPCP support */
986 int prio_ceiling;
987
988 /* on which processor is this PCP semaphore allocated? */
989 int on_cpu;
990};
991
992static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock)
993{
994 return container_of(lock, struct pcp_semaphore, litmus_lock);
995}
996
997
998struct pcp_state {
999 struct list_head system_ceiling;
1000
1001 /* highest-priority waiting task */
1002 struct task_struct* hp_waiter;
1003
1004 /* list of jobs waiting to get past the system ceiling */
1005 wait_queue_head_t ceiling_blocked;
1006};
1007
1008static void pcp_init_state(struct pcp_state* s)
1009{
1010 INIT_LIST_HEAD(&s->system_ceiling);
1011 s->hp_waiter = NULL;
1012 init_waitqueue_head(&s->ceiling_blocked);
1013}
1014
1015static DEFINE_PER_CPU(struct pcp_state, pcp_state);
1016
1017/* assumes preemptions are off */
1018static struct pcp_semaphore* pcp_get_ceiling(void)
1019{
1020 struct list_head* top = &(this_cpu_ptr(&pcp_state)->system_ceiling);
1021 return list_first_entry_or_null(top, struct pcp_semaphore, ceiling);
1022}
1023
1024/* assumes preempt off */
1025static void pcp_add_ceiling(struct pcp_semaphore* sem)
1026{
1027 struct list_head *pos;
1028 struct list_head *in_use = &(this_cpu_ptr(&pcp_state)->system_ceiling);
1029 struct pcp_semaphore* held;
1030
1031 BUG_ON(sem->on_cpu != smp_processor_id());
1032 BUG_ON(in_list(&sem->ceiling));
1033
1034 list_for_each(pos, in_use) {
1035 held = list_entry(pos, struct pcp_semaphore, ceiling);
1036 if (held->prio_ceiling >= sem->prio_ceiling) {
1037 __list_add(&sem->ceiling, pos->prev, pos);
1038 return;
1039 }
1040 }
1041
1042 /* we hit the end of the list */
1043
1044 list_add_tail(&sem->ceiling, in_use);
1045}
1046
1047/* assumes preempt off */
1048static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1049 struct task_struct* task,
1050 int effective_prio)
1051{
1052 return ceiling == NULL ||
1053 ceiling->prio_ceiling > effective_prio ||
1054 ceiling->owner == task;
1055}
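/* A minimal standalone sketch of the classic PCP rule encoded above,
 * assuming integer priorities where a lower value means higher priority:
 * a job may acquire a new resource only if its priority exceeds the
 * current system ceiling, or if it itself holds the ceiling resource.
 */
#include <stdio.h>
#include <stddef.h>

struct sketch_sem {
	int prio_ceiling;
	int owner_pid; /* 0 = free */
};

static int sketch_exceeds_ceiling(const struct sketch_sem *ceiling,
				  int pid, int effective_prio)
{
	return ceiling == NULL ||
	       ceiling->prio_ceiling > effective_prio ||
	       ceiling->owner_pid == pid;
}

int main(void)
{
	struct sketch_sem ceiling = { .prio_ceiling = 5, .owner_pid = 42 };

	printf("%d\n", sketch_exceeds_ceiling(NULL, 17, 9));     /* 1: no ceiling */
	printf("%d\n", sketch_exceeds_ceiling(&ceiling, 17, 3)); /* 1: prio 3 beats ceiling 5 */
	printf("%d\n", sketch_exceeds_ceiling(&ceiling, 17, 9)); /* 0: ceiling-blocked */
	printf("%d\n", sketch_exceeds_ceiling(&ceiling, 42, 9)); /* 1: owns the ceiling */
	return 0;
}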
1056
1057/* assumes preempt off */
1058static void pcp_priority_inheritance(void)
1059{
1060 unsigned long flags;
1061 pfp_domain_t* pfp = local_pfp;
1062
1063 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1064 struct task_struct *blocker, *blocked;
1065
1066 blocker = ceiling ? ceiling->owner : NULL;
1067 blocked = this_cpu_ptr(&pcp_state)->hp_waiter;
1068
1069 raw_spin_lock_irqsave(&pfp->slock, flags);
1070
1071 /* Current is no longer inheriting anything by default. This should be
1072 * the currently scheduled job, and hence not currently queued.
1073 * Special case: if current stopped being a real-time task, it will no longer
1074 * be registered as pfp->scheduled. */
1075 BUG_ON(current != pfp->scheduled && is_realtime(current));
1076
1077 fp_set_prio_inh(pfp, current, NULL);
1078 fp_set_prio_inh(pfp, blocked, NULL);
1079 fp_set_prio_inh(pfp, blocker, NULL);
1080
1081 /* Let blocking job inherit priority of blocked job, if required. */
1082 if (blocker && blocked &&
1083 fp_higher_prio(blocked, blocker)) {
1084 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1085 blocked->comm, blocked->pid,
1086 get_priority(blocker), get_priority(blocked));
1087 fp_set_prio_inh(pfp, blocker, blocked);
1088 }
1089
1090 /* Check if anything changed. If the blocked job is current, then it is
1091 * just blocking and hence is going to call the scheduler anyway. */
1092 if (blocked != current &&
1093 fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1094 preempt(pfp);
1095
1096 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1097}
1098
1099/* called with preemptions off */
1100static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1101 int effective_prio)
1102{
1103 struct task_struct* t = current;
1104 struct pcp_semaphore* ceiling;
1105 prio_wait_queue_t wait;
1106 unsigned int waiting_higher_prio;
1107
1108 while(1) {
1109 ceiling = pcp_get_ceiling();
1110 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1111 break;
1112
1113 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1114 sem, ceiling->owner->comm, ceiling->owner->pid);
1115
1116 /* we need to wait until the ceiling is lowered */
1117
1118 /* enqueue in priority order */
1119 init_prio_waitqueue_entry(&wait, t, effective_prio);
1120 set_task_state(t, TASK_UNINTERRUPTIBLE);
1121 waiting_higher_prio = add_wait_queue_prio_exclusive(
1122 &(this_cpu_ptr(&pcp_state)->ceiling_blocked), &wait);
1123
1124 if (waiting_higher_prio == 0) {
1125 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1126
1127 /* we are the new highest-priority waiting job
1128 * => update inheritance */
1129 this_cpu_ptr(&pcp_state)->hp_waiter = t;
1130 pcp_priority_inheritance();
1131 }
1132
1133 TS_LOCK_SUSPEND;
1134
1135 preempt_enable_no_resched();
1136 schedule();
1137 preempt_disable();
1138
1139 /* pcp_resume_unblocked() removed us from wait queue */
1140
1141 TS_LOCK_RESUME;
1142 }
1143
1144 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1145
1146 /* We are good to go. The semaphore should be available. */
1147 BUG_ON(sem->owner != NULL);
1148
1149 sem->owner = t;
1150
1151 pcp_add_ceiling(sem);
1152}
1153
1154static void pcp_resume_unblocked(void)
1155{
1156 wait_queue_head_t *blocked = &(this_cpu_ptr(&pcp_state)->ceiling_blocked);
1157 unsigned long flags;
1158 prio_wait_queue_t* q;
1159 struct task_struct* t = NULL;
1160
1161 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1162
1163 spin_lock_irqsave(&blocked->lock, flags);
1164
1165 while (waitqueue_active(blocked)) {
1166 /* check first == highest-priority waiting job */
1167 q = list_entry(blocked->task_list.next,
1168 prio_wait_queue_t, wq.task_list);
1169 t = (struct task_struct*) q->wq.private;
1170
1171 /* can it proceed now? => let it go */
1172 if (pcp_exceeds_ceiling(ceiling, t, q->priority)) {
1173 __remove_wait_queue(blocked, &q->wq);
1174 wake_up_process(t);
1175 } else {
1176 /* We are done. Update highest-priority waiter. */
1177 this_cpu_ptr(&pcp_state)->hp_waiter = t;
1178 goto out;
1179 }
1180 }
1181 /* If we get here, then there are no more waiting
1182 * jobs. */
1183 this_cpu_ptr(&pcp_state)->hp_waiter = NULL;
1184out:
1185 spin_unlock_irqrestore(&blocked->lock, flags);
1186}
1187
1188/* assumes preempt off */
1189static void pcp_lower_ceiling(struct pcp_semaphore* sem)
1190{
1191 BUG_ON(!in_list(&sem->ceiling));
1192 BUG_ON(sem->owner != current);
1193 BUG_ON(sem->on_cpu != smp_processor_id());
1194
1195 /* remove from ceiling list */
1196 list_del(&sem->ceiling);
1197
1198 /* release */
1199 sem->owner = NULL;
1200
1201 TRACE_CUR("PCP released sem %p\n", sem);
1202
1203 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
1204 pcp_resume_unblocked();
1205
1206 pcp_priority_inheritance();
1207}
1208
1209static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1210 int effective_prio)
1211{
1212 /* This needs to be synchronized on something.
1213 * Might as well use waitqueue lock for the processor.
1214	 * We assume this happens only before the task set starts execution
1215	 * (i.e., during initialization), but it may happen on multiple processors
1216 * at the same time.
1217 */
1218 unsigned long flags;
1219
1220 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1221
1222 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1223
1224 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1225
1226 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1227}
1228
1229static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1230{
1231 sem->owner = NULL;
1232 INIT_LIST_HEAD(&sem->ceiling);
1233 sem->prio_ceiling = INT_MAX;
1234 sem->on_cpu = cpu;
1235}
1236
1237int pfp_pcp_lock(struct litmus_lock* l)
1238{
1239 struct task_struct* t = current;
1240 struct pcp_semaphore *sem = pcp_from_lock(l);
1241
1242 /* The regular PCP uses the regular task priorities, not agent
1243 * priorities. */
1244 int eprio = get_priority(t);
1245 int from = get_partition(t);
1246 int to = sem->on_cpu;
1247
1248 if (!is_realtime(t) || from != to)
1249 return -EPERM;
1250
1251	/* prevent nested lock acquisition while inside a global critical section */
1252 if (tsk_rt(t)->num_locks_held)
1253 return -EBUSY;
1254
1255 preempt_disable();
1256
1257 pcp_raise_ceiling(sem, eprio);
1258
1259 preempt_enable();
1260
1261 tsk_rt(t)->num_local_locks_held++;
1262
1263 return 0;
1264}
1265
1266int pfp_pcp_unlock(struct litmus_lock* l)
1267{
1268 struct task_struct *t = current;
1269 struct pcp_semaphore *sem = pcp_from_lock(l);
1270
1271 int err = 0;
1272
1273 preempt_disable();
1274
1275 if (sem->owner != t) {
1276 err = -EINVAL;
1277 goto out;
1278 }
1279
1280 /* The current owner should be executing on the correct CPU.
1281 *
1282 * If the owner transitioned out of RT mode or is exiting, then
1283	 * it might have already been migrated away by the best-effort
1284	 * scheduler, and we just have to deal with it. */
1285 if (unlikely(!is_realtime(t) && sem->on_cpu != smp_processor_id())) {
1286 TRACE_TASK(t, "PCP unlock cpu=%d, sem->on_cpu=%d\n",
1287 smp_processor_id(), sem->on_cpu);
1288 preempt_enable();
1289 err = litmus_be_migrate_to(sem->on_cpu);
1290 preempt_disable();
1291 TRACE_TASK(t, "post-migrate: cpu=%d, sem->on_cpu=%d err=%d\n",
1292 smp_processor_id(), sem->on_cpu, err);
1293 }
1294 BUG_ON(sem->on_cpu != smp_processor_id());
1295 err = 0;
1296
1297 tsk_rt(t)->num_local_locks_held--;
1298
1299 /* give it back */
1300 pcp_lower_ceiling(sem);
1301
1302out:
1303 preempt_enable();
1304
1305 return err;
1306}
1307
1308int pfp_pcp_open(struct litmus_lock* l, void* __user config)
1309{
1310 struct task_struct *t = current;
1311 struct pcp_semaphore *sem = pcp_from_lock(l);
1312
1313 int cpu, eprio;
1314
1315 if (!is_realtime(t))
1316 /* we need to know the real-time priority */
1317 return -EPERM;
1318
1319 if (!config)
1320 cpu = get_partition(t);
1321 else if (get_user(cpu, (int*) config))
1322 return -EFAULT;
1323
1324 /* make sure the resource location matches */
1325 if (cpu != sem->on_cpu)
1326 return -EINVAL;
1327
1328	/* The regular PCP uses regular task priorities, not agent
1329 * priorities. */
1330 eprio = get_priority(t);
1331
1332 pcp_update_prio_ceiling(sem, eprio);
1333
1334 return 0;
1335}
1336
1337int pfp_pcp_close(struct litmus_lock* l)
1338{
1339 struct task_struct *t = current;
1340 struct pcp_semaphore *sem = pcp_from_lock(l);
1341
1342 int owner = 0;
1343
1344 preempt_disable();
1345
1346 if (sem->on_cpu == smp_processor_id())
1347 owner = sem->owner == t;
1348
1349 preempt_enable();
1350
1351 if (owner)
1352 pfp_pcp_unlock(l);
1353
1354 return 0;
1355}
1356
1357void pfp_pcp_free(struct litmus_lock* lock)
1358{
1359 kfree(pcp_from_lock(lock));
1360}
1361
1362
1363static struct litmus_lock_ops pfp_pcp_lock_ops = {
1364 .close = pfp_pcp_close,
1365 .lock = pfp_pcp_lock,
1366 .open = pfp_pcp_open,
1367 .unlock = pfp_pcp_unlock,
1368 .deallocate = pfp_pcp_free,
1369};
1370
1371
1372static struct litmus_lock* pfp_new_pcp(int on_cpu)
1373{
1374 struct pcp_semaphore* sem;
1375
1376 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1377 if (!sem)
1378 return NULL;
1379
1380 sem->litmus_lock.ops = &pfp_pcp_lock_ops;
1381 pcp_init_semaphore(sem, on_cpu);
1382
1383 return &sem->litmus_lock;
1384}
1385
1386/* ******************** DPCP support ********************** */
1387
1388struct dpcp_semaphore {
1389 struct litmus_lock litmus_lock;
1390 struct pcp_semaphore pcp;
1391 int owner_cpu;
1392};
1393
1394static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1395{
1396 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1397}
1398
1399/* called with preemptions disabled */
1400static void pfp_migrate_to(int target_cpu)
1401{
1402 struct task_struct* t = current;
1403 pfp_domain_t *from;
1404
1405 if (get_partition(t) == target_cpu)
1406 return;
1407
1408 if (!is_realtime(t))
1409 {
1410 TRACE_TASK(t, "not migrating, not a RT task (anymore?)\n");
1411 return;
1412 }
1413
1414 /* make sure target_cpu makes sense */
1415 BUG_ON(target_cpu >= NR_CPUS || !cpu_online(target_cpu));
1416
1417 local_irq_disable();
1418
1419 from = task_pfp(t);
1420 raw_spin_lock(&from->slock);
1421
1422	/* The scheduled task should not be in any ready or release queue. Check
1423	 * this while holding the lock to avoid races with RT-mode transitions. */
1424 BUG_ON(is_realtime(t) && is_queued(t));
1425
1426 /* switch partitions */
1427 tsk_rt(t)->task_params.cpu = target_cpu;
1428
1429 raw_spin_unlock(&from->slock);
1430
1431 /* Don't trace scheduler costs as part of
1432 * locking overhead. Scheduling costs are accounted for
1433 * explicitly. */
1434 TS_LOCK_SUSPEND;
1435
1436 local_irq_enable();
1437 preempt_enable_no_resched();
1438
1439 /* deschedule to be migrated */
1440 schedule();
1441
1442 /* we are now on the target processor */
1443 preempt_disable();
1444
1445 /* start recording costs again */
1446 TS_LOCK_RESUME;
1447
1448 BUG_ON(smp_processor_id() != target_cpu && is_realtime(t));
1449}
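/*
 * The DPCP lock/unlock pair below follows the distributed-agent pattern:
 * the requesting job boosts its priority, migrates to the processor that
 * hosts the semaphore (sem->pcp.on_cpu), executes the request under that
 * processor's local PCP via pcp_raise_ceiling()/pcp_lower_ceiling(), and
 * finally unboosts and migrates back to its home partition (owner_cpu).
 */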
1450
1451int pfp_dpcp_lock(struct litmus_lock* l)
1452{
1453 struct task_struct* t = current;
1454 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1455 int eprio = effective_agent_priority(get_priority(t));
1456 int from = get_partition(t);
1457 int to = sem->pcp.on_cpu;
1458
1459 if (!is_realtime(t))
1460 return -EPERM;
1461
1462	/* prevent nested lock acquisition */
1463 if (tsk_rt(t)->num_locks_held ||
1464 tsk_rt(t)->num_local_locks_held)
1465 return -EBUSY;
1466
1467 preempt_disable();
1468
1469 /* Priority-boost ourself *before* we suspend so that
1470 * our priority is boosted when we resume. */
1471
1472 boost_priority(t, get_priority(t));
1473
1474 pfp_migrate_to(to);
1475
1476 pcp_raise_ceiling(&sem->pcp, eprio);
1477
1478 /* yep, we got it => execute request */
1479 sem->owner_cpu = from;
1480
1481 preempt_enable();
1482
1483 tsk_rt(t)->num_locks_held++;
1484
1485 return 0;
1486}
1487
1488int pfp_dpcp_unlock(struct litmus_lock* l)
1489{
1490 struct task_struct *t = current;
1491 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1492 int err = 0;
1493 int home;
1494
1495 preempt_disable();
1496
1497 if (sem->pcp.owner != t) {
1498 err = -EINVAL;
1499 goto out;
1500 }
1501
1502 /* The current owner should be executing on the correct CPU.
1503 *
1504 * If the owner transitioned out of RT mode or is exiting, then
1505	 * it might have already been migrated away by the best-effort
1506	 * scheduler, and we just have to deal with it. */
1507 if (unlikely(!is_realtime(t) && sem->pcp.on_cpu != smp_processor_id())) {
1508 TRACE_TASK(t, "DPCP unlock cpu=%d, sem->pcp.on_cpu=%d\n", smp_processor_id(), sem->pcp.on_cpu);
1509 preempt_enable();
1510 err = litmus_be_migrate_to(sem->pcp.on_cpu);
1511 preempt_disable();
1512 TRACE_TASK(t, "post-migrate: cpu=%d, sem->pcp.on_cpu=%d err=%d\n", smp_processor_id(), sem->pcp.on_cpu, err);
1513 }
1514 BUG_ON(sem->pcp.on_cpu != smp_processor_id());
1515 err = 0;
1516
1517 tsk_rt(t)->num_locks_held--;
1518
1519 home = sem->owner_cpu;
1520
1521 /* give it back */
1522 pcp_lower_ceiling(&sem->pcp);
1523
1524 /* we lose the benefit of priority boosting */
1525 unboost_priority(t);
1526
1527 pfp_migrate_to(home);
1528
1529out:
1530 preempt_enable();
1531
1532 return err;
1533}
1534
1535int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1536{
1537 struct task_struct *t = current;
1538 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1539 int cpu, eprio;
1540
1541 if (!is_realtime(t))
1542 /* we need to know the real-time priority */
1543 return -EPERM;
1544
1545 if (get_user(cpu, (int*) config))
1546 return -EFAULT;
1547
1548 /* make sure the resource location matches */
1549 if (cpu != sem->pcp.on_cpu)
1550 return -EINVAL;
1551
1552 eprio = effective_agent_priority(get_priority(t));
1553
1554 pcp_update_prio_ceiling(&sem->pcp, eprio);
1555
1556 return 0;
1557}
1558
1559int pfp_dpcp_close(struct litmus_lock* l)
1560{
1561 struct task_struct *t = current;
1562 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1563 int owner = 0;
1564
1565 preempt_disable();
1566
1567 if (sem->pcp.on_cpu == smp_processor_id())
1568 owner = sem->pcp.owner == t;
1569
1570 preempt_enable();
1571
1572 if (owner)
1573 pfp_dpcp_unlock(l);
1574
1575 return 0;
1576}
1577
1578void pfp_dpcp_free(struct litmus_lock* lock)
1579{
1580 kfree(dpcp_from_lock(lock));
1581}
1582
1583static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1584 .close = pfp_dpcp_close,
1585 .lock = pfp_dpcp_lock,
1586 .open = pfp_dpcp_open,
1587 .unlock = pfp_dpcp_unlock,
1588 .deallocate = pfp_dpcp_free,
1589};
1590
1591static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1592{
1593 struct dpcp_semaphore* sem;
1594
1595 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1596 if (!sem)
1597 return NULL;
1598
1599 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1600 sem->owner_cpu = NO_CPU;
1601 pcp_init_semaphore(&sem->pcp, on_cpu);
1602
1603 return &sem->litmus_lock;
1604}
1605
1606
1607/* ******************** DFLP support ********************** */
1608
1609struct dflp_semaphore {
1610 struct litmus_lock litmus_lock;
1611
1612 /* current resource holder */
1613 struct task_struct *owner;
1614 int owner_cpu;
1615
1616 /* FIFO queue of waiting tasks */
1617 wait_queue_head_t wait;
1618
1619 /* where is the resource assigned to */
1620 int on_cpu;
1621};
1622
1623static inline struct dflp_semaphore* dflp_from_lock(struct litmus_lock* lock)
1624{
1625 return container_of(lock, struct dflp_semaphore, litmus_lock);
1626}
1627
1628int pfp_dflp_lock(struct litmus_lock* l)
1629{
1630 struct task_struct* t = current;
1631 struct dflp_semaphore *sem = dflp_from_lock(l);
1632 int from = get_partition(t);
1633 int to = sem->on_cpu;
1634 unsigned long flags;
1635 wait_queue_t wait;
1636 lt_t time_of_request;
1637
1638 if (!is_realtime(t))
1639 return -EPERM;
1640
1641	/* prevent nested lock acquisition */
1642 if (tsk_rt(t)->num_locks_held ||
1643 tsk_rt(t)->num_local_locks_held)
1644 return -EBUSY;
1645
1646 preempt_disable();
1647
1648 /* tie-break by this point in time */
1649 time_of_request = litmus_clock();
1650
1651 /* Priority-boost ourself *before* we suspend so that
1652 * our priority is boosted when we resume. */
1653 boost_priority(t, time_of_request);
1654
1655 pfp_migrate_to(to);
1656
1657 /* Now on the right CPU, preemptions still disabled. */
1658
1659 spin_lock_irqsave(&sem->wait.lock, flags);
1660
1661 if (sem->owner) {
1662 /* resource is not free => must suspend and wait */
1663
1664 init_waitqueue_entry(&wait, t);
1665
1666 /* FIXME: interruptible would be nice some day */
1667 set_task_state(t, TASK_UNINTERRUPTIBLE);
1668
1669 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
1670
1671 TS_LOCK_SUSPEND;
1672
1673 /* release lock before sleeping */
1674 spin_unlock_irqrestore(&sem->wait.lock, flags);
1675
1676 /* We depend on the FIFO order. Thus, we don't need to recheck
1677 * when we wake up; we are guaranteed to have the lock since
1678 * there is only one wake up per release.
1679 */
1680
1681 preempt_enable_no_resched();
1682
1683 schedule();
1684
1685 preempt_disable();
1686
1687 TS_LOCK_RESUME;
1688
1689 /* Since we hold the lock, no other task will change
1690 * ->owner. We can thus check it without acquiring the spin
1691 * lock. */
1692 BUG_ON(sem->owner != t);
1693 } else {
1694 /* it's ours now */
1695 sem->owner = t;
1696
1697 spin_unlock_irqrestore(&sem->wait.lock, flags);
1698 }
1699
1700 sem->owner_cpu = from;
1701
1702 preempt_enable();
1703
1704 tsk_rt(t)->num_locks_held++;
1705
1706 return 0;
1707}
1708
1709int pfp_dflp_unlock(struct litmus_lock* l)
1710{
1711 struct task_struct *t = current, *next;
1712 struct dflp_semaphore *sem = dflp_from_lock(l);
1713 int err = 0;
1714 int home;
1715 unsigned long flags;
1716
1717 preempt_disable();
1718
1719 spin_lock_irqsave(&sem->wait.lock, flags);
1720
1721 if (sem->owner != t) {
1722 err = -EINVAL;
1723 spin_unlock_irqrestore(&sem->wait.lock, flags);
1724 goto out;
1725 }
1726
1727 /* check if there are jobs waiting for this resource */
1728 next = __waitqueue_remove_first(&sem->wait);
1729 if (next) {
1730		/* next becomes the resource holder */
1731 sem->owner = next;
1732
1733 /* Wake up next. The waiting job is already priority-boosted. */
1734 wake_up_process(next);
1735 } else
1736 /* resource becomes available */
1737 sem->owner = NULL;
1738
1739 tsk_rt(t)->num_locks_held--;
1740
1741 home = sem->owner_cpu;
1742
1743 spin_unlock_irqrestore(&sem->wait.lock, flags);
1744
1745 /* we lose the benefit of priority boosting */
1746 unboost_priority(t);
1747
1748 pfp_migrate_to(home);
1749
1750out:
1751 preempt_enable();
1752
1753 return err;
1754}
1755
1756int pfp_dflp_open(struct litmus_lock* l, void* __user config)
1757{
1758 struct dflp_semaphore *sem = dflp_from_lock(l);
1759 int cpu;
1760
1761 if (get_user(cpu, (int*) config))
1762 return -EFAULT;
1763
1764 /* make sure the resource location matches */
1765 if (cpu != sem->on_cpu)
1766 return -EINVAL;
1767
1768 return 0;
1769}
1770
1771int pfp_dflp_close(struct litmus_lock* l)
1772{
1773 struct task_struct *t = current;
1774 struct dflp_semaphore *sem = dflp_from_lock(l);
1775 int owner = 0;
1776
1777 preempt_disable();
1778
1779 if (sem->on_cpu == smp_processor_id())
1780 owner = sem->owner == t;
1781
1782 preempt_enable();
1783
1784 if (owner)
1785 pfp_dflp_unlock(l);
1786
1787 return 0;
1788}
1789
1790void pfp_dflp_free(struct litmus_lock* lock)
1791{
1792 kfree(dflp_from_lock(lock));
1793}
1794
1795static struct litmus_lock_ops pfp_dflp_lock_ops = {
1796 .close = pfp_dflp_close,
1797 .lock = pfp_dflp_lock,
1798 .open = pfp_dflp_open,
1799 .unlock = pfp_dflp_unlock,
1800 .deallocate = pfp_dflp_free,
1801};
1802
1803static struct litmus_lock* pfp_new_dflp(int on_cpu)
1804{
1805 struct dflp_semaphore* sem;
1806
1807 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1808 if (!sem)
1809 return NULL;
1810
1811 sem->litmus_lock.ops = &pfp_dflp_lock_ops;
1812 sem->owner_cpu = NO_CPU;
1813 sem->owner = NULL;
1814 sem->on_cpu = on_cpu;
1815 init_waitqueue_head(&sem->wait);
1816
1817 return &sem->litmus_lock;
1818}
1819
1820
1821/* **** lock constructor **** */
1822
1823
1824static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1825 void* __user config)
1826{
1827 int err = -ENXIO, cpu;
1828 struct srp_semaphore* srp;
1829
1830	/* P-FP supports the SRP and PCP for local resources and the FMLP, MPCP,
1831	 * DPCP, and DFLP for global/distributed resources. */
1832 switch (type) {
1833 case FMLP_SEM:
1834 /* FIFO Mutex Locking Protocol */
1835 *lock = pfp_new_fmlp();
1836 if (*lock)
1837 err = 0;
1838 else
1839 err = -ENOMEM;
1840 break;
1841
1842 case MPCP_SEM:
1843		/* Multiprocessor Priority Ceiling Protocol */
1844 *lock = pfp_new_mpcp(0);
1845 if (*lock)
1846 err = 0;
1847 else
1848 err = -ENOMEM;
1849 break;
1850
1851 case MPCP_VS_SEM:
1852		/* Multiprocessor Priority Ceiling Protocol with virtual spinning */
1853 *lock = pfp_new_mpcp(1);
1854 if (*lock)
1855 err = 0;
1856 else
1857 err = -ENOMEM;
1858 break;
1859
1860 case DPCP_SEM:
1861 /* Distributed Priority Ceiling Protocol */
1862 if (get_user(cpu, (int*) config))
1863 return -EFAULT;
1864
1865 TRACE("DPCP_SEM: provided cpu=%d\n", cpu);
1866
1867 if (cpu >= NR_CPUS || !cpu_online(cpu))
1868 return -EINVAL;
1869
1870 *lock = pfp_new_dpcp(cpu);
1871 if (*lock)
1872 err = 0;
1873 else
1874 err = -ENOMEM;
1875 break;
1876
1877 case DFLP_SEM:
1878 /* Distributed FIFO Locking Protocol */
1879 if (get_user(cpu, (int*) config))
1880 return -EFAULT;
1881
1882		TRACE("DFLP_SEM: provided cpu=%d\n", cpu);
1883
1884 if (cpu >= NR_CPUS || !cpu_online(cpu))
1885 return -EINVAL;
1886
1887 *lock = pfp_new_dflp(cpu);
1888 if (*lock)
1889 err = 0;
1890 else
1891 err = -ENOMEM;
1892 break;
1893
1894 case SRP_SEM:
1895 /* Baker's Stack Resource Policy */
1896 srp = allocate_srp_semaphore();
1897 if (srp) {
1898 *lock = &srp->litmus_lock;
1899 err = 0;
1900 } else
1901 err = -ENOMEM;
1902 break;
1903
1904 case PCP_SEM:
1905 /* Priority Ceiling Protocol */
1906 if (!config)
1907 cpu = get_partition(current);
1908 else if (get_user(cpu, (int*) config))
1909 return -EFAULT;
1910
1911 if (cpu >= NR_CPUS || !cpu_online(cpu))
1912 return -EINVAL;
1913
1914 *lock = pfp_new_pcp(cpu);
1915 if (*lock)
1916 err = 0;
1917 else
1918 err = -ENOMEM;
1919 break;
1920 };
1921
1922 return err;
1923}
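/*
 * Summary of the config argument per protocol (as implemented above):
 * FMLP_SEM, MPCP_SEM, MPCP_VS_SEM, and SRP_SEM ignore config; DPCP_SEM and
 * DFLP_SEM require the CPU on which the resource is hosted (validated to be
 * online); PCP_SEM optionally takes a CPU and otherwise defaults to the
 * caller's partition.
 */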
1924
1925#endif
1926
1927static long pfp_admit_task(struct task_struct* tsk)
1928{
1929 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1930#ifdef CONFIG_RELEASE_MASTER
1931 /* don't allow tasks on release master CPU */
1932 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1933#endif
1934 litmus_is_valid_fixed_prio(get_priority(tsk)))
1935 return 0;
1936 else
1937 return -EINVAL;
1938}
1939
1940static struct domain_proc_info pfp_domain_proc_info;
1941static long pfp_get_domain_proc_info(struct domain_proc_info **ret)
1942{
1943 *ret = &pfp_domain_proc_info;
1944 return 0;
1945}
1946
1947static void pfp_setup_domain_proc(void)
1948{
1949 int i, cpu;
1950 int release_master =
1951#ifdef CONFIG_RELEASE_MASTER
1952 atomic_read(&release_master_cpu);
1953#else
1954 NO_CPU;
1955#endif
1956 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
1957 struct cd_mapping *cpu_map, *domain_map;
1958
1959 memset(&pfp_domain_proc_info, 0, sizeof(pfp_domain_proc_info));
1960 init_domain_proc_info(&pfp_domain_proc_info, num_rt_cpus, num_rt_cpus);
1961 pfp_domain_proc_info.num_cpus = num_rt_cpus;
1962 pfp_domain_proc_info.num_domains = num_rt_cpus;
1963 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
1964 if (cpu == release_master)
1965 continue;
1966 cpu_map = &pfp_domain_proc_info.cpu_to_domains[i];
1967 domain_map = &pfp_domain_proc_info.domain_to_cpus[i];
1968
1969 cpu_map->id = cpu;
1970 domain_map->id = i; /* enumerate w/o counting the release master */
1971 cpumask_set_cpu(i, cpu_map->mask);
1972 cpumask_set_cpu(cpu, domain_map->mask);
1973 ++i;
1974 }
1975}
1976
1977static long pfp_activate_plugin(void)
1978{
1979#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
1980 int cpu;
1981#endif
1982
1983#ifdef CONFIG_RELEASE_MASTER
1984 for_each_online_cpu(cpu) {
1985 remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
1986 }
1987#endif
1988
1989#ifdef CONFIG_LITMUS_LOCKING
1990 get_srp_prio = pfp_get_srp_prio;
1991
1992 for_each_online_cpu(cpu) {
1993 init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
1994 per_cpu(mpcpvs_vspin, cpu) = NULL;
1995
1996 pcp_init_state(&per_cpu(pcp_state, cpu));
1997 pfp_doms[cpu] = remote_pfp(cpu);
1998 per_cpu(fmlp_timestamp,cpu) = 0;
1999 }
2000
2001#endif
2002
2003 pfp_setup_domain_proc();
2004
2005 return 0;
2006}
2007
2008static long pfp_deactivate_plugin(void)
2009{
2010 destroy_domain_proc_info(&pfp_domain_proc_info);
2011 return 0;
2012}
2013
2014/* Plugin object */
2015static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
2016 .plugin_name = "P-FP",
2017 .task_new = pfp_task_new,
2018 .complete_job = complete_job,
2019 .task_exit = pfp_task_exit,
2020 .schedule = pfp_schedule,
2021 .task_wake_up = pfp_task_wake_up,
2022 .task_block = pfp_task_block,
2023 .admit_task = pfp_admit_task,
2024 .activate_plugin = pfp_activate_plugin,
2025 .deactivate_plugin = pfp_deactivate_plugin,
2026 .get_domain_proc_info = pfp_get_domain_proc_info,
2027#ifdef CONFIG_LITMUS_LOCKING
2028 .allocate_lock = pfp_allocate_lock,
2029 .finish_switch = pfp_finish_switch,
2030#endif
2031};
2032
2033
2034static int __init init_pfp(void)
2035{
2036 int i;
2037
2038 /* We do not really want to support cpu hotplug, do we? ;)
2039	 * However, if we were ever so crazy as to do so,
2040	 * we could not use num_online_cpus()
2041 */
2042 for (i = 0; i < num_online_cpus(); i++) {
2043 pfp_domain_init(remote_pfp(i), i);
2044 }
2045 return register_sched_plugin(&pfp_plugin);
2046}
2047
2048module_init(init_pfp);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..9390eb9141bf
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,290 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10#include <linux/seq_file.h>
11
12#include <litmus/debug_trace.h>
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/preempt.h>
16#include <litmus/jobs.h>
17#include <litmus/budget.h>
18#include <litmus/np.h>
19
20/*
21 * Generic function to trigger preemption on either local or remote cpu
22 * from scheduler plugins. The key feature is that this function is
23 * non-preemptive section aware and does not invoke the scheduler / send
24 * IPIs if the to-be-preempted task is actually non-preemptive.
25 */
26void preempt_if_preemptable(struct task_struct* t, int cpu)
27{
28	/* t is the real-time task executing on CPU cpu. If t is NULL, then
29	 * CPU cpu is currently scheduling background work.
30 */
31
32 int reschedule = 0;
33
34 if (!t)
35 /* move non-real-time task out of the way */
36 reschedule = 1;
37 else {
38 if (smp_processor_id() == cpu) {
39 /* local CPU case */
40 /* check if we need to poke userspace */
41 if (is_user_np(t))
42 /* Yes, poke it. This doesn't have to be atomic since
43 * the task is definitely not executing. */
44 request_exit_np(t);
45 else if (!is_kernel_np(t))
46 /* only if we are allowed to preempt the
47 * currently-executing task */
48 reschedule = 1;
49 } else {
50 /* Remote CPU case. Only notify if it's not a kernel
51 * NP section and if we didn't set the userspace
52 * flag. */
53 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
54 }
55 }
56 if (likely(reschedule))
57 litmus_reschedule(cpu);
58}
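/*
 * In short: a NULL task always triggers a reschedule; on the local CPU a
 * user-space non-preemptive section only gets its exit-request flag set and
 * a kernel non-preemptive section suppresses the reschedule entirely; for a
 * remote CPU an IPI is sent only if neither a kernel NP section nor a
 * successfully flagged user-space NP section applies.
 */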
59
60
61/*************************************************************
62 * Dummy plugin functions *
63 *************************************************************/
64
65static void litmus_dummy_finish_switch(struct task_struct * prev)
66{
67}
68
69static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
70{
71 sched_state_task_picked();
72 return NULL;
73}
74
75static bool litmus_dummy_should_wait_for_stack(struct task_struct *next)
76{
77 return true; /* by default, wait indefinitely */
78}
79
80static void litmus_dummy_next_became_invalid(struct task_struct *next)
81{
82}
83
84static bool litmus_dummy_post_migration_validate(struct task_struct *next)
85{
86 return true; /* by default, anything is ok */
87}
88
89static long litmus_dummy_admit_task(struct task_struct* tsk)
90{
91 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
92 tsk->comm, tsk->pid);
93 return -EINVAL;
94}
95
96static bool litmus_dummy_fork_task(struct task_struct* tsk)
97{
98 /* Default behavior: return false to demote to non-real-time task */
99 return false;
100}
101
102static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
103{
104}
105
106static void litmus_dummy_task_wake_up(struct task_struct *task)
107{
108}
109
110static void litmus_dummy_task_block(struct task_struct *task)
111{
112}
113
114static void litmus_dummy_task_exit(struct task_struct *task)
115{
116}
117
118static void litmus_dummy_task_cleanup(struct task_struct *task)
119{
120}
121
122static long litmus_dummy_complete_job(void)
123{
124 return -ENOSYS;
125}
126
127static long litmus_dummy_activate_plugin(void)
128{
129 return 0;
130}
131
132static long litmus_dummy_deactivate_plugin(void)
133{
134 return 0;
135}
136
137static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d)
138{
139 *d = NULL;
140 return 0;
141}
142
143static void litmus_dummy_synchronous_release_at(lt_t time_zero)
144{
145 /* ignore */
146}
147
148static long litmus_dummy_task_change_params(
149 struct task_struct *task,
150 struct rt_task *new_params)
151{
152 /* by default, do not allow changes to task parameters */
153 return -EBUSY;
154}
155
156#ifdef CONFIG_LITMUS_LOCKING
157
158static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
159 void* __user config)
160{
161 return -ENXIO;
162}
163
164#endif
165
166static long litmus_dummy_reservation_create(
167 int reservation_type,
168 void* __user config)
169{
170 return -ENOSYS;
171}
172
173static long litmus_dummy_reservation_destroy(unsigned int reservation_id, int cpu)
174{
175 return -ENOSYS;
176}
177
178/* The default scheduler plugin. It doesn't do anything and lets Linux do its
179 * job.
180 */
181struct sched_plugin linux_sched_plugin = {
182 .plugin_name = "Linux",
183 .task_new = litmus_dummy_task_new,
184 .task_exit = litmus_dummy_task_exit,
185 .task_wake_up = litmus_dummy_task_wake_up,
186 .task_block = litmus_dummy_task_block,
187 .complete_job = litmus_dummy_complete_job,
188 .schedule = litmus_dummy_schedule,
189 .finish_switch = litmus_dummy_finish_switch,
190 .activate_plugin = litmus_dummy_activate_plugin,
191 .deactivate_plugin = litmus_dummy_deactivate_plugin,
192 .get_domain_proc_info = litmus_dummy_get_domain_proc_info,
193 .synchronous_release_at = litmus_dummy_synchronous_release_at,
194#ifdef CONFIG_LITMUS_LOCKING
195 .allocate_lock = litmus_dummy_allocate_lock,
196#endif
197 .admit_task = litmus_dummy_admit_task
198};
199
200/*
201 * The reference to the current plugin that is used to schedule tasks within
202 * the system. It stores references to the actual function implementations.
203 * It should be initialized by calling "init_***_plugin()".
204 */
205struct sched_plugin *litmus = &linux_sched_plugin;
206
207/* the list of registered scheduling plugins */
208static LIST_HEAD(sched_plugins);
209static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
210
211#define CHECK(func) {\
212 if (!plugin->func) \
213 plugin->func = litmus_dummy_ ## func;}
214
215/* FIXME: get reference to module */
216int register_sched_plugin(struct sched_plugin* plugin)
217{
218 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
219 plugin->plugin_name);
220
221 /* make sure we don't trip over null pointers later */
222 CHECK(finish_switch);
223 CHECK(schedule);
224 CHECK(should_wait_for_stack);
225 CHECK(post_migration_validate);
226 CHECK(next_became_invalid);
227 CHECK(task_wake_up);
228 CHECK(task_exit);
229 CHECK(task_cleanup);
230 CHECK(task_block);
231 CHECK(task_new);
232 CHECK(task_change_params);
233 CHECK(complete_job);
234 CHECK(activate_plugin);
235 CHECK(deactivate_plugin);
236 CHECK(get_domain_proc_info);
237#ifdef CONFIG_LITMUS_LOCKING
238 CHECK(allocate_lock);
239#endif
240 CHECK(admit_task);
241 CHECK(fork_task);
242 CHECK(synchronous_release_at);
243 CHECK(reservation_destroy);
244 CHECK(reservation_create);
245
246 if (!plugin->wait_for_release_at)
247 plugin->wait_for_release_at = default_wait_for_release_at;
248
249 if (!plugin->current_budget)
250 plugin->current_budget = litmus_current_budget;
251
252 raw_spin_lock(&sched_plugins_lock);
253 list_add(&plugin->list, &sched_plugins);
254 raw_spin_unlock(&sched_plugins_lock);
255
256 return 0;
257}
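/*
 * Minimal usage sketch (illustrative only, not part of the original file):
 * a plugin only needs to fill in the callbacks it actually implements;
 * register_sched_plugin() patches every remaining NULL callback with the
 * corresponding litmus_dummy_* default via the CHECK() macro above.
 *
 *	static struct task_struct* demo_schedule(struct task_struct *prev)
 *	{
 *		sched_state_task_picked();
 *		return NULL;	// always yield to background work
 *	}
 *
 *	static struct sched_plugin demo_plugin = {
 *		.plugin_name = "DEMO",
 *		.schedule = demo_schedule,
 *	};
 *
 *	static int __init init_demo(void)
 *	{
 *		return register_sched_plugin(&demo_plugin);
 *	}
 *	module_init(init_demo);
 *
 * Note that the default admit_task() (litmus_dummy_admit_task) rejects all
 * tasks, so a useful plugin must at least provide its own admit_task().
 */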
258
259
260/* FIXME: reference counting, etc. */
261struct sched_plugin* find_sched_plugin(const char* name)
262{
263 struct list_head *pos;
264 struct sched_plugin *plugin;
265
266 raw_spin_lock(&sched_plugins_lock);
267 list_for_each(pos, &sched_plugins) {
268 plugin = list_entry(pos, struct sched_plugin, list);
269 if (!strcmp(plugin->plugin_name, name))
270 goto out_unlock;
271 }
272 plugin = NULL;
273
274out_unlock:
275 raw_spin_unlock(&sched_plugins_lock);
276 return plugin;
277}
278
279void print_sched_plugins(struct seq_file *m)
280{
281 struct list_head *pos;
282 struct sched_plugin *plugin;
283
284 raw_spin_lock(&sched_plugins_lock);
285 list_for_each(pos, &sched_plugins) {
286 plugin = list_entry(pos, struct sched_plugin, list);
287 seq_printf(m, "%s\n", plugin->plugin_name);
288 }
289 raw_spin_unlock(&sched_plugins_lock);
290}
diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
new file mode 100644
index 000000000000..0a3270346656
--- /dev/null
+++ b/litmus/sched_pres.c
@@ -0,0 +1,612 @@
1#include <linux/percpu.h>
2#include <linux/slab.h>
3#include <linux/module.h>
4#include <asm/uaccess.h>
5
6#include <litmus/sched_plugin.h>
7#include <litmus/preempt.h>
8#include <litmus/debug_trace.h>
9
10#include <litmus/litmus.h>
11#include <litmus/jobs.h>
12#include <litmus/budget.h>
13#include <litmus/litmus_proc.h>
14#include <litmus/sched_trace.h>
15
16#include <litmus/reservations/reservation.h>
17#include <litmus/reservations/alloc.h>
18
19struct pres_task_state {
20 struct reservation_client *client;
21 int cpu;
22 struct task_client res_info;
23};
24
25struct pres_cpu_state {
26 raw_spinlock_t lock;
27
28 struct sup_reservation_environment sup_env;
29 struct hrtimer timer;
30
31 int cpu;
32 struct task_struct* scheduled;
33};
34
35static DEFINE_PER_CPU(struct pres_cpu_state, pres_cpu_state);
36
37#define cpu_state_for(cpu_id) (&per_cpu(pres_cpu_state, cpu_id))
38#define local_cpu_state() (this_cpu_ptr(&pres_cpu_state))
39
40static struct pres_task_state* get_pres_state(struct task_struct *tsk)
41{
42 return (struct pres_task_state*) tsk_rt(tsk)->plugin_state;
43}
44
45static void task_departs(struct task_struct *tsk, int job_complete)
46{
47 struct pres_task_state* state = get_pres_state(tsk);
48 struct reservation* res;
49 struct reservation_client *client;
50
51 client = state->client;
52 res = client->reservation;
53
54 res->ops->client_departs(res, client, job_complete);
55 TRACE_TASK(tsk, "client_departs: removed from reservation R%d\n", res->id);
56}
57
58static void task_arrives(struct task_struct *tsk)
59{
60 struct pres_task_state* state = get_pres_state(tsk);
61 struct reservation* res;
62 struct reservation_client *client;
63
64 client = state->client;
65 res = client->reservation;
66
67 res->ops->client_arrives(res, client);
68 TRACE_TASK(tsk, "client_arrives: added to reservation R%d\n", res->id);
69}
70
71/* NOTE: drops state->lock */
72static void pres_update_timer_and_unlock(struct pres_cpu_state *state)
73{
74 int local;
75 lt_t update, now;
76
77 update = state->sup_env.next_scheduler_update;
78 now = state->sup_env.env.current_time;
79
80 /* Be sure we're actually running on the right core,
81	 * as pres_update_timer_and_unlock() is also called from pres_task_resume(),
82 * which might be called on any CPU when a thread resumes.
83 */
84 local = local_cpu_state() == state;
85
86 /* Must drop state lock before calling into hrtimer_start(), which
87 * may raise a softirq, which in turn may wake ksoftirqd. */
88 raw_spin_unlock(&state->lock);
89
90 if (update <= now) {
91 litmus_reschedule(state->cpu);
92 } else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
93 /* Reprogram only if not already set correctly. */
94 if (!hrtimer_active(&state->timer) ||
95 ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
96 TRACE("canceling timer...\n");
97 hrtimer_cancel(&state->timer);
98 TRACE("setting scheduler timer for %llu\n", update);
99 hrtimer_start(&state->timer,
100 ns_to_ktime(update),
101 HRTIMER_MODE_ABS_PINNED);
102 if (update < litmus_clock()) {
103 /* uh oh, timer expired while trying to set it */
104 TRACE("timer expired during setting "
105 "update:%llu now:%llu actual:%llu\n",
106 update, now, litmus_clock());
107 /* The timer HW may not have been reprogrammed
108 * correctly; force rescheduling now. */
109 litmus_reschedule(state->cpu);
110 }
111 }
112 } else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
113 /* Poke remote core only if timer needs to be set earlier than
114 * it is currently set.
115 */
116 TRACE("pres_update_timer for remote CPU %d (update=%llu, "
117 "active:%d, set:%llu)\n",
118 state->cpu,
119 update,
120 hrtimer_active(&state->timer),
121 ktime_to_ns(hrtimer_get_expires(&state->timer)));
122 if (!hrtimer_active(&state->timer) ||
123 ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
124 TRACE("poking CPU %d so that it can update its "
125 "scheduling timer (active:%d, set:%llu)\n",
126 state->cpu,
127 hrtimer_active(&state->timer),
128 ktime_to_ns(hrtimer_get_expires(&state->timer)));
129 litmus_reschedule(state->cpu);
130 }
131 }
132}
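/*
 * Three cases are handled above: an already-due update forces an immediate
 * reschedule; a future update on the local CPU reprograms the local hrtimer
 * (re-checking litmus_clock() to catch the race where the update time passes
 * while the timer is being programmed); and a future update for a remote CPU
 * only pokes that CPU if its timer is inactive or set later than needed.
 */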
133
134static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
135{
136 unsigned long flags;
137 enum hrtimer_restart restart = HRTIMER_NORESTART;
138 struct pres_cpu_state *state;
139 lt_t update, now;
140
141 state = container_of(timer, struct pres_cpu_state, timer);
142
143 /* The scheduling timer should only fire on the local CPU, because
144	 * otherwise deadlocks via hrtimer_cancel() are possible.
145 * Note: this does not interfere with dedicated interrupt handling, as
146 * even under dedicated interrupt handling scheduling timers for
147 * budget enforcement must occur locally on each CPU.
148 */
149 BUG_ON(state->cpu != raw_smp_processor_id());
150
151 raw_spin_lock_irqsave(&state->lock, flags);
152 sup_update_time(&state->sup_env, litmus_clock());
153
154 update = state->sup_env.next_scheduler_update;
155 now = state->sup_env.env.current_time;
156
157 TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
158 now, update, state->cpu);
159
160 if (update <= now) {
161 litmus_reschedule_local();
162 } else if (update != SUP_NO_SCHEDULER_UPDATE) {
163 hrtimer_set_expires(timer, ns_to_ktime(update));
164 restart = HRTIMER_RESTART;
165 }
166
167 raw_spin_unlock_irqrestore(&state->lock, flags);
168
169 return restart;
170}
171
172static struct task_struct* pres_schedule(struct task_struct * prev)
173{
174 /* next == NULL means "schedule background work". */
175 struct pres_cpu_state *state = local_cpu_state();
176
177 raw_spin_lock(&state->lock);
178
179 BUG_ON(state->scheduled && state->scheduled != prev);
180 BUG_ON(state->scheduled && !is_realtime(prev));
181
182 /* update time */
183 state->sup_env.will_schedule = true;
184 sup_update_time(&state->sup_env, litmus_clock());
185
186 /* figure out what to schedule next */
187 state->scheduled = sup_dispatch(&state->sup_env);
188
189 /* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
190 sched_state_task_picked();
191
192 /* program scheduler timer */
193 state->sup_env.will_schedule = false;
194 /* NOTE: drops state->lock */
195 pres_update_timer_and_unlock(state);
196
197 if (prev != state->scheduled && is_realtime(prev))
198 TRACE_TASK(prev, "descheduled.\n");
199 if (state->scheduled)
200 TRACE_TASK(state->scheduled, "scheduled.\n");
201
202 return state->scheduled;
203}
204
205static void resume_legacy_task_model_updates(struct task_struct *tsk)
206{
207 lt_t now;
208 if (is_sporadic(tsk)) {
209 /* If this sporadic task was gone for a "long" time and woke up past
210 * its deadline, then give it a new budget by triggering a job
211 * release. This is purely cosmetic and has no effect on the
212 * P-RES scheduler. */
213
214 now = litmus_clock();
215 if (is_tardy(tsk, now)) {
216 inferred_sporadic_job_release_at(tsk, now);
217 }
218 }
219}
220
221
222/* Called when a task should be removed from the ready queue.
223 */
224static void pres_task_block(struct task_struct *tsk)
225{
226 unsigned long flags;
227 struct pres_task_state* tinfo = get_pres_state(tsk);
228 struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
229
230 TRACE_TASK(tsk, "thread suspends at %llu (state:%d, running:%d)\n",
231 litmus_clock(), tsk->state, is_current_running());
232
233 raw_spin_lock_irqsave(&state->lock, flags);
234 sup_update_time(&state->sup_env, litmus_clock());
235 task_departs(tsk, is_completed(tsk));
236 raw_spin_unlock_irqrestore(&state->lock, flags);
237}
238
239
240/* Called when the state of tsk changes back to TASK_RUNNING.
241 * We need to requeue the task.
242 */
243static void pres_task_resume(struct task_struct *tsk)
244{
245 unsigned long flags;
246 struct pres_task_state* tinfo = get_pres_state(tsk);
247 struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
248
249 TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());
250
251 raw_spin_lock_irqsave(&state->lock, flags);
252 /* Assumption: litmus_clock() is synchronized across cores,
253 * since we might not actually be executing on tinfo->cpu
254 * at the moment. */
255 sup_update_time(&state->sup_env, litmus_clock());
256 task_arrives(tsk);
257 /* NOTE: drops state->lock */
258 pres_update_timer_and_unlock(state);
259 local_irq_restore(flags);
260
261 resume_legacy_task_model_updates(tsk);
262}
263
264static long pres_admit_task(struct task_struct *tsk)
265{
266 long err = -EINVAL;
267 unsigned long flags;
268 struct reservation *res;
269 struct pres_cpu_state *state;
270 struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);
271
272 if (!tinfo)
273 return -ENOMEM;
274
275 preempt_disable();
276
277 /* NOTE: this is obviously racy w.r.t. affinity changes since
278 * we are not holding any runqueue locks. */
279 if (tsk->nr_cpus_allowed != 1) {
280 printk(KERN_WARNING "%s/%d: task does not have "
281 "singleton affinity mask\n",
282 tsk->comm, tsk->pid);
283 state = cpu_state_for(task_cpu(tsk));
284 } else {
285 state = cpu_state_for(cpumask_first(&tsk->cpus_allowed));
286 }
287
288 TRACE_TASK(tsk, "on CPU %d, valid?:%d\n",
289 task_cpu(tsk), cpumask_test_cpu(task_cpu(tsk), &tsk->cpus_allowed));
290
291 raw_spin_lock_irqsave(&state->lock, flags);
292
293 res = sup_find_by_id(&state->sup_env, tsk_rt(tsk)->task_params.cpu);
294
295 /* found the appropriate reservation (or vCPU) */
296 if (res) {
297 task_client_init(&tinfo->res_info, tsk, res);
298 tinfo->cpu = state->cpu;
299 tinfo->client = &tinfo->res_info.client;
300 tsk_rt(tsk)->plugin_state = tinfo;
301 err = 0;
302
303 /* disable LITMUS^RT's per-thread budget enforcement */
304 tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
305 } else {
306 printk(KERN_WARNING "Could not find reservation %d on "
307 "core %d for task %s/%d\n",
308 tsk_rt(tsk)->task_params.cpu, state->cpu,
309 tsk->comm, tsk->pid);
310 }
311
312 raw_spin_unlock_irqrestore(&state->lock, flags);
313
314 preempt_enable();
315
316 if (err)
317 kfree(tinfo);
318
319 return err;
320}
321
322static void task_new_legacy_task_model_updates(struct task_struct *tsk)
323{
324 lt_t now = litmus_clock();
325
326 /* the first job exists starting as of right now */
327 release_at(tsk, now);
328 sched_trace_task_release(tsk);
329}
330
331static void pres_task_new(struct task_struct *tsk, int on_runqueue,
332 int is_running)
333{
334 unsigned long flags;
335 struct pres_task_state* tinfo = get_pres_state(tsk);
336 struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
337
338 TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
339 litmus_clock(), on_runqueue, is_running);
340
341 /* acquire the lock protecting the state and disable interrupts */
342 raw_spin_lock_irqsave(&state->lock, flags);
343
344 if (is_running) {
345 state->scheduled = tsk;
346 /* make sure this task should actually be running */
347 litmus_reschedule_local();
348 }
349
350 if (on_runqueue || is_running) {
351 /* Assumption: litmus_clock() is synchronized across cores
352 * [see comment in pres_task_resume()] */
353 sup_update_time(&state->sup_env, litmus_clock());
354 task_arrives(tsk);
355 /* NOTE: drops state->lock */
356 pres_update_timer_and_unlock(state);
357 local_irq_restore(flags);
358 } else
359 raw_spin_unlock_irqrestore(&state->lock, flags);
360
361 task_new_legacy_task_model_updates(tsk);
362}
363
364static bool pres_fork_task(struct task_struct *tsk)
365{
366 TRACE_CUR("is forking\n");
367 TRACE_TASK(tsk, "forked child rt:%d cpu:%d task_cpu:%d "
368 "wcet:%llu per:%llu\n",
369 is_realtime(tsk),
370 tsk_rt(tsk)->task_params.cpu,
371 task_cpu(tsk),
372 tsk_rt(tsk)->task_params.exec_cost,
373 tsk_rt(tsk)->task_params.period);
374
375 /* We always allow forking. */
376 /* The newly forked task will be in the same reservation. */
377 return true;
378}
379
380static void pres_task_exit(struct task_struct *tsk)
381{
382 unsigned long flags;
383 struct pres_task_state* tinfo = get_pres_state(tsk);
384 struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
385
386 raw_spin_lock_irqsave(&state->lock, flags);
387
388 TRACE_TASK(tsk, "task exits at %llu (present:%d sched:%d)\n",
389 litmus_clock(), is_present(tsk), state->scheduled == tsk);
390
391 if (state->scheduled == tsk)
392 state->scheduled = NULL;
393
394 /* remove from queues */
395 if (is_present(tsk)) {
396 /* Assumption: litmus_clock() is synchronized across cores
397 * [see comment in pres_task_resume()] */
398 sup_update_time(&state->sup_env, litmus_clock());
399 task_departs(tsk, 0);
400 /* NOTE: drops state->lock */
401 pres_update_timer_and_unlock(state);
402 local_irq_restore(flags);
403 } else
404 raw_spin_unlock_irqrestore(&state->lock, flags);
405
406 kfree(tsk_rt(tsk)->plugin_state);
407 tsk_rt(tsk)->plugin_state = NULL;
408}
409
410static void pres_current_budget(lt_t *used_so_far, lt_t *remaining)
411{
412 struct pres_task_state *tstate = get_pres_state(current);
413 struct pres_cpu_state *state;
414
415 /* FIXME: protect against concurrent task_exit() */
416
417 local_irq_disable();
418
419 state = cpu_state_for(tstate->cpu);
420
421 raw_spin_lock(&state->lock);
422
423 sup_update_time(&state->sup_env, litmus_clock());
424 if (remaining)
425 *remaining = tstate->client->reservation->cur_budget;
426 if (used_so_far)
427 *used_so_far = tstate->client->reservation->budget_consumed;
428 pres_update_timer_and_unlock(state);
429
430 local_irq_enable();
431}
432
433static long do_pres_reservation_create(
434 int res_type,
435 struct reservation_config *config)
436{
437 struct pres_cpu_state *state;
438 struct reservation* res;
439 struct reservation* new_res = NULL;
440 unsigned long flags;
441 long err;
442
443 /* Allocate before we grab a spin lock. */
444 switch (res_type) {
445 case PERIODIC_POLLING:
446 case SPORADIC_POLLING:
447 err = alloc_polling_reservation(res_type, config, &new_res);
448 break;
449
450 case TABLE_DRIVEN:
451 err = alloc_table_driven_reservation(config, &new_res);
452 break;
453
454 default:
455 err = -EINVAL;
456 break;
457 }
458
459 if (err)
460 return err;
461
462 state = cpu_state_for(config->cpu);
463 raw_spin_lock_irqsave(&state->lock, flags);
464
465 res = sup_find_by_id(&state->sup_env, config->id);
466 if (!res) {
467 sup_add_new_reservation(&state->sup_env, new_res);
468 err = config->id;
469 } else {
470 err = -EEXIST;
471 }
472
473 raw_spin_unlock_irqrestore(&state->lock, flags);
474
475 if (err < 0)
476 kfree(new_res);
477
478 return err;
479}
480
481static long pres_reservation_create(int res_type, void* __user _config)
482{
483 struct reservation_config config;
484
485 TRACE("Attempt to create reservation (%d)\n", res_type);
486
487 if (copy_from_user(&config, _config, sizeof(config)))
488 return -EFAULT;
489
490 if (config.cpu < 0 || !cpu_online(config.cpu)) {
491 printk(KERN_ERR "invalid polling reservation (%u): "
492 "CPU %d offline\n", config.id, config.cpu);
493 return -EINVAL;
494 }
495
496 return do_pres_reservation_create(res_type, &config);
497}
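/*
 * Note on the user-space contract visible here: reservation_config carries
 * at least the target CPU and a per-CPU-unique reservation ID (plus the
 * type-specific parameters consumed by the alloc_* helpers). The ID is what
 * tasks later reference through task_params.cpu, as matched by
 * sup_find_by_id() in pres_admit_task() above.
 */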
498
499static struct domain_proc_info pres_domain_proc_info;
500
501static long pres_get_domain_proc_info(struct domain_proc_info **ret)
502{
503 *ret = &pres_domain_proc_info;
504 return 0;
505}
506
507static void pres_setup_domain_proc(void)
508{
509 int i, cpu;
510 int num_rt_cpus = num_online_cpus();
511
512 struct cd_mapping *cpu_map, *domain_map;
513
514 memset(&pres_domain_proc_info, 0, sizeof(pres_domain_proc_info));
515 init_domain_proc_info(&pres_domain_proc_info, num_rt_cpus, num_rt_cpus);
516 pres_domain_proc_info.num_cpus = num_rt_cpus;
517 pres_domain_proc_info.num_domains = num_rt_cpus;
518
519 i = 0;
520 for_each_online_cpu(cpu) {
521 cpu_map = &pres_domain_proc_info.cpu_to_domains[i];
522 domain_map = &pres_domain_proc_info.domain_to_cpus[i];
523
524 cpu_map->id = cpu;
525 domain_map->id = i;
526 cpumask_set_cpu(i, cpu_map->mask);
527 cpumask_set_cpu(cpu, domain_map->mask);
528 ++i;
529 }
530}
531
532static long pres_activate_plugin(void)
533{
534 int cpu;
535 struct pres_cpu_state *state;
536
537 for_each_online_cpu(cpu) {
538 TRACE("Initializing CPU%d...\n", cpu);
539
540 state = cpu_state_for(cpu);
541
542 raw_spin_lock_init(&state->lock);
543 state->cpu = cpu;
544 state->scheduled = NULL;
545
546 sup_init(&state->sup_env);
547
548 hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
549 state->timer.function = on_scheduling_timer;
550 }
551
552 pres_setup_domain_proc();
553
554 return 0;
555}
556
557static long pres_deactivate_plugin(void)
558{
559 int cpu;
560 struct pres_cpu_state *state;
561 struct reservation *res;
562
563 for_each_online_cpu(cpu) {
564 state = cpu_state_for(cpu);
565 raw_spin_lock(&state->lock);
566
567 hrtimer_cancel(&state->timer);
568
569 /* Delete all reservations --- assumes struct reservation
570 * is prefix of containing struct. */
571
572 while (!list_empty(&state->sup_env.all_reservations)) {
573 res = list_first_entry(
574 &state->sup_env.all_reservations,
575 struct reservation, all_list);
576 list_del(&res->all_list);
577 if (res->ops->shutdown)
578 res->ops->shutdown(res);
579 kfree(res);
580 }
581
582 raw_spin_unlock(&state->lock);
583 }
584
585 destroy_domain_proc_info(&pres_domain_proc_info);
586 return 0;
587}
588
589static struct sched_plugin pres_plugin = {
590 .plugin_name = "P-RES",
591 .schedule = pres_schedule,
592 .task_block = pres_task_block,
593 .task_wake_up = pres_task_resume,
594 .admit_task = pres_admit_task,
595 .task_new = pres_task_new,
596 .fork_task = pres_fork_task,
597 .task_exit = pres_task_exit,
598 .complete_job = complete_job_oneshot,
599 .get_domain_proc_info = pres_get_domain_proc_info,
600 .activate_plugin = pres_activate_plugin,
601 .deactivate_plugin = pres_deactivate_plugin,
602 .reservation_create = pres_reservation_create,
603 .current_budget = pres_current_budget,
604};
605
606static int __init init_pres(void)
607{
608 return register_sched_plugin(&pres_plugin);
609}
610
611module_init(init_pres);
612
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..4e60695578b5
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,688 @@
1/*
 2 * litmus/sched_psn_edf.c
3 *
4 * Implementation of the PSN-EDF scheduler plugin.
5 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
6 *
7 * Suspensions and non-preemptable sections are supported.
8 * Priority inheritance is not supported.
9 */
10
11#include <linux/percpu.h>
12#include <linux/sched.h>
13#include <linux/list.h>
14#include <linux/spinlock.h>
15#include <linux/module.h>
16
17#include <litmus/debug_trace.h>
18#include <litmus/litmus.h>
19#include <litmus/jobs.h>
20#include <litmus/preempt.h>
21#include <litmus/budget.h>
22#include <litmus/np.h>
23#include <litmus/sched_plugin.h>
24#include <litmus/edf_common.h>
25#include <litmus/sched_trace.h>
26#include <litmus/trace.h>
27
28/* to set up domain/cpu mappings */
29#include <litmus/litmus_proc.h>
30
31typedef struct {
32 rt_domain_t domain;
33 int cpu;
34 struct task_struct* scheduled; /* only RT tasks */
35/*
36 * scheduling lock slock
37 * protects the domain and serializes scheduling decisions
38 */
39#define slock domain.ready_lock
40
41} psnedf_domain_t;
42
43DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
44
45#define local_edf (&(this_cpu_ptr(&psnedf_domains)->domain))
46#define local_pedf (this_cpu_ptr(&psnedf_domains))
47#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
48#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
49#define task_edf(task) remote_edf(get_partition(task))
50#define task_pedf(task) remote_pedf(get_partition(task))
51
52
53static void psnedf_domain_init(psnedf_domain_t* pedf,
54 check_resched_needed_t check,
55 release_jobs_t release,
56 int cpu)
57{
58 edf_domain_init(&pedf->domain, check, release);
59 pedf->cpu = cpu;
60 pedf->scheduled = NULL;
61}
62
63static void requeue(struct task_struct* t, rt_domain_t *edf)
64{
65 if (t->state != TASK_RUNNING)
66 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
67
68 tsk_rt(t)->completed = 0;
69 if (is_early_releasing(t) || is_released(t, litmus_clock()))
70 __add_ready(edf, t);
71 else
72 add_release(edf, t); /* it has got to wait */
73}
74
75/* we assume the lock is being held */
76static void preempt(psnedf_domain_t *pedf)
77{
78 preempt_if_preemptable(pedf->scheduled, pedf->cpu);
79}
80
81#ifdef CONFIG_LITMUS_LOCKING
82
83static void boost_priority(struct task_struct* t)
84{
85 unsigned long flags;
86 psnedf_domain_t* pedf = task_pedf(t);
87 lt_t now;
88
89 raw_spin_lock_irqsave(&pedf->slock, flags);
90 now = litmus_clock();
91
92 TRACE_TASK(t, "priority boosted at %llu\n", now);
93
94 tsk_rt(t)->priority_boosted = 1;
95 tsk_rt(t)->boost_start_time = now;
96
97 if (pedf->scheduled != t) {
98 /* holder may be queued: first stop queue changes */
99 raw_spin_lock(&pedf->domain.release_lock);
100 if (is_queued(t) &&
101 /* If it is queued, then we need to re-order. */
102 bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
103 /* If we bubbled to the top, then we need to check for preemptions. */
104 edf_preemption_needed(&pedf->domain, pedf->scheduled))
105 preempt(pedf);
106 raw_spin_unlock(&pedf->domain.release_lock);
107 } /* else: nothing to do since the job is not queued while scheduled */
108
109 raw_spin_unlock_irqrestore(&pedf->slock, flags);
110}
111
112static void unboost_priority(struct task_struct* t)
113{
114 unsigned long flags;
115 psnedf_domain_t* pedf = task_pedf(t);
116 lt_t now;
117
118 raw_spin_lock_irqsave(&pedf->slock, flags);
119 now = litmus_clock();
120
121 /* Assumption: this only happens when the job is scheduled.
122 * Exception: If t transitioned to non-real-time mode, we no longer
123 * care about it. */
124 BUG_ON(pedf->scheduled != t && is_realtime(t));
125
126 TRACE_TASK(t, "priority restored at %llu\n", now);
127
128 tsk_rt(t)->priority_boosted = 0;
129 tsk_rt(t)->boost_start_time = 0;
130
131 /* check if this changes anything */
132 if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
133 preempt(pedf);
134
135 raw_spin_unlock_irqrestore(&pedf->slock, flags);
136}
137
138#endif
139
140static int psnedf_preempt_check(psnedf_domain_t *pedf)
141{
142 if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
143 preempt(pedf);
144 return 1;
145 } else
146 return 0;
147}
148
149/* This check is trivial in partitioned systems as we only have to consider
150 * the CPU of the partition.
151 */
152static int psnedf_check_resched(rt_domain_t *edf)
153{
154 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
155
156 /* because this is a callback from rt_domain_t we already hold
157 * the necessary lock for the ready queue
158 */
159 return psnedf_preempt_check(pedf);
160}
161
162static void job_completion(struct task_struct* t, int forced)
163{
164 sched_trace_task_completion(t, forced);
165 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
166
167 tsk_rt(t)->completed = 0;
168 prepare_for_next_period(t);
169}
170
171static struct task_struct* psnedf_schedule(struct task_struct * prev)
172{
173 psnedf_domain_t* pedf = local_pedf;
174 rt_domain_t* edf = &pedf->domain;
175 struct task_struct* next;
176
177 int out_of_time, sleep, preempt,
178 np, exists, blocks, resched;
179
180 raw_spin_lock(&pedf->slock);
181
182	/* sanity checking
183	 * unlike under G-EDF, when a task exits (is dead),
184	 * pedf->scheduled may be NULL while prev _is_ a real-time task
185	 */
186 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
187 BUG_ON(pedf->scheduled && !is_realtime(prev));
188
189 /* (0) Determine state */
190 exists = pedf->scheduled != NULL;
191 blocks = exists && !is_current_running();
192 out_of_time = exists && budget_enforced(pedf->scheduled)
193 && budget_exhausted(pedf->scheduled);
194 np = exists && is_np(pedf->scheduled);
195 sleep = exists && is_completed(pedf->scheduled);
196 preempt = edf_preemption_needed(edf, prev);
197
198	/* If we need to preempt, do so.
199 * The following checks set resched to 1 in case of special
200 * circumstances.
201 */
202 resched = preempt;
203
204	/* If a task blocks, we have no choice but to reschedule.
205 */
206 if (blocks)
207 resched = 1;
208
209 /* Request a sys_exit_np() call if we would like to preempt but cannot.
210 * Multiple calls to request_exit_np() don't hurt.
211 */
212 if (np && (out_of_time || preempt || sleep))
213 request_exit_np(pedf->scheduled);
214
215 /* Any task that is preemptable and either exhausts its execution
216 * budget or wants to sleep completes. We may have to reschedule after
217 * this.
218 */
219 if (!np && (out_of_time || sleep)) {
220 job_completion(pedf->scheduled, !sleep);
221 resched = 1;
222 }
223
224 /* The final scheduling decision. Do we need to switch for some reason?
225 * Switch if we are in RT mode and have no task or if we need to
226 * resched.
227 */
228 next = NULL;
229 if ((!np || blocks) && (resched || !exists)) {
230		/* When preempting a task that does not block,
231 * re-insert it into either the ready queue or the
232 * release queue (if it completed). requeue() picks
233 * the appropriate queue.
234 */
235 if (pedf->scheduled && !blocks)
236 requeue(pedf->scheduled, edf);
237 next = __take_ready(edf);
238 } else
239 /* Only override Linux scheduler if we have a real-time task
240 * scheduled that needs to continue.
241 */
242 if (exists)
243 next = prev;
244
245 if (next) {
246 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
247 } else {
248 TRACE("becoming idle at %llu\n", litmus_clock());
249 }
250
251 pedf->scheduled = next;
252 sched_state_task_picked();
253 raw_spin_unlock(&pedf->slock);
254
255 return next;
256}
257
258
259/* Prepare a task for running in RT mode
260 */
261static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
262{
263 rt_domain_t* edf = task_edf(t);
264 psnedf_domain_t* pedf = task_pedf(t);
265 unsigned long flags;
266
267 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
268 t->rt_param.task_params.cpu);
269
270 /* setup job parameters */
271 release_at(t, litmus_clock());
272
273 /* The task should be running in the queue, otherwise signal
274 * code will try to wake it up with fatal consequences.
275 */
276 raw_spin_lock_irqsave(&pedf->slock, flags);
277 if (is_scheduled) {
278 /* there shouldn't be anything else scheduled at the time */
279 BUG_ON(pedf->scheduled);
280 pedf->scheduled = t;
281 } else {
282 /* !is_scheduled means it is not scheduled right now, but it
283 * does not mean that it is suspended. If it is not suspended,
284 * it still needs to be requeued. If it is suspended, there is
285 * nothing that we need to do as it will be handled by the
286 * wake_up() handler. */
287 if (on_rq) {
288 requeue(t, edf);
289 /* maybe we have to reschedule */
290 psnedf_preempt_check(pedf);
291 }
292 }
293 raw_spin_unlock_irqrestore(&pedf->slock, flags);
294}
295
296static void psnedf_task_wake_up(struct task_struct *task)
297{
298 unsigned long flags;
299 psnedf_domain_t* pedf = task_pedf(task);
300 rt_domain_t* edf = task_edf(task);
301 lt_t now;
302
303 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
304 raw_spin_lock_irqsave(&pedf->slock, flags);
305 BUG_ON(is_queued(task));
306 now = litmus_clock();
307 if (is_sporadic(task) && is_tardy(task, now)
308#ifdef CONFIG_LITMUS_LOCKING
309 /* We need to take suspensions because of semaphores into
310 * account! If a job resumes after being suspended due to acquiring
311 * a semaphore, it should never be treated as a new job release.
312 */
313 && !is_priority_boosted(task)
314#endif
315 ) {
316 inferred_sporadic_job_release_at(task, now);
317 }
318
319 /* Only add to ready queue if it is not the currently-scheduled
320 * task. This could be the case if a task was woken up concurrently
321 * on a remote CPU before the executing CPU got around to actually
322 * de-scheduling the task, i.e., wake_up() raced with schedule()
323 * and won.
324 */
325 if (pedf->scheduled != task) {
326 requeue(task, edf);
327 psnedf_preempt_check(pedf);
328 }
329
330 raw_spin_unlock_irqrestore(&pedf->slock, flags);
331 TRACE_TASK(task, "wake up done\n");
332}
333
334static void psnedf_task_block(struct task_struct *t)
335{
336 /* only running tasks can block, thus t is in no queue */
337 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
338
339 BUG_ON(!is_realtime(t));
340 BUG_ON(is_queued(t));
341}
342
343static void psnedf_task_exit(struct task_struct * t)
344{
345 unsigned long flags;
346 psnedf_domain_t* pedf = task_pedf(t);
347 rt_domain_t* edf;
348
349 raw_spin_lock_irqsave(&pedf->slock, flags);
350 if (is_queued(t)) {
351 /* dequeue */
352 edf = task_edf(t);
353 remove(edf, t);
354 }
355 if (pedf->scheduled == t)
356 pedf->scheduled = NULL;
357
358 TRACE_TASK(t, "RIP, now reschedule\n");
359
360 preempt(pedf);
361 raw_spin_unlock_irqrestore(&pedf->slock, flags);
362}
363
364#ifdef CONFIG_LITMUS_LOCKING
365
366#include <litmus/fdso.h>
367#include <litmus/srp.h>
368
369/* ******************** SRP support ************************ */
370
371static unsigned int psnedf_get_srp_prio(struct task_struct* t)
372{
373 return get_rt_relative_deadline(t);
374}
375
376/* ******************** FMLP support ********************** */
377
378/* struct for semaphore with priority inheritance */
379struct fmlp_semaphore {
380 struct litmus_lock litmus_lock;
381
382 /* current resource holder */
383 struct task_struct *owner;
384
385 /* FIFO queue of waiting tasks */
386 wait_queue_head_t wait;
387};
388
389static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
390{
391 return container_of(lock, struct fmlp_semaphore, litmus_lock);
392}
393int psnedf_fmlp_lock(struct litmus_lock* l)
394{
395 struct task_struct* t = current;
396 struct fmlp_semaphore *sem = fmlp_from_lock(l);
397 wait_queue_t wait;
398 unsigned long flags;
399
400 if (!is_realtime(t))
401 return -EPERM;
402
403 /* prevent nested lock acquisition --- not supported by FMLP */
404 if (tsk_rt(t)->num_locks_held ||
405 tsk_rt(t)->num_local_locks_held)
406 return -EBUSY;
407
408 spin_lock_irqsave(&sem->wait.lock, flags);
409
410 if (sem->owner) {
411 /* resource is not free => must suspend and wait */
412
413 init_waitqueue_entry(&wait, t);
414
415 /* FIXME: interruptible would be nice some day */
416 set_task_state(t, TASK_UNINTERRUPTIBLE);
417
418 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
419
420 TS_LOCK_SUSPEND;
421
422 /* release lock before sleeping */
423 spin_unlock_irqrestore(&sem->wait.lock, flags);
424
425 /* We depend on the FIFO order. Thus, we don't need to recheck
426 * when we wake up; we are guaranteed to have the lock since
427 * there is only one wake up per release.
428 */
429
430 schedule();
431
432 TS_LOCK_RESUME;
433
434 /* Since we hold the lock, no other task will change
435 * ->owner. We can thus check it without acquiring the spin
436 * lock. */
437 BUG_ON(sem->owner != t);
438 } else {
439 /* it's ours now */
440 sem->owner = t;
441
442 /* mark the task as priority-boosted. */
443 boost_priority(t);
444
445 spin_unlock_irqrestore(&sem->wait.lock, flags);
446 }
447
448 tsk_rt(t)->num_locks_held++;
449
450 return 0;
451}
452
453int psnedf_fmlp_unlock(struct litmus_lock* l)
454{
455 struct task_struct *t = current, *next;
456 struct fmlp_semaphore *sem = fmlp_from_lock(l);
457 unsigned long flags;
458 int err = 0;
459
460 spin_lock_irqsave(&sem->wait.lock, flags);
461
462 if (sem->owner != t) {
463 err = -EINVAL;
464 goto out;
465 }
466
467 tsk_rt(t)->num_locks_held--;
468
469 /* we lose the benefit of priority boosting */
470
471 unboost_priority(t);
472
473 /* check if there are jobs waiting for this resource */
474 next = __waitqueue_remove_first(&sem->wait);
475 if (next) {
476 /* boost next job */
477 boost_priority(next);
478
479		/* next becomes the resource holder */
480 sem->owner = next;
481
482 /* wake up next */
483 wake_up_process(next);
484 } else
485 /* resource becomes available */
486 sem->owner = NULL;
487
488out:
489 spin_unlock_irqrestore(&sem->wait.lock, flags);
490 return err;
491}
492
493int psnedf_fmlp_close(struct litmus_lock* l)
494{
495 struct task_struct *t = current;
496 struct fmlp_semaphore *sem = fmlp_from_lock(l);
497 unsigned long flags;
498
499 int owner;
500
501 spin_lock_irqsave(&sem->wait.lock, flags);
502
503 owner = sem->owner == t;
504
505 spin_unlock_irqrestore(&sem->wait.lock, flags);
506
507 if (owner)
508 psnedf_fmlp_unlock(l);
509
510 return 0;
511}
512
513void psnedf_fmlp_free(struct litmus_lock* lock)
514{
515 kfree(fmlp_from_lock(lock));
516}
517
518static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
519 .close = psnedf_fmlp_close,
520 .lock = psnedf_fmlp_lock,
521 .unlock = psnedf_fmlp_unlock,
522 .deallocate = psnedf_fmlp_free,
523};
524
525static struct litmus_lock* psnedf_new_fmlp(void)
526{
527 struct fmlp_semaphore* sem;
528
529 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
530 if (!sem)
531 return NULL;
532
533 sem->owner = NULL;
534 init_waitqueue_head(&sem->wait);
535 sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
536
537 return &sem->litmus_lock;
538}
539
540/* **** lock constructor **** */
541
542
543static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
544 void* __user unused)
545{
546 int err = -ENXIO;
547 struct srp_semaphore* srp;
548
549 /* PSN-EDF currently supports the SRP for local resources and the FMLP
550 * for global resources. */
551 switch (type) {
552 case FMLP_SEM:
553 /* Flexible Multiprocessor Locking Protocol */
554 *lock = psnedf_new_fmlp();
555 if (*lock)
556 err = 0;
557 else
558 err = -ENOMEM;
559 break;
560
561 case SRP_SEM:
562 /* Baker's Stack Resource Policy */
563 srp = allocate_srp_semaphore();
564 if (srp) {
565 *lock = &srp->litmus_lock;
566 err = 0;
567 } else
568 err = -ENOMEM;
569 break;
570 };
571
572 return err;
573}
574
575#endif
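For reference, a task running under PSN-EDF reaches psnedf_allocate_lock() indirectly, through the FDSO layer. The following userspace fragment is a minimal sketch only, assuming the usual liblitmus wrappers (open_fmlp_sem(), litmus_lock(), litmus_unlock(), od_close()), which are not part of this patch:

/* Illustrative only: userspace use of an FMLP semaphore under PSN-EDF. */
#include <fcntl.h>
#include <unistd.h>
#include <litmus.h>   /* assumed liblitmus header providing the wrappers below */

static int use_fmlp_resource(const char *ns_file, int resource_id)
{
	int fd, od, ret;

	fd = open(ns_file, O_RDONLY | O_CREAT, 0666); /* shared object namespace */
	if (fd < 0)
		return fd;

	/* first open ends up in psnedf_allocate_lock(..., FMLP_SEM, ...) */
	od = open_fmlp_sem(fd, resource_id);
	if (od < 0)
		return od;

	ret = litmus_lock(od);    /* kernel: boost_priority() + FIFO wait queue */
	/* ... critical section ... */
	ret = litmus_unlock(od);  /* kernel: unboost_priority() + wake next waiter */

	od_close(od);
	close(fd);
	return ret;
}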
576
577static struct domain_proc_info psnedf_domain_proc_info;
578static long psnedf_get_domain_proc_info(struct domain_proc_info **ret)
579{
580 *ret = &psnedf_domain_proc_info;
581 return 0;
582}
583
584static void psnedf_setup_domain_proc(void)
585{
586 int i, cpu;
587 int release_master =
588#ifdef CONFIG_RELEASE_MASTER
589 atomic_read(&release_master_cpu);
590#else
591 NO_CPU;
592#endif
593 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
594 struct cd_mapping *cpu_map, *domain_map;
595
596 memset(&psnedf_domain_proc_info, 0, sizeof(psnedf_domain_proc_info));
597 init_domain_proc_info(&psnedf_domain_proc_info, num_rt_cpus, num_rt_cpus);
598 psnedf_domain_proc_info.num_cpus = num_rt_cpus;
599 psnedf_domain_proc_info.num_domains = num_rt_cpus;
600
601 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
602 if (cpu == release_master)
603 continue;
604 cpu_map = &psnedf_domain_proc_info.cpu_to_domains[i];
605 domain_map = &psnedf_domain_proc_info.domain_to_cpus[i];
606
607 cpu_map->id = cpu;
608 domain_map->id = i; /* enumerate w/o counting the release master */
609 cpumask_set_cpu(i, cpu_map->mask);
610 cpumask_set_cpu(cpu, domain_map->mask);
611 ++i;
612 }
613}
614
615static long psnedf_activate_plugin(void)
616{
617#ifdef CONFIG_RELEASE_MASTER
618 int cpu;
619
620 for_each_online_cpu(cpu) {
621 remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
622 }
623#endif
624
625#ifdef CONFIG_LITMUS_LOCKING
626 get_srp_prio = psnedf_get_srp_prio;
627#endif
628
629 psnedf_setup_domain_proc();
630
631 return 0;
632}
633
634static long psnedf_deactivate_plugin(void)
635{
636 destroy_domain_proc_info(&psnedf_domain_proc_info);
637 return 0;
638}
639
640static long psnedf_admit_task(struct task_struct* tsk)
641{
642 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
643#ifdef CONFIG_RELEASE_MASTER
644 /* don't allow tasks on release master CPU */
645 && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
646#endif
647 )
648 return 0;
649 else
650 return -EINVAL;
651}
652
653/* Plugin object */
654static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
655 .plugin_name = "PSN-EDF",
656 .task_new = psnedf_task_new,
657 .complete_job = complete_job,
658 .task_exit = psnedf_task_exit,
659 .schedule = psnedf_schedule,
660 .task_wake_up = psnedf_task_wake_up,
661 .task_block = psnedf_task_block,
662 .admit_task = psnedf_admit_task,
663 .activate_plugin = psnedf_activate_plugin,
664 .deactivate_plugin = psnedf_deactivate_plugin,
665 .get_domain_proc_info = psnedf_get_domain_proc_info,
666#ifdef CONFIG_LITMUS_LOCKING
667 .allocate_lock = psnedf_allocate_lock,
668#endif
669};
670
671
672static int __init init_psn_edf(void)
673{
674 int i;
675
676	/* We do not really want to support CPU hotplug, do we? ;)
677	 * However, if we ever decide to support it,
678	 * we cannot rely on num_online_cpus() here.
679	 */
680 for (i = 0; i < num_online_cpus(); i++) {
681 psnedf_domain_init(remote_pedf(i),
682 psnedf_check_resched,
683 NULL, i);
684 }
685 return register_sched_plugin(&psn_edf_plugin);
686}
687
688module_init(init_psn_edf);
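Note that register_sched_plugin() only makes PSN-EDF selectable; psnedf_activate_plugin() runs when the plugin is switched in later. A minimal sketch, assuming the usual /proc/litmus/active_plugin control file (an assumption about the proc interface, not shown in this file):

/* Illustrative only: select PSN-EDF as the active plugin from userspace. */
#include <stdio.h>

static int activate_psn_edf(void)
{
	FILE *f = fopen("/proc/litmus/active_plugin", "w");
	if (!f)
		return -1;
	/* triggers psnedf_activate_plugin() once no real-time tasks are present */
	fprintf(f, "PSN-EDF");
	fclose(f);
	return 0;
}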
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
new file mode 100644
index 000000000000..a6088f16bb08
--- /dev/null
+++ b/litmus/sched_task_trace.c
@@ -0,0 +1,258 @@
1/*
2 * sched_task_trace.c -- record scheduling events to a byte stream
3 */
4
5#define NO_TASK_TRACE_DECLS
6
7#include <linux/module.h>
8#include <linux/sched.h>
9#include <linux/percpu.h>
10
11#include <litmus/ftdev.h>
12#include <litmus/litmus.h>
13
14#include <litmus/sched_trace.h>
15#include <litmus/feather_trace.h>
16#include <litmus/ftdev.h>
17
18#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
19
20#define now() litmus_clock()
21
22struct local_buffer {
23 struct st_event_record record[NO_EVENTS];
24 char flag[NO_EVENTS];
25 struct ft_buffer ftbuf;
26};
27
28DEFINE_PER_CPU(struct local_buffer, st_event_buffer);
29
30static struct ftdev st_dev;
31
32static int st_dev_can_open(struct ftdev *dev, unsigned int cpu)
33{
34 return cpu_online(cpu) ? 0 : -ENODEV;
35}
36
37static int __init init_sched_task_trace(void)
38{
39 struct local_buffer* buf;
40 int i, ok = 0, err;
41 printk("Allocated %u sched_trace_xxx() events per CPU "
42 "(buffer size: %d bytes)\n",
43 NO_EVENTS, (int) sizeof(struct local_buffer));
44
45 err = ftdev_init(&st_dev, THIS_MODULE,
46 num_online_cpus(), "sched_trace");
47 if (err)
48 goto err_out;
49
50 for (i = 0; i < st_dev.minor_cnt; i++) {
51 buf = &per_cpu(st_event_buffer, i);
52 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
53 sizeof(struct st_event_record),
54 buf->flag,
55 buf->record);
56 st_dev.minor[i].buf = &buf->ftbuf;
57 }
58 if (ok == st_dev.minor_cnt) {
59 st_dev.can_open = st_dev_can_open;
60 err = register_ftdev(&st_dev);
61 if (err)
62 goto err_dealloc;
63 } else {
64 err = -EINVAL;
65 goto err_dealloc;
66 }
67
68 return 0;
69
70err_dealloc:
71 ftdev_exit(&st_dev);
72err_out:
73 printk(KERN_WARNING "Could not register sched_trace module\n");
74 return err;
75}
76
77static void __exit exit_sched_task_trace(void)
78{
79 ftdev_exit(&st_dev);
80}
81
82module_init(init_sched_task_trace);
83module_exit(exit_sched_task_trace);
84
85
86static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
87{
88 struct st_event_record* rec = NULL;
89 struct local_buffer* buf;
90
91 buf = &get_cpu_var(st_event_buffer);
92 if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
93 rec->hdr.type = type;
94 rec->hdr.cpu = smp_processor_id();
95 rec->hdr.pid = t ? t->pid : 0;
96 rec->hdr.job = t ? t->rt_param.job_params.job_no : 0;
97 } else {
98 put_cpu_var(st_event_buffer);
99 }
100 /* rec will be NULL if it failed */
101 return rec;
102}
103
104static inline void put_record(struct st_event_record* rec)
105{
106 struct local_buffer* buf;
107 /* don't use get_cpu_var() here, get_record() did that already for us */
108 buf = this_cpu_ptr(&st_event_buffer);
109 ft_buffer_finish_write(&buf->ftbuf, rec);
110 /* matches the get_cpu_var() in get_record() */
111 put_cpu_var(st_event_buffer);
112}
113
114feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
115{
116 struct task_struct *t = (struct task_struct*) _task;
117 struct st_event_record* rec = get_record(ST_NAME, t);
118 int i;
119 if (rec) {
120 for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
121 rec->data.name.cmd[i] = t->comm[i];
122 put_record(rec);
123 }
124}
125
126feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
127{
128 struct task_struct *t = (struct task_struct*) _task;
129 struct st_event_record* rec = get_record(ST_PARAM, t);
130 if (rec) {
131 rec->data.param.wcet = get_exec_cost(t);
132 rec->data.param.period = get_rt_period(t);
133 rec->data.param.phase = get_rt_phase(t);
134 rec->data.param.partition = get_partition(t);
135 rec->data.param.class = get_class(t);
136 put_record(rec);
137 }
138}
139
140feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
141{
142 struct task_struct *t = (struct task_struct*) _task;
143 struct st_event_record* rec = get_record(ST_RELEASE, t);
144 if (rec) {
145 rec->data.release.release = get_release(t);
146 rec->data.release.deadline = get_deadline(t);
147 put_record(rec);
148 }
149}
150
151/* skipped: st_assigned_data, we don't use it atm */
152
153feather_callback void do_sched_trace_task_switch_to(unsigned long id,
154 unsigned long _task)
155{
156 struct task_struct *t = (struct task_struct*) _task;
157 struct st_event_record* rec;
158 if (is_realtime(t)) {
159 rec = get_record(ST_SWITCH_TO, t);
160 if (rec) {
161 rec->data.switch_to.when = now();
162 rec->data.switch_to.exec_time = get_exec_time(t);
163 put_record(rec);
164 }
165 }
166}
167
168feather_callback void do_sched_trace_task_switch_away(unsigned long id,
169 unsigned long _task)
170{
171 struct task_struct *t = (struct task_struct*) _task;
172 struct st_event_record* rec;
173 if (is_realtime(t)) {
174 rec = get_record(ST_SWITCH_AWAY, t);
175 if (rec) {
176 rec->data.switch_away.when = now();
177 rec->data.switch_away.exec_time = get_exec_time(t);
178 put_record(rec);
179 }
180 }
181}
182
183feather_callback void do_sched_trace_task_completion(unsigned long id,
184 unsigned long _task,
185 unsigned long forced)
186{
187 struct task_struct *t = (struct task_struct*) _task;
188 struct st_event_record* rec = get_record(ST_COMPLETION, t);
189 if (rec) {
190 rec->data.completion.when = now();
191 rec->data.completion.forced = forced;
192 rec->data.completion.exec_time = get_exec_time(t);
193 put_record(rec);
194 }
195}
196
197feather_callback void do_sched_trace_last_suspension_as_completion(
198 unsigned long id,
199 unsigned long _task)
200{
201 struct task_struct *t = (struct task_struct*) _task;
202 struct st_event_record* rec = get_record(ST_COMPLETION, t);
203 if (rec) {
204 rec->data.completion.when
205 = tsk_rt(t)->job_params.last_suspension;
206 rec->data.completion.forced = 0;
207 rec->data.completion.exec_time = get_exec_time(t);
208 put_record(rec);
209 }
210}
211
212feather_callback void do_sched_trace_task_block(unsigned long id,
213 unsigned long _task)
214{
215 struct task_struct *t = (struct task_struct*) _task;
216 struct st_event_record* rec = get_record(ST_BLOCK, t);
217 if (rec) {
218 rec->data.block.when = now();
219 put_record(rec);
220 }
221}
222
223feather_callback void do_sched_trace_task_resume(unsigned long id,
224 unsigned long _task)
225{
226 struct task_struct *t = (struct task_struct*) _task;
227 struct st_event_record* rec = get_record(ST_RESUME, t);
228 if (rec) {
229 rec->data.resume.when = now();
230 put_record(rec);
231 }
232}
233
234feather_callback void do_sched_trace_sys_release(unsigned long id,
235 unsigned long _start)
236{
237 lt_t *start = (lt_t*) _start;
238 struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
239 if (rec) {
240 rec->data.sys_release.when = now();
241 rec->data.sys_release.release = *start;
242 put_record(rec);
243 }
244}
245
246feather_callback void do_sched_trace_action(unsigned long id,
247 unsigned long _task,
248 unsigned long action)
249{
250 struct task_struct *t = (struct task_struct*) _task;
251 struct st_event_record* rec = get_record(ST_ACTION, t);
252
253 if (rec) {
254 rec->data.action.when = now();
255 rec->data.action.action = action;
256 put_record(rec);
257 }
258}
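The records written above are consumed from userspace by reading raw st_event_record structs from the per-CPU ftdev minors. A sketch, assuming the device nodes appear as /dev/litmus/sched_traceN and that the kernel's record layout is visible to the reader (normally ft_tools/st_trace takes care of this):

/* Illustrative only: dump one CPU's sched_trace event stream. */
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <litmus/sched_trace.h>   /* struct st_event_record layout (assumed available) */

static void dump_cpu_events(int cpu)
{
	char path[64];
	struct st_event_record rec;
	int fd;

	snprintf(path, sizeof(path), "/dev/litmus/sched_trace%d", cpu);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return;

	while (read(fd, &rec, sizeof(rec)) == sizeof(rec))
		printf("cpu=%u pid=%u job=%u type=%u\n",
		       rec.hdr.cpu, rec.hdr.pid, rec.hdr.job, rec.hdr.type);

	close(fd);
}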
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
new file mode 100644
index 000000000000..e8648f308ccd
--- /dev/null
+++ b/litmus/sched_trace.c
@@ -0,0 +1,251 @@
1/*
2 * sched_trace.c -- record scheduling events to a byte stream.
3 */
4#include <linux/spinlock.h>
5#include <linux/mutex.h>
6
7#include <linux/fs.h>
8#include <linux/slab.h>
9#include <linux/miscdevice.h>
10#include <asm/uaccess.h>
11#include <linux/module.h>
12#include <linux/sysrq.h>
13#include <linux/sched.h>
14#include <linux/kfifo.h>
15
16atomic_t __log_seq_no = ATOMIC_INIT(0);
17
18#define SCHED_TRACE_NAME "litmus/log"
19
20/* Compute size of TRACE() buffer */
21#define LITMUS_TRACE_BUF_SIZE (1 << CONFIG_SCHED_DEBUG_TRACE_SHIFT)
22
23/* Max length of one read from the buffer */
24#define MAX_READ_LEN (64 * 1024)
25
26/* Max length for one write --- by TRACE() --- to the buffer. This is used to
27 * allocate a per-cpu buffer for printf() formatting. */
28#define MSG_SIZE 255
29
30
31static DEFINE_MUTEX(reader_mutex);
32static atomic_t reader_cnt = ATOMIC_INIT(0);
33static DEFINE_KFIFO(debug_buffer, char, LITMUS_TRACE_BUF_SIZE);
34
35
36static DEFINE_RAW_SPINLOCK(log_buffer_lock);
37static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
38
39/*
40 * sched_trace_log_message - Write to the trace buffer (log_buffer)
41 *
42 * This is the only function accessing the log_buffer from inside the
43 * kernel for writing.
44 * Concurrent access to sched_trace_log_message must be serialized using
45 * log_buffer_lock.
46 * The maximum length of a formatted message is MSG_SIZE - 1 characters.
47 */
48void sched_trace_log_message(const char* fmt, ...)
49{
50 unsigned long flags;
51 va_list args;
52 size_t len;
53 char* buf;
54
55 if (!atomic_read(&reader_cnt))
56 /* early exit if nobody is listening */
57 return;
58
59 va_start(args, fmt);
60 local_irq_save(flags);
61
62 /* format message */
63 buf = this_cpu_ptr(fmt_buffer);
64 len = vscnprintf(buf, MSG_SIZE, fmt, args);
65
66 raw_spin_lock(&log_buffer_lock);
67 /* Don't copy the trailing null byte, we don't want null bytes in a
68 * text file.
69 */
70 kfifo_in(&debug_buffer, buf, len);
71 raw_spin_unlock(&log_buffer_lock);
72
73 local_irq_restore(flags);
74 va_end(args);
75}
76
77
78/*
79 * log_read - Read the trace buffer
80 *
81 * This function is called as a file operation from userspace.
82 * Readers can sleep. Access is serialized through reader_mutex
83 */
84static ssize_t log_read(struct file *filp,
85 char __user *to, size_t len,
86 loff_t *f_pos)
87{
88 /* we ignore f_pos, this is strictly sequential */
89
90 ssize_t error = -EINVAL;
91 char* mem;
92
93 if (mutex_lock_interruptible(&reader_mutex)) {
94 error = -ERESTARTSYS;
95 goto out;
96 }
97
98 if (len > MAX_READ_LEN)
99 len = MAX_READ_LEN;
100
101 mem = kmalloc(len, GFP_KERNEL);
102 if (!mem) {
103 error = -ENOMEM;
104 goto out_unlock;
105 }
106
107 error = kfifo_out(&debug_buffer, mem, len);
108 while (!error) {
109 set_current_state(TASK_INTERRUPTIBLE);
110 schedule_timeout(110);
111 if (signal_pending(current))
112 error = -ERESTARTSYS;
113 else
114 error = kfifo_out(&debug_buffer, mem, len);
115 }
116
117 if (error > 0 && copy_to_user(to, mem, error))
118 error = -EFAULT;
119
120 kfree(mem);
121 out_unlock:
122 mutex_unlock(&reader_mutex);
123 out:
124 return error;
125}
126
127/*
128 * Enable redirection of printk() messages to the trace buffer.
129 * Defined in kernel/printk.c
130 */
131extern int trace_override;
132extern int trace_recurse;
133
134/*
135 * log_open - open the global log message ring buffer.
136 */
137static int log_open(struct inode *in, struct file *filp)
138{
139 int error = -EINVAL;
140
141 if (mutex_lock_interruptible(&reader_mutex)) {
142 error = -ERESTARTSYS;
143 goto out;
144 }
145
146 atomic_inc(&reader_cnt);
147 error = 0;
148
149 printk(KERN_DEBUG
150 "sched_trace kfifo with buffer starting at: 0x%p\n",
151 debug_buffer.buf);
152
153 /* override printk() */
154 trace_override++;
155
156 mutex_unlock(&reader_mutex);
157 out:
158 return error;
159}
160
161static int log_release(struct inode *in, struct file *filp)
162{
163 int error = -EINVAL;
164
165 if (mutex_lock_interruptible(&reader_mutex)) {
166 error = -ERESTARTSYS;
167 goto out;
168 }
169
170 atomic_dec(&reader_cnt);
171
172 /* release printk() overriding */
173 trace_override--;
174
175 printk(KERN_DEBUG "sched_trace kfifo released\n");
176
177 mutex_unlock(&reader_mutex);
178 out:
179 return error;
180}
181
182/*
183 * log_fops - The file operations for accessing the global LITMUS log message
184 * buffer.
185 *
186 * Except for opening the device file it uses the same operations as trace_fops.
187 */
188static struct file_operations log_fops = {
189 .owner = THIS_MODULE,
190 .open = log_open,
191 .release = log_release,
192 .read = log_read,
193};
194
195static struct miscdevice litmus_log_dev = {
196 .name = SCHED_TRACE_NAME,
197 .minor = MISC_DYNAMIC_MINOR,
198 .fops = &log_fops,
199};
200
201#ifdef CONFIG_MAGIC_SYSRQ
202void dump_trace_buffer(int max)
203{
204 char line[80];
205 int len;
206 int count = 0;
207
208 /* potential, but very unlikely, race... */
209 trace_recurse = 1;
210 while ((max == 0 || count++ < max) &&
211	       (len = kfifo_out(&debug_buffer, line, sizeof(line) - 1)) > 0) {
212 line[len] = '\0';
213 printk("%s", line);
214 }
215 trace_recurse = 0;
216}
217
218static void sysrq_dump_trace_buffer(int key)
219{
220 dump_trace_buffer(100);
221}
222
223static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
224 .handler = sysrq_dump_trace_buffer,
225 .help_msg = "dump-trace-buffer(Y)",
226 .action_msg = "writing content of TRACE() buffer",
227};
228#endif
229
230static int __init init_sched_trace(void)
231{
232 printk("Initializing TRACE() device\n");
233
234#ifdef CONFIG_MAGIC_SYSRQ
235 /* offer some debugging help */
236 if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
237 printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
238 else
239 printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
240#endif
241
242 return misc_register(&litmus_log_dev);
243}
244
245static void __exit exit_sched_trace(void)
246{
247 misc_deregister(&litmus_log_dev);
248}
249
250module_init(init_sched_trace);
251module_exit(exit_sched_trace);
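The TRACE() log is consumed by simply reading the misc device registered above. A minimal reader sketch; the /dev/litmus/log path follows from SCHED_TRACE_NAME but the exact node name created by udev is an assumption:

/* Illustrative only: blocking reader for the TRACE() kfifo. */
#include <fcntl.h>
#include <unistd.h>

static void drain_trace_log(int out_fd)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/litmus/log", O_RDONLY);  /* log_open(): bumps reader_cnt */

	if (fd < 0)
		return;
	/* log_read() polls in 110-jiffy intervals until data arrives */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		write(out_fd, buf, n);
	close(fd);  /* log_release(): drops reader_cnt, stops printk redirection */
}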
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..7e3c057c0752
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,310 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <linux/module.h>
6#include <asm/atomic.h>
7#include <linux/sched.h>
8#include <linux/wait.h>
9
10#include <litmus/litmus.h>
11#include <litmus/sched_plugin.h>
12#include <litmus/debug_trace.h>
13#include <litmus/fdso.h>
14#include <litmus/trace.h>
15
16
17#ifdef CONFIG_LITMUS_LOCKING
18
19#include <litmus/srp.h>
20
21srp_prioritization_t get_srp_prio;
22
23struct srp {
24 struct list_head ceiling;
25 wait_queue_head_t ceiling_blocked;
26};
27#define system_ceiling(srp) list2prio(srp->ceiling.next)
28#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
29
30#define UNDEF_SEM -2
31
32DEFINE_PER_CPU(struct srp, srp);
33
34DEFINE_PER_CPU(int, srp_objects_in_use);
35
36/* Initialize SRP semaphores at boot time. */
37static int __init srp_init(void)
38{
39 int i;
40
41 printk("Initializing SRP per-CPU ceilings...");
42 for (i = 0; i < NR_CPUS; i++) {
43 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
44 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
45 per_cpu(srp_objects_in_use, i) = 0;
46 }
47 printk(" done!\n");
48
49 return 0;
50}
51module_init(srp_init);
52
53/* SRP task priority comparison function. Smaller numeric values have higher
54 * priority, tie-break is PID. Special case: priority == 0 <=> no priority
55 */
56static int srp_higher_prio(struct srp_priority* first,
57 struct srp_priority* second)
58{
59 if (!first->priority)
60 return 0;
61 else
62 return !second->priority ||
63 first->priority < second->priority || (
64 first->priority == second->priority &&
65 first->pid < second->pid);
66}
67
68
69static int srp_exceeds_ceiling(struct task_struct* first,
70 struct srp* srp)
71{
72 struct srp_priority prio;
73
74 if (list_empty(&srp->ceiling))
75 return 1;
76 else {
77 prio.pid = first->pid;
78 prio.priority = get_srp_prio(first);
79 return srp_higher_prio(&prio, system_ceiling(srp)) ||
80 ceiling2sem(system_ceiling(srp))->owner == first;
81 }
82}
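To make the ceiling test concrete, here is a hypothetical kernel-context illustration; the values and the helper function are invented for this example. Under PSN-EDF, get_srp_prio() returns the relative deadline, so smaller values denote higher priority:

/* Hypothetical example, not part of the patch: uses the srp_priority fields above. */
static int __maybe_unused srp_ceiling_example(void)
{
	struct srp_priority ceiling  = { .priority = 10000000 /* 10 ms */, .pid = 100 };
	struct srp_priority tight_dl = { .priority =  5000000 /*  5 ms */, .pid = 200 };
	struct srp_priority loose_dl = { .priority = 25000000 /* 25 ms */, .pid = 300 };

	/* 1: a 5 ms relative deadline beats the 10 ms ceiling => task may proceed */
	int proceeds = srp_higher_prio(&tight_dl, &ceiling);
	/* 0: a 25 ms relative deadline does not => task is ceiling-blocked until
	 * the semaphore is released (unless it owns the semaphore at the top) */
	int blocked = !srp_higher_prio(&loose_dl, &ceiling);

	return proceeds && blocked;
}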
83
84static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
85{
86 struct list_head *pos;
87 if (in_list(&prio->list)) {
88 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
89 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
90 return;
91 }
92 list_for_each(pos, &srp->ceiling)
93 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
94 __list_add(&prio->list, pos->prev, pos);
95 return;
96 }
97
98 list_add_tail(&prio->list, &srp->ceiling);
99}
100
101
102static int lock_srp_semaphore(struct litmus_lock* l)
103{
104 struct task_struct* t = current;
105 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
106
107 if (!is_realtime(t))
108 return -EPERM;
109
110 /* prevent acquisition of local locks in global critical sections */
111 if (tsk_rt(t)->num_locks_held)
112 return -EBUSY;
113
114 preempt_disable();
115
116 /* Update ceiling. */
117 srp_add_prio(this_cpu_ptr(&srp), &sem->ceiling);
118
119 /* SRP invariant: all resources available */
120 BUG_ON(sem->owner != NULL);
121
122 sem->owner = t;
123 TRACE_CUR("acquired srp 0x%p\n", sem);
124
125 tsk_rt(t)->num_local_locks_held++;
126
127 preempt_enable();
128
129 return 0;
130}
131
132static int unlock_srp_semaphore(struct litmus_lock* l)
133{
134 struct task_struct* t = current;
135 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
136 int err = 0;
137
138 preempt_disable();
139
140 if (sem->owner != t) {
141 err = -EINVAL;
142 } else {
143 /* The current owner should be executing on the correct CPU.
144 *
145 * If the owner transitioned out of RT mode or is exiting, then
146		 * it might have already been migrated away by the best-effort
147 * scheduler and we just have to deal with it. */
148 if (unlikely(!is_realtime(t) && sem->cpu != smp_processor_id())) {
149 TRACE_TASK(t, "SRP unlock cpu=%d, sem->cpu=%d\n",
150 smp_processor_id(), sem->cpu);
151 preempt_enable();
152 err = litmus_be_migrate_to(sem->cpu);
153 preempt_disable();
154 TRACE_TASK(t, "post-migrate: cpu=%d, sem->cpu=%d err=%d\n",
155 smp_processor_id(), sem->cpu, err);
156 }
157 BUG_ON(sem->cpu != smp_processor_id());
158 err = 0;
159
160 /* Determine new system priority ceiling for this CPU. */
161 BUG_ON(!in_list(&sem->ceiling.list));
162
163 list_del(&sem->ceiling.list);
164 sem->owner = NULL;
165
166 /* Wake tasks on this CPU, if they exceed current ceiling. */
167 TRACE_CUR("released srp 0x%p\n", sem);
168 wake_up_all(&this_cpu_ptr(&srp)->ceiling_blocked);
169
170 tsk_rt(t)->num_local_locks_held--;
171 }
172
173 preempt_enable();
174 return err;
175}
176
177static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
178{
179 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
180 int err = 0;
181 struct task_struct* t = current;
182 struct srp_priority t_prio;
183
184 if (!is_realtime(t))
185 return -EPERM;
186
187 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
188
189 preempt_disable();
190
191 if (sem->owner != NULL)
192 err = -EBUSY;
193
194 if (err == 0) {
195 if (sem->cpu == UNDEF_SEM)
196 sem->cpu = get_partition(t);
197 else if (sem->cpu != get_partition(t))
198 err = -EPERM;
199 }
200
201 if (err == 0) {
202 t_prio.priority = get_srp_prio(t);
203 t_prio.pid = t->pid;
204 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
205 sem->ceiling.priority = t_prio.priority;
206 sem->ceiling.pid = t_prio.pid;
207 }
208 }
209
210 preempt_enable();
211
212 return err;
213}
214
215static int close_srp_semaphore(struct litmus_lock* l)
216{
217 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
218 int err = 0;
219
220 preempt_disable();
221
222 if (sem->owner == current)
223 unlock_srp_semaphore(l);
224
225 preempt_enable();
226
227 return err;
228}
229
230static void deallocate_srp_semaphore(struct litmus_lock* l)
231{
232 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
233 raw_cpu_dec(srp_objects_in_use);
234 kfree(sem);
235}
236
237static struct litmus_lock_ops srp_lock_ops = {
238 .open = open_srp_semaphore,
239 .close = close_srp_semaphore,
240 .lock = lock_srp_semaphore,
241 .unlock = unlock_srp_semaphore,
242 .deallocate = deallocate_srp_semaphore,
243};
244
245struct srp_semaphore* allocate_srp_semaphore(void)
246{
247 struct srp_semaphore* sem;
248
249 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
250 if (!sem)
251 return NULL;
252
253 INIT_LIST_HEAD(&sem->ceiling.list);
254 sem->ceiling.priority = 0;
255 sem->cpu = UNDEF_SEM;
256 sem->owner = NULL;
257
258 sem->litmus_lock.ops = &srp_lock_ops;
259
260 raw_cpu_inc(srp_objects_in_use);
261 return sem;
262}
263
264static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
265 void *key)
266{
267 int cpu = smp_processor_id();
268 struct task_struct *tsk = wait->private;
269 if (cpu != get_partition(tsk))
270		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
271 get_partition(tsk));
272 else if (srp_exceeds_ceiling(tsk, this_cpu_ptr(&srp)))
273 return default_wake_function(wait, mode, sync, key);
274 return 0;
275}
276
277static void do_ceiling_block(struct task_struct *tsk)
278{
279 wait_queue_t wait = {
280 .private = tsk,
281 .func = srp_wake_up,
282 .task_list = {NULL, NULL}
283 };
284
285 tsk->state = TASK_UNINTERRUPTIBLE;
286 add_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
287 tsk->rt_param.srp_non_recurse = 1;
288 preempt_enable_no_resched();
289 schedule();
290 preempt_disable();
291 tsk->rt_param.srp_non_recurse = 0;
292 remove_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
293}
294
295/* Wait for current task priority to exceed system-wide priority ceiling.
296 */
297void __srp_ceiling_block(struct task_struct *cur)
298{
299 preempt_disable();
300 if (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) {
301 TRACE_CUR("is priority ceiling blocked.\n");
302 while (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp)))
303 do_ceiling_block(cur);
304 TRACE_CUR("finally exceeds system ceiling.\n");
305 } else
306 TRACE_CUR("is not priority ceiling blocked\n");
307 preempt_enable();
308}
309
310#endif
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..123cefd68a36
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,153 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18#include <litmus/debug_trace.h>
19
20struct ts_release_wait {
21 struct list_head list;
22 struct completion completion;
23 lt_t ts_release_time;
24};
25
26#define DECLARE_TS_RELEASE_WAIT(symb) \
27 struct ts_release_wait symb = \
28 { \
29 LIST_HEAD_INIT(symb.list), \
30 COMPLETION_INITIALIZER_ONSTACK(symb.completion), \
31 0 \
32 }
33
34static LIST_HEAD(task_release_list);
35static DEFINE_MUTEX(task_release_lock);
36
37static long do_wait_for_ts_release(void)
38{
39 DECLARE_TS_RELEASE_WAIT(wait);
40
41 long ret = -ERESTARTSYS;
42
43 if (mutex_lock_interruptible(&task_release_lock))
44 goto out;
45
46 list_add(&wait.list, &task_release_list);
47
48 mutex_unlock(&task_release_lock);
49
50 /* We are enqueued, now we wait for someone to wake us up. */
51 ret = wait_for_completion_interruptible(&wait.completion);
52
53 if (!ret) {
54 /* Completion succeeded, setup release time. */
55 ret = litmus->wait_for_release_at(
56 wait.ts_release_time + get_rt_phase(current));
57 } else {
58 /* We were interrupted, must cleanup list. */
59 mutex_lock(&task_release_lock);
60 if (!wait.completion.done)
61 list_del(&wait.list);
62 mutex_unlock(&task_release_lock);
63 }
64
65out:
66 return ret;
67}
68
69int count_tasks_waiting_for_release(void)
70{
71 int task_count = 0;
72 struct list_head *pos;
73
74 mutex_lock(&task_release_lock);
75
76 list_for_each(pos, &task_release_list) {
77 task_count++;
78 }
79
80 mutex_unlock(&task_release_lock);
81
82
83 return task_count;
84}
85
86static long do_release_ts(lt_t start)
87{
88 long task_count = 0;
89
90 struct list_head *pos, *safe;
91 struct ts_release_wait *wait;
92
93 if (mutex_lock_interruptible(&task_release_lock)) {
94 task_count = -ERESTARTSYS;
95 goto out;
96 }
97
98 TRACE("<<<<<< synchronous task system release >>>>>>\n");
99 sched_trace_sys_release(&start);
100 litmus->synchronous_release_at(start);
101
102 task_count = 0;
103 list_for_each_safe(pos, safe, &task_release_list) {
104 wait = (struct ts_release_wait*)
105 list_entry(pos, struct ts_release_wait, list);
106
107 task_count++;
108 wait->ts_release_time = start;
109 complete(&wait->completion);
110 }
111
112 /* clear stale list */
113 INIT_LIST_HEAD(&task_release_list);
114
115 mutex_unlock(&task_release_lock);
116
117out:
118 return task_count;
119}
120
121
122asmlinkage long sys_wait_for_ts_release(void)
123{
124 long ret = -EPERM;
125 struct task_struct *t = current;
126
127 if (is_realtime(t))
128 ret = do_wait_for_ts_release();
129
130 return ret;
131}
132
133#define ONE_MS 1000000ULL
134#define ONE_SECOND (ONE_MS * 1000)
135
136asmlinkage long sys_release_ts(lt_t __user *__when)
137{
138 long ret;
139 lt_t start_time;
140 lt_t now;
141
142 /* FIXME: check capabilities... */
143
144 ret = copy_from_user(&start_time, __when, sizeof(start_time));
145 if (ret == 0) {
146 now = litmus_clock();
147 if (lt_before(start_time, now))
148 start_time = now + ONE_SECOND;
149 ret = do_release_ts(start_time);
150 }
151
152 return ret;
153}
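From userspace, the synchronous-release protocol built on these two syscalls looks roughly as follows. This is a sketch assuming the liblitmus wrappers wait_for_ts_release() and release_ts() (or the release_ts command-line tool), which are not part of this patch:

/* Illustrative only: every RT task blocks, then one controller releases them all. */
#include <litmus.h>   /* assumed liblitmus header: lt_t, wait_for_ts_release(), release_ts() */

/* In each real-time task, after switching to real-time mode: */
static void wait_for_synchronous_start(void)
{
	/* blocks in sys_wait_for_ts_release() until do_release_ts() completes us;
	 * the first job is then released at start_time + the task's phase */
	wait_for_ts_release();
}

/* In a separate controller process: */
static void start_task_system(void)
{
	lt_t when = 0;   /* a time in the past => kernel picks "now + one second" */
	release_ts(&when);
}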
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 000000000000..eeb54a26104b
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,575 @@
1#include <linux/sched.h>
2#include <linux/module.h>
3#include <linux/uaccess.h>
4
5#include <litmus/ftdev.h>
6#include <litmus/litmus.h>
7#include <litmus/trace.h>
8
9/******************************************************************************/
10/* Allocation */
11/******************************************************************************/
12
13static struct ftdev cpu_overhead_dev;
14static struct ftdev msg_overhead_dev;
15
16#define cpu_trace_ts_buf(cpu) cpu_overhead_dev.minor[(cpu)].buf
17#define msg_trace_ts_buf(cpu) msg_overhead_dev.minor[(cpu)].buf
18
19DEFINE_PER_CPU(unsigned int, local_irq_count);
20DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, msg_irq_count);
21
22static DEFINE_PER_CPU(unsigned int, cpu_ts_seq_no);
23static DEFINE_PER_CPU(atomic_t, msg_ts_seq_no);
24
25static int64_t cycle_offset[NR_CPUS][NR_CPUS];
26
27void ft_irq_fired(void)
28{
29 /* Only called with preemptions disabled. */
30 /* local counter => not atomic, trace points disable interrupts */
31 this_cpu_inc(local_irq_count);
32 /* counter for messages => read remotely */
33 atomic_inc(this_cpu_ptr(&msg_irq_count));
34
35 if (has_control_page(current))
36 get_control_page(current)->irq_count++;
37}
38
39static inline unsigned int snapshot_local_irqs(void)
40{
41 return this_cpu_xchg(local_irq_count, 0);
42}
43
44static inline unsigned int snapshot_msg_irq_for(int cpu)
45{
46 return atomic_xchg(&per_cpu(msg_irq_count, cpu), 0);
47}
48
49static inline unsigned int snapshot_msg_irq_locally(void)
50{
51 return atomic_xchg(raw_cpu_ptr(&msg_irq_count), 0);
52}
53
54static inline void save_irq_flags(struct timestamp *ts, unsigned int irq_count)
55{
56 /* Store how many interrupts occurred. */
57 ts->irq_count = irq_count;
58 /* Extra flag because ts->irq_count overflows quickly. */
59 ts->irq_flag = irq_count > 0;
60}
61
62#define NO_IRQ_COUNT 0
63#define LOCAL_IRQ_COUNT 1
64#define REMOTE_IRQ_COUNT 2
65
66#define DO_NOT_RECORD_TIMESTAMP 0
67#define RECORD_LOCAL_TIMESTAMP 1
68#define RECORD_OFFSET_TIMESTAMP 2
69
70static inline void __write_record(
71 uint8_t event,
72 uint8_t type,
73 uint16_t pid_fragment,
74 unsigned int irq_count,
75 int record_irq,
76 int hide_irq,
77 uint64_t timestamp,
78 int record_timestamp,
79
80 int only_single_writer,
81 int is_cpu_timestamp,
82 int local_cpu,
83 uint8_t other_cpu)
84{
85 unsigned long flags;
86 unsigned int seq_no;
87 struct timestamp *ts;
88 int cpu;
89 struct ft_buffer* buf;
90
91 /* Avoid preemptions while recording the timestamp. This reduces the
92 * number of "out of order" timestamps in the stream and makes
93 * post-processing easier. */
94
95 local_irq_save(flags);
96
97 if (local_cpu)
98 cpu = smp_processor_id();
99 else
100 cpu = other_cpu;
101
102 /* resolved during function inlining */
103 if (is_cpu_timestamp) {
104 seq_no = __this_cpu_inc_return(cpu_ts_seq_no);
105 buf = cpu_trace_ts_buf(cpu);
106 } else {
107 seq_no = atomic_fetch_inc(&per_cpu(msg_ts_seq_no, cpu));
108 buf = msg_trace_ts_buf(cpu);
109 }
110
111 /* If buf is non-NULL here, then the buffer cannot be deallocated until
112 * we turn interrupts on again. This is because free_timestamp_buffer()
113 * indirectly causes TLB invalidations due to modifications of the
114 * kernel address space, namely via vfree() in free_ft_buffer(), which
115 * cannot be processed until we turn on interrupts again.
116 */
117
118 if (buf &&
119 (only_single_writer /* resolved during function inlining */
120 ? ft_buffer_start_single_write(buf, (void**) &ts)
121 : ft_buffer_start_write(buf, (void**) &ts))) {
122 ts->event = event;
123 ts->seq_no = seq_no;
124
125 ts->task_type = type;
126 ts->pid = pid_fragment;
127
128 ts->cpu = cpu;
129
130 switch (record_irq) {
131 case LOCAL_IRQ_COUNT:
132 if (is_cpu_timestamp)
133 irq_count = snapshot_local_irqs();
134 else
135 irq_count = snapshot_msg_irq_locally();
136 break;
137 case REMOTE_IRQ_COUNT:
138 irq_count = snapshot_msg_irq_for(other_cpu);
139 break;
140 case NO_IRQ_COUNT:
141 /* fall through */
142 default:
143 /* do nothing */
144 break;
145 }
146
147 save_irq_flags(ts, irq_count - hide_irq);
148
149 if (record_timestamp)
150 timestamp = ft_timestamp();
151 if (record_timestamp == RECORD_OFFSET_TIMESTAMP)
152 timestamp += cycle_offset[smp_processor_id()][cpu];
153
154 ts->timestamp = timestamp;
155 ft_buffer_finish_write(buf, ts);
156 }
157
158 local_irq_restore(flags);
159}
160
161
162static inline void write_cpu_timestamp(
163 uint8_t event,
164 uint8_t type,
165 uint16_t pid_fragment,
166 unsigned int irq_count,
167 int record_irq,
168 int hide_irq,
169 uint64_t timestamp,
170 int record_timestamp)
171{
172 __write_record(event, type,
173 pid_fragment,
174 irq_count, record_irq, hide_irq,
175 timestamp, record_timestamp,
176 1 /* only_single_writer */,
177 1 /* is_cpu_timestamp */,
178 1 /* local_cpu */,
179 0xff /* other_cpu */);
180}
181
182static inline void save_msg_timestamp(
183 uint8_t event,
184 int hide_irq)
185{
186 struct task_struct *t = current;
187 __write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
188 t->pid,
189 0, LOCAL_IRQ_COUNT, hide_irq,
190 0, RECORD_LOCAL_TIMESTAMP,
191 0 /* only_single_writer */,
192 0 /* is_cpu_timestamp */,
193 1 /* local_cpu */,
194 0xff /* other_cpu */);
195}
196
197static inline void save_remote_msg_timestamp(
198 uint8_t event,
199 uint8_t remote_cpu)
200{
201 struct task_struct *t = current;
202 __write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
203 t->pid,
204 0, REMOTE_IRQ_COUNT, 0,
205 0, RECORD_OFFSET_TIMESTAMP,
206 0 /* only_single_writer */,
207 0 /* is_cpu_timestamp */,
208 0 /* local_cpu */,
209 remote_cpu);
210}
211
212feather_callback void save_cpu_timestamp_def(unsigned long event,
213 unsigned long type)
214{
215 write_cpu_timestamp(event, type,
216 current->pid,
217 0, LOCAL_IRQ_COUNT, 0,
218 0, RECORD_LOCAL_TIMESTAMP);
219}
220
221feather_callback void save_cpu_timestamp_task(unsigned long event,
222 unsigned long t_ptr)
223{
224 struct task_struct *t = (struct task_struct *) t_ptr;
225 int rt = is_realtime(t);
226
227 write_cpu_timestamp(event, rt ? TSK_RT : TSK_BE,
228 t->pid,
229 0, LOCAL_IRQ_COUNT, 0,
230 0, RECORD_LOCAL_TIMESTAMP);
231}
232
233/* fake timestamp to user-reported time */
234feather_callback void save_cpu_timestamp_time(unsigned long event,
235 unsigned long ptr)
236{
237 uint64_t* time = (uint64_t*) ptr;
238
239 write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
240 current->pid,
241 0, LOCAL_IRQ_COUNT, 0,
242 *time, DO_NOT_RECORD_TIMESTAMP);
243}
244
245/* Record user-reported IRQ count */
246feather_callback void save_cpu_timestamp_irq(unsigned long event,
247 unsigned long irq_counter_ptr)
248{
249 uint64_t* irqs = (uint64_t*) irq_counter_ptr;
250
251 write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
252 current->pid,
253 *irqs, NO_IRQ_COUNT, 0,
254 0, RECORD_LOCAL_TIMESTAMP);
255}
256
257feather_callback void save_cpu_task_latency(unsigned long event,
258 unsigned long when_ptr)
259{
260 lt_t now = litmus_clock();
261 lt_t *when = (lt_t*) when_ptr;
262 lt_t delta = now - *when;
263
264 write_cpu_timestamp(event, TSK_RT,
265 0,
266 0, LOCAL_IRQ_COUNT, 0,
267 delta, DO_NOT_RECORD_TIMESTAMP);
268}
269
270/* Record to remote trace buffer */
271feather_callback void msg_sent_to(unsigned long event, unsigned long to)
272{
273 save_remote_msg_timestamp(event, to);
274}
275
276/* Record to local trace buffer */
277feather_callback void msg_sent_local(unsigned long event)
278{
279 save_msg_timestamp(event, 0);
280}
281
282/* Suppresses one IRQ from the irq count. Used by TS_SEND_RESCHED_END, which is
283 * called from within an interrupt that is expected. */
284feather_callback void msg_received_local(unsigned long event)
285{
286 save_msg_timestamp(event, 1);
287}
288
289/* Record to remote trace buffer */
290feather_callback void msg_received_from(unsigned long event, unsigned long from)
291{
292 save_remote_msg_timestamp(event, from);
293}
294
295static void __add_timestamp_user(struct timestamp *pre_recorded)
296{
297 unsigned long flags;
298 unsigned int seq_no;
299 struct timestamp *ts;
300 struct ft_buffer* buf;
301 int cpu;
302
303 local_irq_save(flags);
304
305 cpu = smp_processor_id();
306 buf = cpu_trace_ts_buf(cpu);
307
308 seq_no = __this_cpu_inc_return(cpu_ts_seq_no);
309 if (buf && ft_buffer_start_single_write(buf, (void**) &ts)) {
310 *ts = *pre_recorded;
311 ts->seq_no = seq_no;
312 ts->cpu = raw_smp_processor_id();
313 save_irq_flags(ts, snapshot_local_irqs());
314 ft_buffer_finish_write(buf, ts);
315 }
316
317 local_irq_restore(flags);
318}
319
320/******************************************************************************/
321/* DEVICE FILE DRIVER */
322/******************************************************************************/
323
324struct calibrate_info {
325 atomic_t ready;
326
327 uint64_t cycle_count;
328};
329
330static void calibrate_helper(void *_info)
331{
332 struct calibrate_info *info = _info;
333 /* check in with master */
334 atomic_inc(&info->ready);
335
336 /* wait for master to signal start */
337 while (atomic_read(&info->ready))
338 cpu_relax();
339
340 /* report time stamp */
341 info->cycle_count = ft_timestamp();
342
343 /* tell master that we are done */
344 atomic_inc(&info->ready);
345}
346
347
348static int64_t calibrate_cpu(int cpu)
349{
350 uint64_t cycles;
351 struct calibrate_info info;
352 unsigned long flags;
353 int64_t delta;
354
355 atomic_set(&info.ready, 0);
356 info.cycle_count = 0;
357 smp_wmb();
358
359 smp_call_function_single(cpu, calibrate_helper, &info, 0);
360
361 /* wait for helper to become active */
362 while (!atomic_read(&info.ready))
363 cpu_relax();
364
365 /* avoid interrupt interference */
366 local_irq_save(flags);
367
368 /* take measurement */
369 atomic_set(&info.ready, 0);
370 smp_wmb();
371 cycles = ft_timestamp();
372
373 /* wait for helper reading */
374 while (!atomic_read(&info.ready))
375 cpu_relax();
376
377 /* positive offset: the other guy is ahead of us */
378 delta = (int64_t) info.cycle_count;
379 delta -= (int64_t) cycles;
380
381 local_irq_restore(flags);
382
383 return delta;
384}
385
386#define NUM_SAMPLES 10
387
388static long calibrate_tsc_offsets(struct ftdev* ftdev, unsigned int idx,
389 unsigned long uarg)
390{
391 int cpu, self, i;
392 int64_t delta, sample;
393
394 preempt_disable();
395 self = smp_processor_id();
396
397 if (uarg)
398 printk(KERN_INFO "Feather-Trace: determining TSC offsets for P%d\n", self);
399
400 for_each_online_cpu(cpu)
401 if (cpu != self) {
402 delta = calibrate_cpu(cpu);
403 for (i = 1; i < NUM_SAMPLES; i++) {
404 sample = calibrate_cpu(cpu);
405 delta = sample < delta ? sample : delta;
406 }
407
408 cycle_offset[self][cpu] = delta;
409
410 if (uarg)
411 printk(KERN_INFO "Feather-Trace: TSC offset for P%d->P%d is %lld cycles.\n",
412 self, cpu, cycle_offset[self][cpu]);
413 }
414
415 preempt_enable();
416 return 0;
417}
418
419#define NO_TIMESTAMPS (2 << CONFIG_SCHED_OVERHEAD_TRACE_SHIFT)
420
421static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
422{
423 unsigned int count = NO_TIMESTAMPS;
424
425 /* An overhead-tracing timestamp should be exactly 16 bytes long. */
426 BUILD_BUG_ON(sizeof(struct timestamp) != 16);
427
428 while (count && !ftdev->minor[idx].buf) {
429 printk("time stamp buffer: trying to allocate %u time stamps for minor=%u.\n", count, idx);
430 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
431 count /= 2;
432 }
433 return ftdev->minor[idx].buf ? 0 : -ENOMEM;
434}
435
436static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
437{
438 struct ft_buffer* tmp = ftdev->minor[idx].buf;
439 smp_rmb();
440 ftdev->minor[idx].buf = NULL;
441 /* Make sure all cores have actually seen buf == NULL before
442 * yanking out the mappings from underneath them. */
443 smp_wmb();
444 free_ft_buffer(tmp);
445}
446
447static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
448 const char __user *from)
449{
450 ssize_t consumed = 0;
451 struct timestamp ts;
452
453 /* don't give us partial timestamps */
454 if (len % sizeof(ts))
455 return -EINVAL;
456
457 while (len >= sizeof(ts)) {
458 if (copy_from_user(&ts, from, sizeof(ts))) {
459 consumed = -EFAULT;
460 goto out;
461 }
462 len -= sizeof(ts);
463 from += sizeof(ts);
464 consumed += sizeof(ts);
465
466 /* Note: this always adds to the buffer of the CPU-local
467 * device, not necessarily to the device that the system call
468 * was invoked on. This is admittedly a bit ugly, but requiring
469 * tasks to only write to the appropriate device would make
470 * tracing from userspace under global and clustered scheduling
471 * exceedingly difficult. Writing to remote buffers would
472 * require to not use ft_buffer_start_single_write(), which we
473 * want to do to reduce the number of atomic ops in the common
474 * case (which is the recording of CPU-local scheduling
475 * overheads).
476 */
477 __add_timestamp_user(&ts);
478 }
479
480out:
481 return consumed;
482}
483
484static int __init init_cpu_ft_overhead_trace(void)
485{
486 int err, cpu;
487
488 printk("Initializing Feather-Trace per-cpu overhead tracing device.\n");
489 err = ftdev_init(&cpu_overhead_dev, THIS_MODULE,
490 num_online_cpus(), "ft_cpu_trace");
491 if (err)
492 goto err_out;
493
494 cpu_overhead_dev.alloc = alloc_timestamp_buffer;
495 cpu_overhead_dev.free = free_timestamp_buffer;
496 cpu_overhead_dev.write = write_timestamp_from_user;
497
498 err = register_ftdev(&cpu_overhead_dev);
499 if (err)
500 goto err_dealloc;
501
502 for (cpu = 0; cpu < NR_CPUS; cpu++) {
503 per_cpu(cpu_ts_seq_no, cpu) = 0;
504 }
505
506 return 0;
507
508err_dealloc:
509 ftdev_exit(&cpu_overhead_dev);
510err_out:
511 printk(KERN_WARNING "Could not register per-cpu ft_trace device.\n");
512 return err;
513}
514
515static int __init init_msg_ft_overhead_trace(void)
516{
517 int err, cpu;
518
519 printk("Initializing Feather-Trace per-cpu message overhead tracing device.\n");
520 err = ftdev_init(&msg_overhead_dev, THIS_MODULE,
521 num_online_cpus(), "ft_msg_trace");
522 if (err)
523 goto err_out;
524
525 msg_overhead_dev.alloc = alloc_timestamp_buffer;
526 msg_overhead_dev.free = free_timestamp_buffer;
527 msg_overhead_dev.calibrate = calibrate_tsc_offsets;
528
529 err = register_ftdev(&msg_overhead_dev);
530 if (err)
531 goto err_dealloc;
532
533 for (cpu = 0; cpu < NR_CPUS; cpu++) {
534 atomic_set(&per_cpu(msg_ts_seq_no, cpu), 0);
535 }
536
537 return 0;
538
539err_dealloc:
540 ftdev_exit(&msg_overhead_dev);
541err_out:
542 printk(KERN_WARNING "Could not register message ft_trace device.\n");
543 return err;
544}
545
546
547static int __init init_ft_overhead_trace(void)
548{
549 int err, i, j;
550
551 for (i = 0; i < NR_CPUS; i++)
552 for (j = 0; j < NR_CPUS; j++)
553 cycle_offset[i][j] = 0;
554
555 err = init_cpu_ft_overhead_trace();
556 if (err)
557 return err;
558
559 err = init_msg_ft_overhead_trace();
560 if (err){
561 ftdev_exit(&cpu_overhead_dev);
562 return err;
563 }
564
565 return 0;
566}
567
568static void __exit exit_ft_overhead_trace(void)
569{
570 ftdev_exit(&cpu_overhead_dev);
571 ftdev_exit(&msg_overhead_dev);
572}
573
574module_init(init_ft_overhead_trace);
575module_exit(exit_ft_overhead_trace);
diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c
new file mode 100644
index 000000000000..06a6a7c17983
--- /dev/null
+++ b/litmus/uncachedev.c
@@ -0,0 +1,102 @@
1#include <linux/sched.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/fs.h>
5#include <linux/errno.h>
6#include <linux/highmem.h>
7#include <asm/page.h>
8#include <linux/miscdevice.h>
9#include <linux/module.h>
10
11#include <litmus/litmus.h>
12
13/* device for allocating pages not cached by the CPU */
14
15#define UNCACHE_NAME "litmus/uncache"
16
17void litmus_uncache_vm_open(struct vm_area_struct *vma)
18{
19}
20
21void litmus_uncache_vm_close(struct vm_area_struct *vma)
22{
23}
24
25int litmus_uncache_vm_fault(struct vm_area_struct* vma,
26 struct vm_fault* vmf)
27{
28 /* modeled after SG DMA video4linux, but without DMA. */
29 /* (see drivers/media/video/videobuf-dma-sg.c) */
30 struct page *page;
31
32 page = alloc_page(GFP_USER);
33 if (!page)
34 return VM_FAULT_OOM;
35
36 clear_user_highpage(page, (unsigned long)vmf->virtual_address);
37 vmf->page = page;
38
39 return 0;
40}
41
42static struct vm_operations_struct litmus_uncache_vm_ops = {
43 .open = litmus_uncache_vm_open,
44 .close = litmus_uncache_vm_close,
45 .fault = litmus_uncache_vm_fault,
46};
47
48static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma)
49{
50 /* first make sure mapper knows what he's doing */
51
52 /* you can only map the "first" page */
53 if (vma->vm_pgoff != 0)
54 return -EINVAL;
55
56 /* you can't share it with anyone */
57 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
58 return -EINVAL;
59
60 /* cannot be expanded, and is not a "normal" page. */
61 vma->vm_flags |= VM_DONTEXPAND;
62
63 /* noncached pages are not explicitly locked in memory (for now). */
64 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
65
66 vma->vm_ops = &litmus_uncache_vm_ops;
67
68 return 0;
69}
70
71static struct file_operations litmus_uncache_fops = {
72 .owner = THIS_MODULE,
73 .mmap = litmus_uncache_mmap,
74};
75
76static struct miscdevice litmus_uncache_dev = {
77 .name = UNCACHE_NAME,
78 .minor = MISC_DYNAMIC_MINOR,
79 .fops = &litmus_uncache_fops,
80	/* pages are not locked in memory, so there is no reason why
81	   anyone should not be allowed to allocate uncached pages */
82 .mode = (S_IRUGO | S_IWUGO),
83};
84
85static int __init init_litmus_uncache_dev(void)
86{
87 int err;
88
89 printk("Initializing LITMUS^RT uncache device.\n");
90 err = misc_register(&litmus_uncache_dev);
91 if (err)
92 printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err);
93 return err;
94}
95
96static void __exit exit_litmus_uncache_dev(void)
97{
98 misc_deregister(&litmus_uncache_dev);
99}
100
101module_init(init_litmus_uncache_dev);
102module_exit(exit_litmus_uncache_dev);
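A userspace sketch of how this device is meant to be used; the /dev/litmus/uncache path is assumed from UNCACHE_NAME, and the mapping constraints mirror litmus_uncache_mmap() above (offset 0, private mapping only):

/* Illustrative only: obtain one uncached page via mmap(). */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static void *map_uncached_page(void)
{
	void *mem;
	int fd = open("/dev/litmus/uncache", O_RDWR);

	if (fd < 0)
		return NULL;
	/* must be MAP_PRIVATE with offset 0; litmus_uncache_mmap() rejects anything else */
	mem = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ | PROT_WRITE,
		   MAP_PRIVATE, fd, 0);
	close(fd);  /* the mapping stays valid after closing the fd */
	return mem == MAP_FAILED ? NULL : mem;
}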