author    Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 07:18:48 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 06:21:18 -0400
commit    8e048c798adaabef530a1526f7ce8c6c3cd3475e (patch)
tree      5a96b3eaeaafecec1bf08ba71a9d0084d39d46eb
parent    bd175e94795774908317a861a883761b75750e35 (diff)
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:
 - library functionality (heaps, rt_domain, prioritization, etc.)
 - budget enforcement logic
 - job management
 - system call backends
 - virtual devices (control page, etc.)
 - scheduler plugin API (and dummy plugin)

This code compiles, but is not yet integrated with the rest of Linux.
-rw-r--r--  include/litmus/affinity.h      |  52
-rw-r--r--  include/litmus/bheap.h         |  77
-rw-r--r--  include/litmus/binheap.h       | 205
-rw-r--r--  include/litmus/budget.h        |  36
-rw-r--r--  include/litmus/ceiling.h       |  36
-rw-r--r--  include/litmus/clustered.h     |  46
-rw-r--r--  include/litmus/edf_common.h    |  25
-rw-r--r--  include/litmus/fdso.h          |  78
-rw-r--r--  include/litmus/fp_common.h     | 105
-rw-r--r--  include/litmus/fpmath.h        | 147
-rw-r--r--  include/litmus/jobs.h          |  10
-rw-r--r--  include/litmus/litmus.h        | 261
-rw-r--r--  include/litmus/litmus_proc.h   |  63
-rw-r--r--  include/litmus/locking.h       |  28
-rw-r--r--  include/litmus/preempt.h       | 162
-rw-r--r--  include/litmus/rt_domain.h     | 182
-rw-r--r--  include/litmus/rt_param.h      |  15
-rw-r--r--  include/litmus/sched_plugin.h  | 128
-rw-r--r--  include/litmus/srp.h           |  28
-rw-r--r--  include/litmus/unistd_32.h     |  21
-rw-r--r--  include/litmus/unistd_64.h     |  33
-rw-r--r--  include/litmus/wait.h          |  57
-rw-r--r--  kernel/sched/litmus.c          | 350
-rw-r--r--  litmus/Kconfig                 | 193
-rw-r--r--  litmus/Makefile                |  18
-rw-r--r--  litmus/bheap.c                 | 316
-rw-r--r--  litmus/binheap.c               | 387
-rw-r--r--  litmus/budget.c                | 116
-rw-r--r--  litmus/clustered.c             | 119
-rw-r--r--  litmus/ctrldev.c               | 160
-rw-r--r--  litmus/edf_common.c            | 200
-rw-r--r--  litmus/fdso.c                  | 308
-rw-r--r--  litmus/fp_common.c             | 119
-rw-r--r--  litmus/jobs.c                  |  82
-rw-r--r--  litmus/litmus.c                | 681
-rw-r--r--  litmus/litmus_proc.c           | 573
-rw-r--r--  litmus/locking.c               | 188
-rw-r--r--  litmus/preempt.c               | 141
-rw-r--r--  litmus/rt_domain.c             | 353
-rw-r--r--  litmus/sched_plugin.c          | 238
-rw-r--r--  litmus/srp.c                   | 308
-rw-r--r--  litmus/sync.c                  | 152
-rw-r--r--  litmus/trace.c                 |  11
-rw-r--r--  litmus/uncachedev.c            | 102
44 files changed, 6901 insertions(+), 9 deletions(-)
diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
new file mode 100644
index 000000000000..4d7c618c8175
--- /dev/null
+++ b/include/litmus/affinity.h
@@ -0,0 +1,52 @@
1#ifndef __LITMUS_AFFINITY_H
2#define __LITMUS_AFFINITY_H
3
4#include <linux/cpumask.h>
5
6/* Works like:
7void get_nearest_available_cpu(
8 cpu_entry_t **nearest,
9 cpu_entry_t *start,
10 cpu_entry_t *entries,
11 int release_master,
12 cpumask_var_t cpus_to_test)
13
14Set release_master = NO_CPU for no Release Master.
15
16We use a macro here to exploit the fact that C-EDF and G-EDF
17have similar structures for their cpu_entry_t structs, even though
18they do not share a common base-struct. The macro allows us to
19avoid code duplication.
20
21 */
22#define get_nearest_available_cpu(nearest, start, entries, release_master, cpus_to_test) \
23{ \
24 (nearest) = NULL; \
25 if (!(start)->linked && likely((start)->cpu != (release_master))) { \
26 (nearest) = (start); \
27 } else { \
28 int __cpu; \
29 \
30 /* FIXME: get rid of the iteration with a bitmask + AND */ \
31 for_each_cpu(__cpu, cpus_to_test) { \
32 if (likely(__cpu != release_master)) { \
33 cpu_entry_t *__entry = &per_cpu((entries), __cpu); \
34 if (cpus_share_cache((start)->cpu, __entry->cpu) \
35 && !__entry->linked) { \
36 (nearest) = __entry; \
37 break; \
38 } \
39 } \
40 } \
41 } \
42 \
43 if ((nearest)) { \
44 TRACE("P%d is closest available CPU to P%d\n", \
45 (nearest)->cpu, (start)->cpu); \
46 } else { \
47 TRACE("Could not find an available CPU close to P%d\n", \
48 (start)->cpu); \
49 } \
50}
51
52#endif
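Illustrative usage sketch (not part of the patch): how a global plugin might invoke the macro above. The cpu_entry_t layout, the per-CPU variable name, and the helper function are assumptions modeled on the G-EDF/C-EDF plugins.

#include <linux/percpu.h>
#include <linux/sched.h>
#include <litmus/litmus.h>      /* NO_CPU */
#include <litmus/debug_trace.h> /* TRACE(), used by the macro */
#include <litmus/affinity.h>

/* Hypothetical per-CPU state; mirrors the fields the macro relies on. */
typedef struct {
	int cpu;
	struct task_struct *linked;	/* job currently linked to this CPU */
} cpu_entry_t;

static DEFINE_PER_CPU(cpu_entry_t, demo_cpu_entries);

static cpu_entry_t *pick_nearby_cpu(cpu_entry_t *start,
				    cpumask_var_t candidates)
{
	cpu_entry_t *nearest;

	/* NO_CPU: no dedicated release-master CPU is excluded. */
	get_nearest_available_cpu(nearest, start, demo_cpu_entries,
				  NO_CPU, candidates);
	return nearest;	/* NULL if no idle, cache-sharing CPU was found */
}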
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
new file mode 100644
index 000000000000..cf4864a498d8
--- /dev/null
+++ b/include/litmus/bheap.h
@@ -0,0 +1,77 @@
1/* bheaps.h -- Binomial Heaps
2 *
3 * (c) 2008, 2009 Bjoern Brandenburg
4 */
5
6#ifndef BHEAP_H
7#define BHEAP_H
8
9#define NOT_IN_HEAP UINT_MAX
10
11struct bheap_node {
12 struct bheap_node* parent;
13 struct bheap_node* next;
14 struct bheap_node* child;
15
16 unsigned int degree;
17 void* value;
18 struct bheap_node** ref;
19};
20
21struct bheap {
22 struct bheap_node* head;
23 /* We cache the minimum of the heap.
24 * This speeds up repeated peek operations.
25 */
26 struct bheap_node* min;
27};
28
29typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
30
31void bheap_init(struct bheap* heap);
32void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
33
34static inline int bheap_node_in_heap(struct bheap_node* h)
35{
36 return h->degree != NOT_IN_HEAP;
37}
38
39static inline int bheap_empty(struct bheap* heap)
40{
41 return heap->head == NULL && heap->min == NULL;
42}
43
44/* insert (and reinitialize) a node into the heap */
45void bheap_insert(bheap_prio_t higher_prio,
46 struct bheap* heap,
47 struct bheap_node* node);
48
49/* merge addition into target */
50void bheap_union(bheap_prio_t higher_prio,
51 struct bheap* target,
52 struct bheap* addition);
53
54struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
55 struct bheap* heap);
56
57struct bheap_node* bheap_take(bheap_prio_t higher_prio,
58 struct bheap* heap);
59
60void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
61int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
62
63void bheap_delete(bheap_prio_t higher_prio,
64 struct bheap* heap,
65 struct bheap_node* node);
66
67/* allocate from memcache */
68struct bheap_node* bheap_node_alloc(int gfp_flags);
69void bheap_node_free(struct bheap_node* hn);
70
71/* allocate a heap node for value and insert into the heap */
72int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
73 void* value, int gfp_flags);
74
75void* bheap_take_del(bheap_prio_t higher_prio,
76 struct bheap* heap);
77#endif
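Illustrative sketch (not part of the patch): ordering binomial-heap nodes by a numeric key stored behind the node's value pointer. The struct and function names are placeholders, and the sketch assumes the bheap_node slab cache has already been set up by the LITMUS^RT core.

#include <linux/errno.h>
#include <linux/gfp.h>
#include <litmus/bheap.h>

struct demo_item {
	int key;
	struct bheap_node *hn;	/* back-reference recorded by bheap_node_init() */
};

/* Return non-zero iff 'a' has higher priority than 'b' (smaller key wins). */
static int demo_item_order(struct bheap_node *a, struct bheap_node *b)
{
	return ((struct demo_item *)a->value)->key <
	       ((struct demo_item *)b->value)->key;
}

static int demo_enqueue(struct bheap *heap, struct demo_item *it)
{
	it->hn = bheap_node_alloc(GFP_ATOMIC);
	if (!it->hn)
		return -ENOMEM;
	bheap_node_init(&it->hn, it);	/* ties the node to 'it' */
	bheap_insert(demo_item_order, heap, it->hn);
	return 0;
}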
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
new file mode 100644
index 000000000000..1cf364701da8
--- /dev/null
+++ b/include/litmus/binheap.h
@@ -0,0 +1,205 @@
1#ifndef LITMUS_BINARY_HEAP_H
2#define LITMUS_BINARY_HEAP_H
3
4#include <linux/kernel.h>
5
6/**
7 * Simple binary heap with add, arbitrary delete, delete_root, and top
8 * operations.
9 *
10 * Style meant to conform with list.h.
11 *
12 * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
13 * heap may be overkill (and perhaps not general enough) for some applications.
14 *
15 * Note: In order to make node swaps fast, a node inserted with a data pointer
16 * may not always hold said data pointer. This is similar to the binomial heap
17 * implementation. This does make node deletion tricky since we have to
18 * (1) locate the node that holds the data pointer to delete, and (2) the
19 * node that was originally inserted with said data pointer. These have to be
20 * coalesced into a single node before removal (see usage of
21 * __binheap_safe_swap()). We have to track node references to accomplish this.
22 */
23
24struct binheap_node {
25 void *data;
26 struct binheap_node *parent;
27 struct binheap_node *left;
28 struct binheap_node *right;
29
30 /* pointer to binheap_node that holds *data for which this binheap_node
31 * was originally inserted. (*data "owns" this node)
32 */
33 struct binheap_node *ref;
34 struct binheap_node **ref_ptr;
35};
36
37/**
38 * Signature of comparator function. Assumed 'less-than' (min-heap).
39 * Pass in 'greater-than' for max-heap.
40 *
41 * TODO: Consider macro-based implementation that allows comparator to be
42 * inlined (similar to Linux red/black tree) for greater efficiency.
43 */
44typedef int (*binheap_order_t)(struct binheap_node *a,
45 struct binheap_node *b);
46
47
48struct binheap {
49 struct binheap_node *root;
50
51 /* pointer to node to take next inserted child */
52 struct binheap_node *next;
53
54 /* pointer to last node in complete binary tree */
55 struct binheap_node *last;
56
57 /* comparator function pointer */
58 binheap_order_t compare;
59};
60
61
62/* Initialized heap nodes not in a heap have parent
63 * set to BINHEAP_POISON.
64 */
65#define BINHEAP_POISON ((void*)(0xdeadbeef))
66
67
68/**
69 * binheap_entry - get the struct for this heap node.
70 * Only valid when called upon heap nodes other than the root handle.
71 * @ptr: the heap node.
72 * @type: the type of struct pointed to by binheap_node::data.
73 * @member: unused.
74 */
75#define binheap_entry(ptr, type, member) \
76((type *)((ptr)->data))
77
78/**
79 * binheap_node_container - get the struct that contains this node.
80 * Only valid when called upon heap nodes other than the root handle.
81 * @ptr: the heap node.
82 * @type: the type of struct the node is embedded in.
83 * @member: the name of the binheap_struct within the (type) struct.
84 */
85#define binheap_node_container(ptr, type, member) \
86container_of((ptr), type, member)
87
88/**
89 * binheap_top_entry - get the struct for the node at the top of the heap.
90 * Only valid when called upon the heap handle node.
91 * @ptr: the special heap-handle node.
92 * @type: the type of the struct the head is embedded in.
93 * @member: the name of the binheap_struct within the (type) struct.
94 */
95#define binheap_top_entry(ptr, type, member) \
96binheap_entry((ptr)->root, type, member)
97
98/**
99 * binheap_delete_root - remove the root element from the heap.
100 * @handle: handle to the heap.
101 * @type: the type of the struct the head is embedded in.
102 * @member: the name of the binheap_struct within the (type) struct.
103 */
104#define binheap_delete_root(handle, type, member) \
105__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
106
107/**
108 * binheap_delete - remove an arbitrary element from the heap.
109 * @to_delete: pointer to node to be removed.
110 * @handle: handle to the heap.
111 */
112#define binheap_delete(to_delete, handle) \
113__binheap_delete((to_delete), (handle))
114
115/**
116 * binheap_add - insert an element to the heap
117 * new_node: node to add.
118 * @handle: handle to the heap.
119 * @type: the type of the struct the head is embedded in.
120 * @member: the name of the binheap_struct within the (type) struct.
121 */
122#define binheap_add(new_node, handle, type, member) \
123__binheap_add((new_node), (handle), container_of((new_node), type, member))
124
125/**
126 * binheap_decrease - re-eval the position of a node (based upon its
127 * original data pointer).
128 * @handle: handle to the heap.
129 * @orig_node: node that was associated with the data pointer
130 * (whose value has changed) when said pointer was
131 * added to the heap.
132 */
133#define binheap_decrease(orig_node, handle) \
134__binheap_decrease((orig_node), (handle))
135
136#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL}
137
138#define BINHEAP_NODE(name) \
139 struct binheap_node name = BINHEAP_NODE_INIT()
140
141
142static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
143{
144 n->data = NULL;
145 n->parent = BINHEAP_POISON;
146 n->left = NULL;
147 n->right = NULL;
148 n->ref = NULL;
149 n->ref_ptr = NULL;
150}
151
152static inline void INIT_BINHEAP_HANDLE(struct binheap *handle,
153 binheap_order_t compare)
154{
155 handle->root = NULL;
156 handle->next = NULL;
157 handle->last = NULL;
158 handle->compare = compare;
159}
160
161/* Returns true if binheap is empty. */
162static inline int binheap_empty(struct binheap *handle)
163{
164 return(handle->root == NULL);
165}
166
167/* Returns true if binheap node is in a heap. */
168static inline int binheap_is_in_heap(struct binheap_node *node)
169{
170 return (node->parent != BINHEAP_POISON);
171}
172
173/* Returns true if binheap node is in given heap. */
174int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap);
175
176/* Add a node to a heap */
177void __binheap_add(struct binheap_node *new_node,
178 struct binheap *handle,
179 void *data);
180
181/**
182 * Removes the root node from the heap. The node is removed after coalescing
183 * the binheap_node with its original data pointer at the root of the tree.
184 *
185 * The 'last' node in the tree is then swapped up to the root and bubbled
186 * down.
187 */
188void __binheap_delete_root(struct binheap *handle,
189 struct binheap_node *container);
190
191/**
192 * Delete an arbitrary node. Bubble node to delete up to the root,
193 * and then delete to root.
194 */
195void __binheap_delete(struct binheap_node *node_to_delete,
196 struct binheap *handle);
197
198/**
199 * Bubble up a node whose pointer has decreased in value.
200 */
201void __binheap_decrease(struct binheap_node *orig_node,
202 struct binheap *handle);
203
204
205#endif
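Illustrative sketch (not part of the patch): embedding a binheap_node in a job struct and ordering by deadline, as the header comments above describe. All names are placeholders.

#include <litmus/binheap.h>

struct demo_job {
	unsigned long long deadline;
	struct binheap_node node;
};

/* 'Less-than' comparator: earlier deadline = higher priority (min-heap). */
static int demo_earlier_deadline(struct binheap_node *a, struct binheap_node *b)
{
	return binheap_entry(a, struct demo_job, node)->deadline <
	       binheap_entry(b, struct demo_job, node)->deadline;
}

static void demo_binheap_usage(void)
{
	struct binheap queue;
	struct demo_job job = { .deadline = 100 };

	INIT_BINHEAP_HANDLE(&queue, demo_earlier_deadline);
	INIT_BINHEAP_NODE(&job.node);

	binheap_add(&job.node, &queue, struct demo_job, node);

	if (!binheap_empty(&queue)) {
		struct demo_job *top =
			binheap_top_entry(&queue, struct demo_job, node);
		/* top == &job: the earliest-deadline job sits at the root */
		binheap_delete_root(&queue, struct demo_job, node);
		(void)top;
	}
}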
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
new file mode 100644
index 000000000000..bd2d5c964f92
--- /dev/null
+++ b/include/litmus/budget.h
@@ -0,0 +1,36 @@
1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_
3
4/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
5 * the next task. */
6void update_enforcement_timer(struct task_struct* t);
7
8inline static int budget_exhausted(struct task_struct* t)
9{
10 return get_exec_time(t) >= get_exec_cost(t);
11}
12
13inline static lt_t budget_remaining(struct task_struct* t)
14{
15 if (!budget_exhausted(t))
16 return get_exec_cost(t) - get_exec_time(t);
17 else
18 /* avoid overflow */
19 return 0;
20}
21
22#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
23
24#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
25 == PRECISE_ENFORCEMENT)
26
27static inline int requeue_preempted_job(struct task_struct* t)
28{
29 /* Add task to ready queue only if not subject to budget enforcement or
30 * if the job has budget remaining. t may be NULL.
31 */
32 return t && !is_completed(t) &&
33 (!budget_exhausted(t) || !budget_enforced(t));
34}
35
36#endif
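Illustrative sketch (not part of the patch): the typical requeue decision a plugin makes for a preempted job. The function name and the domain argument are placeholders.

#include <litmus/litmus.h>
#include <litmus/budget.h>
#include <litmus/rt_domain.h>

static void demo_requeue(rt_domain_t *dom, struct task_struct *prev)
{
	/* Re-add the preempted job only if it has not completed and either is
	 * not budget-enforced or still has budget left; prev may be NULL. */
	if (requeue_preempted_job(prev))
		add_ready(dom, prev);
}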
diff --git a/include/litmus/ceiling.h b/include/litmus/ceiling.h
new file mode 100644
index 000000000000..f3d3889315f7
--- /dev/null
+++ b/include/litmus/ceiling.h
@@ -0,0 +1,36 @@
1#ifndef _LITMUS_CEILING_H_
2#define _LITMUS_CEILING_H_
3
4#ifdef CONFIG_LITMUS_LOCKING
5
6void __srp_ceiling_block(struct task_struct *cur);
7
8DECLARE_PER_CPU(int, srp_objects_in_use);
9
10/* assumes preemptions off */
11void srp_ceiling_block(void)
12{
13 struct task_struct *tsk = current;
14
15 /* Only applies to real-time tasks. */
16 if (!is_realtime(tsk))
17 return;
18
19 /* Bail out early if there aren't any SRP resources around. */
20 if (likely(!raw_cpu_read(srp_objects_in_use)))
21 return;
22
23 /* Avoid recursive ceiling blocking. */
24 if (unlikely(tsk->rt_param.srp_non_recurse))
25 return;
26
27 /* must take slow path */
28 __srp_ceiling_block(tsk);
29}
30
31#else
32#define srp_ceiling_block() /* nothing */
33#endif
34
35
36#endif \ No newline at end of file
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
new file mode 100644
index 000000000000..fc7f0f87966e
--- /dev/null
+++ b/include/litmus/clustered.h
@@ -0,0 +1,46 @@
1#ifndef CLUSTERED_H
2#define CLUSTERED_H
3
4/* Which cache level should be used to group CPUs into clusters?
5 * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
6 * global scheduling).
7 */
8enum cache_level {
9 GLOBAL_CLUSTER = 0,
10 L1_CLUSTER = 1,
11 L2_CLUSTER = 2,
12 L3_CLUSTER = 3
13};
14
15int parse_cache_level(const char *str, enum cache_level *level);
16const char* cache_level_name(enum cache_level level);
17
18/* expose a cache level in a /proc dir */
19struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
20 enum cache_level* level);
21
22
23
24struct scheduling_cluster {
25 unsigned int id;
26 /* list of CPUs that are part of this cluster */
27 struct list_head cpus;
28};
29
30struct cluster_cpu {
31 unsigned int id; /* which CPU is this? */
32 struct list_head cluster_list; /* List of the CPUs in this cluster. */
33 struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
34};
35
36int get_cluster_size(enum cache_level level);
37
38int assign_cpus_to_clusters(enum cache_level level,
39 struct scheduling_cluster* clusters[],
40 unsigned int num_clusters,
41 struct cluster_cpu* cpus[],
42 unsigned int num_cpus);
43
44int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index);
45
46#endif
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 000000000000..bbaf22ea7f12
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,25 @@
1/*
2 * EDF common data structures and utility functions shared by all EDF
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_COMMON_H__
11#define __UNC_EDF_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 000000000000..fd9b30dbfb34
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,78 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13#include <linux/slab.h>
14
15#define MAX_OBJECT_DESCRIPTORS 85
16
17typedef enum {
18 MIN_OBJ_TYPE = 0,
19
20 FMLP_SEM = 0,
21 SRP_SEM = 1,
22
23 MPCP_SEM = 2,
24 MPCP_VS_SEM = 3,
25 DPCP_SEM = 4,
26 PCP_SEM = 5,
27
28 DFLP_SEM = 6,
29
30 MAX_OBJ_TYPE = 6
31} obj_type_t;
32
33struct inode_obj_id {
34 struct list_head list;
35 atomic_t count;
36 struct inode* inode;
37
38 obj_type_t type;
39 void* obj;
40 unsigned int id;
41};
42
43struct fdso_ops;
44
45struct od_table_entry {
46 unsigned int used;
47
48 struct inode_obj_id* obj;
49 const struct fdso_ops* class;
50};
51
52struct fdso_ops {
53 int (*create)(void** obj_ref, obj_type_t type, void* __user);
54 void (*destroy)(obj_type_t type, void*);
55 int (*open) (struct od_table_entry*, void* __user);
56 int (*close) (struct od_table_entry*);
57};
58
59/* translate a userspace supplied od into the raw table entry
60 * returns NULL if od is invalid
61 */
62struct od_table_entry* get_entry_for_od(int od);
63
64/* translate a userspace supplied od into the associated object
65 * returns NULL if od is invalid
66 */
67static inline void* od_lookup(int od, obj_type_t type)
68{
69 struct od_table_entry* e = get_entry_for_od(od);
70 return e && e->obj->type == type ? e->obj->obj : NULL;
71}
72
73#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
74#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
75#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
76
77
78#endif
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 000000000000..19356c0fa6c1
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,105 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60 BUG_ON(index >= LITMUS_MAX_PRIORITY);
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105#endif
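Illustrative sketch (not part of the patch): driving the bitmask-indexed queue. It assumes index 0 is the highest priority and that the task's heap_node has already been allocated by the LITMUS^RT core; names are placeholders.

#include <litmus/litmus.h>
#include <litmus/fp_common.h>

static struct fp_prio_queue demo_fpq;

static void demo_fp_usage(struct task_struct *t, unsigned int prio)
{
	struct task_struct *head;

	fp_prio_queue_init(&demo_fpq);
	fp_prio_add(&demo_fpq, t, prio);	/* sets bit 'prio', enqueues t */

	/* fpq_find() scans FP_PRIO_BIT_WORDS words for the lowest set bit,
	 * so peek/take cost O(LITMUS_MAX_PRIORITY / BITS_PER_LONG) steps,
	 * not O(number of queued tasks). */
	head = fp_prio_peek(&demo_fpq);	/* == t */
	head = fp_prio_take(&demo_fpq);	/* dequeues t, clears bit 'prio' */
	(void)head;
}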
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..642de98542c8
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,147 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#include <linux/math64.h>
5
6#ifndef __KERNEL__
7#include <stdint.h>
8#define abs(x) (((x) < 0) ? -(x) : x)
9#endif
10
11// Use 64-bit because we want to track things at the nanosecond scale.
12// This can lead to very large numbers.
13typedef int64_t fpbuf_t;
14typedef struct
15{
16 fpbuf_t val;
17} fp_t;
18
19#define FP_SHIFT 10
20#define ROUND_BIT (FP_SHIFT - 1)
21
22#define _fp(x) ((fp_t) {x})
23
24#ifdef __KERNEL__
25static const fp_t LITMUS_FP_ZERO = {.val = 0};
26static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
27#endif
28
29static inline fp_t FP(fpbuf_t x)
30{
31 return _fp(((fpbuf_t) x) << FP_SHIFT);
32}
33
34/* divide two integers to obtain a fixed point value */
35static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
36{
37 return _fp(div64_s64(FP(a).val, (b)));
38}
39
40static inline fpbuf_t _point(fp_t x)
41{
42 return (x.val % (1 << FP_SHIFT));
43
44}
45
46#define fp2str(x) x.val
47/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
48#define _FP_ "%ld/1024"
49
50static inline fpbuf_t _floor(fp_t x)
51{
52 return x.val >> FP_SHIFT;
53}
54
55/* FIXME: negative rounding */
56static inline fpbuf_t _round(fp_t x)
57{
58 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
59}
60
61/* multiply two fixed point values */
62static inline fp_t _mul(fp_t a, fp_t b)
63{
64 return _fp((a.val * b.val) >> FP_SHIFT);
65}
66
67static inline fp_t _div(fp_t a, fp_t b)
68{
69#if !defined(__KERNEL__) && !defined(unlikely)
70#define unlikely(x) (x)
71#define DO_UNDEF_UNLIKELY
72#endif
73 /* try not to overflow */
74 if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
75 return _fp((a.val / b.val) << FP_SHIFT);
76 else
77 return _fp((a.val << FP_SHIFT) / b.val);
78#ifdef DO_UNDEF_UNLIKELY
79#undef unlikely
80#undef DO_UNDEF_UNLIKELY
81#endif
82}
83
84static inline fp_t _add(fp_t a, fp_t b)
85{
86 return _fp(a.val + b.val);
87}
88
89static inline fp_t _sub(fp_t a, fp_t b)
90{
91 return _fp(a.val - b.val);
92}
93
94static inline fp_t _neg(fp_t x)
95{
96 return _fp(-x.val);
97}
98
99static inline fp_t _abs(fp_t x)
100{
101 return _fp(abs(x.val));
102}
103
104/* works the same as casting float/double to integer */
105static inline fpbuf_t _fp_to_integer(fp_t x)
106{
107 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
108}
109
110static inline fp_t _integer_to_fp(fpbuf_t x)
111{
112 return _frac(x,1);
113}
114
115static inline int _leq(fp_t a, fp_t b)
116{
117 return a.val <= b.val;
118}
119
120static inline int _geq(fp_t a, fp_t b)
121{
122 return a.val >= b.val;
123}
124
125static inline int _lt(fp_t a, fp_t b)
126{
127 return a.val < b.val;
128}
129
130static inline int _gt(fp_t a, fp_t b)
131{
132 return a.val > b.val;
133}
134
135static inline int _eq(fp_t a, fp_t b)
136{
137 return a.val == b.val;
138}
139
140static inline fp_t _max(fp_t a, fp_t b)
141{
142 if (a.val < b.val)
143 return b;
144 else
145 return a;
146}
147#endif
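A worked example of the 10-bit fixed-point helpers above (illustrative only; the values follow directly from FP_SHIFT = 10).

#include <litmus/fpmath.h>

static void demo_fp_math(void)
{
	fp_t third = _frac(1, 3);	/* 1/3 -> 341/1024              */
	fp_t three = FP(3);		/* 3.0 -> 3072/1024             */
	fp_t prod  = _mul(third, three);/* (341*3072)>>10 = 1023/1024   */

	fpbuf_t truncated = _fp_to_integer(prod);	/* 0: truncates toward zero */
	fpbuf_t rounded   = _round(prod);		/* 1: rounds to nearest     */

	(void)truncated;
	(void)rounded;
}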
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 000000000000..24771dfaebf8
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,10 @@
1#ifndef __LITMUS_JOBS_H__
2#define __LITMUS_JOBS_H__
3
4void prepare_for_next_period(struct task_struct *t);
5void release_at(struct task_struct *t, lt_t start);
6
7long default_wait_for_release_at(lt_t release_time);
8long complete_job(void);
9
10#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index c87863c9b231..a6eb534ee0fa 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -6,7 +6,50 @@
6#ifndef _LINUX_LITMUS_H_ 6#ifndef _LINUX_LITMUS_H_
7#define _LINUX_LITMUS_H_ 7#define _LINUX_LITMUS_H_
8 8
9#include <litmus/debug_trace.h>
10
11#ifdef CONFIG_RELEASE_MASTER
12extern atomic_t release_master_cpu;
13#endif
14
15/* in_list - is a given list_head queued on some list?
16 */
17static inline int in_list(struct list_head* list)
18{
19 return !( /* case 1: deleted */
20 (list->next == LIST_POISON1 &&
21 list->prev == LIST_POISON2)
22 ||
23 /* case 2: initialized */
24 (list->next == list &&
25 list->prev == list)
26 );
27}
28
29struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
30
31#define NO_CPU 0xffffffff
32
33void litmus_fork(struct task_struct *tsk);
34void litmus_exec(void);
35/* clean up real-time state of a task */
36void litmus_clear_state(struct task_struct *dead_tsk);
37void exit_litmus(struct task_struct *dead_tsk);
38
39/* Prevent the plugin from being switched-out from underneath a code
40 * path. Might sleep, so may be called only from non-atomic context. */
41void litmus_plugin_switch_disable(void);
42void litmus_plugin_switch_enable(void);
43
44long litmus_admit_task(struct task_struct *tsk);
45void litmus_exit_task(struct task_struct *tsk);
46void litmus_dealloc(struct task_struct *tsk);
47void litmus_do_exit(struct task_struct *tsk);
48int litmus_be_migrate_to(int cpu);
49
9#define is_realtime(t) ((t)->policy == SCHED_LITMUS) 50#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
51#define rt_transition_pending(t) \
52 ((t)->rt_param.transition_pending)
10 53
11#define tsk_rt(t) (&(t)->rt_param) 54#define tsk_rt(t) (&(t)->rt_param)
12 55
@@ -28,6 +71,7 @@
28#define get_partition(t) (tsk_rt(t)->task_params.cpu) 71#define get_partition(t) (tsk_rt(t)->task_params.cpu)
29#define get_priority(t) (tsk_rt(t)->task_params.priority) 72#define get_priority(t) (tsk_rt(t)->task_params.priority)
30#define get_class(t) (tsk_rt(t)->task_params.cls) 73#define get_class(t) (tsk_rt(t)->task_params.cls)
74#define get_release_policy(t) (tsk_rt(t)->task_params.release_policy)
31 75
32/* job_param macros */ 76/* job_param macros */
33#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) 77#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
@@ -35,6 +79,15 @@
35#define get_release(t) (tsk_rt(t)->job_params.release) 79#define get_release(t) (tsk_rt(t)->job_params.release)
36#define get_lateness(t) (tsk_rt(t)->job_params.lateness) 80#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
37 81
82/* release policy macros */
83#define is_periodic(t) (get_release_policy(t) == TASK_PERIODIC)
84#define is_sporadic(t) (get_release_policy(t) == TASK_SPORADIC)
85#ifdef CONFIG_ALLOW_EARLY_RELEASE
86#define is_early_releasing(t) (get_release_policy(t) == TASK_EARLY)
87#else
88#define is_early_releasing(t) (0)
89#endif
90
38#define is_hrt(t) \ 91#define is_hrt(t) \
39 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) 92 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
40#define is_srt(t) \ 93#define is_srt(t) \
@@ -48,6 +101,188 @@ static inline lt_t litmus_clock(void)
48 return ktime_to_ns(ktime_get()); 101 return ktime_to_ns(ktime_get());
49} 102}
50 103
104/* A macro to convert from nanoseconds to ktime_t. */
105#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
106
107#define get_domain(t) (tsk_rt(t)->domain)
108
109/* Honor the flag in the preempt_count variable that is set
110 * when scheduling is in progress.
111 */
112#define is_current_running() \
113 ((current)->state == TASK_RUNNING || \
114 preempt_count() & PREEMPT_ACTIVE)
115
116#define is_released(t, now) \
117 (lt_before_eq(get_release(t), now))
118#define is_tardy(t, now) \
119 (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
120
121/* real-time comparison macros */
122#define earlier_deadline(a, b) (lt_before(\
123 (a)->rt_param.job_params.deadline,\
124 (b)->rt_param.job_params.deadline))
125#define earlier_release(a, b) (lt_before(\
126 (a)->rt_param.job_params.release,\
127 (b)->rt_param.job_params.release))
128
129void preempt_if_preemptable(struct task_struct* t, int on_cpu);
130
131#define bheap2task(hn) ((struct task_struct*) hn->value)
132
133#ifdef CONFIG_NP_SECTION
134
135static inline int is_kernel_np(struct task_struct *t)
136{
137 return tsk_rt(t)->kernel_np;
138}
139
140static inline int is_user_np(struct task_struct *t)
141{
142 return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
143}
144
145static inline void request_exit_np(struct task_struct *t)
146{
147 if (is_user_np(t)) {
148 /* Set the flag that tells user space to call
149 * into the kernel at the end of a critical section. */
150 if (likely(tsk_rt(t)->ctrl_page)) {
151 TRACE_TASK(t, "setting delayed_preemption flag\n");
152 tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
153 }
154 }
155}
156
157static inline void make_np(struct task_struct *t)
158{
159 tsk_rt(t)->kernel_np++;
160}
161
162/* Caller should check if preemption is necessary when
163 * the function return 0.
164 */
165static inline int take_np(struct task_struct *t)
166{
167 return --tsk_rt(t)->kernel_np;
168}
169
170/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
171static inline int request_exit_np_atomic(struct task_struct *t)
172{
173 union np_flag old, new;
174
175 if (tsk_rt(t)->ctrl_page) {
176 old.raw = tsk_rt(t)->ctrl_page->sched.raw;
177 if (old.np.flag == 0) {
178 /* no longer non-preemptive */
179 return 0;
180 } else if (old.np.preempt) {
181 /* already set, nothing for us to do */
182 return 1;
183 } else {
184 /* non preemptive and flag not set */
185 new.raw = old.raw;
186 new.np.preempt = 1;
187 /* if we get old back, then we atomically set the flag */
188 return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
189 /* If we raced with a concurrent change, then so be
190 * it. Deliver it by IPI. We don't want an unbounded
191 * retry loop here since tasks might exploit that to
192 * keep the kernel busy indefinitely. */
193 }
194 } else
195 return 0;
196}
197
198#else
199
200static inline int is_kernel_np(struct task_struct* t)
201{
202 return 0;
203}
204
205static inline int is_user_np(struct task_struct* t)
206{
207 return 0;
208}
209
210static inline void request_exit_np(struct task_struct *t)
211{
212 /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
213 BUG();
214}
215
216static inline int request_exit_np_atomic(struct task_struct *t)
217{
218 return 0;
219}
220
221#endif
222
223static inline void clear_exit_np(struct task_struct *t)
224{
225 if (likely(tsk_rt(t)->ctrl_page))
226 tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
227}
228
229static inline int is_np(struct task_struct *t)
230{
231#ifdef CONFIG_SCHED_DEBUG_TRACE
232 int kernel, user;
233 kernel = is_kernel_np(t);
234 user = is_user_np(t);
235 if (kernel || user)
236 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
237
238 kernel, user);
239 return kernel || user;
240#else
241 return unlikely(is_kernel_np(t) || is_user_np(t));
242#endif
243}
244
245static inline int is_present(struct task_struct* t)
246{
247 return t && tsk_rt(t)->present;
248}
249
250static inline int is_completed(struct task_struct* t)
251{
252 return t && tsk_rt(t)->completed;
253}
254
255
256/* Used to convert ns-specified execution costs and periods into
257 * integral quanta equivalents.
258 */
259#define LITMUS_QUANTUM_LENGTH_NS (CONFIG_LITMUS_QUANTUM_LENGTH_US * 1000ULL)
260
261/* make the unit explicit */
262typedef unsigned long quanta_t;
263
264enum round {
265 FLOOR,
266 CEIL
267};
268
269static inline quanta_t time2quanta(lt_t time, enum round round)
270{
271 s64 quantum_length = LITMUS_QUANTUM_LENGTH_NS;
272
273 if (do_div(time, quantum_length) && round == CEIL)
274 time++;
275 return (quanta_t) time;
276}
277
278static inline lt_t quanta2time(quanta_t quanta)
279{
280 return quanta * LITMUS_QUANTUM_LENGTH_NS;
281}
282
283/* By how much is cpu staggered behind CPU 0? */
284u64 cpu_stagger_offset(int cpu);
285
51static inline struct control_page* get_control_page(struct task_struct *t) 286static inline struct control_page* get_control_page(struct task_struct *t)
52{ 287{
53 return tsk_rt(t)->ctrl_page; 288 return tsk_rt(t)->ctrl_page;
@@ -58,4 +293,30 @@ static inline int has_control_page(struct task_struct* t)
58 return tsk_rt(t)->ctrl_page != NULL; 293 return tsk_rt(t)->ctrl_page != NULL;
59} 294}
60 295
296
297#ifdef CONFIG_SCHED_OVERHEAD_TRACE
298
299#define TS_SYSCALL_IN_START \
300 if (has_control_page(current)) { \
301 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); \
302 }
303
304#define TS_SYSCALL_IN_END \
305 if (has_control_page(current)) { \
306 unsigned long flags; \
307 uint64_t irqs; \
308 local_irq_save(flags); \
309 irqs = get_control_page(current)->irq_count - \
310 get_control_page(current)->irq_syscall_start; \
311 __TS_SYSCALL_IN_END(&irqs); \
312 local_irq_restore(flags); \
313 }
314
315#else
316
317#define TS_SYSCALL_IN_START
318#define TS_SYSCALL_IN_END
319
320#endif
321
61#endif 322#endif
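A small worked example for time2quanta() above (illustrative; it assumes CONFIG_LITMUS_QUANTUM_LENGTH_US=1000, i.e. 1 ms quanta).

#include <litmus/litmus.h>

static void demo_quanta(void)
{
	lt_t t = 2500000;	/* 2.5 ms expressed in nanoseconds */

	/* 2,500,000 ns / 1,000,000 ns per quantum leaves a remainder, so: */
	quanta_t lo = time2quanta(t, FLOOR);	/* == 2 */
	quanta_t hi = time2quanta(t, CEIL);	/* == 3 */

	(void)lo;
	(void)hi;
}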
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
new file mode 100644
index 000000000000..a5db24c03ec0
--- /dev/null
+++ b/include/litmus/litmus_proc.h
@@ -0,0 +1,63 @@
1#include <litmus/sched_plugin.h>
2#include <linux/proc_fs.h>
3
4int __init init_litmus_proc(void);
5void exit_litmus_proc(void);
6
7struct cd_mapping
8{
9 int id;
10 cpumask_var_t mask;
11 struct proc_dir_entry *proc_file;
12};
13
14struct domain_proc_info
15{
16 int num_cpus;
17 int num_domains;
18
19 struct cd_mapping *cpu_to_domains;
20 struct cd_mapping *domain_to_cpus;
21};
22
23/*
24 * On success, returns 0 and sets the pointer to the location of the new
25 * proc dir entry, otherwise returns an error code and sets pde to NULL.
26 */
27long make_plugin_proc_dir(struct sched_plugin* plugin,
28 struct proc_dir_entry** pde);
29
30/*
31 * Plugins should deallocate all child proc directory entries before
32 * calling this, to avoid memory leaks.
33 */
34void remove_plugin_proc_dir(struct sched_plugin* plugin);
35
36/*
37 * Setup the CPU <-> sched domain mappings in proc
38 */
39long activate_domain_proc(struct domain_proc_info* map);
40
41/*
42 * Remove the CPU <-> sched domain mappings from proc
43 */
44long deactivate_domain_proc(void);
45
46/*
47 * Alloc memory for the mapping
48 * Note: Does not set up proc files. Use make_sched_domain_maps for that.
49 */
50long init_domain_proc_info(struct domain_proc_info* map,
51 int num_cpus, int num_domains);
52
53/*
54 * Free memory of the mapping
55 * Note: Does not clean up proc files. Use deactivate_domain_proc for that.
56 */
57void destroy_domain_proc_info(struct domain_proc_info* map);
58
59/* Copy at most size-1 bytes from ubuf into kbuf, null-terminate buf, and
60 * remove a '\n' if present. Returns the number of bytes that were read or
61 * -EFAULT. */
62int copy_and_chomp(char *kbuf, unsigned long ksize,
63 __user const char* ubuf, unsigned long ulength);
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
new file mode 100644
index 000000000000..4d7b870cb443
--- /dev/null
+++ b/include/litmus/locking.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_LOCKING_H
2#define LITMUS_LOCKING_H
3
4struct litmus_lock_ops;
5
6/* Generic base struct for LITMUS^RT userspace semaphores.
7 * This structure should be embedded in protocol-specific semaphores.
8 */
9struct litmus_lock {
10 struct litmus_lock_ops *ops;
11 int type;
12};
13
14struct litmus_lock_ops {
15 /* Current task tries to obtain / drop a reference to a lock.
16 * Optional methods, allowed by default. */
17 int (*open)(struct litmus_lock*, void* __user);
18 int (*close)(struct litmus_lock*);
19
20 /* Current tries to lock/unlock this lock (mandatory methods). */
21 int (*lock)(struct litmus_lock*);
22 int (*unlock)(struct litmus_lock*);
23
24 /* The lock is no longer being referenced (mandatory method). */
25 void (*deallocate)(struct litmus_lock*);
26};
27
28#endif
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
new file mode 100644
index 000000000000..bdf5d8e52344
--- /dev/null
+++ b/include/litmus/preempt.h
@@ -0,0 +1,162 @@
1#ifndef LITMUS_PREEMPT_H
2#define LITMUS_PREEMPT_H
3
4#include <linux/types.h>
5#include <linux/cache.h>
6#include <linux/percpu.h>
7#include <asm/atomic.h>
8
9#include <litmus/debug_trace.h>
10
11DECLARE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12
13#ifdef CONFIG_PREEMPT_STATE_TRACE
14const char* sched_state_name(int s);
15#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
16#else
17#define TRACE_STATE(fmt, args...) /* ignore */
18#endif
19
20#define VERIFY_SCHED_STATE(x) \
21 do { int __s = get_sched_state(); \
22 if ((__s & (x)) == 0) \
23 TRACE_STATE("INVALID s=0x%x (%s) not " \
24 "in 0x%x (%s) [%s]\n", \
25 __s, sched_state_name(__s), \
26 (x), #x, __FUNCTION__); \
27 } while (0);
28
29#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
30 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
31 cpu, (x), sched_state_name(x), \
32 (y), sched_state_name(y))
33
34
35typedef enum scheduling_state {
36 TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
37 * should be scheduled, and the processor does not
38 * plan to invoke schedule(). */
39 SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the
40 * processor should reschedule, but this has not
41 * been communicated yet (IPI still pending). */
42 WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to
43 * reschedule and will do so shortly. */
44 TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(),
45 * has selected a new task to schedule, but has not
46 * yet performed the actual context switch. */
47 PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context
48 * switch, but a remote processor has already
49 * determined that a higher-priority task became
50 * eligible after the task was picked. */
51} sched_state_t;
52
53static inline sched_state_t get_sched_state_on(int cpu)
54{
55 return atomic_read(&per_cpu(resched_state, cpu));
56}
57
58static inline sched_state_t get_sched_state(void)
59{
60 return atomic_read(this_cpu_ptr(&resched_state));
61}
62
63static inline int is_in_sched_state(int possible_states)
64{
65 return get_sched_state() & possible_states;
66}
67
68static inline int cpu_is_in_sched_state(int cpu, int possible_states)
69{
70 return get_sched_state_on(cpu) & possible_states;
71}
72
73static inline void set_sched_state(sched_state_t s)
74{
75 TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
76 atomic_set(this_cpu_ptr(&resched_state), s);
77}
78
79static inline int sched_state_transition(sched_state_t from, sched_state_t to)
80{
81 sched_state_t old_state;
82
83 old_state = atomic_cmpxchg(this_cpu_ptr(&resched_state), from, to);
84 if (old_state == from) {
85 TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
86 return 1;
87 } else
88 return 0;
89}
90
91static inline int sched_state_transition_on(int cpu,
92 sched_state_t from,
93 sched_state_t to)
94{
95 sched_state_t old_state;
96
97 old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
98 if (old_state == from) {
99 TRACE_SCHED_STATE_CHANGE(from, to, cpu);
100 return 1;
101 } else
102 return 0;
103}
104
105/* Plugins must call this function after they have decided which job to
106 * schedule next. IMPORTANT: this function must be called while still holding
107 * the lock that is used to serialize scheduling decisions.
108 *
109 * (Ideally, we would like to use runqueue locks for this purpose, but that
110 * would lead to deadlocks with the migration code.)
111 */
112static inline void sched_state_task_picked(void)
113{
114 VERIFY_SCHED_STATE(WILL_SCHEDULE);
115
 116 /* WILL_SCHEDULE has only a local transition => simple store is ok */
117 set_sched_state(TASK_PICKED);
118}
119
120static inline void sched_state_entered_schedule(void)
121{
122 /* Update state for the case that we entered schedule() not due to
123 * set_tsk_need_resched() */
124 set_sched_state(WILL_SCHEDULE);
125}
126
127/* Called by schedule() to check if the scheduling decision is still valid
 128 * after a context switch. Returns 1 if the CPU needs to reschedule. */
129static inline int sched_state_validate_switch(void)
130{
131 int decision_ok = 0;
132
133 VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED | WILL_SCHEDULE);
134
135 if (is_in_sched_state(TASK_PICKED)) {
136 /* Might be good; let's try to transition out of this
137 * state. This must be done atomically since remote processors
138 * may try to change the state, too. */
139 decision_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
140 }
141
142 if (!decision_ok)
143 TRACE_STATE("validation failed (%s)\n",
144 sched_state_name(get_sched_state()));
145
146 return !decision_ok;
147}
148
149/* State transition events. See litmus/preempt.c for details. */
150void sched_state_will_schedule(struct task_struct* tsk);
151void sched_state_ipi(void);
152/* Cause a CPU (remote or local) to reschedule. */
153void litmus_reschedule(int cpu);
154void litmus_reschedule_local(void);
155
156#ifdef CONFIG_DEBUG_KERNEL
157void sched_state_plugin_check(void);
158#else
159#define sched_state_plugin_check() /* no check */
160#endif
161
162#endif
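Illustrative sketch (not part of the patch): where sched_state_task_picked() sits in a plugin's schedule() callback. The lock name and the job-picking helper are placeholders.

#include <linux/spinlock.h>
#include <litmus/preempt.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

static struct task_struct *demo_pick_next_job(void)
{
	return NULL;	/* placeholder: a real plugin consults its ready queue */
}

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	struct task_struct *next;

	raw_spin_lock(&demo_lock);
	next = demo_pick_next_job();
	/* Must be called while the decision is still serialized by demo_lock:
	 * a remote CPU that later raises another job's priority then observes
	 * TASK_PICKED and can flag PICKED_WRONG_TASK, forcing revalidation. */
	sched_state_task_picked();
	raw_spin_unlock(&demo_lock);

	return next;
}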
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 000000000000..ac249292e866
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,182 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8#include <litmus/bheap.h>
9
10#define RELEASE_QUEUE_SLOTS 127 /* prime */
11
12struct _rt_domain;
13
14typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
15typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
16
17struct release_queue {
18 /* each slot maintains a list of release heaps sorted
19 * by release time */
20 struct list_head slot[RELEASE_QUEUE_SLOTS];
21};
22
23typedef struct _rt_domain {
24 /* runnable rt tasks are in here */
25 raw_spinlock_t ready_lock;
26 struct bheap ready_queue;
27
28 /* real-time tasks waiting for release are in here */
29 raw_spinlock_t release_lock;
30 struct release_queue release_queue;
31
32#ifdef CONFIG_RELEASE_MASTER
33 int release_master;
34#endif
35
36 /* for moving tasks to the release queue */
37 raw_spinlock_t tobe_lock;
38 struct list_head tobe_released;
39
40 /* how do we check if we need to kick another CPU? */
41 check_resched_needed_t check_resched;
42
43 /* how do we release jobs? */
44 release_jobs_t release_jobs;
45
46 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order;
48} rt_domain_t;
49
50struct release_heap {
51 /* list_head for per-time-slot list */
52 struct list_head list;
53 lt_t release_time;
54 /* all tasks to be released at release_time */
55 struct bheap heap;
56 /* used to trigger the release */
57 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */
61 struct hrtimer_start_on_info info;
62#endif
63 /* required for the timer callback */
64 rt_domain_t* dom;
65};
66
67
68static inline struct task_struct* __next_ready(rt_domain_t* rt)
69{
70 struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
71 if (hn)
72 return bheap2task(hn);
73 else
74 return NULL;
75}
76
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check,
 79 release_jobs_t release);
80
81void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task);
84
85static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
88 if (hn)
89 return bheap2task(hn);
90 else
91 return NULL;
92}
93
94static inline struct task_struct* __peek_ready(rt_domain_t* rt)
95{
96 struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
97 if (hn)
98 return bheap2task(hn);
99 else
100 return NULL;
101}
102
103static inline int is_queued(struct task_struct *t)
104{
105 BUG_ON(!tsk_rt(t)->heap_node);
106 return bheap_node_in_heap(tsk_rt(t)->heap_node);
107}
108
109static inline void remove(rt_domain_t* rt, struct task_struct *t)
110{
111 bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
112}
113
114static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
115{
116 unsigned long flags;
117 /* first we need the write lock for rt_ready_queue */
118 raw_spin_lock_irqsave(&rt->ready_lock, flags);
119 __add_ready(rt, new);
120 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
121}
122
123static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
124{
125 unsigned long flags;
126 raw_spin_lock_irqsave(&rt->ready_lock, flags);
127 __merge_ready(rt, tasks);
128 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
129}
130
131static inline struct task_struct* take_ready(rt_domain_t* rt)
132{
133 unsigned long flags;
134 struct task_struct* ret;
135 /* first we need the write lock for rt_ready_queue */
136 raw_spin_lock_irqsave(&rt->ready_lock, flags);
137 ret = __take_ready(rt);
138 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
139 return ret;
140}
141
142
143static inline void add_release(rt_domain_t* rt, struct task_struct *task)
144{
145 unsigned long flags;
146 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
147 __add_release(rt, task);
148 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
149}
150
151#ifdef CONFIG_RELEASE_MASTER
152void __add_release_on(rt_domain_t* rt, struct task_struct *task,
153 int target_cpu);
154
155static inline void add_release_on(rt_domain_t* rt,
156 struct task_struct *task,
157 int target_cpu)
158{
159 unsigned long flags;
160 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
161 __add_release_on(rt, task, target_cpu);
162 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
163}
164#endif
165
166static inline int __jobs_pending(rt_domain_t* rt)
167{
168 return !bheap_empty(&rt->ready_queue);
169}
170
171static inline int jobs_pending(rt_domain_t* rt)
172{
173 unsigned long flags;
174 int ret;
175 /* first we need the write lock for rt_ready_queue */
176 raw_spin_lock_irqsave(&rt->ready_lock, flags);
177 ret = !bheap_empty(&rt->ready_queue);
178 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
179 return ret;
180}
181
182#endif
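Illustrative sketch (not part of the patch): wiring up an EDF-ordered rt_domain and feeding jobs into it. 'demo_check_resched' is a placeholder, and passing NULL as the release callback assumes the default release handler in rt_domain.c.

#include <litmus/litmus.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>

static rt_domain_t demo_domain;

static int demo_check_resched(rt_domain_t *rt)
{
	/* A real plugin compares the new ready-queue head with the currently
	 * scheduled job and calls litmus_reschedule() if preemption is needed. */
	return 0;
}

static void demo_domain_setup(void)
{
	edf_domain_init(&demo_domain, demo_check_resched, NULL);
}

static void demo_job_arrival(struct task_struct *t)
{
	if (is_released(t, litmus_clock()))
		add_ready(&demo_domain, t);	/* eligible to run now */
	else
		add_release(&demo_domain, t);	/* parked until its release time */
}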
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index ce76faa9c6d7..7b9a90965c25 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -84,12 +84,12 @@ struct rt_task {
84}; 84};
85 85
86union np_flag { 86union np_flag {
87 uint64_t raw; 87 uint32_t raw;
88 struct { 88 struct {
89 /* Is the task currently in a non-preemptive section? */ 89 /* Is the task currently in a non-preemptive section? */
90 uint64_t flag:31; 90 uint32_t flag:31;
91 /* Should the task call into the scheduler? */ 91 /* Should the task call into the scheduler? */
92 uint64_t preempt:1; 92 uint32_t preempt:1;
93 } np; 93 } np;
94}; 94};
95 95
@@ -110,10 +110,10 @@ union np_flag {
110struct control_page { 110struct control_page {
111 /* This flag is used by userspace to communicate non-preempive 111 /* This flag is used by userspace to communicate non-preempive
112 * sections. */ 112 * sections. */
113 volatile union np_flag sched; 113 volatile __attribute__ ((aligned (8))) union np_flag sched;
114 114
115 volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is 115 /* Incremented by the kernel each time an IRQ is handled. */
116 * handled. */ 116 volatile __attribute__ ((aligned (8))) uint64_t irq_count;
117 117
118 /* Locking overhead tracing: userspace records here the time stamp 118 /* Locking overhead tracing: userspace records here the time stamp
119 * and IRQ counter prior to starting the system call. */ 119 * and IRQ counter prior to starting the system call. */
@@ -171,9 +171,6 @@ struct pfair_param;
171 * be explicitly set up before the task set is launched. 171 * be explicitly set up before the task set is launched.
172 */ 172 */
173struct rt_param { 173struct rt_param {
174 /* Generic flags available for plugin-internal use. */
175 unsigned int flags:8;
176
177 /* do we need to check for srp blocking? */ 174 /* do we need to check for srp blocking? */
178 unsigned int srp_non_recurse:1; 175 unsigned int srp_non_recurse:1;
179 176
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 000000000000..0ccccd6ae1af
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,128 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10#ifdef CONFIG_LITMUS_LOCKING
11#include <litmus/locking.h>
12#endif
13
14/************************ setup/tear down ********************/
15
16typedef long (*activate_plugin_t) (void);
17typedef long (*deactivate_plugin_t) (void);
18
19struct domain_proc_info;
20typedef long (*get_domain_proc_info_t) (struct domain_proc_info **info);
21
22
23/********************* scheduler invocation ******************/
24/* The main scheduling function, called to select the next task to dispatch. */
25typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
26/* Clean up after the task switch has occurred.
27 * This function is called after every (even non-rt) task switch.
28 */
29typedef void (*finish_switch_t)(struct task_struct *prev);
30
31
32/********************* task state changes ********************/
33
34/* Called to setup a new real-time task.
35 * Release the first job, enqueue, etc.
36 * Task may already be running.
37 */
38typedef void (*task_new_t) (struct task_struct *task,
39 int on_rq,
40 int running);
41
42/* Called to re-introduce a task after blocking.
43 * Can potentially be called multiple times.
44 */
45typedef void (*task_wake_up_t) (struct task_struct *task);
46/* called to notify the plugin of a blocking real-time task
47 * it will only be called for real-time tasks and before schedule is called */
48typedef void (*task_block_t) (struct task_struct *task);
49/* Called when a real-time task exits or changes to a different scheduling
50 * class.
51 * Free any allocated resources
52 */
53typedef void (*task_exit_t) (struct task_struct *);
54
55/* task_exit() is called with interrupts disabled and runqueue locks held, and
 56 * thus cannot block or spin. task_cleanup() is called sometime later
57 * without any locks being held.
58 */
59typedef void (*task_cleanup_t) (struct task_struct *);
60
61#ifdef CONFIG_LITMUS_LOCKING
62/* Called when the current task attempts to create a new lock of a given
63 * protocol type. */
64typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
65 void* __user config);
66#endif
67
68
69/********************* sys call backends ********************/
70/* This function causes the caller to sleep until the next release */
71typedef long (*complete_job_t) (void);
72
73typedef long (*admit_task_t)(struct task_struct* tsk);
74
75typedef long (*wait_for_release_at_t)(lt_t release_time);
76
77/* Informs the plugin when a synchronous release takes place. */
78typedef void (*synchronous_release_at_t)(lt_t time_zero);
79
80/************************ misc routines ***********************/
81
82
83struct sched_plugin {
84 struct list_head list;
85 /* basic info */
86 char *plugin_name;
87
88 /* setup */
89 activate_plugin_t activate_plugin;
90 deactivate_plugin_t deactivate_plugin;
91 get_domain_proc_info_t get_domain_proc_info;
92
93 /* scheduler invocation */
94 schedule_t schedule;
95 finish_switch_t finish_switch;
96
97 /* syscall backend */
98 complete_job_t complete_job;
99 wait_for_release_at_t wait_for_release_at;
100 synchronous_release_at_t synchronous_release_at;
101
102 /* task state changes */
103 admit_task_t admit_task;
104
105 task_new_t task_new;
106 task_wake_up_t task_wake_up;
107 task_block_t task_block;
108
109 task_exit_t task_exit;
110 task_cleanup_t task_cleanup;
111
112#ifdef CONFIG_LITMUS_LOCKING
113 /* locking protocols */
114 allocate_lock_t allocate_lock;
115#endif
116} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
117
118
119extern struct sched_plugin *litmus;
120
121int register_sched_plugin(struct sched_plugin* plugin);
122struct sched_plugin* find_sched_plugin(const char* name);
123void print_sched_plugins(struct seq_file *m);
124
125
126extern struct sched_plugin linux_sched_plugin;
127
128#endif
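Illustrative sketch (not part of the patch): the rough shape of registering a plugin through the interface above, modeled loosely on the dummy Linux plugin. All callback bodies and names are placeholders; a real plugin provides more callbacks.

#include <linux/module.h>
#include <litmus/preempt.h>
#include <litmus/sched_plugin.h>

static struct task_struct *demo_plugin_schedule(struct task_struct *prev)
{
	sched_state_task_picked();	/* see include/litmus/preempt.h */
	return NULL;			/* schedule nothing: fall back to Linux */
}

static long demo_plugin_admit_task(struct task_struct *tsk)
{
	return 0;			/* accept every task */
}

static struct sched_plugin demo_plugin
	__attribute__ ((__aligned__(SMP_CACHE_BYTES))) = {
	.plugin_name	= "DEMO",
	.schedule	= demo_plugin_schedule,
	.admit_task	= demo_plugin_admit_task,
};

static int __init demo_plugin_init(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(demo_plugin_init);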
diff --git a/include/litmus/srp.h b/include/litmus/srp.h
new file mode 100644
index 000000000000..c9a4552b2bf3
--- /dev/null
+++ b/include/litmus/srp.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_SRP_H
2#define LITMUS_SRP_H
3
4struct srp_semaphore;
5
6struct srp_priority {
7 struct list_head list;
8 unsigned int priority;
9 pid_t pid;
10};
11#define list2prio(l) list_entry(l, struct srp_priority, list)
12
13/* struct for uniprocessor SRP "semaphore" */
14struct srp_semaphore {
15 struct litmus_lock litmus_lock;
16 struct srp_priority ceiling;
17 struct task_struct* owner;
18 int cpu; /* cpu associated with this "semaphore" and resource */
19};
20
21/* map a task to its SRP preemption level priority */
22typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
23/* Must be updated by each plugin that uses SRP.*/
24extern srp_prioritization_t get_srp_prio;
25
26struct srp_semaphore* allocate_srp_semaphore(void);
27
28#endif
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
new file mode 100644
index 000000000000..94264c27d9ac
--- /dev/null
+++ b/include/litmus/unistd_32.h
@@ -0,0 +1,21 @@
1/*
2 * included from arch/x86/include/asm/unistd_32.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9#define __NR_get_rt_task_param __LSC(1)
10#define __NR_complete_job __LSC(2)
11#define __NR_od_open __LSC(3)
12#define __NR_od_close __LSC(4)
13#define __NR_litmus_lock __LSC(5)
14#define __NR_litmus_unlock __LSC(6)
15#define __NR_query_job_no __LSC(7)
16#define __NR_wait_for_job_release __LSC(8)
17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11)
20
21#define NR_litmus_syscalls 12
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
new file mode 100644
index 000000000000..d5ced0d2642c
--- /dev/null
+++ b/include/litmus/unistd_64.h
@@ -0,0 +1,33 @@
1/*
2 * included from arch/x86/include/asm/unistd_64.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
10#define __NR_get_rt_task_param __LSC(1)
11__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
12#define __NR_complete_job __LSC(2)
13__SYSCALL(__NR_complete_job, sys_complete_job)
14#define __NR_od_open __LSC(3)
15__SYSCALL(__NR_od_open, sys_od_open)
16#define __NR_od_close __LSC(4)
17__SYSCALL(__NR_od_close, sys_od_close)
18#define __NR_litmus_lock __LSC(5)
19__SYSCALL(__NR_litmus_lock, sys_litmus_lock)
20#define __NR_litmus_unlock __LSC(6)
21__SYSCALL(__NR_litmus_unlock, sys_litmus_unlock)
22#define __NR_query_job_no __LSC(7)
23__SYSCALL(__NR_query_job_no, sys_query_job_no)
24#define __NR_wait_for_job_release __LSC(8)
25__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
26#define __NR_wait_for_ts_release __LSC(9)
27__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
28#define __NR_release_ts __LSC(10)
29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call)
32
33#define NR_litmus_syscalls 12
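To illustrate how the relative numbering is consumed from user space, here is a hypothetical, stand-alone sketch (not part of this patch). The base offset __NR_LITMUS is architecture-specific and defined elsewhere in the tree; the value used below is only a placeholder, and real applications should rely on liblitmus or the installed kernel headers instead.

/* Hypothetical user-space sketch, not part of this patch. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_LITMUS
#define __NR_LITMUS 398              /* placeholder base; see the arch unistd.h */
#endif
#define __NR_null_call (__NR_LITMUS + 11)

int main(void)
{
	/* null_call is a no-op syscall, useful for measuring syscall overhead;
	 * passing NULL is assumed to skip the optional timestamp output. */
	long ret = syscall(__NR_null_call, NULL);

	printf("null_call returned %ld\n", ret);
	return ret == 0 ? 0 : 1;
}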
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 000000000000..ce1347c355f8
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
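A hypothetical in-kernel usage sketch (not part of this patch): a locking protocol that queues blocked tasks in deadline order could use these helpers as follows. get_deadline() is assumed to return the task's absolute deadline as an lt_t, as defined in litmus.h (not shown here).

/* Hypothetical sketch, not part of this patch. */
static void demo_block_on(wait_queue_head_t *wq)
{
	prio_wait_queue_t pwq;

	/* order waiters by absolute deadline, break ties by PID */
	init_prio_waitqueue_entry_tie(&pwq, current,
				      get_deadline(current),
				      (unsigned int) current->pid);

	set_current_state(TASK_UNINTERRUPTIBLE);
	add_wait_queue_prio_exclusive(wq, &pwq);

	/* suspend; the unlock path is assumed to dequeue and wake us,
	 * e.g. via __waitqueue_remove_first() */
	schedule();
}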
diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
new file mode 100644
index 000000000000..924358babde2
--- /dev/null
+++ b/kernel/sched/litmus.c
@@ -0,0 +1,350 @@
1/* This file is included from kernel/sched.c */
2
3#include "sched.h"
4
5#include <litmus/trace.h>
6#include <litmus/sched_trace.h>
7
8#include <litmus/litmus.h>
9#include <litmus/budget.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/preempt.h>
12
13static void update_time_litmus(struct rq *rq, struct task_struct *p)
14{
15 u64 delta = rq->clock - p->se.exec_start;
16 if (unlikely((s64)delta < 0))
17 delta = 0;
18 /* per job counter */
19 p->rt_param.job_params.exec_time += delta;
20 /* task counter */
21 p->se.sum_exec_runtime += delta;
22 if (delta) {
23 TRACE_TASK(p, "charged %llu exec time (total:%llu, rem:%llu)\n",
24 delta, p->rt_param.job_params.exec_time, budget_remaining(p));
25 }
26 /* sched_clock() */
27 p->se.exec_start = rq->clock;
28 cpuacct_charge(p, delta);
29}
30
31static void double_rq_lock(struct rq *rq1, struct rq *rq2);
32static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
33
34static struct task_struct *
35litmus_schedule(struct rq *rq, struct task_struct *prev)
36{
37 struct task_struct *next;
38
39#ifdef CONFIG_SMP
40 struct rq* other_rq;
41 long was_running;
42 lt_t _maybe_deadlock = 0;
43#endif
44
45 /* let the plugin schedule */
46 next = litmus->schedule(prev);
47
48 sched_state_plugin_check();
49
50#ifdef CONFIG_SMP
51 /* check if a global plugin pulled a task from a different RQ */
52 if (next && task_rq(next) != rq) {
53 /* we need to migrate the task */
54 other_rq = task_rq(next);
55 TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
56
57 /* while we drop the lock, the prev task could change its
58 * state
59 */
60 BUG_ON(prev != current);
61 was_running = is_current_running();
62 mb();
63 raw_spin_unlock(&rq->lock);
64
65 /* Don't race with a concurrent switch. This could deadlock in
66 * the case of cross or circular migrations. It's the job of
67 * the plugin to make sure that doesn't happen.
68 */
69 TRACE_TASK(next, "stack_in_use=%d\n",
70 next->rt_param.stack_in_use);
71 if (next->rt_param.stack_in_use != NO_CPU) {
72 TRACE_TASK(next, "waiting to deschedule\n");
73 _maybe_deadlock = litmus_clock();
74 }
75 while (next->rt_param.stack_in_use != NO_CPU) {
76 cpu_relax();
77 mb();
78 if (next->rt_param.stack_in_use == NO_CPU)
79 TRACE_TASK(next,"descheduled. Proceeding.\n");
80
81 if (lt_before(_maybe_deadlock + 1000000000L,
82 litmus_clock())) {
83 /* We've been spinning for 1s.
84 * Something can't be right!
85 * Let's abandon the task and bail out; at least
86 * we will have debug info instead of a hard
87 * deadlock.
88 */
89#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK
90 BUG();
91#else
92 TRACE_TASK(next,"stack too long in use. "
93 "Deadlock?\n");
94 next = NULL;
95
96 /* bail out */
97 raw_spin_lock(&rq->lock);
98 return next;
99#endif
100 }
101 }
102#ifdef __ARCH_WANT_UNLOCKED_CTXSW
103 if (next->on_cpu)
104 TRACE_TASK(next, "waiting for !oncpu");
105 while (next->on_cpu) {
106 cpu_relax();
107 mb();
108 }
109#endif
110 double_rq_lock(rq, other_rq);
111 mb();
112 if (is_realtime(current) && is_current_running() != was_running) {
113 TRACE_TASK(prev,
114 "state changed while we dropped"
115 " the lock: is_running=%d, was_running=%d\n",
116 is_current_running(), was_running);
117 if (is_current_running() && !was_running) {
118 /* prev task became unblocked
119 * we need to simulate normal sequence of events
120 * to scheduler plugins.
121 */
122 litmus->task_block(prev);
123 litmus->task_wake_up(prev);
124 }
125 }
126
127 set_task_cpu(next, smp_processor_id());
128
129 /* DEBUG: now that we have the lock we need to make sure a
130 * couple of things still hold:
131 * - it is still a real-time task
132 * - it is still runnable (could have been stopped)
133 * If either is violated, then the active plugin is
134 * doing something wrong.
135 */
136 if (!is_realtime(next) || !tsk_rt(next)->present) {
137 /* BAD BAD BAD */
138 TRACE_TASK(next,"BAD: migration invariant FAILED: "
139 "rt=%d present=%d\n",
140 is_realtime(next),
141 tsk_rt(next)->present);
142 /* drop the task */
143 next = NULL;
144 }
145 /* release the other CPU's runqueue, but keep ours */
146 raw_spin_unlock(&other_rq->lock);
147 }
148#endif
149
150 if (next) {
151#ifdef CONFIG_SMP
152 next->rt_param.stack_in_use = rq->cpu;
153#else
154 next->rt_param.stack_in_use = 0;
155#endif
156 update_rq_clock(rq);
157 next->se.exec_start = rq->clock;
158 }
159
160 update_enforcement_timer(next);
161 return next;
162}
163
164static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
165 int flags)
166{
167 if (flags & ENQUEUE_WAKEUP) {
168 sched_trace_task_resume(p);
169 tsk_rt(p)->present = 1;
170 /* LITMUS^RT plugins need to update the state
171 * _before_ making it available in global structures.
172 * Linux gets away with being lazy about the task state
173 * update. We can't do that, hence we already update the task
174 * state here.
175 *
176 * WARNING: this needs to be re-evaluated when porting
177 * to newer kernel versions.
178 */
179 p->state = TASK_RUNNING;
180 litmus->task_wake_up(p);
181
182 rq->litmus.nr_running++;
183 } else {
184 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
185 p->se.exec_start = rq->clock;
186 }
187}
188
189static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
190 int flags)
191{
192 if (flags & DEQUEUE_SLEEP) {
193 litmus->task_block(p);
194 tsk_rt(p)->present = 0;
195 sched_trace_task_block(p);
196
197 rq->litmus.nr_running--;
198 } else
199 TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
200}
201
202static void yield_task_litmus(struct rq *rq)
203{
204 TS_SYSCALL_IN_START;
205 TS_SYSCALL_IN_END;
206
207 BUG_ON(rq->curr != current);
208 /* sched_yield() is called to trigger delayed preemptions.
209 * Thus, mark the current task as needing to be rescheduled.
210 * This will cause the scheduler plugin to be invoked, which can
211 * then determine if a preemption is still required.
212 */
213 clear_exit_np(current);
214 litmus_reschedule_local();
215
216 TS_SYSCALL_OUT_START;
217}
218
219/* Plugins are responsible for this.
220 */
221static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
222{
223}
224
225static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
226{
227}
228
229/* pick_next_task_litmus() - hands off to litmus_schedule()
230 *
231 * returns the next task to be scheduled
232 */
233static struct task_struct *pick_next_task_litmus(struct rq *rq, struct task_struct *prev)
234{
235 struct task_struct *next;
236
237 if (is_realtime(prev))
238 update_time_litmus(rq, prev);
239
240 TS_PLUGIN_SCHED_START;
241 next = litmus_schedule(rq, prev);
242 TS_PLUGIN_SCHED_END;
243
244 /* This is a bit backwards: the other classes call put_prev_task()
245 * _after_ they've determined that the class has some queued tasks.
246 * We can't determine this easily because each plugin manages its own
247 * ready queues, and because in the case of globally shared queues,
248 * we really don't know whether we'll have something ready even if
249 * we test here. So we do it in reverse: first ask the plugin to
250 * provide a task, and if we find one, call put_prev_task() on the
251 * previously scheduled task.
252 */
253 if (next)
254 put_prev_task(rq, prev);
255
256 return next;
257}
258
259static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
260{
261 if (is_realtime(p) && !queued) {
262 update_time_litmus(rq, p);
263 /* budget check for QUANTUM_ENFORCEMENT tasks */
264 if (budget_enforced(p) && budget_exhausted(p)) {
265 litmus_reschedule_local();
266 }
267 }
268}
269
270static void switched_to_litmus(struct rq *rq, struct task_struct *p)
271{
272}
273
274static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
275 int oldprio)
276{
277}
278
279unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
280{
281 /* return infinity */
282 return 0;
283}
284
285/* This is called when a task becomes a real-time task, either due to a SCHED_*
286 * class transition or due to PI mutex inheritance. We don't handle Linux PI
287 * mutex inheritance yet (and probably never will). Use LITMUS^RT-provided
288 * synchronization primitives instead.
289 */
290static void set_curr_task_litmus(struct rq *rq)
291{
292 rq->curr->se.exec_start = rq->clock;
293}
294
295
296#ifdef CONFIG_SMP
297/* execve tries to rebalance the task in this scheduling domain.
298 * We don't care about the scheduling domain; this can get called from
299 * exec, fork, wakeup.
300 */
301static int
302select_task_rq_litmus(struct task_struct *p, int cpu, int sd_flag, int flags)
303{
304 /* preemption is already disabled.
305 * We don't want to change cpu here
306 */
307 return task_cpu(p);
308}
309#endif
310
311static void update_curr_litmus(struct rq *rq)
312{
313 struct task_struct *p = rq->curr;
314
315 if (!is_realtime(p))
316 return;
317
318 update_time_litmus(rq, p);
319}
320
321const struct sched_class litmus_sched_class = {
322 /* From 34f971f6 the stop/migrate worker threads have a class on
323 * their own, which is the highest prio class. We don't support
324 * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0
325 * CPU capacity.
326 */
327 .next = &dl_sched_class,
328 .enqueue_task = enqueue_task_litmus,
329 .dequeue_task = dequeue_task_litmus,
330 .yield_task = yield_task_litmus,
331
332 .check_preempt_curr = check_preempt_curr_litmus,
333
334 .pick_next_task = pick_next_task_litmus,
335 .put_prev_task = put_prev_task_litmus,
336
337#ifdef CONFIG_SMP
338 .select_task_rq = select_task_rq_litmus,
339#endif
340
341 .set_curr_task = set_curr_task_litmus,
342 .task_tick = task_tick_litmus,
343
344 .get_rr_interval = get_rr_interval_litmus,
345
346 .prio_changed = prio_changed_litmus,
347 .switched_to = switched_to_litmus,
348
349 .update_curr = update_curr_litmus,
350};
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 5408ef6b159b..fdf31f3dd6c2 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -1,5 +1,184 @@
1menu "LITMUS^RT" 1menu "LITMUS^RT"
2 2
3menu "Scheduling"
4
5config RELEASE_MASTER
6 bool "Release-master Support"
7 depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
8 default n
9 help
10 Allow one processor to act as a dedicated interrupt processor
11 that services all timer interrupts, but that does not schedule
12 real-time tasks. See RTSS'09 paper for details
13 (http://www.cs.unc.edu/~anderson/papers.html).
14
15config PREFER_LOCAL_LINKING
16 bool "Link newly arrived tasks locally if possible"
17 depends on SMP
18 default y
19 help
20 In linking-based schedulers such as GSN-EDF, if an idle CPU processes
21 a job arrival (i.e., when a job resumed or was released), it can
22 either link the task to itself and schedule it immediately (to avoid
23 unnecessary scheduling latency) or it can try to link it to the CPU
24 where it executed previously (to maximize cache affinity, at the
25 expense of increased latency due to the need to send an IPI).
26
27 In lightly loaded systems, this option can significantly reduce
28 scheduling latencies. In heavily loaded systems (where CPUs are
29 rarely idle), it will likely make little difference.
30
31 If unsure, say yes.
32
33config LITMUS_QUANTUM_LENGTH_US
34 int "quantum length (in us)"
35 default 1000
36 range 500 10000
37 help
38 Set the desired quantum length, in microseconds, which
39 determines the granularity of scheduling in
40 quantum-driven plugins (primarily PFAIR). This parameter does not
41 affect event-driven plugins (such as the EDF-based plugins and P-FP).
42 Default: 1000us = 1ms.
43
44config BUG_ON_MIGRATION_DEADLOCK
45 bool "Panic on suspected migration deadlock"
46 default y
47 help
48 This is a debugging option. The LITMUS^RT migration support code for
49 global scheduling contains a simple heuristic to detect when the
50 system deadlocks due to circular stack dependencies.
51
52 For example, such a deadlock exists if CPU 0 waits for task A's stack
53 to become available while using task B's stack, and CPU 1 waits for
54 task B's stack to become available while using task A's stack. Such
55 a situation can arise in (buggy) global scheduling plugins.
56
57 With this option enabled, such a scenario will result in a BUG().
58 You can turn off this option when debugging on real hardware (e.g.,
59 to rescue traces, etc. that would be hard to get after a panic).
60
61 Only turn this off if you really know what you are doing. If this
62 BUG() triggers, the scheduler is broken and turning off this option
63 won't fix it.
64
65
66endmenu
67
68menu "Real-Time Synchronization"
69
70config NP_SECTION
71 bool "Non-preemptive section support"
72 default y
73 help
74 Allow tasks to become non-preemptable.
75 Note that plugins still need to explicitly support non-preemptivity.
76 Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support.
77
78 This is required to support locking protocols such as the FMLP.
79 If disabled, all tasks will be considered preemptable at all times.
80
81config LITMUS_LOCKING
82 bool "Support for real-time locking protocols"
83 depends on NP_SECTION
84 default y
85 help
86 Enable LITMUS^RT's multiprocessor real-time locking protocols with
87 predictable maximum blocking times.
88
89 Say Yes if you want to include locking protocols such as the FMLP and
90 Baker's SRP.
91
92endmenu
93
94menu "Performance Enhancements"
95
96config SCHED_CPU_AFFINITY
97 bool "Local Migration Affinity"
98 depends on X86 && SYSFS
99 default y
100 help
101 Rescheduled tasks prefer CPUs near their previously used CPU.
102 This may improve cache performance through possible preservation of
103 cache affinity, at the expense of (slightly) more involved scheduling
104 logic.
105
106 Warning: May make bugs harder to find since tasks may migrate less often.
107
108 NOTES:
109 * Feature is not utilized by PFair/PD^2.
110
111 Say Yes if unsure.
112
113config ALLOW_EARLY_RELEASE
114 bool "Allow Early Releasing"
115 default y
116 help
117 Allow tasks to release jobs early (while still maintaining job
118 precedence constraints). Only supported by EDF schedulers. Early
119 releasing must be explicitly requested by real-time tasks via
120 the task_params passed to sys_set_rt_task_param().
121
122 Early releasing can improve job response times while maintaining
123 real-time correctness. However, it can easily peg your CPUs
124 since tasks never suspend to wait for their next job. As such, early
125 releasing is really only useful in the context of implementing
126 bandwidth servers, interrupt handling threads, or short-lived
127 computations.
128
129 Beware that early releasing may affect real-time analysis
130 if using locking protocols or I/O.
131
132 Say Yes if unsure.
133
134choice
135 prompt "EDF Tie-Break Behavior"
136 default EDF_TIE_BREAK_LATENESS_NORM
137 help
138 Allows the configuration of tie-breaking behavior when the deadlines
139 of two EDF-scheduled tasks are equal.
140
141 config EDF_TIE_BREAK_LATENESS
142 bool "Lateness-based Tie Break"
143 help
144 Break ties between two jobs, A and B, based upon the lateness of their
145 prior jobs. The job with the greatest lateness has priority. Note that
146 lateness has a negative value if the prior job finished before its
147 deadline.
148
149 config EDF_TIE_BREAK_LATENESS_NORM
150 bool "Normalized Lateness-based Tie Break"
151 help
152 Break ties between two jobs, A and B, based upon the lateness, normalized
153 by relative deadline, of their prior jobs. The job with the greatest
154 normalized lateness has priority. Note that lateness has a negative value
155 if the prior job finished before its deadline.
156
157 Normalized lateness tie-breaks are likely preferable to non-normalized
158 tie-breaks if the execution times and/or relative deadlines of tasks in a
159 task set vary greatly.
160
161 config EDF_TIE_BREAK_HASH
162 bool "Hash-based Tie Breaks"
163 help
164 Break ties between two jobs, A and B, with equal deadlines by using a
165 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
166 A has a ~50% chance of winning a given tie-break.
167
168 config EDF_PID_TIE_BREAK
169 bool "PID-based Tie Breaks"
170 help
171 Break ties based upon OS-assigned thread IDs. Use this option if
172 required by the algorithm's real-time analysis or if per-task response-time
173 jitter must be minimized.
174
175 NOTES:
176 * This tie-breaking method was the default in LITMUS^RT 2012.2 and before.
177
178endchoice
179
180endmenu
181
3menu "Tracing" 182menu "Tracing"
4 183
5config FEATHER_TRACE 184config FEATHER_TRACE
@@ -154,6 +333,20 @@ config SCHED_DEBUG_TRACE_CALLER
154 333
155 If unsure, say No. 334 If unsure, say No.
156 335
336config PREEMPT_STATE_TRACE
337 bool "Trace preemption state machine transitions"
338 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
339 default n
340 help
341 With this option enabled, each CPU will log when it transitions
342 states in the preemption state machine. This state machine is
343 used to determine how to react to IPIs (avoid races with in-flight IPIs).
344
345 Warning: this creates a lot of information in the debug trace. Only
346 recommended when you are debugging preemption-related races.
347
348 If unsure, say No.
349
157endmenu 350endmenu
158 351
159endmenu 352endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
index 6318f1c6fac8..c85abc7389c5 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -2,6 +2,24 @@
2# Makefile for LITMUS^RT 2# Makefile for LITMUS^RT
3# 3#
4 4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 binheap.o \
20 ctrldev.o \
21 uncachedev.o
22
5obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 23obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
6obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 24obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
7obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 25obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..2707e0122b6d
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,316 @@
1#include <linux/bug.h>
2#include <linux/kernel.h>
3#include <litmus/bheap.h>
4
5void bheap_init(struct bheap* heap)
6{
7 heap->head = NULL;
8 heap->min = NULL;
9}
10
11void bheap_node_init(struct bheap_node** _h, void* value)
12{
13 struct bheap_node* h = *_h;
14 h->parent = NULL;
15 h->next = NULL;
16 h->child = NULL;
17 h->degree = NOT_IN_HEAP;
18 h->value = value;
19 h->ref = _h;
20}
21
22
23/* make child a subtree of root */
24static void __bheap_link(struct bheap_node* root,
25 struct bheap_node* child)
26{
27 child->parent = root;
28 child->next = root->child;
29 root->child = child;
30 root->degree++;
31}
32
33/* merge root lists */
34static struct bheap_node* __bheap_merge(struct bheap_node* a,
35 struct bheap_node* b)
36{
37 struct bheap_node* head = NULL;
38 struct bheap_node** pos = &head;
39
40 while (a && b) {
41 if (a->degree < b->degree) {
42 *pos = a;
43 a = a->next;
44 } else {
45 *pos = b;
46 b = b->next;
47 }
48 pos = &(*pos)->next;
49 }
50 if (a)
51 *pos = a;
52 else
53 *pos = b;
54 return head;
55}
56
57/* reverse a linked list of nodes. also clears parent pointer */
58static struct bheap_node* __bheap_reverse(struct bheap_node* h)
59{
60 struct bheap_node* tail = NULL;
61 struct bheap_node* next;
62
63 if (!h)
64 return h;
65
66 h->parent = NULL;
67 while (h->next) {
68 next = h->next;
69 h->next = tail;
70 tail = h;
71 h = next;
72 h->parent = NULL;
73 }
74 h->next = tail;
75 return h;
76}
77
78static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
79 struct bheap_node** prev, struct bheap_node** node)
80{
81 struct bheap_node *_prev, *cur;
82 *prev = NULL;
83
84 if (!heap->head) {
85 *node = NULL;
86 return;
87 }
88
89 *node = heap->head;
90 _prev = heap->head;
91 cur = heap->head->next;
92 while (cur) {
93 if (higher_prio(cur, *node)) {
94 *node = cur;
95 *prev = _prev;
96 }
97 _prev = cur;
98 cur = cur->next;
99 }
100}
101
102static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
103 struct bheap_node* h2)
104{
105 struct bheap_node* h1;
106 struct bheap_node *prev, *x, *next;
107 if (!h2)
108 return;
109 h1 = heap->head;
110 if (!h1) {
111 heap->head = h2;
112 return;
113 }
114 h1 = __bheap_merge(h1, h2);
115 prev = NULL;
116 x = h1;
117 next = x->next;
118 while (next) {
119 if (x->degree != next->degree ||
120 (next->next && next->next->degree == x->degree)) {
121 /* nothing to do, advance */
122 prev = x;
123 x = next;
124 } else if (higher_prio(x, next)) {
125 /* x becomes the root of next */
126 x->next = next->next;
127 __bheap_link(x, next);
128 } else {
129 /* next becomes the root of x */
130 if (prev)
131 prev->next = next;
132 else
133 h1 = next;
134 __bheap_link(next, x);
135 x = next;
136 }
137 next = x->next;
138 }
139 heap->head = h1;
140}
141
142static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
143 struct bheap* heap)
144{
145 struct bheap_node *prev, *node;
146 __bheap_min(higher_prio, heap, &prev, &node);
147 if (!node)
148 return NULL;
149 if (prev)
150 prev->next = node->next;
151 else
152 heap->head = node->next;
153 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
154 return node;
155}
156
157/* insert (and reinitialize) a node into the heap */
158void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
159 struct bheap_node* node)
160{
161 struct bheap_node *min;
162 node->child = NULL;
163 node->parent = NULL;
164 node->next = NULL;
165 node->degree = 0;
166 if (heap->min && higher_prio(node, heap->min)) {
167 /* swap min cache */
168 min = heap->min;
169 min->child = NULL;
170 min->parent = NULL;
171 min->next = NULL;
172 min->degree = 0;
173 __bheap_union(higher_prio, heap, min);
174 heap->min = node;
175 } else
176 __bheap_union(higher_prio, heap, node);
177}
178
179void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
180{
181 struct bheap_node* min;
182 if (heap->min) {
183 min = heap->min;
184 heap->min = NULL;
185 bheap_insert(higher_prio, heap, min);
186 }
187}
188
189/* merge addition into target */
190void bheap_union(bheap_prio_t higher_prio,
191 struct bheap* target, struct bheap* addition)
192{
193 /* first insert any cached minima, if necessary */
194 bheap_uncache_min(higher_prio, target);
195 bheap_uncache_min(higher_prio, addition);
196 __bheap_union(higher_prio, target, addition->head);
197 /* this is a destructive merge */
198 addition->head = NULL;
199}
200
201struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
202 struct bheap* heap)
203{
204 if (!heap->min)
205 heap->min = __bheap_extract_min(higher_prio, heap);
206 return heap->min;
207}
208
209struct bheap_node* bheap_take(bheap_prio_t higher_prio,
210 struct bheap* heap)
211{
212 struct bheap_node *node;
213 if (!heap->min)
214 heap->min = __bheap_extract_min(higher_prio, heap);
215 node = heap->min;
216 heap->min = NULL;
217 if (node)
218 node->degree = NOT_IN_HEAP;
219 return node;
220}
221
222int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
223{
224 struct bheap_node *parent;
225 struct bheap_node** tmp_ref;
226 void* tmp;
227
228 /* bubble up */
229 parent = node->parent;
230 while (parent && higher_prio(node, parent)) {
231 /* swap parent and node */
232 tmp = parent->value;
233 parent->value = node->value;
234 node->value = tmp;
235 /* swap references */
236 *(parent->ref) = node;
237 *(node->ref) = parent;
238 tmp_ref = parent->ref;
239 parent->ref = node->ref;
240 node->ref = tmp_ref;
241 /* step up */
242 node = parent;
243 parent = node->parent;
244 }
245
246 return parent != NULL;
247}
248
249void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
250 struct bheap_node* node)
251{
252 struct bheap_node *parent, *prev, *pos;
253 struct bheap_node** tmp_ref;
254 void* tmp;
255
256 if (heap->min != node) {
257 /* bubble up */
258 parent = node->parent;
259 while (parent) {
260 /* swap parent and node */
261 tmp = parent->value;
262 parent->value = node->value;
263 node->value = tmp;
264 /* swap references */
265 *(parent->ref) = node;
266 *(node->ref) = parent;
267 tmp_ref = parent->ref;
268 parent->ref = node->ref;
269 node->ref = tmp_ref;
270 /* step up */
271 node = parent;
272 parent = node->parent;
273 }
274 /* now delete:
275 * first find prev */
276 prev = NULL;
277 pos = heap->head;
278 while (pos != node) {
279 BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */
280 prev = pos;
281 pos = pos->next;
282 }
283 /* we have prev, now remove node */
284 if (prev)
285 prev->next = node->next;
286 else
287 heap->head = node->next;
288 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
289 } else
290 heap->min = NULL;
291 node->degree = NOT_IN_HEAP;
292}
293
294/* allocate a heap node for value and insert into the heap */
295int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
296 void* value, int gfp_flags)
297{
298 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
299 if (likely(hn)) {
300 bheap_node_init(&hn, value);
301 bheap_insert(higher_prio, heap, hn);
302 }
303 return hn != NULL;
304}
305
306void* bheap_take_del(bheap_prio_t higher_prio,
307 struct bheap* heap)
308{
309 struct bheap_node* hn = bheap_take(higher_prio, heap);
310 void* ret = NULL;
311 if (hn) {
312 ret = hn->value;
313 bheap_node_free(hn);
314 }
315 return ret;
316}
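A hypothetical usage sketch (not part of this patch) of the heap API above, ordering tasks by EDF priority as rt_domain does. It assumes the node allocator declared in bheap.h (bheap_node_alloc()/bheap_node_free()) and the edf_ready_order() comparator added later in this patch.

/* Hypothetical sketch, not part of this patch. */
#include <litmus/bheap.h>
#include <litmus/edf_common.h>

static void demo_bheap_usage(struct task_struct *t)
{
	struct bheap queue;

	bheap_init(&queue);

	/* allocate a node for t and insert it in EDF order */
	if (!bheap_add(edf_ready_order, &queue, t, GFP_ATOMIC))
		return; /* node allocation failed */

	/* peek at (and cache) the highest-priority element */
	if (bheap_peek(edf_ready_order, &queue))
		TRACE_TASK(t, "is the earliest-deadline task\n");

	/* extract the value again and free its node */
	bheap_take_del(edf_ready_order, &queue);
}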
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..d3ab34b92096
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,387 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. Actual nodes swap instead of
40 * just data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble node to delete up to the root,
360 * and then delete the root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368 /* temporarily set data to BINHEAP_POISON so the node bubbles up to the top. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose pointer has decreased in value.
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 000000000000..47bf78a19f87
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,116 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4
5#include <litmus/litmus.h>
6#include <litmus/preempt.h>
7
8#include <litmus/budget.h>
9
10struct enforcement_timer {
11 /* The enforcement timer is used to accurately police
12 * slice budgets. */
13 struct hrtimer timer;
14 int armed;
15};
16
17DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
18
19static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
20{
21 struct enforcement_timer* et = container_of(timer,
22 struct enforcement_timer,
23 timer);
24 unsigned long flags;
25
26 local_irq_save(flags);
27 TRACE("enforcement timer fired.\n");
28 et->armed = 0;
29 /* activate scheduler */
30 litmus_reschedule_local();
31 local_irq_restore(flags);
32
33 return HRTIMER_NORESTART;
34}
35
36/* assumes called with IRQs off */
37static void cancel_enforcement_timer(struct enforcement_timer* et)
38{
39 int ret;
40
41 TRACE("cancelling enforcement timer.\n");
42
43 /* Since interrupts are disabled and et->armed is only
44 * modified locally, we do not need any locks.
45 */
46
47 if (et->armed) {
48 ret = hrtimer_try_to_cancel(&et->timer);
49 /* Should never be inactive. */
50 BUG_ON(ret == 0);
51 /* Should never be running concurrently. */
52 BUG_ON(ret == -1);
53
54 et->armed = 0;
55 }
56}
57
58/* assumes called with IRQs off */
59static void arm_enforcement_timer(struct enforcement_timer* et,
60 struct task_struct* t)
61{
62 lt_t when_to_fire;
63 TRACE_TASK(t, "arming enforcement timer.\n");
64
65 WARN_ONCE(!hrtimer_is_hres_active(&et->timer),
66 KERN_ERR "WARNING: no high resolution timers available!?\n");
67
68 /* Calling this when there is no budget left for the task
69 * makes no sense, unless the task is non-preemptive. */
70 BUG_ON(budget_exhausted(t) && (!is_np(t)));
71
72 /* __hrtimer_start_range_ns() cancels the timer
73 * anyway, so we don't have to check whether it is still armed */
74
75 if (likely(!is_np(t))) {
76 when_to_fire = litmus_clock() + budget_remaining(t);
77 __hrtimer_start_range_ns(&et->timer,
78 ns_to_ktime(when_to_fire),
79 0 /* delta */,
80 HRTIMER_MODE_ABS_PINNED,
81 0 /* no wakeup */);
82 et->armed = 1;
83 }
84}
85
86
87/* expects to be called with IRQs off */
88void update_enforcement_timer(struct task_struct* t)
89{
90 struct enforcement_timer* et = this_cpu_ptr(&budget_timer);
91
92 if (t && budget_precisely_enforced(t)) {
93 /* Make sure we call into the scheduler when this budget
94 * expires. */
95 arm_enforcement_timer(et, t);
96 } else if (et->armed) {
97 /* Make sure we don't cause unnecessary interrupts. */
98 cancel_enforcement_timer(et);
99 }
100}
101
102
103static int __init init_budget_enforcement(void)
104{
105 int cpu;
106 struct enforcement_timer* et;
107
108 for (cpu = 0; cpu < NR_CPUS; cpu++) {
109 et = &per_cpu(budget_timer, cpu);
110 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
111 et->timer.function = on_enforcement_timeout;
112 }
113 return 0;
114}
115
116module_init(init_budget_enforcement);
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 000000000000..de2aca2a271c
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,119 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4#include <linux/cacheinfo.h>
5
6#include <litmus/debug_trace.h>
7#include <litmus/clustered.h>
8
9int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index)
10{
11 struct cpu_cacheinfo* info = get_cpu_cacheinfo(cpu);
12 struct cacheinfo *ci;
13
14 if (!info || index >= info->num_leaves) {
15 TRACE("no shared-cache CPUs: info=%d index=%u\n",
16 info != NULL, index);
17 return 1;
18 }
19
20 if (!info->info_list) {
21 TRACE("no shared-cache CPUs: no info_list (cpu\n");
22 }
23 ci = info->info_list + index;
24
25 cpumask_copy(mask, &ci->shared_cpu_map);
26
27 TRACE("get_shared: P%u@L%u -> %d siblings\n ", cpu, index, cpumask_weight(mask));
28
29 return 0;
30}
31
32int get_cluster_size(enum cache_level level)
33{
34 cpumask_var_t mask;
35 int ok;
36 int num_cpus;
37
38 if (level == GLOBAL_CLUSTER)
39 return num_online_cpus();
40 else {
41 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
42 return -ENOMEM;
43 /* assumes CPU 0 is representative of all CPUs */
44 ok = get_shared_cpu_map(mask, 0, level);
45 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
46 if (ok == 0)
47 num_cpus = cpumask_weight(mask);
48 free_cpumask_var(mask);
49
50 if (ok == 0)
51 return num_cpus;
52 else
53 return -EINVAL;
54 }
55}
56
57int assign_cpus_to_clusters(enum cache_level level,
58 struct scheduling_cluster* clusters[],
59 unsigned int num_clusters,
60 struct cluster_cpu* cpus[],
61 unsigned int num_cpus)
62{
63 cpumask_var_t mask;
64 unsigned int i, free_cluster = 0, low_cpu;
65 int err = 0;
66
67 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
68 return -ENOMEM;
69
70 /* clear cluster pointers */
71 for (i = 0; i < num_cpus; i++) {
72 cpus[i]->id = i;
73 cpus[i]->cluster = NULL;
74 }
75
76 /* initialize clusters */
77 for (i = 0; i < num_clusters; i++) {
78 clusters[i]->id = i;
79 INIT_LIST_HEAD(&clusters[i]->cpus);
80 }
81
82 /* Assign each CPU. Two assumptions are made:
83 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
84 * 2) All cpus that belong to some cluster are online.
85 */
86 for_each_online_cpu(i) {
87 /* get lowest-id CPU in cluster */
88 if (level != GLOBAL_CLUSTER) {
89 err = get_shared_cpu_map(mask, cpus[i]->id, level);
90 if (err != 0) {
91 /* ugh... wrong cache level? Either caller screwed up
92 * or the CPU topology is weird. */
93 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
94 level, err);
95 err = -EINVAL;
96 goto out;
97 }
98 low_cpu = cpumask_first(mask);
99 } else
100 low_cpu = 0;
101 if (low_cpu == i) {
102 /* caller must provide an appropriate number of clusters */
103 BUG_ON(free_cluster >= num_clusters);
104
105 /* create new cluster */
106 cpus[i]->cluster = clusters[free_cluster++];
107 } else {
108 /* low_cpu points to the right cluster
109 * Assumption: low_cpu is actually online and was processed earlier. */
110 cpus[i]->cluster = cpus[low_cpu]->cluster;
111 }
112 /* enqueue in cpus list */
113 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
114 printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id);
115 }
116out:
117 free_cpumask_var(mask);
118 return err;
119}
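As a hypothetical illustration (not part of this patch), a clustered plugin might sanity-check its configured cache level during activation as follows; the cache_level enumeration, including GLOBAL_CLUSTER, comes from litmus/clustered.h (not shown here).

/* Hypothetical sketch, not part of this patch. */
static long demo_check_cluster_config(enum cache_level level)
{
	int cluster_size = get_cluster_size(level);

	if (cluster_size <= 0)
		/* invalid cache level or allocation failure */
		return -EINVAL;

	printk(KERN_INFO "LITMUS^RT: clusters of %d CPUs (%d CPUs online)\n",
	       cluster_size, num_online_cpus());
	return 0;
}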
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..877f2786b4c8
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,160 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8
9/* only one page for now, but we might want to add a RO version at some point */
10
11#define CTRL_NAME "litmus/ctrl"
12
13/* allocate t->rt_param.ctrl_page*/
14static int alloc_ctrl_page(struct task_struct *t)
15{
16 int err = 0;
17
18 /* only allocate if the task doesn't have one yet */
19 if (!tsk_rt(t)->ctrl_page) {
20 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
21 if (!tsk_rt(t)->ctrl_page)
22 err = -ENOMEM;
23 /* will get de-allocated in task teardown */
24 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
25 tsk_rt(t)->ctrl_page);
26 }
27 return err;
28}
29
30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
31{
32 int err;
33
34 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
35
36 TRACE_CUR(CTRL_NAME
37 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
38 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
39 vma->vm_page_prot);
40
41 /* Map it into the vma. */
42 err = vm_insert_page(vma, vma->vm_start, ctrl);
43
44 if (err)
45 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
46
47 return err;
48}
49
50static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
51{
52 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
53 vma->vm_flags, vma->vm_page_prot);
54
55 TRACE_CUR(CTRL_NAME
56 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
57 (void*) vma->vm_start, (void*) vma->vm_end, vma,
58 vma->vm_private_data);
59}
60
61static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
62 struct vm_fault* vmf)
63{
64 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
65 vma->vm_flags, vmf->pgoff);
66
67 /* This function should never be called, since all pages should have
68 * been mapped by mmap() already. */
69 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
70
71 return VM_FAULT_SIGBUS;
72}
73
74static struct vm_operations_struct litmus_ctrl_vm_ops = {
75 .close = litmus_ctrl_vm_close,
76 .fault = litmus_ctrl_vm_fault,
77};
78
79static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
80{
81 int err = 0;
82
83 /* first make sure mapper knows what he's doing */
84
85 /* you can only get one page */
86 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
87 return -EINVAL;
88
89 /* you can only map the "first" page */
90 if (vma->vm_pgoff != 0)
91 return -EINVAL;
92
93 /* you can't share it with anyone */
94 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
95 return -EINVAL;
96
97 vma->vm_ops = &litmus_ctrl_vm_ops;
98 /* This mapping should not be kept across forks,
99 * cannot be expanded, and is not a "normal" page. */
100 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ | VM_WRITE;
101
102 /* We don't want the first write access to trigger a "minor" page fault
103 * to mark the page as dirty. This is transient, private memory, we
104 * don't care if it was touched or not. PAGE_SHARED means RW access, but
105 * not execute, and avoids copy-on-write behavior.
106 * See protection_map in mmap.c. */
107 vma->vm_page_prot = PAGE_SHARED;
108
109 err = alloc_ctrl_page(current);
110 if (!err)
111 err = map_ctrl_page(current, vma);
112
113 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
114 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
115
116 return err;
117}
118
119static struct file_operations litmus_ctrl_fops = {
120 .owner = THIS_MODULE,
121 .mmap = litmus_ctrl_mmap,
122};
123
124static struct miscdevice litmus_ctrl_dev = {
125 .name = CTRL_NAME,
126 .minor = MISC_DYNAMIC_MINOR,
127 .fops = &litmus_ctrl_fops,
128};
129
130static int __init init_litmus_ctrl_dev(void)
131{
132 int err;
133
134 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
135
136 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
137
138 BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
139 != LITMUS_CP_OFFSET_SCHED);
140 BUILD_BUG_ON(offsetof(struct control_page, irq_count)
141 != LITMUS_CP_OFFSET_IRQ_COUNT);
142 BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
143 != LITMUS_CP_OFFSET_TS_SC_START);
144 BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
145 != LITMUS_CP_OFFSET_IRQ_SC_START);
146
147 printk("Initializing LITMUS^RT control device.\n");
148 err = misc_register(&litmus_ctrl_dev);
149 if (err)
150 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
151 return err;
152}
153
154static void __exit exit_litmus_ctrl_dev(void)
155{
156 misc_deregister(&litmus_ctrl_dev);
157}
158
159module_init(init_litmus_ctrl_dev);
160module_exit(exit_litmus_ctrl_dev);
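For completeness, a hypothetical user-space sketch (not part of this patch) of mapping the control page. Since the device is registered as the misc device "litmus/ctrl", it is assumed to appear as /dev/litmus/ctrl; the struct control_page layout lives in rt_param.h (not shown here), so the mapping is treated as opaque memory. Real applications should use liblitmus.

/* Hypothetical user-space sketch, not part of this patch. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *ctrl;
	int fd = open("/dev/litmus/ctrl", O_RDWR);

	if (fd < 0) {
		perror("open /dev/litmus/ctrl");
		return 1;
	}

	/* exactly one private, read-write page at offset 0 may be mapped */
	ctrl = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE, fd, 0);
	if (ctrl == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	printf("control page mapped at %p\n", ctrl);

	munmap(ctrl, page_size);
	close(fd);
	return 0;
}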
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..5aca2934a7b5
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,200 @@
1/*
2 * kernel/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/edf_common.h>
16
17#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
18#include <litmus/fpmath.h>
19#endif
20
21#ifdef CONFIG_EDF_TIE_BREAK_HASH
22#include <linux/hash.h>
23static inline long edf_hash(struct task_struct *t)
24{
25 /* pid is 32 bits, so normally we would shove that into the
26 * upper 32 bits and put the job number in the bottom
27 * and hash the 64-bit number with hash_64(). Sadly,
28 * in testing, hash_64() doesn't distribute keys where the
29 * upper bits are close together (as would be the case with
30 * pids) and job numbers are equal (as would be the case with
31 * synchronous task sets with all relative deadlines equal).
32 *
33 * A 2006 Linux patch proposed the following solution
34 * (but for some reason it wasn't accepted...).
35 *
36 * At least this workaround works for 32-bit systems as well.
37 */
38 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
39}
40#endif
41
42
43/* edf_higher_prio - returns true if first has a higher EDF priority
44 * than second. Deadline ties are broken by the configured tie-break policy (ultimately by PID).
45 *
46 * both first and second may be NULL
47 */
48int edf_higher_prio(struct task_struct* first,
49 struct task_struct* second)
50{
51 struct task_struct *first_task = first;
52 struct task_struct *second_task = second;
53
54 /* There is no point in comparing a task to itself. */
55 if (first && first == second) {
56 TRACE_TASK(first,
57 "WARNING: pointless edf priority comparison.\n");
58 return 0;
59 }
60
61
62 /* check for NULL tasks */
63 if (!first || !second)
64 return first && !second;
65
66#ifdef CONFIG_LITMUS_LOCKING
67
68 /* Check for inherited priorities. Change task
69 * used for comparison in such a case.
70 */
71 if (unlikely(first->rt_param.inh_task))
72 first_task = first->rt_param.inh_task;
73 if (unlikely(second->rt_param.inh_task))
74 second_task = second->rt_param.inh_task;
75
76 /* Check for priority boosting. Tie-break by start of boosting.
77 */
78 if (unlikely(is_priority_boosted(first_task))) {
79 /* first_task is boosted, how about second_task? */
80 if (!is_priority_boosted(second_task) ||
81 lt_before(get_boost_start(first_task),
82 get_boost_start(second_task)))
83 return 1;
84 else
85 return 0;
86 } else if (unlikely(is_priority_boosted(second_task)))
87 /* second_task is boosted, first is not*/
88 return 0;
89
90#endif
91
92 if (earlier_deadline(first_task, second_task)) {
93 return 1;
94 }
95 else if (get_deadline(first_task) == get_deadline(second_task)) {
96 /* Need to tie break. All methods must set pid_break to 0/1 if
97 * first_task does not have priority over second_task.
98 */
99 int pid_break;
100
101
102#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
103 /* Tie break by lateness. Jobs with greater lateness get
104 * priority. This should spread tardiness across all tasks,
105 * especially in task sets where all tasks have the same
106 * period and relative deadlines.
107 */
108 if (get_lateness(first_task) > get_lateness(second_task)) {
109 return 1;
110 }
111 pid_break = (get_lateness(first_task) == get_lateness(second_task));
112
113
114#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
115 /* Tie break by lateness, normalized by relative deadline. Jobs with
116 * greater normalized lateness get priority.
117 *
118 * Note: Considered using the algebraically equivalent
119 * lateness(first)*relative_deadline(second) >
120 lateness(second)*relative_deadline(first)
121 * to avoid fixed-point math, but values are prone to overflow if inputs
122 * are on the order of several seconds, even in 64-bit.
123 */
124 fp_t fnorm = _frac(get_lateness(first_task),
125 get_rt_relative_deadline(first_task));
126 fp_t snorm = _frac(get_lateness(second_task),
127 get_rt_relative_deadline(second_task));
128 if (_gt(fnorm, snorm)) {
129 return 1;
130 }
131 pid_break = _eq(fnorm, snorm);
132
133
134#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
135 /* Tie break by comparing hashes of the (pid, job#) tuple. There should be
136 * a 50% chance that first_task has a higher priority than second_task.
137 */
138 long fhash = edf_hash(first_task);
139 long shash = edf_hash(second_task);
140 if (fhash < shash) {
141 return 1;
142 }
143 pid_break = (fhash == shash);
144#else
145
146
147 /* CONFIG_EDF_PID_TIE_BREAK */
148 pid_break = 1; // fall through to tie-break by pid;
149#endif
150
151 /* Tie break by pid */
152 if(pid_break) {
153 if (first_task->pid < second_task->pid) {
154 return 1;
155 }
156 else if (first_task->pid == second_task->pid) {
157 /* If the PIDs are the same then the task with the
158 * inherited priority wins.
159 */
160 if (!second->rt_param.inh_task) {
161 return 1;
162 }
163 }
164 }
165 }
166 return 0; /* fall-through. prio(second_task) > prio(first_task) */
167}
168
169int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
170{
171 return edf_higher_prio(bheap2task(a), bheap2task(b));
172}
173
174void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
175 release_jobs_t release)
176{
177 rt_domain_init(rt, edf_ready_order, resched, release);
178}
179
180/* need_to_preempt - check whether the task t needs to be preempted
181 * call only with irqs disabled and with ready_lock acquired
182 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
183 */
184int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
185{
186 /* we need the read lock for edf_ready_queue */
187 /* no need to preempt if there is nothing pending */
188 if (!__jobs_pending(rt))
189 return 0;
190 /* we need to reschedule if t doesn't exist */
191 if (!t)
192 return 1;
193
194 /* NOTE: We cannot check for non-preemptibility since we
195 * don't know what address space we're currently in.
196 */
197
198 /* make sure to get non-rt stuff out of the way */
199 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
200}
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..0ff54e41839c
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,308 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - object descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
30 &generic_lock_ops, /* DFLP_SEM */
31};
32
33static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
34{
35 BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
36
37 if (fdso_ops[type]->create)
38 return fdso_ops[type]->create(obj_ref, type, config);
39 else
40 return -EINVAL;
41}
42
43static void fdso_destroy(obj_type_t type, void* obj)
44{
45 fdso_ops[type]->destroy(type, obj);
46}
47
48static int fdso_open(struct od_table_entry* entry, void* __user config)
49{
50 if (fdso_ops[entry->obj->type]->open)
51 return fdso_ops[entry->obj->type]->open(entry, config);
52 else
53 return 0;
54}
55
56static int fdso_close(struct od_table_entry* entry)
57{
58 if (fdso_ops[entry->obj->type]->close)
59 return fdso_ops[entry->obj->type]->close(entry);
60 else
61 return 0;
62}
63
64/* inode must be locked already */
65static int alloc_inode_obj(struct inode_obj_id** obj_ref,
66 struct inode* inode,
67 obj_type_t type,
68 unsigned int id,
69 void* __user config)
70{
71 struct inode_obj_id* obj;
72 void* raw_obj;
73 int err;
74
75 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
76 if (!obj) {
77 return -ENOMEM;
78 }
79
80 err = fdso_create(&raw_obj, type, config);
81 if (err != 0) {
82 kfree(obj);
83 return err;
84 }
85
86 INIT_LIST_HEAD(&obj->list);
87 atomic_set(&obj->count, 1);
88 obj->type = type;
89 obj->id = id;
90 obj->obj = raw_obj;
91 obj->inode = inode;
92
93 list_add(&obj->list, &inode->i_obj_list);
94 atomic_inc(&inode->i_count);
95
96 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
97
98 *obj_ref = obj;
99 return 0;
100}
101
102/* inode must be locked already */
103static struct inode_obj_id* get_inode_obj(struct inode* inode,
104 obj_type_t type,
105 unsigned int id)
106{
107 struct list_head* pos;
108 struct inode_obj_id* obj = NULL;
109
110 list_for_each(pos, &inode->i_obj_list) {
111 obj = list_entry(pos, struct inode_obj_id, list);
112 if (obj->id == id && obj->type == type) {
113 atomic_inc(&obj->count);
114 return obj;
115 }
116 }
117 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
118 return NULL;
119}
120
121
122static void put_inode_obj(struct inode_obj_id* obj)
123{
124 struct inode* inode;
125 int let_go = 0;
126
127 inode = obj->inode;
128 if (atomic_dec_and_test(&obj->count)) {
129
130 mutex_lock(&inode->i_obj_mutex);
131 /* no new references can be obtained */
132 if (!atomic_read(&obj->count)) {
133 list_del(&obj->list);
134 fdso_destroy(obj->type, obj->obj);
135 kfree(obj);
136 let_go = 1;
137 }
138 mutex_unlock(&inode->i_obj_mutex);
139 if (let_go)
140 iput(inode);
141 }
142}
143
144static struct od_table_entry* get_od_entry(struct task_struct* t)
145{
146 struct od_table_entry* table;
147 int i;
148
149
150 table = t->od_table;
151 if (!table) {
152 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
153 GFP_KERNEL);
154 t->od_table = table;
155 }
156
157 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
158 if (!table[i].used) {
159 table[i].used = 1;
160 return table + i;
161 }
162 return NULL;
163}
164
165static int put_od_entry(struct od_table_entry* od)
166{
167 put_inode_obj(od->obj);
168 od->used = 0;
169 return 0;
170}
171
172static long close_od_entry(struct od_table_entry *od)
173{
174 long ret;
175
176 /* Give the class a chance to reject the close. */
177 ret = fdso_close(od);
178 if (ret == 0)
179 ret = put_od_entry(od);
180
181 return ret;
182}
183
184void exit_od_table(struct task_struct* t)
185{
186 int i;
187
188 if (t->od_table) {
189 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
190 if (t->od_table[i].used)
191 close_od_entry(t->od_table + i);
192 kfree(t->od_table);
193 t->od_table = NULL;
194 }
195}
196
197static int do_sys_od_open(struct file* file, obj_type_t type, int id,
198 void* __user config)
199{
200 int idx = 0, err = 0;
201 struct inode* inode;
202 struct inode_obj_id* obj = NULL;
203 struct od_table_entry* entry;
204
205 inode = file_inode(file);
206
207 entry = get_od_entry(current);
208 if (!entry)
209 return -ENOMEM;
210
211 mutex_lock(&inode->i_obj_mutex);
212 obj = get_inode_obj(inode, type, id);
213 if (!obj)
214 err = alloc_inode_obj(&obj, inode, type, id, config);
215 if (err != 0) {
216 obj = NULL;
217 idx = err;
218 entry->used = 0;
219 } else {
220 entry->obj = obj;
221 entry->class = fdso_ops[type];
222 idx = entry - current->od_table;
223 }
224
225 mutex_unlock(&inode->i_obj_mutex);
226
227 /* open only if creation succeeded */
228 if (!err)
229 err = fdso_open(entry, config);
230 if (err < 0) {
231 /* The class rejected the open call.
232 * We need to clean up and tell user space.
233 */
234 if (obj)
235 put_od_entry(entry);
236 idx = err;
237 }
238
239 return idx;
240}
241
242
243struct od_table_entry* get_entry_for_od(int od)
244{
245 struct task_struct *t = current;
246
247 if (!t->od_table)
248 return NULL;
249 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
250 return NULL;
251 if (!t->od_table[od].used)
252 return NULL;
253 return t->od_table + od;
254}
255
256
257asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
258{
259 int ret = 0;
260 struct file* file;
261
262 /*
263 1) get file from fd, get inode from file
264 2) lock inode
265 3) try to lookup object
266 4) if not present create and enqueue object, inc inode refcnt
267 5) increment refcnt of object
268 6) alloc od_table_entry, setup ptrs
269 7) unlock inode
270 8) return offset in od_table as OD
271 */
272
273 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
274 ret = -EINVAL;
275 goto out;
276 }
277
278 file = fget(fd);
279 if (!file) {
280 ret = -EBADF;
281 goto out;
282 }
283
284 ret = do_sys_od_open(file, type, obj_id, config);
285
286 fput(file);
287
288out:
289 return ret;
290}
291
292
293asmlinkage long sys_od_close(int od)
294{
295 int ret = -EINVAL;
296 struct task_struct *t = current;
297
298 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
299 return ret;
300
301 if (!t->od_table || !t->od_table[od].used)
302 return ret;
303
304
305 ret = close_od_entry(t->od_table + od);
306
307 return ret;
308}
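/* Usage sketch (illustrative only, not part of this patch): from user space,
 * an object descriptor is obtained by opening a file whose inode serves as
 * the namespace for shared objects and then attaching an object of the
 * desired type to it. The syscall numbers __NR_od_open/__NR_od_close are
 * assumed to come from litmus/unistd_*.h; the real liblitmus API may differ.
 */
#if 0
static int example_attach_fmlp_sem(const char *namespace_file)
{
	int fd, od;

	fd = open(namespace_file, O_RDONLY);
	if (fd < 0)
		return -1;
	/* attach (or look up) object #0 of type FMLP_SEM on this file's inode */
	od = syscall(__NR_od_open, fd, FMLP_SEM, 0, NULL);
	/* the inode now holds a reference; the fd itself is no longer needed */
	close(fd);
	return od;	/* later released via syscall(__NR_od_close, od) */
}
#endif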
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..964a4729deff
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,119 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fp_common.h>
16
17/* fp_higher_prio - returns true if first has a higher static priority
18 * than second. Ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fp_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (unlikely(first && first == second)) {
30 TRACE_TASK(first,
31 "WARNING: pointless FP priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 if (!is_realtime(second_task))
41 return 1;
42
43#ifdef CONFIG_LITMUS_LOCKING
44
45 /* Check for inherited priorities. Change task
46 * used for comparison in such a case.
47 */
48 if (unlikely(first->rt_param.inh_task))
49 first_task = first->rt_param.inh_task;
50 if (unlikely(second->rt_param.inh_task))
51 second_task = second->rt_param.inh_task;
52
53 /* Check for priority boosting. Tie-break by start of boosting.
54 */
55 if (unlikely(is_priority_boosted(first_task))) {
56 /* first_task is boosted, how about second_task? */
57 if (is_priority_boosted(second_task))
58 /* break by priority point */
59 return lt_before(get_boost_start(first_task),
60 get_boost_start(second_task));
61 else
62 /* priority boosting wins. */
63 return 1;
64 } else if (unlikely(is_priority_boosted(second_task)))
 65		/* second_task is boosted, first is not */
66 return 0;
67
68#endif
69
70 /* Comparisons to itself are not expected; priority inheritance
71 * should also not cause this to happen. */
72 BUG_ON(first_task == second_task);
73
74 if (get_priority(first_task) < get_priority(second_task))
75 return 1;
76 else if (get_priority(first_task) == get_priority(second_task))
77 /* Break by PID. */
78 return first_task->pid < second_task->pid;
79 else
80 return 0;
81}
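/* Example (illustrative): with get_priority(A) == 1 and get_priority(B) == 5,
 * fp_higher_prio(A, B) returns 1, i.e., numerically lower priority values
 * denote higher static priority; equal values fall back to the PID
 * tie-break above.
 */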
82
83int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
84{
85 return fp_higher_prio(bheap2task(a), bheap2task(b));
86}
87
88void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
89 release_jobs_t release)
90{
91 rt_domain_init(rt, fp_ready_order, resched, release);
92}
93
 94/* fp_preemption_needed - check whether the task t needs to be preempted
95 */
96int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
97{
98 struct task_struct *pending;
99
100 pending = fp_prio_peek(q);
101
102 if (!pending)
103 return 0;
104 if (!t)
105 return 1;
106
107 /* make sure to get non-rt stuff out of the way */
108 return !is_realtime(t) || fp_higher_prio(pending, t);
109}
110
111void fp_prio_queue_init(struct fp_prio_queue* q)
112{
113 int i;
114
115 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
116 q->bitmask[i] = 0;
117 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
118 bheap_init(&q->queue[i]);
119}
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..0dd36b9343d6
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,82 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/preempt.h>
7#include <litmus/litmus.h>
8#include <litmus/sched_plugin.h>
9#include <litmus/jobs.h>
10
11static inline void setup_release(struct task_struct *t, lt_t release)
12{
13 /* prepare next release */
14 t->rt_param.job_params.release = release;
15 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
16 t->rt_param.job_params.exec_time = 0;
17
18 /* update job sequence number */
19 t->rt_param.job_params.job_no++;
20}
21
22void prepare_for_next_period(struct task_struct *t)
23{
24 BUG_ON(!t);
25
26 /* Record lateness before we set up the next job's
27 * release and deadline. Lateness may be negative.
28 */
29 t->rt_param.job_params.lateness =
30 (long long)litmus_clock() -
31 (long long)t->rt_param.job_params.deadline;
32
33 if (tsk_rt(t)->sporadic_release) {
34 TRACE_TASK(t, "sporadic release at %llu\n",
35 tsk_rt(t)->sporadic_release_time);
36 /* sporadic release */
37 setup_release(t, tsk_rt(t)->sporadic_release_time);
38 tsk_rt(t)->sporadic_release = 0;
39 } else {
40 /* periodic release => add period */
41 setup_release(t, get_release(t) + get_rt_period(t));
42 }
43}
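/* Worked example (illustrative): for a periodic task with a 10ms period
 * and an 8ms relative deadline whose completed job was released at
 * t = 100ms, prepare_for_next_period() sets
 *	release  = 100ms + 10ms = 110ms
 *	deadline = 110ms +  8ms = 118ms
 * and increments job_no. Lateness is recorded against the *previous*
 * deadline, so it is negative whenever the job finished early.
 */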
44
45void release_at(struct task_struct *t, lt_t start)
46{
47 BUG_ON(!t);
48 setup_release(t, start);
49 tsk_rt(t)->completed = 0;
50}
51
52long default_wait_for_release_at(lt_t release_time)
53{
54 struct task_struct *t = current;
55 unsigned long flags;
56
57 local_irq_save(flags);
58 tsk_rt(t)->sporadic_release_time = release_time;
59 smp_wmb();
60 tsk_rt(t)->sporadic_release = 1;
61 local_irq_restore(flags);
62
63 return litmus->complete_job();
64}
65
66
67/*
68 * Deactivate current task until the beginning of the next period.
69 */
70long complete_job(void)
71{
72 preempt_disable();
73 TRACE_CUR("job completion indicated at %llu\n", litmus_clock());
 74	/* Mark that we do not execute anymore */
75 tsk_rt(current)->completed = 1;
76 /* call schedule, this will return when a new job arrives
77 * it also takes care of preparing for the next release
78 */
79 litmus_reschedule_local();
80 preempt_enable();
81 return 0;
82}
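/* Big-picture sketch (illustrative): a real-time task's main loop typically
 * alternates between doing one job's worth of work and yielding until the
 * next release; the yield ends up in litmus->complete_job(), i.e. in
 * complete_job() above for most plugins. The user-space wrapper name below
 * is hypothetical:
 *
 *	while (!done) {
 *		do_one_job();
 *		sleep_next_period();	// wrapper for sys_complete_job()
 *	}
 */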
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..703360c68609
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,681 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
 3 *		the LITMUS initialization code,
 4 *		and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/reboot.h>
13#include <linux/stop_machine.h>
14#include <linux/sched/rt.h>
15#include <linux/rwsem.h>
16#include <linux/interrupt.h>
17
18#include <litmus/litmus.h>
19#include <litmus/bheap.h>
20#include <litmus/trace.h>
21#include <litmus/rt_domain.h>
22#include <litmus/litmus_proc.h>
23#include <litmus/sched_trace.h>
24
25#ifdef CONFIG_SCHED_CPU_AFFINITY
26#include <litmus/affinity.h>
27#endif
28
29/* Number of RT tasks that exist in the system */
30atomic_t rt_task_count = ATOMIC_INIT(0);
31
32#ifdef CONFIG_RELEASE_MASTER
33/* current master CPU for handling timer IRQs */
34atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
35#endif
36
37static struct kmem_cache * bheap_node_cache;
38extern struct kmem_cache * release_heap_cache;
39
40struct bheap_node* bheap_node_alloc(int gfp_flags)
41{
42 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
43}
44
45void bheap_node_free(struct bheap_node* hn)
46{
47 kmem_cache_free(bheap_node_cache, hn);
48}
49
50struct release_heap* release_heap_alloc(int gfp_flags);
51void release_heap_free(struct release_heap* rh);
52
53/**
54 * Get the quantum alignment as a cmdline option.
55 * Default is staggered quanta, as this results in lower overheads.
56 */
57static bool aligned_quanta = 0;
58module_param(aligned_quanta, bool, 0644);
59
60u64 cpu_stagger_offset(int cpu)
61{
62 u64 offset = 0;
63
64 if (!aligned_quanta) {
65 offset = LITMUS_QUANTUM_LENGTH_NS;
66 do_div(offset, num_possible_cpus());
67 offset *= cpu;
68 }
69 return offset;
70}
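/* Example (illustrative, assuming a 1ms quantum): on a 4-CPU system with
 * staggered quanta (the default), cpu_stagger_offset() yields offsets of
 * 0us, 250us, 500us, and 750us for CPUs 0-3, spreading per-CPU quantum
 * boundaries evenly across the quantum. With aligned_quanta=1 all offsets
 * are 0.
 */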
71
72/*
 73 * sys_set_rt_task_param
74 * @pid: Pid of the task which scheduling parameters must be changed
75 * @param: New real-time extension parameters such as the execution cost and
76 * period
 77 * Syscall for manipulating a task's rt extension params.
 78 * Returns EFAULT if param is NULL.
 79 *         ESRCH  if pid does not correspond
 80 *                to a valid task.
 81 *         EINVAL if either period or execution cost is <= 0
 82 *         EBUSY  if pid already refers to a real-time task
 83 *         0      on success
84 *
85 * Only non-real-time tasks may be configured with this system call
86 * to avoid races with the scheduler. In practice, this means that a
87 * task's parameters must be set _before_ calling sys_prepare_rt_task()
88 *
89 * find_task_by_vpid() assumes that we are in the same namespace of the
90 * target.
91 */
92asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
93{
94 struct rt_task tp;
95 struct task_struct *target;
96 int retval = -EINVAL;
97
98 printk("Setting up rt task parameters for process %d.\n", pid);
99
100 if (pid < 0 || param == 0) {
101 goto out;
102 }
103 if (copy_from_user(&tp, param, sizeof(tp))) {
104 retval = -EFAULT;
105 goto out;
106 }
107
108 /* Task search and manipulation must be protected */
109 read_lock_irq(&tasklist_lock);
110 rcu_read_lock();
111 if (!(target = find_task_by_vpid(pid))) {
112 retval = -ESRCH;
113 rcu_read_unlock();
114 goto out_unlock;
115 }
116 rcu_read_unlock();
117
118 if (is_realtime(target)) {
119 /* The task is already a real-time task.
120		 * We cannot allow parameter changes at this point.
121 */
122 retval = -EBUSY;
123 goto out_unlock;
124 }
125
126 /* set relative deadline to be implicit if left unspecified */
127 if (tp.relative_deadline == 0)
128 tp.relative_deadline = tp.period;
129
130 if (tp.exec_cost <= 0)
131 goto out_unlock;
132 if (tp.period <= 0)
133 goto out_unlock;
134 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
135 {
136 printk(KERN_INFO "litmus: real-time task %d rejected "
137 "because task density > 1.0\n", pid);
138 goto out_unlock;
139 }
140 if (tp.cls != RT_CLASS_HARD &&
141 tp.cls != RT_CLASS_SOFT &&
142 tp.cls != RT_CLASS_BEST_EFFORT)
143 {
144 printk(KERN_INFO "litmus: real-time task %d rejected "
145 "because its class is invalid\n", pid);
146 goto out_unlock;
147 }
148 if (tp.budget_policy != NO_ENFORCEMENT &&
149 tp.budget_policy != QUANTUM_ENFORCEMENT &&
150 tp.budget_policy != PRECISE_ENFORCEMENT)
151 {
152 printk(KERN_INFO "litmus: real-time task %d rejected "
153 "because unsupported budget enforcement policy "
154 "specified (%d)\n",
155 pid, tp.budget_policy);
156 goto out_unlock;
157 }
158
159 target->rt_param.task_params = tp;
160
161 retval = 0;
162 out_unlock:
163 read_unlock_irq(&tasklist_lock);
164 out:
165 return retval;
166}
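/* Illustrative user-space sketch (not part of this patch): configuring a
 * task with a 2ms budget and a 10ms implicit-deadline period before it is
 * admitted. Field names match struct rt_task as used above; the nanosecond
 * units and the set_rt_task_param() wrapper are assumptions, not verified
 * by this patch.
 */
#if 0
static int example_configure_self(void)
{
	struct rt_task tp = {
		.exec_cost         =  2000000,	/* 2ms budget, assuming ns */
		.period            = 10000000,	/* 10ms period */
		.relative_deadline =        0,	/* 0 => implicit deadline */
		.cls               = RT_CLASS_SOFT,
		.budget_policy     = NO_ENFORCEMENT,
	};

	return set_rt_task_param(getpid(), &tp);	/* hypothetical wrapper */
}
#endif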
167
168/*
169 * Getter of a task's RT params
170 *   returns EINVAL if param is NULL or pid is negative
171 * returns ESRCH if pid does not correspond to a valid task
172 * returns EFAULT if copying of parameters has failed.
173 *
174 * find_task_by_vpid() assumes that we are in the same namespace of the
175 * target.
176 */
177asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
178{
179 int retval = -EINVAL;
180 struct task_struct *source;
181 struct rt_task lp;
182 if (param == 0 || pid < 0)
183 goto out;
184 read_lock(&tasklist_lock);
185 if (!(source = find_task_by_vpid(pid))) {
186 retval = -ESRCH;
187 goto out_unlock;
188 }
189 lp = source->rt_param.task_params;
190 read_unlock(&tasklist_lock);
191 /* Do copying outside the lock */
192 retval =
193 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
194 return retval;
195 out_unlock:
196 read_unlock(&tasklist_lock);
197 out:
198 return retval;
199
200}
201
202/*
203 * This is the crucial function for the periodic task implementation.
204 * It checks if the task is periodic, checks whether such a sleep
205 * is permitted, and calls the plugin-specific sleep, which puts the
206 * task into a wait array.
207 * returns 0 on successful wakeup
208 * returns EPERM if current conditions do not permit such sleep
209 * returns EINVAL if current task is not able to go to sleep
210 */
211asmlinkage long sys_complete_job(void)
212{
213 int retval = -EPERM;
214 if (!is_realtime(current)) {
215 retval = -EINVAL;
216 goto out;
217 }
218 /* Task with negative or zero period cannot sleep */
219 if (get_rt_period(current) <= 0) {
220 retval = -EINVAL;
221 goto out;
222 }
223 /* The plugin has to put the task into an
224 * appropriate queue and call schedule
225 */
226 retval = litmus->complete_job();
227 out:
228 return retval;
229}
230
231/* This is an "improved" version of sys_complete_job that
232 * addresses the problem of unintentionally missing a job after
233 * an overrun.
234 *
235 * returns 0 on successful wakeup
236 * returns EPERM if current conditions do not permit such sleep
237 * returns EINVAL if current task is not able to go to sleep
238 */
239asmlinkage long sys_wait_for_job_release(unsigned int job)
240{
241 int retval = -EPERM;
242 if (!is_realtime(current)) {
243 retval = -EINVAL;
244 goto out;
245 }
246
247 /* Task with negative or zero period cannot sleep */
248 if (get_rt_period(current) <= 0) {
249 retval = -EINVAL;
250 goto out;
251 }
252
253 retval = 0;
254
255 /* first wait until we have "reached" the desired job
256 *
257 * This implementation has at least two problems:
258 *
259 * 1) It doesn't gracefully handle the wrap around of
260 * job_no. Since LITMUS is a prototype, this is not much
261 * of a problem right now.
262 *
263 * 2) It is theoretically racy if a job release occurs
264 * between checking job_no and calling sleep_next_period().
265	 * A proper solution would require adding another callback
266 * in the plugin structure and testing the condition with
267 * interrupts disabled.
268 *
269 * FIXME: At least problem 2 should be taken care of eventually.
270 */
271 while (!retval && job > current->rt_param.job_params.job_no)
272 /* If the last job overran then job <= job_no and we
273 * don't send the task to sleep.
274 */
275 retval = litmus->complete_job();
276 out:
277 return retval;
278}
279
280/* This is a helper syscall to query the current job sequence number.
281 *
282 * returns 0 on successful query
283 * returns EPERM if task is not a real-time task.
284 * returns EFAULT if &job is not a valid pointer.
285 */
286asmlinkage long sys_query_job_no(unsigned int __user *job)
287{
288 int retval = -EPERM;
289 if (is_realtime(current))
290 retval = put_user(current->rt_param.job_params.job_no, job);
291
292 return retval;
293}
294
295/* sys_null_call() is only used for determining raw system call
296 * overheads (kernel entry, kernel exit). It has no useful side effects.
297 * If ts is non-NULL, then the current Feather-Trace time is recorded.
298 */
299asmlinkage long sys_null_call(cycles_t __user *ts)
300{
301 long ret = 0;
302 cycles_t now;
303
304 if (ts) {
305 now = get_cycles();
306 ret = put_user(now, ts);
307 }
308
309 return ret;
310}
311
312/* p is a real-time task. Re-init its state as a best-effort task. */
313static void reinit_litmus_state(struct task_struct* p, int restore)
314{
315 struct rt_task user_config = {};
316 void* ctrl_page = NULL;
317
318 if (restore) {
319		/* Save the user-space provided configuration data
320		 * and the allocated control page. */
321 user_config = p->rt_param.task_params;
322 ctrl_page = p->rt_param.ctrl_page;
323 }
324
325 /* We probably should not be inheriting any task's priority
326 * at this point in time.
327 */
328 WARN_ON(p->rt_param.inh_task);
329
330 /* Cleanup everything else. */
331 memset(&p->rt_param, 0, sizeof(p->rt_param));
332
333 /* Restore preserved fields. */
334 if (restore) {
335 p->rt_param.task_params = user_config;
336 p->rt_param.ctrl_page = ctrl_page;
337 }
338}
339
340long litmus_admit_task(struct task_struct* tsk)
341{
342 long retval = 0;
343
344 BUG_ON(is_realtime(tsk));
345
346 tsk_rt(tsk)->heap_node = NULL;
347 tsk_rt(tsk)->rel_heap = NULL;
348
349 if (get_rt_relative_deadline(tsk) == 0 ||
350 get_exec_cost(tsk) >
351 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
352 TRACE_TASK(tsk,
353 "litmus admit: invalid task parameters "
354 "(e = %lu, p = %lu, d = %lu)\n",
355 get_exec_cost(tsk), get_rt_period(tsk),
356 get_rt_relative_deadline(tsk));
357 retval = -EINVAL;
358 goto out;
359 }
360
361 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
362
363 /* allocate heap node for this task */
364 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
365 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
366
367 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
368 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
369
370 retval = -ENOMEM;
371 goto out;
372 } else {
373 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
374 }
375
376 preempt_disable();
377
378 retval = litmus->admit_task(tsk);
379
380 if (!retval) {
381 sched_trace_task_name(tsk);
382 sched_trace_task_param(tsk);
383 atomic_inc(&rt_task_count);
384 }
385
386 preempt_enable();
387
388out:
389 if (retval) {
390 if (tsk_rt(tsk)->heap_node)
391 bheap_node_free(tsk_rt(tsk)->heap_node);
392 if (tsk_rt(tsk)->rel_heap)
393 release_heap_free(tsk_rt(tsk)->rel_heap);
394 }
395 return retval;
396}
397
398void litmus_clear_state(struct task_struct* tsk)
399{
400 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
401 bheap_node_free(tsk_rt(tsk)->heap_node);
402 release_heap_free(tsk_rt(tsk)->rel_heap);
403
404 atomic_dec(&rt_task_count);
405 reinit_litmus_state(tsk, 1);
406}
407
408/* called from sched_setscheduler() */
409void litmus_exit_task(struct task_struct* tsk)
410{
411 if (is_realtime(tsk)) {
412 sched_trace_task_completion(tsk, 1);
413
414 litmus->task_exit(tsk);
415 }
416}
417
418static DECLARE_RWSEM(plugin_switch_mutex);
419
420void litmus_plugin_switch_disable(void)
421{
422 down_read(&plugin_switch_mutex);
423}
424
425void litmus_plugin_switch_enable(void)
426{
427 up_read(&plugin_switch_mutex);
428}
429
430static int __do_plugin_switch(struct sched_plugin* plugin)
431{
432 int ret;
433
434
435 /* don't switch if there are active real-time tasks */
436 if (atomic_read(&rt_task_count) == 0) {
437 TRACE("deactivating plugin %s\n", litmus->plugin_name);
438 ret = litmus->deactivate_plugin();
439 if (0 != ret)
440 goto out;
441
442 TRACE("activating plugin %s\n", plugin->plugin_name);
443 ret = plugin->activate_plugin();
444 if (0 != ret) {
445 printk(KERN_INFO "Can't activate %s (%d).\n",
446 plugin->plugin_name, ret);
447 plugin = &linux_sched_plugin;
448 }
449
450 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
451 litmus = plugin;
452 } else
453 ret = -EBUSY;
454out:
455 TRACE("do_plugin_switch() => %d\n", ret);
456 return ret;
457}
458
459static atomic_t ready_to_switch;
460
461static int do_plugin_switch(void *_plugin)
462{
463 unsigned long flags;
464 int ret = 0;
465
466 local_save_flags(flags);
467 local_irq_disable();
468 hard_irq_disable();
469
470 if (atomic_dec_and_test(&ready_to_switch))
471 {
472 ret = __do_plugin_switch((struct sched_plugin*) _plugin);
473 atomic_set(&ready_to_switch, INT_MAX);
474 }
475
476 do {
477 cpu_relax();
478 } while (atomic_read(&ready_to_switch) != INT_MAX);
479
480 local_irq_restore(flags);
481 return ret;
482}
483
484/* Switching a plugin in use is tricky.
485 * We must watch out that no real-time tasks exist
486 * (and that none are created in parallel) and that the plugin is not
487 * currently in use on any processor (in theory).
488 */
489int switch_sched_plugin(struct sched_plugin* plugin)
490{
491 int err;
492 struct domain_proc_info* domain_info;
493
494 BUG_ON(!plugin);
495
496 if (atomic_read(&rt_task_count) == 0) {
497 down_write(&plugin_switch_mutex);
498
499 deactivate_domain_proc();
500
501 get_online_cpus();
502 atomic_set(&ready_to_switch, num_online_cpus());
503 err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin);
504 put_online_cpus();
505
506 if (!litmus->get_domain_proc_info(&domain_info))
507 activate_domain_proc(domain_info);
508
509 up_write(&plugin_switch_mutex);
510 return err;
511 } else
512 return -EBUSY;
513}
514
515/* Called upon fork.
516 * p is the newly forked task.
517 */
518void litmus_fork(struct task_struct* p)
519{
520 if (is_realtime(p)) {
521 /* clean out any litmus related state, don't preserve anything */
522 reinit_litmus_state(p, 0);
523 /* Don't let the child be a real-time task. */
524 p->sched_reset_on_fork = 1;
525 } else
526 /* non-rt tasks might have ctrl_page set */
527 tsk_rt(p)->ctrl_page = NULL;
528
529 /* od tables are never inherited across a fork */
530 p->od_table = NULL;
531}
532
533/* Called upon execve().
534 * current is doing the exec.
535 * Don't let address space specific stuff leak.
536 */
537void litmus_exec(void)
538{
539 struct task_struct* p = current;
540
541 if (is_realtime(p)) {
542 WARN_ON(p->rt_param.inh_task);
543 if (tsk_rt(p)->ctrl_page) {
544 free_page((unsigned long) tsk_rt(p)->ctrl_page);
545 tsk_rt(p)->ctrl_page = NULL;
546 }
547 }
548}
549
550/* Called when dead_tsk is being deallocated
551 */
552void exit_litmus(struct task_struct *dead_tsk)
553{
554	/* We also allow non-RT tasks to
555	 * allocate control pages to enable
556	 * overhead measurements with non-RT tasks.
557	 * So check in any case whether the
558	 * control page needs to be freed.
559	 */
560 if (tsk_rt(dead_tsk)->ctrl_page) {
561 TRACE_TASK(dead_tsk,
562 "freeing ctrl_page %p\n",
563 tsk_rt(dead_tsk)->ctrl_page);
564 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
565 }
566
567 /* Tasks should not be real-time tasks any longer at this point. */
568 BUG_ON(is_realtime(dead_tsk));
569}
570
571void litmus_do_exit(struct task_struct *exiting_tsk)
572{
573 /* This task called do_exit(), but is still a real-time task. To avoid
574 * complications later, we force it to be a non-real-time task now. */
575
576 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
577
578 TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n");
579 sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, &param);
580}
581
582void litmus_dealloc(struct task_struct *tsk)
583{
584 /* tsk is no longer a real-time task */
585 TRACE_TASK(tsk, "Deallocating real-time task data\n");
586 litmus->task_cleanup(tsk);
587 litmus_clear_state(tsk);
588}
589
590/* move current non-RT task to a specific CPU */
591int litmus_be_migrate_to(int cpu)
592{
593 struct cpumask single_cpu_aff;
594
595 cpumask_clear(&single_cpu_aff);
596 cpumask_set_cpu(cpu, &single_cpu_aff);
597 return sched_setaffinity(current->pid, &single_cpu_aff);
598}
599
600#ifdef CONFIG_MAGIC_SYSRQ
601int sys_kill(int pid, int sig);
602
603static void sysrq_handle_kill_rt_tasks(int key)
604{
605 struct task_struct *t;
606 read_lock(&tasklist_lock);
607 for_each_process(t) {
608 if (is_realtime(t)) {
609 sys_kill(t->pid, SIGKILL);
610 }
611 }
612 read_unlock(&tasklist_lock);
613}
614
615static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
616 .handler = sysrq_handle_kill_rt_tasks,
617 .help_msg = "quit-rt-tasks(X)",
618 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
619};
620#endif
621
622extern struct sched_plugin linux_sched_plugin;
623
624static int litmus_shutdown_nb(struct notifier_block *unused1,
625 unsigned long unused2, void *unused3)
626{
627 /* Attempt to switch back to regular Linux scheduling.
628 * Forces the active plugin to clean up.
629 */
630 if (litmus != &linux_sched_plugin) {
631 int ret = switch_sched_plugin(&linux_sched_plugin);
632 if (ret) {
633 printk("Auto-shutdown of active Litmus plugin failed.\n");
634 }
635 }
636 return NOTIFY_DONE;
637}
638
639static struct notifier_block shutdown_notifier = {
640 .notifier_call = litmus_shutdown_nb,
641};
642
643static int __init _init_litmus(void)
644{
645 /* Common initializers,
646 * mode change lock is used to enforce single mode change
647 * operation.
648 */
649 printk("Starting LITMUS^RT kernel\n");
650
651 register_sched_plugin(&linux_sched_plugin);
652
653 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
654 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
655
656#ifdef CONFIG_MAGIC_SYSRQ
657 /* offer some debugging help */
658 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
659 printk("Registered kill rt tasks magic sysrq.\n");
660 else
661 printk("Could not register kill rt tasks magic sysrq.\n");
662#endif
663
664 init_litmus_proc();
665
666 register_reboot_notifier(&shutdown_notifier);
667
668 return 0;
669}
670
671static void _exit_litmus(void)
672{
673 unregister_reboot_notifier(&shutdown_notifier);
674
675 exit_litmus_proc();
676 kmem_cache_destroy(bheap_node_cache);
677 kmem_cache_destroy(release_heap_cache);
678}
679
680module_init(_init_litmus);
681module_exit(_exit_litmus);
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 000000000000..2ef1669eff17
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,573 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/slab.h>
7#include <linux/uaccess.h>
8#include <linux/seq_file.h>
9
10#include <litmus/litmus.h>
11#include <litmus/litmus_proc.h>
12
13#include <litmus/clustered.h>
14
15/* in litmus/litmus.c */
16extern atomic_t rt_task_count;
17
18static struct proc_dir_entry *litmus_dir = NULL,
19 *curr_file = NULL,
20 *stat_file = NULL,
21 *plugs_dir = NULL,
22#ifdef CONFIG_RELEASE_MASTER
23 *release_master_file = NULL,
24#endif
25 *plugs_file = NULL,
26 *domains_dir = NULL,
27 *cpus_dir = NULL;
28
29
30/* in litmus/sync.c */
31int count_tasks_waiting_for_release(void);
32
33static int litmus_stats_proc_show(struct seq_file *m, void *v)
34{
35 seq_printf(m,
36 "real-time tasks = %d\n"
37 "ready for release = %d\n",
38 atomic_read(&rt_task_count),
39 count_tasks_waiting_for_release());
40 return 0;
41}
42
43static int litmus_stats_proc_open(struct inode *inode, struct file *file)
44{
45 return single_open(file, litmus_stats_proc_show, PDE_DATA(inode));
46}
47
48static const struct file_operations litmus_stats_proc_fops = {
49 .open = litmus_stats_proc_open,
50 .read = seq_read,
51 .llseek = seq_lseek,
52 .release = single_release,
53};
54
55
56static int litmus_loaded_proc_show(struct seq_file *m, void *v)
57{
58 print_sched_plugins(m);
59 return 0;
60}
61
62static int litmus_loaded_proc_open(struct inode *inode, struct file *file)
63{
64 return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode));
65}
66
67static const struct file_operations litmus_loaded_proc_fops = {
68 .open = litmus_loaded_proc_open,
69 .read = seq_read,
70 .llseek = seq_lseek,
71 .release = single_release,
72};
73
74
75
76
77/* in litmus/litmus.c */
78int switch_sched_plugin(struct sched_plugin*);
79
80static ssize_t litmus_active_proc_write(struct file *file,
81 const char __user *buffer, size_t count,
82 loff_t *ppos)
83{
84 char name[65];
85 struct sched_plugin* found;
86 ssize_t ret = -EINVAL;
87 int err;
88
89
90 ret = copy_and_chomp(name, sizeof(name), buffer, count);
91 if (ret < 0)
92 return ret;
93
94 found = find_sched_plugin(name);
95
96 if (found) {
97 err = switch_sched_plugin(found);
98 if (err) {
99 printk(KERN_INFO "Could not switch plugin: %d\n", err);
100 ret = err;
101 }
102 } else {
103 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
104 ret = -ESRCH;
105 }
106
107 return ret;
108}
109
110static int litmus_active_proc_show(struct seq_file *m, void *v)
111{
112 seq_printf(m, "%s\n", litmus->plugin_name);
113 return 0;
114}
115
116static int litmus_active_proc_open(struct inode *inode, struct file *file)
117{
118 return single_open(file, litmus_active_proc_show, PDE_DATA(inode));
119}
120
121static const struct file_operations litmus_active_proc_fops = {
122 .open = litmus_active_proc_open,
123 .read = seq_read,
124 .llseek = seq_lseek,
125 .release = single_release,
126 .write = litmus_active_proc_write,
127};
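/* Usage note (illustrative): the active plugin is switched by writing a
 * registered plugin name to this file, e.g.
 *
 *	echo GSN-EDF > /proc/litmus/active_plugin
 *
 * (GSN-EDF is shown only as an example name; this patch registers just the
 * dummy linux_sched_plugin.) The switch is refused with -EBUSY while
 * real-time tasks exist.
 */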
128
129
130#ifdef CONFIG_RELEASE_MASTER
131static ssize_t litmus_release_master_proc_write(
132 struct file *file,
133 const char __user *buffer, size_t count,
134 loff_t *ppos)
135{
136 int cpu, err, online = 0;
137 char msg[64];
138 ssize_t len;
139
140 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
141
142 if (len < 0)
143 return len;
144
145 if (strcmp(msg, "NO_CPU") == 0)
146 atomic_set(&release_master_cpu, NO_CPU);
147 else {
148 err = sscanf(msg, "%d", &cpu);
149 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
150 atomic_set(&release_master_cpu, cpu);
151 } else {
152 TRACE("invalid release master: '%s' "
153 "(err:%d cpu:%d online:%d)\n",
154 msg, err, cpu, online);
155 len = -EINVAL;
156 }
157 }
158 return len;
159}
160
161static int litmus_release_master_proc_show(struct seq_file *m, void *v)
162{
163 int master;
164 master = atomic_read(&release_master_cpu);
165 if (master == NO_CPU)
166 seq_printf(m, "NO_CPU\n");
167 else
168 seq_printf(m, "%d\n", master);
169 return 0;
170}
171
172static int litmus_release_master_proc_open(struct inode *inode, struct file *file)
173{
174 return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode));
175}
176
177static const struct file_operations litmus_release_master_proc_fops = {
178 .open = litmus_release_master_proc_open,
179 .read = seq_read,
180 .llseek = seq_lseek,
181 .release = single_release,
182 .write = litmus_release_master_proc_write,
183};
184#endif
185
186int __init init_litmus_proc(void)
187{
188 litmus_dir = proc_mkdir("litmus", NULL);
189 if (!litmus_dir) {
190 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
191 return -ENOMEM;
192 }
193
194 curr_file = proc_create("active_plugin", 0644, litmus_dir,
195 &litmus_active_proc_fops);
196
197 if (!curr_file) {
198 printk(KERN_ERR "Could not allocate active_plugin "
199 "procfs entry.\n");
200 return -ENOMEM;
201 }
202
203#ifdef CONFIG_RELEASE_MASTER
204 release_master_file = proc_create("release_master", 0644, litmus_dir,
205 &litmus_release_master_proc_fops);
206 if (!release_master_file) {
207 printk(KERN_ERR "Could not allocate release_master "
208 "procfs entry.\n");
209 return -ENOMEM;
210 }
211#endif
212
213 stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops);
214
215 plugs_dir = proc_mkdir("plugins", litmus_dir);
216 if (!plugs_dir){
217 printk(KERN_ERR "Could not allocate plugins directory "
218 "procfs entry.\n");
219 return -ENOMEM;
220 }
221
222 plugs_file = proc_create("loaded", 0444, plugs_dir,
223 &litmus_loaded_proc_fops);
224
225 domains_dir = proc_mkdir("domains", litmus_dir);
226 if (!domains_dir) {
227 printk(KERN_ERR "Could not allocate domains directory "
228 "procfs entry.\n");
229 return -ENOMEM;
230 }
231
232 cpus_dir = proc_mkdir("cpus", litmus_dir);
233 if (!cpus_dir) {
234 printk(KERN_ERR "Could not allocate cpus directory "
235 "procfs entry.\n");
236 return -ENOMEM;
237 }
238
239 return 0;
240}
241
242void exit_litmus_proc(void)
243{
244 if (cpus_dir || domains_dir) {
245 deactivate_domain_proc();
246 if (cpus_dir)
247 remove_proc_entry("cpus", litmus_dir);
248 if (domains_dir)
249 remove_proc_entry("domains", litmus_dir);
250 }
251 if (plugs_file)
252 remove_proc_entry("loaded", plugs_dir);
253 if (plugs_dir)
254 remove_proc_entry("plugins", litmus_dir);
255 if (stat_file)
256 remove_proc_entry("stats", litmus_dir);
257 if (curr_file)
258 remove_proc_entry("active_plugin", litmus_dir);
259#ifdef CONFIG_RELEASE_MASTER
260 if (release_master_file)
261 remove_proc_entry("release_master", litmus_dir);
262#endif
263 if (litmus_dir)
264 remove_proc_entry("litmus", NULL);
265}
266
267long make_plugin_proc_dir(struct sched_plugin* plugin,
268 struct proc_dir_entry** pde_in)
269{
270 struct proc_dir_entry *pde_new = NULL;
271 long rv;
272
273 if (!plugin || !plugin->plugin_name){
274 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
275 __func__);
276 rv = -EINVAL;
277 goto out_no_pde;
278 }
279
280 if (!plugs_dir){
281 printk(KERN_ERR "Could not make plugin sub-directory, because "
282 "/proc/litmus/plugins does not exist.\n");
283 rv = -ENOENT;
284 goto out_no_pde;
285 }
286
287 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
288 if (!pde_new){
289 printk(KERN_ERR "Could not make plugin sub-directory: "
290 "out of memory?.\n");
291 rv = -ENOMEM;
292 goto out_no_pde;
293 }
294
295 rv = 0;
296 *pde_in = pde_new;
297 goto out_ok;
298
299out_no_pde:
300 *pde_in = NULL;
301out_ok:
302 return rv;
303}
304
305void remove_plugin_proc_dir(struct sched_plugin* plugin)
306{
307 if (!plugin || !plugin->plugin_name){
308 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
309 __func__);
310 return;
311 }
312 remove_proc_entry(plugin->plugin_name, plugs_dir);
313}
314
315
316
317/* misc. I/O helper functions */
318
319int copy_and_chomp(char *kbuf, unsigned long ksize,
320 __user const char* ubuf, unsigned long ulength)
321{
322 /* caller must provide buffer space */
323 BUG_ON(!ksize);
324
325 ksize--; /* leave space for null byte */
326
327 if (ksize > ulength)
328 ksize = ulength;
329
330 if(copy_from_user(kbuf, ubuf, ksize))
331 return -EFAULT;
332
333 kbuf[ksize] = '\0';
334
335 /* chomp kbuf */
336 if (ksize > 0 && kbuf[ksize - 1] == '\n')
337 kbuf[ksize - 1] = '\0';
338
339 return ksize;
340}
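/* Example (illustrative): with a 65-byte kbuf and the user buffer
 * "GSN-EDF\n" (8 bytes), copy_and_chomp() copies 8 bytes, null-terminates,
 * strips the trailing newline, and returns 8, leaving "GSN-EDF" in kbuf.
 */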
341
342/* helper functions for clustered plugins */
343static const char* cache_level_names[] = {
344 "ALL",
345 "L1",
346 "L2",
347 "L3",
348};
349
350int parse_cache_level(const char *cache_name, enum cache_level *level)
351{
352 int err = -EINVAL;
353 int i;
354 /* do a quick and dirty comparison to find the cluster size */
355 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
356 if (!strcmp(cache_name, cache_level_names[i])) {
357 *level = (enum cache_level) i;
358 err = 0;
359 break;
360 }
361 return err;
362}
363
364const char* cache_level_name(enum cache_level level)
365{
366 int idx = level;
367
368 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
369 return cache_level_names[idx];
370 else
371 return "INVALID";
372}
373
374
375
376
377/* proc file interface to configure the cluster size */
378
379static ssize_t litmus_cluster_proc_write(struct file *file,
380 const char __user *buffer, size_t count,
381 loff_t *ppos)
382{
383 enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file));
384 ssize_t len;
385 char cache_name[8];
386
387 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
388
389 if (len > 0 && parse_cache_level(cache_name, level)) {
390 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
391 len = -EINVAL;
392 }
393
394 return len;
395}
396
397static int litmus_cluster_proc_show(struct seq_file *m, void *v)
398{
399 enum cache_level *level = (enum cache_level *) m->private;
400
401 seq_printf(m, "%s\n", cache_level_name(*level));
402 return 0;
403}
404
405static int litmus_cluster_proc_open(struct inode *inode, struct file *file)
406{
407 return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode));
408}
409
410static const struct file_operations litmus_cluster_proc_fops = {
411 .open = litmus_cluster_proc_open,
412 .read = seq_read,
413 .llseek = seq_lseek,
414 .release = single_release,
415 .write = litmus_cluster_proc_write,
416};
417
418struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
419 enum cache_level* level)
420{
421 struct proc_dir_entry* cluster_file;
422
423
424 cluster_file = proc_create_data("cluster", 0644, parent,
425 &litmus_cluster_proc_fops,
426 (void *) level);
427 if (!cluster_file) {
428 printk(KERN_ERR
429 "Could not cluster procfs entry.\n");
430 }
431 return cluster_file;
432}
433
434static struct domain_proc_info* active_mapping = NULL;
435
436static int litmus_mapping_proc_show(struct seq_file *m, void *v)
437{
438 struct cd_mapping *mapping = (struct cd_mapping*) m->private;
439
440 if(!mapping)
441 return 0;
442
443 seq_printf(m, "%*pb\n", cpumask_pr_args(mapping->mask));
444 return 0;
445}
446
447static int litmus_mapping_proc_open(struct inode *inode, struct file *file)
448{
449 return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode));
450}
451
452static const struct file_operations litmus_domain_proc_fops = {
453 .open = litmus_mapping_proc_open,
454 .read = seq_read,
455 .llseek = seq_lseek,
456 .release = single_release,
457};
458
459long activate_domain_proc(struct domain_proc_info* map)
460{
461 int i;
462 char name[8];
463
464 if (!map)
465 return -EINVAL;
466 if (cpus_dir == NULL || domains_dir == NULL)
467 return -EINVAL;
468
469 if (active_mapping)
470 deactivate_domain_proc();
471
472 active_mapping = map;
473
474 for (i = 0; i < map->num_cpus; ++i) {
475 struct cd_mapping* m = &map->cpu_to_domains[i];
476 snprintf(name, sizeof(name), "%d", m->id);
477 m->proc_file = proc_create_data(name, 0444, cpus_dir,
478 &litmus_domain_proc_fops, (void*)m);
479 }
480
481 for (i = 0; i < map->num_domains; ++i) {
482 struct cd_mapping* m = &map->domain_to_cpus[i];
483 snprintf(name, sizeof(name), "%d", m->id);
484 m->proc_file = proc_create_data(name, 0444, domains_dir,
485 &litmus_domain_proc_fops, (void*)m);
486 }
487
488 return 0;
489}
490
491long deactivate_domain_proc()
492{
493 int i;
494 char name[65];
495
496 struct domain_proc_info* map = active_mapping;
497
498 if (!map)
499 return -EINVAL;
500
501 for (i = 0; i < map->num_cpus; ++i) {
502 struct cd_mapping* m = &map->cpu_to_domains[i];
503 snprintf(name, sizeof(name), "%d", m->id);
504 remove_proc_entry(name, cpus_dir);
505 m->proc_file = NULL;
506 }
507 for (i = 0; i < map->num_domains; ++i) {
508 struct cd_mapping* m = &map->domain_to_cpus[i];
509 snprintf(name, sizeof(name), "%d", m->id);
510 remove_proc_entry(name, domains_dir);
511 m->proc_file = NULL;
512 }
513
514 active_mapping = NULL;
515
516 return 0;
517}
518
519long init_domain_proc_info(struct domain_proc_info* m,
520 int num_cpus, int num_domains)
521{
522 int i;
523 int num_alloced_cpu_masks = 0;
524 int num_alloced_domain_masks = 0;
525
526 m->cpu_to_domains =
527 kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus,
528 GFP_ATOMIC);
529 if(!m->cpu_to_domains)
530 goto failure;
531
532 m->domain_to_cpus =
533 kmalloc(sizeof(*(m->domain_to_cpus))*num_domains,
534 GFP_ATOMIC);
535 if(!m->domain_to_cpus)
536 goto failure;
537
538 for(i = 0; i < num_cpus; ++i) {
539 if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC))
540 goto failure;
541 ++num_alloced_cpu_masks;
542 }
543 for(i = 0; i < num_domains; ++i) {
544 if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC))
545 goto failure;
546 ++num_alloced_domain_masks;
547 }
548
549 return 0;
550
551failure:
552 for(i = 0; i < num_alloced_cpu_masks; ++i)
553 free_cpumask_var(m->cpu_to_domains[i].mask);
554 for(i = 0; i < num_alloced_domain_masks; ++i)
555 free_cpumask_var(m->domain_to_cpus[i].mask);
556 if(m->cpu_to_domains)
557 kfree(m->cpu_to_domains);
558 if(m->domain_to_cpus)
559 kfree(m->domain_to_cpus);
560 return -ENOMEM;
561}
562
563void destroy_domain_proc_info(struct domain_proc_info* m)
564{
565 int i;
566 for(i = 0; i < m->num_cpus; ++i)
567 free_cpumask_var(m->cpu_to_domains[i].mask);
568 for(i = 0; i < m->num_domains; ++i)
569 free_cpumask_var(m->domain_to_cpus[i].mask);
570 kfree(m->cpu_to_domains);
571 kfree(m->domain_to_cpus);
572	memset(m, 0, sizeof(*m));
573}
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 000000000000..43d9aece2e74
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,188 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
3#include <litmus/fdso.h>
4
5#ifdef CONFIG_LITMUS_LOCKING
6
7#include <linux/sched.h>
8#include <litmus/litmus.h>
9#include <litmus/sched_plugin.h>
10#include <litmus/trace.h>
11#include <litmus/wait.h>
12
13static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
14static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
15static int close_generic_lock(struct od_table_entry* entry);
16static void destroy_generic_lock(obj_type_t type, void* sem);
17
18struct fdso_ops generic_lock_ops = {
19 .create = create_generic_lock,
20 .open = open_generic_lock,
21 .close = close_generic_lock,
22 .destroy = destroy_generic_lock
23};
24
25static inline bool is_lock(struct od_table_entry* entry)
26{
27 return entry->class == &generic_lock_ops;
28}
29
30static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
31{
32 BUG_ON(!is_lock(entry));
33 return (struct litmus_lock*) entry->obj->obj;
34}
35
36static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
37{
38 struct litmus_lock* lock;
39 int err;
40
41 err = litmus->allocate_lock(&lock, type, arg);
42 if (err == 0)
43 *obj_ref = lock;
44 return err;
45}
46
47static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
48{
49 struct litmus_lock* lock = get_lock(entry);
50 if (lock->ops->open)
51 return lock->ops->open(lock, arg);
52 else
53 return 0; /* default: any task can open it */
54}
55
56static int close_generic_lock(struct od_table_entry* entry)
57{
58 struct litmus_lock* lock = get_lock(entry);
59 if (lock->ops->close)
60 return lock->ops->close(lock);
61 else
62 return 0; /* default: closing succeeds */
63}
64
65static void destroy_generic_lock(obj_type_t type, void* obj)
66{
67 struct litmus_lock* lock = (struct litmus_lock*) obj;
68 lock->ops->deallocate(lock);
69}
70
71asmlinkage long sys_litmus_lock(int lock_od)
72{
73 long err = -EINVAL;
74 struct od_table_entry* entry;
75 struct litmus_lock* l;
76
77 TS_SYSCALL_IN_START;
78
79 TS_SYSCALL_IN_END;
80
81 TS_LOCK_START;
82
83 entry = get_entry_for_od(lock_od);
84 if (entry && is_lock(entry)) {
85 l = get_lock(entry);
86 TRACE_CUR("attempts to lock 0x%p\n", l);
87 err = l->ops->lock(l);
88 }
89
 90	/* Note: task may have been suspended or preempted in between! Take
91 * this into account when computing overheads. */
92 TS_LOCK_END;
93
94 TS_SYSCALL_OUT_START;
95
96 return err;
97}
98
99asmlinkage long sys_litmus_unlock(int lock_od)
100{
101 long err = -EINVAL;
102 struct od_table_entry* entry;
103 struct litmus_lock* l;
104
105 TS_SYSCALL_IN_START;
106
107 TS_SYSCALL_IN_END;
108
109 TS_UNLOCK_START;
110
111 entry = get_entry_for_od(lock_od);
112 if (entry && is_lock(entry)) {
113 l = get_lock(entry);
114 TRACE_CUR("attempts to unlock 0x%p\n", l);
115 err = l->ops->unlock(l);
116 }
117
118	/* Note: task may have been preempted in between! Take this into
119 * account when computing overheads. */
120 TS_UNLOCK_END;
121
122 TS_SYSCALL_OUT_START;
123
124 return err;
125}
126
127struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
128{
129 wait_queue_t* q;
130 struct task_struct* t = NULL;
131
132 if (waitqueue_active(wq)) {
133 q = list_entry(wq->task_list.next,
134 wait_queue_t, task_list);
135 t = (struct task_struct*) q->private;
136 __remove_wait_queue(wq, q);
137 }
138 return(t);
139}
140
141unsigned int __add_wait_queue_prio_exclusive(
142 wait_queue_head_t* head,
143 prio_wait_queue_t *new)
144{
145 struct list_head *pos;
146 unsigned int passed = 0;
147
148 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
149
150 /* find a spot where the new entry is less than the next */
151 list_for_each(pos, &head->task_list) {
152 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
153 wq.task_list);
154
155 if (unlikely(lt_before(new->priority, queued->priority) ||
156 (new->priority == queued->priority &&
157 new->tie_breaker < queued->tie_breaker))) {
158 /* pos is not less than new, thus insert here */
159 __list_add(&new->wq.task_list, pos->prev, pos);
160 goto out;
161 }
162 passed++;
163 }
164
165	/* if we get to this point, either the list is empty or every
166	 * queued element is less than new.
167 * Let's add new to the end. */
168 list_add_tail(&new->wq.task_list, &head->task_list);
169out:
170 return passed;
171}
172
173
174#else
175
176struct fdso_ops generic_lock_ops = {};
177
178asmlinkage long sys_litmus_lock(int sem_od)
179{
180 return -ENOSYS;
181}
182
183asmlinkage long sys_litmus_unlock(int sem_od)
184{
185 return -ENOSYS;
186}
187
188#endif
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 000000000000..03e9b5acfb5d
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,141 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/preempt.h>
5#include <litmus/trace.h>
6
7/* The rescheduling state of each processor.
8 */
9DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
10
11void sched_state_will_schedule(struct task_struct* tsk)
12{
13 /* Litmus hack: we only care about processor-local invocations of
14 * set_tsk_need_resched(). We can't reliably set the flag remotely
15 * since it might race with other updates to the scheduling state. We
16 * can't rely on the runqueue lock protecting updates to the sched
17 * state since processors do not acquire the runqueue locks for all
18 * updates to the sched state (to avoid acquiring two runqueue locks at
19 * the same time). Further, if tsk is residing on a remote processor,
20 * then that processor doesn't actually know yet that it is going to
21 * reschedule; it still must receive an IPI (unless a local invocation
22 * races).
23 */
24 if (likely(task_cpu(tsk) == smp_processor_id())) {
25 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
26 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
27 set_sched_state(PICKED_WRONG_TASK);
28 else
29 set_sched_state(WILL_SCHEDULE);
30 } else
31 /* Litmus tasks should never be subject to a remote
32 * set_tsk_need_resched(). */
33 BUG_ON(is_realtime(tsk));
34#ifdef CONFIG_PREEMPT_STATE_TRACE
35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
36 __builtin_return_address(0));
37#endif
38}
39
40/* Called by the IPI handler after another CPU called smp_send_resched(). */
41void sched_state_ipi(void)
42{
43 /* If the IPI was slow, we might be in any state right now. The IPI is
44 * only meaningful if we are in SHOULD_SCHEDULE. */
45 if (is_in_sched_state(SHOULD_SCHEDULE)) {
46 /* Cause scheduler to be invoked.
47 * This will cause a transition to WILL_SCHEDULE. */
48 set_tsk_need_resched(current);
49 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
50 current->comm, current->pid);
51 TS_SEND_RESCHED_END;
52 } else {
53 /* ignore */
54 TRACE_STATE("ignoring IPI in state %x (%s)\n",
55 get_sched_state(),
56 sched_state_name(get_sched_state()));
57 }
58}
59
60/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
61 * hold the lock that is used to serialize scheduling decisions. */
62void litmus_reschedule(int cpu)
63{
64 int picked_transition_ok = 0;
65 int scheduled_transition_ok = 0;
66
67 /* The (remote) CPU could be in any state. */
68
69 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
70 * is not aware of the need to reschedule at this point. */
71
72 /* is a context switch in progress? */
73 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
74 picked_transition_ok = sched_state_transition_on(
75 cpu, TASK_PICKED, PICKED_WRONG_TASK);
76
77 if (!picked_transition_ok &&
78 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
79 /* We either raced with the end of the context switch, or the
80 * CPU was in TASK_SCHEDULED anyway. */
81 scheduled_transition_ok = sched_state_transition_on(
82 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
83 }
84
85 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
86 * scheduler to be invoked. */
87 if (scheduled_transition_ok) {
88 if (smp_processor_id() == cpu) {
89 set_tsk_need_resched(current);
90 preempt_set_need_resched();
91 } else {
92 TS_SEND_RESCHED_START(cpu);
93 smp_send_reschedule(cpu);
94 }
95 }
96
97 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
98 __FUNCTION__,
99 picked_transition_ok,
100 scheduled_transition_ok);
101}
102
103void litmus_reschedule_local(void)
104{
105 if (is_in_sched_state(TASK_PICKED))
106 set_sched_state(PICKED_WRONG_TASK);
107 else if (is_in_sched_state(TASK_SCHEDULED
108 | SHOULD_SCHEDULE
109 | PICKED_WRONG_TASK)) {
110 set_sched_state(WILL_SCHEDULE);
111 set_tsk_need_resched(current);
112 preempt_set_need_resched();
113 }
114}
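/* Summary (informal; see litmus/preempt.h for the authoritative state
 * definitions): in the common case a CPU cycles through
 *
 *	TASK_SCHEDULED -> SHOULD_SCHEDULE -> WILL_SCHEDULE -> TASK_PICKED
 *	               -> TASK_SCHEDULED
 *
 * PICKED_WRONG_TASK is entered when a preemption request races with a
 * scheduling decision that is already in progress, which forces another
 * pass through the scheduler before the picked task may run.
 */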
115
116#ifdef CONFIG_DEBUG_KERNEL
117
118void sched_state_plugin_check(void)
119{
120 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
121 TRACE("!!!! plugin did not call sched_state_task_picked()!"
122 "Calling sched_state_task_picked() is mandatory---fix this.\n");
123 set_sched_state(TASK_PICKED);
124 }
125}
126
127#define NAME_CHECK(x) case x: return #x
128const char* sched_state_name(int s)
129{
130 switch (s) {
131 NAME_CHECK(TASK_SCHEDULED);
132 NAME_CHECK(SHOULD_SCHEDULE);
133 NAME_CHECK(WILL_SCHEDULE);
134 NAME_CHECK(TASK_PICKED);
135 NAME_CHECK(PICKED_WRONG_TASK);
136 default:
137 return "UNKNOWN";
138 };
139}
140
141#endif
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..e5dec0bbbba9
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,353 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h>
21
22#include <litmus/bheap.h>
23
24/* Uncomment when debugging timer races... */
25#if 0
26#define VTRACE_TASK TRACE_TASK
27#define VTRACE TRACE
28#else
29#define VTRACE_TASK(t, fmt, args...) /* shut up */
30#define VTRACE(fmt, args...) /* be quiet already */
31#endif
32
33static int dummy_resched(rt_domain_t *rt)
34{
35 return 0;
36}
37
38static int dummy_order(struct bheap_node* a, struct bheap_node* b)
39{
40 return 0;
41}
42
43/* default implementation: use default lock */
44static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
45{
46 merge_ready(rt, tasks);
47}
48
49static unsigned int time2slot(lt_t time)
50{
51 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
52}
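/* Example (illustrative, assuming a 1ms quantum and, say,
 * RELEASE_QUEUE_SLOTS == 127): releases at t = 5ms and t = 132ms both hash
 * to slot 5 (132 mod 127 == 5) and land in the same bucket;
 * get_release_heap() below then distinguishes them by exact release time.
 */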
53
54static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
55{
56 unsigned long flags;
57 struct release_heap* rh;
58 rh = container_of(timer, struct release_heap, timer);
59
60 TS_RELEASE_LATENCY(rh->release_time);
61
62 VTRACE("on_release_timer(0x%p) starts.\n", timer);
63
64 TS_RELEASE_START;
65
66
67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
69 /* remove from release queue */
70 list_del(&rh->list);
71 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
72 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
73
74 /* call release callback */
75 rh->dom->release_jobs(rh->dom, &rh->heap);
76 /* WARNING: rh can be referenced from other CPUs from now on. */
77
78 TS_RELEASE_END;
79
80 VTRACE("on_release_timer(0x%p) ends.\n", timer);
81
82 return HRTIMER_NORESTART;
83}
84
85/* allocated in litmus.c */
86struct kmem_cache * release_heap_cache;
87
88struct release_heap* release_heap_alloc(int gfp_flags)
89{
90 struct release_heap* rh;
91 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
92 if (rh) {
93 /* initialize timer */
94 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
95 rh->timer.function = on_release_timer;
96 }
97 return rh;
98}
99
100void release_heap_free(struct release_heap* rh)
101{
102 /* make sure timer is no longer in use */
103 hrtimer_cancel(&rh->timer);
104 kmem_cache_free(release_heap_cache, rh);
105}
106
107/* Caller must hold release lock.
108 * Will return heap for given time. If no such heap exists prior to
109 * the invocation it will be created.
110 */
111static struct release_heap* get_release_heap(rt_domain_t *rt,
112 struct task_struct* t,
113 int use_task_heap)
114{
115 struct list_head* pos;
116 struct release_heap* heap = NULL;
117 struct release_heap* rh;
118 lt_t release_time = get_release(t);
119 unsigned int slot = time2slot(release_time);
120
121 /* initialize pos for the case that the list is empty */
122 pos = rt->release_queue.slot[slot].next;
123 list_for_each(pos, &rt->release_queue.slot[slot]) {
124 rh = list_entry(pos, struct release_heap, list);
125 if (release_time == rh->release_time) {
126 /* perfect match -- this happens on hyperperiod
127 * boundaries
128 */
129 heap = rh;
130 break;
131 } else if (lt_before(release_time, rh->release_time)) {
132 /* we need to insert a new node since rh is
133 * already in the future
134 */
135 break;
136 }
137 }
138 if (!heap && use_task_heap) {
139 /* use pre-allocated release heap */
140 rh = tsk_rt(t)->rel_heap;
141
142 rh->dom = rt;
143 rh->release_time = release_time;
144
145 /* add to release queue */
146 list_add(&rh->list, pos->prev);
147 heap = rh;
148 }
149 return heap;
150}
151
152static void reinit_release_heap(struct task_struct* t)
153{
154 struct release_heap* rh;
155
156 /* use pre-allocated release heap */
157 rh = tsk_rt(t)->rel_heap;
158
159 /* Make sure it is safe to use. The timer callback could still
160 * be executing on another CPU; hrtimer_cancel() will wait
161 * until the timer callback has completed. However, under no
162 * circumstances should the timer be active (= yet to be
163 * triggered).
164 *
165 * WARNING: If the CPU still holds the release_lock at this point,
166 * deadlock may occur!
167 */
168 BUG_ON(hrtimer_cancel(&rh->timer));
169
170 /* initialize */
171 bheap_init(&rh->heap);
172#ifdef CONFIG_RELEASE_MASTER
173 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
174#endif
175}
176/* arm_release_timer() - start local release timer or trigger
177 * remote timer (pull timer)
178 *
179 * Called by add_release() with:
180 * - tobe_lock taken
181 * - IRQ disabled
182 */
183#ifdef CONFIG_RELEASE_MASTER
184#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
185static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
186#else
187static void arm_release_timer(rt_domain_t *_rt)
188#endif
189{
190 rt_domain_t *rt = _rt;
191 struct list_head list;
192 struct list_head *pos, *safe;
193 struct task_struct* t;
194 struct release_heap* rh;
195
196 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
197 list_replace_init(&rt->tobe_released, &list);
198
199 list_for_each_safe(pos, safe, &list) {
200		/* pick task off the work list */
201 t = list_entry(pos, struct task_struct, rt_param.list);
202 sched_trace_task_release(t);
203 list_del(pos);
204
205 /* put into release heap while holding release_lock */
206 raw_spin_lock(&rt->release_lock);
207 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
208
209 rh = get_release_heap(rt, t, 0);
210 if (!rh) {
211 /* need to use our own, but drop lock first */
212 raw_spin_unlock(&rt->release_lock);
213 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
214 &rt->release_lock);
215
216 reinit_release_heap(t);
217 VTRACE_TASK(t, "release_heap ready\n");
218
219 raw_spin_lock(&rt->release_lock);
220 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
221 &rt->release_lock);
222
223 rh = get_release_heap(rt, t, 1);
224 }
225 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
226 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
227
228 raw_spin_unlock(&rt->release_lock);
229 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
230
231 /* To avoid arming the timer multiple times, we only let the
232 * owner do the arming (which is the "first" task to reference
233 * this release_heap anyway).
234 */
235 if (rh == tsk_rt(t)->rel_heap) {
236 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
237
238 if (!hrtimer_is_hres_active(&rh->timer)) {
239 TRACE_TASK(t, "WARNING: no hires timer!!!\n");
240 }
241
242 /* we cannot arm the timer using hrtimer_start()
243 * as it may deadlock on rq->lock
244 *
245 * PINNED mode is ok on both local and remote CPU
246 */
247#ifdef CONFIG_RELEASE_MASTER
248 if (rt->release_master == NO_CPU &&
249 target_cpu == NO_CPU)
250#endif
251 __hrtimer_start_range_ns(&rh->timer,
252 ns_to_ktime(rh->release_time),
253 0, HRTIMER_MODE_ABS_PINNED, 0);
254#ifdef CONFIG_RELEASE_MASTER
255 else
256 hrtimer_start_on(
257 /* target_cpu overrides release master */
258 (target_cpu != NO_CPU ?
259 target_cpu : rt->release_master),
260 &rh->info, &rh->timer,
261 ns_to_ktime(rh->release_time),
262 HRTIMER_MODE_ABS_PINNED);
263#endif
264 } else
265 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
266 }
267}
268
269void rt_domain_init(rt_domain_t *rt,
270 bheap_prio_t order,
271 check_resched_needed_t check,
272 release_jobs_t release
273 )
274{
275 int i;
276
277 BUG_ON(!rt);
278 if (!check)
279 check = dummy_resched;
280 if (!release)
281 release = default_release_jobs;
282 if (!order)
283 order = dummy_order;
284
285#ifdef CONFIG_RELEASE_MASTER
286 rt->release_master = NO_CPU;
287#endif
288
289 bheap_init(&rt->ready_queue);
290 INIT_LIST_HEAD(&rt->tobe_released);
291 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
292 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
293
294 raw_spin_lock_init(&rt->ready_lock);
295 raw_spin_lock_init(&rt->release_lock);
296 raw_spin_lock_init(&rt->tobe_lock);
297
298 rt->check_resched = check;
299 rt->release_jobs = release;
300 rt->order = order;
301}
302
303/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
304 * @new: the newly released task
305 */
306void __add_ready(rt_domain_t* rt, struct task_struct *new)
307{
308 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
309 "to ready queue at %llu\n",
310 new->comm, new->pid,
311 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
312 get_release(new), litmus_clock());
313
314 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
315
316 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
317 rt->check_resched(rt);
318}
319
320/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
321 * @tasks: the newly released tasks
322 */
323void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
324{
325 bheap_union(rt->order, &rt->ready_queue, tasks);
326 rt->check_resched(rt);
327}
328
329
330#ifdef CONFIG_RELEASE_MASTER
331void __add_release_on(rt_domain_t* rt, struct task_struct *task,
332 int target_cpu)
333{
334 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
335 get_release(task), target_cpu);
336 list_add(&tsk_rt(task)->list, &rt->tobe_released);
337 task->rt_param.domain = rt;
338
339 arm_release_timer_on(rt, target_cpu);
340}
341#endif
342
343/* add_release - add a real-time task to the rt release queue.
344 * @task: the sleeping task
345 */
346void __add_release(rt_domain_t* rt, struct task_struct *task)
347{
348 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
349 list_add(&tsk_rt(task)->list, &rt->tobe_released);
350 task->rt_param.domain = rt;
351
352 arm_release_timer(rt);
353}
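
As an illustration of how a plugin is expected to drive an rt_domain_t, here is a sketch that is not part of this patch: my_check_resched(), my_domain_setup(), and my_requeue() are placeholder names, and edf_ready_order() is assumed to be the comparator declared in litmus/edf_common.h.

#include <litmus/litmus.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>

static rt_domain_t my_domain;

static int my_check_resched(rt_domain_t *rt)
{
	/* a real plugin would check here whether the new head of the
	 * ready queue should preempt the currently scheduled task */
	return 0;
}

static void my_domain_setup(void)
{
	/* EDF ordering; passing NULL selects default_release_jobs() above */
	rt_domain_init(&my_domain, edf_ready_order, my_check_resched, NULL);
}

static void my_requeue(struct task_struct *t)
{
	unsigned long flags;

	if (is_released(t, litmus_clock())) {
		/* the ready queue is protected by ready_lock */
		raw_spin_lock_irqsave(&my_domain.ready_lock, flags);
		__add_ready(&my_domain, t);
		raw_spin_unlock_irqrestore(&my_domain.ready_lock, flags);
	} else {
		/* __add_release() arms the release timer; it expects
		 * tobe_lock to be held with IRQs disabled */
		raw_spin_lock_irqsave(&my_domain.tobe_lock, flags);
		__add_release(&my_domain, t);
		raw_spin_unlock_irqrestore(&my_domain.tobe_lock, flags);
	}
}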
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..edd91e9bf773
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,238 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10#include <linux/seq_file.h>
11
12#include <litmus/litmus.h>
13#include <litmus/sched_plugin.h>
14#include <litmus/preempt.h>
15#include <litmus/jobs.h>
16
17/*
18 * Generic function to trigger preemption on either local or remote cpu
19 * from scheduler plugins. The key feature is that this function is
20 * non-preemptive section aware and does not invoke the scheduler / send
21 * IPIs if the to-be-preempted task is actually non-preemptive.
22 */
23void preempt_if_preemptable(struct task_struct* t, int cpu)
24{
25	/* t is the real-time task executing on CPU 'cpu'. If t is NULL, then
26	 * 'cpu' is currently scheduling background work.
27	 */
28
29 int reschedule = 0;
30
31 if (!t)
32 /* move non-real-time task out of the way */
33 reschedule = 1;
34 else {
35 if (smp_processor_id() == cpu) {
36 /* local CPU case */
37 /* check if we need to poke userspace */
38 if (is_user_np(t))
39 /* Yes, poke it. This doesn't have to be atomic since
40 * the task is definitely not executing. */
41 request_exit_np(t);
42 else if (!is_kernel_np(t))
43 /* only if we are allowed to preempt the
44 * currently-executing task */
45 reschedule = 1;
46 } else {
47 /* Remote CPU case. Only notify if it's not a kernel
48 * NP section and if we didn't set the userspace
49 * flag. */
50 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
51 }
52 }
53 if (likely(reschedule))
54 litmus_reschedule(cpu);
55}
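
A brief sketch of the calling convention (not part of this file): a plugin passes the real-time task currently scheduled on the target CPU, or NULL if that CPU is running background work. The struct below is purely hypothetical and only serves to show the expected arguments.

struct demo_cpu_state {
	int cpu;			/* CPU described by this entry */
	struct task_struct *scheduled;	/* RT task running there, or NULL */
};

static void demo_check_for_preemption(struct demo_cpu_state *entry)
{
	/* NP-section aware: no IPI is sent if the scheduled task is
	 * currently non-preemptive; it is instead asked to yield when
	 * its non-preemptive section ends. */
	preempt_if_preemptable(entry->scheduled, entry->cpu);
}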
56
57
58/*************************************************************
59 * Dummy plugin functions *
60 *************************************************************/
61
62static void litmus_dummy_finish_switch(struct task_struct * prev)
63{
64}
65
66static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
67{
68 sched_state_task_picked();
69 return NULL;
70}
71
72static long litmus_dummy_admit_task(struct task_struct* tsk)
73{
74 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
75 tsk->comm, tsk->pid);
76 return -EINVAL;
77}
78
79static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
80{
81}
82
83static void litmus_dummy_task_wake_up(struct task_struct *task)
84{
85}
86
87static void litmus_dummy_task_block(struct task_struct *task)
88{
89}
90
91static void litmus_dummy_task_exit(struct task_struct *task)
92{
93}
94
95static void litmus_dummy_task_cleanup(struct task_struct *task)
96{
97}
98
99static long litmus_dummy_complete_job(void)
100{
101 return -ENOSYS;
102}
103
104static long litmus_dummy_activate_plugin(void)
105{
106 return 0;
107}
108
109static long litmus_dummy_deactivate_plugin(void)
110{
111 return 0;
112}
113
114static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d)
115{
116 *d = NULL;
117 return 0;
118}
119
120static void litmus_dummy_synchronous_release_at(lt_t time_zero)
121{
122 /* ignore */
123}
124
125#ifdef CONFIG_LITMUS_LOCKING
126
127static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
128 void* __user config)
129{
130 return -ENXIO;
131}
132
133#endif
134
135
136/* The default scheduler plugin. It doesn't do anything and lets Linux do its
137 * job.
138 */
139struct sched_plugin linux_sched_plugin = {
140 .plugin_name = "Linux",
141 .task_new = litmus_dummy_task_new,
142 .task_exit = litmus_dummy_task_exit,
143 .task_wake_up = litmus_dummy_task_wake_up,
144 .task_block = litmus_dummy_task_block,
145 .complete_job = litmus_dummy_complete_job,
146 .schedule = litmus_dummy_schedule,
147 .finish_switch = litmus_dummy_finish_switch,
148 .activate_plugin = litmus_dummy_activate_plugin,
149 .deactivate_plugin = litmus_dummy_deactivate_plugin,
150 .get_domain_proc_info = litmus_dummy_get_domain_proc_info,
151 .synchronous_release_at = litmus_dummy_synchronous_release_at,
152#ifdef CONFIG_LITMUS_LOCKING
153 .allocate_lock = litmus_dummy_allocate_lock,
154#endif
155 .admit_task = litmus_dummy_admit_task
156};
157
158/*
159 * The reference to the current plugin that is used to schedule tasks
160 * within the system. It stores references to the actual function
161 * implementations. It should be initialized by calling "init_***_plugin()".
162 */
163struct sched_plugin *litmus = &linux_sched_plugin;
164
165/* the list of registered scheduling plugins */
166static LIST_HEAD(sched_plugins);
167static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
168
169#define CHECK(func) {\
170 if (!plugin->func) \
171 plugin->func = litmus_dummy_ ## func;}
172
173/* FIXME: get reference to module */
174int register_sched_plugin(struct sched_plugin* plugin)
175{
176 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
177 plugin->plugin_name);
178
179 /* make sure we don't trip over null pointers later */
180 CHECK(finish_switch);
181 CHECK(schedule);
182 CHECK(task_wake_up);
183 CHECK(task_exit);
184 CHECK(task_cleanup);
185 CHECK(task_block);
186 CHECK(task_new);
187 CHECK(complete_job);
188 CHECK(activate_plugin);
189 CHECK(deactivate_plugin);
190 CHECK(get_domain_proc_info);
191#ifdef CONFIG_LITMUS_LOCKING
192 CHECK(allocate_lock);
193#endif
194 CHECK(admit_task);
195 CHECK(synchronous_release_at);
196
197 if (!plugin->wait_for_release_at)
198 plugin->wait_for_release_at = default_wait_for_release_at;
199
200 raw_spin_lock(&sched_plugins_lock);
201 list_add(&plugin->list, &sched_plugins);
202 raw_spin_unlock(&sched_plugins_lock);
203
204 return 0;
205}
206
207
208/* FIXME: reference counting, etc. */
209struct sched_plugin* find_sched_plugin(const char* name)
210{
211 struct list_head *pos;
212 struct sched_plugin *plugin;
213
214 raw_spin_lock(&sched_plugins_lock);
215 list_for_each(pos, &sched_plugins) {
216 plugin = list_entry(pos, struct sched_plugin, list);
217 if (!strcmp(plugin->plugin_name, name))
218 goto out_unlock;
219 }
220 plugin = NULL;
221
222out_unlock:
223 raw_spin_unlock(&sched_plugins_lock);
224 return plugin;
225}
226
227void print_sched_plugins(struct seq_file *m)
228{
229 struct list_head *pos;
230 struct sched_plugin *plugin;
231
232 raw_spin_lock(&sched_plugins_lock);
233 list_for_each(pos, &sched_plugins) {
234 plugin = list_entry(pos, struct sched_plugin, list);
235 seq_printf(m, "%s\n", plugin->plugin_name);
236 }
237 raw_spin_unlock(&sched_plugins_lock);
238}
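
A minimal plugin only needs to supply the callbacks it cares about; register_sched_plugin() substitutes the litmus_dummy_* defaults for the rest via CHECK(). The following is a sketch under that assumption; "DEMO", demo_schedule(), and demo_admit_task() are placeholder names and not part of this patch.

#include <linux/module.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

static struct task_struct* demo_schedule(struct task_struct *prev)
{
	sched_state_task_picked();	/* mandatory scheduling-state handshake */
	return NULL;			/* run background (non-RT) work */
}

static long demo_admit_task(struct task_struct *tsk)
{
	return 0;	/* accept every task; real plugins validate parameters */
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",
	.schedule	= demo_schedule,
	.admit_task	= demo_admit_task,
	/* all remaining callbacks are filled in by register_sched_plugin() */
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);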
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..7ab388646e29
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,308 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/wait.h>
8
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/fdso.h>
12#include <litmus/trace.h>
13
14
15#ifdef CONFIG_LITMUS_LOCKING
16
17#include <litmus/srp.h>
18
19srp_prioritization_t get_srp_prio;
20
21struct srp {
22 struct list_head ceiling;
23 wait_queue_head_t ceiling_blocked;
24};
25#define system_ceiling(srp) list2prio(srp->ceiling.next)
26#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
27
28#define UNDEF_SEM -2
29
30DEFINE_PER_CPU(struct srp, srp);
31
32DEFINE_PER_CPU(int, srp_objects_in_use);
33
34/* Initialize SRP semaphores at boot time. */
35static int __init srp_init(void)
36{
37 int i;
38
39 printk("Initializing SRP per-CPU ceilings...");
40 for (i = 0; i < NR_CPUS; i++) {
41 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
42 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
43 per_cpu(srp_objects_in_use, i) = 0;
44 }
45 printk(" done!\n");
46
47 return 0;
48}
49module_init(srp_init);
50
51/* SRP task priority comparison function. Smaller numeric values have higher
52 * priority; ties are broken by PID (lower PID wins). Special case:
53 * priority == 0 <=> no priority. */
54static int srp_higher_prio(struct srp_priority* first,
55 struct srp_priority* second)
56{
57 if (!first->priority)
58 return 0;
59 else
60 return !second->priority ||
61 first->priority < second->priority || (
62 first->priority == second->priority &&
63 first->pid < second->pid);
64}
65
66
67static int srp_exceeds_ceiling(struct task_struct* first,
68 struct srp* srp)
69{
70 struct srp_priority prio;
71
72 if (list_empty(&srp->ceiling))
73 return 1;
74 else {
75 prio.pid = first->pid;
76 prio.priority = get_srp_prio(first);
77 return srp_higher_prio(&prio, system_ceiling(srp)) ||
78 ceiling2sem(system_ceiling(srp))->owner == first;
79 }
80}
81
82static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
83{
84 struct list_head *pos;
85 if (in_list(&prio->list)) {
86 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
87 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
88 return;
89 }
90 list_for_each(pos, &srp->ceiling)
91 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
92 __list_add(&prio->list, pos->prev, pos);
93 return;
94 }
95
96 list_add_tail(&prio->list, &srp->ceiling);
97}
98
99
100static int lock_srp_semaphore(struct litmus_lock* l)
101{
102 struct task_struct* t = current;
103 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
104
105 if (!is_realtime(t))
106 return -EPERM;
107
108 /* prevent acquisition of local locks in global critical sections */
109 if (tsk_rt(t)->num_locks_held)
110 return -EBUSY;
111
112 preempt_disable();
113
114 /* Update ceiling. */
115 srp_add_prio(this_cpu_ptr(&srp), &sem->ceiling);
116
117 /* SRP invariant: all resources available */
118 BUG_ON(sem->owner != NULL);
119
120 sem->owner = t;
121 TRACE_CUR("acquired srp 0x%p\n", sem);
122
123 tsk_rt(t)->num_local_locks_held++;
124
125 preempt_enable();
126
127 return 0;
128}
129
130static int unlock_srp_semaphore(struct litmus_lock* l)
131{
132 struct task_struct* t = current;
133 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
134 int err = 0;
135
136 preempt_disable();
137
138 if (sem->owner != t) {
139 err = -EINVAL;
140 } else {
141 /* The current owner should be executing on the correct CPU.
142 *
143 * If the owner transitioned out of RT mode or is exiting, then
144		 * it might have already been migrated away by the best-effort
145 * scheduler and we just have to deal with it. */
146 if (unlikely(!is_realtime(t) && sem->cpu != smp_processor_id())) {
147 TRACE_TASK(t, "SRP unlock cpu=%d, sem->cpu=%d\n",
148 smp_processor_id(), sem->cpu);
149 preempt_enable();
150 err = litmus_be_migrate_to(sem->cpu);
151 preempt_disable();
152 TRACE_TASK(t, "post-migrate: cpu=%d, sem->cpu=%d err=%d\n",
153 smp_processor_id(), sem->cpu, err);
154 }
155 BUG_ON(sem->cpu != smp_processor_id());
156 err = 0;
157
158 /* Determine new system priority ceiling for this CPU. */
159 BUG_ON(!in_list(&sem->ceiling.list));
160
161 list_del(&sem->ceiling.list);
162 sem->owner = NULL;
163
164 /* Wake tasks on this CPU, if they exceed current ceiling. */
165 TRACE_CUR("released srp 0x%p\n", sem);
166 wake_up_all(&this_cpu_ptr(&srp)->ceiling_blocked);
167
168 tsk_rt(t)->num_local_locks_held--;
169 }
170
171 preempt_enable();
172 return err;
173}
174
175static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
176{
177 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
178 int err = 0;
179 struct task_struct* t = current;
180 struct srp_priority t_prio;
181
182 if (!is_realtime(t))
183 return -EPERM;
184
185 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
186
187 preempt_disable();
188
189 if (sem->owner != NULL)
190 err = -EBUSY;
191
192 if (err == 0) {
193 if (sem->cpu == UNDEF_SEM)
194 sem->cpu = get_partition(t);
195 else if (sem->cpu != get_partition(t))
196 err = -EPERM;
197 }
198
199 if (err == 0) {
200 t_prio.priority = get_srp_prio(t);
201 t_prio.pid = t->pid;
202 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
203 sem->ceiling.priority = t_prio.priority;
204 sem->ceiling.pid = t_prio.pid;
205 }
206 }
207
208 preempt_enable();
209
210 return err;
211}
212
213static int close_srp_semaphore(struct litmus_lock* l)
214{
215 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
216 int err = 0;
217
218 preempt_disable();
219
220 if (sem->owner == current)
221 unlock_srp_semaphore(l);
222
223 preempt_enable();
224
225 return err;
226}
227
228static void deallocate_srp_semaphore(struct litmus_lock* l)
229{
230 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
231 raw_cpu_dec(srp_objects_in_use);
232 kfree(sem);
233}
234
235static struct litmus_lock_ops srp_lock_ops = {
236 .open = open_srp_semaphore,
237 .close = close_srp_semaphore,
238 .lock = lock_srp_semaphore,
239 .unlock = unlock_srp_semaphore,
240 .deallocate = deallocate_srp_semaphore,
241};
242
243struct srp_semaphore* allocate_srp_semaphore(void)
244{
245 struct srp_semaphore* sem;
246
247 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
248 if (!sem)
249 return NULL;
250
251 INIT_LIST_HEAD(&sem->ceiling.list);
252 sem->ceiling.priority = 0;
253 sem->cpu = UNDEF_SEM;
254 sem->owner = NULL;
255
256 sem->litmus_lock.ops = &srp_lock_ops;
257
258 raw_cpu_inc(srp_objects_in_use);
259 return sem;
260}
261
262static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
263 void *key)
264{
265 int cpu = smp_processor_id();
266 struct task_struct *tsk = wait->private;
267 if (cpu != get_partition(tsk))
268		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
269 get_partition(tsk));
270 else if (srp_exceeds_ceiling(tsk, this_cpu_ptr(&srp)))
271 return default_wake_function(wait, mode, sync, key);
272 return 0;
273}
274
275static void do_ceiling_block(struct task_struct *tsk)
276{
277 wait_queue_t wait = {
278 .private = tsk,
279 .func = srp_wake_up,
280 .task_list = {NULL, NULL}
281 };
282
283 tsk->state = TASK_UNINTERRUPTIBLE;
284 add_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
285 tsk->rt_param.srp_non_recurse = 1;
286 preempt_enable_no_resched();
287 schedule();
288 preempt_disable();
289 tsk->rt_param.srp_non_recurse = 0;
290 remove_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
291}
292
293/* Wait for current task priority to exceed system-wide priority ceiling.
294 */
295void __srp_ceiling_block(struct task_struct *cur)
296{
297 preempt_disable();
298 if (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) {
299 TRACE_CUR("is priority ceiling blocked.\n");
300 while (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp)))
301 do_ceiling_block(cur);
302 TRACE_CUR("finally exceeds system ceiling.\n");
303 } else
304 TRACE_CUR("is not priority ceiling blocked\n");
305 preempt_enable();
306}
307
308#endif
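
The code above assumes that the active plugin installs a suitable get_srp_prio before SRP semaphores are used. What such an assignment could look like for a rate-monotonic partitioned plugin is sketched below; demo_srp_prio() and demo_activate_plugin() are hypothetical, and the exact srp_prioritization_t signature is the one declared in include/litmus/srp.h.

static unsigned int demo_srp_prio(struct task_struct *t)
{
	/* Rate-monotonic mapping: a shorter period yields a numerically
	 * smaller, i.e., higher, priority. Zero would mean "no priority"
	 * (see srp_higher_prio() above); real periods are never zero. */
	return (unsigned int) get_rt_period(t);
}

static long demo_activate_plugin(void)
{
	get_srp_prio = demo_srp_prio;
	return 0;
}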
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..5d180603f46b
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,152 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18
19struct ts_release_wait {
20 struct list_head list;
21 struct completion completion;
22 lt_t ts_release_time;
23};
24
25#define DECLARE_TS_RELEASE_WAIT(symb) \
26 struct ts_release_wait symb = \
27 { \
28 LIST_HEAD_INIT(symb.list), \
29 COMPLETION_INITIALIZER_ONSTACK(symb.completion), \
30 0 \
31 }
32
33static LIST_HEAD(task_release_list);
34static DEFINE_MUTEX(task_release_lock);
35
36static long do_wait_for_ts_release(void)
37{
38 DECLARE_TS_RELEASE_WAIT(wait);
39
40 long ret = -ERESTARTSYS;
41
42 if (mutex_lock_interruptible(&task_release_lock))
43 goto out;
44
45 list_add(&wait.list, &task_release_list);
46
47 mutex_unlock(&task_release_lock);
48
49 /* We are enqueued, now we wait for someone to wake us up. */
50 ret = wait_for_completion_interruptible(&wait.completion);
51
52 if (!ret) {
53 /* Completion succeeded, setup release time. */
54 ret = litmus->wait_for_release_at(
55 wait.ts_release_time + get_rt_phase(current));
56 } else {
57 /* We were interrupted, must cleanup list. */
58 mutex_lock(&task_release_lock);
59 if (!wait.completion.done)
60 list_del(&wait.list);
61 mutex_unlock(&task_release_lock);
62 }
63
64out:
65 return ret;
66}
67
68int count_tasks_waiting_for_release(void)
69{
70 int task_count = 0;
71 struct list_head *pos;
72
73 mutex_lock(&task_release_lock);
74
75 list_for_each(pos, &task_release_list) {
76 task_count++;
77 }
78
79 mutex_unlock(&task_release_lock);
80
81
82 return task_count;
83}
84
85static long do_release_ts(lt_t start)
86{
87 long task_count = 0;
88
89 struct list_head *pos, *safe;
90 struct ts_release_wait *wait;
91
92 if (mutex_lock_interruptible(&task_release_lock)) {
93 task_count = -ERESTARTSYS;
94 goto out;
95 }
96
97 TRACE("<<<<<< synchronous task system release >>>>>>\n");
98 sched_trace_sys_release(&start);
99 litmus->synchronous_release_at(start);
100
101 task_count = 0;
102 list_for_each_safe(pos, safe, &task_release_list) {
103 wait = (struct ts_release_wait*)
104 list_entry(pos, struct ts_release_wait, list);
105
106 task_count++;
107 wait->ts_release_time = start;
108 complete(&wait->completion);
109 }
110
111 /* clear stale list */
112 INIT_LIST_HEAD(&task_release_list);
113
114 mutex_unlock(&task_release_lock);
115
116out:
117 return task_count;
118}
119
120
121asmlinkage long sys_wait_for_ts_release(void)
122{
123 long ret = -EPERM;
124 struct task_struct *t = current;
125
126 if (is_realtime(t))
127 ret = do_wait_for_ts_release();
128
129 return ret;
130}
131
132#define ONE_MS 1000000
133
134asmlinkage long sys_release_ts(lt_t __user *__delay)
135{
136 long ret;
137 lt_t delay;
138 lt_t start_time;
139
140 /* FIXME: check capabilities... */
141
142 ret = copy_from_user(&delay, __delay, sizeof(delay));
143 if (ret == 0) {
144		/* truncate the current time to a millisecond boundary
145		 * before adding the requested delay (do_div rounds down) */
145 start_time = litmus_clock();
146 do_div(start_time, ONE_MS);
147 start_time *= ONE_MS;
148 ret = do_release_ts(start_time + delay);
149 }
150
151 return ret;
152}
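
To make the rounding in sys_release_ts() concrete, here is a small worked example; it is an illustration only, and the numbers are arbitrary.

static lt_t example_release_time(void)
{
	lt_t now   = 5432100;	/* hypothetical litmus_clock() value (ns) */
	lt_t delay = ONE_MS;	/* caller requested a 1 ms delay */

	do_div(now, ONE_MS);	/* now = 5 (whole milliseconds) */
	now *= ONE_MS;		/* now = 5000000 ns (millisecond boundary) */

	return now + delay;	/* tasks are released at t = 6000000 ns */
}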
diff --git a/litmus/trace.c b/litmus/trace.c
index 2bcaaf474b7a..6b3e5f77cc5e 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -258,6 +258,17 @@ feather_callback void save_cpu_timestamp_irq(unsigned long event,
258			0, RECORD_LOCAL_TIMESTAMP);
259}
260
261feather_callback void save_cpu_task_latency(unsigned long event,
262 unsigned long when_ptr)
263{
264 lt_t now = litmus_clock();
265 lt_t *when = (lt_t*) when_ptr;
266
267 write_cpu_timestamp(event, TSK_RT,
268 0,
269 0, LOCAL_IRQ_COUNT, 0,
270 now - *when, DO_NOT_RECORD_TIMESTAMP);
271}
272
273feather_callback void msg_sent(unsigned long event, unsigned long to)
274{
diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c
new file mode 100644
index 000000000000..06a6a7c17983
--- /dev/null
+++ b/litmus/uncachedev.c
@@ -0,0 +1,102 @@
1#include <linux/sched.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/fs.h>
5#include <linux/errno.h>
6#include <linux/highmem.h>
7#include <asm/page.h>
8#include <linux/miscdevice.h>
9#include <linux/module.h>
10
11#include <litmus/litmus.h>
12
13/* device for allocating pages not cached by the CPU */
14
15#define UNCACHE_NAME "litmus/uncache"
16
17void litmus_uncache_vm_open(struct vm_area_struct *vma)
18{
19}
20
21void litmus_uncache_vm_close(struct vm_area_struct *vma)
22{
23}
24
25int litmus_uncache_vm_fault(struct vm_area_struct* vma,
26 struct vm_fault* vmf)
27{
28 /* modeled after SG DMA video4linux, but without DMA. */
29 /* (see drivers/media/video/videobuf-dma-sg.c) */
30 struct page *page;
31
32 page = alloc_page(GFP_USER);
33 if (!page)
34 return VM_FAULT_OOM;
35
36 clear_user_highpage(page, (unsigned long)vmf->virtual_address);
37 vmf->page = page;
38
39 return 0;
40}
41
42static struct vm_operations_struct litmus_uncache_vm_ops = {
43 .open = litmus_uncache_vm_open,
44 .close = litmus_uncache_vm_close,
45 .fault = litmus_uncache_vm_fault,
46};
47
48static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma)
49{
50 /* first make sure mapper knows what he's doing */
51
52 /* you can only map the "first" page */
53 if (vma->vm_pgoff != 0)
54 return -EINVAL;
55
56 /* you can't share it with anyone */
57 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
58 return -EINVAL;
59
60 /* cannot be expanded, and is not a "normal" page. */
61 vma->vm_flags |= VM_DONTEXPAND;
62
63 /* noncached pages are not explicitly locked in memory (for now). */
64 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
65
66 vma->vm_ops = &litmus_uncache_vm_ops;
67
68 return 0;
69}
70
71static struct file_operations litmus_uncache_fops = {
72 .owner = THIS_MODULE,
73 .mmap = litmus_uncache_mmap,
74};
75
76static struct miscdevice litmus_uncache_dev = {
77 .name = UNCACHE_NAME,
78 .minor = MISC_DYNAMIC_MINOR,
79 .fops = &litmus_uncache_fops,
80	/* pages are not locked in memory, so there is no reason why
81	   anyone should not be allowed to allocate uncached pages */
82 .mode = (S_IRUGO | S_IWUGO),
83};
84
85static int __init init_litmus_uncache_dev(void)
86{
87 int err;
88
89 printk("Initializing LITMUS^RT uncache device.\n");
90 err = misc_register(&litmus_uncache_dev);
91 if (err)
92 printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err);
93 return err;
94}
95
96static void __exit exit_litmus_uncache_dev(void)
97{
98 misc_deregister(&litmus_uncache_dev);
99}
100
101module_init(init_litmus_uncache_dev);
102module_exit(exit_litmus_uncache_dev);
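
For reference, a hedged userspace sketch of how a single uncached page would be obtained from this device; the path /dev/litmus/uncache follows from UNCACHE_NAME and misc_register(), and error handling is kept minimal.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/litmus/uncache", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Only offset 0 is allowed, and the mapping must be private
	 * (see litmus_uncache_mmap() above). */
	void *page = mmap(NULL, sysconf(_SC_PAGESIZE),
			  PROT_READ | PROT_WRITE,
			  MAP_PRIVATE, fd, 0);
	if (page == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	/* accesses to this page bypass the CPU cache */
	((volatile char *) page)[0] = 42;

	munmap(page, sysconf(_SC_PAGESIZE));
	close(fd);
	return 0;
}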